/
main.py
86 lines (58 loc) · 2.44 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import requests
import re
import imdb
from bs4 import BeautifulSoup
from collections import OrderedDict
# Release-format markers that the listing appends to movie titles
# (e.g. "Extended", "IMAX", "3D"). All entries are lowercase.
DEFAULT_SUFFIXES = ['leg', 'dob', 'extended', 'imax', 'atmos', '3d', '4dx', 'hfr', 'vp']


def generate_suffixes_variations(suffixes=None):
    """Return every spelling variation of the given title suffixes.

    For each base suffix ``s`` the result contains, in this order:

    1. the plain forms: every ``s``, then every ``s.`` (trailing dot);
    2. each plain form wrapped in parentheses: ``(s)``, ``(s.)``;
    3. each plain form with only a closing parenthesis: ``s)``, ``s.)``;
    4. each plain form with only an opening parenthesis: ``(s``, ``(s.``.

    The half-open forms cover multi-word parenthesised groups such as
    "(IMAX 3D)", which split on whitespace into "(IMAX" and "3D)".

    Args:
        suffixes: Optional iterable of base suffix strings. When omitted
            (or ``None``), ``DEFAULT_SUFFIXES`` is used. The argument is
            never modified.

    Returns:
        A new list with 8 variations per base suffix.
    """
    base = list(DEFAULT_SUFFIXES if suffixes is None else suffixes)
    plain = base + [s + '.' for s in base]

    variations = list(plain)
    # Order matters only for backward compatibility of the returned list:
    # surrounding, then trailing, then leading parenthesis.
    for template in ('({})', '{})', '({}'):
        variations.extend(template.format(s) for s in plain)
    return variations
def remove_suffixes(titles, suffixes=None):
    """Strip release-format suffixes from titles and de-duplicate them.

    Each title is split on whitespace and every word that matches a known
    suffix (case-insensitively) is dropped. Any dangling ``-`` separators
    left at the end are removed as well, e.g.
    "Vingadores: Endgame - Extended" becomes "Vingadores: Endgame".

    Args:
        titles: Iterable of title strings.
        suffixes: Optional iterable of suffix spellings to remove. When
            omitted (or ``None``), the list produced by
            ``generate_suffixes_variations()`` is used.

    Returns:
        A list of unique cleaned titles in first-seen order. Titles that
        end up empty (blank input, or nothing but suffixes) are omitted.
    """
    if suffixes is None:
        suffixes = generate_suffixes_variations()
    # A set gives O(1) membership tests; lowercase so that caller-supplied
    # suffixes compare correctly against the lowercased words.
    known = {s.lower() for s in suffixes}

    cleaned = []
    for title in titles:
        words = [w for w in title.split() if w.lower() not in known]
        # Removing suffixes can leave one or more trailing separators
        # ("Title - IMAX - 3D" -> "Title - -"); drop all of them. The
        # emptiness check also guards against titles with no words left.
        while words and words[-1] == '-':
            words.pop()
        if words:
            cleaned.append(' '.join(words))

    # dict.fromkeys de-duplicates while keeping a deterministic order.
    return list(dict.fromkeys(cleaned))
def get_ratings(titles):
    """Map each title to the rating of its first IMDb search result.

    Uses the ``imdb`` package: the title is searched, and the first hit is
    fetched again by ID to obtain its details.

    Args:
        titles: Iterable of movie title strings to look up.

    Returns:
        A dict keyed by title. The value is the movie's ``'rating'`` entry,
        or one of two sentinels:

        * ``-1`` -- the search returned no results;
        * ``-2`` -- the movie was found but has no ``'rating'`` entry
          (no user rating yet).
    """
    client = imdb.IMDb()
    ratings = {}
    for title in titles:
        matches = client.search_movie(title)
        if matches:
            # Search results are trusted as-is: the first hit is taken as
            # the movie, and its full record is fetched by ID.
            movie = client.get_movie(matches[0].getID())
            try:
                ratings[title] = movie['rating']
            except KeyError:
                ratings[title] = -2  # movie has no user rating yet
        else:
            ratings[title] = -1  # movie has not been found
    return ratings
def main():
    """Print the movies listed on the NOS cinemas page with their ratings.

    Downloads the listing page, extracts the text of every ``list-item``
    link whose ``href`` contains "Filmes", cleans the titles with
    ``remove_suffixes``, looks the ratings up with ``get_ratings`` and
    prints one ``title --> rating`` line per movie, highest rating first.
    Movies that were not found or have no rating are printed last with an
    explanatory "N/A" message.

    Raises:
        requests.RequestException: If the page cannot be downloaded, the
            request times out, or the server answers with an HTTP error
            status.
    """
    # Without a timeout requests waits forever on an unresponsive server.
    response = requests.get('http://cinemas.nos.pt/pages/cartaz.aspx', timeout=30)
    # Fail loudly instead of parsing an error page and printing nothing.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'lxml')
    links = soup.find_all('a', {'class': 'list-item', 'href': re.compile(r'.*Filmes.*')})
    # The title is the last child node of the anchor; anchors without any
    # children are skipped rather than raising IndexError.
    titles = [link.contents[-1] for link in links if link.contents]

    ratings = get_ratings(remove_suffixes(titles))

    # Sentinel values returned by get_ratings, mapped to their messages.
    # Both are negative, so they sort below every real rating.
    not_available = {
        -1: 'N/A (movie has not been found)',
        -2: 'N/A (movie has no rating yet)',
    }
    ranked = sorted(ratings.items(), key=lambda item: item[1], reverse=True)
    for movie, rating in ranked:
        print(movie, '-->', not_available.get(rating, rating))


if __name__ == '__main__':
    main()