1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
import requests
import os
from database import *
from tqdm import tqdm
from datetime import datetime
import time
# Trakt API credentials, taken from the environment (None when unset).
trakt_id = os.environ.get("TRAKT_ID")
trakt_se = os.environ.get("TRAKT_SE")

# Request budget for the crawl and a running count of requests made so far.
max_requests = 5000
req_count = 0

# Search filters and paging values sent to the API as query parameters.
years = "1900-2021"
page = 1
extended = "full"  # needed for the additional per-movie fields
limit = "10"  # number of entries per request
languages = "en"  # restrict results to this language

# Remote API root and the local database the results are written to.
api_base = "https://api.trakt.tv"
database_url = "sqlite:///jlm.db"

# HTTP headers attached to every API request.
headers = {"Content-Type": "application/json", "trakt-api-version": "2"}
headers["trakt-api-key"] = trakt_id

# Query-string parameters; "page" and "limit" are overwritten later on.
params = dict(
    query="",
    years=years,
    page=page,
    extended=extended,
    limit=limit,
    languages=languages,
)
def _int_or_none(value):
    """Return ``int(value)``, or ``None`` when *value* is ``None``."""
    return None if value is None else int(value)


def create_movie_dict(movie: dict) -> dict:
    """Flatten one search-result item into a dict of movie fields.

    Args:
        movie: A search-result item whose ``"movie"`` key holds the movie
            record (title, overview, genres, ids, ...).

    Returns:
        A flat dict with the keys ``title``, ``overview``, ``genres``,
        ``language``, ``year``, ``trakt_id``, ``released``, ``runtime``,
        ``country``, ``rating``, ``votes``, ``comment_count`` and
        ``tagline``. Numeric fields are converted to ``int``; a numeric
        field whose value is ``None`` stays ``None``.

    Raises:
        KeyError: If the item or its movie record lacks an expected key.
    """
    m = movie["movie"]
    return {
        "title": m["title"],
        "overview": m["overview"],
        "genres": m["genres"],
        "language": m["language"],
        # Numeric fields may be null in the payload; a bare int(None) would
        # raise TypeError and abort the whole run.
        "year": _int_or_none(m["year"]),
        "trakt_id": m["ids"]["trakt"],
        "released": m["released"],
        "runtime": _int_or_none(m["runtime"]),
        "country": m["country"],
        # NOTE(review): the rating is truncated to a whole number, as before;
        # confirm this matches the column type in the database schema.
        "rating": _int_or_none(m["rating"]),
        "votes": _int_or_none(m["votes"]),
        "comment_count": _int_or_none(m["comment_count"]),
        "tagline": m["tagline"],
    }
# Probe request: ask for a single entry, only to read the pagination header
# that reports how many movies match the search filters.
params["limit"] = 1
res = requests.get(
    f"{api_base}/search/movie",
    headers=headers,
    params=params,
    timeout=30,  # do not hang forever on a stalled connection
)
# Fail with a clear HTTP error instead of a KeyError on the missing
# pagination header when the request was rejected.
res.raise_for_status()
total_items = res.headers["x-pagination-item-count"]
print(f"There are {total_items} movies")
print(f"Started from page {page}")
engine, Session = init_db_stuff(database_url)

# The page size is identical for every request, so compute it once.
# Ceiling division (never below 1) lets `max_requests` pages cover all
# `total_items`; floor division could yield 0 or leave trailing items unread.
page_size = max(1, -(-int(total_items) // max_requests))
params["limit"] = page_size

start_time = datetime.now()
for page in tqdm(range(1, max_requests + 10)):
    if req_count == 999:
        # Crude client-side throttle: after 999 requests, wait out the rest
        # of a 300 s window (or 60 s if that window has already passed).
        # TODO: use the API's rate-limit response headers instead.
        elapsed = int((datetime.now() - start_time).total_seconds())
        seconds_to_sleep = 300 - elapsed
        if seconds_to_sleep < 1:
            seconds_to_sleep = 60
        print(f"Sleeping {seconds_to_sleep}s")
        time.sleep(seconds_to_sleep)
        start_time = datetime.now()
        req_count = 0

    params["page"] = page
    res = requests.get(
        f"{api_base}/search/movie",
        headers=headers,
        params=params,
        timeout=30,  # do not hang forever on a stalled connection
    )
    req_count += 1

    if res.status_code == 500:
        break
    if res.status_code != 200:
        # An error body is not a list of movies; parsing it would crash the
        # run, so report the status and move on to the next page.
        print(f"OwO Code {res.status_code}")
        continue

    results = res.json()
    if not results:
        # An empty page means there is nothing left to fetch.
        break
    movies = [create_movie_dict(item) for item in results]

    with engine.connect() as conn:
        for movie in movies:
            stmt = insert(movies_table).values(
                trakt_id=movie["trakt_id"],
                title=movie["title"],
                # Genres are stored as one space-separated string.
                genres=" ".join(movie["genres"] or []),
                language=movie["language"],
                year=movie["year"],
                released=movie["released"],
                runtime=movie["runtime"],
                country=movie["country"],
                overview=movie["overview"],
                rating=movie["rating"],
                votes=movie["votes"],
                comment_count=movie["comment_count"],
                tagline=movie["tagline"],
            )
            try:
                # One transaction per row: committed on success, rolled back
                # automatically if the insert raises.
                with conn.begin():
                    conn.execute(stmt)
            except IntegrityError:
                # The row violates a table constraint (presumably a movie
                # that is already stored); skip it and keep going.
                continue
|