From d75527f7eecc4e2fcdd18ab157412506717c8adb Mon Sep 17 00:00:00 2001
From: navanchauhan First, I needed to check the total number of records in Trakt’s database. Next, I needed to declare the database schema (in database.py). In the end, I could have dropped the embeddings field from the table schema, as I never got around to using it. (Note: I was well within the rate limit, so I did not have to slow down or implement any other measures.)
+import requests
+
import requests
import os
trakt_id = os.getenv("TRAKT_ID")
@@ -87,14 +88,16 @@
res = requests.get(f"{api_base}/search/movie",headers=headers,params=params)
total_items = res.headers["x-pagination-item-count"]
print(f"There are {total_items} movies")
-
There are 333946 movies
database.py
):
+import sqlalchemy
+
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, PickleType
from sqlalchemy import insert
@@ -129,13 +132,15 @@
meta.create_all(engine)
Session = sessionmaker(bind=engine)
return engine, Session
-
Scripting Time
-
+from database import *
+
from database import *
from tqdm import tqdm
import requests
import os
@@ -228,7 +233,8 @@
except IntegrityError:
trans.rollback()
req_count += 1
-
Installing the Python module (pinecone-client)
import pandas as pd
+
+import pandas as pd
import pinecone
from sentence_transformers import SentenceTransformer
from tqdm import tqdm
@@ -293,7 +300,8 @@ As of writing this post, I did not include any other database except Trakt.
str(value), embeddings[idx].tolist()
))
index.upsert(to_send)
-
+
+That's it!
@@ -304,7 +312,8 @@ As of writing this post, I did not include any other database except Trakt. To find similar items, we will first have to map the name of the movie to its trakt_id, get the embeddings we have for that id, and then perform a similarity search. It is possible that this additional step of mapping could be avoided by storing information as metadata in the index.
-def get_trakt_id(df, title: str):
+
+def get_trakt_id(df, title: str):
rec = df[df["title"].str.lower()==movie_name.lower()]
if len(rec.trakt_id.values.tolist()) > 1:
print(f"multiple values found... {len(rec.trakt_id.values)}")
@@ -344,11 +353,13 @@ It is possible that this additional step of mapping could be avoided by storing
"runtime": df.runtime.values[0],
"year": df.year.values[0]
}
-
+
+movie_name = "Now You See Me"
+
+movie_name = "Now You See Me"
movie_trakt_id = get_trakt_id(df, movie_name)
print(movie_trakt_id)
@@ -360,7 +371,8 @@ It is possible that this additional step of mapping could be avoided by storing
for trakt_id in movie_ids:
deets = get_deets_by_trakt_id(df, trakt_id)
print(f"{deets['title']} ({deets['year']}): {deets['overview']}")
-
+
+Output:
-- cgit v1.2.3