about | summary | refs | log | tree | commit | diff
path: root/app.py
diff options
context:
space:
mode:
author navanchauhan <navanchauhan@gmail.com> 2022-05-22 11:42:00 -0600
committer navanchauhan <navanchauhan@gmail.com> 2022-05-22 11:42:00 -0600
commit 404c6fb1a3aa8e19db25a5c53098ce4f8a917fd7 (patch)
tree 578b9b21384b889c37b7f9ab2268456e05da8a96 /app.py
parent 801289d123b6e598110a5a1af96605fce9573f7c (diff)
cleaned with black
Diffstat (limited to 'app.py')
-rw-r--r-- app.py 90
1 files changed, 53 insertions, 37 deletions
diff --git a/app.py b/app.py
index fbd5c7d..a843c44 100644
--- a/app.py
+++ b/app.py
@@ -19,47 +19,58 @@ index = pinecone.Index("movies")
app = Flask(__name__, template_folder="./templates")
+
def title2trakt_id(title: str, df=df):
- #Matches Exact Title, Otherwise Returns None
+ # Matches Exact Title, Otherwise Returns None
records = df[df["title"].str.lower() == title.lower()]
if len(records) == 0:
return 0, None
elif len(records) == 1:
return 1, records.trakt_id.tolist()[0]
else:
- return 2, records.trakt_id.tolist()
+ return 2, records.trakt_id.tolist()
+
def get_vector_value(trakt_id: int):
- fetch_response = index.fetch(ids=[str(trakt_id)])
- return fetch_response["vectors"][str(trakt_id)]["values"]
-
-def query_vectors(vector: list, top_k: int = 20, include_values: bool = False, include_metada: bool = True):
- query_response = index.query(
- queries=[
- (vector),
- ],
- top_k=top_k,
- include_values=include_values,
- include_metadata=include_metada
- )
- return query_response
+ fetch_response = index.fetch(ids=[str(trakt_id)])
+ return fetch_response["vectors"][str(trakt_id)]["values"]
+
+
+def query_vectors(
+ vector: list,
+ top_k: int = 20,
+ include_values: bool = False,
+ include_metada: bool = True,
+):
+ query_response = index.query(
+ queries=[
+ (vector),
+ ],
+ top_k=top_k,
+ include_values=include_values,
+ include_metadata=include_metada,
+ )
+ return query_response
+
def query2ids(query_response):
- trakt_ids = []
- for match in query_response["results"][0]["matches"]:
- trakt_ids.append(int(match["id"]))
- return trakt_ids
+ trakt_ids = []
+ for match in query_response["results"][0]["matches"]:
+ trakt_ids.append(int(match["id"]))
+ return trakt_ids
+
def get_deets_by_trakt_id(df, trakt_id: int):
- df = df[df["trakt_id"]==trakt_id]
- return {
- "title": df.title.values[0],
- "overview": df.overview.values[0],
- "runtime": int(df.runtime.values[0]),
- "year": int(df.year.values[0]),
- "trakt_id": trakt_id,
- "tagline": df.tagline.values[0]
- }
+ df = df[df["trakt_id"] == trakt_id]
+ return {
+ "title": df.title.values[0],
+ "overview": df.overview.values[0],
+ "runtime": int(df.runtime.values[0]),
+ "year": int(df.year.values[0]),
+ "trakt_id": trakt_id,
+ "tagline": df.tagline.values[0],
+ }
+
@app.route("/similar")
def get_similar_titles():
@@ -99,10 +110,10 @@ def get_similar_titles():
except TypeError:
maxRuntime = 220
vector = get_vector_value(trakt_id)
- movie_queries = query_vectors(vector, top_k = 69)
+ movie_queries = query_vectors(vector, top_k=69)
movie_ids = query2ids(movie_queries)
results = []
- #for trakt_id in movie_ids:
+ # for trakt_id in movie_ids:
# deets = get_deets_by_trakt_id(df, trakt_id)
# results.append(deets)
max_res = 30
@@ -111,12 +122,15 @@ def get_similar_titles():
if cur_res >= max_res:
break
deets = get_deets_by_trakt_id(df, trakt_id)
- if ((deets["year"]>=min_year) and (deets["year"]<=max_year)) and ((deets["runtime"]>=minRuntime) and (deets["runtime"]<=maxRuntime)):
+ if ((deets["year"] >= min_year) and (deets["year"] <= max_year)) and (
+ (deets["runtime"] >= minRuntime) and (deets["runtime"] <= maxRuntime)
+ ):
results.append(deets)
cur_res += 1
- return render_template("show_results.html",deets=results)
+ return render_template("show_results.html", deets=results)
+
-@app.route("/",methods=("GET","POST"))
+@app.route("/", methods=("GET", "POST"))
def find_similar_title():
if request.method == "GET":
return render_template("index.html")
@@ -125,7 +139,9 @@ def find_similar_title():
code, values = title2trakt_id(to_search_title)
print(f"Code {code} for {to_search_title}")
if code == 0:
- search_results = process.extract(to_search_title, movie_titles, scorer=fuzz.token_sort_ratio)
+ search_results = process.extract(
+ to_search_title, movie_titles, scorer=fuzz.token_sort_ratio
+ )
to_search_titles = []
to_search_ids = []
results = []
@@ -143,7 +159,7 @@ def find_similar_title():
deets = get_deets_by_trakt_id(df, int(trakt_id))
deets["trakt_id"] = trakt_id
results.append(deets)
- return render_template("same_titles.html",deets=results)
+ return render_template("same_titles.html", deets=results)
elif code == 1:
vector = get_vector_value(values)
@@ -153,11 +169,11 @@ def find_similar_title():
for trakt_id in movie_ids:
deets = get_deets_by_trakt_id(df, trakt_id)
results.append(deets)
- return render_template("show_results.html",deets=results)
+ return render_template("show_results.html", deets=results)
else:
results = []
for trakt_id in values:
deets = get_deets_by_trakt_id(df, int(trakt_id))
deets["trakt_id"] = trakt_id
results.append(deets)
- return render_template("same_titles.html",deets=results)
+ return render_template("same_titles.html", deets=results)