diff options
Diffstat (limited to 'app.py')
-rw-r--r-- | app.py | 90 |
1 files changed, 53 insertions, 37 deletions
@@ -19,47 +19,58 @@ index = pinecone.Index("movies") app = Flask(__name__, template_folder="./templates") + def title2trakt_id(title: str, df=df): - #Matches Exact Title, Otherwise Returns None + # Matches Exact Title, Otherwise Returns None records = df[df["title"].str.lower() == title.lower()] if len(records) == 0: return 0, None elif len(records) == 1: return 1, records.trakt_id.tolist()[0] else: - return 2, records.trakt_id.tolist() + return 2, records.trakt_id.tolist() + def get_vector_value(trakt_id: int): - fetch_response = index.fetch(ids=[str(trakt_id)]) - return fetch_response["vectors"][str(trakt_id)]["values"] - -def query_vectors(vector: list, top_k: int = 20, include_values: bool = False, include_metada: bool = True): - query_response = index.query( - queries=[ - (vector), - ], - top_k=top_k, - include_values=include_values, - include_metadata=include_metada - ) - return query_response + fetch_response = index.fetch(ids=[str(trakt_id)]) + return fetch_response["vectors"][str(trakt_id)]["values"] + + +def query_vectors( + vector: list, + top_k: int = 20, + include_values: bool = False, + include_metada: bool = True, +): + query_response = index.query( + queries=[ + (vector), + ], + top_k=top_k, + include_values=include_values, + include_metadata=include_metada, + ) + return query_response + def query2ids(query_response): - trakt_ids = [] - for match in query_response["results"][0]["matches"]: - trakt_ids.append(int(match["id"])) - return trakt_ids + trakt_ids = [] + for match in query_response["results"][0]["matches"]: + trakt_ids.append(int(match["id"])) + return trakt_ids + def get_deets_by_trakt_id(df, trakt_id: int): - df = df[df["trakt_id"]==trakt_id] - return { - "title": df.title.values[0], - "overview": df.overview.values[0], - "runtime": int(df.runtime.values[0]), - "year": int(df.year.values[0]), - "trakt_id": trakt_id, - "tagline": df.tagline.values[0] - } + df = df[df["trakt_id"] == trakt_id] + return { + "title": df.title.values[0], + "overview": df.overview.values[0], + "runtime": int(df.runtime.values[0]), + "year": int(df.year.values[0]), + "trakt_id": trakt_id, + "tagline": df.tagline.values[0], + } + @app.route("/similar") def get_similar_titles(): @@ -99,10 +110,10 @@ def get_similar_titles(): except TypeError: maxRuntime = 220 vector = get_vector_value(trakt_id) - movie_queries = query_vectors(vector, top_k = 69) + movie_queries = query_vectors(vector, top_k=69) movie_ids = query2ids(movie_queries) results = [] - #for trakt_id in movie_ids: + # for trakt_id in movie_ids: # deets = get_deets_by_trakt_id(df, trakt_id) # results.append(deets) max_res = 30 @@ -111,12 +122,15 @@ def get_similar_titles(): if cur_res >= max_res: break deets = get_deets_by_trakt_id(df, trakt_id) - if ((deets["year"]>=min_year) and (deets["year"]<=max_year)) and ((deets["runtime"]>=minRuntime) and (deets["runtime"]<=maxRuntime)): + if ((deets["year"] >= min_year) and (deets["year"] <= max_year)) and ( + (deets["runtime"] >= minRuntime) and (deets["runtime"] <= maxRuntime) + ): results.append(deets) cur_res += 1 - return render_template("show_results.html",deets=results) + return render_template("show_results.html", deets=results) + -@app.route("/",methods=("GET","POST")) +@app.route("/", methods=("GET", "POST")) def find_similar_title(): if request.method == "GET": return render_template("index.html") @@ -125,7 +139,9 @@ def find_similar_title(): code, values = title2trakt_id(to_search_title) print(f"Code {code} for {to_search_title}") if code == 0: - search_results = process.extract(to_search_title, movie_titles, scorer=fuzz.token_sort_ratio) + search_results = process.extract( + to_search_title, movie_titles, scorer=fuzz.token_sort_ratio + ) to_search_titles = [] to_search_ids = [] results = [] @@ -143,7 +159,7 @@ def find_similar_title(): deets = get_deets_by_trakt_id(df, int(trakt_id)) deets["trakt_id"] = trakt_id results.append(deets) - return render_template("same_titles.html",deets=results) + return render_template("same_titles.html", deets=results) elif code == 1: vector = get_vector_value(values) @@ -153,11 +169,11 @@ def find_similar_title(): for trakt_id in movie_ids: deets = get_deets_by_trakt_id(df, trakt_id) results.append(deets) - return render_template("show_results.html",deets=results) + return render_template("show_results.html", deets=results) else: results = [] for trakt_id in values: deets = get_deets_by_trakt_id(df, int(trakt_id)) deets["trakt_id"] = trakt_id results.append(deets) - return render_template("same_titles.html",deets=results) + return render_template("same_titles.html", deets=results) |