about | summary | refs | log | tree | commit | diff
path: root/db2pc.py
diff options
context:
space:
mode:
author Navan Chauhan <gitstuff@navan.email> 2022-05-21 10:21:57 -0600
committer Navan Chauhan <gitstuff@navan.email> 2022-05-21 10:21:57 -0600
commit 078b992f1b65ef05d621ca7155822ab3b2bfdb1a (patch)
tree a51ac9b8d8d8c146f10057aeae10ceee82a42f76 /db2pc.py
parent 50be0f106712373c21ab0866b73f83538a23595f (diff)
parent 5b3ccc0747f9c0c1c5a2af719a5ce6387952cc8c (diff)
Merge branch 'master' of https://github.com/navanchauhan/FlixRec
Diffstat (limited to 'db2pc.py')
-rw-r--r-- db2pc.py 16
1 files changed, 16 insertions, 0 deletions
diff --git a/db2pc.py b/db2pc.py
new file mode 100644
index 0000000..f0d6ba9
--- /dev/null
+++ b/db2pc.py
@@ -0,0 +1,16 @@
+from database import *
+import pandas as pd
+
+from sentence_transformers import SentenceTransformer
+
+database_url = "sqlite:///jlm.db"
+
+engine, Session = init_db_stuff(database_url)
+
+model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
+
+df = pd.read_sql("Select * from movies", engine)
+df["combined_text"] = df["title"] + ": " + df["overview"].fillna('') + " - " + df["tagline"].fillna('') + " Genres:- " + df["genres"].fillna('')
+
+print(len(df["combined_text"].tolist()))
+