diff options
author | Navan Chauhan <gitstuff@navan.email> | 2022-05-21 10:21:57 -0600 |
---|---|---|
committer | Navan Chauhan <gitstuff@navan.email> | 2022-05-21 10:21:57 -0600 |
commit | 078b992f1b65ef05d621ca7155822ab3b2bfdb1a (patch) | |
tree | a51ac9b8d8d8c146f10057aeae10ceee82a42f76 | |
parent | 50be0f106712373c21ab0866b73f83538a23595f (diff) | |
parent | 5b3ccc0747f9c0c1c5a2af719a5ce6387952cc8c (diff) |
Merge branch 'master' of https://github.com/navanchauhan/FlixRec
-rw-r--r-- | db2pc.py | 16 |
1 file changed, 16 insertions, 0 deletions
```diff
diff --git a/db2pc.py b/db2pc.py
new file mode 100644
index 0000000..f0d6ba9
--- /dev/null
+++ b/db2pc.py
@@ -0,0 +1,16 @@
+from database import *
+import pandas as pd
+
+from sentence_transformers import SentenceTransformer
+
+database_url = "sqlite:///jlm.db"
+
+engine, Session = init_db_stuff(database_url)
+
+model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
+
+df = pd.read_sql("Select * from movies", engine)
+df["combined_text"] = df["title"] + ": " + df["overview"].fillna('') + " - " + df["tagline"].fillna('') + " Genres:- " + df["genres"].fillna('')
+
+print(len(df["combined_text"].tolist()))
+
```