aboutsummaryrefslogtreecommitdiff
path: root/autoAidModules/search_funcs.py
diff options
context:
space:
mode:
authorNavan Chauhan <navanchauhan@gmail.com>2023-10-07 22:47:27 -0600
committerGitHub <noreply@github.com>2023-10-07 22:47:27 -0600
commitcc6fc72154f1b2d70cde89381651877526eef78a (patch)
tree3ff90cfc688eb21ab87c82be9f297b85458305be /autoAidModules/search_funcs.py
parent87d23597f9008c39e18ae679ca5aa5fbe6174ae9 (diff)
parent61a48906f26d7074f7ad64f221d3948cb84f21ea (diff)
Merge pull request #2 from navanchauhan/devel
py module
Diffstat (limited to 'autoAidModules/search_funcs.py')
-rw-r--r--autoAidModules/search_funcs.py76
1 file changed, 76 insertions, 0 deletions
diff --git a/autoAidModules/search_funcs.py b/autoAidModules/search_funcs.py
new file mode 100644
index 0000000..241872e
--- /dev/null
+++ b/autoAidModules/search_funcs.py
@@ -0,0 +1,76 @@
+from serpapi import GoogleSearch
+from .sample_res import res
+from boilerpy3 import extractors
+from fake_useragent import UserAgent
+
+import requests
+
+extractor = extractors.ArticleExtractor()
+
+preferred_forums = {
+ "BMW": ["bimmerforums.com"]
+}
+
+ua = UserAgent()
+
+"""
+Website data:
+[
+
+{
+ "title":"",
+ "link": "",
+ "date": "", # prioritise older posts for older cars?,
+ "full-text": "",
+},
+
+]
+"""
+
def find_preferred_forums(make):
    """Return the curated forum list for *make*, or None when the make is unknown."""
    return preferred_forums.get(make)
+
def get_preferred_forums(make):
    """Return forums for *make*; fall back to discovery for unknown makes.

    Delegates to ``find_preferred_forums`` when *make* has no curated
    entry (currently that also yields None for unknown makes).
    """
    if make in preferred_forums:
        return preferred_forums[make]
    return find_preferred_forums(make)
+
def parse_page(url):
    """Fetch *url* and return the extracted main-article text."""
    return extractor.get_content_from_url(url)
+
+
def search_on_forum(forum, query, max_results: int = 5):
    """Search Google (via SerpAPI) for *query* restricted to *forum*.

    Returns a list of up to *max_results* dicts with keys "title",
    "link" and "full-text" ("full-text" is "" when the linked page
    could not be fetched or parsed). Returns [] when the search
    metadata does not report success.
    """
    params = {
        "q": query + f" {forum}",
        "location": "Austin, Texas, United States",
        "hl": "en",
        "gl": "us",
        "google_domain": "google.com",
        # TODO: load the SerpAPI key from config/env instead of hard-coding it.
        "api_key": "KEY"
    }
    # Live search is disabled during development; the canned response `res`
    # (from .sample_res) stands in for GoogleSearch(params).get_dict().
    #search = GoogleSearch(params)
    #results = search.get_dict()
    results = res

    if results["search_metadata"]["status"] != "Success":
        return []

    data = []
    # Slice instead of enumerate+break to cap the number of results.
    for result in results["organic_results"][:max_results]:
        entry = {
            "title": result["title"],
            "link": result["link"],
            "full-text": ""
        }
        try:
            # timeout= prevents hanging indefinitely on an unresponsive page.
            resp = requests.get(
                result["link"],
                headers={"User-Agent": ua.random},
                timeout=10,
            )
            entry["full-text"] = extractor.get_content(resp.text)
        except Exception as e:
            # Best-effort scrape: keep the result, just without full text.
            print(f"Error parsing page {result['link']}: {e}")
        data.append(entry)
    return data