# path: root/autoAidModules/search_funcs.py
# blob: 241872e1039fa5dbafd6a36a17da12bc59c9ba23
from serpapi import GoogleSearch
from .sample_res import res
from boilerpy3 import extractors
from fake_useragent import UserAgent

import requests

# Shared boilerpy3 article extractor, reused by parse_page and search_on_forum.
extractor = extractors.ArticleExtractor()

# Forum domains to search, keyed by make (e.g. "BMW").
preferred_forums = {
    "BMW": ["bimmerforums.com"]
}

# fake_useragent source; search_on_forum reads `ua.random` for each page request.
ua = UserAgent()

"""
Website data: 
[

{
    "title":"",
    "link": "",
    "date": "", # prioritise older posts for older cars?,
    "full-text": "",
},

]
"""

def find_preferred_forums(make):
    """Look up the forums registered for ``make``.

    Returns the entry stored in ``preferred_forums`` for ``make``, or
    ``None`` when there is no entry for it.
    """
    return preferred_forums.get(make)

def get_preferred_forums(make):
    """Return the preferred forums for ``make``.

    A make that has an entry in ``preferred_forums`` is answered directly
    from that table; any other make is delegated to
    ``find_preferred_forums``.
    """
    if make in preferred_forums:
        return preferred_forums[make]
    return find_preferred_forums(make)

def parse_page(url):
    """Return what the shared article extractor extracts from ``url``."""
    return extractor.get_content_from_url(url)


def search_on_forum(forum, query, max_results: int = 5, timeout: float = 10.0):
    """Collect the top search hits for ``query`` on ``forum`` with their page text.

    Args:
        forum: Forum domain; appended to the search query string.
        query: Free-text search query.
        max_results: Maximum number of organic results to process. Zero or
            a negative value yields an empty list.
        timeout: Seconds to wait for each result page before giving up on
            that page.

    Returns:
        A list of dicts with the keys ``"title"``, ``"link"`` and
        ``"full-text"``. ``"full-text"`` stays ``""`` when the page could
        not be downloaded or parsed. An empty list is returned when the
        search did not report ``"Success"`` or carries no organic results.
    """
    # Parameters for the live SerpApi request (see NOTE below). "KEY" looks
    # like a placeholder value -- confirm a real key is supplied before the
    # live call is re-enabled.
    params = {
        "q": query + f" {forum}",
        "location": "Austin, Texas, United States",
        "hl": "en",
        "gl": "us",
        "google_domain": "google.com",
        "api_key": "KEY"
    }
    # NOTE: the live search is currently disabled; the sample response
    # imported as `res` stands in for it.
    #search = GoogleSearch(params)
    #results = search.get_dict()
    results = res

    # Tolerate responses that lack the metadata block instead of raising.
    if results.get("search_metadata", {}).get("status") != "Success":
        return []

    # The "organic_results" key may be absent; treat that as no hits.
    organic = results.get("organic_results") or []

    data = []
    for result in organic[:max(max_results, 0)]:
        entry = {
            "title": result["title"],
            "link": result["link"],
            "full-text": ""
        }
        try:
            # Without a timeout a single unresponsive host would block the
            # whole search indefinitely.
            resp = requests.get(
                result["link"],
                headers={"User-Agent": ua.random},
                timeout=timeout,
            )
            entry["full-text"] = extractor.get_content(resp.text)
        except Exception as e:
            # Best effort: keep the hit (title + link) even without its text.
            print(f"Error parsing page {result['link']}: {e}")
        data.append(entry)
    return data