From b1b14f0293e1a3a02404af9c8e7a01d59355af1a Mon Sep 17 00:00:00 2001
From: MeexReay
Date: Sun, 1 Sep 2024 17:02:12 +0300
Subject: [PATCH] first commit

---
Review note: the file contents below were revised during review; the blob
ids on the "index" lines still refer to the originally committed contents.

 avito_api.py    | 268 ++++++++++++++++++++++++++++++++++++++++++++++++
 cookies.json    |   1 +
 find_by_hash.py |  21 ++++
 main.py         |  98 ++++++++++++++++++
 top_best.py     |  55 ++++++++++
 top_best_ai.py  |  40 +++++++
 6 files changed, 483 insertions(+)
 create mode 100644 avito_api.py
 create mode 100644 cookies.json
 create mode 100644 find_by_hash.py
 create mode 100644 main.py
 create mode 100644 top_best.py
 create mode 100644 top_best_ai.py

diff --git a/avito_api.py b/avito_api.py
new file mode 100644
index 0000000..810847d
--- /dev/null
+++ b/avito_api.py
@@ -0,0 +1,268 @@
+"""Selenium helpers for scraping Avito laptop listings and Geekbench scores."""
+
+from hashlib import sha256
+from urllib.parse import quote_plus
+
+from selenium.common.exceptions import NoSuchElementException, WebDriverException
+from selenium.webdriver import Firefox
+from selenium.webdriver.common.by import By
+from selenium.webdriver.firefox.options import Options
+
+# Number of page loads avito_get_info attempts before giving up.
+MAX_ATTEMPTS = 10
+
+
+def create_driver(
+    headless = True,
+    clear_cookies = False
+):
+    """Start a Firefox WebDriver.
+
+    headless: run the browser without a visible window.
+    clear_cookies: override the user agent, disable the browser caches and
+        delete all cookies so the session starts clean.
+    """
+    options = Options()
+    if headless:
+        # The command-line flag is enough on its own; the Options.headless
+        # property is deprecated in recent Selenium 4 releases.
+        options.add_argument("--headless")
+    if clear_cookies:
+        options.set_preference("general.useragent.override", "Mozilla/5.0 (Windows; U; Windows NT 10.4;; en-US) AppleWebKit/603.21 (KHTML, like Gecko) Chrome/49.0.3713.352 Safari/603.2 Edge/10.30739")
+        options.set_preference("browser.cache.disk.enable", False)
+        options.set_preference("browser.cache.memory.enable", False)
+        options.set_preference("browser.cache.offline.enable", False)
+        options.set_preference("network.http.use-cache", False)
+    driver = Firefox(options=options)
+    if clear_cookies:
+        driver.delete_all_cookies()
+    driver.get('about:blank')
+    return driver
+
+
+def close_driver(driver: Firefox):
+    """Shut the browser down; quit() closes every window and ends the session."""
+    driver.quit()
+
+
+def avito_search(
+    driver: Firefox,
+    query: str,
+    page: int,
+    url = "https://www.avito.ru/all/noutbuki?cd=1&p={1}&q={0}",
+    cookie = None
+):
+    """Return the listings found on one page of Avito search results.
+
+    query: search text; it is URL-encoded before being put into url.
+    page: value for the {1} placeholder of url.
+    url: template with {0} for the query and {1} for the page.
+    cookie: optional name-to-value mapping added to the session.
+
+    Each result is a dict with the keys url, path, name, price,
+    small_description and hash (SHA-256 hex digest of path). Cards that
+    cannot be parsed are skipped.
+    """
+    search_url = url.format(quote_plus(query), page)
+    driver.get(search_url)
+    if cookie:
+        # Cookies can only be set for the domain that is currently open,
+        # so load the page once, add them, then reload with them applied.
+        for cookie_name, cookie_value in cookie.items():
+            driver.add_cookie({"name": cookie_name, "value": cookie_value})
+        driver.get(search_url)
+
+    objects = []
+    for element in driver.find_elements(By.CSS_SELECTOR, "div[data-marker=\"item\"]"):
+        try:
+            item_url = element.find_element(By.CSS_SELECTOR, "a[itemprop=\"url\"]").get_attribute("href")
+            name = element.find_element(By.CSS_SELECTOR, "h3[itemprop=\"name\"]").text
+            price_text, small_description, *_ = [p.text for p in element.find_elements(By.TAG_NAME, "p")]
+            price = int("".join(c for c in price_text if c in "0123456789"))
+        except (WebDriverException, ValueError):
+            # Missing element, stale card, fewer than two p tags or a
+            # price without digits: skip this card, keep the rest.
+            continue
+        if not item_url:
+            continue
+
+        path = item_url.removeprefix("https://www.avito.ru/").split("?")[0]
+        objects.append({
+            "url": item_url,
+            "path": path,
+            "name": name,
+            "price": price,
+            "small_description": small_description,
+            "hash": sha256(path.encode('utf-8')).hexdigest()
+        })
+
+    return objects
+
+
+def avito_get_info(
+    driver: Firefox,
+    path: str,
+    url = "https://www.avito.ru/{0}",
+    times = 0
+):
+    """Load a listing page and return {"params": dict, "description": str}.
+
+    times: number of attempts already used; after a WebDriver error the
+        page is loaded again until MAX_ATTEMPTS is reached.
+
+    A missing parameter block or description is not an error: whatever was
+    collected so far is returned (description defaults to "N/A"). Returns
+    an empty dict when every attempt failed.
+    """
+    for _ in range(times, MAX_ATTEMPTS):
+        params = {}
+        description = "N/A"
+
+        try:
+            driver.get(url.format(path))
+
+            params_div = driver.find_element(By.CSS_SELECTOR, "div[data-marker=\"item-view/item-params\"]")
+
+            for param in params_div.find_elements(By.TAG_NAME, "p"):
+                # Split on the first ": " only, the value may contain one too.
+                key, separator, value = param.text.partition(": ")
+                if separator:
+                    params[key] = value
+
+            description = driver.find_element(By.CSS_SELECTOR, "div[itemprop=\"description\"]").text
+        except NoSuchElementException:
+            pass
+        except WebDriverException:
+            continue
+
+        return {"params": params, "description": description}
+
+    return {}
+
+
+def _average_score(driver: Firefox, search_url: str):
+    """Return the integer mean of the scores on a Geekbench search page.
+
+    Returns None when no scores are found or they sum to zero.
+    """
+    driver.get(search_url)
+
+    scores = []
+    for column in driver.find_elements(By.CLASS_NAME, "list-col-inner"):
+        for score in column.find_elements(By.CLASS_NAME, "list-col-text-score"):
+            try:
+                scores.append(int(score.text))
+            except ValueError:
+                # Ignore cells whose text is not a plain integer.
+                continue
+
+    total = sum(scores)
+    if total == 0:
+        return None
+    return int(total / len(scores))
+
+
+def get_benchmark_score(
+    driver: Firefox,
+    cpu: str,
+    gpu: str,
+    cpu_url = "https://browser.geekbench.com/search?q={0}",
+    gpu_url = "https://browser.geekbench.com/search?k=v6_compute&q={0}"
+):
+    """Look up average Geekbench scores for a CPU and a GPU name.
+
+    cpu / gpu may be None, in which case that lookup is skipped. Only the
+    part of cpu before the first comma is searched. When no GPU score is
+    available the CPU score is reported for the GPU as well.
+
+    Returns {"cpu": int | None, "gpu": int | None}.
+    """
+    cpu_score = None
+    gpu_score = None
+
+    if cpu is not None:
+        cpu_score = _average_score(driver, cpu_url.format(quote_plus(cpu.split(",")[0])))
+
+    if gpu is not None:
+        gpu_score = _average_score(driver, gpu_url.format(quote_plus(gpu)))
+
+    if gpu_score is None:
+        gpu_score = cpu_score
+
+    return {"cpu": cpu_score, "gpu": gpu_score}
+
+
+def _leading_int(text: str):
+    """Return the run of ASCII digits at the start of text as an int, or None."""
+    digits = ""
+    for char in text.strip():
+        if char not in "0123456789":
+            break
+        digits += char
+    return int(digits) if digits else None
+
+
+def _disk_type(text: str):
+    """Return the leading characters of text that are one of S, D or H."""
+    letters = ""
+    for char in text:
+        if char not in "SDH":
+            break
+        letters += char
+    return letters
+
+
+def pack_thinkpad(
+    driver: Firefox,
+    item: dict[str, object],
+    info: dict[str, object],
+    minimal = True
+):
+    """Merge a search result and its listing details into one record.
+
+    item: one dict returned by avito_search.
+    info: the dict returned by avito_get_info for that item; missing
+        "params" / "description" keys are tolerated.
+    minimal: remove description, name, params, os, cpu_cores,
+        screen_diagonal, state and manufacturer from the record.
+
+    Parameters absent from the listing become None. Benchmark scores for
+    the CPU and GPU are looked up with get_benchmark_score.
+    """
+    params = info.get("params") or {}
+
+    def param(key, convert=None):
+        # Look a listing parameter up, optionally converting its text.
+        value = params.get(key)
+        if value is None or convert is None:
+            return value
+        return convert(value)
+
+    data = {
+        "url": item["url"],
+        "hash": item["hash"],
+        "price": item["price"],
+        "name": item["name"],
+        "description": info.get("description", "N/A"),
+        "params": params,
+        "state": param("Состояние"),
+        "manufacturer": param("Производитель"),
+        "screen_diagonal": param("Диагональ, дюйм"),
+        "screen_size": param("Разрешение экрана"),
+        "cpu": param("Процессор"),
+        "cpu_cores": param("Количество ядер процессора", _leading_int),
+        "ram": param("Оперативная память, ГБ", _leading_int),
+        "disk_type": param("Конфигурация накопителей", _disk_type),
+        "disk_size": param("Объем накопителей, ГБ", _leading_int),
+        "gpu": param("Видеокарта"),
+        "os": param("Операционная система"),
+    }
+
+    data["benchmarks"] = get_benchmark_score(driver, data["cpu"], data["gpu"])
+
+    if minimal:
+        for key in ("description", "name", "params", "os", "cpu_cores",
+                    "screen_diagonal", "state", "manufacturer"):
+            del data[key]
+
+    return data
\ No newline at end of file
diff --git a/cookies.json b/cookies.json
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/cookies.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/find_by_hash.py b/find_by_hash.py
new file mode 100644
index 0000000..7d81c6a
--- /dev/null
+++ b/find_by_hash.py
@@ -0,0 +1,21 @@
+"""Print the stored record whose hash matches the one typed by the user."""
+
+import json
+
+
+def find_by_hash(wanted, path="thinkpads.txt"):
+    """Return the first record in path whose "hash" equals wanted, or None."""
+    with open(path, "r", encoding="utf-8") as records:
+        for line in records:
+            if not line.strip():
+                continue
+            item = json.loads(line)
+            if item["hash"] == wanted:
+                return item
+    return None
+
+
+if __name__ == "__main__":
+    found = find_by_hash(input("hash > "))
+    if found is not None:
+        print(json.dumps(found, indent=2))
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..9c90bac
--- /dev/null
+++ b/main.py
@@ -0,0 +1,98 @@
+"""Scrape Avito ThinkPad listings with several browsers and store them in thinkpads.txt."""
+
+import json
+import threading
+import time
+
+from avito_api import (
+    avito_get_info,
+    avito_search,
+    close_driver,
+    create_driver,
+    pack_thinkpad,
+)
+
+URL = "https://www.avito.ru/all/noutbuki?cd=1&f=ASgCAgECA0XGmgwUeyJmcm9tIjo0MDAwLCJ0byI6MH2coRQUeyJmcm9tIjo0LCJ0byI6bnVsbH2eoRQWeyJmcm9tIjoyMDAsInRvIjpudWxsfQ&q={0}&s=1&p={1}"
+THREADS = 10
+PAGES_PER_THREAD = 10
+HEADLESS = True
+OUTPUT_PATH = "thinkpads.txt"
+
+with open("cookies.json", "r", encoding="utf-8") as cookie_file:
+    COOKIE = json.load(cookie_file)
+
+# Serialises access to OUTPUT_PATH and _seen_hashes across worker threads.
+_output_lock = threading.Lock()
+# Hashes already stored in OUTPUT_PATH; loaded on the first on_pack call.
+_seen_hashes = None
+
+
+def _load_seen_hashes():
+    """Return the hashes stored in OUTPUT_PATH (empty set if it does not exist)."""
+    seen = set()
+    try:
+        with open(OUTPUT_PATH, mode="r", encoding="utf-8") as records:
+            for line in records:
+                if line.strip():
+                    seen.add(json.loads(line)["hash"])
+    except FileNotFoundError:
+        pass
+    return seen
+
+
+def on_pack(pack):
+    """Append pack to OUTPUT_PATH unless its hash is already stored there."""
+    global _seen_hashes
+    with _output_lock:
+        if _seen_hashes is None:
+            _seen_hashes = _load_seen_hashes()
+        if pack["hash"] in _seen_hashes:
+            return
+        with open(OUTPUT_PATH, mode="a", encoding="utf-8") as records:
+            # json.dumps escapes newlines, so every record stays on one line.
+            records.write(json.dumps(pack, ensure_ascii=False) + "\n")
+        _seen_hashes.add(pack["hash"])
+
+
+def parse_page(page):
+    """Scrape PAGES_PER_THREAD consecutive result pages with a dedicated browser.
+
+    page: zero-based index of the worker; it handles result pages
+        page * PAGES_PER_THREAD + 1 .. (page + 1) * PAGES_PER_THREAD.
+    """
+    driver = create_driver(headless = HEADLESS)
+    try:
+        for offset in range(1, PAGES_PER_THREAD + 1):
+            page_number = page * PAGES_PER_THREAD + offset
+
+            print(page_number)
+
+            for item in avito_search(driver, "thinkpad", page_number, url=URL, cookie=COOKIE):
+                info = avito_get_info(driver, item["path"])
+                if not info:
+                    # Every attempt to load the listing failed; skip it.
+                    continue
+                pack = pack_thinkpad(driver, item, info)
+
+                print(pack)
+
+                on_pack(pack)
+    finally:
+        # Always release the browser, even when scraping raised.
+        close_driver(driver)
+
+
+def main():
+    """Start one worker thread per page block and wait for all of them."""
+    threads = [
+        threading.Thread(target=parse_page, args=(page,))
+        for page in range(THREADS)
+    ]
+    for thread in threads:
+        thread.start()
+    for thread in threads:
+        thread.join()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/top_best.py b/top_best.py
new file mode 100644
index 0000000..cd19271
--- /dev/null
+++ b/top_best.py
@@ -0,0 +1,55 @@
+"""Rank the stored ThinkPad listings by a price/quality score and show them one by one."""
+
+import json
+import webbrowser
+
+with open("thinkpads.txt", "r", encoding="utf-8") as records:
+    items = [json.loads(line) for line in records if line.strip()]
+
+
+def _mean(values):
+    """Return the arithmetic mean of the non-None values, or 0 if there are none."""
+    known = [value for value in values if value is not None]
+    return sum(known) / len(known) if known else 0
+
+
+avg_ram = _mean(item["ram"] for item in items)
+avg_cpu = _mean(item["benchmarks"]["cpu"] for item in items)
+avg_gpu = _mean(item["benchmarks"]["gpu"] for item in items)
+avg_disk = _mean(item["disk_size"] for item in items)
+min_price = min((item["price"] for item in items), default=-1)
+max_price = max((item["price"] for item in items), default=-1)
+
+
+def get_score(item):
+    """Return the ranking score of item; higher is better.
+
+    The quality part compares RAM, CPU score, GPU score and disk size with
+    the averages over all items; the price part rewards cheaper listings.
+    """
+    quality = int(((item["ram"] / avg_ram) +
+                   (item["benchmarks"]["cpu"] / avg_cpu) +
+                   (item["benchmarks"]["gpu"] / avg_gpu) +
+                   (item["disk_size"] / avg_disk)) / 4 * 1000)
+
+    return ((max_price - (item["price"] - min_price)) / min_price) * 1500 + quality
+
+
+def key_filter(item):
+    """Return True when item has every value that get_score needs."""
+    required = (
+        item["ram"],
+        item["benchmarks"]["cpu"],
+        item["benchmarks"]["gpu"],
+        item["disk_size"],
+    )
+    return all(value is not None for value in required)
+
+
+if __name__ == "__main__":
+    result = sorted(filter(key_filter, items), key=get_score, reverse=True)
+
+    for item in result:
+        webbrowser.open_new_tab(item["url"])
+        print(f"{item} (score: {get_score(item)})")
+        input()
\ No newline at end of file
diff --git a/top_best_ai.py b/top_best_ai.py
new file mode 100644
index 0000000..16b1e58
--- /dev/null
+++ b/top_best_ai.py
@@ -0,0 +1,40 @@
+"""Ask a local llama3.1 model to pick the best laptop from the first stored records."""
+
+from itertools import islice
+
+import ollama
+
+# Only the first 10 records go into the prompt. Lines keep their trailing
+# newline, so joining with "\n" separates the records by a blank line.
+with open("thinkpads.txt", "r", encoding="utf-8") as records:
+    thinkpads_content = "\n".join(islice(records, 10))
+
+prompt = """
+Подбери самый дешевый ноутбук подходящий по предпочтениям из данных
+Предпочтения:
+    Хороший аккумулятор
+    Среднее кол-во памяти (6+ гб)
+    Тип диска - SSD
+    Вместительность диска - минимум 250 гб
+    Хороший процессор
+    Видеокарта не важна
+    Цена: минимально возможная
+    Клавиатура и тачпад: в нормальном состоянии
+    USB порты: минимум 2
+    Тип системы: не важен
+    Монитор: без требования в ремонте
+    Зарядное устройство: имеется
+Данные:
+"""+thinkpads_content
+
+ollama.pull("llama3.1")
+
+stream = ollama.generate(
+    model='llama3.1',
+    prompt=prompt,
+    stream=True,
+)
+
+for chunk in stream:
+    print(chunk['response'], end='', flush=True)
+print()
\ No newline at end of file