summary | refs | log | tree | commit | diff
path: root/misc/py-aider-chat/files/patch-aider_scrape.py
diff options
context:
space:
mode:
author: Dave Cottlehuber <dch@FreeBSD.org> 2024-07-27 19:20:20 +0000
committer: Dave Cottlehuber <dch@FreeBSD.org> 2024-07-27 19:26:02 +0000
commit: 800bb7c50cfe116a92e6668875bec471cd4cb4dd (patch)
tree: 3359a5d2d8d2fe85a9d8deafa15eece213f20e2a /misc/py-aider-chat/files/patch-aider_scrape.py
parent: security/aws-iam-authenticator: Update to 0.6.22 (diff)
misc/py-aider-chat: update to 0.45.0
Reported by: portscout
Sponsored by: SkunkWerks, GmbH
Diffstat (limited to 'misc/py-aider-chat/files/patch-aider_scrape.py')
-rw-r--r-- misc/py-aider-chat/files/patch-aider_scrape.py 120
1 files changed, 56 insertions, 64 deletions
diff --git a/misc/py-aider-chat/files/patch-aider_scrape.py b/misc/py-aider-chat/files/patch-aider_scrape.py
index 6ba15a5ce575..f590cbbf2c49 100644
--- a/misc/py-aider-chat/files/patch-aider_scrape.py
+++ b/misc/py-aider-chat/files/patch-aider_scrape.py
@@ -1,80 +1,72 @@
---- aider/scrape.py.orig 2024-07-04 14:23:32 UTC
+--- aider/scrape.py.orig 2024-07-27 19:09:04 UTC
+++ aider/scrape.py
-@@ -3,9 +3,7 @@ import sys
- import re
- import sys
+@@ -15,56 +15,9 @@ def install_playwright(io):
--import playwright
- import pypandoc
--from playwright.sync_api import sync_playwright
- from aider import __version__, urls
- from aider.dump import dump # noqa: F401
-@@ -42,14 +40,10 @@ class Scraper:
- """
- Scrape a url and turn it into readable markdown.
+ def install_playwright(io):
+- try:
+- from playwright.sync_api import sync_playwright
++ return
-- `url` - the URLto scrape.
-+ `url` - the URL to scrape.
- """
-- self.try_playwright()
+- has_pip = True
+- except ImportError:
+- has_pip = False
-- if self.playwright_available:
-- content = self.scrape_with_playwright(url)
-- else:
-- content = self.scrape_with_httpx(url)
-+ content = self.scrape_with_httpx(url)
-
- if not content:
- return
-@@ -62,50 +56,8 @@ class Scraper:
-
- # Internals...
- def scrape_with_playwright(self, url):
+- try:
- with sync_playwright() as p:
-- try:
-- browser = p.chromium.launch()
-- except Exception as e:
-- self.playwright_available = False
-- self.print_error(e)
-- return
+- p.chromium.launch()
+- has_chromium = True
+- except Exception:
+- has_chromium = False
-
-- page = browser.new_page()
+- if has_pip and has_chromium:
+- return True
-
-- user_agent = page.evaluate("navigator.userAgent")
-- user_agent = user_agent.replace("Headless", "")
-- user_agent = user_agent.replace("headless", "")
-- user_agent += " " + aider_user_agent
+- pip_cmd = utils.get_pip_install(["aider-chat[playwright]"])
+- chromium_cmd = "playwright install --with-deps chromium".split()
-
-- page = browser.new_page(user_agent=user_agent)
-- try:
-- page.goto(url, wait_until="networkidle", timeout=5000)
-- except playwright._impl._errors.TimeoutError:
-- pass
-- content = page.content()
-- browser.close()
+- cmds = ""
+- if not has_pip:
+- cmds += " ".join(pip_cmd) + "\n"
+- if not has_chromium:
+- cmds += " ".join(chromium_cmd) + "\n"
-
-- return content
+- text = f"""For the best web scraping, install Playwright:
-
-- def try_playwright(self):
-- if self.playwright_available is not None:
-- return
+-{cmds}
+-See {urls.enable_playwright} for more info.
+-"""
-
-- try:
-- with sync_playwright() as p:
-- p.chromium.launch()
-- self.playwright_available = True
-- except Exception:
- self.playwright_available = False
+- io.tool_error(text)
+- if not io.confirm_ask("Install playwright?", default="y"):
+- return
-
-- def get_playwright_instructions(self):
-- if self.playwright_available in (True, None):
- return
-- if self.playwright_instructions_shown:
+- if not has_pip:
+- success, output = utils.run_install(pip_cmd)
+- if not success:
+- io.tool_error(output)
- return
-
-- self.playwright_instructions_shown = True
-- return PLAYWRIGHT_INFO
+- success, output = utils.run_install(chromium_cmd)
+- if not success:
+- io.tool_error(output)
+- return
+-
+- return True
+-
+-
+ class Scraper:
+ pandoc_available = None
+ playwright_available = None
+@@ -89,10 +42,7 @@ class Scraper:
+ `url` - the URLto scrape.
+ """
+
+- if self.playwright_available:
+- content = self.scrape_with_playwright(url)
+- else:
+- content = self.scrape_with_httpx(url)
++ content = self.scrape_with_httpx(url)
- def scrape_with_httpx(self, url):
- import httpx
+ if not content:
+ return