author     Jordan Bracco <href@random.sh>   2025-06-25 19:22:59 +0200
committer  Jordan Bracco <href@random.sh>   2025-06-25 19:22:59 +0200
commit     c934e79e5852e05f714b2d542cc2678e287c49b8 (patch)
tree       55779a0168260fce03e4775eacdd613ffc945588 /lib/plugins/link/scraper.ex
parent     updates (diff)
format.
Diffstat (limited to 'lib/plugins/link/scraper.ex')
-rw-r--r--   lib/plugins/link/scraper.ex   39
1 file changed, 30 insertions(+), 9 deletions(-)
diff --git a/lib/plugins/link/scraper.ex b/lib/plugins/link/scraper.ex
index f5487e3..c30ae5f 100644
--- a/lib/plugins/link/scraper.ex
+++ b/lib/plugins/link/scraper.ex
@@ -1,5 +1,4 @@
 defmodule Nola.Plugins.Link.Scraper do
-
   defmodule UseScraper do
     require Logger
 
@@ -7,28 +6,50 @@ defmodule Nola.Plugins.Link.Scraper do
       base_url = Keyword.get(config, :base_url, "https://api.usescraper.com")
       api_key = Keyword.get(config, :api_key, "unset api key")
       options = Keyword.get(config, :http_options, [])
-      headers = [{"user-agent", "nola, href@random.sh"},
-                 {"content-type", "application/json"},
-                 {"authorization", "Bearer " <> api_key}]
+
+      headers = [
+        {"user-agent", "nola, href@random.sh"},
+        {"content-type", "application/json"},
+        {"authorization", "Bearer " <> api_key}
+      ]
+
       Logger.debug("scraper: use_scraper: get: #{url}")
+
       with {:ok, json} <- Poison.encode(%{"url" => url, "format" => "html"}),
-           {:ok, %HTTPoison.Response{status_code: 200, body: body}} <- HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options),
-           {:ok, %{"status" => "scraped", "html" => body, "meta" => meta = %{"fetchedUrlStatusCode" => 200}}} <- Poison.decode(body) do
+           {:ok, %HTTPoison.Response{status_code: 200, body: body}} <-
+             HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options),
+           {:ok,
+            %{
+              "status" => "scraped",
+              "html" => body,
+              "meta" => meta = %{"fetchedUrlStatusCode" => 200}
+            }} <- Poison.decode(body) do
         {:ok, body, meta}
       else
-        {:ok, %{"status" => "scraped", "text" => body, "meta" => meta = %{"fetchedUrlStatusCode" => code}}} ->
+        {:ok,
+         %{
+           "status" => "scraped",
+           "text" => body,
+           "meta" => meta = %{"fetchedUrlStatusCode" => code}
+         }} ->
           Logger.error("scraper: use_scraper: scraper got http #{code} for #{url}")
           status = Plug.Conn.Status.reason_atom(code)
           {:error, status}
+
         {:ok, %{"status" => "failed"}} ->
           Logger.error("scraper: use_scraper: scraper service failed for #{url}")
           {:error, :scrape_failed}
+
         {:ok, %HTTPoison.Response{status_code: code, body: body}} ->
           Logger.error("scraper: use_scraper: scraper service failed (http #{code}) for #{url}")
           status = Plug.Conn.Status.reason_atom(code)
           {:error, status}
+
         {:error, %HTTPoison.Error{reason: reason}} ->
-          Logger.error("scraper: use_scraper: scraper service failed (http #{inspect reason}) for #{url}")
+          Logger.error(
+            "scraper: use_scraper: scraper service failed (http #{inspect(reason)}) for #{url}"
+          )
+
           {:error, reason}
       end
     end
@@ -36,10 +57,10 @@ defmodule Nola.Plugins.Link.Scraper do
 
   def get(url) do
     config = Keyword.get(Application.get_env(:nola, Nola.Plugins.Link, []), :scraper) || []
+
     case config[:service] do
       "usescraper" -> UseScraper.get(url, config[:config] || [])
       _ -> {:error, :scraping_disabled}
     end
   end
-
 end
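For context, get/1 reads its settings from the Nola.Plugins.Link application environment. A minimal sketch of what that configuration might look like, assuming standard config.exs conventions; only the :scraper, :service and :config keys (plus the :base_url, :api_key and :http_options options read by UseScraper) appear in the diff, and the concrete values below are illustrative:

    import Config

    config :nola, Nola.Plugins.Link,
      scraper: [
        service: "usescraper",
        config: [
          base_url: "https://api.usescraper.com",
          # assumed env var name; the diff only shows that :api_key is read
          api_key: System.get_env("USESCRAPER_API_KEY"),
          # passed through as the options argument to HTTPoison.post/4
          http_options: [recv_timeout: 15_000]
        ]
      ]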
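And a hypothetical call site, matching the return shapes visible in the diff ({:ok, body, meta} on success, {:error, :scraping_disabled} when no service is configured, {:error, reason} otherwise); the example URL is made up:

    case Nola.Plugins.Link.Scraper.get("https://example.com/article") do
      {:ok, html, meta} ->
        # meta carries the upstream response metadata, e.g. "fetchedUrlStatusCode"
        IO.puts("scraped #{byte_size(html)} bytes, upstream status #{meta["fetchedUrlStatusCode"]}")

      {:error, :scraping_disabled} ->
        IO.puts("no scraper service configured")

      {:error, reason} ->
        IO.puts("scrape failed: #{inspect(reason)}")
    end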