diff options
Diffstat (limited to 'lib/plugins/link/scraper.ex')
-rw-r--r-- | lib/plugins/link/scraper.ex | 45 |
1 file changed, 45 insertions(+), 0 deletions(-)
defmodule Nola.Plugins.Link.Scraper do
  @moduledoc """
  Fetches the HTML content of a URL through an external scraping service.

  Only the UseScraper hosted API is currently supported; which service is
  used (if any) is read at runtime from the `:scraper` key of the
  `Nola.Plugins.Link` application environment.
  """

  defmodule UseScraper do
    @moduledoc "Minimal client for the UseScraper scrape API."

    require Logger

    @doc """
    Scrapes `url` via the UseScraper API.

    `config` is a keyword list supporting:

      * `:base_url` — API endpoint (default `"https://api.usescraper.com"`)
      * `:api_key` — bearer token for the service
      * `:http_options` — extra options passed to `HTTPoison.post/4`

    Returns `{:ok, html, meta}` where `meta` is the service's metadata map,
    or `{:error, reason}` where `reason` is an HTTP status atom, an
    HTTPoison error reason, `:scrape_failed`, or `:unexpected_response`.
    """
    def get(url, config) do
      base_url = Keyword.get(config, :base_url, "https://api.usescraper.com")
      api_key = Keyword.get(config, :api_key, "unset api key")
      options = Keyword.get(config, :http_options, [])

      headers = [
        {"user-agent", "nola, href@random.sh"},
        {"content-type", "application/json"},
        {"authorization", "Bearer " <> api_key}
      ]

      Logger.debug("scraper: use_scraper: get: #{url}")

      with {:ok, json} <- Poison.encode(%{"url" => url, "format" => "html"}),
           {:ok, %HTTPoison.Response{status_code: 200, body: body}} <-
             HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options),
           {:ok, %{"status" => "scraped", "html" => html, "meta" => meta = %{"fetchedUrlStatusCode" => 200}}} <-
             Poison.decode(body) do
        {:ok, html, meta}
      else
        # The service reached the page, but the page itself answered non-200.
        # Matched on the meta map only (not a "text"/"html" body key) so it
        # covers every scraped-but-failed payload shape.
        {:ok, %{"status" => "scraped", "meta" => %{"fetchedUrlStatusCode" => code}}} when code != 200 ->
          Logger.error("scraper: use_scraper: scraper got http #{code} for #{url}")
          {:error, Plug.Conn.Status.reason_atom(code)}

        # The service itself could not scrape the page.
        {:ok, %{"status" => "failed"}} ->
          Logger.error("scraper: use_scraper: scraper service failed for #{url}")
          {:error, :scrape_failed}

        # The service API answered with a non-200 HTTP status.
        {:ok, %HTTPoison.Response{status_code: code}} ->
          Logger.error("scraper: use_scraper: scraper service failed (http #{code}) for #{url}")
          {:error, Plug.Conn.Status.reason_atom(code)}

        # Transport-level failure (DNS, timeout, refused connection, ...).
        {:error, %HTTPoison.Error{reason: reason}} ->
          Logger.error("scraper: use_scraper: scraper service failed (http #{inspect reason}) for #{url}")
          {:error, reason}

        # Catch-all: JSON decode errors and unexpected payload shapes used to
        # fall through every clause and crash with a WithClauseError.
        other ->
          Logger.error("scraper: use_scraper: unexpected response for #{url}: #{inspect other}")
          {:error, :unexpected_response}
      end
    end
  end

  @doc """
  Scrapes `url` using the scraper service configured under the `:scraper`
  key of the `Nola.Plugins.Link` application environment.

  Returns the service result (`{:ok, html, meta}` / `{:error, reason}`),
  or `{:error, :scraping_disabled}` when no supported service is configured.
  """
  def get(url) do
    config = Keyword.get(Application.get_env(:nola, Nola.Plugins.Link, []), :scraper) || []

    case config[:service] do
      "usescraper" -> UseScraper.get(url, config[:config] || [])
      _ -> {:error, :scraping_disabled}
    end
  end
end