defmodule Nola.Plugins.Link.Scraper do
  @moduledoc """
  Fetches the HTML content of a URL through an external scraping service.

  The service is selected at runtime from the `:nola, Nola.Plugins.Link`
  application environment under the `:scraper` key, e.g.:

      config :nola, Nola.Plugins.Link,
        scraper: [service: "usescraper", config: [api_key: "..."]]
  """

  defmodule UseScraper do
    @moduledoc "HTTP client for the usescraper.com scraping API."

    require Logger

    @doc """
    Scrapes `url` via the UseScraper API.

    Recognized `config` options:

      * `:base_url` — API endpoint (default `"https://api.usescraper.com"`)
      * `:api_key` — bearer token for the API
      * `:http_options` — options forwarded to `HTTPoison.post/4`

    Returns `{:ok, html, meta}` when the service scraped the page with an
    upstream HTTP 200, otherwise `{:error, reason}` where `reason` is an
    HTTP status atom or an `HTTPoison` transport error reason.
    """
    def get(url, config) do
      base_url = Keyword.get(config, :base_url, "https://api.usescraper.com")
      api_key = Keyword.get(config, :api_key, "unset api key")
      options = Keyword.get(config, :http_options, [])

      headers = [
        {"user-agent", "nola, href@random.sh"},
        {"content-type", "application/json"},
        {"authorization", "Bearer " <> api_key}
      ]

      Logger.debug("scraper: use_scraper: get: #{url}")

      with {:ok, json} <- Poison.encode(%{"url" => url, "format" => "html"}),
           {:ok, %HTTPoison.Response{status_code: 200, body: body}} <-
             HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options),
           # `html` (not `body`) avoids shadowing the raw response body above.
           {:ok,
            %{
              "status" => "scraped",
              "html" => html,
              "meta" => meta = %{"fetchedUrlStatusCode" => 200}
            }} <- Poison.decode(body) do
        {:ok, html, meta}
      else
        # The service reached the page, but the page itself answered non-200.
        {:ok,
         %{"status" => "scraped", "text" => _body, "meta" => %{"fetchedUrlStatusCode" => code}}} ->
          Logger.error("scraper: use_scraper: scraper got http #{code} for #{url}")
          # NOTE(review): reason_atom/1 raises ArgumentError for unknown
          # status codes — assumes the service only reports standard ones.
          status = Plug.Conn.Status.reason_atom(code)
          {:error, status}

        # The service itself could not scrape the page.
        {:ok, %{"status" => "failed"}} ->
          Logger.error("scraper: use_scraper: scraper service failed for #{url}")
          {:error, :scrape_failed}

        # The API endpoint answered with a non-200 status.
        {:ok, %HTTPoison.Response{status_code: code, body: _body}} ->
          Logger.error("scraper: use_scraper: scraper service failed (http #{code}) for #{url}")
          status = Plug.Conn.Status.reason_atom(code)
          {:error, status}

        # Transport-level failure (DNS, timeout, refused connection, ...).
        {:error, %HTTPoison.Error{reason: reason}} ->
          Logger.error("scraper: use_scraper: scraper service failed (http #{inspect(reason)}) for #{url}")
          {:error, reason}
      end
    end
  end

  @doc """
  Scrapes `url` using the configured service.

  Returns `{:ok, html, meta}` on success, `{:error, :scraping_disabled}`
  when no service is configured, or the service's `{:error, reason}`.
  """
  def get(url) do
    config = Keyword.get(Application.get_env(:nola, Nola.Plugins.Link, []), :scraper) || []

    case config[:service] do
      "usescraper" -> UseScraper.get(url, config[:config] || [])
      _ -> {:error, :scraping_disabled}
    end
  end
end