Diffstat (limited to 'lib/plugins/link/scraper.ex')
-rw-r--r--  lib/plugins/link/scraper.ex | 45 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 45 insertions(+), 0 deletions(-)
diff --git a/lib/plugins/link/scraper.ex b/lib/plugins/link/scraper.ex
new file mode 100644
index 0000000..f5487e3
--- /dev/null
+++ b/lib/plugins/link/scraper.ex
@@ -0,0 +1,45 @@
+defmodule Nola.Plugins.Link.Scraper do
+
+  defmodule UseScraper do
+    require Logger
+
+    def get(url, config) do
+      base_url = Keyword.get(config, :base_url, "https://api.usescraper.com")
+      api_key = Keyword.get(config, :api_key, "unset api key")
+      options = Keyword.get(config, :http_options, [])
+      headers = [{"user-agent", "nola, href@random.sh"},
+                 {"content-type", "application/json"},
+                 {"authorization", "Bearer " <> api_key}]
+      Logger.debug("scraper: use_scraper: get: #{url}")
+      with {:ok, json} <- Poison.encode(%{"url" => url, "format" => "html"}),
+           {:ok, %HTTPoison.Response{status_code: 200, body: body}} <- HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options),
+           {:ok, %{"status" => "scraped", "html" => html, "meta" => meta = %{"fetchedUrlStatusCode" => 200}}} <- Poison.decode(body) do
+        {:ok, html, meta}
+      else
+        {:ok, %{"status" => "scraped", "text" => _body, "meta" => %{"fetchedUrlStatusCode" => code}}} ->
+          Logger.error("scraper: use_scraper: scraper got http #{code} for #{url}")
+          status = Plug.Conn.Status.reason_atom(code)
+          {:error, status}
+        {:ok, %{"status" => "failed"}} ->
+          Logger.error("scraper: use_scraper: scraper service failed for #{url}")
+          {:error, :scrape_failed}
+        {:ok, %HTTPoison.Response{status_code: code, body: _body}} ->
+          Logger.error("scraper: use_scraper: scraper service failed (http #{code}) for #{url}")
+          status = Plug.Conn.Status.reason_atom(code)
+          {:error, status}
+        {:error, %HTTPoison.Error{reason: reason}} ->
+          Logger.error("scraper: use_scraper: scraper service failed (#{inspect reason}) for #{url}")
+          {:error, reason}
+      end
+    end
+  end
+
+  def get(url) do
+    config = Keyword.get(Application.get_env(:nola, Nola.Plugins.Link, []), :scraper) || []
+    case config[:service] do
+      "usescraper" -> UseScraper.get(url, config[:config] || [])
+      _ -> {:error, :scraping_disabled}
+    end
+  end
+
+end
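
For reference, a minimal sketch of how this scraper could be configured and called. The key shapes (:scraper, :service, :config, :api_key, :base_url, :http_options) come from the code above; the concrete values below are placeholders, not part of this commit:

# config/config.exs (illustrative values only)
config :nola, Nola.Plugins.Link,
  scraper: [
    service: "usescraper",
    config: [
      api_key: "YOUR_USESCRAPER_API_KEY"
      # Optional; these are the defaults used by UseScraper.get/2:
      # base_url: "https://api.usescraper.com",
      # http_options: []
    ]
  ]

# Calling the scraper: get/1 returns {:ok, html, meta} on success,
# {:error, reason} otherwise (e.g. :scraping_disabled when unconfigured).
case Nola.Plugins.Link.Scraper.get("https://example.com/article") do
  {:ok, html, meta} -> IO.inspect(meta, label: "scraped #{byte_size(html)} bytes")
  {:error, reason} -> IO.puts("scrape failed: #{inspect(reason)}")
end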