Diffstat
 -rw-r--r--  lib/plugins/link/scraper.ex  39
 1 file changed, 30 insertions(+), 9 deletions(-)
diff --git a/lib/plugins/link/scraper.ex b/lib/plugins/link/scraper.ex
index f5487e3..c30ae5f 100644
--- a/lib/plugins/link/scraper.ex
+++ b/lib/plugins/link/scraper.ex
@@ -1,5 +1,4 @@
defmodule Nola.Plugins.Link.Scraper do
-
defmodule UseScraper do
require Logger
@@ -7,28 +6,50 @@ defmodule Nola.Plugins.Link.Scraper do
base_url = Keyword.get(config, :base_url, "https://api.usescraper.com")
api_key = Keyword.get(config, :api_key, "unset api key")
options = Keyword.get(config, :http_options, [])
- headers = [{"user-agent", "nola, href@random.sh"},
- {"content-type", "application/json"},
- {"authorization", "Bearer " <> api_key}]
+
+ headers = [
+ {"user-agent", "nola, href@random.sh"},
+ {"content-type", "application/json"},
+ {"authorization", "Bearer " <> api_key}
+ ]
+
Logger.debug("scraper: use_scraper: get: #{url}")
+
with {:ok, json} <- Poison.encode(%{"url" => url, "format" => "html"}),
- {:ok, %HTTPoison.Response{status_code: 200, body: body}} <- HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options),
- {:ok, %{"status" => "scraped", "html" => body, "meta" => meta = %{"fetchedUrlStatusCode" => 200}}} <- Poison.decode(body) do
+ {:ok, %HTTPoison.Response{status_code: 200, body: body}} <-
+ HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options),
+ {:ok,
+ %{
+ "status" => "scraped",
+ "html" => body,
+ "meta" => meta = %{"fetchedUrlStatusCode" => 200}
+ }} <- Poison.decode(body) do
{:ok, body, meta}
else
- {:ok, %{"status" => "scraped", "text" => body, "meta" => meta = %{"fetchedUrlStatusCode" => code}}} ->
+ {:ok,
+ %{
+ "status" => "scraped",
+ "text" => body,
+ "meta" => meta = %{"fetchedUrlStatusCode" => code}
+ }} ->
Logger.error("scraper: use_scraper: scraper got http #{code} for #{url}")
status = Plug.Conn.Status.reason_atom(code)
{:error, status}
+
{:ok, %{"status" => "failed"}} ->
Logger.error("scraper: use_scraper: scraper service failed for #{url}")
{:error, :scrape_failed}
+
{:ok, %HTTPoison.Response{status_code: code, body: body}} ->
Logger.error("scraper: use_scraper: scraper service failed (http #{code}) for #{url}")
status = Plug.Conn.Status.reason_atom(code)
{:error, status}
+
{:error, %HTTPoison.Error{reason: reason}} ->
- Logger.error("scraper: use_scraper: scraper service failed (http #{inspect reason}) for #{url}")
+ Logger.error(
+ "scraper: use_scraper: scraper service failed (http #{inspect(reason)}) for #{url}"
+ )
+
{:error, reason}
end
end
@@ -36,10 +57,10 @@ defmodule Nola.Plugins.Link.Scraper do
def get(url) do
config = Keyword.get(Application.get_env(:nola, Nola.Plugins.Link, []), :scraper) || []
+
case config[:service] do
"usescraper" -> UseScraper.get(url, config[:config] || [])
_ -> {:error, :scraping_disabled}
end
end
-
end
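
For context, a minimal sketch of the application config that get/1 above reads. The key names (:service, :config, :base_url, :api_key, :http_options) come from the code in this diff; the file location and concrete values are illustrative assumptions only.

    # config/runtime.exs (hypothetical values; key names match the diff above).
    # Scraper.get/1 reads Application.get_env(:nola, Nola.Plugins.Link)[:scraper],
    # dispatches on :service, and hands :config through to UseScraper.get/2.
    import Config

    config :nola, Nola.Plugins.Link,
      scraper: [
        service: "usescraper",
        config: [
          # :base_url defaults to "https://api.usescraper.com" when omitted
          api_key: System.fetch_env!("USESCRAPER_API_KEY"),
          # forwarded verbatim to HTTPoison.post/4 as its options argument
          http_options: [recv_timeout: 15_000]
        ]
      ]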
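
Callers of Scraper.get/1 then see one of the shapes below: the success path carries the scraped HTML plus the service's "meta" map (pattern-matched on "fetchedUrlStatusCode" => 200 in the with clause), and every failure branch collapses to an {:error, reason} tuple. A usage sketch, with an example URL:

    case Nola.Plugins.Link.Scraper.get("https://example.com/article") do
      {:ok, html, meta} ->
        # html is the page body returned by the UseScraper API;
        # meta includes "fetchedUrlStatusCode" => 200 on this path
        IO.puts("scraped #{byte_size(html)} bytes, meta: #{inspect(meta)}")

      {:error, :scraping_disabled} ->
        # no :service configured under the :scraper key
        IO.puts("scraping is not configured")

      {:error, reason} ->
        # a Plug.Conn.Status atom (e.g. :not_found) for HTTP-level failures,
        # :scrape_failed when the service reports "status" => "failed",
        # or an HTTPoison transport error reason
        IO.puts("scrape failed: #{inspect(reason)}")
    end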