path: root/lib/plugins/link/scraper.ex
blob: c30ae5f7b21a3e5a57d0e1ca6ac011e249b26fe4
defmodule Nola.Plugins.Link.Scraper do
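  @moduledoc """
  Fetches page content for the link plugin by delegating to an external
  scraping service.

  `get/1` reads the `:scraper` key of the `Nola.Plugins.Link` application
  environment to pick a service; only `"usescraper"` is wired up, and anything
  else yields `{:error, :scraping_disabled}`.

  A configuration sketch (the key layout matches `get/1` below; the values are
  illustrative, not taken from a real deployment):

      config :nola, Nola.Plugins.Link,
        scraper: [
          service: "usescraper",
          config: [api_key: "...", http_options: [recv_timeout: 15_000]]
        ]
  """
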
  defmodule UseScraper do
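    @moduledoc """
    Minimal client for the UseScraper HTTP API: POSTs the target URL to
    `<base_url>/scraper/scrape` as JSON and unpacks the response.
    """
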
    require Logger

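    @doc """
    Scrapes `url` via the service at `:base_url`, authenticating with
    `:api_key` and passing `:http_options` through to HTTPoison.

    Returns `{:ok, html, meta}` on success; on failure, `{:error, reason}`
    where `reason` is an HTTP status atom, `:scrape_failed`, or an HTTPoison
    error reason.
    """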
    def get(url, config) do
      base_url = Keyword.get(config, :base_url, "https://api.usescraper.com")
      api_key = Keyword.get(config, :api_key, "unset api key")
      options = Keyword.get(config, :http_options, [])

      headers = [
        {"user-agent", "nola, href@random.sh"},
        {"content-type", "application/json"},
        {"authorization", "Bearer " <> api_key}
      ]

      Logger.debug("scraper: use_scraper: get: #{url}")

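      # The happy path needs two things to succeed: the scraper API call
      # itself (HTTP 200 with "status" => "scraped") and the fetch of the
      # target page (`fetchedUrlStatusCode` 200). Everything else falls
      # through to the `else` clauses below.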
      with {:ok, json} <- Poison.encode(%{"url" => url, "format" => "html"}),
           {:ok, %HTTPoison.Response{status_code: 200, body: body}} <-
             HTTPoison.post("#{base_url}/scraper/scrape", json, headers, options),
           {:ok,
            %{
              "status" => "scraped",
              "html" => html,
              "meta" => meta = %{"fetchedUrlStatusCode" => 200}
            }} <- Poison.decode(body) do
        {:ok, html, meta}
      else
        # The service scraped the page, but the target URL answered with a
        # non-200 status.
        {:ok, %{"status" => "scraped", "meta" => %{"fetchedUrlStatusCode" => code}}} ->
          Logger.error("scraper: use_scraper: scraper got http #{code} for #{url}")
          status = Plug.Conn.Status.reason_atom(code)
          {:error, status}

        {:ok, %{"status" => "failed"}} ->
          Logger.error("scraper: use_scraper: scraper service failed for #{url}")
          {:error, :scrape_failed}

        # The scraper API itself answered with a non-200 status.
        {:ok, %HTTPoison.Response{status_code: code}} ->
          Logger.error("scraper: use_scraper: scraper service failed (http #{code}) for #{url}")
          status = Plug.Conn.Status.reason_atom(code)
          {:error, status}

        {:error, %HTTPoison.Error{reason: reason}} ->
          Logger.error(
            "scraper: use_scraper: scraper service failed (http #{inspect(reason)}) for #{url}"
          )

          {:error, reason}
      end
    end
  end

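  @doc """
  Scrapes `url` with the configured scraping service, or returns
  `{:error, :scraping_disabled}` when none is configured.
  """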
  def get(url) do
    config = Keyword.get(Application.get_env(:nola, Nola.Plugins.Link, []), :scraper) || []

    case config[:service] do
      "usescraper" -> UseScraper.get(url, config[:config] || [])
      _ -> {:error, :scraping_disabled}
    end
  end
end
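
# Usage sketch (return shapes follow the clauses above; the URL is
# illustrative):
#
#     case Nola.Plugins.Link.Scraper.get("https://example.com/article") do
#       {:ok, html, _meta} -> html
#       {:error, reason} -> "no preview: #{inspect(reason)}"
#     end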