diff options
Diffstat (limited to 'lib/plugins/link.ex')
-rw-r--r-- | lib/plugins/link.ex | 50 |
1 files changed, 41 insertions, 9 deletions
diff --git a/lib/plugins/link.ex b/lib/plugins/link.ex index 4c4261f..84eb976 100644 --- a/lib/plugins/link.ex +++ b/lib/plugins/link.ex @@ -37,6 +37,8 @@ defmodule Nola.Plugins.Link do def short_irc_doc, do: false def irc_doc, do: @ircdoc require Logger + alias __MODULE__.Store + alias __MODULE__.Scraper def start_link() do GenServer.start_link(__MODULE__, [], name: __MODULE__) @@ -52,6 +54,7 @@ defmodule Nola.Plugins.Link do defstruct [:client] def init([]) do + Store.setup() {:ok, _} = Registry.register(Nola.PubSub, "messages", [plugin: __MODULE__]) #{:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__]) Logger.info("Link handler started") @@ -71,16 +74,19 @@ defmodule Nola.Plugins.Link do text = case uris do [uri] -> text [luri | _] -> - if luri.host == uri.host && luri.path == luri.path do + if luri.host == uri.host && luri.path == uri.path do text else ["-> #{URI.to_string(luri)}", text] end end - if is_list(text) do - for line <- text, do: message.replyfun.(line) - else - message.replyfun.(text) + case text do + lines when is_list(lines) -> + for text <- lines, do: message.replyfun.(text) + text when is_binary(text) -> + message.replyfun.(text) + nil -> + nil end _ -> nil end @@ -239,6 +245,7 @@ defmodule Nola.Plugins.Link do Logger.debug("link: expanding #{uri} with default") headers = [{"user-agent", "DmzBot (like TwitterBot)"}] options = [follow_redirect: false, max_body_length: 30_000_000] + url = URI.to_string(uri) case get(URI.to_string(uri), headers, options) do {:ok, text} -> {:ok, acc, text} @@ -247,12 +254,15 @@ defmodule Nola.Plugins.Link do #new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port} expand_link([new_uri | acc]) {:error, status, _headers} -> - text = Plug.Conn.Status.reason_phrase(status) - {:ok, acc, "Error: HTTP #{text} (#{status})"} + #text = Plug.Conn.Status.reason_phrase(status) + #{:ok, acc, "Error: HTTP #{text} (#{status})"} + retry_expand_with_scraper(acc, url) {:error, {:tls_alert, {:handshake_failure, err}}} -> - {:ok, acc, "TLS Error: #{to_string(err)}"} + {:ok, acc, nil} # "TLS Error: #{to_string(err)}"} + {:error, :timeout} -> + retry_expand_with_scraper(acc, url) {:error, reason} -> - {:ok, acc, "Error: #{to_string(reason)}"} + {:ok, acc, nil} #"Error: #{to_string(reason)}"} end end @@ -261,6 +271,27 @@ defmodule Nola.Plugins.Link do {:ok, [uri], "-> #{URI.to_string(uri)}"} end + # Last resort: scrape the page + # We'll be mostly calling this when 403 or 500 or timeout because site blocks us. + # An external service will scrape the page for us and return the body. + # We'll call directly the HTML handler on the result. + defp retry_expand_with_scraper(acc, url) do + Logger.info("Attempting scraper") + handlers = Keyword.get(Application.get_env(:nola, __MODULE__), :handlers) + Logger.info("Attempting scraper #{inspect handlers}") + with true <- Keyword.has_key?(handlers, :"Nola.Plugins.Link.HTML"), + {:ok, body, _meta} <- Scraper.get(url), + {:ok, text} <- __MODULE__.HTML.post_expand(url, body, nil, nil) + do + {:ok, acc, text} + else + error -> + Logger.debug("Attempt with scraper failed: #{inspect error}") + # We give up here. We don't return anything (the acc from caller `expand default` + # does not matter anymore) and I see returning error messages as useless. + {:ok, acc, nil} + end + end defp human_size(bytes) do bytes @@ -268,4 +299,5 @@ defmodule Nola.Plugins.Link do |> FileSize.scale() |> FileSize.format() end + end |