summary | refs | log | tree | commit | diff
path: root/lib/plugins/link.ex
diff options
context:
space:
mode:
Diffstat (limited to 'lib/plugins/link.ex')
-rw-r--r-- lib/plugins/link.ex | 50
1 files changed, 41 insertions, 9 deletions
diff --git a/lib/plugins/link.ex b/lib/plugins/link.ex
index 4c4261f..84eb976 100644
--- a/lib/plugins/link.ex
+++ b/lib/plugins/link.ex
@@ -37,6 +37,8 @@ defmodule Nola.Plugins.Link do
def short_irc_doc, do: false
def irc_doc, do: @ircdoc
require Logger
+ alias __MODULE__.Store
+ alias __MODULE__.Scraper
def start_link() do
GenServer.start_link(__MODULE__, [], name: __MODULE__)
@@ -52,6 +54,7 @@ defmodule Nola.Plugins.Link do
defstruct [:client]
def init([]) do
+ Store.setup()
{:ok, _} = Registry.register(Nola.PubSub, "messages", [plugin: __MODULE__])
#{:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__])
Logger.info("Link handler started")
@@ -71,16 +74,19 @@ defmodule Nola.Plugins.Link do
text = case uris do
[uri] -> text
[luri | _] ->
- if luri.host == uri.host && luri.path == luri.path do
+ if luri.host == uri.host && luri.path == uri.path do
text
else
["-> #{URI.to_string(luri)}", text]
end
end
- if is_list(text) do
- for line <- text, do: message.replyfun.(line)
- else
- message.replyfun.(text)
+ case text do
+ lines when is_list(lines) ->
+ for text <- lines, do: message.replyfun.(text)
+ text when is_binary(text) ->
+ message.replyfun.(text)
+ nil ->
+ nil
end
_ -> nil
end
@@ -239,6 +245,7 @@ defmodule Nola.Plugins.Link do
Logger.debug("link: expanding #{uri} with default")
headers = [{"user-agent", "DmzBot (like TwitterBot)"}]
options = [follow_redirect: false, max_body_length: 30_000_000]
+ url = URI.to_string(uri)
case get(URI.to_string(uri), headers, options) do
{:ok, text} ->
{:ok, acc, text}
@@ -247,12 +254,15 @@ defmodule Nola.Plugins.Link do
#new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port}
expand_link([new_uri | acc])
{:error, status, _headers} ->
- text = Plug.Conn.Status.reason_phrase(status)
- {:ok, acc, "Error: HTTP #{text} (#{status})"}
+ #text = Plug.Conn.Status.reason_phrase(status)
+ #{:ok, acc, "Error: HTTP #{text} (#{status})"}
+ retry_expand_with_scraper(acc, url)
{:error, {:tls_alert, {:handshake_failure, err}}} ->
- {:ok, acc, "TLS Error: #{to_string(err)}"}
+ {:ok, acc, nil} # "TLS Error: #{to_string(err)}"}
+ {:error, :timeout} ->
+ retry_expand_with_scraper(acc, url)
{:error, reason} ->
- {:ok, acc, "Error: #{to_string(reason)}"}
+ {:ok, acc, nil} #"Error: #{to_string(reason)}"}
end
end
@@ -261,6 +271,27 @@ defmodule Nola.Plugins.Link do
{:ok, [uri], "-> #{URI.to_string(uri)}"}
end
+ # Last resort: scrape the page.
+ #
+ # Called by the default expander when the direct HTTP request came back with an
+ # error status or timed out (typically 403/500/timeout because the site blocks us).
+ # An external service scrapes the page for us and returns the body, which is then
+ # handed directly to the HTML handler.
+ #
+ # Arguments:
+ #   * `acc` - the accumulator of the calling expander, passed through untouched.
+ #   * `url` - the URL (as a string) to fetch through the scraper.
+ #
+ # Always returns `{:ok, acc, text}`; `text` is `nil` when the HTML handler is not
+ # configured or when any step of the scraping attempt fails.
+ defp retry_expand_with_scraper(acc, url) do
+   # The plugin config and its `:handlers` key may both be absent. Fall back to an
+   # empty list so that a missing config disables the scraper instead of raising
+   # inside `Keyword.get/2` or `Keyword.has_key?/2`.
+   config = Application.get_env(:nola, __MODULE__) || []
+   handlers = Keyword.get(config, :handlers) || []
+   Logger.info("Attempting scraper #{inspect handlers}")
+   # NOTE(review): handlers are looked up under the plain atom
+   # :"Nola.Plugins.Link.HTML" (not the module alias) - presumably this matches how
+   # the keys are written in the config; confirm.
+   with true <- Keyword.has_key?(handlers, :"Nola.Plugins.Link.HTML"),
+        {:ok, body, _meta} <- Scraper.get(url),
+        {:ok, text} <- __MODULE__.HTML.post_expand(url, body, nil, nil) do
+     {:ok, acc, text}
+   else
+     error ->
+       Logger.debug("Attempt with scraper failed: #{inspect error}")
+       # We give up here. No error message is surfaced to the channel: the caller
+       # treats a `nil` text as "nothing to say".
+       {:ok, acc, nil}
+   end
+ end
defp human_size(bytes) do
bytes
@@ -268,4 +299,5 @@ defmodule Nola.Plugins.Link do
|> FileSize.scale()
|> FileSize.format()
end
+
end