summaryrefslogtreecommitdiff
path: root/lib/plugins/link.ex
diff options
context:
space:
mode:
Diffstat (limited to 'lib/plugins/link.ex')
-rw-r--r--lib/plugins/link.ex292
1 files changed, 202 insertions, 90 deletions
diff --git a/lib/plugins/link.ex b/lib/plugins/link.ex
index 4c4261f..0dca6ae 100644
--- a/lib/plugins/link.ex
+++ b/lib/plugins/link.ex
@@ -37,57 +37,53 @@ defmodule Nola.Plugins.Link do
def short_irc_doc, do: false
def irc_doc, do: @ircdoc
require Logger
+ alias __MODULE__.Quirks
+ alias __MODULE__.Store
+ alias __MODULE__.Scraper
def start_link() do
GenServer.start_link(__MODULE__, [], name: __MODULE__)
end
- @callback match(uri :: URI.t, options :: Keyword.t) :: {true, params :: Map.t} | false
- @callback expand(uri :: URI.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error
- @callback post_match(uri :: URI.t, content_type :: binary, headers :: [], opts :: Keyword.t) :: {:body | :file, params :: Map.t} | false
- @callback post_expand(uri :: URI.t, body :: binary() | Path.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error
+ @callback match(uri :: URI.t(), options :: Keyword.t()) :: {true, params :: Map.t()} | false
+ @callback expand(uri :: URI.t(), params :: Map.t(), options :: Keyword.t()) ::
+ {:ok, lines :: [] | String.t()} | :error
+ @callback post_match(uri :: URI.t(), content_type :: binary, headers :: [], opts :: Keyword.t()) ::
+ {:body | :file, params :: Map.t()} | false
+ @callback post_expand(
+ uri :: URI.t(),
+ body :: binary() | Path.t(),
+ params :: Map.t(),
+ options :: Keyword.t()
+ ) :: {:ok, lines :: [] | String.t()} | :error
@optional_callbacks [expand: 3, post_expand: 4]
defstruct [:client]
def init([]) do
- {:ok, _} = Registry.register(Nola.PubSub, "messages", [plugin: __MODULE__])
- #{:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__])
+ {:ok, _} = Registry.register(Nola.PubSub, "messages", plugin: __MODULE__)
+ # {:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__])
Logger.info("Link handler started")
{:ok, %__MODULE__{}}
end
def handle_info({:irc, :text, message = %{text: text}}, state) do
String.split(text)
- |> Enum.map(fn(word) ->
+ |> Enum.map(fn word ->
if String.starts_with?(word, "http://") || String.starts_with?(word, "https://") do
uri = URI.parse(word)
+
if uri.scheme && uri.host do
- spawn(fn() ->
- :timer.kill_after(:timer.seconds(30))
- case expand_link([uri]) do
- {:ok, uris, text} ->
- text = case uris do
- [uri] -> text
- [luri | _] ->
- if luri.host == uri.host && luri.path == luri.path do
- text
- else
- ["-> #{URI.to_string(luri)}", text]
- end
- end
- if is_list(text) do
- for line <- text, do: message.replyfun.(line)
- else
- message.replyfun.(text)
- end
- _ -> nil
- end
- end)
+ if Store.inhibit_link?(word, {message.network, message.channel}) do
+ Logger.debug("link inhibited #{word}")
+ else
+ handle_link(word, uri, message)
+ end
end
end
end)
+
{:noreply, state}
end
@@ -99,6 +95,48 @@ defmodule Nola.Plugins.Link do
:ok
end
+  # Expands `url` in a throwaway process and replies through `message.replyfun`.
+  #
+  # `uri` is the parsed form of `url`. A result returned by `Store.get_link/1` is
+  # used when present; otherwise the link is expanded with `expand_link/1`.
+  # After replying, a freshly expanded result is saved with `Store.insert_link/2`
+  # and the link is recorded for `{message.network, message.channel}` with
+  # `Store.witness_link/2`.
+  #
+  # Returns the pid of the spawned process.
+  def handle_link(url, uri, message) do
+    spawn(fn ->
+      # Kill this process if the whole expansion takes longer than 30 seconds.
+      :timer.kill_after(:timer.seconds(30))
+
+      store = Store.get_link(url)
+
+      case store || expand_link([uri]) do
+        # Nothing to announce. This is checked before the redirect notice is
+        # built, so a redirect followed by a failed fetch no longer becomes
+        # `["-> ...", nil]` (which called `replyfun` with `nil` and stored the
+        # failed result).
+        {:ok, _uris, nil} ->
+          nil
+
+        {:ok, uris, text} = save ->
+          for line <- link_reply_lines(uri, uris, text), do: message.replyfun.(line)
+          if !store, do: Store.insert_link(url, save)
+          Store.witness_link(url, {message.network, message.channel})
+
+        _ ->
+          nil
+      end
+    end)
+  end
+
+  # Builds the list of lines to send for an expansion result.
+  #
+  # `uris` is the redirect chain with the final URI first. When the final URI
+  # differs from the posted `uri` (host or path), a "-> final-url" line is
+  # prepended so readers see where the link actually leads.
+  defp link_reply_lines(uri, uris, text) when is_binary(text) or is_list(text) do
+    case uris do
+      [_only] ->
+        List.wrap(text)
+
+      [final | _] ->
+        if final.host == uri.host && final.path == uri.path do
+          List.wrap(text)
+        else
+          ["-> #{URI.to_string(final)}", text]
+        end
+    end
+  end
+
# 1. Match the first valid handler
# 2. Try to run the handler
# 3. If :error or crash, default link.
@@ -110,17 +148,24 @@ defmodule Nola.Plugins.Link do
{:ok, acc, "link redirects more than five times"}
end
- def expand_link(acc=[uri | _]) do
- Logger.debug("link: expanding: #{inspect uri}")
- handlers = Keyword.get(Application.get_env(:nola, __MODULE__, [handlers: []]), :handlers)
- handler = Enum.reduce_while(handlers, nil, fn({module, opts}, acc) ->
- Logger.debug("link: attempt expanding: #{inspect module} for #{inspect uri}")
- module = Module.concat([module])
- case module.match(uri, opts) do
- {true, params} -> {:halt, {module, params, opts}}
- false -> {:cont, acc}
- end
- end)
+ def expand_link(acc = [uri | _]) do
+ Logger.debug("link: expanding: #{inspect(uri)}")
+ handlers = Keyword.get(Application.get_env(:nola, __MODULE__, handlers: []), :handlers)
+
+ handler =
+ Enum.reduce_while(handlers, nil, fn {module, opts}, acc ->
+ module = Module.concat([module])
+
+ case module.match(uri, opts) do
+ {true, params} ->
+ Logger.debug("link: will expand with #{inspect(module)} for #{inspect(uri)}")
+ {:halt, {module, params, opts}}
+
+ false ->
+ {:cont, acc}
+ end
+ end)
+
run_expand(acc, handler)
end
@@ -128,21 +173,27 @@ defmodule Nola.Plugins.Link do
expand_default(acc)
end
- def run_expand(acc=[uri|_], {module, params, opts}) do
- Logger.debug("link: expanding #{inspect uri} with #{inspect module}")
+ def run_expand(acc = [uri | _], {module, params, opts}) do
case module.expand(uri, params, opts) do
- {:ok, data} -> {:ok, acc, data}
- :error -> expand_default(acc)
- :skip -> nil
+ {:ok, data} ->
+ Logger.debug("link: expanded #{inspect(uri)} with #{inspect(module)}")
+ {:ok, acc, data}
+
+ :error ->
+ Logger.error("Error expanding URL #{uri} with #{inspect(module)}")
+ expand_default(acc)
+
+ :skip ->
+ nil
end
rescue
e ->
- Logger.error("link: rescued #{inspect uri} with #{inspect module}: #{inspect e}")
+ Logger.error("link: rescued #{inspect(uri)} with #{inspect(module)}: #{inspect(e)}")
Logger.error(Exception.format(:error, e, __STACKTRACE__))
expand_default(acc)
catch
e, b ->
- Logger.error("link: catched #{inspect uri} with #{inspect module}: #{inspect {e, b}}")
+ Logger.error("link: catched #{inspect(uri)} with #{inspect(module)}: #{inspect({e, b})}")
expand_default(acc)
end
@@ -155,40 +206,48 @@ defmodule Nola.Plugins.Link do
end
defp get_req(url, {:ok, 200, headers, client}) do
- headers = Enum.reduce(headers, %{}, fn({key, value}, acc) ->
- Map.put(acc, String.downcase(key), value)
- end)
+ headers =
+ Enum.reduce(headers, %{}, fn {key, value}, acc ->
+ Map.put(acc, String.downcase(key), value)
+ end)
+
content_type = Map.get(headers, "content-type", "application/octect-stream")
length = Map.get(headers, "content-length", "0")
{length, _} = Integer.parse(length)
- handlers = Keyword.get(Application.get_env(:nola, __MODULE__, [handlers: []]), :handlers)
- handler = Enum.reduce_while(handlers, false, fn({module, opts}, acc) ->
- module = Module.concat([module])
- try do
- case module.post_match(url, content_type, headers, opts) do
- {mode, params} when mode in [:body, :file] -> {:halt, {module, params, opts, mode}}
- false -> {:cont, acc}
+ handlers = Keyword.get(Application.get_env(:nola, __MODULE__, handlers: []), :handlers)
+
+ handler =
+ Enum.reduce_while(handlers, false, fn {module, opts}, acc ->
+ module = Module.concat([module])
+
+ try do
+ case module.post_match(url, content_type, headers, opts) do
+ {mode, params} when mode in [:body, :file] -> {:halt, {module, params, opts, mode}}
+ false -> {:cont, acc}
+ end
+ rescue
+ e ->
+ Logger.error(inspect(e))
+ {:cont, false}
+ catch
+ e, b ->
+ Logger.error(inspect({b}))
+ {:cont, false}
end
- rescue
- e ->
- Logger.error(inspect(e))
- {:cont, false}
- catch
- e, b ->
- Logger.error(inspect({b}))
- {:cont, false}
- end
- end)
+ end)
cond do
handler != false and length <= 30_000_000 ->
case get_body(url, 30_000_000, client, handler, <<>>) do
- {:ok, _} = ok -> ok
+ {:ok, _} = ok ->
+ ok
+
:error ->
{:ok, "file: #{content_type}, size: #{human_size(length)}"}
end
- #String.starts_with?(content_type, "text/html") && length <= 30_000_000 ->
+
+ # String.starts_with?(content_type, "text/html") && length <= 30_000_000 ->
# get_body(url, 30_000_000, client, <<>>)
true ->
:hackney.close(client)
@@ -197,62 +256,94 @@ defmodule Nola.Plugins.Link do
end
defp get_req(_, {:ok, redirect, headers, client}) when redirect in 300..399 do
- headers = Enum.reduce(headers, %{}, fn({key, value}, acc) ->
- Map.put(acc, String.downcase(key), value)
- end)
+ headers =
+ Enum.reduce(headers, %{}, fn {key, value}, acc ->
+ Map.put(acc, String.downcase(key), value)
+ end)
+
location = Map.get(headers, "location")
:hackney.close(client)
{:redirect, location}
end
- defp get_req(_, {:ok, status, headers, client}) do
+ defp get_req(url, {:ok, status, headers, client}) do
+ Logger.error("Error fetching URL #{url} = #{status}")
:hackney.close(client)
{:error, status, headers}
end
- defp get_body(url, len, client, {handler, params, opts, mode} = h, acc) when len >= byte_size(acc) do
+ defp get_body(url, len, client, {handler, params, opts, mode} = h, acc)
+ when len >= byte_size(acc) do
case :hackney.stream_body(client) do
{:ok, data} ->
- get_body(url, len, client, h, << acc::binary, data::binary >>)
+ get_body(url, len, client, h, <<acc::binary, data::binary>>)
+
:done ->
- body = case mode do
- :body -> acc
- :file ->
- {:ok, tmpfile} = Plug.Upload.random_file("linkplugin")
- File.write!(tmpfile, acc)
- tmpfile
- end
+ body =
+ case mode do
+ :body ->
+ acc
+
+ :file ->
+ {:ok, tmpfile} = Plug.Upload.random_file("linkplugin")
+ File.write!(tmpfile, acc)
+ tmpfile
+ end
+
+ Logger.debug("expanding body with #{inspect(handler)}: #{inspect(body)}")
handler.post_expand(url, body, params, opts)
+
{:error, reason} ->
- {:ok, "failed to fetch body: #{inspect reason}"}
+ {:ok, "failed to fetch body: #{inspect(reason)}"}
end
end
defp get_body(_, len, client, h, _acc) do
:hackney.close(client)
- IO.inspect(h)
{:ok, "Error: file over 30"}
end
def expand_default(acc = [uri = %URI{scheme: scheme} | _]) when scheme in ["http", "https"] do
Logger.debug("link: expanding #{uri} with default")
- headers = [{"user-agent", "DmzBot (like TwitterBot)"}]
- options = [follow_redirect: false, max_body_length: 30_000_000]
+
+ uri = Quirks.uri(uri)
+
+ headers = [
+ {"user-agent", Quirks.user_agent(uri.host)}
+ ]
+
+ proxy = Keyword.get(Application.get_env(:nola, __MODULE__, []), :proxy, nil)
+ options = [follow_redirect: false, max_body_length: 30_000_000, proxy: proxy]
+ url = URI.to_string(uri)
+
case get(URI.to_string(uri), headers, options) do
{:ok, text} ->
{:ok, acc, text}
+
{:redirect, link} ->
new_uri = URI.parse(link)
- #new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port}
expand_link([new_uri | acc])
+
+ {:error, status, _headers} when status in [400, 403] ->
+ Logger.warning("Was denied to fetch URL, using scraper #{url} = #{status}")
+ retry_expand_with_scraper(acc, url)
+
{:error, status, _headers} ->
- text = Plug.Conn.Status.reason_phrase(status)
- {:ok, acc, "Error: HTTP #{text} (#{status})"}
+ Logger.error("Error fetching URL #{url} = #{status}")
+ {:ok, acc, nil}
+
{:error, {:tls_alert, {:handshake_failure, err}}} ->
- {:ok, acc, "TLS Error: #{to_string(err)}"}
+ Logger.error("Error fetching URL #{url} = TLS Error: #{to_string(err)}")
+ {:ok, acc, nil}
+
+ {:error, :timeout} ->
+ Logger.error("Error fetching URL #{url} = timeout")
+ retry_expand_with_scraper(acc, url)
+
{:error, reason} ->
- {:ok, acc, "Error: #{to_string(reason)}"}
+ Logger.error("Error fetching URL #{url} = #{to_string(reason)}")
+ {:ok, acc, nil}
end
end
@@ -261,6 +352,27 @@ defmodule Nola.Plugins.Link do
{:ok, [uri], "-> #{URI.to_string(uri)}"}
end
+  # Last resort: scrape the page.
+  # `expand_default/1` calls this when the direct fetch was refused (HTTP 400 or
+  # 403) or timed out, typically because the site blocks us.
+  # An external service (`Scraper`) fetches the page for us and returns the body,
+  # which is handed directly to the HTML handler.
+  #
+  # Always returns `{:ok, acc, text}`. `text` is `nil` when the HTML handler is
+  # not configured or when scraping/expanding fails.
+  defp retry_expand_with_scraper(acc, url) do
+    # Default both lookups so that a missing plugin configuration means
+    # "no handlers" instead of raising in `Keyword.get/2` on `nil`.
+    handlers =
+      :nola
+      |> Application.get_env(__MODULE__, [])
+      |> Keyword.get(:handlers, [])
+
+    Logger.info("Attempting scraper #{inspect(handlers)}")
+
+    with true <- Keyword.has_key?(handlers, :"Nola.Plugins.Link.HTML"),
+         {:ok, body, _meta} <- Scraper.get(url),
+         {:ok, text} <- __MODULE__.HTML.post_expand(url, body, nil, nil) do
+      {:ok, acc, text}
+    else
+      error ->
+        Logger.debug("Attempt with scraper failed: #{inspect(error)}")
+        # We give up here and return no text: error messages are not worth
+        # announcing to the channel.
+        {:ok, acc, nil}
+    end
+  end
defp human_size(bytes) do
bytes