diff options
Diffstat (limited to 'lib/plugins/link.ex')
-rw-r--r-- | lib/plugins/link.ex | 292 |
1 files changed, 202 insertions, 90 deletions
diff --git a/lib/plugins/link.ex b/lib/plugins/link.ex index 4c4261f..0dca6ae 100644 --- a/lib/plugins/link.ex +++ b/lib/plugins/link.ex @@ -37,57 +37,53 @@ defmodule Nola.Plugins.Link do def short_irc_doc, do: false def irc_doc, do: @ircdoc require Logger + alias __MODULE__.Quirks + alias __MODULE__.Store + alias __MODULE__.Scraper def start_link() do GenServer.start_link(__MODULE__, [], name: __MODULE__) end - @callback match(uri :: URI.t, options :: Keyword.t) :: {true, params :: Map.t} | false - @callback expand(uri :: URI.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error - @callback post_match(uri :: URI.t, content_type :: binary, headers :: [], opts :: Keyword.t) :: {:body | :file, params :: Map.t} | false - @callback post_expand(uri :: URI.t, body :: binary() | Path.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error + @callback match(uri :: URI.t(), options :: Keyword.t()) :: {true, params :: Map.t()} | false + @callback expand(uri :: URI.t(), params :: Map.t(), options :: Keyword.t()) :: + {:ok, lines :: [] | String.t()} | :error + @callback post_match(uri :: URI.t(), content_type :: binary, headers :: [], opts :: Keyword.t()) :: + {:body | :file, params :: Map.t()} | false + @callback post_expand( + uri :: URI.t(), + body :: binary() | Path.t(), + params :: Map.t(), + options :: Keyword.t() + ) :: {:ok, lines :: [] | String.t()} | :error @optional_callbacks [expand: 3, post_expand: 4] defstruct [:client] def init([]) do - {:ok, _} = Registry.register(Nola.PubSub, "messages", [plugin: __MODULE__]) - #{:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__]) + {:ok, _} = Registry.register(Nola.PubSub, "messages", plugin: __MODULE__) + # {:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__]) Logger.info("Link handler started") {:ok, %__MODULE__{}} end def handle_info({:irc, :text, message = %{text: text}}, state) do String.split(text) - |> Enum.map(fn(word) -> + |> Enum.map(fn word -> if String.starts_with?(word, "http://") || String.starts_with?(word, "https://") do uri = URI.parse(word) + if uri.scheme && uri.host do - spawn(fn() -> - :timer.kill_after(:timer.seconds(30)) - case expand_link([uri]) do - {:ok, uris, text} -> - text = case uris do - [uri] -> text - [luri | _] -> - if luri.host == uri.host && luri.path == luri.path do - text - else - ["-> #{URI.to_string(luri)}", text] - end - end - if is_list(text) do - for line <- text, do: message.replyfun.(line) - else - message.replyfun.(text) - end - _ -> nil - end - end) + if Store.inhibit_link?(word, {message.network, message.channel}) do + Logger.debug("link inhibited #{word}") + else + handle_link(word, uri, message) + end end end end) + {:noreply, state} end @@ -99,6 +95,48 @@ defmodule Nola.Plugins.Link do :ok end + def handle_link(url, uri, message) do + spawn(fn -> + :timer.kill_after(:timer.seconds(30)) + + store = Store.get_link(url) + + case store || expand_link([uri]) do + {:ok, uris, text} = save -> + text = + case uris do + [uri] -> + text + + [luri | _] -> + if luri.host == uri.host && luri.path == uri.path do + text + else + ["-> #{URI.to_string(luri)}", text] + end + end + + case text do + lines when is_list(lines) -> + for text <- lines, do: message.replyfun.(text) + if !store, do: Store.insert_link(url, save) + Store.witness_link(url, {message.network, message.channel}) + + text when is_binary(text) -> + message.replyfun.(text) + if !store, do: Store.insert_link(url, save) + Store.witness_link(url, {message.network, message.channel}) + + nil -> + nil + end + + _ -> + nil + end + end) + end + # 1. Match the first valid handler # 2. Try to run the handler # 3. If :error or crash, default link. @@ -110,17 +148,24 @@ defmodule Nola.Plugins.Link do {:ok, acc, "link redirects more than five times"} end - def expand_link(acc=[uri | _]) do - Logger.debug("link: expanding: #{inspect uri}") - handlers = Keyword.get(Application.get_env(:nola, __MODULE__, [handlers: []]), :handlers) - handler = Enum.reduce_while(handlers, nil, fn({module, opts}, acc) -> - Logger.debug("link: attempt expanding: #{inspect module} for #{inspect uri}") - module = Module.concat([module]) - case module.match(uri, opts) do - {true, params} -> {:halt, {module, params, opts}} - false -> {:cont, acc} - end - end) + def expand_link(acc = [uri | _]) do + Logger.debug("link: expanding: #{inspect(uri)}") + handlers = Keyword.get(Application.get_env(:nola, __MODULE__, handlers: []), :handlers) + + handler = + Enum.reduce_while(handlers, nil, fn {module, opts}, acc -> + module = Module.concat([module]) + + case module.match(uri, opts) do + {true, params} -> + Logger.debug("link: will expand with #{inspect(module)} for #{inspect(uri)}") + {:halt, {module, params, opts}} + + false -> + {:cont, acc} + end + end) + run_expand(acc, handler) end @@ -128,21 +173,27 @@ defmodule Nola.Plugins.Link do expand_default(acc) end - def run_expand(acc=[uri|_], {module, params, opts}) do - Logger.debug("link: expanding #{inspect uri} with #{inspect module}") + def run_expand(acc = [uri | _], {module, params, opts}) do case module.expand(uri, params, opts) do - {:ok, data} -> {:ok, acc, data} - :error -> expand_default(acc) - :skip -> nil + {:ok, data} -> + Logger.debug("link: expanded #{inspect(uri)} with #{inspect(module)}") + {:ok, acc, data} + + :error -> + Logger.error("Error expanding URL #{uri} with #{inspect(module)}") + expand_default(acc) + + :skip -> + nil end rescue e -> - Logger.error("link: rescued #{inspect uri} with #{inspect module}: #{inspect e}") + Logger.error("link: rescued #{inspect(uri)} with #{inspect(module)}: #{inspect(e)}") Logger.error(Exception.format(:error, e, __STACKTRACE__)) expand_default(acc) catch e, b -> - Logger.error("link: catched #{inspect uri} with #{inspect module}: #{inspect {e, b}}") + Logger.error("link: catched #{inspect(uri)} with #{inspect(module)}: #{inspect({e, b})}") expand_default(acc) end @@ -155,40 +206,48 @@ defmodule Nola.Plugins.Link do end defp get_req(url, {:ok, 200, headers, client}) do - headers = Enum.reduce(headers, %{}, fn({key, value}, acc) -> - Map.put(acc, String.downcase(key), value) - end) + headers = + Enum.reduce(headers, %{}, fn {key, value}, acc -> + Map.put(acc, String.downcase(key), value) + end) + content_type = Map.get(headers, "content-type", "application/octect-stream") length = Map.get(headers, "content-length", "0") {length, _} = Integer.parse(length) - handlers = Keyword.get(Application.get_env(:nola, __MODULE__, [handlers: []]), :handlers) - handler = Enum.reduce_while(handlers, false, fn({module, opts}, acc) -> - module = Module.concat([module]) - try do - case module.post_match(url, content_type, headers, opts) do - {mode, params} when mode in [:body, :file] -> {:halt, {module, params, opts, mode}} - false -> {:cont, acc} + handlers = Keyword.get(Application.get_env(:nola, __MODULE__, handlers: []), :handlers) + + handler = + Enum.reduce_while(handlers, false, fn {module, opts}, acc -> + module = Module.concat([module]) + + try do + case module.post_match(url, content_type, headers, opts) do + {mode, params} when mode in [:body, :file] -> {:halt, {module, params, opts, mode}} + false -> {:cont, acc} + end + rescue + e -> + Logger.error(inspect(e)) + {:cont, false} + catch + e, b -> + Logger.error(inspect({b})) + {:cont, false} end - rescue - e -> - Logger.error(inspect(e)) - {:cont, false} - catch - e, b -> - Logger.error(inspect({b})) - {:cont, false} - end - end) + end) cond do handler != false and length <= 30_000_000 -> case get_body(url, 30_000_000, client, handler, <<>>) do - {:ok, _} = ok -> ok + {:ok, _} = ok -> + ok + :error -> {:ok, "file: #{content_type}, size: #{human_size(length)}"} end - #String.starts_with?(content_type, "text/html") && length <= 30_000_000 -> + + # String.starts_with?(content_type, "text/html") && length <= 30_000_000 -> # get_body(url, 30_000_000, client, <<>>) true -> :hackney.close(client) @@ -197,62 +256,94 @@ defmodule Nola.Plugins.Link do end defp get_req(_, {:ok, redirect, headers, client}) when redirect in 300..399 do - headers = Enum.reduce(headers, %{}, fn({key, value}, acc) -> - Map.put(acc, String.downcase(key), value) - end) + headers = + Enum.reduce(headers, %{}, fn {key, value}, acc -> + Map.put(acc, String.downcase(key), value) + end) + location = Map.get(headers, "location") :hackney.close(client) {:redirect, location} end - defp get_req(_, {:ok, status, headers, client}) do + defp get_req(url, {:ok, status, headers, client}) do + Logger.error("Error fetching URL #{url} = #{status}") :hackney.close(client) {:error, status, headers} end - defp get_body(url, len, client, {handler, params, opts, mode} = h, acc) when len >= byte_size(acc) do + defp get_body(url, len, client, {handler, params, opts, mode} = h, acc) + when len >= byte_size(acc) do case :hackney.stream_body(client) do {:ok, data} -> - get_body(url, len, client, h, << acc::binary, data::binary >>) + get_body(url, len, client, h, <<acc::binary, data::binary>>) + :done -> - body = case mode do - :body -> acc - :file -> - {:ok, tmpfile} = Plug.Upload.random_file("linkplugin") - File.write!(tmpfile, acc) - tmpfile - end + body = + case mode do + :body -> + acc + + :file -> + {:ok, tmpfile} = Plug.Upload.random_file("linkplugin") + File.write!(tmpfile, acc) + tmpfile + end + + Logger.debug("expanding body with #{inspect(handler)}: #{inspect(body)}") handler.post_expand(url, body, params, opts) + {:error, reason} -> - {:ok, "failed to fetch body: #{inspect reason}"} + {:ok, "failed to fetch body: #{inspect(reason)}"} end end defp get_body(_, len, client, h, _acc) do :hackney.close(client) - IO.inspect(h) {:ok, "Error: file over 30"} end def expand_default(acc = [uri = %URI{scheme: scheme} | _]) when scheme in ["http", "https"] do Logger.debug("link: expanding #{uri} with default") - headers = [{"user-agent", "DmzBot (like TwitterBot)"}] - options = [follow_redirect: false, max_body_length: 30_000_000] + + uri = Quirks.uri(uri) + + headers = [ + {"user-agent", Quirks.user_agent(uri.host)} + ] + + proxy = Keyword.get(Application.get_env(:nola, __MODULE__, []), :proxy, nil) + options = [follow_redirect: false, max_body_length: 30_000_000, proxy: proxy] + url = URI.to_string(uri) + case get(URI.to_string(uri), headers, options) do {:ok, text} -> {:ok, acc, text} + {:redirect, link} -> new_uri = URI.parse(link) - #new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port} expand_link([new_uri | acc]) + + {:error, status, _headers} when status in [400, 403] -> + Logger.warning("Was denied to fetch URL, using scraper #{url} = #{status}") + retry_expand_with_scraper(acc, url) + {:error, status, _headers} -> - text = Plug.Conn.Status.reason_phrase(status) - {:ok, acc, "Error: HTTP #{text} (#{status})"} + Logger.error("Error fetching URL #{url} = #{status}") + {:ok, acc, nil} + {:error, {:tls_alert, {:handshake_failure, err}}} -> - {:ok, acc, "TLS Error: #{to_string(err)}"} + Logger.error("Error fetching URL #{url} = TLS Error: #{to_string(err)}") + {:ok, acc, nil} + + {:error, :timeout} -> + Logger.error("Error fetching URL #{url} = timeout") + retry_expand_with_scraper(acc, url) + {:error, reason} -> - {:ok, acc, "Error: #{to_string(reason)}"} + Logger.error("Error fetching URL #{url} = #{to_string(reason)}") + {:ok, acc, nil} end end @@ -261,6 +352,27 @@ defmodule Nola.Plugins.Link do {:ok, [uri], "-> #{URI.to_string(uri)}"} end + # Last resort: scrape the page + # We'll be mostly calling this when 403 or 500 or timeout because site blocks us. + # An external service will scrape the page for us and return the body. + # We'll call directly the HTML handler on the result. + defp retry_expand_with_scraper(acc, url) do + Logger.info("Attempting scraper") + handlers = Keyword.get(Application.get_env(:nola, __MODULE__), :handlers) + Logger.info("Attempting scraper #{inspect(handlers)}") + + with true <- Keyword.has_key?(handlers, :"Nola.Plugins.Link.HTML"), + {:ok, body, _meta} <- Scraper.get(url), + {:ok, text} <- __MODULE__.HTML.post_expand(url, body, nil, nil) do + {:ok, acc, text} + else + error -> + Logger.debug("Attempt with scraper failed: #{inspect(error)}") + # We give up here. We don't return anything (the acc from caller `expand default` + # does not matter anymore) and I see returning error messages as useless. + {:ok, acc, nil} + end + end defp human_size(bytes) do bytes |