defmodule Nola.Plugins.Link do
  @moduledoc """
  # Link Previewer

  An extensible link previewer for IRC.

  To extend the supported sites, create a new handler implementing the callbacks.

  See `link/` directory. The first handler in the list that returns true to the `match/2` callback
  will be used, and if the handler returns `:error` or crashes, the default preview is used as a
  fallback.

  Unsupported websites will use the default link preview method, which is the title for HTML
  documents, and the mimetype and size otherwise.

  ## Configuration:

  ```
  config :nola, Nola.Plugins.Link,
    handlers: [
      "Nola.Plugins.Link.Youtube": [
        invidious: true
      ],
      "Nola.Plugins.Link.Twitter": [],
      "Nola.Plugins.Link.Imgur": []
    ]
  ```
  """

  @ircdoc """
  # Link preview

  Previews links (just post a link!).

  Announces real URL after redirections and provides extended support for YouTube, Twitter and Imgur.
  """

  @doc "This plugin has no short IRC documentation."
  def short_irc_doc, do: false

  @doc "Returns the IRC help text of the plugin."
  def irc_doc, do: @ircdoc

  require Logger
  alias __MODULE__.Quirks
  alias __MODULE__.Store
  alias __MODULE__.Scraper

  # Maximum number of bytes of a response body that will be downloaded for a handler.
  @max_body_size 30_000_000

  @doc "Starts the link plugin process, registered under the module name."
  def start_link() do
    GenServer.start_link(__MODULE__, [], name: __MODULE__)
  end

  @callback match(uri :: URI.t(), options :: Keyword.t()) :: {true, params :: map()} | false
  @callback expand(uri :: URI.t(), params :: map(), options :: Keyword.t()) ::
              {:ok, lines :: [String.t()] | String.t()} | :error | :skip
  @callback post_match(
              uri :: URI.t(),
              content_type :: binary,
              headers :: list(),
              opts :: Keyword.t()
            ) :: {:body | :file, params :: map()} | false
  @callback post_expand(
              uri :: URI.t(),
              body :: binary() | Path.t(),
              params :: map(),
              options :: Keyword.t()
            ) :: {:ok, lines :: [String.t()] | String.t()} | :error

  @optional_callbacks [expand: 3, post_expand: 4]

  defstruct [:client]

  @doc false
  def init([]) do
    {:ok, _} = Registry.register(Nola.PubSub, "messages", plugin: __MODULE__)
    # {:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__])
    Logger.info("Link handler started")
    {:ok, %__MODULE__{}}
  end

  @doc false
  def handle_info({:irc, :text, message = %{text: text}}, state) do
    # Every whitespace-separated word that looks like an http(s) URL gets previewed.
    text
    |> String.split()
    |> Enum.each(&maybe_handle_word(&1, message))

    {:noreply, state}
  end

  def handle_info(_msg, state) do
    {:noreply, state}
  end

  @doc false
  def terminate(_reason, _state) do
    :ok
  end

  # Previews `word` when it is an http(s) URL with a host and the store does not inhibit it
  # for the network/channel the message came from.
  defp maybe_handle_word(word, message) do
    if String.starts_with?(word, ["http://", "https://"]) do
      uri = URI.parse(word)

      cond do
        is_nil(uri.scheme) or uri.host in [nil, ""] ->
          nil

        Store.inhibit_link?(word, {message.network, message.channel}) ->
          Logger.debug("link inhibited #{word}")

        true ->
          handle_link(word, uri, message)
      end
    end
  end

  @doc """
  Expands `url` in a separate process and sends the preview through `message.replyfun`.

  The spawned process is killed after 30 seconds. A result already present in the store is
  reused; otherwise the link is expanded and the result is inserted in the store. In both cases
  the link is recorded as witnessed for the message's network and channel.

  Returns the pid of the spawned process.
  """
  def handle_link(url, uri, message) do
    spawn(fn ->
      :timer.kill_after(:timer.seconds(30))
      store = Store.get_link(url)

      case store || expand_link([uri]) do
        {:ok, uris, text} = save ->
          case with_redirect_notice(uris, uri, text) do
            nil ->
              nil

            reply when is_list(reply) or is_binary(reply) ->
              for line <- List.wrap(reply), do: message.replyfun.(line)
              if !store, do: Store.insert_link(url, save)
              Store.witness_link(url, {message.network, message.channel})
          end

        _ ->
          nil
      end
    end)
  end

  # Prepends a "-> final url" line when the link was redirected to another host or path.
  # `List.wrap/1` keeps the reply a flat list of lines whether `text` is a single line,
  # a list of lines, or nil.
  defp with_redirect_notice([_single], _uri, text), do: text

  defp with_redirect_notice([last | _], uri, text) do
    if last.host == uri.host && last.path == uri.path do
      text
    else
      ["-> #{URI.to_string(last)}" | List.wrap(text)]
    end
  end

  # Handlers configured for this plugin; always a list, even when nothing is configured.
  defp configured_handlers do
    :nola
    |> Application.get_env(__MODULE__, [])
    |> Keyword.get(:handlers, [])
  end

  # 1. Match the first valid handler
  # 2. Try to run the handler
  # 3. If :error or crash, default link.
  #    If :skip, nothing
  # 4. ?

  @doc """
  Expands the URI at the head of `acc`.

  `acc` is the chain of URIs followed so far, most recent first. The first configured handler
  whose `match/2` returns `{true, params}` is used; without a matching handler the default
  expansion is used. Once the chain holds five or more URIs the expansion is cancelled.
  """
  # Over five redirections: cancel.
  def expand_link(acc = [_, _, _, _, _ | _]) do
    {:ok, acc, "link redirects more than five times"}
  end

  def expand_link(acc = [uri | _]) do
    Logger.debug("link: expanding: #{inspect(uri)}")

    handler =
      Enum.find_value(configured_handlers(), fn {module, opts} ->
        module = Module.concat([module])

        case module.match(uri, opts) do
          {true, params} ->
            Logger.debug("link: will expand with #{inspect(module)} for #{inspect(uri)}")
            {module, params, opts}

          false ->
            nil
        end
      end)

    run_expand(acc, handler)
  end

  @doc """
  Runs the matched handler, `{module, params, opts}`, on the URI at the head of `acc`.

  With `nil` as handler, or when the handler returns `:error`, raises or throws, the default
  expansion is used. Returns `nil` when the handler returns `:skip`.
  """
  def run_expand(acc, nil) do
    expand_default(acc)
  end

  def run_expand(acc = [uri | _], {module, params, opts}) do
    case module.expand(uri, params, opts) do
      {:ok, data} ->
        Logger.debug("link: expanded #{inspect(uri)} with #{inspect(module)}")
        {:ok, acc, data}

      :error ->
        Logger.error("Error expanding URL #{uri} with #{inspect(module)}")
        expand_default(acc)

      :skip ->
        nil
    end
  rescue
    e ->
      Logger.error("link: rescued #{inspect(uri)} with #{inspect(module)}: #{inspect(e)}")
      Logger.error(Exception.format(:error, e, __STACKTRACE__))
      expand_default(acc)
  catch
    e, b ->
      Logger.error("link: catched #{inspect(uri)} with #{inspect(module)}: #{inspect({e, b})}")
      expand_default(acc)
  end

  # Performs the GET request and interprets the response.
  defp get(url, headers, options) do
    get_req(url, :hackney.get(url, headers, <<>>, options))
  end

  defp get_req(_, {:error, reason}) do
    {:error, reason}
  end

  defp get_req(url, {:ok, 200, headers, client}) do
    headers = downcase_headers(headers)
    content_type = Map.get(headers, "content-type", "application/octet-stream")
    length = headers |> Map.get("content-length", "0") |> parse_content_length()
    handler = find_post_handler(url, content_type, headers)
    summary = "file: #{content_type}, size: #{human_size(length)}"

    if handler != false and length <= @max_body_size do
      case get_body(url, @max_body_size, client, handler, <<>>) do
        {:ok, _} = ok -> ok
        :error -> {:ok, summary}
      end
    else
      # No handler wants the body (or it is announced as too large): only describe the file.
      :hackney.close(client)
      {:ok, summary}
    end
  end

  defp get_req(_, {:ok, redirect, headers, client}) when redirect in 300..399 do
    location = headers |> downcase_headers() |> Map.get("location")
    :hackney.close(client)
    {:redirect, location}
  end

  defp get_req(url, {:ok, status, headers, client}) do
    Logger.error("Error fetching URL #{url} = #{status}")
    :hackney.close(client)
    {:error, status, headers}
  end

  # Indexes the response headers by lower-cased name.
  defp downcase_headers(headers) do
    Map.new(headers, fn {key, value} -> {String.downcase(key), value} end)
  end

  # A missing or malformed content-length must not crash the preview; treat it as 0.
  defp parse_content_length(value) do
    case Integer.parse(value) do
      {length, _rest} -> length
      :error -> 0
    end
  end

  # Returns `{module, params, opts, mode}` for the first handler whose `post_match/4` accepts
  # the response, or `false`. A handler that raises or throws is logged and skipped.
  defp find_post_handler(url, content_type, headers) do
    Enum.reduce_while(configured_handlers(), false, fn {module, opts}, found ->
      module = Module.concat([module])

      try do
        case module.post_match(url, content_type, headers, opts) do
          {mode, params} when mode in [:body, :file] -> {:halt, {module, params, opts, mode}}
          false -> {:cont, found}
        end
      rescue
        e ->
          Logger.error(inspect(e))
          {:cont, false}
      catch
        _kind, b ->
          Logger.error(inspect({b}))
          {:cont, false}
      end
    end)
  end

  # Streams the body into `acc` until done, then hands it to the handler's `post_expand/4`,
  # either as a binary (`:body`) or as the path of a temporary file (`:file`).
  # Gives up as soon as more than `len` bytes have been accumulated.
  defp get_body(url, len, client, {handler, params, opts, mode} = h, acc)
       when len >= byte_size(acc) do
    case :hackney.stream_body(client) do
      {:ok, data} ->
        get_body(url, len, client, h, <<acc::binary, data::binary>>)

      :done ->
        body =
          case mode do
            :body ->
              acc

            :file ->
              {:ok, tmpfile} = Plug.Upload.random_file("linkplugin")
              File.write!(tmpfile, acc)
              tmpfile
          end

        Logger.debug("expanding body with #{inspect(handler)}: #{inspect(body)}")
        handler.post_expand(url, body, params, opts)

      {:error, reason} ->
        {:ok, "failed to fetch body: #{inspect(reason)}"}
    end
  end

  defp get_body(_url, _len, client, _handler, _acc) do
    :hackney.close(client)
    {:ok, "Error: file over 30"}
  end

  @doc """
  Default expansion of the URI at the head of `acc`.

  For `http`/`https` URIs the page is fetched (redirections are followed through
  `expand_link/1`, and the scraper is tried on 400, 403 and timeouts). Other schemes, which can
  only come from a redirection, are simply announced.
  """
  def expand_default(acc = [uri = %URI{scheme: scheme} | _]) when scheme in ["http", "https"] do
    Logger.debug("link: expanding #{uri} with default")

    uri = Quirks.uri(uri)

    headers = [
      {"user-agent", Quirks.user_agent(uri.host)}
    ]

    proxy = Keyword.get(Application.get_env(:nola, __MODULE__, []), :proxy, nil)
    options = [follow_redirect: false, max_body_length: @max_body_size, proxy: proxy]
    url = URI.to_string(uri)

    case get(url, headers, options) do
      {:ok, text} ->
        {:ok, acc, text}

      {:redirect, nil} ->
        Logger.error("Error fetching URL #{url} = redirect without location")
        {:ok, acc, nil}

      {:redirect, link} ->
        expand_link([resolve_redirect(uri, link) | acc])

      {:error, status, _headers} when status in [400, 403] ->
        Logger.warning("Was denied to fetch URL, using scraper #{url} = #{status}")
        retry_expand_with_scraper(acc, url)

      {:error, status, _headers} ->
        Logger.error("Error fetching URL #{url} = #{status}")
        {:ok, acc, nil}

      {:error, {:tls_alert, {:handshake_failure, err}}} ->
        Logger.error("Error fetching URL #{url} = TLS Error: #{to_string(err)}")
        {:ok, acc, nil}

      {:error, :timeout} ->
        Logger.error("Error fetching URL #{url} = timeout")
        retry_expand_with_scraper(acc, url)

      {:error, reason} ->
        # `reason` may be any term (often a tuple), which `to_string/1` cannot format.
        Logger.error("Error fetching URL #{url} = #{inspect(reason)}")
        {:ok, acc, nil}
    end
  end

  # Unsupported scheme, came from a redirect.
  def expand_default([uri | _]) do
    {:ok, [uri], "-> #{URI.to_string(uri)}"}
  end

  # A `Location` header may be relative ("/path", "//host/path"): resolve it against the URI
  # that was requested. Absolute targets are used as they are.
  defp resolve_redirect(base, location) do
    target = URI.parse(location)

    if is_nil(target.scheme) and is_binary(base.host) do
      URI.merge(base, target)
    else
      target
    end
  end

  # Last resort: scrape the page
  # We'll be mostly calling this when 400 or 403 or timeout because site blocks us.
  # An external service will scrape the page for us and return the body.
  # We'll call directly the HTML handler on the result.
  defp retry_expand_with_scraper(acc, url) do
    Logger.info("Attempting scraper")
    handlers = configured_handlers()
    Logger.info("Attempting scraper #{inspect(handlers)}")

    with true <- Keyword.has_key?(handlers, :"Nola.Plugins.Link.HTML"),
         {:ok, body, _meta} <- Scraper.get(url),
         {:ok, text} <- __MODULE__.HTML.post_expand(url, body, nil, nil) do
      {:ok, acc, text}
    else
      error ->
        Logger.debug("Attempt with scraper failed: #{inspect(error)}")
        # We give up here. We don't return anything (the acc from caller `expand default`
        # does not matter anymore) and I see returning error messages as useless.
        {:ok, acc, nil}
    end
  end

  # Formats a byte count for display.
  defp human_size(bytes) do
    bytes
    |> FileSize.new(:b)
    |> FileSize.scale()
    |> FileSize.format()
  end
end