diff options
author | Jordan Bracco <href@random.sh> | 2025-06-26 16:10:16 +0200 |
---|---|---|
committer | Jordan Bracco <href@random.sh> | 2025-06-26 16:10:16 +0200 |
commit | a94c1cc0ac767db64d92420620202838da52545b (patch) | |
tree | 1bc180f1a0b0c26f6e7acff7cb903a2ef821d1c7 /lib/plugins | |
parent | txt: do not list, link only (diff) |
link: cache and inhibit
Diffstat (limited to 'lib/plugins')
-rw-r--r-- | lib/plugins/link.ex | 126 | ||||
-rw-r--r-- | lib/plugins/link/html.ex | 4 | ||||
-rw-r--r-- | lib/plugins/link/store.ex | 84 |
3 files changed, 152 insertions, 62 deletions
diff --git a/lib/plugins/link.ex b/lib/plugins/link.ex index 89fe944..bdc0fe9 100644 --- a/lib/plugins/link.ex +++ b/lib/plugins/link.ex @@ -61,7 +61,6 @@ defmodule Nola.Plugins.Link do defstruct [:client] def init([]) do - Store.setup() {:ok, _} = Registry.register(Nola.PubSub, "messages", plugin: __MODULE__) # {:ok, _} = Registry.register(Nola.PubSub, "messages:telegram", [plugin: __MODULE__]) Logger.info("Link handler started") @@ -75,39 +74,11 @@ defmodule Nola.Plugins.Link do uri = URI.parse(word) if uri.scheme && uri.host do - spawn(fn -> - :timer.kill_after(:timer.seconds(30)) - - case expand_link([uri]) do - {:ok, uris, text} -> - text = - case uris do - [uri] -> - text - - [luri | _] -> - if luri.host == uri.host && luri.path == uri.path do - text - else - ["-> #{URI.to_string(luri)}", text] - end - end - - case text do - lines when is_list(lines) -> - for text <- lines, do: message.replyfun.(text) - - text when is_binary(text) -> - message.replyfun.(text) - - nil -> - nil - end - - _ -> - nil - end - end) + if Store.inhibit_link?(word, {message.network, message.channel}) do + Logger.debug("link inhibited #{word}") + else + handle_link(word, uri, message) + end end end end) @@ -123,6 +94,48 @@ defmodule Nola.Plugins.Link do :ok end + def handle_link(url, uri, message) do + spawn(fn -> + :timer.kill_after(:timer.seconds(30)) + + store = Store.get_link(url) + + case store || expand_link([uri]) do + {:ok, uris, text} = save -> + text = + case uris do + [uri] -> + text + + [luri | _] -> + if luri.host == uri.host && luri.path == uri.path do + text + else + ["-> #{URI.to_string(luri)}", text] + end + end + + case text do + lines when is_list(lines) -> + for text <- lines, do: message.replyfun.(text) + if !store, do: Store.insert_link(url, save) + Store.witness_link(url, {message.network, message.channel}) + + text when is_binary(text) -> + message.replyfun.(text) + if !store, do: Store.insert_link(url, save) + Store.witness_link(url, {message.network, message.channel}) + + nil -> + nil + end + + _ -> + nil + end + end) + end + # 1. Match the first valid handler # 2. Try to run the handler # 3. If :error or crash, default link. @@ -140,12 +153,15 @@ defmodule Nola.Plugins.Link do handler = Enum.reduce_while(handlers, nil, fn {module, opts}, acc -> - Logger.debug("link: attempt expanding: #{inspect(module)} for #{inspect(uri)}") module = Module.concat([module]) case module.match(uri, opts) do - {true, params} -> {:halt, {module, params, opts}} - false -> {:cont, acc} + {true, params} -> + Logger.debug("link: will expand with #{inspect(module)} for #{inspect(uri)}") + {:halt, {module, params, opts}} + + false -> + {:cont, acc} end end) @@ -157,12 +173,17 @@ defmodule Nola.Plugins.Link do end def run_expand(acc = [uri | _], {module, params, opts}) do - Logger.debug("link: expanding #{inspect(uri)} with #{inspect(module)}") - case module.expand(uri, params, opts) do - {:ok, data} -> {:ok, acc, data} - :error -> expand_default(acc) - :skip -> nil + {:ok, data} -> + Logger.debug("link: expanded #{inspect(uri)} with #{inspect(module)}") + {:ok, acc, data} + + :error -> + Logger.error("Error expanding URL #{uri} with #{inspect(module)}") + expand_default(acc) + + :skip -> + nil end rescue e -> @@ -245,7 +266,8 @@ defmodule Nola.Plugins.Link do {:redirect, location} end - defp get_req(_, {:ok, status, headers, client}) do + defp get_req(url, {:ok, status, headers, client}) do + Logger.error("Error fetching URL #{url} = #{status}") :hackney.close(client) {:error, status, headers} end @@ -268,6 +290,7 @@ defmodule Nola.Plugins.Link do tmpfile end + Logger.debug("expanding body with #{inspect(handler)}: #{inspect(body)}") handler.post_expand(url, body, params, opts) {:error, reason} -> @@ -277,7 +300,6 @@ defmodule Nola.Plugins.Link do defp get_body(_, len, client, h, _acc) do :hackney.close(client) - IO.inspect(h) {:ok, "Error: file over 30"} end @@ -299,24 +321,26 @@ defmodule Nola.Plugins.Link do {:redirect, link} -> new_uri = URI.parse(link) - - # new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port} expand_link([new_uri | acc]) - {:error, status, _headers} -> - # text = Plug.Conn.Status.reason_phrase(status) - # {:ok, acc, "Error: HTTP #{text} (#{status})"} + {:error, status, _headers} when status in [400, 403] -> + Logger.warning("Was denied to fetch URL, using scraper #{url} = #{status}") retry_expand_with_scraper(acc, url) + {:error, status, _headers} -> + Logger.error("Error fetching URL #{url} = #{status}") + {:ok, acc, nil} + {:error, {:tls_alert, {:handshake_failure, err}}} -> - # "TLS Error: #{to_string(err)}"} + Logger.error("Error fetching URL #{url} = TLS Error: #{to_string(err)}") {:ok, acc, nil} {:error, :timeout} -> + Logger.error("Error fetching URL #{url} = timeout") retry_expand_with_scraper(acc, url) {:error, reason} -> - # "Error: #{to_string(reason)}"} + Logger.error("Error fetching URL #{url} = #{to_string(reason)}") {:ok, acc, nil} end end diff --git a/lib/plugins/link/html.ex b/lib/plugins/link/html.ex index bef9640..78f3192 100644 --- a/lib/plugins/link/html.ex +++ b/lib/plugins/link/html.ex @@ -30,7 +30,7 @@ defmodule Nola.Plugins.Link.HTML do defp generate_text_from_opengraph(url, html, opengraph) do itemprops = collect_itemprops(html) prefix = collect_prefix_and_site_name(url, opengraph, itemprops) - description = collect_description(opengraph, itemprops, 500) + description = collect_description(opengraph, itemprops, 400) [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ description end @@ -138,7 +138,7 @@ defmodule Nola.Plugins.Link.HTML do defp transform_description(nil, _), do: nil defp transform_description(string, length) when is_binary(string) do - if String.length(string) >= length, do: String.truncate(string, length), else: string + if String.length(string) > length, do: "#{String.slice(string, 0..length)}…", else: string end defp clean_text(text) do diff --git a/lib/plugins/link/store.ex b/lib/plugins/link/store.ex index ea43070..4e2aa58 100644 --- a/lib/plugins/link/store.ex +++ b/lib/plugins/link/store.ex @@ -1,29 +1,95 @@ defmodule Nola.Plugins.Link.Store do + alias DialyxirVendored.Warnings.Apply + use GenServer + require Logger require Record import Ex2ms @type url() :: String.t() - Record.defrecord(:link, link: nil, at: nil) - @type link :: record(:link, link: String.t(), at: nil) + Record.defrecord(:link, link: nil, result: nil, at: nil) + @type link :: record(:link, link: url(), result: any(), at: nil) - Record.defrecord(:link_entry, key: nil, at: nil) - @type link_entry :: record(:link_entry, key: {url(), String.t()}, at: nil) + Record.defrecord(:link_seen, key: nil, at: nil) + + @doc "A `link_seen` record represents a link that has been seen at a specific time in a given context." + @type link_seen :: record(:link_seen, key: {url(), String.t()}, at: nil) def setup do :ets.new(:links, [:set, :public, :named_table, keypos: 2]) + :ets.new(:links_witness, [:set, :public, :named_table, keypos: 2]) end - @spec insert_link(url()) :: true - def insert_link(url) do - :ets.insert(:links, link(link: url, at: NaiveDateTime.utc_now() |> NaiveDateTime.to_unix())) + @spec insert_link(url(), any()) :: true + def insert_link(url, result) do + :ets.insert( + :links, + link(link: url, result: result, at: DateTime.utc_now() |> DateTime.to_unix()) + ) end - @spec get_link(url()) :: String.t() | nil + @spec get_link(url()) :: any() | nil def get_link(url) do case :ets.lookup(:links, url) do - [link] -> link + [link(result: result)] -> result [] -> nil end end + + @spec witness_link(url(), String.t()) :: boolean() + def inhibit_link?(url, key) do + case :ets.lookup(:links_witness, {url, key}) do + [_] -> true + [] -> false + end + end + + @spec witness_link(url(), String.t()) :: :ok | :inhibit + def witness_link(url, key) do + if inhibit_link?(url, key) do + :inhibit + else + :ets.insert( + :links_witness, + link_seen(key: {url, key}, at: DateTime.utc_now() |> DateTime.to_unix()) + ) + + :ok + end + end + + def start_link(), do: GenServer.start_link(__MODULE__, [], name: __MODULE__) + + @doc false + @impl true + def init(_) do + setup() + env = Keyword.fetch!(Application.get_env(:nola, Nola.Plugins.Link, []), :store) + :erlang.send_after(env[:interval], self(), :expire) + {:ok, nil} + end + + @doc false + @impl true + def handle_info(:expire, state) do + env = Keyword.fetch!(Application.get_env(:nola, Nola.Plugins.Link, []), :store) + :erlang.send_after(env[:interval], self(), :expire) + ttl = env[:ttl] / 1000 + inhibit = env[:inhibit] / 1000 + now = DateTime.utc_now() |> DateTime.to_unix() + + links_evicted = + :ets.select_delete(:links, [ + {{:_, :_, :_, :"$1"}, [{:<, :"$1", now - ttl}], [true]} + ]) + + witness_evicted = + :ets.select_delete(:links_witness, [ + {{:_, :_, :"$1"}, [{:<, :"$1", now - inhibit}], [true]} + ]) + + Logger.debug("evicted #{links_evicted} links and #{witness_evicted} witnesses") + + {:noreply, state} + end end |