diff options
author | href <href@random.sh> | 2020-07-25 17:41:12 +0200 |
---|---|---|
committer | href <href@random.sh> | 2020-07-25 17:41:12 +0200 |
commit | b8a36d46d687353921144a0b98cd804522b0d34f (patch) | |
tree | c8b25669ccbcd5c98ed6f4959def7fb1b5c6dc2f | |
parent | pouet (diff) |
link: post_* callbacks; html & pdftitle.
Diffstat (limited to '')
-rw-r--r-- | lib/lsg_irc/link_plugin.ex | 147 | ||||
-rw-r--r-- | lib/lsg_irc/link_plugin/github.ex | 5 | ||||
-rw-r--r-- | lib/lsg_irc/link_plugin/html.ex | 106 | ||||
-rw-r--r-- | lib/lsg_irc/link_plugin/imgur.ex | 4 | ||||
-rw-r--r-- | lib/lsg_irc/link_plugin/pdf.ex | 39 | ||||
-rw-r--r-- | lib/lsg_irc/link_plugin/reddit.ex (renamed from lib/lsg_irc/link_plugin/reddit_plugin.ex) | 5 | ||||
-rw-r--r-- | lib/lsg_irc/link_plugin/twitter.ex | 3 | ||||
-rw-r--r-- | lib/lsg_irc/link_plugin/youtube.ex | 4 |
8 files changed, 216 insertions, 97 deletions
diff --git a/lib/lsg_irc/link_plugin.ex b/lib/lsg_irc/link_plugin.ex index 97835e4..ea6df0c 100644 --- a/lib/lsg_irc/link_plugin.ex +++ b/lib/lsg_irc/link_plugin.ex @@ -44,6 +44,10 @@ defmodule LSG.IRC.LinkPlugin do @callback match(uri :: URI.t, options :: Keyword.t) :: {true, params :: Map.t} | false @callback expand(uri :: URI.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error + @callback post_match(uri :: URI.t, content_type :: binary, headers :: [], opts :: Keyword.t) :: {:body | :file, params :: Map.t} | false + @callback post_expand(uri :: URI.t, body :: binary() | Path.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error + + @optional_callbacks [expand: 3, post_expand: 4] defstruct [:client] @@ -61,13 +65,18 @@ defmodule LSG.IRC.LinkPlugin do uri = URI.parse(word) if uri.scheme && uri.host do spawn(fn() -> + :timer.kill_after(:timer.seconds(30)) case expand_link([uri]) do {:ok, uris, text} -> text = case uris do [uri] -> text - [uri | _] -> ["-> #{URI.to_string(uri)}", text] + [luri | _] -> + if luri.host == uri.host && luri.path == luri.path do + text + else + ["-> #{URI.to_string(luri)}", text] + end end - IO.inspect(text) if is_list(text) do for line <- text, do: message.replyfun.(line) else @@ -149,9 +158,34 @@ defmodule LSG.IRC.LinkPlugin do length = Map.get(headers, "content-length", "0") {length, _} = Integer.parse(length) + handlers = Keyword.get(Application.get_env(:lsg, __MODULE__, [handlers: []]), :handlers) + handler = Enum.reduce_while(handlers, nil, fn({module, opts}, acc) -> + module = Module.concat([module]) + try do + case module.post_match(url, content_type, headers, opts) do + {mode, params} when mode in [:body, :file] -> {:halt, {module, params, opts, mode}} + false -> {:cont, acc} + end + rescue + e -> + Logger.error(inspect(e)) + {:cont, false} + catch + e, b -> + Logger.error(inspect({b})) + {:cont, false} + end + end) + cond do - String.starts_with?(content_type, "text/html") && length <= 30_000_000 -> - get_body(url, 30_000_000, client, <<>>) + handler != false and length <= 30_000_000 -> + case get_body(url, 30_000_000, client, handler, <<>>) do + {:ok, _} = ok -> ok + :error -> + {:ok, "file: #{content_type}, size: #{length} bytes"} + end + #String.starts_with?(content_type, "text/html") && length <= 30_000_000 -> + # get_body(url, 30_000_000, client, <<>>) true -> :hackney.close(client) {:ok, "file: #{content_type}, size: #{length} bytes"} @@ -173,76 +207,38 @@ defmodule LSG.IRC.LinkPlugin do {:error, status, headers} end - defp get_body(url, len, client, acc) when len >= byte_size(acc) do + defp get_body(url, len, client, {handler, params, opts, mode} = h, acc) when len >= byte_size(acc) do case :hackney.stream_body(client) do {:ok, data} -> - get_body(url, len, client, << acc::binary, data::binary >>) + get_body(url, len, client, h, << acc::binary, data::binary >>) :done -> - html = Floki.parse(acc) - title = collect_title(html) - opengraph = collect_open_graph(html) - itemprops = collect_itemprops(html) - Logger.debug("OG: #{inspect opengraph}") - text = if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do - sitename = if sn = Map.get(opengraph, "site_name") do - "#{sn}" - else - "" - end - paywall? = if Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free")) == "free" do - "" - else - "[paywall] " - end - section = if section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section", nil)) do - ": #{section}" - else - "" - end - date = case DateTime.from_iso8601(Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))) do - {:ok, date, _} -> - "#{Timex.format!(date, "%d/%m/%y", :strftime)}. " - _ -> - "" - end - uri = URI.parse(url) - - prefix = "#{paywall?}#{Map.get(opengraph, "site_name", uri.host)}#{section}" - prefix = unless prefix == "" do - "#{prefix} — " - else - "" - end - [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ IRC.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}")) - else - clean_text(title) + body = case mode do + :body -> acc + :file -> + {:ok, tmpfile} = Plug.Upload.random_file("linkplugin") + File.write!(tmpfile, acc) + tmpfile end - {:ok, text} + handler.post_expand(url, body, params, opts) {:error, reason} -> {:ok, "failed to fetch body: #{inspect reason}"} end end - defp clean_text(text) do - text - |> String.replace("\n", " ") - |> HtmlEntities.decode() - end - - defp get_body(len, client, _acc) do + defp get_body(_, len, client, _, _acc) do :hackney.close(client) {:ok, "Error: file over 30"} end def expand_default(acc = [uri = %URI{scheme: scheme} | _]) when scheme in ["http", "https"] do - headers = [] + headers = [{"user-agent", "DmzBot (like TwitterBot)"}] options = [follow_redirect: false, max_body_length: 30_000_000] case get(URI.to_string(uri), headers, options) do {:ok, text} -> {:ok, acc, text} {:redirect, link} -> new_uri = URI.parse(link) - new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port} + #new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port} expand_link([new_uri | acc]) {:error, status, _headers} -> text = Plug.Conn.Status.reason_phrase(status) @@ -257,47 +253,4 @@ defmodule LSG.IRC.LinkPlugin do {:ok, [uri], "-> #{URI.to_string(uri)}"} end - defp collect_title(html) do - case Floki.find(html, "title") do - [{"title", [], [title]} | _] -> - String.trim(title) - _ -> - nil - end - end - - defp collect_open_graph(html) do - Enum.reduce(Floki.find(html, "head meta"), %{}, fn(tag, acc) -> - case tag do - {"meta", values, []} -> - name = List.keyfind(values, "property", 0, {nil, nil}) |> elem(1) - content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1) - case name do - "og:" <> key -> - Map.put(acc, key, content) - "article:"<>_ -> - Map.put(acc, name, content) - _other -> acc - end - _other -> acc - end - end) - end - - defp collect_itemprops(html) do - Enum.reduce(Floki.find(html, "[itemprop]"), %{}, fn(tag, acc) -> - case tag do - {"meta", values, []} -> - name = List.keyfind(values, "itemprop", 0, {nil, nil}) |> elem(1) - content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1) - case name do - "article:" <> key -> - Map.put(acc, name, content) - _other -> acc - end - _other -> acc - end - end) - end - end diff --git a/lib/lsg_irc/link_plugin/github.ex b/lib/lsg_irc/link_plugin/github.ex index c7444c2..19be89b 100644 --- a/lib/lsg_irc/link_plugin/github.ex +++ b/lib/lsg_irc/link_plugin/github.ex @@ -1,6 +1,7 @@ defmodule LSG.IRC.LinkPlugin.Github do @behaviour LSG.IRC.LinkPlugin + @impl true def match(uri = %URI{host: "github.com", path: path}, _) do case String.split(path, "/") do ["", user, repo] -> @@ -12,6 +13,10 @@ defmodule LSG.IRC.LinkPlugin.Github do def match(_, _), do: false + @impl true + def post_match(_, _, _, _), do: false + + @impl true def expand(_uri, %{user: user, repo: repo}, _opts) do case HTTPoison.get("https://api.github.com/repos/#{user}/#{repo}") do {:ok, %HTTPoison.Response{status_code: 200, body: body}} -> diff --git a/lib/lsg_irc/link_plugin/html.ex b/lib/lsg_irc/link_plugin/html.ex new file mode 100644 index 0000000..e0e4229 --- /dev/null +++ b/lib/lsg_irc/link_plugin/html.ex @@ -0,0 +1,106 @@ +defmodule LSG.IRC.LinkPlugin.HTML do + @behaviour LSG.IRC.LinkPlugin + + @impl true + def match(_, _), do: false + + @impl true + def post_match(_url, "text/html"<>_, _header, _opts) do + {:body, nil} + end + def post_match(_, _, _, _), do: false + + @impl true + def post_expand(url, body, _params, _opts) do + html = Floki.parse(body) + title = collect_title(html) + opengraph = collect_open_graph(html) + itemprops = collect_itemprops(html) + text = if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do + sitename = if sn = Map.get(opengraph, "site_name") do + "#{sn}" + else + "" + end + paywall? = if Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free")) == "free" do + "" + else + "[paywall] " + end + section = if section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section", nil)) do + ": #{section}" + else + "" + end + date = case DateTime.from_iso8601(Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))) do + {:ok, date, _} -> + "#{Timex.format!(date, "%d/%m/%y", :strftime)}. " + _ -> + "" + end + uri = URI.parse(url) + + prefix = "#{paywall?}#{Map.get(opengraph, "site_name", uri.host)}#{section}" + prefix = unless prefix == "" do + "#{prefix} — " + else + "" + end + [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ IRC.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}")) + else + clean_text(title) + end + {:ok, text} + end + + defp collect_title(html) do + case Floki.find(html, "title") do + [{"title", [], [title]} | _] -> + String.trim(title) + _ -> + nil + end + end + + defp collect_open_graph(html) do + Enum.reduce(Floki.find(html, "head meta"), %{}, fn(tag, acc) -> + case tag do + {"meta", values, []} -> + name = List.keyfind(values, "property", 0, {nil, nil}) |> elem(1) + content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1) + case name do + "og:" <> key -> + Map.put(acc, key, content) + "article:"<>_ -> + Map.put(acc, name, content) + _other -> acc + end + _other -> acc + end + end) + end + + defp collect_itemprops(html) do + Enum.reduce(Floki.find(html, "[itemprop]"), %{}, fn(tag, acc) -> + case tag do + {"meta", values, []} -> + name = List.keyfind(values, "itemprop", 0, {nil, nil}) |> elem(1) + content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1) + case name do + "article:" <> key -> + Map.put(acc, name, content) + _other -> acc + end + _other -> acc + end + end) + end + + defp clean_text(text) do + text + |> String.replace("\n", " ") + |> HtmlEntities.decode() + end + + +end diff --git a/lib/lsg_irc/link_plugin/imgur.ex b/lib/lsg_irc/link_plugin/imgur.ex index 1b8173f..41b7e08 100644 --- a/lib/lsg_irc/link_plugin/imgur.ex +++ b/lib/lsg_irc/link_plugin/imgur.ex @@ -15,6 +15,7 @@ defmodule LSG.IRC.LinkPlugin.Imgur do ``` """ + @impl true def match(uri = %URI{host: "imgur.com", path: "/a/"<>album_id}, _) do {true, %{album_id: album_id}} end @@ -27,6 +28,9 @@ defmodule LSG.IRC.LinkPlugin.Imgur do end def match(_, _), do: false + @impl true + def post_match(_, _, _, _), do: false + def expand(_uri, %{album_id: album_id}, opts) do expand_imgur_album(album_id, opts) end diff --git a/lib/lsg_irc/link_plugin/pdf.ex b/lib/lsg_irc/link_plugin/pdf.ex new file mode 100644 index 0000000..8c4869c --- /dev/null +++ b/lib/lsg_irc/link_plugin/pdf.ex @@ -0,0 +1,39 @@ +defmodule LSG.IRC.LinkPlugin.PDF do + require Logger + @behaviour LSG.IRC.LinkPlugin + + @impl true + def match(_, _), do: false + + @impl true + def post_match(_url, "application/pdf"<>_, _header, _opts) do + {:file, nil} + end + + def post_match(_, _, _, _), do: false + + @impl true + def post_expand(url, file, _, _) do + case System.cmd("pdftitle", ["-p", file]) do + {text, 0} -> + text = text + |> String.trim() + + if text == "" do + :error + else + basename = Path.basename(url, ".pdf") + text = "[#{basename}] " <> text + |> String.split("\n") + {:ok, text} + end + {_, 127} -> + Logger.error("dependency `pdftitle` is missing, please install it: `pip3 install pdftitle`.") + :error + {error, code} -> + Logger.warn("command `pdftitle` exited with status code #{code}:\n#{inspect error}") + :error + end + end + +end diff --git a/lib/lsg_irc/link_plugin/reddit_plugin.ex b/lib/lsg_irc/link_plugin/reddit.ex index a7f5235..6fc1723 100644 --- a/lib/lsg_irc/link_plugin/reddit_plugin.ex +++ b/lib/lsg_irc/link_plugin/reddit.ex @@ -1,6 +1,7 @@ defmodule LSG.IRC.LinkPlugin.Reddit do @behaviour LSG.IRC.LinkPlugin + @impl true def match(uri = %URI{host: "reddit.com", path: path}, _) do case String.split(path, "/") do ["", "r", sub, "comments", post_id, _slug] -> @@ -26,6 +27,10 @@ defmodule LSG.IRC.LinkPlugin.Reddit do end end + @impl true + def post_match(_, _, _, _), do: false + + @impl true def expand(_, %{mode: :sub, sub: sub}, _opts) do url = "https://api.reddit.com/r/#{sub}/about" case HTTPoison.get(url) do diff --git a/lib/lsg_irc/link_plugin/twitter.ex b/lib/lsg_irc/link_plugin/twitter.ex index a6b6e29..e462384 100644 --- a/lib/lsg_irc/link_plugin/twitter.ex +++ b/lib/lsg_irc/link_plugin/twitter.ex @@ -33,6 +33,9 @@ defmodule LSG.IRC.LinkPlugin.Twitter do def match(_, _), do: false + @impl true + def post_match(_, _, _, _), do: false + def expand(_uri, %{status_id: status_id}, opts) do expand_tweet(ExTwitter.show(status_id, tweet_mode: "extended"), opts) end diff --git a/lib/lsg_irc/link_plugin/youtube.ex b/lib/lsg_irc/link_plugin/youtube.ex index ea4f213..b68a86f 100644 --- a/lib/lsg_irc/link_plugin/youtube.ex +++ b/lib/lsg_irc/link_plugin/youtube.ex @@ -16,6 +16,7 @@ defmodule LSG.IRC.LinkPlugin.YouTube do * `invidious`: Add a link to invidio.us. Default: true. """ + @impl true def match(uri = %URI{host: yt, path: "/watch", query: "v="<>video_id}, _opts) when yt in ["youtube.com", "www.youtube.com"] do {true, %{video_id: video_id}} end @@ -26,7 +27,10 @@ defmodule LSG.IRC.LinkPlugin.YouTube do def match(_, _), do: false + @impl true + def post_match(_, _, _, _), do: false + @impl true def expand(uri, %{video_id: video_id}, opts) do key = Application.get_env(:lsg, :youtube)[:api_key] params = %{ |