From b8a36d46d687353921144a0b98cd804522b0d34f Mon Sep 17 00:00:00 2001
From: href
Date: Sat, 25 Jul 2020 17:41:12 +0200
Subject: link: post_* callbacks; html & pdftitle.

---
Review notes (free text between `---` and the diffstat; not part of the commit
message):

* Line breaks, indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch. Hunk line counts match the `@@`
  headers and the diffstat, but exact whitespace is a best guess.
  TODO: confirm against the upstream commit before applying.
* link_plugin.ex: the redirect check compared `luri.path` with itself, so every
  same-host redirect suppressed the "-> url" line. It now compares the final
  URI against the original `uri.path`.
* link_plugin.ex: the post_match handler search now starts from `false` instead
  of `nil`. With `nil`, an unmatched content type still satisfied
  `handler != false`, fell into the catch-all `get_body/5` clause and replied
  "Error: file over 30" instead of the "file: <type>, size: <n> bytes" summary.
* reddit.ex: the `public_description || description` fallback is parenthesised
  so the `String.split/2 |> List.first/1` pipeline applies to whichever value
  is chosen (`|>` binds tighter than `||`).
* NOTE(review): pdf.ex matches `{_, 127}` for a missing `pdftitle`, but
  `System.cmd/3` is documented to raise when the executable cannot be found;
  presumably that branch is never reached. Left unchanged; verify.

 lib/lsg_irc/link_plugin.ex               | 147 +++++++++++--------------------
 lib/lsg_irc/link_plugin/github.ex        |   5 ++
 lib/lsg_irc/link_plugin/html.ex          | 106 ++++++++++++++++++++++
 lib/lsg_irc/link_plugin/imgur.ex         |   4 +
 lib/lsg_irc/link_plugin/pdf.ex           |  39 ++++++++
 lib/lsg_irc/link_plugin/reddit.ex        | 119 +++++++++++++++++++++++++
 lib/lsg_irc/link_plugin/reddit_plugin.ex | 114 ------------------------
 lib/lsg_irc/link_plugin/twitter.ex       |   3 +
 lib/lsg_irc/link_plugin/youtube.ex       |   4 +
 9 files changed, 330 insertions(+), 211 deletions(-)
 create mode 100644 lib/lsg_irc/link_plugin/html.ex
 create mode 100644 lib/lsg_irc/link_plugin/pdf.ex
 create mode 100644 lib/lsg_irc/link_plugin/reddit.ex
 delete mode 100644 lib/lsg_irc/link_plugin/reddit_plugin.ex

diff --git a/lib/lsg_irc/link_plugin.ex b/lib/lsg_irc/link_plugin.ex
index 97835e4..ea6df0c 100644
--- a/lib/lsg_irc/link_plugin.ex
+++ b/lib/lsg_irc/link_plugin.ex
@@ -44,6 +44,10 @@ defmodule LSG.IRC.LinkPlugin do
 
   @callback match(uri :: URI.t, options :: Keyword.t) :: {true, params :: Map.t} | false
   @callback expand(uri :: URI.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error
+  @callback post_match(uri :: URI.t, content_type :: binary, headers :: [], opts :: Keyword.t) :: {:body | :file, params :: Map.t} | false
+  @callback post_expand(uri :: URI.t, body :: binary() | Path.t, params :: Map.t, options :: Keyword.t) :: {:ok, lines :: [] | String.t} | :error
+
+  @optional_callbacks [expand: 3, post_expand: 4]
 
   defstruct [:client]
 
@@ -61,13 +65,18 @@ defmodule LSG.IRC.LinkPlugin do
         uri = URI.parse(word)
         if uri.scheme && uri.host do
           spawn(fn() ->
+            :timer.kill_after(:timer.seconds(30))
             case expand_link([uri]) do
               {:ok, uris, text} ->
                 text = case uris do
                   [uri] -> text
-                  [uri | _] -> ["-> #{URI.to_string(uri)}", text]
+                  [luri | _] ->
+                    if luri.host == uri.host && luri.path == uri.path do
+                      text
+                    else
+                      ["-> #{URI.to_string(luri)}", text]
+                    end
                 end
-                IO.inspect(text)
                 if is_list(text) do
                   for line <- text, do: message.replyfun.(line)
                 else
@@ -149,9 +158,34 @@ defmodule LSG.IRC.LinkPlugin do
         length = Map.get(headers, "content-length", "0")
         {length, _} = Integer.parse(length)
 
+        handlers = Keyword.get(Application.get_env(:lsg, __MODULE__, [handlers: []]), :handlers)
+        handler = Enum.reduce_while(handlers, false, fn({module, opts}, acc) ->
+          module = Module.concat([module])
+          try do
+            case module.post_match(url, content_type, headers, opts) do
+              {mode, params} when mode in [:body, :file] -> {:halt, {module, params, opts, mode}}
+              false -> {:cont, acc}
+            end
+          rescue
+            e ->
+              Logger.error(inspect(e))
+              {:cont, false}
+          catch
+            e, b ->
+              Logger.error(inspect({b}))
+              {:cont, false}
+          end
+        end)
+
         cond do
-          String.starts_with?(content_type, "text/html") && length <= 30_000_000 ->
-            get_body(url, 30_000_000, client, <<>>)
+          handler != false and length <= 30_000_000 ->
+            case get_body(url, 30_000_000, client, handler, <<>>) do
+              {:ok, _} = ok -> ok
+              :error ->
+                {:ok, "file: #{content_type}, size: #{length} bytes"}
+            end
+          #String.starts_with?(content_type, "text/html") && length <= 30_000_000 ->
+          #  get_body(url, 30_000_000, client, <<>>)
           true ->
             :hackney.close(client)
             {:ok, "file: #{content_type}, size: #{length} bytes"}
@@ -173,76 +207,38 @@ defmodule LSG.IRC.LinkPlugin do
     {:error, status, headers}
   end
 
-  defp get_body(url, len, client, acc) when len >= byte_size(acc) do
+  defp get_body(url, len, client, {handler, params, opts, mode} = h, acc) when len >= byte_size(acc) do
     case :hackney.stream_body(client) do
       {:ok, data} ->
-        get_body(url, len, client, << acc::binary, data::binary >>)
+        get_body(url, len, client, h, << acc::binary, data::binary >>)
       :done ->
-        html = Floki.parse(acc)
-        title = collect_title(html)
-        opengraph = collect_open_graph(html)
-        itemprops = collect_itemprops(html)
-        Logger.debug("OG: #{inspect opengraph}")
-        text = if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do
-          sitename = if sn = Map.get(opengraph, "site_name") do
-            "#{sn}"
-          else
-            ""
-          end
-          paywall? = if Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free")) == "free" do
-            ""
-          else
-            "[paywall] "
-          end
-          section = if section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section", nil)) do
-            ": #{section}"
-          else
-            ""
-          end
-          date = case DateTime.from_iso8601(Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))) do
-            {:ok, date, _} ->
-              "#{Timex.format!(date, "%d/%m/%y", :strftime)}. "
-            _ ->
-              ""
-          end
-          uri = URI.parse(url)
-
-          prefix = "#{paywall?}#{Map.get(opengraph, "site_name", uri.host)}#{section}"
-          prefix = unless prefix == "" do
-            "#{prefix} — "
-          else
-            ""
-          end
-          [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ IRC.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}"))
-        else
-          clean_text(title)
+        body = case mode do
+          :body -> acc
+          :file ->
+            {:ok, tmpfile} = Plug.Upload.random_file("linkplugin")
+            File.write!(tmpfile, acc)
+            tmpfile
         end
-        {:ok, text}
+        handler.post_expand(url, body, params, opts)
       {:error, reason} ->
         {:ok, "failed to fetch body: #{inspect reason}"}
     end
   end
 
-  defp clean_text(text) do
-    text
-    |> String.replace("\n", " ")
-    |> HtmlEntities.decode()
-  end
-
-  defp get_body(len, client, _acc) do
+  defp get_body(_, len, client, _, _acc) do
     :hackney.close(client)
     {:ok, "Error: file over 30"}
   end
 
   def expand_default(acc = [uri = %URI{scheme: scheme} | _]) when scheme in ["http", "https"] do
-    headers = []
+    headers = [{"user-agent", "DmzBot (like TwitterBot)"}]
     options = [follow_redirect: false, max_body_length: 30_000_000]
     case get(URI.to_string(uri), headers, options) do
       {:ok, text} ->
         {:ok, acc, text}
       {:redirect, link} ->
         new_uri = URI.parse(link)
-        new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port}
+        #new_uri = %URI{new_uri | scheme: scheme, authority: uri.authority, host: uri.host, port: uri.port}
         expand_link([new_uri | acc])
       {:error, status, _headers} ->
         text = Plug.Conn.Status.reason_phrase(status)
@@ -257,47 +253,4 @@ defmodule LSG.IRC.LinkPlugin do
     {:ok, [uri], "-> #{URI.to_string(uri)}"}
   end
 
-  defp collect_title(html) do
-    case Floki.find(html, "title") do
-      [{"title", [], [title]} | _] ->
-        String.trim(title)
-      _ ->
-        nil
-    end
-  end
-
-  defp collect_open_graph(html) do
-    Enum.reduce(Floki.find(html, "head meta"), %{}, fn(tag, acc) ->
-      case tag do
-        {"meta", values, []} ->
-          name = List.keyfind(values, "property", 0, {nil, nil}) |> elem(1)
-          content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
-          case name do
-            "og:" <> key ->
-              Map.put(acc, key, content)
-            "article:"<>_ ->
-              Map.put(acc, name, content)
-            _other -> acc
-          end
-        _other -> acc
-      end
-    end)
-  end
-
-  defp collect_itemprops(html) do
-    Enum.reduce(Floki.find(html, "[itemprop]"), %{}, fn(tag, acc) ->
-      case tag do
-        {"meta", values, []} ->
-          name = List.keyfind(values, "itemprop", 0, {nil, nil}) |> elem(1)
-          content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
-          case name do
-            "article:" <> key ->
-              Map.put(acc, name, content)
-            _other -> acc
-          end
-        _other -> acc
-      end
-    end)
-  end
-
 end
diff --git a/lib/lsg_irc/link_plugin/github.ex b/lib/lsg_irc/link_plugin/github.ex
index c7444c2..19be89b 100644
--- a/lib/lsg_irc/link_plugin/github.ex
+++ b/lib/lsg_irc/link_plugin/github.ex
@@ -1,6 +1,7 @@
 defmodule LSG.IRC.LinkPlugin.Github do
   @behaviour LSG.IRC.LinkPlugin
 
+  @impl true
   def match(uri = %URI{host: "github.com", path: path}, _) do
     case String.split(path, "/") do
       ["", user, repo] ->
@@ -12,6 +13,10 @@ defmodule LSG.IRC.LinkPlugin.Github do
 
   def match(_, _), do: false
 
+  @impl true
+  def post_match(_, _, _, _), do: false
+
+  @impl true
   def expand(_uri, %{user: user, repo: repo}, _opts) do
     case HTTPoison.get("https://api.github.com/repos/#{user}/#{repo}") do
       {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
diff --git a/lib/lsg_irc/link_plugin/html.ex b/lib/lsg_irc/link_plugin/html.ex
new file mode 100644
index 0000000..e0e4229
--- /dev/null
+++ b/lib/lsg_irc/link_plugin/html.ex
@@ -0,0 +1,106 @@
+defmodule LSG.IRC.LinkPlugin.HTML do
+  @behaviour LSG.IRC.LinkPlugin
+
+  @impl true
+  def match(_, _), do: false
+
+  @impl true
+  def post_match(_url, "text/html"<>_, _header, _opts) do
+    {:body, nil}
+  end
+  def post_match(_, _, _, _), do: false
+
+  @impl true
+  def post_expand(url, body, _params, _opts) do
+    html = Floki.parse(body)
+    title = collect_title(html)
+    opengraph = collect_open_graph(html)
+    itemprops = collect_itemprops(html)
+    text = if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do
+      sitename = if sn = Map.get(opengraph, "site_name") do
+        "#{sn}"
+      else
+        ""
+      end
+      paywall? = if Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free")) == "free" do
+        ""
+      else
+        "[paywall] "
+      end
+      section = if section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section", nil)) do
+        ": #{section}"
+      else
+        ""
+      end
+      date = case DateTime.from_iso8601(Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))) do
+        {:ok, date, _} ->
+          "#{Timex.format!(date, "%d/%m/%y", :strftime)}. "
+        _ ->
+          ""
+      end
+      uri = URI.parse(url)
+
+      prefix = "#{paywall?}#{Map.get(opengraph, "site_name", uri.host)}#{section}"
+      prefix = unless prefix == "" do
+        "#{prefix} — "
+      else
+        ""
+      end
+      [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ IRC.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}"))
+    else
+      clean_text(title)
+    end
+    {:ok, text}
+  end
+
+  defp collect_title(html) do
+    case Floki.find(html, "title") do
+      [{"title", [], [title]} | _] ->
+        String.trim(title)
+      _ ->
+        nil
+    end
+  end
+
+  defp collect_open_graph(html) do
+    Enum.reduce(Floki.find(html, "head meta"), %{}, fn(tag, acc) ->
+      case tag do
+        {"meta", values, []} ->
+          name = List.keyfind(values, "property", 0, {nil, nil}) |> elem(1)
+          content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
+          case name do
+            "og:" <> key ->
+              Map.put(acc, key, content)
+            "article:"<>_ ->
+              Map.put(acc, name, content)
+            _other -> acc
+          end
+        _other -> acc
+      end
+    end)
+  end
+
+  defp collect_itemprops(html) do
+    Enum.reduce(Floki.find(html, "[itemprop]"), %{}, fn(tag, acc) ->
+      case tag do
+        {"meta", values, []} ->
+          name = List.keyfind(values, "itemprop", 0, {nil, nil}) |> elem(1)
+          content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
+          case name do
+            "article:" <> key ->
+              Map.put(acc, name, content)
+            _other -> acc
+          end
+        _other -> acc
+      end
+    end)
+  end
+
+  defp clean_text(text) do
+    text
+    |> String.replace("\n", " ")
+    |> HtmlEntities.decode()
+  end
+
+
+end
diff --git a/lib/lsg_irc/link_plugin/imgur.ex b/lib/lsg_irc/link_plugin/imgur.ex
index 1b8173f..41b7e08 100644
--- a/lib/lsg_irc/link_plugin/imgur.ex
+++ b/lib/lsg_irc/link_plugin/imgur.ex
@@ -15,6 +15,7 @@ defmodule LSG.IRC.LinkPlugin.Imgur do
   ```
   """
 
+  @impl true
   def match(uri = %URI{host: "imgur.com", path: "/a/"<>album_id}, _) do
     {true, %{album_id: album_id}}
   end
@@ -27,6 +28,9 @@ defmodule LSG.IRC.LinkPlugin.Imgur do
   end
   def match(_, _), do: false
 
+  @impl true
+  def post_match(_, _, _, _), do: false
+
   def expand(_uri, %{album_id: album_id}, opts) do
     expand_imgur_album(album_id, opts)
   end
diff --git a/lib/lsg_irc/link_plugin/pdf.ex b/lib/lsg_irc/link_plugin/pdf.ex
new file mode 100644
index 0000000..8c4869c
--- /dev/null
+++ b/lib/lsg_irc/link_plugin/pdf.ex
@@ -0,0 +1,39 @@
+defmodule LSG.IRC.LinkPlugin.PDF do
+  require Logger
+  @behaviour LSG.IRC.LinkPlugin
+
+  @impl true
+  def match(_, _), do: false
+
+  @impl true
+  def post_match(_url, "application/pdf"<>_, _header, _opts) do
+    {:file, nil}
+  end
+
+  def post_match(_, _, _, _), do: false
+
+  @impl true
+  def post_expand(url, file, _, _) do
+    case System.cmd("pdftitle", ["-p", file]) do
+      {text, 0} ->
+        text = text
+               |> String.trim()
+
+        if text == "" do
+          :error
+        else
+          basename = Path.basename(url, ".pdf")
+          text = "[#{basename}] " <> text
+                 |> String.split("\n")
+          {:ok, text}
+        end
+      {_, 127} ->
+        Logger.error("dependency `pdftitle` is missing, please install it: `pip3 install pdftitle`.")
+        :error
+      {error, code} ->
+        Logger.warn("command `pdftitle` exited with status code #{code}:\n#{inspect error}")
+        :error
+    end
+  end
+
+end
diff --git a/lib/lsg_irc/link_plugin/reddit.ex b/lib/lsg_irc/link_plugin/reddit.ex
new file mode 100644
index 0000000..6fc1723
--- /dev/null
+++ b/lib/lsg_irc/link_plugin/reddit.ex
@@ -0,0 +1,119 @@
+defmodule LSG.IRC.LinkPlugin.Reddit do
+  @behaviour LSG.IRC.LinkPlugin
+
+  @impl true
+  def match(uri = %URI{host: "reddit.com", path: path}, _) do
+    case String.split(path, "/") do
+      ["", "r", sub, "comments", post_id, _slug] ->
+        {true, %{mode: :post, path: path, sub: sub, post_id: post_id}}
+      ["", "r", sub, "comments", post_id, _slug, ""] ->
+        {true, %{mode: :post, path: path, sub: sub, post_id: post_id}}
+      ["", "r", sub, ""] ->
+        {true, %{mode: :sub, path: path, sub: sub}}
+      ["", "r", sub] ->
+        {true, %{mode: :sub, path: path, sub: sub}}
+#      ["", "u", user] ->
+#        {true, %{mode: :user, path: path, user: user}}
+      _ ->
+        false
+    end
+  end
+
+  def match(uri = %URI{host: host, path: path}, opts) do
+    if String.ends_with?(host, ".reddit.com") do
+      match(%URI{uri | host: "reddit.com"}, opts)
+    else
+      false
+    end
+  end
+
+  @impl true
+  def post_match(_, _, _, _), do: false
+
+  @impl true
+  def expand(_, %{mode: :sub, sub: sub}, _opts) do
+    url = "https://api.reddit.com/r/#{sub}/about"
+    case HTTPoison.get(url) do
+      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
+        sr = Jason.decode!(body)
+             |> Map.get("data")
+             |> IO.inspect(limit: :infinity)
+        description = (Map.get(sr, "public_description") || Map.get(sr, "description", ""))
+                      |> String.split("\n")
+                      |> List.first()
+        name = if title = Map.get(sr, "title") do
+          Map.get(sr, "display_name_prefixed") <> ": " <> title
+        else
+          Map.get(sr, "display_name_prefixed")
+        end
+        nsfw = if Map.get(sr, "over18") do
+          "[NSFW] "
+        else
+          ""
+        end
+        quarantine = if Map.get(sr, "quarantine") do
+          "[Quarantined] "
+        else
+          ""
+        end
+        count = "#{Map.get(sr, "subscribers")} subscribers, #{Map.get(sr, "active_user_count")} active"
+        preview = "#{quarantine}#{nsfw}#{name} — #{description} (#{count})"
+        {:ok, preview}
+      _ ->
+        :error
+    end
+  end
+
+  def expand(_uri, %{mode: :post, path: path, sub: sub, post_id: post_id}, _opts) do
+    case HTTPoison.get("https://api.reddit.com#{path}?sr_detail=true") do
+      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
+        json = Jason.decode!(body)
+        op = List.first(json)
+             |> Map.get("data")
+             |> Map.get("children")
+             |> List.first()
+             |> Map.get("data")
+             |> IO.inspect(limit: :infinity)
+        sr = get_in(op, ["sr_detail", "display_name_prefixed"])
+        {self?, url} = if Map.get(op, "selftext") == "" do
+          {false, Map.get(op, "url")}
+        else
+          {true, nil}
+        end
+
+        self_str = if(self?, do: "text", else: url)
+        up = Map.get(op, "ups")
+        down = Map.get(op, "downs")
+        comments = Map.get(op, "num_comments")
+        nsfw = if Map.get(op, "over_18") do
+          "[NSFW] "
+        else
+          ""
+        end
+        state = cond do
+          Map.get(op, "hidden") -> "hidden"
+          Map.get(op, "archived") -> "archived"
+          Map.get(op, "locked") -> "locked"
+          Map.get(op, "quarantine") -> "quarantined"
+          Map.get(op, "removed_by") || Map.get(op, "removed_by_category") -> "removed"
+          Map.get(op, "banned_by") -> "banned"
+          Map.get(op, "pinned") -> "pinned"
+          Map.get(op, "stickied") -> "stickied"
+          true -> nil
+        end
+        flair = if flair = Map.get(op, "link_flair_text") do
+          "[#{flair}] "
+        else
+          ""
+        end
+        title = "#{nsfw}#{sr}: #{flair}#{Map.get(op, "title")}"
+        state_str = if(state, do: "#{state}, ")
+        content = "by u/#{Map.get(op, "author")} - #{state_str}#{up} up, #{down} down, #{comments} comments - #{self_str}"
+
+        {:ok, [title, content]}
+      err ->
+        :error
+    end
+  end
+
+end
diff --git a/lib/lsg_irc/link_plugin/reddit_plugin.ex b/lib/lsg_irc/link_plugin/reddit_plugin.ex
deleted file mode 100644
index a7f5235..0000000
--- a/lib/lsg_irc/link_plugin/reddit_plugin.ex
+++ /dev/null
@@ -1,114 +0,0 @@
-defmodule LSG.IRC.LinkPlugin.Reddit do
-  @behaviour LSG.IRC.LinkPlugin
-
-  def match(uri = %URI{host: "reddit.com", path: path}, _) do
-    case String.split(path, "/") do
-      ["", "r", sub, "comments", post_id, _slug] ->
-        {true, %{mode: :post, path: path, sub: sub, post_id: post_id}}
-      ["", "r", sub, "comments", post_id, _slug, ""] ->
-        {true, %{mode: :post, path: path, sub: sub, post_id: post_id}}
-      ["", "r", sub, ""] ->
-        {true, %{mode: :sub, path: path, sub: sub}}
-      ["", "r", sub] ->
-        {true, %{mode: :sub, path: path, sub: sub}}
-#      ["", "u", user] ->
-#        {true, %{mode: :user, path: path, user: user}}
-      _ ->
-        false
-    end
-  end
-
-  def match(uri = %URI{host: host, path: path}, opts) do
-    if String.ends_with?(host, ".reddit.com") do
-      match(%URI{uri | host: "reddit.com"}, opts)
-    else
-      false
-    end
-  end
-
-  def expand(_, %{mode: :sub, sub: sub}, _opts) do
-    url = "https://api.reddit.com/r/#{sub}/about"
-    case HTTPoison.get(url) do
-      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
-        sr = Jason.decode!(body)
-             |> Map.get("data")
-             |> IO.inspect(limit: :infinity)
-        description = Map.get(sr, "public_description")||Map.get(sr, "description", "")
-                      |> String.split("\n")
-                      |> List.first()
-        name = if title = Map.get(sr, "title") do
-          Map.get(sr, "display_name_prefixed") <> ": " <> title
-        else
-          Map.get(sr, "display_name_prefixed")
-        end
-        nsfw = if Map.get(sr, "over18") do
-          "[NSFW] "
-        else
-          ""
-        end
-        quarantine = if Map.get(sr, "quarantine") do
-          "[Quarantined] "
-        else
-          ""
-        end
-        count = "#{Map.get(sr, "subscribers")} subscribers, #{Map.get(sr, "active_user_count")} active"
-        preview = "#{quarantine}#{nsfw}#{name} — #{description} (#{count})"
-        {:ok, preview}
-      _ ->
-        :error
-    end
-  end
-
-  def expand(_uri, %{mode: :post, path: path, sub: sub, post_id: post_id}, _opts) do
-    case HTTPoison.get("https://api.reddit.com#{path}?sr_detail=true") do
-      {:ok, %HTTPoison.Response{status_code: 200, body: body}} ->
-        json = Jason.decode!(body)
-        op = List.first(json)
-             |> Map.get("data")
-             |> Map.get("children")
-             |> List.first()
-             |> Map.get("data")
-             |> IO.inspect(limit: :infinity)
-        sr = get_in(op, ["sr_detail", "display_name_prefixed"])
-        {self?, url} = if Map.get(op, "selftext") == "" do
-          {false, Map.get(op, "url")}
-        else
-          {true, nil}
-        end
-
-        self_str = if(self?, do: "text", else: url)
-        up = Map.get(op, "ups")
-        down = Map.get(op, "downs")
-        comments = Map.get(op, "num_comments")
-        nsfw = if Map.get(op, "over_18") do
-          "[NSFW] "
-        else
-          ""
-        end
-        state = cond do
-          Map.get(op, "hidden") -> "hidden"
-          Map.get(op, "archived") -> "archived"
-          Map.get(op, "locked") -> "locked"
-          Map.get(op, "quarantine") -> "quarantined"
-          Map.get(op, "removed_by") || Map.get(op, "removed_by_category") -> "removed"
-          Map.get(op, "banned_by") -> "banned"
-          Map.get(op, "pinned") -> "pinned"
-          Map.get(op, "stickied") -> "stickied"
-          true -> nil
-        end
-        flair = if flair = Map.get(op, "link_flair_text") do
-          "[#{flair}] "
-        else
-          ""
-        end
-        title = "#{nsfw}#{sr}: #{flair}#{Map.get(op, "title")}"
-        state_str = if(state, do: "#{state}, ")
-        content = "by u/#{Map.get(op, "author")} - #{state_str}#{up} up, #{down} down, #{comments} comments - #{self_str}"
-
-        {:ok, [title, content]}
-      err ->
-        :error
-    end
-  end
-
-end
diff --git a/lib/lsg_irc/link_plugin/twitter.ex b/lib/lsg_irc/link_plugin/twitter.ex
index a6b6e29..e462384 100644
--- a/lib/lsg_irc/link_plugin/twitter.ex
+++ b/lib/lsg_irc/link_plugin/twitter.ex
@@ -33,6 +33,9 @@ defmodule LSG.IRC.LinkPlugin.Twitter do
 
   def match(_, _), do: false
 
+  @impl true
+  def post_match(_, _, _, _), do: false
+
   def expand(_uri, %{status_id: status_id}, opts) do
     expand_tweet(ExTwitter.show(status_id, tweet_mode: "extended"), opts)
   end
diff --git a/lib/lsg_irc/link_plugin/youtube.ex b/lib/lsg_irc/link_plugin/youtube.ex
index ea4f213..b68a86f 100644
--- a/lib/lsg_irc/link_plugin/youtube.ex
+++ b/lib/lsg_irc/link_plugin/youtube.ex
@@ -16,6 +16,7 @@ defmodule LSG.IRC.LinkPlugin.YouTube do
   * `invidious`: Add a link to invidio.us. Default: true.
   """
 
+  @impl true
   def match(uri = %URI{host: yt, path: "/watch", query: "v="<>video_id}, _opts) when yt in ["youtube.com", "www.youtube.com"] do
     {true, %{video_id: video_id}}
   end
@@ -26,7 +27,10 @@ defmodule LSG.IRC.LinkPlugin.YouTube do
 
   def match(_, _), do: false
 
+  @impl true
+  def post_match(_, _, _, _), do: false
 
+  @impl true
   def expand(uri, %{video_id: video_id}, opts) do
     key = Application.get_env(:lsg, :youtube)[:api_key]
     params = %{
-- 
cgit v1.2.3