diff options
Diffstat (limited to '')
-rw-r--r-- | lib/lsg_irc/link_plugin/html.ex | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/lib/lsg_irc/link_plugin/html.ex b/lib/lsg_irc/link_plugin/html.ex new file mode 100644 index 0000000..e0e4229 --- /dev/null +++ b/lib/lsg_irc/link_plugin/html.ex @@ -0,0 +1,106 @@ +defmodule LSG.IRC.LinkPlugin.HTML do + @behaviour LSG.IRC.LinkPlugin + + @impl true + def match(_, _), do: false + + @impl true + def post_match(_url, "text/html"<>_, _header, _opts) do + {:body, nil} + end + def post_match(_, _, _, _), do: false + + @impl true + def post_expand(url, body, _params, _opts) do + html = Floki.parse(body) + title = collect_title(html) + opengraph = collect_open_graph(html) + itemprops = collect_itemprops(html) + text = if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do + sitename = if sn = Map.get(opengraph, "site_name") do + "#{sn}" + else + "" + end + paywall? = if Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free")) == "free" do + "" + else + "[paywall] " + end + section = if section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section", nil)) do + ": #{section}" + else + "" + end + date = case DateTime.from_iso8601(Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))) do + {:ok, date, _} -> + "#{Timex.format!(date, "%d/%m/%y", :strftime)}. " + _ -> + "" + end + uri = URI.parse(url) + + prefix = "#{paywall?}#{Map.get(opengraph, "site_name", uri.host)}#{section}" + prefix = unless prefix == "" do + "#{prefix} — " + else + "" + end + [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ IRC.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}")) + else + clean_text(title) + end + {:ok, text} + end + + defp collect_title(html) do + case Floki.find(html, "title") do + [{"title", [], [title]} | _] -> + String.trim(title) + _ -> + nil + end + end + + defp collect_open_graph(html) do + Enum.reduce(Floki.find(html, "head meta"), %{}, fn(tag, acc) -> + case tag do + {"meta", values, []} -> + name = List.keyfind(values, "property", 0, {nil, nil}) |> elem(1) + content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1) + case name do + "og:" <> key -> + Map.put(acc, key, content) + "article:"<>_ -> + Map.put(acc, name, content) + _other -> acc + end + _other -> acc + end + end) + end + + defp collect_itemprops(html) do + Enum.reduce(Floki.find(html, "[itemprop]"), %{}, fn(tag, acc) -> + case tag do + {"meta", values, []} -> + name = List.keyfind(values, "itemprop", 0, {nil, nil}) |> elem(1) + content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1) + case name do + "article:" <> key -> + Map.put(acc, name, content) + _other -> acc + end + _other -> acc + end + end) + end + + defp clean_text(text) do + text + |> String.replace("\n", " ") + |> HtmlEntities.decode() + end + + +end |