summaryrefslogtreecommitdiff
path: root/lib/lsg_irc/link_plugin/html.ex
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--lib/lsg_irc/link_plugin/html.ex106
1 files changed, 106 insertions, 0 deletions
diff --git a/lib/lsg_irc/link_plugin/html.ex b/lib/lsg_irc/link_plugin/html.ex
new file mode 100644
index 0000000..e0e4229
--- /dev/null
+++ b/lib/lsg_irc/link_plugin/html.ex
@@ -0,0 +1,106 @@
+defmodule LSG.IRC.LinkPlugin.HTML do
+ @behaviour LSG.IRC.LinkPlugin
+
+ @impl true
+ def match(_, _), do: false
+
+ @impl true
+ def post_match(_url, "text/html"<>_, _header, _opts) do
+ {:body, nil}
+ end
+ def post_match(_, _, _, _), do: false
+
+ @impl true
+ def post_expand(url, body, _params, _opts) do
+ html = Floki.parse(body)
+ title = collect_title(html)
+ opengraph = collect_open_graph(html)
+ itemprops = collect_itemprops(html)
+ text = if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do
+ sitename = if sn = Map.get(opengraph, "site_name") do
+ "#{sn}"
+ else
+ ""
+ end
+ paywall? = if Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free")) == "free" do
+ ""
+ else
+ "[paywall] "
+ end
+ section = if section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section", nil)) do
+ ": #{section}"
+ else
+ ""
+ end
+ date = case DateTime.from_iso8601(Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))) do
+ {:ok, date, _} ->
+ "#{Timex.format!(date, "%d/%m/%y", :strftime)}. "
+ _ ->
+ ""
+ end
+ uri = URI.parse(url)
+
+ prefix = "#{paywall?}#{Map.get(opengraph, "site_name", uri.host)}#{section}"
+ prefix = unless prefix == "" do
+ "#{prefix} — "
+ else
+ ""
+ end
+ [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ IRC.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}"))
+ else
+ clean_text(title)
+ end
+ {:ok, text}
+ end
+
+ defp collect_title(html) do
+ case Floki.find(html, "title") do
+ [{"title", [], [title]} | _] ->
+ String.trim(title)
+ _ ->
+ nil
+ end
+ end
+
+ defp collect_open_graph(html) do
+ Enum.reduce(Floki.find(html, "head meta"), %{}, fn(tag, acc) ->
+ case tag do
+ {"meta", values, []} ->
+ name = List.keyfind(values, "property", 0, {nil, nil}) |> elem(1)
+ content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
+ case name do
+ "og:" <> key ->
+ Map.put(acc, key, content)
+ "article:"<>_ ->
+ Map.put(acc, name, content)
+ _other -> acc
+ end
+ _other -> acc
+ end
+ end)
+ end
+
+ defp collect_itemprops(html) do
+ Enum.reduce(Floki.find(html, "[itemprop]"), %{}, fn(tag, acc) ->
+ case tag do
+ {"meta", values, []} ->
+ name = List.keyfind(values, "itemprop", 0, {nil, nil}) |> elem(1)
+ content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
+ case name do
+ "article:" <> key ->
+ Map.put(acc, name, content)
+ _other -> acc
+ end
+ _other -> acc
+ end
+ end)
+ end
+
+ defp clean_text(text) do
+ text
+ |> String.replace("\n", " ")
+ |> HtmlEntities.decode()
+ end
+
+
+end