defmodule LSG.IRC.LinkPlugin.HTML do
  @moduledoc """
  Link plugin that summarises HTML pages.

  The plugin never claims a URL up front (`match/2` always returns `false`).
  It accepts any response whose content type starts with `text/html` and
  builds a summary from the page's Open Graph / `article:*` metadata, falling
  back to the contents of the `<title>` element.
  """

  @behaviour LSG.IRC.LinkPlugin

  @doc """
  Always returns `false`: URLs are only selected after the response headers
  are known, through `post_match/4`.
  """
  @impl true
  def match(_, _), do: false

  @doc """
  Returns `{:body, nil}` when the content type starts with `"text/html"`,
  `false` for any other content type.
  """
  @impl true
  def post_match(_url, "text/html" <> _, _header, _opts), do: {:body, nil}
  def post_match(_, _, _, _), do: false

  @doc """
  Builds the summary for an HTML `body` fetched from `url`.

  Returns `{:ok, text}` where `text` is:

    * a list made of a headline followed by the description as split by
      `IRC.splitlong/1`, when both `og:title` and `og:description` are present;
    * otherwise the cleaned `<title>` text;
    * `nil` when the page has no usable `<title>` either.
  """
  @impl true
  def post_expand(url, body, _params, _opts) do
    html = Floki.parse(body)
    opengraph = collect_open_graph(html)

    text =
      if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do
        opengraph_summary(url, opengraph, collect_itemprops(html))
      else
        # NOTE(review): yields {:ok, nil} for a page without a <title> instead
        # of raising inside clean_text/1 — confirm callers tolerate nil text.
        html |> collect_title() |> clean_text()
      end

    {:ok, text}
  end

  # Formats "<[paywall] ><site><: section> — <title>" followed by the
  # (optionally date-prefixed) description split into several lines.
  defp opengraph_summary(url, opengraph, itemprops) do
    paywall =
      if article_meta(opengraph, itemprops, "article:content_tier", "free") == "free" do
        ""
      else
        "[paywall] "
      end

    section =
      case article_meta(opengraph, itemprops, "article:section", nil) do
        nil -> ""
        name -> ": #{name}"
      end

    published_time = article_meta(opengraph, itemprops, "article:published_time", "")

    date =
      case DateTime.from_iso8601(published_time) do
        {:ok, published, _offset} -> "#{Timex.format!(published, "%d/%m/%y", :strftime)}. "
        _ -> ""
      end

    # Fall back to the URL host when the page does not name its site.
    site = Map.get(opengraph, "site_name") || URI.parse(url).host

    prefix =
      case "#{paywall}#{site}#{section}" do
        "" -> ""
        label -> "#{label} — "
      end

    headline = clean_text("#{prefix}#{Map.get(opengraph, "title")}")
    [headline | IRC.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}"))]
  end

  # Looks `key` up in the Open Graph data first, then in the itemprops, then
  # falls back to `default`. A tag that exists without a `content` attribute is
  # stored as nil and is treated as absent here, so downstream code always
  # receives either a string or `default`.
  defp article_meta(opengraph, itemprops, key, default) do
    Map.get(opengraph, key) || Map.get(itemprops, key) || default
  end

  # Returns the trimmed text of the first <title> element, or nil when there is
  # none or when it does not contain exactly one text node.
  defp collect_title(html) do
    case Floki.find(html, "title") do
      [{"title", _attrs, [title]} | _] when is_binary(title) -> String.trim(title)
      _ -> nil
    end
  end

  # Collects <head> meta tags keyed by their `property` attribute:
  # "og:<key>" entries are stored under "<key>", "article:*" entries keep their
  # full name. Later tags override earlier ones.
  defp collect_open_graph(html) do
    html
    |> Floki.find("head meta")
    |> Enum.reduce(%{}, fn
      {"meta", attrs, []}, acc ->
        content = meta_attr(attrs, "content")

        case meta_attr(attrs, "property") do
          "og:" <> key -> Map.put(acc, key, content)
          "article:" <> _ = name -> Map.put(acc, name, content)
          _other -> acc
        end

      _other, acc ->
        acc
    end)
  end

  # Collects <meta itemprop="article:*"> tags, keyed by their full itemprop
  # name. Elements other than childless <meta> tags are ignored.
  defp collect_itemprops(html) do
    html
    |> Floki.find("[itemprop]")
    |> Enum.reduce(%{}, fn
      {"meta", attrs, []}, acc ->
        case meta_attr(attrs, "itemprop") do
          "article:" <> _ = name -> Map.put(acc, name, meta_attr(attrs, "content"))
          _other -> acc
        end

      _other, acc ->
        acc
    end)
  end

  # Returns the value of attribute `name` from a list of `{name, value}`
  # attribute tuples, or nil when the attribute is missing.
  defp meta_attr(attrs, name) do
    case List.keyfind(attrs, name, 0) do
      {^name, value} -> value
      _ -> nil
    end
  end

  # Flattens newlines to spaces and decodes HTML entities; nil passes through.
  defp clean_text(nil), do: nil

  defp clean_text(text) do
    text
    |> String.replace("\n", " ")
    |> HtmlEntities.decode()
  end
end