summaryrefslogtreecommitdiff
path: root/lib/plugins/link/html.ex
diff options
context:
space:
mode:
Diffstat (limited to 'lib/plugins/link/html.ex')
-rw-r--r--lib/plugins/link/html.ex168
1 files changed, 98 insertions, 70 deletions
diff --git a/lib/plugins/link/html.ex b/lib/plugins/link/html.ex
index a941aac..5899ed5 100644
--- a/lib/plugins/link/html.ex
+++ b/lib/plugins/link/html.ex
@@ -5,102 +5,130 @@ defmodule Nola.Plugins.Link.HTML do
# Fallback: this plugin does not pre-match URLs directly.
def match(_, _), do: false

@impl true
# Only HTML responses are expanded; request the body for parsing.
def post_match(_url, "text/html" <> _rest, _header, _opts) do
  {:body, nil}
end

def post_match(_url, _mime, _header, _opts), do: false
@impl true
# Expands an HTML page into IRC lines: a rich OpenGraph summary when the
# page provides title + description, otherwise just the <title> text.
def post_expand(url, body, _params, _opts) do
  # NOTE(review): Floki.parse/1 is deprecated in recent Floki releases in
  # favour of Floki.parse_document/1 — confirm the pinned Floki version.
  html = Floki.parse(body)
  opengraph = collect_open_graph(html)

  text =
    if has_sufficient_opengraph_data?(opengraph),
      do: generate_text_from_opengraph(url, html, opengraph),
      else: clean_text(collect_title(html))

  {:ok, text}
end
# A page is "rich enough" when OpenGraph provides both a title and a description.
defp has_sufficient_opengraph_data?(opengraph) do
  Enum.all?(["title", "description"], &Map.has_key?(opengraph, &1))
end
+
# Builds the IRC output for a page with usable OpenGraph data:
# a "prefix — title" line followed by the (possibly split) description lines.
defp generate_text_from_opengraph(url, html, opengraph) do
  itemprops = collect_itemprops(html)
  prefix = collect_prefix_and_site_name(url, opengraph, itemprops)
  title_line = clean_text("#{prefix}#{Map.get(opengraph, "title")}")

  [title_line | collect_description(opengraph, itemprops, 500)]
end
+
# Returns the trimmed text of the first <title> element, or "" when absent.
# Generalized: the previous clause only matched an attribute-less <title>
# with exactly one text child and silently returned "" otherwise; this
# version tolerates attributes and multiple text nodes.
defp collect_title(html) do
  case Floki.find(html, "title") do
    [{"title", _attrs, children} | _] ->
      children
      |> Enum.filter(&is_binary/1)
      |> Enum.join()
      |> String.trim()

    _ ->
      ""
  end
end
# Collects OpenGraph ("og:*", prefix stripped) and "article:*" (kept verbatim)
# properties from <head> meta tags into a map.
defp collect_open_graph(html) do
  html
  |> Floki.find("head meta")
  |> Enum.reduce(%{}, &extract_meta_tag/2)
end
# Folds one parsed <meta> node into the OpenGraph accumulator.
# Fix: guard both attribute values with is_binary/1 — a <meta> without a
# "property" attribute bound nil, which crashed String.starts_with?/2 inside
# is_valid_meta_tag?/1 (exceptions are not caught by `with`); a missing
# "content" attribute stored nil values in the map.
defp extract_meta_tag({"meta", values, []}, acc) do
  with {_, name} when is_binary(name) <- List.keyfind(values, "property", 0, {nil, nil}),
       {_, content} when is_binary(content) <- List.keyfind(values, "content", 0, {nil, nil}),
       true <- is_valid_meta_tag?(name) do
    Map.put(acc, strip_prefix(name), content)
  else
    _ -> acc
  end
end

defp extract_meta_tag(_, acc), do: acc
+
# True for meta property names this plugin cares about ("og:*" / "article:*").
# Fix: the nil clause must come FIRST. It was declared after the catch-all
# clause, making it unreachable — so is_valid_meta_tag?(nil) hit the general
# clause and String.starts_with?(nil, _) raised FunctionClauseError for any
# <meta> tag lacking a "property" attribute.
# (Name kept for interface stability; Elixir convention would drop the
# `is_` prefix for a non-guard predicate: valid_meta_tag?/1.)
defp is_valid_meta_tag?(nil), do: false

defp is_valid_meta_tag?(name) do
  String.starts_with?(name, "og:") or String.starts_with?(name, "article:")
end
+
# Strips the "og:" namespace from a property name; other names pass through.
defp strip_prefix(name) do
  case name do
    "og:" <> bare_key -> bare_key
    other -> other
  end
end
+
# Collects "article:*" itemprop/content pairs from [itemprop] elements.
defp collect_itemprops(html) do
  html
  |> Floki.find("[itemprop]")
  |> Enum.reduce(%{}, &extract_itemprop/2)
end
# Folds one parsed [itemprop] <meta> node into the accumulator.
# Fix: require both attribute values to be binaries — a <meta> with an
# "itemprop" but no "content" attribute previously stored nil, which later
# crashes DateTime.from_iso8601/1 in get_formatted_date/2 when the key is
# "article:published_time".
defp extract_itemprop({"meta", values, []}, acc) do
  with {_, name} when is_binary(name) <- List.keyfind(values, "itemprop", 0, {nil, nil}),
       {_, content} when is_binary(content) <- List.keyfind(values, "content", 0, {nil, nil}),
       true <- String.starts_with?(name, "article:") do
    Map.put(acc, name, content)
  else
    _ -> acc
  end
end

defp extract_itemprop(_, acc), do: acc
+
# Builds the "[paywall] SiteName: Section — " prefix shown before the title.
# Falls back to the URL host when OpenGraph has no site_name; returns ""
# when every component is empty.
defp collect_prefix_and_site_name(url, opengraph, itemprops) do
  %URI{host: host} = URI.parse(url)

  parts = [
    get_paywall_status(opengraph, itemprops),
    Map.get(opengraph, "site_name", host),
    get_section(opengraph, itemprops)
  ]

  # Enum.join/1 renders nil as "" (via String.Chars), matching interpolation.
  case Enum.join(parts) do
    "" -> ""
    prefix -> "#{prefix} — "
  end
end
+
# "[paywall] " marker unless the article content tier is "free" (the default).
# OpenGraph wins over itemprops when both carry article:content_tier.
defp get_paywall_status(opengraph, itemprops) do
  fallback = Map.get(itemprops, "article:content_tier", "free")

  case Map.get(opengraph, "article:content_tier", fallback) do
    "free" -> ""
    _other -> "[paywall] "
  end
end
+
# ": Section" suffix for the prefix line; "" when no section is advertised.
# OpenGraph wins over itemprops when both carry article:section.
defp get_section(opengraph, itemprops) do
  fallback = Map.get(itemprops, "article:section")

  if section = Map.get(opengraph, "article:section", fallback) do
    ": #{section}"
  else
    ""
  end
end
+
# Date prefix + (length-capped) description, split into IRC-sized chunks.
defp collect_description(opengraph, itemprops, max_length) do
  date = get_formatted_date(opengraph, itemprops)
  description = transform_description(Map.get(opengraph, "description"), max_length)

  "#{date}#{description}"
  |> clean_text()
  |> Nola.Irc.Message.splitlong()
end
+
# "dd/mm/yy. " for a valid ISO-8601 article:published_time, "" otherwise.
# Fixes: uses stdlib Calendar.strftime/2 (Elixir >= 1.11) instead of Timex
# for this simple pattern, and skips non-binary timestamps — a nil value
# (itemprop present without content) crashed DateTime.from_iso8601/1.
defp get_formatted_date(opengraph, itemprops) do
  fallback = Map.get(itemprops, "article:published_time", "")
  published_time = Map.get(opengraph, "article:published_time", fallback)

  with true <- is_binary(published_time),
       {:ok, date, _offset} <- DateTime.from_iso8601(published_time) do
    "#{Calendar.strftime(date, "%d/%m/%y")}. "
  else
    _ -> ""
  end
end
+
# TODO: Swap with AI description instead of truncating.
# Caps the description at max_length graphemes; passes nil through.
# Fix: String.truncate/2 does not exist in Elixir's standard library, so the
# previous clause raised UndefinedFunctionError for any description of 500+
# characters. String.slice/3 implements the intended cap, and it returns
# shorter strings unchanged, so the String.length/1 pre-check is unnecessary.
defp transform_description(string, max_length) when is_binary(string),
  do: String.slice(string, 0, max_length)

defp transform_description(nil, _max_length), do: nil
+
# Flattens newlines to spaces and decodes HTML entities for IRC output.
defp clean_text(text) do
  HtmlEntities.decode(String.replace(text, "\n", " "))
end
-
end