1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
defmodule LSG.IRC.LinkPlugin.HTML do
  @moduledoc """
  `LSG.IRC.LinkPlugin` implementation that summarises HTML documents.

  * `match/2` always returns `false`.
  * `post_match/4` returns `{:body, nil}` when the content type starts with
    `"text/html"`, and `false` otherwise.
  * `post_expand/4` parses the body with Floki. When the document carries both
    an `og:title` and an `og:description` meta tag it builds a list of lines
    from the OpenGraph / `article:*` metadata; otherwise it falls back to the
    text of the first `<title>` element.
  """

  @behaviour LSG.IRC.LinkPlugin

  @impl true
  def match(_, _), do: false

  @impl true
  def post_match(_url, "text/html" <> _, _header, _opts) do
    {:body, nil}
  end

  def post_match(_, _, _, _), do: false

  @impl true
  def post_expand(url, body, _params, _opts) do
    html = Floki.parse(body)
    opengraph = collect_open_graph(html)

    case opengraph do
      %{"title" => og_title, "description" => og_description} ->
        itemprops = collect_itemprops(html)
        {:ok, open_graph_lines(url, og_title, og_description, opengraph, itemprops)}

      _ ->
        case collect_title(html) do
          # No usable <title>: previously this path raised inside clean_text/1.
          # NOTE(review): `:error` is assumed to be an accepted return value of
          # the LinkPlugin behaviour - confirm against the callback spec.
          nil -> :error
          title -> {:ok, clean_text(title)}
        end
    end
  end

  # Builds the OpenGraph summary: a first line "<prefix><title>" followed by the
  # lines produced by IRC.splitlong/1 for "<date><description>".
  defp open_graph_lines(url, title, description, opengraph, itemprops) do
    paywall =
      case article_meta(opengraph, itemprops, "article:content_tier", "free") do
        "free" -> ""
        _tier -> "[paywall] "
      end

    section =
      case article_meta(opengraph, itemprops, "article:section", nil) do
        nil -> ""
        name -> ": #{name}"
      end

    published = article_meta(opengraph, itemprops, "article:published_time", "")

    date =
      case DateTime.from_iso8601(published) do
        {:ok, datetime, _offset} -> "#{Timex.format!(datetime, "%d/%m/%y", :strftime)}. "
        _ -> ""
      end

    # Fall back to the URL host when the page declares no og:site_name.
    site = Map.get(opengraph, "site_name", URI.parse(url).host)
    prefix = "#{paywall}#{site}#{section}"
    prefix = if prefix == "", do: "", else: "#{prefix} — "

    [clean_text("#{prefix}#{title}") | IRC.splitlong(clean_text("#{date}#{description}"))]
  end

  # Looks `key` up in the OpenGraph map first, then in the itemprop map, and
  # finally falls back to `default`.
  defp article_meta(opengraph, itemprops, key, default) do
    Map.get(opengraph, key, Map.get(itemprops, key, default))
  end

  # Returns the trimmed text of the first <title> element, or nil when the
  # document has none. Attributes on the element and multiple child nodes are
  # accepted; the text is extracted with Floki.text/1.
  defp collect_title(html) do
    case Floki.find(html, "title") do
      [{"title", _attrs, children} | _] ->
        children |> Floki.text() |> String.trim()

      _ ->
        nil
    end
  end

  # Collects `<meta property=... content=...>` tags found under <head>.
  # "og:<key>" properties are stored under "<key>"; "article:*" properties keep
  # their full name. Tags without a `content` attribute are skipped so that
  # every stored value is a binary.
  defp collect_open_graph(html) do
    html
    |> Floki.find("head meta")
    |> Enum.reduce(%{}, fn
      {"meta", attrs, []}, acc ->
        case {attribute(attrs, "property"), attribute(attrs, "content")} do
          {_property, nil} -> acc
          {"og:" <> key, content} -> Map.put(acc, key, content)
          {"article:" <> _ = name, content} -> Map.put(acc, name, content)
          _other -> acc
        end

      _other, acc ->
        acc
    end)
  end

  # Collects `<meta itemprop="article:*" content=...>` tags, keyed by the full
  # itemprop name. Tags without a `content` attribute are skipped.
  defp collect_itemprops(html) do
    html
    |> Floki.find("[itemprop]")
    |> Enum.reduce(%{}, fn
      {"meta", attrs, []}, acc ->
        case {attribute(attrs, "itemprop"), attribute(attrs, "content")} do
          {_itemprop, nil} -> acc
          {"article:" <> _ = name, content} -> Map.put(acc, name, content)
          _other -> acc
        end

      _other, acc ->
        acc
    end)
  end

  # Returns the value of attribute `name` from a list of `{name, value}`
  # tuples, or nil when the attribute is absent.
  defp attribute(attrs, name) do
    attrs
    |> List.keyfind(name, 0, {nil, nil})
    |> elem(1)
  end

  # Replaces newlines with spaces and decodes HTML entities.
  defp clean_text(text) do
    text
    |> String.replace("\n", " ")
    |> HtmlEntities.decode()
  end
end
|