path: root/lib/lsg_irc/link_plugin/html.ex
blob: e0e4229f04147b03f86bde34787b2095d4b6fef3
defmodule LSG.IRC.LinkPlugin.HTML do
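  @moduledoc """
  Link preview handler for generic HTML pages.

  The plugin is selected from the response content type (`text/html`) rather
  than from the URL, and builds a preview from the page's OpenGraph metadata
  (`og:*` and `article:*` meta tags, plus `article:*` itemprops), falling back
  to the `<title>` element when no usable title/description pair is found.

  Illustrative call, assuming `body` holds the fetched HTML of the page (the
  params and opts arguments are currently ignored); `preview` is either a list
  of IRC lines or a single title string:

      {:ok, preview} = LSG.IRC.LinkPlugin.HTML.post_expand(url, body, nil, [])
  """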
  @behaviour LSG.IRC.LinkPlugin

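  # Never claim a URL from its shape alone; selection happens in post_match/4
  # based on the response content type.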
  @impl true
  def match(_, _), do: false

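  # Accept anything served as text/html (including "text/html; charset=..."),
  # returning {:body, nil} to request the response body for post_expand/4.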
  @impl true
  def post_match(_url, "text/html"<>_, _header, _opts) do
    {:body, nil}
  end
  def post_match(_, _, _, _), do: false

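  # Build the preview: a first line of the form "[paywall] site: section",
  # joined with the og:title, then the publication date and og:description
  # split into IRC-sized chunks. Without a title/description pair, fall back
  # to the <title> text.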
  @impl true
  def post_expand(url, body, _params, _opts) do
    html = Floki.parse(body)
    title = collect_title(html)
    opengraph = collect_open_graph(html)
    itemprops = collect_itemprops(html)
    text = if Map.has_key?(opengraph, "title") && Map.has_key?(opengraph, "description") do
      paywall? = if Map.get(opengraph, "article:content_tier", Map.get(itemprops, "article:content_tier", "free")) == "free" do
        ""
      else
        "[paywall] "
      end
      section = if section = Map.get(opengraph, "article:section", Map.get(itemprops, "article:section", nil)) do
        ": #{section}"
      else
        ""
      end
      date = case DateTime.from_iso8601(Map.get(opengraph, "article:published_time", Map.get(itemprops, "article:published_time", ""))) do
        {:ok, date, _} ->
          "#{Timex.format!(date, "%d/%m/%y", :strftime)}. "
        _ ->
          ""
      end
      uri = URI.parse(url)

      prefix = "#{paywall?}#{Map.get(opengraph, "site_name", uri.host)}#{section}"
      prefix = if prefix == "" do
        ""
      else
        "#{prefix} — "
      end
      [clean_text("#{prefix}#{Map.get(opengraph, "title")}")] ++ IRC.splitlong(clean_text("#{date}#{Map.get(opengraph, "description")}"))
    else
      clean_text(title)
    end
    {:ok, text}
  end

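  # First <title> element of the document, trimmed; nil when there is none.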
  defp collect_title(html) do
    case Floki.find(html, "title") do
    [{"title", [], [title]} | _] ->
      String.trim(title)
    _ ->
      nil
    end
  end

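  # Gather <meta property=...> tags from <head>: "og:*" properties are stored
  # without their prefix (e.g. "title"), "article:*" properties under their
  # full name.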
  defp collect_open_graph(html) do
    Enum.reduce(Floki.find(html, "head meta"), %{}, fn(tag, acc) ->
      case tag do
        {"meta", values, []} ->
          name = List.keyfind(values, "property", 0, {nil, nil}) |> elem(1)
          content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
          case name do
           "og:" <> key ->
              Map.put(acc, key, content)
            "article:"<>_ ->
              Map.put(acc, name, content)
            _other -> acc
          end
        _other -> acc
      end
    end)
  end

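  # Gather <meta itemprop="article:*"> microdata tags, keyed by their full
  # itemprop name, mirroring the "article:*" OpenGraph properties above.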
  defp collect_itemprops(html) do
    Enum.reduce(Floki.find(html, "[itemprop]"), %{}, fn(tag, acc) ->
      case tag do
        {"meta", values, []} ->
          name = List.keyfind(values, "itemprop", 0, {nil, nil}) |> elem(1)
          content = List.keyfind(values, "content", 0, {nil, nil}) |> elem(1)
          case name do
           "article:" <> key ->
              Map.put(acc, name, content)
            _other -> acc
          end
        _other -> acc
      end
    end)
  end

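  # Collapse newlines and decode HTML entities so the text fits IRC messages;
  # nil-safe for pages without a <title>.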
  defp clean_text(nil), do: nil

  defp clean_text(text) do
    text
    |> String.replace("\n", " ")
    |> HtmlEntities.decode()
  end

end