summary refs log tree commit diff
diff options
context:
space:
mode:
author Jordan Bracco <href@random.sh> 2025-06-26 17:36:17 +0200
committer Jordan Bracco <href@random.sh> 2025-06-26 17:36:17 +0200
commit 0b07abb95f6fb6f536bc277c7e3f026f66d81f3e (patch)
tree 536b42760e5b951bec01c7d69a868b74c10ac437
parent link: cache and inhibit (diff)
link: quirky
-rw-r--r-- lib/plugins/link.ex 6
-rw-r--r-- lib/plugins/link/html.ex 3
-rw-r--r-- lib/plugins/link/quirks.ex 26
3 files changed, 33 insertions, 2 deletions
diff --git a/lib/plugins/link.ex b/lib/plugins/link.ex
index bdc0fe9..0dca6ae 100644
--- a/lib/plugins/link.ex
+++ b/lib/plugins/link.ex
@@ -37,6 +37,7 @@ defmodule Nola.Plugins.Link do
def short_irc_doc, do: false
def irc_doc, do: @ircdoc
require Logger
+ alias __MODULE__.Quirks
alias __MODULE__.Store
alias __MODULE__.Scraper
@@ -306,9 +307,10 @@ defmodule Nola.Plugins.Link do
def expand_default(acc = [uri = %URI{scheme: scheme} | _]) when scheme in ["http", "https"] do
Logger.debug("link: expanding #{uri} with default")
+ uri = Quirks.uri(uri)
+
headers = [
- {"user-agent",
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"}
+ {"user-agent", Quirks.user_agent(uri.host)}
]
proxy = Keyword.get(Application.get_env(:nola, __MODULE__, []), :proxy, nil)
diff --git a/lib/plugins/link/html.ex b/lib/plugins/link/html.ex
index 78f3192..aa78810 100644
--- a/lib/plugins/link/html.ex
+++ b/lib/plugins/link/html.ex
@@ -144,6 +144,9 @@ defmodule Nola.Plugins.Link.HTML do
defp clean_text(text) do
text
|> String.replace("\n", " ")
+ |> String.replace("<br>", " ")
+ |> String.replace("<br/>", " ")
+ |> String.replace("<br />", " ")
|> HtmlEntities.decode()
end
end
diff --git a/lib/plugins/link/quirks.ex b/lib/plugins/link/quirks.ex
new file mode 100644
index 0000000..af259a2
--- /dev/null
+++ b/lib/plugins/link/quirks.ex
@@ -0,0 +1,26 @@
+defmodule Nola.Plugins.Link.Quirks do
+ # def uri(%URI{host: "x.com"} = uri) do
+ # %URI{uri | host: "vxtwitter.com"}
+ # end
+
+ def uri(url) do
+ url
+ end
+
+ def user_agent(host)
+ when host in [
+ "x.com",
+ "vxtwitter.com",
+ "fxtwitter.com",
+ "instagram.com",
+ "facebook.com",
+ "xnstagram.com",
+ "ddinstagram.com"
+ ] do
+ "TelegramBot (like TwitterBot)"
+ end
+
+ def user_agent(_host) do
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
+ end
+end