summary refs log tree commit diff
path: root/src/idna.erl
diff options
context:
space:
mode:
author Alexey Shchepin <alexey@process-one.net> 2004-04-10 19:15:02 +0000
committer Alexey Shchepin <alexey@process-one.net> 2004-04-10 19:15:02 +0000
commit 6f600f1a3789fa00a6f77e947acf7e169aa500ae (patch)
tree f7541a08fccc5e7652b51c58a9a8cc25a70ae53a /src/idna.erl
parent * (all): Updated win32 stuff (thanks to Sergei Golovan) (diff)
* src/idna.erl: Support for IDNA (RFC3490)
* src/ejabberd_s2s_out.erl: Likewise
* src/xml.erl: element_to_string/1 and crypt/1 now returns deep list
* src/mod_muc/mod_muc_room.erl (add_message_to_history): Replaced string:len with lists:flatlength
SVN Revision: 222
Diffstat (limited to '')
-rw-r--r-- src/idna.erl 179
1 files changed, 179 insertions, 0 deletions
diff --git a/src/idna.erl b/src/idna.erl
new file mode 100644
index 00000000..cba1dbc9
--- /dev/null
+++ b/src/idna.erl
@@ -0,0 +1,179 @@
+%%%----------------------------------------------------------------------
+%%% File : idna.erl
+%%% Author : Alexey Shchepin <alexey@sevcom.net>
+%%% Purpose : Support for IDNA (RFC3490)
+%%% Created : 10 Apr 2004 by Alexey Shchepin <alexey@sevcom.net>
+%%% Id : $Id$
+%%%----------------------------------------------------------------------
+
+-module(idna).
+-author('alexey@sevcom.net').
+-vsn('$Revision$ ').
+
+%-compile(export_all).
+-export([domain_utf8_to_ascii/1,
+ domain_ucs2_to_ascii/1]).
+
+
+%% Convert a UTF-8 encoded domain name (a list of bytes) to its ASCII
+%% form.  Returns the ASCII string, or `false' when the name cannot be
+%% converted.
+domain_utf8_to_ascii(Domain) ->
+    % The UTF-8 decoding runs outside the `catch' inside
+    % domain_ucs2_to_ascii/1, so it is guarded here: input the decoder
+    % has no clause for must yield `false' like every other conversion
+    % failure instead of crashing the caller with function_clause.
+    case catch utf8_to_ucs2(Domain) of
+        {'EXIT', _Reason} ->
+            false;
+        UCS2 ->
+            domain_ucs2_to_ascii(UCS2)
+    end.
+
+%% Decode a UTF-8 byte list into a list of code points.
+%% Sequences of one to four bytes are accepted.  Continuation bytes are
+%% not validated (only their low six bits are used).  Truncated input,
+%% or a lead byte of 16#F8 and above, fails with function_clause.
+utf8_to_ucs2(S) ->
+    utf8_to_ucs2(S, "").
+
+%% R accumulates the decoded code points in reverse order.
+utf8_to_ucs2([], R) ->
+    lists:reverse(R);
+utf8_to_ucs2([C | S], R) when C < 16#80 ->
+    % One byte: the byte is the code point.
+    utf8_to_ucs2(S, [C | R]);
+utf8_to_ucs2([C1, C2 | S], R) when C1 < 16#E0 ->
+    % Two bytes: 5 + 6 payload bits.
+    utf8_to_ucs2(S, [((C1 band 16#1F) bsl 6) bor
+                     (C2 band 16#3F) | R]);
+utf8_to_ucs2([C1, C2, C3 | S], R) when C1 < 16#F0 ->
+    % Three bytes: 4 + 6 + 6 payload bits.
+    utf8_to_ucs2(S, [((C1 band 16#0F) bsl 12) bor
+                     ((C2 band 16#3F) bsl 6) bor
+                     (C3 band 16#3F) | R]);
+utf8_to_ucs2([C1, C2, C3, C4 | S], R) when C1 < 16#F8 ->
+    % Four bytes: 3 + 6 + 6 + 6 payload bits (code points above 16#FFFF).
+    utf8_to_ucs2(S, [((C1 band 16#07) bsl 18) bor
+                     ((C2 band 16#3F) bsl 12) bor
+                     ((C3 band 16#3F) bsl 6) bor
+                     (C4 band 16#3F) | R]).
+
+
+%% Convert a domain name given as a list of code points to its ASCII
+%% form.  The conversion itself exits on invalid input; that exit is
+%% caught here and reported as `false'.
+domain_ucs2_to_ascii(Domain) ->
+    ascii_or_false(catch domain_ucs2_to_ascii1(Domain)).
+
+%% Map a caught exit to `false'; pass any other result through unchanged.
+ascii_or_false({'EXIT', _Reason}) ->
+    false;
+ascii_or_false(Res) ->
+    Res.
+
+%% Split Domain into labels, convert each one with to_ascii/1 and join
+%% the results with ".".  The separators are the code points 16#002E,
+%% 16#3002, 16#FF0E and 16#FF61.  string:tokens/2 returns no empty
+%% tokens, so leading, trailing and repeated separators disappear.
+%% Crashes when to_ascii/1 rejects a label.
+domain_ucs2_to_ascii1(Domain) ->
+    Labels = string:tokens(Domain, [16#002E, 16#3002, 16#FF0E, 16#FF61]),
+    join_labels([to_ascii(Label) || Label <- Labels]).
+
+%% Concatenate the labels with a single "." between neighbours.
+join_labels([]) ->
+    "";
+join_labels([Label]) ->
+    Label;
+join_labels([Label | Rest]) ->
+    Label ++ [$. | join_labels(Rest)].
+
+% Domain names are already nameprep'ed in ejabberd, so that step is
+% skipped here.
+% Converts one label (a list of code points) to its ASCII form.  A label
+% that fails one of the checks below makes the function crash; the
+% crash is caught in domain_ucs2_to_ascii/1.
+to_ascii(Name) ->
+    % Among ASCII characters only letters, digits and "-" are accepted.
+    false = lists:any(fun is_forbidden_ascii/1, Name),
+    % The label must be non-empty and must not start or end with "-".
+    case Name of
+        [First | _] when First /= $- ->
+            true = lists:last(Name) /= $-
+    end,
+    ASCIIName = ace_label(Name, lists:any(fun(C) -> C > 16#7F end, Name)),
+    % The resulting label must be 1..63 characters long.
+    Len = length(ASCIIName),
+    true = (1 =< Len) and (Len =< 63),
+    ASCIIName.
+
+% True for code points up to 16#7F other than "-", 0-9, A-Z and a-z.
+is_forbidden_ascii(C) when 0 =< C, C =< 16#2C -> true;
+is_forbidden_ascii(C) when 16#2E =< C, C =< 16#2F -> true;
+is_forbidden_ascii(C) when 16#3A =< C, C =< 16#40 -> true;
+is_forbidden_ascii(C) when 16#5B =< C, C =< 16#60 -> true;
+is_forbidden_ascii(C) when 16#7B =< C, C =< 16#7F -> true;
+is_forbidden_ascii(_) -> false.
+
+% The second argument tells whether Name holds a code point above 16#7F.
+% Such a name must not already start with "xn--"; it is Punycode-encoded
+% and given that prefix.  A pure ASCII name is returned unchanged.
+ace_label(Name, true) ->
+    true = not lists:prefix("xn--", Name),
+    "xn--" ++ punycode_encode(Name);
+ace_label(Name, false) ->
+    Name.
+
+
+%%% PUNYCODE (RFC3492)
+
+-define(BASE, 36).          % radix: codepoint/1 maps digits 0..35; also the step of K
+-define(TMIN, 1).           % lower clamp of the threshold T in punycode_encode_delta/4
+-define(TMAX, 26).          % upper clamp of the threshold T in punycode_encode_delta/4
+-define(SKEW, 38).          % added to Delta in the divisor of adapt1/2's final term
+-define(DAMP, 700).         % divisor applied to Delta in adapt/3 when FirstTime is true
+-define(INITIAL_BIAS, 72).  % Bias that punycode_encode/1 starts with
+-define(INITIAL_N, 128).    % N that punycode_encode/1 starts with (16#80)
+
+%% Punycode-encode Input, a list of code points.  The result is the
+%% basic code points (=< 16#7F) in their original order, followed by "-"
+%% if there are any, followed by the digits produced by
+%% punycode_encode1/9 for the remaining code points.
+punycode_encode(Input) ->
+    Basic = [C || C <- Input, C =< 16#7f],
+    NonBasic = [C || C <- Input, C > 16#7f],
+    NumBasic = length(Basic),
+    Prefix = case NumBasic > 0 of
+                 true -> Basic ++ "-";
+                 false -> ""
+             end,
+    % The distinct non-basic code points are processed in ascending
+    % order; the handled-so-far counter starts at the number of basic
+    % code points.
+    Suffix = punycode_encode1(Input, lists:usort(NonBasic),
+                              NumBasic, NumBasic, length(Input),
+                              ?INITIAL_N, 0, ?INITIAL_BIAS, ""),
+    Prefix ++ Suffix.
+
+
+%% Main encoding loop (RFC 3492, section 6.3).  Each call takes M, the
+%% smallest non-basic code point not yet handled, and emits one
+%% variable-length integer for every occurrence of M in Input.
+%%   Input    - the complete list of code points being encoded
+%%   B        - number of basic code points in Input
+%%   H        - number of code points handled before this call
+%%   L        - length of Input
+%%   N        - the "n" of the RFC at the start of this call
+%%   Delta    - delta carried over from the previous call
+%%   Bias     - current bias
+%%   Out      - digits emitted so far, in reverse order
+%% Returns the emitted digits in output order.
+punycode_encode1(Input, [M | SNonBasic], B, H, L, N, Delta, Bias, Out)
+  when H < L ->
+    Delta1 = Delta + (M - N) * (H + 1),
+    % let n = m
+    {NewDelta, NewBias, NewH, NewOut} =
+        lists:foldl(
+          fun(C, {ADelta, ABias, AH, AOut}) ->
+                  if
+                      C < M ->
+                          {ADelta + 1, ABias, AH, AOut};
+                      C == M ->
+                          AOut1 = punycode_encode_delta(ADelta, ABias, AOut),
+                          % h is incremented after every emitted code
+                          % point, so the running count AH (not the H
+                          % this call started with) must drive the bias
+                          % adaptation; otherwise a repeated M reuses
+                          % stale numpoints/firsttime values.
+                          ABias1 = adapt(ADelta, AH + 1, AH == B),
+                          {0, ABias1, AH + 1, AOut1};
+                      true ->
+                          {ADelta, ABias, AH, AOut}
+                  end
+          end, {Delta1, Bias, H, Out}, Input),
+    punycode_encode1(
+      Input, SNonBasic, B, NewH, L, M + 1, NewDelta + 1, NewBias, NewOut);
+
+% Every code point is handled (or none is left): return the output.
+punycode_encode1(_Input, _SNonBasic, _B, _H, _L, _N, _Delta, _Bias, Out) ->
+    lists:reverse(Out).
+
+
+%% Prepend the digits encoding Delta as a variable-length integer to
+%% Out (which is kept in reverse order) and return the new Out.
+punycode_encode_delta(Delta, Bias, Out) ->
+    punycode_encode_delta(Delta, Bias, Out, ?BASE).
+
+%% K starts at ?BASE and grows by ?BASE for each digit emitted.
+punycode_encode_delta(Delta, Bias, Out, K) ->
+    case threshold(K, Bias) of
+        T when Delta < T ->
+            % Final digit: the remaining value is below the threshold.
+            [codepoint(Delta) | Out];
+        T ->
+            Digit = T + ((Delta - T) rem (?BASE - T)),
+            Remaining = (Delta - T) div (?BASE - T),
+            punycode_encode_delta(Remaining, Bias,
+                                  [codepoint(Digit) | Out], K + ?BASE)
+    end.
+
+%% Threshold for position K: K - Bias clamped to ?TMIN .. ?TMAX.
+threshold(K, Bias) when K =< Bias ->
+    ?TMIN;
+threshold(K, Bias) when K >= Bias + ?TMAX ->
+    ?TMAX;
+threshold(K, Bias) ->
+    K - Bias.
+
+
+%% Compute the new bias from the delta that was just encoded.
+%%   NumPoints - number of code points handled so far, including this one
+%%   FirstTime - `true' selects division by ?DAMP, anything else by 2
+adapt(Delta, NumPoints, FirstTime) ->
+    Scaled = scale_delta(Delta, FirstTime),
+    adapt1(Scaled + (Scaled div NumPoints), 0).
+
+%% Initial scaling of the delta.
+scale_delta(Delta, true) ->
+    Delta div ?DAMP;
+scale_delta(Delta, _) ->
+    Delta div 2.
+
+%% Divide Delta by (?BASE - ?TMIN) while it exceeds the limit, adding
+%% ?BASE to K each time, then add the final term.
+adapt1(Delta, K) when Delta > ((?BASE - ?TMIN) * ?TMAX) div 2 ->
+    adapt1(Delta div (?BASE - ?TMIN), K + ?BASE);
+adapt1(Delta, K) ->
+    K + (((?BASE - ?TMIN + 1) * Delta) div (Delta + ?SKEW)).
+
+
+%% Map a digit value to its character: 0..25 -> "a".."z",
+%% 26..35 -> "0".."9".  Any other value fails with if_clause.
+codepoint(Digit) ->
+    if
+        Digit >= 0, Digit =< 25 ->
+            $a + Digit;
+        Digit >= 26, Digit =< 35 ->
+            $0 + (Digit - 26)
+    end.