diff options
author | Alexey Shchepin <alexey@process-one.net> | 2004-04-10 19:15:02 +0000 |
---|---|---|
committer | Alexey Shchepin <alexey@process-one.net> | 2004-04-10 19:15:02 +0000 |
commit | 6f600f1a3789fa00a6f77e947acf7e169aa500ae (patch) | |
tree | f7541a08fccc5e7652b51c58a9a8cc25a70ae53a /src/idna.erl | |
parent | * (all): Updated win32 stuff (thanks to Sergei Golovan) (diff) |
* src/idna.erl: Support for IDNA (RFC3490)
* src/ejabberd_s2s_out.erl: Likewise
* src/xml.erl: element_to_string/1 and crypt/1 now returns deep
list
* src/mod_muc/mod_muc_room.erl (add_message_to_history): Replaced
string:len with lists:flatlength
SVN Revision: 222
Diffstat (limited to '')
-rw-r--r-- | src/idna.erl | 179 |
1 files changed, 179 insertions, 0 deletions
diff --git a/src/idna.erl b/src/idna.erl new file mode 100644 index 00000000..cba1dbc9 --- /dev/null +++ b/src/idna.erl @@ -0,0 +1,179 @@ +%%%---------------------------------------------------------------------- +%%% File : idna.erl +%%% Author : Alexey Shchepin <alexey@sevcom.net> +%%% Purpose : Support for IDNA (RFC3490) +%%% Created : 10 Apr 2004 by Alexey Shchepin <alexey@sevcom.net> +%%% Id : $Id$ +%%%---------------------------------------------------------------------- + +-module(idna). +-author('alexey@sevcom.net'). +-vsn('$Revision$ '). + +%-compile(export_all). +-export([domain_utf8_to_ascii/1, + domain_ucs2_to_ascii/1]). + + +domain_utf8_to_ascii(Domain) -> + domain_ucs2_to_ascii(utf8_to_ucs2(Domain)). + +utf8_to_ucs2(S) -> + utf8_to_ucs2(S, ""). + +utf8_to_ucs2([], R) -> + lists:reverse(R); +utf8_to_ucs2([C | S], R) when C < 16#80 -> + utf8_to_ucs2(S, [C | R]); +utf8_to_ucs2([C1, C2 | S], R) when C1 < 16#E0 -> + utf8_to_ucs2(S, [((C1 band 16#1F) bsl 6) bor + (C2 band 16#3F) | R]); +utf8_to_ucs2([C1, C2, C3 | S], R) when C1 < 16#F0 -> + utf8_to_ucs2(S, [((C1 band 16#0F) bsl 12) bor + ((C2 band 16#3F) bsl 6) bor + (C3 band 16#3F) | R]). + + +domain_ucs2_to_ascii(Domain) -> + case catch domain_ucs2_to_ascii1(Domain) of + {'EXIT', _Reason} -> + false; + Res -> + Res + end. + +domain_ucs2_to_ascii1(Domain) -> + Parts = string:tokens(Domain, [16#002E, 16#3002, 16#FF0E, 16#FF61]), + ASCIIParts = lists:map(fun(P) -> + to_ascii(P) + end, Parts), + string:strip(lists:flatmap(fun(P) -> [$. | P] end, ASCIIParts), + left, $.). + +% Domain names are already nameprep'ed in ejabberd, so we skiping this step +to_ascii(Name) -> + false = lists:any( + fun(C) when + ( 0 =< C) and (C =< 16#2C) or + (16#2E =< C) and (C =< 16#2F) or + (16#3A =< C) and (C =< 16#40) or + (16#5B =< C) and (C =< 16#60) or + (16#7B =< C) and (C =< 16#7F) -> + true; + (_) -> + false + end, Name), + case Name of + [H | _] when H /= $- -> + true = lists:last(Name) /= $- + end, + ASCIIName = case lists:any(fun(C) -> C > 16#7F end, Name) of + true -> + true = case Name of + "xn--" ++ _ -> false; + _ -> true + end, + "xn--" ++ punycode_encode(Name); + false -> + Name + end, + L = length(ASCIIName), + true = (1 =< L) and (L =< 63), + ASCIIName. + + +%%% PUNYCODE (RFC3492) + +-define(BASE, 36). +-define(TMIN, 1). +-define(TMAX, 26). +-define(SKEW, 38). +-define(DAMP, 700). +-define(INITIAL_BIAS, 72). +-define(INITIAL_N, 128). + +punycode_encode(Input) -> + N = ?INITIAL_N, + Delta = 0, + Bias = ?INITIAL_BIAS, + Basic = lists:filter(fun(C) -> C =< 16#7f end, Input), + NonBasic = lists:filter(fun(C) -> C > 16#7f end, Input), + L = length(Input), + B = length(Basic), + SNonBasic = lists:usort(NonBasic), + Output1 = if + B > 0 -> Basic ++ "-"; + true -> "" + end, + Output2 = punycode_encode1(Input, SNonBasic, B, B, L, N, Delta, Bias, ""), + Output1 ++ Output2. + + +punycode_encode1(Input, [M | SNonBasic], B, H, L, N, Delta, Bias, Out) + when H < L -> + Delta1 = Delta + (M - N) * (H + 1), + % let n = m + {NewDelta, NewBias, NewH, NewOut} = + lists:foldl( + fun(C, {ADelta, ABias, AH, AOut}) -> + if + C < M -> + {ADelta + 1, ABias, AH, AOut}; + C == M -> + NewOut = punycode_encode_delta(ADelta, ABias, AOut), + NewBias = adapt(ADelta, H + 1, H == B), + {0, NewBias, AH + 1, NewOut}; + true -> + {ADelta, ABias, AH, AOut} + end + end, {Delta1, Bias, H, Out}, Input), + punycode_encode1( + Input, SNonBasic, B, NewH, L, M + 1, NewDelta + 1, NewBias, NewOut); + +punycode_encode1(Input, SNonBasic, B, H, L, N, Delta, Bias, Out) -> + lists:reverse(Out). + + +punycode_encode_delta(Delta, Bias, Out) -> + punycode_encode_delta(Delta, Bias, Out, ?BASE). + +punycode_encode_delta(Delta, Bias, Out, K) -> + T = if + K =< Bias -> ?TMIN; + K >= Bias + ?TMAX -> ?TMAX; + true -> K - Bias + end, + if + Delta < T -> + [codepoint(Delta) | Out]; + true -> + C = T + ((Delta - T) rem (?BASE - T)), + punycode_encode_delta((Delta - T) div (?BASE - T), Bias, + [codepoint(C) | Out], K + ?BASE) + end. + + +adapt(Delta, NumPoints, FirstTime) -> + Delta1 = if + FirstTime -> Delta div ?DAMP; + true -> Delta div 2 + end, + Delta2 = Delta1 + (Delta1 div NumPoints), + adapt1(Delta2, 0). + +adapt1(Delta, K) -> + if + Delta > ((?BASE - ?TMIN) * ?TMAX) div 2 -> + adapt1(Delta div (?BASE - ?TMIN), K + ?BASE); + true -> + K + (((?BASE - ?TMIN + 1) * Delta) div (Delta + ?SKEW)) + end. + + +codepoint(C) -> + if + (0 =< C) and (C =< 25) -> + C + 97; + (26 =< C) and (C =< 35) -> + C + 22 + end. |