diff options
Diffstat (limited to 'src/idna.erl')
-rw-r--r-- | src/idna.erl | 228 |
1 files changed, 119 insertions, 109 deletions
diff --git a/src/idna.erl b/src/idna.erl index b5ba1f95a..a23bed001 100644 --- a/src/idna.erl +++ b/src/idna.erl @@ -25,172 +25,182 @@ %%%---------------------------------------------------------------------- -module(idna). + -author('alexey@process-one.net'). %%-compile(export_all). -export([domain_utf8_to_ascii/1, - domain_ucs2_to_ascii/1]). + domain_ucs2_to_ascii/1, + utf8_to_ucs2/1]). +-spec domain_utf8_to_ascii(binary()) -> false | binary(). domain_utf8_to_ascii(Domain) -> domain_ucs2_to_ascii(utf8_to_ucs2(Domain)). utf8_to_ucs2(S) -> - utf8_to_ucs2(S, ""). + list_to_binary(utf8_to_ucs2(binary_to_list(S), "")). -utf8_to_ucs2([], R) -> - lists:reverse(R); -utf8_to_ucs2([C | S], R) when C < 16#80 -> +utf8_to_ucs2([], R) -> lists:reverse(R); +utf8_to_ucs2([C | S], R) when C < 128 -> utf8_to_ucs2(S, [C | R]); -utf8_to_ucs2([C1, C2 | S], R) when C1 < 16#E0 -> - utf8_to_ucs2(S, [((C1 band 16#1F) bsl 6) bor - (C2 band 16#3F) | R]); -utf8_to_ucs2([C1, C2, C3 | S], R) when C1 < 16#F0 -> - utf8_to_ucs2(S, [((C1 band 16#0F) bsl 12) bor - ((C2 band 16#3F) bsl 6) bor - (C3 band 16#3F) | R]). +utf8_to_ucs2([C1, C2 | S], R) when C1 < 224 -> + utf8_to_ucs2(S, [C1 band 31 bsl 6 bor C2 band 63 | R]); +utf8_to_ucs2([C1, C2, C3 | S], R) when C1 < 240 -> + utf8_to_ucs2(S, + [C1 band 15 bsl 12 bor (C2 band 63 bsl 6) bor C3 band 63 + | R]). +-spec domain_ucs2_to_ascii(binary()) -> false | binary(). domain_ucs2_to_ascii(Domain) -> - case catch domain_ucs2_to_ascii1(Domain) of - {'EXIT', _Reason} -> - false; - Res -> - Res + case catch domain_ucs2_to_ascii1(binary_to_list(Domain)) of + {'EXIT', _Reason} -> false; + Res -> iolist_to_binary(Res) end. domain_ucs2_to_ascii1(Domain) -> - Parts = string:tokens(Domain, [16#002E, 16#3002, 16#FF0E, 16#FF61]), - ASCIIParts = lists:map(fun(P) -> - to_ascii(P) - end, Parts), - string:strip(lists:flatmap(fun(P) -> [$. | P] end, ASCIIParts), + Parts = string:tokens(Domain, + [46, 12290, 65294, 65377]), + ASCIIParts = lists:map(fun (P) -> to_ascii(P) end, + Parts), + string:strip(lists:flatmap(fun (P) -> [$. | P] end, + ASCIIParts), left, $.). %% Domain names are already nameprep'ed in ejabberd, so we skiping this step to_ascii(Name) -> - false = lists:any( - fun(C) when - ( 0 =< C) and (C =< 16#2C) or - (16#2E =< C) and (C =< 16#2F) or - (16#3A =< C) and (C =< 16#40) or - (16#5B =< C) and (C =< 16#60) or - (16#7B =< C) and (C =< 16#7F) -> - true; - (_) -> - false - end, Name), + false = lists:any(fun (C) + when (0 =< C) and (C =< 44) or + (46 =< C) and (C =< 47) + or (58 =< C) and (C =< 64) + or (91 =< C) and (C =< 96) + or (123 =< C) and (C =< 127) -> + true; + (_) -> false + end, + Name), case Name of - [H | _] when H /= $- -> - true = lists:last(Name) /= $- + [H | _] when H /= $- -> true = lists:last(Name) /= $- end, - ASCIIName = case lists:any(fun(C) -> C > 16#7F end, Name) of - true -> - true = case Name of - "xn--" ++ _ -> false; - _ -> true - end, - "xn--" ++ punycode_encode(Name); - false -> - Name + ASCIIName = case lists:any(fun (C) -> C > 127 end, Name) + of + true -> + true = case Name of + "xn--" ++ _ -> false; + _ -> true + end, + "xn--" ++ punycode_encode(Name); + false -> Name end, L = length(ASCIIName), true = (1 =< L) and (L =< 63), ASCIIName. - %%% PUNYCODE (RFC3492) --define(BASE, 36). --define(TMIN, 1). --define(TMAX, 26). --define(SKEW, 38). --define(DAMP, 700). +-define(BASE, 36). + +-define(TMIN, 1). + +-define(TMAX, 26). + +-define(SKEW, 38). + +-define(DAMP, 700). + -define(INITIAL_BIAS, 72). --define(INITIAL_N, 128). + +-define(INITIAL_N, 128). punycode_encode(Input) -> - N = ?INITIAL_N, + N = (?INITIAL_N), Delta = 0, - Bias = ?INITIAL_BIAS, - Basic = lists:filter(fun(C) -> C =< 16#7f end, Input), - NonBasic = lists:filter(fun(C) -> C > 16#7f end, Input), + Bias = (?INITIAL_BIAS), + Basic = lists:filter(fun (C) -> C =< 127 end, Input), + NonBasic = lists:filter(fun (C) -> C > 127 end, Input), L = length(Input), B = length(Basic), SNonBasic = lists:usort(NonBasic), - Output1 = if - B > 0 -> Basic ++ "-"; - true -> "" + Output1 = if B > 0 -> Basic ++ "-"; + true -> "" end, - Output2 = punycode_encode1(Input, SNonBasic, B, B, L, N, Delta, Bias, ""), + Output2 = punycode_encode1(Input, SNonBasic, B, B, L, N, + Delta, Bias, ""), Output1 ++ Output2. - -punycode_encode1(Input, [M | SNonBasic], B, H, L, N, Delta, Bias, Out) - when H < L -> +punycode_encode1(Input, [M | SNonBasic], B, H, L, N, + Delta, Bias, Out) + when H < L -> Delta1 = Delta + (M - N) * (H + 1), - % let n = m - {NewDelta, NewBias, NewH, NewOut} = - lists:foldl( - fun(C, {ADelta, ABias, AH, AOut}) -> - if - C < M -> - {ADelta + 1, ABias, AH, AOut}; - C == M -> - NewOut = punycode_encode_delta(ADelta, ABias, AOut), - NewBias = adapt(ADelta, H + 1, H == B), - {0, NewBias, AH + 1, NewOut}; - true -> - {ADelta, ABias, AH, AOut} - end - end, {Delta1, Bias, H, Out}, Input), - punycode_encode1( - Input, SNonBasic, B, NewH, L, M + 1, NewDelta + 1, NewBias, NewOut); - -punycode_encode1(_Input, _SNonBasic, _B, _H, _L, _N, _Delta, _Bias, Out) -> + % let n = m + {NewDelta, NewBias, NewH, NewOut} = lists:foldl(fun (C, + {ADelta, ABias, AH, + AOut}) -> + if C < M -> + {ADelta + 1, + ABias, AH, + AOut}; + C == M -> + NewOut = + punycode_encode_delta(ADelta, + ABias, + AOut), + NewBias = + adapt(ADelta, + H + + 1, + H + == + B), + {0, NewBias, + AH + 1, + NewOut}; + true -> + {ADelta, + ABias, AH, + AOut} + end + end, + {Delta1, Bias, H, Out}, + Input), + punycode_encode1(Input, SNonBasic, B, NewH, L, M + 1, + NewDelta + 1, NewBias, NewOut); +punycode_encode1(_Input, _SNonBasic, _B, _H, _L, _N, + _Delta, _Bias, Out) -> lists:reverse(Out). - punycode_encode_delta(Delta, Bias, Out) -> punycode_encode_delta(Delta, Bias, Out, ?BASE). punycode_encode_delta(Delta, Bias, Out, K) -> - T = if - K =< Bias -> ?TMIN; - K >= Bias + ?TMAX -> ?TMAX; - true -> K - Bias + T = if K =< Bias -> ?TMIN; + K >= Bias + (?TMAX) -> ?TMAX; + true -> K - Bias end, - if - Delta < T -> - [codepoint(Delta) | Out]; - true -> - C = T + ((Delta - T) rem (?BASE - T)), - punycode_encode_delta((Delta - T) div (?BASE - T), Bias, - [codepoint(C) | Out], K + ?BASE) + if Delta < T -> [codepoint(Delta) | Out]; + true -> + C = T + (Delta - T) rem ((?BASE) - T), + punycode_encode_delta((Delta - T) div ((?BASE) - T), + Bias, [codepoint(C) | Out], K + (?BASE)) end. - adapt(Delta, NumPoints, FirstTime) -> - Delta1 = if - FirstTime -> Delta div ?DAMP; - true -> Delta div 2 + Delta1 = if FirstTime -> Delta div (?DAMP); + true -> Delta div 2 end, - Delta2 = Delta1 + (Delta1 div NumPoints), + Delta2 = Delta1 + Delta1 div NumPoints, adapt1(Delta2, 0). adapt1(Delta, K) -> - if - Delta > ((?BASE - ?TMIN) * ?TMAX) div 2 -> - adapt1(Delta div (?BASE - ?TMIN), K + ?BASE); - true -> - K + (((?BASE - ?TMIN + 1) * Delta) div (Delta + ?SKEW)) + if Delta > ((?BASE) - (?TMIN)) * (?TMAX) div 2 -> + adapt1(Delta div ((?BASE) - (?TMIN)), K + (?BASE)); + true -> + K + + ((?BASE) - (?TMIN) + 1) * Delta div (Delta + (?SKEW)) end. - codepoint(C) -> - if - (0 =< C) and (C =< 25) -> - C + 97; - (26 =< C) and (C =< 35) -> - C + 22 + if (0 =< C) and (C =< 25) -> C + 97; + (26 =< C) and (C =< 35) -> C + 22 end. |