aboutsummaryrefslogtreecommitdiff
path: root/src/ejabberd_idna.erl
diff options
context:
space:
mode:
Diffstat (limited to 'src/ejabberd_idna.erl')
-rw-r--r--src/ejabberd_idna.erl224
1 files changed, 224 insertions, 0 deletions
diff --git a/src/ejabberd_idna.erl b/src/ejabberd_idna.erl
new file mode 100644
index 000000000..bee3f87ed
--- /dev/null
+++ b/src/ejabberd_idna.erl
@@ -0,0 +1,224 @@
+%%%----------------------------------------------------------------------
+%%% File : ejabberd_idna.erl
+%%% Author : Alexey Shchepin <alexey@process-one.net>
+%%% Purpose : Support for IDNA (RFC3490)
+%%% Created : 10 Apr 2004 by Alexey Shchepin <alexey@process-one.net>
+%%%
+%%%
+%%% ejabberd, Copyright (C) 2002-2015 ProcessOne
+%%%
+%%% This program is free software; you can redistribute it and/or
+%%% modify it under the terms of the GNU General Public License as
+%%% published by the Free Software Foundation; either version 2 of the
+%%% License, or (at your option) any later version.
+%%%
+%%% This program is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+%%% General Public License for more details.
+%%%
+%%% You should have received a copy of the GNU General Public License along
+%%% with this program; if not, write to the Free Software Foundation, Inc.,
+%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+%%%
+%%%----------------------------------------------------------------------
+
+-module(ejabberd_idna).
+
+-author('alexey@process-one.net').
+
+-export([domain_utf8_to_ascii/1,
+ domain_ucs2_to_ascii/1,
+ utf8_to_ucs2/1]).
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
+
+%% @doc Convert a UTF-8 encoded domain name to its ASCII (IDNA) form.
+%% Returns `false' when the bytes cannot be decoded by utf8_to_ucs2/1
+%% (for example a truncated multi-byte sequence) or when
+%% domain_ucs2_to_ascii/1 rejects the decoded name.
+-spec domain_utf8_to_ascii(binary()) -> false | binary().
+
+domain_utf8_to_ascii(Domain) when is_binary(Domain) ->
+    %% Decoding happens before domain_ucs2_to_ascii/1 gets a chance to
+    %% trap failures, so a decoding error must be mapped to `false' here
+    %% to honour the spec. Only the decoding step is protected.
+    try utf8_to_ucs2(Domain) of
+        CodePoints -> domain_ucs2_to_ascii(CodePoints)
+    catch
+        error:_ -> false
+    end.
+
+%% @doc Decode a UTF-8 binary into a list of Unicode code points.
+%% Sequences of one to four bytes are decoded, so code points outside the
+%% Basic Multilingual Plane are supported as well. Continuation bytes are
+%% not validated: only their low six bits are used. A truncated sequence
+%% or a lead byte of 248 or above raises `function_clause'.
+-spec utf8_to_ucs2(binary()) -> [non_neg_integer()].
+
+utf8_to_ucs2(S) ->
+    utf8_to_ucs2(S, []).
+
+%% Worker: R accumulates the decoded code points in reverse order.
+utf8_to_ucs2(<<>>, R) -> lists:reverse(R);
+utf8_to_ucs2(<<C, S/binary>>, R) when C < 128 ->
+    utf8_to_ucs2(S, [C | R]);
+utf8_to_ucs2(<<C1, C2, S/binary>>, R) when C1 < 224 ->
+    utf8_to_ucs2(S, [((C1 band 31) bsl 6) bor (C2 band 63) | R]);
+utf8_to_ucs2(<<C1, C2, C3, S/binary>>, R) when C1 < 240 ->
+    utf8_to_ucs2(S,
+                 [((C1 band 15) bsl 12) bor ((C2 band 63) bsl 6)
+                      bor (C3 band 63)
+                  | R]);
+utf8_to_ucs2(<<C1, C2, C3, C4, S/binary>>, R) when C1 < 248 ->
+    %% Four-byte form: three payload bits in the lead byte, six in each
+    %% of the three continuation bytes.
+    utf8_to_ucs2(S,
+                 [((C1 band 7) bsl 18) bor ((C2 band 63) bsl 12)
+                      bor ((C3 band 63) bsl 6) bor (C4 band 63)
+                  | R]).
+
+%% @doc Convert a domain name given as a list of code points to its ASCII
+%% (IDNA) form. Returns the result as a binary, or `false' when the
+%% conversion in domain_ucs2_to_ascii1/1 fails for any label.
+-spec domain_ucs2_to_ascii(list()) -> false | binary().
+
+domain_ucs2_to_ascii(Domain) ->
+    %% The helpers signal an invalid name by crashing (failed matches),
+    %% so an error here means "not convertible". Only the conversion is
+    %% protected; building the binary happens in the unprotected `of' part.
+    try domain_ucs2_to_ascii1(Domain) of
+        Res -> iolist_to_binary(Res)
+    catch
+        error:_Reason -> false
+    end.
+
+%% Split a list of code points into labels on any of the four dot
+%% characters, convert every label with to_ascii/1 and join the results
+%% with ".". Crashes when to_ascii/1 rejects a label.
+domain_ucs2_to_ascii1(Domain) ->
+    %% U+002E full stop, U+3002 ideographic full stop,
+    %% U+FF0E fullwidth full stop, U+FF61 halfwidth ideographic full stop.
+    Separators = [16#2E, 16#3002, 16#FF0E, 16#FF61],
+    Labels = string:tokens(Domain, Separators),
+    %% Prefix every converted label with a dot, then drop the leading dot.
+    Dotted = lists:append([[$. | to_ascii(Label)] || Label <- Labels]),
+    string:strip(Dotted, left, $.).
+
+%% Domain names are already nameprep'ed in ejabberd, so we skip this step
+%% Convert a single label (list of code points) to its ASCII form.
+%% Every check is an assertive match: an invalid label makes the function
+%% crash instead of returning an error value.
+to_ascii(Name) ->
+    %% No ASCII code point other than letters, digits and "-" is allowed.
+    false = lists:any(fun is_forbidden_ascii/1, Name),
+    %% The label must be non-empty and must not start or end with "-".
+    case Name of
+        [First | _] when First /= $- ->
+            true = lists:last(Name) /= $-
+    end,
+    HasNonASCII = lists:any(fun (C) -> C > 127 end, Name),
+    ASCIIName = encode_label(Name, HasNonASCII),
+    %% The resulting label must be 1 to 63 characters long.
+    Length = length(ASCIIName),
+    true = Length >= 1 andalso Length =< 63,
+    ASCIIName.
+
+%% True for ASCII code points that are neither a letter, a digit nor "-".
+is_forbidden_ascii(C)
+    when 0 =< C, C =< 44;
+         46 =< C, C =< 47;
+         58 =< C, C =< 64;
+         91 =< C, C =< 96;
+         123 =< C, C =< 127 ->
+    true;
+is_forbidden_ascii(_) -> false.
+
+%% Pure-ASCII labels are returned unchanged; any other label is Punycode
+%% encoded and given the "xn--" prefix, unless it already starts with
+%% that prefix, in which case the match below fails.
+encode_label(Name, false) -> Name;
+encode_label(Name, true) ->
+    true = not lists:prefix("xn--", Name),
+    "xn--" ++ punycode_encode(Name).
+
+%%% PUNYCODE (RFC3492)
+
+%% Number of distinct digit values; codepoint/1 maps 0..35 to "a".."z"
+%% and "0".."9".
+-define(BASE, 36).
+
+%% Lower bound of the per-digit threshold in punycode_encode_delta/4.
+-define(TMIN, 1).
+
+%% Upper bound of the per-digit threshold in punycode_encode_delta/4.
+-define(TMAX, 26).
+
+%% Added to the denominator of the final bias computation in adapt1/2.
+-define(SKEW, 38).
+
+%% Divisor applied to the delta by adapt/3 on its first invocation
+%% (later invocations divide by 2 instead).
+-define(DAMP, 700).
+
+%% Bias value punycode_encode/1 starts with.
+-define(INITIAL_BIAS, 72).
+
+%% First non-basic code point; punycode_encode/1 starts counting from it.
+-define(INITIAL_N, 128).
+
+%% Punycode-encode a list of code points (without the "xn--" prefix).
+%% The basic (=< 127) code points are copied first, followed by "-" when
+%% there is at least one of them; the non-basic code points are then
+%% encoded by punycode_encode1/9 in ascending order of their value.
+punycode_encode(Input) ->
+    {Basic, NonBasic} = lists:partition(fun (C) -> C =< 127 end, Input),
+    BasicCount = length(Basic),
+    Prefix = case BasicCount of
+                 0 -> "";
+                 _ -> Basic ++ "-"
+             end,
+    %% The "handled" counter starts at the number of basic code points.
+    Encoded = punycode_encode1(Input, lists:usort(NonBasic), BasicCount,
+                               BasicCount, length(Input), ?INITIAL_N, 0,
+                               ?INITIAL_BIAS, ""),
+    Prefix ++ Encoded.
+
+%% Main Punycode encoding loop (RFC 3492, section 6.3).
+%%
+%% Input     - the complete list of code points being encoded
+%% SNonBasic - sorted, duplicate-free non-basic code points still to do
+%% B         - number of basic code points in Input
+%% H         - number of code points handled so far
+%% L         - length of Input
+%% N         - current code point value of the state machine
+%% Delta     - pending delta, Bias - current bias
+%% Out       - output digits collected so far, in reverse order
+%%
+%% Each pass handles every occurrence of the next smallest non-basic code
+%% point M. Returns the encoded digits in output order.
+punycode_encode1(Input, [M | SNonBasic], B, H, L, N,
+                 Delta, Bias, Out)
+    when H < L ->
+    %% Advance the state from N to M ("let n = m" in the RFC).
+    Delta1 = Delta + (M - N) * (H + 1),
+    {NewDelta, NewBias, NewH, NewOut} =
+        lists:foldl(fun (C, Acc) -> punycode_encode_step(C, M, B, Acc) end,
+                    {Delta1, Bias, H, Out},
+                    Input),
+    punycode_encode1(Input, SNonBasic, B, NewH, L, M + 1,
+                     NewDelta + 1, NewBias, NewOut);
+punycode_encode1(_Input, _SNonBasic, _B, _H, _L, _N,
+                 _Delta, _Bias, Out) ->
+    lists:reverse(Out).
+
+%% Process one input code point C during the pass for code point M.
+%% The accumulator is {Delta, Bias, H, Out}.
+punycode_encode_step(C, M, _B, {Delta, Bias, H, Out}) when C < M ->
+    %% Code points below M only advance the delta.
+    {Delta + 1, Bias, H, Out};
+punycode_encode_step(C, M, B, {Delta, Bias, H, Out}) when C == M ->
+    NewOut = punycode_encode_delta(Delta, Bias, Out),
+    %% H is the running count kept in the accumulator, not the count at
+    %% the start of the pass: the bias must be adapted with the current
+    %% number of handled code points, which matters when M occurs more
+    %% than once in the input.
+    NewBias = adapt(Delta, H + 1, H == B),
+    {0, NewBias, H + 1, NewOut};
+punycode_encode_step(_C, _M, _B, Acc) ->
+    %% Code points above M are left for a later pass.
+    Acc.
+
+%% Encode Delta as a variable-length integer and push its digits onto
+%% Out, which is kept in reverse order.
+punycode_encode_delta(Delta, Bias, Out) ->
+    punycode_encode_delta(Delta, Bias, Out, ?BASE).
+
+%% K is the position weight of the current digit; it grows by ?BASE for
+%% every digit emitted.
+punycode_encode_delta(Delta, Bias, Out, K) ->
+    case punycode_threshold(K, Bias) of
+        T when Delta < T ->
+            %% Final digit: the remaining value is below the threshold.
+            [codepoint(Delta) | Out];
+        T ->
+            Radix = (?BASE) - T,
+            Digit = T + (Delta - T) rem Radix,
+            Rest = (Delta - T) div Radix,
+            punycode_encode_delta(Rest, Bias, [codepoint(Digit) | Out],
+                                  K + (?BASE))
+    end.
+
+%% Threshold for the digit at weight K: K - Bias clamped to ?TMIN..?TMAX.
+punycode_threshold(K, Bias) when K =< Bias -> ?TMIN;
+punycode_threshold(K, Bias) when K >= Bias + (?TMAX) -> ?TMAX;
+punycode_threshold(K, Bias) -> K - Bias.
+
+%% Compute the next bias from the delta that was just encoded.
+%% NumPoints is the number of code points handled so far (including the
+%% current one). When FirstTime is `true' the delta is divided by ?DAMP,
+%% otherwise it is halved.
+adapt(Delta, NumPoints, FirstTime) ->
+    Scaled = case FirstTime of
+                 true -> Delta div (?DAMP);
+                 _ -> Delta div 2
+             end,
+    adapt1(Scaled + Scaled div NumPoints, 0).
+
+%% Second half of the bias adaptation: keep dividing Delta by
+%% ?BASE - ?TMIN, adding ?BASE to K each time, until Delta is no larger
+%% than ((?BASE - ?TMIN) * ?TMAX) div 2; then derive the bias from K and
+%% the remaining Delta.
+adapt1(Delta, K)
+    when Delta > ((?BASE) - (?TMIN)) * (?TMAX) div 2 ->
+    adapt1(Delta div ((?BASE) - (?TMIN)), K + (?BASE));
+adapt1(Delta, K) ->
+    K + ((?BASE) - (?TMIN) + 1) * Delta div (Delta + (?SKEW)).
+
+%% Map a Punycode digit value to its character: 0..25 become "a".."z",
+%% 26..35 become "0".."9". Any other value fails with `if_clause'.
+codepoint(C) ->
+    if C >= 0, C =< 25 -> $a + C;
+       C >= 26, C =< 35 -> $0 + (C - 26)
+    end.
+
+%%%===================================================================
+%%% Unit tests
+%%%===================================================================
+-ifdef(TEST).
+
+%% A pure-ASCII domain must come back unchanged.
+acsii_test() ->
+    Domain = <<"test.org">>,
+    ?assertEqual(Domain, domain_utf8_to_ascii(Domain)).
+
+%% A Cyrillic domain must be converted label by label to its "xn--" form.
+utf8_test() ->
+    %% UTF-8 bytes of U+0434 U+043E U+043C U+0435 U+043D "." U+0440 U+0444.
+    Utf8Domain =
+        <<208,180,208,190,208,188,208,181,208,189,46,209,128,209,132>>,
+    Expected = <<"xn--d1acufc.xn--p1ai">>,
+    ?assertEqual(Expected, domain_utf8_to_ascii(Utf8Domain)).
+
+-endif.