aboutsummaryrefslogtreecommitdiff
path: root/src/idna.erl
diff options
context:
space:
mode:
Diffstat (limited to 'src/idna.erl')
-rw-r--r--src/idna.erl228
1 files changed, 119 insertions, 109 deletions
diff --git a/src/idna.erl b/src/idna.erl
index b5ba1f95a..a23bed001 100644
--- a/src/idna.erl
+++ b/src/idna.erl
@@ -25,172 +25,182 @@
%%%----------------------------------------------------------------------
-module(idna).
+
-author('alexey@process-one.net').
%%-compile(export_all).
-export([domain_utf8_to_ascii/1,
- domain_ucs2_to_ascii/1]).
+ domain_ucs2_to_ascii/1,
+ utf8_to_ucs2/1]).
+-spec domain_utf8_to_ascii(binary()) -> false | binary().
domain_utf8_to_ascii(Domain) ->
domain_ucs2_to_ascii(utf8_to_ucs2(Domain)).
utf8_to_ucs2(S) ->
- utf8_to_ucs2(S, "").
+ list_to_binary(utf8_to_ucs2(binary_to_list(S), "")).
-utf8_to_ucs2([], R) ->
- lists:reverse(R);
-utf8_to_ucs2([C | S], R) when C < 16#80 ->
+utf8_to_ucs2([], R) -> lists:reverse(R);
+utf8_to_ucs2([C | S], R) when C < 128 ->
utf8_to_ucs2(S, [C | R]);
-utf8_to_ucs2([C1, C2 | S], R) when C1 < 16#E0 ->
- utf8_to_ucs2(S, [((C1 band 16#1F) bsl 6) bor
- (C2 band 16#3F) | R]);
-utf8_to_ucs2([C1, C2, C3 | S], R) when C1 < 16#F0 ->
- utf8_to_ucs2(S, [((C1 band 16#0F) bsl 12) bor
- ((C2 band 16#3F) bsl 6) bor
- (C3 band 16#3F) | R]).
+utf8_to_ucs2([C1, C2 | S], R) when C1 < 224 ->
+ utf8_to_ucs2(S, [C1 band 31 bsl 6 bor C2 band 63 | R]);
+utf8_to_ucs2([C1, C2, C3 | S], R) when C1 < 240 ->
+ utf8_to_ucs2(S,
+ [C1 band 15 bsl 12 bor (C2 band 63 bsl 6) bor C3 band 63
+ | R]).
+-spec domain_ucs2_to_ascii(binary()) -> false | binary().
domain_ucs2_to_ascii(Domain) ->
- case catch domain_ucs2_to_ascii1(Domain) of
- {'EXIT', _Reason} ->
- false;
- Res ->
- Res
+ case catch domain_ucs2_to_ascii1(binary_to_list(Domain)) of
+ {'EXIT', _Reason} -> false;
+ Res -> iolist_to_binary(Res)
end.
domain_ucs2_to_ascii1(Domain) ->
- Parts = string:tokens(Domain, [16#002E, 16#3002, 16#FF0E, 16#FF61]),
- ASCIIParts = lists:map(fun(P) ->
- to_ascii(P)
- end, Parts),
- string:strip(lists:flatmap(fun(P) -> [$. | P] end, ASCIIParts),
+ Parts = string:tokens(Domain,
+ [46, 12290, 65294, 65377]),
+ ASCIIParts = lists:map(fun (P) -> to_ascii(P) end,
+ Parts),
+ string:strip(lists:flatmap(fun (P) -> [$. | P] end,
+ ASCIIParts),
left, $.).
%% Domain names are already nameprep'ed in ejabberd, so we skip this step
to_ascii(Name) ->
- false = lists:any(
- fun(C) when
- ( 0 =< C) and (C =< 16#2C) or
- (16#2E =< C) and (C =< 16#2F) or
- (16#3A =< C) and (C =< 16#40) or
- (16#5B =< C) and (C =< 16#60) or
- (16#7B =< C) and (C =< 16#7F) ->
- true;
- (_) ->
- false
- end, Name),
+ false = lists:any(fun (C)
+ when (0 =< C) and (C =< 44) or
+ (46 =< C) and (C =< 47)
+ or (58 =< C) and (C =< 64)
+ or (91 =< C) and (C =< 96)
+ or (123 =< C) and (C =< 127) ->
+ true;
+ (_) -> false
+ end,
+ Name),
case Name of
- [H | _] when H /= $- ->
- true = lists:last(Name) /= $-
+ [H | _] when H /= $- -> true = lists:last(Name) /= $-
end,
- ASCIIName = case lists:any(fun(C) -> C > 16#7F end, Name) of
- true ->
- true = case Name of
- "xn--" ++ _ -> false;
- _ -> true
- end,
- "xn--" ++ punycode_encode(Name);
- false ->
- Name
+ ASCIIName = case lists:any(fun (C) -> C > 127 end, Name)
+ of
+ true ->
+ true = case Name of
+ "xn--" ++ _ -> false;
+ _ -> true
+ end,
+ "xn--" ++ punycode_encode(Name);
+ false -> Name
end,
L = length(ASCIIName),
true = (1 =< L) and (L =< 63),
ASCIIName.
-
%%% PUNYCODE (RFC3492)
--define(BASE, 36).
--define(TMIN, 1).
--define(TMAX, 26).
--define(SKEW, 38).
--define(DAMP, 700).
+-define(BASE, 36).
+
+-define(TMIN, 1).
+
+-define(TMAX, 26).
+
+-define(SKEW, 38).
+
+-define(DAMP, 700).
+
-define(INITIAL_BIAS, 72).
--define(INITIAL_N, 128).
+
+-define(INITIAL_N, 128).
punycode_encode(Input) ->
- N = ?INITIAL_N,
+ N = (?INITIAL_N),
Delta = 0,
- Bias = ?INITIAL_BIAS,
- Basic = lists:filter(fun(C) -> C =< 16#7f end, Input),
- NonBasic = lists:filter(fun(C) -> C > 16#7f end, Input),
+ Bias = (?INITIAL_BIAS),
+ Basic = lists:filter(fun (C) -> C =< 127 end, Input),
+ NonBasic = lists:filter(fun (C) -> C > 127 end, Input),
L = length(Input),
B = length(Basic),
SNonBasic = lists:usort(NonBasic),
- Output1 = if
- B > 0 -> Basic ++ "-";
- true -> ""
+ Output1 = if B > 0 -> Basic ++ "-";
+ true -> ""
end,
- Output2 = punycode_encode1(Input, SNonBasic, B, B, L, N, Delta, Bias, ""),
+ Output2 = punycode_encode1(Input, SNonBasic, B, B, L, N,
+ Delta, Bias, ""),
Output1 ++ Output2.
-
-punycode_encode1(Input, [M | SNonBasic], B, H, L, N, Delta, Bias, Out)
- when H < L ->
+punycode_encode1(Input, [M | SNonBasic], B, H, L, N,
+ Delta, Bias, Out)
+ when H < L ->
Delta1 = Delta + (M - N) * (H + 1),
- % let n = m
- {NewDelta, NewBias, NewH, NewOut} =
- lists:foldl(
- fun(C, {ADelta, ABias, AH, AOut}) ->
- if
- C < M ->
- {ADelta + 1, ABias, AH, AOut};
- C == M ->
- NewOut = punycode_encode_delta(ADelta, ABias, AOut),
- NewBias = adapt(ADelta, H + 1, H == B),
- {0, NewBias, AH + 1, NewOut};
- true ->
- {ADelta, ABias, AH, AOut}
- end
- end, {Delta1, Bias, H, Out}, Input),
- punycode_encode1(
- Input, SNonBasic, B, NewH, L, M + 1, NewDelta + 1, NewBias, NewOut);
-
-punycode_encode1(_Input, _SNonBasic, _B, _H, _L, _N, _Delta, _Bias, Out) ->
+ % let n = m
+ {NewDelta, NewBias, NewH, NewOut} = lists:foldl(fun (C,
+ {ADelta, ABias, AH,
+ AOut}) ->
+ if C < M ->
+ {ADelta + 1,
+ ABias, AH,
+ AOut};
+ C == M ->
+ NewOut =
+ punycode_encode_delta(ADelta,
+ ABias,
+ AOut),
+ NewBias =
+ adapt(ADelta,
+ H +
+ 1,
+ H
+ ==
+ B),
+ {0, NewBias,
+ AH + 1,
+ NewOut};
+ true ->
+ {ADelta,
+ ABias, AH,
+ AOut}
+ end
+ end,
+ {Delta1, Bias, H, Out},
+ Input),
+ punycode_encode1(Input, SNonBasic, B, NewH, L, M + 1,
+ NewDelta + 1, NewBias, NewOut);
+punycode_encode1(_Input, _SNonBasic, _B, _H, _L, _N,
+ _Delta, _Bias, Out) ->
lists:reverse(Out).
-
punycode_encode_delta(Delta, Bias, Out) ->
punycode_encode_delta(Delta, Bias, Out, ?BASE).
punycode_encode_delta(Delta, Bias, Out, K) ->
- T = if
- K =< Bias -> ?TMIN;
- K >= Bias + ?TMAX -> ?TMAX;
- true -> K - Bias
+ T = if K =< Bias -> ?TMIN;
+ K >= Bias + (?TMAX) -> ?TMAX;
+ true -> K - Bias
end,
- if
- Delta < T ->
- [codepoint(Delta) | Out];
- true ->
- C = T + ((Delta - T) rem (?BASE - T)),
- punycode_encode_delta((Delta - T) div (?BASE - T), Bias,
- [codepoint(C) | Out], K + ?BASE)
+ if Delta < T -> [codepoint(Delta) | Out];
+ true ->
+ C = T + (Delta - T) rem ((?BASE) - T),
+ punycode_encode_delta((Delta - T) div ((?BASE) - T),
+ Bias, [codepoint(C) | Out], K + (?BASE))
end.
-
adapt(Delta, NumPoints, FirstTime) ->
- Delta1 = if
- FirstTime -> Delta div ?DAMP;
- true -> Delta div 2
+ Delta1 = if FirstTime -> Delta div (?DAMP);
+ true -> Delta div 2
end,
- Delta2 = Delta1 + (Delta1 div NumPoints),
+ Delta2 = Delta1 + Delta1 div NumPoints,
adapt1(Delta2, 0).
adapt1(Delta, K) ->
- if
- Delta > ((?BASE - ?TMIN) * ?TMAX) div 2 ->
- adapt1(Delta div (?BASE - ?TMIN), K + ?BASE);
- true ->
- K + (((?BASE - ?TMIN + 1) * Delta) div (Delta + ?SKEW))
+ if Delta > ((?BASE) - (?TMIN)) * (?TMAX) div 2 ->
+ adapt1(Delta div ((?BASE) - (?TMIN)), K + (?BASE));
+ true ->
+ K +
+ ((?BASE) - (?TMIN) + 1) * Delta div (Delta + (?SKEW))
end.
-
codepoint(C) ->
- if
- (0 =< C) and (C =< 25) ->
- C + 97;
- (26 =< C) and (C =< 35) ->
- C + 22
+ if (0 =< C) and (C =< 25) -> C + 97;
+ (26 =< C) and (C =< 35) -> C + 22
end.