- domain_ucs2_to_ascii/1,
- utf8_to_ucs2/1]).
-%% Converts a UTF-8 encoded domain name to its ASCII (ACE) form.
-%%
-%% The binary is first decoded to a list of code points with
-%% utf8_to_ucs2/1 and then handed to domain_ucs2_to_ascii/1.
-%%
-%% Returns the ASCII domain as a binary, or `false' when the domain
-%% cannot be converted. utf8_to_ucs2/1 has no clause for truncated
-%% sequences or for lead bytes >= 240, so such input raises
-%% `function_clause'; that is reported as `false' here to honour the
-%% spec instead of crashing the caller.
--spec domain_utf8_to_ascii(binary()) -> false | binary().
-domain_utf8_to_ascii(Domain) ->
-    %% Only the decoding step is protected; domain_ucs2_to_ascii/1 does
-    %% its own error handling and runs in the unprotected `of' section.
-    try utf8_to_ucs2(Domain) of
-        UCS2 -> domain_ucs2_to_ascii(UCS2)
-    catch
-        error:function_clause -> false
-    end.
-%% Decodes a UTF-8 binary into a list of code points.
-%%
-%% Handles 1-, 2- and 3-byte sequences only. Continuation bytes are not
-%% validated: their low six bits are used as-is. A truncated sequence or
-%% a lead byte >= 240 matches no clause and raises `function_clause'.
-utf8_to_ucs2(Utf8) ->
-    utf8_to_ucs2(binary_to_list(Utf8), []).
-%% Worker: Acc holds the decoded code points in reverse order.
-utf8_to_ucs2([Byte | Rest], Acc) when Byte < 128 ->
-    %% Single byte: the byte is the code point.
-    utf8_to_ucs2(Rest, [Byte | Acc]);
-utf8_to_ucs2([Lead, Cont | Rest], Acc) when Lead < 224 ->
-    %% Two bytes: 5 payload bits from the lead, 6 from the continuation.
-    High = (Lead band 16#1F) bsl 6,
-    Low = Cont band 16#3F,
-    utf8_to_ucs2(Rest, [High bor Low | Acc]);
-utf8_to_ucs2([Lead, Cont1, Cont2 | Rest], Acc) when Lead < 240 ->
-    %% Three bytes: 4 + 6 + 6 payload bits.
-    High = (Lead band 16#0F) bsl 12,
-    Mid = (Cont1 band 16#3F) bsl 6,
-    Low = Cont2 band 16#3F,
-    utf8_to_ucs2(Rest, [High bor Mid bor Low | Acc]);
-utf8_to_ucs2([], Acc) ->
-    lists:reverse(Acc).
-%% Converts a domain name given as a list of code points to its ASCII
-%% (ACE) form.
-%%
-%% Returns the converted domain as a binary, or `false' when any label
-%% fails validation. The helpers signal a rejected label by raising an
-%% error (failed match, missing case/if/function clause), so every
-%% `error' class exception from the conversion maps to `false'.
--spec domain_ucs2_to_ascii(list()) -> false | binary().
-domain_ucs2_to_ascii(Domain) ->
-    %% `try' instead of old-style `catch': no {'EXIT', {Reason, Stack}}
-    %% term is built for each rejected domain, and only errors are
-    %% treated as validation failures (exits and throws propagate).
-    try domain_ucs2_to_ascii1(Domain) of
-        Res -> iolist_to_binary(Res)
-    catch
-        error:_ -> false
-    end.
-%% Splits Domain into labels, converts each label with to_ascii/1 and
-%% joins the results with ".". Raises an error as soon as one label is
-%% rejected by to_ascii/1.
-domain_ucs2_to_ascii1(Domain) ->
-    %% Label separators: "." (U+002E), U+3002, U+FF0E and U+FF61.
-    Separators = [16#2E, 16#3002, 16#FF0E, 16#FF61],
-    Labels = [to_ascii(Label)
-              || Label <- string:tokens(Domain, Separators)],
-    string:join(Labels, ".").
-%% Domain names are already nameprep'ed in ejabberd, so we skip this step
-%% Converts a single domain label (a list of code points) to ASCII.
-%%
-%% The label is returned unchanged when it is pure ASCII; otherwise it
-%% is Punycode-encoded and prefixed with "xn--". Every check is an
-%% assertive match, so a rejected label raises an error:
-%%   * ASCII code points other than letters, digits and "-" are refused;
-%%   * the label must be non-empty and must not start or end with "-";
-%%   * a label containing non-ASCII code points must not already start
-%%     with "xn--";
-%%   * the resulting label must be 1..63 characters long.
-to_ascii(Name) ->
-    IsForbidden = fun (C) ->
-                          (C >= 0 andalso C =< 44) orelse
-                              (C >= 46 andalso C =< 47) orelse
-                              (C >= 58 andalso C =< 64) orelse
-                              (C >= 91 andalso C =< 96) orelse
-                              (C >= 123 andalso C =< 127)
-                  end,
-    false = lists:any(IsForbidden, Name),
-    %% Non-empty, no leading hyphen, no trailing hyphen.
-    [First | _] = Name,
-    true = (First /= $-),
-    true = (lists:last(Name) /= $-),
-    Encoded = case lists:any(fun (C) -> C > 127 end, Name) of
-                  false ->
-                      Name;
-                  true ->
-                      %% Refuse input that already carries the ACE prefix.
-                      false = lists:prefix("xn--", Name),
-                      "xn--" ++ punycode_encode(Name)
-              end,
-    Len = length(Encoded),
-    true = (Len >= 1 andalso Len =< 63),
-    Encoded.
-%%% PUNYCODE (RFC3492)
--define(BASE, 36). % number of digit values; codepoint/1 maps 0..35 to a-z, 0-9
--define(TMIN, 1). % lowest digit threshold used by punycode_encode_delta/4
--define(TMAX, 26). % highest digit threshold used by punycode_encode_delta/4
--define(SKEW, 38). % bias adaptation constant used in adapt1/2
--define(DAMP, 700). % divisor applied to the first delta in adapt/3
--define(INITIAL_BIAS, 72). % bias value punycode_encode/1 starts with
--define(INITIAL_N, 128). % first non-basic code point; encoding starts from here
-%% Punycode-encodes Input, a list of code points, and returns the
-%% encoded string without the "xn--" prefix.
-%%
-%% The basic (=< 127) code points are copied first, followed by "-" when
-%% there is at least one; the non-basic code points are then encoded by
-%% punycode_encode1/9 in ascending order of their value.
-punycode_encode(Input) ->
-    {Basic, NonBasic} =
-        lists:partition(fun (C) -> C =< 127 end, Input),
-    BasicCount = length(Basic),
-    Prefix = case Basic of
-                 [] -> "";
-                 _ -> Basic ++ "-"
-             end,
-    %% The handled-count starts at the number of basic code points.
-    Suffix = punycode_encode1(Input, lists:usort(NonBasic),
-                              BasicCount, BasicCount, length(Input),
-                              ?INITIAL_N, 0, ?INITIAL_BIAS, ""),
-    Prefix ++ Suffix.
-%% Main Punycode encoding loop (RFC 3492, section 6.3).
-%%
-%%   Input     - the whole label as a list of code points
-%%   SNonBasic - sorted, de-duplicated non-basic code points still to do
-%%   B         - number of basic code points in Input
-%%   H         - number of code points handled so far
-%%   L         - length of Input
-%%   N         - current code point value of the encoder state
-%%   Delta     - pending delta
-%%   Bias      - current bias
-%%   Out       - output digits produced so far, in reverse order
-%%
-%% Each round takes the next smallest non-basic code point M and walks
-%% Input once: code points below M increase the delta, and every
-%% occurrence of M emits the delta as a variable-length integer and
-%% re-adapts the bias. Returns the digits in output order.
-punycode_encode1(Input, [M | SNonBasic], B, H, L, N, Delta, Bias, Out)
-  when H < L ->
-    %% Advance the state from N to M; from here on n = M.
-    Delta1 = Delta + (M - N) * (H + 1),
-    Step = fun (C, {ADelta, ABias, AH, AOut}) when C < M ->
-                   {ADelta + 1, ABias, AH, AOut};
-               (C, {ADelta, ABias, AH, AOut}) when C == M ->
-                   Digits = punycode_encode_delta(ADelta, ABias, AOut),
-                   %% Use the running count AH, not the count H from the
-                   %% start of the round: when M occurs more than once,
-                   %% later occurrences must see the updated number of
-                   %% handled code points and must not be treated as the
-                   %% first adaptation again.
-                   Adapted = adapt(ADelta, AH + 1, AH == B),
-                   {0, Adapted, AH + 1, Digits};
-               (_C, Acc) ->
-                   Acc
-           end,
-    {NewDelta, NewBias, NewH, NewOut} =
-        lists:foldl(Step, {Delta1, Bias, H, Out}, Input),
-    punycode_encode1(Input, SNonBasic, B, NewH, L, M + 1,
-                     NewDelta + 1, NewBias, NewOut);
-punycode_encode1(_Input, _SNonBasic, _B, _H, _L, _N, _Delta, _Bias,
-                 Out) ->
-    lists:reverse(Out).
-%% Emits Delta as a generalized variable-length integer, prepending each
-%% digit character to Out (Out is kept in reverse order).
-punycode_encode_delta(Delta, Bias, Out) ->
-    punycode_encode_delta(Delta, Bias, Out, ?BASE).
-%% K is the position weight of the current digit; it grows by ?BASE for
-%% every digit emitted.
-punycode_encode_delta(Delta, Bias, Out, K) ->
-    %% Threshold for this digit: K - Bias clamped to ?TMIN..?TMAX.
-    T = case K - Bias of
-            Diff when Diff =< 0 -> ?TMIN;
-            Diff when Diff >= ?TMAX -> ?TMAX;
-            Diff -> Diff
-        end,
-    case Delta < T of
-        true ->
-            %% Final digit.
-            [codepoint(Delta) | Out];
-        false ->
-            Excess = Delta - T,
-            Range = ?BASE - T,
-            Digit = T + Excess rem Range,
-            punycode_encode_delta(Excess div Range, Bias,
-                                  [codepoint(Digit) | Out], K + ?BASE)
-    end.
-%% Bias adaptation: scales Delta (by ?DAMP when FirstTime is true, by 2
-%% otherwise), adds its share per code point, and lets adapt1/2 derive
-%% the new bias.
-adapt(Delta, NumPoints, FirstTime) ->
-    Scaled = case FirstTime of
-                 true -> Delta div ?DAMP;
-                 _ -> Delta div 2
-             end,
-    adapt1(Scaled + Scaled div NumPoints, 0).
-%% Second half of the bias adaptation: repeatedly divides Delta by
-%% (?BASE - ?TMIN), adding ?BASE to K each time, until Delta is small
-%% enough, then returns the resulting bias.
-adapt1(Delta, K) when Delta > ((?BASE - ?TMIN) * ?TMAX) div 2 ->
-    adapt1(Delta div (?BASE - ?TMIN), K + ?BASE);
-adapt1(Delta, K) ->
-    K + ((?BASE - ?TMIN + 1) * Delta) div (Delta + ?SKEW).
-%% Maps a Punycode digit value to its character: 0..25 -> $a..$z and
-%% 26..35 -> $0..$9. Any other value matches no branch and raises
-%% `if_clause'.
-codepoint(Digit) ->
-    if
-        0 =< Digit, Digit =< 25 -> $a + Digit;
-        26 =< Digit, Digit =< 35 -> $0 + (Digit - 26)
-    end.
-%%% Unit tests
-%% A pure-ASCII domain must come back unchanged.
-acsii_test() ->
-    Domain = <<"test.org">>,
-    ?assertEqual(Domain, domain_utf8_to_ascii(Domain)).
-%% A domain made of two Cyrillic labels must be Punycode-encoded label
-%% by label.
-utf8_test() ->
-    %% UTF-8 for U+0434 U+043E U+043C U+0435 U+043D
-    Label1 = <<208,180,208,190,208,188,208,181,208,189>>,
-    %% UTF-8 for U+0440 U+0444
-    Label2 = <<209,128,209,132>>,
-    Domain = <<Label1/binary, $., Label2/binary>>,
-    ?assertEqual(<<"xn--d1acufc.xn--p1ai">>,
-                 domain_utf8_to_ascii(Domain)).