aboutsummaryrefslogblamecommitdiff
path: root/src/idna.erl
blob: 7e3d0aefbcfba22e1439fdc8a8c39667ffe04b10 (plain) (tree)
1
2
3
4
5
6
7
8

                                                                         
                                                      
                                        


                                                                     
                                                  









                                                                      
   




                                                                     


                                                                         
 
                                  
 
                                

                                
 



                                        
                                                         




                                               
                                        
 

                                        
                             





                                                                         
 
                                                       

                               
                                               

                                  


                                





                                                       

                           
                                                                            
                 









                                                                    
                
                                                           
        








                                                            




                                  

                      









                   
                          

                        

                         
                     
              


                                                           


                                      

                                       
                  

                                                            

                       


                                                    
                                       


































                                                                                                    

                       



                                                   


                                        
            




                                                                         

        
                                     

                                               
                 
                                           


                      




                                                                  
        
 
               

                                        
        















                                                                          
%%%----------------------------------------------------------------------
%%% File    : idna.erl
%%% Author  : Alexey Shchepin <alexey@process-one.net>
%%% Purpose : Support for IDNA (RFC3490)
%%% Created : 10 Apr 2004 by Alexey Shchepin <alexey@process-one.net>
%%%
%%%
%%% ejabberd, Copyright (C) 2002-2013   ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License
%%% along with this program; if not, write to the Free Software
%%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
%%% 02111-1307 USA
%%%
%%%----------------------------------------------------------------------

-module(idna).

-author('alexey@process-one.net').

-export([domain_utf8_to_ascii/1,
	 domain_ucs2_to_ascii/1,
         utf8_to_ucs2/1]).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

-spec domain_utf8_to_ascii(binary()) -> false | binary().

domain_utf8_to_ascii(Domain) ->
    domain_ucs2_to_ascii(utf8_to_ucs2(Domain)).

utf8_to_ucs2(S) ->
    utf8_to_ucs2(binary_to_list(S), "").

utf8_to_ucs2([], R) -> lists:reverse(R);
utf8_to_ucs2([C | S], R) when C < 128 ->
    utf8_to_ucs2(S, [C | R]);
utf8_to_ucs2([C1, C2 | S], R) when C1 < 224 ->
    utf8_to_ucs2(S, [C1 band 31 bsl 6 bor C2 band 63 | R]);
utf8_to_ucs2([C1, C2, C3 | S], R) when C1 < 240 ->
    utf8_to_ucs2(S,
		 [C1 band 15 bsl 12 bor (C2 band 63 bsl 6) bor C3 band 63
		  | R]).

-spec domain_ucs2_to_ascii(list()) -> false | binary().

domain_ucs2_to_ascii(Domain) ->
    case catch domain_ucs2_to_ascii1(Domain) of
      {'EXIT', _Reason} -> false;
      Res -> iolist_to_binary(Res)
    end.

domain_ucs2_to_ascii1(Domain) ->
    Parts = string:tokens(Domain,
			  [46, 12290, 65294, 65377]),
    ASCIIParts = lists:map(fun (P) -> to_ascii(P) end,
			   Parts),
    string:strip(lists:flatmap(fun (P) -> [$. | P] end,
			       ASCIIParts),
		 left, $.).

%% Domain names are already nameprep'ed in ejabberd, so we skiping this step
to_ascii(Name) ->
    false = lists:any(fun (C)
			      when (0 =< C) and (C =< 44) or
				     (46 =< C) and (C =< 47)
				     or (58 =< C) and (C =< 64)
				     or (91 =< C) and (C =< 96)
				     or (123 =< C) and (C =< 127) ->
			      true;
			  (_) -> false
		      end,
		      Name),
    case Name of
      [H | _] when H /= $- -> true = lists:last(Name) /= $-
    end,
    ASCIIName = case lists:any(fun (C) -> C > 127 end, Name)
		    of
		  true ->
		      true = case Name of
			       "xn--" ++ _ -> false;
			       _ -> true
			     end,
		      "xn--" ++ punycode_encode(Name);
		  false -> Name
		end,
    L = length(ASCIIName),
    true = (1 =< L) and (L =< 63),
    ASCIIName.

%%% PUNYCODE (RFC3492)

-define(BASE, 36).

-define(TMIN, 1).

-define(TMAX, 26).

-define(SKEW, 38).

-define(DAMP, 700).

-define(INITIAL_BIAS, 72).

-define(INITIAL_N, 128).

punycode_encode(Input) ->
    N = (?INITIAL_N),
    Delta = 0,
    Bias = (?INITIAL_BIAS),
    Basic = lists:filter(fun (C) -> C =< 127 end, Input),
    NonBasic = lists:filter(fun (C) -> C > 127 end, Input),
    L = length(Input),
    B = length(Basic),
    SNonBasic = lists:usort(NonBasic),
    Output1 = if B > 0 -> Basic ++ "-";
		 true -> ""
	      end,
    Output2 = punycode_encode1(Input, SNonBasic, B, B, L, N,
			       Delta, Bias, ""),
    Output1 ++ Output2.

punycode_encode1(Input, [M | SNonBasic], B, H, L, N,
		 Delta, Bias, Out)
    when H < L ->
    Delta1 = Delta + (M - N) * (H + 1),
    % let n = m
    {NewDelta, NewBias, NewH, NewOut} = lists:foldl(fun (C,
							 {ADelta, ABias, AH,
							  AOut}) ->
							    if C < M ->
								   {ADelta + 1,
								    ABias, AH,
								    AOut};
							       C == M ->
								   NewOut =
								       punycode_encode_delta(ADelta,
											     ABias,
											     AOut),
								   NewBias =
								       adapt(ADelta,
									     H +
									       1,
									     H
									       ==
									       B),
								   {0, NewBias,
								    AH + 1,
								    NewOut};
							       true ->
								   {ADelta,
								    ABias, AH,
								    AOut}
							    end
						    end,
						    {Delta1, Bias, H, Out},
						    Input),
    punycode_encode1(Input, SNonBasic, B, NewH, L, M + 1,
		     NewDelta + 1, NewBias, NewOut);
punycode_encode1(_Input, _SNonBasic, _B, _H, _L, _N,
		 _Delta, _Bias, Out) ->
    lists:reverse(Out).

punycode_encode_delta(Delta, Bias, Out) ->
    punycode_encode_delta(Delta, Bias, Out, ?BASE).

punycode_encode_delta(Delta, Bias, Out, K) ->
    T = if K =< Bias -> ?TMIN;
	   K >= Bias + (?TMAX) -> ?TMAX;
	   true -> K - Bias
	end,
    if Delta < T -> [codepoint(Delta) | Out];
       true ->
	   C = T + (Delta - T) rem ((?BASE) - T),
	   punycode_encode_delta((Delta - T) div ((?BASE) - T),
				 Bias, [codepoint(C) | Out], K + (?BASE))
    end.

adapt(Delta, NumPoints, FirstTime) ->
    Delta1 = if FirstTime -> Delta div (?DAMP);
		true -> Delta div 2
	     end,
    Delta2 = Delta1 + Delta1 div NumPoints,
    adapt1(Delta2, 0).

adapt1(Delta, K) ->
    if Delta > ((?BASE) - (?TMIN)) * (?TMAX) div 2 ->
	   adapt1(Delta div ((?BASE) - (?TMIN)), K + (?BASE));
       true ->
	   K +
	     ((?BASE) - (?TMIN) + 1) * Delta div (Delta + (?SKEW))
    end.

codepoint(C) ->
    if (0 =< C) and (C =< 25) -> C + 97;
       (26 =< C) and (C =< 35) -> C + 22
    end.

%%%===================================================================
%%% Unit tests
%%%===================================================================
-ifdef(TEST).

acsii_test() ->
    ?assertEqual(<<"test.org">>, domain_utf8_to_ascii(<<"test.org">>)).

utf8_test() ->
    ?assertEqual(
       <<"xn--d1acufc.xn--p1ai">>,
       domain_utf8_to_ascii(
         <<208,180,208,190,208,188,208,181,208,189,46,209,128,209,132>>)).

-endif.