aboutsummaryrefslogblamecommitdiff
path: root/src/xml_stream.erl
blob: 7976ba53bb8722ef29fbcde8b618d115e2084c3c (plain) (tree)
1
2
3
4
5
6
7
8

                                                                         
                                                      
                               


                                                                     
                                                   















                                                                      


                                                                         
                                  
 
               
               
                 

                           
 




                      
                          
                                
 
                                                                      
 

                                         
                                      

                              

                                                                            

                       

                                                    
                              


                                                                          

                                

                                                                              
                                 
                              

                                                                                



                                                                          

                       
                              


                         








                                                                                         





                                                                           
                                                                        


        
                   


                               


                                                   


                                         



                                                   






                                                           
                                                  
















                                                                                


                                        


















































                                                                             
%%%----------------------------------------------------------------------
%%% File    : xml_stream.erl
%%% Author  : Alexey Shchepin <alexey@process-one.net>
%%% Purpose : Parse XML streams
%%% Created : 17 Nov 2002 by Alexey Shchepin <alexey@process-one.net>
%%%
%%%
%%% ejabberd, Copyright (C) 2002-2008   Process-one
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
%%% General Public License for more details.
%%%                         
%%% You should have received a copy of the GNU General Public License
%%% along with this program; if not, write to the Free Software
%%% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
%%% 02111-1307 USA
%%%
%%%----------------------------------------------------------------------

-module(xml_stream).
-author('alexey@process-one.net').

-export([new/1,
	 new/2,
	 parse/2,
	 close/1,
	 parse_element/1]).

%% Tags carried in the first position of each event tuple found in the
%% term decoded from the port's reply (see process_data/3 and
%% process_element_events/2, which dispatch on them).
-define(XML_START, 0).
-define(XML_END,   1).
-define(XML_CDATA, 2).
-define(XML_ERROR, 3).

%% Command codes passed as the second argument of port_control/3:
%% PARSE_COMMAND is used by parse/2 (chunk of an ongoing stream),
%% PARSE_FINAL_COMMAND by parse_element/1 (one complete document).
-define(PARSE_COMMAND, 0).
-define(PARSE_FINAL_COMMAND, 1).

%% Parser state threaded through new/2, parse/2 and close/1:
%%   callback_pid - destination of the gen_fsm events emitted while parsing
%%   port         - port opened on the expat_erl driver
%%   stack        - list of currently open {xmlelement, ...} tuples,
%%                  innermost first
%%   size         - running input size, reset to 0 by parse/2 whenever only
%%                  one element remains open
%%   maxsize      - limit compared against size (new/1 passes 'infinity')
-record(xml_stream_state, {callback_pid, port, stack, size, maxsize}).

%% @doc Apply one parser event to the stack of currently open elements
%% and return the updated stack.
%%
%% `Stack' holds `{xmlelement, Name, Attrs, Children}' tuples, innermost
%% element first; the children of an open element are kept in reverse
%% order and are put back in document order when the element is closed.
%%
%% Notifications are sent to `CallbackPid' with gen_fsm:send_event/2:
%%   {xmlstreamstart, Name, Attrs} - an element is opened on an empty stack
%%   {xmlstreamelement, El}        - an element is closed while exactly one
%%                                   other element remains open
%%   {xmlstreamend, Name}          - the last open element is closed
%%   {xmlstreamerror, Err}         - the parser reported an error
%% Each send is wrapped in `catch', so a failure to deliver a
%% notification does not abort the processing of the event.
%%
%% An end event received with an empty stack is not handled and crashes
%% with `case_clause'.
process_data(CallbackPid, Stack, Data) ->
    case Data of
	{?XML_START, {Name, Attrs}} ->
	    if
		Stack == [] ->
		    catch gen_fsm:send_event(CallbackPid,
					     {xmlstreamstart, Name, Attrs});
		true ->
		    ok
	    end,
	    [{xmlelement, Name, Attrs, []} | Stack];
	{?XML_END, EndName} ->
	    case Stack of
		[{xmlelement, Name, Attrs, Els} | Tail] ->
		    NewEl = {xmlelement, Name, Attrs, lists:reverse(Els)},
		    case Tail of
			[] ->
			    catch gen_fsm:send_event(CallbackPid,
						     {xmlstreamend, EndName}),
			    Tail;
			[_] ->
			    %% The completed element is handed to the callback
			    %% process and is not attached to the element that
			    %% is still open.
			    catch gen_fsm:send_event(CallbackPid,
						     {xmlstreamelement, NewEl}),
			    Tail;
			[{xmlelement, Name1, Attrs1, Els1} | Tail1] ->
			    [{xmlelement, Name1, Attrs1, [NewEl | Els1]} |
			     Tail1]
		    end
	    end;
	{?XML_CDATA, CData} ->
	    case Stack of
		%% Character data directly inside the only open element is
		%% dropped.
		[El] ->
		    [El];
		%% Merge CDATA nodes if they are contiguous.
		%% This does not change the semantics: the split into
		%% several CDATA nodes depends on the TCP/IP packet
		%% fragmentation.
		[{xmlelement, Name, Attrs,
		  [{xmlcdata, PreviousCData}|Els]} | Tail] ->
		    %% list_to_binary/1 replaces the obsolete concat_binary/1.
		    Merged = list_to_binary([PreviousCData, CData]),
		    [{xmlelement, Name, Attrs, [{xmlcdata, Merged} | Els]} |
		     Tail];
		%% No previous CDATA
		[{xmlelement, Name, Attrs, Els} | Tail] ->
		    [{xmlelement, Name, Attrs, [{xmlcdata, CData} | Els]} |
		     Tail];
		%% Character data outside of any element is ignored.
		[] -> []
	    end;
	{?XML_ERROR, Err} ->
	    catch gen_fsm:send_event(CallbackPid, {xmlstreamerror, Err}),
	    %% Return the stack unchanged: the result of this function is
	    %% stored as the new stack, so it must stay a list of elements
	    %% and not the return value of the send above.
	    Stack
    end.


%% @doc Create a stream parser state without a size limit; same as
%% new(CallbackPid, infinity).
new(CallbackPid) ->
    new(CallbackPid, infinity).

%% @doc Create a stream parser state.
%%
%% Opens a port on `expat_erl' in binary mode and returns a fresh
%% `#xml_stream_state{}' with an empty element stack and a size counter
%% of zero. `CallbackPid' is stored as the destination of the events
%% emitted by parse/2 and `MaxSize' as the limit that parse/2 compares
%% the size counter against.
new(CallbackPid, MaxSize) ->
    ExpatPort = open_port({spawn, expat_erl}, [binary]),
    #xml_stream_state{callback_pid = CallbackPid,
		      maxsize = MaxSize,
		      port = ExpatPort,
		      size = 0,
		      stack = []}.


%% @doc Feed a chunk of input to the parser and return the updated state.
%%
%% `Str' is sent to the port with the PARSE_COMMAND control code; the
%% reply is decoded with binary_to_term/1 into a list of events, and each
%% event is applied in order to the element stack by process_data/3,
%% which also notifies the callback process.
%%
%% The size counter is increased by the byte size of `Str' and reset to 0
%% every time an event leaves exactly one element open. If the resulting
%% counter is greater than `maxsize', an
%% `{xmlstreamerror, "XML stanza is too big"}' event is sent to the
%% callback process; the new state is returned regardless. When `maxsize'
%% is the atom `infinity' the comparison is never true, because numbers
%% sort before atoms in the Erlang term order.
parse(#xml_stream_state{callback_pid = CallbackPid,
			port = Port,
			stack = Stack,
			size = Size,
			maxsize = MaxSize} = State, Str) ->
    %% iolist_size/1 gives the byte count for binaries and flat strings
    %% (same as size/1 and length/1) and also for nested iodata, which
    %% port_control/3 accepts but length/1 would undercount.
    StrSize = iolist_size(Str),
    Res = port_control(Port, ?PARSE_COMMAND, Str),
    {NewStack, NewSize} =
	lists:foldl(
	  fun(Data, {St, Sz}) ->
		  NewSt = process_data(CallbackPid, St, Data),
		  case NewSt of
		      %% Only one element is open: start counting afresh.
		      [_] -> {NewSt, 0};
		      _ -> {NewSt, Sz}
		  end
	  end, {Stack, Size + StrSize}, binary_to_term(Res)),
    if
	NewSize > MaxSize ->
	    catch gen_fsm:send_event(CallbackPid,
				     {xmlstreamerror, "XML stanza is too big"});
	true ->
	    ok
    end,
    State#xml_stream_state{stack = NewStack, size = NewSize}.

%% @doc Close the port held in the given stream state. Returns the result
%% of port_close/1.
close(#xml_stream_state{port = ExpatPort}) ->
    erlang:port_close(ExpatPort).


%% @doc Parse a complete XML document held in `Str'.
%%
%% Opens a dedicated port on `expat_erl', sends `Str' with the
%% PARSE_FINAL_COMMAND control code and closes the port again. The
%% decoded list of events is folded by process_element_events/1, whose
%% result is returned: the parsed `{xmlelement, Name, Attrs, Children}'
%% tuple or an `{error, Reason}' tuple.
parse_element(Str) ->
    Port = open_port({spawn, expat_erl}, [binary]),
    %% Close the port even when port_control/3 raises (e.g. badarg on
    %% invalid input), so that a caller catching the exception does not
    %% leak the port.
    Res = try
	      port_control(Port, ?PARSE_FINAL_COMMAND, Str)
	  after
	      port_close(Port)
	  end,
    process_element_events(binary_to_term(Res)).

%% @doc Build a single element from a complete list of parser events,
%% starting with no open element.
process_element_events(Events) ->
    process_element_events(Events, []).

%% Worker for process_element_events/1. `OpenEls' is the stack of
%% elements opened so far, innermost first, each with its children in
%% reverse order.
%%
%% Returns the `{xmlelement, Name, Attrs, Children}' tuple of the
%% outermost element when its end event is the last event of the list.
%% Returns `{error, parse_error}' when the events run out before that
%% point or when events remain after it, and `{error, Err}' as soon as an
%% error event is met. Character data found while no element is open is
%% skipped.
process_element_events([], _OpenEls) ->
    {error, parse_error};
process_element_events([Ev | Rest], OpenEls) ->
    case Ev of
	{?XML_START, {Tag, TagAttrs}} ->
	    Opened = {xmlelement, Tag, TagAttrs, []},
	    process_element_events(Rest, [Opened | OpenEls]);
	{?XML_END, _EndName} ->
	    case OpenEls of
		[{xmlelement, Tag, TagAttrs, RevKids} | Outer] ->
		    Closed = {xmlelement, Tag, TagAttrs,
			      lists:reverse(RevKids)},
		    case Outer of
			%% Outermost element closed on the very last event.
			[] when Rest == [] ->
			    Closed;
			%% Outermost element closed but events remain.
			[] ->
			    {error, parse_error};
			[{xmlelement, PTag, PAttrs, PKids} | Deeper] ->
			    Parent = {xmlelement, PTag, PAttrs,
				      [Closed | PKids]},
			    process_element_events(Rest, [Parent | Deeper])
		    end
	    end;
	{?XML_CDATA, Text} ->
	    case OpenEls of
		[{xmlelement, Tag, TagAttrs, Kids} | Outer] ->
		    WithText = {xmlelement, Tag, TagAttrs,
				[{xmlcdata, Text} | Kids]},
		    process_element_events(Rest, [WithText | Outer]);
		[] ->
		    process_element_events(Rest, [])
	    end;
	{?XML_ERROR, Reason} ->
	    {error, Reason}
    end.