diff options
Diffstat (limited to 'src/xml_stream.erl')
-rw-r--r-- | src/xml_stream.erl | 210 |
1 files changed, 127 insertions, 83 deletions
diff --git a/src/xml_stream.erl b/src/xml_stream.erl index e1eb7287c..17244aff0 100644 --- a/src/xml_stream.erl +++ b/src/xml_stream.erl @@ -25,23 +25,48 @@ %%%---------------------------------------------------------------------- -module(xml_stream). + -author('alexey@process-one.net'). --export([new/1, - new/2, - parse/2, - close/1, +-export([new/1, new/2, parse/2, close/1, parse_element/1]). -define(XML_START, 0). --define(XML_END, 1). + +-define(XML_END, 1). + -define(XML_CDATA, 2). + -define(XML_ERROR, 3). -define(PARSE_COMMAND, 0). + -define(PARSE_FINAL_COMMAND, 1). --record(xml_stream_state, {callback_pid, port, stack, size, maxsize}). +-record(xml_stream_state, + {callback_pid = self() :: pid(), + port :: port(), + stack = [] :: stack(), + size = 0 :: non_neg_integer(), + maxsize = infinity :: non_neg_integer() | infinity}). + +-type xml_stream_el() :: {xmlstreamraw, binary()} | + {xmlstreamcdata, binary()} | + {xmlstreamelement, xmlel()} | + {xmlstreamend, binary()} | + {xmlstreamstart, binary(), [attr()]} | + {xmlstreamerror, binary()}. + +-type xml_stream_state() :: #xml_stream_state{}. +-type stack() :: [xmlel()]. +-type event() :: {?XML_START, {binary(), [attr()]}} | + {?XML_END, binary()} | + {?XML_CDATA, binary()} | + {?XML_ERROR, binary()}. + +-export_type([xml_stream_state/0, xml_stream_el/0]). + +-include("jlib.hrl"). process_data(CallbackPid, Stack, Data) -> case Data of @@ -55,7 +80,7 @@ process_data(CallbackPid, Stack, Data) -> %% anymore. [xmlstreamstart]; true -> - [{xmlelement, Name, Attrs, []} | Stack] + [#xmlel{name = Name, attrs = Attrs, children = []} | Stack] end; {?XML_END, EndName} -> case Stack of @@ -63,14 +88,15 @@ process_data(CallbackPid, Stack, Data) -> catch gen_fsm:send_event(CallbackPid, {xmlstreamend, EndName}), []; - [{xmlelement, Name, Attrs, Els}, xmlstreamstart] -> - NewEl = {xmlelement, Name, Attrs, lists:reverse(Els)}, + [#xmlel{name = Name, attrs = Attrs, children = Els}, xmlstreamstart] -> + NewEl = #xmlel{name = Name, attrs = Attrs, children = lists:reverse(Els)}, catch gen_fsm:send_event(CallbackPid, {xmlstreamelement, NewEl}), [xmlstreamstart]; - [{xmlelement, Name, Attrs, Els}, {xmlelement, Name1, Attrs1, Els1} | Tail] -> - NewEl = {xmlelement, Name, Attrs, lists:reverse(Els)}, - [{xmlelement, Name1, Attrs1, [NewEl | Els1]} | Tail] + [#xmlel{name = Name, attrs = Attrs, children = Els}, + #xmlel{name = Name1, attrs = Attrs1, children = Els1} | Tail] -> + NewEl = #xmlel{name = Name, attrs = Attrs, children = lists:reverse(Els)}, + [#xmlel{name = Name1, attrs = Attrs1, children = [NewEl | Els1]} | Tail] end; {?XML_CDATA, CData} -> case Stack of @@ -80,64 +106,75 @@ process_data(CallbackPid, Stack, Data) -> %% This does not change the semantic: the split in %% several CDATA nodes depends on the TCP/IP packet %% fragmentation - [{xmlelement, Name, Attrs, - [{xmlcdata, PreviousCData}|Els]} | Tail] -> - [{xmlelement, Name, Attrs, - [{xmlcdata, list_to_binary([PreviousCData, CData])} | Els]} | Tail]; + [#xmlel{name = Name, attrs = Attrs, + children = [{xmlcdata, PreviousCData} | Els]} + | Tail] -> + [#xmlel{name = Name, attrs = Attrs, + children = + [{xmlcdata, + iolist_to_binary([PreviousCData, CData])} + | Els]} + | Tail]; %% No previous CDATA - [{xmlelement, Name, Attrs, Els} | Tail] -> - [{xmlelement, Name, Attrs, [{xmlcdata, CData} | Els]} | - Tail]; + [#xmlel{name = Name, attrs = Attrs, children = Els} + | Tail] -> + [#xmlel{name = Name, attrs = Attrs, + children = [{xmlcdata, CData} | Els]} + | Tail]; [] -> [] end; {?XML_ERROR, Err} -> catch gen_fsm:send_event(CallbackPid, {xmlstreamerror, Err}) end. +-spec new(pid()) -> xml_stream_state(). + +new(CallbackPid) -> new(CallbackPid, infinity). -new(CallbackPid) -> - new(CallbackPid, infinity). +-spec new(pid(), non_neg_integer() | infinity) -> xml_stream_state(). new(CallbackPid, MaxSize) -> Port = open_port({spawn, "expat_erl"}, [binary]), #xml_stream_state{callback_pid = CallbackPid, - port = Port, - stack = [], - size = 0, - maxsize = MaxSize}. + port = Port, stack = [], size = 0, maxsize = MaxSize}. +-spec parse(xml_stream_state(), iodata()) -> xml_stream_state(). parse(#xml_stream_state{callback_pid = CallbackPid, - port = Port, - stack = Stack, - size = Size, - maxsize = MaxSize} = State, Str) -> - StrSize = if - is_list(Str) -> length(Str); - is_binary(Str) -> size(Str) - end, + port = Port, stack = Stack, size = Size, + maxsize = MaxSize} = + State, + Str) -> + StrSize = byte_size(Str), Res = port_control(Port, ?PARSE_COMMAND, Str), - {NewStack, NewSize} = - lists:foldl( - fun(Data, {St, Sz}) -> - NewSt = process_data(CallbackPid, St, Data), - case NewSt of - [_] -> {NewSt, 0}; - _ -> {NewSt, Sz} - end - end, {Stack, Size + StrSize}, binary_to_term(Res)), - if - NewSize > MaxSize -> - catch gen_fsm:send_event(CallbackPid, - {xmlstreamerror, "XML stanza is too big"}); - true -> - ok + {NewStack, NewSize} = lists:foldl(fun (Data, + {St, Sz}) -> + NewSt = process_data(CallbackPid, + St, Data), + case NewSt of + [_] -> {NewSt, 0}; + _ -> {NewSt, Sz} + end + end, + {Stack, Size + StrSize}, + binary_to_term(Res)), + if NewSize > MaxSize -> + catch gen_fsm:send_event(CallbackPid, + {xmlstreamerror, + <<"XML stanza is too big">>}); + true -> ok end, - State#xml_stream_state{stack = NewStack, size = NewSize}. + State#xml_stream_state{stack = NewStack, + size = NewSize}. + +-spec close(xml_stream_state()) -> true. close(#xml_stream_state{port = Port}) -> port_close(Port). +-spec parse_element(iodata()) -> xmlel() | + {error, parse_error} | + {error, binary()}. parse_element(Str) -> Port = open_port({spawn, "expat_erl"}, [binary]), @@ -148,42 +185,49 @@ parse_element(Str) -> process_element_events(Events) -> process_element_events(Events, []). +-spec process_element_events([event()], stack()) -> xmlel() | + {error, parse_error} | + {error, binary()}. + process_element_events([], _Stack) -> {error, parse_error}; process_element_events([Event | Events], Stack) -> case Event of - {?XML_START, {Name, Attrs}} -> - process_element_events( - Events, [{xmlelement, Name, Attrs, []} | Stack]); - {?XML_END, _EndName} -> - case Stack of - [{xmlelement, Name, Attrs, Els} | Tail] -> - NewEl = {xmlelement, Name, Attrs, lists:reverse(Els)}, - case Tail of - [] -> - if - Events == [] -> - NewEl; - true -> - {error, parse_error} - end; - [{xmlelement, Name1, Attrs1, Els1} | Tail1] -> - process_element_events( - Events, - [{xmlelement, Name1, Attrs1, [NewEl | Els1]} | - Tail1]) - end - end; - {?XML_CDATA, CData} -> - case Stack of - [{xmlelement, Name, Attrs, Els} | Tail] -> - process_element_events( - Events, - [{xmlelement, Name, Attrs, [{xmlcdata, CData} | Els]} | - Tail]); - [] -> - process_element_events(Events, []) - end; - {?XML_ERROR, Err} -> - {error, Err} + {?XML_START, {Name, Attrs}} -> + process_element_events(Events, + [#xmlel{name = Name, attrs = Attrs, + children = []} + | Stack]); + {?XML_END, _EndName} -> + case Stack of + [#xmlel{name = Name, attrs = Attrs, children = Els} + | Tail] -> + NewEl = #xmlel{name = Name, attrs = Attrs, + children = lists:reverse(Els)}, + case Tail of + [] -> + if Events == [] -> NewEl; + true -> {error, parse_error} + end; + [#xmlel{name = Name1, attrs = Attrs1, children = Els1} + | Tail1] -> + process_element_events(Events, + [#xmlel{name = Name1, + attrs = Attrs1, + children = [NewEl | Els1]} + | Tail1]) + end + end; + {?XML_CDATA, CData} -> + case Stack of + [#xmlel{name = Name, attrs = Attrs, children = Els} + | Tail] -> + process_element_events(Events, + [#xmlel{name = Name, attrs = Attrs, + children = + [{xmlcdata, CData} | Els]} + | Tail]); + [] -> process_element_events(Events, []) + end; + {?XML_ERROR, Err} -> {error, Err} end. |