aboutsummaryrefslogtreecommitdiff
path: root/src/xml.erl
diff options
context:
space:
mode:
authorBadlop <badlop@process-one.net>2013-03-14 10:33:02 +0100
committerBadlop <badlop@process-one.net>2013-03-14 10:33:02 +0100
commit9deb294328bb3f9eb6bd2c0e7cd500732e9b5830 (patch)
tree7e1066c130250627ee0abab44a135f583a28d07f /src/xml.erl
parentlist_to_integer/2 only works in OTP R14 and newer (diff)
Accumulated patch to binarize and indent code
Diffstat (limited to 'src/xml.erl')
-rw-r--r--src/xml.erl444
1 files changed, 308 insertions, 136 deletions
diff --git a/src/xml.erl b/src/xml.erl
index 6e126240c..7cd3acdfb 100644
--- a/src/xml.erl
+++ b/src/xml.erl
@@ -25,110 +25,127 @@
%%%----------------------------------------------------------------------
-module(xml).
+
-author('alexey@process-one.net').
--export([element_to_string/1,
- element_to_binary/1,
- crypt/1, make_text_node/1,
- remove_cdata/1,
- get_cdata/1, get_tag_cdata/1,
- get_attr/2, get_attr_s/2,
- get_tag_attr/2, get_tag_attr_s/2,
- get_subtag/2, get_subtag_cdata/2,
- append_subtags/2,
- get_path_s/2,
- start/0,
- replace_tag_attr/3]).
+-export([element_to_binary/1,
+ crypt/1, make_text_node/1, remove_cdata/1,
+ remove_subtags/3, get_cdata/1, get_tag_cdata/1,
+ get_attr/2, get_attr_s/2, get_tag_attr/2,
+ get_tag_attr_s/2, get_subtag/2, get_subtag_cdata/2,
+ append_subtags/2, get_path_s/2, start/0,
+ replace_tag_attr/3, to_xmlel/1]).
+-include("jlib.hrl").
-include("ejabberd.hrl").
%% Select at compile time how to escape characters in binary text
%% nodes.
%% Can be choosen with ./configure --enable-full-xml
-ifdef(FULL_XML_SUPPORT).
+
-define(ESCAPE_BINARY(CData), make_text_node(CData)).
+
-else.
+
-define(ESCAPE_BINARY(CData), crypt(CData)).
+
-endif.
%% Replace element_to_binary/1 with NIF
%% Can be choosen with ./configure --enable-nif
-ifdef(NIF).
+
start() ->
SOPath = filename:join(ejabberd:get_so_path(), "xml"),
case catch erlang:load_nif(SOPath, 0) of
- ok ->
- ok;
- Err ->
- ?WARNING_MSG("unable to load xml NIF: ~p", [Err])
+ ok -> ok;
+ Err -> ?WARNING_MSG("unable to load xml NIF: ~p", [Err])
end.
+
-else.
-start() ->
- ok.
+
+start() -> ok.
+
-endif.
+%%
+-spec(element_to_binary/1 ::
+(
+ El :: xmlel() | cdata())
+ -> binary()
+).
+
element_to_binary(El) ->
iolist_to_binary(element_to_string(El)).
+%%
+-spec(element_to_string/1 ::
+(
+ El :: xmlel() | cdata())
+ -> string()
+).
+
element_to_string(El) ->
case catch element_to_string_nocatch(El) of
- {'EXIT', Reason} ->
- erlang:error({badxml, El, Reason});
- Result ->
- Result
+ {'EXIT', Reason} -> erlang:error({badxml, El, Reason});
+ Result -> Result
end.
+-spec(element_to_string_nocatch/1 ::
+(
+ El :: xmlel() | cdata())
+ -> iolist()
+).
+
element_to_string_nocatch(El) ->
case El of
- {xmlelement, Name, Attrs, Els} ->
- if
- Els /= [] ->
- [$<, Name, attrs_to_list(Attrs), $>,
- [element_to_string_nocatch(E) || E <- Els],
- $<, $/, Name, $>];
- true ->
- [$<, Name, attrs_to_list(Attrs), $/, $>]
- end;
- %% We do not crypt CDATA binary, but we enclose it in XML CDATA
- {xmlcdata, CData} when is_binary(CData) ->
- ?ESCAPE_BINARY(CData);
- %% We crypt list and possibly binaries if full XML usage is
- %% disabled unsupported (implies a conversion to list).
- {xmlcdata, CData} ->
- crypt(CData)
+ #xmlel{name = Name, attrs = Attrs, children = Els} ->
+ if Els /= [] ->
+ [$<, Name, attrs_to_list(Attrs), $>,
+ [element_to_string_nocatch(E) || E <- Els], $<, $/,
+ Name, $>];
+ true -> [$<, Name, attrs_to_list(Attrs), $/, $>]
+ end;
+ %% We do not crypt CDATA binary, but we enclose it in XML CDATA
+ {xmlcdata, CData} ->
+ ?ESCAPE_BINARY(CData)
end.
-attrs_to_list(Attrs) ->
- [attr_to_list(A) || A <- Attrs].
+attrs_to_list(Attrs) -> [attr_to_list(A) || A <- Attrs].
attr_to_list({Name, Value}) ->
[$\s, Name, $=, $', crypt(Value), $'].
-crypt(S) when is_list(S) ->
- [case C of
- $& -> "&amp;";
- $< -> "&lt;";
- $> -> "&gt;";
- $" -> "&quot;";
- $' -> "&apos;";
- _ when is_list(C); is_binary(C) -> crypt(C);
- _ -> C
- end || C <- S];
-crypt(S) when is_binary(S) ->
- crypt(binary_to_list(S)).
-
%% Make a cdata_binary depending on what characters it contains
+crypt(S) ->
+ << <<(case C of
+ $& -> <<"&amp;">>;
+ $< -> <<"&lt;">>;
+ $> -> <<"&gt;">>;
+ $" -> <<"&quot;">>;
+ $' -> <<"&apos;">>;
+ _ -> <<C>>
+ end)/binary>>
+ || <<C>> <= S >>.
+
+%%
+-spec(make_text_node/1 ::
+(
+ CData :: binary())
+ -> binary()
+).
+
make_text_node(CData) ->
case cdata_need_escape(CData) of
- cdata ->
- CDATA1 = <<"<![CDATA[">>,
- CDATA2 = <<"]]>">>,
- list_to_binary([CDATA1, CData, CDATA2]);
- none ->
- CData;
- {cdata, EndTokens} ->
- EscapedCData = escape_cdata(CData, EndTokens),
- list_to_binary(EscapedCData)
+ cdata ->
+ CDATA1 = <<"<![CDATA[">>,
+ CDATA2 = <<"]]>">>,
+ iolist_to_binary([CDATA1, CData, CDATA2]);
+ none -> CData;
+ {cdata, EndTokens} ->
+ EscapedCData = escape_cdata(CData, EndTokens),
+ iolist_to_binary(EscapedCData)
end.
%% Returns escape type needed for the text node
@@ -137,134 +154,289 @@ make_text_node(CData) ->
%% tokens, so that they can be escaped
cdata_need_escape(CData) ->
cdata_need_escape(CData, 0, false, []).
-cdata_need_escape(<<>>, _, false, _) ->
- none;
-cdata_need_escape(<<>>, _, true, []) ->
- cdata;
+
+cdata_need_escape(<<>>, _, false, _) -> none;
+cdata_need_escape(<<>>, _, true, []) -> cdata;
cdata_need_escape(<<>>, _, true, CDataEndTokens) ->
{cdata, lists:reverse(CDataEndTokens)};
-cdata_need_escape(<<$],$],$>,Rest/binary>>, CurrentPosition,
- _XMLEscape, CDataEndTokens) ->
+cdata_need_escape(<<$], $], $>, Rest/binary>>,
+ CurrentPosition, _XMLEscape, CDataEndTokens) ->
NewPosition = CurrentPosition + 3,
cdata_need_escape(Rest, NewPosition, true,
- [CurrentPosition+1|CDataEndTokens]);
+ [CurrentPosition + 1 | CDataEndTokens]);
%% Only <, & need to be escaped in XML text node
%% See reference: http://www.w3.org/TR/xml11/#syntax
-cdata_need_escape(<<$<,Rest/binary>>, CurrentPosition,
- _XMLEscape, CDataEndTokens) ->
- cdata_need_escape(Rest, CurrentPosition+1, true, CDataEndTokens);
-cdata_need_escape(<<$&,Rest/binary>>, CurrentPosition,
- _XMLEscape, CDataEndTokens) ->
- cdata_need_escape(Rest, CurrentPosition+1, true, CDataEndTokens);
-cdata_need_escape(<<_:8,Rest/binary>>, CurrentPosition,
+cdata_need_escape(<<$<, Rest/binary>>, CurrentPosition,
+ _XMLEscape, CDataEndTokens) ->
+ cdata_need_escape(Rest, CurrentPosition + 1, true,
+ CDataEndTokens);
+cdata_need_escape(<<$&, Rest/binary>>, CurrentPosition,
+ _XMLEscape, CDataEndTokens) ->
+ cdata_need_escape(Rest, CurrentPosition + 1, true,
+ CDataEndTokens);
+cdata_need_escape(<<_:8, Rest/binary>>, CurrentPosition,
XMLEscape, CDataEndTokens) ->
- cdata_need_escape(Rest, CurrentPosition+1, XMLEscape,
- CDataEndTokens).
-
%% escape cdata that contain CDATA end tokens
%% EndTokens is a list of position of end tokens (integer)
%% This is supposed to be a very rare case: You need to generate several
%% fields, splitting it in the middle of the end token.
%% See example: http://en.wikipedia.org/wiki/CDATA#Uses_of_CDATA_sections
+ cdata_need_escape(Rest, CurrentPosition + 1, XMLEscape,
+ CDataEndTokens).
+
escape_cdata(CData, EndTokens) ->
escape_cdata(CData, 0, EndTokens, []).
+
escape_cdata(<<>>, _CurrentPosition, [], Acc) ->
lists:reverse(Acc);
escape_cdata(Rest, CurrentPosition, [], Acc) ->
CDATA1 = <<"<![CDATA[">>,
CDATA2 = <<"]]>">>,
- escape_cdata(<<>>, CurrentPosition, [], [CDATA2, Rest, CDATA1|Acc]);
-escape_cdata(CData, Index, [Pos|Positions], Acc) ->
+ escape_cdata(<<>>, CurrentPosition, [],
+ [CDATA2, Rest, CDATA1 | Acc]);
+escape_cdata(CData, Index, [Pos | Positions], Acc) ->
CDATA1 = <<"<![CDATA[">>,
CDATA2 = <<"]]>">>,
- Split = Pos-Index,
- {Part, Rest} = split_binary(CData, Split+1),
- %% Note: We build the list in reverse to optimize construction
- escape_cdata(Rest, Pos+1, Positions, [CDATA2, Part, CDATA1|Acc]).
-
-remove_cdata_p({xmlelement, _Name, _Attrs, _Els}) -> true;
+ Split = Pos - Index,
+ {Part, Rest} = split_binary(CData, Split + 1),
+ escape_cdata(Rest, Pos + 1, Positions,
+ [CDATA2, Part, CDATA1 | Acc]).
+
+%%
+-spec(remove_cdata_p/1 ::
+(
+ El :: xmlel() | cdata())
+ -> boolean()
+).
+
+remove_cdata_p(#xmlel{}) -> true;
remove_cdata_p(_) -> false.
+%%
+-spec(remove_cdata/1 ::
+(
+ L :: [xmlel() | cdata()])
+ -> [xmlel()]
+).
+
remove_cdata(L) -> [E || E <- L, remove_cdata_p(E)].
+-spec(remove_subtags/3 ::
+(
+ Xmlel :: xmlel(),
+ Name :: binary(),
+ Attr :: attr())
+ -> Xmlel :: xmlel()
+).
+
+remove_subtags(#xmlel{name = TagName, attrs = TagAttrs, children = Els},
+ Name, Attr) ->
+ #xmlel{name = TagName, attrs = TagAttrs,
+ children = remove_subtags1(Els, [], Name, Attr)}.
+
+%%
+-spec(remove_subtags1/4 ::
+(
+ Els :: [xmlel() | cdata()],
+ NewEls :: [xmlel()],
+ Name :: binary(),
+ Attr :: attr())
+ -> NewEls :: [xmlel()]
+).
+
+remove_subtags1([], NewEls, _Name, _Attr) ->
+ lists:reverse(NewEls);
+remove_subtags1([El | Els], NewEls, Name,
+ {AttrName, AttrValue} = Attr) ->
+ case El of
+ #xmlel{name = Name, attrs = Attrs} ->
+ case get_attr(AttrName, Attrs) of
+ false ->
+ remove_subtags1(Els, [El | NewEls], Name, Attr);
+ {value, AttrValue} ->
+ remove_subtags1(Els, NewEls, Name, Attr);
+ _ -> remove_subtags1(Els, [El | NewEls], Name, Attr)
+ end;
+ _ -> remove_subtags1(Els, [El | NewEls], Name, Attr)
+ end.
+
+-spec(get_cdata/1 ::
+(
+ L :: [xmlel() | cdata()])
+ -> binary()
+).
+
get_cdata(L) ->
- binary_to_list(list_to_binary(get_cdata(L, ""))).
+ (iolist_to_binary(get_cdata(L, <<"">>))).
-get_cdata([{xmlcdata, CData} | L], S) ->
- get_cdata(L, [S, CData]);
-get_cdata([_ | L], S) ->
- get_cdata(L, S);
-get_cdata([], S) ->
- S.
+-spec(get_cdata/2 ::
+(
+ L :: [xmlel() | cdata()],
+ S :: binary() | iolist())
+ -> binary() | iolist()
+).
-get_tag_cdata({xmlelement, _Name, _Attrs, Els}) ->
- get_cdata(Els).
+get_cdata([{xmlcdata, CData} | L], S) ->
+ get_cdata(L, [S, CData]);
+get_cdata([_ | L], S) -> get_cdata(L, S);
+get_cdata([], S) -> S.
+
+-spec(get_tag_cdata/1 ::
+(
+ Xmlel :: xmlel())
+ -> binary()
+).
+
+get_tag_cdata(#xmlel{children = Els}) -> get_cdata(Els).
+
+%%
+-spec(get_attr/2 ::
+(
+ AttrName :: binary(),
+ Attrs :: [attr()])
+ -> {value, binary()}
+ | false
+).
get_attr(AttrName, Attrs) ->
case lists:keysearch(AttrName, 1, Attrs) of
- {value, {_, Val}} ->
- {value, Val};
- _ ->
- false
+ {value, {_, Val}} -> {value, Val};
+ _ -> false
end.
+%%
+-spec(get_attr_s/2 ::
+(
+ AttrName :: binary(),
+ Attrs :: [attr()])
+ -> Val :: binary()
+).
+
get_attr_s(AttrName, Attrs) ->
case lists:keysearch(AttrName, 1, Attrs) of
- {value, {_, Val}} ->
- Val;
- _ ->
- ""
+ {value, {_, Val}} -> Val;
+ _ -> <<"">>
end.
-get_tag_attr(AttrName, {xmlelement, _Name, Attrs, _Els}) ->
+%%
+-spec(get_tag_attr/2 ::
+(
+ AttrName :: binary(),
+ Xmlel :: xmlel())
+ -> {value, binary()}
+ | false
+).
+
+get_tag_attr(AttrName, #xmlel{attrs = Attrs}) ->
get_attr(AttrName, Attrs).
-get_tag_attr_s(AttrName, {xmlelement, _Name, Attrs, _Els}) ->
+%%
+-spec(get_tag_attr_s/2 ::
+(
+ AttrName :: binary(),
+ Xmlel :: xmlel())
+ -> binary()
+).
+
+get_tag_attr_s(AttrName, #xmlel{attrs = Attrs}) ->
get_attr_s(AttrName, Attrs).
+%%
+-spec(get_subtag/2 ::
+(
+ Xmlel :: xmlel(),
+ Name :: binary())
+ -> xmlel() | false
+).
-get_subtag({xmlelement, _, _, Els}, Name) ->
+get_subtag(#xmlel{children = Els}, Name) ->
get_subtag1(Els, Name).
+%%
+-spec(get_subtag1/2 ::
+(
+ Els :: [xmlel() | cdata()],
+ Name :: binary())
+ -> xmlel() | false
+).
+
get_subtag1([El | Els], Name) ->
case El of
- {xmlelement, Name, _, _} ->
- El;
- _ ->
- get_subtag1(Els, Name)
+ #xmlel{name = Name} -> El;
+ _ -> get_subtag1(Els, Name)
end;
-get_subtag1([], _) ->
- false.
+get_subtag1([], _) -> false.
+
+%%
+-spec(get_subtag_cdata/2 ::
+(
+ Tag :: xmlel(),
+ Name :: binary())
+ -> binary()
+).
get_subtag_cdata(Tag, Name) ->
case get_subtag(Tag, Name) of
- false ->
- "";
- Subtag ->
- get_tag_cdata(Subtag)
+ false -> <<"">>;
+ Subtag -> get_tag_cdata(Subtag)
end.
-append_subtags({xmlelement, Name, Attrs, SubTags1}, SubTags2) ->
- {xmlelement, Name, Attrs, SubTags1 ++ SubTags2}.
-
-get_path_s(El, []) ->
- El;
+%%
+-spec(append_subtags/2 ::
+(
+ Xmlel :: xmlel(),
+ SubTags2 :: [xmlel() | cdata()])
+ -> Xmlel :: xmlel()
+).
+
+append_subtags(#xmlel{name = Name, attrs = Attrs, children = SubTags1}, SubTags2) ->
+ #xmlel{name = Name, attrs = Attrs, children = SubTags1 ++ SubTags2}.
+
+%%
+-spec(get_path_s/2 ::
+(
+ El :: xmlel(),
+ Path :: [{elem, Name::binary()}
+ |{attr, Name::binary()}
+ |cdata])
+ -> xmlel()
+ | binary()
+).
+
+get_path_s(El, []) -> El;
get_path_s(El, [{elem, Name} | Path]) ->
case get_subtag(El, Name) of
- false ->
- "";
- SubEl ->
- get_path_s(SubEl, Path)
+ false -> <<"">>;
+ SubEl -> get_path_s(SubEl, Path)
end;
get_path_s(El, [{attr, Name}]) ->
get_tag_attr_s(Name, El);
-get_path_s(El, [cdata]) ->
- get_tag_cdata(El).
-
-
-replace_tag_attr(Attr, Value, {xmlelement, Name, Attrs, Els}) ->
- Attrs1 = lists:keydelete(Attr, 1, Attrs),
- Attrs2 = [{Attr, Value} | Attrs1],
- {xmlelement, Name, Attrs2, Els}.
-
-
+get_path_s(El, [cdata]) -> get_tag_cdata(El).
+
+%%
+-spec(replace_tag_attr/3 ::
+(
+ Name :: binary(),
+ Value :: binary(),
+ Xmlel :: xmlel())
+ -> Xmlel :: #xmlel{
+ name :: binary(),
+ attrs :: [attr(),...],
+ children :: [xmlel() | cdata()]
+ }
+).
+
+replace_tag_attr(Name, Value, Xmlel) ->
+ Xmlel#xmlel{
+ attrs = [{Name, Value} | lists:keydelete(Name, 1, Xmlel#xmlel.attrs)]
+ }.
+
+-spec to_xmlel(xmlelement() | xmlel()) -> xmlel().
+
+to_xmlel({_, Name, Attrs, Els}) ->
+ #xmlel{name = iolist_to_binary(Name),
+ attrs = [{iolist_to_binary(K), iolist_to_binary(V)}
+ || {K, V} <- Attrs],
+ children = [to_xmlel(El) || El <- Els]};
+to_xmlel({xmlcdata, CData}) ->
+ {xmlcdata, iolist_to_binary(CData)}.