diff options
Diffstat (limited to 'src/ejabberd_regexp.erl')
-rw-r--r-- | src/ejabberd_regexp.erl | 70 |
1 files changed, 61 insertions, 9 deletions
diff --git a/src/ejabberd_regexp.erl b/src/ejabberd_regexp.erl index b79774e31..9e1a979a4 100644 --- a/src/ejabberd_regexp.erl +++ b/src/ejabberd_regexp.erl @@ -5,7 +5,7 @@ %%% Created : 8 Dec 2011 by Badlop %%% %%% -%%% ejabberd, Copyright (C) 2002-2016 ProcessOne +%%% ejabberd, Copyright (C) 2002-2019 ProcessOne %%% %%% This program is free software; you can redistribute it and/or %%% modify it under the terms of the GNU General Public License as @@ -25,7 +25,7 @@ -module(ejabberd_regexp). --compile([export_all]). +-export([exec/2, run/2, split/2, replace/3, greplace/3, sh_to_awk/1]). exec({ReM, ReF, ReA}, {RgM, RgF, RgA}) -> try apply(ReM, ReF, ReA) catch @@ -36,7 +36,7 @@ exec({ReM, ReF, ReA}, {RgM, RgF, RgA}) -> -spec run(binary(), binary()) -> match | nomatch | {error, any()}. run(String, Regexp) -> - case exec({re, run, [String, Regexp, [{capture, none}]]}, + case exec({re, run, [String, Regexp, [{capture, none}, unicode]]}, {regexp, first_match, [binary_to_list(String), binary_to_list(Regexp)]}) of @@ -85,11 +85,63 @@ greplace(String, Regexp, New) -> A -> A end. + +%% This code was copied and adapted from xmerl_regexp.erl + -spec sh_to_awk(binary()) -> binary(). +sh_to_awk(Sh) -> + iolist_to_binary([<<"^(">>, sh_to_awk_1(Sh)]). %Fix the beginning -sh_to_awk(ShRegExp) -> - case exec({xmerl_regexp, sh_to_awk, [binary_to_list(ShRegExp)]}, - {regexp, sh_to_awk, [binary_to_list(ShRegExp)]}) - of - A -> iolist_to_binary(A) - end. +sh_to_awk_1(<<"*", Sh/binary>>) -> %This matches any string + [<<".*">>, sh_to_awk_1(Sh)]; +sh_to_awk_1(<<"?", Sh/binary>>) -> %This matches any character + [$., sh_to_awk_1(Sh)]; +sh_to_awk_1(<<"[^]", Sh/binary>>) -> %This takes careful handling + [<<"\\^">>, sh_to_awk_1(Sh)]; +%% Must move '^' to end. +sh_to_awk_1(<<"[^", Sh/binary>>) -> + [$[, sh_to_awk_2(Sh, true)]; +sh_to_awk_1(<<"[!", Sh/binary>>) -> + [<<"[^">>, sh_to_awk_2(Sh, false)]; +sh_to_awk_1(<<"[", Sh/binary>>) -> + [$[, sh_to_awk_2(Sh, false)]; +sh_to_awk_1(<<C:8, Sh/binary>>) -> %% Unspecialise everything else which is not an escape character. + case sh_special_char(C) of + true -> [$\\,C|sh_to_awk_1(Sh)]; + false -> [C|sh_to_awk_1(Sh)] + end; +sh_to_awk_1(<<>>) -> + <<")$">>. %Fix the end + +sh_to_awk_2(<<"]", Sh/binary>>, UpArrow) -> + [$]|sh_to_awk_3(Sh, UpArrow)]; +sh_to_awk_2(Sh, UpArrow) -> + sh_to_awk_3(Sh, UpArrow). + +sh_to_awk_3(<<"]", Sh/binary>>, true) -> + [<<"^]">>, sh_to_awk_1(Sh)]; +sh_to_awk_3(<<"]", Sh/binary>>, false) -> + [$]|sh_to_awk_1(Sh)]; +sh_to_awk_3(<<C:8, Sh/binary>>, UpArrow) -> + [C|sh_to_awk_3(Sh, UpArrow)]; +sh_to_awk_3(<<>>, true) -> + [$^|sh_to_awk_1(<<>>)]; +sh_to_awk_3(<<>>, false) -> + sh_to_awk_1(<<>>). + +%% Test if a character is a special character. +-spec sh_special_char(char()) -> boolean(). +sh_special_char($|) -> true; +sh_special_char($*) -> true; +sh_special_char($+) -> true; +sh_special_char($?) -> true; +sh_special_char($() -> true; +sh_special_char($)) -> true; +sh_special_char($\\) -> true; +sh_special_char($^) -> true; +sh_special_char($$) -> true; +sh_special_char($.) -> true; +sh_special_char($[) -> true; +sh_special_char($]) -> true; +sh_special_char($") -> true; +sh_special_char(_C) -> false. |