aboutsummaryrefslogtreecommitdiff
path: root/src/ejabberd_regexp.erl
diff options
context:
space:
mode:
Diffstat (limited to 'src/ejabberd_regexp.erl')
-rw-r--r--src/ejabberd_regexp.erl70
1 files changed, 61 insertions, 9 deletions
diff --git a/src/ejabberd_regexp.erl b/src/ejabberd_regexp.erl
index b79774e31..9e1a979a4 100644
--- a/src/ejabberd_regexp.erl
+++ b/src/ejabberd_regexp.erl
@@ -5,7 +5,7 @@
%%% Created : 8 Dec 2011 by Badlop
%%%
%%%
-%%% ejabberd, Copyright (C) 2002-2016 ProcessOne
+%%% ejabberd, Copyright (C) 2002-2019 ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
@@ -25,7 +25,7 @@
-module(ejabberd_regexp).
--compile([export_all]).
+-export([exec/2, run/2, split/2, replace/3, greplace/3, sh_to_awk/1]).
exec({ReM, ReF, ReA}, {RgM, RgF, RgA}) ->
try apply(ReM, ReF, ReA) catch
@@ -36,7 +36,7 @@ exec({ReM, ReF, ReA}, {RgM, RgF, RgA}) ->
-spec run(binary(), binary()) -> match | nomatch | {error, any()}.
run(String, Regexp) ->
- case exec({re, run, [String, Regexp, [{capture, none}]]},
+ case exec({re, run, [String, Regexp, [{capture, none}, unicode]]},
{regexp, first_match, [binary_to_list(String),
binary_to_list(Regexp)]})
of
@@ -85,11 +85,63 @@ greplace(String, Regexp, New) ->
A -> A
end.
+
+%% This code was copied and adapted from xmerl_regexp.erl
+
-spec sh_to_awk(binary()) -> binary().
+sh_to_awk(Sh) -> %% Convert an sh-style wildcard pattern into an anchored regexp, returned as a binary
+ iolist_to_binary([<<"^(">>, sh_to_awk_1(Sh)]). %% Opens the "^(" group; sh_to_awk_1/1 ends its output with the matching ")$"
-sh_to_awk(ShRegExp) ->
- case exec({xmerl_regexp, sh_to_awk, [binary_to_list(ShRegExp)]},
- {regexp, sh_to_awk, [binary_to_list(ShRegExp)]})
- of
- A -> iolist_to_binary(A)
- end.
+sh_to_awk_1(<<"*", Sh/binary>>) -> %% sh '*' (any string) is emitted as ".*"
+ [<<".*">>, sh_to_awk_1(Sh)];
+sh_to_awk_1(<<"?", Sh/binary>>) -> %% sh '?' (any single character) is emitted as "."
+ [$., sh_to_awk_1(Sh)];
+sh_to_awk_1(<<"[^]", Sh/binary>>) -> %% The exact sequence "[^]" is emitted as a backslash-escaped '^'; must be tried before the "[^" clause
+ [<<"\\^">>, sh_to_awk_1(Sh)];
+%% "[^...": emit only '[' now and pass 'true' so that sh_to_awk_3/2 re-emits the '^' just before the closing ']'.
+sh_to_awk_1(<<"[^", Sh/binary>>) ->
+ [$[, sh_to_awk_2(Sh, true)];
+sh_to_awk_1(<<"[!", Sh/binary>>) -> %% sh "[!" is rewritten to "[^"; no '^' is pending, hence 'false'
+ [<<"[^">>, sh_to_awk_2(Sh, false)];
+sh_to_awk_1(<<"[", Sh/binary>>) -> %% Any other '[' is copied as is and the bracket contents are scanned by sh_to_awk_2/2
+ [$[, sh_to_awk_2(Sh, false)];
+sh_to_awk_1(<<C:8, Sh/binary>>) -> %% Any other byte: backslash-escape it if sh_special_char/1 says so, otherwise copy it unchanged.
+ case sh_special_char(C) of
+ true -> [$\\,C|sh_to_awk_1(Sh)];
+ false -> [C|sh_to_awk_1(Sh)]
+ end;
+sh_to_awk_1(<<>>) ->
+ <<")$">>. %% End of input: close the group opened in sh_to_awk/1 and anchor the end
+
+sh_to_awk_2(<<"]", Sh/binary>>, UpArrow) -> %% A ']' right after the opening bracket is copied through and scanning continues in sh_to_awk_3/2
+ [$]|sh_to_awk_3(Sh, UpArrow)];
+sh_to_awk_2(Sh, UpArrow) -> %% Anything else: hand the input unchanged to sh_to_awk_3/2, keeping the pending-'^' flag
+ sh_to_awk_3(Sh, UpArrow).
+
+sh_to_awk_3(<<"]", Sh/binary>>, true) -> %% Closing ']' with a pending '^': emit "^]" and resume normal translation
+ [<<"^]">>, sh_to_awk_1(Sh)];
+sh_to_awk_3(<<"]", Sh/binary>>, false) -> %% Closing ']' with nothing pending: emit it and resume normal translation
+ [$]|sh_to_awk_1(Sh)];
+sh_to_awk_3(<<C:8, Sh/binary>>, UpArrow) -> %% Bytes before the closing ']' are copied unchanged (sh_special_char/1 is not consulted here)
+ [C|sh_to_awk_3(Sh, UpArrow)];
+sh_to_awk_3(<<>>, true) -> %% Input ended before a closing ']': still emit the pending '^', then the ")$" terminator
+ [$^|sh_to_awk_1(<<>>)];
+sh_to_awk_3(<<>>, false) -> %% Input ended before a closing ']': emit only the ")$" terminator
+ sh_to_awk_1(<<>>).
+
+%% Return true for the characters that sh_to_awk_1/1 prefixes with a backslash in its output.
+-spec sh_special_char(char()) -> boolean().
+sh_special_char($|) -> true;
+sh_special_char($*) -> true;
+sh_special_char($+) -> true;
+sh_special_char($?) -> true;
+sh_special_char($() -> true;
+sh_special_char($)) -> true;
+sh_special_char($\\) -> true;
+sh_special_char($^) -> true;
+sh_special_char($$) -> true;
+sh_special_char($.) -> true;
+sh_special_char($[) -> true;
+sh_special_char($]) -> true;
+sh_special_char($") -> true;
+sh_special_char(_C) -> false. %% Every other character is copied unchanged by the caller