aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Shchepin <alexey@process-one.net>2007-04-12 05:31:53 +0000
committerAlexey Shchepin <alexey@process-one.net>2007-04-12 05:31:53 +0000
commitc65e1e7ae0b2ac49f28f0db750f31b7a506ce56f (patch)
treee25f2eeaeea4ed406760a840212dc0e65e60259c
parent* src/guide.tex: Fixed typo. (diff)
* src/ejabberd_sm.erl: Minor optimisation
* src/ejabberd_system_monitor.erl: Experimental watchdog * src/ejabberd_sup.erl: Likewise * src/ejabberd_config.erl: Likewise SVN Revision: 750
-rw-r--r--ChangeLog10
-rw-r--r--src/ejabberd_config.erl2
-rw-r--r--src/ejabberd_sm.erl4
-rw-r--r--src/ejabberd_sup.erl8
-rw-r--r--src/ejabberd_system_monitor.erl313
5 files changed, 336 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 57e20b244..bb69000da 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2007-04-09 Alexey Shchepin <alexey@sevcom.net>
+
+ * src/ejabberd_sm.erl: Minor optimisation
+
+ * src/ejabberd_system_monitor.erl: Experimental watchdog
+ * src/ejabberd_sup.erl: Likewise
+ * src/ejabberd_config.erl: Likewise
+
2007-03-22 Mickael Remond <mickael.remond@process-one.net>
* src/guide.tex: Fixed typo.
@@ -95,7 +103,7 @@
* src/cyrsasl_plain.erl: bad-auth error code replaced by
not-authorized (EJAB-187).
-
+
* src/aclocal.m4: configure --with-erlang option is now working
(Thanks to Jerome Sautret) (EJAB-186).
diff --git a/src/ejabberd_config.erl b/src/ejabberd_config.erl
index 199f374f3..35b9212a9 100644
--- a/src/ejabberd_config.erl
+++ b/src/ejabberd_config.erl
@@ -123,6 +123,8 @@ process_term(Term, State) ->
add_option({domain_balancing, Domain}, Balancing, State);
{domain_balancing_component_number, Domain, N} ->
add_option({domain_balancing_component_number, Domain}, N, State);
+ {watchdog_admins, Admins} ->
+ add_option(watchdog_admins, Admins, State);
{loglevel, Loglevel} ->
ejabberd_loglevel:set(Loglevel),
State;
diff --git a/src/ejabberd_sm.erl b/src/ejabberd_sm.erl
index c320b4bef..4b67ef719 100644
--- a/src/ejabberd_sm.erl
+++ b/src/ejabberd_sm.erl
@@ -429,6 +429,10 @@ route_message(From, To, Packet) ->
case xml:get_tag_attr_s("type", Packet) of
"error" ->
ok;
+ "groupchat" ->
+ bounce_offline_message(From, To, Packet);
+ "headline" ->
+ bounce_offline_message(From, To, Packet);
_ ->
case ejabberd_auth:is_user_exists(LUser, LServer) of
true ->
diff --git a/src/ejabberd_sup.erl b/src/ejabberd_sup.erl
index e093b6a2b..af2c16f8f 100644
--- a/src/ejabberd_sup.erl
+++ b/src/ejabberd_sup.erl
@@ -40,6 +40,13 @@ init([]) ->
brutal_kill,
worker,
[ejabberd_node_groups]},
+ SystemMonitor =
+ {ejabberd_system_monitor,
+ {ejabberd_system_monitor, start_link, []},
+ permanent,
+ brutal_kill,
+ worker,
+ [ejabberd_system_monitor]},
Router =
{ejabberd_router,
{ejabberd_router, start_link, []},
@@ -150,6 +157,7 @@ init([]) ->
[Hooks,
StringPrep,
NodeGroups,
+ SystemMonitor,
Router,
SM,
S2S,
diff --git a/src/ejabberd_system_monitor.erl b/src/ejabberd_system_monitor.erl
new file mode 100644
index 000000000..3192b7763
--- /dev/null
+++ b/src/ejabberd_system_monitor.erl
@@ -0,0 +1,313 @@
+%%%-------------------------------------------------------------------
+%%% File : ejabberd_system_monitor.erl
+%%% Author : Alexey Shchepin <alexey@process-one.net>
+%%% Description : Ejabberd watchdog
+%%% Created : 21 Mar 2007 by Alexey Shchepin <alexey@process-one.net>
+%%% Id : $Id$
+%%%-------------------------------------------------------------------
+
+-module(ejabberd_system_monitor).
+-author('alexey@process-one.net').
+-vsn('$Revision$ ').
+
+-behaviour(gen_server).
+
+%% API
+-export([start_link/0,
+ process_command/3,
+ process_remote_command/1]).
+
+%% gen_server callbacks
+-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
+ terminate/2, code_change/3]).
+
+-include("ejabberd.hrl").
+-include("jlib.hrl").
+
+-record(state, {}).
+
+%%====================================================================
+%% API
+%%====================================================================
+%%--------------------------------------------------------------------
+%% Function: start_link() -> {ok,Pid} | ignore | {error,Error}
+%% Description: Starts the server
+%%--------------------------------------------------------------------
+%% Starts the watchdog gen_server and registers it locally under the
+%% module name.
+start_link() ->
+    ServerName = {local, ?MODULE},
+    gen_server:start_link(ServerName, ?MODULE, [], []).
+
+%% Callback registered on local_send_to_resource_hook by init/1.
+%% Only stanzas addressed to the "watchdog" resource with an empty user
+%% part are considered.  Returns 'stop' once a message from a configured
+%% watchdog admin has been handed to a command worker, 'ok' otherwise.
+process_command(From, #jid{luser = "", lresource = "watchdog"} = To, Packet) ->
+    {xmlelement, Name, _Attrs, _Els} = Packet,
+    process_watchdog_stanza(Name, From, To, Packet);
+process_command(_From, _To, _Packet) ->
+    ok.
+
+%% Only "message" stanzas can carry a command; other stanza names are
+%% left untouched.
+process_watchdog_stanza("message", From, To, Packet) ->
+    %% The sender is compared without its resource, in lowercased form.
+    Sender = jlib:jid_tolower(jlib:jid_remove_resource(From)),
+    case lists:member(Sender, get_admin_jids()) of
+        true ->
+            Body = xml:get_path_s(Packet, [{elem, "body"}, cdata]),
+            %% The command runs in a separate high-priority process;
+            %% this function returns without waiting for it.
+            spawn(fun() ->
+                          process_flag(priority, high),
+                          process_command1(From, To, Body)
+                  end),
+            stop;
+        false ->
+            ok
+    end;
+process_watchdog_stanza(_Name, _From, _To, _Packet) ->
+    ok.
+
+%%====================================================================
+%% gen_server callbacks
+%%====================================================================
+
+%%--------------------------------------------------------------------
+%% Function: init(Args) -> {ok, State} |
+%% {ok, State, Timeout} |
+%% ignore |
+%% {stop, Reason}
+%% Description: Initiates the server
+%%--------------------------------------------------------------------
+init([]) ->
+    process_flag(priority, high),
+    %% Subscribe this process to large_heap system-monitor events with a
+    %% threshold of 1000000 (heap words, per erlang:system_monitor/2).
+    erlang:system_monitor(self(), [{large_heap, 1000000}]),
+    lists:foreach(fun add_command_hook/1, ?MYHOSTS),
+    {ok, #state{}}.
+
+%% Registers process_command/3 on Host's local_send_to_resource_hook
+%% with sequence number 50.
+add_command_hook(Host) ->
+    ejabberd_hooks:add(local_send_to_resource_hook, Host,
+                       ?MODULE, process_command, 50).
+
+%%--------------------------------------------------------------------
+%% Function: handle_call(Request, From, State) -> {reply, Reply, State} |
+%% {reply, Reply, State, Timeout} |
+%% {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, Reply, State} |
+%% {stop, Reason, State}
+%% Description: Handling call messages
+%%--------------------------------------------------------------------
+%% No synchronous requests are defined: every call is answered with 'ok'
+%% and the state is left unchanged.
+handle_call(_Request, _From, State) ->
+    {reply, ok, State}.
+
+%%--------------------------------------------------------------------
+%% Function: handle_cast(Msg, State) -> {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State}
+%% Description: Handling cast messages
+%%--------------------------------------------------------------------
+%% Casts are not part of this server's protocol; they are dropped.
+handle_cast(_Cast, State) ->
+    {noreply, State}.
+
+%%--------------------------------------------------------------------
+%% Function: handle_info(Info, State) -> {noreply, State} |
+%% {noreply, State, Timeout} |
+%% {stop, Reason, State}
+%% Description: Handling all non call/cast messages
+%%--------------------------------------------------------------------
+%% A large_heap monitor event is reported from a freshly spawned
+%% high-priority process, so this server returns to its loop at once.
+%% Any other message is ignored.
+handle_info({monitor, Pid, large_heap, Info}, State) ->
+    Report = fun() ->
+                     process_flag(priority, high),
+                     process_large_heap(Pid, Info)
+             end,
+    spawn(Report),
+    {noreply, State};
+handle_info(_Other, State) ->
+    {noreply, State}.
+
+%%--------------------------------------------------------------------
+%% Function: terminate(Reason, State) -> void()
+%% Description: This function is called by a gen_server when it is about to
+%% terminate. It should be the opposite of Module:init/1 and do any necessary
+%% cleaning up. When it returns, the gen_server terminates with Reason.
+%% The return value is ignored.
+%%--------------------------------------------------------------------
+%% Performs no cleanup: the hooks added in init/1 are left registered.
+terminate(_Why, _State) ->
+    ok.
+
+%%--------------------------------------------------------------------
+%% Function: code_change(OldVsn, State, Extra) -> {ok, NewState}
+%% Description: Convert process state when code is changed
+%%--------------------------------------------------------------------
+%% The state record has no fields, so it is carried over unchanged.
+code_change(_FromVsn, State, _Extra) ->
+    {ok, State}.
+
+%%--------------------------------------------------------------------
+%%% Internal functions
+%%--------------------------------------------------------------------
+
+%% Builds a report about Pid (the subject of a large_heap event) and
+%% sends it as a chat message from the "watchdog" resource of the main
+%% host to every JID listed in the watchdog_admins option.  Does nothing
+%% when the option is missing, not a list, or empty.
+process_large_heap(Pid, Info) ->
+    Host = ?MYNAME,
+    case ejabberd_config:get_local_option(watchdog_admins) of
+        [_ | _] = Admins ->
+            Details = detailed_info(Pid),
+            Report = io_lib:format(
+                       "(~w) The process ~w is consuming too much memory: ~w.~n"
+                       "~s",
+                       [node(), Pid, Info, Details]),
+            Sender = jlib:make_jid("", Host, "watchdog"),
+            lists:foreach(
+              fun(Admin) -> notify_admin(Sender, Admin, Report) end,
+              Admins);
+        _ ->
+            ok
+    end.
+
+%% Sends Report to one configured admin; entries that do not parse as a
+%% JID are skipped silently.
+notify_admin(Sender, AdminString, Report) ->
+    case jlib:string_to_jid(AdminString) of
+        error ->
+            ok;
+        AdminJID ->
+            send_message(Sender, AdminJID, Report)
+    end.
+
+%% Routes a chat-type message from From to To.  Body may be any deep
+%% character list (for example the result of io_lib:format/2); it is
+%% flattened before being stored as the cdata of the <body/> element.
+send_message(From, To, Body) ->
+    Text = lists:flatten(Body),
+    BodyEl = {xmlelement, "body", [], [{xmlcdata, Text}]},
+    Message = {xmlelement, "message", [{"type", "chat"}], [BodyEl]},
+    ejabberd_router:route(From, To, Message).
+
+%% Returns the watchdog_admins option as a list of lowercased JIDs
+%% (jlib:jid_tolower/1 form).  Entries that do not parse are dropped;
+%% a missing or non-list option yields [].
+%% NOTE(review): process_command/3 compares against a sender whose
+%% resource was removed, so an entry configured with a resource
+%% presumably never matches -- confirm against jlib.
+get_admin_jids() ->
+    case ejabberd_config:get_local_option(watchdog_admins) of
+        Admins when is_list(Admins) ->
+            [jlib:jid_tolower(JID)
+             || AdminString <- Admins,
+                JID <- [jlib:string_to_jid(AdminString)],
+                JID =/= error];
+        _ ->
+            []
+    end.
+
+%% Produces the descriptive part of a large-heap report.  The first
+%% entry of the process's '$ancestors' dictionary value selects a
+%% specialised report; everything else gets the generic dump of
+%% detailed_info1/1.
+detailed_info(Pid) ->
+    case first_ancestor(Pid) of
+        ejabberd_c2s_sup ->
+            c2s_info(Pid);
+        ejabberd_s2s_out_sup ->
+            s2s_out_info(Pid);
+        ejabberd_service_sup ->
+            service_info(Pid);
+        _ ->
+            detailed_info1(Pid)
+    end.
+
+%% Head of the '$ancestors' list stored in Pid's process dictionary, or
+%% 'undefined' when the dictionary cannot be read or has no such entry.
+first_ancestor(Pid) ->
+    case process_info(Pid, dictionary) of
+        {dictionary, Dict} ->
+            case lists:keysearch('$ancestors', 1, Dict) of
+                {value, {'$ancestors', [Sup | _]}} ->
+                    Sup;
+                _ ->
+                    undefined
+            end;
+        _ ->
+            undefined
+    end.
+
+%% Generic report: pretty-prints a fixed set of process_info/2 items
+%% for Pid, in the order listed below.
+detailed_info1(Pid) ->
+    Items = [current_function,
+             initial_call,
+             message_queue_len,
+             links,
+             dictionary,
+             heap_size,
+             stack_size],
+    io_lib:format("~p", [[process_info(Pid, Item) || Item <- Items]]).
+
+%% Report for a process started under ejabberd_c2s_sup: the process
+%% type, an optional note about its message queue, and the watchdog
+%% command that would kill it.
+c2s_info(Pid) ->
+    QueueNote = check_send_queue(Pid),
+    KillHint = io_lib:format("Command to kill this process: kill ~s ~w",
+                             [atom_to_list(node()), Pid]),
+    ["Process type: c2s", QueueNote, "\n", KillHint].
+
+%% Report for a process started under ejabberd_s2s_out_sup.  When the
+%% s2s table holds exactly one entry for Pid, the connection's
+%% {From, To} pair is included in the report.
+s2s_out_info(Pid) ->
+    MatchSpec = [{{s2s, '$1', Pid, '_'}, [], ['$1']}],
+    ConnectionLine =
+        case mnesia:dirty_select(s2s, MatchSpec) of
+            [{From, To}] ->
+                "\n" ++ io_lib:format("S2S connection: from ~s to ~s",
+                                      [From, To]);
+            _ ->
+                ""
+        end,
+    QueueNote = check_send_queue(Pid),
+    KillHint = io_lib:format("Command to kill this process: kill ~s ~w",
+                             [atom_to_list(node()), Pid]),
+    ["Process type: s2s_out", ConnectionLine, QueueNote, "\n", KillHint].
+
+%% Report for a process started under ejabberd_service_sup.  When the
+%% route table holds exactly one entry for Pid, the domain it serves is
+%% included in the report, followed by an optional note about the
+%% message queue and the watchdog command that would kill the process.
+service_info(Pid) ->
+    Routes = mnesia:dirty_select(
+               route, [{{route, '$1', Pid, '_'}, [], ['$1']}]),
+    %% The label used to read "s2s_out", copied from s2s_out_info/1,
+    %% which mislabelled every report about a service process.
+    ["Process type: service",
+     case Routes of
+         [Route] ->
+             "\nServiced domain: " ++ Route;
+         _ ->
+             ""
+     end,
+     check_send_queue(Pid),
+     "\n",
+     io_lib:format("Command to kill this process: kill ~s ~w",
+                   [atom_to_list(node()), Pid])].
+
+%% Returns a "Possible reason" note when Pid has more than 100 queued
+%% messages, and "" otherwise (including when process_info/2 yields no
+%% data for Pid).
+check_send_queue(Pid) ->
+    case {process_info(Pid, current_function),
+          process_info(Pid, message_queue_len)} of
+        {{current_function, MFA}, {message_queue_len, QueueLen}}
+          when QueueLen > 100 ->
+            backlog_reason(MFA);
+        _ ->
+            ""
+    end.
+
+%% Explains a long message queue from the function the process is
+%% currently executing.
+backlog_reason({prim_inet, send, 2}) ->
+    "\nPossible reason: the process is blocked "
+    "trying to send data over its TCP connection.";
+backlog_reason({M, F, A}) ->
+    ["\nPossible reason: the process can't process "
+     "messages faster than they arrive.  ",
+     io_lib:format("Current function is ~w:~w/~w",
+                   [M, F, A])
+    ].
+
+%% Splits the message body on spaces and dispatches the resulting words
+%% as a watchdog command.
+process_command1(From, To, Body) ->
+    Words = string:tokens(Body, " "),
+    process_command2(Words, From, To).
+
+%% Executes a tokenised watchdog command and answers the sender.
+%%   ["kill", Node, Pid] -- kill process Pid on node Node
+%%   anything else (including ["help"]) -- reply with the help text
+%% From is the admin who sent the command, To the watchdog address it
+%% was sent to; replies therefore go from To back to From.
+process_command2(["kill", SNode, SPid], From, To) ->
+    %% SNode is free text taken from a message body.  Atoms are never
+    %% garbage collected, so the name is only converted when the atom
+    %% already exists instead of creating a new one per typo.
+    try list_to_existing_atom(SNode) of
+        Node ->
+            remote_command(Node, [kill, SPid], From, To)
+    catch
+        error:badarg ->
+            send_message(To, From,
+                         "Command failed:\nunknown node " ++ SNode)
+    end;
+process_command2(_, From, To) ->
+    send_message(To, From, help()).
+
+%% Help text sent in reply to "help" and to unrecognised commands.
+help() ->
+    "Commands:\n kill <node> <pid>".
+
+%% Runs process_remote_command(Args) on Node through rpc and sends the
+%% outcome back to the admin (From) on behalf of the watchdog (To).
+remote_command(Node, Args, From, To) ->
+    Outcome = rpc:call(Node, ?MODULE, process_remote_command, [Args]),
+    send_message(To, From, rpc_reply_text(Outcome)).
+
+%% A failed rpc is rendered as an error text; any other result is
+%% passed through unchanged as the reply body.
+rpc_reply_text({badrpc, Reason}) ->
+    io_lib:format("Command failed:~n~p", [Reason]);
+rpc_reply_text(Result) ->
+    Result.
+
+%% Entry point called through rpc by remote_command/4 on the target
+%% node.  [kill, SPid] sends an untrappable 'kill' exit signal to the
+%% process whose textual pid is SPid and returns "ok"; any other
+%% argument throws unknown_command.
+process_remote_command([kill, SPid]) ->
+    Victim = list_to_pid(SPid),
+    exit(Victim, kill),
+    "ok";
+process_remote_command(_Unknown) ->
+    throw(unknown_command).
+