summaryrefslogblamecommitdiff
path: root/src/ejabberd_system_monitor.erl
blob: 3f6c0566771e6889d4ede50e8414ee0ee3f2a53c (plain) (tree)
1
2
3
4
5
6
7
8




                                                                      

   
                                                  









                                                                      
   


                                                                           
   


                                                                      
 

                            
                                  



                       
                                                          

                                    
                                              
                                                                 

                         
                       
 











                                                                      
                                    


                                                     
                              

                                                          


                                    




                                                         
                                                           

                                                             
                                                    










                                                                         

        



                                                         










                                                                      

                                               
                                 
                                                      
                                                 










                                                                            

                                               



                                                                  


                                               
                                      
                                      
 


                                          
 



                                             





                                                                      
                                             







                                                                      
                   



                                               
                                              







                                                                             
                                 




                                                                      
                                                   





                                                                      






                                                                  
                                                  

                                                        
                              
                                                                       
                             

                               


                                         





                                                                           

                                                                    

                   
                               

                      

                                 

                                                      


                                         











                                                            


                      






                                                                            

                

                                                    
                                                              
                                                                   

                    


                                                                        
                   
                      


                                                            
                  
         
                                     
                                                              
                                                                   

                    


                                                                          
                   

                                                          
         
                                     
                                                              
                                                                   


                                              
















                                                                          


                                   
                                                          
 

                                                        
                                                 

                                                    
                                             



                                                      
                                                      
                                           




                                   

                                                 

                                       
                                                       






                                                                   

                                       
                                            
                                   

                                                  

                                                   





                                                        


                            
                                                              




                                                      
%%%-------------------------------------------------------------------
%%% File    : ejabberd_system_monitor.erl
%%% Author  : Alexey Shchepin <alexey@process-one.net>
%%% Description : Ejabberd watchdog
%%% Created : 21 Mar 2007 by Alexey Shchepin <alexey@process-one.net>
%%%
%%%
%%% ejabberd, Copyright (C) 2002-2016   ProcessOne
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%
%%%-------------------------------------------------------------------

-module(ejabberd_system_monitor).

-behaviour(ejabberd_config).

-author('alexey@process-one.net').

-behaviour(gen_server).

%% API
-export([start_link/0, process_command/3, register_hook/1,
	 process_remote_command/1]).

-export([init/1, handle_call/3, handle_cast/2,
	 handle_info/2, terminate/2, code_change/3, opt_type/1]).

-include("ejabberd.hrl").
-include("logger.hrl").

-include("jlib.hrl").

-record(state, {}).

%%====================================================================
%% API
%%====================================================================
%%--------------------------------------------------------------------
%% Function: start_link() -> {ok,Pid} | ignore | {error,Error}
%% Description: Starts the server
%%--------------------------------------------------------------------
start_link() ->
    LH = ejabberd_config:get_option(
           watchdog_large_heap,
           fun(I) when is_integer(I), I > 0 -> I end,
	   1000000),
    Opts = [{large_heap, LH}],
    gen_server:start_link({local, ?MODULE}, ?MODULE, Opts,
			  []).

process_command(From, To, Packet) ->
    case To of
      #jid{luser = <<"">>, lresource = <<"watchdog">>} ->
	  #xmlel{name = Name} = Packet,
	  case Name of
	    <<"message">> ->
		LFrom =
		    jid:tolower(jid:remove_resource(From)),
		case lists:member(LFrom, get_admin_jids()) of
		  true ->
		      Body = fxml:get_path_s(Packet,
					    [{elem, <<"body">>}, cdata]),
		      spawn(fun () ->
				    process_flag(priority, high),
				    process_command1(From, To, Body)
			    end),
		      stop;
		  false -> ok
		end;
	    _ -> ok
	  end;
      _ -> ok
    end.

register_hook(Host) ->
    ejabberd_hooks:add(local_send_to_resource_hook, Host,
		       ?MODULE, process_command, 50).

%%====================================================================
%% gen_server callbacks
%%====================================================================

%%--------------------------------------------------------------------
%% Function: init(Args) -> {ok, State} |
%%                         {ok, State, Timeout} |
%%                         ignore               |
%%                         {stop, Reason}
%% Description: Initiates the server
%%--------------------------------------------------------------------
init(Opts) ->
    LH = proplists:get_value(large_heap, Opts),
    process_flag(priority, high),
    erlang:system_monitor(self(), [{large_heap, LH}]),
    lists:foreach(fun register_hook/1, ?MYHOSTS),
    {ok, #state{}}.

%%--------------------------------------------------------------------
%% Function: %% handle_call(Request, From, State) -> {reply, Reply, State} |
%%                                      {reply, Reply, State, Timeout} |
%%                                      {noreply, State} |
%%                                      {noreply, State, Timeout} |
%%                                      {stop, Reason, Reply, State} |
%%                                      {stop, Reason, State}
%% Description: Handling call messages
%%--------------------------------------------------------------------
handle_call({get, large_heap}, _From, State) ->
    {reply, get_large_heap(), State};
handle_call({set, large_heap, NewValue}, _From,
	    State) ->
    MonSettings = erlang:system_monitor(self(),
					[{large_heap, NewValue}]),
    OldLH = get_large_heap(MonSettings),
    NewLH = get_large_heap(),
    {reply, {lh_changed, OldLH, NewLH}, State};
handle_call(_Request, _From, State) ->
    Reply = ok, {reply, Reply, State}.

get_large_heap() ->
    MonSettings = erlang:system_monitor(),
    get_large_heap(MonSettings).

get_large_heap(MonSettings) ->
    {_MonitorPid, Options} = MonSettings,
    proplists:get_value(large_heap, Options).

%%--------------------------------------------------------------------
%% Function: handle_cast(Msg, State) -> {noreply, State} |
%%                                      {noreply, State, Timeout} |
%%                                      {stop, Reason, State}
%% Description: Handling cast messages
%%--------------------------------------------------------------------
handle_cast(_Msg, State) -> {noreply, State}.

%%--------------------------------------------------------------------
%% Function: handle_info(Info, State) -> {noreply, State} |
%%                                       {noreply, State, Timeout} |
%%                                       {stop, Reason, State}
%% Description: Handling all non call/cast messages
%%--------------------------------------------------------------------
handle_info({monitor, Pid, large_heap, Info}, State) ->
    spawn(fun () ->
		  process_flag(priority, high),
		  process_large_heap(Pid, Info)
	  end),
    {noreply, State};
handle_info(_Info, State) -> {noreply, State}.

%%--------------------------------------------------------------------
%% Function: terminate(Reason, State) -> void()
%% Description: This function is called by a gen_server when it is about to
%% terminate. It should be the opposite of Module:init/1 and do any necessary
%% cleaning up. When it returns, the gen_server terminates with Reason.
%% The return value is ignored.
%%--------------------------------------------------------------------
terminate(_Reason, _State) -> ok.

%%--------------------------------------------------------------------
%% Func: code_change(OldVsn, State, Extra) -> {ok, NewState}
%% Description: Convert process state when code is changed
%%--------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) -> {ok, State}.

%%--------------------------------------------------------------------
%%% Internal functions
%%--------------------------------------------------------------------

process_large_heap(Pid, Info) ->
    Host = (?MYNAME),
    JIDs = get_admin_jids(),
    DetailedInfo = detailed_info(Pid),
    Body = iolist_to_binary(
             io_lib:format("(~w) The process ~w is consuming too "
                           "much memory:~n~p~n~s",
                           [node(), Pid, Info, DetailedInfo])),
    From = jid:make(<<"">>, Host, <<"watchdog">>),
    Hint = [#xmlel{name = <<"no-permanent-store">>,
		   attrs = [{<<"xmlns">>, ?NS_HINTS}]}],
    lists:foreach(fun (JID) ->
                          send_message(From, jid:make(JID), Body, Hint)
                  end, JIDs).

send_message(From, To, Body) ->
    send_message(From, To, Body, []).

send_message(From, To, Body, ExtraEls) ->
    ejabberd_router:route(From, To,
			  #xmlel{name = <<"message">>,
				 attrs = [{<<"type">>, <<"chat">>}],
				 children =
				     [#xmlel{name = <<"body">>, attrs = [],
					     children =
						 [{xmlcdata, Body}]}
				      | ExtraEls]}).

get_admin_jids() ->
    ejabberd_config:get_option(
      watchdog_admins,
      fun(JIDs) ->
              [jid:tolower(
                 jid:from_string(
                   iolist_to_binary(S))) || S <- JIDs]
      end, []).

detailed_info(Pid) ->
    case process_info(Pid, dictionary) of
      {dictionary, Dict} ->
	  case lists:keysearch('$ancestors', 1, Dict) of
	    {value, {'$ancestors', [Sup | _]}} ->
		case Sup of
		  ejabberd_c2s_sup -> c2s_info(Pid);
		  ejabberd_s2s_out_sup -> s2s_out_info(Pid);
		  ejabberd_service_sup -> service_info(Pid);
		  _ -> detailed_info1(Pid)
		end;
	    _ -> detailed_info1(Pid)
	  end;
      _ -> detailed_info1(Pid)
    end.

detailed_info1(Pid) ->
    io_lib:format("~p",
		  [[process_info(Pid, current_function),
		    process_info(Pid, initial_call),
		    process_info(Pid, message_queue_len),
		    process_info(Pid, links), process_info(Pid, dictionary),
		    process_info(Pid, heap_size),
		    process_info(Pid, stack_size)]]).

c2s_info(Pid) ->
    [<<"Process type: c2s">>, check_send_queue(Pid),
     <<"\n">>,
     io_lib:format("Command to kill this process: kill ~s ~w",
		   [iolist_to_binary(atom_to_list(node())), Pid])].

s2s_out_info(Pid) ->
    FromTo = mnesia:dirty_select(s2s,
				 [{{s2s, '$1', Pid, '_'}, [], ['$1']}]),
    [<<"Process type: s2s_out">>,
     case FromTo of
       [{From, To}] ->
	   <<"\n",
	     (io_lib:format("S2S connection: from ~s to ~s",
			    [From, To]))/binary>>;
       _ -> <<"">>
     end,
     check_send_queue(Pid), <<"\n">>,
     io_lib:format("Command to kill this process: kill ~s ~w",
		   [iolist_to_binary(atom_to_list(node())), Pid])].

service_info(Pid) ->
    Routes = mnesia:dirty_select(route,
				 [{{route, '$1', Pid, '_'}, [], ['$1']}]),
    [<<"Process type: s2s_out">>,
     case Routes of
       [Route] -> <<"\nServiced domain: ", Route/binary>>;
       _ -> <<"">>
     end,
     check_send_queue(Pid), <<"\n">>,
     io_lib:format("Command to kill this process: kill ~s ~w",
		   [iolist_to_binary(atom_to_list(node())), Pid])].

check_send_queue(Pid) ->
    case {process_info(Pid, current_function),
	  process_info(Pid, message_queue_len)}
	of
      {{current_function, MFA}, {message_queue_len, MLen}} ->
	  if MLen > 100 ->
		 case MFA of
		   {prim_inet, send, 2} ->
		       <<"\nPossible reason: the process is blocked "
			 "trying to send data over its TCP connection.">>;
		   {M, F, A} ->
		       [<<"\nPossible reason: the process can't "
			  "process messages faster than they arrive.  ">>,
			io_lib:format("Current function is ~w:~w/~w",
				      [M, F, A])]
		 end;
	     true -> <<"">>
	  end;
      _ -> <<"">>
    end.

process_command1(From, To, Body) ->
    process_command2(str:tokens(Body, <<" ">>), From, To).

process_command2([<<"kill">>, SNode, SPid], From, To) ->
    Node = jlib:binary_to_atom(SNode),
    remote_command(Node, [kill, SPid], From, To);
process_command2([<<"showlh">>, SNode], From, To) ->
    Node = jlib:binary_to_atom(SNode),
    remote_command(Node, [showlh], From, To);
process_command2([<<"setlh">>, SNode, NewValueString],
		 From, To) ->
    Node = jlib:binary_to_atom(SNode),
    NewValue = jlib:binary_to_integer(NewValueString),
    remote_command(Node, [setlh, NewValue], From, To);
process_command2([<<"help">>], From, To) ->
    send_message(To, From, help());
process_command2(_, From, To) ->
    send_message(To, From, help()).

help() ->
    <<"Commands:\n  kill <node> <pid>\n  showlh "
      "<node>\n  setlh <node> <integer>">>.

remote_command(Node, Args, From, To) ->
    Message = case ejabberd_cluster:call(Node, ?MODULE,
			    process_remote_command, [Args])
		  of
		{badrpc, Reason} ->
		    io_lib:format("Command failed:~n~p", [Reason]);
		Result -> Result
	      end,
    send_message(To, From, iolist_to_binary(Message)).

process_remote_command([kill, SPid]) ->
    exit(list_to_pid(SPid), kill), <<"ok">>;
process_remote_command([showlh]) ->
    Res = gen_server:call(ejabberd_system_monitor,
			  {get, large_heap}),
    io_lib:format("Current large heap: ~p", [Res]);
process_remote_command([setlh, NewValue]) ->
    {lh_changed, OldLH, NewLH} =
	gen_server:call(ejabberd_system_monitor,
			{set, large_heap, NewValue}),
    io_lib:format("Result of set large heap: ~p --> ~p",
		  [OldLH, NewLH]);
process_remote_command(_) -> throw(unknown_command).

opt_type(watchdog_admins) ->
    fun (JIDs) ->
	    [jid:tolower(jid:from_string(iolist_to_binary(S)))
	     || S <- JIDs]
    end;
opt_type(watchdog_large_heap) ->
    fun (I) when is_integer(I), I > 0 -> I end;
opt_type(_) -> [watchdog_admins, watchdog_large_heap].