Local modifications to ClusterLabs/Anvil by Alteeve
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

160 lines
7.0 KiB

#!/usr/bin/perl
#
# This daemon monitors and logs performance data. It is meant to help debug (potential) performance
# issues.
#
# NOTE: This is designed to be minimal overhead, so there is no attempt to connect to the database. As such,
# be mindful of what this daemon is used for.
#
use strict;
use warnings;
use Data::Dumper;
use Anvil::Tools;
# Work out this program's name (the part of $0 after the last '/') and the directory it was run from.
my $THIS_FILE = ($0 =~ /^.*\/(.*)$/)[0];
# If $0 has no directory part (for example, when run as 'perl <program>'), the match above fails. Fall back
# to $0 itself so that the log source and the 'man' name are never undefined.
$THIS_FILE = $0 if not defined $THIS_FILE;
# \Q..\E keeps any regex metacharacters in the program name from being interpreted as a pattern.
my $running_directory = ($0 =~ /^(.*?)\/\Q$THIS_FILE\E$/)[0];
if ((defined $running_directory) && ($running_directory =~ /^\./) && ($ENV{PWD}))
{
	# Turn a leading '.' into the current working directory.
	$running_directory =~ s/^\./$ENV{PWD}/;
}

# Turn off output buffering so that anything written to STDOUT (for example, when '--print' is used) shows up
# immediately instead of waiting for the buffer to fill.
$| = 1;

my $anvil = Anvil::Tools->new();

# Read switches
$anvil->Get->switches({list => [
	"detailed",
	"interval",
	"print",
	"run-once",
], man => $THIS_FILE});
$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => $anvil->data->{switches}});
$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 3, secure => 0, key => "log_0115", variables => { program => $THIS_FILE }});

# Calculate my sum so that we can exit if it changes later.
$anvil->Storage->record_md5sums;
my $next_md5sum_check = time + 30;

# The number of seconds to sleep between samples. Only a positive whole number is accepted; anything else
# (unset, non-numeric, or '0', which would turn the main loop into a busy loop) falls back to 5 seconds.
my $requested_interval = $anvil->data->{switches}{interval};
our $interval = ((defined $requested_interval) && ($requested_interval =~ /^\d+$/) && ($requested_interval > 0)) ? $requested_interval : 5;

# These are read by record_data(); '$print' is passed on to the logger and '$detailed' selects the verbose
# output.
our $print    = $anvil->data->{switches}{'print'} ? 1 : 0;
our $detailed = $anvil->data->{switches}{detailed} ? 1 : 0;

# Now go into the main loop
while(1)
{
	record_data($anvil);
	
	if ($anvil->data->{switches}{'run-once'})
	{
		# We're done.
		$anvil->nice_exit({exit_code => 0});
	}
	
	# Check the md5sums at most once every 30 seconds.
	if (time > $next_md5sum_check)
	{
		$next_md5sum_check = time + 30;
		$anvil->Log->variables({source => $THIS_FILE, line => __LINE__, level => 3, list => { next_md5sum_check => $next_md5sum_check }});
		if ($anvil->Storage->check_md5sums)
		{
			# NOTE: We exit with '0' to prevent systemctl from showing a scary red message.
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, level => 0, priority => "alert", key => "message_0014"});
			$anvil->nice_exit({exit_code => 0});
		}
	}
	
	sleep $interval;
}
# This collects the current load data and records it via Log->entry.
# 
# Parameters;
# - $anvil: The Anvil::Tools handle.
# 
# This reads the file-level '$detailed', '$print' and '$THIS_FILE' variables. When '$detailed' is set, the
# load percentages, load averages, process counts, averaged and per-core CPU times and per-device storage
# IO are each logged as their own entry. Otherwise, a single condensed entry is logged. '$print' is passed
# to every Log->entry call as 'print'.
# 
# This always returns '0'.
sub record_data
{
	my ($anvil) = @_;
	
	# NOTE(review): This call presumably (re)populates the '$anvil->data->{loads}{...}' values read below;
	#               confirm against Get->load_average().
	$anvil->Get->load_average({debug => 3});
	
	if ($detailed)
	{
		# The CPU and storage times below are divided by 100 before being formatted. Per the storage
		# note further down, the raw values are in 1/100ths of a second.
		my $scale_and_format = sub
		{
			my ($raw_value) = @_;
			return($anvil->Convert->add_commas({number => ($raw_value / 100)}));
		};
		
		# Show the CPU load percents.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => $print, level => 1, key => "log_0840", variables => {
			iowait   => $anvil->data->{loads}{load_percent}{iowait},
			user     => $anvil->data->{loads}{load_percent}{user},
			steal    => $anvil->data->{loads}{load_percent}{steal},
			idle     => $anvil->data->{loads}{load_percent}{idle},
			nice     => $anvil->data->{loads}{load_percent}{nice},
			'system' => $anvil->data->{loads}{load_percent}{'system'},
		}});
		
		# Show the CPU load levels
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => $print, level => 1, key => "log_0835", variables => {
			one_minute   => $anvil->data->{loads}{load_average}{one_minute},
			five_minutes => $anvil->data->{loads}{load_average}{five_minute},
			ten_minutes  => $anvil->data->{loads}{load_average}{ten_minute},
		}});
		
		# Show the processes. We're always in detailed mode here, so the detailed message key is used
		# directly.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => $print, level => 1, key => "log_0841", variables => {
			total      => $anvil->Convert->add_commas({number => $anvil->data->{loads}{processes}{total}}),
			running    => $anvil->Convert->add_commas({number => $anvil->data->{loads}{processes}{running}}),
			blocked    => $anvil->Convert->add_commas({number => $anvil->data->{loads}{processes}{blocked}}),
			interrupts => $anvil->Convert->add_commas({number => $anvil->data->{loads}{interrupts}{total}}),
		}});
		
		# CPU average load times
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => $print, level => 1, key => "log_0837", variables => {
			io_wait        => $scale_and_format->($anvil->data->{loads}{cpu}{average}{io_wait}),
			user_mode      => $scale_and_format->($anvil->data->{loads}{cpu}{average}{user_mode}),
			user_mode_nice => $scale_and_format->($anvil->data->{loads}{cpu}{average}{user_mode_nice}),
			system_mode    => $scale_and_format->($anvil->data->{loads}{cpu}{average}{system_mode}),
			idle_tasks     => $scale_and_format->($anvil->data->{loads}{cpu}{average}{idle_tasks}),
			hard_irq       => $scale_and_format->($anvil->data->{loads}{cpu}{average}{hard_irq}),
			soft_irq       => $scale_and_format->($anvil->data->{loads}{cpu}{average}{soft_irq}),
		}});
		
		# Show per-cores, sorted numerically by core.
		foreach my $core (sort {$a <=> $b} keys %{$anvil->data->{loads}{cpu}{core}})
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => $print, level => 1, key => "log_0838", variables => {
				core           => $core,
				user_mode      => $scale_and_format->($anvil->data->{loads}{cpu}{core}{$core}{user_mode}),
				user_mode_nice => $scale_and_format->($anvil->data->{loads}{cpu}{core}{$core}{user_mode_nice}),
				system_mode    => $scale_and_format->($anvil->data->{loads}{cpu}{core}{$core}{system_mode}),
				idle_tasks     => $scale_and_format->($anvil->data->{loads}{cpu}{core}{$core}{idle_tasks}),
				hard_irq       => $scale_and_format->($anvil->data->{loads}{cpu}{core}{$core}{hard_irq}),
				soft_irq       => $scale_and_format->($anvil->data->{loads}{cpu}{core}{$core}{soft_irq}),
			}});
		}
		
		# For each storage device (sorted by name), this shows the number of IO operations in progress
		# and the weighted time (recorded in 1/100ths of a second) spent doing those IOs.
		foreach my $device_name (sort {$a cmp $b} keys %{$anvil->data->{loads}{storage}})
		{
			$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => $print, level => 1, key => "log_0839", variables => {
				device_name         => $device_name,
				in_progress         => $anvil->data->{loads}{storage}{$device_name}{ios_currently_in_progress},
				weighted_time_spent => $scale_and_format->($anvil->data->{loads}{storage}{$device_name}{weighted_time_spent_doing_ios}),
			}});
		}
	}
	else
	{
		# This is much more condensed; the load averages, IO wait percentage and the running and
		# blocked process counts go into a single entry.
		$anvil->Log->entry({source => $THIS_FILE, line => __LINE__, 'print' => $print, level => 1, key => "log_0846", variables => {
			one_minute   => $anvil->data->{loads}{load_average}{one_minute},
			five_minutes => $anvil->data->{loads}{load_average}{five_minute},
			ten_minutes  => $anvil->data->{loads}{load_average}{ten_minute},
			iowait       => $anvil->data->{loads}{load_percent}{iowait},
			running      => $anvil->Convert->add_commas({number => $anvil->data->{loads}{processes}{running}}),
			blocked      => $anvil->Convert->add_commas({number => $anvil->data->{loads}{processes}{blocked}}),
		}});
	}
	
	return(0);
}