Written
on
Guard free space and status of your RAID devices
Problem
You might need to monitor the free disk space and the status of the RAID devices on a server without deploying a heavyweight monitoring system.
Solution
I wrote a simple Ruby script to do just that.
Also available as a plaintext file.
#!/usr/bin/ruby
# Simple script to guard free space AND raid devices
# Author: Michal Jirků (box at wejn dot org)
# Notes:
# 1) customize report_error and check = [] below to get the most out of it
# 2) this one is not idiot-proof. If you have $DF wrong, are missing
# sendmail, etc., it won't tell you about it in a user-friendly way
# config
# Path of the df executable that Partition#check runs.
$DF = '/bin/df'
# Path of the status file that RaidDevice#check reads.
$MDSTAT = '/proc/mdstat'
Partition = Struct.new(:device, :min_free)
# Free-space guard for a single block device.
#
# device   - device name relative to /dev (e.g. 'md0' or 'vg/home')
# min_free - minimum acceptable free space, in bytes
#
# Call check first; to_s then renders the outcome, one line per problem.
class Partition
  # Raises ArgumentError when either field is nil.
  def initialize(dev, mf)
    super(dev, mf)
    raise ArgumentError if device.nil? || min_free.nil?
  end

  # Runs "$DF --block-size=1 /dev/<device>" and compares the fourth
  # whitespace-separated field of the data row (the "Available" column in
  # GNU df's default layout) against min_free.
  #
  # Returns true when enough space is free, false otherwise; the reason is
  # available through to_s.
  def check
    @status = []
    # Block form closes the pipe and reaps the child process.
    output = IO.popen("#{$DF} --block-size=1 /dev/#{device}", 'r') { |io| io.read }
    rows = output.to_s.split("\n")
    # Drop the header line and glue the remaining lines into one record
    # (presumably because df can wrap a long device name onto its own line).
    # When df printed nothing on stdout -- e.g. the device does not exist --
    # rows[1..-1] is nil, so fall back to an empty record instead of crashing.
    record = (rows[1..-1] || []).join(' ')
    # NOTE(review): df may list LVM volumes under another name
    # (e.g. /dev/mapper/...), which makes this match fail -- confirm on the
    # target system.
    unless record.include?(device.to_s)
      @status << "P/#{device}: not found"
      return false
    end
    free = record.split(/\s+/)[3].to_i
    if free < min_free
      @status << "P/#{device}: #{hrs(free)}, want: #{hrs(min_free)}"
    end
    @status.empty?
  end

  # Human-readable size: scales by 1024 and truncates to a whole number.
  # Sizes beyond the largest known unit stay expressed in that unit
  # (e.g. "1024TB") instead of losing the suffix.
  def hrs(size)
    divisor = 1024.0
    units = %w{ B KB MB GB TB }
    while size >= divisor && units.size > 1
      size /= divisor
      units.shift
    end
    "%d%s" % [size.to_i, units.first]
  end
  private :hrs

  # Report text for the most recent check.
  def to_s
    # check has not run yet, so there is nothing to report on.
    return "P/#{device}: not checked" if @status.nil?
    if @status.empty?
      "P/#{device}: OK"
    else
      @status.join("\n")
    end
  end
end
RaidDevice = Struct.new(:device, :members)
# Health guard for one software-RAID array listed in the $MDSTAT file.
#
# device  - array name as it appears at the start of its status line (e.g. 'md0')
# members - names of the component devices expected in that array
#
# Call check first; to_s then renders the outcome, one line per problem.
class RaidDevice
  # Raises ArgumentError when either field is nil.
  def initialize(dev, mem)
    super(dev, mem)
    raise ArgumentError if device.nil? || members.nil?
  end

  # Looks up the "<device> :" line in $MDSTAT and verifies every expected
  # member is listed on it and is not flagged "(F)".
  #
  # Returns true when all members are present and not failed, false
  # otherwise; the reason is available through to_s.
  def check
    @status = []
    # File.read closes the handle, copes with an empty file and has no
    # size cap.
    lines = File.read($MDSTAT).split("\n")
    line = lines.grep(/^#{Regexp.escape(device.to_s)}\s+:/).first
    if line.nil?
      @status << "R/#{device}: not found"
      return false
    end
    members.each do |member|
      # Anchor on the left so e.g. 'da1' cannot match inside 'sda1'.
      name = "(?:\\A|\\s)#{Regexp.escape(member.to_s)}"
      # Tolerate other single-letter flags between "[n]" and "(F)"
      # (presumably mdstat can print e.g. "(W)" first -- confirm against
      # the kernel in use).
      failed = /#{name}\[\d+\](?:\([A-Z]\))*\(F\)/
      listed = /#{name}\[\d+\]/
      @status << "R/#{device}: FAIL: #{member}" if line =~ failed
      @status << "R/#{device}: MISS: #{member}" if line !~ listed
    end
    @status.empty?
  end

  # Report text for the most recent check.
  def to_s
    # check has not run yet, so there is nothing to report on.
    return "R/#{device}: not checked" if @status.nil?
    if @status.empty?
      # Same "R/<device>:" prefix as every other message of this class.
      "R/#{device}: OK"
    else
      @status.join("\n")
    end
  end
end
# Customize this
# Byte multipliers for the free-space thresholds below.
mib = 1024 * 1024
gib = 1024 * mib

# Everything to verify on each run: Partition entries take a device name
# (relative to /dev) and the minimum free bytes; RaidDevice entries take an
# array name and the list of its expected members.
check = [
  #Partition.new('xvda2', 40 * gib),
  Partition.new('md1', 1 * gib),
  Partition.new('md0', 20 * mib),
  Partition.new('vg/home', 250 * mib),
  Partition.new('vg/opt', 32 * mib),
  Partition.new('vg/tmp', 128 * mib),
  Partition.new('vg/usr', 750 * mib),
  Partition.new('vg/var', 10 * gib),
  RaidDevice.new('md0', %w{ sdc1 sdb1 sda1 }),
  RaidDevice.new('md2', %w{ sdc3 sdb3 sda3 }),
  RaidDevice.new('md1', %w{ sdc2 sdb2 sda2 }),
  RaidDevice.new('md3', %w{ sdc4 sdb4 sda4 }),
]
# Mails an alert by piping a complete message (headers + body) into a
# sendmail-compatible command.
#
# what   - alert text; becomes the message body
# mailer - command line of a program that reads the message from stdin
#          (defaults to the system sendmail in "take recipients from the
#          headers" mode)
#
# Returns nil.
def report_error(what, mailer = "/usr/sbin/sendmail -t")
  host = `uname -n`.chomp
  message = [
    "From: alert@example.com",
    "To: admin@example.com",
    "Cc: pager@example.com",
    "Subject: #{host}:",
    # The empty line ends the header section; without it the alert text
    # would be parsed as (malformed) header lines.
    "",
    what,
    "(TS: #{Time.now.strftime("%Y%m%d %H%M%S")})"
  ].join("\n")
  # Block form closes the pipe and waits for the mailer to finish.
  IO.popen(mailer, "w") { |fl| fl.puts message }
end
# End customize this
# Run every configured check, collect the report text of the ones that
# failed, and send a single combined alert if there is anything to report.
failed = check.reject { |item| item.check }
alerts = failed.map { |item| item.to_s }
report_error(alerts.join("\n")) unless alerts.empty?
# vim: set ts=4 sw=4 ai ft=ruby :