Guard free space and status of your RAID devices


Problem

You might need to monitor the free disk space and the status of the RAID devices on a server, without resorting to heavyweight monitoring tools.

Solution

I wrote a simple Ruby script to do just that.

The script is also available as a plaintext file.

#!/usr/bin/ruby

# Simple script to guard free space AND raid devices
# Author: Michal Jirků (box at wejn dot org)
# Notes:
#  1) customize report_error and check = [] below to get the most out of it
#  2) this one is not idiot-proof. If you have $DF wrong, are missing
#     sendmail, etc., it won't tell you about it in a user-friendly way

# config
# Command used by Partition#check to query free space; it is run as
# "$DF --block-size=1 /dev/<device>".
$DF = '/bin/df'
# File that RaidDevice#check reads to find each array's status line.
$MDSTAT = '/proc/mdstat'

Partition = Struct.new(:device, :min_free)

# Free-space check for a single block device, based on the output of $DF.
#
# device   - device name relative to /dev (e.g. 'md1' or 'vg/home')
# min_free - minimum acceptable amount of available space, in bytes
class Partition
	# Raises ArgumentError when either the device or the threshold is nil.
	def initialize(dev, mf)
		super(dev, mf)
		raise ArgumentError, 'device and min_free are required' if device.nil? || min_free.nil?
		# Start with an empty status so to_s is usable even before check ran.
		@status = []
	end

	# Runs "$DF --block-size=1 /dev/<device>" and compares the "Available"
	# column with min_free.
	#
	# Returns true when the device was found and has at least min_free bytes
	# available; otherwise returns false and records the reason for to_s.
	def check
		@status = []
		# Block form closes the pipe and reaps the child process on exit.
		output = IO.popen("#{$DF} --block-size=1 /dev/#{device}", 'r') { |io| io.read }
		# Drop the header line. df may wrap a long device name onto its own
		# line, so the remaining lines are glued back into a single record.
		# An empty output yields nil from the slice, hence the fallback.
		line = (output.to_s.split("\n")[1..-1] || []).join(' ')
		if line.empty? || line !~ /#{Regexp.escape(device.to_s)}/
			@status << "P/#{device}: not found"
			return false
		end
		# Columns: filesystem, size, used, available, ...
		free = line.split(/\s+/)[3].to_i
		if free < min_free
			@status << "P/#{device}: #{hrs(free)}, want: #{hrs(min_free)}"
		end
		@status.empty?
	end

	# Human-readable size: scales by 1024 and truncates to an integer.
	# Values beyond the largest known unit stay expressed in that unit
	# (e.g. "2048TB") instead of losing their suffix.
	def hrs(size)
		divisor = 1024.0
		units = %w{ B KB MB GB TB }
		while size >= divisor && units.size > 1
			size /= divisor
			units.shift
		end

		# Use "%.3f%s" % [size, units.first] here for fractional output.
		"%d%s" % [size.to_i, units.first]
	end
	private :hrs

	# Report of the last check: the recorded problems, one per line, or
	# "P/<device>: OK" when there are none (or check has not run yet).
	def to_s
		if @status.nil? || @status.empty?
			"P/#{device}: OK"
		else
			@status.join("\n")
		end
	end
end

RaidDevice = Struct.new(:device, :members)

# Health check for one software-RAID array, based on its line in $MDSTAT.
#
# device  - array name as it appears at the start of the line (e.g. 'md0')
# members - names of the component devices expected in the array
class RaidDevice
	# Raises ArgumentError when either the device or the member list is nil.
	def initialize(dev, mem)
		super(dev, mem)
		raise ArgumentError, 'device and members are required' if device.nil? || members.nil?
		# Start with an empty status so to_s is usable even before check ran.
		@status = []
	end

	# Looks up the "<device> : ..." line in $MDSTAT and verifies every
	# expected member is listed there and not flagged as failed.
	#
	# Returns true when the array is found with all members present and
	# healthy; otherwise returns false and records the problems for to_s.
	def check
		@status = []

		# File.read closes the handle, has no size cap and copes with an
		# empty file.
		lines = File.read($MDSTAT).split("\n")
		line = lines.grep(/^#{Regexp.escape(device.to_s)}\s+:/).first
		if line.nil?
			@status << "R/#{device}: not found"
			return false
		end

		members.each do |x|
			# Members are whitespace-separated "name[N]" tokens; anchoring on
			# the preceding whitespace keeps 'vda1' from matching 'xvda1'.
			slot = /(?:^|\s)#{Regexp.escape(x.to_s)}\[\d+\]/
			# The failure flag may follow other flags, e.g. "sda1[0](W)(F)".
			failed = /#{slot.source}(?:\(\w\))*\(F\)/
			@status << "R/#{device}: FAIL: #{x}" if line =~ failed
			@status << "R/#{device}: MISS: #{x}" if line !~ slot
		end

		@status.empty?
	end

	# Report of the last check: the recorded problems, one per line, or
	# "R/<device>: OK" when there are none (or check has not run yet).
	def to_s
		if @status.nil? || @status.empty?
			"R/#{device}: OK"
		else
			@status.join("\n")
		end
	end
end

# Customize this

# Byte multipliers for the free-space thresholds below.
mib = 1024 * 1024
gib = 1024 * mib

# Everything to verify on each run: partitions with their minimum free
# space in bytes, and RAID arrays with the members they must contain.
check = [
	#Partition.new('xvda2', 40 * gib),
	Partition.new('md1', 1 * gib),
	Partition.new('md0', 20 * mib),
	Partition.new('vg/home', 250 * mib),
	Partition.new('vg/opt', 32 * mib),
	Partition.new('vg/tmp', 128 * mib),
	Partition.new('vg/usr', 750 * mib),
	Partition.new('vg/var', 10 * gib),
	RaidDevice.new('md0', %w{ sdc1 sdb1 sda1 }),
	RaidDevice.new('md2', %w{ sdc3 sdb3 sda3 }),
	RaidDevice.new('md1', %w{ sdc2 sdb2 sda2 }),
	RaidDevice.new('md3', %w{ sdc4 sdb4 sda4 }),
]

# Mails an alert by piping a complete message (headers + body) into
# "/usr/sbin/sendmail -t", which takes the recipients from the headers.
#
# what - text of the alert; placed in the body, followed by a timestamp
#
# The subject carries the host name as reported by `uname -n`.
def report_error(what)
	IO.popen("/usr/sbin/sendmail -t", "w") do |mailer|
		host = `uname -n`.chomp
		# Header lines must start in the first column, so the heredoc
		# body is deliberately not indented.
		message = <<-EOF
From: alert@example.com
To: admin@example.com
Cc: pager@example.com
Subject: #{host}:

#{what}
(TS: #{Time.now.strftime("%Y%m%d %H%M%S")})
		EOF
		mailer.puts message
	end
end
# End customize this

# Run every configured check and keep the reports of those that failed.
failed = check.reject { |item| item.check }
out = failed.map { |item| item.to_s }

# Stay silent when everything is fine; otherwise send one combined alert.
report_error(out.join("\n")) unless out.empty?

# vim: set ts=4 sw=4 ai ft=ruby :