#!/bin/bash
#
# Destructive badblocks write test for a single disk, guarded by SMART checks.
#
# Usage: <this-script> DISK SERIAL MY_PROC
#   DISK     kernel device name without the /dev/ prefix (used as /dev/$DISK
#            and /sys/block/$DISK)
#   SERIAL   serial number the disk must report through `smartctl -i`
#   MY_PROC  label printed in the result line ("Process <MY_PROC>")
#
# WARNING: badblocks is run with -w (write-mode test); data on the disk is
# overwritten.
#
# Exit codes for argument/identity validation:
#   11  DISK missing            12  SERIAL missing
#   16  MY_PROC missing         13  /dev/$DISK is not a block device
#   14  disk reports no serial  15  reported serial differs from SERIAL
#
# Variables read but not set here (expected from `test-variables` or the
# environment; not verifiable from this file): OUTPUT_DIR, SERVER_SERIAL,
# RES_OK, RES_FAIL, RES_ERROR, BLD, OFF, and optionally LST_BLCK.
# External helpers used: smartctl, badblocks, parse-smart-attr,
# parse-smart-error, strings.
#
# Files written (all under $OUTPUT_DIR/disk unless noted):
#   $SERIAL.badblocks         bad block list produced by badblocks -o
#   $SERIAL.badblocks.out     excerpt of the badblocks progress log
#   $SERIAL.badblocks.result  human readable verdict
#   $SERIAL.badblocks.csv     machine readable verdict
#   $SERIAL.smart.error       SMART output captured when a SMART check fails
#   $DISK.*                   symlinks to the per-serial files above
#   $OUTPUT_DIR/badblocks.log verdict appended for every run
#   /run/$SERIAL.badblocks.{out,pid}  raw badblocks stderr and its PID

source test-variables

# --- argument validation ---------------------------------------------------

DISK=$1
if [[ -z $DISK ]]; then
  echo "Missing DISK parameter"
  exit 11
fi

SERIAL=$2
if [[ -z $SERIAL ]]; then
  echo "Missing SERIAL parameter"
  exit 12
fi

MY_PROC=$3
if [[ -z $MY_PROC ]]; then
  # MY_PROC is the third positional parameter.
  echo "Missing parameter 3: MY_PROC"
  exit 16
fi

if [[ ! -b /dev/$DISK ]]; then
  echo "Device /dev/$DISK is not a block special file"
  exit 13
fi

# Serial number as reported by the drive itself (text after the first ':',
# leading whitespace removed).
DISK_SERIAL=$(smartctl -i "/dev/$DISK" \
  | grep -i 'serial number' \
  | cut -d ':' -f 2 \
  | sed 's/\s\+//')

if [[ -z $DISK_SERIAL ]]; then
  echo "Empty serial number in disk /dev/$DISK"
  exit 14
fi

# The right-hand side is quoted on purpose: unquoted, [[ ]] would treat
# $SERIAL as a glob pattern and e.g. '*' would match any disk.
if [[ $DISK_SERIAL != "$SERIAL" ]]; then
  echo "Serial numbers $SERIAL and $DISK_SERIAL for $DISK do not match"
  exit 15
fi

# --- test parameters -------------------------------------------------------

#TPATTERN=0x55
TPATTERN=0xaa

# TODO: Debug limit. Comment out for production
#LST_BLCK=10000000

SMARTLOG=300 # check every 5 minutes
#SMARTLOG=60 # check every minute
# TODO: Debug variable. Comment out for production
#SMARTLOG=15 # check every 15 seconds

BLK_SIZE=$(< "/sys/block/$DISK/queue/physical_block_size")
MAX_SAME=$(< "/sys/block/$DISK/queue/write_same_max_bytes")
# Number of blocks badblocks tests at once (-c); falls back to 32768 when
# the kernel reports no WRITE SAME limit.
CONCU=$(( MAX_SAME / ( BLK_SIZE / 2 ) ))
if [[ $CONCU == 0 ]]; then
  CONCU=32768
fi

# Common prefix of every per-disk output file.
out_prefix="$OUTPUT_DIR/disk/$SERIAL"
# Raw stderr of badblocks (progress and summary lines).
raw_log="/run/$SERIAL.badblocks.out"
pid_file="/run/$SERIAL.badblocks.pid"

# Start empty so a value inherited from the environment cannot be mistaken
# for a bad block list.
BADBLOCK=''

#######################################
# Check the SMART attribute table for entries flagged ERROR.
# Globals:   DISK, RES_ERROR, out_prefix (read); ERROR (written)
# Outputs:   on failure writes the annotated attribute table to
#            $out_prefix.smart.error
# Result:    ERROR is empty when nothing is flagged; otherwise it holds the
#            flagged fields joined on one line, each preceded by a space.
#######################################
check_smart_attributes() {
  local raw_error token

  raw_error=$(smartctl -A -f brief "/dev/$DISK" \
    | parse-smart-attr \
    | grep ERROR \
    | cut -d ':' -f 2)
  # TODO: Debug variable. Comment out for production
  #raw_error=

  ERROR=''
  [[ -n $raw_error ]] || return 0

  # Intentionally unquoted: word splitting folds the multi-line output into
  # a single space-separated line.
  # shellcheck disable=SC2086
  for token in $raw_error; do
    ERROR="$ERROR $token"
  done

  smartctl -A -f brief "/dev/$DISK" \
    | parse-smart-attr \
    | sed "s/ERROR/$RES_ERROR/g" \
    > "$out_prefix.smart.error"
}

# --- SMART checks before the test ------------------------------------------

BEGIN=$(date +%s)

# Check SMART overall self test: any health line that is neither PASSED nor
# OK is treated as a failure.
ERROR=$(smartctl -H "/dev/$DISK" \
  | grep '\(overall-health\|Health Status\)' \
  | grep -v '\(PASSED\|OK\)')
if [[ -n $ERROR ]]; then
  smartctl -H "/dev/$DISK" > "$out_prefix.smart.error"
else
  check_smart_attributes
fi
# NOTE(review): a $SERIAL.smart.error left over from an earlier run is never
# removed, so it is still linked further down even when this run is clean.

# --- badblocks run, supervised through the SMART error log -----------------

if [[ -z $ERROR ]]; then
  # $LST_BLCK is left unquoted so that it disappears when unset/empty.
  # shellcheck disable=SC2086
  badblocks -b "$BLK_SIZE" -c "$CONCU" -e 1 -t "$TPATTERN" \
    -o "$out_prefix.badblocks" \
    -svw "/dev/$DISK" $LST_BLCK \
    2> "$raw_log" &
  TEST_PID=$!
  echo "$TEST_PID" > "$pid_file"

  while [[ -d /proc/$TEST_PID ]]; do
    # Check SMART error log
    ERROR=$(smartctl -l error "/dev/$DISK" \
      | parse-smart-error \
      | sed 's/://g')
    if [[ -n $ERROR ]]; then
      smartctl -l error "/dev/$DISK" > "$out_prefix.smart.error"
      kill "$TEST_PID"
      sleep 5
      # Re-test the loop condition right away instead of sleeping a full
      # SMARTLOG interval after the test has already been aborted.
      continue
    fi
    # TODO: Debug procedure. unnecessary for production
    #BADBLOCK=`cat $OUTPUT_DIR/disk/$SERIAL.badblocks`
    #if [[ -n $BADBLOCK ]] ; then
    #  kill $TEST_PID
    #  sleep 5
    #fi
    sleep "$SMARTLOG"
  done

  # NOTE(review): the exit status of badblocks is not inspected; a badblocks
  # that fails to start leaves ERROR and the bad block list empty and is
  # reported as OK. Confirm whether that is acceptable.
  #RET=$?
  #echo $RET > $OUTPUT_DIR/disk/$SERIAL.badblocks.ret

  if [[ -z $ERROR ]]; then
    # Check smart attributes after the badblocks test
    check_smart_attributes
  fi

  rm -f "$pid_file"
fi

END=$(date +%s)
DURATION=$(( END - BEGIN ))

# --- verdict ---------------------------------------------------------------

if [[ -f $out_prefix.badblocks ]]; then
  BADBLOCK=$(< "$out_prefix.badblocks")
fi

# Precedence: SMART error, then clean run, then bad blocks found.
echo -n "${BLD}Process $MY_PROC${OFF} : " > "$out_prefix.badblocks.result"
if [[ -n $ERROR ]]; then
  {
    echo -n "Disk $DISK $SERIAL $ERROR : "
    echo "$RES_FAIL"
  } >> "$out_prefix.badblocks.result"
  echo "$SERVER_SERIAL|$DISK|$SERIAL|FAIL|$ERROR" \
    > "$out_prefix.badblocks.csv"
elif [[ -z $BADBLOCK ]]; then
  {
    echo -n "disk $DISK $SERIAL tested in ${DURATION}s : "
    echo "$RES_OK"
  } >> "$out_prefix.badblocks.result"
  echo "$SERVER_SERIAL|$DISK|$SERIAL|OK|${DURATION}s" \
    > "$out_prefix.badblocks.csv"
else
  {
    echo -n "Bad block $BADBLOCK found in disk $DISK $SERIAL : "
    echo "$RES_FAIL"
  } >> "$out_prefix.badblocks.result"
  echo "$SERVER_SERIAL|$DISK|$SERIAL|FAIL|Bad block $BADBLOCK" \
    > "$out_prefix.badblocks.csv"
fi

# --- per-device symlinks and log excerpt -----------------------------------

if [[ -f $out_prefix.badblocks ]]; then
  ln -sf "$out_prefix.badblocks" "$OUTPUT_DIR/disk/$DISK.badblocks"
fi

if [[ -f $raw_log ]]; then
  # strings /run/$SERIAL.badblocks.out \
  #   | sed 's/\s\+[0-9.%]\+ done, [0-9:]\+ elapsed.//' \
  #   | uniq > $OUTPUT_DIR/disk/$SERIAL.badblocks.out
  # Keep only the context around the "Testing", "Reading" and "completed"
  # lines of the raw log, preceded by a timestamp.
  {
    date
    strings "$raw_log" | grep -B5 -A5 Testing
    echo "..."
    strings "$raw_log" | grep -B5 -A5 Reading
    echo "..."
    strings "$raw_log" | grep -B5 -A5 completed
  } > "$out_prefix.badblocks.out"
  ln -sf "$out_prefix.badblocks.out" "$OUTPUT_DIR/disk/$DISK.badblocks.out"
  # rm -f /run/$SERIAL.badblocks.out
fi

#ln -sf $OUTPUT_DIR/disk/$SERIAL.badblocks.ret \
#  $OUTPUT_DIR/disk/$DISK.badblocks.ret
ln -sf "$out_prefix.badblocks.result" "$OUTPUT_DIR/disk/$DISK.badblocks.result"
ln -sf "$out_prefix.badblocks.csv" "$OUTPUT_DIR/disk/$DISK.badblocks.csv"
if [[ -f $out_prefix.smart.error ]]; then
  ln -sf "$out_prefix.smart.error" "$OUTPUT_DIR/disk/$DISK.smart.error"
fi

# Show the verdict and append it to the shared log.
cat "$out_prefix.badblocks.result"
cat "$out_prefix.badblocks.result" >> "$OUTPUT_DIR/badblocks.log"