hw-test/scripts/wrapper-badblocks

208 lines
5.6 KiB
Text
Raw Normal View History

2016-12-14 21:01:45 -06:00
#!/bin/bash
2016-12-15 05:06:33 -06:00
# Load the shared test settings. Later parts of this script read OUTPUT_DIR,
# SERVER_SERIAL, BLD, OFF, RES_OK, RES_FAIL and RES_ERROR without setting
# them; presumably they are defined in this file (confirm against
# test-variables).
source test-variables

# Usage: wrapper-badblocks DISK SERIAL MY_PROC
#   $1 DISK     device name without the /dev/ prefix (used as /dev/$DISK)
#   $2 SERIAL   serial number the caller expects the disk to report
#   $3 MY_PROC  process label printed at the start of the result line
#
# Exit codes used here: 11 = DISK missing, 12 = SERIAL missing,
# 16 = MY_PROC missing, 13 = /dev/$DISK is not a block device.
DISK=$1
if [[ -z "$DISK" ]]; then
  echo "Missing DISK parameter"
  exit 11
fi

SERIAL=$2
if [[ -z "$SERIAL" ]]; then
  echo "Missing SERIAL parameter"
  exit 12
fi

MY_PROC=$3
if [[ -z "$MY_PROC" ]]; then
  # MY_PROC is read from $3; the message previously claimed parameter 2.
  echo "Missing parameter 3: MY_PROC"
  exit 16
fi

if [[ ! -b "/dev/$DISK" ]]; then
  echo "Device /dev/$DISK is not a block special file"
  exit 13
fi
2016-12-15 05:06:33 -06:00
# Ask the drive for its own serial number: take the "serial number" line of
# `smartctl -i`, keep the text after the first colon, and drop the first run
# of whitespace (the padding that follows the colon).
DISK_SERIAL=$(smartctl -i "/dev/$DISK" \
  | grep -i 'serial number' \
  | cut -d ':' -f 2 \
  | sed 's/\s\+//')
if [[ -z "$DISK_SERIAL" ]]; then
  echo "Empty serial number in disk /dev/$DISK"
  exit 14
fi

# Safety gate before the write test further down: the disk must be the one
# the caller named. The right-hand side MUST be quoted. Inside [[ ]] an
# unquoted right operand of != is a glob pattern, so a SERIAL such as "*"
# would match every disk and let the wrong device through.
if [[ "$DISK_SERIAL" != "$SERIAL" ]]; then
  echo "Serial numbers $SERIAL and $DISK_SERIAL for $DISK do not match"
  exit 15
fi
2016-12-15 22:08:43 -06:00
# Test pattern passed to `badblocks -t`. Alternative value: TPATTERN=0x55
TPATTERN=0xaa

# TODO: debug-only limit on the last block to test; keep disabled for
# production.
#LST_BLCK=10000000

# Seconds to sleep between SMART error-log polls while badblocks runs.
SMARTLOG=300    # 5 minutes
#SMARTLOG=60    # 1 minute
# TODO: debug-only value; keep disabled for production.
#SMARTLOG=15    # 15 seconds

# Queue limits published by the kernel for this disk.
queue_dir=/sys/block/$DISK/queue
BLK_SIZE=$(cat $queue_dir/physical_block_size)
MAX_SAME=$(cat $queue_dir/write_same_max_bytes)

# Number of blocks badblocks handles per pass (`-c`), derived from the two
# values above. A result of 0 is replaced by a fixed 32768.
CONCU=$(( MAX_SAME / (BLK_SIZE / 2) ))
if [[ $CONCU == 0 ]]; then
  CONCU=32768
fi

# Start timestamp (epoch seconds); the test duration is computed from it.
BEGIN=$(date +%s)
2016-12-16 05:52:04 -06:00
# Pre-test SMART screening. ERROR ends up non-empty when a problem is found,
# and the supporting output is saved to $OUTPUT_DIR/disk/$SERIAL.smart.error.
#
# Step 1: overall health. Keep the health line of `smartctl -H` unless it
# contains PASSED or OK.
ERROR=$(smartctl -H /dev/$DISK \
  | grep '\(overall-health\|Health Status\)' \
  | grep -v '\(PASSED\|OK\)'
)
if [[ -z $ERROR ]]; then
  # Step 2: attribute table. parse-smart-attr is an external helper; the
  # second colon-separated field of every line it tags with ERROR is taken
  # as the failure description.
  ERROR=$(smartctl -A -f brief /dev/$DISK \
    | parse-smart-attr \
    | grep ERROR \
    | cut -d ':' -f 2
  )
  # TODO: Debug variable. Comment out for production
  #ERROR=
  if [[ -n $ERROR ]]; then
    # Re-join the words with single spaces (newlines and repeated blanks are
    # collapsed; the result keeps one leading space).
    RAW_ERROR=$ERROR
    ERROR=''
    for TOKEN in $RAW_ERROR; do
      ERROR+=" $TOKEN"
    done
    # Save the annotated attribute table, with the ERROR tag replaced by the
    # $RES_ERROR marker.
    smartctl -A -f brief /dev/$DISK \
      | parse-smart-attr \
      | sed "s/ERROR/$RES_ERROR/g" \
      > $OUTPUT_DIR/disk/$SERIAL.smart.error
  fi
else
  # Overall health is bad: keep the full health report as evidence.
  smartctl -H /dev/$DISK \
    > $OUTPUT_DIR/disk/$SERIAL.smart.error
fi
# Run the badblocks scan only when the SMART checks so far left ERROR empty.
# While the scan runs, the drive's SMART error log is polled every $SMARTLOG
# seconds and the scan is aborted as soon as parse-smart-error reports
# anything. Once the scan is over, the SMART attributes are checked again.
# ERROR is left non-empty on any SMART failure.
if [[ -z "$ERROR" ]]; then
  # badblocks(8) flags: -w write-mode test (overwrites the disk), -s/-v
  # progress and verbose output (stderr, captured under /run), -t test
  # pattern, -b block size, -c blocks per pass, -e 1 abort after the first
  # bad block, -o file receiving the list of bad blocks.
  # LST_BLCK is an optional debug limit; it becomes an argument only when set.
  badblocks -b "$BLK_SIZE" -c "$CONCU" -e 1 -t "$TPATTERN" \
    -o "$OUTPUT_DIR/disk/$SERIAL.badblocks" \
    -svw "/dev/$DISK" ${LST_BLCK:+"$LST_BLCK"} \
    2> "/run/$SERIAL.badblocks.out" &
  # $! is the PID of the job just started; no need to query the job table.
  TEST_PID=$!
  echo "$TEST_PID" > "/run/$SERIAL.badblocks.pid"

  while [[ -d "/proc/$TEST_PID" ]]; do
    # Check SMART error log. Any text left after removing colons from the
    # parse-smart-error output counts as an error.
    ERROR=$(smartctl -l error "/dev/$DISK" \
      | parse-smart-error \
      | sed 's/://g'
    )
    if [[ -n "$ERROR" ]]; then
      smartctl -l error "/dev/$DISK" \
        > "$OUTPUT_DIR/disk/$SERIAL.smart.error"
      kill "$TEST_PID"
      sleep 5
      # Once badblocks is gone there is nothing left to monitor: leave now
      # instead of sleeping another $SMARTLOG seconds before the loop
      # condition notices. If it is still alive, fall through and retry.
      [[ -d "/proc/$TEST_PID" ]] || break
    fi
    # TODO: Debug procedure. unnecessary for production
    #BADBLOCK=`cat $OUTPUT_DIR/disk/$SERIAL.badblocks`
    #if [[ -n $BADBLOCK ]] ; then
    #  kill $TEST_PID
    #  sleep 5
    #fi
    sleep "$SMARTLOG"
  done

  if [[ -z "$ERROR" ]]; then
    # Check smart attributes after the badblocks test
    ERROR=$(smartctl -A -f brief "/dev/$DISK" \
      | parse-smart-attr \
      | grep ERROR \
      | cut -d ':' -f 2
    )
    # TODO: Debug variable. Comment out for production
    #ERROR=
    if [[ -n "$ERROR" ]]; then
      # Deliberately unquoted: word splitting collapses newlines and repeated
      # blanks so the description fits on one line (one leading space stays).
      RAW_ERROR=$ERROR
      ERROR=''
      for TOKEN in $RAW_ERROR; do
        ERROR="$ERROR $TOKEN"
      done
      smartctl -A -f brief "/dev/$DISK" \
        | parse-smart-attr \
        | sed "s/ERROR/$RES_ERROR/g" \
        > "$OUTPUT_DIR/disk/$SERIAL.smart.error"
    fi
  fi

  # NOTE(review): the exit status of badblocks itself is never collected, so
  # a scan that fails to start is not detected here. The disabled lines
  # below are the original, unfinished attempt.
  #RET=$?
  #echo $RET > $OUTPUT_DIR/disk/$SERIAL.badblocks.ret
  rm -f "/run/$SERIAL.badblocks.pid"
fi
2016-12-15 05:06:33 -06:00
# Build the human-readable result line and the one-line CSV record for this
# disk. Precedence: a SMART error (ERROR non-empty) wins, then a clean scan
# (no bad block recorded), otherwise the recorded bad blocks are reported.
END=$(date +%s)
DURATION=$(( END - BEGIN ))

if [[ -f "$OUTPUT_DIR/disk/$SERIAL.badblocks" ]]; then
  BADBLOCK=$(cat "$OUTPUT_DIR/disk/$SERIAL.badblocks")
  # The file holds one block number per line and may hold several. Join them
  # with spaces so the result line and the CSV record each stay on one line.
  BADBLOCK=${BADBLOCK//$'\n'/ }
fi

# The first write truncates any result file left by a previous run; the
# branches below append to it.
echo -n "${BLD}Process $MY_PROC${OFF} : " \
  > "$OUTPUT_DIR/disk/$SERIAL.badblocks.result"

# $RES_FAIL / $RES_OK are echoed unquoted on purpose, exactly as before:
# their content is defined outside this file and quoting could change the
# spacing of the emitted marker.
if [[ -n "$ERROR" ]]; then
  echo -n "Disk $DISK $SERIAL $ERROR : " \
    >> "$OUTPUT_DIR/disk/$SERIAL.badblocks.result"
  echo $RES_FAIL \
    >> "$OUTPUT_DIR/disk/$SERIAL.badblocks.result"
  echo "$SERVER_SERIAL|$DISK|$SERIAL|FAIL|$ERROR" \
    > "$OUTPUT_DIR/disk/$SERIAL.badblocks.csv"
elif [[ -z "$BADBLOCK" ]]; then
  echo -n "disk $DISK $SERIAL tested in ${DURATION}s : " \
    >> "$OUTPUT_DIR/disk/$SERIAL.badblocks.result"
  echo $RES_OK \
    >> "$OUTPUT_DIR/disk/$SERIAL.badblocks.result"
  echo "$SERVER_SERIAL|$DISK|$SERIAL|OK|${DURATION}s" \
    > "$OUTPUT_DIR/disk/$SERIAL.badblocks.csv"
else
  echo -n "Bad block $BADBLOCK found in disk $DISK $SERIAL : " \
    >> "$OUTPUT_DIR/disk/$SERIAL.badblocks.result"
  echo $RES_FAIL \
    >> "$OUTPUT_DIR/disk/$SERIAL.badblocks.result"
  echo "$SERVER_SERIAL|$DISK|$SERIAL|FAIL|Bad block $BADBLOCK" \
    > "$OUTPUT_DIR/disk/$SERIAL.badblocks.csv"
fi
2016-12-20 04:43:12 -06:00
# Publish this run's artifacts. Every file is stored under the disk's serial
# number; a symlink named after the device ($DISK) is placed next to each one
# so results can be found under either name.
serial_base=$OUTPUT_DIR/disk/$SERIAL
disk_base=$OUTPUT_DIR/disk/$DISK
raw_log=/run/$SERIAL.badblocks.out

# List of bad blocks, if badblocks produced one.
if [[ -f $serial_base.badblocks ]]; then
  ln -sf $serial_base.badblocks $disk_base.badblocks
fi

# Condensed copy of the badblocks stderr capture: a date stamp, then the
# lines around "Testing", "Reading" and "completed", separated by "...".
if [[ -f $raw_log ]]; then
  # Earlier approach, kept for reference:
  # strings /run/$SERIAL.badblocks.out \
  #   | sed 's/\s\+[0-9.%]\+ done, [0-9:]\+ elapsed.//' \
  #   | uniq > $OUTPUT_DIR/disk/$SERIAL.badblocks.out
  {
    date
    first_section=yes
    for keyword in Testing Reading completed; do
      if [[ -z $first_section ]]; then
        echo "..."
      fi
      first_section=
      strings $raw_log \
        | grep -B5 -A5 $keyword
    done
  } > $serial_base.badblocks.out
  ln -sf $serial_base.badblocks.out $disk_base.badblocks.out
  # rm -f /run/$SERIAL.badblocks.out
fi

#ln -sf $OUTPUT_DIR/disk/$SERIAL.badblocks.ret \
#  $OUTPUT_DIR/disk/$DISK.badblocks.ret

# Result line and CSV record are always linked.
for suffix in badblocks.result badblocks.csv; do
  ln -sf $serial_base.$suffix $disk_base.$suffix
done

# SMART evidence is linked only when a SMART check saved some.
if [[ -f $serial_base.smart.error ]]; then
  ln -sf $serial_base.smart.error $disk_base.smart.error
fi

# Print the result line and append it to the shared log in one pass.
tee -a $OUTPUT_DIR/badblocks.log < $serial_base.badblocks.result
2016-12-14 22:57:46 -06:00