From 3bbbf9fb52fee9348e4182a82cccbb4be31ccb71 Mon Sep 17 00:00:00 2001
From: Sandino Araico Sanchez
Date: Mon, 16 Jan 2017 03:05:51 -0600
Subject: [PATCH] parallel test scripts for fio

---
Review notes (text between the "---" marker and the first diff is not
part of the commit):

* This copy of the patch was recovered from a whitespace-collapsed
  paste.  Line breaks and indentation were rebuilt from the shell
  syntax; indentation width is a reconstruction, not the author's.
* The paste is truncated inside the scripts/scan-dmidecode hunk: the
  line starting with OUTPUT_BAD= runs straight into the middle of
  scripts/test-fio.  The rest of that hunk, the scripts/test-all diff
  and the head of scripts/test-fio (diff header, opening "if") are
  missing, so this patch does not apply until they are restored from
  the original commit.  The truncated line is kept verbatim.
* Fixes applied on top of the recovered text (all keep the line count
  of their hunk):
  - test-fio: the serial lookup used "grep $DISK", a substring match
    that also hits longer device names (sda vs sdaa) and commented-out
    entries of disk.lst; it is now anchored to the first ':' field.
  - test-fio: the CSV summary banner said BADBLOCKS; it now says FIO,
    like the plain summary banner.
  - wrapper-fio: the job file was written with ">>", so every re-run
    appended another [global]/[file1] pair; it is now truncated.
  - wrapper-fio: the message for a missing third argument said
    "parameter 2"; the comment on SMARTLOG=15 said "every second".
* Left as found, still to be reviewed before production use:
  - wrapper-fio keeps the debug overrides marked TODO ("ERROR=" after
    both SMART attribute checks, "size=128M" in the job file), so SMART
    attribute errors are currently never reported.
  - the sfdisk input names /dev/sdb and a fixed size of 1953525167
    sectors regardless of the disk under test.
  - results of mkfs.ext4 and mount are not checked before fio starts.

 scripts/scan-dmidecode |  13 ++-
 scripts/test-all       |   8 +-
 scripts/test-fio       | 132 +++++++++++++++++++++++++++
 scripts/watcher-fio    |  25 +++++
 scripts/watcher-screen |   5 +-
 scripts/wrapper-fio    | 203 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 381 insertions(+), 5 deletions(-)
 create mode 100755 scripts/test-fio
 create mode 100755 scripts/watcher-fio
 create mode 100755 scripts/wrapper-fio

diff --git a/scripts/scan-dmidecode b/scripts/scan-dmidecode
index 4180125..85b8bec 100755
--- a/scripts/scan-dmidecode
+++ b/scripts/scan-dmidecode
@@ -1,13 +1,24 @@
 #!/bin/bash
-SERVER_SERIAL=`dmidecode -s chassis-serial-number | sed 's/\s.*$//'`
+#SERVER_SERIAL=`dmidecode -s chassis-serial-number | sed 's/\s.*$//'`
+SERVER_SERIAL=`dmidecode -s system-serial-number | sed 's/\s.*$//'`
 MY_DATETIME=`date +%Y-%m-%d_%H-%M-%S`
 OUTPUT_HOME=/output/hwtests
 OUTPUT_DIR=$OUTPUT_HOME/$SERVER_SERIAL
+OUTPUT_BAD="$OUTPUT_HOME/> $OUTPUT_DIR/fio.log
+  date >> $OUTPUT_DIR/fio.log
+  for DISK in `cat $OUTPUT_DIR/disk.lst \
+    | sed 's/#.*$//' \
+    | cut -d ':' -f 1`
+  do
+    SERIAL=`cat $OUTPUT_DIR/disk.lst \
+      | grep "^[[:space:]]*$DISK:" \
+      | cut -d ':' -f 2`
+    if [[ -f $OUTPUT_DIR/disk/$SERIAL.fio.result ]] ; then
+      cat $OUTPUT_DIR/disk/$SERIAL.fio.result \
+        | sed 's/Process [0-9]\+/Test already done/'
+      cat $OUTPUT_DIR/disk/$SERIAL.fio.result \
+        | sed 's/Process [0-9]\+/Result found/' \
+        >> $OUTPUT_DIR/fio.log
+      continue
+    fi
+    JOBS=$(( JOBS + 1 ))
+    echo -n "Process $JOBS: Testing disk $DISK with fio ..."
+    echo -n "Process $JOBS : Disk $DISK : " \
+      >> $OUTPUT_DIR/fio.log
+    COUNT=0
+    TEST_PID=
+    wrapper-fio $DISK $SERIAL $JOBS &
+    sleep 1
+    RET=`jobs %+ | awk '/Exit [0-9]+/ {print $3}'`
+    if [[ -n $RET ]] ; then
+      echo -n "Exit $RET "
+      COUNT=$RET
+    fi
+    while (( $COUNT < $MAXWAIT )) ; do
+      COUNT=$(( COUNT + 1 ))
+      sleep 1
+      if [[ -f /run/$SERIAL.fio.pid ]] ; then
+        TEST_PID=`cat /run/$SERIAL.fio.pid`
+        echo " PID $TEST_PID"
+        echo " PID $TEST_PID" >> $OUTPUT_DIR/fio.log
+        break
+      fi
+      echo -n '.'
+    done
+    if [[ -z $TEST_PID ]] ; then
+      echo "${RED}Process not running.${OFF}"
+      echo "${RED}Process not running.${OFF}" \
+        >> $OUTPUT_DIR/fio.log
+    fi
+  done
+fi
+
+JOBS_WAITING=$JOBS
+JOBS_OLD=0
+COUNT=0
+NOTICE_INTERVAL=60
+while (( $JOBS_WAITING > 0 )) ; do
+  sleep 1
+  JOBS_WAITING=`jobs -r | wc -l`
+  JOBS_FINISHED=$(( JOBS - JOBS_WAITING ))
+  if (( $COUNT % $NOTICE_INTERVAL == 0 )) ; then
+    if [[ $JOBS_WAITING != $JOBS_OLD ]] ; then
+      echo "$JOBS_FINISHED jobs finished, $JOBS_WAITING jobs still running..."
+      JOBS_OLD=$JOBS_WAITING
+    fi
+  fi
+  COUNT=$(( COUNT + 1 ))
+  echo $JOBS_WAITING > /run/fio.waiting
+done
+rm -f /run/fio.waiting
+
+echo -n "END: " >> $OUTPUT_DIR/fio.log
+date >> $OUTPUT_DIR/fio.log
+echo >> $OUTPUT_DIR/fio.log
+
+#snapshot
+mkdir -p $SNAPSHOT_D
+cp -af $OUTPUT_DIR/* $SNAPSHOT_D/
+
+if [[ $SUMMARY != 0 ]] ; then
+  if [[ $SUMMARY == plain ]] ; then
+    echo "=====> FIO SUMMARY <====="
+  fi
+  echo "SERVER|DISK|SERIAL|RESULT|COMMENT" > $OUTPUT_DIR/fio.csv
+  for DISK in `cat $OUTPUT_DIR/disk.lst \
+    | sed 's/#.*$//' \
+    | cut -d ':' -f 1`
+  do
+    SERIAL=`cat $OUTPUT_DIR/disk.lst \
+      | grep "^[[:space:]]*$DISK:" \
+      | cut -d ':' -f 2`
+    #echo "looking for $OUTPUT_DIR/disk/$SERIAL.fio.csv"
+    if [[ $SUMMARY == plain ]] ; then
+      if [[ -f $OUTPUT_DIR/disk/$SERIAL.fio.result ]] ; then
+        cat $OUTPUT_DIR/disk/$SERIAL.fio.result
+      fi
+    fi
+    if [[ -f $OUTPUT_DIR/disk/$SERIAL.fio.csv ]] ; then
+      cat $OUTPUT_DIR/disk/$SERIAL.fio.csv \
+        >> $OUTPUT_DIR/fio.csv
+    fi
+  done
+fi
+if [[ $SUMMARY == csv ]] ; then
+echo "=====> FIO CSV SUMMARY <====="
+cat $OUTPUT_DIR/fio.csv \
+  | sed "s/^\([^|]\+\)/${BLD}\1${OFF}/ ; s/OK/$RES_OK/ ; s/FAIL/$RES_FAIL/ ; s/RESULT/$RES_ULT/ "
+# | sed 's/empty/W87152YGWGM/g' # Serial number for debugging
+fi
+
diff --git a/scripts/watcher-fio b/scripts/watcher-fio
new file mode 100755
index 0000000..6366b0b
--- /dev/null
+++ b/scripts/watcher-fio
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+if [[ -z $OUTPUT_DIR ]] ; then
+  source scan-dmidecode
+fi
+
+RUNNING=0
+while [[ $RUNNING == 0 ]] ; do
+  sleep 1
+  if [[ -f /run/fio.waiting ]] ; then
+    RUNNING=1
+  fi
+done
+sleep 5
+tail -Fq /run/*.fio.out &
+
+TAIL_PID=`jobs -p %+`
+
+while [[ -f /run/fio.waiting ]] ; do
+  sleep 1
+done
+
+kill $TAIL_PID
+rm -vf /run/*.fio.out
+
diff --git a/scripts/watcher-screen b/scripts/watcher-screen
index a10c64c..af7605b 100755
--- a/scripts/watcher-screen
+++ b/scripts/watcher-screen
@@ -2,8 +2,9 @@
 
 source /etc/profile
 
-watcher-memtester &
-watcher-badblocks &
+#watcher-memtester &
+#watcher-badblocks &
+watcher-fio &
 journalctl -f &
 
 while [[ -f /run/test.pid ]] ; do
diff --git a/scripts/wrapper-fio b/scripts/wrapper-fio
new file mode 100755
index 0000000..2efcee9
--- /dev/null
+++ b/scripts/wrapper-fio
@@ -0,0 +1,203 @@
+#!/bin/bash
+
+source test-variables
+
+DISK=$1
+if [[ -z $DISK ]] ; then
+  echo "Missing DISK parameter"
+  exit 11
+fi
+
+SERIAL=$2
+if [[ -z $SERIAL ]] ; then
+  echo "Missing SERIAL parameter"
+  exit 12
+fi
+
+MY_PROC=$3
+if [[ -z $MY_PROC ]] ; then
+  echo "Missing parameter 3: MY_PROC"
+  exit 16
+fi
+
+if [[ ! -b /dev/$DISK ]] ; then
+  echo "Device /dev/$DISK is not a block special file"
+  exit 13
+fi
+
+DISK_SERIAL=`smartctl -i /dev/$DISK \
+  | grep -i 'serial number' \
+  | cut -d ':' -f 2 \
+  | sed 's/\s\+//'`
+
+if [[ -z $DISK_SERIAL ]] ; then
+  echo "Empty serial number in disk /dev/$DISK"
+  exit 14
+fi
+
+if [[ $DISK_SERIAL != $SERIAL ]] ; then
+  echo "Serial numbers $SERIAL and $DISK_SERIAL for $DISK do not match"
+  exit 15
+fi
+
+#TPATTERN=0x55
+TPATTERN=0xaa
+# TODO: Debug limit. Comment out for production
+#LST_BLCK=10000000
+SMARTLOG=300 # check every 5 minutes
+#SMARTLOG=60 # check every minute
+# TODO: Debug variable. Comment out for production
+#SMARTLOG=15 # Check every 15 seconds
+BLK_SIZE=`cat /sys/block/$DISK/queue/physical_block_size`
+MAX_SAME=`cat /sys/block/$DISK/queue/write_same_max_bytes`
+CONCU=$(( MAX_SAME / ( BLK_SIZE / 2 ) ))
+[[ $CONCU == 0 ]] && CONCU=32768
+
+cat > $OUTPUT_DIR/disk/$DISK.job << EOF
+[global]
+ioengine=libaio
+direct=1
+gtod_reduce=1
+bs=$BLK_SIZE
+iodepth=64
+# TODO: Uncomment for production
+#size=4G
+# TODO: Debug variable. Comment out for production
+size=128M
+readwrite=randrw
+rwmixread=75
+directory=/mnt/$DISK
+
+[file1]
+name=test
+filename=test
+
+EOF
+
+BEGIN=`date +%s`
+# Check SMART overall self test
+ERROR=`smartctl -H /dev/$DISK \
+  | grep '\(overall-health\|Health Status\)' \
+  | grep -v '\(PASSED\|OK\)'
+`
+if [[ -n $ERROR ]] ; then
+  smartctl -H /dev/$DISK \
+    > $OUTPUT_DIR/disk/$SERIAL.smart.error
+else
+  ERROR=`smartctl -A -f brief /dev/$DISK \
+    | parse-smart-attr \
+    | grep ERROR \
+    | cut -d ':' -f 2
+  `
+  # TODO: Debug variable. Comment out for production
+  ERROR=
+  if [[ -n $ERROR ]] ; then
+    RAW_ERROR=$ERROR
+    ERROR=''
+    for TOKEN in $RAW_ERROR ; do
+      ERROR="$ERROR $TOKEN"
+    done
+    smartctl -A -f brief /dev/$DISK \
+      | parse-smart-attr \
+      | sed "s/ERROR/$RES_ERROR/g" \
+      > $OUTPUT_DIR/disk/$SERIAL.smart.error
+  fi
+fi
+if [[ -z $ERROR ]] ; then
+  sfdisk /dev/$DISK << EOF
+# partition table of /dev/sdb
+unit: sectors
+
+/dev/sdb1 : start= 1, size=1953525167, Id=ee
+/dev/sdb2 : start= 0, size= 0, Id= 0
+/dev/sdb3 : start= 0, size= 0, Id= 0
+/dev/sdb4 : start= 0, size= 0, Id= 0
+EOF
+  mkfs.ext4 /dev/${DISK}1
+  mkdir -pv /mnt/${DISK}
+  mount /dev/${DISK}1 /mnt/${DISK}
+  fio --output $OUTPUT_DIR/disk/$SERIAL.fio.out \
+    $OUTPUT_DIR/disk/$DISK.job \
+    2> /run/$SERIAL.fio.out &
+  TEST_PID=`jobs -p %+`
+  echo $TEST_PID > /run/$SERIAL.fio.pid
+  while [[ -d /proc/$TEST_PID ]] ; do
+    # Check SMART error log
+    ERROR=`smartctl -l error /dev/$DISK \
+      | parse-smart-error \
+      | sed 's/://g'
+    `
+    if [[ -n $ERROR ]] ; then
+      smartctl -l error /dev/$DISK \
+        > $OUTPUT_DIR/disk/$SERIAL.smart.error
+      kill $TEST_PID
+      sleep 5
+    fi
+    sleep $SMARTLOG
+  done
+  umount /mnt/${DISK}
+  # Reset partition table to zero
+  dd if=/dev/zero of=/dev/${DISK} bs=$BLK_SIZE count=128 \
+    >> /run/$SERIAL.fio.out 2>&1
+  if [[ -z $ERROR ]] ; then
+    # Check smart attributes after the fio test
+    ERROR=`smartctl -A -f brief /dev/$DISK \
+      | parse-smart-attr \
+      | grep ERROR \
+      | cut -d ':' -f 2
+    `
+    # TODO: Debug variable. Comment out for production
+    ERROR=
+    if [[ -n $ERROR ]] ; then
+      RAW_ERROR=$ERROR
+      ERROR=''
+      for TOKEN in $RAW_ERROR ; do
+        ERROR="$ERROR $TOKEN"
+      done
+      smartctl -A -f brief /dev/$DISK \
+        | parse-smart-attr \
+        | sed "s/ERROR/$RES_ERROR/g" \
+        > $OUTPUT_DIR/disk/$SERIAL.smart.error
+    fi
+  fi
+  #RET=$?
+  #echo $RET > $OUTPUT_DIR/disk/$SERIAL.fio.ret
+  rm -f /run/$SERIAL.fio.pid
+fi
+END=`date +%s`
+
+DURATION=$(( END - BEGIN ))
+
+echo -n "${BLD}Process $MY_PROC${OFF} : " \
+  > $OUTPUT_DIR/disk/$SERIAL.fio.result
+if [[ -n $ERROR ]] ; then
+  echo -n "Disk $DISK $SERIAL $ERROR : " \
+    >> $OUTPUT_DIR/disk/$SERIAL.fio.result
+  echo $RES_FAIL \
+    >> $OUTPUT_DIR/disk/$SERIAL.fio.result
+  echo "$SERVER_SERIAL|$DISK|$SERIAL|FAIL|$ERROR" \
+    > $OUTPUT_DIR/disk/$SERIAL.fio.csv
+else
+  echo -n "disk $DISK $SERIAL tested in ${DURATION}s : " \
+    >> $OUTPUT_DIR/disk/$SERIAL.fio.result
+  echo $RES_OK \
+    >> $OUTPUT_DIR/disk/$SERIAL.fio.result
+  echo "$SERVER_SERIAL|$DISK|$SERIAL|OK|${DURATION}s" \
+    > $OUTPUT_DIR/disk/$SERIAL.fio.csv
+fi
+
+ln -sf $OUTPUT_DIR/disk/$SERIAL.fio.out \
+  $OUTPUT_DIR/disk/$DISK.fio.out
+ln -sf $OUTPUT_DIR/disk/$SERIAL.fio.result \
+  $OUTPUT_DIR/disk/$DISK.fio.result
+ln -sf $OUTPUT_DIR/disk/$SERIAL.fio.csv \
+  $OUTPUT_DIR/disk/$DISK.fio.csv
+if [[ -f $OUTPUT_DIR/disk/$SERIAL.smart.error ]] ; then
+  ln -sf $OUTPUT_DIR/disk/$SERIAL.smart.error \
+    $OUTPUT_DIR/disk/$DISK.smart.error
+fi
+
+cat $OUTPUT_DIR/disk/$SERIAL.fio.result
+cat $OUTPUT_DIR/disk/$SERIAL.fio.result \
+  >> $OUTPUT_DIR/fio.log
+