parallel test scripts for fio
This commit is contained in:
parent
d784b863d2
commit
3bbbf9fb52
6 changed files with 381 additions and 5 deletions
|
@ -1,13 +1,24 @@
|
|||
#!/bin/bash
|
||||
|
||||
SERVER_SERIAL=`dmidecode -s chassis-serial-number | sed 's/\s.*$//'`
|
||||
#SERVER_SERIAL=`dmidecode -s chassis-serial-number | sed 's/\s.*$//'`
|
||||
SERVER_SERIAL=`dmidecode -s system-serial-number | sed 's/\s.*$//'`
|
||||
MY_DATETIME=`date +%Y-%m-%d_%H-%M-%S`
|
||||
OUTPUT_HOME=/output/hwtests
|
||||
OUTPUT_DIR=$OUTPUT_HOME/$SERVER_SERIAL
|
||||
OUTPUT_BAD="$OUTPUT_HOME/<BAD"
|
||||
SNAPSHOT_D=$OUTPUT_HOME/snapshot/$SERVER_SERIAL/$MY_DATETIME
|
||||
if [[ -d $OUTPUT_BAD ]] ; then
|
||||
if [[ -f $OUTPUT_BAD/dmidecode ]] ; then
|
||||
IS_MY_SERIAL=`cat $OUTPUT_BAD/dmidecode | grep $SERVER_SERIAL`
|
||||
if [[ -n $IS_MY_SERIAL ]] ; then
|
||||
mv -vf $OUTPUT_BAD $OUTPUT_DIR
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
export SERVER_SERIAL
|
||||
export OUTPUT_DIR
|
||||
export SNAPSHOT_D
|
||||
export MY_DATETIME
|
||||
mkdir -p $OUTPUT_DIR
|
||||
|
||||
if [[ ! -f $OUTPUT_DIR/dmidecode ]] ; then
|
||||
|
|
|
@ -21,7 +21,7 @@ scan-netdev
|
|||
# sleep 3600
|
||||
# done
|
||||
#fi
|
||||
test-badblocks
|
||||
#test-badblocks
|
||||
#if [[ -f $OUTPUT_DIR/badblocks.fail ]] ; then
|
||||
# echo "One or more disks have ${RED}FAILED${OFF} the badblocks test."
|
||||
# echo "${BLD}Please replace the failing disks and restart this tests again.{OFF}"
|
||||
|
@ -32,7 +32,11 @@ test-badblocks
|
|||
#fi
|
||||
|
||||
#test-memtester summary
|
||||
test-badblocks summary
|
||||
#test-badblocks summary
|
||||
|
||||
test-pmbw
|
||||
test-linpack
|
||||
test-fio
|
||||
|
||||
# Take snapshot
|
||||
mkdir -p $SNAPSHOT_D
|
||||
|
|
132
scripts/test-fio
Executable file
132
scripts/test-fio
Executable file
|
@ -0,0 +1,132 @@
|
|||
#!/bin/bash
|
||||
|
||||
if [[ -z $OUTPUT_DIR ]] ; then
|
||||
source scan-dmidecode
|
||||
fi
|
||||
|
||||
source test-variables
|
||||
SUMMARY=0
|
||||
if [[ "$1" == "summary" ]] ; then
|
||||
SUMMARY=plain
|
||||
elif [[ "$1" == "csv" ]] ; then
|
||||
SUMMARY=csv
|
||||
else
|
||||
# Keep the previous disk list under a timestamped name before rescanning.
# MY_DATETIME must be expanded here; the literal name "disk.MY_DATETIME"
# would be overwritten on every run and lose the history.
if [[ -f $OUTPUT_DIR/disk.lst ]] ; then
  mv -vf "$OUTPUT_DIR/disk.lst" "$OUTPUT_DIR/disk.$MY_DATETIME"
fi
|
||||
scan-hdd # Always rescan
|
||||
fi
|
||||
if [[ "$1" == "force" ]] ; then
|
||||
rm -f $OUTPUT_DIR/disk/*.fio.result
|
||||
fi
|
||||
|
||||
MAXWAIT=9
|
||||
JOBS=0
|
||||
if [[ $SUMMARY == 0 ]] ; then
|
||||
echo -n "BEGIN: " >> $OUTPUT_DIR/fio.log
|
||||
date >> $OUTPUT_DIR/fio.log
|
||||
for DISK in `cat $OUTPUT_DIR/disk.lst \
|
||||
| sed 's/#.*$//' \
|
||||
| cut -d ':' -f 1`
|
||||
do
|
||||
# Look up the serial of exactly this disk. The pattern is anchored on
# "<disk>:" at the start of the line so that e.g. sda does not also match
# sdaa, and commented-out entries in disk.lst are not picked up.
SERIAL=`grep "^[[:space:]]*$DISK:" $OUTPUT_DIR/disk.lst | cut -d ':' -f 2`
|
||||
if [[ -f $OUTPUT_DIR/disk/$SERIAL.fio.result ]] ; then
|
||||
cat $OUTPUT_DIR/disk/$SERIAL.fio.result \
|
||||
| sed 's/Process [0-9]\+/Test already done/'
|
||||
cat $OUTPUT_DIR/disk/$SERIAL.fio.result \
|
||||
| sed 's/Process [0-9]\+/Result found/' \
|
||||
>> $OUTPUT_DIR/fio.log
|
||||
continue
|
||||
fi
|
||||
JOBS=$(( JOBS + 1 ))
|
||||
echo -n "Process $JOBS: Testing disk $DISK with fio ..."
|
||||
echo -n "Process $JOBS : Disk $DISK : " \
|
||||
>> $OUTPUT_DIR/fio.log
|
||||
COUNT=0
|
||||
TEST_PID=
|
||||
wrapper-fio $DISK $SERIAL $JOBS &
|
||||
sleep 1
|
||||
RET=`jobs %+ | awk '/Exit [0-9]+/ {print $3}'`
|
||||
if [[ -n $RET ]] ; then
|
||||
echo -n "Exit $RET "
|
||||
COUNT=$RET
|
||||
fi
|
||||
while (( $COUNT < $MAXWAIT )) ; do
|
||||
COUNT=$(( COUNT + 1 ))
|
||||
sleep 1
|
||||
if [[ -f /run/$SERIAL.fio.pid ]] ; then
|
||||
TEST_PID=`cat /run/$SERIAL.fio.pid`
|
||||
echo " PID $TEST_PID"
|
||||
echo " PID $TEST_PID" >> $OUTPUT_DIR/fio.log
|
||||
break
|
||||
fi
|
||||
echo -n '.'
|
||||
done
|
||||
if [[ -z $TEST_PID ]] ; then
|
||||
echo "${RED}Process not running.${OFF}"
|
||||
echo "${RED}Process not running.${OFF}" \
|
||||
>> $OUTPUT_DIR/fio.log
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
JOBS_WAITING=$JOBS
|
||||
JOBS_OLD=0
|
||||
COUNT=0
|
||||
NOTICE_INTERVAL=60
|
||||
while (( $JOBS_WAITING > 0 )) ; do
|
||||
sleep 1
|
||||
JOBS_WAITING=`jobs -r | wc -l`
|
||||
JOBS_FINISHED=$(( JOBS - JOBS_WAITING ))
|
||||
if (( $COUNT % $NOTICE_INTERVAL == 0 )) ; then
|
||||
if [[ $JOBS_WAITING != $JOBS_OLD ]] ; then
|
||||
echo "$JOBS_FINISHED jobs finished, $JOBS_WAITING jobs still running..."
|
||||
JOBS_OLD=$JOBS_WAITING
|
||||
fi
|
||||
fi
|
||||
COUNT=$(( COUNT + 1 ))
|
||||
echo $JOBS_WAITING > /run/fio.waiting
|
||||
done
|
||||
rm -f /run/fio.waiting
|
||||
|
||||
echo -n "END: " >> $OUTPUT_DIR/fio.log
|
||||
date >> $OUTPUT_DIR/fio.log
|
||||
echo >> $OUTPUT_DIR/fio.log
|
||||
|
||||
#snapshot
|
||||
mkdir -p $SNAPSHOT_D
|
||||
cp -af $OUTPUT_DIR/* $SNAPSHOT_D/
|
||||
|
||||
if [[ $SUMMARY != 0 ]] ; then
|
||||
if [[ $SUMMARY == plain ]] ; then
|
||||
echo "=====> FIO SUMMARY <====="
|
||||
fi
|
||||
echo "SERVER|DISK|SERIAL|RESULT|COMMENT" > $OUTPUT_DIR/fio.csv
|
||||
for DISK in `cat $OUTPUT_DIR/disk.lst \
|
||||
| sed 's/#.*$//' \
|
||||
| cut -d ':' -f 1`
|
||||
do
|
||||
# Look up the serial of exactly this disk. The pattern is anchored on
# "<disk>:" at the start of the line so that e.g. sda does not also match
# sdaa, and commented-out entries in disk.lst are not picked up.
SERIAL=`grep "^[[:space:]]*$DISK:" $OUTPUT_DIR/disk.lst | cut -d ':' -f 2`
|
||||
#echo "looking for $OUTPUT_DIR/disk/$SERIAL.fio.csv"
|
||||
if [[ $SUMMARY == plain ]] ; then
|
||||
if [[ -f $OUTPUT_DIR/disk/$SERIAL.fio.result ]] ; then
|
||||
cat $OUTPUT_DIR/disk/$SERIAL.fio.result
|
||||
fi
|
||||
fi
|
||||
if [[ -f $OUTPUT_DIR/disk/$SERIAL.fio.csv ]] ; then
|
||||
cat $OUTPUT_DIR/disk/$SERIAL.fio.csv \
|
||||
>> $OUTPUT_DIR/fio.csv
|
||||
fi
|
||||
done
|
||||
fi
|
||||
# In csv mode, print the collected fio.csv to the terminal. The sed pass
# wraps the first column in BLD/OFF and substitutes the RES_* values for the
# OK/FAIL/RESULT keywords (these variables are not defined in this script;
# presumably they come from test-variables -- confirm).
if [[ $SUMMARY == csv ]] ; then
  echo "=====> FIO CSV SUMMARY <====="
  cat $OUTPUT_DIR/fio.csv \
    | sed "s/^\([^|]\+\)/${BLD}\1${OFF}/ ; s/OK/$RES_OK/ ; s/FAIL/$RES_FAIL/ ; s/RESULT/$RES_ULT/ "
  # | sed 's/empty/W87152YGWGM/g' # Serial number for debugging
fi
|
||||
|
25
scripts/watcher-fio
Executable file
25
scripts/watcher-fio
Executable file
|
@ -0,0 +1,25 @@
|
|||
#!/bin/bash
|
||||
|
||||
if [[ -z $OUTPUT_DIR ]] ; then
|
||||
source scan-dmidecode
|
||||
fi
|
||||
|
||||
RUNNING=0
|
||||
while [[ $RUNNING == 0 ]] ; do
|
||||
sleep 1
|
||||
if [[ -f /run/fio.waiting ]] ; then
|
||||
RUNNING=1
|
||||
fi
|
||||
done
|
||||
sleep 5
|
||||
tail -Fq /run/*.fio.out &
|
||||
|
||||
TAIL_PID=`jobs -p %+`
|
||||
|
||||
while [[ -f /run/fio.waiting ]] ; do
|
||||
sleep 1
|
||||
done
|
||||
|
||||
kill $TAIL_PID
|
||||
rm -vf /run/*.fio.out
|
||||
|
|
@ -2,8 +2,9 @@
|
|||
|
||||
source /etc/profile
|
||||
|
||||
watcher-memtester &
|
||||
watcher-badblocks &
|
||||
#watcher-memtester &
|
||||
#watcher-badblocks &
|
||||
watcher-fio &
|
||||
journalctl -f &
|
||||
|
||||
while [[ -f /run/test.pid ]] ; do
|
||||
|
|
203
scripts/wrapper-fio
Executable file
203
scripts/wrapper-fio
Executable file
|
@ -0,0 +1,203 @@
|
|||
#!/bin/bash
|
||||
|
||||
source test-variables
|
||||
|
||||
DISK=$1
|
||||
if [[ -z $DISK ]] ; then
|
||||
echo "Missing DISK parameter"
|
||||
exit 11
|
||||
fi
|
||||
|
||||
SERIAL=$2
|
||||
if [[ -z $SERIAL ]] ; then
|
||||
echo "Missing SERIAL parameter"
|
||||
exit 12
|
||||
fi
|
||||
|
||||
# Third positional parameter: the process/job number assigned by the caller.
# It is only used to label this disk's result line.
MY_PROC=$3
if [[ -z $MY_PROC ]] ; then
  echo "Missing MY_PROC parameter"
  exit 16
fi
|
||||
|
||||
if [[ ! -b /dev/$DISK ]] ; then
|
||||
echo "Device /dev/$DISK is not a block special file"
|
||||
exit 13
|
||||
fi
|
||||
|
||||
DISK_SERIAL=`smartctl -i /dev/$DISK \
|
||||
| grep -i 'serial number' \
|
||||
| cut -d ':' -f 2 \
|
||||
| sed 's/\s\+//'`
|
||||
|
||||
if [[ -z $DISK_SERIAL ]] ; then
|
||||
echo "Empty serial number in disk /dev/$DISK"
|
||||
exit 14
|
||||
fi
|
||||
|
||||
if [[ $DISK_SERIAL != $SERIAL ]] ; then
|
||||
echo "Serial numbers $SERIAL and $DISK_SERIAL for $DISK do not match"
|
||||
exit 15
|
||||
fi
|
||||
|
||||
#TPATTERN=0x55
|
||||
TPATTERN=0xaa
|
||||
# TODO: Debug limit. Comment out for production
|
||||
#LST_BLCK=10000000
|
||||
SMARTLOG=300 # check every 5 minutes
|
||||
#SMARTLOG=60 # check every minute
|
||||
# TODO: Debug variable. Comment out for production
|
||||
#SMARTLOG=15 # Check every 15 seconds
|
||||
BLK_SIZE=`cat /sys/block/$DISK/queue/physical_block_size`
|
||||
MAX_SAME=`cat /sys/block/$DISK/queue/write_same_max_bytes`
|
||||
CONCU=$(( MAX_SAME / ( BLK_SIZE / 2 ) ))
|
||||
[[ $CONCU == 0 ]] && CONCU=32768
|
||||
|
||||
# Generate the fio job file for this disk. The file is truncated (">") on
# every run: appending would duplicate the [global] and [file1] sections
# whenever the test is repeated for the same disk.
cat > $OUTPUT_DIR/disk/$DISK.job << EOF
[global]
ioengine=libaio
direct=1
gtod_reduce=1
bs=$BLK_SIZE
iodepth=64
# TODO: Uncomment for production
#size=4G
# TODO: Debug variable. Comment out for production
size=128M
readwrite=randrw
rwmixread=75
directory=/mnt/$DISK

[file1]
name=test
filename=test

EOF
|
||||
|
||||
BEGIN=`date +%s`
|
||||
# Check SMART overall self test
|
||||
ERROR=`smartctl -H /dev/$DISK \
|
||||
| grep '\(overall-health\|Health Status\)' \
|
||||
| grep -v '\(PASSED\|OK\)'
|
||||
`
|
||||
if [[ -n $ERROR ]] ; then
|
||||
smartctl -H /dev/$DISK \
|
||||
> $OUTPUT_DIR/disk/$SERIAL.smart.error
|
||||
else
|
||||
ERROR=`smartctl -A -f brief /dev/$DISK \
|
||||
| parse-smart-attr \
|
||||
| grep ERROR \
|
||||
| cut -d ':' -f 2
|
||||
`
|
||||
# TODO: Debug variable. Comment out for production
|
||||
ERROR=
|
||||
if [[ -n $ERROR ]] ; then
|
||||
RAW_ERROR=$ERROR
|
||||
ERROR=''
|
||||
for TOKEN in $RAW_ERROR ; do
|
||||
ERROR="$ERROR $TOKEN"
|
||||
done
|
||||
smartctl -A -f brief /dev/$DISK \
|
||||
| parse-smart-attr \
|
||||
| sed "s/ERROR/$RES_ERROR/g" \
|
||||
> $OUTPUT_DIR/disk/$SERIAL.smart.error
|
||||
fi
|
||||
fi
|
||||
if [[ -z $ERROR ]] ; then
|
||||
sfdisk /dev/$DISK << EOF
|
||||
# partition table of /dev/sdb
|
||||
unit: sectors
|
||||
|
||||
/dev/sdb1 : start= 1, size=1953525167, Id=ee
|
||||
/dev/sdb2 : start= 0, size= 0, Id= 0
|
||||
/dev/sdb3 : start= 0, size= 0, Id= 0
|
||||
/dev/sdb4 : start= 0, size= 0, Id= 0
|
||||
EOF
|
||||
mkfs.ext4 /dev/${DISK}1
|
||||
mkdir -pv /mnt/${DISK}
|
||||
mount /dev/${DISK}1 /mnt/${DISK}
|
||||
fio --output $OUTPUT_DIR/disk/$SERIAL.fio.out \
|
||||
$OUTPUT_DIR/disk/$DISK.job \
|
||||
2> /run/$SERIAL.fio.out &
|
||||
TEST_PID=`jobs -p %+`
|
||||
echo $TEST_PID > /run/$SERIAL.fio.pid
|
||||
while [[ -d /proc/$TEST_PID ]] ; do
|
||||
# Check SMART error log
|
||||
ERROR=`smartctl -l error /dev/$DISK \
|
||||
| parse-smart-error \
|
||||
| sed 's/://g'
|
||||
`
|
||||
if [[ -n $ERROR ]] ; then
|
||||
smartctl -l error /dev/$DISK \
|
||||
> $OUTPUT_DIR/disk/$SERIAL.smart.error
|
||||
kill $TEST_PID
|
||||
sleep 5
|
||||
fi
|
||||
sleep $SMARTLOG
|
||||
done
|
||||
umount /mnt/${DISK}
|
||||
# Reset partition table to zero
|
||||
dd if=/dev/zero of=/dev/${DISK} bs=$BLK_SIZE count=128 \
|
||||
>> /run/$SERIAL.fio.out 2>&1
|
||||
if [[ -z $ERROR ]] ; then
|
||||
# Check smart attributes after the fio test
|
||||
ERROR=`smartctl -A -f brief /dev/$DISK \
|
||||
| parse-smart-attr \
|
||||
| grep ERROR \
|
||||
| cut -d ':' -f 2
|
||||
`
|
||||
# TODO: Debug variable. Comment out for production
|
||||
ERROR=
|
||||
if [[ -n $ERROR ]] ; then
|
||||
RAW_ERROR=$ERROR
|
||||
ERROR=''
|
||||
for TOKEN in $RAW_ERROR ; do
|
||||
ERROR="$ERROR $TOKEN"
|
||||
done
|
||||
smartctl -A -f brief /dev/$DISK \
|
||||
| parse-smart-attr \
|
||||
| sed "s/ERROR/$RES_ERROR/g" \
|
||||
> $OUTPUT_DIR/disk/$SERIAL.smart.error
|
||||
fi
|
||||
fi
|
||||
#RET=$?
|
||||
#echo $RET > $OUTPUT_DIR/disk/$SERIAL.fio.ret
|
||||
rm -f /run/$SERIAL.fio.pid
|
||||
fi
|
||||
END=`date +%s`
|
||||
|
||||
DURATION=$(( END - BEGIN ))
|
||||
|
||||
echo -n "${BLD}Process $MY_PROC${OFF} : " \
|
||||
> $OUTPUT_DIR/disk/$SERIAL.fio.result
|
||||
if [[ -n $ERROR ]] ; then
|
||||
echo -n "Disk $DISK $SERIAL $ERROR : " \
|
||||
>> $OUTPUT_DIR/disk/$SERIAL.fio.result
|
||||
echo $RES_FAIL \
|
||||
>> $OUTPUT_DIR/disk/$SERIAL.fio.result
|
||||
echo "$SERVER_SERIAL|$DISK|$SERIAL|FAIL|$ERROR" \
|
||||
> $OUTPUT_DIR/disk/$SERIAL.fio.csv
|
||||
else
|
||||
echo -n "disk $DISK $SERIAL tested in ${DURATION}s : " \
|
||||
>> $OUTPUT_DIR/disk/$SERIAL.fio.result
|
||||
echo $RES_OK \
|
||||
>> $OUTPUT_DIR/disk/$SERIAL.fio.result
|
||||
echo "$SERVER_SERIAL|$DISK|$SERIAL|OK|${DURATION}s" \
|
||||
> $OUTPUT_DIR/disk/$SERIAL.fio.csv
|
||||
fi
|
||||
|
||||
ln -sf $OUTPUT_DIR/disk/$SERIAL.fio.out \
|
||||
$OUTPUT_DIR/disk/$DISK.fio.out
|
||||
ln -sf $OUTPUT_DIR/disk/$SERIAL.fio.result \
|
||||
$OUTPUT_DIR/disk/$DISK.fio.result
|
||||
ln -sf $OUTPUT_DIR/disk/$SERIAL.fio.csv \
|
||||
$OUTPUT_DIR/disk/$DISK.fio.csv
|
||||
if [[ -f $OUTPUT_DIR/disk/$SERIAL.smart.error ]] ; then
|
||||
ln -sf $OUTPUT_DIR/disk/$SERIAL.smart.error \
|
||||
$OUTPUT_DIR/disk/$DISK.smart.error
|
||||
fi
|
||||
|
||||
cat $OUTPUT_DIR/disk/$SERIAL.fio.result
|
||||
cat $OUTPUT_DIR/disk/$SERIAL.fio.result \
|
||||
>> $OUTPUT_DIR/fio.log
|
||||
|
Loading…
Reference in a new issue