#! /bin/bash
# SPDX-License-Identifier: GPL-2.0-or-later
# Copyright (c) 2024-2026 Oracle.  All Rights Reserved.
#
# FS QA Test No. 791
#
# Check that fsnotify can report file IO errors.

# Pull in the shared test preamble, then register this test.
# NOTE(review): the arguments to _begin_fstest look like the test groups this
# test belongs to (auto, quick, eio, selfhealing) -- confirm against
# common/preamble.
. ./common/preamble
_begin_fstest auto quick eio selfhealing

# Test-specific replacement for the default cleanup handler.
#
# Leaves whatever directory the test was in, terminates the background
# fs-monitor if one is still recorded in $fsmonitor_pid, removes this test's
# $tmp.* files, and finally hands off to _dmerror_cleanup.
_cleanup()
{
	cd /
	if [ -n "$fsmonitor_pid" ]; then
		kill -TERM $fsmonitor_pid
	fi
	rm -f $tmp.*
	_dmerror_cleanup
}

# Import common functions.
. ./common/fuzzy
. ./common/filter
. ./common/dmerror
. ./common/systemd

# Prerequisites for this test.
# NOTE(review): the _require_* helpers live in the common/ libraries;
# presumably each one skips the test when its requirement is unmet -- confirm.

# A scratch device, which gets reformatted and wrapped in a dm-error target.
_require_scratch
_require_dm_target error
# src/fs-monitor is run in the background to capture the fanotify events.
_require_test_program fs-monitor
# "fiemap -v" is used to find the physical location of the victim file.
_require_xfs_io_command "fiemap"
# Both direct and buffered I/O are issued against the victim file.
_require_odirect
_require_fanotify_ioerrors

# No out of place writes.  NOTE(review): presumably because the bad sector is
# chosen from the file's original mapping and the write tests must hit that
# same location -- confirm.
test "$FSTYP" = "xfs" && _require_no_xfs_always_cow

# Reduce fs-monitor output (read from stdin) to a stable summary on stdout.
#
# fsnotify only gives us a file handle, the error number, and the number of
# times it was seen in between event deliveries.  The handle is mostly useless
# since we have no generic way to map it to a file path, so all we can do is
# coalesce the I/O errors into one report: keep only the interesting record
# lines, mask the variable "len=" values, and emit each distinct line once in
# sorted order.
filter_fsnotify_errors() {
	local keep_re='(FAN_FS_ERROR|Generic Error Record|error: 5)'

	_filter_scratch |
		grep -E "$keep_re" |
		sed -e 's/len=[0-9]*/len=XXX/g' |
		sort -u
}

# Format the scratch device; mkfs chatter goes to the full log only.
_scratch_mkfs >> $seqres.full

#
# The dm-error map added by this test doesn't work on zoned devices because
# table sizes need to be aligned to the zone size, and even for zoned on
# conventional this test will get confused because of the internal RT device.
#
# That check requires a mounted file system, so do a dummy mount before setting
# up DM.
#
_scratch_mount
# $FSTYP is quoted so that an empty value yields a false comparison rather
# than a "test" syntax error.
test "$FSTYP" = "xfs" && _require_xfs_scratch_non_zoned
_scratch_unmount

# From here on the filesystem is accessed through the dm-error device.
_dmerror_init
_dmerror_mount >> $seqres.full 2>&1

test "$FSTYP" = "xfs" && _xfs_force_bdev data $SCRATCH_MNT

# Write a file with 4 file blocks worth of data, then look up where its first
# extent landed so that a sector inside it can be targeted.
victim=$SCRATCH_MNT/a
file_blksz=$(_get_file_block_size $SCRATCH_MNT)
$XFS_IO_PROG -f -c "pwrite -S 0x58 0 $((4 * file_blksz))" -c "fsync" $victim >> $seqres.full

# Keep only the record for extent 0 of the verbose fiemap listing.
bmap_str="$($XFS_IO_PROG -c "fiemap -v" $victim | grep "^[[:space:]]*0:")"
echo "$bmap_str" >> $seqres.full

# Field 3 is treated as the physical range ("start..end") and field 4 as the
# extent length; both are used as 512-byte sector counts below.
phys="$(echo "$bmap_str" | $AWK_PROG '{print $3}')"
len="$(echo "$bmap_str" | $AWK_PROG '{print $4}')"
phys_start="${phys%%..*}"

fs_blksz=$(_get_block_size $SCRATCH_MNT)
echo "file_blksz:$file_blksz:fs_blksz:$fs_blksz" >> $seqres.full
kernel_sectors_per_fs_block=$((fs_blksz / 512))

echo "$phys:$len:$fs_blksz:$phys_start" >> $seqres.full

# Refuse to continue on an unparseable mapping.  Without this, an empty or
# garbled field only makes "test" complain, the length check below never
# fires, and the wrong sector ends up being marked bad.
if ! [[ "$len" =~ ^[0-9]+$ && "$phys_start" =~ ^[0-9]+$ ]]; then
	_fail "could not parse fiemap output for $victim: '$bmap_str'"
fi

# Did we get at least 4 fs blocks worth of extent?
min_len_sectors=$(( 4 * kernel_sectors_per_fs_block ))
if [ "$len" -lt "$min_len_sectors" ]; then
	_fail "could not format a long enough extent on an empty fs??"
fi

echo "victim file:" >> $seqres.full
od -tx1 -Ad -c $victim >> $seqres.full

# Set the dmerror table so that all IO will pass through.
_dmerror_reset_table

cat >> $seqres.full << ENDL
dmerror before:
$DMERROR_TABLE
$DMERROR_RTTABLE
<end table>
ENDL

# All sector numbers that we feed to the kernel must be in units of 512b, but
# they also must be aligned to the device's logical block size.
logical_block_size=$($here/src/min_dio_alignment $SCRATCH_MNT $SCRATCH_DEV)

# A missing or sub-sector value would turn the sectors-per-LBA count into zero,
# which breaks the alignment check below (division by zero) and produces a
# zero-length bad range.  Stop here instead.
if ! [[ "$logical_block_size" =~ ^[0-9]+$ ]] || [ "$logical_block_size" -lt 512 ]; then
	_fail "unusable device logical block size '$logical_block_size'"
fi
kernel_sectors_per_device_lba=$((logical_block_size / 512))

# Mark as bad one of the device LBAs in the middle of the extent.  Target the
# second LBA of the third block of the four-block file extent that we allocated
# earlier, but without overflowing into the fourth file block.
bad_sector=$(( phys_start + (2 * kernel_sectors_per_fs_block) ))
bad_len=$kernel_sectors_per_device_lba
if (( kernel_sectors_per_device_lba < kernel_sectors_per_fs_block )); then
	# The fs block holds more than one LBA, so step to the second one.
	bad_sector=$((bad_sector + kernel_sectors_per_device_lba))
fi
# A misaligned target is reported on stdout, where it shows up in the test
# output.
if (( (bad_sector % kernel_sectors_per_device_lba) != 0)); then
	echo "bad_sector $bad_sector not congruent with device logical block size $logical_block_size"
fi

# Remount to flush the page cache, start fsnotify, and make the LBA bad
_dmerror_unmount
_dmerror_mount

# Run the monitor in the background with its stdout captured in a temp file,
# and remember its pid so that it can be killed later (or by _cleanup).
$here/src/fs-monitor $SCRATCH_MNT > $tmp.fsmonitor &
fsmonitor_pid=$!
# Fixed delay before errors are injected.
# NOTE(review): presumably gives fs-monitor time to set up its watch; there is
# no handshake with the monitor -- confirm that one second is sufficient.
sleep 1

# Record the bad range.
# NOTE(review): this appears to only edit the table variables logged below;
# _dmerror_load_error_table further down is presumably what activates them.
_dmerror_mark_range_bad $bad_sector $bad_len

# Log the table for debugging.
cat >> $seqres.full << ENDL
dmerror after marking bad:
$DMERROR_TABLE
$DMERROR_RTTABLE
<end table>
ENDL

_dmerror_load_error_table

# Every I/O below covers the whole four-block victim file, and therefore the
# sector that was just marked bad.  xfs_io output goes to the full log only.
victim_bytes=$((4 * file_blksz))

# Buffered read across the bad sector
echo "Try buffered read"
$XFS_IO_PROG -c "pread 0 $victim_bytes" $victim >> $seqres.full

# Direct I/O read across the bad sector
echo "Try directio read"
$XFS_IO_PROG -d -c "pread 0 $victim_bytes" $victim >> $seqres.full

# Direct I/O write across the bad sector
echo "Try directio write"
$XFS_IO_PROG -d -c "pwrite -S 0x58 0 $victim_bytes" -c fsync $victim >> $seqres.full

# Buffered write (plus fsync) across the bad sector
echo "Try buffered write"
$XFS_IO_PROG -c "pwrite -S 0x58 0 $victim_bytes" -c fsync $victim >> $seqres.full

# Now mark the bad range good so that unmount won't fail due to IO errors.
# The same sector and length that were marked bad earlier are passed again.
echo "Fix device"
_dmerror_mark_range_good $bad_sector $bad_len
_dmerror_load_error_table

# Log the repaired table for debugging.
cat >> $seqres.full << ENDL
dmerror after marking good:
$DMERROR_TABLE
$DMERROR_RTTABLE
<end table>
ENDL

# Unmount the filesystem to start fresh, then stop the monitor and collect
# what it logged.
echo "Kill fsnotify"
_dmerror_unmount
sleep 1
kill -TERM $fsmonitor_pid
# Reap the monitor before reading its log so that it cannot still be writing
# to the file.  stderr is discarded to hide any job status message from the
# shell.
wait $fsmonitor_pid 2> /dev/null
unset fsmonitor_pid
echo fsnotify log >> $seqres.full
cat $tmp.fsmonitor >> $seqres.full
# The filtered summary goes to stdout and is therefore part of the test output.
filter_fsnotify_errors < $tmp.fsmonitor

# Start fsnotify again so that we can verify that the errors don't persist
# after we flip back to the good dm table.
echo "Remount and restart fsnotify"
_dmerror_mount
# The new monitor overwrites the previous log file; its pid is recorded again
# so that it can be killed later (or by _cleanup).
$here/src/fs-monitor $SCRATCH_MNT > $tmp.fsmonitor &
fsmonitor_pid=$!
# Fixed delay before issuing I/O.
# NOTE(review): presumably gives fs-monitor time to set up its watch -- confirm.
sleep 1

# Repeat the same four whole-file I/Os now that the device has been repaired;
# this pass is meant to show that the errors do not persist.  xfs_io output
# goes to the full log only.
recheck_bytes=$((4 * file_blksz))

# Buffered read
echo "Try buffered read again"
$XFS_IO_PROG -c "pread 0 $recheck_bytes" $victim >> $seqres.full

# Direct I/O read
echo "Try directio read again"
$XFS_IO_PROG -d -c "pread 0 $recheck_bytes" $victim >> $seqres.full

# Direct I/O write
echo "Try directio write again"
$XFS_IO_PROG -d -c "pwrite -S 0x58 0 $recheck_bytes" -c fsync $victim >> $seqres.full

# Buffered write (plus fsync)
echo "Try buffered write again"
$XFS_IO_PROG -c "pwrite -S 0x58 0 $recheck_bytes" -c fsync $victim >> $seqres.full

# Unmount fs and kill fsnotify, then wait for it to finish
echo "Kill fsnotify again"
_dmerror_unmount
sleep 1
kill -TERM $fsmonitor_pid
# Reap the monitor before reading its log so that it cannot still be writing
# to the file.  stderr is discarded to hide any job status message from the
# shell.
wait $fsmonitor_pid 2> /dev/null
unset fsmonitor_pid
cat $tmp.fsmonitor >> $seqres.full
# The filtered summary goes to stdout and is therefore part of the test output.
filter_fsnotify_errors < $tmp.fsmonitor

# success, all done
_exit 0
