#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2025-2026 Oracle.  All Rights Reserved.
#
# FS QA Test No. 664
#
# Ensure that autonomous self healing won't fix the wrong filesystem if a
# snapshot of the original filesystem is now mounted on the same directory as
# the original.
#
. ./common/preamble
_begin_fstest auto selfhealing

. ./common/filter
. ./common/fuzzy
. ./common/systemd

# Tear down whatever the test managed to set up.  Each step is guarded so the
# function can run no matter how far the test got before exiting.
_cleanup()
{
	command -v _kill_fsstress &>/dev/null && _kill_fsstress
	cd /
	# $tmp.* is deliberately left unquoted so that the glob expands.
	rm -r -f $tmp.*

	# The test pauses the healer with SIGSTOP.  If the test bails out before
	# resuming it, the healer would be left behind as a stopped process, so
	# wake it up before tearing down the mounts.  SIGCONT is harmless for a
	# process that is not stopped; errors for a vanished pid are discarded.
	test -n "$XFS_HEALER_PID" && kill -CONT "$XFS_HEALER_PID" &>/dev/null

	# Two filesystems may be stacked on $mntpt (the original and its clone),
	# hence two unmount attempts.
	test -e "$mntpt" && _unmount "$mntpt" &>/dev/null
	test -e "$mntpt" && _unmount "$mntpt" &>/dev/null
	test -e "$loop1" && _destroy_loop_device "$loop1"
	test -e "$loop2" && _destroy_loop_device "$loop2"
	test -e "$testdir" && rm -r -f "$testdir"
}

# Prerequisites: skip (rather than fail) when something the test relies on is
# not available.
_require_test
_require_loop		# victim and decoy filesystems live on loop devices
_require_scrub
_require_xfs_io_command "repair"	# online repair support
_require_xfs_db_command "blocktrash"
_require_command "$XFS_HEALER_PROG" "xfs_healer"
_require_command "$XFS_PROPERTY_PROG" "xfs_property"

# All state created by this test lives in a per-test directory under $TEST_DIR.
# disk1 backs the victim filesystem; disk2 later receives a copy of disk1 and
# serves as the decoy.  Both filesystems get mounted on $mntpt.
testdir=$TEST_DIR/$seq
mntpt=$testdir/mount
disk1=$testdir/disk1
disk2=$testdir/disk2

# Create the mountpoint and size both image files to 300m.  Only disk1 gets a
# loop device for now; disk2 is attached after it has been filled with the
# copy of disk1.
mkdir -p "$mntpt"
$XFS_IO_PROG -f -c "truncate 300m" $disk1
$XFS_IO_PROG -f -c "truncate 300m" $disk2
loop1="$(_create_loop_device "$disk1")"

# Filter stdin to stdout, replacing every occurrence of the test mountpoint
# path ($mntpt) with the literal string MNTPT.
filter_mntpt()
{
	local script="s|${mntpt}|MNTPT|g"

	sed -e "$script"
}

# Format the first loop device and mount it as the victim filesystem.  A mount
# failure skips the test instead of failing it.
_mkfs_dev "$loop1" >> $seqres.full
_mount "$loop1" "$mntpt" || _notrun "cannot mount victim filesystem"

# The test needs the rmapbt and parent features on the freshly made
# filesystem, plus an xfs_healer that accepts --repair for this mount; skip
# the test otherwise.
_xfs_has_feature $mntpt rmapbt || \
	_notrun "reverse mapping required to test directory auto-repair"
_xfs_has_feature $mntpt parent || \
	_notrun "parent pointers required to test directory auto-repair"
_require_xfs_healer $mntpt --repair

# Set the autofsck property to "repair" to opt this filesystem in to automatic
# repairs.
$XFS_PROPERTY_PROG $mntpt set autofsck=repair >> $seqres.full

# Create a largeish directory: fill some/victimdir with (dblksz / 255) hard
# links to $mntpt/a, each with a 255-character zero-padded numeric name, so
# that the names alone add up to roughly one directory block.
dblksz=$(_xfs_get_dir_blocksize "$mntpt")
echo testdata > $mntpt/a
mkdir -p "$mntpt/some/victimdir"
for ((i = 0; i < (dblksz / 255); i++)); do
	fname="$(printf "%0255d" "$i")"
	ln $mntpt/a $mntpt/some/victimdir/$fname
done

# Did we get at least two dir blocks?  The directory size must exceed one
# directory block; if not, the message below lands in the test output.
dirsize=$(stat -c '%s' $mntpt/some/victimdir)
test "$dirsize" -gt "$dblksz" || echo "failed to create two-block directory"

# Clone the fs, break the directory, remount filesystem
_unmount "$mntpt"

# Copy the still-intact image to disk2 *before* corrupting disk1, then attach
# the copy to a second loop device.
# NOTE(review): _create_loop_device_like_bdev presumably sets up the new loop
# device with the same block device properties as $loop1 -- confirm against
# the helper.
cp --sparse=always "$disk1" "$disk2" || _fail "cannot copy disk1"
loop2="$(_create_loop_device_like_bdev "$disk2" "$loop1")"

# With the filesystem unmounted, use xfs_db in expert mode (-x) to navigate to
# directory block 1 of /some/victimdir on the original device and run
# blocktrash on it.  Only the original is damaged; the clone on loop2 is left
# alone.
$XFS_DB_PROG "$loop1" -x \
	-c 'path /some/victimdir' \
	-c 'bmap' \
	-c 'dblock 1' \
	-c 'blocktrash -z -0 -o 0 -x 2048 -y 2048 -n 2048' >> $seqres.full
_mount "$loop1" "$mntpt" || _fail "cannot mount broken fs"

# Start xfs_healer in repair mode against the broken filesystem.  Its log is
# read back from $tmp.healer below, and its pid is expected in XFS_HEALER_PID.
_invoke_xfs_healer "$mntpt" "$tmp.healer" --repair

# Stop the healer process so that it can't read error events while we do some
# shenanigans.
test -n "$XFS_HEALER_PID" || _fail "nobody set XFS_HEALER_PID?"
kill -STOP $XFS_HEALER_PID

# Record the healer log as it stands before any error has been triggered.
echo "LOG $XFS_HEALER_PID SO FAR:" >> $seqres.full
cat $tmp.healer >> $seqres.full

# Access the broken directory to trigger a repair event, which will not yet be
# processed.  Anything ls prints on stderr becomes part of the test output,
# with the mountpoint path filtered out.
ls $mntpt/some/victimdir > /dev/null 2> $tmp.err
filter_mntpt < $tmp.err

# Debugging aid: record the healer's process state in the full log.
ps auxfww | grep xfs_healer >> $seqres.full

echo "LOG AFTER TRYING TO POKE:" >> $seqres.full
cat $tmp.healer >> $seqres.full

# Mount the clone filesystem to the same mountpoint so that the healer cannot
# actually reopen it to perform repairs.
# NOTE(review): the clone is a copy of disk1, so nouuid is presumably needed
# to let XFS mount a second filesystem carrying the same UUID -- confirm.
_mount "$loop2" "$mntpt" -o nouuid || _fail "cannot mount decoy fs"

# Debugging aid: record the XFS mounts now in place.
grep -w xfs /proc/mounts >> $seqres.full

# Continue the healer process so it can handle events now.  Wait a few seconds
# while it fails to reopen disk1's mount point to repair things.
kill -CONT $XFS_HEALER_PID
sleep 2

# Unmount both filesystems stacked on $mntpt: the decoy on top first, then the
# original underneath it.  The exit status is that of the second unmount.
new_dir_unmount()
{
	local layer

	for layer in decoy original; do
		_unmount "$mntpt"
	done
}

# Unmount to kill the healer.  new_dir_unmount is handed to the helper so that
# both filesystems stacked on $mntpt are unmounted.
_kill_xfs_healer new_dir_unmount
echo "LOG AFTER FAILURE" >> $seqres.full
cat $tmp.healer >> $seqres.full

# Did the healer log complaints about not being able to reopen the mountpoint
# to enact repairs?  If not, the message below lands in the test output.
grep -q 'Stale file handle' $tmp.healer || \
	echo "Should have seen stale file handle complaints"

_exit 0
