mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-22 16:06:04 -05:00
c2b47df81c
[BUG]
With CONFIG_DEBUG_VM set, test case generic/476 has some chance to crash
with the following VM_BUG_ON_FOLIO():

  BTRFS error (device dm-3): cow_file_range failed, start 1146880 end 1253375 len 106496 ret -28
  BTRFS error (device dm-3): run_delalloc_nocow failed, start 1146880 end 1253375 len 106496 ret -28
  page: refcount:4 mapcount:0 mapping:00000000592787cc index:0x12 pfn:0x10664
  aops:btrfs_aops [btrfs] ino:101 dentry name(?):"f1774"
  flags: 0x2fffff80004028(uptodate|lru|private|node=0|zone=2|lastcpupid=0xfffff)
  page dumped because: VM_BUG_ON_FOLIO(!folio_test_locked(folio))
  ------------[ cut here ]------------
  kernel BUG at mm/page-writeback.c:2992!
  Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
  CPU: 2 UID: 0 PID: 3943513 Comm: kworker/u24:15 Tainted: G OE 6.12.0-rc7-custom+ #87
  Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
  Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
  Workqueue: events_unbound btrfs_async_reclaim_data_space [btrfs]
  pc : folio_clear_dirty_for_io+0x128/0x258
  lr : folio_clear_dirty_for_io+0x128/0x258
  Call trace:
   folio_clear_dirty_for_io+0x128/0x258
   btrfs_folio_clamp_clear_dirty+0x80/0xd0 [btrfs]
   __process_folios_contig+0x154/0x268 [btrfs]
   extent_clear_unlock_delalloc+0x5c/0x80 [btrfs]
   run_delalloc_nocow+0x5f8/0x760 [btrfs]
   btrfs_run_delalloc_range+0xa8/0x220 [btrfs]
   writepage_delalloc+0x230/0x4c8 [btrfs]
   extent_writepage+0xb8/0x358 [btrfs]
   extent_write_cache_pages+0x21c/0x4e8 [btrfs]
   btrfs_writepages+0x94/0x150 [btrfs]
   do_writepages+0x74/0x190
   filemap_fdatawrite_wbc+0x88/0xc8
   start_delalloc_inodes+0x178/0x3a8 [btrfs]
   btrfs_start_delalloc_roots+0x174/0x280 [btrfs]
   shrink_delalloc+0x114/0x280 [btrfs]
   flush_space+0x250/0x2f8 [btrfs]
   btrfs_async_reclaim_data_space+0x180/0x228 [btrfs]
   process_one_work+0x164/0x408
   worker_thread+0x25c/0x388
   kthread+0x100/0x118
   ret_from_fork+0x10/0x20
  Code: 910a8021 a90363f7 a9046bf9 94012379 (d4210000)
  ---[ end trace 0000000000000000 ]---

[CAUSE]
The first two lines of extra debug messages show the problem is caused
by the error handling of run_delalloc_nocow().

E.g. we have the following dirtied range (4K blocksize 4K page size):

    0                    16K                    32K
    |///////////////////////////////////////////|
    |    Pre-allocated   |

And the range [0, 16K) has a preallocated extent.

- Enter run_delalloc_nocow() for range [0, 16K)
  Which found range [0, 16K) is preallocated, can do the proper NOCOW
  write.

- Enter fallback_to_cow() for range [16K, 32K)
  Since the range [16K, 32K) is not backed by a preallocated extent, we
  have to go COW.

- cow_file_range() failed for range [16K, 32K)
  So cow_file_range() will do the cleanup by clearing the folio dirty
  flags and unlocking the folios.

  Now the folios in range [16K, 32K) are unlocked.

- Enter extent_clear_unlock_delalloc() from run_delalloc_nocow()
  Which is called with PAGE_START_WRITEBACK to start page writeback.
  But folios can only be marked writeback when they are properly locked,
  thus this triggered the VM_BUG_ON_FOLIO().

Furthermore there is another hidden bug: run_delalloc_nocow() is not
clearing the folio dirty flags in its error handling path.  This
dirty-flag handling bug is shared between run_delalloc_nocow() and
cow_file_range().

[FIX]
- Clear folio dirty for range [@start, @cur_offset)
  Introduce a helper, cleanup_dirty_folios(), which will find and lock
  the folio in the range, clear the dirty flag and start/end the
  writeback, with the extra handling for the @locked_folio.

- Introduce a helper to clear folio dirty, start and end writeback

- Introduce a helper to record the last failed COW range end
  This is to trace which range we should skip, to avoid double
  unlocking.

- Skip the failed COW range for the error handling

CC: stable@vger.kernel.org
Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
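To make the [FIX] description more concrete, below is a rough sketch of what a
cleanup_dirty_folios()-style helper could look like, built only from the
description above and the btrfs_folio_clamp_*() helpers declared in subpage.h
further down.  The helper name comes from the commit message; the body, the
ASSERT()s and the locked_folio handling are illustrative assumptions (order-0
data folios assumed), not the committed code:

    /* Illustrative sketch only, not the committed change. */
    static void cleanup_dirty_folios(struct btrfs_inode *inode,
                                     struct folio *locked_folio,
                                     u64 start, u64 end)
    {
            struct btrfs_fs_info *fs_info = inode->root->fs_info;
            struct address_space *mapping = inode->vfs_inode.i_mapping;
            pgoff_t start_index = start >> PAGE_SHIFT;
            pgoff_t end_index = end >> PAGE_SHIFT;
            pgoff_t index;
            u32 len;

            /* The range must be sector aligned and small enough for a u32. */
            ASSERT(end + 1 - start < U32_MAX);
            ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
                   IS_ALIGNED(end + 1, fs_info->sectorsize));
            len = end + 1 - start;

            for (index = start_index; index <= end_index; index++) {
                    struct folio *folio;
                    bool is_locked = (locked_folio &&
                                      index == locked_folio->index);

                    if (is_locked) {
                            /* Held locked by the caller, do not unlock it here. */
                            folio = locked_folio;
                    } else {
                            folio = filemap_lock_folio(mapping, index);
                            /* The folio is already released, nothing to clean up. */
                            if (IS_ERR(folio))
                                    continue;
                    }

                    /* The clamp variants truncate [start, start + len) to this folio. */
                    btrfs_folio_clamp_clear_dirty(fs_info, folio, start, len);
                    btrfs_folio_clamp_set_writeback(fs_info, folio, start, len);
                    btrfs_folio_clamp_clear_writeback(fs_info, folio, start, len);

                    if (!is_locked) {
                            folio_unlock(folio);
                            folio_put(folio);
                    }
            }
    }

The real helper also has to coordinate with the "last failed COW range end"
tracking mentioned above, so the same folios are not unlocked twice.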
164 lines
5.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_SUBPAGE_H
#define BTRFS_SUBPAGE_H

#include <linux/spinlock.h>
#include <linux/atomic.h>
#include <linux/sizes.h>

struct address_space;
struct folio;
struct btrfs_fs_info;

/*
 * Extra info for subpage bitmap.
 *
 * For subpage we pack all uptodate/dirty/writeback/ordered bitmaps into
 * one larger bitmap.
 *
 * This structure records how they are organized in the bitmap:
 *
 * /- uptodate           /- dirty        /- ordered
 * |                     |               |
 * v                     v               v
 * |u|u|u|u|........|u|u|d|d|.......|d|d|o|o|.......|o|o|
 * |< sectors_per_page >|
 *
 * Unlike regular macro-like enums, here we do not go upper-case names, as
 * these names will be utilized in various macros to define function names.
 */
enum {
        btrfs_bitmap_nr_uptodate = 0,
        btrfs_bitmap_nr_dirty,
        btrfs_bitmap_nr_writeback,
        btrfs_bitmap_nr_ordered,
        btrfs_bitmap_nr_checked,
        btrfs_bitmap_nr_locked,
        btrfs_bitmap_nr_max
};
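
/*
 * Illustrative note (not part of the original header): with e.g. a 64K page
 * and a 4K sectorsize, sectors_per_page is 16, so each state above owns a
 * 16-bit slice of bitmaps[] in enum order: uptodate uses bits [0, 16),
 * dirty [16, 32), writeback [32, 48), ordered [48, 64), checked [64, 80)
 * and locked [80, 96).  The bit for a given sector is thus its sector index
 * inside the page plus btrfs_bitmap_nr_<state> * sectors_per_page.
 */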

/*
 * Structure to trace status of each sector inside a page, attached to
 * page::private for both data and metadata inodes.
 */
struct btrfs_subpage {
        /* Common members for both data and metadata pages */
        spinlock_t lock;
        union {
                /*
                 * Structures only used by metadata
                 *
                 * @eb_refs should only be operated under private_lock, as it
                 * manages whether the subpage can be detached.
                 */
                atomic_t eb_refs;

                /*
                 * Structures only used by data,
                 *
                 * How many sectors inside the page is locked.
                 */
                atomic_t nr_locked;
        };
        unsigned long bitmaps[];
};

enum btrfs_subpage_type {
        BTRFS_SUBPAGE_METADATA,
        BTRFS_SUBPAGE_DATA,
};

#if PAGE_SIZE > SZ_4K
bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping);
#else
static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info,
                                    struct address_space *mapping)
{
        return false;
}
#endif

int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
                         struct folio *folio, enum btrfs_subpage_type type);
void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio);

/* Allocate additional data where page represents more than one sector */
struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
                                          enum btrfs_subpage_type type);
void btrfs_free_subpage(struct btrfs_subpage *subpage);

void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio);
void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio);

void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
                          struct folio *folio, u64 start, u32 len);
void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
                          struct folio *folio, u64 start, u32 len);
void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, unsigned long bitmap);
/*
 * Template for subpage related operations.
 *
 * btrfs_subpage_*() are for call sites where the folio has subpage attached and
 * the range is ensured to be inside the folio's single page.
 *
 * btrfs_folio_*() are for call sites where the page can either be subpage
 * specific or regular folios. The function will handle both cases.
 * But the range still needs to be inside one single page.
 *
 * btrfs_folio_clamp_*() are similar to btrfs_folio_*(), except the range doesn't
 * need to be inside the page. Those functions will truncate the range
 * automatically.
 */
#define DECLARE_BTRFS_SUBPAGE_OPS(name) \
void btrfs_subpage_set_##name(const struct btrfs_fs_info *fs_info, \
                              struct folio *folio, u64 start, u32 len); \
void btrfs_subpage_clear_##name(const struct btrfs_fs_info *fs_info, \
                                struct folio *folio, u64 start, u32 len); \
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
                               struct folio *folio, u64 start, u32 len); \
void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info, \
                            struct folio *folio, u64 start, u32 len); \
void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info, \
                              struct folio *folio, u64 start, u32 len); \
bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info, \
                             struct folio *folio, u64 start, u32 len); \
void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info, \
                                  struct folio *folio, u64 start, u32 len); \
void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
                                    struct folio *folio, u64 start, u32 len); \
bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
                                   struct folio *folio, u64 start, u32 len);

DECLARE_BTRFS_SUBPAGE_OPS(uptodate);
DECLARE_BTRFS_SUBPAGE_OPS(dirty);
DECLARE_BTRFS_SUBPAGE_OPS(writeback);
DECLARE_BTRFS_SUBPAGE_OPS(ordered);
DECLARE_BTRFS_SUBPAGE_OPS(checked);
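
/*
 * Added note: each DECLARE_BTRFS_SUBPAGE_OPS(name) above expands to nine
 * declarations, e.g. DECLARE_BTRFS_SUBPAGE_OPS(dirty) declares
 * btrfs_subpage_set/clear/test_dirty(), btrfs_folio_set/clear/test_dirty()
 * and btrfs_folio_clamp_set/clear/test_dirty(), all taking
 * (fs_info, folio, start, len).
 */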

/*
 * Helper for error cleanup, where a folio will have its dirty flag cleared,
 * with writeback started and finished.
 */
static inline void btrfs_folio_clamp_finish_io(struct btrfs_fs_info *fs_info,
                                               struct folio *locked_folio,
                                               u64 start, u32 len)
{
        btrfs_folio_clamp_clear_dirty(fs_info, locked_folio, start, len);
        btrfs_folio_clamp_set_writeback(fs_info, locked_folio, start, len);
        btrfs_folio_clamp_clear_writeback(fs_info, locked_folio, start, len);
}
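
/*
 * Added note: this is the error-path pattern from the fix described in the
 * commit message above: a range that will never be submitted for IO is
 * marked clean and has its writeback started and immediately finished, so
 * writeback accounting and waiters stay consistent.  The caller is expected
 * to hold the folio locked, as clearing the dirty flag for IO on an unlocked
 * folio triggers the VM_BUG_ON_FOLIO() shown in the commit message.
 */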

bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
                                        struct folio *folio, u64 start, u32 len);

void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
                                  struct folio *folio, u64 start, u32 len);
void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
                                    struct folio *folio,
                                    unsigned long *ret_bitmap);
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
                                      struct folio *folio, u64 start, u32 len);

#endif