mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 01:09:38 -05:00
btrfs: add support for inserting raid stripe extents
Add support for inserting stripe extents into the raid stripe tree on completion of every write that needs an extra logical-to-physical translation when using RAID. Inserting the stripe extents happens after the data I/O has completed, this is done to a) support zone-append and b) rule out the possibility of a RAID-write-hole. Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
515020900d
commit
02c372e1f0
10 changed files with 168 additions and 9 deletions
|
@ -33,7 +33,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
||||||
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
|
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
|
||||||
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
|
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
|
||||||
subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \
|
subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \
|
||||||
lru_cache.o
|
lru_cache.o raid-stripe-tree.o
|
||||||
|
|
||||||
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
|
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
|
||||||
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
|
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include "rcu-string.h"
|
#include "rcu-string.h"
|
||||||
#include "zoned.h"
|
#include "zoned.h"
|
||||||
#include "file-item.h"
|
#include "file-item.h"
|
||||||
|
#include "raid-stripe-tree.h"
|
||||||
|
|
||||||
static struct bio_set btrfs_bioset;
|
static struct bio_set btrfs_bioset;
|
||||||
static struct bio_set btrfs_clone_bioset;
|
static struct bio_set btrfs_clone_bioset;
|
||||||
|
@ -415,6 +416,9 @@ static void btrfs_orig_write_end_io(struct bio *bio)
|
||||||
else
|
else
|
||||||
bio->bi_status = BLK_STS_OK;
|
bio->bi_status = BLK_STS_OK;
|
||||||
|
|
||||||
|
if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
|
||||||
|
stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
|
||||||
|
|
||||||
btrfs_orig_bbio_end_io(bbio);
|
btrfs_orig_bbio_end_io(bbio);
|
||||||
btrfs_put_bioc(bioc);
|
btrfs_put_bioc(bioc);
|
||||||
}
|
}
|
||||||
|
@ -426,6 +430,8 @@ static void btrfs_clone_write_end_io(struct bio *bio)
|
||||||
if (bio->bi_status) {
|
if (bio->bi_status) {
|
||||||
atomic_inc(&stripe->bioc->error);
|
atomic_inc(&stripe->bioc->error);
|
||||||
btrfs_log_dev_io_error(bio, stripe->dev);
|
btrfs_log_dev_io_error(bio, stripe->dev);
|
||||||
|
} else if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
|
||||||
|
stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Pass on control to the original bio this one was cloned from */
|
/* Pass on control to the original bio this one was cloned from */
|
||||||
|
@ -487,6 +493,7 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
|
||||||
bio->bi_private = &bioc->stripes[dev_nr];
|
bio->bi_private = &bioc->stripes[dev_nr];
|
||||||
bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
|
bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
|
||||||
bioc->stripes[dev_nr].bioc = bioc;
|
bioc->stripes[dev_nr].bioc = bioc;
|
||||||
|
bioc->size = bio->bi_iter.bi_size;
|
||||||
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
|
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -496,6 +503,8 @@ static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
|
||||||
if (!bioc) {
|
if (!bioc) {
|
||||||
/* Single mirror read/write fast path. */
|
/* Single mirror read/write fast path. */
|
||||||
btrfs_bio(bio)->mirror_num = mirror_num;
|
btrfs_bio(bio)->mirror_num = mirror_num;
|
||||||
|
if (bio_op(bio) != REQ_OP_READ)
|
||||||
|
btrfs_bio(bio)->orig_physical = smap->physical;
|
||||||
bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
|
bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
|
||||||
if (bio_op(bio) != REQ_OP_READ)
|
if (bio_op(bio) != REQ_OP_READ)
|
||||||
btrfs_bio(bio)->orig_physical = smap->physical;
|
btrfs_bio(bio)->orig_physical = smap->physical;
|
||||||
|
@ -688,6 +697,18 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
|
||||||
bio->bi_opf |= REQ_OP_ZONE_APPEND;
|
bio->bi_opf |= REQ_OP_ZONE_APPEND;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (is_data_bbio(bbio) && bioc &&
|
||||||
|
btrfs_need_stripe_tree_update(bioc->fs_info, bioc->map_type)) {
|
||||||
|
/*
|
||||||
|
* No locking for the list update, as we only add to
|
||||||
|
* the list in the I/O submission path, and list
|
||||||
|
* iteration only happens in the completion path, which
|
||||||
|
* can't happen until after the last submission.
|
||||||
|
*/
|
||||||
|
btrfs_get_bioc(bioc);
|
||||||
|
list_add_tail(&bioc->rst_ordered_entry, &bbio->ordered->bioc_list);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Csum items for reloc roots have already been cloned at this
|
* Csum items for reloc roots have already been cloned at this
|
||||||
* point, so they are handled as part of the no-checksum case.
|
* point, so they are handled as part of the no-checksum case.
|
||||||
|
|
|
@ -42,6 +42,7 @@
|
||||||
#include "file-item.h"
|
#include "file-item.h"
|
||||||
#include "orphan.h"
|
#include "orphan.h"
|
||||||
#include "tree-checker.h"
|
#include "tree-checker.h"
|
||||||
|
#include "raid-stripe-tree.h"
|
||||||
|
|
||||||
#undef SCRAMBLE_DELAYED_REFS
|
#undef SCRAMBLE_DELAYED_REFS
|
||||||
|
|
||||||
|
|
|
@ -71,6 +71,7 @@
|
||||||
#include "super.h"
|
#include "super.h"
|
||||||
#include "orphan.h"
|
#include "orphan.h"
|
||||||
#include "backref.h"
|
#include "backref.h"
|
||||||
|
#include "raid-stripe-tree.h"
|
||||||
|
|
||||||
struct btrfs_iget_args {
|
struct btrfs_iget_args {
|
||||||
u64 ino;
|
u64 ino;
|
||||||
|
@ -3091,6 +3092,10 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
|
||||||
|
|
||||||
trans->block_rsv = &inode->block_rsv;
|
trans->block_rsv = &inode->block_rsv;
|
||||||
|
|
||||||
|
ret = btrfs_insert_raid_extent(trans, ordered_extent);
|
||||||
|
if (ret)
|
||||||
|
goto out;
|
||||||
|
|
||||||
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
|
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
|
||||||
compress_type = ordered_extent->compress_type;
|
compress_type = ordered_extent->compress_type;
|
||||||
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
|
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
|
||||||
|
@ -3224,7 +3229,8 @@ out:
|
||||||
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
|
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
|
||||||
{
|
{
|
||||||
if (btrfs_is_zoned(btrfs_sb(ordered->inode->i_sb)) &&
|
if (btrfs_is_zoned(btrfs_sb(ordered->inode->i_sb)) &&
|
||||||
!test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
|
!test_bit(BTRFS_ORDERED_IOERR, &ordered->flags) &&
|
||||||
|
list_empty(&ordered->bioc_list))
|
||||||
btrfs_finish_ordered_zoned(ordered);
|
btrfs_finish_ordered_zoned(ordered);
|
||||||
return btrfs_finish_one_ordered(ordered);
|
return btrfs_finish_one_ordered(ordered);
|
||||||
}
|
}
|
||||||
|
|
|
@ -191,6 +191,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
|
||||||
INIT_LIST_HEAD(&entry->log_list);
|
INIT_LIST_HEAD(&entry->log_list);
|
||||||
INIT_LIST_HEAD(&entry->root_extent_list);
|
INIT_LIST_HEAD(&entry->root_extent_list);
|
||||||
INIT_LIST_HEAD(&entry->work_list);
|
INIT_LIST_HEAD(&entry->work_list);
|
||||||
|
INIT_LIST_HEAD(&entry->bioc_list);
|
||||||
init_completion(&entry->completion);
|
init_completion(&entry->completion);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -151,6 +151,8 @@ struct btrfs_ordered_extent {
|
||||||
struct completion completion;
|
struct completion completion;
|
||||||
struct btrfs_work flush_work;
|
struct btrfs_work flush_work;
|
||||||
struct list_head work_list;
|
struct list_head work_list;
|
||||||
|
|
||||||
|
struct list_head bioc_list;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
|
|
87
fs/btrfs/raid-stripe-tree.c
Normal file
87
fs/btrfs/raid-stripe-tree.c
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2023 Western Digital Corporation or its affiliates.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/btrfs_tree.h>
|
||||||
|
#include "ctree.h"
|
||||||
|
#include "fs.h"
|
||||||
|
#include "accessors.h"
|
||||||
|
#include "transaction.h"
|
||||||
|
#include "disk-io.h"
|
||||||
|
#include "raid-stripe-tree.h"
|
||||||
|
#include "volumes.h"
|
||||||
|
#include "misc.h"
|
||||||
|
#include "print-tree.h"
|
||||||
|
|
||||||
|
static int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
|
||||||
|
struct btrfs_io_context *bioc)
|
||||||
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||||
|
struct btrfs_key stripe_key;
|
||||||
|
struct btrfs_root *stripe_root = fs_info->stripe_root;
|
||||||
|
const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
|
||||||
|
u8 encoding = btrfs_bg_flags_to_raid_index(bioc->map_type);
|
||||||
|
struct btrfs_stripe_extent *stripe_extent;
|
||||||
|
const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
stripe_extent = kzalloc(item_size, GFP_NOFS);
|
||||||
|
if (!stripe_extent) {
|
||||||
|
btrfs_abort_transaction(trans, -ENOMEM);
|
||||||
|
btrfs_end_transaction(trans);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
btrfs_set_stack_stripe_extent_encoding(stripe_extent, encoding);
|
||||||
|
for (int i = 0; i < num_stripes; i++) {
|
||||||
|
u64 devid = bioc->stripes[i].dev->devid;
|
||||||
|
u64 physical = bioc->stripes[i].physical;
|
||||||
|
u64 length = bioc->stripes[i].length;
|
||||||
|
struct btrfs_raid_stride *raid_stride = &stripe_extent->strides[i];
|
||||||
|
|
||||||
|
if (length == 0)
|
||||||
|
length = bioc->size;
|
||||||
|
|
||||||
|
btrfs_set_stack_raid_stride_devid(raid_stride, devid);
|
||||||
|
btrfs_set_stack_raid_stride_physical(raid_stride, physical);
|
||||||
|
}
|
||||||
|
|
||||||
|
stripe_key.objectid = bioc->logical;
|
||||||
|
stripe_key.type = BTRFS_RAID_STRIPE_KEY;
|
||||||
|
stripe_key.offset = bioc->size;
|
||||||
|
|
||||||
|
ret = btrfs_insert_item(trans, stripe_root, &stripe_key, stripe_extent,
|
||||||
|
item_size);
|
||||||
|
if (ret)
|
||||||
|
btrfs_abort_transaction(trans, ret);
|
||||||
|
|
||||||
|
kfree(stripe_extent);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
|
||||||
|
struct btrfs_ordered_extent *ordered_extent)
|
||||||
|
{
|
||||||
|
struct btrfs_io_context *bioc;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!btrfs_fs_incompat(trans->fs_info, RAID_STRIPE_TREE))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
list_for_each_entry(bioc, &ordered_extent->bioc_list, rst_ordered_entry) {
|
||||||
|
ret = btrfs_insert_one_raid_extent(trans, bioc);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (!list_empty(&ordered_extent->bioc_list)) {
|
||||||
|
bioc = list_first_entry(&ordered_extent->bioc_list,
|
||||||
|
typeof(*bioc), rst_ordered_entry);
|
||||||
|
list_del(&bioc->rst_ordered_entry);
|
||||||
|
btrfs_put_bioc(bioc);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
35
fs/btrfs/raid-stripe-tree.h
Normal file
35
fs/btrfs/raid-stripe-tree.h
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2023 Western Digital Corporation or its affiliates.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BTRFS_RAID_STRIPE_TREE_H
|
||||||
|
#define BTRFS_RAID_STRIPE_TREE_H
|
||||||
|
|
||||||
|
struct btrfs_io_context;
|
||||||
|
struct btrfs_io_stripe;
|
||||||
|
struct btrfs_ordered_extent;
|
||||||
|
struct btrfs_trans_handle;
|
||||||
|
|
||||||
|
int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
|
||||||
|
struct btrfs_ordered_extent *ordered_extent);
|
||||||
|
|
||||||
|
static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
|
||||||
|
u64 map_type)
|
||||||
|
{
|
||||||
|
u64 type = map_type & BTRFS_BLOCK_GROUP_TYPE_MASK;
|
||||||
|
u64 profile = map_type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
|
||||||
|
|
||||||
|
if (!btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (type != BTRFS_BLOCK_GROUP_DATA)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (profile & BTRFS_BLOCK_GROUP_RAID1_MASK)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -5906,6 +5906,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
|
static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
|
||||||
|
u64 logical,
|
||||||
u16 total_stripes)
|
u16 total_stripes)
|
||||||
{
|
{
|
||||||
struct btrfs_io_context *bioc;
|
struct btrfs_io_context *bioc;
|
||||||
|
@ -5925,6 +5926,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
|
||||||
bioc->fs_info = fs_info;
|
bioc->fs_info = fs_info;
|
||||||
bioc->replace_stripe_src = -1;
|
bioc->replace_stripe_src = -1;
|
||||||
bioc->full_stripe_logical = (u64)-1;
|
bioc->full_stripe_logical = (u64)-1;
|
||||||
|
bioc->logical = logical;
|
||||||
|
|
||||||
return bioc;
|
return bioc;
|
||||||
}
|
}
|
||||||
|
@ -6451,7 +6453,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes);
|
bioc = alloc_btrfs_io_context(fs_info, logical, num_alloc_stripes);
|
||||||
if (!bioc) {
|
if (!bioc) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto out;
|
goto out;
|
||||||
|
|
|
@ -390,12 +390,11 @@ struct btrfs_fs_devices {
|
||||||
|
|
||||||
struct btrfs_io_stripe {
|
struct btrfs_io_stripe {
|
||||||
struct btrfs_device *dev;
|
struct btrfs_device *dev;
|
||||||
union {
|
/* Block mapping. */
|
||||||
/* Block mapping */
|
u64 physical;
|
||||||
u64 physical;
|
u64 length;
|
||||||
/* For the endio handler */
|
/* For the endio handler. */
|
||||||
struct btrfs_io_context *bioc;
|
struct btrfs_io_context *bioc;
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct btrfs_discard_stripe {
|
struct btrfs_discard_stripe {
|
||||||
|
@ -428,6 +427,11 @@ struct btrfs_io_context {
|
||||||
atomic_t error;
|
atomic_t error;
|
||||||
u16 max_errors;
|
u16 max_errors;
|
||||||
|
|
||||||
|
u64 logical;
|
||||||
|
u64 size;
|
||||||
|
/* Raid stripe tree ordered entry. */
|
||||||
|
struct list_head rst_ordered_entry;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The total number of stripes, including the extra duplicated
|
* The total number of stripes, including the extra duplicated
|
||||||
* stripe for replace.
|
* stripe for replace.
|
||||||
|
|
Loading…
Add table
Reference in a new issue