mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-26 02:25:00 -05:00
bcachefs: Hacky fixes for device removal
The device remove test was sporadically failing, because we hadn't finished dropping btree sector counts for the device when bch2_replicas_gc2() was called - mainly due to in flight journal writes. We don't yet have a good mechanism for flushing the counts that correspend to open journal entries yet. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
3e548da8f5
commit
31ba2cd330
2 changed files with 47 additions and 48 deletions
|
@ -53,9 +53,6 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
|
|||
while ((k = bch2_btree_iter_peek(iter)).k &&
|
||||
!(ret = bkey_err(k))) {
|
||||
if (!bch2_bkey_has_device(k, dev_idx)) {
|
||||
ret = bch2_mark_bkey_replicas(c, k);
|
||||
if (ret)
|
||||
break;
|
||||
bch2_btree_iter_next(iter);
|
||||
continue;
|
||||
}
|
||||
|
@ -129,34 +126,27 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
|||
struct bkey_i_btree_ptr *new_key;
|
||||
retry:
|
||||
if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key),
|
||||
dev_idx)) {
|
||||
/*
|
||||
* we might have found a btree node key we
|
||||
* needed to update, and then tried to update it
|
||||
* but got -EINTR after upgrading the iter, but
|
||||
* then raced and the node is now gone:
|
||||
*/
|
||||
bch2_btree_iter_downgrade(iter);
|
||||
dev_idx))
|
||||
continue;
|
||||
|
||||
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key));
|
||||
if (ret)
|
||||
goto err;
|
||||
} else {
|
||||
bkey_copy(&tmp.k, &b->key);
|
||||
new_key = bkey_i_to_btree_ptr(&tmp.k);
|
||||
bkey_copy(&tmp.k, &b->key);
|
||||
new_key = bkey_i_to_btree_ptr(&tmp.k);
|
||||
|
||||
ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i),
|
||||
dev_idx, flags, true);
|
||||
if (ret)
|
||||
goto err;
|
||||
ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i),
|
||||
dev_idx, flags, true);
|
||||
if (ret) {
|
||||
bch_err(c, "Cannot drop device without losing data");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_btree_node_update_key(c, iter, b, new_key);
|
||||
if (ret == -EINTR) {
|
||||
b = bch2_btree_iter_peek_node(iter);
|
||||
goto retry;
|
||||
}
|
||||
if (ret)
|
||||
goto err;
|
||||
ret = bch2_btree_node_update_key(c, iter, b, new_key);
|
||||
if (ret == -EINTR) {
|
||||
b = bch2_btree_iter_peek_node(iter);
|
||||
goto retry;
|
||||
}
|
||||
if (ret) {
|
||||
bch_err(c, "Error updating btree node key: %i", ret);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
bch2_trans_iter_free(&trans, iter);
|
||||
|
@ -167,9 +157,10 @@ retry:
|
|||
closure_wait_event(&c->btree_interior_update_wait,
|
||||
!bch2_btree_interior_updates_nr_pending(c) ||
|
||||
c->btree_roots_dirty);
|
||||
if (c->btree_roots_dirty)
|
||||
bch2_journal_meta(&c->journal);
|
||||
if (!bch2_btree_interior_updates_nr_pending(c))
|
||||
break;
|
||||
bch2_journal_meta(&c->journal);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
@ -184,6 +175,5 @@ err:
|
|||
int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
|
||||
{
|
||||
return bch2_dev_usrdata_drop(c, dev_idx, flags) ?:
|
||||
bch2_dev_metadata_drop(c, dev_idx, flags) ?:
|
||||
bch2_replicas_gc2(c);
|
||||
bch2_dev_metadata_drop(c, dev_idx, flags);
|
||||
}
|
||||
|
|
|
@ -1381,7 +1381,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
|
|||
|
||||
mutex_lock(&c->state_lock);
|
||||
|
||||
percpu_ref_put(&ca->ref); /* XXX */
|
||||
/*
|
||||
* We consume a reference to ca->ref, regardless of whether we succeed
|
||||
* or fail:
|
||||
*/
|
||||
percpu_ref_put(&ca->ref);
|
||||
|
||||
if (!bch2_dev_state_allowed(c, ca, BCH_MEMBER_STATE_FAILED, flags)) {
|
||||
bch_err(ca, "Cannot remove without losing data");
|
||||
|
@ -1390,11 +1394,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
|
|||
|
||||
__bch2_dev_read_only(c, ca);
|
||||
|
||||
/*
|
||||
* XXX: verify that dev_idx is really not in use anymore, anywhere
|
||||
*
|
||||
* flag_data_bad() does not check btree pointers
|
||||
*/
|
||||
ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
|
||||
if (ret) {
|
||||
bch_err(ca, "Remove failed: error %i dropping data", ret);
|
||||
|
@ -1407,17 +1406,6 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
|
|||
goto err;
|
||||
}
|
||||
|
||||
data = bch2_dev_has_data(c, ca);
|
||||
if (data) {
|
||||
char data_has_str[100];
|
||||
|
||||
bch2_flags_to_text(&PBUF(data_has_str),
|
||||
bch2_data_types, data);
|
||||
bch_err(ca, "Remove failed, still has data (%s)", data_has_str);
|
||||
ret = -EBUSY;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_btree_delete_range(c, BTREE_ID_ALLOC,
|
||||
POS(ca->dev_idx, 0),
|
||||
POS(ca->dev_idx + 1, 0),
|
||||
|
@ -1432,12 +1420,33 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
|
|||
* (overwritten) keys that point to the device we're removing:
|
||||
*/
|
||||
bch2_journal_flush_all_pins(&c->journal);
|
||||
/*
|
||||
* hack to ensure bch2_replicas_gc2() clears out entries to this device
|
||||
*/
|
||||
bch2_journal_meta(&c->journal);
|
||||
ret = bch2_journal_error(&c->journal);
|
||||
if (ret) {
|
||||
bch_err(ca, "Remove failed, journal error");
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = bch2_replicas_gc2(c);
|
||||
if (ret) {
|
||||
bch_err(ca, "Remove failed: error %i from replicas gc", ret);
|
||||
goto err;
|
||||
}
|
||||
|
||||
data = bch2_dev_has_data(c, ca);
|
||||
if (data) {
|
||||
char data_has_str[100];
|
||||
|
||||
bch2_flags_to_text(&PBUF(data_has_str),
|
||||
bch2_data_types, data);
|
||||
bch_err(ca, "Remove failed, still has data (%s)", data_has_str);
|
||||
ret = -EBUSY;
|
||||
goto err;
|
||||
}
|
||||
|
||||
__bch2_dev_offline(c, ca);
|
||||
|
||||
mutex_lock(&c->sb_lock);
|
||||
|
|
Loading…
Add table
Reference in a new issue