mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 09:13:20 -05:00
bcachefs: ja->discard_idx, ja->dirty_idx
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
fcbf3e5096
commit
0ce2dbbe99
4 changed files with 85 additions and 60 deletions
|
@ -760,6 +760,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
|||
|
||||
while (ja->nr < nr) {
|
||||
struct open_bucket *ob = NULL;
|
||||
unsigned pos;
|
||||
long bucket;
|
||||
|
||||
if (new_fs) {
|
||||
|
@ -786,21 +787,25 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
|
|||
preempt_disable();
|
||||
}
|
||||
|
||||
__array_insert_item(ja->buckets, ja->nr, ja->last_idx);
|
||||
__array_insert_item(ja->bucket_seq, ja->nr, ja->last_idx);
|
||||
__array_insert_item(journal_buckets->buckets, ja->nr, ja->last_idx);
|
||||
|
||||
ja->buckets[ja->last_idx] = bucket;
|
||||
ja->bucket_seq[ja->last_idx] = 0;
|
||||
journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket);
|
||||
|
||||
if (ja->last_idx < ja->nr) {
|
||||
if (ja->cur_idx >= ja->last_idx)
|
||||
ja->cur_idx++;
|
||||
ja->last_idx++;
|
||||
}
|
||||
pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
|
||||
__array_insert_item(ja->buckets, ja->nr, pos);
|
||||
__array_insert_item(ja->bucket_seq, ja->nr, pos);
|
||||
__array_insert_item(journal_buckets->buckets, ja->nr, pos);
|
||||
ja->nr++;
|
||||
|
||||
ja->buckets[pos] = bucket;
|
||||
ja->bucket_seq[pos] = 0;
|
||||
journal_buckets->buckets[pos] = cpu_to_le64(bucket);
|
||||
|
||||
if (pos <= ja->discard_idx)
|
||||
ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
|
||||
if (pos <= ja->dirty_idx_ondisk)
|
||||
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
|
||||
if (pos <= ja->dirty_idx)
|
||||
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
|
||||
if (pos <= ja->cur_idx)
|
||||
ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
|
||||
|
||||
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
|
||||
ca->mi.bucket_size,
|
||||
gc_phase(GC_PHASE_SB),
|
||||
|
@ -1042,6 +1047,7 @@ int bch2_fs_journal_init(struct journal *j)
|
|||
mutex_init(&j->blacklist_lock);
|
||||
INIT_LIST_HEAD(&j->seq_blacklist);
|
||||
mutex_init(&j->reclaim_lock);
|
||||
mutex_init(&j->discard_lock);
|
||||
|
||||
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
|
||||
|
||||
|
@ -1138,13 +1144,17 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
|
|||
"dev %u:\n"
|
||||
"\tnr\t\t%u\n"
|
||||
"\tavailable\t%u:%u\n"
|
||||
"\tcur_idx\t\t%u (seq %llu)\n"
|
||||
"\tlast_idx\t%u (seq %llu)\n",
|
||||
"\tdiscard_idx\t\t%u\n"
|
||||
"\tdirty_idx_ondisk\t%u (seq %llu)\n"
|
||||
"\tdirty_idx\t\t%u (seq %llu)\n"
|
||||
"\tcur_idx\t\t%u (seq %llu)\n",
|
||||
iter, ja->nr,
|
||||
bch2_journal_dev_buckets_available(j, ja),
|
||||
ja->sectors_free,
|
||||
ja->cur_idx, ja->bucket_seq[ja->cur_idx],
|
||||
ja->last_idx, ja->bucket_seq[ja->last_idx]);
|
||||
ja->discard_idx,
|
||||
ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk],
|
||||
ja->dirty_idx, ja->bucket_seq[ja->dirty_idx],
|
||||
ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
|
||||
}
|
||||
|
||||
spin_unlock(&j->lock);
|
||||
|
|
|
@ -625,11 +625,12 @@ static void bch2_journal_read_device(struct closure *cl)
|
|||
ja->sectors_free = 0;
|
||||
|
||||
/*
|
||||
* Set last_idx to indicate the entire journal is full and needs to be
|
||||
* Set dirty_idx to indicate the entire journal is full and needs to be
|
||||
* reclaimed - journal reclaim will immediately reclaim whatever isn't
|
||||
* pinned when it first runs:
|
||||
*/
|
||||
ja->last_idx = (ja->cur_idx + 1) % ja->nr;
|
||||
ja->discard_idx = ja->dirty_idx_ondisk =
|
||||
ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
|
||||
out:
|
||||
kvpfree(buf.data, buf.size);
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
|
@ -1069,12 +1070,13 @@ static void journal_write_done(struct closure *cl)
|
|||
goto err;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
j->seq_ondisk = seq;
|
||||
j->last_seq_ondisk = last_seq;
|
||||
|
||||
if (seq >= j->pin.front)
|
||||
journal_seq_pin(j, seq)->devs = devs;
|
||||
|
||||
j->seq_ondisk = seq;
|
||||
j->last_seq_ondisk = last_seq;
|
||||
bch2_journal_space_available(j);
|
||||
|
||||
/*
|
||||
* Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
|
||||
* more buckets:
|
||||
|
|
|
@ -14,22 +14,20 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
|
|||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
unsigned next = (ja->cur_idx + 1) % ja->nr;
|
||||
unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
|
||||
unsigned available = (ja->discard_idx + ja->nr - next) % ja->nr;
|
||||
|
||||
/*
|
||||
* Allocator startup needs some journal space before we can do journal
|
||||
* replay:
|
||||
*/
|
||||
if (available &&
|
||||
test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
|
||||
available--;
|
||||
if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
|
||||
--available;
|
||||
|
||||
/*
|
||||
* Don't use the last bucket unless writing the new last_seq
|
||||
* will make another bucket available:
|
||||
*/
|
||||
if (available &&
|
||||
journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
|
||||
if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
|
||||
--available;
|
||||
|
||||
return available;
|
||||
|
@ -55,12 +53,34 @@ void bch2_journal_space_available(struct journal *j)
|
|||
for_each_member_device_rcu(ca, c, i,
|
||||
&c->rw_devs[BCH_DATA_JOURNAL]) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
unsigned buckets_this_device, sectors_this_device;
|
||||
|
||||
if (!ja->nr)
|
||||
continue;
|
||||
|
||||
while (ja->dirty_idx != ja->cur_idx &&
|
||||
ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
|
||||
ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
|
||||
|
||||
while (ja->dirty_idx_ondisk != ja->dirty_idx &&
|
||||
ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
|
||||
ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
|
||||
|
||||
nr_online++;
|
||||
}
|
||||
|
||||
if (nr_online < c->opts.metadata_replicas_required) {
|
||||
ret = -EROFS;
|
||||
sectors_next_entry = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for_each_member_device_rcu(ca, c, i,
|
||||
&c->rw_devs[BCH_DATA_JOURNAL]) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
unsigned buckets_this_device, sectors_this_device;
|
||||
|
||||
if (!ja->nr)
|
||||
continue;
|
||||
|
||||
buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
|
||||
sectors_this_device = ja->sectors_free;
|
||||
|
@ -100,20 +120,17 @@ void bch2_journal_space_available(struct journal *j)
|
|||
|
||||
nr_devs++;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (nr_online < c->opts.metadata_replicas_required) {
|
||||
ret = -EROFS;
|
||||
sectors_next_entry = 0;
|
||||
} else if (!sectors_next_entry ||
|
||||
nr_devs < min_t(unsigned, nr_online,
|
||||
c->opts.metadata_replicas)) {
|
||||
if (!sectors_next_entry ||
|
||||
nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) {
|
||||
ret = -ENOSPC;
|
||||
sectors_next_entry = 0;
|
||||
} else if (!fifo_free(&j->pin)) {
|
||||
ret = -ENOSPC;
|
||||
sectors_next_entry = 0;
|
||||
}
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
|
||||
j->cur_entry_sectors = sectors_next_entry;
|
||||
j->cur_entry_error = ret;
|
||||
|
@ -129,25 +146,23 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
|
|||
bool ret;
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ret = ja->nr &&
|
||||
ja->last_idx != ja->cur_idx &&
|
||||
ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
|
||||
ret = ja->discard_idx != ja->dirty_idx_ondisk;
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance ja->last_idx as long as it points to buckets that are no longer
|
||||
* Advance ja->discard_idx as long as it points to buckets that are no longer
|
||||
* dirty, issuing discards if necessary:
|
||||
*/
|
||||
static void journal_do_discards(struct journal *j)
|
||||
static void bch2_journal_do_discards(struct journal *j)
|
||||
{
|
||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||
struct bch_dev *ca;
|
||||
unsigned iter;
|
||||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
mutex_lock(&j->discard_lock);
|
||||
|
||||
for_each_rw_member(ca, c, iter) {
|
||||
struct journal_device *ja = &ca->journal;
|
||||
|
@ -157,18 +172,18 @@ static void journal_do_discards(struct journal *j)
|
|||
bdev_max_discard_sectors(ca->disk_sb.bdev))
|
||||
blkdev_issue_discard(ca->disk_sb.bdev,
|
||||
bucket_to_sector(ca,
|
||||
ja->buckets[ja->last_idx]),
|
||||
ja->buckets[ja->discard_idx]),
|
||||
ca->mi.bucket_size, GFP_NOIO);
|
||||
|
||||
spin_lock(&j->lock);
|
||||
ja->last_idx = (ja->last_idx + 1) % ja->nr;
|
||||
ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
|
||||
|
||||
bch2_journal_space_available(j);
|
||||
spin_unlock(&j->lock);
|
||||
}
|
||||
}
|
||||
|
||||
mutex_unlock(&j->reclaim_lock);
|
||||
mutex_unlock(&j->discard_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -399,7 +414,7 @@ void bch2_journal_reclaim_work(struct work_struct *work)
|
|||
unsigned iter, bucket_to_flush, min_nr = 0;
|
||||
u64 seq_to_flush = 0;
|
||||
|
||||
journal_do_discards(j);
|
||||
bch2_journal_do_discards(j);
|
||||
|
||||
mutex_lock(&j->reclaim_lock);
|
||||
spin_lock(&j->lock);
|
||||
|
|
|
@ -193,9 +193,6 @@ struct journal {
|
|||
struct journal_entry_pin_list *data;
|
||||
} pin;
|
||||
|
||||
struct journal_entry_pin *flush_in_progress;
|
||||
wait_queue_head_t pin_flush_wait;
|
||||
|
||||
u64 replay_journal_seq;
|
||||
|
||||
struct mutex blacklist_lock;
|
||||
|
@ -206,10 +203,13 @@ struct journal {
|
|||
spinlock_t err_lock;
|
||||
|
||||
struct delayed_work reclaim_work;
|
||||
unsigned long last_flushed;
|
||||
|
||||
/* protects advancing ja->last_idx: */
|
||||
struct mutex reclaim_lock;
|
||||
unsigned long last_flushed;
|
||||
struct journal_entry_pin *flush_in_progress;
|
||||
wait_queue_head_t pin_flush_wait;
|
||||
|
||||
/* protects advancing ja->discard_idx: */
|
||||
struct mutex discard_lock;
|
||||
unsigned write_delay_ms;
|
||||
unsigned reclaim_delay_ms;
|
||||
|
||||
|
@ -240,17 +240,15 @@ struct journal_device {
|
|||
|
||||
unsigned sectors_free;
|
||||
|
||||
/* Journal bucket we're currently writing to */
|
||||
unsigned cur_idx;
|
||||
|
||||
/* Last journal bucket that still contains an open journal entry */
|
||||
|
||||
/*
|
||||
* j->lock and j->reclaim_lock must both be held to modify, j->lock
|
||||
* sufficient to read:
|
||||
* discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
|
||||
*/
|
||||
unsigned last_idx;
|
||||
unsigned discard_idx; /* Next bucket to discard */
|
||||
unsigned dirty_idx_ondisk;
|
||||
unsigned dirty_idx;
|
||||
unsigned cur_idx; /* Journal bucket we're currently writing to */
|
||||
unsigned nr;
|
||||
|
||||
u64 *buckets;
|
||||
|
||||
/* Bio for journal reads/writes to this device */
|
||||
|
|
Loading…
Add table
Reference in a new issue