1
0
Fork 0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-01-26 02:25:00 -05:00
linux/drivers/mtd/ubi/fastmap-wl.c
Zhihao Cheng 90e0be5614 ubi: fastmap: Fix lapsed wear leveling for first 64 PEBs
The anchor PEB must be picked from the first 64 PEBs, so these PEBs can
end up with erase counters much larger than those of the other PEBs,
especially when free space is nearly exhausted.
ubi_update_fastmap() is called whenever pool/wl_pool is empty, and the
old anchor PEB is erased on every fastmap update. Given a UBI device with
N PEBs whose free PEBs are nearly exhausted, the pool is filled with only 1
PEB each time ubi_update_fastmap() is invoked. With t=N/POOL_SIZE[1]/64,
this means that, in the worst case, the erase counter of the first 64 PEBs
is in theory t times greater than that of the other PEBs.
After running fsstress for 24h, the erase counter statistics for two UBI
devices are shown below (CONFIG_MTD_UBI_WL_THRESHOLD=128):

Device A(1024 PEBs, pool=50, wl_pool=25):
=========================================================
from              to     count      min      avg      max
---------------------------------------------------------
0        ..        9:        0        0        0        0
10       ..       99:        0        0        0        0
100      ..      999:        0        0        0        0
1000     ..     9999:        0        0        0        0
10000    ..    99999:      960    29224    29282    29362
100000   ..      inf:       64   117897   117934   117940
---------------------------------------------------------
Total               :     1024    29224    34822   117940

Device B(8192 PEBs, pool=256, wl_pool=128):
=========================================================
from              to     count      min      avg      max
---------------------------------------------------------
0        ..        9:        0        0        0        0
10       ..       99:        0        0        0        0
100      ..      999:        0        0        0        0
1000     ..     9999:     8128     2253     2321     2387
10000    ..    99999:       64    35387    35387    35388
100000   ..      inf:        0        0        0        0
---------------------------------------------------------
Total               :     8192     2253     2579    35388

The key point is to reduce the fastmap update frequency by enlarging
POOL_SIZE, so let UBI reserve ubi->fm_pool.max_size PEBs during
attaching. Then POOL_SIZE becomes ubi->fm_pool.max_size/2 even
when free space is running out.
Given a UBI device with 8192 PEBs (16384/8192/4096 are common sizes for
large-capacity flash), t=8192/128/64=1. A fastmap update happens
when either wl_pool or pool is empty, so setting fm_pool_rsv_cnt
to ubi->fm_pool.max_size allows wl_pool to be filled completely.

After pool reservation, running fsstress for 24h:

Device A(1024 PEBs, pool=50, wl_pool=25):
=========================================================
from              to     count      min      avg      max
---------------------------------------------------------
0        ..        9:        0        0        0        0
10       ..       99:        0        0        0        0
100      ..      999:        0        0        0        0
1000     ..     9999:        0        0        0        0
10000    ..    99999:     1024    33801    33997    34056
100000   ..      inf:        0        0        0        0
---------------------------------------------------------
Total               :     1024    33801    33997    34056

Device B(8192 PEBs, pool=256, wl_pool=128):
=========================================================
from              to     count      min      avg      max
---------------------------------------------------------
0        ..        9:        0        0        0        0
10       ..       99:        0        0        0        0
100      ..      999:        0        0        0        0
1000     ..     9999:     8192     2205     2397     2460
10000    ..    99999:        0        0        0        0
100000   ..      inf:        0        0        0        0
---------------------------------------------------------
Total               :     8192     2205     2397     2460

The difference of erase counter between first 64 PEBs and others is
under WL_FREE_MAX_DIFF(2*UBI_WL_THRESHOLD=2*128=256).
  Device A: 34056 - 33801 = 255
  Device B: 2460 - 2205 = 255

The next patch adds a switch to control whether UBI needs to reserve
PEBs for filling the pool.

Fixes: dbb7d2a88d ("UBI: Add fastmap core")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217787
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
2023-10-28 23:14:55 +02:00

554 lines
13 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012 Linutronix GmbH
* Copyright (c) 2014 sigma star gmbh
* Author: Richard Weinberger <richard@nod.at>
*/
/**
 * update_fastmap_work_fn - work-queue callback that writes a new fastmap.
 * @wrk: the work item; it is the 'fm_work' member embedded in a UBI device
 *
 * Calls ubi_update_fastmap() for the device that owns @wrk and afterwards
 * clears the device's 'fm_work_scheduled' flag while holding 'wl_lock'.
 */
static void update_fastmap_work_fn(struct work_struct *wrk)
{
	struct ubi_device *ubi;

	/* Recover the owning device from its embedded work item. */
	ubi = container_of(wrk, struct ubi_device, fm_work);

	ubi_update_fastmap(ubi);

	/* The flag is only reset once the update attempt has finished. */
	spin_lock(&ubi->wl_lock);
	ubi->fm_work_scheduled = 0;
	spin_unlock(&ubi->wl_lock);
}
/**
 * find_anchor_wl_entry - find a wear-leveling entry to be used as anchor PEB.
 * @root: the RB-tree where to look for
 *
 * Walks every entry of @root and returns the one with the smallest erase
 * counter among those whose PEB number is below %UBI_FM_MAX_START. Entries
 * whose erase counter is not below %UBI_MAX_ERASECOUNTER are never chosen.
 * Returns %NULL if no entry qualifies.
 */
static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root)
{
	struct ubi_wl_entry *cur, *best = NULL;
	struct rb_node *node;
	int lowest_ec = UBI_MAX_ERASECOUNTER;

	ubi_rb_for_each_entry(node, cur, root, u.rb) {
		/* Only PEBs below UBI_FM_MAX_START may serve as anchor. */
		if (cur->pnum < UBI_FM_MAX_START) {
			/* Strict comparison: on a tie the earlier entry wins. */
			if (cur->ec < lowest_ec) {
				best = cur;
				lowest_ec = cur->ec;
			}
		}
	}

	return best;
}
/**
 * return_unused_peb - hand one unused PEB back to the free tree.
 * @ubi: UBI device description object
 * @e: wear-leveling entry of the PEB to give back
 *
 * Inserts @e into 'ubi->free' and accounts for it in 'ubi->free_count'.
 */
static inline void return_unused_peb(struct ubi_device *ubi,
				     struct ubi_wl_entry *e)
{
	struct rb_root *free_tree = &ubi->free;

	wl_tree_add(e, free_tree);
	ubi->free_count += 1;
}
/**
 * return_unused_pool_pebs - returns the unused PEBs of a pool to the free tree.
 * @ubi: UBI device description object
 * @pool: fastmap pool description object
 *
 * Every pool slot from index 'pool->used' up to, but not including,
 * 'pool->size' is looked up in 'ubi->lookuptbl' and passed to
 * return_unused_peb(). The pool's own counters are left untouched.
 */
static void return_unused_pool_pebs(struct ubi_device *ubi,
				    struct ubi_fm_pool *pool)
{
	int slot = pool->used;

	while (slot < pool->size) {
		int pnum = pool->pebs[slot];

		return_unused_peb(ubi, ubi->lookuptbl[pnum]);
		slot++;
	}
}
/**
 * ubi_wl_get_fm_peb - find a physical erase block with a given maximal number.
 * @ubi: UBI device description object
 * @anchor: This PEB will be used as anchor PEB by fastmap
 *
 * When @anchor is non-zero the entry is chosen by find_anchor_wl_entry(),
 * otherwise by find_mean_wl_entry(). The chosen entry is removed from the
 * free tree and 'ubi->free_count' is decremented, so the wl subsystem no
 * longer tracks it.
 *
 * Returns the entry, or %NULL if the free tree is empty or no suitable
 * entry was found.
 * Must be called with wl_lock held!
 */
struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor)
{
	struct ubi_wl_entry *found;

	/* Nothing to hand out when the free tree is empty. */
	if (!ubi->free.rb_node)
		return NULL;

	found = anchor ? find_anchor_wl_entry(&ubi->free)
		       : find_mean_wl_entry(ubi, &ubi->free);
	if (!found)
		return NULL;

	self_check_in_wl_tree(ubi, found, &ubi->free);

	/*
	 * Take it off the free tree; from here on the wl subsystem does
	 * not know about this erase block any more.
	 */
	rb_erase(&found->u.rb, &ubi->free);
	ubi->free_count--;

	return found;
}
/*
 * wait_free_pebs_for_pool - wait until there are enough free pebs
 * @ubi: UBI device description object
 *
 * Executes pending works through do_work() until the number of free pebs
 * (free tree plus the unused slots of both pools) reaches the number of
 * pebs UBI keeps in reserve, so that the pools can be filled as much as
 * possible. Fuller pools need refilling less often, which lowers the
 * fastmap update frequency.
 *
 * The free count is compared only after a work has been attempted, so at
 * least one do_work() call is made on every invocation.
 */
static void wait_free_pebs_for_pool(struct ubi_device *ubi)
{
	struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
	struct ubi_fm_pool *pool = &ubi->fm_pool;
	int avail, wanted, executed;
	int reserved;

	/*
	 * Free pebs that are reserved by UBI:
	 * 1. WL_RESERVED_PEBS
	 * 2. EBA_RESERVED_PEBS
	 * 3. fm pebs - 1: twice the fastmap size, less the pebs taken by
	 *    the fastmap itself and by fm_anchor
	 * 4. fm_pool_rsv_cnt: pebs reserved for filling the pool
	 * 5. beb_rsvd_pebs: added inside the loop because it has to be
	 *    read under ubi->wl_lock
	 */
	reserved = WL_RESERVED_PEBS + EBA_RESERVED_PEBS +
		   ubi->fm_size / ubi->leb_size - 1 + ubi->fm_pool_rsv_cnt;

	for (;;) {
		spin_lock(&ubi->wl_lock);
		avail = ubi->free_count;
		avail += pool->size - pool->used + wl_pool->size - wl_pool->used;
		wanted = reserved + ubi->beb_rsvd_pebs;
		spin_unlock(&ubi->wl_lock);

		/*
		 * Stop when no work was left or when executing a work
		 * failed; erase_worker reschedules itself when the mtd
		 * layer returns -EBUSY because of a system shutdown.
		 */
		if (do_work(ubi, &executed) || !executed)
			break;

		/* The snapshot taken before the work decides whether to go on. */
		if (avail >= wanted)
			break;
	}
}
/*
 * left_free_count - returns the number of free pebs to fill fm pools
 * @ubi: UBI device description object
 *
 * Returns 'ubi->free_count' reduced by the number of non-anchor fastmap
 * pebs (fm_size / leb_size - 1). Nothing is deducted when the device is in
 * read-only mode or fastmap is disabled. Returns 0 when the free tree is
 * empty; the result may be negative otherwise.
 */
static int left_free_count(struct ubi_device *ubi)
{
	int fm_pebs;	/* fastmap non-anchor pebs */

	if (!ubi->free.rb_node)
		return 0;

	if (ubi->ro_mode || ubi->fm_disabled)
		fm_pebs = 0;
	else
		fm_pebs = ubi->fm_size / ubi->leb_size - 1;

	return ubi->free_count - fm_pebs;
}
/*
 * can_fill_pools - whether free PEBs will be left after filling pools
 * @ubi: UBI device description object
 * @free: current number of free PEBs
 *
 * The demand is the number of still-empty slots in 'fm_pool' and
 * 'fm_wl_pool' together (max_size - size of each).
 *
 * Return %1 if at least one free PEB is left after filling both pools,
 * otherwise %0 is returned.
 */
static int can_fill_pools(struct ubi_device *ubi, int free)
{
	struct ubi_fm_pool *user_pool = &ubi->fm_pool;
	struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
	int pool_need;

	pool_need = user_pool->max_size - user_pool->size +
		    wl_pool->max_size - wl_pool->size;

	return free - pool_need >= 1;
}
/**
 * ubi_refill_pools_and_lock - refills all fastmap PEB pools and takes fm locks.
 * @ubi: UBI device description object
 *
 * Gives the unused PEBs of both pools and the current anchor back to the
 * free tree, picks a new anchor (unless fastmap is disabled) and then fills
 * 'fm_pool' and 'fm_wl_pool' alternately, one PEB each per round.
 *
 * Returns with 'fm_protect', 'work_sem' and 'fm_eba_sem' held for writing;
 * none of them is released here. 'wl_lock' is taken and released inside.
 */
void ubi_refill_pools_and_lock(struct ubi_device *ubi)
{
struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
struct ubi_fm_pool *pool = &ubi->fm_pool;
struct ubi_wl_entry *e;
int enough;
/* Done before any lock is taken: this may execute pending works. */
if (!ubi->ro_mode && !ubi->fm_disabled)
wait_free_pebs_for_pool(ubi);
down_write(&ubi->fm_protect);
down_write(&ubi->work_sem);
down_write(&ubi->fm_eba_sem);
spin_lock(&ubi->wl_lock);
/* Slots in [used, size) of each pool go back to the free tree. */
return_unused_pool_pebs(ubi, wl_pool);
return_unused_pool_pebs(ubi, pool);
/* Both pools are rebuilt from scratch below. */
wl_pool->size = 0;
pool->size = 0;
/* The old anchor competes again with all other free PEBs. */
if (ubi->fm_anchor) {
wl_tree_add(ubi->fm_anchor, &ubi->free);
ubi->free_count++;
ubi->fm_anchor = NULL;
}
if (!ubi->fm_disabled)
/*
 * All available PEBs are in ubi->free, now is the time to get
 * the best anchor PEBs.
 */
ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1);
/*
 * Each round adds at most one PEB to each pool. The loop ends when
 * both pools are full (enough == 2), when left_free_count() reports
 * nothing left, or when wl_get_wle() fails.
 */
for (;;) {
enough = 0;
if (pool->size < pool->max_size) {
if (left_free_count(ubi) <= 0)
break;
e = wl_get_wle(ubi);
if (!e)
break;
pool->pebs[pool->size] = e->pnum;
pool->size++;
} else
enough++;
if (wl_pool->size < wl_pool->max_size) {
int left_free = left_free_count(ubi);
if (left_free <= 0)
break;
/*
 * The last argument is non-zero when the free PEBs would not
 * outlast filling both pools.
 * NOTE(review): presumably this switches find_wl_entry() to a
 * different picking strategy - confirm against its definition.
 */
e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF,
!can_fill_pools(ubi, left_free));
self_check_in_wl_tree(ubi, e, &ubi->free);
rb_erase(&e->u.rb, &ubi->free);
ubi->free_count--;
wl_pool->pebs[wl_pool->size] = e->pnum;
wl_pool->size++;
} else
enough++;
if (enough == 2)
break;
}
/* Nothing has been consumed from the freshly filled pools yet. */
wl_pool->used = 0;
pool->used = 0;
spin_unlock(&ubi->wl_lock);
}
/**
 * produce_free_peb - produce a free physical eraseblock.
 * @ubi: UBI device description object
 *
 * This function tries to make a free PEB by means of synchronous execution of
 * pending works, one at a time, for as long as the free tree is empty and
 * works are still pending. This may be needed if, for example the background
 * thread is disabled. Returns zero in case of success and the error code of
 * the failing work in case of failure.
 */
static int produce_free_peb(struct ubi_device *ubi)
{
	int rc = 0;

	/* A failed work ends the loop before the condition is re-read. */
	while (rc == 0 && !ubi->free.rb_node && ubi->works_count) {
		dbg_wl("do one work synchronously");
		rc = do_work(ubi, NULL);
	}

	return rc;
}
/**
 * ubi_wl_get_peb - get a physical eraseblock.
 * @ubi: UBI device description object
 *
 * Takes the next unused PEB from the user pool ('ubi->fm_pool'), passes its
 * wear-leveling entry to prot_queue_add() and returns the PEB number. If
 * the user pool or the WL pool is used up, ubi_update_fastmap() is called
 * first. If the user pool is still empty after that, produce_free_peb() is
 * run and the whole sequence is retried; the tenth time the pool is found
 * empty the function gives up.
 *
 * This function returns a physical eraseblock in case of success and a
 * negative error code in case of failure: -ENOSPC if the fastmap could not
 * be written or the attempts are exhausted, or the error returned by
 * produce_free_peb().
 * Returns with ubi->fm_eba_sem held in read mode, on the error paths too!
 */
int ubi_wl_get_peb(struct ubi_device *ubi)
{
int ret, attempts = 0;
struct ubi_fm_pool *pool = &ubi->fm_pool;
struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
again:
down_read(&ubi->fm_eba_sem);
spin_lock(&ubi->wl_lock);
/* We check here also for the WL pool because at this point we can
 * refill the WL pool synchronous. */
if (pool->used == pool->size || wl_pool->used == wl_pool->size) {
/* Both locks are dropped around the fastmap update. */
spin_unlock(&ubi->wl_lock);
up_read(&ubi->fm_eba_sem);
ret = ubi_update_fastmap(ubi);
if (ret) {
ubi_msg(ubi, "Unable to write a new fastmap: %i", ret);
/* Re-take the semaphore: the contract is to return with it held. */
down_read(&ubi->fm_eba_sem);
return -ENOSPC;
}
down_read(&ubi->fm_eba_sem);
spin_lock(&ubi->wl_lock);
}
/* Still nothing in the user pool: try to free PEBs and start over. */
if (pool->used == pool->size) {
spin_unlock(&ubi->wl_lock);
attempts++;
if (attempts == 10) {
ubi_err(ubi, "Unable to get a free PEB from user WL pool");
ret = -ENOSPC;
/* fm_eba_sem is still held here, as the contract requires. */
goto out;
}
/* produce_free_peb() runs without fm_eba_sem held. */
up_read(&ubi->fm_eba_sem);
ret = produce_free_peb(ubi);
if (ret < 0) {
down_read(&ubi->fm_eba_sem);
goto out;
}
/* 'again' takes fm_eba_sem and wl_lock anew. */
goto again;
}
ubi_assert(pool->used < pool->size);
/* Consume the next pool slot; its value is the PEB number returned. */
ret = pool->pebs[pool->used++];
prot_queue_add(ubi, ubi->lookuptbl[ret]);
spin_unlock(&ubi->wl_lock);
out:
return ret;
}
/**
 * next_peb_for_wl - returns next PEB to be used internally by the
 * WL sub-system.
 *
 * @ubi: UBI device description object
 *
 * Only peeks at the next unused slot of 'ubi->fm_wl_pool'; the slot is not
 * consumed. Returns %NULL when every slot of the pool has been used.
 */
static struct ubi_wl_entry *next_peb_for_wl(struct ubi_device *ubi)
{
	struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
	int peb;

	if (wl_pool->used != wl_pool->size) {
		peb = wl_pool->pebs[wl_pool->used];
		return ubi->lookuptbl[peb];
	}

	return NULL;
}
/**
 * need_wear_leveling - checks whether to trigger a wear leveling work.
 * UBI fetches free PEB from wl_pool, we check free PEBs from both 'wl_pool'
 * and 'ubi->free', because free PEB in 'ubi->free' tree maybe moved into
 * 'wl_pool' by ubi_refill_pools_and_lock().
 *
 * @ubi: UBI device description object
 *
 * The reference erase counter is the larger of the next wl_pool PEB's
 * counter and the counter of the entry find_wl_entry() picks from the free
 * tree; either source is skipped when it is empty. Returns true when that
 * counter exceeds the counter of the first entry of the 'used' tree by at
 * least %UBI_WL_THRESHOLD. Returns false when the 'used' tree is empty or
 * when neither source provides a PEB.
 */
static bool need_wear_leveling(struct ubi_device *ubi)
{
	struct ubi_wl_entry *pool_e, *free_e, *used_first;
	int high_ec;

	if (!ubi->used.rb_node)
		return false;

	pool_e = next_peb_for_wl(ubi);
	if (pool_e) {
		high_ec = pool_e->ec;
		if (ubi->free.rb_node) {
			free_e = find_wl_entry(ubi, &ubi->free,
					       WL_FREE_MAX_DIFF, 0);
			high_ec = max(high_ec, free_e->ec);
		}
	} else {
		/* wl_pool is used up: only the free tree can supply a PEB. */
		if (!ubi->free.rb_node)
			return false;

		free_e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0);
		high_ec = free_e->ec;
	}

	used_first = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);

	return high_ec - used_first->ec >= UBI_WL_THRESHOLD;
}
/* get_peb_for_wl - returns a PEB to be used internally by the WL sub-system.
 *
 * @ubi: UBI device description object
 *
 * Consumes the next unused slot of 'ubi->fm_wl_pool' and returns its
 * wear-leveling entry. When the pool is used up, %NULL is returned and the
 * 'fm_work' item is scheduled, unless it has been scheduled already.
 * Asserts that 'fm_eba_sem' is locked.
 */
static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
{
	struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
	int peb;

	ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));

	if (wl_pool->used != wl_pool->size) {
		peb = wl_pool->pebs[wl_pool->used++];
		return ubi->lookuptbl[peb];
	}

	/*
	 * The fastmap cannot be updated right here because this function
	 * is called in atomic context. Fail now and leave the refill and
	 * update to the work item, which runs as soon as possible.
	 */
	if (!ubi->fm_work_scheduled) {
		ubi->fm_work_scheduled = 1;
		schedule_work(&ubi->fm_work);
	}

	return NULL;
}
/**
 * ubi_ensure_anchor_pebs - schedule wear-leveling to produce an anchor PEB.
 * @ubi: UBI device description object
 *
 * Nothing is done when an anchor already exists. Otherwise an anchor is
 * taken from the free tree if one qualifies. Failing that,
 * 'fm_do_produce_anchor' is set and, unless wear-leveling is already
 * scheduled, a wear_leveling_worker work is queued.
 *
 * Returns zero in case of success and %-ENOMEM if the work could not be
 * allocated.
 */
int ubi_ensure_anchor_pebs(struct ubi_device *ubi)
{
	struct ubi_wl_entry *anchor;
	struct ubi_work *wrk;

	spin_lock(&ubi->wl_lock);

	/* An anchor is already in place. */
	if (ubi->fm_anchor)
		goto out_unlock;

	/* Try to take an anchor PEB straight from the free PEBs. */
	anchor = ubi_wl_get_fm_peb(ubi, 1);
	if (anchor) {
		ubi->fm_anchor = anchor;
		goto out_unlock;
	}

	/* No luck: wear leveling has to produce a new anchor PEB. */
	ubi->fm_do_produce_anchor = 1;
	if (ubi->wl_scheduled)
		goto out_unlock;

	ubi->wl_scheduled = 1;
	spin_unlock(&ubi->wl_lock);

	/* The allocation happens with wl_lock released. */
	wrk = kmalloc(sizeof(*wrk), GFP_NOFS);
	if (!wrk) {
		/* Undo the claim so that a later call can schedule again. */
		spin_lock(&ubi->wl_lock);
		ubi->wl_scheduled = 0;
		spin_unlock(&ubi->wl_lock);
		return -ENOMEM;
	}

	wrk->func = &wear_leveling_worker;
	__schedule_ubi_work(ubi, wrk);
	return 0;

out_unlock:
	spin_unlock(&ubi->wl_lock);
	return 0;
}
/**
 * ubi_wl_put_fm_peb - returns a PEB used in a fastmap to the wear-leveling
 * sub-system.
 * see: ubi_wl_put_peb()
 *
 * @ubi: UBI device description object
 * @fm_e: physical eraseblock to return
 * @lnum: the last used logical eraseblock number for the PEB
 * @torture: if this physical eraseblock has to be tortured
 *
 * Registers @fm_e in 'ubi->lookuptbl' if the table has no entry for this
 * PEB yet, then schedules the PEB for erasure. The volume ID handed to
 * schedule_erase() is %UBI_FM_SB_VOLUME_ID when @lnum is zero and
 * %UBI_FM_DATA_VOLUME_ID otherwise. Returns what schedule_erase() returns.
 */
int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e,
		      int lnum, int torture)
{
	struct ubi_wl_entry *entry;
	int peb = fm_e->pnum;
	int vol_id;

	dbg_wl("PEB %d", peb);

	ubi_assert(peb >= 0);
	ubi_assert(peb < ubi->peb_count);

	spin_lock(&ubi->wl_lock);
	entry = ubi->lookuptbl[peb];
	if (!entry) {
		/*
		 * This can happen if we recovered from a fastmap the very
		 * first time and are now writing a new one. In this case
		 * the wl system has never seen any PEB used by the
		 * original fastmap.
		 */
		entry = fm_e;
		ubi_assert(entry->ec >= 0);
		ubi->lookuptbl[peb] = entry;
	}
	spin_unlock(&ubi->wl_lock);

	if (lnum)
		vol_id = UBI_FM_DATA_VOLUME_ID;
	else
		vol_id = UBI_FM_SB_VOLUME_ID;

	return schedule_erase(ubi, entry, vol_id, lnum, torture, true);
}
/**
 * ubi_is_erase_work - checks whether a work is erase work.
 * @wrk: The work object to be checked
 *
 * Returns 1 if the work's handler is erase_worker and 0 otherwise.
 */
int ubi_is_erase_work(struct ubi_work *wrk)
{
	if (wrk->func == erase_worker)
		return 1;

	return 0;
}
/**
 * ubi_fastmap_close - release the fastmap state of a UBI device.
 * @ubi: UBI device description object
 *
 * Gives the unused PEBs of both pools and the anchor PEB, if any, back to
 * the free tree, then frees every entry in 'ubi->fm->e' up to 'used_blocks'
 * and finally 'ubi->fm' itself. 'ubi->fm' is not reset afterwards.
 */
static void ubi_fastmap_close(struct ubi_device *ubi)
{
	int blk;

	return_unused_pool_pebs(ubi, &ubi->fm_pool);
	return_unused_pool_pebs(ubi, &ubi->fm_wl_pool);

	if (ubi->fm_anchor) {
		return_unused_peb(ubi, ubi->fm_anchor);
		ubi->fm_anchor = NULL;
	}

	if (ubi->fm) {
		blk = 0;
		while (blk < ubi->fm->used_blocks) {
			kfree(ubi->fm->e[blk]);
			blk++;
		}
	}

	kfree(ubi->fm);
}
/**
* may_reserve_for_fm - tests whether a PEB shall be reserved for fastmap.
* See find_mean_wl_entry()
*
* @ubi: UBI device description object
* @e: physical eraseblock to return
* @root: RB tree to test against.
*/
static struct ubi_wl_entry *may_reserve_for_fm(struct ubi_device *ubi,
struct ubi_wl_entry *e,
struct rb_root *root) {
if (e && !ubi->fm_disabled && !ubi->fm && !ubi->fm_anchor &&
e->pnum < UBI_FM_MAX_START)
e = rb_entry(rb_next(root->rb_node),
struct ubi_wl_entry, u.rb);
return e;
}