mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 01:09:38 -05:00
mm: hugetlb_cgroup: convert to lockless page counters
Abandon the spinlock-protected byte counters in favor of the unlocked page counters in the hugetlb controller as well.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
3e32cb2e0a
commit
71f87bee38
4 changed files with 63 additions and 50 deletions
|
@ -29,7 +29,7 @@ Brief summary of control files
|
||||||
|
|
||||||
hugetlb.<hugepagesize>.limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage
|
hugetlb.<hugepagesize>.limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage
|
||||||
hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
|
hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
|
||||||
hugetlb.<hugepagesize>.usage_in_bytes # show current res_counter usage for "hugepagesize" hugetlb
|
hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb
|
||||||
hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB limit
|
hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB limit
|
||||||
|
|
||||||
For a system supporting two hugepage size (16M and 16G) the control
|
For a system supporting two hugepage size (16M and 16G) the control
|
||||||
|
|
|
@ -16,7 +16,6 @@
|
||||||
#define _LINUX_HUGETLB_CGROUP_H
|
#define _LINUX_HUGETLB_CGROUP_H
|
||||||
|
|
||||||
#include <linux/mmdebug.h>
|
#include <linux/mmdebug.h>
|
||||||
#include <linux/res_counter.h>
|
|
||||||
|
|
||||||
struct hugetlb_cgroup;
|
struct hugetlb_cgroup;
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1051,7 +1051,8 @@ config MEMCG_KMEM
|
||||||
|
|
||||||
config CGROUP_HUGETLB
|
config CGROUP_HUGETLB
|
||||||
bool "HugeTLB Resource Controller for Control Groups"
|
bool "HugeTLB Resource Controller for Control Groups"
|
||||||
depends on RESOURCE_COUNTERS && HUGETLB_PAGE
|
depends on HUGETLB_PAGE
|
||||||
|
select PAGE_COUNTER
|
||||||
default n
|
default n
|
||||||
help
|
help
|
||||||
Provides a cgroup Resource Controller for HugeTLB pages.
|
Provides a cgroup Resource Controller for HugeTLB pages.
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <linux/cgroup.h>
|
#include <linux/cgroup.h>
|
||||||
|
#include <linux/page_counter.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/hugetlb.h>
|
#include <linux/hugetlb.h>
|
||||||
#include <linux/hugetlb_cgroup.h>
|
#include <linux/hugetlb_cgroup.h>
|
||||||
|
@ -23,7 +24,7 @@ struct hugetlb_cgroup {
|
||||||
/*
|
/*
|
||||||
* the counter to account for hugepages from hugetlb.
|
* the counter to account for hugepages from hugetlb.
|
||||||
*/
|
*/
|
||||||
struct res_counter hugepage[HUGE_MAX_HSTATE];
|
struct page_counter hugepage[HUGE_MAX_HSTATE];
|
||||||
};
|
};
|
||||||
|
|
||||||
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
|
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
|
||||||
|
@ -60,7 +61,7 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
|
||||||
int idx;
|
int idx;
|
||||||
|
|
||||||
for (idx = 0; idx < hugetlb_max_hstate; idx++) {
|
for (idx = 0; idx < hugetlb_max_hstate; idx++) {
|
||||||
if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0)
|
if (page_counter_read(&h_cg->hugepage[idx]))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -79,12 +80,12 @@ hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||||
|
|
||||||
if (parent_h_cgroup) {
|
if (parent_h_cgroup) {
|
||||||
for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
|
for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
|
||||||
res_counter_init(&h_cgroup->hugepage[idx],
|
page_counter_init(&h_cgroup->hugepage[idx],
|
||||||
&parent_h_cgroup->hugepage[idx]);
|
&parent_h_cgroup->hugepage[idx]);
|
||||||
} else {
|
} else {
|
||||||
root_h_cgroup = h_cgroup;
|
root_h_cgroup = h_cgroup;
|
||||||
for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
|
for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
|
||||||
res_counter_init(&h_cgroup->hugepage[idx], NULL);
|
page_counter_init(&h_cgroup->hugepage[idx], NULL);
|
||||||
}
|
}
|
||||||
return &h_cgroup->css;
|
return &h_cgroup->css;
|
||||||
}
|
}
|
||||||
|
@ -108,9 +109,8 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
|
||||||
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
|
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
|
||||||
struct page *page)
|
struct page *page)
|
||||||
{
|
{
|
||||||
int csize;
|
unsigned int nr_pages;
|
||||||
struct res_counter *counter;
|
struct page_counter *counter;
|
||||||
struct res_counter *fail_res;
|
|
||||||
struct hugetlb_cgroup *page_hcg;
|
struct hugetlb_cgroup *page_hcg;
|
||||||
struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
|
struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
|
||||||
|
|
||||||
|
@ -123,15 +123,15 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
|
||||||
if (!page_hcg || page_hcg != h_cg)
|
if (!page_hcg || page_hcg != h_cg)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
csize = PAGE_SIZE << compound_order(page);
|
nr_pages = 1 << compound_order(page);
|
||||||
if (!parent) {
|
if (!parent) {
|
||||||
parent = root_h_cgroup;
|
parent = root_h_cgroup;
|
||||||
/* root has no limit */
|
/* root has no limit */
|
||||||
res_counter_charge_nofail(&parent->hugepage[idx],
|
page_counter_charge(&parent->hugepage[idx], nr_pages);
|
||||||
csize, &fail_res);
|
|
||||||
}
|
}
|
||||||
counter = &h_cg->hugepage[idx];
|
counter = &h_cg->hugepage[idx];
|
||||||
res_counter_uncharge_until(counter, counter->parent, csize);
|
/* Take the pages off the local counter */
|
||||||
|
page_counter_cancel(counter, nr_pages);
|
||||||
|
|
||||||
set_hugetlb_cgroup(page, parent);
|
set_hugetlb_cgroup(page, parent);
|
||||||
out:
|
out:
|
||||||
|
@ -166,9 +166,8 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
|
||||||
struct hugetlb_cgroup **ptr)
|
struct hugetlb_cgroup **ptr)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
struct res_counter *fail_res;
|
struct page_counter *counter;
|
||||||
struct hugetlb_cgroup *h_cg = NULL;
|
struct hugetlb_cgroup *h_cg = NULL;
|
||||||
unsigned long csize = nr_pages * PAGE_SIZE;
|
|
||||||
|
|
||||||
if (hugetlb_cgroup_disabled())
|
if (hugetlb_cgroup_disabled())
|
||||||
goto done;
|
goto done;
|
||||||
|
@ -187,7 +186,7 @@ again:
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res);
|
ret = page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter);
|
||||||
css_put(&h_cg->css);
|
css_put(&h_cg->css);
|
||||||
done:
|
done:
|
||||||
*ptr = h_cg;
|
*ptr = h_cg;
|
||||||
|
@ -213,7 +212,6 @@ void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
|
||||||
struct page *page)
|
struct page *page)
|
||||||
{
|
{
|
||||||
struct hugetlb_cgroup *h_cg;
|
struct hugetlb_cgroup *h_cg;
|
||||||
unsigned long csize = nr_pages * PAGE_SIZE;
|
|
||||||
|
|
||||||
if (hugetlb_cgroup_disabled())
|
if (hugetlb_cgroup_disabled())
|
||||||
return;
|
return;
|
||||||
|
@ -222,61 +220,76 @@ void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
|
||||||
if (unlikely(!h_cg))
|
if (unlikely(!h_cg))
|
||||||
return;
|
return;
|
||||||
set_hugetlb_cgroup(page, NULL);
|
set_hugetlb_cgroup(page, NULL);
|
||||||
res_counter_uncharge(&h_cg->hugepage[idx], csize);
|
page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
|
void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
|
||||||
struct hugetlb_cgroup *h_cg)
|
struct hugetlb_cgroup *h_cg)
|
||||||
{
|
{
|
||||||
unsigned long csize = nr_pages * PAGE_SIZE;
|
|
||||||
|
|
||||||
if (hugetlb_cgroup_disabled() || !h_cg)
|
if (hugetlb_cgroup_disabled() || !h_cg)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
|
if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
res_counter_uncharge(&h_cg->hugepage[idx], csize);
|
page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum {
|
||||||
|
RES_USAGE,
|
||||||
|
RES_LIMIT,
|
||||||
|
RES_MAX_USAGE,
|
||||||
|
RES_FAILCNT,
|
||||||
|
};
|
||||||
|
|
||||||
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
|
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
|
||||||
struct cftype *cft)
|
struct cftype *cft)
|
||||||
{
|
{
|
||||||
int idx, name;
|
struct page_counter *counter;
|
||||||
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
|
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
|
||||||
|
|
||||||
idx = MEMFILE_IDX(cft->private);
|
counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
|
||||||
name = MEMFILE_ATTR(cft->private);
|
|
||||||
|
|
||||||
return res_counter_read_u64(&h_cg->hugepage[idx], name);
|
switch (MEMFILE_ATTR(cft->private)) {
|
||||||
|
case RES_USAGE:
|
||||||
|
return (u64)page_counter_read(counter) * PAGE_SIZE;
|
||||||
|
case RES_LIMIT:
|
||||||
|
return (u64)counter->limit * PAGE_SIZE;
|
||||||
|
case RES_MAX_USAGE:
|
||||||
|
return (u64)counter->watermark * PAGE_SIZE;
|
||||||
|
case RES_FAILCNT:
|
||||||
|
return counter->failcnt;
|
||||||
|
default:
|
||||||
|
BUG();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static DEFINE_MUTEX(hugetlb_limit_mutex);
|
||||||
|
|
||||||
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
|
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
|
||||||
char *buf, size_t nbytes, loff_t off)
|
char *buf, size_t nbytes, loff_t off)
|
||||||
{
|
{
|
||||||
int idx, name, ret;
|
int ret, idx;
|
||||||
unsigned long long val;
|
unsigned long nr_pages;
|
||||||
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
|
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
|
||||||
|
|
||||||
buf = strstrip(buf);
|
if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
|
||||||
idx = MEMFILE_IDX(of_cft(of)->private);
|
return -EINVAL;
|
||||||
name = MEMFILE_ATTR(of_cft(of)->private);
|
|
||||||
|
|
||||||
switch (name) {
|
buf = strstrip(buf);
|
||||||
case RES_LIMIT:
|
ret = page_counter_memparse(buf, &nr_pages);
|
||||||
if (hugetlb_cgroup_is_root(h_cg)) {
|
|
||||||
/* Can't set limit on root */
|
|
||||||
ret = -EINVAL;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* This function does all necessary parse...reuse it */
|
|
||||||
ret = res_counter_memparse_write_strategy(buf, &val);
|
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
return ret;
|
||||||
val = ALIGN(val, 1ULL << huge_page_shift(&hstates[idx]));
|
|
||||||
ret = res_counter_set_limit(&h_cg->hugepage[idx], val);
|
idx = MEMFILE_IDX(of_cft(of)->private);
|
||||||
|
|
||||||
|
switch (MEMFILE_ATTR(of_cft(of)->private)) {
|
||||||
|
case RES_LIMIT:
|
||||||
|
mutex_lock(&hugetlb_limit_mutex);
|
||||||
|
ret = page_counter_limit(&h_cg->hugepage[idx], nr_pages);
|
||||||
|
mutex_unlock(&hugetlb_limit_mutex);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
|
@ -288,18 +301,18 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
|
||||||
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
|
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
|
||||||
char *buf, size_t nbytes, loff_t off)
|
char *buf, size_t nbytes, loff_t off)
|
||||||
{
|
{
|
||||||
int idx, name, ret = 0;
|
int ret = 0;
|
||||||
|
struct page_counter *counter;
|
||||||
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
|
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
|
||||||
|
|
||||||
idx = MEMFILE_IDX(of_cft(of)->private);
|
counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
|
||||||
name = MEMFILE_ATTR(of_cft(of)->private);
|
|
||||||
|
|
||||||
switch (name) {
|
switch (MEMFILE_ATTR(of_cft(of)->private)) {
|
||||||
case RES_MAX_USAGE:
|
case RES_MAX_USAGE:
|
||||||
res_counter_reset_max(&h_cg->hugepage[idx]);
|
page_counter_reset_watermark(counter);
|
||||||
break;
|
break;
|
||||||
case RES_FAILCNT:
|
case RES_FAILCNT:
|
||||||
res_counter_reset_failcnt(&h_cg->hugepage[idx]);
|
counter->failcnt = 0;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
|
|
Loading…
Add table
Reference in a new issue