Skip to content

Commit

Permalink
zswap: memcg accounting
Browse files Browse the repository at this point in the history
Applications can currently escape their cgroup memory containment when
zswap is enabled.  This patch adds per-cgroup tracking and limiting of
zswap backend memory to rectify this.

The existing cgroup2 memory.stat file is extended to show zswap statistics
analogous to what's in meminfo and vmstat.  Furthermore, two new control
files, memory.zswap.current and memory.zswap.max, are added to allow
tuning zswap usage on a per-workload basis.  This is important since not
all workloads benefit from zswap equally; some even suffer compared to
disk swap when memory contents don't compress well.  The optimal size of
the zswap pool, and the threshold for writeback, also depends on the size
of the workload's warm set.

The implementation doesn't use a traditional page_counter transaction. 
zswap is unconventional as a memory consumer in that we only know the
amount of memory to charge once expensive compression has occurred.  If
zswap is disabled or the limit is already exceeded we obviously don't want
to compress page upon page only to reject them all.  Instead, the limit is
checked against current usage, then we compress and charge.  This allows
some limit overrun, but not enough to matter in practice.

[[email protected]: fix for CONFIG_SLOB builds]
  Link: https://lkml.kernel.org/r/[email protected]
[[email protected]: opt out of cgroups v1]
  Link: https://lkml.kernel.org/r/Yn6it9mBYFA+/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Johannes Weiner <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Roman Gushchin <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Seth Jennings <[email protected]>
Cc: Dan Streetman <[email protected]>
Cc: Minchan Kim <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
  • Loading branch information
hnaz authored and akpm00 committed May 19, 2022
1 parent f6498b7 commit f4840cc
Show file tree
Hide file tree
Showing 4 changed files with 302 additions and 15 deletions.
21 changes: 21 additions & 0 deletions Documentation/admin-guide/cgroup-v2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1354,6 +1354,12 @@ PAGE_SIZE multiple when read back.
Amount of cached filesystem data that is swap-backed,
such as tmpfs, shm segments, shared anonymous mmap()s

zswap
Amount of memory consumed by the zswap compression backend.

zswapped
Amount of application memory swapped out to zswap.

file_mapped
Amount of cached filesystem data mapped with mmap()

Expand Down Expand Up @@ -1544,6 +1550,21 @@ PAGE_SIZE multiple when read back.
higher than the limit for an extended period of time. This
reduces the impact on the workload and memory management.

memory.zswap.current
A read-only single value file which exists on non-root
cgroups.

The total amount of memory consumed by the zswap compression
backend.

memory.zswap.max
A read-write single value file which exists on non-root
cgroups. The default is "max".

Zswap usage hard limit. If a cgroup's zswap pool reaches this
limit, it will refuse to take any more stores before existing
entries fault back in or are written out to disk.

memory.pressure
A read-only nested-keyed file.

Expand Down
54 changes: 54 additions & 0 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ enum memcg_stat_item {
MEMCG_PERCPU_B,
MEMCG_VMALLOC,
MEMCG_KMEM,
MEMCG_ZSWAP_B,
MEMCG_ZSWAPPED,
MEMCG_NR_STAT,
};

Expand Down Expand Up @@ -252,6 +254,10 @@ struct mem_cgroup {
/* Range enforcement for interrupt charges */
struct work_struct high_work;

#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
unsigned long zswap_max;
#endif

unsigned long soft_limit;

/* vmpressure notifications */
Expand Down Expand Up @@ -1273,6 +1279,10 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
return NULL;
}

/*
 * Stub variant of obj_cgroup_put(): the body is empty, so @objcg is ignored
 * and nothing is released.
 * NOTE(review): the enclosing preprocessor conditional is outside this hunk;
 * presumably this is the variant for builds without memcg support - confirm.
 */
static inline void obj_cgroup_put(struct obj_cgroup *objcg)
{
}

/*
 * Stub variant of mem_cgroup_put(): the body is empty, so @memcg is ignored.
 * NOTE(review): the enclosing preprocessor conditional is outside this hunk.
 */
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
Expand Down Expand Up @@ -1694,6 +1704,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge_page(struct page *page, int order);

struct obj_cgroup *get_obj_cgroup_from_current(void);
struct obj_cgroup *get_obj_cgroup_from_page(struct page *page);

int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
Expand Down Expand Up @@ -1730,6 +1741,20 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg)

struct mem_cgroup *mem_cgroup_from_obj(void *p);

/*
 * count_objcg_event - count one VM event against an object cgroup's memcg
 * @objcg: object cgroup whose memory cgroup should receive the event
 * @idx: the VM event item to count
 *
 * Does nothing when mem_cgroup_kmem_disabled() reports true. Otherwise
 * resolves @objcg to its memory cgroup and adds a count of 1 for @idx.
 */
static inline void count_objcg_event(struct obj_cgroup *objcg,
enum vm_event_item idx)
{
struct mem_cgroup *memcg;

if (mem_cgroup_kmem_disabled())
return;

/*
 * The objcg -> memcg lookup and the event count both happen inside
 * the RCU read-side section.
 */
rcu_read_lock();
memcg = obj_cgroup_memcg(objcg);
count_memcg_events(memcg, idx, 1);
rcu_read_unlock();
}

#else
static inline bool mem_cgroup_kmem_disabled(void)
{
Expand All @@ -1756,6 +1781,11 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
{
}

/*
 * Stub variant of get_obj_cgroup_from_page(): ignores @page and always
 * returns NULL.
 * NOTE(review): appears to belong to the #else branch that is closed by
 * "#endif CONFIG_MEMCG_KMEM" further down - confirm against the full file.
 */
static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page)
{
return NULL;
}

static inline bool memcg_kmem_enabled(void)
{
return false;
Expand All @@ -1771,6 +1801,30 @@ static inline struct mem_cgroup *mem_cgroup_from_obj(void *p)
return NULL;
}

/*
 * Stub variant of count_objcg_event(): the body is empty, so no event is
 * counted and both @objcg and @idx are ignored.
 */
static inline void count_objcg_event(struct obj_cgroup *objcg,
enum vm_event_item idx)
{
}

#endif /* CONFIG_MEMCG_KMEM */

#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
/*
 * Per-cgroup zswap accounting hooks. They are only declared here; the
 * definitions live outside this header.
 * NOTE(review): per the commit description, the limit is checked against
 * current usage before compression and the charge is applied afterwards -
 * confirm against the definitions.
 */
bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
#else
/*
 * Fallbacks used when CONFIG_MEMCG_KMEM or CONFIG_ZSWAP is not defined:
 * obj_cgroup_may_zswap() always returns true, and the charge/uncharge
 * helpers have empty bodies.
 */
static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
{
return true;
}
static inline void obj_cgroup_charge_zswap(struct obj_cgroup *objcg,
size_t size)
{
}
static inline void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg,
size_t size)
{
}
#endif

#endif /* _LINUX_MEMCONTROL_H */
Loading

0 comments on commit f4840cc

Please sign in to comment.