From ebd2497c0f0383fd24e536c5de0f73cbe939c5af Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Mon, 5 Apr 2021 04:38:24 -0600
Subject: [PATCH] BACKPORT: FROMLIST: mm: multigenerational lru: eviction

The eviction consumes old generations. Given an lruvec, the eviction
scans the pages on the per-zone lists indexed by either of min_seq[2].
It first tries to select a type based on the values of min_seq[2].
When anon and file types are both available from the same generation,
it selects the one that has a lower refault rate.
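
As a rough sketch (illustrative only; refault_rate() is a made-up
stand-in for the comparison that get_type_to_scan() below performs via
positive_ctrl_err()), the type selection amounts to:

	/* type 0 is anon, type 1 is file */
	if (min_seq[0] != min_seq[1])
		type = min_seq[0] < min_seq[1] ? 0 : 1;	/* older generation first */
	else
		type = refault_rate(1) < refault_rate(0) ? 1 : 0;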

During a scan, the eviction sorts pages according to their new
generation numbers, if the aging has found them referenced. It also
moves pages from the tiers that have higher refault rates than tier 0
to the next generation. When it finds all the per-zone lists of a
selected type are empty, the eviction increments min_seq[2] indexed by
this selected type.
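
In terms of the sort_page() added below, each scanned page is handled
roughly as follows; move_to_updated_list(), promote_to_next_gen() and
try_to_isolate() are made-up names for the list_move(), page_inc_gen()
and isolate_page() paths:

	if (gen != lru_gen_from_seq(min_seq[type]))
		move_to_updated_list(page);	/* the aging referenced it */
	else if (tier > tier_to_isolate)
		promote_to_next_gen(page);	/* refaults more than tier 0 */
	else
		try_to_isolate(page);		/* candidate for eviction */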

With the aging and the eviction in place, we can build page reclaim in
a straightforward manner:
1) In order to reduce the latency, direct reclaim only invokes the
   aging when both min_seq[2] reach max_seq-1; otherwise it invokes
   the eviction.
2) In order to avoid the aging in the direct reclaim path, kswapd
   does the background aging. It invokes the aging when either of
   min_seq[2] reaches max_seq-1; otherwise it invokes the eviction.
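
A minimal sketch of that division of labor (illustrative only; aging()
and eviction() stand in for walk_mm_list() and evict_pages() below):

	if (current_is_kswapd()) {
		/* background: age when either type is low on old pages */
		if (min_seq[0] == max_seq - 1 || min_seq[1] == max_seq - 1)
			aging();
		else
			eviction();
	} else {
		/* direct reclaim: age only when both types have run out */
		if (min_seq[0] == max_seq - 1 && min_seq[1] == max_seq - 1)
			aging();
		else
			eviction();
	}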

Signed-off-by: Yu Zhao <yuzhao@google.com>
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
(am from https://lore.kernel.org/patchwork/patch/1432186/)

BUG=b:123039911
TEST=Built

Change-Id: I64c06d8f2cdb83ac7d56c7e1d07f043483956cac
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2987190
Reviewed-by: Yu Zhao <yuzhao@chromium.org>
Tested-by: Yu Zhao <yuzhao@chromium.org>
Commit-Queue: Sonny Rao <sonnyrao@chromium.org>
Commit-Queue: Yu Zhao <yuzhao@chromium.org>
---
 include/linux/mmzone.h |   5 +
 mm/vmscan.c            | 531 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 536 insertions(+)

--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -811,6 +811,8 @@ struct deferred_split {
 };
 #endif
 
+struct mm_walk_args;
+
 /*
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
  * it's memory layout. On UMA machines there is a single pglist_data which
@@ -900,6 +902,9 @@ typedef struct pglist_data {
 
 	unsigned long flags;
 
+#ifdef CONFIG_LRU_GEN
+	struct mm_walk_args *mm_walk_args;
+#endif
 	ZONE_PADDING(_pad2_)
 
 	/* Per-node vmstats */
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1169,6 +1169,11 @@ static unsigned long shrink_page_list(st
 		if (!sc->may_unmap && page_mapped(page))
 			goto keep_locked;
 
+		/* in case the page was found accessed by lru_gen_scan_around() */
+		if (lru_gen_enabled() && !ignore_references &&
+		    page_mapped(page) && PageReferenced(page))
+			goto keep_locked;
+
 		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
 			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
@@ -3818,6 +3823,482 @@ out:
 }
 
 /******************************************************************************
+ *                          the eviction
+ ******************************************************************************/
+
+static bool should_skip_page(struct page *page, struct scan_control *sc)
+{
+	if (!sc->may_unmap && page_mapped(page))
+		return true;
+
+	if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
+	    (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page))))
+		return true;
+
+	if (!get_page_unless_zero(page))
+		return true;
+
+	return false;
+}
+
+static bool sort_page(struct page *page, struct lruvec *lruvec, int tier_to_isolate)
+{
+	bool success;
+	int gen = page_lru_gen(page);
+	int type = page_is_file_cache(page);
+	int zone = page_zonenum(page);
+	int tier = lru_tier_from_usage(page_tier_usage(page));
+	struct lrugen *lrugen = &lruvec->evictable;
+
+	VM_BUG_ON_PAGE(gen == -1, page);
+	VM_BUG_ON_PAGE(tier_to_isolate < 0, page);
+
+	/* a lazy-free page that has been written into? */
+	if (type && PageDirty(page) && PageAnon(page)) {
+		success = lru_gen_deletion(page, lruvec);
+		VM_BUG_ON_PAGE(!success, page);
+		SetPageSwapBacked(page);
+		add_page_to_lru_list_tail(page, lruvec);
+		return true;
+	}
+
+	/* page_update_gen() has updated the gen #? */
+	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
+		list_move(&page->lru, &lrugen->lists[gen][type][zone]);
+		return true;
+	}
+
+	/* activate this page if its tier has a higher refault rate */
+	if (tier_to_isolate < tier) {
+		int hist = hist_from_seq_or_gen(gen);
+
+		page_inc_gen(page, lruvec, false);
+		WRITE_ONCE(lrugen->activated[hist][type][tier - 1],
+			   lrugen->activated[hist][type][tier - 1] + hpage_nr_pages(page));
+		inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
+		return true;
+	}
+
+	/* mark this page for reclaim if it's pending writeback */
+	if (PageWriteback(page) || (type && PageDirty(page))) {
+		page_inc_gen(page, lruvec, true);
+		return true;
+	}
+
+	return false;
+}
+
+static void isolate_page(struct page *page, struct lruvec *lruvec)
+{
+	bool success;
+
+	success = lru_gen_deletion(page, lruvec);
+	VM_BUG_ON_PAGE(!success, page);
+
+	ClearPageLRU(page);
+
+	if (PageActive(page)) {
+		ClearPageActive(page);
+		/* make sure shrink_page_list() rejects this page */
+		SetPageReferenced(page);
+		return;
+	}
+
+	/* make sure shrink_page_list() doesn't try to write this page */
+	ClearPageReclaim(page);
+	/* make sure shrink_page_list() doesn't reject this page */
+	ClearPageReferenced(page);
+}
+
+static int scan_pages(struct lruvec *lruvec, struct scan_control *sc, long *nr_to_scan,
+		      int type, int tier, struct list_head *list)
+{
+	bool success;
+	int gen, zone;
+	enum vm_event_item item;
+	int sorted = 0;
+	int scanned = 0;
+	int isolated = 0;
+	int batch_size = 0;
+	struct lrugen *lrugen = &lruvec->evictable;
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+
+	VM_BUG_ON(!list_empty(list));
+
+	if (get_nr_gens(lruvec, type) == MIN_NR_GENS)
+		return -ENOENT;
+
+	gen = lru_gen_from_seq(lrugen->min_seq[type]);
+
+	for (zone = sc->reclaim_idx; zone >= 0; zone--) {
+		LIST_HEAD(moved);
+		int skipped = 0;
+		struct list_head *head = &lrugen->lists[gen][type][zone];
+
+		while (!list_empty(head)) {
+			struct page *page = lru_to_page(head);
+			int delta = hpage_nr_pages(page);
+
+			VM_BUG_ON_PAGE(PageTail(page), page);
+			VM_BUG_ON_PAGE(PageUnevictable(page), page);
+			VM_BUG_ON_PAGE(PageActive(page), page);
+			VM_BUG_ON_PAGE(page_is_file_cache(page) != type, page);
+			VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
+
+			prefetchw_prev_lru_page(page, head, flags);
+
+			scanned += delta;
+
+			if (sort_page(page, lruvec, tier))
+				sorted += delta;
+			else if (should_skip_page(page, sc)) {
+				list_move(&page->lru, &moved);
+				skipped += delta;
+			} else {
+				isolate_page(page, lruvec);
+				list_add(&page->lru, list);
+				isolated += delta;
+			}
+
+			if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX ||
+			    ++batch_size == MAX_BATCH_SIZE)
+				break;
+		}
+
+		list_splice(&moved, head);
+		__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
+
+		if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX ||
+		    batch_size == MAX_BATCH_SIZE)
+			break;
+	}
+
+	success = try_inc_min_seq(lruvec, type);
+
+	item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
+	if (global_reclaim(sc)) {
+		__count_vm_events(item, isolated);
+		__count_vm_events(PGREFILL, sorted);
+	}
+	__count_memcg_events(memcg, item, isolated);
+	__count_memcg_events(memcg, PGREFILL, sorted);
+
+	*nr_to_scan -= scanned;
+
+	if (*nr_to_scan <= 0 || success || isolated)
+		return isolated;
+	/*
+	 * We may have trouble finding eligible pages due to reclaim_idx,
+	 * may_unmap and may_writepage. The following check makes sure we won't
+	 * be stuck if we aren't making enough progress.
+	 */
+	return batch_size == MAX_BATCH_SIZE && sorted >= SWAP_CLUSTER_MAX ? 0 : -ENOENT;
+}
+
+static int get_tier_to_isolate(struct lruvec *lruvec, int type)
+{
+	int tier;
+	struct controller_pos sp, pv;
+
+	/*
+	 * Ideally we don't want to evict upper tiers that have higher refault
+	 * rates. However, we need to leave a margin for the fluctuations in
+	 * refault rates. So we use a larger gain factor to make sure upper
+	 * tiers are indeed more active. We choose 2 because the lowest upper
+	 * tier would have twice of the refault rate of the base tier, according
+	 * to their numbers of accesses.
+	 */
+	read_controller_pos(&sp, lruvec, type, 0, 1);
+	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
+		read_controller_pos(&pv, lruvec, type, tier, 2);
+		if (!positive_ctrl_err(&sp, &pv))
+			break;
+	}
+
+	return tier - 1;
+}
+
+static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_to_isolate)
+{
+	int type, tier;
+	struct controller_pos sp, pv;
+	int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness };
+
+	/*
+	 * Compare the refault rates between the base tiers of anon and file to
+	 * determine which type to evict. Also need to compare the refault rates
+	 * of the upper tiers of the selected type with that of the base tier of
+	 * the other type to determine which tier of the selected type to evict.
+	 */
+	read_controller_pos(&sp, lruvec, 0, 0, gain[0]);
+	read_controller_pos(&pv, lruvec, 1, 0, gain[1]);
+	type = positive_ctrl_err(&sp, &pv);
+
+	read_controller_pos(&sp, lruvec, !type, 0, gain[!type]);
+	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
+		read_controller_pos(&pv, lruvec, type, tier, gain[type]);
+		if (!positive_ctrl_err(&sp, &pv))
+			break;
+	}
+
+	*tier_to_isolate = tier - 1;
+
+	return type;
+}
+
+static int isolate_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
+			 long *nr_to_scan, int *type_to_scan, struct list_head *list)
+{
+	int i;
+	int type;
+	int isolated;
+	int tier = -1;
+	DEFINE_MAX_SEQ();
+	DEFINE_MIN_SEQ();
+
+	VM_BUG_ON(!seq_is_valid(lruvec));
+
+	if (get_hi_wmark(max_seq, min_seq, swappiness) == MIN_NR_GENS)
+		return 0;
+	/*
+	 * Try to select a type based on generations and swappiness, and if that
+	 * fails, fall back to get_type_to_scan(). When anon and file are both
+	 * available from the same generation, swappiness 200 is interpreted as
+	 * anon first and swappiness 1 is interpreted as file first.
+	 */
+	type = !swappiness || min_seq[0] > min_seq[1] ||
+	       (min_seq[0] == min_seq[1] && swappiness != 200 &&
+		(swappiness == 1 || get_type_to_scan(lruvec, swappiness, &tier)));
+
+	if (tier == -1)
+		tier = get_tier_to_isolate(lruvec, type);
+
+	for (i = !swappiness; i < ANON_AND_FILE; i++) {
+		isolated = scan_pages(lruvec, sc, nr_to_scan, type, tier, list);
+		if (isolated >= 0)
+			break;
+
+		type = !type;
+		tier = get_tier_to_isolate(lruvec, type);
+	}
+
+	if (isolated < 0)
+		isolated = *nr_to_scan = 0;
+
+	*type_to_scan = type;
+
+	return isolated;
+}
+
+/* Main function used by the foreground, the background and the user-triggered eviction. */
+static bool evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
+			long *nr_to_scan)
+{
+	int type;
+	int isolated;
+	int reclaimed;
+	LIST_HEAD(list);
+	struct page *page;
+	enum vm_event_item item;
+	struct reclaim_stat stat;
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+	spin_lock_irq(&pgdat->lru_lock);
+
+	isolated = isolate_pages(lruvec, sc, swappiness, nr_to_scan, &type, &list);
+	VM_BUG_ON(list_empty(&list) == !!isolated);
+
+	if (isolated)
+		__mod_node_page_state(pgdat, NR_ISOLATED_ANON + type, isolated);
+
+	spin_unlock_irq(&pgdat->lru_lock);
+
+	if (!isolated)
+		goto done;
+
+	reclaimed = shrink_page_list(&list, pgdat, sc, 0, &stat, false);
+	/*
+	 * We need to prevent rejected pages from being added back to the same
+	 * lists they were isolated from. Otherwise we may risk looping on them
+	 * forever. We use PageActive() or !PageReferenced() && PageWorkingset()
+	 * to tell lru_gen_addition() not to add them to the oldest generation.
+	 */
+	list_for_each_entry(page, &list, lru) {
+		if (PageMlocked(page))
+			continue;
+
+		if (page_mapped(page) && PageReferenced(page))
+			SetPageActive(page);
+		else if (!PageActive(page))
+			SetPageWorkingset(page);
+		ClearPageReferenced(page);
+	}
+
+	spin_lock_irq(&pgdat->lru_lock);
+
+	move_pages_to_lru(lruvec, &list);
+
+	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + type, -isolated);
+
+	item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
+	if (global_reclaim(sc))
+		__count_vm_events(item, reclaimed);
+	__count_memcg_events(lruvec_memcg(lruvec), item, reclaimed);
+
+	spin_unlock_irq(&pgdat->lru_lock);
+
+	mem_cgroup_uncharge_list(&list);
+	free_unref_page_list(&list);
+
+	sc->nr_reclaimed += reclaimed;
+done:
+	return *nr_to_scan > 0 && sc->nr_reclaimed < sc->nr_to_reclaim;
+}
+
+/******************************************************************************
+ *                          page reclaim
+ ******************************************************************************/
+
+static int get_swappiness(struct lruvec *lruvec)
+{
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+	int swappiness = mem_cgroup_get_nr_swap_pages(memcg) >= (long)SWAP_CLUSTER_MAX ?
+			 mem_cgroup_swappiness(memcg) : 0;
+
+	VM_BUG_ON(swappiness > 200U);
+
+	return swappiness;
+}
+
+static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
+{
+	int gen, type, zone;
+	long nr_to_scan = 0;
+	struct lrugen *lrugen = &lruvec->evictable;
+	DEFINE_MAX_SEQ();
+	DEFINE_MIN_SEQ();
+
+	lru_add_drain();
+
+	for (type = !swappiness; type < ANON_AND_FILE; type++) {
+		unsigned long seq;
+
+		for (seq = min_seq[type]; seq <= max_seq; seq++) {
+			gen = lru_gen_from_seq(seq);
+
+			for (zone = 0; zone <= sc->reclaim_idx; zone++)
+				nr_to_scan += READ_ONCE(lrugen->sizes[gen][type][zone]);
+		}
+	}
+
+	nr_to_scan = max(nr_to_scan, 0L);
+	nr_to_scan = round_up(nr_to_scan >> sc->priority, SWAP_CLUSTER_MAX);
+
+	if (get_hi_wmark(max_seq, min_seq, swappiness) > MIN_NR_GENS)
+		return nr_to_scan;
+
+	/* kswapd uses lru_gen_age_node() */
+	if (current_is_kswapd())
+		return 0;
+
+	return walk_mm_list(lruvec, max_seq, sc, swappiness, NULL) ? nr_to_scan : 0;
+}
+
+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+{
+	struct blk_plug plug;
+	long scanned = 0;
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+	enum mem_cgroup_protection prot = mem_cgroup_protected(sc->target_mem_cgroup, memcg);
+
+	blk_start_plug(&plug);
+
+	while (true) {
+		long nr_to_scan;
+		int swappiness = sc->may_swap ? get_swappiness(lruvec) : 0;
+
+		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness) - scanned;
+		if (nr_to_scan < (long)SWAP_CLUSTER_MAX)
+			break;
+
+		scanned += nr_to_scan;
+
+		if (!evict_pages(lruvec, sc, swappiness, &nr_to_scan))
+			break;
+
+		scanned -= nr_to_scan;
+
+		if (prot == MEMCG_PROT_MIN || (prot == MEMCG_PROT_LOW && !sc->memcg_low_reclaim))
+			break;
+
+		cond_resched();
+	}
+
+	blk_finish_plug(&plug);
+}
+
+/******************************************************************************
+ *                          the background aging
+ ******************************************************************************/
+
+static int lru_gen_spread = MIN_NR_GENS;
+
+static void try_walk_mm_list(struct lruvec *lruvec, struct scan_control *sc)
+{
+	int gen, type, zone;
+	long old_and_young[2] = {};
+	int spread = READ_ONCE(lru_gen_spread);
+	int swappiness = get_swappiness(lruvec);
+	struct lrugen *lrugen = &lruvec->evictable;
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+	DEFINE_MAX_SEQ();
+	DEFINE_MIN_SEQ();
+
+	lru_add_drain();
+
+	for (type = !swappiness; type < ANON_AND_FILE; type++) {
+		unsigned long seq;
+
+		for (seq = min_seq[type]; seq <= max_seq; seq++) {
+			gen = lru_gen_from_seq(seq);
+
+			for (zone = 0; zone < MAX_NR_ZONES; zone++)
+				old_and_young[seq == max_seq] +=
+					READ_ONCE(lrugen->sizes[gen][type][zone]);
+		}
+	}
+
+	old_and_young[0] = max(old_and_young[0], 0L);
+	old_and_young[1] = max(old_and_young[1], 0L);
+
+	/* try to spread pages out across spread+1 generations */
+	if (old_and_young[0] >= old_and_young[1] * spread &&
+	    get_lo_wmark(max_seq, min_seq, swappiness) > max(spread, MIN_NR_GENS))
+		return;
+
+	walk_mm_list(lruvec, max_seq, sc, swappiness, pgdat->mm_walk_args);
+}
+
+static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+{
+	struct mem_cgroup *memcg;
+
+	VM_BUG_ON(!current_is_kswapd());
+
+	memcg = mem_cgroup_iter(NULL, NULL, NULL);
+	do {
+		struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
+		enum mem_cgroup_protection prot = mem_cgroup_protected(sc->target_mem_cgroup,
+								       memcg);
+
+		if (prot != MEMCG_PROT_MIN && (prot != MEMCG_PROT_LOW || sc->memcg_low_reclaim))
+			try_walk_mm_list(lruvec, sc);
+
+		cond_resched();
+	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+}
+
+/******************************************************************************
  *                          state change
  ******************************************************************************/
 
@@ -4020,6 +4501,21 @@ static int __meminit __maybe_unused lru_
 	return NOTIFY_DONE;
 }
 
+static void lru_gen_start_kswapd(int nid)
+{
+	struct pglist_data *pgdat = NODE_DATA(nid);
+
+	pgdat->mm_walk_args = kvzalloc_node(size_of_mm_walk_args(), GFP_KERNEL, nid);
+	WARN_ON_ONCE(!pgdat->mm_walk_args);
+}
+
+static void lru_gen_stop_kswapd(int nid)
+{
+	struct pglist_data *pgdat = NODE_DATA(nid);
+
+	kvfree(pgdat->mm_walk_args);
+}
+
 /******************************************************************************
  *                          initialization
  ******************************************************************************/
@@ -4068,6 +4564,24 @@ static int __init init_lru_gen(void)
  */
 arch_initcall(init_lru_gen);
 
+#else /* CONFIG_LRU_GEN */
+
+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+{
+}
+
+static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+{
+}
+
+static void lru_gen_start_kswapd(int nid)
+{
+}
+
+static void lru_gen_stop_kswapd(int nid)
+{
+}
+
 #endif /* CONFIG_LRU_GEN */
 
 /*
@@ -4086,6 +4600,12 @@ static void shrink_node_memcg(struct pgl
 	struct blk_plug plug;
 	bool scan_adjusted;
 
+	if (lru_gen_enabled()) {
+		*lru_pages = 0;
+		lru_gen_shrink_lruvec(lruvec, sc);
+		return;
+	}
+
 	get_scan_count(lruvec, memcg, sc, nr, lru_pages);
 
 	/* Record the original scan target for proportional adjustments later */
@@ -4560,6 +5080,9 @@ static void snapshot_refaults(struct mem
 {
 	struct mem_cgroup *memcg;
 
+	if (lru_gen_enabled())
+		return;
+
 	memcg = mem_cgroup_iter(root_memcg, NULL, NULL);
 	do {
 		unsigned long refaults;
@@ -4922,6 +5445,11 @@ static void age_active_anon(struct pglis
 {
 	struct mem_cgroup *memcg;
 
+	if (lru_gen_enabled()) {
+		lru_gen_age_node(pgdat, sc);
+		return;
+	}
+
 	if (!total_swap_pages)
 		return;
 
@@ -5615,6 +6143,8 @@ int kswapd_run(int nid)
 	if (pgdat->kswapd)
 		return 0;
 
+	lru_gen_start_kswapd(nid);
+
 	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
 	if (IS_ERR(pgdat->kswapd)) {
 		/* failure at boot is fatal */
@@ -5637,6 +6167,7 @@ void kswapd_stop(int nid)
 	if (kswapd) {
 		kthread_stop(kswapd);
 		NODE_DATA(nid)->kswapd = NULL;
+		lru_gen_stop_kswapd(nid);
 	}
 }
 