mm: numa: cleanup flow of transhuge page migration
When correcting commit 04fa5d6a65 ("mm: migrate: check page_count of
THP before migrating") Hugh Dickins noted that the control flow for
transhuge migration was difficult to follow. Unconditionally calling
put_page() in numamigrate_isolate_page() made the failure paths of both
migrate_misplaced_transhuge_page() and migrate_misplaced_page() more
complex than they should be. Further, he was extremely wary that an
unlock_page() should ever happen after a put_page() even if the
put_page() should never be the final put_page().
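To see the hazard concretely: if a put_page() could ever drop the final
reference, an unlock_page() issued afterwards would operate on a page
that may already have been freed and reused. A minimal illustrative
sketch, not code from this patch:

        /* Unsafe ordering: the put may free the page */
        put_page(page);         /* could drop the final reference */
        unlock_page(page);      /* potential use-after-free */

        /* Safe ordering: the reference pins the page across the unlock */
        unlock_page(page);
        put_page(page);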
Hugh implemented the following cleanup to simplify the path by calling
putback_lru_page() inside numamigrate_isolate_page() if it failed to
isolate and always calling unlock_page() within
migrate_misplaced_transhuge_page().
There is no functional change after this patch is applied but the code
is easier to follow and unlock_page() always happens before put_page().
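In outline, the error handling of migrate_misplaced_transhuge_page()
now funnels through a single pair of exit labels. A condensed sketch,
abridged from the diff below:

        isolated = numamigrate_isolate_page(pgdat, page);
        if (!isolated) {
                put_page(new_page);
                goto out_fail;
        }
        ...
out_fail:
        count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
out_dropref:
        unlock_page(page);      /* always before the final put_page() */
        put_page(page);
        return 0;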
[mgorman@suse.de: changelog only]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Simon Jeons <simon.jeons@gmail.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 340ef3902c
parent 75980e97da

2 changed files with 54 additions and 73 deletions
36 mm/huge_memory.c

@@ -1296,7 +1296,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int target_nid;
 	int current_nid = -1;
 	bool migrated;
-	bool page_locked = false;
 
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1318,7 +1317,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/* Acquire the page lock to serialise THP migrations */
 	spin_unlock(&mm->page_table_lock);
 	lock_page(page);
-	page_locked = true;
 
 	/* Confirm the PTE did not while locked */
 	spin_lock(&mm->page_table_lock);
@@ -1331,34 +1329,26 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Migrate the THP to the requested node */
 	migrated = migrate_misplaced_transhuge_page(mm, vma,
-				pmdp, pmd, addr,
-				page, target_nid);
-	if (migrated)
-		current_nid = target_nid;
-	else {
-		spin_lock(&mm->page_table_lock);
-		if (unlikely(!pmd_same(pmd, *pmdp))) {
-			unlock_page(page);
-			goto out_unlock;
-		}
-		goto clear_pmdnuma;
-	}
+				pmdp, pmd, addr, page, target_nid);
+	if (!migrated)
+		goto check_same;
 
-	task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+	task_numa_fault(target_nid, HPAGE_PMD_NR, true);
 	return 0;
 
+check_same:
+	spin_lock(&mm->page_table_lock);
+	if (unlikely(!pmd_same(pmd, *pmdp)))
+		goto out_unlock;
 clear_pmdnuma:
 	pmd = pmd_mknonnuma(pmd);
 	set_pmd_at(mm, haddr, pmdp, pmd);
 	VM_BUG_ON(pmd_numa(*pmdp));
 	update_mmu_cache_pmd(vma, addr, pmdp);
-	if (page_locked)
-		unlock_page(page);
-
 out_unlock:
 	spin_unlock(&mm->page_table_lock);
 	if (current_nid != -1)
-		task_numa_fault(current_nid, HPAGE_PMD_NR, migrated);
+		task_numa_fault(current_nid, HPAGE_PMD_NR, false);
 	return 0;
 }
 
91 mm/migrate.c
@@ -1557,41 +1557,40 @@ bool numamigrate_update_ratelimit(pg_data_t *pgdat, unsigned long nr_pages)
 
 int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
-	int ret = 0;
+	int page_lru;
 
 	VM_BUG_ON(compound_order(page) && !PageTransHuge(page));
 
 	/* Avoid migrating to a node that is nearly full */
-	if (migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) {
-		int page_lru;
+	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
+		return 0;
 
-		if (isolate_lru_page(page)) {
-			put_page(page);
-			return 0;
-		}
+	if (isolate_lru_page(page))
+		return 0;
 
-		/* Page is isolated */
-		ret = 1;
-		page_lru = page_is_file_cache(page);
-		if (!PageTransHuge(page))
-			inc_zone_page_state(page, NR_ISOLATED_ANON + page_lru);
-		else
-			mod_zone_page_state(page_zone(page),
-					NR_ISOLATED_ANON + page_lru,
-					HPAGE_PMD_NR);
+	/*
+	 * migrate_misplaced_transhuge_page() skips page migration's usual
+	 * check on page_count(), so we must do it here, now that the page
+	 * has been isolated: a GUP pin, or any other pin, prevents migration.
+	 * The expected page count is 3: 1 for page's mapcount and 1 for the
+	 * caller's pin and 1 for the reference taken by isolate_lru_page().
+	 */
+	if (PageTransHuge(page) && page_count(page) != 3) {
+		putback_lru_page(page);
+		return 0;
 	}
 
+	page_lru = page_is_file_cache(page);
+	mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
+				hpage_nr_pages(page));
+
 	/*
-	 * Page is either isolated or there is not enough space on the target
-	 * node. If isolated, then it has taken a reference count and the
-	 * callers reference can be safely dropped without the page
-	 * disappearing underneath us during migration. Otherwise the page is
-	 * not to be migrated but the callers reference should still be
-	 * dropped so it does not leak.
+	 * Isolating the page has taken another reference, so the
+	 * caller's reference can be safely dropped without the page
+	 * disappearing underneath us during migration.
 	 */
 	put_page(page);
-
-	return ret;
+	return 1;
 }
 
 /*
@@ -1602,7 +1601,7 @@ int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 int migrate_misplaced_page(struct page *page, int node)
 {
 	pg_data_t *pgdat = NODE_DATA(node);
-	int isolated = 0;
+	int isolated;
 	int nr_remaining;
 	LIST_HEAD(migratepages);
 
@@ -1610,20 +1609,16 @@ int migrate_misplaced_page(struct page *page, int node)
 	 * Don't migrate pages that are mapped in multiple processes.
 	 * TODO: Handle false sharing detection instead of this hammer
 	 */
-	if (page_mapcount(page) != 1) {
-		put_page(page);
+	if (page_mapcount(page) != 1)
 		goto out;
-	}
 
 	/*
 	 * Rate-limit the amount of data that is being migrated to a node.
 	 * Optimal placement is no good if the memory bus is saturated and
 	 * all the time is being spent migrating!
 	 */
-	if (numamigrate_update_ratelimit(pgdat, 1)) {
-		put_page(page);
+	if (numamigrate_update_ratelimit(pgdat, 1))
 		goto out;
-	}
 
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated)
@@ -1640,12 +1635,19 @@ int migrate_misplaced_page(struct page *page, int node)
 	} else
 		count_vm_numa_event(NUMA_PAGE_MIGRATE);
 	BUG_ON(!list_empty(&migratepages));
-out:
 	return isolated;
+
+out:
+	put_page(page);
+	return 0;
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
 #if defined(CONFIG_NUMA_BALANCING) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+/*
+ * Migrates a THP to a given target node. page must be locked and is unlocked
+ * before returning.
+ */
 int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 				struct vm_area_struct *vma,
 				pmd_t *pmd, pmd_t entry,
@@ -1676,29 +1678,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 	new_page = alloc_pages_node(node,
 		(GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER);
-	if (!new_page) {
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-		goto out_dropref;
-	}
+	if (!new_page)
+		goto out_fail;
+
 	page_xchg_last_nid(new_page, page_last_nid(page));
 
 	isolated = numamigrate_isolate_page(pgdat, page);
-
-	/*
-	 * Failing to isolate or a GUP pin prevents migration. The expected
-	 * page count is 2. 1 for anonymous pages without a mapping and 1
-	 * for the callers pin. If the page was isolated, the page will
-	 * need to be put back on the LRU.
-	 */
-	if (!isolated || page_count(page) != 2) {
-		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+	if (!isolated) {
 		put_page(new_page);
-		if (isolated) {
-			putback_lru_page(page);
-			isolated = 0;
-			goto out;
-		}
-		goto out_keep_locked;
+		goto out_fail;
 	}
 
 	/* Prepare a page as a migration target */
@@ -1730,6 +1718,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 		putback_lru_page(page);
 
 		count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
+		isolated = 0;
 		goto out;
 	}
 
@@ -1774,9 +1763,11 @@ out:
 			-HPAGE_PMD_NR);
 	return isolated;
 
+out_fail:
+	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
 out_dropref:
+	unlock_page(page);
 	put_page(page);
-out_keep_locked:
 	return 0;
 }
 #endif /* CONFIG_NUMA_BALANCING */
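After this change numamigrate_isolate_page() drops the caller's page
reference only on success, where the reference taken by isolation keeps
the page alive; on failure it returns 0 with the caller's reference
intact. The caller pattern, abridged from the hunks above:

        isolated = numamigrate_isolate_page(pgdat, page);
        if (!isolated)
                goto out;               /* we still hold our reference */
        ...
out:
        put_page(page);                 /* drop it ourselves on failure */
        return 0;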