From a50b854e073cd3335bbbada8dcff83a857297dd7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 23 Sep 2019 15:34:25 -0700 Subject: mm: introduce page_size() Patch series "Make working with compound pages easier", v2. These three patches add three helpers and convert the appropriate places to use them. This patch (of 3): It's unnecessarily hard to find out the size of a potentially huge page. Replace 'PAGE_SIZE << compound_order(page)' with page_size(page). Link: http://lkml.kernel.org/r/20190721104612.19120-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Acked-by: Michal Hocko Reviewed-by: Andrew Morton Reviewed-by: Ira Weiny Acked-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/flush.c | 3 +-- arch/arm64/mm/flush.c | 3 +-- arch/ia64/mm/init.c | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 6ecbda87ee46..4c7ebe094a83 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -204,8 +204,7 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) * coherent with the kernels mapping. */ if (!PageHighMem(page)) { - size_t page_size = PAGE_SIZE << compound_order(page); - __cpuc_flush_dcache_area(page_address(page), page_size); + __cpuc_flush_dcache_area(page_address(page), page_size(page)); } else { unsigned long i; if (cache_is_vipt_nonaliasing()) { diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index dc19300309d2..ac485163a4a7 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -56,8 +56,7 @@ void __sync_icache_dcache(pte_t pte) struct page *page = pte_page(pte); if (!test_and_set_bit(PG_dcache_clean, &page->flags)) - sync_icache_aliases(page_address(page), - PAGE_SIZE << compound_order(page)); + sync_icache_aliases(page_address(page), page_size(page)); } EXPORT_SYMBOL_GPL(__sync_icache_dcache); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 678b98a09c85..bf9df2625bc8 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -64,7 +64,7 @@ __ia64_sync_icache_dcache (pte_t pte) if (test_bit(PG_arch_1, &page->flags)) return; /* i-cache is already coherent with d-cache */ - flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page))); + flush_icache_range(addr, addr + page_size(page)); set_bit(PG_arch_1, &page->flags); /* mark page as clean */ } -- cgit v1.2.3 From 94ad9338109fe9d0b8a4a16828719dd6dcaee4c2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 23 Sep 2019 15:34:28 -0700 Subject: mm: introduce page_shift() Replace PAGE_SHIFT + compound_order(page) with the new page_shift() function. Minor improvements in readability. [akpm@linux-foundation.org: fix build in tce_page_is_contained()] Link: http://lkml.kernel.org/r/201907241853.yNQTrJWd%25lkp@intel.com Link: http://lkml.kernel.org/r/20190721104612.19120-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Andrew Morton Reviewed-by: Ira Weiny Acked-by: Kirill A. Shutemov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/book3s64/iommu_api.c | 7 ++----- drivers/vfio/vfio_iommu_spapr_tce.c | 8 ++++---- include/linux/mm.h | 6 ++++++ 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index b056cae3388b..56cc84520577 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -129,11 +129,8 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, * Allow to use larger than 64k IOMMU pages. Only do that * if we are backed by hugetlb. */ - if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) { - struct page *head = compound_head(page); - - pageshift = compound_order(head) + PAGE_SHIFT; - } + if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) + pageshift = page_shift(compound_head(page)); mem->pageshift = min(mem->pageshift, pageshift); /* * We don't need struct page reference any more, switch diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 3b18fa4d090a..26cef65b41e7 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -176,13 +176,13 @@ put_exit: } static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa, - unsigned int page_shift) + unsigned int it_page_shift) { struct page *page; unsigned long size = 0; - if (mm_iommu_is_devmem(mm, hpa, page_shift, &size)) - return size == (1UL << page_shift); + if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size)) + return size == (1UL << it_page_shift); page = pfn_to_page(hpa >> PAGE_SHIFT); /* @@ -190,7 +190,7 @@ static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa, * a page we just found. Otherwise the hardware can get access to * a bigger memory chunk that it should. */ - return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift; + return page_shift(compound_head(page)) >= it_page_shift; } static inline bool tce_groups_attached(struct tce_container *container) diff --git a/include/linux/mm.h b/include/linux/mm.h index d46d5585e2a2..9238548bdec5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -811,6 +811,12 @@ static inline unsigned long page_size(struct page *page) return PAGE_SIZE << compound_order(page); } +/* Returns the number of bits needed for the number of bytes in a page */ +static inline unsigned int page_shift(struct page *page) +{ + return PAGE_SHIFT + compound_order(page); +} + void free_compound_page(struct page *page); #ifdef CONFIG_MMU -- cgit v1.2.3 From d8c6546b1aea843fbeb4d54a1202f1adda6504be Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 23 Sep 2019 15:34:30 -0700 Subject: mm: introduce compound_nr() Replace 1 << compound_order(page) with compound_nr(page). Minor improvements in readability. Link: http://lkml.kernel.org/r/20190721104612.19120-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Andrew Morton Reviewed-by: Ira Weiny Acked-by: Kirill A. Shutemov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/flush.c | 4 ++-- arch/powerpc/mm/hugetlbpage.c | 2 +- fs/proc/task_mmu.c | 2 +- include/linux/mm.h | 6 ++++++ mm/compaction.c | 2 +- mm/filemap.c | 2 +- mm/gup.c | 2 +- mm/hugetlb_cgroup.c | 2 +- mm/kasan/common.c | 2 +- mm/memcontrol.c | 4 ++-- mm/memory_hotplug.c | 4 ++-- mm/migrate.c | 2 +- mm/page_alloc.c | 2 +- mm/rmap.c | 3 +-- mm/shmem.c | 8 ++++---- mm/swap_state.c | 2 +- mm/util.c | 2 +- mm/vmscan.c | 4 ++-- 18 files changed, 30 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 4c7ebe094a83..6d89db7895d1 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -208,13 +208,13 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) } else { unsigned long i; if (cache_is_vipt_nonaliasing()) { - for (i = 0; i < (1 << compound_order(page)); i++) { + for (i = 0; i < compound_nr(page); i++) { void *addr = kmap_atomic(page + i); __cpuc_flush_dcache_area(addr, PAGE_SIZE); kunmap_atomic(addr); } } else { - for (i = 0; i < (1 << compound_order(page)); i++) { + for (i = 0; i < compound_nr(page); i++) { void *addr = kmap_high_get(page + i); if (addr) { __cpuc_flush_dcache_area(addr, PAGE_SIZE); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a8953f108808..73d4873fc7f8 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -667,7 +667,7 @@ void flush_dcache_icache_hugepage(struct page *page) BUG_ON(!PageCompound(page)); - for (i = 0; i < (1UL << compound_order(page)); i++) { + for (i = 0; i < compound_nr(page); i++) { if (!PageHighMem(page)) { __flush_dcache_icache(page_address(page+i)); } else { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index bf43d1d60059..ea1630465474 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -461,7 +461,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, static void smaps_account(struct mem_size_stats *mss, struct page *page, bool compound, bool young, bool dirty, bool locked) { - int i, nr = compound ? 1 << compound_order(page) : 1; + int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; /* diff --git a/include/linux/mm.h b/include/linux/mm.h index 9238548bdec5..69b7314c8d24 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -805,6 +805,12 @@ static inline void set_compound_order(struct page *page, unsigned int order) page[1].compound_order = order; } +/* Returns the number of pages in this potentially compound page. */ +static inline unsigned long compound_nr(struct page *page) +{ + return 1UL << compound_order(page); +} + /* Returns the number of bytes in this potentially compound page. */ static inline unsigned long page_size(struct page *page) { diff --git a/mm/compaction.c b/mm/compaction.c index 952dc2fb24e5..777c088e9113 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -969,7 +969,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * is safe to read and it's 0 for tail pages. */ if (unlikely(PageCompound(page))) { - low_pfn += (1UL << compound_order(page)) - 1; + low_pfn += compound_nr(page) - 1; goto isolate_fail; } } diff --git a/mm/filemap.c b/mm/filemap.c index 40667c2f3383..5f30aedd7363 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -126,7 +126,7 @@ static void page_cache_delete(struct address_space *mapping, /* hugetlb pages are represented by a single entry in the xarray */ if (!PageHuge(page)) { xas_set_order(&xas, page->index, compound_order(page)); - nr = 1U << compound_order(page); + nr = compound_nr(page); } VM_BUG_ON_PAGE(!PageLocked(page), page); diff --git a/mm/gup.c b/mm/gup.c index 98f13ab37bac..84a36d80dd2e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1460,7 +1460,7 @@ check_again: * gup may start from a tail page. Advance step by the left * part. */ - step = (1 << compound_order(head)) - (pages[i] - head); + step = compound_nr(head) - (pages[i] - head); /* * If we get a page from the CMA zone, since we are going to * be pinning these entries, we might as well move them out diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 68c2f2f3c05b..f1930fa0b445 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -139,7 +139,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, if (!page_hcg || page_hcg != h_cg) goto out; - nr_pages = 1 << compound_order(page); + nr_pages = compound_nr(page); if (!parent) { parent = root_h_cgroup; /* root has no limit */ diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 307631d9c62b..6814d6d6a023 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -336,7 +336,7 @@ void kasan_poison_slab(struct page *page) { unsigned long i; - for (i = 0; i < (1 << compound_order(page)); i++) + for (i = 0; i < compound_nr(page); i++) page_kasan_tag_reset(page + i); kasan_poison_shadow(page_address(page), page_size(page), KASAN_KMALLOC_REDZONE); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f3c15bb07cce..6c6032c03d1d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6511,7 +6511,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) unsigned int nr_pages = 1; if (PageTransHuge(page)) { - nr_pages <<= compound_order(page); + nr_pages = compound_nr(page); ug->nr_huge += nr_pages; } if (PageAnon(page)) @@ -6523,7 +6523,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) } ug->pgpgout++; } else { - ug->nr_kmem += 1 << compound_order(page); + ug->nr_kmem += compound_nr(page); __ClearPageKmemcg(page); } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c73f09913165..5f2c83ce9fde 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1309,7 +1309,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end) head = compound_head(page); if (page_huge_active(head)) return pfn; - skip = (1 << compound_order(head)) - (page - head); + skip = compound_nr(head) - (page - head); pfn += skip - 1; } return 0; @@ -1347,7 +1347,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (PageHuge(page)) { struct page *head = compound_head(page); - pfn = page_to_pfn(head) + (1<i_pages, index, compound_order(page)); unsigned long i = 0; - unsigned long nr = 1UL << compound_order(page); + unsigned long nr = compound_nr(page); VM_BUG_ON_PAGE(PageTail(page), page); VM_BUG_ON_PAGE(index != round_down(index, nr), page); @@ -1884,7 +1884,7 @@ alloc_nohuge: lru_cache_add_anon(page); spin_lock_irq(&info->lock); - info->alloced += 1 << compound_order(page); + info->alloced += compound_nr(page); inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page); shmem_recalc_inode(inode); spin_unlock_irq(&info->lock); @@ -1925,7 +1925,7 @@ clear: struct page *head = compound_head(page); int i; - for (i = 0; i < (1 << compound_order(head)); i++) { + for (i = 0; i < compound_nr(head); i++) { clear_highpage(head + i); flush_dcache_page(head + i); } @@ -1952,7 +1952,7 @@ clear: * Error recovery. */ unacct: - shmem_inode_unacct_blocks(inode, 1 << compound_order(page)); + shmem_inode_unacct_blocks(inode, compound_nr(page)); if (PageTransHuge(page)) { unlock_page(page); diff --git a/mm/swap_state.c b/mm/swap_state.c index 8368621a0fc7..f844af5f09ba 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -116,7 +116,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp) struct address_space *address_space = swap_address_space(entry); pgoff_t idx = swp_offset(entry); XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page)); - unsigned long i, nr = 1UL << compound_order(page); + unsigned long i, nr = compound_nr(page); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageSwapCache(page), page); diff --git a/mm/util.c b/mm/util.c index e6351a80f248..bab284d69c8c 100644 --- a/mm/util.c +++ b/mm/util.c @@ -521,7 +521,7 @@ bool page_mapped(struct page *page) return true; if (PageHuge(page)) return false; - for (i = 0; i < (1 << compound_order(page)); i++) { + for (i = 0; i < compound_nr(page); i++) { if (atomic_read(&page[i]._mapcount) >= 0) return true; } diff --git a/mm/vmscan.c b/mm/vmscan.c index a6c5d0b28321..8e03427cb64f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1149,7 +1149,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, VM_BUG_ON_PAGE(PageActive(page), page); - nr_pages = 1 << compound_order(page); + nr_pages = compound_nr(page); /* Account the number of base pages even though THP */ sc->nr_scanned += nr_pages; @@ -1705,7 +1705,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, VM_BUG_ON_PAGE(!PageLRU(page), page); - nr_pages = 1 << compound_order(page); + nr_pages = compound_nr(page); total_scan += nr_pages; if (page_zonenum(page) > sc->reclaim_idx) { -- cgit v1.2.3 From 13224794cb0832caa403ad583d8605202cabc6bc Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Mon, 23 Sep 2019 15:35:19 -0700 Subject: mm: remove quicklist page table caches Patch series "mm: remove quicklist page table caches". A while ago Nicholas proposed to remove quicklist page table caches [1]. I've rebased his patch on the curren upstream and switched ia64 and sh to use generic versions of PTE allocation. [1] https://lore.kernel.org/linux-mm/20190711030339.20892-1-npiggin@gmail.com This patch (of 3): Remove page table allocator "quicklists". These have been around for a long time, but have not got much traction in the last decade and are only used on ia64 and sh architectures. The numbers in the initial commit look interesting but probably don't apply anymore. If anybody wants to resurrect this it's in the git history, but it's unhelpful to have this code and divergent allocator behaviour for minor archs. Also it might be better to instead make more general improvements to page allocator if this is still so slow. Link: http://lkml.kernel.org/r/1565250728-21721-2-git-send-email-rppt@linux.ibm.com Signed-off-by: Nicholas Piggin Signed-off-by: Mike Rapoport Cc: Tony Luck Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgalloc.h | 2 - arch/arc/include/asm/pgalloc.h | 1 - arch/arm/include/asm/pgalloc.h | 2 - arch/arm64/include/asm/pgalloc.h | 2 - arch/csky/include/asm/pgalloc.h | 2 - arch/hexagon/include/asm/pgalloc.h | 2 - arch/ia64/Kconfig | 4 -- arch/ia64/include/asm/pgalloc.h | 32 ++++------- arch/m68k/include/asm/pgtable_mm.h | 2 - arch/m68k/include/asm/pgtable_no.h | 2 - arch/microblaze/include/asm/pgalloc.h | 89 +++-------------------------- arch/microblaze/mm/pgtable.c | 4 -- arch/mips/include/asm/pgalloc.h | 2 - arch/nds32/include/asm/pgalloc.h | 2 - arch/nios2/include/asm/pgalloc.h | 2 - arch/openrisc/include/asm/pgalloc.h | 2 - arch/parisc/include/asm/pgalloc.h | 2 - arch/powerpc/include/asm/pgalloc.h | 2 - arch/riscv/include/asm/pgalloc.h | 4 -- arch/s390/include/asm/pgtable.h | 1 - arch/sh/include/asm/pgalloc.h | 22 ++------ arch/sh/mm/Kconfig | 3 - arch/sparc/include/asm/pgalloc_32.h | 2 - arch/sparc/include/asm/pgalloc_64.h | 2 - arch/sparc/mm/init_32.c | 1 - arch/um/include/asm/pgalloc.h | 2 - arch/unicore32/include/asm/pgalloc.h | 2 - arch/x86/include/asm/pgtable_32.h | 1 - arch/x86/include/asm/pgtable_64.h | 1 - arch/xtensa/include/asm/tlbflush.h | 3 - fs/proc/meminfo.c | 4 -- include/asm-generic/pgalloc.h | 5 -- include/linux/quicklist.h | 94 ------------------------------- kernel/sched/idle.c | 1 - lib/show_mem.c | 5 -- mm/Kconfig | 5 -- mm/Makefile | 1 - mm/mmu_gather.c | 2 - mm/quicklist.c | 103 ---------------------------------- 39 files changed, 25 insertions(+), 395 deletions(-) delete mode 100644 include/linux/quicklist.h delete mode 100644 mm/quicklist.c (limited to 'arch') diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index 71ded3b7d82d..eb91f1e85629 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -53,6 +53,4 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd) free_page((unsigned long)pmd); } -#define check_pgt_cache() do { } while (0) - #endif /* _ALPHA_PGALLOC_H */ diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 9bdb8ed5b0db..4751f2251cd9 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -129,7 +129,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptep) #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) -#define check_pgt_cache() do { } while (0) #define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) #endif /* _ASM_ARC_PGALLOC_H */ diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index a2a68b751971..069da393110c 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -15,8 +15,6 @@ #include #include -#define check_pgt_cache() do { } while (0) - #ifdef CONFIG_MMU #define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_USER)) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 14d0bc44d451..172d76fa0245 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -15,8 +15,6 @@ #include /* for pte_{alloc,free}_one */ -#define check_pgt_cache() do { } while (0) - #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index 98c5716708d6..d089113fe41f 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -75,8 +75,6 @@ do { \ tlb_remove_page(tlb, pte); \ } while (0) -#define check_pgt_cache() do {} while (0) - extern void pagetable_init(void); extern void pre_mmu_init(void); extern void pre_trap_init(void); diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index d6544dc71258..5a6e79e7926d 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -13,8 +13,6 @@ #include /* for pte_{alloc,free}_one */ -#define check_pgt_cache() do {} while (0) - extern unsigned long long kmap_generation; /* diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 685a3df126ca..16714477eef4 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -72,10 +72,6 @@ config 64BIT config ZONE_DMA32 def_bool y -config QUICKLIST - bool - default y - config MMU bool default y diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index c9e481023c25..b03d993f5d7e 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -19,18 +19,17 @@ #include #include #include -#include #include static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { - quicklist_free(0, NULL, pgd); + free_page((unsigned long)pgd); } #if CONFIG_PGTABLE_LEVELS == 4 @@ -42,12 +41,12 @@ pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return (pud_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline void pud_free(struct mm_struct *mm, pud_t *pud) { - quicklist_free(0, NULL, pud); + free_page((unsigned long)pud); } #define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud) #endif /* CONFIG_PGTABLE_LEVELS == 4 */ @@ -60,12 +59,12 @@ pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return (pmd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { - quicklist_free(0, NULL, pmd); + free_page((unsigned long)pmd); } #define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd) @@ -86,14 +85,12 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { struct page *page; - void *pg; - pg = quicklist_alloc(0, GFP_KERNEL, NULL); - if (!pg) + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) return NULL; - page = virt_to_page(pg); if (!pgtable_page_ctor(page)) { - quicklist_free(0, NULL, pg); + __free_page(page); return NULL; } return page; @@ -101,23 +98,18 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { - return quicklist_alloc(0, GFP_KERNEL, NULL); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { pgtable_page_dtor(pte); - quicklist_free_page(0, NULL, pte); + __free_page(pte); } static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - quicklist_free(0, NULL, pte); -} - -static inline void check_pgt_cache(void) -{ - quicklist_trim(0, NULL, 25, 16); + free_page((unsigned long)pte); } #define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index fde4534b974f..cc476c1d72e5 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -181,6 +181,4 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot); */ #define pgtable_cache_init() do { } while (0) -#define check_pgt_cache() do { } while (0) - #endif /* _M68K_PGTABLE_H */ diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h index fc3a96c77bd8..69e271101223 100644 --- a/arch/m68k/include/asm/pgtable_no.h +++ b/arch/m68k/include/asm/pgtable_no.h @@ -60,6 +60,4 @@ extern void paging_init(void); #include -#define check_pgt_cache() do { } while (0) - #endif /* _M68KNOMMU_PGTABLE_H */ diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index f4cc9ffc449e..ac0731881751 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -21,83 +21,20 @@ #include #include -#define PGDIR_ORDER 0 - -/* - * This is handled very differently on MicroBlaze since out page tables - * are all 0's and I want to be able to use these zero'd pages elsewhere - * as well - it gives us quite a speedup. - * -- Cort - */ -extern struct pgtable_cache_struct { - unsigned long *pgd_cache; - unsigned long *pte_cache; - unsigned long pgtable_cache_sz; -} quicklists; - -#define pgd_quicklist (quicklists.pgd_cache) -#define pmd_quicklist ((unsigned long *)0) -#define pte_quicklist (quicklists.pte_cache) -#define pgtable_cache_size (quicklists.pgtable_cache_sz) - -extern unsigned long *zero_cache; /* head linked list of pre-zero'd pages */ -extern atomic_t zero_sz; /* # currently pre-zero'd pages */ -extern atomic_t zeropage_hits; /* # zero'd pages request that we've done */ -extern atomic_t zeropage_calls; /* # zero'd pages request that've been made */ -extern atomic_t zerototal; /* # pages zero'd over time */ - -#define zero_quicklist (zero_cache) -#define zero_cache_sz (zero_sz) -#define zero_cache_calls (zeropage_calls) -#define zero_cache_hits (zeropage_hits) -#define zero_cache_total (zerototal) - -/* - * return a pre-zero'd page from the list, - * return NULL if none available -- Cort - */ -extern unsigned long get_zero_page_fast(void); - extern void __bad_pte(pmd_t *pmd); -static inline pgd_t *get_pgd_slow(void) +static inline pgd_t *get_pgd(void) { - pgd_t *ret; - - ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGDIR_ORDER); - if (ret != NULL) - clear_page(ret); - return ret; + return (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0); } -static inline pgd_t *get_pgd_fast(void) -{ - unsigned long *ret; - - ret = pgd_quicklist; - if (ret != NULL) { - pgd_quicklist = (unsigned long *)(*ret); - ret[0] = 0; - pgtable_cache_size--; - } else - ret = (unsigned long *)get_pgd_slow(); - return (pgd_t *)ret; -} - -static inline void free_pgd_fast(pgd_t *pgd) -{ - *(unsigned long **)pgd = pgd_quicklist; - pgd_quicklist = (unsigned long *) pgd; - pgtable_cache_size++; -} - -static inline void free_pgd_slow(pgd_t *pgd) +static inline void free_pgd(pgd_t *pgd) { free_page((unsigned long)pgd); } -#define pgd_free(mm, pgd) free_pgd_fast(pgd) -#define pgd_alloc(mm) get_pgd_fast() +#define pgd_free(mm, pgd) free_pgd(pgd) +#define pgd_alloc(mm) get_pgd() #define pmd_pgtable(pmd) pmd_page(pmd) @@ -115,15 +52,14 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm) struct page *ptepage; #ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_HIGHMEM; + int flags = GFP_KERNEL | __GFP_ZERO | __GFP_HIGHMEM; #else - int flags = GFP_KERNEL; + int flags = GFP_KERNEL | __GFP_ZERO; #endif ptepage = alloc_pages(flags, 0); if (!ptepage) return NULL; - clear_highpage(ptepage); if (!pgtable_page_ctor(ptepage)) { __free_page(ptepage); return NULL; @@ -131,13 +67,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm) return ptepage; } -static inline void pte_free_fast(pte_t *pte) -{ - *(unsigned long **)pte = pte_quicklist; - pte_quicklist = (unsigned long *) pte; - pgtable_cache_size++; -} - static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { free_page((unsigned long)pte); @@ -171,10 +100,6 @@ static inline void pte_free(struct mm_struct *mm, struct page *ptepage) #define __pmd_free_tlb(tlb, x, addr) pmd_free((tlb)->mm, x) #define pgd_populate(mm, pmd, pte) BUG() -extern int do_check_pgt_cache(int, int); - #endif /* CONFIG_MMU */ -#define check_pgt_cache() do { } while (0) - #endif /* _ASM_MICROBLAZE_PGALLOC_H */ diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 8fe54fda31dc..010bb9cee2e4 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -44,10 +44,6 @@ unsigned long ioremap_base; unsigned long ioremap_bot; EXPORT_SYMBOL(ioremap_bot); -#ifndef CONFIG_SMP -struct pgtable_cache_struct quicklists; -#endif - static void __iomem *__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) { diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index aa16b85ddffc..aa73cb187a07 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -105,8 +105,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) #endif /* __PAGETABLE_PUD_FOLDED */ -#define check_pgt_cache() do { } while (0) - extern void pagetable_init(void); #endif /* _ASM_PGALLOC_H */ diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h index e78b43d8389f..37125e6884d7 100644 --- a/arch/nds32/include/asm/pgalloc.h +++ b/arch/nds32/include/asm/pgalloc.h @@ -23,8 +23,6 @@ extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t * pgd); -#define check_pgt_cache() do { } while (0) - static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { pgtable_t pte; diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h index 4bc8cf72067e..750d18d5980b 100644 --- a/arch/nios2/include/asm/pgalloc.h +++ b/arch/nios2/include/asm/pgalloc.h @@ -45,6 +45,4 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) tlb_remove_page((tlb), (pte)); \ } while (0) -#define check_pgt_cache() do { } while (0) - #endif /* _ASM_NIOS2_PGALLOC_H */ diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 3d4b397c2d06..787c1b9d2f6d 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -101,6 +101,4 @@ do { \ #define pmd_pgtable(pmd) pmd_page(pmd) -#define check_pgt_cache() do { } while (0) - #endif diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index 4f2059a50fae..d98647c29b74 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -124,6 +124,4 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) pmd_populate_kernel(mm, pmd, page_address(pte_page)) #define pmd_pgtable(pmd) pmd_page(pmd) -#define check_pgt_cache() do { } while (0) - #endif diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index 2b2c60a1a66d..6dd78a2dc03a 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -64,8 +64,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) extern struct kmem_cache *pgtable_cache[]; #define PGT_CACHE(shift) pgtable_cache[shift] -static inline void check_pgt_cache(void) { } - #ifdef CONFIG_PPC_BOOK3S #include #else diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 56a67d66f72f..f66a00d8cb19 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -82,8 +82,4 @@ do { \ tlb_remove_page((tlb), pte); \ } while (0) -static inline void check_pgt_cache(void) -{ -} - #endif /* _ASM_RISCV_PGALLOC_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 0c4600725fc2..8f59454ac407 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1686,7 +1686,6 @@ extern void s390_reset_cmma(struct mm_struct *mm); * No page table caches to initialise */ static inline void pgtable_cache_init(void) { } -static inline void check_pgt_cache(void) { } #include diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index b56f908b1395..9e15054858b4 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -2,11 +2,8 @@ #ifndef __ASM_SH_PGALLOC_H #define __ASM_SH_PGALLOC_H -#include #include -#define QUICK_PT 0 /* Other page table pages that are zero on free */ - extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); @@ -34,20 +31,18 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, */ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { - return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); + return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { struct page *page; - void *pg; - pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL); - if (!pg) + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) return NULL; - page = virt_to_page(pg); if (!pgtable_page_ctor(page)) { - quicklist_free(QUICK_PT, NULL, pg); + __free_page(page); return NULL; } return page; @@ -55,13 +50,13 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm) static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - quicklist_free(QUICK_PT, NULL, pte); + free_page((unsigned long)pte); } static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { pgtable_page_dtor(pte); - quicklist_free_page(QUICK_PT, NULL, pte); + __free_page(pte); } #define __pte_free_tlb(tlb,pte,addr) \ @@ -79,9 +74,4 @@ do { \ } while (0); #endif -static inline void check_pgt_cache(void) -{ - quicklist_trim(QUICK_PT, NULL, 25, 16); -} - #endif /* __ASM_SH_PGALLOC_H */ diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index 02ed2df25a54..5c8a2ebfc720 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -1,9 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 menu "Memory management options" -config QUICKLIST - def_bool y - config MMU bool "Support for memory management hardware" depends on !CPU_SH2 diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h index 282be50a4adf..10538a4d1a1e 100644 --- a/arch/sparc/include/asm/pgalloc_32.h +++ b/arch/sparc/include/asm/pgalloc_32.h @@ -17,8 +17,6 @@ void srmmu_free_nocache(void *addr, int size); extern struct resource sparc_iomap; -#define check_pgt_cache() do { } while (0) - pgd_t *get_pgd_fast(void); static inline void free_pgd_fast(pgd_t *pgd) { diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h index 48abccba4991..9d3e5cc95bbb 100644 --- a/arch/sparc/include/asm/pgalloc_64.h +++ b/arch/sparc/include/asm/pgalloc_64.h @@ -69,8 +69,6 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage); #define pmd_populate(MM, PMD, PTE) pmd_set(MM, PMD, PTE) #define pmd_pgtable(PMD) ((pte_t *)__pmd_page(PMD)) -#define check_pgt_cache() do { } while (0) - void pgtable_free(void *table, bool is_page); #ifdef CONFIG_SMP diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index 046ab116cc8c..906eda1158b4 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -31,7 +31,6 @@ #include #include #include -#include /* bug in asm-generic/tlb.h: check_pgt_cache */ #include #include #include diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 023599c3fa51..446e0c0f4018 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -43,7 +43,5 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #define __pmd_free_tlb(tlb,x, address) tlb_remove_page((tlb),virt_to_page(x)) #endif -#define check_pgt_cache() do { } while (0) - #endif diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 3f0903bd98e9..ba1c9a79993b 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -18,8 +18,6 @@ #define __HAVE_ARCH_PTE_ALLOC_ONE #include -#define check_pgt_cache() do { } while (0) - #define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_PRESENT) #define _PAGE_KERNEL_TABLE (PMD_TYPE_TABLE | PMD_PRESENT) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index c78da8eda8f2..b9b9f8aa963e 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -30,7 +30,6 @@ extern pgd_t initial_page_table[1024]; extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } -static inline void check_pgt_cache(void) { } void paging_init(void); void sync_initial_page_table(void); diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 4990d26dfc73..a26d2d58b9c9 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -242,7 +242,6 @@ extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #define pgtable_cache_init() do { } while (0) -#define check_pgt_cache() do { } while (0) #define PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 diff --git a/arch/xtensa/include/asm/tlbflush.h b/arch/xtensa/include/asm/tlbflush.h index 06875feb27c2..856e2da2e397 100644 --- a/arch/xtensa/include/asm/tlbflush.h +++ b/arch/xtensa/include/asm/tlbflush.h @@ -160,9 +160,6 @@ static inline void invalidate_dtlb_mapping (unsigned address) invalidate_dtlb_entry(tlb_entry); } -#define check_pgt_cache() do { } while (0) - - /* * DO NOT USE THESE FUNCTIONS. These instructions aren't part of the Xtensa * ISA and exist only for test purposes.. diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 465ea0153b2a..4bd80e68eb03 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -106,9 +105,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_zone_page_state(NR_KERNEL_STACK_KB)); show_val_kb(m, "PageTables: ", global_zone_page_state(NR_PAGETABLE)); -#ifdef CONFIG_QUICKLIST - show_val_kb(m, "Quicklists: ", quicklist_total_size()); -#endif show_val_kb(m, "NFS_Unstable: ", global_node_page_state(NR_UNSTABLE_NFS)); diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 8476175c07e7..6f8cc06ee44e 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -102,11 +102,6 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page) __free_page(pte_page); } -#else /* CONFIG_MMU */ - -/* This is enough for a nommu architecture */ -#define check_pgt_cache() do { } while (0) - #endif /* CONFIG_MMU */ #endif /* __ASM_GENERIC_PGALLOC_H */ diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h deleted file mode 100644 index 034982c98c8b..000000000000 --- a/include/linux/quicklist.h +++ /dev/null @@ -1,94 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef LINUX_QUICKLIST_H -#define LINUX_QUICKLIST_H -/* - * Fast allocations and disposal of pages. Pages must be in the condition - * as needed after allocation when they are freed. Per cpu lists of pages - * are kept that only contain node local pages. - * - * (C) 2007, SGI. Christoph Lameter - */ -#include -#include -#include - -#ifdef CONFIG_QUICKLIST - -struct quicklist { - void *page; - int nr_pages; -}; - -DECLARE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; - -/* - * The two key functions quicklist_alloc and quicklist_free are inline so - * that they may be custom compiled for the platform. - * Specifying a NULL ctor can remove constructor support. Specifying - * a constant quicklist allows the determination of the exact address - * in the per cpu area. - * - * The fast patch in quicklist_alloc touched only a per cpu cacheline and - * the first cacheline of the page itself. There is minmal overhead involved. - */ -static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *)) -{ - struct quicklist *q; - void **p = NULL; - - q =&get_cpu_var(quicklist)[nr]; - p = q->page; - if (likely(p)) { - q->page = p[0]; - p[0] = NULL; - q->nr_pages--; - } - put_cpu_var(quicklist); - if (likely(p)) - return p; - - p = (void *)__get_free_page(flags | __GFP_ZERO); - if (ctor && p) - ctor(p); - return p; -} - -static inline void __quicklist_free(int nr, void (*dtor)(void *), void *p, - struct page *page) -{ - struct quicklist *q; - - q = &get_cpu_var(quicklist)[nr]; - *(void **)p = q->page; - q->page = p; - q->nr_pages++; - put_cpu_var(quicklist); -} - -static inline void quicklist_free(int nr, void (*dtor)(void *), void *pp) -{ - __quicklist_free(nr, dtor, pp, virt_to_page(pp)); -} - -static inline void quicklist_free_page(int nr, void (*dtor)(void *), - struct page *page) -{ - __quicklist_free(nr, dtor, page_address(page), page); -} - -void quicklist_trim(int nr, void (*dtor)(void *), - unsigned long min_pages, unsigned long max_free); - -unsigned long quicklist_total_size(void); - -#else - -static inline unsigned long quicklist_total_size(void) -{ - return 0; -} - -#endif - -#endif /* LINUX_QUICKLIST_H */ - diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index c892c6280c9f..8dad5aa600ea 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -238,7 +238,6 @@ static void do_idle(void) tick_nohz_idle_enter(); while (!need_resched()) { - check_pgt_cache(); rmb(); local_irq_disable(); diff --git a/lib/show_mem.c b/lib/show_mem.c index 5c86ef4c899f..1c26c14ffbb9 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -6,7 +6,6 @@ */ #include -#include #include void show_mem(unsigned int filter, nodemask_t *nodemask) @@ -39,10 +38,6 @@ void show_mem(unsigned int filter, nodemask_t *nodemask) #ifdef CONFIG_CMA printk("%lu pages cma reserved\n", totalcma_pages); #endif -#ifdef CONFIG_QUICKLIST - printk("%lu pages in pagetable cache\n", - quicklist_total_size()); -#endif #ifdef CONFIG_MEMORY_FAILURE printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); #endif diff --git a/mm/Kconfig b/mm/Kconfig index 2fe4902ad755..f88be1cbcfb2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -273,11 +273,6 @@ config BOUNCE by default when ZONE_DMA or HIGHMEM is selected, but you may say n to override this. -config NR_QUICK - int - depends on QUICKLIST - default "1" - config VIRT_TO_BUS bool help diff --git a/mm/Makefile b/mm/Makefile index d0b295c3b764..d11de59e9c3c 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -72,7 +72,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_MEMTEST) += memtest.o obj-$(CONFIG_MIGRATION) += migrate.o -obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o obj-$(CONFIG_PAGE_COUNTER) += page_counter.o obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 8c943a6e1696..7d70e5c78f97 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -271,8 +271,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb, tlb_flush_mmu(tlb); - /* keep the page table cache within bounds */ - check_pgt_cache(); #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER tlb_batch_list_free(tlb); #endif diff --git a/mm/quicklist.c b/mm/quicklist.c deleted file mode 100644 index 5e98ac78e410..000000000000 --- a/mm/quicklist.c +++ /dev/null @@ -1,103 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Quicklist support. - * - * Quicklists are light weight lists of pages that have a defined state - * on alloc and free. Pages must be in the quicklist specific defined state - * (zero by default) when the page is freed. It seems that the initial idea - * for such lists first came from Dave Miller and then various other people - * improved on it. - * - * Copyright (C) 2007 SGI, - * Christoph Lameter - * Generalized, added support for multiple lists and - * constructors / destructors. - */ -#include - -#include -#include -#include -#include - -DEFINE_PER_CPU(struct quicklist [CONFIG_NR_QUICK], quicklist); - -#define FRACTION_OF_NODE_MEM 16 - -static unsigned long max_pages(unsigned long min_pages) -{ - unsigned long node_free_pages, max; - int node = numa_node_id(); - struct zone *zones = NODE_DATA(node)->node_zones; - int num_cpus_on_node; - - node_free_pages = -#ifdef CONFIG_ZONE_DMA - zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) + -#endif -#ifdef CONFIG_ZONE_DMA32 - zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) + -#endif - zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES); - - max = node_free_pages / FRACTION_OF_NODE_MEM; - - num_cpus_on_node = cpumask_weight(cpumask_of_node(node)); - max /= num_cpus_on_node; - - return max(max, min_pages); -} - -static long min_pages_to_free(struct quicklist *q, - unsigned long min_pages, long max_free) -{ - long pages_to_free; - - pages_to_free = q->nr_pages - max_pages(min_pages); - - return min(pages_to_free, max_free); -} - -/* - * Trim down the number of pages in the quicklist - */ -void quicklist_trim(int nr, void (*dtor)(void *), - unsigned long min_pages, unsigned long max_free) -{ - long pages_to_free; - struct quicklist *q; - - q = &get_cpu_var(quicklist)[nr]; - if (q->nr_pages > min_pages) { - pages_to_free = min_pages_to_free(q, min_pages, max_free); - - while (pages_to_free > 0) { - /* - * We pass a gfp_t of 0 to quicklist_alloc here - * because we will never call into the page allocator. - */ - void *p = quicklist_alloc(nr, 0, NULL); - - if (dtor) - dtor(p); - free_page((unsigned long)p); - pages_to_free--; - } - } - put_cpu_var(quicklist); -} - -unsigned long quicklist_total_size(void) -{ - unsigned long count = 0; - int cpu; - struct quicklist *ql, *q; - - for_each_online_cpu(cpu) { - ql = per_cpu(quicklist, cpu); - for (q = ql; q < ql + CONFIG_NR_QUICK; q++) - count += q->nr_pages; - } - return count; -} - -- cgit v1.2.3 From 013199211c8bfe9cad9312f5083f0d5a576cf74a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 23 Sep 2019 15:35:22 -0700 Subject: ia64: switch to generic version of pte allocation The ia64 implementation pte_alloc_one(), pte_alloc_one_kernel(), pte_free_kernel() and pte_free() is identical to the generic except of lack of __GFP_ACCOUNT for the user PTEs allocation. Switch ia64 to use generic version of these functions. Link: http://lkml.kernel.org/r/1565250728-21721-3-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/pgalloc.h | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index b03d993f5d7e..f4c491044882 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -20,6 +20,8 @@ #include #include +#include + #include static inline pgd_t *pgd_alloc(struct mm_struct *mm) @@ -82,36 +84,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) pmd_val(*pmd_entry) = __pa(pte); } -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *page; - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) - return NULL; - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; -} - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - #define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) #endif /* _ASM_IA64_PGALLOC_H */ -- cgit v1.2.3 From 6fb12766f7fcd7934b09fbfbd32a725cc2febf96 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 23 Sep 2019 15:35:25 -0700 Subject: sh: switch to generic version of pte allocation The sh implementation pte_alloc_one(), pte_alloc_one_kernel(), pte_free_kernel() and pte_free() is identical to the generic except of lack of __GFP_ACCOUNT for the user PTEs allocation. Switch sh to use generic version of these functions. Link: http://lkml.kernel.org/r/1565250728-21721-4-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/asm/pgalloc.h | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 9e15054858b4..8c6341a4d807 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -3,6 +3,7 @@ #define __ASM_SH_PGALLOC_H #include +#include extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); @@ -26,39 +27,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, } #define pmd_pgtable(pmd) pmd_page(pmd) -/* - * Allocate and free page tables. - */ -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *page; - - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) - return NULL; - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_page_dtor(pte); - __free_page(pte); -} - #define __pte_free_tlb(tlb,pte,addr) \ do { \ pgtable_page_dtor(pte); \ -- cgit v1.2.3 From 1b9a9d8564cbc49e1e5529460ca70bf7353aa4d1 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 23 Sep 2019 15:35:28 -0700 Subject: microblaze: switch to generic version of pte allocation The microblaze implementation of pte_alloc_one() has a provision to allocated PTEs from high memory, but neither CONFIG_HIGHPTE nor pte_map*() versions for suitable for HIGHPTE are defined. Except that, microblaze version of pte_alloc_one() is identical to the generic one as well as the implementations of pte_free() and pte_free_kernel(). Switch microblaze to use the generic versions of these functions. Also remove pte_free_slow() that is not referenced anywhere in the code. Link: http://lkml.kernel.org/r/1565690952-32158-1-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Mark Rutland Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/pgalloc.h | 39 +++-------------------------------- 1 file changed, 3 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index ac0731881751..7ecb05baa601 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -21,6 +21,9 @@ #include #include +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +#include + extern void __bad_pte(pmd_t *pmd); static inline pgd_t *get_pgd(void) @@ -47,42 +50,6 @@ static inline void free_pgd(pgd_t *pgd) extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -static inline struct page *pte_alloc_one(struct mm_struct *mm) -{ - struct page *ptepage; - -#ifdef CONFIG_HIGHPTE - int flags = GFP_KERNEL | __GFP_ZERO | __GFP_HIGHMEM; -#else - int flags = GFP_KERNEL | __GFP_ZERO; -#endif - - ptepage = alloc_pages(flags, 0); - if (!ptepage) - return NULL; - if (!pgtable_page_ctor(ptepage)) { - __free_page(ptepage); - return NULL; - } - return ptepage; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free_slow(struct page *ptepage) -{ - __free_page(ptepage); -} - -static inline void pte_free(struct mm_struct *mm, struct page *ptepage) -{ - pgtable_page_dtor(ptepage); - __free_page(ptepage); -} - #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte)) #define pmd_populate(mm, pmd, pte) \ -- cgit v1.2.3 From 782de70c42930baae55234f3df0dc90774924447 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 23 Sep 2019 15:35:31 -0700 Subject: mm: consolidate pgtable_cache_init() and pgd_cache_init() Both pgtable_cache_init() and pgd_cache_init() are used to initialize kmem cache for page table allocations on several architectures that do not use PAGE_SIZE tables for one or more levels of the page table hierarchy. Most architectures do not implement these functions and use __weak default NOP implementation of pgd_cache_init(). Since there is no such default for pgtable_cache_init(), its empty stub is duplicated among most architectures. Rename the definitions of pgd_cache_init() to pgtable_cache_init() and drop empty stubs of pgtable_cache_init(). Link: http://lkml.kernel.org/r/1566457046-22637-1-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Acked-by: Will Deacon [arm64] Acked-by: Thomas Gleixner [x86] Cc: Catalin Marinas Cc: Ingo Molnar Cc: Borislav Petkov Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgtable.h | 5 ----- arch/arc/include/asm/pgtable.h | 5 ----- arch/arm/include/asm/pgtable-nommu.h | 5 ----- arch/arm/include/asm/pgtable.h | 2 -- arch/arm64/include/asm/pgtable.h | 2 -- arch/arm64/mm/pgd.c | 2 +- arch/c6x/include/asm/pgtable.h | 5 ----- arch/csky/include/asm/pgtable.h | 5 ----- arch/h8300/include/asm/pgtable.h | 6 ------ arch/hexagon/include/asm/pgtable.h | 3 --- arch/hexagon/mm/Makefile | 2 +- arch/hexagon/mm/pgalloc.c | 10 ---------- arch/ia64/include/asm/pgtable.h | 5 ----- arch/m68k/include/asm/pgtable_mm.h | 5 ----- arch/m68k/include/asm/pgtable_no.h | 5 ----- arch/microblaze/include/asm/pgtable.h | 7 ------- arch/mips/include/asm/pgtable.h | 5 ----- arch/nds32/include/asm/pgtable.h | 2 -- arch/nios2/include/asm/pgtable.h | 2 -- arch/openrisc/include/asm/pgtable.h | 5 ----- arch/parisc/include/asm/pgtable.h | 2 -- arch/powerpc/include/asm/pgtable.h | 1 - arch/riscv/include/asm/pgtable.h | 5 ----- arch/s390/include/asm/pgtable.h | 5 ----- arch/sh/include/asm/pgtable.h | 5 ----- arch/sh/mm/nommu.c | 4 ---- arch/sparc/include/asm/pgtable_32.h | 5 ----- arch/sparc/include/asm/pgtable_64.h | 1 - arch/um/include/asm/pgtable.h | 2 -- arch/unicore32/include/asm/pgtable.h | 2 -- arch/x86/include/asm/pgtable_32.h | 1 - arch/x86/include/asm/pgtable_64.h | 2 -- arch/x86/mm/pgtable.c | 6 +----- arch/xtensa/include/asm/pgtable.h | 1 - include/asm-generic/pgtable.h | 2 +- init/main.c | 3 +-- 36 files changed, 5 insertions(+), 130 deletions(-) delete mode 100644 arch/hexagon/mm/pgalloc.c (limited to 'arch') diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 89c2032f9960..065b57f408c3 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -359,11 +359,6 @@ extern void paging_init(void); #include -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - /* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ #define HAVE_ARCH_UNMAPPED_AREA diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 1d87c18a2976..7addd0301c51 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -395,11 +395,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, /* to cope with aliasing VIPT cache */ #define HAVE_ARCH_UNMAPPED_AREA -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index d0de24f06724..010fa1a35a68 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -70,11 +70,6 @@ typedef pte_t *pte_addr_t; */ extern unsigned int kobjsize(const void *objp); -/* - * No page table caches to initialise. - */ -#define pgtable_cache_init() do { } while (0) - /* * All 32bit addresses are effectively valid for vmalloc... * Sort of meaningless for non-VM targets. diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index f2e990dc27e7..3ae120cd1715 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -368,8 +368,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#define pgtable_cache_init() do { } while (0) - #endif /* !__ASSEMBLY__ */ #endif /* CONFIG_MMU */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 57427d17580e..7576df00eb50 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -861,8 +861,6 @@ extern int kern_addr_valid(unsigned long addr); #include -static inline void pgtable_cache_init(void) { } - /* * On AArch64, the cache coherency is handled via the set_pte_at() function. */ diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 7548f9ca1f11..4a64089e5771 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -35,7 +35,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } -void __init pgd_cache_init(void) +void __init pgtable_cache_init(void) { if (PGD_SIZE == PAGE_SIZE) return; diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h index 0bd805964ea6..0b6919c00413 100644 --- a/arch/c6x/include/asm/pgtable.h +++ b/arch/c6x/include/asm/pgtable.h @@ -59,11 +59,6 @@ extern unsigned long empty_zero_page; #define swapper_pg_dir ((pgd_t *) 0) -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - /* * c6x is !MMU, so define the simpliest implementation */ diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index c429a6f347de..0040b3a05b61 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -296,11 +296,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ #define kern_addr_valid(addr) (1) -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do {} while (0) - #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h index a99caa49d265..4d00152fab58 100644 --- a/arch/h8300/include/asm/pgtable.h +++ b/arch/h8300/include/asm/pgtable.h @@ -4,7 +4,6 @@ #define __ARCH_USE_5LEVEL_HACK #include #include -#define pgtable_cache_init() do { } while (0) extern void paging_init(void); #define PAGE_NONE __pgprot(0) /* these mean nothing to NO_MM */ #define PAGE_SHARED __pgprot(0) /* these mean nothing to NO_MM */ @@ -34,11 +33,6 @@ static inline int pte_file(pte_t pte) { return 0; } extern unsigned int kobjsize(const void *objp); extern int is_in_rom(unsigned long); -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - /* * All 32bit addresses are effectively valid for vmalloc... * Sort of meaningless for non-VM targets. diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index a3ff6d24c09e..2fec20ad939e 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -431,9 +431,6 @@ static inline int pte_exec(pte_t pte) #define __pte_offset(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) -/* I think this is in case we have page table caches; needed by init/main.c */ -#define pgtable_cache_init() do { } while (0) - /* * Swap/file PTE definitions. If _PAGE_PRESENT is zero, the rest of the PTE is * interpreted as swap information. The remaining free bits are interpreted as diff --git a/arch/hexagon/mm/Makefile b/arch/hexagon/mm/Makefile index 1894263ae5bc..893838499591 100644 --- a/arch/hexagon/mm/Makefile +++ b/arch/hexagon/mm/Makefile @@ -3,5 +3,5 @@ # Makefile for Hexagon memory management subsystem # -obj-y := init.o pgalloc.o ioremap.o uaccess.o vm_fault.o cache.o +obj-y := init.o ioremap.o uaccess.o vm_fault.o cache.o obj-y += copy_to_user.o copy_from_user.o strnlen_user.o vm_tlb.o diff --git a/arch/hexagon/mm/pgalloc.c b/arch/hexagon/mm/pgalloc.c deleted file mode 100644 index 4d4316140237..000000000000 --- a/arch/hexagon/mm/pgalloc.c +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#include - -void __init pgtable_cache_init(void) -{ -} diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index b1e7468eb65a..d602e7c622db 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -566,11 +566,6 @@ extern struct page *zero_page_memmap_ptr; #define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M #define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT) -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - /* These tell get_user_pages() that the first gate page is accessible from user-level. */ #define FIXADDR_USER_START GATE_ADDR #ifdef HAVE_BUGGY_SEGREL diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index cc476c1d72e5..646c174fff99 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -176,9 +176,4 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot); #include #endif /* !__ASSEMBLY__ */ -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - #endif /* _M68K_PGTABLE_H */ diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h index 69e271101223..c18165b0d904 100644 --- a/arch/m68k/include/asm/pgtable_no.h +++ b/arch/m68k/include/asm/pgtable_no.h @@ -44,11 +44,6 @@ extern void paging_init(void); */ #define ZERO_PAGE(vaddr) (virt_to_page(0)) -/* - * No page table caches to initialise. - */ -#define pgtable_cache_init() do { } while (0) - /* * All 32bit addresses are effectively valid for vmalloc... * Sort of meaningless for non-VM targets. diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 142d3f004848..954b69af451f 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -46,8 +46,6 @@ extern int mem_init_done; #define swapper_pg_dir ((pgd_t *) NULL) -#define pgtable_cache_init() do {} while (0) - #define arch_enter_lazy_cpu_mode() do {} while (0) #define pgprot_noncached_wc(prot) prot @@ -526,11 +524,6 @@ extern unsigned long iopa(unsigned long addr); /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ #define kern_addr_valid(addr) (1) -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - void do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code); diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 4dca733d5076..f85bd5b15f51 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -661,9 +661,4 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - #endif /* _ASM_PGTABLE_H */ diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index c70cc56bec09..0588ec99725c 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -403,8 +403,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; * into virtual address `from' */ -#define pgtable_cache_init() do { } while (0) - #endif /* !__ASSEMBLY__ */ #endif /* _ASMNDS32_PGTABLE_H */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 95237b7f6fc1..99985d8b7166 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -291,8 +291,6 @@ static inline void pte_clear(struct mm_struct *mm, #include -#define pgtable_cache_init() do { } while (0) - extern void __init paging_init(void); extern void __init mmu_init(void); diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 2fe9ff5b5d6f..248d22d8faa7 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -443,11 +443,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #include -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - typedef pte_t *pte_addr_t; #endif /* __ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 6d58c1739b42..4ac374b3a99f 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -132,8 +132,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define PTRS_PER_PTE (1UL << BITS_PER_PTE) /* Definitions for 2nd level */ -#define pgtable_cache_init() do { } while (0) - #define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 8b7865a2d576..4053b2ab427c 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -87,7 +87,6 @@ extern unsigned long ioremap_bot; unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned int shift); -void pgtable_cache_init(void); #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32) void mark_initmem_nx(void); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 80905b27ee98..c60123f018f5 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -424,11 +424,6 @@ extern void *dtb_early_va; extern void setup_bootmem(void); extern void paging_init(void); -static inline void pgtable_cache_init(void) -{ - /* No page table caches to initialize */ -} - #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 8f59454ac407..36c578c0ff96 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1682,11 +1682,6 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -/* - * No page table caches to initialise - */ -static inline void pgtable_cache_init(void) { } - #include #endif /* _S390_PAGE_H */ diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index 9085d1142fa3..cbd0f3c55a0c 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -123,11 +123,6 @@ typedef pte_t *pte_addr_t; #define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT))) -/* - * Initialise the page table caches - */ -extern void pgtable_cache_init(void); - struct vm_area_struct; struct mm_struct; diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c index cc779a90d917..dca946f426c6 100644 --- a/arch/sh/mm/nommu.c +++ b/arch/sh/mm/nommu.c @@ -97,7 +97,3 @@ void __init page_table_range_init(unsigned long start, unsigned long end, void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot) { } - -void pgtable_cache_init(void) -{ -} diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 4eebed6c6781..31da44826645 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -445,9 +445,4 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma, /* We provide our own get_unmapped_area to cope with VA holes for userland */ #define HAVE_ARCH_UNMAPPED_AREA -/* - * No page table caches to initialise - */ -#define pgtable_cache_init() do { } while (0) - #endif /* !(_SPARC_PGTABLE_H) */ diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 1599de730532..b57f9c631eca 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -1135,7 +1135,6 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long, unsigned long); #define HAVE_ARCH_FB_UNMAPPED_AREA -void pgtable_cache_init(void); void sun4v_register_fault_status(void); void sun4v_ktsb_register(void); void __init cheetah_ecache_flush_init(void); diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index e4d3ed980d82..36a44d58f373 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -32,8 +32,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* zero page used for uninitialized stuff */ extern unsigned long *empty_zero_page; -#define pgtable_cache_init() do ; while (0) - /* Just any arbitrary offset to the start of the vmalloc VM area: the * current 8MB value just means that there will be a 8MB "hole" after the * physical memory until the kernel virtual memory starts. That means that diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index 126e961a8cb0..c8f7ba12f309 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -285,8 +285,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; #include -#define pgtable_cache_init() do { } while (0) - #endif /* !__ASSEMBLY__ */ #endif /* __UNICORE_PGTABLE_H__ */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index b9b9f8aa963e..0dca7f7aeff2 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -29,7 +29,6 @@ extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; extern pmd_t initial_pg_pmd[]; -static inline void pgtable_cache_init(void) { } void paging_init(void); void sync_initial_page_table(void); diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index a26d2d58b9c9..0b6c4042942a 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -241,8 +241,6 @@ extern void cleanup_highmap(void); #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN -#define pgtable_cache_init() do { } while (0) - #define PAGE_AGP PAGE_KERNEL_NOCACHE #define HAVE_PAGE_AGP 1 diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 44816ff6411f..463940faf52f 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -357,7 +357,7 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm, static struct kmem_cache *pgd_cache; -void __init pgd_cache_init(void) +void __init pgtable_cache_init(void) { /* * When PAE kernel is running as a Xen domain, it does not use @@ -402,10 +402,6 @@ static inline void _pgd_free(pgd_t *pgd) } #else -void __init pgd_cache_init(void) -{ -} - static inline pgd_t *_pgd_alloc(void) { return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER, diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index ce3ff5e591b9..3f7fe5a8c286 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -238,7 +238,6 @@ extern void paging_init(void); # define swapper_pg_dir NULL static inline void paging_init(void) { } #endif -static inline void pgtable_cache_init(void) { } /* * The pmd contains the kernel virtual address of the pte page. diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 75d9d68a6de7..fae6abb3d586 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1126,7 +1126,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, static inline void init_espfix_bsp(void) { } #endif -extern void __init pgd_cache_init(void); +extern void __init pgtable_cache_init(void); #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot) diff --git a/init/main.c b/init/main.c index 3ca67e8b92fd..99a5f55e0d02 100644 --- a/init/main.c +++ b/init/main.c @@ -507,7 +507,7 @@ void __init __weak mem_encrypt_init(void) { } void __init __weak poking_init(void) { } -void __init __weak pgd_cache_init(void) { } +void __init __weak pgtable_cache_init(void) { } bool initcall_debug; core_param(initcall_debug, initcall_debug, bool, 0644); @@ -565,7 +565,6 @@ static void __init mm_init(void) init_espfix_bsp(); /* Should be run after espfix64 is set up. */ pti_init(); - pgd_cache_init(); } void __init __weak arch_call_rest_init(void) -- cgit v1.2.3 From 9ef258bad7af2f6abee2bdfaaa6c41890d4f4db6 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 23 Sep 2019 15:37:41 -0700 Subject: thp: update split_huge_page_pmd() comment According to 78ddc5347341 ("thp: rename split_huge_page_pmd() to split_huge_pmd()"), update related comment. Link: http://lkml.kernel.org/r/20190731033406.185285-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: "Kirill A . Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/book3s64/hash_utils.c | 2 +- include/asm-generic/pgtable.h | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 3410ea9f4de1..6c123760164e 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1748,7 +1748,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, /* * IF we try to do a HUGE PTE update after a withdraw is done. * we will find the below NULL. This happens when we do - * split_huge_page_pmd + * split_huge_pmd */ if (!hpte_slot_array) return; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index fae6abb3d586..818691846c90 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1002,9 +1002,8 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) * need this). If THP is not enabled, the pmd can't go away under the * code even if MADV_DONTNEED runs, but if THP is enabled we need to * run a pmd_trans_unstable before walking the ptes after - * split_huge_page_pmd returns (because it may have run when the pmd - * become null, but then a page fault can map in a THP and not a - * regular page). + * split_huge_pmd returns (because it may have run when the pmd become + * null, but then a page fault can map in a THP and not a regular page). */ static inline int pmd_trans_unstable(pmd_t *pmd) { -- cgit v1.2.3 From 28058ed61fc869d7e67916725a3f7e9de50e606b Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:38:40 -0700 Subject: arm64: make use of is_compat_task instead of hardcoding this test Each architecture has its own way to determine if a task is a compat task, by using is_compat_task in arch_mmap_rnd, it allows more genericity and then it prepares its moving to mm/. Link: http://lkml.kernel.org/r/20190730055113.23635-3-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Catalin Marinas Acked-by: Kees Cook Reviewed-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index b050641b5139..bb0140afed66 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -43,7 +43,7 @@ unsigned long arch_mmap_rnd(void) unsigned long rnd; #ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_32BIT)) + if (is_compat_task()) rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); else #endif -- cgit v1.2.3 From e8d54b62c55ab6201de6d195fc2c276294c1f6ae Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:38:43 -0700 Subject: arm64: consider stack randomization for mmap base only when necessary Do not offset mmap base address because of stack randomization if current task does not want randomization. Note that x86 already implements this behaviour. Link: http://lkml.kernel.org/r/20190730055113.23635-4-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Catalin Marinas Acked-by: Kees Cook Reviewed-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/mm/mmap.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index bb0140afed66..e4acaead67de 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -54,7 +54,11 @@ unsigned long arch_mmap_rnd(void) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { unsigned long gap = rlim_stack->rlim_cur; - unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap; + unsigned long pad = stack_guard_gap; + + /* Account for stack randomization if necessary */ + if (current->flags & PF_RANDOMIZE) + pad += (STACK_RND_MASK << PAGE_SHIFT); /* Values close to RLIM_INFINITY can overflow. */ if (gap + pad > gap) -- cgit v1.2.3 From 67f3977f805b34cf0e41090679800d2091d41d49 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:38:47 -0700 Subject: arm64, mm: move generic mmap layout functions to mm arm64 handles top-down mmap layout in a way that can be easily reused by other architectures, so make it available in mm. It then introduces a new config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT that can be set by other architectures to benefit from those functions. Note that this new config depends on MMU being enabled, if selected without MMU support, a warning will be thrown. Link: http://lkml.kernel.org/r/20190730055113.23635-5-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Suggested-by: Christoph Hellwig Acked-by: Catalin Marinas Acked-by: Kees Cook Reviewed-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: James Hogan Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 10 +++++ arch/arm64/Kconfig | 1 + arch/arm64/include/asm/processor.h | 2 - arch/arm64/mm/mmap.c | 76 ------------------------------------- kernel/sysctl.c | 6 ++- mm/util.c | 78 +++++++++++++++++++++++++++++++++++++- 6 files changed, 92 insertions(+), 81 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 0fcf8ec1e098..dfce421b8e8a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -706,6 +706,16 @@ config HAVE_ARCH_COMPAT_MMAP_BASES and vice-versa 32-bit applications to call 64-bit mmap(). Required for applications doing different bitness syscalls. +# This allows to use a set of generic functions to determine mmap base +# address by giving priority to top-down scheme only if the process +# is not in legacy mode (compat task, unlimited stack size or +# sysctl_legacy_va_layout). +# Architecture that selects this option can provide its own version of: +# - STACK_RND_MASK +config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT + bool + depends on MMU + config HAVE_COPY_THREAD_TLS bool help diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 37c610963eee..2d4b7044063c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -71,6 +71,7 @@ config ARM64 select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index c67848c55009..5623685c7d13 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -280,8 +280,6 @@ static inline void spin_lock_prefetch(const void *ptr) "nop") : : "p" (ptr)); } -#define HAVE_ARCH_PICK_MMAP_LAYOUT - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index e4acaead67de..3028bacbc4e9 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -20,82 +20,6 @@ #include -/* - * Leave enough space between the mmap area and the stack to honour ulimit in - * the face of randomisation. - */ -#define MIN_GAP (SZ_128M) -#define MAX_GAP (STACK_TOP/6*5) - -static int mmap_is_legacy(struct rlimit *rlim_stack) -{ - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - if (rlim_stack->rlim_cur == RLIM_INFINITY) - return 1; - - return sysctl_legacy_va_layout; -} - -unsigned long arch_mmap_rnd(void) -{ - unsigned long rnd; - -#ifdef CONFIG_COMPAT - if (is_compat_task()) - rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); - else -#endif - rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); - return rnd << PAGE_SHIFT; -} - -static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) -{ - unsigned long gap = rlim_stack->rlim_cur; - unsigned long pad = stack_guard_gap; - - /* Account for stack randomization if necessary */ - if (current->flags & PF_RANDOMIZE) - pad += (STACK_RND_MASK << PAGE_SHIFT); - - /* Values close to RLIM_INFINITY can overflow. */ - if (gap + pad > gap) - gap += pad; - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(STACK_TOP - gap - rnd); -} - -/* - * This function, called very early during the creation of a new process VM - * image, sets up which VM layout function to use: - */ -void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) -{ - unsigned long random_factor = 0UL; - - if (current->flags & PF_RANDOMIZE) - random_factor = arch_mmap_rnd(); - - /* - * Fall back to the standard layout if the personality bit is set, or - * if the expected stack growth is unlimited: - */ - if (mmap_is_legacy(rlim_stack)) { - mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; - mm->get_unmapped_area = arch_get_unmapped_area; - } else { - mm->mmap_base = mmap_base(random_factor, rlim_stack); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - } -} - /* * You really shouldn't be using read() or write() on /dev/mem. This might go * away in the future. diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 078950d9605b..00fcea236eba 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -264,7 +264,8 @@ extern struct ctl_table epoll_table[]; extern struct ctl_table firmware_config_table[]; #endif -#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT +#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ + defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) int sysctl_legacy_va_layout; #endif @@ -1573,7 +1574,8 @@ static struct ctl_table vm_table[] = { .proc_handler = proc_dointvec, .extra1 = SYSCTL_ZERO, }, -#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT +#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ + defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) { .procname = "legacy_va_layout", .data = &sysctl_legacy_va_layout, diff --git a/mm/util.c b/mm/util.c index bf8af5e07c4a..7922726f0a8f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -17,7 +17,12 @@ #include #include #include +#include +#include #include +#include +#include +#include #include @@ -315,7 +320,78 @@ unsigned long randomize_stack_top(unsigned long stack_top) #endif } -#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) +#ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT +#ifdef CONFIG_ARCH_HAS_ELF_RANDOMIZE +unsigned long arch_mmap_rnd(void) +{ + unsigned long rnd; + +#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS + if (is_compat_task()) + rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); + else +#endif /* CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS */ + rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); + + return rnd << PAGE_SHIFT; +} +#endif /* CONFIG_ARCH_HAS_ELF_RANDOMIZE */ + +static int mmap_is_legacy(struct rlimit *rlim_stack) +{ + if (current->personality & ADDR_COMPAT_LAYOUT) + return 1; + + if (rlim_stack->rlim_cur == RLIM_INFINITY) + return 1; + + return sysctl_legacy_va_layout; +} + +/* + * Leave enough space between the mmap area and the stack to honour ulimit in + * the face of randomisation. + */ +#define MIN_GAP (SZ_128M) +#define MAX_GAP (STACK_TOP / 6 * 5) + +static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) +{ + unsigned long gap = rlim_stack->rlim_cur; + unsigned long pad = stack_guard_gap; + + /* Account for stack randomization if necessary */ + if (current->flags & PF_RANDOMIZE) + pad += (STACK_RND_MASK << PAGE_SHIFT); + + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap += pad; + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(STACK_TOP - gap - rnd); +} + +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) +{ + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) + random_factor = arch_mmap_rnd(); + + if (mmap_is_legacy(rlim_stack)) { + mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; + mm->get_unmapped_area = arch_get_unmapped_area; + } else { + mm->mmap_base = mmap_base(random_factor, rlim_stack); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + } +} +#elif defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { mm->mmap_base = TASK_UNMAPPED_BASE; -- cgit v1.2.3 From e7142bf5d231f3ccdf6ea6764d5080999b8e299d Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:38:50 -0700 Subject: arm64, mm: make randomization selected by generic topdown mmap layout This commits selects ARCH_HAS_ELF_RANDOMIZE when an arch uses the generic topdown mmap layout functions so that this security feature is on by default. Note that this commit also removes the possibility for arm64 to have elf randomization and no MMU: without MMU, the security added by randomization is worth nothing. Link: http://lkml.kernel.org/r/20190730055113.23635-6-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Catalin Marinas Acked-by: Kees Cook Reviewed-by: Christoph Hellwig Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 1 + arch/arm64/Kconfig | 1 - arch/arm64/kernel/process.c | 8 -------- mm/util.c | 11 +++++++++-- 4 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index dfce421b8e8a..5f8a5d84dbbe 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -715,6 +715,7 @@ config HAVE_ARCH_COMPAT_MMAP_BASES config ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT bool depends on MMU + select ARCH_HAS_ELF_RANDOMIZE config HAVE_COPY_THREAD_TLS bool diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2d4b7044063c..866e05882799 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -15,7 +15,6 @@ config ARM64 select ARCH_HAS_DMA_COHERENT_TO_PFN select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI - select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 03689c0beb34..a47462def04b 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -557,14 +557,6 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ~0xf; } -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - if (is_compat_task()) - return randomize_page(mm->brk, SZ_32M); - else - return randomize_page(mm->brk, SZ_1G); -} - /* * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY. */ diff --git a/mm/util.c b/mm/util.c index 7922726f0a8f..3ad6db9a722e 100644 --- a/mm/util.c +++ b/mm/util.c @@ -321,7 +321,15 @@ unsigned long randomize_stack_top(unsigned long stack_top) } #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT -#ifdef CONFIG_ARCH_HAS_ELF_RANDOMIZE +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + /* Is the current task 32bit ? */ + if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) + return randomize_page(mm->brk, SZ_32M); + + return randomize_page(mm->brk, SZ_1G); +} + unsigned long arch_mmap_rnd(void) { unsigned long rnd; @@ -335,7 +343,6 @@ unsigned long arch_mmap_rnd(void) return rnd << PAGE_SHIFT; } -#endif /* CONFIG_ARCH_HAS_ELF_RANDOMIZE */ static int mmap_is_legacy(struct rlimit *rlim_stack) { -- cgit v1.2.3 From af0f4297286f13a75edf93677b1fb2fc16c412a7 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:38:54 -0700 Subject: arm: properly account for stack randomization and stack guard gap This commit takes care of stack randomization and stack guard gap when computing mmap base address and checks if the task asked for randomization. This fixes the problem uncovered and not fixed for arm here: https://lkml.kernel.org/r/20170622200033.25714-1-riel@redhat.com Link: http://lkml.kernel.org/r/20190730055113.23635-7-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Kees Cook Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/mmap.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index f866870db749..bff3d00bda5b 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -18,8 +18,9 @@ (((pgoff)<> (PAGE_SHIFT - 12)) static int mmap_is_legacy(struct rlimit *rlim_stack) { @@ -35,6 +36,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { unsigned long gap = rlim_stack->rlim_cur; + unsigned long pad = stack_guard_gap; + + /* Account for stack randomization if necessary */ + if (current->flags & PF_RANDOMIZE) + pad += (STACK_RND_MASK << PAGE_SHIFT); + + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap += pad; if (gap < MIN_GAP) gap = MIN_GAP; -- cgit v1.2.3 From 86e568e9c0525fc40e76d827212d5e9721cf7504 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:38:57 -0700 Subject: arm: use STACK_TOP when computing mmap base address mmap base address must be computed wrt stack top address, using TASK_SIZE is wrong since STACK_TOP and TASK_SIZE are not equivalent. Link: http://lkml.kernel.org/r/20190730055113.23635-8-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Kees Cook Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/mmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index bff3d00bda5b..0b94b674aa91 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -19,7 +19,7 @@ /* gap between mmap and stack */ #define MIN_GAP (128*1024*1024UL) -#define MAX_GAP ((TASK_SIZE)/6*5) +#define MAX_GAP ((STACK_TOP)/6*5) #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) static int mmap_is_legacy(struct rlimit *rlim_stack) @@ -51,7 +51,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - rnd); + return PAGE_ALIGN(STACK_TOP - gap - rnd); } /* -- cgit v1.2.3 From dba79c3df4a2275132759b0bc04c64b7a510af4a Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:39:01 -0700 Subject: arm: use generic mmap top-down layout and brk randomization arm uses a top-down mmap layout by default that exactly fits the generic functions, so get rid of arch specific code and use the generic version by selecting ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT. As ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT selects ARCH_HAS_ELF_RANDOMIZE, use the generic version of arch_randomize_brk since it also fits. Note that this commit also removes the possibility for arm to have elf randomization and no MMU: without MMU, the security added by randomization is worth nothing. Note that it is safe to remove STACK_RND_MASK since it matches the default value. Link: http://lkml.kernel.org/r/20190730055113.23635-9-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Kees Cook Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/Kconfig | 1 + arch/arm/include/asm/processor.h | 2 -- arch/arm/kernel/process.c | 5 ---- arch/arm/mm/mmap.c | 62 ---------------------------------------- 4 files changed, 1 insertion(+), 69 deletions(-) (limited to 'arch') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 229f2cdd81ca..8a50efb559f3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -34,6 +34,7 @@ config ARM select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_FLAT_ARGVP_ENVP_ON_STACK select BUILDTIME_EXTABLE_SORT if MMU diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 20c2f42454b8..614bf829e454 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -140,8 +140,6 @@ static inline void prefetchw(const void *ptr) #endif #endif -#define HAVE_ARCH_PICK_MMAP_LAYOUT - #endif #endif /* __ASM_ARM_PROCESSOR_H */ diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f934a6739fc0..9485acc520a4 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -319,11 +319,6 @@ unsigned long get_wchan(struct task_struct *p) return 0; } -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - return randomize_page(mm->brk, 0x02000000); -} - #ifdef CONFIG_MMU #ifdef CONFIG_KUSER_HELPERS /* diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 0b94b674aa91..b8d912ac9e61 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -17,43 +17,6 @@ ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ (((pgoff)<> (PAGE_SHIFT - 12)) - -static int mmap_is_legacy(struct rlimit *rlim_stack) -{ - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - if (rlim_stack->rlim_cur == RLIM_INFINITY) - return 1; - - return sysctl_legacy_va_layout; -} - -static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) -{ - unsigned long gap = rlim_stack->rlim_cur; - unsigned long pad = stack_guard_gap; - - /* Account for stack randomization if necessary */ - if (current->flags & PF_RANDOMIZE) - pad += (STACK_RND_MASK << PAGE_SHIFT); - - /* Values close to RLIM_INFINITY can overflow. */ - if (gap + pad > gap) - gap += pad; - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(STACK_TOP - gap - rnd); -} - /* * We need to ensure that shared mappings are correctly aligned to * avoid aliasing issues with VIPT caches. We need to ensure that @@ -181,31 +144,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } -unsigned long arch_mmap_rnd(void) -{ - unsigned long rnd; - - rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); - - return rnd << PAGE_SHIFT; -} - -void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) -{ - unsigned long random_factor = 0UL; - - if (current->flags & PF_RANDOMIZE) - random_factor = arch_mmap_rnd(); - - if (mmap_is_legacy(rlim_stack)) { - mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; - mm->get_unmapped_area = arch_get_unmapped_area; - } else { - mm->mmap_base = mmap_base(random_factor, rlim_stack); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - } -} - /* * You really shouldn't be using read() or write() on /dev/mem. This * might go away in the future. -- cgit v1.2.3 From b1f61b5bde3a1f50392c97b4c8513d1b8efb1cf2 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:39:04 -0700 Subject: mips: properly account for stack randomization and stack guard gap This commit takes care of stack randomization and stack guard gap when computing mmap base address and checks if the task asked for randomization. This fixes the problem uncovered and not fixed for arm here: https://lkml.kernel.org/r/20170622200033.25714-1-riel@redhat.com Link: http://lkml.kernel.org/r/20190730055113.23635-10-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Kees Cook Acked-by: Paul Burton Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/mmap.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index d79f2b432318..f5c778113384 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -21,8 +21,9 @@ unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); /* gap between mmap and stack */ -#define MIN_GAP (128*1024*1024UL) -#define MAX_GAP ((TASK_SIZE)/6*5) +#define MIN_GAP (128*1024*1024UL) +#define MAX_GAP ((TASK_SIZE)/6*5) +#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) static int mmap_is_legacy(struct rlimit *rlim_stack) { @@ -38,6 +39,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { unsigned long gap = rlim_stack->rlim_cur; + unsigned long pad = stack_guard_gap; + + /* Account for stack randomization if necessary */ + if (current->flags & PF_RANDOMIZE) + pad += (STACK_RND_MASK << PAGE_SHIFT); + + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap += pad; if (gap < MIN_GAP) gap = MIN_GAP; -- cgit v1.2.3 From b5fb861790bf54486b68644fc27d6969bf772dd8 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:39:07 -0700 Subject: mips: use STACK_TOP when computing mmap base address mmap base address must be computed wrt stack top address, using TASK_SIZE is wrong since STACK_TOP and TASK_SIZE are not equivalent. Link: http://lkml.kernel.org/r/20190730055113.23635-11-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Kees Cook Acked-by: Paul Burton Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/mmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index f5c778113384..a7e84b2e71d7 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -22,7 +22,7 @@ EXPORT_SYMBOL(shm_align_mask); /* gap between mmap and stack */ #define MIN_GAP (128*1024*1024UL) -#define MAX_GAP ((TASK_SIZE)/6*5) +#define MAX_GAP ((STACK_TOP)/6*5) #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) static int mmap_is_legacy(struct rlimit *rlim_stack) @@ -54,7 +54,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - rnd); + return PAGE_ALIGN(STACK_TOP - gap - rnd); } #define COLOUR_ALIGN(addr, pgoff) \ -- cgit v1.2.3 From e548599fbe310754aa8f687d53c24d9cb5338ac4 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:39:11 -0700 Subject: mips: adjust brk randomization offset to fit generic version This commit simply bumps up to 32MB and 1GB the random offset of brk, compared to 8MB and 256MB, for 32bit and 64bit respectively. Link: http://lkml.kernel.org/r/20190730055113.23635-12-alex@ghiti.fr Suggested-by: Kees Cook Signed-off-by: Alexandre Ghiti Acked-by: Paul Burton Reviewed-by: Kees Cook Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/mmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index a7e84b2e71d7..ff6ab87e9c56 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -16,6 +16,7 @@ #include #include #include +#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); @@ -189,11 +190,11 @@ static inline unsigned long brk_rnd(void) unsigned long rnd = get_random_long(); rnd = rnd << PAGE_SHIFT; - /* 8MB for 32bit, 256MB for 64bit */ + /* 32MB for 32bit, 1GB for 64bit */ if (TASK_IS_32BIT_ADDR) - rnd = rnd & 0x7ffffful; + rnd = rnd & (SZ_32M - 1); else - rnd = rnd & 0xffffffful; + rnd = rnd & (SZ_1G - 1); return rnd; } -- cgit v1.2.3 From 09036468c8d074b730a840657a896f81c1c92017 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:39:14 -0700 Subject: mips: replace arch specific way to determine 32bit task with generic version Mips uses TASK_IS_32BIT_ADDR to determine if a task is 32bit, but this define is mips specific and other arches do not have it: instead, use !IS_ENABLED(CONFIG_64BIT) || is_compat_task() condition. Link: http://lkml.kernel.org/r/20190730055113.23635-13-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Paul Burton Reviewed-by: Kees Cook Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/mmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index ff6ab87e9c56..d5106c26ac6a 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -17,6 +17,7 @@ #include #include #include +#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); @@ -191,7 +192,7 @@ static inline unsigned long brk_rnd(void) rnd = rnd << PAGE_SHIFT; /* 32MB for 32bit, 1GB for 64bit */ - if (TASK_IS_32BIT_ADDR) + if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) rnd = rnd & (SZ_32M - 1); else rnd = rnd & (SZ_1G - 1); -- cgit v1.2.3 From 9035bd29427921cd32d268a830aff78dcafb945b Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:39:18 -0700 Subject: mips: use generic mmap top-down layout and brk randomization mips uses a top-down layout by default that exactly fits the generic functions, so get rid of arch specific code and use the generic version by selecting ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT. As ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT selects ARCH_HAS_ELF_RANDOMIZE, use the generic version of arch_randomize_brk since it also fits. Note that this commit also removes the possibility for mips to have elf randomization and no MMU: without MMU, the security added by randomization is worth nothing. Link: http://lkml.kernel.org/r/20190730055113.23635-14-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Acked-by: Paul Burton Reviewed-by: Kees Cook Reviewed-by: Luis Chamberlain Cc: Albert Ou Cc: Alexander Viro Cc: Catalin Marinas Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/Kconfig | 2 +- arch/mips/include/asm/processor.h | 5 -- arch/mips/mm/mmap.c | 96 --------------------------------------- 3 files changed, 1 insertion(+), 102 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cc8e2b1032a5..a0bd9bdb5f83 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -5,7 +5,6 @@ config MIPS select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT select ARCH_CLOCKSOURCE_DATA - select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_SUPPORTS_UPROBES @@ -13,6 +12,7 @@ config MIPS select ARCH_USE_CMPXCHG_LOCKREF if 64BIT select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index aca909bd7841..fba18d4a9190 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -29,11 +29,6 @@ extern unsigned int vced_count, vcei_count; -/* - * MIPS does have an arch_pick_mmap_layout() - */ -#define HAVE_ARCH_PICK_MMAP_LAYOUT 1 - #ifdef CONFIG_32BIT #ifdef CONFIG_KVM_GUEST /* User space process size is limited to 1GB in KVM Guest Mode */ diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index d5106c26ac6a..00fe90c6db3e 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -16,49 +16,10 @@ #include #include #include -#include -#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); -/* gap between mmap and stack */ -#define MIN_GAP (128*1024*1024UL) -#define MAX_GAP ((STACK_TOP)/6*5) -#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) - -static int mmap_is_legacy(struct rlimit *rlim_stack) -{ - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - if (rlim_stack->rlim_cur == RLIM_INFINITY) - return 1; - - return sysctl_legacy_va_layout; -} - -static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) -{ - unsigned long gap = rlim_stack->rlim_cur; - unsigned long pad = stack_guard_gap; - - /* Account for stack randomization if necessary */ - if (current->flags & PF_RANDOMIZE) - pad += (STACK_RND_MASK << PAGE_SHIFT); - - /* Values close to RLIM_INFINITY can overflow. */ - if (gap + pad > gap) - gap += pad; - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(STACK_TOP - gap - rnd); -} - #define COLOUR_ALIGN(addr, pgoff) \ ((((addr) + shm_align_mask) & ~shm_align_mask) + \ (((pgoff) << PAGE_SHIFT) & shm_align_mask)) @@ -156,63 +117,6 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, addr0, len, pgoff, flags, DOWN); } -unsigned long arch_mmap_rnd(void) -{ - unsigned long rnd; - -#ifdef CONFIG_COMPAT - if (TASK_IS_32BIT_ADDR) - rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); - else -#endif /* CONFIG_COMPAT */ - rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); - - return rnd << PAGE_SHIFT; -} - -void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) -{ - unsigned long random_factor = 0UL; - - if (current->flags & PF_RANDOMIZE) - random_factor = arch_mmap_rnd(); - - if (mmap_is_legacy(rlim_stack)) { - mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; - mm->get_unmapped_area = arch_get_unmapped_area; - } else { - mm->mmap_base = mmap_base(random_factor, rlim_stack); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - } -} - -static inline unsigned long brk_rnd(void) -{ - unsigned long rnd = get_random_long(); - - rnd = rnd << PAGE_SHIFT; - /* 32MB for 32bit, 1GB for 64bit */ - if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) - rnd = rnd & (SZ_32M - 1); - else - rnd = rnd & (SZ_1G - 1); - - return rnd; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long base = mm->brk; - unsigned long ret; - - ret = PAGE_ALIGN(base + brk_rnd()); - - if (ret < mm->brk) - return mm->brk; - - return ret; -} - bool __virt_addr_valid(const volatile void *kaddr) { unsigned long vaddr = (unsigned long)kaddr; -- cgit v1.2.3 From 54c95a11cc1b5e1d578209e027adf5775395dafd Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 23 Sep 2019 15:39:21 -0700 Subject: riscv: make mmap allocation top-down by default In order to avoid wasting user address space by using bottom-up mmap allocation scheme, prefer top-down scheme when possible. Before: root@qemuriscv64:~# cat /proc/self/maps 00010000-00016000 r-xp 00000000 fe:00 6389 /bin/cat.coreutils 00016000-00017000 r--p 00005000 fe:00 6389 /bin/cat.coreutils 00017000-00018000 rw-p 00006000 fe:00 6389 /bin/cat.coreutils 00018000-00039000 rw-p 00000000 00:00 0 [heap] 1555556000-155556d000 r-xp 00000000 fe:00 7193 /lib/ld-2.28.so 155556d000-155556e000 r--p 00016000 fe:00 7193 /lib/ld-2.28.so 155556e000-155556f000 rw-p 00017000 fe:00 7193 /lib/ld-2.28.so 155556f000-1555570000 rw-p 00000000 00:00 0 1555570000-1555572000 r-xp 00000000 00:00 0 [vdso] 1555574000-1555576000 rw-p 00000000 00:00 0 1555576000-1555674000 r-xp 00000000 fe:00 7187 /lib/libc-2.28.so 1555674000-1555678000 r--p 000fd000 fe:00 7187 /lib/libc-2.28.so 1555678000-155567a000 rw-p 00101000 fe:00 7187 /lib/libc-2.28.so 155567a000-15556a0000 rw-p 00000000 00:00 0 3fffb90000-3fffbb1000 rw-p 00000000 00:00 0 [stack] After: root@qemuriscv64:~# cat /proc/self/maps 00010000-00016000 r-xp 00000000 fe:00 6389 /bin/cat.coreutils 00016000-00017000 r--p 00005000 fe:00 6389 /bin/cat.coreutils 00017000-00018000 rw-p 00006000 fe:00 6389 /bin/cat.coreutils 2de81000-2dea2000 rw-p 00000000 00:00 0 [heap] 3ff7eb6000-3ff7ed8000 rw-p 00000000 00:00 0 3ff7ed8000-3ff7fd6000 r-xp 00000000 fe:00 7187 /lib/libc-2.28.so 3ff7fd6000-3ff7fda000 r--p 000fd000 fe:00 7187 /lib/libc-2.28.so 3ff7fda000-3ff7fdc000 rw-p 00101000 fe:00 7187 /lib/libc-2.28.so 3ff7fdc000-3ff7fe2000 rw-p 00000000 00:00 0 3ff7fe4000-3ff7fe6000 r-xp 00000000 00:00 0 [vdso] 3ff7fe6000-3ff7ffd000 r-xp 00000000 fe:00 7193 /lib/ld-2.28.so 3ff7ffd000-3ff7ffe000 r--p 00016000 fe:00 7193 /lib/ld-2.28.so 3ff7ffe000-3ff7fff000 rw-p 00017000 fe:00 7193 /lib/ld-2.28.so 3ff7fff000-3ff8000000 rw-p 00000000 00:00 0 3fff888000-3fff8a9000 rw-p 00000000 00:00 0 [stack] [alex@ghiti.fr: v6] Link: http://lkml.kernel.org/r/20190808061756.19712-15-alex@ghiti.fr Link: http://lkml.kernel.org/r/20190730055113.23635-15-alex@ghiti.fr Signed-off-by: Alexandre Ghiti Reviewed-by: Christoph Hellwig Reviewed-by: Kees Cook Reviewed-by: Luis Chamberlain Acked-by: Paul Walmsley [arch/riscv] Cc: Albert Ou Cc: Alexander Viro Cc: Catalin Marinas Cc: Christoph Hellwig Cc: James Hogan Cc: Palmer Dabbelt Cc: Paul Burton Cc: Ralf Baechle Cc: Russell King Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/riscv/Kconfig | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 71d29fb4008a..8eebbc8860bb 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -59,6 +59,18 @@ config RISCV select ARCH_HAS_GIGANTIC_PAGE select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select SPARSEMEM_STATIC if 32BIT + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU + select HAVE_ARCH_MMAP_RND_BITS + +config ARCH_MMAP_RND_BITS_MIN + default 18 if 64BIT + default 8 + +# max bits determined by the following formula: +# VA_BITS - PAGE_SHIFT - 3 +config ARCH_MMAP_RND_BITS_MAX + default 24 if 64BIT # SV39 based + default 17 config MMU def_bool y -- cgit v1.2.3