diff options
Diffstat (limited to 'arch/arm64/kernel/vdso.c')
-rw-r--r-- | arch/arm64/kernel/vdso.c | 374 |
1 files changed, 195 insertions, 179 deletions
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 354b11e27c07..89b6e7840002 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -18,6 +18,7 @@ #include <linux/sched.h> #include <linux/signal.h> #include <linux/slab.h> +#include <linux/time_namespace.h> #include <linux/timekeeper_internal.h> #include <linux/vmalloc.h> #include <vdso/datapage.h> @@ -28,25 +29,18 @@ #include <asm/signal32.h> #include <asm/vdso.h> -extern char vdso_start[], vdso_end[]; -#ifdef CONFIG_COMPAT_VDSO -extern char vdso32_start[], vdso32_end[]; -#endif /* CONFIG_COMPAT_VDSO */ +enum vdso_abi { + VDSO_ABI_AA64, + VDSO_ABI_AA32, +}; -/* vdso_lookup arch_index */ -enum arch_vdso_type { - ARM64_VDSO = 0, -#ifdef CONFIG_COMPAT_VDSO - ARM64_VDSO32 = 1, -#endif /* CONFIG_COMPAT_VDSO */ +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, }; -#ifdef CONFIG_COMPAT_VDSO -#define VDSO_TYPES (ARM64_VDSO32 + 1) -#else -#define VDSO_TYPES (ARM64_VDSO + 1) -#endif /* CONFIG_COMPAT_VDSO */ -struct __vdso_abi { +struct vdso_abi_info { const char *name; const char *vdso_code_start; const char *vdso_code_end; @@ -57,14 +51,14 @@ struct __vdso_abi { struct vm_special_mapping *cm; }; -static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = { - { +static struct vdso_abi_info vdso_info[] __ro_after_init = { + [VDSO_ABI_AA64] = { .name = "vdso", .vdso_code_start = vdso_start, .vdso_code_end = vdso_end, }, #ifdef CONFIG_COMPAT_VDSO - { + [VDSO_ABI_AA32] = { .name = "vdso32", .vdso_code_start = vdso32_start, .vdso_code_end = vdso32_end, @@ -75,78 +69,133 @@ static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = { /* * The vDSO data page. */ -static union { - struct vdso_data data[CS_BASES]; - u8 page[PAGE_SIZE]; -} vdso_data_store __page_aligned_data; +static union vdso_data_store vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = vdso_data_store.data; -static int __vdso_remap(enum arch_vdso_type arch_index, - const struct vm_special_mapping *sm, - struct vm_area_struct *new_vma) +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) { - unsigned long new_size = new_vma->vm_end - new_vma->vm_start; - unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end - - vdso_lookup[arch_index].vdso_code_start; - - if (vdso_size != new_size) - return -EINVAL; - current->mm->context.vdso = (void *)new_vma->vm_start; return 0; } -static int __vdso_init(enum arch_vdso_type arch_index) +static int __init __vdso_init(enum vdso_abi abi) { int i; struct page **vdso_pagelist; unsigned long pfn; - if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) { + if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } - vdso_lookup[arch_index].vdso_pages = ( - vdso_lookup[arch_index].vdso_code_end - - vdso_lookup[arch_index].vdso_code_start) >> + vdso_info[abi].vdso_pages = ( + vdso_info[abi].vdso_code_end - + vdso_info[abi].vdso_code_start) >> PAGE_SHIFT; - /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1, + vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages, sizeof(struct page *), GFP_KERNEL); if (vdso_pagelist == NULL) return -ENOMEM; - /* Grab the vDSO data page. */ - vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); + /* Grab the vDSO code pages. */ + pfn = sym_to_pfn(vdso_info[abi].vdso_code_start); + for (i = 0; i < vdso_info[abi].vdso_pages; i++) + vdso_pagelist[i] = pfn_to_page(pfn + i); - /* Grab the vDSO code pages. */ - pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start); + vdso_info[abi].cm->pages = vdso_pagelist; + + return 0; +} + +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a task + * changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); - for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++) - vdso_pagelist[i + 1] = pfn_to_page(pfn + i); + mmap_read_lock(mm); - vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0]; - vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1]; + for_each_vma(vmi, vma) { + if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm)) + zap_vma_pages(vma); +#ifdef CONFIG_COMPAT_VDSO + if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm)) + zap_vma_pages(vma); +#endif + } + mmap_read_unlock(mm); return 0; } +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = sym_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = sym_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } -static int __setup_additional_pages(enum arch_vdso_type arch_index, + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +static int __setup_additional_pages(enum vdso_abi abi, struct mm_struct *mm, struct linux_binprm *bprm, int uses_interp) { unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + unsigned long gp_flags = 0; void *ret; - vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT; + BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + + vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ - vdso_mapping_len = vdso_text_len + PAGE_SIZE; + vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE; vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { @@ -154,18 +203,21 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index, goto up_fail; } - ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, - vdso_lookup[arch_index].dm); + ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE, + VM_READ|VM_MAYREAD|VM_PFNMAP, + vdso_info[abi].dm); if (IS_ERR(ret)) goto up_fail; - vdso_base += PAGE_SIZE; + if (system_supports_bti_kernel()) + gp_flags = VM_ARM64_BTI; + + vdso_base += VVAR_NR_PAGES * PAGE_SIZE; mm->context.vdso = (void *)vdso_base; ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|gp_flags| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_lookup[arch_index].cm); + vdso_info[abi].cm); if (IS_ERR(ret)) goto up_fail; @@ -180,52 +232,42 @@ up_fail: /* * Create and map the vectors page for AArch32 tasks. */ -#ifdef CONFIG_COMPAT_VDSO -static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, - struct vm_area_struct *new_vma) +enum aarch32_map { + AA32_MAP_VECTORS, /* kuser helpers */ + AA32_MAP_SIGPAGE, + AA32_MAP_VVAR, + AA32_MAP_VDSO, +}; + +static struct page *aarch32_vectors_page __ro_after_init; +static struct page *aarch32_sig_page __ro_after_init; + +static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) { - return __vdso_remap(ARM64_VDSO32, sm, new_vma); + current->mm->context.sigpage = (void *)new_vma->vm_start; + + return 0; } -#endif /* CONFIG_COMPAT_VDSO */ -/* - * aarch32_vdso_pages: - * 0 - kuser helpers - * 1 - sigreturn code - * or (CONFIG_COMPAT_VDSO): - * 0 - kuser helpers - * 1 - vdso data - * 2 - vdso code - */ -#define C_VECTORS 0 -#ifdef CONFIG_COMPAT_VDSO -#define C_VVAR 1 -#define C_VDSO 2 -#define C_PAGES (C_VDSO + 1) -#else -#define C_SIGPAGE 1 -#define C_PAGES (C_SIGPAGE + 1) -#endif /* CONFIG_COMPAT_VDSO */ -static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init; -static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { - { +static struct vm_special_mapping aarch32_vdso_maps[] = { + [AA32_MAP_VECTORS] = { .name = "[vectors]", /* ABI */ - .pages = &aarch32_vdso_pages[C_VECTORS], + .pages = &aarch32_vectors_page, }, -#ifdef CONFIG_COMPAT_VDSO - { + [AA32_MAP_SIGPAGE] = { + .name = "[sigpage]", /* ABI */ + .pages = &aarch32_sig_page, + .mremap = aarch32_sigpage_mremap, + }, + [AA32_MAP_VVAR] = { .name = "[vvar]", + .fault = vvar_fault, }, - { + [AA32_MAP_VDSO] = { .name = "[vdso]", - .mremap = aarch32_vdso_mremap, - }, -#else - { - .name = "[sigpage]", /* ABI */ - .pages = &aarch32_vdso_pages[C_SIGPAGE], + .mremap = vdso_mremap, }, -#endif /* CONFIG_COMPAT_VDSO */ }; static int aarch32_alloc_kuser_vdso_page(void) @@ -237,69 +279,59 @@ static int aarch32_alloc_kuser_vdso_page(void) if (!IS_ENABLED(CONFIG_KUSER_HELPERS)) return 0; - vdso_page = get_zeroed_page(GFP_ATOMIC); + vdso_page = get_zeroed_page(GFP_KERNEL); if (!vdso_page) return -ENOMEM; memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start, kuser_sz); - aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_page); - flush_dcache_page(aarch32_vdso_pages[C_VECTORS]); + aarch32_vectors_page = virt_to_page((void *)vdso_page); return 0; } -#ifdef CONFIG_COMPAT_VDSO -static int __aarch32_alloc_vdso_pages(void) -{ - int ret; - - vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR]; - vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO]; - - ret = __vdso_init(ARM64_VDSO32); - if (ret) - return ret; - - ret = aarch32_alloc_kuser_vdso_page(); - if (ret) { - unsigned long c_vvar = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VVAR]); - unsigned long c_vdso = - (unsigned long)page_to_virt(aarch32_vdso_pages[C_VDSO]); - - free_page(c_vvar); - free_page(c_vdso); - } - - return ret; -} -#else -static int __aarch32_alloc_vdso_pages(void) +#define COMPAT_SIGPAGE_POISON_WORD 0xe7fddef1 +static int aarch32_alloc_sigpage(void) { extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; - unsigned long sigpage; - int ret; + __le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD); + void *sigpage; - sigpage = get_zeroed_page(GFP_ATOMIC); + sigpage = (void *)__get_free_page(GFP_KERNEL); if (!sigpage) return -ENOMEM; - memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); - aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(sigpage); - flush_dcache_page(aarch32_vdso_pages[C_SIGPAGE]); + memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison)); + memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz); + aarch32_sig_page = virt_to_page(sigpage); + return 0; +} - ret = aarch32_alloc_kuser_vdso_page(); - if (ret) - free_page(sigpage); +static int __init __aarch32_alloc_vdso_pages(void) +{ - return ret; + if (!IS_ENABLED(CONFIG_COMPAT_VDSO)) + return 0; + + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; + vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; + + return __vdso_init(VDSO_ABI_AA32); } -#endif /* CONFIG_COMPAT_VDSO */ static int __init aarch32_alloc_vdso_pages(void) { - return __aarch32_alloc_vdso_pages(); + int ret; + + ret = __aarch32_alloc_vdso_pages(); + if (ret) + return ret; + + ret = aarch32_alloc_sigpage(); + if (ret) + return ret; + + return aarch32_alloc_kuser_vdso_page(); } arch_initcall(aarch32_alloc_vdso_pages); @@ -317,12 +349,11 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC, - &aarch32_vdso_spec[C_VECTORS]); + &aarch32_vdso_maps[AA32_MAP_VECTORS]); return PTR_ERR_OR_ZERO(ret); } -#ifndef CONFIG_COMPAT_VDSO static int aarch32_sigreturn_setup(struct mm_struct *mm) { unsigned long addr; @@ -341,63 +372,53 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) ret = _install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, - &aarch32_vdso_spec[C_SIGPAGE]); + &aarch32_vdso_maps[AA32_MAP_SIGPAGE]); if (IS_ERR(ret)) goto out; - mm->context.vdso = (void *)addr; + mm->context.sigpage = (void *)addr; out: return PTR_ERR_OR_ZERO(ret); } -#endif /* !CONFIG_COMPAT_VDSO */ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; int ret; - if (down_write_killable(&mm->mmap_sem)) + if (mmap_write_lock_killable(mm)) return -EINTR; ret = aarch32_kuser_helpers_setup(mm); if (ret) goto out; -#ifdef CONFIG_COMPAT_VDSO - ret = __setup_additional_pages(ARM64_VDSO32, - mm, - bprm, - uses_interp); -#else - ret = aarch32_sigreturn_setup(mm); -#endif /* CONFIG_COMPAT_VDSO */ + if (IS_ENABLED(CONFIG_COMPAT_VDSO)) { + ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm, + uses_interp); + if (ret) + goto out; + } + ret = aarch32_sigreturn_setup(mm); out: - up_write(&mm->mmap_sem); + mmap_write_unlock(mm); return ret; } #endif /* CONFIG_COMPAT */ -static int vdso_mremap(const struct vm_special_mapping *sm, - struct vm_area_struct *new_vma) -{ - return __vdso_remap(ARM64_VDSO, sm, new_vma); -} +enum aarch64_map { + AA64_MAP_VVAR, + AA64_MAP_VDSO, +}; -/* - * aarch64_vdso_pages: - * 0 - vvar - * 1 - vdso - */ -#define A_VVAR 0 -#define A_VDSO 1 -#define A_PAGES (A_VDSO + 1) -static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { - { +static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = { + [AA64_MAP_VVAR] = { .name = "[vvar]", + .fault = vvar_fault, }, - { + [AA64_MAP_VDSO] = { .name = "[vdso]", .mremap = vdso_mremap, }, @@ -405,28 +426,23 @@ static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { static int __init vdso_init(void) { - vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR]; - vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO]; + vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR]; + vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO]; - return __vdso_init(ARM64_VDSO); + return __vdso_init(VDSO_ABI_AA64); } arch_initcall(vdso_init); -int arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp) +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; int ret; - if (down_write_killable(&mm->mmap_sem)) + if (mmap_write_lock_killable(mm)) return -EINTR; - ret = __setup_additional_pages(ARM64_VDSO, - mm, - bprm, - uses_interp); - - up_write(&mm->mmap_sem); + ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp); + mmap_write_unlock(mm); return ret; } |