author    Stephen Rothwell <sfr@canb.auug.org.au>  2017-04-10 16:52:12 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au>  2017-04-10 16:52:12 +1000
commit    0a62d30b18f1aa2e30ee68856ece0147b611f2dd (patch)
tree      37d61ac30b5924250a65dad4461ef61f3bc3eb09
parent    4f0756a18f441faf0475f2fdbb1ee66ad38280ab (diff)
parent    7e4ccb5ab136881c742e96b6b5fa333b8a2b2ebf (diff)
Merge branch 'akpm/master'
-rw-r--r--Documentation/filesystems/vfs.txt3
-rw-r--r--Documentation/vm/hmm.txt362
-rw-r--r--Documentation/vm/transhuge.txt10
-rw-r--r--MAINTAINERS7
-rw-r--r--arch/arc/kernel/unwind.c2
-rw-r--r--arch/arm/include/asm/cacheflush.h20
-rw-r--r--arch/arm/include/asm/set_memory.h32
-rw-r--r--arch/arm/kernel/ftrace.c1
-rw-r--r--arch/arm/kernel/kgdb.c2
-rw-r--r--arch/arm/kernel/machine_kexec.c1
-rw-r--r--arch/arm/mach-ixp4xx/common-pci.c4
-rw-r--r--arch/arm/mach-omap2/board-n8x0.c2
-rw-r--r--arch/arm/mm/pageattr.c1
-rw-r--r--arch/arm/net/bpf_jit_32.c1
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/cacheflush.h4
-rw-r--r--arch/arm64/mm/pageattr.c1
-rw-r--r--arch/arm64/net/bpf_jit_comp.c1
-rw-r--r--arch/ia64/mm/init.c36
-rw-r--r--arch/m68k/ifpsp060/src/ilsp.S2
-rw-r--r--arch/m68k/ifpsp060/src/isp.S2
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c2
-rw-r--r--arch/mips/dec/prom/init.c6
-rw-r--r--arch/mips/include/asm/octeon/cvmx-helper-rgmii.h2
-rw-r--r--arch/mips/kernel/traps.c4
-rw-r--r--arch/parisc/kernel/entry.S2
-rw-r--r--arch/parisc/kernel/module.c2
-rw-r--r--arch/powerpc/mm/icswx.c2
-rw-r--r--arch/powerpc/mm/mem.c37
-rw-r--r--arch/s390/include/asm/Kbuild1
-rw-r--r--arch/s390/include/asm/set_memory.h (renamed from arch/s390/include/asm/cacheflush.h)9
-rw-r--r--arch/s390/kernel/ftrace.c1
-rw-r--r--arch/s390/kernel/kprobes.c2
-rw-r--r--arch/s390/kernel/machine_kexec.c1
-rw-r--r--arch/s390/kvm/kvm-s390.c10
-rw-r--r--arch/s390/mm/init.c17
-rw-r--r--arch/s390/mm/pageattr.c1
-rw-r--r--arch/s390/mm/vmem.c1
-rw-r--r--arch/s390/net/bpf_jit_comp.c1
-rw-r--r--arch/sh/mm/init.c35
-rw-r--r--arch/sparc/lib/NG4memset.S26
-rw-r--r--arch/x86/include/asm/cacheflush.h85
-rw-r--r--arch/x86/include/asm/set_memory.h87
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c2
-rw-r--r--arch/x86/kernel/ftrace.c2
-rw-r--r--arch/x86/kernel/machine_kexec_32.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c1
-rw-r--r--arch/x86/kernel/module.c2
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/page_track.c4
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/mm/init.c2
-rw-r--r--arch/x86/mm/init_32.c43
-rw-r--r--arch/x86/mm/init_64.c59
-rw-r--r--arch/x86/mm/ioremap.c2
-rw-r--r--arch/x86/mm/pageattr.c1
-rw-r--r--arch/x86/net/bpf_jit_comp.c1
-rw-r--r--arch/x86/pci/pcbios.c2
-rw-r--r--arch/x86/platform/efi/efi.c2
-rw-r--r--arch/x86/realmode/init.c2
-rw-r--r--certs/blacklist.c2
-rw-r--r--crypto/lzo.c4
-rw-r--r--drivers/acpi/apei/erst.c8
-rw-r--r--drivers/acpi/cppc_acpi.c2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c2
-rw-r--r--drivers/char/agp/amd-k7-agp.c1
-rw-r--r--drivers/char/agp/ati-agp.c1
-rw-r--r--drivers/char/agp/generic.c12
-rw-r--r--drivers/char/agp/intel-gtt.c1
-rw-r--r--drivers/char/agp/sworks-agp.c1
-rw-r--r--drivers/char/dsp56k.c2
-rw-r--r--drivers/clk/qcom/common.c2
-rw-r--r--drivers/cpufreq/powernow-k8.c3
-rw-r--r--drivers/cpufreq/sti-cpufreq.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c3
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c2
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_dump.c4
-rw-r--r--drivers/gpu/drm/gma500/gtt.c1
-rw-r--r--drivers/gpu/drm/gma500/psb_drv.c1
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c4
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c3
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc.c3
-rw-r--r--drivers/gpu/drm/ttm/ttm_page_alloc_dma.c3
-rw-r--r--drivers/gpu/drm/ttm/ttm_tt.c3
-rw-r--r--drivers/hwtracing/intel_th/msu.c4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c2
-rw-r--r--drivers/leds/leds-lp5521.c2
-rw-r--r--drivers/leds/leds-lp5523.c2
-rw-r--r--drivers/leds/leds-lp5562.c2
-rw-r--r--drivers/md/bcache/super.c8
-rw-r--r--drivers/md/bcache/util.h12
-rw-r--r--drivers/md/dm-bufio.c2
-rw-r--r--drivers/md/dm-ioctl.c13
-rw-r--r--drivers/md/dm-stats.c7
-rw-r--r--drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h2
-rw-r--r--drivers/misc/sram-exec.c3
-rw-r--r--drivers/net/can/rcar/rcar_canfd.c2
-rw-r--r--drivers/net/ethernet/amd/amd8111e.h4
-rw-r--r--drivers/net/ethernet/amd/atarilance.c4
-rw-r--r--drivers/net/ethernet/amd/declance.c2
-rw-r--r--drivers/net/ethernet/amd/sun3lance.c3
-rw-r--r--drivers/net/ethernet/atheros/atl1c/atl1c_hw.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c29
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/l2t.c8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/l2t.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c12
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c10
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c31
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c14
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/l2t.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sched.c12
-rw-r--r--drivers/net/ethernet/cirrus/mac89x0.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h2
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_phy.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mr.c9
-rw-r--r--drivers/net/ethernet/natsemi/sonic.h2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_int.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c2
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c8
-rw-r--r--drivers/net/ethernet/toshiba/tc35815.c2
-rw-r--r--drivers/net/fddi/defxx.c2
-rw-r--r--drivers/net/hippi/rrunner.c3
-rw-r--r--drivers/nvdimm/dimm_devs.c5
-rw-r--r--drivers/scsi/isci/registers.h4
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c2
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.h2
-rw-r--r--drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c11
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_bo.c3
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_dynamic_pool.c2
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_reserved_pool.c3
-rw-r--r--drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c5
-rw-r--r--drivers/staging/most/mostcore/core.c2
-rw-r--r--drivers/tty/n_hdlc.c10
-rw-r--r--drivers/tty/serial/st-asc.c2
-rw-r--r--drivers/vhost/net.c9
-rw-r--r--drivers/vhost/vhost.c15
-rw-r--r--drivers/vhost/vsock.c9
-rw-r--r--drivers/video/fbdev/intelfb/intelfbdrv.c2
-rw-r--r--drivers/video/fbdev/vermilion/vermilion.c2
-rw-r--r--drivers/watchdog/hpwdt.c2
-rw-r--r--drivers/xen/evtchn.c14
-rw-r--r--fs/aio.c8
-rw-r--r--fs/btrfs/ctree.c9
-rw-r--r--fs/btrfs/free-space-tree.c3
-rw-r--r--fs/btrfs/ioctl.c9
-rw-r--r--fs/btrfs/send.c27
-rw-r--r--fs/buffer.c13
-rw-r--r--fs/ceph/file.c9
-rw-r--r--fs/dcache.c18
-rw-r--r--fs/exofs/dir.c3
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/f2fs/data.c5
-rw-r--r--fs/f2fs/f2fs.h20
-rw-r--r--fs/f2fs/file.c4
-rw-r--r--fs/f2fs/node.c6
-rw-r--r--fs/f2fs/segment.c14
-rw-r--r--fs/file.c2
-rw-r--r--fs/hfs/extent.c4
-rw-r--r--fs/hfsplus/extents.c5
-rw-r--r--fs/hugetlbfs/inode.c5
-rw-r--r--fs/inode.c16
-rw-r--r--fs/iomap.c13
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c10
-rw-r--r--fs/proc/task_mmu.c7
-rw-r--r--fs/select.c5
-rw-r--r--fs/seq_file.c16
-rw-r--r--fs/ubifs/file.c5
-rw-r--r--fs/xattr.c27
-rw-r--r--fs/xfs/kmem.c2
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--include/asm-generic/set_memory.h12
-rw-r--r--include/drm/drm_mem_util.h9
-rw-r--r--include/linux/bcma/bcma_driver_pci.h2
-rw-r--r--include/linux/bootmem.h2
-rw-r--r--include/linux/filter.h4
-rw-r--r--include/linux/fs.h5
-rw-r--r--include/linux/ftrace.h2
-rw-r--r--include/linux/hmm.h468
-rw-r--r--include/linux/ioport.h1
-rw-r--r--include/linux/kref.h6
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--include/linux/memory_hotplug.h34
-rw-r--r--include/linux/memremap.h57
-rw-r--r--include/linux/migrate.h115
-rw-r--r--include/linux/migrate_mode.h5
-rw-r--r--include/linux/mlx4/device.h2
-rw-r--r--include/linux/mlx5/driver.h7
-rw-r--r--include/linux/mm.h36
-rw-r--r--include/linux/mm_types.h5
-rw-r--r--include/linux/swap.h24
-rw-r--r--include/linux/swapops.h68
-rw-r--r--include/linux/uaccess.h1
-rw-r--r--include/linux/vmalloc.h1
-rw-r--r--include/uapi/linux/ipmi.h2
-rw-r--r--ipc/util.c7
-rw-r--r--kernel/bpf/core.c9
-rw-r--r--kernel/bpf/syscall.c3
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/groups.c2
-rw-r--r--kernel/locking/qspinlock_paravirt.h3
-rw-r--r--kernel/memremap.c51
-rw-r--r--kernel/module.c5
-rw-r--r--kernel/pid.c7
-rw-r--r--kernel/power/snapshot.c3
-rw-r--r--lib/iov_iter.c5
-rw-r--r--lib/rhashtable.c13
-rw-r--r--mm/Kconfig44
-rw-r--r--mm/Makefile1
-rw-r--r--mm/balloon_compaction.c8
-rw-r--r--mm/filemap.c6
-rw-r--r--mm/frame_vector.c5
-rw-r--r--mm/hmm.c1205
-rw-r--r--mm/kasan/kasan.c2
-rw-r--r--mm/memory.c61
-rw-r--r--mm/memory_hotplug.c14
-rw-r--r--mm/migrate.c785
-rw-r--r--mm/mprotect.c14
-rw-r--r--mm/nommu.c8
-rw-r--r--mm/page_alloc.c31
-rw-r--r--mm/page_vma_mapped.c10
-rw-r--r--mm/rmap.c25
-rw-r--r--mm/util.c57
-rw-r--r--mm/vmalloc.c23
-rw-r--r--mm/zsmalloc.c8
-rw-r--r--net/ceph/ceph_common.c2
-rw-r--r--net/core/dev.c24
-rw-r--r--net/decnet/af_decnet.c3
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/tcp_metrics.c5
-rw-r--r--net/ipv6/ila/ila_xlat.c8
-rw-r--r--net/mpls/af_mpls.c5
-rw-r--r--net/netfilter/x_tables.c24
-rw-r--r--net/netfilter/xt_recent.c5
-rw-r--r--net/sched/sch_choke.c5
-rw-r--r--net/sched/sch_fq.c12
-rw-r--r--net/sched/sch_fq_codel.c26
-rw-r--r--net/sched/sch_hhf.c33
-rw-r--r--net/sched/sch_netem.c6
-rw-r--r--net/sched/sch_sfq.c6
-rw-r--r--scripts/spelling.txt8
-rw-r--r--security/apparmor/apparmorfs.c2
-rw-r--r--security/apparmor/include/lib.h11
-rw-r--r--security/apparmor/lib.c30
-rw-r--r--security/apparmor/match.c2
-rw-r--r--security/apparmor/policy_unpack.c2
-rw-r--r--security/keys/keyctl.c22
-rw-r--r--sound/pci/hda/hda_intel.c2
-rw-r--r--sound/pci/intel8x0.c4
-rw-r--r--sound/soc/soc-core.c2
-rw-r--r--sound/x86/intel_hdmi_audio.c2
-rw-r--r--virt/kvm/kvm_main.c18
263 files changed, 4274 insertions, 971 deletions
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 94dd27ef4a76..f42b90687d40 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -694,8 +694,7 @@ struct address_space_operations {
write_end: After a successful write_begin, and data copy, write_end must
be called. len is the original len passed to write_begin, and copied
- is the amount that was able to be copied (copied == len is always true
- if write_begin was called with the AOP_FLAG_UNINTERRUPTIBLE flag).
+ is the amount that was able to be copied.
The filesystem must take care of unlocking the page and releasing it
refcount, and updating i_size.
diff --git a/Documentation/vm/hmm.txt b/Documentation/vm/hmm.txt
new file mode 100644
index 000000000000..a18ffc06f63a
--- /dev/null
+++ b/Documentation/vm/hmm.txt
@@ -0,0 +1,362 @@
+Heterogeneous Memory Management (HMM)
+
+HMM transparently allows any component of a program to use any memory region
+of that program with a device, without going through a device-specific memory
+allocator. This is becoming a requirement to simplify the use of advanced
+heterogeneous computing, where GPUs, DSPs or FPGAs are used to perform various
+computations.
+
+This document is divided as follows: the first section exposes the problems
+related to the use of a device-specific allocator. The second section exposes
+the hardware limitations that are inherent to many platforms. The third section
+gives an overview of the HMM design. The fourth section explains how CPU page
+table mirroring works and what HMM's purpose is in this context. The fifth
+section deals with how device memory is represented inside the kernel. Finally,
+the last section presents the new migration helper that allows device DMA
+engines to be leveraged.
+
+
+-------------------------------------------------------------------------------
+
+1) Problems of using a device-specific memory allocator
+
+Devices with a large amount of on-board memory (several gigabytes), such as
+GPUs, have historically managed their memory through a dedicated,
+driver-specific API. This creates a disconnect between memory allocated and
+managed by the device driver and regular application memory (private anonymous
+memory, shared memory or regular file-backed memory). From here on this is
+referred to as a split address space, and "shared address space" refers to the
+opposite situation, i.e. one in which any memory region can be used by the
+device transparently.
+
+The address space is split because the device can only access memory allocated
+through the device-specific API. This implies that not all memory objects in a
+program are equal from the device's point of view, which complicates large
+programs that rely on a wide set of libraries.
+
+Concretely, this means that code that wants to leverage a device such as a GPU
+needs to copy objects between generically allocated memory (malloc, private or
+shared mmap) and memory allocated through the device driver API (which still
+ends up as an mmap, but of the device file).
+
+For flat data sets (array, grid, image, ...) this isn't too hard to achieve,
+but complex data sets (list, tree, ...) are hard to get right. Duplicating a
+complex data set requires re-mapping all the pointer relations between each of
+its elements. This is error prone, and programs get harder to debug because of
+the duplicated data set.
+
+A split address space also means that libraries cannot transparently use data
+they get from the core program or from other libraries, and thus each library
+might have to duplicate its input data set using a device-specific memory
+allocator. Large projects suffer from this and waste resources because of the
+various memory copies.
+
+Duplicating each library API to accept as input or output memory allocated by
+each device-specific allocator is not a viable option. It would lead to a
+combinatorial explosion in the library entry points.
+
+Finally, with the advance of high-level language constructs (in C++ but also in
+other languages) it is now possible for the compiler to leverage GPUs or other
+devices without the programmer even knowing. Some compiler-identified patterns
+are only doable with a shared address space. It is also more reasonable to use
+a shared address space for all the other patterns.
+
+
+-------------------------------------------------------------------------------
+
+2) System bus, device memory characteristics
+
+System buses cripple shared address spaces due to a few limitations. Most
+system buses only allow basic memory access from the device to main memory;
+even cache coherency is often optional. Access to device memory from the CPU
+is even more limited; more often than not it is not cache coherent.
+
+If we only consider the PCIe bus, then the device can access main memory (often
+through an IOMMU) and be cache coherent with the CPUs. However, it only allows
+a limited set of atomic operations from the device on main memory. This is
+worse in the other direction: the CPUs can only access a limited range of the
+device memory and cannot perform atomic operations on it. Thus device memory
+cannot be considered like regular memory from the kernel's point of view.
+
+Another crippling factor is the limited bandwidth (~32 GBytes/s with PCIe 4.0
+and 16 lanes). This is 33 times less than the fastest GPU memory (1 TBytes/s).
+The final limitation is latency; access to main memory from the device has an
+order of magnitude higher latency than when the device accesses its own memory.
+
+Some platforms are developing new system buses or additions/modifications to
+PCIe to address some of those limitations (OpenCAPI, CCIX). They mainly allow
+two-way cache coherency between CPU and device and allow all atomic operations
+the architecture supports. Sadly, not all platforms are following this trend,
+and some major architectures are left without hardware solutions to those
+problems.
+
+So for a shared address space to make sense, not only must we allow the device
+to access any memory, but we must also permit any memory to be migrated to
+device memory while the device is using it (blocking CPU access while that
+happens).
+
+
+-------------------------------------------------------------------------------
+
+3) Shared address space and migration
+
+HMM intends to provide two main features. The first one is to share the address
+space by duplicating the CPU page table into the device page table, so the same
+address points to the same memory, and this for any valid main memory address
+in the process address space.
+
+To achieve this, HMM offers a set of helpers to populate the device page table
+while keeping track of CPU page table updates. Device page table updates are
+not as easy as CPU page table updates. To update the device page table, you
+must allocate a buffer (or use a pool of pre-allocated buffers) and write
+GPU-specific commands into it to perform the update (unmap, cache invalidations
+and flushes, ...). This cannot be done through common code for all devices,
+hence why HMM provides helpers to factor out everything that can be, while
+leaving the gory details to the device driver.
+
+The second mechanism HMM provides is a new kind of ZONE_DEVICE memory that
+allows allocating a struct page for each page of the device memory. Those pages
+are special because the CPU cannot map them. However, they allow migrating main
+memory to device memory using the existing migration mechanism, and from the
+CPU's point of view everything looks as if the page was swapped out to disk.
+Using a struct page gives the easiest and cleanest integration with existing mm
+mechanisms. Here again HMM only provides helpers, first to hotplug new
+ZONE_DEVICE memory for the device memory, and second to perform the migration.
+Policy decisions about what and when to migrate are left to the device driver.
+
+Note that any CPU access to a device page triggers a page fault and a migration
+back to main memory, i.e. when a page backing a given address A is migrated
+from a main memory page to a device page, then any CPU access to address A
+triggers a page fault and initiates a migration back to main memory.
+
+
+With these two features, HMM not only allows a device to mirror a process
+address space, keeping both the CPU and device page tables synchronized, but
+also allows leveraging device memory by migrating the parts of a data set that
+are actively used by a device.
+
+
+-------------------------------------------------------------------------------
+
+4) Address space mirroring implementation and API
+
+The main objective of address space mirroring is to allow duplicating a range
+of the CPU page table into a device page table; HMM helps keep both
+synchronized. A device driver that wants to mirror a process address space must
+start with the registration of an hmm_mirror struct:
+
+ int hmm_mirror_register(struct hmm_mirror *mirror,
+ struct mm_struct *mm);
+ int hmm_mirror_register_locked(struct hmm_mirror *mirror,
+ struct mm_struct *mm);
+
+The locked variant is to be used when the driver is already holding the
+mmap_sem of the mm in write mode. The mirror struct has a set of callbacks that
+are used to propagate CPU page table updates:
+
+ struct hmm_mirror_ops {
+ /* sync_cpu_device_pagetables() - synchronize page tables
+ *
+ * @mirror: pointer to struct hmm_mirror
+ * @update_type: type of update that occurred to the CPU page table
+ * @start: virtual start address of the range to update
+ * @end: virtual end address of the range to update
+ *
+ * This callback ultimately originates from mmu_notifiers when the CPU
+ * page table is updated. The device driver must update its page table
+ * in response to this callback. The update argument tells what action
+ * to perform.
+ *
+ * The device driver must not return from this callback until the device
+ * page tables are completely updated (TLBs flushed, etc); this is a
+ * synchronous call.
+ */
+ void (*update)(struct hmm_mirror *mirror,
+ enum hmm_update action,
+ unsigned long start,
+ unsigned long end);
+ };
+
+The device driver must perform the update to the range following the action
+(turn the range read only, fully unmap it, ...). Once the driver callback
+returns, the device must be done with the update.
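+
+For illustration only, an update() callback could look roughly like the sketch
+below. It follows the same pseudo-code style as the usage pattern shown later
+in this section; driver->update and driver_invalidate_device_range() are
+hypothetical driver-side names, not part of the HMM API:
+
+ static void driver_update(struct hmm_mirror *mirror,
+ enum hmm_update action,
+ unsigned long start,
+ unsigned long end)
+ {
+ take_lock(driver->update);
+ /* Invalidate or write protect the device page table entries
+ * covering [start, end) according to action, then wait for the
+ * device to flush its TLB before returning.
+ */
+ driver_invalidate_device_range(action, start, end);
+ release_lock(driver->update);
+ }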
+
+
+When the device driver wants to populate a range of virtual addresses, it can
+use either:
+ int hmm_vma_get_pfns(struct vm_area_struct *vma,
+ struct hmm_range *range,
+ unsigned long start,
+ unsigned long end,
+ hmm_pfn_t *pfns);
+ int hmm_vma_fault(struct vm_area_struct *vma,
+ struct hmm_range *range,
+ unsigned long start,
+ unsigned long end,
+ hmm_pfn_t *pfns,
+ bool write,
+ bool block);
+
+The first one (hmm_vma_get_pfns()) will only fetch present CPU page table
+entries and will not trigger a page fault on missing or non-present entries.
+The second one does trigger a page fault on missing or read-only entries if the
+write parameter is true. Page faults use the generic mm page fault code path,
+just like a CPU page fault.
+
+Both functions copy CPU page table entries into their pfns array argument. Each
+entry in that array corresponds to an address in the virtual range. HMM
+provides a set of flags to help the driver identify special CPU page table
+entries.
+
+Locking with the update() callback is the most important aspect the driver must
+respect in order to keep things properly synchronized. The usage pattern is:
+
+ int driver_populate_range(...)
+ {
+ struct hmm_range range;
+ ...
+ again:
+ ret = hmm_vma_get_pfns(vma, &range, start, end, pfns);
+ if (ret)
+ return ret;
+ take_lock(driver->update);
+ if (!hmm_vma_range_done(vma, &range)) {
+ release_lock(driver->update);
+ goto again;
+ }
+
+ // Use pfns array content to update device page table
+
+ release_lock(driver->update);
+ return 0;
+ }
+
+The driver->update lock is the same lock that the driver takes inside its
+update() callback. That lock must be held before calling hmm_vma_range_done()
+to avoid any race with a concurrent CPU page table update.
+
+HMM implements all of this on top of the mmu_notifier API because we wanted a
+simpler API, and also to be able to perform optimizations later on, such as
+doing concurrent device updates in a multi-device scenario.
+
+HMM also serves as an impedance match between how CPU page table updates are
+done (by the CPU writing to the page table and flushing TLBs) and how devices
+update their own page tables. A device update is a multi-step process: first
+appropriate commands are written to a buffer, then this buffer is scheduled for
+execution on the device. It is only once the device has executed the commands
+in the buffer that the update is done. Creating and scheduling the update
+command buffers can happen concurrently for multiple devices. Waiting for each
+device to report commands as executed is serialized (there is no point in doing
+this concurrently).
+
+
+-------------------------------------------------------------------------------
+
+5) Represent and manage device memory from the core kernel's point of view
+
+Several different designs were tried to support device memory. The first one
+used a device-specific data structure to keep information about migrated memory
+and HMM hooked itself into various places of the mm code to handle any access
+to addresses that were backed by device memory. It turned out that this ended
+up replicating most of the fields of struct page and also required many kernel
+code paths to be updated to understand this new kind of memory.
+
+The thing is, most kernel code paths never try to access the memory behind a
+page but only care about the struct page contents. Because of this, HMM
+switched to directly using struct page for device memory, which leaves most
+kernel code paths unaware of the difference. We only need to make sure that no
+one ever tries to map those pages from the CPU side.
+
+HMM provides a set of helpers to register and hotplug device memory as a new
+region needing a struct page. This is offered through a very simple API:
+
+ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
+ struct device *device,
+ unsigned long size);
+ void hmm_devmem_remove(struct hmm_devmem *devmem);
+
+The hmm_devmem_ops structure is where most of the important things are:
+
+ struct hmm_devmem_ops {
+ void (*free)(struct hmm_devmem *devmem, struct page *page);
+ int (*fault)(struct hmm_devmem *devmem,
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ struct page *page,
+ unsigned flags,
+ pmd_t *pmdp);
+ };
+
+The first callback (free()) happens when the last reference on a device page is
+dropped. This means the device page is now free and no longer used by anyone.
+The second callback happens whenever the CPU tries to access a device page,
+which it cannot do. This second callback must trigger a migration back to
+system memory; HMM provides a helper to do just that:
+
+ int hmm_devmem_fault_range(struct hmm_devmem *devmem,
+ struct vm_area_struct *vma,
+ const struct migrate_vma_ops *ops,
+ unsigned long mentry,
+ unsigned long *src,
+ unsigned long *dst,
+ unsigned long start,
+ unsigned long addr,
+ unsigned long end,
+ void *private);
+
+It relies on the new migrate_vma() helper, which is a generic page migration
+helper that works on a range of virtual addresses instead of working on
+individual pages. It also allows leveraging the device DMA engine to perform
+the copy from device to main memory (or in the other direction). The next
+section goes over this new helper.
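+
+As a rough illustration of the registration side, a driver could hotplug its
+memory at probe time along the lines of the sketch below. driver_devmem_ops,
+driver_devmem_free(), driver_devmem_fault() and device_memory_size are
+hypothetical driver-side names, and the ERR_PTR() convention for
+hmm_devmem_add() failure is an assumption here:
+
+ static const struct hmm_devmem_ops driver_devmem_ops = {
+ .free = driver_devmem_free,
+ .fault = driver_devmem_fault,
+ };
+
+ static int driver_probe(struct device *device)
+ {
+ struct hmm_devmem *devmem;
+
+ /* Hotplug the device memory so that each of its pages gets a
+ * struct page the rest of the kernel can work with.
+ */
+ devmem = hmm_devmem_add(&driver_devmem_ops, device,
+ device_memory_size);
+ if (IS_ERR(devmem))
+ return PTR_ERR(devmem);
+
+ /* Keep devmem around so fault()/free() can find the driver
+ * state and so it can be torn down with hmm_devmem_remove().
+ */
+ return 0;
+ }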
+
+
+-------------------------------------------------------------------------------
+
+6) Migrate to and from device memory
+
+Because the CPU cannot access device memory, a migration must use the device
+DMA engine to perform the copy from and to device memory. For this we need a
+new migration helper:
+
+ int migrate_vma(const struct migrate_vma_ops *ops,
+ struct vm_area_struct *vma,
+ unsigned long mentries,
+ unsigned long start,
+ unsigned long end,
+ unsigned long *src,
+ unsigned long *dst,
+ void *private);
+
+Unlike other migration functions, it works on a range of virtual addresses.
+There are two reasons for that. First, a device DMA copy has a high setup
+overhead cost, and thus batching multiple pages is needed, as otherwise the
+migration overhead makes the whole exercise pointless. The second reason is
+that drivers trigger such migrations based on the range of addresses the device
+is actively accessing.
+
+The migrate_vma_ops struct defines two callbacks. The first one
+(alloc_and_copy()) controls destination memory allocation and the copy
+operation. The second one is there to allow the device driver to perform
+cleanup operations after migration.
+
+ struct migrate_vma_ops {
+ void (*alloc_and_copy)(struct vm_area_struct *vma,
+ const unsigned long *src,
+ unsigned long *dst,
+ unsigned long start,
+ unsigned long end,
+ void *private);
+ void (*finalize_and_map)(struct vm_area_struct *vma,
+ const unsigned long *src,
+ const unsigned long *dst,
+ unsigned long start,
+ unsigned long end,
+ void *private);
+ };
+
+It is important to stress that this migration helper allows for holes in the
+virtual address range. Some pages in the range might not be migrated for all
+the usual reasons (page is pinned, page is locked, ...). This helper does not
+fail but just skips over those pages.
+
+The alloc_and_copy() callback might also decide not to migrate all pages in the
+range (for reasons under the callback's control). For those, the callback just
+has to leave the corresponding dst entry empty.
+
+Finally, the migration of a struct page might fail (for file-backed pages) for
+various reasons (failure to freeze the reference, or to update the page cache,
+...). If that happens, then finalize_and_map() can catch any pages that were
+not migrated. Note that those pages were still copied to a new page and thus we
+wasted bandwidth, but this is considered a rare event and a price that we are
+willing to pay to keep all the code simpler.
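+
+As a rough illustration, the pieces above could be wired together as in the
+sketch below. driver_alloc_and_copy(), driver_finalize_and_map() and the fixed
+size of the src/dst arrays are hypothetical, and mentries is assumed here to be
+the number of entries in those arrays; only migrate_vma() and struct
+migrate_vma_ops come from the API described above:
+
+ static const struct migrate_vma_ops driver_migrate_ops = {
+ .alloc_and_copy = driver_alloc_and_copy,
+ .finalize_and_map = driver_finalize_and_map,
+ };
+
+ static int driver_migrate_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end)
+ {
+ unsigned long npages = (end - start) >> PAGE_SHIFT;
+ unsigned long src[16], dst[16];
+
+ if (npages > 16)
+ return -EINVAL;
+
+ /* alloc_and_copy() fills dst[] with device pages and uses the
+ * device DMA engine for the copy; finalize_and_map() then lets
+ * the driver clean up after the pages that did migrate.
+ */
+ return migrate_vma(&driver_migrate_ops, vma, npages, start, end,
+ src, dst, NULL);
+ }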
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index cd28d5ee5273..4dde03b44ad1 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -266,7 +266,7 @@ for each mapping.
The number of file transparent huge pages mapped to userspace is available
by reading ShmemPmdMapped and ShmemHugePages fields in /proc/meminfo.
-To identify what applications are mapping file transparent huge pages, it
+To identify what applications are mapping file transparent huge pages, it
is necessary to read /proc/PID/smaps and count the FileHugeMapped fields
for each mapping.
@@ -292,7 +292,7 @@ thp_collapse_alloc_failed is incremented if khugepaged found a range
the allocation.
thp_file_alloc is incremented every time a file huge page is successfully
-i allocated.
+ allocated.
thp_file_mapped is incremented every time a file huge page is mapped into
user address space.
@@ -501,7 +501,7 @@ scanner can get reference to a page is get_page_unless_zero().
All tail pages have zero ->_refcount until atomic_add(). This prevents the
scanner from getting a reference to the tail page up to that point. After the
-atomic_add() we don't care about the ->_refcount value. We already known how
+atomic_add() we don't care about the ->_refcount value. We already known how
many references should be uncharged from the head page.
For head page get_page_unless_zero() will succeed and we don't mind. It's
@@ -519,8 +519,8 @@ comes. Splitting will free up unused subpages.
Splitting the page right away is not an option due to locking context in
the place where we can detect partial unmap. It's also might be
-counterproductive since in many cases partial unmap unmap happens during
-exit(2) if an THP crosses VMA boundary.
+counterproductive since in many cases partial unmap happens during exit(2) if
+a THP crosses a VMA boundary.
Function deferred_split_huge_page() is used to queue page for splitting.
The splitting itself will happen when we get memory pressure via shrinker
diff --git a/MAINTAINERS b/MAINTAINERS
index cd6f0596242b..ba8b057a6449 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6030,6 +6030,13 @@ S: Supported
F: drivers/scsi/hisi_sas/
F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
+HMM - Heterogeneous Memory Management
+M: Jérôme Glisse <jglisse@redhat.com>
+L: linux-mm@kvack.org
+S: Maintained
+F: mm/hmm*
+F: include/linux/hmm*
+
HOST AP DRIVER
M: Jouni Malinen <j@w1.fi>
L: linux-wireless@vger.kernel.org
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index b6e4f7a7419b..333daab7def0 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -845,7 +845,7 @@ static int processCFI(const u8 *start, const u8 *end, unsigned long targetLoc,
* state->dataAlign;
break;
case DW_CFA_def_cfa_register:
- unw_debug("cfa_def_cfa_regsiter: ");
+ unw_debug("cfa_def_cfa_register: ");
state->cfa.reg = get_uleb128(&ptr.p8, end);
break;
/*todo case DW_CFA_def_cfa_expression: */
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 02454fa15d2c..d69bebf697e7 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -478,26 +478,6 @@ static inline void __sync_cache_range_r(volatile void *p, size_t size)
: : : "r0","r1","r2","r3","r4","r5","r6","r7", \
"r9","r10","lr","memory" )
-#ifdef CONFIG_MMU
-int set_memory_ro(unsigned long addr, int numpages);
-int set_memory_rw(unsigned long addr, int numpages);
-int set_memory_x(unsigned long addr, int numpages);
-int set_memory_nx(unsigned long addr, int numpages);
-#else
-static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
-static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
-static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
-static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
-#endif
-
-#ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_rw(void);
-void set_kernel_text_ro(void);
-#else
-static inline void set_kernel_text_rw(void) { }
-static inline void set_kernel_text_ro(void) { }
-#endif
-
void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
void *kaddr, unsigned long len);
diff --git a/arch/arm/include/asm/set_memory.h b/arch/arm/include/asm/set_memory.h
new file mode 100644
index 000000000000..5aa4315abe91
--- /dev/null
+++ b/arch/arm/include/asm/set_memory.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 1999-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASMARM_SET_MEMORY_H
+#define _ASMARM_SET_MEMORY_H
+
+#ifdef CONFIG_MMU
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+#else
+static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
+#endif
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void set_kernel_text_rw(void);
+void set_kernel_text_ro(void);
+#else
+static inline void set_kernel_text_rw(void) { }
+static inline void set_kernel_text_ro(void) { }
+#endif
+
+#endif
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 3f1759411d51..dea3e965fe88 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -21,6 +21,7 @@
#include <asm/opcodes.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
+#include <asm/set_memory.h>
#ifdef CONFIG_THUMB2_KERNEL
#define NOP 0xf85deb04 /* pop.w {lr} */
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index 9232caee7060..1bb4c40a3135 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -269,7 +269,7 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
/*
* Register our undef instruction hooks with ARM undef core.
- * We regsiter a hook specifically looking for the KGB break inst
+ * We register a hook specifically looking for the KGB break inst
* and we handle the normal undef case within the do_undefinstr
* handler.
*/
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index b18c1ea56bed..15495887ca14 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -18,6 +18,7 @@
#include <asm/mach-types.h>
#include <asm/smp_plat.h>
#include <asm/system_misc.h>
+#include <asm/set_memory.h>
extern void relocate_new_kernel(void);
extern const unsigned int relocate_new_kernel_size;
diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c
index 4977296f0c78..bcf3df59f71b 100644
--- a/arch/arm/mach-ixp4xx/common-pci.c
+++ b/arch/arm/mach-ixp4xx/common-pci.c
@@ -43,14 +43,14 @@
int (*ixp4xx_pci_read)(u32 addr, u32 cmd, u32* data);
/*
- * Base address for PCI regsiter region
+ * Base address for PCI register region
*/
unsigned long ixp4xx_pci_reg_base = 0;
/*
* PCI cfg an I/O routines are done by programming a
* command/byte enable register, and then read/writing
- * the data from a data regsiter. We need to ensure
+ * the data from a data register. We need to ensure
* these transactions are atomic or we will end up
* with corrupt data on the bus or in a driver.
*/
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 6b6fda65fb3b..91272db09fa3 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -117,7 +117,7 @@ static struct musb_hdrc_platform_data tusb_data = {
static void __init n8x0_usb_init(void)
{
int ret = 0;
- static char announce[] __initdata = KERN_INFO "TUSB 6010\n";
+ static const char announce[] __initconst = KERN_INFO "TUSB 6010\n";
/* PM companion chip power control pin */
ret = gpio_request_one(TUSB6010_GPIO_ENABLE, GPIOF_OUT_INIT_LOW,
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
index 3b69f2642513..1403cb4a0c3d 100644
--- a/arch/arm/mm/pageattr.c
+++ b/arch/arm/mm/pageattr.c
@@ -15,6 +15,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#include <asm/set_memory.h>
struct page_change_data {
pgprot_t set_mask;
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 93d0b6d0b63e..d5b9fa19b684 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -18,6 +18,7 @@
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/hwcap.h>
#include <asm/opcodes.h>
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index a12f1afc95a3..a7a97a608033 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -29,6 +29,7 @@ generic-y += rwsem.h
generic-y += segment.h
generic-y += sembuf.h
generic-y += serial.h
+generic-y += set_memory.h
generic-y += shmbuf.h
generic-y += simd.h
generic-y += sizes.h
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 728f933cef8c..d74a284abdc2 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -150,10 +150,6 @@ static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
{
}
-int set_memory_ro(unsigned long addr, int numpages);
-int set_memory_rw(unsigned long addr, int numpages);
-int set_memory_x(unsigned long addr, int numpages);
-int set_memory_nx(unsigned long addr, int numpages);
int set_memory_valid(unsigned long addr, unsigned long size, int enable);
#endif
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 3212ee0558f6..a682a0a2a0fa 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -17,6 +17,7 @@
#include <linux/vmalloc.h>
#include <asm/pgtable.h>
+#include <asm/set_memory.h>
#include <asm/tlbflush.h>
struct page_change_data {
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a785554916c0..efc1bf237cc9 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -27,6 +27,7 @@
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
+#include <asm/set_memory.h>
#include "bpf_jit.h"
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 8f3efa682ee8..0d2058863913 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -646,20 +646,36 @@ mem_init (void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type)
{
pg_data_t *pgdat;
struct zone *zone;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ bool for_device = false;
int ret;
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE
+ * is not supported on this architecture.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ break;
+ case MEMORY_DEVICE_PERSISTENT:
+ for_device = true;
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
pgdat = NODE_DATA(nid);
zone = pgdat->node_zones +
zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
ret = __add_pages(nid, zone, start_pfn, nr_pages);
-
if (ret)
printk("%s: Problem encountered in __add_pages() as ret=%d\n",
__func__, ret);
@@ -668,13 +684,27 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, enum memory_type type)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct zone *zone;
int ret;
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE
+ * is not supported on this architecture.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ case MEMORY_DEVICE_PERSISTENT:
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
zone = page_zone(pfn_to_page(start_pfn));
ret = __remove_pages(zone, start_pfn, nr_pages);
if (ret)
diff --git a/arch/m68k/ifpsp060/src/ilsp.S b/arch/m68k/ifpsp060/src/ilsp.S
index 970abaf3303e..dd5b2c357e95 100644
--- a/arch/m68k/ifpsp060/src/ilsp.S
+++ b/arch/m68k/ifpsp060/src/ilsp.S
@@ -776,7 +776,7 @@ muls64_zero:
# ALGORITHM *********************************************************** #
# In the interest of simplicity, all operands are converted to #
# longword size whether the operation is byte, word, or long. The #
-# bounds are sign extended accordingly. If Rn is a data regsiter, Rn is #
+# bounds are sign extended accordingly. If Rn is a data register, Rn is #
# also sign extended. If Rn is an address register, it need not be sign #
# extended since the full register is always used. #
# The condition codes are set correctly before the final "rts". #
diff --git a/arch/m68k/ifpsp060/src/isp.S b/arch/m68k/ifpsp060/src/isp.S
index b865c1a052ba..29a9f8629b9d 100644
--- a/arch/m68k/ifpsp060/src/isp.S
+++ b/arch/m68k/ifpsp060/src/isp.S
@@ -1876,7 +1876,7 @@ movp_read_err:
# word, or longword sized operands. Then, in the interest of #
# simplicity, all operands are converted to longword size whether the #
# operation is byte, word, or long. The bounds are sign extended #
-# accordingly. If Rn is a data regsiter, Rn is also sign extended. If #
+# accordingly. If Rn is a data register, Rn is also sign extended. If #
# Rn is an address register, it need not be sign extended since the #
# full register is always used. #
# The comparisons are made and the condition codes calculated. #
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c b/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c
index ba4753c23b03..d18ed5af62f4 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c
@@ -152,7 +152,7 @@ static int __cvmx_helper_errata_asx_pass1(int interface, int port,
}
/**
- * Configure all of the ASX, GMX, and PKO regsiters required
+ * Configure all of the ASX, GMX, and PKO registers required
* to get RGMII to function on the supplied interface.
*
* @interface: PKO Interface to configure (0 or 1)
diff --git a/arch/mips/dec/prom/init.c b/arch/mips/dec/prom/init.c
index 4e1761e0a09a..d88eb7a6662b 100644
--- a/arch/mips/dec/prom/init.c
+++ b/arch/mips/dec/prom/init.c
@@ -88,7 +88,7 @@ void __init which_prom(s32 magic, s32 *prom_vec)
void __init prom_init(void)
{
extern void dec_machine_halt(void);
- static char cpu_msg[] __initdata =
+ static const char cpu_msg[] __initconst =
"Sorry, this kernel is compiled for a wrong CPU type!\n";
s32 argc = fw_arg0;
s32 *argv = (void *)fw_arg1;
@@ -111,7 +111,7 @@ void __init prom_init(void)
#if defined(CONFIG_CPU_R3000)
if ((current_cpu_type() == CPU_R4000SC) ||
(current_cpu_type() == CPU_R4400SC)) {
- static char r4k_msg[] __initdata =
+ static const char r4k_msg[] __initconst =
"Please recompile with \"CONFIG_CPU_R4x00 = y\".\n";
printk(cpu_msg);
printk(r4k_msg);
@@ -122,7 +122,7 @@ void __init prom_init(void)
#if defined(CONFIG_CPU_R4X00)
if ((current_cpu_type() == CPU_R3000) ||
(current_cpu_type() == CPU_R3000A)) {
- static char r3k_msg[] __initdata =
+ static const char r3k_msg[] __initconst =
"Please recompile with \"CONFIG_CPU_R3000 = y\".\n";
printk(cpu_msg);
printk(r3k_msg);
diff --git a/arch/mips/include/asm/octeon/cvmx-helper-rgmii.h b/arch/mips/include/asm/octeon/cvmx-helper-rgmii.h
index f89775be7654..f7a95d7de140 100644
--- a/arch/mips/include/asm/octeon/cvmx-helper-rgmii.h
+++ b/arch/mips/include/asm/octeon/cvmx-helper-rgmii.h
@@ -55,7 +55,7 @@ extern int __cvmx_helper_rgmii_probe(int interface);
extern void cvmx_helper_rgmii_internal_loopback(int port);
/**
- * Configure all of the ASX, GMX, and PKO regsiters required
+ * Configure all of the ASX, GMX, and PKO registers required
* to get RGMII to function on the supplied interface.
*
* @interface: PKO Interface to configure (0 or 1)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index b49e7bf9f950..9681b5877140 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2256,8 +2256,8 @@ void set_handler(unsigned long offset, void *addr, unsigned long size)
local_flush_icache_range(ebase + offset, ebase + offset + size);
}
-static char panic_null_cerr[] =
- "Trying to set NULL cache error exception handler";
+static const char panic_null_cerr[] =
+ "Trying to set NULL cache error exception handler\n";
/*
* Install uncached CPU exception handler.
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index ad4cb1613c57..a4fd296c958e 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -1369,7 +1369,7 @@ nadtlb_nullify:
/*
When there is no translation for the probe address then we
- must nullify the insn and return zero in the target regsiter.
+ must nullify the insn and return zero in the target register.
This will indicate to the calling code that it does not have
write/read privileges to this address.
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index c66c943d9322..f1a76935a314 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -218,7 +218,7 @@ void *module_alloc(unsigned long size)
* easier than trying to map the text, data, init_text and
* init_data correctly */
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- GFP_KERNEL | __GFP_HIGHMEM,
+ GFP_KERNEL,
PAGE_KERNEL_RWX, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c
index 915412e4d5ba..1fa794d7d59f 100644
--- a/arch/powerpc/mm/icswx.c
+++ b/arch/powerpc/mm/icswx.c
@@ -186,7 +186,7 @@ static u32 acop_get_inst(struct pt_regs *regs)
}
/**
- * @regs: regsiters at time of interrupt
+ * @regs: registers at time of interrupt
* @address: storage address
* @error_code: Fault code, usually the DSISR or ESR depending on
* processor type
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9ee536ec0739..3bbba178b464 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -126,14 +126,31 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type)
{
struct pglist_data *pgdata;
- struct zone *zone;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ bool for_device = false;
+ struct zone *zone;
int rc;
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE
+ * is not supported on this architecture.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ break;
+ case MEMORY_DEVICE_PERSISTENT:
+ for_device = true;
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
resize_hpt_for_hotplug(memblock_phys_mem_size());
pgdata = NODE_DATA(nid);
@@ -155,13 +172,27 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, enum memory_type type)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct zone *zone;
int ret;
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE
+ * is not supported on this architecture.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ case MEMORY_DEVICE_PERSISTENT:
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
zone = page_zone(pfn_to_page(start_pfn));
ret = __remove_pages(zone, start_pfn, nr_pages);
if (ret)
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 7e3481eb2174..45092b12f54f 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,4 +1,5 @@
generic-y += asm-offsets.h
+generic-y += cacheflush.h
generic-y += clkdev.h
generic-y += dma-contiguous.h
generic-y += div64.h
diff --git a/arch/s390/include/asm/cacheflush.h b/arch/s390/include/asm/set_memory.h
index 0499334f9473..46a4db44c47a 100644
--- a/arch/s390/include/asm/cacheflush.h
+++ b/arch/s390/include/asm/set_memory.h
@@ -1,8 +1,5 @@
-#ifndef _S390_CACHEFLUSH_H
-#define _S390_CACHEFLUSH_H
-
-/* Caches aren't brain-dead on the s390. */
-#include <asm-generic/cacheflush.h>
+#ifndef _ASMS390_SET_MEMORY_H
+#define _ASMS390_SET_MEMORY_H
#define SET_MEMORY_RO 1UL
#define SET_MEMORY_RW 2UL
@@ -31,4 +28,4 @@ static inline int set_memory_x(unsigned long addr, int numpages)
return __set_memory(addr, numpages, SET_MEMORY_X);
}
-#endif /* _S390_CACHEFLUSH_H */
+#endif
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 60a8a4e207ed..27477f34cc0a 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -17,6 +17,7 @@
#include <trace/syscall.h>
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include "entry.h"
/*
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 76f9eda1d7c0..3d6a99746454 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -31,7 +31,7 @@
#include <linux/slab.h>
#include <linux/hardirq.h>
#include <linux/ftrace.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/sections.h>
#include <linux/uaccess.h>
#include <asm/dis.h>
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index db5658daf994..49a6bd45957b 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -26,6 +26,7 @@
#include <asm/asm-offsets.h>
#include <asm/cacheflush.h>
#include <asm/os_info.h>
+#include <asm/set_memory.h>
#include <asm/switch_to.h>
#include <asm/nmi.h>
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 11b7d6638991..d849faf62924 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1198,10 +1198,7 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
return -EINVAL;
- keys = kmalloc_array(args->count, sizeof(uint8_t),
- GFP_KERNEL | __GFP_NOWARN);
- if (!keys)
- keys = vmalloc(sizeof(uint8_t) * args->count);
+ keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
if (!keys)
return -ENOMEM;
@@ -1243,10 +1240,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
return -EINVAL;
- keys = kmalloc_array(args->count, sizeof(uint8_t),
- GFP_KERNEL | __GFP_NOWARN);
- if (!keys)
- keys = vmalloc(sizeof(uint8_t) * args->count);
+ keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
if (!keys)
return -ENOMEM;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ee5066718b21..5338d0c35100 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -39,6 +39,7 @@
#include <asm/sections.h>
#include <asm/ctl_reg.h>
#include <asm/sclp.h>
+#include <asm/set_memory.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
@@ -161,7 +162,7 @@ unsigned long memory_block_size_bytes(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type)
{
unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
unsigned long start_pfn = PFN_DOWN(start);
@@ -170,6 +171,18 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
struct zone *zone;
int rc, i;
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
rc = vmem_add_mapping(start, size);
if (rc)
return rc;
@@ -204,7 +217,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, enum memory_type type)
{
/*
* There is no hardware or firmware interface which could trigger a
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index fc5dc33bb141..143cc9af6fd7 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -8,6 +8,7 @@
#include <asm/facility.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/set_memory.h>
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 60d38993f232..c33c94b4be60 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -17,6 +17,7 @@
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/set_memory.h>
static DEFINE_MUTEX(vmem_mutex);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 4ecf6d687509..6e97a2e3fd8d 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -24,6 +24,7 @@
#include <linux/bpf.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
+#include <asm/set_memory.h>
#include "bpf_jit.h"
int bpf_jit_enable __read_mostly;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 75491862d900..f37e7a6ae2cc 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -485,13 +485,30 @@ void free_initrd_mem(unsigned long start, unsigned long end)
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type)
{
pg_data_t *pgdat;
unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT;
+ bool for_device = false;
int ret;
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE
+ * is not supported on this architecture.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ break;
+ case MEMORY_DEVICE_PERSISTENT:
+ for_device = true;
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
pgdat = NODE_DATA(nid);
/* We only have ZONE_NORMAL, so this is easy.. */
@@ -516,13 +533,27 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, enum memory_type type)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT;
struct zone *zone;
int ret;
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE
+ * is not supported on this architecture.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ case MEMORY_DEVICE_PERSISTENT:
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
zone = page_zone(pfn_to_page(start_pfn));
ret = __remove_pages(zone, start_pfn, nr_pages);
if (unlikely(ret))
diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S
index 7c0c81f18837..c07834f3a6fc 100644
--- a/arch/sparc/lib/NG4memset.S
+++ b/arch/sparc/lib/NG4memset.S
@@ -13,14 +13,14 @@
.globl NG4memset
NG4memset:
andcc %o1, 0xff, %o4
- be,pt %icc, 1f
+ be,pt %xcc, 1f
mov %o2, %o1
sllx %o4, 8, %g1
or %g1, %o4, %o2
sllx %o2, 16, %g1
or %g1, %o2, %o2
sllx %o2, 32, %g1
- ba,pt %icc, 1f
+ ba,pt %xcc, 1f
or %g1, %o2, %o4
.size NG4memset,.-NG4memset
@@ -29,7 +29,7 @@ NG4memset:
NG4bzero:
clr %o4
1: cmp %o1, 16
- ble %icc, .Ltiny
+ ble %xcc, .Ltiny
mov %o0, %o3
sub %g0, %o0, %g1
and %g1, 0x7, %g1
@@ -37,7 +37,7 @@ NG4bzero:
sub %o1, %g1, %o1
1: stb %o4, [%o0 + 0x00]
subcc %g1, 1, %g1
- bne,pt %icc, 1b
+ bne,pt %xcc, 1b
add %o0, 1, %o0
.Laligned8:
cmp %o1, 64 + (64 - 8)
@@ -48,7 +48,7 @@ NG4bzero:
sub %o1, %g1, %o1
1: stx %o4, [%o0 + 0x00]
subcc %g1, 8, %g1
- bne,pt %icc, 1b
+ bne,pt %xcc, 1b
add %o0, 0x8, %o0
.Laligned64:
andn %o1, 64 - 1, %g1
@@ -58,30 +58,30 @@ NG4bzero:
1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
subcc %g1, 0x40, %g1
stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
- bne,pt %icc, 1b
+ bne,pt %xcc, 1b
add %o0, 0x40, %o0
.Lpostloop:
cmp %o1, 8
- bl,pn %icc, .Ltiny
+ bl,pn %xcc, .Ltiny
membar #StoreStore|#StoreLoad
.Lmedium:
andn %o1, 0x7, %g1
sub %o1, %g1, %o1
1: stx %o4, [%o0 + 0x00]
subcc %g1, 0x8, %g1
- bne,pt %icc, 1b
+ bne,pt %xcc, 1b
add %o0, 0x08, %o0
andcc %o1, 0x4, %g1
- be,pt %icc, .Ltiny
+ be,pt %xcc, .Ltiny
sub %o1, %g1, %o1
stw %o4, [%o0 + 0x00]
add %o0, 0x4, %o0
.Ltiny:
cmp %o1, 0
- be,pn %icc, .Lexit
+ be,pn %xcc, .Lexit
1: subcc %o1, 1, %o1
stb %o4, [%o0 + 0x00]
- bne,pt %icc, 1b
+ bne,pt %xcc, 1b
add %o0, 1, %o0
.Lexit:
retl
@@ -99,8 +99,8 @@ NG4bzero:
stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
- bne,pt %icc, 1b
+ bne,pt %xcc, 1b
add %o0, 0x30, %o0
- ba,a,pt %icc, .Lpostloop
+ ba,a,pt %xcc, .Lpostloop
nop
.size NG4bzero,.-NG4bzero
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e7e1942edff7..8b4140f6724f 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -5,93 +5,8 @@
#include <asm-generic/cacheflush.h>
#include <asm/special_insns.h>
-/*
- * The set_memory_* API can be used to change various attributes of a virtual
- * address range. The attributes include:
- * Cachability : UnCached, WriteCombining, WriteThrough, WriteBack
- * Executability : eXeutable, NoteXecutable
- * Read/Write : ReadOnly, ReadWrite
- * Presence : NotPresent
- *
- * Within a category, the attributes are mutually exclusive.
- *
- * The implementation of this API will take care of various aspects that
- * are associated with changing such attributes, such as:
- * - Flushing TLBs
- * - Flushing CPU caches
- * - Making sure aliases of the memory behind the mapping don't violate
- * coherency rules as defined by the CPU in the system.
- *
- * What this API does not do:
- * - Provide exclusion between various callers - including callers that
- * operation on other mappings of the same physical page
- * - Restore default attributes when a page is freed
- * - Guarantee that mappings other than the requested one are
- * in any state, other than that these do not violate rules for
- * the CPU you have. Do not depend on any effects on other mappings,
- * CPUs other than the one you have may have more relaxed rules.
- * The caller is required to take care of these.
- */
-
-int _set_memory_uc(unsigned long addr, int numpages);
-int _set_memory_wc(unsigned long addr, int numpages);
-int _set_memory_wt(unsigned long addr, int numpages);
-int _set_memory_wb(unsigned long addr, int numpages);
-int set_memory_uc(unsigned long addr, int numpages);
-int set_memory_wc(unsigned long addr, int numpages);
-int set_memory_wt(unsigned long addr, int numpages);
-int set_memory_wb(unsigned long addr, int numpages);
-int set_memory_x(unsigned long addr, int numpages);
-int set_memory_nx(unsigned long addr, int numpages);
-int set_memory_ro(unsigned long addr, int numpages);
-int set_memory_rw(unsigned long addr, int numpages);
-int set_memory_np(unsigned long addr, int numpages);
-int set_memory_4k(unsigned long addr, int numpages);
-
-int set_memory_array_uc(unsigned long *addr, int addrinarray);
-int set_memory_array_wc(unsigned long *addr, int addrinarray);
-int set_memory_array_wt(unsigned long *addr, int addrinarray);
-int set_memory_array_wb(unsigned long *addr, int addrinarray);
-
-int set_pages_array_uc(struct page **pages, int addrinarray);
-int set_pages_array_wc(struct page **pages, int addrinarray);
-int set_pages_array_wt(struct page **pages, int addrinarray);
-int set_pages_array_wb(struct page **pages, int addrinarray);
-
-/*
- * For legacy compatibility with the old APIs, a few functions
- * are provided that work on a "struct page".
- * These functions operate ONLY on the 1:1 kernel mapping of the
- * memory that the struct page represents, and internally just
- * call the set_memory_* function. See the description of the
- * set_memory_* function for more details on conventions.
- *
- * These APIs should be considered *deprecated* and are likely going to
- * be removed in the future.
- * The reason for this is the implicit operation on the 1:1 mapping only,
- * making this not a generally useful API.
- *
- * Specifically, many users of the old APIs had a virtual address,
- * called virt_to_page() or vmalloc_to_page() on that address to
- * get a struct page* that the old API required.
- * To convert these cases, use set_memory_*() on the original
- * virtual address, do not use these functions.
- */
-
-int set_pages_uc(struct page *page, int numpages);
-int set_pages_wb(struct page *page, int numpages);
-int set_pages_x(struct page *page, int numpages);
-int set_pages_nx(struct page *page, int numpages);
-int set_pages_ro(struct page *page, int numpages);
-int set_pages_rw(struct page *page, int numpages);
-
-
void clflush_cache_range(void *addr, unsigned int size);
#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
-extern int kernel_set_to_readonly;
-void set_kernel_text_rw(void);
-void set_kernel_text_ro(void);
-
#endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
new file mode 100644
index 000000000000..eaec6c364e42
--- /dev/null
+++ b/arch/x86/include/asm/set_memory.h
@@ -0,0 +1,87 @@
+#ifndef _ASM_X86_SET_MEMORY_H
+#define _ASM_X86_SET_MEMORY_H
+
+#include <asm/page.h>
+#include <asm-generic/set_memory.h>
+
+/*
+ * The set_memory_* API can be used to change various attributes of a virtual
+ * address range. The attributes include:
+ * Cacheability : UnCached, WriteCombining, WriteThrough, WriteBack
+ * Executability : eXecutable, NoteXecutable
+ * Read/Write : ReadOnly, ReadWrite
+ * Presence : NotPresent
+ *
+ * Within a category, the attributes are mutually exclusive.
+ *
+ * The implementation of this API will take care of various aspects that
+ * are associated with changing such attributes, such as:
+ * - Flushing TLBs
+ * - Flushing CPU caches
+ * - Making sure aliases of the memory behind the mapping don't violate
+ * coherency rules as defined by the CPU in the system.
+ *
+ * What this API does not do:
+ * - Provide exclusion between various callers - including callers that
+ * operate on other mappings of the same physical page
+ * - Restore default attributes when a page is freed
+ * - Guarantee that mappings other than the requested one are
+ * in any state, other than that these do not violate rules for
+ * the CPU you have. Do not depend on any effects on other mappings,
+ * CPUs other than the one you have may have more relaxed rules.
+ * The caller is required to take care of these.
+ */
+
+int _set_memory_uc(unsigned long addr, int numpages);
+int _set_memory_wc(unsigned long addr, int numpages);
+int _set_memory_wt(unsigned long addr, int numpages);
+int _set_memory_wb(unsigned long addr, int numpages);
+int set_memory_uc(unsigned long addr, int numpages);
+int set_memory_wc(unsigned long addr, int numpages);
+int set_memory_wt(unsigned long addr, int numpages);
+int set_memory_wb(unsigned long addr, int numpages);
+int set_memory_np(unsigned long addr, int numpages);
+int set_memory_4k(unsigned long addr, int numpages);
+
+int set_memory_array_uc(unsigned long *addr, int addrinarray);
+int set_memory_array_wc(unsigned long *addr, int addrinarray);
+int set_memory_array_wt(unsigned long *addr, int addrinarray);
+int set_memory_array_wb(unsigned long *addr, int addrinarray);
+
+int set_pages_array_uc(struct page **pages, int addrinarray);
+int set_pages_array_wc(struct page **pages, int addrinarray);
+int set_pages_array_wt(struct page **pages, int addrinarray);
+int set_pages_array_wb(struct page **pages, int addrinarray);
+
+/*
+ * For legacy compatibility with the old APIs, a few functions
+ * are provided that work on a "struct page".
+ * These functions operate ONLY on the 1:1 kernel mapping of the
+ * memory that the struct page represents, and internally just
+ * call the set_memory_* function. See the description of the
+ * set_memory_* function for more details on conventions.
+ *
+ * These APIs should be considered *deprecated* and are likely going to
+ * be removed in the future.
+ * The reason for this is the implicit operation on the 1:1 mapping only,
+ * making this not a generally useful API.
+ *
+ * Specifically, many users of the old APIs had a virtual address,
+ * called virt_to_page() or vmalloc_to_page() on that address to
+ * get a struct page* that the old API required.
+ * To convert these cases, use set_memory_*() on the original
+ * virtual address, do not use these functions.
+ */
+
+int set_pages_uc(struct page *page, int numpages);
+int set_pages_wb(struct page *page, int numpages);
+int set_pages_x(struct page *page, int numpages);
+int set_pages_nx(struct page *page, int numpages);
+int set_pages_ro(struct page *page, int numpages);
+int set_pages_rw(struct page *page, int numpages);
+
+extern int kernel_set_to_readonly;
+void set_kernel_text_rw(void);
+void set_kernel_text_ro(void);
+
+#endif /* _ASM_X86_SET_MEMORY_H */
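For readers unfamiliar with this interface: the declarations above (plus set_memory_ro()/set_memory_rw()/set_memory_x()/set_memory_nx(), now provided via asm-generic/set_memory.h) all take a kernel virtual address and a page count. A minimal, illustrative usage sketch follows; the buffer name, contents and length are hypothetical and not taken from this patch:

	/* Sketch: briefly make one page of previously read-only text writable. */
	unsigned long addr = (unsigned long)buf;	/* page-aligned kernel VA (hypothetical) */

	set_memory_rw(addr, 1);			/* allow writes while patching */
	memcpy(buf, new_insns, len);		/* new_insns/len are placeholders */
	set_memory_ro(addr, 1);			/* back to read-only */

As the comment block notes, the deprecated set_pages_*() variants only touch the 1:1 kernel mapping; new code should call set_memory_*() on the virtual address it actually uses.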
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index df083efe6ee0..815dd63f49d0 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -36,7 +36,7 @@
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/amd_nb.h>
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c36140d788fe..ee8f11800295 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -16,7 +16,7 @@
#ifdef CONFIG_X86_64
# include <asm/mmconfig.h>
-# include <asm/cacheflush.h>
+# include <asm/set_memory.h>
#endif
#include "cpu.h"
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a44ef52184df..0af86d9242da 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,7 +17,7 @@
#include <asm/paravirt.h>
#include <asm/alternative.h>
#include <asm/pgtable.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
void __init check_bugs(void)
{
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3fe8003025ac..9dd546bcd0a7 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,7 +24,7 @@
#include <trace/syscall.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 5f43cec296c5..8c53c5d7a1bc 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -23,7 +23,7 @@
#include <asm/io_apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/debugreg.h>
static void set_idt(void *newidt, __u16 limit)
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b41ee7a91fe9..a0bacd25c9cd 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -27,6 +27,7 @@
#include <asm/debugreg.h>
#include <asm/kexec-bzimage64.h>
#include <asm/setup.h>
+#include <asm/set_memory.h>
#ifdef CONFIG_KEXEC_FILE
static struct kexec_file_ops *kexec_file_loaders[] = {
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 477ae806c2fa..f67bd3205df7 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -85,7 +85,7 @@ void *module_alloc(unsigned long size)
p = __vmalloc_node_range(size, MODULE_ALIGN,
MODULES_VADDR + get_module_load_offset(),
- MODULES_END, GFP_KERNEL | __GFP_HIGHMEM,
+ MODULES_END, GFP_KERNEL,
PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (p && (kasan_module_alloc(p, size) < 0)) {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bad6a25067bc..d2a892fc92bf 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -177,8 +177,8 @@ static void recalculate_apic_map(struct kvm *kvm)
if (kvm_apic_present(vcpu))
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
- new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
- sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
+ new = kvzalloc(sizeof(struct kvm_apic_map) +
+ sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL);
if (!new)
goto out;
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index 60168cdd0546..ea67dc876316 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -40,8 +40,8 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
int i;
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
- slot->arch.gfn_track[i] = kvm_kvzalloc(npages *
- sizeof(*slot->arch.gfn_track[i]));
+ slot->arch.gfn_track[i] = kvzalloc(npages *
+ sizeof(*slot->arch.gfn_track[i]), GFP_KERNEL);
if (!slot->arch.gfn_track[i])
goto track_free;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6bc47e2712c8..1f5263a8d9e0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8197,13 +8197,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
slot->base_gfn, level) + 1;
slot->arch.rmap[i] =
- kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
+ kvzalloc(lpages * sizeof(*slot->arch.rmap[i]), GFP_KERNEL);
if (!slot->arch.rmap[i])
goto out_free;
if (i == 0)
continue;
- linfo = kvm_kvzalloc(lpages * sizeof(*linfo));
+ linfo = kvzalloc(lpages * sizeof(*linfo), GFP_KERNEL);
if (!linfo)
goto out_free;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 22af912d66d2..adbfb095bade 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -5,7 +5,7 @@
#include <linux/memblock.h>
#include <linux/bootmem.h> /* for max_low_pfn */
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/e820.h>
#include <asm/init.h>
#include <asm/page.h>
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 601b8e04e5c6..2e964a907c98 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -48,7 +48,7 @@
#include <asm/sections.h>
#include <asm/paravirt.h>
#include <asm/setup.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/page_types.h>
#include <asm/init.h>
@@ -823,24 +823,57 @@ void __init mem_init(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type)
{
struct pglist_data *pgdata = NODE_DATA(nid);
- struct zone *zone = pgdata->node_zones +
- zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device);
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ bool for_device = false;
+ struct zone *zone;
+
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE
+ * is not supported on this architecture.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ break;
+ case MEMORY_DEVICE_PERSISTENT:
+ for_device = true;
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
+ zone = pgdata->node_zones +
+ zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device);
return __add_pages(nid, zone, start_pfn, nr_pages);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, enum memory_type type)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct zone *zone;
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type. In particular, MEMORY_DEVICE_UNADDRESSABLE
+ * is not supported on this architecture.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ case MEMORY_DEVICE_PERSISTENT:
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
zone = page_zone(pfn_to_page(start_pfn));
return __remove_pages(zone, start_pfn, nr_pages);
}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7bdda6f1d135..1ca42e235037 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -50,7 +50,7 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/init.h>
#include <asm/uv/uv.h>
#include <asm/setup.h>
@@ -686,16 +686,45 @@ static void update_end_of_memory_vars(u64 start, u64 size)
* Memory is added always to NORMAL zone. This means you will never get
* additional DMA/DMA32 memory.
*/
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type)
{
struct pglist_data *pgdat = NODE_DATA(nid);
- struct zone *zone = pgdat->node_zones +
- zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ bool for_device = false;
+ struct zone *zone;
int ret;
- init_memory_mapping(start, start + size);
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ break;
+ case MEMORY_DEVICE_PERSISTENT:
+ case MEMORY_DEVICE_UNADDRESSABLE:
+ for_device = true;
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
+ zone = pgdat->node_zones +
+ zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
+
+ /*
+ * We get un-addressable memory when someone adds a ZONE_DEVICE range
+ * to get struct pages for device memory that is not accessible by
+ * the CPU, so it is pointless to create a linear kernel mapping of
+ * such memory.
+ *
+ * Core mm should make sure it never sets a pte pointing to such a
+ * fake physical range.
+ */
+ if (type != MEMORY_DEVICE_UNADDRESSABLE)
+ init_memory_mapping(start, start + size);
ret = __add_pages(nid, zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
@@ -1035,7 +1064,7 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
remove_pagetable(start, end, true);
}
-int __ref arch_remove_memory(u64 start, u64 size)
+int __ref arch_remove_memory(u64 start, u64 size, enum memory_type type)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -1044,6 +1073,20 @@ int __ref arch_remove_memory(u64 start, u64 size)
struct zone *zone;
int ret;
+ /*
+ * Each memory_type needs special handling, so error out on an
+ * unsupported type.
+ */
+ switch (type) {
+ case MEMORY_NORMAL:
+ case MEMORY_DEVICE_PERSISTENT:
+ case MEMORY_DEVICE_UNADDRESSABLE:
+ break;
+ default:
+ pr_err("hotplug unsupported memory type %d\n", type);
+ return -EINVAL;
+ }
+
/* With altmap the first mapped page is offset from @start */
altmap = to_vmem_altmap((unsigned long) page);
if (altmap)
@@ -1051,7 +1094,9 @@ int __ref arch_remove_memory(u64 start, u64 size)
zone = page_zone(page);
ret = __remove_pages(zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
- kernel_physical_mapping_remove(start, start + size);
+
+ if (type != MEMORY_DEVICE_UNADDRESSABLE)
+ kernel_physical_mapping_remove(start, start + size);
return ret;
}
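The net effect of the hunks above is that arch_add_memory()/arch_remove_memory() now dispatch on an enum memory_type instead of a bare for_device flag, and un-addressable device memory never receives a linear kernel mapping. A hedged sketch of the call shape (in practice the generic hotplug/ZONE_DEVICE core supplies the type; the start/size values here are made up):

	/* Sketch only: hot-add 128 MiB of ordinary RAM to node 0. */
	u64 start = 0x100000000ULL;	/* hypothetical physical base */
	u64 size  = 128ULL << 20;
	int ret;

	ret = arch_add_memory(0, start, size, MEMORY_NORMAL);
	if (ret)
		pr_err("memory hotplug failed: %d\n", ret);

Passing MEMORY_DEVICE_UNADDRESSABLE instead would skip init_memory_mapping()/kernel_physical_mapping_remove(), since the CPU cannot access that range anyway.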
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index a5e1cda85974..1924c4ab8be5 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -13,7 +13,7 @@
#include <linux/vmalloc.h>
#include <linux/mmiotrace.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/e820.h>
#include <asm/fixmap.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index b5949017dead..4f6e9059082f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -24,6 +24,7 @@
#include <asm/pgalloc.h>
#include <asm/proto.h>
#include <asm/pat.h>
+#include <asm/set_memory.h>
/*
* The current flushing context - we pass it instead of 5 arguments:
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 32322ce9b405..a63e6af4afb5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -12,6 +12,7 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <linux/bpf.h>
int bpf_jit_enable __read_mostly;
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 1d97cea3b3a4..4d510a601244 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -9,7 +9,7 @@
#include <linux/uaccess.h>
#include <asm/pci_x86.h>
#include <asm/pci-functions.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
/* BIOS32 signature: "_32_" */
#define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 565dff3c9a12..6867d884cebb 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -48,7 +48,7 @@
#include <asm/setup.h>
#include <asm/efi.h>
#include <asm/time.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <asm/x86_init.h>
#include <asm/uv/uv.h>
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 5db706f14111..a163a90af4aa 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -2,7 +2,7 @@
#include <linux/slab.h>
#include <linux/memblock.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/pgtable.h>
#include <asm/realmode.h>
#include <asm/tlbflush.h>
diff --git a/certs/blacklist.c b/certs/blacklist.c
index 3eddce0e307a..3a507b9e2568 100644
--- a/certs/blacklist.c
+++ b/certs/blacklist.c
@@ -140,7 +140,7 @@ int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type)
EXPORT_SYMBOL_GPL(is_hash_blacklisted);
/*
- * Intialise the blacklist
+ * Initialise the blacklist
*/
static int __init blacklist_init(void)
{
diff --git a/crypto/lzo.c b/crypto/lzo.c
index 168df784da84..218567d717d6 100644
--- a/crypto/lzo.c
+++ b/crypto/lzo.c
@@ -32,9 +32,7 @@ static void *lzo_alloc_ctx(struct crypto_scomp *tfm)
{
void *ctx;
- ctx = kmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN);
- if (!ctx)
- ctx = vmalloc(LZO1X_MEM_COMPRESS);
+ ctx = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
if (!ctx)
return ERR_PTR(-ENOMEM);
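This lzo change is the first of many conversions in this merge to the kvmalloc() helpers: kvmalloc()/kvzalloc() try a kmalloc() first and transparently fall back to vmalloc() for sizes the slab allocator cannot satisfy, and kvfree() releases memory from either path. A generic before/after sketch of the pattern being replaced throughout the drivers below (identifiers are placeholders):

	/* Old open-coded fallback: */
	buf = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
	if (!buf)
		buf = vmalloc(size);

	/* New equivalent: */
	buf = kvmalloc(size, GFP_KERNEL);

	/* Either way, release with: */
	kvfree(buf);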
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 7207e5fc9d3d..2c462beee551 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -513,7 +513,7 @@ retry:
if (i < erst_record_id_cache.len)
goto retry;
if (erst_record_id_cache.len >= erst_record_id_cache.size) {
- int new_size, alloc_size;
+ int new_size;
u64 *new_entries;
new_size = erst_record_id_cache.size * 2;
@@ -524,11 +524,7 @@ retry:
pr_warn(FW_WARN "too many record IDs!\n");
return 0;
}
- alloc_size = new_size * sizeof(entries[0]);
- if (alloc_size < PAGE_SIZE)
- new_entries = kmalloc(alloc_size, GFP_KERNEL);
- else
- new_entries = vmalloc(alloc_size);
+ new_entries = kvmalloc(new_size * sizeof(entries[0]), GFP_KERNEL);
if (!new_entries)
return -ENOMEM;
memcpy(new_entries, entries,
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 3ca0729f7e0e..992dda9b8338 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -95,7 +95,7 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
/* pcc mapped address + header size + offset within PCC subspace */
#define GET_PCC_VADDR(offs) (pcc_data.pcc_comm_addr + 0x8 + (offs))
-/* Check if a CPC regsiter is in PCC */
+/* Check if a CPC register is in PCC */
#define CPC_IN_PCC(cpc) ((cpc)->type == ACPI_TYPE_BUFFER && \
(cpc)->cpc_entry.reg.space_id == \
ACPI_ADR_SPACE_PLATFORM_COMM)
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index dece26f119d4..a804a4107fbc 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -409,7 +409,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
new_pages = kzalloc(bytes, GFP_NOIO | __GFP_NOWARN);
if (!new_pages) {
new_pages = __vmalloc(bytes,
- GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO,
+ GFP_NOIO | __GFP_ZERO,
PAGE_KERNEL);
if (!new_pages)
return NULL;
diff --git a/drivers/char/agp/amd-k7-agp.c b/drivers/char/agp/amd-k7-agp.c
index 3661a51e93e2..5fbd333e4c6d 100644
--- a/drivers/char/agp/amd-k7-agp.c
+++ b/drivers/char/agp/amd-k7-agp.c
@@ -9,6 +9,7 @@
#include <linux/page-flags.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <asm/set_memory.h>
#include "agp.h"
#define AMD_MMBASE_BAR 1
diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c
index 75a9786a77e6..0b5ec7af2414 100644
--- a/drivers/char/agp/ati-agp.c
+++ b/drivers/char/agp/ati-agp.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/agp_backend.h>
#include <asm/agp.h>
+#include <asm/set_memory.h>
#include "agp.h"
#define ATI_GART_MMBASE_BAR 1
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index f002fa5d1887..658664a5a5aa 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -39,7 +39,9 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <asm/io.h>
-#include <asm/cacheflush.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
#include <asm/pgtable.h>
#include "agp.h"
@@ -88,13 +90,7 @@ static int agp_get_key(void)
void agp_alloc_page_array(size_t size, struct agp_memory *mem)
{
- mem->pages = NULL;
-
- if (size <= 2*PAGE_SIZE)
- mem->pages = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
- if (mem->pages == NULL) {
- mem->pages = vmalloc(size);
- }
+ mem->pages = kvmalloc(size, GFP_KERNEL);
}
EXPORT_SYMBOL(agp_alloc_page_array);
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 7fcc2a9d1d5a..9b6b6023193b 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -25,6 +25,7 @@
#include "agp.h"
#include "intel-agp.h"
#include <drm/intel-gtt.h>
+#include <asm/set_memory.h>
/*
* If we have Intel graphics, we're not going to have anything other than
diff --git a/drivers/char/agp/sworks-agp.c b/drivers/char/agp/sworks-agp.c
index 9b163b49d976..03be4ac79b0d 100644
--- a/drivers/char/agp/sworks-agp.c
+++ b/drivers/char/agp/sworks-agp.c
@@ -9,6 +9,7 @@
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/agp_backend.h>
+#include <asm/set_memory.h>
#include "agp.h"
#define SVWRKS_COMMAND 0x04
diff --git a/drivers/char/dsp56k.c b/drivers/char/dsp56k.c
index 50aa9ba91f25..0d7b577e0ff0 100644
--- a/drivers/char/dsp56k.c
+++ b/drivers/char/dsp56k.c
@@ -489,7 +489,7 @@ static const struct file_operations dsp56k_fops = {
/****** Init and module functions ******/
-static char banner[] __initdata = KERN_INFO "DSP56k driver installed\n";
+static const char banner[] __initconst = KERN_INFO "DSP56k driver installed\n";
static int __init dsp56k_init_driver(void)
{
diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c
index 03f9d316f969..d523991c945f 100644
--- a/drivers/clk/qcom/common.c
+++ b/drivers/clk/qcom/common.c
@@ -128,7 +128,7 @@ static void qcom_cc_gdsc_unregister(void *data)
/*
* Backwards compatibility with old DTs. Register a pass-through factor 1/1
- * clock to translate 'path' clk into 'name' clk and regsiter the 'path'
+ * clock to translate 'path' clk into 'name' clk and register the 'path'
* clk as a fixed rate clock if it isn't present.
*/
static int _qcom_cc_register_board_clk(struct device *dev, const char *path,
diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c
index 0b5bf135b090..062d71434e47 100644
--- a/drivers/cpufreq/powernow-k8.c
+++ b/drivers/cpufreq/powernow-k8.c
@@ -1171,7 +1171,8 @@ static struct cpufreq_driver cpufreq_amd64_driver = {
static void __request_acpi_cpufreq(void)
{
- const char *cur_drv, *drv = "acpi-cpufreq";
+ const char drv[] = "acpi-cpufreq";
+ const char *cur_drv;
cur_drv = cpufreq_get_current_driver();
if (!cur_drv)
diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c
index a7db9011d5fe..d2d0430d09d4 100644
--- a/drivers/cpufreq/sti-cpufreq.c
+++ b/drivers/cpufreq/sti-cpufreq.c
@@ -236,7 +236,7 @@ use_defaults:
return 0;
}
-static int sti_cpufreq_fetch_syscon_regsiters(void)
+static int sti_cpufreq_fetch_syscon_registers(void)
{
struct device *dev = ddata.cpu;
struct device_node *np = dev->of_node;
@@ -275,7 +275,7 @@ static int sti_cpufreq_init(void)
goto skip_voltage_scaling;
}
- ret = sti_cpufreq_fetch_syscon_regsiters();
+ ret = sti_cpufreq_fetch_syscon_registers();
if (ret)
goto skip_voltage_scaling;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 6d691abe889c..2ee327d69775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -27,6 +27,9 @@
*/
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
#include "amdgpu.h"
/*
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index a0ea3241c651..1f178b878e42 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -2446,7 +2446,7 @@ EXPORT_SYMBOL(drm_fb_helper_hotplug_event);
int __init drm_fb_helper_modinit(void)
{
#if defined(CONFIG_FRAMEBUFFER_CONSOLE_MODULE) && !defined(CONFIG_EXPERT)
- const char *name = "fbcon";
+ const char name[] = "fbcon";
struct module *fbcon;
mutex_lock(&module_mutex);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index d019b5e311cc..2d955d7d7b6d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -161,8 +161,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
file_size += sizeof(*iter.hdr) * n_obj;
/* Allocate the file in vmalloc memory, it's likely to be big */
- iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_HIGHMEM |
- __GFP_NOWARN | __GFP_NORETRY, PAGE_KERNEL);
+ iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+ PAGE_KERNEL);
if (!iter.start) {
dev_warn(gpu->dev, "failed to allocate devcoredump file\n");
return;
diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c
index 3f4f424196b2..3949b0990916 100644
--- a/drivers/gpu/drm/gma500/gtt.c
+++ b/drivers/gpu/drm/gma500/gtt.c
@@ -21,6 +21,7 @@
#include <drm/drmP.h>
#include <linux/shmem_fs.h>
+#include <asm/set_memory.h>
#include "psb_drv.h"
#include "blitter.h"
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 5ee93ff55608..1f9b35afefee 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -35,6 +35,7 @@
#include <linux/pm_runtime.h>
#include <acpi/video.h>
#include <linux/module.h>
+#include <asm/set_memory.h>
static struct drm_driver driver;
static int psb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8bab4aea63e6..2aa6b97fd22f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -31,6 +31,8 @@
#include <linux/seq_file.h>
#include <linux/stop_machine.h>
+#include <asm/set_memory.h>
+
#include <drm/drmP.h>
#include <drm/i915_drm.h>
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index ca5397beb357..2170534101ca 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -568,9 +568,7 @@ u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
size *= nmemb;
- mem = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
- if (!mem)
- mem = vmalloc(size);
+ mem = kvmalloc(size, GFP_KERNEL);
if (!mem)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index c4777c8d0312..0b3ec35515f3 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -27,6 +27,9 @@
*/
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
#include "radeon.h"
/*
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index a37de5db5731..eeddc1e48409 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -51,6 +51,9 @@
#if IS_ENABLED(CONFIG_AGP)
#include <asm/agp.h>
#endif
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
#define NUM_PAGES_TO_ALLOC (PAGE_SIZE/sizeof(struct page *))
#define SMALL_ALLOCATION 16
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index cec4b4baa179..90ddbdca93bd 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -53,6 +53,9 @@
#if IS_ENABLED(CONFIG_AGP)
#include <asm/agp.h>
#endif
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
#define NUM_PAGES_TO_ALLOC (PAGE_SIZE/sizeof(struct page *))
#define SMALL_ALLOCATION 4
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index aee3c00f836e..5260179d788a 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -44,6 +44,9 @@
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_page_alloc.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
/**
* Allocates storage for pointers to the pages that back the ttm.
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index e88afe1a435c..dbbe31df74df 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -27,7 +27,9 @@
#include <linux/io.h>
#include <linux/dma-mapping.h>
-#include <asm/cacheflush.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
#include "intel_th.h"
#include "msu.h"
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 5c48dbf43c1a..0f7e85c7b70b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -1726,7 +1726,7 @@ int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M,
MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT);
- /* DMA momery regsiter */
+ /* DMA memory register */
if (mr->type == MR_TYPE_DMA)
return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 4139abee3b54..f0dfcdd3f0f0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -204,7 +204,7 @@ int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift,
return 0;
}
- /* Note: if page_shift is zero, FAST memory regsiter */
+ /* Note: if page_shift is zero, FAST memory register */
mtt->page_shift = page_shift;
/* Compute MTT entry necessary */
diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c
index 549b315ca8fe..f53c8cda1bde 100644
--- a/drivers/leds/leds-lp5521.c
+++ b/drivers/leds/leds-lp5521.c
@@ -281,7 +281,7 @@ static void lp5521_firmware_loaded(struct lp55xx_chip *chip)
}
/*
- * Program momery sequence
+ * Program memory sequence
* 1) set engine mode to "LOAD"
* 2) write firmware data into program memory
*/
diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c
index c5b30f06218a..e9ba8cd32d66 100644
--- a/drivers/leds/leds-lp5523.c
+++ b/drivers/leds/leds-lp5523.c
@@ -387,7 +387,7 @@ static void lp5523_firmware_loaded(struct lp55xx_chip *chip)
}
/*
- * Program momery sequence
+ * Program memory sequence
* 1) set engine mode to "LOAD"
* 2) write firmware data into program memory
*/
diff --git a/drivers/leds/leds-lp5562.c b/drivers/leds/leds-lp5562.c
index b75333803a63..90892585bcb5 100644
--- a/drivers/leds/leds-lp5562.c
+++ b/drivers/leds/leds-lp5562.c
@@ -270,7 +270,7 @@ static void lp5562_firmware_loaded(struct lp55xx_chip *chip)
}
/*
- * Program momery sequence
+ * Program memory sequence
* 1) set engine mode to "LOAD"
* 2) write firmware data into program memory
*/
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 85e3f21c2514..e57353e39168 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -767,16 +767,12 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
}
n = d->nr_stripes * sizeof(atomic_t);
- d->stripe_sectors_dirty = n < PAGE_SIZE << 6
- ? kzalloc(n, GFP_KERNEL)
- : vzalloc(n);
+ d->stripe_sectors_dirty = kvzalloc(n, GFP_KERNEL);
if (!d->stripe_sectors_dirty)
return -ENOMEM;
n = BITS_TO_LONGS(d->nr_stripes) * sizeof(unsigned long);
- d->full_dirty_stripes = n < PAGE_SIZE << 6
- ? kzalloc(n, GFP_KERNEL)
- : vzalloc(n);
+ d->full_dirty_stripes = kvzalloc(n, GFP_KERNEL);
if (!d->full_dirty_stripes)
return -ENOMEM;
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 5d13930f0f22..cb8d2ccbb6c6 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -43,11 +43,7 @@ struct closure;
(heap)->used = 0; \
(heap)->size = (_size); \
_bytes = (heap)->size * sizeof(*(heap)->data); \
- (heap)->data = NULL; \
- if (_bytes < KMALLOC_MAX_SIZE) \
- (heap)->data = kmalloc(_bytes, (gfp)); \
- if ((!(heap)->data) && ((gfp) & GFP_KERNEL)) \
- (heap)->data = vmalloc(_bytes); \
+ (heap)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \
(heap)->data; \
})
@@ -136,12 +132,8 @@ do { \
\
(fifo)->mask = _allocated_size - 1; \
(fifo)->front = (fifo)->back = 0; \
- (fifo)->data = NULL; \
\
- if (_bytes < KMALLOC_MAX_SIZE) \
- (fifo)->data = kmalloc(_bytes, (gfp)); \
- if ((!(fifo)->data) && ((gfp) & GFP_KERNEL)) \
- (fifo)->data = vmalloc(_bytes); \
+ (fifo)->data = kvmalloc(_bytes, (gfp) & GFP_KERNEL); \
(fifo)->data; \
})
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 280b56c229d6..9407803b79df 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -406,7 +406,7 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
if (gfp_mask & __GFP_NORETRY)
noio_flag = memalloc_noio_save();
- ptr = __vmalloc(c->block_size, gfp_mask | __GFP_HIGHMEM, PAGE_KERNEL);
+ ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
if (gfp_mask & __GFP_NORETRY)
memalloc_noio_restore(noio_flag);
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 4da6fc6b1ffd..4951bf99dfb1 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1699,6 +1699,7 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
struct dm_ioctl *dmi;
int secure_data;
const size_t minimum_data_size = offsetof(struct dm_ioctl, data);
+ unsigned noio_flag;
if (copy_from_user(param_kernel, user, minimum_data_size))
return -EFAULT;
@@ -1721,15 +1722,9 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
* Use kmalloc() rather than vmalloc() when we can.
*/
dmi = NULL;
- if (param_kernel->data_size <= KMALLOC_MAX_SIZE)
- dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
-
- if (!dmi) {
- unsigned noio_flag;
- noio_flag = memalloc_noio_save();
- dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_HIGH | __GFP_HIGHMEM, PAGE_KERNEL);
- memalloc_noio_restore(noio_flag);
- }
+ noio_flag = memalloc_noio_save();
+ dmi = kvmalloc(param_kernel->data_size, GFP_KERNEL);
+ memalloc_noio_restore(noio_flag);
if (!dmi) {
if (secure_data && clear_user(user, param_kernel->data_size))
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
index 0250e7e521ab..6028d8247f58 100644
--- a/drivers/md/dm-stats.c
+++ b/drivers/md/dm-stats.c
@@ -146,12 +146,7 @@ static void *dm_kvzalloc(size_t alloc_size, int node)
if (!claim_shared_memory(alloc_size))
return NULL;
- if (alloc_size <= KMALLOC_MAX_SIZE) {
- p = kzalloc_node(alloc_size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN, node);
- if (p)
- return p;
- }
- p = vzalloc_node(alloc_size, node);
+ p = kvzalloc_node(alloc_size, GFP_KERNEL | __GFP_NOMEMALLOC, node);
if (p)
return p;
diff --git a/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h b/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h
index 354ec07eae87..23ae72468025 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h
+++ b/drivers/media/dvb-frontends/drx39xyj/drx_dap_fasi.h
@@ -70,7 +70,7 @@
* (3) both long and short but short preferred and long only when necesarry
*
* These modes must be selected compile time via compile switches.
-* Compile switch settings for the diffrent modes:
+* Compile switch settings for the different modes:
* (1) DRXDAPFASI_LONG_ADDR_ALLOWED=0, DRXDAPFASI_SHORT_ADDR_ALLOWED=1
* (2) DRXDAPFASI_LONG_ADDR_ALLOWED=1, DRXDAPFASI_SHORT_ADDR_ALLOWED=0
* (3) DRXDAPFASI_LONG_ADDR_ALLOWED=1, DRXDAPFASI_SHORT_ADDR_ALLOWED=1
diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c
index ac522417c462..3d528a13b8fc 100644
--- a/drivers/misc/sram-exec.c
+++ b/drivers/misc/sram-exec.c
@@ -16,9 +16,10 @@
#include <linux/device.h>
#include <linux/genalloc.h>
+#include <linux/mm.h>
#include <linux/sram.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include "sram.h"
diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c
index 4ef07d97156d..602c19e23f05 100644
--- a/drivers/net/can/rcar/rcar_canfd.c
+++ b/drivers/net/can/rcar/rcar_canfd.c
@@ -413,7 +413,7 @@
/* RSCFDnRPGACCr */
#define RCANFD_C_RPGACC(r) (0x1900 + (0x04 * (r)))
-/* CAN FD mode specific regsiter map */
+/* CAN FD mode specific register map */
/* RSCFDnCFDCmXXX -> RCANFD_F_XXX(m) */
#define RCANFD_F_DCFG(m) (0x0500 + (0x20 * (m)))
diff --git a/drivers/net/ethernet/amd/amd8111e.h b/drivers/net/ethernet/amd/amd8111e.h
index 7cdb18512407..2a57b46fd6a6 100644
--- a/drivers/net/ethernet/amd/amd8111e.h
+++ b/drivers/net/ethernet/amd/amd8111e.h
@@ -48,7 +48,7 @@ eg., if the value 10011010b is written into the least significant byte of a comm
/* 32 bit registers */
#define ASF_STAT 0x00 /* ASF status register */
-#define CHIPID 0x04 /* Chip ID regsiter */
+#define CHIPID 0x04 /* Chip ID register */
#define MIB_DATA 0x10 /* MIB data register */
#define MIB_ADDR 0x14 /* MIB address register */
#define STAT0 0x30 /* Status0 register */
@@ -648,7 +648,7 @@ typedef enum {
/* driver ioctl parameters */
#define AMD8111E_REG_DUMP_LEN 13*sizeof(u32)
-/* amd8111e desriptor format */
+/* amd8111e descriptor format */
struct amd8111e_tx_dr{
diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
index 796c37a5bbde..c5b81268c284 100644
--- a/drivers/net/ethernet/amd/atarilance.c
+++ b/drivers/net/ethernet/amd/atarilance.c
@@ -42,8 +42,8 @@
*/
-static char version[] = "atarilance.c: v1.3 04/04/96 "
- "Roman.Hodek@informatik.uni-erlangen.de\n";
+static const char version[] = "atarilance.c: v1.3 04/04/96 "
+ "Roman.Hodek@informatik.uni-erlangen.de\n";
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c
index 6c98901f1b89..82cc81385033 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -72,7 +72,7 @@
#include <asm/dec/machtype.h>
#include <asm/dec/system.h>
-static char version[] =
+static const char version[] =
"declance.c: v0.011 by Linux MIPS DECstation task force\n";
MODULE_AUTHOR("Linux MIPS DECstation task force");
diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c
index 12bb4f1489fc..77b1db267730 100644
--- a/drivers/net/ethernet/amd/sun3lance.c
+++ b/drivers/net/ethernet/amd/sun3lance.c
@@ -21,7 +21,8 @@
*/
-static char *version = "sun3lance.c: v1.2 1/12/2001 Sam Creasey (sammy@sammy.net)\n";
+static const char version[] =
+"sun3lance.c: v1.2 1/12/2001 Sam Creasey (sammy@sammy.net)\n";
#include <linux/module.h>
#include <linux/stddef.h>
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
index a8b80c56ac25..73efdb05a490 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_hw.c
@@ -307,7 +307,7 @@ void atl1c_start_phy_polling(struct atl1c_hw *hw, u16 clk_sel)
/*
* atl1c_read_phy_core
- * core function to read register in PHY via MDIO control regsiter.
+ * core function to read register in PHY via MDIO control register.
* ext: extension register (see IEEE 802.3)
* dev: device address (see IEEE 802.3 DEVAD, PRTAD is fixed to 0)
* reg: reg to read
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index cea6bdcde33f..8baf9d3eb4b1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -1591,7 +1591,7 @@ static int __bnx2x_vlan_mac_execute_step(struct bnx2x *bp,
if (rc != 0) {
__bnx2x_vlan_mac_h_pend(bp, o, *ramrod_flags);
- /* Calling function should not diffrentiate between this case
+ /* Calling function should not differentiate between this case
* and the case in which there is already a pending ramrod
*/
rc = 1;
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h
index 920d918ed193..f04e81f33795 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_defs.h
@@ -41,9 +41,6 @@
#define VALIDATE_TID 1
-void *cxgb_alloc_mem(unsigned long size);
-void cxgb_free_mem(void *addr);
-
/*
* Map an ATID or STID to their entries in the corresponding TID tables.
*/
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
index 76684dcb874c..fa81445e334c 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
@@ -1152,27 +1152,6 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new,
}
/*
- * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
- * The allocated memory is cleared.
- */
-void *cxgb_alloc_mem(unsigned long size)
-{
- void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
-
- if (!p)
- p = vzalloc(size);
- return p;
-}
-
-/*
- * Free memory allocated through t3_alloc_mem().
- */
-void cxgb_free_mem(void *addr)
-{
- kvfree(addr);
-}
-
-/*
* Allocate and initialize the TID tables. Returns 0 on success.
*/
static int init_tid_tabs(struct tid_info *t, unsigned int ntids,
@@ -1182,7 +1161,7 @@ static int init_tid_tabs(struct tid_info *t, unsigned int ntids,
unsigned long size = ntids * sizeof(*t->tid_tab) +
natids * sizeof(*t->atid_tab) + nstids * sizeof(*t->stid_tab);
- t->tid_tab = cxgb_alloc_mem(size);
+ t->tid_tab = kvzalloc(size, GFP_KERNEL);
if (!t->tid_tab)
return -ENOMEM;
@@ -1218,7 +1197,7 @@ static int init_tid_tabs(struct tid_info *t, unsigned int ntids,
static void free_tid_maps(struct tid_info *t)
{
- cxgb_free_mem(t->tid_tab);
+ kvfree(t->tid_tab);
}
static inline void add_adapter(struct adapter *adap)
@@ -1293,7 +1272,7 @@ int cxgb3_offload_activate(struct adapter *adapter)
return 0;
out_free_l2t:
- t3_free_l2t(l2td);
+ kvfree(l2td);
out_free:
kfree(t);
return err;
@@ -1302,7 +1281,7 @@ out_free:
static void clean_l2_data(struct rcu_head *head)
{
struct l2t_data *d = container_of(head, struct l2t_data, rcu_head);
- t3_free_l2t(d);
+ kvfree(d);
}
diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
index 52063587e1e9..26264125865f 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
@@ -444,7 +444,7 @@ struct l2t_data *t3_init_l2t(unsigned int l2t_capacity)
struct l2t_data *d;
int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry);
- d = cxgb_alloc_mem(size);
+ d = kvzalloc(size, GFP_KERNEL);
if (!d)
return NULL;
@@ -462,9 +462,3 @@ struct l2t_data *t3_init_l2t(unsigned int l2t_capacity)
}
return d;
}
-
-void t3_free_l2t(struct l2t_data *d)
-{
- cxgb_free_mem(d);
-}
-
diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.h b/drivers/net/ethernet/chelsio/cxgb3/l2t.h
index 8cffcdfd5678..c2fd323c4078 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/l2t.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.h
@@ -115,7 +115,6 @@ int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb,
struct l2t_entry *e);
void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e);
struct l2t_data *t3_init_l2t(unsigned int l2t_capacity);
-void t3_free_l2t(struct l2t_data *d);
int cxgb3_ofld_send(struct t3cdev *dev, struct sk_buff *skb);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index 7ad43af6bde1..3103ef9b561d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -290,8 +290,8 @@ struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start,
if (clipt_size < CLIPT_MIN_HASH_BUCKETS)
return NULL;
- ctbl = t4_alloc_mem(sizeof(*ctbl) +
- clipt_size*sizeof(struct list_head));
+ ctbl = kvzalloc(sizeof(*ctbl) +
+ clipt_size*sizeof(struct list_head), GFP_KERNEL);
if (!ctbl)
return NULL;
@@ -305,9 +305,9 @@ struct clip_tbl *t4_init_clip_tbl(unsigned int clipt_start,
for (i = 0; i < ctbl->clipt_size; ++i)
INIT_LIST_HEAD(&ctbl->hash_list[i]);
- cl_list = t4_alloc_mem(clipt_size*sizeof(struct clip_entry));
+ cl_list = kvzalloc(clipt_size*sizeof(struct clip_entry), GFP_KERNEL);
if (!cl_list) {
- t4_free_mem(ctbl);
+ kvfree(ctbl);
return NULL;
}
ctbl->cl_list = (void *)cl_list;
@@ -326,8 +326,8 @@ void t4_cleanup_clip_tbl(struct adapter *adap)
if (ctbl) {
if (ctbl->cl_list)
- t4_free_mem(ctbl->cl_list);
- t4_free_mem(ctbl);
+ kvfree(ctbl->cl_list);
+ kvfree(ctbl);
}
}
EXPORT_SYMBOL(t4_cleanup_clip_tbl);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 163543b1ea0b..1d2be2dd19dd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1184,8 +1184,6 @@ extern const char cxgb4_driver_version[];
void t4_os_portmod_changed(const struct adapter *adap, int port_id);
void t4_os_link_changed(struct adapter *adap, int port_id, int link_stat);
-void *t4_alloc_mem(size_t size);
-
void t4_free_sge_resources(struct adapter *adap);
void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q);
irq_handler_t t4_intr_handler(struct adapter *adap);
@@ -1557,7 +1555,6 @@ int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
int rateunit, int ratemode, int channel, int class,
int minrate, int maxrate, int weight, int pktsize);
void t4_sge_decode_idma_state(struct adapter *adapter, int state);
-void t4_free_mem(void *addr);
void t4_idma_monitor_init(struct adapter *adapter,
struct sge_idma_monitor_state *idma);
void t4_idma_monitor(struct adapter *adapter,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index f6e739da7bb7..1fa34b009891 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2634,7 +2634,7 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
if (count > avail - pos)
count = avail - pos;
- data = t4_alloc_mem(count);
+ data = kvzalloc(count, GFP_KERNEL);
if (!data)
return -ENOMEM;
@@ -2642,12 +2642,12 @@ static ssize_t mem_read(struct file *file, char __user *buf, size_t count,
ret = t4_memory_rw(adap, 0, mem, pos, count, data, T4_MEMORY_READ);
spin_unlock(&adap->win0_lock);
if (ret) {
- t4_free_mem(data);
+ kvfree(data);
return ret;
}
ret = copy_to_user(buf, data, count);
- t4_free_mem(data);
+ kvfree(data);
if (ret)
return -EFAULT;
@@ -2753,7 +2753,7 @@ static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf,
adap->sge.egr_sz, adap->sge.blocked_fl);
len += sprintf(buf + len, "\n");
size = simple_read_from_buffer(ubuf, count, ppos, buf, len);
- t4_free_mem(buf);
+ kvfree(buf);
return size;
}
@@ -2773,7 +2773,7 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf,
return err;
bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz);
- t4_free_mem(t);
+ kvfree(t);
return count;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 02f80febeb91..0ba7866c8259 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -969,7 +969,7 @@ static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
{
int i, err = 0;
struct adapter *adapter = netdev2adap(dev);
- u8 *buf = t4_alloc_mem(EEPROMSIZE);
+ u8 *buf = kvzalloc(EEPROMSIZE, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -980,7 +980,7 @@ static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
if (!err)
memcpy(data, buf + e->offset, e->len);
- t4_free_mem(buf);
+ kvfree(buf);
return err;
}
@@ -1009,7 +1009,7 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
if (aligned_offset != eeprom->offset || aligned_len != eeprom->len) {
/* RMW possibly needed for first or last words.
*/
- buf = t4_alloc_mem(aligned_len);
+ buf = kvzalloc(aligned_len, GFP_KERNEL);
if (!buf)
return -ENOMEM;
err = eeprom_rd_phys(adapter, aligned_offset, (u32 *)buf);
@@ -1037,7 +1037,7 @@ static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
err = t4_seeprom_wp(adapter, true);
out:
if (buf != data)
- t4_free_mem(buf);
+ kvfree(buf);
return err;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index afb0967d2ce6..b717c6d6b83a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -880,27 +880,6 @@ freeout:
return err;
}
-/*
- * Allocate a chunk of memory using kmalloc or, if that fails, vmalloc.
- * The allocated memory is cleared.
- */
-void *t4_alloc_mem(size_t size)
-{
- void *p = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
-
- if (!p)
- p = vzalloc(size);
- return p;
-}
-
-/*
- * Free memory allocated through alloc_mem().
- */
-void t4_free_mem(void *addr)
-{
- kvfree(addr);
-}
-
static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
void *accel_priv, select_queue_fallback_t fallback)
{
@@ -1299,7 +1278,7 @@ static int tid_init(struct tid_info *t)
max_ftids * sizeof(*t->ftid_tab) +
ftid_bmap_size * sizeof(long);
- t->tid_tab = t4_alloc_mem(size);
+ t->tid_tab = kvzalloc(size, GFP_KERNEL);
if (!t->tid_tab)
return -ENOMEM;
@@ -3441,7 +3420,7 @@ static int adap_init0(struct adapter *adap)
/* allocate memory to read the header of the firmware on the
* card
*/
- card_fw = t4_alloc_mem(sizeof(*card_fw));
+ card_fw = kvzalloc(sizeof(*card_fw), GFP_KERNEL);
/* Get FW from from /lib/firmware/ */
ret = request_firmware(&fw, fw_info->fw_mod_name,
@@ -3461,7 +3440,7 @@ static int adap_init0(struct adapter *adap)
/* Cleaning up */
release_firmware(fw);
- t4_free_mem(card_fw);
+ kvfree(card_fw);
if (ret < 0)
goto bye;
@@ -4457,9 +4436,9 @@ static void free_some_resources(struct adapter *adapter)
{
unsigned int i;
- t4_free_mem(adapter->l2t);
+ kvfree(adapter->l2t);
t4_cleanup_sched(adapter);
- t4_free_mem(adapter->tids.tid_tab);
+ kvfree(adapter->tids.tid_tab);
cxgb4_cleanup_tc_u32(adapter);
kfree(adapter->sge.egr_map);
kfree(adapter->sge.ingr_map);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index a1b19422b339..ef06ce8247ab 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -432,9 +432,9 @@ void cxgb4_cleanup_tc_u32(struct adapter *adap)
for (i = 0; i < t->size; i++) {
struct cxgb4_link *link = &t->table[i];
- t4_free_mem(link->tid_map);
+ kvfree(link->tid_map);
}
- t4_free_mem(adap->tc_u32);
+ kvfree(adap->tc_u32);
}
struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap)
@@ -446,8 +446,8 @@ struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap)
if (!max_tids)
return NULL;
- t = t4_alloc_mem(sizeof(*t) +
- (max_tids * sizeof(struct cxgb4_link)));
+ t = kvzalloc(sizeof(*t) +
+ (max_tids * sizeof(struct cxgb4_link)), GFP_KERNEL);
if (!t)
return NULL;
@@ -458,7 +458,7 @@ struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap)
unsigned int bmap_size;
bmap_size = BITS_TO_LONGS(max_tids);
- link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size);
+ link->tid_map = kvzalloc(sizeof(unsigned long) * bmap_size, GFP_KERNEL);
if (!link->tid_map)
goto out_no_mem;
bitmap_zero(link->tid_map, max_tids);
@@ -471,11 +471,11 @@ out_no_mem:
struct cxgb4_link *link = &t->table[i];
if (link->tid_map)
- t4_free_mem(link->tid_map);
+ kvfree(link->tid_map);
}
if (t)
- t4_free_mem(t);
+ kvfree(t);
return NULL;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index 7c8c5b9a3c22..6f3692db29af 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -646,7 +646,7 @@ struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end)
if (l2t_size < L2T_MIN_HASH_BUCKETS)
return NULL;
- d = t4_alloc_mem(sizeof(*d) + l2t_size * sizeof(struct l2t_entry));
+ d = kvzalloc(sizeof(*d) + l2t_size * sizeof(struct l2t_entry), GFP_KERNEL);
if (!d)
return NULL;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
index c9026352a842..02acff741f11 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c
@@ -177,7 +177,7 @@ static int t4_sched_queue_unbind(struct port_info *pi, struct ch_sched_queue *p)
}
list_del(&qe->list);
- t4_free_mem(qe);
+ kvfree(qe);
if (atomic_dec_and_test(&e->refcnt)) {
e->state = SCHED_STATE_UNUSED;
memset(&e->info, 0, sizeof(e->info));
@@ -201,7 +201,7 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
if (p->queue < 0 || p->queue >= pi->nqsets)
return -ERANGE;
- qe = t4_alloc_mem(sizeof(struct sched_queue_entry));
+ qe = kvzalloc(sizeof(struct sched_queue_entry), GFP_KERNEL);
if (!qe)
return -ENOMEM;
@@ -211,7 +211,7 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
/* Unbind queue from any existing class */
err = t4_sched_queue_unbind(pi, p);
if (err) {
- t4_free_mem(qe);
+ kvfree(qe);
goto out;
}
@@ -224,7 +224,7 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
spin_lock(&e->lock);
err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE, true);
if (err) {
- t4_free_mem(qe);
+ kvfree(qe);
spin_unlock(&e->lock);
goto out;
}
@@ -512,7 +512,7 @@ struct sched_table *t4_init_sched(unsigned int sched_size)
struct sched_table *s;
unsigned int i;
- s = t4_alloc_mem(sizeof(*s) + sched_size * sizeof(struct sched_class));
+ s = kvzalloc(sizeof(*s) + sched_size * sizeof(struct sched_class), GFP_KERNEL);
if (!s)
return NULL;
@@ -548,6 +548,6 @@ void t4_cleanup_sched(struct adapter *adap)
t4_sched_class_free(pi, e);
write_unlock(&s->rw_lock);
}
- t4_free_mem(s);
+ kvfree(s);
}
}
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index b600fbbbf679..f910f0f386d6 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -56,7 +56,7 @@
local_irq_{dis,en}able()
*/
-static char *version =
+static const char version[] =
"cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n";
/* ======================= configure the driver here ======================= */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 24dfba53a0f2..bbc0a98e7ca3 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -405,7 +405,7 @@ struct mac_driver {
};
struct mac_stats_string {
- char desc[ETH_GSTRING_LEN];
+ const char desc[ETH_GSTRING_LEN];
unsigned long offset;
};
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
index 68812d783f33..413025bdcb50 100644
--- a/drivers/net/ethernet/intel/igb/e1000_phy.c
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -127,7 +127,7 @@ out:
* @offset: register offset to be read
* @data: pointer to the read data
*
- * Reads the MDI control regsiter in the PHY at offset and stores the
+ * Reads the MDI control register in the PHY at offset and stores the
* information read to data.
**/
s32 igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 3ba89bc43d74..6ffd1849a604 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -70,13 +70,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
tmp = size * sizeof(struct mlx4_en_tx_info);
- ring->tx_info = kmalloc_node(tmp, GFP_KERNEL | __GFP_NOWARN, node);
+ ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node);
if (!ring->tx_info) {
- ring->tx_info = vmalloc(tmp);
- if (!ring->tx_info) {
- err = -ENOMEM;
- goto err_ring;
- }
+ err = -ENOMEM;
+ goto err_ring;
}
en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index db65f72879e9..ce852ca22a96 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -115,12 +115,9 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN);
- if (!buddy->bits[i]) {
- buddy->bits[i] = vzalloc(s * sizeof(long));
- if (!buddy->bits[i])
- goto err_out_free;
- }
+ buddy->bits[i] = kvmalloc_array(s, sizeof(long), GFP_KERNEL | __GFP_ZERO);
+ if (!buddy->bits[i])
+ goto err_out_free;
}
set_bit(0, buddy->bits[buddy->max_order]);
diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h
index 07091dd27e5d..7b0a8db57af9 100644
--- a/drivers/net/ethernet/natsemi/sonic.h
+++ b/drivers/net/ethernet/natsemi/sonic.h
@@ -444,7 +444,7 @@ static inline __u16 sonic_rra_get(struct net_device* dev, int entry,
(entry * SIZEOF_SONIC_RR) + offset);
}
-static const char *version =
+static const char version[] =
"sonic.c:v0.92 20.9.98 tsbogend@alpha.franken.de\n";
#endif /* SONIC_H */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 0ed24d6e6c65..40f057edeafc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -3058,7 +3058,7 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
/* There's a possibility the igu_sb_cnt_iov doesn't properly reflect
* the number of VF SBs [especially for first VF on engine, as we can't
- * diffrentiate between empty entries and its entries].
+ * differentiate between empty entries and its entries].
* Since we don't really support more SBs than VFs today, prevent any
* such configuration by sanitizing the number of SBs to equal the
* number of VFs.
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 029f431e89ec..077fa4c9537b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -953,7 +953,7 @@ static int qed_slowpath_start(struct qed_dev *cdev,
if (rc)
goto err2;
- /* First Dword used to diffrentiate between various sources */
+ /* First Dword used to differentiate between various sources */
data = cdev->firmware->data + sizeof(u32);
qed_dbg_pf_init(cdev);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 92a3ee1715d9..ebe39ceea18b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -625,7 +625,7 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn)
* - If !ARI, VFs would start on next device.
* so offset - (256 - pf_id) would provide the number.
* Utilize the fact that (256 - pf_id) is achieved only by later
- * to diffrentiate between the two.
+ * to differentiate between the two.
*/
if (p_hwfn->cdev->p_iov_info->offset < (256 - p_hwfn->abs_pf_id)) {
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index d54490d3f7ad..1e594351a60f 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -387,7 +387,7 @@ static void sxgbe_free_rx_buffers(struct net_device *dev,
/**
* init_tx_ring - init the TX descriptor ring
* @dev: net device structure
- * @tx_ring: ring to be intialised
+ * @tx_ring: ring to be initialised
* @tx_rsize: ring size
* Description: this function initializes the DMA TX descriptor
*/
@@ -437,7 +437,7 @@ dmamem_err:
/**
* free_rx_ring - free the RX descriptor ring
* @dev: net device structure
- * @rx_ring: ring to be intialised
+ * @rx_ring: ring to be initialised
* @rx_rsize: ring size
* Description: this function initializes the DMA RX descriptor
*/
@@ -453,7 +453,7 @@ static void free_rx_ring(struct device *dev, struct sxgbe_rx_queue *rx_ring,
/**
* init_rx_ring - init the RX descriptor ring
* @dev: net device structure
- * @rx_ring: ring to be intialised
+ * @rx_ring: ring to be initialised
* @rx_rsize: ring size
* Description: this function initializes the DMA RX descriptor
*/
@@ -539,7 +539,7 @@ err_free_dma_rx:
/**
* free_tx_ring - free the TX descriptor ring
* @dev: net device structure
- * @tx_ring: ring to be intialised
+ * @tx_ring: ring to be initialised
* @tx_rsize: ring size
* Description: this function initializes the DMA TX descriptor
*/
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index a45f98fa4aa7..ad6db05b97d6 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -23,7 +23,7 @@
*/
#define DRV_VERSION "1.39"
-static const char *version = "tc35815.c:v" DRV_VERSION "\n";
+static const char version[] = "tc35815.c:v" DRV_VERSION "\n";
#define MODNAME "tc35815"
#include <linux/module.h>
diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index b0de8ecd7fe8..f4a816cf012a 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -228,7 +228,7 @@
#define DRV_VERSION "v1.11"
#define DRV_RELDATE "2014/07/01"
-static char version[] =
+static const char version[] =
DRV_NAME ": " DRV_VERSION " " DRV_RELDATE
" Lawrence V. Stefani and others\n";
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index dd7fc6659ad4..9b0d6148e994 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -60,7 +60,8 @@ MODULE_AUTHOR("Jes Sorensen <jes@wildopensource.com>");
MODULE_DESCRIPTION("Essential RoadRunner HIPPI driver");
MODULE_LICENSE("GPL");
-static char version[] = "rrunner.c: v0.50 11/11/2002 Jes Sorensen (jes@wildopensource.com)\n";
+static const char version[] =
+"rrunner.c: v0.50 11/11/2002 Jes Sorensen (jes@wildopensource.com)\n";
static const struct net_device_ops rr_netdev_ops = {
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 8b721321be5b..7aace4582073 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -102,10 +102,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
return -ENXIO;
}
- ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL);
- if (!ndd->data)
- ndd->data = vmalloc(ndd->nsarea.config_size);
-
+ ndd->data = kvmalloc(ndd->nsarea.config_size, GFP_KERNEL);
if (!ndd->data)
return -ENOMEM;
diff --git a/drivers/scsi/isci/registers.h b/drivers/scsi/isci/registers.h
index 97f3ceb8d724..63468cfe3e4a 100644
--- a/drivers/scsi/isci/registers.h
+++ b/drivers/scsi/isci/registers.h
@@ -652,7 +652,7 @@ struct scu_iit_entry {
/*
- * TODO: Where is the SAS_LNKTOV regsiter?
+ * TODO: Where is the SAS_LNKTOV register?
* TODO: Where is the SAS_PHYTOV register? */
#define SCU_SAS_TRANSMIT_IDENTIFICATION_SMP_TARGET_SHIFT (1)
@@ -1827,7 +1827,7 @@ struct scu_peg_registers {
};
/**
- * struct scu_registers - SCU regsiters including both PEG registers if we turn
+ * struct scu_registers - SCU registers including both PEG registers if we turn
* on that compile option. All of these registers are in the memory mapped
* space returned from BAR1.
*
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 0016f12cc563..316c3df0c3fd 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -244,7 +244,7 @@ struct megasas_cmd *megasas_get_cmd(struct megasas_instance
* @instance: Adapter soft state
* @cmd: Command packet to be returned to free command pool
*/
-inline void
+void
megasas_return_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd)
{
unsigned long flags;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 8981806fb13f..099ab4ca7edf 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -1421,7 +1421,7 @@ void mpt3sas_ctl_add_to_event_log(struct MPT3SAS_ADAPTER *ioc,
Mpi2EventNotificationReply_t *mpi_reply);
void mpt3sas_enable_diag_buffer(struct MPT3SAS_ADAPTER *ioc,
- u8 bits_to_regsiter);
+ u8 bits_to_register);
int mpt3sas_send_diag_release(struct MPT3SAS_ADAPTER *ioc, u8 buffer_type,
u8 *issue_reset);
diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c
index a6a76a681ea9..8f638267e704 100644
--- a/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c
+++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-mem.c
@@ -45,15 +45,6 @@ EXPORT_SYMBOL(libcfs_kvzalloc);
void *libcfs_kvzalloc_cpt(struct cfs_cpt_table *cptab, int cpt, size_t size,
gfp_t flags)
{
- void *ret;
-
- ret = kzalloc_node(size, flags | __GFP_NOWARN,
- cfs_cpt_spread_node(cptab, cpt));
- if (!ret) {
- WARN_ON(!(flags & (__GFP_FS | __GFP_HIGH)));
- ret = vmalloc_node(size, cfs_cpt_spread_node(cptab, cpt));
- }
-
- return ret;
+ return kvzalloc_node(size, flags, cfs_cpt_spread_node(cptab, cpt));
}
EXPORT_SYMBOL(libcfs_kvzalloc_cpt);
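The mlx4 and libcfs hunks use the NUMA-aware variants, which keep the node preference for both the kmalloc attempt and the vmalloc fallback. A small hedged sketch, assuming kvmalloc_node()/kvzalloc_node() behave as the conversions above imply; the ring structure is hypothetical:

	#include <linux/mm.h>

	struct example_ring {
		unsigned int size;
		void *info;	/* per-descriptor bookkeeping */
	};

	static int example_ring_alloc(struct example_ring *ring,
				      unsigned int entries, int node)
	{
		ring->size = entries;
		/* Prefer memory on 'node'; falls back to vmalloc on that node. */
		ring->info = kvzalloc_node(entries * sizeof(unsigned long),
					   GFP_KERNEL, node);
		return ring->info ? 0 : -ENOMEM;
	}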
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_bo.c b/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_bo.c
index 6c4ab5aa2a8d..a51a27bc6b5b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_bo.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_bo.c
@@ -36,12 +36,13 @@
#include <linux/string.h>
#include <linux/list.h>
#include <linux/errno.h>
-#include <asm/cacheflush.h>
#include <linux/io.h>
#include <asm/current.h>
#include <linux/sched/signal.h>
#include <linux/file.h>
+#include <asm/set_memory.h>
+
#include "atomisp_internal.h"
#include "hmm/hmm_common.h"
#include "hmm/hmm_pool.h"
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_dynamic_pool.c b/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_dynamic_pool.c
index 6e540cc2e451..9b35980db715 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_dynamic_pool.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_dynamic_pool.c
@@ -27,7 +27,7 @@
#include <linux/types.h>
#include <linux/mm.h>
-#include "asm/cacheflush.h"
+#include <asm/set_memory.h>
#include "atomisp_internal.h"
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_reserved_pool.c b/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_reserved_pool.c
index 590ff7bc6c5f..b51b61995dd9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_reserved_pool.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/hmm/hmm_reserved_pool.c
@@ -27,7 +27,8 @@
#include <linux/types.h>
#include <linux/mm.h>
-#include "asm/cacheflush.h"
+#include <asm/set_memory.h>
+
#include "atomisp_internal.h"
#include "hmm/hmm_pool.h"
diff --git a/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c b/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
index 2009e3a11b86..706bd43e8b1b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
+++ b/drivers/staging/media/atomisp/pci/atomisp2/mmu/isp_mmu.c
@@ -30,13 +30,16 @@
#include <linux/slab.h> /* for kmalloc */
#include <linux/list.h>
#include <linux/io.h>
-#include <asm/cacheflush.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/sizes.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
+
#include "atomisp_internal.h"
#include "mmu/isp_mmu.h"
diff --git a/drivers/staging/most/mostcore/core.c b/drivers/staging/most/mostcore/core.c
index 191404bc5906..892aae6e9c9a 100644
--- a/drivers/staging/most/mostcore/core.c
+++ b/drivers/staging/most/mostcore/core.c
@@ -82,7 +82,7 @@ struct most_inst_obj {
static const struct {
int most_ch_data_type;
- char *name;
+ const char *name;
} ch_data_type[] = {
{ MOST_CH_CONTROL, "control\n" },
{ MOST_CH_ASYNC, "async\n" },
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index e94aea8c0d05..7b2a466616d6 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -939,11 +939,11 @@ static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *buf_list)
return buf;
} /* end of n_hdlc_buf_get() */
-static char hdlc_banner[] __initdata =
+static const char hdlc_banner[] __initconst =
KERN_INFO "HDLC line discipline maxframe=%u\n";
-static char hdlc_register_ok[] __initdata =
+static const char hdlc_register_ok[] __initconst =
KERN_INFO "N_HDLC line discipline registered.\n";
-static char hdlc_register_fail[] __initdata =
+static const char hdlc_register_fail[] __initconst =
KERN_ERR "error registering line discipline: %d\n";
static int __init n_hdlc_init(void)
@@ -968,9 +968,9 @@ static int __init n_hdlc_init(void)
} /* end of init_module() */
-static char hdlc_unregister_ok[] __exitdata =
+static const char hdlc_unregister_ok[] __exitdata =
KERN_INFO "N_HDLC: line discipline unregistered\n";
-static char hdlc_unregister_fail[] __exitdata =
+static const char hdlc_unregister_fail[] __exitdata =
KERN_ERR "N_HDLC: can't unregister line discipline (err = %d)\n";
static void __exit n_hdlc_exit(void)
diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
index 3e99bcc4eba0..f5335be344f6 100644
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -984,7 +984,7 @@ static struct platform_driver asc_serial_driver = {
static int __init asc_init(void)
{
int ret;
- static char banner[] __initdata =
+ static const char banner[] __initconst =
KERN_INFO "STMicroelectronics ASC driver initialized\n";
printk(banner);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9b519897cc17..f61f852d6cfd 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -817,12 +817,9 @@ static int vhost_net_open(struct inode *inode, struct file *f)
struct vhost_virtqueue **vqs;
int i;
- n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
- if (!n) {
- n = vmalloc(sizeof *n);
- if (!n)
- return -ENOMEM;
- }
+ n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT);
+ if (!n)
+ return -ENOMEM;
vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
if (!vqs) {
kvfree(n);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f0ba362d4c10..042030e5a035 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -534,18 +534,9 @@ err_mm:
}
EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
-static void *vhost_kvzalloc(unsigned long size)
-{
- void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
-
- if (!n)
- n = vzalloc(size);
- return n;
-}
-
struct vhost_umem *vhost_dev_reset_owner_prepare(void)
{
- return vhost_kvzalloc(sizeof(struct vhost_umem));
+ return kvzalloc(sizeof(struct vhost_umem), GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare);
@@ -1276,7 +1267,7 @@ EXPORT_SYMBOL_GPL(vhost_vq_access_ok);
static struct vhost_umem *vhost_umem_alloc(void)
{
- struct vhost_umem *umem = vhost_kvzalloc(sizeof(*umem));
+ struct vhost_umem *umem = kvzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return NULL;
@@ -1302,7 +1293,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
return -EOPNOTSUPP;
if (mem.nregions > max_mem_regions)
return -E2BIG;
- newmem = vhost_kvzalloc(size + mem.nregions * sizeof(*m->regions));
+ newmem = kvzalloc(size + mem.nregions * sizeof(*m->regions), GFP_KERNEL);
if (!newmem)
return -ENOMEM;
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 44eed8eb0725..d3bd0e707b3c 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -500,12 +500,9 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
/* This struct is large and allocation could fail, fall back to vmalloc
* if there is no other way.
*/
- vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
- if (!vsock) {
- vsock = vmalloc(sizeof(*vsock));
- if (!vsock)
- return -ENOMEM;
- }
+ vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_REPEAT);
+ if (!vsock)
+ return -ENOMEM;
vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
if (!vqs) {
diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c
index ff2a5d2023e1..6b444400a86c 100644
--- a/drivers/video/fbdev/intelfb/intelfbdrv.c
+++ b/drivers/video/fbdev/intelfb/intelfbdrv.c
@@ -934,7 +934,7 @@ static __inline__ int var_to_refresh(const struct fb_var_screeninfo *var)
}
/***************************************************************
- * Various intialisation functions *
+ * Various initialisation functions *
***************************************************************/
static void get_initial_mode(struct intelfb_info *dinfo)
diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
index 1c1e95a0b8fa..ce4c4729a5e8 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -37,7 +37,7 @@
#include <linux/mm.h>
#include <linux/fb.h>
#include <linux/pci.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/tlbflush.h>
#include <linux/mmzone.h>
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 70c7194e2810..67fbe35ce7cf 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -34,7 +34,7 @@
#include <linux/nmi.h>
#include <linux/kdebug.h>
#include <linux/notifier.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#endif /* CONFIG_HPWDT_NMI_DECODING */
#include <asm/nmi.h>
#include <asm/frame.h>
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 6890897a6f30..10f1ef582659 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -87,18 +87,6 @@ struct user_evtchn {
bool enabled;
};
-static evtchn_port_t *evtchn_alloc_ring(unsigned int size)
-{
- evtchn_port_t *ring;
- size_t s = size * sizeof(*ring);
-
- ring = kmalloc(s, GFP_KERNEL);
- if (!ring)
- ring = vmalloc(s);
-
- return ring;
-}
-
static void evtchn_free_ring(evtchn_port_t *ring)
{
kvfree(ring);
@@ -334,7 +322,7 @@ static int evtchn_resize_ring(struct per_user_data *u)
else
new_size = 2 * u->ring_size;
- new_ring = evtchn_alloc_ring(new_size);
+ new_ring = kvmalloc(new_size * sizeof(*new_ring), GFP_KERNEL);
if (!new_ring)
return -ENOMEM;
diff --git a/fs/aio.c b/fs/aio.c
index f52d925ee259..e51351ed1928 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -373,6 +373,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
pgoff_t idx;
int rc;
+ /*
+ * We cannot support the _NO_COPY case here, because copy needs to
+ * happen under the ctx->completion_lock. That does not work with the
+ * migration workflow of MIGRATE_SYNC_NO_COPY.
+ */
+ if (mode == MIGRATE_SYNC_NO_COPY)
+ return -EINVAL;
+
rc = 0;
/* mapping->private_lock here protects against the kioctx teardown. */
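Later hunks (f2fs, hugetlbfs, ubifs) handle the new MIGRATE_SYNC_NO_COPY mode by transferring page state without copying data, while aio, which must copy under ctx->completion_lock, rejects that mode. A hedged sketch of the callback-side pattern, assuming migrate_page_states() transfers flags and metadata only while migrate_page_copy() also copies the data; everything except those helpers is illustrative:

	#include <linux/fs.h>
	#include <linux/migrate.h>

	static int example_migrate_page(struct address_space *mapping,
					struct page *newpage, struct page *page,
					enum migrate_mode mode)
	{
		int rc;

		rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
		if (rc != MIGRATEPAGE_SUCCESS)
			return rc;

		if (mode != MIGRATE_SYNC_NO_COPY)
			migrate_page_copy(newpage, page);	/* data + state */
		else
			migrate_page_states(newpage, page);	/* state only */

		return MIGRATEPAGE_SUCCESS;
	}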
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 165e7ec12af7..a3a75f1de002 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -5392,13 +5392,10 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
}
- tmp_buf = kmalloc(fs_info->nodesize, GFP_KERNEL | __GFP_NOWARN);
+ tmp_buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
if (!tmp_buf) {
- tmp_buf = vmalloc(fs_info->nodesize);
- if (!tmp_buf) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = -ENOMEM;
+ goto out;
}
left_path->search_commit_root = 1;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index dd7fb22a955a..fc0bd8406758 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -167,8 +167,7 @@ static u8 *alloc_bitmap(u32 bitmap_size)
if (mem)
return mem;
- return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
- PAGE_KERNEL);
+ return __vmalloc(bitmap_size, GFP_NOFS | __GFP_ZERO, PAGE_KERNEL);
}
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 378837e2fb3c..04862ae503a4 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3544,12 +3544,9 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
u64 last_dest_end = destoff;
ret = -ENOMEM;
- buf = kmalloc(fs_info->nodesize, GFP_KERNEL | __GFP_NOWARN);
- if (!buf) {
- buf = vmalloc(fs_info->nodesize);
- if (!buf)
- return ret;
- }
+ buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
+ if (!buf)
+ return ret;
path = btrfs_alloc_path();
if (!path) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a60d5bfb8a49..3f645cd67b54 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6360,22 +6360,16 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
sctx->clone_roots_cnt = arg->clone_sources_count;
sctx->send_max_size = BTRFS_SEND_BUF_SIZE;
- sctx->send_buf = kmalloc(sctx->send_max_size, GFP_KERNEL | __GFP_NOWARN);
+ sctx->send_buf = kvmalloc(sctx->send_max_size, GFP_KERNEL);
if (!sctx->send_buf) {
- sctx->send_buf = vmalloc(sctx->send_max_size);
- if (!sctx->send_buf) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = -ENOMEM;
+ goto out;
}
- sctx->read_buf = kmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL | __GFP_NOWARN);
+ sctx->read_buf = kvmalloc(BTRFS_SEND_READ_SIZE, GFP_KERNEL);
if (!sctx->read_buf) {
- sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE);
- if (!sctx->read_buf) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = -ENOMEM;
+ goto out;
}
sctx->pending_dir_moves = RB_ROOT;
@@ -6396,13 +6390,10 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
alloc_size = arg->clone_sources_count * sizeof(*arg->clone_sources);
if (arg->clone_sources_count) {
- clone_sources_tmp = kmalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN);
+ clone_sources_tmp = kvmalloc(alloc_size, GFP_KERNEL);
if (!clone_sources_tmp) {
- clone_sources_tmp = vmalloc(alloc_size);
- if (!clone_sources_tmp) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = -ENOMEM;
+ goto out;
}
ret = copy_from_user(clone_sources_tmp, arg->clone_sources,
diff --git a/fs/buffer.c b/fs/buffer.c
index 2ba905716d34..584ea92d81b4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2379,8 +2379,7 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size)
goto out;
err = pagecache_write_begin(NULL, mapping, size, 0,
- AOP_FLAG_UNINTERRUPTIBLE|AOP_FLAG_CONT_EXPAND,
- &page, &fsdata);
+ AOP_FLAG_CONT_EXPAND, &page, &fsdata);
if (err)
goto out;
@@ -2415,9 +2414,8 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
}
len = PAGE_SIZE - zerofrom;
- err = pagecache_write_begin(file, mapping, curpos, len,
- AOP_FLAG_UNINTERRUPTIBLE,
- &page, &fsdata);
+ err = pagecache_write_begin(file, mapping, curpos, len, 0,
+ &page, &fsdata);
if (err)
goto out;
zero_user(page, zerofrom, len);
@@ -2449,9 +2447,8 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
}
len = offset - zerofrom;
- err = pagecache_write_begin(file, mapping, curpos, len,
- AOP_FLAG_UNINTERRUPTIBLE,
- &page, &fsdata);
+ err = pagecache_write_begin(file, mapping, curpos, len, 0,
+ &page, &fsdata);
if (err)
goto out;
zero_user(page, zerofrom, len);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 26cc95421cca..18c045e2ead6 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -74,12 +74,9 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
(PAGE_SIZE - 1);
npages = calc_pages_for(align, nbytes);
- pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
- if (!pages) {
- pages = vmalloc(sizeof(*pages) * npages);
- if (!pages)
- return ERR_PTR(-ENOMEM);
- }
+ pages = kvmalloc(sizeof(*pages) * npages, GFP_KERNEL);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
for (idx = 0; idx < npages; ) {
size_t start;
diff --git a/fs/dcache.c b/fs/dcache.c
index 95d71eda8142..808ea99062c2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3548,8 +3548,6 @@ __setup("dhash_entries=", set_dhash_entries);
static void __init dcache_init_early(void)
{
- unsigned int loop;
-
/* If hashes are distributed across NUMA nodes, defer
* hash allocation until vmalloc space is available.
*/
@@ -3561,24 +3559,19 @@ static void __init dcache_init_early(void)
sizeof(struct hlist_bl_head),
dhash_entries,
13,
- HASH_EARLY,
+ HASH_EARLY | HASH_ZERO,
&d_hash_shift,
&d_hash_mask,
0,
0);
-
- for (loop = 0; loop < (1U << d_hash_shift); loop++)
- INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}
static void __init dcache_init(void)
{
- unsigned int loop;
-
- /*
+ /*
* A constructor could be added for stable state like the lists,
* but it is probably not worth it because of the cache nature
- * of the dcache.
+ * of the dcache.
*/
dentry_cache = KMEM_CACHE(dentry,
SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD|SLAB_ACCOUNT);
@@ -3592,14 +3585,11 @@ static void __init dcache_init(void)
sizeof(struct hlist_bl_head),
dhash_entries,
13,
- 0,
+ HASH_ZERO | HASH_ADAPT,
&d_hash_shift,
&d_hash_mask,
0,
0);
-
- for (loop = 0; loop < (1U << d_hash_shift); loop++)
- INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}
/* SLAB cache for __getname() consumers */
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 42f9a0a0c4ca..8eeb694332fe 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -405,8 +405,7 @@ int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de,
int err;
lock_page(page);
- err = exofs_write_begin(NULL, page->mapping, pos, len,
- AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+ err = exofs_write_begin(NULL, page->mapping, pos, len, 0, &page, NULL);
if (err)
EXOFS_ERR("exofs_set_link: exofs_write_begin FAILED => %d\n",
err);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 354dc1a894c2..b60698c104fd 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2393,7 +2393,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
return 0;
size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
- new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
+ new_groupinfo = kvzalloc(size, GFP_KERNEL);
if (!new_groupinfo) {
ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
return -ENOMEM;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a9448db1cf7e..73cae0cc34ca 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2132,7 +2132,7 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
return 0;
size = roundup_pow_of_two(size * sizeof(struct flex_groups));
- new_groups = ext4_kvzalloc(size, GFP_KERNEL);
+ new_groups = kvzalloc(size, GFP_KERNEL);
if (!new_groups) {
ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
size / (int) sizeof(struct flex_groups));
@@ -3866,7 +3866,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
}
- sbi->s_group_desc = ext4_kvmalloc(db_count *
+ sbi->s_group_desc = kvmalloc(db_count *
sizeof(struct buffer_head *),
GFP_KERNEL);
if (sbi->s_group_desc == NULL) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b1cac6d85bcb..8d4e31a0908f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2128,7 +2128,10 @@ int f2fs_migrate_page(struct address_space *mapping,
SetPagePrivate(newpage);
set_page_private(newpage, page_private(page));
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fd39db681226..e6b2ee21fd96 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2014,26 +2014,6 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
return kmalloc(size, flags);
}
-static inline void *f2fs_kvmalloc(size_t size, gfp_t flags)
-{
- void *ret;
-
- ret = kmalloc(size, flags | __GFP_NOWARN);
- if (!ret)
- ret = __vmalloc(size, flags, PAGE_KERNEL);
- return ret;
-}
-
-static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
-{
- void *ret;
-
- ret = kzalloc(size, flags | __GFP_NOWARN);
- if (!ret)
- ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
- return ret;
-}
-
#define get_inode_mode(i) \
((is_inode_flag_set(i, FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f3be240ef129..45a216018a25 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1011,11 +1011,11 @@ static int __exchange_data_block(struct inode *src_inode,
while (len) {
olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
- src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
+ src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
if (!src_blkaddr)
return -ENOMEM;
- do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL);
+ do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL);
if (!do_replace) {
kvfree(src_blkaddr);
return -ENOMEM;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 6e87178d34a2..9886c2ff788f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2641,17 +2641,17 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- nm_i->free_nid_bitmap = f2fs_kvzalloc(nm_i->nat_blocks *
+ nm_i->free_nid_bitmap = kvzalloc(nm_i->nat_blocks *
NAT_ENTRY_BITMAP_SIZE, GFP_KERNEL);
if (!nm_i->free_nid_bitmap)
return -ENOMEM;
- nm_i->nat_block_bitmap = f2fs_kvzalloc(nm_i->nat_blocks / 8,
+ nm_i->nat_block_bitmap = kvzalloc(nm_i->nat_blocks / 8,
GFP_KERNEL);
if (!nm_i->nat_block_bitmap)
return -ENOMEM;
- nm_i->free_nid_count = f2fs_kvzalloc(nm_i->nat_blocks *
+ nm_i->free_nid_count = kvzalloc(nm_i->nat_blocks *
sizeof(unsigned short), GFP_KERNEL);
if (!nm_i->free_nid_count)
return -ENOMEM;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index bff3f3bc7827..010324c8e6c6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2606,13 +2606,13 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
SM_I(sbi)->sit_info = sit_i;
- sit_i->sentries = f2fs_kvzalloc(MAIN_SEGS(sbi) *
+ sit_i->sentries = kvzalloc(MAIN_SEGS(sbi) *
sizeof(struct seg_entry), GFP_KERNEL);
if (!sit_i->sentries)
return -ENOMEM;
bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
- sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
+ sit_i->dirty_sentries_bitmap = kvzalloc(bitmap_size, GFP_KERNEL);
if (!sit_i->dirty_sentries_bitmap)
return -ENOMEM;
@@ -2645,7 +2645,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
return -ENOMEM;
if (sbi->segs_per_sec > 1) {
- sit_i->sec_entries = f2fs_kvzalloc(MAIN_SECS(sbi) *
+ sit_i->sec_entries = kvzalloc(MAIN_SECS(sbi) *
sizeof(struct sec_entry), GFP_KERNEL);
if (!sit_i->sec_entries)
return -ENOMEM;
@@ -2696,12 +2696,12 @@ static int build_free_segmap(struct f2fs_sb_info *sbi)
SM_I(sbi)->free_info = free_i;
bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
- free_i->free_segmap = f2fs_kvmalloc(bitmap_size, GFP_KERNEL);
+ free_i->free_segmap = kvmalloc(bitmap_size, GFP_KERNEL);
if (!free_i->free_segmap)
return -ENOMEM;
sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
- free_i->free_secmap = f2fs_kvmalloc(sec_bitmap_size, GFP_KERNEL);
+ free_i->free_secmap = kvmalloc(sec_bitmap_size, GFP_KERNEL);
if (!free_i->free_secmap)
return -ENOMEM;
@@ -2869,7 +2869,7 @@ static int init_victim_secmap(struct f2fs_sb_info *sbi)
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
- dirty_i->victim_secmap = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
+ dirty_i->victim_secmap = kvzalloc(bitmap_size, GFP_KERNEL);
if (!dirty_i->victim_secmap)
return -ENOMEM;
return 0;
@@ -2891,7 +2891,7 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
for (i = 0; i < NR_DIRTY_TYPE; i++) {
- dirty_i->dirty_segmap[i] = f2fs_kvzalloc(bitmap_size, GFP_KERNEL);
+ dirty_i->dirty_segmap[i] = kvzalloc(bitmap_size, GFP_KERNEL);
if (!dirty_i->dirty_segmap[i])
return -ENOMEM;
}
diff --git a/fs/file.c b/fs/file.c
index ad6f094f2eff..1c2972e3a405 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -42,7 +42,7 @@ static void *alloc_fdmem(size_t size)
if (data != NULL)
return data;
}
- return __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM, PAGE_KERNEL);
+ return __vmalloc(size, GFP_KERNEL_ACCOUNT, PAGE_KERNEL);
}
static void __free_fdtable(struct fdtable *fdt)
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index e33a0d36a93e..5d0182654580 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -485,8 +485,8 @@ void hfs_file_truncate(struct inode *inode)
/* XXX: Can use generic_cont_expand? */
size = inode->i_size - 1;
- res = pagecache_write_begin(NULL, mapping, size+1, 0,
- AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
+ res = pagecache_write_begin(NULL, mapping, size+1, 0, 0,
+ &page, &fsdata);
if (!res) {
res = pagecache_write_end(NULL, mapping, size+1, 0, 0,
page, fsdata);
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index feca524ce2a5..a3eb640b4f8f 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -545,9 +545,8 @@ void hfsplus_file_truncate(struct inode *inode)
void *fsdata;
loff_t size = inode->i_size;
- res = pagecache_write_begin(NULL, mapping, size, 0,
- AOP_FLAG_UNINTERRUPTIBLE,
- &page, &fsdata);
+ res = pagecache_write_begin(NULL, mapping, size, 0, 0,
+ &page, &fsdata);
if (res)
return;
res = pagecache_write_end(NULL, mapping, size,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7163fe014b57..27eff265c28f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -837,7 +837,10 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
rc = migrate_huge_page_move_mapping(mapping, newpage, page);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
diff --git a/fs/inode.c b/fs/inode.c
index 131b2bcebc48..a9caf53df446 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -119,7 +119,7 @@ static int no_open(struct inode *inode, struct file *file)
}
/**
- * inode_init_always - perform inode structure intialisation
+ * inode_init_always - perform inode structure initialisation
* @sb: superblock inode belongs to
* @inode: inode to initialise
*
@@ -1913,8 +1913,6 @@ __setup("ihash_entries=", set_ihash_entries);
*/
void __init inode_init_early(void)
{
- unsigned int loop;
-
/* If hashes are distributed across NUMA nodes, defer
* hash allocation until vmalloc space is available.
*/
@@ -1926,20 +1924,15 @@ void __init inode_init_early(void)
sizeof(struct hlist_head),
ihash_entries,
14,
- HASH_EARLY,
+ HASH_EARLY | HASH_ZERO,
&i_hash_shift,
&i_hash_mask,
0,
0);
-
- for (loop = 0; loop < (1U << i_hash_shift); loop++)
- INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
void __init inode_init(void)
{
- unsigned int loop;
-
/* inode slab cache */
inode_cachep = kmem_cache_create("inode_cache",
sizeof(struct inode),
@@ -1957,14 +1950,11 @@ void __init inode_init(void)
sizeof(struct hlist_head),
ihash_entries,
14,
- 0,
+ HASH_ZERO | HASH_ADAPT,
&i_hash_shift,
&i_hash_mask,
0,
0);
-
- for (loop = 0; loop < (1U << i_hash_shift); loop++)
- INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
diff --git a/fs/iomap.c b/fs/iomap.c
index fb17dba3dac5..0b457ffb85ba 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -158,12 +158,6 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
ssize_t written = 0;
unsigned int flags = AOP_FLAG_NOFS;
- /*
- * Copies from kernel address space cannot fail (NFSD is a big user).
- */
- if (!iter_is_iovec(i))
- flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
do {
struct page *page;
unsigned long offset; /* Offset into pagecache page */
@@ -291,8 +285,7 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
return PTR_ERR(rpage);
status = iomap_write_begin(inode, pos, bytes,
- AOP_FLAG_NOFS | AOP_FLAG_UNINTERRUPTIBLE,
- &page, iomap);
+ AOP_FLAG_NOFS, &page, iomap);
put_page(rpage);
if (unlikely(status))
return status;
@@ -343,8 +336,8 @@ static int iomap_zero(struct inode *inode, loff_t pos, unsigned offset,
struct page *page;
int status;
- status = iomap_write_begin(inode, pos, bytes,
- AOP_FLAG_UNINTERRUPTIBLE | AOP_FLAG_NOFS, &page, iomap);
+ status = iomap_write_begin(inode, pos, bytes, AOP_FLAG_NOFS, &page,
+ iomap);
if (status)
return status;
diff --git a/fs/namei.c b/fs/namei.c
index 482414aa558b..886169c9c616 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4763,7 +4763,7 @@ int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
struct page *page;
void *fsdata;
int err;
- unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
+ unsigned int flags = 0;
if (nofs)
flags |= AOP_FLAG_NOFS;
diff --git a/fs/namespace.c b/fs/namespace.c
index b3b115bd4e1e..6b81c2069979 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3238,7 +3238,6 @@ static void __init init_mount_tree(void)
void __init mnt_init(void)
{
- unsigned u;
int err;
mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
@@ -3247,22 +3246,17 @@ void __init mnt_init(void)
mount_hashtable = alloc_large_system_hash("Mount-cache",
sizeof(struct hlist_head),
mhash_entries, 19,
- 0,
+ HASH_ZERO,
&m_hash_shift, &m_hash_mask, 0, 0);
mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
sizeof(struct hlist_head),
mphash_entries, 19,
- 0,
+ HASH_ZERO,
&mp_hash_shift, &mp_hash_mask, 0, 0);
if (!mount_hashtable || !mountpoint_hashtable)
panic("Failed to allocate mount hash table\n");
- for (u = 0; u <= m_hash_mask; u++)
- INIT_HLIST_HEAD(&mount_hashtable[u]);
- for (u = 0; u <= mp_hash_mask; u++)
- INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
-
kernfs_init();
err = sysfs_init();
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0c8b33d99b1..a12ba94673ad 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -542,6 +542,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
}
} else if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
+ else if (is_device_entry(swpent))
+ page = device_entry_to_page(swpent);
} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
&& pte_none(*pte))) {
page = find_get_entry(vma->vm_file->f_mapping,
@@ -704,6 +706,8 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
if (is_migration_entry(swpent))
page = migration_entry_to_page(swpent);
+ else if (is_device_entry(swpent))
+ page = device_entry_to_page(swpent);
}
if (page) {
int mapcount = page_mapcount(page);
@@ -1196,6 +1200,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_SWAP;
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
+
+ if (is_device_entry(entry))
+ page = device_entry_to_page(entry);
}
if (page && !PageAnon(page))
diff --git a/fs/select.c b/fs/select.c
index 9287d3a96e35..b448274a9790 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -586,10 +586,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
goto out_nofds;
alloc_size = 6 * size;
- bits = kmalloc(alloc_size, GFP_KERNEL|__GFP_NOWARN);
- if (!bits && alloc_size > PAGE_SIZE)
- bits = vmalloc(alloc_size);
-
+ bits = kvmalloc(alloc_size, GFP_KERNEL);
if (!bits)
goto out_nofds;
}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index ca69fb99e41a..dc7c2be963ed 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -25,21 +25,7 @@ static void seq_set_overflow(struct seq_file *m)
static void *seq_buf_alloc(unsigned long size)
{
- void *buf;
- gfp_t gfp = GFP_KERNEL;
-
- /*
- * For high order allocations, use __GFP_NORETRY to avoid oom-killing -
- * it's better to fall back to vmalloc() than to kill things. For small
- * allocations, just use GFP_KERNEL which will oom kill, thus no need
- * for vmalloc fallback.
- */
- if (size > PAGE_SIZE)
- gfp |= __GFP_NORETRY | __GFP_NOWARN;
- buf = kmalloc(size, gfp);
- if (!buf && size > PAGE_SIZE)
- buf = vmalloc(size);
- return buf;
+ return kvmalloc(size, GFP_KERNEL);
}
/**
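The rationale in the deleted comment does not go away; kvmalloc() is expected to apply essentially the same policy internally. A simplified sketch of that behaviour, reconstructed from the fallback code being removed throughout this patch rather than copied verbatim from mm/util.c:

	#include <linux/slab.h>
	#include <linux/vmalloc.h>

	/* Simplified, illustrative view of the kvmalloc() policy. */
	static void *kvmalloc_like(size_t size, gfp_t flags)
	{
		gfp_t kmalloc_flags = flags;
		void *ret;

		/* For large sizes, don't retry hard or warn: vmalloc is the fallback. */
		if (size > PAGE_SIZE)
			kmalloc_flags |= __GFP_NORETRY | __GFP_NOWARN;

		ret = kmalloc(size, kmalloc_flags);
		if (ret || size <= PAGE_SIZE)
			return ret;

		return __vmalloc(size, flags, PAGE_KERNEL);
	}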
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index d9ae86f96df7..c08cbcc5cf08 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1482,7 +1482,10 @@ static int ubifs_migrate_page(struct address_space *mapping,
SetPagePrivate(newpage);
}
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
#endif
diff --git a/fs/xattr.c b/fs/xattr.c
index 7e3317cf4045..464c94bf65f9 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -431,12 +431,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
if (size) {
if (size > XATTR_SIZE_MAX)
return -E2BIG;
- kvalue = kmalloc(size, GFP_KERNEL | __GFP_NOWARN);
- if (!kvalue) {
- kvalue = vmalloc(size);
- if (!kvalue)
- return -ENOMEM;
- }
+ kvalue = kvmalloc(size, GFP_KERNEL);
+ if (!kvalue)
+ return -ENOMEM;
if (copy_from_user(kvalue, value, size)) {
error = -EFAULT;
goto out;
@@ -528,12 +525,9 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
if (size) {
if (size > XATTR_SIZE_MAX)
size = XATTR_SIZE_MAX;
- kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
- if (!kvalue) {
- kvalue = vmalloc(size);
- if (!kvalue)
- return -ENOMEM;
- }
+ kvalue = kvzalloc(size, GFP_KERNEL);
+ if (!kvalue)
+ return -ENOMEM;
}
error = vfs_getxattr(d, kname, kvalue, size);
@@ -611,12 +605,9 @@ listxattr(struct dentry *d, char __user *list, size_t size)
if (size) {
if (size > XATTR_LIST_MAX)
size = XATTR_LIST_MAX;
- klist = kmalloc(size, __GFP_NOWARN | GFP_KERNEL);
- if (!klist) {
- klist = vmalloc(size);
- if (!klist)
- return -ENOMEM;
- }
+ klist = kvmalloc(size, GFP_KERNEL);
+ if (!klist)
+ return -ENOMEM;
}
error = vfs_listxattr(d, klist, size);
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 780fc8986dab..393b6849aeb3 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -67,7 +67,7 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
nofs_flag = memalloc_nofs_save();
lflags = kmem_flags_convert(flags);
- ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+ ptr = __vmalloc(size, lflags | __GFP_ZERO, PAGE_KERNEL);
if (flags & KM_NOFS)
memalloc_nofs_restore(nofs_flag);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 4a98762ec8b4..cd0b077deb35 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3796,7 +3796,7 @@ xlog_recover_bud_pass2(
* This routine is called when an inode create format structure is found in a
* committed transaction in the log. It's purpose is to initialise the inodes
* being allocated on disk. This requires us to get inode cluster buffers that
- * match the range to be intialised, stamped with inode templates and written
+ * match the range to be initialised, stamped with inode templates and written
* by delayed write so that subsequent modifications will hit the cached buffer
* and only need writing out at the end of recovery.
*/
diff --git a/include/asm-generic/set_memory.h b/include/asm-generic/set_memory.h
new file mode 100644
index 000000000000..83e81f8996b2
--- /dev/null
+++ b/include/asm-generic/set_memory.h
@@ -0,0 +1,12 @@
+#ifndef __ASM_SET_MEMORY_H
+#define __ASM_SET_MEMORY_H
+
+/*
+ * Functions to change memory attributes.
+ */
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+
+#endif
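The new header only declares the helpers; architectures that implement them provide the definitions, and the include churn elsewhere in this patch moves callers from <asm/cacheflush.h> to <asm/set_memory.h>. A hedged sketch of typical usage, assuming an architecture that supports these helpers; the function and buffer are illustrative:

	#include <asm/set_memory.h>

	static int example_protect_code(void *buf, int npages)
	{
		unsigned long addr = (unsigned long)buf;
		int ret;

		/* Make the region read-only, then executable (e.g. for a JIT image). */
		ret = set_memory_ro(addr, npages);
		if (ret)
			return ret;
		return set_memory_x(addr, npages);
	}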
diff --git a/include/drm/drm_mem_util.h b/include/drm/drm_mem_util.h
index 70d4e221a3ad..d0f6cf2e5324 100644
--- a/include/drm/drm_mem_util.h
+++ b/include/drm/drm_mem_util.h
@@ -37,8 +37,7 @@ static __inline__ void *drm_calloc_large(size_t nmemb, size_t size)
if (size * nmemb <= PAGE_SIZE)
return kcalloc(nmemb, size, GFP_KERNEL);
- return __vmalloc(size * nmemb,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
+ return vzalloc(size * nmemb);
}
/* Modeled after cairo's malloc_ab, it's like calloc but without the zeroing. */
@@ -50,8 +49,7 @@ static __inline__ void *drm_malloc_ab(size_t nmemb, size_t size)
if (size * nmemb <= PAGE_SIZE)
return kmalloc(nmemb * size, GFP_KERNEL);
- return __vmalloc(size * nmemb,
- GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
+ return vmalloc(size * nmemb);
}
static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp)
@@ -69,8 +67,7 @@ static __inline__ void *drm_malloc_gfp(size_t nmemb, size_t size, gfp_t gfp)
return ptr;
}
- return __vmalloc(size * nmemb,
- gfp | __GFP_HIGHMEM, PAGE_KERNEL);
+ return __vmalloc(size * nmemb, gfp, PAGE_KERNEL);
}
static __inline void drm_free_large(void *ptr)
diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h
index 9657f11d48a7..bca6a5e4ca3d 100644
--- a/include/linux/bcma/bcma_driver_pci.h
+++ b/include/linux/bcma/bcma_driver_pci.h
@@ -80,7 +80,7 @@ struct pci_dev;
#define BCMA_CORE_PCI_MDIODATA_DEV_TX 0x1e /* SERDES TX Dev */
#define BCMA_CORE_PCI_MDIODATA_DEV_RX 0x1f /* SERDES RX Dev */
#define BCMA_CORE_PCI_PCIEIND_ADDR 0x0130 /* indirect access to the internal register */
-#define BCMA_CORE_PCI_PCIEIND_DATA 0x0134 /* Data to/from the internal regsiter */
+#define BCMA_CORE_PCI_PCIEIND_DATA 0x0134 /* Data to/from the internal register */
#define BCMA_CORE_PCI_CLKREQENCTRL 0x0138 /* >= rev 6, Clkreq rdma control */
#define BCMA_CORE_PCI_PCICFG0 0x0400 /* PCI config space 0 (rev >= 8) */
#define BCMA_CORE_PCI_PCICFG1 0x0500 /* PCI config space 1 (rev >= 8) */
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 962164d36506..dbaf312b3317 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -358,6 +358,8 @@ extern void *alloc_large_system_hash(const char *tablename,
#define HASH_EARLY 0x00000001 /* Allocating during early boot? */
#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min
* shift passed via *_hash_shift */
+#define HASH_ZERO 0x00000004 /* Zero allocated hash table */
+#define HASH_ADAPT 0x00000008 /* Adaptive scale for large memory */
/* Only NUMA needs hash distribution. 64bit NUMA architectures have
* sufficient vmalloc space.
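HASH_ZERO lets alloc_large_system_hash() hand back pre-zeroed buckets, which is what allows the dcache, inode and mount hash hunks above to drop their INIT_HLIST_HEAD() loops (an all-zero hlist head is a valid empty list). A sketch of the call pattern, modelled on those hunks; the table name and sizing parameters are illustrative:

	static struct hlist_head *example_hashtable __read_mostly;
	static unsigned int example_hash_shift __read_mostly;
	static unsigned int example_hash_mask __read_mostly;

	static void __init example_hash_init(void)
	{
		example_hashtable =
			alloc_large_system_hash("Example-cache",
						sizeof(struct hlist_head),
						0,		/* auto-size from memory */
						14,		/* scale */
						HASH_ZERO,	/* buckets returned zeroed */
						&example_hash_shift,
						&example_hash_mask,
						0, 0);
		/* No INIT_HLIST_HEAD() loop needed: zeroed heads are empty lists. */
	}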
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 511fe910bf1d..68c55488f45d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -19,7 +19,9 @@
#include <net/sch_generic.h>
-#include <asm/cacheflush.h>
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+#include <asm/set_memory.h>
+#endif
#include <uapi/linux/filter.h>
#include <uapi/linux/bpf.h>
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c0b6150c5fcc..3c18fa69165a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -250,9 +250,8 @@ enum positive_aop_returns {
AOP_TRUNCATED_PAGE = 0x80001,
};
-#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */
-#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */
-#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct
+#define AOP_FLAG_CONT_EXPAND 0x0001 /* called from cont_expand */
+#define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct
* helper code (eg buffer layer)
* to clear GFP_FS from alloc */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ef7123219f14..b1a1a3689448 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -72,7 +72,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops);
* CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION_SAFE, STUB and
* IPMODIFY are a kind of attribute flags which can be set only before
* registering the ftrace_ops, and can not be modified while registered.
- * Changing those attribute flags after regsitering ftrace_ops will
+ * Changing those attribute flags after registering ftrace_ops will
* cause unexpected results.
*
* ENABLED - set/unset when ftrace_ops is registered/unregistered
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
new file mode 100644
index 000000000000..374e5fd9e6ad
--- /dev/null
+++ b/include/linux/hmm.h
@@ -0,0 +1,468 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Jérôme Glisse <jglisse@redhat.com>
+ */
+/*
+ * Heterogeneous Memory Management (HMM)
+ *
+ * See Documentation/vm/hmm.txt for reasons and overview of what HMM is and what
+ * it is for. Here we focus on the HMM API description, with some explanation of
+ * the underlying implementation.
+ *
+ * Short description: HMM provides a set of helpers to share a virtual address
+ * space between CPU and a device, so that the device can access any valid
+ * address of the process (while still obeying memory protection). HMM also
+ * provides helpers to migrate process memory to device memory, and back. Each
+ * set of functionality (address space mirroring, and migration to and from
+ * device memory) can be used independently of the other.
+ *
+ *
+ * HMM address space mirroring API:
+ *
+ * Use HMM address space mirroring if you want to mirror range of the CPU page
+ * table of a process into a device page table. Here, "mirror" means "keep
+ * synchronized". Prerequisites: the device must provide the ability to write-
+ * protect its page tables (at PAGE_SIZE granularity), and must be able to
+ * recover from the resulting potential page faults.
+ *
+ * HMM guarantees that at any point in time, a given virtual address points to
+ * either the same memory in both CPU and device page tables (that is: CPU and
+ * device page tables each point to the same pages), or that one page table (CPU
+ * or device) points to no entry, while the other still points to the old page
+ * for the address. The latter case happens when the CPU page table update
+ * happens first, and then the update is mirrored over to the device page table.
+ * This does not cause any issue, because the CPU page table cannot start
+ * pointing to a new page until the device page table is invalidated.
+ *
+ * HMM uses mmu_notifiers to monitor the CPU page tables, and forwards any
+ * updates to each device driver that has registered a mirror. It also provides
+ * some API calls to help with taking a snapshot of the CPU page table, and to
+ * synchronize with any updates that might happen concurrently.
+ *
+ *
+ * HMM migration to and from device memory:
+ *
+ * HMM provides a set of helpers to hotplug device memory as ZONE_DEVICE, with
+ * a new MEMORY_DEVICE_UNADDRESSABLE type. This provides a struct page for
+ * each page of the device memory, and allows the device driver to manage its
+ * memory using those struct pages. Having struct pages for device memory makes
+ * migration easier. Because that memory is not addressable by the CPU it must
+ * never be pinned to the device; in other words, any CPU page fault can always
+ * cause the device memory to be migrated (copied/moved) back to regular memory.
+ *
+ * A new migrate helper (migrate_vma()) has been added (see mm/migrate.c) that
+ * allows use of a device DMA engine to perform the copy operation between
+ * regular system memory and device memory.
+ */
+#ifndef LINUX_HMM_H
+#define LINUX_HMM_H
+
+#include <linux/kconfig.h>
+
+#if IS_ENABLED(CONFIG_HMM)
+
+#include <linux/device.h>
+#include <linux/migrate.h>
+#include <linux/memremap.h>
+#include <linux/completion.h>
+
+struct hmm;
+
+/*
+ * hmm_pfn_t - HMM uses its own pfn type to keep several flags per page
+ *
+ * Flags:
+ * HMM_PFN_VALID: pfn is valid
+ * HMM_PFN_READ: CPU page table has read permission set
+ * HMM_PFN_WRITE: CPU page table has write permission set
+ * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
+ * HMM_PFN_EMPTY: corresponding CPU page table entry is pte_none()
+ * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
+ * result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
+ * be mirrored by a device, because the entry will never have HMM_PFN_VALID
+ * set and the pfn value is undefined.
+ * HMM_PFN_DEVICE_UNADDRESSABLE: unaddressable device memory (ZONE_DEVICE)
+ */
+typedef unsigned long hmm_pfn_t;
+
+#define HMM_PFN_VALID (1 << 0)
+#define HMM_PFN_READ (1 << 1)
+#define HMM_PFN_WRITE (1 << 2)
+#define HMM_PFN_ERROR (1 << 3)
+#define HMM_PFN_EMPTY (1 << 4)
+#define HMM_PFN_SPECIAL (1 << 5)
+#define HMM_PFN_DEVICE_UNADDRESSABLE (1 << 6)
+#define HMM_PFN_SHIFT 7
+
+/*
+ * hmm_pfn_t_to_page() - return struct page pointed to by a valid hmm_pfn_t
+ * @pfn: hmm_pfn_t to convert to struct page
+ * Returns: struct page pointer if pfn is a valid hmm_pfn_t, NULL otherwise
+ *
+ * If the hmm_pfn_t is valid (ie valid flag set) then return the struct page
+ * matching the pfn value stored in the hmm_pfn_t. Otherwise return NULL.
+ */
+static inline struct page *hmm_pfn_t_to_page(hmm_pfn_t pfn)
+{
+ if (!(pfn & HMM_PFN_VALID))
+ return NULL;
+ return pfn_to_page(pfn >> HMM_PFN_SHIFT);
+}
+
+/*
+ * hmm_pfn_t_to_pfn() - return pfn value stored in a hmm_pfn_t
+ * @pfn: hmm_pfn_t to extract pfn from
+ * Returns: pfn value if hmm_pfn_t is valid, -1UL otherwise
+ */
+static inline unsigned long hmm_pfn_t_to_pfn(hmm_pfn_t pfn)
+{
+ if (!(pfn & HMM_PFN_VALID))
+ return -1UL;
+ return (pfn >> HMM_PFN_SHIFT);
+}
+
+/*
+ * hmm_pfn_t_from_page() - create a valid hmm_pfn_t value from struct page
+ * @page: struct page pointer for which to create the hmm_pfn_t
+ * Returns: valid hmm_pfn_t for the page
+ */
+static inline hmm_pfn_t hmm_pfn_t_from_page(struct page *page)
+{
+ return (page_to_pfn(page) << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+}
+
+/*
+ * hmm_pfn_t_from_pfn() - create a valid hmm_pfn_t value from pfn
+ * @pfn: pfn value for which to create the hmm_pfn_t
+ * Returns: valid hmm_pfn_t for the pfn
+ */
+static inline hmm_pfn_t hmm_pfn_t_from_pfn(unsigned long pfn)
+{
+ return (pfn << HMM_PFN_SHIFT) | HMM_PFN_VALID;
+}
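+
+/*
+ * For example (a hedged sketch, not part of this patch), a driver decoding a
+ * pfns array filled by hmm_vma_get_pfns() might do the following;
+ * device_map_page() is a hypothetical driver-side helper:
+ *
+ * for (i = 0; i < npages; i++) {
+ *     struct page *page = hmm_pfn_t_to_page(pfns[i]);
+ *
+ *     if (!page || !(pfns[i] & HMM_PFN_READ))
+ *         continue; // empty, special or error entry: nothing to mirror yet
+ *     device_map_page(device, addr + (i << PAGE_SHIFT), page,
+ *                     pfns[i] & HMM_PFN_WRITE);
+ * }
+ */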
+
+
+#if IS_ENABLED(CONFIG_HMM_MIRROR)
+/*
+ * Mirroring: how to synchronize device page table with CPU page table.
+ *
+ * A device driver that is participating in HMM mirroring must always
+ * synchronize with CPU page table updates. For this, device drivers can either
+ * directly use mmu_notifier APIs or they can use the hmm_mirror API. Device
+ * drivers can decide to register one mirror per device per process, or just
+ * one mirror per process for a group of devices. The pattern is:
+ *
+ * int device_bind_address_space(..., struct mm_struct *mm, ...)
+ * {
+ * struct device_address_space *das;
+ *
+ * // Device driver specific initialization, and allocation of das
+ * // which contains an hmm_mirror struct as one of its fields.
+ * ...
+ *
+ * ret = hmm_mirror_register(&das->mirror, mm, &device_mirror_ops);
+ * if (ret) {
+ * // Cleanup on error
+ * return ret;
+ * }
+ *
+ * // Other device driver specific initialization
+ * ...
+ * }
+ *
+ * Once an hmm_mirror is registered for an address space, the device driver
+ * will get callbacks through sync_cpu_device_pagetables() operation (see
+ * hmm_mirror_ops struct).
+ *
+ * The device driver must not free the struct containing the hmm_mirror struct
+ * before calling hmm_mirror_unregister(). The expected usage is to do that when
+ * the device driver is unbinding from an address space.
+ *
+ *
+ * void device_unbind_address_space(struct device_address_space *das)
+ * {
+ * // Device driver specific cleanup
+ * ...
+ *
+ * hmm_mirror_unregister(&das->mirror);
+ *
+ * // Other device driver specific cleanup, and now das can be freed
+ * ...
+ * }
+ */
+
+struct hmm_mirror;
+
+/*
+ * enum hmm_update_type - type of update
+ * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why)
+ */
+enum hmm_update_type {
+ HMM_UPDATE_INVALIDATE,
+};
+
+/*
+ * struct hmm_mirror_ops - HMM mirror device operations callback
+ *
+ * @update: callback to update range on a device
+ */
+struct hmm_mirror_ops {
+ /* sync_cpu_device_pagetables() - synchronize page tables
+ *
+ * @mirror: pointer to struct hmm_mirror
+ * @update_type: type of update that occurred to the CPU page table
+ * @start: virtual start address of the range to update
+ * @end: virtual end address of the range to update
+ *
+ * This callback ultimately originates from mmu_notifiers when the CPU
+ * page table is updated. The device driver must update its page table
+ * in response to this callback. The update_type argument tells what action
+ * to perform.
+ *
+ * The device driver must not return from this callback until the device
+ * page tables are completely updated (TLBs flushed, etc); this is a
+ * synchronous call.
+ */
+ void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
+ enum hmm_update_type update_type,
+ unsigned long start,
+ unsigned long end);
+};
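+
+/*
+ * A hedged sketch of such a callback (my_* names are hypothetical driver-side
+ * helpers). Note that it takes the same driver lock that serializes device
+ * page table updates elsewhere (see hmm_vma_range_done() below):
+ *
+ * static void my_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
+ *                                           enum hmm_update_type update_type,
+ *                                           unsigned long start,
+ *                                           unsigned long end)
+ * {
+ *     struct my_device *mdev = container_of(mirror, struct my_device, mirror);
+ *
+ *     mutex_lock(&mdev->pt_lock);
+ *     my_device_unmap_range(mdev, start, end); // update device page table
+ *     my_device_flush_tlb(mdev, start, end);   // must be done before returning
+ *     mutex_unlock(&mdev->pt_lock);
+ * }
+ */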
+
+/*
+ * struct hmm_mirror - mirror struct for a device driver
+ *
+ * @hmm: pointer to struct hmm (which is unique per mm_struct)
+ * @ops: device driver callback for HMM mirror operations
+ * @list: for list of mirrors of a given mm
+ *
+ * Each address space (mm_struct) being mirrored by a device must register one
+ * instance of an hmm_mirror struct with HMM. HMM will track the list of all
+ * mirrors for each mm_struct.
+ */
+struct hmm_mirror {
+ struct hmm *hmm;
+ const struct hmm_mirror_ops *ops;
+ struct list_head list;
+};
+
+int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm);
+void hmm_mirror_unregister(struct hmm_mirror *mirror);
+
+
+/*
+ * struct hmm_range - track invalidation lock on virtual address range
+ *
+ * @list: all range locks are on a list
+ * @start: range virtual start address (inclusive)
+ * @end: range virtual end address (exclusive)
+ * @pfns: array of pfns (big enough for the range)
+ * @valid: pfns array did not change since it was filled by an HMM function
+ */
+struct hmm_range {
+ struct list_head list;
+ unsigned long start;
+ unsigned long end;
+ hmm_pfn_t *pfns;
+ bool valid;
+};
+
+/*
+ * To snapshot the CPU page table, call hmm_vma_get_pfns(), then take a device
+ * driver lock that serializes device page table updates, then call
+ * hmm_vma_range_done(), to check if the snapshot is still valid. The same
+ * device driver page table update lock must also be used in the
+ * hmm_mirror_ops.sync_cpu_device_pagetables() callback, so that CPU page
+ * table invalidation serializes on it.
+ *
+ * YOU MUST CALL hmm_vma_range_done() ONCE AND ONLY ONCE EACH TIME YOU CALL
+ * hmm_vma_get_pfns() WITHOUT ERROR !
+ *
+ * IF YOU DO NOT FOLLOW THE ABOVE RULE THE SNAPSHOT CONTENT MIGHT BE INVALID !
+ */
+int hmm_vma_get_pfns(struct vm_area_struct *vma,
+ struct hmm_range *range,
+ unsigned long start,
+ unsigned long end,
+ hmm_pfn_t *pfns);
+bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
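+
+/*
+ * The expected pattern, as a hedged sketch (mdev->pt_lock and
+ * my_device_update_page_table() are hypothetical driver-side names):
+ *
+ * again:
+ *     ret = hmm_vma_get_pfns(vma, &range, start, end, pfns);
+ *     if (ret)
+ *         return ret;
+ *     ...
+ *     mutex_lock(&mdev->pt_lock);
+ *     if (!hmm_vma_range_done(vma, &range)) {
+ *         mutex_unlock(&mdev->pt_lock);
+ *         goto again; // the snapshot was invalidated, start over
+ *     }
+ *     my_device_update_page_table(mdev, &range);
+ *     mutex_unlock(&mdev->pt_lock);
+ */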
+
+
+/*
+ * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will
+ * not migrate any device memory back to system memory. The hmm_pfn_t array will
+ * be updated with the fault result and current snapshot of the CPU page table
+ * for the range.
+ *
+ * The mmap_sem must be taken in read mode before entering and it might be
+ * dropped by the function if the block argument is false. In that case, the
+ * function returns -EAGAIN.
+ *
+ * Return value does not reflect if the fault was successful for every single
+ * address or not. Therefore, the caller must inspect the hmm_pfn_t array to
+ * determine fault status for each address.
+ *
+ * Trying to fault inside an invalid vma will result in -EINVAL.
+ *
+ * See the function description in mm/hmm.c for further documentation.
+ */
+int hmm_vma_fault(struct vm_area_struct *vma,
+ struct hmm_range *range,
+ unsigned long start,
+ unsigned long end,
+ hmm_pfn_t *pfns,
+ bool write,
+ bool block);
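+
+/*
+ * A hedged usage sketch (not part of this patch): with block == false the
+ * mmap_sem may be dropped and -EAGAIN returned, so the caller must be ready
+ * to take it again and retry:
+ *
+ * down_read(&mm->mmap_sem);
+ * ret = hmm_vma_fault(vma, &range, start, end, pfns, write, false);
+ * if (ret == -EAGAIN) {
+ *     // mmap_sem was dropped, take it again, revalidate the vma and retry
+ * }
+ * // on success (0), inspect pfns[] and then follow the hmm_vma_range_done()
+ * // pattern described above
+ */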
+#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+
+
+#if IS_ENABLED(CONFIG_HMM_DEVMEM)
+struct hmm_devmem;
+
+struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
+ unsigned long addr);
+
+/*
+ * struct hmm_devmem_ops - callback for ZONE_DEVICE memory events
+ *
+ * @free: called when the refcount on a page reaches 1, meaning the page is no longer in use
+ * @fault: called when there is a CPU page fault on unaddressable memory
+ */
+struct hmm_devmem_ops {
+ void (*free)(struct hmm_devmem *devmem, struct page *page);
+ int (*fault)(struct hmm_devmem *devmem,
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ struct page *page,
+ unsigned int flags,
+ pmd_t *pmdp);
+};
+
+/*
+ * struct hmm_devmem - track device memory
+ *
+ * @completion: completion object for device memory
+ * @pfn_first: first pfn for this resource (set by hmm_devmem_add())
+ * @pfn_last: last pfn for this resource (set by hmm_devmem_add())
+ * @resource: IO resource reserved for this chunk of memory
+ * @pagemap: device page map for that chunk
+ * @device: device to bind resource to
+ * @ops: memory operations callback
+ * @ref: per CPU refcount
+ *
+ * This is a helper structure for device drivers that do not wish to implement
+ * the gory details related to hotplugging new memory and allocating struct
+ * pages.
+ *
+ * Device drivers can directly use ZONE_DEVICE memory on their own if they
+ * wish to do so.
+ */
+struct hmm_devmem {
+ struct completion completion;
+ unsigned long pfn_first;
+ unsigned long pfn_last;
+ struct resource *resource;
+ struct device *device;
+ struct dev_pagemap pagemap;
+ const struct hmm_devmem_ops *ops;
+ struct percpu_ref ref;
+};
+
+/*
+ * To add (hotplug) device memory, HMM assumes that there is no real resource
+ * that reserves a range in the physical address space (this is intended to be
+ * used by unaddressable device memory). It will reserve a physical range big
+ * enough and allocate struct pages for it.
+ *
+ * The device driver can wrap the hmm_devmem struct inside a private device
+ * driver struct. The device driver must call hmm_devmem_remove() before the
+ * device goes away and before freeing the hmm_devmem struct memory.
+ */
+struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
+ struct device *device,
+ unsigned long size);
+void hmm_devmem_remove(struct hmm_devmem *devmem);
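+
+/*
+ * A hedged sketch of the expected usage (my_* names and MY_MEMORY_SIZE are
+ * hypothetical, not part of this patch):
+ *
+ * static int my_probe(struct device *device)
+ * {
+ *     struct my_device *mdev = ...;
+ *
+ *     mdev->devmem = hmm_devmem_add(&my_devmem_ops, device, MY_MEMORY_SIZE);
+ *     // check for failure here (see hmm_devmem_add() in mm/hmm.c for the
+ *     // exact error convention)
+ *     ...
+ * }
+ *
+ * static void my_remove(struct device *device)
+ * {
+ *     ...
+ *     hmm_devmem_remove(mdev->devmem);
+ * }
+ */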
+
+int hmm_devmem_fault_range(struct hmm_devmem *devmem,
+ struct vm_area_struct *vma,
+ const struct migrate_vma_ops *ops,
+ unsigned long *src,
+ unsigned long *dst,
+ unsigned long start,
+ unsigned long addr,
+ unsigned long end,
+ void *private);
+
+/*
+ * hmm_devmem_page_set_drvdata - set per-page driver data field
+ *
+ * @page: pointer to struct page
+ * @data: driver data value to set
+ *
+ * Because the page cannot be on the LRU, we have an unsigned long that the
+ * driver can use to store a per-page field. This is just a simple helper for that.
+ */
+static inline void hmm_devmem_page_set_drvdata(struct page *page,
+ unsigned long data)
+{
+ unsigned long *drvdata = (unsigned long *)&page->pgmap;
+
+ drvdata[1] = data;
+}
+
+/*
+ * hmm_devmem_page_get_drvdata - get per page driver data field
+ *
+ * @page: pointer to struct page
+ * Return: driver data value
+ */
+static inline unsigned long hmm_devmem_page_get_drvdata(struct page *page)
+{
+ unsigned long *drvdata = (unsigned long *)&page->pgmap;
+
+ return drvdata[1];
+}
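+
+/*
+ * For example (a hedged sketch; struct my_chunk is a hypothetical driver
+ * structure), a driver can stash a pointer to its own per-page bookkeeping:
+ *
+ * hmm_devmem_page_set_drvdata(page, (unsigned long)my_chunk);
+ * ...
+ * my_chunk = (struct my_chunk *)hmm_devmem_page_get_drvdata(page);
+ */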
+
+
+/*
+ * struct hmm_device - fake device to hang device memory onto
+ *
+ * @device: device struct
+ * @minor: device minor number
+ */
+struct hmm_device {
+ struct device device;
+ unsigned int minor;
+};
+
+/*
+ * A device driver that wants to handle memory from multiple devices through a
+ * single fake device can use hmm_device to do so. This is purely a helper; it
+ * is not strictly needed in order to make use of any HMM functionality.
+ */
+struct hmm_device *hmm_device_new(void *drvdata);
+void hmm_device_put(struct hmm_device *hmm_device);
+#endif /* IS_ENABLED(CONFIG_HMM_DEVMEM) */
+
+
+/* Below are for HMM internal use only! Not to be used by device driver! */
+void hmm_mm_destroy(struct mm_struct *mm);
+
+#else /* IS_ENABLED(CONFIG_HMM) */
+
+/* Below are for HMM internal use only! Not to be used by device driver! */
+static inline void hmm_mm_destroy(struct mm_struct *mm) {}
+
+#endif /* IS_ENABLED(CONFIG_HMM) */
+#endif /* LINUX_HMM_H */
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 6230064d7f95..ec619dccd616 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -130,6 +130,7 @@ enum {
IORES_DESC_ACPI_NV_STORAGE = 3,
IORES_DESC_PERSISTENT_MEMORY = 4,
IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5,
+ IORES_DESC_DEVICE_MEMORY_UNADDRESSABLE = 6,
};
/* helpers to define resources */
diff --git a/include/linux/kref.h b/include/linux/kref.h
index f4156f88f557..29220724bf1c 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -66,8 +66,6 @@ static inline void kref_get(struct kref *kref)
*/
static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref))
{
- WARN_ON(release == NULL);
-
if (refcount_dec_and_test(&kref->refcount)) {
release(kref);
return 1;
@@ -79,8 +77,6 @@ static inline int kref_put_mutex(struct kref *kref,
void (*release)(struct kref *kref),
struct mutex *lock)
{
- WARN_ON(release == NULL);
-
if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) {
release(kref);
return 1;
@@ -92,8 +88,6 @@ static inline int kref_put_lock(struct kref *kref,
void (*release)(struct kref *kref),
spinlock_t *lock)
{
- WARN_ON(release == NULL);
-
if (refcount_dec_and_lock(&kref->refcount, lock)) {
release(kref);
return 1;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7e74ae4d99bb..339c9da24d31 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -767,8 +767,6 @@ void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
-void *kvm_kvzalloc(unsigned long size);
-
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 134a2f69c21a..e60f2035b803 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,6 +13,36 @@ struct mem_section;
struct memory_block;
struct resource;
+/*
+ * When hotplugging memory with arch_add_memory(), we want more information on
+ * the type of memory we are hotplugging, because depending on the type of
+ * memory, the architecture code might want to take different paths.
+ *
+ * MEMORY_NORMAL:
+ * Your regular system memory. Default common case.
+ *
+ * MEMORY_DEVICE_PERSISTENT:
+ * Persistent device memory (pmem): struct page might be allocated in different
+ * memory and architecture might want to perform special actions. It is similar
+ * to regular memory, in that the CPU can access it transparently. However,
+ * it is likely to have different bandwidth and latency than regular memory.
+ * See Documentation/nvdimm/nvdimm.txt for more information.
+ *
+ * MEMORY_DEVICE_UNADDRESSABLE:
+ * Device memory that is not directly addressable by the CPU: CPU can neither
+ * read nor write _UNADDRESSABLE memory. In this case, we do still have struct
+ * pages backing the device memory. Doing so simplifies the implementation, but
+ * it is important to remember that there are certain points at which the struct
+ * page must be treated as an opaque object, rather than a "normal" struct page.
+ * A more complete discussion of unaddressable memory may be found in
+ * include/linux/hmm.h and Documentation/vm/hmm.txt.
+ */
+enum memory_type {
+ MEMORY_NORMAL = 0,
+ MEMORY_DEVICE_PERSISTENT,
+ MEMORY_DEVICE_UNADDRESSABLE,
+};
+
#ifdef CONFIG_MEMORY_HOTPLUG
/*
@@ -104,7 +134,7 @@ extern bool memhp_auto_online;
#ifdef CONFIG_MEMORY_HOTREMOVE
extern bool is_pageblock_removable_nolock(struct page *page);
-extern int arch_remove_memory(u64 start, u64 size);
+extern int arch_remove_memory(u64 start, u64 size, enum memory_type type);
extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages);
#endif /* CONFIG_MEMORY_HOTREMOVE */
@@ -276,7 +306,7 @@ extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource, bool online);
extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
bool for_device);
-extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
+extern int arch_add_memory(int nid, u64 start, u64 size, enum memory_type type);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern void remove_memory(int nid, u64 start, u64 size);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 93416196ba64..3a9494e91e6e 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -35,24 +35,76 @@ static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
}
#endif
+/*
+ * For MEMORY_DEVICE_UNADDRESSABLE we use ZONE_DEVICE and extend it with two
+ * callbacks:
+ * page_fault()
+ * page_free()
+ *
+ * Additional notes about MEMORY_DEVICE_UNADDRESSABLE may be found in
+ * include/linux/hmm.h and Documentation/vm/hmm.txt. There is also a brief
+ * explanation in include/linux/memory_hotplug.h.
+ *
+ * The page_fault() callback must migrate page back, from device memory to
+ * system memory, so that the CPU can access it. This might fail for various
+ * reasons (device issues, device has been unplugged, ...). When such error
+ * conditions happen, the page_fault() callback must return VM_FAULT_SIGBUS and
+ * set the CPU page table entry to "poisoned".
+ *
+ * Note that because memory cgroup charges are transferred to the device memory,
+ * this should never fail due to memory restrictions. However, allocation
+ * of a regular system page might still fail because we are out of memory. If
+ * that happens, the page_fault() callback must return VM_FAULT_OOM.
+ *
+ * The page_fault() callback can also try to migrate back multiple pages in one
+ * chunk, as an optimization. It must, however, prioritize the faulting address
+ * over all the others.
+ *
+ *
+ * The page_free() callback is called once the page refcount reaches 1
+ * (ZONE_DEVICE pages never reach a refcount of 0 unless there is a refcount
+ * bug). This allows the device driver to implement its own memory management.
+ */
+typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
+ unsigned long addr,
+ struct page *page,
+ unsigned int flags,
+ pmd_t *pmdp);
+typedef void (*dev_page_free_t)(struct page *page, void *data);
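+
+/*
+ * A hedged illustration (the hmm_devmem helpers in include/linux/hmm.h
+ * normally take care of this; my_* names are hypothetical): a driver hooking
+ * up unaddressable memory would fill the fields below roughly as:
+ *
+ * pgmap->type = MEMORY_DEVICE_UNADDRESSABLE;
+ * pgmap->page_fault = my_page_fault; // migrates the page back to system ram
+ * pgmap->page_free = my_page_free;
+ * pgmap->data = my_private;
+ */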
+
/**
* struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @page_fault: callback when CPU fault on an unaddressable device page
+ * @page_free: free page callback when page refcount reaches 1
* @altmap: pre-allocated/reserved memory for vmemmap allocations
* @res: physical address range covered by @ref
* @ref: reference count that pins the devm_memremap_pages() mapping
* @dev: host device of the mapping for debug
+ * @data: private data pointer for page_free()
+ * @type: memory type: see MEMORY_* in memory_hotplug.h
*/
struct dev_pagemap {
+ dev_page_fault_t page_fault;
+ dev_page_free_t page_free;
struct vmem_altmap *altmap;
const struct resource *res;
struct percpu_ref *ref;
struct device *dev;
+ void *data;
+ enum memory_type type;
};
#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap);
struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
+
+static inline bool is_device_unaddressable_page(const struct page *page)
+{
+ /* See MEMORY_DEVICE_UNADDRESSABLE in include/linux/memory_hotplug.h */
+ return ((page_zonenum(page) == ZONE_DEVICE) &&
+ (page->pgmap->type == MEMORY_DEVICE_UNADDRESSABLE));
+}
#else
static inline void *devm_memremap_pages(struct device *dev,
struct resource *res, struct percpu_ref *ref,
@@ -71,6 +123,11 @@ static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
{
return NULL;
}
+
+static inline bool is_device_unaddressable_page(const struct page *page)
+{
+ return false;
+}
#endif
/**
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 48e24844b3c5..7dd875a76847 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -43,6 +43,7 @@ extern void putback_movable_page(struct page *page);
extern int migrate_prep(void);
extern int migrate_prep_local(void);
+extern void migrate_page_states(struct page *newpage, struct page *page);
extern void migrate_page_copy(struct page *newpage, struct page *page);
extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
@@ -63,6 +64,10 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
static inline int migrate_prep(void) { return -ENOSYS; }
static inline int migrate_prep_local(void) { return -ENOSYS; }
+static inline void migrate_page_states(struct page *newpage, struct page *page)
+{
+}
+
static inline void migrate_page_copy(struct page *newpage,
struct page *page) {}
@@ -122,4 +127,114 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
}
#endif /* CONFIG_NUMA_BALANCING && CONFIG_TRANSPARENT_HUGEPAGE*/
+
+#ifdef CONFIG_MIGRATION
+
+/*
+ * Watch out for PAE architectures: an unsigned long might not have enough bits
+ * to store the full physical address together with these flags. So far we have
+ * enough room for all our flags.
+ */
+#define MIGRATE_PFN_VALID (1UL << 0)
+#define MIGRATE_PFN_MIGRATE (1UL << 1)
+#define MIGRATE_PFN_LOCKED (1UL << 2)
+#define MIGRATE_PFN_WRITE (1UL << 3)
+#define MIGRATE_PFN_DEVICE (1UL << 4)
+#define MIGRATE_PFN_ERROR (1UL << 5)
+#define MIGRATE_PFN_SHIFT 6
+
+static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
+{
+ if (!(mpfn & MIGRATE_PFN_VALID))
+ return NULL;
+ return pfn_to_page(mpfn >> MIGRATE_PFN_SHIFT);
+}
+
+static inline unsigned long migrate_pfn(unsigned long pfn)
+{
+ return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID;
+}
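+
+/*
+ * For example (a hedged sketch), the alloc_and_copy() callback described
+ * below would typically publish a newly allocated and locked destination
+ * page as:
+ *
+ * dst[i] = migrate_pfn(page_to_pfn(newpage)) | MIGRATE_PFN_LOCKED;
+ */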
+
+/*
+ * struct migrate_vma_ops - migrate operation callback
+ *
+ * @alloc_and_copy: alloc destination memory and copy source memory to it
+ * @finalize_and_map: allow caller to map the successfully migrated pages
+ *
+ *
+ * The alloc_and_copy() callback happens once all source pages have been locked,
+ * unmapped and checked (to see whether they are pinned or not). All pages that can be
+ * migrated will have an entry in the src array set with the pfn value of the
+ * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set (other
+ * flags might be set but should be ignored by the callback).
+ *
+ * The alloc_and_copy() callback can then allocate destination memory and copy
+ * source memory to it for all those entries (ie with MIGRATE_PFN_VALID and
+ * MIGRATE_PFN_MIGRATE flag set). Once these are allocated and copied, the
+ * callback must update each corresponding entry in the dst array with the pfn
+ * value of the destination page and with the MIGRATE_PFN_VALID and
+ * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct pages
+ * locked, via lock_page()).
+ *
+ * At this point the alloc_and_copy() callback is done and returns.
+ *
+ * Note that the callback does not have to migrate all the pages that are
+ * marked with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration
+ * from device memory to system memory (ie the MIGRATE_PFN_DEVICE flag is also
+ * set in the src array entry). If the device driver cannot migrate a device
+ * page back to system memory, then it must set the corresponding dst array
+ * entry to MIGRATE_PFN_ERROR. This will trigger a SIGBUS if the CPU tries to
+ * access any of the virtual addresses originally backed by this page. Because
+ * a SIGBUS is such a severe result for the userspace process, the device
+ * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an
+ * unrecoverable state.
+ *
+ * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES
+ * OR BAD THINGS WILL HAPPEN !
+ *
+ *
+ * The finalize_and_map() callback happens after struct page migration from
+ * source to destination (destination struct pages are the struct pages for the
+ * memory allocated by the alloc_and_copy() callback). Migration can fail, and
+ * thus the finalize_and_map() allows the driver to inspect which pages were
+ * successfully migrated, and which were not. Successfully migrated pages will
+ * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
+ *
+ * It is safe to update device page table from within the finalize_and_map()
+ * callback because both destination and source page are still locked, and the
+ * mmap_sem is held in read mode (hence no one can unmap the range being
+ * migrated).
+ *
+ * Once the callback is done cleaning things up and updating its page table
+ * (if it chose to do so; this is not an obligation), it returns. At this point,
+ * the HMM core will finish up the final steps, and the migration is complete.
+ *
+ * THE finalize_and_map() CALLBACK MUST NOT CHANGE ANY OF THE SRC OR DST ARRAY
+ * ENTRIES OR BAD THINGS WILL HAPPEN !
+ */
+struct migrate_vma_ops {
+ void (*alloc_and_copy)(struct vm_area_struct *vma,
+ const unsigned long *src,
+ unsigned long *dst,
+ unsigned long start,
+ unsigned long end,
+ void *private);
+ void (*finalize_and_map)(struct vm_area_struct *vma,
+ const unsigned long *src,
+ const unsigned long *dst,
+ unsigned long start,
+ unsigned long end,
+ void *private);
+};
+
+int migrate_vma(const struct migrate_vma_ops *ops,
+ struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ unsigned long *src,
+ unsigned long *dst,
+ void *private);
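+
+/*
+ * A hedged sketch of the expected calling pattern (my_* names are hypothetical
+ * driver-side helpers, not part of this patch):
+ *
+ * static const struct migrate_vma_ops my_migrate_ops = {
+ *     .alloc_and_copy   = my_alloc_and_copy,
+ *     .finalize_and_map = my_finalize_and_map,
+ * };
+ *
+ * // with mmap_sem held in read mode, and src/dst arrays sized for the range
+ * ret = migrate_vma(&my_migrate_ops, vma, start, end, src, dst, my_private);
+ */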
+
+#endif /* CONFIG_MIGRATION */
+
#endif /* _LINUX_MIGRATE_H */
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index ebf3d89a3919..bdf66af9b937 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -6,11 +6,16 @@
* on most operations but not ->writepage as the potential stall time
* is too significant
* MIGRATE_SYNC will block when migrating pages
+ * MIGRATE_SYNC_NO_COPY will block when migrating pages but will not copy pages
+ * with the CPU. Instead, page copy happens outside the migratepage()
+ * callback and is likely using a DMA engine. See migrate_vma() and HMM
+ * (mm/hmm.c) for users of this mode.
*/
enum migrate_mode {
MIGRATE_ASYNC,
MIGRATE_SYNC_LIGHT,
MIGRATE_SYNC,
+ MIGRATE_SYNC_NO_COPY,
};
#endif /* MIGRATE_MODE_H_INCLUDED */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 1beb1ec2fbdf..eb1a51a6617b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -108,7 +108,7 @@ enum {
MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1)
};
-/* Driver supports 3 diffrent device methods to manage traffic steering:
+/* Driver supports 3 different device methods to manage traffic steering:
* -device managed - High level API for ib and eth flow steering. FW is
* managing flow steering tables.
* - B0 steering mode - Common low level API for ib and (if supported) eth.
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f50864626230..6cd000f30921 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -891,12 +891,7 @@ static inline u16 cmdif_rev(struct mlx5_core_dev *dev)
static inline void *mlx5_vzalloc(unsigned long size)
{
- void *rtn;
-
- rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
- if (!rtn)
- rtn = vzalloc(size);
- return rtn;
+ return kvzalloc(size, GFP_KERNEL);
}
static inline u32 mlx5_base_mkey(const u32 key)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cbdbe0be3127..022423ca4aa8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -518,6 +518,28 @@ static inline int is_vmalloc_or_module_addr(const void *x)
}
#endif
+extern void *kvmalloc_node(size_t size, gfp_t flags, int node);
+static inline void *kvmalloc(size_t size, gfp_t flags)
+{
+ return kvmalloc_node(size, flags, NUMA_NO_NODE);
+}
+static inline void *kvzalloc_node(size_t size, gfp_t flags, int node)
+{
+ return kvmalloc_node(size, flags | __GFP_ZERO, node);
+}
+static inline void *kvzalloc(size_t size, gfp_t flags)
+{
+ return kvmalloc(size, flags | __GFP_ZERO);
+}
+
+static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
+{
+ if (size != 0 && n > SIZE_MAX / size)
+ return NULL;
+
+ return kvmalloc(n * size, flags);
+}
+
extern void kvfree(const void *addr);
static inline atomic_t *compound_mapcount_ptr(struct page *page)
@@ -799,11 +821,19 @@ static inline void put_page(struct page *page)
{
page = compound_head(page);
+ /*
+ * ZONE_DEVICE pages should never have their refcount reach 0 (this
+ * would be a bug), so call page_ref_dec() in put_zone_device_page()
+ * to decrement page refcount and skip __put_page() here, as this
+ * would worsen things if a ZONE_DEVICE page had a refcount bug.
+ */
+ if (unlikely(is_zone_device_page(page))) {
+ put_zone_device_page(page);
+ return;
+ }
+
if (put_page_testzero(page))
__put_page(page);
-
- if (unlikely(is_zone_device_page(page)))
- put_zone_device_page(page);
}
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 45cdb27791a3..5ddeacc7470d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -23,6 +23,7 @@
struct address_space;
struct mem_cgroup;
+struct hmm;
/*
* Each physical page in the system has a struct page associated with
@@ -500,6 +501,10 @@ struct mm_struct {
atomic_long_t hugetlb_usage;
#endif
struct work_struct async_put_work;
+#if IS_ENABLED(CONFIG_HMM)
+ /* HMM needs to track a few things per mm */
+ struct hmm *hmm;
+#endif
};
extern struct mm_struct init_mm;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba5882419a7d..52a137dbb043 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -51,6 +51,23 @@ static inline int current_is_kswapd(void)
*/
/*
+ * Unaddressable device memory support. See include/linux/hmm.h and
+ * Documentation/vm/hmm.txt. The short description is that we need struct
+ * pages for device memory that is unaddressable (inaccessible) by the CPU,
+ * so that we can migrate part of a process's memory to device memory.
+ *
+ * When a page is migrated from CPU to device, we set the CPU page table entry
+ * to a special SWP_DEVICE_* entry.
+ */
+#ifdef CONFIG_DEVICE_UNADDRESSABLE
+#define SWP_DEVICE_NUM 2
+#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
+#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
+#else
+#define SWP_DEVICE_NUM 0
+#endif
+
+/*
* NUMA node memory migration support
*/
#ifdef CONFIG_MIGRATION
@@ -72,7 +89,8 @@ static inline int current_is_kswapd(void)
#endif
#define MAX_SWAPFILES \
- ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
+ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
/*
* Magic header for a swap area. The first part of the union is
@@ -432,8 +450,8 @@ static inline void show_swap_cache_info(void)
{
}
-#define free_swap_and_cache(swp) is_migration_entry(swp)
-#define swapcache_prepare(swp) is_migration_entry(swp)
+#define free_swap_and_cache(e) (is_migration_entry(e) || is_device_entry(e))
+#define swapcache_prepare(e) (is_migration_entry(e) || is_device_entry(e))
static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
{
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 5c3a5f3e7eec..960eb6b37d61 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -100,6 +100,74 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
}
+#if IS_ENABLED(CONFIG_DEVICE_UNADDRESSABLE)
+static inline swp_entry_t make_device_entry(struct page *page, bool write)
+{
+ return swp_entry(write ? SWP_DEVICE_WRITE : SWP_DEVICE_READ,
+ page_to_pfn(page));
+}
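+
+/*
+ * A hedged illustration (not part of this patch): when a page is migrated to
+ * device memory, the migration code replaces the CPU page table entry with
+ * such a swap entry, roughly:
+ *
+ * entry = make_device_entry(new_page, is_write);
+ * set_pte_at(mm, addr, ptep, swp_entry_to_pte(entry));
+ */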
+
+static inline bool is_device_entry(swp_entry_t entry)
+{
+ int type = swp_type(entry);
+ return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE;
+}
+
+static inline void make_device_entry_read(swp_entry_t *entry)
+{
+ *entry = swp_entry(SWP_DEVICE_READ, swp_offset(*entry));
+}
+
+static inline bool is_write_device_entry(swp_entry_t entry)
+{
+ return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
+}
+
+static inline struct page *device_entry_to_page(swp_entry_t entry)
+{
+ return pfn_to_page(swp_offset(entry));
+}
+
+int device_entry_fault(struct vm_area_struct *vma,
+ unsigned long addr,
+ swp_entry_t entry,
+ unsigned int flags,
+ pmd_t *pmdp);
+#else /* CONFIG_DEVICE_UNADDRESSABLE */
+static inline swp_entry_t make_device_entry(struct page *page, bool write)
+{
+ return swp_entry(0, 0);
+}
+
+static inline void make_device_entry_read(swp_entry_t *entry)
+{
+}
+
+static inline bool is_device_entry(swp_entry_t entry)
+{
+ return false;
+}
+
+static inline bool is_write_device_entry(swp_entry_t entry)
+{
+ return false;
+}
+
+static inline struct page *device_entry_to_page(swp_entry_t entry)
+{
+ return NULL;
+}
+
+static inline int device_entry_fault(struct vm_area_struct *vma,
+ unsigned long addr,
+ swp_entry_t entry,
+ unsigned int flags,
+ pmd_t *pmdp)
+{
+ return VM_FAULT_SIGBUS;
+}
+#endif /* CONFIG_DEVICE_UNADDRESSABLE */
+
#ifdef CONFIG_MIGRATION
static inline swp_entry_t make_migration_entry(struct page *page, int write)
{
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index e0cbfb09e60f..201418d5e15c 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -203,7 +203,6 @@ static __always_inline void pagefault_disabled_inc(void)
static __always_inline void pagefault_disabled_dec(void)
{
current->pagefault_disabled--;
- WARN_ON(current->pagefault_disabled < 0);
}
/*
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index d68edffbf142..46991ad3ddd5 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -80,6 +80,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller);
+extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
extern void vfree(const void *addr);
extern void vfree_atomic(const void *addr);
diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h
index 7b26a62e5707..b9095a27a08a 100644
--- a/include/uapi/linux/ipmi.h
+++ b/include/uapi/linux/ipmi.h
@@ -355,7 +355,7 @@ struct ipmi_cmdspec {
#define IPMICTL_REGISTER_FOR_CMD _IOR(IPMI_IOC_MAGIC, 14, \
struct ipmi_cmdspec)
/*
- * Unregister a regsitered command. error values:
+ * Unregister a registered command. error values:
* - EFAULT - an address supplied was invalid.
* - ENOENT - The netfn/cmd was not found registered for this user.
*/
diff --git a/ipc/util.c b/ipc/util.c
index 3459a16a9df9..caec7b1bfaa3 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -403,12 +403,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
*/
void *ipc_alloc(int size)
{
- void *out;
- if (size > PAGE_SIZE)
- out = vmalloc(size);
- else
- out = kmalloc(size, GFP_KERNEL);
- return out;
+ return kvmalloc(size, GFP_KERNEL);
}
/**
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f45827e205d3..33b7861b517c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -76,8 +76,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
- gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog_aux *aux;
struct bpf_prog *fp;
@@ -107,8 +106,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_alloc);
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
- gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
u32 pages, delta;
int ret;
@@ -659,8 +657,7 @@ out:
static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
gfp_t gfp_extra_flags)
{
- gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
- gfp_extra_flags;
+ gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog *fp;
fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ab0cf4c43690..3dd05b9f7fc3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -68,8 +68,7 @@ void *bpf_map_area_alloc(size_t size)
return area;
}
- return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
- PAGE_KERNEL);
+ return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
}
void bpf_map_area_free(void *area)
diff --git a/kernel/fork.c b/kernel/fork.c
index d39e785d83d0..81f5f40a9e3f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -37,6 +37,7 @@
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmacache.h>
@@ -203,7 +204,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP | __GFP_HIGHMEM,
+ THREADINFO_GFP,
PAGE_KERNEL,
0, node, __builtin_return_address(0));
@@ -868,6 +869,7 @@ void __mmdrop(struct mm_struct *mm)
BUG_ON(mm == &init_mm);
mm_free_pgd(mm);
destroy_context(mm);
+ hmm_mm_destroy(mm);
mmu_notifier_mm_destroy(mm);
check_mm(mm);
put_user_ns(mm->user_ns);
diff --git a/kernel/groups.c b/kernel/groups.c
index 8dd7a61b7115..d09727692a2a 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -18,7 +18,7 @@ struct group_info *groups_alloc(int gidsetsize)
len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize;
gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY);
if (!gi)
- gi = __vmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_HIGHMEM, PAGE_KERNEL);
+ gi = __vmalloc(len, GFP_KERNEL_ACCOUNT, PAGE_KERNEL);
if (!gi)
return NULL;
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index e6b2f7ad3e51..4ccfcaae5b89 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -193,7 +193,8 @@ void __init __pv_init_lock_hash(void)
*/
pv_lock_hash = alloc_large_system_hash("PV qspinlock",
sizeof(struct pv_hash_entry),
- pv_hash_size, 0, HASH_EARLY,
+ pv_hash_size, 0,
+ HASH_EARLY | HASH_ZERO,
&pv_lock_hash_bits, NULL,
pv_hash_size, pv_hash_size);
}
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 07e85e5229da..d98ba1ff56d2 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -18,6 +18,8 @@
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/memory_hotplug.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#ifndef ioremap_cache
/* temporary while we convert existing ioremap_cache users to memremap */
@@ -190,10 +192,51 @@ EXPORT_SYMBOL(get_zone_device_page);
void put_zone_device_page(struct page *page)
{
+ /*
+ * ZONE_DEVICE page refcount should never reach 0 and never be freed
+ * to kernel memory allocator.
+ */
+ int count = page_ref_dec_return(page);
+
+ /*
+ * If refcount is 1 then page is freed and refcount is stable as nobody
+ * holds a reference on the page.
+ */
+ if (page->pgmap->page_free && count == 1)
+ page->pgmap->page_free(page, page->pgmap->data);
+
put_dev_pagemap(page->pgmap);
}
EXPORT_SYMBOL(put_zone_device_page);
+#if IS_ENABLED(CONFIG_DEVICE_UNADDRESSABLE)
+int device_entry_fault(struct vm_area_struct *vma,
+ unsigned long addr,
+ swp_entry_t entry,
+ unsigned int flags,
+ pmd_t *pmdp)
+{
+ struct page *page = device_entry_to_page(entry);
+
+ /*
+ * The page_fault() callback must migrate page back to system memory
+ * so that CPU can access it. This might fail for various reasons
+ * (device issue, device was unsafely unplugged, ...). When such
+ * error conditions happen, the callback must return VM_FAULT_SIGBUS.
+ *
+ * Note that because memory cgroup charges are accounted to the device
+ * memory, this should never fail because of memory restrictions (but
+ * allocation of a regular system page might still fail because we are
+ * out of memory).
+ *
+ * There is a more in-depth description of what that callback can and
+ * cannot do, in include/linux/memremap.h
+ */
+ return page->pgmap->page_fault(vma, addr, page, flags, pmdp);
+}
+EXPORT_SYMBOL(device_entry_fault);
+#endif /* CONFIG_DEVICE_UNADDRESSABLE */
+
static void pgmap_radix_release(struct resource *res)
{
resource_size_t key, align_start, align_size, align_end;
@@ -248,7 +291,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
align_size = ALIGN(resource_size(res), SECTION_SIZE);
mem_hotplug_begin();
- arch_remove_memory(align_start, align_size);
+ arch_remove_memory(align_start, align_size, pgmap->type);
mem_hotplug_done();
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
@@ -326,6 +369,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
}
pgmap->ref = ref;
pgmap->res = &page_map->res;
+ pgmap->type = MEMORY_DEVICE_PERSISTENT;
+ pgmap->page_fault = NULL;
+ pgmap->page_free = NULL;
+ pgmap->data = NULL;
mutex_lock(&pgmap_lock);
error = 0;
@@ -363,7 +410,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
goto err_pfn_remap;
mem_hotplug_begin();
- error = arch_add_memory(nid, align_start, align_size, true);
+ error = arch_add_memory(nid, align_start, align_size, pgmap->type);
mem_hotplug_done();
if (error)
goto err_add_memory;
diff --git a/kernel/module.c b/kernel/module.c
index 7d1e29a6d1c3..d5753210cf34 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -49,6 +49,9 @@
#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
+#ifdef CONFIG_STRICT_MODULE_RWX
+#include <asm/set_memory.h>
+#endif
#include <asm/mmu_context.h>
#include <linux/license.h>
#include <asm/sections.h>
@@ -2864,7 +2867,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
/* Suck in entire file: we'll want most of it. */
info->hdr = __vmalloc(info->len,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, PAGE_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN, PAGE_KERNEL);
if (!info->hdr)
return -ENOMEM;
diff --git a/kernel/pid.c b/kernel/pid.c
index 0143ac0ddceb..a1f8459c70ac 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -573,16 +573,13 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
*/
void __init pidhash_init(void)
{
- unsigned int i, pidhash_size;
+ unsigned int pidhash_size;
pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
- HASH_EARLY | HASH_SMALL,
+ HASH_EARLY | HASH_SMALL | HASH_ZERO,
&pidhash_shift, NULL,
0, 4096);
pidhash_size = 1U << pidhash_shift;
-
- for (i = 0; i < pidhash_size; i++)
- INIT_HLIST_HEAD(&pid_hash[i]);
}
void __init pidmap_init(void)
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index d79a38de425a..3b1e0f3ad07f 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -36,6 +36,9 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
+#ifdef CONFIG_STRICT_KERNEL_RWX
+#include <asm/set_memory.h>
+#endif
#include "power.h"
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f7c93568ec99..83ecb6bd336f 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1028,10 +1028,7 @@ EXPORT_SYMBOL(iov_iter_get_pages);
static struct page **get_pages_array(size_t n)
{
- struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
- if (!p)
- p = vmalloc(n * sizeof(struct page *));
- return p;
+ return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}
static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index f8635fd57442..2c2c8afcde15 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -86,16 +86,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
size = min(size, 1U << tbl->nest);
if (sizeof(spinlock_t) != 0) {
- tbl->locks = NULL;
-#ifdef CONFIG_NUMA
- if (size * sizeof(spinlock_t) > PAGE_SIZE &&
- gfp == GFP_KERNEL)
- tbl->locks = vmalloc(size * sizeof(spinlock_t));
-#endif
- if (gfp != GFP_KERNEL)
- gfp |= __GFP_NOWARN | __GFP_NORETRY;
-
- if (!tbl->locks)
+ if (gfpflags_allow_blocking(gfp))
+ tbl->locks = kvmalloc(size * sizeof(spinlock_t), gfp);
+ else
tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
gfp);
if (!tbl->locks)
diff --git a/mm/Kconfig b/mm/Kconfig
index c89f472b658c..43d000e44424 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -289,6 +289,41 @@ config MIGRATION
config ARCH_ENABLE_HUGEPAGE_MIGRATION
bool
+config HMM
+ bool
+ depends on MMU && 64BIT
+ help
+ HMM provides a set of helpers to share a virtual address
+ space between the CPU and a device, so that the device can access any valid
+ address of the process (while still obeying memory protection). HMM also
+ provides helpers to migrate process memory to device memory, and back.
+ Each set of functionality (address space mirroring, and migration to and
+ from device memory) can be used independently of the other.
+
+ This is primarily useful for devices like GPU, for GPGPU compute workload,
+ with APIs such as OpenCL or CUDA. See Documentation/vm/hmm.txt.
+
+config HMM_MIRROR
+ bool "HMM mirror CPU page table into a device page table"
+ depends on MMU && 64BIT
+ select HMM
+ select MMU_NOTIFIER
+ help
+ Select HMM_MIRROR if you want to mirror a range of the CPU page table of a
+ process into a device page table. Here, mirror means "keep synchronized".
+ Prerequisites: the device must provide the ability to write-protect its
+ page tables (at PAGE_SIZE granularity), and must be able to recover from
+ the resulting potential page faults.
+
+config HMM_DEVMEM
+ bool "HMM device memory helpers (to leverage ZONE_DEVICE)"
+ depends on MMU && 64BIT
+ select HMM
+ help
+ HMM devmem is a set of helper routines to leverage the ZONE_DEVICE
+ feature. This is just to avoid having device drivers replicate a lot
+ of boilerplate code.
+
config PHYS_ADDR_T_64BIT
def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
@@ -700,6 +735,15 @@ config ZONE_DEVICE
If FS_DAX is enabled, then say Y.
+config DEVICE_UNADDRESSABLE
+ bool "Unaddressable device memory (GPU memory, ...)"
+ depends on ZONE_DEVICE
+
+ help
+ Allows creation of struct pages to represent unaddressable device memory;
+ i.e., memory that is only accessible from the device (or group of
+ devices).
+
config FRAME_VECTOR
bool
diff --git a/mm/Makefile b/mm/Makefile
index 026f6a828a50..9eb4121485d3 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -75,6 +75,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
+obj-$(CONFIG_HMM) += hmm.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index da91df50ba31..145b903eb023 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -139,6 +139,14 @@ int balloon_page_migrate(struct address_space *mapping,
{
struct balloon_dev_info *balloon = balloon_page_device(page);
+ /*
+ * We cannot easily support the no-copy case here, so ignore it, as it
+ * is unlikely to be used with balloon pages. See include/linux/hmm.h for
+ * users of the MIGRATE_SYNC_NO_COPY mode.
+ */
+ if (mode == MIGRATE_SYNC_NO_COPY)
+ return -EINVAL;
+
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
diff --git a/mm/filemap.c b/mm/filemap.c
index d04e475bcf9e..e9e5f7b6a6c3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2796,12 +2796,6 @@ ssize_t generic_perform_write(struct file *file,
ssize_t written = 0;
unsigned int flags = 0;
- /*
- * Copies from kernel address space cannot fail (NFSD is a big user).
- */
- if (!iter_is_iovec(i))
- flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
do {
struct page *page;
unsigned long offset; /* Offset into pagecache page */
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index db77dcb38afd..72ebec18629c 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -200,10 +200,7 @@ struct frame_vector *frame_vector_create(unsigned int nr_frames)
* Avoid higher order allocations, use vmalloc instead. It should
* be rare anyway.
*/
- if (size <= PAGE_SIZE)
- vec = kmalloc(size, GFP_KERNEL);
- else
- vec = vmalloc(size);
+ vec = kvmalloc(size, GFP_KERNEL);
if (!vec)
return NULL;
vec->nr_allocated = nr_frames;
diff --git a/mm/hmm.c b/mm/hmm.c
new file mode 100644
index 000000000000..f567a8b3b079
--- /dev/null
+++ b/mm/hmm.c
@@ -0,0 +1,1205 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors: Jérôme Glisse <jglisse@redhat.com>
+ */
+/*
+ * Refer to include/linux/hmm.h for information about heterogeneous memory
+ * management or HMM for short.
+ */
+#include <linux/mm.h>
+#include <linux/hmm.h>
+#include <linux/rmap.h>
+#include <linux/swap.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mmzone.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/swapops.h>
+#include <linux/hugetlb.h>
+#include <linux/memremap.h>
+#include <linux/mmu_notifier.h>
+
+#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
+
+static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
+
+
+/*
+ * struct hmm - HMM per mm struct
+ *
+ * @mm: mm struct this HMM struct is bound to
+ * @lock: lock protecting ranges list
+ * @sequence: we track updates to the CPU page table with a sequence number
+ * @ranges: list of range being snapshotted
+ * @mirrors: list of mirrors for this mm
+ * @mmu_notifier: mmu notifier to track updates to CPU page table
+ * @mirrors_sem: read/write semaphore protecting the mirrors list
+ */
+struct hmm {
+ struct mm_struct *mm;
+ spinlock_t lock;
+ atomic_t sequence;
+ struct list_head ranges;
+ struct list_head mirrors;
+ struct mmu_notifier mmu_notifier;
+ struct rw_semaphore mirrors_sem;
+};
+
+/*
+ * hmm_register - register HMM against an mm (HMM internal)
+ *
+ * @mm: mm struct to attach to
+ *
+ * This is not intended to be used directly by device drivers. It allocates an
+ * HMM struct if mm does not have one, and initializes it.
+ */
+static struct hmm *hmm_register(struct mm_struct *mm)
+{
+ struct hmm *hmm = READ_ONCE(mm->hmm);
+ bool cleanup = false;
+
+ /*
+ * The hmm struct can only be freed once the mm_struct goes away,
+ * hence it is safe to simply return the existing hmm struct if one
+ * has already been registered for this mm.
+ */
+ if (hmm)
+ return hmm;
+
+ hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
+ if (!hmm)
+ return NULL;
+ INIT_LIST_HEAD(&hmm->mirrors);
+ init_rwsem(&hmm->mirrors_sem);
+ atomic_set(&hmm->sequence, 0);
+ hmm->mmu_notifier.ops = NULL;
+ INIT_LIST_HEAD(&hmm->ranges);
+ spin_lock_init(&hmm->lock);
+ hmm->mm = mm;
+
+ /*
+ * We should only get here if we hold the mmap_sem in write mode, ie on
+ * registration of the first mirror through hmm_mirror_register().
+ */
+ hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
+ if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
+ kfree(hmm);
+ return NULL;
+ }
+
+ spin_lock(&mm->page_table_lock);
+ if (!mm->hmm)
+ mm->hmm = hmm;
+ else
+ cleanup = true;
+ spin_unlock(&mm->page_table_lock);
+
+ if (cleanup) {
+ mmu_notifier_unregister(&hmm->mmu_notifier, mm);
+ kfree(hmm);
+ }
+
+ return mm->hmm;
+}
+
+void hmm_mm_destroy(struct mm_struct *mm)
+{
+ kfree(mm->hmm);
+}
+
+
+#if IS_ENABLED(CONFIG_HMM_MIRROR)
+static void hmm_invalidate_range(struct hmm *hmm,
+ enum hmm_update_type action,
+ unsigned long start,
+ unsigned long end)
+{
+ struct hmm_mirror *mirror;
+ struct hmm_range *range;
+
+ spin_lock(&hmm->lock);
+ list_for_each_entry(range, &hmm->ranges, list) {
+ unsigned long addr, idx, npages;
+
+ if (end < range->start || start >= range->end)
+ continue;
+
+ range->valid = false;
+ addr = max(start, range->start);
+ idx = (addr - range->start) >> PAGE_SHIFT;
+ npages = (min(range->end, end) - addr) >> PAGE_SHIFT;
+ memset(&range->pfns[idx], 0, sizeof(*range->pfns) * npages);
+ }
+ spin_unlock(&hmm->lock);
+
+ down_read(&hmm->mirrors_sem);
+ list_for_each_entry(mirror, &hmm->mirrors, list)
+ mirror->ops->sync_cpu_device_pagetables(mirror, action,
+ start, end);
+ up_read(&hmm->mirrors_sem);
+}
+
+static void hmm_invalidate_page(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long addr)
+{
+ unsigned long start = addr & PAGE_MASK;
+ unsigned long end = start + PAGE_SIZE;
+ struct hmm *hmm = mm->hmm;
+
+ VM_BUG_ON(!hmm);
+
+ atomic_inc(&hmm->sequence);
+ hmm_invalidate_range(mm->hmm, HMM_UPDATE_INVALIDATE, start, end);
+}
+
+static void hmm_invalidate_range_start(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct hmm *hmm = mm->hmm;
+
+ VM_BUG_ON(!hmm);
+
+ atomic_inc(&hmm->sequence);
+}
+
+static void hmm_invalidate_range_end(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct hmm *hmm = mm->hmm;
+
+ VM_BUG_ON(!hmm);
+
+ hmm_invalidate_range(mm->hmm, HMM_UPDATE_INVALIDATE, start, end);
+}
+
+static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
+ .invalidate_page = hmm_invalidate_page,
+ .invalidate_range_start = hmm_invalidate_range_start,
+ .invalidate_range_end = hmm_invalidate_range_end,
+};
+
+/*
+ * hmm_mirror_register() - register a mirror against an mm
+ *
+ * @mirror: new mirror struct to register
+ * @mm: mm to register against
+ *
+ * To start mirroring a process address space, the device driver must register
+ * an HMM mirror struct.
+ *
+ * THE mm->mmap_sem MUST BE HELD IN WRITE MODE !
+ */
+int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
+{
+ /* Sanity check */
+ if (!mm || !mirror || !mirror->ops)
+ return -EINVAL;
+
+ mirror->hmm = hmm_register(mm);
+ if (!mirror->hmm)
+ return -ENOMEM;
+
+ down_write(&mirror->hmm->mirrors_sem);
+ list_add(&mirror->list, &mirror->hmm->mirrors);
+ up_write(&mirror->hmm->mirrors_sem);
+
+ return 0;
+}
+EXPORT_SYMBOL(hmm_mirror_register);
+
+/*
+ * hmm_mirror_unregister() - unregister a mirror
+ *
+ * @mirror: mirror struct to unregister
+ *
+ * Stop mirroring a process address space, and cleanup.
+ */
+void hmm_mirror_unregister(struct hmm_mirror *mirror)
+{
+ struct hmm *hmm = mirror->hmm;
+
+ down_write(&hmm->mirrors_sem);
+ list_del(&mirror->list);
+ up_write(&hmm->mirrors_sem);
+}
+EXPORT_SYMBOL(hmm_mirror_unregister);
+
+struct hmm_vma_walk {
+ struct hmm_range *range;
+ unsigned long last;
+ bool fault;
+ bool block;
+ bool write;
+};
+
+static int hmm_vma_do_fault(struct mm_walk *walk,
+ unsigned long addr,
+ hmm_pfn_t *pfn)
+{
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_REMOTE;
+ struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ struct vm_area_struct *vma = walk->vma;
+ int r;
+
+ flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
+ flags |= hmm_vma_walk->write ? FAULT_FLAG_WRITE : 0;
+ r = handle_mm_fault(vma, addr, flags);
+ if (r & VM_FAULT_RETRY)
+ return -EBUSY;
+ if (r & VM_FAULT_ERROR) {
+ *pfn = HMM_PFN_ERROR;
+ return -EFAULT;
+ }
+
+ return -EAGAIN;
+}
+
+static void hmm_pfns_special(hmm_pfn_t *pfns,
+ unsigned long addr,
+ unsigned long end)
+{
+ for (; addr < end; addr += PAGE_SIZE, pfns++)
+ *pfns = HMM_PFN_SPECIAL;
+}
+
+static void hmm_pfns_clear(hmm_pfn_t *pfns,
+ unsigned long addr,
+ unsigned long end)
+{
+ for (; addr < end; addr += PAGE_SIZE, pfns++)
+ *pfns = 0;
+}
+
+static int hmm_vma_walk_hole(unsigned long addr,
+ unsigned long end,
+ struct mm_walk *walk)
+{
+ struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ struct hmm_range *range = hmm_vma_walk->range;
+ hmm_pfn_t *pfns = range->pfns;
+ unsigned long i;
+
+ hmm_vma_walk->last = addr;
+ i = (addr - range->start) >> PAGE_SHIFT;
+ for (; addr < end; addr += PAGE_SIZE, i++) {
+ pfns[i] = HMM_PFN_EMPTY;
+ if (hmm_vma_walk->fault) {
+ int ret;
+
+ ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
+ if (ret != -EAGAIN)
+ return ret;
+ }
+ }
+
+ return hmm_vma_walk->fault ? -EAGAIN : 0;
+}
+
+static int hmm_vma_walk_clear(unsigned long addr,
+ unsigned long end,
+ struct mm_walk *walk)
+{
+ struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ struct hmm_range *range = hmm_vma_walk->range;
+ hmm_pfn_t *pfns = range->pfns;
+ unsigned long i;
+
+ hmm_vma_walk->last = addr;
+ i = (addr - range->start) >> PAGE_SHIFT;
+ for (; addr < end; addr += PAGE_SIZE, i++) {
+ pfns[i] = 0;
+ if (hmm_vma_walk->fault) {
+ int ret;
+
+ ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
+ if (ret != -EAGAIN)
+ return ret;
+ }
+ }
+
+ return hmm_vma_walk->fault ? -EAGAIN : 0;
+}
+
+static int hmm_vma_walk_pmd(pmd_t *pmdp,
+ unsigned long start,
+ unsigned long end,
+ struct mm_walk *walk)
+{
+ struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ struct hmm_range *range = hmm_vma_walk->range;
+ struct vm_area_struct *vma = walk->vma;
+ hmm_pfn_t *pfns = range->pfns;
+ unsigned long addr = start, i;
+ bool write_fault;
+ hmm_pfn_t flag;
+ pte_t *ptep;
+
+ i = (addr - range->start) >> PAGE_SHIFT;
+ flag = vma->vm_flags & VM_READ ? HMM_PFN_READ : 0;
+ write_fault = hmm_vma_walk->fault & hmm_vma_walk->write;
+
+ if (pmd_none(*pmdp) || pmd_trans_unstable(pmdp)) {
+ pmd_t pmd;
+
+ pmd = pmd_read_atomic(pmdp);
+ barrier();
+ if (pmd_none(pmd))
+ return hmm_vma_walk_hole(start, end, walk);
+
+ if (pmd_bad(pmd) || pmd_protnone(pmd))
+ return hmm_vma_walk_clear(start, end, walk);
+
+ if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
+ unsigned long pfn = pmd_pfn(pmd) + pte_index(addr);
+
+ if (write_fault && !pmd_write(pmd))
+ return hmm_vma_walk_clear(start, end, walk);
+
+ flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
+ for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
+ pfns[i] = hmm_pfn_t_from_pfn(pfn) | flag;
+ return 0;
+ } else {
+ /*
+ * Something unusual is going on. Better to have the
+ * driver assume there is nothing for this range and
+ * let the fault code path sort out proper pages for the
+ * range.
+ */
+ return hmm_vma_walk_clear(start, end, walk);
+ }
+ }
+
+ ptep = pte_offset_map(pmdp, addr);
+ for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
+ pte_t pte = *ptep;
+
+ pfns[i] = 0;
+
+ if (pte_none(pte)) {
+ pfns[i] = HMM_PFN_EMPTY;
+ if (hmm_vma_walk->fault)
+ goto fault;
+ continue;
+ }
+
+ if (!pte_present(pte)) {
+ swp_entry_t entry = pte_to_swp_entry(pte);
+
+ if (!non_swap_entry(entry)) {
+ if (hmm_vma_walk->fault)
+ goto fault;
+ continue;
+ }
+
+ /*
+ * This is a special swap entry, ignore migration, use
+ * device and report anything else as error.
+ */
+ if (is_device_entry(entry)) {
+ pfns[i] = hmm_pfn_t_from_pfn(swp_offset(entry));
+ if (is_write_device_entry(entry)) {
+ pfns[i] |= HMM_PFN_WRITE;
+ } else if (write_fault)
+ goto fault;
+ pfns[i] |= HMM_PFN_DEVICE_UNADDRESSABLE;
+ pfns[i] |= flag;
+ } else if (is_migration_entry(entry)) {
+ if (hmm_vma_walk->fault) {
+ pte_unmap(ptep - 1);
+ hmm_vma_walk->last = addr;
+ migration_entry_wait(vma->vm_mm,
+ pmdp, addr);
+ return -EAGAIN;
+ }
+ continue;
+ } else {
+ /* Report error for everything else */
+ pfns[i] = HMM_PFN_ERROR;
+ }
+ continue;
+ }
+
+ if (write_fault && !pte_write(pte))
+ goto fault;
+ pfns[i] = hmm_pfn_t_from_pfn(pte_pfn(pte)) | flag;
+ pfns[i] |= pte_write(pte) ? HMM_PFN_WRITE : 0;
+ continue;
+
+fault:
+ pte_unmap(ptep - 1);
+ /* Fault all pages in range */
+ return hmm_vma_walk_clear(start, end, walk);
+ }
+ pte_unmap(ptep - 1);
+
+ return 0;
+}
+
+/*
+ * hmm_vma_get_pfns() - snapshot CPU page table for a range of virtual addresses
+ * @vma: virtual memory area containing the virtual address range
+ * @range: used to track snapshot validity
+ * @start: range virtual start address (inclusive)
+ * @end: range virtual end address (exclusive)
+ * @pfns: array of hmm_pfn_t provided by the caller, filled in by this function
+ * Returns: -EINVAL if invalid argument, -ENOMEM out of memory, 0 success
+ *
+ * This snapshots the CPU page table for a range of virtual addresses. Snapshot
+ * validity is tracked by range struct. See hmm_vma_range_done() for further
+ * information.
+ *
+ * The range struct is initialized here. It tracks the CPU page table, but only
+ * if the function returns success (0), in which case the caller must then call
+ * hmm_vma_range_done() to stop CPU page table update tracking on this range.
+ *
+ * NOT CALLING hmm_vma_range_done() IF FUNCTION RETURNS 0 WILL LEAD TO SERIOUS
+ * MEMORY CORRUPTION ! YOU HAVE BEEN WARNED !
+ */
+int hmm_vma_get_pfns(struct vm_area_struct *vma,
+ struct hmm_range *range,
+ unsigned long start,
+ unsigned long end,
+ hmm_pfn_t *pfns)
+{
+ struct hmm_vma_walk hmm_vma_walk;
+ struct mm_walk mm_walk;
+ struct hmm *hmm;
+
+ /* FIXME support hugetlb fs */
+ if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+ hmm_pfns_special(pfns, start, end);
+ return -EINVAL;
+ }
+
+ /* Sanity check, this really should not happen ! */
+ if (start < vma->vm_start || start >= vma->vm_end)
+ return -EINVAL;
+ if (end < vma->vm_start || end > vma->vm_end)
+ return -EINVAL;
+
+ hmm = hmm_register(vma->vm_mm);
+ if (!hmm)
+ return -ENOMEM;
+ /* Caller must have registered a mirror, via hmm_mirror_register() ! */
+ if (!hmm->mmu_notifier.ops)
+ return -EINVAL;
+
+ /* Initialize range to track CPU page table update */
+ range->start = start;
+ range->pfns = pfns;
+ range->end = end;
+ spin_lock(&hmm->lock);
+ range->valid = true;
+ list_add_rcu(&range->list, &hmm->ranges);
+ spin_unlock(&hmm->lock);
+
+ hmm_vma_walk.fault = false;
+ hmm_vma_walk.range = range;
+ mm_walk.private = &hmm_vma_walk;
+
+ mm_walk.vma = vma;
+ mm_walk.mm = vma->vm_mm;
+ mm_walk.pte_entry = NULL;
+ mm_walk.test_walk = NULL;
+ mm_walk.hugetlb_entry = NULL;
+ mm_walk.pmd_entry = hmm_vma_walk_pmd;
+ mm_walk.pte_hole = hmm_vma_walk_hole;
+
+ walk_page_range(start, end, &mm_walk);
+ return 0;
+}
+EXPORT_SYMBOL(hmm_vma_get_pfns);
+
+/*
+ * hmm_vma_range_done() - stop tracking change to CPU page table over a range
+ * @vma: virtual memory area containing the virtual address range
+ * @range: range being tracked
+ * Returns: false if range data has been invalidated, true otherwise
+ *
+ * Range struct is used to track updates to the CPU page table after a call to
+ * either hmm_vma_get_pfns() or hmm_vma_fault(). Once the device driver is done
+ * using the data, or wants to lock updates to the data it got from those
+ * functions, it must call the hmm_vma_range_done() function, which will then
+ * stop tracking CPU page table updates.
+ *
+ * Note that the device driver must still implement general CPU page table update
+ * tracking either by using hmm_mirror (see hmm_mirror_register()) or by using
+ * the mmu_notifier API directly.
+ *
+ * CPU page table update tracking done through hmm_range is only temporary and
+ * to be used while trying to duplicate CPU page table contents for a range of
+ * virtual addresses.
+ *
+ * There are two ways to use this :
+ * again:
+ * hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ * trans = device_build_page_table_update_transaction(pfns);
+ * device_page_table_lock();
+ * if (!hmm_vma_range_done(vma, range)) {
+ * device_page_table_unlock();
+ * goto again;
+ * }
+ * device_commit_transaction(trans);
+ * device_page_table_unlock();
+ *
+ * Or:
+ * hmm_vma_get_pfns(vma, range, start, end, pfns); or hmm_vma_fault(...);
+ * device_page_table_lock();
+ * hmm_vma_range_done(vma, range);
+ * device_update_page_table(pfns);
+ * device_page_table_unlock();
+ */
+bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range)
+{
+ unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
+ struct hmm *hmm;
+
+ if (range->end <= range->start) {
+ BUG();
+ return false;
+ }
+
+ hmm = hmm_register(vma->vm_mm);
+ if (!hmm) {
+ memset(range->pfns, 0, sizeof(*range->pfns) * npages);
+ return false;
+ }
+
+ spin_lock(&hmm->lock);
+ list_del_rcu(&range->list);
+ spin_unlock(&hmm->lock);
+
+ return range->valid;
+}
+EXPORT_SYMBOL(hmm_vma_range_done);
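To make the snapshot/commit contract above concrete, here is a minimal sketch of a driver-side loop pairing hmm_vma_get_pfns() with hmm_vma_range_done(). This is editorial illustration only, not part of this patch: all example_* helpers are hypothetical, and the caller is assumed to hold mmap_sem for read as walk_page_range() requires.

        /* Hypothetical driver code, for illustration only. */
        static int example_mirror_range(struct vm_area_struct *vma,
                                        unsigned long start, unsigned long end)
        {
                unsigned long npages = (end - start) >> PAGE_SHIFT;
                struct hmm_range range;
                hmm_pfn_t *pfns;
                int ret;

                pfns = kmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
                if (!pfns)
                        return -ENOMEM;

        again:
                ret = hmm_vma_get_pfns(vma, &range, start, end, pfns);
                if (ret)
                        goto out;

                /* Build the device page table update from pfns[] here. */

                example_device_page_table_lock();
                if (!hmm_vma_range_done(vma, &range)) {
                        /* The CPU page table changed; the snapshot is stale. */
                        example_device_page_table_unlock();
                        goto again;
                }
                /* Commit the update to the device page table. */
                example_device_page_table_unlock();
        out:
                kfree(pfns);
                return ret;
        }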
+
+/*
+ * hmm_vma_fault() - try to fault some address in a virtual address range
+ * @vma: virtual memory area containing the virtual address range
+ * @range: use to track pfns array content validity
+ * @start: fault range virtual start address (inclusive)
+ * @end: fault range virtual end address (exclusive)
+ * @pfns: array of hmm_pfn_t, only entry with fault flag set will be faulted
+ * @write: is it a write fault
+ * @block: allow blocking on fault (if true it sleeps and does not drop mmap_sem)
+ * Returns: 0 on success, error otherwise (-EAGAIN means mmap_sem has been dropped)
+ *
+ * This is similar to a regular CPU page fault except that it will not trigger
+ * any memory migration if the memory being faulted is not accessible by CPUs.
+ *
+ * On error, for one virtual address in the range, the function will set the
+ * hmm_pfn_t error flag for the corresponding pfn entry.
+ *
+ * Expected use pattern:
+ * retry:
+ * down_read(&mm->mmap_sem);
+ * // Find vma and address device wants to fault, initialize hmm_pfn_t
+ * // array accordingly
+ * ret = hmm_vma_fault(vma, range, start, end, pfns, write, block);
+ * switch (ret) {
+ * case -EAGAIN:
+ * hmm_vma_range_done(vma, range);
+ * // You might want to rate limit or yield to play nicely, you may
+ * // also commit any valid pfn in the array assuming that you are
+ * // getting true from hmm_vma_range_done()
+ * goto retry;
+ * case 0:
+ * break;
+ * default:
+ * // Handle error !
+ * up_read(&mm->mmap_sem)
+ * return;
+ * }
+ * // Take device driver lock that serialize device page table update
+ * driver_lock_device_page_table_update();
+ * hmm_vma_range_done(vma, range);
+ * // Commit pfns we got from hmm_vma_fault()
+ * driver_unlock_device_page_table_update();
+ * up_read(&mm->mmap_sem)
+ *
+ * YOU MUST CALL hmm_vma_range_done() AFTER THIS FUNCTION RETURNS SUCCESS (0)
+ * BEFORE FREEING THE range struct OR YOU WILL HAVE SERIOUS MEMORY CORRUPTION !
+ *
+ * YOU HAVE BEEN WARNED !
+ */
+int hmm_vma_fault(struct vm_area_struct *vma,
+ struct hmm_range *range,
+ unsigned long start,
+ unsigned long end,
+ hmm_pfn_t *pfns,
+ bool write,
+ bool block)
+{
+ struct hmm_vma_walk hmm_vma_walk;
+ struct mm_walk mm_walk;
+ struct hmm *hmm;
+ int ret;
+
+ /* Sanity check, this really should not happen ! */
+ if (start < vma->vm_start || start >= vma->vm_end)
+ return -EINVAL;
+ if (end < vma->vm_start || end > vma->vm_end)
+ return -EINVAL;
+
+ hmm = hmm_register(vma->vm_mm);
+ if (!hmm) {
+ hmm_pfns_clear(pfns, start, end);
+ return -ENOMEM;
+ }
+ /* Caller must have registered a mirror using hmm_mirror_register() */
+ if (!hmm->mmu_notifier.ops)
+ return -EINVAL;
+
+ /* Initialize range to track CPU page table update */
+ range->start = start;
+ range->pfns = pfns;
+ range->end = end;
+ spin_lock(&hmm->lock);
+ range->valid = true;
+ list_add_rcu(&range->list, &hmm->ranges);
+ spin_unlock(&hmm->lock);
+
+ /* FIXME support hugetlb fs */
+ if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL)) {
+ hmm_pfns_special(pfns, start, end);
+ return 0;
+ }
+
+ hmm_vma_walk.fault = true;
+ hmm_vma_walk.write = write;
+ hmm_vma_walk.block = block;
+ hmm_vma_walk.range = range;
+ mm_walk.private = &hmm_vma_walk;
+ hmm_vma_walk.last = range->start;
+
+ mm_walk.vma = vma;
+ mm_walk.mm = vma->vm_mm;
+ mm_walk.pte_entry = NULL;
+ mm_walk.test_walk = NULL;
+ mm_walk.hugetlb_entry = NULL;
+ mm_walk.pmd_entry = hmm_vma_walk_pmd;
+ mm_walk.pte_hole = hmm_vma_walk_hole;
+
+ do {
+ ret = walk_page_range(start, end, &mm_walk);
+ start = hmm_vma_walk.last;
+ } while (ret == -EAGAIN);
+
+ if (ret) {
+ unsigned long i;
+
+ i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
+ hmm_pfns_clear(&pfns[i], hmm_vma_walk.last, end);
+ hmm_vma_range_done(vma, range);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(hmm_vma_fault);
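Complementing the expected-use comment above, a minimal sketch of the retry loop a driver might wrap around hmm_vma_fault(). This is editorial illustration, not part of the patch; it assumes, per the comment above, that -EAGAIN means mmap_sem was already dropped and that the error path in hmm_vma_fault() has already torn down range tracking. The example_* lock helpers are hypothetical.

        /* Hypothetical driver code, for illustration only. */
        static int example_fault_range(struct mm_struct *mm, struct vm_area_struct *vma,
                                       struct hmm_range *range, unsigned long start,
                                       unsigned long end, hmm_pfn_t *pfns, bool write)
        {
                int ret;

        retry:
                down_read(&mm->mmap_sem);
                ret = hmm_vma_fault(vma, range, start, end, pfns, write, false);
                if (ret == -EAGAIN)
                        goto retry;     /* mmap_sem was dropped on our behalf */
                if (ret) {
                        up_read(&mm->mmap_sem);
                        return ret;
                }

                example_device_page_table_lock();
                hmm_vma_range_done(vma, range);
                /* Commit pfns[] to the device page table here. */
                example_device_page_table_unlock();
                up_read(&mm->mmap_sem);
                return 0;
        }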
+#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
+
+
+#if IS_ENABLED(CONFIG_HMM_DEVMEM)
+struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ struct page *page;
+
+ page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+ if (!page)
+ return NULL;
+ lock_page(page);
+ return page;
+}
+EXPORT_SYMBOL(hmm_vma_alloc_locked_page);
+
+
+static void hmm_devmem_ref_release(struct percpu_ref *ref)
+{
+ struct hmm_devmem *devmem;
+
+ devmem = container_of(ref, struct hmm_devmem, ref);
+ complete(&devmem->completion);
+}
+
+static void hmm_devmem_ref_exit(void *data)
+{
+ struct percpu_ref *ref = data;
+ struct hmm_devmem *devmem;
+
+ devmem = container_of(ref, struct hmm_devmem, ref);
+ percpu_ref_exit(ref);
+ devm_remove_action(devmem->device, &hmm_devmem_ref_exit, data);
+}
+
+static void hmm_devmem_ref_kill(void *data)
+{
+ struct percpu_ref *ref = data;
+ struct hmm_devmem *devmem;
+
+ devmem = container_of(ref, struct hmm_devmem, ref);
+ percpu_ref_kill(ref);
+ wait_for_completion(&devmem->completion);
+ devm_remove_action(devmem->device, &hmm_devmem_ref_kill, data);
+}
+
+static int hmm_devmem_fault(struct vm_area_struct *vma,
+ unsigned long addr,
+ struct page *page,
+ unsigned int flags,
+ pmd_t *pmdp)
+{
+ struct hmm_devmem *devmem = page->pgmap->data;
+
+ return devmem->ops->fault(devmem, vma, addr, page, flags, pmdp);
+}
+
+static void hmm_devmem_free(struct page *page, void *data)
+{
+ struct hmm_devmem *devmem = data;
+
+ devmem->ops->free(devmem, page);
+}
+
+static DEFINE_MUTEX(hmm_devmem_lock);
+static RADIX_TREE(hmm_devmem_radix, GFP_KERNEL);
+#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
+
+static void hmm_devmem_radix_release(struct resource *resource)
+{
+ resource_size_t key, align_start, align_size, align_end;
+
+ align_start = resource->start & ~(SECTION_SIZE - 1);
+ align_size = ALIGN(resource_size(resource), SECTION_SIZE);
+ align_end = align_start + align_size - 1;
+
+ mutex_lock(&hmm_devmem_lock);
+ for (key = resource->start; key <= resource->end; key += SECTION_SIZE)
+ radix_tree_delete(&hmm_devmem_radix, key >> PA_SECTION_SHIFT);
+ mutex_unlock(&hmm_devmem_lock);
+}
+
+static void hmm_devmem_release(struct device *dev, void *data)
+{
+ struct hmm_devmem *devmem = data;
+ resource_size_t align_start, align_size;
+ struct resource *resource = devmem->resource;
+
+ if (percpu_ref_tryget_live(&devmem->ref)) {
+ dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
+ percpu_ref_put(&devmem->ref);
+ }
+
+ /* pages are dead and unused, undo the arch mapping */
+ align_start = resource->start & ~(SECTION_SIZE - 1);
+ align_size = ALIGN(resource_size(resource), SECTION_SIZE);
+
+ mem_hotplug_begin();
+ arch_remove_memory(align_start, align_size, devmem->pagemap.type);
+ mem_hotplug_done();
+
+ untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ hmm_devmem_radix_release(resource);
+}
+
+static struct hmm_devmem *hmm_devmem_find(resource_size_t phys)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ return radix_tree_lookup(&hmm_devmem_radix, phys >> PA_SECTION_SHIFT);
+}
+
+static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
+{
+ resource_size_t key, align_start, align_size, align_end;
+ struct device *device = devmem->device;
+ pgprot_t pgprot = PAGE_KERNEL;
+ int ret, nid, is_ram;
+ unsigned long pfn;
+
+ align_start = devmem->resource->start & ~(SECTION_SIZE - 1);
+ align_size = ALIGN(devmem->resource->start +
+ resource_size(devmem->resource),
+ SECTION_SIZE) - align_start;
+
+ is_ram = region_intersects(align_start, align_size,
+ IORESOURCE_SYSTEM_RAM,
+ IORES_DESC_NONE);
+ if (is_ram == REGION_MIXED) {
+ WARN_ONCE(1, "%s attempted on mixed region %pr\n",
+ __func__, devmem->resource);
+ return -ENXIO;
+ }
+ if (is_ram == REGION_INTERSECTS)
+ return -ENXIO;
+
+ devmem->pagemap.type = MEMORY_DEVICE_UNADDRESSABLE;
+ devmem->pagemap.res = devmem->resource;
+ devmem->pagemap.page_fault = hmm_devmem_fault;
+ devmem->pagemap.page_free = hmm_devmem_free;
+ devmem->pagemap.dev = devmem->device;
+ devmem->pagemap.ref = &devmem->ref;
+ devmem->pagemap.data = devmem;
+
+ mutex_lock(&hmm_devmem_lock);
+ align_end = align_start + align_size - 1;
+ for (key = align_start; key <= align_end; key += SECTION_SIZE) {
+ struct hmm_devmem *dup;
+
+ rcu_read_lock();
+ dup = hmm_devmem_find(key);
+ rcu_read_unlock();
+ if (dup) {
+ dev_err(device, "%s: collides with mapping for %s\n",
+ __func__, dev_name(dup->device));
+ mutex_unlock(&hmm_devmem_lock);
+ ret = -EBUSY;
+ goto error;
+ }
+ ret = radix_tree_insert(&hmm_devmem_radix,
+ key >> PA_SECTION_SHIFT,
+ devmem);
+ if (ret) {
+ dev_err(device, "%s: failed: %d\n", __func__, ret);
+ mutex_unlock(&hmm_devmem_lock);
+ goto error_radix;
+ }
+ }
+ mutex_unlock(&hmm_devmem_lock);
+
+ nid = dev_to_node(device);
+ if (nid < 0)
+ nid = numa_mem_id();
+
+ ret = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start),
+ 0, align_size);
+ if (ret)
+ goto error_radix;
+
+ mem_hotplug_begin();
+ ret = arch_add_memory(nid, align_start, align_size,
+ devmem->pagemap.type);
+ mem_hotplug_done();
+ if (ret)
+ goto error_add_memory;
+
+ for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) {
+ struct page *page = pfn_to_page(pfn);
+
+ /*
+ * ZONE_DEVICE pages union ->lru with a ->pgmap back
+ * pointer. It is a bug if a ZONE_DEVICE page is ever
+ * freed or placed on a driver-private list. Therefore,
+ * seed the storage with LIST_POISON* values.
+ */
+ list_del(&page->lru);
+ page->pgmap = &devmem->pagemap;
+ }
+ return 0;
+
+error_add_memory:
+ untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+error_radix:
+ hmm_devmem_radix_release(devmem->resource);
+error:
+ return ret;
+}
+
+static int hmm_devmem_match(struct device *dev, void *data, void *match_data)
+{
+ struct hmm_devmem *devmem = data;
+
+ return devmem->resource == match_data;
+}
+
+static void hmm_devmem_pages_remove(struct hmm_devmem *devmem)
+{
+ devres_release(devmem->device, &hmm_devmem_release,
+ &hmm_devmem_match, devmem->resource);
+}
+
+/*
+ * hmm_devmem_add() - hotplug ZONE_DEVICE memory for device memory
+ *
+ * @ops: memory event device driver callback (see struct hmm_devmem_ops)
+ * @device: device struct to bind the resource too
+ * @size: size in bytes of the device memory to add
+ * Returns: pointer to new hmm_devmem struct on success, ERR_PTR otherwise
+ *
+ * This first finds an empty range of physical addresses big enough to contain the
+ * new resource, and then hotplugs it as ZONE_DEVICE memory, which in turn
+ * allocates struct pages. It does not do anything beyond that; all events
+ * affecting the memory will go through the various callbacks provided by
+ * hmm_devmem_ops struct.
+ */
+struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
+ struct device *device,
+ unsigned long size)
+{
+ struct hmm_devmem *devmem;
+ resource_size_t addr;
+ int ret;
+
+ devmem = devres_alloc_node(&hmm_devmem_release, sizeof(*devmem),
+ GFP_KERNEL, dev_to_node(device));
+ if (!devmem)
+ return ERR_PTR(-ENOMEM);
+
+ init_completion(&devmem->completion);
+ devmem->pfn_first = -1UL;
+ devmem->pfn_last = -1UL;
+ devmem->resource = NULL;
+ devmem->device = device;
+ devmem->ops = ops;
+
+ ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release,
+ 0, GFP_KERNEL);
+ if (ret)
+ goto error_percpu_ref;
+
+ ret = devm_add_action(device, hmm_devmem_ref_exit, &devmem->ref);
+ if (ret)
+ goto error_devm_add_action;
+
+ size = ALIGN(size, SECTION_SIZE);
+ addr = (iomem_resource.end + 1ULL) - size;
+
+ /*
+ * FIXME add a new helper to quickly walk resource tree and find free
+ * range
+ *
+ * FIXME what about ioport_resource resource ?
+ */
+ for (; addr > size && addr >= iomem_resource.start; addr -= size) {
+ ret = region_intersects(addr, size, 0, IORES_DESC_NONE);
+ if (ret != REGION_DISJOINT)
+ continue;
+
+ devmem->resource = devm_request_mem_region(device, addr, size,
+ dev_name(device));
+ if (!devmem->resource) {
+ ret = -ENOMEM;
+ goto error_no_resource;
+ }
+ break;
+ }
+ if (!devmem->resource) {
+ ret = -ERANGE;
+ goto error_no_resource;
+ }
+
+ devmem->resource->desc = IORES_DESC_DEVICE_MEMORY_UNADDRESSABLE;
+ devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
+ devmem->pfn_last = devmem->pfn_first +
+ (resource_size(devmem->resource) >> PAGE_SHIFT);
+
+ ret = hmm_devmem_pages_create(devmem);
+ if (ret)
+ goto error_pages;
+
+ devres_add(device, devmem);
+
+ ret = devm_add_action(device, hmm_devmem_ref_kill, &devmem->ref);
+ if (ret) {
+ hmm_devmem_remove(devmem);
+ return ERR_PTR(ret);
+ }
+
+ return devmem;
+
+error_pages:
+ devm_release_mem_region(device, devmem->resource->start,
+ resource_size(devmem->resource));
+error_no_resource:
+error_devm_add_action:
+ hmm_devmem_ref_kill(&devmem->ref);
+ hmm_devmem_ref_exit(&devmem->ref);
+error_percpu_ref:
+ devres_free(devmem);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(hmm_devmem_add);
+
+/*
+ * hmm_devmem_remove() - remove device memory (kill and free ZONE_DEVICE)
+ *
+ * @devmem: hmm_devmem struct use to track and manage the ZONE_DEVICE memory
+ *
+ * This will hot-unplug memory that was hotplugged by hmm_devmem_add on behalf
+ * of the device driver. It will free the struct pages and remove the resource that
+ * reserved the physical address range for this device memory.
+ */
+void hmm_devmem_remove(struct hmm_devmem *devmem)
+{
+ resource_size_t start, size;
+ struct device *device;
+
+ if (!devmem)
+ return;
+
+ device = devmem->device;
+ start = devmem->resource->start;
+ size = resource_size(devmem->resource);
+
+ hmm_devmem_ref_kill(&devmem->ref);
+ hmm_devmem_ref_exit(&devmem->ref);
+ hmm_devmem_pages_remove(devmem);
+
+ devm_release_mem_region(device, start, size);
+}
+EXPORT_SYMBOL(hmm_devmem_remove);
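For illustration, the add/remove lifecycle above could be driven from a driver probe path roughly as follows. This is an editorial sketch, not part of the patch: the hmm_devmem_ops member names (.fault, .free) and callback prototypes are inferred from the hmm_devmem_fault()/hmm_devmem_free() call sites earlier in this file, and all example_* symbols are hypothetical.

        /* Hypothetical driver code, for illustration only. */
        static int example_devmem_fault(struct hmm_devmem *devmem,
                                        struct vm_area_struct *vma,
                                        unsigned long addr, struct page *page,
                                        unsigned int flags, pmd_t *pmdp)
        {
                /* Migrate the faulting page back to CPU-accessible memory here. */
                return 0;
        }

        static void example_devmem_free(struct hmm_devmem *devmem, struct page *page)
        {
                /* Give the device page back to the driver's allocator. */
        }

        static const struct hmm_devmem_ops example_devmem_ops = {
                .fault  = example_devmem_fault,
                .free   = example_devmem_free,
        };

        static int example_probe(struct device *dev)
        {
                struct hmm_devmem *devmem;

                devmem = hmm_devmem_add(&example_devmem_ops, dev, SZ_1G);
                if (IS_ERR(devmem))
                        return PTR_ERR(devmem);

                /* devmem->pfn_first..devmem->pfn_last now cover the device struct pages. */
                return 0;
        }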
+
+/*
+ * hmm_devmem_fault_range() - migrate back a virtual range of memory
+ *
+ * @devmem: hmm_devmem struct use to track and manage the ZONE_DEVICE memory
+ * @vma: virtual memory area containing the range to be migrated
+ * @ops: migration callback for allocating destination memory and copying
+ * @src: array of unsigned long containing source pfns
+ * @dst: array of unsigned long containing destination pfns
+ * @start: start address of the range to migrate (inclusive)
+ * @addr: fault address (must be inside the range)
+ * @end: end address of the range to migrate (exclusive)
+ * @private: pointer passed back to each of the callbacks
+ * Returns: 0 on success, VM_FAULT_SIGBUS on error
+ *
+ * This is a wrapper around migrate_vma() which checks the migration status
+ * for a given fault address and returns the corresponding page fault handler
+ * status. That will be 0 on success, or VM_FAULT_SIGBUS if migration failed
+ * for the faulting address.
+ *
+ * This is a helper intended to be used by the ZONE_DEVICE fault handler.
+ */
+int hmm_devmem_fault_range(struct hmm_devmem *devmem,
+ struct vm_area_struct *vma,
+ const struct migrate_vma_ops *ops,
+ unsigned long *src,
+ unsigned long *dst,
+ unsigned long start,
+ unsigned long addr,
+ unsigned long end,
+ void *private)
+{
+ if (migrate_vma(ops, vma, start, end, src, dst, private))
+ return VM_FAULT_SIGBUS;
+
+ if (dst[(addr - start) >> PAGE_SHIFT] & MIGRATE_PFN_ERROR)
+ return VM_FAULT_SIGBUS;
+
+ return 0;
+}
+EXPORT_SYMBOL(hmm_devmem_fault_range);
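As a sketch of the intended use, a single-page ops->fault callback could forward to hmm_devmem_fault_range() along these lines. Editorial illustration only, not part of the patch; example_migrate_ops stands for a hypothetical struct migrate_vma_ops (a skeleton is sketched after migrate_vma() later in this diff).

        /* Hypothetical driver code, for illustration only. */
        static int example_devmem_fault_one(struct hmm_devmem *devmem,
                                            struct vm_area_struct *vma,
                                            unsigned long addr, struct page *page,
                                            unsigned int flags, pmd_t *pmdp)
        {
                unsigned long start = addr & PAGE_MASK;
                unsigned long src = 0, dst = 0;

                return hmm_devmem_fault_range(devmem, vma, &example_migrate_ops,
                                              &src, &dst, start, addr,
                                              start + PAGE_SIZE, NULL);
        }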
+
+/*
+ * A device driver that wants to handle multiple devices memory through a
+ * single fake device can use hmm_device to do so. This is purely a helper
+ * and it is not needed to make use of any HMM functionality.
+ */
+#define HMM_DEVICE_MAX 256
+
+static DECLARE_BITMAP(hmm_device_mask, HMM_DEVICE_MAX);
+static DEFINE_SPINLOCK(hmm_device_lock);
+static struct class *hmm_device_class;
+static dev_t hmm_device_devt;
+
+static void hmm_device_release(struct device *device)
+{
+ struct hmm_device *hmm_device;
+
+ hmm_device = container_of(device, struct hmm_device, device);
+ spin_lock(&hmm_device_lock);
+ clear_bit(hmm_device->minor, hmm_device_mask);
+ spin_unlock(&hmm_device_lock);
+
+ kfree(hmm_device);
+}
+
+struct hmm_device *hmm_device_new(void *drvdata)
+{
+ struct hmm_device *hmm_device;
+ int ret;
+
+ hmm_device = kzalloc(sizeof(*hmm_device), GFP_KERNEL);
+ if (!hmm_device)
+ return ERR_PTR(-ENOMEM);
+
+ ret = alloc_chrdev_region(&hmm_device->device.devt, 0, 1, "hmm_device");
+ if (ret < 0) {
+ kfree(hmm_device);
+ return ERR_PTR(ret);
+ }
+
+ spin_lock(&hmm_device_lock);
+ hmm_device->minor = find_first_zero_bit(hmm_device_mask, HMM_DEVICE_MAX);
+ if (hmm_device->minor >= HMM_DEVICE_MAX) {
+ spin_unlock(&hmm_device_lock);
+ kfree(hmm_device);
+ return ERR_PTR(-EBUSY);
+ }
+ set_bit(hmm_device->minor, hmm_device_mask);
+ spin_unlock(&hmm_device_lock);
+
+ dev_set_name(&hmm_device->device, "hmm_device%d", hmm_device->minor);
+ hmm_device->device.devt = MKDEV(MAJOR(hmm_device_devt),
+ hmm_device->minor);
+ hmm_device->device.release = hmm_device_release;
+ dev_set_drvdata(&hmm_device->device, drvdata);
+ hmm_device->device.class = hmm_device_class;
+ device_initialize(&hmm_device->device);
+
+ return hmm_device;
+}
+EXPORT_SYMBOL(hmm_device_new);
+
+void hmm_device_put(struct hmm_device *hmm_device)
+{
+ put_device(&hmm_device->device);
+}
+EXPORT_SYMBOL(hmm_device_put);
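A minimal sketch of how the fake-device helper might be used; editorial illustration only, not part of the patch. Whether the driver also needs device_add()/device_del() on the embedded struct device depends on how it exposes the device, so treat that part as an assumption; example_* names are hypothetical.

        /* Hypothetical driver code, for illustration only. */
        static struct hmm_device *example_hmm_device;

        static int example_setup(void *drvdata)
        {
                example_hmm_device = hmm_device_new(drvdata);
                if (IS_ERR_OR_NULL(example_hmm_device))
                        return -ENODEV;

                /* The embedded device can now back hmm_devmem_add() for several GPUs. */
                return device_add(&example_hmm_device->device);
        }

        static void example_teardown(void)
        {
                device_del(&example_hmm_device->device);
                hmm_device_put(example_hmm_device);
        }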
+
+static int __init hmm_init(void)
+{
+ int ret;
+
+ ret = alloc_chrdev_region(&hmm_device_devt, 0,
+ HMM_DEVICE_MAX,
+ "hmm_device");
+ if (ret)
+ return ret;
+
+ hmm_device_class = class_create(THIS_MODULE, "hmm_device");
+ if (IS_ERR(hmm_device_class)) {
+ unregister_chrdev_region(hmm_device_devt, HMM_DEVICE_MAX);
+ return PTR_ERR(hmm_device_class);
+ }
+ return 0;
+}
+
+static void __exit hmm_exit(void)
+{
+ unregister_chrdev_region(hmm_device_devt, HMM_DEVICE_MAX);
+ class_destroy(hmm_device_class);
+}
+
+module_init(hmm_init);
+module_exit(hmm_exit);
+MODULE_LICENSE("GPL");
+#endif /* IS_ENABLED(CONFIG_HMM_DEVMEM) */
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index e4c2975fcae8..c81549d5c833 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -691,7 +691,7 @@ int kasan_module_alloc(void *addr, size_t size)
ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
shadow_start + shadow_size,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ GFP_KERNEL | __GFP_ZERO,
PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
__builtin_return_address(0));
diff --git a/mm/memory.c b/mm/memory.c
index 6ff5d729ded0..4116ab23b71f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -49,6 +49,7 @@
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/memremap.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/export.h>
@@ -927,6 +928,35 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte = pte_swp_mksoft_dirty(pte);
set_pte_at(src_mm, addr, src_pte, pte);
}
+ } else if (is_device_entry(entry)) {
+ page = device_entry_to_page(entry);
+
+ /*
+ * Update rss count even for unaddressable pages, as
+ * they should treated just like normal pages in this
+ * respect.
+ *
+ * We will likely want to have some new rss counters
+ * for unaddressable pages, at some point. But for now
+ * keep things as they are.
+ */
+ get_page(page);
+ rss[mm_counter(page)]++;
+ page_dup_rmap(page, false);
+
+ /*
+ * We do not preserve soft-dirty information, because so
+ * far, checkpoint/restore is the only feature that
+ * requires that. And checkpoint/restore does not work
+ * when a device driver is involved (you cannot easily
+ * save and restore device driver state).
+ */
+ if (is_write_device_entry(entry) &&
+ is_cow_mapping(vm_flags)) {
+ make_device_entry_read(&entry);
+ pte = swp_entry_to_pte(entry);
+ set_pte_at(src_mm, addr, src_pte, pte);
+ }
}
goto out_set_pte;
}
@@ -1243,6 +1273,29 @@ again:
}
continue;
}
+
+ entry = pte_to_swp_entry(ptent);
+ if (non_swap_entry(entry) && is_device_entry(entry)) {
+ struct page *page = device_entry_to_page(entry);
+
+ if (unlikely(details && details->check_mapping)) {
+ /*
+ * unmap_shared_mapping_pages() wants to
+ * invalidate cache without truncating:
+ * unmap shared but keep private pages.
+ */
+ if (details->check_mapping !=
+ page_rmapping(page))
+ continue;
+ }
+
+ pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+ rss[mm_counter(page)]--;
+ page_remove_rmap(page, false);
+ put_page(page);
+ continue;
+ }
+
/* If details->check_mapping, we leave swap entries. */
if (unlikely(details))
continue;
@@ -2690,6 +2743,14 @@ int do_swap_page(struct vm_fault *vmf)
if (is_migration_entry(entry)) {
migration_entry_wait(vma->vm_mm, vmf->pmd,
vmf->address);
+ } else if (is_device_entry(entry)) {
+ /*
+ * For un-addressable device memory we call the pgmap
+ * fault handler callback. The callback must migrate
+ * the page back to some CPU accessible page.
+ */
+ ret = device_entry_fault(vma, vmf->address, entry,
+ vmf->flags, vmf->pmd);
} else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON;
} else {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b63d7d1239df..76d4745513ee 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -156,7 +156,7 @@ void mem_hotplug_done(void)
/* add this memory to iomem resource */
static struct resource *register_memory_resource(u64 start, u64 size)
{
- struct resource *res;
+ struct resource *res, *conflict;
res = kzalloc(sizeof(struct resource), GFP_KERNEL);
if (!res)
return ERR_PTR(-ENOMEM);
@@ -165,7 +165,13 @@ static struct resource *register_memory_resource(u64 start, u64 size)
res->start = start;
res->end = start + size - 1;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
- if (request_resource(&iomem_resource, res) < 0) {
+ conflict = request_resource_conflict(&iomem_resource, res);
+ if (conflict) {
+ if (conflict->desc == IORES_DESC_DEVICE_MEMORY_UNADDRESSABLE) {
+ pr_debug("Device unaddressable memory block "
+ "memory hotplug at %#010llx !\n",
+ (unsigned long long)start);
+ }
pr_debug("System RAM resource %pR cannot be added\n", res);
kfree(res);
return ERR_PTR(-EEXIST);
@@ -1389,7 +1395,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
}
/* call arch's memory hotadd */
- ret = arch_add_memory(nid, start, size, false);
+ ret = arch_add_memory(nid, start, size, MEMORY_NORMAL);
if (ret < 0)
goto error;
@@ -2193,7 +2199,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
memblock_free(start, size);
memblock_remove(start, size);
- arch_remove_memory(start, size);
+ arch_remove_memory(start, size, MEMORY_NORMAL);
try_offline_node(nid);
diff --git a/mm/migrate.c b/mm/migrate.c
index 410b56bdabed..e4b555f9a689 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -28,6 +28,8 @@
#include <linux/cpuset.h>
#include <linux/writeback.h>
#include <linux/mempolicy.h>
+#include <linux/memremap.h>
+#include <linux/userfaultfd_k.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/backing-dev.h>
@@ -233,7 +235,15 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
pte = arch_make_huge_pte(pte, vma, new, 0);
}
#endif
- flush_dcache_page(new);
+
+ if (unlikely(is_zone_device_page(new)) &&
+ is_device_unaddressable_page(new)) {
+ entry = make_device_entry(new, pte_write(pte));
+ pte = swp_entry_to_pte(entry);
+ if (pte_swp_soft_dirty(*pvmw.pte))
+ pte = pte_mksoft_dirty(pte);
+ } else
+ flush_dcache_page(new);
set_pte_at(vma->vm_mm, pvmw.address, pvmw.pte, pte);
if (PageHuge(new)) {
@@ -305,6 +315,8 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
*/
if (!get_page_unless_zero(page))
goto out;
+ if (is_zone_device_page(page))
+ get_zone_device_page(page);
pte_unmap_unlock(ptep, ptl);
wait_on_page_locked(page);
put_page(page);
@@ -396,6 +408,14 @@ int migrate_page_move_mapping(struct address_space *mapping,
int expected_count = 1 + extra_count;
void **pslot;
+ /*
+ * ZONE_DEVICE pages have 1 refcount always held by their device
+ *
+ * Note that DAX memory will never reach that point as it does not have
+ * the MEMORY_DEVICE_ALLOW_MIGRATE flag set (see memory_hotplug.h).
+ */
+ expected_count += is_zone_device_page(page);
+
if (!mapping) {
/* Anonymous page without mapping */
if (page_count(page) != expected_count)
@@ -602,15 +622,10 @@ static void copy_huge_page(struct page *dst, struct page *src)
/*
* Copy the page to its new location
*/
-void migrate_page_copy(struct page *newpage, struct page *page)
+void migrate_page_states(struct page *newpage, struct page *page)
{
int cpupid;
- if (PageHuge(page) || PageTransHuge(page))
- copy_huge_page(newpage, page);
- else
- copy_highpage(newpage, page);
-
if (PageError(page))
SetPageError(newpage);
if (PageReferenced(page))
@@ -664,6 +679,17 @@ void migrate_page_copy(struct page *newpage, struct page *page)
mem_cgroup_migrate(page, newpage);
}
+EXPORT_SYMBOL(migrate_page_states);
+
+void migrate_page_copy(struct page *newpage, struct page *page)
+{
+ if (PageHuge(page) || PageTransHuge(page))
+ copy_huge_page(newpage, page);
+ else
+ copy_highpage(newpage, page);
+
+ migrate_page_states(newpage, page);
+}
EXPORT_SYMBOL(migrate_page_copy);
/************************************************************
@@ -689,7 +715,10 @@ int migrate_page(struct address_space *mapping,
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
}
EXPORT_SYMBOL(migrate_page);
@@ -739,12 +768,15 @@ int buffer_migrate_page(struct address_space *mapping,
SetPagePrivate(newpage);
- migrate_page_copy(newpage, page);
+ if (mode != MIGRATE_SYNC_NO_COPY)
+ migrate_page_copy(newpage, page);
+ else
+ migrate_page_states(newpage, page);
bh = head;
do {
unlock_buffer(bh);
- put_bh(bh);
+ put_bh(bh);
bh = bh->b_this_page;
} while (bh != head);
@@ -803,8 +835,13 @@ static int fallback_migrate_page(struct address_space *mapping,
{
if (PageDirty(page)) {
/* Only writeback pages in full synchronous migration */
- if (mode != MIGRATE_SYNC)
+ switch (mode) {
+ case MIGRATE_SYNC:
+ case MIGRATE_SYNC_NO_COPY:
+ break;
+ default:
return -EBUSY;
+ }
return writeout(mapping, page);
}
@@ -941,7 +978,11 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
* the retry loop is too short and in the sync-light case,
* the overhead of stalling is too much
*/
- if (mode != MIGRATE_SYNC) {
+ switch (mode) {
+ case MIGRATE_SYNC:
+ case MIGRATE_SYNC_NO_COPY:
+ break;
+ default:
rc = -EBUSY;
goto out_unlock;
}
@@ -1211,8 +1252,15 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
return -ENOMEM;
if (!trylock_page(hpage)) {
- if (!force || mode != MIGRATE_SYNC)
+ if (!force)
goto out;
+ switch (mode) {
+ case MIGRATE_SYNC:
+ case MIGRATE_SYNC_NO_COPY:
+ break;
+ default:
+ goto out;
+ }
lock_page(hpage);
}
@@ -2048,3 +2096,714 @@ out_unlock:
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_NUMA */
+
+
+struct migrate_vma {
+ struct vm_area_struct *vma;
+ unsigned long *dst;
+ unsigned long *src;
+ unsigned long cpages;
+ unsigned long npages;
+ unsigned long start;
+ unsigned long end;
+};
+
+static int migrate_vma_collect_hole(unsigned long start,
+ unsigned long end,
+ struct mm_walk *walk)
+{
+ struct migrate_vma *migrate = walk->private;
+ unsigned long addr;
+
+ for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
+ migrate->cpages++;
+ migrate->dst[migrate->npages] = 0;
+ migrate->src[migrate->npages++] = 0;
+ }
+
+ return 0;
+}
+
+static int migrate_vma_collect_pmd(pmd_t *pmdp,
+ unsigned long start,
+ unsigned long end,
+ struct mm_walk *walk)
+{
+ struct migrate_vma *migrate = walk->private;
+ struct mm_struct *mm = walk->vma->vm_mm;
+ unsigned long addr = start, unmapped = 0;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ if (pmd_none(*pmdp) || pmd_trans_unstable(pmdp)) {
+ /* FIXME support THP */
+ return migrate_vma_collect_hole(start, end, walk);
+ }
+
+ ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ arch_enter_lazy_mmu_mode();
+
+ for (; addr < end; addr += PAGE_SIZE, ptep++) {
+ unsigned long mpfn, pfn;
+ struct page *page;
+ swp_entry_t entry;
+ pte_t pte;
+
+ pte = *ptep;
+ pfn = pte_pfn(pte);
+
+ if (pte_none(pte)) {
+ migrate->cpages++;
+ mpfn = pfn = 0;
+ goto next;
+ }
+
+ if (!pte_present(pte)) {
+ mpfn = pfn = 0;
+
+ /*
+ * Only care about unaddressable device page special
+ * page table entry. Other special swap entries are not
+ * migratable, and we ignore regular swapped pages.
+ */
+ entry = pte_to_swp_entry(pte);
+ if (!is_device_entry(entry))
+ goto next;
+
+ page = device_entry_to_page(entry);
+ mpfn = migrate_pfn(page_to_pfn(page)) |
+ MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE;
+ if (is_write_device_entry(entry))
+ mpfn |= MIGRATE_PFN_WRITE;
+ } else {
+ page = vm_normal_page(migrate->vma, addr, pte);
+ mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
+ mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
+ }
+
+ /* FIXME support THP */
+ if (!page || !page->mapping || PageTransCompound(page)) {
+ mpfn = pfn = 0;
+ goto next;
+ }
+ pfn = page_to_pfn(page);
+
+ /*
+ * By getting a reference on the page we pin it and that blocks
+ * any kind of migration. Side effect is that it "freezes" the
+ * pte.
+ *
+ * We drop this reference after isolating the page from the lru
+ * for non device page (device page are not on the lru and thus
+ * can't be dropped from it).
+ */
+ get_page(page);
+ migrate->cpages++;
+
+ /*
+ * Optimize for the common case where page is only mapped once
+ * in one process. If we can lock the page, then we can safely
+ * set up a special migration page table entry now.
+ */
+ if (trylock_page(page)) {
+ pte_t swp_pte;
+
+ mpfn |= MIGRATE_PFN_LOCKED;
+ ptep_get_and_clear(mm, addr, ptep);
+
+ /* Setup special migration page table entry */
+ entry = make_migration_entry(page, pte_write(pte));
+ swp_pte = swp_entry_to_pte(entry);
+ if (pte_soft_dirty(pte))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ set_pte_at(mm, addr, ptep, swp_pte);
+
+ /*
+ * This is like regular unmap: we remove the rmap and
+ * drop page refcount. Page won't be freed, as we took
+ * a reference just above.
+ */
+ page_remove_rmap(page, false);
+ put_page(page);
+ unmapped++;
+ }
+
+next:
+ migrate->dst[migrate->npages] = 0;
+ migrate->src[migrate->npages++] = mpfn;
+ }
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(ptep - 1, ptl);
+
+ /* Only flush the TLB if we actually modified any entries */
+ if (unmapped)
+ flush_tlb_range(walk->vma, start, end);
+
+ return 0;
+}
+
+/*
+ * migrate_vma_collect() - collect pages over a range of virtual addresses
+ * @migrate: migrate struct containing all migration information
+ *
+ * This will walk the CPU page table. For each virtual address backed by a
+ * valid page, it updates the src array and takes a reference on the page, in
+ * order to pin the page until we lock it and unmap it.
+ */
+static void migrate_vma_collect(struct migrate_vma *migrate)
+{
+ struct mm_walk mm_walk;
+
+ mm_walk.pmd_entry = migrate_vma_collect_pmd;
+ mm_walk.pte_entry = NULL;
+ mm_walk.pte_hole = migrate_vma_collect_hole;
+ mm_walk.hugetlb_entry = NULL;
+ mm_walk.test_walk = NULL;
+ mm_walk.vma = migrate->vma;
+ mm_walk.mm = migrate->vma->vm_mm;
+ mm_walk.private = migrate;
+
+ mmu_notifier_invalidate_range_start(mm_walk.mm,
+ migrate->start,
+ migrate->end);
+ walk_page_range(migrate->start, migrate->end, &mm_walk);
+ mmu_notifier_invalidate_range_end(mm_walk.mm,
+ migrate->start,
+ migrate->end);
+
+ migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
+}
+
+/*
+ * migrate_vma_check_page() - check if page is pinned or not
+ * @page: struct page to check
+ *
+ * Pinned pages cannot be migrated. This is the same test as in
+ * migrate_page_move_mapping(), except that here we allow migration of a
+ * ZONE_DEVICE page.
+ */
+static bool migrate_vma_check_page(struct page *page)
+{
+ /*
+ * One extra ref because caller holds an extra reference, either from
+ * isolate_lru_page() for a regular page, or migrate_vma_collect() for
+ * a device page.
+ */
+ int extra = 1;
+
+ /*
+ * FIXME support THP (transparent huge page), it is bit more complex to
+ * check them than regular pages, because they can be mapped with a pmd
+ * or with a pte (split pte mapping).
+ */
+ if (PageCompound(page))
+ return false;
+
+ /* Page from ZONE_DEVICE have one extra reference */
+ if (is_zone_device_page(page)) {
+ if (is_device_unaddressable_page(page)) {
+ extra++;
+ } else
+ /* Other ZONE_DEVICE memory type are not supported */
+ return false;
+ }
+
+ if ((page_count(page) - extra) > page_mapcount(page))
+ return false;
+
+ return true;
+}
+
+/*
+ * migrate_vma_prepare() - lock pages and isolate them from the lru
+ * @migrate: migrate struct containing all migration information
+ *
+ * This locks pages that have been collected by migrate_vma_collect(). Once each
+ * page is locked it is isolated from the lru (for non-device pages). Finally,
+ * the ref taken by migrate_vma_collect() is dropped, as locked pages cannot be
+ * migrated by concurrent kernel threads.
+ */
+static void migrate_vma_prepare(struct migrate_vma *migrate)
+{
+ const unsigned long npages = migrate->npages;
+ const unsigned long start = migrate->start;
+ unsigned long addr, i, restore = 0;
+ bool allow_drain = true;
+
+ lru_add_drain();
+
+ for (i = 0; i < npages; i++) {
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+ bool remap = true;
+
+ if (!page)
+ continue;
+
+ if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
+ remap = false;
+ lock_page(page);
+ migrate->src[i] |= MIGRATE_PFN_LOCKED;
+ }
+
+ /* ZONE_DEVICE pages are not on LRU */
+ if (!is_zone_device_page(page)) {
+ if (!PageLRU(page) && allow_drain) {
+ /* Drain CPU's pagevec */
+ lru_add_drain_all();
+ allow_drain = false;
+ }
+
+ if (isolate_lru_page(page)) {
+ if (remap) {
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ migrate->cpages--;
+ restore++;
+ } else {
+ migrate->src[i] = 0;
+ unlock_page(page);
+ migrate->cpages--;
+ put_page(page);
+ }
+ continue;
+ }
+
+ /* Drop the reference we took in collect */
+ put_page(page);
+ }
+
+ if (!migrate_vma_check_page(page)) {
+ if (remap) {
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ migrate->cpages--;
+ restore++;
+
+ if (!is_zone_device_page(page)) {
+ get_page(page);
+ putback_lru_page(page);
+ }
+ } else {
+ migrate->src[i] = 0;
+ unlock_page(page);
+ migrate->cpages--;
+
+ if (!is_zone_device_page(page))
+ putback_lru_page(page);
+ else
+ put_page(page);
+ }
+ }
+ }
+
+ for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+ if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
+ continue;
+
+ remove_migration_pte(page, migrate->vma, addr, page);
+
+ migrate->src[i] = 0;
+ unlock_page(page);
+ put_page(page);
+ restore--;
+ }
+}
+
+/*
+ * migrate_vma_unmap() - replace page mapping with special migration pte entry
+ * @migrate: migrate struct containing all migration information
+ *
+ * Replace page mapping (CPU page table pte) with a special migration pte entry
+ * and check again if it has been pinned. Pinned pages are restored because we
+ * cannot migrate them.
+ *
+ * This is the last step before we call the device driver callback to allocate
+ * destination memory and copy contents of original page over to new page.
+ */
+static void migrate_vma_unmap(struct migrate_vma *migrate)
+{
+ int flags = TTU_MIGRATION | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+ const unsigned long npages = migrate->npages;
+ const unsigned long start = migrate->start;
+ unsigned long addr, i, restore = 0;
+
+ for (i = 0; i < npages; i++) {
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+ if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
+ continue;
+
+ if (page_mapped(page)) {
+ try_to_unmap(page, flags);
+ if (page_mapped(page))
+ goto restore;
+ }
+
+ if (migrate_vma_check_page(page))
+ continue;
+
+restore:
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ migrate->cpages--;
+ restore++;
+ }
+
+ for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+ if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
+ continue;
+
+ remove_migration_ptes(page, page, false);
+
+ migrate->src[i] = 0;
+ unlock_page(page);
+ restore--;
+
+ if (is_zone_device_page(page))
+ put_page(page);
+ else
+ putback_lru_page(page);
+ }
+}
+
+static void migrate_vma_insert_page(struct migrate_vma *migrate,
+ unsigned long addr,
+ struct page *page,
+ unsigned long *src,
+ unsigned long *dst)
+{
+ struct vm_area_struct *vma = migrate->vma;
+ struct mm_struct *mm = vma->vm_mm;
+ struct mem_cgroup *memcg;
+ spinlock_t *ptl;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ pte_t entry;
+
+ /* Only allow populating anonymous memory */
+ if (!vma_is_anonymous(vma))
+ goto abort;
+
+ pgdp = pgd_offset(mm, addr);
+ pudp = pud_alloc(mm, pgdp, addr);
+ if (!pudp)
+ goto abort;
+ pmdp = pmd_alloc(mm, pudp, addr);
+ if (!pmdp)
+ goto abort;
+
+ if (pmd_trans_unstable(pmdp) || pmd_devmap(*pmdp))
+ goto abort;
+
+ /*
+ * Use pte_alloc() instead of pte_alloc_map(). We can't run
+ * pte_offset_map() on pmds where a huge pmd might be created
+ * from a different thread.
+ *
+ * pte_alloc_map() is safe to use under down_write(mmap_sem) or when
+ * parallel threads are excluded by other means.
+ *
+ * Here we only have down_read(mmap_sem).
+ */
+ if (pte_alloc(mm, pmdp, addr))
+ goto abort;
+
+ /* See the comment in pte_alloc_one_map() */
+ if (unlikely(pmd_trans_unstable(pmdp)))
+ goto abort;
+
+ if (unlikely(anon_vma_prepare(vma)))
+ goto abort;
+ if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg, false))
+ goto abort;
+
+ /*
+ * The memory barrier inside __SetPageUptodate makes sure that
+ * preceding stores to the page contents become visible before
+ * the set_pte_at() write.
+ */
+ __SetPageUptodate(page);
+
+ if (is_zone_device_page(page) && is_device_unaddressable_page(page)) {
+ swp_entry_t swp_entry;
+
+ swp_entry = make_device_entry(page, vma->vm_flags & VM_WRITE);
+ entry = swp_entry_to_pte(swp_entry);
+ } else {
+ entry = mk_pte(page, vma->vm_page_prot);
+ if (vma->vm_flags & VM_WRITE)
+ entry = pte_mkwrite(pte_mkdirty(entry));
+ }
+
+ ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!pte_none(*ptep)) {
+ pte_unmap_unlock(ptep, ptl);
+ mem_cgroup_cancel_charge(page, memcg, false);
+ goto abort;
+ }
+
+ /*
+ * Check for userfaultfd but do not deliver the fault. Instead,
+ * just back off.
+ */
+ if (userfaultfd_missing(vma)) {
+ pte_unmap_unlock(ptep, ptl);
+ mem_cgroup_cancel_charge(page, memcg, false);
+ goto abort;
+ }
+
+ inc_mm_counter(mm, MM_ANONPAGES);
+ page_add_new_anon_rmap(page, vma, addr, false);
+ mem_cgroup_commit_charge(page, memcg, false, false);
+ if (!is_zone_device_page(page))
+ lru_cache_add_active_or_unevictable(page, vma);
+ set_pte_at(mm, addr, ptep, entry);
+
+ /* Take a reference on the page */
+ get_page(page);
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, addr, ptep);
+ pte_unmap_unlock(ptep, ptl);
+ *src = MIGRATE_PFN_MIGRATE;
+ return;
+
+abort:
+ *src &= ~MIGRATE_PFN_MIGRATE;
+}
+
+/*
+ * migrate_vma_pages() - migrate meta-data from src page to dst page
+ * @migrate: migrate struct containing all migration information
+ *
+ * This migrates struct page meta-data from source struct page to destination
+ * struct page. This effectively finishes the migration from source page to the
+ * destination page.
+ */
+static void migrate_vma_pages(struct migrate_vma *migrate)
+{
+ const unsigned long npages = migrate->npages;
+ const unsigned long start = migrate->start;
+ unsigned long addr, i;
+
+ for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) {
+ struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+ struct address_space *mapping;
+ int r;
+
+ if (!newpage) {
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ continue;
+ } else if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE)) {
+ if (!page)
+ migrate_vma_insert_page(migrate, addr, newpage,
+ &migrate->src[i],
+ &migrate->dst[i]);
+ continue;
+ }
+
+ mapping = page_mapping(page);
+
+ if (is_zone_device_page(newpage)) {
+ if (is_device_unaddressable_page(newpage)) {
+ /*
+ * For now only support private anonymous when
+ * migrating to un-addressable device memory.
+ */
+ if (mapping) {
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ continue;
+ }
+ } else {
+ /*
+ * Other types of ZONE_DEVICE page are not
+ * supported.
+ */
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ continue;
+ }
+ }
+
+ r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
+ if (r != MIGRATEPAGE_SUCCESS)
+ migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+ }
+}
+
+/*
+ * migrate_vma_finalize() - restore CPU page table entry
+ * @migrate: migrate struct containing all migration information
+ *
+ * This replaces the special migration pte entry with either a mapping to the
+ * new page if migration was successful for that page, or to the original page
+ * otherwise.
+ *
+ * This also unlocks the pages and puts them back on the lru, or drops the extra
+ * refcount, for device pages.
+ */
+static void migrate_vma_finalize(struct migrate_vma *migrate)
+{
+ const unsigned long npages = migrate->npages;
+ unsigned long i;
+
+ for (i = 0; i < npages; i++) {
+ struct page *newpage = migrate_pfn_to_page(migrate->dst[i]);
+ struct page *page = migrate_pfn_to_page(migrate->src[i]);
+
+ if (!page) {
+ if (newpage) {
+ unlock_page(newpage);
+ put_page(newpage);
+ }
+ continue;
+ }
+
+ if (!(migrate->src[i] & MIGRATE_PFN_MIGRATE) || !newpage) {
+ if (newpage) {
+ unlock_page(newpage);
+ put_page(newpage);
+ }
+ newpage = page;
+ }
+
+ remove_migration_ptes(page, newpage, false);
+ unlock_page(page);
+ migrate->cpages--;
+
+ if (is_zone_device_page(page))
+ put_page(page);
+ else
+ putback_lru_page(page);
+
+ if (newpage != page) {
+ unlock_page(newpage);
+ if (is_zone_device_page(newpage))
+ put_page(newpage);
+ else
+ putback_lru_page(newpage);
+ }
+ }
+}
+
+/*
+ * migrate_vma() - migrate a range of memory inside vma
+ *
+ * @ops: migration callback for allocating destination memory and copying
+ * @vma: virtual memory area containing the range to be migrated
+ * @start: start address of the range to migrate (inclusive)
+ * @end: end address of the range to migrate (exclusive)
+ * @src: array of unsigned long containing source pfns
+ * @dst: array of unsigned long containing destination pfns
+ * @private: pointer passed back to each of the callbacks
+ * Returns: 0 on success, error code otherwise
+ *
+ * This function tries to migrate a range of virtual addresses, using
+ * callbacks to allocate and copy memory from source to destination. First it
+ * collects all the pages backing each virtual address in the range, saving this
+ * inside the src array. Then it locks those pages and unmaps them. Once the pages
+ * are locked and unmapped, it checks whether each page is pinned or not. Pages
+ * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function)
+ * in the corresponding src array entry. It then restores any pages that are
+ * pinned, by remapping and unlocking those pages.
+ *
+ * At this point it calls the alloc_and_copy() callback. For documentation on
+ * what is expected from that callback, see struct migrate_vma_ops comments in
+ * include/linux/migrate.h
+ *
+ * After the alloc_and_copy() callback, this function goes over each entry in
+ * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
+ * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
+ * then the function tries to migrate struct page information from the source
+ * struct page to the destination struct page. If it fails to migrate the struct
+ * page information, then it clears the MIGRATE_PFN_MIGRATE flag in the src
+ * array.
+ *
+ * At this point all successfully migrated pages have an entry in the src
+ * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
+ * array entry with MIGRATE_PFN_VALID flag set.
+ *
+ * It then calls the finalize_and_map() callback. See comments for "struct
+ * migrate_vma_ops", in include/linux/migrate.h for details about
+ * finalize_and_map() behavior.
+ *
+ * After the finalize_and_map() callback, for successfully migrated pages, this
+ * function updates the CPU page table to point to new pages, otherwise it
+ * restores the CPU page table to point to the original source pages.
+ *
+ * The function returns 0 after the above steps, even if no pages were migrated
+ * (it only returns an error if any of the arguments are invalid).
+ *
+ * Both src and dst array must be big enough for (end - start) >> PAGE_SHIFT
+ * unsigned long entries.
+ */
+int migrate_vma(const struct migrate_vma_ops *ops,
+ struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ unsigned long *src,
+ unsigned long *dst,
+ void *private)
+{
+ struct migrate_vma migrate;
+
+ /* Sanity check the arguments */
+ start &= PAGE_MASK;
+ end &= PAGE_MASK;
+ if (!vma || !ops || !src || !dst || start >= end)
+ return -EINVAL;
+ if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL))
+ return -EINVAL;
+ if (start < vma->vm_start || start >= vma->vm_end)
+ return -EINVAL;
+ if (end <= vma->vm_start || end > vma->vm_end)
+ return -EINVAL;
+
+ memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
+ migrate.src = src;
+ migrate.dst = dst;
+ migrate.start = start;
+ migrate.npages = 0;
+ migrate.cpages = 0;
+ migrate.end = end;
+ migrate.vma = vma;
+
+ /* Collect, and try to unmap source pages */
+ migrate_vma_collect(&migrate);
+ if (!migrate.cpages)
+ return 0;
+
+ /* Lock and isolate page */
+ migrate_vma_prepare(&migrate);
+ if (!migrate.cpages)
+ return 0;
+
+ /* Unmap pages */
+ migrate_vma_unmap(&migrate);
+ if (!migrate.cpages)
+ return 0;
+
+ /*
+ * At this point pages are locked and unmapped, and thus they have
+ * stable content and can safely be copied to destination memory that
+ * is allocated by the callback.
+ *
+ * Note that migration can fail in migrate_vma_pages() for each
+ * individual page.
+ */
+ ops->alloc_and_copy(vma, src, dst, start, end, private);
+
+ /* This does the real migration of struct page */
+ migrate_vma_pages(&migrate);
+
+ ops->finalize_and_map(vma, src, dst, start, end, private);
+
+ /* Unlock and remap pages */
+ migrate_vma_finalize(&migrate);
+
+ return 0;
+}
+EXPORT_SYMBOL(migrate_vma);
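To make the callback contract above concrete, a skeleton migrate_vma_ops user is sketched below. This is editorial illustration, not part of the patch: the callback prototypes are inferred from the way alloc_and_copy()/finalize_and_map() are invoked in migrate_vma() above (the authoritative definitions live in include/linux/migrate.h and may carry const qualifiers), and all example_* helpers are hypothetical.

        /* Hypothetical driver code, for illustration only. */
        static void example_alloc_and_copy(struct vm_area_struct *vma,
                                           unsigned long *src, unsigned long *dst,
                                           unsigned long start, unsigned long end,
                                           void *private)
        {
                unsigned long addr, i;

                for (i = 0, addr = start; addr < end; addr += PAGE_SIZE, i++) {
                        struct page *spage = migrate_pfn_to_page(src[i]);
                        struct page *dpage;

                        dst[i] = 0;
                        if (!(src[i] & MIGRATE_PFN_MIGRATE))
                                continue;

                        dpage = example_alloc_dst_page(private, addr);
                        if (!dpage)
                                continue;       /* leaving dst[i] == 0 skips this page */

                        if (spage)
                                example_copy_to_dst(dpage, spage);

                        lock_page(dpage);
                        dst[i] = migrate_pfn(page_to_pfn(dpage)) |
                                 MIGRATE_PFN_VALID | MIGRATE_PFN_LOCKED;
                }
        }

        static void example_finalize_and_map(struct vm_area_struct *vma,
                                             unsigned long *src, unsigned long *dst,
                                             unsigned long start, unsigned long end,
                                             void *private)
        {
                /* Entries still flagged MIGRATE_PFN_MIGRATE in src[] were migrated. */
        }

        static const struct migrate_vma_ops example_migrate_ops = {
                .alloc_and_copy         = example_alloc_and_copy,
                .finalize_and_map       = example_finalize_and_map,
        };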
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 8edd0d576254..dadb020d825f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -126,6 +126,20 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
pages++;
}
+
+ if (is_write_device_entry(entry)) {
+ pte_t newpte;
+
+ /*
+ * We do not preserve soft-dirtiness. See
+ * copy_one_pte() for explanation.
+ */
+ make_device_entry_read(&entry);
+ newpte = swp_entry_to_pte(entry);
+ set_pte_at(mm, addr, pte, newpte);
+
+ pages++;
+ }
}
} while (pte++, addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
diff --git a/mm/nommu.c b/mm/nommu.c
index 2d131b97a851..fc184f597d59 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -237,12 +237,16 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
}
EXPORT_SYMBOL(__vmalloc);
+void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags)
+{
+ return __vmalloc(size, flags, PAGE_KERNEL);
+}
+
void *vmalloc_user(unsigned long size)
{
void *ret;
- ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
- PAGE_KERNEL);
+ ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
if (ret) {
struct vm_area_struct *vma;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a904dc24e018..32b31d661c9c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7180,6 +7180,17 @@ static unsigned long __init arch_reserved_kernel_pages(void)
#endif
/*
+ * Adaptive scale is meant to reduce sizes of hash tables on large memory
+ * machines. As memory size is increased the scale is also increased but at
+ * slower pace. Starting from ADAPT_SCALE_BASE (64G), every time memory
+ * quadruples the scale is increased by one, which means the size of hash table
+ * only doubles, instead of quadrupling as well.
+ */
+#define ADAPT_SCALE_BASE (64ul << 30)
+#define ADAPT_SCALE_SHIFT 2
+#define ADAPT_SCALE_NPAGES (ADAPT_SCALE_BASE >> PAGE_SHIFT)
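Worked example of the scaling above: growing from 64 GiB to 1 TiB of memory is two quadruplings (64G -> 256G -> 1T), so the loop in alloc_large_system_hash() below increments scale twice and the resulting hash table grows only 4x over that range instead of 16x.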
+
+/*
* allocate a large system hash table from bootmem
* - it is assumed that the hash table must contain an exact power-of-2
* quantity of entries
@@ -7198,6 +7209,7 @@ void *__init alloc_large_system_hash(const char *tablename,
unsigned long long max = high_limit;
unsigned long log2qty, size;
void *table = NULL;
+ gfp_t gfp_flags;
/* allow the kernel cmdline to have a say */
if (!numentries) {
@@ -7209,6 +7221,14 @@ void *__init alloc_large_system_hash(const char *tablename,
if (PAGE_SHIFT < 20)
numentries = round_up(numentries, (1<<20)/PAGE_SIZE);
+ if (flags & HASH_ADAPT) {
+ unsigned long adapt;
+
+ for (adapt = ADAPT_SCALE_NPAGES; adapt < numentries;
+ adapt <<= ADAPT_SCALE_SHIFT)
+ scale++;
+ }
+
/* limit to 1 bucket per 2^scale bytes of low memory */
if (scale > PAGE_SHIFT)
numentries >>= (scale - PAGE_SHIFT);
@@ -7242,12 +7262,17 @@ void *__init alloc_large_system_hash(const char *tablename,
log2qty = ilog2(numentries);
+ /*
+ * memblock allocator returns zeroed memory already, so HASH_ZERO is
+ * currently not used when HASH_EARLY is specified.
+ */
+ gfp_flags = (flags & HASH_ZERO) ? GFP_ATOMIC | __GFP_ZERO : GFP_ATOMIC;
do {
size = bucketsize << log2qty;
if (flags & HASH_EARLY)
table = memblock_virt_alloc_nopanic(size, 0);
else if (hashdist)
- table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
+ table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
else {
/*
* If bucketsize is not a power-of-two, we may free
@@ -7255,8 +7280,8 @@ void *__init alloc_large_system_hash(const char *tablename,
* alloc_pages_exact() automatically does
*/
if (get_order(size) < MAX_ORDER) {
- table = alloc_pages_exact(size, GFP_ATOMIC);
- kmemleak_alloc(table, size, 1, GFP_ATOMIC);
+ table = alloc_pages_exact(size, gfp_flags);
+ kmemleak_alloc(table, size, 1, gfp_flags);
}
}
} while (!table && size > PAGE_SIZE && --log2qty);
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index de9c40d7304a..12b8c1a61f66 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -48,6 +48,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
if (!is_swap_pte(*pvmw->pte))
return false;
entry = pte_to_swp_entry(*pvmw->pte);
+
if (!is_migration_entry(entry))
return false;
if (migration_entry_to_page(entry) - pvmw->page >=
@@ -60,6 +61,15 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
WARN_ON_ONCE(1);
#endif
} else {
+ if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
+
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (is_device_entry(entry) &&
+ device_entry_to_page(entry) == pvmw->page)
+ return true;
+ }
+
if (!pte_present(*pvmw->pte))
return false;
diff --git a/mm/rmap.c b/mm/rmap.c
index e116a8e80468..06db53aacb4b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -63,6 +63,7 @@
#include <linux/hugetlb.h>
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
+#include <linux/memremap.h>
#include <asm/tlbflush.h>
@@ -1308,6 +1309,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
return true;
+ if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
+ is_zone_device_page(page) && !is_device_unaddressable_page(page))
+ return true;
+
if (flags & TTU_SPLIT_HUGE_PMD) {
split_huge_pmd_address(vma, address,
flags & TTU_MIGRATION, page);
@@ -1343,6 +1348,26 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address;
+ if (IS_ENABLED(CONFIG_MIGRATION) &&
+ (flags & TTU_MIGRATION) &&
+ is_zone_device_page(page)) {
+ swp_entry_t entry;
+ pte_t swp_pte;
+
+ pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+ /*
+ * Store the pfn of the page in a special migration
+ * pte. do_swap_page() will wait until the migration
+ * pte is removed and then restart fault handling.
+ */
+ entry = make_migration_entry(page, 0);
+ swp_pte = swp_entry_to_pte(entry);
+ if (pte_soft_dirty(pteval))
+ swp_pte = pte_swp_mksoft_dirty(swp_pte);
+ set_pte_at(mm, address, pvmw.pte, swp_pte);
+ goto discard;
+ }
if (!(flags & TTU_IGNORE_ACCESS)) {
if (ptep_clear_flush_young_notify(vma, address,
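The new zone-device branch in try_to_unmap_one() installs a standard migration entry, so the fault side can decode it with the existing swapops.h helpers. The following is only an illustrative sketch of that decode path, assuming mm, pmd, address and ptep come from the surrounding fault-handling context; it is not code from this patch.

	/* Sketch: recognising a migration entry left behind by try_to_unmap_one() */
	pte_t pte = *ptep;

	if (is_swap_pte(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		if (is_migration_entry(entry)) {
			/* migration_entry_to_page(entry) is the page being moved;
			 * do_swap_page() waits for the migration to finish, then
			 * retries the fault against the new pte. */
			migration_entry_wait(mm, pmd, address);
		}
	}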
diff --git a/mm/util.c b/mm/util.c
index 656dc5e37a87..718154debc87 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -329,6 +329,63 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
}
EXPORT_SYMBOL(vm_mmap);
+/**
+ * kvmalloc_node - attempt to allocate physically contiguous memory, but upon
+ * failure, fall back to non-contiguous (vmalloc) allocation.
+ * @size: size of the request.
+ * @flags: gfp mask for the allocation - must be compatible with (a superset of) GFP_KERNEL.
+ * @node: NUMA node to allocate from
+ *
+ * Uses kmalloc to get the memory, but if the allocation fails it falls back
+ * to the vmalloc allocator. Use kvfree to free the memory.
+ *
+ * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT
+ * is supported only for large (>32kB) allocations, and it should be used only if
+ * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks.
+ *
+ * Any use of gfp flags outside of GFP_KERNEL should be discussed with the mm people.
+ */
+void *kvmalloc_node(size_t size, gfp_t flags, int node)
+{
+ gfp_t kmalloc_flags = flags;
+ void *ret;
+
+ /*
+ * vmalloc uses GFP_KERNEL for some internal allocations (e.g. page tables),
+ * so the given set of flags has to be compatible.
+ */
+ WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL);
+
+ /*
+ * Make sure that larger requests are not too disruptive - no OOM
+ * killer and no allocation failure warnings, as we have a fallback.
+ */
+ if (size > PAGE_SIZE) {
+ kmalloc_flags |= __GFP_NOWARN;
+
+ /*
+ * We have to override __GFP_REPEAT with __GFP_NORETRY for !costly
+ * requests because there is no other way to tell the allocator
+ * that we want to fail rather than retry endlessly.
+ */
+ if (!(kmalloc_flags & __GFP_REPEAT) ||
+ (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
+ kmalloc_flags |= __GFP_NORETRY;
+ }
+
+ ret = kmalloc_node(size, kmalloc_flags, node);
+
+ /*
+ * It doesn't really make sense to fall back to vmalloc for sub-page
+ * requests.
+ */
+ if (ret || size <= PAGE_SIZE)
+ return ret;
+
+ return __vmalloc_node_flags(size, node, flags);
+}
+EXPORT_SYMBOL(kvmalloc_node);
+
void kvfree(const void *addr)
{
if (is_vmalloc_addr(addr))
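A typical caller of the new helper looks like the sketch below. struct item, nr and the surrounding function are invented for illustration; only kvmalloc_node() and kvfree() from the hunk above are real.

	/* Illustrative only: prefer physically contiguous memory, transparently
	 * fall back to vmalloc, and free with kvfree() in either case. */
	struct item *tbl;

	tbl = kvmalloc_node(nr * sizeof(*tbl), GFP_KERNEL | __GFP_ZERO,
			    NUMA_NO_NODE);
	if (!tbl)
		return -ENOMEM;
	/* ... use tbl ... */
	kvfree(tbl);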
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b52aeed3f58e..8ef8ea1a68f6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1658,7 +1658,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
struct page **pages;
unsigned int nr_pages, array_size, i;
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
- const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
+ const gfp_t alloc_mask = gfp_mask | __GFP_HIGHMEM | __GFP_NOWARN;
nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
@@ -1786,6 +1786,13 @@ fail:
* Allocate enough pages to cover @size from the page level
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
+ *
+ * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT
+ * and __GFP_NOFAIL are not supported.
+ *
+ * Any use of gfp flags outside of GFP_KERNEL should be discussed
+ * with the mm people.
+ *
*/
static void *__vmalloc_node(unsigned long size, unsigned long align,
gfp_t gfp_mask, pgprot_t prot,
@@ -1802,7 +1809,7 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
}
EXPORT_SYMBOL(__vmalloc);
-static inline void *__vmalloc_node_flags(unsigned long size,
+void *__vmalloc_node_flags(unsigned long size,
int node, gfp_t flags)
{
return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
@@ -1821,7 +1828,7 @@ static inline void *__vmalloc_node_flags(unsigned long size,
void *vmalloc(unsigned long size)
{
return __vmalloc_node_flags(size, NUMA_NO_NODE,
- GFP_KERNEL | __GFP_HIGHMEM);
+ GFP_KERNEL);
}
EXPORT_SYMBOL(vmalloc);
@@ -1838,7 +1845,7 @@ EXPORT_SYMBOL(vmalloc);
void *vzalloc(unsigned long size)
{
return __vmalloc_node_flags(size, NUMA_NO_NODE,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+ GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc);
@@ -1855,7 +1862,7 @@ void *vmalloc_user(unsigned long size)
void *ret;
ret = __vmalloc_node(size, SHMLBA,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ GFP_KERNEL | __GFP_ZERO,
PAGE_KERNEL, NUMA_NO_NODE,
__builtin_return_address(0));
if (ret) {
@@ -1879,7 +1886,7 @@ EXPORT_SYMBOL(vmalloc_user);
*/
void *vmalloc_node(unsigned long size, int node)
{
- return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
+ return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
node, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);
@@ -1899,7 +1906,7 @@ EXPORT_SYMBOL(vmalloc_node);
void *vzalloc_node(unsigned long size, int node)
{
return __vmalloc_node_flags(size, node,
- GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+ GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(vzalloc_node);
@@ -1921,7 +1928,7 @@ EXPORT_SYMBOL(vzalloc_node);
void *vmalloc_exec(unsigned long size)
{
- return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
+ return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC,
NUMA_NO_NODE, __builtin_return_address(0));
}
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b7ee9c34dbd6..e55a7648e060 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1983,6 +1983,14 @@ int zs_page_migrate(struct address_space *mapping, struct page *newpage,
unsigned int obj_idx;
int ret = -EAGAIN;
+ /*
+ * We cannot support the _NO_COPY case here, because the copy needs to
+ * happen under the zs lock, which does not work with the
+ * MIGRATE_SYNC_NO_COPY workflow.
+ */
+ if (mode == MIGRATE_SYNC_NO_COPY)
+ return -EINVAL;
+
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 108533859a53..4eb773ccce11 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -187,7 +187,7 @@ void *ceph_kvmalloc(size_t size, gfp_t flags)
return ptr;
}
- return __vmalloc(size, flags | __GFP_HIGHMEM, PAGE_KERNEL);
+ return __vmalloc(size, flags, PAGE_KERNEL);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index fc0f5f9dac0e..e0705a126b24 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7120,12 +7120,10 @@ static int netif_alloc_rx_queues(struct net_device *dev)
BUG_ON(count < 1);
- rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
- if (!rx) {
- rx = vzalloc(sz);
- if (!rx)
- return -ENOMEM;
- }
+ rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+ if (!rx)
+ return -ENOMEM;
+
dev->_rx = rx;
for (i = 0; i < count; i++)
@@ -7162,12 +7160,10 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
if (count < 1 || count > 0xffff)
return -EINVAL;
- tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
- if (!tx) {
- tx = vzalloc(sz);
- if (!tx)
- return -ENOMEM;
- }
+ tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);
+ if (!tx)
+ return -ENOMEM;
+
dev->_tx = tx;
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
@@ -7701,9 +7697,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
- p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
- if (!p)
- p = vzalloc(alloc_size);
+ p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT);
if (!p)
return NULL;
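The rx/tx queue conversions above are representative of the pattern applied throughout net/: the open-coded kzalloc()-then-vzalloc() fallback collapses into a single kvzalloc() call. As a generic illustration (buf and sz are placeholders, not from this patch):

	/* before: open-coded fallback, __GFP_NOWARN needed to silence the
	 * expected kmalloc failure for large sizes */
	buf = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!buf)
		buf = vzalloc(sz);

	/* after: the helper adds __GFP_NOWARN to the kmalloc attempt itself
	 * and still honours __GFP_REPEAT for costly sizes */
	buf = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT);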
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 9afa2a5030b2..405483a07efc 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2361,7 +2361,8 @@ MODULE_AUTHOR("Linux DECnet Project Team");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_DECnet);
-static char banner[] __initdata = KERN_INFO "NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n";
+static const char banner[] __initconst = KERN_INFO
+"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n";
static int __init decnet_init(void)
{
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 8bea74298173..e9a59d2d91d4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -678,11 +678,7 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
/* no more locks than number of hash buckets */
nblocks = min(nblocks, hashinfo->ehash_mask + 1);
- hashinfo->ehash_locks = kmalloc_array(nblocks, locksz,
- GFP_KERNEL | __GFP_NOWARN);
- if (!hashinfo->ehash_locks)
- hashinfo->ehash_locks = vmalloc(nblocks * locksz);
-
+ hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
if (!hashinfo->ehash_locks)
return -ENOMEM;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 9d0d4f39e42b..653bbd67e3a3 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -1011,10 +1011,7 @@ static int __net_init tcp_net_metrics_init(struct net *net)
tcp_metrics_hash_log = order_base_2(slots);
size = sizeof(struct tcpm_hash_bucket) << tcp_metrics_hash_log;
- tcp_metrics_hash = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
- if (!tcp_metrics_hash)
- tcp_metrics_hash = vzalloc(size);
-
+ tcp_metrics_hash = kvzalloc(size, GFP_KERNEL);
if (!tcp_metrics_hash)
return -ENOMEM;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index af8f52ee7180..2fd5ca151dcf 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -41,13 +41,7 @@ static int alloc_ila_locks(struct ila_net *ilan)
size = roundup_pow_of_two(nr_pcpus * LOCKS_PER_CPU);
if (sizeof(spinlock_t) != 0) {
-#ifdef CONFIG_NUMA
- if (size * sizeof(spinlock_t) > PAGE_SIZE)
- ilan->locks = vmalloc(size * sizeof(spinlock_t));
- else
-#endif
- ilan->locks = kmalloc_array(size, sizeof(spinlock_t),
- GFP_KERNEL);
+ ilan->locks = kvmalloc(size * sizeof(spinlock_t), GFP_KERNEL);
if (!ilan->locks)
return -ENOMEM;
for (i = 0; i < size; i++)
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 5928d22ba9c8..81de33da89ed 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2001,10 +2001,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
unsigned index;
if (size) {
- labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
- if (!labels)
- labels = vzalloc(size);
-
+ labels = kvzalloc(size, GFP_KERNEL);
if (!labels)
goto nolabels;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 14857afc9937..e58ecff638b3 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -763,17 +763,8 @@ EXPORT_SYMBOL(xt_check_entry_offsets);
*/
unsigned int *xt_alloc_entry_offsets(unsigned int size)
{
- unsigned int *off;
- off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN);
-
- if (off)
- return off;
-
- if (size < (SIZE_MAX / sizeof(unsigned int)))
- off = vmalloc(size * sizeof(unsigned int));
-
- return off;
+ return kvmalloc_array(size, sizeof(unsigned int), GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL(xt_alloc_entry_offsets);
@@ -1007,8 +998,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size)
if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
if (!info) {
- info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN |
- __GFP_NORETRY | __GFP_HIGHMEM,
+ info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
PAGE_KERNEL);
if (!info)
return NULL;
@@ -1114,7 +1104,7 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
size = sizeof(void **) * nr_cpu_ids;
if (size > PAGE_SIZE)
- i->jumpstack = vzalloc(size);
+ i->jumpstack = kvzalloc(size, GFP_KERNEL);
else
i->jumpstack = kzalloc(size, GFP_KERNEL);
if (i->jumpstack == NULL)
@@ -1136,12 +1126,8 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
*/
size = sizeof(void *) * i->stacksize * 2u;
for_each_possible_cpu(cpu) {
- if (size > PAGE_SIZE)
- i->jumpstack[cpu] = vmalloc_node(size,
- cpu_to_node(cpu));
- else
- i->jumpstack[cpu] = kmalloc_node(size,
- GFP_KERNEL, cpu_to_node(cpu));
+ i->jumpstack[cpu] = kvmalloc_node(size, GFP_KERNEL,
+ cpu_to_node(cpu));
if (i->jumpstack[cpu] == NULL)
/*
* Freeing will be done later on by the callers. The
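xt_alloc_entry_offsets() now relies on kvmalloc_array(), whose implementation is not part of this hunk; presumably it is a thin static inline added alongside kvmalloc_node() that performs an overflow-checked multiply before delegating, roughly along these lines (assumed shape, for illustration only):

	static inline void *kvmalloc_array_sketch(size_t n, size_t size, gfp_t flags)
	{
		/* refuse multiplications that would overflow, like kmalloc_array() */
		if (size != 0 && n > SIZE_MAX / size)
			return NULL;
		return kvmalloc(n * size, flags);
	}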
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 1d89a4eaf841..d6aa8f63ed2e 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -388,10 +388,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
}
sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size;
- if (sz <= PAGE_SIZE)
- t = kzalloc(sz, GFP_KERNEL);
- else
- t = vzalloc(sz);
+ t = kvzalloc(sz, GFP_KERNEL);
if (t == NULL) {
ret = -ENOMEM;
goto out;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 593183a5b5b5..f11f8732b0d5 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -376,10 +376,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
if (mask != q->tab_mask) {
struct sk_buff **ntab;
- ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
- GFP_KERNEL | __GFP_NOWARN);
- if (!ntab)
- ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
+ ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO);
if (!ntab)
return -ENOMEM;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index a4f738ac7728..594f77d89f6c 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -624,16 +624,6 @@ static void fq_rehash(struct fq_sched_data *q,
q->stat_gc_flows += fcnt;
}
-static void *fq_alloc_node(size_t sz, int node)
-{
- void *ptr;
-
- ptr = kmalloc_node(sz, GFP_KERNEL | __GFP_REPEAT | __GFP_NOWARN, node);
- if (!ptr)
- ptr = vmalloc_node(sz, node);
- return ptr;
-}
-
static void fq_free(void *addr)
{
kvfree(addr);
@@ -650,7 +640,7 @@ static int fq_resize(struct Qdisc *sch, u32 log)
return 0;
/* If XPS was setup, we can allocate memory on right NUMA node */
- array = fq_alloc_node(sizeof(struct rb_root) << log,
+ array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT,
netdev_queue_numa_node_read(sch->dev_queue));
if (!array)
return -ENOMEM;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 097bbe9857a5..efb27e4dc808 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -445,27 +445,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static void *fq_codel_zalloc(size_t sz)
-{
- void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
-
- if (!ptr)
- ptr = vzalloc(sz);
- return ptr;
-}
-
-static void fq_codel_free(void *addr)
-{
- kvfree(addr);
-}
-
static void fq_codel_destroy(struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
tcf_destroy_chain(&q->filter_list);
- fq_codel_free(q->backlogs);
- fq_codel_free(q->flows);
+ kvfree(q->backlogs);
+ kvfree(q->flows);
}
static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
@@ -492,13 +478,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
}
if (!q->flows) {
- q->flows = fq_codel_zalloc(q->flows_cnt *
- sizeof(struct fq_codel_flow));
+ q->flows = kvzalloc(q->flows_cnt *
+ sizeof(struct fq_codel_flow), GFP_KERNEL);
if (!q->flows)
return -ENOMEM;
- q->backlogs = fq_codel_zalloc(q->flows_cnt * sizeof(u32));
+ q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL);
if (!q->backlogs) {
- fq_codel_free(q->flows);
+ kvfree(q->flows);
return -ENOMEM;
}
for (i = 0; i < q->flows_cnt; i++) {
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 2fae8b5f1b80..8cf3d9eac553 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -467,29 +467,14 @@ static void hhf_reset(struct Qdisc *sch)
rtnl_kfree_skbs(skb, skb);
}
-static void *hhf_zalloc(size_t sz)
-{
- void *ptr = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN);
-
- if (!ptr)
- ptr = vzalloc(sz);
-
- return ptr;
-}
-
-static void hhf_free(void *addr)
-{
- kvfree(addr);
-}
-
static void hhf_destroy(struct Qdisc *sch)
{
int i;
struct hhf_sched_data *q = qdisc_priv(sch);
for (i = 0; i < HHF_ARRAYS_CNT; i++) {
- hhf_free(q->hhf_arrays[i]);
- hhf_free(q->hhf_valid_bits[i]);
+ kvfree(q->hhf_arrays[i]);
+ kvfree(q->hhf_valid_bits[i]);
}
for (i = 0; i < HH_FLOWS_CNT; i++) {
@@ -503,7 +488,7 @@ static void hhf_destroy(struct Qdisc *sch)
kfree(flow);
}
}
- hhf_free(q->hh_flows);
+ kvfree(q->hh_flows);
}
static const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = {
@@ -609,8 +594,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
if (!q->hh_flows) {
/* Initialize heavy-hitter flow table. */
- q->hh_flows = hhf_zalloc(HH_FLOWS_CNT *
- sizeof(struct list_head));
+ q->hh_flows = kvzalloc(HH_FLOWS_CNT *
+ sizeof(struct list_head), GFP_KERNEL);
if (!q->hh_flows)
return -ENOMEM;
for (i = 0; i < HH_FLOWS_CNT; i++)
@@ -624,8 +609,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
/* Initialize heavy-hitter filter arrays. */
for (i = 0; i < HHF_ARRAYS_CNT; i++) {
- q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN *
- sizeof(u32));
+ q->hhf_arrays[i] = kvzalloc(HHF_ARRAYS_LEN *
+ sizeof(u32), GFP_KERNEL);
if (!q->hhf_arrays[i]) {
/* Note: hhf_destroy() will be called
* by our caller.
@@ -637,8 +622,8 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
/* Initialize valid bits of heavy-hitter filter arrays. */
for (i = 0; i < HHF_ARRAYS_CNT; i++) {
- q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN /
- BITS_PER_BYTE);
+ q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN /
+ BITS_PER_BYTE, GFP_KERNEL);
if (!q->hhf_valid_bits[i]) {
/* Note: hhf_destroy() will be called
* by our caller.
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 94b4928ad413..e4ca9d65dc4f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -702,15 +702,11 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
spinlock_t *root_lock;
struct disttable *d;
int i;
- size_t s;
if (n > NETEM_DIST_MAX)
return -EINVAL;
- s = sizeof(struct disttable) + n * sizeof(s16);
- d = kmalloc(s, GFP_KERNEL | __GFP_NOWARN);
- if (!d)
- d = vmalloc(s);
+ d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
if (!d)
return -ENOMEM;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b00e02c139de..332d94be6e1c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -685,11 +685,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
static void *sfq_alloc(size_t sz)
{
- void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
-
- if (!ptr)
- ptr = vmalloc(sz);
- return ptr;
+ return kvmalloc(sz, GFP_KERNEL);
}
static void sfq_free(void *addr)
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index 0545f5a8cabe..e9cea4c5b6a1 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -174,6 +174,7 @@ bakup||backup
baloon||balloon
baloons||balloons
bandwith||bandwidth
+banlance||balance
batery||battery
beacuse||because
becasue||because
@@ -365,6 +366,8 @@ dictionnary||dictionary
didnt||didn't
diferent||different
differrence||difference
+diffrent||different
+diffrentiate||differentiate
difinition||definition
diplay||display
direectly||directly
@@ -590,6 +593,9 @@ interruptted||interrupted
interupted||interrupted
interupt||interrupt
intial||initial
+intialisation||initialisation
+intialised||initialised
+intialise||initialise
intialization||initialization
intialized||initialized
intialize||initialize
@@ -672,6 +678,7 @@ miximum||maximum
mmnemonic||mnemonic
mnay||many
modulues||modules
+momery||memory
monochorome||monochrome
monochromo||monochrome
monocrome||monochrome
@@ -871,6 +878,7 @@ registerd||registered
registeresd||registered
registes||registers
registraration||registration
+regsiter||register
regster||register
regualar||regular
reguator||regulator
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 41073f70eb41..be0b49897a67 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -98,7 +98,7 @@ static struct aa_loaddata *aa_simple_write_to_buffer(const char __user *userbuf,
return ERR_PTR(-ESPIPE);
/* freed by caller to simple_write_to_buffer */
- data = kvmalloc(sizeof(*data) + alloc_size);
+ data = kvmalloc(sizeof(*data) + alloc_size, GFP_KERNEL);
if (data == NULL)
return ERR_PTR(-ENOMEM);
kref_init(&data->count);
diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h
index 0291ff3902f9..550a700563b4 100644
--- a/security/apparmor/include/lib.h
+++ b/security/apparmor/include/lib.h
@@ -64,17 +64,6 @@ char *aa_split_fqname(char *args, char **ns_name);
const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name,
size_t *ns_len);
void aa_info_message(const char *str);
-void *__aa_kvmalloc(size_t size, gfp_t flags);
-
-static inline void *kvmalloc(size_t size)
-{
- return __aa_kvmalloc(size, 0);
-}
-
-static inline void *kvzalloc(size_t size)
-{
- return __aa_kvmalloc(size, __GFP_ZERO);
-}
/**
* aa_strneq - compare null terminated @str to a non null terminated substring
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 32cafc12593e..7cd788a9445b 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -129,36 +129,6 @@ void aa_info_message(const char *str)
}
/**
- * __aa_kvmalloc - do allocation preferring kmalloc but falling back to vmalloc
- * @size: how many bytes of memory are required
- * @flags: the type of memory to allocate (see kmalloc).
- *
- * Return: allocated buffer or NULL if failed
- *
- * It is possible that policy being loaded from the user is larger than
- * what can be allocated by kmalloc, in those cases fall back to vmalloc.
- */
-void *__aa_kvmalloc(size_t size, gfp_t flags)
-{
- void *buffer = NULL;
-
- if (size == 0)
- return NULL;
-
- /* do not attempt kmalloc if we need more than 16 pages at once */
- if (size <= (16*PAGE_SIZE))
- buffer = kmalloc(size, flags | GFP_KERNEL | __GFP_NORETRY |
- __GFP_NOWARN);
- if (!buffer) {
- if (flags & __GFP_ZERO)
- buffer = vzalloc(size);
- else
- buffer = vmalloc(size);
- }
- return buffer;
-}
-
-/**
* aa_policy_init - initialize a policy structure
* @policy: policy to initialize (NOT NULL)
* @prefix: prefix name if any is required. (MAYBE NULL)
diff --git a/security/apparmor/match.c b/security/apparmor/match.c
index eb0efef746f5..960c913381e2 100644
--- a/security/apparmor/match.c
+++ b/security/apparmor/match.c
@@ -88,7 +88,7 @@ static struct table_header *unpack_table(char *blob, size_t bsize)
if (bsize < tsize)
goto out;
- table = kvzalloc(tsize);
+ table = kvzalloc(tsize, GFP_KERNEL);
if (table) {
table->td_id = th.td_id;
table->td_flags = th.td_flags;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 2e37c9c26bbd..f3422a91353c 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -487,7 +487,7 @@ fail:
static void *kvmemdup(const void *src, size_t len)
{
- void *p = kvmalloc(len);
+ void *p = kvmalloc(len, GFP_KERNEL);
if (p)
memcpy(p, src, len);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 10fcea154c0f..e4402ae56681 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -101,14 +101,9 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type,
if (_payload) {
ret = -ENOMEM;
- payload = kmalloc(plen, GFP_KERNEL | __GFP_NOWARN);
- if (!payload) {
- if (plen <= PAGE_SIZE)
- goto error2;
- payload = vmalloc(plen);
- if (!payload)
- goto error2;
- }
+ payload = kvmalloc(plen, GFP_KERNEL);
+ if (!payload)
+ goto error2;
ret = -EFAULT;
if (copy_from_user(payload, _payload, plen) != 0)
@@ -1066,14 +1061,9 @@ long keyctl_instantiate_key_common(key_serial_t id,
if (from) {
ret = -ENOMEM;
- payload = kmalloc(plen, GFP_KERNEL);
- if (!payload) {
- if (plen <= PAGE_SIZE)
- goto error;
- payload = vmalloc(plen);
- if (!payload)
- goto error;
- }
+ payload = kvmalloc(plen, GFP_KERNEL);
+ if (!payload)
+ goto error;
ret = -EFAULT;
if (!copy_from_iter_full(payload, plen, from))
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 59ab34fa0bc8..eb765b9f6fea 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -53,7 +53,7 @@
#ifdef CONFIG_X86
/* for snoop control */
#include <asm/pgtable.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <asm/cpufeature.h>
#endif
#include <sound/core.h>
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 9720a30dbfff..6d17b171c17b 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -40,7 +40,9 @@
#include <sound/initval.h>
/* for 440MX workaround */
#include <asm/pgtable.h>
-#include <asm/cacheflush.h>
+#ifdef CONFIG_X86
+#include <asm/set_memory.h>
+#endif
MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
MODULE_DESCRIPTION("Intel 82801AA,82901AB,i810,i820,i830,i840,i845,MX440; SiS 7012; Ali 5455");
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 8be2ce17aa65..e7d99071d108 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -936,7 +936,7 @@ static struct snd_soc_component *soc_find_component(
*
* @dlc: name of the DAI and optional component info to match
*
- * This function will search all regsitered components and their DAIs to
+ * This function will search all registered components and their DAIs to
* find the DAI of the same name. The component's of_node and name
* should also match if being specified.
*
diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c
index c505b019e09c..664b7fe206d6 100644
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
@@ -30,7 +30,7 @@
#include <linux/pm_runtime.h>
#include <linux/dma-mapping.h>
#include <linux/delay.h>
-#include <asm/cacheflush.h>
+#include <asm/set_memory.h>
#include <sound/core.h>
#include <sound/asoundef.h>
#include <sound/pcm.h>
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f489167839c4..41e7e7cb238e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -504,7 +504,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
int i;
struct kvm_memslots *slots;
- slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
+ slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!slots)
return NULL;
@@ -689,18 +689,6 @@ out_err_no_disable:
return ERR_PTR(r);
}
-/*
- * Avoid using vmalloc for a small buffer.
- * Should not be used when the size is statically known.
- */
-void *kvm_kvzalloc(unsigned long size)
-{
- if (size > PAGE_SIZE)
- return vzalloc(size);
- else
- return kzalloc(size, GFP_KERNEL);
-}
-
static void kvm_destroy_devices(struct kvm *kvm)
{
struct kvm_device *dev, *tmp;
@@ -782,7 +770,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
- memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
+ memslot->dirty_bitmap = kvzalloc(dirty_bytes, GFP_KERNEL);
if (!memslot->dirty_bitmap)
return -ENOMEM;
@@ -1008,7 +996,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
- slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
+ slots = kvzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!slots)
goto out_free;
memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));