diff options
763 files changed, 68429 insertions, 33627 deletions
diff --git a/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt new file mode 100644 index 000000000000..4f0ab3ed3b6f --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt @@ -0,0 +1,58 @@ +Renesas R-Car LVDS Encoder +========================== + +These DT bindings describe the LVDS encoder embedded in the Renesas R-Car +Gen2, R-Car Gen3 and RZ/G SoCs. + +Required properties: + +- compatible : Shall contain one of + - "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders + - "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders + - "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders + - "renesas,r8a7793-lvds" for R8A7793 (R-Car M2-N) compatible LVDS encoders + - "renesas,r8a7795-lvds" for R8A7795 (R-Car H3) compatible LVDS encoders + - "renesas,r8a7796-lvds" for R8A7796 (R-Car M3-W) compatible LVDS encoders + - "renesas,r8a77970-lvds" for R8A77970 (R-Car V3M) compatible LVDS encoders + - "renesas,r8a77995-lvds" for R8A77995 (R-Car D3) compatible LVDS encoders + +- reg: Base address and length for the memory-mapped registers +- clocks: A phandle + clock-specifier pair for the functional clock +- resets: A phandle + reset specifier for the module reset + +Required nodes: + +The LVDS encoder has two video ports. Their connections are modelled using the +OF graph bindings specified in Documentation/devicetree/bindings/graph.txt. + +- Video port 0 corresponds to the parallel RGB input +- Video port 1 corresponds to the LVDS output + +Each port shall have a single endpoint. + + +Example: + + lvds0: lvds@feb90000 { + compatible = "renesas,r8a7790-lvds"; + reg = <0 0xfeb90000 0 0x1c>; + clocks = <&cpg CPG_MOD 726>; + resets = <&cpg 726>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds0_in: endpoint { + remote-endpoint = <&du_out_lvds0>; + }; + }; + port@1 { + reg = <1>; + lvds0_out: endpoint { + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/connector/dvi-connector.txt b/Documentation/devicetree/bindings/display/connector/dvi-connector.txt index fc53f7c60bc6..207e42e9eba0 100644 --- a/Documentation/devicetree/bindings/display/connector/dvi-connector.txt +++ b/Documentation/devicetree/bindings/display/connector/dvi-connector.txt @@ -10,6 +10,7 @@ Optional properties: - analog: the connector has DVI analog pins - digital: the connector has DVI digital pins - dual-link: the connector has pins for DVI dual-link +- hpd-gpios: HPD GPIO number Required nodes: - Video port for DVI input diff --git a/Documentation/devicetree/bindings/display/msm/dsi.txt b/Documentation/devicetree/bindings/display/msm/dsi.txt index a6671bd2c85a..518e9cdf0d4b 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi.txt +++ b/Documentation/devicetree/bindings/display/msm/dsi.txt @@ -7,8 +7,6 @@ Required properties: - reg: Physical base address and length of the registers of controller - reg-names: The names of register regions. The following regions are required: * "dsi_ctrl" -- qcom,dsi-host-index: The ID of DSI controller hardware instance. This should - be 0 or 1, since we have 2 DSI controllers at most for now. - interrupts: The interrupt signal from the DSI block. - power-domains: Should be <&mmcc MDSS_GDSC>. - clocks: Phandles to device clocks. @@ -22,6 +20,8 @@ Required properties: * "core" For DSIv2, we need an additional clock: * "src" + For DSI6G v2.0 onwards, we need also need the clock: + * "byte_intf" - assigned-clocks: Parents of "byte" and "pixel" for the given platform. - assigned-clock-parents: The Byte clock and Pixel clock PLL outputs provided by a DSI PHY block. See [1] for details on clock bindings. @@ -88,21 +88,35 @@ Required properties: * "qcom,dsi-phy-28nm-lp" * "qcom,dsi-phy-20nm" * "qcom,dsi-phy-28nm-8960" -- reg: Physical base address and length of the registers of PLL, PHY and PHY - regulator + * "qcom,dsi-phy-14nm" + * "qcom,dsi-phy-10nm" +- reg: Physical base address and length of the registers of PLL, PHY. Some + revisions require the PHY regulator base address, whereas others require the + PHY lane base address. See below for each PHY revision. - reg-names: The names of register regions. The following regions are required: + For DSI 28nm HPM/LP/8960 PHYs and 20nm PHY: * "dsi_pll" * "dsi_phy" * "dsi_phy_regulator" + For DSI 14nm and 10nm PHYs: + * "dsi_pll" + * "dsi_phy" + * "dsi_phy_lane" - clock-cells: Must be 1. The DSI PHY block acts as a clock provider, creating 2 clocks: A byte clock (index 0), and a pixel clock (index 1). -- qcom,dsi-phy-index: The ID of DSI PHY hardware instance. This should - be 0 or 1, since we have 2 DSI PHYs at most for now. - power-domains: Should be <&mmcc MDSS_GDSC>. - clocks: Phandles to device clocks. See [1] for details on clock bindings. - clock-names: the following clocks are required: * "iface" + For 28nm HPM/LP, 28nm 8960 PHYs: +- vddio-supply: phandle to vdd-io regulator device node + For 20nm PHY: - vddio-supply: phandle to vdd-io regulator device node +- vcca-supply: phandle to vcca regulator device node + For 14nm PHY: +- vcca-supply: phandle to vcca regulator device node + For 10nm PHY: +- vdds-supply: phandle to vdds regulator device node Optional properties: - qcom,dsi-phy-regulator-ldo-mode: Boolean value indicating if the LDO mode PHY diff --git a/Documentation/devicetree/bindings/display/renesas,du.txt b/Documentation/devicetree/bindings/display/renesas,du.txt index cd48aba3bc8c..c9cd17f99702 100644 --- a/Documentation/devicetree/bindings/display/renesas,du.txt +++ b/Documentation/devicetree/bindings/display/renesas,du.txt @@ -13,13 +13,10 @@ Required Properties: - "renesas,du-r8a7794" for R8A7794 (R-Car E2) compatible DU - "renesas,du-r8a7795" for R8A7795 (R-Car H3) compatible DU - "renesas,du-r8a7796" for R8A7796 (R-Car M3-W) compatible DU + - "renesas,du-r8a77970" for R8A77970 (R-Car V3M) compatible DU + - "renesas,du-r8a77995" for R8A77995 (R-Car D3) compatible DU - - reg: A list of base address and length of each memory resource, one for - each entry in the reg-names property. - - reg-names: Name of the memory resources. The DU requires one memory - resource for the DU core (named "du") and one memory resource for each - LVDS encoder (named "lvds.x" with "x" being the LVDS controller numerical - index). + - reg: the memory-mapped I/O registers base address and length - interrupt-parent: phandle of the parent interrupt controller. - interrupts: Interrupt specifiers for the DU interrupts. @@ -29,14 +26,13 @@ Required Properties: - clock-names: Name of the clocks. This property is model-dependent. - R8A7779 uses a single functional clock. The clock doesn't need to be named. - - All other DU instances use one functional clock per channel and one - clock per LVDS encoder (if available). The functional clocks must be - named "du.x" with "x" being the channel numerical index. The LVDS clocks - must be named "lvds.x" with "x" being the LVDS encoder numerical index. - - In addition to the functional and encoder clocks, all DU versions also - support externally supplied pixel clocks. Those clocks are optional. - When supplied they must be named "dclkin.x" with "x" being the input - clock numerical index. + - All other DU instances use one functional clock per channel The + functional clocks must be named "du.x" with "x" being the channel + numerical index. + - In addition to the functional clocks, all DU versions also support + externally supplied pixel clocks. Those clocks are optional. When + supplied they must be named "dclkin.x" with "x" being the input clock + numerical index. - vsps: A list of phandle and channel index tuples to the VSPs that handle the memory interfaces for the DU channels. The phandle identifies the VSP @@ -63,15 +59,15 @@ corresponding to each DU output. R8A7794 (R-Car E2) DPAD 0 DPAD 1 - - R8A7795 (R-Car H3) DPAD 0 HDMI 0 HDMI 1 LVDS 0 R8A7796 (R-Car M3-W) DPAD 0 HDMI 0 LVDS 0 - + R8A77970 (R-Car V3M) DPAD 0 LVDS 0 - - + R8A77995 (R-Car D3) DPAD 0 LVDS 0 LVDS 1 - Example: R8A7795 (R-Car H3) ES2.0 DU du: display@feb00000 { compatible = "renesas,du-r8a7795"; - reg = <0 0xfeb00000 0 0x80000>, - <0 0xfeb90000 0 0x14>; - reg-names = "du", "lvds.0"; + reg = <0 0xfeb00000 0 0x80000>; interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 269 IRQ_TYPE_LEVEL_HIGH>, @@ -79,9 +75,8 @@ Example: R8A7795 (R-Car H3) ES2.0 DU clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 722>, - <&cpg CPG_MOD 721>, - <&cpg CPG_MOD 727>; - clock-names = "du.0", "du.1", "du.2", "du.3", "lvds.0"; + <&cpg CPG_MOD 721>; + clock-names = "du.0", "du.1", "du.2", "du.3"; vsps = <&vspd0 0>, <&vspd1 0>, <&vspd2 0>, <&vspd0 1>; ports { diff --git a/Documentation/devicetree/overlay-notes.txt b/Documentation/devicetree/overlay-notes.txt index c4aa0adf13ec..5175a24d387e 100644 --- a/Documentation/devicetree/overlay-notes.txt +++ b/Documentation/devicetree/overlay-notes.txt @@ -87,8 +87,8 @@ Overlay in-kernel API The API is quite easy to use. -1. Call of_overlay_apply() to create and apply an overlay changeset. The return -value is an error or a cookie identifying this overlay. +1. Call of_overlay_fdt_apply() to create and apply an overlay changeset. The +return value is an error or a cookie identifying this overlay. 2. Call of_overlay_remove() to remove and cleanup the overlay changeset previously created via the call to of_overlay_apply(). Removal of an overlay diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 1a0a413eeced..f4d0b3476d9c 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -450,5 +450,12 @@ See drivers/gpu/drm/amd/display/TODO for tasks. Contact: Harry Wentland, Alex Deucher +i915 +---- + +- Our early/late pm callbacks could be removed in favour of using + device_link_add to model the dependency between i915 and snd_had. See + https://dri.freedesktop.org/docs/drm/driver-api/device_link.html + Outside DRM =========== diff --git a/MAINTAINERS b/MAINTAINERS index 2afba909724c..dccae57985fe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -766,6 +766,8 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c F: drivers/gpu/drm/amd/amdkfd/ F: drivers/gpu/drm/amd/include/cik_structs.h F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -4744,6 +4746,7 @@ F: drivers/gpu/drm/rcar-du/ F: drivers/gpu/drm/shmobile/ F: include/linux/platform_data/shmob_drm.h F: Documentation/devicetree/bindings/display/bridge/renesas,dw-hdmi.txt +F: Documentation/devicetree/bindings/display/bridge/renesas,lvds.txt F: Documentation/devicetree/bindings/display/renesas,du.txt DRM DRIVERS FOR ROCKCHIP diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 25de5f6ca997..45416826f6ee 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -259,7 +259,7 @@ static void __init dtb_apic_setup(void) dtb_ioapic_setup(); } -#ifdef CONFIG_OF_FLATTREE +#ifdef CONFIG_OF_EARLY_FLATTREE static void __init x86_flattree_get_config(void) { u32 size, map_len; diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 5d101c4053e0..4edb9fd3cf47 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref) trace_dma_fence_destroy(fence); + /* Failed to signal before release, could be a refcounting issue */ WARN_ON(!list_empty(&fence->cb_list)); if (fence->ops->release) diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index d6e5b7273853..2ca2b5154d52 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -30,7 +30,6 @@ FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME) ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/include \ -I$(FULL_AMD_PATH)/amdgpu \ - -I$(FULL_AMD_PATH)/scheduler \ -I$(FULL_AMD_PATH)/powerplay/inc \ -I$(FULL_AMD_PATH)/acp/include \ -I$(FULL_AMD_DISPLAY_PATH) \ @@ -63,7 +62,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o amdgpu-y += \ - vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o + vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o # add GMC block amdgpu-y += \ @@ -88,8 +87,7 @@ amdgpu-y += \ # add SMC block amdgpu-y += \ - amdgpu_dpm.o \ - amdgpu_powerplay.o + amdgpu_dpm.o # add DCE block amdgpu-y += \ @@ -130,6 +128,8 @@ amdgpu-y += \ # add amdkfd interfaces amdgpu-y += \ amdgpu_amdkfd.o \ + amdgpu_amdkfd_fence.o \ + amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gfx_v8.o # add cgs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 31126df06c8c..f44a83ab2bf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -68,6 +68,7 @@ #include "amdgpu_vce.h" #include "amdgpu_vcn.h" #include "amdgpu_mn.h" +#include "amdgpu_gmc.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" #include "amdgpu_gart.h" @@ -127,6 +128,7 @@ extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; +extern int amdgpu_emu_mode; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -179,10 +181,6 @@ extern int amdgpu_cik_support; #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 -/* GPU RESET flags */ -#define AMDGPU_RESET_INFO_VRAM_LOST (1 << 0) -#define AMDGPU_RESET_INFO_FULLRESET (1 << 1) - struct amdgpu_device; struct amdgpu_ib; struct amdgpu_cs_parser; @@ -318,13 +316,6 @@ struct amdgpu_vm_pte_funcs { void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe, uint64_t value, unsigned count, uint32_t incr); - - /* maximum nums of PTEs/PDEs in a single operation */ - uint32_t set_max_nums_pte_pde; - - /* number of dw to reserve per operation */ - unsigned set_pte_pde_num_dw; - /* for linear pte/pde updates without addr mapping */ void (*set_pte_pde)(struct amdgpu_ib *ib, uint64_t pe, @@ -332,28 +323,6 @@ struct amdgpu_vm_pte_funcs { uint32_t incr, uint64_t flags); }; -/* provided by the gmc block */ -struct amdgpu_gart_funcs { - /* flush the vm tlb via mmio */ - void (*flush_gpu_tlb)(struct amdgpu_device *adev, - uint32_t vmid); - /* write pte/pde updates using the cpu */ - int (*set_pte_pde)(struct amdgpu_device *adev, - void *cpu_pt_addr, /* cpu addr of page table */ - uint32_t gpu_page_idx, /* pte/pde to update */ - uint64_t addr, /* addr to write into pte/pde */ - uint64_t flags); /* access flags */ - /* enable/disable PRT support */ - void (*set_prt)(struct amdgpu_device *adev, bool enable); - /* set pte flags based per asic */ - uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, - uint32_t flags); - /* get the pde for a given mc addr */ - void (*get_vm_pde)(struct amdgpu_device *adev, int level, - u64 *dst, u64 *flags); - uint32_t (*get_invalidate_req)(unsigned int vmid); -}; - /* provided by the ih block */ struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ @@ -371,14 +340,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev); bool amdgpu_read_bios(struct amdgpu_device *adev); /* - * Dummy page - */ -struct amdgpu_dummy_page { - struct page *page; - dma_addr_t addr; -}; - -/* * Clocks */ @@ -418,8 +379,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags); -int amdgpu_gem_prime_pin(struct drm_gem_object *obj); -void amdgpu_gem_prime_unpin(struct drm_gem_object *obj); +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); @@ -480,7 +441,7 @@ struct amdgpu_sa_bo { void amdgpu_gem_force_release(struct amdgpu_device *adev); int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, - u64 flags, bool kernel, + u64 flags, enum ttm_bo_type type, struct reservation_object *resv, struct drm_gem_object **obj); @@ -494,56 +455,6 @@ int amdgpu_fence_slab_init(void); void amdgpu_fence_slab_fini(void); /* - * VMHUB structures, functions & helpers - */ -struct amdgpu_vmhub { - uint32_t ctx0_ptb_addr_lo32; - uint32_t ctx0_ptb_addr_hi32; - uint32_t vm_inv_eng0_req; - uint32_t vm_inv_eng0_ack; - uint32_t vm_context0_cntl; - uint32_t vm_l2_pro_fault_status; - uint32_t vm_l2_pro_fault_cntl; -}; - -/* - * GPU MC structures, functions & helpers - */ -struct amdgpu_mc { - resource_size_t aper_size; - resource_size_t aper_base; - resource_size_t agp_base; - /* for some chips with <= 32MB we need to lie - * about vram size near mc fb location */ - u64 mc_vram_size; - u64 visible_vram_size; - u64 gart_size; - u64 gart_start; - u64 gart_end; - u64 vram_start; - u64 vram_end; - unsigned vram_width; - u64 real_vram_size; - int vram_mtrr; - u64 mc_mask; - const struct firmware *fw; /* MC firmware */ - uint32_t fw_version; - struct amdgpu_irq_src vm_fault; - uint32_t vram_type; - uint32_t srbm_soft_reset; - bool prt_warning; - uint64_t stolen_size; - /* apertures */ - u64 shared_aperture_start; - u64 shared_aperture_end; - u64 private_aperture_start; - u64 private_aperture_end; - /* protects concurrent invalidation */ - spinlock_t invalidate_lock; - bool translate_further; -}; - -/* * GPU doorbell structures, functions & helpers */ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT @@ -1125,8 +1036,9 @@ struct amdgpu_job { void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ bool vm_needs_flush; - unsigned vmid; uint64_t vm_pd_addr; + unsigned vmid; + unsigned pasid; uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; @@ -1156,7 +1068,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 512 /* Reserve at most 512 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo *wb_obj; @@ -1169,8 +1081,6 @@ struct amdgpu_wb { int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb); -void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); - /* * SDMA */ @@ -1288,6 +1198,11 @@ struct amdgpu_asic_funcs { void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes); /* get config memsize register */ u32 (*get_config_memsize)(struct amdgpu_device *adev); + /* flush hdp write queue */ + void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + /* invalidate hdp read cache */ + void (*invalidate_hdp)(struct amdgpu_device *adev, + struct amdgpu_ring *ring); }; /* @@ -1431,7 +1346,7 @@ struct amdgpu_nbio_funcs { u32 (*get_pcie_data_offset)(struct amdgpu_device *adev); u32 (*get_rev_id)(struct amdgpu_device *adev); void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); - void (*hdp_flush)(struct amdgpu_device *adev); + void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); u32 (*get_memsize)(struct amdgpu_device *adev); void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index); @@ -1478,9 +1393,7 @@ enum amd_hw_ip_block_type { #define HWIP_MAX_INSTANCE 6 struct amd_powerplay { - struct cgs_device *cgs_device; void *pp_handle; - const struct amd_ip_funcs *ip_funcs; const struct amd_pm_funcs *pp_funcs; }; @@ -1574,9 +1487,9 @@ struct amdgpu_device { struct amdgpu_clock clock; /* MC */ - struct amdgpu_mc mc; + struct amdgpu_gmc gmc; struct amdgpu_gart gart; - struct amdgpu_dummy_page dummy_page; + dma_addr_t dummy_page_addr; struct amdgpu_vm_manager vm_manager; struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; @@ -1715,6 +1628,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); +void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); +uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset); + u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); @@ -1726,6 +1642,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); +int emu_soc_asic_init(struct amdgpu_device *adev); + /* * Registers read & write functions. */ @@ -1736,6 +1654,9 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) +#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) +#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) + #define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0) #define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX) #define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0)) @@ -1838,13 +1759,17 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) -#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) -#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) -#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags)) +#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) +#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) +#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) +#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) +#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) +#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) +#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) -#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags)) #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) @@ -1857,11 +1782,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) -#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) +#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) @@ -1871,7 +1796,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) -#define amdgpu_display_vblank_wait(adev, crtc) (adev)->mode_info.funcs->vblank_wait((adev), (crtc)) #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) #define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e)) #define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h)) @@ -1894,20 +1818,17 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job, bool force); void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); -void amdgpu_update_display_priority(struct amdgpu_device *adev); +void amdgpu_display_update_priority(struct amdgpu_device *adev); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_device_vram_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc, u64 base); + struct amdgpu_gmc *mc, u64 base); void amdgpu_device_gart_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc); + struct amdgpu_gmc *mc); int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev); -void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size); -int amdgpu_ttm_init(struct amdgpu_device *adev); -void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 *registers, const u32 array_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 57afad79f55d..8fa850a070e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -540,6 +540,9 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, size_t size; u32 retry = 3; + if (amdgpu_acpi_pcie_notify_device_ready(adev)) + return -EINVAL; + /* Get the device handle */ handle = ACPI_HANDLE(&adev->pdev->dev); if (!handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 1d605e1c1d66..4d36203ffb11 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -30,6 +30,8 @@ const struct kgd2kfd_calls *kgd2kfd; bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); +static const unsigned int compute_vmid_bitmap = 0xFF00; + int amdgpu_amdkfd_init(void) { int ret; @@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void) #else ret = -ENOENT; #endif + amdgpu_amdkfd_gpuvm_init_mem_limits(); return ret; } @@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: + case CHIP_HAWAII: kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); break; #endif case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; default: @@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) int last_valid_bit; if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = 0xFF00, + .compute_vmid_bitmap = compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, - .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, + .gpuvm_size = min(adev->vm_manager.max_pfn + << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_VA_HOLE_START), + .drm_render_minor = adev->ddev->render->index }; /* this is going to have a few of the MSBs set that we need to @@ -204,20 +216,14 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **cpu_ptr) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - struct kgd_mem **mem = (struct kgd_mem **) mem_obj; + struct amdgpu_bo *bo = NULL; int r; + uint64_t gpu_addr_tmp = 0; + void *cpu_ptr_tmp = NULL; - BUG_ON(kgd == NULL); - BUG_ON(gpu_addr == NULL); - BUG_ON(cpu_ptr == NULL); - - *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if ((*mem) == NULL) - return -ENOMEM; - - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0, - &(*mem)->bo); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel, + NULL, &bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); @@ -225,54 +231,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, } /* map the buffer */ - r = amdgpu_bo_reserve((*mem)->bo, true); + r = amdgpu_bo_reserve(bo, true); if (r) { dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); goto allocate_mem_reserve_bo_failed; } - r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, - &(*mem)->gpu_addr); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, + &gpu_addr_tmp); if (r) { dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed; } - *gpu_addr = (*mem)->gpu_addr; - r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); + r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); if (r) { dev_err(adev->dev, "(%d) failed to map bo to kernel for amdkfd\n", r); goto allocate_mem_kmap_bo_failed; } - *cpu_ptr = (*mem)->cpu_ptr; - amdgpu_bo_unreserve((*mem)->bo); + *mem_obj = bo; + *gpu_addr = gpu_addr_tmp; + *cpu_ptr = cpu_ptr_tmp; + + amdgpu_bo_unreserve(bo); return 0; allocate_mem_kmap_bo_failed: - amdgpu_bo_unpin((*mem)->bo); + amdgpu_bo_unpin(bo); allocate_mem_pin_bo_failed: - amdgpu_bo_unreserve((*mem)->bo); + amdgpu_bo_unreserve(bo); allocate_mem_reserve_bo_failed: - amdgpu_bo_unref(&(*mem)->bo); + amdgpu_bo_unref(&bo); return r; } void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) { - struct kgd_mem *mem = (struct kgd_mem *) mem_obj; - - BUG_ON(mem == NULL); + struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; - amdgpu_bo_reserve(mem->bo, true); - amdgpu_bo_kunmap(mem->bo); - amdgpu_bo_unpin(mem->bo); - amdgpu_bo_unreserve(mem->bo); - amdgpu_bo_unref(&(mem->bo)); - kfree(mem); + amdgpu_bo_reserve(bo, true); + amdgpu_bo_kunmap(bo); + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&(bo)); } void get_local_mem_info(struct kgd_dev *kgd, @@ -281,24 +286,29 @@ void get_local_mem_info(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *)kgd; uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : ~((1ULL << 32) - 1); - resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size; + resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size; memset(mem_info, 0, sizeof(*mem_info)); - if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) { - mem_info->local_mem_size_public = adev->mc.visible_vram_size; - mem_info->local_mem_size_private = adev->mc.real_vram_size - - adev->mc.visible_vram_size; + if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) { + mem_info->local_mem_size_public = adev->gmc.visible_vram_size; + mem_info->local_mem_size_private = adev->gmc.real_vram_size - + adev->gmc.visible_vram_size; } else { mem_info->local_mem_size_public = 0; - mem_info->local_mem_size_private = adev->mc.real_vram_size; + mem_info->local_mem_size_private = adev->gmc.real_vram_size; } - mem_info->vram_width = adev->mc.vram_width; + mem_info->vram_width = adev->gmc.vram_width; pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n", - &adev->mc.aper_base, &aper_limit, + &adev->gmc.aper_base, &aper_limit, mem_info->local_mem_size_public, mem_info->local_mem_size_private); + if (amdgpu_emu_mode == 1) { + mem_info->mem_clk_max = 100; + return; + } + if (amdgpu_sriov_vf(adev)) mem_info->mem_clk_max = adev->clock.default_mclk / 100; else @@ -319,6 +329,9 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) struct amdgpu_device *adev = (struct amdgpu_device *)kgd; /* the sclk is in quantas of 10kHz */ + if (amdgpu_emu_mode == 1) + return 100; + if (amdgpu_sriov_vf(adev)) return adev->clock.default_sclk / 100; @@ -354,3 +367,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); } + +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct amdgpu_ring *ring; + struct dma_fence *f = NULL; + int ret; + + switch (engine) { + case KGD_ENGINE_MEC1: + ring = &adev->gfx.compute_ring[0]; + break; + case KGD_ENGINE_SDMA1: + ring = &adev->sdma.instance[0].ring; + break; + case KGD_ENGINE_SDMA2: + ring = &adev->sdma.instance[1].ring; + break; + default: + pr_err("Invalid engine in IB submission: %d\n", engine); + ret = -EINVAL; + goto err; + } + + ret = amdgpu_job_alloc(adev, 1, &job, NULL); + if (ret) + goto err; + + ib = &job->ibs[0]; + memset(ib, 0, sizeof(struct amdgpu_ib)); + + ib->gpu_addr = gpu_addr; + ib->ptr = ib_cmd; + ib->length_dw = ib_len; + /* This works for NO_HWS. TODO: need to handle without knowing VMID */ + job->vmid = vmid; + + ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); + if (ret) { + DRM_ERROR("amdgpu: failed to schedule IB.\n"); + goto err_ib_sched; + } + + ret = dma_fence_wait(f, false); + +err_ib_sched: + dma_fence_put(f); + amdgpu_job_free(job); +err: + return ret; +} + +bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) +{ + if (adev->kfd) { + if ((1 << vmid) & compute_vmid_bitmap) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 2a519f9062ee..d7509b706b26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,13 +28,89 @@ #include <linux/types.h> #include <linux/mmu_context.h> #include <kgd_kfd_interface.h> +#include <drm/ttm/ttm_execbuf_util.h> +#include "amdgpu_sync.h" +#include "amdgpu_vm.h" + +extern const struct kgd2kfd_calls *kgd2kfd; struct amdgpu_device; +struct kfd_bo_va_list { + struct list_head bo_list; + struct amdgpu_bo_va *bo_va; + void *kgd_dev; + bool is_mapped; + uint64_t va; + uint64_t pte_flags; +}; + struct kgd_mem { + struct mutex lock; struct amdgpu_bo *bo; - uint64_t gpu_addr; - void *cpu_ptr; + struct list_head bo_va_list; + /* protected by amdkfd_process_info.lock */ + struct ttm_validate_buffer validate_list; + struct ttm_validate_buffer resv_list; + uint32_t domain; + unsigned int mapped_to_gpu_memory; + uint64_t va; + + uint32_t mapping_flags; + + struct amdkfd_process_info *process_info; + + struct amdgpu_sync sync; + + bool aql_queue; +}; + +/* KFD Memory Eviction */ +struct amdgpu_amdkfd_fence { + struct dma_fence base; + struct mm_struct *mm; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; +}; + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm); +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); + +struct amdkfd_process_info { + /* List head of all VMs that belong to a KFD process */ + struct list_head vm_list_head; + /* List head for all KFD BOs that belong to a KFD process. */ + struct list_head kfd_bo_list; + /* Lock to protect kfd_bo_list */ + struct mutex lock; + + /* Number of VMs */ + unsigned int n_vms; + /* Eviction Fence */ + struct amdgpu_amdkfd_fence *eviction_fence; +}; + +/* struct amdkfd_vm - + * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs + * belonging to a KFD process. All the VMs belonging to the same process point + * to the same amdkfd_process_info. + */ +struct amdkfd_vm { + /* Keep base as the first parameter for pointer compatibility between + * amdkfd_vm and amdgpu_vm. + */ + struct amdgpu_vm base; + + /* List node in amdkfd_process_info.vm_list_head*/ + struct list_head vm_list_node; + + struct amdgpu_device *adev; + /* Points to the KFD process VM info*/ + struct amdkfd_process_info *process_info; + + uint64_t pd_phys_addr; }; int amdgpu_amdkfd_init(void); @@ -48,9 +124,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); + /* Shared API */ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -79,4 +161,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); valid; \ }) +/* GPUVM API */ +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef); +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags); +int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem); +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr, uint64_t *size); +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, + struct dma_fence **ef); + +void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c new file mode 100644 index 000000000000..2c14025e5e76 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -0,0 +1,179 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/dma-fence.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/stacktrace.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/sched/mm.h> +#include "amdgpu_amdkfd.h" + +static const struct dma_fence_ops amdkfd_fence_ops; +static atomic_t fence_seq = ATOMIC_INIT(0); + +/* Eviction Fence + * Fence helper functions to deal with KFD memory eviction. + * Big Idea - Since KFD submissions are done by user queues, a BO cannot be + * evicted unless all the user queues for that process are evicted. + * + * All the BOs in a process share an eviction fence. When process X wants + * to map VRAM memory but TTM can't find enough space, TTM will attempt to + * evict BOs from its LRU list. TTM checks if the BO is valuable to evict + * by calling ttm_bo_driver->eviction_valuable(). + * + * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs + * to process X. Otherwise, it will return true to indicate BO can be + * evicted by TTM. + * + * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue + * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move + * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler. + * + * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to + * nofity when the BO is free to move. fence_add_callback --> enable_signaling + * --> amdgpu_amdkfd_fence.enable_signaling + * + * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce + * user queues and signal fence. The work item will also start another delayed + * work item to restore BOs + */ + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm) +{ + struct amdgpu_amdkfd_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return NULL; + + /* This reference gets released in amdkfd_fence_release */ + mmgrab(mm); + fence->mm = mm; + get_task_comm(fence->timeline_name, current); + spin_lock_init(&fence->lock); + + dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock, + context, atomic_inc_return(&fence_seq)); + + return fence; +} + +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence; + + if (!f) + return NULL; + + fence = container_of(f, struct amdgpu_amdkfd_fence, base); + if (fence && f->ops == &amdkfd_fence_ops) + return fence; + + return NULL; +} + +static const char *amdkfd_fence_get_driver_name(struct dma_fence *f) +{ + return "amdgpu_amdkfd_fence"; +} + +static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + return fence->timeline_name; +} + +/** + * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict + * a KFD BO and schedules a job to move the BO. + * If fence is already signaled return true. + * If fence is not signaled schedule a evict KFD process work item. + */ +static bool amdkfd_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + + if (dma_fence_is_signaled(f)) + return true; + + if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f)) + return true; + + return false; +} + +/** + * amdkfd_fence_release - callback that fence can be freed + * + * @fence: fence + * + * This function is called when the reference count becomes zero. + * Drops the mm_struct reference and RCU schedules freeing up the fence. + */ +static void amdkfd_fence_release(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + /* Unconditionally signal the fence. The process is getting + * terminated. + */ + if (WARN_ON(!fence)) + return; /* Not an amdgpu_amdkfd_fence */ + + mmdrop(fence->mm); + kfree_rcu(f, rcu); +} + +/** + * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f + * if same return TRUE else return FALSE. + * + * @f: [IN] fence + * @mm: [IN] mm that needs to be verified + */ +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + else if (fence->mm == mm) + return true; + + return false; +} + +static const struct dma_fence_ops amdkfd_fence_ops = { + .get_driver_name = amdkfd_fence_get_driver_name, + .get_timeline_name = amdkfd_fence_get_timeline_name, + .enable_signaling = amdkfd_fence_enable_signaling, + .signaled = NULL, + .wait = dma_fence_default_wait, + .release = amdkfd_fence_release, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index a9e6aea0e5f8..7485c376b90e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. @@ -196,12 +199,25 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -787,14 +803,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *) kgd; reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static void set_scratch_backing_va(struct kgd_dev *kgd, @@ -812,8 +821,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; const union amdgpu_firmware_header *hdr; - BUG_ON(kgd == NULL); - switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) @@ -866,3 +873,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid\n"); + return 0; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index b127259d7d85..7be453494423 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t queue_id); static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, unsigned int utimeout); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static int kgd_address_watch_disable(struct kgd_dev *kgd); static int kgd_address_watch_execute(struct kgd_dev *kgd, unsigned int watch_point_id, @@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. @@ -157,12 +159,25 @@ static const struct kfd2kgd_calls kfd2kgd = { get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -704,14 +719,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *) kgd; reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; -} - -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static int kgd_address_watch_disable(struct kgd_dev *kgd) @@ -775,8 +783,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; const union amdgpu_firmware_header *hdr; - BUG_ON(kgd == NULL); - switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) @@ -828,3 +834,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) /* Only 12 bit in use*/ return hdr->common.ucode_version; } + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return -EINVAL; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c new file mode 100644 index 000000000000..a12a1654e124 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -0,0 +1,1506 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/list.h> +#include <drm/drmP.h> +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_amdkfd.h" + +/* Special VM and GART address alignment needed for VI pre-Fiji due to + * a HW bug. + */ +#define VI_BO_SIZE_ALIGN (0x8000) + +/* Impose limit on how much memory KFD can use */ +static struct { + uint64_t max_system_mem_limit; + int64_t system_mem_used; + spinlock_t mem_limit_lock; +} kfd_mem_limit; + +/* Struct used for amdgpu_amdkfd_bo_validate */ +struct amdgpu_vm_parser { + uint32_t domain; + bool wait; +}; + +static const char * const domain_bit_to_string[] = { + "CPU", + "GTT", + "VRAM", + "GDS", + "GWS", + "OA" +}; + +#define domain_string(domain) domain_bit_to_string[ffs(domain)-1] + + + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, + struct kgd_mem *mem) +{ + struct kfd_bo_va_list *entry; + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) + if (entry->bo_va->base.vm == avm) + return false; + + return true; +} + +/* Set memory usage limits. Current, limits are + * System (kernel) memory - 3/8th System RAM + */ +void amdgpu_amdkfd_gpuvm_init_mem_limits(void) +{ + struct sysinfo si; + uint64_t mem; + + si_meminfo(&si); + mem = si.totalram - si.totalhigh; + mem *= si.mem_unit; + + spin_lock_init(&kfd_mem_limit.mem_limit_lock); + kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); + pr_debug("Kernel memory limit %lluM\n", + (kfd_mem_limit.max_system_mem_limit >> 20)); +} + +static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + int ret = 0; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) { + if (kfd_mem_limit.system_mem_used + (acc_size + size) > + kfd_mem_limit.max_system_mem_limit) { + ret = -ENOMEM; + goto err_no_mem; + } + kfd_mem_limit.system_mem_used += (acc_size + size); + } +err_no_mem: + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return ret; +} + +static void unreserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) + kfd_mem_limit.system_mem_used -= (acc_size + size); + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +{ + spin_lock(&kfd_mem_limit.mem_limit_lock); + + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { + kfd_mem_limit.system_mem_used -= + (bo->tbo.acc_size + amdgpu_bo_size(bo)); + } + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + + +/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's + * reservation object. + * + * @bo: [IN] Remove eviction fence(s) from this BO + * @ef: [IN] If ef is specified, then this eviction fence is removed if it + * is present in the shared list. + * @ef_list: [OUT] Returns list of eviction fences. These fences are removed + * from BO's reservation object shared list. + * @ef_count: [OUT] Number of fences in ef_list. + * + * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be + * called to restore the eviction fences and to avoid memory leak. This is + * useful for shared BOs. + * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held. + */ +static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence *ef, + struct amdgpu_amdkfd_fence ***ef_list, + unsigned int *ef_count) +{ + struct reservation_object_list *fobj; + struct reservation_object *resv; + unsigned int i = 0, j = 0, k = 0, shared_count; + unsigned int count = 0; + struct amdgpu_amdkfd_fence **fence_list; + + if (!ef && !ef_list) + return -EINVAL; + + if (ef_list) { + *ef_list = NULL; + *ef_count = 0; + } + + resv = bo->tbo.resv; + fobj = reservation_object_get_list(resv); + + if (!fobj) + return 0; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + /* Go through all the shared fences in the resevation object. If + * ef is specified and it exists in the list, remove it and reduce the + * count. If ef is not specified, then get the count of eviction fences + * present. + */ + shared_count = fobj->shared_count; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + if (ef) { + if (f->context == ef->base.context) { + dma_fence_put(f); + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } else if (to_amdgpu_amdkfd_fence(f)) + count++; + } + write_seqcount_end(&resv->seq); + preempt_enable(); + + if (ef || !count) + return 0; + + /* Alloc memory for count number of eviction fence pointers. Fill the + * ef_list array and ef_count + */ + fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *), + GFP_KERNEL); + if (!fence_list) + return -ENOMEM; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + j = 0; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + struct amdgpu_amdkfd_fence *efence; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + efence = to_amdgpu_amdkfd_fence(f); + if (efence) { + fence_list[k++] = efence; + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } + + write_seqcount_end(&resv->seq); + preempt_enable(); + + *ef_list = fence_list; + *ef_count = k; + + return 0; +} + +/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's + * reservation object. + * + * @bo: [IN] Add eviction fences to this BO + * @ef_list: [IN] List of eviction fences to be added + * @ef_count: [IN] Number of fences in ef_list. + * + * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this + * function. + */ +static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence **ef_list, + unsigned int ef_count) +{ + int i; + + if (!ef_list || !ef_count) + return; + + for (i = 0; i < ef_count; i++) { + amdgpu_bo_fence(bo, &ef_list[i]->base, true); + /* Re-adding the fence takes an additional reference. Drop that + * reference. + */ + dma_fence_put(&ef_list[i]->base); + } + + kfree(ef_list); +} + +static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, + bool wait) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), + "Called with userptr BO")) + return -EINVAL; + + amdgpu_ttm_placement_from_domain(bo, domain); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + goto validate_fail; + if (wait) { + struct amdgpu_amdkfd_fence **ef_list; + unsigned int ef_count; + + ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list, + &ef_count); + if (ret) + goto validate_fail; + + ttm_bo_wait(&bo->tbo, false, false); + amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count); + } + +validate_fail: + return ret; +} + +static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) +{ + struct amdgpu_vm_parser *p = param; + + return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); +} + +/* vm_validate_pt_pd_bos - Validate page table and directory BOs + * + * Page directories are not updated here because huge page handling + * during page table updates can invalidate page directory entries + * again. Page directories are only updated after updating page + * tables. + */ +static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) +{ + struct amdgpu_bo *pd = vm->base.root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + struct amdgpu_vm_parser param; + uint64_t addr, flags = AMDGPU_PTE_VALID; + int ret; + + param.domain = AMDGPU_GEM_DOMAIN_VRAM; + param.wait = false; + + ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, + ¶m); + if (ret) { + pr_err("amdgpu: failed to validate PT BOs\n"); + return ret; + } + + ret = amdgpu_amdkfd_validate(¶m, pd); + if (ret) { + pr_err("amdgpu: failed to validate PD\n"); + return ret; + } + + addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); + amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); + vm->pd_phys_addr = addr; + + if (vm->base.use_cpu_for_update) { + ret = amdgpu_bo_kmap(pd, NULL); + if (ret) { + pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); + return ret; + } + } + + return 0; +} + +static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, + struct dma_fence *f) +{ + int ret = amdgpu_sync_fence(adev, sync, f, false); + + /* Sync objects can't handle multiple GPUs (contexts) updating + * sync->last_vm_update. Fortunately we don't need it for + * KFD's purposes, so we can just drop that fence. + */ + if (sync->last_vm_update) { + dma_fence_put(sync->last_vm_update); + sync->last_vm_update = NULL; + } + + return ret; +} + +static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) +{ + struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + int ret; + + ret = amdgpu_vm_update_directories(adev, vm); + if (ret) + return ret; + + return sync_vm_fence(adev, sync, vm->last_update); +} + +/* add_bo_to_vm - Add a BO to a VM + * + * Everything that needs to bo done only once when a BO is first added + * to a VM. It can later be mapped and unmapped many times without + * repeating these steps. + * + * 1. Allocate and initialize BO VA entry data structure + * 2. Add BO to the VM + * 3. Determine ASIC-specific PTE flags + * 4. Alloc page tables and directories if needed + * 4a. Validate new page tables and directories + */ +static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, + struct amdgpu_vm *avm, bool is_aql, + struct kfd_bo_va_list **p_bo_va_entry) +{ + int ret; + struct kfd_bo_va_list *bo_va_entry; + struct amdkfd_vm *kvm = container_of(avm, + struct amdkfd_vm, base); + struct amdgpu_bo *pd = avm->root.base.bo; + struct amdgpu_bo *bo = mem->bo; + uint64_t va = mem->va; + struct list_head *list_bo_va = &mem->bo_va_list; + unsigned long bo_size = bo->tbo.mem.size; + + if (!va) { + pr_err("Invalid VA when adding BO to VM\n"); + return -EINVAL; + } + + if (is_aql) + va += bo_size; + + bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL); + if (!bo_va_entry) + return -ENOMEM; + + pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, + va + bo_size, avm); + + /* Add BO to VM internal data structures*/ + bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); + if (!bo_va_entry->bo_va) { + ret = -EINVAL; + pr_err("Failed to add BO object to VM. ret == %d\n", + ret); + goto err_vmadd; + } + + bo_va_entry->va = va; + bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, + mem->mapping_flags); + bo_va_entry->kgd_dev = (void *)adev; + list_add(&bo_va_entry->bo_list, list_bo_va); + + if (p_bo_va_entry) + *p_bo_va_entry = bo_va_entry; + + /* Allocate new page tables if needed and validate + * them. Clearing of new page tables and validate need to wait + * on move fences. We don't want that to trigger the eviction + * fence, so remove it temporarily. + */ + amdgpu_amdkfd_remove_eviction_fence(pd, + kvm->process_info->eviction_fence, + NULL, NULL); + + ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); + if (ret) { + pr_err("Failed to allocate pts, err=%d\n", ret); + goto err_alloc_pts; + } + + ret = vm_validate_pt_pd_bos(kvm); + if (ret) { + pr_err("validate_pt_pd_bos() failed\n"); + goto err_alloc_pts; + } + + /* Add the eviction fence back */ + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + + return 0; + +err_alloc_pts: + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); + list_del(&bo_va_entry->bo_list); +err_vmadd: + kfree(bo_va_entry); + return ret; +} + +static void remove_bo_from_vm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, unsigned long size) +{ + pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n", + entry->va, + entry->va + size, entry); + amdgpu_vm_bo_rmv(adev, entry->bo_va); + list_del(&entry->bo_list); + kfree(entry); +} + +static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + struct ttm_validate_buffer *entry = &mem->validate_list; + struct amdgpu_bo *bo = mem->bo; + + INIT_LIST_HEAD(&entry->head); + entry->shared = true; + entry->bo = &bo->tbo; + mutex_lock(&process_info->lock); + list_add_tail(&entry->head, &process_info->kfd_bo_list); + mutex_unlock(&process_info->lock); +} + +/* Reserving a BO and its page table BOs must happen atomically to + * avoid deadlocks. Some operations update multiple VMs at once. Track + * all the reservation info in a context structure. Optionally a sync + * object can track VM updates. + */ +struct bo_vm_reservation_context { + struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ + unsigned int n_vms; /* Number of VMs reserved */ + struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ + struct ww_acquire_ctx ticket; /* Reservation ticket */ + struct list_head list, duplicates; /* BO lists */ + struct amdgpu_sync *sync; /* Pointer to sync object */ + bool reserved; /* Whether BOs are reserved */ +}; + +enum bo_vm_match { + BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ + BO_VM_MAPPED, /* Match VMs where a BO is mapped */ + BO_VM_ALL, /* Match all VMs a BO was added to */ +}; + +/** + * reserve_bo_and_vm - reserve a BO and a VM unconditionally. + * @mem: KFD BO structure. + * @vm: the VM to reserve. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). + */ +static int reserve_bo_and_vm(struct kgd_mem *mem, + struct amdgpu_vm *vm, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + int ret; + + WARN_ON(!vm); + + ctx->reserved = false; + ctx->n_vms = 1; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else { + pr_err("Failed to reserve buffers in ttm\n"); + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally + * @mem: KFD BO structure. + * @vm: the VM to reserve. If NULL, then all VMs associated with the BO + * is used. Otherwise, a single VM associated with the BO. + * @map_type: the mapping status that will be used to filter the VMs. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). + * + * Returns 0 for success, negative for failure. + */ +static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + struct amdgpu_vm *vm, enum bo_vm_match map_type, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + struct kfd_bo_va_list *entry; + unsigned int i; + int ret; + + ctx->reserved = false; + ctx->n_vms = 0; + ctx->vm_pd = NULL; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + ctx->n_vms++; + } + + if (ctx->n_vms != 0) { + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), + GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + } + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + i = 0; + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, + &ctx->vm_pd[i]); + i++; + } + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else + pr_err("Failed to reserve buffers in ttm.\n"); + + if (ret) { + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context + * @ctx: Reservation context to unreserve + * @wait: Optionally wait for a sync object representing pending VM updates + * @intr: Whether the wait is interruptible + * + * Also frees any resources allocated in + * reserve_bo_and_(cond_)vm(s). Returns the status from + * amdgpu_sync_wait. + */ +static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, + bool wait, bool intr) +{ + int ret = 0; + + if (wait) + ret = amdgpu_sync_wait(ctx->sync, intr); + + if (ctx->reserved) + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); + kfree(ctx->vm_pd); + + ctx->sync = NULL; + + ctx->reserved = false; + ctx->vm_pd = NULL; + + return ret; +} + +static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, + struct amdgpu_sync *sync) +{ + struct amdgpu_bo_va *bo_va = entry->bo_va; + struct amdgpu_vm *vm = bo_va->base.vm; + struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base); + struct amdgpu_bo *pd = vm->root.base.bo; + + /* Remove eviction fence from PD (and thereby from PTs too as + * they share the resv. object). Otherwise during PT update + * job (see amdgpu_vm_bo_update_mapping), eviction fence would + * get added to job->sync object and job execution would + * trigger the eviction fence. + */ + amdgpu_amdkfd_remove_eviction_fence(pd, + kvm->process_info->eviction_fence, + NULL, NULL); + amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + + amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); + + /* Add the eviction fence back */ + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + + sync_vm_fence(adev, sync, bo_va->last_pt_update); + + return 0; +} + +static int update_gpuvm_pte(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, + struct amdgpu_sync *sync) +{ + int ret; + struct amdgpu_vm *vm; + struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; + + bo_va = entry->bo_va; + vm = bo_va->base.vm; + bo = bo_va->base.bo; + + /* Update the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) { + pr_err("amdgpu_vm_bo_update failed\n"); + return ret; + } + + return sync_vm_fence(adev, sync, bo_va->last_pt_update); +} + +static int map_bo_to_gpuvm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) +{ + int ret; + + /* Set virtual address for the allocation */ + ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, + amdgpu_bo_size(entry->bo_va->base.bo), + entry->pte_flags); + if (ret) { + pr_err("Failed to map VA 0x%llx in vm. ret %d\n", + entry->va, ret); + return ret; + } + + ret = update_gpuvm_pte(adev, entry, sync); + if (ret) { + pr_err("update_gpuvm_pte() failed\n"); + goto update_gpuvm_pte_failed; + } + + return 0; + +update_gpuvm_pte_failed: + unmap_bo_from_gpuvm(adev, entry, sync); + return ret; +} + +static int process_validate_vms(struct amdkfd_process_info *process_info) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_validate_pt_pd_bos(peer_vm); + if (ret) + return ret; + } + + return 0; +} + +static int process_update_pds(struct amdkfd_process_info *process_info, + struct amdgpu_sync *sync) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_update_pds(&peer_vm->base, sync); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef) +{ + int ret; + struct amdkfd_vm *new_vm; + struct amdkfd_process_info *info; + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); + if (!new_vm) + return -ENOMEM; + + /* Initialize the VM context, allocate the page directory and zero it */ + ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0); + if (ret) { + pr_err("Failed init vm ret %d\n", ret); + goto vm_init_fail; + } + new_vm->adev = adev; + + if (!*process_info) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto alloc_process_info_fail; + } + + mutex_init(&info->lock); + INIT_LIST_HEAD(&info->vm_list_head); + INIT_LIST_HEAD(&info->kfd_bo_list); + + info->eviction_fence = + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), + current->mm); + if (!info->eviction_fence) { + pr_err("Failed to create eviction fence\n"); + goto create_evict_fence_fail; + } + + *process_info = info; + *ef = dma_fence_get(&info->eviction_fence->base); + } + + new_vm->process_info = *process_info; + + mutex_lock(&new_vm->process_info->lock); + list_add_tail(&new_vm->vm_list_node, + &(new_vm->process_info->vm_list_head)); + new_vm->process_info->n_vms++; + mutex_unlock(&new_vm->process_info->lock); + + *vm = (void *) new_vm; + + pr_debug("Created process vm %p\n", *vm); + + return ret; + +create_evict_fence_fail: + mutex_destroy(&info->lock); + kfree(info); +alloc_process_info_fail: + amdgpu_vm_fini(adev, &new_vm->base); +vm_init_fail: + kfree(new_vm); + return ret; + +} + +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm; + struct amdgpu_vm *avm = &kfd_vm->base; + struct amdgpu_bo *pd; + struct amdkfd_process_info *process_info; + + if (WARN_ON(!kgd || !vm)) + return; + + pr_debug("Destroying process vm %p\n", vm); + /* Release eviction fence from PD */ + pd = avm->root.base.bo; + amdgpu_bo_reserve(pd, false); + amdgpu_bo_fence(pd, NULL, false); + amdgpu_bo_unreserve(pd); + + process_info = kfd_vm->process_info; + + mutex_lock(&process_info->lock); + process_info->n_vms--; + list_del(&kfd_vm->vm_list_node); + mutex_unlock(&process_info->lock); + + /* Release per-process resources */ + if (!process_info->n_vms) { + WARN_ON(!list_empty(&process_info->kfd_bo_list)); + + dma_fence_put(&process_info->eviction_fence->base); + mutex_destroy(&process_info->lock); + kfree(process_info); + } + + /* Release the VM context */ + amdgpu_vm_fini(adev, avm); + kfree(vm); +} + +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +{ + struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; + + return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; +} + +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + struct amdgpu_bo *bo; + int byte_align; + u32 alloc_domain; + u64 alloc_flags; + uint32_t mapping_flags; + int ret; + + /* + * Check on which domain to allocate BO + */ + if (flags & ALLOC_MEM_FLAGS_VRAM) { + alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; + alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; + alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : + AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + } else if (flags & ALLOC_MEM_FLAGS_GTT) { + alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else { + return -EINVAL; + } + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); + + /* Workaround for AQL queue wraparound bug. Map the same + * memory twice. That means we only actually allocate half + * the memory. + */ + if ((*mem)->aql_queue) + size = size >> 1; + + /* Workaround for TLB bug on older VI chips */ + byte_align = (adev->family == AMDGPU_FAMILY_VI && + adev->asic_type != CHIP_FIJI && + adev->asic_type != CHIP_POLARIS10 && + adev->asic_type != CHIP_POLARIS11) ? + VI_BO_SIZE_ALIGN : 1; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (flags & ALLOC_MEM_FLAGS_WRITABLE) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + if (flags & ALLOC_MEM_FLAGS_COHERENT) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + (*mem)->mapping_flags = mapping_flags; + + amdgpu_sync_create(&(*mem)->sync); + + ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + if (ret) { + pr_debug("Insufficient system memory\n"); + goto err_reserve_system_mem; + } + + pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", + va, size, domain_string(alloc_domain)); + + ret = amdgpu_bo_create(adev, size, byte_align, + alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo); + if (ret) { + pr_debug("Failed to create BO on domain %s. ret %d\n", + domain_string(alloc_domain), ret); + goto err_bo_create; + } + bo->kfd_bo = *mem; + (*mem)->bo = bo; + + (*mem)->va = va; + (*mem)->domain = alloc_domain; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = kfd_vm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info); + + if (offset) + *offset = amdgpu_bo_mmap_offset(bo); + + return 0; + +err_bo_create: + unreserve_system_mem_limit(adev, size, alloc_domain); +err_reserve_system_mem: + mutex_destroy(&(*mem)->lock); + kfree(*mem); + return ret; +} + +int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem) +{ + struct amdkfd_process_info *process_info = mem->process_info; + unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry, *tmp; + struct bo_vm_reservation_context ctx; + struct ttm_validate_buffer *bo_list_entry; + int ret; + + mutex_lock(&mem->lock); + + if (mem->mapped_to_gpu_memory > 0) { + pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", + mem->va, bo_size); + mutex_unlock(&mem->lock); + return -EBUSY; + } + + mutex_unlock(&mem->lock); + /* lock is not needed after this, since mem is unused and will + * be freed anyway + */ + + /* Make sure restore workers don't access the BO any more */ + bo_list_entry = &mem->validate_list; + mutex_lock(&process_info->lock); + list_del(&bo_list_entry->head); + mutex_unlock(&process_info->lock); + + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); + if (unlikely(ret)) + return ret; + + /* The eviction fence should be removed by the last unmap. + * TODO: Log an error condition if the bo still has the eviction fence + * attached + */ + amdgpu_amdkfd_remove_eviction_fence(mem->bo, + process_info->eviction_fence, + NULL, NULL); + pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, + mem->va + bo_size * (1 + mem->aql_queue)); + + /* Remove from VM internal data structures */ + list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) + remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev, + entry, bo_size); + + ret = unreserve_bo_and_vms(&ctx, false, false); + + /* Free the sync object */ + amdgpu_sync_free(&mem->sync); + + /* Free the BO*/ + amdgpu_bo_unref(&mem->bo); + mutex_destroy(&mem->lock); + kfree(mem); + + return ret; +} + +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + int ret; + struct amdgpu_bo *bo; + uint32_t domain; + struct kfd_bo_va_list *entry; + struct bo_vm_reservation_context ctx; + struct kfd_bo_va_list *bo_va_entry = NULL; + struct kfd_bo_va_list *bo_va_entry_aql = NULL; + unsigned long bo_size; + + /* Make sure restore is not running concurrently. + */ + mutex_lock(&mem->process_info->lock); + + mutex_lock(&mem->lock); + + bo = mem->bo; + + if (!bo) { + pr_err("Invalid BO when mapping memory to GPU\n"); + ret = -EINVAL; + goto out; + } + + domain = mem->domain; + bo_size = bo->tbo.mem.size; + + pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n", + mem->va, + mem->va + bo_size * (1 + mem->aql_queue), + vm, domain_string(domain)); + + ret = reserve_bo_and_vm(mem, vm, &ctx); + if (unlikely(ret)) + goto out; + + if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false, + &bo_va_entry); + if (ret) + goto add_bo_to_vm_failed; + if (mem->aql_queue) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, + true, &bo_va_entry_aql); + if (ret) + goto add_bo_to_vm_failed_aql; + } + } else { + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret)) + goto add_bo_to_vm_failed; + } + + if (mem->mapped_to_gpu_memory == 0) { + /* Validate BO only once. The eviction fence gets added to BO + * the first time it is mapped. Validate will wait for all + * background evictions to complete. + */ + ret = amdgpu_amdkfd_bo_validate(bo, domain, true); + if (ret) { + pr_debug("Validate failed\n"); + goto map_bo_to_gpuvm_failed; + } + } + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if (entry->bo_va->base.vm == vm && !entry->is_mapped) { + pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", + entry->va, entry->va + bo_size, + entry); + + ret = map_bo_to_gpuvm(adev, entry, ctx.sync); + if (ret) { + pr_err("Failed to map radeon bo to gpuvm\n"); + goto map_bo_to_gpuvm_failed; + } + + ret = vm_update_pds(vm, ctx.sync); + if (ret) { + pr_err("Failed to update page directories\n"); + goto map_bo_to_gpuvm_failed; + } + + entry->is_mapped = true; + mem->mapped_to_gpu_memory++; + pr_debug("\t INC mapping count %d\n", + mem->mapped_to_gpu_memory); + } + } + + if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) + amdgpu_bo_fence(bo, + &kfd_vm->process_info->eviction_fence->base, + true); + ret = unreserve_bo_and_vms(&ctx, false, false); + + goto out; + +map_bo_to_gpuvm_failed: + if (bo_va_entry_aql) + remove_bo_from_vm(adev, bo_va_entry_aql, bo_size); +add_bo_to_vm_failed_aql: + if (bo_va_entry) + remove_bo_from_vm(adev, bo_va_entry, bo_size); +add_bo_to_vm_failed: + unreserve_bo_and_vms(&ctx, false, false); +out: + mutex_unlock(&mem->process_info->lock); + mutex_unlock(&mem->lock); + return ret; +} + +int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_process_info *process_info = + ((struct amdkfd_vm *)vm)->process_info; + unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry; + struct bo_vm_reservation_context ctx; + int ret; + + mutex_lock(&mem->lock); + + ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); + if (unlikely(ret)) + goto out; + /* If no VMs were reserved, it means the BO wasn't actually mapped */ + if (ctx.n_vms == 0) { + ret = -EINVAL; + goto unreserve_out; + } + + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret)) + goto unreserve_out; + + pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", + mem->va, + mem->va + bo_size * (1 + mem->aql_queue), + vm); + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if (entry->bo_va->base.vm == vm && entry->is_mapped) { + pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", + entry->va, + entry->va + bo_size, + entry); + + ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync); + if (ret == 0) { + entry->is_mapped = false; + } else { + pr_err("failed to unmap VA 0x%llx\n", + mem->va); + goto unreserve_out; + } + + mem->mapped_to_gpu_memory--; + pr_debug("\t DEC mapping count %d\n", + mem->mapped_to_gpu_memory); + } + } + + /* If BO is unmapped from all VMs, unfence it. It can be evicted if + * required. + */ + if (mem->mapped_to_gpu_memory == 0 && + !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) + amdgpu_amdkfd_remove_eviction_fence(mem->bo, + process_info->eviction_fence, + NULL, NULL); + +unreserve_out: + unreserve_bo_and_vms(&ctx, false, false); +out: + mutex_unlock(&mem->lock); + return ret; +} + +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) +{ + struct amdgpu_sync sync; + int ret; + + amdgpu_sync_create(&sync); + + mutex_lock(&mem->lock); + amdgpu_sync_clone(&mem->sync, &sync); + mutex_unlock(&mem->lock); + + ret = amdgpu_sync_wait(&sync, intr); + amdgpu_sync_free(&sync); + return ret; +} + +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr, uint64_t *size) +{ + int ret; + struct amdgpu_bo *bo = mem->bo; + + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { + pr_err("userptr can't be mapped to kernel\n"); + return -EINVAL; + } + + /* delete kgd_mem from kfd_bo_list to avoid re-validating + * this BO in BO's restoring after eviction. + */ + mutex_lock(&mem->process_info->lock); + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("Failed to reserve bo. ret %d\n", ret); + goto bo_reserve_failed; + } + + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + if (ret) { + pr_err("Failed to pin bo. ret %d\n", ret); + goto pin_failed; + } + + ret = amdgpu_bo_kmap(bo, kptr); + if (ret) { + pr_err("Failed to map bo to kernel. ret %d\n", ret); + goto kmap_failed; + } + + amdgpu_amdkfd_remove_eviction_fence( + bo, mem->process_info->eviction_fence, NULL, NULL); + list_del_init(&mem->validate_list.head); + + if (size) + *size = amdgpu_bo_size(bo); + + amdgpu_bo_unreserve(bo); + + mutex_unlock(&mem->process_info->lock); + return 0; + +kmap_failed: + amdgpu_bo_unpin(bo); +pin_failed: + amdgpu_bo_unreserve(bo); +bo_reserve_failed: + mutex_unlock(&mem->process_info->lock); + + return ret; +} + +/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given + * KFD process identified by process_info + * + * @process_info: amdkfd_process_info of the KFD process + * + * After memory eviction, restore thread calls this function. The function + * should be called when the Process is still valid. BO restore involves - + * + * 1. Release old eviction fence and create new one + * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list. + * 3 Use the second PD list and kfd_bo_list to create a list (ctx.list) of + * BOs that need to be reserved. + * 4. Reserve all the BOs + * 5. Validate of PD and PT BOs. + * 6. Validate all KFD BOs using kfd_bo_list and Map them and add new fence + * 7. Add fence to all PD and PT BOs. + * 8. Unreserve all BOs + */ +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) +{ + struct amdgpu_bo_list_entry *pd_bo_list; + struct amdkfd_process_info *process_info = info; + struct amdkfd_vm *peer_vm; + struct kgd_mem *mem; + struct bo_vm_reservation_context ctx; + struct amdgpu_amdkfd_fence *new_fence; + int ret = 0, i; + struct list_head duplicate_save; + struct amdgpu_sync sync_obj; + + INIT_LIST_HEAD(&duplicate_save); + INIT_LIST_HEAD(&ctx.list); + INIT_LIST_HEAD(&ctx.duplicates); + + pd_bo_list = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (!pd_bo_list) + return -ENOMEM; + + i = 0; + mutex_lock(&process_info->lock); + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list, + &pd_bo_list[i++]); + + /* Reserve all BOs and page tables/directory. Add all BOs from + * kfd_bo_list to ctx.list + */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) { + + list_add_tail(&mem->resv_list.head, &ctx.list); + mem->resv_list.bo = mem->validate_list.bo; + mem->resv_list.shared = mem->validate_list.shared; + } + + ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, + false, &duplicate_save); + if (ret) { + pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); + goto ttm_reserve_fail; + } + + amdgpu_sync_create(&sync_obj); + + /* Validate PDs and PTs */ + ret = process_validate_vms(process_info); + if (ret) + goto validate_map_fail; + + /* Wait for PD/PTs validate to finish */ + /* FIXME: I think this isn't needed */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + ttm_bo_wait(&bo->tbo, false, false); + } + + /* Validate BOs and map them to GPUVM (update VM page tables). */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) { + + struct amdgpu_bo *bo = mem->bo; + uint32_t domain = mem->domain; + struct kfd_bo_va_list *bo_va_entry; + + ret = amdgpu_amdkfd_bo_validate(bo, domain, false); + if (ret) { + pr_debug("Memory eviction: Validate BOs failed. Try again\n"); + goto validate_map_fail; + } + + list_for_each_entry(bo_va_entry, &mem->bo_va_list, + bo_list) { + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, + &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PTE failed. Try again\n"); + goto validate_map_fail; + } + } + } + + /* Update page directories */ + ret = process_update_pds(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PDs failed. Try again\n"); + goto validate_map_fail; + } + + amdgpu_sync_wait(&sync_obj, false); + + /* Release old eviction fence and create new one, because fence only + * goes from unsignaled to signaled, fence cannot be reused. + * Use context and mm from the old fence. + */ + new_fence = amdgpu_amdkfd_fence_create( + process_info->eviction_fence->base.context, + process_info->eviction_fence->mm); + if (!new_fence) { + pr_err("Failed to create eviction fence\n"); + ret = -ENOMEM; + goto validate_map_fail; + } + dma_fence_put(&process_info->eviction_fence->base); + process_info->eviction_fence = new_fence; + *ef = dma_fence_get(&new_fence->base); + + /* Wait for validate to finish and attach new eviction fence */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + ttm_bo_wait(&mem->bo->tbo, false, false); + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + amdgpu_bo_fence(mem->bo, + &process_info->eviction_fence->base, true); + + /* Attach eviction fence to PD / PT BOs */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + } + +validate_map_fail: + ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list); + amdgpu_sync_free(&sync_obj); +ttm_reserve_fail: + mutex_unlock(&process_info->lock); + kfree(pd_bo_list); + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index ff8efd0f8fd5..a0f48cb9b8f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -114,6 +114,9 @@ union igp_info { struct atom_integrated_system_info_v1_11 v11; }; +union umc_info { + struct atom_umc_info_v3_1 v31; +}; /* * Return vram width from integrated system info table, if available, * or 0 if not. @@ -143,6 +146,94 @@ int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev) return 0; } +static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, + int atom_mem_type) +{ + int vram_type; + + if (adev->flags & AMD_IS_APU) { + switch (atom_mem_type) { + case Ddr2MemType: + case LpDdr2MemType: + vram_type = AMDGPU_VRAM_TYPE_DDR2; + break; + case Ddr3MemType: + case LpDdr3MemType: + vram_type = AMDGPU_VRAM_TYPE_DDR3; + break; + case Ddr4MemType: + case LpDdr4MemType: + vram_type = AMDGPU_VRAM_TYPE_DDR4; + break; + default: + vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + break; + } + } else { + switch (atom_mem_type) { + case ATOM_DGPU_VRAM_TYPE_GDDR5: + vram_type = AMDGPU_VRAM_TYPE_GDDR5; + break; + case ATOM_DGPU_VRAM_TYPE_HBM: + vram_type = AMDGPU_VRAM_TYPE_HBM; + break; + default: + vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; + break; + } + } + + return vram_type; +} +/* + * Return vram type from either integrated system info table + * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not + */ +int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union igp_info *igp_info; + union umc_info *umc_info; + u8 frev, crev; + u8 mem_type; + + if (adev->flags & AMD_IS_APU) + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + integratedsysteminfo); + else + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + umc_info); + if (amdgpu_atom_parse_data_header(mode_info->atom_context, + index, &size, + &frev, &crev, &data_offset)) { + if (adev->flags & AMD_IS_APU) { + igp_info = (union igp_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 11: + mem_type = igp_info->v11.memorytype; + return convert_atom_mem_type_to_vram_type(adev, mem_type); + default: + return 0; + } + } else { + umc_info = (union umc_info *) + (mode_info->atom_context->bios + data_offset); + switch (crev) { + case 1: + mem_type = umc_info->v31.vram_type; + return convert_atom_mem_type_to_vram_type(adev, mem_type); + default: + return 0; + } + } + } + + return 0; +} + union firmware_info { struct atom_firmware_info_v3_1 v31; }; @@ -151,10 +242,6 @@ union smu_info { struct atom_smu_info_v3_1 v31; }; -union umc_info { - struct atom_umc_info_v3_1 v31; -}; - int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) { struct amdgpu_mode_info *mode_info = &adev->mode_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 288b97e54347..7689c961c4ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -28,6 +28,7 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev) void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev); int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev); +int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index e2c3c5ec42d1..c53095b3b0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -568,6 +568,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = { /* HG _PR3 doesn't seem to work on this A+A weston board */ { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 63ec1e1bb6aa..02b849be083b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -80,8 +80,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, int time; n = AMDGPU_BENCHMARK_ITERATIONS; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, - NULL, 0, &sobj); + r = amdgpu_bo_create(adev, size, PAGE_SIZE,sdomain, 0, + ttm_bo_type_kernel, NULL, &sobj); if (r) { goto out_cleanup; } @@ -93,8 +93,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, if (r) { goto out_cleanup; } - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, - NULL, 0, &dobj); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0, + ttm_bo_type_kernel, NULL, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 59089e027f4d..92be7f6de197 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -233,8 +233,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, for (i = 0; i < list->num_entries; i++) { unsigned priority = list->array[i].priority; - list_add_tail(&list->array[i].tv.head, - &bucket[priority]); + if (!list->array[i].robj->parent) + list_add_tail(&list->array[i].tv.head, + &bucket[priority]); + list->array[i].user_pages = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 4466f3535e2d..37098c68a645 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -24,7 +24,6 @@ #include <linux/list.h> #include <linux/slab.h> #include <linux/pci.h> -#include <linux/acpi.h> #include <drm/drmP.h> #include <linux/firmware.h> #include <drm/amdgpu_drm.h> @@ -42,152 +41,6 @@ struct amdgpu_cgs_device { struct amdgpu_device *adev = \ ((struct amdgpu_cgs_device *)cgs_device)->adev -static void *amdgpu_cgs_register_pp_handle(struct cgs_device *cgs_device, - int (*call_back_func)(struct amd_pp_init *, void **)) -{ - CGS_FUNC_ADEV; - struct amd_pp_init pp_init; - struct amd_powerplay *amd_pp; - - if (call_back_func == NULL) - return NULL; - - amd_pp = &(adev->powerplay); - pp_init.chip_family = adev->family; - pp_init.chip_id = adev->asic_type; - pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false; - pp_init.feature_mask = amdgpu_pp_feature_mask; - pp_init.device = cgs_device; - if (call_back_func(&pp_init, &(amd_pp->pp_handle))) - return NULL; - - return adev->powerplay.pp_handle; -} - -static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, - enum cgs_gpu_mem_type type, - uint64_t size, uint64_t align, - cgs_handle_t *handle) -{ - CGS_FUNC_ADEV; - uint16_t flags = 0; - int ret = 0; - uint32_t domain = 0; - struct amdgpu_bo *obj; - - /* fail if the alignment is not a power of 2 */ - if (((align != 1) && (align & (align - 1))) - || size == 0 || align == 0) - return -EINVAL; - - - switch(type) { - case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__VISIBLE_FB: - flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - domain = AMDGPU_GEM_DOMAIN_VRAM; - break; - case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB: - case CGS_GPU_MEM_TYPE__INVISIBLE_FB: - flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - domain = AMDGPU_GEM_DOMAIN_VRAM; - break; - case CGS_GPU_MEM_TYPE__GART_CACHEABLE: - domain = AMDGPU_GEM_DOMAIN_GTT; - break; - case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE: - flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - domain = AMDGPU_GEM_DOMAIN_GTT; - break; - default: - return -EINVAL; - } - - - *handle = 0; - - ret = amdgpu_bo_create(adev, size, align, true, domain, flags, - NULL, NULL, 0, &obj); - if (ret) { - DRM_ERROR("(%d) bo create failed\n", ret); - return ret; - } - *handle = (cgs_handle_t)obj; - - return ret; -} - -static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) -{ - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - - if (obj) { - int r = amdgpu_bo_reserve(obj, true); - if (likely(r == 0)) { - amdgpu_bo_kunmap(obj); - amdgpu_bo_unpin(obj); - amdgpu_bo_unreserve(obj); - } - amdgpu_bo_unref(&obj); - - } - return 0; -} - -static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, - uint64_t *mcaddr) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - - WARN_ON_ONCE(obj->placement.num_placement > 1); - - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr); - amdgpu_bo_unreserve(obj); - return r; -} - -static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_unpin(obj); - amdgpu_bo_unreserve(obj); - return r; -} - -static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle, - void **map) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - r = amdgpu_bo_kmap(obj, map); - amdgpu_bo_unreserve(obj); - return r; -} - -static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle) -{ - int r; - struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; - r = amdgpu_bo_reserve(obj, true); - if (unlikely(r != 0)) - return r; - amdgpu_bo_kunmap(obj); - amdgpu_bo_unreserve(obj); - return r; -} static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset) { @@ -801,11 +654,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, else strcpy(fw_name, "amdgpu/vega10_smc.bin"); break; - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_RAVEN: - adev->pm.fw_version = info->version; - return 0; default: DRM_ERROR("SMC firmware not supported\n"); return -EINVAL; @@ -857,61 +705,6 @@ static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device) return amdgpu_sriov_vf(adev); } -static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device, - struct cgs_system_info *sys_info) -{ - CGS_FUNC_ADEV; - - if (NULL == sys_info) - return -ENODEV; - - if (sizeof(struct cgs_system_info) != sys_info->size) - return -ENODEV; - - switch (sys_info->info_id) { - case CGS_SYSTEM_INFO_ADAPTER_BDF_ID: - sys_info->value = adev->pdev->devfn | (adev->pdev->bus->number << 8); - break; - case CGS_SYSTEM_INFO_PCIE_GEN_INFO: - sys_info->value = adev->pm.pcie_gen_mask; - break; - case CGS_SYSTEM_INFO_PCIE_MLW: - sys_info->value = adev->pm.pcie_mlw_mask; - break; - case CGS_SYSTEM_INFO_PCIE_DEV: - sys_info->value = adev->pdev->device; - break; - case CGS_SYSTEM_INFO_PCIE_REV: - sys_info->value = adev->pdev->revision; - break; - case CGS_SYSTEM_INFO_CG_FLAGS: - sys_info->value = adev->cg_flags; - break; - case CGS_SYSTEM_INFO_PG_FLAGS: - sys_info->value = adev->pg_flags; - break; - case CGS_SYSTEM_INFO_GFX_CU_INFO: - sys_info->value = adev->gfx.cu_info.number; - break; - case CGS_SYSTEM_INFO_GFX_SE_INFO: - sys_info->value = adev->gfx.config.max_shader_engines; - break; - case CGS_SYSTEM_INFO_PCIE_SUB_SYS_ID: - sys_info->value = adev->pdev->subsystem_device; - break; - case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID: - sys_info->value = adev->pdev->subsystem_vendor; - break; - case CGS_SYSTEM_INFO_PCIE_BUS_DEVFN: - sys_info->value = adev->pdev->devfn; - break; - default: - return -ENODEV; - } - - return 0; -} - static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, struct cgs_display_info *info) { @@ -953,6 +746,11 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device, (amdgpu_crtc->v_border * 2); mode_info->vblank_time_us = vblank_lines * line_time_us; mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode); + /* we have issues with mclk switching with refresh rates + * over 120 hz on the non-DC code. + */ + if (mode_info->refresh_rate > 120) + mode_info->vblank_time_us = 0; mode_info = NULL; } } @@ -977,223 +775,7 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool ena return 0; } -/** \brief evaluate acpi namespace object, handle or pathname must be valid - * \param cgs_device - * \param info input/output arguments for the control method - * \return status - */ - -#if defined(CONFIG_ACPI) -static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, - struct cgs_acpi_method_info *info) -{ - CGS_FUNC_ADEV; - acpi_handle handle; - struct acpi_object_list input; - struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; - union acpi_object *params, *obj; - uint8_t name[5] = {'\0'}; - struct cgs_acpi_method_argument *argument; - uint32_t i, count; - acpi_status status; - int result; - - handle = ACPI_HANDLE(&adev->pdev->dev); - if (!handle) - return -ENODEV; - - memset(&input, 0, sizeof(struct acpi_object_list)); - - /* validate input info */ - if (info->size != sizeof(struct cgs_acpi_method_info)) - return -EINVAL; - - input.count = info->input_count; - if (info->input_count > 0) { - if (info->pinput_argument == NULL) - return -EINVAL; - argument = info->pinput_argument; - for (i = 0; i < info->input_count; i++) { - if (((argument->type == ACPI_TYPE_STRING) || - (argument->type == ACPI_TYPE_BUFFER)) && - (argument->pointer == NULL)) - return -EINVAL; - argument++; - } - } - - if (info->output_count > 0) { - if (info->poutput_argument == NULL) - return -EINVAL; - argument = info->poutput_argument; - for (i = 0; i < info->output_count; i++) { - if (((argument->type == ACPI_TYPE_STRING) || - (argument->type == ACPI_TYPE_BUFFER)) - && (argument->pointer == NULL)) - return -EINVAL; - argument++; - } - } - - /* The path name passed to acpi_evaluate_object should be null terminated */ - if ((info->field & CGS_ACPI_FIELD_METHOD_NAME) != 0) { - strncpy(name, (char *)&(info->name), sizeof(uint32_t)); - name[4] = '\0'; - } - - /* parse input parameters */ - if (input.count > 0) { - input.pointer = params = - kzalloc(sizeof(union acpi_object) * input.count, GFP_KERNEL); - if (params == NULL) - return -EINVAL; - - argument = info->pinput_argument; - - for (i = 0; i < input.count; i++) { - params->type = argument->type; - switch (params->type) { - case ACPI_TYPE_INTEGER: - params->integer.value = argument->value; - break; - case ACPI_TYPE_STRING: - params->string.length = argument->data_length; - params->string.pointer = argument->pointer; - break; - case ACPI_TYPE_BUFFER: - params->buffer.length = argument->data_length; - params->buffer.pointer = argument->pointer; - break; - default: - break; - } - params++; - argument++; - } - } - - /* parse output info */ - count = info->output_count; - argument = info->poutput_argument; - - /* evaluate the acpi method */ - status = acpi_evaluate_object(handle, name, &input, &output); - - if (ACPI_FAILURE(status)) { - result = -EIO; - goto free_input; - } - - /* return the output info */ - obj = output.pointer; - - if (count > 1) { - if ((obj->type != ACPI_TYPE_PACKAGE) || - (obj->package.count != count)) { - result = -EIO; - goto free_obj; - } - params = obj->package.elements; - } else - params = obj; - - if (params == NULL) { - result = -EIO; - goto free_obj; - } - - for (i = 0; i < count; i++) { - if (argument->type != params->type) { - result = -EIO; - goto free_obj; - } - switch (params->type) { - case ACPI_TYPE_INTEGER: - argument->value = params->integer.value; - break; - case ACPI_TYPE_STRING: - if ((params->string.length != argument->data_length) || - (params->string.pointer == NULL)) { - result = -EIO; - goto free_obj; - } - strncpy(argument->pointer, - params->string.pointer, - params->string.length); - break; - case ACPI_TYPE_BUFFER: - if (params->buffer.pointer == NULL) { - result = -EIO; - goto free_obj; - } - memcpy(argument->pointer, - params->buffer.pointer, - argument->data_length); - break; - default: - break; - } - argument++; - params++; - } - - result = 0; -free_obj: - kfree(obj); -free_input: - kfree((void *)input.pointer); - return result; -} -#else -static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device, - struct cgs_acpi_method_info *info) -{ - return -EIO; -} -#endif - -static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device, - uint32_t acpi_method, - uint32_t acpi_function, - void *pinput, void *poutput, - uint32_t output_count, - uint32_t input_size, - uint32_t output_size) -{ - struct cgs_acpi_method_argument acpi_input[2] = { {0}, {0} }; - struct cgs_acpi_method_argument acpi_output = {0}; - struct cgs_acpi_method_info info = {0}; - - acpi_input[0].type = CGS_ACPI_TYPE_INTEGER; - acpi_input[0].data_length = sizeof(uint32_t); - acpi_input[0].value = acpi_function; - - acpi_input[1].type = CGS_ACPI_TYPE_BUFFER; - acpi_input[1].data_length = input_size; - acpi_input[1].pointer = pinput; - - acpi_output.type = CGS_ACPI_TYPE_BUFFER; - acpi_output.data_length = output_size; - acpi_output.pointer = poutput; - - info.size = sizeof(struct cgs_acpi_method_info); - info.field = CGS_ACPI_FIELD_METHOD_NAME | CGS_ACPI_FIELD_INPUT_ARGUMENT_COUNT; - info.input_count = 2; - info.name = acpi_method; - info.pinput_argument = acpi_input; - info.output_count = output_count; - info.poutput_argument = &acpi_output; - - return amdgpu_cgs_acpi_eval_object(cgs_device, &info); -} - static const struct cgs_ops amdgpu_cgs_ops = { - .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem, - .free_gpu_mem = amdgpu_cgs_free_gpu_mem, - .gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem, - .gunmap_gpu_mem = amdgpu_cgs_gunmap_gpu_mem, - .kmap_gpu_mem = amdgpu_cgs_kmap_gpu_mem, - .kunmap_gpu_mem = amdgpu_cgs_kunmap_gpu_mem, .read_register = amdgpu_cgs_read_register, .write_register = amdgpu_cgs_write_register, .read_ind_register = amdgpu_cgs_read_ind_register, @@ -1208,12 +790,9 @@ static const struct cgs_ops amdgpu_cgs_ops = { .set_clockgating_state = amdgpu_cgs_set_clockgating_state, .get_active_displays_info = amdgpu_cgs_get_active_displays_info, .notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled, - .call_acpi_method = amdgpu_cgs_call_acpi_method, - .query_system_info = amdgpu_cgs_query_system_info, .is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled, .enter_safe_mode = amdgpu_cgs_enter_safe_mode, .lock_grbm_idx = amdgpu_cgs_lock_grbm_idx, - .register_pp_handle = amdgpu_cgs_register_pp_handle, }; static const struct cgs_os_ops amdgpu_cgs_os_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 8ca3783f2deb..9da8d5802980 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -69,25 +69,18 @@ void amdgpu_connector_hotplug(struct drm_connector *connector) /* don't do anything if sink is not display port, i.e., * passive dp->(dvi|hdmi) adaptor */ - if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { - int saved_dpms = connector->dpms; - /* Only turn off the display if it's physically disconnected */ - if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { - /* Don't try to start link training before we - * have the dpcd */ - if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) - return; - - /* set it to OFF so that drm_helper_connector_dpms() - * won't return immediately since the current state - * is ON at this point. - */ - connector->dpms = DRM_MODE_DPMS_OFF; - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - } - connector->dpms = saved_dpms; + if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT && + amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) && + amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { + /* Don't start link training before we have the DPCD */ + if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) + return; + + /* Turn the connector off and back on immediately, which + * will trigger link training + */ + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } } } @@ -877,7 +870,7 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force) ret = connector_status_disconnected; if (amdgpu_connector->ddc_bus) - dret = amdgpu_ddc_probe(amdgpu_connector, false); + dret = amdgpu_display_ddc_probe(amdgpu_connector, false); if (dret) { amdgpu_connector->detected_by_load = false; amdgpu_connector_free_edid(connector); @@ -998,7 +991,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) } if (amdgpu_connector->ddc_bus) - dret = amdgpu_ddc_probe(amdgpu_connector, false); + dret = amdgpu_display_ddc_probe(amdgpu_connector, false); if (dret) { amdgpu_connector->detected_by_load = false; amdgpu_connector_free_edid(connector); @@ -1401,7 +1394,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) /* setup ddc on the bridge */ amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder); /* bridge chips are always aux */ - if (amdgpu_ddc_probe(amdgpu_connector, true)) /* try DDC */ + /* try DDC */ + if (amdgpu_display_ddc_probe(amdgpu_connector, true)) ret = connector_status_connected; else if (amdgpu_connector->dac_load_detect) { /* try load detection */ const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; @@ -1421,7 +1415,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) ret = connector_status_connected; } else { /* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */ - if (amdgpu_ddc_probe(amdgpu_connector, false)) + if (amdgpu_display_ddc_probe(amdgpu_connector, + false)) ret = connector_status_connected; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e80fc38141b5..dc34b50e6b29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -257,7 +257,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, return; } - total_vram = adev->mc.real_vram_size - adev->vram_pin_size; + total_vram = adev->gmc.real_vram_size - adev->vram_pin_size; used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; @@ -302,8 +302,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); /* Do the same for visible VRAM if half of it is free */ - if (adev->mc.visible_vram_size < adev->mc.real_vram_size) { - u64 total_vis_vram = adev->mc.visible_vram_size; + if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) { + u64 total_vis_vram = adev->gmc.visible_vram_size; u64 used_vis_vram = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); @@ -346,8 +346,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, - .allow_reserved_eviction = false, - .resv = bo->tbo.resv + .resv = bo->tbo.resv, + .flags = 0 }; uint32_t domain; int r; @@ -359,7 +359,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, * to move it. Don't move anything if the threshold is zero. */ if (p->bytes_moved < p->bytes_moved_threshold) { - if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { /* And don't move a CPU_ACCESS_REQUIRED BO to limited * visible VRAM if we've depleted our allowance to do @@ -381,9 +381,9 @@ retry: r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); p->bytes_moved += ctx.bytes_moved; - if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { @@ -437,9 +437,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ amdgpu_ttm_placement_from_domain(bo, other); update_bytes_moved_vis = - adev->mc.visible_vram_size < adev->mc.real_vram_size && + adev->gmc.visible_vram_size < adev->gmc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT; + bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT; initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); bytes_moved = atomic64_read(&adev->num_bytes_moved) - @@ -542,7 +542,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); - if (p->uf_entry.robj) + if (p->uf_entry.robj && !p->uf_entry.robj->parent) list_add(&p->uf_entry.tv.head, &p->validated); while (1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index ee76b468774a..369beb5041a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -767,10 +767,21 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data) return 0; } +static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + + seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT)); + return 0; +} + static const struct drm_info_list amdgpu_debugfs_list[] = { {"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump}, {"amdgpu_test_ib", &amdgpu_debugfs_test_ib}, - {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram} + {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram}, + {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt}, }; int amdgpu_debugfs_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 00a50cc5ec9a..690cf77b950e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -87,6 +87,8 @@ static const char *amdgpu_asic_name[] = { "LAST", }; +static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); + bool amdgpu_device_is_px(struct drm_device *dev) { struct amdgpu_device *adev = dev->dev_private; @@ -121,6 +123,32 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, return ret; } +/* + * MMIO register read with bytes helper functions + * @offset:bytes offset from MMIO start + * +*/ + +uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) { + if (offset < adev->rmmio_size) + return (readb(adev->rmmio + offset)); + BUG(); +} + +/* + * MMIO register write with bytes helper functions + * @offset:bytes offset from MMIO start + * @value: the value want to be written to the register + * +*/ +void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) { + if (offset < adev->rmmio_size) + writeb(value, adev->rmmio + offset); + else + BUG(); +} + + void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags) { @@ -492,7 +520,7 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev) memset(&adev->wb.used, 0, sizeof(adev->wb.used)); /* clear wb memory */ - memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t)); + memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); } return 0; @@ -530,8 +558,9 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) */ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { + wb >>= 3; if (wb < adev->wb.num_wb) - __clear_bit(wb >> 3, adev->wb.used); + __clear_bit(wb, adev->wb.used); } /** @@ -544,7 +573,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) * as parameter. */ void amdgpu_device_vram_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc, u64 base) + struct amdgpu_gmc *mc, u64 base) { uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; @@ -570,11 +599,11 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev, * FIXME: when reducing GTT size align new size on power of 2. */ void amdgpu_device_gart_location(struct amdgpu_device *adev, - struct amdgpu_mc *mc) + struct amdgpu_gmc *mc) { u64 size_af, size_bf; - size_af = adev->mc.mc_mask - mc->vram_end; + size_af = adev->gmc.mc_mask - mc->vram_end; size_bf = mc->vram_start; if (size_bf > size_af) { if (mc->gart_size > size_bf) { @@ -608,7 +637,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev, */ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) { - u64 space_needed = roundup_pow_of_two(adev->mc.real_vram_size); + u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size); u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1; struct pci_bus *root; struct resource *res; @@ -829,6 +858,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev) dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n"); amdgpu_lockup_timeout = 10000; } + + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); } /** @@ -1036,7 +1067,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, if (!ip_block_version) return -EINVAL; - DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks, + DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, ip_block_version->funcs->name); adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; @@ -1310,6 +1341,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) return r; } adev->ip_blocks[i].status.sw = true; + /* need to do gmc hw init early so we can allocate gpu mem */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { r = amdgpu_device_vram_scratch_init(adev); @@ -1343,8 +1375,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.sw) continue; - /* gmc hw init is done early */ - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) + if (adev->ip_blocks[i].status.hw) continue; r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); if (r) { @@ -1378,12 +1409,16 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) { int i = 0, r; + if (amdgpu_emu_mode == 1) + return 0; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && - adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { + adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && + adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, AMD_CG_STATE_GATE); @@ -1432,7 +1467,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC && + adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* ungate blocks before hw fini so that we can shutdown the blocks safely */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, AMD_CG_STATE_UNGATE); @@ -1455,11 +1491,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.hw) continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { - amdgpu_free_static_csa(adev); - amdgpu_device_wb_fini(adev); - amdgpu_device_vram_scratch_fini(adev); - } if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { @@ -1483,9 +1514,19 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } + /* disable all interrupts */ + amdgpu_irq_disable_all(adev); + for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) continue; + + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { + amdgpu_free_static_csa(adev); + amdgpu_device_wb_fini(adev); + amdgpu_device_vram_scratch_fini(adev); + } + r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); /* XXX handle errors */ if (r) { @@ -1536,7 +1577,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.valid) continue; /* ungate blocks so that suspend can properly shut them down */ - if (i != AMD_IP_BLOCK_TYPE_SMC) { + if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC && + adev->ip_blocks[i].version->funcs->set_clockgating_state) { r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, AMD_CG_STATE_UNGATE); if (r) { @@ -1582,6 +1624,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) r = block->version->funcs->hw_init(adev); DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); + if (r) + return r; } } @@ -1615,6 +1659,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) r = block->version->funcs->hw_init(adev); DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); + if (r) + return r; } } @@ -1701,6 +1747,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) case CHIP_BONAIRE: case CHIP_HAWAII: case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: case CHIP_CARRIZO: case CHIP_STONEY: case CHIP_POLARIS11: @@ -1711,9 +1759,6 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #if defined(CONFIG_DRM_AMD_DC_PRE_VEGA) return amdgpu_dc != 0; #endif - case CHIP_KABINI: - case CHIP_MULLINS: - return amdgpu_dc > 0; case CHIP_VEGA10: #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case CHIP_RAVEN: @@ -1768,14 +1813,16 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->flags = flags; adev->asic_type = flags & AMD_ASIC_MASK; adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; - adev->mc.gart_size = 512 * 1024 * 1024; + if (amdgpu_emu_mode == 1) + adev->usec_timeout *= 2; + adev->gmc.gart_size = 512 * 1024 * 1024; adev->accel_working = false; adev->num_rings = 0; adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; adev->vm_manager.vm_pte_num_rings = 0; - adev->gart.gart_funcs = NULL; + adev->gmc.gmc_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); @@ -1864,6 +1911,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (adev->rio_mem == NULL) DRM_INFO("PCI I/O BAR is not found.\n"); + amdgpu_device_get_pcie_info(adev); + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) @@ -1882,6 +1931,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (runtime) vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); + if (amdgpu_emu_mode == 1) { + /* post the asic on emulation mode */ + emu_soc_asic_init(adev); + goto fence_driver_init; + } + /* Read BIOS */ if (!amdgpu_get_bios(adev)) { r = -EINVAL; @@ -1934,6 +1989,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_atombios_i2c_init(adev); } +fence_driver_init: /* Fence driver */ r = amdgpu_fence_driver_init(adev); if (r) { @@ -2065,6 +2121,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); + amdgpu_pm_sysfs_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_device_ip_fini(adev); if (adev->firmware.gpu_info_fw) { @@ -2076,7 +2133,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev) /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) amdgpu_i2c_fini(adev); - amdgpu_atombios_fini(adev); + + if (amdgpu_emu_mode != 1) + amdgpu_atombios_fini(adev); + kfree(adev->bios); adev->bios = NULL; if (!pci_is_thunderbolt_attached(adev->pdev)) @@ -2090,7 +2150,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; amdgpu_device_doorbell_fini(adev); - amdgpu_pm_sysfs_fini(adev); amdgpu_debugfs_regs_cleanup(adev); } @@ -2284,14 +2343,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); } drm_modeset_unlock_all(dev); - } else { - /* - * There is no equivalent atomic helper to turn on - * display, so we defined our own function for this, - * once suspend resume is supported by the atomic - * framework this will be reworked - */ - amdgpu_dm_display_resume(adev); } } @@ -2458,17 +2509,71 @@ err: return r; } +static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct amdgpu_bo *bo, *tmp; + struct dma_fence *fence = NULL, *next = NULL; + long r = 1; + int i = 0; + long tmo; + + if (amdgpu_sriov_runtime(adev)) + tmo = msecs_to_jiffies(amdgpu_lockup_timeout); + else + tmo = msecs_to_jiffies(100); + + DRM_INFO("recover vram bo from shadow start\n"); + mutex_lock(&adev->shadow_list_lock); + list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { + next = NULL; + amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); + if (fence) { + r = dma_fence_wait_timeout(fence, false, tmo); + if (r == 0) + pr_err("wait fence %p[%d] timeout\n", fence, i); + else if (r < 0) + pr_err("wait fence %p[%d] interrupted\n", fence, i); + if (r < 1) { + dma_fence_put(fence); + fence = next; + break; + } + i++; + } + + dma_fence_put(fence); + fence = next; + } + mutex_unlock(&adev->shadow_list_lock); + + if (fence) { + r = dma_fence_wait_timeout(fence, false, tmo); + if (r == 0) + pr_err("wait fence %p[%d] timeout\n", fence, i); + else if (r < 0) + pr_err("wait fence %p[%d] interrupted\n", fence, i); + + } + dma_fence_put(fence); + + if (r > 0) + DRM_INFO("recover vram bo from shadow done\n"); + else + DRM_ERROR("recover vram bo from shadow failed\n"); + + return (r > 0?0:1); +} + /* * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough * * @adev: amdgpu device pointer - * @reset_flags: output param tells caller the reset result * * attempt to do soft-reset or full-reset and reinitialize Asic * return 0 means successed otherwise failed */ -static int amdgpu_device_reset(struct amdgpu_device *adev, - uint64_t* reset_flags) +static int amdgpu_device_reset(struct amdgpu_device *adev) { bool need_full_reset, vram_lost = 0; int r; @@ -2483,7 +2588,6 @@ static int amdgpu_device_reset(struct amdgpu_device *adev, DRM_INFO("soft reset failed, will fallback to full reset!\n"); need_full_reset = true; } - } if (need_full_reset) { @@ -2532,13 +2636,8 @@ out: } } - if (reset_flags) { - if (vram_lost) - (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; - - if (need_full_reset) - (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET; - } + if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost)) + r = amdgpu_device_handle_vram_lost(adev); return r; } @@ -2547,14 +2646,11 @@ out: * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * * @adev: amdgpu device pointer - * @reset_flags: output param tells caller the reset result * * do VF FLR and reinitialize Asic * return 0 means successed otherwise failed */ -static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, - uint64_t *reset_flags, - bool from_hypervisor) +static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, bool from_hypervisor) { int r; @@ -2575,28 +2671,20 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, /* now we are okay to resume SMC/CP/SDMA */ r = amdgpu_device_ip_reinit_late_sriov(adev); + amdgpu_virt_release_full_gpu(adev, true); if (r) goto error; amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); - if (r) - dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r); - -error: - /* release full control of GPU after ib test */ - amdgpu_virt_release_full_gpu(adev, true); - if (reset_flags) { - if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { - (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST; - atomic_inc(&adev->vram_lost_counter); - } - - /* VF FLR or hotlink reset is always full-reset */ - (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET; + if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { + atomic_inc(&adev->vram_lost_counter); + r = amdgpu_device_handle_vram_lost(adev); } +error: + return r; } @@ -2614,7 +2702,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force) { struct drm_atomic_state *state = NULL; - uint64_t reset_flags = 0; int i, r, resched; if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { @@ -2636,22 +2723,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); + /* store modesetting */ if (amdgpu_device_has_dc_support(adev)) state = drm_atomic_helper_suspend(adev->ddev); - /* block scheduler */ + /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; if (!ring || !ring->sched.thread) continue; - /* only focus on the ring hit timeout if &job not NULL */ + kthread_park(ring->sched.thread); + if (job && job->ring->idx != i) continue; - kthread_park(ring->sched.thread); drm_sched_hw_job_reset(&ring->sched, &job->base); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ @@ -2659,74 +2747,29 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, } if (amdgpu_sriov_vf(adev)) - r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true); + r = amdgpu_device_reset_sriov(adev, job ? false : true); else - r = amdgpu_device_reset(adev, &reset_flags); + r = amdgpu_device_reset(adev); - if (!r) { - if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) || - (reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) { - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - struct amdgpu_bo *bo, *tmp; - struct dma_fence *fence = NULL, *next = NULL; - - DRM_INFO("recover vram bo from shadow\n"); - mutex_lock(&adev->shadow_list_lock); - list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { - next = NULL; - amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); - if (fence) { - r = dma_fence_wait(fence, false); - if (r) { - WARN(r, "recovery from shadow isn't completed\n"); - break; - } - } - - dma_fence_put(fence); - fence = next; - } - mutex_unlock(&adev->shadow_list_lock); - if (fence) { - r = dma_fence_wait(fence, false); - if (r) - WARN(r, "recovery from shadow isn't completed\n"); - } - dma_fence_put(fence); - } - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; - /* only focus on the ring hit timeout if &job not NULL */ - if (job && job->ring->idx != i) - continue; + if (!ring || !ring->sched.thread) + continue; + /* only need recovery sched of the given job's ring + * or all rings (in the case @job is NULL) + * after above amdgpu_reset accomplished + */ + if ((!job || job->ring->idx == i) && !r) drm_sched_job_recovery(&ring->sched); - kthread_unpark(ring->sched.thread); - } - } else { - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; - /* only focus on the ring hit timeout if &job not NULL */ - if (job && job->ring->idx != i) - continue; - - kthread_unpark(adev->rings[i]->sched.thread); - } + kthread_unpark(ring->sched.thread); } if (amdgpu_device_has_dc_support(adev)) { if (drm_atomic_helper_resume(adev->ddev, state)) dev_info(adev->dev, "drm resume failed:%d\n", r); - amdgpu_dm_display_resume(adev); } else { drm_helper_resume_force_mode(adev->ddev); } @@ -2747,7 +2790,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, return r; } -void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) +static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { u32 mask; int ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 38d47559f098..93f700ab1bfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -29,6 +29,7 @@ #include "amdgpu_i2c.h" #include "atom.h" #include "amdgpu_connectors.h" +#include "amdgpu_display.h" #include <asm/div64.h> #include <linux/pm_runtime.h> @@ -36,7 +37,8 @@ #include <drm/drm_edid.h> #include <drm/drm_fb_helper.h> -static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) +static void amdgpu_display_flip_callback(struct dma_fence *f, + struct dma_fence_cb *cb) { struct amdgpu_flip_work *work = container_of(cb, struct amdgpu_flip_work, cb); @@ -45,8 +47,8 @@ static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) schedule_work(&work->flip_work.work); } -static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work, - struct dma_fence **f) +static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work, + struct dma_fence **f) { struct dma_fence *fence= *f; @@ -55,14 +57,15 @@ static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work, *f = NULL; - if (!dma_fence_add_callback(fence, &work->cb, amdgpu_flip_callback)) + if (!dma_fence_add_callback(fence, &work->cb, + amdgpu_display_flip_callback)) return true; dma_fence_put(fence); return false; } -static void amdgpu_flip_work_func(struct work_struct *__work) +static void amdgpu_display_flip_work_func(struct work_struct *__work) { struct delayed_work *delayed_work = container_of(__work, struct delayed_work, work); @@ -76,20 +79,20 @@ static void amdgpu_flip_work_func(struct work_struct *__work) unsigned i; int vpos, hpos; - if (amdgpu_flip_handle_fence(work, &work->excl)) + if (amdgpu_display_flip_handle_fence(work, &work->excl)) return; for (i = 0; i < work->shared_count; ++i) - if (amdgpu_flip_handle_fence(work, &work->shared[i])) + if (amdgpu_display_flip_handle_fence(work, &work->shared[i])) return; /* Wait until we're out of the vertical blank period before the one * targeted by the flip */ if (amdgpu_crtc->enabled && - (amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0, - &vpos, &hpos, NULL, NULL, - &crtc->hwmode) + (amdgpu_display_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0, + &vpos, &hpos, NULL, NULL, + &crtc->hwmode) & (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) == (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) && (int)(work->target_vblank - @@ -117,7 +120,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work) /* * Handle unpin events outside the interrupt handler proper. */ -static void amdgpu_unpin_work_func(struct work_struct *__work) +static void amdgpu_display_unpin_work_func(struct work_struct *__work) { struct amdgpu_flip_work *work = container_of(__work, struct amdgpu_flip_work, unpin_work); @@ -139,11 +142,11 @@ static void amdgpu_unpin_work_func(struct work_struct *__work) kfree(work); } -int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, uint32_t target, - struct drm_modeset_acquire_ctx *ctx) +int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags, uint32_t target, + struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->dev; struct amdgpu_device *adev = dev->dev_private; @@ -162,8 +165,8 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, if (work == NULL) return -ENOMEM; - INIT_DELAYED_WORK(&work->flip_work, amdgpu_flip_work_func); - INIT_WORK(&work->unpin_work, amdgpu_unpin_work_func); + INIT_DELAYED_WORK(&work->flip_work, amdgpu_display_flip_work_func); + INIT_WORK(&work->unpin_work, amdgpu_display_unpin_work_func); work->event = event; work->adev = adev; @@ -189,7 +192,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, goto cleanup; } - r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base); + r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base); if (unlikely(r != 0)) { DRM_ERROR("failed to pin new abo buffer before flip\n"); goto unreserve; @@ -207,7 +210,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, amdgpu_bo_unreserve(new_abo); work->base = base; - work->target_vblank = target - drm_crtc_vblank_count(crtc) + + work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) + amdgpu_get_vblank_counter_kms(dev, work->crtc_id); /* we borrow the event spin lock for protecting flip_wrok */ @@ -228,7 +231,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, /* update crtc fb */ crtc->primary->fb = fb; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - amdgpu_flip_work_func(&work->flip_work.work); + amdgpu_display_flip_work_func(&work->flip_work.work); return 0; pflip_cleanup: @@ -254,8 +257,8 @@ cleanup: return r; } -int amdgpu_crtc_set_config(struct drm_mode_set *set, - struct drm_modeset_acquire_ctx *ctx) +int amdgpu_display_crtc_set_config(struct drm_mode_set *set, + struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev; struct amdgpu_device *adev; @@ -352,7 +355,7 @@ static const char *hpd_names[6] = { "HPD6", }; -void amdgpu_print_display_setup(struct drm_device *dev) +void amdgpu_display_print_display_setup(struct drm_device *dev) { struct drm_connector *connector; struct amdgpu_connector *amdgpu_connector; @@ -429,11 +432,11 @@ void amdgpu_print_display_setup(struct drm_device *dev) } /** - * amdgpu_ddc_probe + * amdgpu_display_ddc_probe * */ -bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, - bool use_aux) +bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, + bool use_aux) { u8 out = 0x0; u8 buf[8]; @@ -479,7 +482,7 @@ bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, return true; } -static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb) +static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); @@ -488,9 +491,10 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb) kfree(amdgpu_fb); } -static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) +static int amdgpu_display_user_framebuffer_create_handle( + struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int *handle) { struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb); @@ -498,15 +502,28 @@ static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb, } static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { - .destroy = amdgpu_user_framebuffer_destroy, - .create_handle = amdgpu_user_framebuffer_create_handle, + .destroy = amdgpu_display_user_framebuffer_destroy, + .create_handle = amdgpu_display_user_framebuffer_create_handle, }; -int -amdgpu_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev) +{ + uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; + +#if defined(CONFIG_DRM_AMD_DC) + if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN && + adev->flags & AMD_IS_APU && + amdgpu_device_asic_has_dc_support(adev->asic_type)) + domain |= AMDGPU_GEM_DOMAIN_GTT; +#endif + + return domain; +} + +int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { int ret; rfb->obj = obj; @@ -520,9 +537,9 @@ amdgpu_framebuffer_init(struct drm_device *dev, } struct drm_framebuffer * -amdgpu_user_framebuffer_create(struct drm_device *dev, - struct drm_file *file_priv, - const struct drm_mode_fb_cmd2 *mode_cmd) +amdgpu_display_user_framebuffer_create(struct drm_device *dev, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj; struct amdgpu_framebuffer *amdgpu_fb; @@ -547,7 +564,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOMEM); } - ret = amdgpu_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj); + ret = amdgpu_display_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj); if (ret) { kfree(amdgpu_fb); drm_gem_object_put_unlocked(obj); @@ -558,7 +575,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev, } const struct drm_mode_config_funcs amdgpu_mode_funcs = { - .fb_create = amdgpu_user_framebuffer_create, + .fb_create = amdgpu_display_user_framebuffer_create, .output_poll_changed = drm_fb_helper_output_poll_changed, }; @@ -580,7 +597,7 @@ static const struct drm_prop_enum_list amdgpu_dither_enum_list[] = { AMDGPU_FMT_DITHER_ENABLE, "on" }, }; -int amdgpu_modeset_create_props(struct amdgpu_device *adev) +int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) { int sz; @@ -629,7 +646,7 @@ int amdgpu_modeset_create_props(struct amdgpu_device *adev) return 0; } -void amdgpu_update_display_priority(struct amdgpu_device *adev) +void amdgpu_display_update_priority(struct amdgpu_device *adev) { /* adjustment options for the display watermarks */ if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2)) @@ -639,7 +656,7 @@ void amdgpu_update_display_priority(struct amdgpu_device *adev) } -static bool is_hdtv_mode(const struct drm_display_mode *mode) +static bool amdgpu_display_is_hdtv_mode(const struct drm_display_mode *mode) { /* try and guess if this is a tv or a monitor */ if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */ @@ -651,9 +668,9 @@ static bool is_hdtv_mode(const struct drm_display_mode *mode) return false; } -bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) +bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; struct drm_encoder *encoder; @@ -696,7 +713,7 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, ((amdgpu_encoder->underscan_type == UNDERSCAN_ON) || ((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) && drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) && - is_hdtv_mode(mode)))) { + amdgpu_display_is_hdtv_mode(mode)))) { if (amdgpu_encoder->underscan_hborder != 0) amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder; else @@ -764,10 +781,10 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, * unknown small number of scanlines wrt. real scanout position. * */ -int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode) +int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, + unsigned int pipe, unsigned int flags, int *vpos, + int *hpos, ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) { u32 vbl = 0, position = 0; int vbl_start, vbl_end, vtotal, ret = 0; @@ -859,7 +876,7 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, return ret; } -int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc) +int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc) { if (crtc < 0 || crtc >= adev->mode_info.num_crtc) return AMDGPU_CRTC_IRQ_NONE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 0bcb6c6e0ca9..2b11d808f297 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,9 +23,10 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ +uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev); struct drm_framebuffer * -amdgpu_user_framebuffer_create(struct drm_device *dev, - struct drm_file *file_priv, - const struct drm_mode_fb_cmd2 *mode_cmd); +amdgpu_display_user_framebuffer_create(struct drm_device *dev, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index a8437a3296a6..643d008410c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -265,9 +265,6 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_read_sensor(adev, idx, value, size) \ ((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size))) -#define amdgpu_dpm_get_temperature(adev) \ - ((adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle)) - #define amdgpu_dpm_set_fan_control_mode(adev, m) \ ((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m))) @@ -328,8 +325,8 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_set_mclk_od(adev, value) \ ((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value)) -#define amdgpu_dpm_dispatch_task(adev, task_id, input, output) \ - ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (input), (output)) +#define amdgpu_dpm_dispatch_task(adev, task_id, user_state) \ + ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (user_state)) #define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \ ((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal))) @@ -344,17 +341,9 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->reset_power_profile_state(\ (adev)->powerplay.pp_handle, request)) -#define amdgpu_dpm_get_power_profile_state(adev, query) \ - ((adev)->powerplay.pp_funcs->get_power_profile_state(\ - (adev)->powerplay.pp_handle, query)) - -#define amdgpu_dpm_set_power_profile_state(adev, request) \ - ((adev)->powerplay.pp_funcs->set_power_profile_state(\ - (adev)->powerplay.pp_handle, request)) - -#define amdgpu_dpm_switch_power_profile(adev, type) \ +#define amdgpu_dpm_switch_power_profile(adev, type, en) \ ((adev)->powerplay.pp_funcs->switch_power_profile(\ - (adev)->powerplay.pp_handle, type)) + (adev)->powerplay.pp_handle, type, en)) #define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \ ((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\ @@ -366,6 +355,22 @@ enum amdgpu_pcie_gen { (adev)->powerplay.pp_handle, virtual_addr_low, \ virtual_addr_hi, mc_addr_low, mc_addr_hi, size) +#define amdgpu_dpm_get_power_profile_mode(adev, buf) \ + ((adev)->powerplay.pp_funcs->get_power_profile_mode(\ + (adev)->powerplay.pp_handle, buf)) + +#define amdgpu_dpm_set_power_profile_mode(adev, parameter, size) \ + ((adev)->powerplay.pp_funcs->set_power_profile_mode(\ + (adev)->powerplay.pp_handle, parameter, size)) + +#define amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, size) \ + ((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\ + (adev)->powerplay.pp_handle, type, parameter, size)) + +#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \ + ((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \ + (adev)->powerplay.pp_handle)) + struct amdgpu_dpm { struct amdgpu_ps *ps; /* number of valid power states */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ba4335fd4f65..2337d4bfd85c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -73,9 +73,11 @@ * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl * - 3.23.0 - Add query for VRAM lost counter + * - 3.24.0 - Add high priority compute support for gfx9 + * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk). */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 23 +#define KMS_DRIVER_MINOR 25 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -119,7 +121,7 @@ uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; char *amdgpu_virtual_display = NULL; -uint amdgpu_pp_feature_mask = 0xffffffff; +uint amdgpu_pp_feature_mask = 0xffffbfff; int amdgpu_ngg = 0; int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; @@ -129,6 +131,7 @@ int amdgpu_job_hang_limit = 0; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ +int amdgpu_emu_mode = 0; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -281,9 +284,12 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); -MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)"); module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); +MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); +module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); + #ifdef CONFIG_DRM_AMDGPU_SI #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) @@ -576,6 +582,11 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, struct drm_device *dev; unsigned long flags = ent->driver_data; int ret, retry = 0; + bool supports_atomic = false; + + if (!amdgpu_virtual_display && + amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK)) + supports_atomic = true; if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) { DRM_INFO("This hardware requires experimental hardware support.\n" @@ -596,6 +607,13 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) return ret; + /* warn the user if they mix atomic and non-atomic capable GPUs */ + if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic) + DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n"); + /* support atomic early so the atomic debugfs stuff gets created */ + if (supports_atomic) + kms_driver.driver_features |= DRIVER_ATOMIC; + dev = drm_dev_alloc(&kms_driver, &pdev->dev); if (IS_ERR(dev)) return PTR_ERR(dev); @@ -833,8 +851,8 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode) { - return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, - stime, etime, mode); + return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos, + stime, etime, mode); } static struct drm_driver kms_driver = { @@ -852,9 +870,6 @@ static struct drm_driver kms_driver = { .disable_vblank = amdgpu_disable_vblank_kms, .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, .get_scanout_position = amdgpu_get_crtc_scanout_position, - .irq_preinstall = amdgpu_irq_preinstall, - .irq_postinstall = amdgpu_irq_postinstall, - .irq_uninstall = amdgpu_irq_uninstall, .irq_handler = amdgpu_irq_handler, .ioctls = amdgpu_ioctls_kms, .gem_free_object_unlocked = amdgpu_gem_object_free, @@ -867,9 +882,7 @@ static struct drm_driver kms_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = amdgpu_gem_prime_export, - .gem_prime_import = drm_gem_prime_import, - .gem_prime_pin = amdgpu_gem_prime_pin, - .gem_prime_unpin = amdgpu_gem_prime_unpin, + .gem_prime_import = amdgpu_gem_prime_import, .gem_prime_res_obj = amdgpu_gem_prime_res_obj, .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table, .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index ff3e9beb7d19..12063019751b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -38,6 +38,8 @@ #include <linux/vga_switcheroo.h> +#include "amdgpu_display.h" + /* object hierarchy - this contains a helper + a amdgpu fb the helper contains a pointer to amdgpu framebuffer baseclass. @@ -124,7 +126,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, struct drm_gem_object *gobj = NULL; struct amdgpu_bo *abo = NULL; bool fb_tiled = false; /* useful for testing */ - u32 tiling_flags = 0; + u32 tiling_flags = 0, domain; int ret; int aligned_size, size; int height = mode_cmd->height; @@ -135,12 +137,12 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, /* need to align pitch with crtc limits */ mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); + domain = amdgpu_display_framebuffer_domains(adev); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, - AMDGPU_GEM_DOMAIN_VRAM, + ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_VRAM_CLEARED, @@ -166,7 +168,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, } - ret = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, NULL); + ret = amdgpu_bo_pin(abo, domain, NULL); if (ret) { amdgpu_bo_unreserve(abo); goto out_unref; @@ -225,7 +227,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper, info->par = rfbdev; info->skip_vt_switch = true; - ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj); + ret = amdgpu_display_framebuffer_init(adev->ddev, &rfbdev->rfb, + &mode_cmd, gobj); if (ret) { DRM_ERROR("failed to initialize framebuffer %d\n", ret); goto out; @@ -242,8 +245,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper, info->fbops = &amdgpufb_ops; - tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start; - info->fix.smem_start = adev->mc.aper_base + tmp; + tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start; + info->fix.smem_start = adev->gmc.aper_base + tmp; info->fix.smem_len = amdgpu_bo_size(abo); info->screen_base = amdgpu_bo_kptr(abo); info->screen_size = amdgpu_bo_size(abo); @@ -252,7 +255,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, /* setup aperture base/size for vesafb takeover */ info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base; - info->apertures->ranges[0].size = adev->mc.aper_size; + info->apertures->ranges[0].size = adev->gmc.aper_size; /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ @@ -262,7 +265,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper, } DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start); - DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->mc.aper_base); + DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->gmc.aper_base); DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo)); DRM_INFO("fb depth is %d\n", fb->format->depth); DRM_INFO(" pitch is %d\n", fb->pitches[0]); @@ -319,7 +322,7 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev) return 0; /* select 8 bpp console on low vram cards */ - if (adev->mc.real_vram_size <= (32*1024*1024)) + if (adev->gmc.real_vram_size <= (32*1024*1024)) bpp_sel = 8; rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 0a4f34afaaaa..cf0f186c6092 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -68,17 +68,15 @@ */ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) { - if (adev->dummy_page.page) + struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page; + + if (adev->dummy_page_addr) return 0; - adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO); - if (adev->dummy_page.page == NULL) - return -ENOMEM; - adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page, - 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) { + adev->dummy_page_addr = pci_map_page(adev->pdev, dummy_page, 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(adev->pdev, adev->dummy_page_addr)) { dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); - __free_page(adev->dummy_page.page); - adev->dummy_page.page = NULL; + adev->dummy_page_addr = 0; return -ENOMEM; } return 0; @@ -93,12 +91,11 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) */ static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) { - if (adev->dummy_page.page == NULL) + if (!adev->dummy_page_addr) return; - pci_unmap_page(adev->pdev, adev->dummy_page.addr, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(adev->dummy_page.page); - adev->dummy_page.page = NULL; + pci_unmap_page(adev->pdev, adev->dummy_page_addr, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + adev->dummy_page_addr = 0; } /** @@ -116,11 +113,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) int r; if (adev->gart.robj == NULL) { - r = amdgpu_bo_create(adev, adev->gart.table_size, - PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, + r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &adev->gart.robj); + ttm_bo_type_kernel, NULL, + &adev->gart.robj); if (r) { return r; } @@ -236,18 +234,19 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS adev->gart.pages[p] = NULL; #endif - page_base = adev->dummy_page.addr; + page_base = adev->dummy_page_addr; if (!adev->gart.ptr) continue; for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { - amdgpu_gart_set_pte_pde(adev, adev->gart.ptr, - t, page_base, flags); + amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr, + t, page_base, flags); page_base += AMDGPU_GPU_PAGE_SIZE; } } mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); + amdgpu_gmc_flush_gpu_tlb(adev, 0); return 0; } @@ -279,7 +278,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, for (i = 0; i < pages; i++) { page_base = dma_addr[i]; for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) { - amdgpu_gart_set_pte_pde(adev, dst, t, page_base, flags); + amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags); page_base += AMDGPU_GPU_PAGE_SIZE; } } @@ -317,7 +316,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); for (i = 0; i < pages; i++, p++) - adev->gart.pages[p] = pagelist[i]; + adev->gart.pages[p] = pagelist ? pagelist[i] : NULL; #endif if (!adev->gart.ptr) @@ -329,7 +328,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, return r; mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); + amdgpu_gmc_flush_gpu_tlb(adev, 0); return 0; } @@ -345,7 +345,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev) { int r; - if (adev->dummy_page.page) + if (adev->dummy_page_addr) return 0; /* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */ @@ -357,8 +357,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev) if (r) return r; /* Compute table size */ - adev->gart.num_cpu_pages = adev->mc.gart_size / PAGE_SIZE; - adev->gart.num_gpu_pages = adev->mc.gart_size / AMDGPU_GPU_PAGE_SIZE; + adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE; + adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE; DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n", adev->gart.num_cpu_pages, adev->gart.num_gpu_pages); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index d4a43302c2be..456295c00291 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -31,7 +31,6 @@ */ struct amdgpu_device; struct amdgpu_bo; -struct amdgpu_gart_funcs; #define AMDGPU_GPU_PAGE_SIZE 4096 #define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1) @@ -52,8 +51,6 @@ struct amdgpu_gart { /* Asic default pte flags */ uint64_t gart_pte_flags; - - const struct amdgpu_gart_funcs *gart_funcs; }; int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index e48b4ec88c8c..46b9ea4e6103 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj) struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj); if (robj) { - if (robj->gem_base.import_attach) - drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg); amdgpu_mn_unregister(robj); amdgpu_bo_unref(&robj); } @@ -45,7 +43,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj) int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, - u64 flags, bool kernel, + u64 flags, enum ttm_bo_type type, struct reservation_object *resv, struct drm_gem_object **obj) { @@ -59,8 +57,8 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, } retry: - r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain, - flags, NULL, resv, 0, &bo); + r = amdgpu_bo_create(adev, size, alignment, initial_domain, + flags, type, resv, &bo); if (r) { if (r != -ERESTARTSYS) { if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { @@ -523,12 +521,13 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, goto error; if (operation == AMDGPU_VA_OP_MAP || - operation == AMDGPU_VA_OP_REPLACE) + operation == AMDGPU_VA_OP_REPLACE) { r = amdgpu_vm_bo_update(adev, bo_va, false); + if (r) + goto error; + } r = amdgpu_vm_update_directories(adev, vm); - if (r) - goto error; error: if (r && r != -ERESTARTSYS) @@ -634,7 +633,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, if (r) goto error_backoff; - va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); @@ -654,7 +653,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, if (r) goto error_backoff; - va_flags = amdgpu_vm_get_pte_flags(adev, args->flags); + va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags); r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address, args->offset_in_bo, args->map_size, va_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h new file mode 100644 index 000000000000..893c2490b783 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -0,0 +1,112 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ +#ifndef __AMDGPU_GMC_H__ +#define __AMDGPU_GMC_H__ + +#include <linux/types.h> + +#include "amdgpu_irq.h" + +struct firmware; + +/* + * VMHUB structures, functions & helpers + */ +struct amdgpu_vmhub { + uint32_t ctx0_ptb_addr_lo32; + uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_req; + uint32_t vm_inv_eng0_ack; + uint32_t vm_context0_cntl; + uint32_t vm_l2_pro_fault_status; + uint32_t vm_l2_pro_fault_cntl; +}; + +/* + * GPU MC structures, functions & helpers + */ +struct amdgpu_gmc_funcs { + /* flush the vm tlb via mmio */ + void (*flush_gpu_tlb)(struct amdgpu_device *adev, + uint32_t vmid); + /* flush the vm tlb via ring */ + uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, + uint64_t pd_addr); + /* Change the VMID -> PASID mapping */ + void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid, + unsigned pasid); + /* write pte/pde updates using the cpu */ + int (*set_pte_pde)(struct amdgpu_device *adev, + void *cpu_pt_addr, /* cpu addr of page table */ + uint32_t gpu_page_idx, /* pte/pde to update */ + uint64_t addr, /* addr to write into pte/pde */ + uint64_t flags); /* access flags */ + /* enable/disable PRT support */ + void (*set_prt)(struct amdgpu_device *adev, bool enable); + /* set pte flags based per asic */ + uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, + uint32_t flags); + /* get the pde for a given mc addr */ + void (*get_vm_pde)(struct amdgpu_device *adev, int level, + u64 *dst, u64 *flags); +}; + +struct amdgpu_gmc { + resource_size_t aper_size; + resource_size_t aper_base; + /* for some chips with <= 32MB we need to lie + * about vram size near mc fb location */ + u64 mc_vram_size; + u64 visible_vram_size; + u64 gart_size; + u64 gart_start; + u64 gart_end; + u64 vram_start; + u64 vram_end; + unsigned vram_width; + u64 real_vram_size; + int vram_mtrr; + u64 mc_mask; + const struct firmware *fw; /* MC firmware */ + uint32_t fw_version; + struct amdgpu_irq_src vm_fault; + uint32_t vram_type; + uint32_t srbm_soft_reset; + bool prt_warning; + uint64_t stolen_size; + /* apertures */ + u64 shared_aperture_start; + u64 shared_aperture_end; + u64 private_aperture_start; + u64 private_aperture_end; + /* protects concurrent invalidation */ + spinlock_t invalidate_lock; + bool translate_further; + + const struct amdgpu_gmc_funcs *gmc_funcs; +}; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index e14ab34d8262..da7b1b92d9cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -56,7 +56,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, return -ENOMEM; start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; - size = (adev->mc.gart_size >> PAGE_SHIFT) - start; + size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); atomic64_set(&mgr->available, p_size); @@ -75,7 +75,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) { struct amdgpu_gtt_mgr *mgr = man->priv; - + spin_lock(&mgr->lock); drm_mm_takedown(&mgr->mm); spin_unlock(&mgr->lock); kfree(mgr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a162d87ca0c8..311589e02d17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -181,15 +181,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } } - if (ring->funcs->init_cond_exec) + if (job && ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); - if (ring->funcs->emit_hdp_flush #ifdef CONFIG_X86_64 - && !(adev->flags & AMD_IS_APU) + if (!(adev->flags & AMD_IS_APU)) #endif - ) - amdgpu_ring_emit_hdp_flush(ring); + { + if (ring->funcs->emit_hdp_flush) + amdgpu_ring_emit_hdp_flush(ring); + else + amdgpu_asic_flush_hdp(adev, ring); + } skip_preamble = ring->current_ctx == fence_ctx; need_ctx_switch = ring->current_ctx != fence_ctx; @@ -219,12 +222,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (ring->funcs->emit_tmz) amdgpu_ring_emit_tmz(ring, false); - if (ring->funcs->emit_hdp_invalidate #ifdef CONFIG_X86_64 - && !(adev->flags & AMD_IS_APU) + if (!(adev->flags & AMD_IS_APU)) #endif - ) - amdgpu_ring_emit_hdp_invalidate(ring); + amdgpu_asic_invalidate_hdp(adev, ring); r = amdgpu_fence_emit(ring, f); if (r) { @@ -278,11 +279,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev) return r; } - r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo); - if (r) { - return r; - } - adev->ib_pool_ready = true; if (amdgpu_debugfs_sa_init(adev)) { dev_err(adev->dev, "failed to register debugfs file for SA\n"); @@ -301,7 +297,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev) void amdgpu_ib_pool_fini(struct amdgpu_device *adev) { if (adev->ib_pool_ready) { - amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo); amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo); adev->ib_pool_ready = false; } @@ -321,14 +316,45 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) { unsigned i; int r, ret = 0; + long tmo_gfx, tmo_mm; + + tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT; + if (amdgpu_sriov_vf(adev)) { + /* for MM engines in hypervisor side they are not scheduled together + * with CP and SDMA engines, so even in exclusive mode MM engine could + * still running on other VF thus the IB TEST TIMEOUT for MM engines + * under SR-IOV should be set to a long time. 8 sec should be enough + * for the MM comes back to this VF. + */ + tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT; + } + + if (amdgpu_sriov_runtime(adev)) { + /* for CP & SDMA engines since they are scheduled together so + * need to make the timeout width enough to cover the time + * cost waiting for it coming back under RUNTIME only + */ + tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; + } for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; + long tmo; if (!ring || !ring->ready) continue; - r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT); + /* MM engine need more time */ + if (ring->funcs->type == AMDGPU_RING_TYPE_UVD || + ring->funcs->type == AMDGPU_RING_TYPE_VCE || + ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC || + ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC || + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + tmo = tmo_mm; + else + tmo = tmo_gfx; + + r = amdgpu_ring_test_ib(ring, tmo); if (r) { ring->ready = false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 16884a0b677b..a1c78f90eadf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -40,6 +40,12 @@ */ static DEFINE_IDA(amdgpu_pasid_ida); +/* Helper to free pasid from a fence callback */ +struct amdgpu_pasid_cb { + struct dma_fence_cb cb; + unsigned int pasid; +}; + /** * amdgpu_pasid_alloc - Allocate a PASID * @bits: Maximum width of the PASID in bits, must be at least 1 @@ -63,6 +69,9 @@ int amdgpu_pasid_alloc(unsigned int bits) break; } + if (pasid >= 0) + trace_amdgpu_pasid_allocated(pasid); + return pasid; } @@ -72,9 +81,86 @@ int amdgpu_pasid_alloc(unsigned int bits) */ void amdgpu_pasid_free(unsigned int pasid) { + trace_amdgpu_pasid_freed(pasid); ida_simple_remove(&amdgpu_pasid_ida, pasid); } +static void amdgpu_pasid_free_cb(struct dma_fence *fence, + struct dma_fence_cb *_cb) +{ + struct amdgpu_pasid_cb *cb = + container_of(_cb, struct amdgpu_pasid_cb, cb); + + amdgpu_pasid_free(cb->pasid); + dma_fence_put(fence); + kfree(cb); +} + +/** + * amdgpu_pasid_free_delayed - free pasid when fences signal + * + * @resv: reservation object with the fences to wait for + * @pasid: pasid to free + * + * Free the pasid only after all the fences in resv are signaled. + */ +void amdgpu_pasid_free_delayed(struct reservation_object *resv, + unsigned int pasid) +{ + struct dma_fence *fence, **fences; + struct amdgpu_pasid_cb *cb; + unsigned count; + int r; + + r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences); + if (r) + goto fallback; + + if (count == 0) { + amdgpu_pasid_free(pasid); + return; + } + + if (count == 1) { + fence = fences[0]; + kfree(fences); + } else { + uint64_t context = dma_fence_context_alloc(1); + struct dma_fence_array *array; + + array = dma_fence_array_create(count, fences, context, + 1, false); + if (!array) { + kfree(fences); + goto fallback; + } + fence = &array->base; + } + + cb = kmalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) { + /* Last resort when we are OOM */ + dma_fence_wait(fence, false); + dma_fence_put(fence); + amdgpu_pasid_free(pasid); + } else { + cb->pasid = pasid; + if (dma_fence_add_callback(fence, &cb->cb, + amdgpu_pasid_free_cb)) + amdgpu_pasid_free_cb(fence, &cb->cb); + } + + return; + +fallback: + /* Not enough memory for the delayed delete, as last resort + * block for all the fences to complete. + */ + reservation_object_wait_timeout_rcu(resv, true, false, + MAX_SCHEDULE_TIMEOUT); + amdgpu_pasid_free(pasid); +} + /* * VMID manager * @@ -96,164 +182,185 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, atomic_read(&adev->gpu_reset_counter); } -/* idr_mgr->lock must be held */ -static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm, - struct amdgpu_ring *ring, - struct amdgpu_sync *sync, - struct dma_fence *fence, - struct amdgpu_job *job) -{ - struct amdgpu_device *adev = ring->adev; - unsigned vmhub = ring->funcs->vmhub; - uint64_t fence_context = adev->fence_context + ring->idx; - struct amdgpu_vmid *id = vm->reserved_vmid[vmhub]; - struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - struct dma_fence *updates = sync->last_vm_update; - int r = 0; - struct dma_fence *flushed, *tmp; - bool needs_flush = vm->use_cpu_for_update; - - flushed = id->flushed_updates; - if ((amdgpu_vmid_had_gpu_reset(adev, id)) || - (atomic64_read(&id->owner) != vm->entity.fence_context) || - (job->vm_pd_addr != id->pd_gpu_addr) || - (updates && (!flushed || updates->context != flushed->context || - dma_fence_is_later(updates, flushed))) || - (!id->last_flush || (id->last_flush->context != fence_context && - !dma_fence_is_signaled(id->last_flush)))) { - needs_flush = true; - /* to prevent one context starved by another context */ - id->pd_gpu_addr = 0; - tmp = amdgpu_sync_peek_fence(&id->active, ring); - if (tmp) { - r = amdgpu_sync_fence(adev, sync, tmp, false); - return r; - } - } - - /* Good we can use this VMID. Remember this submission as - * user of the VMID. - */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); - if (r) - goto out; - - if (updates && (!flushed || updates->context != flushed->context || - dma_fence_is_later(updates, flushed))) { - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - } - id->pd_gpu_addr = job->vm_pd_addr; - atomic64_set(&id->owner, vm->entity.fence_context); - job->vm_needs_flush = needs_flush; - if (needs_flush) { - dma_fence_put(id->last_flush); - id->last_flush = NULL; - } - job->vmid = id - id_mgr->ids; - trace_amdgpu_vm_grab_id(vm, ring, job); -out: - return r; -} - /** - * amdgpu_vm_grab_id - allocate the next free VMID + * amdgpu_vm_grab_idle - grab idle VMID * * @vm: vm to allocate id for * @ring: ring we want to submit job to * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse + * @idle: resulting idle VMID * - * Allocate an id for the vm, adding fences to the sync obj as necessary. + * Try to find an idle VMID, if none is idle add a fence to wait to the sync + * object. Returns -ENOMEM when we are out of memory. */ -int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync, struct dma_fence *fence, - struct amdgpu_job *job) +static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct amdgpu_vmid **idle) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; - uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; - struct amdgpu_vmid *id, *idle; struct dma_fence **fences; unsigned i; - int r = 0; + int r; + + if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) + return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false); - mutex_lock(&id_mgr->lock); - if (vm->reserved_vmid[vmhub]) { - r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job); - mutex_unlock(&id_mgr->lock); - return r; - } fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); - if (!fences) { - mutex_unlock(&id_mgr->lock); + if (!fences) return -ENOMEM; - } + /* Check if we have an idle VMID */ i = 0; - list_for_each_entry(idle, &id_mgr->ids_lru, list) { - fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); + list_for_each_entry((*idle), &id_mgr->ids_lru, list) { + fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring); if (!fences[i]) break; ++i; } /* If we can't find a idle VMID to use, wait till one becomes available */ - if (&idle->list == &id_mgr->ids_lru) { + if (&(*idle)->list == &id_mgr->ids_lru) { u64 fence_context = adev->vm_manager.fence_context + ring->idx; unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; struct dma_fence_array *array; unsigned j; + *idle = NULL; for (j = 0; j < i; ++j) dma_fence_get(fences[j]); array = dma_fence_array_create(i, fences, fence_context, - seqno, true); + seqno, true); if (!array) { for (j = 0; j < i; ++j) dma_fence_put(fences[j]); kfree(fences); - r = -ENOMEM; - goto error; + return -ENOMEM; } + r = amdgpu_sync_fence(adev, sync, &array->base, false); + dma_fence_put(ring->vmid_wait); + ring->vmid_wait = &array->base; + return r; + } + kfree(fences); - r = amdgpu_sync_fence(ring->adev, sync, &array->base, false); - dma_fence_put(&array->base); - if (r) - goto error; + return 0; +} - mutex_unlock(&id_mgr->lock); - return 0; +/** + * amdgpu_vm_grab_reserved - try to assign reserved VMID + * + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * @sync: sync object where we add dependencies + * @fence: fence protecting ID from reuse + * @job: job who wants to use the VMID + * + * Try to assign a reserved VMID. + */ +static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job, + struct amdgpu_vmid **id) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + uint64_t fence_context = adev->fence_context + ring->idx; + struct dma_fence *updates = sync->last_vm_update; + bool needs_flush = vm->use_cpu_for_update; + int r = 0; + + *id = vm->reserved_vmid[vmhub]; + if (updates && (*id)->flushed_updates && + updates->context == (*id)->flushed_updates->context && + !dma_fence_is_later(updates, (*id)->flushed_updates)) + updates = NULL; + + if ((*id)->owner != vm->entity.fence_context || + job->vm_pd_addr != (*id)->pd_gpu_addr || + updates || !(*id)->last_flush || + ((*id)->last_flush->context != fence_context && + !dma_fence_is_signaled((*id)->last_flush))) { + struct dma_fence *tmp; + /* to prevent one context starved by another context */ + (*id)->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); + if (tmp) { + *id = NULL; + r = amdgpu_sync_fence(adev, sync, tmp, false); + return r; + } + needs_flush = true; } - kfree(fences); + + /* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false); + if (r) + return r; + + if (updates) { + dma_fence_put((*id)->flushed_updates); + (*id)->flushed_updates = dma_fence_get(updates); + } + job->vm_needs_flush = needs_flush; + return 0; +} + +/** + * amdgpu_vm_grab_used - try to reuse a VMID + * + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * @sync: sync object where we add dependencies + * @fence: fence protecting ID from reuse + * @job: job who wants to use the VMID + * @id: resulting VMID + * + * Try to reuse a VMID for this submission. + */ +static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job, + struct amdgpu_vmid **id) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + uint64_t fence_context = adev->fence_context + ring->idx; + struct dma_fence *updates = sync->last_vm_update; + int r; job->vm_needs_flush = vm->use_cpu_for_update; + /* Check if we can use a VMID already assigned to this VM */ - list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { - struct dma_fence *flushed; + list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) { bool needs_flush = vm->use_cpu_for_update; + struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ - if (amdgpu_vmid_had_gpu_reset(adev, id)) - continue; - - if (atomic64_read(&id->owner) != vm->entity.fence_context) + if ((*id)->owner != vm->entity.fence_context) continue; - if (job->vm_pd_addr != id->pd_gpu_addr) + if ((*id)->pd_gpu_addr != job->vm_pd_addr) continue; - if (!id->last_flush || - (id->last_flush->context != fence_context && - !dma_fence_is_signaled(id->last_flush))) + if (!(*id)->last_flush || + ((*id)->last_flush->context != fence_context && + !dma_fence_is_signaled((*id)->last_flush))) needs_flush = true; - flushed = id->flushed_updates; + flushed = (*id)->flushed_updates; if (updates && (!flushed || dma_fence_is_later(updates, flushed))) needs_flush = true; @@ -261,47 +368,91 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (adev->asic_type < CHIP_VEGA10 && needs_flush) continue; - /* Good we can use this VMID. Remember this submission as + /* Good, we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); + r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false); if (r) - goto error; + return r; if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); + dma_fence_put((*id)->flushed_updates); + (*id)->flushed_updates = dma_fence_get(updates); } - if (needs_flush) - goto needs_flush; - else - goto no_flush_needed; + job->vm_needs_flush |= needs_flush; + return 0; + } - }; + *id = NULL; + return 0; +} - /* Still no ID to use? Then use the idle one found earlier */ - id = idle; +/** + * amdgpu_vm_grab_id - allocate the next free VMID + * + * @vm: vm to allocate id for + * @ring: ring we want to submit job to + * @sync: sync object where we add dependencies + * @fence: fence protecting ID from reuse + * @job: job who wants to use the VMID + * + * Allocate an id for the vm, adding fences to the sync obj as necessary. + */ +int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, + struct amdgpu_sync *sync, struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vmid *idle = NULL; + struct amdgpu_vmid *id = NULL; + int r = 0; - /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(ring->adev, &id->active, fence, false); - if (r) + mutex_lock(&id_mgr->lock); + r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle); + if (r || !idle) goto error; - id->pd_gpu_addr = job->vm_pd_addr; - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); - atomic64_set(&id->owner, vm->entity.fence_context); + if (vm->reserved_vmid[vmhub]) { + r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id); + if (r || !id) + goto error; + } else { + r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id); + if (r) + goto error; -needs_flush: - job->vm_needs_flush = true; - dma_fence_put(id->last_flush); - id->last_flush = NULL; + if (!id) { + struct dma_fence *updates = sync->last_vm_update; -no_flush_needed: - list_move_tail(&id->list, &id_mgr->ids_lru); + /* Still no ID to use? Then use the idle one found earlier */ + id = idle; + /* Remember this submission as user of the VMID */ + r = amdgpu_sync_fence(ring->adev, &id->active, + fence, false); + if (r) + goto error; + + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + job->vm_needs_flush = true; + } + + list_move_tail(&id->list, &id_mgr->ids_lru); + } + + id->pd_gpu_addr = job->vm_pd_addr; + id->owner = vm->entity.fence_context; + + if (job->vm_needs_flush) { + dma_fence_put(id->last_flush); + id->last_flush = NULL; + } job->vmid = id - id_mgr->ids; + job->pasid = vm->pasid; trace_amdgpu_vm_grab_id(vm, ring, job); error: @@ -370,13 +521,15 @@ void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub, struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[vmid]; - atomic64_set(&id->owner, 0); + mutex_lock(&id_mgr->lock); + id->owner = 0; id->gds_base = 0; id->gds_size = 0; id->gws_base = 0; id->gws_size = 0; id->oa_base = 0; id->oa_size = 0; + mutex_unlock(&id_mgr->lock); } /** @@ -454,6 +607,7 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) amdgpu_sync_free(&id->active); dma_fence_put(id->flushed_updates); dma_fence_put(id->last_flush); + dma_fence_put(id->pasid_mapping); } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index ad931fa570b3..7625419f0fc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -43,7 +43,7 @@ struct amdgpu_vmid { struct list_head list; struct amdgpu_sync active; struct dma_fence *last_flush; - atomic64_t owner; + uint64_t owner; uint64_t pd_gpu_addr; /* last flushed PD/PT update */ @@ -57,6 +57,9 @@ struct amdgpu_vmid { uint32_t gws_size; uint32_t oa_base; uint32_t oa_size; + + unsigned pasid; + struct dma_fence *pasid_mapping; }; struct amdgpu_vmid_mgr { @@ -69,6 +72,8 @@ struct amdgpu_vmid_mgr { int amdgpu_pasid_alloc(unsigned int bits); void amdgpu_pasid_free(unsigned int pasid); +void amdgpu_pasid_free_delayed(struct reservation_object *resv, + unsigned int pasid); bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 29cf10927a92..0e01f115bbe5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -25,51 +25,12 @@ #define __AMDGPU_IH_H__ #include <linux/chash.h> +#include "soc15_ih_clientid.h" struct amdgpu_device; - /* - * vega10+ IH clients - */ -enum amdgpu_ih_clientid -{ - AMDGPU_IH_CLIENTID_IH = 0x00, - AMDGPU_IH_CLIENTID_ACP = 0x01, - AMDGPU_IH_CLIENTID_ATHUB = 0x02, - AMDGPU_IH_CLIENTID_BIF = 0x03, - AMDGPU_IH_CLIENTID_DCE = 0x04, - AMDGPU_IH_CLIENTID_ISP = 0x05, - AMDGPU_IH_CLIENTID_PCIE0 = 0x06, - AMDGPU_IH_CLIENTID_RLC = 0x07, - AMDGPU_IH_CLIENTID_SDMA0 = 0x08, - AMDGPU_IH_CLIENTID_SDMA1 = 0x09, - AMDGPU_IH_CLIENTID_SE0SH = 0x0a, - AMDGPU_IH_CLIENTID_SE1SH = 0x0b, - AMDGPU_IH_CLIENTID_SE2SH = 0x0c, - AMDGPU_IH_CLIENTID_SE3SH = 0x0d, - AMDGPU_IH_CLIENTID_SYSHUB = 0x0e, - AMDGPU_IH_CLIENTID_THM = 0x0f, - AMDGPU_IH_CLIENTID_UVD = 0x10, - AMDGPU_IH_CLIENTID_VCE0 = 0x11, - AMDGPU_IH_CLIENTID_VMC = 0x12, - AMDGPU_IH_CLIENTID_XDMA = 0x13, - AMDGPU_IH_CLIENTID_GRBM_CP = 0x14, - AMDGPU_IH_CLIENTID_ATS = 0x15, - AMDGPU_IH_CLIENTID_ROM_SMUIO = 0x16, - AMDGPU_IH_CLIENTID_DF = 0x17, - AMDGPU_IH_CLIENTID_VCE1 = 0x18, - AMDGPU_IH_CLIENTID_PWR = 0x19, - AMDGPU_IH_CLIENTID_UTCL2 = 0x1b, - AMDGPU_IH_CLIENTID_EA = 0x1c, - AMDGPU_IH_CLIENTID_UTCL2LOG = 0x1d, - AMDGPU_IH_CLIENTID_MP0 = 0x1e, - AMDGPU_IH_CLIENTID_MP1 = 0x1f, - - AMDGPU_IH_CLIENTID_MAX, - - AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD -}; #define AMDGPU_IH_CLIENTID_LEGACY 0 +#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX #define AMDGPU_PAGEFAULT_HASH_BITS 8 struct amdgpu_retryfault_hashtable { @@ -109,7 +70,7 @@ struct amdgpu_iv_entry { unsigned vmid_src; uint64_t timestamp; unsigned timestamp_src; - unsigned pas_id; + unsigned pasid; unsigned pasid_src; unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW]; const uint32_t *iv_entry; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 56bcd59c3399..11dfe57bd8bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -92,7 +92,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) } /* Disable *all* interrupts */ -static void amdgpu_irq_disable_all(struct amdgpu_device *adev) +void amdgpu_irq_disable_all(struct amdgpu_device *adev) { unsigned long irqflags; unsigned i, j, k; @@ -123,55 +123,6 @@ static void amdgpu_irq_disable_all(struct amdgpu_device *adev) } /** - * amdgpu_irq_preinstall - drm irq preinstall callback - * - * @dev: drm dev pointer - * - * Gets the hw ready to enable irqs (all asics). - * This function disables all interrupt sources on the GPU. - */ -void amdgpu_irq_preinstall(struct drm_device *dev) -{ - struct amdgpu_device *adev = dev->dev_private; - - /* Disable *all* interrupts */ - amdgpu_irq_disable_all(adev); - /* Clear bits */ - amdgpu_ih_process(adev); -} - -/** - * amdgpu_irq_postinstall - drm irq preinstall callback - * - * @dev: drm dev pointer - * - * Handles stuff to be done after enabling irqs (all asics). - * Returns 0 on success. - */ -int amdgpu_irq_postinstall(struct drm_device *dev) -{ - dev->max_vblank_count = 0x00ffffff; - return 0; -} - -/** - * amdgpu_irq_uninstall - drm irq uninstall callback - * - * @dev: drm dev pointer - * - * This function disables all interrupt sources on the GPU (all asics). - */ -void amdgpu_irq_uninstall(struct drm_device *dev) -{ - struct amdgpu_device *adev = dev->dev_private; - - if (adev == NULL) { - return; - } - amdgpu_irq_disable_all(adev); -} - -/** * amdgpu_irq_handler - irq handler * * @int irq, void *arg: args @@ -257,10 +208,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev) r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); if (r) { adev->irq.installed = false; - flush_work(&adev->hotplug_work); + if (!amdgpu_device_has_dc_support(adev)) + flush_work(&adev->hotplug_work); cancel_work_sync(&adev->reset_work); return r; } + adev->ddev->max_vblank_count = 0x00ffffff; DRM_DEBUG("amdgpu: irq initialized.\n"); return 0; @@ -282,7 +235,8 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) adev->irq.installed = false; if (adev->irq.msi_enabled) pci_disable_msi(adev->pdev); - flush_work(&adev->hotplug_work); + if (!amdgpu_device_has_dc_support(adev)) + flush_work(&adev->hotplug_work); cancel_work_sync(&adev->reset_work); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 0610cc4a9788..3375ad778edc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -78,9 +78,7 @@ struct amdgpu_irq { uint32_t srbm_soft_reset; }; -void amdgpu_irq_preinstall(struct drm_device *dev); -int amdgpu_irq_postinstall(struct drm_device *dev); -void amdgpu_irq_uninstall(struct drm_device *dev); +void amdgpu_irq_disable_all(struct amdgpu_device *adev); irqreturn_t amdgpu_irq_handler(int irq, void *arg); int amdgpu_irq_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bd6e9a40f421..e851c66cbb5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -191,7 +191,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->feature = 0; break; case AMDGPU_INFO_FW_GMC: - fw_info->ver = adev->mc.fw_version; + fw_info->ver = adev->gmc.fw_version; fw_info->feature = 0; break; case AMDGPU_INFO_FW_GFX_ME: @@ -470,9 +470,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_VRAM_GTT: { struct drm_amdgpu_info_vram_gtt vram_gtt; - vram_gtt.vram_size = adev->mc.real_vram_size; + vram_gtt.vram_size = adev->gmc.real_vram_size; vram_gtt.vram_size -= adev->vram_pin_size; - vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size; + vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size; vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size); vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size; vram_gtt.gtt_size *= PAGE_SIZE; @@ -484,17 +484,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_memory_info mem; memset(&mem, 0, sizeof(mem)); - mem.vram.total_heap_size = adev->mc.real_vram_size; + mem.vram.total_heap_size = adev->gmc.real_vram_size; mem.vram.usable_heap_size = - adev->mc.real_vram_size - adev->vram_pin_size; + adev->gmc.real_vram_size - adev->vram_pin_size; mem.vram.heap_usage = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = - adev->mc.visible_vram_size; + adev->gmc.visible_vram_size; mem.cpu_accessible_vram.usable_heap_size = - adev->mc.visible_vram_size - + adev->gmc.visible_vram_size - (adev->vram_pin_size - adev->invisible_pin_size); mem.cpu_accessible_vram.heap_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); @@ -580,11 +580,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; + vm_size -= AMDGPU_VA_RESERVED_SIZE; + + /* Older VCE FW versions are buggy and can handle only 40bits */ + if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45) + vm_size = min(vm_size, 1ULL << 40); + dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; dev_info.virtual_address_max = min(vm_size, AMDGPU_VA_HOLE_START); - vm_size -= AMDGPU_VA_RESERVED_SIZE; if (vm_size > AMDGPU_VA_HOLE_START) { dev_info.high_va_offset = AMDGPU_VA_HOLE_END; dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size; @@ -599,8 +604,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file sizeof(adev->gfx.cu_info.ao_cu_bitmap)); memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], sizeof(adev->gfx.cu_info.bitmap)); - dev_info.vram_type = adev->mc.vram_type; - dev_info.vram_bit_width = adev->mc.vram_width; + dev_info.vram_type = adev->gmc.vram_type; + dev_info.vram_bit_width = adev->gmc.vram_width; dev_info.vce_harvest_config = adev->vce.harvest_config; dev_info.gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; @@ -758,6 +763,24 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return -EINVAL; } break; + case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK: + /* get stable pstate sclk in Mhz */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 /= 100; + break; + case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK: + /* get stable pstate mclk in Mhz */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 /= 100; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->sensor_info.type); @@ -805,7 +828,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct amdgpu_device *adev = dev->dev_private; struct amdgpu_fpriv *fpriv; - int r; + int r, pasid; file_priv->driver_priv = NULL; @@ -819,28 +842,25 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto out_suspend; } - r = amdgpu_vm_init(adev, &fpriv->vm, - AMDGPU_VM_CONTEXT_GFX, 0); - if (r) { - kfree(fpriv); - goto out_suspend; + pasid = amdgpu_pasid_alloc(16); + if (pasid < 0) { + dev_warn(adev->dev, "No more PASIDs available!"); + pasid = 0; } + r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid); + if (r) + goto error_pasid; fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL); if (!fpriv->prt_va) { r = -ENOMEM; - amdgpu_vm_fini(adev, &fpriv->vm); - kfree(fpriv); - goto out_suspend; + goto error_vm; } if (amdgpu_sriov_vf(adev)) { r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); - if (r) { - amdgpu_vm_fini(adev, &fpriv->vm); - kfree(fpriv); - goto out_suspend; - } + if (r) + goto error_vm; } mutex_init(&fpriv->bo_list_lock); @@ -849,6 +869,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); file_priv->driver_priv = fpriv; + goto out_suspend; + +error_vm: + amdgpu_vm_fini(adev, &fpriv->vm); + +error_pasid: + if (pasid) + amdgpu_pasid_free(pasid); + + kfree(fpriv); out_suspend: pm_runtime_mark_last_busy(dev->dev); @@ -871,6 +901,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, struct amdgpu_device *adev = dev->dev_private; struct amdgpu_fpriv *fpriv = file_priv->driver_priv; struct amdgpu_bo_list *list; + struct amdgpu_bo *pd; + unsigned int pasid; int handle; if (!fpriv) @@ -895,7 +927,13 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(adev->virt.csa_obj); } + pasid = fpriv->vm.pasid; + pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); + amdgpu_vm_fini(adev, &fpriv->vm); + if (pasid) + amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); + amdgpu_bo_unref(&pd); idr_for_each_entry(&fpriv->bo_list_handles, list, handle) amdgpu_bo_list_free(list); @@ -947,11 +985,11 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe) */ do { count = amdgpu_display_vblank_get_counter(adev, pipe); - /* Ask amdgpu_get_crtc_scanoutpos to return vpos as - * distance to start of vblank, instead of regular - * vertical scanout pos. + /* Ask amdgpu_display_get_crtc_scanoutpos to return + * vpos as distance to start of vblank, instead of + * regular vertical scanout pos. */ - stat = amdgpu_get_crtc_scanoutpos( + stat = amdgpu_display_get_crtc_scanoutpos( dev, pipe, GET_DISTANCE_TO_VBLANKSTART, &vpos, &hpos, NULL, NULL, &adev->mode_info.crtcs[pipe]->base.hwmode); @@ -992,7 +1030,7 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe) int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe) { struct amdgpu_device *adev = dev->dev_private; - int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe); + int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe); return amdgpu_irq_get(adev, &adev->crtc_irq, idx); } @@ -1008,7 +1046,7 @@ int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe) void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe) { struct amdgpu_device *adev = dev->dev_private; - int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe); + int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe); amdgpu_irq_put(adev, &adev->crtc_irq, idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 54f06c959340..d6416ee52e32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -267,8 +267,6 @@ struct amdgpu_display_funcs { void (*bandwidth_update)(struct amdgpu_device *adev); /* get frame count */ u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc); - /* wait for vblank */ - void (*vblank_wait)(struct amdgpu_device *adev, int crtc); /* set backlight level */ void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder, u8 level); @@ -352,6 +350,7 @@ struct amdgpu_mode_info { u16 firmware_flags; /* pointer to backlight encoder */ struct amdgpu_encoder *bl_encoder; + u8 bl_level; /* saved backlight level */ struct amdgpu_audio audio; /* audio stuff */ int num_crtc; /* number of crtcs */ int num_hpd; /* number of hpd pins */ @@ -552,14 +551,6 @@ struct amdgpu_connector { /* we need to mind the EDID between detect and get modes due to analog/digital/tvencoder */ struct edid *edid; - /* number of modes generated from EDID at 'dc_sink' */ - int num_modes; - /* The 'old' sink - before an HPD. - * The 'current' sink is in dc_link->sink. */ - struct dc_sink *dc_sink; - struct dc_link *dc_link; - struct dc_sink *dc_em_sink; - const struct dc_stream *stream; void *con_priv; bool dac_load_detect; bool detected_by_load; /* if the connection status was determined by load */ @@ -570,27 +561,6 @@ struct amdgpu_connector { enum amdgpu_connector_audio audio; enum amdgpu_connector_dither dither; unsigned pixelclock_for_modeset; - - struct drm_dp_mst_topology_mgr mst_mgr; - struct amdgpu_dm_dp_aux dm_dp_aux; - struct drm_dp_mst_port *port; - struct amdgpu_connector *mst_port; - struct amdgpu_encoder *mst_encoder; - struct semaphore mst_sem; - - /* TODO see if we can merge with ddc_bus or make a dm_connector */ - struct amdgpu_i2c_adapter *i2c; - - /* Monitor range limits */ - int min_vfreq ; - int max_vfreq ; - int pixel_clock_mhz; - - /*freesync caps*/ - struct mod_freesync_caps caps; - - struct mutex hpd_lock; - }; /* TODO: start to use this struct and remove same field from base one */ @@ -608,7 +578,7 @@ struct amdgpu_mst_connector { #define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \ ((em) == ATOM_ENCODER_MODE_DP_MST)) -/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */ +/* Driver internal use only flags of amdgpu_display_get_crtc_scanoutpos() */ #define DRM_SCANOUTPOS_VALID (1 << 0) #define DRM_SCANOUTPOS_IN_VBLANK (1 << 1) #define DRM_SCANOUTPOS_ACCURATE (1 << 2) @@ -627,30 +597,31 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder, u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder); struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder); -bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, bool use_aux); +bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, + bool use_aux); void amdgpu_encoder_set_active_device(struct drm_encoder *encoder); -int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode); +int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, + unsigned int pipe, unsigned int flags, int *vpos, + int *hpos, ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode); -int amdgpu_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); +int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj); int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb); void amdgpu_enc_destroy(struct drm_encoder *encoder); void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj); -bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode); +bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); void amdgpu_panel_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *adjusted_mode); -int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc); +int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc); /* fbdev layer */ int amdgpu_fbdev_init(struct amdgpu_device *adev); @@ -662,15 +633,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj) int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled); /* amdgpu_display.c */ -void amdgpu_print_display_setup(struct drm_device *dev); -int amdgpu_modeset_create_props(struct amdgpu_device *adev); -int amdgpu_crtc_set_config(struct drm_mode_set *set, - struct drm_modeset_acquire_ctx *ctx); -int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc, - struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, uint32_t target, - struct drm_modeset_acquire_ctx *ctx); +void amdgpu_display_print_display_setup(struct drm_device *dev); +int amdgpu_display_modeset_create_props(struct amdgpu_device *adev); +int amdgpu_display_crtc_set_config(struct drm_mode_set *set, + struct drm_modeset_acquire_ctx *ctx); +int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags, uint32_t target, + struct drm_modeset_acquire_ctx *ctx); extern const struct drm_mode_config_funcs amdgpu_mode_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5c4c3e0d527b..6d08cde8443c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -36,6 +36,7 @@ #include <drm/drm_cache.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" static bool amdgpu_need_backup(struct amdgpu_device *adev) { @@ -54,8 +55,13 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); + if (bo->kfd_bo) + amdgpu_amdkfd_unreserve_system_memory_limit(bo); + amdgpu_bo_kunmap(bo); + if (bo->gem_base.import_attach) + drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); drm_gem_object_release(&bo->gem_base); amdgpu_bo_unref(&bo->parent); if (!list_empty(&bo->shadow_list)) { @@ -83,7 +89,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) u32 c = 0; if (domain & AMDGPU_GEM_DOMAIN_VRAM) { - unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT; + unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; places[c].fpfn = 0; places[c].lpfn = 0; @@ -103,7 +109,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; if (flags & AMDGPU_GEM_CREATE_SHADOW) - places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT; + places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT; else places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_TT; @@ -169,13 +175,15 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) * @size: size for the new BO * @align: alignment for the new BO * @domain: where to place it - * @bo_ptr: resulting BO + * @bo_ptr: used to initialize BOs in structures * @gpu_addr: GPU addr of the pinned BO * @cpu_addr: optional CPU address mapping * * Allocates and pins a BO for kernel internal use, and returns it still * reserved. * + * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. + * * Returns 0 on success, negative error code otherwise. */ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, @@ -187,10 +195,10 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, int r; if (!*bo_ptr) { - r = amdgpu_bo_create(adev, size, align, true, domain, + r = amdgpu_bo_create(adev, size, align, domain, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, bo_ptr); + ttm_bo_type_kernel, NULL, bo_ptr); if (r) { dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r); @@ -238,12 +246,14 @@ error_free: * @size: size for the new BO * @align: alignment for the new BO * @domain: where to place it - * @bo_ptr: resulting BO + * @bo_ptr: used to initialize BOs in structures * @gpu_addr: GPU addr of the pinned BO * @cpu_addr: optional CPU address mapping * * Allocates and pins a BO for kernel internal use. * + * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. + * * Returns 0 on success, negative error code otherwise. */ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, @@ -331,22 +341,19 @@ fail: return false; } -static int amdgpu_bo_do_create(struct amdgpu_device *adev, - unsigned long size, int byte_align, - bool kernel, u32 domain, u64 flags, - struct sg_table *sg, +static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size, + int byte_align, u32 domain, + u64 flags, enum ttm_bo_type type, struct reservation_object *resv, - uint64_t init_value, struct amdgpu_bo **bo_ptr) { struct ttm_operation_ctx ctx = { - .interruptible = !kernel, + .interruptible = (type != ttm_bo_type_kernel), .no_wait_gpu = false, - .allow_reserved_eviction = true, - .resv = resv + .resv = resv, + .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT }; struct amdgpu_bo *bo; - enum ttm_bo_type type; unsigned long page_align; size_t acc_size; int r; @@ -357,13 +364,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, if (!amdgpu_bo_validate_size(adev, size, domain)) return -ENOMEM; - if (kernel) { - type = ttm_bo_type_kernel; - } else if (sg) { - type = ttm_bo_type_sg; - } else { - type = ttm_bo_type_device; - } *bo_ptr = NULL; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, @@ -372,11 +372,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - r = drm_gem_object_init(adev->ddev, &bo->gem_base, size); - if (unlikely(r)) { - kfree(bo); - return r; - } + drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); INIT_LIST_HEAD(&bo->shadow_list); INIT_LIST_HEAD(&bo->va); bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | @@ -386,7 +382,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA); bo->allowed_domains = bo->preferred_domains; - if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) + if (type != ttm_bo_type_kernel && + bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; bo->flags = flags; @@ -423,27 +420,27 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, amdgpu_ttm_placement_from_domain(bo, domain); r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type, - &bo->placement, page_align, &ctx, NULL, - acc_size, sg, resv, &amdgpu_ttm_bo_destroy); + &bo->placement, page_align, &ctx, acc_size, + NULL, resv, &amdgpu_ttm_bo_destroy); if (unlikely(r != 0)) return r; - if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && bo->tbo.mem.mem_type == TTM_PL_VRAM && - bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT) + bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, ctx.bytes_moved); else amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0); - if (kernel) + if (type == ttm_bo_type_kernel) bo->tbo.priority = 1; if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED && bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence); + r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -480,12 +477,11 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, if (bo->shadow) return 0; - r = amdgpu_bo_do_create(adev, size, byte_align, true, - AMDGPU_GEM_DOMAIN_GTT, + r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_SHADOW, - NULL, bo->tbo.resv, 0, - &bo->shadow); + ttm_bo_type_kernel, + bo->tbo.resv, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); @@ -496,22 +492,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } -/* init_value will only take effect when flags contains - * AMDGPU_GEM_CREATE_VRAM_CLEARED. - */ -int amdgpu_bo_create(struct amdgpu_device *adev, - unsigned long size, int byte_align, - bool kernel, u32 domain, u64 flags, - struct sg_table *sg, +int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, + int byte_align, u32 domain, + u64 flags, enum ttm_bo_type type, struct reservation_object *resv, - uint64_t init_value, struct amdgpu_bo **bo_ptr) { uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; int r; - r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain, - parent_flags, sg, resv, init_value, bo_ptr); + r = amdgpu_bo_do_create(adev, size, byte_align, domain, + parent_flags, type, resv, bo_ptr); if (r) return r; @@ -826,31 +817,32 @@ static const char *amdgpu_vram_names[] = { "GDDR4", "GDDR5", "HBM", - "DDR3" + "DDR3", + "DDR4", }; int amdgpu_bo_init(struct amdgpu_device *adev) { /* reserve PAT memory space to WC for VRAM */ - arch_io_reserve_memtype_wc(adev->mc.aper_base, - adev->mc.aper_size); + arch_io_reserve_memtype_wc(adev->gmc.aper_base, + adev->gmc.aper_size); /* Add an MTRR for the VRAM */ - adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base, - adev->mc.aper_size); + adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base, + adev->gmc.aper_size); DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n", - adev->mc.mc_vram_size >> 20, - (unsigned long long)adev->mc.aper_size >> 20); + adev->gmc.mc_vram_size >> 20, + (unsigned long long)adev->gmc.aper_size >> 20); DRM_INFO("RAM width %dbits %s\n", - adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]); + adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]); return amdgpu_ttm_init(adev); } void amdgpu_bo_fini(struct amdgpu_device *adev) { amdgpu_ttm_fini(adev); - arch_phys_wc_del(adev->mc.vram_mtrr); - arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size); + arch_phys_wc_del(adev->gmc.vram_mtrr); + arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); } int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, @@ -980,7 +972,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) size = bo->mem.num_pages << PAGE_SHIFT; offset = bo->mem.start << PAGE_SHIFT; - if ((offset + size) <= adev->mc.visible_vram_size) + if ((offset + size) <= adev->gmc.visible_vram_size) return 0; /* Can't move a pinned BO to visible VRAM */ @@ -1003,7 +995,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) offset = bo->mem.start << PAGE_SHIFT; /* this should never happen */ if (bo->mem.mem_type == TTM_PL_VRAM && - (offset + size) > adev->mc.visible_vram_size) + (offset + size) > adev->gmc.visible_vram_size) return -EINVAL; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 33615e2ea2e6..546f77cb7882 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -92,6 +92,8 @@ struct amdgpu_bo { struct list_head mn_list; struct list_head shadow_list; }; + + struct kgd_mem *kfd_bo; }; static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) @@ -201,13 +203,11 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; } -int amdgpu_bo_create(struct amdgpu_device *adev, - unsigned long size, int byte_align, - bool kernel, u32 domain, u64 flags, - struct sg_table *sg, - struct reservation_object *resv, - uint64_t init_value, - struct amdgpu_bo **bo_ptr); +int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, + int byte_align, u32 domain, + u64 flags, enum ttm_bo_type type, + struct reservation_object *resv, + struct amdgpu_bo **bo_ptr); int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, @@ -282,8 +282,6 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, struct amdgpu_sa_manager *sa_manager); int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, struct amdgpu_sa_manager *sa_manager); -int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager); int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, struct amdgpu_sa_bo **sa_bo, unsigned size, unsigned align); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 01a996c6b802..361975cf45a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -116,7 +116,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, } if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL); + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state); } else { mutex_lock(&adev->pm.mutex); adev->pm.dpm.user_state = state; @@ -316,7 +316,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, if (state != POWER_STATE_TYPE_INTERNAL_BOOT && state != POWER_STATE_TYPE_DEFAULT) { amdgpu_dpm_dispatch_task(adev, - AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL); + AMD_PP_TASK_ENABLE_USER_STATE, &state); adev->pp_force_state_enabled = true; } } @@ -360,6 +360,90 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, return count; } +static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int ret; + uint32_t parameter_size = 0; + long parameter[64]; + char buf_cpy[128]; + char *tmp_str; + char *sub_str; + const char delimiter[3] = {' ', '\n', '\0'}; + uint32_t type; + + if (count > 127) + return -EINVAL; + + if (*buf == 's') + type = PP_OD_EDIT_SCLK_VDDC_TABLE; + else if (*buf == 'm') + type = PP_OD_EDIT_MCLK_VDDC_TABLE; + else if(*buf == 'r') + type = PP_OD_RESTORE_DEFAULT_TABLE; + else if (*buf == 'c') + type = PP_OD_COMMIT_DPM_TABLE; + else + return -EINVAL; + + memcpy(buf_cpy, buf, count+1); + + tmp_str = buf_cpy; + + while (isspace(*++tmp_str)); + + while (tmp_str[0]) { + sub_str = strsep(&tmp_str, delimiter); + ret = kstrtol(sub_str, 0, ¶meter[parameter_size]); + if (ret) + return -EINVAL; + parameter_size++; + + while (isspace(*tmp_str)) + tmp_str++; + } + + if (adev->powerplay.pp_funcs->odn_edit_dpm_table) + ret = amdgpu_dpm_odn_edit_dpm_table(adev, type, + parameter, parameter_size); + + if (ret) + return -EINVAL; + + if (type == PP_OD_COMMIT_DPM_TABLE) { + if (adev->powerplay.pp_funcs->dispatch_tasks) { + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); + return count; + } else { + return -EINVAL; + } + } + + return count; +} + +static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint32_t size = 0; + + if (adev->powerplay.pp_funcs->print_clock_levels) { + size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); + size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); + return size; + } else { + return snprintf(buf, PAGE_SIZE, "\n"); + } + +} + static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, struct device_attribute *attr, char *buf) @@ -530,7 +614,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, amdgpu_dpm_set_sclk_od(adev, (uint32_t)value); if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); } else { adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; amdgpu_pm_compute_clocks(adev); @@ -574,7 +658,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, amdgpu_dpm_set_mclk_od(adev, (uint32_t)value); if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL); + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL); } else { adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps; amdgpu_pm_compute_clocks(adev); @@ -584,159 +668,70 @@ fail: return count; } -static ssize_t amdgpu_get_pp_power_profile(struct device *dev, - char *buf, struct amd_pp_profile *query) +static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - int ret = 0xff; - if (adev->powerplay.pp_funcs->get_power_profile_state) - ret = amdgpu_dpm_get_power_profile_state( - adev, query); + if (adev->powerplay.pp_funcs->get_power_profile_mode) + return amdgpu_dpm_get_power_profile_mode(adev, buf); - if (ret) - return ret; - - return snprintf(buf, PAGE_SIZE, - "%d %d %d %d %d\n", - query->min_sclk / 100, - query->min_mclk / 100, - query->activity_threshold, - query->up_hyst, - query->down_hyst); + return snprintf(buf, PAGE_SIZE, "\n"); } -static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev, - struct device_attribute *attr, - char *buf) -{ - struct amd_pp_profile query = {0}; - - query.type = AMD_PP_GFX_PROFILE; - - return amdgpu_get_pp_power_profile(dev, buf, &query); -} -static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev, +static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct amd_pp_profile query = {0}; - - query.type = AMD_PP_COMPUTE_PROFILE; - - return amdgpu_get_pp_power_profile(dev, buf, &query); -} - -static ssize_t amdgpu_set_pp_power_profile(struct device *dev, const char *buf, - size_t count, - struct amd_pp_profile *request) + size_t count) { + int ret = 0xff; struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; - uint32_t loop = 0; - char *sub_str, buf_cpy[128], *tmp_str; + uint32_t parameter_size = 0; + long parameter[64]; + char *sub_str, buf_cpy[128]; + char *tmp_str; + uint32_t i = 0; + char tmp[2]; + long int profile_mode = 0; const char delimiter[3] = {' ', '\n', '\0'}; - long int value; - int ret = 0xff; - - if (strncmp("reset", buf, strlen("reset")) == 0) { - if (adev->powerplay.pp_funcs->reset_power_profile_state) - ret = amdgpu_dpm_reset_power_profile_state( - adev, request); - if (ret) { - count = -EINVAL; - goto fail; - } - return count; - } - - if (strncmp("set", buf, strlen("set")) == 0) { - if (adev->powerplay.pp_funcs->set_power_profile_state) - ret = amdgpu_dpm_set_power_profile_state( - adev, request); - - if (ret) { - count = -EINVAL; - goto fail; - } - return count; - } - if (count + 1 >= 128) { - count = -EINVAL; + tmp[0] = *(buf); + tmp[1] = '\0'; + ret = kstrtol(tmp, 0, &profile_mode); + if (ret) goto fail; - } - - memcpy(buf_cpy, buf, count + 1); - tmp_str = buf_cpy; - - while (tmp_str[0]) { - sub_str = strsep(&tmp_str, delimiter); - ret = kstrtol(sub_str, 0, &value); - if (ret) { - count = -EINVAL; - goto fail; - } - switch (loop) { - case 0: - /* input unit MHz convert to dpm table unit 10KHz*/ - request->min_sclk = (uint32_t)value * 100; - break; - case 1: - /* input unit MHz convert to dpm table unit 10KHz*/ - request->min_mclk = (uint32_t)value * 100; - break; - case 2: - request->activity_threshold = (uint16_t)value; - break; - case 3: - request->up_hyst = (uint8_t)value; - break; - case 4: - request->down_hyst = (uint8_t)value; - break; - default: - break; + if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { + if (count < 2 || count > 127) + return -EINVAL; + while (isspace(*++buf)) + i++; + memcpy(buf_cpy, buf, count-i); + tmp_str = buf_cpy; + while (tmp_str[0]) { + sub_str = strsep(&tmp_str, delimiter); + ret = kstrtol(sub_str, 0, ¶meter[parameter_size]); + if (ret) { + count = -EINVAL; + goto fail; + } + parameter_size++; + while (isspace(*tmp_str)) + tmp_str++; } - - loop++; } - if (adev->powerplay.pp_funcs->set_power_profile_state) - ret = amdgpu_dpm_set_power_profile_state(adev, request); - - if (ret) - count = -EINVAL; + parameter[parameter_size] = profile_mode; + if (adev->powerplay.pp_funcs->set_power_profile_mode) + ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size); + if (!ret) + return count; fail: - return count; -} - -static ssize_t amdgpu_set_pp_gfx_power_profile(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct amd_pp_profile request = {0}; - - request.type = AMD_PP_GFX_PROFILE; - - return amdgpu_set_pp_power_profile(dev, buf, count, &request); -} - -static ssize_t amdgpu_set_pp_compute_power_profile(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) -{ - struct amd_pp_profile request = {0}; - - request.type = AMD_PP_COMPUTE_PROFILE; - - return amdgpu_set_pp_power_profile(dev, buf, count, &request); + return -EINVAL; } static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); @@ -766,12 +761,12 @@ static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR, static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR, amdgpu_get_pp_mclk_od, amdgpu_set_pp_mclk_od); -static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR, - amdgpu_get_pp_gfx_power_profile, - amdgpu_set_pp_gfx_power_profile); -static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR, - amdgpu_get_pp_compute_power_profile, - amdgpu_set_pp_compute_power_profile); +static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR, + amdgpu_get_pp_power_profile_mode, + amdgpu_set_pp_power_profile_mode); +static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR, + amdgpu_get_pp_od_clk_voltage, + amdgpu_set_pp_od_clk_voltage); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -779,17 +774,23 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, { struct amdgpu_device *adev = dev_get_drvdata(dev); struct drm_device *ddev = adev->ddev; - int temp; + int r, temp, size = sizeof(temp); /* Can't get temperature when the card is off */ if ((adev->flags & AMD_IS_PX) && (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (!adev->powerplay.pp_funcs->get_temperature) - temp = 0; - else - temp = amdgpu_dpm_get_temperature(adev); + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the temperature */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, + (void *)&temp, &size); + if (r) + return r; return snprintf(buf, PAGE_SIZE, "%d\n", temp); } @@ -834,6 +835,11 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, int err; int value; + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + if (!adev->powerplay.pp_funcs->set_fan_control_mode) return -EINVAL; @@ -868,6 +874,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, int err; u32 value; + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + err = kstrtou32(buf, 10, &value); if (err) return err; @@ -891,6 +902,11 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + if (adev->powerplay.pp_funcs->get_fan_speed_percent) { err = amdgpu_dpm_get_fan_speed_percent(adev, &speed); if (err) @@ -910,6 +926,11 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed); if (err) @@ -919,6 +940,175 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, return sprintf(buf, "%i\n", speed); } +static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + struct drm_device *ddev = adev->ddev; + u32 vddgfx; + int r, size = sizeof(vddgfx); + + /* Can't get voltage when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the voltage */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, + (void *)&vddgfx, &size); + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", vddgfx); +} + +static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "vddgfx\n"); +} + +static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + struct drm_device *ddev = adev->ddev; + u32 vddnb; + int r, size = sizeof(vddnb); + + /* only APUs have vddnb */ + if (adev->flags & AMD_IS_APU) + return -EINVAL; + + /* Can't get voltage when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the voltage */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, + (void *)&vddnb, &size); + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", vddnb); +} + +static ssize_t amdgpu_hwmon_show_vddnb_label(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "vddnb\n"); +} + +static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + struct drm_device *ddev = adev->ddev; + struct pp_gpu_power query = {0}; + int r, size = sizeof(query); + unsigned uw; + + /* Can't get power when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + /* sanity check PP is enabled */ + if (!(adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor)) + return -EINVAL; + + /* get the voltage */ + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER, + (void *)&query, &size); + if (r) + return r; + + /* convert to microwatts */ + uw = (query.average_gpu_power >> 8) * 1000000; + + return snprintf(buf, PAGE_SIZE, "%u\n", uw); +} + +static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%i\n", 0); +} + +static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + uint32_t limit = 0; + + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { + adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true); + return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + } else { + return snprintf(buf, PAGE_SIZE, "\n"); + } +} + +static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + uint32_t limit = 0; + + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) { + adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false); + return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000); + } else { + return snprintf(buf, PAGE_SIZE, "\n"); + } +} + + +static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int err; + u32 value; + + err = kstrtou32(buf, 10, &value); + if (err) + return err; + + value = value / 1000000; /* convert to Watt */ + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { + err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); + if (err) + return err; + } else { + return -EINVAL; + } + + return count; +} + static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); @@ -927,6 +1117,14 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_ static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0); static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0); +static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0); +static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0); +static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0); +static SENSOR_DEVICE_ATTR(in1_label, S_IRUGO, amdgpu_hwmon_show_vddnb_label, NULL, 0); +static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg, NULL, 0); +static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0); +static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_min, NULL, 0); +static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0); static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_temp1_input.dev_attr.attr, @@ -937,6 +1135,14 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_pwm1_min.dev_attr.attr, &sensor_dev_attr_pwm1_max.dev_attr.attr, &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_in0_input.dev_attr.attr, + &sensor_dev_attr_in0_label.dev_attr.attr, + &sensor_dev_attr_in1_input.dev_attr.attr, + &sensor_dev_attr_in1_label.dev_attr.attr, + &sensor_dev_attr_power1_average.dev_attr.attr, + &sensor_dev_attr_power1_cap_max.dev_attr.attr, + &sensor_dev_attr_power1_cap_min.dev_attr.attr, + &sensor_dev_attr_power1_cap.dev_attr.attr, NULL }; @@ -947,9 +1153,19 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct amdgpu_device *adev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; - /* no skipping for powerplay */ - if (adev->powerplay.cgs_device) - return effective_mode; + /* handle non-powerplay limitations */ + if (!adev->powerplay.pp_handle) { + /* Skip fan attributes if fan is not present */ + if (adev->pm.no_fan && + (attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) + return 0; + /* requires powerplay */ + if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr) + return 0; + } /* Skip limit attributes if DPM is not enabled */ if (!adev->pm.dpm_enabled && @@ -961,14 +1177,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) return 0; - /* Skip fan attributes if fan is not present */ - if (adev->pm.no_fan && - (attr == &sensor_dev_attr_pwm1.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) - return 0; - /* mask fan attributes if we have no bindings for this asic to expose */ if ((!adev->powerplay.pp_funcs->get_fan_speed_percent && attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */ @@ -982,6 +1190,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ effective_mode &= ~S_IWUSR; + if ((adev->flags & AMD_IS_APU) && + (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || + attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| + attr == &sensor_dev_attr_power1_cap.dev_attr.attr)) + return 0; + /* hide max/min values if we can't both query and manage the fan */ if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && !adev->powerplay.pp_funcs->get_fan_speed_percent) && @@ -989,8 +1203,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) return 0; - /* requires powerplay */ - if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr) + /* only APUs have vddnb */ + if (!(adev->flags & AMD_IS_APU) && + (attr == &sensor_dev_attr_in1_input.dev_attr.attr || + attr == &sensor_dev_attr_in1_label.dev_attr.attr)) return 0; return effective_mode; @@ -1013,13 +1229,15 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) pm.dpm.thermal.work); /* switch to the thermal state */ enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL; + int temp, size = sizeof(temp); if (!adev->pm.dpm_enabled) return; - if (adev->powerplay.pp_funcs->get_temperature) { - int temp = amdgpu_dpm_get_temperature(adev); - + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->read_sensor && + !amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP, + (void *)&temp, &size)) { if (temp < adev->pm.dpm.thermal.min_temp) /* switch back the user state */ dpm_state = adev->pm.dpm.user_state; @@ -1319,9 +1537,6 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) if (adev->pm.dpm_enabled == 0) return 0; - if (adev->powerplay.pp_funcs->get_temperature == NULL) - return 0; - adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev, DRIVER_NAME, adev, hwmon_groups); @@ -1391,20 +1606,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } ret = device_create_file(adev->dev, - &dev_attr_pp_gfx_power_profile); + &dev_attr_pp_power_profile_mode); if (ret) { DRM_ERROR("failed to create device file " - "pp_gfx_power_profile\n"); + "pp_power_profile_mode\n"); return ret; } ret = device_create_file(adev->dev, - &dev_attr_pp_compute_power_profile); + &dev_attr_pp_od_clk_voltage); if (ret) { DRM_ERROR("failed to create device file " - "pp_compute_power_profile\n"); + "pp_od_clk_voltage\n"); return ret; } - ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -1437,9 +1651,9 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_pp_sclk_od); device_remove_file(adev->dev, &dev_attr_pp_mclk_od); device_remove_file(adev->dev, - &dev_attr_pp_gfx_power_profile); + &dev_attr_pp_power_profile_mode); device_remove_file(adev->dev, - &dev_attr_pp_compute_power_profile); + &dev_attr_pp_od_clk_voltage); } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) @@ -1462,7 +1676,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) } if (adev->powerplay.pp_funcs->dispatch_tasks) { - amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL); + amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL); } else { mutex_lock(&adev->pm.mutex); adev->pm.dpm.new_active_crtcs = 0; @@ -1512,6 +1726,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a seq_printf(m, "\t%u MHz (MCLK)\n", value/100); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size)) seq_printf(m, "\t%u MHz (SCLK)\n", value/100); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (PSTATE_SCLK)\n", value/100); + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, (void *)&value, &size)) + seq_printf(m, "\t%u MHz (PSTATE_MCLK)\n", value/100); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size)) seq_printf(m, "\t%u mV (VDDGFX)\n", value); if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c deleted file mode 100644 index 5f5aa5fddc16..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ -#include "atom.h" -#include "amdgpu.h" -#include "amd_shared.h" -#include <linux/module.h> -#include <linux/moduleparam.h> -#include "amdgpu_pm.h" -#include <drm/amdgpu_drm.h> -#include "amdgpu_powerplay.h" -#include "si_dpm.h" -#include "cik_dpm.h" -#include "vi_dpm.h" - -static int amdgpu_pp_early_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amd_powerplay *amd_pp; - int ret = 0; - - amd_pp = &(adev->powerplay); - amd_pp->pp_handle = (void *)adev; - - switch (adev->asic_type) { - case CHIP_POLARIS11: - case CHIP_POLARIS10: - case CHIP_POLARIS12: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_TOPAZ: - case CHIP_CARRIZO: - case CHIP_STONEY: - case CHIP_VEGA10: - case CHIP_RAVEN: - amd_pp->cgs_device = amdgpu_cgs_create_device(adev); - amd_pp->ip_funcs = &pp_ip_funcs; - amd_pp->pp_funcs = &pp_dpm_funcs; - break; - /* These chips don't have powerplay implemenations */ -#ifdef CONFIG_DRM_AMDGPU_SI - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - amd_pp->ip_funcs = &si_dpm_ip_funcs; - amd_pp->pp_funcs = &si_dpm_funcs; - break; -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: - if (amdgpu_dpm == -1) { - amd_pp->ip_funcs = &ci_dpm_ip_funcs; - amd_pp->pp_funcs = &ci_dpm_funcs; - } else { - amd_pp->cgs_device = amdgpu_cgs_create_device(adev); - amd_pp->ip_funcs = &pp_ip_funcs; - amd_pp->pp_funcs = &pp_dpm_funcs; - } - break; - case CHIP_KABINI: - case CHIP_MULLINS: - case CHIP_KAVERI: - amd_pp->ip_funcs = &kv_dpm_ip_funcs; - amd_pp->pp_funcs = &kv_dpm_funcs; - break; -#endif - default: - ret = -EINVAL; - break; - } - - if (adev->powerplay.ip_funcs->early_init) - ret = adev->powerplay.ip_funcs->early_init( - amd_pp->cgs_device ? amd_pp->cgs_device : - amd_pp->pp_handle); - - return ret; -} - - -static int amdgpu_pp_late_init(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->late_init) - ret = adev->powerplay.ip_funcs->late_init( - adev->powerplay.pp_handle); - - return ret; -} - -static int amdgpu_pp_sw_init(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->sw_init) - ret = adev->powerplay.ip_funcs->sw_init( - adev->powerplay.pp_handle); - - return ret; -} - -static int amdgpu_pp_sw_fini(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->sw_fini) - ret = adev->powerplay.ip_funcs->sw_fini( - adev->powerplay.pp_handle); - if (ret) - return ret; - - return ret; -} - -static int amdgpu_pp_hw_init(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) - amdgpu_ucode_init_bo(adev); - - if (adev->powerplay.ip_funcs->hw_init) - ret = adev->powerplay.ip_funcs->hw_init( - adev->powerplay.pp_handle); - - return ret; -} - -static int amdgpu_pp_hw_fini(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->hw_fini) - ret = adev->powerplay.ip_funcs->hw_fini( - adev->powerplay.pp_handle); - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) - amdgpu_ucode_fini_bo(adev); - - return ret; -} - -static void amdgpu_pp_late_fini(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->late_fini) - adev->powerplay.ip_funcs->late_fini( - adev->powerplay.pp_handle); - - if (adev->powerplay.cgs_device) - amdgpu_cgs_destroy_device(adev->powerplay.cgs_device); -} - -static int amdgpu_pp_suspend(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->suspend) - ret = adev->powerplay.ip_funcs->suspend( - adev->powerplay.pp_handle); - return ret; -} - -static int amdgpu_pp_resume(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->resume) - ret = adev->powerplay.ip_funcs->resume( - adev->powerplay.pp_handle); - return ret; -} - -static int amdgpu_pp_set_clockgating_state(void *handle, - enum amd_clockgating_state state) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->set_clockgating_state) - ret = adev->powerplay.ip_funcs->set_clockgating_state( - adev->powerplay.pp_handle, state); - return ret; -} - -static int amdgpu_pp_set_powergating_state(void *handle, - enum amd_powergating_state state) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->set_powergating_state) - ret = adev->powerplay.ip_funcs->set_powergating_state( - adev->powerplay.pp_handle, state); - return ret; -} - - -static bool amdgpu_pp_is_idle(void *handle) -{ - bool ret = true; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->is_idle) - ret = adev->powerplay.ip_funcs->is_idle( - adev->powerplay.pp_handle); - return ret; -} - -static int amdgpu_pp_wait_for_idle(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->wait_for_idle) - ret = adev->powerplay.ip_funcs->wait_for_idle( - adev->powerplay.pp_handle); - return ret; -} - -static int amdgpu_pp_soft_reset(void *handle) -{ - int ret = 0; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->powerplay.ip_funcs->soft_reset) - ret = adev->powerplay.ip_funcs->soft_reset( - adev->powerplay.pp_handle); - return ret; -} - -static const struct amd_ip_funcs amdgpu_pp_ip_funcs = { - .name = "amdgpu_powerplay", - .early_init = amdgpu_pp_early_init, - .late_init = amdgpu_pp_late_init, - .sw_init = amdgpu_pp_sw_init, - .sw_fini = amdgpu_pp_sw_fini, - .hw_init = amdgpu_pp_hw_init, - .hw_fini = amdgpu_pp_hw_fini, - .late_fini = amdgpu_pp_late_fini, - .suspend = amdgpu_pp_suspend, - .resume = amdgpu_pp_resume, - .is_idle = amdgpu_pp_is_idle, - .wait_for_idle = amdgpu_pp_wait_for_idle, - .soft_reset = amdgpu_pp_soft_reset, - .set_clockgating_state = amdgpu_pp_set_clockgating_state, - .set_powergating_state = amdgpu_pp_set_powergating_state, -}; - -const struct amdgpu_ip_block_version amdgpu_pp_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_SMC, - .major = 1, - .minor = 0, - .rev = 0, - .funcs = &amdgpu_pp_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index ae9c106979d7..1c9991738477 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -26,9 +26,12 @@ #include <drm/drmP.h> #include "amdgpu.h" +#include "amdgpu_display.h" #include <drm/amdgpu_drm.h> #include <linux/dma-buf.h> +static const struct dma_buf_ops amdgpu_dmabuf_ops; + struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -102,59 +105,93 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, int ret; ww_mutex_lock(&resv->lock, NULL); - ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false, - AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, 0, &bo); - ww_mutex_unlock(&resv->lock); + ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg, + resv, &bo); if (ret) - return ERR_PTR(ret); + goto error; - bo->prime_shared_count = 1; + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) + bo->prime_shared_count = 1; + + ww_mutex_unlock(&resv->lock); return &bo->gem_base; + +error: + ww_mutex_unlock(&resv->lock); + return ERR_PTR(ret); } -int amdgpu_gem_prime_pin(struct drm_gem_object *obj) +static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, + struct device *target_dev, + struct dma_buf_attachment *attach) { + struct drm_gem_object *obj = dma_buf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - long ret = 0; - - ret = amdgpu_bo_reserve(bo, false); - if (unlikely(ret != 0)) - return ret; - - /* - * Wait for all shared fences to complete before we switch to future - * use of exclusive fence on this prime shared bo. - */ - ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, - MAX_SCHEDULE_TIMEOUT); - if (unlikely(ret < 0)) { - DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); - amdgpu_bo_unreserve(bo); - return ret; + long r; + + r = drm_gem_map_attach(dma_buf, target_dev, attach); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (unlikely(r != 0)) + goto error_detach; + + + if (dma_buf->ops != &amdgpu_dmabuf_ops) { + /* + * Wait for all shared fences to complete before we switch to future + * use of exclusive fence on this prime shared bo. + */ + r = reservation_object_wait_timeout_rcu(bo->tbo.resv, + true, false, + MAX_SCHEDULE_TIMEOUT); + if (unlikely(r < 0)) { + DRM_DEBUG_PRIME("Fence wait failed: %li\n", r); + goto error_unreserve; + } } /* pin buffer into GTT */ - ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); - if (likely(ret == 0)) + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + if (r) + goto error_unreserve; + + if (dma_buf->ops != &amdgpu_dmabuf_ops) bo->prime_shared_count++; +error_unreserve: amdgpu_bo_unreserve(bo); - return ret; + +error_detach: + if (r) + drm_gem_map_detach(dma_buf, attach); + return r; } -void amdgpu_gem_prime_unpin(struct drm_gem_object *obj) +static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) { + struct drm_gem_object *obj = dma_buf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); int ret = 0; ret = amdgpu_bo_reserve(bo, true); if (unlikely(ret != 0)) - return; + goto error; amdgpu_bo_unpin(bo); - if (bo->prime_shared_count) + if (dma_buf->ops != &amdgpu_dmabuf_ops && bo->prime_shared_count) bo->prime_shared_count--; amdgpu_bo_unreserve(bo); + +error: + drm_gem_map_detach(dma_buf, attach); } struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) @@ -164,6 +201,50 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) return bo->tbo.resv; } +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, + enum dma_data_direction direction) +{ + struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_operation_ctx ctx = { true, false }; + u32 domain = amdgpu_display_framebuffer_domains(adev); + int ret; + bool reads = (direction == DMA_BIDIRECTIONAL || + direction == DMA_FROM_DEVICE); + + if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT)) + return 0; + + /* move to gtt */ + ret = amdgpu_bo_reserve(bo, false); + if (unlikely(ret != 0)) + return ret; + + if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) { + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + } + + amdgpu_bo_unreserve(bo); + return ret; +} + +static const struct dma_buf_ops amdgpu_dmabuf_ops = { + .attach = amdgpu_gem_map_attach, + .detach = amdgpu_gem_map_detach, + .map_dma_buf = drm_gem_map_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .begin_cpu_access = amdgpu_gem_begin_cpu_access, + .map = drm_gem_dmabuf_kmap, + .map_atomic = drm_gem_dmabuf_kmap_atomic, + .unmap = drm_gem_dmabuf_kunmap, + .unmap_atomic = drm_gem_dmabuf_kunmap_atomic, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; + struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags) @@ -176,7 +257,30 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, return ERR_PTR(-EPERM); buf = drm_gem_prime_export(dev, gobj, flags); - if (!IS_ERR(buf)) + if (!IS_ERR(buf)) { buf->file->f_mapping = dev->anon_inode->i_mapping; + buf->ops = &amdgpu_dmabuf_ops; + } + return buf; } + +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct drm_gem_object *obj; + + if (dma_buf->ops == &amdgpu_dmabuf_ops) { + obj = dma_buf->priv; + if (obj->dev == dev) { + /* + * Importing dmabuf exported from out own gem increases + * refcount on gem itself instead of f_count of dmabuf. + */ + drm_gem_object_get(obj); + return obj; + } + } + + return drm_gem_prime_import(dev, dma_buf); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 2157d4509e84..9a75410cd576 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -51,29 +51,10 @@ static int psp_sw_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: - psp->init_microcode = psp_v3_1_init_microcode; - psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv; - psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos; - psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf; - psp->ring_init = psp_v3_1_ring_init; - psp->ring_create = psp_v3_1_ring_create; - psp->ring_stop = psp_v3_1_ring_stop; - psp->ring_destroy = psp_v3_1_ring_destroy; - psp->cmd_submit = psp_v3_1_cmd_submit; - psp->compare_sram_data = psp_v3_1_compare_sram_data; - psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; - psp->mode1_reset = psp_v3_1_mode1_reset; + psp_v3_1_set_psp_funcs(psp); break; case CHIP_RAVEN: - psp->init_microcode = psp_v10_0_init_microcode; - psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; - psp->ring_init = psp_v10_0_ring_init; - psp->ring_create = psp_v10_0_ring_create; - psp->ring_stop = psp_v10_0_ring_stop; - psp->ring_destroy = psp_v10_0_ring_destroy; - psp->cmd_submit = psp_v10_0_cmd_submit; - psp->compare_sram_data = psp_v10_0_compare_sram_data; - psp->mode1_reset = psp_v10_0_mode1_reset; + psp_v10_0_set_psp_funcs(psp); break; default: return -EINVAL; @@ -81,6 +62,9 @@ static int psp_sw_init(void *handle) psp->adev = adev; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + ret = psp_init_microcode(psp); if (ret) { DRM_ERROR("Failed to load psp firmware!\n"); @@ -94,6 +78,9 @@ static int psp_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + release_firmware(adev->psp.sos_fw); adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); @@ -472,6 +459,9 @@ static int psp_suspend(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) + return 0; + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); @@ -512,19 +502,8 @@ failed: return ret; } -static bool psp_check_reset(void* handle) +int psp_gpu_reset(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (adev->flags & AMD_IS_APU) - return true; - - return false; -} - -static int psp_reset(void* handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; return psp_mode1_reset(&adev->psp); } @@ -571,9 +550,9 @@ const struct amd_ip_funcs psp_ip_funcs = { .suspend = psp_suspend, .resume = psp_resume, .is_idle = NULL, - .check_soft_reset = psp_check_reset, + .check_soft_reset = NULL, .wait_for_idle = NULL, - .soft_reset = psp_reset, + .soft_reset = NULL, .set_clockgating_state = psp_set_clockgating_state, .set_powergating_state = psp_set_powergating_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index ce4654550416..129209686848 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -33,6 +33,8 @@ #define PSP_ASD_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 +struct psp_context; + enum psp_ring_type { PSP_RING_TYPE__INVALID = 0, @@ -53,12 +55,8 @@ struct psp_ring uint32_t ring_size; }; -struct psp_context +struct psp_funcs { - struct amdgpu_device *adev; - struct psp_ring km_ring; - struct psp_gfx_cmd_resp *cmd; - int (*init_microcode)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); @@ -77,6 +75,15 @@ struct psp_context enum AMDGPU_UCODE_ID ucode_type); bool (*smu_reload_quirk)(struct psp_context *psp); int (*mode1_reset)(struct psp_context *psp); +}; + +struct psp_context +{ + struct amdgpu_device *adev; + struct psp_ring km_ring; + struct psp_gfx_cmd_resp *cmd; + + const struct psp_funcs *funcs; /* fence buffer */ struct amdgpu_bo *fw_pri_bo; @@ -123,25 +130,25 @@ struct amdgpu_psp_funcs { enum AMDGPU_UCODE_ID); }; -#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type)) -#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type)) -#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type)) -#define psp_ring_stop(psp, type) (psp)->ring_stop((psp), (type)) -#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type))) +#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type)) +#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) +#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) +#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type)) +#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) #define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ - (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) + (psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) #define psp_compare_sram_data(psp, ucode, type) \ - (psp)->compare_sram_data((psp), (ucode), (type)) + (psp)->funcs->compare_sram_data((psp), (ucode), (type)) #define psp_init_microcode(psp) \ - ((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0) + ((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0) #define psp_bootloader_load_sysdrv(psp) \ - ((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0) + ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ - ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0) + ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ - ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false) + ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) #define psp_mode1_reset(psp) \ - ((psp)->mode1_reset ? (psp)->mode1_reset((psp)) : false) + ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) extern const struct amd_ip_funcs psp_ip_funcs; @@ -151,4 +158,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; +int psp_gpu_reset(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 13044e66dcaf..d5f526f38e50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -360,6 +360,9 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) amdgpu_debugfs_ring_fini(ring); + dma_fence_put(ring->vmid_wait); + ring->vmid_wait = NULL; + ring->adev->rings[ring->idx] = NULL; } @@ -481,7 +484,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, result = 0; if (*pos < 12) { - early[0] = amdgpu_ring_get_rptr(ring); + early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; early[2] = ring->wptr & ring->buf_mask; for (i = *pos / 4; i < 3 && size; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 102dad3edf6a..1a5911882657 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -26,6 +26,7 @@ #include <drm/amdgpu_drm.h> #include <drm/gpu_scheduler.h> +#include <drm/drm_print.h> /* max number of rings */ #define AMDGPU_MAX_RINGS 18 @@ -35,8 +36,9 @@ #define AMDGPU_MAX_UVD_ENC_RINGS 2 /* some special values for the owner field */ -#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) -#define AMDGPU_FENCE_OWNER_VM ((void*)1ul) +#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul) +#define AMDGPU_FENCE_OWNER_VM ((void *)1ul) +#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul) #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) @@ -128,7 +130,6 @@ struct amdgpu_ring_funcs { void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void (*emit_hdp_flush)(struct amdgpu_ring *ring); - void (*emit_hdp_invalidate)(struct amdgpu_ring *ring); void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid, uint32_t gds_base, uint32_t gds_size, uint32_t gws_base, uint32_t gws_size, @@ -151,6 +152,8 @@ struct amdgpu_ring_funcs { void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); /* priority functions */ void (*set_priority) (struct amdgpu_ring *ring, @@ -195,6 +198,7 @@ struct amdgpu_ring { u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; + struct dma_fence *vmid_wait; bool has_compute_vm_bug; atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 3144400435b7..fb1667b35daa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -63,21 +63,27 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) INIT_LIST_HEAD(&sa_manager->flist[i]); - r = amdgpu_bo_create(adev, size, align, true, domain, - 0, NULL, NULL, 0, &sa_manager->bo); + r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo, + &sa_manager->gpu_addr, &sa_manager->cpu_ptr); if (r) { dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r); return r; } + memset(sa_manager->cpu_ptr, 0, sa_manager->size); return r; } void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager) + struct amdgpu_sa_manager *sa_manager) { struct amdgpu_sa_bo *sa_bo, *tmp; + if (sa_manager->bo == NULL) { + dev_err(adev->dev, "no bo for sa manager\n"); + return; + } + if (!list_empty(&sa_manager->olist)) { sa_manager->hole = &sa_manager->olist, amdgpu_sa_bo_try_free(sa_manager); @@ -88,55 +94,9 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev, list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) { amdgpu_sa_bo_remove_locked(sa_bo); } - amdgpu_bo_unref(&sa_manager->bo); - sa_manager->size = 0; -} - -int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager) -{ - int r; - - if (sa_manager->bo == NULL) { - dev_err(adev->dev, "no bo for sa manager\n"); - return -EINVAL; - } - /* map the buffer */ - r = amdgpu_bo_reserve(sa_manager->bo, false); - if (r) { - dev_err(adev->dev, "(%d) failed to reserve manager bo\n", r); - return r; - } - r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr); - if (r) { - amdgpu_bo_unreserve(sa_manager->bo); - dev_err(adev->dev, "(%d) failed to pin manager bo\n", r); - return r; - } - r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr); - memset(sa_manager->cpu_ptr, 0, sa_manager->size); - amdgpu_bo_unreserve(sa_manager->bo); - return r; -} - -int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev, - struct amdgpu_sa_manager *sa_manager) -{ - int r; - - if (sa_manager->bo == NULL) { - dev_err(adev->dev, "no bo for sa manager\n"); - return -EINVAL; - } - - r = amdgpu_bo_reserve(sa_manager->bo, true); - if (!r) { - amdgpu_bo_kunmap(sa_manager->bo); - amdgpu_bo_unpin(sa_manager->bo); - amdgpu_bo_unreserve(sa_manager->bo); - } - return r; + amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr); + sa_manager->size = 0; } static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index df65c66dc956..2d6f5ec77a68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -31,6 +31,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" struct amdgpu_sync_entry { struct hlist_node node; @@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, */ static void *amdgpu_sync_get_owner(struct dma_fence *f) { - struct drm_sched_fence *s_fence = to_drm_sched_fence(f); + struct drm_sched_fence *s_fence; + struct amdgpu_amdkfd_fence *kfd_fence; + + if (!f) + return AMDGPU_FENCE_OWNER_UNDEFINED; + s_fence = to_drm_sched_fence(f); if (s_fence) return s_fence->owner; + kfd_fence = to_amdgpu_amdkfd_fence(f); + if (kfd_fence) + return AMDGPU_FENCE_OWNER_KFD; + return AMDGPU_FENCE_OWNER_UNDEFINED; } @@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], reservation_object_held(resv)); + /* We only want to trigger KFD eviction fences on + * evict or move jobs. Skip KFD fences otherwise. + */ + fence_owner = amdgpu_sync_get_owner(f); + if (fence_owner == AMDGPU_FENCE_OWNER_KFD && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + continue; + if (amdgpu_sync_same_dev(adev, f)) { /* VM updates are only interesting * for other VM updates and moves. */ - fence_owner = amdgpu_sync_get_owner(f); if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && ((owner == AMDGPU_FENCE_OWNER_VM) != @@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit return NULL; } +/** + * amdgpu_sync_clone - clone a sync object + * + * @source: sync object to clone + * @clone: pointer to destination sync object + * + * Adds references to all unsignaled fences in @source to @clone. Also + * removes signaled fences from @source while at it. + */ +int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + struct dma_fence *f; + int i, r; + + hash_for_each_safe(source->fences, i, tmp, e, node) { + f = e->fence; + if (!dma_fence_is_signaled(f)) { + r = amdgpu_sync_fence(NULL, clone, f, e->explicit); + if (r) + return r; + } else { + hash_del(&e->node); + dma_fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + } + } + + dma_fence_put(clone->last_vm_update); + clone->last_vm_update = dma_fence_get(source->last_vm_update); + + return 0; +} + int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) { struct amdgpu_sync_entry *e; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 7aba38d5c9df..10cf23a57f17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); +int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index ed8c3739015b..2dbe87591f81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -42,7 +42,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = adev->mc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; + n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) if (adev->rings[i]) n -= adev->rings[i]->ring_size; @@ -59,9 +59,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_cleanup; } - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, 0, - NULL, NULL, 0, &vram_obj); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0, + ttm_bo_type_kernel, NULL, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -80,9 +79,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) void **vram_start, **vram_end; struct dma_fence *fence = NULL; - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, 0, gtt_obj + i); + r = amdgpu_bo_create(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, 0, + ttm_bo_type_kernel, NULL, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; @@ -142,10 +141,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) "0x%16llx/0x%16llx)\n", i, *vram_start, gart_start, (unsigned long long) - (gart_addr - adev->mc.gart_start + + (gart_addr - adev->gmc.gart_start + (void*)gart_start - gtt_map), (unsigned long long) - (vram_addr - adev->mc.vram_start + + (vram_addr - adev->gmc.vram_start + (void*)gart_start - gtt_map)); amdgpu_bo_kunmap(vram_obj); goto out_lclean_unpin; @@ -187,10 +186,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) "0x%16llx/0x%16llx)\n", i, *gart_start, vram_start, (unsigned long long) - (vram_addr - adev->mc.vram_start + + (vram_addr - adev->gmc.vram_start + (void*)vram_start - vram_map), (unsigned long long) - (gart_addr - adev->mc.gart_start + + (gart_addr - adev->gmc.gart_start + (void*)vram_start - vram_map)); amdgpu_bo_kunmap(gtt_obj[i]); goto out_lclean_unpin; @@ -200,7 +199,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(gtt_obj[i]); DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n", - gart_addr - adev->mc.gart_start); + gart_addr - adev->gmc.gart_start); continue; out_lclean_unpin: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index cace7a93fc94..532263ab6e16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -86,7 +86,7 @@ TRACE_EVENT(amdgpu_iv, __field(unsigned, vmid_src) __field(uint64_t, timestamp) __field(unsigned, timestamp_src) - __field(unsigned, pas_id) + __field(unsigned, pasid) __array(unsigned, src_data, 4) ), TP_fast_assign( @@ -97,16 +97,16 @@ TRACE_EVENT(amdgpu_iv, __entry->vmid_src = iv->vmid_src; __entry->timestamp = iv->timestamp; __entry->timestamp_src = iv->timestamp_src; - __entry->pas_id = iv->pas_id; + __entry->pasid = iv->pasid; __entry->src_data[0] = iv->src_data[0]; __entry->src_data[1] = iv->src_data[1]; __entry->src_data[2] = iv->src_data[2]; __entry->src_data[3] = iv->src_data[3]; ), - TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n", + TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n", __entry->client_id, __entry->src_id, __entry->ring_id, __entry->vmid, - __entry->timestamp, __entry->pas_id, + __entry->timestamp, __entry->pasid, __entry->src_data[0], __entry->src_data[1], __entry->src_data[2], __entry->src_data[3]) ); @@ -217,7 +217,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, struct amdgpu_job *job), TP_ARGS(vm, ring, job), TP_STRUCT__entry( - __field(struct amdgpu_vm *, vm) + __field(u32, pasid) __field(u32, ring) __field(u32, vmid) __field(u32, vm_hub) @@ -226,15 +226,15 @@ TRACE_EVENT(amdgpu_vm_grab_id, ), TP_fast_assign( - __entry->vm = vm; + __entry->pasid = vm->pasid; __entry->ring = ring->idx; __entry->vmid = job->vmid; __entry->vm_hub = ring->funcs->vmhub, __entry->pd_addr = job->vm_pd_addr; __entry->needs_flush = job->vm_needs_flush; ), - TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", - __entry->vm, __entry->ring, __entry->vmid, + TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", + __entry->pasid, __entry->ring, __entry->vmid, __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) ); @@ -378,6 +378,28 @@ TRACE_EVENT(amdgpu_vm_flush, __entry->vm_hub,__entry->pd_addr) ); +DECLARE_EVENT_CLASS(amdgpu_pasid, + TP_PROTO(unsigned pasid), + TP_ARGS(pasid), + TP_STRUCT__entry( + __field(unsigned, pasid) + ), + TP_fast_assign( + __entry->pasid = pasid; + ), + TP_printk("pasid=%u", __entry->pasid) +); + +DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_allocated, + TP_PROTO(unsigned pasid), + TP_ARGS(pasid) +); + +DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed, + TP_PROTO(unsigned pasid), + TP_ARGS(pasid) +); + TRACE_EVENT(amdgpu_bo_list_set, TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), TP_ARGS(list, bo), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d897c4c61a01..e28b73609fbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -46,6 +46,7 @@ #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) @@ -161,7 +162,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, break; case TTM_PL_TT: man->func = &amdgpu_gtt_mgr_func; - man->gpu_offset = adev->mc.gart_start; + man->gpu_offset = adev->gmc.gart_start; man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA; @@ -169,7 +170,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, case TTM_PL_VRAM: /* "On-card" video ram */ man->func = &amdgpu_vram_mgr_func; - man->gpu_offset = adev->mc.vram_start; + man->gpu_offset = adev->gmc.vram_start; man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; @@ -203,6 +204,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, .flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM }; + if (bo->type == ttm_bo_type_sg) { + placement->num_placement = 0; + placement->num_busy_placement = 0; + return; + } + if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; placement->busy_placement = &placements; @@ -213,13 +220,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: - if (adev->mman.buffer_funcs && - adev->mman.buffer_funcs_ring && - adev->mman.buffer_funcs_ring->ready == false) { + if (!adev->mman.buffer_funcs_enabled) { amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); - } else if (adev->mc.visible_vram_size < adev->mc.real_vram_size && + } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { - unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; + unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; struct drm_mm_node *node = bo->mem.mm_node; unsigned long pages_left; @@ -260,6 +265,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); + /* + * Don't verify access for KFD BOs. They don't have a GEM + * object associated with them. + */ + if (abo->kfd_bo) + return 0; + if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; return drm_vma_node_verify_access(&abo->gem_base.vma_node, @@ -331,7 +343,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE); - if (!ring->ready) { + if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -577,12 +589,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, amdgpu_move_null(bo, new_mem); return 0; } - if (adev->mman.buffer_funcs == NULL || - adev->mman.buffer_funcs_ring == NULL || - !adev->mman.buffer_funcs_ring->ready) { - /* use memcpy */ + + if (!adev->mman.buffer_funcs_enabled) goto memcpy; - } if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { @@ -621,6 +630,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ { struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type]; struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); + struct drm_mm_node *mm_node = mem->mm_node; mem->bus.addr = NULL; mem->bus.offset = 0; @@ -638,9 +648,18 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; /* check if it's visible */ - if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size) + if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size) return -EINVAL; - mem->bus.base = adev->mc.aper_base; + /* Only physically contiguous buffers apply. In a contiguous + * buffer, size of the first mm_node would match the number of + * pages in ttm_mem_reg. + */ + if (adev->mman.aper_base_kaddr && + (mm_node->size == mem->num_pages)) + mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr + + mem->bus.offset; + + mem->bus.base = adev->gmc.aper_base; mem->bus.is_iomem = true; break; default: @@ -674,7 +693,6 @@ struct amdgpu_ttm_gup_task_list { struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; - struct amdgpu_device *adev; u64 offset; uint64_t userptr; struct mm_struct *usermm; @@ -832,6 +850,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void*)ttm; uint64_t flags; int r = 0; @@ -858,9 +877,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, return 0; } - flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); + flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem); gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; - r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, + r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); if (r) @@ -891,7 +910,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) placement.num_busy_placement = 1; placement.busy_placement = &placements; placements.fpfn = 0; - placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT; + placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | TTM_PL_FLAG_TT; @@ -937,6 +956,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { + struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; int r; @@ -947,7 +967,7 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) return 0; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); + r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); if (r) DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", gtt->ttm.ttm.num_pages, gtt->offset); @@ -968,22 +988,20 @@ static struct ttm_backend_func amdgpu_backend_func = { .destroy = &amdgpu_ttm_backend_destroy, }; -static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev, - unsigned long size, uint32_t page_flags, - struct page *dummy_read_page) +static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, + uint32_t page_flags) { struct amdgpu_device *adev; struct amdgpu_ttm_tt *gtt; - adev = amdgpu_ttm_adev(bdev); + adev = amdgpu_ttm_adev(bo->bdev); gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL); if (gtt == NULL) { return NULL; } gtt->ttm.ttm.func = &amdgpu_backend_func; - gtt->adev = adev; - if (ttm_dma_tt_init(>t->ttm, bdev, size, page_flags, dummy_read_page)) { + if (ttm_sg_tt_init(>t->ttm, bo, page_flags)) { kfree(gtt); return NULL; } @@ -997,9 +1015,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, struct amdgpu_ttm_tt *gtt = (void *)ttm; bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG); - if (ttm->state != tt_unpopulated) - return 0; - if (gtt && gtt->userptr) { ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL); if (!ttm->sg) @@ -1012,7 +1027,8 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm, if (slave && ttm->sg) { drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages, - gtt->ttm.dma_address, ttm->num_pages); + gtt->ttm.dma_address, + ttm->num_pages); ttm->state = tt_unbound; return 0; } @@ -1170,6 +1186,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, { unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; + struct reservation_object_list *flist; + struct dma_fence *f; + int i; + + /* If bo is a KFD BO, check if the bo belongs to the current process. + * If true, then return false as any KFD process needs all its BOs to + * be resident to run successfully + */ + flist = reservation_object_get_list(bo->resv); + if (flist) { + for (i = 0; i < flist->shared_count; ++i) { + f = rcu_dereference_protected(flist->shared[i], + reservation_object_held(bo->resv)); + if (amdkfd_fence_check_mm(f, current->mm)) + return false; + } + } switch (bo->mem.mem_type) { case TTM_PL_TT: @@ -1212,7 +1245,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); pos = (nodes->start << PAGE_SHIFT) + offset; - while (len && pos < adev->mc.mc_vram_size) { + while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; uint32_t bytes = 4 - (pos & 3); uint32_t shift = (pos & 3) * 8; @@ -1298,7 +1331,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) struct ttm_operation_ctx ctx = { false, false }; int r = 0; int i; - u64 vram_size = adev->mc.visible_vram_size; + u64 vram_size = adev->gmc.visible_vram_size; u64 offset = adev->fw_vram_usage.start_offset; u64 size = adev->fw_vram_usage.size; struct amdgpu_bo *bo; @@ -1309,11 +1342,12 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev) if (adev->fw_vram_usage.size > 0 && adev->fw_vram_usage.size <= vram_size) { - r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, - PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, - &adev->fw_vram_usage.reserved_bo); + r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + ttm_bo_type_kernel, NULL, + &adev->fw_vram_usage.reserved_bo); if (r) goto error_create; @@ -1387,8 +1421,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } adev->mman.initialized = true; + + /* We opt to avoid OOM on system pages allocations */ + adev->mman.bdev.no_retry = true; + r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, - adev->mc.real_vram_size >> PAGE_SHIFT); + adev->gmc.real_vram_size >> PAGE_SHIFT); if (r) { DRM_ERROR("Failed initializing VRAM heap.\n"); return r; @@ -1397,11 +1435,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Reduce size of CPU-visible VRAM if requested */ vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024; if (amdgpu_vis_vram_limit > 0 && - vis_vram_limit <= adev->mc.visible_vram_size) - adev->mc.visible_vram_size = vis_vram_limit; + vis_vram_limit <= adev->gmc.visible_vram_size) + adev->gmc.visible_vram_size = vis_vram_limit; /* Change the size here instead of the init above so only lpfn is affected */ - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, false); +#ifdef CONFIG_64BIT + adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, + adev->gmc.visible_vram_size); +#endif /* *The reserved vram for firmware must be pinned to the specified @@ -1412,21 +1454,21 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } - r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->stolen_vga_memory, NULL, NULL); if (r) return r; DRM_INFO("amdgpu: %uM of VRAM memory ready\n", - (unsigned) (adev->mc.real_vram_size / (1024 * 1024))); + (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); if (amdgpu_gtt_size == -1) { struct sysinfo si; si_meminfo(&si); gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20), - adev->mc.mc_vram_size), + adev->gmc.mc_vram_size), ((uint64_t)si.totalram * si.mem_unit * 3/4)); } else @@ -1494,6 +1536,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_debugfs_fini(adev); amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); + if (adev->mman.aper_base_kaddr) + iounmap(adev->mman.aper_base_kaddr); + adev->mman.aper_base_kaddr = NULL; ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); @@ -1509,18 +1554,30 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) DRM_INFO("amdgpu: ttm finalized\n"); } -/* this should only be called at bootup or when userspace - * isn't running */ -void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size) +/** + * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions + * + * @adev: amdgpu_device pointer + * @enable: true when we can use buffer functions. + * + * Enable/disable use of buffer functions during suspend/resume. This should + * only be called at bootup or when userspace isn't running. + */ +void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) { - struct ttm_mem_type_manager *man; + struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM]; + uint64_t size; - if (!adev->mman.initialized) + if (!adev->mman.initialized || adev->in_gpu_reset) return; - man = &adev->mman.bdev.man[TTM_PL_VRAM]; /* this just adjusts TTM size idea, which sets lpfn to the correct value */ + if (enable) + size = adev->gmc.real_vram_size; + else + size = adev->gmc.visible_vram_size; man->size = size >> PAGE_SHIFT; + adev->mman.buffer_funcs_enabled = enable; } int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) @@ -1559,7 +1616,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); - *addr = adev->mc.gart_start; + *addr = adev->gmc.gart_start; *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE; @@ -1619,6 +1676,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r; + if (direct_submit && !ring->ready) { + DRM_ERROR("Trying to move memory with ring turned off.\n"); + return -EINVAL; + } + max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; @@ -1677,13 +1739,12 @@ error_free: } int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint64_t src_data, + uint32_t src_data, struct reservation_object *resv, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - uint32_t max_bytes = 8 * - adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde; + uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct drm_mm_node *mm_node; @@ -1693,7 +1754,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct amdgpu_job *job; int r; - if (!ring->ready) { + if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to clear memory with ring turned off.\n"); return -EINVAL; } @@ -1714,9 +1775,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, num_pages -= mm_node->size; ++mm_node; } - - /* num of dwords for each SDMA_OP_PTEPDE cmd */ - num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; + num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; /* for IB padding */ num_dw += 64; @@ -1741,16 +1800,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t byte_count = mm_node->size << PAGE_SHIFT; uint64_t dst_addr; - WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8"); - dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); while (byte_count) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_vm_set_pte_pde(adev, &job->ibs[0], - dst_addr, 0, - cur_size_in_bytes >> 3, 0, - src_data); + amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, + dst_addr, cur_size_in_bytes); dst_addr += cur_size_in_bytes; byte_count -= cur_size_in_bytes; @@ -1811,14 +1866,14 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return -ENXIO; while (size) { unsigned long flags; uint32_t value; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return result; spin_lock_irqsave(&adev->mmio_idx_lock, flags); @@ -1850,14 +1905,14 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return -ENXIO; while (size) { unsigned long flags; uint32_t value; - if (*pos >= adev->mc.mc_vram_size) + if (*pos >= adev->gmc.mc_vram_size) return result; r = get_user(value, (uint32_t *)buf); @@ -1935,38 +1990,98 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { #endif -static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf, - size_t size, loff_t *pos) +static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) { struct amdgpu_device *adev = file_inode(f)->i_private; - int r; - uint64_t phys; struct iommu_domain *dom; + ssize_t result = 0; + int r; - // always return 8 bytes - if (size != 8) - return -EINVAL; + dom = iommu_get_domain_for_dev(adev->dev); - // only accept page addresses - if (*pos & 0xFFF) - return -EINVAL; + while (size) { + phys_addr_t addr = *pos & PAGE_MASK; + loff_t off = *pos & ~PAGE_MASK; + size_t bytes = PAGE_SIZE - off; + unsigned long pfn; + struct page *p; + void *ptr; + + bytes = bytes < size ? bytes : size; + + addr = dom ? iommu_iova_to_phys(dom, addr) : addr; + + pfn = addr >> PAGE_SHIFT; + if (!pfn_valid(pfn)) + return -EPERM; + + p = pfn_to_page(pfn); + if (p->mapping != adev->mman.bdev.dev_mapping) + return -EPERM; + + ptr = kmap(p); + r = copy_to_user(buf, ptr, bytes); + kunmap(p); + if (r) + return -EFAULT; + + size -= bytes; + *pos += bytes; + result += bytes; + } + + return result; +} + +static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = file_inode(f)->i_private; + struct iommu_domain *dom; + ssize_t result = 0; + int r; dom = iommu_get_domain_for_dev(adev->dev); - if (dom) - phys = iommu_iova_to_phys(dom, *pos); - else - phys = *pos; - r = copy_to_user(buf, &phys, 8); - if (r) - return -EFAULT; + while (size) { + phys_addr_t addr = *pos & PAGE_MASK; + loff_t off = *pos & ~PAGE_MASK; + size_t bytes = PAGE_SIZE - off; + unsigned long pfn; + struct page *p; + void *ptr; + + bytes = bytes < size ? bytes : size; + + addr = dom ? iommu_iova_to_phys(dom, addr) : addr; - return 8; + pfn = addr >> PAGE_SHIFT; + if (!pfn_valid(pfn)) + return -EPERM; + + p = pfn_to_page(pfn); + if (p->mapping != adev->mman.bdev.dev_mapping) + return -EPERM; + + ptr = kmap(p); + r = copy_from_user(ptr, buf, bytes); + kunmap(p); + if (r) + return -EFAULT; + + size -= bytes; + *pos += bytes; + result += bytes; + } + + return result; } -static const struct file_operations amdgpu_ttm_iova_fops = { +static const struct file_operations amdgpu_ttm_iomem_fops = { .owner = THIS_MODULE, - .read = amdgpu_iova_to_phys_read, + .read = amdgpu_iomem_read, + .write = amdgpu_iomem_write, .llseek = default_llseek }; @@ -1979,7 +2094,7 @@ static const struct { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS { "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT }, #endif - { "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM }, + { "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM }, }; #endif @@ -2001,9 +2116,9 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) if (IS_ERR(ent)) return PTR_ERR(ent); if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM) - i_size_write(ent->d_inode, adev->mc.mc_vram_size); + i_size_write(ent->d_inode, adev->gmc.mc_vram_size); else if (ttm_debugfs_entries[count].domain == TTM_PL_TT) - i_size_write(ent->d_inode, adev->mc.gart_size); + i_size_write(ent->d_inode, adev->gmc.gart_size); adev->mman.debugfs_entries[count] = ent; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 167856f6080f..6ea7de863041 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -44,6 +44,7 @@ struct amdgpu_mman { struct ttm_bo_device bdev; bool mem_global_referenced; bool initialized; + void __iomem *aper_base_kaddr; #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_entries[8]; @@ -52,6 +53,7 @@ struct amdgpu_mman { /* buffer handling */ const struct amdgpu_buffer_funcs *buffer_funcs; struct amdgpu_ring *buffer_funcs_ring; + bool buffer_funcs_enabled; struct mutex gtt_window_lock; /* Scheduler entity for buffer moves */ @@ -74,6 +76,11 @@ int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); +int amdgpu_ttm_init(struct amdgpu_device *adev); +void amdgpu_ttm_fini(struct amdgpu_device *adev); +void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, + bool enable); + int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, @@ -86,7 +93,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct reservation_object *resv, struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint64_t src_data, + uint32_t src_data, struct reservation_object *resv, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b2eae86bf906..f3c459b7c0bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -299,12 +299,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) cancel_delayed_work_sync(&adev->uvd.idle_work); - for (i = 0; i < adev->uvd.max_handles; ++i) - if (atomic_read(&adev->uvd.handles[i])) - break; + /* only valid for physical mode */ + if (adev->asic_type < CHIP_POLARIS10) { + for (i = 0; i < adev->uvd.max_handles; ++i) + if (atomic_read(&adev->uvd.handles[i])) + break; - if (i == AMDGPU_MAX_UVD_HANDLES) - return 0; + if (i == adev->uvd.max_handles) + return 0; + } size = amdgpu_bo_size(adev->uvd.vcpu_bo); ptr = adev->uvd.cpu_addr; @@ -952,37 +955,28 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, bool direct, struct dma_fence **fence) { - struct ttm_operation_ctx ctx = { true, false }; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; + struct amdgpu_device *adev = ring->adev; + struct dma_fence *f = NULL; struct amdgpu_job *job; struct amdgpu_ib *ib; - struct dma_fence *f = NULL; - struct amdgpu_device *adev = ring->adev; - uint64_t addr; uint32_t data[4]; - int i, r; - - memset(&tv, 0, sizeof(tv)); - tv.bo = &bo->tbo; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); + uint64_t addr; + long r; + int i; - r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); - if (r) - return r; + amdgpu_bo_kunmap(bo); + amdgpu_bo_unpin(bo); if (!ring->adev->uvd.address_64_bit) { + struct ttm_operation_ctx ctx = { true, false }; + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); amdgpu_uvd_force_into_uvd_segment(bo); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + goto err; } - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - goto err; - r = amdgpu_job_alloc_with_ib(adev, 64, &job); if (r) goto err; @@ -1014,6 +1008,14 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { + r = reservation_object_wait_timeout_rcu(bo->tbo.resv, + true, false, + msecs_to_jiffies(10)); + if (r == 0) + r = -ETIMEDOUT; + if (r < 0) + goto err_free; + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); job->fence = dma_fence_get(f); if (r) @@ -1021,17 +1023,23 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, amdgpu_job_free(job); } else { + r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, + AMDGPU_FENCE_OWNER_UNDEFINED, false); + if (r) + goto err_free; + r = amdgpu_job_submit(job, ring, &adev->uvd.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &f); if (r) goto err_free; } - ttm_eu_fence_buffer_objects(&ticket, &head, f); + amdgpu_bo_fence(bo, f, false); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); if (fence) *fence = dma_fence_get(f); - amdgpu_bo_unref(&bo); dma_fence_put(f); return 0; @@ -1040,7 +1048,8 @@ err_free: amdgpu_job_free(job); err: - ttm_eu_backoff_reservation(&ticket, &head); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } @@ -1051,31 +1060,16 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; - r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &bo); + r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, (void **)&msg); if (r) return r; - r = amdgpu_bo_reserve(bo, false); - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - r = amdgpu_bo_kmap(bo, (void **)&msg); - if (r) { - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - return r; - } - /* stitch together an UVD create msg */ msg[0] = cpu_to_le32(0x00000de4); msg[1] = cpu_to_le32(0x00000000); @@ -1091,9 +1085,6 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = 11; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - return amdgpu_uvd_send_msg(ring, bo, true, fence); } @@ -1101,31 +1092,16 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; - r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &bo); + r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, (void **)&msg); if (r) return r; - r = amdgpu_bo_reserve(bo, false); - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - r = amdgpu_bo_kmap(bo, (void **)&msg); - if (r) { - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - return r; - } - /* stitch together an UVD destroy msg */ msg[0] = cpu_to_le32(0x00000de4); msg[1] = cpu_to_le32(0x00000002); @@ -1134,9 +1110,6 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = 4; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - return amdgpu_uvd_send_msg(ring, bo, direct, fence); } @@ -1146,9 +1119,6 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) container_of(work, struct amdgpu_device, uvd.idle_work.work); unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring); - if (amdgpu_sriov_vf(adev)) - return; - if (fences == 0) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, false); @@ -1168,11 +1138,12 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); + bool set_clocks; if (amdgpu_sriov_vf(adev)) return; + set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); if (set_clocks) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, true); @@ -1188,7 +1159,8 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) { - schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); + if (!amdgpu_sriov_vf(ring->adev)) + schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index d274ae535530..9152478d7528 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -300,9 +300,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work) container_of(work, struct amdgpu_device, vce.idle_work.work); unsigned i, count = 0; - if (amdgpu_sriov_vf(adev)) - return; - for (i = 0; i < adev->vce.num_rings; i++) count += amdgpu_fence_count_emitted(&adev->vce.ring[i]); @@ -362,7 +359,8 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) */ void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring) { - schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); + if (!amdgpu_sriov_vf(ring->adev)) + schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 0fd378ae92c3..71781267ee4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -30,6 +30,8 @@ #define AMDGPU_VCE_HARVEST_VCE0 (1 << 0) #define AMDGPU_VCE_HARVEST_VCE1 (1 << 1) +#define AMDGPU_VCE_FW_53_45 ((53 << 24) | (45 << 16)) + struct amdgpu_vce { struct amdgpu_bo *vcpu_bo; uint64_t gpu_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 837962118dbc..58e495330b38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -270,34 +270,17 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) return r; } -static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, - bool direct, struct dma_fence **fence) +static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, + struct amdgpu_bo *bo, bool direct, + struct dma_fence **fence) { - struct ttm_operation_ctx ctx = { true, false }; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head head; + struct amdgpu_device *adev = ring->adev; + struct dma_fence *f = NULL; struct amdgpu_job *job; struct amdgpu_ib *ib; - struct dma_fence *f = NULL; - struct amdgpu_device *adev = ring->adev; uint64_t addr; int i, r; - memset(&tv, 0, sizeof(tv)); - tv.bo = &bo->tbo; - - INIT_LIST_HEAD(&head); - list_add(&tv.head, &head); - - r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); - if (r) - return r; - - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - goto err; - r = amdgpu_job_alloc_with_ib(adev, 64, &job); if (r) goto err; @@ -330,11 +313,12 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *b goto err_free; } - ttm_eu_fence_buffer_objects(&ticket, &head, f); + amdgpu_bo_fence(bo, f, false); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); if (fence) *fence = dma_fence_get(f); - amdgpu_bo_unref(&bo); dma_fence_put(f); return 0; @@ -343,7 +327,8 @@ err_free: amdgpu_job_free(job); err: - ttm_eu_backoff_reservation(&ticket, &head); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); return r; } @@ -351,31 +336,16 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; - r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &bo); + r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, (void **)&msg); if (r) return r; - r = amdgpu_bo_reserve(bo, false); - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - r = amdgpu_bo_kmap(bo, (void **)&msg); - if (r) { - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - return r; - } - msg[0] = cpu_to_le32(0x00000028); msg[1] = cpu_to_le32(0x00000038); msg[2] = cpu_to_le32(0x00000001); @@ -393,9 +363,6 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = 14; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); } @@ -403,31 +370,16 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han bool direct, struct dma_fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo; + struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; - r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, 0, &bo); + r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &bo, NULL, (void **)&msg); if (r) return r; - r = amdgpu_bo_reserve(bo, false); - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - r = amdgpu_bo_kmap(bo, (void **)&msg); - if (r) { - amdgpu_bo_unreserve(bo); - amdgpu_bo_unref(&bo); - return r; - } - msg[0] = cpu_to_le32(0x00000028); msg[1] = cpu_to_le32(0x00000018); msg[2] = cpu_to_le32(0x00000000); @@ -437,9 +389,6 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = 6; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - amdgpu_bo_kunmap(bo); - amdgpu_bo_unreserve(bo); - return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index e7dfb7b44b4b..21adb1b6e5cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,7 +22,21 @@ */ #include "amdgpu.h" -#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define MAX_KIQ_REG_TRY 20 + +uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) +{ + uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; + + addr -= AMDGPU_VA_RESERVED_SIZE; + + if (addr >= AMDGPU_VA_HOLE_START) + addr |= AMDGPU_VA_HOLE_END; + + return addr; +} bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) { @@ -55,14 +69,14 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) { /* * amdgpu_map_static_csa should be called during amdgpu_vm_init - * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE" - * to this VM, and each command submission of GFX should use this virtual - * address within META_DATA init package to support SRIOV gfx preemption. + * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command + * submission of GFX should use this virtual address within META_DATA init + * package to support SRIOV gfx preemption. */ - int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va **bo_va) { + uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK; struct ww_acquire_ctx ticket; struct list_head list; struct amdgpu_bo_list_entry pd; @@ -90,7 +104,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR, + r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); @@ -99,7 +113,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; } - r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE, + r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE, AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE); @@ -125,9 +139,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { - signed long r; + signed long r, cnt = 0; unsigned long flags; - uint32_t val, seq; + uint32_t seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; @@ -141,18 +155,39 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld\n", r); - return ~0; + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_read; + + if (in_interrupt()) + might_sleep(); + + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); } - val = adev->wb.wb[adev->virt.reg_val_offs]; - return val; + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_read; + + return adev->wb.wb[adev->virt.reg_val_offs]; + +failed_kiq_read: + pr_err("failed to read reg:%x\n", reg); + return ~0; } void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { - signed long r; + signed long r, cnt = 0; unsigned long flags; uint32_t seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; @@ -168,8 +203,34 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - if (r < 1) - DRM_ERROR("wait for kiq fence error: %ld\n", r); + + /* don't wait anymore for gpu reset case because this way may + * block gpu_recover() routine forever, e.g. this virt_kiq_rreg + * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will + * never return if we keep waiting in virt_kiq_rreg, which cause + * gpu_recover() hang there. + * + * also don't wait anymore for IRQ context + * */ + if (r < 1 && (adev->in_gpu_reset || in_interrupt())) + goto failed_kiq_write; + + if (in_interrupt()) + might_sleep(); + + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq_write; + + return; + +failed_kiq_write: + pr_err("failed to write reg:%x\n", reg); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 6a83425aa9ed..880ac113a3a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -251,8 +251,7 @@ struct amdgpu_virt { uint32_t gim_feature; }; -#define AMDGPU_CSA_SIZE (8 * 1024) -#define AMDGPU_CSA_VADDR (AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE) +#define AMDGPU_CSA_SIZE (8 * 1024) #define amdgpu_sriov_enabled(adev) \ ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) @@ -279,6 +278,8 @@ static inline bool is_virtual_machine(void) } struct amdgpu_vm; + +uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev); bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); int amdgpu_allocate_static_csa(struct amdgpu_device *adev); int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5afbc5e714d0..24474294c92a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -75,7 +75,8 @@ struct amdgpu_pte_update_params { /* indirect buffer to fill with commands */ struct amdgpu_ib *ib; /* Function which actually does the update */ - void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, + void (*func)(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); /* The next two are used during VM update by CPU @@ -257,6 +258,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) } /** + * amdgpu_vm_clear_bo - initially clear the PDs/PTs + * + * @adev: amdgpu_device pointer + * @bo: BO to clear + * @level: level this BO is at + * + * Root PD needs to be reserved when calling this. + */ +static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, + struct amdgpu_vm *vm, struct amdgpu_bo *bo, + unsigned level, bool pte_support_ats) +{ + struct ttm_operation_ctx ctx = { true, false }; + struct dma_fence *fence = NULL; + unsigned entries, ats_entries; + struct amdgpu_ring *ring; + struct amdgpu_job *job; + uint64_t addr; + int r; + + addr = amdgpu_bo_gpu_offset(bo); + entries = amdgpu_bo_size(bo) / 8; + + if (pte_support_ats) { + if (level == adev->vm_manager.root_level) { + ats_entries = amdgpu_vm_level_shift(adev, level); + ats_entries += AMDGPU_GPU_PAGE_SHIFT; + ats_entries = AMDGPU_VA_HOLE_START >> ats_entries; + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + } else { + ats_entries = entries; + entries = 0; + } + } else { + ats_entries = 0; + } + + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + + r = reservation_object_reserve_shared(bo->tbo.resv); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + goto error; + + r = amdgpu_job_alloc_with_ib(adev, 64, &job); + if (r) + goto error; + + if (ats_entries) { + uint64_t ats_value; + + ats_value = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) + ats_value |= AMDGPU_PDE_PTE; + + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, + ats_entries, 0, ats_value); + addr += ats_entries * 8; + } + + if (entries) + amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0, + entries, 0, 0); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + + WARN_ON(job->ibs[0].length_dw > 64); + r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, + AMDGPU_FENCE_OWNER_UNDEFINED, false); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + amdgpu_bo_fence(bo, fence, true); + dma_fence_put(fence); + + if (bo->shadow) + return amdgpu_vm_clear_bo(adev, vm, bo->shadow, + level, pte_support_ats); + + return 0; + +error_free: + amdgpu_job_free(job); + +error: + return r; +} + +/** * amdgpu_vm_alloc_levels - allocate the PD/PT levels * * @adev: amdgpu_device pointer @@ -270,13 +369,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_pt *parent, uint64_t saddr, uint64_t eaddr, - unsigned level) + unsigned level, bool ats) { unsigned shift = amdgpu_vm_level_shift(adev, level); unsigned pt_idx, from, to; - int r; u64 flags; - uint64_t init_value = 0; + int r; if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); @@ -299,21 +397,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_SHADOW); - if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) - init_value |= AMDGPU_PDE_PTE; - - } - /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.base.bo->tbo.resv; @@ -323,16 +413,23 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, if (!entry->base.bo) { r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, level), - AMDGPU_GPU_PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - flags, - NULL, resv, init_value, &pt); + AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, flags, + ttm_bo_type_kernel, resv, &pt); if (r) return r; + r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats); + if (r) { + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt); + return r; + } + if (vm->use_cpu_for_update) { r = amdgpu_bo_kmap(pt, NULL); if (r) { + amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt); return r; } @@ -356,7 +453,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, uint64_t sub_eaddr = (pt_idx == to) ? eaddr : ((1 << shift) - 1); r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, - sub_eaddr, level); + sub_eaddr, level, ats); if (r) return r; } @@ -379,26 +476,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size) { - uint64_t last_pfn; uint64_t eaddr; + bool ats = false; /* validate the parameters */ if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) return -EINVAL; eaddr = saddr + size - 1; - last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; - if (last_pfn >= adev->vm_manager.max_pfn) { - dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", - last_pfn, adev->vm_manager.max_pfn); - return -EINVAL; - } + + if (vm->pte_support_ats) + ats = saddr < AMDGPU_VA_HOLE_START; saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; + if (eaddr >= adev->vm_manager.max_pfn) { + dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n", + eaddr, adev->vm_manager.max_pfn); + return -EINVAL; + } + return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, - adev->vm_manager.root_level); + adev->vm_manager.root_level, ats); } /** @@ -465,7 +565,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) { - return (adev->mc.real_vram_size == adev->mc.visible_vram_size); + return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size); } /** @@ -491,14 +591,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ id->oa_base != job->oa_base || id->oa_size != job->oa_size); bool vm_flush_needed = job->vm_needs_flush; + bool pasid_mapping_needed = id->pasid != job->pasid || + !id->pasid_mapping || + !dma_fence_is_signaled(id->pasid_mapping); + struct dma_fence *fence = NULL; unsigned patch_offset = 0; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { gds_switch_needed = true; vm_flush_needed = true; + pasid_mapping_needed = true; } + gds_switch_needed &= !!ring->funcs->emit_gds_switch; + vm_flush_needed &= !!ring->funcs->emit_vm_flush; + pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && + ring->funcs->emit_wreg; + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; @@ -508,23 +618,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); - if (ring->funcs->emit_vm_flush && vm_flush_needed) { - struct dma_fence *fence; - + if (vm_flush_needed) { trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); + } + if (pasid_mapping_needed) + amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); + + if (vm_flush_needed || pasid_mapping_needed) { r = amdgpu_fence_emit(ring, &fence); if (r) return r; + } + if (vm_flush_needed) { mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); - id->last_flush = fence; - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); + id->last_flush = dma_fence_get(fence); + id->current_gpu_reset_count = + atomic_read(&adev->gpu_reset_counter); mutex_unlock(&id_mgr->lock); } + if (pasid_mapping_needed) { + id->pasid = job->pasid; + dma_fence_put(id->pasid_mapping); + id->pasid_mapping = dma_fence_get(fence); + } + dma_fence_put(fence); + if (ring->funcs->emit_gds_switch && gds_switch_needed) { id->gds_base = job->gds_base; id->gds_size = job->gds_size; @@ -578,6 +701,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * amdgpu_vm_do_set_ptes - helper to call the right asic function * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -588,10 +712,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * to setup the page table using the DMA. */ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { + pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); if (count < 3) { @@ -608,6 +734,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -617,13 +744,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, * Traces the parameters and calls the DMA function to copy the PTEs. */ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { uint64_t src = (params->src + (addr >> 12) * 8); - + pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_copy_ptes(pe, src, count); amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); @@ -657,6 +785,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU * * @params: see amdgpu_pte_update_params definition + * @bo: PD/PT to update * @pe: kmap addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -666,6 +795,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) * Write count number of PT/PD entries directly. */ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) @@ -673,14 +803,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, unsigned int i; uint64_t value; + pe += (unsigned long)amdgpu_bo_kptr(bo); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); for (i = 0; i < count; i++) { value = params->pages_addr ? amdgpu_vm_map_gart(params->pages_addr, addr) : addr; - amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, - i, value, flags); + amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe, + i, value, flags); addr += incr; } } @@ -714,8 +846,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, struct amdgpu_vm_pt *parent, struct amdgpu_vm_pt *entry) { - struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo; - uint64_t pd_addr, shadow_addr = 0; + struct amdgpu_bo *bo = parent->base.bo, *pbo; uint64_t pde, pt, flags; unsigned level; @@ -723,29 +854,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, if (entry->huge) return; - if (vm->use_cpu_for_update) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - } else { - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - shadow = parent->base.bo->shadow; - if (shadow) - shadow_addr = amdgpu_bo_gpu_offset(shadow); - } - - for (level = 0, pbo = parent->base.bo->parent; pbo; ++level) + for (level = 0, pbo = bo->parent; pbo; ++level) pbo = pbo->parent; level += params->adev->vm_manager.root_level; - pt = amdgpu_bo_gpu_offset(bo); + pt = amdgpu_bo_gpu_offset(entry->base.bo); flags = AMDGPU_PTE_VALID; - amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags); - if (shadow) { - pde = shadow_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, flags); - } - - pde = pd_addr + (entry - parent->entries) * 8; - params->func(params, pde, pt, 1, 0, flags); + amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags); + pde = (entry - parent->entries) * 8; + if (bo->shadow) + params->func(params, bo->shadow, pde, pt, 1, 0, flags); + params->func(params, bo, pde, pt, 1, 0, flags); } /* @@ -856,7 +975,7 @@ restart: if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); } else if (params.ib->length_dw == 0) { amdgpu_job_free(job); } else { @@ -870,11 +989,6 @@ restart: amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, root->tbo.resv, AMDGPU_FENCE_OWNER_VM, false); - if (root->shadow) - amdgpu_sync_resv(adev, &job->sync, - root->shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM, false); - WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &fence); @@ -946,7 +1060,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, unsigned nptes, uint64_t dst, uint64_t flags) { - uint64_t pd_addr, pde; + uint64_t pde; /* In the case of a mixed PT the PDE must point to it*/ if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && @@ -967,21 +1081,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, } entry->huge = true; - amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0, - &dst, &flags); + amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags); - if (p->func == amdgpu_vm_cpu_set_ptes) { - pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo); - } else { - if (parent->base.bo->shadow) { - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow); - pde = pd_addr + (entry - parent->entries) * 8; - p->func(p, pde, dst, 1, 0, flags); - } - pd_addr = amdgpu_bo_gpu_offset(parent->base.bo); - } - pde = pd_addr + (entry - parent->entries) * 8; - p->func(p, pde, dst, 1, 0, flags); + pde = (entry - parent->entries) * 8; + if (parent->base.bo->shadow) + p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags); + p->func(p, parent->base.bo, pde, dst, 1, 0, flags); } /** @@ -1007,7 +1112,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t addr, pe_start; struct amdgpu_bo *pt; unsigned nptes; - bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); /* walk over the address space and update the page tables */ for (addr = start; addr < end; addr += nptes, @@ -1030,20 +1134,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, continue; pt = entry->base.bo; - if (use_cpu_update) { - pe_start = (unsigned long)amdgpu_bo_kptr(pt); - } else { - if (pt->shadow) { - pe_start = amdgpu_bo_gpu_offset(pt->shadow); - pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - } - pe_start = amdgpu_bo_gpu_offset(pt); - } - - pe_start += (addr & mask) * 8; - params->func(params, pe_start, dst, nptes, + pe_start = (addr & mask) * 8; + if (pt->shadow) + params->func(params, pt->shadow, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); + params->func(params, pt, pe_start, dst, nptes, AMDGPU_GPU_PAGE_SIZE, flags); } @@ -1204,11 +1299,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } else { /* set page commands needed */ - ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; + ndw += ncmds * 10; /* extra commands for begin/end fragments */ - ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw - * adev->vm_manager.fragment_size; + ndw += 2 * 10 * adev->vm_manager.fragment_size; params.func = amdgpu_vm_do_set_ptes; } @@ -1457,7 +1551,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, if (vm->use_cpu_for_update) { /* Flush HDP */ mb(); - amdgpu_gart_flush_gpu_tlb(adev, 0); + amdgpu_asic_flush_hdp(adev, NULL); } spin_lock(&vm->status_lock); @@ -1485,7 +1579,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) spin_lock_irqsave(&adev->vm_manager.prt_lock, flags); enable = !!atomic_read(&adev->vm_manager.num_prt_users); - adev->gart.gart_funcs->set_prt(adev, enable); + adev->gmc.gmc_funcs->set_prt(adev, enable); spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags); } @@ -1494,7 +1588,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) */ static void amdgpu_vm_prt_get(struct amdgpu_device *adev) { - if (!adev->gart.gart_funcs->set_prt) + if (!adev->gmc.gmc_funcs->set_prt) return; if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1) @@ -1529,7 +1623,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, { struct amdgpu_prt_cb *cb; - if (!adev->gart.gart_funcs->set_prt) + if (!adev->gmc.gmc_funcs->set_prt) return; cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL); @@ -1623,16 +1717,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence) { struct amdgpu_bo_va_mapping *mapping; + uint64_t init_pte_value = 0; struct dma_fence *f = NULL; int r; - uint64_t init_pte_value = 0; while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats) + if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, @@ -2262,11 +2356,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); - uint64_t init_pde_value = 0, flags; unsigned ring_instance; struct amdgpu_ring *ring; struct drm_sched_rq *rq; unsigned long size; + uint64_t flags; int r, i; vm->va = RB_ROOT_CACHED; @@ -2295,23 +2389,19 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN) vm->pte_support_ats = true; - init_pde_value = AMDGPU_PTE_DEFAULT_ATC - | AMDGPU_PDE_PTE; - - } - } else + } else { vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_GFX); + } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), "CPU update of VM recommended only for large BAR system\n"); vm->last_update = NULL; - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; if (vm->use_cpu_for_update) flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; else @@ -2319,9 +2409,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, AMDGPU_GEM_CREATE_SHADOW); size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); - r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM, - flags, NULL, NULL, init_pde_value, - &vm->root.base.bo); + r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags, + ttm_bo_type_kernel, NULL, &vm->root.base.bo); if (r) goto error_free_sched_entity; @@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, + adev->vm_manager.root_level, + vm->pte_support_ats); + if (r) + goto error_unreserve; + vm->root.base.vm = vm; list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va); list_add_tail(&vm->root.base.vm_status, &vm->evicted); @@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, return 0; +error_unreserve: + amdgpu_bo_unreserve(vm->root.base.bo); + error_free_root: amdgpu_bo_unref(&vm->root.base.bo->shadow); amdgpu_bo_unref(&vm->root.base.bo); @@ -2405,7 +2503,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev, void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; - bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; u64 fault; int i, r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 21a80f1bb2b9..e9841518343e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -28,6 +28,7 @@ #include <linux/kfifo.h> #include <linux/rbtree.h> #include <drm/gpu_scheduler.h> +#include <drm/drm_file.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -99,7 +100,7 @@ struct amdgpu_bo_list_entry; #define AMDGPU_MMHUB 1 /* hardcode that limit for now */ -#define AMDGPU_VA_RESERVED_SIZE (8ULL << 20) +#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) /* VA hole for 48bit addresses on Vega10 */ #define AMDGPU_VA_HOLE_START 0x0000800000000000ULL diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 4acca92f6a52..9aca653bec07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -89,11 +89,11 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, uint64_t start = node->start << PAGE_SHIFT; uint64_t end = (node->size + node->start) << PAGE_SHIFT; - if (start >= adev->mc.visible_vram_size) + if (start >= adev->gmc.visible_vram_size) return 0; - return (end > adev->mc.visible_vram_size ? - adev->mc.visible_vram_size : end) - start; + return (end > adev->gmc.visible_vram_size ? + adev->gmc.visible_vram_size : end) - start; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 2af26d2da127..d702fb8e3427 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -34,7 +34,7 @@ #include <linux/backlight.h> #include "bif/bif_4_1_d.h" -static u8 +u8 amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev) { u8 backlight_level; @@ -48,7 +48,7 @@ amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev) return backlight_level; } -static void +void amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev, u8 backlight_level) { diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h index 2bdec40515ce..f77cbdef679e 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h @@ -25,6 +25,11 @@ #define __ATOMBIOS_ENCODER_H__ u8 +amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev); +void +amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev, + u8 backlight_level); +u8 amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder); void amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder, diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index a0943aa8d1d3..98d1dd253596 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -65,6 +65,8 @@ MODULE_FIRMWARE("radeon/hawaii_k_smc.bin"); #define VOLTAGE_VID_OFFSET_SCALE1 625 #define VOLTAGE_VID_OFFSET_SCALE2 100 +static const struct amd_pm_funcs ci_dpm_funcs; + static const struct ci_pt_defaults defaults_hawaii_xt = { 1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0xB0000, @@ -905,7 +907,7 @@ static bool ci_dpm_vblank_too_short(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; u32 vblank_time = amdgpu_dpm_get_vblank_time(adev); - u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; + u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300; /* disable mclk switching if the refresh is >120Hz, even if the * blanking period would allow it @@ -2954,7 +2956,7 @@ static int ci_calculate_mclk_params(struct amdgpu_device *adev, mpll_ad_func_cntl &= ~MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK; mpll_ad_func_cntl |= (mpll_param.post_div << MPLL_AD_FUNC_CNTL__YCLK_POST_DIV__SHIFT); - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { mpll_dq_func_cntl &= ~(MPLL_DQ_FUNC_CNTL__YCLK_SEL_MASK | MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK); mpll_dq_func_cntl |= (mpll_param.yclk_sel << MPLL_DQ_FUNC_CNTL__YCLK_SEL__SHIFT) | @@ -3077,7 +3079,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev, (memory_clock <= pi->mclk_strobe_mode_threshold)) memory_level->StrobeEnable = 1; - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) { memory_level->StrobeRatio = ci_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable); if (pi->mclk_edc_enable_threshold && @@ -3695,40 +3697,6 @@ static int ci_find_boot_level(struct ci_single_dpm_table *table, return ret; } -static void ci_save_default_power_profile(struct amdgpu_device *adev) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct SMU7_Discrete_GraphicsLevel *levels = - pi->smc_state_table.GraphicsLevel; - uint32_t min_level = 0; - - pi->default_gfx_power_profile.activity_threshold = - be16_to_cpu(levels[0].ActivityLevel); - pi->default_gfx_power_profile.up_hyst = levels[0].UpH; - pi->default_gfx_power_profile.down_hyst = levels[0].DownH; - pi->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE; - - pi->default_compute_power_profile = pi->default_gfx_power_profile; - pi->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE; - - /* Optimize compute power profile: Use only highest - * 2 power levels (if more than 2 are available), Hysteresis: - * 0ms up, 5ms down - */ - if (pi->smc_state_table.GraphicsDpmLevelCount > 2) - min_level = pi->smc_state_table.GraphicsDpmLevelCount - 2; - else if (pi->smc_state_table.GraphicsDpmLevelCount == 2) - min_level = 1; - pi->default_compute_power_profile.min_sclk = - be32_to_cpu(levels[min_level].SclkFrequency); - - pi->default_compute_power_profile.up_hyst = 0; - pi->default_compute_power_profile.down_hyst = 5; - - pi->gfx_power_profile = pi->default_gfx_power_profile; - pi->compute_power_profile = pi->default_compute_power_profile; -} - static int ci_init_smc_table(struct amdgpu_device *adev) { struct ci_power_info *pi = ci_get_pi(adev); @@ -3752,7 +3720,7 @@ static int ci_init_smc_table(struct amdgpu_device *adev) if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC) table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC; - if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) + if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5; if (ulv->supported) { @@ -3874,8 +3842,6 @@ static int ci_init_smc_table(struct amdgpu_device *adev) if (ret) return ret; - ci_save_default_power_profile(adev); - return 0; } @@ -4549,12 +4515,12 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev, for (k = 0; k < table->num_entries; k++) { table->mc_reg_table_entry[k].mc_data[j] = (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); - if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) + if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) table->mc_reg_table_entry[k].mc_data[j] |= 0x100; } j++; - if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { + if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) { if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE) return -EINVAL; table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD; @@ -6277,6 +6243,7 @@ static int ci_dpm_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->powerplay.pp_funcs = &ci_dpm_funcs; ci_dpm_set_irq_funcs(adev); return 0; @@ -6639,9 +6606,10 @@ static int ci_dpm_force_clock_level(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct ci_power_info *pi = ci_get_pi(adev); - if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO | - AMD_DPM_FORCED_LEVEL_LOW | - AMD_DPM_FORCED_LEVEL_HIGH)) + if (adev->pm.dpm.forced_level != AMD_DPM_FORCED_LEVEL_MANUAL) + return -EINVAL; + + if (mask == 0) return -EINVAL; switch (type) { @@ -6662,15 +6630,15 @@ static int ci_dpm_force_clock_level(void *handle, case PP_PCIE: { uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask; - uint32_t level = 0; - while (tmp >>= 1) - level++; - - if (!pi->pcie_dpm_key_disabled) - amdgpu_ci_send_msg_to_smc_with_parameter(adev, + if (!pi->pcie_dpm_key_disabled) { + if (fls(tmp) != ffs(tmp)) + amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PCIeDPM_UnForceLevel); + else + amdgpu_ci_send_msg_to_smc_with_parameter(adev, PPSMC_MSG_PCIeDPM_ForceLevel, - level); + fls(tmp) - 1); + } break; } default: @@ -6752,222 +6720,6 @@ static int ci_dpm_set_mclk_od(void *handle, uint32_t value) return 0; } -static int ci_dpm_get_power_profile_state(void *handle, - struct amd_pp_profile *query) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - - if (!pi || !query) - return -EINVAL; - - if (query->type == AMD_PP_GFX_PROFILE) - memcpy(query, &pi->gfx_power_profile, - sizeof(struct amd_pp_profile)); - else if (query->type == AMD_PP_COMPUTE_PROFILE) - memcpy(query, &pi->compute_power_profile, - sizeof(struct amd_pp_profile)); - else - return -EINVAL; - - return 0; -} - -static int ci_populate_requested_graphic_levels(struct amdgpu_device *adev, - struct amd_pp_profile *request) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_dpm_table *dpm_table = &(pi->dpm_table); - struct SMU7_Discrete_GraphicsLevel *levels = - pi->smc_state_table.GraphicsLevel; - uint32_t array = pi->dpm_table_start + - offsetof(SMU7_Discrete_DpmTable, GraphicsLevel); - uint32_t array_size = sizeof(struct SMU7_Discrete_GraphicsLevel) * - SMU7_MAX_LEVELS_GRAPHICS; - uint32_t i; - - for (i = 0; i < dpm_table->sclk_table.count; i++) { - levels[i].ActivityLevel = - cpu_to_be16(request->activity_threshold); - levels[i].EnabledForActivity = 1; - levels[i].UpH = request->up_hyst; - levels[i].DownH = request->down_hyst; - } - - return amdgpu_ci_copy_bytes_to_smc(adev, array, (uint8_t *)levels, - array_size, pi->sram_end); -} - -static void ci_find_min_clock_masks(struct amdgpu_device *adev, - uint32_t *sclk_mask, uint32_t *mclk_mask, - uint32_t min_sclk, uint32_t min_mclk) -{ - struct ci_power_info *pi = ci_get_pi(adev); - struct ci_dpm_table *dpm_table = &(pi->dpm_table); - uint32_t i; - - for (i = 0; i < dpm_table->sclk_table.count; i++) { - if (dpm_table->sclk_table.dpm_levels[i].enabled && - dpm_table->sclk_table.dpm_levels[i].value >= min_sclk) - *sclk_mask |= 1 << i; - } - - for (i = 0; i < dpm_table->mclk_table.count; i++) { - if (dpm_table->mclk_table.dpm_levels[i].enabled && - dpm_table->mclk_table.dpm_levels[i].value >= min_mclk) - *mclk_mask |= 1 << i; - } -} - -static int ci_set_power_profile_state(struct amdgpu_device *adev, - struct amd_pp_profile *request) -{ - struct ci_power_info *pi = ci_get_pi(adev); - int tmp_result, result = 0; - uint32_t sclk_mask = 0, mclk_mask = 0; - - tmp_result = ci_freeze_sclk_mclk_dpm(adev); - if (tmp_result) { - DRM_ERROR("Failed to freeze SCLK MCLK DPM!"); - result = tmp_result; - } - - tmp_result = ci_populate_requested_graphic_levels(adev, - request); - if (tmp_result) { - DRM_ERROR("Failed to populate requested graphic levels!"); - result = tmp_result; - } - - tmp_result = ci_unfreeze_sclk_mclk_dpm(adev); - if (tmp_result) { - DRM_ERROR("Failed to unfreeze SCLK MCLK DPM!"); - result = tmp_result; - } - - ci_find_min_clock_masks(adev, &sclk_mask, &mclk_mask, - request->min_sclk, request->min_mclk); - - if (sclk_mask) { - if (!pi->sclk_dpm_key_disabled) - amdgpu_ci_send_msg_to_smc_with_parameter( - adev, - PPSMC_MSG_SCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask. - sclk_dpm_enable_mask & - sclk_mask); - } - - if (mclk_mask) { - if (!pi->mclk_dpm_key_disabled) - amdgpu_ci_send_msg_to_smc_with_parameter( - adev, - PPSMC_MSG_MCLKDPM_SetEnabledMask, - pi->dpm_level_enable_mask. - mclk_dpm_enable_mask & - mclk_mask); - } - - - return result; -} - -static int ci_dpm_set_power_profile_state(void *handle, - struct amd_pp_profile *request) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - int ret = -1; - - if (!pi || !request) - return -EINVAL; - - if (adev->pm.dpm.forced_level != - AMD_DPM_FORCED_LEVEL_AUTO) - return -EINVAL; - - if (request->min_sclk || - request->min_mclk || - request->activity_threshold || - request->up_hyst || - request->down_hyst) { - if (request->type == AMD_PP_GFX_PROFILE) - memcpy(&pi->gfx_power_profile, request, - sizeof(struct amd_pp_profile)); - else if (request->type == AMD_PP_COMPUTE_PROFILE) - memcpy(&pi->compute_power_profile, request, - sizeof(struct amd_pp_profile)); - else - return -EINVAL; - - if (request->type == pi->current_power_profile) - ret = ci_set_power_profile_state( - adev, - request); - } else { - /* set power profile if it exists */ - switch (request->type) { - case AMD_PP_GFX_PROFILE: - ret = ci_set_power_profile_state( - adev, - &pi->gfx_power_profile); - break; - case AMD_PP_COMPUTE_PROFILE: - ret = ci_set_power_profile_state( - adev, - &pi->compute_power_profile); - break; - default: - return -EINVAL; - } - } - - if (!ret) - pi->current_power_profile = request->type; - - return 0; -} - -static int ci_dpm_reset_power_profile_state(void *handle, - struct amd_pp_profile *request) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - - if (!pi || !request) - return -EINVAL; - - if (request->type == AMD_PP_GFX_PROFILE) { - pi->gfx_power_profile = pi->default_gfx_power_profile; - return ci_dpm_set_power_profile_state(adev, - &pi->gfx_power_profile); - } else if (request->type == AMD_PP_COMPUTE_PROFILE) { - pi->compute_power_profile = - pi->default_compute_power_profile; - return ci_dpm_set_power_profile_state(adev, - &pi->compute_power_profile); - } else - return -EINVAL; -} - -static int ci_dpm_switch_power_profile(void *handle, - enum amd_pp_profile_type type) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ci_power_info *pi = ci_get_pi(adev); - struct amd_pp_profile request = {0}; - - if (!pi) - return -EINVAL; - - if (pi->current_power_profile != type) { - request.type = type; - return ci_dpm_set_power_profile_state(adev, &request); - } - - return 0; -} - static int ci_dpm_read_sensor(void *handle, int idx, void *value, int *size) { @@ -7011,7 +6763,7 @@ static int ci_dpm_read_sensor(void *handle, int idx, } } -const struct amd_ip_funcs ci_dpm_ip_funcs = { +static const struct amd_ip_funcs ci_dpm_ip_funcs = { .name = "ci_dpm", .early_init = ci_dpm_early_init, .late_init = ci_dpm_late_init, @@ -7028,8 +6780,16 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = { .set_powergating_state = ci_dpm_set_powergating_state, }; -const struct amd_pm_funcs ci_dpm_funcs = { - .get_temperature = &ci_dpm_get_temp, +const struct amdgpu_ip_block_version ci_smu_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_SMC, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &ci_dpm_ip_funcs, +}; + +static const struct amd_pm_funcs ci_dpm_funcs = { .pre_set_power_state = &ci_dpm_pre_set_power_state, .set_power_state = &ci_dpm_set_power_state, .post_set_power_state = &ci_dpm_post_set_power_state, @@ -7053,10 +6813,6 @@ const struct amd_pm_funcs ci_dpm_funcs = { .set_mclk_od = ci_dpm_set_mclk_od, .check_state_equal = ci_check_state_equal, .get_vce_clock_state = amdgpu_get_vce_clock_state, - .get_power_profile_state = ci_dpm_get_power_profile_state, - .set_power_profile_state = ci_dpm_set_power_profile_state, - .reset_power_profile_state = ci_dpm_reset_power_profile_state, - .switch_power_profile = ci_dpm_switch_power_profile, .read_sensor = ci_dpm_read_sensor, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h index 84cbc9c45f4d..91be2996ae7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h @@ -295,13 +295,6 @@ struct ci_power_info { bool fan_is_controlled_by_smc; u32 t_min; u32 fan_ctrl_default_mode; - - /* power profile */ - struct amd_pp_profile gfx_power_profile; - struct amd_pp_profile compute_power_profile; - struct amd_pp_profile default_gfx_power_profile; - struct amd_pp_profile default_compute_power_profile; - enum amd_pp_profile_type current_power_profile; }; #define CISLANDS_VOLTAGE_CONTROL_NONE 0x0 diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 8e59e65efd44..0df22030e713 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -67,7 +67,6 @@ #include "amdgpu_dm.h" #include "amdgpu_amdkfd.h" -#include "amdgpu_powerplay.h" #include "dce_virtual.h" /* @@ -1715,6 +1714,27 @@ static void cik_detect_hw_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } +static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1); + RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL); + } else { + amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1); + } +} + +static void cik_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32(mmHDP_DEBUG0, 1); + RREG32(mmHDP_DEBUG0); + } else { + amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1); + } +} + static const struct amdgpu_asic_funcs cik_asic_funcs = { .read_disabled_bios = &cik_read_disabled_bios, @@ -1726,6 +1746,8 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .set_uvd_clocks = &cik_set_uvd_clocks, .set_vce_clocks = &cik_set_vce_clocks, .get_config_memsize = &cik_get_config_memsize, + .flush_hdp = &cik_flush_hdp, + .invalidate_hdp = &cik_invalidate_hdp, }; static int cik_common_early_init(void *handle) @@ -1864,10 +1886,6 @@ static int cik_common_early_init(void *handle) return -EINVAL; } - adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - - amdgpu_device_get_pcie_info(adev); - return 0; } @@ -1977,7 +1995,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + if (amdgpu_dpm == -1) + amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); + else + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -1995,7 +2016,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + if (amdgpu_dpm == -1) + amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); + else + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -2013,7 +2037,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -2032,7 +2056,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); - amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block); + amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index c4989f51ecef..e49c6f15a0a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -24,6 +24,8 @@ #ifndef __CIK_H__ #define __CIK_H__ +#define CIK_FLUSH_GPU_TLB_NUM_WREG 3 + void cik_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int cik_set_ip_blocks(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h index c7b4349f6319..2a086610f74d 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h @@ -24,8 +24,7 @@ #ifndef __CIK_DPM_H__ #define __CIK_DPM_H__ -extern const struct amd_ip_funcs ci_dpm_ip_funcs; -extern const struct amd_ip_funcs kv_dpm_ip_funcs; -extern const struct amd_pm_funcs ci_dpm_funcs; -extern const struct amd_pm_funcs kv_dpm_funcs; +extern const struct amdgpu_ip_block_version ci_smu_ip_block; +extern const struct amdgpu_ip_block_version kv_smu_ip_block; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index d5a05c19708f..44d10c2172f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -111,7 +111,7 @@ static int cik_ih_irq_init(struct amdgpu_device *adev) cik_ih_disable_interrupts(adev); /* setup interrupt control */ - WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(mmINTERRUPT_CNTL); /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN @@ -281,7 +281,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vmid = (dw[2] >> 8) & 0xff; - entry->pas_id = (dw[2] >> 16) & 0xffff; + entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ adev->irq.ih.rptr += 16; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 6e8278e689b1..f48ea0dad875 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -261,13 +261,6 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring) amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ } -static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - amdgpu_ring_write(ring, mmHDP_DEBUG0); - amdgpu_ring_write(ring, 1); -} - /** * cik_sdma_ring_emit_fence - emit a fence on the DMA ring * @@ -317,7 +310,7 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); @@ -517,7 +510,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -885,18 +878,7 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring, u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - if (vmid < 8) { - amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid)); - } else { - amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8)); - } - amdgpu_ring_write(ring, pd_addr >> 12); - - /* flush TLB */ - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); - amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); - amdgpu_ring_write(ring, 1 << vmid); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2); @@ -906,6 +888,14 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ } +static void cik_sdma_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, val); +} + static void cik_enable_sdma_mgcg(struct amdgpu_device *adev, bool enable) { @@ -1279,9 +1269,9 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .set_wptr = cik_sdma_ring_set_wptr, .emit_frame_size = 6 + /* cik_sdma_ring_emit_hdp_flush */ - 3 + /* cik_sdma_ring_emit_hdp_invalidate */ + 3 + /* hdp invalidate */ 6 + /* cik_sdma_ring_emit_pipeline_sync */ - 12 + /* cik_sdma_ring_emit_vm_flush */ + CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* cik_sdma_ring_emit_vm_flush */ 9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */ .emit_ib = cik_sdma_ring_emit_ib, @@ -1289,11 +1279,11 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync, .emit_vm_flush = cik_sdma_ring_emit_vm_flush, .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush, - .emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate, .test_ring = cik_sdma_ring_test_ring, .test_ib = cik_sdma_ring_test_ib, .insert_nop = cik_sdma_ring_insert_nop, .pad_ib = cik_sdma_ring_pad_ib, + .emit_wreg = cik_sdma_ring_emit_wreg, }; static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) @@ -1391,9 +1381,6 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { .copy_pte = cik_sdma_vm_copy_pte, .write_pte = cik_sdma_vm_write_pte, - - .set_max_nums_pte_pde = 0x1fffff >> 3, - .set_pte_pde_num_dw = 10, .set_pte_pde = cik_sdma_vm_set_pte_pde, }; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index f576e9cbbc61..960c29e17da6 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -111,7 +111,7 @@ static int cz_ih_irq_init(struct amdgpu_device *adev) cz_ih_disable_interrupts(adev); /* setup interrupt control */ - WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(mmINTERRUPT_CNTL); /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN @@ -260,7 +260,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; entry->vmid = (dw[2] >> 8) & 0xff; - entry->pas_id = (dw[2] >> 16) & 0xffff; + entry->pasid = (dw[2] >> 16) & 0xffff; /* wptr/rptr are in bytes! */ adev->irq.ih.rptr += 16; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index f34bc68aadfb..452f88ea46a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -190,66 +190,6 @@ static void dce_v10_0_audio_endpt_wreg(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } -static bool dce_v10_0_is_in_vblank(struct amdgpu_device *adev, int crtc) -{ - if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & - CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK) - return true; - else - return false; -} - -static bool dce_v10_0_is_counter_moving(struct amdgpu_device *adev, int crtc) -{ - u32 pos1, pos2; - - pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - if (pos1 != pos2) - return true; - else - return false; -} - -/** - * dce_v10_0_vblank_wait - vblank wait asic callback. - * - * @adev: amdgpu_device pointer - * @crtc: crtc to wait for vblank on - * - * Wait for vblank on the requested crtc (evergreen+). - */ -static void dce_v10_0_vblank_wait(struct amdgpu_device *adev, int crtc) -{ - unsigned i = 100; - - if (crtc >= adev->mode_info.num_crtc) - return; - - if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK)) - return; - - /* depending on when we hit vblank, we may be close to active; if so, - * wait for another frame. - */ - while (dce_v10_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v10_0_is_counter_moving(adev, crtc)) - break; - } - } - - while (!dce_v10_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v10_0_is_counter_moving(adev, crtc)) - break; - } - } -} - static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) { if (crtc >= adev->mode_info.num_crtc) @@ -1205,7 +1145,7 @@ static void dce_v10_0_bandwidth_update(struct amdgpu_device *adev) u32 num_heads = 0, lb_size; int i; - amdgpu_update_display_priority(adev); + amdgpu_display_update_priority(adev); for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->mode_info.crtcs[i]->base.enabled) @@ -2517,9 +2457,9 @@ static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = { .cursor_set2 = dce_v10_0_crtc_cursor_set2, .cursor_move = dce_v10_0_crtc_cursor_move, .gamma_set = dce_v10_0_crtc_gamma_set, - .set_config = amdgpu_crtc_set_config, + .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v10_0_crtc_destroy, - .page_flip_target = amdgpu_crtc_page_flip_target, + .page_flip_target = amdgpu_display_crtc_page_flip_target, }; static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2537,7 +2477,8 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode) amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); dce_v10_0_vga_enable(crtc, false); /* Make sure VBLANK and PFLIP interrupts are still enabled */ - type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + type = amdgpu_display_crtc_idx_to_irq_type(adev, + amdgpu_crtc->crtc_id); amdgpu_irq_update(adev, &adev->crtc_irq, type); amdgpu_irq_update(adev, &adev->pageflip_irq, type); drm_crtc_vblank_on(crtc); @@ -2676,7 +2617,7 @@ static bool dce_v10_0_crtc_mode_fixup(struct drm_crtc *crtc, amdgpu_crtc->connector = NULL; return false; } - if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) + if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) return false; if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) return false; @@ -2824,9 +2765,9 @@ static int dce_v10_0_sw_init(void *handle) adev->ddev->mode_config.preferred_depth = 24; adev->ddev->mode_config.prefer_shadow = 1; - adev->ddev->mode_config.fb_base = adev->mc.aper_base; + adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_modeset_create_props(adev); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2841,7 +2782,7 @@ static int dce_v10_0_sw_init(void *handle) } if (amdgpu_atombios_get_connector_info_from_object_table(adev)) - amdgpu_print_display_setup(adev->ddev); + amdgpu_display_print_display_setup(adev->ddev); else return -EINVAL; @@ -2921,6 +2862,11 @@ static int dce_v10_0_hw_fini(void *handle) static int dce_v10_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v10_0_hw_fini(handle); } @@ -2929,6 +2875,9 @@ static int dce_v10_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v10_0_hw_init(handle); /* turn on the BL */ @@ -3249,7 +3198,7 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev, { unsigned crtc = entry->src_id - 1; uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); - unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); + unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, crtc); switch (entry->src_data[0]) { case 0: /* vblank */ @@ -3601,7 +3550,6 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev, static const struct amdgpu_display_funcs dce_v10_0_display_funcs = { .bandwidth_update = &dce_v10_0_bandwidth_update, .vblank_get_counter = &dce_v10_0_vblank_get_counter, - .vblank_wait = &dce_v10_0_vblank_wait, .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, .hpd_sense = &dce_v10_0_hpd_sense, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 26378bd6aba4..a7c1c584a191 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -207,66 +207,6 @@ static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } -static bool dce_v11_0_is_in_vblank(struct amdgpu_device *adev, int crtc) -{ - if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & - CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK) - return true; - else - return false; -} - -static bool dce_v11_0_is_counter_moving(struct amdgpu_device *adev, int crtc) -{ - u32 pos1, pos2; - - pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - if (pos1 != pos2) - return true; - else - return false; -} - -/** - * dce_v11_0_vblank_wait - vblank wait asic callback. - * - * @adev: amdgpu_device pointer - * @crtc: crtc to wait for vblank on - * - * Wait for vblank on the requested crtc (evergreen+). - */ -static void dce_v11_0_vblank_wait(struct amdgpu_device *adev, int crtc) -{ - unsigned i = 100; - - if (crtc < 0 || crtc >= adev->mode_info.num_crtc) - return; - - if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK)) - return; - - /* depending on when we hit vblank, we may be close to active; if so, - * wait for another frame. - */ - while (dce_v11_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v11_0_is_counter_moving(adev, crtc)) - break; - } - } - - while (!dce_v11_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v11_0_is_counter_moving(adev, crtc)) - break; - } - } -} - static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) { if (crtc < 0 || crtc >= adev->mode_info.num_crtc) @@ -1229,7 +1169,7 @@ static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev) u32 num_heads = 0, lb_size; int i; - amdgpu_update_display_priority(adev); + amdgpu_display_update_priority(adev); for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->mode_info.crtcs[i]->base.enabled) @@ -2592,9 +2532,9 @@ static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = { .cursor_set2 = dce_v11_0_crtc_cursor_set2, .cursor_move = dce_v11_0_crtc_cursor_move, .gamma_set = dce_v11_0_crtc_gamma_set, - .set_config = amdgpu_crtc_set_config, + .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v11_0_crtc_destroy, - .page_flip_target = amdgpu_crtc_page_flip_target, + .page_flip_target = amdgpu_display_crtc_page_flip_target, }; static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2612,7 +2552,8 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode) amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); dce_v11_0_vga_enable(crtc, false); /* Make sure VBLANK and PFLIP interrupts are still enabled */ - type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + type = amdgpu_display_crtc_idx_to_irq_type(adev, + amdgpu_crtc->crtc_id); amdgpu_irq_update(adev, &adev->crtc_irq, type); amdgpu_irq_update(adev, &adev->pageflip_irq, type); drm_crtc_vblank_on(crtc); @@ -2779,7 +2720,7 @@ static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc, amdgpu_crtc->connector = NULL; return false; } - if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) + if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) return false; if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) return false; @@ -2939,9 +2880,9 @@ static int dce_v11_0_sw_init(void *handle) adev->ddev->mode_config.preferred_depth = 24; adev->ddev->mode_config.prefer_shadow = 1; - adev->ddev->mode_config.fb_base = adev->mc.aper_base; + adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_modeset_create_props(adev); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2957,7 +2898,7 @@ static int dce_v11_0_sw_init(void *handle) } if (amdgpu_atombios_get_connector_info_from_object_table(adev)) - amdgpu_print_display_setup(adev->ddev); + amdgpu_display_print_display_setup(adev->ddev); else return -EINVAL; @@ -3047,6 +2988,11 @@ static int dce_v11_0_hw_fini(void *handle) static int dce_v11_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v11_0_hw_fini(handle); } @@ -3055,6 +3001,9 @@ static int dce_v11_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v11_0_hw_init(handle); /* turn on the BL */ @@ -3368,7 +3317,8 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, { unsigned crtc = entry->src_id - 1; uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); - unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); + unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, + crtc); switch (entry->src_data[0]) { case 0: /* vblank */ @@ -3725,7 +3675,6 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev, static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { .bandwidth_update = &dce_v11_0_bandwidth_update, .vblank_get_counter = &dce_v11_0_vblank_get_counter, - .vblank_wait = &dce_v11_0_vblank_wait, .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, .hpd_sense = &dce_v11_0_hpd_sense, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index bd2c4f727df6..9f67b7fd3487 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -142,64 +142,6 @@ static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } -static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc) -{ - if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & CRTC_STATUS__CRTC_V_BLANK_MASK) - return true; - else - return false; -} - -static bool dce_v6_0_is_counter_moving(struct amdgpu_device *adev, int crtc) -{ - u32 pos1, pos2; - - pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - if (pos1 != pos2) - return true; - else - return false; -} - -/** - * dce_v6_0_wait_for_vblank - vblank wait asic callback. - * - * @crtc: crtc to wait for vblank on - * - * Wait for vblank on the requested crtc (evergreen+). - */ -static void dce_v6_0_vblank_wait(struct amdgpu_device *adev, int crtc) -{ - unsigned i = 100; - - if (crtc >= adev->mode_info.num_crtc) - return; - - if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK)) - return; - - /* depending on when we hit vblank, we may be close to active; if so, - * wait for another frame. - */ - while (dce_v6_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v6_0_is_counter_moving(adev, crtc)) - break; - } - } - - while (!dce_v6_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v6_0_is_counter_moving(adev, crtc)) - break; - } - } -} - static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) { if (crtc >= adev->mode_info.num_crtc) @@ -1108,7 +1050,7 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev) if (!adev->mode_info.mode_config_initialized) return; - amdgpu_update_display_priority(adev); + amdgpu_display_update_priority(adev); for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->mode_info.crtcs[i]->base.enabled) @@ -2407,9 +2349,9 @@ static const struct drm_crtc_funcs dce_v6_0_crtc_funcs = { .cursor_set2 = dce_v6_0_crtc_cursor_set2, .cursor_move = dce_v6_0_crtc_cursor_move, .gamma_set = dce_v6_0_crtc_gamma_set, - .set_config = amdgpu_crtc_set_config, + .set_config = amdgpu_display_crtc_set_config, .destroy = dce_v6_0_crtc_destroy, - .page_flip_target = amdgpu_crtc_page_flip_target, + .page_flip_target = amdgpu_display_crtc_page_flip_target, }; static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode) @@ -2425,7 +2367,8 @@ static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode) amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE); amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE); /* Make sure VBLANK and PFLIP interrupts are still enabled */ - type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id); + type = amdgpu_display_crtc_idx_to_irq_type(adev, + amdgpu_crtc->crtc_id); amdgpu_irq_update(adev, &adev->crtc_irq, type); amdgpu_irq_update(adev, &adev->pageflip_irq, type); drm_crtc_vblank_on(crtc); @@ -2562,7 +2505,7 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc, amdgpu_crtc->connector = NULL; return false; } - if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) + if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) return false; if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode)) return false; @@ -2693,9 +2636,9 @@ static int dce_v6_0_sw_init(void *handle) adev->ddev->mode_config.max_height = 16384; adev->ddev->mode_config.preferred_depth = 24; adev->ddev->mode_config.prefer_shadow = 1; - adev->ddev->mode_config.fb_base = adev->mc.aper_base; + adev->ddev->mode_config.fb_base = adev->gmc.aper_base; - r = amdgpu_modeset_create_props(adev); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -2711,7 +2654,7 @@ static int dce_v6_0_sw_init(void *handle) ret = amdgpu_atombios_get_connector_info_from_object_table(adev); if (ret) - amdgpu_print_display_setup(adev->ddev); + amdgpu_display_print_display_setup(adev->ddev); else return -EINVAL; @@ -2787,6 +2730,11 @@ static int dce_v6_0_hw_fini(void *handle) static int dce_v6_0_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->mode_info.bl_level = + amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); + return dce_v6_0_hw_fini(handle); } @@ -2795,6 +2743,9 @@ static int dce_v6_0_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + amdgpu_atombios_encoder_set_backlight_level_to_reg(adev, + adev->mode_info.bl_level); + ret = dce_v6_0_hw_init(handle); /* turn on the BL */ @@ -2966,7 +2917,8 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev, { unsigned crtc = entry->src_id - 1; uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg); - unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc); + unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, + crtc); switch (entry->src_data[0]) { case 0: /* vblank */ @@ -3093,7 +3045,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); schedule_work(&adev->hotplug_work); - DRM_INFO("IH: HPD%d\n", hpd + 1); + DRM_DEBUG("IH: HPD%d\n", hpd + 1); } return 0; @@ -3407,7 +3359,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev, static const struct amdgpu_display_funcs dce_v6_0_display_funcs = { .bandwidth_update = &dce_v6_0_bandwidth_update, .vblank_get_counter = &dce_v6_0_vblank_get_counter, - .vblank_wait = &dce_v6_0_vblank_wait, .backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level, .backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level, .hpd_sense = &dce_v6_0_hpd_sense, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index c008dc030687..f55422cbd77a 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -140,66 +140,6 @@ static void dce_v8_0_audio_endpt_wreg(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } -static bool dce_v8_0_is_in_vblank(struct amdgpu_device *adev, int crtc) -{ - if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & - CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK) - return true; - else - return false; -} - -static bool dce_v8_0_is_counter_moving(struct amdgpu_device *adev, int crtc) -{ - u32 pos1, pos2; - - pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); - - if (pos1 != pos2) - return true; - else - return false; -} - -/** - * dce_v8_0_vblank_wait - vblank wait asic callback. - * - * @adev: amdgpu_device pointer - * @crtc: crtc to wait for vblank on - * - * Wait for vblank on the requested crtc (evergreen+). - */ -static void dce_v8_0_vblank_wait(struct amdgpu_device *adev, int crtc) -{ - unsigned i = 100; - - if (crtc >= adev->mode_info.num_crtc) - return; - - if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK)) - return; - - /* depending on when we hit vblank, we may be close to active; if so, - * wait for another frame. - */ - while (dce_v8_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v8_0_is_counter_moving(adev, crtc)) - break; - } - } - - while (!dce_v8_0_is_in_vblank(adev, crtc)) { - if (i++ == 100) { - i = 0; - if (!dce_v8_0_is_counter_moving(adev, crtc)) - break; - } - } -} - static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc) { if (crtc >= adev->mode_info.num_crtc) @@ -1144,7 +1084,7 @@ static void dce_v8_0_bandwidth_update(struct amdgpu_device *adev) u32 num_heads = 0, lb_size; int i; - amdgpu_update_display_priority(adev); + amdgpu_display_update_priority(adev); for (i = 0; i < adev->mode_info.num_crtc; i++) { if (adev->mode_info.crtcs[i]->base.enabled) @@ -2421,9 +2361,9 @@ static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = { .cursor_set2 = dce_v8_0_crtc_cursor_set2, .cursor_move = dce_v8_0_crtc_cursor_move, .gamma_set = dce_v8_0_crtc_gamma_set, - .set_config = amdgpu_crtc_set_config, + .set_config = amdgpu_display_crtc_set_config, |