summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/vc4
diff options
context:
space:
mode:
authorDaniel Vetter <daniel.vetter@ffwll.ch>2019-03-25 11:05:11 +0100
committerDaniel Vetter <daniel.vetter@ffwll.ch>2019-03-25 11:05:12 +0100
commit0bec6219e5a0cf2dd17716949a7592807e10f3d7 (patch)
tree3eabbc70c5d9c053fbdc269bc09bf622b6ad1400 /drivers/gpu/drm/vc4
parent535f6f5d7b7f7b3127c1c8172ff0504260d14f45 (diff)
parentff01e6971ecd9ba6a9c0538c46d713f38a751f11 (diff)
Merge tag 'drm-misc-next-2019-03-21' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
drm-misc-next for 5.2: UAPI Changes: - Add Colorspace connector property (Uma) - fourcc: Several new YUV formats from ARM (Brian & Ayan) - fourcc: Fix merge conflicts between new formats above and Swati's that went in via topic/hdr-formats-2019-03-07 branch (Maarten) Cross-subsystem Changes: - Typed component support via topic/component-typed-2019-02-11 (Maxime/Daniel) Core Changes: - Improve component helper documentation (Daniel) - Avoid calling drm_dev_unregister() twice on unplugged devices (Noralf) - Add device managed (devm) drm_device init function (Noralf) - Graduate TINYDRM_MODE to DRM_SIMPLE_MODE in core (Noralf) - Move MIPI/DSI rate control params computation into core from i915 (David) - Add support for shmem backed gem objects (Noralf) Driver Changes: - various: Use of_node_name_eq for node name comparisons (Rob Herring) - sun4i: Add DSI burst mode support (Konstantin) - panel: Add Ronbo RB070D30 MIPI/DSI panel support (Konstantin) - virtio: A few prime improvements (Gerd) - tinydrm: Remove tinydrm_device (Noralf) - vc4: Add load tracker to driver to detect underflow in atomic check (Boris) - vboxvideo: Move it out of staging \o/ (Hans) - v3d: Add support for V3D v4.2 (Eric) Cc: Konstantin Sudakov <k.sudakov@integrasources.com> Cc: Rob Herring <robh@kernel.org> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Maxime Ripard <maxime.ripard@bootlin.com> Cc: Uma Shankar <uma.shankar@intel.com> Cc: Noralf Trønnes <noralf@tronnes.org> Cc: Gerd Hoffmann <kraxel@redhat.com> Cc: David Francis <David.Francis@amd.com> Cc: Boris Brezillon <boris.brezillon@bootlin.com> Cc: Eric Anholt <eric@anholt.net> Cc: Hans de Goede <hdegoede@redhat.com> Cc: Brian Starkey <brian.starkey@arm.com> Cc: Ayan Kumar Halder <ayan.halder@arm.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> From: Sean Paul <sean@poorly.run> Link: https://patchwork.freedesktop.org/patch/msgid/20190321170805.GA50145@art_vandelay
Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r--drivers/gpu/drm/vc4/vc4_bo.c15
-rw-r--r--drivers/gpu/drm/vc4/vc4_crtc.c8
-rw-r--r--drivers/gpu/drm/vc4/vc4_debugfs.c10
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.c4
-rw-r--r--drivers/gpu/drm/vc4/vc4_drv.h30
-rw-r--r--drivers/gpu/drm/vc4/vc4_gem.c18
-rw-r--r--drivers/gpu/drm/vc4/vc4_hvs.c95
-rw-r--r--drivers/gpu/drm/vc4/vc4_kms.c122
-rw-r--r--drivers/gpu/drm/vc4/vc4_plane.c59
-rw-r--r--drivers/gpu/drm/vc4/vc4_regs.h51
-rw-r--r--drivers/gpu/drm/vc4/vc4_txp.c3
11 files changed, 344 insertions, 71 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 8dcce7182bb7..92e3f98d8478 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -201,8 +201,6 @@ static void vc4_bo_destroy(struct vc4_bo *bo)
bo->validated_shader = NULL;
}
- reservation_object_fini(&bo->_resv);
-
drm_gem_cma_free_object(obj);
}
@@ -427,8 +425,6 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++;
vc4->bo_labels[VC4_BO_TYPE_KERNEL].size_allocated += size;
mutex_unlock(&vc4->bo_lock);
- bo->resv = &bo->_resv;
- reservation_object_init(bo->resv);
return &bo->base.base;
}
@@ -684,13 +680,6 @@ static void vc4_bo_cache_time_timer(struct timer_list *t)
schedule_work(&vc4->bo_cache.time_work);
}
-struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj)
-{
- struct vc4_bo *bo = to_vc4_bo(obj);
-
- return bo->resv;
-}
-
struct dma_buf *
vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
{
@@ -822,14 +811,12 @@ vc4_prime_import_sg_table(struct drm_device *dev,
struct sg_table *sgt)
{
struct drm_gem_object *obj;
- struct vc4_bo *bo;
obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt);
if (IS_ERR(obj))
return obj;
- bo = to_vc4_bo(obj);
- bo->resv = attach->dmabuf->resv;
+ obj->resv = attach->dmabuf->resv;
return obj;
}
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 730008d3da76..64c964b7c577 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -834,6 +834,14 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
drm_crtc_send_vblank_event(crtc, vc4_crtc->event);
vc4_crtc->event = NULL;
drm_crtc_vblank_put(crtc);
+
+ /* Wait for the page flip to unmask the underrun to ensure that
+ * the display list was updated by the hardware. Before that
+ * happens, the HVS will be using the previous display list with
+ * the CRTC and encoder already reconfigured, leading to
+ * underruns. This can be seen when reconfiguring the CRTC.
+ */
+ vc4_hvs_unmask_underrun(dev, vc4_crtc->channel);
}
spin_unlock_irqrestore(&dev->event_lock, flags);
}
diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c
index 7a0003de71ab..59cdad89f844 100644
--- a/drivers/gpu/drm/vc4/vc4_debugfs.c
+++ b/drivers/gpu/drm/vc4/vc4_debugfs.c
@@ -23,6 +23,7 @@ static const struct drm_info_list vc4_debugfs_list[] = {
{"vec_regs", vc4_vec_debugfs_regs, 0},
{"txp_regs", vc4_txp_debugfs_regs, 0},
{"hvs_regs", vc4_hvs_debugfs_regs, 0},
+ {"hvs_underrun", vc4_hvs_debugfs_underrun, 0},
{"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0},
{"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1},
{"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2},
@@ -35,6 +36,15 @@ static const struct drm_info_list vc4_debugfs_list[] = {
int
vc4_debugfs_init(struct drm_minor *minor)
{
+ struct vc4_dev *vc4 = to_vc4_dev(minor->dev);
+ struct dentry *dentry;
+
+ dentry = debugfs_create_bool("hvs_load_tracker", S_IRUGO | S_IWUSR,
+ minor->debugfs_root,
+ &vc4->load_tracker_enabled);
+ if (!dentry)
+ return -ENOMEM;
+
return drm_debugfs_create_files(vc4_debugfs_list, VC4_DEBUGFS_ENTRIES,
minor->debugfs_root, minor);
}
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 5fcd2f0da7f7..4daf44fd4548 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -200,7 +200,6 @@ static struct drm_driver vc4_drm_driver = {
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = drm_gem_prime_import,
.gem_prime_export = vc4_prime_export,
- .gem_prime_res_obj = vc4_prime_res_obj,
.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
.gem_prime_import_sg_table = vc4_prime_import_sg_table,
.gem_prime_vmap = vc4_prime_vmap,
@@ -287,7 +286,7 @@ static int vc4_drm_bind(struct device *dev)
vc4_kms_load(drm);
- drm_fbdev_generic_setup(drm, 32);
+ drm_fbdev_generic_setup(drm, 16);
return 0;
@@ -312,6 +311,7 @@ static void vc4_drm_unbind(struct device *dev)
drm_mode_config_cleanup(drm);
+ drm_atomic_private_obj_fini(&vc4->load_tracker);
drm_atomic_private_obj_fini(&vc4->ctm_manager);
drm_dev_put(drm);
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 2c635f001c71..7a3c093e7443 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -7,7 +7,6 @@
*/
#include <linux/mm_types.h>
-#include <linux/reservation.h>
#include <drm/drmP.h>
#include <drm/drm_util.h>
#include <drm/drm_encoder.h>
@@ -185,10 +184,20 @@ struct vc4_dev {
/* Bitmask of the current bin_alloc used for overflow memory. */
uint32_t bin_alloc_overflow;
+ /* Incremented when an underrun error happened after an atomic commit.
+ * This is particularly useful to detect when a specific modeset is too
+ * demanding in term of memory or HVS bandwidth which is hard to guess
+ * at atomic check time.
+ */
+ atomic_t underrun;
+
struct work_struct overflow_mem_work;
int power_refcount;
+ /* Set to true when the load tracker is active. */
+ bool load_tracker_enabled;
+
/* Mutex controlling the power refcount. */
struct mutex power_lock;
@@ -201,6 +210,7 @@ struct vc4_dev {
struct drm_modeset_lock ctm_state_lock;
struct drm_private_obj ctm_manager;
+ struct drm_private_obj load_tracker;
};
static inline struct vc4_dev *
@@ -240,10 +250,6 @@ struct vc4_bo {
*/
struct vc4_validated_shader_info *validated_shader;
- /* normally (resv == &_resv) except for imported bo's */
- struct reservation_object *resv;
- struct reservation_object _resv;
-
/* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i
* for user-allocated labels.
*/
@@ -376,6 +382,16 @@ struct vc4_plane_state {
* when async update is not possible.
*/
bool dlist_initialized;
+
+ /* Load of this plane on the HVS block. The load is expressed in HVS
+ * cycles/sec.
+ */
+ u64 hvs_load;
+
+ /* Memory bandwidth needed for this plane. This is expressed in
+ * bytes/sec.
+ */
+ u64 membus_load;
};
static inline struct vc4_plane_state *
@@ -685,7 +701,6 @@ int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
vm_fault_t vc4_fault(struct vm_fault *vmf);
int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
-struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
@@ -773,6 +788,9 @@ void vc4_irq_reset(struct drm_device *dev);
extern struct platform_driver vc4_hvs_driver;
void vc4_hvs_dump_state(struct drm_device *dev);
int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused);
+int vc4_hvs_debugfs_underrun(struct seq_file *m, void *unused);
+void vc4_hvs_unmask_underrun(struct drm_device *dev, int channel);
+void vc4_hvs_mask_underrun(struct drm_device *dev, int channel);
/* vc4_kms.c */
int vc4_kms_load(struct drm_device *dev);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index aea2b8dfec17..5ee5bf7fedf7 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -536,7 +536,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
bo = to_vc4_bo(&exec->bo[i]->base);
bo->seqno = seqno;
- reservation_object_add_shared_fence(bo->resv, exec->fence);
+ reservation_object_add_shared_fence(bo->base.base.resv, exec->fence);
}
list_for_each_entry(bo, &exec->unref_list, unref_head) {
@@ -547,7 +547,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
bo->write_seqno = seqno;
- reservation_object_add_excl_fence(bo->resv, exec->fence);
+ reservation_object_add_excl_fence(bo->base.base.resv, exec->fence);
}
}
@@ -559,7 +559,7 @@ vc4_unlock_bo_reservations(struct drm_device *dev,
int i;
for (i = 0; i < exec->bo_count; i++) {
- struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+ struct drm_gem_object *bo = &exec->bo[i]->base;
ww_mutex_unlock(&bo->resv->lock);
}
@@ -581,13 +581,13 @@ vc4_lock_bo_reservations(struct drm_device *dev,
{
int contended_lock = -1;
int i, ret;
- struct vc4_bo *bo;
+ struct drm_gem_object *bo;
ww_acquire_init(acquire_ctx, &reservation_ww_class);
retry:
if (contended_lock != -1) {
- bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+ bo = &exec->bo[contended_lock]->base;
ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
acquire_ctx);
if (ret) {
@@ -600,19 +600,19 @@ retry:
if (i == contended_lock)
continue;
- bo = to_vc4_bo(&exec->bo[i]->base);
+ bo = &exec->bo[i]->base;
ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
if (ret) {
int j;
for (j = 0; j < i; j++) {
- bo = to_vc4_bo(&exec->bo[j]->base);
+ bo = &exec->bo[j]->base;
ww_mutex_unlock(&bo->resv->lock);
}
if (contended_lock != -1 && contended_lock >= i) {
- bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+ bo = &exec->bo[contended_lock]->base;
ww_mutex_unlock(&bo->resv->lock);
}
@@ -633,7 +633,7 @@ retry:
* before we commit the CL to the hardware.
*/
for (i = 0; i < exec->bo_count; i++) {
- bo = to_vc4_bo(&exec->bo[i]->base);
+ bo = &exec->bo[i]->base;
ret = reservation_object_reserve_shared(bo->resv, 1);
if (ret) {
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 5d8c749c9749..918e71256ecc 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -22,6 +22,7 @@
* each CRTC.
*/
+#include <drm/drm_atomic_helper.h>
#include <linux/component.h>
#include "vc4_drv.h"
#include "vc4_regs.h"
@@ -102,6 +103,18 @@ int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused)
return 0;
}
+
+int vc4_hvs_debugfs_underrun(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ drm_printf(&p, "%d\n", atomic_read(&vc4->underrun));
+
+ return 0;
+}
#endif
/* The filter kernel is composed of dwords each containing 3 9-bit
@@ -166,6 +179,67 @@ static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
return 0;
}
+void vc4_hvs_mask_underrun(struct drm_device *dev, int channel)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ u32 dispctrl = HVS_READ(SCALER_DISPCTRL);
+
+ dispctrl &= ~SCALER_DISPCTRL_DSPEISLUR(channel);
+
+ HVS_WRITE(SCALER_DISPCTRL, dispctrl);
+}
+
+void vc4_hvs_unmask_underrun(struct drm_device *dev, int channel)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ u32 dispctrl = HVS_READ(SCALER_DISPCTRL);
+
+ dispctrl |= SCALER_DISPCTRL_DSPEISLUR(channel);
+
+ HVS_WRITE(SCALER_DISPSTAT,
+ SCALER_DISPSTAT_EUFLOW(channel));
+ HVS_WRITE(SCALER_DISPCTRL, dispctrl);
+}
+
+static void vc4_hvs_report_underrun(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ atomic_inc(&vc4->underrun);
+ DRM_DEV_ERROR(dev->dev, "HVS underrun\n");
+}
+
+static irqreturn_t vc4_hvs_irq_handler(int irq, void *data)
+{
+ struct drm_device *dev = data;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ irqreturn_t irqret = IRQ_NONE;
+ int channel;
+ u32 control;
+ u32 status;
+
+ status = HVS_READ(SCALER_DISPSTAT);
+ control = HVS_READ(SCALER_DISPCTRL);
+
+ for (channel = 0; channel < SCALER_CHANNELS_COUNT; channel++) {
+ /* Interrupt masking is not always honored, so check it here. */
+ if (status & SCALER_DISPSTAT_EUFLOW(channel) &&
+ control & SCALER_DISPCTRL_DSPEISLUR(channel)) {
+ vc4_hvs_mask_underrun(dev, channel);
+ vc4_hvs_report_underrun(dev);
+
+ irqret = IRQ_HANDLED;
+ }
+ }
+
+ /* Clear every per-channel interrupt flag. */
+ HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_IRQMASK(0) |
+ SCALER_DISPSTAT_IRQMASK(1) |
+ SCALER_DISPSTAT_IRQMASK(2));
+
+ return irqret;
+}
+
static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
{
struct platform_device *pdev = to_platform_device(dev);
@@ -219,15 +293,36 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
dispctrl = HVS_READ(SCALER_DISPCTRL);
dispctrl |= SCALER_DISPCTRL_ENABLE;
+ dispctrl |= SCALER_DISPCTRL_DISPEIRQ(0) |
+ SCALER_DISPCTRL_DISPEIRQ(1) |
+ SCALER_DISPCTRL_DISPEIRQ(2);
/* Set DSP3 (PV1) to use HVS channel 2, which would otherwise
* be unused.
*/
dispctrl &= ~SCALER_DISPCTRL_DSP3_MUX_MASK;
+ dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ |
+ SCALER_DISPCTRL_SLVWREIRQ |
+ SCALER_DISPCTRL_SLVRDEIRQ |
+ SCALER_DISPCTRL_DSPEIEOF(0) |
+ SCALER_DISPCTRL_DSPEIEOF(1) |
+ SCALER_DISPCTRL_DSPEIEOF(2) |
+ SCALER_DISPCTRL_DSPEIEOLN(0) |
+ SCALER_DISPCTRL_DSPEIEOLN(1) |
+ SCALER_DISPCTRL_DSPEIEOLN(2) |
+ SCALER_DISPCTRL_DSPEISLUR(0) |
+ SCALER_DISPCTRL_DSPEISLUR(1) |
+ SCALER_DISPCTRL_DSPEISLUR(2) |
+ SCALER_DISPCTRL_SCLEIRQ);
dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_DSP3_MUX);
HVS_WRITE(SCALER_DISPCTRL, dispctrl);
+ ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
+ vc4_hvs_irq_handler, 0, "vc4 hvs", drm);
+ if (ret)
+ return ret;
+
return 0;
}
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 91b8c72ff361..5160cad25fce 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -34,6 +34,18 @@ static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv)
return container_of(priv, struct vc4_ctm_state, base);
}
+struct vc4_load_tracker_state {
+ struct drm_private_state base;
+ u64 hvs_load;
+ u64 membus_load;
+};
+
+static struct vc4_load_tracker_state *
+to_vc4_load_tracker_state(struct drm_private_state *priv)
+{
+ return container_of(priv, struct vc4_load_tracker_state, base);
+}
+
static struct vc4_ctm_state *vc4_get_ctm_state(struct drm_atomic_state *state,
struct drm_private_obj *manager)
{
@@ -138,6 +150,16 @@ vc4_atomic_complete_commit(struct drm_atomic_state *state)
{
struct drm_device *dev = state->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct vc4_crtc *vc4_crtc;
+ int i;
+
+ for (i = 0; i < dev->mode_config.num_crtc; i++) {
+ if (!state->crtcs[i].ptr || !state->crtcs[i].commit)
+ continue;
+
+ vc4_crtc = to_vc4_crtc(state->crtcs[i].ptr);
+ vc4_hvs_mask_underrun(dev, vc4_crtc->channel);
+ }
drm_atomic_helper_wait_for_fences(dev, state, false);
@@ -385,6 +407,85 @@ vc4_ctm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
return 0;
}
+static int vc4_load_tracker_atomic_check(struct drm_atomic_state *state)
+{
+ struct drm_plane_state *old_plane_state, *new_plane_state;
+ struct vc4_dev *vc4 = to_vc4_dev(state->dev);
+ struct vc4_load_tracker_state *load_state;
+ struct drm_private_state *priv_state;
+ struct drm_plane *plane;
+ int i;
+
+ priv_state = drm_atomic_get_private_obj_state(state,
+ &vc4->load_tracker);
+ if (IS_ERR(priv_state))
+ return PTR_ERR(priv_state);
+
+ load_state = to_vc4_load_tracker_state(priv_state);
+ for_each_oldnew_plane_in_state(state, plane, old_plane_state,
+ new_plane_state, i) {
+ struct vc4_plane_state *vc4_plane_state;
+
+ if (old_plane_state->fb && old_plane_state->crtc) {
+ vc4_plane_state = to_vc4_plane_state(old_plane_state);
+ load_state->membus_load -= vc4_plane_state->membus_load;
+ load_state->hvs_load -= vc4_plane_state->hvs_load;
+ }
+
+ if (new_plane_state->fb && new_plane_state->crtc) {
+ vc4_plane_state = to_vc4_plane_state(new_plane_state);
+ load_state->membus_load += vc4_plane_state->membus_load;
+ load_state->hvs_load += vc4_plane_state->hvs_load;
+ }
+ }
+
+ /* Don't check the load when the tracker is disabled. */
+ if (!vc4->load_tracker_enabled)
+ return 0;
+
+ /* The absolute limit is 2Gbyte/sec, but let's take a margin to let
+ * the system work when other blocks are accessing the memory.
+ */
+ if (load_state->membus_load > SZ_1G + SZ_512M)
+ return -ENOSPC;
+
+ /* HVS clock is supposed to run @ 250Mhz, let's take a margin and
+ * consider the maximum number of cycles is 240M.
+ */
+ if (load_state->hvs_load > 240000000ULL)
+ return -ENOSPC;
+
+ return 0;
+}
+
+static struct drm_private_state *
+vc4_load_tracker_duplicate_state(struct drm_private_obj *obj)
+{
+ struct vc4_load_tracker_state *state;
+
+ state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
+ if (!state)
+ return NULL;
+
+ __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base);
+
+ return &state->base;
+}
+
+static void vc4_load_tracker_destroy_state(struct drm_private_obj *obj,
+ struct drm_private_state *state)
+{
+ struct vc4_load_tracker_state *load_state;
+
+ load_state = to_vc4_load_tracker_state(state);
+ kfree(load_state);
+}
+
+static const struct drm_private_state_funcs vc4_load_tracker_state_funcs = {
+ .atomic_duplicate_state = vc4_load_tracker_duplicate_state,
+ .atomic_destroy_state = vc4_load_tracker_destroy_state,
+};
+
static int
vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
{
@@ -394,7 +495,11 @@ vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
if (ret < 0)
return ret;
- return drm_atomic_helper_check(dev, state);
+ ret = drm_atomic_helper_check(dev, state);
+ if (ret)
+ return ret;
+
+ return vc4_load_tracker_atomic_check(state);
}
static const struct drm_mode_config_funcs vc4_mode_funcs = {
@@ -407,8 +512,14 @@ int vc4_kms_load(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_ctm_state *ctm_state;
+ struct vc4_load_tracker_state *load_state;
int ret;
+ /* Start with the load tracker enabled. Can be disabled through the
+ * debugfs load_tracker file.
+ */
+ vc4->load_tracker_enabled = true;
+
sema_init(&vc4->async_modeset, 1);
/* Set support for vblank irq fast disable, before drm_vblank_init() */
@@ -436,6 +547,15 @@ int vc4_kms_load(struct drm_device *dev)
drm_atomic_private_obj_init(dev, &vc4->ctm_manager, &ctm_state->base,
&vc4_ctm_state_funcs);
+ load_state = kzalloc(sizeof(*load_state), GFP_KERNEL);
+ if (!load_state) {
+ drm_atomic_private_obj_fini(&vc4->ctm_manager);
+ return -ENOMEM;
+ }
+
+ drm_atomic_private_obj_init(dev, &vc4->load_tracker, &load_state->base,
+ &vc4_load_tracker_state_funcs);
+
drm_mode_config_reset(dev);
drm_kms_helper_poll_init(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index d098337c10e9..4d918d3e4858 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -488,6 +488,61 @@ static void vc4_write_scaling_parameters(struct drm_plane_state *state,
}
}
+static void vc4_plane_calc_load(struct drm_plane_state *state)
+{
+ unsigned int hvs_load_shift, vrefresh, i;
+ struct drm_framebuffer *fb = state->fb;
+ struct vc4_plane_state *vc4_state;
+ struct drm_crtc_state *crtc_state;
+ unsigned int vscale_factor;
+
+ vc4_state = to_vc4_plane_state(state);
+ crtc_state = drm_atomic_get_existing_crtc_state(state->state,
+ state->crtc);
+ vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);
+
+ /* The HVS is able to process 2 pixels/cycle when scaling the source,
+ * 4 pixels/cycle otherwise.
+ * Alpha blending step seems to be pipelined and it's always operating
+ * at 4 pixels/cycle, so the limiting aspect here seems to be the
+ * scaler block.
+ * HVS load is expressed in clk-cycles/sec (AKA Hz).
+ */
+ if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+ vc4_state->y_scaling[1] != VC4_SCALING_NONE)
+ hvs_load_shift = 1;
+ else
+ hvs_load_shift = 2;
+
+ vc4_state->membus_load = 0;
+ vc4_state->hvs_load = 0;
+ for (i = 0; i < fb->format->num_planes; i++) {
+ /* Even if the bandwidth/plane required for a single frame is
+ *
+ * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
+ *
+ * when downscaling, we have to read more pixels per line in
+ * the time frame reserved for a single line, so the bandwidth
+ * demand can be punctually higher. To account for that, we
+ * calculate the down-scaling factor and multiply the plane
+ * load by this number. We're likely over-estimating the read
+ * demand, but that's better than under-estimating it.
+ */
+ vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
+ vc4_state->crtc_h);
+ vc4_state->membus_load += vc4_state->src_w[i] *
+ vc4_state->src_h[i] * vscale_factor *
+ fb->format->cpp[i];
+ vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
+ }
+
+ vc4_state->hvs_load *= vrefresh;
+ vc4_state->hvs_load >>= hvs_load_shift;
+ vc4_state->membus_load *= vrefresh;
+}
+
static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
@@ -875,6 +930,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
*/
vc4_state->dlist_initialized = 1;
+ vc4_plane_calc_load(state);
+
return 0;
}
@@ -1082,7 +1139,7 @@ static int vc4_prepare_fb(struct drm_plane *plane,
bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
- fence = reservation_object_get_excl_rcu(bo->resv);
+ fence = reservation_object_get_excl_rcu(bo->base.base.resv);
drm_atomic_set_fence_for_plane(state, fence);
if (plane->state->fb == state->fb)
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 931088014272..c0c5fadaf7e3 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -212,11 +212,11 @@
#define PV_HACT_ACT 0x30
+#define SCALER_CHANNELS_COUNT 3
+
#define SCALER_DISPCTRL 0x00000000
/* Global register for clock gating the HVS */
# define SCALER_DISPCTRL_ENABLE BIT(31)
-# define SCALER_DISPCTRL_DSP2EISLUR BIT(15)
-# define SCALER_DISPCTRL_DSP1EISLUR BIT(14)
# define SCALER_DISPCTRL_DSP3_MUX_MASK VC4_MASK(19, 18)
# define SCALER_DISPCTRL_DSP3_MUX_SHIFT 18
@@ -224,45 +224,25 @@
* SCALER_DISPSTAT_IRQDISP0. Note that short frame contributions are
* always enabled.
*/
-# define SCALER_DISPCTRL_DSP0EISLUR BIT(13)
-# define SCALER_DISPCTRL_DSP2EIEOLN BIT(12)
-# define SCALER_DISPCTRL_DSP2EIEOF BIT(11)
-# define SCALER_DISPCTRL_DSP1EIEOLN BIT(10)
-# define SCALER_DISPCTRL_DSP1EIEOF BIT(9)
+# define SCALER_DISPCTRL_DSPEISLUR(x) BIT(13 + (x))
/* Enables Display 0 end-of-line-N contribution to
* SCALER_DISPSTAT_IRQDISP0
*/
-# define SCALER_DISPCTRL_DSP0EIEOLN BIT(8)
+# define SCALER_DISPCTRL_DSPEIEOLN(x) BIT(8 + ((x) * 2))
/* Enables Display 0 EOF contribution to SCALER_DISPSTAT_IRQDISP0 */
-# define SCALER_DISPCTRL_DSP0EIEOF BIT(7)
+# define SCALER_DISPCTRL_DSPEIEOF(x) BIT(7 + ((x) * 2))
# define SCALER_DISPCTRL_SLVRDEIRQ BIT(6)
# define SCALER_DISPCTRL_SLVWREIRQ BIT(5)
# define SCALER_DISPCTRL_DMAEIRQ BIT(4)
-# define SCALER_DISPCTRL_DISP2EIRQ BIT(3)
-# define SCALER_DISPCTRL_DISP1EIRQ BIT(2)
/* Enables interrupt generation on the enabled EOF/EOLN/EISLUR
* bits and short frames..
*/
-# define SCALER_DISPCTRL_DISP0EIRQ BIT(1)
+# define SCALER_DISPCTRL_DISPEIRQ(x) BIT(1 + (x))
/* Enables interrupt generation on scaler profiler interrupt. */
# define SCALER_DISPCTRL_SCLEIRQ BIT(0)
#define SCALER_DISPSTAT 0x00000004
-# define SCALER_DISPSTAT_COBLOW2 BIT(29)
-# define SCALER_DISPSTAT_EOLN2 BIT(28)
-# define SCALER_DISPSTAT_ESFRAME2 BIT(27)
-# define SCALER_DISPSTAT_ESLINE2 BIT(26)
-# define SCALER_DISPSTAT_EUFLOW2 BIT(25)
-# define SCALER_DISPSTAT_EOF2 BIT(24)
-
-# define SCALER_DISPSTAT_COBLOW1 BIT(21)
-# define SCALER_DISPSTAT_EOLN1 BIT(20)
-# define SCALER_DISPSTAT_ESFRAME1 BIT(19)
-# define SCALER_DISPSTAT_ESLINE1 BIT(18)
-# define SCALER_DISPSTAT_EUFLOW1 BIT(17)
-# define SCALER_DISPSTAT_EOF1 BIT(16)
-
# define SCALER_DISPSTAT_RESP_MASK VC4_MASK(15, 14)
# define SCALER_DISPSTAT_RESP_SHIFT 14
# define SCALER_DISPSTAT_RESP_OKAY 0
@@ -270,23 +250,26 @@
# define SCALER_DISPSTAT_RESP_SLVERR 2
# define SCALER_DISPSTAT_RESP_DECERR 3
-# define SCALER_DISPSTAT_COBLOW0 BIT(13)
+# define SCALER_DISPSTAT_COBLOW(x) BIT(13 + ((x) * 8))
/* Set when the DISPEOLN line is done compositing. */
-# define SCALER_DISPSTAT_EOLN0 BIT(12)
+# define SCALER_DISPSTAT_EOLN(x) BIT(12 + ((x) * 8))
/* Set when VSTART is seen but there are still pixels in the current
* output line.
*/
-# define SCALER_DISPSTAT_ESFRAME0 BIT(11)
+# define SCALER_DISPSTAT_ESFRAME(x) BIT(11 + ((x) * 8))
/* Set when HSTART is seen but there are still pixels in the current
* output line.
*/
-# define SCALER_DISPSTAT_ESLINE0 BIT(10)
+# define SCALER_DISPSTAT_ESLINE(x) BIT(10 + ((x) * 8))
/* Set when the the downstream tries to read from the display FIFO
* while it's empty.
*/
-# define SCALER_DISPSTAT_EUFLOW0 BIT(9)
+# define SCALER_DISPSTAT_EUFLOW(x) BIT(9 + ((x) * 8))
/* Set when the display mode changes from RUN to EOF */
-# define SCALER_DISPSTAT_EOF0 BIT(8)
+# define SCALER_DISPSTAT_EOF(x) BIT(8 + ((x) * 8))
+
+# define SCALER_DISPSTAT_IRQMASK(x) VC4_MASK(13 + ((x) * 8), \
+ 8 + ((x) * 8))
/* Set on AXI invalid DMA ID error. */
# define SCALER_DISPSTAT_DMA_ERROR BIT(7)
@@ -298,12 +281,10 @@
* SCALER_DISPSTAT_RESP_ERROR is not SCALER_DISPSTAT_RESP_OKAY.
*/
# define SCALER_DISPSTAT_IRQDMA BIT(4)
-# define SCALER_DISPSTAT_IRQDISP2 BIT(3)
-# define SCALER_DISPSTAT_IRQDISP1 BIT(2)
/* Set when any of the EOF/EOLN/ESFRAME/ESLINE bits are set and their
* corresponding interrupt bit is enabled in DISPCTRL.
*/
-# define SCALER_DISPSTAT_IRQDISP0 BIT(1)
+# define SCALER_DISPSTAT_IRQDISP(x) BIT(1 + (x))
/* On read, the profiler interrupt. On write, clear *all* interrupt bits. */
# define SCALER_DISPSTAT_IRQSCL BIT(0)
diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c
index 5dabd91f2d7e..cc2888dd7171 100644
--- a/drivers/gpu/drm/vc4/vc4_txp.c
+++ b/drivers/gpu/drm/vc4/vc4_txp.c
@@ -249,7 +249,6 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn,
struct drm_connector_state *conn_state)
{
struct drm_crtc_state *crtc_state;
- struct drm_gem_cma_object *gem;
struct drm_framebuffer *fb;
int i;
@@ -275,8 +274,6 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn,
if (i == ARRAY_SIZE(drm_fmts))
return -EINVAL;
- gem = drm_fb_cma_get_gem_obj(fb, 0);
-
/* Pitch must be aligned on 16 bytes. */
if (fb->pitches[0] & GENMASK(3, 0))
return -EINVAL;