summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/iris/iris_bufmgr.h
blob: 36564089ce393fba3c67ae6872cd8bea303cfcc4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BUFMGR_H
#define IRIS_BUFMGR_H

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include "c11/threads.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "util/list.h"
#include "util/simple_mtx.h"
#include "pipe/p_defines.h"
#include "pipebuffer/pb_slab.h"

struct intel_device_info;
struct pipe_debug_callback;
struct isl_surf;
struct iris_syncobj;

/**
 * Memory zones.  When allocating a buffer, you can request that it is
 * placed into a specific region of the virtual address space (PPGTT).
 *
 * Most buffers can go anywhere (IRIS_MEMZONE_OTHER).  Some buffers are
 * accessed via an offset from a base address.  STATE_BASE_ADDRESS has
 * a maximum 4GB size for each region, so we need to restrict those
 * buffers to be within 4GB of the base.  Each memory zone corresponds
 * to a particular base address.
 *
 * We lay out the virtual address space as follows:
 *
 * - [0,   4K): Nothing            (empty page for null address)
 * - [4K,  4G): Shaders            (Instruction Base Address)
 * - [4G,  8G): Surfaces & Binders (Surface State Base Address, Bindless ...)
 * - [8G, 12G): Dynamic            (Dynamic State Base Address)
 * - [12G, *):  Other              (everything else in the full 48-bit VMA)
 *
 * A special buffer for border color lives at the start of the dynamic state
 * memory zone.  This unfortunately has to be handled specially because the
 * SAMPLER_STATE "Indirect State Pointer" field is only a 24-bit pointer.
 *
 * Each GL context uses a separate GEM context, which technically gives them
 * each a separate VMA.  However, we assign address globally, so buffers will
 * have the same address in all GEM contexts.  This lets us have a single BO
 * field for the address, which is easy and cheap.
 */
enum iris_memory_zone {
   IRIS_MEMZONE_SHADER,
   IRIS_MEMZONE_BINDER,
   IRIS_MEMZONE_BINDLESS,
   IRIS_MEMZONE_SURFACE,
   IRIS_MEMZONE_DYNAMIC,
   IRIS_MEMZONE_OTHER,

   IRIS_MEMZONE_BORDER_COLOR_POOL,
};

/* Intentionally exclude single buffer "zones" */
#define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1)

#define IRIS_BINDER_SIZE (64 * 1024)
#define IRIS_MAX_BINDERS 100
#define IRIS_BINDLESS_SIZE (8 * 1024 * 1024)

#define IRIS_MEMZONE_SHADER_START     (0ull * (1ull << 32))
#define IRIS_MEMZONE_BINDER_START     (1ull * (1ull << 32))
#define IRIS_MEMZONE_BINDLESS_START   (IRIS_MEMZONE_BINDER_START + IRIS_MAX_BINDERS * IRIS_BINDER_SIZE)
#define IRIS_MEMZONE_SURFACE_START    (IRIS_MEMZONE_BINDLESS_START + IRIS_BINDLESS_SIZE)
#define IRIS_MEMZONE_DYNAMIC_START    (2ull * (1ull << 32))
#define IRIS_MEMZONE_OTHER_START      (3ull * (1ull << 32))

#define IRIS_BORDER_COLOR_POOL_ADDRESS IRIS_MEMZONE_DYNAMIC_START
#define IRIS_BORDER_COLOR_POOL_SIZE (64 * 1024)

/**
 * Classification of the various incoherent caches of the GPU into a number of
 * caching domains.
 */
enum iris_domain {
   /** Render color cache. */
   IRIS_DOMAIN_RENDER_WRITE = 0,
   /** (Hi)Z/stencil cache. */
   IRIS_DOMAIN_DEPTH_WRITE,
   /** Data port (HDC) cache. */
   IRIS_DOMAIN_DATA_WRITE,
   /** Any other read-write cache. */
   IRIS_DOMAIN_OTHER_WRITE,
   /** Vertex cache. */
   IRIS_DOMAIN_VF_READ,
   /** Any other read-only cache. */
   IRIS_DOMAIN_OTHER_READ,
   /** Number of caching domains. */
   NUM_IRIS_DOMAINS,
   /** Not a real cache, use to opt out of the cache tracking mechanism. */
   IRIS_DOMAIN_NONE = NUM_IRIS_DOMAINS
};

/**
 * Whether a caching domain is guaranteed not to write any data to memory.
 */
static inline bool
iris_domain_is_read_only(enum iris_domain access)
{
   return access == IRIS_DOMAIN_OTHER_READ ||
          access == IRIS_DOMAIN_VF_READ;
}

enum iris_mmap_mode {
   IRIS_MMAP_NONE, /**< Cannot be mapped */
   IRIS_MMAP_UC, /**< Fully uncached memory map */
   IRIS_MMAP_WC, /**< Write-combining map with no caching of reads */
   IRIS_MMAP_WB, /**< Write-back mapping with CPU caches enabled */
};

enum iris_heap {
   IRIS_HEAP_SYSTEM_MEMORY,
   IRIS_HEAP_DEVICE_LOCAL,
   IRIS_HEAP_DEVICE_LOCAL_PREFERRED,
   IRIS_HEAP_MAX,
};

extern const char *iris_heap_to_string[];

#define IRIS_BATCH_COUNT 2

struct iris_bo_screen_deps {
   struct iris_syncobj *write_syncobjs[IRIS_BATCH_COUNT];
   struct iris_syncobj *read_syncobjs[IRIS_BATCH_COUNT];
};

struct iris_bo {
   /**
    * Size in bytes of the buffer object.
    *
    * The size may be larger than the size originally requested for the
    * allocation, such as being aligned to page size.
    */
   uint64_t size;

   /** Buffer manager context associated with this buffer object */
   struct iris_bufmgr *bufmgr;

   /** Pre-computed hash using _mesa_hash_pointer for cache tracking sets */
   uint32_t hash;

   /** The GEM handle for this buffer object. */
   uint32_t gem_handle;

   /**
    * Virtual address of the buffer inside the PPGTT (Per-Process Graphics
    * Translation Table).
    *
    * Although each hardware context has its own VMA, we assign BO's to the
    * same address in all contexts, for simplicity.
    */
   uint64_t address;

   /**
    * If non-zero, then this bo has an aux-map translation to this address.
    */
   uint64_t aux_map_address;

   /**
    * If this BO is referenced by a batch, this _may_ be the index into the
    * batch->exec_bos[] list.
    *
    * Note that a single buffer may be used by multiple batches/contexts,
    * and thus appear in multiple lists, but we only track one index here.
    * In the common case one can guess that batch->exec_bos[bo->index] == bo
    * and double check if that's true to avoid a linear list walk.
    *
    * XXX: this is not ideal now that we have more than one batch per context,
    * XXX: as the index will flop back and forth between the render index and
    * XXX: compute index...
    */
   unsigned index;

   int refcount;
   const char *name;

   /** BO cache list */
   struct list_head head;

   /**
    * Synchronization sequence number of most recent access of this BO from
    * each caching domain.
    *
    * Although this is a global field, use in multiple contexts should be
    * safe, see iris_emit_buffer_barrier_for() for details.
    *
    * Also align it to 64 bits. This will make atomic operations faster on 32
    * bit platforms.
    */
   uint64_t last_seqnos[NUM_IRIS_DOMAINS] __attribute__ ((aligned (8)));

   /** Up to one per screen, may need realloc. */
   struct iris_bo_screen_deps *deps;
   int deps_size;

   /**
    * Boolean of whether the GPU is definitely not accessing the buffer.
    *
    * This is only valid when reusable, since non-reusable
    * buffers are those that have been shared with other
    * processes, so we don't know their state.
    */
   bool idle;

   union {
      struct {
         uint64_t kflags;

         time_t free_time;

         /** Mapped address for the buffer, saved across map/unmap cycles */
         void *map;

         /** List of GEM handle exports of this buffer (bo_export) */
         struct list_head exports;

         /**
          * Kernel-assigned global name for this object
          *
          * List contains both flink named and prime fd'd objects
          */
         unsigned global_name;

         /** The mmap coherency mode selected at BO allocation time */
         enum iris_mmap_mode mmap_mode;

         /** The heap selected at BO allocation time */
         enum iris_heap heap;

         /** Was this buffer imported from an external client? */
         bool imported;

         /** Has this buffer been exported to external clients? */
         bool exported;

         /** Boolean of whether this buffer can be re-used */
         bool reusable;

         /** Boolean of whether this buffer points into user memory */
         bool userptr;
      } real;
      struct {
         struct pb_slab_entry entry;
         struct iris_bo *real;
      } slab;
   };
};

#define BO_ALLOC_ZEROED      (1<<0)
#define BO_ALLOC_COHERENT    (1<<1)
#define BO_ALLOC_SMEM        (1<<2)
#define BO_ALLOC_SCANOUT     (1<<3)
#define BO_ALLOC_NO_SUBALLOC (1<<4)
#define BO_ALLOC_LMEM        (1<<5)

/**
 * Allocate a buffer object.
 *
 * Buffer objects are not necessarily initially mapped into CPU virtual
 * address space or graphics device aperture.  They must be mapped
 * using iris_bo_map() to be used by the CPU.
 */
struct iris_bo *iris_bo_alloc(struct iris_bufmgr *bufmgr,
                              const char *name,
                              uint64_t size,
                              uint32_t alignment,
                              enum iris_memory_zone memzone,
                              unsigned flags);

struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
                       void *ptr, size_t size,
                       enum iris_memory_zone memzone);

/** Takes a reference on a buffer object */
static inline void
iris_bo_reference(struct iris_bo *bo)
{
   p_atomic_inc(&bo->refcount);
}

/**
 * Releases a reference on a buffer object, freeing the data if
 * no references remain.
 */
void iris_bo_unreference(struct iris_bo *bo);

#define MAP_READ          PIPE_MAP_READ
#define MAP_WRITE         PIPE_MAP_WRITE
#define MAP_ASYNC         PIPE_MAP_UNSYNCHRONIZED
#define MAP_PERSISTENT    PIPE_MAP_PERSISTENT
#define MAP_COHERENT      PIPE_MAP_COHERENT
/* internal */
#define MAP_RAW           (PIPE_MAP_DRV_PRV << 0)
#define MAP_INTERNAL_MASK (MAP_RAW)

#define MAP_FLAGS         (MAP_READ | MAP_WRITE | MAP_ASYNC | \
                           MAP_PERSISTENT | MAP_COHERENT | MAP_INTERNAL_MASK)

/**
 * Maps the buffer into userspace.
 *
 * This function will block waiting for any existing execution on the
 * buffer to complete, first.  The resulting mapping is returned.
 */
MUST_CHECK void *iris_bo_map(struct pipe_debug_callback *dbg,
                             struct iris_bo *bo, unsigned flags);

/**
 * Reduces the refcount on the userspace mapping of the buffer
 * object.
 */
static inline int iris_bo_unmap(struct iris_bo *bo) { return 0; }

/**
 * Waits for rendering to an object by the GPU to have completed.
 *
 * This is not required for any access to the BO by bo_map,
 * bo_subdata, etc.  It is merely a way for the driver to implement
 * glFinish.
 */
void iris_bo_wait_rendering(struct iris_bo *bo);


/**
 * Unref a buffer manager instance.
 */
void iris_bufmgr_unref(struct iris_bufmgr *bufmgr);

/**
 * Create a visible name for a buffer which can be used by other apps
 *
 * \param buf Buffer to create a name for
 * \param name Returned name
 */
int iris_bo_flink(struct iris_bo *bo, uint32_t *name);

/**
 * Returns true if the BO is backed by a real GEM object, false if it's
 * a wrapper that's suballocated from a larger BO.
 */
static inline bool
iris_bo_is_real(struct iris_bo *bo)
{
   return bo->gem_handle != 0;
}

/**
 * Unwrap any slab-allocated wrapper BOs to get the BO for the underlying
 * backing storage, which is a real BO associated with a GEM object.
 */
static inline struct iris_bo *
iris_get_backing_bo(struct iris_bo *bo)
{
   if (!iris_bo_is_real(bo))
      bo = bo->slab.real;

   /* We only allow one level of wrapping. */
   assert(iris_bo_is_real(bo));

   return bo;
}

/**
 * Is this buffer shared with external clients (imported or exported)?
 */
static inline bool
iris_bo_is_external(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported || bo->real.imported;
}

static inline bool
iris_bo_is_imported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.imported;
}

static inline bool
iris_bo_is_exported(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.exported;
}

static inline enum iris_mmap_mode
iris_bo_mmap_mode(const struct iris_bo *bo)
{
   bo = iris_get_backing_bo((struct iris_bo *) bo);
   return bo->real.mmap_mode;
}

/**
 * Mark a buffer as being shared with other external clients.
 */
void iris_bo_mark_exported(struct iris_bo *bo);

/**
 * Returns true  if mapping the buffer for write could cause the process
 * to block, due to the object being active in the GPU.
 */
bool iris_bo_busy(struct iris_bo *bo);

/**
 * Specify the volatility of the buffer.
 * \param bo Buffer to create a name for
 * \param madv The purgeable status
 *
 * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure. If you subsequently require the buffer,
 * then you must pass I915_MADV_WILLNEED to mark the buffer as required.
 *
 * Returns 1 if the buffer was retained, or 0 if it was discarded whilst
 * marked as I915_MADV_DONTNEED.
 */
int iris_bo_madvise(struct iris_bo *bo, int madv);

struct iris_bufmgr *iris_bufmgr_get_for_fd(struct intel_device_info *devinfo,
                                           int fd, bool bo_reuse);
int iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr);

struct iris_bo *iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
                                             const char *name,
                                             unsigned handle);

void* iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr);

int iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns);

uint32_t iris_create_hw_context(struct iris_bufmgr *bufmgr);
uint32_t iris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);
int iris_kernel_context_get_priority(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

#define IRIS_CONTEXT_LOW_PRIORITY    ((I915_CONTEXT_MIN_USER_PRIORITY-1)/2)
#define IRIS_CONTEXT_MEDIUM_PRIORITY (I915_CONTEXT_DEFAULT_PRIORITY)
#define IRIS_CONTEXT_HIGH_PRIORITY   ((I915_CONTEXT_MAX_USER_PRIORITY+1)/2)

void iris_hw_context_set_unrecoverable(struct iris_bufmgr *bufmgr,
                                       uint32_t ctx_id);
int iris_hw_context_set_priority(struct iris_bufmgr *bufmgr,
                                 uint32_t ctx_id, int priority);

void iris_destroy_kernel_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id);

int iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling);
int iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf);

int iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd);
struct iris_bo *iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd);

/**
 * Exports a bo as a GEM handle into a given DRM file descriptor
 * \param bo Buffer to export
 * \param drm_fd File descriptor where the new handle is created
 * \param out_handle Pointer to store the new handle
 *
 * Returns 0 if the buffer was successfully exported, a non zero error code
 * otherwise.
 */
int iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
                                         uint32_t *out_handle);

uint32_t iris_bo_export_gem_handle(struct iris_bo *bo);

int iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *out);

/**
 * Returns the BO's address relative to the appropriate base address.
 *
 * All of our base addresses are programmed to the start of a 4GB region,
 * so simply returning the bottom 32 bits of the BO address will give us
 * the offset from whatever base address corresponds to that memory region.
 */
static inline uint32_t
iris_bo_offset_from_base_address(struct iris_bo *bo)
{
   /* This only works for buffers in the memory zones corresponding to a
    * base address - the top, unbounded memory zone doesn't have a base.
    */
   assert(bo->address < IRIS_MEMZONE_OTHER_START);
   return bo->address;
}

/**
 * Track access of a BO from the specified caching domain and sequence number.
 *
 * Can be used without locking.  Only the most recent access (i.e. highest
 * seqno) is tracked.
 */
static inline void
iris_bo_bump_seqno(struct iris_bo *bo, uint64_t seqno,
                   enum iris_domain type)
{
   uint64_t *const last_seqno = &bo->last_seqnos[type];
   uint64_t tmp, prev_seqno = p_atomic_read(last_seqno);

   while (prev_seqno < seqno &&
          prev_seqno != (tmp = p_atomic_cmpxchg(last_seqno, prev_seqno, seqno)))
      prev_seqno = tmp;
}

enum iris_memory_zone iris_memzone_for_address(uint64_t address);

int iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr);

simple_mtx_t *iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr);

#endif /* IRIS_BUFMGR_H */