summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/radeon/r600_pipe_common.h
blob: 4e377d45407131b47600510e30d1f85630c7d677 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * This file contains common screen and context structures and functions
 * for r600g and radeonsi.
 */

#ifndef R600_PIPE_COMMON_H
#define R600_PIPE_COMMON_H

#include <stdio.h>

#include "amd/common/ac_binary.h"

#include "radeon/radeon_winsys.h"

#include "util/disk_cache.h"
#include "util/u_blitter.h"
#include "util/list.h"
#include "util/u_range.h"
#include "util/slab.h"
#include "util/u_suballoc.h"
#include "util/u_transfer.h"
#include "util/u_threaded_context.h"

struct u_log_context;
struct si_screen;
struct si_context;
struct si_perfcounters;
struct tgsi_shader_info;
struct si_qbo_state;

/* Only 32-bit buffer allocations are supported, gallium doesn't support more
 * at the moment.
 */
struct r600_resource {
	struct threaded_resource	b;

	/* Winsys objects. */
	struct pb_buffer		*buf;
	uint64_t			gpu_address;
	/* Memory usage if the buffer placement is optimal. */
	uint64_t			vram_usage;
	uint64_t			gart_usage;

	/* Resource properties. */
	uint64_t			bo_size;
	unsigned			bo_alignment;
	enum radeon_bo_domain		domains;
	enum radeon_bo_flag		flags;
	unsigned			bind_history;
	int				max_forced_staging_uploads;

	/* The buffer range which is initialized (with a write transfer,
	 * streamout, DMA, or as a random access target). The rest of
	 * the buffer is considered invalid and can be mapped unsynchronized.
	 *
	 * This allows unsychronized mapping of a buffer range which hasn't
	 * been used yet. It's for applications which forget to use
	 * the unsynchronized map flag and expect the driver to figure it out.
         */
	struct util_range		valid_buffer_range;

	/* For buffers only. This indicates that a write operation has been
	 * performed by TC L2, but the cache hasn't been flushed.
	 * Any hw block which doesn't use or bypasses TC L2 should check this
	 * flag and flush the cache before using the buffer.
	 *
	 * For example, TC L2 must be flushed if a buffer which has been
	 * modified by a shader store instruction is about to be used as
	 * an index buffer. The reason is that VGT DMA index fetching doesn't
	 * use TC L2.
	 */
	bool				TC_L2_dirty;

	/* Whether the resource has been exported via resource_get_handle. */
	unsigned			external_usage; /* PIPE_HANDLE_USAGE_* */

	/* Whether this resource is referenced by bindless handles. */
	bool				texture_handle_allocated;
	bool				image_handle_allocated;
};

struct r600_transfer {
	struct threaded_transfer	b;
	struct r600_resource		*staging;
	unsigned			offset;
};

struct r600_fmask_info {
	uint64_t offset;
	uint64_t size;
	unsigned alignment;
	unsigned pitch_in_pixels;
	unsigned bank_height;
	unsigned slice_tile_max;
	unsigned tile_mode_index;
	unsigned tile_swizzle;
};

struct r600_cmask_info {
	uint64_t offset;
	uint64_t size;
	unsigned alignment;
	unsigned slice_tile_max;
	uint64_t base_address_reg;
};

struct r600_texture {
	struct r600_resource		resource;

	struct radeon_surf		surface;
	uint64_t			size;
	struct r600_texture		*flushed_depth_texture;

	/* Colorbuffer compression and fast clear. */
	struct r600_fmask_info		fmask;
	struct r600_cmask_info		cmask;
	struct r600_resource		*cmask_buffer;
	uint64_t			dcc_offset; /* 0 = disabled */
	unsigned			cb_color_info; /* fast clear enable bit */
	unsigned			color_clear_value[2];
	unsigned			last_msaa_resolve_target_micro_mode;
	unsigned			num_level0_transfers;

	/* Depth buffer compression and fast clear. */
	uint64_t			htile_offset;
	float				depth_clear_value;
	uint16_t			dirty_level_mask; /* each bit says if that mipmap is compressed */
	uint16_t			stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
	enum pipe_format		db_render_format:16;
	uint8_t				stencil_clear_value;
	bool				tc_compatible_htile:1;
	bool				depth_cleared:1; /* if it was cleared at least once */
	bool				stencil_cleared:1; /* if it was cleared at least once */
	bool				upgraded_depth:1; /* upgraded from unorm to Z32_FLOAT */
	bool				is_depth:1;
	bool				db_compatible:1;
	bool				can_sample_z:1;
	bool				can_sample_s:1;

	/* We need to track DCC dirtiness, because st/dri usually calls
	 * flush_resource twice per frame (not a bug) and we don't wanna
	 * decompress DCC twice. Also, the dirty tracking must be done even
	 * if DCC isn't used, because it's required by the DCC usage analysis
	 * for a possible future enablement.
	 */
	bool				separate_dcc_dirty:1;
	/* Statistics gathering for the DCC enablement heuristic. */
	bool				dcc_gather_statistics:1;
	/* Counter that should be non-zero if the texture is bound to a
	 * framebuffer.
	 */
	unsigned                        framebuffers_bound;
	/* Whether the texture is a displayable back buffer and needs DCC
	 * decompression, which is expensive. Therefore, it's enabled only
	 * if statistics suggest that it will pay off and it's allocated
	 * separately. It can't be bound as a sampler by apps. Limited to
	 * target == 2D and last_level == 0. If enabled, dcc_offset contains
	 * the absolute GPUVM address, not the relative one.
	 */
	struct r600_resource		*dcc_separate_buffer;
	/* When DCC is temporarily disabled, the separate buffer is here. */
	struct r600_resource		*last_dcc_separate_buffer;
	/* Estimate of how much this color buffer is written to in units of
	 * full-screen draws: ps_invocations / (width * height)
	 * Shader kills, late Z, and blending with trivial discards make it
	 * inaccurate (we need to count CB updates, not PS invocations).
	 */
	unsigned			ps_draw_ratio;
	/* The number of clears since the last DCC usage analysis. */
	unsigned			num_slow_clears;
};

struct r600_surface {
	struct pipe_surface		base;

	/* These can vary with block-compressed textures. */
	uint16_t width0;
	uint16_t height0;

	bool color_initialized:1;
	bool depth_initialized:1;

	/* Misc. color flags. */
	bool color_is_int8:1;
	bool color_is_int10:1;
	bool dcc_incompatible:1;

	/* Color registers. */
	unsigned cb_color_info;
	unsigned cb_color_view;
	unsigned cb_color_attrib;
	unsigned cb_color_attrib2;	/* GFX9 and later */
	unsigned cb_dcc_control;	/* VI and later */
	unsigned spi_shader_col_format:8;	/* no blending, no alpha-to-coverage. */
	unsigned spi_shader_col_format_alpha:8;	/* alpha-to-coverage */
	unsigned spi_shader_col_format_blend:8;	/* blending without alpha. */
	unsigned spi_shader_col_format_blend_alpha:8; /* blending with alpha. */

	/* DB registers. */
	uint64_t db_depth_base;		/* DB_Z_READ/WRITE_BASE */
	uint64_t db_stencil_base;
	uint64_t db_htile_data_base;
	unsigned db_depth_info;
	unsigned db_z_info;
	unsigned db_z_info2;		/* GFX9+ */
	unsigned db_depth_view;
	unsigned db_depth_size;
	unsigned db_depth_slice;
	unsigned db_stencil_info;
	unsigned db_stencil_info2;	/* GFX9+ */
	unsigned db_htile_surface;
};

struct si_mmio_counter {
	unsigned busy;
	unsigned idle;
};

union si_mmio_counters {
	struct {
		/* For global GPU load including SDMA. */
		struct si_mmio_counter gpu;

		/* GRBM_STATUS */
		struct si_mmio_counter spi;
		struct si_mmio_counter gui;
		struct si_mmio_counter ta;
		struct si_mmio_counter gds;
		struct si_mmio_counter vgt;
		struct si_mmio_counter ia;
		struct si_mmio_counter sx;
		struct si_mmio_counter wd;
		struct si_mmio_counter bci;
		struct si_mmio_counter sc;
		struct si_mmio_counter pa;
		struct si_mmio_counter db;
		struct si_mmio_counter cp;
		struct si_mmio_counter cb;

		/* SRBM_STATUS2 */
		struct si_mmio_counter sdma;

		/* CP_STAT */
		struct si_mmio_counter pfp;
		struct si_mmio_counter meq;
		struct si_mmio_counter me;
		struct si_mmio_counter surf_sync;
		struct si_mmio_counter cp_dma;
		struct si_mmio_counter scratch_ram;
	} named;
	unsigned array[0];
};

struct r600_memory_object {
	struct pipe_memory_object	b;
	struct pb_buffer		*buf;
	uint32_t			stride;
	uint32_t			offset;
};

/* This encapsulates a state or an operation which can emitted into the GPU
 * command stream. */
struct r600_atom {
	void (*emit)(struct si_context *ctx, struct r600_atom *state);
	unsigned short		id;
};

/* Saved CS data for debugging features. */
struct radeon_saved_cs {
	uint32_t			*ib;
	unsigned			num_dw;

	struct radeon_bo_list_item	*bo_list;
	unsigned			bo_count;
};

struct r600_common_context {
	struct pipe_context b; /* base class */

	struct si_screen		*screen;
	struct radeon_winsys		*ws;
	struct radeon_winsys_ctx	*ctx;
	enum radeon_family		family;
	enum chip_class			chip_class;
	struct radeon_winsys_cs		*gfx_cs;
	struct radeon_winsys_cs		*dma_cs;
	struct pipe_fence_handle	*last_gfx_fence;
	struct pipe_fence_handle	*last_sdma_fence;
	struct r600_resource		*eop_bug_scratch;
	struct u_upload_mgr		*cached_gtt_allocator;
	unsigned			num_gfx_cs_flushes;
	unsigned			initial_gfx_cs_size;
	unsigned			gpu_reset_counter;
	unsigned			last_dirty_tex_counter;
	unsigned			last_compressed_colortex_counter;
	unsigned			last_num_draw_calls;

	struct threaded_context		*tc;
	struct u_suballocator		*allocator_zeroed_memory;
	struct slab_child_pool		pool_transfers;
	struct slab_child_pool		pool_transfers_unsync; /* for threaded_context */

	/* Current unaccounted memory usage. */
	uint64_t			vram;
	uint64_t			gtt;

	/* Additional context states. */
	unsigned flags; /* flush flags */

	/* Queries. */
	/* Maintain the list of active queries for pausing between IBs. */
	int				num_occlusion_queries;
	int				num_perfect_occlusion_queries;
	struct list_head		active_queries;
	unsigned			num_cs_dw_queries_suspend;
	/* Misc stats. */
	unsigned			num_draw_calls;
	unsigned			num_decompress_calls;
	unsigned			num_mrt_draw_calls;
	unsigned			num_prim_restart_calls;
	unsigned			num_spill_draw_calls;
	unsigned			num_compute_calls;
	unsigned			num_spill_compute_calls;
	unsigned			num_dma_calls;
	unsigned			num_cp_dma_calls;
	unsigned			num_vs_flushes;
	unsigned			num_ps_flushes;
	unsigned			num_cs_flushes;
	unsigned			num_cb_cache_flushes;
	unsigned			num_db_cache_flushes;
	unsigned			num_L2_invalidates;
	unsigned			num_L2_writebacks;
	unsigned			num_resident_handles;
	uint64_t			num_alloc_tex_transfer_bytes;
	unsigned			last_tex_ps_draw_ratio; /* for query */

	/* Render condition. */
	struct r600_atom		render_cond_atom;
	struct pipe_query		*render_cond;
	unsigned			render_cond_mode;
	bool				render_cond_invert;
	bool				render_cond_force_off; /* for u_blitter */

	/* Statistics gathering for the DCC enablement heuristic. It can't be
	 * in r600_texture because r600_texture can be shared by multiple
	 * contexts. This is for back buffers only. We shouldn't get too many
	 * of those.
	 *
	 * X11 DRI3 rotates among a finite set of back buffers. They should
	 * all fit in this array. If they don't, separate DCC might never be
	 * enabled by DCC stat gathering.
	 */
	struct {
		struct r600_texture		*tex;
		/* Query queue: 0 = usually active, 1 = waiting, 2 = readback. */
		struct pipe_query		*ps_stats[3];
		/* If all slots are used and another slot is needed,
		 * the least recently used slot is evicted based on this. */
		int64_t				last_use_timestamp;
		bool				query_active;
	} dcc_stats[5];

	struct pipe_device_reset_callback device_reset_callback;
	struct u_log_context		*log;

	void				*query_result_shader;

	/* Copy one resource to another using async DMA. */
	void (*dma_copy)(struct pipe_context *ctx,
			 struct pipe_resource *dst,
			 unsigned dst_level,
			 unsigned dst_x, unsigned dst_y, unsigned dst_z,
			 struct pipe_resource *src,
			 unsigned src_level,
			 const struct pipe_box *src_box);

	void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst,
				 uint64_t offset, uint64_t size, unsigned value);
};

/* r600_buffer_common.c */
bool si_rings_is_buffer_referenced(struct si_context *sctx,
				   struct pb_buffer *buf,
				   enum radeon_bo_usage usage);
void *si_buffer_map_sync_with_rings(struct si_context *sctx,
				    struct r600_resource *resource,
				    unsigned usage);
void si_init_resource_fields(struct si_screen *sscreen,
			     struct r600_resource *res,
			     uint64_t size, unsigned alignment);
bool si_alloc_resource(struct si_screen *sscreen,
		       struct r600_resource *res);
struct pipe_resource *si_aligned_buffer_create(struct pipe_screen *screen,
					       unsigned flags,
					       unsigned usage,
					       unsigned size,
					       unsigned alignment);
void si_replace_buffer_storage(struct pipe_context *ctx,
			       struct pipe_resource *dst,
			       struct pipe_resource *src);
void si_init_screen_buffer_functions(struct si_screen *sscreen);
void si_init_buffer_functions(struct si_context *sctx);

/* r600_perfcounters.c */
void si_perfcounters_destroy(struct si_screen *sscreen);

/* r600_texture.c */
bool si_prepare_for_dma_blit(struct si_context *sctx,
			     struct r600_texture *rdst,
			     unsigned dst_level, unsigned dstx,
			     unsigned dsty, unsigned dstz,
			     struct r600_texture *rsrc,
			     unsigned src_level,
			     const struct pipe_box *src_box);
void si_texture_get_fmask_info(struct si_screen *sscreen,
			       struct r600_texture *rtex,
			       unsigned nr_samples,
			       struct r600_fmask_info *out);
void si_texture_get_cmask_info(struct si_screen *sscreen,
			       struct r600_texture *rtex,
			       struct r600_cmask_info *out);
void si_eliminate_fast_color_clear(struct si_context *sctx,
				   struct r600_texture *rtex);
void si_texture_discard_cmask(struct si_screen *sscreen,
			      struct r600_texture *rtex);
bool si_init_flushed_depth_texture(struct pipe_context *ctx,
				   struct pipe_resource *texture,
				   struct r600_texture **staging);
void si_print_texture_info(struct si_screen *sscreen,
			   struct r600_texture *rtex, struct u_log_context *log);
struct pipe_resource *si_texture_create(struct pipe_screen *screen,
					const struct pipe_resource *templ);
bool vi_dcc_formats_compatible(enum pipe_format format1,
			       enum pipe_format format2);
bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex,
				     unsigned level,
				     enum pipe_format view_format);
void vi_disable_dcc_if_incompatible_format(struct si_context *sctx,
					   struct pipe_resource *tex,
					   unsigned level,
					   enum pipe_format view_format);
struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
					      struct pipe_resource *texture,
					      const struct pipe_surface *templ,
					      unsigned width0, unsigned height0,
					      unsigned width, unsigned height);
unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);
void vi_separate_dcc_try_enable(struct si_context *sctx,
				struct r600_texture *tex);
void vi_separate_dcc_start_query(struct si_context *sctx,
				 struct r600_texture *tex);
void vi_separate_dcc_stop_query(struct si_context *sctx,
				struct r600_texture *tex);
void vi_separate_dcc_process_and_reset_stats(struct pipe_context *ctx,
					     struct r600_texture *tex);
bool si_texture_disable_dcc(struct si_context *sctx,
			    struct r600_texture *rtex);
void si_init_screen_texture_functions(struct si_screen *sscreen);
void si_init_context_texture_functions(struct si_context *sctx);


/* Inline helpers. */

static inline struct r600_resource *r600_resource(struct pipe_resource *r)
{
	return (struct r600_resource*)r;
}

static inline void
r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
{
	pipe_resource_reference((struct pipe_resource **)ptr,
				(struct pipe_resource *)res);
}

static inline void
r600_texture_reference(struct r600_texture **ptr, struct r600_texture *res)
{
	pipe_resource_reference((struct pipe_resource **)ptr, &res->resource.b.b);
}

static inline bool
vi_dcc_enabled(struct r600_texture *tex, unsigned level)
{
	return tex->dcc_offset && level < tex->surface.num_dcc_levels;
}

#endif