summaryrefslogtreecommitdiff
path: root/src/panfrost/lib/pan_texture.c
blob: 0bf526d1dcf0a2a0a5a85d27d6c1f8fa323a0390 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
/*
 * Copyright (C) 2008 VMware, Inc.
 * Copyright (C) 2014 Broadcom
 * Copyright (C) 2018-2019 Alyssa Rosenzweig
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include "util/macros.h"
#include "util/u_math.h"
#include "pan_texture.h"
#include "panfrost-quirks.h"

/* Generates a texture descriptor. Ideally, descriptors are immutable after the
 * texture is created, so we can keep these hanging around in GPU memory in a
 * dedicated BO and not have to worry. In practice there are some minor gotchas
 * with this (the driver sometimes will change the format of a texture on the
 * fly for compression) but it's fast enough to just regenerate the descriptor
 * in those cases, rather than monkeypatching at drawtime. A texture descriptor
 * consists of a 32-byte header followed by pointers. 
 */

/* List of supported modifiers, in descending order of preference. AFBC is
 * faster than u-interleaved tiling which is faster than linear. Within AFBC,
 * enabling the YUV-like transform is typically a win where possible, which is
 * why the YTR variant comes first. */

uint64_t pan_best_modifiers[PAN_MODIFIER_COUNT] = {
        /* AFBC, 16x16 block size, sparse, with the YTR transform */
        DRM_FORMAT_MOD_ARM_AFBC(
                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                AFBC_FORMAT_MOD_SPARSE |
                AFBC_FORMAT_MOD_YTR),

        /* AFBC, 16x16 block size, sparse, without the YTR transform */
        DRM_FORMAT_MOD_ARM_AFBC(
                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                AFBC_FORMAT_MOD_SPARSE),

        /* Uncompressed fallbacks: 16x16 u-interleaved tiling, then linear */
        DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED,
        DRM_FORMAT_MOD_LINEAR
};

/* Translate a DRM format modifier into the mali_texture_layout value that is
 * packed into a texture descriptor. Every AFBC variant maps to the AFBC
 * layout; besides that only 16x16 u-interleaved tiling and linear are
 * accepted. Any other modifier reaches unreachable(). */

static enum mali_texture_layout
panfrost_modifier_to_layout(uint64_t modifier)
{
        /* AFBC carries flag bits, so it cannot be matched by equality */
        if (drm_is_afbc(modifier))
                return MALI_TEXTURE_LAYOUT_AFBC;

        if (modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED)
                return MALI_TEXTURE_LAYOUT_TILED;

        if (modifier == DRM_FORMAT_MOD_LINEAR)
                return MALI_TEXTURE_LAYOUT_LINEAR;

        unreachable("Invalid modifer");
}

/* Decide whether the descriptor has to carry an explicit stride. For each mip
 * level in [first_level, last_level] the tightly-packed stride is derived from
 * the minified width and the format's block size, and compared with the stride
 * recorded in the slice. Returns true as soon as one level differs. Only
 * linear textures are checked; tiled/compressed textures have strict alignment
 * requirements for their strides as it is, so they always return false. */

static bool
panfrost_needs_explicit_stride(uint64_t modifier,
                               enum pipe_format format,
                               struct panfrost_slice *slices,
                               uint16_t width,
                               unsigned first_level,
                               unsigned last_level)
{
        if (modifier != DRM_FORMAT_MOD_LINEAR)
                return false;

        const unsigned block_bytes = util_format_get_blocksize(format);
        const unsigned block_width = util_format_get_blockwidth(format);

        for (unsigned level = first_level; level <= last_level; ++level) {
                unsigned stored = slices[level].line_stride;

                /* Number of blocks needed to cover one row of this level */
                unsigned blocks_per_row =
                        DIV_ROUND_UP(u_minify(width, level), block_width);
                unsigned tight = blocks_per_row * block_bytes;

                if (stored != tight)
                        return true;
        }

        return false;
}

/* Adaptive Scalable Texture Compression (ASTC) corresponds to just a few
 * texture types in the hardware, but in fact can be parametrized to have
 * various widths and heights for the so-called "stretch factor". It turns out
 * these parameters are stuffed in the bottom bits of the payload pointers.
 * This function computes the magic stuffing constant for one block dimension
 * of the ASTC format in use. The constant in a given dimension is 3 bits, and
 * two are stored side-by-side, one per active dimension.
 *
 * dim must lie in [4, 12]; the result is the dimension clamped to 11 and then
 * rebased so that 4 maps to 0.
 */

static unsigned
panfrost_astc_stretch(unsigned dim)
{
        assert(dim >= 4 && dim <= 12);

        /* 11 and 12 produce the same value */
        unsigned clamped = MIN2(dim, 11);

        return clamped - 4;
}

/* Texture addresses are tagged with information about compressed formats.
 * This returns the tag to OR into the base address:
 *
 *  - AFBC: a single bit saying whether the colorspace transform (YTR) is
 *    enabled (RGB and RGBA only).
 *  - ASTC: a "stretch factor" encoding the block size, with the height
 *    constant in bits [5:3] and the width constant in bits [2:0].
 *  - anything else: 0.
 */

static unsigned
panfrost_compression_tag(
                const struct util_format_description *desc, uint64_t modifier)
{
        if (drm_is_afbc(modifier)) {
                if (modifier & AFBC_FORMAT_MOD_YTR)
                        return 1;

                return 0;
        }

        if (desc->layout != UTIL_FORMAT_LAYOUT_ASTC)
                return 0;

        unsigned stretch_h = panfrost_astc_stretch(desc->block.height);
        unsigned stretch_w = panfrost_astc_stretch(desc->block.width);

        return (stretch_h << 3) | stretch_w;
}


/* Cubemaps have 6 faces as "layers" in between each actual layer, so the
 * incoming layer range is really a combined (layer * 6 + face) index. Split
 * it: on return *first_face / *last_face hold the face part (index % 6) and
 * *first_layer / *last_layer hold the layer part (index / 6).
 *
 * TODO: logic wrong in the asserted out cases ... can they happen, perhaps
 * from cubemap arrays? A range spanning several layers is only accepted when
 * it covers whole cubes (faces 0 through 5). */

static void
panfrost_adjust_cube_dimensions(
                unsigned *first_face, unsigned *last_face,
                unsigned *first_layer, unsigned *last_layer)
{
        const unsigned combined_first = *first_layer;
        const unsigned combined_last = *last_layer;

        *first_face = combined_first % 6;
        *last_face = combined_last % 6;

        *first_layer = combined_first / 6;
        *last_layer = combined_last / 6;

        assert((*first_layer == *last_layer) || (*first_face == 0 && *last_face == 5));
}

/* Following the texture descriptor is a number of pointers. How many? One per
 * (level, layer, face, sample) combination in the requested ranges, where a
 * sample count of zero is counted as one. For cubes the layer range is first
 * split into layers and faces. With a manual stride every pointer is paired
 * with a stride entry, doubling the count. */

static unsigned
panfrost_texture_num_elements(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                bool is_cube, bool manual_stride)
{
        unsigned first_face = 0;
        unsigned last_face = 0;

        if (is_cube) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                &first_layer, &last_layer);
        }

        /* All ranges are inclusive */
        unsigned level_count = 1 + last_level - first_level;
        unsigned layer_count = 1 + last_layer - first_layer;
        unsigned face_count  = 1 + last_face  - first_face;

        unsigned surfaces =
                level_count * layer_count * face_count * MAX2(nr_samples, 1);

        return manual_stride ? surfaces * 2 : surfaces;
}

/* Conservative estimate of the size in bytes of the texture payload a priori.
 * The returned value must be greater than or equal to the actual size, so it
 * is safe to use as an allocation amount.
 *
 * The payload holds one pointer per surface, plus one stride entry per
 * surface whenever strides are emitted explicitly. Whether that happens is
 * not a function of the modifier alone: panfrost_new_texture_bifrost() always
 * emits explicit strides, including for tiled and AFBC textures. Since this
 * function cannot tell which path will consume the allocation, it always
 * assumes strides are present. Worst case the estimate is 2x the actual size
 * (when no explicit stride ends up being emitted); it is never too small.
 *
 * modifier is kept in the signature for existing callers but no longer
 * narrows the estimate. */

unsigned
panfrost_estimate_texture_payload_size(
                unsigned first_level, unsigned last_level,
                unsigned first_layer, unsigned last_layer,
                unsigned nr_samples,
                enum mali_texture_dimension dim, uint64_t modifier)
{
        /* Intentionally unused, see above */
        (void) modifier;

        /* Assume worst case: every surface pointer is followed by strides */
        const bool manual_stride = true;

        unsigned elements = panfrost_texture_num_elements(
                        first_level, last_level,
                        first_layer, last_layer,
                        nr_samples,
                        dim == MALI_TEXTURE_DIMENSION_CUBE, manual_stride);

        return sizeof(mali_ptr) * elements;
}

/* If not explicitly specified, line stride is calculated for block-based
 * formats as (ceil(width / block_width) * block_size). As a special case, this
 * is left zero if there is only a single block vertically. So, we have a
 * helper to extract the dimensions of a block-based layout, which callers can
 * use to calculate the line stride as such.
 *
 * Returns the block width when `width` is true and the block height
 * otherwise. Non-AFBC modifiers must be 16x16 u-interleaved. For the mixed
 * 32x8/64x4 AFBC block size, plane 0 uses 32x8 and any other plane uses 64x4.
 */

unsigned
panfrost_block_dim(uint64_t modifier, bool width, unsigned plane)
{
        if (!drm_is_afbc(modifier)) {
                assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
                return 16;
        }

        const uint64_t block_size = modifier & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK;

        if (block_size == AFBC_FORMAT_MOD_BLOCK_SIZE_16x16)
                return 16;

        if (block_size == AFBC_FORMAT_MOD_BLOCK_SIZE_32x8)
                return width ? 32 : 8;

        if (block_size == AFBC_FORMAT_MOD_BLOCK_SIZE_64x4)
                return width ? 64 : 4;

        if (block_size == AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4) {
                if (plane)
                        return width ? 64 : 4;

                return width ? 32 : 8;
        }

        unreachable("Invalid AFBC block size");
}

/* Builds the 64-bit stride word that follows a surface pointer in the payload
 * when strides are explicit: the row stride of mip level `l` in the low 32
 * bits and the layer stride in the high 32 bits. desc, modifier, width and
 * height are accepted but not consulted here. */

static uint64_t
panfrost_get_surface_strides(struct panfrost_slice *slices,
                             const struct util_format_description *desc,
                             enum mali_texture_dimension dim,
                             uint64_t modifier,
                             unsigned width, unsigned height,
                             unsigned l, unsigned cube_stride)
{
        const bool is_3d = (dim == MALI_TEXTURE_DIMENSION_3D);

        unsigned row = slices[l].row_stride;
        unsigned layer =
                panfrost_get_layer_stride(slices, is_3d, cube_stride, l);

        /* Widen before shifting so the layer stride is not shifted out */
        uint64_t packed = (uint64_t)layer << 32;
        packed |= row;

        return packed;
}

/* Returns the address of one surface: `base` plus the offset of mip level
 * `l`, array index `w`, cube face `f` and sample `s`. For cube textures the
 * array index and face are flattened as (w * 6 + f); for every other
 * dimension the index is (w + f). */

static mali_ptr
panfrost_get_surface_pointer(mali_ptr base, struct panfrost_slice *slices,
                             enum mali_texture_dimension dim,
                             unsigned l, unsigned w, unsigned f, unsigned s,
                             unsigned cube_stride)
{
        const bool is_3d = (dim == MALI_TEXTURE_DIMENSION_3D);

        unsigned faces_per_layer = 1;
        if (dim == MALI_TEXTURE_DIMENSION_CUBE)
                faces_per_layer = 6;

        unsigned flat_index = w * faces_per_layer + f;

        return base + panfrost_texture_offset(slices, is_3d, cube_stride,
                                              l, flat_index, s);
}

/* Cursor over every (layer, level, face, sample) surface of a texture view.
 * Each dimension stores its current position together with the inclusive
 * bounds it runs between. */
struct panfrost_surface_iter {
        /* Array layer; has no first_ bound since it is never rewound */
        unsigned layer;
        unsigned last_layer;

        /* Mip level */
        unsigned level;
        unsigned first_level;
        unsigned last_level;

        /* Cube face */
        unsigned face;
        unsigned first_face;
        unsigned last_face;

        /* Multisample index */
        unsigned sample;
        unsigned first_sample;
        unsigned last_sample;
};

static void
panfrost_surface_iter_begin(struct panfrost_surface_iter *iter,
                            unsigned first_layer, unsigned last_layer,
                            unsigned first_level, unsigned last_level,
                            unsigned first_face, unsigned last_face,
                            unsigned nr_samples)
{
        iter->layer = first_layer;
        iter->last_layer = last_layer;
        iter->level = iter->first_level = first_level;
        iter->last_level = last_level;
        iter->face = iter->first_face = first_face;
        iter->last_face = last_face;
        iter->sample = iter->first_sample = 0;
        iter->last_sample = nr_samples - 1;
}

/* True once the iterator has stepped past the last layer, i.e. there are no
 * more surfaces to visit. */

static bool
panfrost_surface_iter_end(const struct panfrost_surface_iter *iter)
{
        const bool within_range = iter->layer <= iter->last_layer;

        return !within_range;
}

/* Advances a single iterator dimension. Returns true when the position was
 * below `last` and has simply been bumped, in which case the iterator now
 * points at the next surface. Otherwise the dimension is rewound to `first`
 * and false is returned, meaning the next outer dimension must advance. */

static bool
panfrost_surface_iter_advance(unsigned *cursor, unsigned first, unsigned last)
{
        if ((*cursor)++ < last)
                return true;

        *cursor = first;
        return false;
}

/* Steps the iterator to the next surface. Dimensions are nested, innermost
 * first: sample, face, level, layer before v7, and level, sample, face, layer
 * from v7 on. When every inner dimension wraps around, the layer is bumped. */

static void
panfrost_surface_iter_next(const struct panfrost_device *dev,
                           struct panfrost_surface_iter *iter)
{
        /* Ordering is different on v7: inner loop is iterating on levels */
        const bool level_innermost = dev->arch >= 7;

        if (level_innermost &&
            panfrost_surface_iter_advance(&iter->level,
                                          iter->first_level,
                                          iter->last_level))
                return;

        if (panfrost_surface_iter_advance(&iter->sample,
                                          iter->first_sample,
                                          iter->last_sample))
                return;

        if (panfrost_surface_iter_advance(&iter->face,
                                          iter->first_face,
                                          iter->last_face))
                return;

        if (!level_innermost &&
            panfrost_surface_iter_advance(&iter->level,
                                          iter->first_level,
                                          iter->last_level))
                return;

        iter->layer++;
}

/* Writes the payload that accompanies a texture descriptor: one tagged
 * surface address per (layer, level, face, sample) in the requested ranges,
 * each followed by a packed stride word when manual_stride is set. The order
 * in which surfaces are visited is whatever panfrost_surface_iter_next()
 * produces for this device. */

static void
panfrost_emit_texture_payload(const struct panfrost_device *dev,
                              mali_ptr *payload,
                              const struct util_format_description *desc,
                              enum mali_texture_dimension dim,
                              uint64_t modifier,
                              unsigned width, unsigned height,
                              unsigned first_level, unsigned last_level,
                              unsigned first_layer, unsigned last_layer,
                              unsigned nr_samples,
                              unsigned cube_stride,
                              bool manual_stride,
                              mali_ptr base,
                              struct panfrost_slice *slices)
{
        /* Every surface address carries the compression tag */
        const mali_ptr tagged_base =
                base | panfrost_compression_tag(desc, modifier);

        /* For cubes, split the combined layer range into layers and faces */
        unsigned first_face = 0;
        unsigned last_face = 0;

        if (dim == MALI_TEXTURE_DIMENSION_CUBE) {
                panfrost_adjust_cube_dimensions(&first_face, &last_face,
                                                &first_layer, &last_layer);
        }

        /* A sample count of zero means single-sampled */
        const unsigned samples = MAX2(nr_samples, 1);

        struct panfrost_surface_iter iter;
        panfrost_surface_iter_begin(&iter, first_layer, last_layer,
                                    first_level, last_level,
                                    first_face, last_face, samples);

        mali_ptr *cursor = payload;

        while (!panfrost_surface_iter_end(&iter)) {
                *cursor++ =
                        panfrost_get_surface_pointer(tagged_base, slices, dim,
                                                     iter.level, iter.layer,
                                                     iter.face, iter.sample,
                                                     cube_stride);

                if (manual_stride) {
                        *cursor++ =
                                panfrost_get_surface_strides(slices, desc, dim,
                                                             modifier,
                                                             width, height,
                                                             iter.level,
                                                             cube_stride);
                }

                panfrost_surface_iter_next(dev, &iter);
        }
}

/* Packs a MIDGARD_TEXTURE descriptor at `out` and writes its payload of
 * surface pointers directly behind it, MALI_MIDGARD_TEXTURE_LENGTH bytes into
 * the same buffer. The caller must therefore provide room for the descriptor
 * plus the payload.
 *
 * width/height/depth are the dimensions of level 0 and are minified to
 * first_level for the descriptor. An explicit stride is only emitted when
 * panfrost_needs_explicit_stride() finds a level whose stride is not the
 * tightly-packed one. depth is only packed for 3D textures; every other
 * dimension packs the sample count instead (at least 1). */

void
panfrost_new_texture(
        const struct panfrost_device *dev,
        void *out,
        uint16_t width, uint16_t height,
        uint16_t depth, uint16_t array_size,
        enum pipe_format format,
        enum mali_texture_dimension dim,
        uint64_t modifier,
        unsigned first_level, unsigned last_level,
        unsigned first_layer, unsigned last_layer,
        unsigned nr_samples,
        unsigned cube_stride,
        unsigned swizzle,
        mali_ptr base,
        struct panfrost_slice *slices)
{
        const struct util_format_description *desc =
                util_format_description(format);

        bool manual_stride =
                panfrost_needs_explicit_stride(modifier, format, slices, width,
                                               first_level, last_level);

        pan_pack(out, MIDGARD_TEXTURE, cfg) {
                cfg.width = u_minify(width, first_level);
                cfg.height = u_minify(height, first_level);
                if (dim == MALI_TEXTURE_DIMENSION_3D)
                        cfg.depth = u_minify(depth, first_level);
                else
                        cfg.sample_count = MAX2(1, nr_samples);
                cfg.array_size = array_size;
                cfg.format = panfrost_pipe_format_v6[format].hw;
                cfg.dimension = dim;
                cfg.texel_ordering = panfrost_modifier_to_layout(modifier);
                cfg.manual_stride = manual_stride;
                cfg.levels = last_level - first_level + 1;
                cfg.swizzle = swizzle;
        }

        /* The payload starts right after the descriptor. Step in bytes
         * through a uint8_t pointer: arithmetic on void * is a GNU
         * extension, not ISO C. */
        mali_ptr *payload =
                (mali_ptr *) ((uint8_t *) out + MALI_MIDGARD_TEXTURE_LENGTH);

        panfrost_emit_texture_payload(
                dev,
                payload,
                desc,
                dim,
                modifier,
                width, height,
                first_level, last_level,
                first_layer, last_layer,
                nr_samples,
                cube_stride,
                manual_stride,
                base,
                slices);
}

/* Packs a BIFROST_TEXTURE descriptor into `out`. Unlike the Midgard variant,
 * the surface payload is not placed behind the descriptor: it is written
 * through payload->cpu and the descriptor refers to it via payload->gpu.
 * Strides are always explicit here, so each surface takes a pointer and a
 * stride word.
 *
 * width/height/depth are the dimensions of level 0 and are minified to
 * first_level for the descriptor. depth is only packed for 3D textures; every
 * other dimension packs the sample count instead (at least 1). */

void
panfrost_new_texture_bifrost(
        const struct panfrost_device *dev,
        struct mali_bifrost_texture_packed *out,
        uint16_t width, uint16_t height,
        uint16_t depth, uint16_t array_size,
        enum pipe_format format,
        enum mali_texture_dimension dim,
        uint64_t modifier,
        unsigned first_level, unsigned last_level,
        unsigned first_layer, unsigned last_layer,
        unsigned nr_samples,
        unsigned cube_stride,
        unsigned swizzle,
        mali_ptr base,
        struct panfrost_slice *slices,
        const struct panfrost_ptr *payload)
{
        const struct util_format_description *fmt_desc =
                util_format_description(format);

        /* Fill in the surface list first, then describe it */
        panfrost_emit_texture_payload(dev, payload->cpu, fmt_desc, dim,
                                      modifier, width, height,
                                      first_level, last_level,
                                      first_layer, last_layer,
                                      nr_samples, cube_stride,
                                      true, /* Stride explicit on Bifrost */
                                      base, slices);

        pan_pack(out, BIFROST_TEXTURE, cfg) {
                cfg.dimension = dim;
                cfg.format = dev->formats[format].hw;
                cfg.swizzle = swizzle;
                cfg.texel_ordering = panfrost_modifier_to_layout(modifier);

                cfg.width = u_minify(width, first_level);
                cfg.height = u_minify(height, first_level);

                if (dim == MALI_TEXTURE_DIMENSION_3D) {
                        cfg.depth = u_minify(depth, first_level);
                } else {
                        cfg.sample_count = MAX2(nr_samples, 1);
                }

                cfg.array_size = array_size;
                cfg.levels = last_level - first_level + 1;
                cfg.surfaces = payload->gpu;

                /* We specify API-level LOD clamps in the sampler descriptor
                 * and use these clamps simply for bounds checking */
                cfg.minimum_lod = FIXED_16(0, false);
                cfg.maximum_lod = FIXED_16(cfg.levels - 1, false);
        }
}

/* Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
 * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
 * This feature is also known as "transaction elimination".
 *
 * The constants below give the tile dimensions and the number of checksum
 * bytes stored per tile. */

#define CHECKSUM_TILE_WIDTH 16
#define CHECKSUM_TILE_HEIGHT 16
#define CHECKSUM_BYTES_PER_TILE 8

/* Returns the number of bytes needed to checksum a width x height surface,
 * with partially covered tiles counted as whole tiles. As a side effect the
 * byte stride of one row of tile checksums is stored in slice->crc.stride. */

unsigned
panfrost_compute_checksum_size(
        struct panfrost_slice *slice,
        unsigned width,
        unsigned height)
{
        /* Round each dimension up to whole tiles, then count the tiles */
        unsigned tiles_across =
                ALIGN_POT(width, CHECKSUM_TILE_WIDTH) / CHECKSUM_TILE_WIDTH;
        unsigned tiles_down =
                ALIGN_POT(height, CHECKSUM_TILE_HEIGHT) / CHECKSUM_TILE_HEIGHT;

        slice->crc.stride = tiles_across * CHECKSUM_BYTES_PER_TILE;

        return slice->crc.stride * tiles_down;
}

/* Returns the stride between consecutive layers at the given mip level. For
 * 3D textures that is the level's size0; for everything else it is the
 * caller-supplied cube_stride. */

unsigned
panfrost_get_layer_stride(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level)
{
        if (is_3d)
                return slices[level].size0;

        return cube_stride;
}

/* Computes the offset into a texture at a particular level/face. Add to
 * the base address of a texture to get the address to that level/face */

unsigned
panfrost_texture_offset(struct panfrost_slice *slices, bool is_3d, unsigned cube_stride, unsigned level, unsigned face, unsigned sample)
{
        unsigned layer_stride = panfrost_get_layer_stride(slices, is_3d, cube_stride, level);
        return slices[level].offset + (face * layer_stride) + (sample * slices[level].size0);
}