diff options
Diffstat (limited to 'src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c')
-rw-r--r-- | src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c | 3502 |
1 files changed, 2166 insertions, 1336 deletions
diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index f62aad28eb3..88ced74699f 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -40,59 +40,69 @@ struct ntv_context { */ bool spirv_1_4_interfaces; + bool explicit_lod; //whether to set lod=0 for texture() + struct spirv_builder builder; + nir_shader *nir; struct hash_table *glsl_types; + struct hash_table *bo_struct_types; + struct hash_table *bo_array_types; SpvId GLSL_std_450; gl_shader_stage stage; - const struct zink_so_info *so_info; + const struct zink_shader_info *sinfo; - SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][3]; //8, 16, 32 + SpvId ubos[PIPE_MAX_CONSTANT_BUFFERS][5]; //8, 16, 32, unused, 64 nir_variable *ubo_vars[PIPE_MAX_CONSTANT_BUFFERS]; - SpvId ssbos[PIPE_MAX_SHADER_BUFFERS][3]; //8, 16, 32 - nir_variable *ssbo_vars[PIPE_MAX_SHADER_BUFFERS]; - SpvId image_types[PIPE_MAX_SAMPLERS]; - SpvId images[PIPE_MAX_SAMPLERS]; - SpvId sampler_types[PIPE_MAX_SAMPLERS]; - SpvId samplers[PIPE_MAX_SAMPLERS]; - unsigned char sampler_array_sizes[PIPE_MAX_SAMPLERS]; - unsigned samplers_used : PIPE_MAX_SAMPLERS; + SpvId ssbos[5]; //8, 16, 32, unused, 64 + nir_variable *ssbo_vars; + + SpvId images[PIPE_MAX_SHADER_IMAGES]; + struct hash_table image_types; + SpvId samplers[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + SpvId bindless_samplers[2]; + SpvId cl_samplers[PIPE_MAX_SAMPLERS]; + nir_variable *sampler_var[PIPE_MAX_SHADER_SAMPLER_VIEWS]; /* driver_location -> variable */ + nir_variable *bindless_sampler_var[2]; + unsigned last_sampler; + unsigned bindless_set_idx; + nir_variable *image_var[PIPE_MAX_SHADER_IMAGES]; /* driver_location -> variable */ + SpvId entry_ifaces[PIPE_MAX_SHADER_INPUTS * 4 + PIPE_MAX_SHADER_OUTPUTS * 4]; size_t num_entry_ifaces; SpvId *defs; + nir_alu_type *def_types; + SpvId *resident_defs; size_t num_defs; - SpvId *regs; - size_t num_regs; - struct hash_table *vars; /* nir_variable -> SpvId */ - struct hash_table *image_vars; /* SpvId -> nir_variable */ - struct hash_table *so_outputs; /* pipe_stream_output -> SpvId */ - unsigned outputs[VARYING_SLOT_MAX * 4]; - const struct glsl_type *so_output_gl_types[VARYING_SLOT_MAX * 4]; - SpvId so_output_types[VARYING_SLOT_MAX * 4]; const SpvId *block_ids; size_t num_blocks; bool block_started; SpvId loop_break, loop_cont; + SpvId shared_block_var[5]; //8, 16, 32, unused, 64 + SpvId shared_block_arr_type[5]; //8, 16, 32, unused, 64 + SpvId scratch_block_var[5]; //8, 16, 32, unused, 64 + SpvId front_face_var, instance_id_var, vertex_id_var, primitive_id_var, invocation_id_var, // geometry sample_mask_type, sample_id_var, sample_pos_var, sample_mask_in_var, tess_patch_vertices_in, tess_coord_var, // tess - push_const_var, + push_const_var, point_coord_var, workgroup_id_var, num_workgroups_var, local_invocation_id_var, global_invocation_id_var, local_invocation_index_var, helper_invocation_var, local_group_size_var, - shared_block_var, base_vertex_var, base_instance_var, draw_id_var; + SpvId shared_mem_size; + SpvId subgroup_eq_mask_var, subgroup_ge_mask_var, subgroup_gt_mask_var, @@ -101,6 +111,9 @@ struct ntv_context { subgroup_le_mask_var, subgroup_lt_mask_var, subgroup_size_var; + + SpvId discard_func; + SpvId float_array_type[2]; }; static SpvId @@ -108,10 +121,6 @@ get_fvec_constant(struct ntv_context *ctx, unsigned bit_size, unsigned num_components, double value); static SpvId -get_uvec_constant(struct ntv_context *ctx, unsigned bit_size, - unsigned num_components, uint64_t value); - -static SpvId get_ivec_constant(struct ntv_context *ctx, unsigned bit_size, unsigned num_components, int64_t value); @@ -126,6 +135,128 @@ static SpvId emit_triop(struct ntv_context *ctx, SpvOp op, SpvId type, SpvId src0, SpvId src1, SpvId src2); +static bool +alu_op_is_typeless(nir_op op) +{ + switch (op) { + case nir_op_mov: + case nir_op_vec16: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_vec5: + case nir_op_vec8: + case nir_op_bcsel: + return true; + default: + break; + } + return false; +} + +static nir_alu_type +get_nir_alu_type(const struct glsl_type *type) +{ + return nir_alu_type_get_base_type(nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(glsl_without_array_or_matrix(type)))); +} + +static nir_alu_type +infer_nir_alu_type_from_uses_ssa(nir_def *ssa); + +static nir_alu_type +infer_nir_alu_type_from_use(nir_src *src) +{ + nir_instr *instr = nir_src_parent_instr(src); + nir_alu_type atype = nir_type_invalid; + switch (instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + if (alu->op == nir_op_bcsel) { + if (nir_srcs_equal(alu->src[0].src, *src)) { + /* special case: the first src in bcsel is always bool */ + return nir_type_bool; + } + } + /* ignore typeless ops */ + if (alu_op_is_typeless(alu->op)) { + atype = infer_nir_alu_type_from_uses_ssa(&alu->def); + break; + } + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + if (!nir_srcs_equal(alu->src[i].src, *src)) + continue; + atype = nir_op_infos[alu->op].input_types[i]; + break; + } + break; + } + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (!nir_srcs_equal(tex->src[i].src, *src)) + continue; + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + case nir_tex_src_lod: + if (tex->op == nir_texop_txf || + tex->op == nir_texop_txf_ms || + tex->op == nir_texop_txs) + atype = nir_type_int; + else + atype = nir_type_float; + break; + case nir_tex_src_projector: + case nir_tex_src_bias: + case nir_tex_src_min_lod: + case nir_tex_src_comparator: + case nir_tex_src_ddx: + case nir_tex_src_ddy: + atype = nir_type_float; + break; + case nir_tex_src_offset: + case nir_tex_src_ms_index: + case nir_tex_src_texture_offset: + case nir_tex_src_sampler_offset: + case nir_tex_src_sampler_handle: + case nir_tex_src_texture_handle: + atype = nir_type_int; + break; + default: + break; + } + break; + } + break; + } + case nir_instr_type_intrinsic: { + if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_deref) { + atype = get_nir_alu_type(nir_instr_as_deref(instr)->type); + } else if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_store_deref) { + atype = get_nir_alu_type(nir_src_as_deref(nir_instr_as_intrinsic(instr)->src[0])->type); + } + break; + } + default: + break; + } + return nir_alu_type_get_base_type(atype); +} + +static nir_alu_type +infer_nir_alu_type_from_uses_ssa(nir_def *ssa) +{ + nir_alu_type atype = nir_type_invalid; + /* try to infer a type: if it's wrong then whatever, but at least we tried */ + nir_foreach_use_including_if(src, ssa) { + if (nir_src_is_if(src)) + return nir_type_bool; + atype = infer_nir_alu_type_from_use(src); + if (atype) + break; + } + return atype ? atype : nir_type_uint; +} + static SpvId get_bvec_type(struct ntv_context *ctx, int num_components) { @@ -138,17 +269,24 @@ get_bvec_type(struct ntv_context *ctx, int num_components) return bool_type; } +static SpvId +find_image_type(struct ntv_context *ctx, nir_variable *var) +{ + struct hash_entry *he = _mesa_hash_table_search(&ctx->image_types, var); + return he ? (intptr_t)he->data : 0; +} + static SpvScope -get_scope(nir_scope scope) +get_scope(mesa_scope scope) { SpvScope conv[] = { - [NIR_SCOPE_NONE] = 0, - [NIR_SCOPE_INVOCATION] = SpvScopeInvocation, - [NIR_SCOPE_SUBGROUP] = SpvScopeSubgroup, - [NIR_SCOPE_SHADER_CALL] = SpvScopeShaderCallKHR, - [NIR_SCOPE_WORKGROUP] = SpvScopeWorkgroup, - [NIR_SCOPE_QUEUE_FAMILY] = SpvScopeQueueFamily, - [NIR_SCOPE_DEVICE] = SpvScopeDevice, + [SCOPE_NONE] = 0, + [SCOPE_INVOCATION] = SpvScopeInvocation, + [SCOPE_SUBGROUP] = SpvScopeSubgroup, + [SCOPE_SHADER_CALL] = SpvScopeShaderCallKHR, + [SCOPE_WORKGROUP] = SpvScopeWorkgroup, + [SCOPE_QUEUE_FAMILY] = SpvScopeQueueFamily, + [SCOPE_DEVICE] = SpvScopeDevice, }; return conv[scope]; } @@ -163,9 +301,7 @@ block_label(struct ntv_context *ctx, nir_block *block) static void emit_access_decorations(struct ntv_context *ctx, nir_variable *var, SpvId var_id) { - unsigned access = var->data.access; - while (access) { - unsigned bit = u_bit_scan(&access); + u_foreach_bit(bit, var->data.access) { switch (1 << bit) { case ACCESS_COHERENT: /* SpvDecorationCoherent can't be used with vulkan memory model */ @@ -186,43 +322,79 @@ emit_access_decorations(struct ntv_context *ctx, nir_variable *var, SpvId var_id spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationNonUniform); break; case ACCESS_CAN_REORDER: - case ACCESS_STREAM_CACHE_POLICY: + case ACCESS_NON_TEMPORAL: /* no equivalent */ break; default: unreachable("unknown access bit"); } } + /* The Simple, GLSL, and Vulkan memory models can assume that aliasing is generally + * not present between the memory object declarations. Specifically, the consumer + * is free to assume aliasing is not present between memory object declarations, + * unless the memory object declarations explicitly indicate they alias. + * ... + * Applying Restrict is allowed, but has no effect. + * ... + * Only those memory object declarations decorated with Aliased or AliasedPointer may alias each other. + * + * - SPIRV 2.18.2 Aliasing + * + * thus if the variable isn't marked restrict, assume it may alias + */ + if (!(var->data.access & ACCESS_RESTRICT)) + spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationAliased); } static SpvOp -get_atomic_op(nir_intrinsic_op op) +get_atomic_op(struct ntv_context *ctx, unsigned bit_size, nir_atomic_op op) { switch (op) { -#define CASE_ATOMIC_OP(type) \ - case nir_intrinsic_ssbo_atomic_##type: \ - case nir_intrinsic_image_deref_atomic_##type: \ - case nir_intrinsic_shared_atomic_##type - - CASE_ATOMIC_OP(add): +#define ATOMIC_FCAP(NAME) \ + do {\ + if (bit_size == 16) \ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityAtomicFloat16##NAME##EXT); \ + if (bit_size == 32) \ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityAtomicFloat32##NAME##EXT); \ + if (bit_size == 64) \ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityAtomicFloat64##NAME##EXT); \ + } while (0) + + case nir_atomic_op_fadd: + ATOMIC_FCAP(Add); + if (bit_size == 16) + spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float16_add"); + else + spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float_add"); + return SpvOpAtomicFAddEXT; + case nir_atomic_op_fmax: + ATOMIC_FCAP(MinMax); + spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float_min_max"); + return SpvOpAtomicFMaxEXT; + case nir_atomic_op_fmin: + ATOMIC_FCAP(MinMax); + spirv_builder_emit_extension(&ctx->builder, "SPV_EXT_shader_atomic_float_min_max"); + return SpvOpAtomicFMinEXT; + + case nir_atomic_op_iadd: return SpvOpAtomicIAdd; - CASE_ATOMIC_OP(umin): + case nir_atomic_op_umin: return SpvOpAtomicUMin; - CASE_ATOMIC_OP(imin): + case nir_atomic_op_imin: return SpvOpAtomicSMin; - CASE_ATOMIC_OP(umax): + case nir_atomic_op_umax: return SpvOpAtomicUMax; - CASE_ATOMIC_OP(imax): + case nir_atomic_op_imax: return SpvOpAtomicSMax; - CASE_ATOMIC_OP(and): + case nir_atomic_op_iand: return SpvOpAtomicAnd; - CASE_ATOMIC_OP(or): + case nir_atomic_op_ior: return SpvOpAtomicOr; - CASE_ATOMIC_OP(xor): + case nir_atomic_op_ixor: return SpvOpAtomicXor; - CASE_ATOMIC_OP(exchange): + case nir_atomic_op_xchg: return SpvOpAtomicExchange; - CASE_ATOMIC_OP(comp_swap): + case nir_atomic_op_cmpxchg: return SpvOpAtomicCompareExchange; default: debug_printf("%s - ", nir_intrinsic_infos[op].name); @@ -230,7 +402,7 @@ get_atomic_op(nir_intrinsic_op op) } return 0; } -#undef CASE_ATOMIC_OP + static SpvId emit_float_const(struct ntv_context *ctx, int bit_size, double value) { @@ -294,10 +466,37 @@ get_uvec_type(struct ntv_context *ctx, unsigned bit_size, unsigned num_component return uint_type; } +static SpvId +get_alu_type(struct ntv_context *ctx, nir_alu_type type, unsigned num_components, unsigned bit_size) +{ + if (bit_size == 1) + return get_bvec_type(ctx, num_components); + + type = nir_alu_type_get_base_type(type); + switch (nir_alu_type_get_base_type(type)) { + case nir_type_bool: + return get_bvec_type(ctx, num_components); + + case nir_type_int: + return get_ivec_type(ctx, bit_size, num_components); + + case nir_type_uint: + return get_uvec_type(ctx, bit_size, num_components); + + case nir_type_float: + return get_fvec_type(ctx, bit_size, num_components); + + default: + unreachable("unsupported nir_alu_type"); + } +} + static SpvStorageClass get_storage_class(struct nir_variable *var) { switch (var->data.mode) { + case nir_var_function_temp: + return SpvStorageClassFunction; case nir_var_mem_push_const: return SpvStorageClassPushConstant; case nir_var_shader_in: @@ -305,7 +504,12 @@ get_storage_class(struct nir_variable *var) case nir_var_shader_out: return SpvStorageClassOutput; case nir_var_uniform: + case nir_var_image: return SpvStorageClassUniformConstant; + case nir_var_mem_ubo: + return SpvStorageClassUniform; + case nir_var_mem_ssbo: + return SpvStorageClassStorageBuffer; default: unreachable("Unsupported nir_variable_mode"); } @@ -313,10 +517,10 @@ get_storage_class(struct nir_variable *var) } static SpvId -get_dest_uvec_type(struct ntv_context *ctx, nir_dest *dest) +get_def_uvec_type(struct ntv_context *ctx, nir_def *def) { - unsigned bit_size = nir_dest_bit_size(*dest); - return get_uvec_type(ctx, bit_size, nir_dest_num_components(*dest)); + unsigned bit_size = def->bit_size; + return get_uvec_type(ctx, bit_size, def->num_components); } static SpvId @@ -346,7 +550,15 @@ get_glsl_basetype(struct ntv_context *ctx, enum glsl_base_type type) case GLSL_TYPE_UINT64: return spirv_builder_type_uint(&ctx->builder, 64); - /* TODO: handle more types */ + + case GLSL_TYPE_UINT16: + return spirv_builder_type_uint(&ctx->builder, 16); + case GLSL_TYPE_INT16: + return spirv_builder_type_int(&ctx->builder, 16); + case GLSL_TYPE_INT8: + return spirv_builder_type_int(&ctx->builder, 8); + case GLSL_TYPE_UINT8: + return spirv_builder_type_uint(&ctx->builder, 8); default: unreachable("unknown GLSL type"); @@ -413,8 +625,11 @@ get_glsl_type(struct ntv_context *ctx, const struct glsl_type *type) types[i] = get_glsl_type(ctx, glsl_get_struct_field(type, i)); ret = spirv_builder_type_struct(&ctx->builder, types, glsl_get_length(type)); - for (unsigned i = 0; i < glsl_get_length(type); i++) - spirv_builder_emit_member_offset(&ctx->builder, ret, i, glsl_get_struct_field_offset(type, i)); + for (unsigned i = 0; i < glsl_get_length(type); i++) { + int32_t offset = glsl_get_struct_field_offset(type, i); + if (offset >= 0) + spirv_builder_emit_member_offset(&ctx->builder, ret, i, offset); + } } else unreachable("Unhandled GLSL type"); @@ -423,21 +638,99 @@ get_glsl_type(struct ntv_context *ctx, const struct glsl_type *type) } static void -create_shared_block(struct ntv_context *ctx, unsigned shared_size) +create_scratch_block(struct ntv_context *ctx, unsigned scratch_size, unsigned bit_size) { - SpvId type = spirv_builder_type_uint(&ctx->builder, 32); - SpvId array = spirv_builder_type_array(&ctx->builder, type, emit_uint_const(ctx, 32, shared_size / 4)); - spirv_builder_emit_array_stride(&ctx->builder, array, 4); + unsigned idx = bit_size >> 4; + SpvId type = spirv_builder_type_uint(&ctx->builder, bit_size); + unsigned block_size = scratch_size / (bit_size / 8); + assert(block_size); + SpvId array = spirv_builder_type_array(&ctx->builder, type, emit_uint_const(ctx, 32, block_size)); + spirv_builder_emit_array_stride(&ctx->builder, array, bit_size / 8); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassWorkgroup, + SpvStorageClassPrivate, array); - ctx->shared_block_var = spirv_builder_emit_var(&ctx->builder, ptr_type, SpvStorageClassWorkgroup); + ctx->scratch_block_var[idx] = spirv_builder_emit_var(&ctx->builder, ptr_type, SpvStorageClassPrivate); + if (ctx->spirv_1_4_interfaces) { + assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); + ctx->entry_ifaces[ctx->num_entry_ifaces++] = ctx->scratch_block_var[idx]; + } +} + +static SpvId +get_scratch_block(struct ntv_context *ctx, unsigned bit_size) +{ + unsigned idx = bit_size >> 4; + if (!ctx->scratch_block_var[idx]) + create_scratch_block(ctx, ctx->nir->scratch_size, bit_size); + return ctx->scratch_block_var[idx]; +} + +static void +create_shared_block(struct ntv_context *ctx, unsigned bit_size) +{ + unsigned idx = bit_size >> 4; + SpvId type = spirv_builder_type_uint(&ctx->builder, bit_size); + SpvId array; + + assert(gl_shader_stage_is_compute(ctx->nir->info.stage)); + if (ctx->nir->info.cs.has_variable_shared_mem) { + assert(ctx->shared_mem_size); + SpvId const_shared_size = emit_uint_const(ctx, 32, ctx->nir->info.shared_size); + SpvId shared_mem_size = spirv_builder_emit_triop(&ctx->builder, SpvOpSpecConstantOp, spirv_builder_type_uint(&ctx->builder, 32), SpvOpIAdd, const_shared_size, ctx->shared_mem_size); + shared_mem_size = spirv_builder_emit_triop(&ctx->builder, SpvOpSpecConstantOp, spirv_builder_type_uint(&ctx->builder, 32), SpvOpUDiv, shared_mem_size, emit_uint_const(ctx, 32, bit_size / 8)); + array = spirv_builder_type_array(&ctx->builder, type, shared_mem_size); + } else { + unsigned block_size = ctx->nir->info.shared_size / (bit_size / 8); + assert(block_size); + array = spirv_builder_type_array(&ctx->builder, type, emit_uint_const(ctx, 32, block_size)); + } + + ctx->shared_block_arr_type[idx] = array; + spirv_builder_emit_array_stride(&ctx->builder, array, bit_size / 8); + + /* Create wrapper struct for Block, Offset and Aliased decorations. */ + SpvId block = spirv_builder_type_struct(&ctx->builder, &array, 1); + + SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassWorkgroup, + block); + ctx->shared_block_var[idx] = spirv_builder_emit_var(&ctx->builder, ptr_type, SpvStorageClassWorkgroup); if (ctx->spirv_1_4_interfaces) { assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); - ctx->entry_ifaces[ctx->num_entry_ifaces++] = ctx->shared_block_var; + ctx->entry_ifaces[ctx->num_entry_ifaces++] = ctx->shared_block_var[idx]; + } + /* Alias our shared memory blocks */ + if (ctx->sinfo->have_workgroup_memory_explicit_layout) { + spirv_builder_emit_member_offset(&ctx->builder, block, 0, 0); + spirv_builder_emit_decoration(&ctx->builder, block, SpvDecorationBlock); + spirv_builder_emit_decoration(&ctx->builder, ctx->shared_block_var[idx], SpvDecorationAliased); } } +static SpvId +get_shared_block(struct ntv_context *ctx, unsigned bit_size) +{ + unsigned idx = bit_size >> 4; + if (!ctx->shared_block_var[idx]) + create_shared_block(ctx, bit_size); + if (ctx->sinfo->have_workgroup_memory_explicit_layout) { + spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_workgroup_memory_explicit_layout"); + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityWorkgroupMemoryExplicitLayoutKHR); + if (ctx->shared_block_var[0]) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR); + if (ctx->shared_block_var[1]) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR); + } + + SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassWorkgroup, + ctx->shared_block_arr_type[idx]); + SpvId zero = emit_uint_const(ctx, 32, 0); + + return spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + ctx->shared_block_var[idx], &zero, 1); +} + #define HANDLE_EMIT_BUILTIN(SLOT, BUILTIN) \ case VARYING_SLOT_##SLOT: \ spirv_builder_emit_builtin(&ctx->builder, var_id, SpvBuiltIn##BUILTIN); \ @@ -505,7 +798,6 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var) else if (ctx->stage == MESA_SHADER_FRAGMENT) { switch (var->data.location) { HANDLE_EMIT_BUILTIN(POS, FragCoord); - HANDLE_EMIT_BUILTIN(PNTC, PointCoord); HANDLE_EMIT_BUILTIN(LAYER, Layer); HANDLE_EMIT_BUILTIN(PRIMITIVE_ID, PrimitiveId); HANDLE_EMIT_BUILTIN(CLIP_DIST0, ClipDistance); @@ -521,6 +813,7 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationCentroid); else if (var->data.sample) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationSample); + emit_interpolation(ctx, var_id, var->data.interpolation); } else if (ctx->stage < MESA_SHADER_FRAGMENT) { switch (var->data.location) { HANDLE_EMIT_BUILTIN(POS, Position); @@ -550,8 +843,6 @@ emit_input(struct ntv_context *ctx, struct nir_variable *var) if (var->data.patch) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch); - emit_interpolation(ctx, var_id, var->data.interpolation); - _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); @@ -574,6 +865,11 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) if (var->name) spirv_builder_emit_name(&ctx->builder, var_id, var->name); + if (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW) { + spirv_builder_emit_decoration(&ctx->builder, var_id, + SpvDecorationRelaxedPrecision); + } + if (ctx->stage != MESA_SHADER_FRAGMENT) { switch (var->data.location) { HANDLE_EMIT_BUILTIN(POS, Position); @@ -587,16 +883,12 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) HANDLE_EMIT_BUILTIN(TESS_LEVEL_INNER, TessLevelInner); default: - spirv_builder_emit_location(&ctx->builder, var_id, - var->data.driver_location); - } - /* tcs can't do xfb */ - if (ctx->stage != MESA_SHADER_TESS_CTRL) { - unsigned idx = var->data.location << 2 | var->data.location_frac; - ctx->outputs[idx] = var_id; - ctx->so_output_gl_types[idx] = var->type; - ctx->so_output_types[idx] = var_type; + /* non-xfb psiz output will have location -1 */ + if (var->data.location >= 0) + spirv_builder_emit_location(&ctx->builder, var_id, + var->data.driver_location); } + emit_interpolation(ctx, var_id, var->data.interpolation); } else { if (var->data.location >= FRAG_RESULT_DATA0) { spirv_builder_emit_location(&ctx->builder, var_id, @@ -633,12 +925,10 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) spirv_builder_emit_component(&ctx->builder, var_id, var->data.location_frac); - emit_interpolation(ctx, var_id, var->data.interpolation); - if (var->data.patch) spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationPatch); - if (var->data.explicit_xfb_buffer) { + if (var->data.explicit_xfb_buffer && ctx->nir->xfb_info) { spirv_builder_emit_offset(&ctx->builder, var_id, var->data.offset); spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, var->data.xfb.buffer); spirv_builder_emit_xfb_stride(&ctx->builder, var_id, var->data.xfb.stride); @@ -652,6 +942,41 @@ emit_output(struct ntv_context *ctx, struct nir_variable *var) ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; } +static void +emit_shader_temp(struct ntv_context *ctx, struct nir_variable *var) +{ + SpvId var_type = get_glsl_type(ctx, var->type); + + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPrivate, + var_type); + SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, + SpvStorageClassPrivate); + if (var->name) + spirv_builder_emit_name(&ctx->builder, var_id, var->name); + + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); + + assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); + ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; +} + +static void +emit_temp(struct ntv_context *ctx, struct nir_variable *var) +{ + SpvId var_type = get_glsl_type(ctx, var->type); + + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassFunction, + var_type); + SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, + SpvStorageClassFunction); + if (var->name) + spirv_builder_emit_name(&ctx->builder, var_id, var->name); + + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); +} + static SpvDim type_to_dim(enum glsl_sampler_dim gdim, bool *is_ms) { @@ -674,6 +999,9 @@ type_to_dim(enum glsl_sampler_dim gdim, bool *is_ms) case GLSL_SAMPLER_DIM_MS: *is_ms = true; return SpvDim2D; + case GLSL_SAMPLER_DIM_SUBPASS_MS: + *is_ms = true; + return SpvDimSubpassData; case GLSL_SAMPLER_DIM_SUBPASS: return SpvDimSubpassData; default: @@ -800,13 +1128,12 @@ get_image_format(struct ntv_context *ctx, enum pipe_format format) return ret; } -static void -emit_image(struct ntv_context *ctx, struct nir_variable *var) +static SpvId +get_bare_image_type(struct ntv_context *ctx, struct nir_variable *var, bool is_sampler) { const struct glsl_type *type = glsl_without_array(var->type); bool is_ms; - bool is_sampler = glsl_type_is_sampler(type); if (var->data.fb_fetch_output) { spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInputAttachment); @@ -818,29 +1145,64 @@ emit_image(struct ntv_context *ctx, struct nir_variable *var) } SpvDim dimension = type_to_dim(glsl_get_sampler_dim(type), &is_ms); + if (dimension == SpvDim1D) { + if (is_sampler) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySampled1D); + else + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImage1D); + } + if (dimension == SpvDimBuffer) { + if (is_sampler) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySampledBuffer); + else + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageBuffer); + } + bool arrayed = glsl_sampler_type_is_array(type); if (dimension == SpvDimCube && arrayed) spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageCubeArray); + if (arrayed && !is_sampler && is_ms) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageMSArray); SpvId result_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type)); - SpvId image_type = spirv_builder_type_image(&ctx->builder, result_type, + return spirv_builder_type_image(&ctx->builder, result_type, dimension, false, arrayed, is_ms, is_sampler ? 1 : 2, get_image_format(ctx, var->data.image.format)); +} + +static SpvId +get_image_type(struct ntv_context *ctx, struct nir_variable *var, + bool is_sampler, bool is_buffer) +{ + SpvId image_type = get_bare_image_type(ctx, var, is_sampler); + return is_sampler && ctx->stage != MESA_SHADER_KERNEL && !is_buffer ? + spirv_builder_type_sampled_image(&ctx->builder, image_type) : + image_type; +} - SpvId var_type = is_sampler ? spirv_builder_type_sampled_image(&ctx->builder, image_type) : image_type; +static SpvId +emit_image(struct ntv_context *ctx, struct nir_variable *var, SpvId image_type) +{ + if (var->data.bindless) + return 0; + const struct glsl_type *type = glsl_without_array(var->type); + + bool is_sampler = glsl_type_is_sampler(type); + bool is_buffer = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF; + SpvId var_type = is_sampler && ctx->stage != MESA_SHADER_KERNEL && !is_buffer ? + spirv_builder_type_sampled_image(&ctx->builder, image_type) : image_type; + + bool mediump = (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW); int index = var->data.driver_location; - assert(!is_sampler || (!(ctx->samplers_used & (1 << index)))); - assert(!is_sampler || !ctx->sampler_types[index]); - assert(is_sampler || !ctx->image_types[index]); + assert(!find_image_type(ctx, var)); if (glsl_type_is_array(var->type)) { var_type = spirv_builder_type_array(&ctx->builder, var_type, emit_uint_const(ctx, 32, glsl_get_aoa_size(var->type))); spirv_builder_emit_array_stride(&ctx->builder, var_type, sizeof(void*)); - ctx->sampler_array_sizes[index] = glsl_get_aoa_size(var->type); } SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassUniformConstant, @@ -849,25 +1211,32 @@ emit_image(struct ntv_context *ctx, struct nir_variable *var) SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, SpvStorageClassUniformConstant); + if (mediump) { + spirv_builder_emit_decoration(&ctx->builder, var_id, + SpvDecorationRelaxedPrecision); + } + if (var->name) spirv_builder_emit_name(&ctx->builder, var_id, var->name); if (var->data.fb_fetch_output) spirv_builder_emit_input_attachment_index(&ctx->builder, var_id, var->data.index); + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); if (is_sampler) { - ctx->sampler_types[index] = image_type; - ctx->samplers[index] = var_id; - ctx->samplers_used |= 1 << index; + if (var->data.descriptor_set == ctx->bindless_set_idx) { + assert(!ctx->bindless_samplers[index]); + ctx->bindless_samplers[index] = var_id; + } else { + assert(!ctx->samplers[index]); + ctx->samplers[index] = var_id; + } } else { - ctx->image_types[index] = image_type; + assert(!ctx->images[index]); ctx->images[index] = var_id; - _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); - uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t)); - *key = var_id; - _mesa_hash_table_insert(ctx->image_vars, key, var); emit_access_decorations(ctx, var, var_id); } + _mesa_hash_table_insert(&ctx->image_types, var, (void *)(intptr_t)image_type); if (ctx->spirv_1_4_interfaces) { assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; @@ -875,6 +1244,30 @@ emit_image(struct ntv_context *ctx, struct nir_variable *var) spirv_builder_emit_descriptor_set(&ctx->builder, var_id, var->data.descriptor_set); spirv_builder_emit_binding(&ctx->builder, var_id, var->data.binding); + return var_id; +} + +static void +emit_sampler(struct ntv_context *ctx, unsigned sampler_index, unsigned desc_set) +{ + SpvId type = spirv_builder_type_sampler(&ctx->builder); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassUniformConstant, + type); + + SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, + SpvStorageClassUniformConstant); + char buf[128]; + snprintf(buf, sizeof(buf), "sampler_%u", sampler_index); + spirv_builder_emit_name(&ctx->builder, var_id, buf); + spirv_builder_emit_descriptor_set(&ctx->builder, var_id, desc_set); + spirv_builder_emit_binding(&ctx->builder, var_id, sampler_index); + ctx->cl_samplers[sampler_index] = var_id; + if (ctx->spirv_1_4_interfaces) { + assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); + ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; + } + } static SpvId @@ -887,19 +1280,22 @@ get_sized_uint_array_type(struct ntv_context *ctx, unsigned array_size, unsigned return array_type; } +/* get array<struct(array_type <--this one)> */ static SpvId -get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var, unsigned bitsize) +get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var) { + struct hash_entry *he = _mesa_hash_table_search(ctx->bo_array_types, var); + if (he) + return (SpvId)(uintptr_t)he->data; + unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(glsl_without_array(var->type), 0))); assert(bitsize); SpvId array_type; - const struct glsl_type *type = var->type; - if (!glsl_type_is_unsized_array(type)) { - type = glsl_get_struct_field(var->interface_type, 0); - if (!glsl_type_is_unsized_array(type)) { - uint32_t array_size = glsl_get_length(type) * (bitsize / 4); - assert(array_size); - return get_sized_uint_array_type(ctx, array_size, bitsize); - } + const struct glsl_type *type = glsl_without_array(var->type); + const struct glsl_type *first_type = glsl_get_struct_field(type, 0); + if (!glsl_type_is_unsized_array(first_type)) { + uint32_t array_size = glsl_get_length(first_type); + assert(array_size); + return get_sized_uint_array_type(ctx, array_size, bitsize); } SpvId uint_type = spirv_builder_type_uint(&ctx->builder, bitsize); array_type = spirv_builder_type_runtime_array(&ctx->builder, uint_type); @@ -907,19 +1303,25 @@ get_bo_array_type(struct ntv_context *ctx, struct nir_variable *var, unsigned bi return array_type; } +/* get array<struct(array_type) <--this one> */ static SpvId -get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var, unsigned bitsize) +get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var) { - SpvId array_type = get_bo_array_type(ctx, var, bitsize); + struct hash_entry *he = _mesa_hash_table_search(ctx->bo_struct_types, var); + if (he) + return (SpvId)(uintptr_t)he->data; + const struct glsl_type *bare_type = glsl_without_array(var->type); + unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(bare_type, 0))); + SpvId array_type = get_bo_array_type(ctx, var); + _mesa_hash_table_insert(ctx->bo_array_types, var, (void *)(uintptr_t)array_type); bool ssbo = var->data.mode == nir_var_mem_ssbo; // wrap UBO-array in a struct SpvId runtime_array = 0; - if (ssbo && glsl_get_length(var->interface_type) > 1) { - const struct glsl_type *last_member = glsl_get_struct_field(var->interface_type, glsl_get_length(var->interface_type) - 1); + if (ssbo && glsl_get_length(bare_type) > 1) { + const struct glsl_type *last_member = glsl_get_struct_field(bare_type, glsl_get_length(bare_type) - 1); if (glsl_type_is_unsized_array(last_member)) { - bool is_64bit = glsl_type_is_64bit(glsl_without_array(last_member)); - runtime_array = spirv_builder_type_runtime_array(&ctx->builder, get_uvec_type(ctx, is_64bit ? 64 : bitsize, 1)); + runtime_array = spirv_builder_type_runtime_array(&ctx->builder, get_uvec_type(ctx, bitsize, 1)); spirv_builder_emit_array_stride(&ctx->builder, runtime_array, glsl_get_explicit_stride(last_member)); } } @@ -934,36 +1336,39 @@ get_bo_struct_type(struct ntv_context *ctx, struct nir_variable *var, unsigned b spirv_builder_emit_decoration(&ctx->builder, struct_type, SpvDecorationBlock); spirv_builder_emit_member_offset(&ctx->builder, struct_type, 0, 0); - if (runtime_array) { - spirv_builder_emit_member_offset(&ctx->builder, struct_type, 1, - glsl_get_struct_field_offset(var->interface_type, - glsl_get_length(var->interface_type) - 1)); - } + if (runtime_array) + spirv_builder_emit_member_offset(&ctx->builder, struct_type, 1, 0); - return spirv_builder_type_pointer(&ctx->builder, - ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform, - struct_type); + return struct_type; } static void -emit_bo(struct ntv_context *ctx, struct nir_variable *var, unsigned force_bitsize) +emit_bo(struct ntv_context *ctx, struct nir_variable *var, bool aliased) { + unsigned bitsize = glsl_get_bit_size(glsl_get_array_element(glsl_get_struct_field(glsl_without_array(var->type), 0))); bool ssbo = var->data.mode == nir_var_mem_ssbo; - unsigned bitsize = force_bitsize ? force_bitsize : 32; - unsigned idx = bitsize >> 4; - assert(idx < ARRAY_SIZE(ctx->ssbos[0])); - - SpvId pointer_type = get_bo_struct_type(ctx, var, bitsize); - + SpvId struct_type = get_bo_struct_type(ctx, var); + _mesa_hash_table_insert(ctx->bo_struct_types, var, (void *)(uintptr_t)struct_type); + SpvId array_length = emit_uint_const(ctx, 32, glsl_get_length(var->type)); + SpvId array_type = spirv_builder_type_array(&ctx->builder, struct_type, array_length); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform, + array_type); SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform); if (var->name) spirv_builder_emit_name(&ctx->builder, var_id, var->name); + if (aliased) + spirv_builder_emit_decoration(&ctx->builder, var_id, SpvDecorationAliased); + + unsigned idx = bitsize >> 4; + assert(idx < ARRAY_SIZE(ctx->ssbos)); if (ssbo) { - assert(!ctx->ssbos[var->data.driver_location][idx]); - ctx->ssbos[var->data.driver_location][idx] = var_id; - ctx->ssbo_vars[var->data.driver_location] = var; + assert(!ctx->ssbos[idx]); + ctx->ssbos[idx] = var_id; + if (bitsize == 32) + ctx->ssbo_vars = var; } else { assert(!ctx->ubos[var->data.driver_location][idx]); ctx->ubos[var->data.driver_location][idx] = var_id; @@ -973,79 +1378,60 @@ emit_bo(struct ntv_context *ctx, struct nir_variable *var, unsigned force_bitsiz assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; } + _mesa_hash_table_insert(ctx->vars, var, (void *)(intptr_t)var_id); spirv_builder_emit_descriptor_set(&ctx->builder, var_id, var->data.descriptor_set); spirv_builder_emit_binding(&ctx->builder, var_id, var->data.binding); } -static void -emit_uniform(struct ntv_context *ctx, struct nir_variable *var) -{ - if (var->data.mode == nir_var_mem_ubo || var->data.mode == nir_var_mem_ssbo) - emit_bo(ctx, var, 0); - else { - assert(var->data.mode == nir_var_uniform); - const struct glsl_type *type = glsl_without_array(var->type); - if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) - emit_image(ctx, var); - } -} - static SpvId get_vec_from_bit_size(struct ntv_context *ctx, uint32_t bit_size, uint32_t num_components) { if (bit_size == 1) return get_bvec_type(ctx, num_components); - if (bit_size == 8 || bit_size == 16 || bit_size == 32 || bit_size == 64) - return get_uvec_type(ctx, bit_size, num_components); - unreachable("unhandled register bit size"); - return 0; + return get_uvec_type(ctx, bit_size, num_components); } static SpvId -get_src_ssa(struct ntv_context *ctx, const nir_ssa_def *ssa) +get_src_ssa(struct ntv_context *ctx, const nir_def *ssa, nir_alu_type *atype) { assert(ssa->index < ctx->num_defs); assert(ctx->defs[ssa->index] != 0); + *atype = ctx->def_types[ssa->index]; return ctx->defs[ssa->index]; } -static SpvId -get_var_from_reg(struct ntv_context *ctx, nir_register *reg) +static void +init_reg(struct ntv_context *ctx, nir_intrinsic_instr *decl, nir_alu_type atype) { - assert(reg->index < ctx->num_regs); - assert(ctx->regs[reg->index] != 0); - return ctx->regs[reg->index]; -} + unsigned index = decl->def.index; + unsigned num_components = nir_intrinsic_num_components(decl); + unsigned bit_size = nir_intrinsic_bit_size(decl); -static SpvId -get_src_reg(struct ntv_context *ctx, const nir_reg_src *reg) -{ - assert(reg->reg); - assert(!reg->indirect); - assert(!reg->base_offset); + if (ctx->defs[index]) + return; + + SpvId type = get_alu_type(ctx, atype, num_components, bit_size); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassFunction, + type); + SpvId var = spirv_builder_emit_var(&ctx->builder, pointer_type, + SpvStorageClassFunction); - SpvId var = get_var_from_reg(ctx, reg->reg); - SpvId type = get_vec_from_bit_size(ctx, reg->reg->bit_size, reg->reg->num_components); - return spirv_builder_emit_load(&ctx->builder, type, var); + ctx->defs[index] = var; + ctx->def_types[index] = nir_alu_type_get_base_type(atype); } static SpvId -get_src(struct ntv_context *ctx, nir_src *src) +get_src(struct ntv_context *ctx, nir_src *src, nir_alu_type *atype) { - if (src->is_ssa) - return get_src_ssa(ctx, src->ssa); - else - return get_src_reg(ctx, &src->reg); + return get_src_ssa(ctx, src->ssa, atype); } static SpvId -get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) +get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src, nir_alu_type *atype) { - assert(!alu->src[src].negate); - assert(!alu->src[src].abs); - - SpvId def = get_src(ctx, &alu->src[src].src); + SpvId def = get_src(ctx, &alu->src[src].src, atype); unsigned used_channels = 0; bool need_swizzle = false; @@ -1068,10 +1454,7 @@ get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) return def; int bit_size = nir_src_bit_size(alu->src[src].src); - assert(bit_size == 1 || bit_size == 8 || bit_size == 16 || bit_size == 32 || bit_size == 64); - - SpvId raw_type = bit_size == 1 ? spirv_builder_type_bool(&ctx->builder) : - spirv_builder_type_uint(&ctx->builder, bit_size); + SpvId raw_type = get_alu_type(ctx, *atype, 1, bit_size); if (used_channels == 1) { uint32_t indices[] = { alu->src[src].swizzle[0] }; @@ -1111,14 +1494,6 @@ get_alu_src_raw(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) } } -static void -store_ssa_def(struct ntv_context *ctx, nir_ssa_def *ssa, SpvId result) -{ - assert(result != 0); - assert(ssa->index < ctx->num_defs); - ctx->defs[ssa->index] = result; -} - static SpvId emit_select(struct ntv_context *ctx, SpvId type, SpvId cond, SpvId if_true, SpvId if_false) @@ -1127,14 +1502,6 @@ emit_select(struct ntv_context *ctx, SpvId type, SpvId cond, } static SpvId -uvec_to_bvec(struct ntv_context *ctx, SpvId value, unsigned num_components) -{ - SpvId type = get_bvec_type(ctx, num_components); - SpvId zero = get_uvec_constant(ctx, 32, num_components, 0); - return emit_binop(ctx, SpvOpINotEqual, type, value, zero); -} - -static SpvId emit_bitcast(struct ntv_context *ctx, SpvId type, SpvId value) { return emit_unop(ctx, SpvOpBitcast, type, value); @@ -1164,50 +1531,22 @@ bitcast_to_fvec(struct ntv_context *ctx, SpvId value, unsigned bit_size, return emit_bitcast(ctx, type, value); } -static void -store_reg_def(struct ntv_context *ctx, nir_reg_dest *reg, SpvId result) +static SpvId +cast_src_to_type(struct ntv_context *ctx, SpvId value, nir_src src, nir_alu_type atype) { - SpvId var = get_var_from_reg(ctx, reg->reg); - assert(var); - spirv_builder_emit_store(&ctx->builder, var, result); + atype = nir_alu_type_get_base_type(atype); + unsigned num_components = nir_src_num_components(src); + unsigned bit_size = nir_src_bit_size(src); + return emit_bitcast(ctx, get_alu_type(ctx, atype, num_components, bit_size), value); } static void -store_dest_raw(struct ntv_context *ctx, nir_dest *dest, SpvId result) +store_def(struct ntv_context *ctx, unsigned def_index, SpvId result, nir_alu_type type) { - if (dest->is_ssa) - store_ssa_def(ctx, &dest->ssa, result); - else - store_reg_def(ctx, &dest->reg, result); -} - -static SpvId -store_dest(struct ntv_context *ctx, nir_dest *dest, SpvId result, nir_alu_type type) -{ - unsigned num_components = nir_dest_num_components(*dest); - unsigned bit_size = nir_dest_bit_size(*dest); - - if (bit_size != 1) { - switch (nir_alu_type_get_base_type(type)) { - case nir_type_bool: - assert("bool should have bit-size 1"); - break; - - case nir_type_uint: - break; /* nothing to do! */ - - case nir_type_int: - case nir_type_float: - result = bitcast_to_uvec(ctx, result, bit_size, num_components); - break; - - default: - unreachable("unsupported nir_alu_type"); - } - } - - store_dest_raw(ctx, dest, result); - return result; + assert(result != 0); + assert(def_index < ctx->num_defs); + ctx->def_types[def_index] = nir_alu_type_get_base_type(type); + ctx->defs[def_index] = result; } static SpvId @@ -1216,178 +1555,20 @@ emit_unop(struct ntv_context *ctx, SpvOp op, SpvId type, SpvId src) return spirv_builder_emit_unop(&ctx->builder, op, type, src); } -/* return the intended xfb output vec type based on base type and vector size */ -static SpvId -get_output_type(struct ntv_context *ctx, unsigned register_index, unsigned num_components) -{ - const struct glsl_type *out_type = NULL; - /* index is based on component, so we might have to go back a few slots to get to the base */ - while (!out_type) - out_type = ctx->so_output_gl_types[register_index--]; - enum glsl_base_type base_type = glsl_get_base_type(out_type); - if (base_type == GLSL_TYPE_ARRAY) - base_type = glsl_get_base_type(glsl_without_array(out_type)); - - switch (base_type) { - case GLSL_TYPE_BOOL: - return get_bvec_type(ctx, num_components); - - case GLSL_TYPE_FLOAT: - return get_fvec_type(ctx, 32, num_components); - - case GLSL_TYPE_INT: - return get_ivec_type(ctx, 32, num_components); - - case GLSL_TYPE_UINT: - return get_uvec_type(ctx, 32, num_components); - - default: - break; - } - unreachable("unknown type"); - return 0; -} - -/* for streamout create new outputs, as streamout can be done on individual components, - from complete outputs, so we just can't use the created packed outputs */ -static void -emit_so_info(struct ntv_context *ctx, const struct zink_so_info *so_info, - unsigned first_so) -{ - unsigned output = 0; - for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { - struct pipe_stream_output so_output = so_info->so_info.output[i]; - unsigned slot = so_info->so_info_slots[i] << 2 | so_output.start_component; - SpvId out_type = get_output_type(ctx, slot, so_output.num_components); - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassOutput, - out_type); - SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, - SpvStorageClassOutput); - char name[10]; - - snprintf(name, 10, "xfb%d", output); - spirv_builder_emit_name(&ctx->builder, var_id, name); - spirv_builder_emit_offset(&ctx->builder, var_id, (so_output.dst_offset * 4)); - spirv_builder_emit_xfb_buffer(&ctx->builder, var_id, so_output.output_buffer); - spirv_builder_emit_xfb_stride(&ctx->builder, var_id, so_info->so_info.stride[so_output.output_buffer] * 4); - if (so_output.stream) - spirv_builder_emit_stream(&ctx->builder, var_id, so_output.stream); - - /* output location is incremented by VARYING_SLOT_VAR0 for non-builtins in vtn, - * so we need to ensure that the new xfb location slot doesn't conflict with any previously-emitted - * outputs. - */ - uint32_t location = first_so + i; - assert(location < VARYING_SLOT_VAR0); - spirv_builder_emit_location(&ctx->builder, var_id, location); - - /* note: gl_ClipDistance[4] can the 0-indexed member of VARYING_SLOT_CLIP_DIST1 here, - * so this is still the 0 component - */ - if (so_output.start_component) - spirv_builder_emit_component(&ctx->builder, var_id, so_output.start_component); - - uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t)); - *key = (uint32_t)so_output.register_index << 2 | so_output.start_component; - _mesa_hash_table_insert(ctx->so_outputs, key, (void *)(intptr_t)var_id); - - assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); - ctx->entry_ifaces[ctx->num_entry_ifaces++] = var_id; - output += align(so_output.num_components, 4) / 4; - } -} - -static void -emit_so_outputs(struct ntv_context *ctx, - const struct zink_so_info *so_info) -{ - for (unsigned i = 0; i < so_info->so_info.num_outputs; i++) { - uint32_t components[NIR_MAX_VEC_COMPONENTS]; - unsigned slot = so_info->so_info_slots[i]; - struct pipe_stream_output so_output = so_info->so_info.output[i]; - uint32_t so_key = (uint32_t) so_output.register_index << 2 | so_output.start_component; - uint32_t location = (uint32_t) slot << 2 | so_output.start_component; - struct hash_entry *he = _mesa_hash_table_search(ctx->so_outputs, &so_key); - assert(he); - SpvId so_output_var_id = (SpvId)(intptr_t)he->data; - - SpvId type = get_output_type(ctx, location, so_output.num_components); - SpvId output = 0; - /* index is based on component, so we might have to go back a few slots to get to the base */ - UNUSED uint32_t orig_location = location; - while (!output) - output = ctx->outputs[location--]; - location++; - SpvId output_type = ctx->so_output_types[location]; - const struct glsl_type *out_type = ctx->so_output_gl_types[location]; - - SpvId src = spirv_builder_emit_load(&ctx->builder, output_type, output); - - SpvId result; - - for (unsigned c = 0; c < so_output.num_components; c++) { - components[c] = so_output.start_component + c; - /* this is the second half of a 2 * vec4 array */ - if (slot == VARYING_SLOT_CLIP_DIST1) - components[c] += 4; - } - - /* if we're emitting a scalar or the type we're emitting matches the output's original type and we're - * emitting the same number of components, then we can skip any sort of conversion here - */ - if (glsl_type_is_scalar(out_type) || (type == output_type && glsl_get_length(out_type) == so_output.num_components)) - result = src; - else { - /* OpCompositeExtract can only extract scalars for our use here */ - if (so_output.num_components == 1) { - result = spirv_builder_emit_composite_extract(&ctx->builder, type, src, components, so_output.num_components); - } else if (glsl_type_is_vector(out_type)) { - /* OpVectorShuffle can select vector members into a differently-sized vector */ - result = spirv_builder_emit_vector_shuffle(&ctx->builder, type, - src, src, - components, so_output.num_components); - result = emit_bitcast(ctx, type, result); - } else { - /* for arrays, we need to manually extract each desired member - * and re-pack them into the desired output type - */ - for (unsigned c = 0; c < so_output.num_components; c++) { - uint32_t member[2]; - unsigned member_idx = 0; - if (glsl_type_is_matrix(out_type)) { - member_idx = 1; - member[0] = so_output.register_index; - } - member[member_idx] = so_output.start_component + c; - SpvId base_type = get_glsl_basetype(ctx, glsl_get_base_type(glsl_without_array_or_matrix(out_type))); - - if (slot == VARYING_SLOT_CLIP_DIST1) - member[member_idx] += 4; - components[c] = spirv_builder_emit_composite_extract(&ctx->builder, base_type, src, member, 1 + member_idx); - } - result = spirv_builder_emit_composite_construct(&ctx->builder, type, components, so_output.num_components); - } - } - - spirv_builder_emit_store(&ctx->builder, so_output_var_id, result); - } -} - static SpvId emit_atomic(struct ntv_context *ctx, SpvId op, SpvId type, SpvId src0, SpvId src1, SpvId src2) { if (op == SpvOpAtomicLoad) - return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup), + return spirv_builder_emit_triop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice), emit_uint_const(ctx, 32, 0)); if (op == SpvOpAtomicCompareExchange) - return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup), + return spirv_builder_emit_hexop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice), emit_uint_const(ctx, 32, 0), emit_uint_const(ctx, 32, 0), /* these params are intentionally swapped */ src2, src1); - return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeWorkgroup), + return spirv_builder_emit_quadop(&ctx->builder, op, type, src0, emit_uint_const(ctx, 32, SpvScopeDevice), emit_uint_const(ctx, 32, 0), src1); } @@ -1453,26 +1634,6 @@ get_fvec_constant(struct ntv_context *ctx, unsigned bit_size, } static SpvId -get_uvec_constant(struct ntv_context *ctx, unsigned bit_size, - unsigned num_components, uint64_t value) -{ - assert(bit_size == 32 || bit_size == 64); - - SpvId result = emit_uint_const(ctx, bit_size, value); - if (num_components == 1) - return result; - - assert(num_components > 1); - SpvId components[NIR_MAX_VEC_COMPONENTS]; - for (int i = 0; i < num_components; i++) - components[i] = result; - - SpvId type = get_uvec_type(ctx, bit_size, num_components); - return spirv_builder_const_composite(&ctx->builder, type, components, - num_components); -} - -static SpvId get_ivec_constant(struct ntv_context *ctx, unsigned bit_size, unsigned num_components, int64_t value) { @@ -1498,36 +1659,36 @@ alu_instr_src_components(const nir_alu_instr *instr, unsigned src) if (nir_op_infos[instr->op].input_sizes[src] > 0) return nir_op_infos[instr->op].input_sizes[src]; - if (instr->dest.dest.is_ssa) - return instr->dest.dest.ssa.num_components; - else - return instr->dest.dest.reg.reg->num_components; + return instr->def.num_components; } static SpvId -get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) +get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src, SpvId *raw_value, nir_alu_type *atype) { - SpvId raw_value = get_alu_src_raw(ctx, alu, src); + *raw_value = get_alu_src_raw(ctx, alu, src, atype); unsigned num_components = alu_instr_src_components(alu, src); unsigned bit_size = nir_src_bit_size(alu->src[src].src); - nir_alu_type type = nir_op_infos[alu->op].input_types[src]; + nir_alu_type type = alu_op_is_typeless(alu->op) ? *atype : nir_op_infos[alu->op].input_types[src]; + type = nir_alu_type_get_base_type(type); + if (type == *atype) + return *raw_value; if (bit_size == 1) - return raw_value; + return *raw_value; else { switch (nir_alu_type_get_base_type(type)) { case nir_type_bool: unreachable("bool should have bit-size 1"); case nir_type_int: - return bitcast_to_ivec(ctx, raw_value, bit_size, num_components); + return bitcast_to_ivec(ctx, *raw_value, bit_size, num_components); case nir_type_uint: - return raw_value; + return bitcast_to_uvec(ctx, *raw_value, bit_size, num_components); case nir_type_float: - return bitcast_to_fvec(ctx, raw_value, bit_size, num_components); + return bitcast_to_fvec(ctx, *raw_value, bit_size, num_components); default: unreachable("unknown nir_alu_type"); @@ -1535,39 +1696,16 @@ get_alu_src(struct ntv_context *ctx, nir_alu_instr *alu, unsigned src) } } -static SpvId -store_alu_result(struct ntv_context *ctx, nir_alu_instr *alu, SpvId result) +static void +store_alu_result(struct ntv_context *ctx, nir_alu_instr *alu, SpvId result, nir_alu_type atype) { - assert(!alu->dest.saturate); - return store_dest(ctx, &alu->dest.dest, result, - nir_op_infos[alu->op].output_type); + store_def(ctx, alu->def.index, result, atype); } static SpvId -get_dest_type(struct ntv_context *ctx, nir_dest *dest, nir_alu_type type) +get_def_type(struct ntv_context *ctx, nir_def *def, nir_alu_type type) { - unsigned num_components = nir_dest_num_components(*dest); - unsigned bit_size = nir_dest_bit_size(*dest); - - if (bit_size == 1) - return get_bvec_type(ctx, num_components); - - switch (nir_alu_type_get_base_type(type)) { - case nir_type_bool: - unreachable("bool should have bit-size 1"); - - case nir_type_int: - return get_ivec_type(ctx, bit_size, num_components); - - case nir_type_uint: - return get_uvec_type(ctx, bit_size, num_components); - - case nir_type_float: - return get_fvec_type(ctx, bit_size, num_components); - - default: - unreachable("unsupported nir_alu_type"); - } + return get_alu_type(ctx, type, def->num_components, def->bit_size); } static bool @@ -1588,14 +1726,66 @@ needs_derivative_control(nir_alu_instr *alu) static void emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) { + bool is_bcsel = alu->op == nir_op_bcsel; + nir_alu_type stype[NIR_MAX_VEC_COMPONENTS] = {0}; SpvId src[NIR_MAX_VEC_COMPONENTS]; + SpvId raw_src[NIR_MAX_VEC_COMPONENTS]; for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) - src[i] = get_alu_src(ctx, alu, i); + src[i] = get_alu_src(ctx, alu, i, &raw_src[i], &stype[i]); + + nir_alu_type typeless_type = stype[is_bcsel]; + if (nir_op_infos[alu->op].num_inputs > 1 && + alu_op_is_typeless(alu->op) && + nir_src_bit_size(alu->src[is_bcsel].src) != 1) { + unsigned uint_count = 0; + unsigned int_count = 0; + unsigned float_count = 0; + for (unsigned i = is_bcsel; i < nir_op_infos[alu->op].num_inputs; i++) { + if (stype[i] == nir_type_bool) + break; + switch (stype[i]) { + case nir_type_uint: + uint_count++; + break; + case nir_type_int: + int_count++; + break; + case nir_type_float: + float_count++; + break; + default: + unreachable("this shouldn't happen"); + } + } + if (uint_count > int_count && uint_count > float_count) + typeless_type = nir_type_uint; + else if (int_count > uint_count && int_count > float_count) + typeless_type = nir_type_int; + else if (float_count > uint_count && float_count > int_count) + typeless_type = nir_type_float; + else if (float_count == uint_count || uint_count == int_count) + typeless_type = nir_type_uint; + else if (float_count == int_count) + typeless_type = nir_type_float; + else + typeless_type = nir_type_uint; + assert(typeless_type != nir_type_bool); + for (unsigned i = is_bcsel; i < nir_op_infos[alu->op].num_inputs; i++) { + unsigned num_components = alu_instr_src_components(alu, i); + unsigned bit_size = nir_src_bit_size(alu->src[i].src); + SpvId type = get_alu_type(ctx, typeless_type, num_components, bit_size); + if (stype[i] != typeless_type) { + src[i] = emit_bitcast(ctx, type, src[i]); + } + } + } - SpvId dest_type = get_dest_type(ctx, &alu->dest.dest, - nir_op_infos[alu->op].output_type); - unsigned bit_size = nir_dest_bit_size(alu->dest.dest); - unsigned num_components = nir_dest_num_components(alu->dest.dest); + unsigned bit_size = alu->def.bit_size; + unsigned num_components = alu->def.num_components; + nir_alu_type atype = bit_size == 1 ? + nir_type_bool : + (alu_op_is_typeless(alu->op) ? typeless_type : nir_op_infos[alu->op].output_type); + SpvId dest_type = get_def_type(ctx, &alu->def, atype); if (needs_derivative_control(alu)) spirv_builder_emit_cap(&ctx->builder, SpvCapabilityDerivativeControl); @@ -1621,6 +1811,8 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) UNOP(nir_op_fddy, SpvOpDPdy) UNOP(nir_op_fddy_coarse, SpvOpDPdyCoarse) UNOP(nir_op_fddy_fine, SpvOpDPdyFine) + UNOP(nir_op_f2i8, SpvOpConvertFToS) + UNOP(nir_op_f2u8, SpvOpConvertFToU) UNOP(nir_op_f2i16, SpvOpConvertFToS) UNOP(nir_op_f2u16, SpvOpConvertFToU) UNOP(nir_op_f2i32, SpvOpConvertFToS) @@ -1629,6 +1821,7 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) UNOP(nir_op_i2f32, SpvOpConvertSToF) UNOP(nir_op_u2f16, SpvOpConvertUToF) UNOP(nir_op_u2f32, SpvOpConvertUToF) + UNOP(nir_op_i2i8, SpvOpSConvert) UNOP(nir_op_i2i16, SpvOpSConvert) UNOP(nir_op_i2i32, SpvOpSConvert) UNOP(nir_op_u2u8, SpvOpUConvert) @@ -1647,6 +1840,12 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) UNOP(nir_op_bit_count, SpvOpBitCount) #undef UNOP + case nir_op_f2f16_rtz: + assert(nir_op_infos[alu->op].num_inputs == 1); + result = emit_unop(ctx, SpvOpFConvert, dest_type, src[0]); + spirv_builder_emit_rounding_mode(&ctx->builder, result, SpvFPRoundingModeRTZ); + break; + case nir_op_inot: if (bit_size == 1) result = emit_unop(ctx, SpvOpLogicalNot, dest_type, src[0]); @@ -1654,6 +1853,7 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) result = emit_unop(ctx, SpvOpNot, dest_type, src[0]); break; + case nir_op_b2i8: case nir_op_b2i16: case nir_op_b2i32: case nir_op_b2i64: @@ -1672,12 +1872,25 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) get_fvec_constant(ctx, bit_size, num_components, 0)); break; + case nir_op_uclz: + assert(nir_op_infos[alu->op].num_inputs == 1); + result = emit_unop(ctx, SpvOpUCountLeadingZerosINTEL, dest_type, src[0]); + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityIntegerFunctions2INTEL); + spirv_builder_emit_extension(&ctx->builder, "SPV_INTEL_shader_integer_functions2"); + break; #define BUILTIN_UNOP(nir_op, spirv_op) \ case nir_op: \ assert(nir_op_infos[alu->op].num_inputs == 1); \ result = emit_builtin_unop(ctx, spirv_op, dest_type, src[0]); \ break; +#define BUILTIN_UNOPF(nir_op, spirv_op) \ + case nir_op: \ + assert(nir_op_infos[alu->op].num_inputs == 1); \ + result = emit_builtin_unop(ctx, spirv_op, get_def_type(ctx, &alu->def, nir_type_float), src[0]); \ + atype = nir_type_float; \ + break; + BUILTIN_UNOP(nir_op_iabs, GLSLstd450SAbs) BUILTIN_UNOP(nir_op_fabs, GLSLstd450FAbs) BUILTIN_UNOP(nir_op_fsqrt, GLSLstd450Sqrt) @@ -1696,31 +1909,27 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BUILTIN_UNOP(nir_op_ufind_msb, GLSLstd450FindUMsb) BUILTIN_UNOP(nir_op_find_lsb, GLSLstd450FindILsb) BUILTIN_UNOP(nir_op_ifind_msb, GLSLstd450FindSMsb) - BUILTIN_UNOP(nir_op_pack_half_2x16, GLSLstd450PackHalf2x16) - BUILTIN_UNOP(nir_op_unpack_half_2x16, GLSLstd450UnpackHalf2x16) - BUILTIN_UNOP(nir_op_pack_64_2x32, GLSLstd450PackDouble2x32) -#undef BUILTIN_UNOP - case nir_op_frcp: + case nir_op_pack_half_2x16: assert(nir_op_infos[alu->op].num_inputs == 1); - result = emit_binop(ctx, SpvOpFDiv, dest_type, - get_fvec_constant(ctx, bit_size, num_components, 1), - src[0]); + result = emit_builtin_unop(ctx, GLSLstd450PackHalf2x16, get_def_type(ctx, &alu->def, nir_type_uint), src[0]); break; - case nir_op_f2b1: + case nir_op_unpack_64_2x32: assert(nir_op_infos[alu->op].num_inputs == 1); - result = emit_binop(ctx, SpvOpFOrdNotEqual, dest_type, src[0], - get_fvec_constant(ctx, - nir_src_bit_size(alu->src[0].src), - num_components, 0)); + result = emit_builtin_unop(ctx, GLSLstd450UnpackDouble2x32, get_def_type(ctx, &alu->def, nir_type_uint), src[0]); break; - case nir_op_i2b1: + + BUILTIN_UNOPF(nir_op_unpack_half_2x16, GLSLstd450UnpackHalf2x16) + BUILTIN_UNOPF(nir_op_pack_64_2x32, GLSLstd450PackDouble2x32) +#undef BUILTIN_UNOP +#undef BUILTIN_UNOPF + + case nir_op_frcp: assert(nir_op_infos[alu->op].num_inputs == 1); - result = emit_binop(ctx, SpvOpINotEqual, dest_type, src[0], - get_ivec_constant(ctx, - nir_src_bit_size(alu->src[0].src), - num_components, 0)); + result = emit_binop(ctx, SpvOpFDiv, dest_type, + get_fvec_constant(ctx, bit_size, num_components, 1), + src[0]); break; @@ -1736,6 +1945,8 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BINOP(nir_op_idiv, SpvOpSDiv) BINOP(nir_op_udiv, SpvOpUDiv) BINOP(nir_op_umod, SpvOpUMod) + BINOP(nir_op_imod, SpvOpSMod) + BINOP(nir_op_irem, SpvOpSRem) BINOP(nir_op_fadd, SpvOpFAdd) BINOP(nir_op_fsub, SpvOpFSub) BINOP(nir_op_fmul, SpvOpFMul) @@ -1747,12 +1958,6 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BINOP(nir_op_uge, SpvOpUGreaterThanEqual) BINOP(nir_op_flt, SpvOpFOrdLessThan) BINOP(nir_op_fge, SpvOpFOrdGreaterThanEqual) - BINOP(nir_op_feq, SpvOpFOrdEqual) - BINOP(nir_op_fneu, SpvOpFUnordNotEqual) - BINOP(nir_op_ishl, SpvOpShiftLeftLogical) - BINOP(nir_op_ishr, SpvOpShiftRightArithmetic) - BINOP(nir_op_ushr, SpvOpShiftRightLogical) - BINOP(nir_op_ixor, SpvOpBitwiseXor) BINOP(nir_op_frem, SpvOpFRem) #undef BINOP @@ -1769,8 +1974,26 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BINOP_LOG(nir_op_ior, SpvOpBitwiseOr, SpvOpLogicalOr) BINOP_LOG(nir_op_ieq, SpvOpIEqual, SpvOpLogicalEqual) BINOP_LOG(nir_op_ine, SpvOpINotEqual, SpvOpLogicalNotEqual) + BINOP_LOG(nir_op_ixor, SpvOpBitwiseXor, SpvOpLogicalNotEqual) #undef BINOP_LOG +#define BINOP_SHIFT(nir_op, spirv_op) \ + case nir_op: { \ + assert(nir_op_infos[alu->op].num_inputs == 2); \ + int shift_bit_size = nir_src_bit_size(alu->src[1].src); \ + nir_alu_type shift_nir_type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[1]); \ + SpvId shift_type = get_alu_type(ctx, shift_nir_type, num_components, shift_bit_size); \ + SpvId shift_mask = get_ivec_constant(ctx, shift_bit_size, num_components, bit_size - 1); \ + SpvId shift_count = emit_binop(ctx, SpvOpBitwiseAnd, shift_type, src[1], shift_mask); \ + result = emit_binop(ctx, spirv_op, dest_type, src[0], shift_count); \ + break; \ + } + + BINOP_SHIFT(nir_op_ishl, SpvOpShiftLeftLogical) + BINOP_SHIFT(nir_op_ishr, SpvOpShiftRightArithmetic) + BINOP_SHIFT(nir_op_ushr, SpvOpShiftRightLogical) +#undef BINOP_SHIFT + #define BUILTIN_BINOP(nir_op, spirv_op) \ case nir_op: \ assert(nir_op_infos[alu->op].num_inputs == 2); \ @@ -1783,8 +2006,31 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) BUILTIN_BINOP(nir_op_imax, GLSLstd450SMax) BUILTIN_BINOP(nir_op_umin, GLSLstd450UMin) BUILTIN_BINOP(nir_op_umax, GLSLstd450UMax) + BUILTIN_BINOP(nir_op_ldexp, GLSLstd450Ldexp) #undef BUILTIN_BINOP +#define INTEL_BINOP(nir_op, spirv_op) \ + case nir_op: \ + assert(nir_op_infos[alu->op].num_inputs == 2); \ + result = emit_binop(ctx, spirv_op, dest_type, src[0], src[1]); \ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityIntegerFunctions2INTEL); \ + spirv_builder_emit_extension(&ctx->builder, "SPV_INTEL_shader_integer_functions2"); \ + break; + + INTEL_BINOP(nir_op_uabs_isub, SpvOpAbsISubINTEL) + INTEL_BINOP(nir_op_uabs_usub, SpvOpAbsUSubINTEL) + INTEL_BINOP(nir_op_iadd_sat, SpvOpIAddSatINTEL) + INTEL_BINOP(nir_op_uadd_sat, SpvOpUAddSatINTEL) + INTEL_BINOP(nir_op_ihadd, SpvOpIAverageINTEL) + INTEL_BINOP(nir_op_uhadd, SpvOpUAverageINTEL) + INTEL_BINOP(nir_op_irhadd, SpvOpIAverageRoundedINTEL) + INTEL_BINOP(nir_op_urhadd, SpvOpUAverageRoundedINTEL) + INTEL_BINOP(nir_op_isub_sat, SpvOpISubSatINTEL) + INTEL_BINOP(nir_op_usub_sat, SpvOpUSubSatINTEL) + INTEL_BINOP(nir_op_imul_32x16, SpvOpIMul32x16INTEL) + INTEL_BINOP(nir_op_umul_32x16, SpvOpUMul32x16INTEL) +#undef INTEL_BINOP + case nir_op_fdot2: case nir_op_fdot3: case nir_op_fdot4: @@ -1799,6 +2045,23 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) case nir_op_sge: unreachable("should already be lowered away"); + case nir_op_fneu: + assert(nir_op_infos[alu->op].num_inputs == 2); + if (raw_src[0] == raw_src[1]) + result = emit_unop(ctx, SpvOpIsNan, dest_type, src[0]); + else + result = emit_binop(ctx, SpvOpFUnordNotEqual, dest_type, src[0], src[1]); + break; + + case nir_op_feq: + assert(nir_op_infos[alu->op].num_inputs == 2); + if (raw_src[0] == raw_src[1]) + result = emit_unop(ctx, SpvOpLogicalNot, dest_type, + emit_unop(ctx, SpvOpIsNan, dest_type, src[0])); + else + result = emit_binop(ctx, SpvOpFOrdEqual, dest_type, src[0], src[1]); + break; + case nir_op_flrp: assert(nir_op_infos[alu->op].num_inputs == 3); result = emit_builtin_triop(ctx, GLSLstd450FMix, dest_type, @@ -1841,6 +2104,84 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) result = spirv_builder_emit_quadop(&ctx->builder, SpvOpBitFieldInsert, dest_type, src[0], src[1], src[2], src[3]); break; + /* those are all simple bitcasts, we could do better, but it doesn't matter */ + case nir_op_pack_32_4x8: + case nir_op_pack_32_2x16: + case nir_op_pack_64_4x16: + case nir_op_unpack_32_4x8: + case nir_op_unpack_32_2x16: + case nir_op_unpack_64_4x16: { + result = emit_bitcast(ctx, dest_type, src[0]); + break; + } + + case nir_op_pack_32_2x16_split: + case nir_op_pack_64_2x32_split: { + nir_alu_type type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]); + if (num_components <= 2) { + SpvId components[] = {src[0], src[1]}; + SpvId vec_type = get_alu_type(ctx, type, num_components * 2, nir_src_bit_size(alu->src[0].src)); + result = spirv_builder_emit_composite_construct(&ctx->builder, vec_type, components, 2); + result = emit_bitcast(ctx, dest_type, result); + } else { + SpvId components[NIR_MAX_VEC_COMPONENTS]; + SpvId conv_type = get_alu_type(ctx, type, 1, nir_src_bit_size(alu->src[0].src)); + SpvId vec_type = get_alu_type(ctx, type, 2, nir_src_bit_size(alu->src[0].src)); + SpvId dest_scalar_type = get_alu_type(ctx, nir_op_infos[alu->op].output_type, 1, bit_size); + for (unsigned i = 0; i < nir_src_num_components(alu->src[0].src); i++) { + SpvId conv[2]; + conv[0] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, src[0], &i, 1); + conv[1] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, src[1], &i, 1); + SpvId vec = spirv_builder_emit_composite_construct(&ctx->builder, vec_type, conv, 2); + components[i] = emit_bitcast(ctx, dest_scalar_type, vec); + } + result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, components, num_components); + } + break; + } + + case nir_op_unpack_32_2x16_split_x: + case nir_op_unpack_64_2x32_split_x: { + nir_alu_type type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]); + SpvId vec_type = get_alu_type(ctx, type, 2, bit_size); + unsigned idx = 0; + if (num_components == 1) { + SpvId vec = emit_bitcast(ctx, vec_type, src[0]); + result = spirv_builder_emit_composite_extract(&ctx->builder, dest_type, vec, &idx, 1); + } else { + SpvId components[NIR_MAX_VEC_COMPONENTS]; + for (unsigned i = 0; i < nir_src_num_components(alu->src[0].src); i++) { + SpvId conv = spirv_builder_emit_composite_extract(&ctx->builder, get_alu_type(ctx, type, 1, nir_src_bit_size(alu->src[0].src)), src[0], &i, 1); + conv = emit_bitcast(ctx, vec_type, conv); + SpvId conv_type = get_alu_type(ctx, type, 1, bit_size); + components[i] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, conv, &idx, 1); + } + result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, components, num_components); + } + break; + } + + case nir_op_unpack_32_2x16_split_y: + case nir_op_unpack_64_2x32_split_y: { + nir_alu_type type = nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[0]); + SpvId vec_type = get_alu_type(ctx, type, 2, bit_size); + unsigned idx = 1; + if (num_components == 1) { + SpvId vec = emit_bitcast(ctx, vec_type, src[0]); + result = spirv_builder_emit_composite_extract(&ctx->builder, dest_type, vec, &idx, 1); + } else { + SpvId components[NIR_MAX_VEC_COMPONENTS]; + for (unsigned i = 0; i < nir_src_num_components(alu->src[0].src); i++) { + SpvId conv = spirv_builder_emit_composite_extract(&ctx->builder, get_alu_type(ctx, type, 1, nir_src_bit_size(alu->src[0].src)), src[0], &i, 1); + conv = emit_bitcast(ctx, vec_type, conv); + SpvId conv_type = get_alu_type(ctx, type, 1, bit_size); + components[i] = spirv_builder_emit_composite_extract(&ctx->builder, conv_type, conv, &idx, 1); + } + result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, components, num_components); + } + break; + } + default: fprintf(stderr, "emit_alu: not implemented (%s)\n", nir_op_infos[alu->op].name); @@ -1851,7 +2192,7 @@ emit_alu(struct ntv_context *ctx, nir_alu_instr *alu) if (alu->exact) spirv_builder_emit_decoration(&ctx->builder, result, SpvDecorationNoContraction); - store_alu_result(ctx, alu, result); + store_alu_result(ctx, alu, result, atype); } static void @@ -1861,273 +2202,99 @@ emit_load_const(struct ntv_context *ctx, nir_load_const_instr *load_const) unsigned num_components = load_const->def.num_components; SpvId components[NIR_MAX_VEC_COMPONENTS]; + nir_alu_type atype; if (bit_size == 1) { + atype = nir_type_bool; for (int i = 0; i < num_components; i++) components[i] = spirv_builder_const_bool(&ctx->builder, load_const->value[i].b); } else { + atype = infer_nir_alu_type_from_uses_ssa(&load_const->def); for (int i = 0; i < num_components; i++) { - uint64_t tmp = nir_const_value_as_uint(load_const->value[i], - bit_size); - components[i] = emit_uint_const(ctx, bit_size, tmp); + switch (atype) { + case nir_type_uint: { + uint64_t tmp = nir_const_value_as_uint(load_const->value[i], bit_size); + components[i] = emit_uint_const(ctx, bit_size, tmp); + break; + } + case nir_type_int: { + int64_t tmp = nir_const_value_as_int(load_const->value[i], bit_size); + components[i] = emit_int_const(ctx, bit_size, tmp); + break; + } + case nir_type_float: { + double tmp = nir_const_value_as_float(load_const->value[i], bit_size); + components[i] = emit_float_const(ctx, bit_size, tmp); + break; + } + default: + unreachable("this shouldn't happen!"); + } } } if (num_components > 1) { - SpvId type = get_vec_from_bit_size(ctx, bit_size, - num_components); + SpvId type = get_alu_type(ctx, atype, num_components, bit_size); SpvId value = spirv_builder_const_composite(&ctx->builder, type, components, num_components); - store_ssa_def(ctx, &load_const->def, value); + store_def(ctx, load_const->def.index, value, atype); } else { assert(num_components == 1); - store_ssa_def(ctx, &load_const->def, components[0]); - } -} - -static void -emit_load_bo(struct ntv_context *ctx, nir_intrinsic_instr *intr) -{ - nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]); - bool ssbo = intr->intrinsic == nir_intrinsic_load_ssbo; - assert(const_block_index); // no dynamic indexing for now - - unsigned idx = 0; - unsigned bit_size = nir_dest_bit_size(intr->dest); - idx = MIN2(bit_size, 32) >> 4; - if (ssbo) { - assert(idx < ARRAY_SIZE(ctx->ssbos[0])); - if (!ctx->ssbos[const_block_index->u32][idx]) - emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest)); - } else { - assert(idx < ARRAY_SIZE(ctx->ubos[0])); - if (!ctx->ubos[const_block_index->u32][idx]) - emit_bo(ctx, ctx->ubo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest)); - } - SpvId bo = ssbo ? ctx->ssbos[const_block_index->u32][idx] : ctx->ubos[const_block_index->u32][idx]; - SpvId uint_type = get_uvec_type(ctx, MIN2(bit_size, 32), 1); - SpvId one = emit_uint_const(ctx, 32, 1); - - /* number of components being loaded */ - unsigned num_components = nir_dest_num_components(intr->dest); - /* we need to grab 2x32 to fill the 64bit value */ - if (bit_size == 64) - num_components *= 2; - SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2]; - SpvId result; - - /* destination type for the load */ - SpvId type = get_dest_uvec_type(ctx, &intr->dest); - /* an id of an array member in bytes */ - SpvId uint_size = emit_uint_const(ctx, 32, MIN2(bit_size, 32) / 8); - - /* we grab a single array member at a time, so it's a pointer to a uint */ - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - ssbo ? SpvStorageClassStorageBuffer : SpvStorageClassUniform, - uint_type); - - /* our generated uniform has a memory layout like - * - * struct { - * uint base[array_size]; - * }; - * - * where 'array_size' is set as though every member of the ubo takes up a vec4, - * even if it's only a vec2 or a float. - * - * first, access 'base' - */ - SpvId member = emit_uint_const(ctx, 32, 0); - /* this is the offset (in bytes) that we're accessing: - * it may be a const value or it may be dynamic in the shader - */ - SpvId offset = get_src(ctx, &intr->src[1]); - /* calculate the byte offset in the array */ - SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size); - /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type: - * index 0 is accessing 'base' - * index 1 is accessing 'base[index 1]' - * - * we must perform the access this way in case src[1] is dynamic because there's - * no other spirv method for using an id to access a member of a composite, as - * (composite|vector)_extract both take literals - */ - for (unsigned i = 0; i < num_components; i++) { - SpvId indices[2] = { member, vec_offset }; - SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, - bo, indices, - ARRAY_SIZE(indices)); - /* load a single value into the constituents array */ - if (ssbo && nir_intrinsic_access(intr) & ACCESS_COHERENT) - constituents[i] = emit_atomic(ctx, SpvOpAtomicLoad, uint_type, ptr, 0, 0); - else - constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, ptr); - /* increment to the next member index for the next load */ - vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one); - } - - /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values - * by creating uvec2 composites and bitcasting them to u64 values - */ - if (bit_size == 64) { - num_components /= 2; - type = get_uvec_type(ctx, 64, num_components); - SpvId u64_type = get_uvec_type(ctx, 64, 1); - for (unsigned i = 0; i < num_components; i++) { - constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2); - constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]); - } - } - /* if loading more than 1 value, reassemble the results into the desired type, - * otherwise just use the loaded result - */ - if (num_components > 1) { - result = spirv_builder_emit_composite_construct(&ctx->builder, - type, - constituents, - num_components); - } else - result = constituents[0]; - - /* explicitly convert to a bool vector if the destination type is a bool */ - if (nir_dest_bit_size(intr->dest) == 1) - result = uvec_to_bvec(ctx, result, num_components); - - store_dest(ctx, &intr->dest, result, nir_type_uint); -} - -static void -emit_store_ssbo(struct ntv_context *ctx, nir_intrinsic_instr *intr) -{ - /* TODO: would be great to refactor this in with emit_load_bo() */ - - nir_const_value *const_block_index = nir_src_as_const_value(intr->src[1]); - assert(const_block_index); - - unsigned idx = MIN2(nir_src_bit_size(intr->src[0]), 32) >> 4; - assert(idx < ARRAY_SIZE(ctx->ssbos[0])); - if (!ctx->ssbos[const_block_index->u32][idx]) - emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_src_bit_size(intr->src[0])); - SpvId bo = ctx->ssbos[const_block_index->u32][idx]; - - unsigned bit_size = nir_src_bit_size(intr->src[0]); - SpvId uint_type = get_uvec_type(ctx, 32, 1); - SpvId one = emit_uint_const(ctx, 32, 1); - - /* number of components being stored */ - unsigned wrmask = nir_intrinsic_write_mask(intr); - unsigned num_components = util_bitcount(wrmask); - - /* we need to grab 2x32 to fill the 64bit value */ - bool is_64bit = bit_size == 64; - - /* an id of an array member in bytes */ - SpvId uint_size = emit_uint_const(ctx, 32, MIN2(bit_size, 32) / 8); - /* we grab a single array member at a time, so it's a pointer to a uint */ - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassStorageBuffer, - get_uvec_type(ctx, MIN2(bit_size, 32), 1)); - - /* our generated uniform has a memory layout like - * - * struct { - * uint base[array_size]; - * }; - * - * where 'array_size' is set as though every member of the ubo takes up a vec4, - * even if it's only a vec2 or a float. - * - * first, access 'base' - */ - SpvId member = emit_uint_const(ctx, 32, 0); - /* this is the offset (in bytes) that we're accessing: - * it may be a const value or it may be dynamic in the shader - */ - SpvId offset = get_src(ctx, &intr->src[2]); - /* calculate byte offset */ - SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size); - - SpvId value = get_src(ctx, &intr->src[0]); - /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type: - * index 0 is accessing 'base' - * index 1 is accessing 'base[index 1]' - * index 2 is accessing 'base[index 1][index 2]' - * - * we must perform the access this way in case src[1] is dynamic because there's - * no other spirv method for using an id to access a member of a composite, as - * (composite|vector)_extract both take literals - */ - unsigned write_count = 0; - SpvId src_base_type = get_uvec_type(ctx, bit_size, 1); - for (unsigned i = 0; write_count < num_components; i++) { - if (wrmask & (1 << i)) { - SpvId component = nir_src_num_components(intr->src[0]) > 1 ? - spirv_builder_emit_composite_extract(&ctx->builder, src_base_type, value, &i, 1) : - value; - SpvId component_split; - if (is_64bit) - component_split = emit_bitcast(ctx, get_uvec_type(ctx, 32, 2), component); - for (unsigned j = 0; j < 1 + !!is_64bit; j++) { - if (j) - vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one); - SpvId indices[] = { member, vec_offset }; - SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, - bo, indices, - ARRAY_SIZE(indices)); - if (is_64bit) - component = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, component_split, &j, 1); - if (nir_intrinsic_access(intr) & ACCESS_COHERENT) - spirv_builder_emit_atomic_store(&ctx->builder, ptr, SpvScopeWorkgroup, 0, component); - else - spirv_builder_emit_store(&ctx->builder, ptr, component); - } - write_count++; - } else if (is_64bit) - /* we're doing 32bit stores here, so we need to increment correctly here */ - vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one); - - /* increment to the next vec4 member index for the next store */ - vec_offset = emit_binop(ctx, SpvOpIAdd, uint_type, vec_offset, one); + store_def(ctx, load_const->def.index, components[0], atype); } } static void emit_discard(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - assert(ctx->block_started); - spirv_builder_emit_kill(&ctx->builder); - /* discard is weird in NIR, so let's just create an unreachable block after - it and hope that the vulkan driver will DCE any instructinos in it. */ - spirv_builder_label(&ctx->builder, spirv_builder_new_id(&ctx->builder)); + assert(ctx->discard_func); + SpvId type_void = spirv_builder_type_void(&ctx->builder); + spirv_builder_function_call(&ctx->builder, type_void, + ctx->discard_func, NULL, 0); } static void emit_load_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId ptr = get_src(ctx, intr->src); + nir_alu_type atype; + SpvId ptr = get_src(ctx, intr->src, &atype); + + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + SpvId type; + if (glsl_type_is_image(deref->type)) { + nir_variable *var = nir_deref_instr_get_variable(deref); + const struct glsl_type *gtype = glsl_without_array(var->type); + type = get_image_type(ctx, var, + glsl_type_is_sampler(gtype), + glsl_get_sampler_dim(gtype) == GLSL_SAMPLER_DIM_BUF); + atype = nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(gtype)); + } else { + type = get_glsl_type(ctx, deref->type); + atype = get_nir_alu_type(deref->type); + } + SpvId result; - SpvId result = spirv_builder_emit_load(&ctx->builder, - get_glsl_type(ctx, nir_src_as_deref(intr->src[0])->type), - ptr); - unsigned num_components = nir_dest_num_components(intr->dest); - unsigned bit_size = nir_dest_bit_size(intr->dest); - result = bitcast_to_uvec(ctx, result, bit_size, num_components); - store_dest(ctx, &intr->dest, result, nir_type_uint); + if (nir_intrinsic_access(intr) & ACCESS_COHERENT) + result = emit_atomic(ctx, SpvOpAtomicLoad, type, ptr, 0, 0); + else + result = spirv_builder_emit_load(&ctx->builder, type, ptr); + store_def(ctx, intr->def.index, result, atype); } static void emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId ptr = get_src(ctx, &intr->src[0]); - SpvId src = get_src(ctx, &intr->src[1]); + nir_alu_type ptype, stype; + SpvId ptr = get_src(ctx, &intr->src[0], &ptype); + SpvId src = get_src(ctx, &intr->src[1], &stype); const struct glsl_type *gtype = nir_src_as_deref(intr->src[0])->type; SpvId type = get_glsl_type(ctx, gtype); - nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0])); - unsigned num_writes = util_bitcount(nir_intrinsic_write_mask(intr)); + nir_variable *var = nir_intrinsic_get_var(intr, 0); unsigned wrmask = nir_intrinsic_write_mask(intr); - if (num_writes && num_writes != intr->num_components) { + if (!glsl_type_is_scalar(gtype) && + wrmask != BITFIELD_MASK(glsl_type_is_array(gtype) ? glsl_get_aoa_size(gtype) : glsl_get_vector_elements(gtype))) { /* no idea what we do if this fails */ assert(glsl_type_is_array(gtype) || glsl_type_is_vector(gtype)); @@ -2136,17 +2303,18 @@ emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr) SpvId member_type; if (glsl_type_is_vector(gtype)) { result_type = get_glsl_basetype(ctx, glsl_get_base_type(gtype)); - member_type = get_uvec_type(ctx, 32, 1); + member_type = get_alu_type(ctx, stype, 1, glsl_get_bit_size(gtype)); } else member_type = result_type = get_glsl_type(ctx, glsl_get_array_element(gtype)); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassOutput, + get_storage_class(var), result_type); for (unsigned i = 0; i < 4; i++) - if ((wrmask >> i) & 1) { + if (wrmask & BITFIELD_BIT(i)) { SpvId idx = emit_uint_const(ctx, 32, i); SpvId val = spirv_builder_emit_composite_extract(&ctx->builder, member_type, src, &i, 1); - val = emit_bitcast(ctx, result_type, val); + if (stype != ptype) + val = emit_bitcast(ctx, result_type, val); SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, ptr, &idx, 1); spirv_builder_emit_store(&ctx->builder, member, val); @@ -2155,99 +2323,161 @@ emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr) } SpvId result; - if (ctx->stage == MESA_SHADER_FRAGMENT && var->data.location == FRAG_RESULT_SAMPLE_MASK) { + if (ctx->stage == MESA_SHADER_FRAGMENT && + var->data.mode == nir_var_shader_out && + var->data.location == FRAG_RESULT_SAMPLE_MASK) { src = emit_bitcast(ctx, type, src); /* SampleMask is always an array in spirv, so we need to construct it into one */ result = spirv_builder_emit_composite_construct(&ctx->builder, ctx->sample_mask_type, &src, 1); - } else - result = emit_bitcast(ctx, type, src); - spirv_builder_emit_store(&ctx->builder, ptr, result); + } else { + if (ptype == stype) + result = src; + else + result = emit_bitcast(ctx, type, src); + } + if (nir_intrinsic_access(intr) & ACCESS_COHERENT) + spirv_builder_emit_atomic_store(&ctx->builder, ptr, SpvScopeDevice, 0, result); + else + spirv_builder_emit_store(&ctx->builder, ptr, result); } static void emit_load_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint); - unsigned num_components = nir_dest_num_components(intr->dest); - unsigned bit_size = nir_dest_bit_size(intr->dest); - bool qword = bit_size == 64; - SpvId uint_type = get_uvec_type(ctx, 32, 1); + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + unsigned num_components = intr->def.num_components; + unsigned bit_size = intr->def.bit_size; + SpvId uint_type = get_uvec_type(ctx, bit_size, 1); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassWorkgroup, uint_type); - SpvId offset = emit_binop(ctx, SpvOpUDiv, uint_type, get_src(ctx, &intr->src[0]), emit_uint_const(ctx, 32, 4)); + nir_alu_type atype; + SpvId offset = get_src(ctx, &intr->src[0], &atype); + if (atype == nir_type_float) + offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[0]), 1); SpvId constituents[NIR_MAX_VEC_COMPONENTS]; + SpvId shared_block = get_shared_block(ctx, bit_size); /* need to convert array -> vec */ for (unsigned i = 0; i < num_components; i++) { - SpvId parts[2]; - for (unsigned j = 0; j < 1 + !!qword; j++) { - SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, - ctx->shared_block_var, &offset, 1); - parts[j] = spirv_builder_emit_load(&ctx->builder, uint_type, member); - offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, 1)); - } - if (qword) - constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 64, 1), parts, 2); - else - constituents[i] = parts[0]; + SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + shared_block, &offset, 1); + constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member); + offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, 1)); } SpvId result; if (num_components > 1) result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, constituents, num_components); else - result = bitcast_to_uvec(ctx, constituents[0], bit_size, num_components); - store_dest(ctx, &intr->dest, result, nir_type_uint); + result = constituents[0]; + store_def(ctx, intr->def.index, result, nir_type_uint); } static void emit_store_shared(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId src = get_src(ctx, &intr->src[0]); - bool qword = nir_src_bit_size(intr->src[0]) == 64; + nir_alu_type atype; + SpvId src = get_src(ctx, &intr->src[0], &atype); - unsigned num_writes = util_bitcount(nir_intrinsic_write_mask(intr)); unsigned wrmask = nir_intrinsic_write_mask(intr); - /* this is a partial write, so we have to loop and do a per-component write */ - SpvId uint_type = get_uvec_type(ctx, 32, 1); + unsigned bit_size = nir_src_bit_size(intr->src[0]); + SpvId uint_type = get_uvec_type(ctx, bit_size, 1); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassWorkgroup, uint_type); - SpvId offset = emit_binop(ctx, SpvOpUDiv, uint_type, get_src(ctx, &intr->src[1]), emit_uint_const(ctx, 32, 4)); - - for (unsigned i = 0; num_writes; i++) { - if ((wrmask >> i) & 1) { - for (unsigned j = 0; j < 1 + !!qword; j++) { - unsigned comp = ((1 + !!qword) * i) + j; - SpvId shared_offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, emit_uint_const(ctx, 32, comp)); - SpvId val = src; - if (nir_src_num_components(intr->src[0]) != 1 || qword) - val = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, src, &comp, 1); - SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, - ctx->shared_block_var, &shared_offset, 1); - spirv_builder_emit_store(&ctx->builder, member, val); - } - num_writes--; - } + nir_alu_type otype; + SpvId offset = get_src(ctx, &intr->src[1], &otype); + if (otype == nir_type_float) + offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[0]), 1); + SpvId shared_block = get_shared_block(ctx, bit_size); + /* this is a partial write, so we have to loop and do a per-component write */ + u_foreach_bit(i, wrmask) { + SpvId shared_offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, i)); + SpvId val = src; + if (nir_src_num_components(intr->src[0]) != 1) + val = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, src, &i, 1); + if (atype != nir_type_uint) + val = emit_bitcast(ctx, get_alu_type(ctx, nir_type_uint, 1, bit_size), val); + SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + shared_block, &shared_offset, 1); + spirv_builder_emit_store(&ctx->builder, member, val); + } +} + +static void +emit_load_scratch(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + unsigned num_components = intr->def.num_components; + unsigned bit_size = intr->def.bit_size; + SpvId uint_type = get_uvec_type(ctx, bit_size, 1); + SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPrivate, + uint_type); + nir_alu_type atype; + SpvId offset = get_src(ctx, &intr->src[0], &atype); + if (atype != nir_type_uint) + offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[0]), 1); + SpvId constituents[NIR_MAX_VEC_COMPONENTS]; + SpvId scratch_block = get_scratch_block(ctx, bit_size); + /* need to convert array -> vec */ + for (unsigned i = 0; i < num_components; i++) { + SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + scratch_block, &offset, 1); + constituents[i] = spirv_builder_emit_load(&ctx->builder, uint_type, member); + offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, 1)); + } + SpvId result; + if (num_components > 1) + result = spirv_builder_emit_composite_construct(&ctx->builder, dest_type, constituents, num_components); + else + result = constituents[0]; + store_def(ctx, intr->def.index, result, nir_type_uint); +} + +static void +emit_store_scratch(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + nir_alu_type atype; + SpvId src = get_src(ctx, &intr->src[0], &atype); + + unsigned wrmask = nir_intrinsic_write_mask(intr); + unsigned bit_size = nir_src_bit_size(intr->src[0]); + SpvId uint_type = get_uvec_type(ctx, bit_size, 1); + SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPrivate, + uint_type); + nir_alu_type otype; + SpvId offset = get_src(ctx, &intr->src[1], &otype); + if (otype != nir_type_uint) + offset = bitcast_to_uvec(ctx, offset, nir_src_bit_size(intr->src[1]), 1); + SpvId scratch_block = get_scratch_block(ctx, bit_size); + /* this is a partial write, so we have to loop and do a per-component write */ + u_foreach_bit(i, wrmask) { + SpvId scratch_offset = emit_binop(ctx, SpvOpIAdd, spirv_builder_type_uint(&ctx->builder, 32), offset, emit_uint_const(ctx, 32, i)); + SpvId val = src; + if (nir_src_num_components(intr->src[0]) != 1) + val = spirv_builder_emit_composite_extract(&ctx->builder, uint_type, src, &i, 1); + if (atype != nir_type_uint) + val = emit_bitcast(ctx, get_alu_type(ctx, nir_type_uint, 1, bit_size), val); + SpvId member = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, + scratch_block, &scratch_offset, 1); + spirv_builder_emit_store(&ctx->builder, member, val); } } static void emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - unsigned bit_size = nir_dest_bit_size(intr->dest); SpvId uint_type = get_uvec_type(ctx, 32, 1); SpvId load_type = get_uvec_type(ctx, 32, 1); /* number of components being loaded */ - unsigned num_components = nir_dest_num_components(intr->dest); - /* we need to grab 2x32 to fill the 64bit value */ - if (bit_size == 64) - num_components *= 2; + unsigned num_components = intr->def.num_components; SpvId constituents[NIR_MAX_VEC_COMPONENTS * 2]; SpvId result; /* destination type for the load */ - SpvId type = get_dest_uvec_type(ctx, &intr->dest); + SpvId type = get_def_uvec_type(ctx, &intr->def); SpvId one = emit_uint_const(ctx, 32, 1); /* we grab a single array member at a time, so it's a pointer to a uint */ @@ -2255,9 +2485,12 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr) SpvStorageClassPushConstant, load_type); - SpvId member = get_src(ctx, &intr->src[0]); + nir_alu_type atype; + SpvId member = get_src(ctx, &intr->src[0], &atype); + if (atype == nir_type_float) + member = bitcast_to_uvec(ctx, member, nir_src_bit_size(intr->src[0]), 1); /* reuse the offset from ZINK_PUSH_CONST_OFFSET */ - SpvId offset = emit_uint_const(ctx, 32, 0); + SpvId offset = emit_uint_const(ctx, 32, nir_intrinsic_component(intr)); /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type: * index 0 is accessing 'base' * index 1 is accessing 'base[index 1]' @@ -2274,18 +2507,6 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr) offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one); } - /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values - * by creating uvec2 composites and bitcasting them to u64 values - */ - if (bit_size == 64) { - num_components /= 2; - type = get_uvec_type(ctx, 64, num_components); - SpvId u64_type = get_uvec_type(ctx, 64, 1); - for (unsigned i = 0; i < num_components; i++) { - constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2); - constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]); - } - } /* if loading more than 1 value, reassemble the results into the desired type, * otherwise just use the loaded result */ @@ -2297,7 +2518,84 @@ emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr) } else result = constituents[0]; - store_dest(ctx, &intr->dest, result, nir_type_uint); + store_def(ctx, intr->def.index, result, nir_type_uint); +} + +static void +emit_load_global(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + bool coherent = ctx->sinfo->have_vulkan_memory_model && nir_intrinsic_access(intr) & ACCESS_COHERENT; + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityPhysicalStorageBufferAddresses); + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPhysicalStorageBuffer, + dest_type); + nir_alu_type atype; + SpvId ptr = emit_bitcast(ctx, pointer_type, get_src(ctx, &intr->src[0], &atype)); + SpvId result = spirv_builder_emit_load_aligned(&ctx->builder, dest_type, ptr, intr->def.bit_size / 8, coherent); + store_def(ctx, intr->def.index, result, nir_type_uint); +} + +static void +emit_store_global(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + bool coherent = ctx->sinfo->have_vulkan_memory_model && nir_intrinsic_access(intr) & ACCESS_COHERENT; + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityPhysicalStorageBufferAddresses); + unsigned bit_size = nir_src_bit_size(intr->src[0]); + SpvId dest_type = get_uvec_type(ctx, bit_size, 1); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPhysicalStorageBuffer, + dest_type); + nir_alu_type atype; + SpvId param = get_src(ctx, &intr->src[0], &atype); + if (atype != nir_type_uint) + param = emit_bitcast(ctx, dest_type, param); + SpvId ptr = emit_bitcast(ctx, pointer_type, get_src(ctx, &intr->src[1], &atype)); + spirv_builder_emit_store_aligned(&ctx->builder, ptr, param, bit_size / 8, coherent); +} + +static void +emit_load_reg(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + assert(nir_intrinsic_base(intr) == 0 && "no array registers"); + + nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[0].ssa); + unsigned num_components = nir_intrinsic_num_components(decl); + unsigned bit_size = nir_intrinsic_bit_size(decl); + unsigned index = decl->def.index; + assert(index < ctx->num_defs); + + init_reg(ctx, decl, nir_type_uint); + assert(ctx->defs[index] != 0); + + nir_alu_type atype = ctx->def_types[index]; + SpvId var = ctx->defs[index]; + SpvId type = get_alu_type(ctx, atype, num_components, bit_size); + SpvId result = spirv_builder_emit_load(&ctx->builder, type, var); + store_def(ctx, intr->def.index, result, atype); +} + +static void +emit_store_reg(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + nir_alu_type atype; + SpvId param = get_src(ctx, &intr->src[0], &atype); + + nir_intrinsic_instr *decl = nir_reg_get_decl(intr->src[1].ssa); + unsigned index = decl->def.index; + unsigned num_components = nir_intrinsic_num_components(decl); + unsigned bit_size = nir_intrinsic_bit_size(decl); + + atype = nir_alu_type_get_base_type(atype); + init_reg(ctx, decl, atype); + SpvId var = ctx->defs[index]; + nir_alu_type vtype = ctx->def_types[index]; + if (atype != vtype) { + assert(vtype != nir_type_bool); + param = emit_bitcast(ctx, get_alu_type(ctx, vtype, num_components, bit_size), param); + } + assert(var); + spirv_builder_emit_store(&ctx->builder, var, param); } static SpvId @@ -2313,6 +2611,17 @@ create_builtin_var(struct ntv_context *ctx, SpvId var_type, spirv_builder_emit_name(&ctx->builder, var, name); spirv_builder_emit_builtin(&ctx->builder, var, builtin); + if (ctx->stage == MESA_SHADER_FRAGMENT) { + switch (builtin) { + case SpvBuiltInSampleId: + case SpvBuiltInSubgroupLocalInvocationId: + spirv_builder_emit_decoration(&ctx->builder, var, SpvDecorationFlat); + break; + default: + break; + } + } + assert(ctx->num_entry_ifaces < ARRAY_SIZE(ctx->entry_ifaces)); ctx->entry_ifaces[ctx->num_entry_ifaces++] = var; return var; @@ -2330,37 +2639,39 @@ emit_load_front_face(struct ntv_context *ctx, nir_intrinsic_instr *intr) SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, ctx->front_face_var); - assert(1 == nir_dest_num_components(intr->dest)); - store_dest(ctx, &intr->dest, result, nir_type_bool); + assert(1 == intr->def.num_components); + store_def(ctx, intr->def.index, result, nir_type_bool); } static void emit_load_uint_input(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId *var_id, const char *var_name, SpvBuiltIn builtin) { SpvId var_type = spirv_builder_type_uint(&ctx->builder, 32); - if (builtin == SpvBuiltInSampleMask) { - /* gl_SampleMaskIn is an array[1] in spirv... */ - var_type = spirv_builder_type_array(&ctx->builder, var_type, emit_uint_const(ctx, 32, 1)); - spirv_builder_emit_array_stride(&ctx->builder, var_type, sizeof(uint32_t)); - } if (!*var_id) { + if (builtin == SpvBuiltInSampleMask) { + /* gl_SampleMaskIn is an array[1] in spirv... */ + var_type = spirv_builder_type_array(&ctx->builder, var_type, emit_uint_const(ctx, 32, 1)); + spirv_builder_emit_array_stride(&ctx->builder, var_type, sizeof(uint32_t)); + } *var_id = create_builtin_var(ctx, var_type, SpvStorageClassInput, var_name, builtin); - if (builtin == SpvBuiltInSampleMask) { - SpvId zero = emit_uint_const(ctx, 32, 0); - var_type = spirv_builder_type_uint(&ctx->builder, 32); - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassInput, - var_type); - *var_id = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, *var_id, &zero, 1); - } } - SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, *var_id); - assert(1 == nir_dest_num_components(intr->dest)); - store_dest(ctx, &intr->dest, result, nir_type_uint); + SpvId load_var = *var_id; + if (builtin == SpvBuiltInSampleMask) { + SpvId zero = emit_uint_const(ctx, 32, 0); + var_type = spirv_builder_type_uint(&ctx->builder, 32); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassInput, + var_type); + load_var = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, load_var, &zero, 1); + } + + SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, load_var); + assert(1 == intr->def.num_components); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void @@ -2370,16 +2681,19 @@ emit_load_vec_input(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId *v switch (type) { case nir_type_bool: - var_type = get_bvec_type(ctx, nir_dest_num_components(intr->dest)); + var_type = get_bvec_type(ctx, intr->def.num_components); break; case nir_type_int: - var_type = get_ivec_type(ctx, nir_dest_bit_size(intr->dest), nir_dest_num_components(intr->dest)); + var_type = get_ivec_type(ctx, intr->def.bit_size, + intr->def.num_components); break; case nir_type_uint: - var_type = get_uvec_type(ctx, nir_dest_bit_size(intr->dest), nir_dest_num_components(intr->dest)); + var_type = get_uvec_type(ctx, intr->def.bit_size, + intr->def.num_components); break; case nir_type_float: - var_type = get_fvec_type(ctx, nir_dest_bit_size(intr->dest), nir_dest_num_components(intr->dest)); + var_type = get_fvec_type(ctx, intr->def.bit_size, + intr->def.num_components); break; default: unreachable("unknown type passed"); @@ -2391,7 +2705,7 @@ emit_load_vec_input(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId *v builtin); SpvId result = spirv_builder_emit_load(&ctx->builder, var_type, *var_id); - store_dest(ctx, &intr->dest, result, type); + store_def(ctx, intr->def.index, result, type); } static void @@ -2399,133 +2713,182 @@ emit_interpolate(struct ntv_context *ctx, nir_intrinsic_instr *intr) { SpvId op; spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInterpolationFunction); + SpvId src1 = 0; + nir_alu_type atype; switch (intr->intrinsic) { case nir_intrinsic_interp_deref_at_centroid: op = GLSLstd450InterpolateAtCentroid; break; case nir_intrinsic_interp_deref_at_sample: op = GLSLstd450InterpolateAtSample; + src1 = get_src(ctx, &intr->src[1], &atype); + if (atype != nir_type_int) + src1 = emit_bitcast(ctx, get_ivec_type(ctx, 32, 1), src1); break; case nir_intrinsic_interp_deref_at_offset: op = GLSLstd450InterpolateAtOffset; + src1 = get_src(ctx, &intr->src[1], &atype); + /* + The offset operand must be a vector of 2 components of 32-bit floating-point type. + - InterpolateAtOffset spec + */ + if (atype != nir_type_float) + src1 = emit_bitcast(ctx, get_fvec_type(ctx, 32, 2), src1); break; default: unreachable("unknown interp op"); } - SpvId ptr = get_src(ctx, &intr->src[0]); + nir_alu_type ptype; + SpvId ptr = get_src(ctx, &intr->src[0], &ptype); SpvId result; + const struct glsl_type *gtype = nir_src_as_deref(intr->src[0])->type; + assert(glsl_get_vector_elements(gtype) == intr->num_components); + assert(ptype == get_nir_alu_type(gtype)); if (intr->intrinsic == nir_intrinsic_interp_deref_at_centroid) - result = emit_builtin_unop(ctx, op, get_glsl_type(ctx, nir_src_as_deref(intr->src[0])->type), ptr); + result = emit_builtin_unop(ctx, op, get_glsl_type(ctx, gtype), ptr); else - result = emit_builtin_binop(ctx, op, get_glsl_type(ctx, nir_src_as_deref(intr->src[0])->type), - ptr, get_src(ctx, &intr->src[1])); - unsigned num_components = nir_dest_num_components(intr->dest); - unsigned bit_size = nir_dest_bit_size(intr->dest); - result = bitcast_to_uvec(ctx, result, bit_size, num_components); - store_dest(ctx, &intr->dest, result, nir_type_uint); + result = emit_builtin_binop(ctx, op, get_glsl_type(ctx, gtype), ptr, src1); + store_def(ctx, intr->def.index, result, ptype); } static void -handle_atomic_op(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId ptr, SpvId param, SpvId param2) +handle_atomic_op(struct ntv_context *ctx, nir_intrinsic_instr *intr, SpvId ptr, SpvId param, SpvId param2, nir_alu_type type) { - SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32); - SpvId result = emit_atomic(ctx, get_atomic_op(intr->intrinsic), dest_type, ptr, param, param2); + SpvId dest_type = get_def_type(ctx, &intr->def, type); + SpvId result = emit_atomic(ctx, + get_atomic_op(ctx, intr->def.bit_size, nir_intrinsic_atomic_op(intr)), + dest_type, ptr, param, param2); assert(result); - store_dest(ctx, &intr->dest, result, nir_type_uint); + store_def(ctx, intr->def.index, result, type); } static void -emit_ssbo_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) +emit_deref_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId ssbo; - SpvId param; - SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32); + nir_alu_type atype; + nir_alu_type ret_type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)) == nir_type_float ? nir_type_float : nir_type_uint; + SpvId ptr = get_src(ctx, &intr->src[0], &atype); + if (atype != ret_type && ret_type == nir_type_float) { + unsigned bit_size = nir_src_bit_size(intr->src[0]); + SpvId *float_array_type = &ctx->float_array_type[bit_size == 32 ? 0 : 1]; + if (!*float_array_type) { + *float_array_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassStorageBuffer, + spirv_builder_type_float(&ctx->builder, bit_size)); + } + ptr = emit_unop(ctx, SpvOpBitcast, *float_array_type, ptr); + } - nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]); - assert(const_block_index); // no dynamic indexing for now - unsigned bit_size = MIN2(nir_src_bit_size(intr->src[0]), 32); - unsigned idx = bit_size >> 4; - assert(idx < ARRAY_SIZE(ctx->ssbos[0])); - if (!ctx->ssbos[const_block_index->u32][idx]) - emit_bo(ctx, ctx->ssbo_vars[const_block_index->u32], nir_dest_bit_size(intr->dest)); - ssbo = ctx->ssbos[const_block_index->u32][idx]; - param = get_src(ctx, &intr->src[2]); - - SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, - SpvStorageClassStorageBuffer, - dest_type); - SpvId uint_type = get_uvec_type(ctx, 32, 1); - /* an id of the array stride in bytes */ - SpvId uint_size = emit_uint_const(ctx, 32, bit_size / 8); - SpvId member = emit_uint_const(ctx, 32, 0); - SpvId offset = get_src(ctx, &intr->src[1]); - SpvId vec_offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size); - SpvId indices[] = { member, vec_offset }; - SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, - ssbo, indices, - ARRAY_SIZE(indices)); + SpvId param = get_src(ctx, &intr->src[1], &atype); + if (atype != ret_type) + param = cast_src_to_type(ctx, param, intr->src[1], ret_type); SpvId param2 = 0; - if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) - param2 = get_src(ctx, &intr->src[3]); + if (nir_src_bit_size(intr->src[1]) == 64) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInt64Atomics); - handle_atomic_op(ctx, intr, ptr, param, param2); + if (intr->intrinsic == nir_intrinsic_deref_atomic_swap) { + param2 = get_src(ctx, &intr->src[2], &atype); + if (atype != ret_type) + param2 = cast_src_to_type(ctx, param2, intr->src[2], ret_type); + } + + handle_atomic_op(ctx, intr, ptr, param, param2, ret_type); } static void emit_shared_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId dest_type = get_dest_type(ctx, &intr->dest, nir_type_uint32); - SpvId param = get_src(ctx, &intr->src[1]); + unsigned bit_size = nir_src_bit_size(intr->src[1]); + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + nir_alu_type atype; + nir_alu_type ret_type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)) == nir_type_float ? nir_type_float : nir_type_uint; + SpvId param = get_src(ctx, &intr->src[1], &atype); + if (atype != ret_type) + param = cast_src_to_type(ctx, param, intr->src[1], ret_type); SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassWorkgroup, dest_type); - SpvId offset = emit_binop(ctx, SpvOpUDiv, get_uvec_type(ctx, 32, 1), get_src(ctx, &intr->src[0]), emit_uint_const(ctx, 32, 4)); + SpvId offset = get_src(ctx, &intr->src[0], &atype); + if (atype != nir_type_uint) + offset = cast_src_to_type(ctx, offset, intr->src[0], nir_type_uint); + offset = emit_binop(ctx, SpvOpUDiv, get_uvec_type(ctx, 32, 1), offset, emit_uint_const(ctx, 32, bit_size / 8)); + SpvId shared_block = get_shared_block(ctx, bit_size); SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, - ctx->shared_block_var, &offset, 1); + shared_block, &offset, 1); + if (nir_src_bit_size(intr->src[1]) == 64) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInt64Atomics); + SpvId param2 = 0; + + if (intr->intrinsic == nir_intrinsic_shared_atomic_swap) { + param2 = get_src(ctx, &intr->src[2], &atype); + if (atype != ret_type) + param2 = cast_src_to_type(ctx, param2, intr->src[2], ret_type); + } + + handle_atomic_op(ctx, intr, ptr, param, param2, ret_type); +} + +static void +emit_global_atomic_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + unsigned bit_size = nir_src_bit_size(intr->src[1]); + SpvId dest_type = get_def_type(ctx, &intr->def, nir_type_uint); + nir_alu_type atype; + nir_alu_type ret_type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)) == nir_type_float ? nir_type_float : nir_type_uint; + SpvId param = get_src(ctx, &intr->src[1], &atype); + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityPhysicalStorageBufferAddresses); + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassPhysicalStorageBuffer, + dest_type); + SpvId ptr = emit_bitcast(ctx, pointer_type, get_src(ctx, &intr->src[0], &atype)); + + if (bit_size == 64) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityInt64Atomics); SpvId param2 = 0; - if (intr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) - param2 = get_src(ctx, &intr->src[2]); + if (intr->intrinsic == nir_intrinsic_global_atomic_swap) + param2 = get_src(ctx, &intr->src[2], &atype); - handle_atomic_op(ctx, intr, ptr, param, param2); + handle_atomic_op(ctx, intr, ptr, param, param2, ret_type); } static void emit_get_ssbo_size(struct ntv_context *ctx, nir_intrinsic_instr *intr) { SpvId uint_type = get_uvec_type(ctx, 32, 1); - nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]); - assert(const_block_index); // no dynamic indexing for now - nir_variable *var = ctx->ssbo_vars[const_block_index->u32]; + nir_variable *var = ctx->ssbo_vars; + const struct glsl_type *bare_type = glsl_without_array(var->type); + unsigned last_member_idx = glsl_get_length(bare_type) - 1; + SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder, + SpvStorageClassStorageBuffer, + get_bo_struct_type(ctx, var)); + nir_alu_type atype; + SpvId bo = get_src(ctx, &intr->src[0], &atype); + if (atype == nir_type_float) + bo = bitcast_to_uvec(ctx, bo, nir_src_bit_size(intr->src[0]), 1); + SpvId indices[] = { bo }; + SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type, + ctx->ssbos[2], indices, + ARRAY_SIZE(indices)); SpvId result = spirv_builder_emit_binop(&ctx->builder, SpvOpArrayLength, uint_type, - ctx->ssbos[const_block_index->u32][2], 1); + ptr, last_member_idx); /* this is going to be converted by nir to: length = (buffer_size - offset) / stride * so we need to un-convert it to avoid having the calculation performed twice */ - unsigned last_member_idx = glsl_get_length(var->interface_type) - 1; - const struct glsl_type *last_member = glsl_get_struct_field(var->interface_type, last_member_idx); + const struct glsl_type *last_member = glsl_get_struct_field(bare_type, last_member_idx); /* multiply by stride */ result = emit_binop(ctx, SpvOpIMul, uint_type, result, emit_uint_const(ctx, 32, glsl_get_explicit_stride(last_member))); /* get total ssbo size by adding offset */ result = emit_binop(ctx, SpvOpIAdd, uint_type, result, emit_uint_const(ctx, 32, - glsl_get_struct_field_offset(var->interface_type, last_member_idx))); - store_dest(ctx, &intr->dest, result, nir_type_uint); -} - -static inline nir_variable * -get_var_from_image(struct ntv_context *ctx, SpvId var_id) -{ - struct hash_entry *he = _mesa_hash_table_search(ctx->image_vars, &var_id); - assert(he); - return he->data; + glsl_get_struct_field_offset(bare_type, last_member_idx))); + store_def(ctx, intr->def.index, result, nir_type_uint); } static SpvId @@ -2534,16 +2897,17 @@ get_image_coords(struct ntv_context *ctx, const struct glsl_type *type, nir_src uint32_t num_coords = glsl_get_sampler_coordinate_components(type); uint32_t src_components = nir_src_num_components(*src); - SpvId spv = get_src(ctx, src); + nir_alu_type atype; + SpvId spv = get_src(ctx, src, &atype); if (num_coords == src_components) return spv; /* need to extract the coord dimensions that the image can use */ - SpvId vec_type = get_uvec_type(ctx, 32, num_coords); + SpvId vec_type = get_alu_type(ctx, atype, num_coords, 32); if (num_coords == 1) return spirv_builder_emit_vector_extract(&ctx->builder, vec_type, spv, 0); uint32_t constituents[4]; - SpvId zero = emit_uint_const(ctx, nir_src_bit_size(*src), 0); + SpvId zero = atype == nir_type_uint ? emit_uint_const(ctx, nir_src_bit_size(*src), 0) : emit_float_const(ctx, nir_src_bit_size(*src), 0); assert(num_coords < ARRAY_SIZE(constituents)); for (unsigned i = 0; i < num_coords; i++) constituents[i] = i < src_components ? i : zero; @@ -2553,81 +2917,165 @@ get_image_coords(struct ntv_context *ctx, const struct glsl_type *type, nir_src static void emit_image_deref_store(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - nir_variable *var = get_var_from_image(ctx, img_var); - SpvId img_type = ctx->image_types[var->data.driver_location]; + nir_alu_type atype; + SpvId img_var = get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + SpvId img_type = find_image_type(ctx, var); const struct glsl_type *type = glsl_without_array(var->type); SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type)); SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var); SpvId coord = get_image_coords(ctx, type, &intr->src[1]); - SpvId texel = get_src(ctx, &intr->src[3]); - SpvId sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ? get_src(ctx, &intr->src[2]) : 0; - assert(nir_src_bit_size(intr->src[3]) == glsl_base_type_bit_size(glsl_get_sampler_result_type(type))); + SpvId texel = get_src(ctx, &intr->src[3], &atype); /* texel type must match image type */ - texel = emit_bitcast(ctx, - spirv_builder_type_vector(&ctx->builder, base_type, 4), - texel); + if (atype != nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(type))) + texel = emit_bitcast(ctx, + spirv_builder_type_vector(&ctx->builder, base_type, 4), + texel); + bool use_sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || + glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS; + SpvId sample = use_sample ? get_src(ctx, &intr->src[2], &atype) : 0; + assert(nir_src_bit_size(intr->src[3]) == glsl_base_type_bit_size(glsl_get_sampler_result_type(type))); spirv_builder_emit_image_write(&ctx->builder, img, coord, texel, 0, sample, 0); } +static SpvId +extract_sparse_load(struct ntv_context *ctx, SpvId result, SpvId dest_type, nir_def *def) +{ + /* Result Type must be an OpTypeStruct with two members. + * The first member’s type must be an integer type scalar. + * It holds a Residency Code that can be passed to OpImageSparseTexelsResident + * - OpImageSparseRead spec + */ + uint32_t idx = 0; + SpvId resident = spirv_builder_emit_composite_extract(&ctx->builder, spirv_builder_type_uint(&ctx->builder, 32), result, &idx, 1); + idx = 1; + /* normal vec4 return */ + if (def->num_components == 4) + result = spirv_builder_emit_composite_extract(&ctx->builder, dest_type, result, &idx, 1); + else { + /* shadow */ + assert(def->num_components == 1); + SpvId type = spirv_builder_type_float(&ctx->builder, def->bit_size); + SpvId val[2]; + /* pad to 2 components: the upcoming is_sparse_texels_resident instr will always use the + * separate residency value, but the shader still expects this return to be a vec2, + * so give it a vec2 + */ + val[0] = spirv_builder_emit_composite_extract(&ctx->builder, type, result, &idx, 1); + val[1] = emit_float_const(ctx, def->bit_size, 0); + result = spirv_builder_emit_composite_construct(&ctx->builder, get_fvec_type(ctx, def->bit_size, 2), val, 2); + } + assert(resident != 0); + assert(def->index < ctx->num_defs); + ctx->resident_defs[def->index] = resident; + return result; +} + static void emit_image_deref_load(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - nir_variable *var = get_var_from_image(ctx, img_var); - SpvId img_type = ctx->image_types[var->data.driver_location]; + bool sparse = intr->intrinsic == nir_intrinsic_image_deref_sparse_load; + nir_alu_type atype; + SpvId img_var = get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + bool mediump = (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW); + SpvId img_type = find_image_type(ctx, var); const struct glsl_type *type = glsl_without_array(var->type); SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type)); SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var); SpvId coord = get_image_coords(ctx, type, &intr->src[1]); - SpvId sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS ? get_src(ctx, &intr->src[2]) : 0; + bool use_sample = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || + glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS; + SpvId sample = use_sample ? get_src(ctx, &intr->src[2], &atype) : 0; + SpvId dest_type = spirv_builder_type_vector(&ctx->builder, base_type, + intr->def.num_components); SpvId result = spirv_builder_emit_image_read(&ctx->builder, - spirv_builder_type_vector(&ctx->builder, base_type, nir_dest_num_components(intr->dest)), - img, coord, 0, sample, 0); - store_dest(ctx, &intr->dest, result, nir_type_float); + dest_type, + img, coord, 0, sample, 0, sparse); + if (sparse) + result = extract_sparse_load(ctx, result, dest_type, &intr->def); + + if (!sparse && mediump) { + spirv_builder_emit_decoration(&ctx->builder, result, + SpvDecorationRelaxedPrecision); + } + + store_def(ctx, intr->def.index, result, nir_get_nir_type_for_glsl_base_type(glsl_get_sampler_result_type(type))); } static void emit_image_deref_size(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - nir_variable *var = get_var_from_image(ctx, img_var); - SpvId img_type = ctx->image_types[var->data.driver_location]; + nir_alu_type atype; + SpvId img_var = get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + SpvId img_type = find_image_type(ctx, var); const struct glsl_type *type = glsl_without_array(var->type); SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var); - SpvId result = spirv_builder_emit_image_query_size(&ctx->builder, get_uvec_type(ctx, 32, glsl_get_sampler_coordinate_components(type)), img, 0); - store_dest(ctx, &intr->dest, result, nir_type_uint); + unsigned num_components = glsl_get_sampler_coordinate_components(type); + /* SPIRV requires 2 components for non-array cube size */ + if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && !glsl_sampler_type_is_array(type)) + num_components = 2; + + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageQuery); + SpvId result = spirv_builder_emit_image_query_size(&ctx->builder, get_uvec_type(ctx, 32, num_components), img, 0); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void emit_image_deref_samples(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - nir_variable *var = get_var_from_image(ctx, img_var); - SpvId img_type = ctx->image_types[var->data.driver_location]; + nir_alu_type atype; + SpvId img_var = get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + SpvId img_type = find_image_type(ctx, var); SpvId img = spirv_builder_emit_load(&ctx->builder, img_type, img_var); - SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, get_dest_type(ctx, &intr->dest, nir_type_uint), img); - store_dest(ctx, &intr->dest, result, nir_type_uint); + + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageQuery); + SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, get_def_type(ctx, &intr->def, nir_type_uint), img); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void emit_image_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) { - SpvId img_var = get_src(ctx, &intr->src[0]); - SpvId param = get_src(ctx, &intr->src[3]); - nir_variable *var = get_var_from_image(ctx, img_var); + nir_alu_type atype, ptype; + SpvId param = get_src(ctx, &intr->src[3], &ptype); + SpvId img_var = get_src(ctx, &intr->src[0], &atype); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); const struct glsl_type *type = glsl_without_array(var->type); bool is_ms; type_to_dim(glsl_get_sampler_dim(type), &is_ms); - SpvId sample = is_ms ? get_src(ctx, &intr->src[2]) : emit_uint_const(ctx, 32, 0); + SpvId sample = is_ms ? get_src(ctx, &intr->src[2], &atype) : emit_uint_const(ctx, 32, 0); SpvId coord = get_image_coords(ctx, type, &intr->src[1]); - SpvId base_type = get_glsl_basetype(ctx, glsl_get_sampler_result_type(type)); + enum glsl_base_type glsl_result_type = glsl_get_sampler_result_type(type); + SpvId base_type = get_glsl_basetype(ctx, glsl_result_type); SpvId texel = spirv_builder_emit_image_texel_pointer(&ctx->builder, base_type, img_var, coord, sample); SpvId param2 = 0; - if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) - param2 = get_src(ctx, &intr->src[4]); - handle_atomic_op(ctx, intr, texel, param, param2); + /* The type of Value must be the same as Result Type. + * The type of the value pointed to by Pointer must be the same as Result Type. + */ + nir_alu_type ntype = nir_get_nir_type_for_glsl_base_type(glsl_result_type); + if (ptype != ntype) { + SpvId cast_type = get_def_type(ctx, &intr->def, ntype); + param = emit_bitcast(ctx, cast_type, param); + } + + if (intr->intrinsic == nir_intrinsic_image_deref_atomic_swap) { + param2 = get_src(ctx, &intr->src[4], &ptype); + if (ptype != ntype) { + SpvId cast_type = get_def_type(ctx, &intr->def, ntype); + param2 = emit_bitcast(ctx, cast_type, param2); + } + } + + handle_atomic_op(ctx, intr, texel, param, param2, ntype); } static void @@ -2635,9 +3083,10 @@ emit_ballot(struct ntv_context *ctx, nir_intrinsic_instr *intr) { spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySubgroupBallotKHR); spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_ballot"); - SpvId type = get_dest_uvec_type(ctx, &intr->dest); - SpvId result = emit_unop(ctx, SpvOpSubgroupBallotKHR, type, get_src(ctx, &intr->src[0])); - store_dest(ctx, &intr->dest, result, nir_type_uint); + SpvId type = get_def_uvec_type(ctx, &intr->def); + nir_alu_type atype; + SpvId result = emit_unop(ctx, SpvOpSubgroupBallotKHR, type, get_src(ctx, &intr->src[0], &atype)); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void @@ -2645,9 +3094,11 @@ emit_read_first_invocation(struct ntv_context *ctx, nir_intrinsic_instr *intr) { spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySubgroupBallotKHR); spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_ballot"); - SpvId type = get_dest_type(ctx, &intr->dest, nir_type_uint); - SpvId result = emit_unop(ctx, SpvOpSubgroupFirstInvocationKHR, type, get_src(ctx, &intr->src[0])); - store_dest(ctx, &intr->dest, result, nir_type_uint); + nir_alu_type atype; + SpvId src = get_src(ctx, &intr->src[0], &atype); + SpvId type = get_def_type(ctx, &intr->def, atype); + SpvId result = emit_unop(ctx, SpvOpSubgroupFirstInvocationKHR, type, src); + store_def(ctx, intr->def.index, result, atype); } static void @@ -2655,11 +3106,13 @@ emit_read_invocation(struct ntv_context *ctx, nir_intrinsic_instr *intr) { spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySubgroupBallotKHR); spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_ballot"); - SpvId type = get_dest_type(ctx, &intr->dest, nir_type_uint); + nir_alu_type atype, itype; + SpvId src = get_src(ctx, &intr->src[0], &atype); + SpvId type = get_def_type(ctx, &intr->def, atype); SpvId result = emit_binop(ctx, SpvOpSubgroupReadInvocationKHR, type, - get_src(ctx, &intr->src[0]), - get_src(ctx, &intr->src[1])); - store_dest(ctx, &intr->dest, result, nir_type_uint); + src, + get_src(ctx, &intr->src[1], &itype)); + store_def(ctx, intr->def.index, result, atype); } static void @@ -2669,9 +3122,25 @@ emit_shader_clock(struct ntv_context *ctx, nir_intrinsic_instr *intr) spirv_builder_emit_extension(&ctx->builder, "SPV_KHR_shader_clock"); SpvScope scope = get_scope(nir_intrinsic_memory_scope(intr)); - SpvId type = get_dest_type(ctx, &intr->dest, nir_type_uint); + SpvId type = get_def_type(ctx, &intr->def, nir_type_uint); SpvId result = spirv_builder_emit_unop_const(&ctx->builder, SpvOpReadClockKHR, type, scope); - store_dest(ctx, &intr->dest, result, nir_type_uint); + store_def(ctx, intr->def.index, result, nir_type_uint); +} + +static void +emit_is_sparse_texels_resident(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySparseResidency); + + SpvId type = get_def_type(ctx, &intr->def, nir_type_uint); + + unsigned index = intr->src[0].ssa->index; + assert(index < ctx->num_defs); + assert(ctx->resident_defs[index] != 0); + SpvId resident = ctx->resident_defs[index]; + + SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageSparseTexelsResident, type, resident); + store_def(ctx, intr->def.index, result, nir_type_uint); } static void @@ -2693,27 +3162,85 @@ emit_vote(struct ntv_context *ctx, nir_intrinsic_instr *intr) default: unreachable("unknown vote intrinsic"); } - SpvId result = spirv_builder_emit_vote(&ctx->builder, op, get_src(ctx, &intr->src[0])); - store_dest_raw(ctx, &intr->dest, result); + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityGroupNonUniformVote); + nir_alu_type atype; + SpvId result = spirv_builder_emit_vote(&ctx->builder, op, get_src(ctx, &intr->src[0], &atype)); + store_def(ctx, intr->def.index, result, nir_type_bool); +} + +static void +emit_is_helper_invocation(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + spirv_builder_emit_extension(&ctx->builder, + "SPV_EXT_demote_to_helper_invocation"); + SpvId result = spirv_is_helper_invocation(&ctx->builder); + store_def(ctx, intr->def.index, result, nir_type_bool); +} + +static void +emit_barrier(struct ntv_context *ctx, nir_intrinsic_instr *intr) +{ + SpvScope scope = get_scope(nir_intrinsic_execution_scope(intr)); + SpvScope mem_scope = get_scope(nir_intrinsic_memory_scope(intr)); + SpvMemorySemanticsMask semantics = 0; + + if (nir_intrinsic_memory_scope(intr) != SCOPE_NONE) { + nir_variable_mode modes = nir_intrinsic_memory_modes(intr); + + if (modes & nir_var_image) + semantics |= SpvMemorySemanticsImageMemoryMask; + + if (modes & nir_var_mem_shared) + semantics |= SpvMemorySemanticsWorkgroupMemoryMask; + + if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) + semantics |= SpvMemorySemanticsUniformMemoryMask; + + if (modes & nir_var_mem_global) + semantics |= SpvMemorySemanticsCrossWorkgroupMemoryMask; + + if (modes & (nir_var_shader_out | nir_var_mem_task_payload)) + semantics |= SpvMemorySemanticsOutputMemoryMask; + + if (!modes) + semantics = SpvMemorySemanticsWorkgroupMemoryMask | + SpvMemorySemanticsUniformMemoryMask | + SpvMemorySemanticsImageMemoryMask | + SpvMemorySemanticsCrossWorkgroupMemoryMask; + semantics |= SpvMemorySemanticsAcquireReleaseMask; + } + + if (nir_intrinsic_execution_scope(intr) != SCOPE_NONE) + spirv_builder_emit_control_barrier(&ctx->builder, scope, mem_scope, semantics); + else + spirv_builder_emit_memory_barrier(&ctx->builder, mem_scope, semantics); } static void emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) { switch (intr->intrinsic) { - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: - emit_load_bo(ctx, intr); + case nir_intrinsic_decl_reg: + /* Nothing to do */ + break; + + case nir_intrinsic_load_reg: + emit_load_reg(ctx, intr); break; - case nir_intrinsic_store_ssbo: - emit_store_ssbo(ctx, intr); + case nir_intrinsic_store_reg: + emit_store_reg(ctx, intr); break; case nir_intrinsic_discard: emit_discard(ctx, intr); break; + case nir_intrinsic_demote: + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityDemoteToHelperInvocation); + spirv_builder_emit_demote(&ctx->builder); + break; + case nir_intrinsic_load_deref: emit_load_deref(ctx, intr); break; @@ -2722,10 +3249,19 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_store_deref(ctx, intr); break; - case nir_intrinsic_load_push_constant: + case nir_intrinsic_load_push_constant_zink: emit_load_push_const(ctx, intr); break; + case nir_intrinsic_load_global: + case nir_intrinsic_load_global_constant: + emit_load_global(ctx, intr); + break; + + case nir_intrinsic_store_global: + emit_store_global(ctx, intr); + break; + case nir_intrinsic_load_front_face: emit_load_front_face(ctx, intr); break; @@ -2759,9 +3295,15 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) break; case nir_intrinsic_load_sample_id: + spirv_builder_emit_cap(&ctx->builder, SpvCapabilitySampleRateShading); emit_load_uint_input(ctx, intr, &ctx->sample_id_var, "gl_SampleId", SpvBuiltInSampleId); break; + case nir_intrinsic_load_point_coord_maybe_flipped: + case nir_intrinsic_load_point_coord: + emit_load_vec_input(ctx, intr, &ctx->point_coord_var, "gl_PointCoord", SpvBuiltInPointCoord, nir_type_float); + break; + case nir_intrinsic_load_sample_pos: emit_load_vec_input(ctx, intr, &ctx->sample_pos_var, "gl_SamplePosition", SpvBuiltInSamplePosition, nir_type_float); break; @@ -2770,21 +3312,15 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_load_uint_input(ctx, intr, &ctx->sample_mask_in_var, "gl_SampleMaskIn", SpvBuiltInSampleMask); break; - case nir_intrinsic_emit_vertex_with_counter: - /* geometry shader emits copied xfb outputs just prior to EmitVertex(), - * since that's the end of the shader - */ - if (ctx->so_info) - emit_so_outputs(ctx, ctx->so_info); - spirv_builder_emit_vertex(&ctx->builder, nir_intrinsic_stream_id(intr)); - break; - - case nir_intrinsic_set_vertex_and_primitive_count: - /* do nothing */ + case nir_intrinsic_emit_vertex: + if (ctx->nir->info.gs.vertices_out) //skip vertex emission if !vertices_out + spirv_builder_emit_vertex(&ctx->builder, nir_intrinsic_stream_id(intr), + ctx->nir->info.stage == MESA_SHADER_GEOMETRY && util_bitcount(ctx->nir->info.gs.active_stream_mask) > 1); break; - case nir_intrinsic_end_primitive_with_counter: - spirv_builder_end_primitive(&ctx->builder, nir_intrinsic_stream_id(intr)); + case nir_intrinsic_end_primitive: + spirv_builder_end_primitive(&ctx->builder, nir_intrinsic_stream_id(intr), + ctx->nir->info.stage == MESA_SHADER_GEOMETRY && util_bitcount(ctx->nir->info.gs.active_stream_mask) > 1); break; case nir_intrinsic_load_helper_invocation: @@ -2801,39 +3337,8 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) SpvBuiltInTessCoord, nir_type_float); break; - case nir_intrinsic_memory_barrier_tcs_patch: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvMemorySemanticsOutputMemoryMask | SpvMemorySemanticsReleaseMask); - break; - - case nir_intrinsic_memory_barrier: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvMemorySemanticsImageMemoryMask | SpvMemorySemanticsUniformMemoryMask | - SpvMemorySemanticsMakeVisibleMask | SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_memory_barrier_image: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeDevice, - SpvMemorySemanticsImageMemoryMask | - SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_group_memory_barrier: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvMemorySemanticsWorkgroupMemoryMask | - SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_memory_barrier_shared: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvMemorySemanticsWorkgroupMemoryMask | - SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_control_barrier: - spirv_builder_emit_control_barrier(&ctx->builder, SpvScopeWorkgroup, - SpvScopeWorkgroup, - SpvMemorySemanticsWorkgroupMemoryMask | SpvMemorySemanticsAcquireMask); + case nir_intrinsic_barrier: + emit_barrier(ctx, intr); break; case nir_intrinsic_interp_deref_at_centroid: @@ -2842,38 +3347,21 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_interpolate(ctx, intr); break; - case nir_intrinsic_memory_barrier_buffer: - spirv_builder_emit_memory_barrier(&ctx->builder, SpvScopeDevice, - SpvMemorySemanticsUniformMemoryMask | - SpvMemorySemanticsAcquireReleaseMask); - break; - - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - emit_ssbo_atomic_intrinsic(ctx, intr); - break; - - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_deref_atomic: + case nir_intrinsic_deref_atomic_swap: + emit_deref_atomic_intrinsic(ctx, intr); + break; + + case nir_intrinsic_shared_atomic: + case nir_intrinsic_shared_atomic_swap: emit_shared_atomic_intrinsic(ctx, intr); break; + case nir_intrinsic_global_atomic: + case nir_intrinsic_global_atomic_swap: + emit_global_atomic_intrinsic(ctx, intr); + break; + case nir_intrinsic_begin_invocation_interlock: case nir_intrinsic_end_invocation_interlock: spirv_builder_emit_interlock(&ctx->builder, intr->intrinsic == nir_intrinsic_end_invocation_interlock); @@ -2887,6 +3375,7 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_image_deref_store(ctx, intr); break; + case nir_intrinsic_image_deref_sparse_load: case nir_intrinsic_image_deref_load: emit_image_deref_load(ctx, intr); break; @@ -2899,16 +3388,8 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_image_deref_samples(ctx, intr); break; - case nir_intrinsic_image_deref_atomic_add: - case nir_intrinsic_image_deref_atomic_umin: - case nir_intrinsic_image_deref_atomic_imin: - case nir_intrinsic_image_deref_atomic_umax: - case nir_intrinsic_image_deref_atomic_imax: - case nir_intrinsic_image_deref_atomic_and: - case nir_intrinsic_image_deref_atomic_or: - case nir_intrinsic_image_deref_atomic_xor: - case nir_intrinsic_image_deref_atomic_exchange: - case nir_intrinsic_image_deref_atomic_comp_swap: + case nir_intrinsic_image_deref_atomic: + case nir_intrinsic_image_deref_atomic_swap: emit_image_intrinsic(ctx, intr); break; @@ -2959,7 +3440,7 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) case nir_intrinsic_load_workgroup_size: assert(ctx->local_group_size_var); - store_dest(ctx, &intr->dest, ctx->local_group_size_var, nir_type_uint); + store_def(ctx, intr->def.index, ctx->local_group_size_var, nir_type_uint); break; case nir_intrinsic_load_shared: @@ -2970,6 +3451,14 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_store_shared(ctx, intr); break; + case nir_intrinsic_load_scratch: + emit_load_scratch(ctx, intr); + break; + + case nir_intrinsic_store_scratch: + emit_store_scratch(ctx, intr); + break; + case nir_intrinsic_shader_clock: emit_shader_clock(ctx, intr); break; @@ -2981,6 +3470,14 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) emit_vote(ctx, intr); break; + case nir_intrinsic_is_sparse_resident_zink: + emit_is_sparse_texels_resident(ctx, intr); + break; + + case nir_intrinsic_is_helper_invocation: + emit_is_helper_invocation(ctx, intr); + break; + default: fprintf(stderr, "emit_intrinsic: not implemented (%s)\n", nir_intrinsic_infos[intr->intrinsic].name); @@ -2989,20 +3486,24 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr) } static void -emit_undef(struct ntv_context *ctx, nir_ssa_undef_instr *undef) +emit_undef(struct ntv_context *ctx, nir_undef_instr *undef) { SpvId type = undef->def.bit_size == 1 ? get_bvec_type(ctx, undef->def.num_components) : get_uvec_type(ctx, undef->def.bit_size, undef->def.num_components); - store_ssa_def(ctx, &undef->def, - spirv_builder_emit_undef(&ctx->builder, type)); + store_def(ctx, undef->def.index, + spirv_builder_emit_undef(&ctx->builder, type), + undef->def.bit_size == 1 ? nir_type_bool : nir_type_uint); } static SpvId get_src_float(struct ntv_context *ctx, nir_src *src) { - SpvId def = get_src(ctx, src); + nir_alu_type atype; + SpvId def = get_src(ctx, src, &atype); + if (atype == nir_type_float) + return def; unsigned num_components = nir_src_num_components(*src); unsigned bit_size = nir_src_bit_size(*src); return bitcast_to_fvec(ctx, def, bit_size, num_components); @@ -3011,7 +3512,10 @@ get_src_float(struct ntv_context *ctx, nir_src *src) static SpvId get_src_int(struct ntv_context *ctx, nir_src *src) { - SpvId def = get_src(ctx, src); + nir_alu_type atype; + SpvId def = get_src(ctx, src, &atype); + if (atype == nir_type_int) + return def; unsigned num_components = nir_src_num_components(*src); unsigned bit_size = nir_src_bit_size(*src); return bitcast_to_ivec(ctx, def, bit_size, num_components); @@ -3027,44 +3531,37 @@ tex_instr_is_lod_allowed(nir_tex_instr *tex) return (tex->sampler_dim == GLSL_SAMPLER_DIM_1D || tex->sampler_dim == GLSL_SAMPLER_DIM_2D || tex->sampler_dim == GLSL_SAMPLER_DIM_3D || - tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE); + tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE || + /* External images are interpreted as 2D in type_to_dim, + * so LOD is allowed */ + tex->sampler_dim == GLSL_SAMPLER_DIM_EXTERNAL || + /* RECT will always become 2D, so this is fine */ + tex->sampler_dim == GLSL_SAMPLER_DIM_RECT); } static void -emit_tex(struct ntv_context *ctx, nir_tex_instr *tex) +get_tex_srcs(struct ntv_context *ctx, nir_tex_instr *tex, + nir_variable **bindless_var, unsigned *coord_components, + struct spriv_tex_src *tex_src) { - assert(tex->op == nir_texop_tex || - tex->op == nir_texop_txb || - tex->op == nir_texop_txl || - tex->op == nir_texop_txd || - tex->op == nir_texop_txf || - tex->op == nir_texop_txf_ms || - tex->op == nir_texop_txs || - tex->op == nir_texop_lod || - tex->op == nir_texop_tg4 || - tex->op == nir_texop_texture_samples || - tex->op == nir_texop_query_levels); - assert(tex->texture_index == tex->sampler_index); - - SpvId coord = 0, proj = 0, bias = 0, lod = 0, dref = 0, dx = 0, dy = 0, - const_offset = 0, offset = 0, sample = 0, tex_offset = 0; - unsigned coord_components = 0; + tex_src->sparse = tex->is_sparse; + nir_alu_type atype; for (unsigned i = 0; i < tex->num_srcs; i++) { nir_const_value *cv; switch (tex->src[i].src_type) { case nir_tex_src_coord: if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms) - coord = get_src_int(ctx, &tex->src[i].src); + tex_src->coord = get_src_int(ctx, &tex->src[i].src); else - coord = get_src_float(ctx, &tex->src[i].src); - coord_components = nir_src_num_components(tex->src[i].src); + tex_src->coord = get_src_float(ctx, &tex->src[i].src); + *coord_components = nir_src_num_components(tex->src[i].src); break; case nir_tex_src_projector: assert(nir_src_num_components(tex->src[i].src) == 1); - proj = get_src_float(ctx, &tex->src[i].src); - assert(proj != 0); + tex_src->proj = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->proj != 0); break; case nir_tex_src_offset: @@ -3074,27 +3571,33 @@ emit_tex(struct ntv_context *ctx, nir_tex_instr *tex) unsigned num_components = nir_src_num_components(tex->src[i].src); SpvId components[NIR_MAX_VEC_COMPONENTS]; - for (int i = 0; i < num_components; ++i) { - int64_t tmp = nir_const_value_as_int(cv[i], bit_size); - components[i] = emit_int_const(ctx, bit_size, tmp); + for (int j = 0; j < num_components; ++j) { + int64_t tmp = nir_const_value_as_int(cv[j], bit_size); + components[j] = emit_int_const(ctx, bit_size, tmp); } if (num_components > 1) { SpvId type = get_ivec_type(ctx, bit_size, num_components); - const_offset = spirv_builder_const_composite(&ctx->builder, - type, - components, - num_components); + tex_src->const_offset = spirv_builder_const_composite(&ctx->builder, + type, + components, + num_components); } else - const_offset = components[0]; + tex_src->const_offset = components[0]; } else - offset = get_src_int(ctx, &tex->src[i].src); + tex_src->offset = get_src_int(ctx, &tex->src[i].src); break; case nir_tex_src_bias: assert(tex->op == nir_texop_txb); - bias = get_src_float(ctx, &tex->src[i].src); - assert(bias != 0); + tex_src->bias = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->bias != 0); + break; + + case nir_tex_src_min_lod: + assert(nir_src_num_components(tex->src[i].src) == 1); + tex_src->min_lod = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->min_lod != 0); break; case nir_tex_src_lod: @@ -3102,201 +3605,320 @@ emit_tex(struct ntv_context *ctx, nir_tex_instr *tex) if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms || tex->op == nir_texop_txs) - lod = get_src_int(ctx, &tex->src[i].src); + tex_src->lod = get_src_int(ctx, &tex->src[i].src); else - lod = get_src_float(ctx, &tex->src[i].src); - assert(lod != 0); + tex_src->lod = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->lod != 0); break; case nir_tex_src_ms_index: assert(nir_src_num_components(tex->src[i].src) == 1); - sample = get_src_int(ctx, &tex->src[i].src); + tex_src->sample = get_src_int(ctx, &tex->src[i].src); break; case nir_tex_src_comparator: assert(nir_src_num_components(tex->src[i].src) == 1); - dref = get_src_float(ctx, &tex->src[i].src); - assert(dref != 0); + tex_src->dref = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->dref != 0); break; case nir_tex_src_ddx: - dx = get_src_float(ctx, &tex->src[i].src); - assert(dx != 0); + tex_src->dx = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->dx != 0); break; case nir_tex_src_ddy: - dy = get_src_float(ctx, &tex->src[i].src); - assert(dy != 0); + tex_src->dy = get_src_float(ctx, &tex->src[i].src); + assert(tex_src->dy != 0); break; case nir_tex_src_texture_offset: - tex_offset = get_src_int(ctx, &tex->src[i].src); + tex_src->tex_offset = get_src_int(ctx, &tex->src[i].src); break; case nir_tex_src_sampler_offset: + case nir_tex_src_sampler_handle: /* don't care */ break; + case nir_tex_src_texture_handle: + tex_src->bindless = get_src(ctx, &tex->src[i].src, &atype); + *bindless_var = nir_deref_instr_get_variable(nir_src_as_deref(tex->src[i].src)); + break; + default: fprintf(stderr, "texture source: %d\n", tex->src[i].src_type); unreachable("unknown texture source"); } } +} - unsigned texture_index = tex->texture_index; - if (!tex_offset) { - /* convert constant index back to base + offset */ - unsigned last_sampler = util_last_bit(ctx->samplers_used); - for (unsigned i = 0; i < last_sampler; i++) { - if (!ctx->sampler_array_sizes[i]) { - if (i == texture_index) - /* this is a non-array sampler, so we don't need an access chain */ +static void +find_sampler_and_texture_index(struct ntv_context *ctx, struct spriv_tex_src *tex_src, + nir_variable *bindless_var, + nir_variable **var, uint32_t *texture_index) +{ + *var = bindless_var ? bindless_var : ctx->sampler_var[*texture_index]; + nir_variable **sampler_var = tex_src->bindless ? ctx->bindless_sampler_var : ctx->sampler_var; + if (!bindless_var && (!tex_src->tex_offset || !var)) { + if (sampler_var[*texture_index]) { + if (glsl_type_is_array(sampler_var[*texture_index]->type)) + tex_src->tex_offset = emit_uint_const(ctx, 32, 0); + } else { + /* convert constant index back to base + offset */ + for (int i = *texture_index; i >= 0; i--) { + if (sampler_var[i]) { + assert(glsl_type_is_array(sampler_var[i]->type)); + if (!tex_src->tex_offset) + tex_src->tex_offset = emit_uint_const(ctx, 32, *texture_index - i); + *var = sampler_var[i]; + *texture_index = i; break; - } else if (texture_index <= i + ctx->sampler_array_sizes[i] - 1) { - /* this is the first member of a sampler array */ - tex_offset = emit_uint_const(ctx, 32, texture_index - i); - texture_index = i; - break; + } } } } - SpvId image_type = ctx->sampler_types[texture_index]; - assert(image_type); - SpvId sampled_type = spirv_builder_type_sampled_image(&ctx->builder, - image_type); - assert(sampled_type); - assert(ctx->samplers_used & (1u << texture_index)); - SpvId sampler_id = ctx->samplers[texture_index]; - if (tex_offset) { - SpvId ptr = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassUniformConstant, sampled_type); - sampler_id = spirv_builder_emit_access_chain(&ctx->builder, ptr, sampler_id, &tex_offset, 1); - } - SpvId load = spirv_builder_emit_load(&ctx->builder, sampled_type, sampler_id); - - SpvId dest_type = get_dest_type(ctx, &tex->dest, tex->dest_type); +} - if (!tex_instr_is_lod_allowed(tex)) - lod = 0; - if (tex->op == nir_texop_txs) { - SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load); - SpvId result = spirv_builder_emit_image_query_size(&ctx->builder, - dest_type, image, - lod); - store_dest(ctx, &tex->dest, result, tex->dest_type); - return; - } - if (tex->op == nir_texop_query_levels) { - SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load); - SpvId result = spirv_builder_emit_image_query_levels(&ctx->builder, - dest_type, image); - store_dest(ctx, &tex->dest, result, tex->dest_type); - return; - } - if (tex->op == nir_texop_texture_samples) { - SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load); - SpvId result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, - dest_type, image); - store_dest(ctx, &tex->dest, result, tex->dest_type); - return; +static SpvId +get_texture_load(struct ntv_context *ctx, SpvId sampler_id, nir_tex_instr *tex, + SpvId image_type, SpvId sampled_type) +{ + if (ctx->stage == MESA_SHADER_KERNEL) { + SpvId image_load = spirv_builder_emit_load(&ctx->builder, image_type, sampler_id); + if (nir_tex_instr_need_sampler(tex)) { + SpvId sampler_load = spirv_builder_emit_load(&ctx->builder, spirv_builder_type_sampler(&ctx->builder), + ctx->cl_samplers[tex->sampler_index]); + return spirv_builder_emit_sampled_image(&ctx->builder, sampled_type, image_load, sampler_load); + } else { + return image_load; + } + } else { + return spirv_builder_emit_load(&ctx->builder, sampled_type, sampler_id); } +} - if (proj && coord_components > 0) { - SpvId constituents[NIR_MAX_VEC_COMPONENTS + 1]; - if (coord_components == 1) - constituents[0] = coord; - else { - assert(coord_components > 1); - SpvId float_type = spirv_builder_type_float(&ctx->builder, 32); - for (uint32_t i = 0; i < coord_components; ++i) - constituents[i] = spirv_builder_emit_composite_extract(&ctx->builder, - float_type, - coord, - &i, 1); - } +static SpvId +get_texop_dest_type(struct ntv_context *ctx, const nir_tex_instr *tex) +{ + SpvId actual_dest_type; + unsigned num_components = tex->def.num_components; + switch (nir_alu_type_get_base_type(tex->dest_type)) { + case nir_type_int: + actual_dest_type = get_ivec_type(ctx, 32, num_components); + break; - constituents[coord_components++] = proj; + case nir_type_uint: + actual_dest_type = get_uvec_type(ctx, 32, num_components); + break; - SpvId vec_type = get_fvec_type(ctx, 32, coord_components); - coord = spirv_builder_emit_composite_construct(&ctx->builder, - vec_type, - constituents, - coord_components); - } - if (tex->op == nir_texop_lod) { - SpvId result = spirv_builder_emit_image_query_lod(&ctx->builder, - dest_type, load, - coord); - store_dest(ctx, &tex->dest, result, tex->dest_type); - return; + case nir_type_float: + actual_dest_type = get_fvec_type(ctx, 32, num_components); + break; + + default: + unreachable("unexpected nir_alu_type"); } - SpvId actual_dest_type; - if (dref) - actual_dest_type = - spirv_builder_type_float(&ctx->builder, - nir_dest_bit_size(tex->dest)); + + return actual_dest_type; +} + +static void +move_tex_proj_to_coord(struct ntv_context *ctx, unsigned coord_components, struct spriv_tex_src *tex_src) +{ + SpvId constituents[NIR_MAX_VEC_COMPONENTS + 1]; + if (coord_components == 1) + constituents[0] = tex_src->coord; else { - unsigned num_components = nir_dest_num_components(tex->dest); - switch (nir_alu_type_get_base_type(tex->dest_type)) { - case nir_type_int: - actual_dest_type = get_ivec_type(ctx, 32, num_components); - break; + assert(coord_components > 1); + SpvId float_type = spirv_builder_type_float(&ctx->builder, 32); + for (uint32_t i = 0; i < coord_components; ++i) + constituents[i] = spirv_builder_emit_composite_extract(&ctx->builder, + float_type, + tex_src->coord, + &i, 1); + } - case nir_type_uint: - actual_dest_type = get_uvec_type(ctx, 32, num_components); - break; + constituents[coord_components++] = tex_src->proj; - case nir_type_float: - actual_dest_type = get_fvec_type(ctx, 32, num_components); - break; + SpvId vec_type = get_fvec_type(ctx, 32, coord_components); + tex_src->coord = spirv_builder_emit_composite_construct(&ctx->builder, + vec_type, + constituents, + coord_components); +} - default: - unreachable("unexpected nir_alu_type"); - } - } +static SpvId +get_tex_image_to_load( struct ntv_context *ctx, SpvId image_type, bool is_buffer, SpvId load) +{ + return is_buffer || ctx->stage == MESA_SHADER_KERNEL ? + load : + spirv_builder_emit_image(&ctx->builder, image_type, load); +} + +static SpvId +emit_tex_readop(struct ntv_context *ctx, nir_variable *bindless_var, SpvId load, + struct spriv_tex_src *tex_src, SpvId dest_type, bool is_buffer, + nir_variable *var, SpvId image_type, nir_tex_instr *tex) +{ + SpvId actual_dest_type = get_texop_dest_type(ctx, tex); SpvId result; - if (offset) + if (tex_src->offset) spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageGatherExtended); + if (tex_src->min_lod) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityMinLod); if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms || tex->op == nir_texop_tg4) { - SpvId image = spirv_builder_emit_image(&ctx->builder, image_type, load); + SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load); if (tex->op == nir_texop_tg4) { - if (const_offset) + if (tex_src->const_offset) spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageGatherExtended); - result = spirv_builder_emit_image_gather(&ctx->builder, dest_type, - load, coord, emit_uint_const(ctx, 32, tex->component), - lod, sample, const_offset, offset, dref); - } else + result = spirv_builder_emit_image_gather(&ctx->builder, actual_dest_type, + load, tex_src, emit_uint_const(ctx, 32, tex->component)); + actual_dest_type = dest_type; + } else { + assert(tex->op == nir_texop_txf_ms || !tex_src->sample); + bool is_ms; + type_to_dim(glsl_get_sampler_dim(glsl_without_array(var->type)), &is_ms); + assert(is_ms || !tex_src->sample); result = spirv_builder_emit_image_fetch(&ctx->builder, actual_dest_type, - image, coord, lod, sample, const_offset, offset); + image, tex_src); + } } else { + if (tex->op == nir_texop_txl) + tex_src->min_lod = 0; result = spirv_builder_emit_image_sample(&ctx->builder, actual_dest_type, load, - coord, - proj != 0, - lod, bias, dref, dx, dy, - const_offset, offset); + tex_src); } - spirv_builder_emit_decoration(&ctx->builder, result, - SpvDecorationRelaxedPrecision); - - if (dref && nir_dest_num_components(tex->dest) > 1 && tex->op != nir_texop_tg4) { - SpvId components[4] = { result, result, result, result }; - result = spirv_builder_emit_composite_construct(&ctx->builder, - dest_type, - components, - 4); + if (!bindless_var && (var->data.precision == GLSL_PRECISION_MEDIUM || var->data.precision == GLSL_PRECISION_LOW)) { + spirv_builder_emit_decoration(&ctx->builder, result, + SpvDecorationRelaxedPrecision); } - if (nir_dest_bit_size(tex->dest) != 32) { + if (tex->is_sparse) + result = extract_sparse_load(ctx, result, actual_dest_type, &tex->def); + + if (tex->def.bit_size != 32) { /* convert FP32 to FP16 */ result = emit_unop(ctx, SpvOpFConvert, dest_type, result); } - store_dest(ctx, &tex->dest, result, tex->dest_type); + return result; +} + +static void +emit_tex(struct ntv_context *ctx, nir_tex_instr *tex) +{ + assert(tex->op == nir_texop_tex || + tex->op == nir_texop_txb || + tex->op == nir_texop_txl || + tex->op == nir_texop_txd || + tex->op == nir_texop_txf || + tex->op == nir_texop_txf_ms || + tex->op == nir_texop_txs || + tex->op == nir_texop_lod || + tex->op == nir_texop_tg4 || + tex->op == nir_texop_texture_samples || + tex->op == nir_texop_query_levels); + assert(tex->texture_index == tex->sampler_index || ctx->stage == MESA_SHADER_KERNEL); + + struct spriv_tex_src tex_src = {0}; + unsigned coord_components = 0; + nir_variable *bindless_var = NULL; + nir_variable *var = NULL; + uint32_t texture_index = tex->texture_index; + + get_tex_srcs(ctx, tex, &bindless_var, &coord_components, &tex_src); + find_sampler_and_texture_index(ctx, &tex_src, bindless_var, &var, &texture_index); + + assert(var); + SpvId image_type = find_image_type(ctx, var); + assert(image_type); + + bool is_buffer = glsl_get_sampler_dim(glsl_without_array(var->type)) == + GLSL_SAMPLER_DIM_BUF; + SpvId sampled_type = is_buffer ? image_type : + spirv_builder_type_sampled_image(&ctx->builder, image_type); + assert(sampled_type); + + SpvId sampler_id = tex_src.bindless ? tex_src.bindless : ctx->samplers[texture_index]; + if (tex_src.tex_offset) { + SpvId ptr = spirv_builder_type_pointer(&ctx->builder, SpvStorageClassUniformConstant, sampled_type); + sampler_id = spirv_builder_emit_access_chain(&ctx->builder, ptr, sampler_id, &tex_src.tex_offset, 1); + } + + SpvId load = get_texture_load(ctx, sampler_id, tex, image_type, sampled_type); + + if (tex->is_sparse) + tex->def.num_components--; + SpvId dest_type = get_def_type(ctx, &tex->def, tex->dest_type); + + if (nir_tex_instr_is_query(tex)) + spirv_builder_emit_cap(&ctx->builder, SpvCapabilityImageQuery); + + if (!tex_instr_is_lod_allowed(tex)) + tex_src.lod = 0; + else if (ctx->stage != MESA_SHADER_FRAGMENT && + tex->op == nir_texop_tex && ctx->explicit_lod && !tex_src.lod) + tex_src.lod = emit_float_const(ctx, 32, 0.0); + + if (tex_src.proj && coord_components > 0) + move_tex_proj_to_coord(ctx, coord_components, &tex_src); + + SpvId result = 0; + + switch (tex->op) { + case nir_texop_txs: { + SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load); + /* Its Dim operand must be one of 1D, 2D, 3D, or Cube + * - OpImageQuerySizeLod specification + * + * Additionally, if its Dim is 1D, 2D, 3D, or Cube, + * it must also have either an MS of 1 or a Sampled of 0 or 2. + * - OpImageQuerySize specification + * + * all spirv samplers use these types + */ + if (!tex_src.lod && tex_instr_is_lod_allowed(tex)) + tex_src.lod = emit_uint_const(ctx, 32, 0); + result = spirv_builder_emit_image_query_size(&ctx->builder, + dest_type, image, + tex_src.lod); + break; + } + case nir_texop_query_levels: { + SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load); + result = spirv_builder_emit_image_query_levels(&ctx->builder, + dest_type, image); + break; + } + case nir_texop_texture_samples: { + SpvId image = get_tex_image_to_load(ctx, image_type, is_buffer, load); + result = spirv_builder_emit_unop(&ctx->builder, SpvOpImageQuerySamples, + dest_type, image); + break; + } + case nir_texop_lod: { + result = spirv_builder_emit_image_query_lod(&ctx->builder, + dest_type, load, + tex_src.coord); + break; + } + default: + result = emit_tex_readop(ctx, bindless_var, load, &tex_src, + dest_type, is_buffer, var, image_type, tex); + break; + } + + store_def(ctx, tex->def.index, result, tex->dest_type); + + if (tex->is_sparse) + tex->def.num_components++; } static void @@ -3356,7 +3978,7 @@ emit_deref_var(struct ntv_context *ctx, nir_deref_instr *deref) struct hash_entry *he = _mesa_hash_table_search(ctx->vars, deref->var); assert(he); SpvId result = (SpvId)(intptr_t)he->data; - store_dest_raw(ctx, &deref->dest, result); + store_def(ctx, deref->def.index, result, get_nir_alu_type(deref->type)); } static void @@ -3365,21 +3987,63 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref) assert(deref->deref_type == nir_deref_type_array); nir_variable *var = nir_deref_instr_get_variable(deref); + if (!nir_src_is_always_uniform(deref->arr.index)) { + if (deref->modes & nir_var_mem_ubo) + spirv_builder_emit_cap(&ctx->builder, + SpvCapabilityUniformBufferArrayDynamicIndexing); + + if (deref->modes & nir_var_mem_ssbo) + spirv_builder_emit_cap(&ctx->builder, + SpvCapabilityStorageBufferArrayDynamicIndexing); + + if (deref->modes & (nir_var_uniform | nir_var_image)) { + const struct glsl_type *type = glsl_without_array(var->type); + assert(glsl_type_is_sampler(type) || glsl_type_is_image(type)); + + if (glsl_type_is_sampler(type)) + spirv_builder_emit_cap(&ctx->builder, + SpvCapabilitySampledImageArrayDynamicIndexing); + else + spirv_builder_emit_cap(&ctx->builder, + SpvCapabilityStorageImageArrayDynamicIndexing); + } + } + SpvStorageClass storage_class = get_storage_class(var); - SpvId base, type; + SpvId type; + nir_alu_type atype = nir_type_uint; + + SpvId base = get_src(ctx, &deref->parent, &atype); + switch (var->data.mode) { + + case nir_var_mem_ubo: + case nir_var_mem_ssbo: + base = get_src(ctx, &deref->parent, &atype); + /* this is either the array<buffers> deref or the array<uint> deref */ + if (glsl_type_is_struct_or_ifc(deref->type)) { + /* array<buffers> */ + type = get_bo_struct_type(ctx, var); + break; + } + /* array<uint> */ + FALLTHROUGH; + case nir_var_function_temp: case nir_var_shader_in: case nir_var_shader_out: - base = get_src(ctx, &deref->parent); + base = get_src(ctx, &deref->parent, &atype); type = get_glsl_type(ctx, deref->type); break; - case nir_var_uniform: { - assert(glsl_type_is_image(glsl_without_array(var->type))); + case nir_var_uniform: + case nir_var_image: { struct hash_entry *he = _mesa_hash_table_search(ctx->vars, var); assert(he); base = (SpvId)(intptr_t)he->data; - type = ctx->image_types[var->data.driver_location]; + const struct glsl_type *gtype = glsl_without_array(var->type); + type = get_image_type(ctx, var, + glsl_type_is_sampler(gtype), + glsl_get_sampler_dim(gtype) == GLSL_SAMPLER_DIM_BUF); break; } @@ -3387,7 +4051,30 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref) unreachable("Unsupported nir_variable_mode\n"); } - SpvId index = get_src(ctx, &deref->arr.index); + nir_alu_type itype; + SpvId index = get_src(ctx, &deref->arr.index, &itype); + if (itype == nir_type_float) + index = emit_bitcast(ctx, get_uvec_type(ctx, 32, 1), index); + + if (var->data.mode == nir_var_uniform || var->data.mode == nir_var_image) { + nir_deref_instr *aoa_deref = nir_src_as_deref(deref->parent); + uint32_t inner_stride = glsl_array_size(aoa_deref->type); + + while (aoa_deref->deref_type != nir_deref_type_var) { + assert(aoa_deref->deref_type == nir_deref_type_array); + + SpvId aoa_index = get_src(ctx, &aoa_deref->arr.index, &itype); + if (itype == nir_type_float) + aoa_index = emit_bitcast(ctx, get_uvec_type(ctx, 32, 1), aoa_index); + + aoa_deref = nir_src_as_deref(aoa_deref->parent); + + uint32_t stride = glsl_get_aoa_size(aoa_deref->type) / inner_stride; + aoa_index = emit_binop(ctx, SpvOpIMul, get_uvec_type(ctx, 32, 1), aoa_index, + emit_uint_const(ctx, 32, stride)); + index = emit_binop(ctx, SpvOpIAdd, get_uvec_type(ctx, 32, 1), index, aoa_index); + } + } SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, storage_class, @@ -3398,14 +4085,7 @@ emit_deref_array(struct ntv_context *ctx, nir_deref_instr *deref) base, &index, 1); /* uint is a bit of a lie here, it's really just an opaque type */ - store_dest(ctx, &deref->dest, result, nir_type_uint); - - /* image ops always need to be able to get the variable to check out sampler types and such */ - if (glsl_type_is_image(glsl_without_array(var->type))) { - uint32_t *key = ralloc_size(ctx->mem_ctx, sizeof(uint32_t)); - *key = result; - _mesa_hash_table_insert(ctx->image_vars, key, var); - } + store_def(ctx, deref->def.index, result, get_nir_alu_type(deref->type)); } static void @@ -3417,17 +4097,21 @@ emit_deref_struct(struct ntv_context *ctx, nir_deref_instr *deref) SpvStorageClass storage_class = get_storage_class(var); SpvId index = emit_uint_const(ctx, 32, deref->strct.index); + SpvId type = (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) ? + get_bo_array_type(ctx, var) : + get_glsl_type(ctx, deref->type); SpvId ptr_type = spirv_builder_type_pointer(&ctx->builder, storage_class, - get_glsl_type(ctx, deref->type)); + type); + nir_alu_type atype; SpvId result = spirv_builder_emit_access_chain(&ctx->builder, ptr_type, - get_src(ctx, &deref->parent), + get_src(ctx, &deref->parent, &atype), &index, 1); /* uint is a bit of a lie here, it's really just an opaque type */ - store_dest(ctx, &deref->dest, result, nir_type_uint); + store_def(ctx, deref->def.index, result, get_nir_alu_type(deref->type)); } static void @@ -3466,8 +4150,8 @@ emit_block(struct ntv_context *ctx, struct nir_block *block) case nir_instr_type_load_const: emit_load_const(ctx, nir_instr_as_load_const(instr)); break; - case nir_instr_type_ssa_undef: - emit_undef(ctx, nir_instr_as_ssa_undef(instr)); + case nir_instr_type_undef: + emit_undef(ctx, nir_instr_as_undef(instr)); break; case nir_instr_type_tex: emit_tex(ctx, nir_instr_as_tex(instr)); @@ -3498,7 +4182,8 @@ static SpvId get_src_bool(struct ntv_context *ctx, nir_src *src) { assert(nir_src_bit_size(*src) == 1); - return get_src(ctx, src); + nir_alu_type atype; + return get_src(ctx, src, &atype); } static void @@ -3538,6 +4223,7 @@ emit_if(struct ntv_context *ctx, nir_if *if_stmt) static void emit_loop(struct ntv_context *ctx, nir_loop *loop) { + assert(!nir_loop_has_continue_construct(loop)); SpvId header_id = spirv_builder_new_id(&ctx->builder); SpvId begin_id = block_label(ctx, nir_loop_first_block(loop)); SpvId break_id = spirv_builder_new_id(&ctx->builder); @@ -3592,35 +4278,33 @@ emit_cf_list(struct ntv_context *ctx, struct exec_list *list) } static SpvExecutionMode -get_input_prim_type_mode(uint16_t type) +get_input_prim_type_mode(enum mesa_prim type) { switch (type) { - case GL_POINTS: + case MESA_PRIM_POINTS: return SpvExecutionModeInputPoints; - case GL_LINES: - case GL_LINE_LOOP: - case GL_LINE_STRIP: + case MESA_PRIM_LINES: + case MESA_PRIM_LINE_LOOP: + case MESA_PRIM_LINE_STRIP: return SpvExecutionModeInputLines; - case GL_TRIANGLE_STRIP: - case GL_TRIANGLES: - case GL_TRIANGLE_FAN: + case MESA_PRIM_TRIANGLE_STRIP: + case MESA_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLE_FAN: return SpvExecutionModeTriangles; - case GL_QUADS: - case GL_QUAD_STRIP: + case MESA_PRIM_QUADS: + case MESA_PRIM_QUAD_STRIP: return SpvExecutionModeQuads; break; - case GL_POLYGON: + case MESA_PRIM_POLYGON: unreachable("handle polygons in gs"); break; - case GL_LINES_ADJACENCY: - case GL_LINE_STRIP_ADJACENCY: + case MESA_PRIM_LINES_ADJACENCY: + case MESA_PRIM_LINE_STRIP_ADJACENCY: return SpvExecutionModeInputLinesAdjacency; - case GL_TRIANGLES_ADJACENCY: - case GL_TRIANGLE_STRIP_ADJACENCY: + case MESA_PRIM_TRIANGLES_ADJACENCY: + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: return SpvExecutionModeInputTrianglesAdjacency; break; - case GL_ISOLINES: - return SpvExecutionModeIsolines; default: debug_printf("unknown geometry shader input mode %u\n", type); unreachable("error!"); @@ -3630,38 +4314,36 @@ get_input_prim_type_mode(uint16_t type) return 0; } static SpvExecutionMode -get_output_prim_type_mode(uint16_t type) +get_output_prim_type_mode(enum mesa_prim type) { switch (type) { - case GL_POINTS: + case MESA_PRIM_POINTS: return SpvExecutionModeOutputPoints; - case GL_LINES: - case GL_LINE_LOOP: - unreachable("GL_LINES/LINE_LOOP passed as gs output"); + case MESA_PRIM_LINES: + case MESA_PRIM_LINE_LOOP: + unreachable("MESA_PRIM_LINES/LINE_LOOP passed as gs output"); break; - case GL_LINE_STRIP: + case MESA_PRIM_LINE_STRIP: return SpvExecutionModeOutputLineStrip; - case GL_TRIANGLE_STRIP: + case MESA_PRIM_TRIANGLE_STRIP: return SpvExecutionModeOutputTriangleStrip; - case GL_TRIANGLES: - case GL_TRIANGLE_FAN: //FIXME: not sure if right for output + case MESA_PRIM_TRIANGLES: + case MESA_PRIM_TRIANGLE_FAN: //FIXME: not sure if right for output return SpvExecutionModeTriangles; - case GL_QUADS: - case GL_QUAD_STRIP: + case MESA_PRIM_QUADS: + case MESA_PRIM_QUAD_STRIP: return SpvExecutionModeQuads; - case GL_POLYGON: + case MESA_PRIM_POLYGON: unreachable("handle polygons in gs"); break; - case GL_LINES_ADJACENCY: - case GL_LINE_STRIP_ADJACENCY: + case MESA_PRIM_LINES_ADJACENCY: + case MESA_PRIM_LINE_STRIP_ADJACENCY: unreachable("handle line adjacency in gs"); break; - case GL_TRIANGLES_ADJACENCY: - case GL_TRIANGLE_STRIP_ADJACENCY: + case MESA_PRIM_TRIANGLES_ADJACENCY: + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: unreachable("handle triangle adjacency in gs"); break; - case GL_ISOLINES: - return SpvExecutionModeIsolines; default: debug_printf("unknown geometry shader output mode %u\n", type); unreachable("error!"); @@ -3690,12 +4372,12 @@ get_depth_layout_mode(enum gl_frag_depth_layout depth_layout) } static SpvExecutionMode -get_primitive_mode(uint16_t primitive_mode) +get_primitive_mode(enum tess_primitive_mode primitive_mode) { switch (primitive_mode) { - case GL_TRIANGLES: return SpvExecutionModeTriangles; - case GL_QUADS: return SpvExecutionModeQuads; - case GL_ISOLINES: return SpvExecutionModeIsolines; + case TESS_PRIMITIVE_TRIANGLES: return SpvExecutionModeTriangles; + case TESS_PRIMITIVE_QUADS: return SpvExecutionModeQuads; + case TESS_PRIMITIVE_ISOLINES: return SpvExecutionModeIsolines; default: unreachable("unknown tess prim type!"); } @@ -3717,39 +4399,43 @@ get_spacing(enum gl_tess_spacing spacing) } struct spirv_shader * -nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_version) +nir_to_spirv(struct nir_shader *s, const struct zink_shader_info *sinfo, uint32_t spirv_version) { struct spirv_shader *ret = NULL; struct ntv_context ctx = {0}; ctx.mem_ctx = ralloc_context(NULL); + ctx.nir = s; ctx.builder.mem_ctx = ctx.mem_ctx; assert(spirv_version >= SPIRV_VERSION(1, 0)); ctx.spirv_1_4_interfaces = spirv_version >= SPIRV_VERSION(1, 4); + ctx.bindless_set_idx = sinfo->bindless_set_idx; ctx.glsl_types = _mesa_pointer_hash_table_create(ctx.mem_ctx); - if (!ctx.glsl_types) + ctx.bo_array_types = _mesa_pointer_hash_table_create(ctx.mem_ctx); + ctx.bo_struct_types = _mesa_pointer_hash_table_create(ctx.mem_ctx); + if (!ctx.glsl_types || !ctx.bo_array_types || !ctx.bo_struct_types || + !_mesa_hash_table_init(&ctx.image_types, ctx.mem_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal)) goto fail; spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShader); - if (s->info.image_buffers != 0) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImageBuffer); - spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampledBuffer); switch (s->info.stage) { case MESA_SHADER_FRAGMENT: - if (s->info.fs.post_depth_coverage && - BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN)) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampleMaskPostDepthCoverage); if (s->info.fs.uses_sample_shading) spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampleRateShading); + if (s->info.fs.uses_demote && spirv_version < SPIRV_VERSION(1, 6)) + spirv_builder_emit_extension(&ctx.builder, + "SPV_EXT_demote_to_helper_invocation"); break; case MESA_SHADER_VERTEX: if (BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID) || + BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_DRAW_ID) || BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE) || BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX)) { - spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_shader_draw_parameters"); + if (spirv_version < SPIRV_VERSION(1, 3)) + spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_shader_draw_parameters"); spirv_builder_emit_cap(&ctx.builder, SpvCapabilityDrawParameters); } break; @@ -3783,9 +4469,16 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShaderViewportIndexLayerEXT); } } + } else if (s->info.stage == MESA_SHADER_FRAGMENT) { + /* incredibly, this is legal and intended. + * https://github.com/KhronosGroup/SPIRV-Registry/issues/95 + */ + if (s->info.inputs_read & (BITFIELD64_BIT(VARYING_SLOT_LAYER) | + BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID))) + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityGeometry); } - if (s->info.num_ssbos) + if (s->info.num_ssbos && spirv_version < SPIRV_VERSION(1, 1)) spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_storage_buffer_storage_class"); if (s->info.stage < MESA_SHADER_FRAGMENT && @@ -3796,46 +4489,40 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_builder_emit_cap(&ctx.builder, SpvCapabilityMultiViewport); } - if (s->info.num_textures) { - spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampled1D); - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImageQuery); - } - - if (s->info.num_images) { - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImage1D); - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityImageQuery); + if (s->info.stage > MESA_SHADER_VERTEX && + s->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_VIEWPORT)) { + if (s->info.stage < MESA_SHADER_GEOMETRY) + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityShaderViewportIndex); + else + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityMultiViewport); } - if (s->info.bit_sizes_int & 8) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityInt8); - if (s->info.bit_sizes_int & 16) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityInt16); - if (s->info.bit_sizes_int & 64) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityInt64); - - if (s->info.bit_sizes_float & 16) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityFloat16); - if (s->info.bit_sizes_float & 64) - spirv_builder_emit_cap(&ctx.builder, SpvCapabilityFloat64); - ctx.stage = s->info.stage; - ctx.so_info = so_info; + ctx.sinfo = sinfo; ctx.GLSL_std_450 = spirv_builder_import(&ctx.builder, "GLSL.std.450"); + ctx.explicit_lod = true; spirv_builder_emit_source(&ctx.builder, SpvSourceLanguageUnknown, 0); - if (s->info.stage == MESA_SHADER_COMPUTE) { - SpvAddressingModel model; + SpvAddressingModel model = SpvAddressingModelLogical; + if (gl_shader_stage_is_compute(s->info.stage)) { if (s->info.cs.ptr_size == 32) model = SpvAddressingModelPhysical32; - else if (s->info.cs.ptr_size == 64) - model = SpvAddressingModelPhysical64; - else + else if (s->info.cs.ptr_size == 64) { + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityPhysicalStorageBufferAddresses); + model = SpvAddressingModelPhysicalStorageBuffer64; + } else model = SpvAddressingModelLogical; + } + + if (ctx.sinfo->have_vulkan_memory_model) { + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityVulkanMemoryModel); + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityVulkanMemoryModelDeviceScope); + spirv_builder_emit_mem_model(&ctx.builder, model, + SpvMemoryModelVulkan); + } else { spirv_builder_emit_mem_model(&ctx.builder, model, SpvMemoryModelGLSL450); - } else - spirv_builder_emit_mem_model(&ctx.builder, SpvAddressingModelLogical, - SpvMemoryModelGLSL450); + } if (s->info.stage == MESA_SHADER_FRAGMENT && s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) { @@ -3861,6 +4548,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t exec_model = SpvExecutionModelFragment; break; case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: exec_model = SpvExecutionModelGLCompute; break; default: @@ -3868,46 +4556,143 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t } SpvId type_void = spirv_builder_type_void(&ctx.builder); - SpvId type_main = spirv_builder_type_function(&ctx.builder, type_void, - NULL, 0); + SpvId type_void_func = spirv_builder_type_function(&ctx.builder, type_void, + NULL, 0); SpvId entry_point = spirv_builder_new_id(&ctx.builder); spirv_builder_emit_name(&ctx.builder, entry_point, "main"); ctx.vars = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); - ctx.image_vars = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_u32, - _mesa_key_u32_equal); - - ctx.so_outputs = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_u32, - _mesa_key_u32_equal); - nir_foreach_variable_with_modes(var, s, nir_var_mem_push_const) input_var_init(&ctx, var); nir_foreach_shader_in_variable(var, s) emit_input(&ctx, var); - int max_output = -1; + int max_output = 0; nir_foreach_shader_out_variable(var, s) { /* ignore SPIR-V built-ins, tagged with a sentinel value */ if (var->data.driver_location != UINT_MAX) { assert(var->data.driver_location < INT_MAX); - max_output = MAX2(max_output, (int)var->data.driver_location); + unsigned extent = glsl_count_attribute_slots(var->type, false); + max_output = MAX2(max_output, (int)var->data.driver_location + extent); } emit_output(&ctx, var); } + uint32_t tcs_vertices_out_word = 0; + + unsigned ubo_counter[2] = {0}; + nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo) + ubo_counter[var->data.driver_location != 0]++; + nir_foreach_variable_with_modes(var, s, nir_var_mem_ubo) + emit_bo(&ctx, var, ubo_counter[var->data.driver_location != 0] > 1); + + unsigned ssbo_counter = 0; + nir_foreach_variable_with_modes(var, s, nir_var_mem_ssbo) + ssbo_counter++; + nir_foreach_variable_with_modes(var, s, nir_var_mem_ssbo) + emit_bo(&ctx, var, ssbo_counter > 1); + + nir_foreach_variable_with_modes(var, s, nir_var_image) + ctx.image_var[var->data.driver_location] = var; + nir_foreach_variable_with_modes(var, s, nir_var_uniform) { + if (glsl_type_is_sampler(glsl_without_array(var->type))) { + if (var->data.descriptor_set == ctx.bindless_set_idx) + ctx.bindless_sampler_var[var->data.driver_location] = var; + else + ctx.sampler_var[var->data.driver_location] = var; + ctx.last_sampler = MAX2(ctx.last_sampler, var->data.driver_location); + } + } + if (sinfo->sampler_mask) { + assert(s->info.stage == MESA_SHADER_KERNEL); + int desc_set = -1; + nir_foreach_variable_with_modes(var, s, nir_var_uniform) { + if (glsl_type_is_sampler(glsl_without_array(var->type))) { + desc_set = var->data.descriptor_set; + break; + } + } + assert(desc_set != -1); + u_foreach_bit(sampler, sinfo->sampler_mask) + emit_sampler(&ctx, sampler, desc_set); + } + nir_foreach_variable_with_modes(var, s, nir_var_image | nir_var_uniform) { + const struct glsl_type *type = glsl_without_array(var->type); + if (glsl_type_is_sampler(type)) + emit_image(&ctx, var, get_bare_image_type(&ctx, var, true)); + else if (glsl_type_is_image(type)) + emit_image(&ctx, var, get_bare_image_type(&ctx, var, false)); + } + + if (sinfo->float_controls.flush_denorms) { + unsigned execution_mode = s->info.float_controls_execution_mode; + bool flush_16_bit = nir_is_denorm_flush_to_zero(execution_mode, 16); + bool flush_32_bit = nir_is_denorm_flush_to_zero(execution_mode, 32); + bool flush_64_bit = nir_is_denorm_flush_to_zero(execution_mode, 64); + bool preserve_16_bit = nir_is_denorm_preserve(execution_mode, 16); + bool preserve_32_bit = nir_is_denorm_preserve(execution_mode, 32); + bool preserve_64_bit = nir_is_denorm_preserve(execution_mode, 64); + bool emit_cap_flush = false; + bool emit_cap_preserve = false; + + if (!sinfo->float_controls.denorms_all_independence) { + bool flush = flush_16_bit && flush_64_bit; + bool preserve = preserve_16_bit && preserve_64_bit; + + if (!sinfo->float_controls.denorms_32_bit_independence) { + flush = flush && flush_32_bit; + preserve = preserve && preserve_32_bit; + + flush_32_bit = flush; + preserve_32_bit = preserve; + } + + flush_16_bit = flush; + flush_64_bit = flush; + preserve_16_bit = preserve; + preserve_64_bit = preserve; + } + + if (flush_16_bit && sinfo->float_controls.flush_denorms & BITFIELD_BIT(0)) { + emit_cap_flush = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormFlushToZero, 16); + } + if (flush_32_bit && sinfo->float_controls.flush_denorms & BITFIELD_BIT(1)) { + emit_cap_flush = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormFlushToZero, 32); + } + if (flush_64_bit && sinfo->float_controls.flush_denorms & BITFIELD_BIT(2)) { + emit_cap_flush = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormFlushToZero, 64); + } - if (so_info) - emit_so_info(&ctx, so_info, max_output + 1); + if (preserve_16_bit && sinfo->float_controls.preserve_denorms & BITFIELD_BIT(0)) { + emit_cap_preserve = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormPreserve, 16); + } + if (preserve_32_bit && sinfo->float_controls.preserve_denorms & BITFIELD_BIT(1)) { + emit_cap_preserve = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormPreserve, 32); + } + if (preserve_64_bit && sinfo->float_controls.preserve_denorms & BITFIELD_BIT(2)) { + emit_cap_preserve = true; + spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeDenormPreserve, 64); + } - /* we have to reverse iterate to match what's done in zink_compiler.c */ - foreach_list_typed_reverse(nir_variable, var, node, &s->variables) - if (_nir_shader_variable_has_mode(var, nir_var_uniform | - nir_var_mem_ubo | - nir_var_mem_ssbo)) - emit_uniform(&ctx, var); + if (emit_cap_flush) + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityDenormFlushToZero); + if (emit_cap_preserve) + spirv_builder_emit_cap(&ctx.builder, SpvCapabilityDenormPreserve); + } switch (s->info.stage) { case MESA_SHADER_FRAGMENT: @@ -3924,6 +4709,7 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t SpvExecutionModeEarlyFragmentTests); if (s->info.fs.post_depth_coverage) { spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_post_depth_coverage"); + spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySampleMaskPostDepthCoverage); spirv_builder_emit_exec_mode(&ctx.builder, entry_point, SpvExecutionModePostDepthCoverage); } @@ -3945,13 +4731,13 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_builder_emit_exec_mode(&ctx.builder, entry_point, SpvExecutionModeSampleInterlockUnorderedEXT); break; case MESA_SHADER_TESS_CTRL: - spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, - SpvExecutionModeOutputVertices, - s->info.tess.tcs_vertices_out); + tcs_vertices_out_word = spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, + SpvExecutionModeOutputVertices, + s->info.tess.tcs_vertices_out); break; case MESA_SHADER_TESS_EVAL: spirv_builder_emit_exec_mode(&ctx.builder, entry_point, - get_primitive_mode(s->info.tess.primitive_mode)); + get_primitive_mode(s->info.tess._primitive_mode)); spirv_builder_emit_exec_mode(&ctx.builder, entry_point, s->info.tess.ccw ? SpvExecutionModeVertexOrderCcw : SpvExecutionModeVertexOrderCw); @@ -3970,12 +4756,10 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t s->info.gs.invocations); spirv_builder_emit_exec_mode_literal(&ctx.builder, entry_point, SpvExecutionModeOutputVertices, - s->info.gs.vertices_out); + MAX2(s->info.gs.vertices_out, 1)); break; + case MESA_SHADER_KERNEL: case MESA_SHADER_COMPUTE: - if (s->info.shared_size) - create_shared_block(&ctx, s->info.shared_size); - if (s->info.workgroup_size[0] || s->info.workgroup_size[1] || s->info.workgroup_size[2]) spirv_builder_emit_exec_mode_literal3(&ctx.builder, entry_point, SpvExecutionModeLocalSize, (uint32_t[3]){(uint32_t)s->info.workgroup_size[0], (uint32_t)s->info.workgroup_size[1], @@ -3990,9 +4774,31 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_builder_emit_name(&ctx.builder, sizes[i], names[i]); } SpvId var_type = get_uvec_type(&ctx, 32, 3); + // Even when using LocalSizeId this need to be initialized for nir_intrinsic_load_workgroup_size ctx.local_group_size_var = spirv_builder_spec_const_composite(&ctx.builder, var_type, sizes, 3); - spirv_builder_emit_name(&ctx.builder, ctx.local_group_size_var, "gl_LocalGroupSize"); - spirv_builder_emit_builtin(&ctx.builder, ctx.local_group_size_var, SpvBuiltInWorkgroupSize); + spirv_builder_emit_name(&ctx.builder, ctx.local_group_size_var, "gl_LocalGroupSizeARB"); + + /* WorkgroupSize is deprecated in SPIR-V 1.6 */ + if (spirv_version >= SPIRV_VERSION(1, 6)) { + spirv_builder_emit_exec_mode_id3(&ctx.builder, entry_point, + SpvExecutionModeLocalSizeId, + sizes); + } else { + spirv_builder_emit_builtin(&ctx.builder, ctx.local_group_size_var, SpvBuiltInWorkgroupSize); + } + } + if (s->info.cs.has_variable_shared_mem) { + ctx.shared_mem_size = spirv_builder_spec_const_uint(&ctx.builder, 32); + spirv_builder_emit_specid(&ctx.builder, ctx.shared_mem_size, ZINK_VARIABLE_SHARED_MEM); + spirv_builder_emit_name(&ctx.builder, ctx.shared_mem_size, "variable_shared_mem"); + } + if (s->info.cs.derivative_group) { + SpvCapability caps[] = { 0, SpvCapabilityComputeDerivativeGroupQuadsNV, SpvCapabilityComputeDerivativeGroupLinearNV }; + SpvExecutionMode modes[] = { 0, SpvExecutionModeDerivativeGroupQuadsNV, SpvExecutionModeDerivativeGroupLinearNV }; + spirv_builder_emit_extension(&ctx.builder, "SPV_NV_compute_shader_derivatives"); + spirv_builder_emit_cap(&ctx.builder, caps[s->info.cs.derivative_group]); + spirv_builder_emit_exec_mode(&ctx.builder, entry_point, modes[s->info.cs.derivative_group]); + ctx.explicit_lod = false; } break; default: @@ -4002,31 +4808,55 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySubgroupBallotKHR); spirv_builder_emit_extension(&ctx.builder, "SPV_KHR_shader_ballot"); } - if (s->info.has_transform_feedback_varyings) { + if (s->info.has_transform_feedback_varyings && s->info.stage != MESA_SHADER_FRAGMENT) { spirv_builder_emit_cap(&ctx.builder, SpvCapabilityTransformFeedback); spirv_builder_emit_exec_mode(&ctx.builder, entry_point, SpvExecutionModeXfb); } + + if (s->info.stage == MESA_SHADER_FRAGMENT && s->info.fs.uses_discard) { + ctx.discard_func = spirv_builder_new_id(&ctx.builder); + spirv_builder_emit_name(&ctx.builder, ctx.discard_func, "discard"); + spirv_builder_function(&ctx.builder, ctx.discard_func, type_void, + SpvFunctionControlMaskNone, + type_void_func); + SpvId label = spirv_builder_new_id(&ctx.builder); + spirv_builder_label(&ctx.builder, label); + + /* kill is deprecated in SPIR-V 1.6, use terminate instead */ + if (spirv_version >= SPIRV_VERSION(1, 6)) + spirv_builder_emit_terminate(&ctx.builder); + else + spirv_builder_emit_kill(&ctx.builder); + + spirv_builder_function_end(&ctx.builder); + } + spirv_builder_function(&ctx.builder, entry_point, type_void, - SpvFunctionControlMaskNone, - type_main); + SpvFunctionControlMaskNone, + type_void_func); nir_function_impl *entry = nir_shader_get_entrypoint(s); nir_metadata_require(entry, nir_metadata_block_index); - ctx.defs = ralloc_array_size(ctx.mem_ctx, - sizeof(SpvId), entry->ssa_alloc); - if (!ctx.defs) + ctx.defs = rzalloc_array_size(ctx.mem_ctx, + sizeof(SpvId), entry->ssa_alloc); + ctx.def_types = ralloc_array_size(ctx.mem_ctx, + sizeof(nir_alu_type), entry->ssa_alloc); + if (!ctx.defs || !ctx.def_types) goto fail; + if (sinfo->have_sparse) { + spirv_builder_emit_cap(&ctx.builder, SpvCapabilitySparseResidency); + /* this could be huge, so only alloc if needed since it's extremely unlikely to + * ever be used by anything except cts + */ + ctx.resident_defs = rzalloc_array_size(ctx.mem_ctx, + sizeof(SpvId), entry->ssa_alloc); + if (!ctx.resident_defs) + goto fail; + } ctx.num_defs = entry->ssa_alloc; - nir_index_local_regs(entry); - ctx.regs = ralloc_array_size(ctx.mem_ctx, - sizeof(SpvId), entry->reg_alloc); - if (!ctx.regs) - goto fail; - ctx.num_regs = entry->reg_alloc; - SpvId *block_ids = ralloc_array_size(ctx.mem_ctx, sizeof(SpvId), entry->num_blocks); if (!block_ids) @@ -4040,22 +4870,21 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t /* emit a block only for the variable declarations */ start_block(&ctx, spirv_builder_new_id(&ctx.builder)); - foreach_list_typed(nir_register, reg, node, &entry->registers) { - SpvId type = get_vec_from_bit_size(&ctx, reg->bit_size, reg->num_components); - SpvId pointer_type = spirv_builder_type_pointer(&ctx.builder, - SpvStorageClassFunction, - type); - SpvId var = spirv_builder_emit_var(&ctx.builder, pointer_type, - SpvStorageClassFunction); + spirv_builder_begin_local_vars(&ctx.builder); - ctx.regs[reg->index] = var; + nir_foreach_reg_decl(reg, entry) { + if (nir_intrinsic_bit_size(reg) == 1) + init_reg(&ctx, reg, nir_type_bool); } - emit_cf_list(&ctx, &entry->body); + nir_foreach_variable_with_modes(var, s, nir_var_shader_temp) + emit_shader_temp(&ctx, var); - /* vertex/tess shader emits copied xfb outputs at the end of the shader */ - if (so_info && (ctx.stage == MESA_SHADER_VERTEX || ctx.stage == MESA_SHADER_TESS_EVAL)) - emit_so_outputs(&ctx, so_info); + nir_foreach_function_temp_variable(var, entry) + emit_temp(&ctx, var); + + + emit_cf_list(&ctx, &entry->body); spirv_builder_return(&ctx.builder); // doesn't belong here, but whatevz spirv_builder_function_end(&ctx.builder); @@ -4074,7 +4903,8 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info, uint32_t if (!ret->words) goto fail; - ret->num_words = spirv_builder_get_words(&ctx.builder, ret->words, num_words, spirv_version); + ret->num_words = spirv_builder_get_words(&ctx.builder, ret->words, num_words, spirv_version, &tcs_vertices_out_word); + ret->tcs_vertices_out_word = tcs_vertices_out_word; assert(ret->num_words == num_words); ralloc_free(ctx.mem_ctx); |