/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "vk_util.h"

#include "v3dv_debug.h"
#include "v3dv_private.h"

#include "vk_format_info.h"

#include "common/v3d_debug.h"

#include "vulkan/util/vk_format.h"

VkResult
v3dv_CreateShaderModule(VkDevice _device,
                        const VkShaderModuleCreateInfo *pCreateInfo,
                        const VkAllocationCallbacks *pAllocator,
                        VkShaderModule *pShaderModule)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_shader_module *module;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   module = vk_alloc2(&device->alloc, pAllocator,
                      sizeof(*module) + pCreateInfo->codeSize, 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (module == NULL)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   module->size = pCreateInfo->codeSize;
   memcpy(module->data, pCreateInfo->pCode, module->size);

   _mesa_sha1_compute(module->data, module->size, module->sha1);

   *pShaderModule = v3dv_shader_module_to_handle(module);

   return VK_SUCCESS;
}

void
v3dv_DestroyShaderModule(VkDevice _device,
                         VkShaderModule _module,
                         const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_shader_module, module, _module);

   if (!module)
      return;

   vk_free2(&device->alloc, pAllocator, module);
}
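/* Note on the allocation above: v3dv_CreateShaderModule stores the SPIR-V
 * words inline, in a single vk_alloc2() of sizeof(*module) + codeSize bytes,
 * so module->data is a trailing array that lives and dies with the struct.
 * A minimal sketch of the layout this relies on (the real definition is in
 * v3dv_private.h; the field order here is illustrative only):
 *
 *    struct v3dv_shader_module {
 *       unsigned char sha1[20];   // filled by _mesa_sha1_compute()
 *       uint32_t size;            // codeSize in bytes
 *       char data[0];             // SPIR-V words follow the struct
 *    };
 *
 * This is also why v3dv_DestroyShaderModule needs only the single vk_free2().
 */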
static void
destroy_pipeline_stage(struct v3dv_device *device,
                       struct v3dv_pipeline_stage *p_stage,
                       const VkAllocationCallbacks *pAllocator)
{
   v3dv_bo_free(device, p_stage->assembly_bo);
   vk_free2(&device->alloc, pAllocator, p_stage);
}

void
v3dv_DestroyPipeline(VkDevice _device,
                     VkPipeline _pipeline,
                     const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   /* FIXME: we can't just loop over the mesa stages because of the extra bin
    * stage; it would be good to find an alternative.
    */
   destroy_pipeline_stage(device, pipeline->vs, pAllocator);
   destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);
   destroy_pipeline_stage(device, pipeline->fs, pAllocator);

   vk_free2(&device->alloc, pAllocator, pipeline);
}

static const struct spirv_to_nir_options default_spirv_options = {
   .caps = { false },
   .ubo_addr_format = nir_address_format_32bit_index_offset,
   .ssbo_addr_format = nir_address_format_32bit_index_offset,
   .phys_ssbo_addr_format = nir_address_format_64bit_global,
   .push_const_addr_format = nir_address_format_logical,
   .shared_addr_format = nir_address_format_32bit_offset,
   .frag_coord_is_sysval = false,
};

const nir_shader_compiler_options v3dv_nir_options = {
   .lower_all_io_to_temps = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_bitfield_insert_to_shifts = true,
   .lower_bitfield_extract_to_shifts = true,
   .lower_bitfield_reverse = true,
   .lower_bit_count = true,
   .lower_cs_local_id_from_index = true,
   .lower_ffract = true,
   .lower_fmod = true,
   .lower_pack_unorm_2x16 = true,
   .lower_pack_snorm_2x16 = true,
   .lower_pack_unorm_4x8 = true,
   .lower_pack_snorm_4x8 = true,
   .lower_unpack_unorm_4x8 = true,
   .lower_unpack_snorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_unpack_half_2x16 = true,
   .lower_fdiv = true,
   .lower_find_lsb = true,
   .lower_ffma16 = true,
   .lower_ffma32 = true,
   .lower_ffma64 = true,
   .lower_flrp32 = true,
   .lower_fpow = true,
   .lower_fsat = true,
   .lower_fsqrt = true,
   .lower_ifind_msb = true,
   .lower_isign = true,
   .lower_ldexp = true,
   .lower_mul_high = true,
   .lower_wpos_pntc = true,
   .lower_rotate = true,
   .lower_to_scalar = true,
   .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic
                                   * needs to be supported */
};

static nir_shader *
shader_module_compile_to_nir(struct v3dv_device *device,
                             struct v3dv_pipeline_stage *stage)
{
   nir_shader *nir;
   const nir_shader_compiler_options *nir_options = &v3dv_nir_options;

   uint32_t *spirv = (uint32_t *) stage->module->data;
   assert(stage->module->size % 4 == 0);

   if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)
      v3dv_print_spirv(stage->module->data, stage->module->size, stderr);

   uint32_t num_spec_entries = 0;
   struct nir_spirv_specialization *spec_entries = NULL;
   const struct spirv_to_nir_options spirv_options = default_spirv_options;
   nir = spirv_to_nir(spirv, stage->module->size / 4,
                      spec_entries, num_spec_entries,
                      stage->stage, stage->entrypoint,
                      &spirv_options, nir_options);
   assert(nir->info.stage == stage->stage);
   nir_validate_shader(nir, "after spirv_to_nir");
   free(spec_entries);
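   /* Note: specialization constants are not wired up yet: we always pass a
    * NULL/0 spec_entries pair to spirv_to_nir() above. Hooking them up would
    * presumably mean translating the VkSpecializationInfo attached to the
    * VkPipelineShaderStageCreateInfo into nir_spirv_specialization entries
    * before this call; until then, SpecId-decorated constants keep their
    * default values.
    */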
   /* We have to lower away local variable initializers right before we
    * inline functions. That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (func->is_entrypoint)
         func->name = ralloc_strdup(func, "main");
      else
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   /* Make sure we lower variable initializers on output variables so that
    * nir_remove_dead_variables below sees the corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the variable initializers.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* Split member structs. We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   /* FIXME: needed? */
   if (nir->info.stage == MESA_SHADER_FRAGMENT)
      NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                    .use_fragcoord_sysval = false,
                 });
   }

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out |
              nir_var_system_value | nir_var_mem_shared,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant);
   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   return nir;
}

static void
v3dv_nir_lower_fs_inputs(nir_shader *nir)
{
   /* FIXME: stub */
}

static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

static void
v3dv_nir_lower_fs_outputs(nir_shader *nir)
{
   NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out, type_size_vec4, 0);
}
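/* Note: nir_lower_io with type_size_vec4 above rewrites the output variables
 * into store_output intrinsics whose driver_location is counted in vec4
 * slots (glsl_count_attribute_slots() with bindless == false), which is the
 * granularity the rest of the v3d compiler appears to expect for varyings.
 * For example, a float[2] output occupies two slots under this counting.
 */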
static void
shader_debug_output(const char *message, void *data)
{
   /* FIXME: We probably don't want any extra debug output here. In fact the
    * compiler makes very little use of this callback: it is only an
    * alternative way to dump the shader-db stats, which are already
    * available with V3D_DEBUG=shaderdb. Perhaps it would make sense to
    * revisit the v3d compiler and remove the callback altogether.
    */
}

static void
pipeline_populate_v3d_key(struct v3d_key *key,
                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
                          const struct v3dv_pipeline_stage *p_stage)
{
   /* Default value. This will be overridden by the vs/gs populate methods
    * once GS gets supported.
    */
   key->is_last_geometry_stage = true;

   /* Vulkan provides a way to define clip distances, but not clip planes, so
    * we understand this will always be zero. This will probably need to be
    * revisited based on the available clip-related extensions.
    */
   key->ucp_enables = 0;
}

/* FIXME: anv maps to the hw primitive type. Perhaps eventually we should do
 * the same. For now we use pipe_prim_type, which is what v3d already uses.
 */
static const enum pipe_prim_type vk_to_pipe_prim_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
};

static const enum pipe_logicop vk_to_pipe_logicop[] = {
   [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,
   [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,
   [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,
   [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,
   [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,
};
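/* Both tables above are indexed directly with the Vulkan enum value, which
 * works because the core VK_PRIMITIVE_TOPOLOGY_* and VK_LOGIC_OP_* values
 * are contiguous starting at 0. Note that vk_to_pipe_prim_type has no entry
 * for VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, so a pipeline using tessellation
 * would index past the end of the table; that is fine for now given that
 * only VS and FS stages are supported.
 */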
static void
pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
   memset(key, 0, sizeof(*key));
   pipeline_populate_v3d_key(&key->base, pCreateInfo, p_stage);

   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];

   key->is_points = (topology == PIPE_PRIM_POINTS);
   key->is_lines = (topology >= PIPE_PRIM_LINES &&
                    topology <= PIPE_PRIM_LINE_STRIP);

   /* Vulkan doesn't appear to specify color clamping (anv does the same) */
   key->clamp_color = false;

   const VkPipelineColorBlendStateCreateInfo *cb_info =
      pCreateInfo->pColorBlendState;
   key->logicop_func = (cb_info->logicOpEnable == VK_FALSE ?
                        PIPE_LOGICOP_COPY :
                        vk_to_pipe_logicop[cb_info->logicOp]);

   const VkPipelineMultisampleStateCreateInfo *ms_info =
      pCreateInfo->pMultisampleState;
   /* FIXME: msaa is not supported yet, although we already include some of
    * the code to translate the vk sample info in advance.
    */
   key->msaa = false;
   if (key->msaa && ms_info != NULL) {
      uint32_t sample_mask = 0xffff;

      if (ms_info->pSampleMask)
         sample_mask = ms_info->pSampleMask[0] & 0xffff;

      key->sample_coverage = (sample_mask != (1 << V3D_MAX_SAMPLES) - 1);
      key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;
      key->sample_alpha_to_one = ms_info->alphaToOneEnable;
   }

   const VkPipelineDepthStencilStateCreateInfo *ds_info =
      pCreateInfo->pDepthStencilState;

   key->depth_enabled = (ds_info == NULL ? false : ds_info->depthTestEnable);

   /* Vulkan doesn't support alpha test */
   key->alpha_test = false;
   key->alpha_test_func = COMPARE_FUNC_NEVER;

   /* FIXME: placeholder. The final value of swap_color_rb depends on the
    * format of the surface to be used.
    */
   key->swap_color_rb = false;

   const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
         continue;

      key->cbufs |= 1 << i;

      /* FIXME: in order to know this we need access to the color
       * framebuffer, which is not in place yet, so use a hardcoded default
       * value.
       */
      VkFormat fb_format = VK_FORMAT_R8G8B8A8_UNORM;
      enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);

      /* If logic operations are enabled then we might emit color reads and
       * we need to know the color buffer format and swizzle for that.
       */
      if (key->logicop_func != PIPE_LOGICOP_COPY) {
         key->color_fmt[i].format = fb_pipe_format;
         key->color_fmt[i].swizzle = v3dv_get_format_swizzle(fb_format);
      }

      const struct util_format_description *desc =
         vk_format_description(fb_format);

      if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
          desc->channel[0].size == 32) {
         key->f32_color_rb |= 1 << i;
      }

      if (p_stage->nir->info.fs.untyped_color_outputs) {
         if (util_format_is_pure_uint(fb_pipe_format))
            key->uint_color_rb |= 1 << i;
         else if (util_format_is_pure_sint(fb_pipe_format))
            key->int_color_rb |= 1 << i;
      }

      if (key->is_points) {
         /* FIXME: the mask should be computed based on the shader inputs.
          * On gallium this is done at st_atom_rasterizer
          * (sprite_coord_enable). anv seems (need to confirm) to do it at
          * genX_pipeline (PointSpriteTextureCoordinateEnable). It would also
          * be good to have tests to guide filling the mask.
          */
         key->point_sprite_mask = 0;

         /* Vulkan mandates upper left. */
         key->point_coord_upper_left = true;
      }
   }

   /* FIXME: we understand this is used on GL to configure fixed-function
    * two-sided lighting, so it doesn't make sense for Vulkan. Need to
    * confirm though.
    */
   key->light_twoside = false;

   /* FIXME: ditto, but for flat lighting. Again, need to confirm. */
   key->shade_model_flat = false;
}

static void
pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const struct v3dv_pipeline_stage *p_stage)
{
   memset(key, 0, sizeof(*key));
   pipeline_populate_v3d_key(&key->base, pCreateInfo, p_stage);

   /* Vulkan doesn't appear to specify color clamping (anv does the same) */
   key->clamp_color = false;

   /* Vulkan specifies a point size per vertex, so we set this to true when
    * the primitives are points, like on ES2.
    */
   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];

   /* FIXME: PIPE_PRIM_POINTS alone is not enough: on gallium the full check
    * is PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex.
    */
   key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);

   key->is_coord = p_stage->is_coord;
   if (p_stage->is_coord) {
      /* The only output varyings on coord shaders are for transform
       * feedback. Set to 0 as VK_EXT_transform_feedback is not supported.
       */
      key->num_used_outputs = 0;
   } else {
      struct v3dv_pipeline *pipeline = p_stage->pipeline;
      key->num_used_outputs = pipeline->fs->prog_data.fs->num_inputs;
      STATIC_ASSERT(sizeof(key->used_outputs) ==
                    sizeof(pipeline->fs->prog_data.fs->input_slots));
      memcpy(key->used_outputs, pipeline->fs->prog_data.fs->input_slots,
             sizeof(key->used_outputs));
   }
}
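/* The V3D hardware handles a draw in two passes: a binning pass, which runs
 * a stripped-down "coordinate" variant of the vertex shader that only needs
 * to produce the outputs required to bin primitives into tiles (essentially
 * the position), and a render pass that runs the full vertex shader. That is
 * why a single Vulkan vertex stage turns into two compiled stages here (vs
 * and vs_bin), each with its own key and assembly.
 */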
/*
 * Creates the pipeline_stage for the coordinate shader. Initially a clone of
 * the vs pipeline_stage, with is_coord set to true.
 */
static struct v3dv_pipeline_stage *
pipeline_stage_create_vs_bin(const struct v3dv_pipeline_stage *src,
                             const VkAllocationCallbacks *alloc)
{
   struct v3dv_device *device = src->pipeline->device;

   struct v3dv_pipeline_stage *p_stage =
      vk_zalloc2(&device->alloc, alloc, sizeof(*p_stage), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   p_stage->pipeline = src->pipeline;
   assert(src->stage == MESA_SHADER_VERTEX);
   p_stage->stage = src->stage;
   p_stage->entrypoint = src->entrypoint;
   p_stage->module = src->module;
   p_stage->nir = src->nir;

   p_stage->is_coord = true;

   return p_stage;
}

/* FIXME: right now this just asks for a BO of the exact size of the QPU
 * assembly. It would be good to be slightly smarter and have one "all
 * shaders" BO per pipeline, with each p_stage storing its offset into it.
 * That is particularly relevant because BOs are always aligned to 4096
 * bytes, so sharing one would use less memory.
 *
 * For now one BO per assembly will do.
 */
static void
upload_assembly(struct v3dv_pipeline_stage *p_stage,
                const void *data,
                uint32_t size)
{
   /* We upload the assembly just once, so at this point we shouldn't have a
    * BO yet.
    */
   assert(p_stage->assembly_bo == NULL);

   struct v3dv_device *device = p_stage->pipeline->device;

   struct v3dv_bo *bo = v3dv_bo_alloc(device, size);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shader\n");
      abort();
   }

   bool ok = v3dv_bo_map(device, bo, size);
   if (!ok) {
      fprintf(stderr, "failed to map source shader buffer\n");
      abort();
   }

   memcpy(bo->map, data, size);

   v3dv_bo_unmap(device, bo);

   p_stage->assembly_bo = bo;
}
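/* A sketch of what the FIXME above suggests, assuming a hypothetical
 * assembly_offset field in v3dv_pipeline_stage (none of this exists today):
 * allocate a single BO sized for all the stages and hand out offsets into
 * it, e.g.
 *
 *    uint32_t offset = 0;
 *    struct v3dv_bo *bo =
 *       v3dv_bo_alloc(device, vs_size + vs_bin_size + fs_size);
 *    ...
 *    p_stage->assembly_offset = offset;
 *    memcpy((char *) bo->map + offset, data, size);
 *    offset += align(size, 8);
 *
 * so the three stages could share one 4096-byte-aligned allocation instead
 * of three.
 */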
static void
compile_pipeline_stage(struct v3dv_pipeline_stage *p_stage)
{
   struct v3dv_physical_device *physical_device =
      &p_stage->pipeline->device->instance->physicalDevice;
   const struct v3d_compiler *compiler = physical_device->compiler;

   /* We don't support variants (and probably never will) */
   int variant_id = 0;

   /* Note that we are assigning program_id slightly differently than v3d.
    * Here we assign one per pipeline stage, so vs and vs_bin get different
    * program_ids, while v3d uses the same one for both. For v3dv it is more
    * natural to have an id this way, as right now we are using it for
    * debugging, not for shader-db.
    */
   p_stage->program_id = physical_device->next_program_id++;

   if (V3D_DEBUG & (V3D_DEBUG_NIR |
                    v3d_debug_flag_for_shader_stage(p_stage->stage))) {
      fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",
              gl_shader_stage_name(p_stage->stage),
              p_stage->program_id);
      nir_print_shader(p_stage->nir, stderr);
      fprintf(stderr, "\n");
   }

   uint64_t *qpu_insts;
   uint32_t qpu_insts_size;

   qpu_insts = v3d_compile(compiler,
                           &p_stage->key.base, &p_stage->prog_data.base,
                           p_stage->nir,
                           shader_debug_output, NULL,
                           p_stage->program_id,
                           variant_id,
                           &qpu_insts_size);

   if (!qpu_insts) {
      fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",
              gl_shader_stage_name(p_stage->stage),
              p_stage->program_id);
   } else {
      upload_assembly(p_stage, qpu_insts, qpu_insts_size);
   }

   free(qpu_insts);
}

static VkResult
pipeline_compile_graphics(struct v3dv_pipeline *pipeline,
                          const VkGraphicsPipelineCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *alloc)
{
   struct v3dv_pipeline_stage *stages[MESA_SHADER_STAGES] = { };
   struct v3dv_device *device = pipeline->device;

   /* First pass to get the common info from the shader and the nir shader.
    * We don't care about the coord shader for now.
    */
   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);

      struct v3dv_pipeline_stage *p_stage =
         vk_zalloc2(&device->alloc, alloc, sizeof(*p_stage), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

      p_stage->pipeline = pipeline;
      p_stage->stage = stage;
      if (stage == MESA_SHADER_VERTEX)
         p_stage->is_coord = false;
      p_stage->entrypoint = sinfo->pName;
      p_stage->module = v3dv_shader_module_from_handle(sinfo->module);

      pipeline->active_stages |= sinfo->stage;

      /* FIXME: when cache support is in place, first check if for the given
       * spirv module and options we already have a nir shader.
       */
      p_stage->nir = shader_module_compile_to_nir(pipeline->device, p_stage);

      stages[stage] = p_stage;
   }

   /* Second pass: walk the stages in reverse order, so the fragment shader
    * is compiled before the vertex shader and its prog_data is available
    * when pipeline_populate_v3d_vs_key reads the fs inputs.
    */
   for (int stage = MESA_SHADER_STAGES - 1; stage >= 0; stage--) {
      if (stages[stage] == NULL || stages[stage]->entrypoint == NULL)
         continue;

      struct v3dv_pipeline_stage *p_stage = stages[stage];

      switch(stage) {
      case MESA_SHADER_VERTEX:
         /* Right now we only support pipelines with both vertex and fragment
          * shader.
          */
         assert(pipeline->fs);

         pipeline->vs = p_stage;

         pipeline->vs_bin = pipeline_stage_create_vs_bin(pipeline->vs, alloc);

         /* FIXME: this will likely be moved to a gather-info method filling
          * a full struct inside pipeline_stage.
          */
         const VkPipelineInputAssemblyStateCreateInfo *ia_info =
            pCreateInfo->pInputAssemblyState;
         pipeline->vs->topology = vk_to_pipe_prim_type[ia_info->topology];

         /* Note that at this point we compile twice, once for vs and once
          * for vs_bin. For now we are maintaining two pipeline_stages and
          * two keys. Eventually we could reuse the key.
          */
         pipeline_populate_v3d_vs_key(&pipeline->vs->key.vs, pCreateInfo,
                                      pipeline->vs);
         pipeline_populate_v3d_vs_key(&pipeline->vs_bin->key.vs, pCreateInfo,
                                      pipeline->vs_bin);

         compile_pipeline_stage(pipeline->vs);
         compile_pipeline_stage(pipeline->vs_bin);
         break;
      case MESA_SHADER_FRAGMENT:
         pipeline->fs = p_stage;

         pipeline_populate_v3d_fs_key(&p_stage->key.fs, pCreateInfo,
                                      p_stage);

         /* FIXME: create a per-build method with all the lowering needed.
          * Perhaps move it to shader_compile_module_to_nir?
          */
         v3dv_nir_lower_fs_inputs(p_stage->nir);
         v3dv_nir_lower_fs_outputs(p_stage->nir);

         compile_pipeline_stage(pipeline->fs);
         break;
      default:
         unreachable("not supported shader stage");
      }
   }

   return VK_SUCCESS;
}

static unsigned
v3dv_dynamic_state_mask(VkDynamicState state)
{
   switch(state) {
   case VK_DYNAMIC_STATE_VIEWPORT:
      return V3DV_DYNAMIC_VIEWPORT;
   case VK_DYNAMIC_STATE_SCISSOR:
      return V3DV_DYNAMIC_SCISSOR;
   default:
      unreachable("Unhandled dynamic state");
   }
}

static uint32_t
pipeline_needed_dynamic_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   uint32_t states = V3DV_DYNAMIC_ALL;

   /* FIXME: stub. Based on other values in pCreateInfo we would need to
    * remove flags from states.
    */

   return states;
}

static void
pipeline_init_dynamic_state(struct v3dv_pipeline *pipeline,
                            const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   uint32_t needed_states = pipeline_needed_dynamic_state(pCreateInfo);
   uint32_t states = needed_states;

   pipeline->dynamic_state = default_dynamic_state;

   if (pCreateInfo->pDynamicState) {
      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;

      for (uint32_t s = 0; s < count; s++) {
         /* Remove all of the states that are marked as dynamic */
         states &= ~v3dv_dynamic_state_mask(
            pCreateInfo->pDynamicState->pDynamicStates[s]);
      }
   }

   struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;

   /* Note, as this can be counter-intuitive: although we are checking
    * against _DYNAMIC flags, we are copying the data for the states that
    * were *not* declared as dynamic.
    */
   if (needed_states & V3DV_DYNAMIC_VIEWPORT) {
      assert(pCreateInfo->pViewportState);

      dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
      if (states & V3DV_DYNAMIC_VIEWPORT) {
         typed_memcpy(dynamic->viewport.viewports,
                      pCreateInfo->pViewportState->pViewports,
                      pCreateInfo->pViewportState->viewportCount);
      }
   }

   if (needed_states & V3DV_DYNAMIC_SCISSOR) {
      dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
      if (states & V3DV_DYNAMIC_SCISSOR) {
         typed_memcpy(dynamic->scissor.scissors,
                      pCreateInfo->pViewportState->pScissors,
                      pCreateInfo->pViewportState->scissorCount);
      }
   }

   pipeline->dynamic_state.mask = states;
}
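/* Example of the interaction above: if the application lists
 * VK_DYNAMIC_STATE_VIEWPORT in pDynamicState, the V3DV_DYNAMIC_VIEWPORT bit
 * is cleared from pipeline->dynamic_state.mask and the pViewports data is
 * not copied (per the spec it may even be NULL in that case), because the
 * application is required to provide the viewport later with
 * vkCmdSetViewport(). Otherwise the static viewports are baked into the
 * pipeline's dynamic state here.
 */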
static VkResult
pipeline_init(struct v3dv_pipeline *pipeline,
              struct v3dv_device *device,
              const VkGraphicsPipelineCreateInfo *pCreateInfo,
              const VkAllocationCallbacks *alloc)
{
   VkResult result = VK_SUCCESS;

   pipeline->device = device;

   V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);
   assert(pCreateInfo->subpass < render_pass->subpass_count);
   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];

   pipeline_init_dynamic_state(pipeline, pCreateInfo);

   result = pipeline_compile_graphics(pipeline, pCreateInfo, alloc);

   if (result != VK_SUCCESS) {
      /* Our caller will destroy the pipeline and we didn't allocate any
       * extra info here, so there is nothing else to clean up.
       */
      return result;
   }

   return result;
}

static VkResult
graphics_pipeline_create(VkDevice _device,
                         VkPipelineCache _cache,
                         const VkGraphicsPipelineCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkPipeline *pPipeline)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);

   struct v3dv_pipeline *pipeline;
   VkResult result;

   pipeline = vk_zalloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = pipeline_init(pipeline, device, pCreateInfo, pAllocator);

   if (result != VK_SUCCESS) {
      vk_free2(&device->alloc, pAllocator, pipeline);
      return result;
   }

   *pPipeline = v3dv_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

VkResult
v3dv_CreateGraphicsPipelines(VkDevice _device,
                             VkPipelineCache pipelineCache,
                             uint32_t count,
                             const VkGraphicsPipelineCreateInfo *pCreateInfos,
                             const VkAllocationCallbacks *pAllocator,
                             VkPipeline *pPipelines)
{
   VkResult result = VK_SUCCESS;

   /* Even if creating one of the pipelines fails, we keep trying to create
    * the remaining ones, setting the failed entries to VK_NULL_HANDLE.
    */
   for (uint32_t i = 0; i < count; i++) {
      VkResult local_result;

      local_result = graphics_pipeline_create(_device,
                                              pipelineCache,
                                              &pCreateInfos[i],
                                              pAllocator,
                                              &pPipelines[i]);

      if (local_result != VK_SUCCESS) {
         result = local_result;
         pPipelines[i] = VK_NULL_HANDLE;
      }
   }

   return result;
}