1 files changed, 343 insertions, 86 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 0d51ccb0349..d108f35f719 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -61,6 +61,9 @@
  * #   |   #   |   #
  * #################
  *
+ * If we iterate over multiple quads at once, quads 0 and 1 are processed
+ * together, as are quads 2 and 3.
+ *
  * Within each quad, we have four pixels which are represented in SOA
  * order:
  *
@@ -72,6 +75,10 @@
  *
  * So the green channel (for example) of the four pixels is stored in
  * a single vector register: {g0, g1, g2, g3}.
+ * The order stays the same even with multiple quads:
+ * 0 1 4 5
+ * 2 3 6 7
+ * is stored as g0..g7
  */
 
 
@@ -102,8 +109,8 @@
 #define PERSPECTIVE_DIVIDE_PER_QUAD 0
 
 
-static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
-static const unsigned char quad_offset_y[4] = {0, 0, 1, 1};
+static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3};
+static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3};
 
 
 static void
@@ -115,132 +122,353 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix
       lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
 }
 
-
-/**
- * Initialize the bld->a0, dadx, dady fields.  This involves fetching
- * those values from the arrays which are passed into the JIT function.
+/* Much easier, and significantly fewer instructions in the per-stamp
+ * part (less than half) but overall more instructions so a loss if
+ * most quads are active. Might be a win though with larger vectors.
+ * No ability to do per-quad divide (doable but not implemented)
+ * Could be made to work with passed in pixel offsets (i.e. active quad merging).
  */
 static void
-coeffs_init(struct lp_build_interp_soa_context *bld,
-            LLVMValueRef a0_ptr,
-            LLVMValueRef dadx_ptr,
-            LLVMValueRef dady_ptr)
+coeffs_init_simple(struct lp_build_interp_soa_context *bld,
+                   LLVMValueRef a0_ptr,
+                   LLVMValueRef dadx_ptr,
+                   LLVMValueRef dady_ptr)
 {
    struct lp_build_context *coeff_bld = &bld->coeff_bld;
+   struct lp_build_context *setup_bld = &bld->setup_bld;
    struct gallivm_state *gallivm = coeff_bld->gallivm;
    LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef zero = LLVMConstNull(coeff_bld->elem_type);
-   LLVMValueRef one = LLVMConstReal(coeff_bld->elem_type, 1.0);
-   LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
-   LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
-   LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
-   LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
    unsigned attrib;
-   unsigned chan;
-
-   /* TODO: Use more vector operations */
 
    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      /*
+       * always fetch all 4 values for performance/simplicity
+       * Note: we do that here because it seems to generate better
+       * code. It generates a lot of moves initially but less
+       * moves later. As far as I can tell this looks like a
+       * llvm issue, instead of simply reloading the values from
+       * the passed in pointers if it runs out of registers
+       * it spills/reloads them. Maybe some optimization passes
+       * would help.
+       * Might want to investigate this again later.
+       */
+      const unsigned interp = bld->interp[attrib];
+      LLVMValueRef index = lp_build_const_int32(gallivm,
+                                attrib * TGSI_NUM_CHANNELS);
+      LLVMValueRef ptr;
+      LLVMValueRef dadxaos = setup_bld->zero;
+      LLVMValueRef dadyaos = setup_bld->zero;
+      LLVMValueRef a0aos = setup_bld->zero;
+
+      switch (interp) {
+      case LP_INTERP_PERSPECTIVE:
+         /* fall-through */
+
+      case LP_INTERP_LINEAR:
+         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+               LLVMPointerType(setup_bld->vec_type, 0), "");
+         dadxaos = LLVMBuildLoad(builder, ptr, "");
+
+         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+               LLVMPointerType(setup_bld->vec_type, 0), "");
+         dadyaos = LLVMBuildLoad(builder, ptr, "");
+
+         attrib_name(dadxaos, attrib, 0, ".dadxaos");
+         attrib_name(dadyaos, attrib, 0, ".dadyaos");
+         /* fall-through */
+
+      case LP_INTERP_CONSTANT:
+      case LP_INTERP_FACING:
+         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+               LLVMPointerType(setup_bld->vec_type, 0), "");
+         a0aos = LLVMBuildLoad(builder, ptr, "");
+         attrib_name(a0aos, attrib, 0, ".a0aos");
+         break;
+
+      case LP_INTERP_POSITION:
+         /* Nothing to do as the position coeffs are already setup in slot 0 */
+         continue;
+
+      default:
+         assert(0);
+         break;
+      }
+      bld->a0aos[attrib] = a0aos;
+      bld->dadxaos[attrib] = dadxaos;
+      bld->dadyaos[attrib] = dadyaos;
+   }
+}
+
+/**
+ * Interpolate the shader input attribute values.
+ * This is called for each (group of) quad(s).
+ */
+static void
+attribs_update_simple(struct lp_build_interp_soa_context *bld,
+                      struct gallivm_state *gallivm,
+                      int quad_start_index,
+                      int start,
+                      int end)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *coeff_bld = &bld->coeff_bld;
+   struct lp_build_context *setup_bld = &bld->setup_bld;
+   LLVMValueRef oow = NULL;
+   unsigned attrib, i;
+   LLVMValueRef pixoffx;
+   LLVMValueRef pixoffy;
+   unsigned num_pix = coeff_bld->type.length;
+
+   /* could do this with code-generated passed in pixel offsets */
+   pixoffx = coeff_bld->undef;
+   pixoffy = coeff_bld->undef;
+   for (i = 0; i < coeff_bld->type.length; i++) {
+      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
+      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
+                                                (quad_start_index & 1) * 2);
+      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
+                                                (quad_start_index & 2));
+      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
+      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
+   }
+
+   pixoffx = LLVMBuildFAdd(builder, pixoffx,
+                           lp_build_broadcast_scalar(coeff_bld, bld->x), "");
+   pixoffy = LLVMBuildFAdd(builder, pixoffy,
+                           lp_build_broadcast_scalar(coeff_bld, bld->y), "");
+
+   for (attrib = start; attrib < end; attrib++) {
       const unsigned mask = bld->mask[attrib];
       const unsigned interp = bld->interp[attrib];
-      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+      unsigned chan;
+
+      for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
          if (mask & (1 << chan)) {
-            LLVMValueRef index = lp_build_const_int32(gallivm,
-                                      attrib * TGSI_NUM_CHANNELS + chan);
-            LLVMValueRef a0 = zero;
-            LLVMValueRef dadx = zero;
-            LLVMValueRef dady = zero;
-            LLVMValueRef dadxy = zero;
-            LLVMValueRef dadq;
-            LLVMValueRef dadq2;
-            LLVMValueRef a;
+            LLVMValueRef index;
+            LLVMValueRef dadx = coeff_bld->zero;
+            LLVMValueRef dady = coeff_bld->zero;
+            LLVMValueRef a = coeff_bld->zero;
 
+            index = lp_build_const_int32(gallivm, chan);
             switch (interp) {
             case LP_INTERP_PERSPECTIVE:
                /* fall-through */
 
             case LP_INTERP_LINEAR:
                if (attrib == 0 && chan == 0) {
-                  dadxy = dadx = one;
+                  dadx = coeff_bld->one;
                }
                else if (attrib == 0 && chan == 1) {
-                  dadxy = dady = one;
+                  dady = coeff_bld->one;
                }
                else {
-                  dadx = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""), "");
-                  dady = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dady_ptr, &index, 1, ""), "");
-                  dadxy = LLVMBuildFAdd(builder, dadx, dady, "");
-                  attrib_name(dadx, attrib, chan, ".dadx");
-                  attrib_name(dady, attrib, chan, ".dady");
-                  attrib_name(dadxy, attrib, chan, ".dadxy");
+                  dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                                    coeff_bld->type, bld->dadxaos[attrib],
+                                                    index);
+                  dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                                    coeff_bld->type, bld->dadyaos[attrib],
+                                                    index);
+                  a = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                                 coeff_bld->type, bld->a0aos[attrib],
+                                                 index);
                }
-               /* fall-through */
+               /*
+                * a = a0 + (x * dadx + y * dady)
+                */
+               dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
+               dady = LLVMBuildFMul(builder, dady, pixoffy, "");
+               a = LLVMBuildFAdd(builder, a, dadx, "");
+               a = LLVMBuildFAdd(builder, a, dady, "");
+
+               if (interp == LP_INTERP_PERSPECTIVE) {
+                  if (oow == NULL) {
+                     LLVMValueRef w = bld->attribs[0][3];
+                     assert(attrib != 0);
+                     assert(bld->mask[0] & TGSI_WRITEMASK_W);
+                     oow = lp_build_rcp(coeff_bld, w);
+                  }
+                  a = lp_build_mul(coeff_bld, a, oow);
+               }
+               break;
 
             case LP_INTERP_CONSTANT:
             case LP_INTERP_FACING:
-               a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
-               attrib_name(a0, attrib, chan, ".a0");
+               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                              coeff_bld->type, bld->a0aos[attrib],
+                                              index);
                break;
 
             case LP_INTERP_POSITION:
-               /* Nothing to do as the position coeffs are already setup in slot 0 */
-               continue;
+               assert(attrib > 0);
+               a = bld->attribs[0][chan];
+               break;
 
             default:
                assert(0);
                break;
             }
 
-            /*
-             * dadq = {0, dadx, dady, dadx + dady}
-             */
+            if ((attrib == 0) && (chan == 2)){
+               /* FIXME: Depth values can exceed 1.0, due to the fact that
+                * setup interpolation coefficients refer to (0,0) which causes
+                * precision loss. So we must clamp to 1.0 here to avoid artifacts
+                */
+               a = lp_build_min(coeff_bld, a, coeff_bld->one);
+            }
+            bld->attribs[attrib][chan] = a;
+         }
+      }
+   }
+}
 
-            dadq = coeff_bld->undef;
-            dadq = LLVMBuildInsertElement(builder, dadq, zero,  i0, "");
-            dadq = LLVMBuildInsertElement(builder, dadq, dadx,  i1, "");
-            dadq = LLVMBuildInsertElement(builder, dadq, dady,  i2, "");
-            dadq = LLVMBuildInsertElement(builder, dadq, dadxy, i3, "");
+/**
+ * Initialize the bld->a, dadq fields.  This involves fetching
+ * those values from the arrays which are passed into the JIT function.
+ */
+static void
+coeffs_init(struct lp_build_interp_soa_context *bld,
+            LLVMValueRef a0_ptr,
+            LLVMValueRef dadx_ptr,
+            LLVMValueRef dady_ptr)
+{
+   struct lp_build_context *coeff_bld = &bld->coeff_bld;
+   struct lp_build_context *setup_bld = &bld->setup_bld;
+   struct gallivm_state *gallivm = coeff_bld->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef pixoffx, pixoffy;
+   unsigned attrib;
+   unsigned chan;
+   unsigned i;
+
+   pixoffx = coeff_bld->undef;
+   pixoffy = coeff_bld->undef;
+   for (i = 0; i < coeff_bld->type.length; i++) {
+      LLVMValueRef nr = lp_build_const_int32(gallivm, i);
+      LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]);
+      LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]);
+      pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, "");
+      pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, "");
+   }
 
-            /*
-             * dadq2 = 2 * dq
-             */
 
-            dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
+   for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
+      const unsigned mask = bld->mask[attrib];
+      const unsigned interp = bld->interp[attrib];
+      LLVMValueRef index = lp_build_const_int32(gallivm,
+                                attrib * TGSI_NUM_CHANNELS);
+      LLVMValueRef ptr;
+      LLVMValueRef dadxaos = setup_bld->zero;
+      LLVMValueRef dadyaos = setup_bld->zero;
+      LLVMValueRef a0aos = setup_bld->zero;
+
+      /* always fetch all 4 values for performance/simplicity */
+      switch (interp) {
+      case LP_INTERP_PERSPECTIVE:
+         /* fall-through */
+
+      case LP_INTERP_LINEAR:
+         ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+               LLVMPointerType(setup_bld->vec_type, 0), "");
+         dadxaos = LLVMBuildLoad(builder, ptr, "");
+
+         ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+               LLVMPointerType(setup_bld->vec_type, 0), "");
+         dadyaos = LLVMBuildLoad(builder, ptr, "");
+
+         attrib_name(dadxaos, attrib, 0, ".dadxaos");
+         attrib_name(dadyaos, attrib, 0, ".dadyaos");
+         /* fall-through */
+
+      case LP_INTERP_CONSTANT:
+      case LP_INTERP_FACING:
+         ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, "");
+         ptr = LLVMBuildBitCast(builder, ptr,
+               LLVMPointerType(setup_bld->vec_type, 0), "");
+         a0aos = LLVMBuildLoad(builder, ptr, "");
+         attrib_name(a0aos, attrib, 0, ".a0aos");
+         break;
+
+      case LP_INTERP_POSITION:
+         /* Nothing to do as the position coeffs are already setup in slot 0 */
+         continue;
+
+      default:
+         assert(0);
+         break;
+      }
 
-            /*
-             * a = a0 + (x * dadx + y * dady)
-             */
+      /*
+       * a = a0 + (x * dadx + y * dady)
+       * a0aos is the attrib value at top left corner of stamp
+       */
+      if (interp != LP_INTERP_CONSTANT &&
+          interp != LP_INTERP_FACING) {
+         LLVMValueRef axaos, ayaos;
+         axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x),
+                               dadxaos, "");
+         ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y),
+                               dadyaos, "");
+         a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, "");
+         a0aos = LLVMBuildFAdd(builder, a0aos, axaos, "");
+      }
+
+      /*
+       * dadq = {0, dadx, dady, dadx + dady}
+       * for two quads (side by side) this is:
+       * {0, dadx, dady, dadx+dady, 2*dadx, 3*dadx, 2*dadx+dady, 3*dadx+dady}
+       */
+      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+         /* this generates a CRAPLOAD of shuffles... */
+         if (mask & (1 << chan)) {
+            LLVMValueRef dadx, dady;
+            LLVMValueRef dadq, dadq2;
+            LLVMValueRef a;
+            LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan);
 
             if (attrib == 0 && chan == 0) {
-               a = bld->x;
+               a = lp_build_broadcast_scalar(coeff_bld, bld->x);
+               dadx = coeff_bld->one;
+               dady = coeff_bld->zero;
             }
             else if (attrib == 0 && chan == 1) {
-               a = bld->y;
+               a = lp_build_broadcast_scalar(coeff_bld, bld->y);
+               dady = coeff_bld->one;
+               dadx = coeff_bld->zero;
             }
             else {
-               a = a0;
-               if (interp != LP_INTERP_CONSTANT &&
-                   interp != LP_INTERP_FACING) {
-                  LLVMValueRef ax, ay, axy;
-                  ax = LLVMBuildFMul(builder, bld->x, dadx, "");
-                  ay = LLVMBuildFMul(builder, bld->y, dady, "");
-                  axy = LLVMBuildFAdd(builder, ax, ay, "");
-                  a = LLVMBuildFAdd(builder, a, axy, "");
-               }
-            }
+               dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                              coeff_bld->type, dadxaos, chan_index);
+               dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                              coeff_bld->type, dadyaos, chan_index);
 
-            /*
-             * a = {a, a, a, a}
-             */
+               /*
+                * a = {a, a, a, a}
+                */
+               a = lp_build_extract_broadcast(gallivm, setup_bld->type,
+                                              coeff_bld->type, a0aos, chan_index);
+            }
 
-            a = lp_build_broadcast(gallivm, coeff_bld->vec_type, a);
+            dadx = LLVMBuildFMul(builder, dadx, pixoffx, "");
+            dady = LLVMBuildFMul(builder, dady, pixoffy, "");
+            dadq = LLVMBuildFAdd(builder, dadx, dady, "");
 
             /*
-             * Compute the attrib values on the upper-left corner of each quad.
+             * Compute the attrib values on the upper-left corner of each
+             * group of quads.
+             * Note that if we process 2 quads at once this doesn't
+             * really correspond exactly to what we want.
+             * We need to access elem 0 and 2 respectively later if we process
+             * 2 quads at once.
              */
 
             if (interp != LP_INTERP_CONSTANT &&
                 interp != LP_INTERP_FACING) {
+               dadq2 = LLVMBuildFAdd(builder, dadq, dadq, "");
                a = LLVMBuildFAdd(builder, a, dadq2, "");
 	    }
 
@@ -249,6 +477,12 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
              * a *= 1 / w
              */
 
+            /*
+             * XXX since we're only going to access elements 0,2 out of 8
+             * if we have 8-wide vectors we should do the division only 4-wide.
+             * a is really 2 elements in a 4-wide vector disguised as 8-wide
+             * in this case.
+             */
             if (interp == LP_INTERP_PERSPECTIVE) {
                LLVMValueRef w = bld->a[0][3];
                assert(attrib != 0);
@@ -279,18 +513,18 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
 static void
 attribs_update(struct lp_build_interp_soa_context *bld,
                struct gallivm_state *gallivm,
-               int quad_index,
+               int quad_start_index,
                int start,
                int end)
 {
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context *coeff_bld = &bld->coeff_bld;
-   LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_index);
+   LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_start_index);
    LLVMValueRef oow = NULL;
    unsigned attrib;
    unsigned chan;
 
-   assert(quad_index < 4);
+   assert(quad_start_index < 4);
 
    for(attrib = start; attrib < end; ++attrib) {
       const unsigned mask = bld->mask[attrib];
@@ -412,6 +646,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                          LLVMValueRef y0)
 {
    struct lp_type coeff_type;
+   struct lp_type setup_type;
    unsigned attrib;
    unsigned chan;
 
@@ -421,19 +656,26 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
    coeff_type.floating = TRUE;
    coeff_type.sign = TRUE;
    coeff_type.width = 32;
-   coeff_type.length = TGSI_QUAD_SIZE;
+   coeff_type.length = type.length;
+
+   memset(&setup_type, 0, sizeof setup_type);
+   setup_type.floating = TRUE;
+   setup_type.sign = TRUE;
+   setup_type.width = 32;
+   setup_type.length = TGSI_NUM_CHANNELS;
+
 
    /* XXX: we don't support interpolating into any other types */
    assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
 
    lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
+   lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
 
    /* For convenience */
    bld->pos = bld->attribs[0];
    bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
 
    /* Position */
-   bld->num_attribs = 1;
    bld->mask[0] = TGSI_WRITEMASK_XYZW;
    bld->interp[0] = LP_INTERP_LINEAR;
 
@@ -453,7 +695,12 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
 
    pos_init(bld, x0, y0);
 
-   coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+   if (coeff_type.length > 4) {
+      coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
+   }
+   else {
+      coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr);
+   }
 }
 
 
@@ -463,20 +710,30 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
 void
 lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld,
                                   struct gallivm_state *gallivm,
-                                  int quad_index)
+                                  int quad_start_index)
 {
-   assert(quad_index < 4);
+   assert(quad_start_index < 4);
 
-   attribs_update(bld, gallivm, quad_index, 1, bld->num_attribs);
+   if (bld->coeff_bld.type.length > 4) {
+      attribs_update_simple(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+   }
+   else {
+      attribs_update(bld, gallivm, quad_start_index, 1, bld->num_attribs);
+   }
 }
 
 void
 lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld,
                                   struct gallivm_state *gallivm,
-                                  int quad_index)
+                                  int quad_start_index)
 {
-   assert(quad_index < 4);
+   assert(quad_start_index < 4);
 
-   attribs_update(bld, gallivm, quad_index, 0, 1);
+   if (bld->coeff_bld.type.length > 4) {
+      attribs_update_simple(bld, gallivm, quad_start_index, 0, 1);
+   }
+   else {
+      attribs_update(bld, gallivm, quad_start_index, 0, 1);
+   }
 }