diff options
Diffstat (limited to 'src/uxa/i965_video.c')
-rw-r--r-- | src/uxa/i965_video.c | 1939 |
1 files changed, 1939 insertions, 0 deletions
diff --git a/src/uxa/i965_video.c b/src/uxa/i965_video.c new file mode 100644 index 00000000..5706b201 --- /dev/null +++ b/src/uxa/i965_video.c | |||
@@ -0,0 +1,1939 @@ | |||
1 | /* | ||
2 | * Copyright © 2006 Intel Corporation | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice (including the next | ||
12 | * paragraph) shall be included in all copies or substantial portions of the | ||
13 | * Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | ||
21 | * DEALINGS IN THE SOFTWARE. | ||
22 | * | ||
23 | * Authors: | ||
24 | * Eric Anholt <eric@anholt.net> | ||
25 | * Keith Packard <keithp@keithp.com> | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | #ifdef HAVE_CONFIG_H | ||
30 | #include "config.h" | ||
31 | #endif | ||
32 | |||
33 | #include "xf86.h" | ||
34 | #include "xf86_OSproc.h" | ||
35 | #include "xf86xv.h" | ||
36 | #include "fourcc.h" | ||
37 | |||
38 | #include "intel.h" | ||
39 | #include "intel_xvmc.h" | ||
40 | #include "intel_video.h" | ||
41 | #include "i830_reg.h" | ||
42 | #include "i965_reg.h" | ||
43 | #include "brw_defines.h" | ||
44 | #include "brw_structs.h" | ||
45 | #include <string.h> | ||
46 | |||
47 | |||
48 | /* Make assert() work. */ | ||
49 | #undef NDEBUG | ||
50 | #include <assert.h> | ||
51 | |||
/*
 * Instruction words for the "sip" kernel: one wait instruction followed by
 * nine identical nops.  Each row is one instruction made of four dwords.
 */
#define SIP_NOP_INSN {0x0040007e, 0x20000c21, 0x00690000, 0x00000000}

static const uint32_t sip_kernel_static[][4] = {
	/* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */
	{0x00000030, 0x20000108, 0x00001220, 0x00000000},
	/* nop (4) g0<1>UD { align1 + }  -- nine times */
	SIP_NOP_INSN,
	SIP_NOP_INSN,
	SIP_NOP_INSN,
	SIP_NOP_INSN,
	SIP_NOP_INSN,
	SIP_NOP_INSN,
	SIP_NOP_INSN,
	SIP_NOP_INSN,
	SIP_NOP_INSN,
};

#undef SIP_NOP_INSN
74 | |||
75 | /* | ||
76 | * this program computes dA/dx and dA/dy for the texture coordinates along | ||
77 | * with the base texture coordinate. It was extracted from the Mesa driver. | ||
78 | * It uses about 10 GRF registers. | ||
79 | */ | ||
80 | |||
81 | #define SF_KERNEL_NUM_GRF 16 | ||
82 | #define SF_MAX_THREADS 1 | ||
83 | |||
84 | static const uint32_t sf_kernel_static[][4] = { | ||
85 | #include "exa_sf.g4b" | ||
86 | }; | ||
87 | |||
88 | /* | ||
89 | * Ok, this kernel picks up the required data flow values in g0 and g1 | ||
90 | * and passes those along in m0 and m1. In m2-m9, it sticks constant | ||
91 | * values (bright pink). | ||
92 | */ | ||
93 | |||
/* Our PS kernel uses less than 32 GRF registers (about 20) */
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 32

/*
 * Convert a GRF register count into "number of 16-register blocks, minus
 * one" (e.g. 16 -> 0, 32 -> 1).  The argument is parenthesized so that an
 * expression such as `a << b` or `a | b` is evaluated before the rounding.
 */
#define BRW_GRF_BLOCKS(nreg) (((nreg) + 15) / 16 - 1)
99 | |||
100 | static const uint32_t ps_kernel_packed_static[][4] = { | ||
101 | #include "exa_wm_xy.g4b" | ||
102 | #include "exa_wm_src_affine.g4b" | ||
103 | #include "exa_wm_src_sample_argb.g4b" | ||
104 | #include "exa_wm_yuv_rgb.g4b" | ||
105 | #include "exa_wm_write.g4b" | ||
106 | }; | ||
107 | |||
108 | static const uint32_t ps_kernel_planar_static[][4] = { | ||
109 | #include "exa_wm_xy.g4b" | ||
110 | #include "exa_wm_src_affine.g4b" | ||
111 | #include "exa_wm_src_sample_planar.g4b" | ||
112 | #include "exa_wm_yuv_rgb.g4b" | ||
113 | #include "exa_wm_write.g4b" | ||
114 | }; | ||
115 | |||
116 | /* new program for Ironlake */ | ||
117 | static const uint32_t sf_kernel_static_gen5[][4] = { | ||
118 | #include "exa_sf.g4b.gen5" | ||
119 | }; | ||
120 | |||
121 | static const uint32_t ps_kernel_packed_static_gen5[][4] = { | ||
122 | #include "exa_wm_xy.g4b.gen5" | ||
123 | #include "exa_wm_src_affine.g4b.gen5" | ||
124 | #include "exa_wm_src_sample_argb.g4b.gen5" | ||
125 | #include "exa_wm_yuv_rgb.g4b.gen5" | ||
126 | #include "exa_wm_write.g4b.gen5" | ||
127 | }; | ||
128 | |||
129 | static const uint32_t ps_kernel_planar_static_gen5[][4] = { | ||
130 | #include "exa_wm_xy.g4b.gen5" | ||
131 | #include "exa_wm_src_affine.g4b.gen5" | ||
132 | #include "exa_wm_src_sample_planar.g4b.gen5" | ||
133 | #include "exa_wm_yuv_rgb.g4b.gen5" | ||
134 | #include "exa_wm_write.g4b.gen5" | ||
135 | }; | ||
136 | |||
137 | /* programs for Sandybridge */ | ||
138 | static const uint32_t ps_kernel_packed_static_gen6[][4] = { | ||
139 | #include "exa_wm_src_affine.g6b" | ||
140 | #include "exa_wm_src_sample_argb.g6b" | ||
141 | #include "exa_wm_yuv_rgb.g6b" | ||
142 | #include "exa_wm_write.g6b" | ||
143 | }; | ||
144 | |||
145 | static const uint32_t ps_kernel_planar_static_gen6[][4] = { | ||
146 | #include "exa_wm_src_affine.g6b" | ||
147 | #include "exa_wm_src_sample_planar.g6b" | ||
148 | #include "exa_wm_yuv_rgb.g6b" | ||
149 | #include "exa_wm_write.g6b" | ||
150 | }; | ||
151 | |||
152 | /* programs for Ivybridge */ | ||
153 | static const uint32_t ps_kernel_packed_static_gen7[][4] = { | ||
154 | #include "exa_wm_src_affine.g7b" | ||
155 | #include "exa_wm_src_sample_argb.g7b" | ||
156 | #include "exa_wm_yuv_rgb.g7b" | ||
157 | #include "exa_wm_write.g7b" | ||
158 | }; | ||
159 | |||
160 | static const uint32_t ps_kernel_planar_static_gen7[][4] = { | ||
161 | #include "exa_wm_src_affine.g7b" | ||
162 | #include "exa_wm_src_sample_planar.g7b" | ||
163 | #include "exa_wm_yuv_rgb.g7b" | ||
164 | #include "exa_wm_write.g7b" | ||
165 | }; | ||
166 | |||
#ifndef MAX2
#define MAX2(a,b) ((a) > (b) ? (a) : (b))
#endif

/*
 * Every surface state slot is padded to the larger of the two hardware
 * layouts (each rounded up to 32 bytes), so one slot size serves both.
 */
#define SURFACE_STATE_PADDED_SIZE_I965 ALIGN(sizeof(struct brw_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7 ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE MAX2(SURFACE_STATE_PADDED_SIZE_I965, SURFACE_STATE_PADDED_SIZE_GEN7)
/* Byte offset of slot `index`; the argument is parenthesized so that
 * expression arguments (e.g. `i + 1`) scale as a whole. */
#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * (index))
175 | |||
/* Return the raw 32-bit pattern that represents the float `f`. */
static uint32_t float_to_uint(float f)
{
	uint32_t bits;

	/* Byte-wise copy of the value; no numeric conversion takes place. */
	memcpy(&bits, &f, sizeof(bits));
	return bits;
}
185 | |||
186 | #if 0 | ||
187 | static struct { | ||
188 | uint32_t svg_ctl; | ||
189 | char *name; | ||
190 | } svg_ctl_bits[] = { | ||
191 | { | ||
192 | BRW_SVG_CTL_GS_BA, "General State Base Address"}, { | ||
193 | BRW_SVG_CTL_SS_BA, "Surface State Base Address"}, { | ||
194 | BRW_SVG_CTL_IO_BA, "Indirect Object Base Address"}, { | ||
195 | BRW_SVG_CTL_GS_AUB, "Generate State Access Upper Bound"}, { | ||
196 | BRW_SVG_CTL_IO_AUB, "Indirect Object Access Upper Bound"}, { | ||
197 | BRW_SVG_CTL_SIP, "System Instruction Pointer"}, { | ||
198 | 0, 0},}; | ||
199 | |||
200 | static void brw_debug(ScrnInfoPtr scrn, char *when) | ||
201 | { | ||
202 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
203 | int i; | ||
204 | uint32_t v; | ||
205 | |||
206 | ErrorF("brw_debug: %s\n", when); | ||
207 | for (i = 0; svg_ctl_bits[i].name; i++) { | ||
208 | OUTREG(BRW_SVG_CTL, svg_ctl_bits[i].svg_ctl); | ||
209 | v = INREG(BRW_SVG_RDATA); | ||
210 | ErrorF("\t%34.34s: 0x%08x\n", svg_ctl_bits[i].name, v); | ||
211 | } | ||
212 | } | ||
213 | #endif | ||
214 | |||
215 | #define WATCH_SF 0 | ||
216 | #define WATCH_WIZ 0 | ||
217 | #define WATCH_STATS 0 | ||
218 | |||
219 | static void i965_pre_draw_debug(ScrnInfoPtr scrn) | ||
220 | { | ||
221 | #if 0 | ||
222 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
223 | #endif | ||
224 | |||
225 | #if 0 | ||
226 | ErrorF("before EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n", | ||
227 | INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), | ||
228 | INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); | ||
229 | |||
230 | OUTREG(BRW_VF_CTL, | ||
231 | BRW_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID | | ||
232 | BRW_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX | | ||
233 | BRW_VF_CTL_SNAPSHOT_ENABLE); | ||
234 | OUTREG(BRW_VF_STRG_VAL, 0); | ||
235 | #endif | ||
236 | |||
237 | #if 0 | ||
238 | OUTREG(BRW_VS_CTL, | ||
239 | BRW_VS_CTL_SNAPSHOT_ALL_THREADS | | ||
240 | BRW_VS_CTL_SNAPSHOT_MUX_VALID_COUNT | | ||
241 | BRW_VS_CTL_THREAD_SNAPSHOT_ENABLE); | ||
242 | |||
243 | OUTREG(BRW_VS_STRG_VAL, 0); | ||
244 | #endif | ||
245 | |||
246 | #if WATCH_SF | ||
247 | OUTREG(BRW_SF_CTL, | ||
248 | BRW_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT | | ||
249 | BRW_SF_CTL_SNAPSHOT_ALL_THREADS | | ||
250 | BRW_SF_CTL_THREAD_SNAPSHOT_ENABLE); | ||
251 | OUTREG(BRW_SF_STRG_VAL, 0); | ||
252 | #endif | ||
253 | |||
254 | #if WATCH_WIZ | ||
255 | OUTREG(BRW_WIZ_CTL, | ||
256 | BRW_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE | | ||
257 | BRW_WIZ_CTL_SNAPSHOT_ALL_THREADS | BRW_WIZ_CTL_SNAPSHOT_ENABLE); | ||
258 | OUTREG(BRW_WIZ_STRG_VAL, (box_x1) | (box_y1 << 16)); | ||
259 | #endif | ||
260 | |||
261 | #if 0 | ||
262 | OUTREG(BRW_TS_CTL, | ||
263 | BRW_TS_CTL_SNAPSHOT_MESSAGE_ERROR | | ||
264 | BRW_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS | | ||
265 | BRW_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS | | ||
266 | BRW_TS_CTL_SNAPSHOT_ENABLE); | ||
267 | #endif | ||
268 | } | ||
269 | |||
270 | static void i965_post_draw_debug(ScrnInfoPtr scrn) | ||
271 | { | ||
272 | #if 0 | ||
273 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
274 | #endif | ||
275 | |||
276 | #if 0 | ||
277 | for (j = 0; j < 100000; j++) { | ||
278 | ctl = INREG(BRW_VF_CTL); | ||
279 | if (ctl & BRW_VF_CTL_SNAPSHOT_COMPLETE) | ||
280 | break; | ||
281 | } | ||
282 | |||
283 | rdata = INREG(BRW_VF_RDATA); | ||
284 | OUTREG(BRW_VF_CTL, 0); | ||
285 | ErrorF("VF_CTL: 0x%08x VF_RDATA: 0x%08x\n", ctl, rdata); | ||
286 | #endif | ||
287 | |||
288 | #if 0 | ||
289 | for (j = 0; j < 1000000; j++) { | ||
290 | ctl = INREG(BRW_VS_CTL); | ||
291 | if (ctl & BRW_VS_CTL_SNAPSHOT_COMPLETE) | ||
292 | break; | ||
293 | } | ||
294 | |||
295 | rdata = INREG(BRW_VS_RDATA); | ||
296 | for (k = 0; k <= 3; k++) { | ||
297 | OUTREG(BRW_VS_CTL, BRW_VS_CTL_SNAPSHOT_COMPLETE | (k << 8)); | ||
298 | rdata = INREG(BRW_VS_RDATA); | ||
299 | ErrorF("VS_CTL: 0x%08x VS_RDATA(%d): 0x%08x\n", ctl, k, rdata); | ||
300 | } | ||
301 | |||
302 | OUTREG(BRW_VS_CTL, 0); | ||
303 | #endif | ||
304 | |||
305 | #if WATCH_SF | ||
306 | for (j = 0; j < 1000000; j++) { | ||
307 | ctl = INREG(BRW_SF_CTL); | ||
308 | if (ctl & BRW_SF_CTL_SNAPSHOT_COMPLETE) | ||
309 | break; | ||
310 | } | ||
311 | |||
312 | for (k = 0; k <= 7; k++) { | ||
313 | OUTREG(BRW_SF_CTL, BRW_SF_CTL_SNAPSHOT_COMPLETE | (k << 8)); | ||
314 | rdata = INREG(BRW_SF_RDATA); | ||
315 | ErrorF("SF_CTL: 0x%08x SF_RDATA(%d): 0x%08x\n", ctl, k, rdata); | ||
316 | } | ||
317 | |||
318 | OUTREG(BRW_SF_CTL, 0); | ||
319 | #endif | ||
320 | |||
321 | #if WATCH_WIZ | ||
322 | for (j = 0; j < 100000; j++) { | ||
323 | ctl = INREG(BRW_WIZ_CTL); | ||
324 | if (ctl & BRW_WIZ_CTL_SNAPSHOT_COMPLETE) | ||
325 | break; | ||
326 | } | ||
327 | |||
328 | rdata = INREG(BRW_WIZ_RDATA); | ||
329 | OUTREG(BRW_WIZ_CTL, 0); | ||
330 | ErrorF("WIZ_CTL: 0x%08x WIZ_RDATA: 0x%08x\n", ctl, rdata); | ||
331 | #endif | ||
332 | |||
333 | #if 0 | ||
334 | for (j = 0; j < 100000; j++) { | ||
335 | ctl = INREG(BRW_TS_CTL); | ||
336 | if (ctl & BRW_TS_CTL_SNAPSHOT_COMPLETE) | ||
337 | break; | ||
338 | } | ||
339 | |||
340 | rdata = INREG(BRW_TS_RDATA); | ||
341 | OUTREG(BRW_TS_CTL, 0); | ||
342 | ErrorF("TS_CTL: 0x%08x TS_RDATA: 0x%08x\n", ctl, rdata); | ||
343 | |||
344 | ErrorF("after EU_ATT 0x%08x%08x EU_ATT_DATA 0x%08x%08x\n", | ||
345 | INREG(BRW_EU_ATT_1), INREG(BRW_EU_ATT_0), | ||
346 | INREG(BRW_EU_ATT_DATA_1), INREG(BRW_EU_ATT_DATA_0)); | ||
347 | #endif | ||
348 | |||
349 | #if 0 | ||
350 | for (j = 0; j < 256; j++) { | ||
351 | OUTREG(BRW_TD_CTL, j << BRW_TD_CTL_MUX_SHIFT); | ||
352 | rdata = INREG(BRW_TD_RDATA); | ||
353 | ErrorF("TD_RDATA(%d): 0x%08x\n", j, rdata); | ||
354 | } | ||
355 | #endif | ||
356 | } | ||
357 | |||
358 | /* For 3D, the VS must have 8, 12, 16, 24, or 32 VUEs allocated to it. | ||
359 | * A VUE consists of a 256-bit vertex header followed by the vertex data, | ||
360 | * which in our case is 4 floats (128 bits), thus a single 512-bit URB | ||
361 | * entry. | ||
362 | */ | ||
363 | #define URB_VS_ENTRIES 8 | ||
364 | #define URB_VS_ENTRY_SIZE 1 | ||
365 | |||
366 | #define URB_GS_ENTRIES 0 | ||
367 | #define URB_GS_ENTRY_SIZE 0 | ||
368 | |||
369 | #define URB_CLIP_ENTRIES 0 | ||
370 | #define URB_CLIP_ENTRY_SIZE 0 | ||
371 | |||
372 | /* The SF kernel we use outputs only 4 256-bit registers, leading to an | ||
373 | * entry size of 2 512-bit URBs. We don't need to have many entries to | ||
374 | * output as we're generally working on large rectangles and don't care | ||
375 | * about having WM threads running on different rectangles simultaneously. | ||
376 | */ | ||
377 | #define URB_SF_ENTRIES 1 | ||
378 | #define URB_SF_ENTRY_SIZE 2 | ||
379 | |||
380 | #define URB_CS_ENTRIES 0 | ||
381 | #define URB_CS_ENTRY_SIZE 0 | ||
382 | |||
383 | static void i965_create_dst_surface_state(ScrnInfoPtr scrn, | ||
384 | PixmapPtr pixmap, | ||
385 | drm_intel_bo *surf_bo, | ||
386 | uint32_t offset) | ||
387 | { | ||
388 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
389 | struct brw_surface_state dest_surf_state; | ||
390 | drm_intel_bo *pixmap_bo = intel_get_pixmap_bo(pixmap); | ||
391 | assert(pixmap_bo != NULL); | ||
392 | |||
393 | memset(&dest_surf_state, 0, sizeof(dest_surf_state)); | ||
394 | |||
395 | dest_surf_state.ss0.surface_type = BRW_SURFACE_2D; | ||
396 | dest_surf_state.ss0.data_return_format = | ||
397 | BRW_SURFACERETURNFORMAT_FLOAT32; | ||
398 | if (intel->cpp == 2) { | ||
399 | dest_surf_state.ss0.surface_format = | ||
400 | BRW_SURFACEFORMAT_B5G6R5_UNORM; | ||
401 | } else { | ||
402 | dest_surf_state.ss0.surface_format = | ||
403 | BRW_SURFACEFORMAT_B8G8R8A8_UNORM; | ||
404 | } | ||
405 | dest_surf_state.ss0.writedisable_alpha = 0; | ||
406 | dest_surf_state.ss0.writedisable_red = 0; | ||
407 | dest_surf_state.ss0.writedisable_green = 0; | ||
408 | dest_surf_state.ss0.writedisable_blue = 0; | ||
409 | dest_surf_state.ss0.color_blend = 1; | ||
410 | dest_surf_state.ss0.vert_line_stride = 0; | ||
411 | dest_surf_state.ss0.vert_line_stride_ofs = 0; | ||
412 | dest_surf_state.ss0.mipmap_layout_mode = 0; | ||
413 | dest_surf_state.ss0.render_cache_read_mode = 0; | ||
414 | |||
415 | dest_surf_state.ss1.base_addr = | ||
416 | intel_emit_reloc(surf_bo, offset + offsetof(struct brw_surface_state, ss1), | ||
417 | pixmap_bo, 0, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); | ||
418 | |||
419 | dest_surf_state.ss2.height = pixmap->drawable.height - 1; | ||
420 | dest_surf_state.ss2.width = pixmap->drawable.width - 1; | ||
421 | dest_surf_state.ss2.mip_count = 0; | ||
422 | dest_surf_state.ss2.render_target_rotation = 0; | ||
423 | dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1; | ||
424 | dest_surf_state.ss3.tiled_surface = intel_pixmap_tiled(pixmap); | ||
425 | dest_surf_state.ss3.tile_walk = 0; /* TileX */ | ||
426 | |||
427 | dri_bo_subdata(surf_bo, | ||
428 | offset, sizeof(dest_surf_state), | ||
429 | &dest_surf_state); | ||
430 | } | ||
431 | |||
432 | static void i965_create_src_surface_state(ScrnInfoPtr scrn, | ||
433 | drm_intel_bo * src_bo, | ||
434 | uint32_t src_offset, | ||
435 | int src_width, | ||
436 | int src_height, | ||
437 | int src_pitch, | ||
438 | uint32_t src_surf_format, | ||
439 | drm_intel_bo *surface_bo, | ||
440 | uint32_t offset) | ||
441 | { | ||
442 | struct brw_surface_state src_surf_state; | ||
443 | |||
444 | memset(&src_surf_state, 0, sizeof(src_surf_state)); | ||
445 | |||
446 | /* Set up the source surface state buffer */ | ||
447 | src_surf_state.ss0.surface_type = BRW_SURFACE_2D; | ||
448 | src_surf_state.ss0.surface_format = src_surf_format; | ||
449 | src_surf_state.ss0.writedisable_alpha = 0; | ||
450 | src_surf_state.ss0.writedisable_red = 0; | ||
451 | src_surf_state.ss0.writedisable_green = 0; | ||
452 | src_surf_state.ss0.writedisable_blue = 0; | ||
453 | src_surf_state.ss0.color_blend = 1; | ||
454 | src_surf_state.ss0.vert_line_stride = 0; | ||
455 | src_surf_state.ss0.vert_line_stride_ofs = 0; | ||
456 | src_surf_state.ss0.mipmap_layout_mode = 0; | ||
457 | src_surf_state.ss0.render_cache_read_mode = 0; | ||
458 | |||
459 | src_surf_state.ss2.width = src_width - 1; | ||
460 | src_surf_state.ss2.height = src_height - 1; | ||
461 | src_surf_state.ss2.mip_count = 0; | ||
462 | src_surf_state.ss2.render_target_rotation = 0; | ||
463 | src_surf_state.ss3.pitch = src_pitch - 1; | ||
464 | |||
465 | if (src_bo) { | ||
466 | src_surf_state.ss1.base_addr = | ||
467 | intel_emit_reloc(surface_bo, | ||
468 | offset + offsetof(struct brw_surface_state, ss1), | ||
469 | src_bo, src_offset, | ||
470 | I915_GEM_DOMAIN_SAMPLER, 0); | ||
471 | } else { | ||
472 | src_surf_state.ss1.base_addr = src_offset; | ||
473 | } | ||
474 | |||
475 | dri_bo_subdata(surface_bo, | ||
476 | offset, sizeof(src_surf_state), | ||
477 | &src_surf_state); | ||
478 | } | ||
479 | |||
480 | static void gen7_create_dst_surface_state(ScrnInfoPtr scrn, | ||
481 | PixmapPtr pixmap, | ||
482 | drm_intel_bo *surf_bo, | ||
483 | uint32_t offset) | ||
484 | { | ||
485 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
486 | struct gen7_surface_state dest_surf_state; | ||
487 | drm_intel_bo *pixmap_bo = intel_get_pixmap_bo(pixmap); | ||
488 | assert(pixmap_bo != NULL); | ||
489 | |||
490 | memset(&dest_surf_state, 0, sizeof(dest_surf_state)); | ||
491 | |||
492 | dest_surf_state.ss0.surface_type = BRW_SURFACE_2D; | ||
493 | dest_surf_state.ss0.tiled_surface = intel_pixmap_tiled(pixmap); | ||
494 | dest_surf_state.ss0.tile_walk = 0; /* TileX */ | ||
495 | |||
496 | if (intel->cpp == 2) { | ||
497 | dest_surf_state.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; | ||
498 | } else { | ||
499 | dest_surf_state.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; | ||
500 | } | ||
501 | |||
502 | dest_surf_state.ss1.base_addr = | ||
503 | intel_emit_reloc(surf_bo, | ||
504 | offset + offsetof(struct gen7_surface_state, ss1), | ||
505 | pixmap_bo, 0, | ||
506 | I915_GEM_DOMAIN_SAMPLER, 0); | ||
507 | |||
508 | dest_surf_state.ss2.height = pixmap->drawable.height - 1; | ||
509 | dest_surf_state.ss2.width = pixmap->drawable.width - 1; | ||
510 | |||
511 | dest_surf_state.ss3.pitch = intel_pixmap_pitch(pixmap) - 1; | ||
512 | |||
513 | if (IS_HSW(intel)) { | ||
514 | dest_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED; | ||
515 | dest_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN; | ||
516 | dest_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE; | ||
517 | dest_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA; | ||
518 | } | ||
519 | |||
520 | dri_bo_subdata(surf_bo, | ||
521 | offset, sizeof(dest_surf_state), | ||
522 | &dest_surf_state); | ||
523 | } | ||
524 | |||
525 | static void gen7_create_src_surface_state(ScrnInfoPtr scrn, | ||
526 | drm_intel_bo * src_bo, | ||
527 | uint32_t src_offset, | ||
528 | int src_width, | ||
529 | int src_height, | ||
530 | int src_pitch, | ||
531 | uint32_t src_surf_format, | ||
532 | drm_intel_bo *surface_bo, | ||
533 | uint32_t offset) | ||
534 | { | ||
535 | intel_screen_private * const intel = intel_get_screen_private(scrn); | ||
536 | struct gen7_surface_state src_surf_state; | ||
537 | |||
538 | memset(&src_surf_state, 0, sizeof(src_surf_state)); | ||
539 | |||
540 | src_surf_state.ss0.surface_type = BRW_SURFACE_2D; | ||
541 | src_surf_state.ss0.surface_format = src_surf_format; | ||
542 | |||
543 | if (src_bo) { | ||
544 | src_surf_state.ss1.base_addr = | ||
545 | intel_emit_reloc(surface_bo, | ||
546 | offset + offsetof(struct gen7_surface_state, ss1), | ||
547 | src_bo, src_offset, | ||
548 | I915_GEM_DOMAIN_SAMPLER, 0); | ||
549 | } else { | ||
550 | src_surf_state.ss1.base_addr = src_offset; | ||
551 | } | ||
552 | |||
553 | src_surf_state.ss2.width = src_width - 1; | ||
554 | src_surf_state.ss2.height = src_height - 1; | ||
555 | |||
556 | src_surf_state.ss3.pitch = src_pitch - 1; | ||
557 | |||
558 | if (IS_HSW(intel)) { | ||
559 | src_surf_state.ss7.shader_chanel_select_r = HSW_SCS_RED; | ||
560 | src_surf_state.ss7.shader_chanel_select_g = HSW_SCS_GREEN; | ||
561 | src_surf_state.ss7.shader_chanel_select_b = HSW_SCS_BLUE; | ||
562 | src_surf_state.ss7.shader_chanel_select_a = HSW_SCS_ALPHA; | ||
563 | } | ||
564 | |||
565 | dri_bo_subdata(surface_bo, | ||
566 | offset, sizeof(src_surf_state), | ||
567 | &src_surf_state); | ||
568 | } | ||
569 | |||
570 | static void i965_create_binding_table(ScrnInfoPtr scrn, | ||
571 | drm_intel_bo *bind_bo, | ||
572 | int n_surf) | ||
573 | { | ||
574 | uint32_t binding_table[n_surf]; | ||
575 | int i; | ||
576 | |||
577 | /* Set up a binding table for our surfaces. Only the PS will use it */ | ||
578 | for (i = 0; i < n_surf; i++) | ||
579 | binding_table[i] = i * SURFACE_STATE_PADDED_SIZE; | ||
580 | |||
581 | dri_bo_subdata(bind_bo, | ||
582 | n_surf * SURFACE_STATE_PADDED_SIZE, | ||
583 | sizeof(binding_table), binding_table); | ||
584 | } | ||
585 | |||
586 | static drm_intel_bo *i965_create_sampler_state(ScrnInfoPtr scrn) | ||
587 | { | ||
588 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
589 | struct brw_sampler_state sampler_state; | ||
590 | |||
591 | memset(&sampler_state, 0, sizeof(sampler_state)); | ||
592 | sampler_state.ss0.min_filter = BRW_MAPFILTER_LINEAR; | ||
593 | sampler_state.ss0.mag_filter = BRW_MAPFILTER_LINEAR; | ||
594 | sampler_state.ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; | ||
595 | sampler_state.ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; | ||
596 | sampler_state.ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; | ||
597 | |||
598 | return intel_bo_alloc_for_data(intel, | ||
599 | &sampler_state, sizeof(sampler_state), | ||
600 | "textured video sampler state"); | ||
601 | } | ||
602 | |||
603 | static drm_intel_bo *gen7_create_sampler_state(ScrnInfoPtr scrn) | ||
604 | { | ||
605 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
606 | struct gen7_sampler_state sampler_state; | ||
607 | |||
608 | memset(&sampler_state, 0, sizeof(sampler_state)); | ||
609 | sampler_state.ss0.min_filter = BRW_MAPFILTER_LINEAR; | ||
610 | sampler_state.ss0.mag_filter = BRW_MAPFILTER_LINEAR; | ||
611 | sampler_state.ss3.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; | ||
612 | sampler_state.ss3.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; | ||
613 | sampler_state.ss3.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; | ||
614 | |||
615 | return intel_bo_alloc_for_data(intel, | ||
616 | &sampler_state, sizeof(sampler_state), | ||
617 | "textured video sampler state"); | ||
618 | } | ||
619 | |||
620 | static drm_intel_bo *i965_create_vs_state(ScrnInfoPtr scrn) | ||
621 | { | ||
622 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
623 | struct brw_vs_unit_state vs_state; | ||
624 | |||
625 | /* Set up the vertex shader to be disabled (passthrough) */ | ||
626 | memset(&vs_state, 0, sizeof(vs_state)); | ||
627 | if (IS_GEN5(intel)) | ||
628 | vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; | ||
629 | else | ||
630 | vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; | ||
631 | vs_state.thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; | ||
632 | vs_state.vs6.vs_enable = 0; | ||
633 | vs_state.vs6.vert_cache_disable = 1; | ||
634 | |||
635 | return intel_bo_alloc_for_data(intel, | ||
636 | &vs_state, sizeof(vs_state), | ||
637 | "textured video vs state"); | ||
638 | } | ||
639 | |||
640 | static drm_intel_bo *i965_create_program(ScrnInfoPtr scrn, | ||
641 | const uint32_t * program, | ||
642 | unsigned int program_size) | ||
643 | { | ||
644 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
645 | return intel_bo_alloc_for_data(intel, | ||
646 | program, program_size, | ||
647 | "textured video program"); | ||
648 | } | ||
649 | |||
650 | static drm_intel_bo *i965_create_sf_state(ScrnInfoPtr scrn) | ||
651 | { | ||
652 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
653 | drm_intel_bo *sf_bo, *kernel_bo; | ||
654 | struct brw_sf_unit_state sf_state; | ||
655 | |||
656 | if (IS_GEN5(intel)) | ||
657 | kernel_bo = i965_create_program(scrn, | ||
658 | &sf_kernel_static_gen5[0][0], | ||
659 | sizeof(sf_kernel_static_gen5)); | ||
660 | else | ||
661 | kernel_bo = i965_create_program(scrn, | ||
662 | &sf_kernel_static[0][0], | ||
663 | sizeof(sf_kernel_static)); | ||
664 | if (!kernel_bo) | ||
665 | return NULL; | ||
666 | |||
667 | sf_bo = drm_intel_bo_alloc(intel->bufmgr, | ||
668 | "textured video sf state", 4096, | ||
669 | sizeof(sf_state)); | ||
670 | if (sf_bo == NULL) { | ||
671 | drm_intel_bo_unreference(kernel_bo); | ||
672 | return NULL; | ||
673 | } | ||
674 | |||
675 | /* Set up the SF kernel to do coord interp: for each attribute, | ||
676 | * calculate dA/dx and dA/dy. Hand these interpolation coefficients | ||
677 | * back to SF which then hands pixels off to WM. | ||
678 | */ | ||
679 | memset(&sf_state, 0, sizeof(sf_state)); | ||
680 | sf_state.thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); | ||
681 | sf_state.thread0.kernel_start_pointer = | ||
682 | intel_emit_reloc(sf_bo, offsetof(struct brw_sf_unit_state, thread0), | ||
683 | kernel_bo, sf_state.thread0.grf_reg_count << 1, | ||
684 | I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; | ||
685 | sf_state.sf1.single_program_flow = 1; /* XXX */ | ||
686 | sf_state.sf1.binding_table_entry_count = 0; | ||
687 | sf_state.sf1.thread_priority = 0; | ||
688 | sf_state.sf1.floating_point_mode = 0; /* Mesa does this */ | ||
689 | sf_state.sf1.illegal_op_exception_enable = 1; | ||
690 | sf_state.sf1.mask_stack_exception_enable = 1; | ||
691 | sf_state.sf1.sw_exception_enable = 1; | ||
692 | sf_state.thread2.per_thread_scratch_space = 0; | ||
693 | /* scratch space is not used in our kernel */ | ||
694 | sf_state.thread2.scratch_space_base_pointer = 0; | ||
695 | sf_state.thread3.const_urb_entry_read_length = 0; /* no const URBs */ | ||
696 | sf_state.thread3.const_urb_entry_read_offset = 0; /* no const URBs */ | ||
697 | sf_state.thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ | ||
698 | sf_state.thread3.urb_entry_read_offset = 0; | ||
699 | sf_state.thread3.dispatch_grf_start_reg = 3; | ||
700 | sf_state.thread4.max_threads = SF_MAX_THREADS - 1; | ||
701 | sf_state.thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; | ||
702 | sf_state.thread4.nr_urb_entries = URB_SF_ENTRIES; | ||
703 | sf_state.thread4.stats_enable = 1; | ||
704 | sf_state.sf5.viewport_transform = FALSE; /* skip viewport */ | ||
705 | sf_state.sf6.cull_mode = BRW_CULLMODE_NONE; | ||
706 | sf_state.sf6.scissor = 0; | ||
707 | sf_state.sf7.trifan_pv = 2; | ||
708 | sf_state.sf6.dest_org_vbias = 0x8; | ||
709 | sf_state.sf6.dest_org_hbias = 0x8; | ||
710 | |||
711 | dri_bo_subdata(sf_bo, 0, sizeof(sf_state), &sf_state); | ||
712 | return sf_bo; | ||
713 | } | ||
714 | |||
715 | static drm_intel_bo *i965_create_wm_state(ScrnInfoPtr scrn, | ||
716 | drm_intel_bo * sampler_bo, | ||
717 | Bool is_packed) | ||
718 | { | ||
719 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
720 | drm_intel_bo *wm_bo, *kernel_bo; | ||
721 | struct brw_wm_unit_state wm_state; | ||
722 | |||
723 | if (is_packed) { | ||
724 | if (IS_GEN5(intel)) | ||
725 | kernel_bo = | ||
726 | i965_create_program(scrn, | ||
727 | &ps_kernel_packed_static_gen5[0] | ||
728 | [0], | ||
729 | sizeof | ||
730 | (ps_kernel_packed_static_gen5)); | ||
731 | else | ||
732 | kernel_bo = | ||
733 | i965_create_program(scrn, | ||
734 | &ps_kernel_packed_static[0][0], | ||
735 | sizeof | ||
736 | (ps_kernel_packed_static)); | ||
737 | } else { | ||
738 | if (IS_GEN5(intel)) | ||
739 | kernel_bo = | ||
740 | i965_create_program(scrn, | ||
741 | &ps_kernel_planar_static_gen5[0] | ||
742 | [0], | ||
743 | sizeof | ||
744 | (ps_kernel_planar_static_gen5)); | ||
745 | else | ||
746 | kernel_bo = | ||
747 | i965_create_program(scrn, | ||
748 | &ps_kernel_planar_static[0][0], | ||
749 | sizeof | ||
750 | (ps_kernel_planar_static)); | ||
751 | } | ||
752 | if (!kernel_bo) | ||
753 | return NULL; | ||
754 | |||
755 | wm_bo = drm_intel_bo_alloc(intel->bufmgr, | ||
756 | "textured video wm state", | ||
757 | sizeof(wm_state), 0); | ||
758 | if (wm_bo == NULL) { | ||
759 | drm_intel_bo_unreference(kernel_bo); | ||
760 | return NULL; | ||
761 | } | ||
762 | |||
763 | memset(&wm_state, 0, sizeof(wm_state)); | ||
764 | wm_state.thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); | ||
765 | wm_state.thread0.kernel_start_pointer = | ||
766 | intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, thread0), | ||
767 | kernel_bo, wm_state.thread0.grf_reg_count << 1, | ||
768 | I915_GEM_DOMAIN_INSTRUCTION, 0) >> 6; | ||
769 | wm_state.thread1.single_program_flow = 1; /* XXX */ | ||
770 | if (is_packed) | ||
771 | wm_state.thread1.binding_table_entry_count = 2; | ||
772 | else | ||
773 | wm_state.thread1.binding_table_entry_count = 7; | ||
774 | |||
775 | /* binding table entry count is only used for prefetching, and it has to | ||
776 | * be set 0 for Ironlake | ||
777 | */ | ||
778 | if (IS_GEN5(intel)) | ||
779 | wm_state.thread1.binding_table_entry_count = 0; | ||
780 | |||
781 | /* Though we never use the scratch space in our WM kernel, it has to be | ||
782 | * set, and the minimum allocation is 1024 bytes. | ||
783 | */ | ||
784 | wm_state.thread2.scratch_space_base_pointer = 0; | ||
785 | wm_state.thread2.per_thread_scratch_space = 0; /* 1024 bytes */ | ||
786 | wm_state.thread3.dispatch_grf_start_reg = 3; /* XXX */ | ||
787 | wm_state.thread3.const_urb_entry_read_length = 0; | ||
788 | wm_state.thread3.const_urb_entry_read_offset = 0; | ||
789 | wm_state.thread3.urb_entry_read_length = 1; /* XXX */ | ||
790 | wm_state.thread3.urb_entry_read_offset = 0; /* XXX */ | ||
791 | wm_state.wm4.stats_enable = 1; | ||
792 | wm_state.wm4.sampler_state_pointer = | ||
793 | intel_emit_reloc(wm_bo, offsetof(struct brw_wm_unit_state, wm4), | ||
794 | sampler_bo, 0, | ||
795 | I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; | ||
796 | if (IS_GEN5(intel)) | ||
797 | wm_state.wm4.sampler_count = 0; | ||
798 | else | ||
799 | wm_state.wm4.sampler_count = 1; /* 1-4 samplers used */ | ||
800 | wm_state.wm5.max_threads = PS_MAX_THREADS - 1; | ||
801 | wm_state.wm5.thread_dispatch_enable = 1; | ||
802 | wm_state.wm5.enable_16_pix = 1; | ||
803 | wm_state.wm5.enable_8_pix = 0; | ||
804 | wm_state.wm5.early_depth_test = 1; | ||
805 | |||
806 | dri_bo_subdata(wm_bo, 0, sizeof(wm_state), &wm_state); | ||
807 | drm_intel_bo_unreference(kernel_bo); | ||
808 | return wm_bo; | ||
809 | } | ||
810 | |||
811 | static drm_intel_bo *i965_create_cc_vp_state(ScrnInfoPtr scrn) | ||
812 | { | ||
813 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
814 | struct brw_cc_viewport cc_viewport; | ||
815 | |||
816 | memset(&cc_viewport, 0, sizeof(cc_viewport)); | ||
817 | cc_viewport.min_depth = -1.e35; | ||
818 | cc_viewport.max_depth = 1.e35; | ||
819 | |||
820 | return intel_bo_alloc_for_data(intel, | ||
821 | &cc_viewport, sizeof(cc_viewport), | ||
822 | "textured video cc viewport"); | ||
823 | } | ||
824 | |||
825 | static drm_intel_bo *i965_create_cc_state(ScrnInfoPtr scrn) | ||
826 | { | ||
827 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
828 | drm_intel_bo *cc_bo, *cc_vp_bo; | ||
829 | struct brw_cc_unit_state cc_state; | ||
830 | |||
831 | cc_vp_bo = i965_create_cc_vp_state(scrn); | ||
832 | if (!cc_vp_bo) | ||
833 | return NULL; | ||
834 | |||
835 | cc_bo = drm_intel_bo_alloc(intel->bufmgr, | ||
836 | "textured video cc state", | ||
837 | sizeof(cc_state), 0); | ||
838 | if (cc_bo == NULL){ | ||
839 | drm_intel_bo_unreference(cc_vp_bo); | ||
840 | return NULL; | ||
841 | } | ||
842 | |||
843 | /* Color calculator state */ | ||
844 | memset(&cc_state, 0, sizeof(cc_state)); | ||
845 | cc_state.cc0.stencil_enable = 0; /* disable stencil */ | ||
846 | cc_state.cc2.depth_test = 0; /* disable depth test */ | ||
847 | cc_state.cc2.logicop_enable = 1; /* enable logic op */ | ||
848 | cc_state.cc3.ia_blend_enable = 1; /* blend alpha just like colors */ | ||
849 | cc_state.cc3.blend_enable = 0; /* disable color blend */ | ||
850 | cc_state.cc3.alpha_test = 0; /* disable alpha test */ | ||
851 | cc_state.cc4.cc_viewport_state_offset = | ||
852 | intel_emit_reloc(cc_bo, offsetof(struct brw_cc_unit_state, cc4), | ||
853 | cc_vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0) >> 5; | ||
854 | cc_state.cc5.dither_enable = 0; /* disable dither */ | ||
855 | cc_state.cc5.logicop_func = 0xc; /* WHITE */ | ||
856 | cc_state.cc5.statistics_enable = 1; | ||
857 | cc_state.cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD; | ||
858 | cc_state.cc5.ia_src_blend_factor = BRW_BLENDFACTOR_ONE; | ||
859 | cc_state.cc5.ia_dest_blend_factor = BRW_BLENDFACTOR_ONE; | ||
860 | |||
861 | dri_bo_subdata(cc_bo, 0, sizeof(cc_state), &cc_state); | ||
862 | drm_intel_bo_unreference(cc_vp_bo); | ||
863 | |||
864 | return cc_bo; | ||
865 | } | ||
866 | |||
867 | static void | ||
868 | i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * surface_state_binding_table_bo, int n_src_surf, PixmapPtr pixmap) | ||
869 | { | ||
870 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
871 | int urb_vs_start, urb_vs_size; | ||
872 | int urb_gs_start, urb_gs_size; | ||
873 | int urb_clip_start, urb_clip_size; | ||
874 | int urb_sf_start, urb_sf_size; | ||
875 | int urb_cs_start, urb_cs_size; | ||
876 | int pipe_ctl; | ||
877 | |||
878 | IntelEmitInvarientState(scrn); | ||
879 | intel->last_3d = LAST_3D_VIDEO; | ||
880 | intel->needs_3d_invariant = TRUE; | ||
881 | |||
882 | urb_vs_start = 0; | ||
883 | urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; | ||
884 | urb_gs_start = urb_vs_start + urb_vs_size; | ||
885 | urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; | ||
886 | urb_clip_start = urb_gs_start + urb_gs_size; | ||
887 | urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; | ||
888 | urb_sf_start = urb_clip_start + urb_clip_size; | ||
889 | urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; | ||
890 | urb_cs_start = urb_sf_start + urb_sf_size; | ||
891 | urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; | ||
892 | |||
893 | OUT_BATCH(MI_FLUSH | | ||
894 | MI_STATE_INSTRUCTION_CACHE_FLUSH | | ||
895 | BRW_MI_GLOBAL_SNAPSHOT_RESET); | ||
896 | OUT_BATCH(MI_NOOP); | ||
897 | |||
898 | /* brw_debug (scrn, "before base address modify"); */ | ||
899 | /* Match Mesa driver setup */ | ||
900 | if (INTEL_INFO(intel)->gen >= 045) | ||
901 | OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); | ||
902 | else | ||
903 | OUT_BATCH(BRW_PIPELINE_SELECT | PIPELINE_SELECT_3D); | ||
904 | |||
905 | /* Mesa does this. Who knows... */ | ||
906 | OUT_BATCH(BRW_CS_URB_STATE | 0); | ||
907 | OUT_BATCH((0 << 4) | /* URB Entry Allocation Size */ | ||
908 | (0 << 0)); /* Number of URB Entries */ | ||
909 | |||
910 | /* Zero out the two base address registers so all offsets are | ||
911 | * absolute | ||
912 | */ | ||
913 | if (IS_GEN5(intel)) { | ||
914 | OUT_BATCH(BRW_STATE_BASE_ADDRESS | 6); | ||
915 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ | ||
916 | OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ | ||
917 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ | ||
918 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Instruction base address */ | ||
919 | /* general state max addr, disabled */ | ||
920 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); | ||
921 | /* media object state max addr, disabled */ | ||
922 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); | ||
923 | /* Instruction max addr, disabled */ | ||
924 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); | ||
925 | } else { | ||
926 | OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4); | ||
927 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* Generate state base address */ | ||
928 | OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ | ||
929 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); /* media base addr, don't care */ | ||
930 | /* general state max addr, disabled */ | ||
931 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); | ||
932 | /* media object state max addr, disabled */ | ||
933 | OUT_BATCH(0 | BASE_ADDRESS_MODIFY); | ||
934 | } | ||
935 | |||
936 | /* Set system instruction pointer */ | ||
937 | OUT_BATCH(BRW_STATE_SIP | 0); | ||
938 | /* system instruction pointer */ | ||
939 | OUT_RELOC(intel->video.gen4_sip_kernel_bo, | ||
940 | I915_GEM_DOMAIN_INSTRUCTION, 0, 0); | ||
941 | |||
942 | /* brw_debug (scrn, "after base address modify"); */ | ||
943 | |||
944 | if (IS_GEN5(intel)) | ||
945 | pipe_ctl = BRW_PIPE_CONTROL_NOWRITE; | ||
946 | else | ||
947 | pipe_ctl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH; | ||
948 | |||
949 | /* Pipe control */ | ||
950 | OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctl | 2); | ||
951 | OUT_BATCH(0); /* Destination address */ | ||
952 | OUT_BATCH(0); /* Immediate data low DW */ | ||
953 | OUT_BATCH(0); /* Immediate data high DW */ | ||
954 | |||
955 | /* Binding table pointers */ | ||
956 | OUT_BATCH(BRW_3DSTATE_BINDING_TABLE_POINTERS | 4); | ||
957 | OUT_BATCH(0); /* vs */ | ||
958 | OUT_BATCH(0); /* gs */ | ||
959 | OUT_BATCH(0); /* clip */ | ||
960 | OUT_BATCH(0); /* sf */ | ||
961 | /* Only the PS uses the binding table */ | ||
962 | OUT_BATCH((n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); | ||
963 | |||
964 | /* Blend constant color (magenta is fun) */ | ||
965 | OUT_BATCH(BRW_3DSTATE_CONSTANT_COLOR | 3); | ||
966 | OUT_BATCH(float_to_uint(1.0)); | ||
967 | OUT_BATCH(float_to_uint(0.0)); | ||
968 | OUT_BATCH(float_to_uint(1.0)); | ||
969 | OUT_BATCH(float_to_uint(1.0)); | ||
970 | |||
971 | /* The drawing rectangle clipping is always on. Set it to values that | ||
972 | * shouldn't do any clipping. | ||
973 | */ | ||
974 | OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); /* XXX 3 for BLC or CTG */ | ||
975 | OUT_BATCH(0x00000000); /* ymin, xmin */ | ||
976 | OUT_BATCH((pixmap->drawable.width - 1) | (pixmap->drawable.height - 1) << 16); /* ymax, xmax */ | ||
977 | OUT_BATCH(0x00000000); /* yorigin, xorigin */ | ||
978 | |||
979 | /* skip the depth buffer */ | ||
980 | /* skip the polygon stipple */ | ||
981 | /* skip the polygon stipple offset */ | ||
982 | /* skip the line stipple */ | ||
983 | |||
984 | /* Set the pointers to the 3d pipeline state */ | ||
985 | OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); | ||
986 | OUT_RELOC(intel->video.gen4_vs_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); | ||
987 | /* disable GS, resulting in passthrough */ | ||
988 | OUT_BATCH(BRW_GS_DISABLE); | ||
989 | /* disable CLIP, resulting in passthrough */ | ||
990 | OUT_BATCH(BRW_CLIP_DISABLE); | ||
991 | OUT_RELOC(intel->video.gen4_sf_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); | ||
992 | if (n_src_surf == 1) | ||
993 | OUT_RELOC(intel->video.gen4_wm_packed_bo, | ||
994 | I915_GEM_DOMAIN_INSTRUCTION, 0, 0); | ||
995 | else | ||
996 | OUT_RELOC(intel->video.gen4_wm_planar_bo, | ||
997 | I915_GEM_DOMAIN_INSTRUCTION, 0, 0); | ||
998 | OUT_RELOC(intel->video.gen4_cc_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); | ||
999 | |||
1000 | /* URB fence */ | ||
1001 | OUT_BATCH(BRW_URB_FENCE | | ||
1002 | UF0_CS_REALLOC | | ||
1003 | UF0_SF_REALLOC | | ||
1004 | UF0_CLIP_REALLOC | UF0_GS_REALLOC | UF0_VS_REALLOC | 1); | ||
1005 | OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | | ||
1006 | ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | | ||
1007 | ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); | ||
1008 | OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | | ||
1009 | ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); | ||
1010 | |||
1011 | /* Constant buffer state */ | ||
1012 | OUT_BATCH(BRW_CS_URB_STATE | 0); | ||
1013 | OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) | (URB_CS_ENTRIES << 0)); | ||
1014 | |||
1015 | /* Set up our vertex elements, sourced from the single vertex buffer. */ | ||
1016 | |||
1017 | if (IS_GEN5(intel)) { | ||
1018 | OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); | ||
1019 | /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ | ||
1020 | OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | | ||
1021 | VE0_VALID | | ||
1022 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | | ||
1023 | (0 << VE0_OFFSET_SHIFT)); | ||
1024 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | ||
1025 | | (BRW_VFCOMPONENT_STORE_SRC << | ||
1026 | VE1_VFCOMPONENT_1_SHIFT) | | ||
1027 | (BRW_VFCOMPONENT_STORE_1_FLT << | ||
1028 | VE1_VFCOMPONENT_2_SHIFT) | | ||
1029 | (BRW_VFCOMPONENT_STORE_1_FLT << | ||
1030 | VE1_VFCOMPONENT_3_SHIFT)); | ||
1031 | /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ | ||
1032 | OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | | ||
1033 | VE0_VALID | | ||
1034 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | | ||
1035 | (8 << VE0_OFFSET_SHIFT)); | ||
1036 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | ||
1037 | | (BRW_VFCOMPONENT_STORE_SRC << | ||
1038 | VE1_VFCOMPONENT_1_SHIFT) | | ||
1039 | (BRW_VFCOMPONENT_STORE_1_FLT << | ||
1040 | VE1_VFCOMPONENT_2_SHIFT) | | ||
1041 | (BRW_VFCOMPONENT_STORE_1_FLT << | ||
1042 | VE1_VFCOMPONENT_3_SHIFT)); | ||
1043 | } else { | ||
1044 | OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | 3); | ||
1045 | /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ | ||
1046 | OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | | ||
1047 | VE0_VALID | | ||
1048 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | | ||
1049 | (0 << VE0_OFFSET_SHIFT)); | ||
1050 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | ||
1051 | | (BRW_VFCOMPONENT_STORE_SRC << | ||
1052 | VE1_VFCOMPONENT_1_SHIFT) | | ||
1053 | (BRW_VFCOMPONENT_STORE_1_FLT << | ||
1054 | VE1_VFCOMPONENT_2_SHIFT) | | ||
1055 | (BRW_VFCOMPONENT_STORE_1_FLT << | ||
1056 | VE1_VFCOMPONENT_3_SHIFT) | (0 << | ||
1057 | VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); | ||
1058 | /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ | ||
1059 | OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) | | ||
1060 | VE0_VALID | | ||
1061 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | | ||
1062 | (8 << VE0_OFFSET_SHIFT)); | ||
1063 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | ||
1064 | | (BRW_VFCOMPONENT_STORE_SRC << | ||
1065 | VE1_VFCOMPONENT_1_SHIFT) | | ||
1066 | (BRW_VFCOMPONENT_STORE_1_FLT << | ||
1067 | VE1_VFCOMPONENT_2_SHIFT) | | ||
1068 | (BRW_VFCOMPONENT_STORE_1_FLT << | ||
1069 | VE1_VFCOMPONENT_3_SHIFT) | (4 << | ||
1070 | VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); | ||
1071 | } | ||
1072 | } | ||
1073 | |||
1074 | void | ||
1075 | I965DisplayVideoTextured(ScrnInfoPtr scrn, | ||
1076 | intel_adaptor_private *adaptor_priv, int id, | ||
1077 | RegionPtr dstRegion, | ||
1078 | short width, short height, | ||
1079 | int video_pitch, int video_pitch2, | ||
1080 | short src_w, short src_h, | ||
1081 | short drw_w, short drw_h, PixmapPtr pixmap) | ||
1082 | { | ||
1083 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1084 | BoxPtr pbox; | ||
1085 | int nbox, dxo, dyo, pix_xoff, pix_yoff; | ||
1086 | float src_scale_x, src_scale_y; | ||
1087 | int src_surf; | ||
1088 | int n_src_surf; | ||
1089 | uint32_t src_surf_format; | ||
1090 | uint32_t src_surf_base[6]; | ||
1091 | int src_width[6]; | ||
1092 | int src_height[6]; | ||
1093 | int src_pitch[6]; | ||
1094 | drm_intel_bo *surface_state_binding_table_bo; | ||
1095 | |||
1096 | #if 0 | ||
1097 | ErrorF("BroadwaterDisplayVideoTextured: %dx%d (pitch %d)\n", width, | ||
1098 | height, video_pitch); | ||
1099 | #endif | ||
1100 | |||
1101 | #if 0 | ||
1102 | /* enable debug */ | ||
1103 | OUTREG(INST_PM, (1 << (16 + 4)) | (1 << 4)); | ||
1104 | ErrorF("INST_PM 0x%08x\n", INREG(INST_PM)); | ||
1105 | #endif | ||
1106 | |||
1107 | src_surf_base[0] = adaptor_priv->YBufOffset; | ||
1108 | src_surf_base[1] = adaptor_priv->YBufOffset; | ||
1109 | src_surf_base[2] = adaptor_priv->VBufOffset; | ||
1110 | src_surf_base[3] = adaptor_priv->VBufOffset; | ||
1111 | src_surf_base[4] = adaptor_priv->UBufOffset; | ||
1112 | src_surf_base[5] = adaptor_priv->UBufOffset; | ||
1113 | #if 0 | ||
1114 | ErrorF("base 0 0x%x base 1 0x%x base 2 0x%x\n", | ||
1115 | src_surf_base[0], src_surf_base[1], src_surf_base[2]); | ||
1116 | #endif | ||
1117 | |||
1118 | if (is_planar_fourcc(id)) { | ||
1119 | src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; | ||
1120 | src_width[1] = src_width[0] = width; | ||
1121 | src_height[1] = src_height[0] = height; | ||
1122 | src_pitch[1] = src_pitch[0] = video_pitch2; | ||
1123 | src_width[4] = src_width[5] = src_width[2] = src_width[3] = | ||
1124 | width / 2; | ||
1125 | src_height[4] = src_height[5] = src_height[2] = src_height[3] = | ||
1126 | height / 2; | ||
1127 | src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = | ||
1128 | video_pitch; | ||
1129 | n_src_surf = 6; | ||
1130 | } else { | ||
1131 | if (id == FOURCC_UYVY) | ||
1132 | src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; | ||
1133 | else | ||
1134 | src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; | ||
1135 | |||
1136 | src_width[0] = width; | ||
1137 | src_height[0] = height; | ||
1138 | src_pitch[0] = video_pitch; | ||
1139 | n_src_surf = 1; | ||
1140 | } | ||
1141 | |||
1142 | #if 0 | ||
1143 | ErrorF("dst surf: 0x%08x\n", state_base_offset + dest_surf_offset); | ||
1144 | ErrorF("src surf: 0x%08x\n", state_base_offset + src_surf_offset); | ||
1145 | #endif | ||
1146 | |||
1147 | /* We'll be poking the state buffers that could be in use by the 3d | ||
1148 | * hardware here, but we should have synced the 3D engine already in | ||
1149 | * I830PutImage. | ||
1150 | */ | ||
1151 | |||
1152 | surface_state_binding_table_bo = | ||
1153 | drm_intel_bo_alloc(intel->bufmgr, | ||
1154 | "surface state & binding table", | ||
1155 | (n_src_surf + 1) * (SURFACE_STATE_PADDED_SIZE + sizeof(uint32_t)), | ||
1156 | 4096); | ||
1157 | |||
1158 | if (!surface_state_binding_table_bo) | ||
1159 | return; | ||
1160 | |||
1161 | i965_create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0); | ||
1162 | |||
1163 | for (src_surf = 0; src_surf < n_src_surf; src_surf++) { | ||
1164 | i965_create_src_surface_state(scrn, | ||
1165 | adaptor_priv->buf, | ||
1166 | src_surf_base[src_surf], | ||
1167 | src_width[src_surf], | ||
1168 | src_height[src_surf], | ||
1169 | src_pitch[src_surf], | ||
1170 | src_surf_format, | ||
1171 | surface_state_binding_table_bo, | ||
1172 | (src_surf + 1) * SURFACE_STATE_PADDED_SIZE); | ||
1173 | } | ||
1174 | |||
1175 | i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1); | ||
1176 | |||
1177 | if (intel->video.gen4_sampler_bo == NULL) | ||
1178 | intel->video.gen4_sampler_bo = i965_create_sampler_state(scrn); | ||
1179 | if (intel->video.gen4_sip_kernel_bo == NULL) { | ||
1180 | intel->video.gen4_sip_kernel_bo = | ||
1181 | i965_create_program(scrn, &sip_kernel_static[0][0], | ||
1182 | sizeof(sip_kernel_static)); | ||
1183 | if (!intel->video.gen4_sip_kernel_bo) { | ||
1184 | drm_intel_bo_unreference(surface_state_binding_table_bo); | ||
1185 | return; | ||
1186 | } | ||
1187 | } | ||
1188 | |||
1189 | if (intel->video.gen4_vs_bo == NULL) { | ||
1190 | intel->video.gen4_vs_bo = i965_create_vs_state(scrn); | ||
1191 | if (!intel->video.gen4_vs_bo) { | ||
1192 | drm_intel_bo_unreference(surface_state_binding_table_bo); | ||
1193 | return; | ||
1194 | } | ||
1195 | } | ||
1196 | if (intel->video.gen4_sf_bo == NULL) { | ||
1197 | intel->video.gen4_sf_bo = i965_create_sf_state(scrn); | ||
1198 | if (!intel->video.gen4_sf_bo) { | ||
1199 | drm_intel_bo_unreference(surface_state_binding_table_bo); | ||
1200 | return; | ||
1201 | } | ||
1202 | } | ||
1203 | if (intel->video.gen4_wm_packed_bo == NULL) { | ||
1204 | intel->video.gen4_wm_packed_bo = | ||
1205 | i965_create_wm_state(scrn, intel->video.gen4_sampler_bo, | ||
1206 | TRUE); | ||
1207 | if (!intel->video.gen4_wm_packed_bo) { | ||
1208 | drm_intel_bo_unreference(surface_state_binding_table_bo); | ||
1209 | return; | ||
1210 | } | ||
1211 | } | ||
1212 | |||
1213 | if (intel->video.gen4_wm_planar_bo == NULL) { | ||
1214 | intel->video.gen4_wm_planar_bo = | ||
1215 | i965_create_wm_state(scrn, intel->video.gen4_sampler_bo, | ||
1216 | FALSE); | ||
1217 | if (!intel->video.gen4_wm_planar_bo) { | ||
1218 | drm_intel_bo_unreference(surface_state_binding_table_bo); | ||
1219 | return; | ||
1220 | } | ||
1221 | } | ||
1222 | |||
1223 | if (intel->video.gen4_cc_bo == NULL) { | ||
1224 | intel->video.gen4_cc_bo = i965_create_cc_state(scrn); | ||
1225 | if (!intel->video.gen4_cc_bo) { | ||
1226 | drm_intel_bo_unreference(surface_state_binding_table_bo); | ||
1227 | return; | ||
1228 | } | ||
1229 | } | ||
1230 | |||
1231 | /* Set up the offset for translating from the given region (in screen | ||
1232 | * coordinates) to the backing pixmap. | ||
1233 | */ | ||
1234 | #ifdef COMPOSITE | ||
1235 | pix_xoff = -pixmap->screen_x + pixmap->drawable.x; | ||
1236 | pix_yoff = -pixmap->screen_y + pixmap->drawable.y; | ||
1237 | #else | ||
1238 | pix_xoff = 0; | ||
1239 | pix_yoff = 0; | ||
1240 | #endif | ||
1241 | |||
1242 | dxo = dstRegion->extents.x1; | ||
1243 | dyo = dstRegion->extents.y1; | ||
1244 | |||
1245 | /* Use normalized texture coordinates */ | ||
1246 | src_scale_x = ((float)src_w / width) / (float)drw_w; | ||
1247 | src_scale_y = ((float)src_h / height) / (float)drw_h; | ||
1248 | |||
1249 | pbox = REGION_RECTS(dstRegion); | ||
1250 | nbox = REGION_NUM_RECTS(dstRegion); | ||
1251 | while (nbox--) { | ||
1252 | int box_x1 = pbox->x1; | ||
1253 | int box_y1 = pbox->y1; | ||
1254 | int box_x2 = pbox->x2; | ||
1255 | int box_y2 = pbox->y2; | ||
1256 | int i; | ||
1257 | float vb[12]; | ||
1258 | drm_intel_bo *bo_table[] = { | ||
1259 | NULL, /* vb_bo */ | ||
1260 | intel->batch_bo, | ||
1261 | surface_state_binding_table_bo, | ||
1262 | intel->video.gen4_sampler_bo, | ||
1263 | intel->video.gen4_sip_kernel_bo, | ||
1264 | intel->video.gen4_vs_bo, | ||
1265 | intel->video.gen4_sf_bo, | ||
1266 | intel->video.gen4_wm_packed_bo, | ||
1267 | intel->video.gen4_wm_planar_bo, | ||
1268 | intel->video.gen4_cc_bo, | ||
1269 | }; | ||
1270 | |||
1271 | pbox++; | ||
1272 | |||
1273 | i = 0; | ||
1274 | vb[i++] = (box_x2 - dxo) * src_scale_x; | ||
1275 | vb[i++] = (box_y2 - dyo) * src_scale_y; | ||
1276 | vb[i++] = (float)box_x2 + pix_xoff; | ||
1277 | vb[i++] = (float)box_y2 + pix_yoff; | ||
1278 | |||
1279 | vb[i++] = (box_x1 - dxo) * src_scale_x; | ||
1280 | vb[i++] = (box_y2 - dyo) * src_scale_y; | ||
1281 | vb[i++] = (float)box_x1 + pix_xoff; | ||
1282 | vb[i++] = (float)box_y2 + pix_yoff; | ||
1283 | |||
1284 | vb[i++] = (box_x1 - dxo) * src_scale_x; | ||
1285 | vb[i++] = (box_y1 - dyo) * src_scale_y; | ||
1286 | vb[i++] = (float)box_x1 + pix_xoff; | ||
1287 | vb[i++] = (float)box_y1 + pix_yoff; | ||
1288 | |||
1289 | bo_table[0] = intel_bo_alloc_for_data(intel, | ||
1290 | vb, sizeof(vb), | ||
1291 | "textured video vbo"); | ||
1292 | |||
1293 | if (IS_GEN4(intel)) | ||
1294 | i965_pre_draw_debug(scrn); | ||
1295 | |||
1296 | /* If this command won't fit in the current batch, flush. | ||
1297 | * Assume that it does after being flushed. | ||
1298 | */ | ||
1299 | if (drm_intel_bufmgr_check_aperture_space(bo_table, | ||
1300 | ARRAY_SIZE(bo_table)) | ||
1301 | < 0) { | ||
1302 | intel_batch_submit(scrn); | ||
1303 | } | ||
1304 | |||
1305 | intel_batch_start_atomic(scrn, 150); | ||
1306 | |||
1307 | i965_emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf, pixmap); | ||
1308 | |||
1309 | /* Set up the pointer to our vertex buffer */ | ||
1310 | OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3); | ||
1311 | /* four 32-bit floats per vertex */ | ||
1312 | OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | | ||
1313 | VB0_VERTEXDATA | ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); | ||
1314 | OUT_RELOC(bo_table[0], I915_GEM_DOMAIN_VERTEX, 0, 0); | ||
1315 | if (IS_GEN5(intel)) | ||
1316 | OUT_RELOC(bo_table[0], I915_GEM_DOMAIN_VERTEX, 0, | ||
1317 | i * 4); | ||
1318 | else | ||
1319 | OUT_BATCH(3); /* four corners to our rectangle */ | ||
1320 | OUT_BATCH(0); /* reserved */ | ||
1321 | |||
1322 | OUT_BATCH(BRW_3DPRIMITIVE | BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | (0 << 9) | /* CTG - indirect vertex count */ | ||
1323 | 4); | ||
1324 | OUT_BATCH(3); /* vertex count per instance */ | ||
1325 | OUT_BATCH(0); /* start vertex offset */ | ||
1326 | OUT_BATCH(1); /* single instance */ | ||
1327 | OUT_BATCH(0); /* start instance location */ | ||
1328 | OUT_BATCH(0); /* index buffer offset, ignored */ | ||
1329 | OUT_BATCH(MI_NOOP); | ||
1330 | |||
1331 | intel_batch_end_atomic(scrn); | ||
1332 | |||
1333 | drm_intel_bo_unreference(bo_table[0]); | ||
1334 | |||
1335 | if (IS_GEN4(intel)) | ||
1336 | i965_post_draw_debug(scrn); | ||
1337 | |||
1338 | } | ||
1339 | |||
1340 | /* release reference once we're finished */ | ||
1341 | drm_intel_bo_unreference(surface_state_binding_table_bo); | ||
1342 | |||
1343 | intel_debug_flush(scrn); | ||
1344 | } | ||
1345 | |||
1346 | void i965_free_video(ScrnInfoPtr scrn) | ||
1347 | { | ||
1348 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1349 | |||
1350 | drm_intel_bo_unreference(intel->video.gen4_vs_bo); | ||
1351 | intel->video.gen4_vs_bo = NULL; | ||
1352 | drm_intel_bo_unreference(intel->video.gen4_sf_bo); | ||
1353 | intel->video.gen4_sf_bo = NULL; | ||
1354 | drm_intel_bo_unreference(intel->video.gen4_cc_bo); | ||
1355 | intel->video.gen4_cc_bo = NULL; | ||
1356 | drm_intel_bo_unreference(intel->video.gen4_wm_packed_bo); | ||
1357 | intel->video.gen4_wm_packed_bo = NULL; | ||
1358 | drm_intel_bo_unreference(intel->video.gen4_wm_planar_bo); | ||
1359 | intel->video.gen4_wm_planar_bo = NULL; | ||
1360 | drm_intel_bo_unreference(intel->video.gen4_cc_vp_bo); | ||
1361 | intel->video.gen4_cc_vp_bo = NULL; | ||
1362 | drm_intel_bo_unreference(intel->video.gen4_sampler_bo); | ||
1363 | intel->video.gen4_sampler_bo = NULL; | ||
1364 | drm_intel_bo_unreference(intel->video.gen4_sip_kernel_bo); | ||
1365 | intel->video.gen4_sip_kernel_bo = NULL; | ||
1366 | drm_intel_bo_unreference(intel->video.wm_prog_packed_bo); | ||
1367 | intel->video.wm_prog_packed_bo = NULL; | ||
1368 | drm_intel_bo_unreference(intel->video.wm_prog_planar_bo); | ||
1369 | intel->video.wm_prog_planar_bo = NULL; | ||
1370 | drm_intel_bo_unreference(intel->video.gen6_blend_bo); | ||
1371 | intel->video.gen6_blend_bo = NULL; | ||
1372 | drm_intel_bo_unreference(intel->video.gen6_depth_stencil_bo); | ||
1373 | intel->video.gen6_depth_stencil_bo = NULL; | ||
1374 | } | ||
1375 | |||
1376 | /* for GEN6+ */ | ||
1377 | static drm_intel_bo * | ||
1378 | gen6_create_cc_state(ScrnInfoPtr scrn) | ||
1379 | { | ||
1380 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1381 | struct gen6_color_calc_state cc_state; | ||
1382 | |||
1383 | memset(&cc_state, 0, sizeof(cc_state)); | ||
1384 | cc_state.constant_r = 1.0; | ||
1385 | cc_state.constant_g = 0.0; | ||
1386 | cc_state.constant_b = 1.0; | ||
1387 | cc_state.constant_a = 1.0; | ||
1388 | |||
1389 | return intel_bo_alloc_for_data(intel, | ||
1390 | &cc_state, sizeof(cc_state), | ||
1391 | "textured video cc state"); | ||
1392 | } | ||
1393 | |||
1394 | static drm_intel_bo * | ||
1395 | gen6_create_blend_state(ScrnInfoPtr scrn) | ||
1396 | { | ||
1397 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1398 | struct gen6_blend_state blend_state; | ||
1399 | |||
1400 | memset(&blend_state, 0, sizeof(blend_state)); | ||
1401 | blend_state.blend1.logic_op_enable = 1; | ||
1402 | blend_state.blend1.logic_op_func = 0xc; | ||
1403 | blend_state.blend1.pre_blend_clamp_enable = 1; | ||
1404 | |||
1405 | return intel_bo_alloc_for_data(intel, | ||
1406 | &blend_state, sizeof(blend_state), | ||
1407 | "textured video blend state"); | ||
1408 | } | ||
1409 | |||
1410 | static drm_intel_bo * | ||
1411 | gen6_create_depth_stencil_state(ScrnInfoPtr scrn) | ||
1412 | { | ||
1413 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1414 | struct gen6_depth_stencil_state depth_stencil_state; | ||
1415 | |||
1416 | memset(&depth_stencil_state, 0, sizeof(depth_stencil_state)); | ||
1417 | return intel_bo_alloc_for_data(intel, | ||
1418 | &depth_stencil_state, | ||
1419 | sizeof(depth_stencil_state), | ||
1420 | "textured video blend state"); | ||
1421 | } | ||
1422 | |||
1423 | static Bool | ||
1424 | gen6_create_vidoe_objects(ScrnInfoPtr scrn) | ||
1425 | { | ||
1426 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1427 | drm_intel_bo *(*create_sampler_state)(ScrnInfoPtr); | ||
1428 | const uint32_t *packed_ps_kernel, *planar_ps_kernel; | ||
1429 | unsigned int packed_ps_size, planar_ps_size; | ||
1430 | |||
1431 | if (INTEL_INFO(intel)->gen >= 070) { | ||
1432 | create_sampler_state = gen7_create_sampler_state; | ||
1433 | packed_ps_kernel = &ps_kernel_packed_static_gen7[0][0]; | ||
1434 | packed_ps_size = sizeof(ps_kernel_packed_static_gen7); | ||
1435 | planar_ps_kernel = &ps_kernel_planar_static_gen7[0][0]; | ||
1436 | planar_ps_size = sizeof(ps_kernel_planar_static_gen7); | ||
1437 | } else { | ||
1438 | create_sampler_state = i965_create_sampler_state; | ||
1439 | packed_ps_kernel = &ps_kernel_packed_static_gen6[0][0]; | ||
1440 | packed_ps_size = sizeof(ps_kernel_packed_static_gen6); | ||
1441 | planar_ps_kernel = &ps_kernel_planar_static_gen6[0][0]; | ||
1442 | planar_ps_size = sizeof(ps_kernel_planar_static_gen6); | ||
1443 | } | ||
1444 | |||
1445 | if (intel->video.gen4_sampler_bo == NULL) | ||
1446 | intel->video.gen4_sampler_bo = create_sampler_state(scrn); | ||
1447 | |||
1448 | if (intel->video.wm_prog_packed_bo == NULL) | ||
1449 | intel->video.wm_prog_packed_bo = | ||
1450 | i965_create_program(scrn, | ||
1451 | packed_ps_kernel, | ||
1452 | packed_ps_size); | ||
1453 | |||
1454 | if (intel->video.wm_prog_planar_bo == NULL) | ||
1455 | intel->video.wm_prog_planar_bo = | ||
1456 | i965_create_program(scrn, | ||
1457 | planar_ps_kernel, | ||
1458 | planar_ps_size); | ||
1459 | |||
1460 | if (intel->video.gen4_cc_vp_bo == NULL) | ||
1461 | intel->video.gen4_cc_vp_bo = i965_create_cc_vp_state(scrn); | ||
1462 | |||
1463 | if (intel->video.gen4_cc_bo == NULL) | ||
1464 | intel->video.gen4_cc_bo = gen6_create_cc_state(scrn); | ||
1465 | |||
1466 | if (intel->video.gen6_blend_bo == NULL) | ||
1467 | intel->video.gen6_blend_bo = gen6_create_blend_state(scrn); | ||
1468 | |||
1469 | if (intel->video.gen6_depth_stencil_bo == NULL) | ||
1470 | intel->video.gen6_depth_stencil_bo = gen6_create_depth_stencil_state(scrn); | ||
1471 | |||
1472 | |||
1473 | return (intel->video.gen4_sampler_bo != NULL && | ||
1474 | intel->video.wm_prog_packed_bo != NULL && | ||
1475 | intel->video.wm_prog_planar_bo != NULL && | ||
1476 | intel->video.gen4_cc_vp_bo != NULL && | ||
1477 | intel->video.gen4_cc_bo != NULL && | ||
1478 | intel->video.gen6_blend_bo != NULL && | ||
1479 | intel->video.gen6_depth_stencil_bo != NULL); | ||
1480 | } | ||
1481 | |||
1482 | static void | ||
1483 | gen6_upload_state_base_address(ScrnInfoPtr scrn, drm_intel_bo *surface_state_binding_table_bo) | ||
1484 | { | ||
1485 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1486 | |||
1487 | OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2)); | ||
1488 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */ | ||
1489 | OUT_RELOC(surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */ | ||
1490 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state base address */ | ||
1491 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object base address */ | ||
1492 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction base address */ | ||
1493 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state upper bound */ | ||
1494 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */ | ||
1495 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */ | ||
1496 | OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */ | ||
1497 | } | ||
1498 | |||
1499 | static void | ||
1500 | gen6_upload_drawing_rectangle(ScrnInfoPtr scrn, PixmapPtr pixmap) | ||
1501 | { | ||
1502 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1503 | |||
1504 | OUT_BATCH(BRW_3DSTATE_DRAWING_RECTANGLE | 2); | ||
1505 | OUT_BATCH(0x00000000); /* ymin, xmin */ | ||
1506 | OUT_BATCH((pixmap->drawable.width - 1) | (pixmap->drawable.height - 1) << 16); /* ymax, xmax */ | ||
1507 | OUT_BATCH(0x00000000); /* yorigin, xorigin */ | ||
1508 | } | ||
1509 | |||
1510 | static void | ||
1511 | gen6_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) | ||
1512 | { | ||
1513 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1514 | |||
1515 | /* disable WM constant buffer */ | ||
1516 | OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); | ||
1517 | OUT_BATCH(0); | ||
1518 | OUT_BATCH(0); | ||
1519 | OUT_BATCH(0); | ||
1520 | OUT_BATCH(0); | ||
1521 | |||
1522 | OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); | ||
1523 | if (is_packed) { | ||
1524 | OUT_RELOC(intel->video.wm_prog_packed_bo, | ||
1525 | I915_GEM_DOMAIN_INSTRUCTION, 0, | ||
1526 | 0); | ||
1527 | OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | | ||
1528 | (2 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); | ||
1529 | } else { | ||
1530 | OUT_RELOC(intel->video.wm_prog_planar_bo, | ||
1531 | I915_GEM_DOMAIN_INSTRUCTION, 0, | ||
1532 | 0); | ||
1533 | OUT_BATCH((1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | | ||
1534 | (7 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT)); | ||
1535 | } | ||
1536 | OUT_BATCH(0); | ||
1537 | OUT_BATCH((6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */ | ||
1538 | OUT_BATCH(((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | | ||
1539 | GEN6_3DSTATE_WM_DISPATCH_ENABLE | | ||
1540 | GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); | ||
1541 | OUT_BATCH((1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) | | ||
1542 | GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); | ||
1543 | OUT_BATCH(0); | ||
1544 | OUT_BATCH(0); | ||
1545 | } | ||
1546 | |||
1547 | static void | ||
1548 | gen6_upload_vertex_element_state(ScrnInfoPtr scrn) | ||
1549 | { | ||
1550 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1551 | |||
1552 | /* Set up our vertex elements, sourced from the single vertex buffer. */ | ||
1553 | OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | (5 - 2)); | ||
1554 | /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */ | ||
1555 | OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | | ||
1556 | GEN6_VE0_VALID | | ||
1557 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | | ||
1558 | (0 << VE0_OFFSET_SHIFT)); | ||
1559 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | | ||
1560 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | | ||
1561 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | | ||
1562 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); | ||
1563 | /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */ | ||
1564 | OUT_BATCH((0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) | | ||
1565 | GEN6_VE0_VALID | | ||
1566 | (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) | | ||
1567 | (8 << VE0_OFFSET_SHIFT)); | ||
1568 | OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | | ||
1569 | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | | ||
1570 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | | ||
1571 | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); | ||
1572 | } | ||
1573 | |||
1574 | static void | ||
1575 | gen6_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset) | ||
1576 | { | ||
1577 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1578 | |||
1579 | /* Set up the pointer to our vertex buffer */ | ||
1580 | OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2)); | ||
1581 | /* four 32-bit floats per vertex */ | ||
1582 | OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | | ||
1583 | GEN6_VB0_VERTEXDATA | | ||
1584 | ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); | ||
1585 | OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); | ||
1586 | OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, end_address_offset); | ||
1587 | OUT_BATCH(0); /* reserved */ | ||
1588 | } | ||
1589 | |||
1590 | static void | ||
1591 | gen6_upload_primitive(ScrnInfoPtr scrn) | ||
1592 | { | ||
1593 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1594 | |||
1595 | OUT_BATCH(BRW_3DPRIMITIVE | | ||
1596 | BRW_3DPRIMITIVE_VERTEX_SEQUENTIAL | | ||
1597 | (_3DPRIM_RECTLIST << BRW_3DPRIMITIVE_TOPOLOGY_SHIFT) | | ||
1598 | (0 << 9) | /* Internal Vertex Count */ | ||
1599 | (6 - 2)); | ||
1600 | OUT_BATCH(3); /* vertex count per instance */ | ||
1601 | OUT_BATCH(0); /* start vertex offset */ | ||
1602 | OUT_BATCH(1); /* single instance */ | ||
1603 | OUT_BATCH(0); /* start instance location */ | ||
1604 | OUT_BATCH(0); /* index buffer offset, ignored */ | ||
1605 | } | ||
1606 | |||
1607 | static void | ||
1608 | gen6_emit_video_setup(ScrnInfoPtr scrn, | ||
1609 | drm_intel_bo *surface_state_binding_table_bo, int n_src_surf, | ||
1610 | PixmapPtr pixmap, | ||
1611 | drm_intel_bo *vertex_bo, uint32_t end_address_offset) | ||
1612 | { | ||
1613 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1614 | |||
1615 | assert(n_src_surf == 1 || n_src_surf == 6); | ||
1616 | IntelEmitInvarientState(scrn); | ||
1617 | intel->last_3d = LAST_3D_VIDEO; | ||
1618 | intel->needs_3d_invariant = TRUE; | ||
1619 | |||
1620 | gen6_upload_invariant_states(intel); | ||
1621 | gen6_upload_state_base_address(scrn, surface_state_binding_table_bo); | ||
1622 | gen6_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo); | ||
1623 | gen6_upload_urb(intel); | ||
1624 | gen6_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0); | ||
1625 | gen6_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo); | ||
1626 | gen6_upload_vs_state(intel); | ||
1627 | gen6_upload_gs_state(intel); | ||
1628 | gen6_upload_clip_state(intel); | ||
1629 | gen6_upload_sf_state(intel, 1, 0); | ||
1630 | gen6_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE); | ||
1631 | gen6_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); | ||
1632 | gen6_upload_depth_buffer_state(intel); | ||
1633 | gen6_upload_drawing_rectangle(scrn, pixmap); | ||
1634 | gen6_upload_vertex_element_state(scrn); | ||
1635 | gen6_upload_vertex_buffer(scrn, vertex_bo, end_address_offset); | ||
1636 | gen6_upload_primitive(scrn); | ||
1637 | } | ||
1638 | |||
1639 | static void | ||
1640 | gen7_upload_wm_state(ScrnInfoPtr scrn, Bool is_packed) | ||
1641 | { | ||
1642 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1643 | unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB; | ||
1644 | unsigned int num_samples = 0; | ||
1645 | |||
1646 | if (IS_HSW(intel)) { | ||
1647 | max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW; | ||
1648 | num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW; | ||
1649 | } | ||
1650 | |||
1651 | /* disable WM constant buffer */ | ||
1652 | OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (7 - 2)); | ||
1653 | OUT_BATCH(0); | ||
1654 | OUT_BATCH(0); | ||
1655 | OUT_BATCH(0); | ||
1656 | OUT_BATCH(0); | ||
1657 | OUT_BATCH(0); | ||
1658 | OUT_BATCH(0); | ||
1659 | |||
1660 | OUT_BATCH(GEN6_3DSTATE_WM | (3 - 2)); | ||
1661 | OUT_BATCH(GEN7_WM_DISPATCH_ENABLE | | ||
1662 | GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); | ||
1663 | OUT_BATCH(0); | ||
1664 | |||
1665 | OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2)); | ||
1666 | |||
1667 | if (is_packed) { | ||
1668 | OUT_RELOC(intel->video.wm_prog_packed_bo, | ||
1669 | I915_GEM_DOMAIN_INSTRUCTION, 0, | ||
1670 | 0); | ||
1671 | OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | | ||
1672 | (2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); | ||
1673 | } else { | ||
1674 | OUT_RELOC(intel->video.wm_prog_planar_bo, | ||
1675 | I915_GEM_DOMAIN_INSTRUCTION, 0, | ||
1676 | 0); | ||
1677 | OUT_BATCH((1 << GEN7_PS_SAMPLER_COUNT_SHIFT) | | ||
1678 | (7 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); | ||
1679 | } | ||
1680 | |||
1681 | OUT_BATCH(0); /* scratch space base offset */ | ||
1682 | OUT_BATCH( | ||
1683 | ((48 - 1) << max_threads_shift) | num_samples | | ||
1684 | GEN7_PS_ATTRIBUTE_ENABLE | | ||
1685 | GEN7_PS_16_DISPATCH_ENABLE); | ||
1686 | OUT_BATCH( | ||
1687 | (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0)); | ||
1688 | OUT_BATCH(0); /* kernel 1 pointer */ | ||
1689 | OUT_BATCH(0); /* kernel 2 pointer */ | ||
1690 | } | ||
1691 | |||
1692 | static void | ||
1693 | gen7_upload_vertex_buffer(ScrnInfoPtr scrn, drm_intel_bo *vertex_bo, uint32_t end_address_offset) | ||
1694 | { | ||
1695 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1696 | |||
1697 | /* Set up the pointer to our vertex buffer */ | ||
1698 | OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | (5 - 2)); | ||
1699 | /* four 32-bit floats per vertex */ | ||
1700 | OUT_BATCH((0 << GEN6_VB0_BUFFER_INDEX_SHIFT) | | ||
1701 | GEN6_VB0_VERTEXDATA | | ||
1702 | GEN7_VB0_ADDRESS_MODIFYENABLE | | ||
1703 | ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); | ||
1704 | OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); | ||
1705 | OUT_RELOC(vertex_bo, I915_GEM_DOMAIN_VERTEX, 0, end_address_offset); | ||
1706 | OUT_BATCH(0); /* reserved */ | ||
1707 | } | ||
1708 | |||
1709 | static void | ||
1710 | gen7_upload_primitive(ScrnInfoPtr scrn) | ||
1711 | { | ||
1712 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1713 | |||
1714 | OUT_BATCH(BRW_3DPRIMITIVE | (7 - 2)); | ||
1715 | OUT_BATCH(_3DPRIM_RECTLIST | | ||
1716 | GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); | ||
1717 | OUT_BATCH(3); /* vertex count per instance */ | ||
1718 | OUT_BATCH(0); /* start vertex offset */ | ||
1719 | OUT_BATCH(1); /* single instance */ | ||
1720 | OUT_BATCH(0); /* start instance location */ | ||
1721 | OUT_BATCH(0); | ||
1722 | } | ||
1723 | |||
1724 | static void | ||
1725 | gen7_emit_video_setup(ScrnInfoPtr scrn, | ||
1726 | drm_intel_bo *surface_state_binding_table_bo, int n_src_surf, | ||
1727 | PixmapPtr pixmap, | ||
1728 | drm_intel_bo *vertex_bo, uint32_t end_address_offset) | ||
1729 | { | ||
1730 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1731 | |||
1732 | assert(n_src_surf == 1 || n_src_surf == 6); | ||
1733 | IntelEmitInvarientState(scrn); | ||
1734 | intel->last_3d = LAST_3D_VIDEO; | ||
1735 | intel->needs_3d_invariant = TRUE; | ||
1736 | |||
1737 | gen6_upload_invariant_states(intel); | ||
1738 | gen6_upload_state_base_address(scrn, surface_state_binding_table_bo); | ||
1739 | gen7_upload_viewport_state_pointers(intel, intel->video.gen4_cc_vp_bo); | ||
1740 | gen7_upload_urb(intel); | ||
1741 | gen7_upload_cc_state_pointers(intel, intel->video.gen6_blend_bo, intel->video.gen4_cc_bo, intel->video.gen6_depth_stencil_bo, 0); | ||
1742 | gen7_upload_sampler_state_pointers(intel, intel->video.gen4_sampler_bo); | ||
1743 | gen7_upload_bypass_states(intel); | ||
1744 | gen6_upload_vs_state(intel); | ||
1745 | gen6_upload_clip_state(intel); | ||
1746 | gen7_upload_sf_state(intel, 1, 0); | ||
1747 | gen7_upload_wm_state(scrn, n_src_surf == 1 ? TRUE : FALSE); | ||
1748 | gen7_upload_binding_table(intel, (n_src_surf + 1) * SURFACE_STATE_PADDED_SIZE); | ||
1749 | gen7_upload_depth_buffer_state(intel); | ||
1750 | gen6_upload_drawing_rectangle(scrn, pixmap); | ||
1751 | gen6_upload_vertex_element_state(scrn); | ||
1752 | gen7_upload_vertex_buffer(scrn, vertex_bo, end_address_offset); | ||
1753 | gen7_upload_primitive(scrn); | ||
1754 | } | ||
1755 | |||
1756 | void Gen6DisplayVideoTextured(ScrnInfoPtr scrn, | ||
1757 | intel_adaptor_private *adaptor_priv, int id, | ||
1758 | RegionPtr dstRegion, | ||
1759 | short width, short height, | ||
1760 | int video_pitch, int video_pitch2, | ||
1761 | short src_w, short src_h, | ||
1762 | short drw_w, short drw_h, PixmapPtr pixmap) | ||
1763 | { | ||
1764 | intel_screen_private *intel = intel_get_screen_private(scrn); | ||
1765 | BoxPtr pbox; | ||
1766 | int nbox, dxo, dyo, pix_xoff, pix_yoff; | ||
1767 | float src_scale_x, src_scale_y; | ||
1768 | int src_surf; | ||
1769 | int n_src_surf; | ||
1770 | uint32_t src_surf_format; | ||
1771 | uint32_t src_surf_base[6]; | ||
1772 | int src_width[6]; | ||
1773 | int src_height[6]; | ||
1774 | int src_pitch[6]; | ||
1775 | drm_intel_bo *surface_state_binding_table_bo; | ||
1776 | void (*create_dst_surface_state)(ScrnInfoPtr, | ||
1777 | PixmapPtr, | ||
1778 | drm_intel_bo *, | ||
1779 | uint32_t); | ||
1780 | void (*create_src_surface_state)(ScrnInfoPtr, | ||
1781 | drm_intel_bo *, | ||
1782 | uint32_t, int, | ||
1783 | int, int, uint32_t, | ||
1784 | drm_intel_bo *, uint32_t); | ||
1785 | void (*emit_video_setup)(ScrnInfoPtr, | ||
1786 | drm_intel_bo *, int, | ||
1787 | PixmapPtr, | ||
1788 | drm_intel_bo *, uint32_t); | ||
1789 | |||
1790 | if (INTEL_INFO(intel)->gen >= 070) { | ||
1791 | create_dst_surface_state = gen7_create_dst_surface_state; | ||
1792 | create_src_surface_state = gen7_create_src_surface_state; | ||
1793 | emit_video_setup = gen7_emit_video_setup; | ||
1794 | } else { | ||
1795 | create_dst_surface_state = i965_create_dst_surface_state; | ||
1796 | create_src_surface_state = i965_create_src_surface_state; | ||
1797 | emit_video_setup = gen6_emit_video_setup; | ||
1798 | } | ||
1799 | |||
1800 | src_surf_base[0] = adaptor_priv->YBufOffset; | ||
1801 | src_surf_base[1] = adaptor_priv->YBufOffset; | ||
1802 | src_surf_base[2] = adaptor_priv->VBufOffset; | ||
1803 | src_surf_base[3] = adaptor_priv->VBufOffset; | ||
1804 | src_surf_base[4] = adaptor_priv->UBufOffset; | ||
1805 | src_surf_base[5] = adaptor_priv->UBufOffset; | ||
1806 | |||
1807 | if (is_planar_fourcc(id)) { | ||
1808 | src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; | ||
1809 | src_width[1] = src_width[0] = width; | ||
1810 | src_height[1] = src_height[0] = height; | ||
1811 | src_pitch[1] = src_pitch[0] = video_pitch2; | ||
1812 | src_width[4] = src_width[5] = src_width[2] = src_width[3] = | ||
1813 | width / 2; | ||
1814 | src_height[4] = src_height[5] = src_height[2] = src_height[3] = | ||
1815 | height / 2; | ||
1816 | src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = | ||
1817 | video_pitch; | ||
1818 | n_src_surf = 6; | ||
1819 | } else { | ||
1820 | if (id == FOURCC_UYVY) | ||
1821 | src_surf_format = BRW_SURFACEFORMAT_YCRCB_SWAPY; | ||
1822 | else | ||
1823 | src_surf_format = BRW_SURFACEFORMAT_YCRCB_NORMAL; | ||
1824 | |||
1825 | src_width[0] = width; | ||
1826 | src_height[0] = height; | ||
1827 | src_pitch[0] = video_pitch; | ||
1828 | n_src_surf = 1; | ||
1829 | } | ||
1830 | |||
1831 | surface_state_binding_table_bo = | ||
1832 | drm_intel_bo_alloc(intel->bufmgr, | ||
1833 | "surface state & binding table", | ||
1834 | (n_src_surf + 1) * (SURFACE_STATE_PADDED_SIZE + sizeof(uint32_t)), | ||
1835 | 4096); | ||
1836 | |||
1837 | if (!surface_state_binding_table_bo) | ||
1838 | return; | ||
1839 | |||
1840 | create_dst_surface_state(scrn, pixmap, surface_state_binding_table_bo, 0); | ||
1841 | |||
1842 | for (src_surf = 0; src_surf < n_src_surf; src_surf++) { | ||
1843 | create_src_surface_state(scrn, | ||
1844 | adaptor_priv->buf, | ||
1845 | src_surf_base[src_surf], | ||
1846 | src_width[src_surf], | ||
1847 | src_height[src_surf], | ||
1848 | src_pitch[src_surf], | ||
1849 | src_surf_format, | ||
1850 | surface_state_binding_table_bo, | ||
1851 | (src_surf + 1) * SURFACE_STATE_PADDED_SIZE); | ||
1852 | } | ||
1853 | |||
1854 | i965_create_binding_table(scrn, surface_state_binding_table_bo, n_src_surf + 1); | ||
1855 | |||
1856 | if (!gen6_create_vidoe_objects(scrn)) { | ||
1857 | drm_intel_bo_unreference(surface_state_binding_table_bo); | ||
1858 | return; | ||
1859 | } | ||
1860 | |||
1861 | /* Set up the offset for translating from the given region (in screen | ||
1862 | * coordinates) to the backing pixmap. | ||
1863 | */ | ||
1864 | #ifdef COMPOSITE | ||
1865 | pix_xoff = -pixmap->screen_x + pixmap->drawable.x; | ||
1866 | pix_yoff = -pixmap->screen_y + pixmap->drawable.y; | ||
1867 | #else | ||
1868 | pix_xoff = 0; | ||
1869 | pix_yoff = 0; | ||
1870 | #endif | ||
1871 | |||
1872 | dxo = dstRegion->extents.x1; | ||
1873 | dyo = dstRegion->extents.y1; | ||
1874 | |||
1875 | /* Use normalized texture coordinates */ | ||
1876 | src_scale_x = ((float)src_w / width) / (float)drw_w; | ||
1877 | src_scale_y = ((float)src_h / height) / (float)drw_h; | ||
1878 | |||
1879 | pbox = REGION_RECTS(dstRegion); | ||
1880 | nbox = REGION_NUM_RECTS(dstRegion); | ||
1881 | while (nbox--) { | ||
1882 | int box_x1 = pbox->x1; | ||
1883 | int box_y1 = pbox->y1; | ||
1884 | int box_x2 = pbox->x2; | ||
1885 | int box_y2 = pbox->y2; | ||
1886 | int i; | ||
1887 | float vb[12]; | ||
1888 | drm_intel_bo *bo_table[] = { | ||
1889 | NULL, /* vb_bo */ | ||
1890 | intel->batch_bo, | ||
1891 | surface_state_binding_table_bo, | ||
1892 | intel->video.gen4_sampler_bo, | ||
1893 | intel->video.wm_prog_packed_bo, | ||
1894 | intel->video.wm_prog_planar_bo, | ||
1895 | intel->video.gen4_cc_vp_bo, | ||
1896 | intel->video.gen4_cc_bo, | ||
1897 | intel->video.gen6_blend_bo, | ||
1898 | intel->video.gen6_depth_stencil_bo, | ||
1899 | }; | ||
1900 | |||
1901 | pbox++; | ||
1902 | |||
1903 | i = 0; | ||
1904 | vb[i++] = (box_x2 - dxo) * src_scale_x; | ||
1905 | vb[i++] = (box_y2 - dyo) * src_scale_y; | ||
1906 | vb[i++] = (float)box_x2 + pix_xoff; | ||
1907 | vb[i++] = (float)box_y2 + pix_yoff; | ||
1908 | |||
1909 | vb[i++] = (box_x1 - dxo) * src_scale_x; | ||
1910 | vb[i++] = (box_y2 - dyo) * src_scale_y; | ||
1911 | vb[i++] = (float)box_x1 + pix_xoff; | ||
1912 | vb[i++] = (float)box_y2 + pix_yoff; | ||
1913 | |||
1914 | vb[i++] = (box_x1 - dxo) * src_scale_x; | ||
1915 | vb[i++] = (box_y1 - dyo) * src_scale_y; | ||
1916 | vb[i++] = (float)box_x1 + pix_xoff; | ||
1917 | vb[i++] = (float)box_y1 + pix_yoff; | ||
1918 | |||
1919 | bo_table[0] = intel_bo_alloc_for_data(intel, | ||
1920 | vb, sizeof(vb), | ||
1921 | "video vbo"); | ||
1922 | |||
1923 | /* If this command won't fit in the current batch, flush. | ||
1924 | * Assume that it does after being flushed. | ||
1925 | */ | ||
1926 | if (drm_intel_bufmgr_check_aperture_space(bo_table, ARRAY_SIZE(bo_table)) < 0) | ||
1927 | intel_batch_submit(scrn); | ||
1928 | |||
1929 | intel_batch_start_atomic(scrn, 200); | ||
1930 | emit_video_setup(scrn, surface_state_binding_table_bo, n_src_surf, pixmap, bo_table[0], i * 4); | ||
1931 | intel_batch_end_atomic(scrn); | ||
1932 | |||
1933 | drm_intel_bo_unreference(bo_table[0]); | ||
1934 | } | ||
1935 | |||
1936 | /* release reference once we're finished */ | ||
1937 | drm_intel_bo_unreference(surface_state_binding_table_bo); | ||
1938 | intel_debug_flush(scrn); | ||
1939 | } | ||