summaryrefslogtreecommitdiff
path: root/src/shaders/h264/mc/inter_Header.inc
blob: 97829c065bf5f08f94421af32c804252102f03ed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
/*
 * Header file for all AVC INTER prediction kernels
 * Copyright © <2010>, Intel Corporation.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * This file was originally licensed under the following license
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
// Module name: inter_header.inc
//
// Shared definitions for all AVC INTER prediction kernels.
// The guard below makes sure the contents are only processed once, however
// many times the file is included.
#ifndef __INTER_HEADER__
#define __INTER_HEADER__

// Defined unconditionally by this header; presumably tested by shared include
// files to select INTER-specific code (those users are not visible here).
#define INTER_KERNEL

//-------------------------------------------------------------------------------------------
// TODO: The following definitions will be merged with the definitions above later
//-------------------------------------------------------------------------------------------


//------------ Input parameters & bit masks

// Software workaround (SW WA) for weighted prediction - 2007/09/06
// Earlier declarations, kept commented out for reference:
//.declare	guwR1			Base=r1 ElementSize=2 Type=uw
//.declare	guwW128			Base=r63.13 ElementSize=2 Type=uw

// SW_W_128 enables the SW WA for the special Weight=128 case.
// This header turns it on for every target except DEV_ILK. On DEV_ILK it is
// left alone, so it is only active there if SW_W_128 was defined before this
// point.
#if !defined(DEV_ILK)
#define SW_W_128
#endif	// !DEV_ILK

// The dword view used by the workaround is only declared when it is enabled.
#if defined(SW_W_128)
.declare	gudW128			Base=r1.0 ElementSize=4 Type=ud
#endif	// SW_W_128

// Input fields held in r3, listed in ascending subregister order.
// The ":ub" / ":uw" tag on each entry is the element type the subregister
// index is written for. Each n*_MASK constant is listed directly under the
// field it accompanies.

// --- Weighted prediction flag
#define gWPREDFLAG      r3.0        // :ub, Weighted pred flag
#define nWBIDIR_MASK    0xc0

// --- MB type, field flag and parity
#define gMBTYPE         r3.1        // :ub, MB type
#define nMBTYPE_MASK    0x1f
#define gFIELDMBFLAG    r3.1        // :ub, Field MB flag
#define nFIELDMB_MASK   0x40
#define gMBPARITY       r3.3        // :ub, Bottom field flag
#define nMBPARITY_MASK  0x01

// --- Word view used when building the write message descriptor
#define gFIELDFLAGS     r3.1        // :uw - To compute message descriptor for write

// --- Macroblock origin
#define gORIX           r3.4        // :ub, X origin
#define gORIY           r3.5        // :ub, Y origin

// --- Coded block pattern
#define gCBP            r3.9        // :ub, CBP (0, 0, Y0, Y1, Y2, Y3, Cb, Cr)
#define nCBPY_MASK      0x3c        // the four Y bits
#define nCBPU_MASK      0x2         // the Cb bit
#define nCBPV_MASK      0x1         // the Cr bit

// --- Sub-macroblock shape / prediction mode
#define gSUBMB_SHAPE    r3.12       // :ub, Sub-MB shape
#define gSUBMB_MODE     r3.13       // :ub, Sub-MB prediction mode
// Word-typed declaration at r3.6; presumably covers the two bytes above
// (word 6 = bytes 12-13) - confirm against the assembler's indexing rules.
.declare guwSUBMB_SHAPE_MODE Base=r3.6 ElementSize=2 Type=uw

// --- Weighted prediction denominators
#define gYWDENOM        r3.14       // :ub, Luma log2 weight denom
#define gCWDENOM        r3.15       // :ub, Chroma log2 weight denom

// --- Byte view starting at r3.16
.declare gubBIDX Base=r3.16 ElementSize=1 Type=ub

// --- Register addresses
#define gADDR           r3.24       // :ub, Register addresses of error data / MV

// Weights/offsets (r8) and motion vectors (r4).
// The bare #define names the GRF itself; the .declare entries are dword- and
// word-typed views based at the same register.
#define		gWGT			r8					// Weights/offsets
.declare    gdWGT			Base=r8  ElementSize=4 Type=d
.declare    gwWGT			Base=r8  ElementSize=2 Type=w
#define		gMV				r4					// MVs
.declare    gwMV			Base=r4  ElementSize=2 Type=w
.declare    gdMV			Base=r4  ElementSize=4 Type=d

// Error data: luma based at r10, chroma based at r26.
// Each has a word view and a byte view over the same base register.
.declare	gwERRORY		Base=r10 ElementSize=2 Type=w		// 16 GRFs
.declare	gubERRORY		Base=r10 ElementSize=1 Type=ub
.declare	gwERRORC		Base=r26 ElementSize=2 Type=w		// 8 GRFs
// ElementSize corrected from 2 to 1: a ub element is one byte, matching
// gubERRORY above and every other Type=ub declaration in this header.
.declare	gubERRORC		Base=r26 ElementSize=1 Type=ub

//------------ Address registers
// Every name below is a subregister of a0. Entries tagged ":ud" are indexed
// in dwords; the untagged ones are presumably indexed in words, so a dword
// index k would overlay word indices 2k and 2k+1 (confirm against the
// assembler's subregister rules). Each dword alias is grouped with the word
// names it appears to overlay.

// --- Message descriptor / generic pointers
#define pMSGDSC     a0.0        // ud: Must be the leading dword of the address register
#define pREF        a0.0
#define p0          a0.0
#define p1          a0.1

// --- Weight and block-index pointers
#define pWGT_BIDX   a0.1        // :ud, WGT & BIDX
#define pBIDX       a0.2
#define pWGT        a0.3

// --- Error-data pointers
#define pERRORYC    a0.2        // :ud
#define pERRORY     a0.4
#define pERRORC     a0.5

// --- Recon / motion-vector pointers
#define pRECON_MV   a0.3        // :ud, RECON & MV
#define pMV         a0.6

// --- Reference pointers
#define pREF0D      a0.0        // :ud
#define pREF0       a0.0        // :uw
#define pREF1       a0.1

#define pREF2D      a0.1        // :ud
#define pREF2       a0.2
#define pREF3       a0.3

#define pREF4D      a0.2        // :ud
#define pREF4       a0.4
#define pREF5       a0.5

#define pREF6D      a0.3        // :ud
#define pREF6       a0.6
#define pREF7       a0.7

// --- Result pointers
#define pRESD       a0.3        // :ud
#define pRES        a0.6
#define pRESULT     a0.7

//------------ Constants for static/inline/indirect
// Byte offsets into the register file, at 32 bytes per GRF. The 32-bit
// constants pack two 16-bit offsets as (high << 16) + low.

// Single offsets
#define nOFFSET_BIDX        112             // = 32*3 + 4*4
#define nOFFSET_WGT         256             // = 32*8
#define nOFFSET_MV          128             // = 32*4
#define nOFFSET_ERRORY      0x0140          // = 320 = 32*10
#define nOFFSET_ERRORC      0x0340          // = 832 = 320 + 128*4 = 32*26

// Packed pairs
#define nOFFSET_WGT_BIDX    0x01000070      // = (256<<16) + 112
#define nOFFSET_ERROR       0x03400140      // = ((320+128*4)<<16) + 320
#define nOFFSET_RECON_MV    0x04400080      // = (1088<<16) + 128		// TODO: OFFSET_RECON is obsolete

//------------ Constants for kernel internal variables
// Byte offsets at 32 bytes per GRF. The 32-bit values pack two 16-bit offsets
// as (high << 16) | low.

// --- Interpolation buffers
#define nOFFSET_INTPY0          0x0640          // = 32*50
#define nOFFSET_INTPY1          0x0780          // = 32*60
#define nOFFSET_INTPC0          0x06c0          // = 32*54
#define nOFFSET_INTPC1          0x0480          // = 32*36
#define nOFFSET_INTP0           0x06c00640      // = (nOFFSET_INTPC0<<16)|nOFFSET_INTPY0
#define nOFFSET_INTP1           0x04800780      // = (nOFFSET_INTPC1<<16)|nOFFSET_INTPY1

// --- Interim buffer (INTERIM2 and INTERIM3 have the same value)
#define nOFFSET_INTERIM         0x0480          // = 32*36
#define nOFFSET_INTERIM2        0x04A00480      // = ((32*37)<<16)|(32*36)
#define nOFFSET_INTERIM3        0x04A00480      // = ((32*36+32)<<16)|(32*36)
#define nOFFSET_INTERIM4        0x04A00490      // = ((32*37)<<16)|(32*36+16)

// --- 4x4 interim buffer
#define nOFFSET_INTERIM4x4      0x04C0          // = 32*38
#define nOFFSET_INTERIM4x4_4    0x04E004D0      // = ((32*38+32)<<16)|(32*38+16)
#define nOFFSET_INTERIM4x4_5    0x04D004C0      // = ((32*38+16)<<16)|(32*38)
#define nOFFSET_INTERIM4x4_6    0x04E004C0      // = ((32*38+32)<<16)|(32*38)
#define nOFFSET_INTERIM4x4_7    0x04D004C8      // = ((32*38+16)<<16)|(32*38+8)
#define nOFFSET_INTERIM4x4_8    0x04E004D8      // = ((32*38+32)<<16)|(32*38+24)
#define nOFFSET_INTERIM4x4_9    0x04F004E8      // = ((32*38+48)<<16)|(32*38+40)

// --- Result and reference buffers
#define nOFFSET_RES             0x540           // = 32*42
#define nOFFSET_REF             0x560           // = 32*43
#define nOFFSET_REFC            0x700           // = 32*56

			// Binding table index
// NOTE(review): "BDIX" looks like a transposition of "BIDX"; the names are
// kept as they are because other files may reference them.
#define nBDIX_DESTY     0
#define nBDIX_DESTC     1
#define nBI_LC_DIFF     0x10        // Binding table index diff between luma and chroma

// Presumably the GRF width and half-width in bytes (the nOFFSET_* constants
// in this header are computed with 32 bytes per GRF) - confirm at use sites.
#define nGRFWIB         32
#define nGRFHWIB        16

//------------ Regions
// Typed views over the reference-data registers. A name's prefix gives the
// element type of the view (gud = dword, gub = byte).

// Views based at r43 and r56
.declare gudREF         Base=r43 ElementSize=4 SrcRegion=<16;16,1> Type=ud
.declare gubREF         Base=r43 ElementSize=1 Type=ub
.declare gudREFC        Base=r56 ElementSize=4 SrcRegion=<16;16,1> Type=ud

// 16x16 handling
.declare gudREF21x21    Base=r58 ElementSize=4 SrcRegion=<16;16,1> Type=ud
.declare gudREF18x10    Base=r66 ElementSize=4 SrcRegion=<16;16,1> Type=ud
.declare gubREF18x10    Base=r66 ElementSize=1 SrcRegion=<16;16,1> Type=ub

// Dword and byte views based at r38 and r46
.declare gudREF16x16    Base=r38 ElementSize=4 Type=ud      // 8 GRFs
.declare gubREF16x16    Base=r38 ElementSize=1 Type=ub
.declare gudREFC16x8    Base=r46 ElementSize=4 Type=ud      // 4 GRFs
.declare gubREFC16x8    Base=r46 ElementSize=1 Type=ub

// TODO
.declare gubAVG         Base=r56 ElementSize=1 Type=ub
.declare gubREFY_BWD    Base=r64 ElementSize=1 Type=ub
.declare gubREFC_BWD    Base=r72 ElementSize=1 Type=ub


// Typed views over the interpolation and interim buffers. Several views
// share a base register: r50 and r54 each carry both an INTP* and a *PRED
// view, and r36, r38 and r42 are each declared under more than one name.

// --- Views based at r50
.declare guwINTPY0          Base=r50 ElementSize=2 SrcRegion=<16;16,1> Type=uw
.declare gudINTPY0          Base=r50 ElementSize=4 Type=ud
.declare gubINTPY0          Base=r50 ElementSize=1 SrcRegion=<32;16,2> Type=ub
.declare guwYPRED           Base=r50 ElementSize=2 SrcRegion=<8;8,1> Type=uw
.declare gubYPRED           Base=r50 ElementSize=1 SrcRegion=<32;16,2> Type=ub

// --- Views based at r60
.declare guwINTPY1          Base=r60 ElementSize=2 SrcRegion=<16;16,1> Type=uw
.declare gudINTPY1          Base=r60 ElementSize=4 Type=ud
.declare gubINTPY1          Base=r60 ElementSize=1 SrcRegion=<32;16,2> Type=ub

// --- Views based at r54
.declare guwINTPC0          Base=r54 ElementSize=2 SrcRegion=<16;16,1> Type=uw
.declare gwINTPC0           Base=r54 ElementSize=2 SrcRegion=<16;16,1> Type=w
.declare gudINTPC0          Base=r54 ElementSize=4 Type=ud
.declare gubINTPC0          Base=r54 ElementSize=1 SrcRegion=<32;16,2> Type=ub
.declare guwCPRED           Base=r54 ElementSize=2 SrcRegion=<16;8,2> Type=uw
.declare gubCPRED           Base=r54 ElementSize=1 SrcRegion=<32;8,4> Type=ub

// --- Views based at r36 (the same register as gINTERIM)
.declare guwINTPC1          Base=r36 ElementSize=2 SrcRegion=<16;16,1> Type=uw
.declare gudINTPC1          Base=r36 ElementSize=4 Type=ud
.declare gubINTPC1          Base=r36 ElementSize=1 SrcRegion=<32;16,2> Type=ub
#define gINTERIM            r36
.declare gubINTERIM_BUF     Base=r36 ElementSize=1 SrcRegion=<32;16,2> Type=ub

// --- Views based at r38
#define gINTERIM4x4         r38
.declare gubINTERIM4x4_BUF  Base=r38 ElementSize=1 SrcRegion=<32;16,2> Type=ub
.declare gwINTERIM4x4_BUF   Base=r38 ElementSize=2  Type=w
.declare gwINTERIM_BUF3     Base=r38 ElementSize=2 SrcRegion=<16;16,1> Type=w   // 2 GRFs
.declare gubINTERIM_BUF3    Base=r38 ElementSize=1 Type=ub

// --- Views based at r42
.declare gubINTERIM_BUF2    Base=r42 ElementSize=1 SrcRegion=<8;4,2> Type=ub
.declare gwINTERIM_BUF2     Base=r42 ElementSize=2 SrcRegion=<16;16,1> Type=w
.declare guwINTERIM_BUF2    Base=r42 ElementSize=2 Type=uw
.declare gwTEMP             Base=r42 ElementSize=2 SrcRegion=<16;16,1> Type=w

//------------ General registers

#define gX              r3.2        // w
#define gY              r3.3        // w

#define gMSGDSC_R       r3.6        // ud
#define gMSGDSC_W       r3.7        // ud

// Loop bookkeeping (TODO).
// gLOOPCNT has the same location in both configurations, so it is defined
// once here. The other loop registers move depending on SW_W_128: when the
// workaround is enabled this header declares gudW128 at r1.0, and the loop
// registers are placed in r8/r9 instead of r1.
#define gLOOPCNT        r9.7        // Loop counter for submodules

#if defined(SW_W_128)
.declare gwMBTYPE       Base=r8.6 ElementSize=2 Type=w      // Shared with gLOOP_SUBMB
#define gLOOP_SUBMB     r8.6
#define gLOOP_SUBMBPT   r8.7
#define gLOOP_DIR       r9.6
#else	// !SW_W_128
.declare gwMBTYPE       Base=r1.0 ElementSize=2 Type=w      // Shared with gLOOP_SUBMB
#define gLOOP_SUBMB     r1.0
#define gLOOP_SUBMBPT   r1.1
#define gLOOP_DIR       r8.7
#endif	// SW_W_128

// Scratch and working registers. Most live in r34; a few are in r1, r8
// and r42.

// --- r34: motion-vector components
#define gMVX_INT        r34.0       // :w
#define gMVY_INT        r34.1       // :w
#define gMVX_FRAC       r34.2       // :w
#define gMVY_FRAC       r34.3       // :w
#define gMVX_FRACC      r34.4       // :w
#define gMVY_FRACC      r34.5       // :w

// --- r34: temporaries
// gD0 is dword-indexed; dword 3 presumably overlays words 6-7 (gW0/gW1) -
// confirm before using them at the same time.
#define gD0             r34.3       // Temporary DWORD
#define gW0             r34.6       // Temporary WORD
#define gW1             r34.7       // Temporary WORD
#define gW2             r34.8       // Temporary WORD
#define gW3             r34.9       // Temporary WORD
#define gW4             r34.15

// --- r34: interpolation pointers
// gpINTP is dword-indexed; dword 5 presumably overlays words 10-11
// (gpINTPY/gpINTPC).
#define gpINTP          r34.5       // DW
#define gpINTPY         r34.10
#define gpINTPC         r34.11

// --- r34: prediction state
#define gPREDFLAG       r34.12
#define gBIDX           r34.13
#define gREFPARITY      r34.14

// --- r1
#define gCBP_MASK       r1.1
#define gpADDR          r1.2        // :uw (8 words)
#define gMVSTEP         r1.13
#define gCHRMVADJ       r1.14
#define gPARITY         r1.15

// --- r8
#define gSHAPETEMP      r8.15       // :uw

// --- r42: coefficients
#define gCOEFA          r42.0
#define gCOEFB          r42.1
#define gCOEFC          r42.2
#define gCOEFD          r42.3

// Weighted prediction
// Working registers in r43-r46.

// --- r46: prediction flags
#define gPREDFLAG0      r46.0
#define gPREDFLAG1      r46.2

// --- r43: flags, rounding adds and shifts
#define gWEIGHTFLAG     r43.2
#define gBIPRED         r43.3
#define gYADD           r43.4
#define gCADD           r43.5
#define gYSHIFT         r43.6
#define gCSHIFT         r43.7

// --- r44: offsets
#define gOFFSET         r44.0
#define gUOFFSET        r44.1
#define gVOFFSET        r44.2

// --- r45: weight / offset entries, subregisters 0-11
#define gWT0            r45.0
#define gO0             r45.1
#define gWT1            r45.2
#define gO1             r45.3
#define gUW0            r45.4
#define gUO0            r45.5
#define gUW1            r45.6
#define gUO1            r45.7
#define gVW0            r45.8
#define gVO0            r45.9
#define gVW1            r45.10
#define gVO1            r45.11

// --- r45: "_D" names; presumably dword-indexed views starting at gWT0 and
// gUW0 respectively (dword 2 = words 4-5) - confirm at use sites.
#define gWT0_D          r45.0
#define gUW0_D          r45.2

//------------ Message-related Registers & constants
#define gMSGSRC         r2          // Message Source

// Message header registers. Several names map to the same message register:
// m1 is used by mMSGHDR, mMSGHDRY, mMSGHDR1 and mMSGHDRYW, and m2 by
// mMSGHDRC and mMSGHDR2.
#define mMSGHDR         m1
#define mMSGHDRY        m1
#define mMSGHDR1        m1
#define mMSGHDRYW       m1

#define mMSGHDRC        m2
#define mMSGHDR2        m2

#define mMSGHDR3        m3
#define mMSGHDR4        m4

#define mMSGHDRCW       m10

// DWORD block read/write message descriptors for the data port.
// Suffixes: (none) = base descriptor, _TF / _BF = the two field variants
// (presumably top field and bottom field).
#if defined(DEV_ILK)
	// NOTE(review): the bit-layout comments that used to sit in this branch were
	// copies of the pre-DEV_ILK ones. They do not describe these constants: the
	// upper bits here are 0x0208 for read and 0x0208 for write, not 0x0410/0x0510.
	// Check the DEV_ILK data port descriptor format before editing.
	// Reads through Data Port, Sampler Cache
#define nDWBRMSGDSC_SC      0x0208A002
#define nDWBRMSGDSC_SC_TF   0x0208E602
#define nDWBRMSGDSC_SC_BF   0x0208E702
	// Writes through Data Port, Render Cache
#define nDWBWMSGDSC         0x02082000
#define nDWBWMSGDSC_TF      0x02082600
#define nDWBWMSGDSC_BF      0x02082700

#else	// Pre DEV_ILK
	// Reads through Data Port, Sampler Cache
	// 0000 0100(read)  0001(msg len) xxxx(resp len) 1010 (sampler cache) xxxx (field/frame) xxxx xxxx (bidx)
#define nDWBRMSGDSC_SC      0x0410A002
#define nDWBRMSGDSC_SC_TF   0x0410A602
#define nDWBRMSGDSC_SC_BF   0x0410A702
	// Writes through Data Port, Render Cache
	// 0000 0101(write) 0001(msg len) xxxx(resp len) 0010 (render cache)  xxxx (field/frame) xxxx xxxx (bidx)
#define nDWBWMSGDSC         0x05102000
#define nDWBWMSGDSC_TF      0x05102600
#define nDWBWMSGDSC_BF      0x05102700
#endif	// DEV_ILK

// Two-bit mask; these are the bits set in the _TF/_BF descriptors and clear
// in the base descriptors.
#define nDWB_FIELD_MASK     0x0600
										
// Message data payload
// Byte-typed views based at m2 and m11, i.e. the message register right
// after mMSGHDRYW (m1) and mMSGHDRCW (m10). REGION() is defined outside
// this file.
.declare mbMSGPAYLOADY  Base=m2  ElementSize=1 SrcRegion=REGION(16,1) Type=b
.declare mbMSGPAYLOADC  Base=m11 ElementSize=1 SrcRegion=REGION(16,1) Type=b

// Destination registers for write commit
#define gREG_WRITE_COMMIT_Y     r10.0
#define gREG_WRITE_COMMIT_UV    r11.0

// Sub-routine call support for the INTER kernels.
// RETURN_REG_INTER holds the return address while a sub-routine runs. There
// is only this one register, so a sub-routine that uses CALL_INTER itself
// overwrites its caller's return address.
#define RETURN_REG_INTER	r1.5		// Return pointer for all sub-routine calls (type DWORD)

// CALL_INTER(subFunc, skipInst)
//   Expands to two instructions: an add that writes ip plus an immediate into
//   RETURN_REG_INTER, then a jmpi to subFunc. The immediate is the literal
//   text "1+skipInst*INST_SIZE"; INST_SIZE is defined outside this file.
//   NOTE(review): skipInst is not parenthesized in the expansion, so pass a
//   plain literal or identifier rather than an expression.
#define CALL_INTER(subFunc, skipInst)	add (1) RETURN_REG_INTER<1>:ud   ip:ud	1+skipInst*INST_SIZE \n\
				jmpi (1) subFunc
// RETURN_INTER
//   Copies RETURN_REG_INTER back into ip.
#define	RETURN_INTER		mov (1)	ip:ud	RETURN_REG_INTER<0;1,0>:ud		// Return to calling module


// End of inter_header.inc

#endif	// !defined(__INTER_HEADER__)