diff options
author | kem <kem> | 2000-11-09 02:55:08 +0000 |
---|---|---|
committer | kem <kem> | 2000-11-09 02:55:08 +0000 |
commit | 694a61e90fe006a92e0b52f332c061b39f5ed9f7 (patch) | |
tree | bc8946aebe54da1a374ee822dd98acfd380b9700 | |
parent | 998a47db23cf19f0a39e267c8518936f4f21ea20 (diff) |
Sync with latest Mesa 3.4 CVS sources
164 files changed, 13216 insertions, 20751 deletions
diff --git a/xc/config/cf/host.def b/xc/config/cf/host.def index 681b737e4..f10005fc4 100644 --- a/xc/config/cf/host.def +++ b/xc/config/cf/host.def @@ -15,10 +15,21 @@ #ifdef i386Architecture #define MesaUse3DNow +#if HasKatmaiSupport +# define MesaUseKatmai #endif +#endif + +/* To do profiling of the dynamically loaded 'xyz_dri.so' object, turn + * this on. + * Use 'xc/lib/GL/makeprofile.sh' to make it work. + */ +/* #define GlxSoProf YES */ -/* To do profiling turn on this plus one of the builtin drivers*/ -/* #define ProfileLibGlx YES */ +#ifdef GlxSoProf +# undef DefaultCCOptions +# define DefaultCCOptions -ansi GccWarningOptions -pipe -g -p +#endif /* Optionally turn these on for debugging */ /* #define GlxBuiltInTdfx YES */ @@ -33,7 +44,6 @@ /* Optionally turn this on to force the kernel modules to build */ /* #define BuildXF86DRM YES */ -#define SharedLibFont NO #define XnestServer NO #define XVirtualFramebufferServer NO #define XprtServer NO diff --git a/xc/config/cf/linux.cf b/xc/config/cf/linux.cf index 5f23b6c24..d71076c38 100644 --- a/xc/config/cf/linux.cf +++ b/xc/config/cf/linux.cf @@ -226,6 +226,21 @@ XCOMM binutils: (LinuxBinUtilsMajorVersion) # endif #endif +/* Support for Intel's Katmai Native Instructions, also known as the + * Streaming SIMD Extensions, was introduced in the 2.4.x kernels. + */ +#ifndef HasKatmaiSupport +# ifdef i386Architecture +# if OSMajorVersion > 2 || (OSMajorVersion == 2 && OSMinorVersion >= 4) +# define HasKatmaiSupport YES +# else +# define HasKatmaiSupport NO +# endif +# else +# define HasKatmaiSupport NO +# endif +#endif + /* * The Rush extension support. */ diff --git a/xc/extras/Mesa/include/GL/gl.h b/xc/extras/Mesa/include/GL/gl.h index 5a31ddebd..11e67c6f3 100644 --- a/xc/extras/Mesa/include/GL/gl.h +++ b/xc/extras/Mesa/include/GL/gl.h @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/include/GL/gl.h,v 1.7 2000/09/24 13:50:05 alanh Exp $ */ #ifndef __gl_h_ @@ -44,11 +43,11 @@ #define OPENSTEP #endif -#if defined(_WIN32) && !defined(__WIN32__) && !defined(__CYGWIN__) +#if defined(_WIN32) && !defined(__WIN32__) #define __WIN32__ #endif -#if !defined(OPENSTEP) && (defined(__WIN32__) && !defined(__CYGWIN__)) +#if !defined(OPENSTEP) && (defined(__WIN32__) || defined(__CYGWIN__)) # if defined(_MSC_VER) && defined(BUILD_GL32) /* tag specify we're building mesa as a DLL */ # define GLAPI __declspec(dllexport) # elif defined(_MSC_VER) && defined(_DLL) /* tag specifying we're building for DLL runtime support */ @@ -63,7 +62,7 @@ # define GLAPIENTRY #endif /* WIN32 / CYGWIN bracket */ -#if defined(_WIN32) && !defined(_WINGDI_) && !defined(__CYGWIN__) && !defined(_GNU_H_WINDOWS32_DEFINES) && !defined(OPENSTEP) +#if defined(_WIN32) && !defined(_WINGDI_) && !defined(_GNU_H_WINDOWS32_DEFINES) && !defined(OPENSTEP) #include <gl/mesa_wgl.h> #endif diff --git a/xc/extras/Mesa/include/GL/glext.h b/xc/extras/Mesa/include/GL/glext.h index 3df22a373..f721a9fd7 100644 --- a/xc/extras/Mesa/include/GL/glext.h +++ b/xc/extras/Mesa/include/GL/glext.h @@ -1,5 +1,3 @@ -/* $XFree86: xc/extras/Mesa/include/GL/glext.h,v 1.4 2000/09/26 15:56:28 tsi Exp $ */ - #ifndef __glext_h_ #define __glext_h_ diff --git a/xc/extras/Mesa/include/GL/xmesa.h b/xc/extras/Mesa/include/GL/xmesa.h index 3baec0eb0..d6ef934f3 100644 --- a/xc/extras/Mesa/include/GL/xmesa.h +++ b/xc/extras/Mesa/include/GL/xmesa.h @@ -88,7 +88,7 @@ extern struct Library *XLibBase; #define XMESA_MAJOR_VERSION 3 -#define XMESA_MINOR_VERSION 3 +#define XMESA_MINOR_VERSION 4 diff --git a/xc/extras/Mesa/include/GL/xmesa_xf86.h b/xc/extras/Mesa/include/GL/xmesa_xf86.h index a142e9524..983f234dd 100644 --- a/xc/extras/Mesa/include/GL/xmesa_xf86.h +++ b/xc/extras/Mesa/include/GL/xmesa_xf86.h @@ -25,7 +25,6 @@ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
OR OTHER DEALINGS IN THE SOFTWARE. **************************************************************************/ -/* $XFree86: xc/extras/Mesa/include/GL/xmesa_xf86.h,v 1.4 2000/03/03 16:02:10 tsi Exp $ */ /* * Authors: diff --git a/xc/extras/Mesa/src/OSmesa/osmesa.c b/xc/extras/Mesa/src/OSmesa/osmesa.c index e489f2553..4cc3e29d5 100644 --- a/xc/extras/Mesa/src/OSmesa/osmesa.c +++ b/xc/extras/Mesa/src/OSmesa/osmesa.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/OSmesa/osmesa.c,v 1.3 2000/09/26 15:56:38 tsi Exp $ */ + /* * Off-Screen Mesa rendering / Rendering into client memory space @@ -1356,7 +1356,7 @@ static line_func choose_line_function( GLcontext *ctx ) OSMesaContext osmesa = (OSMesaContext) ctx; if (ctx->Line.SmoothFlag) return NULL; - if (ctx->Texture.Enabled) return NULL; + if (ctx->Texture.ReallyEnabled) return NULL; if (ctx->Light.ShadeModel!=GL_FLAT) return NULL; if (ctx->Line.Width==1.0F @@ -1542,7 +1542,7 @@ static triangle_func choose_triangle_function( GLcontext *ctx ) if (ctx->Polygon.SmoothFlag) return NULL; if (ctx->Polygon.StippleFlag) return NULL; - if (ctx->Texture.Enabled) return NULL; + if (ctx->Texture.ReallyEnabled) return NULL; if (ctx->RasterMask==DEPTH_BIT && ctx->Depth.Func==GL_LESS diff --git a/xc/extras/Mesa/src/X/fakeglx.c b/xc/extras/Mesa/src/X/fakeglx.c index f7dde2533..0b7cf6bf3 100644 --- a/xc/extras/Mesa/src/X/fakeglx.c +++ b/xc/extras/Mesa/src/X/fakeglx.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/X/fakeglx.c,v 1.7 2000/09/26 15:56:38 tsi Exp $ */ + /* * This is an emulation of the GLX API which allows Mesa/GLX-based programs @@ -857,7 +857,7 @@ Fake_glXChooseVisual( Display *dpy, int screen, int *list ) int *parselist; XVisualInfo *vis; int min_ci = 0; - int min_red=0, min_green=0, min_blue=0; + int min_red=0, min_green=0, min_blue=0, min_alpha=0; GLboolean rgb_flag = GL_FALSE; GLboolean alpha_flag = GL_FALSE; GLboolean double_flag = GL_FALSE; @@ -921,10 +921,8 @@ Fake_glXChooseVisual( Display *dpy, int screen, int *list ) break; case GLX_ALPHA_SIZE: parselist++; - { - GLint size = *parselist++; - alpha_flag = size>0 ? 1 : 0; - } + min_alpha = *parselist++; + alpha_flag = (min_alpha > 0); break; case GLX_DEPTH_SIZE: parselist++; @@ -1003,6 +1001,16 @@ Fake_glXChooseVisual( Display *dpy, int screen, int *list ) } } + /* DEBUG + printf("glXChooseVisual:\n"); + printf(" GLX_RED_SIZE = %d\n", min_red); + printf(" GLX_GREEN_SIZE = %d\n", min_green); + printf(" GLX_BLUE_SIZE = %d\n", min_blue); + printf(" GLX_ALPHA_SIZE = %d\n", min_alpha); + printf(" GLX_DEPTH_SIZE = %d\n", depth_size); + printf(" GLX_STENCIL_SIZE = %d\n", stencil_size); + */ + /* * Since we're only simulating the GLX extension this function will never * find any real GL visuals. Instead, all we can do is try to find an RGB @@ -1049,15 +1057,32 @@ Fake_glXChooseVisual( Display *dpy, int screen, int *list ) if (vis) { /* Note: we're not exactly obeying the glXChooseVisual rules here. * When GLX_DEPTH_SIZE = 1 is specified we're supposed to choose the - * largest depth buffer size, which is 32bits/value. However, we + * largest depth buffer size, which is 32bits/value. Instead, we * return 16 to maintain performance with earlier versions of Mesa. 
*/ - if (depth_size == 1) - depth_size = DEFAULT_SOFTWARE_DEPTH_BITS; - else if (depth_size > 24) - depth_size = 31; + if (depth_size > 24) + depth_size = 31; /* 32 causes int overflow problems */ else if (depth_size > 16) depth_size = 24; + else if (depth_size > 0) + depth_size = DEFAULT_SOFTWARE_DEPTH_BITS; /*16*/ + + /* If using Glide, make sure we don't try to setup an impossible + * visual. This fixes the Q3 bug in which 24-bit Z was being reported. + */ + { + const char *fx = getenv("MESA_GLX_FX"); + if (fx && fx[0] != 'd') + if (depth_size > 16 || + stencil_size > 0 || + (min_red > 1 && min_red > 5) || + (min_green > 1 && min_green > 6) || + (min_blue > 1 && min_blue > 5) || + alpha_flag) + return NULL; + } + + /* we only support one size of stencil and accum buffers. */ if (stencil_size > 0) stencil_size = STENCIL_BITS; @@ -1837,7 +1862,7 @@ struct _glxapi_table *_mesa_GetGLXDispatchTable(void) { int size = sizeof(struct _glxapi_table) / sizeof(void *); (void) size; - assert(_glxapi_get_dispatch_table_size() >= size); + assert((GLint) _glxapi_get_dispatch_table_size() >= size); } /* initialize the whole table to no-ops */ diff --git a/xc/extras/Mesa/src/X/fakeglx.h b/xc/extras/Mesa/src/X/fakeglx.h deleted file mode 100644 index 62c4fc435..000000000 --- a/xc/extras/Mesa/src/X/fakeglx.h +++ /dev/null @@ -1,141 +0,0 @@ - -/* - * Mesa 3-D graphics library - * Version: 3.1 - * - * Copyright (C) 1999 Brian Paul All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - - - - -/* - * GLX API functions which either call fake or real GLX implementations - */ - - - -#ifndef FAKEGLX_H -#define FAKEGLX_H - - -#include <X11/Xlib.h> -#include <X11/Xutil.h> -#include "GL/glx.h" - - -extern XVisualInfo *Fake_glXChooseVisual( Display *dpy, - int screen, int *list ); - - -extern int Fake_glXGetConfig( Display *dpy, XVisualInfo *visinfo, - int attrib, int *value ); - - -extern GLXContext Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo, - GLXContext shareList, Bool direct ); - - -extern void Fake_glXDestroyContext( Display *dpy, GLXContext ctx ); - - -extern void Fake_glXCopyContext( Display *dpy, GLXContext src, GLXContext dst, - GLuint mask ); - - -extern Bool Fake_glXMakeCurrent( Display *dpy, GLXDrawable drawable, - GLXContext ctx ); - - -extern GLXContext Fake_glXGetCurrentContext( void ); - - -extern GLXDrawable Fake_glXGetCurrentDrawable( void ); - - -extern GLXPixmap Fake_glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, - Pixmap pixmap ); - - -extern void Fake_glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap ); - - -extern Bool Fake_glXQueryExtension( Display *dpy, int *errorb, int *event ); - - -extern Bool Fake_glXIsDirect( Display *dpy, GLXContext ctx ); - - -extern void Fake_glXSwapBuffers( Display *dpy, GLXDrawable drawable ); - - -extern void Fake_glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable, - int x, int y, int width, int height ); - - -extern Bool Fake_glXQueryVersion( Display *dpy, int *maj, int *min ); - - -extern void Fake_glXUseXFont( Font font, int first, int count, int listBase ); - - -extern void Fake_glXWaitGL( void ); - - -extern void Fake_glXWaitX( void ); - - -/* GLX 1.1 and later */ -extern const char *Fake_glXQueryExtensionsString( Display *dpy, int screen ); - - -/* GLX 1.1 and later */ -extern const char *Fake_glXQueryServerString( Display *dpy, int screen, - int name ); - - -/* GLX 1.1 and later */ -extern const char *Fake_glXGetClientString( Display *dpy, int name ); - - 
-#ifdef GLX_MESA_release_buffers -extern Bool Fake_glXReleaseBuffersMESA( Display *dpy, Window w ); -#endif - - -#ifdef GLX_MESA_pixmap_colormap -extern GLXPixmap Fake_glXCreateGLXPixmapMESA( Display *dpy, - XVisualInfo *visinfo, - Pixmap pixmap, Colormap cmap ); -#endif - - -#ifdef GLX_MESA_set_3dfx_mode -extern GLboolean Fake_glXSet3DfxModeMESA( GLint mode ); -#endif - - -#ifdef GLX_EXT_get_proc_address -/*extern GLfunction Fake_glXGetProcAddress( const GLubyte *procName );*/ -extern void (*Fake_glXGetProcAddress( const GLubyte *procName ))(); -#endif - -#endif diff --git a/xc/extras/Mesa/src/X/realglx.c b/xc/extras/Mesa/src/X/realglx.c deleted file mode 100644 index d443087f6..000000000 --- a/xc/extras/Mesa/src/X/realglx.c +++ /dev/null @@ -1,238 +0,0 @@ - -/* - * Mesa 3-D graphics library - * Version: 3.1 - * - * Copyright (C) 1999 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - - - - -/* - * Real GLX-encoder functions. 
Called from glxapi.c - * - * Steven Parker's code for the GLX client API functions should be - * put in this file. - * - * Also, the main API functions in api.c should somehow hook into the - * GLX-encoding functions... - */ - - - -#include <X11/Xlib.h> -#include <X11/Xutil.h> -#include "realglx.h" - - - -XVisualInfo *Real_glXChooseVisual( Display *dpy, int screen, int *list ) -{ - (void) dpy; - (void) screen; - (void) list; - return 0; -} - - - -int Real_glXGetConfig( Display *dpy, XVisualInfo *visinfo, - int attrib, int *value ) -{ - (void) dpy; - (void) visinfo; - (void) attrib; - (void) value; - return 0; -} - - - -GLXContext Real_glXCreateContext( Display *dpy, XVisualInfo *visinfo, - GLXContext shareList, Bool direct ) -{ - (void) dpy; - (void) visinfo; - (void) shareList; - (void) direct; - return 0; -} - - - -void Real_glXDestroyContext( Display *dpy, GLXContext ctx ) -{ - (void) dpy; - (void) ctx; -} - - - -void Real_glXCopyContext( Display *dpy, GLXContext src, GLXContext dst, - GLuint mask ) -{ - (void) dpy; - (void) src; - (void) dst; - (void) mask; -} - - - -Bool Real_glXMakeCurrent( Display *dpy, GLXDrawable drawable, GLXContext ctx ) -{ - (void) dpy; - (void) drawable; - (void) ctx; - return 0; -} - - - -GLXContext Real_glXGetCurrentContext( void ) -{ - return 0; -} - - - -GLXDrawable Real_glXGetCurrentDrawable( void ) -{ - return 0; -} - - - -GLXPixmap Real_glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, - Pixmap pixmap ) -{ - (void) dpy; - (void) visinfo; - (void) pixmap; - return 0; -} - - -void Real_glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap ) -{ - (void) dpy; - (void) pixmap; -} - - - -Bool Real_glXQueryExtension( Display *dpy, int *errorb, int *event ) -{ - (void) dpy; - (void) errorb; - (void) event; - return 0; -} - - - -Bool Real_glXIsDirect( Display *dpy, GLXContext ctx ) -{ - (void) dpy; - (void) ctx; - return 0; -} - - - -void Real_glXSwapBuffers( Display *dpy, GLXDrawable drawable ) -{ - (void) dpy; - (void) drawable; -} 
- - - -Bool Real_glXQueryVersion( Display *dpy, int *maj, int *min ) -{ - (void) dpy; - (void) maj; - (void) min; - return 0; -} - - - -void Real_glXUseXFont( Font font, int first, int count, int listBase ) -{ - (void) font; - (void) first; - (void) count; - (void) listBase; -} - - -typedef struct { - struct { - int major_opcode; - } codes; - - - -} XExtDisplayInfo; - - -void Real_glXWaitGL( void ) -{ -} - - - -void Real_glXWaitX( void ) -{ -} - - - -/* GLX 1.1 and later */ -const char *Real_glXQueryExtensionsString( Display *dpy, int screen ) -{ - (void) dpy; - (void) screen; - return 0; -} - - - -/* GLX 1.1 and later */ -const char *Real_glXQueryServerString( Display *dpy, int screen, int name ) -{ - (void) dpy; - (void) screen; - (void) name; - return 0; -} - - - -/* GLX 1.1 and later */ -const char *Real_glXGetClientString( Display *dpy, int name ) -{ - (void) dpy; - (void) name; - return 0; -} diff --git a/xc/extras/Mesa/src/X/realglx.h b/xc/extras/Mesa/src/X/realglx.h deleted file mode 100644 index d9ecc0f34..000000000 --- a/xc/extras/Mesa/src/X/realglx.h +++ /dev/null @@ -1,110 +0,0 @@ - -/* - * Mesa 3-D graphics library - * Version: 3.1 - * - * Copyright (C) 1999 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - - - - -#ifndef REALGLX_H -#define REALGLX_H - - -#include <X11/Xlib.h> -#include <X11/Xutil.h> -#include "GL/glx.h" - - - -extern XVisualInfo *Real_glXChooseVisual( Display *dpy, - int screen, int *list ); - - -extern int Real_glXGetConfig( Display *dpy, XVisualInfo *visinfo, - int attrib, int *value ); - - -extern GLXContext Real_glXCreateContext( Display *dpy, XVisualInfo *visinfo, - GLXContext shareList, Bool direct ); - - -extern void Real_glXDestroyContext( Display *dpy, GLXContext ctx ); - - -extern void Real_glXCopyContext( Display *dpy, GLXContext src, GLXContext dst, - GLuint mask ); - - -extern Bool Real_glXMakeCurrent( Display *dpy, GLXDrawable drawable, - GLXContext ctx ); - - -extern GLXContext Real_glXGetCurrentContext( void ); - - -extern GLXDrawable Real_glXGetCurrentDrawable( void ); - - -extern GLXPixmap Real_glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, - Pixmap pixmap ); - - -extern void Real_glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap ); - - -extern Bool Real_glXQueryExtension( Display *dpy, int *errorb, int *event ); - - -extern Bool Real_glXIsDirect( Display *dpy, GLXContext ctx ); - - -extern void Real_glXSwapBuffers( Display *dpy, GLXDrawable drawable ); - - -extern Bool Real_glXQueryVersion( Display *dpy, int *maj, int *min ); - - -extern void Real_glXUseXFont( Font font, int first, int count, int listBase ); - - -extern void Real_glXWaitGL( void ); - - -extern void Real_glXWaitX( void ); - - -/* GLX 1.1 and later */ -extern const char 
*Real_glXQueryExtensionsString( Display *dpy, int screen ); - - -/* GLX 1.1 and later */ -extern const char *Real_glXQueryServerString( Display *dpy, int screen, - int name ); - - -/* GLX 1.1 and later */ -extern const char *Real_glXGetClientString( Display *dpy, int name ); - - -#endif diff --git a/xc/extras/Mesa/src/X/xfonts.c b/xc/extras/Mesa/src/X/xfonts.c index 2a1aca33c..9b1148db9 100644 --- a/xc/extras/Mesa/src/X/xfonts.c +++ b/xc/extras/Mesa/src/X/xfonts.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -40,11 +40,9 @@ #include <X11/Xutil.h> #include "GL/gl.h" #include "GL/glx.h" -#include "GL/xmesa.h" #include "context.h" #include "mem.h" #include "xfonts.h" -#include "xmesaP.h" /* Some debugging info. */ @@ -222,7 +220,6 @@ static XCharStruct *isvalid(XFontStruct *fs, int which) void Fake_glXUseXFont( Font font, int first, int count, int listbase ) { - XMesaContext CC; Display *dpy; Window win; Pixmap pixmap; @@ -230,26 +227,23 @@ void Fake_glXUseXFont( Font font, int first, int count, int listbase ) XGCValues values; unsigned long valuemask; XFontStruct *fs; - GLint swapbytes, lsbfirst, rowlength; GLint skiprows, skippixels, alignment; - unsigned int max_width, max_height, max_bm_width, max_bm_height; GLubyte *bm; - int i; - CC = XMesaGetCurrentContext(); - dpy = CC->display; - win = CC->xm_buffer->frontbuffer; + dpy = glXGetCurrentDisplay(); + if (!dpy) + return; /* I guess glXMakeCurrent wasn't called */ + win = RootWindow(dpy, DefaultScreen(dpy)); fs = XQueryFont (dpy, font); - if (!fs) - { - gl_error (CC->gl_ctx, GL_INVALID_VALUE, - "Couldn't get font structure information"); + if (!fs) { + gl_error(NULL, GL_INVALID_VALUE, + "Couldn't get font structure information"); return; - } + } /* Allocate a bitmap that can fit all characters. 
*/ max_width = fs->max_bounds.rbearing - fs->min_bounds.lbearing; @@ -261,10 +255,10 @@ void Fake_glXUseXFont( Font font, int first, int count, int listbase ) (GLubyte)); if (!bm) { XFreeFontInfo( NULL, fs, 0 ); - gl_error (CC->gl_ctx, GL_OUT_OF_MEMORY, + gl_error(NULL, GL_OUT_OF_MEMORY, "Couldn't allocate bitmap in glXUseXFont()"); return; - } + } #if 0 /* get the page info */ @@ -386,16 +380,3 @@ bm_height); glPixelStorei(GL_UNPACK_SKIP_PIXELS, skippixels); glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); } - - -extern void xmesa_xfonts_dummy( void ); -void xmesa_xfonts_dummy( void ) -{ - /* silence unused var warnings */ - (void) kernel8; - (void) DitherValues; - (void) HPCR_DRGB; - (void) kernel1; -} - -/* The End. */ diff --git a/xc/extras/Mesa/src/X/xmesa1.c b/xc/extras/Mesa/src/X/xmesa1.c index 8a84ca9a4..f7400da4b 100644 --- a/xc/extras/Mesa/src/X/xmesa1.c +++ b/xc/extras/Mesa/src/X/xmesa1.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/X/xmesa1.c,v 1.7 2000/09/24 13:50:53 alanh Exp $ */ /* @@ -90,6 +89,7 @@ _glthread_Mutex _xmesa_lock; + /* * Lookup tables for HPCR pixel format: */ @@ -322,7 +322,7 @@ static int bits_per_pixel( XMesaVisual xmv ) /* * Determine if a given X window ID is valid (window exists). - * Do this by calling XGetWindowAttributes() for the window and + * Do this by calling XGetIconName() for the window and * checking if we catch an X error. 
* Input: dpy - the display * win - the window to check for existance @@ -343,12 +343,14 @@ static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr ) static GLboolean window_exists( XMesaDisplay *dpy, Window win ) { - XWindowAttributes wa; + char *name; int (*old_handler)( XMesaDisplay*, XErrorEvent* ); WindowExistsFlag = GL_TRUE; old_handler = XSetErrorHandler(window_exists_err_handler); - XGetWindowAttributes( dpy, win, &wa ); /* dummy request */ + XGetIconName(dpy, win, &name); /* dummy request */ XSetErrorHandler(old_handler); + if (name) + XFree(name); return WindowExistsFlag; } #endif @@ -2509,7 +2511,7 @@ const char *XMesaGetString( XMesaContext c, int name ) { (void) c; if (name==XMESA_VERSION) { - return "3.1"; + return "3.4"; } else if (name==XMESA_EXTENSIONS) { return ""; diff --git a/xc/extras/Mesa/src/X/xmesa2.c b/xc/extras/Mesa/src/X/xmesa2.c index 48cea9497..3b4ca355e 100644 --- a/xc/extras/Mesa/src/X/xmesa2.c +++ b/xc/extras/Mesa/src/X/xmesa2.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/X/xmesa2.c,v 1.7 2000/09/26 15:56:38 tsi Exp $ */ + /* * Mesa/X11 interface, part 2. @@ -5112,7 +5112,7 @@ static const GLubyte *get_string( GLcontext *ctx, GLenum name ) #endif case GL_VENDOR: #ifdef XFree86Server - return (const GLubyte *) "Precision Insight, Inc."; + return (const GLubyte *) "VA Linux Systems, Inc."; #else return NULL; #endif diff --git a/xc/extras/Mesa/src/X/xmesa3.c b/xc/extras/Mesa/src/X/xmesa3.c index 3f6f507c6..ca8912718 100644 --- a/xc/extras/Mesa/src/X/xmesa3.c +++ b/xc/extras/Mesa/src/X/xmesa3.c @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/X/xmesa3.c,v 1.6 2000/09/26 15:56:39 tsi Exp $ */ + /* * Mesa/X11 interface, part 3. @@ -61,7 +61,7 @@ static void draw_points_ANY_pixmap( GLcontext *ctx, GLuint first, GLuint last ) if (xmesa->xm_visual->gl_visual->RGBAflag) { /* RGB mode */ - for (i=first;i<=last;i++) { + for (i=first;i<last;i++) { if (VB->ClipMask[i]==0) { register int x, y; const GLubyte *color = VB->ColorPtr->data[i]; @@ -99,16 +99,16 @@ points_func xmesa_get_points_func( GLcontext *ctx ) XMesaContext xmesa = (XMesaContext) ctx->DriverCtx; if (ctx->Point.Size==1.0F && !ctx->Point.SmoothFlag && ctx->RasterMask==0 - && !ctx->Texture.Enabled) { + && !ctx->Texture.ReallyEnabled) { if (xmesa->xm_buffer->buffer==XIMAGE) { - return NULL; /*draw_points_ximage;*/ + return (points_func)NULL; /*draw_points_ximage;*/ } else { return draw_points_ANY_pixmap; } } else { - return NULL; + return (points_func)NULL; } } @@ -670,11 +670,11 @@ line_func xmesa_get_line_func( GLcontext *ctx ) (void) DitherValues; /* silence unused var warning */ (void) kernel1; /* silence unused var warning */ - if (ctx->Line.SmoothFlag) return NULL; - if (ctx->Texture.Enabled) return NULL; - if (ctx->Light.ShadeModel!=GL_FLAT) return NULL; + if (ctx->Line.SmoothFlag) return (line_func)NULL; + if (ctx->Texture.ReallyEnabled) return (line_func)NULL; + if (ctx->Light.ShadeModel!=GL_FLAT) return (line_func)NULL; /* X line stippling doesn't match OpenGL stippling */ - if (ctx->Line.StippleFlag==GL_TRUE) return NULL; + if (ctx->Line.StippleFlag==GL_TRUE) return (line_func)NULL; if (xmesa->xm_buffer->buffer==XIMAGE && ctx->RasterMask==DEPTH_BIT @@ -696,9 +696,9 @@ line_func 
xmesa_get_line_func( GLcontext *ctx ) case PF_DITHER_5R6G5B: return flat_DITHER_5R6G5B_z_line; case PF_DITHER: - return (depth==8) ? flat_DITHER8_z_line : NULL; + return (depth==8) ? flat_DITHER8_z_line : (line_func)NULL; case PF_LOOKUP: - return (depth==8) ? flat_LOOKUP8_z_line : NULL; + return (depth==8) ? flat_LOOKUP8_z_line : (line_func)NULL; case PF_HPCR: return flat_HPCR_z_line; default: @@ -722,9 +722,9 @@ line_func xmesa_get_line_func( GLcontext *ctx ) case PF_DITHER_5R6G5B: return flat_DITHER_5R6G5B_line; case PF_DITHER: - return (depth==8) ? flat_DITHER8_line : NULL; + return (depth==8) ? flat_DITHER8_line : (line_func)NULL; case PF_LOOKUP: - return (depth==8) ? flat_LOOKUP8_line : NULL; + return (depth==8) ? flat_LOOKUP8_line : (line_func)NULL; case PF_HPCR: return flat_HPCR_line; default: @@ -741,5 +741,5 @@ line_func xmesa_get_line_func( GLcontext *ctx ) return flat_pixmap_line; } #endif - return NULL; + return (line_func)NULL; } diff --git a/xc/extras/Mesa/src/X/xmesa4.c b/xc/extras/Mesa/src/X/xmesa4.c index 2211eaffc..223c653e2 100644 --- a/xc/extras/Mesa/src/X/xmesa4.c +++ b/xc/extras/Mesa/src/X/xmesa4.c @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/X/xmesa4.c,v 1.6 2000/09/26 15:56:39 tsi Exp $ */ + /* * Mesa/X11 interface, part 4. 
@@ -1539,8 +1539,8 @@ triangle_func xmesa_get_triangle_func( GLcontext *ctx ) (void) kernel1; - if (ctx->Polygon.SmoothFlag) return NULL; - if (ctx->Texture.Enabled) return NULL; + if (ctx->Polygon.SmoothFlag) return (triangle_func)NULL; + if (ctx->Texture.ReallyEnabled) return (triangle_func)NULL; if (xmesa->xm_buffer->buffer==XIMAGE) { if ( ctx->Light.ShadeModel==GL_SMOOTH @@ -1570,9 +1570,9 @@ triangle_func xmesa_get_triangle_func( GLcontext *ctx ) return (depth==8) ? smooth_DITHER8_z_triangle : smooth_DITHER_z_triangle; case PF_LOOKUP: - return (depth==8) ? smooth_LOOKUP8_z_triangle : NULL; + return (depth==8) ? smooth_LOOKUP8_z_triangle : (triangle_func)NULL; default: - return NULL; + return (triangle_func)NULL; } } if ( ctx->Light.ShadeModel==GL_FLAT @@ -1602,9 +1602,9 @@ triangle_func xmesa_get_triangle_func( GLcontext *ctx ) return (depth==8) ? flat_DITHER8_z_triangle : flat_DITHER_z_triangle; case PF_LOOKUP: - return (depth==8) ? flat_LOOKUP8_z_triangle : NULL; + return (depth==8) ? flat_LOOKUP8_z_triangle : (triangle_func)NULL; default: - return NULL; + return (triangle_func)NULL; } } if ( ctx->RasterMask==0 /* no depth test */ @@ -1631,9 +1631,9 @@ triangle_func xmesa_get_triangle_func( GLcontext *ctx ) return (depth==8) ? smooth_DITHER8_triangle : smooth_DITHER_triangle; case PF_LOOKUP: - return (depth==8) ? smooth_LOOKUP8_triangle : NULL; + return (depth==8) ? smooth_LOOKUP8_triangle : (triangle_func)NULL; default: - return NULL; + return (triangle_func)NULL; } } @@ -1661,13 +1661,13 @@ triangle_func xmesa_get_triangle_func( GLcontext *ctx ) return (depth==8) ? flat_DITHER8_triangle : flat_DITHER_triangle; case PF_LOOKUP: - return (depth==8) ? flat_LOOKUP8_triangle : NULL; + return (depth==8) ? 
flat_LOOKUP8_triangle : (triangle_func)NULL; default: - return NULL; + return (triangle_func)NULL; } } - return NULL; + return (triangle_func)NULL; } else { /* draw to pixmap */ @@ -1684,6 +1684,6 @@ triangle_func xmesa_get_triangle_func( GLcontext *ctx ) return flat_pixmap_triangle; } #endif - return NULL; + return (triangle_func)NULL; } } diff --git a/xc/extras/Mesa/src/X/xmesaP.h b/xc/extras/Mesa/src/X/xmesaP.h index 8d8000874..0886fa3dc 100644 --- a/xc/extras/Mesa/src/X/xmesaP.h +++ b/xc/extras/Mesa/src/X/xmesaP.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/X/xmesaP.h,v 1.7 2000/09/26 15:56:39 tsi Exp $ */ + #ifndef XMESAP_H #define XMESAP_H @@ -226,7 +226,7 @@ struct xmesa_buffer { /* Used to do XAllocColor/XFreeColors accounting: */ int num_alloced; - Pixel alloced_colors[256]; + unsigned long alloced_colors[256]; #if defined(GLX_DIRECT_RENDERING) && !defined(XFree86Server) __DRIdrawablePrivate *driDrawPriv; /* back pointer to DRI drawable diff --git a/xc/extras/Mesa/src/X86/3dnow.c b/xc/extras/Mesa/src/X86/3dnow.c index ef92d27c0..7b642dc04 100644 --- a/xc/extras/Mesa/src/X86/3dnow.c +++ b/xc/extras/Mesa/src/X86/3dnow.c @@ -1,20 +1,20 @@ /* * Mesa 3-D graphics library - * Version: 3.3 - * + * Version: 3.4 + * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -22,176 +22,163 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/X86/3dnow.c,v 1.6 2000/09/26 15:56:39 tsi Exp $ */ /* * 3DNow! 
optimizations contributed by * Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> */ -#if defined(USE_3DNOW_ASM) && defined(USE_X86_ASM) -#include "3dnow.h" - -#include <limits.h> -#include <stdio.h> -#include <stdlib.h> -#include <math.h> +#include "glheader.h" #include "context.h" #include "types.h" -#include "xform.h" #include "vertices.h" +#include "xform.h" +#include "3dnow.h" #ifdef DEBUG #include "debug_xform.h" #endif +#define XFORM_ARGS GLvector4f *to_vec, \ + const GLfloat m[16], \ + const GLvector4f *from_vec, \ + const GLubyte *mask, \ + const GLubyte flag + + +#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS ); + + +#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \ + gl_transform_tab[cma][sz][MATRIX_GENERAL] = \ + gl_##pfx##_transform_points##sz##_general_##masked; \ + gl_transform_tab[cma][sz][MATRIX_IDENTITY] = \ + gl_##pfx##_transform_points##sz##_identity_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \ + gl_##pfx##_transform_points##sz##_3d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \ + gl_##pfx##_transform_points##sz##_perspective_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D] = \ + gl_##pfx##_transform_points##sz##_2d_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \ + gl_##pfx##_transform_points##sz##_2d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D] = \ + 
gl_##pfx##_transform_points##sz##_3d_##masked; + + + +#define NORM_ARGS const GLmatrix *mat, \ + GLfloat scale, \ + const GLvector3f *in, \ + const GLfloat *lengths, \ + const GLubyte mask[], \ + GLvector3f *dest + + +#define DECLARE_NORM_GROUP( pfx, masked ) \ + extern void _ASMAPI gl_##pfx##_rescale_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_normalize_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normals_no_rot_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_rescale_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_rescale_normals_no_rot_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normalize_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normalize_normals_no_rot_##masked( NORM_ARGS ); + + +#define ASSIGN_NORM_GROUP( pfx, cma, masked ) \ + gl_normal_tab[NORM_RESCALE][cma] = \ + gl_##pfx##_rescale_normals_##masked; \ + gl_normal_tab[NORM_NORMALIZE][cma] = \ + gl_##pfx##_normalize_normals_##masked; \ + gl_normal_tab[NORM_TRANSFORM][cma] = \ + gl_##pfx##_transform_normals_##masked; \ + gl_normal_tab[NORM_TRANSFORM_NO_ROT][cma] = \ + gl_##pfx##_transform_normals_no_rot_##masked; \ + gl_normal_tab[NORM_TRANSFORM | NORM_RESCALE][cma] = \ + gl_##pfx##_transform_rescale_normals_##masked; \ + gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE][cma] = \ + gl_##pfx##_transform_rescale_normals_no_rot_##masked; \ + gl_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE][cma] = \ + gl_##pfx##_transform_normalize_normals_##masked; \ + gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE][cma] = \ + gl_##pfx##_transform_normalize_normals_no_rot_##masked; + + +#ifdef USE_3DNOW_ASM +DECLARE_XFORM_GROUP( 3dnow, 1, raw ) +DECLARE_XFORM_GROUP( 3dnow, 2, raw ) +DECLARE_XFORM_GROUP( 3dnow, 3, raw ) +DECLARE_XFORM_GROUP( 3dnow, 4, raw ) + +DECLARE_XFORM_GROUP( 3dnow, 1, 
masked ) +DECLARE_XFORM_GROUP( 3dnow, 2, masked ) +DECLARE_XFORM_GROUP( 3dnow, 3, masked ) +DECLARE_XFORM_GROUP( 3dnow, 4, masked ) + +DECLARE_NORM_GROUP( 3dnow, raw ) +/*DECLARE_NORM_GROUP( 3dnow, masked )*/ -#define XFORM_ARGS GLvector4f *to_vec, \ - const GLfloat m[16], \ - const GLvector4f *from_vec, \ - const GLubyte *mask, \ - const GLubyte flag - - - -#define DECLARE_XFORM_GROUP(pfx, v, masked) \ - extern void _ASMAPI gl##pfx##_transform_points##v##_general_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_points##v##_identity_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_points##v##_3d_no_rot_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_points##v##_perspective_##masked(XFORM_ARGS);\ - extern void _ASMAPI gl##pfx##_transform_points##v##_2d_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_points##v##_2d_no_rot_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_points##v##_3d_##masked(XFORM_ARGS); - - - -#define ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked ) \ - gl_transform_tab[cma][vsize][MATRIX_GENERAL] \ - = gl##pfx##_transform_points##vsize##_general_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_IDENTITY] \ - = gl##pfx##_transform_points##vsize##_identity_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D_NO_ROT] \ - = gl##pfx##_transform_points##vsize##_3d_no_rot_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_PERSPECTIVE] \ - = gl##pfx##_transform_points##vsize##_perspective_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D] \ - = gl##pfx##_transform_points##vsize##_2d_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D_NO_ROT] \ - = gl##pfx##_transform_points##vsize##_2d_no_rot_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D] \ - = gl##pfx##_transform_points##vsize##_3d_##masked; - - - - -#define NORM_ARGS const GLmatrix *mat, \ - GLfloat scale, \ - const GLvector3f *in, \ - const GLfloat *lengths, \ - const GLubyte mask[], \ - GLvector3f *dest - - 
- -#define DECLARE_NORM_GROUP(pfx, masked) \ - extern void _ASMAPI gl##pfx##_rescale_normals_##masked## (NORM_ARGS); \ - extern void _ASMAPI gl##pfx##_normalize_normals_##masked## (NORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_normals_##masked## (NORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_normals_no_rot_##masked## (NORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_rescale_normals_##masked## (NORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_rescale_normals_no_rot_##masked## (NORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_normalize_normals_##masked## (NORM_ARGS); \ - extern void _ASMAPI gl##pfx##_transform_normalize_normals_no_rot_##masked## (NORM_ARGS); - - - -#define ASSIGN_NORM_GROUP( pfx, cma, masked ) \ - gl_normal_tab[NORM_RESCALE][cma] = \ - gl##pfx##_rescale_normals_##masked##; \ - gl_normal_tab[NORM_NORMALIZE][cma] = \ - gl##pfx##_normalize_normals_##masked##; \ - gl_normal_tab[NORM_TRANSFORM][cma] = \ - gl##pfx##_transform_normals_##masked##; \ - gl_normal_tab[NORM_TRANSFORM_NO_ROT][cma] = \ - gl##pfx##_transform_normals_no_rot_##masked##; \ - gl_normal_tab[NORM_TRANSFORM | NORM_RESCALE][cma] = \ - gl##pfx##_transform_rescale_normals_##masked##; \ - gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE][cma] = \ - gl##pfx##_transform_rescale_normals_no_rot_##masked##; \ - gl_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE][cma] = \ - gl##pfx##_transform_normalize_normals_##masked##; \ - gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE][cma] = \ - gl##pfx##_transform_normalize_normals_no_rot_##masked##; - +extern void _ASMAPI gl_v16_3dnow_general_xform( GLfloat *first_vert, + const GLfloat *m, + const GLfloat *src, + GLuint src_stride, + GLuint count ); extern void _ASMAPI gl_3dnow_project_vertices( GLfloat *first, - GLfloat *last, - const GLfloat *m, - GLuint stride ); - -extern void _ASMAPI gl_3dnow_project_clipped_vertices( GLfloat *first, GLfloat *last, const GLfloat *m, - GLuint stride, - const GLubyte *clipmask 
); + GLuint stride ); -extern void _ASMAPI gl_v16_3dnow_general_xform( GLfloat *first_vert, - const GLfloat *m, - const GLfloat *src, - GLuint src_stride, - GLuint count ); +extern void _ASMAPI gl_3dnow_project_clipped_vertices( GLfloat *first, + GLfloat *last, + const GLfloat *m, + GLuint stride, + const GLubyte *clipmask ); +#endif -void gl_init_3dnow_asm_transforms (void) -{ - DECLARE_XFORM_GROUP( _3dnow, 1, raw ) - DECLARE_XFORM_GROUP( _3dnow, 2, raw ) - DECLARE_XFORM_GROUP( _3dnow, 3, raw ) - DECLARE_XFORM_GROUP( _3dnow, 4, raw ) - - DECLARE_XFORM_GROUP( _3dnow, 1, masked ) - DECLARE_XFORM_GROUP( _3dnow, 2, masked ) - DECLARE_XFORM_GROUP( _3dnow, 3, masked ) - DECLARE_XFORM_GROUP( _3dnow, 4, masked ) - - DECLARE_NORM_GROUP( _3dnow, raw ) -/* DECLARE_NORM_GROUP( _3dnow, masked ) -*/ - - ASSIGN_XFORM_GROUP( _3dnow, 0, 1, raw ) - ASSIGN_XFORM_GROUP( _3dnow, 0, 2, raw ) - ASSIGN_XFORM_GROUP( _3dnow, 0, 3, raw ) - ASSIGN_XFORM_GROUP( _3dnow, 0, 4, raw ) - - ASSIGN_XFORM_GROUP( _3dnow, CULL_MASK_ACTIVE, 1, masked ) - ASSIGN_XFORM_GROUP( _3dnow, CULL_MASK_ACTIVE, 2, masked ) - ASSIGN_XFORM_GROUP( _3dnow, CULL_MASK_ACTIVE, 3, masked ) - ASSIGN_XFORM_GROUP( _3dnow, CULL_MASK_ACTIVE, 4, masked ) - - ASSIGN_NORM_GROUP( _3dnow, 0, raw ) -/* ASSIGN_NORM_GROUP( _3dnow, CULL_MASK_ACTIVE, masked ) -*/ -#ifdef DEBUG - gl_test_all_transform_functions("3Dnow!"); - gl_test_all_normal_transform_functions("3Dnow!"); -#endif +void gl_init_3dnow_transform_asm( void ) +{ +#ifdef USE_3DNOW_ASM + ASSIGN_XFORM_GROUP( 3dnow, 0, 1, raw ); + ASSIGN_XFORM_GROUP( 3dnow, 0, 2, raw ); + ASSIGN_XFORM_GROUP( 3dnow, 0, 3, raw ); + ASSIGN_XFORM_GROUP( 3dnow, 0, 4, raw ); - /* Hook in some stuff for vertices.c. 
- */ - gl_xform_points3_v16_general = gl_v16_3dnow_general_xform; - gl_project_v16 = gl_3dnow_project_vertices; - gl_project_clipped_v16 = gl_3dnow_project_clipped_vertices; -} + ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 1, masked ); + ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 2, masked ); + ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 3, masked ); + ASSIGN_XFORM_GROUP( 3dnow, CULL_MASK_ACTIVE, 4, masked ); -#else + ASSIGN_NORM_GROUP( 3dnow, 0, raw ); +/* ASSIGN_NORM_GROUP( 3dnow, CULL_MASK_ACTIVE, masked ); */ +#ifdef DEBUG + gl_test_all_transform_functions( "3DNow!" ); + gl_test_all_normal_transform_functions( "3DNow!" ); +#endif +#endif +} -/* silence compiler warning */ -extern void _mesa_3dnow_dummy_function(void); -void _mesa_3dnow_dummy_function(void) +void gl_init_3dnow_vertex_asm( void ) { -} +#ifdef USE_3DNOW_ASM + gl_xform_points3_v16_general = gl_v16_3dnow_general_xform; + gl_project_v16 = gl_3dnow_project_vertices; + gl_project_clipped_v16 = gl_3dnow_project_clipped_vertices; +#if 0 + gl_test_all_vertex_functions( "3DNow!" ); #endif +#endif +} diff --git a/xc/extras/Mesa/src/X86/3dnow.h b/xc/extras/Mesa/src/X86/3dnow.h index c7d3040d5..6e34b6ce4 100644 --- a/xc/extras/Mesa/src/X86/3dnow.h +++ b/xc/extras/Mesa/src/X86/3dnow.h @@ -1,20 +1,20 @@ /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.4 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
- * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -23,72 +23,17 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - /* * 3DNow! optimizations contributed by * Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> */ - -#ifndef _3dnow_h -#define _3dnow_h - - +#ifndef __3DNOW_H__ +#define __3DNOW_H__ #include "xform.h" - -void gl_init_3dnow_asm_transforms (void); - - - - -#if 0 -GLvector4f *gl_project_points( GLvector4f *proj_vec, - const GLvector4f *clip_vec ) -{ - __asm__ ( - " femms \n" - " \n" - " movq (%0), %%mm0 # x1 | x0 \n" - " movq 8(%0), %%mm1 # oow | x2 \n" - " \n" - "1: movq %%mm1, %%mm2 # oow | x2 \n" - " addl %2, %0 # next point \n" - " \n" - " punpckhdq %%mm2, %%mm2 # oow | oow \n" - " addl $16, %1 # next point \n" - " \n" - " pfrcp %%mm2, %%mm3 # 1/oow | 1/oow \n" - " decl %3 \n" - " \n" - " pfmul %%mm3, %%mm0 # x1/oow | x0/oow \n" - " movq %%mm0, -16(%1) # write r0, r1 \n" - " \n" - " pfmul %%mm3, %%mm1 # 1 | x2/oow \n" - " movq (%0), %%mm0 # x1 | x0 \n" - " \n" - " movd %%mm1, 8(%1) # write r2 \n" - " movd %%mm3, 12(%1) # write r3 \n" - " \n" - " movq 8(%0), %%mm1 # oow | x2 \n" - " ja 1b \n" - " \n" - " femms \n" - " " - ::"a" (clip_vec->start), - "c" (proj_vec->start), - "g" (clip_vec->stride), - "d" (clip_vec->count) - ); - - proj_vec->flags |= VEC_SIZE_4; - proj_vec->size = 3; - proj_vec->count = clip_vec->count; - return proj_vec; -} -#endif - - +void gl_init_3dnow_transform_asm( void ); +void gl_init_3dnow_vertex_asm( void ); #endif diff --git a/xc/extras/Mesa/src/X86/3dnow_norm_raw.S b/xc/extras/Mesa/src/X86/3dnow_norm_raw.S index e074c3337..d844b3c87 100644 --- a/xc/extras/Mesa/src/X86/3dnow_norm_raw.S +++ b/xc/extras/Mesa/src/X86/3dnow_norm_raw.S @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 
3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -21,7 +21,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/X86/3dnow_norm_raw.S,v 1.4 2000/09/26 15:56:39 tsi Exp $ */ /* * 3Dnow assembly code by Holger Waechtler @@ -254,7 +253,7 @@ ALIGNTEXT16 GLOBL GLNAME(gl_3dnow_transform_normalize_normals_no_rot_raw) GLNAME(gl_3dnow_transform_normalize_normals_no_rot_raw): - #undef FRAME_OFFSET + #undef FRAME_OFFSET #define FRAME_OFFSET 12 PUSH_L ( EDI ) @@ -396,7 +395,7 @@ ALIGNTEXT16 GLOBL GLNAME(gl_3dnow_transform_rescale_normals_no_rot_raw) GLNAME(gl_3dnow_transform_rescale_normals_no_rot_raw): - #undef FRAME_OFFSET + #undef FRAME_OFFSET #define FRAME_OFFSET 12 PUSH_L ( EDI ) @@ -887,6 +886,3 @@ LLBL (G3R_end): POP_L ( ESI ) POP_L ( EDI ) RET - - - diff --git a/xc/extras/Mesa/src/X86/3dnow_vertex.S b/xc/extras/Mesa/src/X86/3dnow_vertex.S new file mode 100644 index 000000000..a66432c3a --- /dev/null +++ b/xc/extras/Mesa/src/X86/3dnow_vertex.S @@ -0,0 +1,220 @@ +#include "assyntax.h" + + SEG_TEXT + +#define MAT_SY 20 +#define MAT_SZ 40 +#define MAT_TX 48 +#define MAT_TY 52 +#define MAT_TZ 56 + + +/* + * void gl_v16_3dnow_general_xform( GLfloat *dest, + * const GLfloat *m, + * const GLfloat *src, + * GLuint src_stride, + * GLuint count ) + * + * These tranformation functions could disappear if the standard ones + * took an output stride. 
+ */ +GLOBL GLNAME( gl_v16_3dnow_general_xform ) +GLNAME( gl_v16_3dnow_general_xform ): + + PUSH_L ( EDI ) + PUSH_L ( ESI ) + + MOV_L ( REGOFF(12, ESP), EAX ) /* dest */ + MOV_L ( REGOFF(16, ESP), ESI ) /* mat */ + MOV_L ( REGOFF(20, ESP), EDX ) /* src */ + MOV_L ( REGOFF(24, ESP), EDI ) /* src_stride */ + MOV_L ( REGOFF(28, ESP), ECX ) /* count */ + + FEMMS + + MOVQ ( REGOFF(MAT_TX, ESI), MM7 ) /* ty | tx */ + MOVQ ( REGOFF(MAT_TZ, ESI), MM3 ) /* tw | tz */ + +ALIGNTEXT32 +LLBL( v16_3dnow_general_loop ): + + PREFETCHW ( REGOFF(128, EAX) ) /* write alloc 2 verts ahead */ + PREFETCH ( REGOFF(32, EDX) ) /* prefetch next cache line */ + + MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EDX), MM1 ) /* | x2 */ + MOVQ ( REGIND(ESI), MM4 ) /* m1 | m0 */ + PUNPCKHDQ ( MM0, MM2 ) /* x1 | */ + MOVQ ( REGOFF(16, ESI), MM5 ) /* m5 | m4 */ + PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ + MOVQ ( REGOFF(32, ESI), MM6 ) /* m9 | m8 */ + PFMUL ( MM0, MM4 ) /* x0*m1 | x0*m0 */ + PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */ + PFMUL ( MM2, MM5 ) /* x1*m5 | x1*m4 */ + PUNPCKLDQ ( MM1, MM1 ) /* x2 | x2 */ + PFMUL ( REGOFF(8, ESI), MM0 ) /* x0*m3 | x0*m2 */ + PFMUL ( REGOFF(24, ESI), MM2 ) /* x1*m7 | x1*m6 */ + PFMUL ( MM1, MM6 ) /* x2*m9 | x2*m8 */ + PFADD ( MM4, MM5 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ + PFMUL ( REGOFF(40, ESI), MM1 ) /* x2*m11 | x2*m10 */ + PFADD ( MM0, MM2 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */ + PFADD ( MM5, MM6 ) + PFADD ( MM1, MM2 ) + PFADD ( MM7, MM6 ) /* r1 | r0 */ + PFADD ( MM3, MM2 ) /* r3 | r2 */ + ADD_L ( EDI, EDX ) /* next input vertex */ + MOVQ ( MM6, REGIND(EAX) ) + MOVQ ( MM2, REGOFF(8, EAX) ) + ADD_L ( CONST(64), EAX ) /* next output vertex */ + DEC_L ( ECX ) + JNE ( LLBL(v16_3dnow_general_loop) ) + + FEMMS + + POP_L ( ESI ) + POP_L ( EDI ) + RET + + + +/* Do viewport map and perspective projection. 
Args should look like: + * + * gl_3dnow_project_vertices( float *first_vertex, + * const float *last_vertex, + * float *matrix, + * GLuint stride ) + * + * This routine assumes a sane vertex layout with x,y,z,w as + * the first four elements, to be projected in clip-space, to + * x/w,y/w,z/w,1/w, and then transformed according to the matrix to + * device space. The device coordinates will overwrite the clip + * coordinates as the first four elements of the vertex. + * + * If projection is required for other elements, such as texcoords, + * you will have to code a specialized version of this routine. See + * FX/X86 for examples. + * + * These routines are simplified versions of the FX code written by + * Holger. + */ + +GLOBL GLNAME( gl_3dnow_project_vertices ) +GLNAME( gl_3dnow_project_vertices ): + + PUSH_L ( EBP ) + FEMMS + PREFETCH ( REGOFF(8, ESP) ) /* fetch the first vertex */ + + MOV_L ( REGOFF(8, ESP), ECX ) /* first_vert */ + MOV_L ( REGOFF(12, ESP), EDX ) /* last_vert */ + MOV_L ( REGOFF(16, ESP), EBP ) /* matrix */ + MOV_L ( REGOFF(20, ESP), EAX ) /* stride */ + + MOVD ( REGOFF(MAT_TX, EBP), MM6 ) /* | tx */ + PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 ) /* ty | tx */ + MOVD ( REGIND(EBP), MM5 ) + PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 ) /* vsy | vsx */ + MOVD ( REGOFF(MAT_SZ, EBP), MM1 ) /* | vsz */ + SUB_L ( ECX, EDX ) /* last -= first */ + +ALIGNTEXT32 +LLBL( v16_3dnow_pv_loop_start ): + + PREFETCH ( REGOFF(64, ECX) ) /* fetch one/two verts ahead */ + MOVD ( REGOFF(12, ECX), MM0 ) /* | f[3] */ + PFRCP ( MM0, MM0 ) /* oow = 1/f[3] */ + MOVD ( REGOFF(12, ECX), MM7 ) /* | f[3] */ + PFRCPIT1 ( MM0, MM7 ) + PFRCPIT2 ( MM0, MM7 ) /* oow | oow */ + PUNPCKLDQ ( MM7, MM7 ) + MOVQ ( REGIND(ECX), MM2 ) /* f[1] | f[0] */ + PFMUL ( MM7, MM2 ) /* f[1] * oow | f[0] * oow */ + MOVD ( REGOFF(8, ECX), MM3 ) /* | f[2] */ + PFMUL ( MM7, MM3 ) /* | f[2] * oow */ + MOVD ( REGOFF(MAT_TZ, EBP), MM0 ) /* | vtz */ + PFMUL ( MM1, MM3 ) /* | f[2] *= vsz */ + PFADD ( MM0, MM3 ) /* | f[2] 
+= vtz */ + PFMUL ( MM5, MM2 ) /* f[1] *= vsy | f[0] *= vsx */ + PFADD ( MM6, MM2 ) /* f[1] += vty | f[0] += vtx */ + PUNPCKLDQ ( MM7, MM3 ) /* f[3] = oow | f[2] */ + MOVQ ( MM2, REGOFF(0, ECX) ) + MOVQ ( MM3, REGOFF(8, ECX) ) + ADD_L ( EAX, ECX ) /* f += stride */ + SUB_L ( EAX, EDX ) + JA ( LLBL(v16_3dnow_pv_loop_start) ) + + FEMMS + POP_L ( EBP ) + RET + + + +/* + * gl_3dnow_project_clipped_vertices( float *first_vertex, + * const float *last_vertex, + * float *matrix, + * GLuint stride, + * const GLubyte *clip_mask ) + */ +GLOBL GLNAME( gl_3dnow_project_clipped_vertices ) +GLNAME( gl_3dnow_project_clipped_vertices ): + + PUSH_L ( EBP ) + PUSH_L ( ESI ) + + FEMMS + + PREFETCH ( REGOFF(12, ESP) ) /* fetch the first vertex */ + + MOV_L ( REGOFF(12, ESP), ECX ) /* first_vert */ + MOV_L ( REGOFF(16, ESP), EDX ) /* last_vert */ + MOV_L ( REGOFF(20, ESP), EBP ) /* matrix */ + MOV_L ( REGOFF(24, ESP), EAX ) /* stride */ + MOV_L ( REGOFF(28, ESP), ESI ) /* clip_mask */ + + + MOVD ( REGOFF(MAT_TX, EBP), MM6 ) /* | tx */ + PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 ) /* ty | tx */ + MOVD ( REGIND(EBP), MM5 ) + PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 ) /* vsy | vsx */ + MOVD ( REGOFF(MAT_SZ, EBP), MM1 ) /* | vsz */ + +ALIGNTEXT32 +LLBL( v16_3dnow_pcv_loop_start ): + + CMP_B ( CONST(0), REGIND(ESI) ) + JNE ( LLBL(v16_3dnow_pcv_skip) ) + + MOVD ( REGOFF(12, ECX), MM0) /* | f[3] */ + PFRCP ( MM0, MM0 ) /* oow = 1/f[3] */ + MOVD ( REGOFF(12, ECX), MM7) /* | f[3] */ + PFRCPIT1 ( MM0, MM7 ) + PFRCPIT2 ( MM0, MM7 ) /* oow | oow */ + PUNPCKLDQ ( MM7, MM7 ) + MOVQ ( REGIND(ECX), MM2 ) /* f[1] | f[0] */ + PFMUL ( MM7, MM2 ) /* f[1] * oow | f[0] * oow */ + MOVD ( REGOFF(8, ECX), MM3 ) /* | f[2] */ + PFMUL ( MM7, MM3 ) /* | f[2] * oow */ + MOVD ( REGOFF(MAT_TZ, EBP), MM0 ) /* | vtz */ + PFMUL ( MM1, MM3 ) /* | f[2] *= vsz */ + PFADD ( MM0, MM3 ) /* | f[2] += vtz */ + PFMUL ( MM5, MM2 ) /* f[1] *= vsy | f[0] *= vsx */ + PFADD ( MM6, MM2 ) /* f[1] += vty | f[0] += vtx */ + PUNPCKLDQ ( MM7, MM3 ) 
/* f[3] = oow | f[2] */ + MOVQ ( MM2, REGOFF(0, ECX) ) + MOVQ ( MM3, REGOFF(8, ECX) ) + +LLBL( v16_3dnow_pcv_skip ): + + ADD_L ( EAX, ECX ) /* f += stride */ + INC_L ( ESI ) /* next clip_mask */ + + CMP_L ( ECX, EDX ) + JNE ( LLBL(v16_3dnow_pcv_loop_start) ) + + FEMMS + + POP_L ( ESI ) + POP_L ( EBP ) + RET diff --git a/xc/extras/Mesa/src/X86/3dnow_xform_masked1.S b/xc/extras/Mesa/src/X86/3dnow_xform_masked1.S index 622edd9dd..b9ae99202 100644 --- a/xc/extras/Mesa/src/X86/3dnow_xform_masked1.S +++ b/xc/extras/Mesa/src/X86/3dnow_xform_masked1.S @@ -1,528 +1,501 @@ -#include "assyntax.h" - SEG_TEXT +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#include "assyntax.h" +#include "xform_args.h" -ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_general_masked) -GLNAME( gl_3dnow_transform_points1_general_masked ): + SEG_TEXT - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) +#define FRAME_OFFSET 16 -ALIGNTEXT32 +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points1_general_masked ) +GLNAME( gl_3dnow_transform_points1_general_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ + MOVQ ( REGOFF(8, ECX), MM1 ) /* m03 | m02 */ + + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */ - MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ - MOVQ ( REGOFF(8, ECX), MM1 ) /* m03 | m02 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPGM_4 ) ) - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */ +ALIGNTEXT16 +LLBL( G3TPGM_2 ): - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPGM_4) ) + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TPGM_3 ) ) /* skip vertex */ + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ -ALIGNTEXT32 -LLBL(G3TPGM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TPGM_3) ) /* skip vertex */ + MOVQ ( MM4, MM5 ) /* x0 | x0 */ + PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ + PFMUL ( MM1, MM5 ) /* x0*m03 | x0*m02 */ + PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ - MOVQ ( MM4, MM5 ) /* x0 | x0 */ - PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ + PFADD ( MM3, MM5 ) /* x0*m03+m33 | x0*m02+m32 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - PFMUL ( MM1, MM5 ) /* x0*m03 | x0*m02 */ - PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ + MOVQ ( MM5, REGOFF(8, EDX) ) /* write r3, r2 */ - PFADD ( MM3, MM5 ) /* x0*m03+m33 | x0*m02+m32 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ +LLBL( G3TPGM_3 ): - MOVQ ( MM5, REGOFF(8, EDX) ) /* write r3, r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ -LLBL(G3TPGM_3): - ADD_L ( EDI, EAX ) /* next vertex */ + INC_L ( EBP ) /* next clipmask */ + ADD_L ( CONST(16), EDX ) /* next r */ - INC_L ( EBP ) /* next clipmask */ - ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPGM_2 ) ) /* cnt > 0 ? -> process next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPGM_2) ) /* cnt > 0 ? 
-> process next vertex */ +LLBL( G3TPGM_4 ): -LLBL(G3TPGM_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_identity_masked) -GLNAME( gl_3dnow_transform_points1_identity_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(1), REGOFF(16, ECX) ) - OR_B ( CONST(1), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points1_perspective_masked ) +GLNAME( gl_3dnow_transform_points1_perspective_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ -ALIGNTEXT32 + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPPM_4 ) ) -LLBL(G3TPIM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TPIM_3) /* skip vertex */ ) +ALIGNTEXT16 +LLBL( G3TPPM_2 ): - MOVD ( REGIND(EAX), MM0 ) /* | x0 */ - MOVD ( MM0, REGIND(EDX) ) /* | r0 */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TPPM_3 ) ) /* skip vertex */ -LLBL(G3TPIM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */ + PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ - JA ( LLBL(G3TPIM_2) /* cnt > 0 ? -> process next vertex */ ) +LLBL( G3TPPM_3 ): -LLBL(G3TPIM_4): - FEMMS + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + + JNZ ( LLBL( G3TPPM_2 ) ) /* cnt > 0 ? -> process next vertex */ + +LLBL( G3TPPM_4 ): + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_2d_masked) -GLNAME( gl_3dnow_transform_points1_2d_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points1_3d_masked ) +GLNAME( gl_3dnow_transform_points1_3d_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( 
REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ + MOVD ( REGOFF(8, ECX), MM1 ) /* | m02 */ - MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2M_4) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3M_4 ) ) +ALIGNTEXT16 +LLBL( G3TP3M_2 ): -ALIGNTEXT32 + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP3M_3 ) ) /* skip vertex */ -LLBL(G3TP2M_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP2M_3) ) /* skip vertex */ + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ + MOVQ ( MM4, MM5 ) /* | x0 */ + PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ - PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ - PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ + PFMUL ( MM1, MM5 ) /* | x0*m02 */ + PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + PFADD ( MM3, MM5 ) /* | x0*m02+m32 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ -LLBL(G3TP2M_3): - ADD_L ( EDI, EAX ) /* next vertex */ + MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */ - ADD_L ( CONST(16), EDX ) /* next r */ - INC_L ( EBP ) /* next clipmask */ +LLBL( G3TP3M_3 ): - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP2M_2) ) /* cnt > 0 ? -> process next vertex */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -LLBL(G3TP2M_4): - FEMMS + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP3M_2 ) ) /* cnt > 0 ? 
-> process next vertex */ + +LLBL( G3TP3M_4 ): + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_2d_no_rot_masked) -GLNAME( gl_3dnow_transform_points1_2d_no_rot_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points1_3d_no_rot_masked ) +GLNAME( gl_3dnow_transform_points1_3d_no_rot_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2NRM_4) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3NRM_4 ) ) +ALIGNTEXT16 +LLBL( G3TP3NRM_2 ): -ALIGNTEXT32 + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TP3NRM_3)) /* skip vertex */ -LLBL(G3TP2NRM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP2NRM_3) /* skip vertex */ ) + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + PFMUL ( MM0, MM4 ) /* | x0*m00 */ - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - PFMUL ( MM0, MM4 ) /* | x0*m00 */ + PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */ -LLBL(G3TP2NRM_3): - ADD_L ( CONST(16), EDX ) /* next r */ - ADD_L ( EDI, EAX ) /* next vertex */ +LLBL( G3TP3NRM_3 ): - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - JA ( LLBL(G3TP2NRM_2) /* cnt > 0 ? -> process next vertex */ ) + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP3NRM_2 ) ) /* cnt > 0 ? 
-> process next vertex */ -LLBL(G3TP2NRM_4): - FEMMS +LLBL( G3TP3NRM_4 ): + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_3d_masked) -GLNAME( gl_3dnow_transform_points1_3d_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points1_2d_masked ) +GLNAME( gl_3dnow_transform_points1_2d_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ - - FEMMS + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ - MOVD ( REGOFF(8, ECX), MM1 ) /* | m02 */ + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3M_4) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2M_4 ) ) -ALIGNTEXT32 +ALIGNTEXT16 +LLBL( G3TP2M_2 ): -LLBL(G3TP3M_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TP3M_3) /* skip vertex */ ) + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP2M_3 ) ) /* skip vertex */ - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ - MOVQ ( MM4, MM5 ) /* | x0 */ - PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ + PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ + PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ - PFMUL ( MM1, MM5 ) /* | x0*m02 */ - PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - PFADD ( MM3, MM5 ) /* | x0*m02+m32 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ +LLBL( G3TP2M_3 ): - MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ -LLBL(G3TP3M_3): - ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + INC_L ( EBP ) /* next clipmask */ - ADD_L ( CONST(16), EDX ) /* next r */ - INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2M_2 ) ) /* cnt > 0 ? -> process next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP3M_2) /* cnt > 0 ? 
-> process next vertex */ ) +LLBL( G3TP2M_4 ): -LLBL(G3TP3M_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_3d_no_rot_masked) -GLNAME( gl_3dnow_transform_points1_3d_no_rot_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - - -ALIGNTEXT32 +GLOBL GLNAME(gl_3dnow_transform_points1_2d_no_rot_masked) +GLNAME( gl_3dnow_transform_points1_2d_no_rot_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2NRM_4 ) ) - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3NRM_4) ) +ALIGNTEXT16 +LLBL( G3TP2NRM_2 ): -ALIGNTEXT32 + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TP2NRM_3 ) ) /* skip vertex */ -LLBL(G3TP3NRM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP3NRM_3) /* skip vertex */ ) + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + PFMUL ( MM0, MM4 ) /* | x0*m00 */ - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - PFMUL ( MM0, MM4 ) /* | x0*m00 */ + PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ +LLBL( G3TP2NRM_3 ): - MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */ + ADD_L ( CONST(16), EDX ) /* next r */ + ADD_L ( EDI, EAX ) /* next vertex */ -LLBL(G3TP3NRM_3): - ADD_L ( EDI, EAX ) /* next vertex */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - ADD_L ( CONST(16), EDX ) /* next r */ - INC_L ( EBP ) /* next clipmask */ + JNZ ( LLBL( G3TP2NRM_2 ) ) /* cnt > 0 ? -> process next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP3NRM_2) /* cnt > 0 ? 
-> process next vertex */ ) +LLBL( G3TP2NRM_4 ): -LLBL(G3TP3NRM_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_perspective_masked) -GLNAME( gl_3dnow_transform_points1_perspective_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points1_identity_masked ) +GLNAME( gl_3dnow_transform_points1_identity_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ - - FEMMS + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(1), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPPM_4) ) +ALIGNTEXT16 +LLBL( G3TPIM_2 ): -ALIGNTEXT32 + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TPIM_3 ) ) /* skip vertex */ -LLBL(G3TPPM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TPPM_3) /* skip vertex */ ) + MOVD ( REGIND(EAX), MM0 ) /* | x0 */ + MOVD ( MM0, REGIND(EDX) ) /* | r0 */ - MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */ - PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */ +LLBL( G3TPIM_3 ): - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -LLBL(G3TPPM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPIM_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TPPM_2) /* cnt > 0 ? -> process next vertex */ ) +LLBL( G3TPIM_4 ): -LLBL(G3TPPM_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - - - - - - - diff --git a/xc/extras/Mesa/src/X86/3dnow_xform_masked2.S b/xc/extras/Mesa/src/X86/3dnow_xform_masked2.S index 8ea66549f..37204c2dc 100644 --- a/xc/extras/Mesa/src/X86/3dnow_xform_masked2.S +++ b/xc/extras/Mesa/src/X86/3dnow_xform_masked2.S @@ -1,587 +1,568 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + #include "assyntax.h" +#include "xform_args.h" - SEG_TEXT + SEG_TEXT + +#define FRAME_OFFSET 16 ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_general_masked) +GLOBL GLNAME( gl_3dnow_transform_points2_general_masked ) GLNAME( gl_3dnow_transform_points2_general_masked ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 - - PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - FEMMS - - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ - PSLLQ ( CONST(32), MM7 ) /* m10 | */ - POR ( MM7, MM0 ) /* m10 | m00 */ - - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM1 ) /* m11 | m01 */ - - MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ - 
MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ - PSLLQ ( CONST(32), MM7 ) /* m12 | */ - POR ( MM7, MM2 ) /* m12 | m02 */ - - MOVD ( REGOFF(12, ECX), MM3 ) /* | m03 */ - MOVD ( REGOFF(28, ECX), MM7 ) /* | m13 */ - PSLLQ ( CONST(32), MM7 ) /* m13 | */ - POR ( MM7, MM3 ) /* m13 | m03 */ + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) + + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ + PSLLQ ( CONST(32), MM7 ) /* m10 | */ + POR ( MM7, MM0 ) /* m10 | m00 */ + + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM1 ) /* m11 | m01 */ + + MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ + MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ + PSLLQ ( CONST(32), MM7 ) /* m12 | */ + POR ( MM7, MM2 ) /* m12 | m02 */ + + MOVD ( REGOFF(12, ECX), MM3 ) /* | m03 */ + MOVD ( REGOFF(28, ECX), MM7 ) /* | m13 */ + PSLLQ ( CONST(32), MM7 ) /* m13 | */ + POR ( MM7, MM3 ) /* m13 | m03 */ + + MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ + MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */ + + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPGM_4 ) ) - MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ - MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */ +ALIGNTEXT16 +LLBL( G3TPGM_2 ): - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPGM_4) ) + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TPGM_3 ) ) /* skip vertex */ -ALIGNTEXT32 + MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ + MOVQ ( MM6, MM7 ) /* x1 | x0 */ -LLBL(G3TPGM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TPGM_3) ) /* skip vertex */ + PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ + PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ - MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ - MOVQ ( MM6, MM7 ) /* x1 | x0 */ + PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*x00+x1*m10 */ + PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ - PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ - PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ + MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ - PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*x00+x1*m10 */ - PFADD ( MM4, MM6 ) /* x0*...*m11+m31| x0*x00+x1*m10+m30 */ + MOVQ ( MM6, MM7 ) /* x1 | x0 */ + PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ - MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ - MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ + PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */ + PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*x02+x1*m12 */ - MOVQ ( MM6, MM7 ) /* x1 | x0 */ - PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ + PFADD ( MM5, MM6 ) /* x0*...*m13+m33 | x0*...*m12+m32 */ + MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */ - PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */ - PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*x02+x1*m12 */ +ALIGNTEXT16 +LLBL( G3TPGM_3 ): - PFADD ( MM5, MM6 ) /* x0*...*m13+m33| x0*x02+x1*m12+m32 */ - MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -LLBL(G3TPGM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPGM_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TPGM_2) ) /* cnt > 0 ? 
-> process next vertex */ +LLBL( G3TPGM_4 ): -LLBL(G3TPGM_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_identity_masked) -GLNAME( gl_3dnow_transform_points2_identity_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points2_perspective_masked ) +GLNAME( gl_3dnow_transform_points2_perspective_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(20, ECX), MM1 ) /* | m11 */ -ALIGNTEXT32 + PSLLQ ( CONST(32), MM1 ) /* m11 | */ + POR ( MM1, MM0 ) /* m11 | m00 */ -LLBL(G3TPIM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TPIM_3) ) /* skip vertex */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPPM_4 ) ) -LLBL(G3TPIM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ +ALIGNTEXT16 +LLBL( G3TPPM_2 ): - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TPPM_3 ) ) /* skip vertex */ - JA ( LLBL(G3TPIM_2) ) /* cnt > 0 ? -> process next vertex */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ -LLBL(G3TPIM_4): - FEMMS + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ + +LLBL( G3TPPM_3 ): + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + + JNZ ( LLBL( G3TPPM_2 ) ) /* cnt > 0 ? 
-> process next vertex */ + +LLBL( G3TPPM_4 ): + + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_2d_masked) -GLNAME( gl_3dnow_transform_points2_2d_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points2_3d_masked ) +GLNAME( gl_3dnow_transform_points2_3d_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ + PSLLQ ( CONST(32), MM7 ) /* m10 | */ + POR ( MM7, MM0 ) /* m10 | m00 */ - MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ - MOVQ ( REGOFF(16, ECX), MM1 ) /* m11 | m10 */ + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM1 ) /* m11 | m01 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2M_4) ) + MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ + MOVD ( 
REGOFF(24, ECX), MM7 ) /* | m12 */ + PSLLQ ( CONST(32), MM7 ) /* m12 | */ + POR ( MM7, MM2 ) /* m12 | m02 */ -ALIGNTEXT32 + MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM5 ) /* | m32 */ -LLBL(G3TP2M_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP2M_3) ) /* skip vertex */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3M_4 ) ) - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - MOVD ( REGOFF(4, EAX), MM5 ) /* | x1 */ +ALIGNTEXT16 +LLBL( G3TP3M_2 ): - PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ - PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP3M_3 ) ) /* skip vertex */ - PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */ - PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */ + MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ + MOVQ ( MM6, MM7 ) /* x1 | x0 */ - PFADD ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ - PFADD ( MM2, MM4 ) /* x...x1*m11+31 | x0*m00+x1*m10+m30 */ + PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ + PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*x00+x1*m10 */ + PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ -LLBL(G3TP2M_3): - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ - INC_L ( EBP ) /* next clipmask */ - ADD_L ( CONST(16), EDX ) /* next r */ + MOVQ ( MM6, MM7 ) /* x1 | x0 */ + PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP2M_2) ) /* cnt > 0 ? 
-> process next vertex */ + PFACC ( MM7, MM6 ) /* ***trash*** | x0*x02+x1*m12 */ -LLBL(G3TP2M_4): - FEMMS + PFADD ( MM5, MM6 ) /* ***trash*** | x0*...*m12+m32 */ + MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 */ + +LLBL( G3TP3M_3 ): + + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + + JNZ ( LLBL( G3TP3M_2 ) ) /* cnt > 0 ? -> process next vertex */ + +LLBL( G3TP3M_4 ): + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_2d_no_rot_masked) -GLNAME( gl_3dnow_transform_points2_2d_no_rot_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points2_3d_no_rot_masked ) +GLNAME( gl_3dnow_transform_points2_3d_no_rot_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(20, ECX), MM1 ) /* | m11 */ - MOVD ( 
REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(20, ECX), MM1 ) /* | m11 */ + PSLLQ ( CONST(32), MM1 ) /* m11 | */ + POR ( MM1, MM0 ) /* m11 | m00 */ - PSLLQ ( CONST(32), MM1 ) /* m11 | */ - POR ( MM1, MM0 ) /* m11 | m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2NRM_4) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3NRM_4 ) ) -ALIGNTEXT32 +ALIGNTEXT16 +LLBL( G3TP3NRM_2 ): -LLBL(G3TP2NRM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP2NRM_3) ) /* skip vertex */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP3NRM_3 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ -LLBL(G3TP2NRM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ +LLBL( G3TP3NRM_3 ): - JA ( LLBL(G3TP2NRM_2) /* cnt > 0 ? -> process next vertex */ ) + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -LLBL(G3TP2NRM_4): - FEMMS + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + + JNZ ( LLBL( G3TP3NRM_2 ) ) /* cnt > 0 ? 
-> process next vertex */ + +LLBL( G3TP3NRM_4 ): + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_3d_masked) -GLNAME( gl_3dnow_transform_points2_3d_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points2_2d_masked ) +GLNAME( gl_3dnow_transform_points2_2d_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - FEMMS - - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ - PSLLQ ( CONST(32), MM7 ) /* m10 | */ - POR ( MM7, MM0 ) /* m10 | m00 */ - - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM1 ) /* m11 | m01 */ + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ - MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ - PSLLQ ( CONST(32), MM7 ) /* m12 | */ - POR ( MM7, MM2 ) /* m12 | m02 */ + MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ + MOVQ ( 
REGOFF(16, ECX), MM1 ) /* m11 | m10 */ - MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ - MOVD ( REGOFF(56, ECX), MM5 ) /* | m32 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3M_4) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2M_4 ) ) -ALIGNTEXT32 +ALIGNTEXT16 +LLBL( G3TP2M_2 ): -LLBL(G3TP3M_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP3M_3) ) /* skip vertex */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP2M_3 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ - MOVQ ( MM6, MM7 ) /* x1 | x0 */ + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + MOVD ( REGOFF(4, EAX), MM5 ) /* | x1 */ - PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ - PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ + PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ + PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ - PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*x00+x1*m10 */ - PFADD ( MM4, MM6 ) /* x0*...*m11+m31| x0*x00+x1*m10+m30 */ + PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */ + PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */ - MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ - MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ + PFADD ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + PFADD ( MM2, MM4 ) /* x...x1*m11+31 | x0*...*m10+m30 */ - MOVQ ( MM6, MM7 ) /* x1 | x0 */ - PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - PFACC ( MM7, MM6 ) /* ***trash*** | x0*x02+x1*m12 */ +LLBL( G3TP2M_3 ): - PFADD ( MM5, MM6 ) /* ***trash*** | x0*x02+x1*m12+m32 */ - MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ -LLBL(G3TP3M_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + INC_L ( EBP ) /* next clipmask */ + ADD_L ( CONST(16), EDX ) /* next r */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2M_2 ) ) /* cnt > 0 ? 
-> process next vertex */ - JA ( LLBL(G3TP3M_2) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TP2M_4 ): -LLBL(G3TP3M_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_3d_no_rot_masked) -GLNAME( gl_3dnow_transform_points2_3d_no_rot_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points2_2d_no_rot_masked ) +GLNAME( gl_3dnow_transform_points2_2d_no_rot_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - FEMMS + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(20, ECX), MM1 ) /* | m11 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(20, ECX), MM1 ) /* | m11 */ - PSLLQ ( CONST(32), MM1 ) /* m11 | */ - POR ( MM1, MM0 ) /* m11 | m00 */ + PSLLQ ( CONST(32), MM1 ) /* m11 | */ + POR ( MM1, MM0 ) /* m11 | m00 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOVQ ( REGOFF(48, ECX), 
MM2 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3NRM_4) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2NRM_4 ) ) -ALIGNTEXT32 +ALIGNTEXT16 +LLBL( G3TP2NRM_2 ): -LLBL(G3TP3NRM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP3NRM_3) ) /* skip vertex */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP2NRM_3 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */ +LLBL( G3TP2NRM_3 ): -LLBL(G3TP3NRM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP3NRM_2) ) /* cnt > 0 ? -> process next vertex */ + JNZ ( LLBL( G3TP2NRM_2 ) ) /* cnt > 0 ? 
-> process next vertex */ -LLBL(G3TP3NRM_4): +LLBL( G3TP2NRM_4 ): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_perspective_masked) -GLNAME( gl_3dnow_transform_points2_perspective_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points2_identity_masked ) +GLNAME( gl_3dnow_transform_points2_identity_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ - - FEMMS - - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(20, ECX), MM1 ) /* | m11 */ + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - PSLLQ ( CONST(32), MM1 ) /* m11 | */ - POR ( MM1, MM0 ) /* m11 | m00 */ - - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPPM_4) ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) -ALIGNTEXT32 +ALIGNTEXT16 +LLBL( G3TPIM_2 ): -LLBL(G3TPPM_2): + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TPIM_3 ) ) /* skip vertex */ - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TPPM_3) ) /* skip vertex */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ +LLBL( G3TPIM_3 ): - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -LLBL(G3TPPM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPIM_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TPPM_2) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TPIM_4 ): -LLBL(G3TPPM_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - - - - diff --git a/xc/extras/Mesa/src/X86/3dnow_xform_masked3.S b/xc/extras/Mesa/src/X86/3dnow_xform_masked3.S index fbc66c34b..45686e03f 100644 --- a/xc/extras/Mesa/src/X86/3dnow_xform_masked3.S +++ b/xc/extras/Mesa/src/X86/3dnow_xform_masked3.S @@ -1,729 +1,709 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + #include "assyntax.h" +#include "xform_args.h" + + SEG_TEXT - SEG_TEXT +#define FRAME_OFFSET 16 ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points3_general_masked) +GLOBL GLNAME( gl_3dnow_transform_points3_general_masked ) GLNAME( gl_3dnow_transform_points3_general_masked ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 - - PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - FEMMS + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( 
REGOFF(16, ECX), MM7 ) /* | m10 */ - PSLLQ ( CONST(32), MM7 ) /* m10 | */ - POR ( MM7, MM0 ) /* m10 | m00 */ + PSLLQ ( CONST(32), MM7 ) /* m10 | */ + POR ( MM7, MM0 ) /* m10 | m00 */ - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM1 ) /* m11 | m01 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM1 ) /* m11 | m01 */ - MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */ - MOVQ ( REGOFF(48, ECX), MM3 ) /* m31 | m30 */ + MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */ + MOVQ ( REGOFF(48, ECX), MM3 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPGM_6) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPGM_6 ) ) PUSH_L ( EBP ) PUSH_L ( EAX ) PUSH_L ( EDX ) PUSH_L ( ESI ) +ALIGNTEXT16 +LLBL( G3TPGM_2 ): -ALIGNTEXT32 -LLBL(G3TPGM_2): + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TPGM_3 ) ) /* skip vertex */ - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TPGM_3) /* skip vertex */ ) + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM6 ) /* | x2 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM6 ) /* | x2 */ + MOVQ ( MM4, MM5 ) /* x1 | x0 */ + PFMUL ( MM0, MM4 ) /* x1*m10 | x0*m00 */ - MOVQ ( MM4, MM5 ) /* x1 | x0 */ - PFMUL ( MM0, MM4 ) /* x1*m10 | x0*m00 */ + PUNPCKLDQ ( MM6, MM6 ) /* x2 | x2 */ + PFMUL ( MM1, MM5 ) /* x1*m11 | x0*m01 */ - PUNPCKLDQ ( MM6, MM6 ) /* x2 | x2 */ - PFMUL ( MM1, MM5 ) /* x1*m11 | x0*m01 */ + PFMUL ( MM2, MM6 ) /* x2*m21 | x2*m20 */ + PFACC ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ - PFMUL ( MM2, MM6 ) /* x2*m21 | x2*m20 */ - PFACC ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + PFADD ( MM3, MM6 ) /* x2*m21+m31 | x2*m20+m30 */ + PFADD ( MM4, MM6 ) /* r1 | r0 */ - PFADD ( MM3, MM6 ) /* x2*m21+m31 | x2*m20+m30 */ - PFADD ( MM4, MM6 ) /* r1 | r0 */ + MOVQ ( MM6, REGIND(EDX) ) /* write r0, r1 */ - MOVQ ( MM6, REGIND(EDX) ) /* write r0, r1 */ +ALIGNTEXT16 +LLBL( G3TPGM_3 ): -LLBL(G3TPGM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPGM_2) /* cnt > 0 ? -> process next vertex */ ) - /* and now the second stripe ... */ + JNZ ( LLBL( G3TPGM_2 ) ) /* cnt > 0 ? -> process next vertex */ - POP_L ( ESI ) /* reset counter & pointers */ + /* and now the second stripe ... 
*/ + POP_L ( ESI ) /* reset counter & pointers */ POP_L ( EDX ) POP_L ( EAX ) POP_L ( EBP ) - MOVD ( REGOFF(8, ECX), MM0 ) /* | m02 */ - MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ + MOVD ( REGOFF(8, ECX), MM0 ) /* | m02 */ + MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ - PSLLQ ( CONST(32), MM7 ) /* m12 | */ - POR ( MM7, MM0 ) /* m12 | m02 */ + PSLLQ ( CONST(32), MM7 ) /* m12 | */ + POR ( MM7, MM0 ) /* m12 | m02 */ - MOVD ( REGOFF(12, ECX), MM1 ) /* | m03 */ - MOVD ( REGOFF(28, ECX), MM7 ) /* | m13 */ + MOVD ( REGOFF(12, ECX), MM1 ) /* | m03 */ + MOVD ( REGOFF(28, ECX), MM7 ) /* | m13 */ - PSLLQ ( CONST(32), MM7 ) /* m13 | */ - POR ( MM7, MM1 ) /* m13 | m03 */ + PSLLQ ( CONST(32), MM7 ) /* m13 | */ + POR ( MM7, MM1 ) /* m13 | m03 */ - MOVQ ( REGOFF(40, ECX), MM2 ) /* m23 | m22 */ - MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */ + MOVQ ( REGOFF(40, ECX), MM2 ) /* m23 | m22 */ + MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */ +ALIGNTEXT16 +LLBL( G3TPGM_4 ): -ALIGNTEXT32 -LLBL(G3TPGM_4): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TPGM_5) /* skip vertex */ ) + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TPGM_5 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM6 ) /* | x2 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM6 ) /* | x2 */ - MOVQ ( MM4, MM5 ) /* x1 | x0 */ - PFMUL ( MM0, MM4 ) /* x1*m12 | x0*m02 */ + MOVQ ( MM4, MM5 ) /* x1 | x0 */ + PFMUL ( MM0, MM4 ) /* x1*m12 | x0*m02 */ - PUNPCKLDQ ( MM6, MM6 ) /* x2 | x2 */ - PFMUL ( MM1, MM5 ) /* x1*m13 | x0*m03 */ + PUNPCKLDQ ( MM6, MM6 ) /* x2 | x2 */ + PFMUL ( MM1, MM5 ) /* x1*m13 | x0*m03 */ - PFMUL ( MM2, MM6 ) /* x2*m23 | x2*m22 */ - PFACC ( MM5, MM4 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ + PFMUL ( MM2, MM6 ) /* x2*m23 | x2*m22 */ + PFACC ( MM5, MM4 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ - PFADD ( MM3, MM6 ) /* x2*m23+m33 | x2*m22+m32 */ - PFADD ( MM4, MM6 ) /* r3 | r2 */ + PFADD ( MM3, MM6 ) /* x2*m23+m33 | x2*m22+m32 */ + PFADD ( MM4, MM6 ) /* r3 | r2 */ - MOVQ ( MM6, REGOFF(8, EDX) ) /* write r2, r3 */ + MOVQ ( MM6, REGOFF(8, EDX) ) /* write r2, r3 */ -LLBL(G3TPGM_5): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ +ALIGNTEXT16 +LLBL( G3TPGM_5 ): - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - JA ( LLBL(G3TPGM_4) /* cnt > 0 ? -> process next vertex */ ) + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ -LLBL(G3TPGM_6): + JNZ ( LLBL( G3TPGM_4 ) ) /* cnt > 0 ? 
-> process next vertex */ - FEMMS +LLBL( G3TPGM_6 ): + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points3_identity_masked) -GLNAME( gl_3dnow_transform_points3_identity_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points3_perspective_masked ) +GLNAME( gl_3dnow_transform_points3_perspective_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM0 ) /* m11 | m00 */ -ALIGNTEXT32 -LLBL(G3TPIM_2): + MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */ + MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TPIM_3) /* skip vertex */ ) + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPPM_4 ) ) - MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ - MOVD ( MM1, REGOFF(8, EDX) ) /* | r2 */ +ALIGNTEXT16 +LLBL( G3TPPM_2 ): -LLBL(G3TPIM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TPPM_3 ) ) /* skip vertex */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - JA ( LLBL(G3TPIM_2) /* cnt > 0 ? -> process next vertex */ ) + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + MOVQ ( MM5, MM6 ) /* | x2 */ -LLBL(G3TPIM_4): - FEMMS + PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ + PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */ - POP_L ( EBP ) - POP_L ( EBX ) - POP_L ( EDI ) - POP_L ( ESI ) + PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ + MOVQ ( MM5, REGIND(EDX) ) /* write r0, r1 */ - POP_L ( ESI ) - RET + MOVQ ( MM6, MM5 ) /* | x2 */ + PFMUL ( MM2, MM5 ) /* | x2*m22 */ + PFADD ( MM3, MM5 ) /* | x2*m22+m32 */ + PFSUBR ( MM7, MM6 ) /* (LO mm7 == 0) | -x2 */ + MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */ + MOVD ( MM6, REGOFF(12, EDX) ) /* write r3 */ ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points3_2d_masked) -GLNAME( gl_3dnow_transform_points3_2d_masked ): +LLBL( G3TPPM_3 ): - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -ALIGNTEXT32 + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - PUSH_L ( ESI ) - PUSH_L ( EDI ) - PUSH_L ( EBX ) 
- PUSH_L ( EBP ) + JNZ ( LLBL( G3TPPM_2 ) ) /* cnt > 0 ? -> process next vertex */ - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ +LLBL( G3TPPM_4 ): FEMMS + POP_L ( EBP ) + POP_L ( EBX ) + POP_L ( EDI ) + POP_L ( ESI ) + RET - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ - PSLLQ ( CONST(32), MM7 ) /* m10 | */ - POR ( MM7, MM0 ) /* m10 | m00 */ - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM1 ) /* m11 | m01 */ +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points3_3d_masked ) +GLNAME( gl_3dnow_transform_points3_3d_masked ): - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2M_4) ) + PUSH_L ( ESI ) + PUSH_L ( EDI ) + PUSH_L ( EBX ) + PUSH_L ( EBP ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) -ALIGNTEXT32 -LLBL(G3TP2M_2): + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TP2M_3) /* skip vertex */ ) + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ - MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ - MOVQ ( MM3, MM4 ) /* x1 | x0 */ + PSLLQ ( CONST(32), MM7 ) /* m10 | */ + POR ( MM7, MM0 ) /* m10 | m00 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ - PFACC ( MM4, MM3 ) /* x0*m00+x1*m10 | x0*m01+x1*m11 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM1 ) /* m11 | m01 */ - PFADD ( MM2, MM3 ) /* x0*...*m10+m30| x0*m01+x1*m11+m31 */ - MOVQ ( MM3, REGIND(EDX) ) /* write r0, r1 */ + MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */ + MOVQ ( REGOFF(48, ECX), MM3 ) /* m31 | m30 */ - MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 (=x2) */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3M_6 ) ) -LLBL(G3TP2M_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + PUSH_L ( EBP ) + PUSH_L ( EAX ) + PUSH_L ( EDX ) + PUSH_L ( ESI ) - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ +ALIGNTEXT16 +LLBL( G3TP3M_2 ): - JA ( LLBL(G3TP2M_2) /* cnt > 0 ? -> process next vertex */ ) + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TP3M_3 ) ) /* skip vertex */ -LLBL(G3TP2M_4): - FEMMS + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( MM4, MM5 ) /* x1 | x0 */ - POP_L ( EBP ) - POP_L ( EBX ) - POP_L ( EDI ) - POP_L ( ESI ) + PFMUL ( MM0, MM4 ) /* x1*m10 | x0*m00 */ + MOVD ( REGOFF(8, EAX), MM6 ) /* | x2 */ - POP_L ( ESI ) - RET + PFMUL ( MM1, MM5 ) /* x1*m11 | x0*m01 */ + PUNPCKLDQ ( MM6, MM6 ) /* x2 | x2 */ + PFMUL ( MM2, MM6 ) /* x2*m21 | x2*m20 */ + PFACC ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + PFADD ( MM3, MM6 ) /* x2*m21+m31 | x2*m20+m30 */ + PFADD ( MM4, MM6 ) /* r1 | r0 */ + MOVQ ( MM6, REGIND(EDX) ) /* write r0, r1 */ ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points3_2d_no_rot_masked) -GLNAME( gl_3dnow_transform_points3_2d_no_rot_masked ): +LLBL( G3TP3M_3 ): - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -ALIGNTEXT32 + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - PUSH_L ( ESI ) - PUSH_L ( EDI ) - PUSH_L ( EBX ) - PUSH_L ( EBP ) + JNZ ( LLBL( G3TP3M_2 ) ) /* cnt > 0 ? -> process next vertex */ - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + /* and now the second stripe ... 
*/ + MOVD ( REGOFF(8, ECX), MM0 ) /* | m02 */ - FEMMS + MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ + PSLLQ ( CONST(32), MM7 ) /* m12 | */ - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + POR ( MM7, MM0 ) /* m12 | m02 */ + MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM0 ) /* m11 | m00 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2NRM_4) ) + POP_L ( ESI ) /* reset counter & pointers */ + POP_L ( EDX ) + POP_L ( EAX ) + POP_L ( EBP ) +ALIGNTEXT16 +LLBL( G3TP3M_4 ): -ALIGNTEXT32 -LLBL(G3TP2NRM_2): + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP3M_5 ) ) /* skip vertex */ - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP2NRM_3) /* skip vertex */ ) + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( MM4, MM5 ) /* x1 | x0 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + PFMUL ( MM0, MM4 ) /* x1*m12 | x0*m02 */ + MOVD ( REGOFF(8, EAX), MM6 ) /* | x2 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 (=x2) */ + PFMUL ( MM1, MM5 ) /* x1*m13 | x0*m03 */ + PFACC ( MM5, MM4 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ - PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ + PFMUL ( MM2, MM6 ) /* | x2*m22 */ + PFADD ( MM3, MM6 ) /* | x2*m22+m32 */ -LLBL(G3TP2NRM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + PFADD ( MM4, MM6 ) /* | r2 */ + MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ +ALIGNTEXT16 +LLBL( G3TP3M_5 ): - JA ( LLBL(G3TP2NRM_2) /* cnt > 0 ? 
-> process next vertex */ ) + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -LLBL(G3TP2NRM_4): - FEMMS + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + + JNZ ( LLBL( G3TP3M_4 ) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TP3M_6 ): + + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points3_3d_masked) -GLNAME( gl_3dnow_transform_points3_3d_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points3_3d_no_rot_masked ) +GLNAME( gl_3dnow_transform_points3_3d_no_rot_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM0 ) /* m11 | m00 */ - PSLLQ ( CONST(32), MM7 ) /* m10 | */ - POR ( 
MM7, MM0 ) /* m10 | m00 */ + MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ + PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */ - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM1 ) /* m11 | m01 */ + PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */ - MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */ - MOVQ ( REGOFF(48, ECX), MM3 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3M_6) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3NRM_4 ) ) - PUSH_L ( EBP ) - PUSH_L ( EAX ) - PUSH_L ( EDX ) - PUSH_L ( ESI ) +ALIGNTEXT16 +LLBL( G3TP3NRM_2 ): + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP3NRM_3 ) ) /* skip vertex */ -ALIGNTEXT32 -LLBL(G3TP3M_2): + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP3M_3) /* skip vertex */ ) + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + PFMUL ( MM2, MM5 ) /* | x2*m22 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( MM4, MM5 ) /* x1 | x0 */ + PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ + PFADD ( MM3, MM5 ) /* | x2*m22+m32 */ - PFMUL ( MM0, MM4 ) /* x1*m10 | x0*m00 */ - MOVD ( REGOFF(8, EAX), MM6 ) /* | x2 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ + MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */ - PFMUL ( MM1, MM5 ) /* x1*m11 | x0*m01 */ - PUNPCKLDQ ( MM6, MM6 ) /* x2 | x2 */ +LLBL( G3TP3NRM_3 ): - PFMUL ( MM2, MM6 ) /* x2*m21 | x2*m20 */ - PFACC ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - PFADD ( MM3, MM6 ) /* x2*m21+m31 | x2*m20+m30 */ - PFADD ( MM4, MM6 ) /* r1 | r0 */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - MOVQ ( MM6, REGIND(EDX) ) /* write r0, r1 */ + JNZ ( LLBL( G3TP3NRM_2 ) ) /* cnt > 0 ? 
-> process next vertex */ -LLBL(G3TP3M_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ +LLBL( G3TP3NRM_4 ): - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + FEMMS + POP_L ( EBP ) + POP_L ( EBX ) + POP_L ( EDI ) + POP_L ( ESI ) + RET - JA ( LLBL(G3TP3M_2) /* cnt > 0 ? -> process next vertex */ ) - /* and now the second stripe ... */ - MOVD ( REGOFF(8, ECX), MM0 ) /* | m02 */ - MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ - PSLLQ ( CONST(32), MM7 ) /* m12 | */ - POR ( MM7, MM0 ) /* m12 | m02 */ - MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - POP_L ( ESI ) /* reset counter & pointers */ +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points3_2d_masked ) +GLNAME( gl_3dnow_transform_points3_2d_masked ): - POP_L ( EDX ) - POP_L ( EAX ) + PUSH_L ( ESI ) + PUSH_L ( EDI ) + PUSH_L ( EBX ) + PUSH_L ( EBP ) - POP_L ( EBP ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) -ALIGNTEXT32 -LLBL(G3TP3M_4): + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TP3M_5) /* skip vertex */ ) + PSLLQ ( CONST(32), MM7 ) /* m10 | */ + POR ( MM7, MM0 ) /* m10 | m00 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( MM4, MM5 ) /* x1 | x0 */ + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - PFMUL ( MM0, MM4 ) /* x1*m12 | x0*m02 */ - MOVD ( REGOFF(8, EAX), MM6 ) /* | x2 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM1 ) /* m11 | m01 */ - PFMUL ( MM1, MM5 ) /* x1*m13 | x0*m03 */ - PFACC ( MM5, MM4 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - PFMUL ( MM2, MM6 ) /* | x2*m22 */ - PFADD ( MM3, MM6 ) /* | x2*m22+m32 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2M_4 ) ) - PFADD ( MM4, MM6 ) /* | r2 */ - MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 */ +ALIGNTEXT16 +LLBL( G3TP2M_2 ): -LLBL(G3TP3M_5): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP2M_3 ) ) /* skip vertex */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ + MOVQ ( MM3, MM4 ) /* x1 | x0 */ - JA ( LLBL(G3TP3M_4) /* cnt > 0 ? -> process next vertex */ ) + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ -LLBL(G3TP3M_6): - FEMMS + PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ + PFACC ( MM4, MM3 ) /* x0*m00+x1*m10 | x0*m01+x1*m11 */ + + PFADD ( MM2, MM3 ) /* x0*...*m10+m30 | x0*...*m11+m31 */ + MOVQ ( MM3, REGIND(EDX) ) /* write r0, r1 */ + + MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 (=x2) */ +ALIGNTEXT16 +LLBL( G3TP2M_3 ): + + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + + JNZ ( LLBL( G3TP2M_2 ) ) /* cnt > 0 ? 
-> process next vertex */ + +LLBL( G3TP2M_4 ): + + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points3_3d_no_rot_masked) -GLNAME( gl_3dnow_transform_points3_3d_no_rot_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points3_2d_no_rot_masked ) +GLNAME( gl_3dnow_transform_points3_2d_no_rot_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ - - FEMMS + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM0 ) /* m11 | m00 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM0 ) /* m11 | m00 */ - MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOVQ ( 
REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3NRM_4) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2NRM_4 ) ) +ALIGNTEXT16 +LLBL( G3TP2NRM_2 ): -ALIGNTEXT32 -LLBL(G3TP3NRM_2): + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP2NRM_3 ) ) /* skip vertex */ - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP3NRM_3) /* skip vertex */ ) + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 (=x2) */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - PFMUL ( MM2, MM5 ) /* | x2*m22 */ + PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ - PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ - PFADD ( MM3, MM5 ) /* | x2*m22+m32 */ +ALIGNTEXT16 +LLBL( G3TP2NRM_3 ): - MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ - MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -LLBL(G3TP3NRM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2NRM_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TP3NRM_2) /* cnt > 0 ? 
-> process next vertex */ ) +LLBL( G3TP2NRM_4 ): -LLBL(G3TP3NRM_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points3_perspective_masked) -GLNAME( gl_3dnow_transform_points3_perspective_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points3_identity_masked ) +GLNAME( gl_3dnow_transform_points3_identity_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ - - FEMMS - - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM0 ) /* m11 | m00 */ - - MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */ - MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPPM_4) ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) -ALIGNTEXT32 -LLBL(G3TPPM_2): - - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TPPM_3) /* skip vertex */ ) - - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - MOVQ ( MM5, MM6 ) /* | x2 */ +ALIGNTEXT16 +LLBL( G3TPIM_2 ): - PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ - PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TPIM_3 ) ) /* skip vertex */ - PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ - MOVQ ( MM5, REGIND(EDX) ) /* write r0, r1 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ - MOVQ ( MM6, MM5 ) /* | x2 */ - PFMUL ( MM2, MM5 ) /* | x2*m22 */ + MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ + MOVD ( MM1, REGOFF(8, EDX) ) /* | r2 */ - PFADD ( MM3, MM5 ) /* | x2*m22+m32 */ - PFSUBR ( MM7, MM6 ) /* (LO mm7 == 0) | -x2 */ +LLBL( G3TPIM_3 ): - MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */ - MOVD ( MM6, REGOFF(12, EDX) ) /* write r3 */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -LLBL(G3TPPM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPIM_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TPPM_2) /* cnt > 0 ? -> process next vertex */ ) +LLBL( G3TPIM_4 ): -LLBL(G3TPPM_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - - - - - - - diff --git a/xc/extras/Mesa/src/X86/3dnow_xform_masked4.S b/xc/extras/Mesa/src/X86/3dnow_xform_masked4.S index 6c09efa21..ab496988f 100644 --- a/xc/extras/Mesa/src/X86/3dnow_xform_masked4.S +++ b/xc/extras/Mesa/src/X86/3dnow_xform_masked4.S @@ -1,63 +1,87 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + #include "assyntax.h" +#include "xform_args.h" - SEG_TEXT + SEG_TEXT -ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points4_general_masked) -GLNAME( gl_3dnow_transform_points4_general_masked ): +#define FRAME_OFFSET 16 - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_general_masked ) +GLNAME( gl_3dnow_transform_points4_general_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - FEMMS + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ - PSLLQ ( CONST(32), MM7 ) /* m10 | */ - POR ( MM7, MM0 ) /* m10 | m00 */ + PSLLQ ( CONST(32), MM7 ) /* m10 | */ + POR ( MM7, MM0 ) /* m10 | m00 */ - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, 
MM1 ) /* m11 | m01 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM1 ) /* m11 | m01 */ - MOVD ( REGOFF(32, ECX), MM2 ) /* | m20 */ - MOVD ( REGOFF(48, ECX), MM7 ) /* | m30 */ + MOVD ( REGOFF(32, ECX), MM2 ) /* | m20 */ + MOVD ( REGOFF(48, ECX), MM7 ) /* | m30 */ - PSLLQ ( CONST(32), MM7 ) /* m30 | */ - POR ( MM7, MM2 ) /* m30 | m20 */ + PSLLQ ( CONST(32), MM7 ) /* m30 | */ + POR ( MM7, MM2 ) /* m30 | m20 */ - MOVD ( REGOFF(36, ECX), MM3 ) /* | m21 */ - MOVD ( REGOFF(52, ECX), MM7 ) /* | m31 */ + MOVD ( REGOFF(36, ECX), MM3 ) /* | m21 */ + MOVD ( REGOFF(52, ECX), MM7 ) /* | m31 */ - PSLLQ ( CONST(32), MM7 ) /* m31 | */ - POR ( MM7, MM3 ) /* m31 | m21 */ + PSLLQ ( CONST(32), MM7 ) /* m31 | */ + POR ( MM7, MM3 ) /* m31 | m21 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPGM_6) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPGM_6 ) ) PUSH_L ( EBP ) PUSH_L ( EAX ) @@ -65,697 +89,664 @@ ALIGNTEXT32 PUSH_L ( ESI ) ALIGNTEXT32 +LLBL( G3TPGM_2 ): -LLBL(G3TPGM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TPGM_3) /* skip vertex */ ) + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TPGM_3 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( MM4, MM5 ) /* x1 | x0 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( MM4, MM5 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM6 ) /* x3 | x2 */ - PFMUL ( MM0, MM4 ) /* x1*m10 | x0*m00 */ + MOVQ ( REGOFF(8, EAX), MM6 ) /* x3 | x2 */ + PFMUL ( MM0, MM4 ) /* x1*m10 | x0*m00 */ - MOVQ ( MM6, MM7 ) /* x3 | x2 */ - PFMUL ( MM1, MM5 ) /* x1*m11 | x0*m01 */ + MOVQ ( MM6, MM7 ) /* x3 | x2 */ + PFMUL ( MM1, MM5 ) /* x1*m11 | x0*m01 */ - PFMUL ( MM2, MM6 ) /* x3*m30 | x2*m20 */ - PFACC ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + PFMUL ( MM2, MM6 ) /* x3*m30 | x2*m20 */ + PFACC ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ - PFMUL ( MM3, MM7 ) /* x3*m31 | x2*m21 */ - PFACC ( MM7, MM6 ) /* x2*m21+x3*m31 | x2*m20+x3*m30 */ + PFMUL ( MM3, MM7 ) /* x3*m31 | x2*m21 */ + PFACC ( MM7, MM6 ) /* x2*m21+x3*m31 | x2*m20+x3*m30 */ - PFADD ( MM4, MM6 ) /* r1 | r0 */ - MOVQ ( MM6, REGIND(EDX) ) /* write r0, r1 */ + PFADD ( MM4, MM6 ) /* r1 | r0 */ + MOVQ ( MM6, REGIND(EDX) ) /* write r0, r1 */ -LLBL(G3TPGM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ +LLBL( G3TPGM_3 ): - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - JA ( LLBL(G3TPGM_2) /* cnt > 0 ? -> process next vertex */ ) - /* and now the second stripe ... */ - MOVD ( REGOFF(8, ECX), MM0 ) /* | m02 */ - MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - PSLLQ ( CONST(32), MM7 ) /* m12 | */ - POR ( MM7, MM0 ) /* m12 | m02 */ + JNZ ( LLBL( G3TPGM_2 ) ) /* cnt > 0 ? -> process next vertex */ - MOVD ( REGOFF(12, ECX), MM1 ) /* | m03 */ - MOVD ( REGOFF(28, ECX), MM7 ) /* | m13 */ + /* and now the second stripe ... 
*/ + MOVD ( REGOFF(8, ECX), MM0 ) /* | m02 */ + MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ - PSLLQ ( CONST(32), MM7 ) /* m13 | */ - POR ( MM7, MM1 ) /* m13 | m03 */ + PSLLQ ( CONST(32), MM7 ) /* m12 | */ + POR ( MM7, MM0 ) /* m12 | m02 */ - MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - MOVD ( REGOFF(56, ECX), MM7 ) /* | m32 */ + MOVD ( REGOFF(12, ECX), MM1 ) /* | m03 */ + MOVD ( REGOFF(28, ECX), MM7 ) /* | m13 */ - PSLLQ ( CONST(32), MM7 ) /* m32 | */ - POR ( MM7, MM2 ) /* m32 | m22 */ + PSLLQ ( CONST(32), MM7 ) /* m13 | */ + POR ( MM7, MM1 ) /* m13 | m03 */ - MOVD ( REGOFF(44, ECX), MM3 ) /* | m23 */ - MOVD ( REGOFF(60, ECX), MM7 ) /* | m33 */ + MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ + MOVD ( REGOFF(56, ECX), MM7 ) /* | m32 */ - PSLLQ ( CONST(32), MM7 ) /* m33 | */ - POR ( MM7, MM3 ) /* m33 | m23 */ + PSLLQ ( CONST(32), MM7 ) /* m32 | */ + POR ( MM7, MM2 ) /* m32 | m22 */ - POP_L ( ESI ) /* reset counter & pointers */ + MOVD ( REGOFF(44, ECX), MM3 ) /* | m23 */ + MOVD ( REGOFF(60, ECX), MM7 ) /* | m33 */ + + PSLLQ ( CONST(32), MM7 ) /* m33 | */ + POR ( MM7, MM3 ) /* m33 | m23 */ + + POP_L ( ESI ) /* reset counter & pointers */ POP_L ( EDX ) POP_L ( EAX ) POP_L ( EBP ) ALIGNTEXT32 +LLBL( G3TPGM_4 ): -LLBL(G3TPGM_4): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TPGM_5) /* skip vertex */ ) + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TPGM_5 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( MM4, MM5 ) /* x1 | x0 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( MM4, MM5 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM6 ) /* x3 | x2 */ - PFMUL ( MM0, MM4 ) /* x1*m12 | x0*m02 */ + MOVQ ( REGOFF(8, EAX), MM6 ) /* x3 | x2 */ + PFMUL ( MM0, MM4 ) /* x1*m12 | x0*m02 */ - MOVQ ( MM6, MM7 ) /* x3 | x2 */ - PFMUL ( MM1, MM5 ) /* x1*m13 | x0*m03 */ + MOVQ ( MM6, MM7 ) /* x3 | x2 */ + PFMUL ( MM1, MM5 ) /* x1*m13 | x0*m03 */ - PFMUL ( MM2, MM6 ) /* x3*m32 | x2*m22 */ - PFACC ( MM5, MM4 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ + PFMUL ( MM2, MM6 ) /* x3*m32 | x2*m22 */ + PFACC ( MM5, MM4 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ - PFMUL ( MM3, MM7 ) /* x3*m33 | x2*m23 */ - PFACC ( MM7, MM6 ) /* x2*m23+x3*m33 | x2*m22+x3*m32 */ + PFMUL ( MM3, MM7 ) /* x3*m33 | x2*m23 */ + PFACC ( MM7, MM6 ) /* x2*m23+x3*m33 | x2*m22+x3*m32 */ - PFADD ( MM4, MM6 ) /* r3 | r2 */ - MOVQ ( MM6, REGOFF(8, EDX) ) /* write r2, r3 */ + PFADD ( MM4, MM6 ) /* r3 | r2 */ + MOVQ ( MM6, REGOFF(8, EDX) ) /* write r2, r3 */ -LLBL(G3TPGM_5): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ +LLBL( G3TPGM_5 ): - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - JA ( LLBL(G3TPGM_4) /* cnt > 0 ? -> process next vertex */ ) + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ -LLBL(G3TPGM_6): - FEMMS + JNZ ( LLBL( G3TPGM_4 ) ) /* cnt > 0 ? 
-> process next vertex */ +LLBL( G3TPGM_6 ): + + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points4_identity_masked) -GLNAME( gl_3dnow_transform_points4_identity_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points4_perspective_masked ) +GLNAME( gl_3dnow_transform_points4_perspective_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM0 ) /* m11 | m00 */ + + MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */ + MOVD ( REGOFF(56, ECX), MM7 ) /* | m32 */ + + PSLLQ ( CONST(32), MM7 ) /* m32 | */ + POR ( MM7, MM1 ) /* m32 | m22 */ + + MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */ + PUNPCKLDQ ( MM7, MM7 ) /* 0 | 0 */ + + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPPM_4 ) ) ALIGNTEXT32 +LLBL( G3TPPM_2 ): -LLBL(G3TPIM_2): - TEST_B ( BL, 
REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TPIM_3) /* skip vertex */ ) + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TPPM_3 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */ - MOVQ ( MM1, REGOFF(8, EDX) ) /* r3 | r2 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + MOVQ ( MM5, MM6 ) /* x3 | x2 */ -LLBL(G3TPIM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ + PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */ + PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ - JA ( LLBL(G3TPIM_2) /* cnt > 0 ? -> process next vertex */ ) + MOVQ ( MM5, REGIND(EDX) ) /* write r0, r1 */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ -LLBL(G3TPIM_4): - FEMMS + PFSUBR ( MM7, MM5 ) /* | -x2 */ + PFACC ( MM5, MM6 ) /* -x2 | x2*m22+x3*m32 */ + MOVQ ( MM6, REGOFF(8, EDX) ) /* write r2, r3 */ + +LLBL( G3TPPM_3 ): + + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + + JNZ ( LLBL( G3TPPM_2 ) ) /* cnt > 0 ? 
-> process next vertex */ + +LLBL( G3TPPM_4 ): + + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points4_2d_masked) -GLNAME( gl_3dnow_transform_points4_2d_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME(gl_3dnow_transform_points4_3d_masked) +GLNAME( gl_3dnow_transform_points4_3d_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ - - FEMMS - - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ - - PSLLQ ( CONST(32), MM7 ) /* m10 | */ - POR ( MM7, MM0 ) /* m10 | m00 */ + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM1 ) /* m11 | m01 */ + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2M_4) ) + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ -ALIGNTEXT32 + PSLLQ ( 
CONST(32), MM7 ) /* m10 | */ + POR ( MM7, MM0 ) /* m10 | m00 */ -LLBL(G3TP2M_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP2M_3) /* skip vertex */ ) + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ - MOVQ ( MM3, MM4 ) /* x1 | x0 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM1 ) /* m11 | m01 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ + MOVD ( REGOFF(32, ECX), MM2 ) /* | m20 */ + MOVD ( REGOFF(48, ECX), MM7 ) /* | m30 */ - MOVQ ( MM5, MM6 ) /* x3 | x2 */ - PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ + PSLLQ ( CONST(32), MM7 ) /* m30 | */ + POR ( MM7, MM2 ) /* m30 | m20 */ - PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ - PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + MOVD ( REGOFF(36, ECX), MM3 ) /* | m21 */ + MOVD ( REGOFF(52, ECX), MM7 ) /* | m31 */ - PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */ - PFADD ( MM6, MM3 ) /* r1 | r0 */ + PSLLQ ( CONST(32), MM7 ) /* m31 | */ + POR ( MM7, MM3 ) /* m31 | m21 */ - MOVQ ( MM3, REGIND(EDX) ) /* write r0, r1 */ - MOVQ ( MM5, REGOFF(8, EDX) ) /* write r2, r3 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3M_6 ) ) -LLBL(G3TP2M_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + PUSH_L ( EBP ) + PUSH_L ( EAX ) + PUSH_L ( EDX ) + PUSH_L ( ESI ) - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ +ALIGNTEXT32 +LLBL( G3TP3M_2 ): - JA ( LLBL(G3TP2M_2) /* cnt > 0 ? -> process next vertex */ ) + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TP3M_3 ) ) /* skip vertex */ -LLBL(G3TP2M_4): - FEMMS + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( MM4, MM5 ) /* x1 | x0 */ - POP_L ( EBP ) - POP_L ( EBX ) - POP_L ( EDI ) - POP_L ( ESI ) + PFMUL ( MM0, MM4 ) /* x1*m10 | x0*m00 */ + MOVQ ( REGOFF(8, EAX), MM6 ) /* x3 | x2 */ - POP_L ( ESI ) - RET + PFMUL ( MM1, MM5 ) /* x1*m11 | x0*m01 */ + MOVQ ( MM6, MM7 ) /* x3 | x2 */ + PFMUL ( MM2, MM6 ) /* x3*m30 | x2*m20 */ + PFACC ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + PFMUL ( MM3, MM7 ) /* x3*m31 | x2*m21 */ + PFACC ( MM7, MM6 ) /* x2*m21+x3*m31 | x2*m20+x3*m30 */ + PFADD ( MM4, MM6 ) /* r1 | r0 */ + MOVQ ( MM6, REGIND(EDX) ) /* write r0, r1 */ -ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points4_2d_no_rot_masked) -GLNAME( gl_3dnow_transform_points4_2d_no_rot_masked ): +LLBL( G3TP3M_3 ): - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ -ALIGNTEXT32 + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - PUSH_L ( ESI ) - PUSH_L ( EDI ) - PUSH_L ( EBX ) - PUSH_L ( EBP ) + JNZ ( LLBL( G3TP3M_2 ) ) /* cnt > 0 ? -> process next vertex */ - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + /* and now the second stripe ... 
*/ + MOVD ( REGOFF(8, ECX), MM0 ) /* | m02 */ + MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ - FEMMS + PSLLQ ( CONST(32), MM7 ) /* m12 | */ + POR ( MM7, MM0 ) /* m12 | m02 */ - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ + MOVD ( REGOFF(56, ECX), MM7 ) /* | m32 */ - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM0 ) /* m11 | m00 */ + PSLLQ ( CONST(32), MM7 ) /* m32 | */ + POR ( MM7, MM2 ) /* m32 | m22 */ - MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2NRM_4) ) + POP_L ( ESI ) /* reset counter & pointers */ + POP_L ( EDX ) + POP_L ( EAX ) + POP_L ( EBP ) ALIGNTEXT32 +LLBL( G3TP3M_4 ): -LLBL(G3TP2NRM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP2NRM_3) ) /* skip vertex */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP3M_5 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM6 ) /* x3 | x2 */ - MOVQ ( MM5, MM6 ) /* x3 | x2 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + PFMUL ( MM0, MM4 ) /* x1*m12 | x0*m02 */ + MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ - PUNPCKHDQ ( MM6, MM6 ) /* x3 | x2 */ - PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ + PFMUL ( MM2, MM6 ) /* x3*m32 | x2*m22 */ + PFACC ( MM4, MM6 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ - PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ + PFACC ( MM7, MM6 ) /* x3 | x2*m22+x3*m32 */ + MOVQ ( MM6, REGOFF(8, EDX) ) /* write r2, r3 */ - MOVQ ( MM5, REGOFF(8, EDX) ) /* write r2, r3 */ +LLBL( G3TP3M_5 ): -LLBL(G3TP2NRM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + 
INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP2NRM_2) /* cnt > 0 ? -> process next vertex */ ) + JNZ ( LLBL( G3TP3M_4 ) ) /* cnt > 0 ? -> process next vertex */ -LLBL(G3TP2NRM_4): - FEMMS +LLBL( G3TP3M_6 ): + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET -ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points4_3d_masked) -GLNAME( gl_3dnow_transform_points4_3d_masked ): - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_3d_no_rot_masked ) +GLNAME( gl_3dnow_transform_points4_3d_no_rot_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) + + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - FEMMS + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM0 ) /* m11 | m00 */ - PSLLQ ( CONST(32), MM7 ) /* m10 | */ - POR ( MM7, MM0 ) /* m10 | m00 */ + MOVD ( 
REGOFF(40, ECX), MM2 ) /* | m22 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + PSLLQ ( CONST(32), MM3 ) /* m32 | */ + POR ( MM3, MM2 ) /* m32 | m22 */ - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM1 ) /* m11 | m01 */ + MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - MOVD ( REGOFF(32, ECX), MM2 ) /* | m20 */ - MOVD ( REGOFF(48, ECX), MM7 ) /* | m30 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3NRM_4 ) ) - PSLLQ ( CONST(32), MM7 ) /* m30 | */ - POR ( MM7, MM2 ) /* m30 | m20 */ +ALIGNTEXT32 +LLBL( G3TP3NRM_2 ): - MOVD ( REGOFF(36, ECX), MM3 ) /* | m21 */ - MOVD ( REGOFF(52, ECX), MM7 ) /* | m31 */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP3NRM_3 ) ) /* skip vertex */ - PSLLQ ( CONST(32), MM7 ) /* m31 | */ - POR ( MM7, MM3 ) /* m31 | m21 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3M_6) ) + MOVQ ( MM5, MM6 ) /* x3 | x2 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - PUSH_L ( EBP ) - PUSH_L ( EAX ) - PUSH_L ( EDX ) - PUSH_L ( ESI ) + MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ + PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */ -ALIGNTEXT32 + PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ + PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */ -LLBL(G3TP3M_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ - JNZ ( LLBL(G3TP3M_3) ) /* skip vertex */ + PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ + PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( MM4, MM5 ) /* x1 | x0 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ + MOVQ ( MM5, REGOFF(8, EDX) ) /* write r2, r3 */ - PFMUL ( MM0, MM4 ) /* x1*m10 | x0*m00 */ - MOVQ ( REGOFF(8, EAX), MM6 ) /* x3 | x2 */ +LLBL( G3TP3NRM_3 ): - PFMUL ( MM1, MM5 ) /* x1*m11 | x0*m01 */ - MOVQ ( MM6, MM7 ) /* x3 | x2 */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - PFMUL ( MM2, MM6 ) /* x3*m30 | x2*m20 */ - PFACC ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - PFMUL ( MM3, MM7 ) /* x3*m31 | x2*m21 */ - PFACC ( MM7, MM6 ) /* x2*m21+x3*m31 | x2*m20+x3*m30 */ + JNZ ( LLBL( G3TP3NRM_2 ) ) /* cnt > 0 ? -> process next vertex */ - PFADD ( MM4, MM6 ) /* r1 | r0 */ - MOVQ ( MM6, REGIND(EDX) ) /* write r0, r1 */ +LLBL( G3TP3NRM_4 ): -LLBL(G3TP3M_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + FEMMS + POP_L ( EBP ) + POP_L ( EBX ) + POP_L ( EDI ) + POP_L ( ESI ) + RET - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP3M_2) ) /* cnt > 0 ? -> process next vertex */ - /* and now the second stripe ... 
*/ - MOVD ( REGOFF(8, ECX), MM0 ) /* | m02 */ - MOVD ( REGOFF(24, ECX), MM7 ) /* | m12 */ - PSLLQ ( CONST(32), MM7 ) /* m12 | */ - POR ( MM7, MM0 ) /* m12 | m02 */ - MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - MOVD ( REGOFF(56, ECX), MM7 ) /* | m32 */ +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_2d_masked ) +GLNAME( gl_3dnow_transform_points4_2d_masked ): - PSLLQ ( CONST(32), MM7 ) /* m32 | */ - POR ( MM7, MM2 ) /* m32 | m22 */ + PUSH_L ( ESI ) + PUSH_L ( EDI ) + PUSH_L ( EBX ) + PUSH_L ( EBP ) - POP_L ( ESI ) /* reset counter & pointers */ - POP_L ( EDX ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - POP_L ( EAX ) - POP_L ( EBP ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) + + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(16, ECX), MM7 ) /* | m10 */ + + PSLLQ ( CONST(32), MM7 ) /* m10 | */ + POR ( MM7, MM0 ) /* m10 | m00 */ + + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ + + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM1 ) /* m11 | m01 */ + + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2M_4 ) ) ALIGNTEXT32 +LLBL( G3TP2M_2 ): -LLBL(G3TP3M_4): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP3M_5) ) /* skip vertex */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? 
*/ + JNZ ( LLBL( G3TP2M_3 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM6 ) /* x3 | x2 */ + MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ + MOVQ ( MM3, MM4 ) /* x1 | x0 */ - PFMUL ( MM0, MM4 ) /* x1*m12 | x0*m02 */ - MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ - PFMUL ( MM2, MM6 ) /* x3*m32 | x2*m22 */ - PFACC ( MM4, MM6 ) /* x0*m03+x1*m13 | x0*m02+x1*m12 */ + MOVQ ( MM5, MM6 ) /* x3 | x2 */ + PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ - PFACC ( MM7, MM6 ) /* x3 | x2*m22+x3*m32 */ - MOVQ ( MM6, REGOFF(8, EDX) ) /* write r2, r3 */ + PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ + PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ -LLBL(G3TP3M_5): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */ + PFADD ( MM6, MM3 ) /* r1 | r0 */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + MOVQ ( MM3, REGIND(EDX) ) /* write r0, r1 */ + MOVQ ( MM5, REGOFF(8, EDX) ) /* write r2, r3 */ - JA ( LLBL(G3TP3M_4) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TP2M_3 ): -LLBL(G3TP3M_6): - FEMMS + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ + + JNZ ( LLBL( G3TP2M_2 ) ) /* cnt > 0 ? 
-> process next vertex */ +LLBL( G3TP2M_4 ): + + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points4_3d_no_rot_masked) -GLNAME( gl_3dnow_transform_points4_3d_no_rot_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points4_2d_no_rot_masked ) +GLNAME( gl_3dnow_transform_points4_2d_no_rot_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - FEMMS - - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM0 ) /* m11 | m00 */ + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - PSLLQ ( CONST(32), MM3 ) /* m32 | */ - POR ( MM3, MM2 ) /* m32 | m22 */ + PSLLQ ( CONST(32), MM7 ) /* m11 | */ + POR ( MM7, MM0 ) /* m11 | m00 */ - MOVQ ( REGOFF(48, ECX), MM1 
) /* m31 | m30 */ + MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3NRM_4) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2NRM_4 ) ) ALIGNTEXT32 +LLBL( G3TP2NRM_2 ): -LLBL(G3TP3NRM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TP3NRM_3) ) /* skip vertex */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TP2NRM_3 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - MOVQ ( MM5, MM6 ) /* x3 | x2 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + MOVQ ( MM5, MM6 ) /* x3 | x2 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ - PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */ + PUNPCKHDQ ( MM6, MM6 ) /* x3 | x2 */ + PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ - PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ - PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */ + PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ - PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ - PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ + MOVQ ( MM5, REGOFF(8, EDX) ) /* write r2, r3 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ - MOVQ ( MM5, REGOFF(8, EDX) ) /* write r2, r3 */ +LLBL( G3TP2NRM_3 ): -LLBL(G3TP3NRM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP3NRM_2) /* cnt > 0 ? -> process next vertex */ ) + JNZ ( LLBL( G3TP2NRM_2 ) ) /* cnt > 0 ? 
-> process next vertex */ -LLBL(G3TP3NRM_4): - FEMMS +LLBL( G3TP2NRM_4 ): + FEMMS POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points4_perspective_masked) -GLNAME( gl_3dnow_transform_points4_perspective_masked ): - - PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) - -ALIGNTEXT32 +GLOBL GLNAME( gl_3dnow_transform_points4_identity_masked ) +GLNAME( gl_3dnow_transform_points4_identity_masked ): PUSH_L ( ESI ) PUSH_L ( EDI ) PUSH_L ( EBX ) PUSH_L ( EBP ) - MOV_L ( REGOFF(4, ECX), EDX ) - MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) /* count */ - MOV_L ( REGOFF(12, EAX), EDI ) /* input stride */ - MOV_L ( REGOFF(4, EAX), EAX ) - MOV_L ( REGOFF(36, ESP), EBP ) /* clipmask */ - MOV_B ( REGOFF(40, ESP), BL ) /* clip flag */ - - FEMMS + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(20, ECX), MM7 ) /* | m11 */ - - PSLLQ ( CONST(32), MM7 ) /* m11 | */ - POR ( MM7, MM0 ) /* m11 | m00 */ - - MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */ - MOVD ( REGOFF(56, ECX), MM7 ) /* | m32 */ - - PSLLQ ( CONST(32), MM7 ) /* m32 | */ - POR ( MM7, MM1 ) /* m32 | m22 */ - - MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */ - PUNPCKLDQ ( MM7, MM7 ) /* 0 | 0 */ + MOV_L ( REGOFF(V4F_START, ECX), EDX ) + MOV_L ( ESI, ECX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) + MOV_L ( ARG_CLIP, EBP ) + MOV_B ( ARG_FLAG, BL ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPPM_4) ) + TEST_L ( ESI, ESI 
) + JZ ( LLBL( G3TPIM_4 ) ) ALIGNTEXT32 +LLBL( G3TPIM_2 ): -LLBL(G3TPPM_2): - TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ - JNZ ( LLBL(G3TPPM_3) ) /* skip vertex */ + TEST_B ( BL, REGIND(EBP) ) /* mask [i] != clip flag ?? */ + JNZ ( LLBL( G3TPIM_3 ) ) /* skip vertex */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - MOVQ ( MM5, MM6 ) /* x3 | x2 */ + MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */ + MOVQ ( MM1, REGOFF(8, EDX) ) /* r3 | r2 */ - PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ - PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */ +LLBL( G3TPIM_3 ): - PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */ - PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - MOVQ ( MM5, REGIND(EDX) ) /* write r0, r1 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + INC_L ( EBP ) /* next clipmask */ + DEC_L ( ESI ) /* decrement vertex counter */ - PFSUBR ( MM7, MM5 ) /* | -x2 */ - PFACC ( MM5, MM6 ) /* -x2 | x2*m22+x3*m32 */ + JNZ ( LLBL( G3TPIM_2 ) ) /* cnt > 0 ? -> process next vertex */ - MOVQ ( MM6, REGOFF(8, EDX) ) /* write r2, r3 */ +LLBL( G3TPIM_4 ): -LLBL(G3TPPM_3): - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ - - INC_L ( EBP ) /* next clipmask */ - DEC_L ( ESI ) /* decrement vertex counter */ - - JA ( LLBL(G3TPPM_2) ) /* cnt > 0 ? 
-> process next vertex */ - -LLBL(G3TPPM_4): FEMMS - POP_L ( EBP ) POP_L ( EBX ) POP_L ( EDI ) POP_L ( ESI ) - - POP_L ( ESI ) RET - - - - diff --git a/xc/extras/Mesa/src/X86/3dnow_xform_raw1.S b/xc/extras/Mesa/src/X86/3dnow_xform_raw1.S index 4095fe558..5f097ff87 100644 --- a/xc/extras/Mesa/src/X86/3dnow_xform_raw1.S +++ b/xc/extras/Mesa/src/X86/3dnow_xform_raw1.S @@ -1,125 +1,141 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + #include "assyntax.h" +#include "xform_args.h" + + SEG_TEXT - SEG_TEXT +#define FRAME_OFFSET 4 ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_general_raw) -GLNAME(gl_3dnow_transform_points1_general_raw): +GLOBL GLNAME( gl_3dnow_transform_points1_general_raw ) +GLNAME( gl_3dnow_transform_points1_general_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) MOV_L ( REGOFF(4, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - FEMMS + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPGR_3 ) ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPGR_3) ) + MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ + MOVQ ( REGOFF(8, ECX), MM1 ) /* m03 | m02 */ - MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ - MOVQ ( REGOFF(8, ECX), MM1 ) /* m03 | m02 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVQ ( REGOFF(56, ECX), MM3 ) /* m33 | m32 */ +ALIGNTEXT16 +LLBL( G3TPGR_2 ): -ALIGNTEXT32 -LLBL(G3TPGR_2): + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ + MOVQ ( MM4, MM5 ) /* x0 | x0 */ + PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ - MOVQ ( MM4, MM5 ) /* x0 | x0 */ - PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ 
+ PFMUL ( MM1, MM5 ) /* x0*m03 | x0*m02 */ + PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ - PFMUL ( MM1, MM5 ) /* x0*m03 | x0*m02 */ - PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ + PFADD ( MM3, MM5 ) /* x0*m03+m33 | x0*m02+m32 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - PFADD ( MM3, MM5 ) /* x0*m03+m33 | x0*m02+m32 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( MM5, REGOFF(8, EDX) ) /* write r3, r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ - MOVQ ( MM5, REGOFF(8, EDX) ) /* write r3, r2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ - ADD_L ( CONST(16), EDX ) /* next r */ - DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TPGR_2) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TPGR_3 ): -LLBL(G3TPGR_3): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_identity_raw) -GLNAME(gl_3dnow_transform_points1_identity_raw): +GLOBL GLNAME( gl_3dnow_transform_points1_identity_raw ) +GLNAME( gl_3dnow_transform_points1_identity_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(1), REGOFF(16, ECX) ) - OR_B ( CONST(1), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(1), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_1), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) MOV_L ( REGOFF(4, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, 
EAX), EAX ) - FEMMS + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPIR_4) ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPIR_4) ) +ALIGNTEXT16 +LLBL( G3TPIR_3 ): -LLBL(G3TPIR_3): + MOVD ( REGIND(EAX), MM0 ) /* | x0 */ + ADD_L ( EDI, EAX ) /* next vertex */ - MOVD ( REGIND(EAX), MM0 ) /* | x0 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVD ( MM0, REGIND(EDX) ) /* | r0 */ + ADD_L ( CONST(16), EDX ) /* next r */ - MOVD ( MM0, REGIND(EDX) ) /* | r0 */ - ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPIR_3) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TPIR_4 ): -LLBL(G3TPIR_4): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET @@ -127,322 +143,280 @@ LLBL(G3TPIR_4): ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_2d_raw) -GLNAME(gl_3dnow_transform_points1_2d_raw): +GLOBL GLNAME( gl_3dnow_transform_points1_3d_no_rot_raw ) +GLNAME( gl_3dnow_transform_points1_3d_no_rot_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) MOV_L ( REGOFF(4, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - FEMMS + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3NRR_3 ) ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2R_3) ) + MOVD ( 
REGIND(ECX), MM0 ) /* | m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ +ALIGNTEXT16 +LLBL( G3TP3NRR_2 ): -ALIGNTEXT32 -LLBL(G3TP2R_2): + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + PFMUL ( MM0, MM4 ) /* | x0*m00 */ - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ + PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ - PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ + MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ - ADD_L ( CONST(16), EDX ) /* next r */ - DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TP2R_2) ) /* cnt > 0 ? 
-> process next vertex */ +LLBL( G3TP3NRR_3 ): -LLBL(G3TP2R_3): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_2d_no_rot_raw) -GLNAME(gl_3dnow_transform_points1_2d_no_rot_raw): +GLOBL GLNAME( gl_3dnow_transform_points1_perspective_raw ) +GLNAME( gl_3dnow_transform_points1_perspective_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) MOV_L ( REGOFF(4, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - FEMMS + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2NRR_3) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPPR_3 ) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ +ALIGNTEXT16 +LLBL( G3TPPR_2 ): -ALIGNTEXT32 -LLBL(G3TP2NRR_2): + MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */ + PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */ - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ - PFMUL ( MM0, MM4 ) /* | x0*m00 */ - PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ 
- ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP2NRR_2) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TPPR_3 ): -LLBL(G3TP2NRR_3): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_3d_raw) -GLNAME(gl_3dnow_transform_points1_3d_raw): +GLOBL GLNAME( gl_3dnow_transform_points1_2d_raw ) +GLNAME( gl_3dnow_transform_points1_2d_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) MOV_L ( REGOFF(4, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - FEMMS - - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3R_3) ) - - MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ - MOVD ( REGOFF(8, ECX), MM1 ) /* | m02 */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2R_3 ) ) + MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ -ALIGNTEXT32 -LLBL(G3TP3R_2): - - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ +ALIGNTEXT16 +LLBL( G3TP2R_2 ): - MOVQ ( MM4, MM5 ) /* | x0 */ - PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ + MOVD ( 
REGIND(EAX), MM4 ) /* | x0 */ + PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ - PFMUL ( MM1, MM5 ) /* | x0*m02 */ - PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ + PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ + PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ - PFADD ( MM3, MM5 ) /* | x0*m02+m32 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + ADD_L ( EDI, EAX ) /* next vertex */ - MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ - ADD_L ( CONST(16), EDX ) /* next r */ - DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TP3R_2) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TP2R_3 ): -LLBL(G3TP3R_3): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_3d_no_rot_raw) -GLNAME(gl_3dnow_transform_points1_3d_no_rot_raw): +GLOBL GLNAME( gl_3dnow_transform_points1_2d_no_rot_raw ) +GLNAME( gl_3dnow_transform_points1_2d_no_rot_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) MOV_L ( REGOFF(4, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - FEMMS - - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3NRR_3) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( 
REGOFF(V4F_START, EAX), EAX ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2NRR_3 ) ) - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ -ALIGNTEXT32 -LLBL(G3TP3NRR_2): +ALIGNTEXT16 +LLBL( G3TP2NRR_2 ): - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - PFMUL ( MM0, MM4 ) /* | x0*m00 */ + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + ADD_L ( EDI, EAX ) /* next vertex */ - PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + PFMUL ( MM0, MM4 ) /* | x0*m00 */ + PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ - MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + ADD_L ( CONST(16), EDX ) /* next r */ - ADD_L ( CONST(16), EDX ) /* next r */ - DEC_L ( ESI ) /* decrement vertex counter */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TP3NRR_2) ) /* cnt > 0 ? 
-> process next vertex */ +LLBL( G3TP2NRR_3 ): -LLBL(G3TP3NRR_3): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points1_perspective_raw) -GLNAME(gl_3dnow_transform_points1_perspective_raw): +GLOBL GLNAME( gl_3dnow_transform_points1_3d_raw ) +GLNAME( gl_3dnow_transform_points1_3d_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) MOV_L ( REGOFF(4, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - FEMMS + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPPR_3) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3R_3 ) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ + MOVD ( REGOFF(8, ECX), MM1 ) /* | m02 */ -ALIGNTEXT32 -LLBL(G3TPPR_2): - MOVD ( REGIND(EAX), MM4 ) /* 0 | x0 */ - PFMUL ( MM0, MM4 ) /* 0 | x0*m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ - - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ - - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPPR_2) ) /* cnt > 0 ? 
-> process next vertex */ +ALIGNTEXT16 +LLBL( G3TP3R_2 ): -LLBL(G3TPPR_3): - FEMMS + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ - POP_L ( ESI ) - POP_L ( EDI ) + MOVQ ( MM4, MM5 ) /* | x0 */ + PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ - POP_L ( ESI ) - RET + PFMUL ( MM1, MM5 ) /* | x0*m02 */ + PFADD ( MM2, MM4 ) /* x0*m01+m31 | x0*m00+m30 */ + PFADD ( MM3, MM5 ) /* | x0*m02+m32 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + MOVD ( MM5, REGOFF(8, EDX) ) /* write r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TP3R_3 ): + FEMMS + POP_L ( EDI ) + POP_L ( ESI ) + RET diff --git a/xc/extras/Mesa/src/X86/3dnow_xform_raw2.S b/xc/extras/Mesa/src/X86/3dnow_xform_raw2.S index 5355a16b9..1d785e4fd 100644 --- a/xc/extras/Mesa/src/X86/3dnow_xform_raw2.S +++ b/xc/extras/Mesa/src/X86/3dnow_xform_raw2.S @@ -1,142 +1,165 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + #include "assyntax.h" +#include "xform_args.h" + + SEG_TEXT - SEG_TEXT +#define FRAME_OFFSET 4 ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_general_raw) -GLNAME(gl_3dnow_transform_points2_general_raw): +GLOBL GLNAME( gl_3dnow_transform_points2_general_raw ) +GLNAME( gl_3dnow_transform_points2_general_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - FEMMS + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPGR_3) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPGR_3 ) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ - MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ - PUNPCKLDQ ( REGOFF(24, ECX), 
MM2 ) /* m12 | m02 */ + MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ + PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */ - MOVD ( REGOFF(12, ECX), MM3 ) /* | m03 */ - PUNPCKLDQ ( REGOFF(28, ECX), MM3 ) /* m13 | m03 */ + MOVD ( REGOFF(12, ECX), MM3 ) /* | m03 */ + PUNPCKLDQ ( REGOFF(28, ECX), MM3 ) /* m13 | m03 */ - MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ - MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */ + MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ + MOVQ ( REGOFF(56, ECX), MM5 ) /* m33 | m32 */ +ALIGNTEXT16 +LLBL( G3TPGR_2 ): -ALIGNTEXT32 -LLBL(G3TPGR_2): + MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ + MOVQ ( MM6, MM7 ) /* x1 | x0 */ - MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ - MOVQ ( MM6, MM7 ) /* x1 | x0 */ + PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ + PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ - PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ - PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ + PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*x00+x1*m10 */ + PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ - PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*x00+x1*m10 */ - PFADD ( MM4, MM6 ) /* x0*...*m11+m31| x0*x00+x1*m10+m30 */ + MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ - MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ - MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ + MOVQ ( MM6, MM7 ) /* x1 | x0 */ + PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ - MOVQ ( MM6, MM7 ) /* x1 | x0 */ - PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ + PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */ + ADD_L ( EDI, EAX ) /* next vertex */ - PFMUL ( MM3, MM7 ) /* x1*m13 | x0*m03 */ - ADD_L ( EDI, EAX ) /* next vertex */ + PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*x02+x1*m12 */ + PFADD ( MM5, MM6 ) /* x0*...*m13+m33 | x0*...*m12+m32 */ - PFACC ( MM7, MM6 ) /* x0*m03+x1*m13 | x0*x02+x1*m12 */ - PFADD ( MM5, MM6 ) /* x0*...*m13+m33| x0*x02+x1*m12+m32 */ + MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */ + ADD_L ( CONST(16), EDX ) /* next r */ - MOVQ ( MM6, REGOFF(8, EDX) ) /* write r3, r2 */ - 
ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPGR_2 ) ) /* cnt > 0 ? -> process next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPGR_2) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TPGR_3 ): -LLBL(G3TPGR_3): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET - - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_identity_raw) -GLNAME(gl_3dnow_transform_points2_identity_raw): +GLOBL GLNAME( gl_3dnow_transform_points2_perspective_raw ) +GLNAME( gl_3dnow_transform_points2_perspective_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - FEMMS + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPPR_3 ) ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPIR_3) ) + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ -LLBL(G3TPIR_3): - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ - ADD_L ( CONST(16), EDX ) /* next r */ +ALIGNTEXT16 +LLBL( G3TPPR_2 ): - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPIR_3) ) /* cnt > 0 ? 
-> process next vertex */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ -LLBL(G3TPIR_4): - FEMMS + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ - POP_L ( ESI ) - POP_L ( EDI ) + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPPR_2 ) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TPPR_3 ): + + FEMMS + POP_L ( EDI ) POP_L ( ESI ) RET @@ -144,68 +167,75 @@ LLBL(G3TPIR_4): ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_2d_raw) -GLNAME(gl_3dnow_transform_points2_2d_raw): +GLOBL GLNAME( gl_3dnow_transform_points2_3d_raw ) +GLNAME( gl_3dnow_transform_points2_3d_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - FEMMS + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3R_3 ) ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2R_3) ) + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ - MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ - MOVQ ( REGOFF(16, ECX), MM1 ) /* m11 | m10 */ + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + PUNPCKLDQ ( REGOFF(20, 
ECX), MM1 ) /* m11 | m01 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ + PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */ -ALIGNTEXT32 -LLBL(G3TP2R_2): + MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM5 ) /* | m32 */ - MOVD ( REGIND(EAX), MM4 ) /* | x0 */ - MOVD ( REGOFF(4, EAX), MM5 ) /* | x1 */ +ALIGNTEXT16 +LLBL( G3TP3R_2 ): - PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ + MOVQ ( MM6, MM7 ) /* x1 | x0 */ - PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ - PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */ + PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ + PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ - PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */ - PFADD ( MM2, MM4 ) /* x...x1*m11+31 | x0*m00+x1*m10+m30 */ + PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*x00+x1*m10 */ + PFADD ( MM4, MM6 ) /* x0*...*m11+m31 | x0*...*m10+m30 */ - PFADD ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ + MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ - ADD_L ( CONST(16), EDX ) /* next r */ - DEC_L ( ESI ) /* decrement vertex counter */ + MOVQ ( MM6, MM7 ) /* x1 | x0 */ + PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ - JA ( LLBL(G3TP2R_2) ) /* cnt > 0 ? -> process next vertex */ + PFACC ( MM7, MM6 ) /* ***trash*** | x0*x02+x1*m12 */ + PFADD ( MM5, MM6 ) /* ***trash*** | x0*...*m12+m32 */ -LLBL(G3TP2R_3): - FEMMS + MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ - POP_L ( ESI ) - POP_L ( EDI ) + ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP3R_2 ) ) /* cnt > 0 ? 
-> process next vertex */ + +LLBL( G3TP3R_3 ): + + FEMMS + POP_L ( EDI ) POP_L ( ESI ) RET @@ -213,144 +243,120 @@ LLBL(G3TP2R_3): ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_2d_no_rot_raw) -GLNAME(gl_3dnow_transform_points2_2d_no_rot_raw): +GLOBL GLNAME( gl_3dnow_transform_points2_3d_no_rot_raw ) +GLNAME( gl_3dnow_transform_points2_3d_no_rot_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(2), REGOFF(16, ECX) ) - OR_B ( CONST(3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3 ), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - FEMMS + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3NRR_3 ) ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2NRR_3) ) + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ +ALIGNTEXT16 +LLBL( G3TP3NRR_2 ): -ALIGNTEXT32 -LLBL(G3TP2NRR_2): + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - ADD_L ( EDI, EAX ) /* next vertex */ + PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - 
PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ + MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - ADD_L ( CONST(16), EDX ) /* next r */ + ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP2NRR_2) ) /* cnt > 0 ? -> process next vertex */ + JNZ ( LLBL( G3TP3NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ -LLBL(G3TP2NRR_3): - FEMMS +LLBL( G3TP3NRR_3 ): - POP_L ( ESI ) + FEMMS POP_L ( EDI ) - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_3d_raw) -GLNAME(gl_3dnow_transform_points2_3d_raw): +GLOBL GLNAME( gl_3dnow_transform_points2_2d_raw ) +GLNAME( gl_3dnow_transform_points2_2d_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - FEMMS - - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3R_3) ) - - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ - - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - MOVD ( REGOFF(8, ECX), MM2 ) /* | m02 */ - 
PUNPCKLDQ ( REGOFF(24, ECX), MM2 ) /* m12 | m02 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2R_3 ) ) - MOVQ ( REGOFF(48, ECX), MM4 ) /* m31 | m30 */ - MOVD ( REGOFF(56, ECX), MM5 ) /* | m32 */ + MOVQ ( REGIND(ECX), MM0 ) /* m01 | m00 */ + MOVQ ( REGOFF(16, ECX), MM1 ) /* m11 | m10 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ -ALIGNTEXT32 -LLBL(G3TP3R_2): - - MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ - MOVQ ( MM6, MM7 ) /* x1 | x0 */ +ALIGNTEXT16 +LLBL( G3TP2R_2 ): - PFMUL ( MM0, MM6 ) /* x1*m10 | x0*m00 */ - PFMUL ( MM1, MM7 ) /* x1*m11 | x0*m01 */ + MOVD ( REGIND(EAX), MM4 ) /* | x0 */ + MOVD ( REGOFF(4, EAX), MM5 ) /* | x1 */ - PFACC ( MM7, MM6 ) /* x0*m01+x1*m11 | x0*x00+x1*m10 */ - PFADD ( MM4, MM6 ) /* x0*...*m11+m31| x0*x00+x1*m10+m30 */ + PUNPCKLDQ ( MM4, MM4 ) /* x0 | x0 */ + ADD_L ( EDI, EAX ) /* next vertex */ - MOVQ ( MM6, REGIND(EDX) ) /* write r1, r0 */ - MOVQ ( REGIND(EAX), MM6 ) /* x1 | x0 */ + PFMUL ( MM0, MM4 ) /* x0*m01 | x0*m00 */ + PUNPCKLDQ ( MM5, MM5 ) /* x1 | x1 */ - MOVQ ( MM6, MM7 ) /* x1 | x0 */ - PFMUL ( MM2, MM6 ) /* x1*m12 | x0*m02 */ + PFMUL ( MM1, MM5 ) /* x1*m11 | x1*m10 */ + PFADD ( MM2, MM4 ) /* x...x1*m11+31 | x0*..*m10+m30 */ - PFACC ( MM7, MM6 ) /* ***trash*** | x0*x02+x1*m12 */ - PFADD ( MM5, MM6 ) /* ***trash*** | x0*x02+x1*m12+m32 */ + PFADD ( MM5, MM4 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - MOVD ( MM6, REGOFF(8, EDX) ) /* write r2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ - ADD_L ( CONST(16), EDX ) /* next r */ - DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TP3R_2) ) /* cnt > 0 ? 
-> process next vertex */ +LLBL( G3TP2R_3 ): -LLBL(G3TP3R_3): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET @@ -358,129 +364,100 @@ LLBL(G3TP3R_3): ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_3d_no_rot_raw) -GLNAME(gl_3dnow_transform_points2_3d_no_rot_raw): +GLOBL GLNAME( gl_3dnow_transform_points2_2d_no_rot_raw ) +GLNAME( gl_3dnow_transform_points2_2d_no_rot_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(7), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - FEMMS - - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3NRR_3) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2NRR_3 ) ) - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ -ALIGNTEXT32 -LLBL(G3TP3NRR_2): +ALIGNTEXT16 +LLBL( G3TP2NRR_2 ): - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + ADD_L ( EDI, EAX ) /* next vertex */ - PFADD ( MM2, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ - MOVQ ( MM4, 
REGIND(EDX) ) /* write r1, r0 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + PFADD ( MM2, MM4 ) /* m31 | x0*m00+m30 */ - MOVD ( MM3, REGOFF(8, EDX) ) /* write r2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ + ADD_L ( CONST(16), EDX ) /* next r */ - ADD_L ( CONST(16), EDX ) /* next r */ - DEC_L ( ESI ) /* decrement vertex counter */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? -> process next vertex */ - JA ( LLBL(G3TP3NRR_2) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TP2NRR_3 ): -LLBL(G3TP3NRR_3): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET - ALIGNTEXT16 -GLOBL GLNAME(gl_3dnow_transform_points2_perspective_raw) -GLNAME(gl_3dnow_transform_points2_perspective_raw): +GLOBL GLNAME( gl_3dnow_transform_points2_identity_raw ) +GLNAME( gl_3dnow_transform_points2_identity_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(15), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) -ALIGNTEXT32 + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(2), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - PUSH_L ( ESI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - FEMMS - - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPPR_3) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPIR_3 ) ) - MOVD ( REGOFF(56, ECX), MM3 ) /* | 
m32 */ - -ALIGNTEXT32 -LLBL(G3TPPR_2): +ALIGNTEXT16 +LLBL( G3TPIR_3 ): - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + ADD_L ( EDI, EAX ) /* next vertex */ - MOVQ ( MM4, REGIND(EDX) ) /* write r1, r0 */ - MOVQ ( MM3, REGOFF(8, EDX) ) /* write r2 (=m32), r3 (=0) */ + MOVQ ( MM0, REGIND(EDX) ) /* r1 | r0 */ + ADD_L ( CONST(16), EDX ) /* next r */ - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPIR_3 ) ) /* cnt > 0 ? -> process next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPPR_2) ) /* cnt > 0 ? -> process next vertex */ +LLBL( G3TPIR_4 ): -LLBL(G3TPPR_3): FEMMS - - POP_L ( ESI ) POP_L ( EDI ) - POP_L ( ESI ) RET - - - - diff --git a/xc/extras/Mesa/src/X86/3dnow_xform_raw3.S b/xc/extras/Mesa/src/X86/3dnow_xform_raw3.S index dc4105d9f..5dd2e2294 100644 --- a/xc/extras/Mesa/src/X86/3dnow_xform_raw3.S +++ b/xc/extras/Mesa/src/X86/3dnow_xform_raw3.S @@ -1,96 +1,115 @@ -#include "assyntax.h" - - SEG_TEXT +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ -#define VEC_SIZE_1 1 -#define VEC_SIZE_2 3 -#define VEC_SIZE_3 7 -#define VEC_SIZE_4 15 +#include "assyntax.h" +#include "xform_args.h" + SEG_TEXT +#define FRAME_OFFSET 4 -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points3_general_raw) +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points3_general_raw ) GLNAME( gl_3dnow_transform_points3_general_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_4), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPGR_2) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPGR_2 ) ) PREFETCHW ( REGIND(EDX) ) - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */ - - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM2 ) 
/* | x2 */ -ALIGNTEXT32 -LLBL(G3TPGR_1): + ADD_L ( EDI, EAX ) /* next vertex */ - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ +ALIGNTEXT16 +LLBL( G3TPGR_1 ): + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ PREFETCH ( REGIND(EAX) ) - MOVQ ( MM0, MM1 ) /* x1 | x0 */ - PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */ + MOVQ ( MM0, MM1 ) /* x1 | x0 */ + PUNPCKLDQ ( MM2, MM2 ) /* x2 | x2 */ - PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ - MOVQ ( MM2, MM5 ) /* x2 | x2 */ + PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ + MOVQ ( MM2, MM5 ) /* x2 | x2 */ - PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */ - PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */ + PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */ + PFMUL ( REGOFF(32, ECX), MM2 ) /* x2*m9 | x2*m8 */ - MOVQ ( MM0, MM3 ) /* x0 | x0 */ - PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */ + MOVQ ( MM0, MM3 ) /* x0 | x0 */ + PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */ - MOVQ ( MM1, MM4 ) /* x1 | x1 */ - PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ + MOVQ ( MM1, MM4 ) /* x1 | x1 */ + PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ - PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */ - PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */ + PFADD ( REGOFF(48, ECX), MM2 ) /* x2*m9+m13 | x2*m8+m12 */ + PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */ - PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */ - PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ + PFADD ( REGOFF(56, ECX), MM5 ) /* x2*m11+m15 | x2*m10+m14 */ + PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ - PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */ - PFADD ( MM1, MM2 ) /* result r1 | r0 */ + PFMUL ( REGOFF(8, ECX), MM3 ) /* x0*m3 | x0*m2 */ + PFADD ( MM1, MM2 ) /* r1 | r0 */ - PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */ - ADD_L ( CONST(16), EDX ) /* next r */ + PFMUL ( REGOFF(24, ECX), MM4 ) /* x1*m7 | x1*m6 */ + ADD_L ( CONST(16), EDX ) /* next output vertex */ - PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */ - MOVQ ( MM2, 
REGOFF(-16, EDX) ) /* write r0, r1 */ + PFADD ( MM3, MM4 ) /* x0*m3+x1*m7 | x0*m2+x1*m6 */ + MOVQ ( MM2, REGOFF(-16, EDX) ) /* write r0, r1 */ - PFADD ( MM4, MM5 ) /* r3 | r2 */ - MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ + PFADD ( MM4, MM5 ) /* r3 | r2 */ + MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM2 ) /* | x2 */ - ADD_L ( EDI, EAX ) /* next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPGR_1) ) /* cnt > 0 ? -> process next vertex */ + ADD_L ( EDI, EAX ) /* next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */ -ALIGNTEXT32 -LLBL(G3TPGR_2): +LLBL( G3TPGR_2 ): FEMMS POP_L ( EDI ) @@ -100,55 +119,81 @@ LLBL(G3TPGR_2): - -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points3_identity_raw) -GLNAME( gl_3dnow_transform_points3_identity_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points3_perspective_raw ) +GLNAME( gl_3dnow_transform_points3_perspective_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( 
CONST(0), ESI ) - JE ( LLBL(G3TPIR_2) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPPR_2 ) ) + PREFETCH ( REGIND(EAX) ) PREFETCHW ( REGIND(EDX) ) - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ -ALIGNTEXT32 -LLBL(G3TPIR_1): + MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */ + MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - PREFETCHW ( REGOFF(32, EDX) ) + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ - ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( CONST(16), EDX ) /* next r */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - DEC_L ( ESI ) /* decrement vertex counter */ - MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */ + ADD_L ( EDI, EAX ) /* next vertex */ - MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 */ - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ +ALIGNTEXT16 +LLBL( G3TPPR_1 ): - MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ - JA ( LLBL(G3TPIR_1) ) /* cnt > 0 ? -> process next vertex */ + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + PREFETCH ( REGIND(EAX) ) + + PXOR ( MM7, MM7 ) /* 0 | 0 */ + MOVQ ( MM5, MM6 ) /* | x2 */ + + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + PFSUB ( MM5, MM7 ) /* | -x2 */ + + PFMUL ( MM2, MM6 ) /* | x2*m22 */ + PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ + + ADD_L ( CONST(16), EDX ) /* next r */ + PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */ + + PFADD ( MM3, MM6 ) /* | x2*m22+m32 */ + PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ -ALIGNTEXT32 -LLBL(G3TPIR_2): + MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */ + MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */ + + MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + ADD_L ( EDI, EAX ) /* next vertex */ + + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? 
-> process next vertex */ + +LLBL( G3TPPR_2 ): FEMMS POP_L ( EDI ) @@ -158,74 +203,87 @@ LLBL(G3TPIR_2): -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points3_2d_raw) -GLNAME( gl_3dnow_transform_points3_2d_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points3_3d_raw ) +GLNAME( gl_3dnow_transform_points3_3d_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2R_3) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3R_2 ) ) PREFETCH ( REGIND(EAX) ) + PREFETCH ( REGIND(EDX) ) - PREFETCHW ( REGIND(EDX) ) + MOVD ( REGOFF(8, ECX), MM7 ) /* | m2 */ + PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */ - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ + ADD_L ( EDI, EAX ) /* next vertex */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ +ALIGNTEXT16 +LLBL( G3TP3R_1 ): - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 
2 vertices ahead */ + PREFETCH ( REGIND(EAX) ) -ALIGNTEXT32 -LLBL(G3TP2R_2): + MOVQ ( MM0, MM2 ) /* x1 | x0 */ + ADD_L ( CONST(16), EDX ) /* next r */ - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */ + MOVQ ( MM0, MM3 ) /* x1 | x0 */ - PREFETCH ( REGIND(EAX) ) + PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */ + PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */ - MOVQ ( MM3, MM4 ) /* x1 | x0 */ - PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ + MOVQ ( MM1, MM4 ) /* | x2 */ + PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */ - ADD_L ( CONST(16), EDX ) /* next r */ - PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ + PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */ + PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ + + PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */ + PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+...+m11 | x0*m0+x1*m4+m12 */ + + PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */ + PFADD ( MM4, MM3 ) /* r1 | r0 */ + + PFMUL ( REGOFF(40, ECX), MM1 ) /* | x2*m10 */ + PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */ + + PFACC ( MM0, MM1 ) - PFACC ( MM4, MM3 ) /* x0*m00+x1*m10 | x0*m01+x1*m11 */ - MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */ + MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ + PFACC ( MM1, MM1 ) /* | r2 */ - PFADD ( MM2, MM3 ) /* x0*...*m10+m30| x0*m01+x1*m11+m31 */ - MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ + MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ - ADD_L ( EDI, EAX ) /* next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP2R_2) ) /* cnt > 0 ? -> process next vertex */ + ADD_L ( EDI, EAX ) /* next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? 
-> process next vertex */ -ALIGNTEXT32 -LLBL(G3TP2R_3): +LLBL( G3TP3R_2 ): FEMMS POP_L ( EDI ) @@ -235,70 +293,74 @@ LLBL(G3TP2R_3): - -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points3_2d_no_rot_raw) -GLNAME( gl_3dnow_transform_points3_2d_no_rot_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points3_3d_no_rot_raw ) +GLNAME( gl_3dnow_transform_points3_3d_no_rot_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2NRR_2) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3NRR_2 ) ) PREFETCH ( REGIND(EAX) ) - PREFETCHW ( REGIND(EDX) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ - MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ + PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ + MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ 
-ALIGNTEXT32 -LLBL(G3TP2NRR_1): + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ - PREFETCH ( REGIND(EAX) ) +ALIGNTEXT16 +LLBL( G3TP3NRR_1 ): + + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + ADD_L ( EDI, EAX ) /* next vertex */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - ADD_L ( CONST(16), EDX ) /* next r */ + PREFETCHW ( REGIND(EAX) ) + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ + PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ + PFMUL ( MM2, MM5 ) /* | x2*m22 */ - MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ - MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */ + PFADD ( MM3, MM5 ) /* | x2*m22+m32 */ + MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + ADD_L ( CONST(16), EDX ) /* next r */ + DEC_L ( ESI ) /* decrement vertex counter */ - ADD_L ( EDI, EAX ) /* next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ + MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - JA ( LLBL(G3TP2NRR_1) ) /* cnt > 0 ? -> process next vertex */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? 
-> process next vertex */ -ALIGNTEXT32 -LLBL(G3TP2NRR_2): +LLBL( G3TP3NRR_2 ): FEMMS POP_L ( EDI ) @@ -308,89 +370,72 @@ LLBL(G3TP2NRR_2): -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points3_3d_raw) -GLNAME( gl_3dnow_transform_points3_3d_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points3_2d_raw ) +GLNAME( gl_3dnow_transform_points3_2d_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3R_2) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2R_3) ) PREFETCH ( REGIND(EAX) ) + PREFETCHW ( REGIND(EDX) ) - PREFETCH ( REGIND(EDX) ) - - MOVD ( REGOFF(8, ECX), MM7 ) /* | m2 */ - PUNPCKLDQ ( REGOFF(24, ECX), MM7 ) /* m6 | m2 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ -ALIGNTEXT32 -LLBL(G3TP3R_1): + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + ADD_L ( EDI, EAX ) /* next vertex */ - PREFETCHW 
( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ +ALIGNTEXT16 +LLBL( G3TP2R_2 ): + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ PREFETCH ( REGIND(EAX) ) - MOVQ ( MM0, MM2 ) /* x1 | x0 */ - ADD_L ( CONST(16), EDX ) /* next r */ - - PUNPCKLDQ ( MM2, MM2 ) /* x0 | x0 */ - MOVQ ( MM0, MM3 ) /* x1 | x0 */ - - PFMUL ( REGIND(ECX), MM2 ) /* x0*m1 | x0*m0 */ - PUNPCKHDQ ( MM3, MM3 ) /* x1 | x1 */ + MOVQ ( MM3, MM4 ) /* x1 | x0 */ + PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ - MOVQ ( MM1, MM4 ) /* | x2 */ - PFMUL ( REGOFF(16, ECX), MM3 ) /* x1*m5 | x1*m4 */ + ADD_L ( CONST(16), EDX ) /* next r */ + PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ - PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */ - PFADD ( MM2, MM3 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ - - PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */ - PFADD ( REGOFF(48, ECX), MM3 ) /* x0*m1+...+m11 | x0*m0+x1*m4+m12 */ - - PFMUL ( MM7, MM0 ) /* x1*m6 | x0*m2 */ - PFADD ( MM4, MM3 ) /* r1 | r0 */ - - PFMUL ( REGOFF(40, ECX), MM1 ) /* | x2*m10 */ - PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m14 | x2*m10 */ - - PFACC ( MM0, MM1 ) + PFACC ( MM4, MM3 ) /* x0*m00+x1*m10 | x0*m01+x1*m11 */ + MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */ - MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ - PFACC ( MM1, MM1 ) /* | r2 */ + PFADD ( MM2, MM3 ) /* x0*...*m10+m30 | x0*...*m11+m31 */ + MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ - MOVD ( MM1, REGOFF(-8, EDX) ) /* write r2 */ - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ + ADD_L ( EDI, EAX ) /* next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2R_2 ) ) /* cnt > 0 ? -> process next vertex */ - ADD_L ( EDI, EAX ) /* next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP3R_1) ) /* cnt > 0 ? 
-> process next vertex */ - -ALIGNTEXT32 -LLBL(G3TP3R_2): +LLBL( G3TP2R_3 ): FEMMS POP_L ( EDI ) @@ -400,74 +445,66 @@ LLBL(G3TP3R_2): -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points3_3d_no_rot_raw) -GLNAME( gl_3dnow_transform_points3_3d_no_rot_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points3_2d_no_rot_raw ) +GLNAME( gl_3dnow_transform_points3_2d_no_rot_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(3), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_3), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3NRR_2) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2NRR_2 ) ) PREFETCH ( REGIND(EAX) ) - PREFETCHW ( REGIND(EDX) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ - MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - PUNPCKLDQ ( MM2, MM2 ) /* m22 | m22 */ + MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + ADD_L ( EDI, EAX ) /* next vertex */ - PUNPCKLDQ ( MM3, MM3 ) /* m32 | m32 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ 
- MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ +ALIGNTEXT16 +LLBL( G3TP2NRR_1 ): + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + PREFETCH ( REGIND(EAX) ) -ALIGNTEXT32 -LLBL(G3TP3NRR_1): - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ - ADD_L ( EDI, EAX ) /* next vertex */ - - PREFETCHW ( REGIND(EAX) ) - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - - PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ - PFMUL ( MM2, MM5 ) /* | x2*m22 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + ADD_L ( CONST(16), EDX ) /* next r */ - PFADD ( MM3, MM5 ) /* | x2*m22+m32 */ - MOVQ ( MM4, REGIND(EDX) ) /* write r0, r1 */ + PFADD ( MM1, MM4 ) /* x1*m11+m31 | x0*m00+m30 */ - ADD_L ( CONST(16), EDX ) /* next r */ - DEC_L ( ESI ) /* decrement vertex counter */ + MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ + MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 (=x2) */ - MOVD ( MM5, REGOFF(-8, EDX) ) /* write r2 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - JA ( LLBL(G3TP3NRR_1) ) /* cnt > 0 ? -> process next vertex */ + ADD_L ( EDI, EAX ) /* next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2NRR_1 ) ) /* cnt > 0 ? 
-> process next vertex */ -ALIGNTEXT32 -LLBL(G3TP3NRR_2): +LLBL( G3TP2NRR_2 ): FEMMS POP_L ( EDI ) @@ -477,90 +514,56 @@ LLBL(G3TP3NRR_2): -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points3_perspective_raw) -GLNAME( gl_3dnow_transform_points3_perspective_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points3_identity_raw ) +GLNAME( gl_3dnow_transform_points3_identity_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_4), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(3), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPPR_2) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - PREFETCH ( REGIND(EAX) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPIR_2 ) ) PREFETCHW ( REGIND(EDX) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ - - MOVQ ( REGOFF(32, ECX), MM1 ) /* m21 | m20 */ - MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - - MOVD ( REGOFF(56, ECX), MM3 ) /* | m32 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - - ADD_L ( EDI, EAX ) /* next vertex */ - -ALIGNTEXT32 -LLBL(G3TPPR_1): - - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ - - PREFETCH ( REGIND(EAX) ) +ALIGNTEXT16 +LLBL( G3TPIR_1 ): - PXOR ( 
MM7, MM7 ) /* 0 | 0 */ - MOVQ ( MM5, MM6 ) /* | x2 */ - - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - PFSUB ( MM5, MM7 ) /* | -x2 */ - - PFMUL ( MM2, MM6 ) /* | x2*m22 */ - PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ - - ADD_L ( CONST(16), EDX ) /* next r */ - PFMUL ( MM1, MM5 ) /* x2*m21 | x2*m20 */ - - PFADD ( MM3, MM6 ) /* | x2*m22+m32 */ - PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ + PREFETCHW ( REGOFF(32, EDX) ) - MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */ - MOVD ( MM6, REGOFF(-8, EDX) ) /* write r2 */ + ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( CONST(16), EDX ) /* next r */ - MOVD ( MM7, REGOFF(-4, EDX) ) /* write r3 */ - MOVD ( REGOFF(8, EAX), MM5 ) /* | x2 */ + DEC_L ( ESI ) /* decrement vertex counter */ + MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVD ( MM1, REGOFF(-8, EDX) ) /* | r2 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPPR_1) ) /* cnt > 0 ? -> process next vertex */ + MOVD ( REGOFF(8, EAX), MM1 ) /* | x2 */ + JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */ -ALIGNTEXT32 -LLBL(G3TPPR_2): +LLBL( G3TPIR_2 ): FEMMS POP_L ( EDI ) POP_L ( ESI ) RET - - - - - diff --git a/xc/extras/Mesa/src/X86/3dnow_xform_raw4.S b/xc/extras/Mesa/src/X86/3dnow_xform_raw4.S index 17f584f75..fcabb5949 100644 --- a/xc/extras/Mesa/src/X86/3dnow_xform_raw4.S +++ b/xc/extras/Mesa/src/X86/3dnow_xform_raw4.S @@ -1,86 +1,105 @@ -#include "assyntax.h" - - SEG_TEXT - +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ -#define VEC_SIZE_1 1 -#define VEC_SIZE_2 3 -#define VEC_SIZE_3 7 -#define VEC_SIZE_4 15 +#include "assyntax.h" +#include "xform_args.h" + SEG_TEXT +#define FRAME_OFFSET 4 -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points4_general_raw) +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_general_raw ) GLNAME( gl_3dnow_transform_points4_general_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_4), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPGR_2) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPGR_2 ) ) PREFETCHW ( REGIND(EDX) ) - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( EDI, EAX ) /* next vertex */ -ALIGNTEXT32 -LLBL(G3TPGR_1): - - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ +ALIGNTEXT16 +LLBL( G3TPGR_1 ): + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ PREFETCH ( REGIND(EAX) ) - MOVQ ( MM0, MM2 ) /* x1 | x0 */ - MOVQ ( MM4, MM6 ) /* x3 | x2 */ + MOVQ ( MM0, MM2 ) /* x1 | x0 */ + MOVQ ( MM4, MM6 ) /* x3 | x2 */ - PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ - PUNPCKHDQ ( MM2, MM2 
) /* x1 | x1 */ + PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ + PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */ - MOVQ ( MM0, MM1 ) /* x0 | x0 */ - ADD_L ( CONST(16), EDX ) /* next r */ + MOVQ ( MM0, MM1 ) /* x0 | x0 */ + ADD_L ( CONST(16), EDX ) /* next r */ - PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ - MOVQ ( MM2, MM3 ) /* x1 | x1 */ + PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ + MOVQ ( MM2, MM3 ) /* x1 | x1 */ - PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */ - PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */ + PFMUL ( REGOFF(8, ECX), MM1 ) /* x0*m3 | x0*m2 */ + PUNPCKLDQ ( MM4, MM4 ) /* x2 | x2 */ - PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */ - MOVQ ( MM4, MM5 ) /* x2 | x2 */ + PFMUL ( REGOFF(16, ECX), MM2 ) /* x1*m5 | x1*m4 */ + MOVQ ( MM4, MM5 ) /* x2 | x2 */ - PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */ - PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ + PFMUL ( REGOFF(24, ECX), MM3 ) /* x1*m7 | x1*m6 */ + PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ - PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */ - MOVQ ( MM6, MM7 ) /* x3 | x3 */ + PFMUL ( REGOFF(32, ECX), MM4 ) /* x2*m9 | x2*m8 */ + MOVQ ( MM6, MM7 ) /* x3 | x3 */ - PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */ + PFMUL ( REGOFF(40, ECX), MM5 ) /* x2*m11 | x2*m10 */ PFADD ( MM0, MM2 ) - PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */ + PFMUL ( REGOFF(48, ECX), MM6 ) /* x3*m13 | x3*m12 */ PFADD ( MM1, MM3 ) - PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */ + PFMUL ( REGOFF(56, ECX), MM7 ) /* x3*m15 | x3*m14 */ PFADD ( MM4, MM6 ) PFADD ( MM5, MM7 ) @@ -90,17 +109,15 @@ LLBL(G3TPGR_1): MOVQ ( MM6, REGOFF(-16, EDX) ) MOVQ ( MM7, REGOFF(-8, EDX) ) - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - - MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPGR_1) ) /* cnt > 0 ? 
-> process next vertex */ + MOVQ ( REGOFF(8, EAX), MM4 ) /* x3 | x2 */ + ADD_L ( EDI, EAX ) /* next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPGR_1 ) ) /* cnt > 0 ? -> process next vertex */ -ALIGNTEXT32 -LLBL(G3TPGR_2): +LLBL( G3TPGR_2 ): FEMMS POP_L ( EDI ) @@ -110,57 +127,82 @@ LLBL(G3TPGR_2): - -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points4_identity_raw) -GLNAME( gl_3dnow_transform_points4_identity_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_perspective_raw ) +GLNAME( gl_3dnow_transform_points4_perspective_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_4), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPIR_2) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPPR_2 ) ) - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */ + PREFETCH ( REGIND(EAX) ) + PREFETCHW ( REGIND(EDX) ) - ADD_L ( EDI, EAX ) /* next vertex */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ -ALIGNTEXT32 -LLBL(G3TPIR_1): + MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */ + PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */ - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + MOVQ ( 
REGOFF(32, ECX), MM2 ) /* m21 | m20 */ + PXOR ( MM7, MM7 ) /* 0 | 0 */ - PREFETCH ( REGIND(EAX) ) + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */ + + ADD_L ( EDI, EAX ) /* next vertex */ + +ALIGNTEXT16 +LLBL( G3TPPR_1 ): + + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */ + + MOVQ ( MM5, MM6 ) /* x3 | x2 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - ADD_L ( CONST(16), EDX ) /* next r */ - MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */ + PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ + ADD_L ( CONST(16), EDX ) /* next r */ - MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */ - MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */ + PFSUBR ( MM7, MM3 ) /* | -x2 */ - MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */ + PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TPIR_1) ) /* cnt > 0 ? -> process next vertex */ + PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */ + MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */ -ALIGNTEXT32 -LLBL(G3TPIR_2): + MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */ + + ADD_L ( EDI, EAX ) /* next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ + + JNZ ( LLBL( G3TPPR_1 ) ) /* cnt > 0 ? 
-> process next vertex */ + +LLBL( G3TPPR_2 ): FEMMS POP_L ( EDI ) @@ -170,77 +212,89 @@ LLBL(G3TPIR_2): - -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points4_2d_raw) -GLNAME( gl_3dnow_transform_points4_2d_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_3d_raw ) +GLNAME( gl_3dnow_transform_points4_3d_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_4), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2R_2) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3R_2 ) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ + MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */ + PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */ - MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ + MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */ + PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */ - MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ + MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */ - MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ +ALIGNTEXT16 +LLBL( G3TP3R_1 ): - ADD_L ( EDI, EAX ) /* next vertex */ + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices 
ahead */ + PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */ -ALIGNTEXT32 -LLBL(G3TP2R_1): + MOVQ ( MM2, MM0 ) /* x1 | x0 */ + MOVQ ( MM3, MM4 ) /* x3 | x2 */ - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + MOVQ ( MM0, MM1 ) /* x1 | x0 */ + MOVQ ( MM4, MM5 ) /* x3 | x2 */ - PREFETCH ( REGIND(EAX) ) + PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ + PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */ - MOVQ ( MM3, MM4 ) /* x1 | x0 */ - MOVQ ( MM5, MM6 ) /* x3 | x2 */ + PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ + PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */ - PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ - PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ + PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */ + PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */ - PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ - ADD_L ( CONST(16), EDX ) /* next r */ + PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */ + PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ - PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ - PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */ + PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */ + ADD_L ( CONST(16), EDX ) /* next r */ - PFADD ( MM6, MM3 ) /* r1 | r0 */ - MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ + PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */ + PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9 | x0*m0+...+x2*m8 */ - MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ - MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ + PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */ + PFADD ( MM3, MM4 ) /* r1 | r0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - ADD_L ( EDI, EAX ) /* next vertex */ + PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */ + MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP2R_1) ) /* cnt > 0 ? 
-> process next vertex */ + ADD_L ( EDI, EAX ) /* next vertex */ + PFACC ( MM0, MM5 ) /* r3 | r2 */ -ALIGNTEXT32 -LLBL(G3TP2R_2): + MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ + MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ + + MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */ + + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP3R_1 ) ) /* cnt > 0 ? -> process next vertex */ + +LLBL( G3TP3R_2 ): FEMMS POP_L ( EDI ) @@ -250,70 +304,75 @@ LLBL(G3TP2R_2): - -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points4_2d_no_rot_raw) -GLNAME( gl_3dnow_transform_points4_2d_no_rot_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_3d_no_rot_raw ) +GLNAME( gl_3dnow_transform_points4_3d_no_rot_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_4), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP2NRR_3) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP3NRR_2 ) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ - MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ + MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ + PUNPCKLDQ 
( REGOFF(56, ECX), MM2 ) /* m32 | m22 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - ADD_L ( EDI, EAX ) /* next vertex */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ -ALIGNTEXT32 -LLBL(G3TP2NRR_2): + ADD_L ( EDI, EAX ) /* next vertex */ - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ +ALIGNTEXT16 +LLBL( G3TP3NRR_1 ): - PREFETCH ( REGIND(EAX) ) + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */ + + MOVQ ( MM5, MM6 ) /* x3 | x2 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - MOVQ ( MM5, MM6 ) /* x3 | x2 */ + PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ + PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */ - ADD_L ( CONST(16), EDX ) /* next r */ - PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ + PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ + PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */ - PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ - PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ + PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ + ADD_L ( CONST(16), EDX ) /* next r */ - MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */ - MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ + MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ + MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - ADD_L ( EDI, EAX ) /* next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ + MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ + ADD_L ( EDI, EAX ) /* next vertex */ - JA ( LLBL(G3TP2NRR_2) ) /* cnt > 0 ? -> process next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP3NRR_1 ) ) /* cnt > 0 ? 
-> process next vertex */ -ALIGNTEXT32 -LLBL(G3TP2NRR_3): +LLBL( G3TP3NRR_2 ): FEMMS POP_L ( EDI ) @@ -323,92 +382,75 @@ LLBL(G3TP2NRR_3): - -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points4_3d_raw) -GLNAME( gl_3dnow_transform_points4_3d_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_2d_raw ) +GLNAME( gl_3dnow_transform_points4_2d_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_4), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3R_2) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - MOVD ( REGOFF(8, ECX), MM6 ) /* | m2 */ - PUNPCKLDQ ( REGOFF(24, ECX), MM6 ) /* m6 | m2 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2R_2 ) ) - MOVD ( REGOFF(40, ECX), MM7 ) /* | m10 */ - PUNPCKLDQ ( REGOFF(56, ECX), MM7 ) /* m14 | m10 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(16, ECX), MM0 ) /* m10 | m00 */ - MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */ + MOVD ( REGOFF(4, ECX), MM1 ) /* | m01 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM1 ) /* m11 | m01 */ -ALIGNTEXT32 -LLBL(G3TP3R_1): + MOVQ ( REGOFF(48, ECX), MM2 ) /* m31 | m30 */ - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ - - PREFETCH ( REGOFF(32, EAX) ) /* hopefully array is tightly packed */ - - MOVQ ( 
MM2, MM0 ) /* x1 | x0 */ - MOVQ ( MM3, MM4 ) /* x3 | x2 */ + MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - MOVQ ( MM0, MM1 ) /* x1 | x0 */ - MOVQ ( MM4, MM5 ) /* x3 | x2 */ + ADD_L ( EDI, EAX ) /* next vertex */ - PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ - PUNPCKHDQ ( MM1, MM1 ) /* x1 | x1 */ - - PFMUL ( REGIND(ECX), MM0 ) /* x0*m1 | x0*m0 */ - PUNPCKLDQ ( MM3, MM3 ) /* x2 | x2 */ +ALIGNTEXT16 +LLBL( G3TP2R_1 ): - PFMUL ( REGOFF(16, ECX), MM1 ) /* x1*m5 | x1*m4 */ - PUNPCKHDQ ( MM4, MM4 ) /* x3 | x3 */ - - PFMUL ( MM6, MM2 ) /* x1*m6 | x0*m2 */ - PFADD ( MM0, MM1 ) /* x0*m1+x1*m5 | x0*m0+x1*m4 */ + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + PREFETCH ( REGIND(EAX) ) - PFMUL ( REGOFF(32, ECX), MM3 ) /* x2*m9 | x2*m8 */ - ADD_L ( CONST(16), EDX ) /* next r */ + MOVQ ( MM3, MM4 ) /* x1 | x0 */ + MOVQ ( MM5, MM6 ) /* x3 | x2 */ - PFMUL ( REGOFF(48, ECX), MM4 ) /* x3*m13 | x3*m12 */ - PFADD ( MM1, MM3 ) /* x0*m1+..+x2*m9| x0*m0+...+x2*m8 */ - - PFMUL ( MM7, MM5 ) /* x3*m14 | x2*m10 */ - PFADD ( MM3, MM4 ) /* r1 | r0 */ + PFMUL ( MM1, MM4 ) /* x1*m11 | x0*m01 */ + PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ - PFACC ( MM2, MM5 ) /* x0*m2+x1*m6 | x2*m10+x3*m14 */ - MOVD ( REGOFF(12, EAX), MM0 ) /* | x3 */ + PFMUL ( MM0, MM3 ) /* x1*m10 | x0*m00 */ + ADD_L ( CONST(16), EDX ) /* next r */ - ADD_L ( EDI, EAX ) /* next vertex */ - PFACC ( MM0, MM5 ) /* r3 | r2 */ + PFACC ( MM4, MM3 ) /* x0*m01+x1*m11 | x0*m00+x1*m10 */ + PFMUL ( MM2, MM6 ) /* x3*m31 | x3*m30 */ - MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ - MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ + PFADD ( MM6, MM3 ) /* r1 | r0 */ + MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ - MOVQ ( REGIND(EAX), MM2 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM3 ) /* x3 | x2 */ + MOVQ ( MM3, REGOFF(-16, EDX) ) /* write r0, r1 */ + MOVQ ( REGIND(EAX), MM3 ) /* x1 | x0 */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP3R_1) ) /* cnt > 0 ? 
-> process next vertex */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + ADD_L ( EDI, EAX ) /* next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TP2R_1 ) ) /* cnt > 0 ? -> process next vertex */ -ALIGNTEXT32 -LLBL(G3TP3R_2): +LLBL( G3TP2R_2 ): FEMMS POP_L ( EDI ) @@ -418,78 +460,68 @@ LLBL(G3TP3R_2): - -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points4_3d_no_rot_raw) -GLNAME( gl_3dnow_transform_points4_3d_no_rot_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_2d_no_rot_raw ) +GLNAME( gl_3dnow_transform_points4_2d_no_rot_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_4), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TP3NRR_2) ) + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TP2NRR_3 ) ) - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ + MOVD ( REGIND(ECX), MM0 ) /* | m00 */ + PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ - MOVD ( REGOFF(40, ECX), MM2 ) /* | m22 */ - PUNPCKLDQ ( REGOFF(56, ECX), MM2 ) /* m32 | m22 */ + MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ - MOVQ ( REGOFF(48, ECX), MM1 ) /* m31 | m30 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - MOVQ 
( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ - - ADD_L ( EDI, EAX ) /* next vertex */ - -ALIGNTEXT32 -LLBL(G3TP3NRR_1): - - PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + ADD_L ( EDI, EAX ) /* next vertex */ - PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */ +ALIGNTEXT16 +LLBL( G3TP2NRR_2 ): - MOVQ ( MM5, MM6 ) /* x3 | x2 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + PREFETCH ( REGIND(EAX) ) - PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ - PFMUL ( MM2, MM5 ) /* x3*m32 | x2*m22 */ + PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ + MOVQ ( MM5, MM6 ) /* x3 | x2 */ - PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ - PFACC ( MM7, MM5 ) /* x3 | x2*m22+x3*m32 */ + ADD_L ( CONST(16), EDX ) /* next r */ + PUNPCKHDQ ( MM6, MM6 ) /* x3 | x3 */ - PFADD ( MM6, MM4 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ - ADD_L ( CONST(16), EDX ) /* next r */ + PFMUL ( MM1, MM6 ) /* x3*m31 | x3*m30 */ + PFADD ( MM4, MM6 ) /* x1*m11+x3*m31 | x0*m00+x3*m30 */ - MOVQ ( MM4, REGOFF(-16, EDX) ) /* write r0, r1 */ - MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ + MOVQ ( MM6, REGOFF(-16, EDX) ) /* write r0, r1 */ + MOVQ ( MM5, REGOFF(-8, EDX) ) /* write r2, r3 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ + MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - MOVD ( REGOFF(12, EAX), MM7 ) /* | x3 */ - ADD_L ( EDI, EAX ) /* next vertex */ + ADD_L ( EDI, EAX ) /* next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ - DEC_L ( ESI ) /* decrement vertex counter */ - JA ( LLBL(G3TP3NRR_1) ) /* cnt > 0 ? -> process next vertex */ + JNZ ( LLBL( G3TP2NRR_2 ) ) /* cnt > 0 ? 
-> process next vertex */ -ALIGNTEXT32 -LLBL(G3TP3NRR_2): +LLBL( G3TP2NRR_3 ): FEMMS POP_L ( EDI ) @@ -499,87 +531,57 @@ LLBL(G3TP3NRR_2): -ALIGNTEXT32 -GLOBL GLNAME(gl_3dnow_transform_points4_perspective_raw) -GLNAME( gl_3dnow_transform_points4_perspective_raw ): +ALIGNTEXT16 +GLOBL GLNAME( gl_3dnow_transform_points4_identity_raw ) +GLNAME( gl_3dnow_transform_points4_identity_raw ): PUSH_L ( ESI ) - MOV_L ( REGOFF(8, ESP), ECX ) - MOV_L ( REGOFF(12, ESP), ESI ) - MOV_L ( REGOFF(16, ESP), EAX ) - MOV_L ( CONST(4), REGOFF(16, ECX) ) - OR_B ( CONST(VEC_SIZE_4), REGOFF(20, ECX) ) - MOV_L ( REGOFF(8, EAX), EDX ) - MOV_L ( EDX, REGOFF(8, ECX) ) + + MOV_L ( ARG_DEST, ECX ) + MOV_L ( ARG_MATRIX, ESI ) + MOV_L ( ARG_SOURCE, EAX ) + MOV_L ( CONST(4), REGOFF(V4F_SIZE, ECX) ) + OR_B ( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, ECX) ) + MOV_L ( REGOFF(V4F_COUNT, EAX), EDX ) + MOV_L ( EDX, REGOFF(V4F_COUNT, ECX) ) PUSH_L ( EDI ) - MOV_L ( REGOFF(4, ECX), EDX ) + MOV_L ( REGOFF(V4F_START, ECX), EDX ) MOV_L ( ESI, ECX ) - MOV_L ( REGOFF(8, EAX), ESI ) - MOV_L ( REGOFF(12, EAX), EDI ) - MOV_L ( REGOFF(4, EAX), EAX ) - - CMP_L ( CONST(0), ESI ) - JE ( LLBL(G3TPPR_2) ) - - PREFETCH ( REGIND(EAX) ) - - PREFETCHW ( REGIND(EDX) ) - - MOVD ( REGIND(ECX), MM0 ) /* | m00 */ - PUNPCKLDQ ( REGOFF(20, ECX), MM0 ) /* m11 | m00 */ + MOV_L ( REGOFF(V4F_COUNT, EAX), ESI ) + MOV_L ( REGOFF(V4F_STRIDE, EAX), EDI ) + MOV_L ( REGOFF(V4F_START, EAX), EAX ) - MOVD ( REGOFF(40, ECX), MM1 ) /* | m22 */ - PUNPCKLDQ ( REGOFF(56, ECX), MM1 ) /* m32 | m22 */ + TEST_L ( ESI, ESI ) + JZ ( LLBL( G3TPIR_2 ) ) - MOVQ ( REGOFF(32, ECX), MM2 ) /* m21 | m20 */ - PXOR ( MM7, MM7 ) /* 0 | 0 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ + MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */ + ADD_L ( EDI, EAX ) /* next vertex */ - ADD_L ( EDI, EAX ) /* next vertex */ - -ALIGNTEXT32 -LLBL(G3TPPR_1): 
+ALIGNTEXT16 +LLBL( G3TPIR_1 ): PREFETCHW ( REGOFF(32, EDX) ) /* prefetch 2 vertices ahead */ + PREFETCH ( REGIND(EAX) ) - PREFETCH ( REGOFF(32, EAX) ) /* hopefully stride is zero */ - - MOVQ ( MM5, MM6 ) /* x3 | x2 */ - PFMUL ( MM0, MM4 ) /* x1*m11 | x0*m00 */ - - PUNPCKLDQ ( MM5, MM5 ) /* x2 | x2 */ - ADD_L ( CONST(16), EDX ) /* next r */ - - PFMUL ( MM2, MM5 ) /* x2*m21 | x2*m20 */ - PFSUBR ( MM7, MM3 ) /* | -x2 */ - - PFMUL ( MM1, MM6 ) /* x3*m32 | x2*m22 */ - PFADD ( MM4, MM5 ) /* x1*m11+x2*m21 | x0*m00+x2*m20 */ - - PFACC ( MM3, MM6 ) /* -x2 | x2*m22+x3*m32 */ - MOVQ ( MM5, REGOFF(-16, EDX) ) /* write r0, r1 */ - - MOVQ ( MM6, REGOFF(-8, EDX) ) /* write r2, r3 */ - MOVQ ( REGIND(EAX), MM4 ) /* x1 | x0 */ + ADD_L ( CONST(16), EDX ) /* next r */ + MOVQ ( MM0, REGOFF(-16, EDX) ) /* r1 | r0 */ - MOVQ ( REGOFF(8, EAX), MM5 ) /* x3 | x2 */ - MOVD ( REGOFF(8, EAX), MM3 ) /* | x2 */ + MOVQ ( MM1, REGOFF(-8, EDX) ) /* r3 | r2 */ + MOVQ ( REGIND(EAX), MM0 ) /* x1 | x0 */ - ADD_L ( EDI, EAX ) /* next vertex */ - DEC_L ( ESI ) /* decrement vertex counter */ + MOVQ ( REGOFF(8, EAX), MM1 ) /* x3 | x2 */ + ADD_L ( EDI, EAX ) /* next vertex */ - JA ( LLBL(G3TPPR_1) ) /* cnt > 0 ? -> process next vertex */ + DEC_L ( ESI ) /* decrement vertex counter */ + JNZ ( LLBL( G3TPIR_1 ) ) /* cnt > 0 ? -> process next vertex */ -ALIGNTEXT32 -LLBL(G3TPPR_2): +LLBL( G3TPIR_2 ): FEMMS POP_L ( EDI ) POP_L ( ESI ) RET - diff --git a/xc/extras/Mesa/src/X86/assyntax.h b/xc/extras/Mesa/src/X86/assyntax.h index ae1506701..13c48445c 100644 --- a/xc/extras/Mesa/src/X86/assyntax.h +++ b/xc/extras/Mesa/src/X86/assyntax.h @@ -1,5 +1,6 @@ + #ifndef __ASSYNTAX_H__ -#define __ASSYNTAX_H__ +#define __ASSYNTAX_H__ /* * Copyright 1992 Vrije Universiteit, The Netherlands @@ -14,12 +15,12 @@ * representations about the suitability of this software for any purpose. * It is provided "as is" without express or implied warranty. 
* - * The Vrije Universiteit DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, - * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO - * EVENT SHALL The Vrije Universiteit BE LIABLE FOR ANY SPECIAL, INDIRECT OR - * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, - * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER - * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * The Vrije Universiteit DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, + * IN NO EVENT SHALL The Vrije Universiteit BE LIABLE FOR ANY SPECIAL, + * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THIS SOFTWARE. */ @@ -71,11 +72,10 @@ #define GNU_ASSEMBLER #endif -#if (defined(__STDC__) && !defined(UNIXCPP)) || (defined (sun) && defined (i386) \ - && defined (SVR4) && defined (__STDC__) && !defined (__GNUC__)) -#define CONCAT(x, y) x ## y +#if (defined(__STDC__) && !defined(UNIXCPP)) || (defined (sun) && defined (i386) && defined (SVR4) && defined (__STDC__) && !defined (__GNUC__)) +#define CONCAT(x, y) x ## y #else -#define CONCAT(x, y) x/**/y +#define CONCAT(x, y) x/**/y #endif #ifdef ACK_ASSEMBLER @@ -83,170 +83,170 @@ /* Assume we write code for 32-bit protected mode! 
*/ /* Redefine register names for GAS & AT&T assemblers */ -#define AL al -#define AH ah -#define AX ax -#define EAX ax -#define BL bl -#define BH bh -#define BX bx -#define EBX bx -#define CL cl -#define CH ch -#define CX cx -#define ECX cx -#define DL dl -#define DH dh -#define DX dx -#define EDX dx -#define BP bp -#define EBP bp -#define SI si -#define ESI si -#define DI di -#define EDI di -#define SP sp -#define ESP sp -#define CS cs -#define SS ss -#define DS ds -#define ES es -#define FS fs -#define GS gs +#define AL al +#define AH ah +#define AX ax +#define EAX ax +#define BL bl +#define BH bh +#define BX bx +#define EBX bx +#define CL cl +#define CH ch +#define CX cx +#define ECX cx +#define DL dl +#define DH dh +#define DX dx +#define EDX dx +#define BP bp +#define EBP bp +#define SI si +#define ESI si +#define DI di +#define EDI di +#define SP sp +#define ESP sp +#define CS cs +#define SS ss +#define DS ds +#define ES es +#define FS fs +#define GS gs /* Control Registers */ -#define CR0 cr0 -#define CR1 cr1 -#define CR2 cr2 -#define CR3 cr3 +#define CR0 cr0 +#define CR1 cr1 +#define CR2 cr2 +#define CR3 cr3 /* Debug Registers */ -#define DR0 dr0 -#define DR1 dr1 -#define DR2 dr2 -#define DR3 dr3 -#define DR4 dr4 -#define DR5 dr5 -#define DR6 dr6 -#define DR7 dr7 +#define DR0 dr0 +#define DR1 dr1 +#define DR2 dr2 +#define DR3 dr3 +#define DR4 dr4 +#define DR5 dr5 +#define DR6 dr6 +#define DR7 dr7 /* Floating-point Stack */ -#define ST st +#define ST st -#define AS_BEGIN .sect .text; .sect .rom; .sect .data; .sect .bss; .sect .text +#define AS_BEGIN .sect .text; .sect .rom; .sect .data; .sect .bss; .sect .text -#define _WTOG o16 /* word toggle for _W instructions */ -#define _LTOG /* long toggle for _L instructions */ -#define ADDR_TOGGLE a16 -#define OPSZ_TOGGLE o16 -#define USE16 .use16 -#define USE32 .use32 +#define _WTOG o16 /* word toggle for _W instructions */ +#define _LTOG /* long toggle for _L instructions */ +#define ADDR_TOGGLE a16 +#define 
OPSZ_TOGGLE o16 +#define USE16 .use16 +#define USE32 .use32 -#define CHOICE(a,b,c) c +#define CHOICE(a,b,c) c #else /* AT&T or GAS */ /* Redefine register names for GAS & AT&T assemblers */ -#define AL %al -#define AH %ah -#define AX %ax -#define EAX %eax -#define BL %bl -#define BH %bh -#define BX %bx -#define EBX %ebx -#define CL %cl -#define CH %ch -#define CX %cx -#define ECX %ecx -#define DL %dl -#define DH %dh -#define DX %dx -#define EDX %edx -#define BP %bp -#define EBP %ebp -#define SI %si -#define ESI %esi -#define DI %di -#define EDI %edi -#define SP %sp -#define ESP %esp -#define CS %cs -#define SS %ss -#define DS %ds -#define ES %es -#define FS %fs -#define GS %gs +#define AL %al +#define AH %ah +#define AX %ax +#define EAX %eax +#define BL %bl +#define BH %bh +#define BX %bx +#define EBX %ebx +#define CL %cl +#define CH %ch +#define CX %cx +#define ECX %ecx +#define DL %dl +#define DH %dh +#define DX %dx +#define EDX %edx +#define BP %bp +#define EBP %ebp +#define SI %si +#define ESI %esi +#define DI %di +#define EDI %edi +#define SP %sp +#define ESP %esp +#define CS %cs +#define SS %ss +#define DS %ds +#define ES %es +#define FS %fs +#define GS %gs /* Control Registers */ -#define CR0 %cr0 -#define CR1 %cr1 -#define CR2 %cr2 -#define CR3 %cr3 +#define CR0 %cr0 +#define CR1 %cr1 +#define CR2 %cr2 +#define CR3 %cr3 /* Debug Registers */ -#define DR0 %db0 -#define DR1 %db1 -#define DR2 %db2 -#define DR3 %db3 -#define DR4 %db4 -#define DR5 %db5 -#define DR6 %db6 -#define DR7 %db7 +#define DR0 %db0 +#define DR1 %db1 +#define DR2 %db2 +#define DR3 %db3 +#define DR4 %db4 +#define DR5 %db5 +#define DR6 %db6 +#define DR7 %db7 /* Floating-point Stack */ -#define _STX0 %st(0) -#define _STX1 %st(1) -#define _STX2 %st(2) -#define _STX3 %st(3) -#define _STX4 %st(4) -#define _STX5 %st(5) -#define _STX6 %st(6) -#define _STX7 %st(7) -#define ST(x) CONCAT(_STX,x) +#define _STX0 %st(0) +#define _STX1 %st(1) +#define _STX2 %st(2) +#define _STX3 %st(3) +#define _STX4 
%st(4) +#define _STX5 %st(5) +#define _STX6 %st(6) +#define _STX7 %st(7) +#define ST(x) CONCAT(_STX,x) /* MMX Registers */ -#define MM0 %mm0 -#define MM1 %mm1 -#define MM2 %mm2 -#define MM3 %mm3 -#define MM4 %mm4 -#define MM5 %mm5 -#define MM6 %mm6 -#define MM7 %mm7 +#define MM0 %mm0 +#define MM1 %mm1 +#define MM2 %mm2 +#define MM3 %mm3 +#define MM4 %mm4 +#define MM5 %mm5 +#define MM6 %mm6 +#define MM7 %mm7 /* SSE Registers */ -#define XMM0 %xmm0 -#define XMM1 %xmm1 -#define XMM2 %xmm2 -#define XMM3 %xmm3 -#define XMM4 %xmm4 -#define XMM5 %xmm5 -#define XMM6 %xmm6 -#define XMM7 %xmm7 - -#define AS_BEGIN -#define USE16 -#define USE32 +#define XMM0 %xmm0 +#define XMM1 %xmm1 +#define XMM2 %xmm2 +#define XMM3 %xmm3 +#define XMM4 %xmm4 +#define XMM5 %xmm5 +#define XMM6 %xmm6 +#define XMM7 %xmm7 + +#define AS_BEGIN +#define USE16 +#define USE32 #ifdef GNU_ASSEMBLER -#define ADDR_TOGGLE aword -#define OPSZ_TOGGLE word +#define ADDR_TOGGLE aword +#define OPSZ_TOGGLE word -#define CHOICE(a,b,c) b +#define CHOICE(a,b,c) b #else /* * AT&T ASSEMBLER SYNTAX * ********************* */ -#define CHOICE(a,b,c) a +#define CHOICE(a,b,c) a -#define ADDR_TOGGLE addr16 -#define OPSZ_TOGGLE data16 +#define ADDR_TOGGLE addr16 +#define OPSZ_TOGGLE data16 #endif /* GNU_ASSEMBLER */ #endif /* ACK_ASSEMBLER */ #if defined(__QNX__) || defined(Lynx) || (defined(SYSV) || defined(SVR4)) && !defined(ACK_ASSEMBLER) || defined(__ELF__) || defined(__GNU__) -#define GLNAME(a) a +#define GLNAME(a) a #else -#define GLNAME(a) CONCAT(_,a) +#define GLNAME(a) CONCAT(_,a) #endif @@ -260,59 +260,63 @@ /* Redefine assembler directives */ /*********************************/ #define GLOBL CHOICE(.globl, .globl, .extern) +#define GLOBAL GLOBL +#define EXTERN GLOBL /* -#define ALIGNTEXT32 CHOICE(.align 32, .align ARG2(5,0x90), .align 32) +#define ALIGNTEXT32 CHOICE(.align 32, .align ARG2(5,0x90), .align 32) */ -#define ALIGNTEXT32 CHOICE(.align 32, .balign 32, .align 32) -#define ALIGNTEXT16 CHOICE(.align 16, 
.balign 16, .align 16) -#define ALIGNTEXT8 CHOICE(.align 8, .balign 8, .align 8) -#define ALIGNTEXT4 CHOICE(.align 4, .balign 4, .align 4) -#define ALIGNTEXT2 CHOICE(.align 2, .balign 2, .align 2) +#define ALIGNTEXT32 CHOICE(.align 32, .balign 32, .align 32) +#define ALIGNTEXT16 CHOICE(.align 16, .balign 16, .align 16) +#define ALIGNTEXT8 CHOICE(.align 8, .balign 8, .align 8) +#define ALIGNTEXT4 CHOICE(.align 4, .balign 4, .align 4) +#define ALIGNTEXT2 CHOICE(.align 2, .balign 2, .align 2) /* ALIGNTEXT4ifNOP is the same as ALIGNTEXT4, but only if the space is * guaranteed to be filled with NOPs. Otherwise it does nothing. */ -#define ALIGNTEXT32ifNOP CHOICE(.align 32, .balign ARG2(32,0x90), /*can't do it*/) -#define ALIGNTEXT16ifNOP CHOICE(.align 16, .balign ARG2(16,0x90), /*can't do it*/) -#define ALIGNTEXT8ifNOP CHOICE(.align 8, .balign ARG2(8,0x90), /*can't do it*/) -#define ALIGNTEXT4ifNOP CHOICE(.align 4, .balign ARG2(4,0x90), /*can't do it*/) -#define ALIGNDATA32 CHOICE(.align 32, .balign ARG2(32,0x0), .align 32) -#define ALIGNDATA16 CHOICE(.align 16, .balign ARG2(16,0x0), .align 16) -#define ALIGNDATA8 CHOICE(.align 8, .balign ARG2(8,0x0), .align 8) -#define ALIGNDATA4 CHOICE(.align 4, .balign ARG2(4,0x0), .align 4) -#define ALIGNDATA2 CHOICE(.align 2, .balign ARG2(2,0x0), .align 2) -#define FILE(s) CHOICE(.file s, .file s, .file s) -#define STRING(s) CHOICE(.string s, .asciz s, .asciz s) -#define D_LONG CHOICE(.long, .long, .data4) -#define D_WORD CHOICE(.value, .short, .data2) -#define D_BYTE CHOICE(.byte, .byte, .data1) -#define SPACE CHOICE(.comm, .space, .space) -#define COMM CHOICE(.comm, .comm, .comm) -#define SEG_DATA CHOICE(.data, .data, .sect .data) -#define SEG_TEXT CHOICE(.text, .text, .sect .text) -#define SEG_BSS CHOICE(.bss, .bss, .sect .bss) +#define ALIGNTEXT32ifNOP CHOICE(.align 32, .balign ARG2(32,0x90), /*can't do it*/) +#define ALIGNTEXT16ifNOP CHOICE(.align 16, .balign ARG2(16,0x90), /*can't do it*/) +#define ALIGNTEXT8ifNOP 
CHOICE(.align 8, .balign ARG2(8,0x90), /*can't do it*/) +#define ALIGNTEXT4ifNOP CHOICE(.align 4, .balign ARG2(4,0x90), /*can't do it*/) +#define ALIGNDATA32 CHOICE(.align 32, .balign ARG2(32,0x0), .align 32) +#define ALIGNDATA16 CHOICE(.align 16, .balign ARG2(16,0x0), .align 16) +#define ALIGNDATA8 CHOICE(.align 8, .balign ARG2(8,0x0), .align 8) +#define ALIGNDATA4 CHOICE(.align 4, .balign ARG2(4,0x0), .align 4) +#define ALIGNDATA2 CHOICE(.align 2, .balign ARG2(2,0x0), .align 2) +#define FILE(s) CHOICE(.file s, .file s, .file s) +#define STRING(s) CHOICE(.string s, .asciz s, .asciz s) +#define D_LONG CHOICE(.long, .long, .data4) +#define D_WORD CHOICE(.value, .short, .data2) +#define D_BYTE CHOICE(.byte, .byte, .data1) +#define SPACE CHOICE(.comm, .space, .space) +#define COMM CHOICE(.comm, .comm, .comm) +#define SEG_DATA CHOICE(.data, .data, .sect .data) +#define SEG_TEXT CHOICE(.text, .text, .sect .text) +#define SEG_BSS CHOICE(.bss, .bss, .sect .bss) #ifdef GNU_ASSEMBLER -#define D_SPACE(n) . = . + n +#define D_SPACE(n) . = . 
+ n #else -#define D_SPACE(n) .space n +#define D_SPACE(n) .space n #endif /* Addressing Modes */ /* Immediate Mode */ -#define ADDR(a) CHOICE(CONCAT($,a), CONCAT($,a), a) -#define CONST(a) CHOICE(CONCAT($,a), CONCAT($,a), a) +#define ADDR(a) CHOICE(CONCAT($,a), CONCAT($,a), a) +#define CONST(a) CHOICE(CONCAT($,a), CONCAT($,a), a) /* Indirect Mode */ -#define CONTENT(a) CHOICE(a, a, (a)) /* take contents of variable */ -#define REGIND(a) CHOICE((a), (a), (a)) /* Register a indirect */ +#define CONTENT(a) CHOICE(a, a, (a)) /* take contents of variable */ +#define REGIND(a) CHOICE((a), (a), (a)) /* Register a indirect */ /* Register b indirect plus displacement a */ -#define REGOFF(a, b) CHOICE(a(b), a(b), a(b)) +#define REGOFF(a, b) CHOICE(a(b), a(b), a(b)) /* Reg indirect Base + Index + Displacement - this is mainly for 16-bit mode * which has no scaling */ -#define REGBID(b,i,d) CHOICE(d(b,i), d(b,i), d(b)(i)) +#define REGBID(b,i,d) CHOICE(d(b,i), d(b,i), d(b)(i)) +/* Reg indirect Base + (Index * Scale) */ +#define REGBIS(b,i,s) CHOICE((b,i,s), (b,i,s), (b)(i*s)) /* Reg indirect Base + (Index * Scale) + Displacement */ -#define REGBISD(b,i,s,d) CHOICE(d(b,i,s), d(b,i,s), d(b)(i*s)) +#define REGBISD(b,i,s,d) CHOICE(d(b,i,s), d(b,i,s), d(b)(i*s)) /* Displaced Scaled Index: */ #define REGDIS(d,i,s) CHOICE(d(,i,s), d(,i,s), d(i * s)) /* Indexed Base: */ @@ -328,10 +332,10 @@ * eg. (CRT0_PM | CRT_EM) */ -#define EXPR(a) CHOICE([a], (a), [a]) -#define ENOT(a) CHOICE(0!a, ~a, ~a) -#define EMUL(a,b) CHOICE(a\*b, a*b, a*b) -#define EDIV(a,b) CHOICE(a\/b, a/b, a/b) +#define EXPR(a) CHOICE([a], (a), [a]) +#define ENOT(a) CHOICE(0!a, ~a, ~a) +#define EMUL(a,b) CHOICE(a\*b, a*b, a*b) +#define EDIV(a,b) CHOICE(a\/b, a/b, a/b) /* * We have to beat the problem of commas within arguments to choice. @@ -339,436 +343,436 @@ * and other known cpp definitions evaluate arguments before substitution * so the following works. 
*/ -#define ARG2(a, b) a,b -#define ARG3(a,b,c) a,b,c +#define ARG2(a, b) a,b +#define ARG3(a,b,c) a,b,c /* Redefine assembler commands */ -#define AAA CHOICE(aaa, aaa, aaa) -#define AAD CHOICE(aad, aad, aad) -#define AAM CHOICE(aam, aam, aam) -#define AAS CHOICE(aas, aas, aas) -#define ADC_L(a, b) CHOICE(adcl ARG2(a,b), adcl ARG2(a,b), _LTOG adc ARG2(b,a)) -#define ADC_W(a, b) CHOICE(adcw ARG2(a,b), adcw ARG2(a,b), _WTOG adc ARG2(b,a)) -#define ADC_B(a, b) CHOICE(adcb ARG2(a,b), adcb ARG2(a,b), adcb ARG2(b,a)) -#define ADD_L(a, b) CHOICE(addl ARG2(a,b), addl ARG2(a,b), _LTOG add ARG2(b,a)) -#define ADD_W(a, b) CHOICE(addw ARG2(a,b), addw ARG2(a,b), _WTOG add ARG2(b,a)) -#define ADD_B(a, b) CHOICE(addb ARG2(a,b), addb ARG2(a,b), addb ARG2(b,a)) -#define AND_L(a, b) CHOICE(andl ARG2(a,b), andl ARG2(a,b), _LTOG and ARG2(b,a)) -#define AND_W(a, b) CHOICE(andw ARG2(a,b), andw ARG2(a,b), _WTOG and ARG2(b,a)) -#define AND_B(a, b) CHOICE(andb ARG2(a,b), andb ARG2(a,b), andb ARG2(b,a)) -#define ARPL(a,b) CHOICE(arpl ARG2(a,b), arpl ARG2(a,b), arpl ARG2(b,a)) -#define BOUND_L(a, b) CHOICE(boundl ARG2(a,b), boundl ARG2(b,a), _LTOG bound ARG2(b,a)) -#define BOUND_W(a, b) CHOICE(boundw ARG2(a,b), boundw ARG2(b,a), _WTOG bound ARG2(b,a)) -#define BSF_L(a, b) CHOICE(bsfl ARG2(a,b), bsfl ARG2(a,b), _LTOG bsf ARG2(b,a)) -#define BSF_W(a, b) CHOICE(bsfw ARG2(a,b), bsfw ARG2(a,b), _WTOG bsf ARG2(b,a)) -#define BSR_L(a, b) CHOICE(bsrl ARG2(a,b), bsrl ARG2(a,b), _LTOG bsr ARG2(b,a)) -#define BSR_W(a, b) CHOICE(bsrw ARG2(a,b), bsrw ARG2(a,b), _WTOG bsr ARG2(b,a)) -#define BT_L(a, b) CHOICE(btl ARG2(a,b), btl ARG2(a,b), _LTOG bt ARG2(b,a)) -#define BT_W(a, b) CHOICE(btw ARG2(a,b), btw ARG2(a,b), _WTOG bt ARG2(b,a)) -#define BTC_L(a, b) CHOICE(btcl ARG2(a,b), btcl ARG2(a,b), _LTOG btc ARG2(b,a)) -#define BTC_W(a, b) CHOICE(btcw ARG2(a,b), btcw ARG2(a,b), _WTOG btc ARG2(b,a)) -#define BTR_L(a, b) CHOICE(btrl ARG2(a,b), btrl ARG2(a,b), _LTOG btr ARG2(b,a)) -#define BTR_W(a, b) CHOICE(btrw 
ARG2(a,b), btrw ARG2(a,b), _WTOG btr ARG2(b,a)) -#define BTS_L(a, b) CHOICE(btsl ARG2(a,b), btsl ARG2(a,b), _LTOG bts ARG2(b,a)) -#define BTS_W(a, b) CHOICE(btsw ARG2(a,b), btsw ARG2(a,b), _WTOG bts ARG2(b,a)) -#define CALL(a) CHOICE(call a, call a, call a) -#define CALLF(s,a) CHOICE(lcall ARG2(s,a), lcall ARG2(s,a), callf s:a) -#define CBW CHOICE(cbtw, cbw, cbw) -#define CWDE CHOICE(cwtd, cwde, cwde) -#define CLC CHOICE(clc, clc, clc) -#define CLD CHOICE(cld, cld, cld) -#define CLI CHOICE(cli, cli, cli) -#define CLTS CHOICE(clts, clts, clts) -#define CMC CHOICE(cmc, cmc, cmc) -#define CMP_L(a, b) CHOICE(cmpl ARG2(a,b), cmpl ARG2(a,b), _LTOG cmp ARG2(b,a)) -#define CMP_W(a, b) CHOICE(cmpw ARG2(a,b), cmpw ARG2(a,b), _WTOG cmp ARG2(b,a)) -#define CMP_B(a, b) CHOICE(cmpb ARG2(a,b), cmpb ARG2(a,b), cmpb ARG2(b,a)) -#define CMPS_L CHOICE(cmpsl, cmpsl, _LTOG cmps) -#define CMPS_W CHOICE(cmpsw, cmpsw, _WTOG cmps) -#define CMPS_B CHOICE(cmpsb, cmpsb, cmpsb) -#define CWD CHOICE(cwtl, cwd, cwd) -#define CDQ CHOICE(cltd, cdq, cdq) -#define DAA CHOICE(daa, daa, daa) -#define DAS CHOICE(das, das, das) -#define DEC_L(a) CHOICE(decl a, decl a, _LTOG dec a) -#define DEC_W(a) CHOICE(decw a, decw a, _WTOG dec a) -#define DEC_B(a) CHOICE(decb a, decb a, decb a) -#define DIV_L(a) CHOICE(divl a, divl a, div a) -#define DIV_W(a) CHOICE(divw a, divw a, div a) -#define DIV_B(a) CHOICE(divb a, divb a, divb a) -#define ENTER(a,b) CHOICE(enter ARG2(a,b), enter ARG2(a,b), enter ARG2(b,a)) -#define HLT CHOICE(hlt, hlt, hlt) -#define IDIV_L(a) CHOICE(idivl a, idivl a, _LTOG idiv a) -#define IDIV_W(a) CHOICE(idivw a, idivw a, _WTOG idiv a) -#define IDIV_B(a) CHOICE(idivb a, idivb a, idivb a) +#define AAA CHOICE(aaa, aaa, aaa) +#define AAD CHOICE(aad, aad, aad) +#define AAM CHOICE(aam, aam, aam) +#define AAS CHOICE(aas, aas, aas) +#define ADC_L(a, b) CHOICE(adcl ARG2(a,b), adcl ARG2(a,b), _LTOG adc ARG2(b,a)) +#define ADC_W(a, b) CHOICE(adcw ARG2(a,b), adcw ARG2(a,b), _WTOG adc ARG2(b,a)) 
+#define ADC_B(a, b) CHOICE(adcb ARG2(a,b), adcb ARG2(a,b), adcb ARG2(b,a)) +#define ADD_L(a, b) CHOICE(addl ARG2(a,b), addl ARG2(a,b), _LTOG add ARG2(b,a)) +#define ADD_W(a, b) CHOICE(addw ARG2(a,b), addw ARG2(a,b), _WTOG add ARG2(b,a)) +#define ADD_B(a, b) CHOICE(addb ARG2(a,b), addb ARG2(a,b), addb ARG2(b,a)) +#define AND_L(a, b) CHOICE(andl ARG2(a,b), andl ARG2(a,b), _LTOG and ARG2(b,a)) +#define AND_W(a, b) CHOICE(andw ARG2(a,b), andw ARG2(a,b), _WTOG and ARG2(b,a)) +#define AND_B(a, b) CHOICE(andb ARG2(a,b), andb ARG2(a,b), andb ARG2(b,a)) +#define ARPL(a,b) CHOICE(arpl ARG2(a,b), arpl ARG2(a,b), arpl ARG2(b,a)) +#define BOUND_L(a, b) CHOICE(boundl ARG2(a,b), boundl ARG2(b,a), _LTOG bound ARG2(b,a)) +#define BOUND_W(a, b) CHOICE(boundw ARG2(a,b), boundw ARG2(b,a), _WTOG bound ARG2(b,a)) +#define BSF_L(a, b) CHOICE(bsfl ARG2(a,b), bsfl ARG2(a,b), _LTOG bsf ARG2(b,a)) +#define BSF_W(a, b) CHOICE(bsfw ARG2(a,b), bsfw ARG2(a,b), _WTOG bsf ARG2(b,a)) +#define BSR_L(a, b) CHOICE(bsrl ARG2(a,b), bsrl ARG2(a,b), _LTOG bsr ARG2(b,a)) +#define BSR_W(a, b) CHOICE(bsrw ARG2(a,b), bsrw ARG2(a,b), _WTOG bsr ARG2(b,a)) +#define BT_L(a, b) CHOICE(btl ARG2(a,b), btl ARG2(a,b), _LTOG bt ARG2(b,a)) +#define BT_W(a, b) CHOICE(btw ARG2(a,b), btw ARG2(a,b), _WTOG bt ARG2(b,a)) +#define BTC_L(a, b) CHOICE(btcl ARG2(a,b), btcl ARG2(a,b), _LTOG btc ARG2(b,a)) +#define BTC_W(a, b) CHOICE(btcw ARG2(a,b), btcw ARG2(a,b), _WTOG btc ARG2(b,a)) +#define BTR_L(a, b) CHOICE(btrl ARG2(a,b), btrl ARG2(a,b), _LTOG btr ARG2(b,a)) +#define BTR_W(a, b) CHOICE(btrw ARG2(a,b), btrw ARG2(a,b), _WTOG btr ARG2(b,a)) +#define BTS_L(a, b) CHOICE(btsl ARG2(a,b), btsl ARG2(a,b), _LTOG bts ARG2(b,a)) +#define BTS_W(a, b) CHOICE(btsw ARG2(a,b), btsw ARG2(a,b), _WTOG bts ARG2(b,a)) +#define CALL(a) CHOICE(call a, call a, call a) +#define CALLF(s,a) CHOICE(lcall ARG2(s,a), lcall ARG2(s,a), callf s:a) +#define CBW CHOICE(cbtw, cbw, cbw) +#define CWDE CHOICE(cwtd, cwde, cwde) +#define CLC CHOICE(clc, clc, clc) 
+#define CLD CHOICE(cld, cld, cld) +#define CLI CHOICE(cli, cli, cli) +#define CLTS CHOICE(clts, clts, clts) +#define CMC CHOICE(cmc, cmc, cmc) +#define CMP_L(a, b) CHOICE(cmpl ARG2(a,b), cmpl ARG2(a,b), _LTOG cmp ARG2(b,a)) +#define CMP_W(a, b) CHOICE(cmpw ARG2(a,b), cmpw ARG2(a,b), _WTOG cmp ARG2(b,a)) +#define CMP_B(a, b) CHOICE(cmpb ARG2(a,b), cmpb ARG2(a,b), cmpb ARG2(b,a)) +#define CMPS_L CHOICE(cmpsl, cmpsl, _LTOG cmps) +#define CMPS_W CHOICE(cmpsw, cmpsw, _WTOG cmps) +#define CMPS_B CHOICE(cmpsb, cmpsb, cmpsb) +#define CWD CHOICE(cwtl, cwd, cwd) +#define CDQ CHOICE(cltd, cdq, cdq) +#define DAA CHOICE(daa, daa, daa) +#define DAS CHOICE(das, das, das) +#define DEC_L(a) CHOICE(decl a, decl a, _LTOG dec a) +#define DEC_W(a) CHOICE(decw a, decw a, _WTOG dec a) +#define DEC_B(a) CHOICE(decb a, decb a, decb a) +#define DIV_L(a) CHOICE(divl a, divl a, div a) +#define DIV_W(a) CHOICE(divw a, divw a, div a) +#define DIV_B(a) CHOICE(divb a, divb a, divb a) +#define ENTER(a,b) CHOICE(enter ARG2(a,b), enter ARG2(a,b), enter ARG2(b,a)) +#define HLT CHOICE(hlt, hlt, hlt) +#define IDIV_L(a) CHOICE(idivl a, idivl a, _LTOG idiv a) +#define IDIV_W(a) CHOICE(idivw a, idivw a, _WTOG idiv a) +#define IDIV_B(a) CHOICE(idivb a, idivb a, idivb a) /* More forms than this for imul!! 
*/ -#define IMUL_L(a, b) CHOICE(imull ARG2(a,b), imull ARG2(a,b), _LTOG imul ARG2(b,a)) -#define IMUL_W(a, b) CHOICE(imulw ARG2(a,b), imulw ARG2(a,b), _WTOG imul ARG2(b,a)) -#define IMUL_B(a) CHOICE(imulb a, imulb a, imulb a) -#define IN_L CHOICE(inl (DX), inl ARG2(DX,EAX), _LTOG in DX) -#define IN_W CHOICE(inw (DX), inw ARG2(DX,AX), _WTOG in DX) -#define IN_B CHOICE(inb (DX), inb ARG2(DX,AL), inb DX) +#define IMUL_L(a, b) CHOICE(imull ARG2(a,b), imull ARG2(a,b), _LTOG imul ARG2(b,a)) +#define IMUL_W(a, b) CHOICE(imulw ARG2(a,b), imulw ARG2(a,b), _WTOG imul ARG2(b,a)) +#define IMUL_B(a) CHOICE(imulb a, imulb a, imulb a) +#define IN_L CHOICE(inl (DX), inl ARG2(DX,EAX), _LTOG in DX) +#define IN_W CHOICE(inw (DX), inw ARG2(DX,AX), _WTOG in DX) +#define IN_B CHOICE(inb (DX), inb ARG2(DX,AL), inb DX) /* Please AS code writer: use the following ONLY, if you refer to ports<256 * directly, but not in IN1_W(DX), for instance, even if IN1_ looks nicer */ #if defined (sun) -#define IN1_L(a) CHOICE(inl (a), inl ARG2(a,EAX), _LTOG in a) -#define IN1_W(a) CHOICE(inw (a), inw ARG2(a,AX), _WTOG in a) -#define IN1_B(a) CHOICE(inb (a), inb ARG2(a,AL), inb a) +#define IN1_L(a) CHOICE(inl (a), inl ARG2(a,EAX), _LTOG in a) +#define IN1_W(a) CHOICE(inw (a), inw ARG2(a,AX), _WTOG in a) +#define IN1_B(a) CHOICE(inb (a), inb ARG2(a,AL), inb a) #else -#define IN1_L(a) CHOICE(inl a, inl ARG2(a,EAX), _LTOG in a) -#define IN1_W(a) CHOICE(inw a, inw ARG2(a,AX), _WTOG in a) -#define IN1_B(a) CHOICE(inb a, inb ARG2(a,AL), inb a) +#define IN1_L(a) CHOICE(inl a, inl ARG2(a,EAX), _LTOG in a) +#define IN1_W(a) CHOICE(inw a, inw ARG2(a,AX), _WTOG in a) +#define IN1_B(a) CHOICE(inb a, inb ARG2(a,AL), inb a) #endif -#define INC_L(a) CHOICE(incl a, incl a, _LTOG inc a) -#define INC_W(a) CHOICE(incw a, incw a, _WTOG inc a) -#define INC_B(a) CHOICE(incb a, incb a, incb a) -#define INS_L CHOICE(insl, insl, _LTOG ins) -#define INS_W CHOICE(insw, insw, _WTOG ins) -#define INS_B CHOICE(insb, insb, insb) 
-#define INT(a) CHOICE(int a, int a, int a) -#define INT3 CHOICE(int CONST(3), int3, int CONST(3)) -#define INTO CHOICE(into, into, into) -#define IRET CHOICE(iret, iret, iret) -#define IRETD CHOICE(iret, iret, iretd) -#define JA(a) CHOICE(ja a, ja a, ja a) -#define JAE(a) CHOICE(jae a, jae a, jae a) -#define JB(a) CHOICE(jb a, jb a, jb a) -#define JBE(a) CHOICE(jbe a, jbe a, jbe a) -#define JC(a) CHOICE(jc a, jc a, jc a) -#define JE(a) CHOICE(je a, je a, je a) -#define JG(a) CHOICE(jg a, jg a, jg a) -#define JGE(a) CHOICE(jge a, jge a, jge a) -#define JL(a) CHOICE(jl a, jl a, jl a) -#define JLE(a) CHOICE(jle a, jle a, jle a) -#define JNA(a) CHOICE(jna a, jna a, jna a) -#define JNAE(a) CHOICE(jnae a, jnae a, jnae a) -#define JNB(a) CHOICE(jnb a, jnb a, jnb a) -#define JNBE(a) CHOICE(jnbe a, jnbe a, jnbe a) -#define JNC(a) CHOICE(jnc a, jnc a, jnc a) -#define JNE(a) CHOICE(jne a, jne a, jne a) -#define JNG(a) CHOICE(jng a, jng a, jng a) -#define JNGE(a) CHOICE(jnge a, jnge a, jnge a) -#define JNL(a) CHOICE(jnl a, jnl a, jnl a) -#define JNLE(a) CHOICE(jnle a, jnle a, jnle a) -#define JNO(a) CHOICE(jno a, jno a, jno a) -#define JNP(a) CHOICE(jnp a, jnp a, jnp a) -#define JNS(a) CHOICE(jns a, jns a, jns a) -#define JNZ(a) CHOICE(jnz a, jnz a, jnz a) -#define JO(a) CHOICE(jo a, jo a, jo a) -#define JP(a) CHOICE(jp a, jp a, jp a) -#define JPE(a) CHOICE(jpe a, jpe a, jpe a) -#define JPO(a) CHOICE(jpo a, jpo a, jpo a) -#define JS(a) CHOICE(js a, js a, js a) -#define JZ(a) CHOICE(jz a, jz a, jz a) -#define JMP(a) CHOICE(jmp a, jmp a, jmp a) -#define JMPF(s,a) CHOICE(ljmp ARG2(s,a), ljmp ARG2(s,a), jmpf s:a) -#define LAHF CHOICE(lahf, lahf, lahf) +#define INC_L(a) CHOICE(incl a, incl a, _LTOG inc a) +#define INC_W(a) CHOICE(incw a, incw a, _WTOG inc a) +#define INC_B(a) CHOICE(incb a, incb a, incb a) +#define INS_L CHOICE(insl, insl, _LTOG ins) +#define INS_W CHOICE(insw, insw, _WTOG ins) +#define INS_B CHOICE(insb, insb, insb) +#define INT(a) CHOICE(int a, int a, int a) 
+#define INT3 CHOICE(int CONST(3), int3, int CONST(3)) +#define INTO CHOICE(into, into, into) +#define IRET CHOICE(iret, iret, iret) +#define IRETD CHOICE(iret, iret, iretd) +#define JA(a) CHOICE(ja a, ja a, ja a) +#define JAE(a) CHOICE(jae a, jae a, jae a) +#define JB(a) CHOICE(jb a, jb a, jb a) +#define JBE(a) CHOICE(jbe a, jbe a, jbe a) +#define JC(a) CHOICE(jc a, jc a, jc a) +#define JE(a) CHOICE(je a, je a, je a) +#define JG(a) CHOICE(jg a, jg a, jg a) +#define JGE(a) CHOICE(jge a, jge a, jge a) +#define JL(a) CHOICE(jl a, jl a, jl a) +#define JLE(a) CHOICE(jle a, jle a, jle a) +#define JNA(a) CHOICE(jna a, jna a, jna a) +#define JNAE(a) CHOICE(jnae a, jnae a, jnae a) +#define JNB(a) CHOICE(jnb a, jnb a, jnb a) +#define JNBE(a) CHOICE(jnbe a, jnbe a, jnbe a) +#define JNC(a) CHOICE(jnc a, jnc a, jnc a) +#define JNE(a) CHOICE(jne a, jne a, jne a) +#define JNG(a) CHOICE(jng a, jng a, jng a) +#define JNGE(a) CHOICE(jnge a, jnge a, jnge a) +#define JNL(a) CHOICE(jnl a, jnl a, jnl a) +#define JNLE(a) CHOICE(jnle a, jnle a, jnle a) +#define JNO(a) CHOICE(jno a, jno a, jno a) +#define JNP(a) CHOICE(jnp a, jnp a, jnp a) +#define JNS(a) CHOICE(jns a, jns a, jns a) +#define JNZ(a) CHOICE(jnz a, jnz a, jnz a) +#define JO(a) CHOICE(jo a, jo a, jo a) +#define JP(a) CHOICE(jp a, jp a, jp a) +#define JPE(a) CHOICE(jpe a, jpe a, jpe a) +#define JPO(a) CHOICE(jpo a, jpo a, jpo a) +#define JS(a) CHOICE(js a, js a, js a) +#define JZ(a) CHOICE(jz a, jz a, jz a) +#define JMP(a) CHOICE(jmp a, jmp a, jmp a) +#define JMPF(s,a) CHOICE(ljmp ARG2(s,a), ljmp ARG2(s,a), jmpf s:a) +#define LAHF CHOICE(lahf, lahf, lahf) #if !defined(_REAL_MODE) && !defined(_V86_MODE) -#define LAR(a, b) CHOICE(lar ARG2(a, b), lar ARG2(a, b), lar ARG2(b, a)) +#define LAR(a, b) CHOICE(lar ARG2(a, b), lar ARG2(a, b), lar ARG2(b, a)) #endif -#define LEA_L(a, b) CHOICE(leal ARG2(a,b), leal ARG2(a,b), _LTOG lea ARG2(b,a)) -#define LEA_W(a, b) CHOICE(leaw ARG2(a,b), leaw ARG2(a,b), _WTOG lea ARG2(b,a)) -#define 
LEAVE CHOICE(leave, leave, leave) -#define LGDT(a) CHOICE(lgdt a, lgdt a, lgdt a) -#define LIDT(a) CHOICE(lidt a, lidt a, lidt a) -#define LDS(a, b) CHOICE(ldsl ARG2(a,b), lds ARG2(a,b), lds ARG2(b,a)) -#define LES(a, b) CHOICE(lesl ARG2(a,b), les ARG2(a,b), les ARG2(b,a)) -#define LFS(a, b) CHOICE(lfsl ARG2(a,b), lfs ARG2(a,b), lfs ARG2(b,a)) -#define LGS(a, b) CHOICE(lgsl ARG2(a,b), lgs ARG2(a,b), lgs ARG2(b,a)) -#define LSS(a, b) CHOICE(lssl ARG2(a,b), lss ARG2(a,b), lss ARG2(b,a)) -#define LLDT(a) CHOICE(lldt a, lldt a, lldt a) -#define LMSW(a) CHOICE(lmsw a, lmsw a, lmsw a) +#define LEA_L(a, b) CHOICE(leal ARG2(a,b), leal ARG2(a,b), _LTOG lea ARG2(b,a)) +#define LEA_W(a, b) CHOICE(leaw ARG2(a,b), leaw ARG2(a,b), _WTOG lea ARG2(b,a)) +#define LEAVE CHOICE(leave, leave, leave) +#define LGDT(a) CHOICE(lgdt a, lgdt a, lgdt a) +#define LIDT(a) CHOICE(lidt a, lidt a, lidt a) +#define LDS(a, b) CHOICE(ldsl ARG2(a,b), lds ARG2(a,b), lds ARG2(b,a)) +#define LES(a, b) CHOICE(lesl ARG2(a,b), les ARG2(a,b), les ARG2(b,a)) +#define LFS(a, b) CHOICE(lfsl ARG2(a,b), lfs ARG2(a,b), lfs ARG2(b,a)) +#define LGS(a, b) CHOICE(lgsl ARG2(a,b), lgs ARG2(a,b), lgs ARG2(b,a)) +#define LSS(a, b) CHOICE(lssl ARG2(a,b), lss ARG2(a,b), lss ARG2(b,a)) +#define LLDT(a) CHOICE(lldt a, lldt a, lldt a) +#define LMSW(a) CHOICE(lmsw a, lmsw a, lmsw a) #define LOCK CHOICE(lock, lock, lock) -#define LODS_L CHOICE(lodsl, lodsl, _LTOG lods) -#define LODS_W CHOICE(lodsw, lodsw, _WTOG lods) -#define LODS_B CHOICE(lodsb, lodsb, lodsb) -#define LOOP(a) CHOICE(loop a, loop a, loop a) -#define LOOPE(a) CHOICE(loope a, loope a, loope a) -#define LOOPZ(a) CHOICE(loopz a, loopz a, loopz a) -#define LOOPNE(a) CHOICE(loopne a, loopne a, loopne a) -#define LOOPNZ(a) CHOICE(loopnz a, loopnz a, loopnz a) +#define LODS_L CHOICE(lodsl, lodsl, _LTOG lods) +#define LODS_W CHOICE(lodsw, lodsw, _WTOG lods) +#define LODS_B CHOICE(lodsb, lodsb, lodsb) +#define LOOP(a) CHOICE(loop a, loop a, loop a) +#define LOOPE(a) 
CHOICE(loope a, loope a, loope a) +#define LOOPZ(a) CHOICE(loopz a, loopz a, loopz a) +#define LOOPNE(a) CHOICE(loopne a, loopne a, loopne a) +#define LOOPNZ(a) CHOICE(loopnz a, loopnz a, loopnz a) #if !defined(_REAL_MODE) && !defined(_V86_MODE) -#define LSL(a, b) CHOICE(lsl ARG2(a,b), lsl ARG2(a,b), lsl ARG2(b,a)) +#define LSL(a, b) CHOICE(lsl ARG2(a,b), lsl ARG2(a,b), lsl ARG2(b,a)) #endif -#define LTR(a) CHOICE(ltr a, ltr a, ltr a) -#define MOV_SR(a, b) CHOICE(movw ARG2(a,b), mov ARG2(a,b), mov ARG2(b,a)) -#define MOV_L(a, b) CHOICE(movl ARG2(a,b), movl ARG2(a,b), _LTOG mov ARG2(b,a)) -#define MOV_W(a, b) CHOICE(movw ARG2(a,b), movw ARG2(a,b), _WTOG mov ARG2(b,a)) -#define MOV_B(a, b) CHOICE(movb ARG2(a,b), movb ARG2(a,b), movb ARG2(b,a)) -#define MOVS_L CHOICE(movsl, movsl, _LTOG movs) -#define MOVS_W CHOICE(movsw, movsw, _WTOG movs) -#define MOVS_B CHOICE(movsb, movsb, movsb) -#define MOVSX_BL(a, b) CHOICE(movsbl ARG2(a,b), movsbl ARG2(a,b), movsx ARG2(b,a)) -#define MOVSX_BW(a, b) CHOICE(movsbw ARG2(a,b), movsbw ARG2(a,b), movsx ARG2(b,a)) -#define MOVSX_WL(a, b) CHOICE(movswl ARG2(a,b), movswl ARG2(a,b), movsx ARG2(b,a)) -#define MOVZX_BL(a, b) CHOICE(movzbl ARG2(a,b), movzbl ARG2(a,b), movzx ARG2(b,a)) -#define MOVZX_BW(a, b) CHOICE(movzbw ARG2(a,b), movzbw ARG2(a,b), movzx ARG2(b,a)) -#define MOVZX_WL(a, b) CHOICE(movzwl ARG2(a,b), movzwl ARG2(a,b), movzx ARG2(b,a)) -#define MUL_L(a) CHOICE(mull a, mull a, _LTOG mul a) -#define MUL_W(a) CHOICE(mulw a, mulw a, _WTOG mul a) -#define MUL_B(a) CHOICE(mulb a, mulb a, mulb a) -#define NEG_L(a) CHOICE(negl a, negl a, _LTOG neg a) -#define NEG_W(a) CHOICE(negw a, negw a, _WTOG neg a) -#define NEG_B(a) CHOICE(negb a, negb a, negb a) -#define NOP CHOICE(nop, nop, nop) -#define NOT_L(a) CHOICE(notl a, notl a, _LTOG not a) -#define NOT_W(a) CHOICE(notw a, notw a, _WTOG not a) -#define NOT_B(a) CHOICE(notb a, notb a, notb a) -#define OR_L(a,b) CHOICE(orl ARG2(a,b), orl ARG2(a,b), _LTOG or ARG2(b,a)) -#define OR_W(a,b) 
CHOICE(orw ARG2(a,b), orw ARG2(a,b), _WTOG or ARG2(b,a)) -#define OR_B(a,b) CHOICE(orb ARG2(a,b), orb ARG2(a,b), orb ARG2(b,a)) -#define OUT_L CHOICE(outl (DX), outl ARG2(EAX,DX), _LTOG out DX) -#define OUT_W CHOICE(outw (DX), outw ARG2(AX,DX), _WTOG out DX) -#define OUT_B CHOICE(outb (DX), outb ARG2(AL,DX), outb DX) +#define LTR(a) CHOICE(ltr a, ltr a, ltr a) +#define MOV_SR(a, b) CHOICE(movw ARG2(a,b), mov ARG2(a,b), mov ARG2(b,a)) +#define MOV_L(a, b) CHOICE(movl ARG2(a,b), movl ARG2(a,b), _LTOG mov ARG2(b,a)) +#define MOV_W(a, b) CHOICE(movw ARG2(a,b), movw ARG2(a,b), _WTOG mov ARG2(b,a)) +#define MOV_B(a, b) CHOICE(movb ARG2(a,b), movb ARG2(a,b), movb ARG2(b,a)) +#define MOVS_L CHOICE(movsl, movsl, _LTOG movs) +#define MOVS_W CHOICE(movsw, movsw, _WTOG movs) +#define MOVS_B CHOICE(movsb, movsb, movsb) +#define MOVSX_BL(a, b) CHOICE(movsbl ARG2(a,b), movsbl ARG2(a,b), movsx ARG2(b,a)) +#define MOVSX_BW(a, b) CHOICE(movsbw ARG2(a,b), movsbw ARG2(a,b), movsx ARG2(b,a)) +#define MOVSX_WL(a, b) CHOICE(movswl ARG2(a,b), movswl ARG2(a,b), movsx ARG2(b,a)) +#define MOVZX_BL(a, b) CHOICE(movzbl ARG2(a,b), movzbl ARG2(a,b), movzx ARG2(b,a)) +#define MOVZX_BW(a, b) CHOICE(movzbw ARG2(a,b), movzbw ARG2(a,b), movzx ARG2(b,a)) +#define MOVZX_WL(a, b) CHOICE(movzwl ARG2(a,b), movzwl ARG2(a,b), movzx ARG2(b,a)) +#define MUL_L(a) CHOICE(mull a, mull a, _LTOG mul a) +#define MUL_W(a) CHOICE(mulw a, mulw a, _WTOG mul a) +#define MUL_B(a) CHOICE(mulb a, mulb a, mulb a) +#define NEG_L(a) CHOICE(negl a, negl a, _LTOG neg a) +#define NEG_W(a) CHOICE(negw a, negw a, _WTOG neg a) +#define NEG_B(a) CHOICE(negb a, negb a, negb a) +#define NOP CHOICE(nop, nop, nop) +#define NOT_L(a) CHOICE(notl a, notl a, _LTOG not a) +#define NOT_W(a) CHOICE(notw a, notw a, _WTOG not a) +#define NOT_B(a) CHOICE(notb a, notb a, notb a) +#define OR_L(a,b) CHOICE(orl ARG2(a,b), orl ARG2(a,b), _LTOG or ARG2(b,a)) +#define OR_W(a,b) CHOICE(orw ARG2(a,b), orw ARG2(a,b), _WTOG or ARG2(b,a)) +#define OR_B(a,b) 
CHOICE(orb ARG2(a,b), orb ARG2(a,b), orb ARG2(b,a)) +#define OUT_L CHOICE(outl (DX), outl ARG2(EAX,DX), _LTOG out DX) +#define OUT_W CHOICE(outw (DX), outw ARG2(AX,DX), _WTOG out DX) +#define OUT_B CHOICE(outb (DX), outb ARG2(AL,DX), outb DX) /* Please AS code writer: use the following ONLY, if you refer to ports<256 * directly, but not in OUT1_W(DX), for instance, even if OUT1_ looks nicer */ -#define OUT1_L(a) CHOICE(outl (a), outl ARG2(EAX,a), _LTOG out a) -#define OUT1_W(a) CHOICE(outw (a), outw ARG2(AX,a), _WTOG out a) -#define OUT1_B(a) CHOICE(outb (a), outb ARG2(AL,a), outb a) -#define OUTS_L CHOICE(outsl, outsl, _LTOG outs) -#define OUTS_W CHOICE(outsw, outsw, _WTOG outs) -#define OUTS_B CHOICE(outsb, outsb, outsb) -#define POP_SR(a) CHOICE(pop a, pop a, pop a) -#define POP_L(a) CHOICE(popl a, popl a, _LTOG pop a) -#define POP_W(a) CHOICE(popw a, popw a, _WTOG pop a) -#define POPA_L CHOICE(popal, popal, _LTOG popa) -#define POPA_W CHOICE(popaw, popaw, _WTOG popa) -#define POPF_L CHOICE(popfl, popfl, _LTOG popf) -#define POPF_W CHOICE(popfw, popfw, _WTOG popf) -#define PUSH_SR(a) CHOICE(push a, push a, push a) -#define PUSH_L(a) CHOICE(pushl a, pushl a, _LTOG push a) -#define PUSH_W(a) CHOICE(pushw a, pushw a, _WTOG push a) -#define PUSH_B(a) CHOICE(push a, pushb a, push a) -#define PUSHA_L CHOICE(pushal, pushal, _LTOG pusha) -#define PUSHA_W CHOICE(pushaw, pushaw, _WTOG pusha) -#define PUSHF_L CHOICE(pushfl, pushfl, _LTOG pushf) -#define PUSHF_W CHOICE(pushfw, pushfw, _WTOG pushf) -#define RCL_L(a, b) CHOICE(rcll ARG2(a,b), rcll ARG2(a,b), _LTOG rcl ARG2(b,a)) -#define RCL_W(a, b) CHOICE(rclw ARG2(a,b), rclw ARG2(a,b), _WTOG rcl ARG2(b,a)) -#define RCL_B(a, b) CHOICE(rclb ARG2(a,b), rclb ARG2(a,b), rclb ARG2(b,a)) -#define RCR_L(a, b) CHOICE(rcrl ARG2(a,b), rcrl ARG2(a,b), _LTOG rcr ARG2(b,a)) -#define RCR_W(a, b) CHOICE(rcrw ARG2(a,b), rcrw ARG2(a,b), _WTOG rcr ARG2(b,a)) -#define RCR_B(a, b) CHOICE(rcrb ARG2(a,b), rcrb ARG2(a,b), rcrb ARG2(b,a)) -#define 
ROL_L(a, b) CHOICE(roll ARG2(a,b), roll ARG2(a,b), _LTOG rol ARG2(b,a)) -#define ROL_W(a, b) CHOICE(rolw ARG2(a,b), rolw ARG2(a,b), _WTOG rol ARG2(b,a)) -#define ROL_B(a, b) CHOICE(rolb ARG2(a,b), rolb ARG2(a,b), rolb ARG2(b,a)) -#define ROR_L(a, b) CHOICE(rorl ARG2(a,b), rorl ARG2(a,b), _LTOG ror ARG2(b,a)) -#define ROR_W(a, b) CHOICE(rorw ARG2(a,b), rorw ARG2(a,b), _WTOG ror ARG2(b,a)) -#define ROR_B(a, b) CHOICE(rorb ARG2(a,b), rorb ARG2(a,b), rorb ARG2(b,a)) -#define REP CHOICE(rep ;, rep ;, repe) -#define REPE CHOICE(repz ;, repe ;, repe) -#define REPNE CHOICE(repnz ;, repne ;, repne) -#define REPNZ REPNE -#define REPZ REPE -#define RET CHOICE(ret, ret, ret) -#define SAHF CHOICE(sahf, sahf, sahf) -#define SAL_L(a, b) CHOICE(sall ARG2(a,b), sall ARG2(a,b), _LTOG sal ARG2(b,a)) -#define SAL_W(a, b) CHOICE(salw ARG2(a,b), salw ARG2(a,b), _WTOG sal ARG2(b,a)) -#define SAL_B(a, b) CHOICE(salb ARG2(a,b), salb ARG2(a,b), salb ARG2(b,a)) -#define SAR_L(a, b) CHOICE(sarl ARG2(a,b), sarl ARG2(a,b), _LTOG sar ARG2(b,a)) -#define SAR_W(a, b) CHOICE(sarw ARG2(a,b), sarw ARG2(a,b), _WTOG sar ARG2(b,a)) -#define SAR_B(a, b) CHOICE(sarb ARG2(a,b), sarb ARG2(a,b), sarb ARG2(b,a)) -#define SBB_L(a, b) CHOICE(sbbl ARG2(a,b), sbbl ARG2(a,b), _LTOG sbb ARG2(b,a)) -#define SBB_W(a, b) CHOICE(sbbw ARG2(a,b), sbbw ARG2(a,b), _WTOG sbb ARG2(b,a)) -#define SBB_B(a, b) CHOICE(sbbb ARG2(a,b), sbbb ARG2(a,b), sbbb ARG2(b,a)) -#define SCAS_L CHOICE(scasl, scasl, _LTOG scas) -#define SCAS_W CHOICE(scasw, scasw, _WTOG scas) -#define SCAS_B CHOICE(scasb, scasb, scasb) -#define SETA(a) CHOICE(seta a, seta a, seta a) -#define SETAE(a) CHOICE(setae a, setae a, setae a) -#define SETB(a) CHOICE(setb a, setb a, setb a) -#define SETBE(a) CHOICE(setbe a, setbe a, setbe a) -#define SETC(a) CHOICE(setc a, setb a, setb a) -#define SETE(a) CHOICE(sete a, sete a, sete a) -#define SETG(a) CHOICE(setg a, setg a, setg a) -#define SETGE(a) CHOICE(setge a, setge a, setge a) -#define SETL(a) CHOICE(setl a, setl 
a, setl a) -#define SETLE(a) CHOICE(setle a, setle a, setle a) -#define SETNA(a) CHOICE(setna a, setna a, setna a) -#define SETNAE(a) CHOICE(setnae a, setnae a, setnae a) -#define SETNB(a) CHOICE(setnb a, setnb a, setnb a) -#define SETNBE(a) CHOICE(setnbe a, setnbe a, setnbe a) -#define SETNC(a) CHOICE(setnc a, setnb a, setnb a) -#define SETNE(a) CHOICE(setne a, setne a, setne a) -#define SETNG(a) CHOICE(setng a, setng a, setng a) -#define SETNGE(a) CHOICE(setnge a, setnge a, setnge a) -#define SETNL(a) CHOICE(setnl a, setnl a, setnl a) -#define SETNLE(a) CHOICE(setnle a, setnle a, setnle a) -#define SETNO(a) CHOICE(setno a, setno a, setno a) -#define SETNP(a) CHOICE(setnp a, setnp a, setnp a) -#define SETNS(a) CHOICE(setns a, setns a, setna a) -#define SETNZ(a) CHOICE(setnz a, setnz a, setnz a) -#define SETO(a) CHOICE(seto a, seto a, seto a) -#define SETP(a) CHOICE(setp a, setp a, setp a) -#define SETPE(a) CHOICE(setpe a, setpe a, setpe a) -#define SETPO(a) CHOICE(setpo a, setpo a, setpo a) -#define SETS(a) CHOICE(sets a, sets a, seta a) -#define SETZ(a) CHOICE(setz a, setz a, setz a) -#define SGDT(a) CHOICE(sgdt a, sgdt a, sgdt a) -#define SIDT(a) CHOICE(sidt a, sidt a, sidt a) -#define SHL_L(a, b) CHOICE(shll ARG2(a,b), shll ARG2(a,b), _LTOG shl ARG2(b,a)) -#define SHL_W(a, b) CHOICE(shlw ARG2(a,b), shlw ARG2(a,b), _WTOG shl ARG2(b,a)) -#define SHL_B(a, b) CHOICE(shlb ARG2(a,b), shlb ARG2(a,b), shlb ARG2(b,a)) -#define SHLD_L(a,b,c) CHOICE(shldl ARG3(a,b,c), shldl ARG3(a,b,c), _LTOG shld ARG3(c,b,a)) -#define SHLD2_L(a,b) CHOICE(shldl ARG2(a,b), shldl ARG3(CL,a,b), _LTOG shld ARG3(b,a,CL)) -#define SHLD_W(a,b,c) CHOICE(shldw ARG3(a,b,c), shldw ARG3(a,b,c), _WTOG shld ARG3(c,b,a)) -#define SHLD2_W(a,b) CHOICE(shldw ARG2(a,b), shldw ARG3(CL,a,b), _WTOG shld ARG3(b,a,CL)) -#define SHR_L(a, b) CHOICE(shrl ARG2(a,b), shrl ARG2(a,b), _LTOG shr ARG2(b,a)) -#define SHR_W(a, b) CHOICE(shrw ARG2(a,b), shrw ARG2(a,b), _WTOG shr ARG2(b,a)) -#define SHR_B(a, b) CHOICE(shrb 
ARG2(a,b), shrb ARG2(a,b), shrb ARG2(b,a)) -#define SHRD_L(a,b,c) CHOICE(shrdl ARG3(a,b,c), shrdl ARG3(a,b,c), _LTOG shrd ARG3(c,b,a)) -#define SHRD2_L(a,b) CHOICE(shrdl ARG2(a,b), shrdl ARG3(CL,a,b), _LTOG shrd ARG3(b,a,CL)) -#define SHRD_W(a,b,c) CHOICE(shrdw ARG3(a,b,c), shrdw ARG3(a,b,c), _WTOG shrd ARG3(c,b,a)) -#define SHRD2_W(a,b) CHOICE(shrdw ARG2(a,b), shrdw ARG3(CL,a,b), _WTOG shrd ARG3(b,a,CL)) -#define SLDT(a) CHOICE(sldt a, sldt a, sldt a) -#define SMSW(a) CHOICE(smsw a, smsw a, smsw a) -#define STC CHOICE(stc, stc, stc) -#define STD CHOICE(std, std, std) -#define STI CHOICE(sti, sti, sti) -#define STOS_L CHOICE(stosl, stosl, _LTOG stos) -#define STOS_W CHOICE(stosw, stosw, _WTOG stos) -#define STOS_B CHOICE(stosb, stosb, stosb) -#define STR(a) CHOICE(str a, str a, str a) -#define SUB_L(a, b) CHOICE(subl ARG2(a,b), subl ARG2(a,b), _LTOG sub ARG2(b,a)) -#define SUB_W(a, b) CHOICE(subw ARG2(a,b), subw ARG2(a,b), _WTOG sub ARG2(b,a)) -#define SUB_B(a, b) CHOICE(subb ARG2(a,b), subb ARG2(a,b), subb ARG2(b,a)) -#define TEST_L(a, b) CHOICE(testl ARG2(a,b), testl ARG2(a,b), _LTOG test ARG2(b,a)) -#define TEST_W(a, b) CHOICE(testw ARG2(a,b), testw ARG2(a,b), _WTOG test ARG2(b,a)) -#define TEST_B(a, b) CHOICE(testb ARG2(a,b), testb ARG2(a,b), testb ARG2(b,a)) -#define VERR(a) CHOICE(verr a, verr a, verr a) -#define VERW(a) CHOICE(verw a, verw a, verw a) -#define WAIT CHOICE(wait, wait, wait) -#define XCHG_L(a, b) CHOICE(xchgl ARG2(a,b), xchgl ARG2(a,b), _LTOG xchg ARG2(b,a)) -#define XCHG_W(a, b) CHOICE(xchgw ARG2(a,b), xchgw ARG2(a,b), _WTOG xchg ARG2(b,a)) -#define XCHG_B(a, b) CHOICE(xchgb ARG2(a,b), xchgb ARG2(a,b), xchgb ARG2(b,a)) -#define XLAT CHOICE(xlat, xlat, xlat) -#define XOR_L(a, b) CHOICE(xorl ARG2(a,b), xorl ARG2(a,b), _LTOG xor ARG2(b,a)) -#define XOR_W(a, b) CHOICE(xorw ARG2(a,b), xorw ARG2(a,b), _WTOG xor ARG2(b,a)) -#define XOR_B(a, b) CHOICE(xorb ARG2(a,b), xorb ARG2(a,b), xorb ARG2(b,a)) +#define OUT1_L(a) CHOICE(outl (a), outl ARG2(EAX,a), 
_LTOG out a) +#define OUT1_W(a) CHOICE(outw (a), outw ARG2(AX,a), _WTOG out a) +#define OUT1_B(a) CHOICE(outb (a), outb ARG2(AL,a), outb a) +#define OUTS_L CHOICE(outsl, outsl, _LTOG outs) +#define OUTS_W CHOICE(outsw, outsw, _WTOG outs) +#define OUTS_B CHOICE(outsb, outsb, outsb) +#define POP_SR(a) CHOICE(pop a, pop a, pop a) +#define POP_L(a) CHOICE(popl a, popl a, _LTOG pop a) +#define POP_W(a) CHOICE(popw a, popw a, _WTOG pop a) +#define POPA_L CHOICE(popal, popal, _LTOG popa) +#define POPA_W CHOICE(popaw, popaw, _WTOG popa) +#define POPF_L CHOICE(popfl, popfl, _LTOG popf) +#define POPF_W CHOICE(popfw, popfw, _WTOG popf) +#define PUSH_SR(a) CHOICE(push a, push a, push a) +#define PUSH_L(a) CHOICE(pushl a, pushl a, _LTOG push a) +#define PUSH_W(a) CHOICE(pushw a, pushw a, _WTOG push a) +#define PUSH_B(a) CHOICE(push a, pushb a, push a) +#define PUSHA_L CHOICE(pushal, pushal, _LTOG pusha) +#define PUSHA_W CHOICE(pushaw, pushaw, _WTOG pusha) +#define PUSHF_L CHOICE(pushfl, pushfl, _LTOG pushf) +#define PUSHF_W CHOICE(pushfw, pushfw, _WTOG pushf) +#define RCL_L(a, b) CHOICE(rcll ARG2(a,b), rcll ARG2(a,b), _LTOG rcl ARG2(b,a)) +#define RCL_W(a, b) CHOICE(rclw ARG2(a,b), rclw ARG2(a,b), _WTOG rcl ARG2(b,a)) +#define RCL_B(a, b) CHOICE(rclb ARG2(a,b), rclb ARG2(a,b), rclb ARG2(b,a)) +#define RCR_L(a, b) CHOICE(rcrl ARG2(a,b), rcrl ARG2(a,b), _LTOG rcr ARG2(b,a)) +#define RCR_W(a, b) CHOICE(rcrw ARG2(a,b), rcrw ARG2(a,b), _WTOG rcr ARG2(b,a)) +#define RCR_B(a, b) CHOICE(rcrb ARG2(a,b), rcrb ARG2(a,b), rcrb ARG2(b,a)) +#define ROL_L(a, b) CHOICE(roll ARG2(a,b), roll ARG2(a,b), _LTOG rol ARG2(b,a)) +#define ROL_W(a, b) CHOICE(rolw ARG2(a,b), rolw ARG2(a,b), _WTOG rol ARG2(b,a)) +#define ROL_B(a, b) CHOICE(rolb ARG2(a,b), rolb ARG2(a,b), rolb ARG2(b,a)) +#define ROR_L(a, b) CHOICE(rorl ARG2(a,b), rorl ARG2(a,b), _LTOG ror ARG2(b,a)) +#define ROR_W(a, b) CHOICE(rorw ARG2(a,b), rorw ARG2(a,b), _WTOG ror ARG2(b,a)) +#define ROR_B(a, b) CHOICE(rorb ARG2(a,b), rorb ARG2(a,b), 
rorb ARG2(b,a)) +#define REP CHOICE(rep ;, rep ;, repe) +#define REPE CHOICE(repz ;, repe ;, repe) +#define REPNE CHOICE(repnz ;, repne ;, repne) +#define REPNZ REPNE +#define REPZ REPE +#define RET CHOICE(ret, ret, ret) +#define SAHF CHOICE(sahf, sahf, sahf) +#define SAL_L(a, b) CHOICE(sall ARG2(a,b), sall ARG2(a,b), _LTOG sal ARG2(b,a)) +#define SAL_W(a, b) CHOICE(salw ARG2(a,b), salw ARG2(a,b), _WTOG sal ARG2(b,a)) +#define SAL_B(a, b) CHOICE(salb ARG2(a,b), salb ARG2(a,b), salb ARG2(b,a)) +#define SAR_L(a, b) CHOICE(sarl ARG2(a,b), sarl ARG2(a,b), _LTOG sar ARG2(b,a)) +#define SAR_W(a, b) CHOICE(sarw ARG2(a,b), sarw ARG2(a,b), _WTOG sar ARG2(b,a)) +#define SAR_B(a, b) CHOICE(sarb ARG2(a,b), sarb ARG2(a,b), sarb ARG2(b,a)) +#define SBB_L(a, b) CHOICE(sbbl ARG2(a,b), sbbl ARG2(a,b), _LTOG sbb ARG2(b,a)) +#define SBB_W(a, b) CHOICE(sbbw ARG2(a,b), sbbw ARG2(a,b), _WTOG sbb ARG2(b,a)) +#define SBB_B(a, b) CHOICE(sbbb ARG2(a,b), sbbb ARG2(a,b), sbbb ARG2(b,a)) +#define SCAS_L CHOICE(scasl, scasl, _LTOG scas) +#define SCAS_W CHOICE(scasw, scasw, _WTOG scas) +#define SCAS_B CHOICE(scasb, scasb, scasb) +#define SETA(a) CHOICE(seta a, seta a, seta a) +#define SETAE(a) CHOICE(setae a, setae a, setae a) +#define SETB(a) CHOICE(setb a, setb a, setb a) +#define SETBE(a) CHOICE(setbe a, setbe a, setbe a) +#define SETC(a) CHOICE(setc a, setb a, setb a) +#define SETE(a) CHOICE(sete a, sete a, sete a) +#define SETG(a) CHOICE(setg a, setg a, setg a) +#define SETGE(a) CHOICE(setge a, setge a, setge a) +#define SETL(a) CHOICE(setl a, setl a, setl a) +#define SETLE(a) CHOICE(setle a, setle a, setle a) +#define SETNA(a) CHOICE(setna a, setna a, setna a) +#define SETNAE(a) CHOICE(setnae a, setnae a, setnae a) +#define SETNB(a) CHOICE(setnb a, setnb a, setnb a) +#define SETNBE(a) CHOICE(setnbe a, setnbe a, setnbe a) +#define SETNC(a) CHOICE(setnc a, setnb a, setnb a) +#define SETNE(a) CHOICE(setne a, setne a, setne a) +#define SETNG(a) CHOICE(setng a, setng a, setng a) +#define 
SETNGE(a) CHOICE(setnge a, setnge a, setnge a) +#define SETNL(a) CHOICE(setnl a, setnl a, setnl a) +#define SETNLE(a) CHOICE(setnle a, setnle a, setnle a) +#define SETNO(a) CHOICE(setno a, setno a, setno a) +#define SETNP(a) CHOICE(setnp a, setnp a, setnp a) +#define SETNS(a) CHOICE(setns a, setns a, setna a) +#define SETNZ(a) CHOICE(setnz a, setnz a, setnz a) +#define SETO(a) CHOICE(seto a, seto a, seto a) +#define SETP(a) CHOICE(setp a, setp a, setp a) +#define SETPE(a) CHOICE(setpe a, setpe a, setpe a) +#define SETPO(a) CHOICE(setpo a, setpo a, setpo a) +#define SETS(a) CHOICE(sets a, sets a, seta a) +#define SETZ(a) CHOICE(setz a, setz a, setz a) +#define SGDT(a) CHOICE(sgdt a, sgdt a, sgdt a) +#define SIDT(a) CHOICE(sidt a, sidt a, sidt a) +#define SHL_L(a, b) CHOICE(shll ARG2(a,b), shll ARG2(a,b), _LTOG shl ARG2(b,a)) +#define SHL_W(a, b) CHOICE(shlw ARG2(a,b), shlw ARG2(a,b), _WTOG shl ARG2(b,a)) +#define SHL_B(a, b) CHOICE(shlb ARG2(a,b), shlb ARG2(a,b), shlb ARG2(b,a)) +#define SHLD_L(a,b,c) CHOICE(shldl ARG3(a,b,c), shldl ARG3(a,b,c), _LTOG shld ARG3(c,b,a)) +#define SHLD2_L(a,b) CHOICE(shldl ARG2(a,b), shldl ARG3(CL,a,b), _LTOG shld ARG3(b,a,CL)) +#define SHLD_W(a,b,c) CHOICE(shldw ARG3(a,b,c), shldw ARG3(a,b,c), _WTOG shld ARG3(c,b,a)) +#define SHLD2_W(a,b) CHOICE(shldw ARG2(a,b), shldw ARG3(CL,a,b), _WTOG shld ARG3(b,a,CL)) +#define SHR_L(a, b) CHOICE(shrl ARG2(a,b), shrl ARG2(a,b), _LTOG shr ARG2(b,a)) +#define SHR_W(a, b) CHOICE(shrw ARG2(a,b), shrw ARG2(a,b), _WTOG shr ARG2(b,a)) +#define SHR_B(a, b) CHOICE(shrb ARG2(a,b), shrb ARG2(a,b), shrb ARG2(b,a)) +#define SHRD_L(a,b,c) CHOICE(shrdl ARG3(a,b,c), shrdl ARG3(a,b,c), _LTOG shrd ARG3(c,b,a)) +#define SHRD2_L(a,b) CHOICE(shrdl ARG2(a,b), shrdl ARG3(CL,a,b), _LTOG shrd ARG3(b,a,CL)) +#define SHRD_W(a,b,c) CHOICE(shrdw ARG3(a,b,c), shrdw ARG3(a,b,c), _WTOG shrd ARG3(c,b,a)) +#define SHRD2_W(a,b) CHOICE(shrdw ARG2(a,b), shrdw ARG3(CL,a,b), _WTOG shrd ARG3(b,a,CL)) +#define SLDT(a) CHOICE(sldt a, sldt 
a, sldt a) +#define SMSW(a) CHOICE(smsw a, smsw a, smsw a) +#define STC CHOICE(stc, stc, stc) +#define STD CHOICE(std, std, std) +#define STI CHOICE(sti, sti, sti) +#define STOS_L CHOICE(stosl, stosl, _LTOG stos) +#define STOS_W CHOICE(stosw, stosw, _WTOG stos) +#define STOS_B CHOICE(stosb, stosb, stosb) +#define STR(a) CHOICE(str a, str a, str a) +#define SUB_L(a, b) CHOICE(subl ARG2(a,b), subl ARG2(a,b), _LTOG sub ARG2(b,a)) +#define SUB_W(a, b) CHOICE(subw ARG2(a,b), subw ARG2(a,b), _WTOG sub ARG2(b,a)) +#define SUB_B(a, b) CHOICE(subb ARG2(a,b), subb ARG2(a,b), subb ARG2(b,a)) +#define TEST_L(a, b) CHOICE(testl ARG2(a,b), testl ARG2(a,b), _LTOG test ARG2(b,a)) +#define TEST_W(a, b) CHOICE(testw ARG2(a,b), testw ARG2(a,b), _WTOG test ARG2(b,a)) +#define TEST_B(a, b) CHOICE(testb ARG2(a,b), testb ARG2(a,b), testb ARG2(b,a)) +#define VERR(a) CHOICE(verr a, verr a, verr a) +#define VERW(a) CHOICE(verw a, verw a, verw a) +#define WAIT CHOICE(wait, wait, wait) +#define XCHG_L(a, b) CHOICE(xchgl ARG2(a,b), xchgl ARG2(a,b), _LTOG xchg ARG2(b,a)) +#define XCHG_W(a, b) CHOICE(xchgw ARG2(a,b), xchgw ARG2(a,b), _WTOG xchg ARG2(b,a)) +#define XCHG_B(a, b) CHOICE(xchgb ARG2(a,b), xchgb ARG2(a,b), xchgb ARG2(b,a)) +#define XLAT CHOICE(xlat, xlat, xlat) +#define XOR_L(a, b) CHOICE(xorl ARG2(a,b), xorl ARG2(a,b), _LTOG xor ARG2(b,a)) +#define XOR_W(a, b) CHOICE(xorw ARG2(a,b), xorw ARG2(a,b), _WTOG xor ARG2(b,a)) +#define XOR_B(a, b) CHOICE(xorb ARG2(a,b), xorb ARG2(a,b), xorb ARG2(b,a)) /* Floating Point Instructions */ -#define F2XM1 CHOICE(f2xm1, f2xm1, f2xm1) -#define FABS CHOICE(fabs, fabs, fabs) -#define FADD_D(a) CHOICE(faddl a, faddl a, faddd a) -#define FADD_S(a) CHOICE(fadds a, fadds a, fadds a) -#define FADD2(a, b) CHOICE(fadd ARG2(a,b), fadd ARG2(a,b), fadd ARG2(b,a)) -#define FADDP(a, b) CHOICE(faddp ARG2(a,b), faddp ARG2(a,b), faddp ARG2(b,a)) -#define FIADD_L(a) CHOICE(fiaddl a, fiaddl a, fiaddl a) -#define FIADD_W(a) CHOICE(fiadd a, fiadds a, fiadds a) -#define 
FBLD(a) CHOICE(fbld a, fbld a, fbld a) -#define FBSTP(a) CHOICE(fbstp a, fbstp a, fbstp a) -#define FCHS CHOICE(fchs, fchs, fchs) -#define FCLEX CHOICE(fclex, wait; fnclex, wait; fclex) -#define FNCLEX CHOICE(fnclex, fnclex, fclex) -#define FCOM(a) CHOICE(fcom a, fcom a, fcom a) -#define FCOM_D(a) CHOICE(fcoml a, fcoml a, fcomd a) -#define FCOM_S(a) CHOICE(fcoms a, fcoms a, fcoms a) -#define FCOMP(a) CHOICE(fcomp a, fcomp a, fcomp a) -#define FCOMP_D(a) CHOICE(fcompl a, fcompl a, fcompd a) -#define FCOMP_S(a) CHOICE(fcomps a, fcomps a, fcomps a) -#define FCOMPP CHOICE(fcompp, fcompp, fcompp) -#define FCOS CHOICE(fcos, fcos, fcos) -#define FDECSTP CHOICE(fdecstp, fdecstp, fdecstp) -#define FDIV_D(a) CHOICE(fdivl a, fdivl a, fdivd a) -#define FDIV_S(a) CHOICE(fdivs a, fdivs a, fdivs a) -#define FDIV2(a, b) CHOICE(fdiv ARG2(a,b), fdiv ARG2(a,b), fdiv ARG2(b,a)) -#define FDIVP(a, b) CHOICE(fdivp ARG2(a,b), fdivp ARG2(a,b), fdivp ARG2(b,a)) -#define FIDIV_L(a) CHOICE(fidivl a, fidivl a, fidivl a) -#define FIDIV_W(a) CHOICE(fidiv a, fidivs a, fidivs a) -#define FDIVR_D(a) CHOICE(fdivrl a, fdivrl a, fdivrd a) -#define FDIVR_S(a) CHOICE(fdivrs a, fdivrs a, fdivrs a) -#define FDIVR2(a, b) CHOICE(fdivr ARG2(a,b), fdivr ARG2(a,b), fdivr ARG2(b,a)) -#define FDIVRP(a, b) CHOICE(fdivrp ARG2(a,b), fdivrp ARG2(a,b), fdivrp ARG2(b,a)) -#define FIDIVR_L(a) CHOICE(fidivrl a, fidivrl a, fidivrl a) -#define FIDIVR_W(a) CHOICE(fidivr a, fidivrs a, fidivrs a) -#define FFREE(a) CHOICE(ffree a, ffree a, ffree a) -#define FICOM_L(a) CHOICE(ficoml a, ficoml a, ficoml a) -#define FICOM_W(a) CHOICE(ficom a, ficoms a, ficoms a) -#define FICOMP_L(a) CHOICE(ficompl a, ficompl a, ficompl a) -#define FICOMP_W(a) CHOICE(ficomp a, ficomps a, ficomps a) -#define FILD_Q(a) CHOICE(fildll a, fildq a, fildq a) -#define FILD_L(a) CHOICE(fildl a, fildl a, fildl a) -#define FILD_W(a) CHOICE(fild a, filds a, filds a) -#define FINCSTP CHOICE(fincstp, fincstp, fincstp) -#define FINIT CHOICE(finit, wait; fninit, 
wait; finit) -#define FNINIT CHOICE(fninit, fninit, finit) -#define FIST_L(a) CHOICE(fistl a, fistl a, fistl a) -#define FIST_W(a) CHOICE(fist a, fists a, fists a) -#define FISTP_Q(a) CHOICE(fistpll a, fistpq a, fistpq a) -#define FISTP_L(a) CHOICE(fistpl a, fistpl a, fistpl a) -#define FISTP_W(a) CHOICE(fistp a, fistps a, fistps a) -#define FLD_X(a) CHOICE(fldt a, fldt a, fldx a) /* 80 bit data type! */ -#define FLD_D(a) CHOICE(fldl a, fldl a, fldd a) -#define FLD_S(a) CHOICE(flds a, flds a, flds a) -#define FLD1 CHOICE(fld1, fld1, fld1) -#define FLDL2T CHOICE(fldl2t, fldl2t, fldl2t) -#define FLDL2E CHOICE(fldl2e, fldl2e, fldl2e) -#define FLDPI CHOICE(fldpi, fldpi, fldpi) -#define FLDLG2 CHOICE(fldlg2, fldlg2, fldlg2) -#define FLDLN2 CHOICE(fldln2, fldln2, fldln2) -#define FLDZ CHOICE(fldz, fldz, fldz) -#define FLDCW(a) CHOICE(fldcw a, fldcw a, fldcw a) -#define FLDENV(a) CHOICE(fldenv a, fldenv a, fldenv a) -#define FMUL_S(a) CHOICE(fmuls a, fmuls a, fmuls a) -#define FMUL_D(a) CHOICE(fmull a, fmull a, fmuld a) -#define FMUL2(a, b) CHOICE(fmul ARG2(a,b), fmul ARG2(a,b), fmul ARG2(b,a)) -#define FMULP(a, b) CHOICE(fmulp ARG2(a,b), fmulp ARG2(a,b), fmulp ARG2(b,a)) -#define FIMUL_L(a) CHOICE(fimull a, fimull a, fimull a) -#define FIMUL_W(a) CHOICE(fimul a, fimuls a, fimuls a) -#define FNOP CHOICE(fnop, fnop, fnop) -#define FPATAN CHOICE(fpatan, fpatan, fpatan) -#define FPREM CHOICE(fprem, fprem, fprem) -#define FPREM1 CHOICE(fprem1, fprem1, fprem1) -#define FPTAN CHOICE(fptan, fptan, fptan) -#define FRNDINT CHOICE(frndint, frndint, frndint) -#define FRSTOR(a) CHOICE(frstor a, frstor a, frstor a) -#define FSAVE(a) CHOICE(fsave a, wait; fnsave a, wait; fsave a) -#define FNSAVE(a) CHOICE(fnsave a, fnsave a, fsave a) -#define FSCALE CHOICE(fscale, fscale, fscale) -#define FSIN CHOICE(fsin, fsin, fsin) -#define FSINCOS CHOICE(fsincos, fsincos, fsincos) -#define FSQRT CHOICE(fsqrt, fsqrt, fsqrt) -#define FST_D(a) CHOICE(fstl a, fstl a, fstd a) -#define FST_S(a) 
CHOICE(fsts a, fsts a, fsts a) -#define FSTP_X(a) CHOICE(fstpt a, fstpt a, fstpx a) -#define FSTP_D(a) CHOICE(fstpl a, fstpl a, fstpd a) -#define FSTP_S(a) CHOICE(fstps a, fstps a, fstps a) -#define FSTP(a) CHOICE(fstp a, fstp a, fstp a) -#define FSTCW(a) CHOICE(fstcw a, wait; fnstcw a, wait; fstcw a) -#define FNSTCW(a) CHOICE(fnstcw a, fnstcw a, fstcw a) -#define FSTENV(a) CHOICE(fstenv a, wait; fnstenv a, fstenv a) -#define FNSTENV(a) CHOICE(fnstenv a, fnstenv a, fstenv a) -#define FSTSW(a) CHOICE(fstsw a, wait; fnstsw a, wait; fstsw a) -#define FNSTSW(a) CHOICE(fnstsw a, fnstsw a, fstsw a) -#define FSUB_S(a) CHOICE(fsubs a, fsubs a, fsubs a) -#define FSUB_D(a) CHOICE(fsubl a, fsubl a, fsubd a) -#define FSUB2(a, b) CHOICE(fsub ARG2(a,b), fsub ARG2(a,b), fsub ARG2(b,a)) -#define FSUBP(a, b) CHOICE(fsubp ARG2(a,b), fsubp ARG2(a,b), fsubp ARG2(b,a)) -#define FISUB_L(a) CHOICE(fisubl a, fisubl a, fisubl a) -#define FISUB_W(a) CHOICE(fisub a, fisubs a, fisubs a) -#define FSUBR_S(a) CHOICE(fsubrs a, fsubrs a, fsubrs a) -#define FSUBR_D(a) CHOICE(fsubrl a, fsubrl a, fsubrd a) -#define FSUBR2(a, b) CHOICE(fsubr ARG2(a,b), fsubr ARG2(a,b), fsubr ARG2(b,a)) -#define FSUBRP(a, b) CHOICE(fsubrp ARG2(a,b), fsubrp ARG2(a,b), fsubrp ARG2(b,a)) -#define FISUBR_L(a) CHOICE(fisubrl a, fisubrl a, fisubrl a) -#define FISUBR_W(a) CHOICE(fisubr a, fisubrs a, fisubrs a) -#define FTST CHOICE(ftst, ftst, ftst) -#define FUCOM(a) CHOICE(fucom a, fucom a, fucom a) -#define FUCOMP(a) CHOICE(fucomp a, fucomp a, fucomp a) -#define FUCOMPP CHOICE(fucompp, fucompp, fucompp) -#define FWAIT CHOICE(wait, wait, wait) -#define FXAM CHOICE(fxam, fxam, fxam) -#define FXCH(a) CHOICE(fxch a, fxch a, fxch a) -#define FXTRACT CHOICE(fxtract, fxtract, fxtract) -#define FYL2X CHOICE(fyl2x, fyl2x, fyl2x) -#define FYL2XP1 CHOICE(fyl2xp1, fyl2xp1, fyl2xp1) +#define F2XM1 CHOICE(f2xm1, f2xm1, f2xm1) +#define FABS CHOICE(fabs, fabs, fabs) +#define FADD_D(a) CHOICE(faddl a, faddl a, faddd a) +#define FADD_S(a) 
CHOICE(fadds a, fadds a, fadds a) +#define FADD2(a, b) CHOICE(fadd ARG2(a,b), fadd ARG2(a,b), fadd ARG2(b,a)) +#define FADDP(a, b) CHOICE(faddp ARG2(a,b), faddp ARG2(a,b), faddp ARG2(b,a)) +#define FIADD_L(a) CHOICE(fiaddl a, fiaddl a, fiaddl a) +#define FIADD_W(a) CHOICE(fiadd a, fiadds a, fiadds a) +#define FBLD(a) CHOICE(fbld a, fbld a, fbld a) +#define FBSTP(a) CHOICE(fbstp a, fbstp a, fbstp a) +#define FCHS CHOICE(fchs, fchs, fchs) +#define FCLEX CHOICE(fclex, wait; fnclex, wait; fclex) +#define FNCLEX CHOICE(fnclex, fnclex, fclex) +#define FCOM(a) CHOICE(fcom a, fcom a, fcom a) +#define FCOM_D(a) CHOICE(fcoml a, fcoml a, fcomd a) +#define FCOM_S(a) CHOICE(fcoms a, fcoms a, fcoms a) +#define FCOMP(a) CHOICE(fcomp a, fcomp a, fcomp a) +#define FCOMP_D(a) CHOICE(fcompl a, fcompl a, fcompd a) +#define FCOMP_S(a) CHOICE(fcomps a, fcomps a, fcomps a) +#define FCOMPP CHOICE(fcompp, fcompp, fcompp) +#define FCOS CHOICE(fcos, fcos, fcos) +#define FDECSTP CHOICE(fdecstp, fdecstp, fdecstp) +#define FDIV_D(a) CHOICE(fdivl a, fdivl a, fdivd a) +#define FDIV_S(a) CHOICE(fdivs a, fdivs a, fdivs a) +#define FDIV2(a, b) CHOICE(fdiv ARG2(a,b), fdiv ARG2(a,b), fdiv ARG2(b,a)) +#define FDIVP(a, b) CHOICE(fdivp ARG2(a,b), fdivp ARG2(a,b), fdivp ARG2(b,a)) +#define FIDIV_L(a) CHOICE(fidivl a, fidivl a, fidivl a) +#define FIDIV_W(a) CHOICE(fidiv a, fidivs a, fidivs a) +#define FDIVR_D(a) CHOICE(fdivrl a, fdivrl a, fdivrd a) +#define FDIVR_S(a) CHOICE(fdivrs a, fdivrs a, fdivrs a) +#define FDIVR2(a, b) CHOICE(fdivr ARG2(a,b), fdivr ARG2(a,b), fdivr ARG2(b,a)) +#define FDIVRP(a, b) CHOICE(fdivrp ARG2(a,b), fdivrp ARG2(a,b), fdivrp ARG2(b,a)) +#define FIDIVR_L(a) CHOICE(fidivrl a, fidivrl a, fidivrl a) +#define FIDIVR_W(a) CHOICE(fidivr a, fidivrs a, fidivrs a) +#define FFREE(a) CHOICE(ffree a, ffree a, ffree a) +#define FICOM_L(a) CHOICE(ficoml a, ficoml a, ficoml a) +#define FICOM_W(a) CHOICE(ficom a, ficoms a, ficoms a) +#define FICOMP_L(a) CHOICE(ficompl a, ficompl a, ficompl a) 
+#define FICOMP_W(a) CHOICE(ficomp a, ficomps a, ficomps a) +#define FILD_Q(a) CHOICE(fildll a, fildq a, fildq a) +#define FILD_L(a) CHOICE(fildl a, fildl a, fildl a) +#define FILD_W(a) CHOICE(fild a, filds a, filds a) +#define FINCSTP CHOICE(fincstp, fincstp, fincstp) +#define FINIT CHOICE(finit, wait; fninit, wait; finit) +#define FNINIT CHOICE(fninit, fninit, finit) +#define FIST_L(a) CHOICE(fistl a, fistl a, fistl a) +#define FIST_W(a) CHOICE(fist a, fists a, fists a) +#define FISTP_Q(a) CHOICE(fistpll a, fistpq a, fistpq a) +#define FISTP_L(a) CHOICE(fistpl a, fistpl a, fistpl a) +#define FISTP_W(a) CHOICE(fistp a, fistps a, fistps a) +#define FLD_X(a) CHOICE(fldt a, fldt a, fldx a) /* 80 bit data type! */ +#define FLD_D(a) CHOICE(fldl a, fldl a, fldd a) +#define FLD_S(a) CHOICE(flds a, flds a, flds a) +#define FLD1 CHOICE(fld1, fld1, fld1) +#define FLDL2T CHOICE(fldl2t, fldl2t, fldl2t) +#define FLDL2E CHOICE(fldl2e, fldl2e, fldl2e) +#define FLDPI CHOICE(fldpi, fldpi, fldpi) +#define FLDLG2 CHOICE(fldlg2, fldlg2, fldlg2) +#define FLDLN2 CHOICE(fldln2, fldln2, fldln2) +#define FLDZ CHOICE(fldz, fldz, fldz) +#define FLDCW(a) CHOICE(fldcw a, fldcw a, fldcw a) +#define FLDENV(a) CHOICE(fldenv a, fldenv a, fldenv a) +#define FMUL_S(a) CHOICE(fmuls a, fmuls a, fmuls a) +#define FMUL_D(a) CHOICE(fmull a, fmull a, fmuld a) +#define FMUL2(a, b) CHOICE(fmul ARG2(a,b), fmul ARG2(a,b), fmul ARG2(b,a)) +#define FMULP(a, b) CHOICE(fmulp ARG2(a,b), fmulp ARG2(a,b), fmulp ARG2(b,a)) +#define FIMUL_L(a) CHOICE(fimull a, fimull a, fimull a) +#define FIMUL_W(a) CHOICE(fimul a, fimuls a, fimuls a) +#define FNOP CHOICE(fnop, fnop, fnop) +#define FPATAN CHOICE(fpatan, fpatan, fpatan) +#define FPREM CHOICE(fprem, fprem, fprem) +#define FPREM1 CHOICE(fprem1, fprem1, fprem1) +#define FPTAN CHOICE(fptan, fptan, fptan) +#define FRNDINT CHOICE(frndint, frndint, frndint) +#define FRSTOR(a) CHOICE(frstor a, frstor a, frstor a) +#define FSAVE(a) CHOICE(fsave a, wait; fnsave a, wait; fsave 
a) +#define FNSAVE(a) CHOICE(fnsave a, fnsave a, fsave a) +#define FSCALE CHOICE(fscale, fscale, fscale) +#define FSIN CHOICE(fsin, fsin, fsin) +#define FSINCOS CHOICE(fsincos, fsincos, fsincos) +#define FSQRT CHOICE(fsqrt, fsqrt, fsqrt) +#define FST_D(a) CHOICE(fstl a, fstl a, fstd a) +#define FST_S(a) CHOICE(fsts a, fsts a, fsts a) +#define FSTP_X(a) CHOICE(fstpt a, fstpt a, fstpx a) +#define FSTP_D(a) CHOICE(fstpl a, fstpl a, fstpd a) +#define FSTP_S(a) CHOICE(fstps a, fstps a, fstps a) +#define FSTP(a) CHOICE(fstp a, fstp a, fstp a) +#define FSTCW(a) CHOICE(fstcw a, wait; fnstcw a, wait; fstcw a) +#define FNSTCW(a) CHOICE(fnstcw a, fnstcw a, fstcw a) +#define FSTENV(a) CHOICE(fstenv a, wait; fnstenv a, fstenv a) +#define FNSTENV(a) CHOICE(fnstenv a, fnstenv a, fstenv a) +#define FSTSW(a) CHOICE(fstsw a, wait; fnstsw a, wait; fstsw a) +#define FNSTSW(a) CHOICE(fnstsw a, fnstsw a, fstsw a) +#define FSUB_S(a) CHOICE(fsubs a, fsubs a, fsubs a) +#define FSUB_D(a) CHOICE(fsubl a, fsubl a, fsubd a) +#define FSUB2(a, b) CHOICE(fsub ARG2(a,b), fsub ARG2(a,b), fsub ARG2(b,a)) +#define FSUBP(a, b) CHOICE(fsubp ARG2(a,b), fsubp ARG2(a,b), fsubp ARG2(b,a)) +#define FISUB_L(a) CHOICE(fisubl a, fisubl a, fisubl a) +#define FISUB_W(a) CHOICE(fisub a, fisubs a, fisubs a) +#define FSUBR_S(a) CHOICE(fsubrs a, fsubrs a, fsubrs a) +#define FSUBR_D(a) CHOICE(fsubrl a, fsubrl a, fsubrd a) +#define FSUBR2(a, b) CHOICE(fsubr ARG2(a,b), fsubr ARG2(a,b), fsubr ARG2(b,a)) +#define FSUBRP(a, b) CHOICE(fsubrp ARG2(a,b), fsubrp ARG2(a,b), fsubrp ARG2(b,a)) +#define FISUBR_L(a) CHOICE(fisubrl a, fisubrl a, fisubrl a) +#define FISUBR_W(a) CHOICE(fisubr a, fisubrs a, fisubrs a) +#define FTST CHOICE(ftst, ftst, ftst) +#define FUCOM(a) CHOICE(fucom a, fucom a, fucom a) +#define FUCOMP(a) CHOICE(fucomp a, fucomp a, fucomp a) +#define FUCOMPP CHOICE(fucompp, fucompp, fucompp) +#define FWAIT CHOICE(wait, wait, wait) +#define FXAM CHOICE(fxam, fxam, fxam) +#define FXCH(a) CHOICE(fxch a, fxch a, fxch 
a) +#define FXTRACT CHOICE(fxtract, fxtract, fxtract) +#define FYL2X CHOICE(fyl2x, fyl2x, fyl2x) +#define FYL2XP1 CHOICE(fyl2xp1, fyl2xp1, fyl2xp1) /* New instructions */ -#define CPUID CHOICE(D_BYTE ARG2(15, 162), cpuid, D_BYTE ARG2(15, 162)) -#define RDTSC CHOICE(D_BYTE ARG2(15, 49), rdtsc, D_BYTE ARG2(15, 49)) +#define CPUID CHOICE(D_BYTE ARG2(15, 162), cpuid, D_BYTE ARG2(15, 162)) +#define RDTSC CHOICE(D_BYTE ARG2(15, 49), rdtsc, D_BYTE ARG2(15, 49)) #else /* NASM_ASSEMBLER || MASM_ASSEMBLER is defined */ @@ -779,166 +783,167 @@ /* */ /****************************************/ -#define P_EAX EAX -#define L_EAX EAX -#define W_AX AX -#define B_AH AH -#define B_AL AL - -#define P_EBX EBX -#define L_EBX EBX -#define W_BX BX -#define B_BH BH -#define B_BL BL - -#define P_ECX ECX -#define L_ECX ECX -#define W_CX CX -#define B_CH CH -#define B_CL CL - -#define P_EDX EDX -#define L_EDX EDX -#define W_DX DX -#define B_DH DH -#define B_DL DL - -#define P_EBP EBP -#define L_EBP EBP -#define W_BP BP - -#define P_ESI ESI -#define L_ESI ESI -#define W_SI SI - -#define P_EDI EDI -#define L_EDI EDI -#define W_DI DI - -#define P_ESP ESP -#define L_ESP ESP -#define W_SP SP - -#define W_CS CS -#define W_SS SS -#define W_DS DS -#define W_ES ES -#define W_FS FS -#define W_GS GS - -#define X_ST ST -#define D_ST ST -#define L_ST ST - -#define P_MM0 mm0 -#define P_MM1 mm1 -#define P_MM2 mm2 -#define P_MM3 mm3 -#define P_MM4 mm4 -#define P_MM5 mm5 -#define P_MM6 mm6 -#define P_MM7 mm7 - -#define P_XMM0 xmm0 -#define P_XMM1 xmm1 -#define P_XMM2 xmm2 -#define P_XMM3 xmm3 -#define P_XMM4 xmm4 -#define P_XMM5 xmm5 -#define P_XMM6 xmm6 -#define P_XMM7 xmm7 - -#define CONCAT(x, y) x ## y +#define P_EAX EAX +#define L_EAX EAX +#define W_AX AX +#define B_AH AH +#define B_AL AL + +#define P_EBX EBX +#define L_EBX EBX +#define W_BX BX +#define B_BH BH +#define B_BL BL + +#define P_ECX ECX +#define L_ECX ECX +#define W_CX CX +#define B_CH CH +#define B_CL CL + +#define P_EDX EDX +#define L_EDX 
EDX +#define W_DX DX +#define B_DH DH +#define B_DL DL + +#define P_EBP EBP +#define L_EBP EBP +#define W_BP BP + +#define P_ESI ESI +#define L_ESI ESI +#define W_SI SI + +#define P_EDI EDI +#define L_EDI EDI +#define W_DI DI + +#define P_ESP ESP +#define L_ESP ESP +#define W_SP SP + +#define W_CS CS +#define W_SS SS +#define W_DS DS +#define W_ES ES +#define W_FS FS +#define W_GS GS + +#define X_ST ST +#define D_ST ST +#define L_ST ST + +#define P_MM0 mm0 +#define P_MM1 mm1 +#define P_MM2 mm2 +#define P_MM3 mm3 +#define P_MM4 mm4 +#define P_MM5 mm5 +#define P_MM6 mm6 +#define P_MM7 mm7 + +#define P_XMM0 xmm0 +#define P_XMM1 xmm1 +#define P_XMM2 xmm2 +#define P_XMM3 xmm3 +#define P_XMM4 xmm4 +#define P_XMM5 xmm5 +#define P_XMM6 xmm6 +#define P_XMM7 xmm7 + +#define CONCAT(x, y) x ## y #if defined(NASM_ASSEMBLER) -#define ST(n) st ## n +#define ST(n) st ## n -#define TBYTE_PTR tword -#define QWORD_PTR qword -#define DWORD_PTR dword -#define WORD_PTR word -#define BYTE_PTR byte +#define TBYTE_PTR tword +#define QWORD_PTR qword +#define DWORD_PTR dword +#define WORD_PTR word +#define BYTE_PTR byte #define OFFSET -#define GLOBL GLOBAL -#define ALIGNTEXT32 ALIGN 32 -#define ALIGNTEXT16 ALIGN 16 -#define ALIGNTEXT8 ALIGN 8 -#define ALIGNTEXT4 ALIGN 4 -#define ALIGNTEXT2 ALIGN 2 -#define ALIGNTEXT32ifNOP ALIGN 32 -#define ALIGNTEXT16ifNOP ALIGN 16 -#define ALIGNTEXT8ifNOP ALIGN 8 -#define ALIGNTEXT4ifNOP ALIGN 4 -#define ALIGNDATA32 ALIGN 32 -#define ALIGNDATA16 ALIGN 16 -#define ALIGNDATA8 ALIGN 8 -#define ALIGNDATA4 ALIGN 4 -#define ALIGNDATA2 ALIGN 2 +#define GLOBL GLOBAL +#define ALIGNTEXT32 ALIGN 32 +#define ALIGNTEXT16 ALIGN 16 +#define ALIGNTEXT8 ALIGN 8 +#define ALIGNTEXT4 ALIGN 4 +#define ALIGNTEXT2 ALIGN 2 +#define ALIGNTEXT32ifNOP ALIGN 32 +#define ALIGNTEXT16ifNOP ALIGN 16 +#define ALIGNTEXT8ifNOP ALIGN 8 +#define ALIGNTEXT4ifNOP ALIGN 4 +#define ALIGNDATA32 ALIGN 32 +#define ALIGNDATA16 ALIGN 16 +#define ALIGNDATA8 ALIGN 8 +#define ALIGNDATA4 ALIGN 4 +#define 
ALIGNDATA2 ALIGN 2 #define FILE(s) -#define STRING(s) db s -#define D_LONG dd -#define D_WORD dw -#define D_BYTE db +#define STRING(s) db s +#define D_LONG dd +#define D_WORD dw +#define D_BYTE db /* #define SPACE */ /* #define COMM */ #if defined(__WATCOMC__) SECTION _TEXT public align=16 class=CODE use32 flat SECTION _DATA public align=16 class=DATA use32 flat -#define SEG_TEXT SECTION _TEXT -#define SEG_DATA SECTION _DATA -#define SEG_BSS SECTION .bss +#define SEG_TEXT SECTION _TEXT +#define SEG_DATA SECTION _DATA +#define SEG_BSS SECTION .bss #else -#define SEG_DATA SECTION .data -#define SEG_TEXT SECTION .text -#define SEG_BSS SECTION .bss +#define SEG_DATA SECTION .data +#define SEG_TEXT SECTION .text +#define SEG_BSS SECTION .bss #endif -#define D_SPACE(n) db n REP 0 +#define D_SPACE(n) db n REP 0 #define AS_BEGIN -#define NEAR near /* Jcc's should be handled better than this... */ +/* Jcc's should be handled better than this... */ +#define NEAR near #else /* MASM */ -#define TBYTE_PTR tbyte ptr -#define QWORD_PTR qword ptr -#define DWORD_PTR dword ptr -#define WORD_PTR word ptr -#define BYTE_PTR byte ptr - -#define OFFSET offset - -#define GLOBL GLOBAL -#define ALIGNTEXT32 ALIGN 32 -#define ALIGNTEXT16 ALIGN 16 -#define ALIGNTEXT8 ALIGN 8 -#define ALIGNTEXT4 ALIGN 4 -#define ALIGNTEXT2 ALIGN 2 -#define ALIGNTEXT32ifNOP ALIGN 32 -#define ALIGNTEXT16ifNOP ALIGN 16 -#define ALIGNTEXT8ifNOP ALIGN 8 -#define ALIGNTEXT4ifNOP ALIGN 4 -#define ALIGNDATA32 ALIGN 32 -#define ALIGNDATA16 ALIGN 16 -#define ALIGNDATA8 ALIGN 8 -#define ALIGNDATA4 ALIGN 4 -#define ALIGNDATA2 ALIGN 2 +#define TBYTE_PTR tbyte ptr +#define QWORD_PTR qword ptr +#define DWORD_PTR dword ptr +#define WORD_PTR word ptr +#define BYTE_PTR byte ptr + +#define OFFSET offset + +#define GLOBL GLOBAL +#define ALIGNTEXT32 ALIGN 32 +#define ALIGNTEXT16 ALIGN 16 +#define ALIGNTEXT8 ALIGN 8 +#define ALIGNTEXT4 ALIGN 4 +#define ALIGNTEXT2 ALIGN 2 +#define ALIGNTEXT32ifNOP ALIGN 32 +#define ALIGNTEXT16ifNOP 
ALIGN 16 +#define ALIGNTEXT8ifNOP ALIGN 8 +#define ALIGNTEXT4ifNOP ALIGN 4 +#define ALIGNDATA32 ALIGN 32 +#define ALIGNDATA16 ALIGN 16 +#define ALIGNDATA8 ALIGN 8 +#define ALIGNDATA4 ALIGN 4 +#define ALIGNDATA2 ALIGN 2 #define FILE(s) -#define STRING(s) db s -#define D_LONG dd -#define D_WORD dw -#define D_BYTE db +#define STRING(s) db s +#define D_LONG dd +#define D_WORD dw +#define D_BYTE db /* #define SPACE */ /* #define COMM */ -#define SEG_DATA .DATA -#define SEG_TEXT .CODE -#define SEG_BSS .DATA +#define SEG_DATA .DATA +#define SEG_TEXT .CODE +#define SEG_BSS .DATA -#define D_SPACE(n) db n REP 0 +#define D_SPACE(n) db n REP 0 #define AS_BEGIN @@ -949,520 +954,529 @@ SECTION _DATA public align=16 class=DATA use32 flat #if defined(Lynx) || (defined(SYSV) || defined(SVR4)) \ || (defined(linux) || defined(__OS2ELF__)) && defined(__ELF__) \ || defined(__FreeBSD__) && __FreeBSD__ >= 3 -#define GLNAME(a) a +#define GLNAME(a) a #else -#define GLNAME(a) _ ## a +#define GLNAME(a) _ ## a #endif /* - * Addressing Modes + * Addressing Modes */ /* Immediate Mode */ -#define P_ADDR(a) OFFSET a -#define X_ADDR(a) OFFSET a -#define D_ADDR(a) OFFSET a -#define L_ADDR(a) OFFSET a -#define W_ADDR(a) OFFSET a -#define B_ADDR(a) OFFSET a - -#define P_CONST(a) a -#define X_CONST(a) a -#define D_CONST(a) a -#define L_CONST(a) a -#define W_CONST(a) a -#define B_CONST(a) a +#define P_ADDR(a) OFFSET a +#define X_ADDR(a) OFFSET a +#define D_ADDR(a) OFFSET a +#define L_ADDR(a) OFFSET a +#define W_ADDR(a) OFFSET a +#define B_ADDR(a) OFFSET a + +#define P_CONST(a) a +#define X_CONST(a) a +#define D_CONST(a) a +#define L_CONST(a) a +#define W_CONST(a) a +#define B_CONST(a) a /* Indirect Mode */ -#define P_CONTENT(a) a -#define X_CONTENT(a) TBYTE_PTR a -#define D_CONTENT(a) QWORD_PTR a -#define L_CONTENT(a) DWORD_PTR a -#define W_CONTENT(a) WORD_PTR a -#define B_CONTENT(a) BYTE_PTR a +#define P_CONTENT(a) a +#define X_CONTENT(a) TBYTE_PTR a +#define D_CONTENT(a) QWORD_PTR a +#define 
L_CONTENT(a) DWORD_PTR a +#define W_CONTENT(a) WORD_PTR a +#define B_CONTENT(a) BYTE_PTR a /* Register a indirect */ -#define P_REGIND(a) [a] -#define X_REGIND(a) TBYTE_PTR [a] -#define D_REGIND(a) QWORD_PTR [a] -#define L_REGIND(a) DWORD_PTR [a] -#define W_REGIND(a) WORD_PTR [a] -#define B_REGIND(a) BYTE_PTR [a] +#define P_REGIND(a) [a] +#define X_REGIND(a) TBYTE_PTR [a] +#define D_REGIND(a) QWORD_PTR [a] +#define L_REGIND(a) DWORD_PTR [a] +#define W_REGIND(a) WORD_PTR [a] +#define B_REGIND(a) BYTE_PTR [a] /* Register b indirect plus displacement a */ -#define P_REGOFF(a, b) [b + a] -#define X_REGOFF(a, b) TBYTE_PTR [b + a] -#define D_REGOFF(a, b) QWORD_PTR [b + a] -#define L_REGOFF(a, b) DWORD_PTR [b + a] -#define W_REGOFF(a, b) WORD_PTR [b + a] -#define B_REGOFF(a, b) BYTE_PTR [b + a] +#define P_REGOFF(a, b) [b + a] +#define X_REGOFF(a, b) TBYTE_PTR [b + a] +#define D_REGOFF(a, b) QWORD_PTR [b + a] +#define L_REGOFF(a, b) DWORD_PTR [b + a] +#define W_REGOFF(a, b) WORD_PTR [b + a] +#define B_REGOFF(a, b) BYTE_PTR [b + a] /* Reg indirect Base + Index + Displacement - this is mainly for 16-bit mode * which has no scaling */ -#define P_REGBID(b, i, d) [b + i + d] -#define X_REGBID(b, i, d) TBYTE_PTR [b + i + d] -#define D_REGBID(b, i, d) QWORD_PTR [b + i + d] -#define L_REGBID(b, i, d) DWORD_PTR [b + i + d] -#define W_REGBID(b, i, d) WORD_PTR [b + i + d] -#define B_REGBID(b, i, d) BYTE_PTR [b + i + d] +#define P_REGBID(b, i, d) [b + i + d] +#define X_REGBID(b, i, d) TBYTE_PTR [b + i + d] +#define D_REGBID(b, i, d) QWORD_PTR [b + i + d] +#define L_REGBID(b, i, d) DWORD_PTR [b + i + d] +#define W_REGBID(b, i, d) WORD_PTR [b + i + d] +#define B_REGBID(b, i, d) BYTE_PTR [b + i + d] + +/* Reg indirect Base + (Index * Scale) */ +#define P_REGBIS(b, i, s) [b + i * s] +#define X_REGBIS(b, i, s) TBYTE_PTR [b + i * s] +#define D_REGBIS(b, i, s) QWORD_PTR [b + i * s] +#define L_REGBIS(b, i, s) DWORD_PTR [b + i * s] +#define W_REGBIS(b, i, s) WORD_PTR [b + i * s] +#define 
B_REGBIS(b, i, s) BYTE_PTR [b + i * s] /* Reg indirect Base + (Index * Scale) + Displacement */ -#define P_REGBISD(b, i, s, d) [b + i * s + d] -#define X_REGBISD(b, i, s, d) TBYTE_PTR [b + i * s + d] -#define D_REGBISD(b, i, s, d) QWORD_PTR [b + i * s + d] -#define L_REGBISD(b, i, s, d) DWORD_PTR [b + i * s + d] -#define W_REGBISD(b, i, s, d) WORD_PTR [b + i * s + d] -#define B_REGBISD(b, i, s, d) BYTE_PTR [b + i * s + d] +#define P_REGBISD(b, i, s, d) [b + i * s + d] +#define X_REGBISD(b, i, s, d) TBYTE_PTR [b + i * s + d] +#define D_REGBISD(b, i, s, d) QWORD_PTR [b + i * s + d] +#define L_REGBISD(b, i, s, d) DWORD_PTR [b + i * s + d] +#define W_REGBISD(b, i, s, d) WORD_PTR [b + i * s + d] +#define B_REGBISD(b, i, s, d) BYTE_PTR [b + i * s + d] /* Displaced Scaled Index: */ -#define P_REGDIS(d, i, s) [i * s + d] -#define X_REGDIS(d, i, s) TBYTE_PTR [i * s + d] -#define D_REGDIS(d, i, s) QWORD_PTR [i * s + d] -#define L_REGDIS(d, i, s) DWORD_PTR [i * s + d] -#define W_REGDIS(d, i, s) WORD_PTR [i * s + d] -#define B_REGDIS(d, i, s) BYTE_PTR [i * s + d] +#define P_REGDIS(d, i, s) [i * s + d] +#define X_REGDIS(d, i, s) TBYTE_PTR [i * s + d] +#define D_REGDIS(d, i, s) QWORD_PTR [i * s + d] +#define L_REGDIS(d, i, s) DWORD_PTR [i * s + d] +#define W_REGDIS(d, i, s) WORD_PTR [i * s + d] +#define B_REGDIS(d, i, s) BYTE_PTR [i * s + d] /* Indexed Base: */ -#define P_REGBI(b, i) [b + i] -#define X_REGBI(b, i) TBYTE_PTR [b + i] -#define D_REGBI(b, i) QWORD_PTR [b + i] -#define L_REGBI(b, i) DWORD_PTR [b + i] -#define W_REGBI(b, i) WORD_PTR [b + i] -#define B_REGBI(b, i) BYTE_PTR [b + i] +#define P_REGBI(b, i) [b + i] +#define X_REGBI(b, i) TBYTE_PTR [b + i] +#define D_REGBI(b, i) QWORD_PTR [b + i] +#define L_REGBI(b, i) DWORD_PTR [b + i] +#define W_REGBI(b, i) WORD_PTR [b + i] +#define B_REGBI(b, i) BYTE_PTR [b + i] /* Displaced Base: */ -#define P_REGDB(d, b) [b + d] -#define X_REGDB(d, b) TBYTE_PTR [b + d] -#define D_REGDB(d, b) QWORD_PTR [b + d] -#define L_REGDB(d, b) 
DWORD_PTR [b + d] -#define W_REGDB(d, b) WORD_PTR [b + d] -#define B_REGDB(d, b) BYTE_PTR [b + d] +#define P_REGDB(d, b) [b + d] +#define X_REGDB(d, b) TBYTE_PTR [b + d] +#define D_REGDB(d, b) QWORD_PTR [b + d] +#define L_REGDB(d, b) DWORD_PTR [b + d] +#define W_REGDB(d, b) WORD_PTR [b + d] +#define B_REGDB(d, b) BYTE_PTR [b + d] /* Variable indirect: */ -#define VARINDIRECT(var) var +#define VARINDIRECT(var) var /* Use register contents as jump/call target: */ -#define CODEPTR(reg) reg +#define CODEPTR(reg) reg /* - * Redefine assembler commands + * Redefine assembler commands */ -#define P_(a) P_ ## a -#define X_(a) X_ ## a -#define D_(a) D_ ## a -#define S_(a) L_ ## a -#define L_(a) L_ ## a -#define W_(a) W_ ## a -#define B_(a) B_ ## a - -#define AAA aaa -#define AAD aad -#define AAM aam -#define AAS aas -#define ADC_L(a, b) adc L_(b), L_(a) -#define ADC_W(a, b) adc W_(b), W_(a) -#define ADC_B(a, b) adc B_(b), B_(a) -#define ADD_L(a, b) add L_(b), L_(a) -#define ADD_W(a, b) add W_(b), W_(a) -#define ADD_B(a, b) add B_(b), B_(a) -#define AND_L(a, b) and L_(b), L_(a) -#define AND_W(a, b) and W_(b), W_(a) -#define AND_B(a, b) and B_(b), B_(a) -#define ARPL(a,b) arpl W_(b), a -#define BOUND_L(a, b) bound L_(b), L_(a) -#define BOUND_W(a, b) bound W_(b), W_(a) -#define BSF_L(a, b) bsf L_(b), L_(a) -#define BSF_W(a, b) bsf W_(b), W_(a) -#define BSR_L(a, b) bsr L_(b), L_(a) -#define BSR_W(a, b) bsr W_(b), W_(a) -#define BT_L(a, b) bt L_(b), L_(a) -#define BT_W(a, b) bt W_(b), W_(a) -#define BTC_L(a, b) btc L_(b), L_(a) -#define BTC_W(a, b) btc W_(b), W_(a) -#define BTR_L(a, b) btr L_(b), L_(a) -#define BTR_W(a, b) btr W_(b), W_(a) -#define BTS_L(a, b) bts L_(b), L_(a) -#define BTS_W(a, b) bts W_(b), W_(a) -#define CALL(a) call a -#define CALLF(s,a) call far s:a -#define CBW cbw -#define CWDE cwde -#define CLC clc -#define CLD cld -#define CLI cli -#define CLTS clts -#define CMC cmc -#define CMP_L(a, b) cmp L_(b), L_(a) -#define CMP_W(a, b) cmp W_(b), W_(a) -#define 
CMP_B(a, b) cmp B_(b), B_(a) -#define CMPS_L cmpsd -#define CMPS_W cmpsw -#define CMPS_B cmpsb -#define CWD cwd -#define CDQ cdq -#define DAA daa -#define DAS das -#define DEC_L(a) dec L_(a) -#define DEC_W(a) dec W_(a) -#define DEC_B(a) dec B_(a) -#define DIV_L(a) div L_(a) -#define DIV_W(a) div W_(a) -#define DIV_B(a) div B_(a) -#define ENTER(a,b) enter b, a -#define HLT hlt -#define IDIV_L(a) idiv L_(a) -#define IDIV_W(a) idiv W_(a) -#define IDIV_B(a) idiv B_(a) -#define IMUL_L(a, b) imul L_(b), L_(a) -#define IMUL_W(a, b) imul W_(b), W_(a) -#define IMUL_B(a) imul B_(a) -#define IN_L in EAX, DX -#define IN_W in AX, DX -#define IN_B in AL, DX -#define IN1_L(a) in1 L_(a) -#define IN1_W(a) in1 W_(a) -#define IN1_B(a) in1 B_(a) -#define INC_L(a) inc L_(a) -#define INC_W(a) inc W_(a) -#define INC_B(a) inc B_(a) -#define INS_L ins -#define INS_W ins -#define INS_B ins -#define INT(a) int B_(a) -#define INT3 int3 -#define INTO into -#define IRET iret -#define IRETD iretd -#define JA(a) ja NEAR a -#define JAE(a) jae NEAR a -#define JB(a) jb NEAR a -#define JBE(a) jbe NEAR a -#define JC(a) jc NEAR a -#define JE(a) je NEAR a -#define JG(a) jg NEAR a -#define JGE(a) jge NEAR a -#define JL(a) jl NEAR a -#define JLE(a) jle NEAR a -#define JNA(a) jna NEAR a -#define JNAE(a) jnae NEAR a -#define JNB(a) jnb NEAR a -#define JNBE(a) jnbe NEAR a -#define JNC(a) jnc NEAR a -#define JNE(a) jne NEAR a -#define JNG(a) jng NEAR a -#define JNGE(a) jnge NEAR a -#define JNL(a) jnl NEAR a -#define JNLE(a) jnle NEAR a -#define JNO(a) jno NEAR a -#define JNP(a) jnp NEAR a -#define JNS(a) jns NEAR a -#define JNZ(a) jnz NEAR a -#define JO(a) jo NEAR a -#define JP(a) jp NEAR a -#define JPE(a) jpe NEAR a -#define JPO(a) jpo NEAR a -#define JS(a) js NEAR a -#define JZ(a) jz NEAR a -#define JMP(a) jmp a -#define JMPF(s,a) jmpf -#define LAHF lahf -#define LAR(a, b) lar b, a -#define LEA_L(a, b) lea P_(b), P_(a) -#define LEA_W(a, b) lea P_(b), P_(a) -#define LEAVE leave -#define LGDT(a) lgdt a 
-#define LIDT(a) lidt a -#define LDS(a, b) lds b, a -#define LES(a, b) les b, a -#define LFS(a, b) lfs b, a -#define LGS(a, b) lgs b, a -#define LSS(a, b) lss b, a -#define LLDT(a) lldt a -#define LMSW(a) lmsw a -#define LOCK lock -#define LODS_L lodsd -#define LODS_W lodsw -#define LODS_B lodsb -#define LOOP(a) loop a -#define LOOPE(a) loope a -#define LOOPZ(a) loopz a -#define LOOPNE(a) loopne a -#define LOOPNZ(a) loopnz a -#define LSL(a, b) lsl b, a -#define LTR(a) ltr a -#define MOV_SR(a, b) mov S_(b), S_(a) -#define MOV_L(a, b) mov L_(b), L_(a) -#define MOV_W(a, b) mov W_(b), W_(a) -#define MOV_B(a, b) mov B_(b), B_(a) -#define MOVS_L movsd -#define MOVS_W movsw -#define MOVS_B movsb -#define MOVSX_BL(a, b) movsx B_(b), B_(a) -#define MOVSX_BW(a, b) movsx B_(b), B_(a) -#define MOVSX_WL(a, b) movsx W_(b), W_(a) -#define MOVZX_BL(a, b) movzx B_(b), B_(a) -#define MOVZX_BW(a, b) movzx B_(b), B_(a) -#define MOVZX_WL(a, b) movzx W_(b), W_(a) -#define MUL_L(a) mul L_(a) -#define MUL_W(a) mul W_(a) -#define MUL_B(a) mul B_(a) -#define NEG_L(a) neg L_(a) -#define NEG_W(a) neg W_(a) -#define NEG_B(a) neg B_(a) -#define NOP nop -#define NOT_L(a) not L_(a) -#define NOT_W(a) not W_(a) -#define NOT_B(a) not B_(a) -#define OR_L(a,b) or L_(b), L_(a) -#define OR_W(a,b) or W_(b), W_(a) -#define OR_B(a,b) or B_(b), B_(a) -#define OUT_L out DX, EAX -#define OUT_W out DX, AX -#define OUT_B out DX, AL -#define OUT1_L(a) out1 L_(a) -#define OUT1_W(a) out1 W_(a) -#define OUT1_B(a) out1 B_(a) -#define OUTS_L outsd -#define OUTS_W outsw -#define OUTS_B outsb -#define POP_SR(a) pop S_(a) -#define POP_L(a) pop L_(a) -#define POP_W(a) pop W_(a) -#define POPA_L popad -#define POPA_W popa -#define POPF_L popfd -#define POPF_W popf -#define PUSH_SR(a) push S_(a) -#define PUSH_L(a) push L_(a) -#define PUSH_W(a) push W_(a) -#define PUSH_B(a) push B_(a) -#define PUSHA_L pushad -#define PUSHA_W pusha -#define PUSHF_L pushfd -#define PUSHF_W pushf -#define RCL_L(a, b) rcl L_(b), L_(a) -#define 
RCL_W(a, b) rcl W_(b), W_(a) -#define RCL_B(a, b) rcl B_(b), B_(a) -#define RCR_L(a, b) rcr L_(b), L_(a) -#define RCR_W(a, b) rcr W_(b), W_(a) -#define RCR_B(a, b) rcr B_(b), B_(a) -#define ROL_L(a, b) rol L_(b), L_(a) -#define ROL_W(a, b) rol W_(b), W_(a) -#define ROL_B(a, b) rol B_(b), B_(a) -#define ROR_L(a, b) ror L_(b), L_(a) -#define ROR_W(a, b) ror W_(b), W_(a) -#define ROR_B(a, b) ror B_(b), B_(a) -#define REP rep -#define REPE repe -#define REPNE repne -#define REPNZ REPNE -#define REPZ REPE -#define RET ret -#define SAHF sahf -#define SAL_L(a, b) sal L_(b), L_(a) -#define SAL_W(a, b) sal W_(b), W_(a) -#define SAL_B(a, b) sal B_(b), B_(a) -#define SAR_L(a, b) sar L_(b), L_(a) -#define SAR_W(a, b) sar W_(b), W_(a) -#define SAR_B(a, b) sar B_(b), B_(a) -#define SBB_L(a, b) sbb L_(b), L_(a) -#define SBB_W(a, b) sbb W_(b), W_(a) -#define SBB_B(a, b) sbb B_(b), B_(a) -#define SCAS_L scas -#define SCAS_W scas -#define SCAS_B scas -#define SETA(a) seta a -#define SETAE(a) setae a -#define SETB(a) setb a -#define SETBE(a) setbe a -#define SETC(a) setc a -#define SETE(a) sete a -#define SETG(a) setg a -#define SETGE(a) setge a -#define SETL(a) setl a -#define SETLE(a) setle a -#define SETNA(a) setna a -#define SETNAE(a) setnae a -#define SETNB(a) setnb a -#define SETNBE(a) setnbe a -#define SETNC(a) setnc a -#define SETNE(a) setne a -#define SETNG(a) setng a -#define SETNGE(a) setnge a -#define SETNL(a) setnl a -#define SETNLE(a) setnle a -#define SETNO(a) setno a -#define SETNP(a) setnp a -#define SETNS(a) setns a -#define SETNZ(a) setnz a -#define SETO(a) seto a -#define SETP(a) setp a -#define SETPE(a) setpe a -#define SETPO(a) setpo a -#define SETS(a) sets a -#define SETZ(a) setz a -#define SGDT(a) sgdt a -#define SIDT(a) sidt a -#define SHL_L(a, b) shl L_(b), L_(a) -#define SHL_W(a, b) shl W_(b), W_(a) -#define SHL_B(a, b) shl B_(b), B_(a) -#define SHLD_L(a,b,c) shld -#define SHLD2_L(a,b) shld L_(b), L_(a) -#define SHLD_W(a,b,c) shld -#define SHLD2_W(a,b) shld 
W_(b), W_(a) -#define SHR_L(a, b) shr L_(b), L_(a) -#define SHR_W(a, b) shr W_(b), W_(a) -#define SHR_B(a, b) shr B_(b), B_(a) -#define SHRD_L(a,b,c) shrd -#define SHRD2_L(a,b) shrd L_(b), L_(a) -#define SHRD_W(a,b,c) shrd -#define SHRD2_W(a,b) shrd W_(b), W_(a) -#define SLDT(a) sldt a -#define SMSW(a) smsw a -#define STC stc -#define STD std -#define STI sti -#define STOS_L stos -#define STOS_W stos -#define STOS_B stos -#define STR(a) str a -#define SUB_L(a, b) sub L_(b), L_(a) -#define SUB_W(a, b) sub W_(b), W_(a) -#define SUB_B(a, b) sub B_(b), B_(a) -#define TEST_L(a, b) test L_(b), L_(a) -#define TEST_W(a, b) test W_(b), W_(a) -#define TEST_B(a, b) test B_(b), B_(a) -#define VERR(a) verr a -#define VERW(a) verw a -#define WAIT wait -#define XCHG_L(a, b) xchg L_(b), L_(a) -#define XCHG_W(a, b) xchg W_(b), W_(a) -#define XCHG_B(a, b) xchg B_(b), B_(a) -#define XLAT xlat -#define XOR_L(a, b) xor L_(b), L_(a) -#define XOR_W(a, b) xor W_(b), W_(a) -#define XOR_B(a, b) xor B_(b), B_(a) -#define F2XM1 f2xm1 -#define FABS fabs -#define FADD_D(a) fadd D_(a) -#define FADD_S(a) fadd S_(a) -#define FADD2(a, b) fadd b, a -#define FADDP(a, b) faddp b, a -#define FIADD_L(a) fiadd L_(a) -#define FIADD_W(a) fiadd W_(a) -#define FBLD(a) fbld a -#define FBSTP(a) fbstp a -#define FCHS fchs -#define FCLEX fclex -#define FNCLEX fnclex -#define FCOM(a) fcom a -#define FCOM_D(a) fcom D_(a) -#define FCOM_S(a) fcom S_(a) -#define FCOMP(a) fcomp a -#define FCOMP_D(a) fcomp D_(a) -#define FCOMP_S(a) fcomp S_(a) -#define FCOMPP fcompp -#define FCOS fcos -#define FDECSTP fdecstp -#define FDIV_D(a) fdiv D_(a) -#define FDIV_S(a) fdiv S_(a) -#define FDIV2(a, b) fdiv b, a -#define FDIVP(a, b) fdivp b, a -#define FIDIV_L(a) fidiv L_(a) -#define FIDIV_W(a) fidiv W_(a) -#define FDIVR_D(a) fdivr D_(a) -#define FDIVR_S(a) fdivr S_(a) -#define FDIVR2(a, b) fdivr b, a -#define FDIVRP(a, b) fdivrp b, a -#define FIDIVR_L(a) fidivr L_(a) -#define FIDIVR_W(a) fidivr W_(a) -#define FFREE(a) ffree a 
-#define FICOM_L(a) ficom L_(a) -#define FICOM_W(a) ficom W_(a) -#define FICOMP_L(a) ficomp L_(a) -#define FICOMP_W(a) ficomp W_(a) -#define FILD_Q(a) fild D_(a) -#define FILD_L(a) fild L_(a) -#define FILD_W(a) fild W_(a) -#define FINCSTP fincstp -#define FINIT finit -#define FNINIT fninit -#define FIST_L(a) fist L_(a) -#define FIST_W(a) fist W_(a) -#define FISTP_Q(a) fistp D_(a) -#define FISTP_L(a) fistp L_(a) -#define FISTP_W(a) fistp W_(a) -#define FLD_X(a) fld X_(a) -#define FLD_D(a) fld D_(a) -#define FLD_S(a) fld S_(a) -#define FLD1 fld1 -#define FLDL2T fldl2t -#define FLDL2E fldl2e -#define FLDPI fldpi -#define FLDLG2 fldlg2 -#define FLDLN2 fldln2 -#define FLDZ fldz -#define FLDCW(a) fldcw a -#define FLDENV(a) fldenv a -#define FMUL_S(a) fmul S_(a) -#define FMUL_D(a) fmul D_(a) -#define FMUL2(a, b) fmul b, a -#define FMULP(a, b) fmulp b, a -#define FIMUL_L(a) fimul L_(a) -#define FIMUL_W(a) fimul W_(a) -#define FNOP fnop -#define FPATAN fpatan -#define FPREM fprem -#define FPREM1 fprem1 -#define FPTAN fptan -#define FRNDINT frndint -#define FRSTOR(a) frstor a -#define FSAVE(a) fsave a -#define FNSAVE(a) fnsave a -#define FSCALE fscale -#define FSIN fsin -#define FSINCOS fsincos -#define FSQRT fsqrt -#define FST_D(a) fst D_(a) -#define FST_S(a) fst S_(a) -#define FSTP_X(a) fstp X_(a) -#define FSTP_D(a) fstp D_(a) -#define FSTP_S(a) fstp S_(a) -#define FSTP(a) fstp a -#define FSTCW(a) fstcw a -#define FNSTCW(a) fnstcw a -#define FSTENV(a) fstenv a -#define FNSTENV(a) fnstenv a -#define FSTSW(a) fstsw a -#define FNSTSW(a) fnstsw a -#define FSUB_S(a) fsub S_(a) -#define FSUB_D(a) fsub D_(a) -#define FSUB2(a, b) fsub b, a -#define FSUBP(a, b) fsubp b, a -#define FISUB_L(a) fisub L_(a) -#define FISUB_W(a) fisub W_(a) -#define FSUBR_S(a) fsubr S_(a) -#define FSUBR_D(a) fsubr D_(a) -#define FSUBR2(a, b) fsubr b, a -#define FSUBRP(a, b) fsubrp b, a -#define FISUBR_L(a) fisubr L_(a) -#define FISUBR_W(a) fisubr W_(a) -#define FTST ftst -#define FUCOM(a) fucom a 
-#define FUCOMP(a) fucomp a -#define FUCOMPP fucompp -#define FWAIT fwait -#define FXAM fxam -#define FXCH(a) fxch a -#define FXTRACT fxtract -#define FYL2X fyl2x -#define FYL2XP1 fyl2xp1 +#define P_(a) P_ ## a +#define X_(a) X_ ## a +#define D_(a) D_ ## a +#define S_(a) L_ ## a +#define L_(a) L_ ## a +#define W_(a) W_ ## a +#define B_(a) B_ ## a + +#define AAA aaa +#define AAD aad +#define AAM aam +#define AAS aas +#define ADC_L(a, b) adc L_(b), L_(a) +#define ADC_W(a, b) adc W_(b), W_(a) +#define ADC_B(a, b) adc B_(b), B_(a) +#define ADD_L(a, b) add L_(b), L_(a) +#define ADD_W(a, b) add W_(b), W_(a) +#define ADD_B(a, b) add B_(b), B_(a) +#define AND_L(a, b) and L_(b), L_(a) +#define AND_W(a, b) and W_(b), W_(a) +#define AND_B(a, b) and B_(b), B_(a) +#define ARPL(a,b) arpl W_(b), a +#define BOUND_L(a, b) bound L_(b), L_(a) +#define BOUND_W(a, b) bound W_(b), W_(a) +#define BSF_L(a, b) bsf L_(b), L_(a) +#define BSF_W(a, b) bsf W_(b), W_(a) +#define BSR_L(a, b) bsr L_(b), L_(a) +#define BSR_W(a, b) bsr W_(b), W_(a) +#define BT_L(a, b) bt L_(b), L_(a) +#define BT_W(a, b) bt W_(b), W_(a) +#define BTC_L(a, b) btc L_(b), L_(a) +#define BTC_W(a, b) btc W_(b), W_(a) +#define BTR_L(a, b) btr L_(b), L_(a) +#define BTR_W(a, b) btr W_(b), W_(a) +#define BTS_L(a, b) bts L_(b), L_(a) +#define BTS_W(a, b) bts W_(b), W_(a) +#define CALL(a) call a +#define CALLF(s,a) call far s:a +#define CBW cbw +#define CWDE cwde +#define CLC clc +#define CLD cld +#define CLI cli +#define CLTS clts +#define CMC cmc +#define CMP_L(a, b) cmp L_(b), L_(a) +#define CMP_W(a, b) cmp W_(b), W_(a) +#define CMP_B(a, b) cmp B_(b), B_(a) +#define CMPS_L cmpsd +#define CMPS_W cmpsw +#define CMPS_B cmpsb +#define CPUID cpuid +#define CWD cwd +#define CDQ cdq +#define DAA daa +#define DAS das +#define DEC_L(a) dec L_(a) +#define DEC_W(a) dec W_(a) +#define DEC_B(a) dec B_(a) +#define DIV_L(a) div L_(a) +#define DIV_W(a) div W_(a) +#define DIV_B(a) div B_(a) +#define ENTER(a,b) enter b, a +#define HLT hlt 
+#define IDIV_L(a) idiv L_(a) +#define IDIV_W(a) idiv W_(a) +#define IDIV_B(a) idiv B_(a) +#define IMUL_L(a, b) imul L_(b), L_(a) +#define IMUL_W(a, b) imul W_(b), W_(a) +#define IMUL_B(a) imul B_(a) +#define IN_L in EAX, DX +#define IN_W in AX, DX +#define IN_B in AL, DX +#define IN1_L(a) in1 L_(a) +#define IN1_W(a) in1 W_(a) +#define IN1_B(a) in1 B_(a) +#define INC_L(a) inc L_(a) +#define INC_W(a) inc W_(a) +#define INC_B(a) inc B_(a) +#define INS_L ins +#define INS_W ins +#define INS_B ins +#define INT(a) int B_(a) +#define INT3 int3 +#define INTO into +#define IRET iret +#define IRETD iretd +#define JA(a) ja NEAR a +#define JAE(a) jae NEAR a +#define JB(a) jb NEAR a +#define JBE(a) jbe NEAR a +#define JC(a) jc NEAR a +#define JE(a) je NEAR a +#define JG(a) jg NEAR a +#define JGE(a) jge NEAR a +#define JL(a) jl NEAR a +#define JLE(a) jle NEAR a +#define JNA(a) jna NEAR a +#define JNAE(a) jnae NEAR a +#define JNB(a) jnb NEAR a +#define JNBE(a) jnbe NEAR a +#define JNC(a) jnc NEAR a +#define JNE(a) jne NEAR a +#define JNG(a) jng NEAR a +#define JNGE(a) jnge NEAR a +#define JNL(a) jnl NEAR a +#define JNLE(a) jnle NEAR a +#define JNO(a) jno NEAR a +#define JNP(a) jnp NEAR a +#define JNS(a) jns NEAR a +#define JNZ(a) jnz NEAR a +#define JO(a) jo NEAR a +#define JP(a) jp NEAR a +#define JPE(a) jpe NEAR a +#define JPO(a) jpo NEAR a +#define JS(a) js NEAR a +#define JZ(a) jz NEAR a +#define JMP(a) jmp a +#define JMPF(s,a) jmpf +#define LAHF lahf +#define LAR(a, b) lar b, a +#define LEA_L(a, b) lea P_(b), P_(a) +#define LEA_W(a, b) lea P_(b), P_(a) +#define LEAVE leave +#define LGDT(a) lgdt a +#define LIDT(a) lidt a +#define LDS(a, b) lds b, a +#define LES(a, b) les b, a +#define LFS(a, b) lfs b, a +#define LGS(a, b) lgs b, a +#define LSS(a, b) lss b, a +#define LLDT(a) lldt a +#define LMSW(a) lmsw a +#define LOCK lock +#define LODS_L lodsd +#define LODS_W lodsw +#define LODS_B lodsb +#define LOOP(a) loop a +#define LOOPE(a) loope a +#define LOOPZ(a) loopz a +#define 
LOOPNE(a) loopne a +#define LOOPNZ(a) loopnz a +#define LSL(a, b) lsl b, a +#define LTR(a) ltr a +#define MOV_SR(a, b) mov S_(b), S_(a) +#define MOV_L(a, b) mov L_(b), L_(a) +#define MOV_W(a, b) mov W_(b), W_(a) +#define MOV_B(a, b) mov B_(b), B_(a) +#define MOVS_L movsd +#define MOVS_W movsw +#define MOVS_B movsb +#define MOVSX_BL(a, b) movsx B_(b), B_(a) +#define MOVSX_BW(a, b) movsx B_(b), B_(a) +#define MOVSX_WL(a, b) movsx W_(b), W_(a) +#define MOVZX_BL(a, b) movzx B_(b), B_(a) +#define MOVZX_BW(a, b) movzx B_(b), B_(a) +#define MOVZX_WL(a, b) movzx W_(b), W_(a) +#define MUL_L(a) mul L_(a) +#define MUL_W(a) mul W_(a) +#define MUL_B(a) mul B_(a) +#define NEG_L(a) neg L_(a) +#define NEG_W(a) neg W_(a) +#define NEG_B(a) neg B_(a) +#define NOP nop +#define NOT_L(a) not L_(a) +#define NOT_W(a) not W_(a) +#define NOT_B(a) not B_(a) +#define OR_L(a,b) or L_(b), L_(a) +#define OR_W(a,b) or W_(b), W_(a) +#define OR_B(a,b) or B_(b), B_(a) +#define OUT_L out DX, EAX +#define OUT_W out DX, AX +#define OUT_B out DX, AL +#define OUT1_L(a) out1 L_(a) +#define OUT1_W(a) out1 W_(a) +#define OUT1_B(a) out1 B_(a) +#define OUTS_L outsd +#define OUTS_W outsw +#define OUTS_B outsb +#define POP_SR(a) pop S_(a) +#define POP_L(a) pop L_(a) +#define POP_W(a) pop W_(a) +#define POPA_L popad +#define POPA_W popa +#define POPF_L popfd +#define POPF_W popf +#define PUSH_SR(a) push S_(a) +#define PUSH_L(a) push L_(a) +#define PUSH_W(a) push W_(a) +#define PUSH_B(a) push B_(a) +#define PUSHA_L pushad +#define PUSHA_W pusha +#define PUSHF_L pushfd +#define PUSHF_W pushf +#define RCL_L(a, b) rcl L_(b), L_(a) +#define RCL_W(a, b) rcl W_(b), W_(a) +#define RCL_B(a, b) rcl B_(b), B_(a) +#define RCR_L(a, b) rcr L_(b), L_(a) +#define RCR_W(a, b) rcr W_(b), W_(a) +#define RCR_B(a, b) rcr B_(b), B_(a) +#define RDTSC rdtsc +#define ROL_L(a, b) rol L_(b), L_(a) +#define ROL_W(a, b) rol W_(b), W_(a) +#define ROL_B(a, b) rol B_(b), B_(a) +#define ROR_L(a, b) ror L_(b), L_(a) +#define ROR_W(a, b) ror 
W_(b), W_(a) +#define ROR_B(a, b) ror B_(b), B_(a) +#define REP rep +#define REPE repe +#define REPNE repne +#define REPNZ REPNE +#define REPZ REPE +#define RET ret +#define SAHF sahf +#define SAL_L(a, b) sal L_(b), L_(a) +#define SAL_W(a, b) sal W_(b), W_(a) +#define SAL_B(a, b) sal B_(b), B_(a) +#define SAR_L(a, b) sar L_(b), L_(a) +#define SAR_W(a, b) sar W_(b), W_(a) +#define SAR_B(a, b) sar B_(b), B_(a) +#define SBB_L(a, b) sbb L_(b), L_(a) +#define SBB_W(a, b) sbb W_(b), W_(a) +#define SBB_B(a, b) sbb B_(b), B_(a) +#define SCAS_L scas +#define SCAS_W scas +#define SCAS_B scas +#define SETA(a) seta a +#define SETAE(a) setae a +#define SETB(a) setb a +#define SETBE(a) setbe a +#define SETC(a) setc a +#define SETE(a) sete a +#define SETG(a) setg a +#define SETGE(a) setge a +#define SETL(a) setl a +#define SETLE(a) setle a +#define SETNA(a) setna a +#define SETNAE(a) setnae a +#define SETNB(a) setnb a +#define SETNBE(a) setnbe a +#define SETNC(a) setnc a +#define SETNE(a) setne a +#define SETNG(a) setng a +#define SETNGE(a) setnge a +#define SETNL(a) setnl a +#define SETNLE(a) setnle a +#define SETNO(a) setno a +#define SETNP(a) setnp a +#define SETNS(a) setns a +#define SETNZ(a) setnz a +#define SETO(a) seto a +#define SETP(a) setp a +#define SETPE(a) setpe a +#define SETPO(a) setpo a +#define SETS(a) sets a +#define SETZ(a) setz a +#define SGDT(a) sgdt a +#define SIDT(a) sidt a +#define SHL_L(a, b) shl L_(b), L_(a) +#define SHL_W(a, b) shl W_(b), W_(a) +#define SHL_B(a, b) shl B_(b), B_(a) +#define SHLD_L(a,b,c) shld +#define SHLD2_L(a,b) shld L_(b), L_(a) +#define SHLD_W(a,b,c) shld +#define SHLD2_W(a,b) shld W_(b), W_(a) +#define SHR_L(a, b) shr L_(b), L_(a) +#define SHR_W(a, b) shr W_(b), W_(a) +#define SHR_B(a, b) shr B_(b), B_(a) +#define SHRD_L(a,b,c) shrd +#define SHRD2_L(a,b) shrd L_(b), L_(a) +#define SHRD_W(a,b,c) shrd +#define SHRD2_W(a,b) shrd W_(b), W_(a) +#define SLDT(a) sldt a +#define SMSW(a) smsw a +#define STC stc +#define STD std +#define STI 
sti +#define STOS_L stos +#define STOS_W stos +#define STOS_B stos +#define STR(a) str a +#define SUB_L(a, b) sub L_(b), L_(a) +#define SUB_W(a, b) sub W_(b), W_(a) +#define SUB_B(a, b) sub B_(b), B_(a) +#define TEST_L(a, b) test L_(b), L_(a) +#define TEST_W(a, b) test W_(b), W_(a) +#define TEST_B(a, b) test B_(b), B_(a) +#define VERR(a) verr a +#define VERW(a) verw a +#define WAIT wait +#define XCHG_L(a, b) xchg L_(b), L_(a) +#define XCHG_W(a, b) xchg W_(b), W_(a) +#define XCHG_B(a, b) xchg B_(b), B_(a) +#define XLAT xlat +#define XOR_L(a, b) xor L_(b), L_(a) +#define XOR_W(a, b) xor W_(b), W_(a) +#define XOR_B(a, b) xor B_(b), B_(a) -/* New instructions */ -#define CPUID D_BYTE 15, 162 -#define RDTSC D_BYTE 15, 49 + +/* Floating Point Instructions */ +#define F2XM1 f2xm1 +#define FABS fabs +#define FADD_D(a) fadd D_(a) +#define FADD_S(a) fadd S_(a) +#define FADD2(a, b) fadd b, a +#define FADDP(a, b) faddp b, a +#define FIADD_L(a) fiadd L_(a) +#define FIADD_W(a) fiadd W_(a) +#define FBLD(a) fbld a +#define FBSTP(a) fbstp a +#define FCHS fchs +#define FCLEX fclex +#define FNCLEX fnclex +#define FCOM(a) fcom a +#define FCOM_D(a) fcom D_(a) +#define FCOM_S(a) fcom S_(a) +#define FCOMP(a) fcomp a +#define FCOMP_D(a) fcomp D_(a) +#define FCOMP_S(a) fcomp S_(a) +#define FCOMPP fcompp +#define FCOS fcos +#define FDECSTP fdecstp +#define FDIV_D(a) fdiv D_(a) +#define FDIV_S(a) fdiv S_(a) +#define FDIV2(a, b) fdiv b, a +#define FDIVP(a, b) fdivp b, a +#define FIDIV_L(a) fidiv L_(a) +#define FIDIV_W(a) fidiv W_(a) +#define FDIVR_D(a) fdivr D_(a) +#define FDIVR_S(a) fdivr S_(a) +#define FDIVR2(a, b) fdivr b, a +#define FDIVRP(a, b) fdivrp b, a +#define FIDIVR_L(a) fidivr L_(a) +#define FIDIVR_W(a) fidivr W_(a) +#define FFREE(a) ffree a +#define FICOM_L(a) ficom L_(a) +#define FICOM_W(a) ficom W_(a) +#define FICOMP_L(a) ficomp L_(a) +#define FICOMP_W(a) ficomp W_(a) +#define FILD_Q(a) fild D_(a) +#define FILD_L(a) fild L_(a) +#define FILD_W(a) fild W_(a) +#define FINCSTP 
fincstp +#define FINIT finit +#define FNINIT fninit +#define FIST_L(a) fist L_(a) +#define FIST_W(a) fist W_(a) +#define FISTP_Q(a) fistp D_(a) +#define FISTP_L(a) fistp L_(a) +#define FISTP_W(a) fistp W_(a) +#define FLD_X(a) fld X_(a) +#define FLD_D(a) fld D_(a) +#define FLD_S(a) fld S_(a) +#define FLD1 fld1 +#define FLDL2T fldl2t +#define FLDL2E fldl2e +#define FLDPI fldpi +#define FLDLG2 fldlg2 +#define FLDLN2 fldln2 +#define FLDZ fldz +#define FLDCW(a) fldcw a +#define FLDENV(a) fldenv a +#define FMUL_S(a) fmul S_(a) +#define FMUL_D(a) fmul D_(a) +#define FMUL2(a, b) fmul b, a +#define FMULP(a, b) fmulp b, a +#define FIMUL_L(a) fimul L_(a) +#define FIMUL_W(a) fimul W_(a) +#define FNOP fnop +#define FPATAN fpatan +#define FPREM fprem +#define FPREM1 fprem1 +#define FPTAN fptan +#define FRNDINT frndint +#define FRSTOR(a) frstor a +#define FSAVE(a) fsave a +#define FNSAVE(a) fnsave a +#define FSCALE fscale +#define FSIN fsin +#define FSINCOS fsincos +#define FSQRT fsqrt +#define FST_D(a) fst D_(a) +#define FST_S(a) fst S_(a) +#define FSTP_X(a) fstp X_(a) +#define FSTP_D(a) fstp D_(a) +#define FSTP_S(a) fstp S_(a) +#define FSTP(a) fstp a +#define FSTCW(a) fstcw a +#define FNSTCW(a) fnstcw a +#define FSTENV(a) fstenv a +#define FNSTENV(a) fnstenv a +#define FSTSW(a) fstsw a +#define FNSTSW(a) fnstsw a +#define FSUB_S(a) fsub S_(a) +#define FSUB_D(a) fsub D_(a) +#define FSUB2(a, b) fsub b, a +#define FSUBP(a, b) fsubp b, a +#define FISUB_L(a) fisub L_(a) +#define FISUB_W(a) fisub W_(a) +#define FSUBR_S(a) fsubr S_(a) +#define FSUBR_D(a) fsubr D_(a) +#define FSUBR2(a, b) fsubr b, a +#define FSUBRP(a, b) fsubrp b, a +#define FISUBR_L(a) fisubr L_(a) +#define FISUBR_W(a) fisubr W_(a) +#define FTST ftst +#define FUCOM(a) fucom a +#define FUCOMP(a) fucomp a +#define FUCOMPP fucompp +#define FWAIT fwait +#define FXAM fxam +#define FXCH(a) fxch a +#define FXTRACT fxtract +#define FYL2X fyl2x +#define FYL2XP1 fyl2xp1 #endif /* NASM_ASSEMBLER, MASM_ASSEMBLER */ @@ -1474,182 
+1488,188 @@ SECTION _DATA public align=16 class=DATA use32 flat /****************************************/ #if defined(NASM_ASSEMBLER) || defined(MASM_ASSEMBLER) -#define P_ARG1(a) P_ ## a -#define P_ARG2(a, b) P_ ## b, P_ ## a -#define P_ARG3(a, b, c) P_ ## c, P_ ## b, P_ ## a +#define P_ARG1(a) P_ ## a +#define P_ARG2(a, b) P_ ## b, P_ ## a +#define P_ARG3(a, b, c) P_ ## c, P_ ## b, P_ ## a #else -#define P_ARG1(a) a -#define P_ARG2(a, b) a, b -#define P_ARG3(a, b, c) a, b, c +#define P_ARG1(a) a +#define P_ARG2(a, b) a, b +#define P_ARG3(a, b, c) a, b, c #endif /* MMX */ -#define MOVD(a, b) movd P_ARG2(a, b) -#define MOVQ(a, b) movq P_ARG2(a, b) +#define MOVD(a, b) movd P_ARG2(a, b) +#define MOVQ(a, b) movq P_ARG2(a, b) -#define PADDB(a, b) paddb P_ARG2(a, b) -#define PADDW(a, b) paddw P_ARG2(a, b) -#define PADDD(a, b) paddd P_ARG2(a, b) +#define PADDB(a, b) paddb P_ARG2(a, b) +#define PADDW(a, b) paddw P_ARG2(a, b) +#define PADDD(a, b) paddd P_ARG2(a, b) -#define PADDSB(a, b) paddsb P_ARG2(a, b) -#define PADDSW(a, b) paddsw P_ARG2(a, b) +#define PADDSB(a, b) paddsb P_ARG2(a, b) +#define PADDSW(a, b) paddsw P_ARG2(a, b) -#define PADDUSB(a, b) paddusb P_ARG2(a, b) -#define PADDUSW(a, b) paddusw P_ARG2(a, b) +#define PADDUSB(a, b) paddusb P_ARG2(a, b) +#define PADDUSW(a, b) paddusw P_ARG2(a, b) -#define PSUBB(a, b) psubb P_ARG2(a, b) -#define PSUBW(a, b) psubw P_ARG2(a, b) -#define PSUBD(a, b) psubd P_ARG2(a, b) +#define PSUBB(a, b) psubb P_ARG2(a, b) +#define PSUBW(a, b) psubw P_ARG2(a, b) +#define PSUBD(a, b) psubd P_ARG2(a, b) -#define PSUBSB(a, b) psubsb P_ARG2(a, b) -#define PSUBSW(a, b) psubsw P_ARG2(a, b) +#define PSUBSB(a, b) psubsb P_ARG2(a, b) +#define PSUBSW(a, b) psubsw P_ARG2(a, b) -#define PSUBUSB(a, b) psubusb P_ARG2(a, b) -#define PSUBUSW(a, b) psubusw P_ARG2(a, b) +#define PSUBUSB(a, b) psubusb P_ARG2(a, b) +#define PSUBUSW(a, b) psubusw P_ARG2(a, b) -#define PCMPEQB(a, b) pcmpeqb P_ARG2(a, b) -#define PCMPEQW(a, b) pcmpeqw P_ARG2(a, b) -#define 
PCMPEQD(a, b) pcmpeqd P_ARG2(a, b) +#define PCMPEQB(a, b) pcmpeqb P_ARG2(a, b) +#define PCMPEQW(a, b) pcmpeqw P_ARG2(a, b) +#define PCMPEQD(a, b) pcmpeqd P_ARG2(a, b) -#define PCMPGTB(a, b) pcmpgtb P_ARG2(a, b) -#define PCMPGTW(a, b) pcmpgtw P_ARG2(a, b) -#define PCMPGTD(a, b) pcmpgtd P_ARG2(a, b) +#define PCMPGTB(a, b) pcmpgtb P_ARG2(a, b) +#define PCMPGTW(a, b) pcmpgtw P_ARG2(a, b) +#define PCMPGTD(a, b) pcmpgtd P_ARG2(a, b) -#define PMULHW(a, b) pmulhw P_ARG2(a, b) -#define PMULLW(a, b) pmullw P_ARG2(a, b) +#define PMULHW(a, b) pmulhw P_ARG2(a, b) +#define PMULLW(a, b) pmullw P_ARG2(a, b) -#define PMADDWD(a, b) pmaddwd P_ARG2(a, b) +#define PMADDWD(a, b) pmaddwd P_ARG2(a, b) -#define PAND(a, b) pand P_ARG2(a, b) +#define PAND(a, b) pand P_ARG2(a, b) -#define PANDN(a, b) pandn P_ARG2(a, b) +#define PANDN(a, b) pandn P_ARG2(a, b) -#define POR(a, b) por P_ARG2(a, b) +#define POR(a, b) por P_ARG2(a, b) -#define PXOR(a, b) pxor P_ARG2(a, b) +#define PXOR(a, b) pxor P_ARG2(a, b) -#define PSRAW(a, b) psraw P_ARG2(a, b) -#define PSRAD(a, b) psrad P_ARG2(a, b) +#define PSRAW(a, b) psraw P_ARG2(a, b) +#define PSRAD(a, b) psrad P_ARG2(a, b) -#define PSRLW(a, b) psrlw P_ARG2(a, b) -#define PSRLD(a, b) psrld P_ARG2(a, b) -#define PSRLQ(a, b) psrlq P_ARG2(a, b) +#define PSRLW(a, b) psrlw P_ARG2(a, b) +#define PSRLD(a, b) psrld P_ARG2(a, b) +#define PSRLQ(a, b) psrlq P_ARG2(a, b) -#define PSLLW(a, b) psllw P_ARG2(a, b) -#define PSLLD(a, b) pslld P_ARG2(a, b) -#define PSLLQ(a, b) psllq P_ARG2(a, b) +#define PSLLW(a, b) psllw P_ARG2(a, b) +#define PSLLD(a, b) pslld P_ARG2(a, b) +#define PSLLQ(a, b) psllq P_ARG2(a, b) -#define PACKSSWB(a, b) packsswb P_ARG2(a, b) -#define PACKSSDW(a, b) packssdw P_ARG2(a, b) -#define PACKUSWB(a, b) packuswb P_ARG2(a, b) +#define PACKSSWB(a, b) packsswb P_ARG2(a, b) +#define PACKSSDW(a, b) packssdw P_ARG2(a, b) +#define PACKUSWB(a, b) packuswb P_ARG2(a, b) -#define PUNPCKHBW(a, b) punpckhbw P_ARG2(a, b) -#define PUNPCKHWD(a, b) punpckhwd P_ARG2(a, 
b) -#define PUNPCKHDQ(a, b) punpckhdq P_ARG2(a, b) -#define PUNPCKLBW(a, b) punpcklbw P_ARG2(a, b) -#define PUNPCKLWD(a, b) punpcklwd P_ARG2(a, b) -#define PUNPCKLDQ(a, b) punpckldq P_ARG2(a, b) +#define PUNPCKHBW(a, b) punpckhbw P_ARG2(a, b) +#define PUNPCKHWD(a, b) punpckhwd P_ARG2(a, b) +#define PUNPCKHDQ(a, b) punpckhdq P_ARG2(a, b) +#define PUNPCKLBW(a, b) punpcklbw P_ARG2(a, b) +#define PUNPCKLWD(a, b) punpcklwd P_ARG2(a, b) +#define PUNPCKLDQ(a, b) punpckldq P_ARG2(a, b) -#define EMMS emms +#define EMMS emms /* AMD 3DNow! */ -#define PAVGUSB(a, b) pavgusb P_ARG2(a, b) -#define PFADD(a, b) pfadd P_ARG2(a, b) -#define PFSUB(a, b) pfsub P_ARG2(a, b) -#define PFSUBR(a, b) pfsubr P_ARG2(a, b) -#define PFACC(a, b) pfacc P_ARG2(a, b) -#define PFCMPGE(a, b) pfcmpge P_ARG2(a, b) -#define PFCMPGT(a, b) pfcmpgt P_ARG2(a, b) -#define PFCMPEQ(a, b) pfcmpeq P_ARG2(a, b) -#define PFMIN(a, b) pfmin P_ARG2(a, b) -#define PFMAX(a, b) pfmax P_ARG2(a, b) -#define PI2FD(a, b) pi2fd P_ARG2(a, b) -#define PF2ID(a, b) pf2id P_ARG2(a, b) -#define PFRCP(a, b) pfrcp P_ARG2(a, b) -#define PFRSQRT(a, b) pfrsqrt P_ARG2(a, b) -#define PFMUL(a, b) pfmul P_ARG2(a, b) -#define PFRCPIT1(a, b) pfrcpit1 P_ARG2(a, b) -#define PFRSQIT1(a, b) pfrsqit1 P_ARG2(a, b) -#define PFRCPIT2(a, b) pfrcpit2 P_ARG2(a, b) -#define PMULHRW(a, b) pmulhrw P_ARG2(a, b) - -#define FEMMS femms -#define PREFETCH(a) prefetch P_ARG1(a) -#define PREFETCHW(a) prefetchw P_ARG1(a) +#define PAVGUSB(a, b) pavgusb P_ARG2(a, b) +#define PFADD(a, b) pfadd P_ARG2(a, b) +#define PFSUB(a, b) pfsub P_ARG2(a, b) +#define PFSUBR(a, b) pfsubr P_ARG2(a, b) +#define PFACC(a, b) pfacc P_ARG2(a, b) +#define PFCMPGE(a, b) pfcmpge P_ARG2(a, b) +#define PFCMPGT(a, b) pfcmpgt P_ARG2(a, b) +#define PFCMPEQ(a, b) pfcmpeq P_ARG2(a, b) +#define PFMIN(a, b) pfmin P_ARG2(a, b) +#define PFMAX(a, b) pfmax P_ARG2(a, b) +#define PI2FD(a, b) pi2fd P_ARG2(a, b) +#define PF2ID(a, b) pf2id P_ARG2(a, b) +#define PFRCP(a, b) pfrcp P_ARG2(a, b) +#define 
PFRSQRT(a, b) pfrsqrt P_ARG2(a, b) +#define PFMUL(a, b) pfmul P_ARG2(a, b) +#define PFRCPIT1(a, b) pfrcpit1 P_ARG2(a, b) +#define PFRSQIT1(a, b) pfrsqit1 P_ARG2(a, b) +#define PFRCPIT2(a, b) pfrcpit2 P_ARG2(a, b) +#define PMULHRW(a, b) pmulhrw P_ARG2(a, b) + +#define FEMMS femms +#define PREFETCH(a) prefetch P_ARG1(a) +#define PREFETCHW(a) prefetchw P_ARG1(a) /* Intel SSE */ -#define ADDPS(a, b) addps P_ARG2(a, b) -#define ADDSS(a, b) addss P_ARG2(a, b) -#define ANDNPS(a, b) andnps P_ARG2(a, b) -#define ANDPS(a, b) andps P_ARG2(a, b) -/* - NASM only knows the pseudo ops for these. - #define CMPPS(a, b, c) cmpps P_ARG3(a, b, c) - #define CMPSS(a, b, c) cmpss P_ARG3(a, b, c) +#define ADDPS(a, b) addps P_ARG2(a, b) +#define ADDSS(a, b) addss P_ARG2(a, b) +#define ANDNPS(a, b) andnps P_ARG2(a, b) +#define ANDPS(a, b) andps P_ARG2(a, b) +/* NASM only knows the pseudo ops for these. +#define CMPPS(a, b, c) cmpps P_ARG3(a, b, c) +#define CMPSS(a, b, c) cmpss P_ARG3(a, b, c) */ -#define CMPEQPS(a, b) cmpeqps P_ARG2(a, b) -#define CMPLTPS(a, b) cmpltps P_ARG2(a, b) -#define CMPLEPS(a, b) cmpleps P_ARG2(a, b) +#define CMPEQPS(a, b) cmpeqps P_ARG2(a, b) +#define CMPLTPS(a, b) cmpltps P_ARG2(a, b) +#define CMPLEPS(a, b) cmpleps P_ARG2(a, b) #define CMPUNORDPS(a, b) cmpunordps P_ARG2(a, b) -#define CMPNEQPS(a, b) cmpneqps P_ARG2(a, b) -#define CMPNLTPS(a, b) cmpnltps P_ARG2(a, b) -#define CMPNLEPS(a, b) cmpnleps P_ARG2(a, b) -#define CMPORDPS(a, b) cmpordps P_ARG2(a, b) -#define CMPEQSS(a, b) cmpeqss P_ARG2(a, b) -#define CMPLTSS(a, b) cmpltss P_ARG2(a, b) -#define CMPLESS(a, b) cmpless P_ARG2(a, b) +#define CMPNEQPS(a, b) cmpneqps P_ARG2(a, b) +#define CMPNLTPS(a, b) cmpnltps P_ARG2(a, b) +#define CMPNLEPS(a, b) cmpnleps P_ARG2(a, b) +#define CMPORDPS(a, b) cmpordps P_ARG2(a, b) +#define CMPEQSS(a, b) cmpeqss P_ARG2(a, b) +#define CMPLTSS(a, b) cmpltss P_ARG2(a, b) +#define CMPLESS(a, b) cmpless P_ARG2(a, b) #define CMPUNORDSS(a, b) cmpunordss P_ARG2(a, b) -#define CMPNEQSS(a, 
b) cmpneqss P_ARG2(a, b) -#define CMPNLTSS(a, b) cmpnltss P_ARG2(a, b) -#define CMPNLESS(a, b) cmpnless P_ARG2(a, b) -#define CMPORDSS(a, b) cmpordss P_ARG2(a, b) -#define COMISS(a, b) comiss P_ARG2(a, b) -#define CVTPI2PS(a, b) cvtpi2ps P_ARG2(a, b) -#define CVTPS2PI(a, b) cvtps2pi P_ARG2(a, b) -#define CVTSI2SS(a, b) cvtsi2ss P_ARG2(a, b) -#define CVTSS2SI(a, b) cvtss2si P_ARG2(a, b) -#define CVTTPS2PI(a, b) cvttps2pi P_ARG2(a, b) -#define CVTTSS2SI(a, b) cvttss2si P_ARG2(a, b) -#define DIVPS(a, b) divps P_ARG2(a, b) -#define DIVSS(a, b) divss P_ARG2(a, b) -#define FXRSTOR(a) fxrstor P_ARG1(a) -#define FXSAVE(a) fxsave P_ARG1(a) -#define LDMXCSR(a) ldmxcsr P_ARG1(a) -#define MAXPS(a, b) maxps P_ARG2(a, b) -#define MAXSS(a, b) maxss P_ARG2(a, b) -#define MINPS(a, b) minps P_ARG2(a, b) -#define MINSS(a, b) minss P_ARG2(a, b) -#define MOVAPS(a, b) movaps P_ARG2(a, b) -#define MOVHLPS(a, b) movhlps P_ARG2(a, b) -#define MOVHPS(a, b) movhps P_ARG2(a, b) -#define MOVLHPS(a, b) movlhps P_ARG2(a, b) -#define MOVLPS(a, b) movlps P_ARG2(a, b) -#define MOVMSKPS(a, b) movmskps P_ARG2(a, b) -#define MOVSS(a, b) movss P_ARG2(a, b) -#define MOVUPS(a, b) movups P_ARG2(a, b) -#define MULPS(a, b) mulps P_ARG2(a, b) -#define MULSS(a, b) mulss P_ARG2(a, b) -#define ORPS(a, b) orps P_ARG2(a, b) -#define RCPPS(a, b) rcpps P_ARG2(a, b) -#define RCPSS(a, b) rcpss P_ARG2(a, b) -#define RSQRTPS(a, b) rsqrtps P_ARG2(a, b) -#define RSQRTSS(a, b) rsqrtss P_ARG2(a, b) -#define SHUFPS(a, b, c) shufps P_ARG3(a, b, c) -#define SQRTPS(a, b) sqrtps P_ARG2(a, b) -#define SQRTSS(a, b) sqrtss P_ARG2(a, b) -#define STMXCSR(a) stmxcsr P_ARG1(a) -#define SUBPS(a, b) subps P_ARG2(a, b) -#define UCOMISS(a, b) ucomiss P_ARG2(a, b) -#define UNPCKHPS(a, b) unpckhps P_ARG2(a, b) -#define UNPCKLPS(a, b) unpcklps P_ARG2(a, b) -#define XORPS(a, b) xorps P_ARG2(a, b) - +#define CMPNEQSS(a, b) cmpneqss P_ARG2(a, b) +#define CMPNLTSS(a, b) cmpnltss P_ARG2(a, b) +#define CMPNLESS(a, b) cmpnless P_ARG2(a, b) +#define 
CMPORDSS(a, b) cmpordss P_ARG2(a, b) +#define COMISS(a, b) comiss P_ARG2(a, b) +#define CVTPI2PS(a, b) cvtpi2ps P_ARG2(a, b) +#define CVTPS2PI(a, b) cvtps2pi P_ARG2(a, b) +#define CVTSI2SS(a, b) cvtsi2ss P_ARG2(a, b) +#define CVTSS2SI(a, b) cvtss2si P_ARG2(a, b) +#define CVTTPS2PI(a, b) cvttps2pi P_ARG2(a, b) +#define CVTTSS2SI(a, b) cvttss2si P_ARG2(a, b) +#define DIVPS(a, b) divps P_ARG2(a, b) +#define DIVSS(a, b) divss P_ARG2(a, b) +#define FXRSTOR(a) fxrstor P_ARG1(a) +#define FXSAVE(a) fxsave P_ARG1(a) +#define LDMXCSR(a) ldmxcsr P_ARG1(a) +#define MAXPS(a, b) maxps P_ARG2(a, b) +#define MAXSS(a, b) maxss P_ARG2(a, b) +#define MINPS(a, b) minps P_ARG2(a, b) +#define MINSS(a, b) minss P_ARG2(a, b) +#define MOVAPS(a, b) movaps P_ARG2(a, b) +#define MOVHLPS(a, b) movhlps P_ARG2(a, b) +#define MOVHPS(a, b) movhps P_ARG2(a, b) +#define MOVLHPS(a, b) movlhps P_ARG2(a, b) +#define MOVLPS(a, b) movlps P_ARG2(a, b) +#define MOVMSKPS(a, b) movmskps P_ARG2(a, b) +#define MOVNTPS(a, b) movntps P_ARG2(a, b) +#define MOVNTQ(a, b) movntq P_ARG2(a, b) +#define MOVSS(a, b) movss P_ARG2(a, b) +#define MOVUPS(a, b) movups P_ARG2(a, b) +#define MULPS(a, b) mulps P_ARG2(a, b) +#define MULSS(a, b) mulss P_ARG2(a, b) +#define ORPS(a, b) orps P_ARG2(a, b) +#define RCPPS(a, b) rcpps P_ARG2(a, b) +#define RCPSS(a, b) rcpss P_ARG2(a, b) +#define RSQRTPS(a, b) rsqrtps P_ARG2(a, b) +#define RSQRTSS(a, b) rsqrtss P_ARG2(a, b) +#define SHUFPS(a, b, c) shufps P_ARG3(a, b, c) +#define SQRTPS(a, b) sqrtps P_ARG2(a, b) +#define SQRTSS(a, b) sqrtss P_ARG2(a, b) +#define STMXCSR(a) stmxcsr P_ARG1(a) +#define SUBPS(a, b) subps P_ARG2(a, b) +#define UCOMISS(a, b) ucomiss P_ARG2(a, b) +#define UNPCKHPS(a, b) unpckhps P_ARG2(a, b) +#define UNPCKLPS(a, b) unpcklps P_ARG2(a, b) +#define XORPS(a, b) xorps P_ARG2(a, b) + +#define PREFETCHNTA(a) prefetchnta P_ARG1(a) +#define PREFETCHT0(a) prefetcht0 P_ARG1(a) +#define PREFETCHT1(a) prefetcht1 P_ARG1(a) +#define PREFETCHT2(a) prefetcht2 P_ARG1(a) +#define 
SFENCE sfence /* Added by BrianP for FreeBSD (per David Dawes) */ #if !defined(NASM_ASSEMBLER) && !defined(MASM_ASSEMBLER) && !defined(__bsdi__) -#define LLBL(a) CONCAT(.L,a) +#define LLBL(a) CONCAT(.L,a) #else -#define LLBL(a) a +#define LLBL(a) a #endif diff --git a/xc/extras/Mesa/src/X86/clip_args.h b/xc/extras/Mesa/src/X86/clip_args.h new file mode 100644 index 000000000..1196e1499 --- /dev/null +++ b/xc/extras/Mesa/src/X86/clip_args.h @@ -0,0 +1,75 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Clip test function interface for assembly code. Simply define + * FRAME_OFFSET to the number of bytes pushed onto the stack before + * using the ARG_* argument macros. 
+ * + * Gareth Hughes <gareth@valinux.com> + */ + +#ifndef __CLIP_ARGS_H__ +#define __CLIP_ARGS_H__ + +/* Offsets into GLvector4f + */ +#define V4F_DATA 0 +#define V4F_START 4 +#define V4F_COUNT 8 +#define V4F_STRIDE 12 +#define V4F_SIZE 16 +#define V4F_FLAGS 20 + +/* GLvector4f flags + */ +#define VEC_SIZE_1 1 +#define VEC_SIZE_2 3 +#define VEC_SIZE_3 7 +#define VEC_SIZE_4 15 + +/* + * Offsets for clip_func arguments + * + * typedef GLvector4f *(*clip_func)( GLvector4f *vClip, + * GLvector4f *vProj, + * GLubyte clipMask[], + * GLubyte *orMask, + * GLubyte *andMask ); + */ + +#define OFFSET_SOURCE 4 +#define OFFSET_DEST 8 +#define OFFSET_CLIP 12 +#define OFFSET_OR 16 +#define OFFSET_AND 20 + +#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) +#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) +#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) +#define ARG_OR REGOFF(FRAME_OFFSET+OFFSET_OR, ESP) +#define ARG_AND REGOFF(FRAME_OFFSET+OFFSET_AND, ESP) + +#endif diff --git a/xc/extras/Mesa/src/X86/common_x86.c b/xc/extras/Mesa/src/X86/common_x86.c index fdf1b276c..68b7403c2 100644 --- a/xc/extras/Mesa/src/X86/common_x86.c +++ b/xc/extras/Mesa/src/X86/common_x86.c @@ -1,20 +1,20 @@ /* * Mesa 3-D graphics library - * Version: 3.3 - * + * Version: 3.4 + * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
- * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -25,81 +25,102 @@ /* - * Check CPU capabilities & initialize optimized funtions for this particular - * processor. + * Check CPU capabilities & initialize optimized funtions for this particular + * processor. * - * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> - * Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the - * new Katmai functions + * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> + * Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the + * new Katmai functions. */ #include <stdlib.h> #include <stdio.h> -#include "common_x86asm.h" + +#include "common_x86_asm.h" + int gl_x86_cpu_features = 0; -static void message(const char *msg) +/* No reason for this to be public. + */ +extern int gl_identify_x86_cpu_features( void ); + + +static void message( const char *msg ) { - if (getenv("MESA_DEBUG")) - fprintf(stderr, "%s\n", msg); + if ( getenv( "MESA_DEBUG" ) ) { + fprintf( stderr, "%s\n", msg ); + } } -void gl_init_all_x86_asm (void) +void gl_init_all_x86_transform_asm( void ) { #ifdef USE_X86_ASM - gl_x86_cpu_features = gl_identify_x86_cpu_features (); - gl_x86_cpu_features |= GL_CPU_AnyX86; + gl_x86_cpu_features = gl_identify_x86_cpu_features(); - if (getenv("MESA_NO_ASM") != 0) + if ( getenv( "MESA_NO_ASM" ) ) { gl_x86_cpu_features = 0; - - if (gl_x86_cpu_features & GL_CPU_GenuineIntel) { - message("GenuineIntel cpu detected."); } - if (gl_x86_cpu_features) { - gl_init_x86_asm_transforms (); + if ( gl_x86_cpu_features ) { + gl_init_x86_transform_asm(); } #ifdef USE_MMX_ASM - if (gl_x86_cpu_features & GL_CPU_MMX) { - char *s = getenv( "MESA_NO_MMX" ); - if (s == NULL) { - message("MMX cpu detected."); + if ( cpu_has_mmx ) { + if ( getenv( "MESA_NO_MMX" ) 
== 0 ) { + message( "MMX cpu detected." ); } else { - gl_x86_cpu_features &= (~GL_CPU_MMX); + gl_x86_cpu_features &= ~(X86_FEATURE_MMX); } } #endif - #ifdef USE_3DNOW_ASM - if (gl_x86_cpu_features & GL_CPU_3Dnow) { - char *s = getenv( "MESA_NO_3DNOW" ); - if (s == NULL) { - message("3Dnow cpu detected."); - gl_init_3dnow_asm_transforms (); + if ( cpu_has_3dnow ) { + if ( getenv( "MESA_NO_3DNOW" ) == 0 ) { + message( "3Dnow cpu detected." ); + gl_init_3dnow_transform_asm(); } else { - gl_x86_cpu_features &= (~GL_CPU_3Dnow); + gl_x86_cpu_features &= ~(X86_FEATURE_3DNOW); } } #endif - #ifdef USE_KATMAI_ASM - if (gl_x86_cpu_features & GL_CPU_Katmai) { - char *s = getenv( "MESA_NO_KATMAI" ); - if (s == NULL) { - message("Katmai cpu detected."); - gl_init_katmai_asm_transforms (); + if ( cpu_has_xmm ) { + if ( getenv( "MESA_NO_KATMAI" ) == 0 ) { + message( "Katmai cpu detected." ); + gl_init_katmai_transform_asm(); } else { - gl_x86_cpu_features &= (~GL_CPU_Katmai); + gl_x86_cpu_features &= ~(X86_FEATURE_XMM); } } #endif - #endif } +/* Note: the above function must be called before this one, so that + * gl_x86_cpu_features gets correctly initialized. + */ +void gl_init_all_x86_vertex_asm( void ) +{ +#ifdef USE_X86_ASM + if ( gl_x86_cpu_features ) { + gl_init_x86_vertex_asm(); + } + +#ifdef USE_3DNOW_ASM + if ( cpu_has_3dnow && getenv( "MESA_NO_3DNOW" ) == 0 ) { + gl_init_3dnow_vertex_asm(); + } +#endif + +#ifdef USE_KATMAI_ASM + if ( cpu_has_xmm && getenv( "MESA_NO_KATMAI" ) == 0 ) { + gl_init_katmai_vertex_asm(); + } +#endif +#endif +} diff --git a/xc/extras/Mesa/src/X86/common_x86_asm.S b/xc/extras/Mesa/src/X86/common_x86_asm.S new file mode 100644 index 000000000..fbde9e30d --- /dev/null +++ b/xc/extras/Mesa/src/X86/common_x86_asm.S @@ -0,0 +1,151 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Check extended CPU capabilities. Now justs returns the raw CPUID + * feature information, allowing the higher level code to interpret the + * results. + * + * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> + * + * Cleaned up and simplified by Gareth Hughes <gareth@valinux.com> + */ + +#include "assyntax.h" +#include "common_x86_features.h" + + +/* Intel vendor string + */ +#define GENU 0x756e6547 /* "Genu" */ +#define INEI 0x49656e69 /* "ineI" */ +#define NTEL 0x6c65746e /* "ntel" */ + +/* AMD vendor string + */ +#define AUTH 0x68747541 /* "Auth" */ +#define ENTI 0x69746e65 /* "enti" */ +#define CAMD 0x444d4163 /* "cAMD" */ + + + SEG_DATA + +/* We might want to print out some useful messages. 
+ */ +LLBL( found_intel ): STRING( "Genuine Intel processor found\n\0" ) +LLBL( found_amd ): STRING( "Authentic AMD processor found\n\0" ) + + + SEG_TEXT + +ALIGNTEXT4 +GLOBL GLNAME( gl_identify_x86_cpu_features ) +GLNAME( gl_identify_x86_cpu_features ): + + PUSH_L ( EBX ) + + /* Test for the CPUID command. If the ID Flag bit in EFLAGS + * (bit 21) is writable, the CPUID command is present. + */ + PUSHF_L + POP_L ( EAX ) + MOV_L ( EAX, ECX ) + XOR_L ( CONST(0x00200000), EAX ) + PUSH_L ( EAX ) + POPF_L + PUSHF_L + POP_L ( EAX ) + + /* Verify the ID Flag bit has been written. + */ + CMP_L ( ECX, EAX ) + JZ ( LLBL ( cpuid_done ) ) + + /* Get the CPU vendor info. + */ + XOR_L ( EAX, EAX ) + CPUID + + /* Test for Intel processors. We must look for the + * "GenuineIntel" string in EBX, ECX and EDX. + */ + CMP_L ( CONST(GENU), EBX ) + JNE ( LLBL( cpuid_amd ) ) + CMP_L ( CONST(INEI), EDX ) + JNE ( LLBL( cpuid_amd ) ) + CMP_L ( CONST(NTEL), ECX ) + JNE ( LLBL( cpuid_amd ) ) + + /* We have an Intel processor, so we can get the feature + * information with an CPUID input value of 1. + */ + MOV_L ( CONST(0x1), EAX ) + CPUID + MOV_L ( EDX, EAX ) + JMP ( LLBL( cpuid_done ) ) + +LLBL( cpuid_amd ): + + /* Test for AMD processors. We must look for the + * "AuthenticAMD" string in EBX, ECX and EDX. + */ + CMP_L ( CONST(AUTH), EBX ) + JNE ( LLBL( cpuid_other ) ) + CMP_L ( CONST(ENTI), EDX ) + JNE ( LLBL( cpuid_other ) ) + CMP_L ( CONST(CAMD), ECX ) + JNE ( LLBL( cpuid_other ) ) + + /* We have an AMD processor, so we can get the feature + * information after we verify that the extended functions are + * supported. + */ + MOV_L ( CONST(0x80000000), EAX ) + CPUID + TEST_L ( EAX, EAX ) + JZ ( LLBL ( cpuid_failed ) ) + + MOV_L ( CONST(0x80000001), EAX ) + CPUID + MOV_L ( EDX, EAX ) + JMP ( LLBL ( cpuid_done ) ) + +LLBL( cpuid_other ): + + /* Test for other processors here when required. 
+ */ + +LLBL( cpuid_failed ): + + /* If we can't determine the feature information, we must + * return zero to indicate that no platform-specific + * optimizations can be used. + */ + MOV_L ( CONST(0), EAX ) + +LLBL ( cpuid_done ): + + POP_L ( EBX ) + RET diff --git a/xc/extras/Mesa/src/X86/common_x86asm.h b/xc/extras/Mesa/src/X86/common_x86_asm.h index a9d901f62..f781d503e 100644 --- a/xc/extras/Mesa/src/X86/common_x86asm.h +++ b/xc/extras/Mesa/src/X86/common_x86_asm.h @@ -1,20 +1,20 @@ /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.4 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -23,23 +23,22 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - /* - * Check CPU capabilities & initialize optimized funtions for this particular - * processor. + * Check CPU capabilities & initialize optimized funtions for this particular + * processor. 
+ * + * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> + * Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the + * new Katmai functions * - * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> - * Changed by Andre Werthmann <wertmann@cs.uni-potsdam.de> for using the - * new Katmai functions + * Reimplemented by Gareth Hughes <gareth@valinux.com> in a more + * future-proof manner, based on code in the Linux kernel. */ -#ifndef _common_3dnow_h -#define _common_3dnow_h -#define GL_CPU_GenuineIntel 1 -#define GL_CPU_MMX 2 -#define GL_CPU_3Dnow 4 -#define GL_CPU_Katmai 8 /* set if the katmai-instructions are available */ -#define GL_CPU_AnyX86 16 /* set if x86 asm allowed */ +#ifndef __COMMON_X86_ASM_H__ +#define __COMMON_X86_ASM_H__ + +#include "common_x86_features.h" #ifdef HAVE_CONFIG_H #include "conf.h" @@ -56,7 +55,8 @@ #endif extern int gl_x86_cpu_features; -extern void gl_init_all_x86_asm (void); -extern int gl_identify_x86_cpu_features(void); + +extern void gl_init_all_x86_transform_asm( void ); +extern void gl_init_all_x86_vertex_asm( void ); #endif diff --git a/xc/extras/Mesa/src/X86/common_x86_features.h b/xc/extras/Mesa/src/X86/common_x86_features.h new file mode 100644 index 000000000..b8e373d89 --- /dev/null +++ b/xc/extras/Mesa/src/X86/common_x86_features.h @@ -0,0 +1,76 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * x86 CPUID feature information. The raw data is returned by + * gl_identify_x86_cpu_features() and interpreted with the cpu_has_* + * helper macros. 
+ * + * Gareth Hughes <gareth@valinux.com> + */ + +#ifndef __COMMON_X86_FEATURES_H__ +#define __COMMON_X86_FEATURES_H__ + +/* Capabilities of CPUs + */ +#define X86_FEATURE_FPU 0x00000001 +#define X86_FEATURE_VME 0x00000002 +#define X86_FEATURE_DE 0x00000004 +#define X86_FEATURE_PSE 0x00000008 +#define X86_FEATURE_TSC 0x00000010 +#define X86_FEATURE_MSR 0x00000020 +#define X86_FEATURE_PAE 0x00000040 +#define X86_FEATURE_MCE 0x00000080 +#define X86_FEATURE_CX8 0x00000100 +#define X86_FEATURE_APIC 0x00000200 +#define X86_FEATURE_10 0x00000400 +#define X86_FEATURE_SEP 0x00000800 +#define X86_FEATURE_MTRR 0x00001000 +#define X86_FEATURE_PGE 0x00002000 +#define X86_FEATURE_MCA 0x00004000 +#define X86_FEATURE_CMOV 0x00008000 +#define X86_FEATURE_PAT 0x00010000 +#define X86_FEATURE_PSE36 0x00020000 +#define X86_FEATURE_18 0x00040000 +#define X86_FEATURE_19 0x00080000 +#define X86_FEATURE_20 0x00100000 +#define X86_FEATURE_21 0x00200000 +#define X86_FEATURE_MMXEXT 0x00400000 +#define X86_FEATURE_MMX 0x00800000 +#define X86_FEATURE_FXSR 0x01000000 +#define X86_FEATURE_XMM 0x02000000 +#define X86_FEATURE_26 0x04000000 +#define X86_FEATURE_27 0x08000000 +#define X86_FEATURE_28 0x10000000 +#define X86_FEATURE_29 0x20000000 +#define X86_FEATURE_3DNOWEXT 0x40000000 +#define X86_FEATURE_3DNOW 0x80000000 + +#define cpu_has_mmx (gl_x86_cpu_features & X86_FEATURE_MMX) +#define cpu_has_xmm (gl_x86_cpu_features & X86_FEATURE_XMM) +#define cpu_has_3dnow (gl_x86_cpu_features & X86_FEATURE_3DNOW) + +#endif diff --git a/xc/extras/Mesa/src/X86/common_x86asm.S b/xc/extras/Mesa/src/X86/common_x86asm.S deleted file mode 100644 index 26b4f4ebf..000000000 --- a/xc/extras/Mesa/src/X86/common_x86asm.S +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Mesa 3-D graphics library - * Version: 3.3 - * - * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Check extended CPU capabilities. 
- * - * Written by Holger Waechtler <holger@akaflieg.extern.tu-berlin.de> - */ - - - - -#include "assyntax.h" - - SEG_TEXT - - -#define GL_CPU_GenuineIntel CONST(1) -#define GL_CPU_MMX CONST(2) -#define GL_CPU_3Dnow CONST(4) -#define GL_CPU_Katmai CONST(8) - - -GLOBL GLNAME(gl_identify_x86_cpu_features) - ALIGNTEXT4 -GLNAME(gl_identify_x86_cpu_features): - - PUSH_L ( EBX ) - PUSH_L ( ESI ) - XOR_L ( ESI, ESI ) - - PUSHF_L /* test for the cpuid command */ - POP_L ( EAX ) /* (if the ID-Bit (Bit 21) in */ - MOV_L ( EAX, EBX ) /* EEFLAGS is writable, the */ - XOR_L ( CONST(0x00200000), EAX ) /* cpuid command is present) */ - PUSH_L ( EAX ) - POPF_L - PUSHF_L - POP_L ( EAX ) - CMP_L ( EBX, EAX ) - JZ ( LLBL ( Gidcpu_end ) ) /* cpuid command not supported */ - - XOR_L ( EAX, EAX ) - CPUID - - -LLBL (Gidcpu_intel): - CMP_L ( CONST(0x756e6547), EBX ) /* compare */ - JNE ( LLBL ( Gidcpu_amd ) ) /* vendor string == 'GenuineIntel' ? */ - CMP_L ( CONST(0x49656e69), EDX ) - JNE ( LLBL ( Gidcpu_amd ) ) - CMP_L ( CONST(0x6c65746e), ECX ) - JNE ( LLBL ( Gidcpu_amd ) ) - OR_L ( GL_CPU_GenuineIntel, ESI ) - JMP ( LLBL ( Gidcpu_katmai ) ) - - -LLBL ( Gidcpu_amd ): - CMP_L ( CONST(0x68747541), EBX ) /* compare vendor string == 'Auth'? */ - JNE ( LLBL ( Gidcpu_other ) ) - CMP_L ( CONST(0x69746e65), EDX ) /* 'enti' */ - JNE ( LLBL ( Gidcpu_other ) ) - CMP_L ( CONST(0x444d4163), ECX ) /* 'cAMD' (= 'AuthenticAMD') */ - JE ( LLBL ( Gidcpu_3dnow ) ) - - -LLBL ( Gidcpu_other ): - -/* insert here other vendorstrings with 3dnow capable cpu vendors */ - -JMP ( LLBL ( Gidcpu_mmx ) ) - -LLBL ( Gidcpu_katmai ): - MOV_L ( CONST(0x1), EAX ) - CPUID /* test for Katmai */ - TEST_L ( CONST(0x002000000), EDX ) /* is Bit 25 set ? */ - JZ ( LLBL ( Gidcpu_mmx ) ) /* streaming SIMD-extensions not found... */ - OR_L ( GL_CPU_Katmai, ESI ) /* Katmai detected ! :) */ - JMP ( LLBL ( Gidcpu_mmx ) ) /* check for mmx */ - -LLBL ( Gidcpu_3dnow ): - MOV_L ( CONST(0x80000000), EAX ) /* extended functions supported ? 
*/ - - CPUID - TEST_L ( EAX, EAX ) - JZ ( LLBL ( Gidcpu_end ) ) - - MOV_L ( CONST(0x80000001), EAX ) - CPUID /* test for 3Dnow! */ - TEST_L ( CONST(0x80000000), EDX ) - JZ ( LLBL ( Gidcpu_mmx ) ) - OR_L ( GL_CPU_3Dnow, ESI ) /* (detected 3Dnow! extension) */ - -LLBL ( Gidcpu_mmx ): - MOV_L ( CONST(0x1), EAX ) - CPUID - TEST_L ( CONST(0x00800000), EDX ) /* Bit 23 set ? */ - JZ ( LLBL ( Gidcpu_end ) ) - OR_L ( GL_CPU_MMX, ESI ) /* (detected mmx extension) */ - - -LLBL ( Gidcpu_end ): - MOV_L ( ESI, EAX ) - STC - POP_L ( ESI ) - POP_L ( EBX ) - - RET - diff --git a/xc/extras/Mesa/src/X86/katmai.c b/xc/extras/Mesa/src/X86/katmai.c index 507a7208c..d0aa79e27 100644 --- a/xc/extras/Mesa/src/X86/katmai.c +++ b/xc/extras/Mesa/src/X86/katmai.c @@ -1,19 +1,20 @@ + /* * Mesa 3-D graphics library - * Version: 3.3 - * + * Version: 3.4 + * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -21,215 +22,199 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/X86/katmai.c,v 1.4 2000/09/26 15:56:39 tsi Exp $ */ /* * PentiumIII-SIMD (SSE) optimizations contributed by * Andre Werthmann <wertmann@cs.uni-potsdam.de> */ -#if defined(USE_KATMAI_ASM) -#include "katmai.h" - -#include <limits.h> -#include <stdio.h> -#include <stdlib.h> -#include <math.h> - +#include "glheader.h" #include "context.h" #include "types.h" -#include "xform.h" #include "vertices.h" +#include "xform.h" +#include "katmai.h" #ifdef DEBUG #include "debug_xform.h" #endif +#define XFORM_ARGS GLvector4f *to_vec, \ + const GLfloat m[16], \ + const GLvector4f *from_vec, \ + const GLubyte *mask, \ + const GLubyte flag + + +#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS ); + + +#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \ + gl_transform_tab[cma][sz][MATRIX_GENERAL] = \ + gl_##pfx##_transform_points##sz##_general_##masked; \ + gl_transform_tab[cma][sz][MATRIX_IDENTITY] = \ + gl_##pfx##_transform_points##sz##_identity_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \ + gl_##pfx##_transform_points##sz##_3d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \ + gl_##pfx##_transform_points##sz##_perspective_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D] = \ + gl_##pfx##_transform_points##sz##_2d_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \ + 
gl_##pfx##_transform_points##sz##_2d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D] = \ + gl_##pfx##_transform_points##sz##_3d_##masked; + + + +#define NORM_ARGS const GLmatrix *mat, \ + GLfloat scale, \ + const GLvector3f *in, \ + const GLfloat *lengths, \ + const GLubyte mask[], \ + GLvector3f *dest + + +#define DECLARE_NORM_GROUP( pfx, masked ) \ + extern void _ASMAPI gl_##pfx##_rescale_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_normalize_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normals_no_rot_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_rescale_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_rescale_normals_no_rot_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normalize_normals_##masked( NORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_normalize_normals_no_rot_##masked( NORM_ARGS ); + + +#define ASSIGN_NORM_GROUP( pfx, cma, masked ) \ + gl_normal_tab[NORM_RESCALE][cma] = \ + gl_##pfx##_rescale_normals_##masked; \ + gl_normal_tab[NORM_NORMALIZE][cma] = \ + gl_##pfx##_normalize_normals_##masked; \ + gl_normal_tab[NORM_TRANSFORM][cma] = \ + gl_##pfx##_transform_normals_##masked; \ + gl_normal_tab[NORM_TRANSFORM_NO_ROT][cma] = \ + gl_##pfx##_transform_normals_no_rot_##masked; \ + gl_normal_tab[NORM_TRANSFORM | NORM_RESCALE][cma] = \ + gl_##pfx##_transform_rescale_normals_##masked; \ + gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE][cma] = \ + gl_##pfx##_transform_rescale_normals_no_rot_##masked; \ + gl_normal_tab[NORM_TRANSFORM | NORM_NORMALIZE][cma] = \ + gl_##pfx##_transform_normalize_normals_##masked; \ + gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE][cma] = \ + gl_##pfx##_transform_normalize_normals_no_rot_##masked; + + +#ifdef USE_KATMAI_ASM +DECLARE_XFORM_GROUP( katmai, 1, raw ) +DECLARE_XFORM_GROUP( katmai, 2, raw ) 
+DECLARE_XFORM_GROUP( katmai, 3, raw ) + +DECLARE_XFORM_GROUP( katmai, 1, masked ) +DECLARE_XFORM_GROUP( katmai, 2, masked ) +DECLARE_XFORM_GROUP( katmai, 3, masked ) + +#if 1 +/* Some functions are not written in SSE-assembly, because the fpu ones are faster */ +extern void gl_katmai_transform_normals_no_rot_raw(NORM_ARGS); +extern void gl_katmai_transform_rescale_normals_raw(NORM_ARGS); +extern void gl_katmai_transform_rescale_normals_no_rot_raw(NORM_ARGS); + +extern void gl_katmai_transform_points4_general_raw(XFORM_ARGS); +extern void gl_katmai_transform_points4_general_masked(XFORM_ARGS); +extern void gl_katmai_transform_points4_identity_masked(XFORM_ARGS); +extern void gl_katmai_transform_points4_3d_no_rot_masked(XFORM_ARGS); +extern void gl_katmai_transform_points4_3d_raw(XFORM_ARGS); +extern void gl_katmai_transform_points4_3d_masked(XFORM_ARGS); +#else +DECLARE_NORM_GROUP( katmai, raw ) +#endif -#define XFORM_ARGS GLvector4f *to_vec, \ - const GLfloat m[16], \ - const GLvector4f *from_vec, \ - const GLubyte *mask, \ - const GLubyte flag - - - -#define DECLARE_XFORM_GROUP(pfx, v, masked) \ - extern void gl_##pfx##_transform_points##v##_general_##masked(XFORM_ARGS); \ - extern void gl_##pfx##_transform_points##v##_identity_##masked(XFORM_ARGS); \ - extern void gl_##pfx##_transform_points##v##_3d_no_rot_##masked(XFORM_ARGS); \ - extern void gl_##pfx##_transform_points##v##_perspective_##masked(XFORM_ARGS);\ - extern void gl_##pfx##_transform_points##v##_2d_##masked(XFORM_ARGS); \ - extern void gl_##pfx##_transform_points##v##_2d_no_rot_##masked(XFORM_ARGS); \ - extern void gl_##pfx##_transform_points##v##_3d_##masked(XFORM_ARGS); - - - -#define ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked ) \ - gl_transform_tab[cma][vsize][MATRIX_GENERAL] \ - = gl_##pfx##_transform_points##vsize##_general_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_IDENTITY] \ - = gl_##pfx##_transform_points##vsize##_identity_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D_NO_ROT] \ 
- = gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_PERSPECTIVE] \ - = gl_##pfx##_transform_points##vsize##_perspective_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D] \ - = gl_##pfx##_transform_points##vsize##_2d_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D_NO_ROT] \ - = gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D] \ - = gl_##pfx##_transform_points##vsize##_3d_##masked; - - - - -#define NORM_ARGS const GLmatrix *mat, \ - GLfloat scale, \ - const GLvector3f *in, \ - const GLfloat *lengths, \ - const GLubyte mask[], \ - GLvector3f *dest - - - -#define DECLARE_NORM_GROUP(pfx, masked) \ - extern void gl_##pfx##_rescale_normals_##masked## (NORM_ARGS); \ - extern void gl_##pfx##_normalize_normals_##masked## (NORM_ARGS); \ - extern void gl_##pfx##_transform_normals_##masked## (NORM_ARGS); \ - extern void gl_##pfx##_transform_normals_no_rot_##masked## (NORM_ARGS); \ - extern void gl_##pfx##_transform_rescale_normals_##masked## (NORM_ARGS); \ - extern void gl_##pfx##_transform_rescale_normals_no_rot_##masked## (NORM_ARGS); \ - extern void gl_##pfx##_transform_normalize_normals_##masked## (NORM_ARGS); \ - extern void gl_##pfx##_transform_normalize_normals_no_rot_##masked## (NORM_ARGS); - - - -#define ASSIGN_NORM_GROUP( pfx, cma, masked ) \ - gl_normal_tab[NORM_RESCALE][cma] = \ - gl_##pfx##_rescale_normals_##masked##; \ - gl_normal_tab[NORM_NORMALIZE][cma] = \ - gl_##pfx##_normalize_normals_##masked##; \ - gl_normal_tab[NORM_TRANSFORM][cma] = \ - gl_##pfx##_transform_normals_##masked##; \ - gl_normal_tab[NORM_TRANSFORM_NO_ROT][cma] = \ - gl_##pfx##_transform_normals_no_rot_##masked##; \ - gl_normal_tab[NORM_TRANSFORM | NORM_RESCALE][cma] = \ - gl_##pfx##_transform_rescale_normals_##masked##; \ - gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE][cma] = \ - gl_##pfx##_transform_rescale_normals_no_rot_##masked##; \ - gl_normal_tab[NORM_TRANSFORM | 
NORM_NORMALIZE][cma] = \ - gl_##pfx##_transform_normalize_normals_##masked##; \ - gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE][cma] = \ - gl_##pfx##_transform_normalize_normals_no_rot_##masked##; - - -extern void gl_katmai_project_vertices( GLfloat *first, - GLfloat *last, - const GLfloat *m, - GLuint stride ); - -extern void gl_katmai_project_clipped_vertices( GLfloat *first, +extern void _ASMAPI gl_v16_katmai_general_xform( GLfloat *first_vert, + const GLfloat *m, + const GLfloat *src, + GLuint src_stride, + GLuint count ); +extern void _ASMAPI gl_katmai_project_vertices( GLfloat *first, GLfloat *last, const GLfloat *m, - GLuint stride, - const GLubyte *clipmask ); - -extern void gl_v16_katmai_general_xform( GLfloat *first_vert, - const GLfloat *m, - const GLfloat *src, - GLuint src_stride, - GLuint count ); + GLuint stride ); +extern void _ASMAPI gl_katmai_project_clipped_vertices( GLfloat *first, + GLfloat *last, + const GLfloat *m, + GLuint stride, + const GLubyte *clipmask ); +#endif -void gl_init_katmai_asm_transforms (void) +void gl_init_katmai_transform_asm( void ) { - extern void gl_katmai_transform_rescale_normals_raw(NORM_ARGS); - extern void gl_katmai_transform_rescale_normals_no_rot_raw(NORM_ARGS); - extern void gl_katmai_transform_normals_no_rot_raw(NORM_ARGS); - - /* Some functions are not written in SSE-assembly, because the fpu ones are faster */ - extern void gl_katmai_transform_points4_general_raw(XFORM_ARGS); - extern void gl_katmai_transform_points4_general_masked(XFORM_ARGS); - extern void gl_katmai_transform_points4_identity_masked(XFORM_ARGS); - extern void gl_katmai_transform_points4_3d_no_rot_masked(XFORM_ARGS); - extern void gl_katmai_transform_points4_3d_raw(XFORM_ARGS); - extern void gl_katmai_transform_points4_3d_masked(XFORM_ARGS); - +#ifdef USE_KATMAI_ASM + ASSIGN_XFORM_GROUP( katmai, 0, 1, raw ); + ASSIGN_XFORM_GROUP( katmai, 0, 2, raw ); + ASSIGN_XFORM_GROUP( katmai, 0, 3, raw ); - DECLARE_XFORM_GROUP( katmai, 1, raw ) - 
DECLARE_XFORM_GROUP( katmai, 1, masked ) + ASSIGN_XFORM_GROUP( katmai, CULL_MASK_ACTIVE, 1, masked ); + ASSIGN_XFORM_GROUP( katmai, CULL_MASK_ACTIVE, 2, masked ); + ASSIGN_XFORM_GROUP( katmai, CULL_MASK_ACTIVE, 3, masked ); - DECLARE_XFORM_GROUP( katmai, 2, raw ) - DECLARE_XFORM_GROUP( katmai, 2, masked ) - - DECLARE_XFORM_GROUP( katmai, 3, raw ) - DECLARE_XFORM_GROUP( katmai, 3, masked ) - - - gl_normal_tab[NORM_TRANSFORM | NORM_RESCALE][0]=gl_katmai_transform_rescale_normals_raw; - gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE][0]=gl_katmai_transform_rescale_normals_no_rot_raw; - gl_normal_tab[NORM_TRANSFORM_NO_ROT][0]=gl_katmai_transform_normals_no_rot_raw; - - - gl_transform_tab[0][4][MATRIX_GENERAL]=gl_katmai_transform_points4_general_raw; - gl_transform_tab[CULL_MASK_ACTIVE][4][MATRIX_GENERAL]=gl_katmai_transform_points4_general_masked; - - gl_transform_tab[CULL_MASK_ACTIVE][4][MATRIX_IDENTITY]=gl_katmai_transform_points4_identity_masked; - - gl_transform_tab[CULL_MASK_ACTIVE][4][MATRIX_3D_NO_ROT]=gl_katmai_transform_points4_3d_no_rot_masked; - - gl_transform_tab[0][4][MATRIX_3D]=gl_katmai_transform_points4_3d_raw; - gl_transform_tab[CULL_MASK_ACTIVE][4][MATRIX_3D]=gl_katmai_transform_points4_3d_masked; - - - ASSIGN_XFORM_GROUP( katmai, 0, 1, raw ) - ASSIGN_XFORM_GROUP( katmai, CULL_MASK_ACTIVE, 1, masked ) - - ASSIGN_XFORM_GROUP( katmai, 0, 2, raw ) - ASSIGN_XFORM_GROUP( katmai, CULL_MASK_ACTIVE, 2, masked ) - - ASSIGN_XFORM_GROUP( katmai, 0, 3, raw ) - ASSIGN_XFORM_GROUP( katmai, CULL_MASK_ACTIVE, 3, masked ) - - - /* TODO ! (some parts of it) */ - - /* - DECLARE_NORM_GROUP( katmai, raw ) - - ASSIGN_NORM_GROUP( katmai, 0, raw ) - */ +#if 1 + /* TODO: Finish these off. 
+ */ + gl_normal_tab[NORM_TRANSFORM_NO_ROT][0] = + gl_katmai_transform_normals_no_rot_raw; + gl_normal_tab[NORM_TRANSFORM | NORM_RESCALE][0] = + gl_katmai_transform_rescale_normals_raw; + gl_normal_tab[NORM_TRANSFORM_NO_ROT | NORM_RESCALE][0] = + gl_katmai_transform_rescale_normals_no_rot_raw; + + gl_transform_tab[0][4][MATRIX_GENERAL] = + gl_katmai_transform_points4_general_raw; + gl_transform_tab[0][4][MATRIX_3D] = + gl_katmai_transform_points4_3d_raw; + + gl_transform_tab[CULL_MASK_ACTIVE][4][MATRIX_GENERAL] = + gl_katmai_transform_points4_general_masked; + gl_transform_tab[CULL_MASK_ACTIVE][4][MATRIX_IDENTITY] = + gl_katmai_transform_points4_identity_masked; + gl_transform_tab[CULL_MASK_ACTIVE][4][MATRIX_3D_NO_ROT] = + gl_katmai_transform_points4_3d_no_rot_masked; + gl_transform_tab[CULL_MASK_ACTIVE][4][MATRIX_3D] = + gl_katmai_transform_points4_3d_masked; +#else + ASSIGN_NORM_GROUP( katmai, 0, raw ); +#endif #ifdef DEBUG - gl_test_all_transform_functions("Katmai!"); - gl_test_all_normal_transform_functions("Katmai!"); + gl_test_all_transform_functions( "SSE" ); + gl_test_all_normal_transform_functions( "SSE" ); +#endif #endif - - /* done */ - - /* Hook in some stuff for vertices.c. - */ - gl_xform_points3_v16_general = gl_v16_katmai_general_xform; - - /* test, if it works correctly ! */ - gl_project_v16 = gl_katmai_project_vertices; - - /* test, if it works correctly ! */ - gl_project_clipped_v16 = gl_katmai_project_clipped_vertices; } -#else - - -/* silence compiler warning */ -extern void _mesa_katmai_dummy_function(void); -void _mesa_katmai_dummy_function(void) +void gl_init_katmai_vertex_asm( void ) { -} - +#ifdef USE_KATMAI_ASM + gl_xform_points3_v16_general = gl_v16_katmai_general_xform; +#if 0 + /* GH: These are broken. I'm fixing them now. 
+ */ + gl_project_v16 = gl_katmai_project_vertices; + gl_project_clipped_v16 = gl_katmai_project_clipped_vertices; +#endif +#if 0 + gl_test_all_vertex_functions( "SSE" ); +#endif #endif +} diff --git a/xc/extras/Mesa/src/X86/katmai.h b/xc/extras/Mesa/src/X86/katmai.h index 40d769f4d..0deeb629b 100644 --- a/xc/extras/Mesa/src/X86/katmai.h +++ b/xc/extras/Mesa/src/X86/katmai.h @@ -1,19 +1,20 @@ + /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.4 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -22,22 +23,17 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - /* * PentiumIII-SIMD (SSE) optimizations contributed by * Andre Werthmann <wertmann@cs.uni-potsdam.de> */ - #ifndef __KATMAI_H__ #define __KATMAI_H__ - - #include "xform.h" +void gl_init_katmai_transform_asm( void ); +void gl_init_katmai_vertex_asm( void ); -void gl_init_katmai_asm_transforms (void); - - -#endif /* __KATMAI_H__ */ +#endif diff --git a/xc/extras/Mesa/src/X86/katmai_norm_raw.S b/xc/extras/Mesa/src/X86/katmai_norm_raw.S index fdd720de7..02fafd8aa 100644 --- a/xc/extras/Mesa/src/X86/katmai_norm_raw.S +++ b/xc/extras/Mesa/src/X86/katmai_norm_raw.S @@ -3,9 +3,9 @@ * - insert PREFETCH instructions to avoid cache-misses ! * - some more optimizations are possible... * - for 40-50% more performance in the SSE-functions, the - * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! + * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! */ - + SEG_TEXT @@ -28,15 +28,15 @@ #define V3F_COUNT 8 #define V3F_STRIDE 12 -#define MAT_INV 64 +#define MAT_INV 4 #define M(i) REGOFF(i * 4, EDX) #define S(i) REGOFF(i * 4, ESI) #define D(i) REGOFF(i * 4, EDI) #define STRIDE REGOFF(12, ESI) - + #undef FRAME_OFFSET - + /* * Offsets for norm_func arguments * @@ -107,7 +107,7 @@ LLBL(K_G3TRNNRR_finish): RET #undef FRAME_OFFSET - + ALIGNTEXT16 GLOBL GLNAME(gl_katmai_transform_rescale_normals_raw) @@ -136,7 +136,7 @@ GLNAME(gl_katmai_transform_rescale_normals_raw): MOV_L( REGOFF(V3F_START, EDI), EDI ) /* ptr to first dest vertex */ ADD_L( EDI, ECX ) /* count += dest ptr */ - + ALIGNTEXT32 MOVSS ( M(0), XMM0 ) /* m0 */ MOVSS ( M(4), XMM1 ) /* m4 */ @@ -144,7 +144,7 @@ ALIGNTEXT32 MOVSS ( ARG_SCALE, XMM4 ) /* scale */ SHUFPS ( CONST(0x0), XMM4, XMM4 ) /* scale | scale */ - + MULPS ( XMM4, XMM0 ) /* m4*scale | m0*scale */ MOVSS ( M(1), XMM1 ) /* m1 */ MOVSS ( M(5), XMM2 ) /* m5 */ @@ -159,7 +159,7 @@ ALIGNTEXT32 MULSS ( ARG_SCALE, XMM6 ) /* m8*scale */ MOVSS ( M(9), XMM7 ) /* m9 */ MULSS ( ARG_SCALE, XMM7 ) /* m9*scale */ - + ALIGNTEXT32 
LLBL(K_G3TRNR_top): MOVSS ( S(0), XMM3 ) /* ux */ @@ -187,7 +187,7 @@ LLBL(K_G3TRNR_top): ADDSS ( XMM4, XMM3 ) ADDSS ( XMM5, XMM3 ) MOVSS ( XMM3, D(2) ) - + LLBL(K_G3TRNR_skip): ADD_L ( CONST(12), EDI ) ADD_L ( EAX, ESI ) @@ -234,7 +234,7 @@ ALIGNTEXT32 MOVSS( M(5), XMM1 ) /* m5 */ UNPCKLPS( XMM1, XMM0 ) /* m5 | m0 */ MOVSS( M(10), XMM1 ) /* m10 */ - + ALIGNTEXT32 LLBL(K_G3TNNRR_top): MOVLPS( S(0), XMM2 ) /* uy | ux */ @@ -244,7 +244,7 @@ LLBL(K_G3TNNRR_top): MOVSS( S(2), XMM2 ) /* uz */ MULSS( XMM1, XMM2 ) /* uz*m10 */ MOVSS( XMM2, D(2) ) - + LLBL(K_G3TNNRR_skip): ADD_L ( CONST(12), EDI ) ADD_L ( EAX, ESI ) @@ -256,4 +256,3 @@ LLBL(K_G3TNNRR_finish): POP_L ( ESI ) RET #undef FRAME_OFFSET - diff --git a/xc/extras/Mesa/src/X86/vertex_katmai.S b/xc/extras/Mesa/src/X86/katmai_vertex.S index a8b08902a..90fe52fae 100644 --- a/xc/extras/Mesa/src/X86/vertex_katmai.S +++ b/xc/extras/Mesa/src/X86/katmai_vertex.S @@ -1,17 +1,16 @@ #include "assyntax.h" + SEG_TEXT + #define MAT_SY 20 #define MAT_SZ 40 #define MAT_TX 48 #define MAT_TY 52 #define MAT_TZ 56 - - - SEG_TEXT /* - * void gl_v16_katmai_general_xform (GLfloat *dest, + * void gl_v16_katmai_general_xform( GLfloat *dest, * const GLfloat *m, * const GLfloat *src, * GLuint src_stride, @@ -23,51 +22,51 @@ ALIGNTEXT16 GLOBL GLNAME( gl_v16_katmai_general_xform ) GLNAME( gl_v16_katmai_general_xform ): - PUSH_L( EDI ) - PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( ESI ) - MOV_L( REGOFF(12, ESP), EAX ) /* f - destination */ - MOV_L( REGOFF(16, ESP), ESI ) /* m - matrix */ - MOV_L( REGOFF(20, ESP), EDX ) /* obj - source */ - MOV_L( REGOFF(24, ESP), EDI ) /* obj_stride */ - MOV_L( REGOFF(28, ESP), ECX ) /* count */ + MOV_L( REGOFF(12, ESP), EAX ) /* destination */ + MOV_L( REGOFF(16, ESP), ESI ) /* matrix */ + MOV_L( REGOFF(20, ESP), EDX ) /* source */ + MOV_L( REGOFF(24, ESP), EDI ) /* src_stride */ + MOV_L( REGOFF(28, ESP), ECX ) /* count */ - MOVUPS( REGOFF(0, ESI), XMM4 ) /* x14, x13, x12, x11 => xmm4 */ - MOVUPS( REGOFF(16, ESI), 
XMM5 ) /* x24, x23, x22, x21 => xmm5 */ - MOVUPS( REGOFF(32, ESI), XMM6 ) /* x34, x33, x32, x31 => xmm6 */ - MOVUPS( REGOFF(48, ESI), XMM7 ) /* x44, x43, x42, x41 => xmm7 */ + MOVAPS( REGOFF(0, ESI), XMM4 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( REGOFF(16, ESI), XMM5 ) /* m7 | m6 | m5 | m4 */ + MOVAPS( REGOFF(32, ESI), XMM6 ) /* m11 | m10 | m9 | m8 */ + MOVAPS( REGOFF(48, ESI), XMM7 ) /* m15 | m14 | m13 | m12 */ ALIGNTEXT32 -LLBL(loop1): +LLBL(v16_katmai_general_loop): - MOVSS( REGOFF(0, EDX), XMM0 ) /* s1 => xmm0 */ - SHUFPS( CONST(0x0), XMM0, XMM0 ) /* xmm0 shuffle to other 96 bits */ - MULPS( XMM4, XMM0 ) /* x14*s1, x13*s1, x12*s1, x11*s1 */ + MOVSS( REGOFF(0, EDX), XMM0 ) /* | | | x0 */ + SHUFPS( CONST(0x0), XMM0, XMM0 ) /* x0 | x0 | x0 | x0 */ + MULPS( XMM4, XMM0 ) /* x0*m3 | x0*m2 | x0*m1 | x0*m0 */ - MOVSS( REGOFF(4, EDX), XMM1 ) /* s2 => xmm1 */ - SHUFPS( CONST(0x0), XMM1, XMM1 ) /* xmm1 shuffle to other 96 bits */ - MULPS( XMM5, XMM1 ) /* x24*s2, x23*s2, x22*s2, x21*s2 */ + MOVSS( REGOFF(4, EDX), XMM1 ) /* | | | x1 */ + SHUFPS( CONST(0x0), XMM1, XMM1 ) /* x1 | x1 | x1 | x1 */ + MULPS( XMM5, XMM1 ) /* x1*m7 | x1*m6 | x1*m5 | x1*m4 */ - MOVSS( REGOFF(8, EDX), XMM2 ) /* s3 => xmm2 */ - SHUFPS( CONST(0x0), XMM2, XMM2 ) /* xmm2 shuffle to other 96 bits */ - MULPS( XMM6, XMM2 ) /* x34*s3, x33*s3, x32*s3, x31*s3 */ + MOVSS( REGOFF(8, EDX), XMM2 ) /* | | | x2 */ + SHUFPS( CONST(0x0), XMM2, XMM2 ) /* x2 | x2 | x2 | x2 */ + MULPS( XMM6, XMM2 ) /* x2*m11 | x2*m10 | x2*m9 | x2*m8 */ - ADDPS( XMM1, XMM0 ) /* xmm0 + xmm1 => xmm0 */ - ADDPS( XMM2, XMM0 ) /* xmm0 + xmm2 => xmm0 */ - ADDPS( XMM7, XMM0 ) /* xmm0 + xmm7 => xmm0 */ + ADDPS( XMM1, XMM0 ) + ADDPS( XMM2, XMM0 ) + ADDPS( XMM7, XMM0 ) /* r3 | r2 | r1 | r0 */ - MOVUPS( XMM0, REGOFF(0, EAX) ) /* xmm0 => dest( f11, f12, f13, f14 ) */ + MOVAPS( XMM0, REGOFF(0, EAX) ) - ADD_L ( CONST(64), EAX ) /* f + 64 bytes => f */ - ADD_L ( EDI, EDX ) /* obj + obj_stride => obj */ + ADD_L( CONST(64), EAX ) /* next output vertex */ + 
ADD_L( EDI, EDX ) /* next input vertex */ - DEC_L ( ECX ) /* count - 1 => count */ - JNE ( LLBL(loop1) ) /* if not zero, go back to LABL(loop1) */ + DEC_L( ECX ) + JNE( LLBL(v16_katmai_general_loop) ) - POP_L( ESI ) - POP_L( EDI ) + POP_L( ESI ) + POP_L( EDI ) - RET + RET /* void gl_katmai_project_vertices(GLfloat *first, @@ -80,7 +79,7 @@ LLBL(loop1): ALIGNTEXT16 GLOBL GLNAME( gl_katmai_project_vertices ) GLNAME( gl_katmai_project_vertices ): - + PUSH_L( EBP ) MOV_L( REGOFF(8, ESP), ECX ) /* first_vert */ @@ -88,9 +87,9 @@ GLNAME( gl_katmai_project_vertices ): MOV_L( REGOFF(16, ESP), EBP ) /* matrix */ MOV_L( REGOFF(20, ESP), EAX ) /* stride */ - + ALIGNTEXT32 - MOVUPS( REGOFF(MAT_TX, EBP), XMM0 ) /* (x44), x43, x42, x41 => xmm0 */ + MOVAPS( REGOFF(MAT_TX, EBP), XMM0 ) /* (x44), x43, x42, x41 => xmm0 */ MOVSS( REGOFF(0, EBP), XMM1 ) /* -, -, -, x11 => xmm1 */ UNPCKLPS( REGOFF(MAT_SY, EBP), XMM1 ) /* -, -, x22, x11 => xmm1 */ SHUFPS( CONST(0x44), REGOFF(MAT_SZ, EBP), XMM1 ) @@ -99,7 +98,7 @@ ALIGNTEXT32 SUB_L( ECX, EDX ) /* last -= first */ LLBL(v16_katmai_pv_loop_start): - MOVUPS( REGOFF(0, ECX), XMM3 ) /* f[3], f[2], f[1], f[0] */ + MOVAPS( REGOFF(0, ECX), XMM3 ) /* f[3], f[2], f[1], f[0] */ MOVSS( REGOFF(12, ECX), XMM2 ) /* -, -, -, f[3] */ SHUFPS( CONST(0x0), XMM2, XMM2 ) /* f[3], f[3], f[3], f[3] */ @@ -109,7 +108,7 @@ LLBL(v16_katmai_pv_loop_start): MULPS( XMM3, XMM1 ) /* -, x33*1/f[3]*f[2]... */ ADDPS( XMM0, XMM1 ) /* -, x33*1/f[3]*f[2]+x43... */ - MOVUPS( XMM1, REGOFF(0, ECX) ) /* back to f */ + MOVAPS( XMM1, REGOFF(0, ECX) ) /* back to f */ MOVSS( XMM2, REGOFF(12, ECX) ) /* 1/f[3] into f[3] ! 
*/ @@ -133,7 +132,7 @@ LLBL(v16_katmai_pv_loop_start): ALIGNTEXT16 GLOBL GLNAME( gl_katmai_project_clipped_vertices ) GLNAME( gl_katmai_project_clipped_vertices ): - + PUSH_L( EBP ) PUSH_L( ESI ) @@ -142,12 +141,12 @@ GLNAME( gl_katmai_project_clipped_vertices ): MOV_L( REGOFF(20, ESP), EBP ) /* matrix */ MOV_L( REGOFF(24, ESP), EAX ) /* stride */ MOV_L( REGOFF(28, ESP), ESI ) /* clip_mask */ - - + + ALIGNTEXT32 - MOVUPS( REGOFF(MAT_TX, EBP), XMM0 ) /* (x44), x43, x42, x41 => xmm0 */ + MOVAPS( REGOFF(MAT_TX, EBP), XMM0 ) /* (x44), x43, x42, x41 => xmm0 */ MOVSS( REGOFF(0, EBP), XMM1 ) /* -, -, -, x11 => xmm1 */ UNPCKLPS( REGOFF(MAT_SY, EBP), XMM1 ) /* -, -, x22, x11 => xmm1 */ SHUFPS( CONST(0x44), REGOFF(MAT_SZ, EBP), XMM1 ) @@ -157,7 +156,7 @@ LLBL(v16_katmai_pcv_loop_start): CMP_B ( CONST(0), REGIND(ESI) ) /* clip_mask == 0 ? */ JNE( LLBL(v16_katmai_pcv_skip) ) /* no -> skip ! */ - MOVUPS( REGOFF(0, ECX), XMM3 ) /* f[3], f[2], f[1], f[0] */ + MOVAPS( REGOFF(0, ECX), XMM3 ) /* f[3], f[2], f[1], f[0] */ MOVSS( REGOFF(12, ECX), XMM2 ) /* -, -, -, f[3] */ SHUFPS( CONST(0x0), XMM2, XMM2 ) /* f[3], f[3], f[3], f[3] */ @@ -167,7 +166,7 @@ LLBL(v16_katmai_pcv_loop_start): MULPS( XMM3, XMM1 ) /* -, x33*1/f[3]*f[2]... */ ADDPS( XMM0, XMM1 ) /* -, x33*1/f[3]*f[2]+x43... */ - MOVUPS( XMM1, REGOFF(0, ECX) ) /* back to f */ + MOVAPS( XMM1, REGOFF(0, ECX) ) /* back to f */ MOVSS( XMM2, REGOFF(12, ECX) ) /* 1/f[3] into f[3] ! 
*/ LLBL(v16_katmai_pcv_skip): @@ -178,9 +177,6 @@ LLBL(v16_katmai_pcv_skip): JNE( LLBL(v16_katmai_pcv_loop_start) ) /* no -> go on with next vertex */ - POP_L( ESI ) + POP_L( ESI ) POP_L( EBP ) RET - - - diff --git a/xc/extras/Mesa/src/X86/katmai_xform_masked1.S b/xc/extras/Mesa/src/X86/katmai_xform_masked1.S index 0408fcf81..4f5f62b80 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_masked1.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_masked1.S @@ -1,14 +1,11 @@ -/* $XFree86: xc/extras/Mesa/src/X86/katmai_xform_masked1.S,v 1.4 2000/09/26 15:56:40 tsi Exp $ */ - #include "assyntax.h" - /** TODO: * - insert PREFETCH instructions to avoid cache-misses ! * - some more optimizations are possible... * - for 40-50% more performance in the SSE-functions, the - * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! + * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! */ - + SEG_TEXT @@ -34,9 +31,9 @@ /* * Offsets for transform_func arguments * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLfloat m[16], - * const GLvector4f *from_vec, + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); */ @@ -68,7 +65,7 @@ GLNAME( gl_katmai_transform_points1_general_masked ): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -89,19 +86,19 @@ GLNAME( gl_katmai_transform_points1_general_masked ): ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ ALIGNTEXT32 LLBL(K_GTP1GM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP1GM_skip) ) - + MOVSS( S(0), XMM2 ) /* ox */ SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ 
MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ ADDPS( XMM1, XMM2 ) /* + | + | + | + */ - MOVUPS( XMM2, D(0) ) + MOVAPS( XMM2, D(0) ) LLBL(K_GTP1GM_skip): INC_L ( EBP ) @@ -195,7 +192,7 @@ GLNAME(gl_katmai_transform_points1_3d_no_rot_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -220,12 +217,12 @@ ALIGNTEXT32 MOVSS( M(12), XMM1 ) /* m12 */ MOVSS( M(13), XMM2 ) /* m13 */ MOVSS( M(14), XMM3 ) /* m14 */ - + ALIGNTEXT32 LLBL(K_GTP13DNRM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP13DNRM_skip) ) - + MOVSS( S(0), XMM4 ) /* ox */ MULSS( XMM0, XMM4 ) /* ox*m0 */ ADDSS( XMM1, XMM4 ) /* ox*m0+m12 */ @@ -248,7 +245,7 @@ LLBL(K_GTP13DNRM_finish): POP_L ( ESI ) RET #undef FRAME_OFFSET - + ALIGNTEXT4 GLOBL GLNAME(gl_katmai_transform_points1_perspective_masked) @@ -287,12 +284,12 @@ ALIGNTEXT32 XORPS( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ MOVSS( M(0), XMM1 ) /* m0 */ MOVSS( M(14), XMM2 ) /* m14 */ - + ALIGNTEXT32 LLBL(K_GTP13PM_top): TEST_B ( BL, REGIND(EBP) ) JNZ ( LLBL(K_GTP13PM_skip) ) - + MOVSS( S(0), XMM3 ) /* ox */ MULSS( XMM1, XMM3 ) /* ox*m0 */ MOVSS( XMM3, D(0) ) /* ox*m0->D(0) */ @@ -300,7 +297,7 @@ LLBL(K_GTP13PM_top): MOVSS( XMM0, D(1) ) MOVSS( XMM0, D(3) ) - + LLBL(K_GTP13PM_skip): INC_L( EBP ) ADD_L( CONST(16), EDI ) @@ -326,7 +323,7 @@ GLNAME(gl_katmai_transform_points1_2d_masked): PUSH_L( EDI ) PUSH_L( EBX ) PUSH_L( EBP ) - + MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ @@ -353,18 +350,18 @@ GLNAME(gl_katmai_transform_points1_2d_masked): ALIGNTEXT32 MOVLPS( M(0), XMM0 ) /* m1 | m0 */ MOVLPS( M(12), XMM1 ) /* m13 | m12 */ - + ALIGNTEXT32 LLBL(K_GTP13P2DM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP13P2DM_skip) ) - + MOVSS( S(0), XMM2 ) /* ox */ SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ MULPS( XMM0, XMM2 ) /* - | - | 
ox*m1 | ox*m0 */ ADDPS( XMM1, XMM2 ) /* - | - | ox*m1+m13 | ox*m0+m12 */ MOVLPS( XMM2, D(0) ) - + LLBL(K_GTP13P2DM_skip): INC_L ( EBP ) ADD_L ( CONST(16), EDI ) @@ -396,7 +393,7 @@ GLNAME(gl_katmai_transform_points1_2d_no_rot_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -419,18 +416,18 @@ ALIGNTEXT32 MOVSS( M(0), XMM0 ) /* m0 */ MOVSS( M(12), XMM1 ) /* m12 */ MOVSS( M(13), XMM2 ) /* m13 */ - + ALIGNTEXT32 LLBL(K_GTP13P2DNRM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP13P2DNRM_skip) ) - + MOVSS( S(0), XMM3 ) /* ox */ MULSS( XMM0, XMM3 ) /* ox*m0 */ ADDSS( XMM1, XMM3 ) /* ox*m0+m12 */ MOVSS( XMM3, D(0) ) MOVSS( XMM2, D(1) ) - + LLBL(K_GTP13P2DNRM_skip): INC_L( EBP ) ADD_L( CONST(16), EDI ) @@ -462,7 +459,7 @@ GLNAME(gl_katmai_transform_points1_3d_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -483,14 +480,14 @@ GLNAME(gl_katmai_transform_points1_3d_masked): ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ - -ALIGNTEXT32 + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ + +ALIGNTEXT32 LLBL(K_GTP13P3DM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP13P3DM_skip) ) - + MOVSS( S(0), XMM2 ) /* ox */ SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ MULPS( XMM0, XMM2 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ @@ -498,7 +495,7 @@ LLBL(K_GTP13P3DM_top): MOVLPS( XMM2, D(0) ) /* - | - | ->D(1)| ->D(0)*/ UNPCKHPS( XMM2, XMM2 ) /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */ MOVSS( XMM2, D(2) ) /* ->D(2) */ - + LLBL(K_GTP13P3DM_skip): INC_L( EBP ) ADD_L( CONST(16), EDI ) diff --git a/xc/extras/Mesa/src/X86/katmai_xform_masked2.S 
b/xc/extras/Mesa/src/X86/katmai_xform_masked2.S index bfaaa5400..2ede85f2b 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_masked2.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_masked2.S @@ -1,14 +1,11 @@ -/* $XFree86: xc/extras/Mesa/src/X86/katmai_xform_masked2.S,v 1.4 2000/09/26 15:56:40 tsi Exp $ */ - #include "assyntax.h" - /** TODO: * - insert PREFETCH instructions to avoid cache-misses ! * - some more optimizations are possible... * - for 40-50% more performance in the SSE-functions, the - * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! + * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! */ - + SEG_TEXT @@ -34,9 +31,9 @@ /* * Offsets for transform_func arguments * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLfloat m[16], - * const GLvector4f *from_vec, + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); */ @@ -68,13 +65,13 @@ GLNAME( gl_katmai_transform_points2_general_masked ): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ TEST_L( ECX, ECX ) JZ( LLBL(K_GTP2GM_finish) ) /* count was zero; go to finish */ - + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ @@ -89,15 +86,15 @@ GLNAME( gl_katmai_transform_points2_general_masked ): ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ - MOVUPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */ + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ + MOVAPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */ ALIGNTEXT32 LLBL(K_GTP2GM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP2GM_skip) ) - + MOVSS( S(0), XMM3 ) /* ox */ SHUFPS( 
CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox | ox */ MULPS( XMM0, XMM3 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ @@ -107,8 +104,8 @@ LLBL(K_GTP2GM_top): ADDPS( XMM4, XMM3 ) ADDPS( XMM2, XMM3 ) - MOVUPS( XMM3, D(0) ) - + MOVAPS( XMM3, D(0) ) + LLBL(K_GTP2GM_skip): INC_L ( EBP ) ADD_L ( CONST(16), EDI ) @@ -145,7 +142,7 @@ GLNAME( gl_katmai_transform_points2_identity_masked ): MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ @@ -165,12 +162,12 @@ ALIGNTEXT32 LLBL(K_GTP2IM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP2IM_skip) ) - + MOV_L( S(0), EDX ) MOV_L( EDX, D(0) ) MOV_L( S(1), EDX ) MOV_L( EDX, D(1) ) - + LLBL(K_GTP2IM_skip): INC_L ( EBP ) ADD_L ( CONST(16), EDI ) @@ -202,7 +199,7 @@ GLNAME(gl_katmai_transform_points2_3d_no_rot_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -256,7 +253,7 @@ LLBL(K_GTP23DNRM_finish): RET #undef FRAME_OFFSET - + ALIGNTEXT4 GLOBL GLNAME(gl_katmai_transform_points2_perspective_masked) GLNAME(gl_katmai_transform_points2_perspective_masked): @@ -272,7 +269,7 @@ GLNAME(gl_katmai_transform_points2_perspective_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -297,18 +294,18 @@ ALIGNTEXT32 UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ MOVSS ( M(14), XMM3 ) /* m14 */ XORPS ( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ - + ALIGNTEXT32 LLBL(K_GTP23PM_top): TEST_B ( BL, REGIND(EBP) ) JNZ ( LLBL(K_GTP23PM_skip) ) - + MOVLPS( S(0), XMM4 ) /* oy | ox */ MULPS( XMM1, XMM4 ) /* oy*m5 | ox*m0 */ MOVLPS( XMM4, D(0) ) /* ->D(1) | ->D(0) */ MOVSS( XMM3, D(2) ) /* ->D(2) */ 
MOVSS( XMM0, D(3) ) /* ->D(3) */ - + LLBL(K_GTP23PM_skip): INC_L( EBP ) ADD_L( CONST(16), EDI ) @@ -341,7 +338,7 @@ GLNAME(gl_katmai_transform_points2_2d_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -364,7 +361,7 @@ ALIGNTEXT32 MOVLPS( M(0), XMM0 ) /* m1 | m0 */ MOVLPS( M(4), XMM1 ) /* m5 | m4 */ MOVLPS( M(12), XMM2 ) /* m13 | m12 */ - + ALIGNTEXT32 LLBL(K_GTP23P2DM_top): TEST_B( BL, REGIND(EBP) ) @@ -381,7 +378,7 @@ LLBL(K_GTP23P2DM_top): ADDPS( XMM4, XMM3 ) ADDPS( XMM2, XMM3 ) MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */ - + LLBL(K_GTP23P2DM_skip): INC_L ( EBP ) ADD_L ( CONST(16), EDI ) @@ -414,7 +411,7 @@ GLNAME(gl_katmai_transform_points2_2d_no_rot_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -438,7 +435,7 @@ ALIGNTEXT32 MOVSS ( M(5), XMM2 ) /* m5 */ UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */ MOVLPS ( M(12), XMM2 ) /* m13 | m12 */ - + ALIGNTEXT32 LLBL(K_GTP23P2DNRM_top): TEST_B( BL, REGIND(EBP) ) @@ -448,7 +445,7 @@ LLBL(K_GTP23P2DNRM_top): MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */ MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */ - + LLBL(K_GTP23P2DNRM_skip): INC_L( EBP ) ADD_L( CONST(16), EDI ) @@ -481,7 +478,7 @@ GLNAME(gl_katmai_transform_points2_3d_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -501,11 +498,11 @@ GLNAME(gl_katmai_transform_points2_3d_masked): ADD_L( EDI, ECX ) /* count += dest ptr */ ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m6 | m5 | m4 */ - MOVUPS( M(12), XMM2 ) /* m14 | m13 | m12 */ + 
MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */ + MOVAPS( M(12), XMM2 ) /* m14 | m13 | m12 */ -ALIGNTEXT32 +ALIGNTEXT32 LLBL(K_GTP23P3DM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP23P3DM_skip) ) @@ -524,7 +521,7 @@ LLBL(K_GTP23P3DM_top): MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */ UNPCKHPS( XMM3, XMM3 ) MOVSS( XMM3, D(2) ) /* ->D(2) */ - + LLBL(K_GTP23P3DM_skip): INC_L( EBP ) ADD_L( CONST(16), EDI ) diff --git a/xc/extras/Mesa/src/X86/katmai_xform_masked3.S b/xc/extras/Mesa/src/X86/katmai_xform_masked3.S index cace27fc1..6bb5659b3 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_masked3.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_masked3.S @@ -1,14 +1,11 @@ -/* $XFree86: xc/extras/Mesa/src/X86/katmai_xform_masked3.S,v 1.4 2000/09/26 15:56:40 tsi Exp $ */ - #include "assyntax.h" - /** TODO: * - insert PREFETCH instructions to avoid cache-misses ! * - some more optimizations are possible... * - for 40-50% more performance in the SSE-functions, the - * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! + * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! 
*/ - + SEG_TEXT @@ -34,9 +31,9 @@ /* * Offsets for transform_func arguments * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLfloat m[16], - * const GLvector4f *from_vec, + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); */ @@ -88,10 +85,10 @@ GLNAME( gl_katmai_transform_points3_general_masked ): ALIGNTEXT32 - MOVUPS ( REGOFF(0, EDX), XMM0 ) /* m0 | m1 | m2 | m3 */ - MOVUPS ( REGOFF(16, EDX), XMM1 ) /* m4 | m5 | m6 | m7 */ - MOVUPS ( REGOFF(32, EDX), XMM2 ) /* m8 | m9 | m10 | m11 */ - MOVUPS ( REGOFF(48, EDX), XMM3 ) /* m12 | m13 | m14 | m15 */ + MOVAPS ( REGOFF(0, EDX), XMM0 ) /* m0 | m1 | m2 | m3 */ + MOVAPS ( REGOFF(16, EDX), XMM1 ) /* m4 | m5 | m6 | m7 */ + MOVAPS ( REGOFF(32, EDX), XMM2 ) /* m8 | m9 | m10 | m11 */ + MOVAPS ( REGOFF(48, EDX), XMM3 ) /* m12 | m13 | m14 | m15 */ ALIGNTEXT32 @@ -114,7 +111,7 @@ LLBL(K_GTPGM_top): ADDPS ( XMM6, XMM4 ) ADDPS ( XMM3, XMM4 ) - MOVUPS ( XMM4, REGOFF(0, EDI) ) + MOVAPS ( XMM4, REGOFF(0, EDI) ) LLBL(K_GTPGM_skip): INC_L ( EBP ) @@ -238,7 +235,7 @@ ALIGNTEXT32 MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */ MOVSS ( M(10), XMM3 ) /* - | - | - | m10 */ MOVSS ( M(14), XMM4 ) /* - | - | - | m14 */ - + ALIGNTEXT32 LLBL(K_GTP3DNRM_top): TEST_B( BL, REGIND(EBP) ) @@ -253,7 +250,7 @@ LLBL(K_GTP3DNRM_top): MULSS ( XMM3, XMM0 ) /* sz*m10 */ ADDSS ( XMM4, XMM0 ) /* +m14 */ MOVSS ( XMM0, D(2) ) /* -> D(2) */ - + LLBL(K_GTP3DNRM_skip): INC_L( EBP ) ADD_L( CONST(16), EDI ) @@ -313,7 +310,7 @@ ALIGNTEXT32 MOVSS ( M(10), XMM3 ) /* m10 */ MOVSS ( M(14), XMM4 ) /* m14 */ XORPS ( XMM6, XMM6 ) /* 0 */ - + ALIGNTEXT32 LLBL(K_GTP3PM_top): TEST_B ( BL, REGIND(EBP) ) @@ -408,7 +405,7 @@ LLBL(K_GTP3P2DM_top): ADDPS ( XMM4, XMM3 ) ADDPS ( XMM2, XMM3 ) MOVLPS ( XMM3, D(0) ) - + MOVSS ( S(2), XMM3 ) MOVSS ( XMM3, D(2) ) @@ -468,7 +465,7 @@ ALIGNTEXT32 MOVSS ( M(5), XMM2 ) /* m5 */ UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */ 
MOVLPS ( M(12), XMM2 ) /* m13 | m12 */ - + ALIGNTEXT32 LLBL(K_GTP3P2DNRM_top): TEST_B( BL, REGIND(EBP) ) @@ -481,7 +478,7 @@ LLBL(K_GTP3P2DNRM_top): MOVSS( S(2), XMM0 ) MOVSS( XMM0, D(2) ) - + LLBL(K_GTP3P2DNRM_skip): INC_L( EBP ) ADD_L( CONST(16), EDI ) @@ -535,16 +532,16 @@ GLNAME(gl_katmai_transform_points3_3d_masked): ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m6 | m5 | m4 */ - MOVUPS( M(8), XMM2 ) /* m10 | m9 | m8 */ - MOVUPS( M(12), XMM3 ) /* m14 | m13 | m12 */ + MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */ + MOVAPS( M(8), XMM2 ) /* m10 | m9 | m8 */ + MOVAPS( M(12), XMM3 ) /* m14 | m13 | m12 */ ALIGNTEXT32 LLBL(K_GTP3P3DM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP3P3DM_skip) ) - + MOVSS( S(0), XMM4 ) SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox */ MULPS( XMM0, XMM4 ) /* ox*m2 | ox*m1 | ox*m0 */ diff --git a/xc/extras/Mesa/src/X86/katmai_xform_masked4.S b/xc/extras/Mesa/src/X86/katmai_xform_masked4.S index d5d591bbb..e65930ed5 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_masked4.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_masked4.S @@ -1,14 +1,11 @@ -/* $XFree86: xc/extras/Mesa/src/X86/katmai_xform_masked4.S,v 1.4 2000/09/26 15:56:40 tsi Exp $ */ - #include "assyntax.h" - /** TODO: * - insert PREFETCH instructions to avoid cache-misses ! * - some more optimizations are possible... * - for 40-50% more performance in the SSE-functions, the - * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! + * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! 
*/ - + SEG_TEXT @@ -34,9 +31,9 @@ /* * Offsets for transform_func arguments * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLfloat m[16], - * const GLvector4f *from_vec, + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); */ @@ -68,7 +65,7 @@ GLNAME( gl_katmai_transform_points4_general_masked ): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -88,16 +85,16 @@ GLNAME( gl_katmai_transform_points4_general_masked ): ADD_L( EDI, ECX ) /* count += dest ptr */ ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ - MOVUPS( M(8), XMM2 ) /* m11 | m10 | m9 | m8 */ - MOVUPS( M(12), XMM3 ) /* m15 | m14 | m13 | m12 */ - + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ + MOVAPS( M(8), XMM2 ) /* m11 | m10 | m9 | m8 */ + MOVAPS( M(12), XMM3 ) /* m15 | m14 | m13 | m12 */ + ALIGNTEXT32 LLBL(K_GTP4GM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP4GM_skip) ) - + MOVSS( S(0), XMM4 ) /* ox */ SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ @@ -117,7 +114,7 @@ LLBL(K_GTP4GM_top): ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */ ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */ ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... 
*/ - MOVUPS( XMM4, D(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ + MOVAPS( XMM4, D(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ LLBL(K_GTP4GM_skip): INC_L ( EBP ) @@ -169,7 +166,7 @@ GLNAME( gl_katmai_transform_points4_identity_masked ): ADD_L( EDI, ECX ) /* count += dest ptr */ MOV_L( EAX, ARG_SOURCE ) - + CMP_L( ESI, EDI ) JE( LLBL(K_GTP4IM_finish) ) @@ -219,7 +216,7 @@ GLNAME(gl_katmai_transform_points4_3d_no_rot_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -246,12 +243,12 @@ ALIGNTEXT32 MOVSS( M(10), XMM1 ) /* m10 */ MOVLPS( M(12), XMM2 ) /* m13 | m12 */ MOVSS( M(14), XMM3 ) /* m14 */ - + ALIGNTEXT32 LLBL(K_GTP43DNRM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP43DNRM_skip) ) - + MOVLPS( S(0), XMM4 ) /* oy | ox */ MULPS( XMM0, XMM4 ) /* oy*m5 | ox*m0 */ MOVSS( S(3), XMM5 ) /* ow */ @@ -299,7 +296,7 @@ GLNAME(gl_katmai_transform_points4_3d_masked): MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ MOV_B( ARG_FLAG, BL ) /* clip mask flags */ @@ -320,16 +317,16 @@ GLNAME(gl_katmai_transform_points4_3d_masked): ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ - MOVUPS( M(8), XMM2 ) /* m11 | m10 | m9 | m8 */ - MOVUPS( M(12), XMM3 ) /* m15 | m14 | m13 | m12 */ - -ALIGNTEXT32 + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ + MOVAPS( M(8), XMM2 ) /* m11 | m10 | m9 | m8 */ + MOVAPS( M(12), XMM3 ) /* m15 | m14 | m13 | m12 */ + +ALIGNTEXT32 LLBL(K_GTP43P3DM_top): TEST_B( BL, REGIND(EBP) ) JNZ( LLBL(K_GTP43P3DM_skip) ) - + MOVSS( S(0), XMM4 ) /* ox */ SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ @@ -349,11 +346,11 @@ 
LLBL(K_GTP43P3DM_top): ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */ ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */ ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ - MOVUPS( XMM4, D(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ + MOVAPS( XMM4, D(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ MOVSS( S(3), XMM4 ) /* ow */ MOVSS( XMM4, D(3) ) /* ->D(3) */ - + LLBL(K_GTP43P3DM_skip): INC_L( EBP ) ADD_L( CONST(16), EDI ) diff --git a/xc/extras/Mesa/src/X86/katmai_xform_raw1.S b/xc/extras/Mesa/src/X86/katmai_xform_raw1.S index b8e9dcda8..24a567c89 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_raw1.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_raw1.S @@ -1,14 +1,11 @@ -/* $XFree86: xc/extras/Mesa/src/X86/katmai_xform_raw1.S,v 1.4 2000/09/26 15:56:40 tsi Exp $ */ - #include "assyntax.h" - /** TODO: * - insert PREFETCH instructions to avoid cache-misses ! * - some more optimizations are possible... * - for 40-50% more performance in the SSE-functions, the - * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! + * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! 
*/ - + SEG_TEXT @@ -34,9 +31,9 @@ /* * Offsets for transform_func arguments * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLfloat m[16], - * const GLvector4f *from_vec, + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); */ @@ -84,8 +81,8 @@ GLNAME( gl_katmai_transform_points1_general_raw ): ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ ALIGNTEXT32 LLBL(K_GTP1GR_top): @@ -145,7 +142,7 @@ ALIGNTEXT32 LLBL(K_GTP1IR_top): MOV_L( S(0), EDX ) MOV_L( EDX, D(0) ) - + LLBL(K_GTP1IR_skip): ADD_L ( CONST(16), EDI ) ADD_L ( EAX, ESI ) @@ -195,7 +192,7 @@ ALIGNTEXT32 MOVSS( M(12), XMM1 ) /* m12 */ MOVSS( M(13), XMM2 ) /* m13 */ MOVSS( M(14), XMM3 ) /* m14 */ - + ALIGNTEXT32 LLBL(K_GTP13DNRR_top): MOVSS( S(0), XMM4 ) /* ox */ @@ -218,7 +215,7 @@ LLBL(K_GTP13DNRR_finish): RET #undef FRAME_OFFSET - + ALIGNTEXT4 GLOBL GLNAME(gl_katmai_transform_points1_perspective_raw) @@ -254,7 +251,7 @@ ALIGNTEXT32 XORPS( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ MOVSS( M(0), XMM1 ) /* m0 */ MOVSS( M(14), XMM2 ) /* m14 */ - + ALIGNTEXT32 LLBL(K_GTP13PR_top): MOVSS( S(0), XMM3 ) /* ox */ @@ -264,7 +261,7 @@ LLBL(K_GTP13PR_top): MOVSS( XMM0, D(1) ) MOVSS( XMM0, D(3) ) - + LLBL(K_GTP13PR_skip): ADD_L( CONST(16), EDI ) ADD_L( EAX, ESI ) @@ -310,7 +307,7 @@ GLNAME(gl_katmai_transform_points1_2d_raw): ALIGNTEXT32 MOVLPS( M(0), XMM0 ) /* m1 | m0 */ MOVLPS( M(12), XMM1 ) /* m13 | m12 */ - + ALIGNTEXT32 LLBL(K_GTP13P2DR_top): MOVSS( S(0), XMM2 ) /* ox */ @@ -318,7 +315,7 @@ LLBL(K_GTP13P2DR_top): MULPS( XMM0, XMM2 ) /* - | - | ox*m1 | ox*m0 */ ADDPS( XMM1, XMM2 ) /* - | - | ox*m1+m13 | ox*m0+m12 */ MOVLPS( XMM2, D(0) ) - + LLBL(K_GTP13P2DR_skip): ADD_L ( CONST(16), EDI ) ADD_L ( EAX, ESI ) @@ -365,7 +362,7 @@ ALIGNTEXT32 
MOVSS( M(0), XMM0 ) /* m0 */ MOVSS( M(12), XMM1 ) /* m12 */ MOVSS( M(13), XMM2 ) /* m13 */ - + ALIGNTEXT32 LLBL(K_GTP13P2DNRR_top): MOVSS( S(0), XMM3 ) /* ox */ @@ -373,7 +370,7 @@ LLBL(K_GTP13P2DNRR_top): ADDSS( XMM1, XMM3 ) /* ox*m0+m12 */ MOVSS( XMM3, D(0) ) MOVSS( XMM2, D(1) ) - + LLBL(K_GTP13P2DNRR_skip): ADD_L( CONST(16), EDI ) ADD_L( EAX, ESI ) @@ -419,10 +416,10 @@ GLNAME(gl_katmai_transform_points1_3d_raw): ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ - -ALIGNTEXT32 + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(12), XMM1 ) /* m15 | m14 | m13 | m12 */ + +ALIGNTEXT32 LLBL(K_GTP13P3DR_top): MOVSS( S(0), XMM2 ) /* ox */ SHUFPS( CONST(0x0), XMM2, XMM2 ) /* ox | ox | ox | ox */ @@ -431,7 +428,7 @@ LLBL(K_GTP13P3DR_top): MOVLPS( XMM2, D(0) ) /* - | - | ->D(1)| ->D(0)*/ UNPCKHPS( XMM2, XMM2 ) /* ox*m3+m15 | ox*m3+m15 | ox*m2+m14 | ox*m2+m14 */ MOVSS( XMM2, D(2) ) - + LLBL(K_GTP13P3DR_skip): ADD_L( CONST(16), EDI ) ADD_L( EAX, ESI ) diff --git a/xc/extras/Mesa/src/X86/katmai_xform_raw2.S b/xc/extras/Mesa/src/X86/katmai_xform_raw2.S index c330b0783..3d37ff687 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_raw2.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_raw2.S @@ -1,14 +1,11 @@ -/* $XFree86: xc/extras/Mesa/src/X86/katmai_xform_raw2.S,v 1.4 2000/09/26 15:56:40 tsi Exp $ */ - #include "assyntax.h" - /** TODO: * - insert PREFETCH instructions to avoid cache-misses ! * - some more optimizations are possible... * - for 40-50% more performance in the SSE-functions, the - * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! + * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! 
*/ - + SEG_TEXT @@ -34,9 +31,9 @@ /* * Offsets for transform_func arguments * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLfloat m[16], - * const GLvector4f *from_vec, + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); */ @@ -60,7 +57,7 @@ GLNAME( gl_katmai_transform_points2_general_raw ): #define FRAME_OFFSET 8 PUSH_L ( ESI ) PUSH_L ( EDI ) - + MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ @@ -69,7 +66,7 @@ GLNAME( gl_katmai_transform_points2_general_raw ): TEST_L( ECX, ECX ) JZ( LLBL(K_GTP2GR_finish) ) /* count was zero; go to finish */ - + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ @@ -83,9 +80,9 @@ GLNAME( gl_katmai_transform_points2_general_raw ): ADD_L( EDI, ECX ) /* count += dest ptr */ ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ - MOVUPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */ + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ + MOVAPS( M(12), XMM2 ) /* m15 | m14 | m13 | m12 */ ALIGNTEXT32 LLBL(K_GTP2GR_top): @@ -98,8 +95,8 @@ LLBL(K_GTP2GR_top): ADDPS( XMM4, XMM3 ) ADDPS( XMM2, XMM3 ) - MOVUPS( XMM3, D(0) ) - + MOVAPS( XMM3, D(0) ) + LLBL(K_GTP2GR_skip): ADD_L ( CONST(16), EDI ) ADD_L ( EAX, ESI ) @@ -151,7 +148,7 @@ LLBL(K_GTP2IR_top): MOV_L ( EDX, D(0) ) MOV_L ( S(1), EDX ) MOV_L ( EDX, D(1) ) - + LLBL(K_GTP2IR_skip): ADD_L ( CONST(16), EDI ) ADD_L ( EAX, ESI ) @@ -201,7 +198,7 @@ ALIGNTEXT32 UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ MOVLPS ( M(12), XMM2 ) /* - | - | m13 | m12 */ MOVSS ( M(14), XMM3 ) /* - | - | - | m14 */ - + ALIGNTEXT32 LLBL(K_GTP23DNRR_top): MOVLPS ( S(0), XMM0 ) /* - | - | oy | ox */ @@ -223,7 +220,7 @@ 
LLBL(K_GTP23DNRR_finish): RET #undef FRAME_OFFSET - + ALIGNTEXT4 GLOBL GLNAME(gl_katmai_transform_points2_perspective_raw) GLNAME(gl_katmai_transform_points2_perspective_raw): @@ -259,7 +256,7 @@ ALIGNTEXT32 UNPCKLPS ( XMM2, XMM1 ) /* - | - | m5 | m0 */ MOVSS ( M(14), XMM3 ) /* m14 */ XORPS ( XMM0, XMM0 ) /* 0 | 0 | 0 | 0 */ - + ALIGNTEXT32 LLBL(K_GTP23PR_top): MOVLPS( S(0), XMM4 ) /* oy | ox */ @@ -267,7 +264,7 @@ LLBL(K_GTP23PR_top): MOVLPS( XMM4, D(0) ) /* ->D(1) | ->D(0) */ MOVSS( XMM3, D(2) ) /* ->D(2) */ MOVSS( XMM0, D(3) ) /* ->D(3) */ - + LLBL(K_GTP23PR_skip): ADD_L( CONST(16), EDI ) ADD_L( EAX, ESI ) @@ -315,7 +312,7 @@ ALIGNTEXT32 MOVLPS( M(0), XMM0 ) /* m1 | m0 */ MOVLPS( M(4), XMM1 ) /* m5 | m4 */ MOVLPS( M(12), XMM2 ) /* m13 | m12 */ - + ALIGNTEXT32 LLBL(K_GTP23P2DR_top): MOVSS( S(0), XMM3 ) /* ox */ @@ -378,14 +375,14 @@ ALIGNTEXT32 MOVSS ( M(5), XMM2 ) /* m5 */ UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */ MOVLPS ( M(12), XMM2 ) /* m13 | m12 */ - + ALIGNTEXT32 LLBL(K_GTP23P2DNRR_top): MOVLPS( S(0), XMM0 ) /* oy | ox */ MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */ MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */ - + LLBL(K_GTP23P2DNRR_skip): ADD_L( CONST(16), EDI ) ADD_L( EAX, ESI ) @@ -430,11 +427,11 @@ GLNAME(gl_katmai_transform_points2_3d_raw): ADD_L( EDI, ECX ) /* count += dest ptr */ ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m6 | m5 | m4 */ - MOVUPS( M(12), XMM2 ) /* m14 | m13 | m12 */ + MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */ + MOVAPS( M(12), XMM2 ) /* m14 | m13 | m12 */ -ALIGNTEXT32 +ALIGNTEXT32 LLBL(K_GTP23P3DR_top): MOVSS( S(0), XMM3 ) /* ox */ SHUFPS( CONST(0x0), XMM3, XMM3 ) /* ox | ox | ox */ @@ -450,7 +447,7 @@ LLBL(K_GTP23P3DR_top): MOVLPS( XMM3, D(0) ) /* ->D(1) | ->D(0) */ UNPCKHPS( XMM3, XMM3 ) MOVSS( XMM3, D(2) ) /* ->D(2) */ - + LLBL(K_GTP23P3DR_skip): ADD_L( CONST(16), EDI ) ADD_L( EAX, ESI ) @@ -462,5 +459,3 @@ 
LLBL(K_GTP23P3DR_finish): POP_L( ESI ) RET #undef FRAME_OFFSET - - diff --git a/xc/extras/Mesa/src/X86/katmai_xform_raw3.S b/xc/extras/Mesa/src/X86/katmai_xform_raw3.S index d9ed22be3..e5176651d 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_raw3.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_raw3.S @@ -1,14 +1,11 @@ -/* $XFree86: xc/extras/Mesa/src/X86/katmai_xform_raw3.S,v 1.4 2000/09/26 15:56:40 tsi Exp $ */ - #include "assyntax.h" - /** TODO: * - insert PREFETCH instructions to avoid cache-misses ! * - some more optimizations are possible... * - for 40-50% more performance in the SSE-functions, the - * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! + * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! */ - + SEG_TEXT @@ -34,9 +31,9 @@ /* * Offsets for transform_func arguments * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLfloat m[16], - * const GLvector4f *from_vec, + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); */ @@ -84,10 +81,10 @@ GLNAME( gl_katmai_transform_points3_general_raw ): ALIGNTEXT32 - MOVUPS ( REGOFF(0, EDX), XMM0 ) /* m0 | m1 | m2 | m3 */ - MOVUPS ( REGOFF(16, EDX), XMM1 ) /* m4 | m5 | m6 | m7 */ - MOVUPS ( REGOFF(32, EDX), XMM2 ) /* m8 | m9 | m10 | m11 */ - MOVUPS ( REGOFF(48, EDX), XMM3 ) /* m12 | m13 | m14 | m15 */ + MOVAPS ( REGOFF(0, EDX), XMM0 ) /* m0 | m1 | m2 | m3 */ + MOVAPS ( REGOFF(16, EDX), XMM1 ) /* m4 | m5 | m6 | m7 */ + MOVAPS ( REGOFF(32, EDX), XMM2 ) /* m8 | m9 | m10 | m11 */ + MOVAPS ( REGOFF(48, EDX), XMM3 ) /* m12 | m13 | m14 | m15 */ ALIGNTEXT32 @@ -107,7 +104,7 @@ LLBL(K_GTPGR_top): ADDPS ( XMM6, XMM4 ) ADDPS ( XMM3, XMM4 ) - MOVUPS ( XMM4, REGOFF(0, EDI) ) + MOVAPS ( XMM4, REGOFF(0, EDI) ) LLBL(K_GTPGR_skip): ADD_L ( CONST(16), EDI ) @@ -240,7 +237,7 @@ LLBL(K_GTP3DNRR_finish): RET #undef FRAME_OFFSET - + ALIGNTEXT4 GLOBL 
GLNAME(gl_katmai_transform_points3_perspective_raw) @@ -279,7 +276,7 @@ ALIGNTEXT32 MOVSS ( M(10), XMM3 ) /* m10 */ MOVSS ( M(14), XMM4 ) /* m14 */ XORPS ( XMM6, XMM6 ) /* 0 */ - + ALIGNTEXT32 LLBL(K_GTP3PR_top): MOVLPS ( S(0), XMM0 ) /* oy | ox */ @@ -347,7 +344,7 @@ ALIGNTEXT32 MOVLPS( M(0), XMM0 ) /* m1 | m0 */ MOVLPS( M(4), XMM1 ) /* m5 | m4 */ MOVLPS( M(12), XMM2 ) /* m13 | m12 */ - + ALIGNTEXT32 LLBL(K_GTP3P2DR_top): MOVSS ( S(0), XMM3 ) /* ox */ @@ -360,7 +357,7 @@ LLBL(K_GTP3P2DR_top): ADDPS ( XMM4, XMM3 ) ADDPS ( XMM2, XMM3 ) MOVLPS ( XMM3, D(0) ) - + MOVSS ( S(2), XMM3 ) MOVSS ( XMM3, D(2) ) @@ -412,14 +409,14 @@ ALIGNTEXT32 MOVSS ( M(5), XMM2 ) /* m5 */ UNPCKLPS ( XMM2, XMM1 ) /* m5 | m0 */ MOVLPS ( M(12), XMM2 ) /* m13 | m12 */ - + ALIGNTEXT32 LLBL(K_GTP3P2DNRR_top): MOVLPS( S(0), XMM0 ) /* oy | ox */ MULPS( XMM1, XMM0 ) /* oy*m5 | ox*m0 */ ADDPS( XMM2, XMM0 ) /* +m13 | +m12 */ MOVLPS( XMM0, D(0) ) /* ->D(1) | ->D(0) */ - + MOVSS( S(2), XMM0 ) MOVSS( XMM0, D(2) ) @@ -470,12 +467,12 @@ GLNAME(gl_katmai_transform_points3_3d_raw): ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m6 | m5 | m4 */ - MOVUPS( M(8), XMM2 ) /* m10 | m9 | m8 */ - MOVUPS( M(12), XMM3 ) /* m14 | m13 | m12 */ + MOVAPS( M(0), XMM0 ) /* m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m6 | m5 | m4 */ + MOVAPS( M(8), XMM2 ) /* m10 | m9 | m8 */ + MOVAPS( M(12), XMM3 ) /* m14 | m13 | m12 */ -ALIGNTEXT32 +ALIGNTEXT32 LLBL(K_GTP3P3DR_top): MOVSS( S(0), XMM4 ) SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox */ @@ -508,5 +505,3 @@ LLBL(K_GTP3P3DR_finish): POP_L( ESI ) RET #undef FRAME_OFFSET - - diff --git a/xc/extras/Mesa/src/X86/katmai_xform_raw4.S b/xc/extras/Mesa/src/X86/katmai_xform_raw4.S index eabec45bc..6cbe22a16 100644 --- a/xc/extras/Mesa/src/X86/katmai_xform_raw4.S +++ b/xc/extras/Mesa/src/X86/katmai_xform_raw4.S @@ -1,200 +1,193 @@ -/* $XFree86: xc/extras/Mesa/src/X86/katmai_xform_raw4.S,v 1.4 2000/09/26 15:56:40 tsi Exp $ */ - #include "assyntax.h" - /** 
TODO: * - insert PREFETCH instructions to avoid cache-misses ! * - some more optimizations are possible... * - for 40-50% more performance in the SSE-functions, the - * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! + * data (trans-matrix, src_vert, dst_vert) needs to be 16byte aligned ! */ - - SEG_TEXT + SEG_TEXT -#define S(i) REGOFF(i * 4, ESI) -#define D(i) REGOFF(i * 4, EDI) -#define M(i) REGOFF(i * 4, EDX) + +#define S(i) REGOFF(i * 4, ESI) +#define D(i) REGOFF(i * 4, EDI) +#define M(i) REGOFF(i * 4, EDX) /* - * Offsets into GLvector4f + * Offsets into GLvector4f */ -#define V4F_DATA 0 -#define V4F_START 4 -#define V4F_COUNT 8 -#define V4F_STRIDE 12 -#define V4F_SIZE 16 -#define V4F_FLAGS 20 - -#define VEC_SIZE_1 1 -#define VEC_SIZE_2 3 -#define VEC_SIZE_3 7 -#define VEC_SIZE_4 15 +#define V4F_DATA 0 +#define V4F_START 4 +#define V4F_COUNT 8 +#define V4F_STRIDE 12 +#define V4F_SIZE 16 +#define V4F_FLAGS 20 + +#define VEC_SIZE_1 1 +#define VEC_SIZE_2 3 +#define VEC_SIZE_3 7 +#define VEC_SIZE_4 15 /* * Offsets for transform_func arguments * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLfloat m[16], - * const GLvector4f *from_vec, + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, * const GLubyte *clipmask, * const GLubyte flag ); */ -#define OFFSET_DEST 4 -#define OFFSET_MATRIX 8 -#define OFFSET_SOURCE 12 -#define OFFSET_CLIP 16 -#define OFFSET_FLAG 20 +#define OFFSET_DEST 4 +#define OFFSET_MATRIX 8 +#define OFFSET_SOURCE 12 +#define OFFSET_CLIP 16 +#define OFFSET_FLAG 20 -#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) -#define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP) -#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) -#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) -#define ARG_FLAG REGOFF(FRAME_OFFSET+OFFSET_FLAG, ESP) +#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) +#define ARG_MATRIX 
REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP) +#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) +#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) +#define ARG_FLAG REGOFF(FRAME_OFFSET+OFFSET_FLAG, ESP) ALIGNTEXT4 -GLOBL GLNAME(gl_katmai_transform_points4_general_raw) +GLOBL GLNAME( gl_katmai_transform_points4_general_raw ) GLNAME( gl_katmai_transform_points4_general_raw ): #define FRAME_OFFSET 8 - PUSH_L ( ESI ) - PUSH_L ( EDI ) + PUSH_L( ESI ) + PUSH_L( EDI ) - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ + MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ + MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ + MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - CMP_L( CONST(0), ECX ) /* count == 0 ? */ - JE( LLBL(K_GTP4GR_finish) ) /* yes -> nothing to do. */ + CMP_L( CONST(0), ECX ) /* count == 0 ? */ + JE( LLBL(K_GTP4GR_finish) ) /* yes -> nothing to do. 
*/ - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )/* set dest size */ - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ + SHL_L( CONST(4), ECX ) /* count *= 16 */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ + MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ + ADD_L( EDI, ECX ) /* count += dest ptr */ + + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ + MOVAPS( M(8), XMM2 ) /* m11 | m10 | m9 | m8 */ + MOVAPS( M(12), XMM3 ) /* m15 | m14 | m13 | m12 */ ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ - MOVUPS( M(8), XMM2 ) /* m11 | m10 | m9 | m8 */ - MOVUPS( M(12), XMM3 ) /* m15 | m14 | m13 | m12 */ - -ALIGNTEXT32 -LLBL(K_GTP4GR_top): - MOVSS( S(0), XMM4 ) /* ox */ - SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ - MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ - - MOVSS( S(1), XMM5 ) /* oy */ - SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */ - MULPS( XMM1, XMM5 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ - - MOVSS( S(2), XMM6 ) /* oz */ - SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */ - MULPS( XMM2, XMM6 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */ - - MOVSS( S(3), XMM7 ) /* ow */ - SHUFPS( CONST(0x0), XMM7, XMM7 ) /* ow | ow | ow | ow */ - MULPS( XMM3, XMM7 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 
*/ - - ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */ - ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */ - ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ - MOVUPS( XMM4, D(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ - -LLBL(K_GTP4GR_skip): - ADD_L ( CONST(16), EDI ) - ADD_L ( EAX, ESI ) - CMP_L ( ECX, EDI ) - JNE ( LLBL(K_GTP4GR_top) ) - -LLBL(K_GTP4GR_finish): - POP_L ( EDI ) - POP_L ( ESI ) +LLBL( K_GTP4GR_top ): + MOVSS( S(0), XMM4 ) /* ox */ + SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ + MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ + + MOVSS( S(1), XMM5 ) /* oy */ + SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */ + MULPS( XMM1, XMM5 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ + + MOVSS( S(2), XMM6 ) /* oz */ + SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */ + MULPS( XMM2, XMM6 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */ + + MOVSS( S(3), XMM7 ) /* ow */ + SHUFPS( CONST(0x0), XMM7, XMM7 ) /* ow | ow | ow | ow */ + MULPS( XMM3, XMM7 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ + + ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */ + ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */ + ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... 
*/ + MOVAPS( XMM4, D(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ + +LLBL( K_GTP4GR_skip ): + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL(K_GTP4GR_top) ) + +LLBL( K_GTP4GR_finish ): + POP_L( EDI ) + POP_L( ESI ) RET #undef FRAME_OFFSET ALIGNTEXT4 -GLOBL GLNAME(gl_katmai_transform_points4_3d_raw) -GLNAME(gl_katmai_transform_points4_3d_raw): +GLOBL GLNAME( gl_katmai_transform_points4_3d_raw ) +GLNAME( gl_katmai_transform_points4_3d_raw ): #define FRAME_OFFSET 8 PUSH_L( ESI ) PUSH_L( EDI ) - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ + MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ + MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ + MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ TEST_L( ECX, ECX) - JZ( LLBL(K_GTP43P3DR_finish) ) /* count was zero; go to finish */ + JZ( LLBL(K_GTP43P3DR_finish) ) /* count was zero; go to finish */ - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )/* set dest size */ - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ + SHL_L( CONST(4), ECX ) /* count *= 16 */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ + MOV_L( 
REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ + ADD_L( EDI, ECX ) /* count += dest ptr */ + MOVAPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ + MOVAPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ + MOVAPS( M(8), XMM2 ) /* m11 | m10 | m9 | m8 */ + MOVAPS( M(12), XMM3 ) /* m15 | m14 | m13 | m12 */ ALIGNTEXT32 - MOVUPS( M(0), XMM0 ) /* m3 | m2 | m1 | m0 */ - MOVUPS( M(4), XMM1 ) /* m7 | m6 | m5 | m4 */ - MOVUPS( M(8), XMM2 ) /* m11 | m10 | m9 | m8 */ - MOVUPS( M(12), XMM3 ) /* m15 | m14 | m13 | m12 */ - -ALIGNTEXT32 -LLBL(K_GTP43P3DR_top): - MOVSS( S(0), XMM4 ) /* ox */ - SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ - MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ - - MOVSS( S(1), XMM5 ) /* oy */ - SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */ - MULPS( XMM1, XMM5 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ - - MOVSS( S(2), XMM6 ) /* oz */ - SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */ - MULPS( XMM2, XMM6 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */ - - MOVSS( S(3), XMM7 ) /* ow */ - SHUFPS( CONST(0x0), XMM7, XMM7 ) /* ow | ow | ow | ow */ - MULPS( XMM3, XMM7 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ - - ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */ - ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */ - ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... 
*/ - MOVUPS( XMM4, D(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ - - MOVSS( S(3), XMM4 ) /* ow */ - MOVSS( XMM4, D(3) ) /* ->D(3) */ - -LLBL(K_GTP43P3DR_skip): +LLBL( K_GTP43P3DR_top ): + MOVSS( S(0), XMM4 ) /* ox */ + SHUFPS( CONST(0x0), XMM4, XMM4 ) /* ox | ox | ox | ox */ + MULPS( XMM0, XMM4 ) /* ox*m3 | ox*m2 | ox*m1 | ox*m0 */ + + MOVSS( S(1), XMM5 ) /* oy */ + SHUFPS( CONST(0x0), XMM5, XMM5 ) /* oy | oy | oy | oy */ + MULPS( XMM1, XMM5 ) /* oy*m7 | oy*m6 | oy*m5 | oy*m4 */ + + MOVSS( S(2), XMM6 ) /* oz */ + SHUFPS( CONST(0x0), XMM6, XMM6 ) /* oz | oz | oz | oz */ + MULPS( XMM2, XMM6 ) /* oz*m11 | oz*m10 | oz*m9 | oz*m8 */ + + MOVSS( S(3), XMM7 ) /* ow */ + SHUFPS( CONST(0x0), XMM7, XMM7 ) /* ow | ow | ow | ow */ + MULPS( XMM3, XMM7 ) /* ow*m15 | ow*m14 | ow*m13 | ow*m12 */ + + ADDPS( XMM5, XMM4 ) /* ox*m3+oy*m7 | ... */ + ADDPS( XMM6, XMM4 ) /* ox*m3+oy*m7+oz*m11 | ... */ + ADDPS( XMM7, XMM4 ) /* ox*m3+oy*m7+oz*m11+ow*m15 | ... */ + MOVAPS( XMM4, D(0) ) /* ->D(3) | ->D(2) | ->D(1) | ->D(0) */ + + MOVSS( S(3), XMM4 ) /* ow */ + MOVSS( XMM4, D(3) ) /* ->D(3) */ + +LLBL( K_GTP43P3DR_skip ): ADD_L( CONST(16), EDI ) ADD_L( EAX, ESI ) CMP_L( ECX, EDI ) JNE( LLBL(K_GTP43P3DR_top) ) -LLBL(K_GTP43P3DR_finish): +LLBL( K_GTP43P3DR_finish ): POP_L( EDI ) POP_L( ESI ) RET #undef FRAME_OFFSET - diff --git a/xc/extras/Mesa/src/X86/mmx.h b/xc/extras/Mesa/src/X86/mmx.h index f0e05cf06..07d66c774 100644 --- a/xc/extras/Mesa/src/X86/mmx.h +++ b/xc/extras/Mesa/src/X86/mmx.h @@ -1,20 +1,20 @@ /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.4 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL diff --git a/xc/extras/Mesa/src/X86/mmx_blend.S b/xc/extras/Mesa/src/X86/mmx_blend.S index e7d6e118c..21fa36ede 100644 --- a/xc/extras/Mesa/src/X86/mmx_blend.S +++ b/xc/extras/Mesa/src/X86/mmx_blend.S @@ -350,7 +350,3 @@ LLBL(GMBT_1): MOV_L ( EBP, ESP ) POP_L ( EBP ) RET - - - - diff --git a/xc/extras/Mesa/src/X86/vertex.S b/xc/extras/Mesa/src/X86/vertex.S deleted file mode 100644 index 0326c6392..000000000 --- a/xc/extras/Mesa/src/X86/vertex.S +++ /dev/null @@ -1,102 +0,0 @@ -#include "assyntax.h" - - - SEG_TEXT - -/*#define MAT_SX 0*/ /* accessed by REGIND !! */ -#define MAT_SY 20 -#define MAT_SZ 40 -#define MAT_TX 48 -#define MAT_TY 52 -#define MAT_TZ 56 - - - - -/* - * void gl_v16_x86_general_xform ( GLfloat *dest, - * const GLfloat *m, - * const GLfloat *src, - * GLuint src_stride, - * GLuint count ) - */ - - -/* This is nothing more glamorous than an objdump of one of Josh's - * routines hacked to match the above. 
- */ -ALIGNTEXT16 -GLOBL GLNAME( gl_v16_x86_general_xform ) -GLNAME( gl_v16_x86_general_xform ): - - - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L ( REGOFF(12, ESP), EAX ) /* dest */ - MOV_L ( REGOFF(16, ESP), ESI ) /* mat */ - MOV_L ( REGOFF(20, ESP), EDX ) /* src */ - MOV_L ( REGOFF(24, ESP), EDI ) /* src_stride */ - MOV_L ( REGOFF(28, ESP), ECX ) /* count */ - - -LLBL(v16x86_loop): - FLD_S ( REGOFF( 0x0, EDX ) ) - FMUL_S ( REGOFF( 0x0, ESI ) ) - FLD_S ( REGOFF( 0x0, EDX ) ) - FMUL_S ( REGOFF( 0x4, ESI ) ) - FLD_S ( REGOFF( 0x0, EDX ) ) - FMUL_S ( REGOFF( 0x8, ESI ) ) - FLD_S ( REGOFF( 0x0, EDX ) ) - FMUL_S ( REGOFF( 0xc, ESI ) ) - FLD_S ( REGOFF( 0x4, EDX ) ) - FMUL_S ( REGOFF( 0x10, ESI ) ) - FLD_S ( REGOFF( 0x4, EDX ) ) - FMUL_S ( REGOFF( 0x14, ESI ) ) - FLD_S ( REGOFF( 0x4, EDX ) ) - FMUL_S ( REGOFF( 0x18, ESI ) ) - FLD_S ( REGOFF( 0x4, EDX ) ) - FMUL_S ( REGOFF( 0x1c, ESI ) ) - FXCH ( ST(3) ) - FADDP ( ST(0),ST(7) ) - FXCH ( ST(1) ) - FADDP ( ST(0),ST(5) ) - FADDP ( ST(0),ST(3) ) - FADDP ( ST(0),ST(1) ) - FLD_S ( REGOFF( 0x8, EDX ) ) - FMUL_S ( REGOFF( 0x20, ESI ) ) - FLD_S ( REGOFF( 0x8, EDX ) ) - FMUL_S ( REGOFF( 0x24, ESI ) ) - FLD_S ( REGOFF( 0x8, EDX ) ) - FMUL_S ( REGOFF( 0x28, ESI ) ) - FLD_S ( REGOFF( 0x8, EDX ) ) - FMUL_S ( REGOFF( 0x2c, ESI ) ) - FXCH ( ST(3) ) - FADDP ( ST(0),ST(7) ) - FXCH ( ST(1) ) - FADDP ( ST(0),ST(5) ) - FADDP ( ST(0),ST(3) ) - FADDP ( ST(0),ST(1) ) - FXCH ( ST(3) ) - FADD_S ( REGOFF( 0x30, ESI ) ) - FXCH ( ST(2) ) - FADD_S ( REGOFF( 0x34, ESI ) ) - FXCH ( ST(1) ) - FADD_S ( REGOFF( 0x38, ESI ) ) - FXCH ( ST(3) ) - FADD_S ( REGOFF( 0x3c, ESI ) ) - FXCH ( ST(2) ) - FSTP_S ( REGOFF( 0x0, EAX ) ) - FSTP_S ( REGOFF( 0x4, EAX ) ) - FXCH ( ST(1) ) - FSTP_S ( REGOFF( 0x8, EAX ) ) - FSTP_S ( REGOFF( 0xc, EAX ) ) - ADD_L ( CONST(64), EAX ) - ADD_L ( EDI, EDX ) - DEC_L ( ECX ) - JNE ( LLBL(v16x86_loop) ) - - POP_L ( ESI ) - POP_L ( EDI ) - RET - diff --git a/xc/extras/Mesa/src/X86/vertex_3dnow.S b/xc/extras/Mesa/src/X86/vertex_3dnow.S 
deleted file mode 100644 index 693ac0812..000000000 --- a/xc/extras/Mesa/src/X86/vertex_3dnow.S +++ /dev/null @@ -1,234 +0,0 @@ -#include "assyntax.h" - - SEG_TEXT - - - -/*#define MAT_SX 0 accessed by REGIND !! */ -#define MAT_SY 20 -#define MAT_SZ 40 -#define MAT_TX 48 -#define MAT_TY 52 -#define MAT_TZ 56 - - - - - -/* Do viewport map and perspective projection. Args should look like: - * - * x86_3dnow_project_vertices( float *first_vertex, - * const float *last_vertex, - * float *matrix, - * GLuint stride ) - * - * This routine assumes a sane vertex layout with x,y,z,w as - * the first four elements, to be projected in clip-space, to - * x/w,y/w,z/w,1/w, and then transformed according to the matrix to - * device space. The device coordinates will overwrite the clip - * coordinates as the first four elements of the vertex. - * - * If projection is required for other elements, such as texcoords, - * you will have to code a specialized version of this routine. See - * FX/X86 for examples. - * - * These routines are simplified versions of the FX code written by - * Holger. 
- */ - -GLOBL GLNAME( gl_3dnow_project_vertices ) -GLNAME( gl_3dnow_project_vertices ): - - PUSH_L ( EBP ) - FEMMS - PREFETCH ( REGOFF(8, ESP) ) /* fetch the first vertex */ - - MOV_L ( REGOFF(8, ESP), ECX ) /* first_vert */ - MOV_L ( REGOFF(12, ESP), EDX ) /* last_vert */ - MOV_L ( REGOFF(16, ESP), EBP ) /* matrix */ - MOV_L ( REGOFF(20, ESP), EAX ) /* stride */ - - MOVD ( REGOFF(MAT_TX, EBP), MM6 ) /* | tx */ - PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 ) /* ty | tx */ - MOVD ( REGIND(EBP), MM5 ) - PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 ) /* vsy | vsx */ - MOVD ( REGOFF(MAT_SZ, EBP), MM1 ) /* | vsz */ - SUB_L ( ECX, EDX ) /* last -= first */ - -ALIGNTEXT32 -LLBL(v16_3dnow_pv_loop_start): - - PREFETCH ( REGOFF(64, ECX) ) /* fetch one/two verts ahead */ - MOVD ( REGOFF(12, ECX), MM0 ) /* | f[3] */ - PFRCP ( MM0, MM0 ) /* oow = 1/f[3] */ - MOVD ( REGOFF(12, ECX), MM7 ) /* | f[3] */ - PFRCPIT1 ( MM0, MM7 ) - PFRCPIT2 ( MM0, MM7 ) /* oow | oow */ - PUNPCKLDQ ( MM7, MM7 ) - MOVQ ( REGIND(ECX), MM2 ) /* f[1] | f[0] */ - PFMUL ( MM7, MM2 ) /* f[1] * oow | f[0] * oow */ - MOVD ( REGOFF(8, ECX), MM3 ) /* | f[2] */ - PFMUL ( MM7, MM3 ) /* | f[2] * oow */ - MOVD ( REGOFF(MAT_TZ, EBP), MM0 ) /* | vtz */ - PFMUL ( MM1, MM3 ) /* | f[2] *= vsz */ - PFADD ( MM0, MM3 ) /* | f[2] += vtz */ - PFMUL ( MM5, MM2 ) /* f[1] *= vsy | f[0] *= vsx */ - PFADD ( MM6, MM2 ) /* f[1] += vty | f[0] += vtx */ - PUNPCKLDQ ( MM7, MM3 ) /* f[3] = oow | f[2] */ - MOVQ ( MM2, REGOFF(0, ECX) ) - MOVQ ( MM3, REGOFF(8, ECX) ) - ADD_L ( EAX, ECX ) /* f += stride */ - SUB_L ( EAX, EDX ) - JA ( LLBL(v16_3dnow_pv_loop_start) ) - - FEMMS - POP_L ( EBP ) - RET - - - - - - - - -GLOBL GLNAME( gl_3dnow_project_clipped_vertices ) -GLNAME( gl_3dnow_project_clipped_vertices ): - - PUSH_L ( EBP ) - PUSH_L ( ESI ) - - FEMMS - - PREFETCH ( REGOFF(12, ESP) ) /* fetch the first vertex */ - - MOV_L ( REGOFF(12, ESP), ECX ) /* first_vert */ - MOV_L ( REGOFF(16, ESP), EDX ) /* last_vert */ - MOV_L ( REGOFF(20, ESP), EBP ) /* matrix 
*/ - MOV_L ( REGOFF(24, ESP), EAX ) /* stride */ - MOV_L ( REGOFF(28, ESP), ESI ) /* clip_mask */ - - - MOVD ( REGOFF(MAT_TX, EBP), MM6 ) /* | tx */ - PUNPCKLDQ ( REGOFF(MAT_TY, EBP), MM6 ) /* ty | tx */ - MOVD ( REGIND(EBP), MM5 ) - PUNPCKLDQ ( REGOFF(MAT_SY, EBP), MM5 ) /* vsy | vsx */ - MOVD ( REGOFF(MAT_SZ, EBP), MM1 ) /* | vsz */ - - -ALIGNTEXT32 -LLBL(v16_3dnow_pcv_loop_start): - - CMP_B ( CONST(0), REGIND(ESI) ) - JNE ( LLBL(v16_3dnow_pcv_skip) ) - - MOVD ( REGOFF(12, ECX), MM0) /* | f[3] */ - PFRCP ( MM0, MM0 ) /* oow = 1/f[3] */ - MOVD ( REGOFF(12, ECX), MM7) /* | f[3] */ - PFRCPIT1 ( MM0, MM7 ) - PFRCPIT2 ( MM0, MM7 ) /* oow | oow */ - PUNPCKLDQ ( MM7, MM7 ) - MOVQ ( REGIND(ECX), MM2 ) /* f[1] | f[0] */ - PFMUL ( MM7, MM2 ) /* f[1] * oow | f[0] * oow */ - MOVD ( REGOFF(8, ECX), MM3 ) /* | f[2] */ - PFMUL ( MM7, MM3 ) /* | f[2] * oow */ - MOVD ( REGOFF(MAT_TZ, EBP), MM0 ) /* | vtz */ - PFMUL ( MM1, MM3 ) /* | f[2] *= vsz */ - PFADD ( MM0, MM3 ) /* | f[2] += vtz */ - PFMUL ( MM5, MM2 ) /* f[1] *= vsy | f[0] *= vsx */ - PFADD ( MM6, MM2 ) /* f[1] += vty | f[0] += vtx */ - PUNPCKLDQ ( MM7, MM3 ) /* f[3] = oow | f[2] */ - MOVQ ( MM2, REGOFF(0, ECX) ) - MOVQ ( MM3, REGOFF(8, ECX) ) - -LLBL(v16_3dnow_pcv_skip): - ADD_L ( EAX, ECX ) /* f += stride */ - INC_L ( ESI ) /* next ClipMask */ - - CMP_L ( ECX, EDX ) - JNE ( LLBL(v16_3dnow_pcv_loop_start) ) - - FEMMS - - POP_L ( ESI ) - POP_L ( EBP ) - RET - - - - - -/* - * void gl_3dnow_transform_v16 (GLfloat *dest, - * const GLfloat *m, - * const GLfloat *src, - * GLuint src_stride, - * GLuint count ) - * - * These tranformation functions could disappear if the standard ones - * took an output stride. 
- */ -GLOBL GLNAME( gl_v16_3dnow_general_xform ) -GLNAME( gl_v16_3dnow_general_xform ): - - - PUSH_L ( EDI ) - PUSH_L ( ESI ) - - MOV_L ( REGOFF(12, ESP), EAX ) /* dest */ - MOV_L ( REGOFF(16, ESP), ESI ) /* mat */ - MOV_L ( REGOFF(20, ESP), EDX ) /* src */ - MOV_L ( REGOFF(24, ESP), EDI ) /* src_stride */ - MOV_L ( REGOFF(28, ESP), ECX ) /* count */ - - FEMMS - - MOVQ ( REGOFF(MAT_TX, ESI), MM7 ) /* m31 | m30 */ - MOVQ ( REGOFF(MAT_TZ, ESI), MM3 ) /* m33 | m32 */ - -ALIGNTEXT32 -LLBL(v16_3dnow_general_loop): - PREFETCH ( REGOFF(128, EAX) ) /* prefetch 2 output verts ahead*/ - PREFETCH ( REGOFF(32, EDX) ) /* */ - - MOVQ ( REGIND(EDX), MM0 ) /* x1 | x0 */ - MOVD ( REGOFF(8, EDX), MM1 ) /* | x2 */ - MOVQ ( REGIND(ESI), MM4 ) /* m00 | m01 */ - PUNPCKHDQ ( MM0, MM2 ) /* x1 | */ - MOVQ ( REGOFF(16, ESI),MM5 ) /* m10 | m11 */ - PUNPCKLDQ ( MM0, MM0 ) /* x0 | x0 */ - MOVQ ( REGOFF(32, ESI),MM6 ) /* m20 | m21 */ - PFMUL ( MM0, MM4 ) /* x0.m01 | x0.m00 */ - PUNPCKHDQ ( MM2, MM2 ) /* x1 | x1 */ - PFMUL ( MM2, MM5 ) /* x1.m11 | x1.m10 */ - PUNPCKLDQ ( MM1, MM1 ) /* x2 | x2 */ - PFMUL ( REGOFF(8,ESI), MM0 ) /* x0.m03 | x0.m02 */ - PFMUL ( REGOFF(24,ESI),MM2 ) /* x1.m13 | x1.m12 */ - PFMUL ( MM1, MM6 ) /* x2.m21 | x2.m20 */ - PFADD ( MM4, MM5 ) /* x0m01+x1m11 | x0m00+x1m10 */ - PFMUL ( REGOFF(40,ESI),MM1 ) /* x2.m21 | x2.m20 */ - PFADD ( MM0, MM2 ) /* x0m03+x1m13 | x0m02+x1m12 */ - PFADD ( MM5, MM6 ) /* (xm012)1 | (xm012)0 */ - PFADD ( MM1, MM2 ) /* (xm012)3 | (xm012)2 */ - PFADD ( MM7, MM6 ) /* r1 | r0 */ - PFADD ( MM3, MM2 ) /* r3 | r2 */ - ADD_L ( EDI, EDX ) /* next input */ - MOVQ ( MM6, REGIND(EAX) ) /* */ - MOVQ ( MM2, REGOFF(8,EAX) ) /* */ - ADD_L ( CONST(64), EAX ) /* next output */ - DEC_L ( ECX ) - JNE ( LLBL(v16_3dnow_general_loop) ) - - FEMMS - - POP_L ( ESI ) - POP_L ( EDI ) - RET - - - - - - - - diff --git a/xc/extras/Mesa/src/X86/x86.c b/xc/extras/Mesa/src/X86/x86.c index 0e531b8df..37abf644b 100644 --- a/xc/extras/Mesa/src/X86/x86.c +++ 
b/xc/extras/Mesa/src/X86/x86.c @@ -1,20 +1,20 @@ /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.4 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -22,13 +22,11 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/X86/x86.c,v 1.6 2000/09/26 15:56:40 tsi Exp $ */ /* * Intel x86 assembly code by Josh Vanderhoof */ - #include "glheader.h" #include "context.h" #include "types.h" @@ -36,89 +34,104 @@ #include "xform.h" #include "x86.h" -#ifdef USE_X86_ASM -extern void _ASMAPI gl_v16_x86_cliptest_points4( GLfloat *first_vert, - GLfloat *last_vert, - GLubyte *or_mask, - GLubyte *and_mask, - GLubyte *clip_mask ); - - -extern void _ASMAPI gl_v16_x86_general_xform( GLfloat *dest, - const GLfloat *m, - const GLfloat *src, - GLuint src_stride, - GLuint count ); +#ifdef DEBUG +#include "debug_xform.h" #endif -#define XFORM_ARGS GLvector4f *to_vec, \ - const GLfloat m[16], \ - const GLvector4f *from_vec, \ - const GLubyte *mask, \ +#define XFORM_ARGS GLvector4f *to_vec, \ + const GLfloat m[16], \ + const GLvector4f *from_vec, \ + const GLubyte *mask, \ const GLubyte flag -#define DECLARE_XFORM_GROUP(pfx, vsize, masked) \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_general_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_identity_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_perspective_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_2d_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked(XFORM_ARGS); \ - extern void _ASMAPI gl_##pfx##_transform_points##vsize##_3d_##masked(XFORM_ARGS); - -#define ASSIGN_XFORM_GROUP( pfx, cma, vsize, masked ) \ - gl_transform_tab[cma][vsize][MATRIX_GENERAL] \ - = gl_##pfx##_transform_points##vsize##_general_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_IDENTITY] \ - = gl_##pfx##_transform_points##vsize##_identity_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D_NO_ROT] \ - = gl_##pfx##_transform_points##vsize##_3d_no_rot_##masked; \ - 
gl_transform_tab[cma][vsize][MATRIX_PERSPECTIVE] \ - = gl_##pfx##_transform_points##vsize##_perspective_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D] \ - = gl_##pfx##_transform_points##vsize##_2d_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_2D_NO_ROT] \ - = gl_##pfx##_transform_points##vsize##_2d_no_rot_##masked; \ - gl_transform_tab[cma][vsize][MATRIX_3D] \ - = gl_##pfx##_transform_points##vsize##_3d_##masked; + +#define DECLARE_XFORM_GROUP( pfx, sz, masked ) \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_general_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_identity_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_perspective_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_2d_no_rot_##masked( XFORM_ARGS ); \ + extern void _ASMAPI gl_##pfx##_transform_points##sz##_3d_##masked( XFORM_ARGS ); + + +#define ASSIGN_XFORM_GROUP( pfx, cma, sz, masked ) \ + gl_transform_tab[cma][sz][MATRIX_GENERAL] = \ + gl_##pfx##_transform_points##sz##_general_##masked; \ + gl_transform_tab[cma][sz][MATRIX_IDENTITY] = \ + gl_##pfx##_transform_points##sz##_identity_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D_NO_ROT] = \ + gl_##pfx##_transform_points##sz##_3d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_PERSPECTIVE] = \ + gl_##pfx##_transform_points##sz##_perspective_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D] = \ + gl_##pfx##_transform_points##sz##_2d_##masked; \ + gl_transform_tab[cma][sz][MATRIX_2D_NO_ROT] = \ + gl_##pfx##_transform_points##sz##_2d_no_rot_##masked; \ + gl_transform_tab[cma][sz][MATRIX_3D] = \ + gl_##pfx##_transform_points##sz##_3d_##masked; #ifdef USE_X86_ASM - DECLARE_XFORM_GROUP( x86, 2, raw ) - DECLARE_XFORM_GROUP( x86, 3, raw ) - 
DECLARE_XFORM_GROUP( x86, 4, raw ) - DECLARE_XFORM_GROUP( x86, 2, masked ) - DECLARE_XFORM_GROUP( x86, 3, masked ) - DECLARE_XFORM_GROUP( x86, 4, masked ) - - extern GLvector4f * _ASMAPI gl_x86_cliptest_points4( GLvector4f *clip_vec, - GLvector4f *proj_vec, - GLubyte clipMask[], - GLubyte *orMask, - GLubyte *andMask ); +DECLARE_XFORM_GROUP( x86, 2, raw ) +DECLARE_XFORM_GROUP( x86, 3, raw ) +DECLARE_XFORM_GROUP( x86, 4, raw ) +DECLARE_XFORM_GROUP( x86, 2, masked ) +DECLARE_XFORM_GROUP( x86, 3, masked ) +DECLARE_XFORM_GROUP( x86, 4, masked ) + + +extern GLvector4f * _ASMAPI gl_x86_cliptest_points4( GLvector4f *clip_vec, + GLvector4f *proj_vec, + GLubyte clipMask[], + GLubyte *orMask, + GLubyte *andMask ); + + +extern void _ASMAPI gl_v16_x86_cliptest_points4( GLfloat *first_vert, + GLfloat *last_vert, + GLubyte *or_mask, + GLubyte *and_mask, + GLubyte *clip_mask ); + + +extern void _ASMAPI gl_v16_x86_general_xform( GLfloat *dest, + const GLfloat *m, + const GLfloat *src, + GLuint src_stride, + GLuint count ); #endif -void gl_init_x86_asm_transforms( void ) +void gl_init_x86_transform_asm( void ) { #ifdef USE_X86_ASM - ASSIGN_XFORM_GROUP( x86, 0, 2, raw ) - ASSIGN_XFORM_GROUP( x86, 0, 3, raw ) - ASSIGN_XFORM_GROUP( x86, 0, 4, raw ) + ASSIGN_XFORM_GROUP( x86, 0, 2, raw ); + ASSIGN_XFORM_GROUP( x86, 0, 3, raw ); + ASSIGN_XFORM_GROUP( x86, 0, 4, raw ); - ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 2, masked ) - ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 3, masked ) - ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 4, masked ) + ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 2, masked ); + ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 3, masked ); + ASSIGN_XFORM_GROUP( x86, CULL_MASK_ACTIVE, 4, masked ); /* XXX this function has been found to cause FP overflow exceptions */ gl_clip_tab[4] = gl_x86_cliptest_points4; #ifdef DEBUG - gl_test_all_transform_functions("x86"); + gl_test_all_transform_functions( "x86" ); +#endif #endif +} - gl_cliptest_points4_v16 = gl_v16_x86_cliptest_points4; 
- gl_xform_points3_v16_general = gl_v16_x86_general_xform; +void gl_init_x86_vertex_asm( void ) +{ +#ifdef USE_X86_ASM + gl_xform_points3_v16_general = gl_v16_x86_general_xform; + gl_cliptest_points4_v16 = gl_v16_x86_cliptest_points4; + +#if 0 + gl_test_all_vertex_functions( "x86" ); +#endif #endif } diff --git a/xc/extras/Mesa/src/X86/x86.h b/xc/extras/Mesa/src/X86/x86.h index aaad25b2c..76b2ff566 100644 --- a/xc/extras/Mesa/src/X86/x86.h +++ b/xc/extras/Mesa/src/X86/x86.h @@ -1,20 +1,20 @@ /* * Mesa 3-D graphics library - * Version: 3.1 - * + * Version: 3.4 + * * Copyright (C) 1999 Brian Paul All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL @@ -27,10 +27,10 @@ * Intel x86 assembly code by Josh Vanderhoof */ +#ifndef __X86_H__ +#define __X86_H__ -#ifndef X86_H -#define X86_H - -extern void gl_init_x86_asm_transforms(void); +extern void gl_init_x86_transform_asm( void ); +extern void gl_init_x86_vertex_asm( void ); #endif diff --git a/xc/extras/Mesa/src/X86/x86_cliptest.S b/xc/extras/Mesa/src/X86/x86_cliptest.S new file mode 100644 index 000000000..992fec3d7 --- /dev/null +++ b/xc/extras/Mesa/src/X86/x86_cliptest.S @@ -0,0 +1,247 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "assyntax.h" +#include "clip_args.h" + +#define FP_ONE 1065353216 +#define FP_ZERO 0 + +#define SRC(i) REGOFF(i * 4, ESI) +#define DST(i) REGOFF(i * 4, EDI) +#define MAT(i) REGOFF(i * 4, EDX) + + +/* + * Table for clip test. 
+ * + * bit6 = SRC(3) < 0 + * bit5 = SRC(2) < 0 + * bit4 = abs(S(2)) > abs(S(3)) + * bit3 = SRC(1) < 0 + * bit2 = abs(S(1)) > abs(S(3)) + * bit1 = SRC(0) < 0 + * bit0 = abs(S(0)) > abs(S(3)) + */ + + SEG_DATA + +clip_table: /* 128 bytes (16 rows of 8): the clip mask for each 7-bit code; rows 8..15 are the codes with bit6 set */ + D_BYTE 0, 1, 0, 2, 4, 5, 4, 6 + D_BYTE 0, 1, 0, 2, 8, 9, 8, 10 + D_BYTE 32, 33, 32, 34, 36, 37, 36, 38 + D_BYTE 32, 33, 32, 34, 40, 41, 40, 42 + D_BYTE 0, 1, 0, 2, 4, 5, 4, 6 + D_BYTE 0, 1, 0, 2, 8, 9, 8, 10 + D_BYTE 16, 17, 16, 18, 20, 21, 20, 22 + D_BYTE 16, 17, 16, 18, 24, 25, 24, 26 + D_BYTE 63, 61, 63, 62, 55, 53, 55, 54 + D_BYTE 63, 61, 63, 62, 59, 57, 59, 58 + D_BYTE 47, 45, 47, 46, 39, 37, 39, 38 + D_BYTE 47, 45, 47, 46, 43, 41, 43, 42 + D_BYTE 63, 61, 63, 62, 55, 53, 55, 54 + D_BYTE 63, 61, 63, 62, 59, 57, 59, 58 + D_BYTE 31, 29, 31, 30, 23, 21, 23, 22 + D_BYTE 31, 29, 31, 30, 27, 25, 27, 26 + + + SEG_TEXT + +/* + * gl_x86_cliptest_points4 + * + * Clip-tests every vertex of the source vector. For each vertex the + * sign/magnitude tests listed above clip_table are packed into a 7-bit + * code, the table byte for that code is stored in clipmask[] and folded + * into the or/and masks, and a vertex whose byte is zero is written to + * proj[] as (x/w, y/w, z/w, 1/w). Returns ARG_DEST in EAX. + * + * AL: ormask + * AH: andmask + * EBX: temp0 + * ECX: temp1 + * EDX: clipmask[] + * ESI: clip[] + * EDI: proj[] + * EBP: temp2 + */ + +#if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC) +#define ELFPIC +#endif + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_cliptest_points4 ) +GLNAME( gl_x86_cliptest_points4 ): + +#ifdef ELFPIC +#define FRAME_OFFSET 20 /* four saved registers plus the pushed clip_table pointer */ +#else +#define FRAME_OFFSET 16 /* four saved registers */ +#endif + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBP ) + PUSH_L( EBX ) + +#ifdef ELFPIC + /* store pointer to clip_table on stack */ + CALL( LLBL( ctp4_get_eip ) ) /* comes back with EBX = address of the ADD below */ + ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX ) + MOV_L( REGOFF(clip_table@GOT, EBX), EBX ) + PUSH_L( EBX ) + JMP( LLBL( ctp4_clip_table_ready ) ) + +LLBL( ctp4_get_eip ): + /* store eip in ebx */ + MOV_L( REGIND(ESP), EBX ) + RET + +LLBL( ctp4_clip_table_ready ): +#endif + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_CLIP, EDX ) + MOV_L( ARG_OR, EBX ) + + MOV_L( ARG_AND, EBP ) + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride, added to ESI once per vertex */ + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ESI = first source vertex */ + + OR_L( CONST(VEC_SIZE_4), 
REGOFF(V4F_FLAGS, EDI) ) + MOV_L( EAX, ARG_SOURCE ) /* put stride in ARG_SOURCE */ + + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) /* EDI = first dest vertex */ + ADD_L( EDX, ECX ) + + MOV_L( ECX, ARG_CLIP ) /* put clipmask + count in ARG_CLIP */ + CMP_L( ECX, EDX ) /* equal only when count == 0 */ + + MOV_B( REGIND(EBX), AL ) + MOV_B( REGIND(EBP), AH ) /* MOV leaves the flags alone, so the JZ below still tests the CMP */ + + JZ( LLBL( ctp4_finish ) ) + +ALIGNTEXT16 +LLBL( ctp4_top ): + + FLD1 /* F3 */ + FDIV_S( SRC(3) ) /* ST(0) = 1 / SRC(3) */ + + MOV_L( SRC(3), EBP ) + MOV_L( SRC(2), EBX ) + + XOR_L( ECX, ECX ) /* ECX collects the table index, one bit per ADC */ + ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */ + + ADC_L( ECX, ECX ) + ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */ + + ADC_L( ECX, ECX ) + CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */ + + ADC_L( ECX, ECX ) + MOV_L( SRC(1), EBX ) + + ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */ + + ADC_L( ECX, ECX ) + CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */ + + ADC_L( ECX, ECX ) + MOV_L( SRC(0), EBX ) + + ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */ + + ADC_L( ECX, ECX ) + CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */ + + ADC_L( ECX, ECX ) + +#ifdef ELFPIC + MOV_L( REGIND(ESP), EBP ) /* clip_table */ + + MOV_B( REGBI(EBP, ECX), CL ) +#else + MOV_B( REGOFF(clip_table,ECX), CL ) +#endif + + OR_B( CL, AL ) + AND_B( CL, AH ) + + TEST_B( CL, CL ) + MOV_B( CL, REGIND(EDX) ) /* store the mask; the flags from TEST survive the MOV */ + + JZ( LLBL( ctp4_proj ) ) /* mask == 0: project this vertex */ + + FSTP( ST(0) ) /* */ + JMP( LLBL( ctp4_next ) ) + +LLBL( ctp4_proj ): + + FLD_S( SRC(0) ) /* F0 F3 */ + FMUL2( ST(1), ST(0) ) + + FLD_S( SRC(1) ) /* F1 F0 F3 */ + FMUL2( ST(2), ST(0) ) + + FLD_S( SRC(2) ) /* F2 F1 F0 F3 */ + FMUL2( ST(3), ST(0) ) + + FXCH( ST(2) ) /* F0 F1 F2 F3 */ + FSTP_S( DST(0) ) /* F1 F2 F3 */ + FSTP_S( DST(1) ) /* F2 F3 */ + FSTP_S( DST(2) ) /* F3 */ + FSTP_S( DST(3) ) /* */ + +LLBL( ctp4_next ): + + INC_L( EDX ) /* next clipmask byte */ + ADD_L( CONST(16), EDI ) /* dest vertices are 16 bytes apart */ + + ADD_L( ARG_SOURCE, ESI ) /* ARG_SOURCE now holds the stride */ + CMP_L( EDX, ARG_CLIP ) /* ARG_CLIP now holds clipmask + count */ + + JNZ( LLBL( 
ctp4_top ) ) + + MOV_L( ARG_OR, ECX ) + MOV_L( ARG_AND, EDX ) + + MOV_B( AL, REGIND(ECX) ) /* write the or-mask back */ + MOV_B( AH, REGIND(EDX) ) /* write the and-mask back */ + +LLBL( ctp4_finish ): + + MOV_L( ARG_DEST, EAX ) /* return value */ +#ifdef ELFPIC + POP_L( ESI ) /* discard ptr to clip_table */ +#endif + POP_L( EBX ) + POP_L( EBP ) + POP_L( EDI ) + POP_L( ESI ) + + RET diff --git a/xc/extras/Mesa/src/X86/x86_vertex.S b/xc/extras/Mesa/src/X86/x86_vertex.S new file mode 100644 index 000000000..dbe2cfbe4 --- /dev/null +++ b/xc/extras/Mesa/src/X86/x86_vertex.S @@ -0,0 +1,298 @@ +#include "assyntax.h" + + + SEG_TEXT + +/*#define MAT_SX 0*/ /* accessed by REGIND !! */ +#define MAT_SY 20 +#define MAT_SZ 40 +#define MAT_TX 48 +#define MAT_TY 52 +#define MAT_TZ 56 + + +#define FP_ONE 1065353216 /* 0x3f800000 */ +#define FP_ZERO 0 + +#define S(i) REGOFF(i * 4, ESI) +#define D(i) REGOFF(i * 4, EDI) +#define M(i) REGOFF(i * 4, EDX) + + + +/* + * void gl_v16_x86_general_xform ( GLfloat *dest, + * const GLfloat *m, + * const GLfloat *src, + * GLuint src_stride, + * GLuint count ) + * + * Writes m * (src[0], src[1], src[2], 1) to dest[0..3] for each vertex; + * dest advances 64 bytes and src advances src_stride bytes per vertex. + */ + + +/* This is nothing more glamorous than an objdump of one of Josh's + * routines hacked to match the above. 
+ */ +ALIGNTEXT16 +GLOBL GLNAME( gl_v16_x86_general_xform ) +GLNAME( gl_v16_x86_general_xform ): /* dest[0..3] = m * (src[0], src[1], src[2], 1) for count vertices; dest steps 64 bytes, src steps src_stride bytes */ + + + PUSH_L ( EDI ) + PUSH_L ( ESI ) + + MOV_L ( REGOFF(12, ESP), EAX ) /* dest */ + MOV_L ( REGOFF(16, ESP), ESI ) /* mat */ + MOV_L ( REGOFF(20, ESP), EDX ) /* src */ + MOV_L ( REGOFF(24, ESP), EDI ) /* src_stride */ + MOV_L ( REGOFF(28, ESP), ECX ) /* count */ + + TEST_L ( ECX, ECX ) /* count == 0: the DEC_L/JNE loop below would wrap ECX and overrun dest/src */ + JZ ( LLBL(v16x86_done) ) + + +LLBL(v16x86_loop): + FLD_S ( REGOFF( 0x0, EDX ) ) /* src[0] * m[0..3] */ + FMUL_S ( REGOFF( 0x0, ESI ) ) + FLD_S ( REGOFF( 0x0, EDX ) ) + FMUL_S ( REGOFF( 0x4, ESI ) ) + FLD_S ( REGOFF( 0x0, EDX ) ) + FMUL_S ( REGOFF( 0x8, ESI ) ) + FLD_S ( REGOFF( 0x0, EDX ) ) + FMUL_S ( REGOFF( 0xc, ESI ) ) + FLD_S ( REGOFF( 0x4, EDX ) ) /* src[1] * m[4..7]; all eight x87 registers are now in use */ + FMUL_S ( REGOFF( 0x10, ESI ) ) + FLD_S ( REGOFF( 0x4, EDX ) ) + FMUL_S ( REGOFF( 0x14, ESI ) ) + FLD_S ( REGOFF( 0x4, EDX ) ) + FMUL_S ( REGOFF( 0x18, ESI ) ) + FLD_S ( REGOFF( 0x4, EDX ) ) + FMUL_S ( REGOFF( 0x1c, ESI ) ) + FXCH ( ST(3) ) /* fold the four new products into the four running sums */ + FADDP ( ST(0),ST(7) ) + FXCH ( ST(1) ) + FADDP ( ST(0),ST(5) ) + FADDP ( ST(0),ST(3) ) + FADDP ( ST(0),ST(1) ) + FLD_S ( REGOFF( 0x8, EDX ) ) /* src[2] * m[8..11] */ + FMUL_S ( REGOFF( 0x20, ESI ) ) + FLD_S ( REGOFF( 0x8, EDX ) ) + FMUL_S ( REGOFF( 0x24, ESI ) ) + FLD_S ( REGOFF( 0x8, EDX ) ) + FMUL_S ( REGOFF( 0x28, ESI ) ) + FLD_S ( REGOFF( 0x8, EDX ) ) + FMUL_S ( REGOFF( 0x2c, ESI ) ) + FXCH ( ST(3) ) /* same fold as above */ + FADDP ( ST(0),ST(7) ) + FXCH ( ST(1) ) + FADDP ( ST(0),ST(5) ) + FADDP ( ST(0),ST(3) ) + FADDP ( ST(0),ST(1) ) + FXCH ( ST(3) ) /* add the fourth column m[12..15] */ + FADD_S ( REGOFF( 0x30, ESI ) ) + FXCH ( ST(2) ) + FADD_S ( REGOFF( 0x34, ESI ) ) + FXCH ( ST(1) ) + FADD_S ( REGOFF( 0x38, ESI ) ) + FXCH ( ST(3) ) + FADD_S ( REGOFF( 0x3c, ESI ) ) + FXCH ( ST(2) ) /* store dest[0..3], leaving the x87 stack empty */ + FSTP_S ( REGOFF( 0x0, EAX ) ) + FSTP_S ( REGOFF( 0x4, EAX ) ) + FXCH ( ST(1) ) + FSTP_S ( REGOFF( 0x8, EAX ) ) + FSTP_S ( REGOFF( 0xc, EAX ) ) + ADD_L ( CONST(64), EAX ) /* next dest vertex */ + ADD_L ( EDI, EDX ) /* next source vertex */ + DEC_L ( ECX ) + JNE ( LLBL(v16x86_loop) ) + +LLBL(v16x86_done): + POP_L ( ESI ) + POP_L ( EDI ) + RET + + +/* + * Table for clip test. 
+ * + * bit6 = S(3) < 0 + * bit5 = S(2) < 0 + * bit4 = abs(S(2)) > abs(S(3)) + * bit3 = S(1) < 0 + * bit2 = abs(S(1)) > abs(S(3)) + * bit1 = S(0) < 0 + * bit0 = abs(S(0)) > abs(S(3)) + */ + + + SEG_DATA + +clip_table: /* 128 bytes (16 rows of 8), indexed by the 7-bit code above; rows 8..15 are the codes with bit6 set. The cliptest routine below stores the looked-up byte per vertex and folds it into its or/and masks. */ + D_BYTE 0, 1, 0, 2, 4, 5, 4, 6 + D_BYTE 0, 1, 0, 2, 8, 9, 8, 10 + D_BYTE 32, 33, 32, 34, 36, 37, 36, 38 + D_BYTE 32, 33, 32, 34, 40, 41, 40, 42 + D_BYTE 0, 1, 0, 2, 4, 5, 4, 6 + D_BYTE 0, 1, 0, 2, 8, 9, 8, 10 + D_BYTE 16, 17, 16, 18, 20, 21, 20, 22 + D_BYTE 16, 17, 16, 18, 24, 25, 24, 26 + D_BYTE 63, 61, 63, 62, 55, 53, 55, 54 + D_BYTE 63, 61, 63, 62, 59, 57, 59, 58 + D_BYTE 47, 45, 47, 46, 39, 37, 39, 38 + D_BYTE 47, 45, 47, 46, 43, 41, 43, 42 + D_BYTE 63, 61, 63, 62, 55, 53, 55, 54 + D_BYTE 63, 61, 63, 62, 59, 57, 59, 58 + D_BYTE 31, 29, 31, 30, 23, 21, 23, 22 + D_BYTE 31, 29, 31, 30, 27, 25, 27, 26 + + SEG_TEXT + + +/* +######################################## +## +## gl_v16_x86_cliptest_points4 +## +## Performs cliptesting equivalent to that done by cliptest_v16() +## in vertices.c +## +## This is a hacked version of the original above. 
+## +######################################## +*/ + + + +#define OFFSET_V16_SOURCE 4 /* argument offsets, counted from the return-address slot */ +#define OFFSET_V16_LAST 8 +#define OFFSET_V16_OR 12 +#define OFFSET_V16_AND 16 +#define OFFSET_V16_MASK 20 + +#define ARG_V16_SOURCE REGOFF(V16_FRAME_OFFSET+OFFSET_V16_SOURCE, ESP) +#define ARG_V16_LAST REGOFF(V16_FRAME_OFFSET+OFFSET_V16_LAST, ESP) +#define ARG_V16_OR REGOFF(V16_FRAME_OFFSET+OFFSET_V16_OR, ESP) +#define ARG_V16_AND REGOFF(V16_FRAME_OFFSET+OFFSET_V16_AND, ESP) +#define ARG_V16_MASK REGOFF(V16_FRAME_OFFSET+OFFSET_V16_MASK, ESP) + + +#if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC) +#define ELFPIC +#endif + + GLOBL GLNAME(gl_v16_x86_cliptest_points4) + ALIGNTEXT4 + +GLNAME(gl_v16_x86_cliptest_points4): /* Clip-tests the 64-byte vertices in [source, last): stores each vertex's clip_table byte in the mask array and folds it into the bytes behind the or/and pointers. Nothing is projected; the 1/w code is under #if 0. */ +#ifdef ELFPIC +#define V16_FRAME_OFFSET 20 /* four saved registers plus the pushed clip_table pointer */ +#else +#define V16_FRAME_OFFSET 16 /* four saved registers */ +#endif + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBP ) + PUSH_L( EBX ) + +#ifdef ELFPIC + /* store pointer to clip_table on stack */ + CALL( LLBL(v16_ctp4_get_eip) ) /* comes back with EBX = address of the ADD below */ + ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX ) + MOV_L( REGOFF(clip_table@GOT, EBX), EBX ) + PUSH_L( EBX ) + JMP( LLBL(v16_ctp4_clip_table_ready) ) + +LLBL(v16_ctp4_get_eip): + /* store eip in ebx */ + MOV_L( REGIND(ESP), EBX ) + RET + +LLBL(v16_ctp4_clip_table_ready): +#endif + + MOV_L( ARG_V16_SOURCE, ESI ) /* ptr to first source vertex */ + MOV_L( ARG_V16_LAST, EDX ) /* end pointer: the loop stops when ESI reaches it, so the vertex at this address is not tested */ + MOV_L( ARG_V16_OR, EBX ) + MOV_L( ARG_V16_AND, EBP ) + MOV_L( ARG_V16_MASK, EDI ) /* EDI = per-vertex clipmask output */ + + CMP_L( EDX, ESI ) /* equal means an empty range */ + + MOV_B( REGIND(EBX), AL ) /* AL = incoming or-mask */ + MOV_B( REGIND(EBP), AH ) /* AH = incoming and-mask; MOV leaves the CMP flags intact */ + + JZ( LLBL(v16_ctp4_finish) ) + + ALIGNTEXT4ifNOP +LLBL(v16_ctp4_top): +#if 0 + FLD1 /* F0 */ + FDIV_S( S(3) ) +#endif + + MOV_L( S(3), EBP ) + MOV_L( S(2), EBX ) + + XOR_L( ECX, ECX ) /* ECX collects the table index, one bit per ADC */ + ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */ + + ADC_L( ECX, ECX ) + ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */ + + ADC_L( ECX, ECX ) + CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */ + + ADC_L( ECX, ECX ) + MOV_L( S(1), EBX 
) + + ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */ + + ADC_L( ECX, ECX ) + CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */ + + ADC_L( ECX, ECX ) + MOV_L( S(0), EBX ) + + ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */ + + ADC_L( ECX, ECX ) + CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */ + + ADC_L( ECX, ECX ) + +#ifdef ELFPIC + MOV_L( REGIND(ESP), EBP ) /* clip_table */ + + MOV_B( REGBI(EBP, ECX), CL ) +#else + MOV_B( REGOFF(clip_table,ECX), CL ) +#endif + + OR_B( CL, AL ) + AND_B( CL, AH ) + + MOV_B( CL, REGIND(EDI) ) /* save clipmask */ + INC_L( EDI ) /* next clipmask */ + +#if 0 + FSTP_S( S(8) ) /* */ /* GR_VERTEX_OOW_OFFSET */ +#endif + + ADD_L( CONST(64), ESI ) /* next fxVertex */ + + CMP_L( EDX, ESI ) /* finished? */ + JNZ( LLBL(v16_ctp4_top) ) + + MOV_L( ARG_V16_OR, ECX ) + MOV_L( ARG_V16_AND, EDX ) + + MOV_B( AL, REGIND(ECX) ) /* write the or-mask back */ + MOV_B( AH, REGIND(EDX) ) /* write the and-mask back */ + +LLBL(v16_ctp4_finish): + +#ifdef ELFPIC + POP_L( ESI ) /* discard ptr to clip_table */ +#endif + POP_L( EBX ) + POP_L( EBP ) + POP_L( EDI ) + POP_L( ESI ) + + RET diff --git a/xc/extras/Mesa/src/X86/x86_xform_masked2.S b/xc/extras/Mesa/src/X86/x86_xform_masked2.S new file mode 100644 index 000000000..1b8240bfb --- /dev/null +++ b/xc/extras/Mesa/src/X86/x86_xform_masked2.S @@ -0,0 +1,610 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "assyntax.h" +#include "xform_args.h" + + SEG_TEXT + +#define FRAME_OFFSET 16 /* every routine in this file pushes four registers */ + +#define FP_ONE 1065353216 /* 0x3f800000 */ +#define FP_ZERO 0 + +#define SRC(i) REGOFF(i * 4, ESI) +#define DST(i) REGOFF(i * 4, EDI) +#define MAT(i) REGOFF(i * 4, EDX) + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_general_masked ) +GLNAME( gl_x86_transform_points2_general_masked ): /* with x = SRC(0), y = SRC(1): DST(j) = x*MAT(j) + y*MAT(4+j) + MAT(12+j) for j = 0..3, skipping vertices whose clip byte has an ARG_FLAG bit set */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_gm_done ) ) /* count == 0: dest is left untouched */ + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) /* count * 16 bytes */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) /* ECX = end of the dest data */ + +ALIGNTEXT16 +LLBL( x86_p2_gm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p2_gm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + FLD_S( SRC(0) ) /* F7 F6 F5 F4 */ + FMUL_S( MAT(3) ) + + FLD_S( SRC(1) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(6) ) + FLD_S( SRC(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(7) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FXCH( ST(3) ) /* F4 F6 F5 F7 */ + FADD_S( MAT(12) ) + FXCH( ST(2) ) /* F5 F6 F4 F7 */ + FADD_S( MAT(13) ) + FXCH( ST(1) ) /* F6 F5 F4 F7 */ + FADD_S( MAT(14) ) + FXCH( ST(3) ) /* F7 F5 
F4 F6 */ + FADD_S( MAT(15) ) + + FXCH( ST(2) ) /* F4 F5 F7 F6 */ + FSTP_S( DST(0) ) /* F5 F7 F6 */ + FSTP_S( DST(1) ) /* F7 F6 */ + FXCH( ST(1) ) /* F6 F7 */ + FSTP_S( DST(2) ) /* F7 */ + FSTP_S( DST(3) ) /* */ + +LLBL( x86_p2_gm_skip ): + + INC_L( EBP ) /* next clip byte */ + ADD_L( CONST(16), EDI ) /* dest vertices are 16 bytes apart */ + ADD_L( EAX, ESI ) /* advance by the source stride */ + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_gm_loop ) ) + +LLBL( x86_p2_gm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_perspective_masked ) +GLNAME( gl_x86_transform_points2_perspective_masked ): /* unmasked vertices get DST = (x*MAT(0), y*MAT(5), MAT(14), 0) */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_pm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + /* need eax; put stride in ARG_SOURCE */ + MOV_L( EAX, ARG_SOURCE ) + + MOV_L( MAT(14), EAX ) /* loaded once; the same value is stored to DST(2) of every unmasked vertex */ + +ALIGNTEXT16 +LLBL( x86_p2_pm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p2_pm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F4 F1 */ + FSTP_S( DST(0) ) /* F1 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EAX, DST(2) ) + MOV_L( CONST(FP_ZERO), DST(3) ) + +LLBL( x86_p2_pm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_pm_loop ) ) + +LLBL( x86_p2_pm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_3d_masked ) +GLNAME( gl_x86_transform_points2_3d_masked ): /* unmasked vertices get DST(j) = x*MAT(j) + y*MAT(4+j) + MAT(12+j) for j = 0..2; DST(3) is not written */ + + 
PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_3dm_done ) ) /* count == 0: dest is left untouched */ + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) /* count * 16 bytes */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) /* ECX = end of the dest data */ + +ALIGNTEXT16 +LLBL( x86_p2_3dm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p2_3dm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + + FLD_S( SRC(1) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(6) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + FXCH( ST(2) ) /* F4 F5 F6 */ + FADD_S( MAT(12) ) + FXCH( ST(1) ) /* F5 F4 F6 */ + FADD_S( MAT(13) ) + FXCH( ST(2) ) /* F6 F4 F5 */ + FADD_S( MAT(14) ) + + FXCH( ST(1) ) /* F4 F6 F5 */ + FSTP_S( DST(0) ) /* F6 F5 */ + FXCH( ST(1) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + +LLBL( x86_p2_3dm_skip ): + + INC_L( EBP ) /* next clip byte */ + ADD_L( CONST(16), EDI ) /* dest vertices are 16 bytes apart */ + ADD_L( EAX, ESI ) /* advance by the source stride */ + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_3dm_loop ) ) + +LLBL( x86_p2_3dm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_3d_no_rot_masked ) +GLNAME( gl_x86_transform_points2_3d_no_rot_masked ): /* unmasked vertices get DST = (x*MAT(0) + MAT(12), y*MAT(5) + MAT(13), MAT(14)); DST(3) is not written */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + 
MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_3dnrm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + /* need eax; put stride in ARG_SOURCE */ + MOV_L( EAX, ARG_SOURCE ) + + MOV_L( MAT(14), EAX ) /* loaded once; the same value is stored to DST(2) of every unmasked vertex */ + +ALIGNTEXT16 +LLBL( x86_p2_3dnrm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p2_3dnrm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F4 F1 */ + FADD_S( MAT(12) ) + FLD_S( MAT(13) ) /* F5 F4 F1 */ + FXCH( ST(2) ) /* F1 F4 F5 */ + FADDP( ST(0), ST(2) ) /* F4 F5 */ + + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EAX, DST(2) ) + +LLBL( x86_p2_3dnrm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_3dnrm_loop ) ) + +LLBL( x86_p2_3dnrm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_2d_masked ) +GLNAME( gl_x86_transform_points2_2d_masked ): /* unmasked vertices get DST(j) = x*MAT(j) + y*MAT(4+j) + MAT(12+j) for j = 0..1; DST(2) and DST(3) are not written */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_2dm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 
+LLBL( x86_p2_2dm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p2_2dm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + + FLD_S( SRC(1) ) /* F0 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + FXCH( ST(1) ) /* F4 F5 */ + FADD_S( MAT(12) ) + FXCH( ST(1) ) /* F5 F4 */ + FADD_S( MAT(13) ) + + FXCH( ST(1) ) /* F4 F5 */ + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + +LLBL( x86_p2_2dm_skip ): + + INC_L( EBP ) /* next clip byte */ + ADD_L( CONST(16), EDI ) /* dest vertices are 16 bytes apart */ + ADD_L( EAX, ESI ) /* advance by the source stride */ + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_2dm_loop ) ) + +LLBL( x86_p2_2dm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_2d_no_rot_masked ) +GLNAME( gl_x86_transform_points2_2d_no_rot_masked ): /* with x = SRC(0), y = SRC(1): DST = (x*MAT(0) + MAT(12), y*MAT(5) + MAT(13)) for vertices whose clip byte has no ARG_FLAG bit set */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_2dnrm_done ) ) /* count == 0: dest is left untouched */ + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) /* count * 16 bytes */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) /* ECX = end of the dest data */ + +ALIGNTEXT16 +LLBL( x86_p2_2dnrm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p2_2dnrm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F4 F1 */ + FADD_S( MAT(12) ) + FLD_S( MAT(13) ) /* F5 F4 F1 */ + FXCH( ST(2) ) /* F1 F4 F5 */ + FADDP( ST(0), ST(2) ) /* F4 F5 */ + + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + +LLBL( 
x86_p2_2dnrm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_2dnrm_loop ) ) + +LLBL( x86_p2_2dnrm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_identity_masked ) +GLNAME( gl_x86_transform_points2_identity_masked ): /* copies SRC(0) and SRC(1) of each unmasked vertex to dest; the copy loop is skipped when source and dest data start at the same address */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_im_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + /* need eax; put stride in ARG_SOURCE */ + MOV_L( EAX, ARG_SOURCE ) + + CMP_L( ESI, EDI ) + JE( LLBL( x86_p2_im_done ) ) /* same start address: nothing to copy */ + +ALIGNTEXT16 +LLBL( x86_p2_im_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p2_im_skip ) ) /* clip byte & flag != 0 */ + + MOV_L( SRC(0), EAX ) + MOV_L( SRC(1), EDX ) /* EDX is free here: the matrix pointer it held is never used by this routine */ + + MOV_L( EAX, DST(0) ) + MOV_L( EDX, DST(1) ) + +LLBL( x86_p2_im_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_im_loop ) ) + +LLBL( x86_p2_im_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET diff --git a/xc/extras/Mesa/src/X86/x86_xform_masked3.S b/xc/extras/Mesa/src/X86/x86_xform_masked3.S new file mode 100644 index 000000000..54bed7e36 --- /dev/null +++ b/xc/extras/Mesa/src/X86/x86_xform_masked3.S @@ -0,0 +1,670 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "assyntax.h" +#include "xform_args.h" + + SEG_TEXT + +#define FRAME_OFFSET 16 /* every routine in this file pushes four registers */ + +#define FP_ONE 1065353216 /* 0x3f800000 */ +#define FP_ZERO 0 + +#define SRC(i) REGOFF(i * 4, ESI) +#define DST(i) REGOFF(i * 4, EDI) +#define MAT(i) REGOFF(i * 4, EDX) + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_general_masked ) +GLNAME( gl_x86_transform_points3_general_masked ): /* with x = SRC(0), y = SRC(1), z = SRC(2): DST(j) = x*MAT(j) + y*MAT(4+j) + z*MAT(8+j) + MAT(12+j) for j = 0..3, skipping vertices whose clip byte has an ARG_FLAG bit set */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_gm_done ) ) /* count == 0: dest is left untouched */ + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) /* count * 16 bytes */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) /* ECX = end of the dest data */ + +ALIGNTEXT16 +LLBL( x86_p3_gm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p3_gm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + FLD_S( SRC(0) ) /* F7 F6 F5 F4 */ + FMUL_S( MAT(3) ) + + FLD_S( SRC(1) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(6) ) + FLD_S( SRC(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(7) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FLD_S( SRC(2) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(10) ) + 
FLD_S( SRC(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(11) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FXCH( ST(3) ) /* F4 F6 F5 F7 */ + FADD_S( MAT(12) ) + FXCH( ST(2) ) /* F5 F6 F4 F7 */ + FADD_S( MAT(13) ) + FXCH( ST(1) ) /* F6 F5 F4 F7 */ + FADD_S( MAT(14) ) + FXCH( ST(3) ) /* F7 F5 F4 F6 */ + FADD_S( MAT(15) ) + + FXCH( ST(2) ) /* F4 F5 F7 F6 */ + FSTP_S( DST(0) ) /* F5 F7 F6 */ + FSTP_S( DST(1) ) /* F7 F6 */ + FXCH( ST(1) ) /* F6 F7 */ + FSTP_S( DST(2) ) /* F7 */ + FSTP_S( DST(3) ) /* */ + +LLBL( x86_p3_gm_skip ): + + INC_L( EBP ) /* next clip byte */ + ADD_L( CONST(16), EDI ) /* dest vertices are 16 bytes apart */ + ADD_L( EAX, ESI ) /* advance by the source stride */ + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_gm_loop ) ) + +LLBL( x86_p3_gm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_perspective_masked ) +GLNAME( gl_x86_transform_points3_perspective_masked ): /* unmasked vertices get DST = (x*MAT(0) + z*MAT(8), y*MAT(5) + z*MAT(9), z*MAT(10) + MAT(14), -z) */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_pm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + /* need eax; put stride in ARG_SOURCE */ + MOV_L( EAX, ARG_SOURCE ) + +ALIGNTEXT16 +LLBL( x86_p3_pm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p3_pm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F5 F4 */ + FMUL_S( MAT(5) ) + + 
FLD_S( SRC(2) ) /* F0 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F5 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */ + FADDP( ST(0), ST(4) ) /* F1 F2 F5 F4 */ + FADDP( ST(0), ST(2) ) /* F2 F5 F4 */ + FLD_S( MAT(14) ) /* F6 F2 F5 F4 */ + FXCH( ST(1) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + MOV_L( SRC(2), EAX ) + XOR_L( CONST(-2147483648), EAX )/* change sign */ /* -2147483648 is 0x80000000, so only the sign bit of z flips */ + + FXCH( ST(2) ) /* F4 F5 F6 */ + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + MOV_L( EAX, DST(3) ) + +LLBL( x86_p3_pm_skip ): + + INC_L( EBP ) /* next clip byte */ + ADD_L( CONST(16), EDI ) /* dest vertices are 16 bytes apart */ + ADD_L( ARG_SOURCE, ESI ) /* ARG_SOURCE holds the source stride here */ + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_pm_loop ) ) + +LLBL( x86_p3_pm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_3d_masked ) +GLNAME( gl_x86_transform_points3_3d_masked ): /* with x = SRC(0), y = SRC(1), z = SRC(2): DST(j) = x*MAT(j) + y*MAT(4+j) + z*MAT(8+j) + MAT(12+j) for j = 0..2, skipping vertices whose clip byte has an ARG_FLAG bit set; DST(3) is not written */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_3dm_done ) ) /* count == 0: dest is left untouched */ + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) /* count * 16 bytes */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) /* ECX = end of the dest data */ + +ALIGNTEXT16 +LLBL( x86_p3_3dm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p3_3dm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + + FLD_S( SRC(1) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( 
MAT(6) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + FLD_S( SRC(2) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + FXCH( ST(2) ) /* F4 F5 F6 */ + FADD_S( MAT(12) ) + FXCH( ST(1) ) /* F5 F4 F6 */ + FADD_S( MAT(13) ) + FXCH( ST(2) ) /* F6 F4 F5 */ + FADD_S( MAT(14) ) + + FXCH( ST(1) ) /* F4 F6 F5 */ + FSTP_S( DST(0) ) /* F6 F5 */ + FXCH( ST(1) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + +LLBL( x86_p3_3dm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_3dm_loop ) ) + +LLBL( x86_p3_3dm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_3d_no_rot_masked ) +GLNAME( gl_x86_transform_points3_3d_no_rot_masked ): /* unmasked vertices get DST = (x*MAT(0) + MAT(12), y*MAT(5) + MAT(13), z*MAT(10) + MAT(14)); DST(3) is not written */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_3dnrm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p3_3dnrm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p3_3dnrm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( 
MAT(5) ) + + FLD_S( SRC(2) ) /* F2 F1 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F4 F1 F2 */ + FADD_S( MAT(12) ) + FLD_S( MAT(13) ) /* F5 F4 F1 F2 */ + FXCH( ST(2) ) /* F1 F4 F5 F2 */ + FADDP( ST(0), ST(2) ) /* F4 F5 F2 */ + FLD_S( MAT(14) ) /* F6 F4 F5 F2 */ + FXCH( ST(3) ) /* F2 F4 F5 F6 */ + FADDP( ST(0), ST(3) ) /* F4 F5 F6 */ + + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + +LLBL( x86_p3_3dnrm_skip ): + + INC_L( EBP ) /* next clip byte */ + ADD_L( CONST(16), EDI ) /* dest vertices are 16 bytes apart */ + ADD_L( EAX, ESI ) /* advance by the source stride */ + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_3dnrm_loop ) ) + +LLBL( x86_p3_3dnrm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_2d_masked ) +GLNAME( gl_x86_transform_points3_2d_masked ): /* with x = SRC(0), y = SRC(1), z = SRC(2): DST = (x*MAT(0) + y*MAT(4) + MAT(12), x*MAT(1) + y*MAT(5) + MAT(13), z) for vertices whose clip byte has no ARG_FLAG bit set */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_2dm_done ) ) /* count == 0: dest is left untouched */ + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) /* count * 16 bytes */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) /* ECX = end of the dest data */ + + /* need eax; put stride in ARG_SOURCE */ + MOV_L( EAX, ARG_SOURCE ) + +ALIGNTEXT16 +LLBL( x86_p3_2dm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p3_2dm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + + FLD_S( SRC(1) ) /* F0 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + FXCH( ST(1) ) /* F4 F5 */ + FADD_S( MAT(12) ) + FXCH( ST(1) ) /* F5 F4 */ + FADD_S( MAT(13) ) + + MOV_L( SRC(2), 
EAX ) /* z is copied through unchanged */ + + FXCH( ST(1) ) /* F4 F5 */ + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EAX, DST(2) ) + +LLBL( x86_p3_2dm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_2dm_loop ) ) + +LLBL( x86_p3_2dm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT4 /* NOTE(review): the other entry points in this file use ALIGNTEXT16 - confirm the 4 is intended */ +GLOBL GLNAME( gl_x86_transform_points3_2d_no_rot_masked ) +GLNAME( gl_x86_transform_points3_2d_no_rot_masked ): /* unmasked vertices get DST = (x*MAT(0) + MAT(12), y*MAT(5) + MAT(13), z) */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_2dnrm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + /* need eax; put stride in ARG_SOURCE */ + MOV_L( EAX, ARG_SOURCE ) + +ALIGNTEXT16 +LLBL( x86_p3_2dnrm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p3_2dnrm_skip ) ) /* clip byte & flag != 0 */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F4 F1 */ + FADD_S( MAT(12) ) + FLD_S( MAT(13) ) /* F5 F4 F1 */ + + FXCH( ST(2) ) /* F1 F4 F5 */ + FADDP( ST(0), ST(2) ) /* F4 F5 */ + + MOV_L( SRC(2), EAX ) + + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EAX, DST(2) ) + +LLBL( x86_p3_2dnrm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_2dnrm_loop ) ) + +LLBL( x86_p3_2dnrm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_identity_masked ) +GLNAME( 
gl_x86_transform_points3_identity_masked ): /* per vertex: copy src[0..2] to dst[0..2] as raw 32-bit words; a vertex is skipped when its ARG_CLIP byte shares a bit with ARG_FLAG */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_im_done ) ) /* count == 0: nothing to do */ + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) /* count*16: dest advances 16 bytes per vertex */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) /* ecx = dest start+count*16, the loop end pointer */ + + /* need eax and ecx as scratch in the loop; put stride in ARG_SOURCE and the dest end pointer in ARG_DEST */ + MOV_L( EAX, ARG_SOURCE ) + MOV_L( ECX, ARG_DEST ) + + CMP_L( ESI, EDI ) /* source and dest data start at the same address: */ + JE( LLBL( x86_p3_im_done ) ) /* skip the copy loop entirely */ + +ALIGNTEXT16 +LLBL( x86_p3_im_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p3_im_skip ) ) /* mask byte has a flagged bit: leave this dest vertex untouched */ + + MOV_L( SRC(0), EAX ) + MOV_L( SRC(1), ECX ) + MOV_L( SRC(2), EDX ) /* edx held the matrix pointer, but MAT() is never used in this routine */ + + MOV_L( EAX, DST(0) ) + MOV_L( ECX, DST(1) ) + MOV_L( EDX, DST(2) ) + +LLBL( x86_p3_im_skip ): + + INC_L( EBP ) /* next mask byte */ + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) /* source advances by the stride saved above */ + CMP_L( ARG_DEST, EDI ) /* end pointer lives in memory because ecx is scratch */ + JNE( LLBL( x86_p3_im_loop ) ) + +LLBL( x86_p3_im_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET diff --git a/xc/extras/Mesa/src/X86/x86_xform_masked4.S b/xc/extras/Mesa/src/X86/x86_xform_masked4.S new file mode 100644 index 000000000..3c8755dd8 --- /dev/null +++ b/xc/extras/Mesa/src/X86/x86_xform_masked4.S @@ -0,0 +1,710 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "assyntax.h" +#include "xform_args.h" + + SEG_TEXT + +#define FRAME_OFFSET 16 /* every routine in this file pushes four 4-byte registers (ESI, EDI, EBX, EBP); presumably read by the ARG_* macros from xform_args.h -- TODO confirm */ + +#define FP_ONE 1065353216 /* 0x3f800000, the IEEE-754 single-precision bit pattern of 1.0 */ +#define FP_ZERO 0 /* bit pattern of +0.0 */ + +#define SRC(i) REGOFF(i * 4, ESI) /* i-th 4-byte element of the current source vertex */ +#define DST(i) REGOFF(i * 4, EDI) /* i-th 4-byte element of the current dest vertex */ +#define MAT(i) REGOFF(i * 4, EDX) /* i-th 4-byte element of the matrix */ + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_general_masked ) +GLNAME( gl_x86_transform_points4_general_masked ): /* per vertex, for j = 0..3: dst[j] = src[0]*MAT(j)+src[1]*MAT(4+j)+src[2]*MAT(8+j)+src[3]*MAT(12+j); a vertex is skipped when its ARG_CLIP byte shares a bit with ARG_FLAG */ + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_gm_done ) ) /* count == 0: nothing to do */ + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) /* count*16: dest advances 16 bytes per vertex */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) /* ecx = dest start+count*16, the loop end pointer */ + +ALIGNTEXT16 +LLBL( x86_p4_gm_loop ): /* the trailing comments below track the x87 stack, top of stack first */ + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p4_gm_skip ) ) /* mask byte has a flagged bit: leave this dest vertex untouched */ + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + FLD_S( SRC(0) ) /* F7 F6 F5 F4 */ + FMUL_S( MAT(3) ) + + FLD_S( SRC(1) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(6) ) + FLD_S( SRC(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(7) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FLD_S( SRC(2) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(10) ) + 
FLD_S( SRC(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(11) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FLD_S( SRC(3) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(13) ) + FLD_S( SRC(3) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(14) ) + FLD_S( SRC(3) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(15) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FXCH( ST(3) ) /* F4 F6 F5 F7 */ + FSTP_S( DST(0) ) /* F6 F5 F7 */ + FXCH( ST(1) ) /* F5 F6 F7 */ + FSTP_S( DST(1) ) /* F6 F7 */ + FSTP_S( DST(2) ) /* F7 */ + FSTP_S( DST(3) ) /* */ + +LLBL( x86_p4_gm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_gm_loop ) ) + +LLBL( x86_p4_gm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_perspective_masked ) +GLNAME( gl_x86_transform_points4_perspective_masked ): + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_pm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( 
REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ + +ALIGNTEXT16 +LLBL( x86_p4_pm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p4_pm_skip ) ) + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F5 F4 */ + FMUL_S( MAT(5) ) + + FLD_S( SRC(2) ) /* F0 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F6 F1 F0 F5 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */ + FADDP( ST(0), ST(4) ) /* F1 F6 F5 F4 */ + FADDP( ST(0), ST(2) ) /* F6 F5 F4 */ + + FLD_S( SRC(3) ) /* F2 F6 F5 F4 */ + FMUL_S( MAT(14) ) + + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + MOV_L( SRC(2), EAX ) + XOR_L( CONST(-2147483648), EAX )/* change sign */ + + FXCH( ST(2) ) /* F4 F5 F6 */ + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + MOV_L( EAX, DST(3) ) + +LLBL( x86_p4_pm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_pm_loop ) ) + +LLBL( x86_p4_pm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_3d_masked ) +GLNAME( gl_x86_transform_points4_3d_masked ): + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_3dm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ + +ALIGNTEXT16 +LLBL( x86_p4_3dm_loop 
): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p4_3dm_skip ) ) + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + + FLD_S( SRC(1) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(6) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + FLD_S( SRC(2) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + FLD_S( SRC(3) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(13) ) + FLD_S( SRC(3) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(14) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + MOV_L( SRC(3), EAX ) + + FXCH( ST(2) ) /* F4 F5 F6 */ + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + MOV_L( EAX, DST(3) ) + +LLBL( x86_p4_3dm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_3dm_loop ) ) + +LLBL( x86_p4_3dm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_3d_no_rot_masked ) +GLNAME( gl_x86_transform_points4_3d_no_rot_masked ): + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( 
REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_3dnrm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ + +ALIGNTEXT16 +LLBL( x86_p4_3dnrm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p4_3dnrm_skip ) ) + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F5 F4 */ + FMUL_S( MAT(5) ) + + FLD_S( SRC(2) ) /* F6 F5 F4 */ + FMUL_S( MAT(10) ) + + FLD_S( SRC(3) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(13) ) + FLD_S( SRC(3) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(14) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + MOV_L( SRC(3), EAX ) + + FXCH( ST(2) ) /* F4 F5 F6 */ + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + MOV_L( EAX, DST(3) ) + +LLBL( x86_p4_3dnrm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_3dnrm_loop ) ) + +LLBL( x86_p4_3dnrm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_2d_masked ) +GLNAME( gl_x86_transform_points4_2d_masked ): + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_2dm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), 
REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ + MOV_L( ECX, ARG_DEST ) /* need ecx; put dest+count in ARG_DEST */ + +ALIGNTEXT16 +LLBL( x86_p4_2dm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p4_2dm_skip ) ) + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + + FLD_S( SRC(1) ) /* F0 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + FLD_S( SRC(3) ) /* F0 F5 F4 */ + FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(13) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + MOV_L( SRC(2), EAX ) + MOV_L( SRC(3), ECX ) + + FXCH( ST(1) ) /* F4 F5 */ + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EAX, DST(2) ) + MOV_L( ECX, DST(3) ) + +LLBL( x86_p4_2dm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ARG_DEST, EDI ) + JNE( LLBL( x86_p4_2dm_loop ) ) + +LLBL( x86_p4_2dm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_2d_no_rot_masked ) +GLNAME( gl_x86_transform_points4_2d_no_rot_masked ): + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_2dnrm_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, 
REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ + MOV_L( ECX, ARG_DEST ) /* need ecx; put dest+count in ARG_DEST */ + +ALIGNTEXT16 +LLBL( x86_p4_2dnrm_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p4_2dnrm_skip ) ) + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F5 F4 */ + FMUL_S( MAT(5) ) + + FLD_S( SRC(3) ) /* F0 F5 F4 */ + FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(13) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + MOV_L( SRC(2), EAX ) + MOV_L( SRC(3), ECX ) + + FXCH( ST(1) ) /* F4 F5 */ + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EAX, DST(2) ) + MOV_L( ECX, DST(3) ) + +LLBL( x86_p4_2dnrm_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ARG_DEST, EDI ) + JNE( LLBL( x86_p4_2dnrm_loop ) ) + +LLBL( x86_p4_2dnrm_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_identity_masked ) +GLNAME( gl_x86_transform_points4_identity_masked ): + + PUSH_L( ESI ) + PUSH_L( EDI ) + + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( ARG_CLIP, EBP ) + + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + MOV_B( ARG_FLAG, BL ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_im_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + /* need eax; put stride in ARG_SOURCE */ + MOV_L( EAX, ARG_SOURCE 
) + + CMP_L( ESI, EDI ) + JE( LLBL( x86_p4_im_done ) ) + +ALIGNTEXT16 +LLBL( x86_p4_im_loop ): + + TEST_B( BL, REGIND(EBP) ) + JNZ( LLBL( x86_p4_im_skip ) ) + + MOV_L( SRC(0), EAX ) + MOV_L( SRC(1), EDX ) + + MOV_L( EAX, DST(0) ) + MOV_L( EDX, DST(1) ) + + MOV_L( SRC(2), EAX ) + MOV_L( SRC(3), EDX ) + + MOV_L( EAX, DST(2) ) + MOV_L( EDX, DST(3) ) + +LLBL( x86_p4_im_skip ): + + INC_L( EBP ) + ADD_L( CONST(16), EDI ) + ADD_L( ARG_SOURCE, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_im_loop ) ) + +LLBL( x86_p4_im_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET diff --git a/xc/extras/Mesa/src/X86/x86_xform_raw2.S b/xc/extras/Mesa/src/X86/x86_xform_raw2.S new file mode 100644 index 000000000..fd6b59617 --- /dev/null +++ b/xc/extras/Mesa/src/X86/x86_xform_raw2.S @@ -0,0 +1,535 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "assyntax.h" +#include "xform_args.h" + + SEG_TEXT + +#define FP_ONE 1065353216 +#define FP_ZERO 0 + +#define SRC(i) REGOFF(i * 4, ESI) +#define DST(i) REGOFF(i * 4, EDI) +#define MAT(i) REGOFF(i * 4, EDX) + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_general_raw ) +GLNAME( gl_x86_transform_points2_general_raw ): + +#define FRAME_OFFSET 8 + PUSH_L( ESI ) + PUSH_L( EDI ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_gr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p2_gr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + FLD_S( SRC(0) ) /* F7 F6 F5 F4 */ + FMUL_S( MAT(3) ) + + FLD_S( SRC(1) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(6) ) + FLD_S( SRC(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(7) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FXCH( ST(3) ) /* F4 F6 F5 F7 */ + FADD_S( MAT(12) ) + FXCH( ST(2) ) /* F5 F6 F4 F7 */ + FADD_S( MAT(13) ) + FXCH( ST(1) ) /* F6 F5 F4 F7 */ + FADD_S( MAT(14) ) + FXCH( ST(3) ) /* F7 F5 F4 F6 */ + FADD_S( MAT(15) ) + + FXCH( ST(2) ) /* F4 F5 F7 F6 */ + FSTP_S( DST(0) ) /* F5 F7 F6 */ + FSTP_S( DST(1) ) /* F7 F6 */ + FXCH( ST(1) ) /* F6 F7 
*/ + FSTP_S( DST(2) ) /* F7 */ + FSTP_S( DST(3) ) /* */ + +LLBL( x86_p2_gr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_gr_loop ) ) + +LLBL( x86_p2_gr_done ): + + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_perspective_raw ) +GLNAME( gl_x86_transform_points2_perspective_raw ): /* per vertex: dst[0] = src[0]*MAT(0), dst[1] = src[1]*MAT(5), dst[2] = MAT(14), dst[3] = 0; this routine performs no clip-mask test, every vertex is written */ + +#define FRAME_OFFSET 12 /* three 4-byte registers are pushed below; presumably read by the ARG_* macros from xform_args.h -- TODO confirm */ + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_pr_done ) ) /* count == 0: nothing to do */ + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) /* count*16: dest advances 16 bytes per vertex */ + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) /* ecx = dest start+count*16, the loop end pointer */ + + MOV_L( MAT(14), EBX ) /* loop-invariant: keep the raw bits of MAT(14) in ebx for dst[2] */ + +ALIGNTEXT16 +LLBL( x86_p2_pr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F4 F1 */ + FSTP_S( DST(0) ) /* F1 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EBX, DST(2) ) + MOV_L( CONST(FP_ZERO), DST(3) ) /* integer store of the 0.0 bit pattern */ + +LLBL( x86_p2_pr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) /* eax still holds the source stride; it is not used as scratch here */ + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_pr_loop ) ) + +LLBL( x86_p2_pr_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_3d_raw ) +GLNAME( gl_x86_transform_points2_3d_raw ): + +#define FRAME_OFFSET 8 + PUSH_L( ESI ) + PUSH_L( EDI ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_3dr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), 
REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p2_3dr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + + FLD_S( SRC(1) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(6) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + FXCH( ST(2) ) /* F4 F5 F6 */ + FADD_S( MAT(12) ) + FXCH( ST(1) ) /* F5 F4 F6 */ + FADD_S( MAT(13) ) + FXCH( ST(2) ) /* F6 F4 F5 */ + FADD_S( MAT(14) ) + + FXCH( ST(1) ) /* F4 F6 F5 */ + FSTP_S( DST(0) ) /* F6 F5 */ + FXCH( ST(1) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + +LLBL( x86_p2_3dr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_3dr_loop ) ) + +LLBL( x86_p2_3dr_done ): + + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_3d_no_rot_raw ) +GLNAME( gl_x86_transform_points2_3d_no_rot_raw ): + +#define FRAME_OFFSET 12 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_3dnrr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + MOV_L( MAT(14), EBX ) + +ALIGNTEXT16 +LLBL( x86_p2_3dnrr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( 
MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F4 F1 */ + FADD_S( MAT(12) ) + FLD_S( MAT(13) ) /* F5 F4 F1 */ + FXCH( ST(2) ) /* F1 F4 F5 */ + FADDP( ST(0), ST(2) ) /* F4 F5 */ + + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EBX, DST(2) ) + +LLBL( x86_p2_3dnrr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_3dnrr_loop ) ) + +LLBL( x86_p2_3dnrr_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_2d_raw ) +GLNAME( gl_x86_transform_points2_2d_raw ): + +#define FRAME_OFFSET 8 + PUSH_L( ESI ) + PUSH_L( EDI ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_2dr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p2_2dr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + + FLD_S( SRC(1) ) /* F0 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + FXCH( ST(1) ) /* F4 F5 */ + FADD_S( MAT(12) ) + FXCH( ST(1) ) /* F5 F4 */ + FADD_S( MAT(13) ) + + FXCH( ST(1) ) /* F4 F5 */ + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + +LLBL( x86_p2_2dr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_2dr_loop ) ) + +LLBL( x86_p2_2dr_done ): + + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT4 +GLOBL GLNAME( 
gl_x86_transform_points2_2d_no_rot_raw ) +GLNAME( gl_x86_transform_points2_2d_no_rot_raw ): + +#define FRAME_OFFSET 8 + PUSH_L( ESI ) + PUSH_L( EDI ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_2dnrr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p2_2dnrr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F4 F1 */ + FADD_S( MAT(12) ) + FLD_S( MAT(13) ) /* F5 F4 F1 */ + FXCH( ST(2) ) /* F1 F4 F5 */ + FADDP( ST(0), ST(2) ) /* F4 F5 */ + + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + +LLBL( x86_p2_2dnrr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_2dnrr_loop ) ) + +LLBL( x86_p2_2dnrr_done ): + + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points2_identity_raw ) +GLNAME( gl_x86_transform_points2_identity_raw ): + +#define FRAME_OFFSET 12 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p2_ir_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + CMP_L( ESI, EDI ) + JE( LLBL( x86_p2_ir_done ) ) + +ALIGNTEXT16 +LLBL( x86_p2_ir_loop ): + + MOV_L( SRC(0), 
EBX ) + MOV_L( SRC(1), EDX ) + + MOV_L( EBX, DST(0) ) + MOV_L( EDX, DST(1) ) + +LLBL( x86_p2_ir_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p2_ir_loop ) ) + +LLBL( x86_p2_ir_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET diff --git a/xc/extras/Mesa/src/X86/x86_xform_raw3.S b/xc/extras/Mesa/src/X86/x86_xform_raw3.S new file mode 100644 index 000000000..b32afee90 --- /dev/null +++ b/xc/extras/Mesa/src/X86/x86_xform_raw3.S @@ -0,0 +1,605 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "assyntax.h" +#include "xform_args.h" + + SEG_TEXT + +#define FP_ONE 1065353216 +#define FP_ZERO 0 + +#define SRC(i) REGOFF(i * 4, ESI) +#define DST(i) REGOFF(i * 4, EDI) +#define MAT(i) REGOFF(i * 4, EDX) + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_general_raw ) +GLNAME( gl_x86_transform_points3_general_raw ): + +#define FRAME_OFFSET 8 + PUSH_L( ESI ) + PUSH_L( EDI ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_gr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p3_gr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + FLD_S( SRC(0) ) /* F7 F6 F5 F4 */ + FMUL_S( MAT(3) ) + + FLD_S( SRC(1) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(6) ) + FLD_S( SRC(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(7) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FLD_S( SRC(2) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(10) ) + FLD_S( SRC(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(11) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 
F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FXCH( ST(3) ) /* F4 F6 F5 F7 */ + FADD_S( MAT(12) ) + FXCH( ST(2) ) /* F5 F6 F4 F7 */ + FADD_S( MAT(13) ) + FXCH( ST(1) ) /* F6 F5 F4 F7 */ + FADD_S( MAT(14) ) + FXCH( ST(3) ) /* F7 F5 F4 F6 */ + FADD_S( MAT(15) ) + + FXCH( ST(2) ) /* F4 F5 F7 F6 */ + FSTP_S( DST(0) ) /* F5 F7 F6 */ + FSTP_S( DST(1) ) /* F7 F6 */ + FXCH( ST(1) ) /* F6 F7 */ + FSTP_S( DST(2) ) /* F7 */ + FSTP_S( DST(3) ) /* */ + +LLBL( x86_p3_gr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_gr_loop ) ) + +LLBL( x86_p3_gr_done ): + + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_perspective_raw ) +GLNAME( gl_x86_transform_points3_perspective_raw ): + +#define FRAME_OFFSET 12 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_pr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p3_pr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F5 F4 */ + FMUL_S( MAT(5) ) + + FLD_S( SRC(2) ) /* F0 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F5 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */ + FADDP( ST(0), ST(4) ) /* F1 F2 F5 F4 */ + FADDP( ST(0), ST(2) ) /* F2 F5 F4 */ + FLD_S( MAT(14) ) /* F6 F2 F5 F4 */ + FXCH( ST(1) ) /* F2 F6 F5 F4 */ + FADDP( 
ST(0), ST(1) ) /* F6 F5 F4 */ + + MOV_L( SRC(2), EBX ) + XOR_L( CONST(-2147483648), EBX )/* change sign */ + + FXCH( ST(2) ) /* F4 F5 F6 */ + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + MOV_L( EBX, DST(3) ) + +LLBL( x86_p3_pr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_pr_loop ) ) + +LLBL( x86_p3_pr_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_3d_raw ) +GLNAME( gl_x86_transform_points3_3d_raw ): + +#define FRAME_OFFSET 8 + PUSH_L( ESI ) + PUSH_L( EDI ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_3dr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p3_3dr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + + FLD_S( SRC(1) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(6) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + FLD_S( SRC(2) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 
F5 F4 */ + + FXCH( ST(2) ) /* F4 F5 F6 */ + FADD_S( MAT(12) ) + FXCH( ST(1) ) /* F5 F4 F6 */ + FADD_S( MAT(13) ) + FXCH( ST(2) ) /* F6 F4 F5 */ + FADD_S( MAT(14) ) + + FXCH( ST(1) ) /* F4 F6 F5 */ + FSTP_S( DST(0) ) /* F6 F5 */ + FXCH( ST(1) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + +LLBL( x86_p3_3dr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_3dr_loop ) ) + +LLBL( x86_p3_3dr_done ): + + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_3d_no_rot_raw ) +GLNAME( gl_x86_transform_points3_3d_no_rot_raw ): + +#define FRAME_OFFSET 8 + PUSH_L( ESI ) + PUSH_L( EDI ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_3dnrr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p3_3dnrr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( MAT(5) ) + + FLD_S( SRC(2) ) /* F2 F1 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F4 F1 F2 */ + FADD_S( MAT(12) ) + FLD_S( MAT(13) ) /* F5 F4 F1 F2 */ + FXCH( ST(2) ) /* F1 F4 F5 F2 */ + FADDP( ST(0), ST(2) ) /* F4 F5 F2 */ + FLD_S( MAT(14) ) /* F6 F4 F5 F2 */ + FXCH( ST(3) ) /* F2 F4 F5 F6 */ + FADDP( ST(0), ST(3) ) /* F4 F5 F6 */ + + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + +LLBL( x86_p3_3dnrr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_3dnrr_loop ) ) + +LLBL( x86_p3_3dnrr_done ): + + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 
+GLOBL GLNAME( gl_x86_transform_points3_2d_raw ) +GLNAME( gl_x86_transform_points3_2d_raw ): + +#define FRAME_OFFSET 12 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_2dr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p3_2dr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + + FLD_S( SRC(1) ) /* F0 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + FXCH( ST(1) ) /* F4 F5 */ + FADD_S( MAT(12) ) + FXCH( ST(1) ) /* F5 F4 */ + FADD_S( MAT(13) ) + + MOV_L( SRC(2), EBX ) + + FXCH( ST(1) ) /* F4 F5 */ + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EBX, DST(2) ) + +LLBL( x86_p3_2dr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_2dr_loop ) ) + +LLBL( x86_p3_2dr_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_2d_no_rot_raw ) +GLNAME( gl_x86_transform_points3_2d_no_rot_raw ): + +#define FRAME_OFFSET 12 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_2dnrr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( 
CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p3_2dnrr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F1 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F4 F1 */ + FADD_S( MAT(12) ) + FLD_S( MAT(13) ) /* F5 F4 F1 */ + + FXCH( ST(2) ) /* F1 F4 F5 */ + FADDP( ST(0), ST(2) ) /* F4 F5 */ + + MOV_L( SRC(2), EBX ) + + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EBX, DST(2) ) + +LLBL( x86_p3_2dnrr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_2dnrr_loop ) ) + +LLBL( x86_p3_2dnrr_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points3_identity_raw ) +GLNAME(gl_x86_transform_points3_identity_raw ): + +#define FRAME_OFFSET 16 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p3_ir_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + CMP_L( ESI, EDI ) + JE( LLBL( x86_p3_ir_done ) ) + +ALIGNTEXT16 +LLBL( x86_p3_ir_loop ): + +#if 1 + MOV_L( SRC(0), EBX ) + MOV_L( SRC(1), EBP ) + MOV_L( SRC(2), EDX ) + + MOV_L( EBX, DST(0) ) + MOV_L( EBP, DST(1) ) + MOV_L( EDX, DST(2) ) +#else + FLD_S( SRC(0) ) + FLD_S( SRC(1) ) + FLD_S( SRC(2) ) + + FSTP_S( DST(2) ) + FSTP_S( DST(1) ) + FSTP_S( DST(0) ) +#endif + +LLBL( x86_p3_ir_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p3_ir_loop ) ) + +LLBL( 
x86_p3_ir_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET diff --git a/xc/extras/Mesa/src/X86/x86_xform_raw4.S b/xc/extras/Mesa/src/X86/x86_xform_raw4.S new file mode 100644 index 000000000..29258acbd --- /dev/null +++ b/xc/extras/Mesa/src/X86/x86_xform_raw4.S @@ -0,0 +1,638 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "assyntax.h" +#include "xform_args.h" + + SEG_TEXT + +#define FP_ONE 1065353216 +#define FP_ZERO 0 + +#define SRC(i) REGOFF(i * 4, ESI) +#define DST(i) REGOFF(i * 4, EDI) +#define MAT(i) REGOFF(i * 4, EDX) + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_general_raw ) +GLNAME( gl_x86_transform_points4_general_raw ): + +#define FRAME_OFFSET 8 + PUSH_L( ESI ) + PUSH_L( EDI ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_gr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p4_gr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + FLD_S( SRC(0) ) /* F7 F6 F5 F4 */ + FMUL_S( MAT(3) ) + + FLD_S( SRC(1) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(6) ) + FLD_S( SRC(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(7) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FLD_S( SRC(2) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(10) ) + FLD_S( SRC(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(11) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 
F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FLD_S( SRC(3) ) /* F0 F7 F6 F5 F4 */ + FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(13) ) + FLD_S( SRC(3) ) /* F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(14) ) + FLD_S( SRC(3) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ + FMUL_S( MAT(15) ) + + FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ + FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ + + FXCH( ST(3) ) /* F4 F6 F5 F7 */ + FSTP_S( DST(0) ) /* F6 F5 F7 */ + FXCH( ST(1) ) /* F5 F6 F7 */ + FSTP_S( DST(1) ) /* F6 F7 */ + FSTP_S( DST(2) ) /* F7 */ + FSTP_S( DST(3) ) /* */ + +LLBL( x86_p4_gr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_gr_loop ) ) + +LLBL( x86_p4_gr_done ): + + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_perspective_raw ) +GLNAME( gl_x86_transform_points4_perspective_raw ): + +#define FRAME_OFFSET 12 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_pr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p4_pr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F5 F4 */ + FMUL_S( MAT(5) ) + + FLD_S( SRC(2) ) /* F0 F5 F4 */ + FMUL_S( MAT(8) ) 
+ FLD_S( SRC(2) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F6 F1 F0 F5 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */ + FADDP( ST(0), ST(4) ) /* F1 F6 F5 F4 */ + FADDP( ST(0), ST(2) ) /* F6 F5 F4 */ + + FLD_S( SRC(3) ) /* F2 F6 F5 F4 */ + FMUL_S( MAT(14) ) + + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + MOV_L( SRC(2), EBX ) + XOR_L( CONST(-2147483648), EBX )/* change sign */ + + FXCH( ST(2) ) /* F4 F5 F6 */ + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + MOV_L( EBX, DST(3) ) + +LLBL( x86_p4_pr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_pr_loop ) ) + +LLBL( x86_p4_pr_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_3d_raw ) +GLNAME( gl_x86_transform_points4_3d_raw ): + +#define FRAME_OFFSET 12 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_3dr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p4_3dr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + FLD_S( SRC(0) ) /* F6 F5 F4 */ + FMUL_S( MAT(2) ) + + FLD_S( SRC(1) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(5) ) + FLD_S( SRC(1) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(6) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + FLD_S( 
SRC(2) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(8) ) + FLD_S( SRC(2) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(9) ) + FLD_S( SRC(2) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(10) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + FLD_S( SRC(3) ) /* F0 F6 F5 F4 */ + FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(13) ) + FLD_S( SRC(3) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(14) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + MOV_L( SRC(3), EBX ) + + FXCH( ST(2) ) /* F4 F5 F6 */ + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + MOV_L( EBX, DST(3) ) + +LLBL( x86_p4_3dr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_3dr_loop ) ) + +LLBL( x86_p4_3dr_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME(gl_x86_transform_points4_3d_no_rot_raw) +GLNAME(gl_x86_transform_points4_3d_no_rot_raw): + +#define FRAME_OFFSET 12 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_3dnrr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p4_3dnrr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F5 F4 */ + FMUL_S( MAT(5) ) + + FLD_S( SRC(2) ) /* F6 F5 F4 */ + FMUL_S( MAT(10) ) + + FLD_S( SRC(3) ) /* F0 F6 F5 F4 */ + 
FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F6 F5 F4 */ + FMUL_S( MAT(13) ) + FLD_S( SRC(3) ) /* F2 F1 F0 F6 F5 F4 */ + FMUL_S( MAT(14) ) + + FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ + + MOV_L( SRC(3), EBX ) + + FXCH( ST(2) ) /* F4 F5 F6 */ + FSTP_S( DST(0) ) /* F5 F6 */ + FSTP_S( DST(1) ) /* F6 */ + FSTP_S( DST(2) ) /* */ + MOV_L( EBX, DST(3) ) + +LLBL( x86_p4_3dnrr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_3dnrr_loop ) ) + +LLBL( x86_p4_3dnrr_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_2d_raw ) +GLNAME( gl_x86_transform_points4_2d_raw ): + +#define FRAME_OFFSET 16 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_2dr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p4_2dr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + FLD_S( SRC(0) ) /* F5 F4 */ + FMUL_S( MAT(1) ) + + FLD_S( SRC(1) ) /* F0 F5 F4 */ + FMUL_S( MAT(4) ) + FLD_S( SRC(1) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(5) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + FLD_S( SRC(3) ) /* F0 F5 F4 */ + FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(13) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + MOV_L( SRC(2), EBX 
) + MOV_L( SRC(3), EBP ) + + FXCH( ST(1) ) /* F4 F5 */ + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EBX, DST(2) ) + MOV_L( EBP, DST(3) ) + +LLBL( x86_p4_2dr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_2dr_loop ) ) + +LLBL( x86_p4_2dr_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_2d_no_rot_raw ) +GLNAME( gl_x86_transform_points4_2d_no_rot_raw ): + +#define FRAME_OFFSET 16 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + PUSH_L( EBP ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_2dnrr_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + +ALIGNTEXT16 +LLBL( x86_p4_2dnrr_loop ): + + FLD_S( SRC(0) ) /* F4 */ + FMUL_S( MAT(0) ) + + FLD_S( SRC(1) ) /* F5 F4 */ + FMUL_S( MAT(5) ) + + FLD_S( SRC(3) ) /* F0 F5 F4 */ + FMUL_S( MAT(12) ) + FLD_S( SRC(3) ) /* F1 F0 F5 F4 */ + FMUL_S( MAT(13) ) + + FXCH( ST(1) ) /* F0 F1 F5 F4 */ + FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ + FADDP( ST(0), ST(1) ) /* F5 F4 */ + + MOV_L( SRC(2), EBX ) + MOV_L( SRC(3), EBP ) + + FXCH( ST(1) ) /* F4 F5 */ + FSTP_S( DST(0) ) /* F5 */ + FSTP_S( DST(1) ) /* */ + MOV_L( EBX, DST(2) ) + MOV_L( EBP, DST(3) ) + +LLBL( x86_p4_2dnrr_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_2dnrr_loop ) ) + +LLBL( x86_p4_2dnrr_done ): + + POP_L( EBP ) + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET +#undef FRAME_OFFSET + + + + +ALIGNTEXT16 +GLOBL GLNAME( gl_x86_transform_points4_identity_raw ) +GLNAME( 
gl_x86_transform_points4_identity_raw ): + +#define FRAME_OFFSET 12 + PUSH_L( ESI ) + PUSH_L( EDI ) + PUSH_L( EBX ) + + MOV_L( ARG_SOURCE, ESI ) + MOV_L( ARG_DEST, EDI ) + + MOV_L( ARG_MATRIX, EDX ) + MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) + + TEST_L( ECX, ECX ) + JZ( LLBL( x86_p4_ir_done ) ) + + MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) + OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) + + MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) + MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) + + SHL_L( CONST(4), ECX ) + MOV_L( REGOFF(V4F_START, ESI), ESI ) + + MOV_L( REGOFF(V4F_START, EDI), EDI ) + ADD_L( EDI, ECX ) + + CMP_L( ESI, EDI ) + JE( LLBL( x86_p4_ir_done ) ) + +ALIGNTEXT16 +LLBL( x86_p4_ir_loop ): + + MOV_L( SRC(0), EBX ) + MOV_L( SRC(1), EDX ) + + MOV_L( EBX, DST(0) ) + MOV_L( EDX, DST(1) ) + + MOV_L( SRC(2), EBX ) + MOV_L( SRC(3), EDX ) + + MOV_L( EBX, DST(2) ) + MOV_L( EDX, DST(3) ) + +LLBL( x86_p4_ir_skip ): + + ADD_L( CONST(16), EDI ) + ADD_L( EAX, ESI ) + CMP_L( ECX, EDI ) + JNE( LLBL( x86_p4_ir_loop ) ) + +LLBL( x86_p4_ir_done ): + + POP_L( EBX ) + POP_L( EDI ) + POP_L( ESI ) + RET diff --git a/xc/extras/Mesa/src/X86/x86a.S b/xc/extras/Mesa/src/X86/x86a.S deleted file mode 100644 index ac12457ee..000000000 --- a/xc/extras/Mesa/src/X86/x86a.S +++ /dev/null @@ -1,9081 +0,0 @@ -/* $XFree86: xc/extras/Mesa/src/X86/x86a.S,v 1.4 2000/09/26 15:56:40 tsi Exp $ */ - -#include "assyntax.h" - - SEG_TEXT - -#define FP_ONE 1065353216 -#define FP_ZERO 0 - -#define S(i) REGOFF(i * 4, ESI) -#define D(i) REGOFF(i * 4, EDI) -#define M(i) REGOFF(i * 4, EDX) - -/* - * Offsets into GLvector4f - */ -#define V4F_DATA 0 -#define V4F_START 4 -#define V4F_COUNT 8 -#define V4F_STRIDE 12 -#define V4F_SIZE 16 -#define V4F_FLAGS 20 - -#define VEC_SIZE_1 1 -#define VEC_SIZE_2 3 -#define VEC_SIZE_3 7 -#define VEC_SIZE_4 15 - -/* - * Offsets for transform_func arguments - * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLfloat m[16], - * const GLvector4f *from_vec, - * const GLubyte 
*clipmask, - * const GLubyte flag ); - */ -#define OFFSET_DEST 4 -#define OFFSET_MATRIX 8 -#define OFFSET_SOURCE 12 -#define OFFSET_CLIP 16 -#define OFFSET_FLAG 20 - -#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) -#define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP) -#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) -#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) -#define ARG_FLAG REGOFF(FRAME_OFFSET+OFFSET_FLAG, ESP) - -/* -######################################## -## -## Masked versions -## -######################################## -*/ - -/* -######################################## -## -## gl_x86_transform_points2_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_general_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_general_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p2mgm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - - ALIGNTEXT4ifNOP -LLBL(p2mgm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p2mgm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 
*/ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - FADD_S( M(14) ) - FXCH( ST(3) ) /* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p2mgm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2mgm_top) ) - - -LLBL(p2mgm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_identity_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_identity_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p2mim_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( 
CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p2mim_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p2mim_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p2mim_skip) ) - - MOV_L( S(0), EAX ) - MOV_L( S(1), EDX ) - - MOV_L( EAX, D(0) ) - MOV_L( EDX, D(1) ) -LLBL(p2mim_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2mim_top) ) - - -LLBL(p2mim_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_2d_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p2m2dm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( 
REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - - ALIGNTEXT4ifNOP -LLBL(p2m2dm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p2m2dm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - -LLBL(p2m2dm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2m2dm_top) ) - - -LLBL(p2m2dm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_2d_no_rot_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_no_rot_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p2m2dnrm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( 
CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - - ALIGNTEXT4ifNOP -LLBL(p2m2dnrm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p2m2dnrm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - -LLBL(p2m2dnrm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2m2dnrm_top) ) - - -LLBL(p2m2dnrm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p2m3dm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first 
dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - - ALIGNTEXT4ifNOP -LLBL(p2m3dm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p2m3dm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - -LLBL(p2m3dm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2m3dm_top) ) - - -LLBL(p2m3dm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_no_rot_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_no_rot_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p2m3dnrm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* 
set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - MOV_L( M(14), EAX ) - ALIGNTEXT4ifNOP -LLBL(p2m3dnrm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p2m3dnrm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EAX, D(2) ) - -LLBL(p2m3dnrm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2m3dnrm_top) ) - - -LLBL(p2m3dnrm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_perspective_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_perspective_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p2mpm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - 
- MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - MOV_L( M(14), EAX ) - ALIGNTEXT4ifNOP -LLBL(p2mpm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p2mpm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FSTP_S( D(0) ) /* F1 */ - FSTP_S( D(1) ) /* */ - MOV_L( EAX, D(2) ) - MOV_L( CONST(FP_ZERO), D(3) ) - -LLBL(p2mpm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2mpm_top) ) - - -LLBL(p2mpm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_general_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_general_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p3mgm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( 
CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - - ALIGNTEXT4ifNOP -LLBL(p3mgm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p3mgm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - FADD_S( M(14) ) - FXCH( ST(3) ) /* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p3mgm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( EAX, 
ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3mgm_top) ) - - -LLBL(p3mgm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_identity_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_identity_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p3mim_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - MOV_L( ECX, ARG_DEST ) /* need ecx; put dest+count in ARG_DEST */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p3mim_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p3mim_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p3mim_skip) ) - - MOV_L( S(0), EAX ) - MOV_L( S(1), ECX ) - MOV_L( S(2), EDX ) - - MOV_L( EAX, D(0) ) - MOV_L( ECX, D(1) ) - MOV_L( EDX, D(2) ) -LLBL(p3mim_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ARG_DEST, EDI ) - JNE( LLBL(p3mim_top) ) - - -LLBL(p3mim_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - 
POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_2d_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p3m2dm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p3m2dm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - MOV_L( S(2), EAX ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EAX, D(2) ) - -LLBL(p3m2dm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( 
ARG_SOURCE, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3m2dm_top) ) - - -LLBL(p3m2dm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_2d_no_rot_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_no_rot_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p3m2dnrm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dnrm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p3m2dnrm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - MOV_L( S(2), EAX ) - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EAX, D(2) ) - -LLBL(p3m2dnrm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - 
CMP_L( ECX, EDI ) - JNE( LLBL(p3m2dnrm_top) ) - - -LLBL(p3m2dnrm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p3m3dm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - - ALIGNTEXT4ifNOP -LLBL(p3m3dm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p3m3dm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 
F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - -LLBL(p3m3dm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3m3dm_top) ) - - -LLBL(p3m3dm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_no_rot_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_no_rot_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p3m3dnrm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest 
ptr */ - - - - ALIGNTEXT4ifNOP -LLBL(p3m3dnrm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p3m3dnrm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F2 F1 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F4 F1 F2 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 F2 */ - FXCH( ST(2) ) /* F1 F4 F5 F2 */ - FADDP( ST(0), ST(2) ) /* F4 F5 F2 */ - FLD_S( M(14) ) /* F6 F4 F5 F2 */ - FXCH( ST(3) ) /* F2 F4 F5 F6 */ - FADDP( ST(0), ST(3) ) /* F4 F5 F6 */ - - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - -LLBL(p3m3dnrm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3m3dnrm_top) ) - - -LLBL(p3m3dnrm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_perspective_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_perspective_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p3mpm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* 
ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - ALIGNTEXT4ifNOP -LLBL(p3mpm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p3mpm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F2 F5 F4 */ - FADDP( ST(0), ST(2) ) /* F2 F5 F4 */ - FLD_S( M(14) ) /* F6 F2 F5 F4 */ - FXCH( ST(1) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EAX ) - XOR_L( CONST(-2147483648), EAX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EAX, D(3) ) - -LLBL(p3mpm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3mpm_top) ) - - -LLBL(p3mpm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_general_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_general_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p4mgm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* 
set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - - ALIGNTEXT4ifNOP -LLBL(p4mgm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p4mgm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(14) ) - FLD_S( S(3) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(15) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), 
ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FSTP_S( D(0) ) /* F6 F5 F7 */ - FXCH( ST(1) ) /* F5 F6 F7 */ - FSTP_S( D(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p4mgm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4mgm_top) ) - - -LLBL(p4mgm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_identity_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_identity_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p4mim_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p4mim_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p4mim_top): - 
- TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p4mim_skip) ) - - MOV_L( S(0), EAX ) - MOV_L( S(1), EDX ) - - MOV_L( EAX, D(0) ) - MOV_L( EDX, D(1) ) - - MOV_L( S(2), EAX ) - MOV_L( S(3), EDX ) - - MOV_L( EAX, D(2) ) - MOV_L( EDX, D(3) ) -LLBL(p4mim_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4mim_top) ) - - -LLBL(p4mim_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p4m2dm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - MOV_L( ECX, ARG_DEST ) /* need ecx; put dest+count in ARG_DEST */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p4m2dm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - 
FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EAX ) - MOV_L( S(3), ECX ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EAX, D(2) ) - MOV_L( ECX, D(3) ) - -LLBL(p4m2dm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ARG_DEST, EDI ) - JNE( LLBL(p4m2dm_top) ) - - -LLBL(p4m2dm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_no_rot_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_no_rot_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p4m2dnrm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first 
dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - MOV_L( ECX, ARG_DEST ) /* need ecx; put dest+count in ARG_DEST */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dnrm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p4m2dnrm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EAX ) - MOV_L( S(3), ECX ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EAX, D(2) ) - MOV_L( ECX, D(3) ) - -LLBL(p4m2dnrm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ARG_DEST, EDI ) - JNE( LLBL(p4m2dnrm_top) ) - - -LLBL(p4m2dnrm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_3d_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p4m3dm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - 
SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p4m3dm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EAX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EAX, D(3) ) - -LLBL(p4m3dm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4m3dm_top) ) - - -LLBL(p4m3dm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## 
gl_x86_transform_points4_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_3d_no_rot_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_no_rot_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p4m3dnrm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dnrm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p4m3dnrm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F6 F5 F4 */ - FMUL_S( M(10) ) - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EAX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EAX, D(3) ) - -LLBL(p4m3dnrm_skip): - INC_L( 
EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4m3dnrm_top) ) - - -LLBL(p4m3dnrm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_perspective_masked) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_perspective_masked): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(p4mpm_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ - - - ALIGNTEXT4ifNOP -LLBL(p4mpm_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( LLBL(p4mpm_skip) ) - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F6 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F6 F5 F4 */ - FADDP( ST(0), ST(2) ) /* F6 F5 F4 */ - - FLD_S( S(3) ) /* F2 
F6 F5 F4 */ - FMUL_S( M(14) ) - - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EAX ) - XOR_L( CONST(-2147483648), EAX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EAX, D(3) ) - -LLBL(p4mpm_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( ARG_SOURCE, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4mpm_top) ) - - -LLBL(p4mpm_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - - - -/* -######################################## -## -## Unmasked versions -## -######################################## -*/ - -/* -######################################## -## -## gl_x86_transform_points2_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_general_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_general_raw): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2mgr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p2mgr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) 
) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - FADD_S( M(14) ) - FXCH( ST(3) ) /* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p2mgr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2mgr_top) ) - - -LLBL(p2mgr_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_identity_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_identity_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2mir_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count 
*= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p2mir_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p2mir_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EDX, D(1) ) -LLBL(p2mir_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2mir_top) ) - - -LLBL(p2mir_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_2d_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_raw): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m2dr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p2m2dr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) 
/* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - -LLBL(p2m2dr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2m2dr_top) ) - - -LLBL(p2m2dr_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_2d_no_rot_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_no_rot_raw): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m2dnrr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_2), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(2), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p2m2dnrr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - -LLBL(p2m2dnrr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2m2dnrr_top) ) - - -LLBL(p2m2dnrr_finish): - POP_L( EDI ) - POP_L( ESI 
) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_raw): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m3dr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p2m3dr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - -LLBL(p2m3dr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, 
EDI ) - JNE( LLBL(p2m3dr_top) ) - - -LLBL(p2m3dr_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_no_rot_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_no_rot_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m3dnrr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - MOV_L( M(14), EBX ) - ALIGNTEXT4ifNOP -LLBL(p2m3dnrr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - -LLBL(p2m3dnrr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2m3dnrr_top) ) - - -LLBL(p2m3dnrr_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_perspective_raw) 
- ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_perspective_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2mpr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - MOV_L( M(14), EBX ) - ALIGNTEXT4ifNOP -LLBL(p2mpr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FSTP_S( D(0) ) /* F1 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( CONST(FP_ZERO), D(3) ) - -LLBL(p2mpr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p2mpr_top) ) - - -LLBL(p2mpr_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_general_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_general_raw): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - 
TEST_L( ECX, ECX) - JZ( LLBL(p3mgr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p3mgr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - FADD_S( M(14) ) - FXCH( ST(3) ) 
/* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p3mgr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3mgr_top) ) - - -LLBL(p3mgr_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_identity_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_identity_raw): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3mir_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p3mir_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p3mir_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EBP ) - MOV_L( S(2), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EBP, D(1) ) - MOV_L( EDX, D(2) ) -LLBL(p3mir_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3mir_top) ) - - -LLBL(p3mir_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* 
-######################################## -## -## gl_x86_transform_points3_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_2d_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m2dr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - MOV_L( S(2), EBX ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - -LLBL(p3m2dr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3m2dr_top) ) - - -LLBL(p3m2dr_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_2d_no_rot -## -## -*/ - GLOBL 
GLNAME(gl_x86_transform_points3_2d_no_rot_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_no_rot_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m2dnrr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dnrr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - MOV_L( S(2), EBX ) - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - -LLBL(p3m2dnrr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3m2dnrr_top) ) - - -LLBL(p3m2dnrr_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_raw): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr 
to dest GLvector4f */ - - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m3dr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p3m3dr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - -LLBL(p3m3dr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3m3dr_top) ) - - -LLBL(p3m3dr_finish): - POP_L( EDI ) - POP_L( 
ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_no_rot_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_no_rot_raw): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m3dnrr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p3m3dnrr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F2 F1 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F4 F1 F2 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 F2 */ - FXCH( ST(2) ) /* F1 F4 F5 F2 */ - FADDP( ST(0), ST(2) ) /* F4 F5 F2 */ - FLD_S( M(14) ) /* F6 F4 F5 F2 */ - FXCH( ST(3) ) /* F2 F4 F5 F6 */ - FADDP( ST(0), ST(3) ) /* F4 F5 F6 */ - - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - -LLBL(p3m3dnrr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3m3dnrr_top) ) - - -LLBL(p3m3dnrr_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_perspective -## -## -*/ - GLOBL 
GLNAME(gl_x86_transform_points3_perspective_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_perspective_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3mpr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p3mpr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F2 F5 F4 */ - FADDP( ST(0), ST(2) ) /* F2 F5 F4 */ - FLD_S( M(14) ) /* F6 F2 F5 F4 */ - FXCH( ST(1) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EBX ) - XOR_L( CONST(-2147483648), EBX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p3mpr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p3mpr_top) ) - - -LLBL(p3mpr_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## 
-## -## gl_x86_transform_points4_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_general_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_general_raw): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4mgr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p4mgr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 
F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(14) ) - FLD_S( S(3) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(15) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FSTP_S( D(0) ) /* F6 F5 F7 */ - FXCH( ST(1) ) /* F5 F6 F7 */ - FSTP_S( D(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p4mgr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4mgr_top) ) - - -LLBL(p4mgr_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_identity_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_identity_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4mir_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, 
EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p4mir_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p4mir_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EDX, D(1) ) - - MOV_L( S(2), EBX ) - MOV_L( S(3), EDX ) - - MOV_L( EBX, D(2) ) - MOV_L( EDX, D(3) ) -LLBL(p4mir_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4mir_top) ) - - -LLBL(p4mir_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_raw): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m2dr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( 
M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EBX ) - MOV_L( S(3), EBP ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( EBP, D(3) ) - -LLBL(p4m2dr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4m2dr_top) ) - - -LLBL(p4m2dr_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_no_rot_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_no_rot_raw): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m2dnrr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count 
+= dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dnrr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EBX ) - MOV_L( S(3), EBP ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( EBP, D(3) ) - -LLBL(p4m2dnrr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4m2dnrr_top) ) - - -LLBL(p4m2dnrr_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_3d_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m3dr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) 
- FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EBX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4m3dr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4m3dr_top) ) - - -LLBL(p4m3dr_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_3d_no_rot_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_no_rot_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( 
LLBL(p4m3dnrr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dnrr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F6 F5 F4 */ - FMUL_S( M(10) ) - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EBX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4m3dnrr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4m3dnrr_top) ) - - -LLBL(p4m3dnrr_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_perspective_raw) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_perspective_raw): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - - PUSH_L( EBX ) - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ 
- - TEST_L( ECX, ECX) - JZ( LLBL(p4mpr_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - - - ALIGNTEXT4ifNOP -LLBL(p4mpr_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F6 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F6 F5 F4 */ - FADDP( ST(0), ST(2) ) /* F6 F5 F4 */ - - FLD_S( S(3) ) /* F2 F6 F5 F4 */ - FMUL_S( M(14) ) - - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EBX ) - XOR_L( CONST(-2147483648), EBX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4mpr_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(p4mpr_top) ) - - -LLBL(p4mpr_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - - - -#undef OFFSET_DEST -#undef OFFSET_MATRIX -#undef OFFSET_SOURCE -#undef OFFSET_CLIP -#undef OFFSET_FLAG - -#undef ARG_DEST -#undef ARG_MATRIX -#undef ARG_SOURCE -#undef ARG_CLIP -#undef ARG_FLAG - -#define OFFSET_DEST 4 -#define OFFSET_MATRIX 8 -#define OFFSET_SOURCE 12 -#define OFFSET_STRIDE 16 -#define OFFSET_COUNT 20 - -#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) -#define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP) -#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) 
-#define ARG_STRIDE REGOFF(FRAME_OFFSET+OFFSET_STRIDE, ESP) -#define ARG_COUNT REGOFF(FRAME_OFFSET+OFFSET_COUNT, ESP) - -/* -######################################## -## -## clean (no vertex struct) versions -## -######################################## -*/ - - -/* -######################################## -## -## gl_x86_transform_points2_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_general_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_general_v8): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2mgv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2mgv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - FADD_S( M(14) ) - FXCH( ST(3) ) /* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p2mgv8_skip): - ADD_L( 
CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2mgv8_top) ) - - -LLBL(p2mgv8_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_identity_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_identity_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2miv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p2miv8_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p2miv8_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EDX, D(1) ) - MOV_L( CONST(FP_ZERO), D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2miv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2miv8_top) ) - - -LLBL(p2miv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_2d_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_v8): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m2dv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2m2dv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( 
M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - - MOV_L( CONST(FP_ZERO), D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m2dv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m2dv8_top) ) - - -LLBL(p2m2dv8_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_2d_no_rot_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_no_rot_v8): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m2dnrv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2m2dnrv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - - MOV_L( CONST(FP_ZERO), D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m2dnrv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m2dnrv8_top) ) - - -LLBL(p2m2dnrv8_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_v8): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - 
MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m3dv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2m3dv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m3dv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m3dv8_top) ) - - -LLBL(p2m3dv8_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_no_rot_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_no_rot_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m3dnrv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - MOV_L( M(14), EBX ) - ALIGNTEXT4ifNOP -LLBL(p2m3dnrv8_top): 
- - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m3dnrv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m3dnrv8_top) ) - - -LLBL(p2m3dnrv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_perspective_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_perspective_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2mpv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - MOV_L( M(14), EBX ) - ALIGNTEXT4ifNOP -LLBL(p2mpv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FSTP_S( D(0) ) /* F1 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( CONST(FP_ZERO), D(3) ) - -LLBL(p2mpv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2mpv8_top) ) - - -LLBL(p2mpv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_general_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_general_v8): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( 
ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3mgv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3mgv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - FADD_S( M(14) ) - FXCH( ST(3) ) /* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p3mgv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3mgv8_top) ) 
- - -LLBL(p3mgv8_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_identity_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_identity_v8): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3miv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p3miv8_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p3miv8_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EBP ) - MOV_L( S(2), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EBP, D(1) ) - MOV_L( EDX, D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3miv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3miv8_top) ) - - -LLBL(p3miv8_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_2d_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m2dv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) 
) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - MOV_L( S(2), EBX ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m2dv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m2dv8_top) ) - - -LLBL(p3m2dv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_2d_no_rot_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_no_rot_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m2dnrv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dnrv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - MOV_L( S(2), EBX ) - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m2dnrv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m2dnrv8_top) ) - - -LLBL(p3m2dnrv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_v8): - -#define 
FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m3dv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m3dv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m3dv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m3dv8_top) ) - - -LLBL(p3m3dv8_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_no_rot_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_no_rot_v8): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - 
MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m3dnrv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m3dnrv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F2 F1 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F4 F1 F2 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 F2 */ - FXCH( ST(2) ) /* F1 F4 F5 F2 */ - FADDP( ST(0), ST(2) ) /* F4 F5 F2 */ - FLD_S( M(14) ) /* F6 F4 F5 F2 */ - FXCH( ST(3) ) /* F2 F4 F5 F6 */ - FADDP( ST(0), ST(3) ) /* F4 F5 F6 */ - - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m3dnrv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m3dnrv8_top) ) - - -LLBL(p3m3dnrv8_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_perspective_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_perspective_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3mpv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3mpv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 
F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F2 F5 F4 */ - FADDP( ST(0), ST(2) ) /* F2 F5 F4 */ - FLD_S( M(14) ) /* F6 F2 F5 F4 */ - FXCH( ST(1) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EBX ) - XOR_L( CONST(-2147483648), EBX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p3mpv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3mpv8_top) ) - - -LLBL(p3mpv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_general_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_general_v8): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4mgv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4mgv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( 
S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(14) ) - FLD_S( S(3) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(15) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FSTP_S( D(0) ) /* F6 F5 F7 */ - FXCH( ST(1) ) /* F5 F6 F7 */ - FSTP_S( D(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p4mgv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4mgv8_top) ) - - -LLBL(p4mgv8_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_identity_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_identity_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4miv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p4miv8_finish) ) - - 
ALIGNTEXT4ifNOP -LLBL(p4miv8_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EDX, D(1) ) - - MOV_L( S(2), EBX ) - MOV_L( S(3), EDX ) - - MOV_L( EBX, D(2) ) - MOV_L( EDX, D(3) ) -LLBL(p4miv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4miv8_top) ) - - -LLBL(p4miv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_v8): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m2dv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EBX ) - MOV_L( S(3), EBP ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( EBP, D(3) ) - -LLBL(p4m2dv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m2dv8_top) ) - - -LLBL(p4m2dv8_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* 
-######################################## -## -## gl_x86_transform_points4_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_no_rot_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_no_rot_v8): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m2dnrv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dnrv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EBX ) - MOV_L( S(3), EBP ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( EBP, D(3) ) - -LLBL(p4m2dnrv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m2dnrv8_top) ) - - -LLBL(p4m2dnrv8_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_3d_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m3dv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dv8_top): - - - FLD_S( 
S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EBX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4m3dv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m3dv8_top) ) - - -LLBL(p4m3dv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_3d_no_rot_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_no_rot_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( 
LLBL(p4m3dnrv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dnrv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F6 F5 F4 */ - FMUL_S( M(10) ) - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EBX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4m3dnrv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m3dnrv8_top) ) - - -LLBL(p4m3dnrv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_perspective_v8) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_perspective_v8): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4mpv8_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4mpv8_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F6 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F6 F5 F4 */ - FADDP( ST(0), ST(2) ) 
/* F6 F5 F4 */ - - FLD_S( S(3) ) /* F2 F6 F5 F4 */ - FMUL_S( M(14) ) - - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EBX ) - XOR_L( CONST(-2147483648), EBX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4mpv8_skip): - ADD_L( CONST(32), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4mpv8_top) ) - - -LLBL(p4mpv8_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - - - -/* -######################################## -## -## gl_x86_transform_points2_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_general_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_general_v12): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2mgv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2mgv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - 
FADD_S( M(14) ) - FXCH( ST(3) ) /* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p2mgv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2mgv12_top) ) - - -LLBL(p2mgv12_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_identity_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_identity_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2miv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p2miv12_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p2miv12_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EDX, D(1) ) - MOV_L( CONST(FP_ZERO), D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2miv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2miv12_top) ) - - -LLBL(p2miv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_2d_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_v12): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m2dv12_finish) ) /* count was zero; 
go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2m2dv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - - MOV_L( CONST(FP_ZERO), D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m2dv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m2dv12_top) ) - - -LLBL(p2m2dv12_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_2d_no_rot_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_no_rot_v12): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m2dnrv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2m2dnrv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - - MOV_L( CONST(FP_ZERO), D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m2dnrv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m2dnrv12_top) ) - - -LLBL(p2m2dnrv12_finish): - POP_L( EDI ) - POP_L( 
ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_v12): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m3dv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2m3dv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m3dv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m3dv12_top) ) - - -LLBL(p2m3dv12_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_no_rot_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_no_rot_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI 
) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m3dnrv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - MOV_L( M(14), EBX ) - ALIGNTEXT4ifNOP -LLBL(p2m3dnrv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m3dnrv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m3dnrv12_top) ) - - -LLBL(p2m3dnrv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_perspective_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_perspective_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2mpv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - MOV_L( M(14), EBX ) - ALIGNTEXT4ifNOP -LLBL(p2mpv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FSTP_S( D(0) ) /* F1 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( CONST(FP_ZERO), D(3) ) - -LLBL(p2mpv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2mpv12_top) ) - - -LLBL(p2mpv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* 
-######################################## -## -## gl_x86_transform_points3_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_general_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_general_v12): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3mgv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3mgv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - FADD_S( M(14) ) - 
FXCH( ST(3) ) /* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p3mgv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3mgv12_top) ) - - -LLBL(p3mgv12_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_identity_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_identity_v12): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3miv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p3miv12_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p3miv12_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EBP ) - MOV_L( S(2), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EBP, D(1) ) - MOV_L( EDX, D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3miv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3miv12_top) ) - - -LLBL(p3miv12_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_2d_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( 
LLBL(p3m2dv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - MOV_L( S(2), EBX ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m2dv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m2dv12_top) ) - - -LLBL(p3m2dv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_2d_no_rot_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_no_rot_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m2dnrv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dnrv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - MOV_L( S(2), EBX ) - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m2dnrv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) 
- DEC_L( ECX ) - JNZ( LLBL(p3m2dnrv12_top) ) - - -LLBL(p3m2dnrv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_v12): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m3dv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m3dv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m3dv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m3dv12_top) ) - - 
-LLBL(p3m3dv12_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_no_rot_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_no_rot_v12): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m3dnrv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m3dnrv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F2 F1 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F4 F1 F2 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 F2 */ - FXCH( ST(2) ) /* F1 F4 F5 F2 */ - FADDP( ST(0), ST(2) ) /* F4 F5 F2 */ - FLD_S( M(14) ) /* F6 F4 F5 F2 */ - FXCH( ST(3) ) /* F2 F4 F5 F6 */ - FADDP( ST(0), ST(3) ) /* F4 F5 F6 */ - - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m3dnrv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m3dnrv12_top) ) - - -LLBL(p3m3dnrv12_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_perspective_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_perspective_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3mpv12_finish) ) /* count was zero; go to 
finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3mpv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F2 F5 F4 */ - FADDP( ST(0), ST(2) ) /* F2 F5 F4 */ - FLD_S( M(14) ) /* F6 F2 F5 F4 */ - FXCH( ST(1) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EBX ) - XOR_L( CONST(-2147483648), EBX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p3mpv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3mpv12_top) ) - - -LLBL(p3mpv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_general_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_general_v12): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4mgv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4mgv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 
F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(14) ) - FLD_S( S(3) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(15) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FSTP_S( D(0) ) /* F6 F5 F7 */ - FXCH( ST(1) ) /* F5 F6 F7 */ - FSTP_S( D(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p4mgv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4mgv12_top) ) - - -LLBL(p4mgv12_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_identity_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_identity_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - 
PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4miv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p4miv12_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p4miv12_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EDX, D(1) ) - - MOV_L( S(2), EBX ) - MOV_L( S(3), EDX ) - - MOV_L( EBX, D(2) ) - MOV_L( EDX, D(3) ) -LLBL(p4miv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4miv12_top) ) - - -LLBL(p4miv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_v12): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m2dv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EBX ) - MOV_L( S(3), EBP 
) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( EBP, D(3) ) - -LLBL(p4m2dv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m2dv12_top) ) - - -LLBL(p4m2dv12_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_no_rot_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_no_rot_v12): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m2dnrv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dnrv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EBX ) - MOV_L( S(3), EBP ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( EBP, D(3) ) - -LLBL(p4m2dnrv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m2dnrv12_top) ) - - -LLBL(p4m2dnrv12_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_3d_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX 
) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m3dv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EBX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4m3dv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m3dv12_top) ) - - -LLBL(p4m3dv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_3d_no_rot -## -## -*/ - GLOBL 
GLNAME(gl_x86_transform_points4_3d_no_rot_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_no_rot_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m3dnrv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dnrv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F6 F5 F4 */ - FMUL_S( M(10) ) - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EBX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4m3dnrv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m3dnrv12_top) ) - - -LLBL(p4m3dnrv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_perspective_v12) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_perspective_v12): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4mpv12_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* 
stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4mpv12_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F6 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F6 F5 F4 */ - FADDP( ST(0), ST(2) ) /* F6 F5 F4 */ - - FLD_S( S(3) ) /* F2 F6 F5 F4 */ - FMUL_S( M(14) ) - - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EBX ) - XOR_L( CONST(-2147483648), EBX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4mpv12_skip): - ADD_L( CONST(48), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4mpv12_top) ) - - -LLBL(p4mpv12_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - - - -/* -######################################## -## -## gl_x86_transform_points2_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_general_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_general_v16): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2mgv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2mgv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( 
ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - FADD_S( M(14) ) - FXCH( ST(3) ) /* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p2mgv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2mgv16_top) ) - - -LLBL(p2mgv16_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_identity_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_identity_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2miv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p2miv16_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p2miv16_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EDX, D(1) ) - MOV_L( CONST(FP_ZERO), D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2miv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2miv16_top) ) - - -LLBL(p2miv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d -## -## -*/ - 
GLOBL GLNAME(gl_x86_transform_points2_2d_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_v16): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m2dv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2m2dv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - - MOV_L( CONST(FP_ZERO), D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m2dv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m2dv16_top) ) - - -LLBL(p2m2dv16_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_2d_no_rot_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_2d_no_rot_v16): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m2dnrv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2m2dnrv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* 
F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - - MOV_L( CONST(FP_ZERO), D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m2dnrv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m2dnrv16_top) ) - - -LLBL(p2m2dnrv16_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_v16): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m3dv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p2m3dv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m3dv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m3dv16_top) ) - - -LLBL(p2m3dv16_finish): - POP_L( EDI ) 
- POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_3d_no_rot_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_3d_no_rot_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2m3dnrv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - MOV_L( M(14), EBX ) - ALIGNTEXT4ifNOP -LLBL(p2m3dnrv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p2m3dnrv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2m3dnrv16_top) ) - - -LLBL(p2m3dnrv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points2_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points2_perspective_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points2_perspective_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p2mpv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - MOV_L( M(14), EBX ) - ALIGNTEXT4ifNOP -LLBL(p2mpv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - 
- FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FSTP_S( D(0) ) /* F1 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( CONST(FP_ZERO), D(3) ) - -LLBL(p2mpv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p2mpv16_top) ) - - -LLBL(p2mpv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_general_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_general_v16): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3mgv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3mgv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ 
- FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FADD_S( M(12) ) - FXCH( ST(2) ) /* F5 F6 F4 F7 */ - FADD_S( M(13) ) - FXCH( ST(1) ) /* F6 F5 F4 F7 */ - FADD_S( M(14) ) - FXCH( ST(3) ) /* F7 F5 F4 F6 */ - FADD_S( M(15) ) - - FXCH( ST(2) ) /* F4 F5 F7 F6 */ - FSTP_S( D(0) ) /* F5 F7 F6 */ - FSTP_S( D(1) ) /* F7 F6 */ - FXCH( ST(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p3mgv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3mgv16_top) ) - - -LLBL(p3mgv16_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_identity_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_identity_v16): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3miv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p3miv16_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p3miv16_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EBP ) - MOV_L( S(2), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EBP, D(1) ) - MOV_L( EDX, D(2) ) - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3miv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3miv16_top) ) - - -LLBL(p3miv16_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_2d -## -## -*/ - 
GLOBL GLNAME(gl_x86_transform_points3_2d_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m2dv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FXCH( ST(1) ) /* F4 F5 */ - FADD_S( M(12) ) - FXCH( ST(1) ) /* F5 F4 */ - FADD_S( M(13) ) - - MOV_L( S(2), EBX ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m2dv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m2dv16_top) ) - - -LLBL(p3m2dv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_2d_no_rot_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_2d_no_rot_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m2dnrv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m2dnrv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - 
FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F4 F1 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 */ - - FXCH( ST(2) ) /* F1 F4 F5 */ - FADDP( ST(0), ST(2) ) /* F4 F5 */ - - MOV_L( S(2), EBX ) - - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m2dnrv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m2dnrv16_top) ) - - -LLBL(p3m2dnrv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_v16): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m3dv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m3dv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FADD_S( M(12) 
) - FXCH( ST(1) ) /* F5 F4 F6 */ - FADD_S( M(13) ) - FXCH( ST(2) ) /* F6 F4 F5 */ - FADD_S( M(14) ) - - FXCH( ST(1) ) /* F4 F6 F5 */ - FSTP_S( D(0) ) /* F6 F5 */ - FXCH( ST(1) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m3dv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m3dv16_top) ) - - -LLBL(p3m3dv16_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_3d_no_rot_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_3d_no_rot_v16): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3m3dnrv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3m3dnrv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F1 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F2 F1 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F4 F1 F2 */ - FADD_S( M(12) ) - FLD_S( M(13) ) /* F5 F4 F1 F2 */ - FXCH( ST(2) ) /* F1 F4 F5 F2 */ - FADDP( ST(0), ST(2) ) /* F4 F5 F2 */ - FLD_S( M(14) ) /* F6 F4 F5 F2 */ - FXCH( ST(3) ) /* F2 F4 F5 F6 */ - FADDP( ST(0), ST(3) ) /* F4 F5 F6 */ - - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - - MOV_L( CONST(FP_ONE), D(3) ) -LLBL(p3m3dnrv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3m3dnrv16_top) ) - - -LLBL(p3m3dnrv16_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points3_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points3_perspective_v16) - 
ALIGNTEXT4 -GLNAME(gl_x86_transform_points3_perspective_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p3mpv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p3mpv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F2 F5 F4 */ - FADDP( ST(0), ST(2) ) /* F2 F5 F4 */ - FLD_S( M(14) ) /* F6 F2 F5 F4 */ - FXCH( ST(1) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EBX ) - XOR_L( CONST(-2147483648), EBX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p3mpv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p3mpv16_top) ) - - -LLBL(p3mpv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_general -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_general_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_general_v16): - -#define FRAME_OFFSET 8 - PUSH_L( ESI ) - PUSH_L( EDI ) - - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4mgv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP 
-LLBL(p4mgv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - FLD_S( S(0) ) /* F7 F6 F5 F4 */ - FMUL_S( M(3) ) - - FLD_S( S(1) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(6) ) - FLD_S( S(1) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(7) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(10) ) - FLD_S( S(2) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(11) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F7 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(14) ) - FLD_S( S(3) ) /* F3 F2 F1 F0 F7 F6 F5 F4 */ - FMUL_S( M(15) ) - - FXCH( ST(3) ) /* F0 F2 F1 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(7) ) /* F2 F1 F3 F7 F6 F5 F4 */ - FXCH( ST(1) ) /* F1 F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F2 F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F3 F7 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F7 F6 F5 F4 */ - - FXCH( ST(3) ) /* F4 F6 F5 F7 */ - FSTP_S( D(0) ) /* F6 F5 F7 */ - FXCH( ST(1) ) /* F5 F6 F7 */ - FSTP_S( D(1) ) /* F6 F7 */ - FSTP_S( D(2) ) /* F7 */ - FSTP_S( D(3) ) /* */ - -LLBL(p4mgv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI 
) - DEC_L( ECX ) - JNZ( LLBL(p4mgv16_top) ) - - -LLBL(p4mgv16_finish): - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_identity -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_identity_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_identity_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4miv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - CMP_L( ESI, EDI ) - JE( LLBL(p4miv16_finish) ) - - ALIGNTEXT4ifNOP -LLBL(p4miv16_top): - - MOV_L( S(0), EBX ) - MOV_L( S(1), EDX ) - - MOV_L( EBX, D(0) ) - MOV_L( EDX, D(1) ) - - MOV_L( S(2), EBX ) - MOV_L( S(3), EDX ) - - MOV_L( EBX, D(2) ) - MOV_L( EDX, D(3) ) -LLBL(p4miv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4miv16_top) ) - - -LLBL(p4miv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_2d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_v16): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m2dv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - - FLD_S( S(1) ) /* F0 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( 
S(1) ) /* F1 F0 F5 F4 */ - FMUL_S( M(5) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EBX ) - MOV_L( S(3), EBP ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( EBP, D(3) ) - -LLBL(p4m2dv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m2dv16_top) ) - - -LLBL(p4m2dv16_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_2d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_2d_no_rot_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_2d_no_rot_v16): - -#define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m2dnrv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m2dnrv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(3) ) /* F0 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F5 F4 */ - FMUL_S( M(13) ) - - FXCH( ST(1) ) /* F0 F1 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F1 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F5 F4 */ - - MOV_L( S(2), EBX ) - MOV_L( S(3), EBP ) - - FXCH( ST(1) ) /* F4 F5 */ - FSTP_S( D(0) ) /* F5 */ - FSTP_S( D(1) ) /* */ - MOV_L( EBX, D(2) ) - MOV_L( EBP, D(3) ) - -LLBL(p4m2dnrv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( 
LLBL(p4m2dnrv16_top) ) - - -LLBL(p4m2dnrv16_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_3d -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_3d_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m3dv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - FLD_S( S(0) ) /* F5 F4 */ - FMUL_S( M(1) ) - FLD_S( S(0) ) /* F6 F5 F4 */ - FMUL_S( M(2) ) - - FLD_S( S(1) ) /* F0 F6 F5 F4 */ - FMUL_S( M(4) ) - FLD_S( S(1) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(5) ) - FLD_S( S(1) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(6) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(2) ) /* F0 F6 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EBX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - 
FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4m3dv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m3dv16_top) ) - - -LLBL(p4m3dv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_3d_no_rot -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_3d_no_rot_v16) - ALIGNTEXT4 -GLNAME(gl_x86_transform_points4_3d_no_rot_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4m3dnrv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4m3dnrv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F6 F5 F4 */ - FMUL_S( M(10) ) - - FLD_S( S(3) ) /* F0 F6 F5 F4 */ - FMUL_S( M(12) ) - FLD_S( S(3) ) /* F1 F0 F6 F5 F4 */ - FMUL_S( M(13) ) - FLD_S( S(3) ) /* F2 F1 F0 F6 F5 F4 */ - FMUL_S( M(14) ) - - FXCH( ST(2) ) /* F0 F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(5) ) /* F1 F2 F6 F5 F4 */ - FADDP( ST(0), ST(3) ) /* F2 F6 F5 F4 */ - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(3), EBX ) - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4m3dnrv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4m3dnrv16_top) ) - - -LLBL(p4m3dnrv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - -/* -######################################## -## -## gl_x86_transform_points4_perspective -## -## -*/ - GLOBL GLNAME(gl_x86_transform_points4_perspective_v16) - ALIGNTEXT4 
-GLNAME(gl_x86_transform_points4_perspective_v16): - -#define FRAME_OFFSET 12 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(p4mpv16_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ - - - ALIGNTEXT4ifNOP -LLBL(p4mpv16_top): - - - FLD_S( S(0) ) /* F4 */ - FMUL_S( M(0) ) - - FLD_S( S(1) ) /* F5 F4 */ - FMUL_S( M(5) ) - - FLD_S( S(2) ) /* F0 F5 F4 */ - FMUL_S( M(8) ) - FLD_S( S(2) ) /* F1 F0 F5 F4 */ - FMUL_S( M(9) ) - FLD_S( S(2) ) /* F6 F1 F0 F5 F4 */ - FMUL_S( M(10) ) - - FXCH( ST(2) ) /* F0 F1 F6 F5 F4 */ - FADDP( ST(0), ST(4) ) /* F1 F6 F5 F4 */ - FADDP( ST(0), ST(2) ) /* F6 F5 F4 */ - - FLD_S( S(3) ) /* F2 F6 F5 F4 */ - FMUL_S( M(14) ) - - FADDP( ST(0), ST(1) ) /* F6 F5 F4 */ - - MOV_L( S(2), EBX ) - XOR_L( CONST(-2147483648), EBX ) /* change sign */ - - FXCH( ST(2) ) /* F4 F5 F6 */ - FSTP_S( D(0) ) /* F5 F6 */ - FSTP_S( D(1) ) /* F6 */ - FSTP_S( D(2) ) /* */ - MOV_L( EBX, D(3) ) - -LLBL(p4mpv16_skip): - ADD_L( CONST(64), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(p4mpv16_top) ) - - -LLBL(p4mpv16_finish): - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -#undef FRAME_OFFSET - - - -#undef OFFSET_DEST -#undef OFFSET_MATRIX -#undef OFFSET_SOURCE -#undef OFFSET_STRIDE -#undef OFFSET_COUNT - -#undef ARG_DEST -#undef ARG_MATRIX -#undef ARG_SOURCE -#undef ARG_STRIDE -#undef ARG_COUNT - - -/* - * Table for clip test. 
- * - * bit6 = S(3) < 0 - * bit5 = S(2) < 0 - * bit4 = abs(S(2)) > abs(S(3)) - * bit3 = S(1) < 0 - * bit2 = abs(S(1)) > abs(S(3)) - * bit1 = S(0) < 0 - * bit0 = abs(S(0)) > abs(S(3)) - */ - - - SEG_DATA - -clip_table: - D_BYTE 0, 1, 0, 2, 4, 5, 4, 6 - D_BYTE 0, 1, 0, 2, 8, 9, 8, 10 - D_BYTE 32, 33, 32, 34, 36, 37, 36, 38 - D_BYTE 32, 33, 32, 34, 40, 41, 40, 42 - D_BYTE 0, 1, 0, 2, 4, 5, 4, 6 - D_BYTE 0, 1, 0, 2, 8, 9, 8, 10 - D_BYTE 16, 17, 16, 18, 20, 21, 20, 22 - D_BYTE 16, 17, 16, 18, 24, 25, 24, 26 - D_BYTE 63, 61, 63, 62, 55, 53, 55, 54 - D_BYTE 63, 61, 63, 62, 59, 57, 59, 58 - D_BYTE 47, 45, 47, 46, 39, 37, 39, 38 - D_BYTE 47, 45, 47, 46, 43, 41, 43, 42 - D_BYTE 63, 61, 63, 62, 55, 53, 55, 54 - D_BYTE 63, 61, 63, 62, 59, 57, 59, 58 - D_BYTE 31, 29, 31, 30, 23, 21, 23, 22 - D_BYTE 31, 29, 31, 30, 27, 25, 27, 26 - - SEG_TEXT - -/* - * Offsets for clip_func arguments - * - * typedef GLvector4f *(*clip_func)( GLvector4f *vClip, - * GLvector4f *vProj, - * GLubyte clipMask[], - * GLubyte *orMask, - * GLubyte *andMask ); - */ - -#define OFFSET_SOURCE 4 -#define OFFSET_DEST 8 -#define OFFSET_CLIP 12 -#define OFFSET_OR 16 -#define OFFSET_AND 20 - -#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) -#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) -#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) -#define ARG_OR REGOFF(FRAME_OFFSET+OFFSET_OR, ESP) -#define ARG_AND REGOFF(FRAME_OFFSET+OFFSET_AND, ESP) - -/* -######################################## -## -## gl_x86_cliptest_points4 -## -## AL: ormask -## AH: andmask -## EBX: temp0 -## ECX: temp1 -## EDX: clipmask[] -## ESI: clip[] -## EDI: proj[] -## EBP: temp2 -## -######################################## -*/ - -#if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC) -#define ELFPIC -#endif - - GLOBL GLNAME(gl_x86_cliptest_points4) - ALIGNTEXT4 - -GLNAME(gl_x86_cliptest_points4): -#ifdef ELFPIC -#define FRAME_OFFSET 20 -#else -#define FRAME_OFFSET 16 -#endif - PUSH_L( ESI ) - PUSH_L( EDI ) - 
PUSH_L( EBP ) - PUSH_L( EBX ) - -#ifdef ELFPIC - /* store pointer to clip_table on stack */ - CALL( LLBL(ctp4_get_eip) ) - ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX ) - MOV_L( REGOFF(clip_table@GOT, EBX), EBX ) - PUSH_L( EBX ) - JMP( LLBL(ctp4_clip_table_ready) ) - -LLBL(ctp4_get_eip): - /* store eip in ebx */ - MOV_L( REGIND(ESP), EBX ) - RET - -LLBL(ctp4_clip_table_ready): -#endif - - MOV_L( ARG_SOURCE, ESI ) - MOV_L( ARG_DEST, EDI ) - - MOV_L( ARG_CLIP, EDX ) - MOV_L( ARG_OR, EBX ) - - MOV_L( ARG_AND, EBP ) - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - MOV_L( EAX, ARG_SOURCE ) /* put stride in ARG_SOURCE */ - - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDX, ECX ) - - MOV_L( ECX, ARG_CLIP ) /* put clipmask + count in ARG_CLIP */ - CMP_L( ECX, EDX ) - - MOV_B( REGIND(EBX), AL ) - MOV_B( REGIND(EBP), AH ) - - JZ( LLBL(ctp4_finish) ) - - ALIGNTEXT4ifNOP -LLBL(ctp4_top): - FLD1 /* F3 */ - FDIV_S( S(3) ) - - MOV_L( S(3), EBP ) - MOV_L( S(2), EBX ) - - XOR_L( ECX, ECX ) - ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */ - - ADC_L( ECX, ECX ) - ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - MOV_L( S(1), EBX ) - - ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - MOV_L( S(0), EBX ) - - ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */ - - ADC_L( 
ECX, ECX ) - -#ifdef ELFPIC - MOV_L( REGIND(ESP), EBP ) /* clip_table */ - - MOV_B( REGBI(EBP, ECX), CL ) -#else - MOV_B( REGOFF(clip_table,ECX), CL ) -#endif - - OR_B( CL, AL ) - AND_B( CL, AH ) - - TEST_B( CL, CL ) - MOV_B( CL, REGIND(EDX) ) - - JZ( LLBL(ctp4_proj) ) - - FSTP( ST(0) ) /* */ - JMP( LLBL(ctp4_next) ) - -LLBL(ctp4_proj): - FLD_S( S(0) ) /* F0 F3 */ - FMUL2( ST(1), ST(0) ) - - FLD_S( S(1) ) /* F1 F0 F3 */ - FMUL2( ST(2), ST(0) ) - - FLD_S( S(2) ) /* F2 F1 F0 F3 */ - FMUL2( ST(3), ST(0) ) - - FXCH( ST(2) ) /* F0 F1 F2 F3 */ - FSTP_S( D(0) ) /* F1 F2 F3 */ - FSTP_S( D(1) ) /* F2 F3 */ - FSTP_S( D(2) ) /* F3 */ - FSTP_S( D(3) ) /* */ - -LLBL(ctp4_next): - INC_L( EDX ) - ADD_L( CONST(16), EDI ) - - ADD_L( ARG_SOURCE, ESI ) - CMP_L( EDX, ARG_CLIP ) - - JNZ( LLBL(ctp4_top) ) - - MOV_L( ARG_OR, ECX ) - MOV_L( ARG_AND, EDX ) - - MOV_B( AL, REGIND(ECX) ) - MOV_B( AH, REGIND(EDX) ) - -LLBL(ctp4_finish): - MOV_L( ARG_DEST, EAX ) -#ifdef ELFPIC - POP_L( ESI ) /* discard ptr to clip_table */ -#endif - POP_L( EBX ) - POP_L( EBP ) - POP_L( EDI ) - POP_L( ESI ) - - RET - - - - -/* -######################################## -## -## gl_v16_x86_cliptest_points4 -## -## Performs cliptesting equivalent to that done by cliptest_v16() -## in vertices.c -## -## This is a hacked version of the original above. 
-## -######################################## -*/ - - - -#define OFFSET_V16_SOURCE 4 -#define OFFSET_V16_LAST 8 -#define OFFSET_V16_OR 12 -#define OFFSET_V16_AND 16 -#define OFFSET_V16_MASK 20 - -#define ARG_V16_SOURCE REGOFF(V16_FRAME_OFFSET+OFFSET_V16_SOURCE, ESP) -#define ARG_V16_LAST REGOFF(V16_FRAME_OFFSET+OFFSET_V16_LAST, ESP) -#define ARG_V16_OR REGOFF(V16_FRAME_OFFSET+OFFSET_V16_OR, ESP) -#define ARG_V16_AND REGOFF(V16_FRAME_OFFSET+OFFSET_V16_AND, ESP) -#define ARG_V16_MASK REGOFF(V16_FRAME_OFFSET+OFFSET_V16_MASK, ESP) - - -#if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC) -#define ELFPIC -#endif - - GLOBL GLNAME(gl_v16_x86_cliptest_points4) - ALIGNTEXT4 - -GLNAME(gl_v16_x86_cliptest_points4): -#ifdef ELFPIC -#define V16_FRAME_OFFSET 20 -#else -#define V16_FRAME_OFFSET 16 -#endif - PUSH_L( ESI ) - PUSH_L( EDI ) - PUSH_L( EBP ) - PUSH_L( EBX ) - -#ifdef ELFPIC - /* store pointer to clip_table on stack */ - CALL( LLBL(v16_ctp4_get_eip) ) - ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX ) - MOV_L( REGOFF(clip_table@GOT, EBX), EBX ) - PUSH_L( EBX ) - JMP( LLBL(v16_ctp4_clip_table_ready) ) - -LLBL(v16_ctp4_get_eip): - /* store eip in ebx */ - MOV_L( REGIND(ESP), EBX ) - RET - -LLBL(v16_ctp4_clip_table_ready): -#endif - - MOV_L( ARG_V16_SOURCE, ESI ) /* ptr to first source vertex */ - MOV_L( ARG_V16_LAST, EDX ) /* ptr to last source vertex */ - MOV_L( ARG_V16_OR, EBX ) - MOV_L( ARG_V16_AND, EBP ) - MOV_L( ARG_V16_MASK, EDI ) - - CMP_L( EDX, ESI ) - - MOV_B( REGIND(EBX), AL ) - MOV_B( REGIND(EBP), AH ) - - JZ( LLBL(v16_ctp4_finish) ) - - ALIGNTEXT4ifNOP -LLBL(v16_ctp4_top): -#if 0 - FLD1 /* F0 */ - FDIV_S( S(3) ) -#endif - - MOV_L( S(3), EBP ) - MOV_L( S(2), EBX ) - - XOR_L( ECX, ECX ) - ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */ - - ADC_L( ECX, ECX ) - ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - MOV_L( S(1), EBX 
) - - ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - MOV_L( S(0), EBX ) - - ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - -#ifdef ELFPIC - MOV_L( REGIND(ESP), EBP ) /* clip_table */ - - MOV_B( REGBI(EBP, ECX), CL ) -#else - MOV_B( REGOFF(clip_table,ECX), CL ) -#endif - - OR_B( CL, AL ) - AND_B( CL, AH ) - - MOV_B( CL, REGIND(EDI) ) /* save clipmask */ - INC_L( EDI ) /* next clipmask */ - -#if 0 - FSTP_S( S(8) ) /* */ /* GR_VERTEX_OOW_OFFSET */ -#endif - - ADD_L( CONST(64), ESI ) /* next fxVertex */ - - CMP_L( EDX, ESI ) /* finished? */ - JNZ( LLBL(v16_ctp4_top) ) - - MOV_L( ARG_V16_OR, ECX ) - MOV_L( ARG_V16_AND, EDX ) - - MOV_B( AL, REGIND(ECX) ) - MOV_B( AH, REGIND(EDX) ) - -LLBL(v16_ctp4_finish): - -#ifdef ELFPIC - POP_L( ESI ) /* discard ptr to clip_table */ -#endif - POP_L( EBX ) - POP_L( EBP ) - POP_L( EDI ) - POP_L( ESI ) - - RET - diff --git a/xc/extras/Mesa/src/X86/x86a.S.m4 b/xc/extras/Mesa/src/X86/x86a.S.m4 deleted file mode 100644 index 7b543fce6..000000000 --- a/xc/extras/Mesa/src/X86/x86a.S.m4 +++ /dev/null @@ -1,1751 +0,0 @@ -include(x86flatregs.m4)dnl -dnl ############################################################ -dnl ## output_x86_transform_functions -dnl ## -dnl ## This macro is called twice. Once with mask checking -dnl ## versions of FUNCTION_HEAD, LOOP_HEAD, LOOP_TAIL and -dnl ## FUNCTION_TAIL, and once with versions that don't check -dnl ## the clip mask. -dnl ## -dnl ## The parameters for FUNCTION_HEAD are: -dnl ## - The function's name. The mask checking version will -dnl ## append "_masked" to this, the unmasked version appends -dnl ## "_raw". -dnl ## - A unique string (for labels). -dnl ## - The number of integer registers needed. Can be 0-2. 
-dnl ## If not zero, IREG_0 and/or IREG_1 will be defined. -dnl ## - The output vector size. -dnl ############################################################ -define(`output_x86_transform_functions', `dnl -/* -######################################## -## -## gl_x86_transform_points2_general -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points2_general, p2mg, 0, 4) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - F_FLD_S( S(0), F5 ) - F_FMUL_S( M(1), F5 ) - F_FLD_S( S(0), F6 ) - F_FMUL_S( M(2), F6 ) - F_FLD_S( S(0), F7 ) - F_FMUL_S( M(3), F7 ) - - F_FLD_S( S(1), F0 ) - F_FMUL_S( M(4), F0 ) - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - F_FLD_S( S(1), F2 ) - F_FMUL_S( M(6), F2 ) - F_FLD_S( S(1), F3 ) - F_FMUL_S( M(7), F3 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - F_FADDP( F3, F7 ) - - F_FADD_S( M(12), F4 ) - F_FADD_S( M(13), F5 ) - F_FADD_S( M(14), F6 ) - F_FADD_S( M(15), F7 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - F_FSTP_S( F7, D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points2_identity -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points2_identity, p2mi, 1, 2) - - CMP_L( ESI, EDI ) - JE( LLBL(SHORT_NAME`'_finish) ) - -LOOP_HEAD - MOV_L( S(0), IREG_0 ) - MOV_L( S(1), EDX ) - - MOV_L( IREG_0, D(0) ) - MOV_L( EDX, D(1) ) -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points2_2d -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points2_2d, p2m2d, 0, 2) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - F_FLD_S( S(0), F5 ) - F_FMUL_S( M(1), F5 ) - - F_FLD_S( S(1), F0 ) - F_FMUL_S( M(4), F0 ) - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - - F_FADD_S( M(12), F4 ) - F_FADD_S( M(13), F5 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## 
gl_x86_transform_points2_2d_no_rot -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points2_2d_no_rot, p2m2dnr, 0, 2) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - - F_FADD_S( M(12), F4 ) - F_FLD_S( M(13), F5 ) - F_FADDP( F1, F5 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points2_3d -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points2_3d, p2m3d, 0, 3) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - F_FLD_S( S(0), F5 ) - F_FMUL_S( M(1), F5 ) - F_FLD_S( S(0), F6 ) - F_FMUL_S( M(2), F6 ) - - F_FLD_S( S(1), F0 ) - F_FMUL_S( M(4), F0 ) - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - F_FLD_S( S(1), F2 ) - F_FMUL_S( M(6), F2 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - - F_FADD_S( M(12), F4 ) - F_FADD_S( M(13), F5 ) - F_FADD_S( M(14), F6 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points2_3d_no_rot -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points2_3d_no_rot, p2m3dnr, 1, 3) - - MOV_L( M(14), IREG_0 ) -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - - F_FADD_S( M(12), F4 ) - F_FLD_S( M(13), F5 ) - F_FADDP( F1, F5 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - MOV_L( IREG_0, D(2) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points2_perspective -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points2_perspective, p2mp, 1, 4) - - MOV_L( M(14), IREG_0 ) -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F1, D(1) ) - MOV_L( IREG_0, D(2) ) - MOV_L( CONST(FP_ZERO), D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## 
-## -## gl_x86_transform_points3_general -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points3_general, p3mg, 0, 4) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - F_FLD_S( S(0), F5 ) - F_FMUL_S( M(1), F5 ) - F_FLD_S( S(0), F6 ) - F_FMUL_S( M(2), F6 ) - F_FLD_S( S(0), F7 ) - F_FMUL_S( M(3), F7 ) - - F_FLD_S( S(1), F0 ) - F_FMUL_S( M(4), F0 ) - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - F_FLD_S( S(1), F2 ) - F_FMUL_S( M(6), F2 ) - F_FLD_S( S(1), F3 ) - F_FMUL_S( M(7), F3 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - F_FADDP( F3, F7 ) - - F_FLD_S( S(2), F0 ) - F_FMUL_S( M(8), F0 ) - F_FLD_S( S(2), F1 ) - F_FMUL_S( M(9), F1 ) - F_FLD_S( S(2), F2 ) - F_FMUL_S( M(10), F2 ) - F_FLD_S( S(2), F3 ) - F_FMUL_S( M(11), F3 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - F_FADDP( F3, F7 ) - - F_FADD_S( M(12), F4 ) - F_FADD_S( M(13), F5 ) - F_FADD_S( M(14), F6 ) - F_FADD_S( M(15), F7 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - F_FSTP_S( F7, D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points3_identity -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points3_identity, p3mi, 2, 3) - - CMP_L( ESI, EDI ) - JE( LLBL(SHORT_NAME`'_finish) ) - -LOOP_HEAD - MOV_L( S(0), IREG_0 ) - MOV_L( S(1), IREG_1 ) - MOV_L( S(2), EDX ) - - MOV_L( IREG_0, D(0) ) - MOV_L( IREG_1, D(1) ) - MOV_L( EDX, D(2) ) -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points3_2d -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points3_2d, p3m2d, 1, 3) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - F_FLD_S( S(0), F5 ) - F_FMUL_S( M(1), F5 ) - - F_FLD_S( S(1), F0 ) - F_FMUL_S( M(4), F0 ) - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - - F_FADD_S( M(12), F4 ) - F_FADD_S( M(13), F5 ) - - MOV_L( S(2), IREG_0 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - MOV_L( IREG_0, D(2) 
) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points3_2d_no_rot -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points3_2d_no_rot, p3m2dnr, 1, 3) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - - F_FADD_S( M(12), F4 ) - F_FLD_S( M(13), F5 ) - - F_FADDP( F1, F5 ) - - MOV_L( S(2), IREG_0 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - MOV_L( IREG_0, D(2) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points3_3d -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points3_3d, p3m3d, 0, 3) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - F_FLD_S( S(0), F5 ) - F_FMUL_S( M(1), F5 ) - F_FLD_S( S(0), F6 ) - F_FMUL_S( M(2), F6 ) - - F_FLD_S( S(1), F0 ) - F_FMUL_S( M(4), F0 ) - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - F_FLD_S( S(1), F2 ) - F_FMUL_S( M(6), F2 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - - F_FLD_S( S(2), F0 ) - F_FMUL_S( M(8), F0 ) - F_FLD_S( S(2), F1 ) - F_FMUL_S( M(9), F1 ) - F_FLD_S( S(2), F2 ) - F_FMUL_S( M(10), F2 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - - F_FADD_S( M(12), F4 ) - F_FADD_S( M(13), F5 ) - F_FADD_S( M(14), F6 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points3_3d_no_rot -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points3_3d_no_rot, p3m3dnr, 0, 3) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - - F_FLD_S( S(2), F2 ) - F_FMUL_S( M(10), F2 ) - - F_FADD_S( M(12), F4 ) - F_FLD_S( M(13), F5 ) - F_FADDP( F1, F5 ) - F_FLD_S( M(14), F6 ) - F_FADDP( F2, F6 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## 
gl_x86_transform_points3_perspective -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points3_perspective, p3mp, 1, 4) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - - F_FLD_S( S(1), F5 ) - F_FMUL_S( M(5), F5 ) - - F_FLD_S( S(2), F0 ) - F_FMUL_S( M(8), F0 ) - F_FLD_S( S(2), F1 ) - F_FMUL_S( M(9), F1 ) - F_FLD_S( S(2), F2 ) - F_FMUL_S( M(10), F2 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FLD_S( M(14), F6 ) - F_FADDP( F2, F6 ) - - MOV_L( S(2), IREG_0 ) - XOR_L( CONST(eval(0x80000000)), IREG_0 ) /* change sign */ - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - MOV_L( IREG_0, D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points4_general -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points4_general, p4mg, 0, 4) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - F_FLD_S( S(0), F5 ) - F_FMUL_S( M(1), F5 ) - F_FLD_S( S(0), F6 ) - F_FMUL_S( M(2), F6 ) - F_FLD_S( S(0), F7 ) - F_FMUL_S( M(3), F7 ) - - F_FLD_S( S(1), F0 ) - F_FMUL_S( M(4), F0 ) - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - F_FLD_S( S(1), F2 ) - F_FMUL_S( M(6), F2 ) - F_FLD_S( S(1), F3 ) - F_FMUL_S( M(7), F3 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - F_FADDP( F3, F7 ) - - F_FLD_S( S(2), F0 ) - F_FMUL_S( M(8), F0 ) - F_FLD_S( S(2), F1 ) - F_FMUL_S( M(9), F1 ) - F_FLD_S( S(2), F2 ) - F_FMUL_S( M(10), F2 ) - F_FLD_S( S(2), F3 ) - F_FMUL_S( M(11), F3 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - F_FADDP( F3, F7 ) - - F_FLD_S( S(3), F0 ) - F_FMUL_S( M(12), F0 ) - F_FLD_S( S(3), F1 ) - F_FMUL_S( M(13), F1 ) - F_FLD_S( S(3), F2 ) - F_FMUL_S( M(14), F2 ) - F_FLD_S( S(3), F3 ) - F_FMUL_S( M(15), F3 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - F_FADDP( F3, F7 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - F_FSTP_S( F7, D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## 
gl_x86_transform_points4_identity -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points4_identity, p4mi, 1, 4) - - CMP_L( ESI, EDI ) - JE( LLBL(SHORT_NAME`'_finish) ) - -LOOP_HEAD - MOV_L( S(0), IREG_0 ) - MOV_L( S(1), EDX ) - - MOV_L( IREG_0, D(0) ) - MOV_L( EDX, D(1) ) - - MOV_L( S(2), IREG_0 ) - MOV_L( S(3), EDX ) - - MOV_L( IREG_0, D(2) ) - MOV_L( EDX, D(3) ) -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points4_2d -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points4_2d, p4m2d, 2, 4) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - F_FLD_S( S(0), F5 ) - F_FMUL_S( M(1), F5 ) - - F_FLD_S( S(1), F0 ) - F_FMUL_S( M(4), F0 ) - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - - F_FLD_S( S(3), F0 ) - F_FMUL_S( M(12), F0 ) - F_FLD_S( S(3), F1 ) - F_FMUL_S( M(13), F1 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - - MOV_L( S(2), IREG_0 ) - MOV_L( S(3), IREG_1 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - MOV_L( IREG_0, D(2) ) - MOV_L( IREG_1, D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points4_2d_no_rot -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points4_2d_no_rot, p4m2dnr, 2, 4) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - - F_FLD_S( S(1), F5 ) - F_FMUL_S( M(5), F5 ) - - F_FLD_S( S(3), F0 ) - F_FMUL_S( M(12), F0 ) - F_FLD_S( S(3), F1 ) - F_FMUL_S( M(13), F1 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - - MOV_L( S(2), IREG_0 ) - MOV_L( S(3), IREG_1 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - MOV_L( IREG_0, D(2) ) - MOV_L( IREG_1, D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points4_3d -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points4_3d, p4m3d, 1, 4) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - F_FLD_S( S(0), F5 ) - F_FMUL_S( M(1), F5 ) - F_FLD_S( S(0), F6 ) - F_FMUL_S( M(2), F6 ) - - F_FLD_S( 
S(1), F0 ) - F_FMUL_S( M(4), F0 ) - F_FLD_S( S(1), F1 ) - F_FMUL_S( M(5), F1 ) - F_FLD_S( S(1), F2 ) - F_FMUL_S( M(6), F2 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - - F_FLD_S( S(2), F0 ) - F_FMUL_S( M(8), F0 ) - F_FLD_S( S(2), F1 ) - F_FMUL_S( M(9), F1 ) - F_FLD_S( S(2), F2 ) - F_FMUL_S( M(10), F2 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - - F_FLD_S( S(3), F0 ) - F_FMUL_S( M(12), F0 ) - F_FLD_S( S(3), F1 ) - F_FMUL_S( M(13), F1 ) - F_FLD_S( S(3), F2 ) - F_FMUL_S( M(14), F2 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - - MOV_L( S(3), IREG_0 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - MOV_L( IREG_0, D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points4_3d_no_rot -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points4_3d_no_rot, p4m3dnr, 1, 4) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - - F_FLD_S( S(1), F5 ) - F_FMUL_S( M(5), F5 ) - - F_FLD_S( S(2), F6 ) - F_FMUL_S( M(10), F6 ) - - F_FLD_S( S(3), F0 ) - F_FMUL_S( M(12), F0 ) - F_FLD_S( S(3), F1 ) - F_FMUL_S( M(13), F1 ) - F_FLD_S( S(3), F2 ) - F_FMUL_S( M(14), F2 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - F_FADDP( F2, F6 ) - - MOV_L( S(3), IREG_0 ) - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - MOV_L( IREG_0, D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -/* -######################################## -## -## gl_x86_transform_points4_perspective -## -## -*/ -FUNCTION_HEAD(gl_x86_transform_points4_perspective, p4mp, 1, 4) - -LOOP_HEAD - - F_FLD_S( S(0), F4 ) - F_FMUL_S( M(0), F4 ) - - F_FLD_S( S(1), F5 ) - F_FMUL_S( M(5), F5 ) - - F_FLD_S( S(2), F0 ) - F_FMUL_S( M(8), F0 ) - F_FLD_S( S(2), F1 ) - F_FMUL_S( M(9), F1 ) - F_FLD_S( S(2), F6 ) - F_FMUL_S( M(10), F6 ) - - F_FADDP( F0, F4 ) - F_FADDP( F1, F5 ) - - F_FLD_S( S(3), F2 ) - F_FMUL_S( M(14), F2 ) - - F_FADDP( F2, F6 ) - - MOV_L( S(2), IREG_0 ) - XOR_L( 
CONST(eval(0x80000000)), IREG_0 ) /* change sign */ - - F_FSTP_S( F4, D(0) ) - F_FSTP_S( F5, D(1) ) - F_FSTP_S( F6, D(2) ) - MOV_L( IREG_0, D(3) ) - -LOOP_TAIL - -FUNCTION_TAIL - -')dnl -#include "assyntax.h" - - SEG_TEXT - -`#'define FP_ONE eval(0x3f800000) -`#'define FP_ZERO 0 - -`#'if !defined(NASM_ASSEMBLER) && !defined(MASM_ASSEMBLER) -`#'define LLBL(a) .L ## a -`#'else -`#'define LLBL(a) a -`#'endif - -`#'define S(i) REGOFF(i * 4, ESI) -`#'define D(i) REGOFF(i * 4, EDI) -`#'define M(i) REGOFF(i * 4, EDX) - -/* - * Offsets into GLvector4f - */ -`#'define V4F_DATA 0 -`#'define V4F_START 4 -`#'define V4F_COUNT 8 -`#'define V4F_STRIDE 12 -`#'define V4F_SIZE 16 -`#'define V4F_FLAGS 20 - -define(`VEC_DIRTY_0', 1)dnl -define(`VEC_DIRTY_1', 2)dnl -define(`VEC_DIRTY_2', 4)dnl -define(`VEC_DIRTY_3', 8)dnl -`#'define VEC_SIZE_1 eval(VEC_DIRTY_0) -`#'define VEC_SIZE_2 eval(VEC_DIRTY_0|VEC_DIRTY_1) -`#'define VEC_SIZE_3 eval(VEC_DIRTY_0|VEC_DIRTY_1|VEC_DIRTY_2) -`#'define VEC_SIZE_4 eval(VEC_DIRTY_0|VEC_DIRTY_1|VEC_DIRTY_2|VEC_DIRTY_3) - -/* - * Offsets for transform_func arguments - * - * typedef void (*transform_func)( GLvector4f *to_vec, - * const GLmatrix *mat, - * const GLvector4f *from_vec, - * const GLubyte *clipmask, - * const GLubyte flag ); - */ -`#'define OFFSET_DEST 4 -`#'define OFFSET_MATRIX 8 -`#'define OFFSET_SOURCE 12 -`#'define OFFSET_CLIP 16 -`#'define OFFSET_FLAG 20 - -`#'define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) -`#'define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP) -`#'define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) -`#'define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) -`#'define ARG_FLAG REGOFF(FRAME_OFFSET+OFFSET_FLAG, ESP) - -/* -######################################## -## -## Masked versions -## -######################################## -*/ -divert(-1) -#################### -## FUNCTION_HEAD( -## long_name, -## short_name, -## regs_needed, -## output_vector_size -## ); -## -## EAX: stride or temp0 -## EBX: 
clipflag -## ECX: dst + count or temp1 -## EDX: matrix -## ESI: src[] -## EDI: dst[] -## EBP: clipmask[] -## -#################### -define(`FUNCTION_HEAD', `dnl -define(`LONG_NAME', $1`'_masked)dnl -define(`SHORT_NAME', $2`'m)dnl -define(`TEMP_REGS_NEEDED', $3)dnl -define(`VEC_SIZE', $4)dnl - GLOBL GLNAME(LONG_NAME) - ALIGNTEXT4 -GLNAME(LONG_NAME): - -`#'define FRAME_OFFSET 16 - PUSH_L( ESI ) - PUSH_L( EDI ) - - PUSH_L( EBX ) - PUSH_L( EBP ) - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source GLvector4f */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest GLvector4f */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_CLIP, EBP ) /* ptr to clip mask array */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_B( ARG_FLAG, BL ) /* clip mask flags */ - - TEST_L( ECX, ECX ) - JZ( LLBL(SHORT_NAME`'_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* stride */ - OR_L( CONST(`VEC_SIZE_'VEC_SIZE), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(VEC_SIZE), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ - -dnl ########## -ifelse(eval(TEMP_REGS_NEEDED > 0), 1, -` MOV_L( EAX, ARG_SOURCE ) /* need eax; put stride in ARG_SOURCE */ -define(`IREG_0', `EAX')define(`STRIDE', `ARG_SOURCE')dnl -', -`define(`STRIDE', `EAX')')dnl -dnl ########## -ifelse(eval(TEMP_REGS_NEEDED > 1), 1, -` MOV_L( ECX, ARG_DEST ) /* need ecx; put dest+count in ARG_DEST */ -define(`IREG_1', `ECX')define(`DEST_END', `ARG_DEST')dnl -', -`define(`DEST_END', `ECX')')dnl -dnl ########## -') - -#################### -## LOOP_HEAD -#################### -define(`LOOP_HEAD', -` ALIGNTEXT4ifNOP -LLBL(SHORT_NAME`'_top): - - TEST_B( BL, REGIND(EBP) ) - JNZ( 
LLBL(SHORT_NAME`'_skip) ) -') - -#################### -## LOOP_TAIL -#################### -define(`LOOP_TAIL', -`LLBL(SHORT_NAME`'_skip): - INC_L( EBP ) - ADD_L( CONST(16), EDI ) - ADD_L( STRIDE, ESI ) - CMP_L( DEST_END, EDI ) - JNE( LLBL(SHORT_NAME`'_top) ) -') - -#################### -## FUNCTION_TAIL -#################### -define(`FUNCTION_TAIL', -`LLBL(SHORT_NAME`'_finish): - POP_L( EBP ) - POP_L( EBX ) - POP_L( EDI ) - POP_L( ESI ) - RET -undefine(`LONG_NAME')dnl -undefine(`SHORT_NAME')dnl -undefine(`TEMP_REGS_NEEDED')dnl -undefine(`VEC_SIZE')dnl -undefine(`IREG_0')dnl -undefine(`IREG_1')dnl -undefine(`STRIDE')dnl -undefine(`DEST_END')dnl -`#'undef FRAME_OFFSET') - -#################### -divert -output_x86_transform_functions - -/* -######################################## -## -## Unmasked versions -## -######################################## -*/ -divert(-1) -#################### -## FUNCTION_HEAD( -## long_name, -## short_name, -## regs_needed, -## output_vector_size -## ); -## -## EAX: stride -## EBX: temp0 -## ECX: dst + count -## EDX: matrix -## ESI: src[] -## EDI: dst[] -## EBP: temp1 -## -#################### -define(`FUNCTION_HEAD', `dnl -define(`LONG_NAME', $1`'_raw)dnl -define(`SHORT_NAME', $2`'r)dnl -define(`TEMP_REGS_NEEDED', $3)dnl -define(`VEC_SIZE', $4)dnl - GLOBL GLNAME(LONG_NAME) - ALIGNTEXT4 -GLNAME(LONG_NAME): - -`#'define FRAME_OFFSET eval(TEMP_REGS_NEEDED * 4 + 8) - PUSH_L( ESI ) - PUSH_L( EDI ) - - MOV_L( REGOFF(OFFSET_SOURCE+8, ESP), ESI ) /* ptr to source GLvector4f */ - MOV_L( REGOFF(OFFSET_DEST+8, ESP), EDI ) /* ptr to dest GLvector4f */ - -ifelse(eval(TEMP_REGS_NEEDED > 0), 1, -` PUSH_L( EBX )define(`IREG_0', EBX) -')dnl -ifelse(eval(TEMP_REGS_NEEDED > 1), 1, -` PUSH_L( EBP )define(`IREG_1', EBP) -')dnl - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - - TEST_L( ECX, ECX) - JZ( LLBL(SHORT_NAME`'_finish) ) /* count was zero; go to finish */ - - MOV_L( REGOFF(V4F_STRIDE, ESI), 
EAX ) /* stride */ - OR_L( CONST(`VEC_SIZE_'VEC_SIZE), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - MOV_L( CONST(VEC_SIZE), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - - SHL_L( CONST(4), ECX ) /* count *= 16 */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDI, ECX ) /* count += dest ptr */ -') - -#################### -## LOOP_HEAD -#################### -define(`LOOP_HEAD', -` ALIGNTEXT4ifNOP -LLBL(SHORT_NAME`'_top): -') - -#################### -## LOOP_TAIL -#################### -define(`LOOP_TAIL', -`LLBL(SHORT_NAME`'_skip): - ADD_L( CONST(16), EDI ) - ADD_L( EAX, ESI ) - CMP_L( ECX, EDI ) - JNE( LLBL(SHORT_NAME`'_top) ) -') - -#################### -## FUNCTION_TAIL -#################### -define(`FUNCTION_TAIL', -`LLBL(SHORT_NAME`'_finish): -ifelse(eval(TEMP_REGS_NEEDED > 1), 1, -` POP_L( EBP ) -')dnl -ifelse(eval(TEMP_REGS_NEEDED > 0), 1, -` POP_L( EBX ) -')dnl - POP_L( EDI ) - POP_L( ESI ) - RET -undefine(`LONG_NAME')dnl -undefine(`SHORT_NAME')dnl -undefine(`TEMP_REGS_NEEDED')dnl -undefine(`VEC_SIZE')dnl -undefine(`IREG_0')dnl -undefine(`IREG_1')dnl -`#'undef FRAME_OFFSET') - - -#################### -divert -output_x86_transform_functions - -`#'undef OFFSET_DEST -`#'undef OFFSET_MATRIX -`#'undef OFFSET_SOURCE -`#'undef OFFSET_CLIP -`#'undef OFFSET_FLAG - -`#'undef ARG_DEST -`#'undef ARG_MATRIX -`#'undef ARG_SOURCE -`#'undef ARG_CLIP -`#'undef ARG_FLAG - -`#'define OFFSET_DEST 4 -`#'define OFFSET_MATRIX 8 -`#'define OFFSET_SOURCE 12 -`#'define OFFSET_STRIDE 16 -`#'define OFFSET_COUNT 20 - -`#'define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) -`#'define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP) -`#'define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) -`#'define ARG_STRIDE REGOFF(FRAME_OFFSET+OFFSET_STRIDE, ESP) -`#'define ARG_COUNT REGOFF(FRAME_OFFSET+OFFSET_COUNT, ESP) - 
-/* -######################################## -## -## clean (no vertex struct) versions -## -######################################## -*/ -divert(-1) -#################### -## FUNCTION_HEAD( -## long_name, -## short_name, -## regs_needed, -## output_vector_size -## ); -## -## EAX: stride -## EBX: temp0 -## ECX: dst + count -## EDX: matrix -## ESI: src[] -## EDI: dst[] -## EBP: temp1 -## -#################### -define(`FUNCTION_HEAD', `dnl -define(`LONG_NAME', $1`'_v`'DEST_STRIDE)dnl -define(`SHORT_NAME', $2`'v`'DEST_STRIDE)dnl -define(`TEMP_REGS_NEEDED', $3)dnl -define(`VEC_SIZE', $4)dnl - GLOBL GLNAME(LONG_NAME) - ALIGNTEXT4 -GLNAME(LONG_NAME): - -`#'define FRAME_OFFSET eval(TEMP_REGS_NEEDED * 4 + 8) - PUSH_L( ESI ) - PUSH_L( EDI ) - -ifelse(eval(TEMP_REGS_NEEDED > 0), 1, -` PUSH_L( EBX )define(`IREG_0', EBX) -')dnl -ifelse(eval(TEMP_REGS_NEEDED > 1), 1, -` PUSH_L( EBP )define(`IREG_1', EBP) -')dnl - - MOV_L( ARG_SOURCE, ESI ) /* ptr to source */ - MOV_L( ARG_DEST, EDI ) /* ptr to dest */ - - MOV_L( ARG_MATRIX, EDX ) /* ptr to matrix */ - MOV_L( ARG_COUNT, ECX ) /* count */ - - TEST_L( ECX, ECX) - JZ( LLBL(SHORT_NAME`'_finish) ) /* count was zero; go to finish */ - - MOV_L( ARG_STRIDE, EAX ) /* stride */ -') - -#################### -## LOOP_HEAD -#################### -define(`LOOP_HEAD', -` ALIGNTEXT4ifNOP -LLBL(SHORT_NAME`'_top): -') - -#################### -## LOOP_TAIL -#################### -define(`LOOP_TAIL', -`ifelse(eval(VEC_SIZE == 1), 1, ` MOV_L( CONST(FP_ZERO), D(1) ) -')dnl -ifelse(eval(VEC_SIZE <= 2), 1, ` MOV_L( CONST(FP_ZERO), D(2) ) -')dnl -ifelse(eval(VEC_SIZE <= 3), 1, ` MOV_L( CONST(FP_ONE), D(3) ) -')dnl -LLBL(SHORT_NAME`'_skip): - ADD_L( CONST(eval(DEST_STRIDE*4)), EDI ) - ADD_L( EAX, ESI ) - DEC_L( ECX ) - JNZ( LLBL(SHORT_NAME`'_top) ) -') - -#################### -## FUNCTION_TAIL -#################### -define(`FUNCTION_TAIL', -`LLBL(SHORT_NAME`'_finish): -ifelse(eval(TEMP_REGS_NEEDED > 1), 1, -` POP_L( EBP ) -')dnl 
-ifelse(eval(TEMP_REGS_NEEDED > 0), 1, -` POP_L( EBX ) -')dnl - POP_L( EDI ) - POP_L( ESI ) - RET -undefine(`LONG_NAME')dnl -undefine(`SHORT_NAME')dnl -undefine(`TEMP_REGS_NEEDED')dnl -undefine(`VEC_SIZE')dnl -undefine(`IREG_0')dnl -undefine(`IREG_1')dnl -`#'undef FRAME_OFFSET') - - -#################### -divert - -define(`DEST_STRIDE', 8)dnl -output_x86_transform_functions -undefine(`DEST_STRIDE')dnl - -define(`DEST_STRIDE', 12)dnl -output_x86_transform_functions -undefine(`DEST_STRIDE')dnl - -define(`DEST_STRIDE', 16)dnl -output_x86_transform_functions -undefine(`DEST_STRIDE')dnl - -`#'undef OFFSET_DEST -`#'undef OFFSET_MATRIX -`#'undef OFFSET_SOURCE -`#'undef OFFSET_STRIDE -`#'undef OFFSET_COUNT - -`#'undef ARG_DEST -`#'undef ARG_MATRIX -`#'undef ARG_SOURCE -`#'undef ARG_STRIDE -`#'undef ARG_COUNT - - -/* - * Table for clip test. - * - * bit6 = S(3) < 0 - * bit5 = S(2) < 0 - * bit4 = abs(S(2)) > abs(S(3)) - * bit3 = S(1) < 0 - * bit2 = abs(S(1)) > abs(S(3)) - * bit1 = S(0) < 0 - * bit0 = abs(S(0)) > abs(S(3)) - */ -divert(-1) -# Vertex buffer clipping flags (from vb.h) -define(`CLIP_RIGHT_BIT', `0x01') -define(`CLIP_LEFT_BIT', `0x02') -define(`CLIP_TOP_BIT', `0x04') -define(`CLIP_BOTTOM_BIT', `0x08') -define(`CLIP_NEAR_BIT', `0x10') -define(`CLIP_FAR_BIT', `0x20') - -define(`MAGN_X', `(~((($1) & 1) - 1))') -define(`SIGN_X', `(~(((($1) >> 1) & 1) - 1))') -define(`MAGN_Y', `(~(((($1) >> 2) & 1) - 1))') -define(`SIGN_Y', `(~(((($1) >> 3) & 1) - 1))') -define(`MAGN_Z', `(~(((($1) >> 4) & 1) - 1))') -define(`SIGN_Z', `(~(((($1) >> 5) & 1) - 1))') -define(`SIGN_W', `(~(((($1) >> 6) & 1) - 1))') - -define(`CLIP_VALUE', `eval( - (CLIP_RIGHT_BIT - & ((~SIGN_X($1) & SIGN_W($1)) - | (~SIGN_X($1) & ~SIGN_W($1) & MAGN_X($1)) - | (SIGN_X($1) & SIGN_W($1) & ~MAGN_X($1)))) - | (CLIP_LEFT_BIT - & ((SIGN_X($1) & SIGN_W($1)) - | (~SIGN_X($1) & SIGN_W($1) & ~MAGN_X($1)) - | (SIGN_X($1) & ~SIGN_W($1) & MAGN_X($1)))) - | (CLIP_TOP_BIT - & ((~SIGN_Y($1) & SIGN_W($1)) - | (~SIGN_Y($1) 
& ~SIGN_W($1) & MAGN_Y($1)) - | (SIGN_Y($1) & SIGN_W($1) & ~MAGN_Y($1)))) - | (CLIP_BOTTOM_BIT - & ((SIGN_Y($1) & SIGN_W($1)) - | (~SIGN_Y($1) & SIGN_W($1) & ~MAGN_Y($1)) - | (SIGN_Y($1) & ~SIGN_W($1) & MAGN_Y($1)))) - | (CLIP_FAR_BIT - & ((~SIGN_Z($1) & SIGN_W($1)) - | (~SIGN_Z($1) & ~SIGN_W($1) & MAGN_Z($1)) - | (SIGN_Z($1) & SIGN_W($1) & ~MAGN_Z($1)))) - | (CLIP_NEAR_BIT - & ((SIGN_Z($1) & SIGN_W($1)) - | (~SIGN_Z($1) & SIGN_W($1) & ~MAGN_Z($1)) - | (SIGN_Z($1) & ~SIGN_W($1) & MAGN_Z($1)))) -)') -define(`CLIP_VALUE8', -`CLIP_VALUE($1 + 0), CLIP_VALUE($1 + 1), CLIP_VALUE($1 + 2), CLIP_VALUE($1 + 3), dnl -CLIP_VALUE($1 + 4), CLIP_VALUE($1 + 5), CLIP_VALUE($1 + 6), CLIP_VALUE($1 + 7)') -divert - - SEG_DATA - -clip_table: - D_BYTE CLIP_VALUE8(0x00) - D_BYTE CLIP_VALUE8(0x08) - D_BYTE CLIP_VALUE8(0x10) - D_BYTE CLIP_VALUE8(0x18) - D_BYTE CLIP_VALUE8(0x20) - D_BYTE CLIP_VALUE8(0x28) - D_BYTE CLIP_VALUE8(0x30) - D_BYTE CLIP_VALUE8(0x38) - D_BYTE CLIP_VALUE8(0x40) - D_BYTE CLIP_VALUE8(0x48) - D_BYTE CLIP_VALUE8(0x50) - D_BYTE CLIP_VALUE8(0x58) - D_BYTE CLIP_VALUE8(0x60) - D_BYTE CLIP_VALUE8(0x68) - D_BYTE CLIP_VALUE8(0x70) - D_BYTE CLIP_VALUE8(0x78) - - SEG_TEXT - -/* - * Offsets for clip_func arguments - * - * typedef GLvector4f *(*clip_func)( GLvector4f *vClip, - * GLvector4f *vProj, - * GLubyte clipMask[], - * GLubyte *orMask, - * GLubyte *andMask ); - */ - -`#'define OFFSET_SOURCE 4 -`#'define OFFSET_DEST 8 -`#'define OFFSET_CLIP 12 -`#'define OFFSET_OR 16 -`#'define OFFSET_AND 20 - -`#'define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) -`#'define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) -`#'define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) -`#'define ARG_OR REGOFF(FRAME_OFFSET+OFFSET_OR, ESP) -`#'define ARG_AND REGOFF(FRAME_OFFSET+OFFSET_AND, ESP) - -/* -######################################## -## -## gl_x86_cliptest_points4 -## -## AL: ormask -## AH: andmask -## EBX: temp0 -## ECX: temp1 -## EDX: clipmask[] -## ESI: clip[] -## EDI: proj[] -## 
EBP: temp2 -## -######################################## -*/ - -`#'if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC) -`#'define ELFPIC -`#'endif - - GLOBL GLNAME(gl_x86_cliptest_points4) - ALIGNTEXT4 - -GLNAME(gl_x86_cliptest_points4): -`#'ifdef ELFPIC -`#'define FRAME_OFFSET 20 -`#'else -`#'define FRAME_OFFSET 16 -`#'endif - PUSH_L( ESI ) - PUSH_L( EDI ) - PUSH_L( EBP ) - PUSH_L( EBX ) - -`#'ifdef ELFPIC - /* store pointer to clip_table on stack */ - CALL( LLBL(ctp4_get_eip) ) - ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX ) - MOV_L( REGOFF(clip_table@GOT, EBX), EBX ) - PUSH_L( EBX ) - JMP( LLBL(ctp4_clip_table_ready) ) - -LLBL(ctp4_get_eip): - /* store eip in ebx */ - MOV_L( REGIND(ESP), EBX ) - RET - -LLBL(ctp4_clip_table_ready): -`#'endif - - MOV_L( ARG_SOURCE, ESI ) - MOV_L( ARG_DEST, EDI ) - - MOV_L( ARG_CLIP, EDX ) - MOV_L( ARG_OR, EBX ) - - MOV_L( ARG_AND, EBP ) - MOV_L( REGOFF(V4F_STRIDE, ESI), EAX ) /* source stride */ - - MOV_L( REGOFF(V4F_COUNT, ESI), ECX ) /* source count */ - MOV_L( REGOFF(V4F_START, ESI), ESI ) /* ptr to first source vertex */ - - OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) ) /* set dest flags */ - MOV_L( EAX, ARG_SOURCE ) /* put stride in ARG_SOURCE */ - - MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) ) /* set dest size */ - MOV_L( ECX, REGOFF(V4F_COUNT, EDI) ) /* set dest count */ - - MOV_L( REGOFF(V4F_START, EDI), EDI ) /* ptr to first dest vertex */ - ADD_L( EDX, ECX ) - - MOV_L( ECX, ARG_CLIP ) /* put clipmask + count in ARG_CLIP */ - CMP_L( ECX, EDX ) - - MOV_B( REGIND(EBX), AL ) - MOV_B( REGIND(EBP), AH ) - - JZ( LLBL(ctp4_finish) ) - - ALIGNTEXT4ifNOP -LLBL(ctp4_top): - F_PUSH0( FLD1, F3 ) - F_FDIV_S( S(3), F3 ) - - MOV_L( S(3), EBP ) - MOV_L( S(2), EBX ) - - XOR_L( ECX, ECX ) - ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */ - - ADC_L( ECX, ECX ) - ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > abs(S(3))*2 */ - - ADC_L( 
ECX, ECX ) - MOV_L( S(1), EBX ) - - ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - MOV_L( S(0), EBX ) - - ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - -`#'ifdef ELFPIC - MOV_L( REGIND(ESP), EBP ) /* clip_table */ - - MOV_B( REGBI(EBP, ECX), CL ) -`#'else - MOV_B( REGOFF(clip_table,ECX), CL ) -`#'endif - - OR_B( CL, AL ) - AND_B( CL, AH ) - - TEST_B( CL, CL ) - MOV_B( CL, REGIND(EDX) ) - - JZ( LLBL(ctp4_proj) ) - -pushdef(`F_STACK', F_STACK)dnl - F_FSTP( F3, F3 ) -popdef(`F_STACK')dnl - JMP( LLBL(ctp4_next) ) - -LLBL(ctp4_proj): - F_FLD_S( S(0), F0 ) - F_FMUL2( F3, F0 ) - - F_FLD_S( S(1), F1 ) - F_FMUL2( F3, F1 ) - - F_FLD_S( S(2), F2 ) - F_FMUL2( F3, F2 ) - - F_FSTP_S( F0, D(0) ) - F_FSTP_S( F1, D(1) ) - F_FSTP_S( F2, D(2) ) - F_FSTP_S( F3, D(3) ) - -LLBL(ctp4_next): - INC_L( EDX ) - ADD_L( CONST(16), EDI ) - - ADD_L( ARG_SOURCE, ESI ) - CMP_L( EDX, ARG_CLIP ) - - JNZ( LLBL(ctp4_top) ) - - MOV_L( ARG_OR, ECX ) - MOV_L( ARG_AND, EDX ) - - MOV_B( AL, REGIND(ECX) ) - MOV_B( AH, REGIND(EDX) ) - -LLBL(ctp4_finish): - MOV_L( ARG_DEST, EAX ) -`#'ifdef ELFPIC - POP_L( ESI ) /* discard ptr to clip_table */ -`#'endif - POP_L( EBX ) - POP_L( EBP ) - POP_L( EDI ) - POP_L( ESI ) - - RET - - - - -/* -######################################## -## -## gl_v16_x86_cliptest_points4 -## -## Performs cliptesting equivalent to that done by cliptest_v16() -## in vertices.c -## -## This is a hacked version of the original above. 
-## -######################################## -*/ - - - -`#'define OFFSET_V16_SOURCE 4 -`#'define OFFSET_V16_LAST 8 -`#'define OFFSET_V16_OR 12 -`#'define OFFSET_V16_AND 16 -`#'define OFFSET_V16_MASK 20 - -`#'define ARG_V16_SOURCE REGOFF(V16_FRAME_OFFSET+OFFSET_V16_SOURCE, ESP) -`#'define ARG_V16_LAST REGOFF(V16_FRAME_OFFSET+OFFSET_V16_LAST, ESP) -`#'define ARG_V16_OR REGOFF(V16_FRAME_OFFSET+OFFSET_V16_OR, ESP) -`#'define ARG_V16_AND REGOFF(V16_FRAME_OFFSET+OFFSET_V16_AND, ESP) -`#'define ARG_V16_MASK REGOFF(V16_FRAME_OFFSET+OFFSET_V16_MASK, ESP) - - -`#'if defined(__ELF__) && defined(__PIC__) && !defined(ELFPIC) -`#'define ELFPIC -`#'endif - - GLOBL GLNAME(gl_v16_x86_cliptest_points4) - ALIGNTEXT4 - -GLNAME(gl_v16_x86_cliptest_points4): -`#'ifdef ELFPIC -`#'define V16_FRAME_OFFSET 20 -`#'else -`#'define V16_FRAME_OFFSET 16 -`#'endif - PUSH_L( ESI ) - PUSH_L( EDI ) - PUSH_L( EBP ) - PUSH_L( EBX ) - -`#'ifdef ELFPIC - /* store pointer to clip_table on stack */ - CALL( LLBL(v16_ctp4_get_eip) ) - ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX ) - MOV_L( REGOFF(clip_table@GOT, EBX), EBX ) - PUSH_L( EBX ) - JMP( LLBL(v16_ctp4_clip_table_ready) ) - -LLBL(v16_ctp4_get_eip): - /* store eip in ebx */ - MOV_L( REGIND(ESP), EBX ) - RET - -LLBL(v16_ctp4_clip_table_ready): -`#'endif - - MOV_L( ARG_V16_SOURCE, ESI ) /* ptr to first source vertex */ - MOV_L( ARG_V16_LAST, EDX ) /* ptr to last source vertex */ - MOV_L( ARG_V16_OR, EBX ) - MOV_L( ARG_V16_AND, EBP ) - MOV_L( ARG_V16_MASK, EDI ) - - CMP_L( EDX, ESI ) - - MOV_B( REGIND(EBX), AL ) - MOV_B( REGIND(EBP), AH ) - - JZ( LLBL(v16_ctp4_finish) ) - - ALIGNTEXT4ifNOP -LLBL(v16_ctp4_top): -`#'if 0 - F_PUSH0( FLD1, F0 ) - F_FDIV_S( S(3), F0 ) -`#'endif - - MOV_L( S(3), EBP ) - MOV_L( S(2), EBX ) - - XOR_L( ECX, ECX ) - ADD_L( EBP, EBP ) /* ebp = abs(S(3))*2 ; carry = sign of S(3) */ - - ADC_L( ECX, ECX ) - ADD_L( EBX, EBX ) /* ebx = abs(S(2))*2 ; carry = sign of S(2) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(2))*2 > 
abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - MOV_L( S(1), EBX ) - - ADD_L( EBX, EBX ) /* ebx = abs(S(1))*2 ; carry = sign of S(1) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(1))*2 > abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - MOV_L( S(0), EBX ) - - ADD_L( EBX, EBX ) /* ebx = abs(S(0))*2 ; carry = sign of S(0) */ - - ADC_L( ECX, ECX ) - CMP_L( EBX, EBP ) /* carry = abs(S(0))*2 > abs(S(3))*2 */ - - ADC_L( ECX, ECX ) - -`#'ifdef ELFPIC - MOV_L( REGIND(ESP), EBP ) /* clip_table */ - - MOV_B( REGBI(EBP, ECX), CL ) -`#'else - MOV_B( REGOFF(clip_table,ECX), CL ) -`#'endif - - OR_B( CL, AL ) - AND_B( CL, AH ) - - MOV_B( CL, REGIND(EDI) ) /* save clipmask */ - INC_L( EDI ) /* next clipmask */ - -`#'if 0 - F_FSTP_S( F0, S(8) ) /* GR_VERTEX_OOW_OFFSET */ -`#'endif - - ADD_L( CONST(64), ESI ) /* next fxVertex */ - - CMP_L( EDX, ESI ) /* finished? */ - JNZ( LLBL(v16_ctp4_top) ) - - MOV_L( ARG_V16_OR, ECX ) - MOV_L( ARG_V16_AND, EDX ) - - MOV_B( AL, REGIND(ECX) ) - MOV_B( AH, REGIND(EDX) ) - -LLBL(v16_ctp4_finish): - -`#'ifdef ELFPIC - POP_L( ESI ) /* discard ptr to clip_table */ -`#'endif - POP_L( EBX ) - POP_L( EBP ) - POP_L( EDI ) - POP_L( ESI ) - - RET - diff --git a/xc/extras/Mesa/src/X86/x86flatregs.m4 b/xc/extras/Mesa/src/X86/x86flatregs.m4 deleted file mode 100644 index f3cbf39b7..000000000 --- a/xc/extras/Mesa/src/X86/x86flatregs.m4 +++ /dev/null @@ -1,200 +0,0 @@ -divert(-1) -############################################################ -## These m4 macros make the x86 floating point register -## stack look more like normal registers. -## -## They do *NOT* know about jump instructions or any other -## form of conditional execution! 
-############################################################ - -######################################## -## define the stack -######################################## -define(`F_STACK', `') - -######################################## -## print the stack -######################################## -define(`F_SHOW_STACK', `patsubst(F_STACK, `[0-7]', `F`'\& ')') - -######################################## -## define the flat floating point regs -######################################## -define(`F0', 0) -define(`F1', 1) -define(`F2', 2) -define(`F3', 3) -define(`F4', 4) -define(`F5', 5) -define(`F6', 6) -define(`F7', 7) - -######################################## -## F_PUSH_STACK(REG) -## push a register -######################################## -define(`F_PUSH_STACK', -`ifelse(len(F_STACK), 8, ` [ERROR - STACK OVERFLOW] ')dnl -ifelse(eval(F_FIND_REG($1) != -1), 1, ` [ERROR - F`'$1 PUSHED TWICE] ')dnl -define(`F_STACK', eval($1)`'F_STACK)') - -######################################## -## F_POP_STACK() -## pop top of stack -######################################## -define(`F_POP_STACK', -`ifelse(len(F_STACK), 0, ` [ERROR - STACK UNDERFLOW] ')dnl -define(`F_STACK', substr(F_STACK, 1))') - -######################################## -## F_FIND_REG(REG) -## find register -######################################## -define(`F_FIND_REG', `index(F_STACK, eval($1))') - -######################################## -## F_XCH_TO_TOP(REG) -## swap to top of stack -######################################## -define(`F_XCH_TO_TOP', -`define(`F_INDEX', F_FIND_REG($1))dnl -ifelse( -eval(F_INDEX > 0), 1, -`FXCH( ST('F_FIND_REG($1)`) )dnl -define(`F_STACK', - translit(F_STACK, - substr(F_STACK, 0, 1)`'eval($1), - eval($1)`'substr(F_STACK, 0, 1)))F_PCOMMENT(`F$1 to top') - ', -eval(F_INDEX == -1), 1, -` [ERROR - F`'$1 NOT ON STACK] ')') - -######################################## -## F_PUSH(INSN, SRC, DST) -## SRC - memory -## DST - register -## execute an insn that pushes a register 
-######################################## -define(`F_PUSH', -`$1( $2 )F_PUSH_STACK($3)F_PCOMMENT(`$1 $2, F$3')') - -######################################## -## F_PUSHR(INSN, SRC, DST) -## SRC - register -## DST - register -## execute an insn that pushes a register -######################################## -define(`F_PUSHR', -`$1( ST(F_FIND_REG($2) )F_PUSH_STACK($3)F_PCOMMENT(`$1 F$2, F$3')') - -######################################## -## F_PUSH0(INSN, DST) -## DST - register -## execute an insn that pushes a register -######################################## -define(`F_PUSH0', -`$1`'F_PUSH_STACK($2)F_PCOMMENT(`$1 F$3')') - -######################################## -## F_POP(INSN, SRC, DST) -## SRC - register -## DST - memory -## execute an insn that pops a register -######################################## -define(`F_POP', -`F_XCH_TO_TOP($2)dnl -$1( $3 )F_POP_STACK()F_PCOMMENT(`$1 F$2, $3')') - -######################################## -## F_POPR(INSN, SRC, DST) -## SRC - register -## DST - register -## execute an insn that pops a register -######################################## -define(`F_POPR', -`F_XCH_TO_TOP($2)dnl -$1( ST(0), ST(F_FIND_REG($3)) )F_POP_STACK()F_PCOMMENT(`$1 F$2, F$3')') - -######################################## -## F_POP1(INSN, SRC, DST) -## SRC - register -## DST - register -## execute an insn that pops a register -## with st(0) arg omitted -######################################## -define(`F_POP1', -`F_XCH_TO_TOP($2)dnl -$1( ST(F_FIND_REG($3)) )F_POP_STACK()F_PCOMMENT(`$1 F$2, F$3')') - -######################################## -## F_EXEC(INSN, SRC, DST) -## SRC - memory -## DST - register -## execute an insn that doesn't change the stack -######################################## -define(`F_EXEC', -`F_XCH_TO_TOP($3)dnl -$1( $2 )F_COMMENT(`$1 $2, F$3')') - -######################################## -## F_EXECR(INSN, SRC, DST) -## SRC - register -## DST - register -## execute an insn that doesn't change the stack 
-######################################## -define(`F_EXECR', -`F_XCH_TO_TOP($3)dnl -$1( ST(F_FIND_REG($2)), ST(0) )F_COMMENT(`$1 F$2, F$3')') - -######################################## -## F_EXEC0(INSN, REG) -## REG - register -## execute an insn that doesn't change -## the stack or have any arguments -## besides the stack top. -######################################## -define(`F_EXEC0', -`F_XCH_TO_TOP($2)dnl -$1`'F_COMMENT(`$1 F$2')') - -######################################## -## Some common fp operations - -######################################## -define(`F_FADD2', `F_EXECR( FADD2, $1, $2 )') -define(`F_FADD_S', `F_EXEC( FADD_S, $1, $2 )') -define(`F_FADDP', `F_POPR( FADDP, $1, $2 )') - -define(`F_FDIV2', `F_EXECR( FDIV2, $1, $2 )') -define(`F_FDIV_S', `F_EXEC( FDIV_S, $1, $2 )') -define(`F_FDIVP', `F_POPR( FDIVP, $1, $2 )') - -define(`F_FDIVR2', `F_EXECR( FDIVR2, $1, $2 )') -define(`F_FDIVR_S', `F_EXEC( FDIVR_S, $1, $2 )') -define(`F_FDIVRP', `F_POPR( FDIVRP, $1, $2 )') - -define(`F_FLD_S', `F_PUSH( FLD_S, $1, $2 )') - -define(`F_FMUL2', `F_EXECR( FMUL2, $1, $2 )') -define(`F_FMUL_S', `F_EXEC( FMUL_S, $1, $2 )') -define(`F_FMULP', `F_POPR( FMULP, $1, $2 )') - -define(`F_FSTP_S', `F_POP( FSTP_S, $1, $2 )') -define(`F_FSTP', `F_POP1( FSTP, $1, $2 )') - -define(`F_FSUB2', `F_EXECR( FSUB2, $1, $2 )') -define(`F_FSUB_S', `F_EXEC( FSUB_S, $1, $2 )') -define(`F_FSUBP', `F_POPR( FSUBP, $1, $2 )') - -define(`F_FSUBR2', `F_EXECR( FSUBR2, $1, $2 )') -define(`F_FSUBR_S', `F_EXEC( FSUBR_S, $1, $2 )') -define(`F_FSUBRP', `F_POPR( FSUBRP, $1, $2 )') - -######################################## -## Comments -######################################## -## define(`F_COMMENT', ` /* `$1' */') -## define(`F_PCOMMENT', ` /* `$1' { F_SHOW_STACK} */') -define(`F_COMMENT', `') -define(`F_PCOMMENT', ` /* F_SHOW_STACK*/') - -divert`'dnl diff --git a/xc/extras/Mesa/src/X86/xform_args.h b/xc/extras/Mesa/src/X86/xform_args.h new file mode 100644 index 000000000..4c3899acc --- /dev/null +++ 
b/xc/extras/Mesa/src/X86/xform_args.h @@ -0,0 +1,73 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Transform function interface for assembly code. Simply define + * FRAME_OFFSET to the number of bytes pushed onto the stack before + * using the ARG_* argument macros. 
+ * + * Gareth Hughes <gareth@valinux.com> + */ + +#ifndef __XFORM_ARGS_H__ +#define __XFORM_ARGS_H__ + +/* Offsets into GLvector4f + */ +#define V4F_DATA 0 +#define V4F_START 4 +#define V4F_COUNT 8 +#define V4F_STRIDE 12 +#define V4F_SIZE 16 +#define V4F_FLAGS 20 + +/* GLvector4f flags + */ +#define VEC_SIZE_1 1 +#define VEC_SIZE_2 3 +#define VEC_SIZE_3 7 +#define VEC_SIZE_4 15 + +/* Offsets for transform_func arguments + * + * typedef void (*transform_func)( GLvector4f *to_vec, + * const GLfloat m[16], + * const GLvector4f *from_vec, + * const GLubyte *clipmask, + * const GLubyte flag ); + */ +#define OFFSET_DEST 4 +#define OFFSET_MATRIX 8 +#define OFFSET_SOURCE 12 +#define OFFSET_CLIP 16 +#define OFFSET_FLAG 20 + +#define ARG_DEST REGOFF(FRAME_OFFSET+OFFSET_DEST, ESP) +#define ARG_MATRIX REGOFF(FRAME_OFFSET+OFFSET_MATRIX, ESP) +#define ARG_SOURCE REGOFF(FRAME_OFFSET+OFFSET_SOURCE, ESP) +#define ARG_CLIP REGOFF(FRAME_OFFSET+OFFSET_CLIP, ESP) +#define ARG_FLAG REGOFF(FRAME_OFFSET+OFFSET_FLAG, ESP) + +#endif diff --git a/xc/extras/Mesa/src/aatritemp.h b/xc/extras/Mesa/src/aatritemp.h index 976ee9689..e0d623eb0 100644 --- a/xc/extras/Mesa/src/aatritemp.h +++ b/xc/extras/Mesa/src/aatritemp.h @@ -303,24 +303,24 @@ #ifdef DO_STUV1 # ifdef DO_SPEC gl_write_multitexture_span(ctx, 2, n, startX, iy, z, - (const GLfloat (*)[MAX_WIDTH]) s, - (const GLfloat (*)[MAX_WIDTH]) t, - (const GLfloat (*)[MAX_WIDTH]) u, + (CONST GLfloat (*)[MAX_WIDTH]) s, + (CONST GLfloat (*)[MAX_WIDTH]) t, + (CONST GLfloat (*)[MAX_WIDTH]) u, (GLfloat (*)[MAX_WIDTH]) lambda, - rgba, (const GLubyte (*)[4]) spec, + rgba, (CONST GLubyte (*)[4]) spec, GL_POLYGON); # else gl_write_multitexture_span(ctx, 2, n, startX, iy, z, - (const GLfloat (*)[MAX_WIDTH]) s, - (const GLfloat (*)[MAX_WIDTH]) t, - (const GLfloat (*)[MAX_WIDTH]) u, + (CONST GLfloat (*)[MAX_WIDTH]) s, + (CONST GLfloat (*)[MAX_WIDTH]) t, + (CONST GLfloat (*)[MAX_WIDTH]) u, lambda, rgba, NULL, GL_POLYGON); # endif #elif defined(DO_STUV0) # 
ifdef DO_SPEC gl_write_texture_span(ctx, n, startX, iy, z, s[0], t[0], u[0], lambda[0], rgba, - (const GLubyte (*)[4]) spec, GL_POLYGON); + (CONST GLubyte (*)[4]) spec, GL_POLYGON); # else gl_write_texture_span(ctx, n, startX, iy, z, s[0], t[0], u[0], lambda[0], @@ -408,7 +408,7 @@ left = ix + 1; #ifdef DO_STUV1 { - int j; + GLuint j; for (j = 0; j < n; j++) { s[0][j] = s[0][j + left]; t[0][j] = t[0][j + left]; @@ -422,17 +422,17 @@ } # ifdef DO_SPEC gl_write_multitexture_span(ctx, 2, n, left, iy, z + left, - (const GLfloat (*)[MAX_WIDTH]) s, - (const GLfloat (*)[MAX_WIDTH]) t, - (const GLfloat (*)[MAX_WIDTH]) u, + (CONST GLfloat (*)[MAX_WIDTH]) s, + (CONST GLfloat (*)[MAX_WIDTH]) t, + (CONST GLfloat (*)[MAX_WIDTH]) u, lambda, rgba + left, - (const GLubyte (*)[4]) (spec + left), + (CONST GLubyte (*)[4]) (spec + left), GL_POLYGON); # else gl_write_multitexture_span(ctx, 2, n, left, iy, z + left, - (const GLfloat (*)[MAX_WIDTH]) s, - (const GLfloat (*)[MAX_WIDTH]) t, - (const GLfloat (*)[MAX_WIDTH]) u, + (CONST GLfloat (*)[MAX_WIDTH]) s, + (CONST GLfloat (*)[MAX_WIDTH]) t, + (CONST GLfloat (*)[MAX_WIDTH]) u, lambda, rgba + left, NULL, GL_POLYGON); # endif @@ -441,7 +441,7 @@ gl_write_texture_span(ctx, n, left, iy, z + left, s[0] + left, t[0] + left, u[0] + left, lambda[0] + left, rgba + left, - (const GLubyte (*)[4]) (spec + left), + (CONST GLubyte (*)[4]) (spec + left), GL_POLYGON); # else gl_write_texture_span(ctx, n, left, iy, z + left, diff --git a/xc/extras/Mesa/src/accum.c b/xc/extras/Mesa/src/accum.c index da658df0c..dfed66c18 100644 --- a/xc/extras/Mesa/src/accum.c +++ b/xc/extras/Mesa/src/accum.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/accum.c,v 1.7 2000/09/26 15:56:29 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -234,6 +234,8 @@ _mesa_Accum( GLenum op, GLfloat value ) if (ctx->IntegerAccumMode && value != ctx->IntegerAccumScaler) rescale_accum(ctx); + RENDER_START(ctx); + if (ctx->IntegerAccumMode) { /* simply add integer color values into accum buffer */ GLuint j; @@ -277,6 +279,7 @@ _mesa_Accum( GLenum op, GLfloat value ) /* restore read buffer = draw buffer (the default) */ (*ctx->Driver.SetReadBuffer)( ctx, ctx->DrawBuffer, ctx->Color.DriverDrawBuffer ); + RENDER_FINISH(ctx); break; case GL_LOAD: @@ -297,6 +300,7 @@ _mesa_Accum( GLenum op, GLfloat value ) ctx->IntegerAccumScaler = 0.0; } + RENDER_START(ctx); if (ctx->IntegerAccumMode) { /* just copy values into accum buffer */ GLuint j; @@ -340,6 +344,7 @@ _mesa_Accum( GLenum op, GLfloat value ) /* restore read buffer = draw buffer (the default) */ (*ctx->Driver.SetReadBuffer)( ctx, ctx->DrawBuffer, ctx->Color.DriverDrawBuffer ); + RENDER_FINISH(ctx); break; case GL_RETURN: @@ -347,16 +352,18 @@ _mesa_Accum( GLenum op, GLfloat value ) if (ctx->IntegerAccumMode && value != 1.0) rescale_accum(ctx); + RENDER_START(ctx); if (ctx->IntegerAccumMode && ctx->IntegerAccumScaler > 0) { /* build lookup table to avoid many floating point multiplies */ const GLfloat mult = ctx->IntegerAccumScaler; static GLchan multTable[32768]; static GLfloat prevMult = 0.0; GLuint j; - const GLint max = MIN2((GLint) (256 / mult), 32767); + GLint k; + const GLint max = MIN2((GLuint) (256 / mult), 32767); if (mult != prevMult) { - for (j = 0; j < max; j++) - multTable[j] = (GLint) ((GLfloat) j * mult + 0.5F); + for (k = 0; k < max; k++) + multTable[k] = (GLint) ((GLfloat) k * mult + 0.5F); prevMult = mult; } @@ -379,7 +386,7 @@ _mesa_Accum( GLenum op, GLfloat value ) _mesa_mask_rgba_span( ctx, width, xpos, ypos, rgba ); } (*ctx->Driver.WriteRGBASpan)( ctx, width, xpos, ypos, - (const GLubyte (*)[4])rgba, NULL ); + (CONST GLubyte 
(*)[4]) rgba, NULL ); ypos++; } } @@ -406,10 +413,11 @@ _mesa_Accum( GLenum op, GLfloat value ) _mesa_mask_rgba_span( ctx, width, xpos, ypos, rgba ); } (*ctx->Driver.WriteRGBASpan)( ctx, width, xpos, ypos, - (const GLubyte (*)[4])rgba, NULL ); + (CONST GLubyte (*)[4]) rgba, NULL ); ypos++; } } + RENDER_FINISH(ctx); break; default: diff --git a/xc/extras/Mesa/src/attrib.c b/xc/extras/Mesa/src/attrib.c index a464f0905..93e03802d 100644 --- a/xc/extras/Mesa/src/attrib.c +++ b/xc/extras/Mesa/src/attrib.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/attrib.c,v 1.6 2000/09/26 15:56:29 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -212,8 +212,8 @@ _mesa_PushAttrib(GLbitfield mask) attr->RescaleNormals = ctx->Transform.RescaleNormals; attr->Scissor = ctx->Scissor.Enabled; attr->Stencil = ctx->Stencil.Enabled; - attr->Texture = ctx->Texture.Enabled; for (i=0; i<MAX_TEXTURE_UNITS; i++) { + attr->Texture[i] = ctx->Texture.Unit[i].Enabled; attr->TexGen[i] = ctx->Texture.Unit[i].TexGenEnabled; } newnode = new_attrib_node( GL_ENABLE_BIT ); @@ -392,6 +392,172 @@ _mesa_PushAttrib(GLbitfield mask)
+/*
+ * Restore the enable flags that _mesa_PushAttrib() saved for GL_ENABLE_BIT.
+ *
+ * Every saved flag in <enable> is compared with the matching field of the
+ * context and state is only touched where the two differ.  Most flags are
+ * restored through _mesa_set_enable(); the per-unit texture and texgen
+ * enables are stored into ctx->Texture.Unit[] directly and then reported
+ * to the driver through ctx->Driver.Enable, if that hook is set.
+ *
+ * Input:  ctx - the context whose enable state is restored
+ *         enable - the saved enable flags
+ */
+static void
+pop_enable_group(GLcontext *ctx, const struct gl_enable_attrib *enable)
+{
+   GLuint i;
+
+   /* Forward a flag to _mesa_set_enable() only if it actually changed. */
+#define TEST_AND_UPDATE(VALUE, NEWVALUE, ENUM)		\
+	if ((VALUE) != (NEWVALUE)) {			\
+	   _mesa_set_enable( ctx, ENUM, (NEWVALUE) );	\
+	}
+
+   TEST_AND_UPDATE(ctx->Color.AlphaEnabled, enable->AlphaTest, GL_ALPHA_TEST);
+   /* NOTE(review): the saved AutoNormal flag is compared with, and applied
+    * to, Transform.Normalize / GL_NORMALIZE; the Normalize update further
+    * down then sets GL_NORMALIZE again.  Presumably GL_AUTO_NORMAL was
+    * intended here - confirm against the state saved on push.
+    */
+   TEST_AND_UPDATE(ctx->Transform.Normalize, enable->AutoNormal, GL_NORMALIZE);
+   TEST_AND_UPDATE(ctx->Color.BlendEnabled, enable->Blend, GL_BLEND);
+
+   for (i=0;i<MAX_CLIP_PLANES;i++) {
+      if (ctx->Transform.ClipEnabled[i] != enable->ClipPlane[i])
+         _mesa_set_enable(ctx, (GLenum) (GL_CLIP_PLANE0 + i),
+                          enable->ClipPlane[i]);
+   }
+
+   TEST_AND_UPDATE(ctx->Light.ColorMaterialEnabled, enable->ColorMaterial,
+                   GL_COLOR_MATERIAL);
+   TEST_AND_UPDATE(ctx->Polygon.CullFlag, enable->CullFace, GL_CULL_FACE);
+   TEST_AND_UPDATE(ctx->Depth.Test, enable->DepthTest, GL_DEPTH_TEST);
+   TEST_AND_UPDATE(ctx->Color.DitherFlag, enable->Dither, GL_DITHER);
+   TEST_AND_UPDATE(ctx->Pixel.Convolution1DEnabled, enable->Convolution1D,
+                   GL_CONVOLUTION_1D);
+   TEST_AND_UPDATE(ctx->Pixel.Convolution2DEnabled, enable->Convolution2D,
+                   GL_CONVOLUTION_2D);
+   TEST_AND_UPDATE(ctx->Pixel.Separable2DEnabled, enable->Separable2D,
+                   GL_SEPARABLE_2D);
+   TEST_AND_UPDATE(ctx->Fog.Enabled, enable->Fog, GL_FOG);
+   TEST_AND_UPDATE(ctx->Light.Enabled, enable->Lighting, GL_LIGHTING);
+   TEST_AND_UPDATE(ctx->Line.SmoothFlag, enable->LineSmooth, GL_LINE_SMOOTH);
+   TEST_AND_UPDATE(ctx->Line.StippleFlag, enable->LineStipple,
+                   GL_LINE_STIPPLE);
+   TEST_AND_UPDATE(ctx->Color.IndexLogicOpEnabled, enable->IndexLogicOp,
+                   GL_INDEX_LOGIC_OP);
+   TEST_AND_UPDATE(ctx->Color.ColorLogicOpEnabled, enable->ColorLogicOp,
+                   GL_COLOR_LOGIC_OP);
+   TEST_AND_UPDATE(ctx->Eval.Map1Color4, enable->Map1Color4, GL_MAP1_COLOR_4);
+   TEST_AND_UPDATE(ctx->Eval.Map1Index, enable->Map1Index, GL_MAP1_INDEX);
+   TEST_AND_UPDATE(ctx->Eval.Map1Normal, enable->Map1Normal, GL_MAP1_NORMAL);
+   TEST_AND_UPDATE(ctx->Eval.Map1TextureCoord1, enable->Map1TextureCoord1,
+                   GL_MAP1_TEXTURE_COORD_1);
+   TEST_AND_UPDATE(ctx->Eval.Map1TextureCoord2, enable->Map1TextureCoord2,
+                   GL_MAP1_TEXTURE_COORD_2);
+   TEST_AND_UPDATE(ctx->Eval.Map1TextureCoord3, enable->Map1TextureCoord3,
+                   GL_MAP1_TEXTURE_COORD_3);
+   TEST_AND_UPDATE(ctx->Eval.Map1TextureCoord4, enable->Map1TextureCoord4,
+                   GL_MAP1_TEXTURE_COORD_4);
+   TEST_AND_UPDATE(ctx->Eval.Map1Vertex3, enable->Map1Vertex3,
+                   GL_MAP1_VERTEX_3);
+   TEST_AND_UPDATE(ctx->Eval.Map1Vertex4, enable->Map1Vertex4,
+                   GL_MAP1_VERTEX_4);
+   TEST_AND_UPDATE(ctx->Eval.Map2Color4, enable->Map2Color4, GL_MAP2_COLOR_4);
+   TEST_AND_UPDATE(ctx->Eval.Map2Index, enable->Map2Index, GL_MAP2_INDEX);
+   TEST_AND_UPDATE(ctx->Eval.Map2Normal, enable->Map2Normal, GL_MAP2_NORMAL);
+   TEST_AND_UPDATE(ctx->Eval.Map2TextureCoord1, enable->Map2TextureCoord1,
+                   GL_MAP2_TEXTURE_COORD_1);
+   TEST_AND_UPDATE(ctx->Eval.Map2TextureCoord2, enable->Map2TextureCoord2,
+                   GL_MAP2_TEXTURE_COORD_2);
+   TEST_AND_UPDATE(ctx->Eval.Map2TextureCoord3, enable->Map2TextureCoord3,
+                   GL_MAP2_TEXTURE_COORD_3);
+   TEST_AND_UPDATE(ctx->Eval.Map2TextureCoord4, enable->Map2TextureCoord4,
+                   GL_MAP2_TEXTURE_COORD_4);
+   TEST_AND_UPDATE(ctx->Eval.Map2Vertex3, enable->Map2Vertex3,
+                   GL_MAP2_VERTEX_3);
+   TEST_AND_UPDATE(ctx->Eval.Map2Vertex4, enable->Map2Vertex4,
+                   GL_MAP2_VERTEX_4);
+   TEST_AND_UPDATE(ctx->Transform.Normalize, enable->Normalize, GL_NORMALIZE);
+   TEST_AND_UPDATE(ctx->Transform.RescaleNormals, enable->RescaleNormals,
+                   GL_RESCALE_NORMAL_EXT);
+   /* Pixel texturing has its own enable enum.  Using GL_POINT_SMOOTH here
+    * would toggle point smoothing and never restore PixelTextureEnabled.
+    */
+   TEST_AND_UPDATE(ctx->Pixel.PixelTextureEnabled, enable->PixelTexture,
+                   GL_PIXEL_TEXTURE_SGIS);
+   TEST_AND_UPDATE(ctx->Point.SmoothFlag, enable->PointSmooth,
+                   GL_POINT_SMOOTH);
+   TEST_AND_UPDATE(ctx->Polygon.OffsetPoint, enable->PolygonOffsetPoint,
+                   GL_POLYGON_OFFSET_POINT);
+   TEST_AND_UPDATE(ctx->Polygon.OffsetLine, enable->PolygonOffsetLine,
+                   GL_POLYGON_OFFSET_LINE);
+   TEST_AND_UPDATE(ctx->Polygon.OffsetFill, enable->PolygonOffsetFill,
+                   GL_POLYGON_OFFSET_FILL);
+   TEST_AND_UPDATE(ctx->Polygon.SmoothFlag, enable->PolygonSmooth,
+                   GL_POLYGON_SMOOTH);
+   TEST_AND_UPDATE(ctx->Polygon.StippleFlag, enable->PolygonStipple,
+                   GL_POLYGON_STIPPLE);
+   TEST_AND_UPDATE(ctx->Scissor.Enabled, enable->Scissor, GL_SCISSOR_TEST);
+   TEST_AND_UPDATE(ctx->Stencil.Enabled, enable->Stencil, GL_STENCIL_TEST);
+#undef TEST_AND_UPDATE
+
+   /* texture unit enables */
+   for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+      if (ctx->Texture.Unit[i].Enabled != enable->Texture[i]) {
+         ctx->Texture.Unit[i].Enabled = enable->Texture[i];
+         if (ctx->Driver.Enable) {
+            /* make unit i current in the driver before changing its enables */
+            if (ctx->Driver.ActiveTexture) {
+               (*ctx->Driver.ActiveTexture)(ctx, i);
+            }
+            (*ctx->Driver.Enable)( ctx, GL_TEXTURE_1D,
+                     (GLboolean) (enable->Texture[i] & TEXTURE0_1D) );
+            (*ctx->Driver.Enable)( ctx, GL_TEXTURE_2D,
+                     (GLboolean) (enable->Texture[i] & TEXTURE0_2D) );
+            (*ctx->Driver.Enable)( ctx, GL_TEXTURE_3D,
+                     (GLboolean) (enable->Texture[i] & TEXTURE0_3D) );
+         }
+      }
+
+      /* texgen enables: one bit per coordinate (S, T, R, Q) */
+      if (ctx->Texture.Unit[i].TexGenEnabled != enable->TexGen[i]) {
+         ctx->Texture.Unit[i].TexGenEnabled = enable->TexGen[i];
+         if (ctx->Driver.Enable) {
+            if (ctx->Driver.ActiveTexture) {
+               (*ctx->Driver.ActiveTexture)(ctx, i);
+            }
+            if (enable->TexGen[i] & S_BIT)
+               (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_S, GL_TRUE);
+            else
+               (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_S, GL_FALSE);
+            if (enable->TexGen[i] & T_BIT)
+               (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_T, GL_TRUE);
+            else
+               (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_T, GL_FALSE);
+            if (enable->TexGen[i] & R_BIT)
+               (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_R, GL_TRUE);
+            else
+               (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_R, GL_FALSE);
+            if (enable->TexGen[i] & Q_BIT)
+               (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_Q, GL_TRUE);
+            else
+               (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_Q, GL_FALSE);
+         }
+      }
+   }
+
+   /* switch the driver back to the context's current texture unit */
+   if (ctx->Driver.ActiveTexture) {
+      (*ctx->Driver.ActiveTexture)(ctx, ctx->Texture.CurrentUnit);
+   }
+}
+
+
+
+
 /*
  * This function is kind of long just because we have to call a lot
  * of device driver functions to update device driver state.
@@ -494,116 +636,7 @@ _mesa_PopAttrib(void) { const struct gl_enable_attrib *enable; enable = (const struct gl_enable_attrib *) attr->data; - -#define TEST_AND_UPDATE(VALUE, NEWVALUE, ENUM) \ - if ((VALUE) != (NEWVALUE)) { \ - _mesa_set_enable( ctx, ENUM, (NEWVALUE) ); \ - } - - TEST_AND_UPDATE(ctx->Color.AlphaEnabled, enable->AlphaTest, GL_ALPHA_TEST); - TEST_AND_UPDATE(ctx->Transform.Normalize, enable->AutoNormal, GL_NORMALIZE); - TEST_AND_UPDATE(ctx->Color.BlendEnabled, enable->Blend, GL_BLEND); - { - GLuint i; - for (i=0;i<MAX_CLIP_PLANES;i++) { - if (ctx->Transform.ClipEnabled[i] != enable->ClipPlane[i]) - _mesa_set_enable( ctx, (GLenum) (GL_CLIP_PLANE0 + i), enable->ClipPlane[i] ); - } - } - TEST_AND_UPDATE(ctx->Light.ColorMaterialEnabled, enable->ColorMaterial, GL_COLOR_MATERIAL); - TEST_AND_UPDATE(ctx->Polygon.CullFlag, enable->CullFace, GL_CULL_FACE); - TEST_AND_UPDATE(ctx->Depth.Test, enable->DepthTest, GL_DEPTH_TEST); - TEST_AND_UPDATE(ctx->Color.DitherFlag, enable->Dither, GL_DITHER); - TEST_AND_UPDATE(ctx->Pixel.Convolution1DEnabled, enable->Convolution1D, GL_CONVOLUTION_1D); - TEST_AND_UPDATE(ctx->Pixel.Convolution2DEnabled, enable->Convolution2D, GL_CONVOLUTION_2D); - TEST_AND_UPDATE(ctx->Pixel.Separable2DEnabled, enable->Separable2D, GL_SEPARABLE_2D); - TEST_AND_UPDATE(ctx->Fog.Enabled, enable->Fog, GL_FOG); - TEST_AND_UPDATE(ctx->Light.Enabled, enable->Lighting, GL_LIGHTING); - TEST_AND_UPDATE(ctx->Line.SmoothFlag, enable->LineSmooth, GL_LINE_SMOOTH); - TEST_AND_UPDATE(ctx->Line.StippleFlag, enable->LineStipple, GL_LINE_STIPPLE); - TEST_AND_UPDATE(ctx->Color.IndexLogicOpEnabled, enable->IndexLogicOp, GL_INDEX_LOGIC_OP); - TEST_AND_UPDATE(ctx->Color.ColorLogicOpEnabled, enable->ColorLogicOp, GL_COLOR_LOGIC_OP); - TEST_AND_UPDATE(ctx->Eval.Map1Color4, enable->Map1Color4, GL_MAP1_COLOR_4); - TEST_AND_UPDATE(ctx->Eval.Map1Index, enable->Map1Index, GL_MAP1_INDEX); - TEST_AND_UPDATE(ctx->Eval.Map1Normal, enable->Map1Normal, GL_MAP1_NORMAL); - 
TEST_AND_UPDATE(ctx->Eval.Map1TextureCoord1, enable->Map1TextureCoord1, GL_MAP1_TEXTURE_COORD_1); - TEST_AND_UPDATE(ctx->Eval.Map1TextureCoord2, enable->Map1TextureCoord2, GL_MAP1_TEXTURE_COORD_2); - TEST_AND_UPDATE(ctx->Eval.Map1TextureCoord3, enable->Map1TextureCoord3, GL_MAP1_TEXTURE_COORD_3); - TEST_AND_UPDATE(ctx->Eval.Map1TextureCoord4, enable->Map1TextureCoord4, GL_MAP1_TEXTURE_COORD_4); - TEST_AND_UPDATE(ctx->Eval.Map1Vertex3, enable->Map1Vertex3, GL_MAP1_VERTEX_3); - TEST_AND_UPDATE(ctx->Eval.Map1Vertex4, enable->Map1Vertex4, GL_MAP1_VERTEX_4); - TEST_AND_UPDATE(ctx->Eval.Map2Color4, enable->Map2Color4, GL_MAP2_COLOR_4); - TEST_AND_UPDATE(ctx->Eval.Map2Index, enable->Map2Index, GL_MAP2_INDEX); - TEST_AND_UPDATE(ctx->Eval.Map2Normal, enable->Map2Normal, GL_MAP2_NORMAL); - TEST_AND_UPDATE(ctx->Eval.Map2TextureCoord1, enable->Map2TextureCoord1, GL_MAP2_TEXTURE_COORD_1); - TEST_AND_UPDATE(ctx->Eval.Map2TextureCoord2, enable->Map2TextureCoord2, GL_MAP2_TEXTURE_COORD_2); - TEST_AND_UPDATE(ctx->Eval.Map2TextureCoord3, enable->Map2TextureCoord3, GL_MAP2_TEXTURE_COORD_3); - TEST_AND_UPDATE(ctx->Eval.Map2TextureCoord4, enable->Map2TextureCoord4, GL_MAP2_TEXTURE_COORD_4); - TEST_AND_UPDATE(ctx->Eval.Map2Vertex3, enable->Map2Vertex3, GL_MAP2_VERTEX_3); - TEST_AND_UPDATE(ctx->Eval.Map2Vertex4, enable->Map2Vertex4, GL_MAP2_VERTEX_4); - TEST_AND_UPDATE(ctx->Transform.Normalize, enable->Normalize, GL_NORMALIZE); - TEST_AND_UPDATE(ctx->Transform.RescaleNormals, enable->RescaleNormals, GL_RESCALE_NORMAL_EXT); - TEST_AND_UPDATE(ctx->Pixel.PixelTextureEnabled, enable->PixelTexture, GL_POINT_SMOOTH); - TEST_AND_UPDATE(ctx->Point.SmoothFlag, enable->PointSmooth, GL_POINT_SMOOTH); - TEST_AND_UPDATE(ctx->Polygon.OffsetPoint, enable->PolygonOffsetPoint, GL_POLYGON_OFFSET_POINT); - TEST_AND_UPDATE(ctx->Polygon.OffsetLine, enable->PolygonOffsetLine, GL_POLYGON_OFFSET_LINE); - TEST_AND_UPDATE(ctx->Polygon.OffsetFill, enable->PolygonOffsetFill, GL_POLYGON_OFFSET_FILL); - 
TEST_AND_UPDATE(ctx->Polygon.SmoothFlag, enable->PolygonSmooth, GL_POLYGON_SMOOTH); - TEST_AND_UPDATE(ctx->Polygon.StippleFlag, enable->PolygonStipple, GL_POLYGON_STIPPLE); - TEST_AND_UPDATE(ctx->Scissor.Enabled, enable->Scissor, GL_SCISSOR_TEST); - TEST_AND_UPDATE(ctx->Stencil.Enabled, enable->Stencil, GL_STENCIL_TEST); - if (ctx->Texture.Enabled != enable->Texture) { - ctx->Texture.Enabled = enable->Texture; - if (ctx->Driver.Enable) { - if (ctx->Driver.ActiveTexture) - (*ctx->Driver.ActiveTexture)( ctx, 0 ); - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_1D, (GLboolean) (enable->Texture & TEXTURE0_1D) ); - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_2D, (GLboolean) (enable->Texture & TEXTURE0_2D) ); - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_3D, (GLboolean) (enable->Texture & TEXTURE0_3D) ); - if (ctx->Driver.ActiveTexture) - (*ctx->Driver.ActiveTexture)( ctx, 1 ); - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_1D, (GLboolean) (enable->Texture & TEXTURE1_1D) ); - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_2D, (GLboolean) (enable->Texture & TEXTURE1_2D) ); - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_3D, (GLboolean) (enable->Texture & TEXTURE1_3D) ); - if (ctx->Driver.ActiveTexture) - (*ctx->Driver.ActiveTexture)( ctx, ctx->Texture.CurrentUnit ); - } - } -#undef TEST_AND_UPDATE - { - GLuint i; - for (i=0; i<MAX_TEXTURE_UNITS; i++) { - if (ctx->Texture.Unit[i].TexGenEnabled != enable->TexGen[i]) { - ctx->Texture.Unit[i].TexGenEnabled = enable->TexGen[i]; - - /* ctx->Enabled recalculated in state change - processing */ - - if (ctx->Driver.Enable) { - if (ctx->Driver.ActiveTexture) - (*ctx->Driver.ActiveTexture)( ctx, i ); - if (enable->TexGen[i] & S_BIT) - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_S, GL_TRUE); - else - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_S, GL_FALSE); - if (enable->TexGen[i] & T_BIT) - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_T, GL_TRUE); - else - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_T, GL_FALSE); - if (enable->TexGen[i] & R_BIT) - (*ctx->Driver.Enable)( ctx, 
GL_TEXTURE_GEN_R, GL_TRUE); - else - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_R, GL_FALSE); - if (enable->TexGen[i] & Q_BIT) - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_Q, GL_TRUE); - else - (*ctx->Driver.Enable)( ctx, GL_TEXTURE_GEN_Q, GL_FALSE); - } - } - } - if (ctx->Driver.ActiveTexture) - (*ctx->Driver.ActiveTexture)( ctx, ctx->Texture.CurrentUnit ); - } + pop_enable_group(ctx, enable); } break; case GL_EVAL_BIT: diff --git a/xc/extras/Mesa/src/bbox.c b/xc/extras/Mesa/src/bbox.c index 24a2d86c1..a559a53ed 100644 --- a/xc/extras/Mesa/src/bbox.c +++ b/xc/extras/Mesa/src/bbox.c @@ -113,7 +113,7 @@ static void transform_bounds3( GLubyte *orMask, GLubyte *andMask, ACC_4V( data[i], dz ); - cliptest_bounds( orMask, andMask, (const GLfloat (*)[4])data, 8 ); + cliptest_bounds( orMask, andMask, (CONST GLfloat (*)[4])data, 8 ); } static void transform_bounds2( GLubyte *orMask, GLubyte *andMask, @@ -147,7 +147,7 @@ static void transform_bounds2( GLubyte *orMask, GLubyte *andMask, ACC_4V( data[2], dy ); ACC_4V( data[3], dy ); - cliptest_bounds( orMask, andMask, (const GLfloat (*)[4])data, 4 ); + cliptest_bounds( orMask, andMask, (CONST GLfloat (*)[4])data, 4 ); } /* Dummy diff --git a/xc/extras/Mesa/src/bitmap.c b/xc/extras/Mesa/src/bitmap.c index eefee932f..d01337d67 100644 --- a/xc/extras/Mesa/src/bitmap.c +++ b/xc/extras/Mesa/src/bitmap.c @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -61,6 +61,8 @@ render_bitmap( GLcontext *ctx, GLint px, GLint py, return; /* NULL bitmap is legal, a no-op */ } + RENDER_START(ctx); + /* Set bitmap drawing color */ if (ctx->Visual->RGBAflag) { GLint r, g, b, a; @@ -127,6 +129,8 @@ render_bitmap( GLcontext *ctx, GLint px, GLint py, } gl_flush_pb(ctx); + + RENDER_FINISH(ctx); } diff --git a/xc/extras/Mesa/src/blend.c b/xc/extras/Mesa/src/blend.c index 70c114ef9..84141c8bc 100644 --- a/xc/extras/Mesa/src/blend.c +++ b/xc/extras/Mesa/src/blend.c @@ -1,20 +1,20 @@ /* * Mesa 3-D graphics library - * Version: 3.3 - * - * Copyright (C) 1999 Brian Paul All Rights Reserved. - * + * Version: 3.4 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/blend.c,v 1.7 2000/09/26 15:56:29 tsi Exp $ */ + #ifdef PC_HEADER @@ -288,7 +288,7 @@ _mesa_BlendEquation( GLenum mode ) ctx->Color.BlendFunc = NULL; ctx->NewState |= NEW_RASTER_OPS; - + if (ctx->Driver.BlendEquation) ctx->Driver.BlendEquation( ctx, mode ); } @@ -338,11 +338,31 @@ blend_transparency( GLcontext *ctx, GLuint n, const GLubyte mask[], /* 100% alpha, no-op */ } else { +#if 0 + /* This is pretty close, but Glean complains */ + const GLint s = CHAN_MAX - t; + const GLint r = (rgba[i][RCOMP] * t + dest[i][RCOMP] * s + 1) >> 8; + const GLint g = (rgba[i][GCOMP] * t + dest[i][GCOMP] * s + 1) >> 8; + const GLint b = (rgba[i][BCOMP] * t + dest[i][BCOMP] * s + 1) >> 8; + const GLint a = (rgba[i][ACOMP] * t + dest[i][ACOMP] * s + 1) >> 8; +#elif 0 + /* This is slower but satisfies Glean */ const GLint s = CHAN_MAX - t; - const GLint r = (rgba[i][RCOMP] * t + dest[i][RCOMP] * s) >> 8; - const GLint g = (rgba[i][GCOMP] * t + dest[i][GCOMP] * s) >> 8; - const GLint b = (rgba[i][BCOMP] * t + dest[i][BCOMP] * s) >> 8; - const GLint a = (rgba[i][ACOMP] * t + dest[i][ACOMP] * s) >> 8; + const GLint r = (rgba[i][RCOMP] * t + dest[i][RCOMP] * s) / 255; + const GLint g = (rgba[i][GCOMP] * t + dest[i][GCOMP] * s) / 255; + const GLint b = (rgba[i][BCOMP] * t + dest[i][BCOMP] * s) / 255; + const GLint a = (rgba[i][ACOMP] * t + dest[i][ACOMP] * s) / 255; +#else + /* This satisfies Glean and should be reasonably fast */ + /* Contributed by Nathan Hand */ +#define DIV255(X) (((X) << 8) + (X) + 256) >> 16 + const GLint s = CHAN_MAX - t; + const GLint r = DIV255(rgba[i][RCOMP] * t + dest[i][RCOMP] * s); + const GLint g = DIV255(rgba[i][GCOMP] * t + dest[i][GCOMP] * s); + const GLint b = DIV255(rgba[i][BCOMP] * t + dest[i][BCOMP] * s); + const GLint a = DIV255(rgba[i][ACOMP] * t + dest[i][ACOMP] * s); +#undef DIV255 +#endif ASSERT(r <= CHAN_MAX); ASSERT(g <= CHAN_MAX); ASSERT(b <= CHAN_MAX); @@ -754,22 +774,22 @@ blend_general( GLcontext *ctx, 
GLuint n, const GLubyte mask[], /* compute blended color */ if (ctx->Color.BlendEquation==GL_FUNC_ADD_EXT) { - r = Rs * sR + Rd * dR; - g = Gs * sG + Gd * dG; - b = Bs * sB + Bd * dB; - a = As * sA + Ad * dA; + r = Rs * sR + Rd * dR + 0.5F; + g = Gs * sG + Gd * dG + 0.5F; + b = Bs * sB + Bd * dB + 0.5F; + a = As * sA + Ad * dA + 0.5F; } else if (ctx->Color.BlendEquation==GL_FUNC_SUBTRACT_EXT) { - r = Rs * sR - Rd * dR; - g = Gs * sG - Gd * dG; - b = Bs * sB - Bd * dB; - a = As * sA - Ad * dA; + r = Rs * sR - Rd * dR + 0.5F; + g = Gs * sG - Gd * dG + 0.5F; + b = Bs * sB - Bd * dB + 0.5F; + a = As * sA - Ad * dA + 0.5F; } else if (ctx->Color.BlendEquation==GL_FUNC_REVERSE_SUBTRACT_EXT) { - r = Rd * dR - Rs * sR; - g = Gd * dG - Gs * sG; - b = Bd * dB - Bs * sB; - a = Ad * dA - As * sA; + r = Rd * dR - Rs * sR + 0.5F; + g = Gd * dG - Gs * sG + 0.5F; + b = Bd * dB - Bs * sB + 0.5F; + a = Ad * dA - As * sA + 0.5F; } else { /* should never get here */ @@ -790,7 +810,7 @@ blend_general( GLcontext *ctx, GLuint n, const GLubyte mask[], #if defined(USE_MMX_ASM) #include "X86/mmx.h" -#include "X86/common_x86asm.h" +#include "X86/common_x86_asm.h" #endif @@ -810,8 +830,8 @@ static void set_blend_function( GLcontext *ctx ) /* Hmm. A table here would have 12^4 == way too many entries. * Provide a hook for MMX instead. 
*/ - if (gl_x86_cpu_features & GL_CPU_MMX) { - gl_mmx_set_blend_function (ctx); + if ( cpu_has_mmx ) { + gl_mmx_set_blend_function( ctx ); } else #endif @@ -869,7 +889,7 @@ _mesa_blend_span( GLcontext *ctx, GLuint n, GLint x, GLint y, if (!ctx->Color.BlendFunc) set_blend_function(ctx); - (*ctx->Color.BlendFunc)( ctx, n, mask, rgba, (const GLubyte (*)[4])dest ); + (*ctx->Color.BlendFunc)( ctx, n, mask, rgba, (CONST GLubyte (*)[4])dest ); } @@ -904,5 +924,5 @@ _mesa_blend_pixels( GLcontext *ctx, if (!ctx->Color.BlendFunc) set_blend_function(ctx); - (*ctx->Color.BlendFunc)( ctx, n, mask, rgba, (const GLubyte (*)[4])dest ); + (*ctx->Color.BlendFunc)( ctx, n, mask, rgba, (CONST GLubyte (*)[4])dest ); } diff --git a/xc/extras/Mesa/src/buffers.c b/xc/extras/Mesa/src/buffers.c index e334265a6..214e644cb 100644 --- a/xc/extras/Mesa/src/buffers.c +++ b/xc/extras/Mesa/src/buffers.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/buffers.c,v 1.4 2000/09/26 15:56:29 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -167,7 +167,7 @@ clear_color_buffer(GLcontext *ctx) } else { /* Color index mode */ - ASSERT(ctx->Color.IndexMask == ~0); + ASSERT(ctx->Color.IndexMask == (GLuint) ~0); if (ctx->Visual->IndexBits == 8) { /* 8-bit clear */ GLubyte span[MAX_WIDTH]; @@ -208,15 +208,19 @@ clear_color_buffers(GLcontext *ctx) if (bufferBit & ctx->Color.DrawDestMask) { if (bufferBit == FRONT_LEFT_BIT) { (void) (*ctx->Driver.SetDrawBuffer)( ctx, GL_FRONT_LEFT); + (void) (*ctx->Driver.SetReadBuffer)( ctx, ctx->DrawBuffer, GL_FRONT_LEFT); } else if (bufferBit == FRONT_RIGHT_BIT) { (void) (*ctx->Driver.SetDrawBuffer)( ctx, GL_FRONT_RIGHT); + (void) (*ctx->Driver.SetReadBuffer)( ctx, ctx->DrawBuffer, GL_FRONT_RIGHT); } else if (bufferBit == BACK_LEFT_BIT) { (void) (*ctx->Driver.SetDrawBuffer)( ctx, GL_BACK_LEFT); + (void) (*ctx->Driver.SetReadBuffer)( ctx, ctx->DrawBuffer, GL_BACK_LEFT); } else { (void) (*ctx->Driver.SetDrawBuffer)( ctx, GL_BACK_RIGHT); + (void) (*ctx->Driver.SetReadBuffer)( ctx, ctx->DrawBuffer, GL_BACK_RIGHT); } if (ctx->Color.SWmasking) { @@ -228,8 +232,9 @@ clear_color_buffers(GLcontext *ctx) } } - /* restore default dest buffer */ + /* restore default read/draw buffers */ (void) (*ctx->Driver.SetDrawBuffer)( ctx, ctx->Color.DriverDrawBuffer ); + (void) (*ctx->Driver.SetReadBuffer)( ctx, ctx->ReadBuffer, ctx->Pixel.DriverReadBuffer ); } @@ -287,6 +292,8 @@ _mesa_Clear( GLbitfield mask ) } #endif + RENDER_START(ctx); + /* do software clearing here */ if (newMask) { if (newMask & ctx->Color.DrawDestMask) clear_color_buffers(ctx); @@ -302,6 +309,8 @@ _mesa_Clear( GLbitfield mask ) _mesa_clear_alpha_buffers( ctx ); } + RENDER_FINISH(ctx); + #ifdef PROFILE ctx->ClearTime += gl_time() - t0; ctx->ClearCount++; diff --git a/xc/extras/Mesa/src/clip_funcs.h b/xc/extras/Mesa/src/clip_funcs.h index d2bb45280..e0003bf3d 100644 --- a/xc/extras/Mesa/src/clip_funcs.h 
+++ b/xc/extras/Mesa/src/clip_funcs.h @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/clip_funcs.h,v 1.6 2000/09/26 15:56:29 tsi Exp $ */ /* * New (3.1) transformation code written by Keith Whitwell. diff --git a/xc/extras/Mesa/src/colortab.c b/xc/extras/Mesa/src/colortab.c index dff9feae9..425c03b1d 100644 --- a/xc/extras/Mesa/src/colortab.c +++ b/xc/extras/Mesa/src/colortab.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/colortab.c,v 1.7 2000/09/26 15:56:29 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -324,17 +324,14 @@ _mesa_ColorTable( GLenum target, GLenum internalFormat, return; } - if (width > ctx->Const.MaxColorTableSize) { + if (width > (GLsizei) ctx->Const.MaxColorTableSize) { if (proxy) { table->Size = 0; table->IntFormat = (GLenum) 0; table->Format = (GLenum) 0; } else { - if (width > ctx->Const.MaxColorTableSize) - gl_error(ctx, GL_TABLE_TOO_LARGE, "glColorTable(width)"); - else - gl_error(ctx, GL_INVALID_VALUE, "glColorTable(width)"); + gl_error(ctx, GL_TABLE_TOO_LARGE, "glColorTable(width)"); } return; } @@ -355,7 +352,7 @@ _mesa_ColorTable( GLenum target, GLenum internalFormat, if (floatTable) { GLfloat tempTab[MAX_COLOR_TABLE_SIZE * 4]; GLfloat *tableF; - GLuint i; + GLint i; _mesa_unpack_float_color_span(ctx, width, table->Format, tempTab, /* dest */ @@ -422,7 +419,7 @@ _mesa_ColorTable( GLenum target, GLenum internalFormat, return; } _mesa_unpack_ubyte_color_span(ctx, width, table->Format, - table->Table, /* dest */ + (GLubyte *) table->Table, /* dest */ format, type, data, &ctx->Unpack, GL_TRUE); } /* floatTable */ @@ -523,7 +520,7 @@ _mesa_ColorSubTable( GLenum target, GLsizei start, comps = _mesa_components_in_format(table->Format); 
assert(comps > 0); /* error should have been caught sooner */ - if (start + count > table->Size) { + if (start + count > (GLint) table->Size) { gl_error(ctx, GL_INVALID_VALUE, "glColorSubTable(count)"); return; } @@ -541,7 +538,7 @@ _mesa_ColorSubTable( GLenum target, GLsizei start, else { GLfloat tempTab[MAX_COLOR_TABLE_SIZE * 4]; GLfloat *tableF; - GLuint i; + GLint i; ASSERT(table->TableType == GL_FLOAT); @@ -676,7 +673,7 @@ _mesa_GetColorTable( GLenum target, GLenum format, struct gl_texture_unit *texUnit = &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; struct gl_color_table *table = NULL; GLubyte rgba[MAX_COLOR_TABLE_SIZE][4]; - GLint i; + GLuint i; ASSERT_OUTSIDE_BEGIN_END(ctx, "glGetColorTable"); @@ -835,7 +832,7 @@ _mesa_GetColorTable( GLenum target, GLenum format, return; } - _mesa_pack_rgba_span(ctx, table->Size, (const GLubyte (*)[]) rgba, + _mesa_pack_rgba_span(ctx, table->Size, (CONST GLubyte (*)[4]) rgba, format, type, data, &ctx->Pack, GL_FALSE); } diff --git a/xc/extras/Mesa/src/config.c b/xc/extras/Mesa/src/config.c index 9e50ff4b0..50df728c8 100644 --- a/xc/extras/Mesa/src/config.c +++ b/xc/extras/Mesa/src/config.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/config.c,v 1.6 2000/09/24 13:50:08 alanh Exp $ */ /* Mesa config file parse and execute code. diff --git a/xc/extras/Mesa/src/config.h b/xc/extras/Mesa/src/config.h index 1d93ae611..07ff841f6 100644 --- a/xc/extras/Mesa/src/config.h +++ b/xc/extras/Mesa/src/config.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/config.h,v 1.7 2000/09/26 15:56:30 tsi Exp $ */ + /* * Tunable configuration parameters. diff --git a/xc/extras/Mesa/src/context.c b/xc/extras/Mesa/src/context.c index f8a4ef90b..4d4a5974c 100644 --- a/xc/extras/Mesa/src/context.c +++ b/xc/extras/Mesa/src/context.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/context.c,v 1.6 2000/09/26 15:56:30 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -210,10 +210,11 @@ _mesa_initialize_visual( GLvisual *vis, if (depthBits == 0) { /* Special case. Even if we don't have a depth buffer we need - * good values for DepthMax for Z vertex transformation purposes. + * good values for DepthMax for Z vertex transformation purposes + * and for per-fragment fog computation. 
*/ - vis->DepthMax = 1; - vis->DepthMaxF = 1.0F; + vis->DepthMax = 1 << 16; + vis->DepthMaxF = (GLfloat) vis->DepthMax; } else if (depthBits < 32) { vis->DepthMax = (1 << depthBits) - 1; @@ -451,8 +452,9 @@ alloc_shared_state( void ) if (!ss) return NULL; - ss->DisplayList = _mesa_NewHashTable(); + _glthread_INIT_MUTEX(ss->Mutex); + ss->DisplayList = _mesa_NewHashTable(); ss->TexObjects = _mesa_NewHashTable(); /* Default Texture objects */ @@ -592,6 +594,8 @@ init_texture_unit( GLcontext *ctx, GLuint unit ) { struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + texUnit->Enabled = 0; + texUnit->ReallyEnabled = 0; texUnit->EnvMode = GL_MODULATE; texUnit->CombineModeRGB = GL_MODULATE; texUnit->CombineModeA = GL_MODULATE; @@ -1148,9 +1152,10 @@ init_attrib_groups( GLcontext *ctx ) /* Texture group */ ctx->Texture.CurrentUnit = 0; /* multitexture */ ctx->Texture.CurrentTransformUnit = 0; /* multitexture */ - ctx->Texture.Enabled = 0; + ctx->Texture.ReallyEnabled = 0; for (i=0; i<MAX_TEXTURE_UNITS; i++) init_texture_unit( ctx, i ); + ctx->Texture.SharedPalette = GL_FALSE; _mesa_init_colortable(&ctx->Texture.Palette); /* Transformation group */ diff --git a/xc/extras/Mesa/src/convolve.c b/xc/extras/Mesa/src/convolve.c new file mode 100644 index 000000000..ba25340cf --- /dev/null +++ b/xc/extras/Mesa/src/convolve.c @@ -0,0 +1,415 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.3 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/* + * Image convolution functions. + * + * Notes: filter kernel elements are indexed by <n> and <m> as in + * the GL spec. 
+ */ + + +#ifdef PC_HEADER +#include "all.h" +#else +#include "glheader.h" +#include "types.h" +#endif + + +void +_mesa_convolve_1d_reduce(GLint srcWidth, const GLfloat src[][4], + GLint filterWidth, const GLfloat filter[][4], + GLfloat dest[][4]) +{ + const GLint dstWidth = srcWidth - (filterWidth - 1); + GLint i, n; + + if (dstWidth <= 0) + return; /* null result */ + + for (i = 0; i < dstWidth; i++) { + GLfloat sumR = 0.0; + GLfloat sumG = 0.0; + GLfloat sumB = 0.0; + GLfloat sumA = 0.0; + for (n = 0; n < filterWidth; n++) { + sumR += src[i + n][RCOMP] * filter[n][RCOMP]; + sumG += src[i + n][GCOMP] * filter[n][GCOMP]; + sumB += src[i + n][BCOMP] * filter[n][BCOMP]; + sumA += src[i + n][ACOMP] * filter[n][ACOMP]; + } + dest[i][RCOMP] = sumR; + dest[i][GCOMP] = sumG; + dest[i][BCOMP] = sumB; + dest[i][ACOMP] = sumA; + } +} + + +void +_mesa_convolve_1d_constant(GLint srcWidth, const GLfloat src[][4], + GLint filterWidth, const GLfloat filter[][4], + const GLfloat borderColor[4], GLfloat dest[][4]) +{ + const GLint halfFilterWidth = filterWidth / 2; + GLint i, n; + + for (i = 0; i < srcWidth; i++) { + GLfloat sumR = 0.0; + GLfloat sumG = 0.0; + GLfloat sumB = 0.0; + GLfloat sumA = 0.0; + for (n = 0; n < filterWidth; n++) { + if (i + n < halfFilterWidth || i + n - halfFilterWidth >= srcWidth) { + sumR += borderColor[RCOMP] * filter[n][RCOMP]; + sumG += borderColor[GCOMP] * filter[n][GCOMP]; + sumB += borderColor[BCOMP] * filter[n][BCOMP]; + sumA += borderColor[ACOMP] * filter[n][ACOMP]; + } + else { + sumR += src[i + n - halfFilterWidth][RCOMP] * filter[n][RCOMP]; + sumG += src[i + n - halfFilterWidth][GCOMP] * filter[n][GCOMP]; + sumB += src[i + n - halfFilterWidth][BCOMP] * filter[n][BCOMP]; + sumA += src[i + n - halfFilterWidth][ACOMP] * filter[n][ACOMP]; + } + } + dest[i][RCOMP] = sumR; + dest[i][GCOMP] = sumG; + dest[i][BCOMP] = sumB; + dest[i][ACOMP] = sumA; + } +} + + +void +_mesa_convolve_1d_replicate(GLint srcWidth, const GLfloat src[][4], + GLint 
filterWidth, const GLfloat filter[][4], + GLfloat dest[][4]) +{ + const GLint halfFilterWidth = filterWidth / 2; + GLint i, n; + + for (i = 0; i < srcWidth; i++) { + GLfloat sumR = 0.0; + GLfloat sumG = 0.0; + GLfloat sumB = 0.0; + GLfloat sumA = 0.0; + for (n = 0; n < filterWidth; n++) { + if (i + n < halfFilterWidth) { + sumR += src[0][RCOMP] * filter[n][RCOMP]; + sumG += src[0][GCOMP] * filter[n][GCOMP]; + sumB += src[0][BCOMP] * filter[n][BCOMP]; + sumA += src[0][ACOMP] * filter[n][ACOMP]; + } + else if (i + n - halfFilterWidth >= srcWidth) { + sumR += src[srcWidth - 1][RCOMP] * filter[n][RCOMP]; + sumG += src[srcWidth - 1][GCOMP] * filter[n][GCOMP]; + sumB += src[srcWidth - 1][BCOMP] * filter[n][BCOMP]; + sumA += src[srcWidth - 1][ACOMP] * filter[n][ACOMP]; + } + else { + sumR += src[i + n - halfFilterWidth][RCOMP] * filter[n][RCOMP]; + sumG += src[i + n - halfFilterWidth][GCOMP] * filter[n][GCOMP]; + sumB += src[i + n - halfFilterWidth][BCOMP] * filter[n][BCOMP]; + sumA += src[i + n - halfFilterWidth][ACOMP] * filter[n][ACOMP]; + } + } + dest[i][RCOMP] = sumR; + dest[i][GCOMP] = sumG; + dest[i][BCOMP] = sumB; + dest[i][ACOMP] = sumA; + } +} + + +/* + * <src> is the source image width width = srcWidth, height = filterHeight. + * <filter> has width <filterWidth> and height <filterHeight>. + * <dst> is a 1-D image span of width <srcWidth> - (<filterWidth> - 1). 
+ */ +void +_mesa_convolve_2d_reduce(GLint srcWidth, GLint srcHeight, + const GLfloat src[][4], + GLint filterWidth, GLint filterHeight, + const GLfloat filter[][4], + GLfloat dest[][4]) +{ + const GLint dstWidth = srcWidth - (filterWidth - 1); + GLint i, n, m; + + if (dstWidth <= 0) + return; /* null result */ + + /* XXX todo */ + for (i = 0; i < dstWidth; i++) { + GLfloat sumR = 0.0; + GLfloat sumG = 0.0; + GLfloat sumB = 0.0; + GLfloat sumA = 0.0; + for (n = 0; n < filterHeight; n++) { + for (m = 0; m < filterWidth; m++) { + const GLint k = n * srcWidth + i + m; + sumR += src[k][RCOMP] * filter[n][RCOMP]; + sumG += src[k][GCOMP] * filter[n][GCOMP]; + sumB += src[k][BCOMP] * filter[n][BCOMP]; + sumA += src[k][ACOMP] * filter[n][ACOMP]; + } + } + dest[i][RCOMP] = sumR; + dest[i][GCOMP] = sumG; + dest[i][BCOMP] = sumB; + dest[i][ACOMP] = sumA; + } +} + + +void +_mesa_convolve_2d_constant(GLint srcWidth, GLint srcHeight, + const GLfloat src[][4], + GLint filterWidth, GLint filterHeight, + const GLfloat filter[][4], + GLfloat dest[][4], + const GLfloat borderColor[4]) +{ + const GLint halfFilterWidth = filterWidth / 2; + GLint i, n, m; + + for (i = 0; i < srcWidth; i++) { + GLfloat sumR = 0.0; + GLfloat sumG = 0.0; + GLfloat sumB = 0.0; + GLfloat sumA = 0.0; + for (m = 0; m < filterHeight; m++) { + const GLfloat (*filterRow)[4] = filter + m * filterWidth; + for (n = 0; n < filterWidth; n++) { + if (i + n < halfFilterWidth || + i + n - halfFilterWidth >= srcWidth) { + sumR += borderColor[RCOMP] * filterRow[n][RCOMP]; + sumG += borderColor[GCOMP] * filterRow[n][GCOMP]; + sumB += borderColor[BCOMP] * filterRow[n][BCOMP]; + sumA += borderColor[ACOMP] * filterRow[n][ACOMP]; + } + else { + const GLint k = m * srcWidth + i + n - halfFilterWidth; + sumR += src[k][RCOMP] * filterRow[n][RCOMP]; + sumG += src[k][GCOMP] * filterRow[n][GCOMP]; + sumB += src[k][BCOMP] * filterRow[n][BCOMP]; + sumA += src[k][ACOMP] * filterRow[n][ACOMP]; + } + } + } + dest[i][RCOMP] = sumR; + 
dest[i][GCOMP] = sumG; + dest[i][BCOMP] = sumB; + dest[i][ACOMP] = sumA; + } +} + + +void +_mesa_convolve_2d_replicate(GLint srcWidth, GLint srcHeight, + const GLfloat src[][4], + GLint filterWidth, GLint filterHeight, + const GLfloat filter[][4], + GLfloat dest[][4]) +{ + const GLint halfFilterWidth = filterWidth / 2; + GLint i, n, m; + + for (i = 0; i < srcWidth; i++) { + GLfloat sumR = 0.0; + GLfloat sumG = 0.0; + GLfloat sumB = 0.0; + GLfloat sumA = 0.0; + for (m = 0; m < filterHeight; m++) { + const GLfloat (*filterRow)[4] = filter + m * filterWidth; + for (n = 0; n < filterWidth; n++) { + if (i + n < halfFilterWidth) { + const GLint k = m * srcWidth + 0; + sumR += src[k][RCOMP] * filterRow[n][RCOMP]; + sumG += src[k][GCOMP] * filterRow[n][GCOMP]; + sumB += src[k][BCOMP] * filterRow[n][BCOMP]; + sumA += src[k][ACOMP] * filterRow[n][ACOMP]; + } + else if (i + n - halfFilterWidth >= srcWidth) { + const GLint k = m * srcWidth + srcWidth - 1; + sumR += src[k][RCOMP] * filterRow[n][RCOMP]; + sumG += src[k][GCOMP] * filterRow[n][GCOMP]; + sumB += src[k][BCOMP] * filterRow[n][BCOMP]; + sumA += src[k][ACOMP] * filterRow[n][ACOMP]; + } + else { + const GLint k = m * srcWidth + i + n - halfFilterWidth; + sumR += src[k][RCOMP] * filterRow[n][RCOMP]; + sumG += src[k][GCOMP] * filterRow[n][GCOMP]; + sumB += src[k][BCOMP] * filterRow[n][BCOMP]; + sumA += src[k][ACOMP] * filterRow[n][ACOMP]; + } + } + } + dest[i][RCOMP] = sumR; + dest[i][GCOMP] = sumG; + dest[i][BCOMP] = sumB; + dest[i][ACOMP] = sumA; + } +} + + +void +_mesa_convolve_sep_constant(GLint srcWidth, GLint srcHeight, + const GLfloat src[][4], + GLint filterWidth, GLint filterHeight, + const GLfloat rowFilt[][4], + const GLfloat colFilt[][4], + GLfloat dest[][4], + const GLfloat borderColor[4]) +{ + const GLint halfFilterWidth = filterWidth / 2; + GLint i, n, m; + + for (i = 0; i < srcWidth; i++) { + GLfloat sumR = 0.0; + GLfloat sumG = 0.0; + GLfloat sumB = 0.0; + GLfloat sumA = 0.0; + for (m = 0; m < 
filterHeight; m++) { + for (n = 0; n < filterWidth; n++) { + if (i + n < halfFilterWidth || + i + n - halfFilterWidth >= srcWidth) { + sumR += borderColor[RCOMP] * rowFilt[n][RCOMP] * colFilt[m][RCOMP]; + sumG += borderColor[GCOMP] * rowFilt[n][GCOMP] * colFilt[m][GCOMP]; + sumB += borderColor[BCOMP] * rowFilt[n][BCOMP] * colFilt[m][BCOMP]; + sumA += borderColor[ACOMP] * rowFilt[n][ACOMP] * colFilt[m][ACOMP]; + } + else { + const GLint k = m * srcWidth + i + n - halfFilterWidth; + sumR += src[k][RCOMP] * rowFilt[n][RCOMP] * colFilt[m][RCOMP]; + sumG += src[k][GCOMP] * rowFilt[n][GCOMP] * colFilt[m][GCOMP]; + sumB += src[k][BCOMP] * rowFilt[n][BCOMP] * colFilt[m][BCOMP]; + sumA += src[k][ACOMP] * rowFilt[n][ACOMP] * colFilt[m][ACOMP]; + } + } + } + dest[i][RCOMP] = sumR; + dest[i][GCOMP] = sumG; + dest[i][BCOMP] = sumB; + dest[i][ACOMP] = sumA; + } +} + + +void +_mesa_convolve_sep_reduce(GLint srcWidth, GLint srcHeight, + const GLfloat src[][4], + GLint filterWidth, GLint filterHeight, + const GLfloat rowFilt[][4], + const GLfloat colFilt[][4], + GLfloat dest[][4]) +{ +#if 00 + const GLint halfFilterWidth = filterWidth / 2; + GLint i, n, m; + + for (i = 0; i < srcWidth; i++) { + GLfloat sumR = 0.0; + GLfloat sumG = 0.0; + GLfloat sumB = 0.0; + GLfloat sumA = 0.0; + for (m = 0; m < filterHeight; m++) { + for (n = 0; n < filterWidth; n++) { + if (i + n < halfFilterWidth || + i + n - halfFilterWidth >= srcWidth) { + sumR += borderColor[RCOMP] * rowFilt[n][RCOMP] * colFilt[m][RCOMP]; + sumG += borderColor[GCOMP] * rowFilt[n][GCOMP] * colFilt[m][GCOMP]; + sumB += borderColor[BCOMP] * rowFilt[n][BCOMP] * colFilt[m][BCOMP]; + sumA += borderColor[ACOMP] * rowFilt[n][ACOMP] * colFilt[m][ACOMP]; + } + else { + const GLint k = m * srcWidth + i + n - halfFilterWidth; + sumR += src[k][RCOMP] * rowFilt[n][RCOMP] * colFilt[m][RCOMP]; + sumG += src[k][GCOMP] * rowFilt[n][GCOMP] * colFilt[m][GCOMP]; + sumB += src[k][BCOMP] * rowFilt[n][BCOMP] * colFilt[m][BCOMP]; + sumA += 
src[k][ACOMP] * rowFilt[n][ACOMP] * colFilt[m][ACOMP]; + } + } + } + dest[i][RCOMP] = sumR; + dest[i][GCOMP] = sumG; + dest[i][BCOMP] = sumB; + dest[i][ACOMP] = sumA; + } +#endif +} + + +void +_mesa_convolve_sep_replicate(GLint srcWidth, GLint srcHeight, + const GLfloat src[][4], + GLint filterWidth, GLint filterHeight, + const GLfloat rowFilt[][4], + const GLfloat colFilt[][4], + GLfloat dest[][4]) +{ + const GLint halfFilterWidth = filterWidth / 2; + GLint i, n, m; + + for (i = 0; i < srcWidth; i++) { + GLfloat sumR = 0.0; + GLfloat sumG = 0.0; + GLfloat sumB = 0.0; + GLfloat sumA = 0.0; + for (m = 0; m < filterHeight; m++) { + for (n = 0; n < filterWidth; n++) { + if (i + n < halfFilterWidth) { + const GLint k = m * srcWidth + 0; + sumR += src[k][RCOMP] * rowFilt[n][RCOMP] * colFilt[m][RCOMP]; + sumG += src[k][GCOMP] * rowFilt[n][GCOMP] * colFilt[m][GCOMP]; + sumB += src[k][BCOMP] * rowFilt[n][BCOMP] * colFilt[m][BCOMP]; + sumA += src[k][ACOMP] * rowFilt[n][ACOMP] * colFilt[m][ACOMP]; + } + else if (i + n - halfFilterWidth >= srcWidth) { + const GLint k = m * srcWidth + srcWidth - 1; + sumR += src[k][RCOMP] * rowFilt[n][RCOMP] * colFilt[m][RCOMP]; + sumG += src[k][GCOMP] * rowFilt[n][GCOMP] * colFilt[m][GCOMP]; + sumB += src[k][BCOMP] * rowFilt[n][BCOMP] * colFilt[m][BCOMP]; + sumA += src[k][ACOMP] * rowFilt[n][ACOMP] * colFilt[m][ACOMP]; + } + else { + const GLint k = m * srcWidth + i + n - halfFilterWidth; + sumR += src[k][RCOMP] * rowFilt[n][RCOMP] * colFilt[m][RCOMP]; + sumG += src[k][GCOMP] * rowFilt[n][GCOMP] * colFilt[m][GCOMP]; + sumB += src[k][BCOMP] * rowFilt[n][BCOMP] * colFilt[m][BCOMP]; + sumA += src[k][ACOMP] * rowFilt[n][ACOMP] * colFilt[m][ACOMP]; + } + } + } + dest[i][RCOMP] = sumR; + dest[i][GCOMP] = sumG; + dest[i][BCOMP] = sumB; + dest[i][ACOMP] = sumA; + } +} diff --git a/xc/extras/Mesa/src/copypix.c b/xc/extras/Mesa/src/copypix.c index 8b40a8711..2d3c3594a 100644 --- a/xc/extras/Mesa/src/copypix.c +++ b/xc/extras/Mesa/src/copypix.c @@ -1,7 
+1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/copypix.c,v 1.6 2000/09/26 15:56:30 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -126,7 +126,10 @@ static void copy_rgba_pixels( GLcontext *ctx, /* If read and draw buffer are different we must do buffer switching */ saveReadAlpha = ctx->ReadBuffer->Alpha; changeBuffer = ctx->Pixel.ReadBuffer != ctx->Color.DrawBuffer - || ctx->DrawBuffer != ctx->ReadBuffer; + || ctx->DrawBuffer != ctx->ReadBuffer; + + (*ctx->Driver.SetReadBuffer)( ctx, ctx->ReadBuffer, + ctx->Pixel.DriverReadBuffer ); if (overlapping) { GLint ssy = sy; @@ -205,7 +208,7 @@ static void copy_rgba_pixels( GLcontext *ctx, if (applyTransferOps) { const GLfloat scale = (1.0F / 255.0F); GLfloat rgbaFloat[MAX_WIDTH][4]; - GLuint k; + GLint k; /* convert ubyte to float */ for (k = 0; k < width; k++) { rgbaFloat[k][RCOMP] = (GLfloat) rgba[k][RCOMP] * scale; @@ -269,7 +272,7 @@ static void copy_rgba_pixels( GLcontext *ctx, MEMCPY(primary_rgba, rgba, 4 * width * sizeof(GLubyte)); for (unit = 0; unit < MAX_TEXTURE_UNITS; unit++) { - _mesa_pixeltexgen(ctx, width, (const GLubyte (*)[4]) rgba, + _mesa_pixeltexgen(ctx, width, (CONST GLubyte (*)[4]) rgba, s, t, r, q); gl_texture_pixels(ctx, unit, width, s, t, r, NULL, primary_rgba, rgba); @@ -278,17 +281,21 @@ static void copy_rgba_pixels( GLcontext *ctx, if (quick_draw && dy >= 0 && dy < ctx->DrawBuffer->Height) { (*ctx->Driver.WriteRGBASpan)( ctx, width, destx, dy, - (const GLubyte (*)[4])rgba, NULL ); + (CONST GLubyte (*)[4])rgba, NULL ); } else if (zoom) { gl_write_zoomed_rgba_span( ctx, width, destx, dy, zspan, - (const GLubyte (*)[4])rgba, desty); + (CONST GLubyte (*)[4])rgba, desty); } else { gl_write_rgba_span( ctx, 
width, destx, dy, zspan, rgba, GL_BITMAP ); } } + /* Restore pixel source to be the draw buffer (for blending, etc) */ + (*ctx->Driver.SetReadBuffer)( ctx, ctx->DrawBuffer, + ctx->Color.DriverDrawBuffer ); + if (overlapping) FREE(prgba); } @@ -336,6 +343,9 @@ static void copy_ci_pixels( GLcontext *ctx, changeBuffer = ctx->Pixel.ReadBuffer != ctx->Color.DrawBuffer || ctx->DrawBuffer != ctx->ReadBuffer; + (*ctx->Driver.SetReadBuffer)( ctx, ctx->ReadBuffer, + ctx->Pixel.DriverReadBuffer ); + if (overlapping) { GLint ssy = sy; pci = (GLuint *) MALLOC(width * height * sizeof(GLuint)); @@ -394,6 +404,10 @@ static void copy_ci_pixels( GLcontext *ctx, } } + /* Restore pixel source to be the draw buffer (for blending, etc) */ + (*ctx->Driver.SetReadBuffer)( ctx, ctx->DrawBuffer, + ctx->Color.DriverDrawBuffer ); + if (overlapping) FREE(pci); } @@ -489,7 +503,7 @@ static void copy_depth_pixels( GLcontext *ctx, GLint srcx, GLint srcy, if (ctx->Visual->RGBAflag) { if (zoom) { gl_write_zoomed_rgba_span( ctx, width, destx, dy, zspan, - (const GLubyte (*)[4])rgba, desty ); + (CONST GLubyte (*)[4])rgba, desty ); } else { gl_write_rgba_span( ctx, width, destx, dy, zspan, rgba, GL_BITMAP); @@ -626,9 +640,12 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, GLsizei height, ctx->OcclusionResult = GL_TRUE; + RENDER_START(ctx); + if (ctx->Driver.CopyPixels && (*ctx->Driver.CopyPixels)( ctx, srcx, srcy, width, height, destx, desty, type )) { + RENDER_FINISH(ctx); return; } @@ -647,6 +664,8 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, GLsizei height, else { gl_error( ctx, GL_INVALID_ENUM, "glCopyPixels" ); } + + RENDER_FINISH(ctx); } else if (ctx->RenderMode == GL_FEEDBACK) { GLfloat color[4]; diff --git a/xc/extras/Mesa/src/cull_tmp.h b/xc/extras/Mesa/src/cull_tmp.h index 463cb59b4..14c83c475 100644 --- a/xc/extras/Mesa/src/cull_tmp.h +++ b/xc/extras/Mesa/src/cull_tmp.h @@ -38,7 +38,7 @@ static GLuint TAG(gl_cull_triangles)( struct vertex_buffer *VB, const GLubyte 
face_bit = ctx->Polygon.FrontBit; const GLubyte cull_faces = ctx->Polygon.CullBits; GLubyte *cullmask = VB->CullMask; - GLint i,cullcount = 0; + GLint i, cullcount = 0; GLint last = count - 3; (void) parity; @@ -46,7 +46,7 @@ static GLuint TAG(gl_cull_triangles)( struct vertex_buffer *VB, CULL_TRI(DO_CLIP, DO_AREA, i, i+1, i+2, face_bit, 3); } - if (i != count) + if (i != (GLint) count) cullcount += count - i; return cullcount; @@ -77,7 +77,7 @@ static GLuint TAG(gl_cull_triangle_fan)( struct vertex_buffer *VB, CULL_TRI(DO_CLIP, DO_AREA, start, i+1, i+2, face_bit, nr); } - if (i != last + 1) + if (i != (GLint) (last + 1)) cullcount += count - i; return cullcount; @@ -135,7 +135,7 @@ static GLuint TAG(gl_cull_quads)( struct vertex_buffer *VB, CULL_QUAD(DO_CLIP, DO_AREA, i, i+1, i+2, i+3, 4); } - if (i != count) + if (i != (GLint) count) cullcount += count - i; return cullcount; @@ -162,7 +162,7 @@ static GLuint TAG(gl_cull_quad_strip)( struct vertex_buffer *VB, CULL_QUAD(DO_CLIP, DO_AREA, i, i+1, i+3, i+2, nr); } - if (i != last + 2) + if (i != ((GLint) last + 2)) cullcount += count - i; return cullcount; diff --git a/xc/extras/Mesa/src/cva.c b/xc/extras/Mesa/src/cva.c index d06890c3c..f4f1a7ba2 100644 --- a/xc/extras/Mesa/src/cva.c +++ b/xc/extras/Mesa/src/cva.c @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -244,15 +244,16 @@ void gl_merge_cva( struct vertex_buffer *VB, translate_4f( VB->ClipPtr, cvaVB->ClipPtr, elt, count); if (VB->ClipOrMask & CLIP_USER_BIT) { - GLubyte or = 0, and = ~0; + GLubyte orMask = 0, andMask = ~0; copy_clipmask( VB->UserClipMask + VB->Start, - &or, &and, + &orMask, &andMask, cvaVB->UserClipMask, elt, VB->Count - VB->Start); - if (and) VB->ClipAndMask |= CLIP_USER_BIT; + if (andMask) + VB->ClipAndMask |= CLIP_USER_BIT; } if (VB->ClipOrMask) @@ -386,7 +387,7 @@ _mesa_LockArraysEXT(GLint first, GLsizei count) */ if (first == 0 && count > 0 && - count <= ctx->Const.MaxArrayLockSize) + count <= (GLint) ctx->Const.MaxArrayLockSize) { struct gl_cva *cva = &ctx->CVA; @@ -683,7 +684,7 @@ void gl_prepare_arrays_cva( struct vertex_buffer *VB ) if (disable & VERT_NORM) cva->v.Normal = *VB->store.Normal; if (disable & VERT_OBJ_ANY) cva->v.Obj = *VB->store.Obj; if (disable & VERT_TEX0_ANY) cva->v.TexCoord[0]= *(VB->store.TexCoord[0]); - if (disable & VERT_TEX0_ANY) cva->v.TexCoord[1]= *(VB->store.TexCoord[1]); + if (disable & VERT_TEX1_ANY) cva->v.TexCoord[1]= *(VB->store.TexCoord[1]); if (disable & VERT_EDGE) cva->v.EdgeFlag = *VB->store.EdgeFlag; } diff --git a/xc/extras/Mesa/src/dd.h b/xc/extras/Mesa/src/dd.h index 90ff664ed..7b269421f 100644 --- a/xc/extras/Mesa/src/dd.h +++ b/xc/extras/Mesa/src/dd.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/dd.h,v 1.7 2000/09/26 15:56:30 tsi Exp $ */ + #ifndef DD_INCLUDED @@ -640,6 +640,16 @@ struct dd_function_table { * Core Mesa will perform any image format/type conversions that are needed. 
*/ + GLboolean (*TestProxyTexImage)(GLcontext *ctx, GLenum target, + GLint level, GLint internalFormat, + GLenum format, GLenum type, + GLint width, GLint height, + GLint depth, GLint border); + /* Called by glTexImage[123]D when user specifies a proxy texture + * target. Return GL_TRUE if the proxy test passes, return GL_FALSE + * if the test fails. + */ + GLboolean (*CompressedTexImage1D)( GLcontext *ctx, GLenum target, GLint level, GLsizei imageSize, const GLvoid *data, @@ -712,8 +722,15 @@ struct dd_function_table { */ GLint (*SpecificCompressedTexFormat)(GLcontext *ctx, - GLint internalFormat, - GLint numDimensions); + GLint internalFormat, + GLint numDimensions, + GLint *levelp, + GLsizei *widthp, + GLsizei *heightp, + GLsizei *depthp, + GLint *borderp, + GLenum *formatp, + GLenum *typep); /* Called to turn a generic texture format into a specific * texture format. For example, if a driver implements * GL_3DFX_texture_compression_FXT1, this would map diff --git a/xc/extras/Mesa/src/debug_xform.c b/xc/extras/Mesa/src/debug_xform.c index 5926ba191..e38548ee7 100644 --- a/xc/extras/Mesa/src/debug_xform.c +++ b/xc/extras/Mesa/src/debug_xform.c @@ -2,19 +2,19 @@ /* * Mesa 3-D graphics library * Version: 3.3 - * + * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. 
- * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -22,7 +22,10 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/debug_xform.c,v 1.5 2000/09/26 15:56:30 tsi Exp $ */ + +/* + * Updated for P6 architecture by Gareth Hughes. + */ #ifdef PC_HEADER #include "all.h" @@ -39,121 +42,160 @@ #ifdef DEBUG /* This code only used for debugging */ -/* comment this out to deactivate the cycle counter. - * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher) - * (hope, you don't try to debug Mesa on a 386 ;) +/* Comment this out to deactivate the cycle counter. + * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher) + * (hope, you don't try to debug Mesa on a 386 ;) */ #if defined(__GNUC__) && defined(__i386__) && defined(USE_X86_ASM) #define RUN_XFORM_BENCHMARK #endif -#define TEST_COUNT 100 /* size of the tested vector array */ +#define TEST_COUNT 128 /* size of the tested vector array */ -#define REQUIRED_PRECISION 10 /* allow 4 bits to miss */ -#define MAX_PRECISION 24 /* max. precision possible */ +#define REQUIRED_PRECISION 10 /* allow 4 bits to miss */ +#define MAX_PRECISION 24 /* max. precision possible */ #ifdef RUN_XFORM_BENCHMARK -/* Modify the the number of tests if you like. - * We take the minimum of all results, because every error should be - * positive (time used by other processes, task switches etc). - * It is assumed that all calculations are done in the cache. +/* Overhead of profiling counter in cycles. Automatically adjusted to + * your machine at run time - counter initialization should give very + * consistent results. 
*/ -#define BEGIN_RACE(x) \ -x = 16000000; /* hope it's enough-*/ \ -for (cycle_i = 0; cycle_i <10; cycle_i++) { \ - long cycle_tmp1, cycle_tmp2, dummy; \ - __asm__ ("mov %%eax, %0":"=a" (cycle_tmp1)); /* cache cycle_tmp1 */ \ - __asm__ ("mov %%eax, %0":"=a" (cycle_tmp2)); /* cache cycle_tmp2 */ \ - __asm__ ("cdq"); /* stall pipeline */ \ - __asm__ ("cdq"); /* to avoid pairing */ \ - __asm__ ("rdtsc":"=a" (cycle_tmp1), "=d" (dummy)); /* save timestamp */ - - -#define END_RACE(x) \ - __asm__ ("cdq"); \ - __asm__ ("cdq"); \ - __asm__ ("rdtsc":"=a" (cycle_tmp2), "=d" (dummy)); \ - if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1; \ -} \ -x -= 13; /* sub the overhead of the counter procedure */ - -#else -#define BEGIN_RACE(x) -#define END_RACE(x) -#endif +static int need_counter = 1; +static long counter_overhead = 0; +/* Modify the the number of tests if you like. + * We take the minimum of all results, because every error should be + * positive (time used by other processes, task switches etc). + * It is assumed that all calculations are done in the cache. + */ -static char *mesa_profile = NULL; - - -static GLfloat rnd(void) -{ - GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX; - GLfloat gran = (GLfloat)(1 << 13); - - f = (GLfloat)(GLint)(f * gran) / gran; - - return f * 2.0 - 1.0; -} - - - -static int significand_match( GLfloat a, GLfloat b ) -{ - GLfloat d = a - b; - int a_ex, b_ex, d_ex; - - if (d == 0.0F) { - return MAX_PRECISION; /* Exact match */ - } - - if (a == 0.0F || b == 0.0F) { - /* - * It would probably be better to check if the - * non-zero number is denormalized and return - * the index of the highest set bit here. - */ - return 0; - } - - frexp(a, &a_ex); - frexp(b, &b_ex); - frexp(d, &d_ex); - - if (a_ex < b_ex) - return a_ex - d_ex; - else - return b_ex - d_ex; -} +#if 1 /* PPro, PII, PIII version */ +/* Profiling on the P6 architecture requires a little more work, due to + * the internal out-of-order execution. 
We must perform a serializing + * 'cpuid' instruction before and after the 'rdtsc' instructions to make + * sure no other uops are executed when we sample the timestamp counter. + */ +#define INIT_COUNTER() \ + do { \ + int cycle_i; \ + counter_overhead = LONG_MAX; \ + for ( cycle_i = 0 ; cycle_i < 4 ; cycle_i++ ) { \ + long cycle_tmp1 = 0, cycle_tmp2 = 0; \ + __asm__ ( "push %%ebx \n" \ + "xor %%eax, %%eax \n" \ + "cpuid \n" \ + "rdtsc \n" \ + "mov %%eax, %0 \n" \ + "xor %%eax, %%eax \n" \ + "cpuid \n" \ + "pop %%ebx \n" \ + "push %%ebx \n" \ + "xor %%eax, %%eax \n" \ + "cpuid \n" \ + "rdtsc \n" \ + "mov %%eax, %1 \n" \ + "xor %%eax, %%eax \n" \ + "cpuid \n" \ + "pop %%ebx \n" \ + : "=m" (cycle_tmp1), "=m" (cycle_tmp2) \ + : : "eax", "ecx", "edx" ); \ + if ( counter_overhead > (cycle_tmp2 - cycle_tmp1) ) { \ + counter_overhead = cycle_tmp2 - cycle_tmp1; \ + } \ + } \ + } while (0) + +#define BEGIN_RACE(x) \ + x = LONG_MAX; \ + for ( cycle_i = 0 ; cycle_i < 10 ; cycle_i++ ) { \ + long cycle_tmp1 = 0, cycle_tmp2 = 0; \ + __asm__ ( "push %%ebx \n" \ + "xor %%eax, %%eax \n" \ + "cpuid \n" \ + "rdtsc \n" \ + "mov %%eax, %0 \n" \ + "xor %%eax, %%eax \n" \ + "cpuid \n" \ + "pop %%ebx \n" \ + : "=m" (cycle_tmp1) \ + : : "eax", "ecx", "edx" ); + +#define END_RACE(x) \ + __asm__ ( "push %%ebx \n" \ + "xor %%eax, %%eax \n" \ + "cpuid \n" \ + "rdtsc \n" \ + "mov %%eax, %0 \n" \ + "xor %%eax, %%eax \n" \ + "cpuid \n" \ + "pop %%ebx \n" \ + : "=m" (cycle_tmp2) \ + : : "eax", "ecx", "edx" ); \ + if ( x > (cycle_tmp2 - cycle_tmp1) ) { \ + x = cycle_tmp2 - cycle_tmp1; \ + } \ + } \ + x -= counter_overhead; + +#else /* PPlain, PMMX version */ + +/* To ensure accurate results, we stall the pipelines with the + * non-pairable 'cdq' instruction. This ensures all the code being + * profiled is complete when the 'rdtsc' instruction executes. 
+ */ +#define INIT_COUNTER(x) \ + do { \ + int cycle_i; \ + x = LONG_MAX; \ + for ( cycle_i = 0 ; cycle_i < 32 ; cycle_i++ ) { \ + long cycle_tmp1, cycle_tmp2, dummy; \ + __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \ + __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \ + __asm__ ( "cdq" ); \ + __asm__ ( "cdq" ); \ + __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) ); \ + __asm__ ( "cdq" ); \ + __asm__ ( "cdq" ); \ + __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \ + if ( x > (cycle_tmp2 - cycle_tmp1) ) \ + x = cycle_tmp2 - cycle_tmp1; \ + } \ + } while (0) + +#define BEGIN_RACE(x) \ + x = LONG_MAX; \ + for ( cycle_i = 0 ; cycle_i < 16 ; cycle_i++ ) { \ + long cycle_tmp1, cycle_tmp2, dummy; \ + __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp1) ); \ + __asm__ ( "mov %%eax, %0" : "=a" (cycle_tmp2) ); \ + __asm__ ( "cdq" ); \ + __asm__ ( "cdq" ); \ + __asm__ ( "rdtsc" : "=a" (cycle_tmp1), "=d" (dummy) ); + + +#define END_RACE(x) \ + __asm__ ( "cdq" ); \ + __asm__ ( "cdq" ); \ + __asm__ ( "rdtsc" : "=a" (cycle_tmp2), "=d" (dummy) ); \ + if ( x > (cycle_tmp2 - cycle_tmp1) ) \ + x = cycle_tmp2 - cycle_tmp1; \ + } \ + x -= counter_overhead; +#endif +#else +#define INIT_COUNTER(x) +#define BEGIN_RACE(x) +#define END_RACE(x) -static void ref_transform( GLvector4f *dst, - const GLmatrix *mat, - const GLvector4f *src, - const GLubyte *clipmask, - const GLubyte flag ) -{ - int i; - GLfloat *s = (GLfloat *)src->start; - GLfloat (*d)[4] = (GLfloat (*)[4])dst->start; - const GLfloat *m = mat->m; +#endif - (void) clipmask; - (void) flag; - for (i = 0; i < src->count; ++i) { - GLfloat x = s[0], y = s[1], z = s[2], w = s[3]; - d[i][0] = m[0]*x + m[4]*y + m[ 8]*z + m[12]*w; - d[i][1] = m[1]*x + m[5]*y + m[ 9]*z + m[13]*w; - d[i][2] = m[2]*x + m[6]*y + m[10]*z + m[14]*w; - d[i][3] = m[3]*x + m[7]*y + m[11]*z + m[15]*w; - s = (GLfloat *)((char *)s + src->stride); - } -} +static char *mesa_profile = NULL; enum { NIL=0, ONE=1, NEG=-1, VAR=2 }; @@ -170,7 +212,7 @@ static int 
m_identity[16] = { NIL, NIL, ONE, NIL, NIL, NIL, NIL, ONE }; -static int m_2d[16] = { +static int m_2d[16] = { VAR, VAR, NIL, VAR, VAR, VAR, NIL, VAR, NIL, NIL, ONE, NIL, @@ -229,23 +271,255 @@ static char *mstrings[7] = { }; -static int test_transform_function( transform_func fn, int psize, int mtype, - int masked, long* cycles ) + +static int m_norm_identity[16] = { + ONE, NIL, NIL, NIL, + NIL, ONE, NIL, NIL, + NIL, NIL, ONE, NIL, + NIL, NIL, NIL, NIL +}; +static int m_norm_general[16] = { + VAR, VAR, VAR, NIL, + VAR, VAR, VAR, NIL, + VAR, VAR, VAR, NIL, + NIL, NIL, NIL, NIL +}; +static int m_norm_no_rot[16] = { + VAR, NIL, NIL, NIL, + NIL, VAR, NIL, NIL, + NIL, NIL, VAR, NIL, + NIL, NIL, NIL, NIL +}; +static int *norm_templates[8] = { + m_norm_no_rot, + m_norm_no_rot, + m_norm_no_rot, + m_norm_general, + m_norm_general, + m_norm_general, + m_norm_identity, + m_norm_identity +}; +static int norm_types[8] = { + NORM_TRANSFORM_NO_ROT, + NORM_TRANSFORM_NO_ROT | NORM_RESCALE, + NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE, + NORM_TRANSFORM, + NORM_TRANSFORM | NORM_RESCALE, + NORM_TRANSFORM | NORM_NORMALIZE, + NORM_RESCALE, + NORM_NORMALIZE +}; +static int norm_scale_types[8] = { /* rescale factor */ + NIL, /* NIL disables rescaling */ + VAR, + NIL, + NIL, + VAR, + NIL, + VAR, + NIL +}; +static int norm_normalize_types[8] = { /* normalizing ?? 
(no = 0) */ + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 1 +}; +static char *norm_strings[8] = { + "NORM_TRANSFORM_NO_ROT", + "NORM_TRANSFORM_NO_ROT | NORM_RESCALE", + "NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE", + "NORM_TRANSFORM", + "NORM_TRANSFORM | NORM_RESCALE", + "NORM_TRANSFORM | NORM_NORMALIZE", + "NORM_RESCALE", + "NORM_NORMALIZE" +}; + + + +/* ================================================================ + * Helper functions + */ + +static GLfloat rnd( void ) +{ + GLfloat f = (GLfloat)rand() / (GLfloat)RAND_MAX; + GLfloat gran = (GLfloat)(1 << 13); + + f = (GLfloat)(GLint)(f * gran) / gran; + + return f * 2.0 - 1.0; +} + +static int significand_match( GLfloat a, GLfloat b ) +{ + GLfloat d = a - b; + int a_ex, b_ex, d_ex; + + if ( d == 0.0F ) { + return MAX_PRECISION; /* Exact match */ + } + + if ( a == 0.0F || b == 0.0F ) { + /* It would probably be better to check if the + * non-zero number is denormalized and return + * the index of the highest set bit here. + */ + return 0; + } + + frexp( a, &a_ex ); + frexp( b, &b_ex ); + frexp( d, &d_ex ); + + if ( a_ex < b_ex ) + return a_ex - d_ex; + else + return b_ex - d_ex; +} + + + +/* ================================================================ + * Reference transformations + */ + +static void ref_transform( GLvector4f *dst, + const GLmatrix *mat, + const GLvector4f *src, + const GLubyte *clipmask, + const GLubyte flag ) +{ + GLuint i; + GLfloat *s = (GLfloat *)src->start; + GLfloat (*d)[4] = (GLfloat (*)[4])dst->start; + const GLfloat *m = mat->m; + + (void) clipmask; + (void) flag; + + for ( i = 0 ; i < src->count ; i++ ) { + GLfloat x = s[0], y = s[1], z = s[2], w = s[3]; + d[i][0] = m[0]*x + m[4]*y + m[ 8]*z + m[12]*w; + d[i][1] = m[1]*x + m[5]*y + m[ 9]*z + m[13]*w; + d[i][2] = m[2]*x + m[6]*y + m[10]*z + m[14]*w; + d[i][3] = m[3]*x + m[7]*y + m[11]*z + m[15]*w; + s = (GLfloat *)((char *)s + src->stride); + } +} + +static void ref_norm_transform_rescale( const GLmatrix *mat, + GLfloat scale, + const 
GLvector3f *in, + const GLfloat *lengths, + const GLubyte mask[], + GLvector3f *dest ) +{ + GLuint i; + const GLfloat *s = in->start; + const GLfloat *m = mat->inv; + GLfloat (*out)[3] = (GLfloat (*)[3])dest->start; + + (void) mask; + (void) lengths; + + for ( i = 0 ; i < in->count ; i++ ) { + GLfloat x = s[0], y = s[1], z = s[2] ; + GLfloat tx = m[0]*x + m[1]*y + m[ 2]*z ; + GLfloat ty = m[4]*x + m[5]*y + m[ 6]*z ; + GLfloat tz = m[8]*x + m[9]*y + m[10]*z ; + + out[i][0] = tx * scale; + out[i][1] = ty * scale; + out[i][2] = tz * scale; + + s = (GLfloat *)((char *)s + in->stride); + } +} + +static void ref_norm_transform_normalize( const GLmatrix *mat, + GLfloat scale, + const GLvector3f *in, + const GLfloat *lengths, + const GLubyte mask[], + GLvector3f *dest ) +{ + GLuint i; + const GLfloat *s = in->start; + const GLfloat *m = mat->inv; + GLfloat (*out)[3] = (GLfloat (*)[3])dest->start; + + (void) mask; + + for ( i = 0 ; i < in->count ; i++ ) { + GLfloat x = s[0], y = s[1], z = s[2] ; + GLfloat tx = m[0]*x + m[1]*y + m[ 2]*z ; + GLfloat ty = m[4]*x + m[5]*y + m[ 6]*z ; + GLfloat tz = m[8]*x + m[9]*y + m[10]*z ; + + if ( !lengths ) { + GLfloat len = tx*tx + ty*ty + tz*tz; + if ( len > 1e-20 ) { + /* Hmmm, don't know how we could test the precalculated + * length case... + */ + scale = 1.0 / sqrt( len ); + out[i][0] = tx * scale; + out[i][1] = ty * scale; + out[i][2] = tz * scale; + } else { + out[i][0] = out[i][1] = out[i][2] = 0; + } + } else { + scale = lengths[i];; + out[i][0] = tx * scale; + out[i][1] = ty * scale; + out[i][2] = tz * scale; + } + + s = (GLfloat *)((char *)s + in->stride); + } +} + + + +/* ================================================================ + * Vertex transformation tests + */ + +/* Ensure our arrays are correctly aligned. 
+ */ +#if defined(__GNUC__) +#define ALIGN16(x) x __attribute__ ((aligned (16))) +#else +#define ALIGN16(x) x +#endif +static GLfloat ALIGN16(s[TEST_COUNT][5]); +static GLfloat ALIGN16(d[TEST_COUNT][4]); +static GLfloat ALIGN16(r[TEST_COUNT][4]); + +static int test_transform_function( transform_func func, int psize, int mtype, + int masked, long *cycles ) { GLvector4f source[1], dest[1], ref[1]; GLmatrix mat[1]; - GLfloat s[TEST_COUNT][5], d[TEST_COUNT][4], r[TEST_COUNT][4]; GLfloat *m; GLubyte mask[TEST_COUNT]; int i, j; #ifdef RUN_XFORM_BENCHMARK - int cycle_i; /* the counter for the benchmarks we run */ + int cycle_i; /* the counter for the benchmarks we run */ #endif (void) cycles; - if (psize > 4) { - gl_problem( NULL, "test_transform_function called with psize > 4\n"); + if ( psize > 4 ) { + gl_problem( NULL, "test_transform_function called with psize > 4\n" ); return 0; } @@ -259,9 +533,9 @@ static int test_transform_function( transform_func fn, int psize, int mtype, m[2] = 44.0; m[6] = 9.0; m[10] = 7.0; m[14] = 3.0; m[3] = 11.0; m[7] = 23.0; m[11] = 91.0; m[15] = 9.0; - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - switch (templates[mtype][i * 4 + j]) { + for ( i = 0 ; i < 4 ; i++ ) { + for ( j = 0 ; j < 4 ; j++ ) { + switch ( templates[mtype][i * 4 + j] ) { case NIL: m[j * 4 + i] = 0.0; break; @@ -279,13 +553,13 @@ static int test_transform_function( transform_func fn, int psize, int mtype, } } - for (i = 0; i < TEST_COUNT; ++i) { - mask[i] = i % 2; /* mask every 2nd element */ + for ( i = 0 ; i < TEST_COUNT ; i++) { + mask[i] = i % 2; /* mask every 2nd element */ d[i][0] = s[i][0] = 0.0; d[i][1] = s[i][1] = 0.0; d[i][2] = s[i][2] = 0.0; d[i][3] = s[i][3] = 1.0; - for (j = 0; j < psize; j++) + for ( j = 0 ; j < psize ; j++ ) s[i][j] = rnd(); } @@ -310,47 +584,47 @@ static int test_transform_function( transform_func fn, int psize, int mtype, ref->size = 0; ref->flags = 0; - ref_transform(ref, mat, source, NULL, 0); + ref_transform( ref, mat, 
source, NULL, 0 ); - if (mesa_profile) { - if (masked) { - BEGIN_RACE (*cycles); - fn(dest, mat->m, source, mask, 1); - END_RACE (*cycles); + if ( mesa_profile ) { + if ( masked ) { + BEGIN_RACE( *cycles ); + func( dest, mat->m, source, mask, 1 ); + END_RACE( *cycles ); } else { - BEGIN_RACE (*cycles); - fn(dest, mat->m, source, NULL, 0); - END_RACE (*cycles); - } + BEGIN_RACE( *cycles ); + func( dest, mat->m, source, NULL, 0 ); + END_RACE( *cycles ); + } } else { - if (masked) { - fn(dest, mat->m, source, mask, 1); + if ( masked ) { + func( dest, mat->m, source, mask, 1 ); } else { - fn(dest, mat->m, source, NULL, 0); + func( dest, mat->m, source, NULL, 0 ); } } - for (i = 0; i < TEST_COUNT; ++i) { - if (masked && mask[i] & 1) + for ( i = 0 ; i < TEST_COUNT ; i++ ) { + if ( masked && (mask[i] & 1) ) continue; - for (j = 0; j < 4; j++) { - if (significand_match (d[i][j], r[i][j]) < REQUIRED_PRECISION) { - printf("-----------------------------\n"); - printf ("(i = %i, j = %i)\n", i, j); - printf ("%f \t %f \t [diff = %e - %i bit missed]\n", - d[i][0], r[i][0], r[i][0]-d[i][0], - MAX_PRECISION - significand_match (d[i][0], r[i][0])); - printf ("%f \t %f \t [diff = %e - %i bit missed]\n", - d[i][1], r[i][1], r[i][1]-d[i][1], - MAX_PRECISION - significand_match (d[i][1], r[i][1])); - printf ("%f \t %f \t [diff = %e - %i bit missed]\n", - d[i][2], r[i][2], r[i][2]-d[i][2], - MAX_PRECISION - significand_match (d[i][2], r[i][2])); - printf ("%f \t %f \t [diff = %e - %i bit missed]\n", - d[i][3], r[i][3], r[i][3]-d[i][3], - MAX_PRECISION - significand_match (d[i][3], r[i][3])); + for ( j = 0 ; j < 4 ; j++ ) { + if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) { + printf( "-----------------------------\n" ); + printf( "(i = %i, j = %i)\n", i, j ); + printf( "%f \t %f \t [diff = %e - %i bit missed]\n", + d[i][0], r[i][0], r[i][0]-d[i][0], + MAX_PRECISION - significand_match( d[i][0], r[i][0] ) ); + printf( "%f \t %f \t [diff = %e - %i bit missed]\n", + 
d[i][1], r[i][1], r[i][1]-d[i][1], + MAX_PRECISION - significand_match( d[i][1], r[i][1] ) ); + printf( "%f \t %f \t [diff = %e - %i bit missed]\n", + d[i][2], r[i][2], r[i][2]-d[i][2], + MAX_PRECISION - significand_match( d[i][2], r[i][2] ) ); + printf( "%f \t %f \t [diff = %e - %i bit missed]\n", + d[i][3], r[i][3], r[i][3]-d[i][3], + MAX_PRECISION - significand_match( d[i][3], r[i][3] ) ); return 0; } } @@ -360,240 +634,90 @@ static int test_transform_function( transform_func fn, int psize, int mtype, return 1; } - - - void gl_test_all_transform_functions( char *description ) { int masked, psize, mtype; - long benchmark_tab [2][4][7]; + long benchmark_tab[2][4][7]; static int first_time = 1; - if (first_time) { + if ( first_time ) { first_time = 0; - mesa_profile = getenv("MESA_PROFILE"); + mesa_profile = getenv( "MESA_PROFILE" ); } #ifdef RUN_XFORM_BENCHMARK - if (mesa_profile) { - printf("transform results after hooking in %s functions:\n", description); + if ( mesa_profile ) { + if ( need_counter ) { + need_counter = 0; + INIT_COUNTER(); + printf( "counter overhead: %ld cycles\n\n", counter_overhead ); + } + printf( "transform results after hooking in %s functions:\n", description ); } #endif - for (masked = 0; masked <= 1; masked++) { + for ( masked = 0 ; masked <= 1 ; masked++ ) { int cma = masked ? CULL_MASK_ACTIVE : 0; char *cmastring = masked ? "CULL_MASK_ACTIVE" : "0"; #ifdef RUN_XFORM_BENCHMARK - if (mesa_profile) { - printf ("\n culling: %s \n", masked ? "CULL_MASK_ACTIVE" : "0"); - for (psize = 1; psize <= 4; psize++) { - printf(" p%d\t", psize ); + if ( mesa_profile ) { + printf( "\n culling: %s \n", masked ? 
"CULL_MASK_ACTIVE" : "0" ); + for ( psize = 1 ; psize <= 4 ; psize++ ) { + printf( " p%d\t", psize ); } - printf("\n--------------------------------------------------------\n"); + printf( "\n--------------------------------------------------------\n" ); } #endif - for (mtype = 0; mtype < 7; mtype++) { - for (psize = 1; psize <= 4; psize++) { - transform_func fn = gl_transform_tab[cma][psize][mtypes[mtype]]; - long* cycles = &(benchmark_tab [cma][psize-1][mtype]); + for ( mtype = 0 ; mtype < 7 ; mtype++ ) { + for ( psize = 1 ; psize <= 4 ; psize++ ) { + transform_func func = gl_transform_tab[cma][psize][mtypes[mtype]]; + long *cycles = &(benchmark_tab[cma][psize-1][mtype]); - if (test_transform_function (fn,psize,mtype,masked,cycles) == 0 ) { + if ( test_transform_function( func, psize, mtype, + masked, cycles ) == 0 ) { char buf[100]; sprintf( buf, "gl_transform_tab[%s][%d][%s] failed test (%s)", cmastring, psize, mstrings[mtype], description ); gl_problem( NULL, buf ); - } else { -/* printf("gl_transform_tab[%s][%d][%s] passed test in %i cycles\n", - cmastring, psize, mstrings[mtype], - benchmark_tab [cma][psize-1][mtype]); -*/ } + } #ifdef RUN_XFORM_BENCHMARK - if (mesa_profile) - printf(" %li\t", benchmark_tab [cma][psize-1][mtype] ); + if ( mesa_profile ) + printf( " %li\t", benchmark_tab[cma][psize-1][mtype] ); #endif } #ifdef RUN_XFORM_BENCHMARK - if (mesa_profile) - printf (" | [%s]\n", mstrings[mtype] ); + if ( mesa_profile ) + printf( " | [%s]\n", mstrings[mtype] ); #endif } #ifdef RUN_XFORM_BENCHMARK - if (mesa_profile) - printf ("\n"); + if ( mesa_profile ) + printf( "\n" ); #endif } } +/* ================================================================ + * Normal transformation tests + */ - - - - -static void ref_norm_transform_rescale ( const GLmatrix *mat, - GLfloat scale, - const GLvector3f *in, - const GLfloat *lengths, - const GLubyte mask[], - GLvector3f *dest ) -{ - int i; - const GLfloat *s = in->start; - const GLfloat *m = mat->inv; - GLfloat 
(*out)[3] = (GLfloat (*)[3])dest->start; - - (void) mask; - (void) lengths; - - for (i = 0; i < in->count; ++i) { - GLfloat x = s[0], y = s[1], z = s[2] ; - GLfloat tx = m[0]*x + m[1]*y + m[ 2]*z ; - GLfloat ty = m[4]*x + m[5]*y + m[ 6]*z ; - GLfloat tz = m[8]*x + m[9]*y + m[10]*z ; - - out[i][0] = tx * scale; - out[i][1] = ty * scale; - out[i][2] = tz * scale; - - s = (GLfloat *)((char *)s + in->stride); - } -} - - -static void ref_norm_transform_normalize ( const GLmatrix *mat, - GLfloat scale, - const GLvector3f *in, - const GLfloat *lengths, - const GLubyte mask[], - GLvector3f *dest ) -{ - int i; - const GLfloat *s = in->start; - const GLfloat *m = mat->inv; - GLfloat (*out)[3] = (GLfloat (*)[3])dest->start; - - (void) mask; - - for (i = 0; i < in->count; ++i) { - GLfloat x = s[0], y = s[1], z = s[2] ; - GLfloat tx = m[0]*x + m[1]*y + m[ 2]*z ; - GLfloat ty = m[4]*x + m[5]*y + m[ 6]*z ; - GLfloat tz = m[8]*x + m[9]*y + m[10]*z ; - - if (!lengths) { - GLfloat len = tx*tx + ty*ty + tz*tz; - if (len > 1e-20) { - scale = 1.0 / sqrt (len); /* hmmm, don't know how we */ - /* could test the precalcu- */ - /* lated length case ... 
*/ - out[i][0] = tx * scale; - out[i][1] = ty * scale; - out[i][2] = tz * scale; - } else { - out[i][0] = out[i][1] = out[i][2] = 0; - } - } else { - scale = lengths [i];; - out[i][0] = tx * scale; - out[i][1] = ty * scale; - out[i][2] = tz * scale; - } - - s = (GLfloat *)((char *)s + in->stride); - } -} - - - -static int m_norm_identity[16] = { - ONE, NIL, NIL, NIL, - NIL, ONE, NIL, NIL, - NIL, NIL, ONE, NIL, - NIL, NIL, NIL, NIL -}; -static int m_norm_general[16] = { - VAR, VAR, VAR, NIL, - VAR, VAR, VAR, NIL, - VAR, VAR, VAR, NIL, - NIL, NIL, NIL, NIL -}; -static int m_norm_no_rot[16] = { - VAR, NIL, NIL, NIL, - NIL, VAR, NIL, NIL, - NIL, NIL, VAR, NIL, - NIL, NIL, NIL, NIL -}; -static int *norm_templates[8] = { - m_norm_no_rot, - m_norm_no_rot, - m_norm_no_rot, - m_norm_general, - m_norm_general, - m_norm_general, - m_norm_identity, - m_norm_identity -}; -static int norm_types[8] = { - NORM_TRANSFORM_NO_ROT, - NORM_TRANSFORM_NO_ROT | NORM_RESCALE, - NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE, - NORM_TRANSFORM, - NORM_TRANSFORM | NORM_RESCALE, - NORM_TRANSFORM | NORM_NORMALIZE, - NORM_RESCALE, - NORM_NORMALIZE -}; -static int norm_scale_types[8] = { /* rescale factor */ - NIL, /* NIL disables rescaling */ - VAR, - NIL, - NIL, - VAR, - NIL, - VAR, - NIL -}; -static int norm_normalize_types[8] = { /* normalizing ?? 
(no = 0) */ - 0, - 0, - 1, - 0, - 0, - 1, - 0, - 1 -}; -static char *norm_strings[8] = { - "NORM_TRANSFORM_NO_ROT", - "NORM_TRANSFORM_NO_ROT | NORM_RESCALE", - "NORM_TRANSFORM_NO_ROT | NORM_NORMALIZE", - "NORM_TRANSFORM", - "NORM_TRANSFORM | NORM_RESCALE", - "NORM_TRANSFORM | NORM_NORMALIZE", - "NORM_RESCALE", - "NORM_NORMALIZE" -}; - - - - - -static int test_norm_function( normal_func fn, int mtype, - int masked, long* cycles ) +static int test_norm_function( normal_func func, int mtype, + int masked, long *cycles ) { GLvector3f source[1], dest[1], dest2[1], ref[1], ref2[1]; GLmatrix mat[1]; - GLfloat s [TEST_COUNT][5], d [TEST_COUNT][3], r [TEST_COUNT][3]; - GLfloat d2 [TEST_COUNT][3], r2 [TEST_COUNT][3], length [TEST_COUNT]; + GLfloat s[TEST_COUNT][5], d[TEST_COUNT][3], r[TEST_COUNT][3]; + GLfloat d2[TEST_COUNT][3], r2[TEST_COUNT][3], length[TEST_COUNT]; GLfloat scale; GLfloat *m; GLubyte mask[TEST_COUNT]; int i, j; #ifdef RUN_XFORM_BENCHMARK - int cycle_i; /* the counter for the benchmarks we run */ + int cycle_i; /* the counter for the benchmarks we run */ #endif (void) cycles; @@ -608,9 +732,9 @@ static int test_norm_function( normal_func fn, int mtype, scale = 1.0F + rnd () * norm_scale_types[mtype]; - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - switch (norm_templates[mtype][i * 4 + j]) { + for ( i = 0 ; i < 4 ; i++ ) { + for ( j = 0 ; j < 4 ; j++ ) { + switch ( norm_templates[mtype][i * 4 + j] ) { case NIL: m[j * 4 + i] = 0.0; break; @@ -628,14 +752,16 @@ static int test_norm_function( normal_func fn, int mtype, } } - for (i = 0; i < TEST_COUNT; ++i) { - mask[i] = i % 2; /* mask every 2nd element */ + for ( i = 0 ; i < TEST_COUNT ; i++ ) { + mask[i] = i % 2; /* mask every 2nd element */ d[i][0] = s[i][0] = d2[i][0] = 0.0; d[i][1] = s[i][1] = d2[i][1] = 0.0; d[i][2] = s[i][2] = d2[i][2] = 0.0; - for (j = 0; j < 3; j++) + for ( j = 0 ; j < 3 ; j++ ) s[i][j] = rnd(); - length[i] = 1 / sqrt(s[i][0]*s[i][0] + s[i][1]*s[i][1] + s[i][2]*s[i][2]); + 
length[i] = 1 / sqrt( s[i][0]*s[i][0] + + s[i][1]*s[i][1] + + s[i][2]*s[i][2] ); } source->data = (GLfloat(*)[3])s; @@ -668,68 +794,68 @@ static int test_norm_function( normal_func fn, int mtype, ref2->stride = sizeof(float[3]); ref2->flags = 0; - if (norm_normalize_types [mtype] == 0) { - ref_norm_transform_rescale (mat, scale, source, NULL, NULL, ref); + if ( norm_normalize_types[mtype] == 0 ) { + ref_norm_transform_rescale( mat, scale, source, NULL, NULL, ref ); } else { - ref_norm_transform_normalize (mat, scale, source, NULL, NULL, ref); - ref_norm_transform_normalize (mat, scale, source, length, NULL, ref2); + ref_norm_transform_normalize( mat, scale, source, NULL, NULL, ref ); + ref_norm_transform_normalize( mat, scale, source, length, NULL, ref2 ); } - if (mesa_profile) { - if (masked) { - BEGIN_RACE (*cycles); - fn (mat, scale, source, NULL, mask, dest); - END_RACE (*cycles); - fn (mat, scale, source, length, mask, dest2); + if ( mesa_profile ) { + if ( masked ) { + BEGIN_RACE( *cycles ); + func( mat, scale, source, NULL, mask, dest ); + END_RACE( *cycles ); + func( mat, scale, source, length, mask, dest2 ); } else { - BEGIN_RACE (*cycles); - fn (mat, scale, source, NULL, NULL, dest); - END_RACE (*cycles); - fn (mat, scale, source, length, NULL, dest2); + BEGIN_RACE( *cycles ); + func( mat, scale, source, NULL, NULL, dest ); + END_RACE( *cycles ); + func( mat, scale, source, length, NULL, dest2 ); } } else { - if (masked) { - fn (mat, scale, source, NULL, mask, dest); - fn (mat, scale, source, length, mask, dest2); + if ( masked ) { + func( mat, scale, source, NULL, mask, dest ); + func( mat, scale, source, length, mask, dest2 ); } else { - fn (mat, scale, source, NULL, NULL, dest); - fn (mat, scale, source, length, NULL, dest2); + func( mat, scale, source, NULL, NULL, dest ); + func( mat, scale, source, length, NULL, dest2 ); } } - for (i = 0; i < TEST_COUNT; ++i) { - if (masked && !(mask[i] & 1)) + for ( i = 0 ; i < TEST_COUNT ; i++ ) { + if ( masked && 
!(mask[i] & 1) ) continue; - for (j = 0; j < 3; j++) { - if (significand_match (d[i][j], r[i][j]) < REQUIRED_PRECISION) { - printf("-----------------------------\n"); - printf ("(i = %i, j = %i)\n", i, j); - printf ("%f \t %f \t [ratio = %e - %i bit missed]\n", - d[i][0], r[i][0], r[i][0]/d[i][0], - MAX_PRECISION - significand_match (d[i][0], r[i][0])); - printf ("%f \t %f \t [ratio = %e - %i bit missed]\n", - d[i][1], r[i][1], r[i][1]/d[i][1], - MAX_PRECISION - significand_match (d[i][1], r[i][1])); - printf ("%f \t %f \t [ratio = %e - %i bit missed]\n", - d[i][2], r[i][2], r[i][2]/d[i][2], - MAX_PRECISION - significand_match (d[i][2], r[i][2])); + for ( j = 0 ; j < 3 ; j++ ) { + if ( significand_match( d[i][j], r[i][j] ) < REQUIRED_PRECISION ) { + printf( "-----------------------------\n" ); + printf( "(i = %i, j = %i)\n", i, j ); + printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", + d[i][0], r[i][0], r[i][0]/d[i][0], + MAX_PRECISION - significand_match( d[i][0], r[i][0] ) ); + printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", + d[i][1], r[i][1], r[i][1]/d[i][1], + MAX_PRECISION - significand_match( d[i][1], r[i][1] ) ); + printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", + d[i][2], r[i][2], r[i][2]/d[i][2], + MAX_PRECISION - significand_match( d[i][2], r[i][2] ) ); return 0; } - if (norm_normalize_types [mtype] != 0) { - if (significand_match (d2[i][j], r2[i][j]) < REQUIRED_PRECISION) { - printf("------------------- precalculated length case ------\n"); - printf ("(i = %i, j = %i)\n", i, j); - printf ("%f \t %f \t [ratio = %e - %i bit missed]\n", - d2[i][0], r2[i][0], r2[i][0]/d2[i][0], - MAX_PRECISION - significand_match (d2[i][0],r2[i][0])); - printf ("%f \t %f \t [ratio = %e - %i bit missed]\n", - d2[i][1], r2[i][1], r2[i][1]/d2[i][1], - MAX_PRECISION - significand_match (d2[i][1],r2[i][1])); - printf ("%f \t %f \t [ratio = %e - %i bit missed]\n", - d2[i][2], r2[i][2], r2[i][2]/d2[i][2], - MAX_PRECISION - significand_match (d2[i][2],r2[i][2])); + 
if ( norm_normalize_types[mtype] != 0 ) { + if ( significand_match( d2[i][j], r2[i][j] ) < REQUIRED_PRECISION ) { + printf( "------------------- precalculated length case ------\n" ); + printf( "(i = %i, j = %i)\n", i, j ); + printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", + d2[i][0], r2[i][0], r2[i][0]/d2[i][0], + MAX_PRECISION - significand_match( d2[i][0], r2[i][0] ) ); + printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", + d2[i][1], r2[i][1], r2[i][1]/d2[i][1], + MAX_PRECISION - significand_match( d2[i][1], r2[i][1] ) ); + printf( "%f \t %f \t [ratio = %e - %i bit missed]\n", + d2[i][2], r2[i][2], r2[i][2]/d2[i][2], + MAX_PRECISION - significand_match( d2[i][2], r2[i][2] ) ); return 0; } } @@ -740,68 +866,68 @@ static int test_norm_function( normal_func fn, int mtype, return 1; } - - void gl_test_all_normal_transform_functions( char *description ) { int masked; int mtype; - long benchmark_tab [0xf][0x4]; + long benchmark_tab[0xf][0x4]; static int first_time = 1; - - if (first_time) { + if ( first_time ) { first_time = 0; - mesa_profile = getenv("MESA_PROFILE"); + mesa_profile = getenv( "MESA_PROFILE" ); } #ifdef RUN_XFORM_BENCHMARK - if (mesa_profile) { - printf ("normal transform results after hooking in %s functions:\n", - description); + if ( mesa_profile ) { + if ( need_counter ) { + need_counter = 0; + INIT_COUNTER(); + printf( "counter overhead: %ld cycles\n\n", counter_overhead ); + } + printf( "normal transform results after hooking in %s functions:\n", + description ); } #endif - - for (masked = 0; masked <= 1; masked++) { + for ( masked = 0 ; masked <= 1 ; masked++ ) { int cma = masked ? CULL_MASK_ACTIVE : 0; char *cmastring = masked ? "CULL_MASK_ACTIVE" : "0"; #ifdef RUN_XFORM_BENCHMARK - if (mesa_profile) { - printf ("\n culling: %s \n", masked ? "CULL_MASK_ACTIVE" : "0"); - printf ("\n-------------------------------------------------------\n"); + if ( mesa_profile ) { + printf( "\n culling: %s \n", masked ? 
"CULL_MASK_ACTIVE" : "0" ); + printf( "\n-------------------------------------------------------\n" ); } #endif - for (mtype = 0; mtype < 8; mtype++) { - normal_func fn = gl_normal_tab [norm_types[mtype]][cma]; - long* cycles = &(benchmark_tab [mtype][cma]); + for ( mtype = 0 ; mtype < 8 ; mtype++ ) { + normal_func func = gl_normal_tab[norm_types[mtype]][cma]; + long *cycles = &(benchmark_tab[mtype][cma]); - if (test_norm_function (fn, mtype, masked, cycles) == 0 ) { + if ( test_norm_function( func, mtype, masked, cycles ) == 0 ) { char buf[100]; sprintf( buf, "gl_normal_tab[%s][%s] failed test (%s)", cmastring, norm_strings[mtype], description ); gl_problem( NULL, buf ); - } else { -/* printf("gl_normal_tab[%s][%s] passed test in %li cycles (%s)\n", - cmastring, norm_strings[mtype], - benchmark_tab [mtype][cma], description); -*/ } + } #ifdef RUN_XFORM_BENCHMARK - if (mesa_profile) { - printf (" %li\t", benchmark_tab [mtype][cma] ); - printf (" | [%s]\n", norm_strings[mtype] ); + if ( mesa_profile ) { + printf( " %li\t", benchmark_tab[mtype][cma] ); + printf( " | [%s]\n", norm_strings[mtype] ); } } - if (mesa_profile) { printf ("\n"); } + if ( mesa_profile ) + printf( "\n" ); #else } #endif } +#ifdef RUN_XFORM_BENCHMARK + if ( mesa_profile ) + fflush( stdout ); +#endif } - - -#endif /*DEBUG*/ +#endif /* DEBUG */ diff --git a/xc/extras/Mesa/src/depth.c b/xc/extras/Mesa/src/depth.c index ae119018e..600ae105f 100644 --- a/xc/extras/Mesa/src/depth.c +++ b/xc/extras/Mesa/src/depth.c @@ -1457,14 +1457,14 @@ _mesa_read_depth_span( GLcontext *ctx, /* read from software depth buffer */ if (ctx->Visual->DepthBits <= 16) { const GLushort *zptr = Z_ADDRESS16( ctx, x, y ); - GLuint i; + GLint i; for (i = 0; i < n; i++) { depth[i] = zptr[i]; } } else { const GLuint *zptr = Z_ADDRESS32( ctx, x, y ); - GLuint i; + GLint i; for (i = 0; i < n; i++) { depth[i] = zptr[i]; } @@ -1529,14 +1529,14 @@ _mesa_read_depth_span_float( GLcontext *ctx, /* read from software depth buffer */ if 
(ctx->Visual->DepthBits <= 16) { const GLushort *zptr = Z_ADDRESS16( ctx, x, y ); - GLuint i; + GLint i; for (i = 0; i < n; i++) { depth[i] = (GLfloat) zptr[i] * scale; } } else { const GLuint *zptr = Z_ADDRESS32( ctx, x, y ); - GLuint i; + GLint i; for (i = 0; i < n; i++) { depth[i] = (GLfloat) zptr[i] * scale; } @@ -1545,7 +1545,7 @@ _mesa_read_depth_span_float( GLcontext *ctx, else if (ctx->Driver.ReadDepthSpan) { /* read from hardware depth buffer */ GLdepth d[MAX_WIDTH]; - GLuint i; + GLint i; assert(n <= MAX_WIDTH); (*ctx->Driver.ReadDepthSpan)( ctx, n, x, y, d ); for (i = 0; i < n; i++) { diff --git a/xc/extras/Mesa/src/dlist.c b/xc/extras/Mesa/src/dlist.c index 5c26ba403..ff23ee951 100644 --- a/xc/extras/Mesa/src/dlist.c +++ b/xc/extras/Mesa/src/dlist.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/dlist.c,v 1.7 2000/09/26 15:56:30 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" diff --git a/xc/extras/Mesa/src/drawpix.c b/xc/extras/Mesa/src/drawpix.c index ecb1b22af..9955367ba 100644 --- a/xc/extras/Mesa/src/drawpix.c +++ b/xc/extras/Mesa/src/drawpix.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/drawpix.c,v 1.6 2000/09/26 15:56:31 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -125,7 +125,6 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, && !ctx->Pixel.MinMaxEnabled && !ctx->Pixel.HistogramEnabled && ctx->Pixel.IndexShift==0 && ctx->Pixel.IndexOffset==0 - && ctx->Pixel.MapColorFlag==0 && ctx->Texture.ReallyEnabled == 0 && unpack->Alignment==1 && !unpack->SwapBytes @@ -218,7 +217,8 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, * skip "skipRows" rows and skip "skipPixels" pixels/row. 
*/ - if (format==GL_RGBA && type==GL_UNSIGNED_BYTE) { + if (format==GL_RGBA && type==GL_UNSIGNED_BYTE + && ctx->Pixel.MapColorFlag==0) { if (ctx->Visual->RGBAflag) { GLubyte *src = (GLubyte *) pixels + (skipRows * rowLength + skipPixels) * 4; @@ -227,7 +227,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, GLint row; for (row=0; row<drawHeight; row++) { (*ctx->Driver.WriteRGBASpan)(ctx, drawWidth, destX, destY, - (void *) src, NULL); + (CONST GLubyte (*)[4]) src, NULL); src += rowLength * 4; destY++; } @@ -238,7 +238,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, for (row=0; row<drawHeight; row++) { destY--; (*ctx->Driver.WriteRGBASpan)(ctx, drawWidth, destX, destY, - (void *) src, NULL); + (CONST GLubyte (*)[4]) src, NULL); src += rowLength * 4; } } @@ -247,7 +247,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, GLint row; for (row=0; row<drawHeight; row++) { gl_write_zoomed_rgba_span(ctx, drawWidth, destX, destY, - zSpan, (void *) src, zoomY0); + zSpan, (CONST GLubyte (*)[4]) src, zoomY0); src += rowLength * 4; destY++; } @@ -255,7 +255,8 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, } return GL_TRUE; } - else if (format==GL_RGB && type==GL_UNSIGNED_BYTE) { + else if (format==GL_RGB && type==GL_UNSIGNED_BYTE + && ctx->Pixel.MapColorFlag==0) { if (ctx->Visual->RGBAflag) { GLubyte *src = (GLubyte *) pixels + (skipRows * rowLength + skipPixels) * 3; @@ -263,7 +264,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, GLint row; for (row=0; row<drawHeight; row++) { (*ctx->Driver.WriteRGBSpan)(ctx, drawWidth, destX, destY, - (void *) src, NULL); + (CONST GLubyte (*)[3]) src, NULL); src += rowLength * 3; destY++; } @@ -274,7 +275,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, for (row=0; row<drawHeight; row++) { destY--; (*ctx->Driver.WriteRGBSpan)(ctx, drawWidth, destX, destY, - (void *) src, NULL); + (CONST GLubyte (*)[3]) src, NULL); src += rowLength * 3; } } @@ -283,7 +284,7 @@ simple_DrawPixels( GLcontext *ctx, 
GLint x, GLint y, GLint row; for (row=0; row<drawHeight; row++) { gl_write_zoomed_rgb_span(ctx, drawWidth, destX, destY, - zSpan, (void *) src, zoomY0); + zSpan, (CONST GLubyte (*)[3]) src, zoomY0); src += rowLength * 3; destY++; } @@ -291,7 +292,8 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, } return GL_TRUE; } - else if (format==GL_LUMINANCE && type==GL_UNSIGNED_BYTE) { + else if (format==GL_LUMINANCE && type==GL_UNSIGNED_BYTE + && ctx->Pixel.MapColorFlag==0) { if (ctx->Visual->RGBAflag) { GLubyte *src = (GLubyte *) pixels + (skipRows * rowLength + skipPixels); @@ -307,7 +309,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, rgb[i][2] = src[i]; } (*ctx->Driver.WriteRGBSpan)(ctx, drawWidth, destX, destY, - (void *) rgb, NULL); + (CONST GLubyte (*)[3]) rgb, NULL); src += rowLength; destY++; } @@ -325,7 +327,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, } destY--; (*ctx->Driver.WriteRGBSpan)(ctx, drawWidth, destX, destY, - (void *) rgb, NULL); + (CONST GLubyte (*)[3]) rgb, NULL); src += rowLength; } } @@ -341,7 +343,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, rgb[i][2] = src[i]; } gl_write_zoomed_rgb_span(ctx, drawWidth, destX, destY, - zSpan, (void *) rgb, zoomY0); + zSpan, (CONST GLubyte (*)[3]) rgb, zoomY0); src += rowLength; destY++; } @@ -349,7 +351,8 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, } return GL_TRUE; } - else if (format==GL_LUMINANCE_ALPHA && type==GL_UNSIGNED_BYTE) { + else if (format==GL_LUMINANCE_ALPHA && type==GL_UNSIGNED_BYTE + && ctx->Pixel.MapColorFlag==0) { if (ctx->Visual->RGBAflag) { GLubyte *src = (GLubyte *) pixels + (skipRows * rowLength + skipPixels)*2; @@ -367,7 +370,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, rgba[i][3] = *ptr++; } (*ctx->Driver.WriteRGBASpan)(ctx, drawWidth, destX, destY, - (void *) rgba, NULL); + (CONST GLubyte (*)[4]) rgba, NULL); src += rowLength*2; destY++; } @@ -387,7 +390,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, } destY--; 
(*ctx->Driver.WriteRGBASpan)(ctx, drawWidth, destX, destY, - (void *) rgba, NULL); + (CONST GLubyte (*)[4]) rgba, NULL); src += rowLength*2; } } @@ -405,7 +408,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, rgba[i][3] = *ptr++; } gl_write_zoomed_rgba_span(ctx, drawWidth, destX, destY, - zSpan, (void *) rgba, zoomY0); + zSpan, (CONST GLubyte (*)[4]) rgba, zoomY0); src += rowLength*2; destY++; } @@ -424,7 +427,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, ASSERT(drawWidth < MAX_WIDTH); _mesa_map_ci8_to_rgba(ctx, drawWidth, src, rgba); (*ctx->Driver.WriteRGBASpan)(ctx, drawWidth, destX, destY, - (const GLubyte (*)[4])rgba, + (CONST GLubyte (*)[4])rgba, NULL); src += rowLength; destY++; @@ -439,7 +442,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, _mesa_map_ci8_to_rgba(ctx, drawWidth, src, rgba); destY--; (*ctx->Driver.WriteRGBASpan)(ctx, drawWidth, destX, destY, - (const GLubyte (*)[4])rgba, + (CONST GLubyte (*)[4])rgba, NULL); src += rowLength; } @@ -452,7 +455,7 @@ simple_DrawPixels( GLcontext *ctx, GLint x, GLint y, ASSERT(drawWidth < MAX_WIDTH); _mesa_map_ci8_to_rgba(ctx, drawWidth, src, rgba); gl_write_zoomed_rgba_span(ctx, drawWidth, destX, destY, - zSpan, (void *) rgba, zoomY0); + zSpan, (CONST GLubyte (*)[4]) rgba, zoomY0); src += rowLength; destY++; } @@ -642,8 +645,9 @@ draw_depth_pixels( GLcontext *ctx, GLint x, GLint y, GLint row; for (row = 0; row < height; row++, y++) { GLdepth zspan[MAX_WIDTH]; - const GLushort *zptr = _mesa_image_address(&ctx->Unpack, - pixels, width, height, GL_DEPTH_COMPONENT, type, 0, row, 0); + const GLushort *zptr = (const GLushort *) + _mesa_image_address(&ctx->Unpack, pixels, width, height, + GL_DEPTH_COMPONENT, type, 0, row, 0); GLint i; for (i = 0; i < width; i++) zspan[i] = zptr[i]; @@ -655,8 +659,9 @@ draw_depth_pixels( GLcontext *ctx, GLint x, GLint y, /* Special case: directly write 32-bit depth values */ GLint row; for (row = 0; row < height; row++, y++) { - const GLuint *zptr = 
_mesa_image_address(&ctx->Unpack, - pixels, width, height, GL_DEPTH_COMPONENT, type, 0, row, 0); + const GLuint *zptr = (const GLuint *) + _mesa_image_address(&ctx->Unpack, pixels, width, height, + GL_DEPTH_COMPONENT, type, 0, row, 0); gl_write_rgba_span( ctx, width, x, y, zptr, rgba, GL_BITMAP ); } } @@ -672,7 +677,7 @@ draw_depth_pixels( GLcontext *ctx, GLint x, GLint y, if (ctx->Visual->RGBAflag) { if (zoom) { gl_write_zoomed_rgba_span(ctx, width, x, y, zspan, - (const GLubyte (*)[4])rgba, desty); + (CONST GLubyte (*)[4])rgba, desty); } else { gl_write_rgba_span(ctx, width, x, y, zspan, rgba, GL_BITMAP); @@ -707,6 +712,11 @@ draw_rgba_pixels( GLcontext *ctx, GLint x, GLint y, GLdepth zspan[MAX_WIDTH]; GLboolean quickDraw; + if (!_mesa_is_legal_format_and_type(format, type)) { + gl_error(ctx, GL_INVALID_ENUM, "glDrawPixels(format or type)"); + return; + } + /* Try an optimized glDrawPixels first */ if (simple_DrawPixels(ctx, x, y, width, height, format, type, pixels)) return; @@ -743,8 +753,8 @@ draw_rgba_pixels( GLcontext *ctx, GLint x, GLint y, for (row = 0; row < height; row++, y++) { const GLvoid *source = _mesa_image_address(unpack, pixels, width, height, format, type, 0, row, 0); - _mesa_unpack_ubyte_color_span(ctx, width, GL_RGBA, (void*) rgba, - format, type, source, unpack, GL_TRUE); + _mesa_unpack_ubyte_color_span(ctx, width, GL_RGBA, + (GLubyte *) rgba, format, type, source, unpack, GL_TRUE); if ((ctx->Pixel.MinMaxEnabled && ctx->MinMax.Sink) || (ctx->Pixel.HistogramEnabled && ctx->Histogram.Sink)) continue; @@ -758,7 +768,7 @@ draw_rgba_pixels( GLcontext *ctx, GLint x, GLint y, MEMCPY(primary_rgba, rgba, 4 * width * sizeof(GLubyte)); for (unit = 0; unit < MAX_TEXTURE_UNITS; unit++) { - _mesa_pixeltexgen(ctx, width, (const GLubyte (*)[4]) rgba, + _mesa_pixeltexgen(ctx, width, (CONST GLubyte (*)[4]) rgba, s, t, r, q); gl_texture_pixels(ctx, unit, width, s, t, r, NULL, primary_rgba, rgba); @@ -767,11 +777,11 @@ draw_rgba_pixels( GLcontext *ctx, GLint x, 
GLint y, if (quickDraw) { (*ctx->Driver.WriteRGBASpan)( ctx, width, x, y, - (CONST GLubyte (*)[]) rgba, NULL); + (CONST GLubyte (*)[4]) rgba, NULL); } else if (zoom) { gl_write_zoomed_rgba_span( ctx, width, x, y, zspan, - (CONST GLubyte (*)[]) rgba, desty ); + (CONST GLubyte (*)[4]) rgba, desty ); } else { gl_write_rgba_span( ctx, (GLuint) width, x, y, zspan, rgba, GL_BITMAP); @@ -814,6 +824,7 @@ _mesa_DrawPixels( GLsizei width, GLsizei height, return; } + RENDER_START(ctx); switch (format) { case GL_STENCIL_INDEX: draw_stencil_pixels( ctx, x, y, width, height, type, pixels ); @@ -842,8 +853,8 @@ _mesa_DrawPixels( GLsizei width, GLsizei height, break; default: gl_error( ctx, GL_INVALID_ENUM, "glDrawPixels(format)" ); - return; } + RENDER_FINISH(ctx); } else if (ctx->RenderMode==GL_FEEDBACK) { if (ctx->Current.RasterPosValid) { diff --git a/xc/extras/Mesa/src/enable.c b/xc/extras/Mesa/src/enable.c index 97554430e..aaf577b1a 100644 --- a/xc/extras/Mesa/src/enable.c +++ b/xc/extras/Mesa/src/enable.c @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -284,12 +284,14 @@ void _mesa_set_enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_POINT_SMOOTH: if (ctx->Point.SmoothFlag!=state) { ctx->Point.SmoothFlag = state; + ctx->TriangleCaps ^= DD_POINT_SMOOTH; ctx->NewState |= NEW_RASTER_OPS; } break; case GL_POLYGON_SMOOTH: if (ctx->Polygon.SmoothFlag!=state) { ctx->Polygon.SmoothFlag = state; + ctx->TriangleCaps ^= DD_TRI_SMOOTH; ctx->NewState |= NEW_RASTER_OPS; } break; @@ -349,48 +351,39 @@ void _mesa_set_enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_TEXTURE_1D: if (ctx->Visual->RGBAflag) { const GLuint curr = ctx->Texture.CurrentUnit; - const GLuint flag = TEXTURE0_1D << (curr * 4); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[curr]; ctx->NewState |= NEW_TEXTURE_ENABLE; if (state) { texUnit->Enabled |= TEXTURE0_1D; - ctx->Enabled |= flag; } else { texUnit->Enabled &= ~TEXTURE0_1D; - ctx->Enabled &= ~flag; } } break; case GL_TEXTURE_2D: if (ctx->Visual->RGBAflag) { const GLuint curr = ctx->Texture.CurrentUnit; - const GLuint flag = TEXTURE0_2D << (curr * 4); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[curr]; ctx->NewState |= NEW_TEXTURE_ENABLE; if (state) { texUnit->Enabled |= TEXTURE0_2D; - ctx->Enabled |= flag; } else { texUnit->Enabled &= ~TEXTURE0_2D; - ctx->Enabled &= ~flag; } } break; case GL_TEXTURE_3D: if (ctx->Visual->RGBAflag) { const GLuint curr = ctx->Texture.CurrentUnit; - const GLuint flag = TEXTURE0_3D << (curr * 4); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[curr]; ctx->NewState |= NEW_TEXTURE_ENABLE; if (state) { texUnit->Enabled |= TEXTURE0_3D; - ctx->Enabled |= flag; } else { texUnit->Enabled &= ~TEXTURE0_3D; - ctx->Enabled &= ~flag; } } break; @@ -510,35 +503,24 @@ void _mesa_set_enable( GLcontext *ctx, GLenum cap, GLboolean state ) if (ctx->Extensions.HaveTextureCubeMap) { if 
(ctx->Visual->RGBAflag) { const GLuint curr = ctx->Texture.CurrentUnit; - const GLuint flag = TEXTURE0_CUBE << (curr * 4); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[curr]; ctx->NewState |= NEW_TEXTURE_ENABLE; if (state) { texUnit->Enabled |= TEXTURE0_CUBE; - ctx->Enabled |= flag; } else { texUnit->Enabled &= ~TEXTURE0_CUBE; - ctx->Enabled &= ~flag; } } } else { - if (state) - gl_error(ctx, GL_INVALID_ENUM, "glEnable"); - else - gl_error(ctx, GL_INVALID_ENUM, "glDisable"); + gl_error(ctx, GL_INVALID_ENUM, state ? "glEnable" : "glDisable"); return; } break; default: - if (state) { - gl_error( ctx, GL_INVALID_ENUM, "glEnable" ); - } - else { - gl_error( ctx, GL_INVALID_ENUM, "glDisable" ); - } + gl_error(ctx, GL_INVALID_ENUM, state ? "glEnable" : "glDisable"); return; } diff --git a/xc/extras/Mesa/src/enums.c b/xc/extras/Mesa/src/enums.c index 2de4ef06e..c68cf74b8 100644 --- a/xc/extras/Mesa/src/enums.c +++ b/xc/extras/Mesa/src/enums.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/enums.c,v 1.6 2000/09/26 15:56:31 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -854,7 +854,7 @@ static int compar_nr( const enum_elt **a, const enum_elt **b ) static void sort_enums( void ) { - int i; + GLuint i; index1 = (enum_elt **)MALLOC( Elements(all_enums) * sizeof(enum_elt *) ); sorted = 1; diff --git a/xc/extras/Mesa/src/eval.c b/xc/extras/Mesa/src/eval.c index 2c29dcc27..1cc0cd083 100644 --- a/xc/extras/Mesa/src/eval.c +++ b/xc/extras/Mesa/src/eval.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/eval.c,v 1.5 2000/09/26 15:56:31 tsi Exp $ */ + /* * eval.c was written by @@ -109,8 +109,8 @@ static void horner_bezier_curve(const GLfloat *cp, GLfloat *out, GLfloat t, GLuint dim, GLuint order) { - GLfloat s, powert; - GLuint i, k, bincoeff; + GLfloat s, powert, bincoeff; + GLuint i, k; if(order >= 2) { @@ -161,8 +161,8 @@ horner_bezier_surf(GLfloat *cn, GLfloat *out, GLfloat u, GLfloat v, { if(uorder >= 2) { - GLfloat s, poweru; - GLuint j, k, bincoeff; + GLfloat s, poweru, bincoeff; + GLuint j, k; /* Compute the control polygon for the surface-curve in u-direction */ for(j=0; j<vorder; j++) diff --git a/xc/extras/Mesa/src/extensions.c b/xc/extras/Mesa/src/extensions.c index a9522bbd8..f3e771c98 100644 --- a/xc/extras/Mesa/src/extensions.c +++ b/xc/extras/Mesa/src/extensions.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/extensions.c,v 1.7 2000/09/26 15:56:31 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" diff --git a/xc/extras/Mesa/src/feedback.c b/xc/extras/Mesa/src/feedback.c index 5d14e892e..3d413a978 100644 --- a/xc/extras/Mesa/src/feedback.c +++ b/xc/extras/Mesa/src/feedback.c @@ -238,7 +238,7 @@ void gl_feedback_points( GLcontext *ctx, GLuint first, GLuint last ) const struct vertex_buffer *VB = ctx->VB; GLuint i; - for (i=first;i<=last;i++) { + for (i=first;i<last;i++) { if (VB->ClipMask[i]==0) { FEEDBACK_TOKEN( ctx, (GLfloat) (GLint) GL_POINT_TOKEN ); feedback_vertex( ctx, i, i ); diff --git a/xc/extras/Mesa/src/fog.c b/xc/extras/Mesa/src/fog.c index e98e67644..a802c087e 100644 --- a/xc/extras/Mesa/src/fog.c +++ b/xc/extras/Mesa/src/fog.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
* @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/fog.c,v 1.8 2000/09/24 13:50:10 alanh Exp $ */ + #ifdef PC_HEADER #include "all.h" diff --git a/xc/extras/Mesa/src/fog_tmp.h b/xc/extras/Mesa/src/fog_tmp.h index 5096b0a99..b3400d4e6 100644 --- a/xc/extras/Mesa/src/fog_tmp.h +++ b/xc/extras/Mesa/src/fog_tmp.h @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.1 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -193,9 +193,6 @@ static void TAG(fog_ci_vertices)( struct vertex_buffer *VB, - - - static void TAG(fog_rgba_vertices)( struct vertex_buffer *VB, GLuint side, GLubyte flag) @@ -226,7 +223,6 @@ static void TAG(fog_rgba_vertices)( struct vertex_buffer *VB, VB->ColorPtr = VB->Color[0]; out = (GLubyte (*)[4])VB->Color[side]->start; - if (VB->EyePtr->size > 2) { switch (ctx->Fog.Mode) { case GL_LINEAR: @@ -234,12 +230,17 @@ static void TAG(fog_rgba_vertices)( struct vertex_buffer *VB, for ( i = 0 ; i < n ; i++, STRIDE_F(v, stride), in += in_stride) { CULLCHECK { GLfloat f = (end - ABSF(v[2])) * d; - if (f >= 1.0) continue; - if (f < 0) { + if (f >= 1.0) { + out[i][0] = in[0]; + out[i][1] = in[1]; + out[i][2] = in[2]; + } + else if (f <= 0.0) { CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(out[i][0], rFog); CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(out[i][1], gFog); CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(out[i][2], bFog); - } else { + } + else { t = f * UBYTE_COLOR_TO_FLOAT_COLOR(in[0]) + (1.0F-f)*rFog; CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(out[i][0], t); @@ -293,27 +294,40 @@ static void TAG(fog_rgba_vertices)( struct vertex_buffer *VB, return; } } - else if (ctx->Fog.Mode == GL_LINEAR) - { - 
/* 2-vector vertices */ - GLubyte r,g,b; - GLfloat f = ctx->Fog.End * (ctx->Fog.End - ctx->Fog.Start); - CLAMP_FLOAT_COLOR( f ); - f = 1.0 - f; - rFog *= f; - bFog *= f; - gFog *= f; - - CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(r, rFog); - CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(g, gFog); - CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b, bFog); - - for (i = 0 ; i < n ; i++) { - /* CULLCHECK */ { - out[i][0] = r; - out[i][1] = g; - out[i][2] = b; - } + else { + /* All vertex Z coordinates are zero */ + if (ctx->Fog.Mode == GL_LINEAR) { + /* 2-vector vertices */ + GLubyte r,g,b; + GLfloat f = ctx->Fog.End * (ctx->Fog.End - ctx->Fog.Start); + CLAMP_FLOAT_COLOR( f ); + f = 1.0 - f; + rFog *= f; + bFog *= f; + gFog *= f; + + CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(r, rFog); + CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(g, gFog); + CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b, bFog); + + for (i = 0 ; i < n ; i++) { + /* CULLCHECK */ { + out[i][0] = r; + out[i][1] = g; + out[i][2] = b; + } + } + } + else { + /* EXP or EXP2 mode */ + /* f = exp(d*z*z) or f = exp(d*|z|) is one. Copy in color to out */ + for ( i = 0 ; i < n ; i++, STRIDE_F(v,stride), in += in_stride) { + /* CULLCHECK */ { + out[i][0] = in[0]; + out[i][1] = in[1]; + out[i][2] = in[2]; + } + } } } } diff --git a/xc/extras/Mesa/src/get.c b/xc/extras/Mesa/src/get.c index a12e6be24..59e86056a 100644 --- a/xc/extras/Mesa/src/get.c +++ b/xc/extras/Mesa/src/get.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/get.c,v 1.7 2000/09/26 15:56:31 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" diff --git a/xc/extras/Mesa/src/glapi.c b/xc/extras/Mesa/src/glapi.c index c3ac458cb..8a7611e4e 100644 --- a/xc/extras/Mesa/src/glapi.c +++ b/xc/extras/Mesa/src/glapi.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
* @@ -1478,11 +1478,13 @@ get_static_proc_offset(const char *funcName) static GLvoid * get_static_proc_address(const char *funcName) { - GLint i = get_static_proc_offset(funcName); - if (i >= 0) - return static_functions[i].Address; - else - return NULL; + GLint i; + for (i = 0; static_functions[i].Name; i++) { + if (strcmp(static_functions[i].Name, funcName) == 0) { + return static_functions[i].Address; + } + } + return NULL; } @@ -1562,7 +1564,7 @@ _glapi_add_entrypoint(const char *funcName, GLuint offset) { GLint index = get_static_proc_offset(funcName); if (index >= 0) { - return (GLboolean) (index == offset); /* bad offset! */ + return (GLboolean) (index == (GLint) offset); /* bad offset! */ } } @@ -1670,7 +1672,7 @@ GLint _glapi_get_proc_offset(const char *funcName) { /* search extension functions first */ - GLint i; + GLuint i; for (i = 0; i < NumExtEntryPoints; i++) { if (strcmp(ExtEntryTable[i].Name, funcName) == 0) { return ExtEntryTable[i].Offset; @@ -1690,7 +1692,7 @@ const GLvoid * _glapi_get_proc_address(const char *funcName) { /* search extension functions first */ - GLint i; + GLuint i; for (i = 0; i < NumExtEntryPoints; i++) { if (strcmp(ExtEntryTable[i].Name, funcName) == 0) { return ExtEntryTable[i].Address; diff --git a/xc/extras/Mesa/src/glapinoop.c b/xc/extras/Mesa/src/glapinoop.c index 43a68c773..a48509d6b 100644 --- a/xc/extras/Mesa/src/glapinoop.c +++ b/xc/extras/Mesa/src/glapinoop.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/glapinoop.c,v 1.9 2000/09/26 15:56:31 tsi Exp $ */ + /* * This is part of the reusable GL dispather, see glapi.c for details. 
diff --git a/xc/extras/Mesa/src/glext_proto.h b/xc/extras/Mesa/src/glext_proto.h deleted file mode 100644 index 6f7e21535..000000000 --- a/xc/extras/Mesa/src/glext_proto.h +++ /dev/null @@ -1,1757 +0,0 @@ -#ifndef __glext_proto_h_ -#define __glext_proto_h_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* -** License Applicability. Except to the extent portions of this file are -** made subject to an alternative license as permitted in the SGI Free -** Software License B, Version 1.1 (the "License"), the contents of this -** file are subject only to the provisions of the License. You may not use -** this file except in compliance with the License. You may obtain a copy -** of the License at Silicon Graphics, Inc., attn: Legal Services, 1600 -** Amphitheatre Parkway, Mountain View, CA 94043-1351, or at: -** -** http://oss.sgi.com/projects/FreeB -** -** Note that, as provided in the License, the Software is distributed on an -** "AS IS" basis, with ALL EXPRESS AND IMPLIED WARRANTIES AND CONDITIONS -** DISCLAIMED, INCLUDING, WITHOUT LIMITATION, ANY IMPLIED WARRANTIES AND -** CONDITIONS OF MERCHANTABILITY, SATISFACTORY QUALITY, FITNESS FOR A -** PARTICULAR PURPOSE, AND NON-INFRINGEMENT. -** -** Original Code. The Original Code is: OpenGL Sample Implementation, -** Version 1.2.1, released January 26, 2000, developed by Silicon Graphics, -** Inc. The Original Code is Copyright (c) 1991-2000 Silicon Graphics, Inc. -** Copyright in any portions created by third parties is as indicated -** elsewhere herein. All Rights Reserved. -** -** Additional Notice Provisions: This software was created using the -** OpenGL(R) version 1.2.1 Sample Implementation published by SGI, but has -** not been independently verified as being compliant with the OpenGL(R) -** version 1.2.1 Specification. 
-*/ - -#if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN32) -#define WIN32_LEAN_AND_MEAN 1 -#include <windows.h> -#endif - -#ifndef APIENTRY -#define APIENTRY -#endif - -/*************************************************************/ - -/* OpenGL ABI for Linux version number */ -#define GLEXT_VERSION_EXT 5 -#define GL_CONSTANT_COLOR 0x8001 -#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 -#define GL_CONSTANT_ALPHA 0x8003 -#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 -#define GL_BLEND_COLOR 0x8005 -#define GL_FUNC_ADD 0x8006 -#define GL_MIN 0x8007 -#define GL_MAX 0x8008 -#define GL_BLEND_EQUATION 0x8009 -#define GL_FUNC_SUBTRACT 0x800A -#define GL_FUNC_REVERSE_SUBTRACT 0x800B -#define GL_CONVOLUTION_1D 0x8010 -#define GL_CONVOLUTION_2D 0x8011 -#define GL_SEPARABLE_2D 0x8012 -#define GL_CONVOLUTION_BORDER_MODE 0x8013 -#define GL_CONVOLUTION_FILTER_SCALE 0x8014 -#define GL_CONVOLUTION_FILTER_BIAS 0x8015 -#define GL_REDUCE 0x8016 -#define GL_CONVOLUTION_FORMAT 0x8017 -#define GL_CONVOLUTION_WIDTH 0x8018 -#define GL_CONVOLUTION_HEIGHT 0x8019 -#define GL_MAX_CONVOLUTION_WIDTH 0x801A -#define GL_MAX_CONVOLUTION_HEIGHT 0x801B -#define GL_POST_CONVOLUTION_RED_SCALE 0x801C -#define GL_POST_CONVOLUTION_GREEN_SCALE 0x801D -#define GL_POST_CONVOLUTION_BLUE_SCALE 0x801E -#define GL_POST_CONVOLUTION_ALPHA_SCALE 0x801F -#define GL_POST_CONVOLUTION_RED_BIAS 0x8020 -#define GL_POST_CONVOLUTION_GREEN_BIAS 0x8021 -#define GL_POST_CONVOLUTION_BLUE_BIAS 0x8022 -#define GL_POST_CONVOLUTION_ALPHA_BIAS 0x8023 -#define GL_HISTOGRAM 0x8024 -#define GL_PROXY_HISTOGRAM 0x8025 -#define GL_HISTOGRAM_WIDTH 0x8026 -#define GL_HISTOGRAM_FORMAT 0x8027 -#define GL_HISTOGRAM_RED_SIZE 0x8028 -#define GL_HISTOGRAM_GREEN_SIZE 0x8029 -#define GL_HISTOGRAM_BLUE_SIZE 0x802A -#define GL_HISTOGRAM_ALPHA_SIZE 0x802B -#define GL_HISTOGRAM_LUMINANCE_SIZE 0x802C -#define GL_HISTOGRAM_SINK 0x802D -#define GL_MINMAX 0x802E -#define GL_MINMAX_FORMAT 0x802F -#define GL_MINMAX_SINK 0x8030 -#define 
GL_TABLE_TOO_LARGE 0x8031 -#define GL_UNSIGNED_BYTE_3_3_2 0x8032 -#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 -#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 -#define GL_UNSIGNED_INT_8_8_8_8 0x8035 -#define GL_UNSIGNED_INT_10_10_10_2 0x8036 -#define GL_RESCALE_NORMAL 0x803A -#define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362 -#define GL_UNSIGNED_SHORT_5_6_5 0x8363 -#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364 -#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365 -#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366 -#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367 -#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 -#define GL_COLOR_MATRIX 0x80B1 -#define GL_COLOR_MATRIX_STACK_DEPTH 0x80B2 -#define GL_MAX_COLOR_MATRIX_STACK_DEPTH 0x80B3 -#define GL_POST_COLOR_MATRIX_RED_SCALE 0x80B4 -#define GL_POST_COLOR_MATRIX_GREEN_SCALE 0x80B5 -#define GL_POST_COLOR_MATRIX_BLUE_SCALE 0x80B6 -#define GL_POST_COLOR_MATRIX_ALPHA_SCALE 0x80B7 -#define GL_POST_COLOR_MATRIX_RED_BIAS 0x80B8 -#define GL_POST_COLOR_MATRIX_GREEN_BIAS 0x80B9 -#define GL_POST_COLOR_MATRIX_BLUE_BIAS 0x80BA -#define GL_COLOR_TABLE 0x80D0 -#define GL_POST_CONVOLUTION_COLOR_TABLE 0x80D1 -#define GL_POST_COLOR_MATRIX_COLOR_TABLE 0x80D2 -#define GL_PROXY_COLOR_TABLE 0x80D3 -#define GL_PROXY_POST_CONVOLUTION_COLOR_TABLE 0x80D4 -#define GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE 0x80D5 -#define GL_COLOR_TABLE_SCALE 0x80D6 -#define GL_COLOR_TABLE_BIAS 0x80D7 -#define GL_COLOR_TABLE_FORMAT 0x80D8 -#define GL_COLOR_TABLE_WIDTH 0x80D9 -#define GL_COLOR_TABLE_RED_SIZE 0x80DA -#define GL_COLOR_TABLE_GREEN_SIZE 0x80DB -#define GL_COLOR_TABLE_BLUE_SIZE 0x80DC -#define GL_COLOR_TABLE_ALPHA_SIZE 0x80DD -#define GL_COLOR_TABLE_LUMINANCE_SIZE 0x80DE -#define GL_COLOR_TABLE_INTENSITY_SIZE 0x80DF -#define GL_CLAMP_TO_EDGE 0x812F -#define GL_TEXTURE_MIN_LOD 0x813A -#define GL_TEXTURE_MAX_LOD 0x813B -#define GL_TEXTURE_BASE_LEVEL 0x813C -#define GL_TEXTURE_MAX_LEVEL 0x813D -#define GL_TEXTURE0_ARB 0x84C0 -#define GL_TEXTURE1_ARB 0x84C1 -#define GL_TEXTURE2_ARB 0x84C2 -#define 
GL_TEXTURE3_ARB 0x84C3 -#define GL_ACTIVE_TEXTURE_ARB 0x84E0 -#define GL_CLIENT_ACTIVE_TEXTURE_ARB 0x84E1 -#define GL_MAX_ACTIVE_TEXTURES_ARB 0x84E2 -#define GL_TRANSPOSE_MODELVIEW_MATRIX_ARB 0x84E3 -#define GL_TRANSPOSE_PROJECTION_MATRIX_ARB 0x84E4 -#define GL_TRANSPOSE_TEXTURE_MATRIX_ARB 0x84E5 -#define GL_TRANSPOSE_COLOR_MATRIX_ARB 0x84E6 -#define GL_MULTISAMPLE_ARB 0x809D -#define GL_SAMPLE_ALPHA_TO_COVERAGE_ARB 0x809E -#define GL_SAMPLE_ALPHA_TO_ONE_ARB 0x809F -#define GL_SAMPLE_COVERAGE_ARB 0x80A0 -#define GL_SAMPLE_BUFFERS_ARB 0x80A8 -#define GL_SAMPLES_ARB 0x80A9 -#define GL_SAMPLE_COVERAGE_VALUE_ARB 0x80AA -#define GL_SAMPLE_COVERAGE_INVERT_ARB 0x80AB -#define GL_MULTISAMPLE_BIT_ARB 0x20000000 -#define GL_NORMAL_MAP_ARB 0x8511 -#define GL_REFLECTION_MAP_ARB 0x8512 -#define GL_TEXTURE_CUBE_MAP_ARB 0x8513 -#define GL_TEXTURE_BINDING_CUBE_MAP_ARB 0x8514 -#define GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB 0x8515 -#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB 0x8516 -#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB 0x8517 -#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB 0x8518 -#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB 0x8519 -#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB 0x851A -#define GL_PROXY_TEXTURE_CUBE_MAP_ARB 0x851B -#define GL_MAX_CUBE_MAP_TEXTURE_SIZE_ARB 0x851C -#define GL_COMPRESSED_ALPHA_ARB 0x84E9 -#define GL_COMPRESSED_LUMINANCE_ARB 0x84EA -#define GL_COMPRESSED_LUMINANCE_ALPHA_ARB 0x84EB -#define GL_COMPRESSED_INTENSITY_ARB 0x84EC -#define GL_COMPRESSED_RGB_ARB 0x84ED -#define GL_COMPRESSED_RGBA_ARB 0x84EE -#define GL_TEXTURE_COMPRESSION_HINT_ARB 0x84EF -#define GL_TEXTURE_IMAGE_SIZE_ARB 0x86A0 -#define GL_TEXTURE_COMPRESSED_ARB 0x86A1 -#define GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB 0x86A2 -#define GL_COMPRESSED_TEXTURE_FORMATS_ARB 0x86A3 -#define GL_ABGR_EXT 0x8000 -#define GL_CONSTANT_COLOR_EXT 0x8001 -#define GL_ONE_MINUS_CONSTANT_COLOR_EXT 0x8002 -#define GL_CONSTANT_ALPHA_EXT 0x8003 -#define GL_ONE_MINUS_CONSTANT_ALPHA_EXT 0x8004 -#define GL_BLEND_COLOR_EXT 0x8005 
-#define GL_POLYGON_OFFSET_EXT 0x8037 -#define GL_POLYGON_OFFSET_FACTOR_EXT 0x8038 -#define GL_POLYGON_OFFSET_BIAS_EXT 0x8039 -#define GL_ALPHA4_EXT 0x803B -#define GL_ALPHA8_EXT 0x803C -#define GL_ALPHA12_EXT 0x803D -#define GL_ALPHA16_EXT 0x803E -#define GL_LUMINANCE4_EXT 0x803F -#define GL_LUMINANCE8_EXT 0x8040 -#define GL_LUMINANCE12_EXT 0x8041 -#define GL_LUMINANCE16_EXT 0x8042 -#define GL_LUMINANCE4_ALPHA4_EXT 0x8043 -#define GL_LUMINANCE6_ALPHA2_EXT 0x8044 -#define GL_LUMINANCE8_ALPHA8_EXT 0x8045 -#define GL_LUMINANCE12_ALPHA4_EXT 0x8046 -#define GL_LUMINANCE12_ALPHA12_EXT 0x8047 -#define GL_LUMINANCE16_ALPHA16_EXT 0x8048 -#define GL_INTENSITY_EXT 0x8049 -#define GL_INTENSITY4_EXT 0x804A -#define GL_INTENSITY8_EXT 0x804B -#define GL_INTENSITY12_EXT 0x804C -#define GL_INTENSITY16_EXT 0x804D -#define GL_RGB2_EXT 0x804E -#define GL_RGB4_EXT 0x804F -#define GL_RGB5_EXT 0x8050 -#define GL_RGB8_EXT 0x8051 -#define GL_RGB10_EXT 0x8052 -#define GL_RGB12_EXT 0x8053 -#define GL_RGB16_EXT 0x8054 -#define GL_RGBA2_EXT 0x8055 -#define GL_RGBA4_EXT 0x8056 -#define GL_RGB5_A1_EXT 0x8057 -#define GL_RGBA8_EXT 0x8058 -#define GL_RGB10_A2_EXT 0x8059 -#define GL_RGBA12_EXT 0x805A -#define GL_RGBA16_EXT 0x805B -#define GL_TEXTURE_RED_SIZE_EXT 0x805C -#define GL_TEXTURE_GREEN_SIZE_EXT 0x805D -#define GL_TEXTURE_BLUE_SIZE_EXT 0x805E -#define GL_TEXTURE_ALPHA_SIZE_EXT 0x805F -#define GL_TEXTURE_LUMINANCE_SIZE_EXT 0x8060 -#define GL_TEXTURE_INTENSITY_SIZE_EXT 0x8061 -#define GL_REPLACE_EXT 0x8062 -#define GL_PROXY_TEXTURE_1D_EXT 0x8063 -#define GL_PROXY_TEXTURE_2D_EXT 0x8064 -#define GL_TEXTURE_TOO_LARGE_EXT 0x8065 -#define GL_PACK_SKIP_IMAGES 0x806B -#define GL_PACK_SKIP_IMAGES_EXT 0x806B -#define GL_PACK_IMAGE_HEIGHT 0x806C -#define GL_PACK_IMAGE_HEIGHT_EXT 0x806C -#define GL_UNPACK_SKIP_IMAGES 0x806D -#define GL_UNPACK_SKIP_IMAGES_EXT 0x806D -#define GL_UNPACK_IMAGE_HEIGHT 0x806E -#define GL_UNPACK_IMAGE_HEIGHT_EXT 0x806E -#define GL_TEXTURE_3D 0x806F -#define GL_TEXTURE_3D_EXT 
0x806F -#define GL_PROXY_TEXTURE_3D 0x8070 -#define GL_PROXY_TEXTURE_3D_EXT 0x8070 -#define GL_TEXTURE_DEPTH 0x8071 -#define GL_TEXTURE_DEPTH_EXT 0x8071 -#define GL_TEXTURE_WRAP_R 0x8072 -#define GL_TEXTURE_WRAP_R_EXT 0x8072 -#define GL_MAX_3D_TEXTURE_SIZE 0x8073 -#define GL_MAX_3D_TEXTURE_SIZE_EXT 0x8073 -#define GL_FILTER4_SGIS 0x8146 -#define GL_TEXTURE_FILTER4_SIZE_SGIS 0x8147 -#define GL_HISTOGRAM_EXT 0x8024 -#define GL_PROXY_HISTOGRAM_EXT 0x8025 -#define GL_HISTOGRAM_WIDTH_EXT 0x8026 -#define GL_HISTOGRAM_FORMAT_EXT 0x8027 -#define GL_HISTOGRAM_RED_SIZE_EXT 0x8028 -#define GL_HISTOGRAM_GREEN_SIZE_EXT 0x8029 -#define GL_HISTOGRAM_BLUE_SIZE_EXT 0x802A -#define GL_HISTOGRAM_ALPHA_SIZE_EXT 0x802B -#define GL_HISTOGRAM_LUMINANCE_SIZE_EXT 0x802C -#define GL_HISTOGRAM_SINK_EXT 0x802D -#define GL_MINMAX_EXT 0x802E -#define GL_MINMAX_FORMAT_EXT 0x802F -#define GL_MINMAX_SINK_EXT 0x8030 -#define GL_TABLE_TOO_LARGE_EXT 0x8031 -#define GL_CONVOLUTION_1D_EXT 0x8010 -#define GL_CONVOLUTION_2D_EXT 0x8011 -#define GL_SEPARABLE_2D_EXT 0x8012 -#define GL_CONVOLUTION_BORDER_MODE_EXT 0x8013 -#define GL_CONVOLUTION_FILTER_SCALE_EXT 0x8014 -#define GL_CONVOLUTION_FILTER_BIAS_EXT 0x8015 -#define GL_REDUCE_EXT 0x8016 -#define GL_CONVOLUTION_FORMAT_EXT 0x8017 -#define GL_CONVOLUTION_WIDTH_EXT 0x8018 -#define GL_CONVOLUTION_HEIGHT_EXT 0x8019 -#define GL_MAX_CONVOLUTION_WIDTH_EXT 0x801A -#define GL_MAX_CONVOLUTION_HEIGHT_EXT 0x801B -#define GL_POST_CONVOLUTION_RED_SCALE_EXT 0x801C -#define GL_POST_CONVOLUTION_GREEN_SCALE_EXT 0x801D -#define GL_POST_CONVOLUTION_BLUE_SCALE_EXT 0x801E -#define GL_POST_CONVOLUTION_ALPHA_SCALE_EXT 0x801F -#define GL_POST_CONVOLUTION_RED_BIAS_EXT 0x8020 -#define GL_POST_CONVOLUTION_GREEN_BIAS_EXT 0x8021 -#define GL_POST_CONVOLUTION_BLUE_BIAS_EXT 0x8022 -#define GL_POST_CONVOLUTION_ALPHA_BIAS_EXT 0x8023 -#define GL_COLOR_MATRIX_SGI 0x80B1 -#define GL_COLOR_MATRIX_STACK_DEPTH_SGI 0x80B2 -#define GL_MAX_COLOR_MATRIX_STACK_DEPTH_SGI 0x80B3 -#define 
GL_POST_COLOR_MATRIX_RED_SCALE_SGI 0x80B4 -#define GL_POST_COLOR_MATRIX_GREEN_SCALE_SGI 0x80B5 -#define GL_POST_COLOR_MATRIX_BLUE_SCALE_SGI 0x80B6 -#define GL_POST_COLOR_MATRIX_ALPHA_SCALE_SGI 0x80B7 -#define GL_POST_COLOR_MATRIX_RED_BIAS_SGI 0x80B8 -#define GL_POST_COLOR_MATRIX_GREEN_BIAS_SGI 0x80B9 -#define GL_POST_COLOR_MATRIX_BLUE_BIAS_SGI 0x80BA -#define GL_POST_COLOR_MATRIX_ALPHA_BIAS_SGI 0x80BB -#define GL_COLOR_TABLE_SGI 0x80D0 -#define GL_POST_CONVOLUTION_COLOR_TABLE_SGI 0x80D1 -#define GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI 0x80D2 -#define GL_PROXY_COLOR_TABLE_SGI 0x80D3 -#define GL_PROXY_POST_CONVOLUTION_COLOR_TABLE_SGI 0x80D4 -#define GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE_SGI 0x80D5 -#define GL_COLOR_TABLE_SCALE_SGI 0x80D6 -#define GL_COLOR_TABLE_BIAS_SGI 0x80D7 -#define GL_COLOR_TABLE_FORMAT_SGI 0x80D8 -#define GL_COLOR_TABLE_WIDTH_SGI 0x80D9 -#define GL_COLOR_TABLE_RED_SIZE_SGI 0x80DA -#define GL_COLOR_TABLE_GREEN_SIZE_SGI 0x80DB -#define GL_COLOR_TABLE_BLUE_SIZE_SGI 0x80DC -#define GL_COLOR_TABLE_ALPHA_SIZE_SGI 0x80DD -#define GL_COLOR_TABLE_LUMINANCE_SIZE_SGI 0x80DE -#define GL_COLOR_TABLE_INTENSITY_SIZE_SGI 0x80DF -#define GL_PIXEL_TEXTURE_SGIS 0x8353 -#define GL_PIXEL_FRAGMENT_RGB_SOURCE_SGIS 0x8354 -#define GL_PIXEL_FRAGMENT_ALPHA_SOURCE_SGIS 0x8355 -#define GL_PIXEL_GROUP_COLOR_SGIS 0x8356 -#define GL_PIXEL_TEX_GEN_SGIX 0x8139 -#define GL_PIXEL_TEX_GEN_MODE_SGIX 0x832B -#define GL_PACK_SKIP_VOLUMES_SGIS 0x8130 -#define GL_PACK_IMAGE_DEPTH_SGIS 0x8131 -#define GL_UNPACK_SKIP_VOLUMES_SGIS 0x8132 -#define GL_UNPACK_IMAGE_DEPTH_SGIS 0x8133 -#define GL_TEXTURE_4D_SGIS 0x8134 -#define GL_PROXY_TEXTURE_4D_SGIS 0x8135 -#define GL_TEXTURE_4DSIZE_SGIS 0x8136 -#define GL_TEXTURE_WRAP_Q_SGIS 0x8137 -#define GL_MAX_4D_TEXTURE_SIZE_SGIS 0x8138 -#define GL_TEXTURE_4D_BINDING_SGIS 0x814F -#define GL_TEXTURE_COLOR_TABLE_SGI 0x80BC -#define GL_PROXY_TEXTURE_COLOR_TABLE_SGI 0x80BD -#define GL_CMYK_EXT 0x800C -#define GL_CMYKA_EXT 0x800D -#define 
GL_PACK_CMYK_HINT_EXT 0x800E -#define GL_UNPACK_CMYK_HINT_EXT 0x800F -#define GL_TEXTURE_PRIORITY_EXT 0x8066 -#define GL_TEXTURE_RESIDENT_EXT 0x8067 -#define GL_TEXTURE_1D_BINDING_EXT 0x8068 -#define GL_TEXTURE_2D_BINDING_EXT 0x8069 -#define GL_TEXTURE_3D_BINDING_EXT 0x806A -#define GL_DETAIL_TEXTURE_2D_SGIS 0x8095 -#define GL_DETAIL_TEXTURE_2D_BINDING_SGIS 0x8096 -#define GL_LINEAR_DETAIL_SGIS 0x8097 -#define GL_LINEAR_DETAIL_ALPHA_SGIS 0x8098 -#define GL_LINEAR_DETAIL_COLOR_SGIS 0x8099 -#define GL_DETAIL_TEXTURE_LEVEL_SGIS 0x809A -#define GL_DETAIL_TEXTURE_MODE_SGIS 0x809B -#define GL_DETAIL_TEXTURE_FUNC_POINTS_SGIS 0x809C -#define GL_LINEAR_SHARPEN_SGIS 0x80AD -#define GL_LINEAR_SHARPEN_ALPHA_SGIS 0x80AE -#define GL_LINEAR_SHARPEN_COLOR_SGIS 0x80AF -#define GL_SHARPEN_TEXTURE_FUNC_POINTS_SGIS 0x80B0 -#define GL_UNSIGNED_BYTE_3_3_2_EXT 0x8032 -#define GL_UNSIGNED_SHORT_4_4_4_4_EXT 0x8033 -#define GL_UNSIGNED_SHORT_5_5_5_1_EXT 0x8034 -#define GL_UNSIGNED_INT_8_8_8_8_EXT 0x8035 -#define GL_UNSIGNED_INT_10_10_10_2_EXT 0x8036 -#define GL_TEXTURE_MIN_LOD_SGIS 0x813A -#define GL_TEXTURE_MAX_LOD_SGIS 0x813B -#define GL_TEXTURE_BASE_LEVEL_SGIS 0x813C -#define GL_TEXTURE_MAX_LEVEL_SGIS 0x813D -#define GL_MULTISAMPLE_SGIS 0x809D -#define GL_SAMPLE_ALPHA_TO_MASK_SGIS 0x809E -#define GL_SAMPLE_ALPHA_TO_ONE_SGIS 0x809F -#define GL_SAMPLE_MASK_SGIS 0x80A0 -#define GL_1PASS_SGIS 0x80A1 -#define GL_2PASS_0_SGIS 0x80A2 -#define GL_2PASS_1_SGIS 0x80A3 -#define GL_4PASS_0_SGIS 0x80A4 -#define GL_4PASS_1_SGIS 0x80A5 -#define GL_4PASS_2_SGIS 0x80A6 -#define GL_4PASS_3_SGIS 0x80A7 -#define GL_SAMPLE_BUFFERS_SGIS 0x80A8 -#define GL_SAMPLES_SGIS 0x80A9 -#define GL_SAMPLE_MASK_VALUE_SGIS 0x80AA -#define GL_SAMPLE_MASK_INVERT_SGIS 0x80AB -#define GL_SAMPLE_PATTERN_SGIS 0x80AC -#define GL_RESCALE_NORMAL_EXT 0x803A -#define GL_VERTEX_ARRAY_EXT 0x8074 -#define GL_NORMAL_ARRAY_EXT 0x8075 -#define GL_COLOR_ARRAY_EXT 0x8076 -#define GL_INDEX_ARRAY_EXT 0x8077 -#define GL_TEXTURE_COORD_ARRAY_EXT 
0x8078 -#define GL_EDGE_FLAG_ARRAY_EXT 0x8079 -#define GL_VERTEX_ARRAY_SIZE_EXT 0x807A -#define GL_VERTEX_ARRAY_TYPE_EXT 0x807B -#define GL_VERTEX_ARRAY_STRIDE_EXT 0x807C -#define GL_VERTEX_ARRAY_COUNT_EXT 0x807D -#define GL_NORMAL_ARRAY_TYPE_EXT 0x807E -#define GL_NORMAL_ARRAY_STRIDE_EXT 0x807F -#define GL_NORMAL_ARRAY_COUNT_EXT 0x8080 -#define GL_COLOR_ARRAY_SIZE_EXT 0x8081 -#define GL_COLOR_ARRAY_TYPE_EXT 0x8082 -#define GL_COLOR_ARRAY_STRIDE_EXT 0x8083 -#define GL_COLOR_ARRAY_COUNT_EXT 0x8084 -#define GL_INDEX_ARRAY_TYPE_EXT 0x8085 -#define GL_INDEX_ARRAY_STRIDE_EXT 0x8086 -#define GL_INDEX_ARRAY_COUNT_EXT 0x8087 -#define GL_TEXTURE_COORD_ARRAY_SIZE_EXT 0x8088 -#define GL_TEXTURE_COORD_ARRAY_TYPE_EXT 0x8089 -#define GL_TEXTURE_COORD_ARRAY_STRIDE_EXT 0x808A -#define GL_TEXTURE_COORD_ARRAY_COUNT_EXT 0x808B -#define GL_EDGE_FLAG_ARRAY_STRIDE_EXT 0x808C -#define GL_EDGE_FLAG_ARRAY_COUNT_EXT 0x808D -#define GL_VERTEX_ARRAY_POINTER_EXT 0x808E -#define GL_NORMAL_ARRAY_POINTER_EXT 0x808F -#define GL_COLOR_ARRAY_POINTER_EXT 0x8090 -#define GL_INDEX_ARRAY_POINTER_EXT 0x8091 -#define GL_TEXTURE_COORD_ARRAY_POINTER_EXT 0x8092 -#define GL_EDGE_FLAG_ARRAY_POINTER_EXT 0x8093 -#define GL_GENERATE_MIPMAP_SGIS 0x8191 -#define GL_GENERATE_MIPMAP_HINT_SGIS 0x8192 -#define GL_LINEAR_CLIPMAP_LINEAR_SGIX 0x8170 -#define GL_TEXTURE_CLIPMAP_CENTER_SGIX 0x8171 -#define GL_TEXTURE_CLIPMAP_FRAME_SGIX 0x8172 -#define GL_TEXTURE_CLIPMAP_OFFSET_SGIX 0x8173 -#define GL_TEXTURE_CLIPMAP_VIRTUAL_DEPTH_SGIX 0x8174 -#define GL_TEXTURE_CLIPMAP_LOD_OFFSET_SGIX 0x8175 -#define GL_TEXTURE_CLIPMAP_DEPTH_SGIX 0x8176 -#define GL_MAX_CLIPMAP_DEPTH_SGIX 0x8177 -#define GL_MAX_CLIPMAP_VIRTUAL_DEPTH_SGIX 0x8178 -#define GL_NEAREST_CLIPMAP_NEAREST_SGIX 0x844D -#define GL_NEAREST_CLIPMAP_LINEAR_SGIX 0x844E -#define GL_LINEAR_CLIPMAP_NEAREST_SGIX 0x844F -#define GL_TEXTURE_COMPARE_SGIX 0x819A -#define GL_TEXTURE_COMPARE_OPERATOR_SGIX 0x819B -#define GL_TEXTURE_LEQUAL_R_SGIX 0x819C -#define 
GL_TEXTURE_GEQUAL_R_SGIX 0x819D -#define GL_CLAMP_TO_EDGE_SGIS 0x812F -#define GL_CLAMP_TO_BORDER_SGIS 0x812D -#define GL_FUNC_ADD_EXT 0x8006 -#define GL_MIN_EXT 0x8007 -#define GL_MAX_EXT 0x8008 -#define GL_BLEND_EQUATION_EXT 0x8009 -#define GL_FUNC_SUBTRACT_EXT 0x800A -#define GL_FUNC_REVERSE_SUBTRACT_EXT 0x800B -#define GL_INTERLACE_SGIX 0x8094 -#define GL_PIXEL_TILE_BEST_ALIGNMENT_SGIX 0x813E -#define GL_PIXEL_TILE_CACHE_INCREMENT_SGIX 0x813F -#define GL_PIXEL_TILE_WIDTH_SGIX 0x8140 -#define GL_PIXEL_TILE_HEIGHT_SGIX 0x8141 -#define GL_PIXEL_TILE_GRID_WIDTH_SGIX 0x8142 -#define GL_PIXEL_TILE_GRID_HEIGHT_SGIX 0x8143 -#define GL_PIXEL_TILE_GRID_DEPTH_SGIX 0x8144 -#define GL_PIXEL_TILE_CACHE_SIZE_SGIX 0x8145 -#define GL_DUAL_ALPHA4_SGIS 0x8110 -#define GL_DUAL_ALPHA8_SGIS 0x8111 -#define GL_DUAL_ALPHA12_SGIS 0x8112 -#define GL_DUAL_ALPHA16_SGIS 0x8113 -#define GL_DUAL_LUMINANCE4_SGIS 0x8114 -#define GL_DUAL_LUMINANCE8_SGIS 0x8115 -#define GL_DUAL_LUMINANCE12_SGIS 0x8116 -#define GL_DUAL_LUMINANCE16_SGIS 0x8117 -#define GL_DUAL_INTENSITY4_SGIS 0x8118 -#define GL_DUAL_INTENSITY8_SGIS 0x8119 -#define GL_DUAL_INTENSITY12_SGIS 0x811A -#define GL_DUAL_INTENSITY16_SGIS 0x811B -#define GL_DUAL_LUMINANCE_ALPHA4_SGIS 0x811C -#define GL_DUAL_LUMINANCE_ALPHA8_SGIS 0x811D -#define GL_QUAD_ALPHA4_SGIS 0x811E -#define GL_QUAD_ALPHA8_SGIS 0x811F -#define GL_QUAD_LUMINANCE4_SGIS 0x8120 -#define GL_QUAD_LUMINANCE8_SGIS 0x8121 -#define GL_QUAD_INTENSITY4_SGIS 0x8122 -#define GL_QUAD_INTENSITY8_SGIS 0x8123 -#define GL_DUAL_TEXTURE_SELECT_SGIS 0x8124 -#define GL_QUAD_TEXTURE_SELECT_SGIS 0x8125 -#define GL_SPRITE_SGIX 0x8148 -#define GL_SPRITE_MODE_SGIX 0x8149 -#define GL_SPRITE_AXIS_SGIX 0x814A -#define GL_SPRITE_TRANSLATION_SGIX 0x814B -#define GL_SPRITE_AXIAL_SGIX 0x814C -#define GL_SPRITE_OBJECT_ALIGNED_SGIX 0x814D -#define GL_SPRITE_EYE_ALIGNED_SGIX 0x814E -#define GL_TEXTURE_MULTI_BUFFER_HINT_SGIX 0x812E -#define GL_POINT_SIZE_MIN_EXT 0x8126 -#define GL_POINT_SIZE_MIN_SGIS 0x8126 
-#define GL_POINT_SIZE_MAX_EXT 0x8127 -#define GL_POINT_SIZE_MAX_SGIS 0x8127 -#define GL_POINT_FADE_THRESHOLD_SIZE_EXT 0x8128 -#define GL_POINT_FADE_THRESHOLD_SIZE_SGIS 0x8128 -#define GL_DISTANCE_ATTENUATION_EXT 0x8129 -#define GL_DISTANCE_ATTENUATION_SGIS 0x8129 -#define GL_INSTRUMENT_BUFFER_POINTER_SGIX 0x8180 -#define GL_INSTRUMENT_MEASUREMENTS_SGIX 0x8181 -#define GL_POST_TEXTURE_FILTER_BIAS_SGIX 0x8179 -#define GL_POST_TEXTURE_FILTER_SCALE_SGIX 0x817A -#define GL_POST_TEXTURE_FILTER_BIAS_RANGE_SGIX 0x817B -#define GL_POST_TEXTURE_FILTER_SCALE_RANGE_SGIX 0x817C -#define GL_FRAMEZOOM_SGIX 0x818B -#define GL_FRAMEZOOM_FACTOR_SGIX 0x818C -#define GL_MAX_FRAMEZOOM_FACTOR_SGIX 0x818D -#define GL_REFERENCE_PLANE_SGIX 0x817D -#define GL_REFERENCE_PLANE_EQUATION_SGIX 0x817E -#define GL_DEPTH_COMPONENT16_SGIX 0x81A5 -#define GL_DEPTH_COMPONENT24_SGIX 0x81A6 -#define GL_DEPTH_COMPONENT32_SGIX 0x81A7 -#define GL_FOG_FUNC_SGIS 0x812A -#define GL_FOG_FUNC_POINTS_SGIS 0x812B -#define GL_MAX_FOG_FUNC_POINTS_SGIS 0x812C -#define GL_FOG_OFFSET_SGIX 0x8198 -#define GL_FOG_OFFSET_VALUE_SGIX 0x8199 -#define GL_IMAGE_SCALE_X_HP 0x8155 -#define GL_IMAGE_SCALE_Y_HP 0x8156 -#define GL_IMAGE_TRANSLATE_X_HP 0x8157 -#define GL_IMAGE_TRANSLATE_Y_HP 0x8158 -#define GL_IMAGE_ROTATE_ANGLE_HP 0x8159 -#define GL_IMAGE_ROTATE_ORIGIN_X_HP 0x815A -#define GL_IMAGE_ROTATE_ORIGIN_Y_HP 0x815B -#define GL_IMAGE_MAG_FILTER_HP 0x815C -#define GL_IMAGE_MIN_FILTER_HP 0x815D -#define GL_IMAGE_CUBIC_WEIGHT_HP 0x815E -#define GL_CUBIC_HP 0x815F -#define GL_AVERAGE_HP 0x8160 -#define GL_IMAGE_TRANSFORM_2D_HP 0x8161 -#define GL_POST_IMAGE_TRANSFORM_COLOR_TABLE_HP 0x8162 -#define GL_PROXY_POST_IMAGE_TRANSFORM_COLOR_TABLE_HP 0x8163 -#define GL_IGNORE_BORDER_HP 0x8150 -#define GL_CONSTANT_BORDER_HP 0x8151 -#define GL_REPLICATE_BORDER_HP 0x8153 -#define GL_CONVOLUTION_BORDER_COLOR_HP 0x8154 -#define GL_TEXTURE_ENV_BIAS_SGIX 0x80BE -#define GL_VERTEX_DATA_HINT_PGI 0x1A22A -#define GL_VERTEX_CONSISTENT_HINT_PGI 
0x1A22B -#define GL_MATERIAL_SIDE_HINT_PGI 0x1A22C -#define GL_MAX_VERTEX_HINT_PGI 0x1A22D -#define GL_COLOR3_BIT_PGI 0x00010000 -#define GL_COLOR4_BIT_PGI 0x00020000 -#define GL_EDGEFLAG_BIT_PGI 0x00040000 -#define GL_INDEX_BIT_PGI 0x00080000 -#define GL_MAT_AMBIENT_BIT_PGI 0x00100000 -#define GL_MAT_AMBIENT_AND_DIFFUSE_BIT_PGI 0x00200000 -#define GL_MAT_DIFFUSE_BIT_PGI 0x00400000 -#define GL_MAT_EMISSION_BIT_PGI 0x00800000 -#define GL_MAT_COLOR_INDEXES_BIT_PGI 0x01000000 -#define GL_MAT_SHININESS_BIT_PGI 0x02000000 -#define GL_MAT_SPECULAR_BIT_PGI 0x04000000 -#define GL_NORMAL_BIT_PGI 0x08000000 -#define GL_TEXCOORD1_BIT_PGI 0x10000000 -#define GL_TEXCOORD2_BIT_PGI 0x20000000 -#define GL_TEXCOORD3_BIT_PGI 0x40000000 -#define GL_TEXCOORD4_BIT_PGI 0x80000000 -#define GL_VERTEX23_BIT_PGI 0x00000004 -#define GL_VERTEX4_BIT_PGI 0x00000008 -#define GL_PREFER_DOUBLEBUFFER_HINT_PGI 0x1A1F8 -#define GL_CONSERVE_MEMORY_HINT_PGI 0x1A1FD -#define GL_RECLAIM_MEMORY_HINT_PGI 0x1A1FE -#define GL_NATIVE_GRAPHICS_HANDLE_PGI 0x1A202 -#define GL_NATIVE_GRAPHICS_BEGIN_HINT_PGI 0x1A203 -#define GL_NATIVE_GRAPHICS_END_HINT_PGI 0x1A204 -#define GL_ALWAYS_FAST_HINT_PGI 0x1A20C -#define GL_ALWAYS_SOFT_HINT_PGI 0x1A20D -#define GL_ALLOW_DRAW_OBJ_HINT_PGI 0x1A20E -#define GL_ALLOW_DRAW_WIN_HINT_PGI 0x1A20F -#define GL_ALLOW_DRAW_FRG_HINT_PGI 0x1A210 -#define GL_ALLOW_DRAW_MEM_HINT_PGI 0x1A211 -#define GL_STRICT_DEPTHFUNC_HINT_PGI 0x1A216 -#define GL_STRICT_LIGHTING_HINT_PGI 0x1A217 -#define GL_STRICT_SCISSOR_HINT_PGI 0x1A218 -#define GL_FULL_STIPPLE_HINT_PGI 0x1A219 -#define GL_CLIP_NEAR_HINT_PGI 0x1A220 -#define GL_CLIP_FAR_HINT_PGI 0x1A221 -#define GL_WIDE_LINE_HINT_PGI 0x1A222 -#define GL_BACK_NORMALS_HINT_PGI 0x1A223 -#define GL_COLOR_INDEX1_EXT 0x80E2 -#define GL_COLOR_INDEX2_EXT 0x80E3 -#define GL_COLOR_INDEX4_EXT 0x80E4 -#define GL_COLOR_INDEX8_EXT 0x80E5 -#define GL_COLOR_INDEX12_EXT 0x80E6 -#define GL_COLOR_INDEX16_EXT 0x80E7 -#define GL_TEXTURE_INDEX_SIZE_EXT 0x80ED -#define 
GL_CLIP_VOLUME_CLIPPING_HINT_EXT 0x80F0 -#define GL_LIST_PRIORITY_SGIX 0x8182 -#define GL_IR_INSTRUMENT1_SGIX 0x817F -#define GL_CALLIGRAPHIC_FRAGMENT_SGIX 0x8183 -#define GL_TEXTURE_LOD_BIAS_S_SGIX 0x818E -#define GL_TEXTURE_LOD_BIAS_T_SGIX 0x818F -#define GL_TEXTURE_LOD_BIAS_R_SGIX 0x8190 -#define GL_SHADOW_AMBIENT_SGIX 0x80BF -#define GL_INDEX_MATERIAL_EXT 0x81B8 -#define GL_INDEX_MATERIAL_PARAMETER_EXT 0x81B9 -#define GL_INDEX_MATERIAL_FACE_EXT 0x81BA -#define GL_INDEX_TEST_EXT 0x81B5 -#define GL_INDEX_TEST_FUNC_EXT 0x81B6 -#define GL_INDEX_TEST_REF_EXT 0x81B7 -#define GL_IUI_V2F_EXT 0x81AD -#define GL_IUI_V3F_EXT 0x81AE -#define GL_IUI_N3F_V2F_EXT 0x81AF -#define GL_IUI_N3F_V3F_EXT 0x81B0 -#define GL_T2F_IUI_V2F_EXT 0x81B1 -#define GL_T2F_IUI_V3F_EXT 0x81B2 -#define GL_T2F_IUI_N3F_V2F_EXT 0x81B3 -#define GL_T2F_IUI_N3F_V3F_EXT 0x81B4 -#define GL_ARRAY_ELEMENT_LOCK_FIRST_EXT 0x81A8 -#define GL_ARRAY_ELEMENT_LOCK_COUNT_EXT 0x81A9 -#define GL_CULL_VERTEX_EXT 0x81AA -#define GL_CULL_VERTEX_EYE_POSITION_EXT 0x81AB -#define GL_CULL_VERTEX_OBJECT_POSITION_EXT 0x81AC -#define GL_YCRCB_422_SGIX 0x81BB -#define GL_YCRCB_444_SGIX 0x81BC -#define GL_FRAGMENT_LIGHTING_SGIX 0x8400 -#define GL_FRAGMENT_COLOR_MATERIAL_SGIX 0x8401 -#define GL_FRAGMENT_COLOR_MATERIAL_FACE_SGIX 0x8402 -#define GL_FRAGMENT_COLOR_MATERIAL_PARAMETER_SGIX 0x8403 -#define GL_MAX_FRAGMENT_LIGHTS_SGIX 0x8404 -#define GL_MAX_ACTIVE_LIGHTS_SGIX 0x8405 -#define GL_CURRENT_RASTER_NORMAL_SGIX 0x8406 -#define GL_LIGHT_ENV_MODE_SGIX 0x8407 -#define GL_FRAGMENT_LIGHT_MODEL_LOCAL_VIEWER_SGIX 0x8408 -#define GL_FRAGMENT_LIGHT_MODEL_TWO_SIDE_SGIX 0x8409 -#define GL_FRAGMENT_LIGHT_MODEL_AMBIENT_SGIX 0x840A -#define GL_FRAGMENT_LIGHT_MODEL_NORMAL_INTERPOLATION_SGIX 0x840B -#define GL_FRAGMENT_LIGHT0_SGIX 0x840C -#define GL_FRAGMENT_LIGHT1_SGIX 0x840D -#define GL_FRAGMENT_LIGHT2_SGIX 0x840E -#define GL_FRAGMENT_LIGHT3_SGIX 0x840F -#define GL_FRAGMENT_LIGHT4_SGIX 0x8410 -#define GL_FRAGMENT_LIGHT5_SGIX 0x8411 
-#define GL_FRAGMENT_LIGHT6_SGIX 0x8412 -#define GL_FRAGMENT_LIGHT7_SGIX 0x8413 -#define GL_RASTER_POSITION_UNCLIPPED_IBM 0x19262 -#define GL_TEXTURE_LIGHTING_MODE_HP 0x8167 -#define GL_TEXTURE_POST_SPECULAR_HP 0x8168 -#define GL_TEXTURE_PRE_SPECULAR_HP 0x8169 -#define GL_MAX_ELEMENTS_VERTICES_EXT 0x80E8 -#define GL_MAX_ELEMENTS_INDICES_EXT 0x80E9 -#define GL_PHONG_WIN 0x80EA -#define GL_PHONG_HINT_WIN 0x80EB -#define GL_FOG_SPECULAR_TEXTURE_WIN 0x80EC -#define GL_FRAGMENT_MATERIAL_EXT 0x8349 -#define GL_FRAGMENT_NORMAL_EXT 0x834A -#define GL_FRAGMENT_COLOR_EXT 0x834C -#define GL_ATTENUATION_EXT 0x834D -#define GL_SHADOW_ATTENUATION_EXT 0x834E -#define GL_TEXTURE_APPLICATION_MODE_EXT 0x834F -#define GL_TEXTURE_LIGHT_EXT 0x8350 -#define GL_TEXTURE_MATERIAL_FACE_EXT 0x8351 -#define GL_TEXTURE_MATERIAL_PARAMETER_EXT 0x8352 -/* reuse GL_FRAGMENT_DEPTH_EXT */ -#define GL_ALPHA_MIN_SGIX 0x8320 -#define GL_ALPHA_MAX_SGIX 0x8321 -#define GL_BGR_EXT 0x80E0 -#define GL_BGRA_EXT 0x80E1 -#define GL_PARALLEL_ARRAYS_INTEL 0x83F4 -#define GL_VERTEX_ARRAY_PARALLEL_POINTERS_INTEL 0x83F5 -#define GL_NORMAL_ARRAY_PARALLEL_POINTERS_INTEL 0x83F6 -#define GL_COLOR_ARRAY_PARALLEL_POINTERS_INTEL 0x83F7 -#define GL_TEXTURE_COORD_ARRAY_PARALLEL_POINTERS_INTEL 0x83F8 -#define GL_OCCLUSION_TEST_HP 0x8165 -#define GL_OCCLUSION_TEST_RESULT_HP 0x8166 -#define GL_PIXEL_TRANSFORM_2D_EXT 0x8330 -#define GL_PIXEL_MAG_FILTER_EXT 0x8331 -#define GL_PIXEL_MIN_FILTER_EXT 0x8332 -#define GL_PIXEL_CUBIC_WEIGHT_EXT 0x8333 -#define GL_CUBIC_EXT 0x8334 -#define GL_AVERAGE_EXT 0x8335 -#define GL_PIXEL_TRANSFORM_2D_STACK_DEPTH_EXT 0x8336 -#define GL_MAX_PIXEL_TRANSFORM_2D_STACK_DEPTH_EXT 0x8337 -#define GL_PIXEL_TRANSFORM_2D_MATRIX_EXT 0x8338 -#define GL_SHARED_TEXTURE_PALETTE_EXT 0x81FB -#define GL_LIGHT_MODEL_COLOR_CONTROL_EXT 0x81F8 -#define GL_SINGLE_COLOR_EXT 0x81F9 -#define GL_SEPARATE_SPECULAR_COLOR_EXT 0x81FA -#define GL_COLOR_SUM_EXT 0x8458 -#define GL_CURRENT_SECONDARY_COLOR_EXT 0x8459 -#define 
GL_SECONDARY_COLOR_ARRAY_SIZE_EXT 0x845A -#define GL_SECONDARY_COLOR_ARRAY_TYPE_EXT 0x845B -#define GL_SECONDARY_COLOR_ARRAY_STRIDE_EXT 0x845C -#define GL_SECONDARY_COLOR_ARRAY_POINTER_EXT 0x845D -#define GL_SECONDARY_COLOR_ARRAY_EXT 0x845E -#define GL_PERTURB_EXT 0x85AE -#define GL_TEXTURE_NORMAL_EXT 0x85AF -#define GL_FOG_COORDINATE_SOURCE_EXT 0x8450 -#define GL_FOG_COORDINATE_EXT 0x8451 -#define GL_FRAGMENT_DEPTH_EXT 0x8452 -#define GL_CURRENT_FOG_COORDINATE_EXT 0x8453 -#define GL_FOG_COORDINATE_ARRAY_TYPE_EXT 0x8454 -#define GL_FOG_COORDINATE_ARRAY_STRIDE_EXT 0x8455 -#define GL_FOG_COORDINATE_ARRAY_POINTER_EXT 0x8456 -#define GL_FOG_COORDINATE_ARRAY_EXT 0x8457 -#define GL_SCREEN_COORDINATES_REND 0x8490 -#define GL_INVERTED_SCREEN_W_REND 0x8491 -#define GL_TANGENT_ARRAY_EXT 0x8439 -#define GL_BINORMAL_ARRAY_EXT 0x843A -#define GL_CURRENT_TANGENT_EXT 0x843B -#define GL_CURRENT_BINORMAL_EXT 0x843C -#define GL_TANGENT_ARRAY_TYPE_EXT 0x843E -#define GL_TANGENT_ARRAY_STRIDE_EXT 0x843F -#define GL_BINORMAL_ARRAY_TYPE_EXT 0x8440 -#define GL_BINORMAL_ARRAY_STRIDE_EXT 0x8441 -#define GL_TANGENT_ARRAY_POINTER_EXT 0x8442 -#define GL_BINORMAL_ARRAY_POINTER_EXT 0x8443 -#define GL_MAP1_TANGENT_EXT 0x8444 -#define GL_MAP2_TANGENT_EXT 0x8445 -#define GL_MAP1_BINORMAL_EXT 0x8446 -#define GL_MAP2_BINORMAL_EXT 0x8447 -#define GL_COMBINE_EXT 0x8570 -#define GL_COMBINE_RGB_EXT 0x8571 -#define GL_COMBINE_ALPHA_EXT 0x8572 -#define GL_RGB_SCALE_EXT 0x8573 -#define GL_ADD_SIGNED_EXT 0x8574 -#define GL_INTERPOLATE_EXT 0x8575 -#define GL_CONSTANT_EXT 0x8576 -#define GL_PRIMARY_COLOR_EXT 0x8577 -#define GL_PREVIOUS_EXT 0x8578 -#define GL_SOURCE0_RGB_EXT 0x8580 -#define GL_SOURCE1_RGB_EXT 0x8581 -#define GL_SOURCE2_RGB_EXT 0x8582 -#define GL_SOURCE3_RGB_EXT 0x8583 -#define GL_SOURCE4_RGB_EXT 0x8584 -#define GL_SOURCE5_RGB_EXT 0x8585 -#define GL_SOURCE6_RGB_EXT 0x8586 -#define GL_SOURCE7_RGB_EXT 0x8587 -#define GL_SOURCE0_ALPHA_EXT 0x8588 -#define GL_SOURCE1_ALPHA_EXT 0x8589 -#define 
GL_SOURCE2_ALPHA_EXT 0x858A -#define GL_SOURCE3_ALPHA_EXT 0x858B -#define GL_SOURCE4_ALPHA_EXT 0x858C -#define GL_SOURCE5_ALPHA_EXT 0x858D -#define GL_SOURCE6_ALPHA_EXT 0x858E -#define GL_SOURCE7_ALPHA_EXT 0x858F -#define GL_OPERAND0_RGB_EXT 0x8590 -#define GL_OPERAND1_RGB_EXT 0x8591 -#define GL_OPERAND2_RGB_EXT 0x8592 -#define GL_OPERAND3_RGB_EXT 0x8593 -#define GL_OPERAND4_RGB_EXT 0x8594 -#define GL_OPERAND5_RGB_EXT 0x8595 -#define GL_OPERAND6_RGB_EXT 0x8596 -#define GL_OPERAND7_RGB_EXT 0x8597 -#define GL_OPERAND0_ALPHA_EXT 0x8598 -#define GL_OPERAND1_ALPHA_EXT 0x8599 -#define GL_OPERAND2_ALPHA_EXT 0x859A -#define GL_OPERAND3_ALPHA_EXT 0x859B -#define GL_OPERAND4_ALPHA_EXT 0x859C -#define GL_OPERAND5_ALPHA_EXT 0x859D -#define GL_OPERAND6_ALPHA_EXT 0x859E -#define GL_OPERAND7_ALPHA_EXT 0x859F -#define GL_LIGHT_MODEL_SPECULAR_VECTOR_APPLE 0x85B0 -#define GL_TRANSFORM_HINT_APPLE 0x85B1 -#define GL_FOG_SCALE_SGIX 0x81FC -#define GL_FOG_SCALE_VALUE_SGIX 0x81FD -#define GL_UNPACK_CONSTANT_DATA_SUNX 0x81D5 -#define GL_TEXTURE_CONSTANT_DATA_SUNX 0x81D6 -#define GL_GLOBAL_ALPHA_SUN 0x81D9 -#define GL_GLOBAL_ALPHA_FACTOR_SUN 0x81DA -#define GL_RESTART_SUN 0x01 -#define GL_REPLACE_MIDDLE_SUN 0x02 -#define GL_REPLACE_OLDEST_SUN 0x03 -#define GL_TRIANGLE_LIST_SUN 0x81D7 -#define GL_REPLACEMENT_CODE_SUN 0x81D8 -#define GL_REPLACEMENT_CODE_ARRAY_SUN 0x85C0 -#define GL_REPLACEMENT_CODE_ARRAY_TYPE_SUN 0x85C1 -#define GL_REPLACEMENT_CODE_ARRAY_STRIDE_SUN 0x85C2 -#define GL_REPLACEMENT_CODE_ARRAY_POINTER_SUN 0x85C3 -#define GL_R1UI_V3F_SUN 0x85C4 -#define GL_R1UI_C4UB_V3F_SUN 0x85C5 -#define GL_R1UI_C3F_V3F_SUN 0x85C6 -#define GL_R1UI_N3F_V3F_SUN 0x85C7 -#define GL_R1UI_C4F_N3F_V3F_SUN 0x85C8 -#define GL_R1UI_T2F_V3F_SUN 0x85C9 -#define GL_R1UI_T2F_N3F_V3F_SUN 0x85CA -#define GL_R1UI_T2F_C4F_N3F_V3F_SUN 0x85CB -#define GL_BLEND_DST_RGB_EXT 0x80C8 -#define GL_BLEND_SRC_RGB_EXT 0x80C9 -#define GL_BLEND_DST_ALPHA_EXT 0x80CA -#define GL_BLEND_SRC_ALPHA_EXT 0x80CB -#define 
GL_RED_MIN_CLAMP_INGR 0x8560 -#define GL_GREEN_MIN_CLAMP_INGR 0x8561 -#define GL_BLUE_MIN_CLAMP_INGR 0x8562 -#define GL_ALPHA_MIN_CLAMP_INGR 0x8563 -#define GL_RED_MAX_CLAMP_INGR 0x8564 -#define GL_GREEN_MAX_CLAMP_INGR 0x8565 -#define GL_BLUE_MAX_CLAMP_INGR 0x8566 -#define GL_ALPHA_MAX_CLAMP_INGR 0x8567 -#define GL_INTERLACE_READ_INGR 0x8568 -#define GL_INCR_WRAP_EXT 0x8507 -#define GL_DECR_WRAP_EXT 0x8508 -#define GL_422_EXT 0x80CC -#define GL_422_REV_EXT 0x80CD -#define GL_422_AVERAGE_EXT 0x80CE -#define GL_422_REV_AVERAGE_EXT 0x80CF -#define GL_NORMAL_MAP_NV 0x8511 -#define GL_REFLECTION_MAP_NV 0x8512 -#define GL_NORMAL_MAP_EXT 0x8511 -#define GL_REFLECTION_MAP_EXT 0x8512 -#define GL_TEXTURE_CUBE_MAP_EXT 0x8513 -#define GL_TEXTURE_BINDING_CUBE_MAP_EXT 0x8514 -#define GL_TEXTURE_CUBE_MAP_POSITIVE_X_EXT 0x8515 -#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X_EXT 0x8516 -#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y_EXT 0x8517 -#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT 0x8518 -#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z_EXT 0x8519 -#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT 0x851A -#define GL_PROXY_TEXTURE_CUBE_MAP_EXT 0x851B -#define GL_MAX_CUBE_MAP_TEXTURE_SIZE_EXT 0x851C -#define GL_WRAP_BORDER_SUN 0x81D4 -#define GL_MAX_TEXTURE_LOD_BIAS_EXT 0x84FD -#define GL_TEXTURE_FILTER_CONTROL_EXT 0x8500 -#define GL_TEXTURE_LOD_BIAS_EXT 0x8501 -#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE -#define GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF -#define GL_MODELVIEW0_STACK_DEPTH_EXT GL_MODELVIEW_STACK_DEPTH -#define GL_MODELVIEW1_STACK_DEPTH_EXT 0x8502 -#define GL_MODELVIEW0_MATRIX_EXT GL_MODELVIEW_MATRIX -#define GL_MODELVIEW_MATRIX1_EXT 0x8506 -#define GL_VERTEX_WEIGHTING_EXT 0x8509 -#define GL_MODELVIEW0_EXT GL_MODELVIEW -#define GL_MODELVIEW1_EXT 0x850A -#define GL_CURRENT_VERTEX_WEIGHT_EXT 0x850B -#define GL_VERTEX_WEIGHT_ARRAY_EXT 0x850C -#define GL_VERTEX_WEIGHT_ARRAY_SIZE_EXT 0x850D -#define GL_VERTEX_WEIGHT_ARRAY_TYPE_EXT 0x850E -#define GL_VERTEX_WEIGHT_ARRAY_STRIDE_EXT 0x850F -#define 
GL_VERTEX_WEIGHT_ARRAY_POINTER_EXT 0x8510 -#define GL_MAX_SHININESS_NV 0x8504 -#define GL_MAX_SPOT_EXPONENT_NV 0x8505 -#define GL_VERTEX_ARRAY_RANGE_NV 0x851D -#define GL_VERTEX_ARRAY_RANGE_LENGTH_NV 0x851E -#define GL_VERTEX_ARRAY_RANGE_VALID_NV 0x851F -#define GL_MAX_VERTEX_ARRAY_RANGE_ELEMENT_NV 0x8520 -#define GL_VERTEX_ARRAY_RANGE_POINTER_NV 0x8521 -#define GL_REGISTER_COMBINERS_NV 0x8522 -#define GL_VARIABLE_A_NV 0x8523 -#define GL_VARIABLE_B_NV 0x8524 -#define GL_VARIABLE_C_NV 0x8525 -#define GL_VARIABLE_D_NV 0x8526 -#define GL_VARIABLE_E_NV 0x8527 -#define GL_VARIABLE_F_NV 0x8528 -#define GL_VARIABLE_G_NV 0x8529 -#define GL_CONSTANT_COLOR0_NV 0x852A -#define GL_CONSTANT_COLOR1_NV 0x852B -#define GL_PRIMARY_COLOR_NV 0x852C -#define GL_SECONDARY_COLOR_NV 0x852D -#define GL_SPARE0_NV 0x852E -#define GL_SPARE1_NV 0x852F -#define GL_DISCARD_NV 0x8530 -#define GL_E_TIMES_F_NV 0x8531 -#define GL_SPARE0_PLUS_SECONDARY_COLOR_NV 0x8532 -#define GL_UNSIGNED_IDENTITY_NV 0x8536 -#define GL_UNSIGNED_INVERT_NV 0x8537 -#define GL_EXPAND_NORMAL_NV 0x8538 -#define GL_EXPAND_NEGATE_NV 0x8539 -#define GL_HALF_BIAS_NORMAL_NV 0x853A -#define GL_HALF_BIAS_NEGATE_NV 0x853B -#define GL_SIGNED_IDENTITY_NV 0x853C -#define GL_SIGNED_NEGATE_NV 0x853D -#define GL_SCALE_BY_TWO_NV 0x853E -#define GL_SCALE_BY_FOUR_NV 0x853F -#define GL_SCALE_BY_ONE_HALF_NV 0x8540 -#define GL_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x8541 -#define GL_COMBINER_INPUT_NV 0x8542 -#define GL_COMBINER_MAPPING_NV 0x8543 -#define GL_COMBINER_COMPONENT_USAGE_NV 0x8544 -#define GL_COMBINER_AB_DOT_PRODUCT_NV 0x8545 -#define GL_COMBINER_CD_DOT_PRODUCT_NV 0x8546 -#define GL_COMBINER_MUX_SUM_NV 0x8547 -#define GL_COMBINER_SCALE_NV 0x8548 -#define GL_COMBINER_BIAS_NV 0x8549 -#define GL_COMBINER_AB_OUTPUT_NV 0x854A -#define GL_COMBINER_CD_OUTPUT_NV 0x854B -#define GL_COMBINER_SUM_OUTPUT_NV 0x854C -#define GL_MAX_GENERAL_COMBINERS_NV 0x854D -#define GL_NUM_GENERAL_COMBINERS_NV 0x854E -#define GL_COLOR_SUM_CLAMP_NV 0x854F -#define 
GL_COMBINER0_NV 0x8550 -#define GL_COMBINER1_NV 0x8551 -#define GL_COMBINER2_NV 0x8552 -#define GL_COMBINER3_NV 0x8553 -#define GL_COMBINER4_NV 0x8554 -#define GL_COMBINER5_NV 0x8555 -#define GL_COMBINER6_NV 0x8556 -#define GL_COMBINER7_NV 0x8557 -/* reuse GL_TEXTURE0_ARB */ -/* reuse GL_TEXTURE1_ARB */ -/* reuse GL_ZERO */ -/* reuse GL_NONE */ -/* reuse GL_FOG */ -#define GL_FOG_DISTANCE_MODE_NV 0x855A -#define GL_EYE_RADIAL_NV 0x855B -#define GL_EYE_PLANE_ABSOLUTE_NV 0x855C -/* reuse GL_EYE_PLANE */ -#define GL_EMBOSS_LIGHT_NV 0x855D -#define GL_EMBOSS_CONSTANT_NV 0x855E -#define GL_EMBOSS_MAP_NV 0x855F -#define GL_COMBINE4_NV 0x8503 -#define GL_SOURCE3_RGB_NV 0x8583 -#define GL_SOURCE3_ALPHA_NV 0x858B -#define GL_OPERAND3_RGB_NV 0x8593 -#define GL_OPERAND3_ALPHA_NV 0x859B -#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 -#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 -#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 -#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 -#define GL_CULL_VERTEX_IBM 103050 -#define GL_VERTEX_ARRAY_LIST_IBM 103070 -#define GL_NORMAL_ARRAY_LIST_IBM 103071 -#define GL_COLOR_ARRAY_LIST_IBM 103072 -#define GL_INDEX_ARRAY_LIST_IBM 103073 -#define GL_TEXTURE_COORD_ARRAY_LIST_IBM 103074 -#define GL_EDGE_FLAG_ARRAY_LIST_IBM 103075 -#define GL_FOG_COORDINATE_ARRAY_LIST_IBM 103076 -#define GL_SECONDARY_COLOR_ARRAY_LIST_IBM 103077 -#define GL_VERTEX_ARRAY_LIST_STRIDE_IBM 103080 -#define GL_NORMAL_ARRAY_LIST_STRIDE_IBM 103081 -#define GL_COLOR_ARRAY_LIST_STRIDE_IBM 103082 -#define GL_INDEX_ARRAY_LIST_STRIDE_IBM 103083 -#define GL_TEXTURE_COORD_ARRAY_LIST_STRIDE_IBM 103084 -#define GL_EDGE_FLAG_ARRAY_LIST_STRIDE_IBM 103085 -#define GL_FOG_COORDINATE_ARRAY_LIST_STRIDE_IBM 103086 -#define GL_SECONDARY_COLOR_ARRAY_LIST_STRIDE_IBM 103087 -#define GL_PACK_SUBSAMPLE_RATE_SGIX 0x85A0 -#define GL_UNPACK_SUBSAMPLE_RATE_SGIX 0x85A1 -#define GL_PIXEL_SUBSAMPLE_4444_SGIX 0x85A2 -#define GL_PIXEL_SUBSAMPLE_2424_SGIX 0x85A3 -#define GL_PIXEL_SUBSAMPLE_4242_SGIX 
0x85A4 -#define GL_YCRCB_SGIX 0x8318 -#define GL_YCRCBA_SGIX 0x8319 -#define GL_DEPTH_PASS_INSTRUMENT_SGIX 0x8310 -#define GL_DEPTH_PASS_INSTRUMENT_COUNTERS_SGIX 0x8311 -#define GL_DEPTH_PASS_INSTRUMENT_MAX_SGIX 0x8312 -#define GL_COMPRESSED_RGB_FXT1_3DFX 0x86B0 -#define GL_COMPRESSED_RGBA_FXT1_3DFX 0x86B1 -#define GL_MULTISAMPLE_3DFX 0x86B2 -#define GL_SAMPLE_BUFFERS_3DFX 0x86B3 -#define GL_SAMPLES_3DFX 0x86B4 -#define GL_MULTISAMPLE_BIT_3DFX 0x20000000 -#define GL_MULTISAMPLE_EXT 0x809D -#define GL_SAMPLE_ALPHA_TO_MASK_EXT 0x809E -#define GL_SAMPLE_ALPHA_TO_ONE_EXT 0x809F -#define GL_SAMPLE_MASK_EXT 0x80A0 -#define GL_1PASS_EXT 0x80A1 -#define GL_2PASS_0_EXT 0x80A2 -#define GL_2PASS_1_EXT 0x80A3 -#define GL_4PASS_0_EXT 0x80A4 -#define GL_4PASS_1_EXT 0x80A5 -#define GL_4PASS_2_EXT 0x80A6 -#define GL_4PASS_3_EXT 0x80A7 -#define GL_SAMPLE_BUFFERS_EXT 0x80A8 -#define GL_SAMPLES_EXT 0x80A9 -#define GL_SAMPLE_MASK_VALUE_EXT 0x80AA -#define GL_SAMPLE_MASK_INVERT_EXT 0x80AB -#define GL_SAMPLE_PATTERN_EXT 0x80AC -#define GL_VERTEX_PRECLIP_SGIX 0x83EE -#define GL_VERTEX_PRECLIP_HINT_SGIX 0x83EF -#define GL_CONVOLUTION_HINT_SGIX 0x8316 -#define GL_PACK_RESAMPLE_SGIX 0x842C -#define GL_UNPACK_RESAMPLE_SGIX 0x842D -#define GL_RESAMPLE_REPLICATE_SGIX 0x842E -#define GL_RESAMPLE_ZERO_FILL_SGIX 0x842F -#define GL_RESAMPLE_DECIMATE_SGIX 0x8430 -#define GL_EYE_DISTANCE_TO_POINT_SGIS 0x81F0 -#define GL_OBJECT_DISTANCE_TO_POINT_SGIS 0x81F1 -#define GL_EYE_DISTANCE_TO_LINE_SGIS 0x81F2 -#define GL_OBJECT_DISTANCE_TO_LINE_SGIS 0x81F3 -#define GL_EYE_POINT_SGIS 0x81F4 -#define GL_OBJECT_POINT_SGIS 0x81F5 -#define GL_EYE_LINE_SGIS 0x81F6 -#define GL_OBJECT_LINE_SGIS 0x81F7 - - -/*************************************************************/ - -extern void APIENTRY glBlendColor (GLclampf, GLclampf, GLclampf, GLclampf); -typedef void (APIENTRY * PFNGLBLENDCOLORPROC) (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); -extern void APIENTRY glBlendEquation (GLenum); -typedef void 
(APIENTRY * PFNGLBLENDEQUATIONPROC) (GLenum mode); -extern void APIENTRY glDrawRangeElements (GLenum, GLuint, GLuint, GLsizei, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLDRAWRANGEELEMENTSPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices); -extern void APIENTRY glColorTable (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLCOLORTABLEPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *table); -extern void APIENTRY glColorTableParameterfv (GLenum, GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLCOLORTABLEPARAMETERFVPROC) (GLenum target, GLenum pname, const GLfloat *params); -extern void APIENTRY glColorTableParameteriv (GLenum, GLenum, const GLint *); -typedef void (APIENTRY * PFNGLCOLORTABLEPARAMETERIVPROC) (GLenum target, GLenum pname, const GLint *params); -extern void APIENTRY glCopyColorTable (GLenum, GLenum, GLint, GLint, GLsizei); -typedef void (APIENTRY * PFNGLCOPYCOLORTABLEPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width); -extern void APIENTRY glGetColorTable (GLenum, GLenum, GLenum, GLvoid *); -typedef void (APIENTRY * PFNGLGETCOLORTABLEPROC) (GLenum target, GLenum format, GLenum type, GLvoid *table); -extern void APIENTRY glGetColorTableParameterfv (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETCOLORTABLEPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params); -extern void APIENTRY glGetColorTableParameteriv (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETCOLORTABLEPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glColorSubTable (GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLCOLORSUBTABLEPROC) (GLenum target, GLsizei start, GLsizei count, GLenum format, GLenum type, const GLvoid *data); -extern void APIENTRY glCopyColorSubTable (GLenum, GLsizei, GLint, GLint, 
GLsizei); -typedef void (APIENTRY * PFNGLCOPYCOLORSUBTABLEPROC) (GLenum target, GLsizei start, GLint x, GLint y, GLsizei width); -extern void APIENTRY glConvolutionFilter1D (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLCONVOLUTIONFILTER1DPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *image); -extern void APIENTRY glConvolutionFilter2D (GLenum, GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLCONVOLUTIONFILTER2DPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *image); -extern void APIENTRY glConvolutionParameterf (GLenum, GLenum, GLfloat); -typedef void (APIENTRY * PFNGLCONVOLUTIONPARAMETERFPROC) (GLenum target, GLenum pname, GLfloat params); -extern void APIENTRY glConvolutionParameterfv (GLenum, GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLCONVOLUTIONPARAMETERFVPROC) (GLenum target, GLenum pname, const GLfloat *params); -extern void APIENTRY glConvolutionParameteri (GLenum, GLenum, GLint); -typedef void (APIENTRY * PFNGLCONVOLUTIONPARAMETERIPROC) (GLenum target, GLenum pname, GLint params); -extern void APIENTRY glConvolutionParameteriv (GLenum, GLenum, const GLint *); -typedef void (APIENTRY * PFNGLCONVOLUTIONPARAMETERIVPROC) (GLenum target, GLenum pname, const GLint *params); -extern void APIENTRY glCopyConvolutionFilter1D (GLenum, GLenum, GLint, GLint, GLsizei); -typedef void (APIENTRY * PFNGLCOPYCONVOLUTIONFILTER1DPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width); -extern void APIENTRY glCopyConvolutionFilter2D (GLenum, GLenum, GLint, GLint, GLsizei, GLsizei); -typedef void (APIENTRY * PFNGLCOPYCONVOLUTIONFILTER2DPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height); -extern void APIENTRY glGetConvolutionFilter (GLenum, GLenum, GLenum, GLvoid *); -typedef void (APIENTRY * 
PFNGLGETCONVOLUTIONFILTERPROC) (GLenum target, GLenum format, GLenum type, GLvoid *image); -extern void APIENTRY glGetConvolutionParameterfv (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETCONVOLUTIONPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params); -extern void APIENTRY glGetConvolutionParameteriv (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETCONVOLUTIONPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glGetSeparableFilter (GLenum, GLenum, GLenum, GLvoid *, GLvoid *, GLvoid *); -typedef void (APIENTRY * PFNGLGETSEPARABLEFILTERPROC) (GLenum target, GLenum format, GLenum type, GLvoid *row, GLvoid *column, GLvoid *span); -extern void APIENTRY glSeparableFilter2D (GLenum, GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *, const GLvoid *); -typedef void (APIENTRY * PFNGLSEPARABLEFILTER2DPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *row, const GLvoid *column); -extern void APIENTRY glGetHistogram (GLenum, GLboolean, GLenum, GLenum, GLvoid *); -typedef void (APIENTRY * PFNGLGETHISTOGRAMPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, GLvoid *values); -extern void APIENTRY glGetHistogramParameterfv (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETHISTOGRAMPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params); -extern void APIENTRY glGetHistogramParameteriv (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETHISTOGRAMPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glGetMinmax (GLenum, GLboolean, GLenum, GLenum, GLvoid *); -typedef void (APIENTRY * PFNGLGETMINMAXPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, GLvoid *values); -extern void APIENTRY glGetMinmaxParameterfv (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETMINMAXPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params); -extern 
void APIENTRY glGetMinmaxParameteriv (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETMINMAXPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glHistogram (GLenum, GLsizei, GLenum, GLboolean); -typedef void (APIENTRY * PFNGLHISTOGRAMPROC) (GLenum target, GLsizei width, GLenum internalformat, GLboolean sink); -extern void APIENTRY glMinmax (GLenum, GLenum, GLboolean); -typedef void (APIENTRY * PFNGLMINMAXPROC) (GLenum target, GLenum internalformat, GLboolean sink); -extern void APIENTRY glResetHistogram (GLenum); -typedef void (APIENTRY * PFNGLRESETHISTOGRAMPROC) (GLenum target); -extern void APIENTRY glResetMinmax (GLenum); -typedef void (APIENTRY * PFNGLRESETMINMAXPROC) (GLenum target); -extern void APIENTRY glTexImage3D (GLenum, GLint, GLint, GLsizei, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLTEXIMAGE3DPROC) (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid *pixels); -extern void APIENTRY glTexSubImage3D (GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid *pixels); -extern void APIENTRY glCopyTexSubImage3D (GLenum, GLint, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei); -typedef void (APIENTRY * PFNGLCOPYTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); -extern void APIENTRY glActiveTextureARB (GLenum); -typedef void (APIENTRY * PFNGLACTIVETEXTUREARBPROC) (GLenum texture); -extern void APIENTRY glClientActiveTextureARB (GLenum); -typedef void (APIENTRY * PFNGLCLIENTACTIVETEXTUREARBPROC) (GLenum texture); -extern void 
APIENTRY glMultiTexCoord1dARB (GLenum, GLdouble); -typedef void (APIENTRY * PFNGLMULTITEXCOORD1DARBPROC) (GLenum target, GLdouble s); -extern void APIENTRY glMultiTexCoord1dvARB (GLenum, const GLdouble *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD1DVARBPROC) (GLenum target, const GLdouble *v); -extern void APIENTRY glMultiTexCoord1fARB (GLenum, GLfloat); -typedef void (APIENTRY * PFNGLMULTITEXCOORD1FARBPROC) (GLenum target, GLfloat s); -extern void APIENTRY glMultiTexCoord1fvARB (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD1FVARBPROC) (GLenum target, const GLfloat *v); -extern void APIENTRY glMultiTexCoord1iARB (GLenum, GLint); -typedef void (APIENTRY * PFNGLMULTITEXCOORD1IARBPROC) (GLenum target, GLint s); -extern void APIENTRY glMultiTexCoord1ivARB (GLenum, const GLint *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD1IVARBPROC) (GLenum target, const GLint *v); -extern void APIENTRY glMultiTexCoord1sARB (GLenum, GLshort); -typedef void (APIENTRY * PFNGLMULTITEXCOORD1SARBPROC) (GLenum target, GLshort s); -extern void APIENTRY glMultiTexCoord1svARB (GLenum, const GLshort *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD1SVARBPROC) (GLenum target, const GLshort *v); -extern void APIENTRY glMultiTexCoord2dARB (GLenum, GLdouble, GLdouble); -typedef void (APIENTRY * PFNGLMULTITEXCOORD2DARBPROC) (GLenum target, GLdouble s, GLdouble t); -extern void APIENTRY glMultiTexCoord2dvARB (GLenum, const GLdouble *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD2DVARBPROC) (GLenum target, const GLdouble *v); -extern void APIENTRY glMultiTexCoord2fARB (GLenum, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLMULTITEXCOORD2FARBPROC) (GLenum target, GLfloat s, GLfloat t); -extern void APIENTRY glMultiTexCoord2fvARB (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD2FVARBPROC) (GLenum target, const GLfloat *v); -extern void APIENTRY glMultiTexCoord2iARB (GLenum, GLint, GLint); -typedef void (APIENTRY * PFNGLMULTITEXCOORD2IARBPROC) 
(GLenum target, GLint s, GLint t); -extern void APIENTRY glMultiTexCoord2ivARB (GLenum, const GLint *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD2IVARBPROC) (GLenum target, const GLint *v); -extern void APIENTRY glMultiTexCoord2sARB (GLenum, GLshort, GLshort); -typedef void (APIENTRY * PFNGLMULTITEXCOORD2SARBPROC) (GLenum target, GLshort s, GLshort t); -extern void APIENTRY glMultiTexCoord2svARB (GLenum, const GLshort *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD2SVARBPROC) (GLenum target, const GLshort *v); -extern void APIENTRY glMultiTexCoord3dARB (GLenum, GLdouble, GLdouble, GLdouble); -typedef void (APIENTRY * PFNGLMULTITEXCOORD3DARBPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r); -extern void APIENTRY glMultiTexCoord3dvARB (GLenum, const GLdouble *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD3DVARBPROC) (GLenum target, const GLdouble *v); -extern void APIENTRY glMultiTexCoord3fARB (GLenum, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLMULTITEXCOORD3FARBPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r); -extern void APIENTRY glMultiTexCoord3fvARB (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD3FVARBPROC) (GLenum target, const GLfloat *v); -extern void APIENTRY glMultiTexCoord3iARB (GLenum, GLint, GLint, GLint); -typedef void (APIENTRY * PFNGLMULTITEXCOORD3IARBPROC) (GLenum target, GLint s, GLint t, GLint r); -extern void APIENTRY glMultiTexCoord3ivARB (GLenum, const GLint *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD3IVARBPROC) (GLenum target, const GLint *v); -extern void APIENTRY glMultiTexCoord3sARB (GLenum, GLshort, GLshort, GLshort); -typedef void (APIENTRY * PFNGLMULTITEXCOORD3SARBPROC) (GLenum target, GLshort s, GLshort t, GLshort r); -extern void APIENTRY glMultiTexCoord3svARB (GLenum, const GLshort *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD3SVARBPROC) (GLenum target, const GLshort *v); -extern void APIENTRY glMultiTexCoord4dARB (GLenum, GLdouble, GLdouble, GLdouble, GLdouble); 
-typedef void (APIENTRY * PFNGLMULTITEXCOORD4DARBPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r, GLdouble q); -extern void APIENTRY glMultiTexCoord4dvARB (GLenum, const GLdouble *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD4DVARBPROC) (GLenum target, const GLdouble *v); -extern void APIENTRY glMultiTexCoord4fARB (GLenum, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLMULTITEXCOORD4FARBPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); -extern void APIENTRY glMultiTexCoord4fvARB (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD4FVARBPROC) (GLenum target, const GLfloat *v); -extern void APIENTRY glMultiTexCoord4iARB (GLenum, GLint, GLint, GLint, GLint); -typedef void (APIENTRY * PFNGLMULTITEXCOORD4IARBPROC) (GLenum target, GLint s, GLint t, GLint r, GLint q); -extern void APIENTRY glMultiTexCoord4ivARB (GLenum, const GLint *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD4IVARBPROC) (GLenum target, const GLint *v); -extern void APIENTRY glMultiTexCoord4sARB (GLenum, GLshort, GLshort, GLshort, GLshort); -typedef void (APIENTRY * PFNGLMULTITEXCOORD4SARBPROC) (GLenum target, GLshort s, GLshort t, GLshort r, GLshort q); -extern void APIENTRY glMultiTexCoord4svARB (GLenum, const GLshort *); -typedef void (APIENTRY * PFNGLMULTITEXCOORD4SVARBPROC) (GLenum target, const GLshort *v); -extern void APIENTRY glLoadTransposeMatrixfARB (const GLfloat *); -typedef void (APIENTRY * PFNGLLOADTRANSPOSEMATRIXFARBPROC) (const GLfloat *m); -extern void APIENTRY glLoadTransposeMatrixdARB (const GLdouble *); -typedef void (APIENTRY * PFNGLLOADTRANSPOSEMATRIXDARBPROC) (const GLdouble *m); -extern void APIENTRY glMultTransposeMatrixfARB (const GLfloat *); -typedef void (APIENTRY * PFNGLMULTTRANSPOSEMATRIXFARBPROC) (const GLfloat *m); -extern void APIENTRY glMultTransposeMatrixdARB (const GLdouble *); -typedef void (APIENTRY * PFNGLMULTTRANSPOSEMATRIXDARBPROC) (const GLdouble *m); -extern void APIENTRY glSampleCoverageARB 
(GLclampf, GLboolean); -typedef void (APIENTRY * PFNGLSAMPLECOVERAGEARBPROC) (GLclampf value, GLboolean invert); -extern void APIENTRY glSamplePassARB (GLenum); -typedef void (APIENTRY * PFNGLSAMPLEPASSARBPROC) (GLenum pass); -extern void APIENTRY glCompressedTexImage3DARB (GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLCOMPRESSEDTEXIMAGE3DARBPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid *data); -extern void APIENTRY glCompressedTexImage2DARB (GLenum, GLint, GLenum, GLsizei, GLsizei, GLint, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLCOMPRESSEDTEXIMAGE2DARBPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid *data); -extern void APIENTRY glCompressedTexImage1DARB (GLenum, GLint, GLenum, GLsizei, GLint, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLCOMPRESSEDTEXIMAGE1DARBPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const GLvoid *data); -extern void APIENTRY glCompressedTexSubImage3DARB (GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLCOMPRESSEDTEXSUBIMAGE3DARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid *data); -extern void APIENTRY glCompressedTexSubImage2DARB (GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLCOMPRESSEDTEXSUBIMAGE2DARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *data); -extern void APIENTRY glCompressedTexSubImage1DARB (GLenum, GLint, GLint, GLsizei, 
GLenum, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLCOMPRESSEDTEXSUBIMAGE1DARBPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const GLvoid *data); -extern void APIENTRY glGetCompressedTexImageARB (GLenum, GLint, void *); -typedef void (APIENTRY * PFNGLGETCOMPRESSEDTEXIMAGEARBPROC) (GLenum target, GLint level, void *img); -extern void APIENTRY glBlendColorEXT (GLclampf, GLclampf, GLclampf, GLclampf); -typedef void (APIENTRY * PFNGLBLENDCOLOREXTPROC) (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); -extern void APIENTRY glPolygonOffsetEXT (GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLPOLYGONOFFSETEXTPROC) (GLfloat factor, GLfloat bias); -extern void APIENTRY glTexImage3DEXT (GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLTEXIMAGE3DEXTPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid *pixels); -extern void APIENTRY glTexSubImage3DEXT (GLenum, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLTEXSUBIMAGE3DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid *pixels); -extern void APIENTRY glGetTexFilterFuncSGIS (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETTEXFILTERFUNCSGISPROC) (GLenum target, GLenum filter, GLfloat *weights); -extern void APIENTRY glTexFilterFuncSGIS (GLenum, GLenum, GLsizei, const GLfloat *); -typedef void (APIENTRY * PFNGLTEXFILTERFUNCSGISPROC) (GLenum target, GLenum filter, GLsizei n, const GLfloat *weights); -extern void APIENTRY glTexSubImage1DEXT (GLenum, GLint, GLint, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLTEXSUBIMAGE1DEXTPROC) (GLenum target, GLint 
level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const GLvoid *pixels); -extern void APIENTRY glTexSubImage2DEXT (GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLTEXSUBIMAGE2DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels); -extern void APIENTRY glCopyTexImage1DEXT (GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLint); -typedef void (APIENTRY * PFNGLCOPYTEXIMAGE1DEXTPROC) (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border); -extern void APIENTRY glCopyTexImage2DEXT (GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLsizei, GLint); -typedef void (APIENTRY * PFNGLCOPYTEXIMAGE2DEXTPROC) (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); -extern void APIENTRY glCopyTexSubImage1DEXT (GLenum, GLint, GLint, GLint, GLint, GLsizei); -typedef void (APIENTRY * PFNGLCOPYTEXSUBIMAGE1DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); -extern void APIENTRY glCopyTexSubImage2DEXT (GLenum, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei); -typedef void (APIENTRY * PFNGLCOPYTEXSUBIMAGE2DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); -extern void APIENTRY glCopyTexSubImage3DEXT (GLenum, GLint, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei); -typedef void (APIENTRY * PFNGLCOPYTEXSUBIMAGE3DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); -extern void APIENTRY glGetHistogramEXT (GLenum, GLboolean, GLenum, GLenum, GLvoid *); -typedef void (APIENTRY * PFNGLGETHISTOGRAMEXTPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, GLvoid *values); -extern void APIENTRY glGetHistogramParameterfvEXT 
(GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETHISTOGRAMPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat *params); -extern void APIENTRY glGetHistogramParameterivEXT (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETHISTOGRAMPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glGetMinmaxEXT (GLenum, GLboolean, GLenum, GLenum, GLvoid *); -typedef void (APIENTRY * PFNGLGETMINMAXEXTPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, GLvoid *values); -extern void APIENTRY glGetMinmaxParameterfvEXT (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETMINMAXPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat *params); -extern void APIENTRY glGetMinmaxParameterivEXT (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETMINMAXPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glHistogramEXT (GLenum, GLsizei, GLenum, GLboolean); -typedef void (APIENTRY * PFNGLHISTOGRAMEXTPROC) (GLenum target, GLsizei width, GLenum internalformat, GLboolean sink); -extern void APIENTRY glMinmaxEXT (GLenum, GLenum, GLboolean); -typedef void (APIENTRY * PFNGLMINMAXEXTPROC) (GLenum target, GLenum internalformat, GLboolean sink); -extern void APIENTRY glResetHistogramEXT (GLenum); -typedef void (APIENTRY * PFNGLRESETHISTOGRAMEXTPROC) (GLenum target); -extern void APIENTRY glResetMinmaxEXT (GLenum); -typedef void (APIENTRY * PFNGLRESETMINMAXEXTPROC) (GLenum target); -extern void APIENTRY glConvolutionFilter1DEXT (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLCONVOLUTIONFILTER1DEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *image); -extern void APIENTRY glConvolutionFilter2DEXT (GLenum, GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLCONVOLUTIONFILTER2DEXTPROC) (GLenum target, GLenum internalformat, GLsizei 
width, GLsizei height, GLenum format, GLenum type, const GLvoid *image); -extern void APIENTRY glConvolutionParameterfEXT (GLenum, GLenum, GLfloat); -typedef void (APIENTRY * PFNGLCONVOLUTIONPARAMETERFEXTPROC) (GLenum target, GLenum pname, GLfloat params); -extern void APIENTRY glConvolutionParameterfvEXT (GLenum, GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLCONVOLUTIONPARAMETERFVEXTPROC) (GLenum target, GLenum pname, const GLfloat *params); -extern void APIENTRY glConvolutionParameteriEXT (GLenum, GLenum, GLint); -typedef void (APIENTRY * PFNGLCONVOLUTIONPARAMETERIEXTPROC) (GLenum target, GLenum pname, GLint params); -extern void APIENTRY glConvolutionParameterivEXT (GLenum, GLenum, const GLint *); -typedef void (APIENTRY * PFNGLCONVOLUTIONPARAMETERIVEXTPROC) (GLenum target, GLenum pname, const GLint *params); -extern void APIENTRY glCopyConvolutionFilter1DEXT (GLenum, GLenum, GLint, GLint, GLsizei); -typedef void (APIENTRY * PFNGLCOPYCONVOLUTIONFILTER1DEXTPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width); -extern void APIENTRY glCopyConvolutionFilter2DEXT (GLenum, GLenum, GLint, GLint, GLsizei, GLsizei); -typedef void (APIENTRY * PFNGLCOPYCONVOLUTIONFILTER2DEXTPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height); -extern void APIENTRY glGetConvolutionFilterEXT (GLenum, GLenum, GLenum, GLvoid *); -typedef void (APIENTRY * PFNGLGETCONVOLUTIONFILTEREXTPROC) (GLenum target, GLenum format, GLenum type, GLvoid *image); -extern void APIENTRY glGetConvolutionParameterfvEXT (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETCONVOLUTIONPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat *params); -extern void APIENTRY glGetConvolutionParameterivEXT (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETCONVOLUTIONPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glGetSeparableFilterEXT (GLenum, GLenum, GLenum, GLvoid *, GLvoid *, 
GLvoid *); -typedef void (APIENTRY * PFNGLGETSEPARABLEFILTEREXTPROC) (GLenum target, GLenum format, GLenum type, GLvoid *row, GLvoid *column, GLvoid *span); -extern void APIENTRY glSeparableFilter2DEXT (GLenum, GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *, const GLvoid *); -typedef void (APIENTRY * PFNGLSEPARABLEFILTER2DEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *row, const GLvoid *column); -extern void APIENTRY glColorTableSGI (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLCOLORTABLESGIPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *table); -extern void APIENTRY glColorTableParameterfvSGI (GLenum, GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLCOLORTABLEPARAMETERFVSGIPROC) (GLenum target, GLenum pname, const GLfloat *params); -extern void APIENTRY glColorTableParameterivSGI (GLenum, GLenum, const GLint *); -typedef void (APIENTRY * PFNGLCOLORTABLEPARAMETERIVSGIPROC) (GLenum target, GLenum pname, const GLint *params); -extern void APIENTRY glCopyColorTableSGI (GLenum, GLenum, GLint, GLint, GLsizei); -typedef void (APIENTRY * PFNGLCOPYCOLORTABLESGIPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width); -extern void APIENTRY glGetColorTableSGI (GLenum, GLenum, GLenum, GLvoid *); -typedef void (APIENTRY * PFNGLGETCOLORTABLESGIPROC) (GLenum target, GLenum format, GLenum type, GLvoid *table); -extern void APIENTRY glGetColorTableParameterfvSGI (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETCOLORTABLEPARAMETERFVSGIPROC) (GLenum target, GLenum pname, GLfloat *params); -extern void APIENTRY glGetColorTableParameterivSGI (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETCOLORTABLEPARAMETERIVSGIPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glPixelTexGenSGIX (GLenum); -typedef void (APIENTRY * 
PFNGLPIXELTEXGENSGIXPROC) (GLenum mode); -extern void APIENTRY glPixelTexGenParameteriSGIS (GLenum, GLint); -typedef void (APIENTRY * PFNGLPIXELTEXGENPARAMETERISGISPROC) (GLenum pname, GLint param); -extern void APIENTRY glPixelTexGenParameterivSGIS (GLenum, const GLint *); -typedef void (APIENTRY * PFNGLPIXELTEXGENPARAMETERIVSGISPROC) (GLenum pname, const GLint *params); -extern void APIENTRY glPixelTexGenParameterfSGIS (GLenum, GLfloat); -typedef void (APIENTRY * PFNGLPIXELTEXGENPARAMETERFSGISPROC) (GLenum pname, GLfloat param); -extern void APIENTRY glPixelTexGenParameterfvSGIS (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLPIXELTEXGENPARAMETERFVSGISPROC) (GLenum pname, const GLfloat *params); -extern void APIENTRY glGetPixelTexGenParameterivSGIS (GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETPIXELTEXGENPARAMETERIVSGISPROC) (GLenum pname, GLint *params); -extern void APIENTRY glGetPixelTexGenParameterfvSGIS (GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETPIXELTEXGENPARAMETERFVSGISPROC) (GLenum pname, GLfloat *params); -extern void APIENTRY glTexImage4DSGIS (GLenum, GLint, GLenum, GLsizei, GLsizei, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLTEXIMAGE4DSGISPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLsizei size4d, GLint border, GLenum format, GLenum type, const GLvoid *pixels); -extern void APIENTRY glTexSubImage4DSGIS (GLenum, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLTEXSUBIMAGE4DSGISPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint woffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei size4d, GLenum format, GLenum type, const GLvoid *pixels); -extern GLboolean APIENTRY glAreTexturesResidentEXT (GLsizei, const GLuint *, GLboolean *); -typedef GLboolean (APIENTRY * PFNGLARETEXTURESRESIDENTEXTPROC) 
(GLsizei n, const GLuint *textures, GLboolean *residences); -extern void APIENTRY glBindTextureEXT (GLenum, GLuint); -typedef void (APIENTRY * PFNGLBINDTEXTUREEXTPROC) (GLenum target, GLuint texture); -extern void APIENTRY glDeleteTexturesEXT (GLsizei, const GLuint *); -typedef void (APIENTRY * PFNGLDELETETEXTURESEXTPROC) (GLsizei n, const GLuint *textures); -extern void APIENTRY glGenTexturesEXT (GLsizei, GLuint *); -typedef void (APIENTRY * PFNGLGENTEXTURESEXTPROC) (GLsizei n, GLuint *textures); -extern GLboolean APIENTRY glIsTextureEXT (GLuint); -typedef GLboolean (APIENTRY * PFNGLISTEXTUREEXTPROC) (GLuint texture); -extern void APIENTRY glPrioritizeTexturesEXT (GLsizei, const GLuint *, const GLclampf *); -typedef void (APIENTRY * PFNGLPRIORITIZETEXTURESEXTPROC) (GLsizei n, const GLuint *textures, const GLclampf *priorities); -extern void APIENTRY glDetailTexFuncSGIS (GLenum, GLsizei, const GLfloat *); -typedef void (APIENTRY * PFNGLDETAILTEXFUNCSGISPROC) (GLenum target, GLsizei n, const GLfloat *points); -extern void APIENTRY glGetDetailTexFuncSGIS (GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETDETAILTEXFUNCSGISPROC) (GLenum target, GLfloat *points); -extern void APIENTRY glSharpenTexFuncSGIS (GLenum, GLsizei, const GLfloat *); -typedef void (APIENTRY * PFNGLSHARPENTEXFUNCSGISPROC) (GLenum target, GLsizei n, const GLfloat *points); -extern void APIENTRY glGetSharpenTexFuncSGIS (GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETSHARPENTEXFUNCSGISPROC) (GLenum target, GLfloat *points); -extern void APIENTRY glSampleMaskSGIS (GLclampf, GLboolean); -typedef void (APIENTRY * PFNGLSAMPLEMASKSGISPROC) (GLclampf value, GLboolean invert); -extern void APIENTRY glSamplePatternSGIS (GLenum); -typedef void (APIENTRY * PFNGLSAMPLEPATTERNSGISPROC) (GLenum pattern); -extern void APIENTRY glArrayElementEXT (GLint); -typedef void (APIENTRY * PFNGLARRAYELEMENTEXTPROC) (GLint i); -extern void APIENTRY glColorPointerEXT (GLint, GLenum, GLsizei, GLsizei, const GLvoid 
*); -typedef void (APIENTRY * PFNGLCOLORPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer); -extern void APIENTRY glDrawArraysEXT (GLenum, GLint, GLsizei); -typedef void (APIENTRY * PFNGLDRAWARRAYSEXTPROC) (GLenum mode, GLint first, GLsizei count); -extern void APIENTRY glEdgeFlagPointerEXT (GLsizei, GLsizei, const GLboolean *); -typedef void (APIENTRY * PFNGLEDGEFLAGPOINTEREXTPROC) (GLsizei stride, GLsizei count, const GLboolean *pointer); -extern void APIENTRY glGetPointervEXT (GLenum, GLvoid* *); -typedef void (APIENTRY * PFNGLGETPOINTERVEXTPROC) (GLenum pname, GLvoid* *params); -extern void APIENTRY glIndexPointerEXT (GLenum, GLsizei, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLINDEXPOINTEREXTPROC) (GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer); -extern void APIENTRY glNormalPointerEXT (GLenum, GLsizei, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLNORMALPOINTEREXTPROC) (GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer); -extern void APIENTRY glTexCoordPointerEXT (GLint, GLenum, GLsizei, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLTEXCOORDPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer); -extern void APIENTRY glVertexPointerEXT (GLint, GLenum, GLsizei, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLVERTEXPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLsizei count, const GLvoid *pointer); -extern void APIENTRY glBlendEquationEXT (GLenum); -typedef void (APIENTRY * PFNGLBLENDEQUATIONEXTPROC) (GLenum mode); -extern void APIENTRY glSpriteParameterfSGIX (GLenum, GLfloat); -typedef void (APIENTRY * PFNGLSPRITEPARAMETERFSGIXPROC) (GLenum pname, GLfloat param); -extern void APIENTRY glSpriteParameterfvSGIX (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLSPRITEPARAMETERFVSGIXPROC) (GLenum pname, const GLfloat *params); -extern void APIENTRY glSpriteParameteriSGIX (GLenum, 
GLint); -typedef void (APIENTRY * PFNGLSPRITEPARAMETERISGIXPROC) (GLenum pname, GLint param); -extern void APIENTRY glSpriteParameterivSGIX (GLenum, const GLint *); -typedef void (APIENTRY * PFNGLSPRITEPARAMETERIVSGIXPROC) (GLenum pname, const GLint *params); -extern void APIENTRY glPointParameterfEXT (GLenum, GLfloat); -typedef void (APIENTRY * PFNGLPOINTPARAMETERFEXTPROC) (GLenum pname, GLfloat param); -extern void APIENTRY glPointParameterfvEXT (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLPOINTPARAMETERFVEXTPROC) (GLenum pname, const GLfloat *params); -extern void APIENTRY glPointParameterfSGIS (GLenum, GLfloat); -typedef void (APIENTRY * PFNGLPOINTPARAMETERFSGISPROC) (GLenum pname, GLfloat param); -extern void APIENTRY glPointParameterfvSGIS (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLPOINTPARAMETERFVSGISPROC) (GLenum pname, const GLfloat *params); -extern GLint APIENTRY glGetInstrumentsSGIX (void); -typedef GLint (APIENTRY * PFNGLGETINSTRUMENTSSGIXPROC) (void); -extern void APIENTRY glInstrumentsBufferSGIX (GLsizei, GLint *); -typedef void (APIENTRY * PFNGLINSTRUMENTSBUFFERSGIXPROC) (GLsizei size, GLint *buffer); -extern GLint APIENTRY glPollInstrumentsSGIX (GLint *); -typedef GLint (APIENTRY * PFNGLPOLLINSTRUMENTSSGIXPROC) (GLint *marker_p); -extern void APIENTRY glReadInstrumentsSGIX (GLint); -typedef void (APIENTRY * PFNGLREADINSTRUMENTSSGIXPROC) (GLint marker); -extern void APIENTRY glStartInstrumentsSGIX (void); -typedef void (APIENTRY * PFNGLSTARTINSTRUMENTSSGIXPROC) (void); -extern void APIENTRY glStopInstrumentsSGIX (GLint); -typedef void (APIENTRY * PFNGLSTOPINSTRUMENTSSGIXPROC) (GLint marker); -extern void APIENTRY glFrameZoomSGIX (GLint); -typedef void (APIENTRY * PFNGLFRAMEZOOMSGIXPROC) (GLint factor); -extern void APIENTRY glTagSampleBufferSGIX (void); -typedef void (APIENTRY * PFNGLTAGSAMPLEBUFFERSGIXPROC) (void); -extern void APIENTRY glReferencePlaneSGIX (const GLdouble *); -typedef void (APIENTRY * 
PFNGLREFERENCEPLANESGIXPROC) (const GLdouble *equation); -extern void APIENTRY glFlushRasterSGIX (void); -typedef void (APIENTRY * PFNGLFLUSHRASTERSGIXPROC) (void); -extern void APIENTRY glFogFuncSGIS (GLsizei, const GLfloat *); -typedef void (APIENTRY * PFNGLFOGFUNCSGISPROC) (GLsizei n, const GLfloat *points); -extern void APIENTRY glGetFogFuncSGIS (const GLfloat *); -typedef void (APIENTRY * PFNGLGETFOGFUNCSGISPROC) (const GLfloat *points); -extern void APIENTRY glImageTransformParameteriHP (GLenum, GLenum, GLint); -typedef void (APIENTRY * PFNGLIMAGETRANSFORMPARAMETERIHPPROC) (GLenum target, GLenum pname, GLint param); -extern void APIENTRY glImageTransformParameterfHP (GLenum, GLenum, GLfloat); -typedef void (APIENTRY * PFNGLIMAGETRANSFORMPARAMETERFHPPROC) (GLenum target, GLenum pname, GLfloat param); -extern void APIENTRY glImageTransformParameterivHP (GLenum, GLenum, const GLint *); -typedef void (APIENTRY * PFNGLIMAGETRANSFORMPARAMETERIVHPPROC) (GLenum target, GLenum pname, const GLint *params); -extern void APIENTRY glImageTransformParameterfvHP (GLenum, GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLIMAGETRANSFORMPARAMETERFVHPPROC) (GLenum target, GLenum pname, const GLfloat *params); -extern void APIENTRY glGetImageTransformParameterivHP (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETIMAGETRANSFORMPARAMETERIVHPPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glGetImageTransformParameterfvHP (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETIMAGETRANSFORMPARAMETERFVHPPROC) (GLenum target, GLenum pname, GLfloat *params); -extern void APIENTRY glColorSubTableEXT (GLenum, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLCOLORSUBTABLEEXTPROC) (GLenum target, GLsizei start, GLsizei count, GLenum format, GLenum type, const GLvoid *data); -extern void APIENTRY glCopyColorSubTableEXT (GLenum, GLsizei, GLint, GLint, GLsizei); -typedef void (APIENTRY * 
PFNGLCOPYCOLORSUBTABLEEXTPROC) (GLenum target, GLsizei start, GLint x, GLint y, GLsizei width); -extern void APIENTRY glHintPGI (GLenum, GLint); -typedef void (APIENTRY * PFNGLHINTPGIPROC) (GLenum target, GLint mode); -extern void APIENTRY glColorTableEXT (GLenum, GLenum, GLsizei, GLenum, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLCOLORTABLEEXTPROC) (GLenum target, GLenum internalFormat, GLsizei width, GLenum format, GLenum type, const GLvoid *table); -extern void APIENTRY glGetColorTableEXT (GLenum, GLenum, GLenum, GLvoid *); -typedef void (APIENTRY * PFNGLGETCOLORTABLEEXTPROC) (GLenum target, GLenum format, GLenum type, GLvoid *data); -extern void APIENTRY glGetColorTableParameterivEXT (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETCOLORTABLEPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint *params); -extern void APIENTRY glGetColorTableParameterfvEXT (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETCOLORTABLEPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat *params); -extern void APIENTRY glGetListParameterfvSGIX (GLuint, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETLISTPARAMETERFVSGIXPROC) (GLuint list, GLenum pname, GLfloat *params); -extern void APIENTRY glGetListParameterivSGIX (GLuint, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETLISTPARAMETERIVSGIXPROC) (GLuint list, GLenum pname, GLint *params); -extern void APIENTRY glListParameterfSGIX (GLuint, GLenum, GLfloat); -typedef void (APIENTRY * PFNGLLISTPARAMETERFSGIXPROC) (GLuint list, GLenum pname, GLfloat param); -extern void APIENTRY glListParameterfvSGIX (GLuint, GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLLISTPARAMETERFVSGIXPROC) (GLuint list, GLenum pname, const GLfloat *params); -extern void APIENTRY glListParameteriSGIX (GLuint, GLenum, GLint); -typedef void (APIENTRY * PFNGLLISTPARAMETERISGIXPROC) (GLuint list, GLenum pname, GLint param); -extern void APIENTRY glListParameterivSGIX (GLuint, GLenum, const GLint *); 
-typedef void (APIENTRY * PFNGLLISTPARAMETERIVSGIXPROC) (GLuint list, GLenum pname, const GLint *params); -extern void APIENTRY glIndexMaterialEXT (GLenum, GLenum); -typedef void (APIENTRY * PFNGLINDEXMATERIALEXTPROC) (GLenum face, GLenum mode); -extern void APIENTRY glIndexFuncEXT (GLenum, GLclampf); -typedef void (APIENTRY * PFNGLINDEXFUNCEXTPROC) (GLenum func, GLclampf ref); -extern void APIENTRY glLockArraysEXT (GLint, GLsizei); -typedef void (APIENTRY * PFNGLLOCKARRAYSEXTPROC) (GLint first, GLsizei count); -extern void APIENTRY glUnlockArraysEXT (void); -typedef void (APIENTRY * PFNGLUNLOCKARRAYSEXTPROC) (void); -extern void APIENTRY glCullParameterdvEXT (GLenum, const GLdouble *); -typedef void (APIENTRY * PFNGLCULLPARAMETERDVEXTPROC) (GLenum pname, const GLdouble *params); -extern void APIENTRY glCullParameterfvEXT (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLCULLPARAMETERFVEXTPROC) (GLenum pname, const GLfloat *params); -extern void APIENTRY glFragmentColorMaterialSGIX (GLenum, GLenum); -typedef void (APIENTRY * PFNGLFRAGMENTCOLORMATERIALSGIXPROC) (GLenum face, GLenum mode); -extern void APIENTRY glFragmentLightfSGIX (GLenum, GLenum, GLfloat); -typedef void (APIENTRY * PFNGLFRAGMENTLIGHTFSGIXPROC) (GLenum light, GLenum pname, GLfloat param); -extern void APIENTRY glFragmentLightfvSGIX (GLenum, GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLFRAGMENTLIGHTFVSGIXPROC) (GLenum light, GLenum pname, const GLfloat *params); -extern void APIENTRY glFragmentLightiSGIX (GLenum, GLenum, GLint); -typedef void (APIENTRY * PFNGLFRAGMENTLIGHTISGIXPROC) (GLenum light, GLenum pname, GLint param); -extern void APIENTRY glFragmentLightivSGIX (GLenum, GLenum, const GLint *); -typedef void (APIENTRY * PFNGLFRAGMENTLIGHTIVSGIXPROC) (GLenum light, GLenum pname, const GLint *params); -extern void APIENTRY glFragmentLightModelfSGIX (GLenum, GLfloat); -typedef void (APIENTRY * PFNGLFRAGMENTLIGHTMODELFSGIXPROC) (GLenum pname, GLfloat param); -extern void 
APIENTRY glFragmentLightModelfvSGIX (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLFRAGMENTLIGHTMODELFVSGIXPROC) (GLenum pname, const GLfloat *params); -extern void APIENTRY glFragmentLightModeliSGIX (GLenum, GLint); -typedef void (APIENTRY * PFNGLFRAGMENTLIGHTMODELISGIXPROC) (GLenum pname, GLint param); -extern void APIENTRY glFragmentLightModelivSGIX (GLenum, const GLint *); -typedef void (APIENTRY * PFNGLFRAGMENTLIGHTMODELIVSGIXPROC) (GLenum pname, const GLint *params); -extern void APIENTRY glFragmentMaterialfSGIX (GLenum, GLenum, GLfloat); -typedef void (APIENTRY * PFNGLFRAGMENTMATERIALFSGIXPROC) (GLenum face, GLenum pname, GLfloat param); -extern void APIENTRY glFragmentMaterialfvSGIX (GLenum, GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLFRAGMENTMATERIALFVSGIXPROC) (GLenum face, GLenum pname, const GLfloat *params); -extern void APIENTRY glFragmentMaterialiSGIX (GLenum, GLenum, GLint); -typedef void (APIENTRY * PFNGLFRAGMENTMATERIALISGIXPROC) (GLenum face, GLenum pname, GLint param); -extern void APIENTRY glFragmentMaterialivSGIX (GLenum, GLenum, const GLint *); -typedef void (APIENTRY * PFNGLFRAGMENTMATERIALIVSGIXPROC) (GLenum face, GLenum pname, const GLint *params); -extern void APIENTRY glGetFragmentLightfvSGIX (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETFRAGMENTLIGHTFVSGIXPROC) (GLenum light, GLenum pname, GLfloat *params); -extern void APIENTRY glGetFragmentLightivSGIX (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETFRAGMENTLIGHTIVSGIXPROC) (GLenum light, GLenum pname, GLint *params); -extern void APIENTRY glGetFragmentMaterialfvSGIX (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETFRAGMENTMATERIALFVSGIXPROC) (GLenum face, GLenum pname, GLfloat *params); -extern void APIENTRY glGetFragmentMaterialivSGIX (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETFRAGMENTMATERIALIVSGIXPROC) (GLenum face, GLenum pname, GLint *params); -extern void APIENTRY glLightEnviSGIX (GLenum, 
GLint); -typedef void (APIENTRY * PFNGLLIGHTENVISGIXPROC) (GLenum pname, GLint param); -extern void APIENTRY glDrawRangeElementsEXT (GLenum, GLuint, GLuint, GLsizei, GLenum, const GLvoid *); -typedef void (APIENTRY * PFNGLDRAWRANGEELEMENTSEXTPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices); -extern void APIENTRY glApplyTextureEXT (GLenum); -typedef void (APIENTRY * PFNGLAPPLYTEXTUREEXTPROC) (GLenum mode); -extern void APIENTRY glTextureLightEXT (GLenum); -typedef void (APIENTRY * PFNGLTEXTURELIGHTEXTPROC) (GLenum pname); -extern void APIENTRY glTextureMaterialEXT (GLenum, GLenum); -typedef void (APIENTRY * PFNGLTEXTUREMATERIALEXTPROC) (GLenum face, GLenum mode); -extern void APIENTRY glVertexPointervINTEL (GLint, GLenum, const GLvoid* *); -typedef void (APIENTRY * PFNGLVERTEXPOINTERVINTELPROC) (GLint size, GLenum type, const GLvoid* *pointer); -extern void APIENTRY glNormalPointervINTEL (GLenum, const GLvoid* *); -typedef void (APIENTRY * PFNGLNORMALPOINTERVINTELPROC) (GLenum type, const GLvoid* *pointer); -extern void APIENTRY glColorPointervINTEL (GLint, GLenum, const GLvoid* *); -typedef void (APIENTRY * PFNGLCOLORPOINTERVINTELPROC) (GLint size, GLenum type, const GLvoid* *pointer); -extern void APIENTRY glTexCoordPointervINTEL (GLint, GLenum, const GLvoid* *); -typedef void (APIENTRY * PFNGLTEXCOORDPOINTERVINTELPROC) (GLint size, GLenum type, const GLvoid* *pointer); -extern void APIENTRY glPixelTransformParameteriEXT (GLenum, GLenum, GLint); -typedef void (APIENTRY * PFNGLPIXELTRANSFORMPARAMETERIEXTPROC) (GLenum target, GLenum pname, GLint param); -extern void APIENTRY glPixelTransformParameterfEXT (GLenum, GLenum, GLfloat); -typedef void (APIENTRY * PFNGLPIXELTRANSFORMPARAMETERFEXTPROC) (GLenum target, GLenum pname, GLfloat param); -extern void APIENTRY glPixelTransformParameterivEXT (GLenum, GLenum, const GLint *); -typedef void (APIENTRY * PFNGLPIXELTRANSFORMPARAMETERIVEXTPROC) (GLenum target, GLenum pname, 
const GLint *params); -extern void APIENTRY glPixelTransformParameterfvEXT (GLenum, GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLPIXELTRANSFORMPARAMETERFVEXTPROC) (GLenum target, GLenum pname, const GLfloat *params); -extern void APIENTRY glSecondaryColor3bEXT (GLbyte, GLbyte, GLbyte); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3BEXTPROC) (GLbyte red, GLbyte green, GLbyte blue); -extern void APIENTRY glSecondaryColor3bvEXT (const GLbyte *); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3BVEXTPROC) (const GLbyte *v); -extern void APIENTRY glSecondaryColor3dEXT (GLdouble, GLdouble, GLdouble); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3DEXTPROC) (GLdouble red, GLdouble green, GLdouble blue); -extern void APIENTRY glSecondaryColor3dvEXT (const GLdouble *); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3DVEXTPROC) (const GLdouble *v); -extern void APIENTRY glSecondaryColor3fEXT (GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3FEXTPROC) (GLfloat red, GLfloat green, GLfloat blue); -extern void APIENTRY glSecondaryColor3fvEXT (const GLfloat *); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3FVEXTPROC) (const GLfloat *v); -extern void APIENTRY glSecondaryColor3iEXT (GLint, GLint, GLint); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3IEXTPROC) (GLint red, GLint green, GLint blue); -extern void APIENTRY glSecondaryColor3ivEXT (const GLint *); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3IVEXTPROC) (const GLint *v); -extern void APIENTRY glSecondaryColor3sEXT (GLshort, GLshort, GLshort); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3SEXTPROC) (GLshort red, GLshort green, GLshort blue); -extern void APIENTRY glSecondaryColor3svEXT (const GLshort *); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3SVEXTPROC) (const GLshort *v); -extern void APIENTRY glSecondaryColor3ubEXT (GLubyte, GLubyte, GLubyte); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3UBEXTPROC) (GLubyte red, GLubyte green, GLubyte blue); -extern void APIENTRY 
glSecondaryColor3ubvEXT (const GLubyte *); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3UBVEXTPROC) (const GLubyte *v); -extern void APIENTRY glSecondaryColor3uiEXT (GLuint, GLuint, GLuint); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3UIEXTPROC) (GLuint red, GLuint green, GLuint blue); -extern void APIENTRY glSecondaryColor3uivEXT (const GLuint *); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3UIVEXTPROC) (const GLuint *v); -extern void APIENTRY glSecondaryColor3usEXT (GLushort, GLushort, GLushort); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3USEXTPROC) (GLushort red, GLushort green, GLushort blue); -extern void APIENTRY glSecondaryColor3usvEXT (const GLushort *); -typedef void (APIENTRY * PFNGLSECONDARYCOLOR3USVEXTPROC) (const GLushort *v); -extern void APIENTRY glSecondaryColorPointerEXT (GLint, GLenum, GLsizei, GLvoid *); -typedef void (APIENTRY * PFNGLSECONDARYCOLORPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLvoid *pointer); -extern void APIENTRY glTextureNormalEXT (GLenum); -typedef void (APIENTRY * PFNGLTEXTURENORMALEXTPROC) (GLenum mode); -extern void APIENTRY glMultiDrawArraysEXT (GLenum, GLint *, GLsizei *, GLsizei); -typedef void (APIENTRY * PFNGLMULTIDRAWARRAYSEXTPROC) (GLenum mode, GLint *first, GLsizei *count, GLsizei primcount); -extern void APIENTRY glMultiDrawElementsEXT (GLenum, const GLsizei *, GLenum, const GLvoid* *, GLsizei); -typedef void (APIENTRY * PFNGLMULTIDRAWELEMENTSEXTPROC) (GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount); -extern void APIENTRY glFogCoordfEXT (GLfloat); -typedef void (APIENTRY * PFNGLFOGCOORDFEXTPROC) (GLfloat coord); -extern void APIENTRY glFogCoordfvEXT (const GLfloat *); -typedef void (APIENTRY * PFNGLFOGCOORDFVEXTPROC) (const GLfloat *coord); -extern void APIENTRY glFogCoorddEXT (GLdouble); -typedef void (APIENTRY * PFNGLFOGCOORDDEXTPROC) (GLdouble coord); -extern void APIENTRY glFogCoorddvEXT (const GLdouble *); -typedef void (APIENTRY * 
PFNGLFOGCOORDDVEXTPROC) (const GLdouble *coord); -extern void APIENTRY glFogCoordPointerEXT (GLenum, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLFOGCOORDPOINTEREXTPROC) (GLenum type, GLsizei stride, const GLvoid *pointer); -extern void APIENTRY glTangent3b (GLbyte, GLbyte, GLbyte); -typedef void (APIENTRY * PFNGLTANGENT3BPROC) (GLbyte tx, GLbyte ty, GLbyte tz); -extern void APIENTRY glTangent3bv (const GLbyte *); -typedef void (APIENTRY * PFNGLTANGENT3BVPROC) (const GLbyte *v); -extern void APIENTRY glTangent3d (GLdouble, GLdouble, GLdouble); -typedef void (APIENTRY * PFNGLTANGENT3DPROC) (GLdouble tx, GLdouble ty, GLdouble tz); -extern void APIENTRY glTangent3dv (const GLdouble *); -typedef void (APIENTRY * PFNGLTANGENT3DVPROC) (const GLdouble *v); -extern void APIENTRY glTangent3f (GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLTANGENT3FPROC) (GLfloat tx, GLfloat ty, GLfloat tz); -extern void APIENTRY glTangent3fv (const GLfloat *); -typedef void (APIENTRY * PFNGLTANGENT3FVPROC) (const GLfloat *v); -extern void APIENTRY glTangent3i (GLint, GLint, GLint); -typedef void (APIENTRY * PFNGLTANGENT3IPROC) (GLint tx, GLint ty, GLint tz); -extern void APIENTRY glTangent3iv (const GLint *); -typedef void (APIENTRY * PFNGLTANGENT3IVPROC) (const GLint *v); -extern void APIENTRY glTangent3s (GLshort, GLshort, GLshort); -typedef void (APIENTRY * PFNGLTANGENT3SPROC) (GLshort tx, GLshort ty, GLshort tz); -extern void APIENTRY glTangent3sv (const GLshort *); -typedef void (APIENTRY * PFNGLTANGENT3SVPROC) (const GLshort *v); -extern void APIENTRY glBinormal3b (GLbyte, GLbyte, GLbyte); -typedef void (APIENTRY * PFNGLBINORMAL3BPROC) (GLbyte bx, GLbyte by, GLbyte bz); -extern void APIENTRY glBinormal3bv (const GLbyte *); -typedef void (APIENTRY * PFNGLBINORMAL3BVPROC) (const GLbyte *v); -extern void APIENTRY glBinormal3d (GLdouble, GLdouble, GLdouble); -typedef void (APIENTRY * PFNGLBINORMAL3DPROC) (GLdouble bx, GLdouble by, GLdouble bz); -extern void 
APIENTRY glBinormal3dv (const GLdouble *); -typedef void (APIENTRY * PFNGLBINORMAL3DVPROC) (const GLdouble *v); -extern void APIENTRY glBinormal3f (GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLBINORMAL3FPROC) (GLfloat bx, GLfloat by, GLfloat bz); -extern void APIENTRY glBinormal3fv (const GLfloat *); -typedef void (APIENTRY * PFNGLBINORMAL3FVPROC) (const GLfloat *v); -extern void APIENTRY glBinormal3i (GLint, GLint, GLint); -typedef void (APIENTRY * PFNGLBINORMAL3IPROC) (GLint bx, GLint by, GLint bz); -extern void APIENTRY glBinormal3iv (const GLint *); -typedef void (APIENTRY * PFNGLBINORMAL3IVPROC) (const GLint *v); -extern void APIENTRY glBinormal3s (GLshort, GLshort, GLshort); -typedef void (APIENTRY * PFNGLBINORMAL3SPROC) (GLshort bx, GLshort by, GLshort bz); -extern void APIENTRY glBinormal3sv (const GLshort *); -typedef void (APIENTRY * PFNGLBINORMAL3SVPROC) (const GLshort *v); -extern void APIENTRY glTangentPointerEXT (GLenum, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLTANGENTPOINTEREXTPROC) (GLenum type, GLsizei stride, const GLvoid *pointer); -extern void APIENTRY glBinormalPointerEXT (GLenum, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLBINORMALPOINTEREXTPROC) (GLenum type, GLsizei stride, const GLvoid *pointer); -extern void APIENTRY glFinishTextureSUNX (void); -typedef void (APIENTRY * PFNGLFINISHTEXTURESUNXPROC) (void); -extern void APIENTRY glGlobalAlphaFactorbSUN (GLbyte); -typedef void (APIENTRY * PFNGLGLOBALALPHAFACTORBSUNPROC) (GLbyte factor); -extern void APIENTRY glGlobalAlphaFactorsSUN (GLshort); -typedef void (APIENTRY * PFNGLGLOBALALPHAFACTORSSUNPROC) (GLshort factor); -extern void APIENTRY glGlobalAlphaFactoriSUN (GLint); -typedef void (APIENTRY * PFNGLGLOBALALPHAFACTORISUNPROC) (GLint factor); -extern void APIENTRY glGlobalAlphaFactorfSUN (GLfloat); -typedef void (APIENTRY * PFNGLGLOBALALPHAFACTORFSUNPROC) (GLfloat factor); -extern void APIENTRY glGlobalAlphaFactordSUN (GLdouble); -typedef void 
(APIENTRY * PFNGLGLOBALALPHAFACTORDSUNPROC) (GLdouble factor); -extern void APIENTRY glGlobalAlphaFactorubSUN (GLubyte); -typedef void (APIENTRY * PFNGLGLOBALALPHAFACTORUBSUNPROC) (GLubyte factor); -extern void APIENTRY glGlobalAlphaFactorusSUN (GLushort); -typedef void (APIENTRY * PFNGLGLOBALALPHAFACTORUSSUNPROC) (GLushort factor); -extern void APIENTRY glGlobalAlphaFactoruiSUN (GLuint); -typedef void (APIENTRY * PFNGLGLOBALALPHAFACTORUISUNPROC) (GLuint factor); -extern void APIENTRY glReplacementCodeuiSUN (GLuint); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUISUNPROC) (GLuint code); -extern void APIENTRY glReplacementCodeusSUN (GLushort); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUSSUNPROC) (GLushort code); -extern void APIENTRY glReplacementCodeubSUN (GLubyte); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUBSUNPROC) (GLubyte code); -extern void APIENTRY glReplacementCodeuivSUN (const GLuint *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUIVSUNPROC) (const GLuint *code); -extern void APIENTRY glReplacementCodeusvSUN (const GLushort *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUSVSUNPROC) (const GLushort *code); -extern void APIENTRY glReplacementCodeubvSUN (const GLubyte *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUBVSUNPROC) (const GLubyte *code); -extern void APIENTRY glReplacementCodePointer (GLenum, GLsizei, const GLvoid* *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEPOINTERPROC) (GLenum type, GLsizei stride, const GLvoid* *pointer); -extern void APIENTRY glColor4ubVertex2fSUN (GLubyte, GLubyte, GLubyte, GLubyte, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLCOLOR4UBVERTEX2FSUNPROC) (GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y); -extern void APIENTRY glColor4ubVertex2fvSUN (const GLubyte *, const GLfloat *); -typedef void (APIENTRY * PFNGLCOLOR4UBVERTEX2FVSUNPROC) (const GLubyte *c, const GLfloat *v); -extern void APIENTRY glColor4ubVertex3fSUN (GLubyte, GLubyte, GLubyte, GLubyte, GLfloat, GLfloat, GLfloat); 
-typedef void (APIENTRY * PFNGLCOLOR4UBVERTEX3FSUNPROC) (GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glColor4ubVertex3fvSUN (const GLubyte *, const GLfloat *); -typedef void (APIENTRY * PFNGLCOLOR4UBVERTEX3FVSUNPROC) (const GLubyte *c, const GLfloat *v); -extern void APIENTRY glColor3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLCOLOR3FVERTEX3FSUNPROC) (GLfloat r, GLfloat g, GLfloat b, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glColor3fVertex3fvSUN (const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLCOLOR3FVERTEX3FVSUNPROC) (const GLfloat *c, const GLfloat *v); -extern void APIENTRY glNormal3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLNORMAL3FVERTEX3FSUNPROC) (GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glNormal3fVertex3fvSUN (const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLNORMAL3FVERTEX3FVSUNPROC) (const GLfloat *n, const GLfloat *v); -extern void APIENTRY glColor4fNormal3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLCOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glColor4fNormal3fVertex3fvSUN (const GLfloat *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLCOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLfloat *c, const GLfloat *n, const GLfloat *v); -extern void APIENTRY glTexCoord2fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLTEXCOORD2FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glTexCoord2fVertex3fvSUN (const GLfloat *, const GLfloat *); -typedef void (APIENTRY * 
PFNGLTEXCOORD2FVERTEX3FVSUNPROC) (const GLfloat *tc, const GLfloat *v); -extern void APIENTRY glTexCoord4fVertex4fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLTEXCOORD4FVERTEX4FSUNPROC) (GLfloat s, GLfloat t, GLfloat p, GLfloat q, GLfloat x, GLfloat y, GLfloat z, GLfloat w); -extern void APIENTRY glTexCoord4fVertex4fvSUN (const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLTEXCOORD4FVERTEX4FVSUNPROC) (const GLfloat *tc, const GLfloat *v); -extern void APIENTRY glTexCoord2fColor4ubVertex3fSUN (GLfloat, GLfloat, GLubyte, GLubyte, GLubyte, GLubyte, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLTEXCOORD2FCOLOR4UBVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glTexCoord2fColor4ubVertex3fvSUN (const GLfloat *, const GLubyte *, const GLfloat *); -typedef void (APIENTRY * PFNGLTEXCOORD2FCOLOR4UBVERTEX3FVSUNPROC) (const GLfloat *tc, const GLubyte *c, const GLfloat *v); -extern void APIENTRY glTexCoord2fColor3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLTEXCOORD2FCOLOR3FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat r, GLfloat g, GLfloat b, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glTexCoord2fColor3fVertex3fvSUN (const GLfloat *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLTEXCOORD2FCOLOR3FVERTEX3FVSUNPROC) (const GLfloat *tc, const GLfloat *c, const GLfloat *v); -extern void APIENTRY glTexCoord2fNormal3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLTEXCOORD2FNORMAL3FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glTexCoord2fNormal3fVertex3fvSUN (const GLfloat *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * 
PFNGLTEXCOORD2FNORMAL3FVERTEX3FVSUNPROC) (const GLfloat *tc, const GLfloat *n, const GLfloat *v); -extern void APIENTRY glTexCoord2fColor4fNormal3fVertex3fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glTexCoord2fColor4fNormal3fVertex3fvSUN (const GLfloat *, const GLfloat *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLfloat *tc, const GLfloat *c, const GLfloat *n, const GLfloat *v); -extern void APIENTRY glTexCoord4fColor4fNormal3fVertex4fSUN (GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FSUNPROC) (GLfloat s, GLfloat t, GLfloat p, GLfloat q, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z, GLfloat w); -extern void APIENTRY glTexCoord4fColor4fNormal3fVertex4fvSUN (const GLfloat *, const GLfloat *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FVSUNPROC) (const GLfloat *tc, const GLfloat *c, const GLfloat *n, const GLfloat *v); -extern void APIENTRY glReplacementCodeuiVertex3fSUN (GLenum, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUIVERTEX3FSUNPROC) (GLenum rc, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glReplacementCodeuiVertex3fvSUN (const GLenum *, const GLfloat *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUIVERTEX3FVSUNPROC) (const GLenum *rc, const GLfloat *v); -extern void APIENTRY glReplacementCodeuiColor4ubVertex3fSUN (GLenum, GLubyte, GLubyte, GLubyte, GLubyte, GLfloat, GLfloat, 
GLfloat); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FSUNPROC) (GLenum rc, GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glReplacementCodeuiColor4ubVertex3fvSUN (const GLenum *, const GLubyte *, const GLfloat *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FVSUNPROC) (const GLenum *rc, const GLubyte *c, const GLfloat *v); -extern void APIENTRY glReplacementCodeuiColor3fVertex3fSUN (GLenum, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FSUNPROC) (GLenum rc, GLfloat r, GLfloat g, GLfloat b, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glReplacementCodeuiColor3fVertex3fvSUN (const GLenum *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FVSUNPROC) (const GLenum *rc, const GLfloat *c, const GLfloat *v); -extern void APIENTRY glReplacementCodeuiNormal3fVertex3fSUN (GLenum, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FSUNPROC) (GLenum rc, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glReplacementCodeuiNormal3fVertex3fvSUN (const GLenum *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FVSUNPROC) (const GLenum *rc, const GLfloat *n, const GLfloat *v); -extern void APIENTRY glReplacementCodeuiColor4fNormal3fVertex3fSUN (GLenum, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLenum rc, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glReplacementCodeuiColor4fNormal3fVertex3fvSUN (const GLenum *, const GLfloat *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * 
PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLenum *rc, const GLfloat *c, const GLfloat *n, const GLfloat *v); -extern void APIENTRY glReplacementCodeuiTexCoord2fVertex3fSUN (GLenum, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FSUNPROC) (GLenum rc, GLfloat s, GLfloat t, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glReplacementCodeuiTexCoord2fVertex3fvSUN (const GLenum *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FVSUNPROC) (const GLenum *rc, const GLfloat *tc, const GLfloat *v); -extern void APIENTRY glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN (GLenum, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FSUNPROC) (GLenum rc, GLfloat s, GLfloat t, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN (const GLenum *, const GLfloat *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FVSUNPROC) (const GLenum *rc, const GLfloat *tc, const GLfloat *n, const GLfloat *v); -extern void APIENTRY glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN (GLenum, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLenum rc, GLfloat s, GLfloat t, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN (const GLenum *, const GLfloat *, const GLfloat *, const GLfloat *, const GLfloat *); -typedef void (APIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLenum *rc, const 
GLfloat *tc, const GLfloat *c, const GLfloat *n, const GLfloat *v); -extern void APIENTRY glBlendFuncSeparateEXT (GLenum, GLenum, GLenum, GLenum); -typedef void (APIENTRY * PFNGLBLENDFUNCSEPARATEEXTPROC) (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); -extern void APIENTRY glVertexWeightfEXT (GLfloat); -typedef void (APIENTRY * PFNGLVERTEXWEIGHTFEXTPROC) (GLfloat weight); -extern void APIENTRY glVertexWeightfvEXT (const GLfloat *); -typedef void (APIENTRY * PFNGLVERTEXWEIGHTFVEXTPROC) (const GLfloat *weight); -extern void APIENTRY glVertexWeightPointerEXT (GLsizei, GLenum, GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLVERTEXWEIGHTPOINTEREXTPROC) (GLsizei size, GLenum type, GLsizei stride, const GLvoid *pointer); -extern void APIENTRY glFlushVertexArrayRangeNV (void); -typedef void (APIENTRY * PFNGLFLUSHVERTEXARRAYRANGENVPROC) (void); -extern void APIENTRY glVertexArrayRangeNV (GLsizei, const GLvoid *); -typedef void (APIENTRY * PFNGLVERTEXARRAYRANGENVPROC) (GLsizei size, const GLvoid *pointer); -extern void APIENTRY glCombinerParameterfvNV (GLenum, const GLfloat *); -typedef void (APIENTRY * PFNGLCOMBINERPARAMETERFVNVPROC) (GLenum pname, const GLfloat *params); -extern void APIENTRY glCombinerParameterfNV (GLenum, GLfloat); -typedef void (APIENTRY * PFNGLCOMBINERPARAMETERFNVPROC) (GLenum pname, GLfloat param); -extern void APIENTRY glCombinerParameterivNV (GLenum, const GLint *); -typedef void (APIENTRY * PFNGLCOMBINERPARAMETERIVNVPROC) (GLenum pname, const GLint *params); -extern void APIENTRY glCombinerParameteriNV (GLenum, GLint); -typedef void (APIENTRY * PFNGLCOMBINERPARAMETERINVPROC) (GLenum pname, GLint param); -extern void APIENTRY glCombinerInputNV (GLenum, GLenum, GLenum, GLenum, GLenum, GLenum); -typedef void (APIENTRY * PFNGLCOMBINERINPUTNVPROC) (GLenum stage, GLenum portion, GLenum variable, GLenum input, GLenum mapping, GLenum componentUsage); -extern void APIENTRY glCombinerOutputNV (GLenum, GLenum, 
GLenum, GLenum, GLenum, GLenum, GLenum, GLboolean, GLboolean, GLboolean); -typedef void (APIENTRY * PFNGLCOMBINEROUTPUTNVPROC) (GLenum stage, GLenum portion, GLenum abOutput, GLenum cdOutput, GLenum sumOutput, GLenum scale, GLenum bias, GLboolean abDotProduct, GLboolean cdDotProduct, GLboolean muxSum); -extern void APIENTRY glFinalCombinerInputNV (GLenum, GLenum, GLenum, GLenum); -typedef void (APIENTRY * PFNGLFINALCOMBINERINPUTNVPROC) (GLenum variable, GLenum input, GLenum mapping, GLenum componentUsage); -extern void APIENTRY glGetCombinerInputParameterfvNV (GLenum, GLenum, GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETCOMBINERINPUTPARAMETERFVNVPROC) (GLenum stage, GLenum portion, GLenum variable, GLenum pname, GLfloat *params); -extern void APIENTRY glGetCombinerInputParameterivNV (GLenum, GLenum, GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETCOMBINERINPUTPARAMETERIVNVPROC) (GLenum stage, GLenum portion, GLenum variable, GLenum pname, GLint *params); -extern void APIENTRY glGetCombinerOutputParameterfvNV (GLenum, GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETCOMBINEROUTPUTPARAMETERFVNVPROC) (GLenum stage, GLenum portion, GLenum pname, GLfloat *params); -extern void APIENTRY glGetCombinerOutputParameterivNV (GLenum, GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETCOMBINEROUTPUTPARAMETERIVNVPROC) (GLenum stage, GLenum portion, GLenum pname, GLint *params); -extern void APIENTRY glGetFinalCombinerInputParameterfvNV (GLenum, GLenum, GLfloat *); -typedef void (APIENTRY * PFNGLGETFINALCOMBINERINPUTPARAMETERFVNVPROC) (GLenum variable, GLenum pname, GLfloat *params); -extern void APIENTRY glGetFinalCombinerInputParameterivNV (GLenum, GLenum, GLint *); -typedef void (APIENTRY * PFNGLGETFINALCOMBINERINPUTPARAMETERIVNVPROC) (GLenum variable, GLenum pname, GLint *params); -extern void APIENTRY glResizeBuffersMESA (void); -typedef void (APIENTRY * PFNGLRESIZEBUFFERSMESAPROC) (void); -extern void APIENTRY 
glWindowPos2dMESA (GLdouble, GLdouble); -typedef void (APIENTRY * PFNGLWINDOWPOS2DMESAPROC) (GLdouble x, GLdouble y); -extern void APIENTRY glWindowPos2dvMESA (const GLdouble *); -typedef void (APIENTRY * PFNGLWINDOWPOS2DVMESAPROC) (const GLdouble *v); -extern void APIENTRY glWindowPos2fMESA (GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLWINDOWPOS2FMESAPROC) (GLfloat x, GLfloat y); -extern void APIENTRY glWindowPos2fvMESA (const GLfloat *); -typedef void (APIENTRY * PFNGLWINDOWPOS2FVMESAPROC) (const GLfloat *v); -extern void APIENTRY glWindowPos2iMESA (GLint, GLint); -typedef void (APIENTRY * PFNGLWINDOWPOS2IMESAPROC) (GLint x, GLint y); -extern void APIENTRY glWindowPos2ivMESA (const GLint *); -typedef void (APIENTRY * PFNGLWINDOWPOS2IVMESAPROC) (const GLint *v); -extern void APIENTRY glWindowPos2sMESA (GLshort, GLshort); -typedef void (APIENTRY * PFNGLWINDOWPOS2SMESAPROC) (GLshort x, GLshort y); -extern void APIENTRY glWindowPos2svMESA (const GLshort *); -typedef void (APIENTRY * PFNGLWINDOWPOS2SVMESAPROC) (const GLshort *v); -extern void APIENTRY glWindowPos3dMESA (GLdouble, GLdouble, GLdouble); -typedef void (APIENTRY * PFNGLWINDOWPOS3DMESAPROC) (GLdouble x, GLdouble y, GLdouble z); -extern void APIENTRY glWindowPos3dvMESA (const GLdouble *); -typedef void (APIENTRY * PFNGLWINDOWPOS3DVMESAPROC) (const GLdouble *v); -extern void APIENTRY glWindowPos3fMESA (GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLWINDOWPOS3FMESAPROC) (GLfloat x, GLfloat y, GLfloat z); -extern void APIENTRY glWindowPos3fvMESA (const GLfloat *); -typedef void (APIENTRY * PFNGLWINDOWPOS3FVMESAPROC) (const GLfloat *v); -extern void APIENTRY glWindowPos3iMESA (GLint, GLint, GLint); -typedef void (APIENTRY * PFNGLWINDOWPOS3IMESAPROC) (GLint x, GLint y, GLint z); -extern void APIENTRY glWindowPos3ivMESA (const GLint *); -typedef void (APIENTRY * PFNGLWINDOWPOS3IVMESAPROC) (const GLint *v); -extern void APIENTRY glWindowPos3sMESA (GLshort, GLshort, GLshort); -typedef void (APIENTRY 
* PFNGLWINDOWPOS3SMESAPROC) (GLshort x, GLshort y, GLshort z); -extern void APIENTRY glWindowPos3svMESA (const GLshort *); -typedef void (APIENTRY * PFNGLWINDOWPOS3SVMESAPROC) (const GLshort *v); -extern void APIENTRY glWindowPos4dMESA (GLdouble, GLdouble, GLdouble, GLdouble); -typedef void (APIENTRY * PFNGLWINDOWPOS4DMESAPROC) (GLdouble x, GLdouble y, GLdouble z, GLdouble w); -extern void APIENTRY glWindowPos4dvMESA (const GLdouble *); -typedef void (APIENTRY * PFNGLWINDOWPOS4DVMESAPROC) (const GLdouble *v); -extern void APIENTRY glWindowPos4fMESA (GLfloat, GLfloat, GLfloat, GLfloat); -typedef void (APIENTRY * PFNGLWINDOWPOS4FMESAPROC) (GLfloat x, GLfloat y, GLfloat z, GLfloat w); -extern void APIENTRY glWindowPos4fvMESA (const GLfloat *); -typedef void (APIENTRY * PFNGLWINDOWPOS4FVMESAPROC) (const GLfloat *v); -extern void APIENTRY glWindowPos4iMESA (GLint, GLint, GLint, GLint); -typedef void (APIENTRY * PFNGLWINDOWPOS4IMESAPROC) (GLint x, GLint y, GLint z, GLint w); -extern void APIENTRY glWindowPos4ivMESA (const GLint *); -typedef void (APIENTRY * PFNGLWINDOWPOS4IVMESAPROC) (const GLint *v); -extern void APIENTRY glWindowPos4sMESA (GLshort, GLshort, GLshort, GLshort); -typedef void (APIENTRY * PFNGLWINDOWPOS4SMESAPROC) (GLshort x, GLshort y, GLshort z, GLshort w); -extern void APIENTRY glWindowPos4svMESA (const GLshort *); -typedef void (APIENTRY * PFNGLWINDOWPOS4SVMESAPROC) (const GLshort *v); -extern void APIENTRY glMultiModeDrawArraysIBM (GLenum, const GLint *, const GLsizei *, GLsizei, GLint); -typedef void (APIENTRY * PFNGLMULTIMODEDRAWARRAYSIBMPROC) (GLenum mode, const GLint *first, const GLsizei *count, GLsizei primcount, GLint modestride); -extern void APIENTRY glMultiModeDrawElementsIBM (const GLenum *, const GLsizei *, GLenum, const GLvoid* *, GLsizei, GLint); -typedef void (APIENTRY * PFNGLMULTIMODEDRAWELEMENTSIBMPROC) (const GLenum *mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount, GLint modestride); -extern void 
APIENTRY glColorPointerListIBM (GLint, GLenum, GLint, const GLvoid* *, GLint); -typedef void (APIENTRY * PFNGLCOLORPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride); -extern void APIENTRY glSecondaryColorPointerListIBM (GLint, GLenum, GLint, const GLvoid* *, GLint); -typedef void (APIENTRY * PFNGLSECONDARYCOLORPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride); -extern void APIENTRY glEdgeFlagPointerListIBM (GLint, const GLboolean* *, GLint); -typedef void (APIENTRY * PFNGLEDGEFLAGPOINTERLISTIBMPROC) (GLint stride, const GLboolean* *pointer, GLint ptrstride); -extern void APIENTRY glFogCoordPointerListIBM (GLenum, GLint, const GLvoid* *, GLint); -typedef void (APIENTRY * PFNGLFOGCOORDPOINTERLISTIBMPROC) (GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride); -extern void APIENTRY glIndexPointerListIBM (GLenum, GLint, const GLvoid* *, GLint); -typedef void (APIENTRY * PFNGLINDEXPOINTERLISTIBMPROC) (GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride); -extern void APIENTRY glNormalPointerListIBM (GLenum, GLint, const GLvoid* *, GLint); -typedef void (APIENTRY * PFNGLNORMALPOINTERLISTIBMPROC) (GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride); -extern void APIENTRY glTexCoordPointerListIBM (GLint, GLenum, GLint, const GLvoid* *, GLint); -typedef void (APIENTRY * PFNGLTEXCOORDPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride); -extern void APIENTRY glVertexPointerListIBM (GLint, GLenum, GLint, const GLvoid* *, GLint); -typedef void (APIENTRY * PFNGLVERTEXPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid* *pointer, GLint ptrstride); -extern void APIENTRY glTbufferMask3DFX (GLuint); -typedef void (APIENTRY * PFNGLTBUFFERMASK3DFXPROC) (GLuint mask); -extern void APIENTRY glSampleMaskEXT (GLclampf, GLboolean); -typedef void (APIENTRY * 
PFNGLSAMPLEMASKEXTPROC) (GLclampf value, GLboolean invert); -extern void APIENTRY glSamplePatternEXT (GLenum); -typedef void (APIENTRY * PFNGLSAMPLEPATTERNEXTPROC) (GLenum pattern); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/xc/extras/Mesa/src/glheader.h b/xc/extras/Mesa/src/glheader.h index 705c25c74..50316b6f5 100644 --- a/xc/extras/Mesa/src/glheader.h +++ b/xc/extras/Mesa/src/glheader.h @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/glheader.h,v 1.11 2000/10/26 17:57:48 dawes Exp $ */ #ifndef GLHEADER_H @@ -83,11 +82,11 @@ * the new src/glheader.h file. */ -#if defined(_WIN32) && !defined(__WIN32__) && !defined(__CYGWIN__) +#if defined(_WIN32) && !defined(__WIN32__) # define __WIN32__ #endif -#if !defined(OPENSTEP) && (defined(__WIN32__) && !defined(__CYGWIN__)) +#if !defined(OPENSTEP) && (defined(__WIN32__) || defined(__CYGWIN__)) # pragma warning( disable : 4068 ) /* unknown pragma */ # pragma warning( disable : 4710 ) /* function 'foo' not inlined */ # pragma warning( disable : 4711 ) /* function 'foo' selected for automatic inline expansion */ @@ -134,13 +133,13 @@ /* compatability guard so we don't need to change client code */ -#if defined(_WIN32) && !defined(_WINDEF_) && !defined(_GNU_H_WINDOWS32_BASE) && !defined(OPENSTEP) && !defined(__CYGWIN__) +#if defined(_WIN32) && !defined(_WINDEF_) && !defined(_GNU_H_WINDOWS32_BASE) && !defined(OPENSTEP) #if 0 # define CALLBACK GLCALLBACK -#endif -typedef int (GLAPIENTRY *PROC)(); typedef void *HGLRC; typedef void *HDC; +#endif +typedef int (GLAPIENTRY *PROC)(); typedef unsigned long COLORREF; #endif @@ -164,10 +163,8 @@ typedef unsigned long COLORREF; typedef struct tagLAYERPLANEDESCRIPTOR LAYERPLANEDESCRIPTOR, *PLAYERPLANEDESCRIPTOR, *LPLAYERPLANEDESCRIPTOR; typedef struct _GLYPHMETRICSFLOAT GLYPHMETRICSFLOAT, *PGLYPHMETRICSFLOAT, 
*LPGLYPHMETRICSFLOAT; typedef struct tagPIXELFORMATDESCRIPTOR PIXELFORMATDESCRIPTOR, *PPIXELFORMATDESCRIPTOR, *LPPIXELFORMATDESCRIPTOR; -#if 0 #include <gl/mesa_wgl.h> #endif -#endif diff --git a/xc/extras/Mesa/src/image.c b/xc/extras/Mesa/src/image.c index f1781326d..8053558f9 100644 --- a/xc/extras/Mesa/src/image.c +++ b/xc/extras/Mesa/src/image.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/image.c,v 1.7 2000/09/26 15:56:32 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -629,7 +629,7 @@ _mesa_pack_rgba_span( GLcontext *ctx, } else if (!applyTransferOps && format == GL_RGB && type == GL_UNSIGNED_BYTE) { /* common simple case */ - GLint i; + GLuint i; GLubyte *dest = (GLubyte *) destination; for (i = 0; i < n; i++) { dest[0] = srcRgba[i][RCOMP]; @@ -1276,7 +1276,7 @@ _mesa_pack_rgba_span( GLcontext *ctx, } break; case GL_UNSIGNED_SHORT_4_4_4_4: - if (format == GL_RGB) { + if (format == GL_RGBA) { GLushort *dst = (GLushort *) destination; for (i=0;i<n;i++) { dst[i] = (((GLint) (rgba[i][RCOMP] * 15.0F)) << 12) @@ -1285,9 +1285,27 @@ _mesa_pack_rgba_span( GLcontext *ctx, | (((GLint) (rgba[i][ACOMP] * 15.0F)) ); } } + else if (format == GL_BGRA) { + GLushort *dst = (GLushort *) destination; + for (i=0;i<n;i++) { + dst[i] = (((GLint) (rgba[i][BCOMP] * 15.0F)) << 12) + | (((GLint) (rgba[i][GCOMP] * 15.0F)) << 8) + | (((GLint) (rgba[i][RCOMP] * 15.0F)) << 4) + | (((GLint) (rgba[i][ACOMP] * 15.0F)) ); + } + } + else if (format == GL_ABGR_EXT) { + GLushort *dst = (GLushort *) destination; + for (i=0;i<n;i++) { + dst[i] = (((GLint) (rgba[i][ACOMP] * 15.0F)) << 4) + | (((GLint) (rgba[i][BCOMP] * 15.0F)) << 8) + | (((GLint) (rgba[i][GCOMP] * 15.0F)) << 12) + | (((GLint) 
(rgba[i][RCOMP] * 15.0F)) ); + } + } break; case GL_UNSIGNED_SHORT_4_4_4_4_REV: - if (format == GL_RGB) { + if (format == GL_RGBA) { GLushort *dst = (GLushort *) destination; for (i=0;i<n;i++) { dst[i] = (((GLint) (rgba[i][RCOMP] * 15.0F)) ) @@ -1296,9 +1314,27 @@ _mesa_pack_rgba_span( GLcontext *ctx, | (((GLint) (rgba[i][ACOMP] * 15.0F)) << 12); } } + else if (format == GL_BGRA) { + GLushort *dst = (GLushort *) destination; + for (i=0;i<n;i++) { + dst[i] = (((GLint) (rgba[i][BCOMP] * 15.0F)) ) + | (((GLint) (rgba[i][GCOMP] * 15.0F)) << 4) + | (((GLint) (rgba[i][RCOMP] * 15.0F)) << 8) + | (((GLint) (rgba[i][ACOMP] * 15.0F)) << 12); + } + } + else if (format == GL_ABGR_EXT) { + GLushort *dst = (GLushort *) destination; + for (i=0;i<n;i++) { + dst[i] = (((GLint) (rgba[i][ACOMP] * 15.0F)) ) + | (((GLint) (rgba[i][BCOMP] * 15.0F)) << 4) + | (((GLint) (rgba[i][GCOMP] * 15.0F)) << 8) + | (((GLint) (rgba[i][RCOMP] * 15.0F)) << 12); + } + } break; case GL_UNSIGNED_SHORT_5_5_5_1: - if (format == GL_RGB) { + if (format == GL_RGBA) { GLushort *dst = (GLushort *) destination; for (i=0;i<n;i++) { dst[i] = (((GLint) (rgba[i][RCOMP] * 31.0F)) << 11) @@ -1307,9 +1343,27 @@ _mesa_pack_rgba_span( GLcontext *ctx, | (((GLint) (rgba[i][ACOMP] * 1.0F)) ); } } + else if (format == GL_BGRA) { + GLushort *dst = (GLushort *) destination; + for (i=0;i<n;i++) { + dst[i] = (((GLint) (rgba[i][BCOMP] * 31.0F)) << 11) + | (((GLint) (rgba[i][GCOMP] * 31.0F)) << 6) + | (((GLint) (rgba[i][RCOMP] * 31.0F)) << 1) + | (((GLint) (rgba[i][ACOMP] * 1.0F)) ); + } + } + else if (format == GL_ABGR_EXT) { + GLushort *dst = (GLushort *) destination; + for (i=0;i<n;i++) { + dst[i] = (((GLint) (rgba[i][ACOMP] * 31.0F)) << 11) + | (((GLint) (rgba[i][BCOMP] * 31.0F)) << 6) + | (((GLint) (rgba[i][GCOMP] * 31.0F)) << 1) + | (((GLint) (rgba[i][RCOMP] * 1.0F)) ); + } + } break; case GL_UNSIGNED_SHORT_1_5_5_5_REV: - if (format == GL_RGB) { + if (format == GL_RGBA) { GLushort *dst = (GLushort *) destination; for 
(i=0;i<n;i++) { dst[i] = (((GLint) (rgba[i][RCOMP] * 31.0F)) ) @@ -1318,6 +1372,24 @@ _mesa_pack_rgba_span( GLcontext *ctx, | (((GLint) (rgba[i][ACOMP] * 1.0F)) << 15); } } + else if (format == GL_BGRA) { + GLushort *dst = (GLushort *) destination; + for (i=0;i<n;i++) { + dst[i] = (((GLint) (rgba[i][BCOMP] * 31.0F)) ) + | (((GLint) (rgba[i][GCOMP] * 31.0F)) << 5) + | (((GLint) (rgba[i][RCOMP] * 31.0F)) << 10) + | (((GLint) (rgba[i][ACOMP] * 1.0F)) << 15); + } + } + else if (format == GL_ABGR_EXT) { + GLushort *dst = (GLushort *) destination; + for (i=0;i<n;i++) { + dst[i] = (((GLint) (rgba[i][ACOMP] * 31.0F)) ) + | (((GLint) (rgba[i][BCOMP] * 31.0F)) << 5) + | (((GLint) (rgba[i][GCOMP] * 31.0F)) << 10) + | (((GLint) (rgba[i][RCOMP] * 1.0F)) << 15); + } + } break; case GL_UNSIGNED_INT_8_8_8_8: if (format == GL_RGBA) { @@ -2280,7 +2352,7 @@ _mesa_unpack_ubyte_color_span( GLcontext *ctx, unpacking); if (applyTransferOps) { - if (ctx->Pixel.MapColorFlag) { + if (dstFormat == GL_COLOR_INDEX && ctx->Pixel.MapColorFlag) { _mesa_map_ci(ctx, n, indexes); } if (ctx->Pixel.IndexShift || ctx->Pixel.IndexOffset) { @@ -2549,7 +2621,7 @@ _mesa_unpack_float_color_span( GLcontext *ctx, unpacking); if (applyTransferOps) { - if (ctx->Pixel.MapColorFlag) { + if (dstFormat == GL_COLOR_INDEX && ctx->Pixel.MapColorFlag) { _mesa_map_ci(ctx, n, indexes); } if (ctx->Pixel.IndexShift || ctx->Pixel.IndexOffset) { @@ -2943,7 +3015,7 @@ _mesa_unpack_depth_span( const GLcontext *ctx, GLuint n, GLdepth *dest, const struct gl_pixelstore_attrib *unpacking, GLboolean applyTransferOps ) { - GLfloat *depth = MALLOC(n * sizeof(GLfloat)); + GLfloat *depth = (GLfloat *) MALLOC(n * sizeof(GLfloat)); if (!depth) return; @@ -3074,7 +3146,7 @@ _mesa_unpack_image( GLsizei width, GLsizei height, GLsizei depth, } { - GLubyte *destBuffer = MALLOC(bytesPerRow * height * depth); + GLubyte *destBuffer = (GLubyte *) MALLOC(bytesPerRow * height * depth); GLubyte *dst; GLint img, row; if (!destBuffer) @@ -3128,9 
+3200,8 @@ _mesa_unpack_bitmap( GLint width, GLint height, const GLubyte *pixels, width_in_bytes = CEILING( width, 8 ); dst = buffer; for (row = 0; row < height; row++) { - GLubyte *src = _mesa_image_address( packing, pixels, width, height, - GL_COLOR_INDEX, GL_BITMAP, - 0, row, 0 ); + GLubyte *src = (GLubyte *) _mesa_image_address(packing, pixels, width, + height, GL_COLOR_INDEX, GL_BITMAP, 0, row, 0); if (!src) { FREE(buffer); return NULL; @@ -3223,7 +3294,7 @@ _mesa_pack_bitmap( GLint width, GLint height, const GLubyte *source, width_in_bytes = CEILING( width, 8 ); src = source; for (row = 0; row < height; row++) { - GLubyte *dst = _mesa_image_address( packing, dest, width, height, + GLubyte *dst = (GLubyte *) _mesa_image_address( packing, dest, width, height, GL_COLOR_INDEX, GL_BITMAP, 0, row, 0 ); if (!dst) diff --git a/xc/extras/Mesa/src/imaging.c b/xc/extras/Mesa/src/imaging.c index 5d2250cbc..27fb58a6a 100644 --- a/xc/extras/Mesa/src/imaging.c +++ b/xc/extras/Mesa/src/imaging.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/imaging.c,v 1.6 2000/09/26 15:56:32 tsi Exp $ */ + /* * Histogram, Min/max and convolution for GL_ARB_imaging subset @@ -210,7 +210,7 @@ pack_minmax( GLcontext *ctx, CONST GLfloat minmax[2][4], const struct gl_pixelstore_attrib *packing ) { const GLint comps = _mesa_components_in_format(format); - GLuint luminance[2]; + GLfloat luminance[2]; if (format == GL_LUMINANCE || format == GL_LUMINANCE_ALPHA) { luminance[0] = minmax[0][RCOMP] + minmax[0][GCOMP] + minmax[0][BCOMP]; diff --git a/xc/extras/Mesa/src/light.c b/xc/extras/Mesa/src/light.c index 9d73e6814..138aefb08 100644 --- a/xc/extras/Mesa/src/light.c +++ b/xc/extras/Mesa/src/light.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/light.c,v 1.5 2000/09/26 15:56:32 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" diff --git a/xc/extras/Mesa/src/light.h b/xc/extras/Mesa/src/light.h index 1a4fa4174..8edcac801 100644 --- a/xc/extras/Mesa/src/light.h +++ b/xc/extras/Mesa/src/light.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/light.h,v 1.5 2000/09/26 15:56:32 tsi Exp $ */ + #ifndef LIGHT_H #define LIGHT_H diff --git a/xc/extras/Mesa/src/lines.c b/xc/extras/Mesa/src/lines.c index 24b9d5feb..ffbbad416 100644 --- a/xc/extras/Mesa/src/lines.c +++ b/xc/extras/Mesa/src/lines.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/lines.c,v 1.6 2000/09/26 15:56:32 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -1057,7 +1057,7 @@ _mesa_print_line_function(GLcontext *ctx) else if (ctx->Driver.LineFunc == null_line) printf("null_line\n"); else - printf("Driver func %p\n", ctx->Driver.PointsFunc); + printf("Driver func %p\n", ctx->Driver.LineFunc); } #endif diff --git a/xc/extras/Mesa/src/linetemp.h b/xc/extras/Mesa/src/linetemp.h index ab0f6657e..2c4eac4b0 100644 --- a/xc/extras/Mesa/src/linetemp.h +++ b/xc/extras/Mesa/src/linetemp.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/linetemp.h,v 1.9 2000/09/24 13:50:12 alanh Exp $ */ + /* * Line Rasterizer Template @@ -227,8 +227,8 @@ z1 = FloatToFixed(VB->Win.data[vert1][2] + ctx->LineZoffset); } else { - z0 = (int) VB->Win.data[vert0][2] + ctx->LineZoffset; - z1 = (int) VB->Win.data[vert1][2] + ctx->LineZoffset; + z0 = (GLint) (VB->Win.data[vert0][2] + ctx->LineZoffset); + z1 = (GLint) (VB->Win.data[vert1][2] + ctx->LineZoffset); } #endif #ifdef PIXEL_ADDRESS diff --git a/xc/extras/Mesa/src/lnaatemp.h b/xc/extras/Mesa/src/lnaatemp.h index 872fe09b9..9cde169e3 100644 --- a/xc/extras/Mesa/src/lnaatemp.h +++ b/xc/extras/Mesa/src/lnaatemp.h @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/lnaatemp.h,v 1.11 2000/09/24 13:50:12 alanh Exp $ */ /* * Antialiased Line Rasterizer Template @@ -107,8 +106,8 @@ z1 = FloatToFixed(VB->Win.data[vert1][2] + ctx->LineZoffset); } else { - z0 = (int) VB->Win.data[vert0][2] + ctx->LineZoffset; - z1 = (int) VB->Win.data[vert1][2] + ctx->LineZoffset; + z0 = (GLint) (VB->Win.data[vert0][2] + ctx->LineZoffset); + z1 = (GLint) (VB->Win.data[vert1][2] + ctx->LineZoffset); } #ifdef INTERP_STUV0 diff --git a/xc/extras/Mesa/src/logic.c b/xc/extras/Mesa/src/logic.c index 2a4bd3aed..872cb48d6 100644 --- a/xc/extras/Mesa/src/logic.c +++ b/xc/extras/Mesa/src/logic.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/logic.c,v 1.6 2000/09/26 15:56:32 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" diff --git a/xc/extras/Mesa/src/matrix.c b/xc/extras/Mesa/src/matrix.c index 3dcc61ad9..7fc3e9404 100644 --- a/xc/extras/Mesa/src/matrix.c +++ b/xc/extras/Mesa/src/matrix.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/matrix.c,v 1.7 2000/09/26 15:56:32 tsi Exp $ */ + /* * Matrix operations @@ -763,18 +763,16 @@ static void analyze_from_scratch( GLmatrix *mat ) /* Do the real work */ - if (mask == MASK_IDENTITY) { + if (mask == (GLuint) MASK_IDENTITY) { mat->type = MATRIX_IDENTITY; } - else if ((mask & MASK_2D_NO_ROT) == MASK_2D_NO_ROT) - { + else if ((mask & MASK_2D_NO_ROT) == (GLuint) MASK_2D_NO_ROT) { mat->type = MATRIX_2D_NO_ROT; if ((mask & MASK_NO_2D_SCALE) != MASK_NO_2D_SCALE) mat->flags = MAT_FLAG_GENERAL_SCALE; } - else if ((mask & MASK_2D) == MASK_2D) - { + else if ((mask & MASK_2D) == (GLuint) MASK_2D) { GLfloat mm = DOT2(m, m); GLfloat m4m4 = DOT2(m+4,m+4); GLfloat mm4 = DOT2(m,m+4); @@ -793,8 +791,7 @@ static void analyze_from_scratch( GLmatrix *mat ) mat->flags |= MAT_FLAG_ROTATION; } - else if ((mask & MASK_3D_NO_ROT) == MASK_3D_NO_ROT) - { + else if ((mask & MASK_3D_NO_ROT) == (GLuint) MASK_3D_NO_ROT) { mat->type = MATRIX_3D_NO_ROT; /* Check for scale */ @@ -805,8 +802,7 @@ static void analyze_from_scratch( GLmatrix *mat ) } else mat->flags |= MAT_FLAG_GENERAL_SCALE; } - else if ((mask & MASK_3D) == MASK_3D) - { + else if ((mask & MASK_3D) == (GLuint) MASK_3D) { GLfloat c1 = DOT3(m,m); GLfloat c2 = DOT3(m+4,m+4); GLfloat c3 = DOT3(m+8,m+8); diff --git a/xc/extras/Mesa/src/matrix.h b/xc/extras/Mesa/src/matrix.h index 2da7687c9..6d989abd1 100644 --- a/xc/extras/Mesa/src/matrix.h +++ b/xc/extras/Mesa/src/matrix.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/matrix.h,v 1.5 2000/09/26 15:56:32 tsi Exp $ */ + #ifndef MATRIX_H #define MATRIX_H diff --git a/xc/extras/Mesa/src/mem.c b/xc/extras/Mesa/src/mem.c index 9e6a5c2b1..9334b63c4 100644 --- a/xc/extras/Mesa/src/mem.c +++ b/xc/extras/Mesa/src/mem.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/mem.c,v 1.4 2000/09/26 15:56:32 tsi Exp $ */ + /* * Memory allocation functions. Called via the MALLOC, CALLOC and diff --git a/xc/extras/Mesa/src/mem.h b/xc/extras/Mesa/src/mem.h index e9b1f3136..8935cb1aa 100644 --- a/xc/extras/Mesa/src/mem.h +++ b/xc/extras/Mesa/src/mem.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/mem.h,v 1.5 2000/09/26 15:56:32 tsi Exp $ */ + #ifndef MEM_H #define MEM_H diff --git a/xc/extras/Mesa/src/mmath.h b/xc/extras/Mesa/src/mmath.h index dae66eac6..fc34fcb08 100644 --- a/xc/extras/Mesa/src/mmath.h +++ b/xc/extras/Mesa/src/mmath.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/mmath.h,v 1.7 2000/09/24 13:50:14 alanh Exp $ */ + /* * Faster arithmetic functions. 
If the FAST_MATH preprocessor symbol is @@ -158,7 +158,7 @@ static __inline__ int FloatToInt(float f) __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st"); return r; } -#elif defined(__MSC__) && defined(__WIN32__) && !defined(__CYGWIN__) +#elif defined(__MSC__) && defined(__WIN32__) static __inline int FloatToInt(float f) { int r; diff --git a/xc/extras/Mesa/src/pb.c b/xc/extras/Mesa/src/pb.c index 454882d5b..8cc67e0ea 100644 --- a/xc/extras/Mesa/src/pb.c +++ b/xc/extras/Mesa/src/pb.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/pb.c,v 1.6 2000/09/26 15:56:32 tsi Exp $ */ + /* @@ -181,10 +181,10 @@ static void multi_write_rgba_pixels( GLcontext *ctx, GLuint n, } (*ctx->Driver.WriteRGBAPixels)( ctx, n, x, y, - (const GLubyte (*)[4])rgbaTmp, mask ); + (CONST GLubyte (*)[4])rgbaTmp, mask ); if (ctx->RasterMask & ALPHABUF_BIT) { _mesa_write_alpha_pixels( ctx, n, x, y, - (const GLubyte (*)[4])rgbaTmp, mask ); + (CONST GLubyte (*)[4])rgbaTmp, mask ); } } } @@ -271,7 +271,7 @@ void gl_flush_pb( GLcontext *ctx ) if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR && ctx->Light.Enabled && ctx->Texture.ReallyEnabled) { /* add specular color to primary color */ - add_colors( PB->count, PB->rgba, (const GLubyte (*)[3]) PB->spec ); + add_colors( PB->count, PB->rgba, (CONST GLubyte (*)[3]) PB->spec ); } if (ctx->Fog.Enabled @@ -284,7 +284,7 @@ void gl_flush_pb( GLcontext *ctx ) if (ctx->Color.AlphaEnabled) { if (_mesa_alpha_test( ctx, PB->count, - (const GLubyte (*)[4]) PB->rgba, mask )==0) { + (CONST GLubyte (*)[4]) PB->rgba, mask )==0) { goto CleanUp; } } @@ -304,7 +304,7 @@ void gl_flush_pb( GLcontext *ctx ) if (ctx->RasterMask & MULTI_DRAW_BIT) { multi_write_rgba_pixels( ctx, PB->count, PB->x, PB->y, - (const GLubyte (*)[4])PB->rgba, mask ); + (CONST GLubyte (*)[4])PB->rgba, mask ); } else { /* normal case: 
write to exactly one buffer */ @@ -321,11 +321,11 @@ void gl_flush_pb( GLcontext *ctx ) } (*ctx->Driver.WriteRGBAPixels)( ctx, PB->count, PB->x, PB->y, - (const GLubyte (*)[4]) PB->rgba, + (CONST GLubyte (*)[4]) PB->rgba, mask ); if (ctx->RasterMask & ALPHABUF_BIT) { _mesa_write_alpha_pixels( ctx, PB->count, PB->x, PB->y, - (const GLubyte (*)[4]) PB->rgba, mask ); + (CONST GLubyte (*)[4]) PB->rgba, mask ); } } } @@ -336,7 +336,7 @@ void gl_flush_pb( GLcontext *ctx ) if (ctx->Color.AlphaEnabled) { if (_mesa_alpha_test( ctx, PB->count, - (const GLubyte (*)[4]) PB->rgba, mask )==0) { + (CONST GLubyte (*)[4]) PB->rgba, mask )==0) { goto CleanUp; } } @@ -360,7 +360,7 @@ void gl_flush_pb( GLcontext *ctx ) if (ctx->RasterMask & MULTI_DRAW_BIT) { /* Copy mono color to all pixels */ multi_write_rgba_pixels( ctx, PB->count, PB->x, PB->y, - (const GLubyte (*)[4]) PB->rgba, mask ); + (CONST GLubyte (*)[4]) PB->rgba, mask ); } else { /* normal case: write to exactly one buffer */ diff --git a/xc/extras/Mesa/src/pipeline.c b/xc/extras/Mesa/src/pipeline.c index f1b4375f9..0a469894d 100644 --- a/xc/extras/Mesa/src/pipeline.c +++ b/xc/extras/Mesa/src/pipeline.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -414,7 +414,7 @@ void gl_build_immediate_pipeline( GLcontext *ctx ) gl_print_pipeline( ctx, elt ); } -#define INTERESTED ~(NEW_DRIVER_STATE|NEW_CLIENT_STATE|NEW_TEXTURE_ENABLE) +#define INTERESTED ~(NEW_DRIVER_STATE|NEW_CLIENT_STATE) void gl_update_pipelines( GLcontext *ctx ) { @@ -438,7 +438,7 @@ void gl_update_pipelines( GLcontext *ctx ) else flags |= VERT_INDEX; - if (ctx->Texture.Enabled & 0xf) { + if (ctx->Texture.ReallyEnabled & 0xf) { /* XXX this should also check that the texture is RGBA. What about Unit[1]? 
if (ctx->Texture.Unit[0].EnvMode == GL_REPLACE) flags &= ~VERT_RGBA; @@ -446,7 +446,7 @@ void gl_update_pipelines( GLcontext *ctx ) flags |= VERT_TEX0_ANY; } - if (ctx->Texture.Enabled & 0xf0) + if (ctx->Texture.ReallyEnabled & 0xf0) flags |= VERT_TEX1_ANY; if (ctx->Polygon.Unfilled) @@ -489,7 +489,7 @@ void gl_run_pipeline( struct vertex_buffer *VB ) START_FAST_MATH(x); - for ( VB->Culled = 0; *stages && !VB->Culled ; stages++ ) + for ( VB->Culled = 0; *stages && !VB->Culled ; stages++ ) (*stages)->run( VB ); END_FAST_MATH(x); @@ -524,7 +524,7 @@ void gl_print_vert_flags( const char *name, GLuint flags ) void gl_print_tri_caps( const char *name, GLuint flags ) { fprintf(stderr, - "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", name, flags, (flags & DD_FEEDBACK) ? "feedback, " : "", @@ -548,7 +548,9 @@ void gl_print_tri_caps( const char *name, GLuint flags ) (flags & DD_LINE_SW_RASTERIZE) ? "sw-lines, " : "", (flags & DD_TRI_SW_RASTERIZE) ? "sw-tris, " : "", (flags & DD_QUAD_SW_RASTERIZE) ? "sw-quads, " : "", - (flags & DD_TRI_CULL_FRONT_BACK) ? "cull-all, " : "" + (flags & DD_TRI_CULL_FRONT_BACK) ? "cull-all, " : "", + (flags & DD_STENCIL) ? "stencil, " : "", + (flags & DD_CLIP_FOG_COORD) ? 
"clip-fog-coord, " : "" ); } diff --git a/xc/extras/Mesa/src/pixel.c b/xc/extras/Mesa/src/pixel.c index 17c6eb6bb..7725f8ddb 100644 --- a/xc/extras/Mesa/src/pixel.c +++ b/xc/extras/Mesa/src/pixel.c @@ -104,6 +104,14 @@ _mesa_PixelStorei( GLenum pname, GLint param ) ctx->Pack.SkipRows = param; } break; + case GL_PACK_SKIP_IMAGES: + if (param<0) { + gl_error( ctx, GL_INVALID_VALUE, "glPixelStore(param)" ); + } + else { + ctx->Pack.SkipImages = param; + } + break; case GL_PACK_ALIGNMENT: if (param==1 || param==2 || param==4 || param==8) { ctx->Pack.Alignment = param; @@ -148,6 +156,14 @@ _mesa_PixelStorei( GLenum pname, GLint param ) ctx->Unpack.SkipRows = param; } break; + case GL_UNPACK_SKIP_IMAGES: + if (param < 0) { + gl_error( ctx, GL_INVALID_VALUE, "glPixelStore(param)" ); + } + else { + ctx->Unpack.SkipImages = param; + } + break; case GL_UNPACK_ALIGNMENT: if (param==1 || param==2 || param==4 || param==8) { ctx->Unpack.Alignment = param; diff --git a/xc/extras/Mesa/src/points.c b/xc/extras/Mesa/src/points.c index cd73dd1c3..c912d20df 100644 --- a/xc/extras/Mesa/src/points.c +++ b/xc/extras/Mesa/src/points.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/points.c,v 1.6 2000/09/26 15:56:32 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -160,7 +160,7 @@ size1_ci_points( GLcontext *ctx, GLuint first, GLuint last ) GLuint i; win = &VB->Win.data[first][0]; - for (i = first; i <= last; i++) { + for (i = first; i < last; i++) { if (VB->ClipMask[i] == 0) { pbx[pbcount] = (GLint) win[0]; pby[pbcount] = (GLint) win[1]; @@ -186,7 +186,7 @@ size1_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) struct pixel_buffer *PB = ctx->PB; GLuint i; - for (i = first; i <= last; i++) { + for (i = first; i < last; i++) { if (VB->ClipMask[i] == 0) { GLint x, y, z; GLint red, green, blue, alpha; @@ -220,7 +220,7 @@ general_ci_points( GLcontext *ctx, GLuint first, GLuint last ) GLint radius = isize >> 1; GLuint i; - for (i = first; i <= last; i++) { + for (i = first; i < last; i++) { if (VB->ClipMask[i] == 0) { GLint x0, x1, y0, y1; GLint ix, iy; @@ -269,7 +269,7 @@ general_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) GLint radius = isize >> 1; GLuint i; - for (i = first; i <= last; i++) { + for (i = first; i < last; i++) { if (VB->ClipMask[i] == 0) { GLint x0, x1, y0, y1; GLint ix, iy; @@ -322,7 +322,7 @@ textured_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) struct pixel_buffer *PB = ctx->PB; GLuint i; - for (i = first; i <= last; i++) { + for (i = first; i < last; i++) { if (VB->ClipMask[i] == 0) { GLint x0, x1, y0, y1; GLint ix, iy, radius; @@ -409,7 +409,7 @@ multitextured_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) struct pixel_buffer *PB = ctx->PB; GLuint i; - for (i = first; i <= last; i++) { + for (i = first; i < last; i++) { if (VB->ClipMask[i] == 0) { GLint x0, x1, y0, y1; GLint ix, iy; @@ -550,7 +550,7 @@ antialiased_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) GLuint i; if (ctx->Texture.ReallyEnabled) { - for (i = first; i <= last; i++) { + for (i = first; i < last; i++) { if (VB->ClipMask[i] == 0) { GLint x, y; GLint red, green, 
blue, alpha; @@ -666,7 +666,7 @@ antialiased_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) } else { /* Not texture mapped */ - for (i=first;i<=last;i++) { + for (i=first;i<last;i++) { if (VB->ClipMask[i]==0) { GLint xmin, ymin, xmax, ymax; GLint x, y, z; @@ -737,7 +737,7 @@ dist3(GLfloat *out, GLuint first, GLuint last, const GLfloat *p = VEC_ELT(v, GLfloat, first); GLuint i; - for (i = first ; i <= last ; i++, STRIDE_F(p, stride) ) { + for (i = first ; i < last ; i++, STRIDE_F(p, stride) ) { GLfloat dist = GL_SQRT(p[0]*p[0]+p[1]*p[1]+p[2]*p[2]); out[i] = 1.0F / (ctx->Point.Params[0] + dist * (ctx->Point.Params[1] + @@ -754,7 +754,7 @@ dist2(GLfloat *out, GLuint first, GLuint last, const GLfloat *p = VEC_ELT(v, GLfloat, first); GLuint i; - for (i = first ; i <= last ; i++, STRIDE_F(p, stride) ) { + for (i = first ; i < last ; i++, STRIDE_F(p, stride) ) { GLfloat dist = GL_SQRT(p[0]*p[0]+p[1]*p[1]); out[i] = 1.0F / (ctx->Point.Params[0] + dist * (ctx->Point.Params[1] + @@ -794,7 +794,7 @@ clip_dist(GLfloat *out, GLuint first, GLuint last, const GLfloat *from = (GLfloat *)clip_vec->start; const GLuint stride = clip_vec->stride; - for (i = first ; i <= last ; i++ ) + for (i = first ; i < last ; i++ ) { GLfloat dist = win[i][2]; out[i] = 1/(ctx->Point.Params[0]+ @@ -823,7 +823,7 @@ dist_atten_general_ci_points( GLcontext *ctx, GLuint first, GLuint last ) else clip_dist( dist, first, last, ctx, VB->ClipPtr ); - for (i=first;i<=last;i++) { + for (i=first;i<last;i++) { if (VB->ClipMask[i]==0) { GLint x0, x1, y0, y1; GLint ix, iy; @@ -885,7 +885,7 @@ dist_atten_general_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) else clip_dist( dist, first, last, ctx, VB->ClipPtr ); - for (i=first;i<=last;i++) { + for (i=first;i<last;i++) { if (VB->ClipMask[i]==0) { GLint x0, x1, y0, y1; GLint ix, iy; @@ -955,7 +955,7 @@ dist_atten_textured_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) else clip_dist( dist, first, last, ctx, VB->ClipPtr ); - for 
(i=first;i<=last;i++) { + for (i=first;i<last;i++) { if (VB->ClipMask[i]==0) { GLint x0, x1, y0, y1; GLint ix, iy; @@ -1103,7 +1103,7 @@ dist_atten_antialiased_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) clip_dist( dist, first, last, ctx, VB->ClipPtr ); if (ctx->Texture.ReallyEnabled) { - for (i=first;i<=last;i++) { + for (i=first;i<last;i++) { if (VB->ClipMask[i]==0) { GLfloat radius, rmin, rmax, rmin2, rmax2, cscale, alphaf; GLint xmin, ymin, xmax, ymax; @@ -1231,7 +1231,7 @@ dist_atten_antialiased_rgba_points( GLcontext *ctx, GLuint first, GLuint last ) } else { /* Not texture mapped */ - for (i = first; i <= last; i++) { + for (i = first; i < last; i++) { if (VB->ClipMask[i] == 0) { GLfloat radius, rmin, rmax, rmin2, rmax2, cscale, alphaf; GLint xmin, ymin, xmax, ymax; diff --git a/xc/extras/Mesa/src/readpix.c b/xc/extras/Mesa/src/readpix.c index 4e5e11985..96a3f11b2 100644 --- a/xc/extras/Mesa/src/readpix.c +++ b/xc/extras/Mesa/src/readpix.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/readpix.c,v 1.6 2000/09/26 15:56:33 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -655,7 +655,7 @@ static void read_rgba_pixels( GLcontext *ctx, dest = _mesa_image_address( packing, pixels, width, height, format, type, 0, j, 0); - _mesa_pack_rgba_span( ctx, readWidth, (const GLubyte (*)[4]) rgba, + _mesa_pack_rgba_span( ctx, readWidth, (CONST GLubyte (*)[4]) rgba, format, type, dest, packing, GL_TRUE ); } } @@ -678,7 +678,7 @@ static void read_rgba_pixels( GLcontext *ctx, dest = _mesa_image_address( packing, pixels, width, height, format, type, 0, j, 0); - _mesa_pack_rgba_span( ctx, readWidth, (const GLubyte (*)[4]) rgba, + _mesa_pack_rgba_span( ctx, readWidth, (CONST GLubyte (*)[4]) rgba, format, type, dest, packing, GL_TRUE ); } } @@ -705,6 +705,8 @@ _mesa_ReadPixels( GLint x, GLint y, GLsizei width, GLsizei height, format, type, &ctx->Pack, pixels)) return; + RENDER_START(ctx); + switch (format) { case GL_COLOR_INDEX: read_index_pixels(ctx, x, y, width, height, type, pixels, &ctx->Pack); @@ -732,4 +734,6 @@ _mesa_ReadPixels( GLint x, GLint y, GLsizei width, GLsizei height, default: gl_error( ctx, GL_INVALID_ENUM, "glReadPixels(format)" ); } + + RENDER_FINISH(ctx); } diff --git a/xc/extras/Mesa/src/rect.c b/xc/extras/Mesa/src/rect.c index 5a54a072b..502498c01 100644 --- a/xc/extras/Mesa/src/rect.c +++ b/xc/extras/Mesa/src/rect.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/rect.c,v 1.5 2000/09/26 15:56:33 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" diff --git a/xc/extras/Mesa/src/render_tmp.h b/xc/extras/Mesa/src/render_tmp.h index b7a23b056..bcc60d42b 100644 --- a/xc/extras/Mesa/src/render_tmp.h +++ b/xc/extras/Mesa/src/render_tmp.h @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/render_tmp.h,v 1.6 2000/09/26 15:56:33 tsi Exp $ */ /* * New (3.1) transformation code written by Keith Whitwell. diff --git a/xc/extras/Mesa/src/shade.c b/xc/extras/Mesa/src/shade.c index 0d794dcdf..e2516ece7 100644 --- a/xc/extras/Mesa/src/shade.c +++ b/xc/extras/Mesa/src/shade.c @@ -22,7 +22,8 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/shade.c,v 1.5 2000/09/26 15:56:33 tsi Exp $ */ + + #ifdef PC_HEADER #include "all.h" @@ -203,7 +204,7 @@ void gl_update_lighting_function( GLcontext *ctx ) if (ctx->Visual->RGBAflag) { if (ctx->Light.NeedVertices) { - if (ctx->Texture.Enabled && + if (ctx->Texture.ReallyEnabled && ctx->Light.Model.ColorControl==GL_SEPARATE_SPECULAR_COLOR) tab = gl_shade_spec_tab; else diff --git a/xc/extras/Mesa/src/shade_tmp.h b/xc/extras/Mesa/src/shade_tmp.h index 72a7f2d82..f6ec08cb7 100644 --- a/xc/extras/Mesa/src/shade_tmp.h +++ b/xc/extras/Mesa/src/shade_tmp.h @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/shade_tmp.h,v 1.6 2000/09/26 15:56:33 tsi Exp $ */ /* * New (3.1) transformation code written by Keith Whitwell. 
diff --git a/xc/extras/Mesa/src/span.c b/xc/extras/Mesa/src/span.c index 3a3ba0633..a86489f03 100644 --- a/xc/extras/Mesa/src/span.c +++ b/xc/extras/Mesa/src/span.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/span.c,v 1.7 2000/09/26 15:56:33 tsi Exp $ */ + /* * pixel span rasterization: @@ -407,10 +407,10 @@ static void multi_write_rgba_span( GLcontext *ctx, GLuint n, } (*ctx->Driver.WriteRGBASpan)( ctx, n, x, y, - (const GLubyte (*)[4]) rgbaTmp, mask ); + (CONST GLubyte (*)[4]) rgbaTmp, mask ); if (ctx->RasterMask & ALPHABUF_BIT) { _mesa_write_alpha_span( ctx, n, x, y, - (const GLubyte (*)[4])rgbaTmp, mask ); + (CONST GLubyte (*)[4])rgbaTmp, mask ); } } } @@ -475,7 +475,7 @@ void gl_write_rgba_span( GLcontext *ctx, /* Do the alpha test */ if (ctx->Color.AlphaEnabled) { - if (_mesa_alpha_test( ctx, n, (const GLubyte (*)[4]) rgba, mask )==0) { + if (_mesa_alpha_test( ctx, n, (CONST GLubyte (*)[4]) rgba, mask )==0) { return; } write_all = GL_FALSE; @@ -503,7 +503,7 @@ void gl_write_rgba_span( GLcontext *ctx, ctx->OcclusionResult = GL_TRUE; if (ctx->RasterMask & MULTI_DRAW_BIT) { - multi_write_rgba_span( ctx, n, x, y, (const GLubyte (*)[4]) rgba, mask ); + multi_write_rgba_span( ctx, n, x, y, (CONST GLubyte (*)[4]) rgba, mask ); } else { /* normal: write to exactly one buffer */ @@ -524,12 +524,12 @@ void gl_write_rgba_span( GLcontext *ctx, /* write pixels */ (*ctx->Driver.WriteRGBASpan)( ctx, n, x, y, - (const GLubyte (*)[4]) rgba, + (CONST GLubyte (*)[4]) rgba, write_all ? Null : mask ); if (ctx->RasterMask & ALPHABUF_BIT) { _mesa_write_alpha_span( ctx, n, x, y, - (const GLubyte (*)[4]) rgba, + (CONST GLubyte (*)[4]) rgba, write_all ? 
Null : mask ); } @@ -588,7 +588,7 @@ void gl_write_monocolor_span( GLcontext *ctx, for (i=0;i<n;i++) { rgba[i][ACOMP] = color[ACOMP]; } - if (_mesa_alpha_test( ctx, n, (const GLubyte (*)[4])rgba, mask )==0) { + if (_mesa_alpha_test( ctx, n, (CONST GLubyte (*)[4])rgba, mask )==0) { return; } write_all = GL_FALSE; @@ -637,7 +637,7 @@ void gl_write_monocolor_span( GLcontext *ctx, if (ctx->RasterMask & MULTI_DRAW_BIT) { multi_write_rgba_span( ctx, n, x, y, - (const GLubyte (*)[4]) rgba, mask ); + (CONST GLubyte (*)[4]) rgba, mask ); } else { /* normal: write to exactly one buffer */ @@ -657,11 +657,11 @@ void gl_write_monocolor_span( GLcontext *ctx, /* write pixels */ (*ctx->Driver.WriteRGBASpan)( ctx, n, x, y, - (const GLubyte (*)[4]) rgba, + (CONST GLubyte (*)[4]) rgba, write_all ? Null : mask ); if (ctx->RasterMask & ALPHABUF_BIT) { _mesa_write_alpha_span( ctx, n, x, y, - (const GLubyte (*)[4]) rgba, + (CONST GLubyte (*)[4]) rgba, write_all ? Null : mask ); } } @@ -679,7 +679,7 @@ void gl_write_monocolor_span( GLcontext *ctx, } } multi_write_rgba_span( ctx, n, x, y, - (const GLubyte (*)[4]) rgba, mask ); + (CONST GLubyte (*)[4]) rgba, mask ); } else { (*ctx->Driver.WriteMonoRGBASpan)( ctx, n, x, y, mask ); @@ -787,7 +787,7 @@ void gl_write_texture_span( GLcontext *ctx, /* Do the alpha test */ if (ctx->Color.AlphaEnabled) { - if (_mesa_alpha_test( ctx, n, (const GLubyte (*)[4]) rgba, mask )==0) { + if (_mesa_alpha_test( ctx, n, (CONST GLubyte (*)[4]) rgba, mask )==0) { return; } write_all = GL_FALSE; @@ -815,7 +815,7 @@ void gl_write_texture_span( GLcontext *ctx, ctx->OcclusionResult = GL_TRUE; if (ctx->RasterMask & MULTI_DRAW_BIT) { - multi_write_rgba_span( ctx, n, x, y, (const GLubyte (*)[4]) rgba, mask ); + multi_write_rgba_span( ctx, n, x, y, (CONST GLubyte (*)[4]) rgba, mask ); } else { /* normal: write to exactly one buffer */ @@ -831,10 +831,10 @@ void gl_write_texture_span( GLcontext *ctx, _mesa_mask_rgba_span( ctx, n, x, y, rgba ); } - 
(*ctx->Driver.WriteRGBASpan)( ctx, n, x, y, (const GLubyte (*)[4])rgba, + (*ctx->Driver.WriteRGBASpan)( ctx, n, x, y, (CONST GLubyte (*)[4])rgba, write_all ? Null : mask ); if (ctx->RasterMask & ALPHABUF_BIT) { - _mesa_write_alpha_span( ctx, n, x, y, (const GLubyte (*)[4]) rgba, + _mesa_write_alpha_span( ctx, n, x, y, (CONST GLubyte (*)[4]) rgba, write_all ? Null : mask ); } } @@ -918,7 +918,7 @@ gl_write_multitexture_span( GLcontext *ctx, GLuint texUnits, /* Do the alpha test */ if (ctx->Color.AlphaEnabled) { - if (_mesa_alpha_test( ctx, n, (const GLubyte (*)[4])rgba, mask )==0) { + if (_mesa_alpha_test( ctx, n, (CONST GLubyte (*)[4])rgba, mask )==0) { return; } write_all = GL_FALSE; @@ -946,7 +946,7 @@ gl_write_multitexture_span( GLcontext *ctx, GLuint texUnits, ctx->OcclusionResult = GL_TRUE; if (ctx->RasterMask & MULTI_DRAW_BIT) { - multi_write_rgba_span( ctx, n, x, y, (const GLubyte (*)[4]) rgba, mask ); + multi_write_rgba_span( ctx, n, x, y, (CONST GLubyte (*)[4]) rgba, mask ); } else { /* normal: write to exactly one buffer */ @@ -963,9 +963,9 @@ gl_write_multitexture_span( GLcontext *ctx, GLuint texUnits, _mesa_mask_rgba_span( ctx, n, x, y, rgba ); } - (*ctx->Driver.WriteRGBASpan)( ctx, n, x, y, (const GLubyte (*)[4])rgba, write_all ? Null : mask ); + (*ctx->Driver.WriteRGBASpan)( ctx, n, x, y, (CONST GLubyte (*)[4])rgba, write_all ? Null : mask ); if (ctx->RasterMask & ALPHABUF_BIT) { - _mesa_write_alpha_span( ctx, n, x, y, (const GLubyte (*)[4])rgba, + _mesa_write_alpha_span( ctx, n, x, y, (CONST GLubyte (*)[4])rgba, write_all ? Null : mask ); } } diff --git a/xc/extras/Mesa/src/stages.c b/xc/extras/Mesa/src/stages.c index 859aadac5..adf614270 100644 --- a/xc/extras/Mesa/src/stages.c +++ b/xc/extras/Mesa/src/stages.c @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/stages.c,v 1.6 2000/09/26 15:56:33 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -457,7 +457,7 @@ static void do_lighting( struct vertex_buffer *VB ) /* Make sure we can talk about elements 0..2 in the vector we are * lighting. */ - if (VB->Unprojected->size == 2) { + if (VB->Unprojected && VB->Unprojected->size == 2) { if (VB->Unprojected->flags & VEC_WRITABLE) gl_vector4f_clean_elem(VB->Unprojected, VB->Count, 2); else @@ -774,8 +774,8 @@ CONST struct gl_pipeline_stage gl_default_pipeline[] = { PIPE_OP_TEX0, PIPE_PRECALC|PIPE_IMMEDIATE, 0, - NEW_TEXTURING|NEW_TEXTURE_MATRIX, - NEW_TEXTURING|NEW_TEXTURE_MATRIX, + NEW_TEXTURING|NEW_TEXTURE_MATRIX|NEW_TEXTURE_ENABLE, + NEW_TEXTURING|NEW_TEXTURE_MATRIX|NEW_TEXTURE_ENABLE, 0, 0, DYN_STATE, check_texture_0, @@ -785,8 +785,8 @@ CONST struct gl_pipeline_stage gl_default_pipeline[] = { PIPE_OP_TEX1, PIPE_PRECALC|PIPE_IMMEDIATE, 0, - NEW_TEXTURING|NEW_TEXTURE_MATRIX, - NEW_TEXTURING|NEW_TEXTURE_MATRIX, + NEW_TEXTURING|NEW_TEXTURE_MATRIX|NEW_TEXTURE_ENABLE, + NEW_TEXTURING|NEW_TEXTURE_MATRIX|NEW_TEXTURE_ENABLE, 0, 0, DYN_STATE, check_texture_1, diff --git a/xc/extras/Mesa/src/state.c b/xc/extras/Mesa/src/state.c index e111b18db..4c910aa5b 100644 --- a/xc/extras/Mesa/src/state.c +++ b/xc/extras/Mesa/src/state.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/state.c,v 1.5 2000/09/26 15:56:33 tsi Exp $ */ + /* * This file initializes the immediate-mode dispatch table (which may @@ -774,10 +774,10 @@ static void update_pixel_masking( GLcontext *ctx ) static void update_fog_mode( GLcontext *ctx ) { - int old_mode = ctx->FogMode; + const GLuint old_mode = ctx->FogMode; if (ctx->Fog.Enabled) { - if (ctx->Texture.Enabled) + if (ctx->Texture.ReallyEnabled) ctx->FogMode = FOG_FRAGMENT; else if (ctx->Hint.Fog == GL_NICEST) ctx->FogMode = FOG_FRAGMENT; @@ -921,8 +921,9 @@ void gl_update_state( GLcontext *ctx ) gl_update_client_state( ctx ); if ((ctx->NewState & NEW_TEXTURE_ENABLE) && - (ctx->Enabled & ENABLE_TEX_ANY) != ctx->Texture.Enabled) + (ctx->Enabled & ENABLE_TEX_ANY) != ctx->Texture.ReallyEnabled) { ctx->NewState |= NEW_TEXTURING | NEW_RASTER_OPS; + } if (ctx->NewState & NEW_TEXTURE_ENV) { if (ctx->Texture.Unit[0].EnvMode == ctx->Texture.Unit[0].LastEnvMode && @@ -932,21 +933,23 @@ void gl_update_state( GLcontext *ctx ) ctx->Texture.Unit[1].LastEnvMode = ctx->Texture.Unit[1].EnvMode; } - if (ctx->NewState & NEW_TEXTURE_MATRIX) { - ctx->Enabled &= ~(ENABLE_TEXMAT0|ENABLE_TEXMAT1); + /* Update ctx->Enabled's ENABLE_TEXMATn flags */ + if (ctx->NewState & (NEW_TEXTURE_MATRIX | NEW_TEXTURE_ENABLE)) { + ctx->Enabled &= ~(ENABLE_TEXMAT0 | ENABLE_TEXMAT1); for (i=0; i < MAX_TEXTURE_UNITS; i++) { if (ctx->TextureMatrix[i].flags & MAT_DIRTY_ALL_OVER) { gl_matrix_analyze( &ctx->TextureMatrix[i] ); ctx->TextureMatrix[i].flags &= ~MAT_DIRTY_DEPENDENTS; - - if (ctx->Texture.Unit[i].Enabled && - ctx->TextureMatrix[i].type != MATRIX_IDENTITY) - ctx->Enabled |= ENABLE_TEXMAT0 << i; } + if (ctx->Texture.Unit[i].Enabled && + ctx->TextureMatrix[i].type != MATRIX_IDENTITY) { + ctx->Enabled |= ENABLE_TEXMAT0 << i; + } } } + /* Update ctx->Enabled's ENABLE_TEXGENn and ENABLE_TEXn flags */ if (ctx->NewState & (NEW_TEXTURING | NEW_TEXTURE_ENABLE)) { ctx->Texture.NeedNormals = GL_FALSE; 
gl_update_dirty_texobjs(ctx); @@ -973,9 +976,11 @@ void gl_update_state( GLcontext *ctx ) } } } + else { + ctx->Texture.Unit[i].ReallyEnabled = 0; + } } - - ctx->Texture.Enabled = ctx->Enabled & ENABLE_TEX_ANY; + ctx->Enabled = (ctx->Enabled & ~ENABLE_TEX_ANY) | ctx->Texture.ReallyEnabled; ctx->NeedNormals = (ctx->Light.Enabled || ctx->Texture.NeedNormals); } @@ -1186,8 +1191,9 @@ void gl_update_state( GLcontext *ctx ) } ctx->NeedEyeNormals = ctx->NeedEyeCoords; } - if (ctx->Texture.Enabled || ctx->RenderMode==GL_FEEDBACK) { - if (ctx->Texture.NeedEyeCoords) ctx->NeedEyeCoords = GL_TRUE; + if (ctx->Texture.ReallyEnabled || ctx->RenderMode==GL_FEEDBACK) { + if (ctx->Texture.NeedEyeCoords) + ctx->NeedEyeCoords = GL_TRUE; if (ctx->Texture.NeedNormals) ctx->NeedNormals = ctx->NeedEyeNormals = GL_TRUE; } diff --git a/xc/extras/Mesa/src/texgen_tmp.h b/xc/extras/Mesa/src/texgen_tmp.h index 8fe16187f..c1fa50816 100644 --- a/xc/extras/Mesa/src/texgen_tmp.h +++ b/xc/extras/Mesa/src/texgen_tmp.h @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/texgen_tmp.h,v 1.5 2000/09/26 15:56:33 tsi Exp $ */ /* * New (3.1) transformation code written by Keith Whitwell. diff --git a/xc/extras/Mesa/src/teximage.c b/xc/extras/Mesa/src/teximage.c index c92ff7937..ecbc0d7fe 100644 --- a/xc/extras/Mesa/src/teximage.c +++ b/xc/extras/Mesa/src/teximage.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/teximage.c,v 1.10 2000/09/24 13:50:18 alanh Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -52,50 +52,50 @@ #ifdef DEBUG static void PrintTexture(const struct gl_texture_image *img) { - int i, j, c; - GLubyte *data = img->Data; - - if (!data) { - printf("No texture data\n"); - return; - } - - switch (img->Format) { - case GL_ALPHA: - case GL_LUMINANCE: - case GL_INTENSITY: - case GL_COLOR_INDEX: - c = 1; - break; - case GL_LUMINANCE_ALPHA: - c = 2; - break; - case GL_RGB: - c = 3; - break; - case GL_RGBA: - c = 4; - break; - default: - gl_problem(NULL, "error in PrintTexture\n"); - return; - } - - - for (i = 0; i < img->Height; i++) { - for (j = 0; j < img->Width; j++) { - if (c==1) - printf("%02x ", data[0]); - else if (c==2) - printf("%02x%02x ", data[0], data[1]); - else if (c==3) - printf("%02x%02x%02x ", data[0], data[1], data[2]); - else if (c==4) - printf("%02x%02x%02x%02x ", data[0], data[1], data[2], data[3]); - data += c; - } - printf("\n"); - } + GLuint i, j, c; + GLubyte *data = img->Data; + + if (!data) { + printf("No texture data\n"); + return; + } + + switch (img->Format) { + case GL_ALPHA: + case GL_LUMINANCE: + case GL_INTENSITY: + case GL_COLOR_INDEX: + c = 1; + break; + case GL_LUMINANCE_ALPHA: + c = 2; + break; + case GL_RGB: + c = 3; + break; + case GL_RGBA: + c = 4; + break; + default: + gl_problem(NULL, "error in PrintTexture\n"); + return; + } + + + for (i = 0; i < img->Height; i++) { + for (j = 0; j < img->Width; j++) { + if (c==1) + printf("%02x ", data[0]); + else if (c==2) + printf("%02x%02x ", data[0], data[1]); + else if (c==3) + printf("%02x%02x%02x ", data[0], data[1], data[2]); + else if (c==4) + printf("%02x%02x%02x%02x ", data[0], data[1], data[2], data[3]); + data += c; + } + printf("\n"); + } } #endif @@ -698,7 +698,7 @@ make_texture_image( GLcontext *ctx, && !ctx->Pixel.IndexOffset && !ctx->Pixel.IndexShift && srcType == GL_UNSIGNED_BYTE && depth == 1) { - if (srcFormat == internalFormat || 
+ if ((GLint) srcFormat == internalFormat || (srcFormat == GL_LUMINANCE && internalFormat == 1) || (srcFormat == GL_LUMINANCE_ALPHA && internalFormat == 2) || (srcFormat == GL_RGB && internalFormat == 3) || @@ -824,13 +824,14 @@ make_null_texture( struct gl_texture_image *texImage ) }; GLubyte *imgPtr = texImage->Data; - GLint i, j, k; + GLuint i, j; + GLint k; for (i = 0; i < texImage->Height; i++) { GLint srcRow = 7 - i % 8; for (j = 0; j < texImage->Width; j++) { GLint srcCol = j % 32; GLint texel = (message[srcRow][srcCol]=='X') ? 255 : 70; - for (k=0;k<components;k++) { + for (k = 0; k < components; k++) { *imgPtr++ = (GLubyte) texel; } } @@ -1348,7 +1349,14 @@ copytexsubimage_error_check( GLcontext *ctx, GLuint dimensions, */ static GLint get_specific_compressed_tex_format(GLcontext *ctx, - GLint ifmt, GLint numDimensions) + GLint ifmt, GLint numDimensions, + GLint *levelp, + GLsizei *widthp, + GLsizei *heightp, + GLsizei *depthp, + GLint *borderp, + GLenum *formatp, + GLenum *typep) { char message[100]; GLint internalFormat = ifmt; @@ -1357,22 +1365,14 @@ get_specific_compressed_tex_format(GLcontext *ctx, && ctx->Driver.SpecificCompressedTexFormat) { /* * First, ask the driver for the specific format. + * We do this for all formats, since we may want to + * fake one compressed format for another. */ - switch (internalFormat) { - case GL_COMPRESSED_ALPHA_ARB: - case GL_COMPRESSED_LUMINANCE_ARB: - case GL_COMPRESSED_LUMINANCE_ALPHA_ARB: - case GL_COMPRESSED_INTENSITY_ARB: - case GL_COMPRESSED_RGB_ARB: - case GL_COMPRESSED_RGBA_ARB: - internalFormat = (*ctx->Driver.SpecificCompressedTexFormat) - (ctx, internalFormat, numDimensions); - /* XXX shouldn't we return now? 
*/ - break; - default: - /* silence compiler warnings */ - ; - } + internalFormat = (*ctx->Driver.SpecificCompressedTexFormat) + (ctx, internalFormat, numDimensions, + levelp, + widthp, heightp, depthp, + borderp, formatp, typep); } /* @@ -1438,7 +1438,6 @@ get_specific_compressed_tex_format(GLcontext *ctx, } - /* * Called from the API. Note that width includes the border. */ @@ -1456,7 +1455,10 @@ _mesa_TexImage1D( GLenum target, GLint level, GLint internalFormat, struct gl_texture_image *texImage; GLint ifmt; - ifmt = get_specific_compressed_tex_format(ctx, internalFormat, 1); + ifmt = get_specific_compressed_tex_format(ctx, internalFormat, 1, + &level, + &width, 0, 0, + &border, &format, &type); if (ifmt < 0) { /* * The error here is that we were sent a generic compressed @@ -1536,10 +1538,16 @@ _mesa_TexImage1D( GLenum target, GLint level, GLint internalFormat, gl_put_texobj_on_dirty_list( ctx, texObj ); ctx->NewState |= NEW_TEXTURING; } - else if (target==GL_PROXY_TEXTURE_1D) { + else if (target == GL_PROXY_TEXTURE_1D) { /* Proxy texture: check for errors and update proxy state */ - if (texture_error_check(ctx, target, level, internalFormat, - format, type, 1, width, 1, 1, border)) { + GLenum error = texture_error_check(ctx, target, level, internalFormat, + format, type, 1, width, 1, 1, border); + if (!error && ctx->Driver.TestProxyTexImage) { + error = !(*ctx->Driver.TestProxyTexImage)(ctx, target, level, + internalFormat, format, type, + width, 1, 1, border); + } + if (error) { /* if error, clear all proxy texture image parameters */ if (level>=0 && level<ctx->Const.MaxTextureLevels) { clear_proxy_teximage(ctx->Texture.Proxy1D->Image[level]); @@ -1576,7 +1584,10 @@ _mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat, struct gl_texture_image *texImage; GLint ifmt; - ifmt = get_specific_compressed_tex_format(ctx, internalFormat, 2); + ifmt = get_specific_compressed_tex_format(ctx, internalFormat, 2, + &level, + &width, &height, 0, + &border, 
&format, &type); if (ifmt < 0) { /* * The error here is that we were sent a generic compressed @@ -1667,10 +1678,16 @@ _mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat, gl_put_texobj_on_dirty_list( ctx, texObj ); ctx->NewState |= NEW_TEXTURING; } - else if (target==GL_PROXY_TEXTURE_2D) { + else if (target == GL_PROXY_TEXTURE_2D) { /* Proxy texture: check for errors and update proxy state */ - if (texture_error_check(ctx, target, level, internalFormat, - format, type, 2, width, height, 1, border)) { + GLenum error = texture_error_check(ctx, target, level, internalFormat, + format, type, 2, width, height, 1, border); + if (!error && ctx->Driver.TestProxyTexImage) { + error = !(*ctx->Driver.TestProxyTexImage)(ctx, target, level, + internalFormat, format, type, + width, height, 1, border); + } + if (error) { /* if error, clear all proxy texture image parameters */ if (level>=0 && level<ctx->Const.MaxTextureLevels) { clear_proxy_teximage(ctx->Texture.Proxy2D->Image[level]); @@ -1690,7 +1707,6 @@ _mesa_TexImage2D( GLenum target, GLint level, GLint internalFormat, } - /* * Called by the API or display list executor. * Note that width and height include the border. 
@@ -1710,7 +1726,10 @@ _mesa_TexImage3D( GLenum target, GLint level, GLint internalFormat, struct gl_texture_image *texImage; GLint ifmt; - ifmt = get_specific_compressed_tex_format(ctx, internalFormat, 3); + ifmt = get_specific_compressed_tex_format(ctx, internalFormat, 3, + &level, + &width, &height, &depth, + &border, &format, &type); if (ifmt < 0) { /* * The error here is that we were sent a generic compressed @@ -1791,10 +1810,16 @@ _mesa_TexImage3D( GLenum target, GLint level, GLint internalFormat, gl_put_texobj_on_dirty_list( ctx, texObj ); ctx->NewState |= NEW_TEXTURING; } - else if (target==GL_PROXY_TEXTURE_3D) { + else if (target == GL_PROXY_TEXTURE_3D) { /* Proxy texture: check for errors and update proxy state */ - if (texture_error_check(ctx, target, level, internalFormat, - format, type, 3, width, height, depth, border)) { + GLenum error = texture_error_check(ctx, target, level, internalFormat, + format, type, 3, width, height, depth, border); + if (!error && ctx->Driver.TestProxyTexImage) { + error = !(*ctx->Driver.TestProxyTexImage)(ctx, target, level, + internalFormat, format, type, + width, height, depth, border); + } + if (error) { /* if error, clear all proxy texture image parameters */ if (level>=0 && level<ctx->Const.MaxTextureLevels) { clear_proxy_teximage(ctx->Texture.Proxy3D->Image[level]); @@ -1915,6 +1940,56 @@ _mesa_get_teximage_from_driver( GLcontext *ctx, GLenum target, GLint level, } +/* + * Get all the mipmap images for a texture object from the device driver. + * Actually, only get mipmap images if we're using a mipmap filter. 
+ */ +GLboolean +_mesa_get_teximages_from_driver(GLcontext *ctx, + struct gl_texture_object *texObj) +{ + if (ctx->Driver.GetTexImage) { + static const GLenum targets[] = { + GL_TEXTURE_1D, + GL_TEXTURE_2D, + GL_TEXTURE_3D, + GL_TEXTURE_CUBE_MAP_ARB, + GL_TEXTURE_CUBE_MAP_ARB, + GL_TEXTURE_CUBE_MAP_ARB + }; + GLboolean needLambda = (texObj->MinFilter != texObj->MagFilter); + GLenum target = targets[texObj->Dimensions - 1]; + if (needLambda) { + GLint level; + /* Get images for all mipmap levels. We might not need them + * all but this is easier. We're on a (slow) software path + * anyway. + */ + for (level = 0; level <= texObj->P; level++) { + struct gl_texture_image *texImg = texObj->Image[level]; + if (texImg && !texImg->Data) { + _mesa_get_teximage_from_driver(ctx, target, level, texObj); + if (!texImg->Data) + return GL_FALSE; /* out of memory */ + } + } + } + else { + GLint level = texObj->BaseLevel; + struct gl_texture_image *texImg = texObj->Image[level]; + if (texImg && !texImg->Data) { + _mesa_get_teximage_from_driver(ctx, target, level, texObj); + if (!texImg->Data) + return GL_FALSE; /* out of memory */ + } + } + return GL_TRUE; + } + return GL_FALSE; +} + + + void _mesa_GetTexImage( GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels ) @@ -2078,7 +2153,7 @@ _mesa_GetTexImage( GLenum target, GLint level, GLenum format, default: gl_problem( ctx, "bad format in gl_GetTexImage" ); } - _mesa_pack_rgba_span( ctx, width, (const GLubyte (*)[4])rgba, + _mesa_pack_rgba_span( ctx, width, (CONST GLubyte (*)[4])rgba, format, type, dest, &ctx->Pack, GL_TRUE ); } } @@ -2232,8 +2307,9 @@ _mesa_TexSubImage2D( GLenum target, GLint level, /* color index texture */ GLubyte *dst = texImage->Data + (yoffsetb * texImage->Width + xoffsetb) * texComponents; - const GLubyte *src = _mesa_image_address(&ctx->Unpack, pixels, - width, height, format, type, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(&ctx->Unpack, pixels, width, height, 
format, + type, 0, 0, 0); GLint row; for (row = 0; row < height; row++) { _mesa_unpack_index_span(ctx, width, GL_UNSIGNED_BYTE, dst, type, @@ -2246,8 +2322,9 @@ _mesa_TexSubImage2D( GLenum target, GLint level, /* color texture */ GLubyte *dst = texImage->Data + (yoffsetb * texImage->Width + xoffsetb) * texComponents; - const GLubyte *src = _mesa_image_address(&ctx->Unpack, pixels, - width, height, format, type, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(&ctx->Unpack, pixels, width, height, format, + type, 0, 0, 0); GLint row; for (row = 0; row < height; row++) { _mesa_unpack_ubyte_color_span(ctx, width, texFormat, dst, format, @@ -2337,8 +2414,9 @@ _mesa_TexSubImage3D( GLenum target, GLint level, /* color index texture */ GLint img, row; for (img = 0; img < depth; img++) { - const GLubyte *src = _mesa_image_address(&ctx->Unpack, pixels, - width, height, format, type, img, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(&ctx->Unpack, pixels, width, height, + format, type, img, 0, 0); GLubyte *dst = texImage->Data + ((zoffsetb + img) * dstRectArea + yoffsetb * texWidth + xoffsetb) * texComponents; for (row = 0; row < height; row++) { @@ -2353,8 +2431,9 @@ _mesa_TexSubImage3D( GLenum target, GLint level, /* color texture */ GLint img, row; for (img = 0; img < depth; img++) { - const GLubyte *src = _mesa_image_address(&ctx->Unpack, pixels, - width, height, format, type, img, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(&ctx->Unpack, pixels, width, height, + format, type, img, 0, 0); GLubyte *dst = texImage->Data + ((zoffsetb + img) * dstRectArea + yoffsetb * texWidth + xoffsetb) * texComponents; for (row = 0; row < height; row++) { @@ -2712,7 +2791,7 @@ _mesa_CompressedTexImage1DARB(GLenum target, GLint level, gl_error(ctx, GL_INVALID_VALUE, "glCompressedTexImage1DARB(imageSize)"); return; } - texImage->Data = MALLOC(computedImageSize); + texImage->Data = (GLubyte *) MALLOC(computedImageSize); 
if (texImage->Data) { MEMCPY(texImage->Data, data, computedImageSize); } @@ -2738,8 +2817,14 @@ _mesa_CompressedTexImage1DARB(GLenum target, GLint level, } else if (target == GL_PROXY_TEXTURE_1D) { /* Proxy texture: check for errors and update proxy state */ - if (texture_error_check(ctx, target, level, internalFormat, - GL_NONE, GL_NONE, 1, width, 1, 1, border)) { + GLenum error = texture_error_check(ctx, target, level, internalFormat, + GL_NONE, GL_NONE, 1, width, 1, 1, border); + if (!error && ctx->Driver.TestProxyTexImage) { + error = !(*ctx->Driver.TestProxyTexImage)(ctx, target, level, + internalFormat, GL_NONE, GL_NONE, + width, 1, 1, border); + } + if (error) { /* if error, clear all proxy texture image parameters */ if (level>=0 && level<ctx->Const.MaxTextureLevels) { clear_proxy_teximage(ctx->Texture.Proxy1D->Image[level]); @@ -2841,7 +2926,7 @@ _mesa_CompressedTexImage2DARB(GLenum target, GLint level, gl_error(ctx, GL_INVALID_VALUE, "glCompressedTexImage2DARB(imageSize)"); return; } - texImage->Data = MALLOC(computedImageSize); + texImage->Data = (GLubyte *) MALLOC(computedImageSize); if (texImage->Data) { MEMCPY(texImage->Data, data, computedImageSize); } @@ -2867,8 +2952,14 @@ _mesa_CompressedTexImage2DARB(GLenum target, GLint level, } else if (target == GL_PROXY_TEXTURE_2D) { /* Proxy texture: check for errors and update proxy state */ - if (texture_error_check(ctx, target, level, internalFormat, - GL_NONE, GL_NONE, 1, width, 1, 1, border)) { + GLenum error = texture_error_check(ctx, target, level, internalFormat, + GL_NONE, GL_NONE, 2, width, height, 1, border); + if (!error && ctx->Driver.TestProxyTexImage) { + error = !(*ctx->Driver.TestProxyTexImage)(ctx, target, level, + internalFormat, GL_NONE, GL_NONE, + width, height, 1, border); + } + if (error) { /* if error, clear all proxy texture image parameters */ if (level>=0 && level<ctx->Const.MaxTextureLevels) { clear_proxy_teximage(ctx->Texture.Proxy2D->Image[level]); @@ -2964,7 +3055,7 @@ 
_mesa_CompressedTexImage3DARB(GLenum target, GLint level, gl_error(ctx, GL_INVALID_VALUE, "glCompressedTexImage3DARB(imageSize)"); return; } - texImage->Data = MALLOC(computedImageSize); + texImage->Data = (GLubyte *) MALLOC(computedImageSize); if (texImage->Data) { MEMCPY(texImage->Data, data, computedImageSize); } @@ -2990,8 +3081,14 @@ _mesa_CompressedTexImage3DARB(GLenum target, GLint level, } else if (target == GL_PROXY_TEXTURE_3D) { /* Proxy texture: check for errors and update proxy state */ - if (texture_error_check(ctx, target, level, internalFormat, - GL_NONE, GL_NONE, 1, width, height, depth, border)) { + GLenum error = texture_error_check(ctx, target, level, internalFormat, + GL_NONE, GL_NONE, 1, width, height, depth, border); + if (!error && ctx->Driver.TestProxyTexImage) { + error = !(*ctx->Driver.TestProxyTexImage)(ctx, target, level, + internalFormat, GL_NONE, GL_NONE, + width, height, depth, border); + } + if (error) { /* if error, clear all proxy texture image parameters */ if (level>=0 && level<ctx->Const.MaxTextureLevels) { clear_proxy_teximage(ctx->Texture.Proxy3D->Image[level]); diff --git a/xc/extras/Mesa/src/teximage.h b/xc/extras/Mesa/src/teximage.h index 1ff5e9038..ebc45e690 100644 --- a/xc/extras/Mesa/src/teximage.h +++ b/xc/extras/Mesa/src/teximage.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/teximage.h,v 1.6 2000/09/26 15:56:33 tsi Exp $ */ + #ifndef TEXIMAGE_H #define TEXIMAGE_H @@ -64,8 +64,14 @@ _mesa_select_tex_image(GLcontext *ctx, const struct gl_texture_unit *texUnit, extern void -_mesa_get_teximage_from_driver( GLcontext *ctx, GLenum target, GLint level, - const struct gl_texture_object *texObj ); +_mesa_get_teximage_from_driver(GLcontext *ctx, GLenum target, GLint level, + const struct gl_texture_object *texObj); + + +extern GLboolean +_mesa_get_teximages_from_driver(GLcontext *ctx, + struct gl_texture_object *texObj); + /*** API entry point functions ***/ diff --git a/xc/extras/Mesa/src/texobj.c b/xc/extras/Mesa/src/texobj.c index 75d63fff1..86d438c47 100644 --- a/xc/extras/Mesa/src/texobj.c +++ b/xc/extras/Mesa/src/texobj.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/texobj.c,v 1.6 2000/09/26 15:56:33 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -62,6 +62,7 @@ gl_alloc_texture_object( struct gl_shared_state *shared, GLuint name, if (obj) { /* init the non-zero fields */ + _glthread_INIT_MUTEX(obj->Mutex); obj->RefCount = 1; obj->Name = name; obj->Dimensions = dimensions; @@ -157,6 +158,20 @@ void gl_free_texture_object( struct gl_shared_state *shared, /* + * This is helpful to diagnose incomplete texture problems. + */ +#if 0 +static void +incomplete(const struct gl_texture_object *t, const char *why) +{ + printf("Texture Obj %d incomplete because: %s\n", t->Name, why); +} +#else +#define incomplete(a, b) +#endif + + +/* * Examine a texture object to determine if it is complete or not. * The t->Complete flag will be set to GL_TRUE or GL_FALSE accordingly. 
*/ @@ -170,6 +185,7 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, /* Always need level zero image */ if (!t->Image[baseLevel]) { + incomplete(t, "Image[baseLevel] == NULL"); t->Complete = GL_FALSE; return; } @@ -213,6 +229,7 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, t->NegZ[baseLevel]->Width2 != w || t->NegZ[baseLevel]->Height2 != h) { t->Complete = GL_FALSE; + incomplete(t, "Non-quare cubemap image"); return; } } @@ -228,6 +245,7 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, if (minLevel > maxLevel) { t->Complete = GL_FALSE; + incomplete(t, "minLevel > maxLevel"); return; } @@ -236,10 +254,12 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, if (t->Image[i]) { if (t->Image[i]->Format != t->Image[baseLevel]->Format) { t->Complete = GL_FALSE; + incomplete(t, "Format[i] != Format[baseLevel]"); return; } if (t->Image[i]->Border != t->Image[baseLevel]->Border) { t->Complete = GL_FALSE; + incomplete(t, "Border[i] != Border[baseLevel]"); return; } } @@ -256,10 +276,12 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, if (i >= minLevel && i <= maxLevel) { if (!t->Image[i]) { t->Complete = GL_FALSE; + incomplete(t, "1D Image[i] == NULL"); return; } if (t->Image[i]->Width2 != width ) { t->Complete = GL_FALSE; + incomplete(t, "1D Image[i] bad width"); return; } } @@ -282,14 +304,17 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, if (i >= minLevel && i <= maxLevel) { if (!t->Image[i]) { t->Complete = GL_FALSE; + incomplete(t, "2D Image[i] == NULL"); return; } if (t->Image[i]->Width2 != width) { t->Complete = GL_FALSE; + incomplete(t, "2D Image[i] bad width"); return; } if (t->Image[i]->Height2 != height) { t->Complete = GL_FALSE; + incomplete(t, "2D Image[i] bad height"); return; } if (width==1 && height==1) { @@ -315,19 +340,23 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, } if (i >= minLevel && i <= maxLevel) { if (!t->Image[i]) { + incomplete(t, "3D Image[i] == NULL"); t->Complete = GL_FALSE; return; } 
if (t->Image[i]->Width2 != width) { t->Complete = GL_FALSE; + incomplete(t, "3D Image[i] bad width"); return; } if (t->Image[i]->Height2 != height) { t->Complete = GL_FALSE; + incomplete(t, "3D Image[i] bad height"); return; } if (t->Image[i]->Depth2 != depth) { t->Complete = GL_FALSE; + incomplete(t, "3D Image[i] bad depth"); return; } } @@ -353,6 +382,7 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, !t->PosY[i] || !t->NegY[i] || !t->PosZ[i] || !t->NegZ[i]) { t->Complete = GL_FALSE; + incomplete(t, "CubeMap Image[i] == NULL"); return; } /* check that all six images have same size */ @@ -362,6 +392,7 @@ _mesa_test_texobj_completeness( const GLcontext *ctx, t->PosZ[i]->Width2!=width || t->PosZ[i]->Height2!=height || t->NegZ[i]->Width2!=width || t->NegZ[i]->Height2!=height) { t->Complete = GL_FALSE; + incomplete(t, "CubeMap Image[i] bad size"); return; } } @@ -589,6 +620,8 @@ _mesa_BindTexture( GLenum target, GLuint texName ) /* Pass BindTexture call to device driver */ if (ctx->Driver.BindTexture) { (*ctx->Driver.BindTexture)( ctx, target, newTexObj ); + /* Make sure the Driver.UpdateState() function gets called! */ + ctx->NewState |= NEW_TEXTURING; } if (oldTexObj->Name > 0) { diff --git a/xc/extras/Mesa/src/texstate.c b/xc/extras/Mesa/src/texstate.c index 7324ca657..523dbf82d 100644 --- a/xc/extras/Mesa/src/texstate.c +++ b/xc/extras/Mesa/src/texstate.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/texstate.c,v 1.6 2000/09/26 15:56:33 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -1591,7 +1591,7 @@ void _mesa_ActiveTextureARB( GLenum target ) { GET_CURRENT_CONTEXT(ctx); - GLint maxUnits = ctx->Const.MaxTextureUnits; + const GLuint maxUnits = ctx->Const.MaxTextureUnits; ASSERT_OUTSIDE_BEGIN_END( ctx, "glActiveTextureARB" ); @@ -1618,7 +1618,7 @@ void _mesa_ClientActiveTextureARB( GLenum target ) { GET_CURRENT_CONTEXT(ctx); - GLint maxUnits = ctx->Const.MaxTextureUnits; + const GLuint maxUnits = ctx->Const.MaxTextureUnits; ASSERT_OUTSIDE_BEGIN_END( ctx, "glClientActiveTextureARB" ); diff --git a/xc/extras/Mesa/src/texture.c b/xc/extras/Mesa/src/texture.c index fe4863ff4..580121f47 100644 --- a/xc/extras/Mesa/src/texture.c +++ b/xc/extras/Mesa/src/texture.c @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/texture.c,v 1.7 2000/09/26 15:56:34 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -2005,8 +2005,8 @@ _mesa_set_texture_sampler( struct gl_texture_object *t ) #define PROD(A,B) ( (GLuint)(A) * ((GLuint)(B)+1) ) static INLINE void -_mesa_texture_combine(CONST GLcontext *ctx, - CONST struct gl_texture_unit *textureUnit, +_mesa_texture_combine(const GLcontext *ctx, + const struct gl_texture_unit *textureUnit, GLuint n, GLubyte (*primary_rgba)[4], GLubyte (*texel)[4], @@ -2035,9 +2035,9 @@ _mesa_texture_combine(CONST GLcontext *ctx, case GL_CONSTANT_EXT: { GLubyte (*c)[4] = ccolor[j]; - GLubyte col = FLOAT_TO_UBYTE(textureUnit->EnvColor[3]); + GLubyte alpha = FLOAT_TO_UBYTE(textureUnit->EnvColor[3]); for (i = 0; i < n; i++) - c[i][ACOMP] = col; + c[i][ACOMP] = alpha; argA[j] = ccolor[j]; } break; @@ -2045,17 +2045,6 @@ _mesa_texture_combine(CONST GLcontext *ctx, gl_problem(NULL, "invalid combine source"); } - if (textureUnit->CombineOperandA[j] == GL_ONE_MINUS_SRC_ALPHA) { - GLubyte (*src)[4] = argA[j]; - GLubyte (*dst)[4] = ccolor[j]; - argA[j] = ccolor[j]; - for (i = 0; i < n; i++) - dst[i][ACOMP] = 255 - src[i][ACOMP]; - } - - if (j == 2) /* arg2 has no color component in specification. 
*/ - break; - switch (textureUnit->CombineSourceRGB[j]) { case GL_TEXTURE: argRGB[j] = texel; @@ -2069,16 +2058,13 @@ _mesa_texture_combine(CONST GLcontext *ctx, case GL_CONSTANT_EXT: { GLubyte (*c)[4] = ccolor[j]; - GLubyte col[4]; - - col[RCOMP] = FLOAT_TO_UBYTE(textureUnit->EnvColor[0]); - col[GCOMP] = FLOAT_TO_UBYTE(textureUnit->EnvColor[1]); - col[BCOMP] = FLOAT_TO_UBYTE(textureUnit->EnvColor[2]); - + const GLubyte red = FLOAT_TO_UBYTE(textureUnit->EnvColor[0]); + const GLubyte green = FLOAT_TO_UBYTE(textureUnit->EnvColor[1]); + const GLubyte blue = FLOAT_TO_UBYTE(textureUnit->EnvColor[2]); for (i = 0; i < n; i++) { - c[i][RCOMP] = col[RCOMP]; - c[i][GCOMP] = col[GCOMP]; - c[i][BCOMP] = col[BCOMP]; + c[i][RCOMP] = red; + c[i][GCOMP] = green; + c[i][BCOMP] = blue; } argRGB[j] = ccolor[j]; } @@ -2099,13 +2085,17 @@ _mesa_texture_combine(CONST GLcontext *ctx, dst[i][GCOMP] = 255 - src[i][GCOMP]; dst[i][BCOMP] = 255 - src[i][BCOMP]; } - } else if (textureUnit->CombineOperandRGB[j] == GL_SRC_ALPHA) { + } + else if (textureUnit->CombineOperandRGB[j] == GL_SRC_ALPHA) { + src = argA[j]; for (i = 0; i < n; i++) { dst[i][RCOMP] = src[i][ACOMP]; dst[i][GCOMP] = src[i][ACOMP]; dst[i][BCOMP] = src[i][ACOMP]; } - } else { /* GL_ONE_MINUS_SRC_ALPHA */ + } + else { /* GL_ONE_MINUS_SRC_ALPHA */ + src = argA[j]; for (i = 0; i < n; i++) { dst[i][RCOMP] = 255 - src[i][ACOMP]; dst[i][GCOMP] = 255 - src[i][ACOMP]; @@ -2114,6 +2104,15 @@ _mesa_texture_combine(CONST GLcontext *ctx, } } + if (textureUnit->CombineOperandA[j] == GL_ONE_MINUS_SRC_ALPHA) { + GLubyte (*src)[4] = argA[j]; + GLubyte (*dst)[4] = ccolor[j]; + argA[j] = ccolor[j]; + for (i = 0; i < n; i++) { + dst[i][ACOMP] = 255 - src[i][ACOMP]; + } + } + if (textureUnit->CombineModeRGB == GL_REPLACE && textureUnit->CombineModeA == GL_REPLACE) { break; /* done, we need only arg0 */ @@ -2130,10 +2129,22 @@ _mesa_texture_combine(CONST GLcontext *ctx, case GL_REPLACE: { const GLubyte (*arg0)[4] = (const GLubyte (*)[4]) argRGB[0]; 
- for (i = 0; i < n; i++) { - rgba[i][RCOMP] = arg0[i][RCOMP] << RGBshift; - rgba[i][GCOMP] = arg0[i][GCOMP] << RGBshift; - rgba[i][BCOMP] = arg0[i][BCOMP] << RGBshift; + if (RGBshift) { + for (i = 0; i < n; i++) { + GLuint r = (GLuint) arg0[i][RCOMP] << RGBshift; + GLuint g = (GLuint) arg0[i][GCOMP] << RGBshift; + GLuint b = (GLuint) arg0[i][BCOMP] << RGBshift; + rgba[i][RCOMP] = MIN2(r, 255); + rgba[i][GCOMP] = MIN2(g, 255); + rgba[i][BCOMP] = MIN2(b, 255); + } + } + else { + for (i = 0; i < n; i++) { + rgba[i][RCOMP] = arg0[i][RCOMP]; + rgba[i][GCOMP] = arg0[i][GCOMP]; + rgba[i][BCOMP] = arg0[i][BCOMP]; + } } } break; @@ -2143,9 +2154,12 @@ _mesa_texture_combine(CONST GLcontext *ctx, const GLubyte (*arg1)[4] = (const GLubyte (*)[4]) argRGB[1]; RGBshift = 8 - RGBshift; for (i = 0; i < n; i++) { - rgba[i][RCOMP] = (GLubyte) (PROD(arg0[i][0], arg1[i][RCOMP]) >> RGBshift); - rgba[i][GCOMP] = (GLubyte) (PROD(arg0[i][1], arg1[i][GCOMP]) >> RGBshift); - rgba[i][BCOMP] = (GLubyte) (PROD(arg0[i][2], arg1[i][BCOMP]) >> RGBshift); + GLuint r = PROD(arg0[i][0], arg1[i][RCOMP]) >> RGBshift; + GLuint g = PROD(arg0[i][1], arg1[i][GCOMP]) >> RGBshift; + GLuint b = PROD(arg0[i][2], arg1[i][BCOMP]) >> RGBshift; + rgba[i][RCOMP] = (GLubyte) MIN2(r, 255); + rgba[i][GCOMP] = (GLubyte) MIN2(g, 255); + rgba[i][BCOMP] = (GLubyte) MIN2(b, 255); } } break; @@ -2157,9 +2171,9 @@ _mesa_texture_combine(CONST GLcontext *ctx, GLint r = ((GLuint) arg0[i][RCOMP] + arg1[i][RCOMP]) << RGBshift; GLint g = ((GLuint) arg0[i][GCOMP] + arg1[i][GCOMP]) << RGBshift; GLint b = ((GLuint) arg0[i][BCOMP] + arg1[i][BCOMP]) << RGBshift; - rgba[i][RCOMP] = (r > 255) ? 255 : (GLubyte) r; - rgba[i][GCOMP] = (g > 255) ? 255 : (GLubyte) g; - rgba[i][BCOMP] = (b > 255) ? 255 : (GLubyte) b; + rgba[i][RCOMP] = (GLubyte) MIN2(r, 255); + rgba[i][GCOMP] = (GLubyte) MIN2(g, 255); + rgba[i][BCOMP] = (GLubyte) MIN2(b, 255); } } break; @@ -2174,9 +2188,9 @@ _mesa_texture_combine(CONST GLcontext *ctx, r = (r < 0) ? 
0 : r << RGBshift; b = (b < 0) ? 0 : b << RGBshift; g = (g < 0) ? 0 : g << RGBshift; - rgba[i][RCOMP] = (r > 255) ? 255 : (GLubyte) r; - rgba[i][GCOMP] = (g > 255) ? 255 : (GLubyte) g; - rgba[i][BCOMP] = (b > 255) ? 255 : (GLubyte) b; + rgba[i][RCOMP] = (GLubyte) MIN2(r, 255); + rgba[i][GCOMP] = (GLubyte) MIN2(g, 255); + rgba[i][BCOMP] = (GLubyte) MIN2(b, 255); } } break; @@ -2187,9 +2201,18 @@ _mesa_texture_combine(CONST GLcontext *ctx, const GLubyte (*arg2)[4] = (const GLubyte (*)[4]) argRGB[2]; RGBshift = 8 - RGBshift; for (i = 0; i < n; i++) { - rgba[i][RCOMP] = (GLubyte) ((PROD(arg0[i][RCOMP], arg2[i][ACOMP]) + PROD(arg1[i][RCOMP], 255 - arg2[i][ACOMP])) >> RGBshift); - rgba[i][GCOMP] = (GLubyte) ((PROD(arg0[i][GCOMP], arg2[i][ACOMP]) + PROD(arg1[i][GCOMP], 255 - arg2[i][ACOMP])) >> RGBshift); - rgba[i][BCOMP] = (GLubyte) ((PROD(arg0[i][BCOMP], arg2[i][ACOMP]) + PROD(arg1[i][BCOMP], 255 - arg2[i][ACOMP])) >> RGBshift); + GLuint r = (PROD(arg0[i][RCOMP], arg2[i][RCOMP]) + + PROD(arg1[i][RCOMP], 255 - arg2[i][RCOMP])) + >> RGBshift; + GLuint g = (PROD(arg0[i][GCOMP], arg2[i][GCOMP]) + + PROD(arg1[i][GCOMP], 255 - arg2[i][GCOMP])) + >> RGBshift; + GLuint b = (PROD(arg0[i][BCOMP], arg2[i][BCOMP]) + + PROD(arg1[i][BCOMP], 255 - arg2[i][BCOMP])) + >> RGBshift; + rgba[i][RCOMP] = (GLubyte) MIN2(r, 255); + rgba[i][GCOMP] = (GLubyte) MIN2(g, 255); + rgba[i][BCOMP] = (GLubyte) MIN2(b, 255); } } break; @@ -2201,8 +2224,17 @@ _mesa_texture_combine(CONST GLcontext *ctx, case GL_REPLACE: { const GLubyte (*arg0)[4] = (const GLubyte (*)[4]) argA[0]; - for (i = 0; i < n; i++) - rgba[i][ACOMP] = arg0[i][ACOMP] << Ashift; + if (Ashift) { + for (i = 0; i < n; i++) { + GLuint a = (GLuint) arg0[i][ACOMP] << Ashift; + rgba[i][ACOMP] = (GLubyte) MIN2(a, 255); + } + } + else { + for (i = 0; i < n; i++) { + rgba[i][ACOMP] = arg0[i][ACOMP]; + } + } } break; case GL_MODULATE: @@ -2210,8 +2242,10 @@ _mesa_texture_combine(CONST GLcontext *ctx, const GLubyte (*arg0)[4] = (const GLubyte 
(*)[4]) argA[0]; const GLubyte (*arg1)[4] = (const GLubyte (*)[4]) argA[1]; Ashift = 8 - Ashift; - for (i = 0; i < n; i++) - rgba[i][ACOMP] = (GLubyte) (PROD(arg0[i][ACOMP], arg1[i][ACOMP]) >> Ashift); + for (i = 0; i < n; i++) { + GLuint a = (PROD(arg0[i][ACOMP], arg1[i][ACOMP]) >> Ashift); + rgba[i][ACOMP] = (GLubyte) MIN2(a, 255); + } } break; case GL_ADD: @@ -2220,7 +2254,7 @@ _mesa_texture_combine(CONST GLcontext *ctx, const GLubyte (*arg1)[4] = (const GLubyte (*)[4]) argA[1]; for (i = 0; i < n; i++) { GLint a = ((GLint) arg0[i][ACOMP] + arg1[i][ACOMP]) << Ashift; - rgba[i][ACOMP] = (a > 255) ? 255 : (GLubyte) a; + rgba[i][ACOMP] = (GLubyte) MIN2(a, 255); } } break; @@ -2231,7 +2265,7 @@ _mesa_texture_combine(CONST GLcontext *ctx, for (i = 0; i < n; i++) { GLint a = (GLint) arg0[i][ACOMP] + (GLint) arg1[i][ACOMP] - 128; a = (a < 0) ? 0 : a << Ashift; - rgba[i][ACOMP] = (a > 255) ? 255 : (GLubyte) a; + rgba[i][ACOMP] = (GLubyte) MIN2(a, 255); } } break; @@ -2241,8 +2275,12 @@ _mesa_texture_combine(CONST GLcontext *ctx, const GLubyte (*arg1)[4] = (const GLubyte (*)[4]) argA[1]; const GLubyte (*arg2)[4] = (const GLubyte (*)[4]) argA[2]; Ashift = 8 - Ashift; - for (i=0; i<n; i++) - rgba[i][ACOMP] = (GLubyte) ((PROD(arg0[i][ACOMP], arg2[i][ACOMP]) + PROD(arg1[i][ACOMP], 255 - arg2[i][ACOMP])) >> Ashift); + for (i=0; i<n; i++) { + GLuint a = (PROD(arg0[i][ACOMP], arg2[i][ACOMP]) + + PROD(arg1[i][ACOMP], 255 - arg2[i][ACOMP])) + >> Ashift; + rgba[i][ACOMP] = (GLubyte) MIN2(a, 255); + } } break; default: @@ -2269,7 +2307,7 @@ _mesa_texture_combine(CONST GLcontext *ctx, * according to the texture environment mode. 
*/ static void apply_texture( CONST GLcontext *ctx, - CONST struct gl_texture_unit *texUnit, + const struct gl_texture_unit *texUnit, GLuint n, GLubyte primary_rgba[][4], GLubyte texel[][4], GLubyte rgba[][4] ) @@ -2645,7 +2683,7 @@ static void apply_texture( CONST GLcontext *ctx, gl_problem(ctx, "Bad format in apply_texture (GL_COMBINE_EXT)"); return; } - _mesa_texture_combine (ctx, texUnit, n, primary_rgba, texel, rgba); + _mesa_texture_combine(ctx, texUnit, n, primary_rgba, texel, rgba); break; default: @@ -2666,7 +2704,7 @@ void gl_texture_pixels( GLcontext *ctx, GLuint texUnit, GLuint n, GLubyte primary_rgba[][4], GLubyte rgba[][4] ) { GLuint mask = (TEXTURE0_1D | TEXTURE0_2D | TEXTURE0_3D | TEXTURE0_CUBE) << (texUnit * 4); - if (ctx->Texture.Enabled & mask) { + if (ctx->Texture.ReallyEnabled & mask) { const struct gl_texture_unit *textureUnit = &ctx->Texture.Unit[texUnit]; if (textureUnit->Current && textureUnit->Current->SampleFunc) { GLubyte texel[PB_SIZE][4]; @@ -2691,41 +2729,9 @@ void gl_texture_pixels( GLcontext *ctx, GLuint texUnit, GLuint n, } /* fetch texture images from device driver, if needed */ - { - static const GLenum targets[] = { - GL_TEXTURE_1D, - GL_TEXTURE_2D, - GL_TEXTURE_3D, - GL_TEXTURE_CUBE_MAP_ARB, - GL_TEXTURE_CUBE_MAP_ARB, - GL_TEXTURE_CUBE_MAP_ARB - }; - struct gl_texture_object *texObj = textureUnit->Current; - GLboolean needLambda = (texObj->MinFilter != texObj->MagFilter); - GLenum target = targets[texObj->Dimensions - 1]; - if (needLambda) { - GLint level; - /* Get images for all mipmap levels. We might not need them - * all but this is easier. We're on a (slow) software path - * anyway. 
- */ - for (level = 0; level <= texObj->P; level++) { - struct gl_texture_image *texImg = texObj->Image[level]; - if (texImg && !texImg->Data) { - _mesa_get_teximage_from_driver(ctx, target, level, texObj); - if (!texImg->Data) - return; /* out of memory */ - } - } - } - else { - GLint level = texObj->BaseLevel; - struct gl_texture_image *texImg = texObj->Image[level]; - if (texImg && !texImg->Data) { - _mesa_get_teximage_from_driver(ctx, target, level, texObj); - if (!texImg->Data) - return; /* out of memory */ - } + if (ctx->Driver.GetTexImage) { + if (!_mesa_get_teximages_from_driver(ctx, textureUnit->Current)) { + return; } } diff --git a/xc/extras/Mesa/src/texture.h b/xc/extras/Mesa/src/texture.h index 379e7a1d4..113602bf7 100644 --- a/xc/extras/Mesa/src/texture.h +++ b/xc/extras/Mesa/src/texture.h @@ -22,7 +22,10 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/texture.h,v 1.6 2000/09/26 15:56:34 tsi Exp $ */ + + + + #ifndef TEXTURE_H #define TEXTURE_H diff --git a/xc/extras/Mesa/src/texutil.c b/xc/extras/Mesa/src/texutil.c index a12ddd745..6d45a3b2f 100644 --- a/xc/extras/Mesa/src/texutil.c +++ b/xc/extras/Mesa/src/texutil.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/texutil.c,v 1.3 2000/09/26 15:56:34 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -80,7 +80,10 @@ * GL_RGBA GL_UNSIGNED_BYTE MESA_A4_R4_G4_B4 * GL_BGRA GL_UNSIGNED_SHORT_4_4_4_4_REV MESA_A4_R4_G4_B4 * GL_BGRA GL_UNSIGHED_SHORT_1_5_5_5_REV MESA_A1_R5_G5_B5 + * GL_RGBA GL_UNSIGNED_BYTE MESA_A1_R5_G5_B5 * GL_BGRA GL_UNSIGNED_INT_8_8_8_8_REV MESA_A8_R8_G8_B8 + * GL_RGBA GL_UNSIGNED_BYTE MESA_A8_R8_G8_B8 + * GL_RGB GL_UNSIGNED_BYTE MESA_A8_R8_G8_B8 * more to be added for new drivers... 
* * Notes: @@ -123,8 +126,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, /* store as 8-bit texels */ if (wScale == 1 && hScale == 1) { /* no scaling needed - fast case */ - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); GLubyte *dst = (GLubyte *) dstImage; @@ -141,8 +145,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { dst[col] = src[col / wScale]; @@ -160,8 +165,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, else { /* store as 16-bit texels */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); GLushort *dst = (GLushort *) dstImage; @@ -182,8 +188,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, GLint row, col; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); const GLint srcStride 
= _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); for (col = 0; col < dstWidth; col++) { @@ -203,8 +210,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, if (srcFormat == GL_RGB && srcType == GL_UNSIGNED_SHORT_5_6_5) { /* special, optimized case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); GLushort *dst = (GLushort *) dstImage; @@ -221,8 +229,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLushort *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLushort *src = (const GLushort *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { dst[col] = src[col / wScale]; @@ -234,8 +243,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGB && srcType == GL_UNSIGNED_BYTE) { /* general case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); #ifdef DO_32BIT_STORES @@ -286,8 +296,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const 
GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { GLint col3 = (col / wScale) * 3; @@ -305,8 +316,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) { /* general case (used by Quake3) */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); #ifdef DO_32BIT_STORES @@ -357,8 +369,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { GLint col4 = (col / wScale) * 4; @@ -384,8 +397,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV){ /* special, optimized case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); GLushort *dst = (GLushort *) dstImage; @@ -402,8 +416,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / 
hScale; - const GLushort *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLushort *src = (const GLushort *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { dst[col] = src[col / wScale]; @@ -415,8 +430,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) { /* general case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); #ifdef DO_32BIT_STORES @@ -473,8 +489,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { GLint col4 = (col / wScale) * 4; @@ -499,11 +516,12 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, case MESA_A1_R5_G5_B5: /* store as 16-bit texels (GR_TEXFMT_ARGB_1555) */ - if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV){ + if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV){ /* special, optimized case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); 
const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); GLushort *dst = (GLushort *) dstImage; @@ -520,8 +538,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLushort *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLushort *src = (const GLushort *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { dst[col] = src[col / wScale]; @@ -533,8 +552,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) { /* general case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); GLushort *dst = (GLushort *) dstImage; @@ -561,8 +581,9 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { GLint col4 = (col / wScale) * 4; @@ -591,11 +612,12 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV){ /* special, optimized case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); 
+ const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); - GLuint *dst = dstImage; + GLuint *dst = (GLuint *) dstImage; GLint row; for (row = 0; row < dstHeight; row++) { MEMCPY(dst, src, dstWidth * sizeof(GLuint)); @@ -605,12 +627,13 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, } else { /* must rescale image */ - GLuint *dst = dstImage; + GLuint *dst = (GLuint *) dstImage; GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLuint *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLuint *src = (const GLuint *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { dst[col] = src[col / wScale]; @@ -622,11 +645,12 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) { /* general case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); - GLuint *dst = dstImage; + GLuint *dst = (GLuint *) dstImage; GLint row; for (row = 0; row < dstHeight; row++) { GLint col, col4; @@ -643,12 +667,13 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, } else { /* must rescale image */ - GLuint *dst = dstImage; + GLuint *dst = (GLuint *) dstImage; GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + 
const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { GLint col4 = (col / wScale) * 4; @@ -665,11 +690,12 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGB && srcType == GL_UNSIGNED_BYTE) { /* general case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, srcWidth, srcFormat, srcType); - GLuint *dst = dstImage; + GLuint *dst = (GLuint *) dstImage; GLint row; for (row = 0; row < dstHeight; row++) { GLint col, col3; @@ -686,12 +712,13 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, } else { /* must rescale image */ - GLuint *dst = dstImage; + GLuint *dst = (GLuint *) dstImage; GLint row; for (row = 0; row < dstHeight; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < dstWidth; col++) { GLint col3 = (col / wScale) * 3; @@ -711,7 +738,7 @@ _mesa_convert_teximage(MesaIntTexFormat dstFormat, } if (dstFormat == MESA_FF_R8_G8_B8) { /* set alpha bytes to 0xff */ - GLuint i; + GLint i; GLubyte *dst = (GLubyte *) dstImage; for (i = 0; i < dstWidth * dstHeight; i++) { dst[i * 4 + 3] = 0xff; @@ -792,8 +819,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, /* store as 8-bit texels */ if (wScale == 1 && hScale == 1) { /* no scaling needed - fast case */ - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, 
srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLubyte *dst = (GLubyte *) dstImage @@ -812,8 +840,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { dst[col] = src[col / wScale]; @@ -831,8 +860,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, else { /* store as 16-bit texels */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLushort *dst = (GLushort *) ((GLubyte *) dstImage @@ -855,8 +885,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row, col; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); for (col = 0; col < width; col++) { @@ -876,8 +907,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, if (srcFormat == GL_RGB && srcType == GL_UNSIGNED_SHORT_5_6_5) { /* special, optimized 
case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLushort *dst = (GLushort *) ((GLubyte *) dstImage @@ -896,8 +928,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLushort *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLushort *src = (const GLushort *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { dst[col] = src[col / wScale]; @@ -909,8 +942,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGB && srcType == GL_UNSIGNED_BYTE) { /* general case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLushort *dst = (GLushort *) ((GLubyte *) dstImage @@ -937,8 +971,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { GLint col3 = (col / wScale) * 3; @@ -956,8 
+991,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) { /* general case (used by Quake3) */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLushort *dst = (GLushort *) ((GLubyte *) dstImage @@ -984,8 +1020,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { GLint col4 = (col / wScale) * 4; @@ -1011,8 +1048,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_4_4_4_4_REV){ /* special, optimized case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLushort *dst = (GLushort *) ((GLubyte *) dstImage @@ -1031,8 +1069,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLushort *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLushort *src = (const GLushort *) + 
_mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { dst[col] = src[col / wScale]; @@ -1044,8 +1083,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) { /* general case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLushort *dst = (GLushort *) ((GLubyte *) dstImage @@ -1074,8 +1114,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { GLint col4 = (col / wScale) * 4; @@ -1100,11 +1141,12 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, case MESA_A1_R5_G5_B5: /* store as 16-bit texels (GR_TEXFMT_ARGB_1555) */ - if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV){ + if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_SHORT_1_5_5_5_REV){ /* special, optimized case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLushort *dst = (GLushort *) ((GLubyte *) dstImage @@ 
-1123,8 +1165,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLushort *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLushort *src = (const GLushort *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { dst[col] = src[col / wScale]; @@ -1136,8 +1179,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) { /* general case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLushort *dst = (GLushort *) ((GLubyte *) dstImage @@ -1166,8 +1210,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { GLint col4 = (col / wScale) * 4; @@ -1196,8 +1241,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, if (srcFormat == GL_BGRA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV){ /* special, optimized case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + 
srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLuint *dst = (GLuint *) ((GLubyte *) dstImage @@ -1216,8 +1262,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLuint *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLuint *src = (const GLuint *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { dst[col] = src[col / wScale]; @@ -1229,8 +1276,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, else if (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE) { /* general case */ if (wScale == 1 && hScale == 1) { - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, srcHeight, + srcFormat, srcType, 0, 0, 0); const GLint srcStride = _mesa_image_row_stride(packing, width, srcFormat, srcType); GLuint *dst = (GLuint *) ((GLubyte *) dstImage @@ -1256,8 +1304,9 @@ _mesa_convert_texsubimage(MesaIntTexFormat dstFormat, GLint row; for (row = 0; row < height; row++) { GLint srcRow = row / hScale; - const GLubyte *src = _mesa_image_address(packing, srcImage, - srcWidth, srcHeight, srcFormat, srcType, 0, srcRow, 0); + const GLubyte *src = (const GLubyte *) + _mesa_image_address(packing, srcImage, srcWidth, + srcHeight, srcFormat, srcType, 0, srcRow, 0); GLint col; for (col = 0; col < width; col++) { GLint col4 = (col / wScale) * 4; diff --git a/xc/extras/Mesa/src/texutil.h b/xc/extras/Mesa/src/texutil.h index f1fbcfab2..22e916d50 100644 --- a/xc/extras/Mesa/src/texutil.h +++ b/xc/extras/Mesa/src/texutil.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH 
THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/texutil.h,v 1.3 2000/09/26 15:56:34 tsi Exp $ */ + #ifndef TEXUTIL_H #define TEXUTIL_H diff --git a/xc/extras/Mesa/src/triangle.c b/xc/extras/Mesa/src/triangle.c index d8f402d67..eb59d56ab 100644 --- a/xc/extras/Mesa/src/triangle.c +++ b/xc/extras/Mesa/src/triangle.c @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/triangle.c,v 1.7 2000/09/26 15:56:34 tsi Exp $ */ + /* * Triangle rasterizers @@ -42,6 +42,7 @@ #include "mem.h" #include "mmath.h" #include "span.h" +#include "teximage.h" #include "texstate.h" #include "triangle.h" #include "types.h" @@ -221,6 +222,7 @@ static void smooth_rgba_triangle( GLcontext *ctx, } + /* * Render an RGB, GL_DECAL, textured triangle. * Interpolate S,T only w/out mipmapping or perspective correction. 
@@ -239,8 +241,14 @@ static void simple_textured_triangle( GLcontext *ctx, GLuint v0, GLuint v1, GLint twidth_log2 = obj->Image[b]->WidthLog2; \ GLubyte *texture = obj->Image[b]->Data; \ GLint smask = obj->Image[b]->Width - 1; \ - GLint tmask = obj->Image[b]->Height - 1; - (void) pv; + GLint tmask = obj->Image[b]->Height - 1; \ + (void) pv; \ + if (!texture) { \ + if (!_mesa_get_teximages_from_driver(ctx, obj)) \ + return; \ + texture = obj->Image[b]->Data; \ + ASSERT(texture); \ + } #define INNER_LOOP( LEFT, RIGHT, Y ) \ { \ @@ -261,7 +269,7 @@ static void simple_textured_triangle( GLcontext *ctx, GLuint v0, GLuint v1, fft += fdtdx; \ } \ (*ctx->Driver.WriteRGBSpan)( ctx, n, LEFT, Y, \ - (const GLubyte (*)[3]) rgb, NULL ); \ + (CONST GLubyte (*)[3]) rgb, NULL ); \ } \ } @@ -290,8 +298,14 @@ static void simple_z_textured_triangle( GLcontext *ctx, GLuint v0, GLuint v1, GLint twidth_log2 = obj->Image[b]->WidthLog2; \ GLubyte *texture = obj->Image[b]->Data; \ GLint smask = obj->Image[b]->Width - 1; \ - GLint tmask = obj->Image[b]->Height - 1; - (void) pv; + GLint tmask = obj->Image[b]->Height - 1; \ + (void) pv; \ + if (!texture) { \ + if (!_mesa_get_teximages_from_driver(ctx, obj)) \ + return; \ + texture = obj->Image[b]->Data; \ + ASSERT(texture); \ + } #define INNER_LOOP( LEFT, RIGHT, Y ) \ { \ @@ -322,7 +336,7 @@ static void simple_z_textured_triangle( GLcontext *ctx, GLuint v0, GLuint v1, fft += fdtdx; \ } \ (*ctx->Driver.WriteRGBSpan)( ctx, n, LEFT, Y, \ - (const GLubyte (*)[3]) rgb, mask ); \ + (CONST GLubyte (*)[3]) rgb, mask ); \ } \ } @@ -360,6 +374,12 @@ static void affine_textured_triangle( GLcontext *ctx, GLuint v0, GLuint v1, GLint comp, tbytesline, tsize; \ GLfixed er, eg, eb, ea; \ GLint tr, tg, tb, ta; \ + if (!texture) { \ + if (!_mesa_get_teximages_from_driver(ctx, obj)) \ + return; \ + texture = obj->Image[b]->Data; \ + ASSERT(texture); \ + } \ if (envmode == GL_BLEND) { \ /* potential off-by-one error here? 
(1.0f -> 2048 -> 0) */ \ er = FloatToFixed(unit->EnvColor[0]); \ @@ -648,6 +668,12 @@ static void persp_textured_triangle( GLcontext *ctx, GLuint v0, GLuint v1, GLint comp, tbytesline, tsize; \ GLfixed er, eg, eb, ea; \ GLint tr, tg, tb, ta; \ + if (!texture) { \ + if (!_mesa_get_teximages_from_driver(ctx, obj)) \ + return; \ + texture = obj->Image[b]->Data; \ + ASSERT(texture); \ + } \ if (envmode == GL_BLEND) { \ er = FloatToFixed(unit->EnvColor[0]); \ eg = FloatToFixed(unit->EnvColor[1]); \ @@ -1017,7 +1043,7 @@ static void general_textured_spec_triangle1( GLcontext *ctx, GLuint v0, } \ gl_write_texture_span( ctx, n, LEFT, Y, zspan, \ s, t, u, NULL, rgba, \ - (const GLubyte (*)[4]) spec, \ + (CONST GLubyte (*)[4]) spec, \ GL_POLYGON ); \ } \ } @@ -1243,7 +1269,7 @@ static void lambda_textured_spec_triangle1( GLcontext *ctx, GLuint v0, } \ gl_write_texture_span( ctx, n, LEFT, Y, zspan, \ s, t, u, lambda, \ - rgba, (const GLubyte (*)[4]) spec, \ + rgba, (CONST GLubyte (*)[4]) spec, \ GL_POLYGON ); \ } \ } @@ -1371,9 +1397,9 @@ static void lambda_multitextured_triangle1( GLcontext *ctx, GLuint v0, } \ } \ gl_write_multitexture_span( ctx, 2, n, LEFT, Y, zspan, \ - (const GLfloat (*)[MAX_WIDTH]) s, \ - (const GLfloat (*)[MAX_WIDTH]) t, \ - (const GLfloat (*)[MAX_WIDTH]) u, \ + (CONST GLfloat (*)[MAX_WIDTH]) s, \ + (CONST GLfloat (*)[MAX_WIDTH]) t, \ + (CONST GLfloat (*)[MAX_WIDTH]) u, \ (GLfloat (*)[MAX_WIDTH]) lambda, \ rgba, NULL, GL_POLYGON ); \ } \ @@ -1505,6 +1531,8 @@ void gl_set_triangle_function( GLcontext *ctx ) } if (ctx->Depth.OcclusionTest && + ctx->DrawBuffer->DepthBuffer && + ctx->Depth.Test && ctx->Depth.Mask == GL_FALSE && ctx->Depth.Func == GL_LESS && !ctx->Stencil.Enabled) { diff --git a/xc/extras/Mesa/src/tritemp.h b/xc/extras/Mesa/src/tritemp.h index 54f8cea2e..d0135dc1d 100644 --- a/xc/extras/Mesa/src/tritemp.h +++ b/xc/extras/Mesa/src/tritemp.h @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION 
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/tritemp.h,v 1.9 2000/09/24 13:50:20 alanh Exp $ */ + /* * Triangle Rasterizer Template diff --git a/xc/extras/Mesa/src/types.h b/xc/extras/Mesa/src/types.h index 550f4f1e6..9b1e832d7 100644 --- a/xc/extras/Mesa/src/types.h +++ b/xc/extras/Mesa/src/types.h @@ -23,7 +23,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/types.h,v 1.7 2000/09/26 15:56:34 tsi Exp $ */ + #ifndef TYPES_H #define TYPES_H @@ -449,7 +449,7 @@ struct gl_enable_attrib { GLboolean RescaleNormals; GLboolean Scissor; GLboolean Stencil; - GLuint Texture; + GLuint Texture[MAX_TEXTURE_UNITS]; GLuint TexGen[MAX_TEXTURE_UNITS]; }; @@ -828,8 +828,8 @@ struct gl_texture_object { * Texture units are new with the multitexture extension. */ struct gl_texture_unit { - GLuint Enabled; - GLuint ReallyEnabled; + GLuint Enabled; /* bitmask of TEXTURE0_1D, _2D, _3D, _CUBE */ + GLuint ReallyEnabled; /* 0 or one of TEXTURE0_1D, _2D, _3D, _CUBE */ GLenum EnvMode; /* GL_MODULATE, GL_DECAL, GL_BLEND, GL_COMBINE_EXT */ GLenum LastEnvMode; @@ -886,9 +886,7 @@ struct gl_texture_attrib { GLuint CurrentUnit; /* Current texture unit */ GLuint CurrentTransformUnit; /* Current texture xform unit */ - /* Bitwise-OR of TEXTURE_XD values */ - GLuint Enabled; /* Enabled by the user */ - GLuint ReallyEnabled; /* Really enabled (w.r.t. completeness, etc) */ + GLuint ReallyEnabled; /* Really enabled (w.r.t. 
completeness, etc) */ GLuint LastEnabled; /* Decide whether enabled has really changed */ @@ -1963,6 +1961,10 @@ struct gl_context { /* Dither disable via MESA_NO_DITHER env var */ GLboolean NoDither; + +#ifdef DEBUG + GLboolean Rendering; +#endif }; @@ -2071,7 +2073,44 @@ do { \ #define Elements(x) sizeof(x)/sizeof(*(x)) -#endif +#ifdef DEBUG + +#define RENDER_START(CTX) \ + do { \ + assert(!(CTX)->Rendering); \ + (CTX)->Rendering = GL_TRUE; \ + if ((CTX)->Driver.RenderStart) { \ + (*(CTX)->Driver.RenderStart)(CTX); \ + } \ + } while (0) + +#define RENDER_FINISH(CTX) \ + do { \ + assert((CTX)->Rendering); \ + (CTX)->Rendering = GL_FALSE; \ + if ((CTX)->Driver.RenderFinish) { \ + (*(CTX)->Driver.RenderFinish)(CTX); \ + } \ + } while (0) + +#else + +#define RENDER_START(CTX) \ + do { \ + if ((CTX)->Driver.RenderStart) { \ + (*(CTX)->Driver.RenderStart)(CTX); \ + } \ + } while (0) + +#define RENDER_FINISH(CTX) \ + do { \ + if ((CTX)->Driver.RenderFinish) { \ + (*(CTX)->Driver.RenderFinish)(CTX); \ + } \ + } while (0) + +#endif +#endif /* TYPES_H */ diff --git a/xc/extras/Mesa/src/varray.c b/xc/extras/Mesa/src/varray.c index ce8a4a186..7547ea3bf 100644 --- a/xc/extras/Mesa/src/varray.c +++ b/xc/extras/Mesa/src/varray.c @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/varray.c,v 1.6 2000/09/26 15:56:34 tsi Exp $ */ #ifdef PC_HEADER #include "all.h" @@ -1124,14 +1123,14 @@ _mesa_InterleavedArrays(GLenum format, GLsizei stride, const GLvoid *pointer) glTexCoordPointer( tcomps, GL_FLOAT, stride, (GLubyte *) pointer + i * coffset ); } - for (i = factor; i < ctx->Const.MaxTextureUnits; i++) { + for (i = factor; i < (GLint) ctx->Const.MaxTextureUnits; i++) { _mesa_ClientActiveTextureARB( (GLenum) (GL_TEXTURE0_ARB + i) ); _mesa_DisableClientState( GL_TEXTURE_COORD_ARRAY ); } } else { GLint i; - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + for (i = 0; i < (GLint) ctx->Const.MaxTextureUnits; i++) { _mesa_ClientActiveTextureARB( (GLenum) (GL_TEXTURE0_ARB + i) ); _mesa_DisableClientState( GL_TEXTURE_COORD_ARRAY ); } diff --git a/xc/extras/Mesa/src/vb.c b/xc/extras/Mesa/src/vb.c index 33139e8f3..b5555317e 100644 --- a/xc/extras/Mesa/src/vb.c +++ b/xc/extras/Mesa/src/vb.c @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/vb.c,v 1.6 2000/09/26 15:56:35 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" @@ -207,7 +207,6 @@ void gl_vb_free( struct vertex_buffer *VB ) ALIGN_FREE( VB->CullMask ); ALIGN_FREE( VB->NormCullMask ); } else { - if (VB->store.Elt) gl_vector4f_free( VB->store.Obj ); FREE( VB->store.Obj ); gl_vector3f_free( VB->store.Normal ); FREE( VB->store.Normal ); gl_vector1ub_free( VB->store.EdgeFlag ); FREE( VB->store.EdgeFlag ); diff --git a/xc/extras/Mesa/src/vb.h b/xc/extras/Mesa/src/vb.h index 1d32b13f1..62ce7c613 100644 --- a/xc/extras/Mesa/src/vb.h +++ b/xc/extras/Mesa/src/vb.h @@ -1,9 +1,9 @@ /* * Mesa 3-D graphics library - * Version: 3.3 + * Version: 3.4 * - * Copyright (C) 1999 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -139,10 +139,10 @@ struct immediate GLuint ArrayEltFlush; GLuint FlushElt; - GLuint TF1[2]; /* precalc'ed for glTexCoord */ - GLuint TF2[2]; - GLuint TF3[2]; - GLuint TF4[2]; + GLuint TF1[MAX_TEXTURE_UNITS]; /* precalc'ed for glTexCoord */ + GLuint TF2[MAX_TEXTURE_UNITS]; + GLuint TF3[MAX_TEXTURE_UNITS]; + GLuint TF4[MAX_TEXTURE_UNITS]; GLuint Primitive[VB_SIZE]; /* GLubyte would do... */ GLuint NextPrimitive[VB_SIZE]; diff --git a/xc/extras/Mesa/src/vbcull.c b/xc/extras/Mesa/src/vbcull.c index 4af76165b..ba9524611 100644 --- a/xc/extras/Mesa/src/vbcull.c +++ b/xc/extras/Mesa/src/vbcull.c @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/vbcull.c,v 1.6 2000/09/26 15:56:35 tsi Exp $ */ /* * New (3.1) transformation code written by Keith Whitwell. 
@@ -834,7 +833,7 @@ GLuint gl_cull_vb( struct vertex_buffer *VB ) first = i; next = VB->NextPrimitive[i]; prim = in_prim[i]; - n = cull_funcs[prim]( VB, i, next, parity, (const GLfloat (*)[4])proj ); + n = cull_funcs[prim]( VB, i, next, parity, (CONST GLfloat (*)[4])proj ); if (n == next - i) out_prim[i] = GL_POLYGON+1; @@ -850,7 +849,7 @@ GLuint gl_cull_vb( struct vertex_buffer *VB ) first, VB->Count, VB->Ovf, - (const GLfloat (*)[4])proj ); + (CONST GLfloat (*)[4])proj ); } VB->Primitive = out_prim; @@ -904,7 +903,7 @@ void gl_dont_cull_vb( struct vertex_buffer *VB ) first, VB->Count, VB->Ovf, - (const GLfloat (*)[4])proj ); + (CONST GLfloat (*)[4])proj ); } VB->CullDone = 1; diff --git a/xc/extras/Mesa/src/vbfill.c b/xc/extras/Mesa/src/vbfill.c index 1bcea531a..50d5806d5 100644 --- a/xc/extras/Mesa/src/vbfill.c +++ b/xc/extras/Mesa/src/vbfill.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/* $XFree86: xc/extras/Mesa/src/vbfill.c,v 1.6 2000/09/26 15:56:35 tsi Exp $ */ + #ifdef PC_HEADER #include "all.h" diff --git a/xc/extras/Mesa/src/vbindirect.c b/xc/extras/Mesa/src/vbindirect.c index 04639ff9d..79fb29975 100644 --- a/xc/extras/Mesa/src/vbindirect.c +++ b/xc/extras/Mesa/src/vbindirect.c @@ -133,10 +133,10 @@ indexed_render_points( struct vertex_buffer *VB, const GLubyte *clip = VB->ClipMask; for (i = start ; i < count ; i++) if (!clip[elt[i]]) - ctx->Driver.PointsFunc( ctx, elt[i], elt[i] ); + ctx->Driver.PointsFunc( ctx, elt[i], elt[i]+1 ); } else { for (i = start ; i < count ; i++) - ctx->Driver.PointsFunc( ctx, elt[i], elt[i] ); + ctx->Driver.PointsFunc( ctx, elt[i], elt[i]+1 ); } } diff --git a/xc/extras/Mesa/src/vbrender.c b/xc/extras/Mesa/src/vbrender.c index 263dd34fd..ae3a7141a 100644 --- a/xc/extras/Mesa/src/vbrender.c +++ b/xc/extras/Mesa/src/vbrender.c @@ -22,7 +22,7 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/vbrender.c,v 1.7 2000/09/24 13:50:22 alanh Exp $ */ + /* * Render points, lines, and polygons. 
The only entry point to this @@ -151,7 +151,7 @@ static void unfilled_polygon( GLcontext *ctx, j = vlist[i]; if (edge_ptr[j] & 0x3) { edge_ptr[j] &= ~0x3; - (*ctx->Driver.PointsFunc)( ctx, j, j ); + (*ctx->Driver.PointsFunc)( ctx, j, j+1 ); } } } diff --git a/xc/extras/Mesa/src/vbxform.c b/xc/extras/Mesa/src/vbxform.c index eea0be897..13d47a398 100644 --- a/xc/extras/Mesa/src/vbxform.c +++ b/xc/extras/Mesa/src/vbxform.c @@ -680,7 +680,7 @@ void gl_fixup_cassette( GLcontext *ctx, struct immediate *IM ) IM->NormalLengths = (GLfloat *)MALLOC(sizeof(GLfloat) * VB_SIZE); calc_normal_lengths( IM->NormalLengths + start, - (const GLfloat (*)[3])(IM->Normal + start), + (CONST GLfloat (*)[3])(IM->Normal + start), IM->Flag + start, IM->Count - start); diff --git a/xc/extras/Mesa/src/vector.c b/xc/extras/Mesa/src/vector.c index a7daed715..954a4235a 100644 --- a/xc/extras/Mesa/src/vector.c +++ b/xc/extras/Mesa/src/vector.c @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/vector.c,v 1.5 2000/09/26 15:56:35 tsi Exp $ */ /* * New (3.1) transformation code written by Keith Whitwell. diff --git a/xc/extras/Mesa/src/vertices.c b/xc/extras/Mesa/src/vertices.c index da139173f..f57958a1c 100644 --- a/xc/extras/Mesa/src/vertices.c +++ b/xc/extras/Mesa/src/vertices.c @@ -2,19 +2,19 @@ /* * Mesa 3-D graphics library * Version: 3.3 - * + * * Copyright (C) 1999 Brian Paul All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -34,7 +34,7 @@ #if defined(USE_X86_ASM) -#include "X86/common_x86asm.h" +#include "X86/common_x86_asm.h" #endif @@ -58,7 +58,7 @@ */ /* The inline 3dnow code seems to give problems with some peoples - * compiler/binutils. + * compiler/binutils. */ /* #undef USE_3DNOW_ASM */ @@ -69,15 +69,15 @@ #endif -static void _PROJAPI transform_v16(GLfloat *f, - const GLfloat *m, - const GLfloat *obj, - GLuint obj_stride, - GLuint count ) +static void _PROJAPI transform_v16( GLfloat *f, + const GLfloat *m, + const GLfloat *obj, + GLuint obj_stride, + GLuint count ) { GLuint i; - for (i = 0 ; i < count ; i++, STRIDE_F(obj, obj_stride), f+=16) + for (i = 0 ; i < count ; i++, STRIDE_F(obj, obj_stride), f+=16) { const GLfloat ox = obj[0], oy = obj[1], oz = obj[2]; f[0] = m[0] * ox + m[4] * oy + m[8] * oz + m[12]; @@ -87,71 +87,27 @@ static void _PROJAPI transform_v16(GLfloat *f, } } -/* Project all vertices upto but not including last. Guarenteed to be - * at least one such vertex. 
- */ -static void _PROJAPI project_verts(GLfloat *first, - GLfloat *last, - const GLfloat *m, - GLuint stride ) -{ - const GLfloat sx = m[0], sy = m[5], sz = m[10]; - const GLfloat tx = m[12], ty = m[13], tz = m[14]; - GLfloat *f; - - for ( f = first ; f != last ; STRIDE_F(f,stride)) - { - const GLfloat oow = 1.0F / f[3]; - f[0] = sx * f[0] * oow + tx; - f[1] = sy * f[1] * oow + ty; - f[2] = sz * f[2] * oow + tz; - f[3] = oow; - } -} - -static void _PROJAPI project_clipped_verts(GLfloat *first, - GLfloat *last, - const GLfloat *m, - GLuint stride, - const GLubyte *clipmask ) -{ - const GLfloat sx = m[0], sy = m[5], sz = m[10]; - const GLfloat tx = m[12], ty = m[13], tz = m[14]; - GLfloat *f; - - for ( f = first ; f != last ; STRIDE_F(f,stride), clipmask++) - { - if (!*clipmask) { - const GLfloat oow = 1.0F / f[3]; - f[0] = sx * f[0] * oow + tx; - f[1] = sy * f[1] * oow + ty; - f[2] = sz * f[2] * oow + tz; - f[3] = oow; - } - } -} - static void _PROJAPI cliptest_v16( GLfloat *first, - GLfloat *last, - GLubyte *p_clipOr, - GLubyte *p_clipAnd, - GLubyte *clipmask ) -{ + GLfloat *last, + GLubyte *p_clipOr, + GLubyte *p_clipAnd, + GLubyte *clipmask ) +{ GLubyte clipAnd = (GLubyte) ~0; GLubyte clipOr = 0; GLfloat *f = first; static int i; i = 0; - for ( ; f != last ; f+=16, clipmask++, i++) + for ( ; f != last ; f+=16, clipmask++, i++) { const GLfloat cx = f[0]; const GLfloat cy = f[1]; const GLfloat cz = f[2]; - const GLfloat cw = f[3]; + const GLfloat cw = f[3]; GLubyte mask = 0; if (cx > cw) mask |= CLIP_RIGHT_BIT; @@ -172,6 +128,51 @@ static void _PROJAPI cliptest_v16( GLfloat *first, +/* Project all vertices upto but not including last. Guarenteed to be + * at least one such vertex. 
+ */ +static void _PROJAPI project_verts( GLfloat *first, + GLfloat *last, + const GLfloat *m, + GLuint stride ) +{ + const GLfloat sx = m[0], sy = m[5], sz = m[10]; + const GLfloat tx = m[12], ty = m[13], tz = m[14]; + GLfloat *f; + + for ( f = first ; f != last ; STRIDE_F(f,stride)) + { + const GLfloat oow = 1.0F / f[3]; + f[0] = sx * f[0] * oow + tx; + f[1] = sy * f[1] * oow + ty; + f[2] = sz * f[2] * oow + tz; + f[3] = oow; + } +} + +static void _PROJAPI project_clipped_verts( GLfloat *first, + GLfloat *last, + const GLfloat *m, + GLuint stride, + const GLubyte *clipmask ) +{ + const GLfloat sx = m[0], sy = m[5], sz = m[10]; + const GLfloat tx = m[12], ty = m[13], tz = m[14]; + GLfloat *f; + + for ( f = first ; f != last ; STRIDE_F(f,stride), clipmask++) + { + if (!*clipmask) { + const GLfloat oow = 1.0F / f[3]; + f[0] = sx * f[0] * oow + tx; + f[1] = sy * f[1] * oow + ty; + f[2] = sz * f[2] * oow + tz; + f[3] = oow; + } + } +} + + GLenum gl_reduce_prim[GL_POLYGON+1] = { GL_POINTS, GL_LINES, @@ -183,13 +184,28 @@ GLenum gl_reduce_prim[GL_POLYGON+1] = { GL_TRIANGLES, GL_TRIANGLES, GL_TRIANGLES, -}; +}; + -gl_transform_func gl_xform_points3_v16_general = transform_v16; -gl_cliptest_func gl_cliptest_points4_v16 = cliptest_v16; -gl_project_clipped_func gl_project_clipped_v16 = project_clipped_verts; -gl_project_func gl_project_v16 = project_verts; +gl_transform_func gl_xform_points3_v16_general; +gl_cliptest_func gl_cliptest_points4_v16; +gl_project_func gl_project_v16; +gl_project_clipped_func gl_project_clipped_v16; -void gl_init_vertices() + +void gl_init_vertices( void ) { + gl_xform_points3_v16_general = transform_v16; + gl_cliptest_points4_v16 = cliptest_v16; + gl_project_v16 = project_verts; + gl_project_clipped_v16 = project_clipped_verts; + +#if 0 + /* GH: Add tests/benchmarks for the vertex asm */ + gl_test_all_vertex_functions( "default" ); +#endif + +#ifdef USE_X86_ASM + gl_init_all_x86_vertex_asm(); +#endif } diff --git a/xc/extras/Mesa/src/vertices.h 
b/xc/extras/Mesa/src/vertices.h index 8bdcf470c..b68646227 100644 --- a/xc/extras/Mesa/src/vertices.h +++ b/xc/extras/Mesa/src/vertices.h @@ -2,19 +2,19 @@ /* * Mesa 3-D graphics library * Version: 3.3 - * + * * Copyright (C) 1999 Brian Paul All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: - * + * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL @@ -24,8 +24,8 @@ */ -#ifndef VERTICES_H_ -#define VERTICES_H_ +#ifndef __VERTICES_H__ +#define __VERTICES_H__ #ifdef USE_X86_ASM #define _PROJAPI _ASMAPI @@ -36,45 +36,44 @@ #endif typedef void (_PROJAPIP gl_transform_func)( GLfloat *first_vert, - const GLfloat *m, - const GLfloat *src, - GLuint src_stride, - GLuint count ); + const GLfloat *m, + const GLfloat *src, + GLuint src_stride, + GLuint count ); typedef void (_PROJAPIP gl_cliptest_func)( GLfloat *first_vert, - GLfloat *last_vert, /* use count instead? */ - GLubyte *or_mask, - GLubyte *and_mask, - GLubyte *clip_mask ); + GLfloat *last_vert, /* use count instead? 
*/ + GLubyte *or_mask, + GLubyte *and_mask, + GLubyte *clip_mask ); + +typedef void (_PROJAPIP gl_project_func)( GLfloat *first, + GLfloat *last, + const GLfloat *m, + GLuint stride ); typedef void (_PROJAPIP gl_project_clipped_func)( GLfloat *first, - GLfloat *last, - const GLfloat *m, - GLuint stride, - const GLubyte *clipmask ); + GLfloat *last, + const GLfloat *m, + GLuint stride, + const GLubyte *clipmask ); -typedef void (_PROJAPIP gl_project_func)( GLfloat *first, - GLfloat *last, - const GLfloat *m, - GLuint stride ); +typedef void (_PROJAPIP gl_vertex_interp_func)( GLfloat t, + GLfloat *result, + const GLfloat *in, + const GLfloat *out ); /* At the moment these are used by fastpaths in the FX and MGA drivers. */ extern gl_transform_func gl_xform_points3_v16_general; -extern gl_cliptest_func gl_cliptest_points4_v16; -extern gl_project_clipped_func gl_project_clipped_v16; -extern gl_project_func gl_project_v16; - +extern gl_cliptest_func gl_cliptest_points4_v16; +extern gl_project_func gl_project_v16; +extern gl_project_clipped_func gl_project_clipped_v16; extern GLenum gl_reduce_prim[]; -extern void gl_init_vertices(void); -typedef void (*gl_vertex_interp_func)( GLfloat t, - GLfloat *result, - const GLfloat *in, - const GLfloat *out ); - +extern void gl_init_vertices( void ); #endif diff --git a/xc/extras/Mesa/src/xform.c b/xc/extras/Mesa/src/xform.c index 78309e93c..f057c5058 100644 --- a/xc/extras/Mesa/src/xform.c +++ b/xc/extras/Mesa/src/xform.c @@ -52,7 +52,7 @@ #endif #ifdef USE_X86_ASM -#include "X86/common_x86asm.h" +#include "X86/common_x86_asm.h" #endif clip_func gl_clip_tab[5]; @@ -201,12 +201,12 @@ void gl_init_transformation( void ) init_dotprod_masked(); #ifdef DEBUG - gl_test_all_transform_functions ("default"); - gl_test_all_normal_transform_functions ("default"); + gl_test_all_transform_functions( "default" ); + gl_test_all_normal_transform_functions( "default" ); #endif #ifdef USE_X86_ASM - gl_init_all_x86_asm (); + 
gl_init_all_x86_transform_asm(); #endif } diff --git a/xc/extras/Mesa/src/xform.h b/xc/extras/Mesa/src/xform.h index ce2166901..2f8843815 100644 --- a/xc/extras/Mesa/src/xform.h +++ b/xc/extras/Mesa/src/xform.h @@ -22,7 +22,10 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/xform.h,v 1.5 2000/09/26 15:56:35 tsi Exp $ */ + + + + #ifndef XFORM_H #define XFORM_H diff --git a/xc/extras/Mesa/src/xform_tmp.h b/xc/extras/Mesa/src/xform_tmp.h index 8f521fa11..aacbee841 100644 --- a/xc/extras/Mesa/src/xform_tmp.h +++ b/xc/extras/Mesa/src/xform_tmp.h @@ -22,7 +22,6 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* $XFree86: xc/extras/Mesa/src/xform_tmp.h,v 1.5 2000/09/26 15:56:35 tsi Exp $ */ /* * New (3.1) transformation code written by Keith Whitwell. diff --git a/xc/lib/GL/Imakefile b/xc/lib/GL/Imakefile index 59af03fed..5814fb1d7 100644 --- a/xc/lib/GL/Imakefile +++ b/xc/lib/GL/Imakefile @@ -31,37 +31,43 @@ NormalLintTarget($(PROFSRC)) #endif #if BuildXF86DRI && defined(i386Architecture) - GLAPI_OBJ = glx/glapi_x86.o + GLAPI_OBJ = glx/glapi_x86.o #endif - GLXOBJS = glx/?*.o - GLXUOBJS = glx/unshared/?*.o $(GLAPI_OBJ) - GLXDOBJS = glx/debugger/?*.o $(GLAPI_OBJ) - GLXPOBJS = glx/profiled/?*.o $(GLAPI_OBJ) - GLXDONES = glx/DONE + GLXOBJS = glx/?*.o + GLXUOBJS = glx/unshared/?*.o $(GLAPI_OBJ) + GLXDOBJS = glx/debugger/?*.o $(GLAPI_OBJ) + GLXPOBJS = glx/profiled/?*.o $(GLAPI_OBJ) + GLXDONES = glx/DONE #if BuildXF86DRI - DRIOBJS = dri/XF86dri.o dri/dri_glx.o - DRIUOBJS = dri/unshared/XF86dri.o dri/unshared/dri_glx.o - DRIDOBJS = dri/debugger/XF86dri.o dri/debugger/dri_glx.o - DRIPOBJS = dri/profiled/XF86dri.o dri/profiled/dri_glx.o - DRIDONES = dri/DONE - - DRMOBJS = dri/drm/?*.o - DRMUOBJS = dri/drm/unshared/?*.o - DRMDOBJS = 
dri/drm/debugger/?*.o - DRMPOBJS = dri/drm/profiled/?*.o - DRMDONES = dri/drm/DONE - - MESAOBJS = mesa/src/?*.o + DRIOBJS = dri/XF86dri.o dri/dri_glx.o + DRIUOBJS = dri/unshared/XF86dri.o dri/unshared/dri_glx.o + DRIDOBJS = dri/debugger/XF86dri.o dri/debugger/dri_glx.o + DRIPOBJS = dri/profiled/XF86dri.o dri/profiled/dri_glx.o + DRIDONES = dri/DONE + + DRMOBJS = dri/drm/?*.o + DRMUOBJS = dri/drm/unshared/?*.o + DRMDOBJS = dri/drm/debugger/?*.o + DRMPOBJS = dri/drm/profiled/?*.o + DRMDONES = dri/drm/DONE + + MESAOBJS = mesa/src/?*.o #ifdef i386Architecture - ASM_OBJS = mesa/src/X86/common_x86asm.o \ - mesa/src/X86/mmx_blend.o \ - mesa/src/X86/vertex.o \ - mesa/src/X86/x86a.o + ASM_OBJS = mesa/src/X86/common_x86_asm.o \ + mesa/src/X86/x86_cliptest.o \ + mesa/src/X86/x86_vertex.o \ + mesa/src/X86/x86_xform_masked2.o \ + mesa/src/X86/x86_xform_masked3.o \ + mesa/src/X86/x86_xform_masked4.o \ + mesa/src/X86/x86_xform_raw2.o \ + mesa/src/X86/x86_xform_raw3.o \ + mesa/src/X86/x86_xform_raw4.o \ + mesa/src/X86/mmx_blend.o #ifdef MesaUse3DNow - ASM_OBJS += mesa/src/X86/3dnow_norm.o \ - mesa/src/X86/3dnow_norm_raw.o \ + ASM_OBJS += mesa/src/X86/3dnow_norm_raw.o \ + mesa/src/X86/3dnow_vertex.o \ mesa/src/X86/3dnow_xform_masked1.o \ mesa/src/X86/3dnow_xform_masked2.o \ mesa/src/X86/3dnow_xform_masked3.o \ @@ -69,12 +75,11 @@ NormalLintTarget($(PROFSRC)) mesa/src/X86/3dnow_xform_raw1.o \ mesa/src/X86/3dnow_xform_raw2.o \ mesa/src/X86/3dnow_xform_raw3.o \ - mesa/src/X86/3dnow_xform_raw4.o \ - mesa/src/X86/vertex_3dnow.o + mesa/src/X86/3dnow_xform_raw4.o #endif #ifdef MesaUseKatmai - ASM_OBJS += mesa/src/X86/katmai_norm.o \ - mesa/src/X86/katmai_norm_raw.o \ + ASM_OBJS += mesa/src/X86/katmai_norm_raw.o \ + mesa/src/X86/katmai_vertex.o \ mesa/src/X86/katmai_xform_masked1.o \ mesa/src/X86/katmai_xform_masked2.o \ mesa/src/X86/katmai_xform_masked3.o \ @@ -82,8 +87,7 @@ NormalLintTarget($(PROFSRC)) mesa/src/X86/katmai_xform_raw1.o \ mesa/src/X86/katmai_xform_raw2.o \ 
mesa/src/X86/katmai_xform_raw3.o \ - mesa/src/X86/katmai_xform_raw4.o \ - mesa/src/X86/vertex_katmai.o + mesa/src/X86/katmai_xform_raw4.o #endif MESAOBJS = mesa/src/?*.o mesa/src/X86/?*.o diff --git a/xc/lib/GL/mesa/src/Imakefile b/xc/lib/GL/mesa/src/Imakefile index f8232128f..07d28a17c 100644 --- a/xc/lib/GL/mesa/src/Imakefile +++ b/xc/lib/GL/mesa/src/Imakefile @@ -349,12 +349,12 @@ LinkSourceFile(zoom.h, $(MESASRCDIR)/src) ASM_SRCS = ASM_OBJS = #ifdef MesaUse3DNow - 3DNOW_DEFS = -DUSE_3DNOW_ASM + 3DNOW_DEFS = -DUSE_3DNOW_ASM #endif #ifdef MesaUseKatmai - KATMAI_DEFS = -DUSE_KATMAI_ASM + KATMAI_DEFS = -DUSE_KATMAI_ASM #endif - ASM_DEFS = -DUSE_MMX_ASM -DUSE_X86_ASM $(3DNOW_DEFS) $(KATMAI_DEFS) + ASM_DEFS = -DUSE_X86_ASM -DUSE_MMX_ASM $(3DNOW_DEFS) $(KATMAI_DEFS) #endif #ifdef UseCompaqMathLibrary diff --git a/xc/lib/GL/mesa/src/OSmesa/Imakefile b/xc/lib/GL/mesa/src/OSmesa/Imakefile index a38ddd585..e09928541 100644 --- a/xc/lib/GL/mesa/src/OSmesa/Imakefile +++ b/xc/lib/GL/mesa/src/OSmesa/Imakefile @@ -171,26 +171,38 @@ MESA_INCLUDES = -I. -I.. 
-I../../include -I../../../../../include osmesa.o #ifdef i386Architecture - X86_SRCS = ../X86/x86.c \ - ../X86/x86a.S \ - ../X86/common_x86.c \ - ../X86/common_x86asm.S \ - ../X86/vertex.S - - X86_OBJS = ../X86/x86.o \ - ../X86/x86a.o \ - ../X86/common_x86.o \ - ../X86/common_x86asm.o \ - ../X86/vertex.o + X86_SRCS = ../X86/common_x86.c \ + ../X86/common_x86_asm.S \ + ../X86/x86.c \ + ../X86/x86_cliptest.S \ + ../X86/x86_vertex.S \ + ../X86/x86_xform_masked2.S \ + ../X86/x86_xform_masked3.S \ + ../X86/x86_xform_masked4.S \ + ../X86/x86_xform_raw2.S \ + ../X86/x86_xform_raw3.S \ + ../X86/x86_xform_raw4.S + + X86_OBJS = ../X86/common_x86.o \ + ../X86/common_x86_asm.o \ + ../X86/x86.o \ + ../X86/x86_cliptest.o \ + ../X86/x86_vertex.o \ + ../X86/x86_xform_masked2.o \ + ../X86/x86_xform_masked3.o \ + ../X86/x86_xform_masked4.o \ + ../X86/x86_xform_raw2.o \ + ../X86/x86_xform_raw3.o \ + ../X86/x86_xform_raw4.o MMX_SRCS = ../X86/mmx_blend.S MMX_OBJS = ../X86/mmx_blend.o -XCOMM Disabling 3Dnow code for the time being. -#if 0 +#ifdef MesaUse3DNow 3DNOW_SRCS = ../X86/3dnow.c \ ../X86/3dnow_norm_raw.S \ + ../X86/3dnow_vertex.S \ ../X86/3dnow_xform_masked1.S \ ../X86/3dnow_xform_masked2.S \ ../X86/3dnow_xform_masked3.S \ @@ -198,11 +210,11 @@ XCOMM Disabling 3Dnow code for the time being. ../X86/3dnow_xform_raw1.S \ ../X86/3dnow_xform_raw2.S \ ../X86/3dnow_xform_raw3.S \ - ../X86/3dnow_xform_raw4.S \ - ../X86/vertex_3dnow.S + ../X86/3dnow_xform_raw4.S 3DNOW_OBJS = ../X86/3dnow.o \ ../X86/3dnow_norm_raw.o \ + ../X86/3dnow_vertex.o \ ../X86/3dnow_xform_masked1.o \ ../X86/3dnow_xform_masked2.o \ ../X86/3dnow_xform_masked3.o \ @@ -210,14 +222,38 @@ XCOMM Disabling 3Dnow code for the time being. 
../X86/3dnow_xform_raw1.o \ ../X86/3dnow_xform_raw2.o \ ../X86/3dnow_xform_raw3.o \ - ../X86/3dnow_xform_raw4.o \ - ../X86/vertex_3dnow.o + ../X86/3dnow_xform_raw4.o #endif +#ifdef MesaUseKatmai + KATMAI_SRCS = ../X86/katmai.c \ + ../X86/katmai_norm_raw.S \ + ../X86/katmai_vertex.S \ + ../X86/katmai_xform_masked1.S \ + ../X86/katmai_xform_masked2.S \ + ../X86/katmai_xform_masked3.S \ + ../X86/katmai_xform_masked4.S \ + ../X86/katmai_xform_raw1.S \ + ../X86/katmai_xform_raw2.S \ + ../X86/katmai_xform_raw3.S \ + ../X86/katmai_xform_raw4.S + + KATMAI_OBJS = ../X86/katmai.o \ + ../X86/katmai_norm_raw.o \ + ../X86/katmai_vertex.o \ + ../X86/katmai_xform_masked1.o \ + ../X86/katmai_xform_masked2.o \ + ../X86/katmai_xform_masked3.o \ + ../X86/katmai_xform_masked4.o \ + ../X86/katmai_xform_raw1.o \ + ../X86/katmai_xform_raw2.o \ + ../X86/katmai_xform_raw3.o \ + ../X86/katmai_xform_raw4.o +#endif #endif - ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) - ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) + ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) $(KATMAI_SRCS) + ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) $(KATMAI_OBJS) SRCS = $(MESASRCS) $(ASMSRCS) OBJS = $(MESAOBJS) $(ASMOBJS) diff --git a/xc/lib/GL/mesa/src/X86/Imakefile b/xc/lib/GL/mesa/src/X86/Imakefile index e3239ecd8..3f354c901 100644 --- a/xc/lib/GL/mesa/src/X86/Imakefile +++ b/xc/lib/GL/mesa/src/X86/Imakefile @@ -8,14 +8,24 @@ XCOMM $XFree86: xc/lib/GL/mesa/src/X86/Imakefile,v 1.11 2000/09/24 13:51:02 alan LinkSourceFile(assyntax.h, $(MESASRCDIR)/src/X86) LinkSourceFile(common_x86.c, $(MESASRCDIR)/src/X86) -LinkSourceFile(common_x86asm.S, $(MESASRCDIR)/src/X86) -LinkSourceFile(common_x86asm.h, $(MESASRCDIR)/src/X86) +LinkSourceFile(common_x86_asm.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(common_x86_asm.h, $(MESASRCDIR)/src/X86) +LinkSourceFile(common_x86_features.h, $(MESASRCDIR)/src/X86) LinkSourceFile(glapi_x86.S, $(MESASRCDIR)/src/X86) + +LinkSourceFile(xform_args.h, $(MESASRCDIR)/src/X86) 
+LinkSourceFile(clip_args.h, $(MESASRCDIR)/src/X86) + LinkSourceFile(x86.c, $(MESASRCDIR)/src/X86) LinkSourceFile(x86.h, $(MESASRCDIR)/src/X86) -LinkSourceFile(x86a.S, $(MESASRCDIR)/src/X86) -LinkSourceFile(vertex.S, $(MESASRCDIR)/src/X86) -LinkSourceFile(x86flatregs.m4, $(MESASRCDIR)/src/X86) +LinkSourceFile(x86_cliptest.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(x86_vertex.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(x86_xform_masked2.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(x86_xform_masked3.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(x86_xform_masked4.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(x86_xform_raw2.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(x86_xform_raw3.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(x86_xform_raw4.S, $(MESASRCDIR)/src/X86) LinkSourceFile(mmx.h, $(MESASRCDIR)/src/X86) LinkSourceFile(mmx_blend.S, $(MESASRCDIR)/src/X86) @@ -23,6 +33,7 @@ LinkSourceFile(mmx_blend.S, $(MESASRCDIR)/src/X86) LinkSourceFile(3dnow.c, $(MESASRCDIR)/src/X86) LinkSourceFile(3dnow.h, $(MESASRCDIR)/src/X86) LinkSourceFile(3dnow_norm_raw.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(3dnow_vertex.S, $(MESASRCDIR)/src/X86) LinkSourceFile(3dnow_xform_masked1.S, $(MESASRCDIR)/src/X86) LinkSourceFile(3dnow_xform_masked2.S, $(MESASRCDIR)/src/X86) LinkSourceFile(3dnow_xform_masked3.S, $(MESASRCDIR)/src/X86) @@ -31,11 +42,11 @@ LinkSourceFile(3dnow_xform_raw1.S, $(MESASRCDIR)/src/X86) LinkSourceFile(3dnow_xform_raw2.S, $(MESASRCDIR)/src/X86) LinkSourceFile(3dnow_xform_raw3.S, $(MESASRCDIR)/src/X86) LinkSourceFile(3dnow_xform_raw4.S, $(MESASRCDIR)/src/X86) -LinkSourceFile(vertex_3dnow.S, $(MESASRCDIR)/src/X86) LinkSourceFile(katmai.c, $(MESASRCDIR)/src/X86) LinkSourceFile(katmai.h, $(MESASRCDIR)/src/X86) LinkSourceFile(katmai_norm_raw.S, $(MESASRCDIR)/src/X86) +LinkSourceFile(katmai_vertex.S, $(MESASRCDIR)/src/X86) LinkSourceFile(katmai_xform_masked1.S, $(MESASRCDIR)/src/X86) LinkSourceFile(katmai_xform_masked2.S, $(MESASRCDIR)/src/X86) LinkSourceFile(katmai_xform_masked3.S, 
$(MESASRCDIR)/src/X86) @@ -44,7 +55,6 @@ LinkSourceFile(katmai_xform_raw1.S, $(MESASRCDIR)/src/X86) LinkSourceFile(katmai_xform_raw2.S, $(MESASRCDIR)/src/X86) LinkSourceFile(katmai_xform_raw3.S, $(MESASRCDIR)/src/X86) LinkSourceFile(katmai_xform_raw4.S, $(MESASRCDIR)/src/X86) -LinkSourceFile(vertex_katmai.S, $(MESASRCDIR)/src/X86) LinkSourceFile(glapioffsets.h, $(MESASRCDIR)/src) @@ -53,50 +63,95 @@ ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL #endif #ifdef i386Architecture -XCOMM Determine at runtime whether 3dNow, Katmai, MMX, etc are really present. - X86_SRCS = x86a.S common_x86.c common_x86asm.S glapi_x86.S x86.c vertex.S - - X86_OBJS = x86a.o common_x86.o common_x86asm.o x86.o vertex.o - - X86_DEFS = -DUSE_X86_ASM - - - MMX_SRCS = mmx_blend.S - - MMX_OBJS = mmx_blend.o - - MMX_DEFS = -DUSE_MMX_ASM +XCOMM Determine at runtime whether 3DNow!, Katmai, MMX, etc are really present. + X86_SRCS = common_x86.c \ + common_x86_asm.S \ + glapi_x86.S \ + x86.c \ + x86_cliptest.S \ + x86_vertex.S \ + x86_xform_masked2.S \ + x86_xform_masked3.S \ + x86_xform_masked4.S \ + x86_xform_raw2.S \ + x86_xform_raw3.S \ + x86_xform_raw4.S + + X86_OBJS = common_x86.o \ + common_x86_asm.o \ + x86.o \ + x86_cliptest.o \ + x86_vertex.o \ + x86_xform_masked2.o \ + x86_xform_masked3.o \ + x86_xform_masked4.o \ + x86_xform_raw2.o \ + x86_xform_raw3.o \ + x86_xform_raw4.o + + X86_DEFS = -DUSE_X86_ASM + + + MMX_SRCS = mmx_blend.S + + MMX_OBJS = mmx_blend.o + + MMX_DEFS = -DUSE_MMX_ASM #ifdef MesaUse3DNow - 3DNOW_SRCS = 3dnow.c 3dnow_norm_raw.S 3dnow_xform_masked1.S \ - 3dnow_xform_masked2.S 3dnow_xform_masked3.S \ - 3dnow_xform_masked4.S 3dnow_xform_raw1.S \ - 3dnow_xform_raw2.S 3dnow_xform_raw3.S 3dnow_xform_raw4.S \ - vertex_3dnow.S - - 3DNOW_OBJS = 3dnow.o 3dnow_norm_raw.o 3dnow_xform_masked1.o \ - 3dnow_xform_masked2.o 3dnow_xform_masked3.o \ - 3dnow_xform_masked4.o 3dnow_xform_raw1.o \ - 3dnow_xform_raw2.o 3dnow_xform_raw3.o 3dnow_xform_raw4.o \ - vertex_3dnow.o - - 3DNOW_DEFS = 
-DUSE_3DNOW_ASM + 3DNOW_SRCS = 3dnow.c \ + 3dnow_norm_raw.S \ + 3dnow_vertex.S \ + 3dnow_xform_masked1.S \ + 3dnow_xform_masked2.S \ + 3dnow_xform_masked3.S \ + 3dnow_xform_masked4.S \ + 3dnow_xform_raw1.S \ + 3dnow_xform_raw2.S \ + 3dnow_xform_raw3.S \ + 3dnow_xform_raw4.S \ + + 3DNOW_OBJS = 3dnow.o \ + 3dnow_norm_raw.o \ + 3dnow_vertex.o \ + 3dnow_xform_masked1.o \ + 3dnow_xform_masked2.o \ + 3dnow_xform_masked3.o \ + 3dnow_xform_masked4.o \ + 3dnow_xform_raw1.o \ + 3dnow_xform_raw2.o \ + 3dnow_xform_raw3.o \ + 3dnow_xform_raw4.o + + 3DNOW_DEFS = -DUSE_3DNOW_ASM #endif #ifdef MesaUseKatmai - KATMAI_SRCS = katmai.c katmai_norm_raw.S katmai_xform_masked1.S \ - katmai_xform_masked2.S katmai_xform_masked3.S \ - katmai_xform_masked4.S katmai_xform_raw1.S \ - katmai_xform_raw2.S katmai_xform_raw3.S katmai_xform_raw4.S \ - vertex_katmai.S - - KATMAI_OBJS = katmai.o katmai_norm_raw.o katmai_xform_masked1.o \ - katmai_xform_masked2.o katmai_xform_masked3.o \ - katmai_xform_masked4.o katmai_xform_raw1.o \ - katmai_xform_raw2.o katmai_xform_raw3.o katmai_xform_raw4.o \ - vertex_katmai.o - - KATMAI_DEFS = -DUSE_KATMAI_ASM + KATMAI_SRCS = katmai.c \ + katmai_norm_raw.S \ + katmai_vertex.S \ + katmai_xform_masked1.S \ + katmai_xform_masked2.S \ + katmai_xform_masked3.S \ + katmai_xform_masked4.S \ + katmai_xform_raw1.S \ + katmai_xform_raw2.S \ + katmai_xform_raw3.S \ + katmai_xform_raw4.S \ + + KATMAI_OBJS = katmai.o \ + katmai_norm_raw.o \ + katmai_vertex.o \ + katmai_xform_masked1.o \ + katmai_xform_masked2.o \ + katmai_xform_masked3.o \ + katmai_xform_masked4.o \ + katmai_xform_raw1.o \ + katmai_xform_raw2.o \ + katmai_xform_raw3.o \ + katmai_xform_raw4.o + + KATMAI_DEFS = -DUSE_KATMAI_ASM #endif #endif @@ -122,8 +177,22 @@ STD_CPP_DEFINES = StandardDefines $(PROJECT_DEFINES) SubdirLibraryRule($(OBJS)) NormalLintTarget($(SRCS)) +ObjectFromAsmSource(common_x86_asm, NullParameter) + +ObjectFromAsmSource(x86_cliptest, NullParameter) +ObjectFromAsmSource(x86_vertex, 
NullParameter) +ObjectFromAsmSource(x86_xform_masked2, NullParameter) +ObjectFromAsmSource(x86_xform_masked3, NullParameter) +ObjectFromAsmSource(x86_xform_masked4, NullParameter) +ObjectFromAsmSource(x86_xform_raw2, NullParameter) +ObjectFromAsmSource(x86_xform_raw3, NullParameter) +ObjectFromAsmSource(x86_xform_raw4, NullParameter) + +ObjectFromAsmSource(mmx_blend, NullParameter) + #ifdef MesaUse3DNow ObjectFromAsmSource(3dnow_norm_raw, NullParameter) +ObjectFromAsmSource(3dnow_vertex, NullParameter) ObjectFromAsmSource(3dnow_xform_masked1, NullParameter) ObjectFromAsmSource(3dnow_xform_masked2, NullParameter) ObjectFromAsmSource(3dnow_xform_masked3, NullParameter) @@ -132,11 +201,11 @@ ObjectFromAsmSource(3dnow_xform_raw1, NullParameter) ObjectFromAsmSource(3dnow_xform_raw2, NullParameter) ObjectFromAsmSource(3dnow_xform_raw3, NullParameter) ObjectFromAsmSource(3dnow_xform_raw4, NullParameter) -ObjectFromAsmSource(vertex_3dnow, NullParameter) #endif #ifdef MesaUseKatmai ObjectFromAsmSource(katmai_norm_raw, NullParameter) +ObjectFromAsmSource(katmai_vertex, NullParameter) ObjectFromAsmSource(katmai_xform_masked1, NullParameter) ObjectFromAsmSource(katmai_xform_masked2, NullParameter) ObjectFromAsmSource(katmai_xform_masked3, NullParameter) @@ -145,14 +214,6 @@ ObjectFromAsmSource(katmai_xform_raw1, NullParameter) ObjectFromAsmSource(katmai_xform_raw2, NullParameter) ObjectFromAsmSource(katmai_xform_raw3, NullParameter) ObjectFromAsmSource(katmai_xform_raw4, NullParameter) -ObjectFromAsmSource(vertex_katmai, NullParameter) #endif -ObjectFromAsmSource(mmx_blend, NullParameter) - -ObjectFromAsmSource(common_x86asm, NullParameter) -ObjectFromAsmSource(vertex, NullParameter) -ObjectFromAsmSource(x86a, NullParameter) - DependTarget() - diff --git a/xc/lib/GL/mesa/src/drv/gamma/Imakefile b/xc/lib/GL/mesa/src/drv/gamma/Imakefile index ff31fdf2b..7282cf2fb 100644 --- a/xc/lib/GL/mesa/src/drv/gamma/Imakefile +++ b/xc/lib/GL/mesa/src/drv/gamma/Imakefile @@ -12,6 +12,16 @@ 
XCOMM $XFree86: xc/lib/GL/mesa/src/drv/gamma/Imakefile,v 1.14 2000/10/20 12:57:2 ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL #endif +#ifdef i386Architecture +#ifdef MesaUse3DNow + 3DNOW_DEFS = -DUSE_3DNOW_ASM +#endif +#ifdef MesaUseKatmai + KATMAI_DEFS = -DUSE_KATMAI_ASM +#endif + ASM_DEFINES = -DUSE_X86_ASM -DUSE_MMX_ASM $(3DNOW_DEFS) $(KATMAI_DEFS) +#endif + #if BuildXF86DRI DRI_DEFINES = GlxDefines -DDRIVERTS DRI_INCLUDES = -I../../../../dri -I../../../../glx \ @@ -26,17 +36,9 @@ MESA_INCLUDES = -I. -I.. -I../../include \ -I../../../../dri/drm - DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) + DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) $(ASM_DEFINES) INCLUDES = -I$(XLIBSRC) -I$(EXTINCSRC) $(MESA_INCLUDES) $(DRI_INCLUDES) -#if 0 - LOSRC = ../../../../lowpc.c - LOOBJ = ../../../../lowpc.o - - HISRC = ../../../../highpc.c - HIOBJ = ../../../../highpc.o -#endif - DRISRCS = ../../../dri/dri_mesa.c \ ../../../../dri/dri_tmm.c @@ -53,11 +55,21 @@ MESA_INCLUDES = -I. -I.. -I../../include \ ../../../../dri/drm/xf86drmRandom.o \ ../../../../dri/drm/xf86drmSL.o - GAMMASRCS = gamma_gl.c gamma_xmesa.c gamma_init.c gamma_matrix.c \ - gamma_inithw.c gamma_texture.c /* gamma_dlist.c */ - - GAMMAOBJS = gamma_gl.o gamma_xmesa.o gamma_init.o gamma_matrix.o \ - gamma_inithw.o gamma_texture.o /* gamma_dlist.o */ + GAMMASRCS = gamma_gl.c \ + gamma_xmesa.c \ + gamma_init.c \ + gamma_matrix.c \ + gamma_inithw.c \ + gamma_texture.c \ + /* gamma_dlist.c */ + + GAMMAOBJS = gamma_gl.o \ + gamma_xmesa.o \ + gamma_init.o \ + gamma_matrix.o \ + gamma_inithw.o \ + gamma_texture.o \ + /* gamma_dlist.o */ MESASRCS = ../../aatriangle.c \ ../../accum.c \ @@ -208,26 +220,38 @@ MESA_INCLUDES = -I. -I.. 
-I../../include \ ../../zoom.o #ifdef i386Architecture - X86_SRCS = ../../X86/x86.c \ - ../../X86/x86a.S \ - ../../X86/common_x86.c \ - ../../X86/common_x86asm.S \ - ../../X86/vertex.S - - X86_OBJS = ../../X86/x86.o \ - ../../X86/x86a.o \ - ../../X86/common_x86.o \ - ../../X86/common_x86asm.o \ - ../../X86/vertex.o + X86_SRCS = ../../X86/common_x86.c \ + ../../X86/common_x86_asm.S \ + ../../X86/x86.c \ + ../../X86/x86_cliptest.S \ + ../../X86/x86_vertex.S \ + ../../X86/x86_xform_masked2.S \ + ../../X86/x86_xform_masked3.S \ + ../../X86/x86_xform_masked4.S \ + ../../X86/x86_xform_raw2.S \ + ../../X86/x86_xform_raw3.S \ + ../../X86/x86_xform_raw4.S + + X86_OBJS = ../../X86/common_x86.o \ + ../../X86/common_x86_asm.o \ + ../../X86/x86.o \ + ../../X86/x86_cliptest.o \ + ../../X86/x86_vertex.o \ + ../../X86/x86_xform_masked2.o \ + ../../X86/x86_xform_masked3.o \ + ../../X86/x86_xform_masked4.o \ + ../../X86/x86_xform_raw2.o \ + ../../X86/x86_xform_raw3.o \ + ../../X86/x86_xform_raw4.o MMX_SRCS = ../../X86/mmx_blend.S MMX_OBJS = ../../X86/mmx_blend.o -XCOMM Disabling 3Dnow code for the time being. -#if 0 +#ifdef MesaUse3DNow 3DNOW_SRCS = ../../X86/3dnow.c \ ../../X86/3dnow_norm_raw.S \ + ../../X86/3dnow_vertex.S \ ../../X86/3dnow_xform_masked1.S \ ../../X86/3dnow_xform_masked2.S \ ../../X86/3dnow_xform_masked3.S \ @@ -235,11 +259,11 @@ XCOMM Disabling 3Dnow code for the time being. ../../X86/3dnow_xform_raw1.S \ ../../X86/3dnow_xform_raw2.S \ ../../X86/3dnow_xform_raw3.S \ - ../../X86/3dnow_xform_raw4.S \ - ../../X86/vertex_3dnow.S + ../../X86/3dnow_xform_raw4.S 3DNOW_OBJS = ../../X86/3dnow.o \ ../../X86/3dnow_norm_raw.o \ + ../../X86/3dnow_vertex.o \ ../../X86/3dnow_xform_masked1.o \ ../../X86/3dnow_xform_masked2.o \ ../../X86/3dnow_xform_masked3.o \ @@ -247,17 +271,51 @@ XCOMM Disabling 3Dnow code for the time being. 
../../X86/3dnow_xform_raw1.o \ ../../X86/3dnow_xform_raw2.o \ ../../X86/3dnow_xform_raw3.o \ - ../../X86/3dnow_xform_raw4.o \ - ../../X86/vertex_3dnow.o + ../../X86/3dnow_xform_raw4.o #endif +#ifdef MesaUseKatmai + KATMAI_SRCS = ../../X86/katmai.c \ + ../../X86/katmai_norm_raw.S \ + ../../X86/katmai_vertex.S \ + ../../X86/katmai_xform_masked1.S \ + ../../X86/katmai_xform_masked2.S \ + ../../X86/katmai_xform_masked3.S \ + ../../X86/katmai_xform_masked4.S \ + ../../X86/katmai_xform_raw1.S \ + ../../X86/katmai_xform_raw2.S \ + ../../X86/katmai_xform_raw3.S \ + ../../X86/katmai_xform_raw4.S + + KATMAI_OBJS = ../../X86/katmai.o \ + ../../X86/katmai_norm_raw.o \ + ../../X86/katmai_vertex.o \ + ../../X86/katmai_xform_masked1.o \ + ../../X86/katmai_xform_masked2.o \ + ../../X86/katmai_xform_masked3.o \ + ../../X86/katmai_xform_masked4.o \ + ../../X86/katmai_xform_raw1.o \ + ../../X86/katmai_xform_raw2.o \ + ../../X86/katmai_xform_raw3.o \ + ../../X86/katmai_xform_raw4.o +#endif #endif - ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) - ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) +#ifdef GlxSoProf + LOSRCS = ../../../../lowpc.c + HISRCS = ../../../../highpc.c + + LOOBJS = ../../../../lowpc.o + HIOBJS = ../../../../highpc.o +#endif - SRCS = $(LOWSRC) $(DRISRCS) $(DRMSRCS) $(MESASRCS) $(ASMSRCS) $(GAMMASRCS) $(HISRC) - OBJS = $(LOWOBJ) $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) $(ASMOBJS) $(GAMMAOBJS) $(HIOBJ) + ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) $(KATMAI_SRCS) + ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) $(KATMAI_OBJS) + + SRCS = $(LOWSRCS) $(DRISRCS) $(DRMSRCS) $(MESASRCS) \ + $(ASMSRCS) $(GAMMASRCS) $(HISRCS) + OBJS = $(LOWOBJS) $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) \ + $(ASMOBJS) $(GAMMAOBJS) $(HIOBJS) REQUIREDLIBS += MathLibrary #if !GlxBuiltInGamma @@ -285,6 +343,12 @@ ALL_OBJS = $(OBJS) ALL_DEPS = DONE SharedDepModuleTarget($(LIBNAME),$(ALL_DEPS),$(ALL_OBJS)) InstallDynamicModule($(LIBNAME),$(MODULEDIR),dri) + +#ifdef GlxSoProf +SOPROF_LIBNAME = _gamma_dri_p 
+NormalDepLibraryTarget($(SOPROF_LIBNAME),$(ALL_DEPS),$(ALL_OBJS)) +InstallLibrary($(SOPROF_LIBNAME),$(MODULEDIR)/dri) +#endif #endif DependTarget() diff --git a/xc/lib/GL/mesa/src/drv/i810/Imakefile b/xc/lib/GL/mesa/src/drv/i810/Imakefile index b66d715c1..bfe0bc81d 100644 --- a/xc/lib/GL/mesa/src/drv/i810/Imakefile +++ b/xc/lib/GL/mesa/src/drv/i810/Imakefile @@ -12,6 +12,16 @@ XCOMM $XFree86: xc/lib/GL/mesa/src/drv/i810/Imakefile,v 1.11 2000/10/20 12:57:22 ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL #endif +#ifdef i386Architecture +#ifdef MesaUse3DNow + 3DNOW_DEFS = -DUSE_3DNOW_ASM +#endif +#ifdef MesaUseKatmai + KATMAI_DEFS = -DUSE_KATMAI_ASM +#endif + ASM_DEFINES = -DUSE_X86_ASM -DUSE_MMX_ASM $(3DNOW_DEFS) $(KATMAI_DEFS) +#endif + #if BuildXF86DRI DRI_DEFINES = GlxDefines -DDRIVERTS DRI_INCLUDES = -I../../../../dri -I../../../../glx \ @@ -25,7 +35,7 @@ ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL MESA_INCLUDES = -I. -I.. -I../../include \ -I../../../../dri/drm - DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) + DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) $(ASM_DEFINES) INCLUDES = -I$(XLIBSRC) -I$(EXTINCSRC) $(MESA_INCLUDES) $(DRI_INCLUDES) DRISRCS = ../../../dri/dri_mesa.c \ @@ -44,23 +54,27 @@ MESA_INCLUDES = -I. -I.. 
-I../../include \ ../../../../dri/drm/xf86drmRandom.o \ ../../../../dri/drm/xf86drmSL.o -#ifdef GlxSoProf - HIOBJS = ../../../../highpc.o - LOOBJS = ../../../../lowpc.o -#else - HIOBJS = - LOOBJS = -#endif - - I810SRCS = i810_xmesa.c \ - i810dd.c \ - i810pipeline.c i810span.c i810state.c \ - i810tex.c i810tris.c i810vb.c i810fastpath.c i810ioctl.c + I810SRCS = i810_xmesa.c \ + i810dd.c \ + i810pipeline.c \ + i810span.c \ + i810state.c \ + i810tex.c \ + i810tris.c \ + i810vb.c \ + i810fastpath.c \ + i810ioctl.c I810OBJS = i810_xmesa.o \ - i810dd.o \ - i810pipeline.o i810span.o i810state.o \ - i810tex.o i810tris.o i810vb.o i810fastpath.o i810ioctl.o + i810dd.o \ + i810pipeline.o \ + i810span.o \ + i810state.o \ + i810tex.o \ + i810tris.o \ + i810vb.o \ + i810fastpath.o \ + i810ioctl.o MESASRCS = ../../aatriangle.c \ ../../accum.c \ @@ -211,26 +225,38 @@ MESA_INCLUDES = -I. -I.. -I../../include \ ../../zoom.o #ifdef i386Architecture - X86_SRCS = ../../X86/x86.c \ - ../../X86/x86a.S \ - ../../X86/common_x86.c \ - ../../X86/common_x86asm.S \ - ../../X86/vertex.S - - X86_OBJS = ../../X86/x86.o \ - ../../X86/x86a.o \ - ../../X86/common_x86.o \ - ../../X86/common_x86asm.o \ - ../../X86/vertex.o + X86_SRCS = ../../X86/common_x86.c \ + ../../X86/common_x86_asm.S \ + ../../X86/x86.c \ + ../../X86/x86_cliptest.S \ + ../../X86/x86_vertex.S \ + ../../X86/x86_xform_masked2.S \ + ../../X86/x86_xform_masked3.S \ + ../../X86/x86_xform_masked4.S \ + ../../X86/x86_xform_raw2.S \ + ../../X86/x86_xform_raw3.S \ + ../../X86/x86_xform_raw4.S + + X86_OBJS = ../../X86/common_x86.o \ + ../../X86/common_x86_asm.o \ + ../../X86/x86.o \ + ../../X86/x86_cliptest.o \ + ../../X86/x86_vertex.o \ + ../../X86/x86_xform_masked2.o \ + ../../X86/x86_xform_masked3.o \ + ../../X86/x86_xform_masked4.o \ + ../../X86/x86_xform_raw2.o \ + ../../X86/x86_xform_raw3.o \ + ../../X86/x86_xform_raw4.o MMX_SRCS = ../../X86/mmx_blend.S MMX_OBJS = ../../X86/mmx_blend.o -XCOMM Disabling 3Dnow code for the time being. 
#ifdef MesaUse3DNow 3DNOW_SRCS = ../../X86/3dnow.c \ ../../X86/3dnow_norm_raw.S \ + ../../X86/3dnow_vertex.S \ ../../X86/3dnow_xform_masked1.S \ ../../X86/3dnow_xform_masked2.S \ ../../X86/3dnow_xform_masked3.S \ @@ -238,11 +264,11 @@ XCOMM Disabling 3Dnow code for the time being. ../../X86/3dnow_xform_raw1.S \ ../../X86/3dnow_xform_raw2.S \ ../../X86/3dnow_xform_raw3.S \ - ../../X86/3dnow_xform_raw4.S \ - ../../X86/vertex_3dnow.S + ../../X86/3dnow_xform_raw4.S 3DNOW_OBJS = ../../X86/3dnow.o \ ../../X86/3dnow_norm_raw.o \ + ../../X86/3dnow_vertex.o \ ../../X86/3dnow_xform_masked1.o \ ../../X86/3dnow_xform_masked2.o \ ../../X86/3dnow_xform_masked3.o \ @@ -250,22 +276,54 @@ XCOMM Disabling 3Dnow code for the time being. ../../X86/3dnow_xform_raw1.o \ ../../X86/3dnow_xform_raw2.o \ ../../X86/3dnow_xform_raw3.o \ - ../../X86/3dnow_xform_raw4.o \ - ../../X86/vertex_3dnow.o + ../../X86/3dnow_xform_raw4.o +#endif + +#ifdef MesaUseKatmai + KATMAI_SRCS = ../../X86/katmai.c \ + ../../X86/katmai_norm_raw.S \ + ../../X86/katmai_vertex.S \ + ../../X86/katmai_xform_masked1.S \ + ../../X86/katmai_xform_masked2.S \ + ../../X86/katmai_xform_masked3.S \ + ../../X86/katmai_xform_masked4.S \ + ../../X86/katmai_xform_raw1.S \ + ../../X86/katmai_xform_raw2.S \ + ../../X86/katmai_xform_raw3.S \ + ../../X86/katmai_xform_raw4.S + + KATMAI_OBJS = ../../X86/katmai.o \ + ../../X86/katmai_norm_raw.o \ + ../../X86/katmai_vertex.o \ + ../../X86/katmai_xform_masked1.o \ + ../../X86/katmai_xform_masked2.o \ + ../../X86/katmai_xform_masked3.o \ + ../../X86/katmai_xform_masked4.o \ + ../../X86/katmai_xform_raw1.o \ + ../../X86/katmai_xform_raw2.o \ + ../../X86/katmai_xform_raw3.o \ + ../../X86/katmai_xform_raw4.o #endif +#endif + +#ifdef GlxSoProf + LOSRCS = ../../../../lowpc.c + HISRCS = ../../../../highpc.c + LOOBJS = ../../../../lowpc.o + HIOBJS = ../../../../highpc.o #endif - ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) - ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) + ASMSRCS = $(X86_SRCS) 
$(MMX_SRCS) $(3DNOW_SRCS) $(KATMAI_SRCS) + ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) $(KATMAI_OBJS) - COMMONSRCS = ../common/mm.c ../common/hwlog.c - COMMONOBJS = ../common/mm.o ../common/hwlog.o + COMMONSRCS = ../common/mm.c ../common/hwlog.c + COMMONOBJS = ../common/mm.o ../common/hwlog.o - SRCS = $(LOSRCS) $(DRISRCS) $(DRMSRCS) $(MESASRCS) $(ASMSRCS) \ - $(COMMONSRCS) $(I810SRCS) $(HISRCS) - OBJS = $(LOOBJS) $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) $(ASMOBJS) \ - $(COMMONOBJS) $(I810OBJS) $(HIOBJS) + SRCS = $(LOSRCS) $(DRISRCS) $(DRMSRCS) $(MESASRCS) \ + $(ASMSRCS) $(COMMONSRCS) $(I810SRCS) $(HISRCS) + OBJS = $(LOOBJS) $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) \ + $(ASMOBJS) $(COMMONOBJS) $(I810OBJS) $(HIOBJS) REQUIREDLIBS += MathLibrary #if !GlxBuiltInI810 @@ -301,6 +359,4 @@ InstallLibrary($(SOPROF_LIBNAME),$(MODULEDIR)/dri) #endif #endif - - DependTarget() diff --git a/xc/lib/GL/mesa/src/drv/i810/i810tris.h b/xc/lib/GL/mesa/src/drv/i810/i810tris.h index 20e7cbd80..547a3f43a 100644 --- a/xc/lib/GL/mesa/src/drv/i810/i810tris.h +++ b/xc/lib/GL/mesa/src/drv/i810/i810tris.h @@ -133,6 +133,7 @@ static __inline__ void i810_draw_line( i810ContextPtr imesa, { GLuint vertsize = imesa->vertsize; GLuint *vb = i810AllocDwordsInline( imesa, 2 * vertsize ); + int j; #if defined(USE_X86_ASM) __asm__ __volatile__( "rep ; movsl" @@ -144,8 +145,6 @@ static __inline__ void i810_draw_line( i810ContextPtr imesa, : "0" (vertsize), "S" ((long)v1) : "memory" ); #else - int j; - for (j = 0 ; j < vertsize ; j++) vb[j] = v0->ui[j]; diff --git a/xc/lib/GL/mesa/src/drv/mga/Imakefile b/xc/lib/GL/mesa/src/drv/mga/Imakefile index ba384a1ee..c2fe27a47 100644 --- a/xc/lib/GL/mesa/src/drv/mga/Imakefile +++ b/xc/lib/GL/mesa/src/drv/mga/Imakefile @@ -14,12 +14,13 @@ ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL #ifdef i386Architecture #ifdef MesaUse3DNow - ASM_DEFS = -DUSE_MMX_ASM -DUSE_X86_ASM -DUSE_3DNOW_ASM -#else - ASM_DEFS = -DUSE_MMX_ASM -DUSE_X86_ASM + 3DNOW_DEFS = -DUSE_3DNOW_ASM #endif +#ifdef 
MesaUseKatmai + KATMAI_DEFS = -DUSE_KATMAI_ASM +#endif + ASM_DEFINES = -DUSE_X86_ASM -DUSE_MMX_ASM $(3DNOW_DEFS) $(KATMAI_DEFS) #endif - #if BuildXF86DRI DRI_DEFINES = GlxDefines -DDRIVERTS @@ -34,9 +35,8 @@ ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL MESA_INCLUDES = -I. -I.. -I../../include \ -I../../../../dri/drm - DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) $(ASM_DEFS) - INCLUDES = -I$(XLIBSRC) -I$(EXTINCSRC) $(MESA_INCLUDES) $(DRI_INCLUDES) - + DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) $(ASM_DEFINES) + INCLUDES = -I$(XLIBSRC) -I$(EXTINCSRC) $(MESA_INCLUDES) $(DRI_INCLUDES) DRISRCS = ../../../dri/dri_mesa.c \ ../../../../dri/dri_tmm.c @@ -54,16 +54,35 @@ MESA_INCLUDES = -I. -I.. -I../../include \ ../../../../dri/drm/xf86drmRandom.o \ ../../../../dri/drm/xf86drmSL.o - - MGASRCS = mgatexcnv.c mgadd.c mgafastpath.c \ - mgaeltpath.c mgapipeline.c mgaspan.c mgastate.c \ - mgatex.c mgatexmem.c mgatris.c mgavb.c mgaioctl.c \ - mga_xmesa.c mgabuffers.c - - MGAOBJS = mgatexcnv.o mgadd.o mgafastpath.o \ - mgaeltpath.o mgapipeline.o mgaspan.o mgastate.o \ - mgatex.o mgatexmem.o mgatris.o mgavb.o mgaioctl.o \ - mga_xmesa.o mgabuffers.o + MGASRCS = mgabuffers.c \ + mgadd.c \ + mgaeltpath.c \ + mgafastpath.c \ + mgaioctl.c \ + mgapipeline.c \ + mgaspan.c \ + mgastate.c \ + mgatex.c \ + mgatexcnv.c \ + mgatexmem.c \ + mgatris.c \ + mgavb.c \ + mga_xmesa.c \ + + MGAOBJS = mgabuffers.o \ + mgadd.o \ + mgaeltpath.o \ + mgafastpath.o \ + mgaioctl.o \ + mgapipeline.o \ + mgaspan.o \ + mgastate.o \ + mgatex.o \ + mgatexcnv.o \ + mgatexmem.o \ + mgatris.o \ + mgavb.o \ + mga_xmesa.o MESASRCS = ../../aatriangle.c \ ../../accum.c \ @@ -214,17 +233,29 @@ MESA_INCLUDES = -I. -I.. 
-I../../include \ ../../zoom.o #ifdef i386Architecture - X86_SRCS = ../../X86/x86.c \ - ../../X86/x86a.S \ - ../../X86/common_x86.c \ - ../../X86/common_x86asm.S \ - ../../X86/vertex.S - - X86_OBJS = ../../X86/x86.o \ - ../../X86/x86a.o \ - ../../X86/common_x86.o \ - ../../X86/common_x86asm.o \ - ../../X86/vertex.o + X86_SRCS = ../../X86/common_x86.c \ + ../../X86/common_x86_asm.S \ + ../../X86/x86.c \ + ../../X86/x86_cliptest.S \ + ../../X86/x86_vertex.S \ + ../../X86/x86_xform_masked2.S \ + ../../X86/x86_xform_masked3.S \ + ../../X86/x86_xform_masked4.S \ + ../../X86/x86_xform_raw2.S \ + ../../X86/x86_xform_raw3.S \ + ../../X86/x86_xform_raw4.S + + X86_OBJS = ../../X86/common_x86.o \ + ../../X86/common_x86_asm.o \ + ../../X86/x86.o \ + ../../X86/x86_cliptest.o \ + ../../X86/x86_vertex.o \ + ../../X86/x86_xform_masked2.o \ + ../../X86/x86_xform_masked3.o \ + ../../X86/x86_xform_masked4.o \ + ../../X86/x86_xform_raw2.o \ + ../../X86/x86_xform_raw3.o \ + ../../X86/x86_xform_raw4.o MMX_SRCS = ../../X86/mmx_blend.S @@ -233,6 +264,7 @@ MESA_INCLUDES = -I. -I.. -I../../include \ #ifdef MesaUse3DNow 3DNOW_SRCS = ../../X86/3dnow.c \ ../../X86/3dnow_norm_raw.S \ + ../../X86/3dnow_vertex.S \ ../../X86/3dnow_xform_masked1.S \ ../../X86/3dnow_xform_masked2.S \ ../../X86/3dnow_xform_masked3.S \ @@ -240,11 +272,11 @@ MESA_INCLUDES = -I. -I.. -I../../include \ ../../X86/3dnow_xform_raw1.S \ ../../X86/3dnow_xform_raw2.S \ ../../X86/3dnow_xform_raw3.S \ - ../../X86/3dnow_xform_raw4.S \ - ../../X86/vertex_3dnow.S + ../../X86/3dnow_xform_raw4.S 3DNOW_OBJS = ../../X86/3dnow.o \ ../../X86/3dnow_norm_raw.o \ + ../../X86/3dnow_vertex.o \ ../../X86/3dnow_xform_masked1.o \ ../../X86/3dnow_xform_masked2.o \ ../../X86/3dnow_xform_masked3.o \ @@ -252,29 +284,54 @@ MESA_INCLUDES = -I. -I.. 
-I../../include \ ../../X86/3dnow_xform_raw1.o \ ../../X86/3dnow_xform_raw2.o \ ../../X86/3dnow_xform_raw3.o \ - ../../X86/3dnow_xform_raw4.o \ - ../../X86/vertex_3dnow.o + ../../X86/3dnow_xform_raw4.o +#endif +#ifdef MesaUseKatmai + KATMAI_SRCS = ../../X86/katmai.c \ + ../../X86/katmai_norm_raw.S \ + ../../X86/katmai_vertex.S \ + ../../X86/katmai_xform_masked1.S \ + ../../X86/katmai_xform_masked2.S \ + ../../X86/katmai_xform_masked3.S \ + ../../X86/katmai_xform_masked4.S \ + ../../X86/katmai_xform_raw1.S \ + ../../X86/katmai_xform_raw2.S \ + ../../X86/katmai_xform_raw3.S \ + ../../X86/katmai_xform_raw4.S + + KATMAI_OBJS = ../../X86/katmai.o \ + ../../X86/katmai_norm_raw.o \ + ../../X86/katmai_vertex.o \ + ../../X86/katmai_xform_masked1.o \ + ../../X86/katmai_xform_masked2.o \ + ../../X86/katmai_xform_masked3.o \ + ../../X86/katmai_xform_masked4.o \ + ../../X86/katmai_xform_raw1.o \ + ../../X86/katmai_xform_raw2.o \ + ../../X86/katmai_xform_raw3.o \ + ../../X86/katmai_xform_raw4.o #endif #endif #ifdef GlxSoProf - HIOBJS = ../../../../highpc.o - LOOBJS = ../../../../lowpc.o - HISRCS = ../../../../highpc.c LOSRCS = ../../../../lowpc.c + HISRCS = ../../../../highpc.c + + LOOBJS = ../../../../lowpc.o + HIOBJS = ../../../../highpc.o #endif - ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) - ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) + ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) $(KATMAI_SRCS) + ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) $(KATMAI_OBJS) - COMMONSRCS = ../common/mm.c ../common/hwlog.c - COMMONOBJS = ../common/mm.o ../common/hwlog.o + COMMONSRCS = ../common/mm.c ../common/hwlog.c + COMMONOBJS = ../common/mm.o ../common/hwlog.o - SRCS = $(LOSRCS) $(DRISRCS) $(DRMSRCS) $(MESASRCS) $(ASMSRCS) \ - $(COMMONSRCS) $(MGASRCS) $(HISRCS) - OBJS = $(LOOBJS) $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) $(ASMOBJS) \ - $(COMMONOBJS) $(MGAOBJS) $(HIOBJS) + SRCS = $(LOSRCS) $(DRISRCS) $(DRMSRCS) $(MESASRCS) \ + $(ASMSRCS) $(COMMONSRCS) $(MGASRCS) $(HISRCS) + OBJS = 
$(LOOBJS) $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) \ + $(ASMOBJS) $(COMMONOBJS) $(MGAOBJS) $(HIOBJS) REQUIREDLIBS += MathLibrary #if !GlxBuiltInMga diff --git a/xc/lib/GL/mesa/src/drv/r128/Imakefile b/xc/lib/GL/mesa/src/drv/r128/Imakefile index 3b04b1580..056297ee3 100644 --- a/xc/lib/GL/mesa/src/drv/r128/Imakefile +++ b/xc/lib/GL/mesa/src/drv/r128/Imakefile @@ -14,10 +14,12 @@ ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL #ifdef i386Architecture #ifdef MesaUse3DNow - ASM_DEFINES = -DUSE_MMX_ASM -DUSE_X86_ASM -DUSE_3DNOW_ASM -#else - ASM_DEFINES = -DUSE_MMX_ASM -DUSE_X86_ASM + 3DNOW_DEFS = -DUSE_3DNOW_ASM #endif +#ifdef MesaUseKatmai + KATMAI_DEFS = -DUSE_KATMAI_ASM +#endif + ASM_DEFINES = -DUSE_X86_ASM -DUSE_MMX_ASM $(3DNOW_DEFS) $(KATMAI_DEFS) #endif #if BuildXF86DRI @@ -37,11 +39,6 @@ ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL -I../common #endif -LinkSourceFile(mm.c, ../common) -LinkSourceFile(mm.h, ../common) -LinkSourceFile(hwlog.c, ../common) -LinkSourceFile(hwlog.h, ../common) - MESA_INCLUDES = -I. -I.. -I../../include \ -I../../../../dri/drm @@ -246,17 +243,29 @@ MESA_INCLUDES = -I. -I.. 
-I../../include \ ../../zoom.o #ifdef i386Architecture - X86_SRCS = ../../X86/x86.c \ - ../../X86/x86a.S \ - ../../X86/common_x86.c \ - ../../X86/common_x86asm.S \ - ../../X86/vertex.S - - X86_OBJS = ../../X86/x86.o \ - ../../X86/x86a.o \ - ../../X86/common_x86.o \ - ../../X86/common_x86asm.o \ - ../../X86/vertex.o + X86_SRCS = ../../X86/common_x86.c \ + ../../X86/common_x86_asm.S \ + ../../X86/x86.c \ + ../../X86/x86_cliptest.S \ + ../../X86/x86_vertex.S \ + ../../X86/x86_xform_masked2.S \ + ../../X86/x86_xform_masked3.S \ + ../../X86/x86_xform_masked4.S \ + ../../X86/x86_xform_raw2.S \ + ../../X86/x86_xform_raw3.S \ + ../../X86/x86_xform_raw4.S + + X86_OBJS = ../../X86/common_x86.o \ + ../../X86/common_x86_asm.o \ + ../../X86/x86.o \ + ../../X86/x86_cliptest.o \ + ../../X86/x86_vertex.o \ + ../../X86/x86_xform_masked2.o \ + ../../X86/x86_xform_masked3.o \ + ../../X86/x86_xform_masked4.o \ + ../../X86/x86_xform_raw2.o \ + ../../X86/x86_xform_raw3.o \ + ../../X86/x86_xform_raw4.o MMX_SRCS = ../../X86/mmx_blend.S @@ -265,6 +274,7 @@ MESA_INCLUDES = -I. -I.. -I../../include \ #ifdef MesaUse3DNow 3DNOW_SRCS = ../../X86/3dnow.c \ ../../X86/3dnow_norm_raw.S \ + ../../X86/3dnow_vertex.S \ ../../X86/3dnow_xform_masked1.S \ ../../X86/3dnow_xform_masked2.S \ ../../X86/3dnow_xform_masked3.S \ @@ -272,11 +282,11 @@ MESA_INCLUDES = -I. -I.. -I../../include \ ../../X86/3dnow_xform_raw1.S \ ../../X86/3dnow_xform_raw2.S \ ../../X86/3dnow_xform_raw3.S \ - ../../X86/3dnow_xform_raw4.S \ - ../../X86/vertex_3dnow.S + ../../X86/3dnow_xform_raw4.S 3DNOW_OBJS = ../../X86/3dnow.o \ ../../X86/3dnow_norm_raw.o \ + ../../X86/3dnow_vertex.o \ ../../X86/3dnow_xform_masked1.o \ ../../X86/3dnow_xform_masked2.o \ ../../X86/3dnow_xform_masked3.o \ @@ -284,29 +294,62 @@ MESA_INCLUDES = -I. -I.. 
-I../../include \ ../../X86/3dnow_xform_raw1.o \ ../../X86/3dnow_xform_raw2.o \ ../../X86/3dnow_xform_raw3.o \ - ../../X86/3dnow_xform_raw4.o \ - ../../X86/vertex_3dnow.o + ../../X86/3dnow_xform_raw4.o #endif +#ifdef MesaUseKatmai + KATMAI_SRCS = ../../X86/katmai.c \ + ../../X86/katmai_norm_raw.S \ + ../../X86/katmai_vertex.S \ + ../../X86/katmai_xform_masked1.S \ + ../../X86/katmai_xform_masked2.S \ + ../../X86/katmai_xform_masked3.S \ + ../../X86/katmai_xform_masked4.S \ + ../../X86/katmai_xform_raw1.S \ + ../../X86/katmai_xform_raw2.S \ + ../../X86/katmai_xform_raw3.S \ + ../../X86/katmai_xform_raw4.S + + KATMAI_OBJS = ../../X86/katmai.o \ + ../../X86/katmai_norm_raw.o \ + ../../X86/katmai_vertex.o \ + ../../X86/katmai_xform_masked1.o \ + ../../X86/katmai_xform_masked2.o \ + ../../X86/katmai_xform_masked3.o \ + ../../X86/katmai_xform_masked4.o \ + ../../X86/katmai_xform_raw1.o \ + ../../X86/katmai_xform_raw2.o \ + ../../X86/katmai_xform_raw3.o \ + ../../X86/katmai_xform_raw4.o +#endif #endif #endif - ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) - ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) +#ifdef GlxSoProf + LOSRCS = ../../../../lowpc.c + HISRCS = ../../../../highpc.c + + LOOBJS = ../../../../lowpc.o + HIOBJS = ../../../../highpc.o +#endif - COMMONSRCS = mm.c hwlog.c - COMMONOBJS = mm.o hwlog.o + ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) $(KATMAI_SRCS) + ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) $(KATMAI_OBJS) - SRCS = $(DRISRCS) $(DRMSRCS) $(MESASRCS) $(ASMSRCS) \ - $(COMMONSRCS) $(R128SRCS) - OBJS = $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) $(ASMOBJS) \ - $(COMMONOBJS) $(R128OBJS) + COMMONSRCS = ../common/mm.c ../common/hwlog.c + COMMONOBJS = ../common/mm.o ../common/hwlog.o + + SRCS = $(LOSRCS) $(DRISRCS) $(DRMSRCS) $(MESASRCS) \ + $(ASMSRCS) $(COMMONSRCS) $(R128SRCS) $(HISRCS) + OBJS = $(LOOBJS) $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) \ + $(ASMOBJS) $(COMMONOBJS) $(R128OBJS) $(HIOBJS) REQUIREDLIBS += MathLibrary -#if !GlxBuiltInMga +#if !GlxBuiltInR128 
REQUIREDLIBS += -L../../../.. -lGL #endif + #if !GlxUseBuiltInDRIDriver #undef DoNormalLib NormalLibGlx #undef DoExtraLib SharedLibGlx @@ -327,6 +370,12 @@ ALL_OBJS = $(OBJS) ALL_DEPS = DONE SharedDepModuleTarget($(LIBNAME),$(ALL_DEPS),$(ALL_OBJS)) InstallDynamicModule($(LIBNAME),$(MODULEDIR),dri) + +#ifdef GlxSoProf +SOPROF_LIBNAME = _r128_dri_p +NormalDepLibraryTarget($(SOPROF_LIBNAME),$(ALL_DEPS),$(ALL_OBJS)) +InstallLibrary($(SOPROF_LIBNAME),$(MODULEDIR)/dri) +#endif #endif DependTarget() diff --git a/xc/lib/GL/mesa/src/drv/sis/Imakefile b/xc/lib/GL/mesa/src/drv/sis/Imakefile index 59f2429ac..9993d6568 100644 --- a/xc/lib/GL/mesa/src/drv/sis/Imakefile +++ b/xc/lib/GL/mesa/src/drv/sis/Imakefile @@ -13,26 +13,27 @@ SIS_DEFINES = -DSIS_USE_HW_CULL XCOMM DEBUG_DEFINES = -DDEBUG_LOCKING XCOMM DEBUG_DEFINES = -DSIS_DEBUG - + #if Malloc0ReturnsNull ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL #endif #ifdef i386Architecture #ifdef MesaUse3DNow - ASM_DEFS = -DUSE_MMX_ASM -DUSE_X86_ASM -DUSE_3DNOW_ASM -#else - ASM_DEFS = -DUSE_MMX_ASM -DUSE_X86_ASM + 3DNOW_DEFS = -DUSE_3DNOW_ASM #endif +#ifdef MesaUseKatmai + KATMAI_DEFS = -DUSE_KATMAI_ASM +#endif + ASM_DEFINES = -DUSE_X86_ASM -DUSE_MMX_ASM $(3DNOW_DEFS) $(KATMAI_DEFS) #endif - #if BuildXF86DRI DRI_DEFINES = GlxDefines $(SIS_DEFINES) - DRI_INCLUDES = -I../../../../dri -I../../../../glx \ + DRI_INCLUDES = -I../../../../dri -I../../../../glx \ -I../../../dri \ - -I$(TOP)/include -I$(TOP)/include/GL \ - -I$(XF86OSSRC) -I$(SERVERSRC)/GL/dri \ + -I$(TOP)/include -I$(TOP)/include/GL \ + -I$(XF86OSSRC) -I$(SERVERSRC)/GL/dri \ -I$(XF86DRIVERSRC)/sis \ -I../../../include -I../.. 
-I../../X \ -I../common/ @@ -57,23 +58,41 @@ LinkSourceFile(xdriP.h, ../../X) STEREO_DEFINES = -DSIS_STEREO=0 #endif - DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) $(DEBUG_DEFINES) \ - $(STEREO_DEFINES) + DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) $(ASM_DEFINES) \ + $(DEBUG_DEFINES) $(STEREO_DEFINES) INCLUDES = -I$(XLIBSRC) -I$(EXTINCSRC) $(MESA_INCLUDES) $(DRI_INCLUDES) \ -I./misc \ -I$(SERVERSRC)/hw/xfree86/drivers/sis - SISSRCS = sis_render.c sis_mesa.c sis_ctx.c sis_alloc.c \ - sis_fog.c sis_clear.c sis_stencil.c \ - sis_texture.c sis_xwin.c sis_xmesa.c sis_span.c sis_fastpath.c \ - $(DEBUG_SRC) \ - $(STEREO_SRC) - - SISOBJS = sis_render.o sis_mesa.o sis_ctx.o sis_alloc.o \ - sis_fog.o sis_clear.o sis_stencil.o \ - sis_texture.o sis_xwin.o sis_xmesa.o sis_span.o sis_fastpath.o \ - $(DEBUG_OBJ) \ - $(STEREO_OBJ) + SISSRCS = sis_render.c \ + sis_mesa.c \ + sis_ctx.c \ + sis_alloc.c \ + sis_fog.c \ + sis_clear.c \ + sis_stencil.c \ + sis_texture.c \ + sis_xwin.c \ + sis_xmesa.c \ + sis_span.c \ + sis_fastpath.c \ + $(DEBUG_SRC) \ + $(STEREO_SRC) + + SISOBJS = sis_render.o \ + sis_mesa.o \ + sis_ctx.o \ + sis_alloc.o \ + sis_fog.o \ + sis_clear.o \ + sis_stencil.o \ + sis_texture.o \ + sis_xwin.o \ + sis_xmesa.o \ + sis_span.o \ + sis_fastpath.o \ + $(DEBUG_OBJ) \ + $(STEREO_OBJ) DRISRCS = ../../../dri/dri_mesa.c \ ../../../../dri/dri_tmm.c @@ -240,17 +259,29 @@ LinkSourceFile(xdriP.h, ../../X) ../../zoom.o #ifdef i386Architecture - X86_SRCS = ../../X86/x86.c \ - ../../X86/x86a.S \ - ../../X86/common_x86.c \ - ../../X86/common_x86asm.S \ - ../../X86/vertex.S - - X86_OBJS = ../../X86/x86.o \ - ../../X86/x86a.o \ - ../../X86/common_x86.o \ - ../../X86/common_x86asm.o \ - ../../X86/vertex.o + X86_SRCS = ../../X86/common_x86.c \ + ../../X86/common_x86_asm.S \ + ../../X86/x86.c \ + ../../X86/x86_cliptest.S \ + ../../X86/x86_vertex.S \ + ../../X86/x86_xform_masked2.S \ + ../../X86/x86_xform_masked3.S \ + ../../X86/x86_xform_masked4.S \ + ../../X86/x86_xform_raw2.S \ + 
../../X86/x86_xform_raw3.S \ + ../../X86/x86_xform_raw4.S + + X86_OBJS = ../../X86/common_x86.o \ + ../../X86/common_x86_asm.o \ + ../../X86/x86.o \ + ../../X86/x86_cliptest.o \ + ../../X86/x86_vertex.o \ + ../../X86/x86_xform_masked2.o \ + ../../X86/x86_xform_masked3.o \ + ../../X86/x86_xform_masked4.o \ + ../../X86/x86_xform_raw2.o \ + ../../X86/x86_xform_raw3.o \ + ../../X86/x86_xform_raw4.o MMX_SRCS = ../../X86/mmx_blend.S @@ -259,6 +290,7 @@ LinkSourceFile(xdriP.h, ../../X) #ifdef MesaUse3DNow 3DNOW_SRCS = ../../X86/3dnow.c \ ../../X86/3dnow_norm_raw.S \ + ../../X86/3dnow_vertex.S \ ../../X86/3dnow_xform_masked1.S \ ../../X86/3dnow_xform_masked2.S \ ../../X86/3dnow_xform_masked3.S \ @@ -266,11 +298,11 @@ LinkSourceFile(xdriP.h, ../../X) ../../X86/3dnow_xform_raw1.S \ ../../X86/3dnow_xform_raw2.S \ ../../X86/3dnow_xform_raw3.S \ - ../../X86/3dnow_xform_raw4.S \ - ../../X86/vertex_3dnow.S + ../../X86/3dnow_xform_raw4.S 3DNOW_OBJS = ../../X86/3dnow.o \ ../../X86/3dnow_norm_raw.o \ + ../../X86/3dnow_vertex.o \ ../../X86/3dnow_xform_masked1.o \ ../../X86/3dnow_xform_masked2.o \ ../../X86/3dnow_xform_masked3.o \ @@ -278,17 +310,51 @@ LinkSourceFile(xdriP.h, ../../X) ../../X86/3dnow_xform_raw1.o \ ../../X86/3dnow_xform_raw2.o \ ../../X86/3dnow_xform_raw3.o \ - ../../X86/3dnow_xform_raw4.o \ - ../../X86/vertex_3dnow.o + ../../X86/3dnow_xform_raw4.o #endif +#ifdef MesaUseKatmai + KATMAI_SRCS = ../../X86/katmai.c \ + ../../X86/katmai_norm_raw.S \ + ../../X86/katmai_vertex.S \ + ../../X86/katmai_xform_masked1.S \ + ../../X86/katmai_xform_masked2.S \ + ../../X86/katmai_xform_masked3.S \ + ../../X86/katmai_xform_masked4.S \ + ../../X86/katmai_xform_raw1.S \ + ../../X86/katmai_xform_raw2.S \ + ../../X86/katmai_xform_raw3.S \ + ../../X86/katmai_xform_raw4.S + + KATMAI_OBJS = ../../X86/katmai.o \ + ../../X86/katmai_norm_raw.o \ + ../../X86/katmai_vertex.o \ + ../../X86/katmai_xform_masked1.o \ + ../../X86/katmai_xform_masked2.o \ + ../../X86/katmai_xform_masked3.o \ + 
../../X86/katmai_xform_masked4.o \ + ../../X86/katmai_xform_raw1.o \ + ../../X86/katmai_xform_raw2.o \ + ../../X86/katmai_xform_raw3.o \ + ../../X86/katmai_xform_raw4.o +#endif #endif - ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) - ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) +#ifdef GlxSoProf + LOSRCS = ../../../../lowpc.c + HISRCS = ../../../../highpc.c - SRCS = $(DRISRCS) $(DRMSRCS) $(SISSRCS) $(MESASRCS) $(ASMSRCS) - OBJS = $(DRIOBJS) $(DRMOBJS) $(SISOBJS) $(MESAOBJS) $(ASMOBJS) + LOOBJS = ../../../../lowpc.o + HIOBJS = ../../../../highpc.o +#endif + + ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) $(KATMAI_SRCS) + ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) $(KATMAI_OBJS) + + SRCS = $(LOSRCS) $(DRISRCS) $(DRMSRCS) $(MESASRCS) \ + $(ASMSRCS) $(SISSRCS) $(HISRCS) + OBJS = $(LOOBJS) $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) \ + $(ASMOBJS) $(SISOBJS) $(HIOBJS) REQUIREDLIBS += MathLibrary #if !GlxBuiltInSIS @@ -316,6 +382,12 @@ ALL_OBJS = $(OBJS) ALL_DEPS = DONE SharedDepModuleTarget($(LIBNAME),$(ALL_DEPS),$(ALL_OBJS)) InstallDynamicModule($(LIBNAME),$(MODULEDIR),dri) + +#ifdef GlxSoProf +SOPROF_LIBNAME = _sis_dri_p +NormalDepLibraryTarget($(SOPROF_LIBNAME),$(ALL_DEPS),$(ALL_OBJS)) +InstallLibrary($(SOPROF_LIBNAME),$(MODULEDIR)/dri) +#endif #endif DependTarget() diff --git a/xc/lib/GL/mesa/src/drv/tdfx/Imakefile b/xc/lib/GL/mesa/src/drv/tdfx/Imakefile index ae70f81d9..e5866c261 100644 --- a/xc/lib/GL/mesa/src/drv/tdfx/Imakefile +++ b/xc/lib/GL/mesa/src/drv/tdfx/Imakefile @@ -12,6 +12,16 @@ XCOMM $XFree86: xc/lib/GL/mesa/src/drv/tdfx/Imakefile,v 1.15 2000/10/28 01:05:22 ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL #endif +#ifdef i386Architecture +#ifdef MesaUse3DNow + 3DNOW_DEFS = -DUSE_3DNOW_ASM +#endif +#ifdef MesaUseKatmai + KATMAI_DEFS = -DUSE_KATMAI_ASM +#endif + ASM_DEFINES = -DUSE_X86_ASM -DUSE_MMX_ASM $(3DNOW_DEFS) $(KATMAI_DEFS) +#endif + #if BuildXF86DRI DRI_DEFINES = GlxDefines -DFX_GLIDE3 -DDRIVERTS DRI_INCLUDES = -I../../../../dri -I../../../../glx \ @@ -19,13
+29,12 @@ ALLOC_DEFINES = -DMALLOC_0_RETURNS_NULL -I$(TOP)/include -I$(TOP)/include/GL \ -I$(XF86OSSRC) -I$(SERVERSRC)/GL/dri \ -I$(XF86DRIVERSRC)/tdfx \ - -I../../../include -I../.. -I../../X + -I../../../include -I../.. -I../../X -I../common #endif -MESA_INCLUDES = -I. -I.. -I../../include \ - -I../../../../dri/drm +MESA_INCLUDES = -I. -I.. -I../../include - DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) + DEFINES = $(ALLOC_DEFINES) $(DRI_DEFINES) $(ASM_DEFINES) INCLUDES = -I$(XLIBSRC) -I$(EXTINCSRC) $(MESA_INCLUDES) $(DRI_INCLUDES) \ -I$(GLIDE3INCDIR) @@ -45,23 +54,41 @@ MESA_INCLUDES = -I. -I.. -I../../include \ ../../../../dri/drm/xf86drmRandom.o \ ../../../../dri/drm/xf86drmSL.o -#ifdef GlxSoProf - HIOBJS = ../../../../highpc.o - LOOBJS = ../../../../lowpc.o -#else - HIOBJS = - LOOBJS = -#endif - - TDFXSRCS = tdfx_xmesa.c tdfx_init.c tdfx_inithw.c \ - fxclip.c fxcva.c fxdd.c fxddspan.c fxddtex.c fxfastpath.c \ - fxglidew.c fxpipeline.c fxrender.c fxsanity.c fxsetup.c \ - fxtexman.c fxtrifuncs.c fxvsetup.c - - TDFXOBJS = tdfx_xmesa.o tdfx_init.o tdfx_inithw.o \ - fxclip.o fxcva.o fxdd.o fxddspan.o fxddtex.o fxfastpath.o \ - fxglidew.o fxpipeline.o fxrender.o fxsanity.o fxsetup.o \ - fxtexman.o fxtrifuncs.o fxvsetup.o + TDFXSRCS = tdfx_xmesa.c \ + tdfx_init.c \ + tdfx_inithw.c \ + fxclip.c \ + fxcva.c \ + fxdd.c \ + fxddspan.c \ + fxddtex.c \ + fxfastpath.c \ + fxglidew.c \ + fxpipeline.c \ + fxrender.c \ + fxsanity.c \ + fxsetup.c \ + fxtexman.c \ + fxtrifuncs.c \ + fxvsetup.c + + TDFXOBJS = tdfx_xmesa.o \ + tdfx_init.o \ + tdfx_inithw.o \ + fxclip.o \ + fxcva.o \ + fxdd.o \ + fxddspan.o \ + fxddtex.o \ + fxfastpath.o \ + fxglidew.o \ + fxpipeline.o \ + fxrender.o \ + fxsanity.o \ + fxsetup.o \ + fxtexman.o \ + fxtrifuncs.o \ + fxvsetup.o MESASRCS = ../../aatriangle.c \ ../../accum.c \ @@ -212,17 +239,29 @@ MESA_INCLUDES = -I. -I.. 
-I../../include \ ../../zoom.o #ifdef i386Architecture - X86_SRCS = ../../X86/x86.c \ - ../../X86/x86a.S \ - ../../X86/common_x86.c \ - ../../X86/common_x86asm.S \ - ../../X86/vertex.S - - X86_OBJS = ../../X86/x86.o \ - ../../X86/x86a.o \ - ../../X86/common_x86.o \ - ../../X86/common_x86asm.o \ - ../../X86/vertex.o + X86_SRCS = ../../X86/common_x86.c \ + ../../X86/common_x86_asm.S \ + ../../X86/x86.c \ + ../../X86/x86_cliptest.S \ + ../../X86/x86_vertex.S \ + ../../X86/x86_xform_masked2.S \ + ../../X86/x86_xform_masked3.S \ + ../../X86/x86_xform_masked4.S \ + ../../X86/x86_xform_raw2.S \ + ../../X86/x86_xform_raw3.S \ + ../../X86/x86_xform_raw4.S + + X86_OBJS = ../../X86/common_x86.o \ + ../../X86/common_x86_asm.o \ + ../../X86/x86.o \ + ../../X86/x86_cliptest.o \ + ../../X86/x86_vertex.o \ + ../../X86/x86_xform_masked2.o \ + ../../X86/x86_xform_masked3.o \ + ../../X86/x86_xform_masked4.o \ + ../../X86/x86_xform_raw2.o \ + ../../X86/x86_xform_raw3.o \ + ../../X86/x86_xform_raw4.o MMX_SRCS = ../../X86/mmx_blend.S @@ -231,6 +270,7 @@ MESA_INCLUDES = -I. -I.. -I../../include \ #ifdef MesaUse3DNow 3DNOW_SRCS = ../../X86/3dnow.c \ ../../X86/3dnow_norm_raw.S \ + ../../X86/3dnow_vertex.S \ ../../X86/3dnow_xform_masked1.S \ ../../X86/3dnow_xform_masked2.S \ ../../X86/3dnow_xform_masked3.S \ @@ -238,11 +278,11 @@ MESA_INCLUDES = -I. -I.. -I../../include \ ../../X86/3dnow_xform_raw1.S \ ../../X86/3dnow_xform_raw2.S \ ../../X86/3dnow_xform_raw3.S \ - ../../X86/3dnow_xform_raw4.S \ - ../../X86/vertex_3dnow.S + ../../X86/3dnow_xform_raw4.S 3DNOW_OBJS = ../../X86/3dnow.o \ ../../X86/3dnow_norm_raw.o \ + ../../X86/3dnow_vertex.o \ ../../X86/3dnow_xform_masked1.o \ ../../X86/3dnow_xform_masked2.o \ ../../X86/3dnow_xform_masked3.o \ @@ -250,13 +290,13 @@ MESA_INCLUDES = -I. -I.. 
-I../../include \ ../../X86/3dnow_xform_raw1.o \ ../../X86/3dnow_xform_raw2.o \ ../../X86/3dnow_xform_raw3.o \ - ../../X86/3dnow_xform_raw4.o \ - ../../X86/vertex_3dnow.o + ../../X86/3dnow_xform_raw4.o #endif #ifdef MesaUseKatmai - KATMAI_SRCS = ../../X86/katmai.c \ + KATMAI_SRCS = ../../X86/katmai.c \ ../../X86/katmai_norm_raw.S \ + ../../X86/katmai_vertex.S \ ../../X86/katmai_xform_masked1.S \ ../../X86/katmai_xform_masked2.S \ ../../X86/katmai_xform_masked3.S \ @@ -264,11 +304,11 @@ MESA_INCLUDES = -I. -I.. -I../../include \ ../../X86/katmai_xform_raw1.S \ ../../X86/katmai_xform_raw2.S \ ../../X86/katmai_xform_raw3.S \ - ../../X86/katmai_xform_raw4.S \ - ../../X86/vertex_katmai.S + ../../X86/katmai_xform_raw4.S - KATMAI_OBJS = ../../X86/katmai.o \ + KATMAI_OBJS = ../../X86/katmai.o \ ../../X86/katmai_norm_raw.o \ + ../../X86/katmai_vertex.o \ ../../X86/katmai_xform_masked1.o \ ../../X86/katmai_xform_masked2.o \ ../../X86/katmai_xform_masked3.o \ @@ -276,17 +316,25 @@ MESA_INCLUDES = -I. -I.. 
-I../../include \ ../../X86/katmai_xform_raw1.o \ ../../X86/katmai_xform_raw2.o \ ../../X86/katmai_xform_raw3.o \ - ../../X86/katmai_xform_raw4.o \ - ../../X86/vertex_katmai.o + ../../X86/katmai_xform_raw4.o +#endif #endif +#ifdef GlxSoProf + LOSRCS = ../../../../lowpc.c + HISRCS = ../../../../highpc.c + + LOOBJS = ../../../../lowpc.o + HIOBJS = ../../../../highpc.o #endif ASMSRCS = $(X86_SRCS) $(MMX_SRCS) $(3DNOW_SRCS) $(KATMAI_SRCS) ASMOBJS = $(X86_OBJS) $(MMX_OBJS) $(3DNOW_OBJS) $(KATMAI_OBJS) - SRCS = $(LOSRCS) $(DRISRCS) $(DRMSRCS) $(TDFXSRCS) $(MESASRCS) $(ASMSRCS) $(HISRCS) - OBJS = $(LOOBJS) $(DRIOBJS) $(DRMOBJS) $(TDFXOBJS) $(MESAOBJS) $(ASMOBJS) $(HIOBJS) + SRCS = $(LOSRCS) $(DRISRCS) $(DRMSRCS) $(MESASRCS) \ + $(ASMSRCS) $(TDFXSRCS) $(HISRCS) + OBJS = $(LOOBJS) $(DRIOBJS) $(DRMOBJS) $(MESAOBJS) \ + $(ASMOBJS) $(TDFXOBJS) $(HIOBJS) REQUIREDLIBS = -l$(GLIDE3LIBNAME) MathLibrary #if !GlxBuiltInTdfx diff --git a/xc/lib/GL/mesa/src/drv/tdfx/fxfastpath.c b/xc/lib/GL/mesa/src/drv/tdfx/fxfastpath.c index f1c669e9a..9c41e7ade 100644 --- a/xc/lib/GL/mesa/src/drv/tdfx/fxfastpath.c +++ b/xc/lib/GL/mesa/src/drv/tdfx/fxfastpath.c @@ -49,7 +49,7 @@ #include "fxtexman.h" #include "vertices.h" #ifdef __i386__ -#include "X86/common_x86asm.h" +#include "X86/common_x86_asm.h" #endif diff --git a/xc/lib/GL/mesa/src/drv/tdfx/fxfasttmp.h b/xc/lib/GL/mesa/src/drv/tdfx/fxfasttmp.h index 6f15462c4..1552d8247 100644 --- a/xc/lib/GL/mesa/src/drv/tdfx/fxfasttmp.h +++ b/xc/lib/GL/mesa/src/drv/tdfx/fxfasttmp.h @@ -336,8 +336,8 @@ static void TAG(fx_init_fastpath) (struct fx_fast_tab * tab) tab->project_vertices = TAG(fx_project_vertices); tab->project_clipped_vertices = TAG(fx_project_clipped_vertices); -#if defined(USE_3DNOW_ASM) - if (gl_x86_cpu_features & GL_CPU_3Dnow) { +#if 0 && defined(USE_3DNOW_ASM) + if (gl_x86_cpu_features & X86_FEATURE_3DNOW) { extern void TAG(fx_3dnow_project_vertices) (GLfloat * first, GLfloat * last, const GLfloat * mat, diff --git 
a/xc/lib/GL/mesa/src/drv/tdfx/fxglidew.h b/xc/lib/GL/mesa/src/drv/tdfx/fxglidew.h index 388ca2d7c..0f1f2fd0b 100644 --- a/xc/lib/GL/mesa/src/drv/tdfx/fxglidew.h +++ b/xc/lib/GL/mesa/src/drv/tdfx/fxglidew.h @@ -115,7 +115,6 @@ extern txErrorSetCallback_t txErrorSetCallbackPtr; FX_ENTRY void FX_CALL grEnable(GrEnableMode_t mode); -FX_ENTRY void FX_CALL grEnable(GrEnableMode_t mode); /* * General context: */ |