diff options
author | Dave Airlie <airlied@redhat.com> | 2010-01-07 14:50:12 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2010-01-07 14:50:12 +1000 |
commit | 17df522164b05ad8750a7eecf1514bb386c0a522 (patch) | |
tree | 9f13d43cb294609578ae0ce64b59420bd4fbdaef | |
parent | fb9df2c56cd623cbaa98f21c581d2b985a3d87fc (diff) |
qxl: more not working stuff (qxl-hack)
-rw-r--r-- | drivers/gpu/drm/Kconfig | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/Makefile | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/lookup3.c | 765 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/lookup3.h | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_cmd.c | 262 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_display.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_drv.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_drv.h | 63 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_fb.c | 37 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_image.c | 234 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_kms.c | 25 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_object.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_object.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/qxl/qxl_ttm.c | 53 | ||||
-rw-r--r-- | include/drm/qxl_drm.h | 1 |
15 files changed, 1445 insertions, 16 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 591a46422019..7f6c75037960 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -159,5 +159,8 @@ config DRM_SAVAGE config DRM_QXL tristate "QXL virtual GPU" depends on DRM + select FB_SYS_FILLRECT + select FB_SYS_COPYAREA + select FB_SYS_IMAGEBLIT help QXL 4eva diff --git a/drivers/gpu/drm/qxl/Makefile b/drivers/gpu/drm/qxl/Makefile index db0811940dab..7f660e23add6 100644 --- a/drivers/gpu/drm/qxl/Makefile +++ b/drivers/gpu/drm/qxl/Makefile @@ -4,6 +4,6 @@ ccflags-y := -Iinclude/drm -qxl-y := qxl_drv.o qxl_kms.o qxl_display.o qxl_ttm.o qxl_fb.o qxl_object.o qxl_gem.o +qxl-y := qxl_drv.o qxl_kms.o qxl_display.o qxl_ttm.o qxl_fb.o qxl_object.o qxl_gem.o qxl_cmd.o lookup3.o qxl_image.o obj-$(CONFIG_DRM_QXL)+= qxl.o diff --git a/drivers/gpu/drm/qxl/lookup3.c b/drivers/gpu/drm/qxl/lookup3.c new file mode 100644 index 000000000000..6f9bd627fd77 --- /dev/null +++ b/drivers/gpu/drm/qxl/lookup3.c @@ -0,0 +1,765 @@ +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. +You could implement hashbig2() if you wanted but I haven't bothered here. 
+ +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hashword(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. +------------------------------------------------------------------------------- +*/ + +#include <linux/types.h> +#include "lookup3.h" + +/* + * My best guess at if you are big-endian or little-endian. This may + * need adjustment. + */ +#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ + __BYTE_ORDER == __LITTLE_ENDIAN) || \ + (defined(i386) || defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ + __BYTE_ORDER == __BIG_ENDIAN) || \ + (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 +#endif + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). 
+ +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. 
+------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +-------------------------------------------------------------------- + This works on all machines. 
To be useful, it requires + -- that the key be an array of uint32_t's, and + -- that the length be the number of uint32_t's in the key + + The function hashword() is identical to hashlittle() on little-endian + machines, and identical to hashbig() on big-endian machines, + except that the length has to be measured in uint32_ts rather than in + bytes. hashlittle() is more complicated than hashword() only because + hashlittle() has to dance around fitting the key bytes into registers. +-------------------------------------------------------------------- +*/ +uint32_t hashword( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t initval) /* the previous hash, or an arbitrary value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + return c; +} + + +/* +-------------------------------------------------------------------- +hashword2() -- same as hashword(), but take two seeds and return two +32-bit values. pc and pb must both be nonnull, and *pc and *pb must +both be initialized with seeds. If you pass in (*pb)==0, the output +(*pc) will be the same as the return value from hashword(). 
+-------------------------------------------------------------------- +*/ +void hashword2 ( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t *pc, /* IN: seed OUT: primary hash value */ +uint32_t *pb) /* IN: more seed OUT: secondary hash value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; + c += *pb; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + *pc=c; *pb=b; +} + + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); + +By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. 
You may use this +code any way you wish, private, educational, or commercial. It's free. + +Use for hash table lookup, or anything where one collision in 2^^32 is +acceptable. Do NOT use for cryptographic purposes. +------------------------------------------------------------------------------- +*/ + +uint32_t hashlittle( const void *key, size_t length, uint32_t initval) +{ + uint32_t a,b,c; /* internal state */ + union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ +#ifdef VALGRIND + const uint8_t *k8; +#endif + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). 
+ */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* 
fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} + + +/* + * hashlittle2: return 2 32-bit hash values + * + * This is identical to hashlittle(), except it returns two 32-bit hash + * values instead of just one. 
This is good enough for hash table + * lookup with 2^^64 buckets, or if you want a second hash if you're not + * happy with the first, or if you want a probably-unique 64-bit ID for + * the key. *pc is better mixed than *pb, so use *pc first. If you want + * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". + */ +void hashlittle2( + const void *key, /* the key to hash */ + size_t length, /* length of the key */ + uint32_t *pc, /* IN: primary initval, OUT: primary hash */ + uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */ +{ + uint32_t a,b,c; /* internal state */ + union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; + c += *pb; + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ +#ifdef VALGRIND + const uint8_t *k8; +#endif + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). 
+ */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + 
a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + } + + final(a,b,c); + *pc=c; *pb=b; +} + + + +/* + * hashbig(): + * This is the same as hashword() 
on big-endian machines. It is different + * from hashlittle() on all machines. hashbig() takes advantage of + * big-endian byte ordering. + */ +uint32_t hashbig( const void *key, size_t length, uint32_t initval) +{ + uint32_t a,b,c; + union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ +#ifdef VALGRIND + const uint8_t *k8; +#endif + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]<<8" actually reads beyond the end of the string, but + * then shifts out the part it's not allowed to read. Because the + * string is aligned, the illegal read is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). 
+ */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; + case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; + case 5 : b+=k[1]&0xff000000; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff00; break; + case 2 : a+=k[0]&0xffff0000; break; + case 1 : a+=k[0]&0xff000000; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ + case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ + case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ + case 1 : a+=((uint32_t)k8[0])<<24; break; + case 0 : return c; + } + +#endif /* !VALGRIND */ + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += ((uint32_t)k[0])<<24; + a += ((uint32_t)k[1])<<16; + a += ((uint32_t)k[2])<<8; + a += ((uint32_t)k[3]); + b += ((uint32_t)k[4])<<24; + b += ((uint32_t)k[5])<<16; + b += ((uint32_t)k[6])<<8; + b += ((uint32_t)k[7]); + c += ((uint32_t)k[8])<<24; + c += ((uint32_t)k[9])<<16; + c += ((uint32_t)k[10])<<8; + c += ((uint32_t)k[11]); + mix(a,b,c); + length -= 12; + k 
+= 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[11]; + case 11: c+=((uint32_t)k[10])<<8; + case 10: c+=((uint32_t)k[9])<<16; + case 9 : c+=((uint32_t)k[8])<<24; + case 8 : b+=k[7]; + case 7 : b+=((uint32_t)k[6])<<8; + case 6 : b+=((uint32_t)k[5])<<16; + case 5 : b+=((uint32_t)k[4])<<24; + case 4 : a+=k[3]; + case 3 : a+=((uint32_t)k[2])<<8; + case 2 : a+=((uint32_t)k[1])<<16; + case 1 : a+=((uint32_t)k[0])<<24; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} + diff --git a/drivers/gpu/drm/qxl/lookup3.h b/drivers/gpu/drm/qxl/lookup3.h new file mode 100644 index 000000000000..4dee9f5bf827 --- /dev/null +++ b/drivers/gpu/drm/qxl/lookup3.h @@ -0,0 +1,8 @@ +#ifndef __LOOKUP3_H +#define __LOOKUP3_H + +#include <linux/types.h> + +uint32_t hashlittle( const void *key, size_t length, u32 initval); + +#endif diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c new file mode 100644 index 000000000000..4f7d2c31d488 --- /dev/null +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -0,0 +1,262 @@ +/* QXL cmd/ring handling */ + +#include "qxl_drv.h" +#include "qxl_object.h" +struct ring { + struct qxl_ring_header header; + uint8_t elements[0]; +}; + +struct qxl_ring { + volatile struct ring *ring; + int element_size; + int n_elements; + int prod_notify; +}; + +void qxl_ring_free(struct qxl_ring *ring) +{ + kfree(ring); +} + +struct qxl_ring * +qxl_ring_create (struct qxl_ring_header *header, + int element_size, + int n_elements, + int prod_notify) +{ + struct qxl_ring *ring; + + ring = kmalloc (sizeof(*ring), GFP_KERNEL); + if (!ring) + return NULL; + + ring->ring = (volatile struct ring *)header; + ring->element_size = element_size; + ring->n_elements = n_elements; + ring->prod_notify = prod_notify; + + return ring; +} + +void qxl_ring_push (struct qxl_ring *ring, + const void *new_elt) +{ + volatile struct qxl_ring_header 
*header = &(ring->ring->header); + volatile uint8_t *elt; + int idx; + + while (header->prod - header->cons == header->num_items) { + header->notify_on_cons = header->cons + 1; + mb(); + } + + idx = header->prod & (ring->n_elements - 1); + elt = ring->ring->elements + idx * ring->element_size; + + memcpy((void *)elt, new_elt, ring->element_size); + + header->prod++; + + mb(); + + if (header->prod == header->notify_on_prod) + outb (0, ring->prod_notify); +} + +bool qxl_ring_pop (struct qxl_ring *ring, + void *element) +{ + volatile struct qxl_ring_header *header = &(ring->ring->header); + volatile uint8_t *ring_elt; + int idx; + + if (header->cons == header->prod) + return false; + + idx = header->cons & (ring->n_elements - 1); + ring_elt = ring->ring->elements + idx * ring->element_size; + + memcpy (element, (void *)ring_elt, ring->element_size); + + header->cons++; + + return true; +} + +void qxl_ring_wait_idle (struct qxl_ring *ring) +{ + while (ring->ring->header.cons != ring->ring->header.prod) + { + msleep (1); + mb(); + } +} + +void qxl_bo_free(struct qxl_bo *bo) +{ + int ret; + ret = qxl_bo_reserve(bo, false); + if (!ret) { + qxl_bo_kunmap(bo); + qxl_bo_unpin(bo); + qxl_bo_unreserve(bo); + } + qxl_bo_unref(&bo); +} + +static int qxl_garbage_collect(struct qxl_device *qdev) +{ + uint64_t id; + int i = 0; + + while (qxl_ring_pop (qdev->release_ring, &id)) { + while (id) { + /* We assume that the two low bits of a pointer are + * available. 
If the low one is set, then the command in + * question is a cursor command + */ +#define POINTER_MASK ((1 << 2) - 1) + + struct qxl_bo *bo = (void *)(id & ~POINTER_MASK); + union qxl_release_info *info = bo->kptr; + struct qxl_cursor_cmd *cmd = (struct qxl_cursor_cmd *)info; + struct qxl_drawable *drawable = (struct qxl_drawable *)info; + bool is_cursor = false; + + if ((id & POINTER_MASK) == 1) + is_cursor = true; + +#if 0 + if (is_cursor && cmd->type == QXL_CURSOR_SET) { + struct qxl_cursor *cursor = (void *)qxl_virtual_address + (qdev, (void *)cmd->u.set.shape); + qxl_free(qdev->mem, cursor); + } else if (!is_cursor && drawable->type == QXL_DRAW_COPY) { + struct qxl_image *image = qxl_virtual_address(qdev, + (void *)drawable->u.copy.src_bitmap); + qxl_image_destroy (qdev, image); + } +#endif + + id = info->next; + qxl_bo_free(bo); + // qxl_free(qdev->mem, info); + } + + } + + return i > 0; +} + +/* create and pin bo */ +struct qxl_bo *qxl_allocnf(struct qxl_device *qdev, unsigned long size) +{ + struct qxl_bo *bo; + int ret; + void *result; + int n_attempts = 0; + static int nth_oom = 1; + + qxl_garbage_collect(qdev); + + ret = qxl_bo_create(qdev, NULL, size, true, + QXL_GEM_DOMAIN_IO, &bo); + if (ret) { + DRM_ERROR("failed to allocate IO BO\n"); + return NULL; + } + + ret = qxl_bo_reserve(bo, false); + if (unlikely(ret != 0)) + goto out_unref; + + ret = qxl_bo_pin(bo, QXL_GEM_DOMAIN_IO, NULL); + if (ret) { + DRM_ERROR("failed to pin IO BO %d\n", ret); + goto out_unref; + } + + ret = qxl_bo_kmap(bo, NULL); + qxl_bo_unreserve(bo); + if (ret) + goto out_unref; + return bo; +out_unref: + qxl_bo_unref(&bo); + return NULL; +} + + +#if 0 + while (!(result = qxl_alloc (qdev->mem, size))) + { + struct qxl_ram_header *ram_header = (void *)((unsigned long)qdev->ram + + qdev->rom->ram_header_offset); + + /* Rather than go out of memory, we simply tell the + * device to dump everything + */ + ram_header->update_area.top = 0; + ram_header->update_area.bottom = 1280; + 
ram_header->update_area.left = 0; + ram_header->update_area.right = 800; + + outb (0, qdev->io_base + QXL_IO_UPDATE_AREA); + + printk(KERN_ERR "eliminated memory (%d)\n", nth_oom++); + + outb (0, qdev->io_base + QXL_IO_NOTIFY_OOM); + + msleep_interruptible(10); + + if (qxl_garbage_collect(qdev)) { + n_attempts = 0; + } + else if (++n_attempts == 1000) + { +// qxl_mem_dump_stats (qdev->mem, "Out of mem - stats\n"); + BUG(); + } + } + return result; +} +#endif + +void qxl_push_update_area(struct qxl_device *qdev, const struct qxl_rect *area) +{ + struct qxl_update_cmd *update; + struct qxl_bo *cmd_bo; + struct qxl_command cmd; + int ret; + + cmd_bo = qxl_allocnf(qdev, sizeof(*update)); + + update = cmd_bo->kptr; + update->release_info.id = (uint64_t)cmd_bo; + update->area = *area; + update->update_id = 0; +// qxl_bo_kunmap(cmd_bo); + cmd.type = QXL_CMD_UPDATE; + cmd.data = qxl_bo_gpu_offset(cmd_bo) + qdev->vram_base + qdev->rom->pages_offset; + + DRM_DEBUG("push ring %llx %x\n", qxl_bo_gpu_offset(cmd_bo), cmd.data); + + if (qdev->mode_set == false) { + DRM_ERROR("ring called before mode set\n"); + qxl_bo_free(cmd_bo); + } + else + qxl_ring_push(qdev->command_ring, &cmd); +} + +void qxl_push_screen(struct qxl_device *qdev) +{ + struct qxl_rect area; + + area.left = area.top = 0; + area.right = 1920; + area.bottom = 1200; + + qxl_push_update_area(qdev, &area); +} diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 08211b23a95a..e83cbb5ec218 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -132,6 +132,7 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc, outb(0, qdev->io_base + QXL_IO_RESET); outb(m->id, qdev->io_base + QXL_IO_SET_MODE); + qdev->mode_set = true; return 0; } diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 42293692bcd5..e9dfb520f0ad 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -10,7 +10,7 @@ static struct 
pci_device_id pciidlist[] = { { 0, 0, 0 }, }; -MODULE_DEVICE_TABLE(pci, pciidlist); +//MODULE_DEVICE_TABLE(pci, pciidlist); static struct drm_driver qxl_driver; diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 987f53b0652c..e76d5a51318f 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -11,6 +11,8 @@ #include <ttm/ttm_placement.h> #include <ttm/ttm_module.h> +#include "qxl_drm.h" + #define DRIVER_AUTHOR "Dave Airlie" #define DRIVER_NAME "qxl" @@ -459,6 +461,29 @@ struct qxl_bo { struct drm_gem_object *gobj; }; +struct qxl_fence_driver { + atomic_t seq; + uint32_t last_seq; + wait_queue_head_t queue; + rwlock_t lock; + struct list_head created; + struct list_head emited; + struct list_head signaled; +}; + +struct qxl_fence { + struct qxl_device *qdev; + struct kref kref; + struct list_head list; + uint32_t seq; + unsigned long timeout; + bool emited; + bool signaled; +}; + +int qxl_fence_driver_init(struct qxl_device *qdev); +void qxl_fence_driver_fini(struct qxl_device *qdev); + struct qxl_gem { struct mutex mutex; struct list_head objects; @@ -507,17 +532,21 @@ struct qxl_device { struct qxl_mode *modes; int io_base; void *ram; - struct qxl_mem *mem; struct qxl_mman mman; - +// struct qxl_fence_driver fence; struct qxl_gem gem; struct qxl_mode_info mode_info; struct fb_info *fbdev_info; struct qxl_bo *fbdev_rbo; struct qxl_framebuffer *fbdev_rfb; + void *ram_physical; + + struct qxl_ring *release_ring; + struct qxl_ring *command_ring; - + struct qxl_ram_header *ram_header; + bool mode_set; }; int qxl_driver_load(struct drm_device *dev, unsigned long flags); @@ -529,6 +558,25 @@ void qxl_modeset_fini(struct qxl_device *qdev); int qxl_bo_init(struct qxl_device *qdev); void qxl_bo_fini(struct qxl_device *qdev); +struct qxl_ring *qxl_ring_create (struct qxl_ring_header *header, + int element_size, + int n_elements, + int prod_notify); +void qxl_ring_free(struct qxl_ring *ring); +extern struct qxl_bo 
*qxl_allocnf(struct qxl_device *qdev, unsigned long size); + +static inline uint64_t +qxl_physical_address (struct qxl_device *qdev, void *virtual) +{ + return (uint64_t) ((unsigned long)virtual + (unsigned long)qdev->ram_physical); +} + +static inline void * +qxl_virtual_address (struct qxl_device *qdev, void *physical) +{ + return (void *) ((unsigned long)physical - (unsigned long)qdev->ram_physical); +} + /* qxl_fb.c */ #define QXLFB_CONN_LIMIT 1 @@ -555,4 +603,13 @@ void qxl_gem_object_unpin(struct drm_gem_object *obj); /* qxl ttm */ int qxl_ttm_init(struct qxl_device *qdev); void qxl_ttm_fini(struct qxl_device *qdev); + +/* qxl image */ + +struct qxl_image *qxl_image_create(struct qxl_device *qdev, const uint8_t *data, + int x, int y, int width, int height, + int stride); +void qxl_image_destroy(struct qxl_device *qdev, + struct qxl_image *image); +void qxl_push_screen(struct qxl_device *qxl); #endif diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index 7f02f0a79edd..05032bfdebd8 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -46,14 +46,38 @@ struct qxl_fb_device { struct qxl_device *qdev; }; +static void qxl_fb_fillrect(struct fb_info *info, + const struct fb_fillrect *rect) +{ + struct qxl_fb_device *rfbdev = info->par; + cfb_fillrect(info, rect); + qxl_push_screen(rfbdev->qdev); +} + +static void qxl_fb_copyarea(struct fb_info *info, + const struct fb_copyarea *region) +{ + struct qxl_fb_device *rfbdev = info->par; + cfb_copyarea(info, region); + qxl_push_screen(rfbdev->qdev); +} + +static void qxl_fb_imageblit(struct fb_info *info, + const struct fb_image *image) +{ + struct qxl_fb_device *rfbdev = info->par; + cfb_imageblit(info, image); + qxl_push_screen(rfbdev->qdev); +} + static struct fb_ops qxlfb_ops = { .owner = THIS_MODULE, .fb_check_var = drm_fb_helper_check_var, .fb_set_par = drm_fb_helper_set_par, .fb_setcolreg = drm_fb_helper_setcolreg, - .fb_fillrect = cfb_fillrect, - .fb_copyarea = 
cfb_copyarea, - .fb_imageblit = cfb_imageblit, + .fb_fillrect = qxl_fb_fillrect, + .fb_copyarea = qxl_fb_copyarea, + .fb_imageblit = qxl_fb_imageblit, .fb_pan_display = drm_fb_helper_pan_display, .fb_blank = drm_fb_helper_blank, .fb_setcmap = drm_fb_helper_setcmap, @@ -130,8 +154,6 @@ int qxlfb_create(struct drm_device *dev, mode_cmd.bpp = surface_bpp; mode_cmd.pitch = ALIGN(mode_cmd.width * ((mode_cmd.bpp + 1) / 8), 64); - /* need to align pitch with crtc limits */ - // mode_cmd.pitch = qxl_align_pitch(qdev, mode_cmd.width, mode_cmd.bpp, fb_tiled) * ((mode_cmd.bpp + 1) / 8); mode_cmd.depth = surface_depth; size = mode_cmd.pitch * mode_cmd.height; @@ -163,6 +185,7 @@ int qxlfb_create(struct drm_device *dev, ret = qxl_bo_reserve(rbo, false); if (unlikely(ret != 0)) goto out_unref; + ret = qxl_bo_pin(rbo, QXL_GEM_DOMAIN_VRAM, &fb_gpuaddr); if (ret) { qxl_bo_unreserve(rbo); @@ -198,7 +221,7 @@ int qxlfb_create(struct drm_device *dev, if (ret) goto out_unref; - memset_io(fbptr, 0xff, aligned_size); + memset(fbptr, 0xff, aligned_size); strcpy(info->fix.id, "qxldrmfb"); @@ -209,6 +232,7 @@ int qxlfb_create(struct drm_device *dev, // tmp = fb_gpuaddr - qdev->mc.vram_location; // info->fix.smem_start = qdev->mc.aper_base + tmp; + info->fix.smem_start = fbptr; info->fix.smem_len = size; info->screen_base = fbptr; info->screen_size = size; @@ -248,6 +272,7 @@ out_unref: ret = qxl_bo_reserve(rbo, false); if (likely(ret == 0)) { qxl_bo_kunmap(rbo); + qxl_bo_unpin(rbo); qxl_bo_unreserve(rbo); } } diff --git a/drivers/gpu/drm/qxl/qxl_image.c b/drivers/gpu/drm/qxl/qxl_image.c new file mode 100644 index 000000000000..0c7409afaf84 --- /dev/null +++ b/drivers/gpu/drm/qxl/qxl_image.c @@ -0,0 +1,234 @@ +#include "qxl_drv.h" +#include "lookup3.h" + +typedef struct image_info_t image_info_t; + +struct image_info_t +{ + struct qxl_image *image; + int ref_count; + image_info_t *next; +}; + +#define HASH_SIZE 4096 +static image_info_t *image_table[HASH_SIZE]; + +static unsigned int 
+hash_and_copy(const uint8_t *src, int src_stride, + uint8_t *dest, int dest_stride, + int width, int height) +{ + int i, j; + unsigned int hash = 0; + + for (i = 0; i < height; ++i) { + const uint8_t *src_line = src + i * src_stride; + uint8_t *dest_line = dest + i * dest_stride; + + for (j = 0; j < width; ++j) { + uint32_t *s = (uint32_t *)src_line; + uint32_t *d = (uint32_t *)dest_line; + + if (dest) + d[j] = s[j]; + } + + hash = hashlittle(src_line, width * sizeof(uint32_t), hash); + } + + return hash; +} + +static image_info_t * +lookup_image_info(unsigned int hash, + int width, + int height) +{ + struct image_info_t *info = image_table[hash % HASH_SIZE]; + + while (info) { + struct qxl_image *image = info->image; + + if (image->descriptor.id == hash && + image->descriptor.width == width && + image->descriptor.height == height) { + return info; + } + + info = info->next; + } + +#if 0 + ErrorF ("lookup of %u failed\n", hash); +#endif + + return NULL; +} + +static image_info_t * +insert_image_info(unsigned int hash) +{ + struct image_info_t *info = kmalloc(sizeof(image_info_t), GFP_KERNEL); + + if (!info) + return NULL; + + info->next = image_table[hash % HASH_SIZE]; + image_table[hash % HASH_SIZE] = info; + + return info; +} + +static void +remove_image_info(image_info_t *info) +{ + struct image_info_t **location = &image_table[info->image->descriptor.id % HASH_SIZE]; + + while (*location && (*location) != info) + location = &((*location)->next); + + if (*location) + *location = info->next; + + kfree(info); +} + +struct qxl_image *qxl_image_create(struct qxl_device *qdev, const uint8_t *data, + int x, int y, int width, int height, + int stride) +{ + unsigned int hash; + image_info_t *info; + + data += y * stride + x * sizeof(uint32_t); + + hash = hash_and_copy(data, stride, NULL, -1, width, height); + + info = lookup_image_info(hash, width, height); + if (info) { + int i, j; + +#if 0 + ErrorF ("reusing image %p with hash %u (%d x %d)\n", info->image, hash, 
width, height); +#endif + + info->ref_count++; + + for (i = 0; i < height; ++i) { + struct qxl_data_chunk *chunk; + const uint8_t *src_line = data + i * stride; + uint32_t *dest_line; + + chunk = qxl_virtual_address(qdev, (void *)info->image->u.bitmap.data); + + dest_line = (uint32_t *)chunk->data + width * i; + + for (j = 0; j < width; ++j) { + uint32_t *s = (uint32_t *)src_line; + uint32_t *d = (uint32_t *)dest_line; + + if (d[j] != s[j]) + { +#if 0 + ErrorF ("bad collision at (%d, %d)! %d != %d\n", j, i, s[j], d[j]); +#endif + goto out; + } + } + } +out: + return info->image; + } else { + struct qxl_image *image; + struct qxl_data_chunk *chunk; + int dest_stride = width * sizeof(uint32_t); + image_info_t *info; + +#if 0 + ErrorF ("Must create new image of size %d %d\n", width, height); +#endif + /* Chunk */ + /* FIXME: Check integer overflow */ + chunk = qxl_allocnf(qdev, sizeof(*chunk) + height * dest_stride); + + chunk->data_size = height * dest_stride; + chunk->prev_chunk = 0; + chunk->next_chunk = 0; + + hash_and_copy(data, stride, + chunk->data, dest_stride, + width, height); + + /* Image */ + image = qxl_allocnf(qdev, sizeof *image); + + image->descriptor.id = 0; + image->descriptor.type = QXL_IMAGE_TYPE_BITMAP; + + image->descriptor.flags = 0; + image->descriptor.width = width; + image->descriptor.height = height; + + image->u.bitmap.format = QXL_BITMAP_FMT_32BIT; + image->u.bitmap.flags = QXL_BITMAP_TOP_DOWN; + image->u.bitmap.x = width; + image->u.bitmap.y = height; + image->u.bitmap.stride = width * sizeof(uint32_t); + image->u.bitmap.palette = 0; + image->u.bitmap.data = qxl_physical_address(qdev, chunk); + +#if 0 + ErrorF("%p has size %d %d\n", image, width, height); +#endif + + /* Add to hash table */ + if((info = insert_image_info(hash))) { + info->image = image; + info->ref_count = 1; + + image->descriptor.id = hash; + image->descriptor.flags = QXL_IMAGE_CACHE; + +#if 0 + ErrorF("added with hash %u\n", hash); +#endif + } + return image; + } +} + 
+void qxl_image_destroy(struct qxl_device *qdev, + struct qxl_image *image) +{ +#if 0 + struct qxl_data_chunk *chunk; + image_info_t *info; + + chunk = qxl_virtual_address(qdev,(void *)image->u.bitmap.data); + + info = lookup_image_info(image->descriptor.id, + image->descriptor.width, + image->descriptor.height); + + if(info && info->image == image) { + --info->ref_count; + + if(info->ref_count != 0) + return; + +#if 0 + ErrorF("removed %p from hash table\n", info->image); +#endif + + remove_image_info(info); + } + + qxl_free(qdev->mem, chunk); + qxl_free(qdev->mem, image); +#endif +} + +void qxl_drop_image_cache(struct qxl_device *qdev) +{ + memset(image_table, 0, HASH_SIZE * sizeof(image_info_t *)); +} diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c index bf3b34aac376..b59b297acd74 100644 --- a/drivers/gpu/drm/qxl/qxl_kms.c +++ b/drivers/gpu/drm/qxl/qxl_kms.c @@ -56,12 +56,13 @@ int qxl_device_init(struct qxl_device *qdev, qdev->flags = flags; mutex_init(&qdev->gem.mutex); +// rwlock_init(&qdev->fence_drv.lock); INIT_LIST_HEAD(&qdev->gem.objects); qdev->rom_base = drm_get_resource_start(qdev->ddev, 2); qdev->rom_size = drm_get_resource_len(qdev->ddev, 2); qdev->vram_base = drm_get_resource_start(qdev->ddev, 0); - + qdev->io_base = drm_get_resource_start(qdev->ddev, 3); qdev->rom = ioremap(qdev->rom_base, qdev->rom_size); if (!qdev->rom){ @@ -69,9 +70,25 @@ int qxl_device_init(struct qxl_device *qdev, return -ENOMEM; } - qdev->io_base = drm_get_resource_start(qdev->ddev, 3); qxl_check_device(qdev); + qdev->ram_header = ioremap(qdev->vram_base + qdev->rom->ram_header_offset, + drm_get_resource_len(qdev->ddev, 0) - qdev->rom->ram_header_offset); + + qdev->command_ring = qxl_ring_create(&(qdev->ram_header->cmd_ring_hdr), + sizeof(struct qxl_command), + 32, qdev->io_base + QXL_IO_NOTIFY_CMD); + + qdev->release_ring = qxl_ring_create(&(qdev->ram_header->release_ring_hdr), + sizeof(uint64_t), + 8, 0); +#if 0 + r = qxl_fence_driver_init(qdev); 
+ if (r) { + DRM_ERROR("fence init failed %d\n", r); + return r; + } +#endif r = qxl_bo_init(qdev); if (r) { DRM_ERROR("bo init failed %d\n", r); @@ -82,7 +99,11 @@ int qxl_device_init(struct qxl_device *qdev, void qxl_device_fini(struct qxl_device *qdev) { + qxl_ring_free(qdev->command_ring); + qxl_ring_free(qdev->release_ring); +// qxl_fence_driver_fini(qdev); qxl_bo_fini(qdev); + iounmap(qdev->ram_header); iounmap(qdev->rom); qdev->rom = NULL; qdev->mode_info.modes = NULL; diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c index 1a89236ce80d..b1343fea3512 100644 --- a/drivers/gpu/drm/qxl/qxl_object.c +++ b/drivers/gpu/drm/qxl/qxl_object.c @@ -28,6 +28,9 @@ void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain) qbo->placement.lpfn = 0; qbo->placement.placement = qbo->placements; qbo->placement.busy_placement = qbo->placements; + if (domain & QXL_GEM_DOMAIN_IO) + qbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | + TTM_PL_FLAG_PRIV0; if (domain & QXL_GEM_DOMAIN_VRAM) qbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM; diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h index d06e534db06b..24faf9b745d5 100644 --- a/drivers/gpu/drm/qxl/qxl_object.h +++ b/drivers/gpu/drm/qxl/qxl_object.h @@ -67,3 +67,5 @@ extern void qxl_bo_kunmap(struct qxl_bo *bo); extern void qxl_bo_unref(struct qxl_bo **bo); extern int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr); extern int qxl_bo_unpin(struct qxl_bo *bo); +extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain); +extern bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo); diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index 06090ac57def..f365adbcf9ad 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -7,6 +7,7 @@ #include <drm/drm.h> #include <drm/qxl_drm.h> #include "qxl_drv.h" +#include "qxl_object.h" #define 
DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) @@ -104,9 +105,20 @@ static int qxl_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; man->default_caching = TTM_PL_FLAG_WC; man->io_addr = NULL; - man->io_offset = qdev->vram_base; + man->io_offset = qdev->vram_base + qdev->rom->draw_area_offset; man->io_size = qdev->vram_size; break; + case TTM_PL_PRIV0: + man->gpu_offset = 0; + man->flags = TTM_MEMTYPE_FLAG_FIXED | + TTM_MEMTYPE_FLAG_NEEDS_IOREMAP | + TTM_MEMTYPE_FLAG_MAPPABLE; + man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; + man->default_caching = TTM_PL_FLAG_WC; + man->io_addr = NULL; + man->io_offset = qdev->vram_base + qdev->rom->pages_offset; + man->io_size = qdev->rom->num_io_pages * PAGE_SIZE; + break; default: DRM_ERROR("Unsupported memory type %u\n", (unsigned)type); return -EINVAL; @@ -120,7 +132,6 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo, struct qxl_bo *rbo; static u32 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; -#if 0 if (!qxl_ttm_bo_is_qxl_bo(bo)) { placement->fpfn = 0; placement->lpfn = 0; @@ -133,7 +144,6 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo, rbo = container_of(bo, struct qxl_bo, tbo); qxl_ttm_placement_from_domain(rbo, QXL_GEM_DOMAIN_CPU); *placement = rbo->placement; -#endif } @@ -142,6 +152,33 @@ static int qxl_verify_access(struct ttm_buffer_object *bo, struct file *filp) return 0; } +#if 0 +static int qxl_sync_obj_wait(void *sync_obj, void *sync_arg, + bool lazy, bool interruptible) +{ + return qxl_fence_wait((struct qxl_fence *)sync_obj, interruptible); +} + +static int qxl_sync_obj_flush(void *sync_obj, void *sync_arg) +{ + return 0; +} + +static void qxl_sync_obj_unref(void **sync_obj) +{ + qxl_fence_unref((struct qxl_fence **)sync_obj); +} + +static void *qxl_sync_obj_ref(void *sync_obj) +{ + return qxl_fence_ref((struct qxl_fence *)sync_obj); +} + +static bool qxl_sync_obj_signaled(void 
*sync_obj, void *sync_arg) +{ + return qxl_fence_signaled((struct qxl_fence *)sync_obj); +} +#endif static struct ttm_bo_driver qxl_bo_driver = { // .create_ttm_backend_entry = &qxl_create_ttm_backend_entry, @@ -151,6 +188,7 @@ static struct ttm_bo_driver qxl_bo_driver = { // .move = &qxl_bo_move, .verify_access = &qxl_verify_access, #if 0 + .sync_obj_signaled = &qxl_sync_obj_signaled, .sync_obj_wait = &qxl_sync_obj_wait, .sync_obj_flush = &qxl_sync_obj_flush, @@ -183,8 +221,16 @@ int qxl_ttm_init(struct qxl_device *qdev) DRM_ERROR("Failed initializing VRAM heap.\n"); return r; } + r = ttm_bo_init_mm(&qdev->mman.bdev, TTM_PL_PRIV0, + qdev->rom->num_io_pages); + if (r) { + DRM_ERROR("Failed initializing IO space heap.\n"); + return r; + } DRM_INFO("qxl: %uM of VRAM memory ready\n", (unsigned)qdev->vram_size / (1024 * 1024)); + DRM_INFO("qxl: %uM of IO pages memory ready\n", + ((unsigned)qdev->rom->num_io_pages * PAGE_SIZE) / (1024 * 1024)); if (unlikely(qdev->mman.bdev.dev_mapping == NULL)) { qdev->mman.bdev.dev_mapping = qdev->ddev->dev_mapping; } @@ -195,6 +241,7 @@ void qxl_ttm_fini(struct qxl_device *qdev) int r; ttm_bo_clean_mm(&qdev->mman.bdev, TTM_PL_VRAM); + ttm_bo_clean_mm(&qdev->mman.bdev, TTM_PL_PRIV0); ttm_bo_device_release(&qdev->mman.bdev); qxl_ttm_global_fini(qdev); DRM_INFO("qxl: ttm finalized\n"); diff --git a/include/drm/qxl_drm.h b/include/drm/qxl_drm.h index 326ee1743888..b2455df72f1a 100644 --- a/include/drm/qxl_drm.h +++ b/include/drm/qxl_drm.h @@ -5,5 +5,6 @@ #define QXL_GEM_DOMAIN_CPU 0 #define QXL_GEM_DOMAIN_VRAM 1 +#define QXL_GEM_DOMAIN_IO 2 #endif |