/*
 * nvlib.h
 *
 *  Created on: Mar 18, 2010
 *      Author: lb
 */

#ifndef NVLIB_H_
#define NVLIB_H_

#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <signal.h>
#include <sched.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <memory>
#include <stdexcept>
#include <utility>
#include <pciaccess.h>
#include <boost/io/ios_state.hpp>

#include "nouveau_reg.h"
#include "nouveau_class.h" /* NV04_*, NV20TCL, NV50TCL, ... object class defines (assumed header name) */

#define PAGE_SIZE 4096

inline std::ostream& hex08(std::ostream& out)
{
    return out << std::hex << std::setw(8) << std::setfill('0');
}

#define NV_PFIFO_RAMHT 0x2210

enum nouveau_card_type {
    NV_04 = 0x00,
    NV_10 = 0x10,
    NV_20 = 0x20,
    NV_30 = 0x30,
    NV_40 = 0x40,
    NV_50 = 0x50,
};

enum nv_driver_type {
    NV_DRIVER_NONE = 0,
    NV_DRIVER_NVIDIA = 1,
    NV_DRIVER_NOUVEAU = 2,
};

struct os_interface {
    enum nv_driver_type nv_driver;

    virtual void memcpy_from_phys(void* to, uint64_t from, size_t size) = 0;
    virtual void memcpy_to_phys(uint64_t to, const void* from, size_t size) = 0;
    virtual void memcpy_phys_to_phys(uint64_t to, uint64_t from, size_t size) = 0;
    virtual void trace_marker(const char* s) {}
};

struct os_linux : public os_interface {
    int devmem;
    int physmem;

    os_linux()
    {
        devmem = open("/dev/mem", O_RDWR);
        if(devmem < 0) {
            std::cerr << "Unable to open /dev/mem. Are you root?" << std::endl;
            exit(1);
        }

        pci_system_init();
        pci_system_init_dev_mem(devmem);

        int fd;
        fd = open("/dev/nvidiactl", O_RDWR);
        if(fd >= 0) {
            nv_driver = NV_DRIVER_NVIDIA;
            close(fd);
        } else {
            fd = open("/dev/dri/card0", O_RDWR);
            if(fd >= 0) {
                nv_driver = NV_DRIVER_NOUVEAU;
                close(fd);
            } else
                nv_driver = NV_DRIVER_NONE;
        }
    }

    virtual void memcpy_from_phys(void* tov, uint64_t from, size_t size)
    {
        char* to = (char*)tov;
        if(pread(devmem, to, size, from) < (ssize_t)size) {
            /* pread on /dev/mem failed: fall back to mapping one page at a time */
            while(size) {
                size_t copy = PAGE_SIZE - (from & (PAGE_SIZE - 1));
                if(copy > size)
                    copy = size;
                char* p = (char*)mmap(0, PAGE_SIZE, PROT_READ, MAP_SHARED, devmem, from & ~(PAGE_SIZE - 1));
                if(p == MAP_FAILED)
                    throw std::runtime_error(strerror(errno));
                memcpy(to, p + (from & (PAGE_SIZE - 1)), copy);
                munmap(p, PAGE_SIZE);
                to += copy;
                from += copy;
                size -= copy;
            }
        }
    }

    /* For some unfathomable reason, sometimes you can mmap vram but not read/write it */
    virtual void memcpy_to_phys(uint64_t to, const void* fromv, size_t size)
    {
        const char* from = (const char*)fromv;
        if(pwrite(devmem, from, size, to) < (ssize_t)size) {
            while(size) {
                size_t copy = PAGE_SIZE - (to & (PAGE_SIZE - 1));
                if(copy > size)
                    copy = size;
                char* p = (char*)mmap(0, PAGE_SIZE, PROT_WRITE, MAP_SHARED, devmem, to & ~(PAGE_SIZE - 1));
                if(p == MAP_FAILED)
                    throw std::runtime_error(strerror(errno));
                memcpy(p + (to & (PAGE_SIZE - 1)), from, copy);
                munmap(p, PAGE_SIZE);
                to += copy;
                from += copy;
                size -= copy;
            }
        }
    }

    virtual void memcpy_phys_to_phys(uint64_t to, uint64_t from, size_t size)
    {
        char buf[4096];
        while(size) {
            size_t copy = size;
            if(copy > sizeof(buf))
                copy = sizeof(buf);
            memcpy_from_phys(buf, from, copy);
            memcpy_to_phys(to, buf, copy);
            to += copy;
            from += copy;
            size -= copy;
        }
    }

    virtual void trace_marker(const char* s)
    {
        int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
        if(fd >= 0) {
            write(fd, s, strlen(s));
            close(fd);
        }
    }
};

struct os_interface* os;

struct os_init_struct {
    os_init_struct()
    {
        os = new os_linux();
    }
};

os_init_struct os_init_global;
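/* Usage sketch (not part of the original file): read a single 32-bit word from
 * a physical address through the global OS backend instantiated by
 * os_init_global above. The helper name is made up for illustration. */
inline uint32_t nv_phys_rd32_example(uint64_t phys_addr)
{
    uint32_t value = 0;
    os->memcpy_from_phys(&value, phys_addr, sizeof(value));
    return value;
}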
/* there are slight variations, but they should not matter since we only match the lower byte */
unsigned nvidia_grclasses[8 * 6] = {
    /* nv04 */
    NV04_SWIZZLED_SURFACE,
    NV04_TEXTURED_TRIANGLE,
    NV04_MULTITEX_TRIANGLE,
    NV04_GDI_RECTANGLE_TEXT,
    NV04_CONTEXT_SURFACES_2D,
    NV04_CONTEXT_SURFACES_3D,
    NV04_IMAGE_BLIT,
    NV04_SCALED_IMAGE_FROM_MEMORY,

    /* nv10 */
    NV04_GDI_RECTANGLE_TEXT,
    NV11TCL,
    NV10_SCALED_IMAGE_FROM_MEMORY,
    NV04_MEMORY_TO_MEMORY_FORMAT,
    NV04_SWIZZLED_SURFACE,
    NV12_IMAGE_BLIT,
    NV10_CONTEXT_SURFACES_2D,
    0,

    /* nv20 */
    NV04_BETA_SOLID,
    NV20TCL,
    NV10_SCALED_IMAGE_FROM_MEMORY,
    NV04_MEMORY_TO_MEMORY_FORMAT,
    NV20_SWIZZLED_SURFACE,
    NV12_IMAGE_BLIT,
    NV10_CONTEXT_SURFACES_2D,
    0,

    /* nv30 */
    NV04_GDI_RECTANGLE_TEXT,
    NV34TCL,
    NV30_SCALED_IMAGE_FROM_MEMORY,
    NV04_MEMORY_TO_MEMORY_FORMAT,
    NV30_SWIZZLED_SURFACE,
    NV12_IMAGE_BLIT,
    NV30_CONTEXT_SURFACES_2D,
    0,

    /* nv40 */
    NV04_BETA_SOLID,
    NV40TCL,
    NV40_SCALED_IMAGE_FROM_MEMORY,
    NV04_MEMORY_TO_MEMORY_FORMAT,
    NV40_SWIZZLED_SURFACE,
    NV12_IMAGE_BLIT,
    NV40_CONTEXT_SURFACES_2D,
    0,

    /* nv50 */
    0, /* TODO: UNKNOWN! handle is 0xbeef4901, maybe a software object? */
    NV50TCL,
    NV50_MEMORY_TO_MEMORY_FORMAT,
    NV50_2D,
    0,
    0,
    0,
    0,
};

#define NV20_GRCTX_SIZE (3580*4)
#define NV25_GRCTX_SIZE (3529*4)
#define NV2A_GRCTX_SIZE (3500*4)
#define NV30_31_GRCTX_SIZE (24392)
#define NV34_GRCTX_SIZE (18140)
#define NV35_36_GRCTX_SIZE (22396)

struct nv_ramht_entry {
    bool valid;
    unsigned char channel;
    unsigned char engine;
    unsigned handle;
    unsigned instance;

    static bool channel_handle_less(const nv_ramht_entry& a, const nv_ramht_entry& b)
    {
        if(a.channel != b.channel)
            return a.channel < b.channel;
        else
            return a.handle < b.handle;
    }

    static bool instance_less(const nv_ramht_entry& a, const nv_ramht_entry& b)
    {
        return a.instance < b.instance;
    }
};

struct nv_region {
    char* ptr;
    size_t size;

    nv_region() : ptr(0), size(0) {}
    nv_region(char* ptr, size_t size) : ptr(ptr), size(size) {}

    uint32_t rd32(uint32_t off) const
    {
        return *(volatile uint32_t*)(ptr + off);
    }

    void wr32(uint32_t off, uint32_t value) const
    {
        *(volatile uint32_t*)(ptr + off) = value;
    }

    int offset_in(const nv_region& container) const
    {
        if(container.ptr > ptr)
            return -1;
        if((ptr + size) > (container.ptr + container.size))
            return -1;
        return ptr - container.ptr;
    }
};
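/* Usage sketch (not part of the original file): nv_region is just a
 * (pointer, size) window with volatile 32-bit accessors; offset_in() reports
 * where one window sits inside another. The function below only exists to
 * illustrate that, using a plain memory buffer instead of a BAR mapping. */
inline int nv_region_example()
{
    static uint32_t backing[16];
    nv_region outer((char*)backing, sizeof(backing));
    nv_region inner((char*)backing + 16, 16);
    outer.wr32(16, 0x12345678);     /* write through the outer window... */
    uint32_t v = inner.rd32(0);     /* ...and read it back through the inner one */
    (void)v;
    return inner.offset_in(outer);  /* byte offset of inner within outer: 16 */
}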
struct nv_device : public nv_region {
    struct nv_ramin : public nv_region {
        struct nv_device* dev;

        nv_ramin(struct nv_device* dev) : dev(dev)
        {
            /* map larger RAMIN aperture on NV40 cards */
            ptr = NULL;
            if (dev->card_type >= NV_40) {
                int ramin_bar = 2;
                if (dev->pci->regions[ramin_bar].size == 0)
                    ramin_bar = 3;

                size = dev->pci->regions[ramin_bar].size;
                pci_device_map_range(dev->pci, dev->pci->regions[ramin_bar].base_addr,
                        dev->pci->regions[ramin_bar].size,
                        PCI_DEV_MAP_FLAG_WRITABLE, (void**)&ptr);
                if (!ptr)
                    throw std::runtime_error("Failed to init RAMIN mapping");
            }

            /* On older cards (or if the above failed), create a map covering
             * the BAR0 PRAMIN aperture */
            if (!ptr) {
                size = 1 * 1024 * 1024;
                ptr = dev->ptr + NV_RAMIN;
            }
        }
    };

    struct nv_ramht : public nv_region {
        struct nv_ramin* ramin;
        unsigned bits;
        unsigned entries;
        unsigned search_shift; // TODO: what is this exactly?

        nv_ramht(struct nv_ramin* ramin) : ramin(ramin)
        {
            uint32_t reg = ramin->dev->rd32(NV_PFIFO_RAMHT);
            bits = ((reg >> 16) & 0xf) + 9;
            ptr = ramin->ptr + ((reg & 0xffff) << 8);
            entries = 1 << bits;
            size = entries * 8;
            search_shift = (reg >> 24) + 4;
        }

        /* returns the byte offset of the hashed slot inside the table */
        uint32_t hash_handle(int channel, uint32_t handle)
        {
            uint32_t hash = 0;
            int i;

            for (i = 32; i > 0; i -= bits) {
                hash ^= (handle & ((1 << bits) - 1));
                handle >>= bits;
            }

            if (ramin->dev->card_type < NV_50)
                hash ^= channel << (bits - 4);
            hash <<= 3;
            return hash;
        }

        /* find the entry for (channel, handle), or the free slot where it would
         * go; returns the entry index, or -1 if the table is full */
        int find(unsigned channel, uint32_t handle, nv_ramht_entry& entry)
        {
            /* hash_handle returns a byte offset; get_at wants an entry index */
            unsigned start = hash_handle(channel, handle) >> 3;
            unsigned i = start;
            do {
                entry = get_at(i);
                if(entry.valid) {
                    if(entry.channel == channel && entry.handle == handle)
                        return i;
                } else
                    return i;
                ++i;
                if(i == entries)
                    i = 0;
            } while(i != start);
            return -1;
        }

        nv_ramht_entry get_at(unsigned i)
        {
            nv_ramht_entry entry;
            uint32_t ctx;
            entry.handle = rd32(i * 8);
            ctx = rd32(i * 8 + 4);

            if(ramin->dev->card_type < NV_40) {
                entry.valid = (ctx & (1 << 31)) != 0;
                entry.engine = (ctx >> 16) & 3;
                entry.channel = (ctx >> 24) & (ramin->dev->channels - 1);
                entry.instance = (ctx & 0xffff) << 4;
            } else if(ramin->dev->card_type < NV_50) {
                entry.valid = ctx != 0;
                entry.engine = (ctx >> 20) & 3;
                entry.channel = (ctx >> 23) & (ramin->dev->channels - 1);
                entry.instance = (ctx & 0xfffff) << 4;
            } else {
                entry.valid = ctx != 0;
                if((ctx & 3) == 2) {
                    entry.instance = (ctx & 0xfffff) >> 10;
                    entry.engine = 2;
                } else {
                    entry.instance = (ctx & 0xfffff) << 4;
                    entry.engine = (ctx >> 20) & 3;
                }
                entry.channel = -1; /* TODO: set to the ramht channel */
            }
            return entry;
        }

        void set_at(unsigned i, const nv_ramht_entry& entry)
        {
            uint32_t ctx;
            wr32(i * 8, entry.handle);
            /* mirror the layouts read back in get_at() */
            if(ramin->dev->card_type < NV_40)
                ctx = (entry.instance >> 4) | (entry.engine << 16) | (entry.channel << 24) | (1U << 31);
            else if(ramin->dev->card_type < NV_50)
                ctx = (entry.instance >> 4) | (entry.engine << 20) | (entry.channel << 23);
            else if(entry.engine == 2)
                ctx = (entry.instance << 10) | 2;
            else
                ctx = (entry.instance >> 4) | (entry.engine << 20);
            wr32(i * 8 + 4, ctx);
        }

        void clear_at(unsigned i)
        {
            wr32(i * 8, 0);
            wr32(i * 8 + 4, 0);
        }

        void insert(const nv_ramht_entry& entry)
        {
            nv_ramht_entry cur;
            int i = find(entry.channel, entry.handle, cur);
            if(i < 0)
                throw std::runtime_error("ramht table full!");
            set_at(i, entry);
        }

        void remove(unsigned channel, uint32_t handle)
        {
            nv_ramht_entry entry;
            int i = find(channel, handle, entry);
            if(i >= 0 && entry.valid)
                clear_at(i);
        }
    };

    struct nv_object : public nv_region {
        enum nv_object_type {
            grobj,
            dma_paged,
            dma_linear
        };

        /* values kept numerically in sync with the NV_DMA_TARGET_* register constants */
        enum nv_target {
            vram,
            vram_tiled,
            pci,
            gart
        };

        struct nv_ramin* ramin;
        nv_object_type type;
        uint32_t tag;
        int64_t dma_base;
        int64_t dma_limit;
        bool dma_present;
        nv_target dma_target;

        static const char* dma_target_str(nv_target dma_target)
        {
            const char* dma_target_strs[4] = {"VRAM", "VRAM_TILED", "PCI", "AGP"};
            return dma_target_strs[dma_target];
        }

        nv_object(struct nv_ramin* ramin, uint32_t offset) : ramin(ramin)
        {
            assert (ramin->dev->card_type < NV_50);
            ptr = ramin->ptr + offset;
            tag = rd32(0);
            dma_base = -1;
            dma_limit = -1;
            unsigned objclass = tag & 0xff;
            if((objclass == 0x3d || objclass == 2 || objclass == 3)) {
                if(ramin->dev->card_type < NV_50) {
                    type = (tag & (1 << 13)) ? dma_linear : dma_paged;
                    dma_limit = rd32(4);
                    dma_target = (nv_target)((tag >> 16) & 3);
                    dma_present = !!(tag & (1 << 12));
                    if(!dma_present) /* TODO: guess */
                        size = 8;
                    else if(type == dma_linear) {
                        dma_base = (rd32(8) & ~0xfff) | (tag & 0xff000000);
                        size = 12;
                    } else
                        size = (((dma_limit + 0xfff) >> 12) + 2) * 4;
                } else {
                    /* TODO: nv50 non-linear ctxdmas? flags? */
                    unsigned v3 = rd32(12);
                    dma_limit = rd32(4) | ((uint64_t)(v3 & 0xff000000) << 8);
                    dma_base = rd32(8) | ((uint64_t)(v3 & 0xff) << 32);
                    size = 24; // TODO: this is almost surely incomplete
                    type = dma_linear;
                    dma_present = true;
                    if(tag & 0xf0000)
                        dma_target = vram;
                    else
                        dma_target = gart;
                }
            } else {
                type = grobj;
                /*XXX: dodgy hack for now */
                if (ramin->dev->card_type >= NV_50)
                    size = 24;
                else if (ramin->dev->card_type >= NV_40)
                    size = 32;
                else
                    size = 16;
            }
        }

        /* translate an offset inside the DMA object into a linear address in its target space */
        uint64_t dma_to_linear(uint32_t off) const
        {
            uint32_t size;
            uint64_t addr;
            assert(type != grobj);
            if (ramin->dev->card_type < NV_50) {
                size = dma_limit + 1;
                if(off >= size)
                    return ~0ULL;
                if(!dma_present)
                    return ~0ULL;
                if(type == dma_paged) {
                    uint64_t pte = rd_pte(off >> 12);
                    addr = (pte & ~0xfff) + (off & 0xfff);
                } else if(type == dma_linear)
                    return dma_base + off;
                else
                    assert(0);
            } else {
                assert(type == dma_linear);
                return dma_base + off;
            }
            return addr;
        }

        /* TODO: this probably should be redesigned */
        uint64_t dma_to_phys(uint32_t off) const
        {
            if (ramin->dev->card_type < NV_50) {
                uint64_t addr = dma_to_linear(off);
                if(addr == ~0ULL)
                    return addr;
                if(dma_target == NV_DMA_TARGET_VIDMEM) {
                    if(addr >= ramin->dev->vram_mappable_size)
                        return ~0ULL;
                    else
                        return addr + ramin->dev->vram_phys;
                } else if(dma_target == NV_DMA_TARGET_PCI)
                    return addr;
                else if(dma_target == NV_DMA_TARGET_AGP)
                    throw std::runtime_error("AGP not implemented");
                else
                    assert(0);
            } else {
                /* TODO: VM... */
                assert(0);
            }
            return ~0ULL;
        }
        unsigned num_ptes()
        {
            assert(type == dma_paged);
            return (size >> 2) - 2;
        }

        uint64_t rd_pte(unsigned i) const
        {
            assert(type == dma_paged);
            return rd32(8 + i * 4) | (tag & 0xff000000);
        }

        void wr_pte_present_wr(unsigned i, uint64_t offset) const
        {
            assert(type == dma_paged);
            wr32(8 + i * 4, offset | 3);
        }

        void print(std::ostream& out) const
        {
            boost::io::ios_all_saver ias(out);
            if(type == grobj) {
                out << "GR";
                for(unsigned i = 0; i < size; i += 4)
                    out << ' ' << hex08 << rd32(i);
            } else {
                out << dma_target_str(dma_target) << ' ' << hex08 << tag << ' ' << hex08 << dma_limit;
                if(type == dma_linear)
                    out << " -> " << hex08 << dma_base;
            }
        }

        friend std::ostream& operator <<(std::ostream& out, const nv_object& obj)
        {
            obj.print(out);
            return out;
        }
    };

    struct nv_ramfc : public nv_region {
        struct nv_fc : public nv_region {
            nv_ramfc* ramfc;

            nv_fc(nv_ramfc* ramfc, unsigned channel) : ramfc(ramfc)
            {
                ptr = ramfc->ptr + ramfc->fc_size * channel;
                size = ramfc->fc_size;
            }
        };

        struct nv_ramin* ramin;
        unsigned fc_size;
        unsigned fc_fifo;
        int fc_grctx;
        struct nv_fc* fc[128];

        nv_ramfc(struct nv_ramin* ramin) : ramin(ramin)
        {
            if (ramin->dev->chipset >= 0x40)
                fc_size = 128;
            else if (ramin->dev->chipset >= 0x17)
                fc_size = 64;
            else
                fc_size = 32;

            if(ramin->dev->card_type >= NV_50)
                assert(0);
            else if(ramin->dev->card_type >= NV_40)
                ptr = ramin->ptr + 0x20000;
            else
                ptr = ramin->ptr + 0x11400;
            size = fc_size * ramin->dev->channels;

            if(ramin->dev->card_type < NV_10)
                fc_fifo = 8;
            else if(ramin->dev->card_type < NV_50)
                fc_fifo = 12;
            else
                fc_fifo = 0x48;

            if(ramin->dev->card_type == NV_40)
                fc_grctx = 56;
            else
                fc_grctx = -1;

            for(unsigned i = 0; i < ramin->dev->channels; ++i)
                fc[i] = new nv_fc(this, i);
        }
    };
    typedef nv_ramfc::nv_fc nv_fc;

    struct nv_users : public nv_region {
        struct nv_user : public nv_region {
            nv_users* users;

            nv_user(nv_users* users, unsigned channel) : users(users)
            {
                ptr = users->ptr + users->user_size * channel;
                size = users->user_size;
            }
        };

        nv_device* dev;
        unsigned user_size;
        nv_user* user[128];

        nv_users(nv_device* dev) : dev(dev)
        {
            if (dev->card_type < NV_40) {
                ptr = dev->ptr + NV03_USER(0);
                user_size = NV03_USER_SIZE;
            } else if (dev->card_type < NV_50) {
                ptr = dev->ptr + NV40_USER(0);
                user_size = NV40_USER_SIZE;
            } else {
                ptr = dev->ptr + NV50_USER(0);
                user_size = NV50_USER_SIZE;
            }
            size = user_size * dev->channels;
            for(unsigned i = 0; i < dev->channels; ++i)
                user[i] = new nv_user(this, i);
        }
    };
    typedef nv_users::nv_user nv_user;

    struct nv_ramro : public nv_region {
        nv_ramro(nv_ramin* ramin)
        {
            ptr = ramin->ptr + (ramin->dev->rd32(NV03_PFIFO_RAMRO) << 8);
            size = 512;
        }
    };

    struct nv20_grctx_table : public nv_region {
        nv20_grctx_table(nv_ramin* ramin)
        {
            ptr = ramin->ptr + (ramin->dev->rd32(NV20_PGRAPH_CHANNEL_CTX_TABLE) << 4);
            size = 4 * 32;
        }
    };

    struct pci_device* pci;
    uint64_t vram_phys;
    uint64_t vram_total_size;
    uint64_t vram_mappable_size;
    unsigned chipset;
    enum nouveau_card_type card_type;
    unsigned channels;
    int grctx_grclasses;
    unsigned grctx_size;

    std::unique_ptr<nv_ramin> ramin;
    std::unique_ptr<nv_ramht> ramht;
    std::unique_ptr<nv_ramfc> ramfc;
    std::unique_ptr<nv_ramro> ramro;
    std::unique_ptr<nv_users> users;
    std::unique_ptr<nv20_grctx_table> grctx_table;

    nv_device(const char* pciname)
    {
        if(pciname) {
            unsigned domain, bus, devid, func;
            if(sscanf(pciname, "%x:%x:%x.%x", &domain, &bus, &devid, &func) < 4)
                throw std::runtime_error("unable to parse PCI name");
            pci = pci_device_find_by_slot(domain, bus, devid, func);
            if(!pci)
                throw std::runtime_error("unable to find PCI device");
            if(pci->vendor_id != 0x10de)
                throw std::runtime_error("not an nVidia card");
        } else {
            struct pci_id_match match;
            memset(&match, 0, sizeof(match));
            match.vendor_id = 0x10de;
            match.subvendor_id = PCI_MATCH_ANY;
            match.device_id = PCI_MATCH_ANY;
            match.subdevice_id = PCI_MATCH_ANY;
            struct pci_device_iterator *iter = pci_id_match_iterator_create(&match);
            pci = pci_device_next(iter);
            if(!pci)
                throw std::runtime_error("cannot find any nVidia card");
            struct pci_device* second = pci_device_next(iter);
            if(second)
                throw std::runtime_error("more than one nVidia card: specify the desired one explicitly");
            pci_iterator_destroy(iter);
        }

        pci_device_probe(pci);
        vram_phys = pci->regions[1].base_addr;
        size = pci->regions[0].size;
        pci_device_map_range(pci, pci->regions[0].base_addr, pci->regions[0].size,
                PCI_DEV_MAP_FLAG_WRITABLE, (void**)&ptr);

        uint32_t reg0 = rd32(NV03_PMC_BOOT_0);

        /* We're dealing with >=NV10 */
        if ((reg0 & 0x0f000000) > 0) {
            /* Bit 27-20 contain the architecture in hex */
            chipset = (reg0 & 0xff00000) >> 20;
        /* NV04 or NV05 */
        } else if ((reg0 & 0xff00fff0) == 0x20004000) {
            if (reg0 & 0x00f00000)
                chipset = 0x05;
            else
                chipset = 0x04;
        } else
            chipset = 0xff;

        switch (chipset & 0xf0) {
        case 0x00:
        case 0x10:
        case 0x20:
        case 0x30:
            card_type = (nouveau_card_type)(chipset & 0xf0);
            break;
        case 0x40:
        case 0x60:
            card_type = NV_40;
            break;
        case 0x50:
        case 0x80:
        case 0x90:
        case 0xa0:
            card_type = NV_50;
            break;
        default: {
            std::ostringstream ss;
            ss << "Unsupported chipset 0x" << std::hex << reg0;
            throw std::runtime_error(ss.str());
        }
        }

        if(card_type >= NV_50)
            channels = 128;
        else if(card_type >= NV_10)
            channels = 32;
        else
            channels = 16;

        vram_total_size = mem_fb_amount();
        vram_mappable_size = vram_total_size;
        /* the CPU can only reach what BAR1 actually exposes */
        if(vram_mappable_size > pci->regions[1].size)
            vram_mappable_size = pci->regions[1].size;

        ramin.reset(new nv_ramin(this));
        ramfc.reset(new nv_ramfc(&*ramin));
        ramht.reset(new nv_ramht(&*ramin));
        ramro.reset(new nv_ramro(&*ramin));
        users.reset(new nv_users(this));
        if(card_type == NV_20 || card_type == NV_30)
            grctx_table.reset(new nv20_grctx_table(&*ramin));

        init_grctx_info();

        grctx_grclasses = -1;
        if(card_type == NV_40) {
            // TODO: parse ctxprogs to find out
            if(os->nv_driver == NV_DRIVER_NOUVEAU)
                grctx_grclasses = 0x40;
        } else if(card_type == NV_30)
            grctx_grclasses = 0x40;
    }

    static nv_device* open_default()
    {
        return new nv_device(getenv("NV_DEVICE"));
    }

    bool is_channel_enabled(unsigned channel)
    {
        if(card_type < NV_50)
            return !!(rd32(NV04_PFIFO_MODE) & (1 << channel));
        else
            return !!(rd32(NV50_PFIFO_CTX_TABLE(channel)) & NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED);
    }

    uint32_t rd_get(unsigned channel)
    {
        return users->user[channel]->rd32(0x44);
    }

    void wr_get(unsigned channel, uint32_t value)
    {
        users->user[channel]->wr32(0x44, value);
    }

    uint32_t rd_put(unsigned channel)
    {
        /* user put always reads as 0 pre-nv40 */
        if(card_type < NV_40)
            return ramfc->fc[channel]->rd32(0); // TODO: what if the channel is executing right now?
        else
            return users->user[channel]->rd32(0x40);
    }

    void wr_put(unsigned channel, uint32_t value)
    {
        users->user[channel]->wr32(0x40, value);
    }

    uint32_t get_grctx(unsigned channel)
    {
        // TODO: nv50 is at "hdr" in channel-private RAMIN
        if(ramfc->fc_grctx >= 0) /* NV40 */
            return ramfc->fc[channel]->rd32(ramfc->fc_grctx) << 4;
        else if(grctx_table) /* NV20-NV30 */
            return grctx_table->rd32(channel * 4) << 4;
        else
            return 0;
    }

    bool get_grclasses(unsigned channel, unsigned grclasses[8])
    {
        if(os->nv_driver == NV_DRIVER_NVIDIA) {
            /* one row of 8 classes per card generation */
            memcpy(grclasses, nvidia_grclasses + 8 * (card_type >> 4), 8 * sizeof(unsigned));
        } else {
            if(grctx_grclasses < 0)
                return false;
            uint32_t grctx = get_grctx(channel);
            if(!grctx)
                return false;
            for(unsigned i = 0; i < 8; ++i)
                grclasses[i] = ramin->rd32(grctx + grctx_grclasses + i * 4) & 0xffff;
        }
        return true;
    }

private:
    void init_grctx_info()
    {
        if(card_type == NV_50)
            grctx_size = 0x70000;
        else if(card_type == NV_40)
            grctx_size = 175 * 1024;
        else {
            switch (chipset) {
            case 0x20:
                grctx_size = NV20_GRCTX_SIZE;
                //ctx_init = nv20_graph_context_init;
                //idoffs = 0;
                break;
            case 0x25:
            case 0x28:
                grctx_size = NV25_GRCTX_SIZE;
                //ctx_init = nv25_graph_context_init;
                break;
            case 0x2a:
                grctx_size = NV2A_GRCTX_SIZE;
                //ctx_init = nv2a_graph_context_init;
                //idoffs = 0;
                break;
            case 0x30:
            case 0x31:
                grctx_size = NV30_31_GRCTX_SIZE;
                //ctx_init = nv30_31_graph_context_init;
                break;
            case 0x34:
                grctx_size = NV34_GRCTX_SIZE;
                //ctx_init = nv34_graph_context_init;
                break;
            case 0x35:
            case 0x36:
                grctx_size = NV35_36_GRCTX_SIZE;
                //ctx_init = nv35_36_graph_context_init;
                break;
            default:
                grctx_size = 0;
            }
        }
    }

    uint64_t mem_fb_amount()
    {
        uint32_t boot0;

        switch (card_type) {
        case NV_04:
            boot0 = rd32(NV03_BOOT_0);
            if (boot0 & 0x00000100)
                return (((boot0 >> 12) & 0xf) * 2 + 2) * 1024 * 1024;
            switch (boot0 & NV03_BOOT_0_RAM_AMOUNT) {
            case NV04_BOOT_0_RAM_AMOUNT_32MB:
                return 32 * 1024 * 1024;
            case NV04_BOOT_0_RAM_AMOUNT_16MB:
                return 16 * 1024 * 1024;
            case NV04_BOOT_0_RAM_AMOUNT_8MB:
                return 8 * 1024 * 1024;
            case NV04_BOOT_0_RAM_AMOUNT_4MB:
                return 4 * 1024 * 1024;
            }
            break;
        case NV_10:
        case NV_20:
        case NV_30:
        case NV_40:
        case NV_50:
        default:
            // TODO: support nforce/nforce2
            uint64_t mem;
            mem = (rd32(NV04_FIFO_DATA) & NV10_FIFO_DATA_RAM_AMOUNT_MB_MASK) >>
                    NV10_FIFO_DATA_RAM_AMOUNT_MB_SHIFT;
            return mem * 1024 * 1024;
        }
        return 0;
    }
};

typedef nv_device::nv_ramht nv_ramht;
typedef nv_device::nv_ramin nv_ramin;
typedef nv_device::nv_ramfc nv_ramfc;
typedef nv_device::nv_fc nv_fc;
typedef nv_device::nv_users nv_users;
typedef nv_device::nv_user nv_user;
typedef nv_device::nv_object nv_object;

std::ostream& operator <<(std::ostream& out, const std::pair<nv_device*, nv_ramht_entry>& deventry)
{
    nv_device* dev = deventry.first;
    const nv_ramht_entry& entry = deventry.second;
    boost::io::ios_all_saver ias(out);
    out << (unsigned)entry.channel << ':' << hex08 << entry.handle
            << " @ " << hex08 << entry.instance << ": ";
    nv_object obj(&*dev->ramin, entry.instance);
    out << obj;
    return out;
}
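/* Usage sketch (not part of the original file): open the device named by the
 * NV_DEVICE environment variable (or the only nVidia card present) and print
 * every valid RAMHT entry through the operator<< above. Illustrative only. */
inline void nv_dump_ramht_example()
{
    nv_device* dev = nv_device::open_default();
    for(unsigned i = 0; i < dev->ramht->entries; ++i) {
        nv_ramht_entry entry = dev->ramht->get_at(i);
        if(entry.valid)
            std::cout << std::make_pair(dev, entry) << std::endl;
    }
}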
int nv_find_idle_channel(struct nv_device* dev)
{
    bool enabled[128];
    unsigned gets1[128];
    unsigned puts1[128];
    unsigned gets2[128];
    unsigned puts2[128];
    int idle = -1;

    for(unsigned i = 0; i < dev->channels; ++i) {
        if(i == 0 || i == 30 || i == 127)
            enabled[i] = 0;
        else
            enabled[i] = dev->is_channel_enabled(i);
        if(enabled[i]) {
            gets1[i] = dev->rd_get(i);
            puts1[i] = dev->rd_put(i);
        }
    }

    sleep(1);

    for(unsigned i = 0; i < dev->channels; ++i) {
        if(enabled[i]) {
            enabled[i] = dev->is_channel_enabled(i);
            if(enabled[i]) {
                gets2[i] = dev->rd_get(i);
                puts2[i] = dev->rd_put(i);
            }
        }
    }

    for(unsigned i = 0; i < dev->channels; ++i) {
        if(enabled[i] && gets1[i] && gets1[i] == puts1[i] && gets2[i] == puts2[i] && gets1[i] == gets2[i]) {
            idle = i;
        }
    }
    return idle;
}

bool nv_find_vram_ramht_entry(struct nv_device* dev, int channel, nv_ramht_entry& ret_entry)
{
    int64_t best_limit = -1;
    for(unsigned i = 0; i < dev->ramht->entries; ++i) {
        nv_ramht_entry entry = dev->ramht->get_at(i);
        if(!entry.valid)
            continue;
        if(channel >= 0 && entry.channel != channel)
            continue;

        nv_object obj(&*dev->ramin, entry.instance);
        if(obj.type == nv_object::dma_linear && obj.dma_target == nv_object::vram
                && obj.dma_base == 0 && obj.dma_limit > best_limit) {
            best_limit = obj.dma_limit;
            ret_entry = entry;
        }
    }
    return best_limit >= 0;
}

struct nv_channel {
    struct nv_device* dev;
    int channel;

    nv_channel(struct nv_device* dev, int channel) : dev(dev), channel(channel) {}

    void out(uint32_t v)
    {
        outp(&v, 1);
    }

    virtual void outp(void* buf, int dwords) = 0;
    virtual void fire() = 0;
    virtual void wait_idle() = 0;
    virtual void wait(size_t size) = 0;
};

struct nv_channel_direct : public nv_channel {
    nv_channel_direct(struct nv_device* dev, int channel) : nv_channel(dev, channel)
    {
    }

    virtual void wait_idle()
    {
        uint32_t get, put;
        put = dev->rd_put(channel);
        get = dev->rd_get(channel);
        while(get != put)
            get = dev->rd_get(channel);
    }
};

struct nv_channel_vram : public nv_channel_direct {
    bool taken;
    uint32_t orig_ctxdma;
    uint32_t orig_getput;
    uint32_t vram_handle;
    uint32_t vram_ctxdma;
    uint32_t our_put;
    uint32_t put;

    nv_channel_vram(struct nv_device* dev, int channel) : nv_channel_direct(dev, channel)
    {
        nv_ramht_entry entry;
        taken = false;
        if(!nv_find_vram_ramht_entry(dev, channel, entry))
            throw std::runtime_error("Unable to find a vram ctxdma");
        vram_ctxdma = entry.instance;
        vram_handle = entry.handle;
        /* the middle of vram is hopefully away from anything critical */
        our_put = put = dev->vram_mappable_size / 2;
    }

    virtual void outp(void* buf, int dwords)
    {
        os->memcpy_to_phys(dev->vram_phys + our_put, (const char*)buf, dwords * 4);
        our_put += dwords * 4;
    }

    virtual void fire()
    {
        if(put != our_put) {
            if(!taken) {
                /* first submission: repoint the channel's pushbuffer ctxdma at
                 * vram and park get/put at our staging area */
                wait_idle();
                orig_getput = dev->rd_put(channel);
                orig_ctxdma = dev->ramfc->fc[channel]->rd32(dev->ramfc->fc_fifo) << 4;
                if(dev->card_type < NV_40)
                    orig_ctxdma &= 0xfffff;
                dev->ramfc->fc[channel]->wr32(dev->ramfc->fc_fifo, vram_ctxdma >> 4);
                dev->ramfc->fc[channel]->wr32(0, put);
                dev->ramfc->fc[channel]->wr32(4, put);
                //dev->wr_get(channel, put);
                dev->wr_put(channel, put);
                taken = true;
            }
            put = our_put;
            dev->wr_put(channel, put);
        }
    }

    virtual void wait(size_t s)
    {
    }
};

struct nv_channel_runner {
    virtual void run() = 0;
};
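/* Usage sketch (not part of the original file): emit one method plus a single
 * data word through any nv_channel. The header word layout matches the RING()
 * macro defined at the end of this file: method in bits 0-12, subchannel in
 * bits 13-15, dword count in bits 18-28. subc/mthd/value are caller-supplied
 * placeholders; no particular object class is assumed. */
inline void nv_channel_emit_example(nv_channel& chan, unsigned subc, uint32_t mthd, uint32_t value)
{
    uint32_t words[2];
    words[0] = (subc << 13) | (1 << 18) | mthd; /* one data dword follows */
    words[1] = value;
    chan.wait(2);       /* make sure there is room for two dwords */
    chan.outp(words, 2);
    chan.fire();
}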
struct nv_channel_parasite : public nv_channel_direct {
    std::unique_ptr<nv_object> dmaobj;
    uint32_t dmactx;
    uint32_t put;
    uint32_t our_put;
    uint32_t orig_getput;
    nv_channel_runner* runner;

    nv_channel_parasite(struct nv_device* dev, int channel, nv_channel_runner* runner)
            : nv_channel_direct(dev, channel), runner(runner)
    {
        dmactx = dev->ramfc->fc[channel]->rd32(dev->ramfc->fc_fifo) << 4;
        dmaobj.reset(new nv_object(&*dev->ramin, dmactx));
        orig_getput = our_put = put = dev->rd_put(channel);
    }

    virtual void outp(void* buf, int dwords)
    {
        // std::cout << "put = " << std::hex << our_put << std::endl;
        uint64_t phys = dmaobj->dma_to_phys(our_put);
        os->memcpy_to_phys(phys, buf, dwords * 4);
        our_put += dwords * 4;
    }

    virtual void fire()
    {
        if(put != our_put) {
            put = our_put;
            dev->wr_put(channel, put);
        }
    }

    virtual void wait(size_t dwords)
    {
        assert(dwords < 0x200);
        for(;;) {
            unsigned left = 0x1000 - (our_put & 0xfff);
            if(left >= (dwords * 4))
                break;
            /* not enough room left in the current page: flush what we queued,
             * rewind the channel and let the owner process run for a while */
            fire();
            wait_idle();
            dev->wr_get(channel, orig_getput);
            dev->wr_put(channel, orig_getput);
            runner->run();
            orig_getput = our_put = put = dev->rd_put(channel);
        }
    }
};

/* Channel implementation based on stealing the channel of a spawned GL process */
struct nv_gl_process : public nv_channel_runner {
    pid_t pid;

    nv_gl_process(struct nv_device* dev, const char* name = "glxgears")
    {
        pid = fork();
        if(!pid) {
            int devnull = open("/dev/null", O_RDWR);
            dup2(devnull, 1);
            dup2(devnull, 2);
            close(devnull);
            execlp(name, name, NULL);
            exit(1);
        }

        try {
            std::cerr << "Please wait a few seconds while we start the helper GL program..." << std::endl;
            sleep(2);
            kill(pid, SIGSTOP);
            sleep(1);
        } catch(...) {
            kill(pid, SIGKILL);
            throw;
        }
    }

    ~nv_gl_process()
    {
        kill(pid, SIGKILL);
        kill(pid, SIGCONT);
    }

    virtual void run()
    {
        kill(pid, SIGCONT);
        sched_yield();
        kill(pid, SIGSTOP);
    }
};

#define RING(subc, mthd, size) (((subc) << 13) | ((size) << 18) | (mthd))

#endif /* NVLIB_H_ */
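/* End-to-end usage sketch (not part of the original file; kept in a comment so
 * it stays outside the include guard): spawn a helper GL process, parasite its
 * FIFO channel and emit a single method with RING(). All names other than the
 * nvlib.h classes and RING() are illustrative, and 0x100 is a placeholder
 * method number.
 *
 *     nv_device* dev = nv_device::open_default();
 *     nv_gl_process gl(dev);                  // helper process that owns a channel
 *     int chan = nv_find_idle_channel(dev);   // pick a channel that is not busy
 *     if(chan < 0)
 *         throw std::runtime_error("no idle channel found");
 *     nv_channel_parasite ch(dev, chan, &gl);
 *     ch.wait(2);
 *     ch.out(RING(0, 0x100, 1));              // subchannel 0, 1 data dword
 *     ch.out(0);
 *     ch.fire();
 *     ch.wait_idle();
 */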