diff options
Diffstat (limited to 'moche.c')
-rw-r--r-- | moche.c | 537 |
1 files changed, 537 insertions, 0 deletions
@@ -0,0 +1,537 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Authors: Jérôme Glisse <jglisse@redhat.com> + */ +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <strings.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <fcntl.h> + +#include "xf86drm.h" +#include "libdrm/nouveau/nouveau.h" +#include "nvif/class.h" +#include "nvif/cl0080.h" +#include "nvif/if0008.h" +#include "nvif/if000c.h" + +struct nvk_00f0_cp_desc { + uint32_t unk0[8]; + uint32_t entry; + uint32_t unk9[2]; + uint32_t unk11_0 : 30; + uint32_t linked_tsc : 1; + uint32_t unk11_31 : 1; + uint32_t griddim_x : 31; + uint32_t unk12 : 1; + uint16_t griddim_y; + uint16_t unk13; + uint16_t griddim_z; + uint16_t unk14; + uint32_t unk15[2]; + uint32_t shared_size : 18; + uint32_t unk17 : 14; + uint16_t unk18; + uint16_t blockdim_x; + uint16_t blockdim_y; + uint16_t blockdim_z; + uint32_t cb_mask : 8; + uint32_t unk20 : 24; + uint32_t unk21[8]; + uint32_t local_size_p : 24; + uint32_t unk29 : 3; + uint32_t bar_alloc : 5; + uint32_t local_size_n : 24; + uint32_t gpr_alloc : 8; + uint32_t cstack_size : 24; + uint32_t unk31 : 8; + struct { + uint32_t address_l; + uint32_t address_h : 17; + uint32_t reserved : 2; + uint32_t size_sh4 : 13; + } cb[8]; + uint32_t unk48[16]; +}; + + +typedef struct { + struct nouveau_client *nvclient; + struct nouveau_device *nvdevice; + struct nouveau_object *nvchannel; + struct nouveau_object *nvcompute; + struct nouveau_pushbuf *nvpushbuf; + struct nouveau_drm *nvdrm; + void *hole; + int fd; +} moche_t; + +#define DRM_FILE_PAGE_OFFSET (0x100000000ULL) + +int moche_init_hmm(moche_t *moche) +{ + moche->hole = mmap((void *)(1UL << 30), (2UL << 30), PROT_NONE, + MAP_PRIVATE, moche->fd, DRM_FILE_PAGE_OFFSET); + printf("hmm init %p\n", moche->hole); + return 0; +} + +int moche_init(moche_t *moche) +{ + int r; + + moche->fd = drmOpen("nouveau", NULL); + if (moche->fd < 0) { + printf("EE: failed to open nouveau device file (%d)\n", moche->fd); + return -1; + } + + if ((r = nouveau_drm_new(moche->fd, &moche->nvdrm))) { + printf("EE: failed to create nouveau drm (%d)\n", r); + return r; + } + + { + struct nv_device_v0 arg = {0}; + uint32_t oclass, size; + void *data; + + arg.device = ~0ULL; + oclass = NV_DEVICE; + size = sizeof(arg); + data = &arg; + if ((r = nouveau_device_new(&moche->nvdrm->client, oclass, data, + size, &moche->nvdevice))) { + printf("EE: failed to create nouveau device (%d)\n", r); + goto nouveau_device; + } + } + + if ((r = nouveau_client_new(moche->nvdevice, &moche->nvclient))) { + printf("EE: failed to create nouveau client (%d)\n", r); + goto nouveau_client; + } + + if ((r = moche_init_hmm(moche))) { + printf("EE: failed to enbable HMM (%d)\n", r); + goto nouveau_hmm; + } + + { + struct nve0_fifo arg = {0}; + uint32_t oclass, size; + void *data; + + oclass = NOUVEAU_FIFO_CHANNEL_CLASS; + arg.engine = 0x01 | 0x10 | 0x20; // NVA06F_V0_ENGINE_CE0 | NVA06F_V0_ENGINE_CE1 + size = sizeof(arg); + data = &arg; + if ((r = nouveau_object_new(&moche->nvdevice->object, 0, oclass, + data, size, &moche->nvchannel))) { + printf("EE: failed to create nouveau channel (%d)\n", r); + goto nouveau_channel; + } + } + + { + if ((r = nouveau_pushbuf_new(moche->nvclient, moche->nvchannel, 2, + 64 * 1024, 1, &moche->nvpushbuf))) { + printf("EE: failed to create nouveau pushbuf (%d)\n", r); + goto nouveau_pushbuf; + } + } + + { + if ((r = nouveau_object_new(moche->nvchannel, 0xcafec1c0, + 0xc1c0, NULL, 0, &moche->nvcompute))) { + printf("EE: failed to create nouveau channel (%d)\n", r); + goto nouveau_compute; + } + } + + return 0; + +nouveau_compute: +nouveau_pushbuf: + nouveau_object_del(&moche->nvchannel); +nouveau_channel: +nouveau_hmm: + nouveau_client_del(&moche->nvclient); +nouveau_client: + nouveau_device_del(&moche->nvdevice); +nouveau_device: + nouveau_drm_del(&moche->nvdrm); + return r; +} + +void moche_fini(moche_t *moche) +{ + nouveau_object_del(&moche->nvcompute); + nouveau_pushbuf_del(&moche->nvpushbuf); + nouveau_object_del(&moche->nvchannel); + nouveau_client_del(&moche->nvclient); + nouveau_device_del(&moche->nvdevice); + nouveau_drm_del(&moche->nvdrm); +} + +static inline void moche_push_data(moche_t *moche, uint32_t data) +{ + *moche->nvpushbuf->cur++ = data; +} + +static inline int moche_push_kick(moche_t *moche) +{ + return nouveau_pushbuf_kick(moche->nvpushbuf, moche->nvchannel); +} + +static inline void moche_push_refn(moche_t *moche, + struct nouveau_bo *bo, uint32_t flags) +{ + struct nouveau_pushbuf_refn ref = { bo, flags }; + + nouveau_pushbuf_refn(moche->nvpushbuf, &ref, 1); +} + +static inline uint32_t nvk_sq_cmd(unsigned subc, unsigned method, unsigned len) +{ + return ((method >> 2) & 0x1fff) | + ((len & 0xfff) << 16) | + ((subc & 0x7) << 13) | + (0x1 << 29); +} + +static inline uint32_t nvk_ni_cmd(unsigned subc, unsigned method, unsigned len) +{ + return ((method >> 2) & 0x1fff) | + ((len & 0xfff) << 16) | + ((subc & 0x7) << 13) | + (0x3 << 29); +} + +static inline uint32_t nvk_addr_high(uint64_t offset) +{ + return (offset >> 32) & 0xffffffff; +} + +static inline uint32_t nvk_addr_low(uint64_t offset) +{ + return offset & 0xffffffff; +} + +static inline uint32_t nvk_size_high(uint64_t offset) +{ + return (offset >> 32) & 0xffffffff; +} + +static inline uint32_t nvk_size_low(uint64_t offset) +{ + return offset & 0xffffffff; +} + +int moche_pushbuf_test(moche_t *moche) +{ + struct nouveau_bo *bo; + uint32_t *ptr; + int r, i; + + if ((r = nouveau_bo_new(moche->nvdevice, NOUVEAU_BO_GART, + 0, 4 * 1024, NULL, &bo))) { + printf("EE: failed to create nouveau bo (%d)\n", r); + goto nouveau_bo_new; + } + if ((r = nouveau_bo_map(bo, NOUVEAU_BO_WR, moche->nvclient))) { + printf("EE: failed to map nouveau bo (%d)\n", r); + goto nouveau_bo_map; + } + + ptr = bo->map; + ptr[0] = 0xcafedead; + + if ((r = nouveau_pushbuf_space(moche->nvpushbuf, 9, 1, 0))) { + printf("EE: pushbuf full (%d)\n", r); + goto nouveau_bo_new; + } + moche_push_refn(moche, bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + moche_push_data(moche, nvk_sq_cmd(1, 0x0000, 1)); + moche_push_data(moche, moche->nvcompute->oclass); + moche_push_data(moche, nvk_sq_cmd(1, 0x0110, 1)); + moche_push_data(moche, 0x00000000); +#if 1 + moche_push_data(moche, nvk_sq_cmd(1, 0x1b00, 4)); + moche_push_data(moche, nvk_addr_high(bo->offset)); + moche_push_data(moche, nvk_addr_low(bo->offset)); + moche_push_data(moche, 0xdeadcafe); + moche_push_data(moche, 0x00000000); +#else + moche_push_data(moche, nvk_sq_cmd(1, 0x0010, 4)); + moche_push_data(moche, nvk_addr_high(bo->offset)); + moche_push_data(moche, nvk_addr_low(bo->offset)); + moche_push_data(moche, 0xdeadcafe); + moche_push_data(moche, 0x00000002); +#endif + + if ((r = moche_push_kick(moche))) { + printf("EE: pushbuf kick (%d)\n", r); + goto nouveau_push_kick; + } + + for (i = 0; i < 10 && ptr[0] != 0xdeadcafe; ++i) { + nouveau_bo_wait(bo, NOUVEAU_BO_RD, moche->nvclient); + } + if (ptr[0] == 0xdeadcafe) { + printf("OK: pusbuf test 0x%08x\n", ptr[0]); + } else { + printf("EE: pusbuf test 0x%08x\n", ptr[0]); + } + +nouveau_push_kick: +nouveau_bo_map: + nouveau_bo_ref(NULL, &bo); +nouveau_bo_new: + return r; +} + +int moche_compute_test(moche_t *moche, uint64_t doffset, + uint32_t *rptr, unsigned nelem, + struct nouveau_bo *bor) +{ + uint64_t code[] = { + //testtx: + // sched 0x7f1 0x207f9 0x7f9 + // mov $r2 $tid.x + // lea 0x1 cc $r0 $r2 c0[0x0] 0x2 + // lea hi x 0x1 $r1 $r2 c0[0x4] 0x0 0x2 + // sched 0x7f9 0x7f5 0x7ff + // stg e b32 ncg[$r0] $r2 + // exit + // exit + 0x001fe440ff2007f1ul, + 0xf0c8000002170002ul, + 0x4bd7810000070200ul, + 0x1a177f8000170201ul, + 0x001ffc00fea007f9ul, + 0xeedc200000070002ul, + 0xe30000000007000ful, + 0xe30000000007000ful, + }; + struct nouveau_bo *bom, *tls; + struct nvk_00f0_cp_desc *desc; + uint32_t *mptr; + int r, i, tls_size; + + tls_size = 16 << 20; + if ((r = nouveau_bo_new(moche->nvdevice, NOUVEAU_BO_GART, + 0, tls_size, NULL, &tls))) { + printf("EE: failed to create nouveau bo (%d)\n", r); + goto nouveau_tls_new; + } + if ((r = nouveau_bo_new(moche->nvdevice, NOUVEAU_BO_GART, + 0, 64 * 1024, NULL, &bom))) { + printf("EE: failed to create nouveau bo (%d)\n", r); + goto nouveau_bom_new; + } + if ((r = nouveau_bo_map(bom, NOUVEAU_BO_WR, moche->nvclient))) { + printf("EE: failed to map nouveau bo (%d)\n", r); + goto nouveau_bom_map; + } + + mptr = bom->map; + printf("bom offset 0x%lx\n", (unsigned long)bom->offset); + + // initialize result bo + memset(rptr, 0, nelem * 4); + + // fence + mptr[0] = 0xcafedead; + memcpy(&mptr[1024], code, sizeof(code)); + + // param + mptr[64] = nvk_addr_low(doffset); + mptr[65] = nvk_addr_high(doffset); + + // compute descriptor + desc = (void *)&mptr[128]; + memset(desc, 0, sizeof(*desc)); + desc->griddim_x = nelem; + desc->griddim_y = 1; + desc->griddim_z = 1; + desc->blockdim_x = 32; + desc->blockdim_y = 1; + desc->blockdim_z = 1; + desc->entry = 0x0; + desc->shared_size = 0; + desc->local_size_p = 0; + desc->bar_alloc = 0; + desc->local_size_n = 0; + desc->gpr_alloc = 8; + desc->cstack_size = 0x1000; + desc->cb_mask = 1; + desc->cb[0].address_h = nvk_addr_high(bom->offset + 64 * 4); + desc->cb[0].address_l = nvk_addr_low(bom->offset + 64 * 4); + desc->cb[0].size_sh4 = 256 >> 4; + desc->unk0[4] = 0x40; + desc->unk11_0 = 0x04014000; + + if ((r = nouveau_pushbuf_space(moche->nvpushbuf, 1024, 3, 0))) { + printf("EE: pushbuf full (%d)\n", r); + goto nouveau_push_space; + } + moche_push_refn(moche, bom, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + if (bor) + moche_push_refn(moche, bor, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + moche_push_refn(moche, tls, NOUVEAU_BO_GART | NOUVEAU_BO_WR); + moche_push_data(moche, nvk_sq_cmd(1, 0x0000, 1)); + moche_push_data(moche, moche->nvcompute->oclass); + moche_push_data(moche, nvk_sq_cmd(1, 0x0110, 1)); + moche_push_data(moche, 0x00000000); + + tls_size = tls_size / 16; + moche_push_data(moche, nvk_sq_cmd(1, 0x0790, 2)); + moche_push_data(moche, nvk_addr_high(tls->offset)); + moche_push_data(moche, nvk_addr_low(tls->offset)); + moche_push_data(moche, nvk_sq_cmd(1, 0x02e4, 3)); + moche_push_data(moche, nvk_addr_high(tls_size)); + moche_push_data(moche, nvk_addr_low(tls_size) & ~0x7fff); + moche_push_data(moche, 0x000000ff); + moche_push_data(moche, nvk_sq_cmd(1, 0x02f0, 3)); + moche_push_data(moche, nvk_addr_high(tls_size)); + moche_push_data(moche, nvk_addr_low(tls_size) & ~0x7fff); + moche_push_data(moche, 0x000000ff); + moche_push_data(moche, nvk_sq_cmd(1, 0x077c, 1)); + moche_push_data(moche, 0xff000000); + moche_push_data(moche, nvk_sq_cmd(1, 0x0214, 1)); + moche_push_data(moche, 0xfe000000); + moche_push_data(moche, nvk_sq_cmd(1, 0x1608, 2)); + moche_push_data(moche, nvk_addr_high(bom->offset + 1024 * 4)); + moche_push_data(moche, nvk_addr_low(bom->offset + 1024 * 4)); + moche_push_data(moche, nvk_sq_cmd(1, 0x0310, 1)); + moche_push_data(moche, 0x00000400); + moche_push_data(moche, nvk_ni_cmd(1, 0x0248, 64)); + for (int i = 63; i >= 0; --i) { + moche_push_data(moche, 0x00038000 | i); + } + moche_push_data(moche, nvk_ni_cmd(1, 0x0110, 1)); + moche_push_data(moche, 0x00000000); + moche_push_data(moche, nvk_sq_cmd(1, 0x2608, 1)); + moche_push_data(moche, 0x00000000); + moche_push_data(moche, nvk_sq_cmd(1, 0x1698, 1)); + moche_push_data(moche, 0x00001000); + moche_push_data(moche, nvk_sq_cmd(1, 0x021c, 1)); + moche_push_data(moche, 0x00001017); + moche_push_data(moche, nvk_sq_cmd(1, 0x02b4, 1)); + moche_push_data(moche, (bom->offset + 128 * 4) >> 8); + moche_push_data(moche, nvk_sq_cmd(1, 0x02bc, 1)); + moche_push_data(moche, 0x00000003); + moche_push_data(moche, nvk_sq_cmd(1, 0x0110, 1)); + moche_push_data(moche, 0x00000000); + + moche_push_data(moche, nvk_sq_cmd(1, 0x0110, 1)); + moche_push_data(moche, 0x00000000); + moche_push_data(moche, nvk_sq_cmd(1, 0x1b00, 4)); + moche_push_data(moche, nvk_addr_high(bom->offset)); + moche_push_data(moche, nvk_addr_low(bom->offset)); + moche_push_data(moche, 0xdeadcafe); + moche_push_data(moche, 0x00000000); + + if ((r = moche_push_kick(moche))) { + printf("EE: pushbuf kick (%d)\n", r); + goto nouveau_push_kick; + } + + for (i = 0; i < 10 && mptr[0] != 0xdeadcafe; ++i) { + nouveau_bo_wait(bom, NOUVEAU_BO_RD, moche->nvclient); + } + if (mptr[0] == 0xdeadcafe) { + printf("OK: compute test 0x%08x 0x%08x 0x%08x 0x%08x\n", + mptr[0], rptr[0], rptr[1], rptr[nelem - 1]); + } else { + printf("EE: compute test 0x%08x\n", mptr[0]); + } + for (i = 0; i < nelem; ++i) { + if (rptr[i] != i) { + printf("EE: rptr[%d] != %d -> %d\n", i, i, rptr[i]); + break; + } + } + +nouveau_push_kick: +nouveau_push_space: +nouveau_bom_map: + nouveau_bo_ref(NULL, &bom); +nouveau_bom_new: + nouveau_bo_ref(NULL, &tls); +nouveau_tls_new: + return r; +} + +int main(int argc, char *argv[]) +{ + struct nouveau_bo *bor = NULL; + unsigned nelem = 512 * 1024; + moche_t moche = {0}; + int r; + + if ((r = moche_init(&moche))) { + return r; + } + + printf("OK: fd %d ok chipset 0x%08x (press enter to run)\n", + moche.fd, moche.nvdevice->chipset); + getchar(); + + if ((r = moche_pushbuf_test(&moche))) { + return r; + } + + if (0) { + uint32_t *rptr; + + if ((r = nouveau_bo_new(moche.nvdevice, NOUVEAU_BO_GART, + 0, 4 * nelem, NULL, &bor))) { + printf("EE: failed to create nouveau bo (%d)\n", r); + goto nouveau_bor_new; + } + if ((r = nouveau_bo_map(bor, NOUVEAU_BO_WR, moche.nvclient))) { + printf("EE: failed to map nouveau bo (%d)\n", r); + goto nouveau_bor_map; + } + rptr = bor->map; + printf("bor offset 0x%lx\n", (unsigned long)bor->offset); + + if ((r = moche_compute_test(&moche, bor->offset, rptr, nelem, bor))) { + return r; + } + } else { + uint32_t *rptr; + + rptr = malloc(nelem * 4); + printf("bor offset 0x%lx\n", (unsigned long)rptr); + if ((r = moche_compute_test(&moche, (uint64_t)rptr, rptr, nelem, NULL))) { + return r; + } + } + +nouveau_bor_map: + if (bor) + nouveau_bo_ref(NULL, &bor); +nouveau_bor_new: + moche_fini(&moche); + return 0; +} |