author     Jerome Glisse <jglisse@redhat.com>    2012-01-05 22:11:05 -0500
committer  Dave Airlie <airlied@redhat.com>      2012-01-06 09:15:42 +0000
commit     721604a15b934f0a8d1909acb8017f029128be2f (patch)
tree       ac1dc0f837d70616b36c9b57d22eb9678c5e68fc
parent     09b4ea47d1041612b101c369969db123ac2c1511 (diff)
drm/radeon: GPU virtual memory support v22
Virtual address spaces are per DRM client (opener of /dev/drm). Clients are in charge of their virtual address space; they map BOs into it by calling the DRM_RADEON_GEM_VA ioctl. The first 16M of virtual address space is reserved by the kernel.

Once we use a 2-level page table we should be able to have a small vram memory footprint for each page table (there would be one pt for all of gart, one for all of vram, and then one first level for each virtual address space).

The plan includes using the sub-allocator for a common vm page table area and using memcpy to copy vm page tables in & out, or using a gart object and copying things in & out using dma.

v2: agd5f fixes:
- Add vram base offset for vram pages. The GPU physical address of a vram page is FB_OFFSET + page offset. FB_OFFSET is 0 on discrete cards and the physical bus address of the stolen memory on integrated chips.
- VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR covers all vmid's >= 1

v3: agd5f:
- integrate with the semaphore/multi-ring stuff

v4:
- rebase on top of ttm dma & multi-ring stuff
- userspace is now in charge of the address space
- no more specific cs vm ioctl; instead the cs ioctl has a new chunk

v5:
- properly handle mem == NULL case from move_notify callback
- fix the vm cleanup path

v6:
- fix update of page table to only happen on valid mem placement

v7:
- add tlb flush for each vm context
- add flags to define mapping properties (readable, writeable, snooped)
- make ring id implicit from ib->fence->ring, up to each asic callback to then do ring-specific scheduling if it has a vm ib scheduling function

v8:
- add query for ib limit and kernel reserved virtual space
- rename vm->size to max_pfn (maximum number of pages)
- update gem_va ioctl to also allow unmap operation
- bump kernel version to allow userspace to query for vm support

v9:
- rebuild page table only on bind, and incrementally depending on bos referenced by the cs that have been moved
- allow virtual address space to grow
- use sa allocator for vram page table
- return invalid when querying vm limit on non-cayman GPUs
- dump vm fault registers on lockup

v10: agd5f:
- Move the vm schedule_ib callback to a standalone function, remove the callback and use the existing ib_execute callback for VM IBs.

v11:
- rebase on top of latest Linus

v12: agd5f:
- remove spurious backslash
- set IB vm_id to 0 in radeon_ib_get()

v13: agd5f:
- fix handling of RADEON_CHUNK_ID_FLAGS

v14:
- fix va destruction
- fix suspend/resume
- forbid a bo from having several different VAs in the same vm

v15:
- rebase

v16:
- clean up leftovers of vm init/fini

v17: agd5f:
- cs checker

v18: agd5f:
- rework the CS ioctl to better support multiple rings and VM. Rather than adding a new chunk id for VM, just re-use the IB chunk id and add a new flag for VM mode. Also define additional dwords for the flags chunk id to select which ring we want to use (gfx, compute, uvd, etc.) and the priority.

v19:
- fix cs fini in the weird case of no ib
- semi-working flush fix for ni
- rebase on top of sa allocator changes

v20: agd5f:
- further CS ioctl cleanups from Christian's comments

v21: agd5f:
- integrate CS checker improvements

v22: agd5f:
- final cleanups for release; only allow VM CS on cayman

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
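For orientation, here is a minimal userspace-side sketch of the flow described above: map a BO into the client's address space with the new DRM_RADEON_GEM_VA ioctl, then carry the VM flag, ring selection and priority in the CS flags chunk (the three-dword layout introduced in v18). The chunk layout and the RADEON_CS_*/RADEON_VM_PAGE_* flags come from this patch; the drm_radeon_gem_va field names (handle, operation, offset, flags) and RADEON_VA_MAP are assumptions based on the include/drm/radeon_drm.h update, which is not shown in full in this view.

/*
 * Illustrative sketch only -- not part of the patch. Field names of
 * struct drm_radeon_gem_va and RADEON_VA_MAP are assumed from the
 * include/drm/radeon_drm.h change summarized in the diffstat below.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <radeon_drm.h>

/* Map an existing GEM BO at a chosen GPU virtual address in this client's VM. */
static int radeon_va_map_example(int fd, uint32_t handle, uint64_t gpu_va)
{
	struct drm_radeon_gem_va args;

	memset(&args, 0, sizeof(args));
	args.handle = handle;
	args.operation = RADEON_VA_MAP;        /* v8 also allows unmap */
	args.offset = gpu_va;                  /* must lie above the kernel-reserved range */
	args.flags = RADEON_VM_PAGE_VALID |    /* v7 mapping properties */
		     RADEON_VM_PAGE_READABLE |
		     RADEON_VM_PAGE_WRITEABLE;
	return drmCommandWriteRead(fd, DRM_RADEON_GEM_VA, &args, sizeof(args));
}

/*
 * Fill the RADEON_CHUNK_ID_FLAGS chunk for a VM submission:
 * dword 0 = CS flags, dword 1 = ring, dword 2 = priority
 * (see radeon_cs_parser_init() in the radeon_cs.c hunk below).
 */
static void radeon_cs_flags_chunk_example(uint32_t flags_chunk[3])
{
	flags_chunk[0] = RADEON_CS_USE_VM;
	flags_chunk[1] = RADEON_CS_RING_GFX;
	flags_chunk[2] = 0;                    /* default priority */
}

On Cayman the kernel then validates such a VM IB with evergreen_ib_parse(), added at the end of the evergreen_cs.c hunk below, before scheduling it.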
-rw-r--r--  drivers/gpu/drm/radeon/evergreen_cs.c   | 246
-rw-r--r--  drivers/gpu/drm/radeon/evergreend.h     |  65
-rw-r--r--  drivers/gpu/drm/radeon/ni.c             | 160
-rw-r--r--  drivers/gpu/drm/radeon/nid.h            |   2
-rw-r--r--  drivers/gpu/drm/radeon/r300.c           |   6
-rw-r--r--  drivers/gpu/drm/radeon/r600_cs.c        |   8
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h         | 123
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.c    |  20
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.h    |  12
-rw-r--r--  drivers/gpu/drm/radeon/radeon_cs.c      | 253
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c  |   4
-rw-r--r--  drivers/gpu/drm/radeon/radeon_drv.c     |   9
-rw-r--r--  drivers/gpu/drm/radeon/radeon_gart.c    | 388
-rw-r--r--  drivers/gpu/drm/radeon/radeon_gem.c     | 136
-rw-r--r--  drivers/gpu/drm/radeon/radeon_kms.c     |  47
-rw-r--r--  drivers/gpu/drm/radeon/radeon_object.c  |  30
-rw-r--r--  drivers/gpu/drm/radeon/radeon_object.h  |  12
-rw-r--r--  drivers/gpu/drm/radeon/radeon_ring.c    |   1
-rw-r--r--  include/drm/radeon_drm.h                |  36
19 files changed, 1480 insertions(+), 78 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index cd4590aae154..f7442e62c03f 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -520,7 +520,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
520 break; 520 break;
521 case DB_Z_INFO: 521 case DB_Z_INFO:
522 track->db_z_info = radeon_get_ib_value(p, idx); 522 track->db_z_info = radeon_get_ib_value(p, idx);
523 if (!p->keep_tiling_flags) { 523 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
524 r = evergreen_cs_packet_next_reloc(p, &reloc); 524 r = evergreen_cs_packet_next_reloc(p, &reloc);
525 if (r) { 525 if (r) {
526 dev_warn(p->dev, "bad SET_CONTEXT_REG " 526 dev_warn(p->dev, "bad SET_CONTEXT_REG "
@@ -649,7 +649,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
649 case CB_COLOR7_INFO: 649 case CB_COLOR7_INFO:
650 tmp = (reg - CB_COLOR0_INFO) / 0x3c; 650 tmp = (reg - CB_COLOR0_INFO) / 0x3c;
651 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); 651 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
652 if (!p->keep_tiling_flags) { 652 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
653 r = evergreen_cs_packet_next_reloc(p, &reloc); 653 r = evergreen_cs_packet_next_reloc(p, &reloc);
654 if (r) { 654 if (r) {
655 dev_warn(p->dev, "bad SET_CONTEXT_REG " 655 dev_warn(p->dev, "bad SET_CONTEXT_REG "
@@ -666,7 +666,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
666 case CB_COLOR11_INFO: 666 case CB_COLOR11_INFO:
667 tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8; 667 tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
668 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx); 668 track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
669 if (!p->keep_tiling_flags) { 669 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
670 r = evergreen_cs_packet_next_reloc(p, &reloc); 670 r = evergreen_cs_packet_next_reloc(p, &reloc);
671 if (r) { 671 if (r) {
672 dev_warn(p->dev, "bad SET_CONTEXT_REG " 672 dev_warn(p->dev, "bad SET_CONTEXT_REG "
@@ -1355,7 +1355,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
1355 return -EINVAL; 1355 return -EINVAL;
1356 } 1356 }
1357 ib[idx+1+(i*8)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1357 ib[idx+1+(i*8)+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1358 if (!p->keep_tiling_flags) { 1358 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1359 ib[idx+1+(i*8)+1] |= 1359 ib[idx+1+(i*8)+1] |=
1360 TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags)); 1360 TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
1361 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) { 1361 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
@@ -1572,3 +1572,241 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
1572 return 0; 1572 return 0;
1573} 1573}
1574 1574
1575/* vm parser */
1576static bool evergreen_vm_reg_valid(u32 reg)
1577{
1578 /* context regs are fine */
1579 if (reg >= 0x28000)
1580 return true;
1581
1582 /* check config regs */
1583 switch (reg) {
1584 case GRBM_GFX_INDEX:
1585 case VGT_VTX_VECT_EJECT_REG:
1586 case VGT_CACHE_INVALIDATION:
1587 case VGT_GS_VERTEX_REUSE:
1588 case VGT_PRIMITIVE_TYPE:
1589 case VGT_INDEX_TYPE:
1590 case VGT_NUM_INDICES:
1591 case VGT_NUM_INSTANCES:
1592 case VGT_COMPUTE_DIM_X:
1593 case VGT_COMPUTE_DIM_Y:
1594 case VGT_COMPUTE_DIM_Z:
1595 case VGT_COMPUTE_START_X:
1596 case VGT_COMPUTE_START_Y:
1597 case VGT_COMPUTE_START_Z:
1598 case VGT_COMPUTE_INDEX:
1599 case VGT_COMPUTE_THREAD_GROUP_SIZE:
1600 case VGT_HS_OFFCHIP_PARAM:
1601 case PA_CL_ENHANCE:
1602 case PA_SU_LINE_STIPPLE_VALUE:
1603 case PA_SC_LINE_STIPPLE_STATE:
1604 case PA_SC_ENHANCE:
1605 case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
1606 case SQ_DYN_GPR_SIMD_LOCK_EN:
1607 case SQ_CONFIG:
1608 case SQ_GPR_RESOURCE_MGMT_1:
1609 case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
1610 case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
1611 case SQ_CONST_MEM_BASE:
1612 case SQ_STATIC_THREAD_MGMT_1:
1613 case SQ_STATIC_THREAD_MGMT_2:
1614 case SQ_STATIC_THREAD_MGMT_3:
1615 case SPI_CONFIG_CNTL:
1616 case SPI_CONFIG_CNTL_1:
1617 case TA_CNTL_AUX:
1618 case DB_DEBUG:
1619 case DB_DEBUG2:
1620 case DB_DEBUG3:
1621 case DB_DEBUG4:
1622 case DB_WATERMARKS:
1623 case TD_PS_BORDER_COLOR_INDEX:
1624 case TD_PS_BORDER_COLOR_RED:
1625 case TD_PS_BORDER_COLOR_GREEN:
1626 case TD_PS_BORDER_COLOR_BLUE:
1627 case TD_PS_BORDER_COLOR_ALPHA:
1628 case TD_VS_BORDER_COLOR_INDEX:
1629 case TD_VS_BORDER_COLOR_RED:
1630 case TD_VS_BORDER_COLOR_GREEN:
1631 case TD_VS_BORDER_COLOR_BLUE:
1632 case TD_VS_BORDER_COLOR_ALPHA:
1633 case TD_GS_BORDER_COLOR_INDEX:
1634 case TD_GS_BORDER_COLOR_RED:
1635 case TD_GS_BORDER_COLOR_GREEN:
1636 case TD_GS_BORDER_COLOR_BLUE:
1637 case TD_GS_BORDER_COLOR_ALPHA:
1638 case TD_HS_BORDER_COLOR_INDEX:
1639 case TD_HS_BORDER_COLOR_RED:
1640 case TD_HS_BORDER_COLOR_GREEN:
1641 case TD_HS_BORDER_COLOR_BLUE:
1642 case TD_HS_BORDER_COLOR_ALPHA:
1643 case TD_LS_BORDER_COLOR_INDEX:
1644 case TD_LS_BORDER_COLOR_RED:
1645 case TD_LS_BORDER_COLOR_GREEN:
1646 case TD_LS_BORDER_COLOR_BLUE:
1647 case TD_LS_BORDER_COLOR_ALPHA:
1648 case TD_CS_BORDER_COLOR_INDEX:
1649 case TD_CS_BORDER_COLOR_RED:
1650 case TD_CS_BORDER_COLOR_GREEN:
1651 case TD_CS_BORDER_COLOR_BLUE:
1652 case TD_CS_BORDER_COLOR_ALPHA:
1653 case SQ_ESGS_RING_SIZE:
1654 case SQ_GSVS_RING_SIZE:
1655 case SQ_ESTMP_RING_SIZE:
1656 case SQ_GSTMP_RING_SIZE:
1657 case SQ_HSTMP_RING_SIZE:
1658 case SQ_LSTMP_RING_SIZE:
1659 case SQ_PSTMP_RING_SIZE:
1660 case SQ_VSTMP_RING_SIZE:
1661 case SQ_ESGS_RING_ITEMSIZE:
1662 case SQ_ESTMP_RING_ITEMSIZE:
1663 case SQ_GSTMP_RING_ITEMSIZE:
1664 case SQ_GSVS_RING_ITEMSIZE:
1665 case SQ_GS_VERT_ITEMSIZE:
1666 case SQ_GS_VERT_ITEMSIZE_1:
1667 case SQ_GS_VERT_ITEMSIZE_2:
1668 case SQ_GS_VERT_ITEMSIZE_3:
1669 case SQ_GSVS_RING_OFFSET_1:
1670 case SQ_GSVS_RING_OFFSET_2:
1671 case SQ_GSVS_RING_OFFSET_3:
1672 case SQ_HSTMP_RING_ITEMSIZE:
1673 case SQ_LSTMP_RING_ITEMSIZE:
1674 case SQ_PSTMP_RING_ITEMSIZE:
1675 case SQ_VSTMP_RING_ITEMSIZE:
1676 case VGT_TF_RING_SIZE:
1677 case SQ_ESGS_RING_BASE:
1678 case SQ_GSVS_RING_BASE:
1679 case SQ_ESTMP_RING_BASE:
1680 case SQ_GSTMP_RING_BASE:
1681 case SQ_HSTMP_RING_BASE:
1682 case SQ_LSTMP_RING_BASE:
1683 case SQ_PSTMP_RING_BASE:
1684 case SQ_VSTMP_RING_BASE:
1685 case CAYMAN_VGT_OFFCHIP_LDS_BASE:
1686 case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
1687 return true;
1688 default:
1689 return false;
1690 }
1691}
1692
1693static int evergreen_vm_packet3_check(struct radeon_device *rdev,
1694 u32 *ib, struct radeon_cs_packet *pkt)
1695{
1696 u32 idx = pkt->idx + 1;
1697 u32 idx_value = ib[idx];
1698 u32 start_reg, end_reg, reg, i;
1699
1700 switch (pkt->opcode) {
1701 case PACKET3_NOP:
1702 case PACKET3_SET_BASE:
1703 case PACKET3_CLEAR_STATE:
1704 case PACKET3_INDEX_BUFFER_SIZE:
1705 case PACKET3_DISPATCH_DIRECT:
1706 case PACKET3_DISPATCH_INDIRECT:
1707 case PACKET3_MODE_CONTROL:
1708 case PACKET3_SET_PREDICATION:
1709 case PACKET3_COND_EXEC:
1710 case PACKET3_PRED_EXEC:
1711 case PACKET3_DRAW_INDIRECT:
1712 case PACKET3_DRAW_INDEX_INDIRECT:
1713 case PACKET3_INDEX_BASE:
1714 case PACKET3_DRAW_INDEX_2:
1715 case PACKET3_CONTEXT_CONTROL:
1716 case PACKET3_DRAW_INDEX_OFFSET:
1717 case PACKET3_INDEX_TYPE:
1718 case PACKET3_DRAW_INDEX:
1719 case PACKET3_DRAW_INDEX_AUTO:
1720 case PACKET3_DRAW_INDEX_IMMD:
1721 case PACKET3_NUM_INSTANCES:
1722 case PACKET3_DRAW_INDEX_MULTI_AUTO:
1723 case PACKET3_STRMOUT_BUFFER_UPDATE:
1724 case PACKET3_DRAW_INDEX_OFFSET_2:
1725 case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
1726 case PACKET3_MPEG_INDEX:
1727 case PACKET3_WAIT_REG_MEM:
1728 case PACKET3_MEM_WRITE:
1729 case PACKET3_SURFACE_SYNC:
1730 case PACKET3_EVENT_WRITE:
1731 case PACKET3_EVENT_WRITE_EOP:
1732 case PACKET3_EVENT_WRITE_EOS:
1733 case PACKET3_SET_CONTEXT_REG:
1734 case PACKET3_SET_BOOL_CONST:
1735 case PACKET3_SET_LOOP_CONST:
1736 case PACKET3_SET_RESOURCE:
1737 case PACKET3_SET_SAMPLER:
1738 case PACKET3_SET_CTL_CONST:
1739 case PACKET3_SET_RESOURCE_OFFSET:
1740 case PACKET3_SET_CONTEXT_REG_INDIRECT:
1741 case PACKET3_SET_RESOURCE_INDIRECT:
1742 case CAYMAN_PACKET3_DEALLOC_STATE:
1743 break;
1744 case PACKET3_COND_WRITE:
1745 if (idx_value & 0x100) {
1746 reg = ib[idx + 5] * 4;
1747 if (!evergreen_vm_reg_valid(reg))
1748 return -EINVAL;
1749 }
1750 break;
1751 case PACKET3_COPY_DW:
1752 if (idx_value & 0x2) {
1753 reg = ib[idx + 3] * 4;
1754 if (!evergreen_vm_reg_valid(reg))
1755 return -EINVAL;
1756 }
1757 break;
1758 case PACKET3_SET_CONFIG_REG:
1759 start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
1760 end_reg = 4 * pkt->count + start_reg - 4;
1761 if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
1762 (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
1763 (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
1764 DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
1765 return -EINVAL;
1766 }
1767 for (i = 0; i < pkt->count; i++) {
1768 reg = start_reg + (4 * i);
1769 if (!evergreen_vm_reg_valid(reg))
1770 return -EINVAL;
1771 }
1772 break;
1773 default:
1774 return -EINVAL;
1775 }
1776 return 0;
1777}
1778
1779int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
1780{
1781 int ret = 0;
1782 u32 idx = 0;
1783 struct radeon_cs_packet pkt;
1784
1785 do {
1786 pkt.idx = idx;
1787 pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
1788 pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
1789 pkt.one_reg_wr = 0;
1790 switch (pkt.type) {
1791 case PACKET_TYPE0:
1792 dev_err(rdev->dev, "Packet0 not allowed!\n");
1793 ret = -EINVAL;
1794 break;
1795 case PACKET_TYPE2:
1796 break;
1797 case PACKET_TYPE3:
1798 pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
1799 ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
1800 break;
1801 default:
1802 dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
1803 ret = -EINVAL;
1804 break;
1805 }
1806 if (ret)
1807 break;
1808 idx += pkt.count + 2;
1809 } while (idx < ib->length_dw);
1810
1811 return ret;
1812}
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index e00039e59a75..b502216d42af 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -242,6 +242,7 @@
242#define PA_CL_ENHANCE 0x8A14 242#define PA_CL_ENHANCE 0x8A14
243#define CLIP_VTX_REORDER_ENA (1 << 0) 243#define CLIP_VTX_REORDER_ENA (1 << 0)
244#define NUM_CLIP_SEQ(x) ((x) << 1) 244#define NUM_CLIP_SEQ(x) ((x) << 1)
245#define PA_SC_ENHANCE 0x8BF0
245#define PA_SC_AA_CONFIG 0x28C04 246#define PA_SC_AA_CONFIG 0x28C04
246#define MSAA_NUM_SAMPLES_SHIFT 0 247#define MSAA_NUM_SAMPLES_SHIFT 0
247#define MSAA_NUM_SAMPLES_MASK 0x3 248#define MSAA_NUM_SAMPLES_MASK 0x3
@@ -319,6 +320,8 @@
319#define SQ_GPR_RESOURCE_MGMT_3 0x8C0C 320#define SQ_GPR_RESOURCE_MGMT_3 0x8C0C
320#define NUM_HS_GPRS(x) ((x) << 0) 321#define NUM_HS_GPRS(x) ((x) << 0)
321#define NUM_LS_GPRS(x) ((x) << 16) 322#define NUM_LS_GPRS(x) ((x) << 16)
323#define SQ_GLOBAL_GPR_RESOURCE_MGMT_1 0x8C10
324#define SQ_GLOBAL_GPR_RESOURCE_MGMT_2 0x8C14
322#define SQ_THREAD_RESOURCE_MGMT 0x8C18 325#define SQ_THREAD_RESOURCE_MGMT 0x8C18
323#define NUM_PS_THREADS(x) ((x) << 0) 326#define NUM_PS_THREADS(x) ((x) << 0)
324#define NUM_VS_THREADS(x) ((x) << 8) 327#define NUM_VS_THREADS(x) ((x) << 8)
@@ -337,6 +340,10 @@
337#define NUM_HS_STACK_ENTRIES(x) ((x) << 0) 340#define NUM_HS_STACK_ENTRIES(x) ((x) << 0)
338#define NUM_LS_STACK_ENTRIES(x) ((x) << 16) 341#define NUM_LS_STACK_ENTRIES(x) ((x) << 16)
339#define SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x8D8C 342#define SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 0x8D8C
343#define SQ_DYN_GPR_SIMD_LOCK_EN 0x8D94
344#define SQ_STATIC_THREAD_MGMT_1 0x8E20
345#define SQ_STATIC_THREAD_MGMT_2 0x8E24
346#define SQ_STATIC_THREAD_MGMT_3 0x8E28
340#define SQ_LDS_RESOURCE_MGMT 0x8E2C 347#define SQ_LDS_RESOURCE_MGMT 0x8E2C
341 348
342#define SQ_MS_FIFO_SIZES 0x8CF0 349#define SQ_MS_FIFO_SIZES 0x8CF0
@@ -691,6 +698,7 @@
691#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36 698#define PACKET3_DRAW_INDEX_MULTI_ELEMENT 0x36
692#define PACKET3_MEM_SEMAPHORE 0x39 699#define PACKET3_MEM_SEMAPHORE 0x39
693#define PACKET3_MPEG_INDEX 0x3A 700#define PACKET3_MPEG_INDEX 0x3A
701#define PACKET3_COPY_DW 0x3B
694#define PACKET3_WAIT_REG_MEM 0x3C 702#define PACKET3_WAIT_REG_MEM 0x3C
695#define PACKET3_MEM_WRITE 0x3D 703#define PACKET3_MEM_WRITE 0x3D
696#define PACKET3_INDIRECT_BUFFER 0x32 704#define PACKET3_INDIRECT_BUFFER 0x32
@@ -768,6 +776,8 @@
768#define SQ_TEX_VTX_VALID_TEXTURE 0x2 776#define SQ_TEX_VTX_VALID_TEXTURE 0x2
769#define SQ_TEX_VTX_VALID_BUFFER 0x3 777#define SQ_TEX_VTX_VALID_BUFFER 0x3
770 778
779#define VGT_VTX_VECT_EJECT_REG 0x88b0
780
771#define SQ_CONST_MEM_BASE 0x8df8 781#define SQ_CONST_MEM_BASE 0x8df8
772 782
773#define SQ_ESGS_RING_BASE 0x8c40 783#define SQ_ESGS_RING_BASE 0x8c40
@@ -892,8 +902,27 @@
892#define PA_SC_SCREEN_SCISSOR_TL 0x28030 902#define PA_SC_SCREEN_SCISSOR_TL 0x28030
893#define PA_SC_GENERIC_SCISSOR_TL 0x28240 903#define PA_SC_GENERIC_SCISSOR_TL 0x28240
894#define PA_SC_WINDOW_SCISSOR_TL 0x28204 904#define PA_SC_WINDOW_SCISSOR_TL 0x28204
895#define VGT_PRIMITIVE_TYPE 0x8958
896 905
906#define VGT_PRIMITIVE_TYPE 0x8958
907#define VGT_INDEX_TYPE 0x895C
908
909#define VGT_NUM_INDICES 0x8970
910
911#define VGT_COMPUTE_DIM_X 0x8990
912#define VGT_COMPUTE_DIM_Y 0x8994
913#define VGT_COMPUTE_DIM_Z 0x8998
914#define VGT_COMPUTE_START_X 0x899C
915#define VGT_COMPUTE_START_Y 0x89A0
916#define VGT_COMPUTE_START_Z 0x89A4
917#define VGT_COMPUTE_INDEX 0x89A8
918#define VGT_COMPUTE_THREAD_GROUP_SIZE 0x89AC
919#define VGT_HS_OFFCHIP_PARAM 0x89B0
920
921#define DB_DEBUG 0x9830
922#define DB_DEBUG2 0x9834
923#define DB_DEBUG3 0x9838
924#define DB_DEBUG4 0x983C
925#define DB_WATERMARKS 0x9854
897#define DB_DEPTH_CONTROL 0x28800 926#define DB_DEPTH_CONTROL 0x28800
898#define DB_DEPTH_VIEW 0x28008 927#define DB_DEPTH_VIEW 0x28008
899#define DB_HTILE_DATA_BASE 0x28014 928#define DB_HTILE_DATA_BASE 0x28014
@@ -1189,8 +1218,40 @@
1189#define SQ_VTX_CONSTANT_WORD6_0 0x30018 1218#define SQ_VTX_CONSTANT_WORD6_0 0x30018
1190#define SQ_VTX_CONSTANT_WORD7_0 0x3001c 1219#define SQ_VTX_CONSTANT_WORD7_0 0x3001c
1191 1220
1221#define TD_PS_BORDER_COLOR_INDEX 0xA400
1222#define TD_PS_BORDER_COLOR_RED 0xA404
1223#define TD_PS_BORDER_COLOR_GREEN 0xA408
1224#define TD_PS_BORDER_COLOR_BLUE 0xA40C
1225#define TD_PS_BORDER_COLOR_ALPHA 0xA410
1226#define TD_VS_BORDER_COLOR_INDEX 0xA414
1227#define TD_VS_BORDER_COLOR_RED 0xA418
1228#define TD_VS_BORDER_COLOR_GREEN 0xA41C
1229#define TD_VS_BORDER_COLOR_BLUE 0xA420
1230#define TD_VS_BORDER_COLOR_ALPHA 0xA424
1231#define TD_GS_BORDER_COLOR_INDEX 0xA428
1232#define TD_GS_BORDER_COLOR_RED 0xA42C
1233#define TD_GS_BORDER_COLOR_GREEN 0xA430
1234#define TD_GS_BORDER_COLOR_BLUE 0xA434
1235#define TD_GS_BORDER_COLOR_ALPHA 0xA438
1236#define TD_HS_BORDER_COLOR_INDEX 0xA43C
1237#define TD_HS_BORDER_COLOR_RED 0xA440
1238#define TD_HS_BORDER_COLOR_GREEN 0xA444
1239#define TD_HS_BORDER_COLOR_BLUE 0xA448
1240#define TD_HS_BORDER_COLOR_ALPHA 0xA44C
1241#define TD_LS_BORDER_COLOR_INDEX 0xA450
1242#define TD_LS_BORDER_COLOR_RED 0xA454
1243#define TD_LS_BORDER_COLOR_GREEN 0xA458
1244#define TD_LS_BORDER_COLOR_BLUE 0xA45C
1245#define TD_LS_BORDER_COLOR_ALPHA 0xA460
1246#define TD_CS_BORDER_COLOR_INDEX 0xA464
1247#define TD_CS_BORDER_COLOR_RED 0xA468
1248#define TD_CS_BORDER_COLOR_GREEN 0xA46C
1249#define TD_CS_BORDER_COLOR_BLUE 0xA470
1250#define TD_CS_BORDER_COLOR_ALPHA 0xA474
1251
1192/* cayman 3D regs */ 1252/* cayman 3D regs */
1193#define CAYMAN_VGT_OFFCHIP_LDS_BASE 0x89B0 1253#define CAYMAN_VGT_OFFCHIP_LDS_BASE 0x89B4
1254#define CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS 0x8E48
1194#define CAYMAN_DB_EQAA 0x28804 1255#define CAYMAN_DB_EQAA 0x28804
1195#define CAYMAN_DB_DEPTH_INFO 0x2803C 1256#define CAYMAN_DB_DEPTH_INFO 0x2803C
1196#define CAYMAN_PA_SC_AA_CONFIG 0x28BE0 1257#define CAYMAN_PA_SC_AA_CONFIG 0x28BE0
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index d89b2ebd5bbb..321137295400 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -934,7 +934,7 @@ void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev)
934 934
935int cayman_pcie_gart_enable(struct radeon_device *rdev) 935int cayman_pcie_gart_enable(struct radeon_device *rdev)
936{ 936{
937 int r; 937 int i, r;
938 938
939 if (rdev->gart.robj == NULL) { 939 if (rdev->gart.robj == NULL) {
940 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n"); 940 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
@@ -945,9 +945,12 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev)
945 return r; 945 return r;
946 radeon_gart_restore(rdev); 946 radeon_gart_restore(rdev);
947 /* Setup TLB control */ 947 /* Setup TLB control */
948 WREG32(MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB | 948 WREG32(MC_VM_MX_L1_TLB_CNTL,
949 (0xA << 7) |
950 ENABLE_L1_TLB |
949 ENABLE_L1_FRAGMENT_PROCESSING | 951 ENABLE_L1_FRAGMENT_PROCESSING |
950 SYSTEM_ACCESS_MODE_NOT_IN_SYS | 952 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
953 ENABLE_ADVANCED_DRIVER_MODEL |
951 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU); 954 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
952 /* Setup L2 cache */ 955 /* Setup L2 cache */
953 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | 956 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
@@ -967,9 +970,26 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev)
967 WREG32(VM_CONTEXT0_CNTL2, 0); 970 WREG32(VM_CONTEXT0_CNTL2, 0);
968 WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) | 971 WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
969 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); 972 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
970 /* disable context1-7 */ 973
974 WREG32(0x15D4, 0);
975 WREG32(0x15D8, 0);
976 WREG32(0x15DC, 0);
977
978 /* empty context1-7 */
979 for (i = 1; i < 8; i++) {
980 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (i << 2), 0);
981 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (i << 2), 0);
982 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
983 rdev->gart.table_addr >> 12);
984 }
985
986 /* enable context1-7 */
987 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
988 (u32)(rdev->dummy_page.addr >> 12));
971 WREG32(VM_CONTEXT1_CNTL2, 0); 989 WREG32(VM_CONTEXT1_CNTL2, 0);
972 WREG32(VM_CONTEXT1_CNTL, 0); 990 WREG32(VM_CONTEXT1_CNTL, 0);
991 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
992 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
973 993
974 cayman_pcie_gart_tlb_flush(rdev); 994 cayman_pcie_gart_tlb_flush(rdev);
975 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 995 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -1024,7 +1044,10 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
1024 struct radeon_ring *ring = &rdev->ring[fence->ring]; 1044 struct radeon_ring *ring = &rdev->ring[fence->ring];
1025 u64 addr = rdev->fence_drv[fence->ring].gpu_addr; 1045 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
1026 1046
1027 /* flush read cache over gart */ 1047 /* flush read cache over gart for this vmid */
1048 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1049 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1050 radeon_ring_write(ring, 0);
1028 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3)); 1051 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1029 radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA); 1052 radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
1030 radeon_ring_write(ring, 0xFFFFFFFF); 1053 radeon_ring_write(ring, 0xFFFFFFFF);
@@ -1039,6 +1062,33 @@ void cayman_fence_ring_emit(struct radeon_device *rdev,
1039 radeon_ring_write(ring, 0); 1062 radeon_ring_write(ring, 0);
1040} 1063}
1041 1064
1065void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
1066{
1067 struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
1068
1069 /* set to DX10/11 mode */
1070 radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
1071 radeon_ring_write(ring, 1);
1072 radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
1073 radeon_ring_write(ring,
1074#ifdef __BIG_ENDIAN
1075 (2 << 0) |
1076#endif
1077 (ib->gpu_addr & 0xFFFFFFFC));
1078 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFF);
1079 radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24));
1080
1081 /* flush read cache over gart for this vmid */
1082 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
1083 radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
1084 radeon_ring_write(ring, ib->vm_id);
1085 radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
1086 radeon_ring_write(ring, PACKET3_TC_ACTION_ENA | PACKET3_SH_ACTION_ENA);
1087 radeon_ring_write(ring, 0xFFFFFFFF);
1088 radeon_ring_write(ring, 0);
1089 radeon_ring_write(ring, 10); /* poll interval */
1090}
1091
1042static void cayman_cp_enable(struct radeon_device *rdev, bool enable) 1092static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
1043{ 1093{
1044 if (enable) 1094 if (enable)
@@ -1324,6 +1374,15 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev)
1324 RREG32(GRBM_STATUS_SE1)); 1374 RREG32(GRBM_STATUS_SE1));
1325 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", 1375 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
1326 RREG32(SRBM_STATUS)); 1376 RREG32(SRBM_STATUS));
1377 dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_ADDR 0x%08X\n",
1378 RREG32(0x14F8));
1379 dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n",
1380 RREG32(0x14D8));
1381 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
1382 RREG32(0x14FC));
1383 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
1384 RREG32(0x14DC));
1385
1327 evergreen_mc_stop(rdev, &save); 1386 evergreen_mc_stop(rdev, &save);
1328 if (evergreen_mc_wait_for_idle(rdev)) { 1387 if (evergreen_mc_wait_for_idle(rdev)) {
1329 dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); 1388 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
@@ -1354,6 +1413,7 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev)
1354 (void)RREG32(GRBM_SOFT_RESET); 1413 (void)RREG32(GRBM_SOFT_RESET);
1355 /* Wait a little for things to settle down */ 1414 /* Wait a little for things to settle down */
1356 udelay(50); 1415 udelay(50);
1416
1357 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n", 1417 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
1358 RREG32(GRBM_STATUS)); 1418 RREG32(GRBM_STATUS));
1359 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n", 1419 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
@@ -1464,6 +1524,10 @@ static int cayman_startup(struct radeon_device *rdev)
1464 return r; 1524 return r;
1465 } 1525 }
1466 1526
1527 r = radeon_vm_manager_start(rdev);
1528 if (r)
1529 return r;
1530
1467 return 0; 1531 return 0;
1468} 1532}
1469 1533
@@ -1491,6 +1555,7 @@ int cayman_suspend(struct radeon_device *rdev)
1491{ 1555{
1492 /* FIXME: we should wait for ring to be empty */ 1556 /* FIXME: we should wait for ring to be empty */
1493 radeon_ib_pool_suspend(rdev); 1557 radeon_ib_pool_suspend(rdev);
1558 radeon_vm_manager_suspend(rdev);
1494 r600_blit_suspend(rdev); 1559 r600_blit_suspend(rdev);
1495 cayman_cp_enable(rdev, false); 1560 cayman_cp_enable(rdev, false);
1496 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 1561 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
@@ -1577,6 +1642,10 @@ int cayman_init(struct radeon_device *rdev)
1577 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 1642 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
1578 rdev->accel_working = false; 1643 rdev->accel_working = false;
1579 } 1644 }
1645 r = radeon_vm_manager_init(rdev);
1646 if (r) {
1647 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
1648 }
1580 1649
1581 r = cayman_startup(rdev); 1650 r = cayman_startup(rdev);
1582 if (r) { 1651 if (r) {
@@ -1585,6 +1654,7 @@ int cayman_init(struct radeon_device *rdev)
1585 r600_irq_fini(rdev); 1654 r600_irq_fini(rdev);
1586 radeon_wb_fini(rdev); 1655 radeon_wb_fini(rdev);
1587 r100_ib_fini(rdev); 1656 r100_ib_fini(rdev);
1657 radeon_vm_manager_fini(rdev);
1588 radeon_irq_kms_fini(rdev); 1658 radeon_irq_kms_fini(rdev);
1589 cayman_pcie_gart_fini(rdev); 1659 cayman_pcie_gart_fini(rdev);
1590 rdev->accel_working = false; 1660 rdev->accel_working = false;
@@ -1608,6 +1678,7 @@ void cayman_fini(struct radeon_device *rdev)
1608 cayman_cp_fini(rdev); 1678 cayman_cp_fini(rdev);
1609 r600_irq_fini(rdev); 1679 r600_irq_fini(rdev);
1610 radeon_wb_fini(rdev); 1680 radeon_wb_fini(rdev);
1681 radeon_vm_manager_fini(rdev);
1611 r100_ib_fini(rdev); 1682 r100_ib_fini(rdev);
1612 radeon_irq_kms_fini(rdev); 1683 radeon_irq_kms_fini(rdev);
1613 cayman_pcie_gart_fini(rdev); 1684 cayman_pcie_gart_fini(rdev);
@@ -1621,3 +1692,84 @@ void cayman_fini(struct radeon_device *rdev)
1621 rdev->bios = NULL; 1692 rdev->bios = NULL;
1622} 1693}
1623 1694
1695/*
1696 * vm
1697 */
1698int cayman_vm_init(struct radeon_device *rdev)
1699{
1700 /* number of VMs */
1701 rdev->vm_manager.nvm = 8;
1702 /* base offset of vram pages */
1703 rdev->vm_manager.vram_base_offset = 0;
1704 return 0;
1705}
1706
1707void cayman_vm_fini(struct radeon_device *rdev)
1708{
1709}
1710
1711int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
1712{
1713 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (id << 2), 0);
1714 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (id << 2), vm->last_pfn);
1715 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
1716 /* flush hdp cache */
1717 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
1718 /* bits 0-7 are the VM contexts0-7 */
1719 WREG32(VM_INVALIDATE_REQUEST, 1 << id);
1720 return 0;
1721}
1722
1723void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
1724{
1725 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR + (vm->id << 2), 0);
1726 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR + (vm->id << 2), 0);
1727 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0);
1728 /* flush hdp cache */
1729 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
1730 /* bits 0-7 are the VM contexts0-7 */
1731 WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
1732}
1733
1734void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
1735{
1736 if (vm->id == -1)
1737 return;
1738
1739 /* flush hdp cache */
1740 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
1741 /* bits 0-7 are the VM contexts0-7 */
1742 WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
1743}
1744
1745#define R600_PTE_VALID (1 << 0)
1746#define R600_PTE_SYSTEM (1 << 1)
1747#define R600_PTE_SNOOPED (1 << 2)
1748#define R600_PTE_READABLE (1 << 5)
1749#define R600_PTE_WRITEABLE (1 << 6)
1750
1751uint32_t cayman_vm_page_flags(struct radeon_device *rdev,
1752 struct radeon_vm *vm,
1753 uint32_t flags)
1754{
1755 uint32_t r600_flags = 0;
1756
1757 r600_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
1758 r600_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
1759 r600_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
1760 if (flags & RADEON_VM_PAGE_SYSTEM) {
1761 r600_flags |= R600_PTE_SYSTEM;
1762 r600_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
1763 }
1764 return r600_flags;
1765}
1766
1767void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm,
1768 unsigned pfn, uint64_t addr, uint32_t flags)
1769{
1770 void __iomem *ptr = (void *)vm->pt;
1771
1772 addr = addr & 0xFFFFFFFFFFFFF000ULL;
1773 addr |= flags;
1774 writeq(addr, ptr + (pfn * 8));
1775}
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 0d3f52cff2f6..f9df2a645e79 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -222,6 +222,7 @@
222#define SCRATCH_UMSK 0x8540 222#define SCRATCH_UMSK 0x8540
223#define SCRATCH_ADDR 0x8544 223#define SCRATCH_ADDR 0x8544
224#define CP_SEM_WAIT_TIMER 0x85BC 224#define CP_SEM_WAIT_TIMER 0x85BC
225#define CP_COHER_CNTL2 0x85E8
225#define CP_ME_CNTL 0x86D8 226#define CP_ME_CNTL 0x86D8
226#define CP_ME_HALT (1 << 28) 227#define CP_ME_HALT (1 << 28)
227#define CP_PFP_HALT (1 << 26) 228#define CP_PFP_HALT (1 << 26)
@@ -458,6 +459,7 @@
458#define PACKET3_DISPATCH_DIRECT 0x15 459#define PACKET3_DISPATCH_DIRECT 0x15
459#define PACKET3_DISPATCH_INDIRECT 0x16 460#define PACKET3_DISPATCH_INDIRECT 0x16
460#define PACKET3_INDIRECT_BUFFER_END 0x17 461#define PACKET3_INDIRECT_BUFFER_END 0x17
462#define PACKET3_MODE_CONTROL 0x18
461#define PACKET3_SET_PREDICATION 0x20 463#define PACKET3_SET_PREDICATION 0x20
462#define PACKET3_REG_RMW 0x21 464#define PACKET3_REG_RMW 0x21
463#define PACKET3_COND_EXEC 0x22 465#define PACKET3_COND_EXEC 0x22
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 8ad5c6475d55..3fc0d29a5f39 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -704,7 +704,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
704 return r; 704 return r;
705 } 705 }
706 706
707 if (p->keep_tiling_flags) { 707 if (p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) {
708 ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */ 708 ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */
709 ((idx_value & ~31) + (u32)reloc->lobj.gpu_offset); 709 ((idx_value & ~31) + (u32)reloc->lobj.gpu_offset);
710 } else { 710 } else {
@@ -768,7 +768,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
768 /* RB3D_COLORPITCH1 */ 768 /* RB3D_COLORPITCH1 */
769 /* RB3D_COLORPITCH2 */ 769 /* RB3D_COLORPITCH2 */
770 /* RB3D_COLORPITCH3 */ 770 /* RB3D_COLORPITCH3 */
771 if (!p->keep_tiling_flags) { 771 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
772 r = r100_cs_packet_next_reloc(p, &reloc); 772 r = r100_cs_packet_next_reloc(p, &reloc);
773 if (r) { 773 if (r) {
774 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 774 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
@@ -853,7 +853,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
853 break; 853 break;
854 case 0x4F24: 854 case 0x4F24:
855 /* ZB_DEPTHPITCH */ 855 /* ZB_DEPTHPITCH */
856 if (!p->keep_tiling_flags) { 856 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
857 r = r100_cs_packet_next_reloc(p, &reloc); 857 r = r100_cs_packet_next_reloc(p, &reloc);
858 if (r) { 858 if (r) {
859 DRM_ERROR("No reloc for ib[%d]=0x%04X\n", 859 DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index cb1acffd2430..38ce5d0427e3 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -941,7 +941,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
941 track->db_depth_control = radeon_get_ib_value(p, idx); 941 track->db_depth_control = radeon_get_ib_value(p, idx);
942 break; 942 break;
943 case R_028010_DB_DEPTH_INFO: 943 case R_028010_DB_DEPTH_INFO:
944 if (!p->keep_tiling_flags && 944 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) &&
945 r600_cs_packet_next_is_pkt3_nop(p)) { 945 r600_cs_packet_next_is_pkt3_nop(p)) {
946 r = r600_cs_packet_next_reloc(p, &reloc); 946 r = r600_cs_packet_next_reloc(p, &reloc);
947 if (r) { 947 if (r) {
@@ -993,7 +993,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
993 case R_0280B4_CB_COLOR5_INFO: 993 case R_0280B4_CB_COLOR5_INFO:
994 case R_0280B8_CB_COLOR6_INFO: 994 case R_0280B8_CB_COLOR6_INFO:
995 case R_0280BC_CB_COLOR7_INFO: 995 case R_0280BC_CB_COLOR7_INFO:
996 if (!p->keep_tiling_flags && 996 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) &&
997 r600_cs_packet_next_is_pkt3_nop(p)) { 997 r600_cs_packet_next_is_pkt3_nop(p)) {
998 r = r600_cs_packet_next_reloc(p, &reloc); 998 r = r600_cs_packet_next_reloc(p, &reloc);
999 if (r) { 999 if (r) {
@@ -1293,7 +1293,7 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx,
1293 mip_offset <<= 8; 1293 mip_offset <<= 8;
1294 1294
1295 word0 = radeon_get_ib_value(p, idx + 0); 1295 word0 = radeon_get_ib_value(p, idx + 0);
1296 if (!p->keep_tiling_flags) { 1296 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1297 if (tiling_flags & RADEON_TILING_MACRO) 1297 if (tiling_flags & RADEON_TILING_MACRO)
1298 word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); 1298 word0 |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
1299 else if (tiling_flags & RADEON_TILING_MICRO) 1299 else if (tiling_flags & RADEON_TILING_MICRO)
@@ -1625,7 +1625,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
1625 return -EINVAL; 1625 return -EINVAL;
1626 } 1626 }
1627 base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); 1627 base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1628 if (!p->keep_tiling_flags) { 1628 if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
1629 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) 1629 if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
1630 ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1); 1630 ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
1631 else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) 1631 else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 374f9a4d94ef..5e3542384b21 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -118,6 +118,10 @@ extern int radeon_msi;
118#define CAYMAN_RING_TYPE_CP1_INDEX 1 118#define CAYMAN_RING_TYPE_CP1_INDEX 1
119#define CAYMAN_RING_TYPE_CP2_INDEX 2 119#define CAYMAN_RING_TYPE_CP2_INDEX 2
120 120
121/* hardcode those limit for now */
122#define RADEON_VA_RESERVED_SIZE (8 << 20)
123#define RADEON_IB_VM_MAX_SIZE (64 << 10)
124
121/* 125/*
122 * Errata workarounds. 126 * Errata workarounds.
123 */ 127 */
@@ -262,6 +266,21 @@ struct radeon_mman {
262 bool initialized; 266 bool initialized;
263}; 267};
264 268
269/* bo virtual address in a specific vm */
270struct radeon_bo_va {
271 /* bo list is protected by bo being reserved */
272 struct list_head bo_list;
273 /* vm list is protected by vm mutex */
274 struct list_head vm_list;
275 /* constant after initialization */
276 struct radeon_vm *vm;
277 struct radeon_bo *bo;
278 uint64_t soffset;
279 uint64_t eoffset;
280 uint32_t flags;
281 bool valid;
282};
283
265struct radeon_bo { 284struct radeon_bo {
266 /* Protected by gem.mutex */ 285 /* Protected by gem.mutex */
267 struct list_head list; 286 struct list_head list;
@@ -275,6 +294,10 @@ struct radeon_bo {
275 u32 tiling_flags; 294 u32 tiling_flags;
276 u32 pitch; 295 u32 pitch;
277 int surface_reg; 296 int surface_reg;
297 /* list of all virtual address to which this bo
298 * is associated to
299 */
300 struct list_head va;
278 /* Constant after initialization */ 301 /* Constant after initialization */
279 struct radeon_device *rdev; 302 struct radeon_device *rdev;
280 struct drm_gem_object gem_base; 303 struct drm_gem_object gem_base;
@@ -408,6 +431,7 @@ struct radeon_mc;
408#define RADEON_GPU_PAGE_SIZE 4096 431#define RADEON_GPU_PAGE_SIZE 4096
409#define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1) 432#define RADEON_GPU_PAGE_MASK (RADEON_GPU_PAGE_SIZE - 1)
410#define RADEON_GPU_PAGE_SHIFT 12 433#define RADEON_GPU_PAGE_SHIFT 12
434#define RADEON_GPU_PAGE_ALIGN(a) (((a) + RADEON_GPU_PAGE_MASK) & ~RADEON_GPU_PAGE_MASK)
411 435
412struct radeon_gart { 436struct radeon_gart {
413 dma_addr_t table_addr; 437 dma_addr_t table_addr;
@@ -565,6 +589,7 @@ struct radeon_ib {
565 uint64_t gpu_addr; 589 uint64_t gpu_addr;
566 uint32_t *ptr; 590 uint32_t *ptr;
567 struct radeon_fence *fence; 591 struct radeon_fence *fence;
592 unsigned vm_id;
568}; 593};
569 594
570/* 595/*
@@ -602,6 +627,56 @@ struct radeon_ring {
602}; 627};
603 628
604/* 629/*
630 * VM
631 */
632struct radeon_vm {
633 struct list_head list;
634 struct list_head va;
635 int id;
636 unsigned last_pfn;
637 u64 pt_gpu_addr;
638 u64 *pt;
639 struct radeon_sa_bo sa_bo;
640 struct mutex mutex;
641 /* last fence for cs using this vm */
642 struct radeon_fence *fence;
643};
644
645struct radeon_vm_funcs {
646 int (*init)(struct radeon_device *rdev);
647 void (*fini)(struct radeon_device *rdev);
648 /* cs mutex must be lock for schedule_ib */
649 int (*bind)(struct radeon_device *rdev, struct radeon_vm *vm, int id);
650 void (*unbind)(struct radeon_device *rdev, struct radeon_vm *vm);
651 void (*tlb_flush)(struct radeon_device *rdev, struct radeon_vm *vm);
652 uint32_t (*page_flags)(struct radeon_device *rdev,
653 struct radeon_vm *vm,
654 uint32_t flags);
655 void (*set_page)(struct radeon_device *rdev, struct radeon_vm *vm,
656 unsigned pfn, uint64_t addr, uint32_t flags);
657};
658
659struct radeon_vm_manager {
660 struct list_head lru_vm;
661 uint32_t use_bitmap;
662 struct radeon_sa_manager sa_manager;
663 uint32_t max_pfn;
664 /* fields constant after init */
665 const struct radeon_vm_funcs *funcs;
666 /* number of VMIDs */
667 unsigned nvm;
668 /* vram base address for page table entry */
669 u64 vram_base_offset;
670};
671
672/*
673 * file private structure
674 */
675struct radeon_fpriv {
676 struct radeon_vm vm;
677};
678
679/*
605 * R6xx+ IH ring 680 * R6xx+ IH ring
606 */ 681 */
607struct r600_ih { 682struct r600_ih {
@@ -691,12 +766,12 @@ struct radeon_cs_reloc {
691struct radeon_cs_chunk { 766struct radeon_cs_chunk {
692 uint32_t chunk_id; 767 uint32_t chunk_id;
693 uint32_t length_dw; 768 uint32_t length_dw;
694 int kpage_idx[2]; 769 int kpage_idx[2];
695 uint32_t *kpage[2]; 770 uint32_t *kpage[2];
696 uint32_t *kdata; 771 uint32_t *kdata;
697 void __user *user_ptr; 772 void __user *user_ptr;
698 int last_copied_page; 773 int last_copied_page;
699 int last_page_index; 774 int last_page_index;
700}; 775};
701 776
702struct radeon_cs_parser { 777struct radeon_cs_parser {
@@ -717,11 +792,14 @@ struct radeon_cs_parser {
717 /* indices of various chunks */ 792 /* indices of various chunks */
718 int chunk_ib_idx; 793 int chunk_ib_idx;
719 int chunk_relocs_idx; 794 int chunk_relocs_idx;
795 int chunk_flags_idx;
720 struct radeon_ib *ib; 796 struct radeon_ib *ib;
721 void *track; 797 void *track;
722 unsigned family; 798 unsigned family;
723 int parser_error; 799 int parser_error;
724 bool keep_tiling_flags; 800 u32 cs_flags;
801 u32 ring;
802 s32 priority;
725}; 803};
726 804
727extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx); 805extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
@@ -1018,6 +1096,7 @@ struct radeon_asic {
1018 1096
1019 struct { 1097 struct {
1020 void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); 1098 void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
1099 int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib);
1021 void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); 1100 void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
1022 void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, 1101 void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
1023 struct radeon_semaphore *semaphore, bool emit_wait); 1102 struct radeon_semaphore *semaphore, bool emit_wait);
@@ -1255,6 +1334,8 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
1255 struct drm_file *filp); 1334 struct drm_file *filp);
1256int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, 1335int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
1257 struct drm_file *filp); 1336 struct drm_file *filp);
1337int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
1338 struct drm_file *filp);
1258int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); 1339int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
1259int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, 1340int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
1260 struct drm_file *filp); 1341 struct drm_file *filp);
@@ -1404,6 +1485,8 @@ struct radeon_device {
1404 /* debugfs */ 1485 /* debugfs */
1405 struct radeon_debugfs debugfs[RADEON_DEBUGFS_MAX_COMPONENTS]; 1486 struct radeon_debugfs debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
1406 unsigned debugfs_count; 1487 unsigned debugfs_count;
1488 /* virtual memory */
1489 struct radeon_vm_manager vm_manager;
1407}; 1490};
1408 1491
1409int radeon_device_init(struct radeon_device *rdev, 1492int radeon_device_init(struct radeon_device *rdev,
@@ -1568,6 +1651,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
1568#define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev)) 1651#define radeon_ring_start(rdev) (rdev)->asic->ring_start((rdev))
1569#define radeon_ring_test(rdev, cp) (rdev)->asic->ring_test((rdev), (cp)) 1652#define radeon_ring_test(rdev, cp) (rdev)->asic->ring_test((rdev), (cp))
1570#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib)) 1653#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib))
1654#define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib))
1571#define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev)) 1655#define radeon_irq_set(rdev) (rdev)->asic->irq_set((rdev))
1572#define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev)) 1656#define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev))
1573#define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc)) 1657#define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc))
@@ -1627,6 +1711,33 @@ extern int radeon_suspend_kms(struct drm_device *dev, pm_message_t state);
1627extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size); 1711extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size);
1628 1712
1629/* 1713/*
1714 * vm
1715 */
1716int radeon_vm_manager_init(struct radeon_device *rdev);
1717void radeon_vm_manager_fini(struct radeon_device *rdev);
1718int radeon_vm_manager_start(struct radeon_device *rdev);
1719int radeon_vm_manager_suspend(struct radeon_device *rdev);
1720int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
1721void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
1722int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm);
1723void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
1724int radeon_vm_bo_update_pte(struct radeon_device *rdev,
1725 struct radeon_vm *vm,
1726 struct radeon_bo *bo,
1727 struct ttm_mem_reg *mem);
1728void radeon_vm_bo_invalidate(struct radeon_device *rdev,
1729 struct radeon_bo *bo);
1730int radeon_vm_bo_add(struct radeon_device *rdev,
1731 struct radeon_vm *vm,
1732 struct radeon_bo *bo,
1733 uint64_t offset,
1734 uint32_t flags);
1735int radeon_vm_bo_rmv(struct radeon_device *rdev,
1736 struct radeon_vm *vm,
1737 struct radeon_bo *bo);
1738
1739
1740/*
1630 * R600 vram scratch functions 1741 * R600 vram scratch functions
1631 */ 1742 */
1632int r600_vram_scratch_init(struct radeon_device *rdev); 1743int r600_vram_scratch_init(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 8493d406f5e3..123a1969d284 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -952,6 +952,16 @@ static struct radeon_asic btc_asic = {
952 .post_page_flip = &evergreen_post_page_flip, 952 .post_page_flip = &evergreen_post_page_flip,
953}; 953};
954 954
955static const struct radeon_vm_funcs cayman_vm_funcs = {
956 .init = &cayman_vm_init,
957 .fini = &cayman_vm_fini,
958 .bind = &cayman_vm_bind,
959 .unbind = &cayman_vm_unbind,
960 .tlb_flush = &cayman_vm_tlb_flush,
961 .page_flags = &cayman_vm_page_flags,
962 .set_page = &cayman_vm_set_page,
963};
964
955static struct radeon_asic cayman_asic = { 965static struct radeon_asic cayman_asic = {
956 .init = &cayman_init, 966 .init = &cayman_init,
957 .fini = &cayman_fini, 967 .fini = &cayman_fini,
@@ -965,17 +975,20 @@ static struct radeon_asic cayman_asic = {
965 .ring_test = &r600_ring_test, 975 .ring_test = &r600_ring_test,
966 .ring = { 976 .ring = {
967 [RADEON_RING_TYPE_GFX_INDEX] = { 977 [RADEON_RING_TYPE_GFX_INDEX] = {
968 .ib_execute = &evergreen_ring_ib_execute, 978 .ib_execute = &cayman_ring_ib_execute,
979 .ib_parse = &evergreen_ib_parse,
969 .emit_fence = &cayman_fence_ring_emit, 980 .emit_fence = &cayman_fence_ring_emit,
970 .emit_semaphore = &r600_semaphore_ring_emit, 981 .emit_semaphore = &r600_semaphore_ring_emit,
971 }, 982 },
972 [CAYMAN_RING_TYPE_CP1_INDEX] = { 983 [CAYMAN_RING_TYPE_CP1_INDEX] = {
973 .ib_execute = &r600_ring_ib_execute, 984 .ib_execute = &cayman_ring_ib_execute,
985 .ib_parse = &evergreen_ib_parse,
974 .emit_fence = &cayman_fence_ring_emit, 986 .emit_fence = &cayman_fence_ring_emit,
975 .emit_semaphore = &r600_semaphore_ring_emit, 987 .emit_semaphore = &r600_semaphore_ring_emit,
976 }, 988 },
977 [CAYMAN_RING_TYPE_CP2_INDEX] = { 989 [CAYMAN_RING_TYPE_CP2_INDEX] = {
978 .ib_execute = &r600_ring_ib_execute, 990 .ib_execute = &cayman_ring_ib_execute,
991 .ib_parse = &evergreen_ib_parse,
979 .emit_fence = &cayman_fence_ring_emit, 992 .emit_fence = &cayman_fence_ring_emit,
980 .emit_semaphore = &r600_semaphore_ring_emit, 993 .emit_semaphore = &r600_semaphore_ring_emit,
981 } 994 }
@@ -1128,6 +1141,7 @@ int radeon_asic_init(struct radeon_device *rdev)
1128 rdev->asic = &cayman_asic; 1141 rdev->asic = &cayman_asic;
1129 /* set num crtcs */ 1142 /* set num crtcs */
1130 rdev->num_crtc = 6; 1143 rdev->num_crtc = 6;
1144 rdev->vm_manager.funcs = &cayman_vm_funcs;
1131 break; 1145 break;
1132 default: 1146 default:
1133 /* FIXME: not supported yet */ 1147 /* FIXME: not supported yet */
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index c002ed1c4483..6304aef0d9b2 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -438,5 +438,17 @@ int cayman_suspend(struct radeon_device *rdev);
438int cayman_resume(struct radeon_device *rdev); 438int cayman_resume(struct radeon_device *rdev);
439bool cayman_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp); 439bool cayman_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
440int cayman_asic_reset(struct radeon_device *rdev); 440int cayman_asic_reset(struct radeon_device *rdev);
441void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
442int cayman_vm_init(struct radeon_device *rdev);
443void cayman_vm_fini(struct radeon_device *rdev);
444int cayman_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id);
445void cayman_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm);
446void cayman_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm);
447uint32_t cayman_vm_page_flags(struct radeon_device *rdev,
448 struct radeon_vm *vm,
449 uint32_t flags);
450void cayman_vm_set_page(struct radeon_device *rdev, struct radeon_vm *vm,
451 unsigned pfn, uint64_t addr, uint32_t flags);
452int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
441 453
442#endif 454#endif
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 6559cc455135..4d595403b50c 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -90,11 +90,32 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
90 return radeon_bo_list_validate(&p->validated); 90 return radeon_bo_list_validate(&p->validated);
91} 91}
92 92
93static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
94{
95 p->priority = priority;
96
97 switch (ring) {
98 default:
99 DRM_ERROR("unknown ring id: %d\n", ring);
100 return -EINVAL;
101 case RADEON_CS_RING_GFX:
102 p->ring = RADEON_RING_TYPE_GFX_INDEX;
103 break;
104 case RADEON_CS_RING_COMPUTE:
105 /* for now */
106 p->ring = RADEON_RING_TYPE_GFX_INDEX;
107 break;
108 }
109 return 0;
110}
111
93int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) 112int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
94{ 113{
95 struct drm_radeon_cs *cs = data; 114 struct drm_radeon_cs *cs = data;
96 uint64_t *chunk_array_ptr; 115 uint64_t *chunk_array_ptr;
97 unsigned size, i, flags = 0; 116 unsigned size, i;
117 u32 ring = RADEON_CS_RING_GFX;
118 s32 priority = 0;
98 119
99 if (!cs->num_chunks) { 120 if (!cs->num_chunks) {
100 return 0; 121 return 0;
@@ -104,6 +125,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
104 p->idx = 0; 125 p->idx = 0;
105 p->chunk_ib_idx = -1; 126 p->chunk_ib_idx = -1;
106 p->chunk_relocs_idx = -1; 127 p->chunk_relocs_idx = -1;
128 p->chunk_flags_idx = -1;
107 p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL); 129 p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
108 if (p->chunks_array == NULL) { 130 if (p->chunks_array == NULL) {
109 return -ENOMEM; 131 return -ENOMEM;
@@ -113,6 +135,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
113 sizeof(uint64_t)*cs->num_chunks)) { 135 sizeof(uint64_t)*cs->num_chunks)) {
114 return -EFAULT; 136 return -EFAULT;
115 } 137 }
138 p->cs_flags = 0;
116 p->nchunks = cs->num_chunks; 139 p->nchunks = cs->num_chunks;
117 p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL); 140 p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
118 if (p->chunks == NULL) { 141 if (p->chunks == NULL) {
@@ -141,16 +164,19 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
141 if (p->chunks[i].length_dw == 0) 164 if (p->chunks[i].length_dw == 0)
142 return -EINVAL; 165 return -EINVAL;
143 } 166 }
144 if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS && 167 if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
145 !p->chunks[i].length_dw) { 168 p->chunk_flags_idx = i;
146 return -EINVAL; 169 /* zero length flags aren't useful */
170 if (p->chunks[i].length_dw == 0)
171 return -EINVAL;
147 } 172 }
148 173
149 p->chunks[i].length_dw = user_chunk.length_dw; 174 p->chunks[i].length_dw = user_chunk.length_dw;
150 p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data; 175 p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
151 176
152 cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data; 177 cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
153 if (p->chunks[i].chunk_id != RADEON_CHUNK_ID_IB) { 178 if ((p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) ||
179 (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS)) {
154 size = p->chunks[i].length_dw * sizeof(uint32_t); 180 size = p->chunks[i].length_dw * sizeof(uint32_t);
155 p->chunks[i].kdata = kmalloc(size, GFP_KERNEL); 181 p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
156 if (p->chunks[i].kdata == NULL) { 182 if (p->chunks[i].kdata == NULL) {
@@ -161,29 +187,58 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
161 return -EFAULT; 187 return -EFAULT;
162 } 188 }
163 if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) { 189 if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
164 flags = p->chunks[i].kdata[0]; 190 p->cs_flags = p->chunks[i].kdata[0];
191 if (p->chunks[i].length_dw > 1)
192 ring = p->chunks[i].kdata[1];
193 if (p->chunks[i].length_dw > 2)
194 priority = (s32)p->chunks[i].kdata[2];
165 } 195 }
166 } else {
167 p->chunks[i].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
168 p->chunks[i].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
169 if (p->chunks[i].kpage[0] == NULL || p->chunks[i].kpage[1] == NULL) {
170 kfree(p->chunks[i].kpage[0]);
171 kfree(p->chunks[i].kpage[1]);
172 return -ENOMEM;
173 }
174 p->chunks[i].kpage_idx[0] = -1;
175 p->chunks[i].kpage_idx[1] = -1;
176 p->chunks[i].last_copied_page = -1;
177 p->chunks[i].last_page_index = ((p->chunks[i].length_dw * 4) - 1) / PAGE_SIZE;
178 } 196 }
179 } 197 }
180 if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) { 198
181 DRM_ERROR("cs IB too big: %d\n", 199 if ((p->cs_flags & RADEON_CS_USE_VM) &&
182 p->chunks[p->chunk_ib_idx].length_dw); 200 (p->rdev->family < CHIP_CAYMAN)) {
201 DRM_ERROR("VM not supported on asic!\n");
202 if (p->chunk_relocs_idx != -1)
203 kfree(p->chunks[p->chunk_relocs_idx].kdata);
204 if (p->chunk_flags_idx != -1)
205 kfree(p->chunks[p->chunk_flags_idx].kdata);
183 return -EINVAL; 206 return -EINVAL;
184 } 207 }
185 208
186 p->keep_tiling_flags = (flags & RADEON_CS_KEEP_TILING_FLAGS) != 0; 209 if (radeon_cs_get_ring(p, ring, priority)) {
210 if (p->chunk_relocs_idx != -1)
211 kfree(p->chunks[p->chunk_relocs_idx].kdata);
212 if (p->chunk_flags_idx != -1)
213 kfree(p->chunks[p->chunk_flags_idx].kdata);
214 return -EINVAL;
215 }
216
217
218 /* deal with non-vm */
219 if ((p->chunk_ib_idx != -1) &&
220 ((p->cs_flags & RADEON_CS_USE_VM) == 0) &&
221 (p->chunks[p->chunk_ib_idx].chunk_id == RADEON_CHUNK_ID_IB)) {
222 if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
223 DRM_ERROR("cs IB too big: %d\n",
224 p->chunks[p->chunk_ib_idx].length_dw);
225 return -EINVAL;
226 }
227 p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
228 p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
229 if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
230 p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
231 kfree(p->chunks[p->chunk_ib_idx].kpage[0]);
232 kfree(p->chunks[p->chunk_ib_idx].kpage[1]);
233 return -ENOMEM;
234 }
235 p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
236 p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
237 p->chunks[p->chunk_ib_idx].last_copied_page = -1;
238 p->chunks[p->chunk_ib_idx].last_page_index =
239 ((p->chunks[p->chunk_ib_idx].length_dw * 4) - 1) / PAGE_SIZE;
240 }
241
187 return 0; 242 return 0;
188} 243}
189 244
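For context, the parser above now takes up to three dwords from the RADEON_CHUNK_ID_FLAGS chunk: flags, ring and priority. A minimal userspace-side sketch of filling such a chunk for a VM submission on the compute ring could look like the following; the helper name and the libdrm-style include are illustrative only, while the structure fields and the RADEON_CS_* / RADEON_CHUNK_ID_* constants come from this patch and the existing radeon_drm.h.

/* Illustrative sketch: build the 3-dword FLAGS chunk consumed by
 * radeon_cs_parser_init() above (dword 0 = flags, 1 = ring, 2 = priority). */
#include <stdint.h>
#include "radeon_drm.h"	/* assumed libdrm copy of this header */

static void fill_flags_chunk(struct drm_radeon_cs_chunk *chunk,
			     uint32_t dwords[3])	/* storage owned by the caller */
{
	dwords[0] = RADEON_CS_USE_VM;		/* submit through the virtual address space */
	dwords[1] = RADEON_CS_RING_COMPUTE;	/* or RADEON_CS_RING_GFX */
	dwords[2] = 0;				/* priority: 0 = normal */

	chunk->chunk_id   = RADEON_CHUNK_ID_FLAGS;
	chunk->length_dw  = 3;
	chunk->chunk_data = (uintptr_t)dwords;
}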
@@ -225,11 +280,131 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
225 radeon_ib_free(parser->rdev, &parser->ib); 280 radeon_ib_free(parser->rdev, &parser->ib);
226} 281}
227 282
283static int radeon_cs_ib_chunk(struct radeon_device *rdev,
284 struct radeon_cs_parser *parser)
285{
286 struct radeon_cs_chunk *ib_chunk;
287 int r;
288
289 if (parser->chunk_ib_idx == -1)
290 return 0;
291
292 if (parser->cs_flags & RADEON_CS_USE_VM)
293 return 0;
294
295 ib_chunk = &parser->chunks[parser->chunk_ib_idx];
296 /* Copy the packet into the IB, the parser will read from the
297 * input memory (cached) and write to the IB (which can be
298 * uncached).
299 */
300 r = radeon_ib_get(rdev, parser->ring, &parser->ib,
301 ib_chunk->length_dw * 4);
302 if (r) {
303 DRM_ERROR("Failed to get ib !\n");
304 return r;
305 }
306 parser->ib->length_dw = ib_chunk->length_dw;
307 r = radeon_cs_parse(parser);
308 if (r || parser->parser_error) {
309 DRM_ERROR("Invalid command stream !\n");
310 return r;
311 }
312 r = radeon_cs_finish_pages(parser);
313 if (r) {
314 DRM_ERROR("Invalid command stream !\n");
315 return r;
316 }
317 parser->ib->vm_id = 0;
318 r = radeon_ib_schedule(rdev, parser->ib);
319 if (r) {
320 DRM_ERROR("Failed to schedule IB !\n");
321 }
322 return 0;
323}
324
325static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
326 struct radeon_vm *vm)
327{
328 struct radeon_bo_list *lobj;
329 struct radeon_bo *bo;
330 int r;
331
332 list_for_each_entry(lobj, &parser->validated, tv.head) {
333 bo = lobj->bo;
334 r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem);
335 if (r) {
336 return r;
337 }
338 }
339 return 0;
340}
341
342static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
343 struct radeon_cs_parser *parser)
344{
345 struct radeon_cs_chunk *ib_chunk;
346 struct radeon_fpriv *fpriv = parser->filp->driver_priv;
347 struct radeon_vm *vm = &fpriv->vm;
348 int r;
349
350 if (parser->chunk_ib_idx == -1)
351 return 0;
352
353 if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
354 return 0;
355
356 ib_chunk = &parser->chunks[parser->chunk_ib_idx];
357 if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
358 DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
359 return -EINVAL;
360 }
361 r = radeon_ib_get(rdev, parser->ring, &parser->ib,
362 ib_chunk->length_dw * 4);
363 if (r) {
364 DRM_ERROR("Failed to get ib !\n");
365 return r;
366 }
367 parser->ib->length_dw = ib_chunk->length_dw;
368 /* Copy the packet into the IB */
369 if (DRM_COPY_FROM_USER(parser->ib->ptr, ib_chunk->user_ptr,
370 ib_chunk->length_dw * 4)) {
371 return -EFAULT;
372 }
373 r = radeon_ring_ib_parse(rdev, parser->ring, parser->ib);
374 if (r) {
375 return r;
376 }
377
378 mutex_lock(&vm->mutex);
379 r = radeon_vm_bind(rdev, vm);
380 if (r) {
381 goto out;
382 }
383 r = radeon_bo_vm_update_pte(parser, vm);
384 if (r) {
385 goto out;
386 }
387 parser->ib->vm_id = vm->id;
388 /* the ib pool is bound at 0 in the virtual address space, so gpu_addr is
389 * the offset inside the pool bo
390 */
391 parser->ib->gpu_addr = parser->ib->sa_bo.offset;
392 r = radeon_ib_schedule(rdev, parser->ib);
393out:
394 if (!r) {
395 if (vm->fence) {
396 radeon_fence_unref(&vm->fence);
397 }
398 vm->fence = radeon_fence_ref(parser->ib->fence);
399 }
400 mutex_unlock(&vm->mutex);
401 return r;
402}
403
228int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 404int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
229{ 405{
230 struct radeon_device *rdev = dev->dev_private; 406 struct radeon_device *rdev = dev->dev_private;
231 struct radeon_cs_parser parser; 407 struct radeon_cs_parser parser;
232 struct radeon_cs_chunk *ib_chunk;
233 int r; 408 int r;
234 409
235 radeon_mutex_lock(&rdev->cs_mutex); 410 radeon_mutex_lock(&rdev->cs_mutex);
@@ -246,15 +421,6 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
246 radeon_mutex_unlock(&rdev->cs_mutex); 421 radeon_mutex_unlock(&rdev->cs_mutex);
247 return r; 422 return r;
248 } 423 }
249 ib_chunk = &parser.chunks[parser.chunk_ib_idx];
250 r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &parser.ib,
251 ib_chunk->length_dw * 4);
252 if (r) {
253 DRM_ERROR("Failed to get ib !\n");
254 radeon_cs_parser_fini(&parser, r);
255 radeon_mutex_unlock(&rdev->cs_mutex);
256 return r;
257 }
258 r = radeon_cs_parser_relocs(&parser); 424 r = radeon_cs_parser_relocs(&parser);
259 if (r) { 425 if (r) {
260 if (r != -ERESTARTSYS) 426 if (r != -ERESTARTSYS)
@@ -263,28 +429,15 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
263 radeon_mutex_unlock(&rdev->cs_mutex); 429 radeon_mutex_unlock(&rdev->cs_mutex);
264 return r; 430 return r;
265 } 431 }
266 /* Copy the packet into the IB, the parser will read from the 432 r = radeon_cs_ib_chunk(rdev, &parser);
267 * input memory (cached) and write to the IB (which can be
268 * uncached). */
269 parser.ib->length_dw = ib_chunk->length_dw;
270 r = radeon_cs_parse(&parser);
271 if (r || parser.parser_error) {
272 DRM_ERROR("Invalid command stream !\n");
273 radeon_cs_parser_fini(&parser, r);
274 radeon_mutex_unlock(&rdev->cs_mutex);
275 return r;
276 }
277 r = radeon_cs_finish_pages(&parser);
278 if (r) { 433 if (r) {
279 DRM_ERROR("Invalid command stream !\n"); 434 goto out;
280 radeon_cs_parser_fini(&parser, r);
281 radeon_mutex_unlock(&rdev->cs_mutex);
282 return r;
283 } 435 }
284 r = radeon_ib_schedule(rdev, parser.ib); 436 r = radeon_cs_ib_vm_chunk(rdev, &parser);
285 if (r) { 437 if (r) {
286 DRM_ERROR("Failed to schedule IB !\n"); 438 goto out;
287 } 439 }
440out:
288 radeon_cs_parser_fini(&parser, r); 441 radeon_cs_parser_fini(&parser, r);
289 radeon_mutex_unlock(&rdev->cs_mutex); 442 radeon_mutex_unlock(&rdev->cs_mutex);
290 return r; 443 return r;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 79b08b487298..0afb13bd8dca 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -735,6 +735,10 @@ int radeon_device_init(struct radeon_device *rdev,
735 init_waitqueue_head(&rdev->irq.vblank_queue); 735 init_waitqueue_head(&rdev->irq.vblank_queue);
736 init_waitqueue_head(&rdev->irq.idle_queue); 736 init_waitqueue_head(&rdev->irq.idle_queue);
737 INIT_LIST_HEAD(&rdev->semaphore_drv.bo); 737 INIT_LIST_HEAD(&rdev->semaphore_drv.bo);
738 /* initialize vm here */
739 rdev->vm_manager.use_bitmap = 1;
740 rdev->vm_manager.max_pfn = 1 << 20;
741 INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);
738 742
739 /* Set asic functions */ 743 /* Set asic functions */
740 r = radeon_asic_init(rdev); 744 r = radeon_asic_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index c3ef1d266f88..31da622eef63 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -54,9 +54,10 @@
54 * 2.10.0 - fusion 2D tiling 54 * 2.10.0 - fusion 2D tiling
55 * 2.11.0 - backend map, initial compute support for the CS checker 55 * 2.11.0 - backend map, initial compute support for the CS checker
56 * 2.12.0 - RADEON_CS_KEEP_TILING_FLAGS 56 * 2.12.0 - RADEON_CS_KEEP_TILING_FLAGS
57 * 2.13.0 - virtual memory support
57 */ 58 */
58#define KMS_DRIVER_MAJOR 2 59#define KMS_DRIVER_MAJOR 2
59#define KMS_DRIVER_MINOR 12 60#define KMS_DRIVER_MINOR 13
60#define KMS_DRIVER_PATCHLEVEL 0 61#define KMS_DRIVER_PATCHLEVEL 0
61int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); 62int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
62int radeon_driver_unload_kms(struct drm_device *dev); 63int radeon_driver_unload_kms(struct drm_device *dev);
@@ -84,6 +85,10 @@ int radeon_dma_ioctl_kms(struct drm_device *dev, void *data,
84 struct drm_file *file_priv); 85 struct drm_file *file_priv);
85int radeon_gem_object_init(struct drm_gem_object *obj); 86int radeon_gem_object_init(struct drm_gem_object *obj);
86void radeon_gem_object_free(struct drm_gem_object *obj); 87void radeon_gem_object_free(struct drm_gem_object *obj);
88int radeon_gem_object_open(struct drm_gem_object *obj,
89 struct drm_file *file_priv);
90void radeon_gem_object_close(struct drm_gem_object *obj,
91 struct drm_file *file_priv);
87extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, 92extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc,
88 int *vpos, int *hpos); 93 int *vpos, int *hpos);
89extern struct drm_ioctl_desc radeon_ioctls_kms[]; 94extern struct drm_ioctl_desc radeon_ioctls_kms[];
@@ -350,6 +355,8 @@ static struct drm_driver kms_driver = {
350 .ioctls = radeon_ioctls_kms, 355 .ioctls = radeon_ioctls_kms,
351 .gem_init_object = radeon_gem_object_init, 356 .gem_init_object = radeon_gem_object_init,
352 .gem_free_object = radeon_gem_object_free, 357 .gem_free_object = radeon_gem_object_free,
358 .gem_open_object = radeon_gem_object_open,
359 .gem_close_object = radeon_gem_object_close,
353 .dma_ioctl = radeon_dma_ioctl_kms, 360 .dma_ioctl = radeon_dma_ioctl_kms,
354 .dumb_create = radeon_mode_dumb_create, 361 .dumb_create = radeon_mode_dumb_create,
355 .dumb_map_offset = radeon_mode_dumb_mmap, 362 .dumb_map_offset = radeon_mode_dumb_mmap,
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index a4d981608580..3ef58cab18c9 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -276,3 +276,391 @@ void radeon_gart_fini(struct radeon_device *rdev)
276 276
277 radeon_dummy_page_fini(rdev); 277 radeon_dummy_page_fini(rdev);
278} 278}
279
280/*
281 * vm helpers
282 *
283 * TODO bind a default page at vm initialization for default address
284 */
285int radeon_vm_manager_init(struct radeon_device *rdev)
286{
287 int r;
288
289 /* mark first vm as always in use, it's the system one */
290 r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
291 rdev->vm_manager.max_pfn * 8,
292 RADEON_GEM_DOMAIN_VRAM);
293 if (r) {
294 dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
295 (rdev->vm_manager.max_pfn * 8) >> 10);
296 return r;
297 }
298 return rdev->vm_manager.funcs->init(rdev);
299}
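A quick back-of-the-envelope check on the allocation above, assuming the defaults this patch sets in radeon_device_init (max_pfn = 1 << 20) and the usual 4 KiB GPU page:

/* sa_manager sizing, illustrative numbers only:
 *   max_pfn          = 1 << 20 GPU pages
 *   4 KiB per page   -> 4 GiB of manageable virtual address space
 *   8 bytes per PTE  -> max_pfn * 8 = 8 MiB of VRAM reserved for page tables
 * (matches the "(%dKB)" printed by the error path above: 8192 KB)
 */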
300
301/* cs mutex must be held */
302static void radeon_vm_unbind_locked(struct radeon_device *rdev,
303 struct radeon_vm *vm)
304{
305 struct radeon_bo_va *bo_va;
306
307 if (vm->id == -1) {
308 return;
309 }
310
311 /* wait for vm use to end */
312 if (vm->fence) {
313 radeon_fence_wait(vm->fence, false);
314 radeon_fence_unref(&vm->fence);
315 }
316
317 /* hw unbind */
318 rdev->vm_manager.funcs->unbind(rdev, vm);
319 rdev->vm_manager.use_bitmap &= ~(1 << vm->id);
320 list_del_init(&vm->list);
321 vm->id = -1;
322 radeon_sa_bo_free(rdev, &vm->sa_bo);
323 vm->pt = NULL;
324
325 list_for_each_entry(bo_va, &vm->va, vm_list) {
326 bo_va->valid = false;
327 }
328}
329
330void radeon_vm_manager_fini(struct radeon_device *rdev)
331{
332 if (rdev->vm_manager.sa_manager.bo == NULL)
333 return;
334 radeon_vm_manager_suspend(rdev);
335 rdev->vm_manager.funcs->fini(rdev);
336 radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
337}
338
339int radeon_vm_manager_start(struct radeon_device *rdev)
340{
341 if (rdev->vm_manager.sa_manager.bo == NULL) {
342 return -EINVAL;
343 }
344 return radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
345}
346
347int radeon_vm_manager_suspend(struct radeon_device *rdev)
348{
349 struct radeon_vm *vm, *tmp;
350
351 radeon_mutex_lock(&rdev->cs_mutex);
352 /* unbind all active vm */
353 list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
354 radeon_vm_unbind_locked(rdev, vm);
355 }
356 rdev->vm_manager.funcs->fini(rdev);
357 radeon_mutex_unlock(&rdev->cs_mutex);
358 return radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
359}
360
361/* cs mutex must be held */
362void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
363{
364 mutex_lock(&vm->mutex);
365 radeon_vm_unbind_locked(rdev, vm);
366 mutex_unlock(&vm->mutex);
367}
368
369/* cs mutex and vm mutex must be held */
370int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
371{
372 struct radeon_vm *vm_evict;
373 unsigned i;
374 int id = -1, r;
375
376 if (vm == NULL) {
377 return -EINVAL;
378 }
379
380 if (vm->id != -1) {
381 /* update lru */
382 list_del_init(&vm->list);
383 list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
384 return 0;
385 }
386
387retry:
388 r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
389 RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8),
390 RADEON_GPU_PAGE_SIZE);
391 if (r) {
392 if (list_empty(&rdev->vm_manager.lru_vm)) {
393 return r;
394 }
395 vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
396 radeon_vm_unbind(rdev, vm_evict);
397 goto retry;
398 }
399 vm->pt = rdev->vm_manager.sa_manager.cpu_ptr;
400 vm->pt += (vm->sa_bo.offset >> 3);
401 vm->pt_gpu_addr = rdev->vm_manager.sa_manager.gpu_addr;
402 vm->pt_gpu_addr += vm->sa_bo.offset;
403 memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));
404
405retry_id:
406 /* search for free vm */
407 for (i = 0; i < rdev->vm_manager.nvm; i++) {
408 if (!(rdev->vm_manager.use_bitmap & (1 << i))) {
409 id = i;
410 break;
411 }
412 }
413 /* evict vm if necessary */
414 if (id == -1) {
415 vm_evict = list_first_entry(&rdev->vm_manager.lru_vm, struct radeon_vm, list);
416 radeon_vm_unbind(rdev, vm_evict);
417 goto retry_id;
418 }
419
420 /* do hw bind */
421 r = rdev->vm_manager.funcs->bind(rdev, vm, id);
422 if (r) {
423 radeon_sa_bo_free(rdev, &vm->sa_bo);
424 return r;
425 }
426 rdev->vm_manager.use_bitmap |= 1 << id;
427 vm->id = id;
428 list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
429 return radeon_vm_bo_update_pte(rdev, vm, rdev->ib_pool.sa_manager.bo,
430 &rdev->ib_pool.sa_manager.bo->tbo.mem);
431}
432
433/* object has to be reserved */
434int radeon_vm_bo_add(struct radeon_device *rdev,
435 struct radeon_vm *vm,
436 struct radeon_bo *bo,
437 uint64_t offset,
438 uint32_t flags)
439{
440 struct radeon_bo_va *bo_va, *tmp;
441 struct list_head *head;
442 uint64_t size = radeon_bo_size(bo), last_offset = 0;
443 unsigned last_pfn;
444
445 bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
446 if (bo_va == NULL) {
447 return -ENOMEM;
448 }
449 bo_va->vm = vm;
450 bo_va->bo = bo;
451 bo_va->soffset = offset;
452 bo_va->eoffset = offset + size;
453 bo_va->flags = flags;
454 bo_va->valid = false;
455 INIT_LIST_HEAD(&bo_va->bo_list);
456 INIT_LIST_HEAD(&bo_va->vm_list);
457 /* make sure the object fits at this offset */
458 if (bo_va->soffset >= bo_va->eoffset) {
459 kfree(bo_va);
460 return -EINVAL;
461 }
462
463 last_pfn = bo_va->eoffset / RADEON_GPU_PAGE_SIZE;
464 if (last_pfn > rdev->vm_manager.max_pfn) {
465 kfree(bo_va);
466 dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
467 last_pfn, rdev->vm_manager.max_pfn);
468 return -EINVAL;
469 }
470
471 mutex_lock(&vm->mutex);
472 if (last_pfn > vm->last_pfn) {
473 /* grow the va space in 32MB (8192 page) increments */
474 unsigned align = ((32 << 20) >> 12) - 1;
475 radeon_mutex_lock(&rdev->cs_mutex);
476 radeon_vm_unbind_locked(rdev, vm);
477 radeon_mutex_unlock(&rdev->cs_mutex);
478 vm->last_pfn = (last_pfn + align) & ~align;
479 }
480 head = &vm->va;
481 last_offset = 0;
482 list_for_each_entry(tmp, &vm->va, vm_list) {
483 if (bo_va->soffset >= last_offset && bo_va->eoffset < tmp->soffset) {
484 /* bo can be added before this one */
485 break;
486 }
487 if (bo_va->soffset >= tmp->soffset && bo_va->soffset < tmp->eoffset) {
488 /* bo and tmp overlap, invalid offset */
489 kfree(bo_va);
490 dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
491 bo, (unsigned)bo_va->soffset, tmp->bo,
492 (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
493 mutex_unlock(&vm->mutex);
494 return -EINVAL;
495 }
496 last_offset = tmp->eoffset;
497 head = &tmp->vm_list;
498 }
499 list_add(&bo_va->vm_list, head);
500 list_add_tail(&bo_va->bo_list, &bo->va);
501 mutex_unlock(&vm->mutex);
502 return 0;
503}
504
505static u64 radeon_vm_get_addr(struct radeon_device *rdev,
506 struct ttm_mem_reg *mem,
507 unsigned pfn)
508{
509 u64 addr = 0;
510
511 switch (mem->mem_type) {
512 case TTM_PL_VRAM:
513 addr = (mem->start << PAGE_SHIFT);
514 addr += pfn * RADEON_GPU_PAGE_SIZE;
515 addr += rdev->vm_manager.vram_base_offset;
516 break;
517 case TTM_PL_TT:
518 /* offset inside page table */
519 addr = mem->start << PAGE_SHIFT;
520 addr += pfn * RADEON_GPU_PAGE_SIZE;
521 addr = addr >> PAGE_SHIFT;
522 /* page table offset */
523 addr = rdev->gart.pages_addr[addr];
524 /* in case cpu page size != gpu page size */
525 addr += (pfn * RADEON_GPU_PAGE_SIZE) & (~PAGE_MASK);
526 break;
527 default:
528 break;
529 }
530 return addr;
531}
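The TTM_PL_TT case above translates through the GART backing pages; a small worked example with made-up numbers (assuming 4 KiB CPU and GPU pages) may make the arithmetic easier to follow:

/* Illustrative walk-through of the TTM_PL_TT branch, mem->start = 0x100
 * (CPU pages into the GTT), pfn = 3, PAGE_SIZE = RADEON_GPU_PAGE_SIZE = 4096:
 *   addr  = (0x100 << 12) + 3 * 4096   -> byte offset 0x103000 in the GTT
 *   addr >>= 12                        -> CPU page index 0x103
 *   addr  = gart.pages_addr[0x103]     -> bus address of that system page
 *   addr += (3 * 4096) & ~PAGE_MASK    -> +0 here; non-zero only when the
 *                                          GPU page is smaller than the CPU page
 */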
532
533/* object has to be reserved; cs mutex and vm mutex must be held */
534int radeon_vm_bo_update_pte(struct radeon_device *rdev,
535 struct radeon_vm *vm,
536 struct radeon_bo *bo,
537 struct ttm_mem_reg *mem)
538{
539 struct radeon_bo_va *bo_va;
540 unsigned ngpu_pages, i;
541 uint64_t addr = 0, pfn;
542 uint32_t flags;
543
544 /* nothing to do if vm isn't bound */
545 if (vm->id == -1)
546 return 0;
547
548 bo_va = radeon_bo_va(bo, vm);
549 if (bo_va == NULL) {
550 dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
551 return -EINVAL;
552 }
553
554 if (bo_va->valid)
555 return 0;
556
557 ngpu_pages = radeon_bo_ngpu_pages(bo);
558 bo_va->flags &= ~RADEON_VM_PAGE_VALID;
559 bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
560 if (mem) {
561 if (mem->mem_type != TTM_PL_SYSTEM) {
562 bo_va->flags |= RADEON_VM_PAGE_VALID;
563 bo_va->valid = true;
564 }
565 if (mem->mem_type == TTM_PL_TT) {
566 bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
567 }
568 }
569 pfn = bo_va->soffset / RADEON_GPU_PAGE_SIZE;
570 flags = rdev->vm_manager.funcs->page_flags(rdev, bo_va->vm, bo_va->flags);
571 for (i = 0, addr = 0; i < ngpu_pages; i++) {
572 if (mem && bo_va->valid) {
573 addr = radeon_vm_get_addr(rdev, mem, i);
574 }
575 rdev->vm_manager.funcs->set_page(rdev, bo_va->vm, i + pfn, addr, flags);
576 }
577 rdev->vm_manager.funcs->tlb_flush(rdev, bo_va->vm);
578 return 0;
579}
580
581/* object has to be reserved */
582int radeon_vm_bo_rmv(struct radeon_device *rdev,
583 struct radeon_vm *vm,
584 struct radeon_bo *bo)
585{
586 struct radeon_bo_va *bo_va;
587
588 bo_va = radeon_bo_va(bo, vm);
589 if (bo_va == NULL)
590 return 0;
591
592 list_del(&bo_va->bo_list);
593 mutex_lock(&vm->mutex);
594 radeon_mutex_lock(&rdev->cs_mutex);
595 radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
596 radeon_mutex_unlock(&rdev->cs_mutex);
597 list_del(&bo_va->vm_list);
598 mutex_unlock(&vm->mutex);
599
600 kfree(bo_va);
601 return 0;
602}
603
604void radeon_vm_bo_invalidate(struct radeon_device *rdev,
605 struct radeon_bo *bo)
606{
607 struct radeon_bo_va *bo_va;
608
609 BUG_ON(!atomic_read(&bo->tbo.reserved));
610 list_for_each_entry(bo_va, &bo->va, bo_list) {
611 bo_va->valid = false;
612 }
613}
614
615int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
616{
617 int r;
618
619 vm->id = -1;
620 vm->fence = NULL;
621 mutex_init(&vm->mutex);
622 INIT_LIST_HEAD(&vm->list);
623 INIT_LIST_HEAD(&vm->va);
624 vm->last_pfn = 0;
625 /* map the ib pool buffer at 0 in the virtual address space, set
626 * it read only
627 */
628 r = radeon_vm_bo_add(rdev, vm, rdev->ib_pool.sa_manager.bo, 0,
629 RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED);
630 return r;
631}
632
633void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
634{
635 struct radeon_bo_va *bo_va, *tmp;
636 int r;
637
638 mutex_lock(&vm->mutex);
639
640 radeon_mutex_lock(&rdev->cs_mutex);
641 radeon_vm_unbind_locked(rdev, vm);
642 radeon_mutex_unlock(&rdev->cs_mutex);
643
644 /* remove all bo */
645 r = radeon_bo_reserve(rdev->ib_pool.sa_manager.bo, false);
646 if (!r) {
647 bo_va = radeon_bo_va(rdev->ib_pool.sa_manager.bo, vm);
648 list_del_init(&bo_va->bo_list);
649 list_del_init(&bo_va->vm_list);
650 radeon_bo_unreserve(rdev->ib_pool.sa_manager.bo);
651 kfree(bo_va);
652 }
653 if (!list_empty(&vm->va)) {
654 dev_err(rdev->dev, "still active bo inside vm\n");
655 }
656 list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
657 list_del_init(&bo_va->vm_list);
658 r = radeon_bo_reserve(bo_va->bo, false);
659 if (!r) {
660 list_del_init(&bo_va->bo_list);
661 radeon_bo_unreserve(bo_va->bo);
662 kfree(bo_va);
663 }
664 }
665 mutex_unlock(&vm->mutex);
666}
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index ae321975283c..003eeec1b688 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -142,6 +142,44 @@ void radeon_gem_fini(struct radeon_device *rdev)
142 radeon_bo_force_delete(rdev); 142 radeon_bo_force_delete(rdev);
143} 143}
144 144
145/*
146 * Called from drm_gem_handle_create, which appears in both the new and
147 * the open ioctl case.
148 */
149int radeon_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv)
150{
151 return 0;
152}
153
154void radeon_gem_object_close(struct drm_gem_object *obj,
155 struct drm_file *file_priv)
156{
157 struct radeon_bo *rbo = gem_to_radeon_bo(obj);
158 struct radeon_device *rdev = rbo->rdev;
159 struct radeon_fpriv *fpriv = file_priv->driver_priv;
160 struct radeon_vm *vm = &fpriv->vm;
161 struct radeon_bo_va *bo_va, *tmp;
162
163 if (rdev->family < CHIP_CAYMAN) {
164 return;
165 }
166
167 if (radeon_bo_reserve(rbo, false)) {
168 return;
169 }
170 list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) {
171 if (bo_va->vm == vm) {
172 /* remove from this vm address space */
173 mutex_lock(&vm->mutex);
174 list_del(&bo_va->vm_list);
175 mutex_unlock(&vm->mutex);
176 list_del(&bo_va->bo_list);
177 kfree(bo_va);
178 }
179 }
180 radeon_bo_unreserve(rbo);
181}
182
145 183
146/* 184/*
147 * GEM ioctls. 185 * GEM ioctls.
@@ -354,6 +392,104 @@ out:
354 return r; 392 return r;
355} 393}
356 394
395int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
396 struct drm_file *filp)
397{
398 struct drm_radeon_gem_va *args = data;
399 struct drm_gem_object *gobj;
400 struct radeon_device *rdev = dev->dev_private;
401 struct radeon_fpriv *fpriv = filp->driver_priv;
402 struct radeon_bo *rbo;
403 struct radeon_bo_va *bo_va;
404 u32 invalid_flags;
405 int r = 0;
406
407 /* !! DON'T REMOVE !!
408 * We don't support vm_id yet. To make sure we don't end up with broken
409 * userspace, reject anyone trying to use a non-zero value; that way we
410 * can start using these fields later without breaking existing userspace.
411 */
412 if (args->vm_id) {
413 args->operation = RADEON_VA_RESULT_ERROR;
414 return -EINVAL;
415 }
416
417 if (args->offset < RADEON_VA_RESERVED_SIZE) {
418 dev_err(&dev->pdev->dev,
419 "offset 0x%lX is in reserved area 0x%X\n",
420 (unsigned long)args->offset,
421 RADEON_VA_RESERVED_SIZE);
422 args->operation = RADEON_VA_RESULT_ERROR;
423 return -EINVAL;
424 }
425
426 /* don't remove, we need to force userspace to set the snooped flag,
427 * otherwise we will end up with broken userspace and won't be able
428 * to enable this feature without adding a new interface
429 */
430 invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM;
431 if ((args->flags & invalid_flags)) {
432 dev_err(&dev->pdev->dev, "invalid flags 0x%08X vs 0x%08X\n",
433 args->flags, invalid_flags);
434 args->operation = RADEON_VA_RESULT_ERROR;
435 return -EINVAL;
436 }
437 if (!(args->flags & RADEON_VM_PAGE_SNOOPED)) {
438 dev_err(&dev->pdev->dev, "only snooped mappings are supported for now\n");
439 args->operation = RADEON_VA_RESULT_ERROR;
440 return -EINVAL;
441 }
442
443 switch (args->operation) {
444 case RADEON_VA_MAP:
445 case RADEON_VA_UNMAP:
446 break;
447 default:
448 dev_err(&dev->pdev->dev, "unsupported operation %d\n",
449 args->operation);
450 args->operation = RADEON_VA_RESULT_ERROR;
451 return -EINVAL;
452 }
453
454 gobj = drm_gem_object_lookup(dev, filp, args->handle);
455 if (gobj == NULL) {
456 args->operation = RADEON_VA_RESULT_ERROR;
457 return -ENOENT;
458 }
459 rbo = gem_to_radeon_bo(gobj);
460 r = radeon_bo_reserve(rbo, false);
461 if (r) {
462 args->operation = RADEON_VA_RESULT_ERROR;
463 drm_gem_object_unreference_unlocked(gobj);
464 return r;
465 }
466 switch (args->operation) {
467 case RADEON_VA_MAP:
468 bo_va = radeon_bo_va(rbo, &fpriv->vm);
469 if (bo_va) {
470 args->operation = RADEON_VA_RESULT_VA_EXIST;
471 args->offset = bo_va->soffset;
472 goto out;
473 }
474 r = radeon_vm_bo_add(rdev, &fpriv->vm, rbo,
475 args->offset, args->flags);
476 break;
477 case RADEON_VA_UNMAP:
478 r = radeon_vm_bo_rmv(rdev, &fpriv->vm, rbo);
479 break;
480 default:
481 break;
482 }
483 args->operation = RADEON_VA_RESULT_OK;
484 if (r) {
485 args->operation = RADEON_VA_RESULT_ERROR;
486 }
487out:
488 radeon_bo_unreserve(rbo);
489 drm_gem_object_unreference_unlocked(gobj);
490 return r;
491}
492
357int radeon_mode_dumb_create(struct drm_file *file_priv, 493int radeon_mode_dumb_create(struct drm_file *file_priv,
358 struct drm_device *dev, 494 struct drm_device *dev,
359 struct drm_mode_create_dumb *args) 495 struct drm_mode_create_dumb *args)
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index be2c1224e68a..d3352889a870 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -250,6 +250,18 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
250 return -EINVAL; 250 return -EINVAL;
251 } 251 }
252 break; 252 break;
253 case RADEON_INFO_VA_START:
254 /* this is where we report if vm is supported or not */
255 if (rdev->family < CHIP_CAYMAN)
256 return -EINVAL;
257 value = RADEON_VA_RESERVED_SIZE;
258 break;
259 case RADEON_INFO_IB_VM_MAX_SIZE:
260 /* this is where we report if vm is supported or not */
261 if (rdev->family < CHIP_CAYMAN)
262 return -EINVAL;
263 value = RADEON_IB_VM_MAX_SIZE;
264 break;
253 default: 265 default:
254 DRM_DEBUG_KMS("Invalid request %d\n", info->request); 266 DRM_DEBUG_KMS("Invalid request %d\n", info->request);
255 return -EINVAL; 267 return -EINVAL;
@@ -270,7 +282,6 @@ int radeon_driver_firstopen_kms(struct drm_device *dev)
270 return 0; 282 return 0;
271} 283}
272 284
273
274void radeon_driver_lastclose_kms(struct drm_device *dev) 285void radeon_driver_lastclose_kms(struct drm_device *dev)
275{ 286{
276 vga_switcheroo_process_delayed_switch(); 287 vga_switcheroo_process_delayed_switch();
@@ -278,12 +289,45 @@ void radeon_driver_lastclose_kms(struct drm_device *dev)
278 289
279int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) 290int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
280{ 291{
292 struct radeon_device *rdev = dev->dev_private;
293
294 file_priv->driver_priv = NULL;
295
296 /* newer GPUs have virtual address space support */
297 if (rdev->family >= CHIP_CAYMAN) {
298 struct radeon_fpriv *fpriv;
299 int r;
300
301 fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
302 if (unlikely(!fpriv)) {
303 return -ENOMEM;
304 }
305
306 r = radeon_vm_init(rdev, &fpriv->vm);
307 if (r) {
308 radeon_vm_fini(rdev, &fpriv->vm);
309 kfree(fpriv);
310 return r;
311 }
312
313 file_priv->driver_priv = fpriv;
314 }
281 return 0; 315 return 0;
282} 316}
283 317
284void radeon_driver_postclose_kms(struct drm_device *dev, 318void radeon_driver_postclose_kms(struct drm_device *dev,
285 struct drm_file *file_priv) 319 struct drm_file *file_priv)
286{ 320{
321 struct radeon_device *rdev = dev->dev_private;
322
323 /* newer GPUs have virtual address space support */
324 if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) {
325 struct radeon_fpriv *fpriv = file_priv->driver_priv;
326
327 radeon_vm_fini(rdev, &fpriv->vm);
328 kfree(fpriv);
329 file_priv->driver_priv = NULL;
330 }
287} 331}
288 332
289void radeon_driver_preclose_kms(struct drm_device *dev, 333void radeon_driver_preclose_kms(struct drm_device *dev,
@@ -451,5 +495,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = {
451 DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), 495 DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
452 DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED), 496 DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
453 DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED), 497 DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
498 DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED),
454}; 499};
455int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); 500int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 695b4800329a..d45df1763598 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -46,6 +46,20 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
46 * function are calling it. 46 * function are calling it.
47 */ 47 */
48 48
49void radeon_bo_clear_va(struct radeon_bo *bo)
50{
51 struct radeon_bo_va *bo_va, *tmp;
52
53 list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) {
54 /* remove from all vm address spaces */
55 mutex_lock(&bo_va->vm->mutex);
56 list_del(&bo_va->vm_list);
57 mutex_unlock(&bo_va->vm->mutex);
58 list_del(&bo_va->bo_list);
59 kfree(bo_va);
60 }
61}
62
49static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo) 63static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
50{ 64{
51 struct radeon_bo *bo; 65 struct radeon_bo *bo;
@@ -55,6 +69,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
55 list_del_init(&bo->list); 69 list_del_init(&bo->list);
56 mutex_unlock(&bo->rdev->gem.mutex); 70 mutex_unlock(&bo->rdev->gem.mutex);
57 radeon_bo_clear_surface_reg(bo); 71 radeon_bo_clear_surface_reg(bo);
72 radeon_bo_clear_va(bo);
58 drm_gem_object_release(&bo->gem_base); 73 drm_gem_object_release(&bo->gem_base);
59 kfree(bo); 74 kfree(bo);
60} 75}
@@ -134,6 +149,7 @@ retry:
134 bo->gem_base.driver_private = NULL; 149 bo->gem_base.driver_private = NULL;
135 bo->surface_reg = -1; 150 bo->surface_reg = -1;
136 INIT_LIST_HEAD(&bo->list); 151 INIT_LIST_HEAD(&bo->list);
152 INIT_LIST_HEAD(&bo->va);
137 radeon_ttm_placement_from_domain(bo, domain); 153 radeon_ttm_placement_from_domain(bo, domain);
138 /* Kernel allocation are uninterruptible */ 154 /* Kernel allocation are uninterruptible */
139 mutex_lock(&rdev->vram_mutex); 155 mutex_lock(&rdev->vram_mutex);
@@ -487,6 +503,7 @@ void radeon_bo_move_notify(struct ttm_buffer_object *bo,
487 return; 503 return;
488 rbo = container_of(bo, struct radeon_bo, tbo); 504 rbo = container_of(bo, struct radeon_bo, tbo);
489 radeon_bo_check_tiling(rbo, 0, 1); 505 radeon_bo_check_tiling(rbo, 0, 1);
506 radeon_vm_bo_invalidate(rbo->rdev, rbo);
490} 507}
491 508
492int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) 509int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
@@ -560,3 +577,16 @@ int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
560 } 577 }
561 return 0; 578 return 0;
562} 579}
580
581/* object has to be reserved */
582struct radeon_bo_va *radeon_bo_va(struct radeon_bo *rbo, struct radeon_vm *vm)
583{
584 struct radeon_bo_va *bo_va;
585
586 list_for_each_entry(bo_va, &rbo->va, bo_list) {
587 if (bo_va->vm == vm) {
588 return bo_va;
589 }
590 }
591 return NULL;
592}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index cc236fb128ae..cde430308870 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -83,6 +83,16 @@ static inline bool radeon_bo_is_reserved(struct radeon_bo *bo)
83 return !!atomic_read(&bo->tbo.reserved); 83 return !!atomic_read(&bo->tbo.reserved);
84} 84}
85 85
86static inline unsigned radeon_bo_ngpu_pages(struct radeon_bo *bo)
87{
88 return (bo->tbo.num_pages << PAGE_SHIFT) / RADEON_GPU_PAGE_SIZE;
89}
90
91static inline unsigned radeon_bo_gpu_page_alignment(struct radeon_bo *bo)
92{
93 return (bo->tbo.mem.page_alignment << PAGE_SHIFT) / RADEON_GPU_PAGE_SIZE;
94}
95
86/** 96/**
87 * radeon_bo_mmap_offset - return mmap offset of bo 97 * radeon_bo_mmap_offset - return mmap offset of bo
88 * @bo: radeon object for which we query the offset 98 * @bo: radeon object for which we query the offset
@@ -128,6 +138,8 @@ extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
128 struct ttm_mem_reg *mem); 138 struct ttm_mem_reg *mem);
129extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); 139extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
130extern int radeon_bo_get_surface_reg(struct radeon_bo *bo); 140extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
141extern struct radeon_bo_va *radeon_bo_va(struct radeon_bo *rbo,
142 struct radeon_vm *vm);
131 143
132/* 144/*
133 * sub allocation 145 * sub allocation
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 465fb34c197b..e8bc70933d1b 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -132,6 +132,7 @@ retry:
132 (*ib)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr; 132 (*ib)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr;
133 (*ib)->gpu_addr += (*ib)->sa_bo.offset; 133 (*ib)->gpu_addr += (*ib)->sa_bo.offset;
134 (*ib)->fence = fence; 134 (*ib)->fence = fence;
135 (*ib)->vm_id = 0;
135 /* ib are most likely to be allocated in a ring fashion 136 /* ib are most likely to be allocated in a ring fashion
136 * thus rdev->ib_pool.head_id should be the id of the 137 * thus rdev->ib_pool.head_id should be the id of the
137 * oldest ib 138 * oldest ib
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index be94be6d6f17..d7079f42624b 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -509,6 +509,7 @@ typedef struct {
509#define DRM_RADEON_GEM_SET_TILING 0x28 509#define DRM_RADEON_GEM_SET_TILING 0x28
510#define DRM_RADEON_GEM_GET_TILING 0x29 510#define DRM_RADEON_GEM_GET_TILING 0x29
511#define DRM_RADEON_GEM_BUSY 0x2a 511#define DRM_RADEON_GEM_BUSY 0x2a
512#define DRM_RADEON_GEM_VA 0x2b
512 513
513#define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) 514#define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
514#define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) 515#define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -550,6 +551,7 @@ typedef struct {
550#define DRM_IOCTL_RADEON_GEM_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling) 551#define DRM_IOCTL_RADEON_GEM_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling)
551#define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) 552#define DRM_IOCTL_RADEON_GEM_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling)
552#define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) 553#define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy)
554#define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va)
553 555
554typedef struct drm_radeon_init { 556typedef struct drm_radeon_init {
555 enum { 557 enum {
@@ -872,12 +874,42 @@ struct drm_radeon_gem_pwrite {
872 uint64_t data_ptr; 874 uint64_t data_ptr;
873}; 875};
874 876
877#define RADEON_VA_MAP 1
878#define RADEON_VA_UNMAP 2
879
880#define RADEON_VA_RESULT_OK 0
881#define RADEON_VA_RESULT_ERROR 1
882#define RADEON_VA_RESULT_VA_EXIST 2
883
884#define RADEON_VM_PAGE_VALID (1 << 0)
885#define RADEON_VM_PAGE_READABLE (1 << 1)
886#define RADEON_VM_PAGE_WRITEABLE (1 << 2)
887#define RADEON_VM_PAGE_SYSTEM (1 << 3)
888#define RADEON_VM_PAGE_SNOOPED (1 << 4)
889
890struct drm_radeon_gem_va {
891 uint32_t handle;
892 uint32_t operation;
893 uint32_t vm_id;
894 uint32_t flags;
895 uint64_t offset;
896};
897
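A hedged userspace sketch of using the new ioctl defined above to map a GEM buffer into the client's address space; the helper name, fd, handle and chosen address are placeholders, and only the struct, flags and result codes come from this header. The offset has to lie above the reserved area reported by RADEON_INFO_VA_START (see below).

/* Illustrative only: map an existing GEM handle at virtual address 'va'.
 * The kernel rejects the VALID/SYSTEM bits (kernel-managed) and currently
 * requires snooped mappings, see radeon_gem_va_ioctl() above. */
#include <stdint.h>
#include <sys/ioctl.h>
#include "radeon_drm.h"	/* assumed libdrm copy of this header */

static int radeon_map_bo_va(int fd, uint32_t handle, uint64_t va)
{
	struct drm_radeon_gem_va args = {
		.handle    = handle,
		.operation = RADEON_VA_MAP,
		.vm_id     = 0,		/* must be 0 for now */
		.flags     = RADEON_VM_PAGE_READABLE |
			     RADEON_VM_PAGE_WRITEABLE |
			     RADEON_VM_PAGE_SNOOPED,
		.offset    = va,
	};

	if (ioctl(fd, DRM_IOCTL_RADEON_GEM_VA, &args) != 0)
		return -1;
	/* the operation field is reused to return the result */
	return args.operation == RADEON_VA_RESULT_OK ? 0 : -1;
}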
875#define RADEON_CHUNK_ID_RELOCS 0x01 898#define RADEON_CHUNK_ID_RELOCS 0x01
876#define RADEON_CHUNK_ID_IB 0x02 899#define RADEON_CHUNK_ID_IB 0x02
877#define RADEON_CHUNK_ID_FLAGS 0x03 900#define RADEON_CHUNK_ID_FLAGS 0x03
878 901
879/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */ 902/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
880#define RADEON_CS_KEEP_TILING_FLAGS 0x01 903#define RADEON_CS_KEEP_TILING_FLAGS 0x01
904#define RADEON_CS_USE_VM 0x02
905/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
906#define RADEON_CS_RING_GFX 0
907#define RADEON_CS_RING_COMPUTE 1
908/* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
909/* 0 = normal, + = higher priority, - = lower priority */
910struct drm_radeon_cs_ring_priority {
911 int32_t priority;
912};
881 913
882struct drm_radeon_cs_chunk { 914struct drm_radeon_cs_chunk {
883 uint32_t chunk_id; 915 uint32_t chunk_id;
@@ -916,6 +948,10 @@ struct drm_radeon_cs {
916#define RADEON_INFO_NUM_TILE_PIPES 0x0b /* tile pipes for r600+ */ 948#define RADEON_INFO_NUM_TILE_PIPES 0x0b /* tile pipes for r600+ */
917#define RADEON_INFO_FUSION_GART_WORKING 0x0c /* fusion writes to GTT were broken before this */ 949#define RADEON_INFO_FUSION_GART_WORKING 0x0c /* fusion writes to GTT were broken before this */
918#define RADEON_INFO_BACKEND_MAP 0x0d /* pipe to backend map, needed by mesa */ 950#define RADEON_INFO_BACKEND_MAP 0x0d /* pipe to backend map, needed by mesa */
951/* virtual address start, va < start are reserved by the kernel */
952#define RADEON_INFO_VA_START 0x0e
953/* maximum size of ib using the virtual memory cs */
954#define RADEON_INFO_IB_VM_MAX_SIZE 0x0f
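A companion sketch of probing VM support with the two new info requests; it assumes, as with the existing RADEON_INFO queries, that info.value carries a user pointer the kernel writes the 32-bit result through, and the helper name is illustrative. Pre-Cayman parts return -EINVAL here, which userspace can treat as "no VM support".

#include <stdint.h>
#include <sys/ioctl.h>
#include "radeon_drm.h"	/* assumed libdrm copy of this header */

/* Illustrative only: fetch the first usable virtual address (everything
 * below it is reserved by the kernel). Returns the ioctl's error code. */
static int radeon_query_va_start(int fd, uint32_t *va_start)
{
	struct drm_radeon_info info = {
		.request = RADEON_INFO_VA_START,
		.value   = (uintptr_t)va_start,	/* kernel copies the result here */
	};

	return ioctl(fd, DRM_IOCTL_RADEON_INFO, &info);
}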
919 955
920struct drm_radeon_info { 956struct drm_radeon_info {
921 uint32_t request; 957 uint32_t request;