diff options
author | marcheu@polious <marcheu@polious> | 2010-04-11 14:25:53 +0000 |
---|---|---|
committer | marcheu@polious <marcheu@polious> | 2010-04-11 14:25:53 +0000 |
commit | 6b01699963d0d4c1a2e37af81e962b925049e154 (patch) | |
tree | 5ecc9a5bab7f213c7fac7376c2fcad2aa2cae221 |
Initial import
-rw-r--r-- | linuxgraphicsdrivers.lyx | 6572 | ||||
-rw-r--r-- | myfncychap.sty | 683 |
2 files changed, 7255 insertions, 0 deletions
diff --git a/linuxgraphicsdrivers.lyx b/linuxgraphicsdrivers.lyx new file mode 100644 index 0000000..03bbdb4 --- /dev/null +++ b/linuxgraphicsdrivers.lyx @@ -0,0 +1,6572 @@ +#LyX 1.6.0 created this file. For more info see http://www.lyx.org/ +\lyxformat 345 +\begin_document +\begin_header +\textclass book +\begin_preamble +\usepackage[Lenny]{myfncychap} +\usepackage{listings} +\usepackage{color} +\usepackage{geometry} +\usepackage{tikz} +\usepackage{array} + +\usetikzlibrary{positioning,shadows,arrows,shapes,patterns} +\usepackage{verbatim} +\tikzset{ + mynode/.style={rectangle,rounded corners,draw=black, top color=white, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text centered}, + myarrow/.style={->, >=latex', shorten >=1pt, thick}, + mylabel/.style={text width=7em, text centered} +} + +\renewcommand{\chaptermark}[1]{\markboth{\thechapter.\ #1}{}} +\renewcommand{\sectionmark}[1]{\markright{\thesection.\ #1}} +\fancyhead{} +\fancyhead[LE]{\bfseries\leftmark} +\fancyhead[RO]{\bfseries\rightmark} +\fancyfoot{} +\fancyfoot[LE,RO]{\bfseries\thepage} + +\fancypagestyle{plain}{ +\renewcommand{\headrulewidth}{0pt} +\fancyhead{} +\fancyhead[LE]{} +\fancyhead[RO]{} +\fancyfoot{} +\fancyfoot[LE,RO]{\bfseries\thepage} +} + +\fancypagestyle{Contents}{ +\fancyfoot{} +\fancyfoot[LE,RO]{\bfseries\thepage} } + + +\def\contentsname{Table of Contents} + + +\definecolor{listinggray}{gray}{0.95} +\lstset{basicstyle=\small,keywordstyle=,tabsize=3,escapechar=`,extendedchars=true} +\lstset{backgroundcolor=\color{listinggray},rulecolor=\color{black}} +\lstset{commentstyle=\textit, stringstyle=\upshape,showspaces=false} +\lstset{showstringspaces=false} +\lstset{frame=single} +\lstset{breaklines=true} +\lstset{language=C} +\lstset{basicstyle=\footnotesize} +\lstset{columns=flexible} +\end_preamble +\use_default_options true +\begin_modules +theorems-ams +\end_modules +\language english +\inputencoding auto +\font_roman palatino +\font_sans default +\font_typewriter 
default +\font_default_family default +\font_sc false +\font_osf false +\font_sf_scale 100 +\font_tt_scale 100 + +\graphics default +\paperfontsize 10 +\spacing single +\use_hyperref true +\pdf_title "Linux Graphics Drivers: an Introduction" +\pdf_author "Stéphane Marchesin" +\pdf_bookmarks true +\pdf_bookmarksnumbered false +\pdf_bookmarksopen false +\pdf_bookmarksopenlevel 1 +\pdf_breaklinks false +\pdf_pdfborder true +\pdf_colorlinks true +\pdf_backref false +\pdf_pdfusetitle true +\pdf_quoted_options "linkcolor=cyan" +\papersize b5paper +\use_geometry true +\use_amsmath 1 +\use_esint 1 +\cite_engine basic +\use_bibtopic false +\paperorientation portrait +\leftmargin 2.5cm +\topmargin 2.5cm +\rightmargin 1.7cm +\bottommargin 2.5cm +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation skip +\defskip medskip +\quotes_language english +\papercolumns 1 +\papersides 2 +\paperpagestyle fancy +\bullet 0 0 17 -1 +\tracking_changes false +\output_changes false +\author "" +\author "" +\end_header + +\begin_body + +\begin_layout Title +Linux Graphics Drivers: an Introduction +\begin_inset Newline newline +\end_inset + + +\size small +Version 2 +\end_layout + +\begin_layout Author +Stéphane Marchesin +\begin_inset Newline newline +\end_inset + +<stephane.marchesin@gmail.com> +\end_layout + +\begin_layout Standard +\begin_inset CommandInset toc +LatexCommand tableofcontents + +\end_inset + + +\end_layout + +\begin_layout Chapter +Introduction +\begin_inset CommandInset label +LatexCommand label +name "cha:Introduction" + +\end_inset + + +\end_layout + +\begin_layout Standard + +\lang french +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +markboth{ }{ Introduction } +\end_layout + +\end_inset + + +\lang english +Accelerating graphics is a complex art which suffers a mostly unjustified + reputation of being voodoo magic. + This book is intended as an introduction to the inner workings and development + of graphics drivers under Linux. 
+ Throughout this whole book, knowledge of C programming is expected, along + with some familiarity with graphics processors. + Although its primary audience is the graphics driver developer, this book + details the internals of the full Linux graphics stack and therefore can + also be useful to application developers seeking to enhance their vision + of the Linux graphics world: one can hope to improve the performance of + one's applications through a better understanding of the Linux graphics stack. + In this day and age of pervasive 3D graphics and GPU computing, a better + comprehension of graphics is a must-have! +\end_layout + +\begin_layout Section +Book overview +\end_layout + +\begin_layout Standard +The book starts with an introduction to relevant hardware concepts (Chapter + +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:A-Look-at" + +\end_inset + +). + Only concepts directly relevant to the graphics driver business are presented + there. + Then we paint a high-level view of the Linux graphics stack in Chapter + +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:The-Big-Picture" + +\end_inset + + and its evolution over the years. + Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Framebuffer-Drivers" + +\end_inset + + introduces framebuffer drivers, a basic form of graphics drivers under + Linux that, although primitive, sees wide usage in the embedded space. + Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:The-DRM-Kernel" + +\end_inset + + introduces the DRM, a kernel module which is in charge of arbitrating all + graphics activity going on in a Linux system. + The next chapter (Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:X.Org-Drivers" + +\end_inset + +) focuses on X.Org drivers and the existing acceleration APIs available to + the developer. 
+ Video decoding sees its own dedicated part in Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Video-Decoding" + +\end_inset + +. + We then move on to 3D acceleration with Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:OpenGL" + +\end_inset + + where we introduce the basic concepts of OpenGL. + Chapters +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Mesa" + +\end_inset + + and +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Gallium-3D" + +\end_inset + + are dedicated to Mesa and Gallium 3D, the two foundations of 3D graphics + acceleration under Linux used as the framework for 3D drivers. + Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:GPU-Computing" + +\end_inset + + tackles an emerging field, GPU computing. + Next, we discuss suspend and resume in Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Suspend-and-Resume" + +\end_inset + +. + We then discuss two side issues with Linux graphics drivers: technical + specifications in Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Technical-Specifications" + +\end_inset + + and what you should do aside from pure development in Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Beyond-Development" + +\end_inset + +. + Finally, we conclude in Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Conclusions" + +\end_inset + +. +\end_layout + +\begin_layout Standard +Each chapter finishes with the +\begin_inset Quotes eld +\end_inset + +takeaways +\begin_inset Quotes erd +\end_inset + +, a number of relevant points that we made during said chapter. +\end_layout + +\begin_layout Section +What this book does not cover +\end_layout + +\begin_layout Standard +Computer graphics move at a fast pace, and this book is not about the past. 
+ Obsolete hardware (isa, vlb, ...), old standards (the vga standard and its + dreadful int10, vesa), outdated techniques (user space modesetting) and + old X11 servers (Xsun, XFree86, KDrive...) will not be detailed. +\end_layout + +\begin_layout Chapter +A Look at the Hardware +\begin_inset CommandInset label +LatexCommand label +name "cha:A-Look-at" + +\end_inset + + +\end_layout + +\begin_layout Standard +Before diving any further into the subject of graphics drivers, we need + to understand the hardware which is at play. + This chapter is by no means intended to be a complete description of all + inner workings of your average computer and its graphics hardware, but + only as an introduction thereof. + The goal of this section is to +\begin_inset Quotes eld +\end_inset + +cover the bases +\begin_inset Quotes erd +\end_inset + + on what will be required later on. + Notably, most hardware concepts that will subsequently be required are + introduced here. + Although we sometimes have to go through architecture-specific hoops, we + try to stay as generic as possible and the concepts detailed thereafter + generalize well. +\end_layout + +\begin_layout Section +Hardware Overview +\end_layout + +\begin_layout Standard +Today all computers are architectured the same way: a central processor + and a number of peripherals. + In order to exchange data, these peripherals are interconnected by a bus + over which all communications go. + Figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:Peripheral-interconnection-in" + +\end_inset + + outlines the layout of peripherals in a standard computer. 
+\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, myarrowtwoside/.style={<->, >=latex', shorten >=1pt, thick}, + mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width=1.5cm] (CPU) {CPU +\backslash + +\backslash + }; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width=1.5cm, right=0.8cm of CPU] (memory) {System +\backslash + +\backslash + Memory}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width=1.5cm, right=0.8cm of memory] (GPU) {Graphics +\backslash + +\backslash + Card}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width=1.5cm, right=0.8cm of GPU] (network) {Network +\backslash + +\backslash + Card}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[right = 0.8cm of network] {$ +\backslash +cdots$}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 10cm, below=2cm of GPU] (bus) {Bus}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrowtwoside] (CPU.south) -> ++(0,-2) (bus); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrowtwoside] (GPU.south) -> ++(0,-2) (bus); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrowtwoside] (memory.south) -> ++(0,-2) (bus); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrowtwoside] (network.south) -> 
++(0,-2) (bus); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +node[mynode, below=2cm of GPU] (iommu) {IOMMU}; +\end_layout + +\begin_layout Plain Layout + +% +\backslash +node[mynode, left=1cm of mmu] (mmupt) {MMU page table}; +\end_layout + +\begin_layout Plain Layout + +% +\backslash +node[mynode, right=1cm of iommu] (iommupt) {IOMMU page table}; +\end_layout + +\begin_layout Plain Layout + +% +\backslash +node[mynode, text width=5cm, below=2cm of mmu, xshift=1.5cm] (memory) {Memory}; + +\end_layout + +\begin_layout Plain Layout + +% +\backslash +draw[myarrow] (CPU.south) -| (mmu.north); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +draw[myarrow] (GPU.south) -| (iommu.north); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +draw[myarrow] (mmu.south) -> ++(0,-2) (memory); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +draw[myarrow] (iommu.south) -> ++(0,-2) (memory); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +draw[myarrow] (mmu) -> (mmupt); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +draw[myarrow] (iommu) -> (iommupt); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +node at (4,-1.5) {GPU Address}; +\end_layout + +\begin_layout Plain Layout + +% +\backslash +node at (-1.5,-1.5) {Virtual Address}; +\end_layout + +\begin_layout Plain Layout + +% +\backslash +node at (-1.5,-4.5) {Physical Address}; +\end_layout + +\begin_layout Plain Layout + +% +\backslash +node at (4,-4.5) {Physical Address}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +\begin_inset CommandInset label +LatexCommand label +name "fig:Peripheral-interconnection-in" + +\end_inset + +Peripheral interconnection in a typical computer. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + +The first user of the bus is the CPU. + The CPU uses the bus to access system memory and other peripherals. + However, the CPU is not the only one able to write and read data to the + peripherals; the peripherals themselves also have the capability to exchange + information directly. + In particular, a peripheral which has the ability to read and write to + memory without CPU intervention is said to be DMA (Direct Memory Access) + capable, and the memory transaction is called a DMA. + Today, all graphics cards feature this ability (named DMA bus mastering) + which consists of the card requesting and subsequently taking control of + the bus for a number of microseconds. + +\end_layout + +\begin_layout Standard +If a peripheral has the ability to achieve DMA to or from a non-contiguous + list of memory pages (which is very convenient when the data is not contiguous + in memory), it is said to have DMA scatter-gather capability (as it can + scatter data to different memory pages, or gather data from different pages). +\end_layout + +\begin_layout Standard +Notice that the DMA capability can be a downside in some cases. + For example on real-time systems, this means the CPU is unable to access + the bus while a DMA transaction is in progress, and since DMA transactions + happen asynchronously this can lead to missing a real-time scheduling deadline. + Therefore, while DMA has a lot of advantages from a performance viewpoint, + there are situations where it should be avoided. +\end_layout + +\begin_layout Section +Bus types +\end_layout + +\begin_layout Standard +Buses connect the machine peripherals together; each and every communication + between different peripherals goes over (at least) one bus. + In particular, a bus is the way most graphics cards are connected to the + rest of the computer (one notable exception being the case of some embedded + systems, where the GPU is directly connected to the CPU). 
+ As shown in Table +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:Common-bus-types" + +\end_inset + +, there are many bus types suitable for graphics: PCI, AGP, PCI-X, PCI-express + to name a (relevant) few. + All the bus types we will detail are variants of the PCI bus type, however + some of them feature singular improvements over the original PCI design. +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset Tabular +<lyxtabular version="3" rows="8" columns="5"> +<features> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Bus type +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Bus width +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Frequency +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Bandwidth +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Capabilities +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" 
topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +PCI +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +32 bits +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +33 Mhz +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +133 Mb/s (33 Mhz) +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +AGP +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +32 bits +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +66 Mhz +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2100Mb/s (8x) +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +SBA, FW, +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + 
+\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +GART +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +PCI-X +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +64 bits +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +33, 66, +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +533 Mb/s (66 Mhz) +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +<cell 
alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +133 Mhz +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +PCI-Express (1.0) +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Serial +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1.25 Ghz +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +4Gb/s (16 lanes) +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +PCI-Express (3.0) +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Serial +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> 
+\begin_inset Text + +\begin_layout Plain Layout +4 Ghz +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +16Gb/s (16 lanes) +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +- +\end_layout + +\end_inset +</cell> +</row> +</lyxtabular> + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +\begin_inset CommandInset label +LatexCommand label +name "fig:Common-bus-types" + +\end_inset + +Common bus types. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subparagraph* +PCI (Peripheral Component Interconnect) +\end_layout + +\begin_layout Standard +PCI is the most basic bus for connecting graphics peripherals today. + One of its key features is called bus mastering. + This feature allows a given peripheral to take hold of the bus for a given + number of cycles and do a complete transaction (called a DMA, Direct Memory + Access). + The PCI bus is coherent, which means that no explicit flushes are required + for the memory to be coherent across devices. +\end_layout + +\begin_layout Subparagraph* +AGP (Accelerated Graphics Port) +\end_layout + +\begin_layout Standard +AGP is essentially a modified PCI bus with a number of extra features compared + to its ancestor. + Most importantly, it is faster thanks to a higher clock speed and the ability + to send 2, 4 or 8 bits per lane on each clock tick (for AGP 2x, 4x and + 8x respectively). 
+ AGP also has three distinctive features: +\end_layout + +\begin_layout Itemize +The first feature is AGP GART (Graphics Aperture Remapping Table), a simple + form of IOMMU (as will be seen in section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Virtual-and-Physical" + +\end_inset + +). + It allows taking a (non-contiguous) set of physical memory pages out of + system memory and exposing it to the GPU for its use as a contiguous area. + This increases the amount of memory usable by the GPU at little cost, and + creates a convenient area for sharing data between the CPU and the GPU + (AGP graphics cards can do fast DMA to/from this area, and since the GART + area is a chunk of system RAM, CPU access to it is a lot faster than to VRAM). + One notable drawback is that the GART area is not coherent, and therefore + writes to GART (be it from the GPU or CPU) need to be flushed before transactio +ns from the other party can begin. + Another drawback is that only a single GART area is handled by the hardware, + and it has to be sub-allocated by the driver. +\end_layout + +\begin_layout Itemize +The second feature is AGP side band addressing (SBA). + Side band addressing consists of 8 extra bus bits used as an address bus. + Instead of multiplexing the bus bandwidth between addresses and data, the + nominal AGP bandwidth can be dedicated to data only. + This feature is transparent to the driver developer. +\end_layout + +\begin_layout Itemize +The third feature is AGP Fast Writes (FW). + Fast writes allow sending data to the graphics card directly, without having + the card initiate a DMA. + This feature is also transparent to the driver developer. +\end_layout + +\begin_layout Standard +Keep in mind that these last two features are known to be unstable on a + wide range of hardware, and oftentimes require chipset-specific hacks to + work properly. + Therefore it is advisable not to enable them. 
+ In fact, they are an extremely frequent cause of strange hardware errors + on AGP cards. +\end_layout + +\begin_layout Subparagraph* +PCI-X +\end_layout + +\begin_layout Standard +PCI-X was developed as a faster PCI for server boards, and very few graphics + peripherals exist in this format. + It is not to be confused with PCI-Express, which sees real widespread usage. +\end_layout + +\begin_layout Subparagraph* +PCI-Express (PCI-E) +\end_layout + +\begin_layout Standard +PCI-Express is the new generation of PCI devices. + It offers more advantages than a merely improved PCI would. +\end_layout + +\begin_layout Standard +Finally, it is important to note that, depending on the architecture, the + CPU-GPU communication does not always rely on a bus. + This is especially common on embedded systems where the GPU and the CPU + are on a single die. + In that case the CPU can access the GPU registers directly. +\end_layout + +\begin_layout Section +Virtual and Physical Memory +\begin_inset CommandInset label +LatexCommand label +name "sec:Virtual-and-Physical" + +\end_inset + + +\end_layout + +\begin_layout Standard +The term +\begin_inset Quotes eld +\end_inset + +memory +\begin_inset Quotes erd +\end_inset + + has two main meanings: +\end_layout + +\begin_layout Itemize +Physical memory. + Physical memory is real, hardware memory, as stored in the memory chips. + +\end_layout + +\begin_layout Itemize +Virtual memory. + Virtual memory is a translation of physical memory addresses allowing user + space applications to see their allocated chunks as if they were contiguous + while they are fragmented and scattered on the chips. +\end_layout + +\begin_layout Standard +In order to simplify programming, it is easier to handle contiguous memory + areas. + This is easy to achieve as long as only a small area is needed. 
+ But allocating a bigger memory chunk would require as much contiguous physical + memory, which is difficult if not impossible to achieve shortly after bootup + because of memory fragmentation. + Therefore, a mechanism is required to keep the appearance of a contiguous + piece of memory to the application while using scattered pieces. + +\end_layout + +\begin_layout Standard +To achieve this, memory is split into pages. + For the scope of this book, it is sufficient to say that a memory page + is a collection of contiguous bytes in physical memory +\begin_inset Foot +status open + +\begin_layout Plain Layout +On x86 and x86-64, a page is usually 4096 bytes long, although different + sizes are possible on other architectures or with huge pages. +\end_layout + +\end_inset + +. + In order to make a scattered list of physical pages seem contiguous in virtual + space, a piece of hardware called MMU (memory management unit) converts virtual + addresses (used in applications) into physical addresses (used for actually + accessing memory) using a page table as shown in Figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:MMU-and-IOMMU" + +\end_inset + +. + In case a page does not exist in virtual space (and therefore not in the + MMU table), the MMU is able to signal it, which provides the basic mechanism + for reporting access to non-existent memory areas. + This in turn is used by the system to implement advanced memory programming + like swapping or on-the-fly page instantiations. + As the MMU is only effective for CPU access to memory, virtual addresses + are not relevant to peripheral hardware since it is not able to match them to + physical addresses. 
+\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode] (CPU) {CPU}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=of CPU] (GPU) {GPU}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below=2cm of CPU] (mmu) {MMU}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below=2cm of GPU] (iommu) {IOMMU}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, left=1cm of mmu] (mmupt) {MMU page table}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=1cm of iommu] (iommupt) {IOMMU page table}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width=5cm, below=2cm of mmu, xshift=1.5cm] (memory) {Memory}; + +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (CPU.south) -| (mmu.north); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (GPU.south) -| (iommu.north); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (mmu.south) -> ++(0,-2) (memory); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (iommu.south) -> ++(0,-2) (memory); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (mmu) -> (mmupt); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] 
(iommu) -> (iommupt); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (4,-1.5) {GPU Address}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (-1.5,-1.5) {Virtual Address}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (-1.5,-4.5) {Physical Address}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (4,-4.5) {Physical Address}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +\begin_inset CommandInset label +LatexCommand label +name "fig:MMU-and-IOMMU" + +\end_inset + +MMU and IOMMU. +\end_layout + +\end_inset + + +\begin_inset Note Note +status open + +\begin_layout Plain Layout +XXX ajouter les tables de page à ce dessin +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +While the MMU only works for CPU accesses, it has an equivalent for peripherals: + the IOMMU. + As shown on figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:MMU-and-IOMMU" + +\end_inset + +, an IOMMU is the same as an MMU except that it virtualizes the address + space of peripherals. + The IOMMU can see various incarnations, either on the motherboard chipset + (in which case it is shared between all peripherals) or on the graphics + card itself (where it will be called AGP GART, PCI GART). + The job of the IOMMU is to translate memory addresses from the peripherals + into physical addresses. + In particular, this allows +\begin_inset Quotes eld +\end_inset + +fooling +\begin_inset Quotes erd +\end_inset + + a device into restricting its DMAs to a given range of memory and it is + required for better security and hardware virtualization. 
+\end_layout + +\begin_layout Standard +A special case of IOMMU is the Linux swiotlb which allocates a contiguous + piece of physical memory at boot (which makes it feasible to have a large + contiguous physical allocation since there is no fragmentation yet) and + uses it for DMA. + As the memory is physically contiguous, no page translation is required + and therefore a DMA can occur to and from this memory range. + However, this means that this memory (64MB by default) is preallocated + and will not be used for anything else. +\end_layout + +\begin_layout Standard +AGP GART is another special case of IOMMU present with AGP graphics cards + which exposes a single linear area to the card. + In that case the IOMMU table is embedded in the AGP chipset, on the motherboard. +\begin_inset Note Note +status open + +\begin_layout Plain Layout +Dire que c'est lineaire en memoire physique et virtu +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Yet another special case of IOMMU is the PCI GART which allows exposing + a chunk of system memory to the card. + In that case the IOMMU table is embedded in the graphics card, and often + the physical memory used does not need to be contiguous. 
+\end_layout + +\begin_layout Standard +\begin_inset Note Note +status open + +\begin_layout Plain Layout +http://images.google.fr/images?hl=fr&source=hp&q=page+table&btnG=Recherche+d'image +s&gbv=2&aq=f&oq= +\end_layout + +\begin_layout Plain Layout +http://pages.cs.wisc.edu/~bart/537/lecturenotes/s16.html +\end_layout + +\begin_layout Plain Layout +http://a.michelizza.free.fr/pmwiki.php?n=TutoOS.Mm3 +\end_layout + +\begin_layout Plain Layout +http://lwn.net/Articles/106177/ +\end_layout + +\begin_layout Plain Layout +http://www.vocw.edu.vn/content/m10106/latest/ +\end_layout + +\begin_layout Plain Layout +http://cs.nyu.edu/courses/spring05/G22.2250-001/lectures/lecture-08.html +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Obviously, with so many different memory types, performance is not homogeneous; + not all combinations of accesses are fast, depending on whether they involve + the CPU, the GPU, or bus transfers. + Another issue which arises is memory coherence: how can one ensure that + memory is coherent across devices, in particular that data written by + the CPU is available to the GPU (or the opposite)? + These two issues are correlated, as higher performance usually means a + lower level of memory coherence, and vice-versa. +\end_layout + +\begin_layout Standard +As far as setting the memory caching parameters goes, there are two ways + to set caching attributes on memory ranges: +\end_layout + +\begin_layout Itemize +MTRRs. + An MTRR (Memory Type Range Register) is a register describing attributes + for a range of given physical memory. + The number of MTRRs depends on the system, but is very limited. + Although this applies to a physical memory range, the effect works on the + corresponding virtual memory pages. + This for example makes it possible to map pages with a specific caching + type. 
+\begin_inset Note Note +status open + +\begin_layout Plain Layout +XXX des exemples +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Itemize +PAT (Page Attribute Table) allows setting per-page memory attributes. + However it is an extension only available on recent x86 processors. +\end_layout + +\begin_layout Standard +On top of these, one can use explicit caching instructions on some architectures +, for example on x86 +\emph on +movntq +\emph default + is an uncached mov instruction and +\emph on +clflush +\emph default + can selectively flush cache lines. +\end_layout + +\begin_layout Standard +There are 3 caching modes, usable both through MTRR and PAT on system memory: +\end_layout + +\begin_layout Itemize +UC (UnCached) memory is uncached. + No CPU read/writes to this area are cached, and each memory write instruction + triggers an actual immediate memory write. + This is helpful to ensure that information has been actually written so + as to avoid CPU/GPU race conditions. +\end_layout + +\begin_layout Itemize +WC (Write Combine) memory is uncached, but CPU writes are combined together + in order to improve the performance. + This is useful to improve performance in situations where uncached memory + is required, but where combining the writes together has no adverse effects. +\end_layout + +\begin_layout Itemize +WB (Write Back) memory is cached. + This is the default mode and leads to the best performance for CPU accesses. + However this does not ensure that memory writes are propagated to central + memory after a finite time. +\end_layout + +\begin_layout Standard +Notice that these caching modes apply to the CPU only, the GPU accesses + are not directly affected by the current caching mode. + However, when the GPU has to access an area of memory which was previously + filled by the CPU, uncached modes ensure that the memory writes are actually + done, and are not pending sitting in a CPU cache. 
+ Another way to achieve the same effect is the use of cache flushing instruction +s present on some x86 processors (like clflush). + However this is less portable than using the caching modes. + Yet another (portable) way is the use of memory barriers, which ensure + that pending memory writes have been committed to main memory before moving + on. +\end_layout + +\begin_layout Standard +Obviously with so many different caching modes, not all accesses have the + same performance: +\end_layout + +\begin_layout Itemize +When it comes to CPU access to system memory, uncached mode provides the + worst performance, write back provides the best performance, and write + combine is in between. +\end_layout + +\begin_layout Itemize +When the CPU accesses the video memory from a discrete card, all accesses + are extremely slow, be they reads or writes, as each access needs a cycle + on the bus. + Therefore it is not recommended to access large areas of VRAM with the + CPU. + Furthermore on some GPUs synchronizing is required or this could cause + a GPU hang. +\end_layout + +\begin_layout Itemize +Obviously the GPU accessing VRAM is extremely fast. +\end_layout + +\begin_layout Itemize +GPU access to system ram is unaffected by the caching mode, but still has + to go over the bus. + This is the case of DMA transactions. + As those happen asynchronously, they can be considered +\begin_inset Quotes eld +\end_inset + +free +\begin_inset Quotes erd +\end_inset + + from the viewpoint of the CPU, however there is a non-negligible setup + cost involved for each DMA transaction. + This is why, when transferring small amounts of memory, a DMA transaction + is not always better than a direct CPU access. +\end_layout + +\begin_layout Standard +Finally, one last important point to make about memory is the notion of + memory barriers and write posting. 
+ In the case of a cached (Write Combine or Write Back) memory area, a memory + barrier ensures that pending writes have actually been committed to memory. + This is used, for example, before asking the GPU to read a given memory + area. + For I/O areas, a similar technique called write posting exists: it consists + in doing a dummy read inside the I/O area which will, as a side effect, + wait until pending writes have taken effect before completing. +\end_layout + +\begin_layout Section +The Graphics Card +\end_layout + +\begin_layout Standard +Today, a graphics card is basically a computer-in-the-computer. + It is a complex beast with a dedicated processor on a separate card, and + features its own computation units, its own bus, and its own memory. + +\end_layout + +\begin_layout Subsubsection* +Graphics Memory +\end_layout + +\begin_layout Standard +The GPU's memory, which we will from now on refer to as video memory, can + be either real, dedicated, on-card memory (in the case of a discrete card), + or memory shared with the CPU (in the case of an integrated card). + Notice that the case of shared memory has interesting implications, as + it means that system to video memory copies can be virtually free if implemente +d properly; while the case of dedicated memory means that transfers back + and forth will need to happen. + +\end_layout + +\begin_layout Standard +It is not uncommon for modern GPUs to feature a form of virtual memory as + well, allowing different resources (real video memory or system + memory) to be mapped into the GPU address space. + This is very similar to the CPU's virtual memory, but uses a completely + separate hardware implementation. + For example, older Radeon cards (actually since Rage 128) feature a number + of surfaces which you can map into the GPU address space, each of which + is a contiguous memory resource (video ram, AGP, PCI). 
+ Old Nvidia cards (everything up to NV40) have a similar concept based on + objects which describe an area of memory which can then be bound to a given + use. + Recent cards (starting with NV50 and R800) let you build the address space + page by page, with the ability of picking system and dedicated video memory + pages at will. + The similarity of these with a CPU virtual address space is very striking; + in fact you can have accesses to unmapped pages be signaled to you through + an interrupt and act on this in a video memory page fault handler. + However, be careful playing with those as the implication here is that + driver developers have to juggle with multiple address spaces from the + CPU and GPU which are going to be fundamentally different. +\end_layout + +\begin_layout Subsubsection* +Surfaces +\end_layout + +\begin_layout Standard +Surfaces are the basic sources and targets for all rendering. + Although they can be called differently (textures, render targets, buffers...) + the basic idea is always the same. + Figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:The-layout-of" + +\end_inset + + depicts the layout of a graphics surface. + The surface width is rounded up to what we call the pitch because of hardware + limitations (usually to the next multiple of some power of 2) and therefore + there exists a dead zone of pixels which goes unused. + The graphics surface has a number of characteristics: +\end_layout + +\begin_layout Itemize +The pixel format of the surface. + A pixel color is represented in memory by its red, green and blue components, + plus an alpha component used as the opacity for blending. + The number of bits for a whole pixel usually matches hardware sizes (8,16 + or 32 bits) but the repartition of the bits between the four components + does not have to match those. + The number of bits used for each pixel is referred to as bits per pixel, + or +\emph on +bpp +\emph default +. 
+ Common pixel formats include 888 RGBX, 8888 RGBA, 565 RGB, 5551 RGBA, + 4444 RGBA +\begin_inset Note Note +status open + +\begin_layout Plain Layout +, YUV12, YUY16 +\end_layout + +\end_inset + +. + Notice that most cards today work natively in ABGR 8888. +\end_layout + +\begin_layout Itemize +Width and height are the most obvious characteristics, and are given in + pixels. + +\end_layout + +\begin_layout Itemize +The pitch is the width in bytes (not in pixels!) of the surface, including + the dead zone pixels. + The pitch is convenient for computing memory usages, for example the size + of the surface should be computed by +\begin_inset Formula $height\times pitch$ +\end_inset + + and not +\begin_inset Formula $height\times width\times bpp$ +\end_inset + + in order to include the dead zone. +\end_layout + +\begin_layout Standard +Notice that surfaces are not always stored linearly in video memory, in + fact for performance reasons it is extremely common that they are not, + as this improves the locality of the memory accesses when rendering. + Such surfaces are called +\emph on +tiled +\emph default +. + The exact layout of a tiled surface is highly dependent on the hardware, + but is usually a form of space-filling curve like the Z curve or the Hilbert + curve. 
+\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +hspace{-4cm} +\end_layout + +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikz{ +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner + sep=1em] (2,2) rectangle (10,7); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[pattern = north east lines] (8.5,2) rectangle (10,7); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (2,7.5) -- +(6.5,0); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (1.5,2) -- +(0,5); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (2,1.5) -- +(8,0); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (6,8) {Surface width}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (6,1) {Surface pitch}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (0,4.5) {Surface height}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (5.2,4.5) {Used pixels}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (9.2,4.8) {Dead}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (9.2,4.3) {zone}; +\end_layout + +\begin_layout Plain 
Layout + + +\backslash +node at (6,0.5) { }; +\end_layout + +\begin_layout Plain Layout + +} +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +\begin_inset CommandInset label +LatexCommand label +name "fig:The-layout-of" + +\end_inset + +The layout of a surface. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection* +2D engine +\end_layout + +\begin_layout Standard +The 2D engine, or blitter, is the hardware used for 2D acceleration. + Blitters have been one of the earliest forms of graphics acceleration and + are still extremely widespread today. + Generally, a 2D engine is capable of the following operations: +\end_layout + +\begin_layout Itemize +Blits. + Blits are a copy of a memory rectangle from one place to another by the + GPU. + The source and destination can be either video or system memory. +\end_layout + +\begin_layout Itemize +Solid fills. + Solid fills consist in filling a rectangular memory area with a color. + Note that this can also include the alpha channel. +\end_layout + +\begin_layout Itemize +Alpha blits. + Alpha blits use the alpha component of pixels from a surface to achieve + transparency [Porter & Duff]. +\end_layout + +\begin_layout Itemize +Stretched blits. 
+ +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +hspace{-2cm} +\end_layout + +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikz{ +\end_layout + +\begin_layout Plain Layout + +% Source +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner + sep=1em] (2,2) rectangle (8,6); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[pattern = north east lines] (7,2) rectangle (8,6); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (4,7) {Blit width}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (3,6.5) -- +(2,0); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (0,4.5) {Blit height}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (1.5,3.5) -- +(0,2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (2,1.5) -- +(6,0); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (5,1) {Src pitch}; +\end_layout + +\begin_layout Plain Layout + +% source pixels +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw (3,3.5) rectangle (5,5.5); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (4,4.5) {Src pixels}; +\end_layout + 
+\begin_layout Plain Layout + +% Destination +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner + sep=1em] (9,2) rectangle (12,6); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[pattern = north east lines] (11.5,2) rectangle (12,6); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (9,1.5) -- +(3,0); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10.5,1) {Dst pitch}; +\end_layout + +\begin_layout Plain Layout + +% destination pixels +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw (9.2,2.5) rectangle (11.2,4.5); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10.2,3.5) {Dst pixels}; +\end_layout + +\begin_layout Plain Layout + +% relier les zones src/dst de copie +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[-,style=dashed] (9.2,2.5) -- (3,3.5); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[-,style=dashed] (11.2,2.5) -- (5,3.5); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[-,style=dashed] (11.2,4.5) -- (5,5.5); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[-,style=dashed] (9.2,4.5) -- (3,5.5); +\end_layout + +\begin_layout Plain Layout + +% faux noeud pour pas que la légende soit collée +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (6,0.5) { }; +\end_layout + +\begin_layout Plain Layout + +} +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +\begin_inset CommandInset label +LatexCommand label +name "fig:Blitting-between-two" + +\end_inset + +Blitting between two different surfaces. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:Blitting-between-two" + +\end_inset + + shows an example of blitting a rectangle between two different surfaces. + This operation is defined by the following parameters: the source and destinati +on coordinates, the source and destination pitches, and the blit width and + height. + However, this is only 2D coordinates, no perspective is possible +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +hspace{-4cm} +\end_layout + +\end_inset + + +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikz{ +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner + sep=1em] (2,2) rectangle (10,7); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[pattern = north east lines] (8.5,2) rectangle (10,7); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (2,7.5) -- +(6.5,0); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (1.5,2) -- +(0,5); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[<->] (2,1.5) -- +(8,0); +\end_layout + +\begin_layout Plain Layout + + 
+\backslash +node at (6,8) {Surface width}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (6,1) {Surface pitch}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (0,4.5) {Surface height}; +\end_layout + +\begin_layout Plain Layout + +% source pixels +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw (4,3.5) rectangle (8,6.5); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (6,6) {Src pixels}; +\end_layout + +\begin_layout Plain Layout + +% destination pixels +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw (2.5,2.5) rectangle (6.5,5.5); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (4,3) {Dst pixels}; +\end_layout + +\begin_layout Plain Layout + +% faux noeud pour pas que la légende soit collée +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (6,0.5) { }; +\end_layout + +\begin_layout Plain Layout + +} +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +\begin_inset CommandInset label +LatexCommand label +name "fig:Overlapping-blit-inside" + +\end_inset + +Overlapping blit inside a surface. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +When a blit happens between two overlapping source and destination surfaces, + the semantics of the copy is not trivially defined, especially if one considers + that what happens for a blit is not a simple move of a rectangle, but is + done pixel-by-pixel at the core. + As seen on Figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:Overlapping-blit-inside" + +\end_inset + +, if one does a line-by-line copy top to bottom, some source pixels will + be modified as a side effect. 
+ Therefore, the notion of blitting direction was introduced into the blitters. + In this case, for a proper copy a bottom to top copy is required. + Some cards will determine the blitting direction automatically according + to surface overlap (for example nvidia GPUs), and others will not. +\end_layout + +\begin_layout Standard +Finally, keep in mind that not all current graphics accelerators feature + a 2D engine. + Since 3D acceleration is technically a super-set of 2D acceleration, it + is possible to implement 2D acceleration using the 3D engine (and this + idea is one of the core ideas behind the Gallium 3D design, which will + be detailed in Chapter +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Gallium-3D" + +\end_inset + +). + And indeed some drivers use the 3D engine to implement 2D which allows + GPU makers to completely part with the transistors otherwise dedicated + to it. + Yet some other cards do not dedicate the transistors, but microprogram + 2D operations on top of 3D operations inside the GPU (this is the case + for nVidia cards since nv10 and up to nv50, and for the Radeon R600 series + which have an optional firmware that implements 2D on top of 3D). + This sometimes has an impact on mixing 2D and 3D operations since those + now share hardware units. +\end_layout + +\begin_layout Subsubsection* +3D engine +\end_layout + +\begin_layout Standard +A 3D engine is also called +\begin_inset Quotes eld +\end_inset + +rasterization pipeline +\begin_inset Quotes erd +\end_inset + +, because it contains a series of stages which exchange data in a pipeline + (1-directional) fashion. 
+\end_layout + +\begin_layout Standard +vertex -> geom -> fragment +\end_layout + +\begin_layout Standard +graphics fifo +\end_layout + +\begin_layout Standard +DMA +\end_layout + +\begin_layout Standard +http://www.x.org/wiki/Development/Documentation/HowVideoCardsWork +\end_layout + +\begin_layout Standard +tiled textures +\end_layout + +\begin_layout Subsubsection* +Overlays and hardware sprites +\end_layout + +\begin_layout Section +Programming the card +\end_layout + +\begin_layout Standard +Each PCI card exposes a number of PCI resources; lspci -v lists these resources. + These can be, but are not limited to, BIOSes, MMIO ranges, video memory + (or only some part of it). + As the total PCI resource size is limited, oftentimes a card will only + expose part of its video memory as a resource, and the only way to access + the remaining memory is through DMA from other, reachable areas (in a way + similar to bounce pages). + This is increasingly common as the video memory sizes keep growing while + the PCI resource space stays limited. +\end_layout + +\begin_layout Subparagraph* +MMIO +\end_layout + +\begin_layout Standard +MMIO is the most direct access to the card. + A range of addresses is exposed to the CPU, where each write goes directly + to the GPU. + This allows the simplest form of communication of commands from the CPU + to the GPU. + This type of programming is synchronous, so writes are done by the CPU + and executed on the GPU in a lockstep fashion. This results in sub-par performan +ce as each access turns into a packet on the bus. +\end_layout + +\begin_layout Subparagraph* +DMA +\end_layout + +\begin_layout Standard +A direct memory access (DMA) is the use by a peripheral of the bus mastering + feature of the bus. + This allows one peripheral to talk directly to another, without intervention + from the CPU. 
+ In the graphics card case, the two most common uses of DMAs are: +\end_layout + +\begin_layout Itemize +Transfers by the GPU to and from system memory (for reading textures and + writing buffers). + This allows implementing things like texturing over AGP or PCI, and hardware-ac +celerated texture transfers. +\end_layout + +\begin_layout Itemize +The implementation of the command FIFO. + As MMIO between the CPU and GPU is synchronous and graphics drivers inherently + use a lot of I/O, a faster means of communicating with the card is required. + The command FIFO is a piece of memory (either system memory or more rarely + video memory) shared between the graphics card and the CPU, where the CPU + places commands for later execution by the GPU. + Then the GPU reads the FIFO asynchronously using DMA and executes the commands. + This model allows asynchronous execution of the CPU and GPU command flows + and thus leads to higher performance. +\end_layout + +\begin_layout Subsubsection* +Interrupts +\end_layout + +\begin_layout Standard +Interrupts are a way for hardware peripherals in general, and GPUs in particular +, to signal events to the CPU. + Usage examples for interrupts include signaling completion of a graphics + command, signaling a vertical blanking event, reporting a GPU error, ... + When an interrupt is raised by the peripheral, the CPU executes a small + routine called an interrupt handler, which preempts other current executions. + There is a maximum execution time for an interrupt handler, so the drivers + have to keep it short (not more than a few microseconds). + In order to execute more code, the common solution is to schedule a tasklet + from the interrupt handler. +\end_layout + +\begin_layout Section +Display devices (aka screens) +\end_layout + +\begin_layout Standard +Display devices are the last link of the graphics chain. + They are charged with presenting the pictures to the user. 
+\end_layout + +\begin_layout Standard +digital vs analog signal +\end_layout + +\begin_layout Standard +hsync, vsync +\end_layout + +\begin_layout Standard +sync on green +\end_layout + +\begin_layout Standard +Connectors and encoders: CRTC, TMDS, LVDS, DVI-I, DVI-A, DVI-D, VGA (D-SUB + 15 is the proper name) +\end_layout + +\begin_layout Section +Graphics Hardware Examples +\end_layout + +\begin_layout Paragraph* +ATI +\end_layout + +\begin_layout Standard +Shader engine 4+1 +\end_layout + +\begin_layout Paragraph* +Nvidia +\end_layout + +\begin_layout Standard +NVidia hardware has multiple specificities compared to other architectures. + The first one is the availability of multiple contexts, which is implemented + using multiple command fifos (similar to what some high-end infiniband + networking cards do) and a context switching mechanism to commute between + those fifos. + A small firmware handles context switches: it is + responsible for saving the graphics card state to a portion of memory and + restoring another context. + A scheduling system using the round robin algorithm handles the selection + of the contexts, and the timeslice is programmable. + +\end_layout + +\begin_layout Standard +The second specificity is the notion of graphics objects. + Nvidia hardware features two levels of GPU access: the first one is at + the raw level and is used for context switches, and the second one is the + graphics objects which microprogram the raw level to achieve high level + functionality (for example 2D or 3D acceleration). 
+\end_layout + +\begin_layout Standard +Shader engine nv40/nv50 +\end_layout + +\begin_layout Standard +http://nouveau.freedesktop.org/wiki/HonzaHavlicek +\end_layout + +\begin_layout Paragraph* +SGX +\end_layout + +\begin_layout Standard +Tiling architecture +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +There are multiple memory domains in a computer, and they are not coherent. +\end_layout + +\begin_layout Itemize +A GPU is a completely separate computer with its own bus, address space + and computational units. +\end_layout + +\begin_layout Itemize +Communication between the CPU and GPU is achieved over a bus, which has + non-trivial performance implications. +\end_layout + +\begin_layout Itemize +GPUs can be programmed using two modes: MMIO and command FIFOs. +\end_layout + +\begin_layout Itemize +There is no standard output method for display devices. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +The Big Picture +\begin_inset CommandInset label +LatexCommand label +name "cha:The-Big-Picture" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Note Note +status open + +\begin_layout Plain Layout +X, how it works (encapsulating) with indirect (glx) 3D with kernel FB + + picture. + This is how utah-glx used to work. +\end_layout + +\begin_layout Plain Layout +DRI : bypassing encapsulation for performance-critical operations with kernel + FB + picture +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +The Linux graphics stack has seen numerous evolutions over the years. + The purpose of this section is to detail that history, as well as the justifica +tion behind the changes in order to better motivate the current design. 
+\end_layout + +\begin_layout Section +The X11 infrastructure +\end_layout + +\begin_layout Standard +\begin_inset Float figure +placement tbh +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode] (application) {Application}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below=of application] (xlib) {Xlib}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (application.south) -> (xlib.north); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw (1,-1) rectangle (6,-5.2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (3.5,-1.2) {X server}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=2cm of xlib] (xserver) {DIX}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (xlib.east) -> (xserver.west); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below=1cm of xserver] (driver) {DDX (Driver)}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (xserver.south) -> (driver.north); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below=1cm of driver] (hardware) {Hardware}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (driver.south) -> (hardware.north); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\begin_layout Plain Layout + 
+\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +The X11 architecture. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +DIX (Device-Independent X), DDX (Device-Dependent X), +\end_layout + +\begin_layout Standard +modules +\end_layout + +\begin_layout Standard +Xlib +\end_layout + +\begin_layout Standard +socket +\end_layout + +\begin_layout Standard +X protocol +\end_layout + +\begin_layout Standard +X extensions +\end_layout + +\begin_layout Standard +shm -> shared memory for transport +\end_layout + +\begin_layout Standard +XCB -> asynchronous +\end_layout + +\begin_layout Standard +Another notable X extension is Xv, which will be discussed in further detail + in the video decoding chapter. +\end_layout + +\begin_layout Section +The DRI/DRM infrastructure +\end_layout + +\begin_layout Standard +Initially (when Linux first supported graphics hardware acceleration), only + a single piece of code would access the graphics card directly: the XFree86 + server. + The design was as follows: by running with super-user privileges, the XFree86 + server could access the card from user space and did not require kernel + support to implement 2D acceleration. + The advantage of such a design was its simplicity, and the fact that the + XFree86 server could be easily ported from one operating system to another + since it required no kernel component. + For years this was the most widespread X server design (although there + were notable exceptions, like XSun which implemented modesetting in the + kernel for some drivers). +\end_layout + +\begin_layout Standard +Later on, Utah-GLX, the first hardware-independent 3D accelerated design, + came to Linux. + Utah-GLX basically consists in an additional user space 3D driver implementing + GLX, and directly accesses the graphics hardware from user space, in a + way similar to the 2D driver. 
+ At a time when the 3D hardware was clearly separated from 2D (because + the functionality used for 2D and 3D was completely different, or because + the 3D card was a completely separate card, à la 3Dfx), it made sense to + have a completely separate driver. + Furthermore, direct access to the hardware from user space was the simplest + approach and the shortest road to getting 3D acceleration going under Linux. +\end_layout + +\begin_layout Standard +At the same time, framebuffer drivers (which will be detailed in Chapter + +\begin_inset CommandInset ref +LatexCommand ref +reference "cha:Framebuffer-Drivers" + +\end_inset + +) were getting increasingly widespread, and represented another component + that could simultaneously access the graphics hardware directly. + To avoid potential conflicts between the framebuffer and XFree86 drivers, + it was decided that VT switches would emit a signal to the X server telling + it to save the graphics hardware state. + Asking each driver to save its complete GPU state on VT switches made the + drivers more fragile, and life became more difficult for developers who + suddenly faced bug-prone interaction between different drivers. 
+\end_layout + +\begin_layout Standard +\begin_inset Note Note +status open + +\begin_layout Plain Layout +aide à faire des figures : http://www.texample.net/tikz/examples/ +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Float figure +placement H +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode] (x11application) {X11 Application}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=0.5cm of x11application] (glapplication) {OpenGL Application}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=0.5cm of glapplication] (fbapplication) {Framebuffer Applicati +on}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 6cm, below=1cm of x11application, xshift = 1.7cm] + (xorg) {XFree86}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (x11application.south) -> ++(0,-1) (xorg); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (glapplication.south) -> ++(0,-1) (xorg); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below = of xorg, xshift=-2cm] (2ddriver) {2D Driver}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (xorg.south) ++ (-2,0) -> ++(0,-1) (2ddriver); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below = of xorg, xshift= 2cm] (glxdriver) {Utah GLX 
driver}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (xorg.south) ++(2,0) -> ++(0,-1) (glxdriver); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 12cm , below=3cm of 2ddriver, xshift=5cm] (hardware) + {Graphics Hardware}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw [thick, dotted] (-1.8,-5.2) -- (11,-5.2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw [thick, dotted] (-1.8,-7.2) -- (11,-7.2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (4.6,-1) {GLX}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10,-5) {User Space}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10,-7) {Kernel Space}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10,-7.5) {Hardware}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (glxdriver.south) -> ++(0,-3.0) (hardware); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below=5.1cm of fbapplication] (fbdriver) {FB driver}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (fbapplication) -> (fbdriver); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (fbdriver.south) -> ++(0,-1) (hardware); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (2ddriver.south) -> ++(0,-3) (hardware); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +Early implementation of the Linux graphics stack using Utah-GLX. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Obviously, this model had drawbacks. + First, it required that unprivileged user space applications be allowed + access the graphics hardware for 3D. 
+ Second, as can be seen in the figure above, all GL acceleration had to be indirect + through the X protocol, which would slow it down. + Because of growing concerns about security in Linux and performance + shortcomings, another model was required. +\end_layout + +\begin_layout Standard +To address the reliability and security concerns with the Utah-GLX model, + the DRI model was put together; it was used in both XFree86 and its successor, + X.Org. + This model relies on an additional kernel component whose duty is to check + the correctness of the 3D command stream, security-wise. + The main change is that instead of accessing the card directly, the + unprivileged OpenGL application would submit command buffers to the kernel, + which would check them for security and then pass them to the hardware + for execution. + The advantage of this model is that trusting user space is no longer required. + Notice that although this would have been possible, the 2D command stream + from XFree86 still did not go through the DRM, and therefore the X server + still required super-user privileges. 
+\end_layout + +\begin_layout Standard +\begin_inset Float figure +placement H +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode] (x11application) {X11 Application}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=0.5cm of x11application] (glapplication) {OpenGL Application}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=0.5cm of glapplication] (fbapplication) {Framebuffer Applicati +on}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 6cm, below=1cm of x11application, xshift = 1.7cm] + (xorg) {X.Org}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (x11application.south) -> ++(0,-1) (xorg); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (glapplication.south) -> ++(0,-1) (xorg); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below = of xorg, xshift=-2cm] (2ddriver) {2D Driver}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (xorg.south) ++ (-2,0) -> ++(0,-1) (2ddriver); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below = of xorg, xshift= 2cm] (glxdriver) {OpenGL DRI driver}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (glapplication.south) ++(1.3,0) -> ++(0,-3.1) (glxdriver); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node 
at (6,-2.1) {DRI}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below = 0.9cm of glxdriver] (drm) {DRM}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (glxdriver.south) -> ++(0,-0.9) (drm); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 12cm , below=3cm of 2ddriver, xshift=5cm] (hardware) + {Graphics Hardware}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw [thick, dotted] (-1.8,-5.2) -- (11,-5.2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw [thick, dotted] (-1.8,-7.2) -- (11,-7.2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (4.6,-1) {GLX}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10,-5) {User Space}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10,-7) {Kernel Space}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10,-7.5) {Hardware}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (fbapplication) -> ++(0,-5.65) (drm); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (2ddriver.south) -> ++(0,-3) (drm); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (drm.south) -> ++(0,-1.0) (hardware); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below=5.1cm of fbapplication] (fbdriver) {FB driver}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (fbapplication) -> (fbdriver); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (fbdriver.south) -> ++(0,-1) (hardware); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +The old picture of the Linux graphics stack. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +The current stack evolved from a new set of needs. + First, requiring the X server to have super-user has always had serious + security implications. + Second, with the previous design different drivers were touching a single + piece of hardware, which would often cause issues. + In order to resolve this the key is two-fold: first, merge the kernel framebuff +er functionality into the DRM module and second, have X.Org access the graphics + card through the DRM module and run unprivileged. + This is called Kernel Modesetting (KMS); in this model the DRM module is + now responsible for providing modesetting services both as a framebuffer + driver and to X.Org. +\end_layout + +\begin_layout Standard +\begin_inset Float figure +placement H +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode] (x11application) {X11 Application}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=0.5cm of x11application] (glapplication) {OpenGL Application}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=0.5cm of glapplication] (fbapplication) {Framebuffer Applicati +on}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 6cm, below=1cm of x11application, xshift = 1.7cm] + (xorg) {X.Org}; +\end_layout + +\begin_layout Plain Layout + + 
+\backslash +draw[myarrow] (x11application.south) -> ++(0,-1) (xorg); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (glapplication.south) -> ++(0,-1) (xorg); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below = of xorg, xshift=-2cm] (2ddriver) {2D Driver}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (xorg.south) ++ (-2,0) -> ++(0,-1) (2ddriver); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, below = of xorg, xshift= 2cm] (glxdriver) {OpenGL DRI driver}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (xorg.south) ++(1,0) -> ++(0,-1) (glxdriver); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (3.5,-3.1) {AIGLX}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (glapplication.south) ++(1.3,0) -> ++(0,-3.1) (glxdriver); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (6,-2.1) {DRI}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 12cm, below = 0.9cm of glxdriver, xshift = 1cm] + (drm) {DRM}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (glxdriver.south) -> ++(0,-0.9) (drm); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 12cm , below=3cm of 2ddriver, xshift=5cm] (hardware) + {Graphics Hardware}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw [thick, dotted] (-1.8,-5.2) -- (11,-5.2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw [thick, dotted] (-1.8,-7.2) -- (11,-7.2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (4.6,-1) {GLX}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10,-5) {User Space}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10,-7) {Kernel Space}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (10,-7.5) {Hardware}; 
+\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (fbapplication) -> ++(0,-5.65) (drm); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (2ddriver.south) -> ++(0,-0.9) (drm); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (drm.south) -> ++(0,-1.0) (hardware); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +The new picture of the Linux graphics stack. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +VT switches +\end_layout + +\begin_layout Standard +http://dri.sourceforge.net/doc/dri_data_flow.html +\end_layout + +\begin_layout Standard +http://dri.sourceforge.net/doc/dri_control_flow.html +\end_layout + +\begin_layout Standard +http://nouveau.freedesktop.org/wiki/GraphicStackOverview +\end_layout + +\begin_layout Standard +http://people.freedesktop.org/~ajax/dri-explanation.txt +\end_layout + +\begin_layout Standard +http://dri.sourceforge.net/doc/DRIintro.html +\end_layout + +\begin_layout Standard +http://jonsmirl.googlepages.com/graphics.html +\end_layout + +\begin_layout Standard +http://wiki.x.org/wiki/Development/Documentation/Glossary +\end_layout + +\begin_layout Standard +http://mjules.littleboboy.net/carnet/index.php?post/2006/11/15/89-comment-marche-x1 +1-xorg-et-toute-la-clique-5-partie +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +Applications communicate with X.Org through a specific library which encapsulates + drawing calls. 
+\end_layout + +\begin_layout Itemize +The current DRI design has evolved over time in a number of significant + steps. +\end_layout + +\begin_layout Itemize +In a modern stack, all graphics hardware activity is moderated by a kernel + module, the DRM. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +Framebuffer Drivers +\begin_inset CommandInset label +LatexCommand label +name "cha:Framebuffer-Drivers" + +\end_inset + + +\end_layout + +\begin_layout Standard +Framebuffer drivers are the simplest form of graphics drivers under Linux. + Even with kernel modesetting DRM drivers available, they are still a relevant option if the only + thing you are after is a basic two-dimensional display. + Furthermore, when implementing framebuffer acceleration on top of a kernel + modesetting DRM driver, the same callbacks need to be filled. + A framebuffer driver implements little functionality, and is therefore + extremely easy to create. + Such a driver is especially interesting for embedded systems, where memory + footprint is essential, or when the intended applications do not require + advanced graphics acceleration. +\end_layout + +\begin_layout Standard +At the core, a framebuffer driver implements the following functionality: +\end_layout + +\begin_layout Itemize +modesetting +\end_layout + +\begin_layout Itemize +basic 2D acceleration (copy, solid) +\end_layout + +\begin_layout Standard +Acceleration is sometimes made available to user space through a hook (user + space must then program card-specific bits, and must be root to do so). +\end_layout + +\begin_layout Standard +Framebuffer drivers do not always rely on a specific card model (like nvidiafb/a +tyfb...). + Drivers on top of VESA, EFI or Open Firmware exist. 
+\end_layout + +\begin_layout Standard +http://www.linux-fbdev.org/HOWTO/index.html +\end_layout + +\begin_layout Section +Creating a framebuffer driver +\end_layout + +\begin_layout Standard +struct platform_driver with a probe function +\end_layout + +\begin_layout Standard +probe function in charge of creating the fb_info struct and register_framebuffer +() on it. +\end_layout + +\begin_layout Section +Framebuffer operations +\end_layout + +\begin_layout Standard +The framebuffer operations structure is how non-modesetting framebuffer + callbacks are set. + Different callbacks can be set depending on what functionality you wish + to implement, like fills, copies, or cursor handling. + By filling struct fb_ops callbacks, one can implement the following functions: +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + +\begin_layout Plain Layout + +int (*fb_setcolreg)(unsigned regno, unsigned red, unsigned green, unsigned + blue, unsigned transp, struct fb_info *info); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* set color register */ +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + +\begin_layout Plain Layout + +int (*fb_setcmap)(struct fb_cmap *cmap, struct fb_info *info); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* set color registers in batch */ +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + +\begin_layout Plain Layout + +int (*fb_blank)(int blank, struct fb_info *info); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} 
+\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* blank display */ +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + +\begin_layout Plain Layout + +int (*fb_pan_display)(struct fb_var_screeninfo *var, struct fb_info *info); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* pan display */ +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + +\begin_layout Plain Layout + +void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect); + +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* Draws a rectangle */ +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + +\begin_layout Plain Layout + +void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* Copy data from area to another */ +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + +\begin_layout Plain Layout + +void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* Draws a image to the display */ +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + 
+\begin_layout Plain Layout + +int (*fb_cursor) (struct fb_info *info, struct fb_cursor *cursor); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* Draws cursor */ +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + +\begin_layout Plain Layout + +void (*fb_rotate)(struct fb_info *info, int angle); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* Rotates the display */ +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{lstlisting}{} +\end_layout + +\begin_layout Plain Layout + +int (*fb_sync)(struct fb_info *info); +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{lstlisting}{} +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +/* wait for blit idle, optional */ +\end_layout + +\begin_layout Standard +Note that common framebuffer functions (cfb) are available if you do not + want to implement everything for your device specifically. + These functions are cfb_fillrect, cfb_copyarea and cfb_imageblit and will + perform the corresponding function in a generic, unoptimized fashion using + the CPU. +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +Framebuffer drivers are the simplest form of linux graphics driver, requiring + little work for implementation. +\end_layout + +\begin_layout Itemize +Framebuffer drivers deliver a low memory footprint and thus are useful for + embedded devices. 
+\end_layout + +\begin_layout Itemize +Implementing acceleration is optional as software fallback functions exist. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +The DRM Kernel Module +\begin_inset CommandInset label +LatexCommand label +name "cha:The-DRM-Kernel" + +\end_inset + + +\end_layout + +\begin_layout Standard +The use of a kernel module is a requirement in a complex world. + The kernel module, or DRM, has multiple purposes: +\end_layout + +\begin_layout Itemize +Share the rendering hardware between multiple user space components, and + arbitrate access. +\end_layout + +\begin_layout Itemize +Enforce security by preventing applications from performing DMA to arbitrary + memory regions, and more generally from programming the card in any way that + could result in a security hole. +\end_layout + +\begin_layout Itemize +Manage the memory of the card, by providing video memory allocation functionalit +y to user space. +\end_layout + +\begin_layout Itemize +More recently, the DRM was improved to perform modesetting. + This simplifies the situation where both the DRM and the framebuffer driver + access the card by removing the framebuffer driver and implementing its functionality in + the DRM. +\end_layout + +\begin_layout Itemize +Put critical initialization of the card in the kernel, for example by uploading + firmware or setting up DMA areas. 
+ +\end_layout + +\begin_layout Standard +Kernel module (DRM) +\end_layout + +\begin_layout Standard +Global DRI/DRM user space/kernel scheme (figure with libdrm - drm - entry + points - multiple user space apps) +\end_layout + +\begin_layout Standard +\begin_inset Float figure +placement H +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode] (xorg) {X.Org}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, right=0.5cm of xorg] (glapplication) {OpenGL Application}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 6cm, below= of xorg, xshift = 2.2cm] (libdrm) {libdrm}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (xorg.south) -> ++(0,-1) (libdrm); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (glapplication.south) -> ++(0,-1) (libdrm); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 6cm, below= of libdrm] (drm) {drm}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (libdrm.south) -> ++(0,-1) (drm); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node[mynode, text width = 6cm, below= of drm] (hardware) {Graphics Hardware}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[myarrow] (drm.south) -> ++(0,-1.0) (hardware); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw [thick, dotted] 
(-1.8,-3.2) -- (9,-3.2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw [thick, dotted] (-1.8,-5.2) -- (9,-5.2); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (8,-3) {User Space}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (8,-5) {Kernel Space}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (8,-5.5) {Hardware}; +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +Accessing the DRM through libdrm. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +When designing a Linux graphics driver aiming for more than simple framebuffer + support, a DRM component is the first thing to do. + One should derive a design that is both efficient and enforces security. + The DRI/DRM scheme can be implemented in different ways and the interface + is indeed entirely card-specific. + Do not always follow the existing models that other drivers use, innovate! +\end_layout + +\begin_layout Section +Hardware sharing +\end_layout + +\begin_layout Standard +Multiplexing of the card command fifo - For cards which only feature a single + hardware command submission fifo, it has to be shared between multiple + user space components. + In that case, this is achieved by the DRM module. +\end_layout + +\begin_layout Standard +Prevent simultaneous access to the same hw +\end_layout + +\begin_layout Section +Security +\end_layout + +\begin_layout Standard +Prevent arbitrary DMAs to memory. + If the hardware does not feature memory protection, you have to check the + command stream before submitting it to the GPU.
+\end_layout + +\begin_layout Section +Memory management +\end_layout + +\begin_layout Section +Modesetting +\end_layout + +\begin_layout Standard +Modesetting is the act of setting a mode on the card to display. + This can range from extremely simple procedures (calling a VGA interrupt + or VESA call is a basic form of modesetting) to directly programming the + card registers (which brings along the advantage of not needing to rely + on a VGA or VESA layer). + Historically, this was achieved in user space by the DDX. + +\end_layout + +\begin_layout Standard +However, these days it makes more sense to put it in the kernel once and + for all, and share it between different GPU users (framebuffer drivers, + DDXes, EGL stacks...). + This extension to modesetting is called kernel modesetting (also known + as KMS). + A number of concepts are used by the modesetting interface (those are inherited + from the Randr 1.2 specification). +\end_layout + +\begin_layout Subsubsection* +Crtc +\end_layout + +\begin_layout Standard +Crtc is in charge of reading the framebuffer memory and routing the data + to an encoder +\end_layout + +\begin_layout Subsubsection* +Encoder +\end_layout + +\begin_layout Standard +Encoder encodes the pixel data for a connector +\end_layout + +\begin_layout Subsubsection* +Connector +\end_layout + +\begin_layout Standard +The connector is the physical output on the card (DVI, Dsub, Svideo...). + Notice that connectors can get their data from multiple encoders (for example + DVI-I which can feed both analog and digital signals) +\end_layout + +\begin_layout Standard +Also, on embedded or old hardware, it is common to have encoders and connectors + merged for simplicity/power efficiency reasons.
+\end_layout + +\begin_layout Standard ++++ Ajouter ici un schema crtc-encoder-connector +\end_layout + +\begin_layout Section +libdrm +\end_layout + +\begin_layout Standard +libdrm is a small (but growing) component that interfaces between user space + and the DRM module, and allows calling into the entry points. + +\end_layout + +\begin_layout Standard +Obviously security should not rely on components from libdrm because it + is an unprivileged user space component +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +The DRM manages all graphics activity in a modern linux graphics stack. +\end_layout + +\begin_layout Itemize +It is the only trusted piece of the stack and is responsible for security. + Therefore it shall not trust the other components. +\end_layout + +\begin_layout Itemize +It provides basic graphics functionality: modesetting, framebuffer driver, + memory management. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +X.Org Drivers +\begin_inset CommandInset label +LatexCommand label +name "cha:X.Org-Drivers" + +\end_inset + + +\end_layout + +\begin_layout Standard +This chapter covers the implementation of a 2D acceleration inside X.Org. +\end_layout + +\begin_layout Standard +There are multiple ways to implement a 2D X.Org driver: ShadowFB, XAA, EXA. + Another simple way of implementing X.Org support is through the FBDev module. + This module implements X.Org on top of an existing, in-kernel framebuffer + driver. 
+\end_layout + +\begin_layout Standard +http://www.x.org/wiki/DriverDevelopment +\end_layout + +\begin_layout Section +Initializing a driver +\end_layout + +\begin_layout Section +ShadowFB acceleration +\end_layout + +\begin_layout Standard +ShadowFB provides no acceleration proper, a copy of the framebuffer is kept + in system memory. + The driver implements a single hook that copies graphics from system to + video memory. + This can be implemented using either a DMA copy, or a CPU copy (depending + on the hardware and copy size, either can be better). +\end_layout + +\begin_layout Standard +Despite the name, shadowFB is not to be confused with the kernel framebuffer + drivers. +\end_layout + +\begin_layout Standard +Although ShadowFB is a very basic design, it can result in a more efficient + and responsive desktop than an incomplete implementation of EXA. +\end_layout + +\begin_layout Standard +\begin_inset Note Note +status open + +\begin_layout Plain Layout +Insérer une image avec la propagation shadowfb +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +begin{tikzpicture}[node distance=1cm, auto] +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white +, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text + centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt, + thick}, mylabel/.style={text width=7em, text centered} } +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + + +\backslash +tikz{ +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner + sep=1em] (0,2) rectangle (5,6); +\end_layout + +\begin_layout 
Plain Layout + + +\backslash +draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner + sep=1em] (6,2) rectangle (11,6); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +draw[<->] (2,7.5) -- +(6.5,0); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +draw[<->] (1.5,2) -- +(0,5); +\end_layout + +\begin_layout Plain Layout + +% +\backslash +draw[<->] (2,1.5) -- +(8,0); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (2.5,1.5) {Shadow surface}; +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (8.5,1.5) {Video ram surface}; +\end_layout + +\begin_layout Plain Layout + +% source pixels +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw (2,2.5) rectangle (4,4); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (3,3) {Dirty pixels}; +\end_layout + +\begin_layout Plain Layout + +% destination pixels +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw (8,2.5) rectangle (10,4); +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (9,3) {Dst pixels}; +\end_layout + +\begin_layout Plain Layout + +% fleches de copie +\end_layout + +\begin_layout Plain Layout + + +\backslash +draw[->] (3,3.25) -- +(6,0); +\end_layout + +\begin_layout Plain Layout + +% faux noeud pour pas que la légende soit collée +\end_layout + +\begin_layout Plain Layout + + +\backslash +node at (6,0.5) { }; +\end_layout + +\begin_layout Plain Layout + +} +\end_layout + +\begin_layout Plain Layout + + +\backslash +end{tikzpicture} +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +Shadowfb acceleration. 
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Section +XAA acceleration +\end_layout + +\begin_layout Standard +Scanline based acceleration +\end_layout + +\begin_layout Standard +Offscreen area, same pitch as the screen +\end_layout + +\begin_layout Section +EXA acceleration +\end_layout + +\begin_layout Standard +Adapted from KAA from Kdrive +\end_layout + +\begin_layout Standard +Simple interface : Prepare/Act/Finish for each acceleration function +\end_layout + +\begin_layout Standard +Solid - fill an area with a solid color (RGBA) +\end_layout + +\begin_layout Standard +Copy - copies a rectangle area from and to video memory +\end_layout + +\begin_layout Standard +Composite - optional interface used to achieve composite operations like + blending. + This allows accelerating 2D desktop effects like blending, scaling, operations + with masks... +\end_layout + +\begin_layout Standard +UploadToScreen - copies an area from system memory to video memory +\end_layout + +\begin_layout Standard +DownloadFromScreen - copies an area from video memory to system memory +\end_layout + +\begin_layout Standard +Problématique des migrations de pixmaps +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +Multiple choices exist for accelerating 2D in X.Org. +\end_layout + +\begin_layout Itemize +The most efficient one is EXA, which puts all the smart optimizations in + a common piece of code, and leaves the driver implementation very simple. +\end_layout + +\begin_layout Itemize +If your card cannot accelerate 2D operations, shadowfb is probably the path + to take.
+\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +Video Decoding +\begin_inset CommandInset label +LatexCommand label +name "cha:Video-Decoding" + +\end_inset + + +\end_layout + +\begin_layout Section +Video decoding pipeline +\end_layout + +\begin_layout Standard +Two typical video pipelines : mpeg2 and h264 +\end_layout + +\begin_layout Paragraph* +The MPEG2 decoding pipeline +\end_layout + +\begin_layout Standard +iDCT -> MC -> CSC -> Final display +\end_layout + +\begin_layout Paragraph* +The H.264 decoding pipeline +\end_layout + +\begin_layout Standard +entropy decoding -> iDCT -> MC -> CSC -> Final display +\end_layout + +\begin_layout Subsection +Entropy +\end_layout + +\begin_layout Standard +Entropy encoding is a lossless compression phase. + It is the last stage of encoding and therefore also the first stage of + decoding. +\end_layout + +\begin_layout Standard +CABAC/CAVLC +\end_layout + +\begin_layout Subsection +Inverse DCT +\end_layout + +\begin_layout Subsection +Motion Compensation +\end_layout + +\begin_layout Subsection +Color Space Conversion +\end_layout + +\begin_layout Standard +Color spaces +\end_layout + +\begin_layout Standard +Linear relation +\end_layout + +\begin_layout Standard +Conversion matrices +\end_layout + +\begin_layout Standard +The YUV color space: 1 component luminance (Y) + 2 components chrominance + (UV). + Chrominance information is less relevant to the eye than luminance, so + usually chrominance is subsampled and luminance is kept at the original resolution. + Therefore, the Y plane usually has a higher resolution than the U and V + planes.
+\end_layout + +\begin_layout Standard +Bandwidth gain (RGBA32 vs YV12) +\end_layout + +\begin_layout Standard +YUV Planar and packed (interlaced) formats +\end_layout + +\begin_layout Standard +Plane order (YV12 vs NV12) +\end_layout + +\begin_layout Standard +Order of the planes (YV12, I420) +\end_layout + +\begin_layout Standard +http://en.wikipedia.org/wiki/YUV +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\begin_inset Formula $\left[\begin{array}{c} +R\\ +G\\ +B\end{array}\right]=\left[\begin{array}{ccc} +1 & 0 & 1.13983\\ +1 & -0.39465 & -0.58060\\ +1 & 2.03211 & 0\end{array}\right]\left[\begin{array}{c} +Y\\ +U\\ +V\end{array}\right]$ +\end_inset + + +\begin_inset Note Note +status open + +\begin_layout Plain Layout +filler verifier la formule +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +\begin_inset CommandInset label +LatexCommand label +name "fig:YUV-to-RGB" + +\end_inset + +YUV to RGB Conversion formula as per ITU-R BT recommendation 601.
+\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\begin_inset Formula $\left[\begin{array}{c} +R\\ +G\\ +B\end{array}\right]=\left[\begin{array}{ccc} +1 & 0 & 1.28033\\ +1 & -0.21482 & -0.38059\\ +1 & 2.12798 & 0\end{array}\right]\left[\begin{array}{c} +Y\\ +U\\ +V\end{array}\right]$ +\end_inset + + +\begin_inset Note Note +status open + +\begin_layout Plain Layout +filler verifier la formule peut pas etre la meme que 601 +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +\begin_inset CommandInset label +LatexCommand label +name "fig:YUV-to-RGB-1" + +\end_inset + +YUV to RGB Conversion formula as per ITU-R BT recommendation 709. +\end_layout + +\end_inset + + +\end_layout + +\end_inset + +Figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:YUV-to-RGB" + +\end_inset + + shows the conversion matrices from ITU-R BT Recommendation 601 (standard + content) and recommendation 709 (intended for HD content). + Notice that although these matrices are very similar, there are numerical + differences which will result in slightly off-colored rendering if one is + used in place of the other. + It is indeed often the case that video decoders with YUV to RGB hardware + are used to playback high definition content but no attention is paid to + the proper conversion matrix that should be used. + Since the colors are only slightly wrong, this problem is commonly overlooked, + whereas most hardware features at least a BT601/BT709 switch, or a fully + programmable conversion matrix.
+\end_layout + +\begin_layout Standard +http://www.fourcc.org/yuv.php +\end_layout + +\begin_layout Standard +http://www.glennchan.info/articles/articles.html +\end_layout + +\begin_layout Standard +http://www.poynton.com/papers/SMPTE_98_YYZ_Luma/index.html +\end_layout + +\begin_layout Standard +\begin_inset Float table +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset Tabular +<lyxtabular version="3" rows="6" columns="4"> +<features> +<column alignment="center" valignment="top" width="1.5cm"> +<column alignment="center" valignment="top" width="1.2cm"> +<column alignment="center" valignment="top" width="3.5cm"> +<column alignment="center" valignment="top" width="3.5cm"> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Format name +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Y:U:V bits per pixel +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Layout +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Comments +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +YV12 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +8:2:2 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" 
topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1 Y plane, 1 V 2*2 sub-sampled plane, 1 U 2*2 sub-sampled plane +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Same as I420 except U and V are reversed. +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +I420 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +8:2:2 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1 Y plane, 1 U 2*2 sub-sampled plane, 1 V 2*2 sub-sampled plane +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Same as YV12 except U and V are reversed.
+\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +NV12 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +8:2:2 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1 Y plane, 1 packed U+V 2*2 sub-sampled plane +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Convenient for hardware implementation on 3D-capable GPUs +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +YUY2 (YUYV) +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +8:4:4 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1 Packed YUV plane +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Packed as Y0U0Y1V0 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> 
+\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset +</cell> +</row> +</lyxtabular> + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption + +\begin_layout Plain Layout +Common YUV color space formats +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Pixel scaling +\end_layout + +\begin_layout Standard +Since the conversion from YUV space to RGB space is linear, filtered scaling + can be done either in the YUV or RGB space, which conveniently allows using + texture filtering which is available on 3D hardware to sample the YUV data. + This allows a single pass color space conversion and scaling. + For example, bi-linear filtering will work just fine with three textures + for the three Y, U and V planes. + Notice that higher quality can be obtained at the expense of performance + by using better filtering modes, such as bi-cubic [citer papier hadwiger], + even though this can prove to be costly. + A trade-off can be achieved by implementing bi-cubic filtering for the + (most eye-visible) Y plane, and keeping bi-linear filtering for U and V + planes. +\end_layout + +\begin_layout Standard +If the hardware cannot achieve color space conversion and scaling at the + same time (for example if you have a YUV->RGB blitter and a shader-less + 3D engine), again the linear color conversion allows you to do the scaling + in RGB space, and this will produce the same results (barring gamma correction).
+\end_layout + +\begin_layout Section +Video decoding APIs +\end_layout + +\begin_layout Paragraph* +Xv +\end_layout + +\begin_layout Standard +Xv is simply about CSC and scaling. + In order to implement Xv, a typical X.Org driver will have to implement + this space conversion. + Although the Xv API is a little complex for what it implements, the gist + of it consists of the PutImage function, which puts a YUV image on screen. + Multiple YUV formats can be handled, planar or interlaced mainly. + Note that Xv has RGB support as well. + Thanks to the bandwidth gains and DMA transfers, even an Xv implementation + already provides a relevant level of video decoding acceleration, and can + prove sufficient depending on the target hardware (for example, it can + prove to be fine when coupled with a powerful CPU to decode H264 content). +\end_layout + +\begin_layout Paragraph* +XvMC +\end_layout + +\begin_layout Standard +idct + mc +csc +\end_layout + +\begin_layout Paragraph* +VAAPI +\end_layout + +\begin_layout Standard +VAAPI was initially created for intel's poulsbo video decoding. + The API is very tailored to embedded platforms and has many entry points, + at different pipeline stages, which makes it more complex to implement.
+\end_layout + +\begin_layout Paragraph* +VDPAU +\end_layout + +\begin_layout Standard +The VDPAU was initiated by nvidia for H264 & VC1 decoding support +\end_layout + +\begin_layout Paragraph* +XvBA +\end_layout + +\begin_layout Standard +All 3 APIs are intended for full +\end_layout + +\begin_layout Paragraph* +OpenMax +\end_layout + +\begin_layout Standard +http://x264dev.multimedia.cx +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +A video decoding pipeline consists in multiple stages chained together. +\end_layout + +\begin_layout Itemize +Color space conversion and scaling is the most important stage, and if your + driver implements only one operation for simplicity, this is it. +\end_layout + +\begin_layout Itemize +Implementing a full pipeline can provide a high performance boost, and save + battery life on mobile systems. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +OpenGL +\begin_inset CommandInset label +LatexCommand label +name "cha:OpenGL" + +\end_inset + + +\end_layout + +\begin_layout Standard +OpenGL ARB, khronos, bla bla... 
+\end_layout + +\begin_layout Section +The OpenGL Rendering Pipeline +\end_layout + +\begin_layout Subsection +Vertex processing +\end_layout + +\begin_layout Standard +vertex stage +\end_layout + +\begin_layout Standard +vertex buffers +\end_layout + +\begin_layout Subsection +Geometry processing +\end_layout + +\begin_layout Subsection +Fragment processing +\end_layout + +\begin_layout Standard +Rasterization +\end_layout + +\begin_layout Standard +Render buffers +\end_layout + +\begin_layout Standard +Textures +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +OpenGL is a suite of stages arranged in a pipeline. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +Mesa +\begin_inset CommandInset label +LatexCommand label +name "cha:Mesa" + +\end_inset + + +\end_layout + +\begin_layout Standard +Mesa is the Common Rendering Architecture for all open source graphics drivers. +\end_layout + +\begin_layout Section +Mesa +\end_layout + +\begin_layout Standard +Mesa serves two major purposes: +\end_layout + +\begin_layout Itemize +Mesa is a software implementation of OpenGL. + It is considered to be the reference implementation and is useful in checking + conformance, seeing that the official OpenGL conformance tests are not + publicly available. +\end_layout + +\begin_layout Itemize +Mesa provides the OpenGL entry points for Open Source graphics drivers under + linux. +\end_layout + +\begin_layout Standard +In this section, we will focus on the second point. 
+\end_layout + +\begin_layout Section +Mesa internals +\end_layout + +\begin_layout Subsection +Textures in mesa +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +Mesa is the reference OpenGL implementation under Linux. +\end_layout + +\begin_layout Itemize +All Open Source graphics drivers use Mesa for 3D +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +Gallium 3D +\begin_inset CommandInset label +LatexCommand label +name "cha:Gallium-3D" + +\end_inset + + +\end_layout + +\begin_layout Standard +Gallium 3D is the Future of 3D Acceleration. +\end_layout + +\begin_layout Standard +http://jrfonseca.blogspot.com/2008/04/gallium3d-introduction.html +\end_layout + +\begin_layout Standard +http://people.freedesktop.org/~csimpson/gallium-docs/ +\end_layout + +\begin_layout Section +Gallium3D: a plan for a new generation of hardware +\end_layout + +\begin_layout Standard +Ten years ago, GPUs were a direct match with all the OpenGL or Direct3D + functionality; back then the GPUs had specific transistors dedicated to + each piece of functionality. + With the explosion in the amount of 3D functionality, this quickly made + it impractical both for application developers (who saw the 3D APIs growing + huge) and hardware designers (who faced an explosion of the number of specific + functionality a GPU needed), and shaders were created. + Instead of providing specific functionality, the 3D APIs would now let + the programmers create these little programs and run them on the GPU. + As the hardware was now programmable in a way which was a superset of fixed + functionality, the fixed function pipelines were not required any more + and were removed from the cards. 
+ Gallium 3D is modeled around the simple observation that today's GPUs do + not have fixed pipe any more and only feature shaders, but drivers still + have to +\begin_inset Quotes eld +\end_inset + +emulate +\begin_inset Quotes erd +\end_inset + + fixed function on top of the shaders to provide API compatibility. + Doing so in every driver would require a lot of code duplication, and the + Gallium model is to put this code in a common place. + Therefore gallium drivers become smaller and easier to write and to maintain. +\end_layout + +\begin_layout Standard +everything is a shader, including inside the driver +\end_layout + +\begin_layout Standard +thin layer for fixed pipe -> programmable functionality translation +\end_layout + +\begin_layout Standard +global diagram +\end_layout + +\begin_layout Section +State trackers +\end_layout + +\begin_layout Standard +A state tracker implements an API (for example OpenGL, OpenVG, Direct3D...) + by turning it into API-agnostic and hardware-agnostic TGSI calls. +\end_layout + +\begin_layout Section +Pipe driver +\end_layout + +\begin_layout Standard +A pipe driver is the main part of a hardware-specific driver. +\end_layout + +\begin_layout Section +Winsys +\end_layout + +\begin_layout Standard +The winsys is in charge of talking to the OS/Platform of choice. + The pipe driver relies on the Winsys to talk to the hardware. + For example, this allows having a single pipe driver with multiple winsyses + targeting different Operating systems. +\end_layout + +\begin_layout Section +Writing Gallium3D drivers +\end_layout + +\begin_layout Standard +screen +\end_layout + +\begin_layout Standard +context +\end_layout + +\begin_layout Standard +pipe_transfer +\end_layout + +\begin_layout Section +Shaders in Gallium +\end_layout + +\begin_layout Standard +In order to operate shaders, Gallium features an internal shader description + language which uses 4-component vectors.
+ We will later refer to the 4 components of a vector as x,y,z,w. + In particular, v.x is the first component of vector v, v.xyzw are all 4 component +s of v in that order, and swizzling is allowed, for example v.wzyx reverses + the component order. + It is also legal to replicate a component, for example v.xxxx means four + times the x component of v and v.yyzz means two times y and two times z. +\end_layout + +\begin_layout Standard +These components usually carry no semantics, and despite their name they + can very well carry a color or an opacity value indifferently. + +\end_layout + +\begin_layout Standard +TGSI instruction set +\end_layout + +\begin_layout Standard +mesa/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +Gallium 3D is the new graphics API. +\end_layout + +\begin_layout Itemize +Everything is converted to a shader internally, fixed functionality is gone. +\end_layout + +\begin_layout Itemize +Drivers are simpler than classic Mesa drivers, as one only has to implement + shaders to get all fixed functionality to work. 
+\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +GPU Computing +\begin_inset CommandInset label +LatexCommand label +name "cha:GPU-Computing" + +\end_inset + + +\end_layout + +\begin_layout Chapter +Suspend and Resume +\begin_inset CommandInset label +LatexCommand label +name "cha:Suspend-and-Resume" + +\end_inset + + +\end_layout + +\begin_layout Standard +VT switches +\end_layout + +\begin_layout Standard +Card state +\end_layout + +\begin_layout Standard +Suspend/resume hooks in the DRM +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +Suspend and resume has long been very clumsy, but this is solved now thanks + to the DRM implementing more functionality. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +Technical Specifications +\begin_inset CommandInset label +LatexCommand label +name "cha:Technical-Specifications" + +\end_inset + + +\end_layout + +\begin_layout Standard +Technical specifications are the nuts and bolts of graphics driver work. + Without hardware specifications, no work can be started. + However, manufacturing companies are usually wary of sharing said specification +s, as they think this will hinder their business. + While this claim is false (because you can't copy a GPU from just its specifica +tions), it is still very widespread and prevents a lot of hardware from + being properly documented. + Therefore, getting hold of hardware specifications will be the first major + step in any graphics driver development project. 
+\end_layout + +\begin_layout Section +Obtaining official specifications +\end_layout + +\begin_layout Paragraph* +Public specifications +\end_layout + +\begin_layout Standard +Some vendors distribute the technical documentation for their hardware publicly + without restrictions. +\end_layout + +\begin_layout Standard +Sometimes, things can be as simple as asking the vendor, who might share + the documentation (possibly under NDA, see below). +\end_layout + +\begin_layout Paragraph* +NDA (Non-Disclosure Agreement) +\end_layout + +\begin_layout Standard +Put simply, an NDA is a contract signed between the developer and the hardware + company, by which the developer agrees not to spread the docs he received. + However, there can be more restrictions in an NDA. +\end_layout + +\begin_layout Standard +Terms of the NDA +\end_layout + +\begin_layout Standard +Before signing an NDA, think. + Whatever lawyers say, there is no such thing as a +\begin_inset Quotes eld +\end_inset + +standard +\begin_inset Quotes erd +\end_inset + + NDA, you can always negotiate. +\end_layout + +\begin_layout Standard +Can Open Source drivers be written from that documentation under that NDA? +\end_layout + +\begin_layout Standard +What happens when the NDA expires? Can code still be free, are you bound + by any clause? +\end_layout + +\begin_layout Standard +What about yourself? Are you prevented from doing further work on this hardware? +\end_layout + +\begin_layout Section +Reverse engineering +\end_layout + +\begin_layout Standard +When specifications are not easily available or just incomplete, an alternate + route is reverse engineering. + Reverse engineering consists in figuring out the specifications for a given + piece of hardware by yourself, for example by looking at what a black-box + binary driver does to the hardware under certain circumstances. 
+\end_layout + +\begin_layout Standard +Reverse engineering is not just a tool to obtain missing hardware specifications +, it is also a strong means of Open Source advocacy. + Once a reverse engineered driver exists and ships in Linux distributions, + pressure shifts onto the hardware vendor for support. + This, in turn, can force the vendor to support Open Source drivers. +\end_layout + +\begin_layout Standard +Reverse engineering is not as difficult as it seems, but it requires organization and rigor. + Write down all bits of information (even incomplete bits), share it among + developers, try to work out bits one by one. + Do not hesitate to write ad-hoc tools, as they will save precious time down + the road (if you hesitate, you have crossed the line already!). +\end_layout + +\begin_layout Paragraph* +Mmiotrace +\end_layout + +\begin_layout Standard +The basic idea behind mmio-trace is simple: it first hooks the ioremap call, + and therefore prevents mapping of a designated I/O area. + Subsequently, accesses to this area will generate page faults, which are + caught by the kernel. + For each page fault, the faulting instruction is decoded to figure out + the write or read address, along with the value written/read. + The page is put back, the faulting instruction is then single-stepped, + and the page is then removed again. + Execution then continues as usual. +\end_layout + +\begin_layout Standard +mmio-trace is now part of the official Linux kernel. + Therefore, any pre-existing driver can be traced. +\end_layout + +\begin_layout Paragraph* +Libsegfault +\end_layout + +\begin_layout Standard +libsegfault is similar to mmio-trace in the way it works: after removing + some pages which one wants to track accesses to, it will generate a segmentation + fault on each access and therefore be able to report each access. + The difference is that libsegfault is a user space tool while mmio-trace + is a kernel tool. 
+\end_layout + +\begin_layout Paragraph* +Valgrind-mmt +\end_layout + +\begin_layout Standard +Valgrind is a dynamic recompiling and instrumentation framework. + Valgrind-mmt is a plugin for valgrind which implements tracing of reads + and writes to a certain range of memory addresses, usually an mmio range + accessed from user space. + Memory accesses are dynamically instrumented thanks to valgrind and each + access to the zones we want to see traced is logged. +\end_layout + +\begin_layout Paragraph* +vbetool +\end_layout + +\begin_layout Paragraph* +Virtualization +\end_layout + +\begin_layout Standard +Finally, one last pre-existing tool to help reverse engineering is virtualizatio +n. + By running a proprietary driver in a controlled environment, one can figure + out the inner workings of a GPU. + The plan is then to write an emulated GPU while doing the reverse engineering + (which imposes the use of an open source virtualization solution like Qemu). +\end_layout + +\begin_layout Paragraph* +Ad-hoc tools +\end_layout + +\begin_layout Standard +In addition to these generic tools, you will often find it useful to implement + your own additional tools, tailored for specific needs. + Renouveau is an example of such a tool that integrates the reverse engineering + mechanisms, the command decoding and printing. + In order to achieve decoding of the commands, it carries a database of + the graphics commands of NVIDIA GPUs. + This allows quick testing of new database entries. + Headers generated from this database are later used in the driver development + process. +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +Technical specifications are of course very important for authoring graphics + drivers. 
+\end_layout + +\begin_layout Itemize +NDAs can have unforeseen implications on yourself and your work. +\end_layout + +\begin_layout Itemize +When specifications are unavailable, incomplete or just plain wrong, reverse engineering + can help you figure out how the hardware actually works. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +Beyond Development +\begin_inset CommandInset label +LatexCommand label +name "cha:Beyond-Development" + +\end_inset + + +\end_layout + +\begin_layout Section +Testing for conformance +\end_layout + +\begin_layout Paragraph* +Rendercheck +\end_layout + +\begin_layout Paragraph* +OpenGL conformance test suite +\end_layout + +\begin_layout Standard +The official OpenGL testing suite is not publicly available, and (paying) + Khronos Membership is required. + Instead, most developers use alternate sources for test programs. +\end_layout + +\begin_layout Paragraph* +Piglit +\end_layout + +\begin_layout Paragraph* +glean +\end_layout + +\begin_layout Standard +glean.sourceforge.net +\end_layout + +\begin_layout Paragraph* +Mesa demos +\end_layout + +\begin_layout Standard +mesa/progs/* +\end_layout + +\begin_layout Section +Debugging +\end_layout + +\begin_layout Paragraph* +gdb and X.Org +\end_layout + +\begin_layout Standard +gdb needs to run in a terminal emulator, while the application being debugged might + be stopped with a lock held. + That might result in a deadlock between the application stuck with a lock + and gdb waiting to be able to output text. 
+\end_layout + +\begin_layout Standard +printk debug +\end_layout + +\begin_layout Standard +crash (a layer on top of gdb for analyzing vmcore files) +\end_layout + +\begin_layout Standard +kgdb +\end_layout + +\begin_layout Standard +serial console +\end_layout + +\begin_layout Standard +diskdump +\end_layout + +\begin_layout Standard +linux-uml +\end_layout + +\begin_layout Standard +systemtap +\end_layout + +\begin_layout Section +Upstreaming +\end_layout + +\begin_layout Standard +Submitting your code for inclusion in the official trees is an important + part of the graphics driver development process under Linux. + There are multiple motivations for doing this. + +\end_layout + +\begin_layout Standard +First, this allows end users to get hold of your driver more easily. +\end_layout + +\begin_layout Standard +Second, this makes maintaining your driver easier in the future: + in the event of interface changes, +\end_layout + +\begin_layout Standard +Why upstream? +\end_layout + +\begin_layout Standard +How? +\end_layout + +\begin_layout Standard +When? +\end_layout + +\begin_layout Standard +\begin_inset Box Shadowbox +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +Takeaways: +\end_layout + +\begin_layout Itemize +Thoroughly testing all your changes can save you the cost of bisection later + on. +\end_layout + +\begin_layout Itemize +Debugging is not easy for graphics drivers. +\end_layout + +\begin_layout Itemize +By upstreaming your code into official repositories, you save yourself the + burden of adapting it to ever-moving programming interfaces in X.Org, Mesa + and the kernel. 
+\end_layout + +\end_inset + + +\end_layout + +\begin_layout Chapter +Conclusions +\begin_inset CommandInset label +LatexCommand label +name "cha:Conclusions" + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Note Note +status open + +\begin_layout Plain Layout +Bordel à caser quelque part : +\end_layout + +\begin_layout Plain Layout +- la composition, avec XRender ou avec GLX + GL_EXT_texture_from_pixmap, + expliquer les différences +\end_layout + +\begin_layout Plain Layout +- XGL, AIGLX +\end_layout + +\end_inset + + +\end_layout + +\end_body +\end_document diff --git a/myfncychap.sty b/myfncychap.sty new file mode 100644 index 0000000..cd880ee --- /dev/null +++ b/myfncychap.sty @@ -0,0 +1,683 @@ +%%% Copyright Ulf A. Lindgren +%%% +%%% Note Premission is granted to modify this file under +%%% the condition that it is saved using another +%%% file and package name. +%%% +%%% Revision 1.1 (1997) +%%% +%%% Jan. 8th Modified package name base date option +%%% Jan. 22th Modified FmN and FmTi for error in book.cls +%%% \MakeUppercase{#}->{\MakeUppercase#} +%%% Apr. 6th Modified Lenny option to prevent undesired +%%% skip of line. +%%% Nov. 8th Fixed \@chapapp for AMS +%%% +%%% Revision 1.2 (1998) +%%% +%%% Feb. 11th Fixed appendix problem related to Bjarne +%%% Aug. 11th Fixed problem related to 11pt and 12pt +%%% suggested by Tomas Lundberg. THANKS! +%%% +%%% Revision 1.3 (2004) +%%% Sep. 20th problem with frontmatter, mainmatter and +%%% backmatter, pointed out by Lapo Mori +%%% +%%% Revision 1.31 (2004) +%%% Sep. 21th problem with the Rejne definition streched text +%%% caused ugly gaps in the vrule aligned with the title +%%% text. Kindly pointed out to me by Hendri Adriaens +%%% +%%% Revision 1.32 (2005) +%%% Jun. 23th compatibility problem with the KOMA class 'scrbook.cls' +%%% a remedy is a redefinition of '\@schapter' in +%%% line with that used in KOMA. 
The problem was pointed +%%% out to me by Mikkel Holm Olsen +%%% +%%% Revision 1.33 (2005) +%%% Aug. 9th misspelled ``TWELV'' corrected, the error was pointed +%%% out to me by George Pearson +%%% +%%% Revision 1.34 (2007) +%%% Added an alternative to Lenny provided by Peter +%%% Osborne (2005-11-28) +%%% Corrected front, main and back matter, based on input +%%% from Bas van Gils (2006-04-24) +%%% Jul. 30th Added Bjornstrup option provided by Jean-Marc +%%% Francois (2007-01-05). +%%% Reverted to \MakeUppercase{#} see rev 1.1, solved +%%% problem with MakeUppercase and MakeLowercase pointed +%%% out by Marco Feuerstein (2007-06-06) + + +%%% Last modified Jul. 2007 + +\NeedsTeXFormat{LaTeX2e}[1995/12/01] +%%% Declared name matches the file name (myfncychap.sty): this is a +%%% modified copy, and a mismatch makes LaTeX warn on every load. +\ProvidesPackage{myfncychap} + [2007/07/30 v1.34 + LaTeX package (Revised chapters)] + +%%%% For conditional inclusion of color +\newif\ifusecolor +\usecolorfalse + + + +%%%% DEFINITION OF Chapapp variables +\newcommand{\CNV}{\huge\bfseries} +\newcommand{\ChNameVar}[1]{\renewcommand{\CNV}{#1}} + + +%%%% DEFINITION OF TheChapter variables +\newcommand{\CNoV}{\huge\bfseries} +\newcommand{\ChNumVar}[1]{\renewcommand{\CNoV}{#1}} + +\newif\ifUCN +\UCNfalse +\newif\ifLCN +\LCNfalse +\def\ChNameLowerCase{\LCNtrue\UCNfalse} +\def\ChNameUpperCase{\UCNtrue\LCNfalse} +\def\ChNameAsIs{\UCNfalse\LCNfalse} + +%%%%% Fix for AMSBook 971008 + +\@ifundefined{@chapapp}{\let\@chapapp\chaptername}{} + + +%%%%% Fix for Bjarne and appendix 980211 + +\newif\ifinapp +\inappfalse +\renewcommand\appendix{\par + \setcounter{chapter}{0}% + \setcounter{section}{0}% + \inapptrue% + \renewcommand\@chapapp{\appendixname}% + \renewcommand\thechapter{\@Alph\c@chapter}} + +%%%%% Fix for frontmatter, mainmatter, and backmatter 040920 + +\@ifundefined{@mainmatter}{\newif\if@mainmatter \@mainmattertrue}{} + +%%%%% + + + +\newcommand{\FmN}[1]{% +\ifUCN + {\MakeUppercase{#1}}\LCNfalse +\else + \ifLCN + {\MakeLowercase{#1}}\UCNfalse + \else #1 + \fi +\fi} + + +%%%% DEFINITION OF Title variables 
+\newcommand{\CTV}{\Huge\bfseries} +\newcommand{\ChTitleVar}[1]{\renewcommand{\CTV}{#1}} + +%%%% DEFINITION OF the basic rule width +\newlength{\RW} +\setlength{\RW}{1pt} +\newcommand{\ChRuleWidth}[1]{\setlength{\RW}{#1}} + +\newif\ifUCT +\UCTfalse +\newif\ifLCT +\LCTfalse +\def\ChTitleLowerCase{\LCTtrue\UCTfalse} +\def\ChTitleUpperCase{\UCTtrue\LCTfalse} +\def\ChTitleAsIs{\UCTfalse\LCTfalse} +\newcommand{\FmTi}[1]{% +\ifUCT + {\MakeUppercase{#1}}\LCTfalse +\else + \ifLCT + {\MakeLowercase{#1}}\UCTfalse + \else {#1} + \fi +\fi} + + + +\newlength{\mylen} +\newlength{\myhi} +\newlength{\px} +\newlength{\py} +\newlength{\pyy} +\newlength{\pxx} + + +\def\mghrulefill#1{\leavevmode\leaders\hrule\@height #1\hfill\kern\z@} + +\newcommand{\DOCH}{% + \CNV\FmN{\@chapapp}\space \CNoV\thechapter + \par\nobreak + \vskip 20\p@ + } +\newcommand{\DOTI}[1]{% + \CTV\FmTi{#1}\par\nobreak + \vskip 40\p@ + } +\newcommand{\DOTIS}[1]{% + \CTV\FmTi{#1}\par\nobreak + \vskip 40\p@ + } + +%%%%%% SONNY DEF + +\DeclareOption{Sonny}{% + \ChNameVar{\Large\sf} + \ChNumVar{\Huge} + \ChTitleVar{\Large\sf} + \ChRuleWidth{0.5pt} + \ChNameUpperCase + \renewcommand{\DOCH}{% + \raggedleft + \CNV\FmN{\@chapapp}\space \CNoV\thechapter + \par\nobreak + \vskip 40\p@} + \renewcommand{\DOTI}[1]{% + \CTV\raggedleft\mghrulefill{\RW}\par\nobreak + \vskip 5\p@ + \CTV\FmTi{#1}\par\nobreak + \mghrulefill{\RW}\par\nobreak + \vskip 40\p@} + \renewcommand{\DOTIS}[1]{% + \CTV\raggedleft\mghrulefill{\RW}\par\nobreak + \vskip 5\p@ + \CTV\FmTi{#1}\par\nobreak + \mghrulefill{\RW}\par\nobreak + \vskip 40\p@} +} + +%%%%%% LENNY DEF + +\DeclareOption{Lenny}{% + + \ChNameVar{\fontsize{14}{16}\usefont{OT1}{phv}{m}{n}\selectfont} + \ChNumVar{\fontsize{60}{62}\usefont{OT1}{ptm}{m}{n}\selectfont} + \ChTitleVar{\huge\bfseries\rm} + \ChRuleWidth{1pt} + \renewcommand{\DOCH}{% + \settowidth{\px}{\CNV\FmN{\@chapapp}} + \addtolength{\px}{2pt} + \settoheight{\py}{\CNV\FmN{\@chapapp}} + \addtolength{\py}{1pt} + + 
\settowidth{\mylen}{\CNV\FmN{\@chapapp}\space\CNoV\thechapter} + \addtolength{\mylen}{1pt} + \settowidth{\pxx}{\CNoV\thechapter} + \addtolength{\pxx}{-1pt} + + \settoheight{\pyy}{\CNoV\thechapter} + \addtolength{\pyy}{-2pt} + \setlength{\myhi}{\pyy} + \addtolength{\myhi}{-1\py} + \par + \parbox[b]{\textwidth}{% + \rule[\py]{\RW}{\myhi}% + \hskip -\RW% + \rule[\pyy]{\px}{\RW}% + \hskip -\px% + \raggedright% + \CNV\FmN{\@chapapp}\space\CNoV\thechapter% + \hskip1pt% + \mghrulefill{\RW}% + \rule{\RW}{\pyy}\par\nobreak% + \vskip -\baselineskip% + \vskip -\pyy% + \hskip \mylen% + \mghrulefill{\RW}\par\nobreak% + \vskip \pyy}% + \vskip 20\p@} + + + \renewcommand{\DOTI}[1]{% + \raggedright + \CTV\FmTi{#1}\par\nobreak + \vskip 40\p@} + + \renewcommand{\DOTIS}[1]{% + \raggedright + \CTV\FmTi{#1}\par\nobreak + \vskip 40\p@} + } + +%%%%%% Peter Osbornes' version of LENNY DEF + +\DeclareOption{PetersLenny}{% + +% five new lengths +\newlength{\bl} % bottom left : orig \space +\setlength{\bl}{6pt} +\newcommand{\BL}[1]{\setlength{\bl}{#1}} +\newlength{\br} % bottom right : orig 1pt +\setlength{\br}{1pt} +\newcommand{\BR}[1]{\setlength{\br}{#1}} +\newlength{\tl} % top left : orig 2pt +\setlength{\tl}{2pt} +\newcommand{\TL}[1]{\setlength{\tl}{#1}} +\newlength{\trr} % top right :orig 1pt +\setlength{\trr}{1pt} +\newcommand{\TR}[1]{\setlength{\trr}{#1}} +\newlength{\blrule} % top right :orig 1pt +\setlength{\trr}{0pt} +\newcommand{\BLrule}[1]{\setlength{\blrule}{#1}} + + + \ChNameVar{\fontsize{14}{16}\usefont{OT1}{phv}{m}{n}\selectfont} + \ChNumVar{\fontsize{60}{62}\usefont{OT1}{ptm}{m}{n}\selectfont} + \ChTitleVar{\Huge\bfseries\rm} + \ChRuleWidth{1pt} +\renewcommand{\DOCH}{% + + +%%%%%%% tweaks for 1--9 and A--Z +\ifcase\c@chapter\relax% +\or\BL{-3pt}\TL{-4pt}\BR{0pt}\TR{-6pt}%1 +\or\BL{0pt}\TL{-4pt}\BR{2pt}\TR{-4pt}%2 +\or\BL{0pt}\TL{-4pt}\BR{2pt}\TR{-4pt}%3 +\or\BL{0pt}\TL{5pt}\BR{2pt}\TR{-4pt}%4 +\or\BL{0pt}\TL{3pt}\BR{2pt}\TR{-4pt}%5 +\or\BL{-1pt}\TL{0pt}\BR{2pt}\TR{-2pt}%6 
+\or\BL{0pt}\TL{-3pt}\BR{2pt}\TR{-2pt}%7 +\or\BL{0pt}\TL{-3pt}\BR{2pt}\TR{-2pt}%8 +\or\BL{0pt}\TL{-3pt}\BR{-4pt}\TR{-2pt}%9 +\or\BL{-3pt}\TL{-3pt}\BR{2pt}\TR{-7pt}%10 +\or\BL{-6pt}\TL{-6pt}\BR{0pt}\TR{-9pt}%11 +\or\BL{-6pt}\TL{-6pt}\BR{2pt}\TR{-7pt}%12 +\or\BL{-5pt}\TL{-5pt}\BR{0pt}\TR{-9pt}%13 +\or\BL{-6pt}\TL{-6pt}\BR{0pt}\TR{-9pt}%14 +\or\BL{-3pt}\TL{-3pt}\BR{3pt}\TR{-6pt}%15 +\or\BL{-3pt}\TL{-3pt}\BR{3pt}\TR{-6pt}%16 +\or\BL{-5pt}\TL{-3pt}\BR{-8pt}\TR{-6pt}%17 +\or\BL{-5pt}\TL{-5pt}\BR{0pt}\TR{-9pt}%18 +\or\BL{-3pt}\TL{-3pt}\BR{-6pt}\TR{-9pt}%19 +\or\BL{0pt}\TL{0pt}\BR{0pt}\TR{-5pt}%20 +\fi + +\ifinapp\ifcase\c@chapter\relax% +\or\BL{0pt}\TL{14pt}\BR{5pt}\TR{-19pt}%A +\or\BL{0pt}\TL{-5pt}\BR{-3pt}\TR{-8pt}%B +\or\BL{-3pt}\TL{-2pt}\BR{1pt}\TR{-6pt}\BLrule{0pt}%C +\or\BL{0pt}\TL{-5pt}\BR{-3pt}\TR{-8pt}\BLrule{0pt}%D +\or\BL{0pt}\TL{-5pt}\BR{2pt}\TR{-3pt}%E +\or\BL{0pt}\TL{-5pt}\BR{-10pt}\TR{-1pt}%F +\or\BL{-3pt}\TL{0pt}\BR{0pt}\TR{-7pt}%G +\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%H +\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%I +\or\BL{2pt}\TL{0pt}\BR{-3pt}\TR{1pt}%J +\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%K +\or\BL{0pt}\TL{-5pt}\BR{2pt}\TR{-19pt}%L +\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%M +\or\BL{0pt}\TL{-5pt}\BR{-2pt}\TR{-1pt}%N +\or\BL{-3pt}\TL{-2pt}\BR{-3pt}\TR{-11pt}%O +\or\BL{0pt}\TL{-5pt}\BR{-9pt}\TR{-3pt}%P +\or\BL{-3pt}\TL{-2pt}\BR{-3pt}\TR{-11pt}%Q +\or\BL{0pt}\TL{-5pt}\BR{4pt}\TR{-8pt}%R +\or\BL{-2pt}\TL{-2pt}\BR{-2pt}\TR{-7pt}%S +\or\BL{-3pt}\TL{0pt}\BR{-5pt}\TR{4pt}\BLrule{8pt}%T +\or\BL{-7pt}\TL{-11pt}\BR{-5pt}\TR{-7pt}\BLrule{0pt}%U +\or\BL{-14pt}\TL{-5pt}\BR{-14pt}\TR{-1pt}\BLrule{14pt}%V +\or\BL{-10pt}\TL{-9pt}\BR{-13pt}\TR{-3pt}\BLrule{7pt}%W +\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}\BLrule{0pt}%X +\or\BL{-6pt}\TL{-4pt}\BR{-7pt}\TR{1pt}\BLrule{7pt}%Y +\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}\BLrule{0pt}%Z +\fi\fi +%%%%%%% + \settowidth{\px}{\CNV\FmN{\@chapapp}} + \addtolength{\px}{\tl} %MOD change 2pt to \tl + \settoheight{\py}{\CNV\FmN{\@chapapp}} + 
\addtolength{\py}{1pt} + + \settowidth{\mylen}{\CNV\FmN{\@chapapp}\space\CNoV\thechapter} + \addtolength{\mylen}{\trr}% MOD change 1pt to \tr + \settowidth{\pxx}{\CNoV\thechapter} + \addtolength{\pxx}{-1pt} + + \settoheight{\pyy}{\CNoV\thechapter} + \addtolength{\pyy}{-2pt} + \setlength{\myhi}{\pyy} + \addtolength{\myhi}{-1\py} + \par + \parbox[b]{\textwidth}{% + \rule[\py]{\RW}{\myhi}% + \hskip -\RW% + \rule[\pyy]{\px}{\RW}% + \hskip -\px% + \raggedright% + \CNV\FmN{\@chapapp}\rule{\blrule}{\RW}\hskip\bl\CNoV\thechapter%MOD +% \CNV\FmN{\@chapapp}\space\CNoV\thechapter %ORIGINAL + \hskip\br% %MOD 1pt to \br + \mghrulefill{\RW}% + \rule{\RW}{\pyy}\par\nobreak% + \vskip -\baselineskip% + \vskip -\pyy% + \hskip \mylen% + \mghrulefill{\RW}\par\nobreak% + \vskip \pyy}% + \vskip 20\p@} + + + \renewcommand{\DOTI}[1]{% + \raggedright + \CTV\FmTi{#1}\par\nobreak + \vskip 40\p@} + + \renewcommand{\DOTIS}[1]{% + \raggedright + \CTV\FmTi{#1}\par\nobreak + \vskip 40\p@} + } + + +% + + +%%%%%% BJORNSTRUP DEF + +\DeclareOption{Bjornstrup}{% + \usecolortrue + % pzc (Zapf Chancelery) is nice. ppl (Palatino) is cool too. 
+ \ChNumVar{\fontsize{76}{80}\usefont{OT1}{pzc}{m}{n}\selectfont} + \ChTitleVar{\raggedleft\Large\sffamily\bfseries} + + \setlength{\myhi}{10pt} % Space between grey box border and text + \setlength{\mylen}{\textwidth} + \addtolength{\mylen}{-2\myhi} + \renewcommand{\DOCH}{% + \settowidth{\py}{\CNoV\thechapter} + \addtolength{\py}{-10pt} % Amount of space by which the +% % number is shifted right + \fboxsep=0pt% + \colorbox[gray]{.85}{\rule{0pt}{40pt}\parbox[b]{\textwidth}{\hfill}}% + \kern-\py\raise20pt% + \hbox{\color[gray]{.5}\CNoV\thechapter}\\% + } + + \renewcommand{\DOTI}[1]{% + \nointerlineskip\raggedright% + \fboxsep=\myhi% + \vskip-1ex% + \colorbox[gray]{.85}{\parbox[t]{\mylen}{\CTV\FmTi{#1}}}\par\nobreak% + \vskip 40\p@% + } + + \renewcommand{\DOTIS}[1]{% + \fboxsep=0pt + \colorbox[gray]{.85}{\rule{0pt}{40pt}\parbox[b]{\textwidth}{\hfill}}\\% + \nointerlineskip\raggedright% + \fboxsep=\myhi% + \colorbox[gray]{.85}{\parbox[t]{\mylen}{\CTV\FmTi{#1}}}\par\nobreak% + \vskip 40\p@% + } +} + + +%%%%%%% GLENN DEF + + +\DeclareOption{Glenn}{% + \ChNameVar{\bfseries\Large\sf} + \ChNumVar{\Huge} + \ChTitleVar{\bfseries\Large\rm} + \ChRuleWidth{1pt} + \ChNameUpperCase + \ChTitleUpperCase + \renewcommand{\DOCH}{% + \settoheight{\myhi}{\CTV\FmTi{Test}} + \setlength{\py}{\baselineskip} + \addtolength{\py}{\RW} + \addtolength{\py}{\myhi} + \setlength{\pyy}{\py} + \addtolength{\pyy}{-1\RW} + + \raggedright + \CNV\FmN{\@chapapp}\space\CNoV\thechapter + \hskip 3pt\mghrulefill{\RW}\rule[-1\pyy]{2\RW}{\py}\par\nobreak} + + \renewcommand{\DOTI}[1]{% + \addtolength{\pyy}{-4pt} + \settoheight{\myhi}{\CTV\FmTi{#1}} + \addtolength{\myhi}{\py} + \addtolength{\myhi}{-1\RW} + \vskip -1\pyy + \rule{2\RW}{\myhi}\mghrulefill{\RW}\hskip 2pt + \raggedleft\CTV\FmTi{#1}\par\nobreak + \vskip 80\p@} + +\newlength{\backskip} + \renewcommand{\DOTIS}[1]{% +% \setlength{\py}{10pt} +% \setlength{\pyy}{\py} +% \addtolength{\pyy}{\RW} +% \setlength{\myhi}{\baselineskip} +% \addtolength{\myhi}{\pyy} 
+% \mghrulefill{\RW}\rule[-1\py]{2\RW}{\pyy}\par\nobreak +% \addtolength{}{} +%\vskip -1\baselineskip +% \rule{2\RW}{\myhi}\mghrulefill{\RW}\hskip 2pt +% \raggedleft\CTV\FmTi{#1}\par\nobreak +% \vskip 60\p@} +%% Fix suggested by Tomas Lundberg + \setlength{\py}{25pt} % eller vad man vill + \setlength{\pyy}{\py} + \setlength{\backskip}{\py} + \addtolength{\backskip}{2pt} + \addtolength{\pyy}{\RW} + \setlength{\myhi}{\baselineskip} + \addtolength{\myhi}{\pyy} + \mghrulefill{\RW}\rule[-1\py]{2\RW}{\pyy}\par\nobreak + \vskip -1\backskip + \rule{2\RW}{\myhi}\mghrulefill{\RW}\hskip 3pt % + \raggedleft\CTV\FmTi{#1}\par\nobreak + \vskip 40\p@} + } + +%%%%%%% CONNY DEF + +\DeclareOption{Conny}{% + \ChNameUpperCase + \ChTitleUpperCase + \ChNameVar{\centering\Huge\rm\bfseries} + \ChNumVar{\Huge} + \ChTitleVar{\centering\Huge\rm} + \ChRuleWidth{2pt} + + \renewcommand{\DOCH}{% + \mghrulefill{3\RW}\par\nobreak + \vskip -0.5\baselineskip + \mghrulefill{\RW}\par\nobreak + \CNV\FmN{\@chapapp}\space \CNoV\thechapter + \par\nobreak + \vskip -0.5\baselineskip + } + \renewcommand{\DOTI}[1]{% + \mghrulefill{\RW}\par\nobreak + \CTV\FmTi{#1}\par\nobreak + \vskip 60\p@ + } + \renewcommand{\DOTIS}[1]{% + \mghrulefill{\RW}\par\nobreak + \CTV\FmTi{#1}\par\nobreak + \vskip 60\p@ + } + } + +%%%%%%% REJNE DEF + +\DeclareOption{Rejne}{% + + \ChNameUpperCase + \ChTitleUpperCase + \ChNameVar{\centering\Large\rm} + \ChNumVar{\Huge} + \ChTitleVar{\centering\Huge\rm} + \ChRuleWidth{1pt} + \renewcommand{\DOCH}{% + \settoheight{\py}{\CNoV\thechapter} + \parskip=0pt plus 1pt % Set parskip to default, just in case v1.31 + \addtolength{\py}{-1pt} + \CNV\FmN{\@chapapp}\par\nobreak + \vskip 20\p@ + \setlength{\myhi}{2\baselineskip} + \setlength{\px}{\myhi} + \addtolength{\px}{-1\RW} + \rule[-1\px]{\RW}{\myhi}\mghrulefill{\RW}\hskip + 10pt\raisebox{-0.5\py}{\CNoV\thechapter}\hskip 10pt\mghrulefill{\RW}\rule[-1\px]{\RW}{\myhi}\par\nobreak + \vskip -3\p@% Added -2pt vskip to correct for streched text v1.31 + } 
+ \renewcommand{\DOTI}[1]{% + \setlength{\mylen}{\textwidth} + \parskip=0pt plus 1pt % Set parskip to default, just in case v1.31 + \addtolength{\mylen}{-2\RW} + {\vrule width\RW}\parbox{\mylen}{\CTV\FmTi{#1}}{\vrule width\RW}\par\nobreak% + \vskip -3pt\rule{\RW}{2\baselineskip}\mghrulefill{\RW}\rule{\RW}{2\baselineskip}% + \vskip 60\p@% Added -2pt in vskip to correct for streched text v1.31 + } + \renewcommand{\DOTIS}[1]{% + \setlength{\py}{\fboxrule} + \setlength{\fboxrule}{\RW} + \setlength{\mylen}{\textwidth} + \addtolength{\mylen}{-2\RW} + \fbox{\parbox{\mylen}{\vskip 2\baselineskip\CTV\FmTi{#1}\par\nobreak\vskip \baselineskip}} + \setlength{\fboxrule}{\py} + \vskip 60\p@ + } + } + + +%%%%%%% BJARNE DEF + +\DeclareOption{Bjarne}{% + \ChNameUpperCase + \ChTitleUpperCase + \ChNameVar{\raggedleft\normalsize\rm} + \ChNumVar{\raggedleft \bfseries\Large} + \ChTitleVar{\raggedleft \Large\rm} + \ChRuleWidth{1pt} + + +%% Note thechapter -> c@chapter fix appendix bug +%% Fixed misspelled 12 + + \newcounter{AlphaCnt} + \newcounter{AlphaDecCnt} + \newcommand{\AlphaNo}{% + \ifcase\number\theAlphaCnt + \ifnum\c@chapter=0 + ZERO\else{}\fi + \or ONE\or TWO\or THREE\or FOUR\or FIVE + \or SIX\or SEVEN\or EIGHT\or NINE\or TEN + \or ELEVEN\or TWELVE\or THIRTEEN\or FOURTEEN\or FIFTEEN + \or SIXTEEN\or SEVENTEEN\or EIGHTEEN\or NINETEEN\fi +} + + \newcommand{\AlphaDecNo}{% + \setcounter{AlphaDecCnt}{0} + \@whilenum\number\theAlphaCnt>0\do + {\addtocounter{AlphaCnt}{-10} + \addtocounter{AlphaDecCnt}{1}} + \ifnum\number\theAlphaCnt=0 + \else + \addtocounter{AlphaDecCnt}{-1} + \addtocounter{AlphaCnt}{10} + \fi + + + \ifcase\number\theAlphaDecCnt\or TEN\or TWENTY\or THIRTY\or + FORTY\or FIFTY\or SIXTY\or SEVENTY\or EIGHTY\or NINETY\fi + } + \newcommand{\TheAlphaChapter}{% + + \ifinapp + \thechapter + \else + \setcounter{AlphaCnt}{\c@chapter} + \ifnum\c@chapter<20 + \AlphaNo + \else + \AlphaDecNo\AlphaNo + \fi + \fi + } + \renewcommand{\DOCH}{% + \mghrulefill{\RW}\par\nobreak + 
\CNV\FmN{\@chapapp}\par\nobreak + \CNoV\TheAlphaChapter\par\nobreak + \vskip -1\baselineskip\vskip 5pt\mghrulefill{\RW}\par\nobreak + \vskip 20\p@ + } + \renewcommand{\DOTI}[1]{% + \CTV\FmTi{#1}\par\nobreak + \vskip 40\p@ + } + \renewcommand{\DOTIS}[1]{% + \CTV\FmTi{#1}\par\nobreak + \vskip 40\p@ + } +} + +\DeclareOption*{% + \PackageWarning{fancychapter}{unknown style option} + } + +\ProcessOptions* \relax + +\ifusecolor + \RequirePackage{color} +\fi +\def\@makechapterhead#1{% + \vspace*{0\p@}% + {\parindent \z@ \raggedright \normalfont + \ifnum \c@secnumdepth >\m@ne + \if@mainmatter%%%%% Fix for frontmatter, mainmatter, and backmatter 040920 + \DOCH + \fi + \fi + \interlinepenalty\@M + \if@mainmatter%%%%% Fix for frontmatter, mainmatter, and backmatter 060424 + \DOTI{#1}% + \else% + \DOTIS{#1}% + \fi + }} + + +%%% Begin: To avoid problem with scrbook.cls (fncychap version 1.32) + +%%OUT: +%\def\@schapter#1{\if@twocolumn +% \@topnewpage[\@makeschapterhead{#1}]% +% \else +% \@makeschapterhead{#1}% +% \@afterheading +% \fi} + +%%IN: +\def\@schapter#1{% +\if@twocolumn% + \@makeschapterhead{#1}% +\else% + \@makeschapterhead{#1}% + \@afterheading% +\fi} + +%%% End: To avoid problem with scrbook.cls (fncychap version 1.32) + +\def\@makeschapterhead#1{% + \vspace*{50\p@}% + {\parindent \z@ \raggedright + \normalfont + \interlinepenalty\@M + \DOTIS{#1} + \vskip 40\p@ + }} + +\endinput + + |