summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormarcheu@polious <marcheu@polious>2010-04-11 14:25:53 +0000
committermarcheu@polious <marcheu@polious>2010-04-11 14:25:53 +0000
commit6b01699963d0d4c1a2e37af81e962b925049e154 (patch)
tree5ecc9a5bab7f213c7fac7376c2fcad2aa2cae221
Initial import
-rw-r--r--linuxgraphicsdrivers.lyx6572
-rw-r--r--myfncychap.sty683
2 files changed, 7255 insertions, 0 deletions
diff --git a/linuxgraphicsdrivers.lyx b/linuxgraphicsdrivers.lyx
new file mode 100644
index 0000000..03bbdb4
--- /dev/null
+++ b/linuxgraphicsdrivers.lyx
@@ -0,0 +1,6572 @@
+#LyX 1.6.0 created this file. For more info see http://www.lyx.org/
+\lyxformat 345
+\begin_document
+\begin_header
+\textclass book
+\begin_preamble
+\usepackage[Lenny]{myfncychap}
+\usepackage{listings}
+\usepackage{color}
+\usepackage{geometry}
+\usepackage{tikz}
+\usepackage{array}
+
+\usetikzlibrary{positioning,shadows,arrows,shapes,patterns}
+\usepackage{verbatim}
+\tikzset{
+ mynode/.style={rectangle,rounded corners,draw=black, top color=white, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text centered},
+ myarrow/.style={->, >=latex', shorten >=1pt, thick},
+ mylabel/.style={text width=7em, text centered}
+}
+
+\renewcommand{\chaptermark}[1]{\markboth{\thechapter.\ #1}{}}
+\renewcommand{\sectionmark}[1]{\markright{\thesection.\ #1}}
+\fancyhead{}
+\fancyhead[LE]{\bfseries\leftmark}
+\fancyhead[RO]{\bfseries\rightmark}
+\fancyfoot{}
+\fancyfoot[LE,RO]{\bfseries\thepage}
+
+\fancypagestyle{plain}{
+\renewcommand{\headrulewidth}{0pt}
+\fancyhead{}
+\fancyhead[LE]{}
+\fancyhead[RO]{}
+\fancyfoot{}
+\fancyfoot[LE,RO]{\bfseries\thepage}
+}
+
+\fancypagestyle{Contents}{
+\fancyfoot{}
+\fancyfoot[LE,RO]{\bfseries\thepage} }
+
+
+\def\contentsname{Table of Contents}
+
+
+\definecolor{listinggray}{gray}{0.95}
+\lstset{basicstyle=\small,keywordstyle=,tabsize=3,escapechar=`,extendedchars=true}
+\lstset{backgroundcolor=\color{listinggray},rulecolor=\color{black}}
+\lstset{commentstyle=\textit, stringstyle=\upshape,showspaces=false}
+\lstset{showstringspaces=false}
+\lstset{frame=single}
+\lstset{breaklines=true}
+\lstset{language=C}
+\lstset{basicstyle=\footnotesize}
+\lstset{columns=flexible}
+\end_preamble
+\use_default_options true
+\begin_modules
+theorems-ams
+\end_modules
+\language english
+\inputencoding auto
+\font_roman palatino
+\font_sans default
+\font_typewriter default
+\font_default_family default
+\font_sc false
+\font_osf false
+\font_sf_scale 100
+\font_tt_scale 100
+
+\graphics default
+\paperfontsize 10
+\spacing single
+\use_hyperref true
+\pdf_title "Linux Graphics Drivers: an Introduction"
+\pdf_author "Stéphane Marchesin"
+\pdf_bookmarks true
+\pdf_bookmarksnumbered false
+\pdf_bookmarksopen false
+\pdf_bookmarksopenlevel 1
+\pdf_breaklinks false
+\pdf_pdfborder true
+\pdf_colorlinks true
+\pdf_backref false
+\pdf_pdfusetitle true
+\pdf_quoted_options "linkcolor=cyan"
+\papersize b5paper
+\use_geometry true
+\use_amsmath 1
+\use_esint 1
+\cite_engine basic
+\use_bibtopic false
+\paperorientation portrait
+\leftmargin 2.5cm
+\topmargin 2.5cm
+\rightmargin 1.7cm
+\bottommargin 2.5cm
+\secnumdepth 3
+\tocdepth 3
+\paragraph_separation skip
+\defskip medskip
+\quotes_language english
+\papercolumns 1
+\papersides 2
+\paperpagestyle fancy
+\bullet 0 0 17 -1
+\tracking_changes false
+\output_changes false
+\author ""
+\author ""
+\end_header
+
+\begin_body
+
+\begin_layout Title
+Linux Graphics Drivers: an Introduction
+\begin_inset Newline newline
+\end_inset
+
+
+\size small
+Version 2
+\end_layout
+
+\begin_layout Author
+Stéphane Marchesin
+\begin_inset Newline newline
+\end_inset
+
+<stephane.marchesin@gmail.com>
+\end_layout
+
+\begin_layout Standard
+\begin_inset CommandInset toc
+LatexCommand tableofcontents
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+Introduction
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:Introduction"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+
+\lang french
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+markboth{ }{ Introduction }
+\end_layout
+
+\end_inset
+
+
+\lang english
+Accelerating graphics is a complex art which suffers a mostly unjustified
+ reputation of being voodoo magic.
+ This book is intended as an introduction to the inner workings and development
+ of graphics drivers under Linux.
+ Throughout this whole book, knowledge of C programming is expected, along
+ with some familiarity with graphics processors.
+ Although its primary audience is the graphics driver developer, this book
+ details the internals of the full Linux graphics stack and therefore can
+ also be useful to application developers seeking to enhance their vision
+ of the Linux graphics world: one can hope to improve the performance of
+ one's applications through a better understanding of the Linux graphics stack.
+ In this day and age of pervasive 3D graphics and GPU computing, a better
+ comprehension of graphics is a must-have!
+\end_layout
+
+\begin_layout Section
+Book overview
+\end_layout
+
+\begin_layout Standard
+The book starts with an introduction of relevant hardware concepts (Chapter
+
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:A-Look-at"
+
+\end_inset
+
+).
+ Only concepts directly relevant to the graphics driver business are presented
+ there.
+ Then we paint a high-level view of the Linux graphics stack in Chapter
+
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:The-Big-Picture"
+
+\end_inset
+
+ and its evolution over the years.
+ Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Framebuffer-Drivers"
+
+\end_inset
+
+ introduces framebuffer drivers, a basic form of graphics drivers under
+ Linux that, although primitive, sees wide usage in the embedded space.
+ Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:The-DRM-Kernel"
+
+\end_inset
+
+ introduces the DRM, a kernel module which is in charge of arbitrating all
+ graphics activity going on in a Linux system.
+ The next chapter (Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:X.Org-Drivers"
+
+\end_inset
+
+) focuses on X.Org drivers and the existing acceleration APIs available to
+ the developer.
+ Video decoding sees its own dedicated part in Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Video-Decoding"
+
+\end_inset
+
+.
+ We then move on to 3D acceleration with Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:OpenGL"
+
+\end_inset
+
+ where we introduce the basic concepts of OpenGL.
+ Chapters
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Mesa"
+
+\end_inset
+
+ and
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Gallium-3D"
+
+\end_inset
+
+ are dedicated to Mesa and Gallium 3D, the two foundations of 3D graphics
+ acceleration under Linux used as the framework for 3D drivers.
+ Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:GPU-Computing"
+
+\end_inset
+
+ tackles an emerging field, GPU computing.
+ Next, we discuss suspend and resume in Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Suspend-and-Resume"
+
+\end_inset
+
+.
+ We then discuss two side issues with Linux graphics drivers: technical
+ specifications in Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Technical-Specifications"
+
+\end_inset
+
+ and what you should do aside from pure development in Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Beyond-Development"
+
+\end_inset
+
+.
+ Finally, we conclude in Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Conclusions"
+
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+Each chapter finishes with the
+\begin_inset Quotes eld
+\end_inset
+
+takeaways
+\begin_inset Quotes erd
+\end_inset
+
+, a number of relevant points that we made during said chapter.
+\end_layout
+
+\begin_layout Section
+What this book does not cover
+\end_layout
+
+\begin_layout Standard
+Computer graphics move at a fast pace, and this book is not about the past.
+ Obsolete hardware (ISA, VLB, ...), old standards (the VGA standard and its
+ dreadful int10, VESA), outdated techniques (user space modesetting) and
+ old X11 servers (Xsun, XFree86, KDrive...) will not be detailed.
+\end_layout
+
+\begin_layout Chapter
+A Look at the Hardware
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:A-Look-at"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Before diving any further into the subject of graphics drivers, we need
+ to understand the hardware which is at play.
+ This chapter is by no means intended to be a complete description of all
+ inner workings of your average computer and its graphics hardware, but
+ only as an introduction thereof.
+ The goal of this section is to
+\begin_inset Quotes eld
+\end_inset
+
+cover the bases
+\begin_inset Quotes erd
+\end_inset
+
+ on what will be required later on.
+ Notably, most hardware concepts that will subsequently be required are
+ introduced here.
+ Although we sometimes have to go through architecture-specific hoops, we
+ try to stay as generic as possible and the concepts detailed thereafter
+ generalize well.
+\end_layout
+
+\begin_layout Section
+Hardware Overview
+\end_layout
+
+\begin_layout Standard
+Today all computers are organized the same way: a central processor
+ and a number of peripherals.
+ In order to exchange data, these peripherals are interconnected by a bus
+ over which all communications go.
+ Figure
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:Peripheral-interconnection-in"
+
+\end_inset
+
+ outlines the layout of peripherals in a standard computer.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\noindent
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, myarrowtwoside/.style={<->, >=latex', shorten >=1pt, thick},
+ mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width=1.5cm] (CPU) {CPU
+\backslash
+
+\backslash
+ };
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width=1.5cm, right=0.8cm of CPU] (memory) {System
+\backslash
+
+\backslash
+ Memory};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width=1.5cm, right=0.8cm of memory] (GPU) {Graphics
+\backslash
+
+\backslash
+ Card};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width=1.5cm, right=0.8cm of GPU] (network) {Network
+\backslash
+
+\backslash
+ Card};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[right = 0.8cm of network] {$
+\backslash
+cdots$};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 10cm, below=2cm of GPU] (bus) {Bus};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrowtwoside] (CPU.south) -> ++(0,-2) (bus);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrowtwoside] (GPU.south) -> ++(0,-2) (bus);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrowtwoside] (memory.south) -> ++(0,-2) (bus);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrowtwoside] (network.south) -> ++(0,-2) (bus);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+node[mynode, below=2cm of GPU] (iommu) {IOMMU};
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+node[mynode, left=1cm of mmu] (mmupt) {MMU page table};
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+node[mynode, right=1cm of iommu] (iommupt) {IOMMU page table};
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+node[mynode, text width=5cm, below=2cm of mmu, xshift=1.5cm] (memory) {Memory};
+
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+draw[myarrow] (CPU.south) -| (mmu.north);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+draw[myarrow] (GPU.south) -| (iommu.north);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+draw[myarrow] (mmu.south) -> ++(0,-2) (memory);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+draw[myarrow] (iommu.south) -> ++(0,-2) (memory);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+draw[myarrow] (mmu) -> (mmupt);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+draw[myarrow] (iommu) -> (iommupt);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+node at (4,-1.5) {GPU Address};
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+node at (-1.5,-1.5) {Virtual Address};
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+node at (-1.5,-4.5) {Physical Address};
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+node at (4,-4.5) {Physical Address};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:Peripheral-interconnection-in"
+
+\end_inset
+
+Peripheral interconnection in a typical computer.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+The first user of the bus is the CPU.
+ The CPU uses the bus to access system memory and other peripherals.
+ However, the CPU is not the only one able to write and read data to the
+ peripherals; the peripherals themselves also have the capability to exchange
+ information directly.
+ In particular, a peripheral which has the ability to read and write to
+ memory without CPU intervention is said to be DMA (Direct Memory Access)
+ capable, and the memory transaction is called a DMA.
+ Today, all graphics cards feature this ability (named DMA bus mastering)
+ which consists in the card requesting and subsequently taking control of
+ the bus for a number of microseconds.
+
+\end_layout
+
+\begin_layout Standard
+If a peripheral has the ability to perform DMA to or from a non-contiguous
+ list of memory pages (which is very convenient when the data is not contiguous
+ in memory), it is said to have DMA scatter-gather capability (as it can
+ scatter data to different memory pages, or gather data from different pages).
+\end_layout
+
+\begin_layout Standard
+Notice that the DMA capability can be a downside in some cases.
+ For example on real time systems, this means the CPU is unable to access
+ the bus while a DMA transaction is in progress, and since DMA transactions
+ happen asynchronously this can lead to missing a real time scheduling deadline.
+ Therefore, while DMA has a lot of advantages from a performance viewpoint,
+ there are situations where it should be avoided.
+\end_layout
+
+\begin_layout Section
+Bus types
+\end_layout
+
+\begin_layout Standard
+Buses connect the machine peripherals together; each and every communication
+ between different peripherals goes over (at least) one bus.
+ In particular, a bus is the way most graphics cards are connected to the
+ rest of the computer (one notable exception being the case of some embedded
+ systems, where the GPU is directly connected to the CPU).
+ As shown in Table
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:Common-bus-types"
+
+\end_inset
+
+, there are many bus types suitable for graphics: PCI, AGP, PCI-X, PCI-express
+ to name a (relevant) few.
+ All the bus types we will detail are variants of the PCI bus type, however
+ some of them feature singular improvements over the original PCI design.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float table
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Tabular
+<lyxtabular version="3" rows="8" columns="5">
+<features>
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<column alignment="center" valignment="top" width="0">
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Bus type
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Bus width
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Frequency
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Bandwidth
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Capabilities
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+PCI
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+32 bits
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+33 MHz
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+133 MB/s (33 MHz)
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+-
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+AGP
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+32 bits
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+66 MHz
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+2100 MB/s (8x)
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+SBA, FW,
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+GART
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+PCI-X
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+64 bits
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+33, 66,
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+533 MB/s (66 MHz)
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+-
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+133 MHz
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+PCI-Express (1.0)
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Serial
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+1.25 GHz
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+4 GB/s (16 lanes)
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+-
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+PCI-Express (3.0)
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Serial
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+4 GHz
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+16 GB/s (16 lanes)
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+-
+\end_layout
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:Common-bus-types"
+
+\end_inset
+
+Common bus types.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subparagraph*
+PCI (Peripheral Component Interconnect)
+\end_layout
+
+\begin_layout Standard
+PCI is the most basic bus allowing graphics peripherals to be connected today.
+ One of its key features is called bus mastering.
+ This feature allows a given peripheral to take hold of the bus for a given
+ number of cycles and do a complete transaction (called a DMA, Direct Memory
+ Access).
+ The PCI bus is coherent, which means that no explicit flushes are required
+ for the memory to be coherent across devices.
+\end_layout
+
+\begin_layout Subparagraph*
+AGP (Accelerated Graphics Port)
+\end_layout
+
+\begin_layout Standard
+AGP is essentially a modified PCI bus with a number of extra features compared
+ to its ancestor.
+ Most importantly, it is faster thanks to a higher clock speed and the ability
+ to send 2, 4 or 8 bits per lane on each clock tick (for AGP 2x, 4x and
+ 8x respectively).
+ AGP also has three distinctive features:
+\end_layout
+
+\begin_layout Itemize
+The first feature is AGP GART (Graphics Aperture Remapping Table), a simple
+ form of IOMMU (as will be seen in section
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "sec:Virtual-and-Physical"
+
+\end_inset
+
+).
+ It allows taking a (non contiguous) set of physical memory pages out of
+ system memory and exposing it to the GPU for its use as a contiguous area.
+ This increases the amount of memory usable by the GPU at little cost, and
+ creates a convenient area for sharing data between the CPU and the GPU
+ (AGP graphics cards can do fast DMA to/from this area, and since the GART
+ area is a chunk of system RAM, CPU access to it is a lot faster than to VRAM).
+ One notable drawback is that the GART area is not coherent, and therefore
+ writes to GART (be it from the GPU or CPU) need to be flushed before transactio
+ns from the other party can begin.
+ Another drawback is that only a single GART area is handled by the hardware,
+ and it has to be sub-allocated by the driver.
+\end_layout
+
+\begin_layout Itemize
+The second feature is AGP side band addressing (SBA).
+ Side band addressing consists in 8 extra bus bits used as an address bus.
+ Instead of multiplexing the bus bandwidth between addresses and data, the
+ nominal AGP bandwidth can be dedicated to data only.
+ This feature is transparent to the driver developer.
+\end_layout
+
+\begin_layout Itemize
+The third feature is AGP Fast Writes (FW).
+ Fast writes allow sending data to the graphics card directly, without having
+ the card initiate a DMA.
+ This feature is also transparent for the driver developer.
+\end_layout
+
+\begin_layout Standard
+Keep in mind that these last two features are known to be unstable on a
+ wide range of hardware, and oftentimes require chipset-specific hacks to
+ work properly.
+ Therefore it is advisable not to enable them.
+ In fact, they are an extremely frequent cause for strange hardware errors
+ on AGP cards.
+\end_layout
+
+\begin_layout Subparagraph*
+PCI-X
+\end_layout
+
+\begin_layout Standard
+PCI-X was developed as a faster PCI for server boards, and very few graphics
+ peripherals exist in this format.
+ It is not to be confused with PCI-Express, which sees real widespread usage.
+\end_layout
+
+\begin_layout Subparagraph*
+PCI-Express (PCI-E)
+\end_layout
+
+\begin_layout Standard
+PCI-Express is the new generation of PCI devices.
+ It has more advantages than a simple improved PCI.
+\end_layout
+
+\begin_layout Standard
+Finally, it is important to note that, depending on the architecture, the
+ CPU-GPU communication does not always rely on a bus.
+ This is especially common on embedded systems where the GPU and the CPU
+ are on a single die.
+ In that case the CPU can access the GPU registers directly.
+\end_layout
+
+\begin_layout Section
+Virtual and Physical Memory
+\begin_inset CommandInset label
+LatexCommand label
+name "sec:Virtual-and-Physical"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The term
+\begin_inset Quotes eld
+\end_inset
+
+memory
+\begin_inset Quotes erd
+\end_inset
+
+ has two main different meanings:
+\end_layout
+
+\begin_layout Itemize
+Physical memory.
+ Physical memory is real, hardware memory, as stored in the memory chips.
+
+\end_layout
+
+\begin_layout Itemize
+Virtual memory.
+ Virtual memory is a translation of physical memory addresses allowing user
+ space applications to see their allocated chunks as if they were contiguous
+ while they are fragmented and scattered on the chips.
+\end_layout
+
+\begin_layout Standard
+In order to simplify programming, it is easier to handle contiguous memory
+ areas.
+ This is easy to achieve as long as only a small area is needed.
+ But allocating a bigger memory chunk would require as much contiguous physical
+ memory which is difficult if not impossible to achieve shortly after bootup
+ because of memory fragmentation.
+ Therefore, a mechanism is required to keep the appearance of a contiguous
+ piece of memory to the application while using scattered pieces.
+
+\end_layout
+
+\begin_layout Standard
+To achieve this, memory is split into pages.
+ For the scope of this book, it is sufficient to say that a memory page
+ is a collection of contiguous bytes in physical memory
+\begin_inset Foot
+status open
+
+\begin_layout Plain Layout
+On x86 and x86-64, a page is usually 4096 bytes long, although different
+ sizes are possible on other architectures or with huge pages.
+\end_layout
+
+\end_inset
+
+.
+ In order to make a scattered list of physical pages seem contiguous in virtual
+ space, a piece of hardware called the MMU (memory management unit) converts virtual
+ addresses (used in applications) into physical addresses (used for actually
+ accessing memory) using a page table as shown in Figure
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:MMU-and-IOMMU"
+
+\end_inset
+
+.
+ In case a page does not exist in virtual space (and therefore not in the
+ MMU table), the MMU is able to signal it, which provides the basic mechanism
+ for reporting access to non-existent memory areas.
+ This in turn is used by the system to implement advanced memory programming
+ like swapping or on-the-fly page instantiations.
+ As the MMU is only effective for CPU access to memory, virtual addresses
+ are not relevant to peripheral hardware since it is not able to match them to
+ physical addresses.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\noindent
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode] (CPU) {CPU};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=of CPU] (GPU) {GPU};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below=2cm of CPU] (mmu) {MMU};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below=2cm of GPU] (iommu) {IOMMU};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, left=1cm of mmu] (mmupt) {MMU page table};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=1cm of iommu] (iommupt) {IOMMU page table};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width=5cm, below=2cm of mmu, xshift=1.5cm] (memory) {Memory};
+
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (CPU.south) -| (mmu.north);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (GPU.south) -| (iommu.north);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (mmu.south) -> ++(0,-2) (memory);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (iommu.south) -> ++(0,-2) (memory);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (mmu) -> (mmupt);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (iommu) -> (iommupt);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (4,-1.5) {GPU Address};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (-1.5,-1.5) {Virtual Address};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (-1.5,-4.5) {Physical Address};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (4,-4.5) {Physical Address};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:MMU-and-IOMMU"
+
+\end_inset
+
+MMU and IOMMU.
+\end_layout
+
+\end_inset
+
+
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+XXX ajouter les tables de page à ce dessin
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+While the MMU only works for CPU accesses, it has an equivalent for peripherals:
+ the IOMMU.
+ As shown on figure
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:MMU-and-IOMMU"
+
+\end_inset
+
+, an IOMMU is the same as an MMU except that it virtualizes the address
+ space of peripherals.
+ The IOMMU can see various incarnations, either on the motherboard chipset
+ (in which case it is shared between all peripherals) or on the graphics
+ card itself (where it will be called AGP GART, PCI GART).
+ The job of the IOMMU is to translate memory addresses from the peripherals
+ into physical addresses.
+ In particular, this allows
+\begin_inset Quotes eld
+\end_inset
+
+fooling
+\begin_inset Quotes erd
+\end_inset
+
+ a device into restricting its DMAs to a given range of memory and it is
+ required for better security and hardware virtualization.
+\end_layout
+
+\begin_layout Standard
+A special case of IOMMU is the Linux swiotlb which allocates a contiguous
+ piece of physical memory at boot (which makes it feasible to have a large
+ contiguous physical allocation since there is no fragmentation yet) and
+ uses it for DMA.
+ As the memory is physically contiguous, no page translation is required
+ and therefore a DMA can occur to and from this memory range.
+ However, this means that this memory (64MB by default) is preallocated
+ and will not be used for anything else.
+\end_layout
+
+\begin_layout Standard
+AGP GART is another special case of IOMMU present with AGP graphics cards
+ which exposes a single linear area to the card.
+ In that case the IOMMU table is embedded in the AGP chipset, on the motherboard.
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+Dire que c'est lineaire en memoire physique et virtu
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Yet another special case of IOMMU is the PCI GART which allows exposing
+ a chunk of system memory to the card.
+ In that case the IOMMU table is embedded in the graphics card, and often
+ the physical memory used does not need to be contiguous.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+http://images.google.fr/images?hl=fr&source=hp&q=page+table&btnG=Recherche+d'image
+s&gbv=2&aq=f&oq=
+\end_layout
+
+\begin_layout Plain Layout
+http://pages.cs.wisc.edu/~bart/537/lecturenotes/s16.html
+\end_layout
+
+\begin_layout Plain Layout
+http://a.michelizza.free.fr/pmwiki.php?n=TutoOS.Mm3
+\end_layout
+
+\begin_layout Plain Layout
+http://lwn.net/Articles/106177/
+\end_layout
+
+\begin_layout Plain Layout
+http://www.vocw.edu.vn/content/m10106/latest/
+\end_layout
+
+\begin_layout Plain Layout
+http://cs.nyu.edu/courses/spring05/G22.2250-001/lectures/lecture-08.html
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Obviously, with so many different memory types, performance is not homogeneous;
+ not all combinations of accesses are fast, depending on whether they involve
+ the CPU, the GPU, or bus transfers.
+ Another issue which arises is memory coherence: how can one ensure that
+ memory is coherent across devices, in particular that data written by
+ the CPU is available to the GPU (or the opposite)?
+ These two issues are correlated, as higher performance usually means a
+ lower level of memory coherence, and vice-versa.
+\end_layout
+
+\begin_layout Standard
+As far as setting the memory caching parameters goes, there are two ways
+ to set caching attributes on memory ranges:
+\end_layout
+
+\begin_layout Itemize
+MTRRs.
+ An MTRR (Memory Type Range Register) is a register describing attributes
+ for a range of given physical memory.
+ The number of MTRRs depends on the system, but is very limited.
+ Although this applies to a physical memory range, the effect works on the
+ corresponding virtual memory pages.
+ This for example makes it possible to map pages with a specific caching
+ type.
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+XXX des exemples
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Itemize
+PAT (Page Attribute Table) allows setting per-page memory attributes.
+ However it is an extension only available on recent x86 processors.
+\end_layout
+
+\begin_layout Standard
+On top of these, one can use explicit caching instructions on some architectures
+, for example on x86
+\emph on
+movntq
+\emph default
+ is an uncached mov instruction and
+\emph on
+clflush
+\emph default
+ can selectively flush cache lines.
+\end_layout
+
+\begin_layout Standard
+There are 3 caching modes, usable both through MTRR and PAT on system memory:
+\end_layout
+
+\begin_layout Itemize
+UC (UnCached) memory is uncached.
+ No CPU read/writes to this area are cached, and each memory write instruction
+ triggers an actual immediate memory write.
+ This is helpful to ensure that information has been actually written so
+ as to avoid CPU/GPU race conditions.
+\end_layout
+
+\begin_layout Itemize
+WC (Write Combine) memory is uncached, but CPU writes are combined together
+ in order to improve the performance.
+ This is useful to improve performance in situations where uncached memory
+ is required, but where combining the writes together has no adverse effects.
+\end_layout
+
+\begin_layout Itemize
+WB (Write Back) memory is cached.
+ This is the default mode and leads to the best performance for CPU accesses.
+ However this does not ensure that memory writes are propagated to central
+ memory after a finite time.
+\end_layout
+
+\begin_layout Standard
+Notice that these caching modes apply to the CPU only, the GPU accesses
+ are not directly affected by the current caching mode.
+ However, when the GPU has to access an area of memory which was previously
+ filled by the CPU, uncached modes ensure that the memory writes are actually
+ done, and are not pending sitting in a CPU cache.
+ Another way to achieve the same effect is the use of cache flushing instruction
+s present on some x86 processors (like clflush).
+ However this is less portable than using the caching modes.
+ Yet another (portable) way is the use of memory barriers, which ensures
+ that pending memory writes have been committed to main memory before moving
+ on.
+\end_layout
+
+\begin_layout Standard
+Obviously with so many different caching modes, not all accesses have the
+ same performance:
+\end_layout
+
+\begin_layout Itemize
+When it comes to CPU access to system memory, uncached mode provides the
+ worst performance, write back provides the best performance, and write
+ combine is in between.
+\end_layout
+
+\begin_layout Itemize
+When the CPU accesses the video memory from a discrete card, all accesses
+ are extremely slow, be they reads or writes, as each access needs a cycle
+ on the bus.
+ Therefore it is not recommended to access large areas of VRAM with the
+ CPU.
+ Furthermore on some GPUs synchronizing is required or this could cause
+ a GPU hang.
+\end_layout
+
+\begin_layout Itemize
+Obviously the GPU accessing VRAM is extremely fast.
+\end_layout
+
+\begin_layout Itemize
+GPU access to system ram is unaffected by the caching mode, but still has
+ to go over the bus.
+ This is the case of DMA transactions.
+ As those happen asynchronously, they can be considered
+\begin_inset Quotes eld
+\end_inset
+
+free
+\begin_inset Quotes erd
+\end_inset
+
+ from the viewpoint of the CPU, however there is a non-negligible setup
+ cost involved for each DMA transaction.
+ This is why, when transferring small amounts of memory, a DMA transaction
+ is not always better than a direct CPU access.
+\end_layout
+
+\begin_layout Standard
+Finally, one last important point to make about memory is the notion of
+ memory barriers and write posting.
+ In the case of a write-combined or cached (Write Back) memory area, a memory
+ barrier ensures that pending writes have actually been committed to memory.
+ This is used, for example, before asking the GPU to read a given memory
+ area.
+ For I/O areas, a similar technique called write posting exists: it consists
+ in doing a dummy read inside the I/O area which will, as a side effect,
+ wait until pending writes have taken effect before completing.
+\end_layout
+
+\begin_layout Section
+The Graphics Card
+\end_layout
+
+\begin_layout Standard
+Today, a graphics card is basically a computer-in-the-computer.
+ It is a complex beast with a dedicated processor on a separate card, and
+ features its own computation units, its own bus, and its own memory.
+
+\end_layout
+
+\begin_layout Subsubsection*
+Graphics Memory
+\end_layout
+
+\begin_layout Standard
+The GPU's memory, which we will from now on refer to as video memory, can
+ be either real, dedicated, on-card memory (in the case of a discrete card),
+ or memory shared with the CPU (in the case of an integrated card).
+ Notice that the case of shared memory has interesting implications, as
+ it means that system to video memory copies can be virtually free if implemente
+d properly; while the case of dedicated memory means that transfers back
+ and forth will need to happen.
+
+\end_layout
+
+\begin_layout Standard
+It is not uncommon for modern GPUs to feature a form of virtual memory as
+ well, allowing to map different resources (real video memory or system
+ memory) into the GPU address space.
+ This is very similar to the CPU's virtual memory, but uses a completely
+ separate hardware implementation.
+ For example, older Radeon cards (actually since Rage 128) feature a number
+ of surfaces which you can map into the GPU address space, each of which
+ is a contiguous memory resource (video ram, AGP, PCI).
+ Old Nvidia cards (everything up to NV40) have a similar concept based on
+ objects which describe an area of memory which can then be bound to a given
+ use.
+ Recent cards (starting with NV50 and R800) let you build the address space
+ page by page, with the ability of picking system and dedicated video memory
+ pages at will.
+ The similarity of these with a CPU virtual address space is very striking,
+ in fact you can have accesses to unmapped pages be signaled to you through
+ an interrupt and act on this in a video memory page fault handler.
+ However, be careful playing with those as the implication here is that
+ driver developers have to juggle with multiple address spaces from the
+ CPU and GPU which are going to be fundamentally different.
+\end_layout
+
+\begin_layout Subsubsection*
+Surfaces
+\end_layout
+
+\begin_layout Standard
+Surfaces are the basic sources and targets for all rendering.
+ Although they can be called differently (textures, render targets, buffers...)
+ the basic idea is always the same.
+ Figure
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:The-layout-of"
+
+\end_inset
+
+ depicts the layout of a graphics surface.
+ The surface width is rounded up to what we call the pitch because of hardware
+ limitations (usually to the next multiple of some power of 2) and therefore
+ there exists a dead zone of pixels which goes unused.
+ The graphics surface has a number of characteristics:
+\end_layout
+
+\begin_layout Itemize
+The pixel format of the surface.
+ A pixel color is represented in memory by its red, green and blue components,
+ plus an alpha component used as the opacity for blending.
+ The number of bits for a whole pixel usually matches hardware sizes (8, 16
+ or 32 bits) but the distribution of the bits between the four components
+ does not have to match those.
+ The number of bits used for each pixel is referred to as bits per pixel,
+ or
+\emph on
+bpp
+\emph default
+.
+ Common pixel formats include 888 RGBX, 8888 RGBA, 565 RGB, 5551 RGBA,
+ 4444 RGBA
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+, YUV12, YUY16
+\end_layout
+
+\end_inset
+
+.
+ Notice that most cards today work natively in ABGR 8888.
+\end_layout
+
+\begin_layout Itemize
+Width and height are the most obvious characteristics, and are given in
+ pixels.
+
+\end_layout
+
+\begin_layout Itemize
+The pitch is the width in bytes (not in pixels!) of the surface, including
+ the dead zone pixels.
+ The pitch is convenient for computing memory usages, for example the size
+ of the surface should be computed by
+\begin_inset Formula $height\times pitch$
+\end_inset
+
+ and not
+\begin_inset Formula $height\times width\times bpp$
+\end_inset
+
+ in order to include the dead zone.
+\end_layout
+
+\begin_layout Standard
+Notice that surfaces are not always stored linearly in video memory, in
+ fact for performance reasons it is extremely common that they are not,
+ as this improves the locality of the memory accesses when rendering.
+ Such surfaces are called
+\emph on
+tiled
+\emph default
+.
+ The exact layout of a tiled surface is highly dependent on the hardware,
+ but is usually a form of space-filling curve like the Z curve or Hilbert's
+ curve.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\noindent
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+hspace{-4cm}
+\end_layout
+
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikz{
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner
+ sep=1em] (2,2) rectangle (10,7);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[pattern = north east lines] (8.5,2) rectangle (10,7);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (2,7.5) -- +(6.5,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (1.5,2) -- +(0,5);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (2,1.5) -- +(8,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,8) {Surface width};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,1) {Surface pitch};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (0,4.5) {Surface height};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (5.2,4.5) {Used pixels};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (9.2,4.8) {Dead};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (9.2,4.3) {zone};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,0.5) { };
+\end_layout
+
+\begin_layout Plain Layout
+
+}
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:The-layout-of"
+
+\end_inset
+
+The layout of a surface.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Subsubsection*
+2D engine
+\end_layout
+
+\begin_layout Standard
+The 2D engine, or blitter, is the hardware used for 2D acceleration.
+ Blitters have been one of the earliest forms of graphics acceleration and
+ are still extremely widespread today.
+ Generally, a 2D engine is capable of the following operations:
+\end_layout
+
+\begin_layout Itemize
+Blits.
+ Blits are a copy of a memory rectangle from one place to another by the
+ GPU.
+ The source and destination can be either video or system memory.
+\end_layout
+
+\begin_layout Itemize
+Solid fills.
+ Solid fills consist in filling a rectangular memory area with a color.
+ Note that this can also include the alpha channel.
+\end_layout
+
+\begin_layout Itemize
+Alpha blits.
+ Alpha blits use the alpha component of pixels from a surface to achieve
+ transparency [Porter & Duff].
+\end_layout
+
+\begin_layout Itemize
+Stretched blits.
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\noindent
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+hspace{-2cm}
+\end_layout
+
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikz{
+\end_layout
+
+\begin_layout Plain Layout
+
+% Source
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner
+ sep=1em] (2,2) rectangle (8,6);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[pattern = north east lines] (7,2) rectangle (8,6);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (4,7) {Blit width};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (3,6.5) -- +(2,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (0,4.5) {Blit height};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (1.5,3.5) -- +(0,2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (2,1.5) -- +(6,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (5,1) {Src pitch};
+\end_layout
+
+\begin_layout Plain Layout
+
+% source pixels
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw (3,3.5) rectangle (5,5.5);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (4,4.5) {Src pixels};
+\end_layout
+
+\begin_layout Plain Layout
+
+% Destination
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner
+ sep=1em] (9,2) rectangle (12,6);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[pattern = north east lines] (11.5,2) rectangle (12,6);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (9,1.5) -- +(3,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10.5,1) {Dst pitch};
+\end_layout
+
+\begin_layout Plain Layout
+
+% destination pixels
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw (9.2,2.5) rectangle (11.2,4.5);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10.2,3.5) {Dst pixels};
+\end_layout
+
+\begin_layout Plain Layout
+
+% relier les zones src/dst de copie
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[-,style=dashed] (9.2,2.5) -- (3,3.5);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[-,style=dashed] (11.2,2.5) -- (5,3.5);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[-,style=dashed] (11.2,4.5) -- (5,5.5);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[-,style=dashed] (9.2,4.5) -- (3,5.5);
+\end_layout
+
+\begin_layout Plain Layout
+
+% faux noeud pour pas que la légende soit collée
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,0.5) { };
+\end_layout
+
+\begin_layout Plain Layout
+
+}
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:Blitting-between-two"
+
+\end_inset
+
+Blitting between two different surfaces.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Figure
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:Blitting-between-two"
+
+\end_inset
+
+ shows an example of blitting a rectangle between two different surfaces.
+ This operation is defined by the following parameters: the source and destinati
+on coordinates, the source and destination pitches, and the blit width and
+ height.
+ However, these are only 2D coordinates; no perspective is possible.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\noindent
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+hspace{-4cm}
+\end_layout
+
+\end_inset
+
+
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikz{
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner
+ sep=1em] (2,2) rectangle (10,7);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[pattern = north east lines] (8.5,2) rectangle (10,7);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (2,7.5) -- +(6.5,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (1.5,2) -- +(0,5);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[<->] (2,1.5) -- +(8,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,8) {Surface width};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,1) {Surface pitch};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (0,4.5) {Surface height};
+\end_layout
+
+\begin_layout Plain Layout
+
+% source pixels
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw (4,3.5) rectangle (8,6.5);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,6) {Src pixels};
+\end_layout
+
+\begin_layout Plain Layout
+
+% destination pixels
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw (2.5,2.5) rectangle (6.5,5.5);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (4,3) {Dst pixels};
+\end_layout
+
+\begin_layout Plain Layout
+
+% faux noeud pour pas que la légende soit collée
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,0.5) { };
+\end_layout
+
+\begin_layout Plain Layout
+
+}
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:Overlapping-blit-inside"
+
+\end_inset
+
+Overlapping blit inside a surface.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+When a blit happens between two overlapping source and destination surfaces,
+ the semantics of the copy is not trivially defined, especially if one considers
+ that what happens for a blit is not a simple move of a rectangle, but is
+ done pixel-by-pixel at the core.
+ As seen on Figure
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:Overlapping-blit-inside"
+
+\end_inset
+
+, if one does a line-by-line copy top to bottom, some source pixels will
+ be modified as a side effect.
+ Therefore, the notion of blitting direction was introduced into the blitters.
+ In this case, for a proper copy a bottom to top copy is required.
+ Some cards will determine the blitting direction automatically according
+ to surface overlap (for example nvidia GPUs), and others will not.
+\end_layout
+
+\begin_layout Standard
+Finally, keep in mind that not all current graphics accelerators feature
+ a 2D engine.
+ Since 3D acceleration is technically a super-set of 2D acceleration, it
+ is possible to implement 2D acceleration using the 3D engine (and this
+ idea is one of the core ideas behind the Gallium 3D design, which will
+ be detailed in Chapter
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Gallium-3D"
+
+\end_inset
+
+).
+ And indeed some drivers use the 3D engine to implement 2D which allows
+ GPU makers to completely part with the transistors otherwise dedicated
+ to it.
+ Yet some other cards do not dedicate the transistors, but microprogram
+ 2D operations on top of 3D operations inside the GPU (this is the case
+ for nVidia cards since nv10 and up to nv50, and for the Radeon R600 series
+ which have an optional firmware that implements 2D on top of 3D).
+ This sometimes has an impact on mixing 2D and 3D operations since those
+ now share hardware units.
+\end_layout
+
+\begin_layout Subsubsection*
+3D engine
+\end_layout
+
+\begin_layout Standard
+A 3D engine is also called
+\begin_inset Quotes eld
+\end_inset
+
+rasterization pipeline
+\begin_inset Quotes erd
+\end_inset
+
+, because it contains a series of stages which exchange data in a pipeline
+ (1-directional) fashion.
+\end_layout
+
+\begin_layout Standard
+vertex -> geom -> fragment
+\end_layout
+
+\begin_layout Standard
+graphics fifo
+\end_layout
+
+\begin_layout Standard
+DMA
+\end_layout
+
+\begin_layout Standard
+http://www.x.org/wiki/Development/Documentation/HowVideoCardsWork
+\end_layout
+
+\begin_layout Standard
+tiled textures
+\end_layout
+
+\begin_layout Subsubsection*
+Overlays and hardware sprites
+\end_layout
+
+\begin_layout Section
+Programming the card
+\end_layout
+
+\begin_layout Standard
+Each PCI card exposes a number of PCI resources; lspci -v lists these resources.
+ These can be, but are not limited to, BIOSes, MMIO ranges, video memory
+ (or only some part of it).
+ As the total PCI resource size is limited, oftentimes a card will only
+ expose part of its video memory as a resource, and the only way to access
+ the remaining memory is through DMA from other, reachable areas (in a way
+ similar to bounce pages).
+ This is increasingly common as the video memory sizes keep growing while
+ the PCI resource space stays limited.
+\end_layout
+
+\begin_layout Subparagraph*
+MMIO
+\end_layout
+
+\begin_layout Standard
+MMIO is the most direct access to the card.
+ A range of addresses is exposed to the CPU, where each write goes directly
+ to the GPU.
+ This allows the simplest form of communication of commands from the CPU
+ to the GPU.
+ This type of programming is synchronous, so writes are done by the CPU
+ and executed on the GPU in a lockstep fashion.
+ This results in sub-par performan
+ce as each access turns into a packet on the bus.
+\end_layout
+
+\begin_layout Subparagraph*
+DMA
+\end_layout
+
+\begin_layout Standard
+A direct memory access (DMA) is the use by a peripheral of the bus mastering
+ feature of the bus.
+ This allows one peripheral to talk directly to another, without intervention
+ from the CPU.
+ In the graphics card case, the two most common uses of DMAs are:
+\end_layout
+
+\begin_layout Itemize
+Transfers by the GPU to and from system memory (for reading textures and
+ writing buffers).
+ This allows implementing things like texturing over AGP or PCI, and hardware-ac
+celerated texture transfers.
+\end_layout
+
+\begin_layout Itemize
+The implementation of command FIFO.
+ As MMIO between the CPU and GPU is synchronous and graphics drivers inherently
+ use a lot of I/O, a faster means of communicating with the card is required.
+ The command FIFO is a piece of memory (either system memory or more rarely
+ video memory) shared between the graphics card and the CPU, where the CPU
+ places commands for later execution by the GPU.
+ Then the GPU reads the FIFO asynchronously using DMA and executes the commands.
+ This model allows asynchronous execution of the CPU and GPU command flows
+ and thus leads to higher performance.
+\end_layout
+
+\begin_layout Subsubsection*
+Interrupts
+\end_layout
+
+\begin_layout Standard
+Interrupts are a way for hardware peripherals in general, and GPUs in particular
+, to signal events to the CPU.
+ Usage examples for interrupts include signaling completion of a graphics
+ command, signaling a vertical blanking event, reporting a GPU error, ...
+ When an interrupt is raised by the peripheral, the CPU executes a small
+ routine called an interrupt handler, which preempts other current executions.
+ There is a maximum execution time for an interrupt handler, so the drivers
+ have to keep it short (not more than a few microseconds).
+ In order to execute more code, the common solution is to schedule a tasklet
+ from the interrupt handler.
+\end_layout
+
+\begin_layout Section
+Display devices (aka screens)
+\end_layout
+
+\begin_layout Standard
+Display devices are the last link of the graphics chain.
+ They are charged with presenting the pictures to the user.
+\end_layout
+
+\begin_layout Standard
+digital vs analog signal
+\end_layout
+
+\begin_layout Standard
+hsync, vsync
+\end_layout
+
+\begin_layout Standard
+sync on green
+\end_layout
+
+\begin_layout Standard
+Connectors and encoders: CRTC,TMDS, LVDS, DVI-I, DVI-A, DVI-D, VGA (D-SUB
+ 15 is the proper name)
+\end_layout
+
+\begin_layout Section
+Graphics Hardware Examples
+\end_layout
+
+\begin_layout Paragraph*
+ATI
+\end_layout
+
+\begin_layout Standard
+Shader engine 4+1
+\end_layout
+
+\begin_layout Paragraph*
+Nvidia
+\end_layout
+
+\begin_layout Standard
+Nvidia hardware has multiple specificities compared to other architectures.
+ The first one is the availability of multiple contexts, which is implemented
+ using multiple command FIFOs (similar to what some high-end InfiniBand
+ networking cards do) and a context switching mechanism to switch between
+ those FIFOs.
+ A small firmware performs the context switches; it is
+ responsible for saving the graphics card state to a portion of memory and
+ restoring another context.
+ A scheduling system using the round robin algorithm handles the selection
+ of the contexts, and the timeslice is programmable.
+
+\end_layout
+
+\begin_layout Standard
+The second specificity is the notion of graphics objects.
+ Nvidia hardware features two levels of GPU access: the first one is at
+ the raw level and is used for context switches, and the second one is the
+ graphics objects which microprogram the raw level to achieve high level
+ functionality (for example 2D or 3D acceleration).
+\end_layout
+
+\begin_layout Standard
+Shader engine nv40/nv50
+\end_layout
+
+\begin_layout Standard
+http://nouveau.freedesktop.org/wiki/HonzaHavlicek
+\end_layout
+
+\begin_layout Paragraph*
+SGX
+\end_layout
+
+\begin_layout Standard
+Tiling architecture
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+There are multiple memory domains in a computer, and they are not coherent.
+\end_layout
+
+\begin_layout Itemize
+A GPU is a completely separate computer with its own bus, address space
+ and computational units.
+\end_layout
+
+\begin_layout Itemize
+Communication between the CPU and GPU is achieved over a bus, which has
+ non-trivial performance implications.
+\end_layout
+
+\begin_layout Itemize
+GPUs can be programmed using two modes: MMIO and command FIFOs.
+\end_layout
+
+\begin_layout Itemize
+There is no standard output method for display devices.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+The Big Picture
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:The-Big-Picture"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+X, how it works (encapsulating) with indirect (glx) 3D with kernel FB +
+ picture.
+ This is how utah-glx used to work.
+\end_layout
+
+\begin_layout Plain Layout
+DRI : bypassing encapsulation for performance-critical operations with kernel
+ FB + picture
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The Linux graphics stack has seen numerous evolutions over the years.
+ The purpose of this chapter is to detail that history, as well as the justifica
+tion behind the changes in order to better motivate the current design.
+\end_layout
+
+\begin_layout Section
+The X11 infrastructure
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+placement tbh
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode] (application) {Application};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below=of application] (xlib) {Xlib};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (application.south) -> (xlib.north);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw (1,-1) rectangle (6,-5.2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (3.5,-1.2) {X server};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=2cm of xlib] (xserver) {DIX};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (xlib.east) -> (xserver.west);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below=1cm of xserver] (driver) {DDX (Driver)};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (xserver.south) -> (driver.north);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below=1cm of driver] (hardware) {Hardware};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (driver.south) -> (hardware.north);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+The X11 architecture.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+DIX (Device-Independent X), DDX (Device-Dependent X),
+\end_layout
+
+\begin_layout Standard
+modules
+\end_layout
+
+\begin_layout Standard
+Xlib
+\end_layout
+
+\begin_layout Standard
+socket
+\end_layout
+
+\begin_layout Standard
+X protocol
+\end_layout
+
+\begin_layout Standard
+X extensions
+\end_layout
+
+\begin_layout Standard
+shm -> shared memory for transport
+\end_layout
+
+\begin_layout Standard
+XCB -> asynchronous
+\end_layout
+
+\begin_layout Standard
+Another notable X extension is Xv, which will be discussed in further detail
+ in the video decoding chapter.
+\end_layout
+
+\begin_layout Section
+The DRI/DRM infrastructure
+\end_layout
+
+\begin_layout Standard
+Initially (when Linux first supported graphics hardware acceleration), only
+ a single piece of code would access the graphics card directly: the XFree86
+ server.
+ The design was as follows: by running with super-user privileges, the XFree86
+ server could access the card from user space and did not require kernel
+ support to implement 2D acceleration.
+ The advantage of such a design was its simplicity, and the fact that the
+ XFree86 server could be easily ported from one operating system to another
+ since it required no kernel component.
+ For years this was the most widespread X server design (although there
+ were notable exceptions, like XSun which implemented modesetting in the
+ kernel for some drivers).
+\end_layout
+
+\begin_layout Standard
+Later on, Utah-GLX, the first hardware-independent 3D accelerated design,
+ came to Linux.
+ Utah-GLX basically consists of an additional user space 3D driver implementing
+ GLX, which directly accesses the graphics hardware from user space, in a
+ way similar to the 2D driver.
+ At a time when the 3D hardware was clearly separated from 2D (because
+ the functionality used for 2D and 3D was completely different, or because
+ the 3D card was a completely separate card, à la 3Dfx), it made sense to
+ have a completely separate driver.
+ Furthermore, direct access to the hardware from user space was the simplest
+ approach and the shortest road to getting 3D acceleration going under Linux.
+\end_layout
+
+\begin_layout Standard
+At the same time, framebuffer drivers (which will be detailed in Chapter
+
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "cha:Framebuffer-Drivers"
+
+\end_inset
+
+) were getting increasingly widespread, and represented another component
+ that could simultaneously access the graphics hardware directly.
+ To avoid potential conflicts between the framebuffer and XFree86 drivers,
+ it was decided that VT switches would emit a signal to the X server telling
+ it to save the graphics hardware state.
+ Asking each driver to save its complete GPU state on VT switches made the
+ drivers more fragile, and life became more difficult for developers who
+ suddenly faced bug-prone interaction between different drivers.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+aide à faire des figures : http://www.texample.net/tikz/examples/
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+placement H
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode] (x11application) {X11 Application};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=0.5cm of x11application] (glapplication) {OpenGL Application};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=0.5cm of glapplication] (fbapplication) {Framebuffer Applicati
+on};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 6cm, below=1cm of x11application, xshift = 1.7cm]
+ (xorg) {XFree86};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (x11application.south) -> ++(0,-1) (xorg);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (glapplication.south) -> ++(0,-1) (xorg);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below = of xorg, xshift=-2cm] (2ddriver) {2D Driver};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (xorg.south) ++ (-2,0) -> ++(0,-1) (2ddriver);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below = of xorg, xshift= 2cm] (glxdriver) {Utah GLX driver};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (xorg.south) ++(2,0) -> ++(0,-1) (glxdriver);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 12cm , below=3cm of 2ddriver, xshift=5cm] (hardware)
+ {Graphics Hardware};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw [thick, dotted] (-1.8,-5.2) -- (11,-5.2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw [thick, dotted] (-1.8,-7.2) -- (11,-7.2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (4.6,-1) {GLX};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10,-5) {User Space};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10,-7) {Kernel Space};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10,-7.5) {Hardware};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (glxdriver.south) -> ++(0,-3.0) (hardware);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below=5.1cm of fbapplication] (fbdriver) {FB driver};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (fbapplication) -> (fbdriver);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (fbdriver.south) -> ++(0,-1) (hardware);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (2ddriver.south) -> ++(0,-3) (hardware);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+Early implementation of the Linux graphics stack using Utah-GLX.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Obviously, this model had drawbacks.
+ First, it required that unprivileged user space applications be allowed
+ to access the graphics hardware for 3D.
+ Second, as can be seen in the figure above, all GL acceleration had to be indirect
+ through the X protocol, which would slow it down.
+ Because of growing concerns about the security in Linux and performance
+ shortcomings, another model was required.
+\end_layout
+
+\begin_layout Standard
+To address the reliability and security concerns with the Utah-GLX model,
+ the DRI model was put together; it was used in both XFree86 and its successor,
+ X.Org.
+ This model relies on an additional kernel component whose duty is to check
+ the correctness of the 3D command stream, security-wise.
+ The main change is now that instead of accessing the card directly, the
+ unprivileged OpenGL application would submit command buffers to the kernel,
+ which would check them for security and then pass them to the hardware
+ for execution.
+ The advantage of this model is that trusting user space is no longer required.
+ Notice that although this would have been possible, the 2D command stream
+ from XFree86 still did not go through the DRM, and therefore the X server
+ still required super-user privileges.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+placement H
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode] (x11application) {X11 Application};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=0.5cm of x11application] (glapplication) {OpenGL Application};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=0.5cm of glapplication] (fbapplication) {Framebuffer Applicati
+on};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 6cm, below=1cm of x11application, xshift = 1.7cm]
+ (xorg) {X.Org};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (x11application.south) -> ++(0,-1) (xorg);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (glapplication.south) -> ++(0,-1) (xorg);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below = of xorg, xshift=-2cm] (2ddriver) {2D Driver};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (xorg.south) ++ (-2,0) -> ++(0,-1) (2ddriver);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below = of xorg, xshift= 2cm] (glxdriver) {OpenGL DRI driver};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (glapplication.south) ++(1.3,0) -> ++(0,-3.1) (glxdriver);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,-2.1) {DRI};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below = 0.9cm of glxdriver] (drm) {DRM};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (glxdriver.south) -> ++(0,-0.9) (drm);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 12cm , below=3cm of 2ddriver, xshift=5cm] (hardware)
+ {Graphics Hardware};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw [thick, dotted] (-1.8,-5.2) -- (11,-5.2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw [thick, dotted] (-1.8,-7.2) -- (11,-7.2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (4.6,-1) {GLX};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10,-5) {User Space};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10,-7) {Kernel Space};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10,-7.5) {Hardware};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (fbapplication) -> ++(0,-5.65) (drm);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (2ddriver.south) -> ++(0,-3) (drm);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (drm.south) -> ++(0,-1.0) (hardware);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below=5.1cm of fbapplication] (fbdriver) {FB driver};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (fbapplication) -> (fbdriver);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (fbdriver.south) -> ++(0,-1) (hardware);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+The old picture of the Linux graphics stack.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The current stack evolved from a new set of needs.
+ First, requiring the X server to have super-user privileges has always had serious
+ security implications.
+ Second, with the previous design different drivers were touching a single
+ piece of hardware, which would often cause issues.
+ In order to resolve this, the solution is two-fold: first, merge the kernel framebuff
+er functionality into the DRM module and second, have X.Org access the graphics
+ card through the DRM module and run unprivileged.
+ This is called Kernel Modesetting (KMS); in this model the DRM module is
+ now responsible for providing modesetting services both as a framebuffer
+ driver and to X.Org.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+placement H
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode] (x11application) {X11 Application};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=0.5cm of x11application] (glapplication) {OpenGL Application};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=0.5cm of glapplication] (fbapplication) {Framebuffer Applicati
+on};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 6cm, below=1cm of x11application, xshift = 1.7cm]
+ (xorg) {X.Org};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (x11application.south) -> ++(0,-1) (xorg);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (glapplication.south) -> ++(0,-1) (xorg);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below = of xorg, xshift=-2cm] (2ddriver) {2D Driver};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (xorg.south) ++ (-2,0) -> ++(0,-1) (2ddriver);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, below = of xorg, xshift= 2cm] (glxdriver) {OpenGL DRI driver};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (xorg.south) ++(1,0) -> ++(0,-1) (glxdriver);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (3.5,-3.1) {AIGLX};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (glapplication.south) ++(1.3,0) -> ++(0,-3.1) (glxdriver);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,-2.1) {DRI};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 12cm, below = 0.9cm of glxdriver, xshift = 1cm]
+ (drm) {DRM};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (glxdriver.south) -> ++(0,-0.9) (drm);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 12cm , below=3cm of 2ddriver, xshift=5cm] (hardware)
+ {Graphics Hardware};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw [thick, dotted] (-1.8,-5.2) -- (11,-5.2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw [thick, dotted] (-1.8,-7.2) -- (11,-7.2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (4.6,-1) {GLX};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10,-5) {User Space};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10,-7) {Kernel Space};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (10,-7.5) {Hardware};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (fbapplication) -> ++(0,-5.65) (drm);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (2ddriver.south) -> ++(0,-0.9) (drm);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (drm.south) -> ++(0,-1.0) (hardware);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+The new picture of the Linux graphics stack.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+VT switches
+\end_layout
+
+\begin_layout Standard
+http://dri.sourceforge.net/doc/dri_data_flow.html
+\end_layout
+
+\begin_layout Standard
+http://dri.sourceforge.net/doc/dri_control_flow.html
+\end_layout
+
+\begin_layout Standard
+http://nouveau.freedesktop.org/wiki/GraphicStackOverview
+\end_layout
+
+\begin_layout Standard
+http://people.freedesktop.org/~ajax/dri-explanation.txt
+\end_layout
+
+\begin_layout Standard
+http://dri.sourceforge.net/doc/DRIintro.html
+\end_layout
+
+\begin_layout Standard
+http://jonsmirl.googlepages.com/graphics.html
+\end_layout
+
+\begin_layout Standard
+http://wiki.x.org/wiki/Development/Documentation/Glossary
+\end_layout
+
+\begin_layout Standard
+http://mjules.littleboboy.net/carnet/index.php?post/2006/11/15/89-comment-marche-x1
+1-xorg-et-toute-la-clique-5-partie
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+Applications communicate with X.Org through a specific library which encapsulates
+ drawing calls.
+\end_layout
+
+\begin_layout Itemize
+The current DRI design has evolved over time in a number of significant
+ steps.
+\end_layout
+
+\begin_layout Itemize
+In a modern stack, all graphics hardware activity is moderated by a kernel
+ module, the DRM.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+Framebuffer Drivers
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:Framebuffer-Drivers"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Framebuffer drivers are the simplest form of graphics drivers under Linux.
+ Kernel modesetting DRM drivers are still a relevant option if the only
+ thing you are after is a basic two-dimensional display.
+ Furthermore, when implementing framebuffer acceleration on top of a kernel
+ modesetting DRM driver, the same callbacks need to be filled.
+ A framebuffer driver implements little functionality, and is therefore
+ extremely easy to create.
+ Such a driver is especially interesting for embedded systems, where memory
+ footprint is essential, or when the intended applications do not require
+ advanced graphics acceleration.
+\end_layout
+
+\begin_layout Standard
+At the core, a framebuffer driver implements the following functionality:
+\end_layout
+
+\begin_layout Itemize
+modesetting
+\end_layout
+
+\begin_layout Itemize
+basic 2D acceleration (copy, solid fill)
+\end_layout
+
+\begin_layout Standard
+Acceleration is sometimes made available to user space through a hook (user
+ space must then program card-specific bits, and must be root to do so).
+\end_layout
+
+\begin_layout Standard
+Framebuffer drivers do not always rely on a specific card model (like nvidiafb/a
+tyfb...).
+ Drivers on top of vesa, EFI or Openfirmware exist.
+\end_layout
+
+\begin_layout Standard
+http://www.linux-fbdev.org/HOWTO/index.html
+\end_layout
+
+\begin_layout Section
+Creating a framebuffer driver
+\end_layout
+
+\begin_layout Standard
+struct platform_driver with a probe function
+\end_layout
+
+\begin_layout Standard
+The probe function is in charge of creating the fb_info struct and calling
+ register_framebuffer() on it.
+\end_layout
+
+\begin_layout Section
+Framebuffer operations
+\end_layout
+
+\begin_layout Standard
+The framebuffer operations structure is how non-modesetting framebuffer
+ callbacks are set.
+ Different callbacks can be set depending on what functionality you wish
+ to implement, like fills, copies, or cursor handling.
+ By filling struct fb_ops callbacks, one can implement the following functions:
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+int (*fb_setcolreg)(unsigned regno, unsigned red, unsigned green, unsigned
+ blue, unsigned transp, struct fb_info *info);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* set color register */
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+int (*fb_setcmap)(struct fb_cmap *cmap, struct fb_info *info);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* set color registers in batch */
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+int (*fb_blank)(int blank, struct fb_info *info);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* blank display */
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+int (*fb_pan_display)(struct fb_var_screeninfo *var, struct fb_info *info);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* pan display */
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect);
+
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* Draws a rectangle */
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* Copy data from one area to another */
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* Draws an image to the display */
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+int (*fb_cursor) (struct fb_info *info, struct fb_cursor *cursor);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* Draws cursor */
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+void (*fb_rotate)(struct fb_info *info, int angle);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* Rotates the display */
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{lstlisting}{}
+\end_layout
+
+\begin_layout Plain Layout
+
+int (*fb_sync)(struct fb_info *info);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{lstlisting}{}
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+/* wait for blit idle, optional */
+\end_layout
+
+\begin_layout Standard
+Note that common framebuffer functions (cfb) are available if you do not
+ want to implement everything for your device specifically.
+ These functions are cfb_fillrect, cfb_copyarea and cfb_imageblit and will
+ perform the corresponding function in a generic, unoptimized fashion using
+ the CPU.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+Framebuffer drivers are the simplest form of Linux graphics driver, requiring
+ little work for implementation.
+\end_layout
+
+\begin_layout Itemize
+Framebuffer drivers deliver a low memory footprint and thus are useful for
+ embedded devices.
+\end_layout
+
+\begin_layout Itemize
+Implementing acceleration is optional as software fallback functions exist.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+The DRM Kernel Module
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:The-DRM-Kernel"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The use of a kernel module is a requirement in a complex world.
+ The kernel module, or DRM, has multiple purposes:
+\end_layout
+
+\begin_layout Itemize
+Share the rendering hardware between multiple user space components, and
+ arbitrate access.
+\end_layout
+
+\begin_layout Itemize
+Enforce security by preventing applications from performing DMA to arbitrary
+ memory regions, and more generally programming the card in any way that
+ could result in a security hole.
+\end_layout
+
+\begin_layout Itemize
+Manage the memory of the card, by providing video memory allocation functionalit
+y to user space.
+\end_layout
+
+\begin_layout Itemize
+More recently, the DRM was improved to achieve modesetting.
+ This simplifies the situation where both the DRM and the framebuffer driver
+ access the card by removing the framebuffer driver and implementing its
+ functionality in the DRM.
+\end_layout
+
+\begin_layout Itemize
+Put critical initialization of the card in the kernel, for example by uploading
+ firmwares or setting up DMA areas.
+
+\end_layout
+
+\begin_layout Standard
+Kernel module (DRM)
+\end_layout
+
+\begin_layout Standard
+Global DRI/DRM user space/kernel scheme (figure with libdrm - drm - entry
+ points - multiple user space apps)
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+placement H
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode] (xorg) {X.Org};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, right=0.5cm of xorg] (glapplication) {OpenGL Application};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 6cm, below= of xorg, xshift = 2.2cm] (libdrm) {libdrm};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (xorg.south) -> ++(0,-1) (libdrm);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (glapplication.south) -> ++(0,-1) (libdrm);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 6cm, below= of libdrm] (drm) {drm};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (libdrm.south) -> ++(0,-1) (drm);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node[mynode, text width = 6cm, below= of drm] (hardware) {Graphics Hardware};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[myarrow] (drm.south) -> ++(0,-1.0) (hardware);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw [thick, dotted] (-1.8,-3.2) -- (9,-3.2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw [thick, dotted] (-1.8,-5.2) -- (9,-5.2);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (8,-3) {User Space};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (8,-5) {Kernel Space};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (8,-5.5) {Hardware};
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+Accessing the DRM through libdrm.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+When designing a Linux graphics driver aiming for more than simple framebuffer
+ support, a DRM component is the first thing to do.
+ One should derive a design that is both efficient and enforces security.
+ The DRI/DRM scheme can be implemented in different ways and the interface
+ is indeed entirely card-specific.
+ Do not always follow the existing models that other drivers use, innovate!
+\end_layout
+
+\begin_layout Section
+Hardware sharing
+\end_layout
+
+\begin_layout Standard
+Multiplexing of the card command fifo - For cards which only feature a single
+ hardware command submission fifo, it has to be shared between multiple
+ user space components.
+ In that case, this is achieved by the DRM module.
+\end_layout
+
+\begin_layout Standard
+Prevent simultaneous access to the same hw
+\end_layout
+
+\begin_layout Section
+Security
+\end_layout
+
+\begin_layout Standard
+Prevent arbitrary DMAs to memory.
+ If the hardware does not feature memory protection, you have to check the
+ command stream before submitting it to the GPU.
+\end_layout
+
+\begin_layout Section
+Memory management
+\end_layout
+
+\begin_layout Section
+Modesetting
+\end_layout
+
+\begin_layout Standard
+Modesetting is the act of setting a mode on the card to display.
+ This can range from extremely simple procedures (calling a VGA interrupt
+ or VESA call is a basic form of modesetting) to directly programming the
+ card registers (which brings along the advantage of not needing to rely
+ on a VGA or VESA layer).
+ Historically, this was achieved in user space by the DDX.
+
+\end_layout
+
+\begin_layout Standard
+However, these days it makes more sense to put it in the kernel once and
+ for all, and share it between different GPU users (framebuffer drivers,
+ DDXes, EGL stacks...).
+ This extension to modesetting is called kernel modesetting (also known
+ as KMS).
+ A number of concepts are used by the modesetting interface (those are inherited
+ from the Randr 1.2 specification).
+\end_layout
+
+\begin_layout Subsubsection*
+Crtc
+\end_layout
+
+\begin_layout Standard
+Crtc is in charge of reading the framebuffer memory and routing the data
+ to an encoder.
+\end_layout
+
+\begin_layout Subsubsection*
+Encoder
+\end_layout
+
+\begin_layout Standard
+Encoder encodes the pixel data for a connector
+\end_layout
+
+\begin_layout Subsubsection*
+Connector
+\end_layout
+
+\begin_layout Standard
+The connector is the physical output on the card (DVI, Dsub, Svideo...).
+ Notice that connectors can get their data from multiple encoders (for example
+ DVI-I which can feed both analog and digital signals)
+\end_layout
+
+\begin_layout Standard
+Also, on embedded or old hardware, it is common to have encoders and connectors
+ merged for simplicity/power efficiency reasons.
+\end_layout
+
+\begin_layout Standard
++++ Add a crtc-encoder-connector diagram here
+\end_layout
+
+\begin_layout Section
+libdrm
+\end_layout
+
+\begin_layout Standard
+libdrm is a small (but growing) component that interfaces between user space
+ and the DRM module, and allows calling into the entry points.
+
+\end_layout
+
+\begin_layout Standard
+Obviously security should not rely on components from libdrm because it
+ is an unprivileged user space component
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+The DRM manages all graphics activity in a modern Linux graphics stack.
+\end_layout
+
+\begin_layout Itemize
+It is the only trusted piece of the stack and is responsible for security.
+ Therefore it shall not trust the other components.
+\end_layout
+
+\begin_layout Itemize
+It provides basic graphics functionality: modesetting, framebuffer driver,
+ memory management.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+X.Org Drivers
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:X.Org-Drivers"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+This chapter covers the implementation of a 2D acceleration inside X.Org.
+\end_layout
+
+\begin_layout Standard
+There are multiple ways to implement a 2D X.Org driver: ShadowFB, XAA, EXA.
+ Another simple way of implementing X.Org support is through the FBDev module.
+ This module implements X.Org on top of an existing, in-kernel framebuffer
+ driver.
+\end_layout
+
+\begin_layout Standard
+http://www.x.org/wiki/DriverDevelopment
+\end_layout
+
+\begin_layout Section
+Initializing a driver
+\end_layout
+
+\begin_layout Section
+ShadowFB acceleration
+\end_layout
+
+\begin_layout Standard
+ShadowFB provides no acceleration proper; a copy of the framebuffer is kept
+ in system memory.
+ The driver implements a single hook that copies graphics from system to
+ video memory.
+ This can be implemented using either a DMA copy, or a CPU copy (depending
+ on the hardware and copy size, either can be better).
+\end_layout
+
+\begin_layout Standard
+Despite the name, ShadowFB is not to be confused with the kernel framebuffer
+ drivers.
+\end_layout
+
+\begin_layout Standard
+Although ShadowFB is a very basic design, it can result in a more efficient
+ and responsive desktop than an incomplete implementation of EXA.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+Insérer une image avec la propagation shadowfb
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\noindent
+\align center
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+begin{tikzpicture}[node distance=1cm, auto]
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikzset{ mynode/.style={rectangle,rounded corners,draw=black, top color=white
+, bottom color=yellow!50,very thick, inner sep=1em, minimum size=3em, text
+ centered, drop shadow}, myarrow/.style={->, >=latex', shorten >=1pt,
+ thick}, mylabel/.style={text width=7em, text centered} }
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+tikz{
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner
+ sep=1em] (0,2) rectangle (5,6);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[top color=white, bottom color=yellow!50, drop shadow,very thick, inner
+ sep=1em] (6,2) rectangle (11,6);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+draw[<->] (2,7.5) -- +(6.5,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+draw[<->] (1.5,2) -- +(0,5);
+\end_layout
+
+\begin_layout Plain Layout
+
+%
+\backslash
+draw[<->] (2,1.5) -- +(8,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (2.5,1.5) {Shadow surface};
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (8.5,1.5) {Video ram surface};
+\end_layout
+
+\begin_layout Plain Layout
+
+% source pixels
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw (2,2.5) rectangle (4,4);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (3,3) {Dirty pixels};
+\end_layout
+
+\begin_layout Plain Layout
+
+% destination pixels
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw (8,2.5) rectangle (10,4);
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (9,3) {Dst pixels};
+\end_layout
+
+\begin_layout Plain Layout
+
+% fleches de copie
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+draw[->] (3,3.25) -- +(6,0);
+\end_layout
+
+\begin_layout Plain Layout
+
+% faux noeud pour pas que la légende soit collée
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+node at (6,0.5) { };
+\end_layout
+
+\begin_layout Plain Layout
+
+}
+\end_layout
+
+\begin_layout Plain Layout
+
+
+\backslash
+end{tikzpicture}
+\end_layout
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+Shadowfb acceleration.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Section
+XAA acceleration
+\end_layout
+
+\begin_layout Standard
+Scanline based acceleration
+\end_layout
+
+\begin_layout Standard
+Offscreen area, same pitch as the screen
+\end_layout
+
+\begin_layout Section
+EXA acceleration
+\end_layout
+
+\begin_layout Standard
+Adapted from KAA from Kdrive
+\end_layout
+
+\begin_layout Standard
+Simple interface : Prepare/Act/Finish for each acceleration function
+\end_layout
+
+\begin_layout Standard
+Solid - fill an area with a solid color (RGBA)
+\end_layout
+
+\begin_layout Standard
+Copy - copies a rectangle area from and to video memory
+\end_layout
+
+\begin_layout Standard
+Composite - optional interface used to achieve composite operations like
+ blending.
+ This allows accelerating 2D desktop effects like blending, scaling, operations
+ with masks...
+\end_layout
+
+\begin_layout Standard
+UploadToScreen - copies an area from system memory to video memory
+\end_layout
+
+\begin_layout Standard
+DownloadFromScreen - copies an area from video memory to system memory
+\end_layout
+
+\begin_layout Standard
+The pixmap migration problem
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+Multiple choices exist for accelerating 2D in X.Org.
+\end_layout
+
+\begin_layout Itemize
+The most efficient one is EXA, which puts all the smart optimizations in
+ a common piece of code, and leaves the driver implementation very simple.
+\end_layout
+
+\begin_layout Itemize
+If your card cannot accelerate 2D operations, shadowfb is probably the path
+ to take.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+Video Decoding
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:Video-Decoding"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Section
+Video decoding pipeline
+\end_layout
+
+\begin_layout Standard
+Two typical video pipelines : mpeg2 and h264
+\end_layout
+
+\begin_layout Paragraph*
+The MPEG2 decoding pipeline
+\end_layout
+
+\begin_layout Standard
+iDCT -> MC -> CSC -> Final display
+\end_layout
+
+\begin_layout Paragraph*
+The H.264 decoding pipeline
+\end_layout
+
+\begin_layout Standard
+entropy decoding -> iDCT -> MC -> CSC -> Final display
+\end_layout
+
+\begin_layout Subsection
+Entropy
+\end_layout
+
+\begin_layout Standard
+Entropy encoding is a lossless compression phase.
+ It is the last stage of encoding and therefore also the first stage of
+ decoding.
+\end_layout
+
+\begin_layout Standard
+CABAC/CAVLC
+\end_layout
+
+\begin_layout Subsection
+Inverse DCT
+\end_layout
+
+\begin_layout Subsection
+Motion Compensation
+\end_layout
+
+\begin_layout Subsection
+Color Space Conversion
+\end_layout
+
+\begin_layout Standard
+Color spaces
+\end_layout
+
+\begin_layout Standard
+Linear relation
+\end_layout
+
+\begin_layout Standard
+Conversion matrices
+\end_layout
+
+\begin_layout Standard
+The YUV color space: 1 component luminance (Y) + 2 components chrominance
+ (UV).
+ Chrominance information is less relevant to the eye than luminance, so
+ usually chrominance is subsampled and luminance is kept at the original
+ resolution.
+ Therefore, the Y plane usually has a higher resolution than the U and V
+ planes.
+\end_layout
+
+\begin_layout Standard
+Bandwidth gain (RGBA32 vs YV12)
+\end_layout
+
+\begin_layout Standard
+YUV Planar and packed (interlaced) formats
+\end_layout
+
+\begin_layout Standard
+Plane order (YV12 vs NV12)
+\end_layout
+
+\begin_layout Standard
+Order of the planes (YV12, I420)
+\end_layout
+
+\begin_layout Standard
+http://en.wikipedia.org/wiki/YUV
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\begin_inset Formula $\left[\begin{array}{c}
+R\\
+G\\
+B\end{array}\right]=\left[\begin{array}{ccc}
+1 & 0 & 1.13983\\
+1 & -0.39465 & -0.58060\\
+1 & 2.03211 & 0\end{array}\right]\left[\begin{array}{c}
+Y\\
+U\\
+V\end{array}\right]$
+\end_inset
+
+
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+filler verifier la formule
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:YUV-to-RGB"
+
+\end_inset
+
+YUV to RGB Conversion formula as per ITU-R BT recommendation 601.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float figure
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\begin_inset Formula $\left[\begin{array}{c}
+R\\
+G\\
+B\end{array}\right]=\left[\begin{array}{ccc}
+1 & 0 & 1.28033\\
+1 & -0.21482 & -0.38059\\
+1 & 2.12798 & 0\end{array}\right]\left[\begin{array}{c}
+Y\\
+U\\
+V\end{array}\right]$
+\end_inset
+
+
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+filler verifier la formule peut pas etre la meme que 601
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+\begin_inset CommandInset label
+LatexCommand label
+name "fig:YUV-to-RGB-1"
+
+\end_inset
+
+YUV to RGB Conversion formula as per ITU-R BT recommendation 709.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+Figure
+\begin_inset CommandInset ref
+LatexCommand ref
+reference "fig:YUV-to-RGB"
+
+\end_inset
+
+ shows the conversion matrices from ITU-R BT Recommendation 601 (standard
+ content) and recommendation 709 (intended for HD content).
+ Notice that although these matrices are very similar, there are numerical
+ differences which will result in slight off-colored rendering if one is
+ used in place of the other.
+ It is indeed often the case that video decoders with YUV to RGB hardware
+ are used to play back high definition content but no attention is paid to
+ the proper conversion matrix that should be used.
+ Since the colors are only slightly wrong, this problem is commonly overlooked,
+ whereas most hardware features at least a BT601/BT709 switch, or a fully
+ programmable conversion matrix.
+\end_layout
+
+\begin_layout Standard
+http://www.fourcc.org/yuv.php
+\end_layout
+
+\begin_layout Standard
+http://www.glennchan.info/articles/articles.html
+\end_layout
+
+\begin_layout Standard
+http://www.poynton.com/papers/SMPTE_98_YYZ_Luma/index.html
+\end_layout
+
+\begin_layout Standard
+\begin_inset Float table
+wide false
+sideways false
+status open
+
+\begin_layout Plain Layout
+\align center
+\begin_inset Tabular
+<lyxtabular version="3" rows="6" columns="4">
+<features>
+<column alignment="center" valignment="top" width="1.5cm">
+<column alignment="center" valignment="top" width="1.2cm">
+<column alignment="center" valignment="top" width="3.5cm">
+<column alignment="center" valignment="top" width="3.5cm">
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Format name
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Y:U:V bits per pixel
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Layout
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Comments
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+YV12
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+8:2:2
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+1 Y plane, 1 V 2*2 sub-sampled plane, 1 U 2*2 sub-sampled plane
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Same as I420 except U and V are reversed.
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+I420
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+8:2:2
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+1 Y plane, 1 U 2*2 sub-sampled plane, 1 V 2*2 sub-sampled plane
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Same as YV12 except U and V are reversed.
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+NV12
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+8:2:2
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+1 Y plane, 1 packed U+V 2*2 sub-sampled plane
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Convenient for hardware implementation on 3D-capable GPUs
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+YUY2 (YUYV)
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+8:4:4
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+1 Packed YUV plane
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+Packed as Y0U0Y1V0
+\end_layout
+
+\end_inset
+</cell>
+</row>
+<row>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
+\begin_inset Text
+
+\begin_layout Plain Layout
+
+\end_layout
+
+\end_inset
+</cell>
+</row>
+</lyxtabular>
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Plain Layout
+\begin_inset Caption
+
+\begin_layout Plain Layout
+Common YUV color space formats
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Pixel scaling
+\end_layout
+
+\begin_layout Standard
+Since the conversion from YUV space to RGB space is linear, filtered scaling
+ can be done either in the YUV or RGB space, which conveniently allows using
+ texture filtering which is available on 3D hardware to sample the YUV data.
+ This allows a single pass color space conversion and scaling.
+ For example, bi-linear filtering will work just fine with three textures
+ for the three Y, U and V planes.
+ Notice that higher quality can be obtained at the expense of performance
+ by using better filtering modes, such as bi-cubic [citer papier hadwiger],
+ even though this can prove to be costly.
+ A trade-off can be achieved by implementing bi-cubic filtering for the
+ (most eye-visible) Y plane, and keeping bi-linear filtering for U and V
+ planes.
+\end_layout
+
+\begin_layout Standard
+If the hardware cannot achieve color space conversion and scaling at the
+ same time (for example if you have a YUV->RGB blitter and a shaderless
+ 3D engine), again the linear color conversion allows you to do the scaling
+ in RGB space, and this will produce the same results (barring gamma correction).
+\end_layout
+
+\begin_layout Section
+Video decoding APIs
+\end_layout
+
+\begin_layout Paragraph*
+Xv
+\end_layout
+
+\begin_layout Standard
+Xv is simply about CSC and scaling.
+ In order to implement Xv, a typical X.Org driver will have to implement
+ this space conversion.
+ Although the Xv API is a little complex for what it implements, the gist
+ of it consists of the PutImage function, which puts a YUV image on screen.
+ Multiple YUV formats can be handled, planar or interlaced mainly.
+ Note that Xv has RGB support as well.
+ Thanks to the bandwidth gains and DMA transfers, even an Xv implementation
+ already provides a relevant level of video decoding acceleration, and can
+ prove sufficient depending on the target hardware (for example, it can
+ prove to be fine when coupled with a powerful CPU to decode H264 content).
+\end_layout
+
+\begin_layout Paragraph*
+XvMC
+\end_layout
+
+\begin_layout Standard
+idct + mc +csc
+\end_layout
+
+\begin_layout Paragraph*
+VAAPI
+\end_layout
+
+\begin_layout Standard
+VAAPI was initially created for Intel's Poulsbo video decoding.
+ The API is very tailored to embedded platforms and has many entry points,
+ at different pipeline stages, which makes it more complex to implement.
+\end_layout
+
+\begin_layout Paragraph*
+VDPAU
+\end_layout
+
+\begin_layout Standard
+The VDPAU was initiated by NVIDIA for H264 & VC1 decoding support.
+\end_layout
+
+\begin_layout Paragraph*
+XvBA
+\end_layout
+
+\begin_layout Standard
+All 3 APIs are intended for full
+\end_layout
+
+\begin_layout Paragraph*
+OpenMax
+\end_layout
+
+\begin_layout Standard
+http://x264dev.multimedia.cx
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+A video decoding pipeline consists of multiple stages chained together.
+\end_layout
+
+\begin_layout Itemize
+Color space conversion and scaling is the most important stage, and if your
+ driver implements only one operation for simplicity, this is it.
+\end_layout
+
+\begin_layout Itemize
+Implementing a full pipeline can provide a high performance boost, and save
+ battery life on mobile systems.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+OpenGL
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:OpenGL"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+OpenGL ARB, khronos, bla bla...
+\end_layout
+
+\begin_layout Section
+The OpenGL Rendering Pipeline
+\end_layout
+
+\begin_layout Subsection
+Vertex processing
+\end_layout
+
+\begin_layout Standard
+vertex stage
+\end_layout
+
+\begin_layout Standard
+vertex buffers
+\end_layout
+
+\begin_layout Subsection
+Geometry processing
+\end_layout
+
+\begin_layout Subsection
+Fragment processing
+\end_layout
+
+\begin_layout Standard
+Rasterization
+\end_layout
+
+\begin_layout Standard
+Render buffers
+\end_layout
+
+\begin_layout Standard
+Textures
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+OpenGL is a suite of stages arranged in a pipeline.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+Mesa
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:Mesa"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Mesa is the Common Rendering Architecture for all open source graphics drivers.
+\end_layout
+
+\begin_layout Section
+Mesa
+\end_layout
+
+\begin_layout Standard
+Mesa serves two major purposes:
+\end_layout
+
+\begin_layout Itemize
+Mesa is a software implementation of OpenGL.
+ It is considered to be the reference implementation and is useful in checking
+ conformance, seeing that the official OpenGL conformance tests are not
+ publicly available.
+\end_layout
+
+\begin_layout Itemize
+Mesa provides the OpenGL entry points for Open Source graphics drivers under
+ Linux.
+\end_layout
+
+\begin_layout Standard
+In this section, we will focus on the second point.
+\end_layout
+
+\begin_layout Section
+Mesa internals
+\end_layout
+
+\begin_layout Subsection
+Textures in mesa
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+Mesa is the reference OpenGL implementation under Linux.
+\end_layout
+
+\begin_layout Itemize
+All Open Source graphics drivers use Mesa for 3D
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+Gallium 3D
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:Gallium-3D"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Gallium 3D is the Future of 3D Acceleration.
+\end_layout
+
+\begin_layout Standard
+http://jrfonseca.blogspot.com/2008/04/gallium3d-introduction.html
+\end_layout
+
+\begin_layout Standard
+http://people.freedesktop.org/~csimpson/gallium-docs/
+\end_layout
+
+\begin_layout Section
+Gallium3D: a plan for a new generation of hardware
+\end_layout
+
+\begin_layout Standard
+Ten years ago, GPUs were a direct match with all the OpenGL or Direct3D
+ functionality; back then the GPUs had specific transistors dedicated to
+ each piece of functionality.
+ With the explosion in the amount of 3D functionality, this quickly made
+ it impractical both for application developers (who saw the 3D APIs growing
+ huge) and hardware designers (who faced an explosion in the number of specific
+ functions a GPU needed), and shaders were created.
+ Instead of providing specific functionality, the 3D APIs would now let
+ the programmers create these little programs and run them on the GPU.
+ As the hardware was now programmable in a way which was a superset of fixed
+ functionality, the fixed function pipelines were not required any more
+ and were removed from the cards.
+ Gallium 3D is modeled around the simple observation that today's GPUs do
+ not have fixed pipe any more and only feature shaders, but drivers still
+ have to
+\begin_inset Quotes eld
+\end_inset
+
+emulate
+\begin_inset Quotes erd
+\end_inset
+
+ fixed function on top of the shaders to provide API compatibility.
+ Doing so in every driver would require a lot of code duplication, and the
+ Gallium model is to put this code in a common place.
+ Therefore gallium drivers become smaller and easier to write and to maintain.
+\end_layout
+
+\begin_layout Standard
+everything is a shader, including inside the driver
+\end_layout
+
+\begin_layout Standard
+thin layer for fixed pipe -> programmable functionality translation
+\end_layout
+
+\begin_layout Standard
+global diagram
+\end_layout
+
+\begin_layout Section
+State trackers
+\end_layout
+
+\begin_layout Standard
+A state tracker implements an API (for example OpenGL, OpenVG, Direct3D...)
+ by turning it into API-agnostic and hardware-agnostic TGSI calls.
+\end_layout
+
+\begin_layout Section
+Pipe driver
+\end_layout
+
+\begin_layout Standard
+A pipe driver is the main part of a hardware-specific driver.
+\end_layout
+
+\begin_layout Section
+Winsys
+\end_layout
+
+\begin_layout Standard
+The winsys is in charge of talking to the OS/Platform of choice.
+ The pipe driver relies on the Winsys to talk to the hardware.
+ For example, this allows having a single pipe driver with multiple winsyses
+ targeting different operating systems.
+\end_layout
+
+\begin_layout Section
+Writing Gallium3D drivers
+\end_layout
+
+\begin_layout Standard
+screen
+\end_layout
+
+\begin_layout Standard
+context
+\end_layout
+
+\begin_layout Standard
+pipe_transfer
+\end_layout
+
+\begin_layout Section
+Shaders in Gallium
+\end_layout
+
+\begin_layout Standard
+In order to operate shaders, Gallium features an internal shader description
+ language which uses 4-component vectors.
+ We will later refer to the 4 components of a vector as x,y,z,w.
+ In particular, v.x is the first component of vector v, v.xyzw are all 4 component
+s of v in that order, and swizzling is allowed, for example v.wzyx reverses
+ the component order.
+ It is also legal to replicate a component, for example v.xxxx means four
+ times the x component of v and v.yyzz means two times y and two times z.
+\end_layout
+
+\begin_layout Standard
+These components usually carry no semantics, and despite their name they
+ can very well carry a color or an opacity value indifferently.
+
+\end_layout
+
+\begin_layout Standard
+TGSI instruction set
+\end_layout
+
+\begin_layout Standard
+mesa/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+Gallium 3D is the new graphics API.
+\end_layout
+
+\begin_layout Itemize
+Everything is converted to a shader internally, fixed functionality is gone.
+\end_layout
+
+\begin_layout Itemize
+Drivers are simpler than classic Mesa drivers, as one only has to implement
+ shaders to get all fixed functionality to work.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+GPU Computing
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:GPU-Computing"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+Suspend and Resume
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:Suspend-and-Resume"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+VT switches
+\end_layout
+
+\begin_layout Standard
+Card state
+\end_layout
+
+\begin_layout Standard
+Suspend/resume hooks in the DRM
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+Suspend and resume has long been very clumsy, but this is solved now thanks
+ to the DRM implementing more functionality.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+Technical Specifications
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:Technical-Specifications"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Technical specifications are the nuts and bolts of graphics driver work.
+ Without hardware specifications, no work can be started.
+ However, manufacturing companies are usually wary of sharing said specification
+s, as they think this will hinder their business.
+ While this claim is false (because you can't copy a GPU from just its specifica
+tions), it is still very widespread and prevents a lot of hardware from
+ being properly documented.
+ Therefore, getting hold of hardware specifications will be the first major
+ step in any graphics driver development project.
+\end_layout
+
+\begin_layout Section
+Obtaining official specifications
+\end_layout
+
+\begin_layout Paragraph*
+Public specifications
+\end_layout
+
+\begin_layout Standard
+Some vendors distribute the technical documentation for their hardware publicly
+ without restrictions.
+\end_layout
+
+\begin_layout Standard
+Sometimes, things can be as simple as asking the vendor, who might share
+ the documentation (possibly under NDA, see below).
+\end_layout
+
+\begin_layout Paragraph*
+NDA (Non-Disclosure Agreement)
+\end_layout
+
+\begin_layout Standard
+Put simply, an NDA is a contract signed between the developer and the hardware
+ company, by which the developer agrees not to spread the docs he received.
+ However, there can be more restrictions in an NDA.
+\end_layout
+
+\begin_layout Standard
+Terms of the NDA
+\end_layout
+
+\begin_layout Standard
+Before signing an NDA, think.
+ Whatever lawyers say, there is no such thing as a
+\begin_inset Quotes eld
+\end_inset
+
+standard
+\begin_inset Quotes erd
+\end_inset
+
+ NDA, you can always negotiate.
+\end_layout
+
+\begin_layout Standard
+Can Open Source drivers be written from that documentation under that NDA?
+\end_layout
+
+\begin_layout Standard
+What happens when the NDA expires? Can code still be free, are you bound
+ by any clause?
+\end_layout
+
+\begin_layout Standard
+What about yourself? Are you prevented from doing further work on this hardware?
+\end_layout
+
+\begin_layout Section
+Reverse engineering
+\end_layout
+
+\begin_layout Standard
+When specifications are not easily available or just incomplete, an alternate
+ route is reverse engineering.
+ Reverse engineering consists in figuring out the specifications for a given
+ piece of hardware by yourself, for example by looking at what a black-box
+ binary driver does to the hardware under certain circumstances.
+\end_layout
+
+\begin_layout Standard
+Reverse engineering is not just a tool to obtain missing hardware specifications
+, it is also a strong means of Open Source advocacy.
+ Once a reverse engineered driver exists and ships in Linux distributions,
+ pressure shifts onto the hardware vendor for support.
+ This, in turn, can force the vendor to support Open Source drivers.
+\end_layout
+
+\begin_layout Standard
+Reverse engineering is not as difficult as it seems, but it requires organization
+ and rigor.
+ Write down all bits of information (even incomplete bits), share them among
+ developers, try to work out bits one by one.
+ Do not hesitate to write ad-hoc tools, as they will save precious time down
+ the road (if you hesitate, you have crossed the line already!).
+\end_layout
+
+\begin_layout Paragraph*
+Mmiotrace
+\end_layout
+
+\begin_layout Standard
+The basic idea behind mmiotrace is simple: it first hooks the ioremap call,
+ and therefore prevents mapping of a designated I/O area.
+ Subsequently, accesses to this area will generate page faults, which are
+ caught by the kernel.
+ For each page fault, the faulting instruction is decoded to figure out
+ the write or read address, along with the value written/read.
+ The page is put back, the faulting instruction is then single-stepped,
+ and the page is then removed again.
+ Execution then continues as usual.
+\end_layout
+
+\begin_layout Standard
+Mmiotrace is now part of the official Linux kernel.
+ Therefore, any pre-existing driver can be traced.
+\end_layout
+
+\begin_layout Paragraph*
+Libsegfault
+\end_layout
+
+\begin_layout Standard
+libsegfault is similar to mmiotrace in the way it works: after removing
+ some pages which one wants to track accesses to, it will generate a segmentation
+ fault on each access and therefore be able to report each access.
+ The difference is that libsegfault is a user space tool while mmiotrace
+ is a kernel tool.
+\end_layout
+
+\begin_layout Paragraph*
+Valgrind-mmt
+\end_layout
+
+\begin_layout Standard
+Valgrind is a dynamic recompiling and instrumentation framework.
+ Valgrind-mmt is a plugin for valgrind which implements tracing of reads
+ and writes to a certain range of memory addresses, usually an mmio range
+ accessed from user space.
+ Memory accesses are dynamically instrumented thanks to valgrind and each
+ access to the zones we want to see traced is logged.
+\end_layout
+
+\begin_layout Paragraph*
+vbetool
+\end_layout
+
+\begin_layout Paragraph*
+Virtualization
+\end_layout
+
+\begin_layout Standard
+Finally, one last pre-existing tool to help reverse engineering is virtualizatio
+n.
+ By running a proprietary driver in a controlled environment, one can figure
+ out the inner workings of a GPU.
+ The plan is then to write an emulated GPU while doing the reverse engineering
+ (which imposes the use of an open source virtualization solution like Qemu).
+\end_layout
+
+\begin_layout Paragraph*
+Ad-hoc tools
+\end_layout
+
+\begin_layout Standard
+In addition to these generic tools, you will often find it useful to implement
+ your own additional tools, tailored for specific needs.
+ Renouveau is an example of such a tool that integrates the reverse engineering
+ mechanisms, the command decoding and printing.
+ In order to achieve decoding of the commands, it carries a database of
+ the graphics commands of nvidia GPUs.
+ This allows quick testing of new database entries.
+ Headers generated from this database are later used in the driver development
+ process.
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+Technical specifications are of course very important for authoring graphics
+ drivers.
+\end_layout
+
+\begin_layout Itemize
+NDAs can have unforeseen implications on yourself and your work.
+\end_layout
+
+\begin_layout Itemize
+When they are unavailable, incomplete or just plain wrong, reverse engineering
+ can help you figure out how the hardware actually works.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+Beyond Development
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:Beyond-Development"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Section
+Testing for conformance
+\end_layout
+
+\begin_layout Paragraph*
+Rendercheck
+\end_layout
+
+\begin_layout Paragraph*
+OpenGL conformance test suite
+\end_layout
+
+\begin_layout Standard
+The official OpenGL testing suite is not publicly available, and (paying)
+ Khronos Membership is required.
+ Instead, most developers use alternate sources for test programs.
+\end_layout
+
+\begin_layout Paragraph*
+Piglit
+\end_layout
+
+\begin_layout Paragraph*
+glean
+\end_layout
+
+\begin_layout Standard
+glean.sourceforge.net
+\end_layout
+
+\begin_layout Paragraph*
+Mesa demos
+\end_layout
+
+\begin_layout Standard
+mesa/progs/*
+\end_layout
+
+\begin_layout Section
+Debugging
+\end_layout
+
+\begin_layout Paragraph*
+gdb and X.Org
+\end_layout
+
+\begin_layout Standard
+gdb needs to run in a terminal emulator, while the application being debugged
+ might be stopped with a lock held.
+ That might result in a deadlock between the application stuck with a lock
+ and gdb waiting to be able to output text.
+\end_layout
+
+\begin_layout Standard
+printk debug
+\end_layout
+
+\begin_layout Standard
+crash (a layer on top of gdb for analyzing vmcore dumps)
+\end_layout
+
+\begin_layout Standard
+kgdb
+\end_layout
+
+\begin_layout Standard
+serial console
+\end_layout
+
+\begin_layout Standard
+diskdump
+\end_layout
+
+\begin_layout Standard
+linux-uml
+\end_layout
+
+\begin_layout Standard
+systemtap
+\end_layout
+
+\begin_layout Section
+Upstreaming
+\end_layout
+
+\begin_layout Standard
+Submitting your code for inclusion in the official trees is an important
+ part of the graphics driver development process under Linux.
+ There are multiple motivations for doing this.
+
+\end_layout
+
+\begin_layout Standard
+First, this allows end users to get hold of your driver more easily.
+\end_layout
+
+\begin_layout Standard
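+Second, this makes maintaining your driver easier in the future: in the
+ event of interface changes, code that lives upstream is adapted along with
+ the interface, which saves you the burden of doing it yourself.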
+\end_layout
+
+\begin_layout Standard
+Why upstream?
+\end_layout
+
+\begin_layout Standard
+How?
+\end_layout
+
+\begin_layout Standard
+When?
+\end_layout
+
+\begin_layout Standard
+\begin_inset Box Shadowbox
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 0
+width "100col%"
+special "none"
+height "1in"
+height_special "totalheight"
+status open
+
+\begin_layout Plain Layout
+Takeaways:
+\end_layout
+
+\begin_layout Itemize
+Thoroughly testing all your changes can save you the cost of bisection later
+ on.
+\end_layout
+
+\begin_layout Itemize
+Debugging is not easy for graphics drivers.
+\end_layout
+
+\begin_layout Itemize
+By upstreaming your code in official repositories, you save yourself the
+ burden of adapting it to ever-moving programming interfaces in X.Org, Mesa
+ and the kernel.
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Chapter
+Conclusions
+\begin_inset CommandInset label
+LatexCommand label
+name "cha:Conclusions"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset Note Note
+status open
+
+\begin_layout Plain Layout
+Bordel à caser quelque part :
+\end_layout
+
+\begin_layout Plain Layout
+- la composition, avec XRender ou avec GLX + GL_EXT_texture_from_pixmap,
+ expliquer les différences
+\end_layout
+
+\begin_layout Plain Layout
+- XGL, AIGLX
+\end_layout
+
+\end_inset
+
+
+\end_layout
+
+\end_body
+\end_document
diff --git a/myfncychap.sty b/myfncychap.sty
new file mode 100644
index 0000000..cd880ee
--- /dev/null
+++ b/myfncychap.sty
@@ -0,0 +1,683 @@
+%%% Copyright Ulf A. Lindgren
+%%%
+%%% Note Permission is granted to modify this file under
+%%% the condition that it is saved using another
+%%% file and package name.
+%%%
+%%% Revision 1.1 (1997)
+%%%
+%%% Jan. 8th Modified package name base date option
+%%% Jan. 22th Modified FmN and FmTi for error in book.cls
+%%% \MakeUppercase{#}->{\MakeUppercase#}
+%%% Apr. 6th Modified Lenny option to prevent undesired
+%%% skip of line.
+%%% Nov. 8th Fixed \@chapapp for AMS
+%%%
+%%% Revision 1.2 (1998)
+%%%
+%%% Feb. 11th Fixed appendix problem related to Bjarne
+%%% Aug. 11th Fixed problem related to 11pt and 12pt
+%%% suggested by Tomas Lundberg. THANKS!
+%%%
+%%% Revision 1.3 (2004)
+%%% Sep. 20th problem with frontmatter, mainmatter and
+%%% backmatter, pointed out by Lapo Mori
+%%%
+%%% Revision 1.31 (2004)
+%%% Sep. 21th problem with the Rejne definition stretched text
+%%% caused ugly gaps in the vrule aligned with the title
+%%% text. Kindly pointed out to me by Hendri Adriaens
+%%%
+%%% Revision 1.32 (2005)
+%%% Jun. 23th compatibility problem with the KOMA class 'scrbook.cls'
+%%% a remedy is a redefinition of '\@schapter' in
+%%% line with that used in KOMA. The problem was pointed
+%%% out to me by Mikkel Holm Olsen
+%%%
+%%% Revision 1.33 (2005)
+%%% Aug. 9th misspelled ``TWELV'' corrected, the error was pointed
+%%% out to me by George Pearson
+%%%
+%%% Revision 1.34 (2007)
+%%% Added an alternative to Lenny provided by Peter
+%%% Osborne (2005-11-28)
+%%% Corrected front, main and back matter, based on input
+%%% from Bas van Gils (2006-04-24)
+%%% Jul. 30th Added Bjornstrup option provided by Jean-Marc
+%%% Francois (2007-01-05).
+%%% Reverted to \MakeUppercase{#} see rev 1.1, solved
+%%% problem with MakeUppercase and MakeLowercase pointed
+%%% out by Marco Feuerstein (2007-06-06)
+
+
+%%% Last modified Jul. 2007
+
+%%% Package identification. The declared name matches the file name
+%%% (myfncychap.sty), so that the name requested with \usepackage and the
+%%% name provided here agree, as the licence note at the top of this file
+%%% asks for a distinct package name on modified copies.
+\NeedsTeXFormat{LaTeX2e}[1995/12/01]
+\ProvidesPackage{myfncychap}
+ [2007/07/30 v1.34
+ LaTeX package (Revised chapters)]
+
+%%%% Colour support is optional: \ifusecolor is false unless a style option
+%%%% sets it with \usecolortrue.
+\newif\ifusecolor \usecolorfalse
+
+%%%% \CNV holds the font commands applied to the chapter name (\@chapapp).
+%%%% \ChNameVar{<font commands>} replaces them.
+\newcommand{\CNV}{\huge\bfseries}
+\newcommand{\ChNameVar}[1]{%
+ \renewcommand{\CNV}{#1}}
+
+%%%% \CNoV holds the font commands applied to the chapter number
+%%%% (\thechapter). \ChNumVar{<font commands>} replaces them.
+\newcommand{\CNoV}{\huge\bfseries}
+\newcommand{\ChNumVar}[1]{%
+ \renewcommand{\CNoV}{#1}}
+
+%%%% Case conversion of the chapter name, read by \FmN:
+%%%% \ifUCN selects upper case, \ifLCN lower case, neither leaves it as is.
+\newif\ifUCN \UCNfalse
+\newif\ifLCN \LCNfalse
+\def\ChNameUpperCase{\LCNfalse\UCNtrue}
+\def\ChNameLowerCase{\UCNfalse\LCNtrue}
+\def\ChNameAsIs{\LCNfalse\UCNfalse}
+
+%%%%% Fix for AMSBook 971008
+%%%%% If the class has not defined \@chapapp, make it an alias of
+%%%%% \chaptername so that the \DOCH definitions below can use it.
+
+\@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}
+
+
+%%%%% Fix for Bjarne and appendix 980211
+%%%%% \ifinapp records that \appendix has been issued. The Bjarne and
+%%%%% PetersLenny styles read it to treat appendix "numbers" (letters)
+%%%%% differently from chapter numbers.
+%%%%% \appendix is redefined to: end the paragraph, reset the chapter and
+%%%%% section counters, set \ifinapp, switch \@chapapp to \appendixname and
+%%%%% print the chapter counter with \@Alph.
+
+\newif\ifinapp
+\inappfalse
+\renewcommand\appendix{\par
+ \setcounter{chapter}{0}%
+ \setcounter{section}{0}%
+ \inapptrue%
+ \renewcommand\@chapapp{\appendixname}%
+ \renewcommand\thechapter{\@Alph\c@chapter}}
+
+%%%%% Fix for frontmatter, mainmatter, and backmatter 040920
+%%%%% Provide \if@mainmatter (initially true) only when the class has not
+%%%%% created it already. \newif defines \if@mainmatter, \@mainmattertrue
+%%%%% and \@mainmatterfalse but no \@mainmatter, so the existence test has
+%%%%% to name `if@mainmatter'.
+
+\@ifundefined{if@mainmatter}{\newif\if@mainmatter \@mainmattertrue}{}
+
+%%%%%
+
+
+
+%%% \FmN{<text>}: format the chapter name according to the case flags.
+%%% \ifUCN -> \MakeUppercase (and \LCNfalse is executed afterwards);
+%%% \ifLCN -> \MakeLowercase (and \UCNfalse is executed afterwards);
+%%% otherwise the text is passed through unchanged.
+%%% NOTE(review): in the pass-through branch the line break after `#1' is
+%%% part of the replacement text, i.e. a space token follows the argument.
+%%% The styles measure \FmN output with \settowidth, so removing it would
+%%% alter their layout; confirm before changing.
+\newcommand{\FmN}[1]{%
+\ifUCN
+ {\MakeUppercase{#1}}\LCNfalse
+\else
+ \ifLCN
+ {\MakeLowercase{#1}}\UCNfalse
+ \else #1
+ \fi
+\fi}
+
+
+%%%% \CTV holds the font commands applied to the chapter title.
+%%%% \ChTitleVar{<font commands>} replaces them.
+\newcommand{\CTV}{\Huge\bfseries}
+\newcommand{\ChTitleVar}[1]{%
+ \renewcommand{\CTV}{#1}}
+
+%%%% \RW is the basic rule width used by the styles (1pt initially).
+%%%% \ChRuleWidth{<length>} changes it.
+\newlength{\RW} \setlength{\RW}{1pt}
+\newcommand{\ChRuleWidth}[1]{%
+ \setlength{\RW}{#1}}
+
+%%%% Case conversion of the chapter title, read by \FmTi:
+%%%% \ifUCT selects upper case, \ifLCT lower case, neither leaves it as is.
+\newif\ifUCT \UCTfalse
+\newif\ifLCT \LCTfalse
+\def\ChTitleUpperCase{\LCTfalse\UCTtrue}
+\def\ChTitleLowerCase{\UCTfalse\LCTtrue}
+\def\ChTitleAsIs{\LCTfalse\UCTfalse}
+%%% \FmTi{<text>}: format the chapter title according to the case flags.
+%%% \ifUCT -> \MakeUppercase (and \LCTfalse is executed afterwards);
+%%% \ifLCT -> \MakeLowercase (and \UCTfalse is executed afterwards);
+%%% otherwise the text is passed through unchanged inside a group.
+%%% NOTE(review): in the pass-through branch the line break after `{#1}'
+%%% is part of the replacement text, i.e. a space token follows the title.
+\newcommand{\FmTi}[1]{%
+\ifUCT
+ {\MakeUppercase{#1}}\LCTfalse
+\else
+ \ifLCT
+ {\MakeLowercase{#1}}\UCTfalse
+ \else {#1}
+ \fi
+\fi}
+
+
+
+%%% Scratch lengths shared by the style definitions below; each style sets
+%%% the ones it needs before using them.
+\newlength{\mylen} \newlength{\myhi}
+\newlength{\px} \newlength{\py}
+\newlength{\pyy} \newlength{\pxx}
+
+
+%%% \mghrulefill{<height>}: leaders made of an \hrule of the given height,
+%%% stretched by \hfill; \leavevmode starts a paragraph first if necessary.
+\def\mghrulefill#1{\leavevmode\leaders\hrule\@height #1\hfill\kern\z@}
+
+%%% Default heading building blocks; every style option redefines them.
+%%% \DOCH prints the chapter name and number line (\@chapapp in font \CNV,
+%%% \thechapter in font \CNoV), followed by a 20pt skip.
+\newcommand{\DOCH}{%
+ \CNV\FmN{\@chapapp}\space \CNoV\thechapter
+ \par\nobreak
+ \vskip 20\p@
+ }
+%%% \DOTI{<title>} prints the title in font \CTV, followed by a 40pt skip.
+%%% \@makechapterhead calls it when \if@mainmatter is true.
+\newcommand{\DOTI}[1]{%
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@
+ }
+%%% \DOTIS{<title>} is the variant called by \@makeschapterhead, and by
+%%% \@makechapterhead when \if@mainmatter is false.
+\newcommand{\DOTIS}[1]{%
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@
+ }
+
+%%%%%% SONNY DEF
+%%% Style option `Sonny': upper-case chapter name, 0.5pt rules.
+%%% \DOCH sets name and number ragged left; \DOTI and \DOTIS (identical
+%%% here) set the title ragged left between two full-width rules drawn
+%%% with \mghrulefill.
+
+\DeclareOption{Sonny}{%
+ \ChNameVar{\Large\sf}
+ \ChNumVar{\Huge}
+ \ChTitleVar{\Large\sf}
+ \ChRuleWidth{0.5pt}
+ \ChNameUpperCase
+ \renewcommand{\DOCH}{%
+ \raggedleft
+ \CNV\FmN{\@chapapp}\space \CNoV\thechapter
+ \par\nobreak
+ \vskip 40\p@}
+ \renewcommand{\DOTI}[1]{%
+ \CTV\raggedleft\mghrulefill{\RW}\par\nobreak
+ \vskip 5\p@
+ \CTV\FmTi{#1}\par\nobreak
+ \mghrulefill{\RW}\par\nobreak
+ \vskip 40\p@}
+ \renewcommand{\DOTIS}[1]{%
+ \CTV\raggedleft\mghrulefill{\RW}\par\nobreak
+ \vskip 5\p@
+ \CTV\FmTi{#1}\par\nobreak
+ \mghrulefill{\RW}\par\nobreak
+ \vskip 40\p@}
+}
+
+%%%%%% LENNY DEF
+%%% Style option `Lenny' (the one selected by this document's preamble).
+%%% Fonts: chapter name 14pt phv, chapter number 60pt ptm, title \huge bold.
+%%% \DOCH first measures the name and the number:
+%%% \px / \py = width / height of the chapter name (plus 2pt / 1pt),
+%%% \mylen = width of "name number" (plus 1pt),
+%%% \pxx / \pyy = width / height of the number (minus 1pt / 2pt),
+%%% \myhi = \pyy - \py,
+%%% and then builds a \parbox of \textwidth in which rules of width \RW are
+%%% placed around the name and number using these lengths.
+%%% \DOTI and \DOTIS (identical here) set the title ragged right.
+
+\DeclareOption{Lenny}{%
+
+ \ChNameVar{\fontsize{14}{16}\usefont{OT1}{phv}{m}{n}\selectfont}
+ \ChNumVar{\fontsize{60}{62}\usefont{OT1}{ptm}{m}{n}\selectfont}
+ \ChTitleVar{\huge\bfseries\rm}
+ \ChRuleWidth{1pt}
+ \renewcommand{\DOCH}{%
+ \settowidth{\px}{\CNV\FmN{\@chapapp}}
+ \addtolength{\px}{2pt}
+ \settoheight{\py}{\CNV\FmN{\@chapapp}}
+ \addtolength{\py}{1pt}
+
+ \settowidth{\mylen}{\CNV\FmN{\@chapapp}\space\CNoV\thechapter}
+ \addtolength{\mylen}{1pt}
+ \settowidth{\pxx}{\CNoV\thechapter}
+ \addtolength{\pxx}{-1pt}
+
+ \settoheight{\pyy}{\CNoV\thechapter}
+ \addtolength{\pyy}{-2pt}
+ \setlength{\myhi}{\pyy}
+ \addtolength{\myhi}{-1\py}
+ \par
+ \parbox[b]{\textwidth}{%
+ \rule[\py]{\RW}{\myhi}%
+ \hskip -\RW%
+ \rule[\pyy]{\px}{\RW}%
+ \hskip -\px%
+ \raggedright%
+ \CNV\FmN{\@chapapp}\space\CNoV\thechapter%
+ \hskip1pt%
+ \mghrulefill{\RW}%
+ \rule{\RW}{\pyy}\par\nobreak%
+ \vskip -\baselineskip%
+ \vskip -\pyy%
+ \hskip \mylen%
+ \mghrulefill{\RW}\par\nobreak%
+ \vskip \pyy}%
+ \vskip 20\p@}
+
+
+ \renewcommand{\DOTI}[1]{%
+ \raggedright
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+
+ \renewcommand{\DOTIS}[1]{%
+ \raggedright
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+ }
+
+%%%%%% Peter Osbornes' version of LENNY DEF
+%%% Style option `PetersLenny': the Lenny frame with per-number fine tuning.
+%%% Five adjustment lengths replace constants of the Lenny definition; each
+%%% has a setter macro (\BL, \BR, \TL, \TR, \BLrule). \DOCH selects values
+%%% for them from a table indexed by the chapter counter (1--20), and, when
+%%% \ifinapp is true, from a second table for appendices A--Z.
+
+\DeclareOption{PetersLenny}{%
+
+% five new lengths
+\newlength{\bl} % bottom left : orig \space
+\setlength{\bl}{6pt}
+\newcommand{\BL}[1]{\setlength{\bl}{#1}}
+\newlength{\br} % bottom right : orig 1pt
+\setlength{\br}{1pt}
+\newcommand{\BR}[1]{\setlength{\br}{#1}}
+\newlength{\tl} % top left : orig 2pt
+\setlength{\tl}{2pt}
+\newcommand{\TL}[1]{\setlength{\tl}{#1}}
+\newlength{\trr} % top right :orig 1pt
+\setlength{\trr}{1pt}
+\newcommand{\TR}[1]{\setlength{\trr}{#1}}
+\newlength{\blrule} % width of the rule drawn right after the chapter name
+% Initialise \blrule itself here; assigning \trr at this point would
+% overwrite the 1pt default given to \trr just above.
+\setlength{\blrule}{0pt}
+\newcommand{\BLrule}[1]{\setlength{\blrule}{#1}}
+
+
+ \ChNameVar{\fontsize{14}{16}\usefont{OT1}{phv}{m}{n}\selectfont}
+ \ChNumVar{\fontsize{60}{62}\usefont{OT1}{ptm}{m}{n}\selectfont}
+ \ChTitleVar{\Huge\bfseries\rm}
+ \ChRuleWidth{1pt}
+\renewcommand{\DOCH}{%
+
+
+%%%%%%% tweaks for 1--9 and A--Z
+\ifcase\c@chapter\relax%
+\or\BL{-3pt}\TL{-4pt}\BR{0pt}\TR{-6pt}%1
+\or\BL{0pt}\TL{-4pt}\BR{2pt}\TR{-4pt}%2
+\or\BL{0pt}\TL{-4pt}\BR{2pt}\TR{-4pt}%3
+\or\BL{0pt}\TL{5pt}\BR{2pt}\TR{-4pt}%4
+\or\BL{0pt}\TL{3pt}\BR{2pt}\TR{-4pt}%5
+\or\BL{-1pt}\TL{0pt}\BR{2pt}\TR{-2pt}%6
+\or\BL{0pt}\TL{-3pt}\BR{2pt}\TR{-2pt}%7
+\or\BL{0pt}\TL{-3pt}\BR{2pt}\TR{-2pt}%8
+\or\BL{0pt}\TL{-3pt}\BR{-4pt}\TR{-2pt}%9
+\or\BL{-3pt}\TL{-3pt}\BR{2pt}\TR{-7pt}%10
+\or\BL{-6pt}\TL{-6pt}\BR{0pt}\TR{-9pt}%11
+\or\BL{-6pt}\TL{-6pt}\BR{2pt}\TR{-7pt}%12
+\or\BL{-5pt}\TL{-5pt}\BR{0pt}\TR{-9pt}%13
+\or\BL{-6pt}\TL{-6pt}\BR{0pt}\TR{-9pt}%14
+\or\BL{-3pt}\TL{-3pt}\BR{3pt}\TR{-6pt}%15
+\or\BL{-3pt}\TL{-3pt}\BR{3pt}\TR{-6pt}%16
+\or\BL{-5pt}\TL{-3pt}\BR{-8pt}\TR{-6pt}%17
+\or\BL{-5pt}\TL{-5pt}\BR{0pt}\TR{-9pt}%18
+\or\BL{-3pt}\TL{-3pt}\BR{-6pt}\TR{-9pt}%19
+\or\BL{0pt}\TL{0pt}\BR{0pt}\TR{-5pt}%20
+\fi
+
+\ifinapp\ifcase\c@chapter\relax%
+\or\BL{0pt}\TL{14pt}\BR{5pt}\TR{-19pt}%A
+\or\BL{0pt}\TL{-5pt}\BR{-3pt}\TR{-8pt}%B
+\or\BL{-3pt}\TL{-2pt}\BR{1pt}\TR{-6pt}\BLrule{0pt}%C
+\or\BL{0pt}\TL{-5pt}\BR{-3pt}\TR{-8pt}\BLrule{0pt}%D
+\or\BL{0pt}\TL{-5pt}\BR{2pt}\TR{-3pt}%E
+\or\BL{0pt}\TL{-5pt}\BR{-10pt}\TR{-1pt}%F
+\or\BL{-3pt}\TL{0pt}\BR{0pt}\TR{-7pt}%G
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%H
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%I
+\or\BL{2pt}\TL{0pt}\BR{-3pt}\TR{1pt}%J
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%K
+\or\BL{0pt}\TL{-5pt}\BR{2pt}\TR{-19pt}%L
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}%M
+\or\BL{0pt}\TL{-5pt}\BR{-2pt}\TR{-1pt}%N
+\or\BL{-3pt}\TL{-2pt}\BR{-3pt}\TR{-11pt}%O
+\or\BL{0pt}\TL{-5pt}\BR{-9pt}\TR{-3pt}%P
+\or\BL{-3pt}\TL{-2pt}\BR{-3pt}\TR{-11pt}%Q
+\or\BL{0pt}\TL{-5pt}\BR{4pt}\TR{-8pt}%R
+\or\BL{-2pt}\TL{-2pt}\BR{-2pt}\TR{-7pt}%S
+\or\BL{-3pt}\TL{0pt}\BR{-5pt}\TR{4pt}\BLrule{8pt}%T
+\or\BL{-7pt}\TL{-11pt}\BR{-5pt}\TR{-7pt}\BLrule{0pt}%U
+\or\BL{-14pt}\TL{-5pt}\BR{-14pt}\TR{-1pt}\BLrule{14pt}%V
+\or\BL{-10pt}\TL{-9pt}\BR{-13pt}\TR{-3pt}\BLrule{7pt}%W
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}\BLrule{0pt}%X
+\or\BL{-6pt}\TL{-4pt}\BR{-7pt}\TR{1pt}\BLrule{7pt}%Y
+\or\BL{0pt}\TL{-5pt}\BR{3pt}\TR{-1pt}\BLrule{0pt}%Z
+\fi\fi
+%%%%%%%
+ \settowidth{\px}{\CNV\FmN{\@chapapp}}
+ \addtolength{\px}{\tl} %MOD change 2pt to \tl
+ \settoheight{\py}{\CNV\FmN{\@chapapp}}
+ \addtolength{\py}{1pt}
+
+ \settowidth{\mylen}{\CNV\FmN{\@chapapp}\space\CNoV\thechapter}
+ \addtolength{\mylen}{\trr}% MOD change 1pt to \tr
+ \settowidth{\pxx}{\CNoV\thechapter}
+ \addtolength{\pxx}{-1pt}
+
+ \settoheight{\pyy}{\CNoV\thechapter}
+ \addtolength{\pyy}{-2pt}
+ \setlength{\myhi}{\pyy}
+ \addtolength{\myhi}{-1\py}
+ \par
+ \parbox[b]{\textwidth}{%
+ \rule[\py]{\RW}{\myhi}%
+ \hskip -\RW%
+ \rule[\pyy]{\px}{\RW}%
+ \hskip -\px%
+ \raggedright%
+ \CNV\FmN{\@chapapp}\rule{\blrule}{\RW}\hskip\bl\CNoV\thechapter%MOD
+% \CNV\FmN{\@chapapp}\space\CNoV\thechapter %ORIGINAL
+ \hskip\br% %MOD 1pt to \br
+ \mghrulefill{\RW}%
+ \rule{\RW}{\pyy}\par\nobreak%
+ \vskip -\baselineskip%
+ \vskip -\pyy%
+ \hskip \mylen%
+ \mghrulefill{\RW}\par\nobreak%
+ \vskip \pyy}%
+ \vskip 20\p@}
+
+
+ \renewcommand{\DOTI}[1]{%
+ \raggedright
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+
+ \renewcommand{\DOTIS}[1]{%
+ \raggedright
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+ }
+
+
+%
+
+
+%%%%%% BJORNSTRUP DEF
+%%% Style option `Bjornstrup': the only style that sets \usecolortrue, so
+%%% the `color' package is loaded after option processing.
+%%% \DOCH draws a grey (.85) \colorbox as wide as \textwidth and overprints
+%%% the chapter number in grey (.5), shifted by the measured number width.
+%%% \DOTI puts the title in a grey box with \myhi (10pt) padding; \DOTIS
+%%% draws the empty grey bar first and then the same title box.
+
+\DeclareOption{Bjornstrup}{%
+ \usecolortrue
+ % pzc (Zapf Chancery) is nice. ppl (Palatino) is cool too.
+ \ChNumVar{\fontsize{76}{80}\usefont{OT1}{pzc}{m}{n}\selectfont}
+ \ChTitleVar{\raggedleft\Large\sffamily\bfseries}
+
+ \setlength{\myhi}{10pt} % Space between grey box border and text
+ \setlength{\mylen}{\textwidth}
+ \addtolength{\mylen}{-2\myhi}
+ \renewcommand{\DOCH}{%
+ \settowidth{\py}{\CNoV\thechapter}
+ \addtolength{\py}{-10pt} % Amount of space by which the
+% % number is shifted right
+ \fboxsep=0pt%
+ \colorbox[gray]{.85}{\rule{0pt}{40pt}\parbox[b]{\textwidth}{\hfill}}%
+ \kern-\py\raise20pt%
+ \hbox{\color[gray]{.5}\CNoV\thechapter}\\%
+ }
+
+ \renewcommand{\DOTI}[1]{%
+ \nointerlineskip\raggedright%
+ \fboxsep=\myhi%
+ \vskip-1ex%
+ \colorbox[gray]{.85}{\parbox[t]{\mylen}{\CTV\FmTi{#1}}}\par\nobreak%
+ \vskip 40\p@%
+ }
+
+ \renewcommand{\DOTIS}[1]{%
+ \fboxsep=0pt
+ \colorbox[gray]{.85}{\rule{0pt}{40pt}\parbox[b]{\textwidth}{\hfill}}\\%
+ \nointerlineskip\raggedright%
+ \fboxsep=\myhi%
+ \colorbox[gray]{.85}{\parbox[t]{\mylen}{\CTV\FmTi{#1}}}\par\nobreak%
+ \vskip 40\p@%
+ }
+}
+
+
+%%%%%%% GLENN DEF
+%%% Style option `Glenn': upper-case name and title, 1pt rules.
+%%% \DOCH sets name and number ragged right, followed by a rule fill and a
+%%% vertical rule whose size is derived from \baselineskip, \RW and the
+%%% height of a sample title. \DOTI continues with a vertical rule, a rule
+%%% fill and the title set ragged left, re-using \py and \pyy as left by
+%%% \DOCH. \DOTIS computes its own lengths (it is used without \DOCH) and
+%%% allocates \backskip for that purpose.
+
+
+\DeclareOption{Glenn}{%
+ \ChNameVar{\bfseries\Large\sf}
+ \ChNumVar{\Huge}
+ \ChTitleVar{\bfseries\Large\rm}
+ \ChRuleWidth{1pt}
+ \ChNameUpperCase
+ \ChTitleUpperCase
+ \renewcommand{\DOCH}{%
+ \settoheight{\myhi}{\CTV\FmTi{Test}}
+ \setlength{\py}{\baselineskip}
+ \addtolength{\py}{\RW}
+ \addtolength{\py}{\myhi}
+ \setlength{\pyy}{\py}
+ \addtolength{\pyy}{-1\RW}
+
+ \raggedright
+ \CNV\FmN{\@chapapp}\space\CNoV\thechapter
+ \hskip 3pt\mghrulefill{\RW}\rule[-1\pyy]{2\RW}{\py}\par\nobreak}
+
+ \renewcommand{\DOTI}[1]{%
+ \addtolength{\pyy}{-4pt}
+ \settoheight{\myhi}{\CTV\FmTi{#1}}
+ \addtolength{\myhi}{\py}
+ \addtolength{\myhi}{-1\RW}
+ \vskip -1\pyy
+ \rule{2\RW}{\myhi}\mghrulefill{\RW}\hskip 2pt
+ \raggedleft\CTV\FmTi{#1}\par\nobreak
+ \vskip 80\p@}
+
+\newlength{\backskip}
+ \renewcommand{\DOTIS}[1]{%
+% \setlength{\py}{10pt}
+% \setlength{\pyy}{\py}
+% \addtolength{\pyy}{\RW}
+% \setlength{\myhi}{\baselineskip}
+% \addtolength{\myhi}{\pyy}
+% \mghrulefill{\RW}\rule[-1\py]{2\RW}{\pyy}\par\nobreak
+% \addtolength{}{}
+%\vskip -1\baselineskip
+% \rule{2\RW}{\myhi}\mghrulefill{\RW}\hskip 2pt
+% \raggedleft\CTV\FmTi{#1}\par\nobreak
+% \vskip 60\p@}
+%% Fix suggested by Tomas Lundberg
+ \setlength{\py}{25pt} % or whatever you like
+ \setlength{\pyy}{\py}
+ \setlength{\backskip}{\py}
+ \addtolength{\backskip}{2pt}
+ \addtolength{\pyy}{\RW}
+ \setlength{\myhi}{\baselineskip}
+ \addtolength{\myhi}{\pyy}
+ \mghrulefill{\RW}\rule[-1\py]{2\RW}{\pyy}\par\nobreak
+ \vskip -1\backskip
+ \rule{2\RW}{\myhi}\mghrulefill{\RW}\hskip 3pt %
+ \raggedleft\CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@}
+ }
+
+%%%%%%% CONNY DEF
+%%% Style option `Conny': centred, upper-case name and title, 2pt rules.
+%%% \DOCH draws a rule of three times \RW, then one of \RW, then the
+%%% chapter name and number. \DOTI and \DOTIS (identical here) draw one
+%%% rule of \RW above the title and add a 60pt skip below it.
+
+\DeclareOption{Conny}{%
+ \ChNameUpperCase
+ \ChTitleUpperCase
+ \ChNameVar{\centering\Huge\rm\bfseries}
+ \ChNumVar{\Huge}
+ \ChTitleVar{\centering\Huge\rm}
+ \ChRuleWidth{2pt}
+
+ \renewcommand{\DOCH}{%
+ \mghrulefill{3\RW}\par\nobreak
+ \vskip -0.5\baselineskip
+ \mghrulefill{\RW}\par\nobreak
+ \CNV\FmN{\@chapapp}\space \CNoV\thechapter
+ \par\nobreak
+ \vskip -0.5\baselineskip
+ }
+ \renewcommand{\DOTI}[1]{%
+ \mghrulefill{\RW}\par\nobreak
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 60\p@
+ }
+ \renewcommand{\DOTIS}[1]{%
+ \mghrulefill{\RW}\par\nobreak
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 60\p@
+ }
+ }
+
+%%%%%%% REJNE DEF
+%%% Style option `Rejne': centred, upper-case name and title, 1pt rules.
+%%% \DOCH prints the chapter name, then a line made of two rule fills with
+%%% the chapter number between them and a vertical rule at each end.
+%%% \DOTI sets the title in a \parbox between two vertical rules and closes
+%%% the frame with a bottom rule. \DOTIS frames the title with \fbox,
+%%% temporarily setting \fboxrule to \RW and restoring it afterwards
+%%% through \py.
+
+\DeclareOption{Rejne}{%
+
+ \ChNameUpperCase
+ \ChTitleUpperCase
+ \ChNameVar{\centering\Large\rm}
+ \ChNumVar{\Huge}
+ \ChTitleVar{\centering\Huge\rm}
+ \ChRuleWidth{1pt}
+ \renewcommand{\DOCH}{%
+ \settoheight{\py}{\CNoV\thechapter}
+ \parskip=0pt plus 1pt % Set parskip to default, just in case v1.31
+ \addtolength{\py}{-1pt}
+ \CNV\FmN{\@chapapp}\par\nobreak
+ \vskip 20\p@
+ \setlength{\myhi}{2\baselineskip}
+ \setlength{\px}{\myhi}
+ \addtolength{\px}{-1\RW}
+ \rule[-1\px]{\RW}{\myhi}\mghrulefill{\RW}\hskip
+ 10pt\raisebox{-0.5\py}{\CNoV\thechapter}\hskip 10pt\mghrulefill{\RW}\rule[-1\px]{\RW}{\myhi}\par\nobreak
+ \vskip -3\p@% Added -2pt vskip to correct for stretched text v1.31
+ }
+ \renewcommand{\DOTI}[1]{%
+ \setlength{\mylen}{\textwidth}
+ \parskip=0pt plus 1pt % Set parskip to default, just in case v1.31
+ \addtolength{\mylen}{-2\RW}
+ {\vrule width\RW}\parbox{\mylen}{\CTV\FmTi{#1}}{\vrule width\RW}\par\nobreak%
+ \vskip -3pt\rule{\RW}{2\baselineskip}\mghrulefill{\RW}\rule{\RW}{2\baselineskip}%
+ \vskip 60\p@% Added -2pt in vskip to correct for stretched text v1.31
+ }
+ \renewcommand{\DOTIS}[1]{%
+ \setlength{\py}{\fboxrule}
+ \setlength{\fboxrule}{\RW}
+ \setlength{\mylen}{\textwidth}
+ \addtolength{\mylen}{-2\RW}
+ \fbox{\parbox{\mylen}{\vskip 2\baselineskip\CTV\FmTi{#1}\par\nobreak\vskip \baselineskip}}
+ \setlength{\fboxrule}{\py}
+ \vskip 60\p@
+ }
+ }
+
+
+%%%%%%% BJARNE DEF
+%%% Style option `Bjarne': ragged-left, upper-case name and title, with the
+%%% chapter number spelled out in words.
+%%% Helpers defined by this option:
+%%% \AlphaNo prints the word for counter AlphaCnt (ONE..NINETEEN;
+%%% ZERO only when the chapter counter itself is 0),
+%%% \AlphaDecNo splits AlphaCnt into tens (counter AlphaDecCnt) and a
+%%% remainder left in AlphaCnt, then prints the word for
+%%% the tens (TEN..NINETY),
+%%% \TheAlphaChapter prints \thechapter inside an appendix (\ifinapp),
+%%% otherwise the chapter counter in words: \AlphaNo below
+%%% 20, \AlphaDecNo followed by \AlphaNo from 20 on.
+
+\DeclareOption{Bjarne}{%
+ \ChNameUpperCase
+ \ChTitleUpperCase
+ \ChNameVar{\raggedleft\normalsize\rm}
+ \ChNumVar{\raggedleft \bfseries\Large}
+ \ChTitleVar{\raggedleft \Large\rm}
+ \ChRuleWidth{1pt}
+
+
+%% Note thechapter -> c@chapter fix appendix bug
+%% Fixed misspelled 12
+
+ \newcounter{AlphaCnt}
+ \newcounter{AlphaDecCnt}
+ \newcommand{\AlphaNo}{%
+ \ifcase\number\theAlphaCnt
+ \ifnum\c@chapter=0
+ ZERO\else{}\fi
+ \or ONE\or TWO\or THREE\or FOUR\or FIVE
+ \or SIX\or SEVEN\or EIGHT\or NINE\or TEN
+ \or ELEVEN\or TWELVE\or THIRTEEN\or FOURTEEN\or FIFTEEN
+ \or SIXTEEN\or SEVENTEEN\or EIGHTEEN\or NINETEEN\fi
+}
+
+ \newcommand{\AlphaDecNo}{%
+ \setcounter{AlphaDecCnt}{0}
+ \@whilenum\number\theAlphaCnt>0\do
+ {\addtocounter{AlphaCnt}{-10}
+ \addtocounter{AlphaDecCnt}{1}}
+ \ifnum\number\theAlphaCnt=0
+ \else
+ \addtocounter{AlphaDecCnt}{-1}
+ \addtocounter{AlphaCnt}{10}
+ \fi
+
+
+ \ifcase\number\theAlphaDecCnt\or TEN\or TWENTY\or THIRTY\or
+ FORTY\or FIFTY\or SIXTY\or SEVENTY\or EIGHTY\or NINETY\fi
+ }
+ \newcommand{\TheAlphaChapter}{%
+
+ \ifinapp
+ \thechapter
+ \else
+ \setcounter{AlphaCnt}{\c@chapter}
+ \ifnum\c@chapter<20
+ \AlphaNo
+ \else
+ \AlphaDecNo\AlphaNo
+ \fi
+ \fi
+ }
+ \renewcommand{\DOCH}{%
+ \mghrulefill{\RW}\par\nobreak
+ \CNV\FmN{\@chapapp}\par\nobreak
+ \CNoV\TheAlphaChapter\par\nobreak
+ \vskip -1\baselineskip\vskip 5pt\mghrulefill{\RW}\par\nobreak
+ \vskip 20\p@
+ }
+ \renewcommand{\DOTI}[1]{%
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@
+ }
+ \renewcommand{\DOTIS}[1]{%
+ \CTV\FmTi{#1}\par\nobreak
+ \vskip 40\p@
+ }
+}
+
+%%% Fallback for any option that is not one of the styles declared in this
+%%% file: it changes no definition and only issues a warning, attributed to
+%%% this package (file myfncychap.sty) and naming the rejected option.
+\DeclareOption*{%
+ \PackageWarning{myfncychap}{Unknown style option `\CurrentOption'}
+ }
+
+%%% Execute the requested options in the order given by the user.
+\ProcessOptions* \relax
+
+%%% Load `color' only when a style option has set \usecolortrue.
+\ifusecolor\RequirePackage{color}\fi
+%%% \@makechapterhead{<title>}: heading of a numbered \chapter.
+%%% Inside a group with zero \parindent, ragged-right setting and the
+%%% normal font:
+%%% - \DOCH (name and number line) is printed only when chapters are
+%%% numbered (secnumdepth > -1) and \if@mainmatter is true;
+%%% - the title goes through \DOTI when \if@mainmatter is true and through
+%%% \DOTIS otherwise (fix for frontmatter, mainmatter and backmatter,
+%%% 040920 / 060424).
+\def\@makechapterhead#1{%
+ \vspace*{0\p@}%
+ {%
+ \parindent\z@
+ \raggedright
+ \normalfont
+ \ifnum\c@secnumdepth>\m@ne \if@mainmatter \DOCH \fi\fi
+ \interlinepenalty\@M
+ \if@mainmatter \DOTI{#1}\else \DOTIS{#1}\fi
+ }%
+}
+
+
+%%% Begin: To avoid problem with scrbook.cls (fncychap version 1.32)
+
+%%% Former definition, kept for reference; it wrapped the two-column
+%%% heading in \@topnewpage:
+%\def\@schapter#1{\if@twocolumn
+% \@topnewpage[\@makeschapterhead{#1}]%
+% \else
+% \@makeschapterhead{#1}%
+% \@afterheading
+% \fi}
+
+%%% Current definition: the starred-chapter heading is produced by
+%%% \@makeschapterhead in both column modes; \@afterheading follows only
+%%% when the document is not in two-column mode.
+\def\@schapter#1{%
+ \@makeschapterhead{#1}%
+ \if@twocolumn\else
+ \@afterheading
+ \fi}
+
+%%% End: To avoid problem with scrbook.cls (fncychap version 1.32)
+
+%%% \@makeschapterhead{<title>}: heading of an unnumbered (starred) chapter.
+%%% Starts 50pt below the top of the page and, inside a group with zero
+%%% \parindent, ragged-right setting and the normal font, prints the title
+%%% through \DOTIS.
+%%% NOTE(review): every \DOTIS defined in this file already ends with a
+%%% \vskip, so the 40pt skip below comes in addition to it; confirm that
+%%% the doubled space is intended.
+\def\@makeschapterhead#1{%
+ \vspace*{50\p@}%
+ {\parindent \z@ \raggedright
+ \normalfont
+ \interlinepenalty\@M
+ \DOTIS{#1}
+ \vskip 40\p@
+ }}
+
+\endinput
+
+