mirror of
https://github.com/apache/poi.git
synced 2026-02-27 20:40:08 +08:00
893 lines
35 KiB
HTML
893 lines
35 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
<html>
|
|
<head>
|
|
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
|
<meta content="Apache Forrest" name="Generator">
|
|
<meta name="Forrest-version" content="0.9">
|
|
<meta name="Forrest-skin-name" content="pelt">
|
|
<title>POI-HSLF - A Guide to the PowerPoint File Format</title>
|
|
<link type="text/css" href="../../skin/basic.css" rel="stylesheet">
|
|
<link media="screen" type="text/css" href="../../skin/screen.css" rel="stylesheet">
|
|
<link media="print" type="text/css" href="../../skin/print.css" rel="stylesheet">
|
|
<link type="text/css" href="../../skin/profile.css" rel="stylesheet">
|
|
<script src="../../skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="../../skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="../../skin/fontsize.js" language="javascript" type="text/javascript"></script>
|
|
<link rel="shortcut icon" href="../../images/favicon.ico">
|
|
</head>
|
|
<body onload="init()">
|
|
<script type="text/javascript">ndeSetTextSize();</script>
|
|
<div id="top">
|
|
<!--+
|
|
|breadtrail
|
|
+-->
|
|
<div class="breadtrail">
|
|
<a href="https://www.apache.org">Apache Software Foundation</a> > <a href="https://poi.apache.org">Apache POI</a><script src="../../skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
|
|
</div>
|
|
<!--+
|
|
|header
|
|
+-->
|
|
<div class="header">
|
|
<!--+
|
|
|start group logo
|
|
+-->
|
|
<div class="grouplogo">
|
|
<a href="https://www.apache.org"><img class="logoImage" alt="Apache Software Foundation" src="../../images/asflogo_horizontal_color.svg" title="The Apache Software Foundation is a cornerstone of the modern Open Source software ecosystem – supporting some of the most widely used and important software solutions powering today's Internet economy."></a>
|
|
</div>
|
|
<!--+
|
|
|end group logo
|
|
+-->
|
|
<!--+
|
|
|start Project Logo
|
|
+-->
|
|
<div class="projectlogo">
|
|
<a href="https://poi.apache.org"><img class="logoImage" alt="Apache POI" src="../../images/project-header.png" title="Apache POI is well-known in the Java field as a library for reading and writing Microsoft Office file formats, such as Excel, PowerPoint, Word, Visio, Publisher and Outlook. It supports both the older (OLE2) and new (OOXML - Office Open XML) formats."></a>
|
|
</div>
|
|
<!--+
|
|
|end Project Logo
|
|
+-->
|
|
<!--+
|
|
|start Search
|
|
+-->
|
|
<div class="searchbox">
|
|
<form action="https://www.google.com/search" method="get" class="roundtopsmall">
|
|
<input value="poi.apache.org" name="sitesearch" type="hidden"><input onFocus="getBlank (this, 'Search the site with google');" size="25" name="q" id="query" type="text" value="Search the site with google">
|
|
<input name="Search" value="Search" type="submit">
|
|
</form>
|
|
</div>
|
|
<!--+
|
|
|end search
|
|
+-->
|
|
<!--+
|
|
|start Tabs
|
|
+-->
|
|
<ul id="tabs">
|
|
<li>
|
|
<a class="unselected" href="../../index.html">Home</a>
|
|
</li>
|
|
<li>
|
|
<a class="unselected" href="../../help/index.html">Help</a>
|
|
</li>
|
|
<li class="current">
|
|
<a class="selected" href="../../components/index.html">Component APIs</a>
|
|
</li>
|
|
<li>
|
|
<a class="unselected" href="../../devel/index.html">Getting Involved</a>
|
|
</li>
|
|
</ul>
|
|
<!--+
|
|
|end Tabs
|
|
+-->
|
|
</div>
|
|
</div>
|
|
<div id="main">
|
|
<div id="publishedStrip">
|
|
<!--+
|
|
|start Subtabs
|
|
+-->
|
|
<div id="level2tabs"></div>
|
|
<!--+
|
|
|end Endtabs
|
|
+-->
|
|
<script type="text/javascript"><!--
|
|
document.write("Last Published: " + document.lastModified);
|
|
// --></script>
|
|
</div>
|
|
<!--+
|
|
|breadtrail
|
|
+-->
|
|
<div class="breadtrail">
|
|
|
|
|
|
</div>
|
|
<!--+
|
|
|start Menu, mainarea
|
|
+-->
|
|
<!--+
|
|
|start Menu
|
|
+-->
|
|
<div id="menu">
|
|
<div onclick="SwitchMenu('menu_selected_1.1', '../../skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('../../skin/images/chapter_open.gif');">Component APIs</div>
|
|
<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
|
|
<div class="menuitem">
|
|
<a href="../../components/index.html">Overview</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../apidocs/index.html">Javadocs</a>
|
|
</div>
|
|
<div onclick="SwitchMenu('menu_1.1.3', '../../skin/')" id="menu_1.1.3Title" class="menutitle">Excel (HSSF/XSSF)</div>
|
|
<div id="menu_1.1.3" class="menuitemgroup">
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/index.html">Overview</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/quick-guide.html">Quick Guide</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/how-to.html">HOWTO</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/converting.html">HSSF to SS Converting</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/formula.html">Formula Support</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/eval.html">Formula Evaluation</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/eval-devguide.html">Eval Dev Guide</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/examples.html">Examples</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/use-case.html">Use Case</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/diagrams.html">Pictorial Docs</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/limitations.html">Limitations</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/user-defined-functions.html">User Defined Functions</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/excelant.html">ExcelAnt Tests</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/hacking-hssf.html">Hacking HSSF</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/record-generator.html">Record Generator</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/spreadsheet/chart.html">Charts</a>
|
|
</div>
|
|
</div>
|
|
<div onclick="SwitchMenu('menu_selected_1.1.4', '../../skin/')" id="menu_selected_1.1.4Title" class="menutitle" style="background-image: url('../../skin/images/chapter_open.gif');">PowerPoint (HSLF/XSLF)</div>
|
|
<div id="menu_selected_1.1.4" class="selectedmenuitemgroup" style="display: block;">
|
|
<div class="menuitem">
|
|
<a href="../../components/slideshow/index.html">Overview</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/slideshow/quick-guide.html">Quick Guide</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/slideshow/how-to-shapes.html">HSLF Cookbook</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/slideshow/xslf-cookbook.html">XSLF Cookbook</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/slideshow/ppt-wmf-emf-renderer.html">Render SL/WMF/EMF</a>
|
|
</div>
|
|
<div class="menupage">
|
|
<div class="menupagetitle">PPT File Format</div>
|
|
</div>
|
|
</div>
|
|
<div onclick="SwitchMenu('menu_1.1.5', '../../skin/')" id="menu_1.1.5Title" class="menutitle">Word (HWPF/XWPF)</div>
|
|
<div id="menu_1.1.5" class="menuitemgroup">
|
|
<div class="menuitem">
|
|
<a href="../../components/document/index.html">Overview</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/document/quick-guide.html">HWPF Quick Guide</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/document/quick-guide-xwpf.html">XWPF Quick Guide</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/document/docoverview.html">HWPF Format</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/document/projectplan.html">HWPF Project plan</a>
|
|
</div>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/hsmf/index.html">Outlook (HSMF)</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/diagram/index.html">Visio (HDGF+XDGF)</a>
|
|
</div>
|
|
<div onclick="SwitchMenu('menu_1.1.8', '../../skin/')" id="menu_1.1.8Title" class="menutitle">Publisher (HPBF)</div>
|
|
<div id="menu_1.1.8" class="menuitemgroup">
|
|
<div class="menuitem">
|
|
<a href="../../components/hpbf/index.html">Overview</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/hpbf/file-format.html">File Format</a>
|
|
</div>
|
|
</div>
|
|
<div onclick="SwitchMenu('menu_1.1.9', '../../skin/')" id="menu_1.1.9Title" class="menutitle">OLE2 Filesystem (POIFS)</div>
|
|
<div id="menu_1.1.9" class="menuitemgroup">
|
|
<div class="menuitem">
|
|
<a href="../../components/poifs/index.html">Overview</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/poifs/how-to.html">How To</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/poifs/embeded.html">Embedded Documents</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/poifs/fileformat.html">File System Documentation</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/poifs/usecases.html">Use Cases</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/poifs/design.html">Design</a>
|
|
</div>
|
|
</div>
|
|
<div onclick="SwitchMenu('menu_1.1.10', '../../skin/')" id="menu_1.1.10Title" class="menutitle">OLE2 Document Props (HPSF)</div>
|
|
<div id="menu_1.1.10" class="menuitemgroup">
|
|
<div class="menuitem">
|
|
<a href="../../components/hpsf/index.html">Overview</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/hpsf/how-to.html">How To</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/hpsf/thumbnails.html">Thumbnails</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/hpsf/internals.html">Internals</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/hpsf/todo.html">To Do</a>
|
|
</div>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/hmef/index.html">TNEF (HMEF) for winmail.dat</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/oxml4j/index.html">OpenXML4J (OOXML)</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/logging.html">Logging framework</a>
|
|
</div>
|
|
<div class="menuitem">
|
|
<a href="../../components/configuration.html">Configuration</a>
|
|
</div>
|
|
</div>
|
|
<div id="credit"></div>
|
|
<div id="roundbottom">
|
|
<img style="display: none" class="corner" height="15" width="15" alt="" src="../../skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
|
|
<!--+
|
|
|alternative credits
|
|
+-->
|
|
<div id="credit2">
|
|
<a href="https://donate.apache.org/"><img border="0" title="Support Apache" alt="Support Apache - logo" src="../../images/support-asf.png" style="width: 125px;height: 125px;"></a><a href="https://www.apache.org/foundation/press/kit/#poweredby"><img border="0" title="powered by POI" alt="powered by POI - logo" src="../../images/poweredby-poi-logo.png" style="width: 125px;height: 125px;"></a>
|
|
</div>
|
|
</div>
|
|
<!--+
|
|
|end Menu
|
|
+-->
|
|
<!--+
|
|
|start content
|
|
+-->
|
|
<div id="content">
|
|
<h1>POI-HSLF - A Guide to the PowerPoint File Format</h1>
|
|
<h3>Overview</h3>
|
|
<div id="front-matter"></div>
|
|
|
|
<a name="Records%2C+Containers+and+Atoms"></a>
|
|
<h2 class="boxed">Records, Containers and Atoms</h2>
|
|
<div class="section">
|
|
<p>
|
|
PowerPoint documents are made up of a tree of records. A record may
|
|
contain either other records (in which case it is a Container),
|
|
or data (in which case it's an Atom). A record can't hold both.
|
|
</p>
|
|
<p>
|
|
PowerPoint documents don't have one overall container record. Instead,
|
|
there are a number of different container records to be found at
|
|
the top level.
|
|
</p>
|
|
<p>
|
|
Any numbers or strings stored in the records are always stored in
|
|
Little Endian format (least significant bytes first). This is the case
|
|
no matter what platform the file was written on - be that a
|
|
Little Endian or a Big Endian system.
|
|
</p>
|
|
<p>
|
|
PowerPoint may have Escher (DDF) records embedded in it. These
|
|
are always held as the children of a PPDrawing record (record
|
|
type 1036). Escher records have the same format as PowerPoint
|
|
records.
|
|
</p>
|
|
</div>
|
|
|
|
|
|
<a name="Record+Headers"></a>
|
|
<h2 class="boxed">Record Headers</h2>
|
|
<div class="section">
|
|
<p>
|
|
All records, be they containers or atoms, have the same standard
|
|
8 byte header. It is:
|
|
</p>
|
|
<ul>
|
|
<li>1/2 byte container flag</li>
|
|
|
|
<li>1.5 byte option field</li>
|
|
|
|
<li>2 byte record type</li>
|
|
|
|
<li>4 byte record length</li>
|
|
</ul>
|
|
<p>
|
|
If the first byte of the header, BINARY_AND with 0x0f, is 0x0f,
|
|
then the record is a container. Otherwise, it's an atom. The rest
|
|
of the first two bytes are used to store the "options" for the
|
|
record. Most commonly, this is used to indicate the version of
|
|
the record, but the exact usage is record specific.
|
|
</p>
|
|
<p>
|
|
The record type is a little endian number, which tells you what
|
|
kind of record you're dealing with. Each different kind of record
|
|
has its own value that gets stored here. PowerPoint records have
|
|
a type that's normally less than 6000 (decimal). Escher records
|
|
normally have a type between 0xF000 and 0xF1FF.
|
|
</p>
|
|
<p>
|
|
The record length is another little endian number. For an atom,
|
|
it's the size of the data part of the record, i.e. the length
|
|
of the record <em>less</em> its 8 byte record header. For a
|
|
container, it's the size of all the records that are children of
|
|
this record. That means that the size of a container record is the
|
|
length, plus 8 bytes for its record header.
|
|
</p>
|
|
</div>
|
|
|
|
|
|
<a name="CurrentUserAtom%2C+UserEditAtom+and+PersistPtrIncrementalBlock"></a>
|
|
<h2 class="boxed">CurrentUserAtom, UserEditAtom and PersistPtrIncrementalBlock</h2>
|
|
<div class="section">
|
|
<p>
|
|
<strong>aka Records that care about the byte level position of other records</strong>
|
|
</p>
|
|
<p>
|
|
A small number of records contain byte level position offsets to other
|
|
records. If you change the position of any records in the file, then
|
|
there's a good chance that you will need to update some of these
|
|
special records.
|
|
</p>
|
|
<p>
|
|
First up, CurrentUserAtom. This is actually stored in a different
|
|
OLE2 (POIFS) stream to the main PowerPoint document. It contains
|
|
a few bits of information on who last edited the file. Most
|
|
importantly, at byte 8 of its contents, it stores (as a 32 bit
|
|
little endian number) the offset in the main stream to the most
|
|
recent UserEditAtom.
|
|
</p>
|
|
<p>
|
|
The UserEditAtom contains two byte level offsets (again as 32 bit
|
|
little endian numbers). At byte 12 is the offset to the
|
|
PersistPtrIncrementalBlock associated with this UserEditAtom
|
|
(each UserEditAtom has one and only one PersistPtrIncrementalBlock).
|
|
At byte 8, there's the offset to the previous UserEditAtom. If this
|
|
is 0, then you're at the first one.
|
|
</p>
|
|
<p>
|
|
Every time you do a non-full save in PowerPoint, it tacks on another
|
|
UserEditAtom and another PersistPtrIncrementalBlock. The
|
|
CurrentUserAtom is updated to point to this new UserEditAtom, and the
|
|
new UserEditAtom points back to the previous UserEditAtom. You then
|
|
end up with a chain, starting from the CurrentUserAtom, linking
|
|
back through all the UserEditAtoms, until you reach the first one
|
|
from a full save.
|
|
</p>
|
|
<div class="code">
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">/-------------------------------\</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| CurrentUserAtom (own stream) |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| OffsetToCurrentEdit = 10562 |==\</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">\-------------------------------/ |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"> |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">/==================================/</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| /-----------------------------------\</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | PersistPtrIncrementalBlock @ 6144 |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| \-----------------------------------/</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| /---------------------------------\ |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | UserEditAtom @ 6176 | |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | LastUserEditAtomOffset = 0 | |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | PersistPointersOffset = 6144 |==================/</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| \---------------------------------/</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | /-----------------------------------\</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| \====================\ | PersistPtrIncrementalBlock @ 8646 |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | \-----------------------------------/</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| /---------------------------------\ | |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | UserEditAtom @ 8674 | | |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | LastUserEditAtomOffset = 6176 |=/ |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | PersistPointersOffset = 8646 |==================/</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| \---------------------------------/</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | /------------------------------------\</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| \====================\ | PersistPtrIncrementalBlock @ 10538 |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| | \------------------------------------/</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">| /---------------------------------\ | |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">\==| UserEditAtom @ 10562 | | |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"> | LastUserEditAtomOffset = 8674 |=/ |</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"> | PersistPointersOffset = 10538 |==================/</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"> \---------------------------------/</span>
|
|
</div>
|
|
</div>
|
|
<p>
|
|
The PersistPtrIncrementalBlock contains byte offsets to all the
|
|
Slides, Notes, Documents and MasterSlides in the file. The first
|
|
PersistPtrIncrementalBlock will point to all the ones that
|
|
were present the first time the file was saved. Subsequent
|
|
PersistPtrIncrementalBlocks will contain pointers to all the ones
|
|
that were changed in that edit. To find the offset to a given
|
|
sheet in the latest version, start with the most recent
|
|
PersistPtrIncrementalBlock. If this knows about the sheet, use the
|
|
offset it has. If it doesn't, then work back through older
|
|
PersistPtrIncrementalBlocks until you find one which does, and
|
|
use that.
|
|
</p>
|
|
<p>
|
|
Each PersistPtrIncrementalBlock can contain a number of entry
|
|
blocks. Each block holds information on a sequence of sheets.
|
|
Each block starts with a 32 bit little endian integer. Once read
|
|
into memory, the lower 20 bits contain the starting number for the
|
|
sequence of sheets to be described. The higher 12 bits contain
|
|
the count of the number of sheets described. Following that is
|
|
one 32 bit little endian integer for each sheet in the sequence,
|
|
the value being the offset to that sheet. If there is any data
|
|
left after parsing a block, then it corresponds to the next block.
|
|
</p>
|
|
<div class="code">
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">hex on disk decimal description</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">----------- ------- -----------</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0 No options</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">7217 6002 Record type is 6002</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">2000 0000 32 Length of data is 32 bytes</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0100 5000 5242881 Count is 5 (12 highest bits)</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"> Starting number is 1 (20 lowest bits)</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0000 0 Sheet (1+0)=1 starts at offset 0</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">900D 0000 3472 Sheet (1+1)=2 starts at offset 3472</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">E403 0000 996 Sheet (1+2)=3 starts at offset 996</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">9213 0000 5010 Sheet (1+3)=4 starts at offset 5010</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">BE15 0000 5566 Sheet (1+4)=5 starts at offset 5566</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0900 1000 1048585 Count is 1 (12 highest bits)</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"> Starting number is 9 (20 lowest bits)</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">4418 0000 6212 Sheet (9+0)=9 starts at offset 6212</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<a name="Paragraph+and+Text+Styling"></a>
|
|
<h2 class="boxed">Paragraph and Text Styling</h2>
|
|
<div class="section">
|
|
<p>
|
|
There are quite a number of records that affect the styling
|
|
of text, and a smaller number that are responsible for the
|
|
styling of paragraphs.
|
|
</p>
|
|
<p>
|
|
By default, a given set of text will inherit paragraph and text
|
|
stylings from the appropriate master sheet. If anything differs
|
|
from the master sheet, then appropriate styling records will
|
|
follow the text record.
|
|
</p>
|
|
<p>
|
|
|
|
<em>(We don't currently know enough about master sheet styling
|
|
to write about it)</em>
|
|
|
|
</p>
|
|
<p>
|
|
Normally, PowerPoint will have one text record (TextBytesAtom
|
|
or TextCharsAtom) for every paragraph, with a preceding
|
|
TextHeaderAtom to describe what sort of paragraph it is.
|
|
If any of the stylings differ from the master's, then a
|
|
StyleTextPropAtom will follow the text record. This contains
|
|
the paragraph style information, and the styling information
|
|
for each section of the text which has a different style.
|
|
(More on StyleTextPropAtom later)
|
|
</p>
|
|
<p>
|
|
For every font used, a FontEntityAtom must exist for that font.
|
|
The FontEntityAtoms live inside a FontCollection record, and
|
|
there's one of those inside the Environment record inside the
|
|
Document record. <em>(More on Fonts to be discovered)</em>
|
|
|
|
</p>
|
|
</div>
|
|
|
|
|
|
<a name="StyleTextPropAtom"></a>
|
|
<h2 class="boxed">StyleTextPropAtom</h2>
|
|
<div class="section">
|
|
<p>
|
|
If the text or paragraph stylings for a given text record
|
|
differ from those of the appropriate master, then there will
|
|
be one of these records.
|
|
</p>
|
|
<p>
|
|
This record is made up of two lists of lists. Firstly,
|
|
there's a list of paragraph stylings - each made up of the
|
|
number of characters it applies to, followed by the matching
|
|
styling elements. Following that is the equivalent for
|
|
character stylings.
|
|
</p>
|
|
<p>
|
|
Each styling list (in either list) starts with the number
|
|
of characters it applies to, stored in a 4 byte little
|
|
endian number. If it is a paragraph styling, it will be
|
|
followed by a 2 byte number (of unknown use). After this is
|
|
a four byte number, which is a mask indicating which stylings
|
|
will follow. You then have an entry for each of the stylings
|
|
indicated in the mask. Finally, you move onto the next set
|
|
of stylings.
|
|
</p>
|
|
<p>
|
|
Each styling has a specific mask flag to indicate its
|
|
presence. (The list may be found towards the top of
|
|
org.apache.poi.hslf.record.StyleTextPropAtom.java, and is
|
|
too long to sensibly include here). Each styling entry
|
|
will occur in the order of its mask value (so one with mask
|
|
1 will come first, followed by the next highest mask value).
|
|
Depending on the styling, it is either made up of a 2 byte
|
|
or 4 byte numeric value. The meaning of the value will
|
|
depend on the styling (eg for font.size, it is the font
|
|
size in points).
|
|
</p>
|
|
<p>
|
|
Some stylings are actually mask stylings. For these, the
|
|
value will be a 4 byte number. This is then processed as a
|
|
mask, to indicate a number of different sub-stylings.
|
|
The styling for bold/italic/underline is one such example.
|
|
</p>
|
|
<div class="code">
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">hex on disk decimal description</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">----------- ------- -----------</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0 No options</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">A10F 4001 Record type is 4001</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">8000 0000 128 Length of data is 128 bytes</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1E00 0000 30 The paragraph styling applies to 30 characters</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0 Paragraph options are 0</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0018 0000 6144 0x0800=Text Alignment, 0x1000=Line Spacing</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0 Text Alignment = Left</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">5000 80 Line Spacing = 80</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1C00 0000 28 The paragraph styling applies to 28 characters</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0 Paragraph options are 0</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0010 0000 4096 0x1000=Line Spacing</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">5000 80 Line Spacing = 80</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1900 0000 25 The paragraph styling applies to 25 characters</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0 Paragraph options are 0</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0018 0000 6144 0x0800=Text Alignment, 0x1000=Line Spacing</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0200 2 Text Alignment = Right</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">5000 80 Line Spacing = 80</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">6100 0000 61 The paragraph styling applies to 61 characters</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"> (includes final CR)</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0 Paragraph options are 0</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0018 0000 6144 0x0800=Text Alignment, 0x1000=Line Spacing</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0 Text Alignment = Left</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">5000 80 Line Spacing = 80</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1E00 0000 30 The character styling applies to 30 characters</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0100 0200 131073 0x0001=Char Props Mask, 0x20000=Font Size</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0100 1 Char Props 0x0001=Bold</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1400 20 Font Size = 20</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1C00 0000 28 The character styling applies to 28 characters</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0200 0600 393218 0x0002=Char Props Mask, 0x20000=Font Size, 0x40000=Font Color</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0200 2 Char Props 0x0002=Italic</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1400 20 Font Size = 20</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0005 83886080 Blue</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"> </span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1900 0000 25 The character styling applies to 25 characters</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0000 0600 393216 0x20000=Font Size, 0x40000=Font Color</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1400 20 Font Size = 20</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">FF33 00FE 4261426175 Red</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody"></span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">6000 0000 96 The character styling applies to 96 characters</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0400 0300 196612 0x0004=Char Props Mask, 0x10000=Font Index, 0x20000=Font Size</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0400 4 Char Props 0x0004=Underlined</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">0100 1 Font Index = 1 (2nd Font in table)</span>
|
|
</div>
|
|
<div class="codeline">
|
|
<span class="lineno"></span><span class="codebody">1800 24 Font Size = 24</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<a name="Fonts+in+PowerPoint"></a>
|
|
<h2 class="boxed">Fonts in PowerPoint</h2>
|
|
<div class="section">
|
|
<p>
|
|
PowerPoint stores information about the fonts used in FontEntityAtoms,
|
|
which live inside Document.Environment.FontCollection. For every different
|
|
font used, a FontEntityAtom must exist for that font. There is always at
|
|
least one FontEntityAtom in Document.Environment.FontCollection,
|
|
which describes the default font.
|
|
</p>
|
|
</div>
|
|
|
|
|
|
<a name="FontEntityAtom"></a>
|
|
<h2 class="boxed">FontEntityAtom</h2>
|
|
<div class="section">
|
|
<p>
|
|
The instance field of the record header contains the zero based index of the
|
|
font. Font index entries in StyleTextPropAtoms will refer to their required
|
|
font via this index.
|
|
</p>
|
|
<p>
|
|
The length of FontEntityAtoms is always 68 bytes. The first 64 bytes of
|
|
it hold the typeface name of the font to be used. This is stored as
|
|
a null-terminated string, encoded as little-endian UTF-16 Unicode. (The
|
|
length of the string must not exceed 32 characters including the null
|
|
termination, so the typeface name cannot exceed 31 characters).
|
|
</p>
|
|
<p>
|
|
After the typeface name there are 4 bytes of bitmask flags. The details of these
|
|
can be found in the Windows API, under the LOGFONT structure.
|
|
The 65th byte is the output precision, which defines how closely the system chosen
|
|
font must match the requested font, in terms of height, width, pitch etc.
|
|
The 66th byte is the clipping precision, which defines how to clip characters
|
|
that occur partly outside the clipping region.
|
|
The 67th byte is the output quality, which defines how closely the system
|
|
must match the logical font's attributes to those of the physical font used.
|
|
The 68th (and final) byte is the pitch and family, which is used by the
|
|
system when matching fonts.
|
|
</p>
|
|
</div>
|
|
|
|
<p align="right">
|
|
<font size="-2">by Nick Burch, Yegor Kozlov</font>
|
|
</p>
|
|
</div>
|
|
<!--+
|
|
|end content
|
|
+-->
|
|
<div class="clearboth"> </div>
|
|
</div>
|
|
<div id="footer">
|
|
<!--+
|
|
|start bottomstrip
|
|
+-->
|
|
<div class="lastmodified">
|
|
<script type="text/javascript"><!--
|
|
document.write("Last Published: " + document.lastModified);
|
|
// --></script>
|
|
</div>
|
|
<div class="copyright">
|
|
Copyright ©
|
|
2001-2025 <a href="https://www.apache.org/">The Apache Software Foundation</a>
|
|
<br>
|
|
Apache POI, POI, Apache, the Apache logo, and the Apache
|
|
POI project logo are trademarks of The Apache Software Foundation.
|
|
</div>
|
|
<div id="feedback">
|
|
Send feedback about the website to:
|
|
<a id="feedbackto" href="mailto:dev@poi.apache.org?subject=Feedback%C2%A0components/slideshow/ppt-file-format.html">dev@poi.apache.org</a>
|
|
</div>
|
|
<!--+
|
|
|end bottomstrip
|
|
+-->
|
|
</div>
|
|
</body>
|
|
</html>
|