2016-07-02 04:28:41 +00:00
|
|
|
/* ====================================================================
|
|
|
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
|
|
|
this work for additional information regarding copyright ownership.
|
|
|
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
|
(the "License"); you may not use this file except in compliance with
|
|
|
|
|
the License. You may obtain a copy of the License at
|
|
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
|
limitations under the License.
|
|
|
|
|
==================================================================== */
|
|
|
|
|
|
|
|
|
|
package org.apache.poi.poifs.macros;
|
|
|
|
|
|
|
|
|
|
import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
|
2017-08-11 20:47:48 +00:00
|
|
|
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
|
2016-07-02 04:28:41 +00:00
|
|
|
|
2018-10-29 15:48:44 +00:00
|
|
|
import java.io.ByteArrayInputStream;
|
|
|
|
|
import java.io.ByteArrayOutputStream;
|
|
|
|
|
import java.io.Closeable;
|
|
|
|
|
import java.io.EOFException;
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.io.FileInputStream;
|
|
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.io.InputStream;
|
|
|
|
|
import java.io.InputStreamReader;
|
2016-07-02 04:28:41 +00:00
|
|
|
import java.nio.charset.Charset;
|
2018-10-29 15:48:44 +00:00
|
|
|
import java.nio.charset.StandardCharsets;
|
2016-07-02 04:28:41 +00:00
|
|
|
import java.util.HashMap;
|
2018-10-29 15:48:44 +00:00
|
|
|
import java.util.LinkedHashMap;
|
2016-07-02 04:28:41 +00:00
|
|
|
import java.util.Map;
|
|
|
|
|
import java.util.zip.ZipEntry;
|
|
|
|
|
import java.util.zip.ZipInputStream;
|
|
|
|
|
|
|
|
|
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
|
|
|
|
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
|
|
|
|
import org.apache.poi.poifs.filesystem.DocumentNode;
|
|
|
|
|
import org.apache.poi.poifs.filesystem.Entry;
|
2017-08-11 20:47:48 +00:00
|
|
|
import org.apache.poi.poifs.filesystem.FileMagic;
|
2018-08-31 00:25:50 +00:00
|
|
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
2016-07-02 04:28:41 +00:00
|
|
|
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
2017-10-07 06:11:12 +00:00
|
|
|
import org.apache.poi.poifs.macros.Module.ModuleType;
|
2016-10-18 18:24:39 +00:00
|
|
|
import org.apache.poi.util.CodePageUtil;
|
2016-07-09 08:05:43 +00:00
|
|
|
import org.apache.poi.util.HexDump;
|
2016-07-02 04:28:41 +00:00
|
|
|
import org.apache.poi.util.IOUtils;
|
2017-09-14 02:22:55 +00:00
|
|
|
import org.apache.poi.util.LittleEndian;
|
2018-10-29 15:48:44 +00:00
|
|
|
import org.apache.poi.util.POILogFactory;
|
|
|
|
|
import org.apache.poi.util.POILogger;
|
2016-07-02 04:28:41 +00:00
|
|
|
import org.apache.poi.util.RLEDecompressingInputStream;
|
2017-09-14 02:22:55 +00:00
|
|
|
import org.apache.poi.util.StringUtil;
|
2016-07-02 04:28:41 +00:00
|
|
|
|
|
|
|
|
/**
|
2016-10-19 18:44:46 +00:00
|
|
|
* <p>Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
|
2016-04-12 01:30:20 +00:00
|
|
|
* and returns them.
|
2016-10-19 18:44:46 +00:00
|
|
|
* </p>
|
|
|
|
|
* <p>
|
|
|
|
|
* <b>NOTE:</b> This does not read macros from .ppt files.
|
|
|
|
|
* See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF() in the scratchpad
|
|
|
|
|
* module for an example of how to do this. Patches that make macro
|
|
|
|
|
* extraction from .ppt more elegant are welcomed!
|
|
|
|
|
* </p>
|
2016-04-12 01:30:20 +00:00
|
|
|
*
|
2016-07-02 04:28:41 +00:00
|
|
|
* @since 3.15-beta2
|
|
|
|
|
*/
|
|
|
|
|
public class VBAMacroReader implements Closeable {
|
2018-10-29 15:48:44 +00:00
|
|
|
// Class-wide logger; readMBCS uses it to warn when the module name map is not terminated by two 0x00 bytes.
private static final POILogger LOGGER = POILogFactory.getLogger(VBAMacroReader.class);
|
|
|
|
|
|
2016-07-02 04:28:41 +00:00
|
|
|
// Suffix that openOOXML matches (ignoring case) against zip entry names to locate the VBA project part.
protected static final String VBA_PROJECT_OOXML = "vbaProject.bin";
|
|
|
|
|
// Directory name that findMacros matches (ignoring case) to recognise a VBA project directory.
protected static final String VBA_PROJECT_POIFS = "VBA";
|
2017-09-14 02:22:55 +00:00
|
|
|
|
2018-08-31 00:25:50 +00:00
|
|
|
// File system the macros are read from; assigned by the constructors or openOOXML, reset to null by close().
private POIFSFileSystem fs;
|
2016-07-02 04:28:41 +00:00
|
|
|
|
|
|
|
|
public VBAMacroReader(InputStream rstream) throws IOException {
|
2017-08-11 20:47:48 +00:00
|
|
|
InputStream is = FileMagic.prepareToCheckMagic(rstream);
|
|
|
|
|
FileMagic fm = FileMagic.valueOf(is);
|
|
|
|
|
if (fm == FileMagic.OLE2) {
|
2018-08-31 00:25:50 +00:00
|
|
|
fs = new POIFSFileSystem(is);
|
2016-07-02 04:28:41 +00:00
|
|
|
} else {
|
2017-08-11 20:47:48 +00:00
|
|
|
openOOXML(is);
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public VBAMacroReader(File file) throws IOException {
|
|
|
|
|
try {
|
2018-08-31 00:25:50 +00:00
|
|
|
this.fs = new POIFSFileSystem(file);
|
2016-07-02 04:28:41 +00:00
|
|
|
} catch (OfficeXmlFileException e) {
|
|
|
|
|
openOOXML(new FileInputStream(file));
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-08-31 00:25:50 +00:00
|
|
|
/**
 * Creates a reader over an already opened file system. The instance is
 * stored as-is, so {@link #close()} on this reader closes it as well.
 *
 * @param fs the file system to search for VBA macros
 */
public VBAMacroReader(POIFSFileSystem fs) {
    this.fs = fs;
}
|
|
|
|
|
|
|
|
|
|
private void openOOXML(InputStream zipFile) throws IOException {
|
2018-06-07 06:03:35 +00:00
|
|
|
try(ZipInputStream zis = new ZipInputStream(zipFile)) {
|
|
|
|
|
ZipEntry zipEntry;
|
|
|
|
|
while ((zipEntry = zis.getNextEntry()) != null) {
|
|
|
|
|
if (endsWithIgnoreCase(zipEntry.getName(), VBA_PROJECT_OOXML)) {
|
|
|
|
|
try {
|
|
|
|
|
// Make a NPOIFS from the contents, and close the stream
|
2018-08-31 00:25:50 +00:00
|
|
|
this.fs = new POIFSFileSystem(zis);
|
2018-06-07 06:03:35 +00:00
|
|
|
return;
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
// Tidy up
|
|
|
|
|
zis.close();
|
|
|
|
|
|
|
|
|
|
// Pass on
|
|
|
|
|
throw e;
|
|
|
|
|
}
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
throw new IllegalArgumentException("No VBA project found");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void close() throws IOException {
|
|
|
|
|
fs.close();
|
|
|
|
|
fs = null;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-07 06:11:12 +00:00
|
|
|
public Map<String, Module> readMacroModules() throws IOException {
|
|
|
|
|
final ModuleMap modules = new ModuleMap();
|
2018-10-29 15:48:44 +00:00
|
|
|
//ascii -> unicode mapping for module names
|
|
|
|
|
//preserve insertion order
|
|
|
|
|
final Map<String, String> moduleNameMap = new LinkedHashMap<>();
|
|
|
|
|
|
2017-10-07 06:11:12 +00:00
|
|
|
findMacros(fs.getRoot(), modules);
|
2018-10-29 15:48:44 +00:00
|
|
|
findModuleNameMap(fs.getRoot(), moduleNameMap, modules);
|
|
|
|
|
findProjectProperties(fs.getRoot(), moduleNameMap, modules);
|
2017-10-07 06:11:12 +00:00
|
|
|
|
|
|
|
|
Map<String, Module> moduleSources = new HashMap<>();
|
|
|
|
|
for (Map.Entry<String, ModuleImpl> entry : modules.entrySet()) {
|
|
|
|
|
ModuleImpl module = entry.getValue();
|
|
|
|
|
module.charset = modules.charset;
|
|
|
|
|
moduleSources.put(entry.getKey(), module);
|
|
|
|
|
}
|
|
|
|
|
return moduleSources;
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-02 04:28:41 +00:00
|
|
|
/**
|
|
|
|
|
* Reads all macros from all modules of the opened office file.
|
2016-04-12 01:30:20 +00:00
|
|
|
* @return All the macros and their contents
|
|
|
|
|
*
|
2016-07-02 04:28:41 +00:00
|
|
|
* @since 3.15-beta2
|
|
|
|
|
*/
|
|
|
|
|
public Map<String, String> readMacros() throws IOException {
|
2017-10-07 06:11:12 +00:00
|
|
|
Map<String, Module> modules = readMacroModules();
|
2017-09-16 08:27:23 +00:00
|
|
|
Map<String, String> moduleSources = new HashMap<>();
|
2016-07-02 04:28:41 +00:00
|
|
|
for (Map.Entry<String, Module> entry : modules.entrySet()) {
|
2017-10-07 06:11:12 +00:00
|
|
|
moduleSources.put(entry.getKey(), entry.getValue().getContent());
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|
|
|
|
|
return moduleSources;
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-07 06:11:12 +00:00
|
|
|
protected static class ModuleImpl implements Module {
|
2016-07-02 04:28:41 +00:00
|
|
|
Integer offset;
|
|
|
|
|
byte[] buf;
|
2017-10-07 06:11:12 +00:00
|
|
|
ModuleType moduleType;
|
|
|
|
|
Charset charset;
|
2016-07-09 08:51:57 +00:00
|
|
|
void read(InputStream in) throws IOException {
|
|
|
|
|
final ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
|
|
|
IOUtils.copy(in, out);
|
|
|
|
|
out.close();
|
|
|
|
|
buf = out.toByteArray();
|
|
|
|
|
}
|
2017-10-07 06:11:12 +00:00
|
|
|
public String getContent() {
|
|
|
|
|
return new String(buf, charset);
|
|
|
|
|
}
|
|
|
|
|
public ModuleType geModuleType() {
|
|
|
|
|
return moduleType;
|
|
|
|
|
}
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|
2017-10-07 06:11:12 +00:00
|
|
|
protected static class ModuleMap extends HashMap<String, ModuleImpl> {
|
2017-09-14 02:22:55 +00:00
|
|
|
Charset charset = StringUtil.WIN_1252; // default charset
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Recursively traverses directory structure rooted at <tt>dir</tt>.
|
|
|
|
|
* For each macro module that is found, the module's name and code are
|
|
|
|
|
* added to <tt>modules<tt>.
|
|
|
|
|
*
|
2016-10-19 06:52:44 +00:00
|
|
|
* @param dir The directory of entries to look at
|
|
|
|
|
* @param modules The resulting map of modules
|
|
|
|
|
* @throws IOException If reading the VBA module fails
|
2016-07-02 04:28:41 +00:00
|
|
|
* @since 3.15-beta2
|
|
|
|
|
*/
|
|
|
|
|
protected void findMacros(DirectoryNode dir, ModuleMap modules) throws IOException {
|
|
|
|
|
if (VBA_PROJECT_POIFS.equalsIgnoreCase(dir.getName())) {
|
|
|
|
|
// VBA project directory, process
|
|
|
|
|
readMacros(dir, modules);
|
|
|
|
|
} else {
|
|
|
|
|
// Check children
|
|
|
|
|
for (Entry child : dir) {
|
|
|
|
|
if (child instanceof DirectoryNode) {
|
|
|
|
|
findMacros((DirectoryNode)child, modules);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-14 02:22:55 +00:00
|
|
|
|
2016-07-09 08:18:29 +00:00
|
|
|
|
|
|
|
|
/**
|
2016-07-09 08:51:57 +00:00
|
|
|
* reads module from DIR node in input stream and adds it to the modules map for decompression later
|
2016-07-09 08:18:29 +00:00
|
|
|
* on the second pass through this function, the module will be decompressed
|
|
|
|
|
*
|
|
|
|
|
* Side-effects: adds a new module to the module map or sets the buf field on the module
|
|
|
|
|
* to the decompressed stream contents (the VBA code for one module)
|
|
|
|
|
*
|
|
|
|
|
* @param in the run-length encoded input stream to read from
|
|
|
|
|
* @param streamName the stream name of the module
|
|
|
|
|
* @param modules a map to store the modules
|
2016-10-19 06:52:44 +00:00
|
|
|
* @throws IOException If reading data from the stream or from modules fails
|
2016-07-09 08:18:29 +00:00
|
|
|
*/
|
2017-09-14 02:22:55 +00:00
|
|
|
private static void readModuleMetadataFromDirStream(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
|
2016-07-09 08:18:29 +00:00
|
|
|
int moduleOffset = in.readInt();
|
2017-10-07 06:11:12 +00:00
|
|
|
ModuleImpl module = modules.get(streamName);
|
2016-07-09 08:18:29 +00:00
|
|
|
if (module == null) {
|
2016-07-09 08:51:57 +00:00
|
|
|
// First time we've seen the module. Add it to the ModuleMap and decompress it later
|
2017-10-07 06:11:12 +00:00
|
|
|
module = new ModuleImpl();
|
2016-07-09 08:18:29 +00:00
|
|
|
module.offset = moduleOffset;
|
|
|
|
|
modules.put(streamName, module);
|
2016-07-09 08:51:57 +00:00
|
|
|
// Would adding module.read(in) here be correct?
|
|
|
|
|
} else {
|
|
|
|
|
// Decompress a previously found module and store the decompressed result into module.buf
|
|
|
|
|
InputStream stream = new RLEDecompressingInputStream(
|
|
|
|
|
new ByteArrayInputStream(module.buf, moduleOffset, module.buf.length - moduleOffset)
|
|
|
|
|
);
|
|
|
|
|
module.read(stream);
|
|
|
|
|
stream.close();
|
2016-07-09 08:18:29 +00:00
|
|
|
}
|
2016-07-09 08:51:57 +00:00
|
|
|
}
|
|
|
|
|
|
2017-09-14 02:22:55 +00:00
|
|
|
private static void readModuleFromDocumentStream(DocumentNode documentNode, String name, ModuleMap modules) throws IOException {
|
2017-10-07 06:11:12 +00:00
|
|
|
ModuleImpl module = modules.get(name);
|
2016-07-09 08:51:57 +00:00
|
|
|
// TODO Refactor this to fetch dir then do the rest
|
|
|
|
|
if (module == null) {
|
|
|
|
|
// no DIR stream with offsets yet, so store the compressed bytes for later
|
2017-10-07 06:11:12 +00:00
|
|
|
module = new ModuleImpl();
|
2016-07-09 08:51:57 +00:00
|
|
|
modules.put(name, module);
|
2017-10-07 06:11:12 +00:00
|
|
|
try (InputStream dis = new DocumentInputStream(documentNode)) {
|
2017-09-14 02:22:55 +00:00
|
|
|
module.read(dis);
|
|
|
|
|
}
|
2016-10-18 16:43:20 +00:00
|
|
|
} else if (module.buf == null) { //if we haven't already read the bytes for the module keyed off this name...
|
2017-09-14 02:22:55 +00:00
|
|
|
|
2016-07-15 06:12:37 +00:00
|
|
|
if (module.offset == null) {
|
|
|
|
|
//This should not happen. bug 59858
|
|
|
|
|
throw new IOException("Module offset for '" + name + "' was never read.");
|
|
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
|
|
|
|
|
//try the general case, where module.offset is accurate
|
|
|
|
|
InputStream decompressed = null;
|
|
|
|
|
InputStream compressed = new DocumentInputStream(documentNode);
|
|
|
|
|
try {
|
|
|
|
|
// we know the offset already, so decompress immediately on-the-fly
|
|
|
|
|
long skippedBytes = compressed.skip(module.offset);
|
|
|
|
|
if (skippedBytes != module.offset) {
|
|
|
|
|
throw new IOException("tried to skip " + module.offset + " bytes, but actually skipped " + skippedBytes + " bytes");
|
|
|
|
|
}
|
|
|
|
|
decompressed = new RLEDecompressingInputStream(compressed);
|
|
|
|
|
module.read(decompressed);
|
|
|
|
|
return;
|
2017-10-07 06:11:12 +00:00
|
|
|
} catch (IllegalArgumentException | IllegalStateException e) {
|
2017-09-14 02:22:55 +00:00
|
|
|
} finally {
|
|
|
|
|
IOUtils.closeQuietly(compressed);
|
|
|
|
|
IOUtils.closeQuietly(decompressed);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//bad module.offset, try brute force
|
|
|
|
|
compressed = new DocumentInputStream(documentNode);
|
2017-10-07 06:11:12 +00:00
|
|
|
byte[] decompressedBytes;
|
2017-09-14 02:22:55 +00:00
|
|
|
try {
|
|
|
|
|
decompressedBytes = findCompressedStreamWBruteForce(compressed);
|
|
|
|
|
} finally {
|
|
|
|
|
IOUtils.closeQuietly(compressed);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (decompressedBytes != null) {
|
|
|
|
|
module.read(new ByteArrayInputStream(decompressedBytes));
|
2016-07-09 08:51:57 +00:00
|
|
|
}
|
2016-07-09 08:18:29 +00:00
|
|
|
}
|
2016-07-09 08:51:57 +00:00
|
|
|
|
2016-07-09 08:18:29 +00:00
|
|
|
}
|
2016-04-12 01:30:20 +00:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Skips <tt>n</tt> bytes in an input stream, throwing IOException if the
|
|
|
|
|
* number of bytes skipped is different than requested.
|
2016-10-19 06:52:44 +00:00
|
|
|
* @throws IOException If skipping would exceed the available data or skipping did not work.
|
2016-04-12 01:30:20 +00:00
|
|
|
*/
|
|
|
|
|
private static void trySkip(InputStream in, long n) throws IOException {
|
2017-09-14 02:22:55 +00:00
|
|
|
long skippedBytes = IOUtils.skipFully(in, n);
|
2016-04-12 01:30:20 +00:00
|
|
|
if (skippedBytes != n) {
|
2016-07-09 08:05:43 +00:00
|
|
|
if (skippedBytes < 0) {
|
|
|
|
|
throw new IOException(
|
|
|
|
|
"Tried skipping " + n + " bytes, but no bytes were skipped. "
|
|
|
|
|
+ "The end of the stream has been reached or the stream is closed.");
|
|
|
|
|
} else {
|
|
|
|
|
throw new IOException(
|
|
|
|
|
"Tried skipping " + n + " bytes, but only " + skippedBytes + " bytes were skipped. "
|
2017-09-14 02:22:55 +00:00
|
|
|
+ "This should never happen with a non-corrupt file.");
|
2016-07-09 08:05:43 +00:00
|
|
|
}
|
2016-04-12 01:30:20 +00:00
|
|
|
}
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|
2016-07-09 08:05:43 +00:00
|
|
|
|
|
|
|
|
// Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
// Each value below is the 2-byte "reserved" marker that processDirStream expects between the
// MBCS part and the Unicode part of the named record.
|
2016-10-18 15:47:38 +00:00
|
|
|
// marker expected inside a MODULE_STREAM_NAME record
private static final int STREAMNAME_RESERVED = 0x0032;
|
2017-09-14 02:22:55 +00:00
|
|
|
// marker expected inside a PROJECT_CONSTANTS record
private static final int PROJECT_CONSTANTS_RESERVED = 0x003C;
|
|
|
|
|
// marker expected inside a PROJECT_HELP_FILE_PATH record
private static final int HELP_FILE_PATH_RESERVED = 0x003D;
|
|
|
|
|
// marker expected inside a REFERENCE_NAME record
private static final int REFERENCE_NAME_RESERVED = 0x003E;
|
|
|
|
|
// marker expected inside a PROJECT_DOC_STRING record
private static final int DOC_STRING_RESERVED = 0x0040;
|
|
|
|
|
// marker expected inside a MODULE_DOC_STRING record
private static final int MODULE_DOCSTRING_RESERVED = 0x0048;
|
2016-04-12 01:30:20 +00:00
|
|
|
|
2016-07-09 08:05:43 +00:00
|
|
|
/**
|
2016-04-12 01:30:20 +00:00
|
|
|
* Reads VBA Project modules from a VBA Project directory located at
|
|
|
|
|
* <tt>macroDir</tt> into <tt>modules</tt>.
|
|
|
|
|
*
|
|
|
|
|
* @since 3.15-beta2
|
2016-07-02 04:28:41 +00:00
|
|
|
*/
|
|
|
|
|
protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
|
2017-09-14 02:22:55 +00:00
|
|
|
//bug59858 shows that dirstream may not be in this directory (\MBD00082648\_VBA_PROJECT_CUR\VBA ENTRY NAME)
|
|
|
|
|
//but may be in another directory (\_VBA_PROJECT_CUR\VBA ENTRY NAME)
|
|
|
|
|
//process the dirstream first -- "dir" is case insensitive
|
|
|
|
|
for (String entryName : macroDir.getEntryNames()) {
|
|
|
|
|
if ("dir".equalsIgnoreCase(entryName)) {
|
|
|
|
|
processDirStream(macroDir.getEntry(entryName), modules);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-07-02 04:28:41 +00:00
|
|
|
for (Entry entry : macroDir) {
|
|
|
|
|
if (! (entry instanceof DocumentNode)) { continue; }
|
|
|
|
|
|
|
|
|
|
String name = entry.getName();
|
|
|
|
|
DocumentNode document = (DocumentNode)entry;
|
2017-09-14 02:22:55 +00:00
|
|
|
|
|
|
|
|
if (! "dir".equalsIgnoreCase(name) && !startsWithIgnoreCase(name, "__SRP")
|
|
|
|
|
&& !startsWithIgnoreCase(name, "_VBA_PROJECT")) {
|
|
|
|
|
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros
|
|
|
|
|
readModuleFromDocumentStream(document, name, modules);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-29 15:48:44 +00:00
|
|
|
protected void findProjectProperties(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
|
2017-10-07 06:11:12 +00:00
|
|
|
for (Entry entry : node) {
|
|
|
|
|
if ("project".equalsIgnoreCase(entry.getName())) {
|
|
|
|
|
DocumentNode document = (DocumentNode)entry;
|
|
|
|
|
DocumentInputStream dis = new DocumentInputStream(document);
|
2018-10-29 15:48:44 +00:00
|
|
|
readProjectProperties(dis, moduleNameMap, modules);
|
2018-10-30 21:30:57 +00:00
|
|
|
} else if (entry instanceof DirectoryNode) {
|
|
|
|
|
findProjectProperties((DirectoryNode)entry, moduleNameMap, modules);
|
2018-10-29 15:48:44 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
protected void findModuleNameMap(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
|
|
|
|
|
for (Entry entry : node) {
|
|
|
|
|
if ("projectwm".equalsIgnoreCase(entry.getName())) {
|
|
|
|
|
DocumentNode document = (DocumentNode)entry;
|
|
|
|
|
DocumentInputStream dis = new DocumentInputStream(document);
|
|
|
|
|
readNameMapRecords(dis, moduleNameMap, modules.charset);
|
2018-10-30 21:30:57 +00:00
|
|
|
} else if (entry.isDirectoryEntry()) {
|
|
|
|
|
findModuleNameMap((DirectoryNode)entry, moduleNameMap, modules);
|
2017-10-07 06:11:12 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2017-09-14 02:22:55 +00:00
|
|
|
private enum RecordType {
|
|
|
|
|
// Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
|
|
|
|
|
MODULE_OFFSET(0x0031),
|
|
|
|
|
PROJECT_SYS_KIND(0x01),
|
|
|
|
|
PROJECT_LCID(0x0002),
|
|
|
|
|
PROJECT_LCID_INVOKE(0x14),
|
|
|
|
|
PROJECT_CODEPAGE(0x0003),
|
|
|
|
|
PROJECT_NAME(0x04),
|
|
|
|
|
PROJECT_DOC_STRING(0x05),
|
|
|
|
|
PROJECT_HELP_FILE_PATH(0x06),
|
|
|
|
|
PROJECT_HELP_CONTEXT(0x07, 8),
|
|
|
|
|
PROJECT_LIB_FLAGS(0x08),
|
|
|
|
|
PROJECT_VERSION(0x09, 10),
|
|
|
|
|
PROJECT_CONSTANTS(0x0C),
|
|
|
|
|
PROJECT_MODULES(0x0F),
|
|
|
|
|
DIR_STREAM_TERMINATOR(0x10),
|
|
|
|
|
PROJECT_COOKIE(0x13),
|
|
|
|
|
MODULE_NAME(0x19),
|
|
|
|
|
MODULE_NAME_UNICODE(0x47),
|
|
|
|
|
MODULE_STREAM_NAME(0x1A),
|
|
|
|
|
MODULE_DOC_STRING(0x1C),
|
|
|
|
|
MODULE_HELP_CONTEXT(0x1E),
|
|
|
|
|
MODULE_COOKIE(0x2c),
|
|
|
|
|
MODULE_TYPE_PROCEDURAL(0x21, 4),
|
|
|
|
|
MODULE_TYPE_OTHER(0x22, 4),
|
|
|
|
|
MODULE_PRIVATE(0x28, 4),
|
|
|
|
|
REFERENCE_NAME(0x16),
|
|
|
|
|
REFERENCE_REGISTERED(0x0D),
|
|
|
|
|
REFERENCE_PROJECT(0x0E),
|
|
|
|
|
REFERENCE_CONTROL_A(0x2F),
|
|
|
|
|
|
|
|
|
|
//according to the spec, REFERENCE_CONTROL_B(0x33) should have the
|
|
|
|
|
//same structure as REFERENCE_CONTROL_A(0x2F).
|
|
|
|
|
//However, it seems to have the int(length) record structure that most others do.
|
|
|
|
|
//See 59830.xls for this record.
|
|
|
|
|
REFERENCE_CONTROL_B(0x33),
|
|
|
|
|
//REFERENCE_ORIGINAL(0x33),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MODULE_TERMINATOR(0x002B),
|
|
|
|
|
EOF(-1),
|
|
|
|
|
UNKNOWN(-2);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private final int VARIABLE_LENGTH = -1;
|
|
|
|
|
private final int id;
|
|
|
|
|
private final int constantLength;
|
|
|
|
|
|
|
|
|
|
RecordType(int id) {
|
|
|
|
|
this.id = id;
|
|
|
|
|
this.constantLength = VARIABLE_LENGTH;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
RecordType(int id, int constantLength) {
|
|
|
|
|
this.id = id;
|
|
|
|
|
this.constantLength = constantLength;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int getConstantLength() {
|
|
|
|
|
return constantLength;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static RecordType lookup(int id) {
|
|
|
|
|
for (RecordType type : RecordType.values()) {
|
|
|
|
|
if (type.id == id) {
|
|
|
|
|
return type;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return UNKNOWN;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private enum DIR_STATE {
|
|
|
|
|
INFORMATION_RECORD,
|
|
|
|
|
REFERENCES_RECORD,
|
|
|
|
|
MODULES_RECORD
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static class ASCIIUnicodeStringPair {
|
|
|
|
|
private final String ascii;
|
|
|
|
|
private final String unicode;
|
2018-10-30 13:25:20 +00:00
|
|
|
private final int pushbackRecordId;
|
|
|
|
|
|
|
|
|
|
ASCIIUnicodeStringPair(String ascii, int pushbackRecordId) {
|
|
|
|
|
this.ascii = ascii;
|
|
|
|
|
this.unicode = "";
|
|
|
|
|
this.pushbackRecordId = pushbackRecordId;
|
|
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
|
|
|
|
|
ASCIIUnicodeStringPair(String ascii, String unicode) {
|
|
|
|
|
this.ascii = ascii;
|
|
|
|
|
this.unicode = unicode;
|
2018-10-30 13:25:20 +00:00
|
|
|
pushbackRecordId = -1;
|
2017-09-14 02:22:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private String getAscii() {
|
|
|
|
|
return ascii;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private String getUnicode() {
|
|
|
|
|
return unicode;
|
|
|
|
|
}
|
2018-10-30 13:25:20 +00:00
|
|
|
|
|
|
|
|
private int getPushbackRecordId() {
|
|
|
|
|
return pushbackRecordId;
|
|
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private void processDirStream(Entry dir, ModuleMap modules) throws IOException {
|
|
|
|
|
DocumentNode dirDocumentNode = (DocumentNode)dir;
|
|
|
|
|
DIR_STATE dirState = DIR_STATE.INFORMATION_RECORD;
|
2017-10-07 06:11:12 +00:00
|
|
|
try (DocumentInputStream dis = new DocumentInputStream(dirDocumentNode)) {
|
2017-09-14 02:22:55 +00:00
|
|
|
String streamName = null;
|
|
|
|
|
int recordId = 0;
|
|
|
|
|
boolean inReferenceTwiddled = false;
|
2017-10-07 06:11:12 +00:00
|
|
|
try (RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis)) {
|
2017-09-14 02:22:55 +00:00
|
|
|
while (true) {
|
|
|
|
|
recordId = in.readShort();
|
|
|
|
|
if (recordId == -1) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
RecordType type = RecordType.lookup(recordId);
|
|
|
|
|
|
|
|
|
|
if (type.equals(RecordType.EOF) || type.equals(RecordType.DIR_STREAM_TERMINATOR)) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
switch (type) {
|
|
|
|
|
case PROJECT_VERSION:
|
|
|
|
|
trySkip(in, RecordType.PROJECT_VERSION.getConstantLength());
|
|
|
|
|
break;
|
|
|
|
|
case PROJECT_CODEPAGE:
|
|
|
|
|
in.readInt();//record size must == 4
|
|
|
|
|
int codepage = in.readShort();
|
|
|
|
|
modules.charset = Charset.forName(CodePageUtil.codepageToEncoding(codepage, true));
|
|
|
|
|
break;
|
|
|
|
|
case MODULE_STREAM_NAME:
|
|
|
|
|
ASCIIUnicodeStringPair pair = readStringPair(in, modules.charset, STREAMNAME_RESERVED);
|
|
|
|
|
streamName = pair.getAscii();
|
|
|
|
|
break;
|
|
|
|
|
case PROJECT_DOC_STRING:
|
|
|
|
|
readStringPair(in, modules.charset, DOC_STRING_RESERVED);
|
|
|
|
|
break;
|
|
|
|
|
case PROJECT_HELP_FILE_PATH:
|
|
|
|
|
readStringPair(in, modules.charset, HELP_FILE_PATH_RESERVED);
|
|
|
|
|
break;
|
|
|
|
|
case PROJECT_CONSTANTS:
|
|
|
|
|
readStringPair(in, modules.charset, PROJECT_CONSTANTS_RESERVED);
|
|
|
|
|
break;
|
|
|
|
|
case REFERENCE_NAME:
|
|
|
|
|
if (dirState.equals(DIR_STATE.INFORMATION_RECORD)) {
|
|
|
|
|
dirState = DIR_STATE.REFERENCES_RECORD;
|
|
|
|
|
}
|
2018-10-30 13:25:20 +00:00
|
|
|
ASCIIUnicodeStringPair stringPair = readStringPair(in,
|
|
|
|
|
modules.charset, REFERENCE_NAME_RESERVED, false);
|
|
|
|
|
if (stringPair.getPushbackRecordId() == -1) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
//Special handling for when there's only an ascii string and a REFERENCED_REGISTERED
|
|
|
|
|
//record that follows.
|
|
|
|
|
//See https://github.com/decalage2/oletools/blob/master/oletools/olevba.py#L1516
|
|
|
|
|
//and https://github.com/decalage2/oletools/pull/135 from (@c1fe)
|
|
|
|
|
if (stringPair.getPushbackRecordId() != RecordType.REFERENCE_REGISTERED.id) {
|
|
|
|
|
throw new IllegalArgumentException("Unexpected reserved character. "+
|
|
|
|
|
"Expected "+Integer.toHexString(REFERENCE_NAME_RESERVED)
|
|
|
|
|
+ " or "+Integer.toHexString(RecordType.REFERENCE_REGISTERED.id)+
|
|
|
|
|
" not: "+Integer.toHexString(stringPair.getPushbackRecordId()));
|
|
|
|
|
}
|
|
|
|
|
//fall through!
|
|
|
|
|
case REFERENCE_REGISTERED:
|
|
|
|
|
//REFERENCE_REGISTERED must come immediately after
|
|
|
|
|
//REFERENCE_NAME to allow for fall through in special case of bug 62625
|
|
|
|
|
int recLength = in.readInt();
|
|
|
|
|
trySkip(in, recLength);
|
2017-09-14 02:22:55 +00:00
|
|
|
break;
|
2017-10-07 06:11:12 +00:00
|
|
|
case MODULE_DOC_STRING:
|
2017-09-14 02:22:55 +00:00
|
|
|
int modDocStringLength = in.readInt();
|
|
|
|
|
readString(in, modDocStringLength, modules.charset);
|
|
|
|
|
int modDocStringReserved = in.readShort();
|
|
|
|
|
if (modDocStringReserved != MODULE_DOCSTRING_RESERVED) {
|
|
|
|
|
throw new IOException("Expected x003C after stream name before Unicode stream name, but found: " +
|
|
|
|
|
Integer.toHexString(modDocStringReserved));
|
|
|
|
|
}
|
|
|
|
|
int unicodeModDocStringLength = in.readInt();
|
|
|
|
|
readUnicodeString(in, unicodeModDocStringLength);
|
|
|
|
|
// do something with this at some point
|
|
|
|
|
break;
|
|
|
|
|
case MODULE_OFFSET:
|
|
|
|
|
int modOffsetSz = in.readInt();
|
|
|
|
|
//should be 4
|
|
|
|
|
readModuleMetadataFromDirStream(in, streamName, modules);
|
|
|
|
|
break;
|
|
|
|
|
case PROJECT_MODULES:
|
|
|
|
|
dirState = DIR_STATE.MODULES_RECORD;
|
|
|
|
|
in.readInt();//size must == 2
|
|
|
|
|
in.readShort();//number of modules
|
|
|
|
|
break;
|
|
|
|
|
case REFERENCE_CONTROL_A:
|
|
|
|
|
int szTwiddled = in.readInt();
|
|
|
|
|
trySkip(in, szTwiddled);
|
|
|
|
|
int nextRecord = in.readShort();
|
|
|
|
|
//reference name is optional!
|
|
|
|
|
if (nextRecord == RecordType.REFERENCE_NAME.id) {
|
|
|
|
|
readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
|
|
|
|
|
nextRecord = in.readShort();
|
|
|
|
|
}
|
|
|
|
|
if (nextRecord != 0x30) {
|
|
|
|
|
throw new IOException("Expected 0x30 as Reserved3 in a ReferenceControl record");
|
2016-07-02 04:39:53 +00:00
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
int szExtended = in.readInt();
|
|
|
|
|
trySkip(in, szExtended);
|
|
|
|
|
break;
|
|
|
|
|
case MODULE_TERMINATOR:
|
|
|
|
|
int endOfModulesReserved = in.readInt();
|
|
|
|
|
//must be 0;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
if (type.getConstantLength() > -1) {
|
|
|
|
|
trySkip(in, type.getConstantLength());
|
|
|
|
|
} else {
|
|
|
|
|
int recordLength = in.readInt();
|
2016-07-09 08:05:43 +00:00
|
|
|
trySkip(in, recordLength);
|
2016-07-09 05:39:56 +00:00
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
break;
|
2016-07-09 08:05:43 +00:00
|
|
|
}
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
} catch (final IOException e) {
|
|
|
|
|
throw new IOException(
|
|
|
|
|
"Error occurred while reading macros at section id "
|
|
|
|
|
+ recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
|
2016-07-02 04:39:53 +00:00
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-30 13:25:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
|
|
|
|
|
Charset charset, int reservedByte) throws IOException {
|
|
|
|
|
return readStringPair(in, charset, reservedByte, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in,
|
|
|
|
|
Charset charset, int reservedByte,
|
|
|
|
|
boolean throwOnUnexpectedReservedByte) throws IOException {
|
2017-09-14 02:22:55 +00:00
|
|
|
int nameLength = in.readInt();
|
|
|
|
|
String ascii = readString(in, nameLength, charset);
|
|
|
|
|
int reserved = in.readShort();
|
2018-10-30 13:25:20 +00:00
|
|
|
|
2017-09-14 02:22:55 +00:00
|
|
|
if (reserved != reservedByte) {
|
2018-10-30 13:25:20 +00:00
|
|
|
if (throwOnUnexpectedReservedByte) {
|
|
|
|
|
throw new IOException("Expected " + Integer.toHexString(reservedByte) + "after name before Unicode name, but found: " +
|
|
|
|
|
Integer.toHexString(reserved));
|
|
|
|
|
} else {
|
|
|
|
|
return new ASCIIUnicodeStringPair(ascii, reserved);
|
|
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
}
|
|
|
|
|
int unicodeNameRecordLength = in.readInt();
|
|
|
|
|
String unicode = readUnicodeString(in, unicodeNameRecordLength);
|
|
|
|
|
return new ASCIIUnicodeStringPair(ascii, unicode);
|
|
|
|
|
}
|
|
|
|
|
|
2018-10-29 15:48:44 +00:00
|
|
|
private static void readNameMapRecords(InputStream is, Map<String, String> moduleNames, Charset charset) throws IOException {
|
|
|
|
|
//see 2.3.3 PROJECTwm Stream: Module Name Information
|
|
|
|
|
//multibytecharstring
|
|
|
|
|
String mbcs = null;
|
|
|
|
|
String unicode = null;
|
|
|
|
|
do {
|
|
|
|
|
try {
|
|
|
|
|
mbcs = readMBCS(is, charset);
|
|
|
|
|
} catch (EOFException e) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (mbcs == null) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
try {
|
|
|
|
|
unicode = readUnicode(is);
|
|
|
|
|
} catch (EOFException e) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (mbcs != null && unicode != null) {
|
|
|
|
|
moduleNames.put(mbcs, unicode);
|
|
|
|
|
}
|
|
|
|
|
} while (mbcs != null && unicode != null);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static String readUnicode(InputStream is) throws IOException {
|
|
|
|
|
//reads null-terminated unicode string
|
|
|
|
|
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
|
|
|
|
int b0 = is.read();
|
|
|
|
|
int b1 = is.read();
|
|
|
|
|
|
|
|
|
|
while ((b0 + b1) != 0) {
|
|
|
|
|
if (b0 == -1 || b1 == -1) {
|
|
|
|
|
throw new EOFException();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bos.write(b0);
|
|
|
|
|
bos.write(b1);
|
|
|
|
|
b0 = is.read();
|
|
|
|
|
b1 = is.read();
|
|
|
|
|
}
|
|
|
|
|
return new String (bos.toByteArray(), StandardCharsets.UTF_16LE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//returns a string if any bytes are read or null if two 0x00 are read
|
|
|
|
|
private static String readMBCS(InputStream is, Charset charset) throws IOException {
|
|
|
|
|
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
|
|
|
|
int len = 0;
|
|
|
|
|
int b = is.read();
|
|
|
|
|
while (b != 0) {
|
|
|
|
|
++len;
|
|
|
|
|
if (b == -1) {
|
|
|
|
|
throw new EOFException();
|
|
|
|
|
}
|
|
|
|
|
bos.write(b);
|
|
|
|
|
b = is.read();
|
|
|
|
|
}
|
|
|
|
|
if (len == 0) {
|
|
|
|
|
b = is.read();
|
|
|
|
|
if (b == -1) {
|
|
|
|
|
throw new EOFException();
|
|
|
|
|
}
|
|
|
|
|
if (b != 0) {
|
|
|
|
|
LOGGER.log(POILogger.WARN, "expected two 0x00 at end of module name map");
|
|
|
|
|
}
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
return new String(bos.toByteArray(), charset);
|
|
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Read <tt>length</tt> bytes of MBCS (multi-byte character set) characters from the stream
|
|
|
|
|
*
|
|
|
|
|
* @param stream the inputstream to read from
|
|
|
|
|
* @param length number of bytes to read from stream
|
|
|
|
|
* @param charset the character set encoding of the bytes in the stream
|
|
|
|
|
* @return a java String in the supplied character set
|
|
|
|
|
* @throws IOException If reading from the stream fails
|
|
|
|
|
*/
|
|
|
|
|
private static String readString(InputStream stream, int length, Charset charset) throws IOException {
|
|
|
|
|
byte[] buffer = IOUtils.safelyAllocate(length, 20000);
|
|
|
|
|
int bytesRead = IOUtils.readFully(stream, buffer);
|
|
|
|
|
if (bytesRead != length) {
|
|
|
|
|
throw new IOException("Tried to read: "+length +
|
|
|
|
|
", but could only read: "+bytesRead);
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|
2017-09-14 02:22:55 +00:00
|
|
|
return new String(buffer, 0, length, charset);
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|
2016-10-18 15:47:38 +00:00
|
|
|
|
2018-10-29 15:48:44 +00:00
|
|
|
protected void readProjectProperties(DocumentInputStream dis,
|
|
|
|
|
Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
|
2017-10-07 06:11:12 +00:00
|
|
|
InputStreamReader reader = new InputStreamReader(dis, modules.charset);
|
|
|
|
|
StringBuilder builder = new StringBuilder();
|
|
|
|
|
char[] buffer = new char[512];
|
|
|
|
|
int read;
|
|
|
|
|
while ((read = reader.read(buffer)) >= 0) {
|
|
|
|
|
builder.append(buffer, 0, read);
|
|
|
|
|
}
|
|
|
|
|
String properties = builder.toString();
|
2018-10-29 15:48:44 +00:00
|
|
|
//the module name map names should be in exactly the same order
|
|
|
|
|
//as the module names here. See 2.3.3 PROJECTwm Stream.
|
|
|
|
|
//At some point, we might want to enforce that.
|
2017-10-07 06:11:12 +00:00
|
|
|
for (String line : properties.split("\r\n|\n\r")) {
|
|
|
|
|
if (!line.startsWith("[")) {
|
|
|
|
|
String[] tokens = line.split("=");
|
2018-08-08 17:30:02 +00:00
|
|
|
if (tokens.length > 1 && tokens[1].length() > 1
|
|
|
|
|
&& tokens[1].startsWith("\"") && tokens[1].endsWith("\"")) {
|
|
|
|
|
// Remove any double quotes
|
|
|
|
|
tokens[1] = tokens[1].substring(1, tokens[1].length() - 1);
|
2017-10-07 06:11:12 +00:00
|
|
|
}
|
2018-10-29 15:48:44 +00:00
|
|
|
if ("Document".equals(tokens[0]) && tokens.length > 1) {
|
2017-10-07 06:11:12 +00:00
|
|
|
String mn = tokens[1].substring(0, tokens[1].indexOf("/&H"));
|
2018-10-29 15:48:44 +00:00
|
|
|
ModuleImpl module = getModule(mn, moduleNameMap, modules);
|
|
|
|
|
if (module != null) {
|
|
|
|
|
module.moduleType = ModuleType.Document;
|
|
|
|
|
} else {
|
|
|
|
|
LOGGER.log(POILogger.WARN, "couldn't find module with name: "+mn);
|
|
|
|
|
}
|
|
|
|
|
} else if ("Module".equals(tokens[0]) && tokens.length > 1) {
|
|
|
|
|
ModuleImpl module = getModule(tokens[1], moduleNameMap, modules);
|
|
|
|
|
if (module != null) {
|
|
|
|
|
module.moduleType = ModuleType.Module;
|
|
|
|
|
} else {
|
|
|
|
|
LOGGER.log(POILogger.WARN, "couldn't find module with name: "+tokens[1]);
|
|
|
|
|
}
|
|
|
|
|
} else if ("Class".equals(tokens[0]) && tokens.length > 1) {
|
|
|
|
|
ModuleImpl module = getModule(tokens[1], moduleNameMap, modules);
|
|
|
|
|
if (module != null) {
|
|
|
|
|
module.moduleType = ModuleType.Class;
|
|
|
|
|
} else {
|
|
|
|
|
LOGGER.log(POILogger.WARN, "couldn't find module with name: "+tokens[1]);
|
|
|
|
|
}
|
2017-10-07 06:11:12 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-10-29 15:48:44 +00:00
|
|
|
//can return null!
|
|
|
|
|
private ModuleImpl getModule(String moduleName, Map<String, String> moduleNameMap, ModuleMap moduleMap) {
|
|
|
|
|
if (moduleNameMap.containsKey(moduleName)) {
|
|
|
|
|
return moduleMap.get(moduleNameMap.get(moduleName));
|
|
|
|
|
}
|
|
|
|
|
return moduleMap.get(moduleName);
|
|
|
|
|
}
|
2017-10-07 06:11:12 +00:00
|
|
|
|
2016-10-18 15:47:38 +00:00
|
|
|
private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
|
2017-09-14 02:22:55 +00:00
|
|
|
byte[] buffer = IOUtils.safelyAllocate(unicodeNameRecordLength, 20000);
|
|
|
|
|
int bytesRead = IOUtils.readFully(in, buffer);
|
|
|
|
|
if (bytesRead != unicodeNameRecordLength) {
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
return new String(buffer, StringUtil.UTF16LE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Sometimes the offset record in the dirstream is incorrect, but the macro can still be found.
|
|
|
|
|
* This will try to find the the first RLEDecompressing stream that starts with "Attribute".
|
|
|
|
|
* This relies on some, er, heuristics, admittedly.
|
|
|
|
|
*
|
|
|
|
|
* @param is full module inputstream to read
|
|
|
|
|
* @return uncompressed bytes if found, <code>null</code> otherwise
|
|
|
|
|
* @throws IOException for a true IOException copying the is to a byte array
|
|
|
|
|
*/
|
|
|
|
|
private static byte[] findCompressedStreamWBruteForce(InputStream is) throws IOException {
|
|
|
|
|
//buffer to memory for multiple tries
|
|
|
|
|
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
|
|
|
|
IOUtils.copy(is, bos);
|
|
|
|
|
byte[] compressed = bos.toByteArray();
|
|
|
|
|
byte[] decompressed = null;
|
|
|
|
|
for (int i = 0; i < compressed.length; i++) {
|
|
|
|
|
if (compressed[i] == 0x01 && i < compressed.length-1) {
|
|
|
|
|
int w = LittleEndian.getUShort(compressed, i+1);
|
|
|
|
|
if (w <= 0 || (w & 0x7000) != 0x3000) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
decompressed = tryToDecompress(new ByteArrayInputStream(compressed, i, compressed.length - i));
|
|
|
|
|
if (decompressed != null) {
|
|
|
|
|
if (decompressed.length > 9) {
|
|
|
|
|
//this is a complete hack. The challenge is that there
|
|
|
|
|
//can be many 0 length or junk streams that are uncompressed
|
|
|
|
|
//look in the first 20 characters for "Attribute"
|
|
|
|
|
int firstX = Math.min(20, decompressed.length);
|
|
|
|
|
String start = new String(decompressed, 0, firstX, StringUtil.WIN_1252);
|
|
|
|
|
if (start.contains("Attribute")) {
|
|
|
|
|
return decompressed;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return decompressed;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static byte[] tryToDecompress(InputStream is) {
|
|
|
|
|
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
|
|
|
|
try {
|
|
|
|
|
IOUtils.copy(new RLEDecompressingInputStream(is), bos);
|
2017-10-07 06:11:12 +00:00
|
|
|
} catch (IllegalArgumentException | IOException | IllegalStateException e){
|
2017-09-14 02:22:55 +00:00
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
return bos.toByteArray();
|
2016-10-18 15:47:38 +00:00
|
|
|
}
|
2016-07-02 04:28:41 +00:00
|
|
|
}
|