mirror of
https://github.com/apache/poi.git
synced 2026-02-27 20:40:08 +08:00
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1847429 13f79535-47bb-0310-9956-ffa450edef68
180 lines
6.0 KiB
Java
180 lines
6.0 KiB
Java
/* ====================================================================
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
this work for additional information regarding copyright ownership.
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
(the "License"); you may not use this file except in compliance with
|
|
the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==================================================================== */
|
|
|
|
package org.apache.poi.poifs.filesystem;
|
|
|
|
import static org.apache.poi.poifs.common.POIFSConstants.OOXML_FILE_HEADER;
|
|
import static org.apache.poi.poifs.common.POIFSConstants.RAW_XML_FILE_HEADER;
|
|
import static java.nio.charset.StandardCharsets.UTF_8;
|
|
|
|
import java.io.BufferedInputStream;
|
|
import java.io.File;
|
|
import java.io.FileInputStream;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
|
|
import org.apache.poi.poifs.storage.HeaderBlockConstants;
|
|
import org.apache.poi.util.IOUtils;
|
|
import org.apache.poi.util.LittleEndian;
|
|
import org.apache.poi.util.LocaleUtil;
|
|
|
|
/**
|
|
* The file magic number, i.e. the file identification based on the first bytes
|
|
* of the file
|
|
*/
|
|
public enum FileMagic {
|
|
/** OLE2 / BIFF8+ stream used for Office 97 and higher documents */
|
|
OLE2(HeaderBlockConstants._signature),
|
|
/** OOXML / ZIP stream */
|
|
OOXML(OOXML_FILE_HEADER),
|
|
/** XML file */
|
|
XML(RAW_XML_FILE_HEADER),
|
|
/** BIFF2 raw stream - for Excel 2 */
|
|
BIFF2(new byte[]{
|
|
0x09, 0x00, // sid=0x0009
|
|
0x04, 0x00, // size=0x0004
|
|
0x00, 0x00, // unused
|
|
0x70, 0x00 // 0x70 = multiple values
|
|
}),
|
|
/** BIFF3 raw stream - for Excel 3 */
|
|
BIFF3(new byte[]{
|
|
0x09, 0x02, // sid=0x0209
|
|
0x06, 0x00, // size=0x0006
|
|
0x00, 0x00, // unused
|
|
0x70, 0x00 // 0x70 = multiple values
|
|
}),
|
|
/** BIFF4 raw stream - for Excel 4 */
|
|
BIFF4(new byte[]{
|
|
0x09, 0x04, // sid=0x0409
|
|
0x06, 0x00, // size=0x0006
|
|
0x00, 0x00, // unused
|
|
0x70, 0x00 // 0x70 = multiple values
|
|
},new byte[]{
|
|
0x09, 0x04, // sid=0x0409
|
|
0x06, 0x00, // size=0x0006
|
|
0x00, 0x00, // unused
|
|
0x00, 0x01
|
|
}),
|
|
/** Old MS Write raw stream */
|
|
MSWRITE(
|
|
new byte[]{0x31, (byte)0xbe, 0x00, 0x00 },
|
|
new byte[]{0x32, (byte)0xbe, 0x00, 0x00 }),
|
|
/** RTF document */
|
|
RTF("{\\rtf"),
|
|
/** PDF document */
|
|
PDF("%PDF"),
|
|
/** Some different HTML documents */
|
|
HTML("<!DOCTYP".getBytes(UTF_8), "<html".getBytes(UTF_8), "<HTML".getBytes(UTF_8)),
|
|
WORD2(new byte[]{ (byte)0xdb, (byte)0xa5, 0x2d, 0x00}),
|
|
// keep UNKNOWN always as last enum!
|
|
/** UNKNOWN magic */
|
|
UNKNOWN(new byte[0]);
|
|
|
|
final byte[][] magic;
|
|
|
|
FileMagic(long magic) {
|
|
this.magic = new byte[1][8];
|
|
LittleEndian.putLong(this.magic[0], 0, magic);
|
|
}
|
|
|
|
FileMagic(byte[]... magic) {
|
|
this.magic = magic;
|
|
}
|
|
|
|
FileMagic(String magic) {
|
|
this(magic.getBytes(LocaleUtil.CHARSET_1252));
|
|
}
|
|
|
|
public static FileMagic valueOf(byte[] magic) {
|
|
for (FileMagic fm : values()) {
|
|
for (byte[] ma : fm.magic) {
|
|
if (findMagic(ma, magic)) {
|
|
return fm;
|
|
}
|
|
}
|
|
}
|
|
return UNKNOWN;
|
|
}
|
|
|
|
private static boolean findMagic(byte[] cmp, byte[] actual) {
|
|
int i=0;
|
|
for (byte m : cmp) {
|
|
byte d = actual[i++];
|
|
if (!(d == m || (m == 0x70 && (d == 0x10 || d == 0x20 || d == 0x40)))) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* Get the file magic of the supplied {@link File}<p>
|
|
*
|
|
* Even if this method returns {@link FileMagic#UNKNOWN} it could potentially mean,
|
|
* that the ZIP stream has leading junk bytes
|
|
*
|
|
* @param inp a file to be identified
|
|
*/
|
|
public static FileMagic valueOf(final File inp) throws IOException {
|
|
try (FileInputStream fis = new FileInputStream(inp)) {
|
|
final byte[] data = IOUtils.toByteArray(fis, 8);
|
|
return FileMagic.valueOf(data);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Get the file magic of the supplied InputStream (which MUST
|
|
* support mark and reset).<p>
|
|
*
|
|
* If unsure if your InputStream does support mark / reset,
|
|
* use {@link #prepareToCheckMagic(InputStream)} to wrap it and make
|
|
* sure to always use that, and not the original!<p>
|
|
*
|
|
* Even if this method returns {@link FileMagic#UNKNOWN} it could potentially mean,
|
|
* that the ZIP stream has leading junk bytes
|
|
*
|
|
* @param inp An InputStream which supports either mark/reset
|
|
*/
|
|
public static FileMagic valueOf(InputStream inp) throws IOException {
|
|
if (!inp.markSupported()) {
|
|
throw new IOException("getFileMagic() only operates on streams which support mark(int)");
|
|
}
|
|
|
|
// Grab the first 8 bytes
|
|
byte[] data = IOUtils.peekFirst8Bytes(inp);
|
|
|
|
return FileMagic.valueOf(data);
|
|
}
|
|
|
|
|
|
/**
|
|
* Checks if an {@link InputStream} can be reset (i.e. used for checking the header magic) and wraps it if not
|
|
*
|
|
* @param stream stream to be checked for wrapping
|
|
* @return a mark enabled stream
|
|
*/
|
|
public static InputStream prepareToCheckMagic(InputStream stream) {
|
|
if (stream.markSupported()) {
|
|
return stream;
|
|
}
|
|
// we used to process the data via a PushbackInputStream, but user code could provide a too small one
|
|
// so we use a BufferedInputStream instead now
|
|
return new BufferedInputStream(stream);
|
|
}
|
|
} |