= dataBlockEndOffset )
+ {
+ realPicoffset -= UNKNOWN_HEADER_SIZE;
+ }
+ return realPicoffset;
}
- return height;
- }
- private static int getBigEndianInt(byte[] data, int offset)
- {
- return (((data[offset] & 0xFF)<< 24) + ((data[offset +1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset +3] & 0xFF));
- }
+ private void fillJPGWidthHeight()
+ {
+ /*
+ * http://www.codecomments.com/archive281-2004-3-158083.html
+ *
+ * Algorithm proposed by Patrick TJ McPhee:
+ *
+ * read 2 bytes make sure they are 'ffd8'x repeatedly: read 2 bytes make
+ * sure the first one is 'ff'x if the second one is 'd9'x stop else if
+ * the second one is c0 or c2 (or possibly other values ...) skip 2
+ * bytes read one byte into depth read two bytes into height read two
+ * bytes into width else read two bytes into length skip forward
+ * length-2 bytes
+ *
+ * Also used Ruby code snippet from:
+ * http://www.bigbold.com/snippets/posts/show/805 for reference
+ */
+ int pointer = pictureBytesStartOffset + 2;
+ int firstByte = _dataStream[pointer];
+ int secondByte = _dataStream[pointer + 1];
- private static int getBigEndianShort(byte[] data, int offset)
- {
- return (((data[offset] & 0xFF)<< 8) + (data[offset +1] & 0xFF));
- }
+ int endOfPicture = pictureBytesStartOffset + size;
+ while ( pointer < endOfPicture - 1 )
+ {
+ do
+ {
+ firstByte = _dataStream[pointer];
+ secondByte = _dataStream[pointer + 1];
+ pointer += 2;
+ }
+ while ( !( firstByte == (byte) 0xFF ) && pointer < endOfPicture - 1 );
+
+ if ( firstByte == ( (byte) 0xFF ) && pointer < endOfPicture - 1 )
+ {
+ if ( secondByte == (byte) 0xD9 || secondByte == (byte) 0xDA )
+ {
+ break;
+ }
+ else if ( ( secondByte & 0xF0 ) == 0xC0
+ && secondByte != (byte) 0xC4
+ && secondByte != (byte) 0xC8
+ && secondByte != (byte) 0xCC )
+ {
+ pointer += 5;
+ this.height = getBigEndianShort( _dataStream, pointer );
+ this.width = getBigEndianShort( _dataStream, pointer + 2 );
+ break;
+ }
+ else
+ {
+ pointer++;
+ pointer++;
+ int length = getBigEndianShort( _dataStream, pointer );
+ pointer += length;
+ }
+ }
+ else
+ {
+ pointer++;
+ }
+ }
+ }
+
+ private void fillPNGWidthHeight()
+ {
+ /*
+ * Used PNG file format description from
+ * http://www.wotsit.org/download.asp?f=png
+ */
+ int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
+ if ( matchSignature( _dataStream, IHDR, HEADER_START ) )
+ {
+ int IHDR_CHUNK_WIDTH = HEADER_START + 4;
+ this.width = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH );
+ this.height = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH + 4 );
+ }
+ }
+
+ /**
+ * returns pixel width of the picture or -1 if the dimensions could not be
+ * determined
+ */
+ public int getWidth()
+ {
+ if ( width == -1 )
+ {
+ fillWidthHeight();
+ }
+ return width;
+ }
+
+ /**
+ * returns pixel height of the picture or -1 if the dimensions could not be
+ * determined
+ */
+ public int getHeight()
+ {
+ if ( height == -1 )
+ {
+ fillWidthHeight();
+ }
+ return height;
+ }
+
+ private static int getBigEndianInt( byte[] data, int offset )
+ {
+ return ( ( ( data[offset] & 0xFF ) << 24 )
+ + ( ( data[offset + 1] & 0xFF ) << 16 )
+ + ( ( data[offset + 2] & 0xFF ) << 8 ) + ( data[offset + 3] & 0xFF ) );
+ }
+
+ private static int getBigEndianShort( byte[] data, int offset )
+ {
+ return ( ( ( data[offset] & 0xFF ) << 8 ) + ( data[offset + 1] & 0xFF ) );
+ }
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java
index d89f0e7bd6..5858c15e09 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java
@@ -36,11 +36,34 @@ public enum PictureType {
TIFF( "image/tiff", "tiff", new byte[][] { { 0x49, 0x49, 0x2A, 0x00 },
{ 0x4D, 0x4D, 0x00, 0x2A } } ),
+ UNKNOWN( "image/unknown", "", new byte[][] {} ),
+
WMF( "image/x-wmf", "wmf", new byte[][] {
{ (byte) 0xD7, (byte) 0xCD, (byte) 0xC6, (byte) 0x9A, 0x00, 0x00 },
- { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 } } ),
+ { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 } } );
- UNKNOWN( "image/unknown", "", new byte[][] {} );
+ public static PictureType findMatchingType( byte[] pictureContent )
+ {
+ for ( PictureType pictureType : PictureType.values() )
+ for ( byte[] signature : pictureType.getSignatures() )
+ if ( matchSignature( pictureContent, signature ) )
+ return pictureType;
+
+ // TODO: DIB, PICT
+ return PictureType.UNKNOWN;
+ }
+
+ private static boolean matchSignature( byte[] pictureData, byte[] signature )
+ {
+ if ( pictureData.length < signature.length )
+ return false;
+
+ for ( int i = 0; i < signature.length; i++ )
+ if ( pictureData[i] != signature[i] )
+ return false;
+
+ return true;
+ }
private String _extension;
@@ -69,4 +92,12 @@ public enum PictureType {
{
return _signatures;
}
+
+ public boolean matchSignature( byte[] pictureData ) // true if pictureData starts with any signature of this type
+ {
+ for ( byte[] signature : getSignatures() )
+ if ( matchSignature( pictureData, signature ) ) // static helper takes (pictureData, signature), in that order
+ return true;
+ return false;
+ }
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java
index 4fd3ac216c..daa37eae87 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java
@@ -29,9 +29,8 @@ import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.usermodel.Picture;
+import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
-import org.w3c.dom.Element;
/**
* Test cases for {@link WordToHtmlConverter}
@@ -62,16 +61,21 @@ public class TestWordToHtmlConverter extends TestCase
Document newDocument = DocumentBuilderFactory.newInstance()
.newDocumentBuilder().newDocument();
- WordToHtmlConverter wordToHtmlConverter = !emulatePictureStorage ? new WordToHtmlConverter(
- newDocument ) : new WordToHtmlConverter( newDocument )
+ WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
+ newDocument );
+
+ if ( emulatePictureStorage )
{
- @Override
- protected void processImage( Element currentBlock, boolean inlined,
- Picture picture )
+ wordToHtmlConverter.setPicturesManager( new PicturesManager()
{
- processImage( currentBlock, inlined, picture, "picture.bin" );
- }
- };
+ public String savePicture( byte[] content,
+ PictureType pictureType, String suggestedName )
+ {
+ return suggestedName;
+ }
+ } );
+ }
+
wordToHtmlConverter.processDocument( hwpfDocument );
StringWriter stringWriter = new StringWriter();
@@ -172,20 +176,6 @@ public class TestWordToHtmlConverter extends TestCase
assertContains( result, "" );
}
- public void testPicture() throws Exception
- {
- String result = getHtmlText( "picture.doc", true );
-
- // picture
- assertContains( result, "src=\"picture.bin\"" );
- // visible size
- assertContains( result, "width:3.1305554in;height:1.7250001in;" );
- // shift due to crop
- assertContains( result, "left:-0.09375;top:-0.25694445;" );
- // size without crop
- assertContains( result, "width:3.4125in;height:2.325in;" );
- }
-
public void testHyperlink() throws Exception
{
String result = getHtmlText( "hyperlink.doc" );
@@ -201,14 +191,6 @@ public class TestWordToHtmlConverter extends TestCase
getHtmlText( "innertable.doc" );
}
- public void testTableMerges() throws Exception
- {
- String result = getHtmlText( "table-merges.doc" );
-
- assertContains( result, "| " );
- assertContains( result, " | " );
- }
-
public void testO_kurs_doc() throws Exception
{
getHtmlText( "o_kurs.doc" );
@@ -222,4 +204,33 @@ public class TestWordToHtmlConverter extends TestCase
assertContains( result, "" );
assertContains( result, "1" );
}
+
+ public void testPicture() throws Exception
+ {
+ String result = getHtmlText( "picture.doc", true );
+
+ // picture
+ assertContains( result, "src=\"0.emf\"" );
+ // visible size
+ assertContains( result, "width:3.1305554in;height:1.7250001in;" );
+ // shift due to crop
+ assertContains( result, "left:-0.09375;top:-0.25694445;" );
+ // size without crop
+ assertContains( result, "width:3.4125in;height:2.325in;" );
+ }
+
+ public void testPicturesEscher() throws Exception
+ {
+ String result = getHtmlText( "pictures_escher.doc", true );
+ assertContains( result, " " );
+ assertContains( result, " " );
+ }
+
+ public void testTableMerges() throws Exception
+ {
+ String result = getHtmlText( "table-merges.doc" );
+
+ assertContains( result, " | " );
+ assertContains( result, " | " );
+ }
}
diff --git a/test-data/document/pictures_escher.doc b/test-data/document/pictures_escher.doc
new file mode 100644
index 0000000000..4870bc7ab2
Binary files /dev/null and b/test-data/document/pictures_escher.doc differ
|