From d72e3a608ce4b46490542f85b80c3b5409488357 Mon Sep 17 00:00:00 2001
From: Dominik Stadler
Date: Fri, 16 Jan 2026 09:16:09 +0100
Subject: [PATCH] Add a file-handler for .emf files

These can be embedded in slideshows and POI supports some
handling of them.

Also Tika uses this code to extract some textual information
from such images.
---
 .../apache/poi/stress/FileHandlerKnown.java   |   1 +
 .../apache/poi/stress/HEMFFileHandler.java    | 119 ++++++++++++++++++
 test-data/spreadsheet/stress.xls              | Bin 76288 -> 76800 bytes
 3 files changed, 120 insertions(+)
 create mode 100644 poi-integration/src/test/java/org/apache/poi/stress/HEMFFileHandler.java

diff --git a/poi-integration/src/test/java/org/apache/poi/stress/FileHandlerKnown.java b/poi-integration/src/test/java/org/apache/poi/stress/FileHandlerKnown.java
index dc2af11ba9..1e113c714a 100644
--- a/poi-integration/src/test/java/org/apache/poi/stress/FileHandlerKnown.java
+++ b/poi-integration/src/test/java/org/apache/poi/stress/FileHandlerKnown.java
@@ -29,6 +29,7 @@ public enum FileHandlerKnown {
     HSLF,
     HSMF,
     HSSF,
+    HEMF,
     HWMF,
     HWPF,
     OPC,
diff --git a/poi-integration/src/test/java/org/apache/poi/stress/HEMFFileHandler.java b/poi-integration/src/test/java/org/apache/poi/stress/HEMFFileHandler.java
new file mode 100644
index 0000000000..dd0e5fbe27
--- /dev/null
+++ b/poi-integration/src/test/java/org/apache/poi/stress/HEMFFileHandler.java
@@ -0,0 +1,119 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+
+import java.awt.Graphics2D;
+import java.awt.geom.Rectangle2D;
+import java.awt.image.BufferedImage;
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.poi.hemf.record.emf.HemfRecord;
+import org.apache.poi.hemf.record.emf.HemfRecordType;
+import org.apache.poi.hemf.record.emf.HemfText;
+import org.apache.poi.hemf.usermodel.HemfPicture;
+import org.junit.jupiter.api.Test;
+
+/**
+ * File handler for .emf files: decodes the text of text-output records and
+ * renders the picture into an in-memory image.
+ */
+public class HEMFFileHandler implements FileHandler {
+
+    /**
+     * Decodes the text of every extTextOutW and extTextOutA record in the file.
+     * The decoded text is discarded; only the absence of exceptions is checked.
+     *
+     * @param file the .emf file to read
+     * @throws Exception if the file cannot be opened or parsed
+     */
+    @Override
+    public void handleExtracting(File file) throws Exception {
+        try (InputStream stream = new BufferedInputStream(new FileInputStream(file))) {
+            HemfPicture picture = new HemfPicture(stream);
+
+            // mimic a bit what e.g. Tika does to extract some information from .emf files
+            for (HemfRecord record : picture.getRecords()) {
+                HemfRecordType type = record.getEmfRecordType();
+                if (type.equals(HemfRecordType.extTextOutW)) {
+                    // assertInstanceOf returns the value already cast to the expected type
+                    HemfText.EmfExtTextOutW textOut =
+                            assertInstanceOf(HemfText.EmfExtTextOutW.class, record);
+                    textOut.getText(StandardCharsets.UTF_16LE);
+                } else if (type.equals(HemfRecordType.extTextOutA)) {
+                    HemfText.EmfExtTextOutA textOut =
+                            assertInstanceOf(HemfText.EmfExtTextOutA.class, record);
+                    textOut.getText(StandardCharsets.UTF_8);
+                }
+            }
+        }
+    }
+
+    /**
+     * Intentionally empty: there are no additional checks for .emf files yet.
+     *
+     * @param file the .emf file (unused)
+     */
+    @Override
+    public void handleAdditional(File file) throws Exception {
+        // no additional checks for now
+    }
+
+    /**
+     * Parses the stream as an EMF picture, calls the type accessors of every
+     * record and draws the picture onto a 256x256 ARGB image.
+     *
+     * @param stream the .emf content to parse
+     * @param path the path the stream was opened from (unused here)
+     * @throws Exception if parsing or rendering fails
+     */
+    @Override
+    public void handleFile(InputStream stream, String path) throws Exception {
+        HemfPicture picture = new HemfPicture(stream);
+
+        for (HemfRecord record : picture.getRecords()) {
+            record.getEmfRecordType();
+            record.getGenericRecordType();
+        }
+
+        BufferedImage dest = new BufferedImage(256, 256, BufferedImage.TYPE_INT_ARGB);
+        Graphics2D graphics = dest.createGraphics();
+        try {
+            picture.draw(graphics, new Rectangle2D.Double(0, 0, 256, 256));
+        } finally {
+            // release the graphics context even if rendering throws
+            graphics.dispose();
+        }
+    }
+
+    /** Runs handleFile and handleExtracting against test-data/slideshow/wrench.emf. */
+    @Test
+    void test() throws Exception {
+        String file = "test-data/slideshow/wrench.emf";
+
+        try (InputStream stream = new BufferedInputStream(new FileInputStream(file))) {
+            handleFile(stream, file);
+        }
+
+        handleExtracting(new File(file));
+    }
+}
diff --git a/test-data/spreadsheet/stress.xls b/test-data/spreadsheet/stress.xls index a9d24e82448271e58e7f61816761f62f8882f172..a03baec0813e4b2f5fb87c33305666dad0360a45 100644 GIT binary patch delta 3649 zcmZ9Odr;I>6vyvfc42ue%ko-XL>2=bAm8|0Q4kReNeK;+=d$dgV>}Ej9miR#F;kmH z+h*=K4dWjT)58jta??tC6PX#MX`*GC`3UooW@$+0+&%kqe+$DL?&o*T=X-yT{lWU9 z+Vv;2TgMqZjJ{$R+qOF=C(@G{5uvp*rtfNr%;ZD*8t`}oD_HGtwzMs8Zff1YV%Wt2 z&z`6kbnRh`jqJ)dNf3RjqB-I;XX%xoLEhv%%iB{7Gk5%(5`us7V>;Hq8f-Kb%0ihb>HcxZ=B6%v^MwsOwCmcA+YjsE&K+1q?Ha9_Xv^`#diWq{1HFqM1s$2fRF( 
zRVimOX%ChD8Q{2wG^GpoFo#xEau0rAmFk?Tr0*H>Rr!5xRwad-6$J|4rY%)V;XBGI z@T97xa4rd}{X$kPeWS@&&C%CMh8pSHrK~ucWtI;Fo^g#7!c!oxdW|Bik;2;wN1e+wqSskt#JYxpUa5eq%TGiRtO1F_5wTin|<*t>R#l6_&Mt9Rj?b79>Q?g6f zNpjg0mz`_fur+wB&)pb&6AYq*%tdpDb`|70cT{1XSK8M_3 zD|7pz9V($i5jvz0pAtMrhZN#1##Ox(zNf4LZ*{%&h0_M>6<@vdZKM;f=QOaQ0TvePAPmJj^PK)sR*4ap;K;f9~oTIw;xwxkn@~`t&n?faB$H7^D?fzB^utz z-4V>Cy8kX!pi2pKsRCUx@B<2Lkbxg6tH2XzkiL)L7@k0b;%o5vqUn+}NZ|pI$oXZF zVdWNK(Q+eaB2&vP(Rj&00GA$r6KkzJ#k*3=3*qG#asRcYgBFvwN5@~)7!T5BaLUX+ z7Hjo9gMT7A^gN4yDtfVeCI+y4E;7QQd?6NN`BFR`&XahzXvgxE*as!K8)OII&WEuO zW`S?5d=Bs(Xl6a)x^KV`0Rvx)3@FM#xN0Cm4jdA8?CcinuzVx72M$E42Jo>rv%{j- zH*f{Zw<0_;Aiuh{JyIE>|caVc=Xq#B5o z13!q=D4CFh<)|o!qMUS`Y9L+?92483_!R!5=!t@hd|ceZazdmUp!_6?v78iZu=I%z z19YAer?H$C3DE$57I|2H5sg^Rc(+IMof`hDC@{hDtXPTVoM<<}#Bbslmh-|83*~oF z5DUjHh?Q6_igqlQL=TqB;ue-aL~0z*GWOF0lO-RRE21KfCma7H_#nZJ0QptX0R#M+ z=*79~)H0S@BI9|6?FPYdJ_xG=@D3ZtH1hHBKS1N9`g8zx&=^dor91^%eV1!QjA)QHZCqj4? zNjS>~y~h)IyT+y=nybpn4a&-sSs~P$2fbrJ*MQbF%hX^Jm0gNK}Fu_8AHUq&TRa8+>)O;BgO}+C;umi0|g2e<6 zA&epT2*KMJWg@sh6}2EJYN3pZrQU@kSTGE%aRf^Uu0ywn$pkA@Q58W^l`?8L^;VLg1_Z_wf>i{!AhZ&!Cg??&O0b6Dx#7IgID%lU-6vxk9c41kT-GyaYT0mIS@YUp70m1c+iVqSK(kPzIOkS)@xnn@D2E^()S= z7nb1itD~$<9@r%SZs9hT2mv0jVr4ipVOAK;%$U)B!)q2ug%`Q59HR$zN3*_^a>10g z^zMNz9@xXK3e}4)1Mw=9Ye+PsUU+qQje1SxtWx!gP%5>FD-~sJ)Jc?$(F`dq@DjJL zQkcEbtnbKG3NxJxs{~>0jAk^<%UoBjD6d2_qC~b-ttfb$Y7^Cpl6VbMtH}j5SYl~g z)3dIHc2AiHBA6O&Pc;hTWK5015QXY$Roy;rU#sdGqHA>BPe(J_1m0_0N1NDBbuEag zQJ%oji#pXlkqhfo`v|5^5as~a)oYY`MQP@`dW}-AD0Gmwpv4CD3PT4%ds?b6 zuST=J2eVXR;u*SBVV;a;G|WM+YfzL>G$Tsn$}}j-q3~J-G-#LxHOy;V*r+gvqgmf! 
z8vA0PQKK{}$`QHQEG|4_5)+cTdwO~ze{DE%cgR0L1clt=7e1F6?*-HfP04T|c9w{d z&s&7gK@9w?A{qa)$Zo&L3Uyk<416fqdOyzS5$;eRPP|}pw)31+<%QSf<#;hz9F-Y1 zF;u)EizvJ)S5kOO?xgUxJdPpv9j=R2bz}L0#)1hc$8hZ^9!o65fDhkhbx8OS!L@hg z@C1=Aj>%#Sx$iN>s!^;O#i~*0AieOu+!>~{6XgS@*ffewqu4Y`f}(sVU3N9gFbugJ zOreLS7a}{dYZSXip*!3QAIa7*2N5QQ`H9EI;fE|=JC65q@AWK4gMofLkQ zDQ>LzNfuN1Sw2qTtZaAV+Bw-t;TM^bg5g&gpm1I;qtGe0VaWfD_c>JU^8#i^@rgPV z_epnOD|0G#;rM|Ha;sns=nZJ4$&kvv(9XD@+xc&c9h4&EowiHeF$Hj7A+}pwi9kg{$lx zt9i#7yyKMjANG#pW?eMFH5SLS==0(P|FW2?QF9H{JVpJFy?L59&*05h-s|km=Vm*w znezsV6Ikq^32w4jpiv79RQgWhgnUXu;Irl?*f_D<2fQw(1EKI4TL_DTH=Ww&RnK(fri*s3Ajbf97#d#Wao`E`FQJw6a zuX*Piyd}!(Vs8mI>!t~kSiGCXA^mWIWEK}_)CC4=siL~sTdH|W4c>*yo5J3O+-xs4 zb9z|3hsD!0K`M)jH0mM)6@PZ%t>|TMndU7sco!>g8haOWv#Knd;5HVQu(%P$CLfFC z8nxU&tx(i-_Eu=#3WK*&c{A8s$;}+uI6)?hRV?O_*pJ0(78|q0^873oYqVB1hE}zz zRW^HTBdz>M^|M$PX+>gx7V9Ie{9XYb#g uEK72PTbAVryR|vB|6}b>oDpq)bBb)u5m&_1^5H>Zo$1-M(DgwgHs*i2lXyP>