From 1910fff53edf7eb5da010ff642a38392cce9704c Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 16 Jan 2026 08:56:42 +0100 Subject: [PATCH] Add a file-handler for .wmf files These can be embedded in slideshows and POI supports some handling of them. Also Tika uses this code to extract some textual information from such images. --- .../apache/poi/stress/FileHandlerKnown.java | 1 + .../apache/poi/stress/HWMFFileHandler.java | 93 ++++++++++++++++++ test-data/spreadsheet/stress.xls | Bin 75776 -> 76288 bytes 3 files changed, 94 insertions(+) create mode 100644 poi-integration/src/test/java/org/apache/poi/stress/HWMFFileHandler.java diff --git a/poi-integration/src/test/java/org/apache/poi/stress/FileHandlerKnown.java b/poi-integration/src/test/java/org/apache/poi/stress/FileHandlerKnown.java index c3f1e11137..dc2af11ba9 100644 --- a/poi-integration/src/test/java/org/apache/poi/stress/FileHandlerKnown.java +++ b/poi-integration/src/test/java/org/apache/poi/stress/FileHandlerKnown.java @@ -29,6 +29,7 @@ public enum FileHandlerKnown { HSLF, HSMF, HSSF, + HWMF, HWPF, OPC, POIFS, diff --git a/poi-integration/src/test/java/org/apache/poi/stress/HWMFFileHandler.java b/poi-integration/src/test/java/org/apache/poi/stress/HWMFFileHandler.java new file mode 100644 index 0000000000..3429844bfb --- /dev/null +++ b/poi-integration/src/test/java/org/apache/poi/stress/HWMFFileHandler.java @@ -0,0 +1,93 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertInstanceOf; + +import java.awt.image.BufferedImage; +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.nio.charset.Charset; + +import org.apache.poi.hwmf.record.HwmfFont; +import org.apache.poi.hwmf.record.HwmfRecord; +import org.apache.poi.hwmf.record.HwmfRecordType; +import org.apache.poi.hwmf.record.HwmfText; +import org.apache.poi.hwmf.usermodel.HwmfPicture; +import org.apache.poi.util.LocaleUtil; +import org.junit.jupiter.api.Test; + +public class HWMFFileHandler implements FileHandler { + + @Override + public void handleExtracting(File file) throws Exception { + try (InputStream stream = new BufferedInputStream(new FileInputStream(file))) { + HwmfPicture picture = new HwmfPicture(stream); + Charset charset = LocaleUtil.CHARSET_1252; + + // mimic a bit what e.g. Tika does to extract some information from .wmf files + for (HwmfRecord record : picture.getRecords()) { + if (record.getWmfRecordType().equals(HwmfRecordType.createFontIndirect)) { + HwmfFont font = ((HwmfText.WmfCreateFontIndirect) record).getFont(); + charset = (font.getCharset() == null || font.getCharset().getCharset() == null) ? + LocaleUtil.CHARSET_1252 : font.getCharset().getCharset(); + } + + if (record.getWmfRecordType().equals(HwmfRecordType.extTextOut)) { + assertInstanceOf(HwmfText.WmfExtTextOut.class, record); + HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut) record; + textOut.getText(charset); + } else if (record.getWmfRecordType().equals(HwmfRecordType.textOut)) { + assertInstanceOf(HwmfText.WmfTextOut.class, record); + HwmfText.WmfTextOut textOut = (HwmfText.WmfTextOut) record; + textOut.getText(charset); + } + } + } + } + + @Override + public void handleAdditional(File file) throws Exception { + // no additional checks for now + } + + @Override + public void handleFile(InputStream stream, String path) throws Exception { + HwmfPicture picture = new HwmfPicture(stream); + + for (HwmfRecord record : picture.getRecords()) { + record.getWmfRecordType(); + record.getGenericRecordType(); + } + + BufferedImage dest = new BufferedImage(256, 256, BufferedImage.TYPE_INT_ARGB); + picture.draw(dest.createGraphics()); + } + + @Test + void test() throws Exception { + String file = "test-data/slideshow/santa.wmf"; + + try (InputStream stream = new BufferedInputStream(new FileInputStream(file))) { + handleFile(stream, file); + } + + handleExtracting(new File(file)); + } +} diff --git a/test-data/spreadsheet/stress.xls b/test-data/spreadsheet/stress.xls index 17d8e2afca1f562c9b7449a61fad3b66e016c031..a9d24e82448271e58e7f61816761f62f8882f172 100644 GIT binary patch delta 3314 zcmZ9Odr(wW9LLYO>@Lf)>@K@JmdC1AbUhGj3ar&&fsFRjD}{m$7R;JNI`&-1;X^ZES__p(dp zfUfhLZvUL9CkTGo2-*Ka-WId7+Q$xlVLJqsr!t+yzu zAYXYbwRdm%rGyR#A#IKRdQV$J%dXj3bJNpvX6Ihl7zPw}_A!}Owo_aOdqPA`K z2UK^K+nt_~t=8@giXh;r&s8&W+_AS?y#7Yd-5HtOQw)LzkGjiWA8b%tTGa-R-`4UOradYXf3qrb@??A`7Q*WdXtDS-o+;d~dT z5b~^xkbCn8Nsc4rY84C+O~`dGA@`*A_B2+}j^Rp}3cl`x9TZ-U=!vyL6w+y$Scj1? zG8{3n))5^Uo;tDC6FnJjheqf#5Cd@}O)0*#;@sM_0ae=J&7Z);x(_Z935YA zixEjLz1`xa;}#RN_foETt?T&c-~ol^#O{Mlw?;=t$NqKkb>AeF_R{WW=yV#^eU>I# z_%1jz?1um8U%E!aY1g5q3Ra<`w&>y8ezJg3M_)ik4=L7TI33>@PmZ(6;o$3MW#M!N zeVw)Q@&s6aesgo z{$$3Ju@jjki&?Tn3Ck(=e8_T|TTY|JB3Uew#Ufd(!txP~2zGVv{U3$zqc% z@QVghf5i5OEYESv09x#l#V%Rwk_Fy*z;gCLq!=ZcmycOaB+aIuus|f-{d4RfFQ2lL zynM#4@bWoR%}~BzWo971WDoFio*m=iSL`w`7g$Ubl&|~pqv&CUe#3^Wu>6)eZBV{r z#WvXSJ=@L859~!=F0vth?MLQ}hH{A&^YRl5@bWV|$jfDR63YBv@R&2iF$ZDQ4{t9s z;F$d38+-g_ilhCC?933$X4$EW4zV&j=ze4EP^`m<%tqvholdv@j^P{(2VtM@4-Dr@ zjpu3_&l8RRM0TFU&eO2zg8d8GbZpk_pf2m*7-nGD1EKEheBD+vx7i!o=g1v?8q7d7+Czd{?SVu9<#}V#wQdbA;7jJO| z5LyX;0{I6;0mSj*gMz;q7D&Hng`x#~1_-iH8mCY*PLW_0WQ!!Wh_mddlSW&^5GcmU z&ckFr9fpggU5hmxN<;@evLzB*qG6W^)`091Y*w8BwzU60LDl3{{)4686Kh0y23aD!ysps{Wg)&yiXO6*1ryGgK# z$Zo=BXZQq37;eUJginx+;TFldMPscNRu{6>5?ifdw+c1|*{#@YkBc^0Q!(5&Hh~K! zP%*5LtTh^It*}l)wpL0(%C8ofdvXFI3R O`;{JN-^dhd2>Tx>S5~Y5 delta 2470 zcmZ9OdrVt(6vuz3v{0a>6e#8WSY(Qr$aFZcLnw?f1q6f*kRi@ZqcIGLuPuZOW9I&F ziEJdEEQa`pAsY9GnN6(t0H2#U=f>Qe%H|8FY>KlwoDQL$b9)NhdmHklpZk44d;91= zW$r&=9w@Z$Q*y*B_1vM-4qM8|159F6)sC(AKl0c^-P<4Ct>RV6+~DiBy_UUEN@e*f z?cQ?Jbe8X#y7Q^N_y?mb*X>d3>5Pqn{%e%#%T&r*s?@74rJ7rnI!WanR6CjFE9+_n zXP}Wz(9uT;pqp#(mF?NyK=cwdE3*pZE#0>$iwf*WUQ9)*NaadjI#1)^@>#p>1HRH= z17Iq^FPNVLX_#gP2PnMW3~6!OAc7h|Z3Fr(0()~H8+*(!{k#c^oeRNWF#NNhYzzL} z72pSKvp}aa5VN#p;Vu8T1zs{a{Gil4Jy_i8R&Qdj74l&OhpkWmZ{Y;X+c?ki4(3Ob zyo(!IM)9d=NQW^TVtEfoNHWK$Y?R8PRD?>Sn^~*2Qn_2bkAYCY9z%f-FrQ>G5UmUF zU*lFE;udy}VIRv!cwl)TRu|x>&84EI#Cx$(gMeDpAFW)=QwVo z2l+V8vYfzNJIMsrv7E%6EMMTTot&re9Ls5R#S#1xy)0+2i{&f-fjBs5g0Hd4Nq4`& z9+pWQa#G?fo@P0R_5_k|u_}SieTO|P-{TO=4>->9BhIo+VQwN6IDV1~ra-%3&SQHb zWIBEpc!R(_1nK%6rT|Ri1m`Y@eiyh}H?`U@RisT_5^s?dWV@*K4}rG`tYH`sxJKZU z1U-KWEY?jG8>ULMslUWqA_d2}^|HWQ1p#MxV44HOa6V2y67#xPZ@O+|{gRtjF^)+m8>0*^AZ3cOR` z6`qO~Sg)I^H%v8XQ!(Ogkb?RQYK;}xD6p5IP2gPuUnc0W3vAL&H5sOwwW&DqHcLT} zTO9&73M|MZ7%y;>z!riYr@$87REuGXKFR5IkRaYxDR_-r69u*joMf0J@NR)oSv2Jm z*shyuH%x8Trjo_GSqeI-z~L6yA@Es-DFW{i=x3NJuv6ev7IZn%1l}9&%qC~Lz%AiU zh8Y6y3wJWi6u33qnGId^^`bEcGWb)G4tjxT2YL`M7%yldk@