From d20fa4430554760967f193cafc3e1a07fb28ced0 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Sat, 19 Dec 2020 19:45:09 +0000 Subject: [PATCH] [github-198] Remove jdk.charset module dependency for spreadsheets generation. Thanks to Robert Marcano. This closes #198 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1884631 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/util/CodePageUtil.java | 25 -------- src/java/org/apache/poi/util/StringUtil.java | 1 - .../scratchpad/test9/module-info.class | Bin 2652 -> 2690 bytes .../scratchpad/test9/module-info.java | 1 + .../org/apache/poi/hwpf/HWPFOldDocument.java | 4 +- .../poi/hwpf/model/OldTextPieceTable.java | 6 +- .../org/apache/poi/hwpf/model/TextPiece.java | 6 +- .../apache/poi/hwpf/util/DoubleByteUtil.java | 59 ++++++++++++++++++ .../hwpf}/util/LittleEndianCP950Reader.java | 10 ++- .../util/TestLittleEndianCP950Reader.java | 3 +- 10 files changed, 76 insertions(+), 39 deletions(-) create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/util/DoubleByteUtil.java rename src/{java/org/apache/poi => scratchpad/src/org/apache/poi/hwpf}/util/LittleEndianCP950Reader.java (98%) rename src/{testcases/org/apache/poi => scratchpad/testcases/org/apache/poi/hwpf}/util/TestLittleEndianCP950Reader.java (98%) diff --git a/src/java/org/apache/poi/util/CodePageUtil.java b/src/java/org/apache/poi/util/CodePageUtil.java index da8f8a9842..2c1480253d 100644 --- a/src/java/org/apache/poi/util/CodePageUtil.java +++ b/src/java/org/apache/poi/util/CodePageUtil.java @@ -31,8 +31,6 @@ import java.util.Set; public class CodePageUtil { - public static final Set DOUBLE_BYTE_CHARSETS = Collections.singleton(StringUtil.BIG5); - /**

Codepage 037, a special case

*/ public static final int CP_037 = 37; @@ -446,27 +444,4 @@ public class CodePageUtil return "cp" + codepage; } } - - /** - * This tries to convert a LE byte array in cp950 - * (Microsoft's dialect of Big5) to a String. - * We know MS zero-padded ascii, and we drop those. - * There may be areas for improvement in this. - * - * @param data - * @param offset - * @param lengthInBytes - * @return Decoded String - */ - public static String cp950ToString(byte[] data, int offset, int lengthInBytes) { - StringBuilder sb = new StringBuilder(); - LittleEndianCP950Reader reader = new LittleEndianCP950Reader(data, offset, lengthInBytes); - int c = reader.read(); - while (c != -1) { - sb.append((char)c); - c = reader.read(); - } - reader.close(); - return sb.toString(); - } } diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java index a0778a3efa..d281c63386 100644 --- a/src/java/org/apache/poi/util/StringUtil.java +++ b/src/java/org/apache/poi/util/StringUtil.java @@ -34,7 +34,6 @@ public final class StringUtil { public static final Charset UTF16LE = StandardCharsets.UTF_16LE; public static final Charset UTF8 = StandardCharsets.UTF_8; public static final Charset WIN_1252 = Charset.forName("cp1252"); - public static final Charset BIG5 = Charset.forName("Big5"); private StringUtil() { // no instances of this class diff --git a/src/multimodule/scratchpad/test9/module-info.class b/src/multimodule/scratchpad/test9/module-info.class index cfda7bb49527ea644a890b9504f41002075dc286..e74671e4fb1d2f31a2b76fed8288ef8680d75711 100644 GIT binary patch literal 2690 zcmai$>3Z8l6ox+)8n?7%sSb2+Ln);cVnIq+3uQ}4A*H2e-(`6cDUmEy8runL*q4jo zLioW2a8W#*Id&9V8iRkl8tXeVXU;NX|NYOOe*sv)?+q+Dn7NjAda-ixnu}!@S6#gB z;tdxoF0K*YBwQ!lAiPDmNoWz?CcHy}M&;d6pdXcGcL zhoA_ngf&7)hzRS14MI#v2wg%-$OyLxIiVnELXXfV3<#TqEy5RsF9}}}z9xJ__?ECu z_>S;>13xtIV*@`m@N)ydH1KN!zd1ONNJMi{cQtizEX~)Ne&z?EYG!HFEP~wEL74d+ z2m9CkzTa&7g>rDXV#l@)4vIlX6&pIu92^SLBuTqPGx2qJ$-&zOco#s4-Du;JD2o2z%WP7HgplGFvG8b{NXh z>S9JIvOKZGVWc4K_EpY1ouZG$S*wbf-z!wk-KFATUrkim=p(I`qf+xCZjdv!UCUHY zZ)M7sq(QRk$;cG~(Ea zbKOj|g^@-PUzn#OfTZy$h}s~K3ZWq*mXBwv6qum=E_@ctt^$ZN+;(klh}#LymvM6*VIkb zg*)wn}*r-|V*kN(eir=H$2k0y+_0f+z33_lL`@Yg?Zn|z(Y?b~Ls7qiS6GekqU zPq<&WfIH-QKzLAiNO-64F5%t6dxVFDu8@TH3hxsh5grvD6CM|y5S|pC5}p>G5zY$l z7oHV9AUr30P|8bHb+36Fw$g#51xCOY#3*Wn6}Uc^hxm+=Z$=T$8J3)xEa9RL6T literal 2652 zcmZ{mX@A>95XPSkG^X^R)Pmk^D5aD_ypXn>g#v9t3Mnl$_bp4CNQq>r(%Mcq!_6n) z!|;L+z=z_;?ATEpWtQo>_roR{3VT=mzTfKjrDuR(svpHt{gk^~hhr05d~1ui)3e6I2_bj5Lb*=~xydlheMaF#J-0KK zwlIELdU#C8>>S35dtBk96ocCDT2oMvClpRg9%el-xs6F(4^Ju_mwbY#*Igs~Q$l}( zoceoO=#LD;{$>@9)}uOOnHQ-CEi~t>LDub?f@=5h+ys}g{x3~%>6NCSaW#pDE5rHR zF&Hk<$ozt!U}e@ZP1Z{aXQVzjfVLaCA&FiQBpjs6x-dajgdSd%#2|ZE_yZ5uq&CPp z*2WvVT`C=PmY6vY*M)7$JG4#n!Zwj%xJ_^E4jegS72U&4K|<#;lJFgcnYuugS>Fpf zu_c9W2}P92sEFHwe9pKf-+Mxl&OTOTTNw1A%l4puFd@vC&L0UI<-&B5IQQ_0poFuI zl=w{HLS2cn?ydb`?;blr29!OfdX>(_ukperT zZ~f6UuS{?mlRK8=rYPg*%9?P(QGGi13wl3Efm8@$$YHqcnwB2uTXVLq=}wIK z^INH;7u!n<)q_>4Ulh*Oo}*Mwd3`hWSIk}0rKVh4zwMSGa_1!bF4C&z|A*kewX1rO zRy+JR0Y$%9hJOn8(d}QjpKqrzz3CPYV21dhTVg-+AoCFOG7j_AV?N3}$~?|I$vn-R zVV-54W1eSTU|wWC!)!61WzI1#GoNR^$b6Z3mH8U;b>^GQ8_WgfBJ*wL67yZ=9p+u; z`^*oSA2UB?e$M=o`8Bh{44KQ!h`Gv4m|bShESQ$4a1i~`b$xW5g6mA-5L{R83fhHl RX!RX_Aa3GkOyO7j{y)V{>HYu! diff --git a/src/multimodule/scratchpad/test9/module-info.java b/src/multimodule/scratchpad/test9/module-info.java index 9d406fd36e..2aaef57a0a 100644 --- a/src/multimodule/scratchpad/test9/module-info.java +++ b/src/multimodule/scratchpad/test9/module-info.java @@ -82,6 +82,7 @@ module org.apache.poi.scratchpad { exports org.apache.poi.hemf.hemfplus.extractor to junit; exports org.apache.poi.hslf to junit; exports org.apache.poi.hwmf to junit; + exports org.apache.poi.hwpf.util to junit; opens org.apache.poi.hwpf.model to org.mockito; opens org.apache.poi.hwpf.model.types to org.mockito; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java index 238e0406f8..a8fb4078a7 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java @@ -37,7 +37,7 @@ import org.apache.poi.hwpf.model.TextPieceTable; import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.util.CodePageUtil; +import org.apache.poi.hwpf.util.DoubleByteUtil; import org.apache.poi.util.IOUtils; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.NotImplemented; @@ -176,7 +176,7 @@ public class HWPFOldDocument extends HWPFDocumentCore { _fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin(), MAX_RECORD_LENGTH); int numChars = textData.length; - if (CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) { + if (DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) { numChars /= 2; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java index e3cb94c868..e6a0887f32 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldTextPieceTable.java @@ -20,7 +20,7 @@ import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Collections; -import org.apache.poi.util.CodePageUtil; +import org.apache.poi.hwpf.util.DoubleByteUtil; import org.apache.poi.util.IOUtils; import org.apache.poi.util.Internal; @@ -73,7 +73,7 @@ public class OldTextPieceTable extends TextPieceTable { boolean unicode = pieces[x].isUnicode(); int multiple = 1; if (unicode || - (charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset))) { + (charset != null && DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(charset))) { multiple = 2; } @@ -106,7 +106,7 @@ public class OldTextPieceTable extends TextPieceTable { @Override protected int getEncodingMultiplier(TextPiece textPiece) { Charset charset = textPiece.getPieceDescriptor().getCharset(); - if (charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset)) { + if (charset != null && DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(charset)) { return 2; } return 1; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java index 5c9fcf70d9..0c606cbf03 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java @@ -20,7 +20,7 @@ package org.apache.poi.hwpf.model; import java.nio.charset.Charset; -import org.apache.poi.util.CodePageUtil; +import org.apache.poi.hwpf.util.DoubleByteUtil; import org.apache.poi.util.Internal; import org.apache.poi.util.StringUtil; @@ -77,8 +77,8 @@ public class TextPiece extends PropertyNode { * Create the StringBuilder from the text and unicode flag */ private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) { - if (StringUtil.BIG5.equals(pd.getCharset())) { - return new StringBuilder(CodePageUtil.cp950ToString(text, 0, text.length)); + if (DoubleByteUtil.BIG5.equals(pd.getCharset())) { + return new StringBuilder(DoubleByteUtil.cp950ToString(text, 0, text.length)); } String str = new String(text, 0, text.length, (pd.isUnicode()) ? StringUtil.UTF16LE : pd.getCharset()); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/util/DoubleByteUtil.java b/src/scratchpad/src/org/apache/poi/hwpf/util/DoubleByteUtil.java new file mode 100644 index 0000000000..5d55711ed9 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/util/DoubleByteUtil.java @@ -0,0 +1,59 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hwpf.util; + +import java.nio.charset.Charset; +import java.util.Collections; +import java.util.Set; + +/** + * Utilities for working with double byte CodePages. + * + *

Provides constants for understanding numeric codepages, + * along with utilities to translate these into Java Character Sets.

+ */ +public class DoubleByteUtil +{ + + public static final Charset BIG5 = Charset.forName("Big5"); + + public static final Set DOUBLE_BYTE_CHARSETS = Collections.singleton(BIG5); + + /** + * This tries to convert a LE byte array in cp950 + * (Microsoft's dialect of Big5) to a String. + * We know MS zero-padded ascii, and we drop those. + * There may be areas for improvement in this. + * + * @param data + * @param offset + * @param lengthInBytes + * @return Decoded String + */ + public static String cp950ToString(byte[] data, int offset, int lengthInBytes) { + StringBuilder sb = new StringBuilder(); + LittleEndianCP950Reader reader = new LittleEndianCP950Reader(data, offset, lengthInBytes); + int c = reader.read(); + while (c != -1) { + sb.append((char)c); + c = reader.read(); + } + reader.close(); + return sb.toString(); + } +} diff --git a/src/java/org/apache/poi/util/LittleEndianCP950Reader.java b/src/scratchpad/src/org/apache/poi/hwpf/util/LittleEndianCP950Reader.java similarity index 98% rename from src/java/org/apache/poi/util/LittleEndianCP950Reader.java rename to src/scratchpad/src/org/apache/poi/hwpf/util/LittleEndianCP950Reader.java index 61808afcaa..195629bb04 100644 --- a/src/java/org/apache/poi/util/LittleEndianCP950Reader.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/util/LittleEndianCP950Reader.java @@ -15,13 +15,18 @@ limitations under the License. ==================================================================== */ -package org.apache.poi.util; +package org.apache.poi.hwpf.util; + import java.io.IOException; import java.io.Reader; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharsetDecoder; +import org.apache.poi.util.Internal; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; + /** * Stream that converts CP950 (MSOffice's dialect of Big5), with * zero-byte padding for ASCII and in LittleEndianOrder. @@ -31,11 +36,10 @@ public class LittleEndianCP950Reader extends Reader { private static final POILogger LOGGER = POILogFactory.getLogger(LittleEndianCP950Reader.class); - private static final char UNMAPPABLE = '?'; private final ByteBuffer doubleByteBuffer = ByteBuffer.allocate(2); private final CharBuffer charBuffer = CharBuffer.allocate(2); - private final CharsetDecoder decoder = StringUtil.BIG5.newDecoder(); + private final CharsetDecoder decoder = DoubleByteUtil.BIG5.newDecoder(); //https://en.wikipedia.org/wiki/Code_page_950 //see private use area diff --git a/src/testcases/org/apache/poi/util/TestLittleEndianCP950Reader.java b/src/scratchpad/testcases/org/apache/poi/hwpf/util/TestLittleEndianCP950Reader.java similarity index 98% rename from src/testcases/org/apache/poi/util/TestLittleEndianCP950Reader.java rename to src/scratchpad/testcases/org/apache/poi/hwpf/util/TestLittleEndianCP950Reader.java index ef648e4f00..b6c7fd201a 100644 --- a/src/testcases/org/apache/poi/util/TestLittleEndianCP950Reader.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/util/TestLittleEndianCP950Reader.java @@ -15,8 +15,7 @@ limitations under the License. ==================================================================== */ -package org.apache.poi.util; - +package org.apache.poi.hwpf.util; import static org.junit.Assert.assertEquals;