2004-04-09 13:05:39 +00:00
|
|
|
|
|
|
|
|
/* ====================================================================
|
2006-12-22 19:18:16 +00:00
|
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
|
|
|
this work for additional information regarding copyright ownership.
|
|
|
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
|
(the "License"); you may not use this file except in compliance with
|
|
|
|
|
the License. You may obtain a copy of the License at
|
2004-04-09 13:05:39 +00:00
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
|
limitations under the License.
|
|
|
|
|
==================================================================== */
|
|
|
|
|
|
2003-07-24 16:58:09 +00:00
|
|
|
package org.apache.poi.util;
|
|
|
|
|
|
|
|
|
|
import java.io.UnsupportedEncodingException;
|
|
|
|
|
import java.text.FieldPosition;
|
2004-08-23 08:52:54 +00:00
|
|
|
import java.text.NumberFormat;
|
2003-08-19 14:07:40 +00:00
|
|
|
/**
 * String Utility: a collection of static helpers for converting between
 * Java strings and the byte encodings used in the file formats handled by
 * this project (8 bit ISO-8859-1 "compressed unicode" and 16 bit UTF-16 in
 * either byte order), plus a small printf-like formatter used for logging.
 *
 * The class is not instantiable; all members are static.
 *
 *@author     Andrew C. Oliver
 *@author     Sergei Kozello (sergeikozello at mail.ru)
 *@author     Toshiaki Kamoshida (kamoshida.toshiaki at future dot co dot jp)
 *@since      May 10, 2002
 *@version    1.0
 */
public class StringUtil {
    /** Encoding used for 8 bit ("compressed unicode") data. */
    private final static String ENCODING = "ISO-8859-1";
    /** Encoding name for 16 bit little endian data. */
    private final static String UTF16_LE = "UTF-16LE";
    /** Encoding name for 16 bit big endian data. */
    private final static String UTF16_BE = "UTF-16BE";

    /**
     * Private constructor: this is a static utility class and is never
     * instantiated.
     */
    private StringUtil() {
    }

    /**
     * Given a byte array of 16-bit unicode characters in little endian
     * format (most important byte last), return a Java String representation
     * of it.
     *
     * For example <code>{ 0x16, 0x00 }</code> is decoded to the single
     * character <code>0x16</code>.
     *
     * @param string the byte array to be converted
     * @param offset the initial offset into the
     *     byte array. it is assumed that string[ offset ] and string[ offset +
     *     1 ] contain the first 16-bit unicode character
     * @param len the length of the final string, in 16-bit characters
     * @return the converted string
     * @exception ArrayIndexOutOfBoundsException if offset is out of bounds for
     *     the byte array (i.e., is negative or is greater than or equal to
     *     string.length)
     * @exception IllegalArgumentException if len is negative or too large
     *     (i.e., there is not enough data in string to create a String of
     *     that length)
     */
    public static String getFromUnicodeLE(
        final byte[] string,
        final int offset,
        final int len)
        throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
        checkUnicodeRange(string, offset, len);
        return decode(string, offset, len * 2, UTF16_LE);
    }

    /**
     * Given a byte array of 16-bit unicode characters in little endian
     * format (most important byte last), return a Java String representation
     * of it.
     *
     * For example <code>{ 0x16, 0x00 }</code> is decoded to the single
     * character <code>0x16</code>. An empty array yields the empty string;
     * a trailing odd byte is ignored.
     *
     *@param string the byte array to be converted
     *@return the converted string
     */
    public static String getFromUnicodeLE(final byte[] string) {
        // The three-argument form rejects offset 0 on an empty array,
        // so the empty case has to be answered here.
        if (string.length == 0) {
            return "";
        }
        return getFromUnicodeLE(string, 0, string.length / 2);
    }

    /**
     * Given a byte array of 16-bit unicode characters in big endian
     * format (most important byte first), return a Java String representation
     * of it.
     *
     * For example <code>{ 0x00, 0x16 }</code> is decoded to the single
     * character <code>0x16</code>.
     *
     *@param string the byte array to be converted
     *@param offset the initial offset into the
     *     byte array. it is assumed that string[ offset ] and string[ offset +
     *     1 ] contain the first 16-bit unicode character
     *@param len the length of the final string, in 16-bit characters
     *@return the converted string
     *@exception ArrayIndexOutOfBoundsException if offset is out of bounds for
     *     the byte array (i.e., is negative or is greater than or equal to
     *     string.length)
     *@exception IllegalArgumentException if len is negative or too large
     *     (i.e., there is not enough data in string to create a String of
     *     that length)
     */
    public static String getFromUnicodeBE(
        final byte[] string,
        final int offset,
        final int len)
        throws ArrayIndexOutOfBoundsException, IllegalArgumentException {
        checkUnicodeRange(string, offset, len);
        return decode(string, offset, len * 2, UTF16_BE);
    }

    /**
     * Given a byte array of 16-bit unicode characters in big endian
     * format (most important byte first), return a Java String representation
     * of it.
     *
     * For example <code>{ 0x00, 0x16 }</code> is decoded to the single
     * character <code>0x16</code>. An empty array yields the empty string;
     * a trailing odd byte is ignored.
     *
     *@param string the byte array to be converted
     *@return the converted string
     */
    public static String getFromUnicodeBE(final byte[] string) {
        // The three-argument form rejects offset 0 on an empty array,
        // so the empty case has to be answered here.
        if (string.length == 0) {
            return "";
        }
        return getFromUnicodeBE(string, 0, string.length / 2);
    }

    /**
     * Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java
     * String and return.
     * (In Excel terms, read compressed 8 bit unicode as a string)
     *
     * If fewer than <code>len</code> bytes remain after <code>offset</code>,
     * only the remaining bytes are read.
     *
     * @param string byte array to read
     * @param offset offset at which to start reading the byte array
     * @param len maximum number of bytes to read
     * @return String generated String instance by reading byte array
     */
    public static String getFromCompressedUnicode(
        final byte[] string,
        final int offset,
        final int len) {
        // Clamp the requested length to the bytes actually available.
        int len_to_use = Math.min(len, string.length - offset);
        return decode(string, offset, len_to_use, ENCODING);
    }

    /**
     * Takes a unicode (java) string, and writes it as 8 bit data (in
     * ISO-8859-1 codepage) into the supplied byte array.
     * (In Excel terms, write compressed 8 bit unicode)
     *
     *@param input the String containing the data to be written
     *@param output the byte array to which the data is to be written
     *@param offset an offset into the byte array at which the data starts
     *     when written
     */
    public static void putCompressedUnicode(
        final String input,
        final byte[] output,
        final int offset) {
        encodeInto(input, output, offset, ENCODING);
    }

    /**
     * Takes a unicode string, and writes it as little endian (most
     * important byte last) bytes into the supplied byte array.
     * (In Excel terms, write uncompressed unicode)
     *
     *@param input the String containing the unicode data to be written
     *@param output the byte array to hold the uncompressed unicode, should be twice the length of the String
     *@param offset the offset to start writing into the byte array
     */
    public static void putUnicodeLE(
        final String input,
        final byte[] output,
        final int offset) {
        encodeInto(input, output, offset, UTF16_LE);
    }

    /**
     * Takes a unicode string, and writes it as big endian (most
     * important byte first) bytes into the supplied byte array.
     * (In Excel terms, write uncompressed unicode)
     *
     *@param input the String containing the unicode data to be written
     *@param output the byte array to hold the uncompressed unicode, should be twice the length of the String
     *@param offset the offset to start writing into the byte array
     */
    public static void putUnicodeBE(
        final String input,
        final byte[] output,
        final int offset) {
        encodeInto(input, output, offset, UTF16_BE);
    }

    /**
     * Apply printf() like formatting to a string.
     * Primarily used for logging.
     *
     * Each <code>%</code> in the message is replaced by the next value from
     * <code>params</code>. When that value is a Number, the <code>%</code>
     * may be followed by an optional single-digit spec: <code>%2</code>
     * (minimum integer digits), <code>%.3</code> (maximum fraction digits)
     * or <code>%2.2</code> (both). A literal percent sign is written as
     * <code>\%</code>. If there are more <code>%</code> markers than
     * values, <code>?missing data?</code> is inserted instead.
     *
     *@param message the string with embedded formatting info,
     *     e.g. <code>"This is a test %2.2"</code>
     *@param params array of values to format into the string; a null array
     *     is treated as empty and a null element is rendered as "null"
     *@return The formatted string
     */
    public static String format(String message, Object[] params) {
        final int paramCount = (params == null) ? 0 : params.length;
        int currentParamNumber = 0;
        StringBuffer formattedMessage = new StringBuffer();
        for (int i = 0; i < message.length(); i++) {
            final char current = message.charAt(i);
            final boolean hasNext = i + 1 < message.length();
            if (current == '%') {
                if (currentParamNumber >= paramCount) {
                    formattedMessage.append("?missing data?");
                } else {
                    final Object param = params[currentParamNumber++];
                    if ((param instanceof Number) && hasNext) {
                        // Skip exactly the spec characters that were consumed.
                        i += matchOptionalFormatting(
                                (Number) param,
                                message.substring(i + 1),
                                formattedMessage);
                    } else {
                        // String.valueOf keeps a null element from throwing.
                        formattedMessage.append(String.valueOf(param));
                    }
                }
            } else if ((current == '\\')
                    && hasNext
                    && (message.charAt(i + 1) == '%')) {
                // Escaped percent sign: emit it and skip the '%'.
                formattedMessage.append('%');
                i++;
            } else {
                formattedMessage.append(current);
            }
        }
        return formattedMessage.toString();
    }

    /**
     * Formats a number according to the optional spec found at the start of
     * <code>formatting</code> and appends the result to
     * <code>outputTo</code>.
     *
     * @param number the value to format
     * @param formatting the message text immediately following the '%'
     * @param outputTo buffer that receives the formatted number
     * @return how many characters of <code>formatting</code> were consumed
     *     as spec: 3 for "d.d", 2 for ".d", 1 for "d", and 0 when no spec
     *     is present so that no literal text is swallowed
     */
    private static int matchOptionalFormatting(
        Number number,
        String formatting,
        StringBuffer outputTo) {
        NumberFormat numberFormat = NumberFormat.getInstance();
        int consumed = 0;
        if ((0 < formatting.length())
            && Character.isDigit(formatting.charAt(0))) {
            numberFormat.setMinimumIntegerDigits(
                Character.digit(formatting.charAt(0), 10));
            consumed = 1;
            if ((2 < formatting.length())
                && (formatting.charAt(1) == '.')
                && Character.isDigit(formatting.charAt(2))) {
                numberFormat.setMaximumFractionDigits(
                    Character.digit(formatting.charAt(2), 10));
                consumed = 3;
            }
        } else if ((1 < formatting.length())
            && (formatting.charAt(0) == '.')
            && Character.isDigit(formatting.charAt(1))) {
            numberFormat.setMaximumFractionDigits(
                Character.digit(formatting.charAt(1), 10));
            consumed = 2;
        }
        numberFormat.format(number, outputTo, new FieldPosition(0));
        return consumed;
    }

    /**
     * @return the encoding we want to use, currently hardcoded to ISO-8859-1
     */
    public static String getPreferredEncoding() {
        return ENCODING;
    }

    /**
     * check the parameter has multibyte character
     *
     * @param value  string to check
     * @return  boolean result
     *  true: string has at least one character above 0xFF;
     *  false: otherwise, including when value is null
     */
    public static boolean hasMultibyte(String value) {
        if (value == null) {
            return false;
        }
        for (int i = 0; i < value.length(); i++) {
            if (value.charAt(i) > 0xFF) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks to see if a given String needs to be represented as Unicode,
     * i.e. whether encoding it as ISO-8859-1 and decoding it again fails to
     * reproduce the original string.
     *
     * @param value the string to check
     * @return true if string needs Unicode to be represented.
     */
    public static boolean isUnicodeString(final String value) {
        try {
            return !value.equals(new String(value.getBytes(ENCODING), ENCODING));
        } catch (UnsupportedEncodingException e) {
            // Best effort: if the 8 bit encoding cannot even be checked,
            // report that Unicode is needed.
            return true;
        }
    }

    /**
     * Validates the offset / character-count pair shared by the UTF-16
     * decoders.
     */
    private static void checkUnicodeRange(byte[] string, int offset, int len) {
        if ((offset < 0) || (offset >= string.length)) {
            throw new ArrayIndexOutOfBoundsException("Illegal offset");
        }
        if ((len < 0) || (((string.length - offset) / 2) < len)) {
            throw new IllegalArgumentException("Illegal length");
        }
    }

    /**
     * Decodes <code>byteCount</code> bytes starting at <code>offset</code>
     * using the named encoding.
     */
    private static String decode(
        byte[] data,
        int offset,
        int byteCount,
        String encoding) {
        try {
            return new String(data, offset, byteCount, encoding);
        } catch (UnsupportedEncodingException e) {
            throw unsupportedEncoding(encoding, e);
        }
    }

    /**
     * Encodes <code>input</code> with the named encoding and copies the
     * bytes into <code>output</code> starting at <code>offset</code>.
     */
    private static void encodeInto(
        String input,
        byte[] output,
        int offset,
        String encoding) {
        try {
            byte[] bytes = input.getBytes(encoding);
            System.arraycopy(bytes, 0, output, offset, bytes.length);
        } catch (UnsupportedEncodingException e) {
            throw unsupportedEncoding(encoding, e);
        }
    }

    /**
     * Builds the error reported when an encoding used by this class is
     * unexpectedly unavailable, keeping the original exception as cause.
     */
    private static InternalError unsupportedEncoding(
        String encoding,
        UnsupportedEncodingException cause) {
        InternalError error =
            new InternalError("Required encoding not supported: " + encoding);
        error.initCause(cause);
        return error;
    }
}
|