mirror of
https://github.com/apache/poi.git
synced 2026-02-27 20:40:08 +08:00
[bug-66532] more performant way to iterate over codepoints. Thanks to Matthias Raschhofer
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1908458 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3e6dddaa95
commit
0275daa5de
@ -31,6 +31,7 @@ import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Iterator;
|
||||
import java.util.PrimitiveIterator;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
@ -397,37 +398,39 @@ public class SheetDataWriter implements Closeable {
|
||||
return;
|
||||
}
|
||||
|
||||
for (Iterator<String> iter = CodepointsUtil.iteratorFor(s); iter.hasNext(); ) {
|
||||
String codepoint = iter.next();
|
||||
int codepoint;
|
||||
for (PrimitiveIterator.OfInt iter = CodepointsUtil.primitiveIterator(s); iter.hasNext(); ) {
|
||||
codepoint = iter.nextInt();
|
||||
switch (codepoint) {
|
||||
case "<":
|
||||
case '<':
|
||||
_out.write("&lt;");
|
||||
break;
|
||||
case ">":
|
||||
case '>':
|
||||
_out.write("&gt;");
|
||||
break;
|
||||
case "&":
|
||||
case '&':
|
||||
_out.write("&amp;");
|
||||
break;
|
||||
case "\"":
|
||||
case '\"':
|
||||
_out.write("&quot;");
|
||||
break;
|
||||
// Special characters
|
||||
case "\n":
|
||||
case '\n':
|
||||
_out.write("&#xa;");
|
||||
break;
|
||||
case "\r":
|
||||
case '\r':
|
||||
_out.write("&#xd;");
|
||||
break;
|
||||
case "\t":
|
||||
case '\t':
|
||||
_out.write("&#x9;");
|
||||
break;
|
||||
case "\u00A0": // NO-BREAK SPACE
|
||||
case '\u00A0': // NO-BREAK SPACE
|
||||
_out.write("&#xa0;");
|
||||
break;
|
||||
default:
|
||||
if (codepoint.length() == 1) {
|
||||
char c = codepoint.charAt(0);
|
||||
final char[] chars = Character.toChars(codepoint);
|
||||
if (chars.length == 1) {
|
||||
char c = chars[0];
|
||||
// YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks.
|
||||
// the same rule applies to "not a character" symbols.
|
||||
if (replaceWithQuestionMark(c)) {
|
||||
@ -436,7 +439,7 @@ public class SheetDataWriter implements Closeable {
|
||||
_out.write(c);
|
||||
}
|
||||
} else {
|
||||
_out.write(codepoint);
|
||||
_out.write(chars);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@ -18,12 +18,28 @@
|
||||
package org.apache.poi.util;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.PrimitiveIterator;
|
||||
|
||||
@Internal
|
||||
public class CodepointsUtil {

    /**
     * Iterates over the Unicode code points of the given text, handing out each code point
     * as its own {@link String} (built from the {@code char}s that encode that code point).
     *
     * @param text the text to iterate over
     * @return an iterator of Strings, each one holding a single code point
     * @see #primitiveIterator(String) a more performant iterator that yields ints instead of Strings
     */
    public static Iterator<String> iteratorFor(String text) {
        return text.codePoints()
                .mapToObj(CodepointsUtil::asString)
                .iterator();
    }

    /**
     * Iterates over the Unicode code points of the given text as primitive {@code int} values.
     *
     * @param text the text to iterate over
     * @return an iterator of ints, each one being a single code point
     * @since POI 5.2.4
     */
    public static PrimitiveIterator.OfInt primitiveIterator(String text) {
        final PrimitiveIterator.OfInt codePoints = text.codePoints().iterator();
        return codePoints;
    }

    /** Converts a single code point into the String made of its UTF-16 {@code char}s. */
    private static String asString(int codePoint) {
        return String.valueOf(Character.toChars(codePoint));
    }
}
|
||||
@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.function.IntConsumer;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@ -38,8 +39,22 @@ class TestCodepointsUtil {
|
||||
List<String> codePoints = new ArrayList<>();
|
||||
CodepointsUtil.iteratorFor(unicodeSurrogates).forEachRemaining(codePoints::add);
|
||||
assertEquals(17, codePoints.size());
|
||||
for(String point : codePoints){
|
||||
assertTrue(point.length() >=1 && point.length() <= 2, "codepoint " + point + "is wrong size");
|
||||
for (String point : codePoints) {
|
||||
assertTrue(point.length() >= 1 && point.length() <= 2, "codepoint " + point + "is wrong size");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPrimitiveIterator() {
|
||||
final String unicodeSurrogates = "\uD835\uDF4A\uD835\uDF4B\uD835\uDF4C\uD835\uDF4D\uD835\uDF4E"
|
||||
+ "abcdef123456";
|
||||
List<String> codePoints = new ArrayList<>();
|
||||
CodepointsUtil.primitiveIterator(unicodeSurrogates).forEachRemaining((IntConsumer) (i) -> {
|
||||
codePoints.add(new String(Character.toChars(i)));
|
||||
});
|
||||
assertEquals(17, codePoints.size());
|
||||
for (String point : codePoints) {
|
||||
assertTrue(point.length() >= 1 && point.length() <= 2, "codepoint " + point + "is wrong size");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user