Make sure OLE2ScratchpadExtractorFactory is sorted first

Otherwise the order of the discovered extractors would depend on the
jar-loading order, which might cause unexpected side effects and missing
features in text extraction.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1914407 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2023-12-06 19:50:02 +00:00
parent 497482d4dc
commit c8c8130ae3
2 changed files with 17 additions and 1 deletions

View File

@ -88,6 +88,21 @@ public final class ExtractorFactory {
private ExtractorFactory() {
ClassLoader cl = ExtractorFactory.class.getClassLoader();
ServiceLoader.load(ExtractorProvider.class, cl).forEach(provider::add);
// loading of service-files is non-deterministic as it depends on order of loaded jars
// however we would like to "prefer" one Factory, so let's make sure the more
// powerful "ScratchpadProvider" is sorted first
provider.sort((o1, o2) -> {
if (o1.getClass() != o2.getClass()) {
if (o1.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) {
return -1;
} else if (o2.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) {
return 1;
}
}
return o1.getClass().getName().compareTo(o2.getClass().getName());
});
}
/**

View File

@ -70,7 +70,8 @@ public interface ExtractorProvider {
* @param dirs a list to be filled with directory references holding embedded
* @param nonPOIFS a list to be filled with streams which aren't based on POIFS entries
*
* @throws IOException when the format specific extraction fails because of invalid entires
* @throws IOException when the format specific extraction fails because of invalid entries
* @throws java.lang.IllegalArgumentException if implementations do not override this method
*/
default void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
throw new IllegalArgumentException("Error checking for Scratchpad embedded resources");