Improved how HSMF handles multiple recipients

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@898295 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-01-12 12:02:18 +00:00
parent ababd504b5
commit 2a4805f938
17 changed files with 817 additions and 217 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.7-SNAPSHOT" date="2010-??-??">
<action dev="POI-DEVELOPERS" type="add">Improved how HSMF handles multiple recipients</action>
<action dev="POI-DEVELOPERS" type="add">Add PublisherTextExtractor support to ExtractorFactory</action>
<action dev="POI-DEVELOPERS" type="add">Add XSLF support for text extraction from tables</action>
<action dev="POI-DEVELOPERS" type="add">Support attachments as embedded documents within the new OutlookTextExtractor</action>

View File

@ -20,6 +20,7 @@ package org.apache.poi.util;
import java.io.UnsupportedEncodingException;
import java.text.FieldPosition;
import java.text.NumberFormat;
import java.util.Iterator;
import org.apache.poi.hssf.record.RecordInputStream;
/**
@ -392,4 +393,30 @@ public class StringUtil {
return true;
}
}
/**
 * An Iterator over an array of Strings.
 *
 * The supplied array is used directly (it is not copied), and
 * a <code>null</code> array is treated as an empty one.
 * Removal is not supported.
 */
public static class StringsIterator implements Iterator<String> {
   /** The strings being iterated over, never null */
   private final String[] strings;
   /** Index of the next string to hand out */
   private int position = 0;

   /**
    * @param strings the strings to iterate over, or null for none
    */
   public StringsIterator(String[] strings) {
      if(strings != null) {
         this.strings = strings;
      } else {
         this.strings = new String[0];
      }
   }

   /**
    * @return true if there is at least one more string to return
    */
   public boolean hasNext() {
      return position < strings.length;
   }

   /**
    * Returns the next string, and moves on by one.
    *
    * @return the next string in the array
    * @throws ArrayIndexOutOfBoundsException if all the strings
    *  have already been returned. (This exception type is kept
    *  for compatibility with existing callers.)
    */
   public String next() {
      // Check before advancing, so that a failed call
      //  leaves the iterator's position unchanged
      if(position >= strings.length) {
         throw new ArrayIndexOutOfBoundsException(position);
      }
      return strings[position++];
   }

   /**
    * Not supported, as the underlying array can't shrink.
    *
    * @throws UnsupportedOperationException always
    */
   public void remove() {
      // The Iterator contract requires an unsupported remove
      //  to fail loudly, rather than silently doing nothing
      throw new UnsupportedOperationException("Can't remove from a StringsIterator");
   }
}
}

View File

@ -23,14 +23,17 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import org.apache.poi.POIDocument;
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
import org.apache.poi.hsmf.datatypes.AttachmentChunks.AttachmentChunksSorter;
import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.hsmf.parsers.POIFSChunkParser;
@ -48,7 +51,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class MAPIMessage extends POIDocument {
private Chunks mainChunks;
private NameIdChunks nameIdChunks;
private RecipientChunks recipientChunks;
private RecipientChunks[] recipientChunks;
private AttachmentChunks[] attachmentChunks;
private boolean returnNullOnMissingChunk = false;
@ -102,6 +105,7 @@ public class MAPIMessage extends POIDocument {
// Grab interesting bits
ArrayList<AttachmentChunks> attachments = new ArrayList<AttachmentChunks>();
ArrayList<RecipientChunks> recipients = new ArrayList<RecipientChunks>();
for(ChunkGroup group : chunkGroups) {
// Should only ever be one of these
if(group instanceof Chunks) {
@ -109,7 +113,7 @@ public class MAPIMessage extends POIDocument {
} else if(group instanceof NameIdChunks) {
nameIdChunks = (NameIdChunks)group;
} else if(group instanceof RecipientChunks) {
recipientChunks = (RecipientChunks)group;
recipients.add( (RecipientChunks)group );
}
// Add to list(s)
@ -118,6 +122,12 @@ public class MAPIMessage extends POIDocument {
}
}
attachmentChunks = attachments.toArray(new AttachmentChunks[attachments.size()]);
recipientChunks = recipients.toArray(new RecipientChunks[recipients.size()]);
// Now sort these chunks lists so they're in ascending order,
// rather than in random filesystem order
Arrays.sort(attachmentChunks, new AttachmentChunksSorter());
Arrays.sort(recipientChunks, new RecipientChunksSorter());
}
@ -154,15 +164,6 @@ public class MAPIMessage extends POIDocument {
return getStringFromChunk(mainChunks.subjectChunk);
}
/**
* Gets the display value of the "TO" line of the outlook message
* This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
* @throws ChunkNotFoundException
*/
public String getDisplayTo() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayToChunk);
}
/**
* Gets the display value of the "FROM" line of the outlook message
* This is not the actual address that was sent from but the formated display of the user name.
@ -173,8 +174,25 @@ public class MAPIMessage extends POIDocument {
}
/**
* Gets the display value of the "TO" line of the outlook message
* This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
* Gets the display value of the "TO" line of the outlook message.
* If there are multiple recipients, they will be separated
* by semicolons.
* This is not the actual list of addresses/values that will be
* sent to if you click Reply in the email - those are stored
* in {@link RecipientChunks}.
* @throws ChunkNotFoundException
*/
public String getDisplayTo() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayToChunk);
}
/**
* Gets the display value of the "CC" line of the outlook message.
* If there are multiple recipients, they will be separated
* by semicolons.
* This is not the actual list of addresses/values that will be
* sent to if you click Reply in the email - those are stored
* in {@link RecipientChunks}.
* @throws ChunkNotFoundException
*/
public String getDisplayCC() throws ChunkNotFoundException {
@ -182,31 +200,88 @@ public class MAPIMessage extends POIDocument {
}
/**
* Gets the display value of the "TO" line of the outlook message
* This is not the actual list of addresses/values that will be sent to if you click Reply in the email.
* Gets the display value of the "BCC" line of the outlook message.
* If there are multiple recipients, they will be separated
* by semicolons.
* This is not the actual list of addresses/values that will be
* sent to if you click Reply in the email - those are stored
* in {@link RecipientChunks}.
* This will only be present in sent emails, not received ones!
* @throws ChunkNotFoundException
*/
public String getDisplayBCC() throws ChunkNotFoundException {
return getStringFromChunk(mainChunks.displayBCCChunk);
}
/**
* Returns the recipient's email address, checking all the
* likely chunks in search of it.
* Returns all the recipients' email address, separated by
* semicolons. Checks all the likely chunks in search of
* the addresses.
*/
public String getRecipientEmailAddress() throws ChunkNotFoundException {
if(recipientChunks == null) {
return toSemicolonList(getRecipientEmailAddressList());
}
/**
* Returns an array of all the recipient's email address, normally
* in TO then CC then BCC order.
* Checks all the likely chunks in search of the addresses.
*/
public String[] getRecipientEmailAddressList() throws ChunkNotFoundException {
if(recipientChunks == null || recipientChunks.length == 0) {
throw new ChunkNotFoundException("No recipients section present");
}
String email = recipientChunks.getRecipientEmailAddress();
String[] emails = new String[recipientChunks.length];
for(int i=0; i<emails.length; i++) {
RecipientChunks rc = recipientChunks[i];
String email = rc.getRecipientEmailAddress();
if(email != null) {
return email;
emails[i] = email;
} else {
throw new ChunkNotFoundException();
throw new ChunkNotFoundException("No email address holding chunks found for the " + (i+1) + "th recipient");
}
}
return emails;
}
/**
 * Returns the names of all the recipients as a single
 * string, with a semicolon between each one.
 * The names come from {@link #getRecipientNamesList()}.
 * See also {@link #getDisplayTo()}, {@link #getDisplayCC()}
 * and {@link #getDisplayBCC()}.
 *
 * @throws ChunkNotFoundException if the names can't be found
 */
public String getRecipientNames() throws ChunkNotFoundException {
   String[] allNames = getRecipientNamesList();
   return toSemicolonList(allNames);
}
/**
 * Returns the name of every recipient, one array entry per
 * recipient chunk group, in the same order as the recipient
 * chunks are held (normally TO, then CC, then BCC).
 * Each name is looked up via
 * {@link RecipientChunks#getRecipientName()}.
 * See also {@link #getDisplayTo()}, {@link #getDisplayCC()}
 * and {@link #getDisplayBCC()}.
 *
 * @throws ChunkNotFoundException if there are no recipient
 *  chunks at all, or if no name can be found for one of them
 */
public String[] getRecipientNamesList() throws ChunkNotFoundException {
   if(recipientChunks == null || recipientChunks.length == 0) {
      throw new ChunkNotFoundException("No recipients section present");
   }

   String[] result = new String[recipientChunks.length];
   int index = 0;
   for(RecipientChunks recipient : recipientChunks) {
      String recipientName = recipient.getRecipientName();
      if(recipientName == null) {
         // Give up on the first recipient that we can't name
         throw new ChunkNotFoundException("No display name holding chunks found for the " + (index+1) + "th recipient");
      }
      result[index] = recipientName;
      index++;
   }
   return result;
}
/**
* Gets the conversation topic of the parsed Outlook Message.
@ -219,7 +294,8 @@ public class MAPIMessage extends POIDocument {
/**
* Gets the message class of the parsed Outlook Message.
* (Yes, you can use this to determine if a message is a calendar item, note, or actual outlook Message)
* (Yes, you can use this to determine if a message is a calendar
* item, note, or actual outlook Message)
* For emails the class will be IPM.Note
*
* @throws ChunkNotFoundException
@ -249,10 +325,13 @@ public class MAPIMessage extends POIDocument {
return mainChunks;
}
/**
* Gets the recipient details chunks, or
* null if there aren't any
* Gets all the recipient details chunks.
* These will normally be in the order of:
* * TO recipients, in the order returned by {@link #getDisplayTo()}
* * CC recipients, in the order returned by {@link #getDisplayCC()}
* * BCC recipients, in the order returned by {@link #getDisplayBCC()}
*/
public RecipientChunks getRecipientDetailsChunks() {
public RecipientChunks[] getRecipientDetailsChunks() {
return recipientChunks;
}
/**
@ -297,4 +376,19 @@ public class MAPIMessage extends POIDocument {
}
/**
 * Joins the given strings into a single string, placing
 * "; " between each neighbouring pair.
 *
 * @param l the strings to join
 * @return the joined up string, empty if there were no entries
 */
private String toSemicolonList(String[] l) {
   StringBuilder joined = new StringBuilder();
   for(int i=0; i<l.length; i++) {
      // Separator goes before every entry except the first
      if(i > 0) {
         joined.append("; ");
      }
      joined.append(l[i]);
   }
   return joined.toString();
}
}

View File

@ -17,6 +17,7 @@
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
/**
@ -99,4 +100,14 @@ public class AttachmentChunks implements ChunkGroup {
// And add to the main list
allChunks.add(chunk);
}
/**
 * Comparator that puts attachments into ascending order
 * of their POIFS name, and hence of the attachment
 * number held at the end of that name.
 */
public static class AttachmentChunksSorter implements Comparator<AttachmentChunks> {
   @Override
   public int compare(AttachmentChunks a, AttachmentChunks b) {
      // Let the names decide the ordering between the two
      int ordering = a.poifsName.compareTo(b.poifsName);
      return ordering;
   }
}
}

View File

@ -18,20 +18,29 @@
package org.apache.poi.hsmf.datatypes;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
/**
* Collection of convenience chunks for the
* Recip(ient) part of an outlook file
* Recip(ient) part of an outlook file.
*
* If a message has multiple recipients, there will be
* several of these.
*/
public final class RecipientChunks implements ChunkGroup {
public static final String PREFIX = "__recip_version1.0_#";
public static final int RECIPIENT_NAME = 0x3001;
public static final int DELIVERY_TYPE = 0x3002;
public static final int RECIPIENT_EMAIL_ADDRESS = 0x3003;
public static final int RECIPIENT_SEARCH = 0x300B;
public static final int RECIPIENT_EMAIL = 0x39FE;
public static final int RECIPIENT_SMTP_ADDRESS = 0x39FE;
public static final int RECIPIENT_DISPLAY_NAME = 0x5FF6;
/** Our 0 based position in the list of recipients */
public int recipientNumber;
/** TODO */
public ByteChunk recipientSearchChunk;
@ -42,27 +51,84 @@ public final class RecipientChunks implements ChunkGroup {
*/
public StringChunk recipientNameChunk;
/**
* The email address of the recipient, but
* The email address of the recipient, which
* could be in SMTP or SEARCH format, but
* isn't always present...
*/
public StringChunk recipientEmailChunk;
/**
* The smtp destination email address of
* the recipient, but isn't always present...
*/
public StringChunk recipientSMTPChunk;
/**
* Normally EX or SMTP. Will generally affect
* where the email address ends up.
*/
public StringChunk deliveryTypeChunk;
/**
* The display name of the recipient.
* Normally seems to hold the same value
* as in recipientNameChunk
*/
public StringChunk recipientDisplayNameChunk;
/**
 * Creates the chunk group, working out our recipient number
 * from the hexadecimal digits which follow the last '#'
 * in the supplied name.
 * If the name has no '#', or the text after it isn't a valid
 * hexadecimal number, the recipient number is left as -1.
 *
 * @param name the name to take the recipient number from
 */
public RecipientChunks(String name) {
   // Until we find out otherwise, we don't know our number
   recipientNumber = -1;

   int hashAt = name.lastIndexOf('#');
   if(hashAt < 0) {
      // No number part to look at
      return;
   }

   String hexNumber = name.substring(hashAt+1);
   try {
      recipientNumber = Integer.parseInt(hexNumber, 16);
   } catch(NumberFormatException e) {
      // Report it, but carry on with the number as -1
      System.err.println("Invalid recipient number in name " + name);
   }
}
/**
 * Looks for the recipient's name, taking it from
 * {@link #recipientNameChunk} if that is present, and
 * otherwise from {@link #recipientDisplayNameChunk}.
 *
 * @return the name, or null if neither chunk is present
 */
public String getRecipientName() {
   // The name chunk wins, the display name is the fallback
   StringChunk holder = recipientNameChunk;
   if(holder == null) {
      holder = recipientDisplayNameChunk;
   }

   return (holder == null) ? null : holder.getValue();
}
/**
* Tries to find their email address, in
* whichever chunk holds it given the
* delivery type.
*/
public String getRecipientEmailAddress() {
if(recipientEmailChunk != null) {
return recipientEmailChunk.getValue();
// If we have this, it really has the email
if(recipientSMTPChunk != null) {
return recipientSMTPChunk.getValue();
}
// Probably in the name field
// This might be a real email, or might be
// in CN=... format
if(recipientEmailChunk != null) {
String email = recipientEmailChunk.getValue();
int cne = email.indexOf("/CN=");
if(cne == -1) {
// Normal smtp address
return email;
} else {
// /O=..../CN=em@ail
return email.substring(cne+4);
}
}
// Might be in the name field, check there
if(recipientNameChunk != null) {
String name = recipientNameChunk.getValue();
if(name.indexOf('@') > -1) {
@ -73,13 +139,16 @@ public final class RecipientChunks implements ChunkGroup {
return name;
}
}
// Check the search chunk
// Check the search chunk, see if it's
// encoded as a SMTP destination in there.
if(recipientSearchChunk != null) {
String search = recipientSearchChunk.getAs7bitString();
if(search.indexOf("SMTP:") != -1) {
return search.substring(search.indexOf("SMTP:") + 5);
}
}
// Can't find it
return null;
}
@ -104,11 +173,17 @@ public final class RecipientChunks implements ChunkGroup {
recipientSearchChunk = (ByteChunk)chunk;
break;
case RECIPIENT_NAME:
recipientDisplayNameChunk = (StringChunk)chunk;
break;
case RECIPIENT_DISPLAY_NAME:
recipientNameChunk = (StringChunk)chunk;
break;
case RECIPIENT_EMAIL:
case RECIPIENT_EMAIL_ADDRESS:
recipientEmailChunk = (StringChunk)chunk;
break;
case RECIPIENT_SMTP_ADDRESS:
recipientSMTPChunk = (StringChunk)chunk;
break;
case DELIVERY_TYPE:
deliveryTypeChunk = (StringChunk)chunk;
break;
@ -117,4 +192,18 @@ public final class RecipientChunks implements ChunkGroup {
// And add to the main list
allChunks.add(chunk);
}
/**
 * Comparator that puts recipients into ascending order
 * of their {@link RecipientChunks#recipientNumber}.
 */
public static class RecipientChunksSorter implements Comparator<RecipientChunks> {
   @Override
   public int compare(RecipientChunks a, RecipientChunks b) {
      int left = a.recipientNumber;
      int right = b.recipientNumber;
      if(left == right) {
         return 0;
      }
      return (left < right) ? -1 : +1;
   }
}
}

View File

@ -25,6 +25,7 @@ import org.apache.poi.hsmf.MAPIMessage;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.StringUtil.StringsIterator;
/**
* A text extractor for HSMF (Outlook) .msg files.
@ -58,20 +59,33 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
MAPIMessage msg = (MAPIMessage)document;
StringBuffer s = new StringBuffer();
StringsIterator emails;
try {
emails = new StringsIterator(
msg.getRecipientEmailAddressList()
);
} catch(ChunkNotFoundException e) {
emails = new StringsIterator(new String[0]);
}
try {
s.append("From: " + msg.getDisplayFrom() + "\n");
} catch(ChunkNotFoundException e) {}
// For To, CC and BCC, try to match the names
// up with their email addresses. Relies on the
// Recipient Chunks being in the same order as
// people in To + CC + BCC.
try {
s.append("To: " + msg.getDisplayTo() + "\n");
handleEmails(s, "To", msg.getDisplayTo(), emails);
} catch(ChunkNotFoundException e) {}
try {
if(msg.getDisplayCC().length() > 0)
s.append("CC: " + msg.getDisplayCC() + "\n");
handleEmails(s, "CC", msg.getDisplayCC(), emails);
} catch(ChunkNotFoundException e) {}
try {
if(msg.getDisplayBCC().length() > 0)
s.append("BCC: " + msg.getDisplayBCC() + "\n");
handleEmails(s, "BCC", msg.getDisplayBCC(), emails);
} catch(ChunkNotFoundException e) {}
try {
SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss");
s.append("Date: " + f.format(msg.getMessageDate().getTime()) + "\n");
@ -85,4 +99,38 @@ public class OutlookTextExtactor extends POIOLE2TextExtractor {
return s.toString();
}
/**
 * Writes one header line, such as To or CC, to the buffer.
 * The display text, eg "Nick; Jim", is split up on its
 * semicolons, and each name is paired with the next email
 * address from the iterator, giving something like
 * {@code "Nick <nick@example.com>; Jim <jim@example.com>"}.
 * Where the address is the same as the name, or the iterator
 * has run out, only the name is written.
 * Nothing at all is written if the display text is null
 * or empty.
 *
 * @param s the buffer to append the line to
 * @param type the label for the line, eg "To"
 * @param displayText the semicolon separated display names
 * @param emails the email addresses, which are consumed as
 *  they are used, one per name
 */
protected void handleEmails(StringBuffer s, String type, String displayText, StringsIterator emails) {
   if(displayText == null || displayText.length() == 0) {
      return;
   }

   String[] displayNames = displayText.split(";\\s*");
   s.append(type + ": ");
   for(int i=0; i<displayNames.length; i++) {
      String displayName = displayNames[i];
      if(i > 0) {
         s.append("; ");
      }
      s.append(displayName);

      // No more addresses? Then it's just the name
      if(! emails.hasNext()) {
         continue;
      }

      // Add the address in <>, unless the name
      //  turned out to be the address itself
      String address = emails.next();
      if(! address.equals(displayName)) {
         s.append( " <" + address + ">");
      }
   }
   s.append("\n");
}
}

View File

@ -67,7 +67,7 @@ public final class POIFSChunkParser {
group = new NameIdChunks();
}
if(dir.getName().startsWith(RecipientChunks.PREFIX)) {
group = new RecipientChunks();
group = new RecipientChunks(dir.getName());
}
if(group != null) {

View File

@ -21,6 +21,7 @@ import junit.framework.Test;
import junit.framework.TestSuite;
import org.apache.poi.hsmf.datatypes.*;
import org.apache.poi.hsmf.extractor.TestOutlookTextExtractor;
import org.apache.poi.hsmf.parsers.*;
public final class AllHSMFTests {
@ -34,6 +35,9 @@ public final class AllHSMFTests {
suite.addTestSuite(TestChunkData.class);
suite.addTestSuite(TestTypes.class);
suite.addTestSuite(TestSorters.class);
suite.addTestSuite(TestOutlookTextExtractor.class);
suite.addTestSuite(TestPOIFSChunkParser.class);

View File

@ -52,8 +52,17 @@ public final class TestBasics extends TestCase {
public void testRecipientEmail() throws Exception {
assertEquals("travis@overwrittenstack.com", simple.getRecipientEmailAddress());
assertEquals("kevin.roast@alfresco.org", quick.getRecipientEmailAddress());
assertEquals("randall.scarberry@pnl.gov", outlook30.getRecipientEmailAddress());
assertEquals("nicolas1.23456@free.fr", attachments.getRecipientEmailAddress());
// This one has lots...
assertEquals(18, outlook30.getRecipientEmailAddressList().length);
assertEquals("shawn.bohn@pnl.gov; gus.calapristi@pnl.gov; Richard.Carter@pnl.gov; " +
"barb.cheney@pnl.gov; nick.cramer@pnl.gov; vern.crow@pnl.gov; Laura.Curtis@pnl.gov; " +
"julie.dunkle@pnl.gov; david.gillen@pnl.gov; michelle@pnl.gov; Jereme.Haack@pnl.gov; " +
"Michelle.Hart@pnl.gov; ranata.johnson@pnl.gov; grant.nakamura@pnl.gov; " +
"debbie.payne@pnl.gov; stuart.rose@pnl.gov; randall.scarberry@pnl.gov; Leigh.Williams@pnl.gov",
outlook30.getRecipientEmailAddress()
);
}
/**

View File

@ -0,0 +1,97 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hsmf.datatypes;
import java.util.Arrays;
import org.apache.poi.hsmf.datatypes.AttachmentChunks.AttachmentChunksSorter;
import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import junit.framework.TestCase;
/**
 * Verifies that the sorters for the attachment and recipient
 * chunk groups put the groups into ascending order.
 */
public final class TestSorters extends TestCase {
   /**
    * Attachments should come out ordered by their POIFS name
    */
   public void testAttachmentChunksSorter() {
      // Simple
      AttachmentChunks[] pair = new AttachmentChunks[] {
         new AttachmentChunks("__attach_version1.0_#00000001"),
         new AttachmentChunks("__attach_version1.0_#00000000"),
      };
      Arrays.sort(pair, new AttachmentChunksSorter());
      assertAttachmentOrder(pair,
            "__attach_version1.0_#00000000",
            "__attach_version1.0_#00000001"
      );

      // Lots, with gaps
      AttachmentChunks[] several = new AttachmentChunks[] {
         new AttachmentChunks("__attach_version1.0_#00000101"),
         new AttachmentChunks("__attach_version1.0_#00000001"),
         new AttachmentChunks("__attach_version1.0_#00000002"),
         new AttachmentChunks("__attach_version1.0_#00000005"),
         new AttachmentChunks("__attach_version1.0_#00000026"),
         new AttachmentChunks("__attach_version1.0_#00000000"),
         new AttachmentChunks("__attach_version1.0_#000000AB"),
      };
      Arrays.sort(several, new AttachmentChunksSorter());
      assertAttachmentOrder(several,
            "__attach_version1.0_#00000000",
            "__attach_version1.0_#00000001",
            "__attach_version1.0_#00000002",
            "__attach_version1.0_#00000005",
            "__attach_version1.0_#00000026",
            "__attach_version1.0_#000000AB",
            "__attach_version1.0_#00000101"
      );
   }

   /**
    * Recipients should come out ordered by their number
    */
   public void testRecipientChunksSorter() {
      // Simple
      RecipientChunks[] pair = new RecipientChunks[] {
         new RecipientChunks("__recip_version1.0_#00000001"),
         new RecipientChunks("__recip_version1.0_#00000000"),
      };
      Arrays.sort(pair, new RecipientChunksSorter());
      assertRecipientOrder(pair, 0, 1);

      // Lots, with gaps
      RecipientChunks[] several = new RecipientChunks[] {
         new RecipientChunks("__recip_version1.0_#00020001"),
         new RecipientChunks("__recip_version1.0_#000000FF"),
         new RecipientChunks("__recip_version1.0_#00000205"),
         new RecipientChunks("__recip_version1.0_#00000001"),
         new RecipientChunks("__recip_version1.0_#00000005"),
         new RecipientChunks("__recip_version1.0_#00000009"),
         new RecipientChunks("__recip_version1.0_#00000404"),
         new RecipientChunks("__recip_version1.0_#00000000"),
      };
      Arrays.sort(several, new RecipientChunksSorter());
      assertRecipientOrder(several,
            0, 1, 5, 9, 0xFF, 0x205, 0x404, 0x20001
      );
   }

   /**
    * Checks, entry by entry, that the attachments have
    * the given POIFS names.
    */
   private static void assertAttachmentOrder(AttachmentChunks[] sorted, String... expectedNames) {
      for(int i=0; i<expectedNames.length; i++) {
         assertEquals(expectedNames[i], sorted[i].getPOIFSName());
      }
   }

   /**
    * Checks, entry by entry, that the recipients have
    * the given recipient numbers.
    */
   private static void assertRecipientOrder(RecipientChunks[] sorted, int... expectedNumbers) {
      for(int i=0; i<expectedNumbers.length; i++) {
         assertEquals(expectedNumbers[i], sorted[i].recipientNumber);
      }
   }
}

View File

@ -53,7 +53,7 @@ public final class TestOutlookTextExtractor extends TestCase {
String text = ext.getText();
assertContains(text, "From: Kevin Roast\n");
assertContains(text, "To: Kevin Roast\n");
assertContains(text, "To: Kevin Roast <kevin.roast@alfresco.org>\n");
assertEquals(-1, text.indexOf("CC:"));
assertEquals(-1, text.indexOf("BCC:"));
assertContains(text, "Subject: Test the content transformer\n");
@ -92,4 +92,77 @@ public final class TestOutlookTextExtractor extends TestCase {
assertEquals(inp, poifs);
assertEquals(inp, mapi);
}
/**
 * Checks the text extracted from emails we sent, which
 * have several recipients on each of To, CC and BCC.
 * The same checks are run against both the regular
 * and the unicode sample files.
 */
public void testSentWithMulipleRecipients() throws Exception {
   // The sample messages were addressed as follows:
   //  To:  'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>,
   //       'Paul Holmes-Higgin' <paul.hh@alfresco.com>,
   //       'Mike Farman' <mikef@alfresco.com>
   //  Cc:  nickb@alfresco.com, nick.burch@alfresco.com,
   //       'Roy Wetherall' <roy.wetherall@alfresco.com>
   //  Bcc: 'David Caruana' <dave.caruana@alfresco.com>,
   //       'Vonka Jan' <jan.vonka@alfresco.com>
   String[] sampleNames = new String[] {
         "example_sent_regular.msg", "example_sent_unicode.msg"
   };
   for(int i=0; i<sampleNames.length; i++) {
      POIFSFileSystem fs = new POIFSFileSystem(
            new FileInputStream(samples.getFile(sampleNames[i]))
      );
      MAPIMessage message = new MAPIMessage(fs);
      OutlookTextExtactor extractor = new OutlookTextExtactor(message);
      String extracted = extractor.getText();

      // Who it came from
      assertContains(extracted, "From: Mike Farman\n");

      // All three kinds of recipient should be there,
      //  each name having its email address with it
      assertContains(extracted, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
            "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n");
      assertContains(extracted, "CC: 'nickb@alfresco.com' <nickb@alfresco.com>; " +
            "'nick.burch@alfresco.com' <nick.burch@alfresco.com>; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
      assertContains(extracted, "BCC: 'David Caruana' <dave.caruana@alfresco.com>; " +
            "'Vonka Jan' <jan.vonka@alfresco.com>\n");

      // Subject, no date, and the body
      assertContains(extracted, "Subject: This is a test message please ignore\n");
      assertEquals(-1, extracted.indexOf("Date:"));
      assertContains(extracted, "The quick brown fox jumps over the lazy dog");
   }
}
/**
 * Checks the text extracted from emails we received, which
 * have several recipients on each of To and CC, but no BCC.
 * The same checks are run against both the regular
 * and the unicode sample files.
 */
public void testReceivedWithMultipleRecipients() throws Exception {
   // The sample messages were addressed as follows:
   //  To:  'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>,
   //       'Paul Holmes-Higgin' <paul.hh@alfresco.com>,
   //       'Mike Farman' <mikef@alfresco.com>
   //  Cc:  nickb@alfresco.com, nick.burch@alfresco.com,
   //       'Roy Wetherall' <roy.wetherall@alfresco.com>
   //  (No BCC shown)
   String[] sampleNames = new String[] {
         "example_received_regular.msg", "example_received_unicode.msg"
   };
   for(int i=0; i<sampleNames.length; i++) {
      POIFSFileSystem fs = new POIFSFileSystem(
            new FileInputStream(samples.getFile(sampleNames[i]))
      );
      MAPIMessage message = new MAPIMessage(fs);
      OutlookTextExtactor extractor = new OutlookTextExtactor(message);
      String extracted = extractor.getText();

      // Who it came from
      assertContains(extracted, "From: Mike Farman\n");

      // To and CC should be there, but there's no BCC line
      assertContains(extracted, "To: 'Ashutosh Dandavate' <ashutosh.dandavate@alfresco.com>; " +
            "'Paul Holmes-Higgin' <paul.hh@alfresco.com>; 'Mike Farman' <mikef@alfresco.com>\n");
      assertContains(extracted, "CC: nickb@alfresco.com; " +
            "nick.burch@alfresco.com; 'Roy Wetherall' <roy.wetherall@alfresco.com>\n");
      assertEquals(-1, extracted.indexOf("BCC:"));

      // Subject, no date, and the body
      assertContains(extracted, "Subject: This is a test message please ignore\n");
      assertEquals(-1, extracted.indexOf("Date:"));
      assertContains(extracted, "The quick brown fox jumps over the lazy dog");
   }
}
}

View File

@ -21,6 +21,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Calendar;
import org.apache.poi.hsmf.MAPIMessage;
@ -29,6 +30,7 @@ import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
@ -81,7 +83,7 @@ public final class TestPOIFSChunkParser extends TestCase {
}
}
public void testFindsRecips() throws IOException {
public void testFindsRecips() throws IOException, ChunkNotFoundException {
POIFSFileSystem simple = new POIFSFileSystem(
new FileInputStream(samples.getFile("quick.msg"))
);
@ -95,7 +97,9 @@ public final class TestPOIFSChunkParser extends TestCase {
assertTrue(groups[2] instanceof NameIdChunks);
RecipientChunks recips = (RecipientChunks)groups[1];
assertEquals("kevin.roast@alfresco.org", recips.recipientEmailChunk.getValue());
assertEquals("kevin.roast@alfresco.org", recips.recipientSMTPChunk.getValue());
assertEquals("/O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=Kevin.roast@ben",
recips.recipientEmailChunk.getValue());
String search = new String(recips.recipientSearchChunk.getValue(), "ASCII");
assertEquals("CN=KEVIN.ROAST@BEN\0", search.substring(search.length()-19));
@ -103,20 +107,123 @@ public final class TestPOIFSChunkParser extends TestCase {
// Now via MAPIMessage
MAPIMessage msg = new MAPIMessage(simple);
assertNotNull(msg.getRecipientDetailsChunks());
assertEquals(1, msg.getRecipientDetailsChunks().length);
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks().recipientEmailChunk.getValue());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].recipientSMTPChunk.getValue());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].getRecipientEmailAddress());
assertEquals("Kevin Roast", msg.getRecipientDetailsChunks()[0].getRecipientName());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientEmailAddress());
// Try both SMTP and EX files for recipient
assertEquals("EX", msg.getRecipientDetailsChunks().deliveryTypeChunk.getValue());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks().recipientEmailChunk.getValue());
assertEquals("EX", msg.getRecipientDetailsChunks()[0].deliveryTypeChunk.getValue());
assertEquals("kevin.roast@alfresco.org", msg.getRecipientDetailsChunks()[0].recipientSMTPChunk.getValue());
assertEquals("/O=HOSTEDSERVICE2/OU=FIRST ADMINISTRATIVE GROUP/CN=RECIPIENTS/CN=Kevin.roast@ben",
msg.getRecipientDetailsChunks()[0].recipientEmailChunk.getValue());
// Now look at another message
msg = new MAPIMessage(new POIFSFileSystem(
new FileInputStream(samples.getFile("simple_test_msg.msg"))
));
assertEquals("SMTP", msg.getRecipientDetailsChunks().deliveryTypeChunk.getValue());
assertEquals(null, msg.getRecipientDetailsChunks().recipientEmailChunk);
assertEquals("travis@overwrittenstack.com", msg.getRecipientDetailsChunks().recipientNameChunk.getValue());
assertNotNull(msg.getRecipientDetailsChunks());
assertEquals(1, msg.getRecipientDetailsChunks().length);
assertEquals("SMTP", msg.getRecipientDetailsChunks()[0].deliveryTypeChunk.getValue());
assertEquals(null, msg.getRecipientDetailsChunks()[0].recipientSMTPChunk);
assertEquals(null, msg.getRecipientDetailsChunks()[0].recipientNameChunk);
assertEquals("travis@overwrittenstack.com", msg.getRecipientDetailsChunks()[0].recipientEmailChunk.getValue());
assertEquals("travis@overwrittenstack.com", msg.getRecipientEmailAddress());
}
/**
 * Verifies that all six recipients of example_received_unicode.msg are
 *  found: first as raw chunk groups in file system order, then after
 *  sorting with {@link RecipientChunksSorter}, and finally through the
 *  recipient list accessors on {@link MAPIMessage}.
 */
public void testFindsMultipleRecipients() throws IOException, ChunkNotFoundException {
    POIFSFileSystem multiple = new POIFSFileSystem(
        new FileInputStream(samples.getFile("example_received_unicode.msg"))
    );

    // Look up each recipient directory; the returned entries are not used.
    // NOTE(review): presumably getEntry fails with an exception when the
    //  entry is missing, which is what makes these lookups a check - confirm
    String[] recipientDirectories = {
        "__recip_version1.0_#00000000",
        "__recip_version1.0_#00000001",
        "__recip_version1.0_#00000002",
        "__recip_version1.0_#00000003",
        "__recip_version1.0_#00000004",
        "__recip_version1.0_#00000005",
    };
    for (String directoryName : recipientDirectories) {
        multiple.getRoot().getEntry(directoryName);
    }

    // Parse, and check the kind of group found at each position
    ChunkGroup[] groups = POIFSChunkParser.parse(multiple.getRoot());
    assertEquals(9, groups.length);
    assertTrue(groups[0] instanceof Chunks);
    assertTrue(groups[1] instanceof RecipientChunks);
    assertTrue(groups[2] instanceof AttachmentChunks);
    for (int g = 3; g <= 7; g++) {
        assertTrue(groups[g] instanceof RecipientChunks);
    }
    assertTrue(groups[8] instanceof NameIdChunks);

    // In FS order initially
    int[] recipientGroupPositions = { 1, 3, 4, 5, 6, 7 };
    RecipientChunks[] chunks = new RecipientChunks[recipientGroupPositions.length];
    for (int i = 0; i < recipientGroupPositions.length; i++) {
        chunks[i] = (RecipientChunks) groups[recipientGroupPositions[i]];
    }
    assertEquals(6, chunks.length);

    int[] fsOrderNumbers = { 0, 4, 3, 2, 1, 5 };
    for (int i = 0; i < fsOrderNumbers.length; i++) {
        assertEquals(fsOrderNumbers[i], chunks[i].recipientNumber);
    }

    // Check
    String[] fsOrderNames = {
        "'Ashutosh Dandavate'",
        "nick.burch@alfresco.com",
        "nickb@alfresco.com",
        "'Mike Farman'",
        "'Paul Holmes-Higgin'",
        "'Roy Wetherall'",
    };
    String[] fsOrderEmails = {
        "ashutosh.dandavate@alfresco.com",
        "nick.burch@alfresco.com",
        "nickb@alfresco.com",
        "mikef@alfresco.com",
        "paul.hh@alfresco.com",
        "roy.wetherall@alfresco.com",
    };
    for (int i = 0; i < fsOrderNames.length; i++) {
        assertEquals(fsOrderNames[i], chunks[i].getRecipientName());
        assertEquals(fsOrderEmails[i], chunks[i].getRecipientEmailAddress());
    }

    // Now sort, and re-check
    String[] sortedNames = {
        "'Ashutosh Dandavate'",
        "'Paul Holmes-Higgin'",
        "'Mike Farman'",
        "nickb@alfresco.com",
        "nick.burch@alfresco.com",
        "'Roy Wetherall'",
    };
    String[] sortedEmails = {
        "ashutosh.dandavate@alfresco.com",
        "paul.hh@alfresco.com",
        "mikef@alfresco.com",
        "nickb@alfresco.com",
        "nick.burch@alfresco.com",
        "roy.wetherall@alfresco.com",
    };
    Arrays.sort(chunks, new RecipientChunksSorter());
    for (int i = 0; i < sortedNames.length; i++) {
        assertEquals(sortedNames[i], chunks[i].getRecipientName());
        assertEquals(sortedEmails[i], chunks[i].getRecipientEmailAddress());
    }

    // Finally check on message, which should report the same sorted order
    MAPIMessage msg = new MAPIMessage(multiple);
    assertEquals(6, msg.getRecipientEmailAddressList().length);
    assertEquals(6, msg.getRecipientNamesList().length);
    for (int i = 0; i < sortedNames.length; i++) {
        assertEquals(sortedNames[i], msg.getRecipientNamesList()[i]);
    }
    for (int i = 0; i < sortedEmails.length; i++) {
        assertEquals(sortedEmails[i], msg.getRecipientEmailAddressList()[i]);
    }
}
public void testFindsNameId() throws IOException {

View File

@ -20,6 +20,8 @@ package org.apache.poi.util;
import java.io.UnsupportedEncodingException;
import java.text.NumberFormat;
import org.apache.poi.util.StringUtil.StringsIterator;
import junit.framework.TestCase;
/**
@ -158,5 +160,43 @@ public final class TestStringUtil extends TestCase {
return nf.format( num );
}
/**
 * Walks a StringsIterator over arrays of zero, one and three strings,
 *  checking that each element is returned in order, that hasNext() turns
 *  false once the array is used up, and that a further next() call then
 *  throws ArrayIndexOutOfBoundsException.
 */
public void testStringsIterator() {
    String[][] inputs = {
        new String[0],
        new String[] {"1"},
        new String[] {"1","2","3"},
    };

    for (String[] input : inputs) {
        StringsIterator iterator = new StringsIterator(input);

        // Every element should be announced by hasNext() and then returned
        for (String expected : input) {
            assertTrue(iterator.hasNext());
            assertEquals(expected, iterator.next());
        }

        // Now exhausted: nothing more to offer, and next() must throw
        assertFalse(iterator.hasNext());
        try {
            iterator.next();
            fail();
        } catch(ArrayIndexOutOfBoundsException ignored) {
            // Expected - reading past the end of the array
        }
    }
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.