001/**************************************************************** 002 * Licensed to the Apache Software Foundation (ASF) under one * 003 * or more contributor license agreements. See the NOTICE file * 004 * distributed with this work for additional information * 005 * regarding copyright ownership. The ASF licenses this file * 006 * to you under the Apache License, Version 2.0 (the * 007 * "License"); you may not use this file except in compliance * 008 * with the License. You may obtain a copy of the License at * 009 * * 010 * http://www.apache.org/licenses/LICENSE-2.0 * 011 * * 012 * Unless required by applicable law or agreed to in writing, * 013 * software distributed under the License is distributed on an * 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * 015 * KIND, either express or implied. See the License for the * 016 * specific language governing permissions and limitations * 017 * under the License. * 018 ****************************************************************/ 019 020package org.apache.james.mime4j.parser; 021 022import java.io.IOException; 023import java.io.InputStream; 024 025import org.apache.james.mime4j.MimeException; 026import org.apache.james.mime4j.codec.DecodeMonitor; 027import org.apache.james.mime4j.stream.BodyDescriptor; 028import org.apache.james.mime4j.stream.BodyDescriptorBuilder; 029import org.apache.james.mime4j.stream.EntityState; 030import org.apache.james.mime4j.stream.Field; 031import org.apache.james.mime4j.stream.MimeConfig; 032import org.apache.james.mime4j.stream.MimeTokenStream; 033import org.apache.james.mime4j.stream.RecursionMode; 034 035/** 036 * <p> 037 * Parses MIME (or RFC822) message streams of bytes or characters and reports 038 * parsing events to a {@link ContentHandler} instance. 039 * </p> 040 * <p> 041 * Typical usage:<br/> 042 * <pre> 043 * ContentHandler handler = new MyHandler(); 044 * MimeConfig config = new MimeConfig(); 045 * MimeStreamParser parser = new MimeStreamParser(config); 046 * parser.setContentHandler(handler); 047 * InputStream instream = new FileInputStream("mime.msg"); 048 * try { 049 * parser.parse(instream); 050 * } finally { 051 * instream.close(); 052 * } 053 * </pre> 054 */ 055public class MimeStreamParser { 056 057 private ContentHandler handler = null; 058 private boolean contentDecoding; 059 060 private final MimeTokenStream mimeTokenStream; 061 062 public MimeStreamParser(MimeTokenStream tokenStream) { 063 super(); 064 this.mimeTokenStream = tokenStream; 065 this.contentDecoding = false; 066 } 067 068 public MimeStreamParser( 069 final MimeConfig config, 070 final DecodeMonitor monitor, 071 final BodyDescriptorBuilder bodyDescBuilder) { 072 this(new MimeTokenStream(config != null ? config.clone() : new MimeConfig(), 073 monitor, bodyDescBuilder)); 074 } 075 076 public MimeStreamParser(final MimeConfig config) { 077 this(config, null, null); 078 } 079 080 public MimeStreamParser() { 081 this(new MimeTokenStream(new MimeConfig(), null, null)); 082 } 083 084 /** 085 * Determines whether this parser automatically decodes body content 086 * based on the on the MIME fields with the standard defaults. 087 */ 088 public boolean isContentDecoding() { 089 return contentDecoding; 090 } 091 092 /** 093 * Defines whether parser should automatically decode body content 094 * based on the on the MIME fields with the standard defaults. 095 */ 096 public void setContentDecoding(boolean b) { 097 this.contentDecoding = b; 098 } 099 100 /** 101 * Parses a stream of bytes containing a MIME message. Please note that if the 102 * {@link MimeConfig} associated with the mime stream returns a not null Content-Type 103 * value from its {@link MimeConfig#getHeadlessParsing()} method, the message is 104 * assumed to have no head section and the headless parsing mode will be used. 105 * 106 * @param instream the stream to parse. 107 * @throws MimeException if the message can not be processed 108 * @throws IOException on I/O errors. 109 */ 110 public void parse(InputStream instream) throws MimeException, IOException { 111 MimeConfig config = mimeTokenStream.getConfig(); 112 if (config.getHeadlessParsing() != null) { 113 Field contentType = mimeTokenStream.parseHeadless( 114 instream, config.getHeadlessParsing()); 115 handler.startMessage(); 116 handler.startHeader(); 117 handler.field(contentType); 118 handler.endHeader(); 119 } else { 120 mimeTokenStream.parse(instream); 121 } 122 OUTER: for (;;) { 123 EntityState state = mimeTokenStream.getState(); 124 switch (state) { 125 case T_BODY: 126 BodyDescriptor desc = mimeTokenStream.getBodyDescriptor(); 127 InputStream bodyContent; 128 if (contentDecoding) { 129 bodyContent = mimeTokenStream.getDecodedInputStream(); 130 } else { 131 bodyContent = mimeTokenStream.getInputStream(); 132 } 133 handler.body(desc, bodyContent); 134 break; 135 case T_END_BODYPART: 136 handler.endBodyPart(); 137 break; 138 case T_END_HEADER: 139 handler.endHeader(); 140 break; 141 case T_END_MESSAGE: 142 handler.endMessage(); 143 break; 144 case T_END_MULTIPART: 145 handler.endMultipart(); 146 break; 147 case T_END_OF_STREAM: 148 break OUTER; 149 case T_EPILOGUE: 150 handler.epilogue(mimeTokenStream.getInputStream()); 151 break; 152 case T_FIELD: 153 handler.field(mimeTokenStream.getField()); 154 break; 155 case T_PREAMBLE: 156 handler.preamble(mimeTokenStream.getInputStream()); 157 break; 158 case T_RAW_ENTITY: 159 handler.raw(mimeTokenStream.getInputStream()); 160 break; 161 case T_START_BODYPART: 162 handler.startBodyPart(); 163 break; 164 case T_START_HEADER: 165 handler.startHeader(); 166 break; 167 case T_START_MESSAGE: 168 handler.startMessage(); 169 break; 170 case T_START_MULTIPART: 171 handler.startMultipart(mimeTokenStream.getBodyDescriptor()); 172 break; 173 default: 174 throw new IllegalStateException("Invalid state: " + state); 175 } 176 state = mimeTokenStream.next(); 177 } 178 } 179 180 /** 181 * Determines if this parser is currently in raw mode. 182 * 183 * @return <code>true</code> if in raw mode, <code>false</code> 184 * otherwise. 185 * @see #setRaw() 186 */ 187 public boolean isRaw() { 188 return mimeTokenStream.isRaw(); 189 } 190 191 /** 192 * Enables raw mode. In raw mode all future entities (messages 193 * or body parts) in the stream will be reported to the 194 * {@link ContentHandler#raw(InputStream)} handler method only. 195 * The stream will contain the entire unparsed entity contents 196 * including header fields and whatever is in the body. 197 */ 198 public void setRaw() { 199 mimeTokenStream.setRecursionMode(RecursionMode.M_RAW); 200 } 201 202 /** 203 * Enables flat mode. In flat mode rfc822 parts are not recursively 204 * parsed and multipart content is handled as a single "simple" stream. 205 */ 206 public void setFlat() { 207 mimeTokenStream.setRecursionMode(RecursionMode.M_FLAT); 208 } 209 210 /** 211 * Enables recursive mode. In this mode rfc822 parts are recursively 212 * parsed. 213 */ 214 public void setRecurse() { 215 mimeTokenStream.setRecursionMode(RecursionMode.M_RECURSE); 216 } 217 218 /** 219 * Finishes the parsing and stops reading lines. 220 * NOTE: No more lines will be parsed but the parser 221 * will still call 222 * {@link ContentHandler#endMultipart()}, 223 * {@link ContentHandler#endBodyPart()}, 224 * {@link ContentHandler#endMessage()}, etc to match previous calls 225 * to 226 * {@link ContentHandler#startMultipart(BodyDescriptor)}, 227 * {@link ContentHandler#startBodyPart()}, 228 * {@link ContentHandler#startMessage()}, etc. 229 */ 230 public void stop() { 231 mimeTokenStream.stop(); 232 } 233 234 /** 235 * Sets the <code>ContentHandler</code> to use when reporting 236 * parsing events. 237 * 238 * @param h the <code>ContentHandler</code>. 239 */ 240 public void setContentHandler(ContentHandler h) { 241 this.handler = h; 242 } 243 244}