//***********************************************************************\
//                                                                      *
//      MultipartHandler.java	                                        *
//                                                                      *
//      Purpose: A handler for a multipart input stream                 *
//                                                                      *
//      Author:    Simon Brooke                                         *
//	Copyright (c) Simon Brooke & Weft Technology Ltd; see LICENSE.	*
//      Created:   27th December 2000                                   *
//	   $Revision: 1.19 $; $Date: 2001/07/19 14:13:21 $		*
//                                                                      *
//***********************************************************************/

package uk.co.weft.maybeupload;

import java.lang.*;
import java.io.*;
import java.util.*;
import org.apache.regexp.*;	// for unpacking content disposition headers

/** Signals that the recursive multipart reader has run into a
 *  boundary belonging to an enclosing part. It is thrown by
 *  readrfc1867() and caught one level up in handlePart(), so it acts
 *  as a (kludgy) non-local return rather than as an error report. The
 *  exception message carries the boundary line that was read. */
class EndOfPartException extends IOException
{
    /** @param line the boundary line which ended the nested part;
     *  retrievable later through getMessage() */
    EndOfPartException( final String line)
    {
        super( line);
    }
}


/** A handler for multipart/form-data input per RFC 1867.
 *
 *  <p>One of the trickier elements of RFC 1867 is that multipart/mixed
 *  elements may be embedded inside multipart/form-data. The RFC does
 *  <strong>not</strong> say that multipart elements may not be
 *  arbitrarily nested. While I don't know whether any clients
 *  <em>do</em> nest multipart elements, it would be nice to be fully
 *  RFC 1867 compliant...</p>
 *
 *  <p>This class recognises content-transfer-encodings but does not
 *  yet decode encoded part bodies. This is something I intend to fix
 *  in a later release.</p>
 *
 *  <p>Objects of this class are not intended to be reusable. Use once
 *  and throw away.</p>
 *
 *  @author Simon Brooke (simon@jasmine.org.uk)
 *  @version $Revision: 1.19 $
 *  This revision: $Author: simon $
 *  <pre>
 *  $Log: MultipartHandler.java,v $
 *  Revision 1.19  2001/07/19 14:13:21  simon
 *  Set DEBUG to false for stable 1.0.5 release.
 *
 *  Revision 1.18  2001/07/17 12:59:03  simon
 *  [stupid] I had missed java.io.PushbackInputStream, and reinvented the
 *  wheel. Fixed. Thanks, Randy, for pointing this out.
 *
 *  Revision 1.17  2001/07/17 12:27:09  simon
 *  Rewrote handleInlinePart to use former readFilePart (now renamed to
 *  readPartData) in order to attempt to address Samuel ARNOD-PRIN's Mac
 *  upload bug; incorporated Randy Chang's latest bugfix to the core loop
 *  in readPartData.
 *
 *  Revision 1.16  2001/07/13 12:56:36  simon
 *  Tidied up Thomas Wilson's disallowedCharacters stuff; merged Randy
 *  Chang's binary upload improvements; had another bash at the Mac upload
 *  problems.
 *
 *  Revision 1.15  2001/06/26 09:39:07  thomas
 *  Protected HashTable attribute mapping Characters disallowed in a filename to characters which they should be replaced with. Defaults to ' ' -> '_'.
 *
 *  Revision 1.14  2001/06/25 16:03:29  simon
 *  *** empty log message ***
 *
 *  Revision 1.13  2001/06/25 15:55:14  simon
 *  Rewrote readFilePart() as a state transition engine; seems a lot
 *  cleaner, seems to work better. Still have doubts about efficiency.
 *
 *  Revision 1.12  2001/06/21 08:52:00  simon
 *  Essentially just confirming Thomas' changes to MultipartHandler, and adding
 *  in an additional DEBUG message of my own...
 *
 *  Revision 1.11 2001/06/13 10:04:30 thomas
 *  No longer use <code>read &lt; expected</code> to control exit from the
 *  various loops which read data from the input stream. This method proved 
 *  to introduce problems and can be avoided by reading from the stream until 
 *  we reach the end and not when the count reaches the expected count. The 
 *  bytes expected is retained to provide a guide to the amount of data read 
 *  which may prove useful for debugging at some later date.
 *
 *  Revision 1.10  2001/04/24 15:55:58  simon
 *  Patch release incorporating Aaron Dunlop's ByteArrayInputStream stuff.
 *
 *  Revision 1.5  2001/04/11 20:06:56  aarond
 *  Added check for 0-length uploads (e.g., the user did not specify a file to upload)
 *
 *  Revision 1.4  2001/04/11 00:29:02  aarond
 *  Upgraded maybeupload package to 1.0.2pre3 and patched with our changes
 *
 *  Revision 1.9  2001/03/22 10:48:32  simon
 *  Bugfixes including a nasty one where if the last parameter in an input
 *  stream was inline, it got truncated by one byte.
 *
 *  Revision 1.8  2001/02/22 11:04:07  simon
 *  Applied patch supplied by Juho Snellman to fix a problem in
 *  readFilePart. An alterantive patch for the same problem was supplied
 *  by Cor Hofman. Grateful thanks to both.
 *
 *  Revision 1.7  2001/01/23 19:12:17  simon
 *  A number of bugfixes, plus an important new feature: you can decide
 *  whether to allow name collisions in the upload directory to result
 *  in overwriting, renaming of the new file, or an exception.
 *
 *  Revision 1.6  2001/01/22 15:56:39  simon
 *  'nother little horrible... tokens were being returned as '"token"',
 *  rather than 'token'. Fixed.
 *
 *  Revision 1.5  2001/01/22 15:08:21  simon
 *  More bugs, unfortunately. Was double-counting some characters read;
 *  once that was sorted, found that I was reading off the end of input.
 *  Now appears fixed even for complex forms... touch wood.
 *
 *  Revision 1.4  2001/01/09 12:45:47  simon
 *  Fixed the 'won't read past a null value' bug. Last <em>known</em>
 *  bug...
 *
 *  Revision 1.3  2001/01/09 12:14:12  simon
 *  Now tested with:
 *  	Netscape Communicator 4.76/Linux 2.2
 *  	Konqueror 1.9.8/Linux 2.2
 *  	Microsoft Internet Explorer 5.00.2014.0216IC
 *  File upload (including binary file upload) works. Remaining known bug:
 *  all fields must have data...
 *
 *  Revision 1.2  2001/01/08 12:39:09  simon
 *  Now working. Hooray! [that was *hard*]
 *
 *  Revision 1.1.1.1  2001/01/05 14:58:09  simon
 *  First cut - not yet tested
 *
 *
 *  </pre> */

public class MultipartHandler
{
    /** Policy: whether uploaded file parts are written to disk as
     *  they are read. Default is true. If false, the binary data is
     *  stored in the values hash instead (handleFilePart wraps it in
     *  a ByteArrayInputStream). */
    private boolean saveUploadedFilesToDisk = true;

    /** Policy: whether a file uploaded later may overwrite an
     *  existing file of the same name in the work directory. Set to
     *  true if you want to be able to over-write files. */
    private boolean allowOverwrite = false;
    
    /** Policy: whether to generate a unique filename automatically
     *  when the requested name is already taken (and overwriting is
     *  not allowed). Default is true, so there's no danger of
     *  previously uploaded files being over-written; if false, a
     *  collision raises an UploadException instead. */
    private boolean silentlyRename = true;

    /** The boundaries I am currently watching: handlePart() pushes
     *  the enclosing boundary before recursing into a nested
     *  multipart and pops it again afterwards. */
    private Stack boundaries = new Stack();
    
    /** The name-value pairs I have identified. Supplied by the
     *  caller through the constructor; a name seen more than once is
     *  mapped to a Vector of its values (see put()). */
    protected Hashtable values = null;

    /** The stream I read from, wrapped so that a single byte can be
     *  pushed back while looking for line ends. */
    private PushbackInputStream in;

    /** The advertised length of the stream, as passed to the
     *  constructor. Within this class it is only reported in debug
     *  output. */
    private int expected;

    /** The directory in which I will save uploaded files. */
    private File workDir;

    /** The number of bytes I've read so far. */
    private int read = 0;

    /** A buffer used to assemble lines while reading. */
    private byte[] buf = new byte[ 8 * 1024];

    /** A nasty marker to indicate whether the last file part read
     *  contained any valid characters -- some browsers send more
     *  than one line-end for 'no file to upload'. Reset by
     *  handleFilePart() and incremented by readPartData(). */
    private int validChars = 0;

    /** A counter used to invent names for anonymous parts and
     *  nameless files (should never be needed). */
    protected int anon = 0;

    /** Whether to print debugging output to System.out.
     *  Compile-time option only; do not set true for deliverable
     *  code! */
    protected final boolean DEBUG = false;
    
    /** content-transfer-encoding types, as mandated by RFC 1521,
     *  section 5. X-tokens will not be handled because to quote the
     *  RFC 'the creation of new Content-Transfer-Encoding values is
     *  explicitly and strongly discouraged'. Each code is a single
     *  character taken from the lower-cased header value: normally
     *  the first letter, but the second letter for the two tokens
     *  beginning with 'b' ('base64' and 'binary'), which would
     *  otherwise be ambiguous. */
    protected final char CTE_7BIT = '7'; // default
    protected final char CTE_QUOTED_PRINTABLE = 'q';
    protected final char CTE_BASE64 = 'a';	// second letter of 'base64'
    protected final char CTE_8BIT = '8';
    protected final char CTE_BINARY = 'i';	// second letter of 'binary'
    protected final char CTE_XTOKEN = 'x';

    /** Patterns intended for unpacking the content-disposition
     *  header: one for parts carrying a filename, one for simple
     *  form fields. NOTE(review): neither pattern is referenced
     *  anywhere else in the code visible in this file. */
    private final String filePattern = 
	"form-data; *name=\"([^\"]*)\"; *filename=\"([^\"]*)\"",
	simplePattern = "form-data; *name=\"([^\"]*)\"";

    /** Characters disallowed in filenames. Maps each disallowed
     *  Character to the Character which replaces it; populated
     *  through disallow(). */
    protected Hashtable disallowedCharacters = new Hashtable();

    /** Read every value from an RFC 1867 formatted input stream into
     *  the supplied hashtable, using the default policies: uploaded
     *  files are saved to disk, existing files are never overwritten,
     *  and a colliding upload is silently given a fresh name.
     *
     *  @param values a hashtable to populate with the values read
     *  @param in an input stream, assumed to be RFC 1867 formatted
     *  @param length the advertised length of the stream, in bytes
     *  @param cthdr the content-type header which identifies this
     *  stream as multipart
     *  @param workdir a directory in which to save uploaded files
     *  @throws IOException if the stream cannot be read or is
     *  malformed
     *  @throws UploadException declared because the constructor
     *  delegated to declares it
     */
    MultipartHandler( Hashtable values, InputStream in, int length,
                      String cthdr, File workdir)
        throws IOException, UploadException
    {
        this( values, in, length, cthdr, workdir,
              true,             // saveUploadedFilesToDisk
              false,            // allowOverwrite
              true);            // silentlyRename
    }

    /** Read every value from an RFC 1867 formatted input stream into
     *  the supplied hashtable, with explicit upload policies. The
     *  whole stream is consumed before the constructor returns.
     *
     *  @param values a hashtable to populate with the values read
     *  @param in an input stream, assumed to be RFC 1867 formatted
     *  @param length the advertised length of the stream, in bytes
     *  @param cthdr the content-type header which identifies this
     *  stream as multipart
     *  @param workdir a directory in which to save uploaded files
     *  @param saveUploadedFilesToDisk if true, write file parts into
     *  workdir; if false, keep their bytes in the hashtable
     *  @param allowOverwrite if true, an upload may replace an
     *  existing file of the same name
     *  @param silentlyRename if true (and overwriting is not
     *  allowed), a colliding upload gets a generated unique name
     *  @throws IOException if the stream cannot be read or is
     *  malformed
     *  @throws UploadException if a file name collides and neither
     *  overwriting nor renaming is permitted
     */
    MultipartHandler( Hashtable values, InputStream in, int length,
                      String cthdr, File workdir,
                      boolean saveUploadedFilesToDisk,
                      boolean allowOverwrite, boolean silentlyRename)
        throws IOException, UploadException
    {
        // the three policies
        this.saveUploadedFilesToDisk = saveUploadedFilesToDisk;
        this.allowOverwrite = allowOverwrite;
        this.silentlyRename = silentlyRename;

        // where the data comes from and where it goes
        this.values = values;
        this.workDir = workdir;
        this.expected = length;
        this.in = new PushbackInputStream( in);

        // default filename clean-up rules
        disallow( ' ', '_');    // spaces in filenames are a nuisance
        disallow( "?*", '_');   // wildcards are a worse nuisance
        disallow( '#', '_');    // UN*X shell comment chars are best avoided

        if ( DEBUG) {
            System.out.println( "\n\n*** starting to read input");
        }

        readrfc1867( cthdr);

        if ( DEBUG) {
            System.out.println( "\n*** finished reading input");
            System.out.println( "internal count shows: read " + read +
                                " of expected " + expected + " bytes\n");
        }
    }


    /** Mark the specified character as disallowed in filenames; any
     *  occurrence in an uploaded filename is replaced with the
     *  specified replacement. A later call for the same character
     *  supersedes the earlier replacement.
     *
     *  @param disallowed the character we disallow
     *  @param preferred the character to replace it with
     */
    public void disallow( char disallowed, char preferred)
    {
        Character unwanted = new Character( disallowed);
        Character substitute = new Character( preferred);

        disallowedCharacters.put( unwanted, substitute);
    }

    /** Mark each of the specified characters as disallowed in
     *  filenames; all of them are replaced with the same specified
     *  replacement.
     *
     *  @param disallowed a String comprising the characters we disallow
     *  @param preferred the character to replace each of them with
     */
    public void disallow( String disallowed, char preferred)
    {
        char[] unwanted = disallowed.toCharArray();

        for ( int idx = 0; idx < unwanted.length; idx ++) {
            disallow( unwanted[ idx], preferred);
        }
    }

    /** The recursive multipart reader. Deep. Dark. Mysterious. Could
     *  almost certainly be rewritten better. See RFC 1521, RFC 1867.
     *
     *  <p>Reads boundary-delimited parts from my input stream for as
     *  long as each part ends on a line starting with the boundary
     *  named in cthdr. Each part is handed to handlePart(). If the
     *  line following a boundary is itself a boundary belonging to
     *  the enclosing multipart (the top of the boundaries stack), the
     *  recursion is unwound by throwing an EndOfPartException
     *  carrying that line.</p>
     *
     *  @param cthdr the content-type header which identifies this
     *  stream as multipart; must contain a boundary parameter
     *  @return the last line read: null at end of input, otherwise
     *  the first line which did not start with the boundary
     *  @throws EndOfPartException when a boundary of the enclosing
     *  multipart is met
     *  @throws IOException if there is input to read but cthdr yields
     *  no boundary, or if an unexpected boundary-like line follows a
     *  boundary
     *  @throws UploadException propagated from handlePart()
     */
    private String readrfc1867( String cthdr)
        throws IOException, UploadException
    {
        String line = readLine();
        String boundary = getBoundary( cthdr);

        if ( line != null && boundary == null) {
                                // without this check the loop test
                                // below would fail with a bare
                                // NullPointerException
            throw new IOException( "No multipart boundary found in " +
                                   "content-type header: " + cthdr);
        }

        while ( line != null && line.startsWith( boundary)) {
                                // iterate over the parts in this part
            if ( DEBUG)
                System.out.println( "\n*** seeking new part");

            line = readLine();
                                // OK: the next line after a part
                                // boundary is either
                                // a header or
                                // the next boundary up the stack or
                                // null for the end of input

            if ( line == null) {
                break;          // end of input
            }

            if ( ! line.startsWith( "--")) {
                line = handlePart( line, boundary);
                continue;
            }

                                // looks like another boundary. Only
                                // consult the stack if we are actually
                                // nested: peek() on an empty Stack
                                // throws EmptyStackException.
            if ( ! boundaries.empty() &&
                 line.startsWith( ( String) boundaries.peek())) {
                                // a slightly inelegant way of
                                // returning from recursion, no?
                throw new EndOfPartException( line);
            }

            throw new IOException( "Unexpected recursive " +
                                   "boundary: " + line);
        }

        return line;
    }

    /** Handle a single part of a multipart stream, starting with
     *  this line which has already been read in. The part's headers
     *  are parsed first; a part whose content-type mentions
     *  'multipart' is read recursively, anything else is handed to
     *  handlePartData().
     *
     *  <p>NOTE(review): header names are looked up in lower case
     *  ("content-type") but are stored exactly as the client sent
     *  them, so a header sent as "Content-Type" does not select the
     *  recursive branch -- confirm whether that is intended.</p>
     *
     *  @param line the first line of the part (normally its first
     *  header line)
     *  @param boundary the boundary delimiting this part
     *  @return the last line read while handling the part, or the
     *  line passed in if no headers were found
     *  @throws IOException on read failure or misnested boundaries
     *  @throws UploadException propagated from the part handlers
     */
    protected String handlePart( String line, String boundary)
        throws IOException, UploadException
    {
        Hashtable headers = handlePartHeaders( line);
                                // nicer if we could use a
                                // uk.co.weft.dbutil.Context, but this
                                // version is intended to be
                                // Jacquard-independent

        if ( headers.isEmpty()) {
                                // we've probably come to the end of
                                // input. That's OK.
            return line;
        }

        if ( DEBUG)
            System.out.println( "*** seeking part data");

        String cthdr = ( String) headers.get( "content-type");

        if ( cthdr != null && cthdr.indexOf( "multipart") > -1) {
                                // it's a multipart (RFC 1521) - recurse

                                // The header parser files every
                                // ';'-separated parameter under its own
                                // key, so the content-type value has
                                // lost its boundary parameter. Put it
                                // back, or the nested reader cannot
                                // find its boundary.
            String nested = ( String) headers.get( "boundary");

            if ( nested != null && cthdr.indexOf( "boundary=") == -1)
                cthdr = cthdr + "; boundary=" + nested;

            boundaries.push( boundary);

            try {
                                // keep the nested reader's last line
                                // rather than returning our own stale
                                // header line
                line = readrfc1867( cthdr);
            }
            catch ( EndOfPartException ok) {
                                // the nested reader met our own
                                // boundary; the message is that line
                line = ok.getMessage();
            }

            if ( ! boundary.startsWith( ( String) boundaries.pop()))
                throw new IOException( "Misnested multipart " +
                                       "boundaries?");
        }
        else {                  // not multipart
            line = handlePartData( headers, boundary);
        }

        if ( DEBUG)
            System.out.println( "-- handled data, returning ["
                                + line + "]");

        return line;
    }

    /** Trim surrounding whitespace from this string, then remove
     *  every leading and every trailing double-quote character.
     *  Whitespace exposed by removing the quotes is left alone.
     *
     *  @param string the string to clean; may be null
     *  @return the cleaned string, or null if string was null
     */
    private String deString( String string)
    {
        if ( string == null) {
            return null;
        }

        String trimmed = string.trim();
        int from = 0;
        int to = trimmed.length();

        while ( from < to && trimmed.charAt( from) == '"') {
            from ++;            // skip leading quotes
        }

        while ( to > from && trimmed.charAt( to - 1) == '"') {
            to --;              // drop trailing quotes
        }

        return trimmed.substring( from, to);
    }


    /** Split one header line into name/value pairs and store them in
     *  headers. The line is cut into segments at each ';' which is
     *  not inside double quotes. In the first segment the name ends
     *  at the first ':'; in every later segment it ends at the first
     *  '='. Names and values are cleaned with deString(), so
     *  surrounding whitespace and quotes are removed.
     *
     *  <p>For example <code>Content-Disposition: form-data;
     *  name="f"; filename="a;b.txt"</code> yields
     *  Content-Disposition=form-data, name=f, filename=a;b.txt.</p>
     *
     *  <p>A segment with no separator is stored with an empty value;
     *  a segment with an empty name (for instance after a trailing
     *  ';') is ignored. Names are stored exactly as written.</p>
     *
     *  @param line the header line to unpack
     *  @param headers the hashtable to add the pairs to
     */
    private void extractPartHeadersFromLine( String line, Hashtable headers)
    {
                                // OK, why not use StringTokenizer?
                                // Well, because I tried that, and on
                                // some JVMs it swallowed some
                                // separators and not others.
        final int length = line.length();
        int segmentStart = 0;
        char nameSep = ':';     // first name on a line is terminated
                                // with a colon; second and subsequent
                                // with '='

        while ( segmentStart < length) {
                                // find the ';' which closes this
                                // segment, ignoring any inside quotes
            int segmentEnd = segmentStart;
            boolean quoted = false;

            while ( segmentEnd < length) {
                char c = line.charAt( segmentEnd);

                if ( c == '"')
                    quoted = ! quoted;
                else if ( c == ';' && ! quoted)
                    break;

                segmentEnd ++;
            }

            String segment = line.substring( segmentStart, segmentEnd);
            int sep = segment.indexOf( nameSep);
            String name;
            String value;

            if ( sep == -1) {   // valueless token
                name = deString( segment);
                value = "";     // Hashtable cannot hold null
            }
            else {
                name = deString( segment.substring( 0, sep));
                value = deString( segment.substring( sep + 1));
            }

            if ( name.length() > 0) {
                if ( DEBUG)
                    System.out.println( "  ++ Setting part header [" +
                                        name + "] to [" + value + "]");

                headers.put( name, value);
            }

            nameSep = '=';
            segmentStart = segmentEnd + 1;
        }
    }


    /** Collect the headers of one part. Starting with the line
     *  passed in, every line containing a ':' is treated as a header
     *  line and unpacked into the result; reading stops at the first
     *  line without a colon (normally the blank separator line) or at
     *  end of input. That terminating line is consumed and not
     *  returned.
     *
     *  @param line the first candidate header line, already read
     *  @return the headers found; empty if line was not a header
     *  @throws IOException if reading from the stream fails
     */
    private Hashtable handlePartHeaders( String line)
        throws IOException
    {
        Hashtable found = new Hashtable();
                                // nicer if we could use a
                                // uk.co.weft.dbutil.Context, but this
                                // version is intended to be
                                // Jacquard-independent
        String current = line;

        for ( ; current != null && current.indexOf( ':') > -1;
              current = readLine()) {
                                // extract all the headers for this part
            if ( DEBUG) {
                System.out.println( "  -- Seeking new part headers in " +
                                    current);
            }

            extractPartHeadersFromLine( current, found);
        }

                                // the line which follows the headers
                                // should be blank; if it's not we may
                                // have an error.
        if ( DEBUG && current != null && current.trim().length() > 0) {
            System.out.println( "Non-blank line [" + current +
                                "] (first character [" +
                                current.getBytes()[ 0] +
                                "]) following part headers?");
        }

        return found;
    }

    /** Read part data from my input stream, up to and including the
     *  next boundary line. Interpret the headers and dispose of the
     *  content appropriately: a part with a 'filename' header is
     *  handled as a file, anything else as an inline value. At the
     *  point at which this method is entered, the next line in the
     *  stream is the value part of the part...
     *
     *  <p>If the headers carry no 'name', one is invented
     *  ("unknown" plus a counter) and added to headers. The
     *  content-transfer-encoding, if present, is reduced to a one
     *  character code: the first letter of the lower-cased value, or
     *  the second letter when the first is 'b', so that 'base64'
     *  ('a') and 'binary' ('i') can be told apart.</p>
     *
     *  @param headers the headers for this part; may gain a 'name'
     *  @param boundary the boundary delimiting this part
     *  @return the last line read (should be boundary or null)
     *  @throws IOException if reading or storing the part fails
     *  @throws UploadException propagated from handleFilePart()
     */
    private String handlePartData( Hashtable headers, String boundary)
        throws IOException, UploadException
    {
        if ( headers.get( "name") == null) {
                                // if a name wasn't provided, invent
                                // one (unlikely).
            headers.put( "name", "unknown" + ( anon ++));
        }

        char cte = CTE_7BIT;    // RFC 1521 default
        String cteheader = ( String) headers.get( "content-transfer-encoding");

        if ( cteheader != null && cteheader.length() > 0) {
                                // encode content-transfer-encoding;
                                // fixed locale so that e.g. 'BINARY'
                                // lower-cases predictably
            cteheader = cteheader.toLowerCase( Locale.ENGLISH);
            cte = cteheader.charAt( 0);

            if ( cte == 'b' && cteheader.length() > 1) {
                                // else 'base64' and 'binary' would be
                                // ambiguous
                cte = cteheader.charAt( 1);
            }
        }

        if ( headers.get( "filename") != null) {
                                // handle file
            return handleFilePart( headers, cte, boundary);
        }

        return handleInlinePart( headers, cte, boundary);
    }


    /** Read an inline (non-file) value from the input stream up to
     *  the next boundary and store it, as a String, in my values
     *  under the key given by the "name" header. The bytes are
     *  converted with the platform default character encoding.
     *
     *  @param headers a hash of the headers of the current part
     *  @param cte the content-transfer-encoding of the current part
     *  @param boundary the boundary of the current part
     *
     *  @return the last line read - which should be the boundary...
     *  @throws IOException if reading from the stream fails
     */
    protected String handleInlinePart( Hashtable headers, char cte,
                                       String boundary)
        throws IOException
    {
        final String key = ( String) headers.get( "name");

        if ( DEBUG) {
            System.out.println( "-- handling inline part [" +
                                key + "]");
        }

        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        String terminator = readPartData( sink, boundary, cte);
        String text = sink.toString();

        put( key, text);

        if ( DEBUG) {
            System.out.println( "-- handled inline part, value was [" +
                                text + "]");
        }

        return terminator;
    }

    /** Read a file part from the input stream up to the next
     *  boundary. The 'filename' header is reduced to its last path
     *  component (both '/' and '\' count as separators), given an
     *  invented name if empty, and has its disallowed characters
     *  replaced.
     *
     *  <p>When files are saved to disk the data is written to that
     *  name in my work directory and a File describing it is stored
     *  in my values under the "name" header; a file which received no
     *  valid characters is deleted and not stored. Otherwise the data
     *  is held in memory and stored as a ByteArrayInputStream, unless
     *  it is empty.</p>
     *
     *  <p>NOTE(review): the name-collision check against the work
     *  directory is performed even when files are not saved to disk
     *  -- confirm whether that is intended.</p>
     *
     *  @param headers a hash of the headers of the current part
     *  @param cte the content-transfer-encoding of the current part
     *  @param boundary the boundary of the current part
     *
     *  @return the last line read - which should be the boundary...
     *  @throws IOException if the part cannot be read or written
     *  @throws UploadException if the file already exists and
     *  neither overwriting nor silent renaming is allowed
     */
    protected String handleFilePart( Hashtable headers, char cte,
                                     String boundary)
        throws IOException, UploadException
    {
        String line = null;
        String filename = ( String) headers.get( "filename");

        if ( filename != null) {
                                // which it shouldn't be, but let's be
                                // careful
            int sep = Math.max( filename.lastIndexOf( '/'),
                                filename.lastIndexOf( "\\"));

            if ( DEBUG) {
                System.out.println( "Seeking separator in filename [" +
                                    filename + "]; index was " + sep);
            }

            if ( sep > -1)
                filename = filename.substring( sep + 1);
                                // we don't want any directory
                                // structure in the filename!
        }

        if ( filename == null || filename.length() == 0)
            filename = "unknown" + ( anon ++);

        // Replace disallowed characters from within the filename
        Enumeration elements = disallowedCharacters.keys();

        while ( elements.hasMoreElements()) {
            Character charToReplace =
                ( Character) elements.nextElement();
            Character charToReplaceWith =
                ( Character) disallowedCharacters.get( charToReplace);

            filename = filename.replace( charToReplace.charValue(),
                                         charToReplaceWith.charValue());
        }

        if ( DEBUG)
            System.out.println( "-- handling file part [" +
                                filename + "]");

        File value = new File( workDir + File.separator + filename);

        if ( value.exists() && ! allowOverwrite) {
            if ( ! silentlyRename)
                throw new
                    UploadException( "There is already a file called " +
                                     filename +
                                     " in the upload directory");

            for ( int i = 1; value.exists(); i ++)
                                // generate a unique prefix
                value = new File( workDir + File.separator +
                                  i + "_" + filename);
        }

        if ( saveUploadedFilesToDisk) {
            FileOutputStream out = new FileOutputStream( value);

            validChars = 0;     // dirty hacky marker to let us know
                                // whether there was any valid content
                                // in the file

            try {
                line = readPartData( out, boundary, cte);
                                // read the part into the file
            }
            finally {
                out.close();    // release the file handle even when
                                // reading the part fails
            }

            if ( validChars > 0)
                put( ( String) headers.get( "name"), value);
                                // cache the file object on the value
                                // name - we don't cache empty files!
            else
                value.delete(); // delete empty file. Is this the
                                // right thing to do?
        }
        else {
            ByteArrayOutputStream out = new ByteArrayOutputStream();

            // read the part into the outputstream
            line = readPartData( out, boundary, cte);

            if ( out.size() > 0) {
                // Add the binary data to the hash
                put( ( String) headers.get( "name"),
                     new ByteArrayInputStream( out.toByteArray()));
            }
        }

        if ( DEBUG)
            System.out.println( "-- handled file part");

        return line;            // and return the supposed boundary
    }


    /** Store a value against a name, allowing for names which occur
     *  more than once. The first value for a name is stored as the
     *  bare object, because that makes life simpler for the common
     *  case; when a second value arrives the entry is converted to a
     *  Vector holding the values in arrival order, and later values
     *  are appended to it.
     *
     *  @param name the key to store against
     *  @param value the String or File to store
     */
    protected synchronized void put( String name, Object value)
    {
        Object existing = values.get( name);

        if ( existing == null) {
                                // first value for this name
            values.put( name, value);
            return;
        }

        Vector all;

        if ( existing instanceof Vector) {
            all = ( Vector) existing;
        }
        else {
                                // second value: promote to a Vector
            all = new Vector();
            all.addElement( existing);
            values.put( name, all);
        }

        all.addElement( value);
    }

    /** Extract a multipart boundary from this string, presumed to be
     *  a content-type header value, and return it in the form in
     *  which it appears on the stream, i.e. prefixed with "--".
     *  Private to this class; final for efficiency.
     *
     *  <p>The boundary parameter may be quoted and may be followed by
     *  further parameters; quotes, surrounding whitespace and
     *  anything after a terminating ';' are not part of the boundary.
     *  For example both <code>multipart/form-data;
     *  boundary=AaB03x</code> and <code>multipart/mixed;
     *  boundary="AaB03x"; charset=x</code> yield
     *  <code>--AaB03x</code>.</p>
     *
     *  @param cthdr a Content-type header value; may be null
     *  @return the multipart boundary string which is embedded in it,
     *  prefixed with "--", or null if there is none
     */
    private final String getBoundary( String cthdr)
    {
        final String marker = "boundary=";

        if ( cthdr == null)
            return null;

        int at = cthdr.indexOf( marker);

        if ( at == -1)
            return null;

        String token = cthdr.substring( at + marker.length()).trim();

        if ( token.startsWith( "\"")) {
                                // quoted: runs to the closing quote
            int close = token.indexOf( '"', 1);

            token = ( close == -1)
                ? token.substring( 1)
                : token.substring( 1, close);
        }
        else {
                                // unquoted: runs to the next parameter
            int semi = token.indexOf( ';');

            if ( semi > -1)
                token = token.substring( 0, semi).trim();
        }

        return "--" + token;
    }


    /** Read the body of the current part from my input stream and
     *  copy it to this output stream, stopping at the next
     *  occurrence of the boundary at the start of a line. The line
     *  end immediately before the boundary is not written; the rest
     *  of the boundary line is consumed and returned.
     *
     *  <p>A line end is CR LF, a lone CR or a lone LF. After each
     *  line end the bytes which follow are compared with the
     *  boundary one at a time; the line end and the bytes matched so
     *  far are held back until it is clear whether they are data (in
     *  which case they are written out) or the delimiter (in which
     *  case they are dropped). A byte which breaks a match is
     *  processed afresh, so a line end ending a false match can still
     *  introduce the real boundary.</p>
     *
     *  <p>validChars is incremented for every data byte written which
     *  is not part of a line end. If the stream ends before a
     *  boundary is found, everything read is written out and null is
     *  returned.</p>
     *
     *  @param out the OutputStream to write to
     *  @param boundary the boundary to read up to, including its
     *  leading "--"; must not be null or empty
     *  @param cte the content-transfer-encoding used in the current
     *  part. Currently not used. A later version of this method may
     *  read-and-decode
     *  @return the boundary line found on the input stream (boundary
     *  plus whatever followed it on the same line), or null if the
     *  stream ended first
     *  @throws IOException if reading or writing fails
     */
    private String readPartData( OutputStream out,
                                 String boundary, char cte)
        throws IOException
    {
        final int CR = 13;
        final int LF = 10;

        ByteArrayOutputStream held = new ByteArrayOutputStream();
                                // a line end plus any boundary prefix
                                // matched after it, not yet written

        boolean matching = false;
                                // true while 'held' begins with a line
                                // end and may yet be the delimiter

        int matched = 0;        // how much of boundary follows the
                                // held line end

        String value = null;    // the boundary line, once found

        while ( value == null) {
            int ch = in.read();

            if ( ch == -1) {
                                // read off the end of input -
                                // shouldn't happen. Whatever we were
                                // holding back was data after all.
                validChars += matched;
                held.writeTo( out);
                break;
            }

            read ++;

            if ( matching) {
                if ( ch == boundary.charAt( matched)) {
                    held.write( ch);
                    matched ++;

                    if ( matched == boundary.length()) {
                                // swallow anything remaining on the
                                // line
                        String rest = readLine();

                        value = ( rest == null)
                            ? boundary
                            : boundary + rest;
                    }

                    continue;
                }

                                // not the boundary: release what we
                                // held, then treat this byte as
                                // ordinary input below
                validChars += matched;
                held.writeTo( out);
                held.reset();
                matching = false;
                matched = 0;
            }

            if ( ch == CR) {
                                // CR, optionally followed by LF, is
                                // one line end
                held.write( ch);

                int next = in.read();

                if ( next == LF) {
                    held.write( next);
                    read ++;
                }
                else if ( next != -1) {
                                // never push the EOF marker back
                    in.unread( next);
                }

                matching = true;
            }
            else if ( ch == LF) {
                                // a bare LF is a line end too
                held.write( ch);
                matching = true;
            }
            else {
                validChars ++;
                out.write( ch);
            }
        }

        return value;
    }


    /** Convenience form of readLine( char) for lines which carry no
     *  content-transfer-encoding of their own: reads the next line
     *  from my input stream using the RFC 1521 default, 7-bit.
     *
     *  @return a String representation of my next input line, as
     *  returned by readLine( char)
     *  @throws IOException if reading from the stream fails
     */
    private String readLine()
        throws IOException
    {
        final char defaultEncoding = CTE_7BIT;

        return readLine( defaultEncoding);
    }

    /** Read the next line from my input stream and return it as a
     *  String, decoding the bytes as ISO-8859-1. Trailing CR and LF
     *  characters are removed; other leading or trailing whitespace
     *  is kept, in case it is significant.
     *
     *  <p>The line is read in buffer-sized chunks. Another chunk is
     *  read only when the previous one filled the buffer without
     *  reaching the end of the line; a chunk which ends exactly on
     *  CR LF, or on a CR whose LF is the next byte on the stream,
     *  completes the line.</p>
     *
     *  @param cte the content-transfer-encoding used in the current
     *  part. Currently not used. A later version of this method may
     *  read-and-decode
     *
     *  @return a String representation of my next input line, or
     *  null if nothing could be read
     *  @throws IOException if reading from the stream fails
     */
    private String readLine( char cte)
        throws IOException
    {
        StringBuffer sbuf = new StringBuffer();
        String result = null;   // safe default
        boolean complete = false;

        while ( ! complete) {
            int nread = readLine( buf, 0, buf.length, cte);

            if ( nread > 0)
                sbuf.append( new String( buf, 0, nread, "ISO-8859-1"));

            if ( nread < buf.length) {
                                // short read: line end or end of input
                complete = true;
            }
            else if ( buf[ nread - 2] == 13 && buf[ nread - 1] == 10) {
                                // the line end fell exactly on the
                                // last two bytes of the buffer
                complete = true;
            }
            else if ( buf[ nread - 1] == 13) {
                                // CR in the last slot: its LF, if any,
                                // is the next byte on the stream
                int next = in.read();

                if ( next == 10) {
                    sbuf.append( '\n');
                    read ++;
                    complete = true;
                }
                else if ( next == -1) {
                    complete = true;
                }
                else {
                    in.unread( next);
                }
            }
        }

        if ( sbuf.length() > 0) {
                                // got some: trim trailing linefeed
                                // sequence. Not trim(), because there
                                // may be significant whitespace at the
                                // ends of the string
            int end = sbuf.length();

            while ( end > 0 &&
                    ( sbuf.charAt( end - 1) == '\r' ||
                      sbuf.charAt( end - 1) == '\n'))
                end --;

            result = sbuf.toString().substring( 0, end);
        }

        if ( DEBUG)
            System.out.println( "-- String readLine returning [" +
                                result + "]; total read = " + read +
                                " out of " + expected + " bytes expected");

        return result;
    }

    /** Read a line, up to and including its CR LF line end, from my
     *  InputStream into this buffer. Reading stops after the LF of a
     *  CR LF pair, when len bytes have been stored, or at end of
     *  input, whichever comes first. A CR or LF on its own does not
     *  end the line. The end-of-input marker is neither stored nor
     *  counted.
     *
     *  @param b a byte array to read into; must have room for len
     *  bytes starting at off
     *  @param off the offset in the buffer at which to start
     *  @param len the maximum number of bytes to read
     *  @param cte the content-transfer-encoding used in the current
     *  part. Currently not used. A later version of this method may
     *  read-and-decode
     *
     *  @return the number of bytes stored in b; 0 if the stream was
     *  already at end of input
     *  @throws IOException if reading from the stream fails
     */
    public int readLine( byte b[], int off, int len, char cte)
        throws IOException
    {
        final int CR = 13;
        final int LF = 10;

        int count = 0;          // bytes stored for this line
        boolean afterCr = false;
                                // was the previous byte a CR?

        while ( count < len) {
            int ch = in.read(); // read a byte from the stream

            if ( ch == -1)
                break;          // end of input

            b[ off + count] = ( byte) ch;
                                // store the byte read
            count ++;           // increment counter for this line
            read ++;            // and for total read

            if ( afterCr && ch == LF)
                break;          // CR LF: end of line

            afterCr = ( ch == CR);
        }

        return count;
    }
}
