Coverage report

  %line %branch
org.apache.turbine.util.parser.DataStreamParser
96% 
100% 

 1  
 package org.apache.turbine.util.parser;
 2  
 
 3  
 /*
 4  
  * Copyright 2001-2005 The Apache Software Foundation.
 5  
  *
 6  
  * Licensed under the Apache License, Version 2.0 (the "License");
 7  
  * you may not use this file except in compliance with the License.
 8  
  * You may obtain a copy of the License at
 9  
  *
 10  
  *     http://www.apache.org/licenses/LICENSE-2.0
 11  
  *
 12  
  * Unless required by applicable law or agreed to in writing, software
 13  
  * distributed under the License is distributed on an "AS IS" BASIS,
 14  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 15  
  * See the License for the specific language governing permissions and
 16  
  * limitations under the License.
 17  
  */
 18  
 
 19  
 import java.io.BufferedReader;
 20  
 import java.io.IOException;
 21  
 import java.io.InputStreamReader;
 22  
 import java.io.Reader;
 23  
 import java.io.StreamTokenizer;
 24  
 
 25  
 import java.util.ArrayList;
 26  
 import java.util.Collections;
 27  
 import java.util.Iterator;
 28  
 import java.util.List;
 29  
 import java.util.NoSuchElementException;
 30  
 
 31  
 import org.apache.commons.lang.exception.NestableRuntimeException;
 32  
 
 33  
 /**
 34  
  * DataStreamParser is used to parse a stream with a fixed format and
 35  
  * generate ValueParser objects which can be used to extract the values
 36  
  * in the desired type.
 37  
  *
 38  
  * <p>The class itself is abstract - a concrete subclass which implements
 39  
  * the initTokenizer method such as CSVParser or TSVParser is required
 40  
  * to use the functionality.
 41  
  *
 42  
  * <p>The class implements the java.util.Iterator interface for convenience.
 43  
  * This allows simple use in a Velocity template for example:
 44  
  *
 45  
  * <pre>
 46  
  * #foreach ($row in $datastream)
 47  
  *   Name: $row.Name
 48  
  *   Description: $row.Description
 49  
  * #end
 50  
  * </pre>
 51  
  *
 52  
  * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
 53  
  * @author <a href="mailto:martin@mvdb.net">Martin van den Bemt</a>
 54  
  * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
 55  
  * @version $Id: DataStreamParser.java 280284 2005-09-12 07:57:42Z henning $
 56  
  */
 57  
 public abstract class DataStreamParser implements Iterator
 58  
 {
 59  
     /**
 60  
      * The constant for empty fields
 61  
      */
 62  
     protected static final String EMPTYFIELDNAME = "UNKNOWNFIELD";
 63  
 
 64  
     /**
 65  
      * The list of column names.
 66  
      */
 67  56
     private List columnNames = Collections.EMPTY_LIST;
 68  
 
 69  
     /**
 70  
      * The stream tokenizer for reading values from the input reader.
 71  
      */
 72  
     private StreamTokenizer tokenizer;
 73  
 
 74  
     /**
 75  
      * The parameter parser holding the values of columns for the current line.
 76  
      */
 77  
     private ValueParser lineValues;
 78  
 
 79  
     /**
 80  
      * Indicates whether or not the tokenizer has read anything yet.
 81  
      */
 82  56
     private boolean neverRead = true;
 83  
 
 84  
     /**
 85  
      * The character encoding of the input
 86  
      */
 87  
     private String characterEncoding;
 88  
 
 89  
     /**
 90  
      * The field separator, which can be almost any char
 91  
      */
 92  
     private char fieldSeparator;
 93  
 
 94  
     /**
 95  
      * Create a new DataStreamParser instance. Requires a Reader to read the
 96  
      * comma-separated values from, a list of column names and a
 97  
      * character encoding.
 98  
      *
 99  
      * @param in the input reader.
 100  
      * @param columnNames a list of column names.
 101  
      * @param characterEncoding the character encoding of the input.
 102  
      */
 103  
     public DataStreamParser(Reader in, List columnNames,
 104  
                             String characterEncoding)
 105  56
     {
 106  56
         setColumnNames(columnNames);
 107  
 
 108  56
         this.characterEncoding = characterEncoding;
 109  
 
 110  56
         if (this.characterEncoding == null)
 111  
         {
 112  6
             if (in instanceof InputStreamReader)
 113  
             {
 114  2
                 this.characterEncoding = ((InputStreamReader) in).getEncoding();
 115  
             }
 116  
 
 117  6
             if (this.characterEncoding == null)
 118  
             {
 119  
                 // no encoding could be obtained from the reader, fall back to US-ASCII
 120  4
                 this.characterEncoding = "US-ASCII";
 121  
             }
 122  
         }
 123  
 
 124  56
         tokenizer = new StreamTokenizer(new BufferedReader(in));
 125  56
         initTokenizer(tokenizer);
 126  56
     }
 127  
 
 128  
     /**
 129  
      * Initialize the StreamTokenizer instance used to read the lines
 130  
      * from the input reader. This must be implemented in subclasses to
 131  
      * set up other tokenizing properties.
 132  
      *
 133  
      * @param tokenizer the tokenizer to adjust
 134  
      */
 135  
     protected void initTokenizer(StreamTokenizer tokenizer)
 136  
     {
 137  56
         tokenizer.resetSyntax();
 138  
 
 139  
         // leave out the comma sign (,), we need it for empty fields
 140  56
         tokenizer.wordChars(' ', Character.MAX_VALUE);
 141  
 
 142  
         // and set the quote mark as the quoting character
 143  56
         tokenizer.quoteChar('"');
 144  
 
 145  
         // and finally say that end of line is significant
 146  56
         tokenizer.eolIsSignificant(true);
 147  56
     }
 148  
 
 149  
     /**
 150  
      * This method must be called to set up the field separator
 151  
      * @param fieldSeparator the char which separates the fields
 152  
      */
 153  
     public void setFieldSeparator(char fieldSeparator)
 154  
     {
 155  56
         this.fieldSeparator = fieldSeparator;
 156  
         // make this field also an ordinary char by default.
 157  56
         tokenizer.ordinaryChar(fieldSeparator);
 158  56
     }
 159  
 
 160  
     /**
 161  
      * Set the list of column names explicitly.
 162  
      *
 163  
      * @param columnNames A list of column names.
 164  
      */
 165  
     public void setColumnNames(List columnNames)
 166  
     {
 167  76
         if (columnNames != null)
 168  
         {
 169  56
             this.columnNames = columnNames;
 170  
         }
 171  76
     }
 172  
 
 173  
     /**
 174  
      * Get the list of column names.
 175  
      *
 176  
      */
 177  
     public List getColumnNames()
 178  
     {
 179  42
         return columnNames;
 180  
     }
 181  
 
 182  
     /**
 183  
      * Read the list of column names from the input reader using the
 184  
      * tokenizer. If fieldNames are empty, we use the current fieldNumber
 185  
      * + the EMPTYFIELDNAME to make one up.
 186  
      *
 187  
      * @exception IOException an IOException occurred.
 188  
      */
 189  
     public void readColumnNames()
 190  
             throws IOException
 191  
     {
 192  16
         List columnNames = new ArrayList();
 193  16
         int fieldCounter = 0;
 194  
 
 195  16
         if (hasNextRow())
 196  
         {
 197  16
             String colName = null;
 198  16
             boolean foundEol = false;
 199  
 
 200  306
             while(!foundEol)
 201  
             {
 202  282
                 tokenizer.nextToken();
 203  
 
 204  282
                 if (tokenizer.ttype == '"'
 205  
                         || tokenizer.ttype == StreamTokenizer.TT_WORD)
 206  
                 {
 207  
                     // tokenizer.ttype is either '"' or TT_WORD
 208  134
                     colName = tokenizer.sval;
 209  
                 }
 210  
                 else
 211  
                 {
 212  
                     // fieldSeparator, EOL or EOF
 213  148
                     fieldCounter++;
 214  
 
 215  148
                     if (colName == null)
 216  
                     {
 217  14
                         colName = EMPTYFIELDNAME + fieldCounter;
 218  
                     }
 219  
 
 220  148
                     columnNames.add(colName);
 221  148
                     colName = null;
 222  
                 }
 223  
 
 224  
                 // EOL and EOF are checked independently from existing fields.
 225  282
                 if (tokenizer.ttype == StreamTokenizer.TT_EOL)
 226  
                 {
 227  12
                     foundEol = true;
 228  
                 }
 229  270
                 else if (tokenizer.ttype == StreamTokenizer.TT_EOF)
 230  
                 {
 231  
                     // Keep this token in the tokenizer for hasNext()
 232  4
                     tokenizer.pushBack();
 233  4
                     foundEol = true;
 234  
                 }
 235  
             }
 236  
 
 237  16
             setColumnNames(columnNames);
 238  
         }
 239  16
     }
 240  
 
 241  
     /**
 242  
      * Determine whether a further row of values exists in the input.
 243  
      *
 244  
      * @return true if the input has more rows.
 245  
      * @exception IOException an IOException occurred.
 246  
      */
 247  
     public boolean hasNextRow()
 248  
             throws IOException
 249  
     {
 250  
         // check for end of line ensures that an empty last line doesn't
 251  
         // give a false positive for hasNextRow
 252  232
         if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
 253  
         {
 254  134
             tokenizer.nextToken();
 255  134
             tokenizer.pushBack();
 256  134
             neverRead = false;
 257  
         }
 258  232
         return tokenizer.ttype != StreamTokenizer.TT_EOF;
 259  
     }
 260  
 
 261  
     /**
 262  
      * Returns a ValueParser object containing the next row of values.
 263  
      *
 264  
      * @return a ValueParser object.
 265  
      * @exception IOException an IOException occurred.
 266  
      * @exception NoSuchElementException there are no more rows in the input.
 267  
      */
 268  
     public ValueParser nextRow()
 269  
             throws IOException, NoSuchElementException
 270  
     {
 271  74
         if (!hasNextRow())
 272  
         {
 273  2
             throw new NoSuchElementException();
 274  
         }
 275  
 
 276  72
         if (lineValues == null)
 277  
         {
 278  42
             lineValues = new BaseValueParser(characterEncoding);
 279  
         }
 280  
         else
 281  
         {
 282  30
             lineValues.clear();
 283  
         }
 284  
 
 285  72
         Iterator it = columnNames.iterator();
 286  
 
 287  72
         String currVal = "";
 288  72
         String colName = null;
 289  
 
 290  72
         boolean foundEol = false;
 291  1398
         while (!foundEol || it.hasNext())
 292  
         {
 293  1290
             if (!foundEol)
 294  
             {
 295  1188
                 tokenizer.nextToken();
 296  
             }
 297  
 
 298  1290
             if (colName == null && it.hasNext())
 299  
             {
 300  592
                 colName = String.valueOf(it.next());
 301  
             }
 302  
 
 303  1290
             if (tokenizer.ttype == '"'
 304  
                     || tokenizer.ttype == StreamTokenizer.TT_WORD)
 305  
             {
 306  
                 // tokenizer.ttype is either '"' or TT_WORD
 307  588
                 currVal = tokenizer.sval;
 308  
             }
 309  
             else
 310  
             {
 311  
                 // fieldSeparator, EOL or EOF
 312  702
                 lineValues.add(colName, currVal);
 313  702
                 colName = null;
 314  702
                 currVal = "";
 315  
             }
 316  
 
 317  
             // EOL and EOF are checked independently from existing fields.
 318  1290
             if (tokenizer.ttype == StreamTokenizer.TT_EOL)
 319  
             {
 320  168
                 foundEol = true;
 321  
             }
 322  1122
             else if (tokenizer.ttype == StreamTokenizer.TT_EOF)
 323  
             {
 324  
                 // Keep this token in the tokenizer for hasNext()
 325  6
                 tokenizer.pushBack();
 326  6
                 foundEol = true;
 327  
             }
 328  
         }
 329  
 
 330  72
         return lineValues;
 331  
     }
 332  
 
 333  
     /**
 334  
      * Determine whether a further row of values exists in the input.
 335  
      *
 336  
      * @return true if the input has more rows.
 337  
      */
 338  
     public boolean hasNext()
 339  
     {
 340  142
         boolean hasNext = false;
 341  
 
 342  
         try
 343  
         {
 344  142
             hasNext = hasNextRow();
 345  71
         }
 346  0
         catch (IOException e)
 347  
         {
 348  0
             throw new NestableRuntimeException(e);
 349  71
         }
 350  
 
 351  142
         return hasNext;
 352  
     }
 353  
 
 354  
     /**
 355  
      * Returns a ValueParser object containing the next row of values.
 356  
      *
 357  
      * @return a ValueParser object as an Object.
 358  
      * @exception NoSuchElementException there are no more rows in the input
 359  
      *                                   or an IOException occurred.
 360  
      */
 361  
     public Object next()
 362  
             throws NoSuchElementException
 363  
     {
 364  74
         Object nextRow = null;
 365  
 
 366  
         try
 367  
         {
 368  74
             nextRow = nextRow();
 369  36
         }
 370  0
         catch (IOException e)
 371  
         {
 372  0
             throw new NestableRuntimeException(e);
 373  36
         }
 374  
 
 375  72
         return nextRow;
 376  
     }
 377  
 
 378  
     /**
 379  
      * The optional Iterator.remove method is not supported.
 380  
      *
 381  
      * @exception UnsupportedOperationException the operation is not supported.
 382  
      */
 383  
     public void remove()
 384  
             throws UnsupportedOperationException
 385  
     {
 386  2
         throw new UnsupportedOperationException();
 387  
     }
 388  
 }

This report is generated by jcoverage, Maven and Maven JCoverage Plugin.