source/net/fortuna/ical4j/data/CalendarParserImpl.java

   1 /**
   2  * Copyright (c) 2009, Ben Fortuna
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  *  o Redistributions of source code must retain the above copyright
  10  * notice, this list of conditions and the following disclaimer.
  11  *
  12  *  o Redistributions in binary form must reproduce the above copyright
  13  * notice, this list of conditions and the following disclaimer in the
  14  * documentation and/or other materials provided with the distribution.
  15  *
  16  *  o Neither the name of Ben Fortuna nor the names of any other contributors
  17  * may be used to endorse or promote products derived from this software
  18  * without specific prior written permission.
  19  *
  20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  24  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  25  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  26  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  27  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  28  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  29  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  30  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  */
  32 package net.fortuna.ical4j.data;
  33
  34 import java.io.IOException;
  35 import java.io.InputStream;
  36 import java.io.InputStreamReader;
  37 import java.io.Reader;
  38 import java.io.StreamTokenizer;
  39 import java.net.URISyntaxException;
  40 import java.text.MessageFormat;
  41 import java.text.ParseException;
  42
  43 import net.fortuna.ical4j.model.Calendar;
  44 import net.fortuna.ical4j.model.Component;
  45
  46 import org.apache.commons.logging.Log;
  47 import org.apache.commons.logging.LogFactory;
  48
  49 /**
  50  * <pre>
  51  * $Id: CalendarParserImpl.java,v 1.39 2010/02/06 03:16:44 fortuna Exp $
  52  *
  53  *  Created [Nov 5, 2004]
  54  * </pre>
  55  *
  56  * The default implementation of a calendar parser.
  57  * @author Ben Fortuna
  58  */
  59 public class CalendarParserImpl implements CalendarParser {
  60
  61     private static final int WORD_CHAR_START = 32;
  62
  63     private static final int WORD_CHAR_END = 255;
  64
  65     private static final int WHITESPACE_CHAR_START = 0;
  66
  67     private static final int WHITESPACE_CHAR_END = 20;
  68
  69     private static final String UNEXPECTED_TOKEN_MESSAGE = "Expected [{0}], read [{1}]";
  70
  71     private Log log = LogFactory.getLog(CalendarParserImpl.class);
  72
  73     private final ComponentListParser componentListParser = new ComponentListParser();
  74
  75     private final ComponentParser componentParser = new ComponentParser();
  76
  77     private final PropertyListParser propertyListParser = new PropertyListParser();
  78
  79     private final PropertyParser propertyParser = new PropertyParser();
  80
  81     private final ParameterListParser paramListParser = new ParameterListParser();
  82
  83     private final ParameterParser paramParser = new ParameterParser();
  84
  85     /**
  86      * {@inheritDoc}
  87      */
  88     public final void parse(final InputStream in, final ContentHandler handler)
  89             throws IOException, ParserException {
  90         parse(new InputStreamReader(in), handler);
  91     }
  92
  93     /**
  94      * {@inheritDoc}
  95      */
  96     public final void parse(final Reader in, final ContentHandler handler)
  97             throws IOException, ParserException {
  98
  99         final StreamTokenizer tokeniser = new StreamTokenizer(in);
 100         try {
 101             tokeniser.resetSyntax();
 102             tokeniser.wordChars(WORD_CHAR_START, WORD_CHAR_END);
 103             tokeniser.whitespaceChars(WHITESPACE_CHAR_START,
 104                     WHITESPACE_CHAR_END);
 105             tokeniser.ordinaryChar(':');
 106             tokeniser.ordinaryChar(';');
 107             tokeniser.ordinaryChar('=');
 108             tokeniser.ordinaryChar('\t');
 109             tokeniser.eolIsSignificant(true);
 110             tokeniser.whitespaceChars(0, 0);
 111             tokeniser.quoteChar('"');
 112
 113             // BEGIN:VCALENDAR
 114             assertToken(tokeniser, in, Calendar.BEGIN);
 115
 116             assertToken(tokeniser, in, ':');
 117
 118             assertToken(tokeniser, in, Calendar.VCALENDAR, true);
 119
 120             assertToken(tokeniser, in, StreamTokenizer.TT_EOL);
 121
 122             handler.startCalendar();
 123
 124             // parse calendar properties..
 125             propertyListParser.parse(tokeniser, in, handler);
 126
 127             // parse components..
 128             componentListParser.parse(tokeniser, in, handler);
 129
 130             // END:VCALENDAR
 131             // assertToken(tokeniser,Calendar.END);
 132
 133             assertToken(tokeniser, in, ':');
 134
 135             assertToken(tokeniser, in, Calendar.VCALENDAR, true);
 136
 137             handler.endCalendar();
 138         }
 139         catch (Exception e) {
 140
 141             if (e instanceof IOException) {
 142                 throw (IOException) e;
 143             }
 144             if (e instanceof ParserException) {
 145                 throw (ParserException) e;
 146             }
 147             else {
 148                 throw new ParserException(e.getMessage(), getLineNumber(tokeniser, in), e);
 149             }
 150         }
 151     }
 152
 153     /**
 154      * Parses an iCalendar property list from the specified stream tokeniser.
 155      * @param tokeniser
 156      * @throws IOException
 157      * @throws ParseException
 158      * @throws URISyntaxException
 159      * @throws URISyntaxException
 160      * @throws ParserException
 161      */
 162     private class PropertyListParser {
 163
 164         public void parse(final StreamTokenizer tokeniser, Reader in,
 165                 final ContentHandler handler) throws IOException, ParseException,
 166                 URISyntaxException, ParserException {
 167
 168             assertToken(tokeniser, in, StreamTokenizer.TT_WORD);
 169
 170             while (/*
 171                      * !Component.BEGIN.equals(tokeniser.sval) &&
 172                      */!Component.END.equals(tokeniser.sval)) {
 173                 // check for timezones observances or vevent/vtodo alarms..
 174                 if (Component.BEGIN.equals(tokeniser.sval)) {
 175                     componentParser.parse(tokeniser, in, handler);
 176                 }
 177                 else {
 178                     propertyParser.parse(tokeniser, in, handler);
 179                 }
 180                 absorbWhitespace(tokeniser);
 181                 // assertToken(tokeniser, StreamTokenizer.TT_WORD);
 182             }
 183         }
 184     }
 185
 186     /**
 187      * Parses an iCalendar property from the specified stream tokeniser.
 188      * @param tokeniser
 189      * @throws IOException
 190      * @throws ParserException
 191      * @throws URISyntaxException
 192      * @throws ParseException
 193      */
 194     private class PropertyParser {
 195
 196         private static final String PARSE_DEBUG_MESSAGE = "Property [{0}]";
 197
 198         private static final String PARSE_EXCEPTION_MESSAGE = "Property [{0}]";
 199
 200         private void parse(final StreamTokenizer tokeniser, Reader in,
 201                 final ContentHandler handler) throws IOException, ParserException,
 202                 URISyntaxException, ParseException {
 203
 204             final String name = tokeniser.sval;
 205
 206             // debugging..
 207             if (log.isDebugEnabled()) {
 208                 log.debug(MessageFormat.format(PARSE_DEBUG_MESSAGE, new Object[] {name}));
 209             }
 210
 211             handler.startProperty(name);
 212
 213             paramListParser.parse(tokeniser, in, handler);
 214
 215             // it appears that control tokens (ie. ':') are allowed
 216             // after the first instance on a line is used.. as such
 217             // we must continue appending to value until EOL is
 218             // reached..
 219             // assertToken(tokeniser, StreamTokenizer.TT_WORD);
 220
 221             // String value = tokeniser.sval;
 222             final StringBuffer value = new StringBuffer();
 223
 224             // assertToken(tokeniser,StreamTokenizer.TT_EOL);
 225
 226             // DQUOTE is ordinary char for property value
 227             // From sec 4.3.11 of rfc-2445:
 228             // text       = *(TSAFE-CHAR / ":" / DQUOTE / ESCAPED-CHAR)
 229             //
 230             tokeniser.ordinaryChar('"');
 231             int nextToken = tokeniser.nextToken();
 232
 233             while (nextToken != StreamTokenizer.TT_EOL
 234                     && nextToken != StreamTokenizer.TT_EOF) {
 235
 236                 if (tokeniser.ttype == StreamTokenizer.TT_WORD) {
 237                     value.append(tokeniser.sval);
 238                 }
 239                 else {
 240                     value.append((char) tokeniser.ttype);
 241                 }
 242
 243                 nextToken = tokeniser.nextToken();
 244             }
 245
 246             // reset DQUOTE to be quote char
 247             tokeniser.quoteChar('"');
 248
 249             if (nextToken == StreamTokenizer.TT_EOF) {
 250                 throw new ParserException("Unexpected end of file",
 251                         getLineNumber(tokeniser, in));
 252             }
 253
 254             try {
 255                 handler.propertyValue(value.toString());
 256             }
 257             catch (ParseException e) {
 258                 final ParseException eNew = new ParseException("[" + name + "] "
 259                         + e.getMessage(), e.getErrorOffset());
 260                 eNew.initCause(e);
 261                 throw eNew;
 262             }
 263
 264             handler.endProperty(name);
 265
 266         }
 267     }
 268
 269     /**
 270      * Parses a list of iCalendar parameters by parsing the specified stream tokeniser.
 271      * @param tokeniser
 272      * @throws IOException
 273      * @throws ParserException
 274      * @throws URISyntaxException
 275      */
 276     private class ParameterListParser {
 277
 278         public void parse(final StreamTokenizer tokeniser, Reader in,
 279                 final ContentHandler handler) throws IOException, ParserException,
 280                 URISyntaxException {
 281
 282             while (tokeniser.nextToken() == ';') {
 283                 paramParser.parse(tokeniser, in, handler);
 284             }
 285         }
 286     }
 287
 288     /**
 289      * @param tokeniser
 290      * @param handler
 291      * @throws IOException
 292      * @throws ParserException
 293      * @throws URISyntaxException
 294      */
 295     private class ParameterParser {
 296
 297         private void parse(final StreamTokenizer tokeniser, Reader in,
 298                 final ContentHandler handler) throws IOException, ParserException,
 299                 URISyntaxException {
 300
 301             assertToken(tokeniser, in, StreamTokenizer.TT_WORD);
 302
 303             final String paramName = tokeniser.sval;
 304
 305             // debugging..
 306             if (log.isDebugEnabled()) {
 307                 log.debug("Parameter [" + paramName + "]");
 308             }
 309
 310             assertToken(tokeniser, in, '=');
 311
 312             final StringBuffer paramValue = new StringBuffer();
 313
 314             // preserve quote chars..
 315             if (tokeniser.nextToken() == '"') {
 316                 paramValue.append('"');
 317                 paramValue.append(tokeniser.sval);
 318                 paramValue.append('"');
 319             }
 320             else {
 321                 paramValue.append(tokeniser.sval);
 322             }
 323
 324             try {
 325                 handler.parameter(paramName, paramValue.toString());
 326             }
 327             catch (ClassCastException cce) {
 328                 throw new ParserException("Error parsing parameter", getLineNumber(tokeniser, in), cce);
 329             }
 330         }
 331     }
 332
 333     /**
 334      * Parses an iCalendar component list from the specified stream tokeniser.
 335      * @param tokeniser
 336      * @throws IOException
 337      * @throws ParseException
 338      * @throws URISyntaxException
 339      * @throws ParserException
 340      */
 341     private class ComponentListParser {
 342
 343         private void parse(final StreamTokenizer tokeniser, Reader in,
 344                 final ContentHandler handler) throws IOException, ParseException,
 345                 URISyntaxException, ParserException {
 346
 347             while (Component.BEGIN.equals(tokeniser.sval)) {
 348                 componentParser.parse(tokeniser, in, handler);
 349                 absorbWhitespace(tokeniser);
 350                 // assertToken(tokeniser, StreamTokenizer.TT_WORD);
 351             }
 352         }
 353     }
 354
 355     /**
 356      * Parses an iCalendar component from the specified stream tokeniser.
 357      * @param tokeniser
 358      * @throws IOException
 359      * @throws ParseException
 360      * @throws URISyntaxException
 361      * @throws ParserException
 362      */
 363     private class ComponentParser {
 364
 365         private void parse(final StreamTokenizer tokeniser, Reader in,
 366                 final ContentHandler handler) throws IOException, ParseException,
 367                 URISyntaxException, ParserException {
 368
 369             assertToken(tokeniser, in, ':');
 370
 371             assertToken(tokeniser, in, StreamTokenizer.TT_WORD);
 372
 373             final String name = tokeniser.sval;
 374
 375             handler.startComponent(name);
 376
 377             assertToken(tokeniser, in, StreamTokenizer.TT_EOL);
 378
 379             propertyListParser.parse(tokeniser, in, handler);
 380
 381             /*
 382              * // a special case for VTIMEZONE component which contains
 383              * // sub-components..
 384              * if (Component.VTIMEZONE.equals(name)) {
 385              *     parseComponentList(tokeniser, handler);
 386              * }
 387              * // VEVENT/VTODO components may optionally have embedded VALARM
 388              * // components..
 389              * else if ((Component.VEVENT.equals(name) || Component.VTODO.equals(name))
 390              *         &amp;&amp; Component.BEGIN.equals(tokeniser.sval)) {
 391              *     parseComponentList(tokeniser, handler);
 392              * }
 393              */
 394
 395             assertToken(tokeniser, in, ':');
 396
 397             assertToken(tokeniser, in, name);
 398
 399             assertToken(tokeniser, in, StreamTokenizer.TT_EOL);
 400
 401             handler.endComponent(name);
 402         }
 403     }
 404
 405     /**
 406      * Asserts that the next token in the stream matches the specified token.
 407      * @param tokeniser stream tokeniser to perform assertion on
 408      * @param token expected token
 409      * @throws IOException when unable to read from stream
 410      * @throws ParserException when next token in the stream does not match the expected token
 411      */
 412     private void assertToken(final StreamTokenizer tokeniser, Reader in, final int token)
 413             throws IOException, ParserException {
 414
 415         if (tokeniser.nextToken() != token) {
 416             throw new ParserException(MessageFormat.format(UNEXPECTED_TOKEN_MESSAGE, new Object[] {
 417                     new Integer(token), new Integer(tokeniser.ttype),
 418             }), getLineNumber(tokeniser, in));
 419         }
 420
 421         if (log.isDebugEnabled()) {
 422             log.debug("[" + token + "]");
 423         }
 424     }
 425
 426     /**
 427      * Asserts that the next token in the stream matches the specified token. This method is case-sensitive.
 428      * @param tokeniser
 429      * @param token
 430      * @throws IOException
 431      * @throws ParserException
 432      */
 433     private void assertToken(final StreamTokenizer tokeniser, Reader in, final String token)
 434             throws IOException, ParserException {
 435         assertToken(tokeniser, in, token, false);
 436     }
 437
 438     /**
 439      * Asserts that the next token in the stream matches the specified token.
 440      * @param tokeniser stream tokeniser to perform assertion on
 441      * @param token expected token
 442      * @throws IOException when unable to read from stream
 443      * @throws ParserException when next token in the stream does not match the expected token
 444      */
 445     private void assertToken(final StreamTokenizer tokeniser, Reader in,
 446             final String token, final boolean ignoreCase) throws IOException,
 447             ParserException {
 448
 449         // ensure next token is a word token..
 450         assertToken(tokeniser, in, StreamTokenizer.TT_WORD);
 451
 452         if (ignoreCase) {
 453             if (!token.equalsIgnoreCase(tokeniser.sval)) {
 454                 throw new ParserException(MessageFormat.format(UNEXPECTED_TOKEN_MESSAGE, new Object[] {
 455                         token, tokeniser.sval,
 456                 }), getLineNumber(tokeniser, in));
 457             }
 458         }
 459         else if (!token.equals(tokeniser.sval)) {
 460             throw new ParserException(MessageFormat.format(UNEXPECTED_TOKEN_MESSAGE, new Object[] {
 461                     token, tokeniser.sval,
 462             }), getLineNumber(tokeniser, in));
 463         }
 464
 465         if (log.isDebugEnabled()) {
 466             log.debug("[" + token + "]");
 467         }
 468     }
 469
 470     /**
 471      * Absorbs extraneous newlines.
 472      * @param tokeniser
 473      * @throws IOException
 474      */
 475     private void absorbWhitespace(final StreamTokenizer tokeniser) throws IOException {
 476         // HACK: absorb extraneous whitespace between components (KOrganizer)..
 477         while (tokeniser.nextToken() == StreamTokenizer.TT_EOL) {
 478             if (log.isTraceEnabled()) {
 479                 log.trace("Absorbing extra whitespace..");
 480             }
 481         }
 482         if (log.isTraceEnabled()) {
 483             log.trace("Aborting: absorbing extra whitespace complete");
 484         }
 485     }
 486
 487     /**
 488      * @param tokeniser
 489      * @param in
 490      * @return
 491      */
 492     private int getLineNumber(StreamTokenizer tokeniser, Reader in) {
 493         int line = tokeniser.lineno();
 494         if (tokeniser.ttype == StreamTokenizer.TT_EOL) {
 495             line -= 1;
 496         }
 497         if (in instanceof UnfoldingReader) {
 498             // need to take unfolded lines into account
 499             final int unfolded = ((UnfoldingReader) in).getLinesUnfolded();
 500             line += unfolded;
 501         }
 502         return line;
 503     }
 504 }