org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java

   1 /*
   2  * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
   3  *
   4  * All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or
   7  * without modification, are permitted provided that the following
   8  * conditions are met:
   9  *
  10  * - Redistributions of source code must retain the above copyright
  11  *   notice, this list of conditions and the following disclaimer.
  12  *
  13  * - Redistributions in binary form must reproduce the above
  14  *   copyright notice, this list of conditions and the following
  15  *   disclaimer in the documentation and/or other materials provided
  16  *   with the distribution.
  17  *
  18  * - Neither the name of the Git Development Community nor the
  19  *   names of its contributors may be used to endorse or promote
  20  *   products derived from this software without specific prior
  21  *   written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  24  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  25  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  28  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  30  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  33  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  35  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38 package org.spearce.jgit.util;
  39
  40 import static org.spearce.jgit.lib.ObjectChecker.author;
  41 import static org.spearce.jgit.lib.ObjectChecker.committer;
  42 import static org.spearce.jgit.lib.ObjectChecker.encoding;
  43
  44 import java.nio.ByteBuffer;
  45 import java.nio.charset.CharacterCodingException;
  46 import java.nio.charset.Charset;
  47 import java.nio.charset.CharsetDecoder;
  48 import java.nio.charset.CodingErrorAction;
  49 import java.util.Arrays;
  50
  51 import org.spearce.jgit.lib.Constants;
  52 import org.spearce.jgit.lib.PersonIdent;
  53
  54 /** Handy utility functions to parse raw object contents. */
  55 public final class RawParseUtils {
  56         private static final byte[] digits;
  57
  58         static {
  59                 digits = new byte['9' + 1];
  60                 Arrays.fill(digits, (byte) -1);
  61                 for (char i = '0'; i <= '9'; i++)
  62                         digits[i] = (byte) (i - '0');
  63         }
  64
  65         /**
  66          * Determine if b[ptr] matches src.
  67          *
  68          * @param b
  69          *            the buffer to scan.
  70          * @param ptr
  71          *            first position within b, this should match src[0].
  72          * @param src
  73          *            the buffer to test for equality with b.
  74          * @return ptr + src.length if b[ptr..src.length] == src; else -1.
  75          */
  76         public static final int match(final byte[] b, int ptr, final byte[] src) {
  77                 if (ptr + src.length > b.length)
  78                         return -1;
  79                 for (int i = 0; i < src.length; i++, ptr++)
  80                         if (b[ptr] != src[i])
  81                                 return -1;
  82                 return ptr;
  83         }
  84
  85         private static final byte[] base10byte = { '0', '1', '2', '3', '4', '5',
  86                         '6', '7', '8', '9' };
  87
  88         /**
  89          * Format a base 10 numeric into a temporary buffer.
  90          * <p>
  91          * Formatting is performed backwards. The method starts at offset
  92          * <code>o-1</code> and ends at <code>o-1-digits</code>, where
  93          * <code>digits</code> is the number of positions necessary to store the
  94          * base 10 value.
  95          * <p>
  96          * The argument and return values from this method make it easy to chain
  97          * writing, for example:
  98          * </p>
  99          *
 100          * <pre>
 101          * final byte[] tmp = new byte[64];
 102          * int ptr = tmp.length;
 103          * tmp[--ptr] = '\n';
 104          * ptr = RawParseUtils.formatBase10(tmp, ptr, 32);
 105          * tmp[--ptr] = ' ';
 106          * ptr = RawParseUtils.formatBase10(tmp, ptr, 18);
 107          * tmp[--ptr] = 0;
 108          * final String str = new String(tmp, ptr, tmp.length - ptr);
 109          * </pre>
 110          *
 111          * @param b
 112          *            buffer to write into.
 113          * @param o
 114          *            one offset past the location where writing will begin; writing
 115          *            proceeds towards lower index values.
 116          * @param value
 117          *            the value to store.
 118          * @return the new offset value <code>o</code>. This is the position of
 119          *         the last byte written. Additional writing should start at one
 120          *         position earlier.
 121          */
 122         public static int formatBase10(final byte[] b, int o, int value) {
 123                 if (value == 0) {
 124                         b[--o] = '0';
 125                         return o;
 126                 }
 127                 final boolean isneg = value < 0;
 128                 while (value != 0) {
 129                         b[--o] = base10byte[value % 10];
 130                         value /= 10;
 131                 }
 132                 if (isneg)
 133                         b[--o] = '-';
 134                 return o;
 135         }
 136
 137         /**
 138          * Parse a base 10 numeric from a sequence of ASCII digits into an int.
 139          * <p>
 140          * Digit sequences can begin with an optional run of spaces before the
 141          * sequence, and may start with a '+' or a '-' to indicate sign position.
 142          * Any other characters will cause the method to stop and return the current
 143          * result to the caller.
 144          *
 145          * @param b
 146          *            buffer to scan.
 147          * @param ptr
 148          *            position within buffer to start parsing digits at.
 149          * @param ptrResult
 150          *            optional location to return the new ptr value through. If null
 151          *            the ptr value will be discarded.
 152          * @return the value at this location; 0 if the location is not a valid
 153          *         numeric.
 154          */
 155         public static final int parseBase10(final byte[] b, int ptr,
 156                         final MutableInteger ptrResult) {
 157                 int r = 0;
 158                 int sign = 0;
 159                 try {
 160                         final int sz = b.length;
 161                         while (ptr < sz && b[ptr] == ' ')
 162                                 ptr++;
 163                         if (ptr >= sz)
 164                                 return 0;
 165
 166                         switch (b[ptr]) {
 167                         case '-':
 168                                 sign = -1;
 169                                 ptr++;
 170                                 break;
 171                         case '+':
 172                                 ptr++;
 173                                 break;
 174                         }
 175
 176                         while (ptr < sz) {
 177                                 final byte v = digits[b[ptr]];
 178                                 if (v < 0)
 179                                         break;
 180                                 r = (r * 10) + v;
 181                                 ptr++;
 182                         }
 183                 } catch (ArrayIndexOutOfBoundsException e) {
 184                         // Not a valid digit.
 185                 }
 186                 if (ptrResult != null)
 187                         ptrResult.value = ptr;
 188                 return sign < 0 ? -r : r;
 189         }
 190
 191         /**
 192          * Parse a base 10 numeric from a sequence of ASCII digits into a long.
 193          * <p>
 194          * Digit sequences can begin with an optional run of spaces before the
 195          * sequence, and may start with a '+' or a '-' to indicate sign position.
 196          * Any other characters will cause the method to stop and return the current
 197          * result to the caller.
 198          *
 199          * @param b
 200          *            buffer to scan.
 201          * @param ptr
 202          *            position within buffer to start parsing digits at.
 203          * @param ptrResult
 204          *            optional location to return the new ptr value through. If null
 205          *            the ptr value will be discarded.
 206          * @return the value at this location; 0 if the location is not a valid
 207          *         numeric.
 208          */
 209         public static final long parseLongBase10(final byte[] b, int ptr,
 210                         final MutableInteger ptrResult) {
 211                 long r = 0;
 212                 int sign = 0;
 213                 try {
 214                         final int sz = b.length;
 215                         while (ptr < sz && b[ptr] == ' ')
 216                                 ptr++;
 217                         if (ptr >= sz)
 218                                 return 0;
 219
 220                         switch (b[ptr]) {
 221                         case '-':
 222                                 sign = -1;
 223                                 ptr++;
 224                                 break;
 225                         case '+':
 226                                 ptr++;
 227                                 break;
 228                         }
 229
 230                         while (ptr < sz) {
 231                                 final byte v = digits[b[ptr]];
 232                                 if (v < 0)
 233                                         break;
 234                                 r = (r * 10) + v;
 235                                 ptr++;
 236                         }
 237                 } catch (ArrayIndexOutOfBoundsException e) {
 238                         // Not a valid digit.
 239                 }
 240                 if (ptrResult != null)
 241                         ptrResult.value = ptr;
 242                 return sign < 0 ? -r : r;
 243         }
 244
 245         /**
 246          * Parse a Git style timezone string.
 247          * <p>
 248          * The sequence "-0315" will be parsed as the numeric value -195, as the
 249          * lower two positions count minutes, not 100ths of an hour.
 250          *
 251          * @param b
 252          *            buffer to scan.
 253          * @param ptr
 254          *            position within buffer to start parsing digits at.
 255          * @return the timezone at this location, expressed in minutes.
 256          */
 257         public static final int parseTimeZoneOffset(final byte[] b, int ptr) {
 258                 final int v = parseBase10(b, ptr, null);
 259                 final int tzMins = v % 100;
 260                 final int tzHours = v / 100;
 261                 return tzHours * 60 + tzMins;
 262         }
 263
 264         /**
 265          * Locate the first position after a given character.
 266          *
 267          * @param b
 268          *            buffer to scan.
 269          * @param ptr
 270          *            position within buffer to start looking for chrA at.
 271          * @param chrA
 272          *            character to find.
 273          * @return new position just after chrA.
 274          */
 275         public static final int next(final byte[] b, int ptr, final char chrA) {
 276                 final int sz = b.length;
 277                 while (ptr < sz) {
 278                         if (b[ptr++] == chrA)
 279                                 return ptr;
 280                 }
 281                 return ptr;
 282         }
 283
 284         /**
 285          * Locate the first position after the next LF.
 286          * <p>
 287          * This method stops on the first '\n' it finds.
 288          *
 289          * @param b
 290          *            buffer to scan.
 291          * @param ptr
 292          *            position within buffer to start looking for LF at.
 293          * @return new position just after the first LF found.
 294          */
 295         public static final int nextLF(final byte[] b, int ptr) {
 296                 return next(b, ptr, '\n');
 297         }
 298
 299         /**
 300          * Locate the first position after either the given character or LF.
 301          * <p>
 302          * This method stops on the first match it finds from either chrA or '\n'.
 303          *
 304          * @param b
 305          *            buffer to scan.
 306          * @param ptr
 307          *            position within buffer to start looking for chrA or LF at.
 308          * @param chrA
 309          *            character to find.
 310          * @return new position just after the first chrA or LF to be found.
 311          */
 312         public static final int nextLF(final byte[] b, int ptr, final char chrA) {
 313                 final int sz = b.length;
 314                 while (ptr < sz) {
 315                         final byte c = b[ptr++];
 316                         if (c == chrA || c == '\n')
 317                                 return ptr;
 318                 }
 319                 return ptr;
 320         }
 321
 322         /**
 323          * Index the region between <code>[ptr, end)</code> to find line starts.
 324          * <p>
 325          * The returned list is 1 indexed. Index 0 contains
 326          * {@link Integer#MIN_VALUE} to pad the list out.
 327          * <p>
 328          * Using a 1 indexed list means that line numbers can be directly accessed
 329          * from the list, so <code>list.get(1)</code> (aka get line 1) returns
 330          * <code>ptr</code>.
 331          *
 332          * @param buf
 333          *            buffer to scan.
 334          * @param ptr
 335          *            position within the buffer corresponding to the first byte of
 336          *            line 1.
 337          * @param end
 338          *            1 past the end of the content within <code>buf</code>.
 339          * @return a line map indexing the start position of each line.
 340          */
 341         public static final IntList lineMap(final byte[] buf, int ptr, int end) {
 342                 // Experimentally derived from multiple source repositories
 343                 // the average number of bytes/line is 36. Its a rough guess
 344                 // to initially size our map close to the target.
 345                 //
 346                 final IntList map = new IntList((end - ptr) / 36);
 347                 map.fillTo(1, Integer.MIN_VALUE);
 348                 for (; ptr < end; ptr = nextLF(buf, ptr))
 349                         map.add(ptr);
 350                 return map;
 351         }
 352
 353         /**
 354          * Locate the "author " header line data.
 355          *
 356          * @param b
 357          *            buffer to scan.
 358          * @param ptr
 359          *            position in buffer to start the scan at. Most callers should
 360          *            pass 0 to ensure the scan starts from the beginning of the
 361          *            commit buffer and does not accidentally look at message body.
 362          * @return position just after the space in "author ", so the first
 363          *         character of the author's name. If no author header can be
 364          *         located -1 is returned.
 365          */
 366         public static final int author(final byte[] b, int ptr) {
 367                 final int sz = b.length;
 368                 if (ptr == 0)
 369                         ptr += 46; // skip the "tree ..." line.
 370                 while (ptr < sz && b[ptr] == 'p')
 371                         ptr += 48; // skip this parent.
 372                 return match(b, ptr, author);
 373         }
 374
 375         /**
 376          * Locate the "committer " header line data.
 377          *
 378          * @param b
 379          *            buffer to scan.
 380          * @param ptr
 381          *            position in buffer to start the scan at. Most callers should
 382          *            pass 0 to ensure the scan starts from the beginning of the
 383          *            commit buffer and does not accidentally look at message body.
 384          * @return position just after the space in "committer ", so the first
 385          *         character of the committer's name. If no committer header can be
 386          *         located -1 is returned.
 387          */
 388         public static final int committer(final byte[] b, int ptr) {
 389                 final int sz = b.length;
 390                 if (ptr == 0)
 391                         ptr += 46; // skip the "tree ..." line.
 392                 while (ptr < sz && b[ptr] == 'p')
 393                         ptr += 48; // skip this parent.
 394                 if (ptr < sz && b[ptr] == 'a')
 395                         ptr = nextLF(b, ptr);
 396                 return match(b, ptr, committer);
 397         }
 398
 399         /**
 400          * Locate the "encoding " header line.
 401          *
 402          * @param b
 403          *            buffer to scan.
 404          * @param ptr
 405          *            position in buffer to start the scan at. Most callers should
 406          *            pass 0 to ensure the scan starts from the beginning of the
 407          *            buffer and does not accidentally look at the message body.
 408          * @return position just after the space in "encoding ", so the first
 409          *         character of the encoding's name. If no encoding header can be
 410          *         located -1 is returned (and UTF-8 should be assumed).
 411          */
 412         public static final int encoding(final byte[] b, int ptr) {
 413                 final int sz = b.length;
 414                 while (ptr < sz) {
 415                         if (b[ptr] == '\n')
 416                                 return -1;
 417                         if (b[ptr] == 'e')
 418                                 break;
 419                         ptr = nextLF(b, ptr);
 420                 }
 421                 return match(b, ptr, encoding);
 422         }
 423
 424         /**
 425          * Parse the "encoding " header into a character set reference.
 426          * <p>
 427          * Locates the "encoding " header (if present) by first calling
 428          * {@link #encoding(byte[], int)} and then returns the proper character set
 429          * to apply to this buffer to evaluate its contents as character data.
 430          * <p>
 431          * If no encoding header is present, {@link Constants#CHARSET} is assumed.
 432          *
 433          * @param b
 434          *            buffer to scan.
 435          * @return the Java character set representation. Never null.
 436          */
 437         public static Charset parseEncoding(final byte[] b) {
 438                 final int enc = encoding(b, 0);
 439                 if (enc < 0)
 440                         return Constants.CHARSET;
 441                 final int lf = nextLF(b, enc);
 442                 return Charset.forName(decode(Constants.CHARSET, b, enc, lf - 1));
 443         }
 444
 445         /**
 446          * Parse a name line (e.g. author, committer, tagger) into a PersonIdent.
 447          * <p>
 448          * When passing in a value for <code>nameB</code> callers should use the
 449          * return value of {@link #author(byte[], int)} or
 450          * {@link #committer(byte[], int)}, as these methods provide the proper
 451          * position within the buffer.
 452          *
 453          * @param raw
 454          *            the buffer to parse character data from.
 455          * @param nameB
 456          *            first position of the identity information. This should be the
 457          *            first position after the space which delimits the header field
 458          *            name (e.g. "author" or "committer") from the rest of the
 459          *            identity line.
 460          * @return the parsed identity. Never null.
 461          */
 462         public static PersonIdent parsePersonIdent(final byte[] raw, final int nameB) {
 463                 final Charset cs = parseEncoding(raw);
 464                 final int emailB = nextLF(raw, nameB, '<');
 465                 final int emailE = nextLF(raw, emailB, '>');
 466
 467                 final String name = decode(cs, raw, nameB, emailB - 2);
 468                 final String email = decode(cs, raw, emailB, emailE - 1);
 469
 470                 final MutableInteger ptrout = new MutableInteger();
 471                 final long when = parseLongBase10(raw, emailE + 1, ptrout);
 472                 final int tz = parseTimeZoneOffset(raw, ptrout.value);
 473
 474                 return new PersonIdent(name, email, when * 1000L, tz);
 475         }
 476
 477         /**
 478          * Decode a buffer under UTF-8, if possible.
 479          *
 480          * If the byte stream cannot be decoded that way, the platform default is tried
 481          * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
 482          *
 483          * @param buffer
 484          *            buffer to pull raw bytes from.
 485          * @return a string representation of the range <code>[start,end)</code>,
 486          *         after decoding the region through the specified character set.
 487          */
 488         public static String decode(final byte[] buffer) {
 489                 return decode(Constants.CHARSET, buffer, 0, buffer.length);
 490         }
 491
 492         /**
 493          * Decode a buffer under the specified character set if possible.
 494          *
 495          * If the byte stream cannot be decoded that way, the platform default is tried
 496          * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
 497          *
 498          * @param cs
 499          *            character set to use when decoding the buffer.
 500          * @param buffer
 501          *            buffer to pull raw bytes from.
 502          * @return a string representation of the range <code>[start,end)</code>,
 503          *         after decoding the region through the specified character set.
 504          */
 505         public static String decode(final Charset cs, final byte[] buffer) {
 506                 return decode(cs, buffer, 0, buffer.length);
 507         }
 508
 509         /**
 510          * Decode a region of the buffer under the specified character set if possible.
 511          *
 512          * If the byte stream cannot be decoded that way, the platform default is tried
 513          * and if that too fails, the fail-safe ISO-8859-1 encoding is tried.
 514          *
 515          * @param cs
 516          *            character set to use when decoding the buffer.
 517          * @param buffer
 518          *            buffer to pull raw bytes from.
 519          * @param start
 520          *            first position within the buffer to take data from.
 521          * @param end
 522          *            one position past the last location within the buffer to take
 523          *            data from.
 524          * @return a string representation of the range <code>[start,end)</code>,
 525          *         after decoding the region through the specified character set.
 526          */
 527         public static String decode(final Charset cs, final byte[] buffer,
 528                         final int start, final int end) {
 529                 try {
 530                         return decodeNoFallback(cs, buffer, start, end);
 531                 } catch (CharacterCodingException e) {
 532                         // Fall back to an ISO-8859-1 style encoding. At least all of
 533                         // the bytes will be present in the output.
 534                         //
 535                         return extractBinaryString(buffer, start, end);
 536                 }
 537         }
 538
 539         /**
 540          * Decode a region of the buffer under the specified character set if
 541          * possible.
 542          *
 543          * If the byte stream cannot be decoded that way, the platform default is
 544          * tried and if that too fails, an exception is thrown.
 545          *
 546          * @param cs
 547          *            character set to use when decoding the buffer.
 548          * @param buffer
 549          *            buffer to pull raw bytes from.
 550          * @param start
 551          *            first position within the buffer to take data from.
 552          * @param end
 553          *            one position past the last location within the buffer to take
 554          *            data from.
 555          * @return a string representation of the range <code>[start,end)</code>,
 556          *         after decoding the region through the specified character set.
 557          * @throws CharacterCodingException
 558          *             the input is not in any of the tested character sets.
 559          */
 560         public static String decodeNoFallback(final Charset cs,
 561                         final byte[] buffer, final int start, final int end)
 562                         throws CharacterCodingException {
 563                 final ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start);
 564                 b.mark();
 565
 566                 // Try our built-in favorite. The assumption here is that
 567                 // decoding will fail if the data is not actually encoded
 568                 // using that encoder.
 569                 //
 570                 try {
 571                         return decode(b, Constants.CHARSET);
 572                 } catch (CharacterCodingException e) {
 573                         b.reset();
 574                 }
 575
 576                 if (!cs.equals(Constants.CHARSET)) {
 577                         // Try the suggested encoding, it might be right since it was
 578                         // provided by the caller.
 579                         //
 580                         try {
 581                                 return decode(b, cs);
 582                         } catch (CharacterCodingException e) {
 583                                 b.reset();
 584                         }
 585                 }
 586
 587                 // Try the default character set. A small group of people
 588                 // might actually use the same (or very similar) locale.
 589                 //
 590                 final Charset defcs = Charset.defaultCharset();
 591                 if (!defcs.equals(cs) && !defcs.equals(Constants.CHARSET)) {
 592                         try {
 593                                 return decode(b, defcs);
 594                         } catch (CharacterCodingException e) {
 595                                 b.reset();
 596                         }
 597                 }
 598
 599                 throw new CharacterCodingException();
 600         }
 601
 602         /**
 603          * Decode a region of the buffer under the ISO-8859-1 encoding.
 604          *
 605          * Each byte is treated as a single character in the 8859-1 character
 606          * encoding, performing a raw binary->char conversion.
 607          *
 608          * @param buffer
 609          *            buffer to pull raw bytes from.
 610          * @param start
 611          *            first position within the buffer to take data from.
 612          * @param end
 613          *            one position past the last location within the buffer to take
 614          *            data from.
 615          * @return a string representation of the range <code>[start,end)</code>.
 616          */
 617         public static String extractBinaryString(final byte[] buffer,
 618                         final int start, final int end) {
 619                 final StringBuilder r = new StringBuilder(end - start);
 620                 for (int i = start; i < end; i++)
 621                         r.append((char) (buffer[i] & 0xff));
 622                 return r.toString();
 623         }
 624
 625         private static String decode(final ByteBuffer b, final Charset charset)
 626                         throws CharacterCodingException {
 627                 final CharsetDecoder d = charset.newDecoder();
 628                 d.onMalformedInput(CodingErrorAction.REPORT);
 629                 d.onUnmappableCharacter(CodingErrorAction.REPORT);
 630                 return d.decode(b).toString();
 631         }
 632
 633         /**
 634          * Locate the position of the commit message body.
 635          *
 636          * @param b
 637          *            buffer to scan.
 638          * @param ptr
 639          *            position in buffer to start the scan at. Most callers should
 640          *            pass 0 to ensure the scan starts from the beginning of the
 641          *            commit buffer.
 642          * @return position of the user's message buffer.
 643          */
 644         public static final int commitMessage(final byte[] b, int ptr) {
 645                 final int sz = b.length;
 646                 if (ptr == 0)
 647                         ptr += 46; // skip the "tree ..." line.
 648                 while (ptr < sz && b[ptr] == 'p')
 649                         ptr += 48; // skip this parent.
 650
 651                 // skip any remaining header lines, ignoring what their actual
 652                 // header line type is.
 653                 //
 654                 while (ptr < sz && b[ptr] != '\n')
 655                         ptr = nextLF(b, ptr);
 656                 if (ptr < sz && b[ptr] == '\n')
 657                         return ptr + 1;
 658                 return -1;
 659         }
 660
 661         /**
 662          * Locate the end of a paragraph.
 663          * <p>
 664          * A paragraph is ended by two consecutive LF bytes.
 665          *
 666          * @param b
 667          *            buffer to scan.
 668          * @param start
 669          *            position in buffer to start the scan at. Most callers will
 670          *            want to pass the first position of the commit message (as
 671          *            found by {@link #commitMessage(byte[], int)}.
 672          * @return position of the LF at the end of the paragraph;
 673          *         <code>b.length</code> if no paragraph end could be located.
 674          */
 675         public static final int endOfParagraph(final byte[] b, final int start) {
 676                 int ptr = start;
 677                 final int sz = b.length;
 678                 while (ptr < sz && b[ptr] != '\n')
 679                         ptr = nextLF(b, ptr);
 680                 while (0 < ptr && start < ptr && b[ptr - 1] == '\n')
 681                         ptr--;
 682                 return ptr;
 683         }
 684
        /** Not instantiable: this class only offers static helper methods. */
        private RawParseUtils() {
                // Don't create instances of a static only utility.
        }
 688 }