contrib/fast-import/import-directories.perl

   1 #!/usr/bin/perl
   2 #
   3 # Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
   4 #
   5 # ------------------------------------------------------------------------
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation.
  10 #
  11 # This program is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 # GNU General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  19 #
  20 # ------------------------------------------------------------------------
  21
  22 =pod
  23
  24 =head1 NAME
  25
  26 import-directories - Import bits and pieces to Git.
  27
  28 =head1 SYNOPSIS
  29
  30 B<import-directories.perl> F<configfile> F<outputfile>
  31
  32 =head1 DESCRIPTION
  33
  34 Script to import projects that were version controlled by the "copy the
  35 source directory to a new location and edit it there" method into Git.
  36 Handles projects with arbitrary branching
  37 and version trees, taking a file describing the inputs and generating a
  38 file compatible with the L<git-fast-import(1)> format.
  39
  40 =head1 CONFIGURATION FILE
  41
  42 =head2 Format
  43
  44 The configuration file is based on the standard I<.ini> format.
  45
  46  ; Comments start with semi-colons
  47  [section]
  48  key=value
  49
  50 Please see below for information on how to escape special characters.
  51
  52 =head2 Global configuration
  53
  54 Global configuration is done in the B<[config]> section, which should be
  55 the first section in the file. Configuration can be changed by
  56 repeating configuration sections later on.
  57
  58  [config]
  59  ; configure conversion of CRLFs. "convert" means that all CRLFs
  60  ; should be converted into LFs (suitable for the core.autocrlf
  61  ; setting set to true in Git). "none" means that all data is
  62  ; treated as binary.
  63  crlf=convert
  64
  65 =head2 Revision configuration
  66
  67 Each revision that is to be imported is described in three
  68 sections. Revisions should be defined in topological order, so
  69 that a revision's parent has always been defined when a new revision
  70 is introduced. All the sections for one revision must be defined
  71 before defining the next revision.
  72
  73 Each revision is assigned a unique numerical identifier. The
  74 numbers do not need to be consecutive, nor monotonically
  75 increasing.
  76
  77 For instance, if your configuration file contains only the two
  78 revisions 4711 and 42, where 4711 is the initial commit, the
  79 only requirement is that 4711 is completely defined before 42.
  80
  81 =pod
  82
  83 =head3 Revision description section
  84
  85 A section whose section name is just an integer gives meta-data
  86 about the revision.
  87
  88  [3]
  89  ; author sets the author of the revision
  90  author=Peter Krefting <peter@softwolves.pp.se>
  91  ; branch sets the branch that the revision should be committed to
  92  branch=master
  93  ; parent describes the revision that is the parent of this commit
  94  ; (optional)
  95  parent=1
  96  ; merges describes a revision that is merged into this commit
  97  ; (optional; can be repeated)
  98  merges=2
  99  ; selects one file to take the timestamp from
 100  ; (optional; if unspecified, the most recent file from the .files
 101  ;  section is used)
 102  timestamp=3/source.c
 103
 104 =head3 Revision contents section
 105
 106 A section whose section name is an integer followed by B<.files>
 107 describes all the files included in this revision. If a file that
 108 was available previously is not included in this revision, it will
 109 be removed.
 110
 111 If an on-disk revision is incomplete, you can point to files from
 112 a previous revision. There are no restrictions on where the source
 113 files are located, nor on their names.
 114
 115  [3.files]
 116  ; the key is the path inside the repository, the value is the path
 117  ; as seen from the importer script.
 118  source.c=ver-3.00/source.c
 119  source.h=ver-2.99/source.h
 120  readme.txt=ver-3.00/introduction to the project.txt
 121
 122 File names are treated as byte strings (but please see below on
 123 quoting rules), and should be stored in the configuration file in
 124 the encoding that should be used in the generated repository.
 125
 126 =head3 Revision commit message section
 127
 128 A section whose section name is an integer followed by B<.message>
 129 gives the commit message. This section is read verbatim, up until
 130 the beginning of the next section. As such, a commit message may not
 131 contain a line that begins with an opening square bracket ("[") and
 132 ends with a closing square bracket ("]"), unless they are surrounded
 133 by whitespace or other characters.
 134
 135  [3.message]
 136  Implement foobar.
 137  ; trailing blank lines are ignored.
 138
 139 =cut
 140
 141 # Globals
 142 use strict;
 143 use warnings;
 144 use integer;
 145 my $crlfmode = 0;
 146 my @revs;
 147 my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource);
 148 my $sectiontype = 0;
 149 my $rev = 0;
 150 my $mark = 1;
 151
 152 # Check command line
 153 if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/)
 154 {
 155     exec('perldoc', $0);
 156     exit 1;
 157 }
 158
 159 # Open configuration
 160 my $config = $ARGV[0];
 161 open CFG, '<', $config or die "Cannot open configuration file \"$config\": ";
 162
 163 # Open output
 164 my $output = $ARGV[1];
 165 open OUT, '>', $output or die "Cannot create output file \"$output\": ";
 166 binmode OUT;
 167
 168 LINE: while (my $line = <CFG>)
 169 {
 170         $line =~ s/\r?\n$//;
 171         next LINE if $sectiontype != 4 && $line eq '';
 172         next LINE if $line =~ /^;/;
 173         my $oldsectiontype = $sectiontype;
 174         my $oldrev = $rev;
 175
 176         # Sections
 177         if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
 178         {
 179                 if ($1 eq 'config')
 180                 {
 181                         $sectiontype = 1;
 182                 }
 183                 elsif ($3 eq '')
 184                 {
 185                         $sectiontype = 2;
 186                         $rev = $2;
 187                         # Create a new revision
 188                         die "Duplicate rev: $line\n " if defined $revmap{$rev};
 189                         print "Reading revision $rev\n";
 190                         push @revs, $rev;
 191                         $revmap{$rev} = $mark ++;
 192                         $time{$revmap{$rev}} = 0;
 193                 }
 194                 elsif ($3 eq '.files')
 195                 {
 196                         $sectiontype = 3;
 197                         $rev = $2;
 198                         die "Revision mismatch: $line\n " unless $rev == $oldrev;
 199                 }
 200                 elsif ($3 eq '.message')
 201                 {
 202                         $sectiontype = 4;
 203                         $rev = $2;
 204                         die "Revision mismatch: $line\n " unless $rev == $oldrev;
 205                 }
 206                 else
 207                 {
 208                         die "Internal parse error: $line\n ";
 209                 }
 210                 next LINE;
 211         }
 212
 213         # Parse data
 214         if ($sectiontype != 4)
 215         {
 216                 # Key and value
 217                 if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$")
 218                 {
 219                         my ($key, $value) = &parsekeyvaluepair($1);
 220                         # Global configuration
 221                         if (1 == $sectiontype)
 222                         {
 223                                 if ($key eq 'crlf')
 224                                 {
 225                                         $crlfmode = 1, next LINE if $value eq 'convert';
 226                                         $crlfmode = 0, next LINE if $value eq 'none';
 227                                 }
 228                                 die "Unknown configuration option: $line\n ";
 229                         }
 230                         # Revision specification
 231                         if (2 == $sectiontype)
 232                         {
 233                                 my $current = $revmap{$rev};
 234                                 $author{$current} = $value, next LINE if $key eq 'author';
 235                                 $branch{$current} = $value, next LINE if $key eq 'branch';
 236                                 $parent{$current} = $value, next LINE if $key eq 'parent';
 237                                 $timesource{$current} = $value, next LINE if $key eq 'timestamp';
 238                                 push(@{$merges{$current}}, $value), next LINE if $key eq 'merges';
 239                                 die "Unknown revision option: $line\n ";
 240                         }
 241                         # Filespecs
 242                         if (3 == $sectiontype)
 243                         {
 244                                 # Add the file and create a marker
 245                                 die "File not found: $line\n " unless -f $value;
 246                                 my $current = $revmap{$rev};
 247                                 ${$files{$current}}{$key} = $mark;
 248                                 my $time = &fileblob($value, $crlfmode, $mark ++);
 249
 250                                 # Update revision timestamp if more recent than other
 251                                 # files seen, or if this is the file we have selected
 252                                 # to take the time stamp from using the "timestamp"
 253                                 # directive.
 254                                 if ((defined $timesource{$current} && $timesource{$current} eq $value)
 255                                     || $time > $time{$current})
 256                                 {
 257                                         $time{$current} = $time;
 258                                 }
 259                         }
 260                 }
 261                 else
 262                 {
 263                         die "Parse error: $line\n ";
 264                 }
 265         }
 266         else
 267         {
 268                 # Commit message
 269                 my $current = $revmap{$rev};
 270                 if (defined $message{$current})
 271                 {
 272                         $message{$current} .= "\n";
 273                 }
 274                 $message{$current} .= $line;
 275         }
 276 }
 277 close CFG;
 278
 279 # Start spewing out data for git-fast-import
 280 foreach my $commit (@revs)
 281 {
 282         # Progress
 283         print OUT "progress Creating revision $commit\n";
 284
 285         # Create commit header
 286         my $mark = $revmap{$commit};
 287
 288         # Branch and commit id
 289         print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n";
 290
 291         # Author and timestamp
 292         die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark};
 293         print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n";
 294
 295         # Commit message
 296         die "No message defined for $commit\n" unless defined $message{$mark};
 297         my $message = $message{$mark};
 298         $message =~ s/\n$//; # Kill trailing empty line
 299         print OUT "data ", length($message), "\n", $message, "\n";
 300
 301         # Parent and any merges
 302         print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark};
 303         if (defined $merges{$mark})
 304         {
 305                 foreach my $merge (@{$merges{$mark}})
 306                 {
 307                         print OUT "merge :", $revmap{$merge}, "\n";
 308                 }
 309         }
 310
 311         # Output file marks
 312         print OUT "deleteall\n"; # start from scratch
 313         foreach my $file (sort keys %{$files{$mark}})
 314         {
 315                 print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n";
 316         }
 317         print OUT "\n";
 318 }
 319
 320 # Create one file blob
 321 sub fileblob
 322 {
 323         my ($filename, $crlfmode, $mark) = @_;
 324
 325         # Import the file
 326         print OUT "progress Importing $filename\nblob\nmark :$mark\n";
 327         open FILE, '<', $filename or die "Cannot read $filename\n ";
 328         binmode FILE;
 329         my ($size, $mtime) = (stat(FILE))[7,9];
 330         my $file;
 331         read FILE, $file, $size;
 332         close FILE;
 333         $file =~ s/\r\n/\n/g if $crlfmode;
 334         print OUT "data ", length($file), "\n", $file, "\n";
 335
 336         return $mtime;
 337 }
 338
 339 # Parse a key=value pair
 340 sub parsekeyvaluepair
 341 {
 342 =pod
 343
 344 =head2 Escaping special characters
 345
 346 Key and value strings may be enclosed in quotes, in which case
 347 whitespace inside the quotes is preserved. Additionally, an equal
 348 sign may be included in the key by preceding it with a backslash.
 349 For example:
 350
 351  "key1 "=value1
 352  key2=" value2"
 353  key\=3=value3
 354  key4=value=4
 355  "key5""=value5
 356
 357 Here the first key is "key1 " (note the trailing white-space) and the
 358 second value is " value2" (note the leading white-space). The third
 359 key contains an equal sign "key=3" and so does the fourth value, which
 360 does not need to be escaped. The fifth key contains a trailing quote,
 361 which does not need to be escaped since it is inside a surrounding
 362 quote.
 363
 364 =cut
 365         my $pair = shift;
 366
 367         # Separate key and value by the first non-quoted equal sign
 368         my ($key, $value);
 369         if ($pair =~ /^(.*[^\\])=(.*)$/)
 370         {
 371                 ($key, $value) = ($1, $2)
 372         }
 373         else
 374         {
 375                 die "Parse error: $pair\n ";
 376         }
 377
 378         # Unquote and unescape the key and value separately
 379         return (&unescape($key), &unescape($value));
 380 }
 381
 382 # Unquote and unescape
 383 sub unescape
 384 {
 385         my $string = shift;
 386
 387         # First remove enclosing quotes. Backslash before the trailing
 388         # quote leaves both.
 389         if ($string =~ /^"(.*[^\\])"$/)
 390         {
 391                 $string = $1;
 392         }
 393
 394         # Second remove any backslashes inside the unquoted string.
 395         # For later: Handle special sequences like \t ?
 396         $string =~ s/\\(.)/$1/g;
 397
 398         return $string;
 399 }
 400
 401 __END__
 402
 403 =pod
 404
 405 =head1 EXAMPLES
 406
 407 B<import-directories.perl> F<project.import> F<project.fi>
 408
 409 =head1 AUTHOR
 410
 411 Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se>
 412
 413 This program is free software; you can redistribute it and/or modify
 414 it under the terms of the GNU General Public License as published by
 415 the Free Software Foundation.
 416
 417 =cut