typo fixes
[mplayer/greg.git] / TOOLS / subedit.pl
blob3df18f8bfe6301eaf76ec08a24499422924ab8cd
1 #!/usr/bin/perl -w
3 # A script for pipelined editing of subtitle files.
4 # Copyright (C) 2004 Michael Klepikov <mike72@mail.ru>
6 # Version 1.0 initial release 28-Mar-04
8 # Comments, suggestions -- send me an e-mail, but the recommended way is
9 # to enhance/fix on your own and submit to the distribution;)
10 # If you like, I can review the fixes.
12 # This script is free software; you can redistribute it and/or
13 # modify it under the terms of the GNU Lesser General Public
14 # License as published by the Free Software Foundation; either
15 # version 2 of the License, or (at your option) any later version.
16 # Retain original credits when modifying.
18 # This script is distributed in the hope that it will be useful,
19 # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 # Lesser General Public License for more details.
23 # You should have received a copy of the GNU Lesser General Public
24 # License along with this library; if not, write to the Free Software
25 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
28 use Math::BigInt;
# Constants: numeric identifiers of the subtitle formats
my $FMT_UNKNOWN = 0;
my $FMT_SRT = 1;

# Argument values, filled in by the command line loop below.
# The *Milli values stay undefined unless the matching switch is given.
my $DEBUG = 0;
my $inFormat;
my $outFormat;
my $shiftMilli;
my $scaleMilli;
my $splitFromMilli;
my $splitToMilli;

## Process command line
# Every switch that takes a value consumes it from @ARGV with a second shift.
# usage() exits the script, so nothing runs after an error is reported.
while (defined (my $argVal = shift)) {
  if ($argVal eq "-d" || $argVal eq "--debug") {
    $DEBUG = 1;
  } elsif ($argVal eq "-if" || $argVal eq "--input-format") {
    $inFormat = shift;
    usage ("Must specify input format") if ! $inFormat;
    if ($inFormat =~ /^srt/i) {
      $inFormat = $FMT_SRT;
    } else {
      usage ("Invalid input format");
    }
  } elsif ($argVal eq "-of" || $argVal eq "--output-format") {
    $outFormat = shift;
    # This message used to say "input format", which pointed the user
    # at the wrong switch.
    usage ("Must specify output format") if ! $outFormat;
    if ($outFormat =~ /^srt/i) {
      $outFormat = $FMT_SRT;
    } else {
      usage ("Invalid output format");
    }
  } elsif ($argVal eq "-s" || $argVal eq "--shift") {
    my $argTime = shift;
    if (! defined $argTime ||
        ! defined ($shiftMilli = getTimeMillis ($argTime))) {
      usage ("Invalid shift time value");
    }
  } elsif ($argVal eq "-c" || $argVal eq "--scale") {
    my $argTime = shift;
    if (! defined $argTime ||
        ! defined ($scaleMilli = getTimeMillis ($argTime))) {
      usage ("Invalid scale time value");
    }
  } elsif ($argVal eq "-f" || $argVal eq "--split-from") {
    my $argTime = shift;
    if (! defined $argTime ||
        ! defined ($splitFromMilli = getTimeMillis ($argTime))) {
      usage ("Invalid split start time value");
    }
  } elsif ($argVal eq "-t" || $argVal eq "--split-to") {
    my $argTime = shift;
    if (! defined $argTime ||
        ! defined ($splitToMilli = getTimeMillis ($argTime))) {
      usage ("Invalid split end time value");
    }
  } elsif ($argVal eq "-h" || $argVal eq "--help") {
    usage ();
  } else {
    usage ("Unrecognized argument $argVal");
  }
}

# Input format defaults to SRT
$inFormat = $FMT_SRT if (! defined $inFormat);
# Output format defaults to the same as input
$outFormat = $inFormat if (! defined $outFormat);
## Read

# Parsed subtitles: reference to an array of hashes, as returned by readSRT
my $subs;
if ($inFormat == $FMT_SRT) {
  $subs = readSRT (*STDIN);
  printf STDERR ("Read %d SRT subs\n", scalar @{$subs}) if $DEBUG;
  # Sort by start time. Both sides of the comparison must read the start
  # time; comparing a start time against an end time is not a consistent
  # ordering and leaves the list in an unpredictable order.
  @{$subs} = sort {$a -> {srtStartTime} <=> $b -> {srtStartTime}} @{$subs};
}
## Transform
# The three steps run in a fixed order: shift, then split, then scale.

# Shift: skipped when no shift was requested or the shift is zero
if (defined $shiftMilli and $shiftMilli != 0) {
  printf STDERR ("Shift: %d milliseconds\n", $shiftMilli) if $DEBUG;
  shiftSRT ($subs, $shiftMilli);
}

# Split: runs when at least one of the two boundaries was given
if (defined $splitFromMilli or defined $splitToMilli) {
  # A missing boundary is reported as "-" in the debug output
  printf STDERR ("Split: from %s to %s\n",
                 defined $splitFromMilli ? $splitFromMilli : "-",
                 defined $splitToMilli ? $splitToMilli : "-") if $DEBUG;
  splitSRT ($subs, $splitFromMilli, $splitToMilli);
}

# Scale: stretch the timings so that the end of the last subtitle moves
# by $scaleMilli; nothing to do for an empty subtitle list
if (defined $scaleMilli and $scaleMilli != 0 and scalar @{$subs} > 0) {
  my $endBefore = $subs -> [-1] -> {srtEndTime};
  die "Cannot scale when last subtitle ends at 00:00:00,000"
    if $endBefore == 0;
  my $endAfter = $endBefore + $scaleMilli;
  printf STDERR ("Scale: %d/%d\n", $endAfter, $endBefore) if $DEBUG;
  scaleSRT ($subs, $endAfter, $endBefore);
}
## Write
writeSRT (*STDOUT, $subs) if $outFormat == $FMT_SRT;

# Closing STDOUT explicitly, as the Perl manual recommends, lets write
# errors (full disk, etc.) be reported instead of being lost at exit.
close (STDOUT) or die "Cannot close output stream: $!";

exit 0;
149 ## Subroutines
# Convert string time format to milliseconds
# SRT style: "01:20:03.251", and "," is allowed instead of "."
# Hours and minutes are optional and a leading "-" negates the value,
# e.g. "-1:15:0", "1:5.03" or "3.5".
# Argument: the time string.
# Returns an integer number of milliseconds, or undef in case of format
# error. Fraction digits beyond the third are dropped.
# Works on a private copy of the argument, so the caller's $_ is left alone.
sub getTimeMillis
{
  my $text = shift;
  my $millis = 0;

  if ($text =~ /\s*(.*)[\.,]([0-9]+)?\s*$/) { # Fraction; strip surrounding spaces
    my ($rest, $frac) = ($1, $2);
    $text = $rest;
    # Take the first three fraction digits as milliseconds ("5" -> 500,
    # "03" -> 30). Doing this on the digits keeps the result an exact
    # integer, which a floating point multiplication does not guarantee.
    $millis += substr ($frac . "00", 0, 3) if defined $frac;
  }
  if ($text =~ /(.*?)([0-9]+)$/) { # Seconds
    my ($rest, $secs) = ($1, $2);
    $text = $rest;
    $millis += $secs * 1000;
  }
  # Minutes, then hours: each is a number followed by ":" at the end of
  # what is still unparsed
  foreach my $unitMillis (60000, 3600000) {
    if ($text =~ /(.*?)([0-9]+):$/) {
      my ($rest, $count) = ($1, $2);
      $text = $rest;
      $millis += $count * $unitMillis;
    }
  }
  if ($text =~ /(.*?)\-$/) { # Minus sign
    $text = $1;
    $millis = -$millis;
  }
  $millis = undef if ($text !~ /^$/); # Make sure we ate everything up
  return $millis;
}
# Format a millisecond count as an SRT time stamp, "hh:mm:ss,mmm".
# A negative count is formatted from its magnitude and gets a leading "-".
sub getTimeSRT
{
  my $rest = shift;
  my $sign = "";
  if ($rest < 0) {
    $sign = "-";
    $rest = -$rest;
  }

  # Peel off milliseconds, seconds and minutes in turn; whatever is left
  # after the last division is the hour count.
  my @fields;
  foreach my $radix (1000, 60, 60) {
    push @fields, $rest % $radix;
    $rest /= $radix;
  }
  my ($milli, $sec, $min) = @fields;

  return sprintf ("%s%02d:%02d:%02d,%03d", $sign, $rest, $min, $sec, $milli);
}
# Read SRT subtitles
# Argument: the input file handle (e.g. *STDIN).
# Returns a reference to an array with one hash per subtitle, with keys:
#   srtNumber    - the subtitle number as found in the input
#   srtStartTime - start time in milliseconds
#   srtEndTime   - end time in milliseconds
#   lines        - reference to an array of text lines, trailing spaces removed
# Dies on malformed input, reporting the line number.
sub readSRT
{
  my $in = shift;
  my $subs = [];

  my $line = <$in>;
  print STDERR "Undefined first line\n" if ! defined $line && $DEBUG;
  my $lineNo = 1;
  READ_SUBS:
  while (defined $line) {
    # Each loop iteration reads one subtitle from the input
    my $sub = {};

    # Skip empty lines
    while ($line =~ /^\s*$/) {
      $line = <$in>;
      # Test for definedness, not truth: a final unterminated line "0"
      # is false but is not the end of input
      last READ_SUBS if ! defined $line;
      ++$lineNo;
    }

    # Subtitle number
    if ($line =~ /^\s*([0-9]+)\s*$/) {
      $sub -> {srtNumber} = $1;
    } else {
      die "Invalid SRT format at line $lineNo";
    }

    # Timing
    $line = <$in>;
    die "Unexpected end of SRT stream at line $lineNo" if ! defined $line;
    ++$lineNo;
    if ($line =~ /^\s*(\S+)\s*--\>\s*(\S+)\s*$/) {
      # Copy the captures before calling anything else
      my ($startText, $endText) = ($1, $2);
      my $startMillis = getTimeMillis ($startText);
      my $endMillis = getTimeMillis ($endText);
      # $line is a private variable, so the message shows the offending
      # line no matter what getTimeMillis does to $_
      die "Invalid SRT timing format at line $lineNo: $line"
        if ! defined $startMillis || ! defined $endMillis;
      $sub -> {srtStartTime} = $startMillis;
      $sub -> {srtEndTime} = $endMillis;
    } else {
      die "Invalid SRT timing format at line $lineNo: $line";
    }

    # Text lines
    my $subLines = [];
    while (defined ($line = <$in>)) { # EOF ends subtitle
      ++$lineNo;
      last if $line =~ /^\s*$/; # Empty line ends subtitle
      $line =~ s/\s+$//; # Strip trailing spaces
      push @{$subLines}, $line;
    }
    die "No text in SRT subtitle at line $lineNo" if 0 == scalar @{$subLines};
    $sub -> {lines} = $subLines;

    # Append subtitle to the list
    push @{$subs}, $sub;
  }
  print STDERR "SRT read ok, $lineNo lines\n" if $DEBUG;

  return $subs;
}
# Write SRT subtitles
# Arguments: the output file handle, then a reference to the array of
# subtitle hashes (keys srtStartTime, srtEndTime and lines are read).
# Subtitles are numbered 1, 2, 3, ... in list order; any number stored in
# the subtitle itself is not used. Each subtitle is followed by one empty
# line.
sub writeSRT
{
  local *OUT = shift;
  my $subs = shift;

  my $written = 0;
  foreach my $sub (@{$subs}) {
    ++$written;
    # Every text line gets its own newline; the last "\n" in the format
    # is the empty line that separates subtitles
    my $text = join ("", map { "$_\n" } @{$sub -> {lines}});
    printf OUT ("%d\n%s --> %s\n%s\n", $written,
                getTimeSRT ($sub -> {srtStartTime}),
                getTimeSRT ($sub -> {srtEndTime}),
                $text);
  }
  printf STDERR ("Wrote %d SRT subs\n", $written) if $DEBUG;
}
# Shift SRT subtitles by a given number of milliseconds.
# Arguments: reference to the array of subtitle hashes, then the shift.
# The shift may be negative. The addition is done in integer arithmetic,
# so a fractional shift is truncated. Timings are changed in place.
sub shiftSRT
{
  use integer; # Integer addition, see above
  my ($subs, $shiftMilli) = @_;

  foreach my $sub (@{$subs}) {
    $sub -> {$_} += $shiftMilli foreach qw(srtStartTime srtEndTime);
  }
}
# Multiply each subtitle timing by a dividend and divide by a divisor.
# The idea is that the dividend is usually the new total number of
# milliseconds in the subtitle file, and the divisor is the old
# total number of milliseconds in the subtitle file.
# We could simply use a double precision real coefficient instead of
# integer dividend and divisor, and that could be good enough, but
# using integer arithmetic *guarantees* precision up to the last
# digit, so why settle for good enough when we can have a guarantee.
#
# Uses Math::BigInt arithmetic, because it works with numbers
# up to (total number of milliseconds for a subtitle timing)^2,
# which could be on the order of approximately 1e+13, which is
# larger than maximum 32-bit integer.
# There is a performance loss when using BigInt vs. regular floating
# point arithmetic, but the actual performance is quite acceptable
# on files with a few thousand subtitles.
#
# Arguments: reference to the array of subtitle hashes, the dividend,
# the divisor. Timings are changed in place and stored as plain decimal
# strings. Dies if the divisor truncates to zero.
sub scaleSRT
{
  my $subs = shift;
  # Math::BigInt turns a non-integer operand into NaN, so fractional
  # values have to be truncated explicitly; "use integer" does not
  # truncate values that are merely assigned.
  my $scaleDividend = int (shift);
  my $scaleDivisor = int (shift);
  die "Cannot scale by a zero divisor" if $scaleDivisor == 0;

  foreach my $sub (@{$subs}) {
    foreach my $key ("srtStartTime", "srtEndTime") {
      my $t = Math::BigInt -> new (int ($sub -> {$key}));
      $t = $t -> bmul ($scaleDividend);
      # bdiv is called in scalar context here, so it yields the quotient
      $sub -> {$key} = $t -> bdiv ($scaleDivisor) -> bstr ();
    }
  }
}
# Extract a fragment within a given time interval
# Arguments: reference to the array of subtitle hashes, the "from"
# boundary and the "to" boundary, both in milliseconds.
# Either "from" or "to" may be undefined, meaning no limit on that side.
# Subtitles that end before "from" or start after "to" are removed from
# the array; a subtitle that overlaps a boundary is kept and has its
# timing cut at that boundary. The array is changed in place.
sub splitSRT
{
  use integer; # Timings are compared as integers
  my $subs = shift;
  my $fromMilli = shift;
  my $toMilli = shift;

  # Truncate the boundaries once, so that a value copied into a subtitle
  # is an integer too
  $fromMilli = int ($fromMilli) if defined $fromMilli;
  $toMilli = int ($toMilli) if defined $toMilli;

  my @kept;
  foreach my $sub (@{$subs}) {
    # Drop the subtitle if it ends earlier than the start boundary
    next if defined $fromMilli && $sub -> {srtEndTime} < $fromMilli;

    # Fix overlapping start timing. This has to happen before the end
    # boundary test, which looks at the corrected start time.
    if (defined $fromMilli && $sub -> {srtStartTime} < $fromMilli) {
      $sub -> {srtStartTime} = $fromMilli;
    }

    # Drop the subtitle if it begins later than the end boundary
    next if defined $toMilli && $sub -> {srtStartTime} > $toMilli;

    # Fix overlapping end timing
    if (defined $toMilli && $sub -> {srtEndTime} > $toMilli) {
      $sub -> {srtEndTime} = $toMilli;
    }

    push @kept, $sub;
  }

  # Replace the contents of the caller's array in a single step instead
  # of splicing out one element at a time
  @{$subs} = @kept;
}
# Print brief usage help and exit
# Accepts an optional error message, e.g. for errors parsing command line.
# With a message: prints it before the help text and exits with code 2.
# Without a message: exits with code 0.
# All output goes to STDERR. This sub never returns.
sub usage
{
  my $msg = shift;
  my $exitCode = 0;

  if (defined $msg) {
    $exitCode = 2;
    print STDERR "$msg\n";
  }

  # The help text lists only switches that the command line loop accepts.
  # It used to advertise -r,--renumber, which is rejected as an
  # unrecognized argument; renumbering always happens on output.
  print STDERR <<USAGE;
Usage: $0 [switches]
  -if,--input-format <fmt>  input format; supported: SRT
                            default is SRT
  -of,--output-format <fmt> output format; supported: SRT
                            default is same as input format
  -s,--shift <time>         shift all subtitles by <time>
                            (format: [-]hh:mm:ss,fraction)
  -c,--scale <time>         scale by adding <time> to overall duration
  -f,--split-from <time>    Drop subtitles that end before <time>
  -t,--split-to <time>      Drop subtitles that start after <time>
                            (will truncate timing if it overlaps a boundary)
  -d,--debug                enable debug output
  -h,--help                 this help message

All times could be negative. Input/output may also contain negative timings,
which is sometimes useful for intermediate results.
SRT subtitles are always renumbered on output.

EXAMPLES

Split subtitle file into two disks at a boundary of one hour 15 minutes:

  subedit.pl --split-to 1:15:0 < all.srt > p1.srt
  subedit.pl -f 1:15:0 < all.srt | subedit.pl --shift -1:15:0 > p2.srt

Join the previous two disks back into one file:

  subedit.pl -s 1:15:00 < p2.srt | cat p1.srt - | subedit.pl > all.srt

Correct a situation where the first subtitle starts in sync with the video,
but the last one starts 3.5 seconds earlier than the speech in the video,
assuming the first subtitle timing is 00:01:05.030:

  subedit.pl -s -1:5.03 | subedit.pl -c 3.5 | subedit.pl -s 1:5.03
USAGE

  exit $exitCode;
}