Remove [Courses] from lesson names/post subjects
[mbox2moodle.git] / mbox2moodle.pl
bloba8297bae39b2b8ae5548317264e27947a9bb2730
1 #!/usr/bin/perl -w
2 =pod
4 =head1 convert.pl
6 =head2 Description
8 Converts a series of mbox files into moodle backup.zip files
9 ready to be restored into moodle.
11 =head1 SYNOPSIS
13 ./mbox2moodle.pl
15 Options
16 --sourcedir DIRECTORY
17 --sourcefile FILE
18 --outputdir DIRECTORY
19 --templatedir DIRECTORY
21 =head1 Authors
23 Penny Leach <penny@she.geek.nz>
24 Julie Pichon <julie.pichon@gmx.com>
26 =cut
28 use diagnostics;
29 use open ':utf8';
30 use strict;
31 use warnings;
33 use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
34 use Carp;
35 use Cwd;
36 use Data::Dumper;
37 use Encode;
38 use File::Basename;
39 use File::Path;
40 use FindBin;
41 use Getopt::Long;
42 use HTML::Mason;
43 use HTML::Mason::Compiler;
44 use Mail::Box::Manager;
45 use Mail::MboxParser;
46 use Mail::Message::Attachment::Stripper;
47 use Mail::Thread;
48 use Pod::Usage;
49 use Smart::Comments;
50 use Tie::Dir qw(DIR_UNLINK);
52 my $root = $FindBin::Bin;
54 my $mboxes = [];
55 my $sourcedir;
56 my $outputdir="$root/output";
57 my $templatedir="$root/templates";
58 my $forumdata = "moddata/forum/8000";
59 my $courses = [];
60 my $coursecounter = 0;
61 my $lessoncounter;
62 our $postcounter;
63 our $dummystarters;
64 our $dummies;
66 GetOptions(
67 'sourcefile=s@', \@$mboxes,
68 'sourcedir=s', \$sourcedir,
69 'outputdir=s', \$outputdir,
70 'templatedir=s', \$templatedir,
71 ) or pod2usage;
73 pod2usage unless ((scalar(@{$mboxes}) > 0 or $sourcedir) and $outputdir);
75 $mboxes = [<$sourcedir*>] unless scalar(@{$mboxes}) > 0;
77 my $mgr = Mail::Box::Manager->new;
79 my $outbuf; # Temp buffer used for writing output
80 my %mason_args = (
81 'comp_root' => $templatedir,
82 'use_object_files' => 0,
83 'static_source' => 1,
84 'out_method' =>\$outbuf,
87 my $interp = HTML::Mason::Interp->new(%mason_args);
88 my $compiler = $interp->compiler;
90 foreach my $mbox (@{$mboxes}) {
91 $outbuf = '';
93 $postcounter = 1;
94 $lessoncounter = 1;
96 my $course = {
97 'id' => $coursecounter++,
98 'name' => basename($mbox),
99 'lessons' => [],
100 'namelessons' => 0,
103 my $coursedir = "$outputdir/$course->{name}";
105 my $folder = $mgr->open(folder => $mbox);
106 ### Parsing: $mbox
107 $dummystarters = 0;
109 my $threader = new Mail::Thread($folder->messages('ALL'));
111 $threader->thread;
113 for ($threader->rootset) { ### Evaluating |===[%] |
114 sort_out_messages($_, 0, $course, '', 0, "$coursedir/$forumdata");
117 $folder->close(write => 'NEVER');
119 warn "this course had $dummystarters dummy thread starts "
120 . "(out of " . scalar(@{$course->{lessons}}) . ") "
121 . "and I tried to guess the first post.\n" if $dummystarters;
123 # Sort lessons chronologically, so they appear in order on the course page
124 sub by_timestamp {
125 @{$a->{posts}}[0]->{timestamp} <=> @{$b->{posts}}[0]->{timestamp};
127 @{$course->{lessons}} = sort by_timestamp @{$course->{lessons}};
129 $interp->exec('/moodle.xml', course => $course);
130 mkpath "$coursedir" unless -d "$coursedir";
131 open my $out, '>', "$coursedir/moodle.xml" or croak "Output error: $!";
132 print $out $outbuf;
133 close $out;
135 # zip it all up
136 my $zip = Archive::Zip->new();
137 $zip->addTree("$coursedir", '');
139 unless ( $zip->writeToFileNamed("backup-$course->{name}.zip") == AZ_OK ) {
140 croak "zip write error ($!)";
142 print "written to backup-$course->{name}.zip\n";
145 exit 0;
147 sub sort_out_messages {
148 my ($post, $level, $course, $lesson, $parentid, $path) = @_;
150 if (!$post->message) {
151 $dummystarters++;
153 my $newtop = guess_top_post($post->children) if $post->children;
154 if (!$newtop->message) { next; }
156 $post->remove_child($newtop);
158 foreach my $child ($post->children) {
159 $newtop->add_child($child);
162 $post = $newtop;
165 # the first message in a thread is a special case
166 # it needs to be both a lesson and a forum post.
167 if ($level == 0) {
168 $lesson = {
169 'id' => $lessoncounter++,
170 'name' => strip_subject($post->message->head->get("Subject")),
171 'body' => parse_body($post->message),
172 'posts' => [],
175 $lesson->{summary} = (split(/\n\n/, $lesson->{body}))[0];
177 if (!$lesson->{summary}) {
178 $lesson->{summary} = $lesson->{name};
181 push @{$course->{lessons}}, $lesson;
184 # Create post
185 my $newpost = {
186 'parent' => $parentid,
187 'id' => $postcounter++,
188 'subject' => strip_subject($post->message->head->get("Subject")),
189 'body' => parse_body($post->message),
190 'timestamp' => $post->message->timestamp,
193 $newpost->{attachment} = parse_attachments($post->message,
194 "$path/$newpost->{id}");
195 push @{$lesson->{posts}}, $newpost;
197 sort_out_messages($post->child, $level+1, $course, $lesson, "$newpost->{id}", $path) if $post->child;
198 sort_out_messages($post->next, $level, $course, $lesson, "$newpost->{parent}", $path) if $post->next;
202 # Only take relevant content type
203 sub parse_body {
204 my $message = shift;
205 my $body = '';
207 if ($message->isMultipart()) {
208 foreach my $part ($message->parts) {
209 if ((!defined($part->contentType)) ||
210 (lc($part->contentType) eq 'text/html') ||
211 (lc($part->contentType) eq 'text/plain')) {
212 $body .= $part->decoded->string;
215 } else {
216 my $content_type = $message->get('Content-Type') || 'text/plain';
217 if (!defined($content_type) ||
218 (lc($content_type) eq 'text/html') ||
219 (lc($content_type) eq 'text/plain')) {
220 $body = $message->decoded->string;
224 $body;
227 sub parse_attachments {
228 my ($message, $path) = @_;
229 my $name = '';
231 my $stripper = Mail::Message::Attachment::Stripper->new($message);
232 my @attachments = $stripper->attachments;
234 my $count = count_valid_attachments(@attachments);
236 if ($count > 0) {
237 # Extract attachments from message
238 foreach my $attach (@attachments) {
239 if (($attach->{content_type} ne 'text/html') &&
240 ($attach->{content_type} ne 'application/pgp-signature')) {
241 mkpath($path);
242 open my $out_file, '>:raw', "$path/$attach->{filename}" or croak "Output error: ($!)";
243 print $out_file $attach->{payload};
244 close $out_file;
246 $name = $attach->{filename};
250 # Moodle only allows 1 attachment/post, if >1 zip them up
251 if ($count > 1) {
252 $name = "attachments.zip";
254 my $zip = Archive::Zip->new();
255 $zip->addTree("$path");
257 # If the script is re-run, remove old attachments archive
258 if ($zip->memberNamed($name)) {
259 $zip->removeMember($name);
262 unless ($zip->overwriteAs("$path/$name") == AZ_OK) {
263 croak "attachment zip write error ($!)";
266 # Remove unzipped attachments
267 tie my %dir, 'Tie::Dir', $path, DIR_UNLINK;
268 foreach (keys %dir) {
269 if (($_ !~ /^$name$/) && ($_ ne ".") && ($_ ne "..")) {
270 delete $dir{$_};
276 $name;
279 sub count_valid_attachments {
280 my @attachments = @_;
281 my $count = 0;
283 foreach my $attach (@attachments) {
284 if (($attach->{content_type} ne 'text/html') &&
285 ($attach->{content_type} ne 'application/pgp-signature')) {
286 $count += 1;
290 $count;
293 sub strip_subject {
294 $_ = shift;
295 s/\[Courses\]\s*//;
299 sub guess_top_post {
300 my (@children) = @_;
302 my $guess;
303 my @candidates;
305 foreach my $child (@children) {
306 if ($child->message) {
307 if (!$child->isreply) {
308 push @candidates, $child;
313 if (@candidates == 0) {
314 push @candidates, @children;
317 if (@candidates == 1) {
318 $guess = $candidates[0];
319 } else {
320 my @round2;
322 # Is there a 're:' in the subject?
323 foreach my $cand (@candidates) {
324 if (index(lc($cand->message->head->get("Subject")), 're:') == -1) {
325 push @round2, $cand;
329 if (@round2 == 0) {
330 push @round2, @children;
333 if (@round2 == 1) {
334 $guess = $round2[0];
335 } else {
336 # Last resort, pick the one with the oldest timestamp
337 my $oldest = pop @round2;
338 foreach my $msg (@round2) {
339 if ($msg->message->timestamp < $oldest->message->timestamp) {
340 $oldest = $msg;
343 $guess = $oldest;
347 $guess;