Updated the 19 build version to 20101023
[moodle.git] / mod / lesson / importppt.php
blob0631f3fd12bc0aeeff57866a5e7e38424fdffca2
1 <?php // $Id$
2 /**
3 * This is a very rough importer for powerpoint slides
4 * Export a powerpoint presentation with powerpoint as html pages
5 * Do it with office 2002 (I think?) and no special settings
6 * Then zip the directory with all of the html pages
7 * and the zip file is what you want to upload
8 *
9 * The script supports book and lesson.
11 * @version $Id$
12 * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
13 * @package lesson
14 **/
require_once("../../config.php");
require_once("locallib.php");

$id     = required_param('id', PARAM_INT);         // Course Module ID
$pageid = optional_param('pageid', '', PARAM_INT); // Page ID the imported pages are inserted after
global $matches;                                   // shared between extract_data() and build_list()

// Resolve the course module, its course, the module name and the module instance.
// NOTE(review): the code below presumably relies on error() ending the request - confirm.
if (! $cm = get_coursemodule_from_id('lesson', $id)) {
    error("Course Module ID was incorrect");
}

if (! $course = get_record("course", "id", $cm->course)) {
    error("Course is misconfigured");
}

// allows for adaption for multiple modules
if (! $modname = get_field('modules', 'name', 'id', $cm->module)) {
    error("Could not find module name");
}

if (! $mod = get_record($modname, "id", $cm->instance)) {
    error("Course module is incorrect");
}

require_login($course->id, false, $cm);
$context = get_context_instance(CONTEXT_MODULE, $cm->id);
require_capability('mod/lesson:edit', $context);

$strimportppt = get_string("importppt", "lesson");
$strlessons   = get_string("modulenameplural", "lesson");

$navigation = build_navigation($strimportppt, $cm);
print_header_simple("$strimportppt", " $strimportppt", $navigation);

if ($form = data_submitted()) {

    // The three upload outcomes are mutually exclusive. Previously a missing
    // upload fell through to the second check, which indexed the missing
    // $_FILES entry and printed a second, misleading notice.
    if (empty($_FILES['newfile'])) {
        notify(get_string("uploadproblem"));

    } else if (!is_uploaded_file($_FILES['newfile']['tmp_name']) or $_FILES['newfile']['size'] == 0) {
        notify(get_string("uploadnofilefound"));

    } else { // Valid file is found

        if ($rawpages = readdata($_FILES, $course->id, $modname)) { // first read all of the slide files in
            $pageobjects = extract_data($rawpages, $course->id, $mod->name, $modname); // parse the html files into objects
            clean_temp(); // all done with the files

            // Module specific callbacks, e.g. lesson_create_objects() / lesson_save_objects()
            $mod_create_objects = $modname.'_create_objects';
            $mod_save_objects   = $modname.'_save_objects';

            $objects = $mod_create_objects($pageobjects, $mod->id); // preps the data to be sent to the DB

            if (! $mod_save_objects($objects, $mod->id, $pageid)) { // sends it to the DB
                error("could not save");
            }
        } else {
            error('could not get data');
        }

        echo "<hr>";
        print_continue("$CFG->wwwroot/mod/$modname/view.php?id=$cm->id");
        print_footer($course);
        exit;
    }
}

/// Print upload form

print_heading_with_help($strimportppt, "importppt", "lesson");

print_simple_box_start("center");
echo "<form id=\"theform\" enctype=\"multipart/form-data\" method=\"post\">";
echo "<input type=\"hidden\" name=\"id\" value=\"$cm->id\" />\n";
echo "<input type=\"hidden\" name=\"pageid\" value=\"$pageid\" />\n";
echo "<table cellpadding=\"5\">";

echo "<tr><td align=\"right\">";
print_string("upload");
echo ":</td><td>";
echo "<input name=\"newfile\" type=\"file\" size=\"50\" />";
echo "</td></tr><tr><td>&nbsp;</td><td>";
echo "<input type=\"submit\" name=\"save\" value=\"".get_string("uploadthisfile")."\" />";
echo "</td></tr>";

echo "</table>";
echo "</form>";
print_simple_box_end();

print_footer($course);
108 // START OF FUNCTIONS
/**
 * Stores and unzips the uploaded presentation, then reads every slide file.
 *
 * The upload is expected to be a zip containing a directory named like the
 * zip file (minus its extension). outline.htm inside that directory is parsed
 * for the slide file names (in presentation order); if it is missing, every
 * file whose name starts with "slide" is read instead, sorted by path.
 *
 * @param array  $file     the $_FILES array; kept for interface compatibility,
 *                         the upload is read from $_FILES['newfile'] directly
 * @param int    $courseid not used by this function
 * @param string $modname  module name, used as the temp sub directory
 * @return array|false     slide path => slide html, or false when the upload
 *                         could not be stored or no readable slide was found
 */
function readdata($file, $courseid, $modname) {
    global $CFG;

    // create an upload directory in temp
    make_upload_directory('temp/'.$modname);

    $base = $CFG->dataroot."/temp/$modname/";

    $zipfile     = $_FILES["newfile"]["name"];
    $tempzipfile = $_FILES["newfile"]["tmp_name"];

    // create our directory: the zip name with its extension taken off.
    // pathinfo() has no 'extension' key for names without a dot.
    $path_parts = pathinfo($zipfile);
    $extension  = isset($path_parts['extension']) ? $path_parts['extension'] : '';
    $dirname    = (string)substr($zipfile, 0, (int)strpos($zipfile, '.'.$extension));
    if (!file_exists($base.$dirname)) {
        mkdir($base.$dirname, $CFG->directorypermissions);
    }

    // move our uploaded file to temp/<modname>; nothing to unzip if that fails
    if (!move_uploaded_file($tempzipfile, $base.$zipfile)) {
        return false;
    }

    // unzip it!
    unzip_file($base.$zipfile, $base, false);

    $base = $base.$dirname; // update the base

    // this is the file where we get the names of the files for the slides (in the correct order too)
    $outline = $base.'/outline.htm';

    $pages = array();

    if (file_exists($outline) and is_readable($outline)) {
        $outlinecontents = file_get_contents($outline);
        $filenames = array();
        preg_match_all("/javascript:GoToSld\('(.*)'\)/", $outlinecontents, $filenames); // this gets all of our files names

        // fill $pages with the contents of all of the slides
        foreach ($filenames[1] as $slidename) {
            // The names come from the uploaded archive, so strip any directory
            // part to keep the read inside the extracted folder.
            $path = $base.'/'.basename($slidename);
            if (!is_readable($path)) {
                return false;
            }
            $pages[$path] = file_get_contents($path);
        }
    } else {
        // cannot find the outline, so grab all files that start with slide
        $dh = opendir($base);
        if ($dh === false) {
            return false;
        }
        while (false !== ($entry = readdir($dh))) { // read through the directory
            if ('slide' != substr($entry, 0, 5)) {  // check for name (may want to check extension later)
                continue;
            }
            $path = $base.'/'.$entry;
            if (!is_readable($path)) {
                closedir($dh);
                return false;
            }
            $pages[$path] = file_get_contents($path);
        }
        closedir($dh);

        ksort($pages); // order them by file name
    }

    if (empty($pages)) {
        return false;
    }

    return $pages;
}
/**
 * Extracts title, text content and images out of the raw slide html.
 *
 * The slides are broken xml, so every tag except div and img is stripped and
 * the remaining non nested div elements are classified by their class
 * attribute: T is the title, B / B1-B4 are bullet lists, everything else is
 * plain content. Images belonging to a slide are copied into a fresh sub
 * folder of the course's moddata/<modname> directory.
 *
 * Side effect: fills the global $matches, which build_list() reads.
 *
 * @param array  $pages      slide path => slide html, as returned by readdata()
 * @param int    $courseid   course id, used to locate the moddata directory
 * @param string $lessonname activity name, used to name the image sub folder
 * @param string $modname    module name
 * @return array list of objects with title, contents (array), images (array) and source
 */
function extract_data($pages, $courseid, $lessonname, $modname) {
    global $CFG;
    global $matches;

    $extratedpages = array();

    // directory for images
    make_mod_upload_directory($courseid); // make sure moddata is made
    make_upload_directory($courseid.'/moddata/'.$modname, false); // we store our images in a subfolder in here

    $imagedir = $CFG->dataroot.'/'.$courseid.'/moddata/'.$modname;

    require_once($CFG->libdir .'/filelib.php');
    $imagelink = get_file_url($courseid.'/moddata/'.$modname);

    // try to make a unique subfolder to store the images:
    // <name>0, <name>1, ... until an unused one is found
    $lessonname = str_replace(' ', '_', $lessonname); // get rid of spaces
    $suffix = 0;
    while (file_exists($imagedir.'/'.$lessonname.$suffix)) {
        $suffix++;
    }
    mkdir($imagedir.'/'.$lessonname.$suffix, $CFG->directorypermissions);
    $imagedir  = $imagedir.'/'.$lessonname.$suffix;
    $imagelink = $imagelink.'/'.$lessonname.$suffix;

    foreach ($pages as $file => $content) {
        // to make life easier on our preg_match_alls, we strip out all tags except
        // for div and img (where our content is). We want div because sometimes we
        // can identify the content in the div based on the div's class
        $tags   = '<div><img>'; // should also allow <b><i>
        $string = strip_tags($content, $tags);

        $matches = array();
        // this will look for a non nested tag that is closed
        // want to allow <b><i>(maybe more) tags but when we do that
        // the preg_match messes up.
        preg_match_all("/(<([\w]+)[^>]*>)([^<\\2>]*)(<\/\\2>)/", $string, $matches);

        $path_parts = pathinfo($file);
        $slidename  = substr($path_parts['basename'], 0, strpos($path_parts['basename'], '.')); // get rid of the extension

        $imgs = array();
        // this preg matches all images whose src starts with "<slidename>_image";
        // the slide name is quoted so it is always matched literally
        preg_match_all("/<img[^>]*(src\=\"(".preg_quote($slidename, '/')."\_image[^>^\"]*)\"[^>]*)>/i", $string, $imgs);

        // start building our page
        $page = new stdClass;
        $page->title    = '';
        $page->contents = array();
        $page->images   = array();
        $page->source   = $path_parts['basename']; // need for book only

        foreach ($imgs[2] as $img) {
            // src comes from the uploaded html: only accept plain file names so
            // that copy() can neither read nor write outside the two folders
            if ($img !== basename($img)) {
                continue;
            }
            copy($path_parts['dirname'].'/'.$img, $imagedir.'/'.$img);
            $page->images[] = "<img src=\"$imagelink/$img\" title=\"$img\" />";
        }

        for ($i = 0; $i < count($matches[1]); $i++) { // go through all of our div matches

            $class = isolate_class($matches[1][$i]); // first step in isolating the class

            // check for any static classes
            switch ($class) {
                case 'T': // class T is used for Titles
                    $page->title = $matches[3][$i];
                    break;
                case 'B':  // I would guess that all bullet lists would start with B then go to B1, B2, etc
                case 'B1': // B1-B4 are just insurance, should just hit B and all be taken care of
                case 'B2':
                case 'B3':
                case 'B4':
                    // recursive function that grabs all the bullets and rebuilds the list in html
                    $page->contents[] = build_list('<ul>', $i, 0);
                    // build_list() advances $i (by reference) to the first match it did
                    // NOT consume. Step back one so that the loop's $i++ lands on that
                    // match instead of skipping it.
                    $i--;
                    break;
                default:
                    if ($matches[3][$i] != '&#13;') { // odd filler generated by the export
                        if (substr($matches[3][$i], 0, 1) == ':') { // check for leading :
                            $page->contents[] = substr($matches[3][$i], 1); // get rid of :
                        } else {
                            $page->contents[] = $matches[3][$i];
                        }
                    }
                    break;
            }
        }

        // add the page to the array;
        $extratedpages[] = $page;

    } // end $pages foreach loop

    return $extratedpages;
}
/**
 * Recursively rebuilds a (nested) html bullet list from the global $matches.
 *
 * Consumes consecutive matches whose class contains 'B', starting at index $i.
 * Class B is depth 0 and B<n> is depth n. A deeper class opens a nested <ul>
 * through a recursive call, a shallower class (or a non bullet class) ends
 * the current list.
 *
 * @param string $list  html built so far (the caller supplies the opening <ul>)
 * @param int    $i     index into $matches, passed by reference; on return it
 *                      points at the first match that was not consumed
 * @param int    $depth nesting depth of the list being built
 * @return string       $list with the items and the closing </ul> appended
 */
function build_list($list, &$i, $depth) {
    global $matches; // filled by extract_data()

    while ($i < count($matches[1])) {
        $class = isolate_class($matches[1][$i]);

        // not a B class, so this list is finished
        if (!strstr($class, 'B')) {
            break;
        }

        if ($class == 'B') {
            $this_depth = 0; // calling class B depth 0
        } else {
            // B1 is depth 1, B2 is depth 2 and so on
            $this_depth = substr($class, 1);
            if (!is_numeric($this_depth)) {
                error("Depth not parsed!");
            }
        }

        // moving back a level in the nesting: let the caller continue
        if ($this_depth < $depth) {
            break;
        }

        // moving in a level: build the nested list, then re-examine the
        // match the nested call stopped at
        if ($this_depth > $depth) {
            $list = build_list($list.'<ul>', $i, $this_depth);
            continue;
        }

        // same depth, so add the match to our list (empty items are dropped)
        $item = ppt_clean_text($matches[3][$i]);
        if ($item) {
            $list .= '<li>'.$item.'</li>';
        }
        $i++;
    }

    // end the list and return it
    return $list.'</ul>';
}
/**
 * Given an html opening tag, returns the value of its class attribute.
 *
 * The powerpoint export writes the class without quotes (<div class=T ...>),
 * so the value ends at the first whitespace or at the closing '>'. Quoted
 * values (class="T" or class='T') are handled as well.
 *
 * @param string $string an opening tag, e.g. "<div class=B1 style='...'>"
 * @return string        the class value, or '' when the tag has no class
 */
function isolate_class($string) {
    $class = strstr($string, 'class=');
    if ($class === false) {
        // no class defined in the tag
        return '';
    }

    $value = (string)substr($class, strlen('class='));
    if ($value === '') {
        return '';
    }

    // quoted value: everything up to the matching quote
    $quote = $value[0];
    if ($quote === '"' or $quote === "'") {
        $value = (string)substr($value, 1);
        $end   = strpos($value, $quote);
        if ($end !== false) {
            return (string)substr($value, 0, $end);
        }
    }

    // unquoted value: cut at the first whitespace or at the end of the tag
    return (string)substr($value, 0, strcspn($value, " \t\r\n>"));
}
/**
 * Strips off the leading characters that powerpoint puts in front of
 * bullet list items.
 *
 * One character is removed by default; when the text starts with the
 * entity &lt; the whole entity (4 characters) is removed instead.
 *
 * @param string $string raw bullet text
 * @return string|false  the text without its prefix, or false when all that
 *                       is left is the filler '&#13;'
 */
function ppt_clean_text($string) {
    // may need to recognise more prefixes later....
    $prefixlength = (strncmp($string, '&lt;', 4) === 0) ? 4 : 1;

    $cleaned = substr($string, $prefixlength);

    return ($cleaned != '&#13;') ? $cleaned : false;
}
/**
 * Clean up the temp directory.
 *
 * Currently a placeholder that does nothing: the original clean up call
 * (delDirContents on dataroot/temp/lesson) is disabled because it was broken.
 * A real implementation should only remove what this import created, since
 * someone else could be importing a presentation at the same time.
 *
 * @return void
 */
function clean_temp() {
    global $CFG;

    return;
}
/**
 * Creates the objects (a page plus its answers) that are to be inserted
 * into the database for a lesson.
 *
 * Every extracted slide becomes a branch table page with two answers:
 * "Next" (jumps to LESSON_NEXTPAGE) and "Previous" (LESSON_PREVIOUSPAGE).
 *
 * @param array $pageobjects slide objects as returned by extract_data()
 * @param int   $lessonid    id of the lesson the pages belong to
 * @return array             list of objects with ->page and ->answers (array of two answers)
 */
function lesson_create_objects($pageobjects, $lessonid) {

    $branchtables = array();
    $branchtable  = new stdClass;

    // These were previously used without being created, which is an Error
    // ("Attempt to assign property on null") on PHP 8.
    $page    = new stdClass;
    $answer  = new stdClass;
    $answers = array();

    // all pages have this info
    $page->lessonid     = $lessonid;
    $page->prevpageid   = 0;
    $page->nextpageid   = 0;
    $page->qtype        = LESSON_BRANCHTABLE;
    $page->qoption      = 0;
    $page->layout       = 1;
    $page->display      = 1;
    $page->timecreated  = time();
    $page->timemodified = 0;

    // all answers are the same
    $answer->lessonid     = $lessonid;
    $answer->jumpto       = LESSON_NEXTPAGE;
    $answer->grade        = 0;
    $answer->score        = 0;
    $answer->flags        = 0;
    $answer->timecreated  = time();
    $answer->timemodified = 0;
    $answer->answer       = "Next";
    $answer->response     = "";

    $answers[] = clone($answer);

    $answer->jumpto = LESSON_PREVIOUSPAGE;
    $answer->answer = "Previous";

    $answers[] = clone($answer);

    $branchtable->answers = $answers;

    $i = 1; // 1-based slide counter, used for generic page titles

    foreach ($pageobjects as $pageobject) {
        $temp = prep_page($pageobject, $i); // makes our title and contents
        $page->title    = $temp->title;
        $page->contents = $temp->contents;
        $branchtable->page = clone($page);      // add the page
        $branchtables[]    = clone($branchtable); // add it all to our array
        $i++;
    }

    return $branchtables;
}
/**
 * Creates the chapter objects that are to be inserted into the database
 * for a book.
 *
 * Page numbers continue after the chapters the book already has.
 *
 * @param array $pageobjects slide objects as returned by extract_data()
 * @param int   $bookid      id of the book the chapters belong to
 * @return array             list of chapter objects, one per slide
 */
function book_create_objects($pageobjects, $bookid) {

    $chapters = array();
    $chapter  = new stdClass;

    // same for all chapters
    $chapter->bookid       = $bookid;
    $chapter->pagenum      = count_records('book_chapters', 'bookid', $bookid)+1;
    $chapter->timecreated  = time();
    $chapter->timemodified = time();
    $chapter->subchapter   = 0;

    $i = 1; // 1-based slide counter, used for generic chapter titles
    foreach ($pageobjects as $pageobject) {
        $page = prep_page($pageobject, $i); // get title and contents
        $chapter->importsrc = addslashes($pageobject->source); // add the source
        $chapter->title     = $page->title;
        $chapter->content   = $page->contents;

        // Store a copy: objects are assigned by handle, so appending $chapter
        // itself would leave every array entry pointing at the same object
        // (all chapters would end up identical to the last one).
        $chapters[] = clone($chapter);

        // increment our page number and our counter
        $chapter->pagenum = $chapter->pagenum + 1;
        $i++;
    }

    return $chapters;
}
/**
 * Builds the title and content strings from an extracted slide object.
 *
 * The content is all images first, followed by each text element wrapped in
 * <p> tags. Line breaks (and the '&#13;' entity in text) are removed. Title
 * and content are passed through addslashes().
 * NOTE(review): presumably because the DB functions expect slashed data - confirm.
 *
 * @param object $pageobject object with ->title, ->images (array) and ->contents (array)
 * @param int    $count      slide number, used for the generic title "Page <count>"
 * @return object            object with ->title and ->contents (string)
 */
function prep_page($pageobject, $count) {
    // Previously $page was used without being created, which is an Error
    // ("Attempt to assign property on null") on PHP 8.
    $page = new stdClass;

    if ($pageobject->title == '') {
        $page->title = "Page $count"; // no title set so make a generic one
    } else {
        $page->title = addslashes($pageobject->title);
    }

    $page->contents = '';

    // nab all the images first
    foreach ($pageobject->images as $image) {
        $image = str_replace("\n", '', $image);
        $image = str_replace("\r", '', $image);
        $image = str_replace("'", '"', $image); // normalise attribute quotes (imgstyle)

        $page->contents .= addslashes($image);
    }

    // put <p> tags around each content element and strip out line breaks,
    // which were found to be unnecessary
    foreach ($pageobject->contents as $content) {
        $content = str_replace("\n", '', $content);
        $content = str_replace("\r", '', $content);
        $content = str_replace('&#13;', '', $content); // puts in returns?
        $content = '<p>'.$content.'</p>';
        $page->contents .= addslashes($content);
    }

    return $page;
}
/**
 * Saves the branch table objects to the DB.
 *
 * The pages are inserted as a run into the lesson's doubly linked page list,
 * either at the top of the lesson ($after == 0) or directly after the page
 * with id $after. The answers of each page are inserted with it.
 * NOTE(review): the code presumably relies on error() aborting the request - confirm.
 *
 * @param array $branchtables objects with ->page and ->answers, see lesson_create_objects()
 * @param int   $lessonid     id of the lesson
 * @param int   $after        id of the page to insert after, 0 for the top of the lesson
 * @return bool               true when everything was saved (or there was nothing to save)
 */
function lesson_save_objects($branchtables, $lessonid, $after) {
    // Nothing to insert. Bail out before touching the page links: the relinking
    // at the end would otherwise use an undefined page id.
    if (empty($branchtables)) {
        return true;
    }

    // first set up the prevpageid and nextpageid
    if ($after == 0) { // adding it to the top of the lesson
        $prevpageid = 0;
        // get the id of the first page. If not found, then no pages in the lesson
        if (!$nextpageid = get_field('lesson_pages', 'id', 'prevpageid', 0, 'lessonid', $lessonid)) {
            $nextpageid = 0;
        }
    } else {
        // going after an actual page
        $prevpageid = $after;
        $nextpageid = get_field('lesson_pages', 'nextpageid', 'id', $after);
    }

    foreach ($branchtables as $branchtable) {

        // set the doubly linked list; nextpageid is corrected when the
        // following page is inserted
        $branchtable->page->nextpageid = $nextpageid;
        $branchtable->page->prevpageid = $prevpageid;

        // insert the page
        if (!$id = insert_record('lesson_pages', $branchtable->page)) {
            error("insert page");
        }

        // update the link of the page previous to the one we just inserted
        if ($prevpageid != 0) { // if not the first page
            if (!set_field("lesson_pages", "nextpageid", $id, "id", $prevpageid)) {
                error("Insert page: unable to update next link $prevpageid");
            }
        }

        // insert the answers
        foreach ($branchtable->answers as $answer) {
            $answer->pageid = $id;
            if (!insert_record('lesson_answers', $answer)) {
                error("insert answer $id");
            }
        }

        $prevpageid = $id;
    }

    // all done with inserts. Now point the page that follows the imported run
    // back at the last inserted page (this is when we import between two lesson pages)
    if ($nextpageid != 0) { // if the next page is not the end of lesson
        if (!set_field("lesson_pages", "prevpageid", $id, "id", $nextpageid)) {
            error("Insert page: unable to update previous link $nextpageid");
        }
    }

    return true;
}
/**
 * Save the chapter objects to the database.
 *
 * Nothing fancy: the chapters are inserted one by one, in the given order,
 * and each chapter object gets the id of its new record.
 *
 * @param array  $chapters chapter objects, see book_create_objects()
 * @param int    $bookid   not used by this function
 * @param string $pageid   not used by this function
 * @return bool            true once all chapters have been inserted
 */
function book_save_objects($chapters, $bookid, $pageid='0') {
    foreach ($chapters as $chapter) {
        $chapter->id = insert_record('book_chapters', $chapter);
        if (!$chapter->id) {
            error('Could not update your book');
        }
    }

    return true;
}