Upgraded phpmyadmin to 4.0.4 (All Languages) - No modifications yet
[openemr.git] / phpmyadmin / libraries / plugins / import / ImportMediawiki.class.php
blob767c966c1d043f33cd93f2cf5ce9a1c95e80ba4c
1 <?php
2 /* vim: set expandtab sw=4 ts=4 sts=4: */
3 /**
4 * MediaWiki import plugin for phpMyAdmin
6 * @package PhpMyAdmin-Import
7 * @subpackage MediaWiki
8 */
9 if (! defined('PHPMYADMIN')) {
10 exit;
13 /* Get the import interface */
14 require_once 'libraries/plugins/ImportPlugin.class.php';
16 /**
17 * Handles the import for the MediaWiki format
19 * @package PhpMyAdmin-Import
20 * @subpackage MediaWiki
22 class ImportMediawiki extends ImportPlugin
24 /**
25 * Whether to analyze tables
27 * @var bool
29 private $_analyze;
/**
 * Constructor
 *
 * Does nothing except delegate to setProperties(), which configures
 * the plugin.
 */
public function __construct()
{
    $this->setProperties();
}
/**
 * Sets the import plugin properties.
 * Called in the constructor.
 *
 * Also decides whether tables will be analyzed: analysis is switched on
 * unless $GLOBALS['plugin_param'] is exactly the string 'table'.
 *
 * @return void
 */
protected function setProperties()
{
    $this->_setAnalyze($GLOBALS['plugin_param'] !== 'table');

    // The properties class is loaded lazily, only when a plugin is built
    $properties_dir = 'libraries/properties/';
    include_once $properties_dir . '/plugins/ImportPluginProperties.class.php';

    $plugin_properties = new ImportPluginProperties();
    $plugin_properties->setText(__('MediaWiki Table'));
    $plugin_properties->setExtension('txt');
    $plugin_properties->setMimeType('text/plain');
    // This plugin exposes no user-configurable options
    $plugin_properties->setOptions(array());
    $plugin_properties->setOptionsText(__('Options'));

    $this->properties = $plugin_properties;
}
/**
 * Observer hook invoked when a PluginManager this plugin is attached to
 * calls PluginManager::notify().
 *
 * This plugin does not react to notifications, so the body is empty.
 *
 * @param SplSubject $subject The PluginManager notifying the observer
 *                            of an update.
 *
 * @return void
 */
public function update(SplSubject $subject)
{
    // Intentionally left blank
}
/**
 * Handles the whole import logic
 *
 * Reads the import chunk by chunk through PMA_importGetNextChunk(), splits
 * the data on "\n" and runs every complete line through a line-oriented
 * parser for MediaWiki tables:
 *  - lines starting with "<!--" open a comment; inside a comment the lines
 *    "Table data for `name`" / "Table structure for `name`" select the
 *    current table name or mark a structure section to be skipped
 *  - "{|" starts a table, "|-" / "|+" close the pending row, "|}" closes
 *    the table and hands it to _importDataOneTable()
 *  - lines starting with "|" or "!" carry data cells or header cells
 * Any other non-empty line is reported as an error and stops the import.
 *
 * @return void
 */
public function doImport()
{
    // $offset and $message must be global: without that the offset rewind
    // and the error message below would only touch discarded local variables
    global $error, $timeout_passed, $finished, $offset, $message;

    // Holds the incomplete last line of the chunk read so far; it is
    // prepended to the next chunk so that split lines are re-assembled
    $last_chunk_line = '';

    // Remembers whether the current buffer line is part of a comment
    $inside_comment = false;
    // Remembers whether the current buffer line is part of a data comment
    $inside_data_comment = false;
    // Remembers whether the current buffer line is part of a structure comment
    $inside_structure_comment = false;

    // MediaWiki only accepts "\n" as row terminator
    $mediawiki_new_line = "\n";

    // Initialize the name of the current table
    $cur_table_name = "";

    // Parser state of the table being read. Initialised here as well, so
    // that malformed input (row or table end before any "{|") never reads
    // undefined variables
    $cur_temp_table = array();
    $cur_temp_table_headers = array();
    $cur_temp_line = array();
    $in_table_header = false;

    while (! $finished && ! $error && ! $timeout_passed) {
        $data = PMA_importGetNextChunk();

        if ($data === false) {
            // Subtract data we didn't handle yet (the carried-over partial
            // line) and stop processing
            $offset -= strlen($last_chunk_line);
            break;
        } elseif ($data === true) {
            // Nothing new was read; only the carried-over line is left
            $buffer = '';
        } else {
            $buffer = $data;
            unset($data);
            // Don't parse the chunk if we're not at the end and it has no
            // new line inside: keep it and wait for the rest of the line
            if (! $finished
                && strpos($buffer, $mediawiki_new_line) === false
            ) {
                $last_chunk_line .= $buffer;
                continue;
            }
        }

        // Because of reading chunk by chunk, the first line from the buffer
        // may contain only a portion of an actual line from the imported
        // file, so the remainder of the previous chunk is put in front of it
        $buffer_lines = explode($mediawiki_new_line, $last_chunk_line . $buffer);

        if (! $finished) {
            // The final line of the current chunk is not complete yet
            $last_chunk_line = array_pop($buffer_lines);
        } else {
            $last_chunk_line = '';
        }

        foreach ($buffer_lines as $raw_line) {
            $cur_buffer_line = trim($raw_line);

            // If the line is empty, go to the next one
            if ($cur_buffer_line === '') {
                continue;
            }

            $first_character = $cur_buffer_line[0];
            $row_marker = substr($cur_buffer_line, 0, 2);

            // Check beginning of comment
            if (substr($cur_buffer_line, 0, 4) === '<!--') {
                $inside_comment = true;
                continue;
            }

            if ($inside_comment) {
                // Check end of comment ("-->" is three characters long)
                if (substr($cur_buffer_line, 0, 3) === '-->') {
                    // Only data comments are closed. The structure comments
                    // will be closed when a data comment begins (in order to
                    // skip structure tables)
                    if ($inside_data_comment) {
                        $inside_data_comment = false;
                    }

                    // End comments that are not related to table structure
                    if (! $inside_structure_comment) {
                        $inside_comment = false;
                    }
                } else {
                    // Check table name
                    $match_table_name = array();
                    if (preg_match(
                        "/^Table data for `(.*)`$/",
                        $cur_buffer_line,
                        $match_table_name
                    )
                    ) {
                        $cur_table_name = $match_table_name[1];
                        $inside_data_comment = true;

                        // End ignoring structure rows
                        $inside_structure_comment = false;
                    } elseif (preg_match(
                        "/^Table structure for `(.*)`$/",
                        $cur_buffer_line,
                        $match_table_name
                    )
                    ) {
                        // The structure comments will be ignored
                        $inside_structure_comment = true;
                    }
                }
                continue;
            }

            if ($row_marker === '{|') {
                // Start of table: reset everything collected so far, the
                // headers included, so nothing leaks in from a previous table
                $cur_temp_table = array();
                $cur_temp_table_headers = array();
                $cur_temp_line = array();
                $in_table_header = false;
                // The rest of the line holds no column information
            } elseif ($row_marker === '|-'
                || $row_marker === '|+'
                || $row_marker === '|}'
            ) {
                // Begin of a row or end of the table: store the pending row
                if (! empty($cur_temp_line)) {
                    if ($in_table_header) {
                        // Row made of header cells ( marked with '!' )
                        $cur_temp_table_headers = $cur_temp_line;
                    } else {
                        // Normal line, add it to the table
                        $cur_temp_table[] = $cur_temp_line;
                    }
                }

                // Empty the temporary buffer
                $cur_temp_line = array();

                if ($row_marker === '|}') {
                    $current_table = array(
                        $cur_table_name,
                        $cur_temp_table_headers,
                        $cur_temp_table
                    );

                    // Import the current table data into the database
                    $this->_importDataOneTable($current_table);

                    // Reset table name
                    $cur_table_name = "";
                }
                // What's after the row tag is only attributes
            } elseif ($first_character === '|' || $first_character === '!') {
                // Cell elements
                if ($first_character === '!') {
                    // Header cells use '!!' as separator; treat them like a
                    // normal row but remember that this row is the header
                    $cur_buffer_line = str_replace('!!', '||', $cur_buffer_line);
                    $in_table_header = true;
                } else {
                    $in_table_header = false;
                }

                // Loop through each table cell
                foreach ($this->_explodeMarkup($cur_buffer_line) as $cell) {
                    // A cell could contain both parameters and data
                    $cell_data = explode('|', $cell, 2);

                    // The text before the first '|' is a parameter list and
                    // is dropped, unless it opens a link ('[['): a '|' inside
                    // a link must not be mistaken as delimiting parameters.
                    // strpos() returns an offset or false, never true.
                    if (strpos($cell_data[0], '[[') === false) {
                        $cell = count($cell_data) == 1
                            ? $cell_data[0]
                            : $cell_data[1];
                    }

                    // Delete the beginning of the column, if there is one
                    $cell = trim($cell);
                    foreach (array('|', '!') as $col_start_char) {
                        if (strpos($cell, $col_start_char) === 0) {
                            $cell = trim(substr($cell, 1));
                        }
                    }

                    // Add the cell to the row
                    $cur_temp_line[] = $cell;
                }
            } else {
                // If it's none of the above, then the current line has a bad
                // format: report it and stop at the first offending line
                $message = PMA_Message::error(
                    __('Invalid format of mediawiki input on line: <br />%s.')
                );
                $message->addParam($cur_buffer_line);
                $error = true;
                break;
            }
        } // End treating full buffer lines
    } // while - finished parsing buffer
}
/**
 * Imports data from a single table
 *
 * When analysis is enabled the table gets a name and header names if it
 * lacks them, its columns are analyzed and the resulting SQL is built and
 * executed. Pending queries are flushed in every case.
 *
 * @param array $table containing all table info:
 * <code>
 *    $table[0] - string containing table name
 *    $table[1] - array[]   of table headers
 *    $table[2] - array[][] of table content rows
 * </code>
 *
 * @return void
 */
private function _importDataOneTable ($table)
{
    if ($this->_getAnalyze()) {
        // Set the table name
        $this->_setTableName($table[0]);

        // Set generic names for table headers if they don't exist.
        // A table may have no content rows at all, so do not read row 0
        // blindly; an empty row simply yields no generic headers.
        $first_row = isset($table[2][0]) ? $table[2][0] : array();
        $this->_setTableHeaders($table[1], $first_row);

        // Create the tables array to be used in PMA_buildSQL()
        $tables = array();
        $tables[] = array($table[0], $table[1], $table[2]);

        // Obtain the best-fit MySQL types for each column
        $analyses = array();
        $analyses[] = PMA_analyzeTable($tables[0]);

        $this->_executeImportTables($tables, $analyses);
    }

    // Commit any possible data in buffers
    PMA_importRunQuery();
}
/**
 * Sets the table name
 *
 * A name that is already non-empty is left untouched. Otherwise a generic
 * name "TABLE n" is assigned, where n is one more than the number of
 * entries returned by SHOW TABLES.
 *
 * @param string &$table_name reference to the name of the table
 *
 * @return void
 */
private function _setTableName(&$table_name)
{
    if (! empty($table_name)) {
        return;
    }

    $existing_tables = PMA_DBI_fetch_result('SHOW TABLES');
    // todo check if the name below already exists
    $table_name = 'TABLE ' . (count($existing_tables) + 1);
}
/**
 * Set generic names for table headers, if they don't exist
 *
 * Existing (non-empty) headers are kept as they are. Otherwise one name
 * per cell of the given row is generated: COL 1, COL 2, etc.
 *
 * @param array &$table_headers reference to the array containing the headers
 *                              of a table
 * @param array $table_row      array containing the first content row
 *
 * @return void
 */
private function _setTableHeaders(&$table_headers, $table_row)
{
    if (! empty($table_headers)) {
        return;
    }

    // The first table row tells how many columns the table has
    $column_total = count($table_row);
    $position = 0;
    while ($position < $column_total) {
        $table_headers[$position] = 'COL ' . ($position + 1);
        $position++;
    }
}
/**
 * Sets the database name and additional options and calls PMA_buildSQL()
 * Used in PMA_importDataAllTables() and $this->_importDataOneTable()
 *
 * @param array &$tables   structure:
 *              array(
 *                  array(table_name, array() column_names, array()() rows)
 *              )
 * @param array &$analyses structure:
 *              $analyses = array(
 *                  array(array() column_types, array() column_sizes)
 *              )
 *
 * @global string $db name of the database to import in
 *
 * @return void
 */
private function _executeImportTables(&$tables, &$analyses)
{
    global $db;

    // Defaults used when no database is currently selected
    $db_name = 'mediawiki_DB';
    $options = null;

    if (strlen($db)) {
        // Import into the selected database (name without backquotes)
        // and pass the option that disables database creation
        $db_name = $db;
        $options = array('create_db' => false);
    }

    // Additional SQL strings: not applicable here
    $create = null;

    // Create and execute necessary SQL statements from data
    PMA_buildSQL($db_name, $tables, $analyses, $create, $options);

    unset($tables, $analyses);
}
/**
 * Replaces all instances of the '||' separator that occur inside a quoted
 * attribute value of a tag with the given replacement
 *
 * The text is scanned character by character. A tag starts at $start_delim
 * and ends at $end_delim; inside a tag a " or ' opens an attribute value
 * that lasts until the same quote character appears again. Every other
 * character, including '|' and '||' outside attribute values, is copied
 * unchanged.
 *
 * @param string $start_delim start delimiter (single character)
 * @param string $end_delim   end delimiter (single character)
 * @param string $replace     the string to be replaced with
 * @param string $subject     the text to be replaced
 *
 * @return string with replacements
 */
private function _delimiterReplace($start_delim, $end_delim, $replace, $subject)
{
    // String that will be returned
    $cleaned = "";
    // Possible states of current character
    $inside_tag = false;
    $inside_attribute = false;
    // Attributes can be declared with either " or '
    $start_attribute_character = false;

    // The full separator is "||";
    // This remembers if the previous character was '|'
    $partial_separator = false;

    // Parse text char by char; the length cannot change while scanning
    $length = strlen($subject);
    for ($i = 0; $i < $length; $i ++) {
        $cur_char = $subject[$i];

        // Check for separators
        if ($cur_char === '|') {
            if (! $inside_attribute) {
                // Outside an attribute this is part of a real separator,
                // so it is copied as it is
                $cleaned .= $cur_char;
                if ($partial_separator) {
                    // A complete real separator ends any open tag
                    $inside_tag = false;
                }
            } elseif ($partial_separator) {
                // Second '|' in a row inside an attribute: the pair is
                // emitted as the placeholder (the first '|' was held back)
                $cleaned .= $replace;
            }

            // If the previous character was also '|', then this ends a
            // full separator. If not, this may be the beginning of one
            $partial_separator = ! $partial_separator;
            continue;
        }

        // A single '|' inside an attribute was held back in case a second
        // one followed; it did not, so it is emitted now
        if ($partial_separator && $inside_attribute) {
            $cleaned .= "|";
        }
        // If the char is different from "|", no separator can be formed
        $partial_separator = false;

        // any other character is appended to the current segment
        $cleaned .= $cur_char;

        if ($cur_char === $start_delim && ! $inside_attribute) {
            // start of a tag
            $inside_tag = true;
        } elseif ($cur_char === $end_delim && ! $inside_attribute) {
            // end of a tag
            $inside_tag = false;
        } elseif (($cur_char === '"' || $cur_char === "'") && $inside_tag) {
            // start or end of an attribute
            if (! $inside_attribute) {
                $inside_attribute = true;
                // remember the attribute's declaration character (" or ')
                $start_attribute_character = $cur_char;
            } elseif ($cur_char === $start_attribute_character) {
                $inside_attribute = false;
                // unset attribute declaration character
                $start_attribute_character = false;
            }
        }
    } // end for each character in $subject

    // A '|' held back at the very end of the text would otherwise be lost
    if ($partial_separator && $inside_attribute) {
        $cleaned .= "|";
    }

    return $cleaned;
}
/**
 * Separates a string into items, similarly to explode
 * Uses the '||' separator (which is standard in the mediawiki format)
 * and ignores the instances of it that _delimiterReplace() masks
 * Used in parsing buffer lines containing data cells
 *
 * @param string $text text to be split
 *
 * @return array
 */
private function _explodeMarkup($text)
{
    $cell_separator = "||";
    $mask = "\x00";

    // The mask character must not occur in the input itself
    $unmasked_text = str_replace($mask, '', $text);

    // Hide the separators that must not split the text behind the mask
    $masked_text = $this->_delimiterReplace("<", ">", $mask, $unmasked_text);

    // Split on the remaining separators, then restore the hidden ones in
    // every item (str_replace() accepts an array subject and keeps its keys)
    return str_replace(
        $mask,
        $cell_separator,
        explode($cell_separator, $masked_text)
    );
}
549 /* ~~~~~~~~~~~~~~~~~~~~ Getters and Setters ~~~~~~~~~~~~~~~~~~~~ */
/**
 * Tells whether the table should be analyzed
 *
 * @return bool the value last stored by _setAnalyze()
 */
private function _getAnalyze()
{
    return $this->_analyze;
}
/**
 * Stores whether the table should be analyzed
 *
 * @param bool $analyze true to analyze the table, false otherwise
 *
 * @return void
 */
private function _setAnalyze($analyze)
{
    $this->_analyze = $analyze;
}