weekly back-to-dev release 5.0dev
[moodle.git] / backup / util / xml / parser / processors / grouped_parser_processor.class.php
blob718b57d8773c25be3d4387d2838fdec44b9dbbb8
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
17 /**
18 * @package moodlecore
19 * @subpackage xml
20 * @copyright 2010 onwards Eloy Lafuente (stronk7) {@link http://stronk7.com}
21 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
24 require_once($CFG->dirroot.'/backup/util/xml/parser/processors/simplified_parser_processor.class.php');
26 /**
27 * Abstract xml parser processor able to group chunks as configured
28 * and dispatch them to other arbitrary methods
30 * This @progressive_parser_processor handles the requested paths,
31 * allowing to group information under any of them, dispatching them
32 * to the methods specified
34 * Note memory increases as you group more and more paths, so use it for
35 * well-known structures that are small enough (never to group MBs into one
36 * in-memory structure)
38 * TODO: Complete phpdocs
40 abstract class grouped_parser_processor extends simplified_parser_processor {
/** @var array Paths requested as grouped; add_path() stores each one as path => true. */
protected $groupedpaths;

/** @var array|null Data being accumulated, keyed by grouped path; set to null by the constructor. */
protected $currentdata;

/**
 * Search tree storing the grouped paths.
 *
 * Each grouped path is split on '/' and stored one element per level, in a
 * tree fashion like the filesystem. Each element stores all the child
 * elements that are part of a full path that builds the grouped parent
 * path we are storing. E.g. array keys are stored as follows:
 *     root => a => b
 *          => b
 *          => c => d
 *               => e => f
 * Grouped paths here are: /a/b, /b, /c/d, /c/e/f.
 *
 * There are no nested parent paths, that is an enforced rule, so an empty
 * array designates that the particular XML path element is in fact a
 * grouped path, e.g. $this->groupedparentprefixtree['a']['b'] = array();
 *
 * @var array
 */
protected $groupedparentprefixtree;

/**
 * Keep cache of parent directory paths for XML parsing.
 * @var array
 */
protected $parentcache = array();

/**
 * Remaining space for parent directory paths.
 * @var integer
 */
protected $parentcacheavailablesize = 2048;
/**
 * Reset the grouping state, then hand the paths over to the parent constructor.
 *
 * @param array $paths paths forwarded as-is to the parent constructor
 */
public function __construct(array $paths = array()) {
    // Both members are reset before the parent constructor is invoked.
    $this->currentdata = null;
    $this->groupedpaths = array();

    parent::__construct($paths);
}
/**
 * Register one path to be handled, optionally requesting it as grouped.
 *
 * A grouped path is rejected when another grouped path already exists
 * above it or below it in the same branch.
 *
 * @param string $path xml path to add
 * @param bool $grouped true to request the path as a grouped one
 * @throws progressive_parser_exception if a grouped parent or grouped child of $path is found
 */
public function add_path($path, $grouped = false) {
    // Plain paths need no extra bookkeeping here.
    if (!$grouped) {
        parent::add_path($path);
        return;
    }

    // Check there is no parent in the branch being grouped.
    $groupedparent = $this->grouped_parent_exists($path);
    if ($groupedparent) {
        $details = (object) array('path' => $path, 'parent' => $groupedparent);
        throw new progressive_parser_exception('xml_grouped_parent_found', $details);
    }

    // Check there is no child in the branch being grouped.
    $groupedchild = $this->grouped_child_exists($path);
    if ($groupedchild) {
        $details = (object) array('path' => $path, 'child' => $groupedchild);
        throw new progressive_parser_exception('xml_grouped_child_found', $details);
    }

    $this->groupedpaths[$path] = true;

    // The parent check above guarantees no grouped path sits above this one,
    // so the branch can be created element by element in the search tree.
    $node = &$this->groupedparentprefixtree;
    foreach (explode('/', $path) as $segment) {
        if (!isset($node[$segment])) {
            $node[$segment] = array();
        }
        // Move down one level by reference so the tree is updated in place.
        $node = &$node[$segment];
    }

    parent::add_path($path);
}
/**
 * The parser fires this each time one path is going to be parsed
 *
 * @param string $path xml path which parsing has started
 */
public function before_path($path) {
    $needsplaceholder = $this->path_is_grouped($path) && !isset($this->currentdata[$path]);

    if ($needsplaceholder) {
        // A grouped element without any final tags of its own never gets a
        // data chunk, so an artificial empty one is stored now. It will be
        // eventually replaced with real data later in
        // {@link self::postprocess_chunk()}.
        $placeholder = array();
        $placeholder['path'] = $path;
        $placeholder['level'] = substr_count($path, '/') + 1;
        $placeholder['tags'] = array();
        $this->currentdata[$path] = $placeholder;
    }

    // Paths living below a grouped path are not notified to the parent.
    if ($this->grouped_parent_exists($path)) {
        return;
    }

    parent::before_path($path);
}
/**
 * The parser fires this each time one path has been parsed
 *
 * @param string $path xml path which parsing has ended
 */
public function after_path($path) {
    // A grouped path has just finished: dispatch everything accumulated for it.
    if ($this->path_is_grouped($path)) {
        // Take the accumulated chunk out of currentdata before anything else.
        $chunk = $this->currentdata[$path];
        unset($this->currentdata[$path]);

        // Always, before dispatching any chunk, send all pending start notifications.
        $this->process_pending_startend_notifications($path, 'start');

        // TODO: If running under DEBUG_DEVELOPER notice about >1MB grouped chunks.
        $this->dispatch_chunk($chunk);
    }

    // Normal notification of path end, skipped for children of a grouped path.
    $insidegrouped = $this->grouped_parent_exists($path);
    if (!$insidegrouped) {
        parent::after_path($path);
    }
}
// Protected API starts here

/**
 * Override this method so grouping will be happening here,
 * also deciding between accumulating/dispatching
 *
 * @param array $data chunk to process; its 'path' element decides what happens to it
 */
protected function postprocess_chunk($data) {
    $path = $data['path'];

    // The chunk belongs to a grouped path itself: simply store it in currentdata.
    if ($this->path_is_grouped($path)) {
        $this->currentdata[$path] = $data;
        return;
    }

    // The chunk is a child of a grouped path: accumulate it under that path.
    $grouped = $this->grouped_parent_exists($path);
    if ($grouped) {
        $this->build_currentdata($grouped, $data);
        $this->chunks--; // Not counted, as it's accumulated.
        return;
    }

    // Neither grouped nor child of grouped: dispatch it.
    $this->dispatch_chunk($data);
}
/**
 * Tell whether the given path has an entry in the grouped paths list.
 *
 * @param string $path xml path to look for
 * @return bool true if $path is set in $groupedpaths, false otherwise
 */
protected function path_is_grouped($path) {
    $isgrouped = isset($this->groupedpaths[$path]);
    return $isgrouped;
}
/**
 * Function that will look for any grouped
 * parent for the given path, returning it if found,
 * false if not
 *
 * @param string $path xml path whose ancestors are inspected
 * @return string|false the grouped ancestor path, or false when there is none
 */
protected function grouped_parent_exists($path) {
    // Walk the search tree along the elements of the parent path; the walk
    // only reads the tree, so plain (by value) access is enough.
    $segments = explode('/', $this->get_parent_path($path));
    $walkedpath = '';
    $node = $this->groupedparentprefixtree;

    foreach ($segments as $segment) {
        // An element missing from the tree means no grouped path lies this way.
        if (!isset($node[$segment])) {
            return false;
        }

        // Keep a string version of what has been traversed (the empty
        // element before the leading slash adds nothing to it), so the
        // grouped path can be returned to the caller if one is found.
        if ($segment !== '') {
            $walkedpath .= '/' . $segment;
        }

        // An empty array marks the end of a grouped path.
        $children = $node[$segment];
        if ($children == array()) {
            return $walkedpath;
        }

        $node = $children;
    }

    return false;
}
/**
 * Get the parent path using a local cache for performance.
 *
 * @param $path string The pathname you wish to obtain the parent name for.
 * @return string The parent pathname.
 */
protected function get_parent_path($path) {
    // Cache hit: nothing to compute.
    if (isset($this->parentcache[$path])) {
        return $this->parentcache[$path];
    }

    $parentpath = progressive_parser::dirname($path);
    $this->parentcache[$path] = $parentpath;
    $this->parentcacheavailablesize--;

    if ($this->parentcacheavailablesize < 0) {
        // Older first is cheaper than LRU. We use 10% as items are grouped together and the large quiz
        // restore from MDL-40585 used only 600 parent paths. This is an XML hierarchy, so common paths
        // are grouped near each other. eg; /question_bank/question_category/question/element. After keeping
        // question_bank paths in the cache when we move to another area and the question_bank cache is not
        // useful any longer.
        $sizebefore = count($this->parentcache);
        $this->parentcache = array_slice($this->parentcache, 200, null, true);
        // Give back only the room really freed: fewer than 200 entries are
        // dropped when the cache held fewer than 200 items.
        $this->parentcacheavailablesize += $sizebefore - count($this->parentcache);
    }

    // Return the computed value instead of re-reading the cache: the
    // eviction above may have removed the entry just stored when the cache
    // held fewer than 200 items.
    return $parentpath;
}
/**
 * Function that will look for any grouped
 * child for the given path, returning it if found,
 * false if not
 *
 * @param string $path xml path whose descendants are inspected
 * @return string|false the first grouped path found below $path, or false when there is none
 */
protected function grouped_child_exists($path) {
    // A child is any grouped path starting with "$path/".
    $prefix = $path . '/';

    foreach ($this->groupedpaths as $candidate => $unused) {
        if (strpos($candidate, $prefix) !== 0) {
            continue;
        }
        return $candidate;
    }

    return false;
}
/**
 * This function will accumulate the chunk into the specified
 * grouped element for later dispatching once it is complete
 *
 * @param string $grouped grouped path the chunk has to be added to
 * @param array $data chunk to accumulate; its 'path' and 'tags' elements are used
 * @throws progressive_parser_exception if $grouped has no entry in currentdata
 */
protected function build_currentdata($grouped, $data) {
    // The grouped element must already exist in currentdata.
    $groupedisknown = is_array($this->currentdata) && array_key_exists($grouped, $this->currentdata);

    if (!$groupedisknown) {
        $details = (object) array('grouped' => $grouped, 'child' => $data['path']);
        throw new progressive_parser_exception('xml_cannot_add_to_grouped', $details);
    }

    $this->add_missing_sub($grouped, $data['path'], $data['tags']);
}
/**
 * Add non-existing subarray elements
 *
 * Walks the elements of $path located below $grouped, creating any missing
 * subarray inside the 'tags' accumulated for the grouped path, and appends
 * $tags as a new occurrence of the processed tag (the last element of $path).
 *
 * NOTE(review): an ancestor element named like the processed tag is also
 * treated as "the processed one" by the comparisons below, so $tags would be
 * appended at that level too - confirm whether such nesting can happen.
 *
 * @param string $grouped grouped path owning the accumulated data
 * @param string $path path of the chunk being added
 * @param mixed $tags tags of the chunk being added, stored as received
 */
protected function add_missing_sub($grouped, $path, $tags) {

    // Remember tag being processed
    $processedtag = basename($path);

    $info =& $this->currentdata[$grouped]['tags'];

    // Strip the grouped path only where it leads $path. A plain str_replace()
    // would also delete later repetitions of it, gluing unrelated elements
    // together (grouped /a/b with path /a/b/c/a/b/d would end up as "cd").
    $prefix = $grouped . '/';
    $relativepath = (strpos($path, $prefix) === 0) ? substr($path, strlen($prefix)) : $path;
    $hierarchyarr = explode('/', $relativepath);

    $previouselement = '';

    foreach ($hierarchyarr as $element) {

        // If element is already set and it's not
        // the processed one (with tags) fast move the $info
        // pointer and continue
        if ($element !== $processedtag && isset($info[$element])) {
            $previouselement = $element;
            $info =& $info[$element];
            continue;
        }

        // If previous element already has occurrences
        // we move $info pointer there (only if last is
        // numeric occurrence)
        if (!empty($previouselement) && is_array($info) && count($info) > 0) {
            end($info);
            $key = key($info);
            if ((int) $key === $key) {
                $info =& $info[$key];
            }
        }

        // Create element if not defined
        if (!isset($info[$element])) {
            // First into last element if present
            $info[$element] = array();
        }

        // If element is the current one, add information
        if ($element === $processedtag) {
            $info[$element][] = $tags;
        }

        $previouselement = $element;
        $info =& $info[$element];
    }
}