Codestyle + check trustedproxies
[dokuwiki.git] / inc / search.php
blob3bbea5cc20f417d6151ce6484974984db595b8b7
1 <?php
3 /**
4 * DokuWiki search functions
6 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
7 * @author Andreas Gohr <andi@splitbrain.org>
8 */
10 use dokuwiki\Utf8\PhpString;
11 use dokuwiki\File\MediaFile;
12 use dokuwiki\Utf8\Sort;
/**
 * Recursively walk a directory tree
 *
 * Reads the directory $base/$dir, skips every entry whose name starts with
 * a dot or an underscore, and hands the remaining directories and files to
 * the supplied callback. Directories are passed first; whenever the callback
 * returns a true value for a directory, that directory is searched as well
 * (with $lvl increased by one). Files are passed afterwards and the
 * callback's return value is ignored for them.
 *
 * @param array    &$data The results of the search are stored here
 * @param string   $base  Where to start the search
 * @param callback $func  Callback (function name or array with object,method)
 * @param array    $opts  option array will be given to the Callback
 * @param string   $dir   Current directory beyond $base
 * @param int      $lvl   Recursion Level
 * @param mixed    $sort  'natural' to use natural order sorting (default);
 *                        'date' to sort by filemtime; leave empty to skip sorting.
 * @throws RuntimeException when $base is empty or '/'
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function search(&$data, $base, $func, $opts, $dir = '', $lvl = 1, $sort = 'natural')
{
    // safeguard against runaways #1452
    if ($base == '' || $base == '/') {
        throw new RuntimeException('No valid $base passed to search() - possible misconfiguration or bug');
    }

    // an unreadable or missing directory simply yields nothing
    $handle = @opendir($base . '/' . $dir);
    if (!$handle) return;

    // collect directories and files (paths are kept relative to $base)
    $subdirs = [];
    $entries = [];
    $fullpaths = [];
    while (($entry = readdir($handle)) !== false) {
        if (preg_match('/^[\._]/', $entry)) continue; //skip hidden files and upper dirs

        $relative = $dir . '/' . $entry;
        $absolute = $base . '/' . $relative;
        if (is_dir($absolute)) {
            $subdirs[] = $relative;
        } else {
            $entries[] = $relative;
            $fullpaths[] = $absolute; // only needed for sorting by date
        }
    }
    closedir($handle);

    if (!empty($sort)) {
        if ($sort == 'date') {
            // newest files first
            @array_multisort(array_map('filemtime', $fullpaths), SORT_NUMERIC, SORT_DESC, $entries);
        } else /* natural */ {
            Sort::asortFN($entries);
        }
        // directories are always sorted naturally
        Sort::asortFN($subdirs);
    }

    // give directories to the callback first and descend where it asks for it
    foreach ($subdirs as $subdir) {
        if (call_user_func_array($func, [&$data, $base, $subdir, 'd', $lvl, $opts])) {
            search($data, $base, $func, $opts, $subdir, $lvl + 1, $sort);
        }
    }

    // now handle the files
    foreach ($entries as $entry) {
        call_user_func_array($func, [&$data, $base, $entry, 'f', $lvl, $opts]);
    }
}
/**
 * The following functions are userfunctions to use with the search
 * function above. Such a function is called for every found file or
 * directory. When a directory is given to the function it has to
 * decide if this directory should be traversed (true) or not (false).
 * The function has to accept the following parameters:
 *
 * array &$data - Reference to the result data structure
 * string $base - Base usually $conf['datadir']
 * string $file - current file or directory relative to $base
 * string $type - Type either 'd' for directory or 'f' for file
 * int $lvl - Current recursion depth
 * array $opts - option array as given to search()
 *
 * return values for files are ignored
 *
 * All functions should check the ACL for document READ rights.
 * Namespaces (directories) are NOT checked (when sneaky_index is 0) as this
 * would break the recursion (you can have a nonreadable dir over a readable
 * one deeper nested). Also make sure to check the file type (for example
 * in case of lockfiles).
 */
/**
 * Searches for pages beginning with the given query
 *
 * Thin wrapper around search_universal(): only pages are listed, and only
 * those whose ID contains $opts['query'] at the start of the ID or directly
 * after a namespace separator. All other incoming options are discarded.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts only $opts['query'] is used
 *
 * @return bool
 */
function search_qsearch(&$data, $base, $file, $type, $lvl, $opts)
{
    $opts = [
        // search_universal() wraps idmatch in '/' delimiters itself, so the
        // expression must not carry a delimiter of its own - a trailing '/'
        // would be parsed as an (invalid) pattern modifier
        'idmatch' => '(^|:)' . preg_quote($opts['query'] ?? '', '/'),
        'listfiles' => true,
        'pagesonly' => true,
    ];
    return search_universal($data, $base, $file, $type, $lvl, $opts);
}
/**
 * Build the browsable index of pages
 *
 * $opts['ns'] is the currently viewed namespace
 * $opts['nofiles'] suppresses the listing of pages when set
 *
 * Delegates to search_universal(), listing namespaces and (optionally)
 * pages. The sneaky_index configuration setting is passed on as 'sneakyacl'.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_index(&$data, $base, $file, $type, $lvl, $opts)
{
    global $conf;

    $currentNs = $opts['ns'] ?? '';

    // Hacky, should rather use recmatch:
    // is $file the viewed namespace itself or one of its parents?
    $onPath = preg_match('#^' . preg_quote($file, '#') . '(/|$)#', '/' . $currentNs);

    return search_universal($data, $base, $file, $type, $lvl, [
        'pagesonly' => true,
        'listdirs' => true,
        'listfiles' => empty($opts['nofiles']),
        'sneakyacl' => $conf['sneaky_index'],
        // 0 means unlimited depth; -1 makes search_universal() report
        // "depth reached" for every directory
        'depth' => $onPath ? 0 : -1,
    ]);
}
/**
 * List all namespaces
 *
 * Delegates to search_universal() with directory listing as the only
 * enabled option; the options passed in by the caller are not used.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_namespaces(&$data, $base, $file, $type, $lvl, $opts)
{
    return search_universal($data, $base, $file, $type, $lvl, ['listdirs' => true]);
}
/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *   $opts['hash']      add hashes to result list
 *
 * Every accepted file is appended to $data as an array with the keys
 * id, perm, file, size, mtime, writable and isimg, plus meta (a JpegMeta
 * object) for jpg/jpeg/gif/png files and hash when requested.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool true when a directory should be searched, false otherwise
 */
function search_media(&$data, $base, $file, $type, $lvl, $opts)
{
    // directories are never listed, they only decide about the recursion:
    // descend unless a depth limit is set and has been reached
    if ($type == 'd') {
        return empty($opts['depth']) || substr_count($file, '/') < $opts['depth'];
    }

    $id = pathID($file, true);
    if ($id !== cleanID($id)) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($id) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $perm = auth_quickaclcheck(getNS($id) . ':*');
    if (empty($opts['skipacl']) && $perm < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)) {
        return false;
    }

    $path = $base . '/' . $file;
    $isImage = (bool) preg_match("/\.(jpe?g|gif|png)$/", $file);

    $info = [
        'id' => $id,
        'perm' => $perm,
        'file' => PhpString::basename($file),
        'size' => filesize($path),
        'mtime' => filemtime($path),
        'writable' => is_writable($path),
        'isimg' => $isImage,
    ];
    if ($isImage) {
        $info['meta'] = new JpegMeta($path);
    }
    if (!empty($opts['hash'])) {
        $info['hash'] = md5(io_readFile(mediaFN($id), false));
    }

    $data[] = $info;

    return false;
}
/**
 * List all mediafiles in a namespace
 *   $opts['depth']     recursion level, 0 for all
 *   $opts['showmsg']   shows message if invalid media id is used
 *   $opts['skipacl']   skip acl checking
 *   $opts['pattern']   check given pattern
 *
 * Every accepted file is appended to $data as a MediaFile object.
 * Unlike search_media() this function does not evaluate $opts['hash'].
 *
 * @todo This is a temporary copy of search_media returning a list of MediaFile instances
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool true when a directory should be searched, false otherwise
 */
function search_mediafiles(&$data, $base, $file, $type, $lvl, $opts)
{
    // directories are never listed, they only decide about the recursion
    if ($type == 'd') {
        if (empty($opts['depth'])) return true; // recurse forever
        $depth = substr_count($file, '/');
        if ($depth >= $opts['depth']) return false; // depth reached
        return true;
    }

    $id = pathID($file, true);
    // strict comparison: a loose one treats numeric looking names such as
    // '+1' and '1' as equal and would let a non-clean ID slip through
    if ($id !== cleanID($id)) {
        // showmsg is optional, so it must not be read unguarded
        if (!empty($opts['showmsg'])) {
            msg(hsc($id) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }

    //check ACL for namespace (we have no ACL for mediafiles)
    $perm = auth_quickaclcheck(getNS($id) . ':*');
    if (empty($opts['skipacl']) && $perm < AUTH_READ) {
        return false;
    }

    //check pattern filter
    if (!empty($opts['pattern']) && !@preg_match($opts['pattern'], $id)) {
        return false;
    }

    $data[] = new MediaFile($id);
    return false;
}
/**
 * This function just lists documents (for RSS namespace export)
 *
 * Adds an entry with the page ID for every readable .txt file. It always
 * returns false, so directories handed to it are never descended into.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool always false
 */
function search_list(&$data, $base, $file, $type, $lvl, $opts)
{
    // directories are ignored and only txt files are of interest
    if ($type == 'd' || !str_ends_with($file, '.txt')) {
        return false;
    }

    // list the page only if the current user may read it
    $id = pathID($file);
    if (auth_quickaclcheck($id) >= AUTH_READ) {
        $data[] = ['id' => $id];
    }
    return false;
}
/**
 * Quicksearch for searching matching pagenames
 *
 * $opts['query'] is the search query
 *
 * A page is added to $data when the query occurs anywhere in its file path
 * (plain substring match) and the current user may read it. Without a query
 * nothing is added.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool true, except for a matching page that is not readable
 */
function search_pagename(&$data, $base, $file, $type, $lvl, $opts)
{
    // always descend into directories; anything but txt files is skipped
    if ($type == 'd' || !str_ends_with($file, '.txt')) {
        return true;
    }

    // nothing to match against
    if (empty($opts['query'])) {
        return true;
    }

    //simple stringmatching
    if (strpos($file, (string) $opts['query']) === false) {
        return true;
    }

    //check ACL
    $id = pathID($file);
    if (auth_quickaclcheck($id) < AUTH_READ) {
        return false;
    }

    $data[] = ['id' => $id];
    return true;
}
/**
 * Just lists all documents
 *
 * $opts['depth']   recursion level, 0 for all
 * $opts['hash']    do md5 sum of content?
 * $opts['skipacl'] list everything regardless of ACL
 *
 * Every accepted page is appended to $data as an array with the keys
 * id, rev, mtime and size, plus hash when requested.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $data
 * @param string $base
 * @param string $file
 * @param string $type
 * @param integer $lvl
 * @param array $opts
 *
 * @return bool
 */
function search_allpages(&$data, $base, $file, $type, $lvl, $opts)
{
    $isDir = ($type == 'd');

    if (($opts['depth'] ?? 0) > 0) {
        // number of path segments below $base
        $segments = substr_count(ltrim($file, '/'), '/') + 1;
        // a directory on the deepest allowed level must not be entered,
        // a file on that level is still fine
        $tooDeep = $isDir ? ($segments >= $opts['depth']) : ($segments > $opts['depth']);
        if ($tooDeep) {
            return false; // depth reached
        }
    }

    // directories are not listed themselves but always searched,
    // and anything but txt files is skipped
    if ($isDir || !str_ends_with($file, '.txt')) {
        return true;
    }

    $id = pathID($file);
    if (empty($opts['skipacl']) && auth_quickaclcheck($id) < AUTH_READ) {
        return false;
    }

    $path = $base . '/' . $file;
    $modified = filemtime($path);

    $item = [
        'id' => $id,
        'rev' => $modified,
        'mtime' => $modified,
        'size' => filesize($path),
    ];
    if (!empty($opts['hash'])) {
        $item['hash'] = md5(trim(rawWiki($id)));
    }

    $data[] = $item;
    return true;
}
417 /* ------------- helper functions below -------------- */
/**
 * fulltext sort
 *
 * Comparison callback for usort(), meant for the data structure created
 * by search_fulltext. Entries with a higher 'count' come first; entries
 * with the same count are ordered by their 'id' using Sort::strcmp().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param array $a
 * @param array $b
 *
 * @return int
 */
function sort_search_fulltext($a, $b)
{
    // descending by count
    if ($a['count'] > $b['count']) return -1;
    if ($a['count'] < $b['count']) return 1;

    // same count: fall back to comparing the IDs
    return Sort::strcmp($a['id'], $b['id']);
}
/**
 * translates a document path to an ID
 *
 * The path is run through utf8_decodeFN(), slashes become colons, a
 * trailing .txt is removed unless $keeptxt is set, and leading/trailing
 * colons are stripped.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @todo move to pageutils
 *
 * @param string $path
 * @param bool $keeptxt keep a trailing .txt extension
 *
 * @return string
 */
function pathID($path, $keeptxt = false)
{
    $id = str_replace('/', ':', utf8_decodeFN($path));
    if (!$keeptxt) {
        $id = preg_replace('#\.txt$#', '', $id);
    }
    return trim($id, ':');
}
/**
 * This is a very universal callback for the search() function, replacing
 * many of the former individual functions at the cost of a more complex
 * setup.
 *
 * How the function behaves, depends on the options passed in the $opts
 * array, where the following settings can be used.
 *
 * depth      int     recursion depth. 0 for unlimited                       (default: 0)
 * keeptxt    bool    keep .txt extension for IDs                            (default: false)
 * listfiles  bool    include files in listing                               (default: false)
 * listdirs   bool    include namespaces in listing                          (default: false)
 * pagesonly  bool    restrict files to pages                                (default: false)
 * skipacl    bool    do not check for READ permission                       (default: false)
 * sneakyacl  bool    don't recurse into nonreadable dirs                    (default: false)
 * hash       bool    create MD5 hash for files                              (default: false)
 * meta       bool    return file metadata                                   (default: false)
 * filematch  string  match files against this regexp                        (default: '', so accept everything)
 * idmatch    string  match full ID against this regexp                      (default: '', so accept everything)
 * dirmatch   string  match directory against this regexp when adding        (default: '', so accept everything)
 * nsmatch    string  match namespace against this regexp when adding        (default: '', so accept everything)
 * recmatch   string  match directory against this regexp when recursing     (default: '', so accept everything)
 * showmsg    bool    warn about non-ID files                                (default: false)
 * showhidden bool    show hidden files(e.g. by hidepages config) too        (default: false)
 * firsthead  bool    return first heading for pages                         (default: false)
 *
 * All regular expressions are given without delimiters, the function wraps
 * them in '/' itself.
 *
 * @param array &$data  - Reference to the result data structure
 * @param string $base  - Base usually $conf['datadir']
 * @param string $file  - current file or directory relative to $base
 * @param string $type  - Type either 'd' for directory or 'f' for file
 * @param int    $lvl   - Current recursion depth
 * @param array  $opts  - option array as given to search()
 * @return bool if this directory should be traversed (true) or not (false)
 *              return value is ignored for files
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 */
function search_universal(&$data, $base, $file, $type, $lvl, $opts)
{
    $isDir = ($type == 'd');

    // get ID and check if it is a valid one
    $id = pathID($file, ($isDir || !empty($opts['keeptxt'])));
    if ($id !== cleanID($id)) {
        if (!empty($opts['showmsg'])) {
            msg(hsc($id) . ' is not a valid file name for DokuWiki - skipped', -1);
        }
        return false; // skip non-valid files
    }
    $item = [
        'id' => $id,
        'ns' => getNS($id),
    ];

    // decide if recursion into this directory is wanted
    $recurse = true;
    if ($isDir) {
        // no depth given means recurse forever, otherwise stop when reached
        $recurse = empty($opts['depth']) || substr_count($file, '/') < $opts['depth'];

        // a directory that would be entered has to pass recmatch
        if ($recurse && !empty($opts['recmatch']) && !preg_match('/' . $opts['recmatch'] . '/', $file)) {
            return false; // doesn't match
        }
    }

    // check ACL
    if (!empty($opts['skipacl'])) {
        $item['perm'] = AUTH_DELETE;
    } elseif ($isDir) {
        $item['perm'] = auth_quickaclcheck($id . ':*');
    } else {
        $item['perm'] = auth_quickaclcheck($id); //FIXME check namespace for media files
    }

    // are we done here maybe?
    if ($isDir) {
        if (empty($opts['listdirs'])) {
            return $recurse;
        }
        //neither list nor recurse forbidden items:
        if (empty($opts['skipacl']) && !empty($opts['sneakyacl']) && $item['perm'] < AUTH_READ) {
            return false;
        }
        $filtered = (!empty($opts['dirmatch']) && !preg_match('/' . $opts['dirmatch'] . '/', $file))
            || (!empty($opts['nsmatch']) && !preg_match('/' . $opts['nsmatch'] . '/', $item['ns']));
    } else {
        // the checks are evaluated in this order and stop at the first hit
        $filtered = empty($opts['listfiles'])
            || (empty($opts['skipacl']) && $item['perm'] < AUTH_READ)
            || (!empty($opts['pagesonly']) && !str_ends_with($file, '.txt'))
            || (empty($opts['showhidden']) && isHiddenPage($id))
            || (!empty($opts['filematch']) && !preg_match('/' . $opts['filematch'] . '/', $file))
            || (!empty($opts['idmatch']) && !preg_match('/' . $opts['idmatch'] . '/', $id));
    }
    if ($filtered) {
        return $recurse;
    }

    // still here? prepare the item
    $item['type'] = $type;
    $item['level'] = $lvl;
    $item['open'] = $recurse;

    $path = $base . '/' . $file;

    if (!empty($opts['meta'])) {
        $item['file'] = PhpString::basename($file);
        $item['size'] = filesize($path);
        $item['mtime'] = filemtime($path);
        $item['rev'] = $item['mtime'];
        $item['writable'] = is_writable($path);
        $item['executable'] = is_executable($path);
    }

    if ($type == 'f') {
        if (!empty($opts['hash'])) {
            $item['hash'] = md5(io_readFile($path, false));
        }
        if (!empty($opts['firsthead'])) {
            $item['title'] = p_get_first_heading($id, METADATA_DONT_RENDER);
        }
    }

    // finally add the item
    $data[] = $item;
    return $recurse;
}
588 //Setup VIM: ex: et ts=4 :