Merge pull request #4017 from dokuwiki-translate/lang_update_685_1690411879
[dokuwiki.git] / inc / pageutils.php
blob6af58ed8257e23f83dd9939a98a0a1286f04d76b
1 <?php
3 /**
4 * Utilities for handling pagenames
6 * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
7 * @author Andreas Gohr <andi@splitbrain.org>
8 * @todo Combine similar functions like {wiki,media,meta}FN()
9 */
11 use dokuwiki\ChangeLog\MediaChangeLog;
12 use dokuwiki\ChangeLog\PageChangeLog;
13 use dokuwiki\File\MediaResolver;
14 use dokuwiki\File\PageResolver;
/**
 * Fetch an ID from the request
 *
 * The ID is read from the request variable named by $param. When that is
 * empty and $conf['userewrite'] is 2, the ID is reconstructed from the
 * request URI (or PATH_INFO) by stripping the script URL and query string.
 *
 * An ID ending in a namespace separator is resolved to a page inside that
 * namespace; when $ACT is 'show' a redirect to the resolved page is sent.
 *
 * For $param='id' $conf['start'] is returned if no id was found.
 * If the second parameter is true (default) the ID is cleaned.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $param the $_REQUEST variable name, default 'id'
 * @param bool $clean if true, ID is cleaned
 * @return string
 */
function getID($param = 'id', $clean = true)
{
    /** @var Input $INPUT */
    global $INPUT;
    global $conf;
    global $ACT;

    $id = $INPUT->str($param);

    // no ID given as parameter: rebuild it from the requested URL
    if (empty($id) && $conf['userewrite'] == 2) {
        $server = $INPUT->server;
        $request = $server->str('REQUEST_URI');
        $script = '';

        // figure out the URL of the running script
        if ($conf['basedir']) {
            // anything but the page id is served by a script below lib/exe/
            $relpath = ($param != 'id') ? 'lib/exe/' : '';
            $script = $conf['basedir'] . $relpath .
                \dokuwiki\Utf8\PhpString::basename($server->str('SCRIPT_FILENAME'));
        } elseif ($server->str('PATH_INFO')) {
            // PATH_INFO already holds the part behind the script
            $request = $server->str('PATH_INFO');
        } elseif ($server->str('SCRIPT_NAME')) {
            $script = $server->str('SCRIPT_NAME');
        } elseif ($server->str('DOCUMENT_ROOT') && $server->str('SCRIPT_FILENAME')) {
            // derive the script URL by cutting the document root off the file path
            $script = preg_replace(
                '/^' . preg_quote($server->str('DOCUMENT_ROOT'), '/') . '/',
                '',
                $server->str('SCRIPT_FILENAME')
            );
            $script = '/' . $script;
        }

        // collapse repeated slashes in script and request (fixes a windows problem)
        $script = preg_replace('/\/\/+/', '/', $script);
        $request = preg_replace('/\/\/+/', '/', $request);

        // what follows the script URL, minus the query string, is the id
        if (preg_match('/^' . preg_quote($script, '/') . '(.*)/', $request, $match)) {
            $id = preg_replace('/\?.*/', '', $match[1]);
        }
        $id = urldecode($id);
        // strip leading slashes
        $id = preg_replace('!^/+!', '', $id);
    }

    // Namespace autolinking from URL
    $lastChar = substr($id, -1);
    if ($lastChar == ':' || ($conf['useslash'] && $lastChar == '/')) {
        if (page_exists($id . $conf['start'])) {
            // start page inside namespace
            $id .= $conf['start'];
        } elseif (page_exists($id . noNS(cleanID($id)))) {
            // page named like the NS inside the NS
            $id .= noNS(cleanID($id));
        } elseif (page_exists($id)) {
            // page like namespace exists
            $id = substr($id, 0, -1);
        } else {
            // fall back to default
            $id .= $conf['start'];
        }

        if (isset($ACT) && $ACT === 'show') {
            // redirect to the resolved page, keeping all other URL parameters
            $urlParameters = $_GET;
            unset($urlParameters['id']);
            send_redirect(wl($id, $urlParameters, true, '&'));
        }
    }

    if ($clean) {
        $id = cleanID($id);
    }
    if ($id === '' && $param == 'id') {
        $id = $conf['start'];
    }

    return $id;
}
/**
 * Remove unwanted chars from ID
 *
 * Cleans a given ID to only use allowed characters. Depending on
 * $conf['deaccent'] (or when $ascii is set) characters are romanized
 * and/or deaccented. Results of non-ASCII runs are kept in a memory cache
 * keyed by the raw ID.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $raw_id The pageid to clean
 * @param boolean $ascii Force ASCII
 * @return string cleaned id
 */
function cleanID($raw_id, $ascii = false)
{
    global $conf;
    global $cache_cleanid;
    static $sepcharpat = null;

    $cache = &$cache_cleanid;
    $cacheKey = (string)$raw_id;

    // serve from the memory cache when possible (forced ASCII results are never cached)
    if (!$ascii && isset($cache[$cacheKey])) {
        return $cache[$cacheKey];
    }

    $sepchar = $conf['sepchar'];
    if ($sepcharpat == null) {
        // pattern matching runs of the separator char, built only once
        $sepcharpat = '#\\' . $sepchar . '+#';
    }

    $id = \dokuwiki\Utf8\PhpString::strtolower(trim($cacheKey));

    // alternative namespace separators: ';' always maps to ':',
    // '/' maps to ':' only when useslash is enabled, otherwise to the separator char
    $id = strtr($id, ';/', $conf['useslash'] ? '::' : (':' . $sepchar));

    if ($conf['deaccent'] == 2 || $ascii) {
        $id = \dokuwiki\Utf8\Clean::romanize($id);
    }
    if ($conf['deaccent'] || $ascii) {
        $id = \dokuwiki\Utf8\Clean::deaccent($id, -1);
    }

    // remove specials
    $id = \dokuwiki\Utf8\Clean::stripspecials($id, $sepchar, '\*');

    if ($ascii) {
        $id = \dokuwiki\Utf8\Clean::strip($id);
    }

    // collapse separator runs and tidy up the namespace boundaries
    $id = preg_replace($sepcharpat, $sepchar, $id);
    $id = preg_replace('#:+#', ':', $id);
    $id = trim($id, ':._-');
    $id = preg_replace('#:[:\._\-]+#', ':', $id);
    $id = preg_replace('#[:\._\-]+:#', ':', $id);

    if (!$ascii) {
        $cache[$cacheKey] = $id;
    }
    return $id;
}
/**
 * Return the namespace part of a wiki ID
 *
 * Everything in front of the last colon is considered the namespace.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string|false the namespace part or false if the given ID has no namespace (root)
 */
function getNS($id)
{
    $id = (string)$id;
    $lastColon = strrpos($id, ':');

    return ($lastColon === false) ? false : substr($id, 0, $lastColon);
}
/**
 * Returns the ID without the namespace
 *
 * Everything behind the last colon is returned. When the ID contains no
 * colon at all, the passed value is returned unchanged (so a non-string
 * input such as false is handed back as is).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $id
 * @return string
 */
function noNS($id)
{
    // cast like getNS() does: callers may hand in false/null, and passing
    // null to the string functions is deprecated as of PHP 8.1
    $pos = strrpos((string)$id, ':');
    if ($pos === false) {
        return $id;
    }

    return substr((string)$id, $pos + 1);
}
/**
 * Returns the current namespace
 *
 * This is the last segment of the namespace the given ID lives in.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string|false false when the ID has no namespace
 */
function curNS($id)
{
    $namespace = getNS($id);

    return noNS($namespace);
}
/**
 * Returns the ID without the namespace or current namespace for 'start' pages
 *
 * When neither a page name nor a namespace can be determined,
 * $conf['start'] is returned.
 *
 * @author Nathan Fritz <fritzn@crown.edu>
 *
 * @param string $id
 * @return string
 */
function noNSorNS($id)
{
    global $conf;

    $name = noNS($id);
    if ($name !== $conf['start'] && $name !== false && $name !== '') {
        // a regular page: its own name is good enough
        return $name;
    }

    // start page or empty name: use the surrounding namespace instead
    $name = curNS($id);
    if ($name === false || $name === '') {
        return $conf['start'];
    }

    return $name;
}
/**
 * Creates a XHTML valid linkid from a given headline title
 *
 * The title is cleaned, colons and dots are removed and leading digits,
 * underscores and dashes are stripped. If nothing is left, 'section'
 * followed by the digits of the headline is used.
 *
 * When $check is an array, the returned ID is made unique within it by
 * appending a counter, and the new ID is appended to $check.
 *
 * @param string $title The headline title
 * @param array|bool $check Existing IDs
 * @return string the section ID
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function sectionID($title, &$check)
{
    $title = str_replace([':', '.'], '', cleanID($title));
    $new = ltrim($title, '0123456789_-');
    if (empty($new)) {
        $title = 'section' . preg_replace('/[^0-9]+/', '', $title); //keep numbers from headline
    } else {
        $title = $new;
    }

    if (!is_array($check)) {
        return $title;
    }

    $suffix = 0;
    $candidateTitle = $title;
    // strict comparison: a loose one would treat a stray true (or 0 on PHP 7)
    // in $check as equal to every candidate and never terminate
    while (in_array($candidateTitle, $check, true)) {
        $candidateTitle = $title . ++$suffix;
    }
    $check[] = $candidateTitle;

    return $candidateTitle;
}
/**
 * Wiki page existence check
 *
 * parameters as for wikiFN
 *
 * @author Chris Smith <chris@jalakai.co.uk>
 *
 * @param string $id page id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see wikiFN as well)
 * @param bool $date_at when true, $rev is replaced by the last revision at that time (if one is found)
 * @return bool exists?
 */
function page_exists($id, $rev = '', $clean = true, $date_at = false)
{
    // only the part in front of a '#' identifies the page (#3608)
    $id = explode('#', $id, 2)[0];

    if ($date_at && $rev !== '') {
        $changelog = new PageChangeLog($id);
        $revisionAt = $changelog->getLastRevisionAt($rev);
        if ($revisionAt !== false) {
            $rev = $revisionAt;
        }
    }

    return file_exists(wikiFN($id, $rev, $clean));
}
/**
 * Media existence check
 *
 * @param string $id media id
 * @param string|int $rev empty or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned (see mediaFN as well)
 * @param bool $date_at when true, $rev is replaced by the last revision at that time (if one is found)
 * @return bool exists?
 */
function media_exists($id, $rev = '', $clean = true, $date_at = false)
{
    if ($date_at && $rev !== '') {
        $changelog = new MediaChangeLog($id);
        $revisionAt = $changelog->getLastRevisionAt($rev);
        if ($revisionAt !== false) {
            $rev = $revisionAt;
        }
    }

    return file_exists(mediaFN($id, $rev, $clean));
}
/**
 * Returns the full path to the datafile specified by ID and optional revision
 *
 * The filename is URL encoded to protect Unicode chars. Computed paths are
 * kept in a memory cache per ID and revision.
 *
 * @param $raw_id string id of wikipage
 * @param $rev int|string page revision, empty string for current
 * @param $clean bool flag indicating that $raw_id should be cleaned. Only set to false
 *                    when $id is guaranteed to have been cleaned already.
 * @return string full path
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function wikiFN($raw_id, $rev = '', $clean = true)
{
    global $conf;
    global $cache_wikifn;

    $cache = &$cache_wikifn;

    $id = $clean ? cleanID($raw_id) : $raw_id;
    $id = str_replace(':', '/', $id);

    if (isset($cache[$id][$rev])) {
        return $cache[$id][$rev];
    }

    if (empty($rev)) {
        // current version lives in the data directory
        $fn = $conf['datadir'] . '/' . utf8_encodeFN($id) . '.txt';
    } else {
        // old revisions live in the attic, possibly compressed
        $fn = $conf['olddir'] . '/' . utf8_encodeFN($id) . '.' . $rev . '.txt';
        if ($conf['compression']) {
            // an existing file of either compression wins; if the file doesn't
            // exist yet, the configured extension is used
            $suffix = $conf['compression'];
            foreach (['gz', 'bz2'] as $known) {
                if (file_exists($fn . '.' . $known)) {
                    $suffix = $known;
                    break;
                }
            }
            $fn .= '.' . $suffix;
        }
    }

    $cache[$id][$rev] = $fn;

    return $fn;
}
/**
 * Returns the full path to the file for locking the page while editing.
 *
 * The lock file is named after the md5 hash of the cleaned page ID.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @param string $id page id
 * @return string full path
 */
function wikiLockFN($id)
{
    global $conf;

    $hash = md5(cleanID($id));

    return $conf['lockdir'] . '/' . $hash . '.lock';
}
/**
 * Returns the full path to the meta file specified by ID and extension
 *
 * @author Steven Danz <steven-danz@kc.rr.com>
 *
 * @param string $id page id
 * @param string $ext file extension, appended as given
 * @return string full path
 */
function metaFN($id, $ext)
{
    global $conf;

    // namespaces become directories
    $path = str_replace(':', '/', cleanID($id));

    return $conf['metadir'] . '/' . utf8_encodeFN($path) . $ext;
}
/**
 * Returns the full path to the media's meta file specified by ID and extension
 *
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id media id
 * @param string $ext extension of the meta file, appended as given
 * @return string
 */
function mediaMetaFN($id, $ext)
{
    global $conf;

    // namespaces become directories
    $path = str_replace(':', '/', cleanID($id));

    return $conf['mediametadir'] . '/' . utf8_encodeFN($path) . $ext;
}
/**
 * Returns an array of full paths to all metafiles of a given ID
 *
 * @author Esther Brunner <esther@kaffeehaus.ch>
 * @author Michael Hamann <michael@content-space.de>
 *
 * @param string $id page id
 * @return array
 */
function metaFiles($id)
{
    $basename = metaFN($id, '');
    $files = glob($basename . '.*', GLOB_MARK);
    if (!$files) {
        return [];
    }

    // accept a single extension only: this filters files like foo.bar.meta
    // when $id == 'foo', as well as anything containing a slash
    $pattern = '/^' . preg_quote($basename, '/') . '\.[^.\/]*$/u';

    return preg_grep($pattern, $files);
}
/**
 * Returns the full path to the mediafile specified by ID
 *
 * The filename is URL encoded to protect Unicode chars
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Kate Arzamastseva <pshns@ukr.net>
 *
 * @param string $id media id
 * @param string|int $rev empty string or revision timestamp
 * @param bool $clean flag indicating that $id should be cleaned
 *
 * @return string full path
 */
function mediaFN($id, $rev = '', $clean = true)
{
    global $conf;

    if ($clean) {
        $id = cleanID($id);
    }
    $id = str_replace(':', '/', $id);

    if (empty($rev)) {
        return $conf['mediadir'] . '/' . utf8_encodeFN($id);
    }

    // old revisions carry the revision between name and extension
    // NOTE(review): presumably mimetype() returns the file extension at index 0 - confirm
    $ext = mimetype($id);
    $name = substr($id, 0, -1 * strlen($ext[0]) - 1);

    return $conf['mediaolddir'] . '/' . utf8_encodeFN($name . '.' . ((int) $rev) . '.' . $ext[0]);
}
/**
 * Returns the full filepath to a localized file
 *
 * The file is looked up for the configured language first below DOKU_CONF,
 * then below DOKU_INC. If neither exists the english one is returned.
 *
 * @param string $id The id of the local file
 * @param string $ext The file extension (usually txt)
 * @return string full filepath to localized file
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
function localeFN($id, $ext = 'txt')
{
    global $conf;

    $name = $id . '.' . $ext;

    // locally overridden translation
    $file = DOKU_CONF . 'lang/' . $conf['lang'] . '/' . $name;
    if (file_exists($file)) {
        return $file;
    }

    // bundled translation
    $file = DOKU_INC . 'inc/lang/' . $conf['lang'] . '/' . $name;
    if (file_exists($file)) {
        return $file;
    }

    //fall back to english
    return DOKU_INC . 'inc/lang/en/' . $name;
}
/**
 * Resolve relative paths in IDs
 *
 * Do not call directly use resolve_mediaid or resolve_pageid
 * instead
 *
 * Partly based on a cleanPath function found at
 * http://php.net/manual/en/function.realpath.php#57016
 *
 * @deprecated 2020-09-30
 * @param string $ns namespace which is context of id
 * @param string $id relative id
 * @param bool $clean flag indicating that id should be cleaned
 * @return string
 */
function resolve_id($ns, $id, $clean = true)
{
    global $conf;
    dbg_deprecated(\dokuwiki\File\Resolver::class . ' and its children');

    // some pre cleaning for useslash:
    if ($conf['useslash']) {
        $id = str_replace('/', ':', $id);
    }

    if ($id && $id[0] == '.') {
        // the id starts with a dot, so the relative parts need resolving

        // normalize initial dots without a colon
        $id = preg_replace('/^((\.+:)*)(\.+)(?=[^:\.])/', '\1\3:', $id);
        // prepend the current namespace
        $id = $ns . ':' . $id;

        // cleanup relatives
        $resolved = [];
        $parts = explode(':', $id);
        if (!$parts[0]) {
            // keep a leading colon
            $resolved[] = '';
        }
        foreach ($parts as $part) {
            if ($part == '..') {
                // go one level up; when there is nothing to go up from
                // (or we are already above the top) the '..' is kept
                if (end($resolved) == '..' || !array_pop($resolved)) {
                    $resolved[] = '..';
                }
            } elseif ($part && $part != '.') {
                $resolved[] = $part;
            }
        }
        if (!end($parts)) {
            // keep a trailing colon
            $resolved[] = '';
        }
        $id = implode(':', $resolved);
    } elseif ($ns !== false && strpos($id, ':') === false) {
        //if link contains no namespace. add current namespace (if any)
        $id = $ns . ':' . $id;
    }

    if ($clean) {
        $id = cleanID($id);
    }

    return $id;
}
/**
 * Returns a full media id
 *
 * Thin wrapper around MediaResolver, kept for backwards compatibility.
 *
 * @param string $ns namespace which is context of id
 * @param string &$media (reference) relative media id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of media
 * @param int|string $rev
 * @param bool $date_at
 * @deprecated 2020-09-30
 */
function resolve_mediaid($ns, &$media, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(MediaResolver::class);

    // the resolver expects a context id, so a placeholder inside $ns is passed
    $mediaResolver = new MediaResolver("$ns:deprecated");

    $media = $mediaResolver->resolveId($media, $rev, $date_at);
    $exists = media_exists($media, $rev, false, $date_at);
}
/**
 * Returns a full page id
 *
 * Thin wrapper around PageResolver, kept for backwards compatibility.
 *
 * @deprecated 2020-09-30
 * @param string $ns namespace which is context of id
 * @param string &$page (reference) relative page id, updated to resolved id
 * @param bool &$exists (reference) updated with existence of the page
 * @param string $rev
 * @param bool $date_at
 */
function resolve_pageid($ns, &$page, &$exists, $rev = '', $date_at = false)
{
    dbg_deprecated(PageResolver::class);

    global $ID;

    // usually the namespace is the one of the current page, then the page itself
    // is the context; a placeholder is only used when a different namespace was given
    $context = (getNS($ID) == $ns) ? $ID : "$ns:deprecated";

    $pageResolver = new PageResolver($context);

    $page = $pageResolver->resolveId($page, $rev, $date_at);
    $exists = page_exists($page, $rev, false, $date_at);
}
/**
 * Returns the name of a cachefile from given data
 *
 * The file is placed in a subdirectory named after the first character of
 * the md5 hash. The needed directory is created by this function!
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param string $data This data is used to create a unique md5 name
 * @param string $ext This is appended to the filename if given
 * @return string The filename of the cachefile
 */
function getCacheName($data, $ext = '')
{
    global $conf;

    $hash = md5($data);
    $bucket = substr($hash, 0, 1);
    $file = $conf['cachedir'] . '/' . $bucket . '/' . $hash . $ext;

    io_makeFileDir($file);

    return $file;
}
/**
 * Checks a pageid against $conf['hidepages']
 *
 * The decision is made through the PAGEUTILS_ID_HIDEPAGE event, whose
 * default action is _isHiddenPage().
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool
 */
function isHiddenPage($id)
{
    $data = ['id' => $id, 'hidden' => false];

    \dokuwiki\Extension\Event::createAndTrigger('PAGEUTILS_ID_HIDEPAGE', $data, '_isHiddenPage');

    return $data['hidden'];
}
/**
 * Callback that checks if a page is hidden
 *
 * Sets $data['hidden'] to true when the id, prefixed with a colon, matches
 * the $conf['hidepages'] regular expression. Nothing is changed when the
 * page is already marked hidden, no pattern is configured or the current
 * action is 'admin'.
 *
 * @param array $data event data - see isHiddenPage()
 */
function _isHiddenPage(&$data)
{
    global $conf;
    global $ACT;

    if ($data['hidden'] || empty($conf['hidepages']) || $ACT == 'admin') {
        return;
    }

    $pattern = '/' . $conf['hidepages'] . '/ui';
    if (preg_match($pattern, ':' . $data['id'])) {
        $data['hidden'] = true;
    }
}
/**
 * Reverse of isHiddenPage
 *
 * @author Andreas Gohr <gohr@cosmocode.de>
 *
 * @param string $id page id
 * @return bool true when the page is not hidden
 */
function isVisiblePage($id)
{
    $hidden = isHiddenPage($id);

    return !$hidden;
}
/**
 * Format an id for output to a user
 *
 * Namespaces are denoted by a trailing “:*”. The root namespace is
 * “*”. Output is escaped.
 *
 * @author Adrian Lang <lang@cosmocode.de>
 *
 * @param string $id page id
 * @return string
 */
function prettyprint_id($id)
{
    // empty id or a lone colon: the root namespace
    if (!$id || $id === ':') {
        return '*';
    }

    $isNamespace = (substr($id, -1, 1) === ':');

    return hsc($isNamespace ? $id . '*' : $id);
}
/**
 * Encode a UTF-8 filename to use on any filesystem
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN, anything else URL encodes the name
 * while keeping slashes.
 *
 * When the second parameter is true the string will
 * be encoded only if non ASCII characters are detected -
 * This makes it safe to run it multiple times on the
 * same string (default is true)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see urlencode
 *
 * @param string $file file name
 * @param bool $safe if true, only encoded when non ASCII characters detected
 * @return string
 */
function utf8_encodeFN($file, $safe = true)
{
    global $conf;

    $mode = $conf['fnencode'];

    if ($mode == 'utf-8') {
        return $file;
    }

    // names made of plain ASCII filename chars (or already encoded ones) are kept as is
    if ($safe && preg_match('#^[a-zA-Z0-9/_\-\.%]+$#', $file)) {
        return $file;
    }

    if ($mode == 'safe') {
        return SafeFN::encode($file);
    }

    // URL encoding, but directory separators have to survive
    return str_replace('%2F', '/', urlencode($file));
}
/**
 * Decode a filename back to UTF-8
 *
 * Uses the 'fnencode' option to determine encoding: 'utf-8' leaves the
 * name untouched, 'safe' uses SafeFN, anything else URL decodes the name.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see urldecode
 *
 * @param string $file file name
 * @return string
 */
function utf8_decodeFN($file)
{
    global $conf;

    switch ($conf['fnencode']) {
        case 'utf-8':
            return $file;
        case 'safe':
            return SafeFN::decode($file);
        default:
            return urldecode($file);
    }
}
/**
 * Find a page in the current namespace (determined from $ID) or any
 * higher namespace that can be accessed by the current user,
 * this condition can be overridden by an optional parameter.
 *
 * Used for sidebars, but can be used for other stuff as well
 *
 * @todo add event hook
 *
 * @param string $page the pagename you're looking for
 * @param bool $useacl only return pages readable by the current user, false to ignore ACLs
 * @return false|string the full page id of the found page, false if none
 */
function page_findnearest($page, $useacl = true)
{
    if ((string) $page === '') {
        return false;
    }
    global $ID;

    // walk up the namespace tree, the root namespace (false) is tried last
    $ns = $ID;
    while (true) {
        $ns = getNS($ns);
        $candidate = cleanID("$ns:$page");

        if (page_exists($candidate)) {
            if (!$useacl || auth_quickaclcheck($candidate) >= AUTH_READ) {
                return $candidate;
            }
        }

        if ($ns === false) {
            return false;
        }
    }
}