Merge pull request #4104 from m-martin-78/xfhsupport
[dokuwiki.git] / bin / wantedpages.php
blob00ed28a9c2b4c6def3923cfdcaf39d8bd0915d70
1 #!/usr/bin/env php
2 <?php
4 use dokuwiki\Utf8\Sort;
5 use dokuwiki\File\PageResolver;
6 use splitbrain\phpcli\CLI;
7 use splitbrain\phpcli\Options;
// Locate the installation root (the parent of this script's directory) unless
// the caller has already defined it.
if (!defined('DOKU_INC')) {
    define('DOKU_INC', realpath(__DIR__ . '/../') . '/');
}

// NOTE(review): presumably tells init.php not to start a session for this
// command line run - confirm against inc/init.php.
define('NOSESSION', 1);

require_once DOKU_INC . 'inc/init.php';
/**
 * Find wanted pages
 *
 * Walks the page files below a namespace directory, inspects the parsed
 * instructions of every page and reports links whose target page does not
 * exist, together with the pages that contain those links.
 */
class WantedPagesCLI extends CLI
{
    /** dirFilter() verdict: ignore this directory entry */
    protected const DIR_CONTINUE = 1;
    /** dirFilter() verdict: the entry is a directory that should be descended into */
    protected const DIR_NS = 2;
    /** dirFilter() verdict: the entry is a page file */
    protected const DIR_PAGE = 3;

    /** @var mixed value of the "skip" option; when truthy only the first column is printed */
    private $skip = false;
    /** @var mixed value of the "sort" option; 'origin' groups by linking page, anything else by wanted page */
    private $sort = 'wanted';

    /** @var array<string, string[]> collected links, keyed by the page shown in the first column */
    private $result = [];

    /**
     * Register options and arguments on the given $options object
     *
     * @param Options $options
     * @return void
     */
    protected function setup(Options $options)
    {
        $options->setHelp(
            'Outputs a list of wanted pages (pages that do not exist yet) and their origin pages ' .
            '(the pages that are linking to these missing pages).'
        );

        $options->registerArgument(
            'namespace',
            'The namespace to lookup. Defaults to root namespace',
            false
        );

        $options->registerOption(
            'sort',
            'Sort by wanted or origin page',
            's',
            '(wanted|origin)'
        );

        $options->registerOption(
            'skip',
            'Do not show the second dimension',
            'k'
        );
    }

    /**
     * Your main program
     *
     * Arguments and options have been parsed when this is run
     *
     * @param Options $options
     * @return void
     */
    protected function main(Options $options)
    {
        // Derive the directory of the requested namespace by asking for the
        // file name of a dummy page ("xxx") inside it and dropping the file part.
        $args = $options->getArgs();
        if ($args) {
            $startdir = dirname(wikiFN($args[0] . ':xxx'));
        } else {
            $startdir = dirname(wikiFN('xxx'));
        }

        $this->skip = $options->getOpt('skip');
        $this->sort = $options->getOpt('sort');

        $this->info("searching $startdir");

        foreach ($this->getPages($startdir) as $page) {
            $this->internalLinks($page);
        }

        Sort::ksort($this->result);
        foreach ($this->result as $main => $subs) {
            if ($this->skip) {
                echo "$main\n";
                continue;
            }

            // the same link may occur several times on a page, list each pair once
            $subs = array_unique($subs);
            Sort::sort($subs);
            foreach ($subs as $sub) {
                printf("%-40s %s\n", $main, $sub);
            }
        }
    }

    /**
     * Determine directions of the search loop
     *
     * "." and "..", directories whose name starts with an underscore and
     * files not ending in ".txt" are skipped. Any other directory is reported
     * as a namespace, any ".txt" file as a page.
     *
     * @param string $entry name of the directory entry
     * @param string $basepath directory containing the entry
     * @return int one of the DIR_* constants
     */
    protected function dirFilter($entry, $basepath)
    {
        if ($entry === '.' || $entry === '..') {
            return self::DIR_CONTINUE;
        }

        if (is_dir($basepath . '/' . $entry)) {
            if (strpos($entry, '_') === 0) {
                return self::DIR_CONTINUE;
            }
            return self::DIR_NS;
        }

        if (preg_match('/\.txt$/', $entry)) {
            return self::DIR_PAGE;
        }

        return self::DIR_CONTINUE;
    }

    /**
     * Collects recursively the pages in a namespace
     *
     * @param string $dir directory to scan
     * @return array list of arrays with the keys 'id' (page id) and 'file' (path of the page file)
     * @throws DokuCLI_Exception when the directory does not exist or can not be opened
     */
    protected function getPages($dir)
    {
        // Length of the data directory prefix plus one separator character;
        // computed once and reused by all recursive calls.
        static $trunclen = null;
        if ($trunclen === null) {
            global $conf;
            $trunclen = strlen($conf['datadir'] . ':');
        }

        // NOTE(review): DokuCLI_Exception is neither imported nor defined in
        // this file - confirm it is still provided by the included framework.
        if (!is_dir($dir)) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        // opendir() returns false on failure (e.g. missing permissions);
        // passing that to readdir() would not terminate cleanly.
        $dh = opendir($dir);
        if ($dh === false) {
            throw new DokuCLI_Exception("Unable to read directory $dir");
        }

        $pages = [];
        try {
            while (false !== ($entry = readdir($dh))) {
                $status = $this->dirFilter($entry, $dir);
                if ($status === self::DIR_CONTINUE) {
                    continue;
                }

                $path = $dir . '/' . $entry;
                if ($status === self::DIR_NS) {
                    $pages = array_merge($pages, $this->getPages($path));
                } else {
                    // strip the data directory prefix before converting the path to an id
                    $pages[] = ['id' => pathID(substr($path, $trunclen)), 'file' => $path];
                }
            }
        } finally {
            // release the handle even when a nested directory could not be read
            closedir($dh);
        }

        return $pages;
    }

    /**
     * Parse instructions and add the non-existing links to the result array
     *
     * Internal links are always considered, camel case links only when the
     * 'camelcase' configuration setting is enabled.
     *
     * @param array $page array with page id and file path
     * @return void
     */
    protected function internalLinks($page)
    {
        global $conf;

        $pid = $page['id'];
        $instructions = p_get_instructions(file_get_contents($page['file']));
        $resolver = new PageResolver($pid);

        foreach ($instructions as $ins) {
            $isLink = $ins[0] === 'internallink' || ($conf['camelcase'] && $ins[0] === 'camelcaselink');
            if (!$isLink) {
                continue;
            }

            $mid = $resolver->resolveId($ins[1][0]);
            if (page_exists($mid)) {
                continue;
            }

            [$mid] = explode('#', $mid); //record pages without hashes

            if ($this->sort === 'origin') {
                $this->result[$pid][] = $mid;
            } else {
                $this->result[$mid][] = $pid;
            }
        }
    }
}
// Entry point: create the command line tool and hand control to its run() method
$cli = new WantedPagesCLI();
$cli->run();