Fix typo preventing canonicalization.
[xhtml-compiler.git] / XHTMLCompiler / Page.php
blob1132df9cb3aa6b761d21304efd7cf8c64991d22f
1 <?php
3 /**
4 * Represents a page in our content management system. This is loosely
5 * bound to the filesystem, although it doesn't actually refer to a
6 * specific file, just a class of files.
7 */
8 class XHTMLCompiler_Page
/**
 * Path of this page with the file extension removed; the source,
 * cache and dependency filenames are all derived from it.
 */
protected $pathStem;

/**
 * Extension of the source file, without a leading period.
 * @note This used to be fixed to xhtml. It is now filled in by the
 *       constructor with one of the extensions listed by the
 *       'markup' configuration variable.
 */
protected $sourceExt;

/** Extension used for the cache (served) file */
protected $cacheExt = 'html';

/** Extension used for the dependency file */
protected $depsExt = 'xc-deps';

/** XHTMLCompiler_File instance wrapping the source file */
protected $source;

/** XHTMLCompiler_File instance wrapping the cache file */
protected $cache;

/** XHTMLCompiler_File instance wrapping the dependency file */
protected $deps;

/** XHTMLCompiler_Directory instance for the directory holding the files above */
protected $dir;

/** Free-form attributes about this file. Currently used by News/NewsLinker */
public $attr = array();
/**
 * Builds a page object from a requested path, rejecting paths whose
 * extension or directory XHTML Compiler is not allowed to handle.
 * @param $path String path filename, can be from untrusted source
 * @param $mute Whether or not to stop the class from complaining when
 *              the source file doesn't exist. This is a stopgap measure,
 *              please replace with better exception handling.
 * @throws XHTMLCompiler_Exception 403 for a forbidden extension or
 *         directory, 404 for a missing directory, and the redirect
 *         status (or 404) when the source file is missing and $mute
 *         is false.
 * @todo Cleanup into subroutines
 * @todo Factor out allowed_directories realpath'ing to config class
 */
public function __construct($path, $mute = false) {
    $compiler = XHTMLCompiler::getInstance();
    $wrapper  = XHTMLCompiler::getPHPWrapper();
    $markup   = $compiler->getFilterManager()->getMarkup();

    // The extension must be a known markup extension or the cache extension.
    $parts     = pathinfo($path);
    $extension = empty($parts['extension']) ? '' : $parts['extension'];
    $recognised = $extension !== '' && (
        isset($markup[$extension]) || $extension === $this->cacheExt
    );
    if (!$recognised) {
        throw new XHTMLCompiler_Exception(
            403,
            'Forbidden extension',
            'File extension cannot be processed by XHTML Compiler, check' . "\n" .
            'for faulty <code>.htaccess</code> rules.'
        );
    }

    // The directory must exist; resolve it to its real path.
    $requestedDir = $parts['dirname'];
    if ($requestedDir === '.') {
        $requestedDir = './';
    }
    $realDir = $wrapper->realpath($requestedDir);
    if ($realDir === false) {
        throw new XHTMLCompiler_Exception(
            404,
            'Missing directory',
            'Requested directory cannot be found; check your file' . "\n" .
            'path and try again.'
        );
    }
    // Drop a single trailing slash, if there is one.
    if (substr($realDir, -1) === '/') {
        $realDir = substr($realDir, 0, -1);
    }

    // The resolved directory must be one we are permitted to serve.
    $resolvedDir = new XHTMLCompiler_Directory($realDir);
    if (!$resolvedDir->isAllowed()) {
        throw new XHTMLCompiler_Exception(
            403,
            'Forbidden directory',
            'Requested directory is forbidden to XHTML Compiler; try' . "\n" .
            'accessing it directly or check for faulty <code>.htaccess</code> rules.'
        );
    }

    // cannot use pathinfo, since PATHINFO_FILENAME is PHP 5.2.0
    $this->pathStem = substr($path, 0, strrpos($path, '.'));

    // Try each markup extension in turn and stop at the first source
    // file that exists; if none exists, the last candidate is kept.
    foreach ($markup as $candidateExt => $unused) {
        $this->source    = new XHTMLCompiler_File($this->pathStem . '.' . $candidateExt);
        $this->sourceExt = $candidateExt;
        if ($this->source->exists()) {
            break;
        }
    }

    $this->cache = new XHTMLCompiler_File($this->pathStem . '.' . $this->cacheExt);
    $this->deps  = new XHTMLCompiler_File($this->pathStem . '.' . $this->depsExt);
    $this->dir   = new XHTMLCompiler_Directory(dirname($this->pathStem));

    if ($mute || $this->source->exists()) {
        return;
    }

    // Apache may have redirected to an ErrorDocument which mod_rewrite
    // then sent to us; in that case report the original status code.
    // Otherwise give the regular 404.
    $status = $wrapper->getRedirectStatus();
    if (!$status || $status == 200) {
        $status = 404;
    }
    throw new XHTMLCompiler_Exception($status, 'Page not found', 'Requested page not found; check the URL in your address bar.');
}
// Note: Do not use these functions internally inside the class
/** Gives the path stem: the full filename minus its extension */
public function getPathStem() {
    return $this->pathStem;
}

/** Gives the relative path to the cache file */
public function getCachePath() {
    return $this->cache->getName();
}

/** Gives the relative path to the source file */
public function getSourcePath() {
    return $this->source->getName();
}

/** Gives the source extension, without a leading period */
public function getSourceExt() {
    return $this->sourceExt;
}

/** Gives the XHTMLCompiler_Directory object for this page's directory */
public function getDir() {
    return $this->dir;
}

/** Gives the directory of the files without trailing slash */
public function getDirName() {
    return $this->dir->getName();
}

/** Gives the directory of the files with trailing slash (unless there is none) */
public function getDirSName() {
    return $this->dir->getSName();
}

/** Gives how deep from the root the file is, counted as slashes in the source path */
public function getDepth() {
    $sourcePath = $this->getSourcePath();
    return substr_count($sourcePath, '/');
}
/**
 * Resolves a relative path as though it started in this page's directory.
 * @param $path Path relative to the page's directory
 * @return The directory name, a slash, and $path joined together
 */
public function normalizePath($path) {
    $base = $this->getDirName();
    return $base . '/' . $path;
}
/**
 * Builds the complete URL of this page, domain included, so that it
 * is valid anywhere on the web.
 * @return 'http://' followed by the web_domain setting and the absolute path
 * @throws Exception when the web_domain configuration value is not set
 */
public function getWebPath() {
    $domain = XHTMLCompiler::getInstance()->getConf('web_domain');
    if ($domain) {
        return 'http://' . $domain . $this->getAbsolutePath();
    }
    throw new Exception('Configuration value web_domain must be set for command line');
}
/**
 * Returns a fully formed absolute web path valid anywhere on the
 * current domain to the cached file.
 * @return The web_path configuration value followed by the cache file
 *         name, which is normalised to start with exactly one slash
 */
public function getAbsolutePath() {
    $xc = XHTMLCompiler::getInstance();
    $name = $this->cache->getName();
    // Strip a leading "./" BEFORE ensuring the leading slash. Doing it
    // the other way round turns "./foo.html" into "/./foo.html" and the
    // strip can never match afterwards.
    if (strncmp($name, './', 2) === 0) {
        $name = substr($name, 1);
    }
    // The empty-string check avoids an uninitialized-offset notice.
    if ($name === '' || $name[0] !== '/') {
        $name = "/$name";
    }
    return $xc->getConf('web_path') . $name;
}
/** Gives the contents of the cache/served file */
public function getCache() {
    return $this->cache->get();
}

/** Gives the contents of the source file */
public function getSource() {
    return $this->source->get();
}

/** Tells whether the cache file exists, as reported by the cache file object */
public function isCacheExistent() {
    return $this->cache->exists();
}

/** Tells whether the source file exists, as reported by the source file object */
public function isSourceExistent() {
    return $this->source->exists();
}

/**
 * Deletes the cache file, forcing this page to be re-updated as if
 * it were newly added.
 */
public function purge() {
    return $this->cache->delete();
}
/**
 * Reports whether or not the cache is stale by comparing the file
 * modification times between the source file and the cache file,
 * and then between each recorded dependency and its recorded time.
 * @warning You must not call this function until you've also called
 *          isCacheExistent().
 * @return True if the page needs regenerating, false if the cache is fresh
 * @throws Exception if the cache file does not exist
 */
public function isCacheStale() {
    if (!$this->cache->exists()) {
        throw new Exception(
            'Cannot check for stale cache when cache' . "\n" .
            'does not exist, please call isCacheExistent and take' . "\n" .
            'appropriate action with the result'
        );
    }
    if ($this->source->getMTime() > $this->cache->getMTime()) return true;
    // check dependencies
    if (!$this->deps->exists()) return true; // we need a dependency file!
    // NOTE(review): unserialize() is only acceptable here because the
    // dependency file is written by generate(); never point it at
    // externally supplied data.
    $deps = unserialize($this->deps->get());
    // A truncated or corrupt dependency file unserializes to false.
    // Treat that like a missing file instead of iterating a non-array.
    if (!is_array($deps)) return true;
    foreach ($deps as $filename => $time) {
        // A vanished dependency was never reported as stale (the
        // comparison against filemtime()'s false result fails); keep
        // that outcome but skip the warning filemtime() would emit.
        if (!file_exists($filename)) continue;
        if ($time < filemtime($filename)) return true;
    }
    return false;
}
/**
 * Stores text in the cache file, replacing whatever was there and
 * creating the cache file if it doesn't exist.
 * @param $contents String contents to write to cache
 */
public function writeCache($contents) {
    $this->cache->write($contents);
}
/**
 * Tries to serve the page straight from the cache.
 * @return True if the cached copy was sent, false if not.
 * @todo Purge check needs to be factored into XHTMLCompiler
 */
public function tryCache() {
    // An explicit purge request always bypasses the cache.
    if (isset($_GET['purge'])) {
        return false;
    }
    if (!$this->cache->exists()) {
        return false;
    }
    if ($this->isCacheStale()) {
        return false;
    }
    // cached version is fresh, serve it. This shouldn't happen normally
    set_response_code(200); // if we used ErrorDocument, override
    readfile($this->getCachePath());
    return true;
}
/**
 * Runs the source file through the filter manager and stores the
 * result in the cache, together with the dependency list.
 * @note This function needs to be extended greatly
 * @return Generated contents from source, or an empty string when the
 *         filters produced nothing (in which case nothing is written)
 */
public function generate() {
    $raw = $this->source->get();
    $filterManager = XHTMLCompiler::getInstance()->getFilterManager();
    $output = $filterManager->process($raw, $this);
    $dependencies = $filterManager->getDeps();

    // don't write, probably an error
    if (empty($output)) {
        return '';
    }

    $output .= '<!-- generated by XHTML Compiler -->';
    $this->cache->write($output);
    $this->cache->chmod(0664);
    $this->deps->write(serialize($dependencies));
    return $output;
}
/**
 * Displays the page, either from cache or fresh regeneration.
 * Freshly generated output is sent as application/xhtml+xml when the
 * client's Accept header mentions that type, and as text/html otherwise.
 */
public function display() {
    if ($this->tryCache()) return;
    $ret = $this->generate();
    if ($ret) {
        // The Accept header is absent on the command line and from some
        // clients; treat that as "does not accept XHTML" instead of
        // raising an undefined-index notice and passing null to stripos().
        $accept = isset($_SERVER['HTTP_ACCEPT']) ? $_SERVER['HTTP_ACCEPT'] : '';
        if (stripos($accept, 'application/xhtml+xml') !== false) {
            header("Content-type: application/xhtml+xml");
        } else {
            header("Content-type: text/html");
        }
    }
    echo $ret;
}
/**
 * Creates a Git_Repo for the directory that holds this page's source file.
 */
public function getRepo() {
    $sourceDir = $this->source->getDirectory();
    return new Git_Repo($sourceDir);
}
/**
 * Works out the source filename relative to the Git repository root.
 */
public function getGitPath() {
    $repo = $this->getRepo();
    // This won't work with bare repositories
    $absoluteSource = realpath($this->source->getName()); // $repo->path is full
    // chop off "repo" path (w/o .git) + leading slash
    $prefixLength = strlen(dirname($repo->path)) + 1;
    $relative = substr($absoluteSource, $prefixLength);
    // account for Windows
    return str_replace('\\', '/', $relative);
}
/**
 * Fetches the Git log for this page on the 'master' branch.
 * @param $kwargs Extra log options; these override the default
 *                'follow' => true when the same key is given
 */
public function getLog($kwargs = array()) {
    // This doesn't account for sub-repositories
    $options = array_merge(array('follow' => true), $kwargs);
    $paths = array($this->getGitPath());
    return $this->getRepo()->log('master', $paths, $options);
}
291 // this is metadata stuff that needs to be moved and cached
/**
 * Retrieves the DateTime this page was created, according to Git's logs.
 * If no logs are present, use filectime(), which isn't totally accurate
 * but is the best information present.
 * @note A date taken from the page's own meta tag is returned as
 *       parsed, without being converted to the default timezone.
 * @return DateTime of creation
 */
public function getCreatedTime() {
    // As a backwards-compatibility measure, we allow the first meta tag
    // with the specific signature:
    //      <meta name="Date" content="..."
    // to specify an ISO 8601 formatted date (or date compatible with
    // GNU strtotime; Metadata will convert it into ISO 8601 as per
    // the Dublin core specification).
    $source = $this->source->get();
    $marker = '<meta name="Date" content="';
    $start = strpos($source, $marker);
    if ($start !== false) {
        $start += strlen($marker); // cursor is now after the quote
        $end = strpos($source, '"', $start);
        // Only trust the tag when its closing quote is present; an
        // unterminated attribute used to give substr() a negative
        // length and a garbage date string. Fall through to Git instead.
        if ($end !== false) {
            return new DateTime(substr($source, $start, $end - $start));
        }
    }

    $repo = $this->getRepo();
    // This is extremely memory inefficient, but I can't figure out
    // how to get Git to limit the commits (-n) without undoing
    // --reverse.
    $log = $repo->log('master', array($this->getGitPath()), array(
        'reverse' => true,
    ));
    if (empty($log)) {
        $date = new DateTime('@' . $this->source->getCTime());
    } else {
        $date = $log[0]->authoredDate;
    }
    $this->setTimezone($date);
    return $date;
}
/**
 * Works out when this page was last updated: the authored date of the
 * newest Git log entry if there is one, otherwise the source file's
 * modification time.
 * @return DateTime converted to the default timezone
 */
public function getLastModifiedTime() {
    $paths = array($this->getGitPath());
    $entries = $this->getRepo()->log('master', $paths, array('n' => 1));

    $modified = empty($entries)
        ? new DateTime('@' . $this->source->getMTime())
        : $entries[0]->authoredDate;

    $this->setTimezone($modified);
    return $modified;
}
/**
 * Touches the source file so that anything depending on this file
 * gets regenerated. Unlike purge(), XHTML Compiler still knows this
 * is not the first time the cache has been generated, which makes
 * this the weaker of the two operations.
 */
public function touch() {
    $sourceFile = $this->source;
    $sourceFile->touch();
}
/**
 * Applies our default timezone to a date object; especially useful if
 * it was initialized with an @ sign.
 * @param $date Date object to modify in place
 */
private function setTimezone($date) {
    $zone = new DateTimeZone(date_default_timezone_get());
    $date->setTimezone($zone);
}