Implement Newslinker, also fix some bugs.
[xhtml-compiler.git] / XHTMLCompiler / Page.php
blobac030a742f261c7cf872af7b1545340e9e7a9a1f
1 <?php
3 /**
4 * Represents a page in our content management system. This is loosely
5 * bound to the filesystem, although it doesn't actually refer to a
6 * specific file, just a class of files.
7 */
8 class XHTMLCompiler_Page
/**
 * Filename identifier of this page, i.e. its path without the extension.
 */
protected $pathStem;

/** Extension of source files, written without the leading period */
protected $sourceExt = 'xhtml';
/** Extension of cache/served files */
protected $cacheExt = 'html';
/** Extension of dependency files */
protected $depsExt = 'xc-deps';

/** XHTMLCompiler_File instance for the source file */
protected $source;
/** XHTMLCompiler_File instance for the cache file */
protected $cache;
/** XHTMLCompiler_File instance for the dependency file */
protected $deps;

/** XHTMLCompiler_Directory instance shared by all of the files above */
protected $dir;

/** Array of attributes about this file; currently used by News/NewsLinker */
public $attr = array();
/**
 * Builds a page object after validating the requested filename.
 *
 * Validation happens in three steps: the extension must be the source
 * or the cache extension, the directory must exist, and the resolved
 * directory must be one of the configured allowed directories.
 *
 * @param $path String path filename, can be from untrusted source
 * @param $mute Whether or not to stop the class from complaining when
 *              the source file doesn't exist. This is a stopgap measure,
 *              please replace with better exception handling.
 * @throws XHTMLCompiler_Exception 403 for a forbidden extension or
 *         directory, 404 for a missing directory, and the redirect
 *         status (or 404) when the source file is missing and $mute
 *         is false.
 * @todo Cleanup into subroutines
 * @todo Factor out allowed_directories realpath'ing to config class
 */
public function __construct($path, $mute = false) {

    $xc  = XHTMLCompiler::getInstance();
    $php = XHTMLCompiler::getPHPWrapper();

    // only the source and cache extensions may be requested
    $parts = pathinfo($path);
    $extensionOk = !empty($parts['extension']) && (
        $parts['extension'] === $this->sourceExt ||
        $parts['extension'] === $this->cacheExt
    );
    if (!$extensionOk) {
        throw new XHTMLCompiler_Exception(403, 'Forbidden extension',
            'File extension cannot be processed by XHTML Compiler, check
                for faulty <code>.htaccess</code> rules.');
    }

    // the directory has to exist; resolve it to its real path
    $dir = $parts['dirname'];
    if ($dir == '.') {
        $dir .= '/';
    }
    $dir = $php->realpath($dir);
    if ($dir === false) {
        throw new XHTMLCompiler_Exception(404, 'Missing directory',
            'Requested directory cannot be found; check your file
                path and try again.' );
    }
    // drop a trailing slash so comparisons below are uniform
    if (substr($dir, -1) == '/') {
        $dir = substr($dir, 0, -1);
    }

    // the resolved directory must be whitelisted
    $permitted = false;
    foreach ($xc->getConf('allowed_dirs') as $candidate => $recursive) {
        $candidate = $php->realpath($candidate); // factor out!
        if (!is_string($candidate)) {
            continue;
        }
        $isSameDir = ($dir === $candidate);
        // slash is required to prevent $candidate = 'subdir' from
        // matching $dir = 'subdirectory', thanks Mordred!
        $isBelow = $recursive && strpos($dir, $candidate . '/') === 0;
        if ($isSameDir || $isBelow) {
            $permitted = true;
            break;
        }
    }
    if (!$permitted) {
        throw new XHTMLCompiler_Exception(403, 'Forbidden directory',
        'Requested directory is forbidden to XHTML Compiler; try
            accessing it directly or check for faulty <code>.htaccess</code> rules.');
    }

    // cannot use pathinfo, since PATHINFO_FILENAME is PHP 5.2.0
    $this->pathStem = substr($path, 0, strrpos($path, '.'));

    // wire up the three files and their directory
    $stem = $this->pathStem . '.';
    $this->source = new XHTMLCompiler_File($stem . $this->sourceExt);
    $this->cache  = new XHTMLCompiler_File($stem . $this->cacheExt);
    $this->deps   = new XHTMLCompiler_File($stem . $this->depsExt);

    $this->dir = new XHTMLCompiler_Directory(dirname($this->pathStem));

    if ($mute || $this->source->exists()) {
        return;
    }

    // Apache may have redirected to an ErrorDocument which got directed
    // via mod_rewrite to us, in that case, output the corresponding
    // status code. Otherwise, we can give the regular 404.
    $code = $php->getRedirectStatus();
    if (!$code || $code == 200) {
        $code = 404;
    }
    throw new XHTMLCompiler_Exception($code, 'Page not found', 'Requested page not found; check the URL in your address bar.');
}
115 // Note: Do not use these functions internally inside the class
/**
 * Returns the path stem: the full filename without its file extension.
 */
public function getPathStem() {
    return $this->pathStem;
}
/**
 * Returns the relative path to the cache file, as named by its file object.
 */
public function getCachePath() {
    $cacheFile = $this->cache;
    return $cacheFile->getName();
}
/**
 * Returns the relative path to the source file, as named by its file object.
 */
public function getSourcePath() {
    $sourceFile = $this->source;
    return $sourceFile->getName();
}
/**
 * Returns the XHTMLCompiler_Directory representation of this page's directory.
 */
public function getDir() {
    return $this->dir;
}
/**
 * Returns the directory of the files without a trailing slash.
 */
public function getDirName() {
    $directory = $this->dir;
    return $directory->getName();
}
/**
 * Returns the directory of the files with a trailing slash
 * (unless there is none).
 */
public function getDirSName() {
    $directory = $this->dir;
    return $directory->getSName();
}
/**
 * Returns how deep from the root the file is, measured as the number
 * of forward slashes in the source path.
 */
public function getDepth() {
    $sourcePath = $this->getSourcePath();
    return substr_count($sourcePath, '/');
}
/**
 * Normalizes a relative path as if it were from this page's directory,
 * by prefixing it with the directory name and a slash.
 * @param $path Path relative to this page's directory
 * @return Directory name, a slash, then $path
 */
public function normalizePath($path) {
    $base = $this->getDirName();
    return $base . '/' . $path;
}
/**
 * Returns a fully formed web path, including the web domain, to the
 * file. This path is valid anywhere on the web.
 * @throws Exception when the web_domain configuration value is not set
 */
public function getWebPath() {
    $domain = XHTMLCompiler::getInstance()->getConf('web_domain');
    if (!$domain) {
        throw new Exception('Configuration value web_domain must be set for command line');
    }
    $absolute = $this->getAbsolutePath();
    return 'http://' . $domain . $absolute;
}
/**
 * Returns a fully formed absolute web path, valid anywhere on the
 * current domain, to the cached file.
 * @return The web_path configuration value followed by the cache file
 *         name, which is normalized to begin with exactly one slash
 */
public function getAbsolutePath() {
    $xc = XHTMLCompiler::getInstance();
    $name = $this->cache->getName();
    // Strip the dot of a "./"-relative name BEFORE forcing the leading
    // slash. Doing it the other way round makes the "./" test
    // unreachable and turns "./page.html" into "/./page.html".
    if (strncmp($name, './', 2) === 0) $name = substr($name, 1);
    if ($name === '' || $name[0] !== '/') $name = "/$name";
    return $xc->getConf('web_path') . $name;
}
/**
 * Returns the contents of the cache/served file.
 */
public function getCache() {
    $cacheFile = $this->cache;
    return $cacheFile->get();
}
/**
 * Returns the contents of the source file.
 */
public function getSource() {
    $sourceFile = $this->source;
    return $sourceFile->get();
}
/**
 * Reports whether or not the cache file exists and is a file.
 */
public function isCacheExistent() {
    $cacheFile = $this->cache;
    return $cacheFile->exists();
}
/**
 * Reports whether or not the source file exists and is a file.
 */
public function isSourceExistent() {
    $sourceFile = $this->source;
    return $sourceFile->exists();
}
/**
 * Reports whether or not the cache is stale by comparing the file
 * modification times between the source file and the cache file, and
 * then between each recorded dependency and its recorded timestamp.
 *
 * The cache is also reported stale when the dependency file is missing
 * or unreadable, or when a recorded dependency no longer exists.
 *
 * @warning You must not call this function until you've also called
 *          isCacheExistent().
 * @return True if the cache must be regenerated, false otherwise
 * @throws Exception when the cache file does not exist
 */
public function isCacheStale() {
    if (!$this->cache->exists()) {
        throw new Exception('Cannot check for stale cache when cache
                does not exist, please call isCacheExistent and take
                appropriate action with the result');
    }
    if ($this->source->getMTime() > $this->cache->getMTime()) return true;
    // check dependencies
    if (!$this->deps->exists()) return true; // we need a dependency file!
    // NOTE(review): unserialize() is only safe because the deps file is
    // written by generate(); never point this at externally supplied data.
    $deps = unserialize($this->deps->get());
    // A corrupt or truncated deps file unserializes to false; iterating
    // it would only warn and wrongly report a fresh cache.
    if (!is_array($deps) && !is_object($deps)) return true;
    foreach ($deps as $filename => $time) {
        // filemtime() returns false for a missing file, and
        // "$time < false" is never true, so test existence explicitly.
        if (!file_exists($filename)) return true;
        if ($time < filemtime($filename)) return true;
    }
    return false;
}
/**
 * Writes text to the cache file, overwriting any previous contents
 * and creating the cache file if it doesn't exist.
 * @param $contents String contents to write to cache
 */
public function writeCache($contents) {
    $cacheFile = $this->cache;
    $cacheFile->write($contents);
}
/**
 * Attempts to display contents from the cache, otherwise returns false.
 *
 * The cache is served only when no purge was requested through the
 * query string, the cache file exists, and it is not stale.
 *
 * @return True if successful, false if not.
 * @todo Purge check needs to be factored into XHTMLCompiler
 */
public function tryCache() {
    // each guard mirrors one clause of the "cache is usable" condition,
    // evaluated in the same order
    if (isset($_GET['purge'])) {
        return false;
    }
    if (!$this->cache->exists()) {
        return false;
    }
    if ($this->isCacheStale()) {
        return false;
    }

    // cached version is fresh, serve it. This shouldn't happen normally
    set_response_code(200); // if we used ErrorDocument, override
    readfile($this->getCachePath());
    return true;
}
/**
 * Generates the final version of a page from the source file and writes
 * it to the cache, together with the serialized dependency list.
 * @note This function needs to be extended greatly
 * @return Generated contents from source, or an empty string when the
 *         filters produced nothing (in which case nothing is written)
 */
public function generate() {
    $raw = $this->source->get();
    $filterManager = XHTMLCompiler::getInstance()->getFilterManager();

    $output = $filterManager->process($raw, $this);
    $dependencies = $filterManager->getDeps();

    // don't write, probably an error
    if (empty($output)) {
        return '';
    }

    $output .= '<!-- generated by XHTML Compiler -->';

    $this->cache->write($output);
    $this->cache->chmod(0664);
    $this->deps->write(serialize($dependencies));

    return $output;
}
/**
 * Displays the page, either from cache or fresh regeneration.
 *
 * When the page is regenerated, the Content-type header is chosen from
 * the request's Accept header: application/xhtml+xml if the client
 * lists it, text/html otherwise.
 */
public function display() {
    if ($this->tryCache()) return;
    $ret = $this->generate();
    if ($ret) {
        // HTTP_ACCEPT is absent on the command line and for clients that
        // send no Accept header; treat that as "no XHTML support".
        $accept = isset($_SERVER['HTTP_ACCEPT']) ? $_SERVER['HTTP_ACCEPT'] : '';
        if (stripos($accept, 'application/xhtml+xml') !== false) {
            header("Content-type: application/xhtml+xml");
        } else {
            header("Content-type: text/html");
        }
    }
    echo $ret;
}
/**
 * Retrieves the Git_Repo that represents this page, constructed from
 * the directory of the source file.
 */
public function getRepo() {
    $sourceDir = $this->source->getDirectory();
    return new Git_Repo($sourceDir);
}
/**
 * Retrieves the filename relative to the Git repository root, using
 * forward slashes as separators.
 */
public function getGitPath() {
    $repo = $this->getRepo();
    // This won't work with bare repositories
    $fullPath = realpath($this->source->getName()); // $repo->path is full
    // chop off "repo" path (w/o .git) + leading slash
    $prefixLength = strlen(dirname($repo->path)) + 1;
    $relative = substr($fullPath, $prefixLength);
    // account for Windows
    return str_replace('\\', '/', $relative);
}
/**
 * Retrieves the log that represents this page: the 'master' log of
 * this page's Git path, requested with the 'follow' option.
 */
public function getLog() {
    // This doesn't account for sub-repositories
    $repo = $this->getRepo();
    $paths = array($this->getGitPath());
    $options = array('follow' => true);
    return $repo->log('master', $paths, $options);
}
288 // this is metadata stuff that needs to be moved and cached
/**
 * Retrieves the DateTime this page was created.
 *
 * A date declared in the source through a meta tag takes priority;
 * otherwise the authored date of the first commit in Git's log for
 * this page is used.
 *
 * @return DateTime built from the meta tag, the authoredDate of the
 *         first log entry, or null when the log is empty
 */
public function getCreatedTime() {
    // As a backwards-compatibility measure, we allow the first meta tag
    // with the specific signature:
    //      <meta name="Date" content="..."
    // to specify an ISO 8601 formatted date (or date compatible with
    // GNU strtotime; Metadata will convert it into ISO 8601 as per
    // the Dublin core specification).
    $source = $this->source->get();
    $marker = '<meta name="Date" content="';
    if (($p = strpos($source, $marker)) !== false) {
        $p += strlen($marker); // cursor is now after the quote
        $end = strpos($source, '"', $p);
        // An unterminated attribute has no closing quote; fall through
        // to Git rather than handing a garbage slice to DateTime.
        if ($end !== false) {
            // Grab the time
            $time = substr($source, $p, $end - $p);
            return new DateTime($time);
        }
    }

    $repo = $this->getRepo();
    // This is extremely memory inefficient, but I can't figure out
    // how to get Git to limit the commits (-n) without undoing
    // --reverse.
    $log = $repo->log('master', array($this->getGitPath()), array(
        'reverse' => true,
    ));
    if (empty($log)) return;
    return $log[0]->authoredDate;
}
/**
 * Retrieves the DateTime this page was last updated, according to
 * Git's logs.
 * @return authoredDate of the newest log entry, or null when the log
 *         is empty
 */
public function getLastModifiedTime() {
    $paths = array($this->getGitPath());
    $options = array('n' => 1);
    $entries = $this->getRepo()->log('master', $paths, $options);
    if (empty($entries)) {
        return;
    }
    // or committedDate?
    $latest = $entries[0];
    return $latest->authoredDate;
}