Update Comparison with htmLawed.
[xhtml-compiler.git] / XHTMLCompiler / Page.php
bloba88363e45fa945e7b9577c51511f9510110106db
1 <?php
3 /**
4 * Represents a page in our content management system. This is loosely
5 * bound to the filesystem, although it doesn't actually refer to a
6 * specific file, just a class of files.
7 */
8 class XHTMLCompiler_Page
/**
 * Filename identifier of this page without extension
 */
protected $pathStem;

/** File extension of source files (no period) */
protected $sourceExt = 'xhtml';
/** File extension of cache/served files */
protected $cacheExt = 'html';
/** File extension of dependency files */
protected $depsExt = 'xc-deps';

/** Instance of XHTMLCompiler_File for source file */
protected $source;
/** Instance of XHTMLCompiler_File for cache file */
protected $cache;
/** Instance of XHTMLCompiler_File for dependency file */
protected $deps;

/** Instance of XHTMLCompiler_Directory for all of the above files */
protected $dir;
/**
 * Constructs a page object, validates filename for correctness
 *
 * The path must carry either the source or the cache extension, its
 * directory must exist, and that directory must be covered by the
 * allowed_dirs configuration value. Only then are the file and
 * directory objects created.
 *
 * @param $path String path filename, can be from untrusted source
 * @param $mute Whether or not to stop the class from complaining when
 *              the source file doesn't exist. This is a stopgap measure,
 *              please replace with better exception handling.
 * @throws XHTMLCompiler_Exception 403 for a forbidden extension or
 *         directory, 404 for a missing directory, and the redirect status
 *         (or 404 when that is empty or 200) for a missing source file
 * @todo Factor out allowed_directories realpath'ing to config class
 */
public function __construct($path, $mute = false) {

    $xc  = XHTMLCompiler::getInstance();
    $php = XHTMLCompiler::getPHPWrapper();

    // test file extension
    $info = pathinfo($path);
    $this->assertAllowedExtension($info);

    // test for directory's existence and resolve to real path
    $dir = $this->resolveDirectory($php, $info['dirname']);

    if (!$this->isAllowedDirectory($php, $dir, $xc->getConf('allowed_dirs'))) {
        throw new XHTMLCompiler_Exception(403, 'Forbidden directory',
            'Requested directory is forbidden to XHTML Compiler; try
                accessing it directly or check for faulty <code>.htaccess</code> rules.');
    }

    // cannot use pathinfo, since PATHINFO_FILENAME is PHP 5.2.0
    $this->pathStem = substr($path, 0, strrpos($path, '.'));

    // setup the files
    $this->source = new XHTMLCompiler_File($this->pathStem . '.' . $this->sourceExt);
    $this->cache  = new XHTMLCompiler_File($this->pathStem . '.' . $this->cacheExt);
    $this->deps   = new XHTMLCompiler_File($this->pathStem . '.' . $this->depsExt);

    $this->dir    = new XHTMLCompiler_Directory(dirname($this->pathStem));

    if (!$mute && !$this->source->exists()) {
        // Apache may have redirected to an ErrorDocument which got directed
        // via mod_rewrite to us, in that case, output the corresponding
        // status code. Otherwise, we can give the regular 404.
        $code = $php->getRedirectStatus();
        if (!$code || $code == 200) $code = 404;
        throw new XHTMLCompiler_Exception($code, 'Page not found', 'Requested page not found; check the URL in your address bar.');
    }
}

/**
 * Rejects any path whose extension is neither the source nor the cache
 * extension (a missing extension is rejected too).
 *
 * @param $info Array as returned by pathinfo()
 * @throws XHTMLCompiler_Exception 403 when the extension is not accepted
 */
private function assertAllowedExtension($info) {
    if (
        empty($info['extension']) || (
            $info['extension'] !== $this->sourceExt &&
            $info['extension'] !== $this->cacheExt
        )
    ) {
        throw new XHTMLCompiler_Exception(403, 'Forbidden extension',
            'File extension cannot be processed by XHTML Compiler, check
                for faulty <code>.htaccess</code> rules.');
    }
}

/**
 * Resolves a directory name to its real path without a trailing slash.
 *
 * @param $php PHP wrapper object providing realpath()
 * @param $dir Directory component of the requested path
 * @return String real path of the directory, trailing slash removed
 * @throws XHTMLCompiler_Exception 404 when realpath() reports failure
 */
private function resolveDirectory($php, $dir) {
    if ($dir == '.') $dir .= '/';
    $dir = $php->realpath($dir);
    if ($dir === false) {
        throw new XHTMLCompiler_Exception(404, 'Missing directory',
            'Requested directory cannot be found; check your file
                path and try again.' );
    }
    if ($dir[strlen($dir)-1] == '/') $dir = substr($dir, 0, -1);
    return $dir;
}

/**
 * Checks a resolved directory against the allowed_dirs configuration.
 *
 * @param $php PHP wrapper object providing realpath()
 * @param $dir Real path of the requested directory, no trailing slash
 * @param $allowed_dirs Map of directory => whether subdirectories are
 *                      allowed as well
 * @return True if the directory is permitted, false otherwise
 */
private function isAllowedDirectory($php, $dir, $allowed_dirs) {
    foreach ($allowed_dirs as $allowed_dir => $recursive) {
        $allowed_dir = $php->realpath($allowed_dir); // factor out!
        if (!is_string($allowed_dir)) continue;
        if ($dir === $allowed_dir) return true;
        // slash is required to prevent $allowed_dir = 'subdir' from
        // matching $dir = 'subdirectory', thanks Mordred!
        if (strpos($dir, $allowed_dir . '/') === 0 && $recursive) return true;
    }
    return false;
}
// Note: Do not use these functions internally inside the class
/**
 * Returns path stem, i.e. the full filename with its file extension removed
 */
public function getPathStem() {
    return $this->pathStem;
}
/**
 * Returns relative path to cache, as reported by the cache file object
 */
public function getCachePath() {
    $cacheFile = $this->cache;
    return $cacheFile->getName();
}
/**
 * Returns relative path to source, as reported by the source file object
 */
public function getSourcePath() {
    $sourceFile = $this->source;
    return $sourceFile->getName();
}
/**
 * Returns XHTMLCompiler_Directory representation of the page's directory
 */
public function getDir() {
    return $this->dir;
}
/**
 * Returns directory of the files without trailing slash
 */
public function getDirName() {
    $directory = $this->dir;
    return $directory->getName();
}
/**
 * Returns directory of the files with trailing slash (unless there is none)
 */
public function getDirSName() {
    $directory = $this->dir;
    return $directory->getSName();
}
/**
 * Returns how deep from the root the file is, counted as the number of
 * forward slashes in the source path
 */
public function getDepth() {
    $sourcePath = $this->getSourcePath();
    return substr_count($sourcePath, '/');
}
/**
 * Normalizes a relative path as if it were from this page's directory
 * @param $path Relative path to prefix with the page's directory name
 * @return Directory name, a slash, then the given path
 */
public function normalizePath($path) {
    $base = $this->getDirName();
    return $base . '/' . $path;
}
/**
 * Returns a fully formed web path to the file, built from the
 * web_domain and web_path configuration values and the cache file name
 * @throws Exception when web_domain is not configured
 */
public function getWebPath() {
    $config = XHTMLCompiler::getInstance();
    $host   = $config->getConf('web_domain');
    if (!$host) {
        throw new Exception('Configuration value web_domain must be set for command line');
    }
    $basePath  = $config->getConf('web_path');
    $cacheName = $this->cache->getName();
    return 'http://' . $host . $basePath . '/' . $cacheName;
}
/**
 * Returns contents of the cache/served file
 */
public function getCache() {
    $cacheFile = $this->cache;
    return $cacheFile->get();
}
/**
 * Returns contents of the source file
 */
public function getSource() {
    $sourceFile = $this->source;
    return $sourceFile->get();
}
/**
 * Reports whether or not cache file exists and is a file
 */
public function isCacheExistent() {
    $cacheFile = $this->cache;
    return $cacheFile->exists();
}
/**
 * Reports whether or not source file exists and is a file
 */
public function isSourceExistent() {
    $sourceFile = $this->source;
    return $sourceFile->exists();
}
/**
 * Reports whether or not the cache is stale by comparing the file
 * modification times between the source file and the cache file, and
 * then between each recorded dependency and its recorded time.
 *
 * The cache is also considered stale when the dependency file is
 * missing, when its contents cannot be unserialized into an array, or
 * when a recorded dependency no longer exists on disk.
 *
 * @warning You must not call this function until you've also called
 *          isCacheExistent().
 * @return True if the cache needs to be regenerated, false otherwise
 * @throws Exception when the cache file does not exist
 */
public function isCacheStale() {
    if (!$this->cache->exists()) {
        throw new Exception('Cannot check for stale cache when cache
                does not exist, please call isCacheExistent and take
                appropriate action with the result');
    }
    if ($this->source->getMTime() > $this->cache->getMTime()) return true;
    // check dependencies
    if (!$this->deps->exists()) return true; // we need a dependency file!
    $deps = unserialize($this->deps->get());
    // unserialize() yields false for unreadable data; without a usable
    // dependency list we cannot prove freshness, so regenerate
    if (!is_array($deps)) return true;
    foreach ($deps as $filename => $time) {
        // a vanished dependency would make filemtime() warn and return
        // false, which never compares as newer; treat it as a change
        if (!file_exists($filename)) return true;
        if ($time < filemtime($filename)) return true;
    }
    return false;
}
/**
 * Writes text to the cache file, overwriting any previous contents
 * and creating the cache file if it doesn't exist.
 * @param $contents String contents to write to cache
 */
public function writeCache($contents) {
    $cacheFile = $this->cache;
    $cacheFile->write($contents);
}
/**
 * Attempts to display contents from the cache, otherwise returns false.
 * The cache is skipped when a purge parameter is present in the query
 * string, when the cache file does not exist, or when it is stale.
 * @return True if successful, false if not.
 * @todo Purge check needs to be factored into XHTMLCompiler
 */
public function tryCache() {
    // each guard bails out in the same order the conditions are tested
    if (isset($_GET['purge'])) {
        return false;
    }
    if (!$this->cache->exists()) {
        return false;
    }
    if ($this->isCacheStale()) {
        return false;
    }
    // cached version is fresh, serve it. This shouldn't happen normally
    set_response_code(200); // if we used ErrorDocument, override
    readfile($this->getCachePath());
    return true;
}
/**
 * Generates the final version of a page from the source file and writes
 * it to the cache, together with the serialized dependency list reported
 * by the filter manager.
 * @note This function needs to be extended greatly
 * @return Generated contents from source, or an empty string when the
 *         filters produced nothing (in which case nothing is written)
 */
public function generate() {
    $raw     = $this->source->get();
    $manager = XHTMLCompiler::getInstance()->getFilterManager();
    $output  = $manager->process($raw, $this);
    $dependencies = $manager->getDeps();
    if (empty($output)) {
        // don't write, probably an error
        return '';
    }
    $output .= '<!-- generated by XHTML Compiler -->';
    $this->cache->write($output);
    $this->cache->chmod(0664);
    $this->deps->write(serialize($dependencies));
    return $output;
}
/**
 * Displays the page, either from cache or fresh regeneration.
 *
 * When the page is regenerated, the Content-type header is chosen from
 * the request's Accept header: application/xhtml+xml if the client
 * lists it, text/html otherwise (including when no Accept header was
 * sent at all).
 */
public function display() {
    if ($this->tryCache()) return;
    $ret = $this->generate();
    if ($ret) {
        // HTTP_ACCEPT is absent on the command line and for clients that
        // send no Accept header; fall back to an empty string
        $accept = isset($_SERVER["HTTP_ACCEPT"]) ? $_SERVER["HTTP_ACCEPT"] : '';
        if (stripos($accept, 'application/xhtml+xml') !== false) {
            header("Content-type: application/xhtml+xml");
        } else {
            header("Content-type: text/html");
        }
    }
    echo $ret;
}
241 // Subversion related functions
/** Subversion date value for the source file */
protected $svnDate;
/** Subversion revision value for the source file */
protected $svnRevision;
/** Subversion author value for the source file */
protected $svnAuthor;
/** Subversion head URL value for the source file */
protected $svnHeadURL;
/** Cached result of getSVNHeadURLMunged() */
protected $svnHeadURLMunged;
/**
 * Stores Subversion keyword values supplied by the caller, so that they
 * do not have to be looked up via loadSVNKeywords().
 * @param $date Date value to store
 * @param $revision Revision value, stored as an integer
 * @param $author Author value to store
 * @param $head_url Head URL value to store
 */
public function registerSVNKeywords($date, $revision, $author, $head_url) {
    $this->svnHeadURL  = $head_url;
    $this->svnAuthor   = $author;
    $this->svnRevision = (int) $revision;
    $this->svnDate     = $date;
}
/**
 * Populates the SVN properties by running "svn info" on the source file
 * and parsing its "Key: value" output lines.
 *
 * Fields that are absent from the output are stored as null. The
 * revision is stored as an integer, matching registerSVNKeywords().
 *
 * @throws Exception when the command produces no output
 */
protected function loadSVNKeywords() {
    // this is an expensive function
    // we should log calls to it
    $source_path = $this->getSourcePath();
    // the path may originate from an untrusted request, so it must be
    // escaped rather than merely wrapped in double quotes
    $raw_status = shell_exec('svn info ' . escapeshellarg($source_path));
    if (!$raw_status) {
        // report the file that was actually queried
        throw new Exception('Attempt to grab SVN info for non-versioned file ' . $source_path);
    }
    $raw_status = str_replace("\r", '', $raw_status);
    $raw_status = explode("\n", $raw_status);
    $status = array();
    foreach ($raw_status as $keyval) {
        if (empty($keyval)) continue;
        if (!strpos($keyval, ':')) continue;
        // a colon without a following space cannot be split into a pair
        if (strpos($keyval, ': ') === false) continue;
        list($key, $value) = explode(': ', $keyval, 2);
        $status[$key] = $value;
    }
    $this->svnDate     = isset($status['Last Changed Date'])   ? $status['Last Changed Date']      : null;
    $this->svnRevision = isset($status['Last Changed Rev'])    ? (int) $status['Last Changed Rev'] : null;
    $this->svnAuthor   = isset($status['Last Changed Author']) ? $status['Last Changed Author']    : null;
    $this->svnHeadURL  = isset($status['URL'])                 ? $status['URL']                    : null;
}
/**
 * Returns the SVN date, loading the SVN keywords first if it is empty
 */
public function getSVNDate() {
    if (!$this->svnDate) {
        $this->loadSVNKeywords();
    }
    return $this->svnDate;
}
/**
 * Returns the SVN revision, loading the SVN keywords first if it is empty
 */
public function getSVNRevision() {
    if (!$this->svnRevision) {
        $this->loadSVNKeywords();
    }
    return $this->svnRevision;
}
/**
 * Returns the SVN author, loading the SVN keywords first if it is empty
 */
public function getSVNAuthor() {
    if (!$this->svnAuthor) {
        $this->loadSVNKeywords();
    }
    return $this->svnAuthor;
}
/**
 * Returns the SVN head URL, loading the SVN keywords first if it is empty
 * @warning The Head URL may not be publicly accessible if
 *          svn+ssh:// or file:// protocols were used in the
 *          working copy.
 */
public function getSVNHeadURL() {
    if (!$this->svnHeadURL) {
        $this->loadSVNKeywords();
    }
    return $this->svnHeadURL;
}
/**
 * Returns the Head URL, but munged with the svn_headurl_munge
 * configuration pairs to an accessible representation (see
 * config.default.php for details). The first pair whose first element is
 * a prefix of the URL is applied: that prefix is swapped for the pair's
 * second element. The result is remembered for later calls.
 */
public function getSVNHeadURLMunged() {
    if (empty($this->svnHeadURLMunged)) {
        $url   = $this->getSVNHeadURL();
        $pairs = XHTMLCompiler::getInstance()->getConf('svn_headurl_munge');
        foreach ($pairs as $pair) {
            $prefix = $pair[0];
            if (strpos($url, $prefix) === 0) {
                $url = substr_replace($url, $pair[1], 0, strlen($prefix));
                break;
            }
        }
        $this->svnHeadURLMunged = $url;
    }
    return $this->svnHeadURLMunged;
}