3 * Zend Framework (http://framework.zend.com/)
5 * @link http://github.com/zendframework/zf2 for the canonical source repository
6 * @copyright Copyright (c) 2005-2013 Zend Technologies USA Inc. (http://www.zend.com)
7 * @license http://framework.zend.com/license/new-bsd New BSD License
10 namespace Zend\Feed\Reader
;
14 use Zend\Cache\Storage\StorageInterface
as CacheStorage
;
15 use Zend\Http
as ZendHttp
;
16 use Zend\Stdlib\ErrorHandler
;
/**
 * XML namespace URIs used when probing a document for its feed format
 */
const NAMESPACE_ATOM_03 = 'http://purl.org/atom/ns#';
const NAMESPACE_ATOM_10 = 'http://www.w3.org/2005/Atom';
const NAMESPACE_RDF     = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
const NAMESPACE_RSS_090 = 'http://my.netscape.com/rdf/simple/0.9/';
const NAMESPACE_RSS_10  = 'http://purl.org/rss/1.0/';

/**
 * Feed type identifiers; detectType() returns a subset of these values
 */
const TYPE_ANY              = 'any';
const TYPE_ATOM_03          = 'atom-03';
const TYPE_ATOM_10          = 'atom-10';
const TYPE_ATOM_10_ENTRY    = 'atom-10-entry';
const TYPE_ATOM_ANY         = 'atom';
const TYPE_RSS_090          = 'rss-090';
const TYPE_RSS_091          = 'rss-091';
const TYPE_RSS_091_NETSCAPE = 'rss-091n';
const TYPE_RSS_091_USERLAND = 'rss-091u';
const TYPE_RSS_092          = 'rss-092';
const TYPE_RSS_093          = 'rss-093';
const TYPE_RSS_094          = 'rss-094';
const TYPE_RSS_10           = 'rss-10';
const TYPE_RSS_20           = 'rss-20';
const TYPE_RSS_ANY          = 'rss';
/**
 * Cache storage consulted by import(); null until setCache() is called
 *
 * @var CacheStorage|null
 */
protected static $cache = null;

/**
 * HTTP client object to use for retrieving feeds
 *
 * @var ZendHttp\Client
 */
protected static $httpClient = null;

/**
 * Override HTTP PUT and DELETE request methods?
 *
 * @var bool
 */
protected static $httpMethodOverride = false;

/**
 * Whether import() should issue HTTP conditional GET requests
 *
 * @var bool
 */
protected static $httpConditionalGet = false;

/**
 * Plugin manager used to resolve extensions; created on demand by
 * getExtensionManager() when none has been set
 *
 * @var ExtensionManagerInterface|null
 */
protected static $extensionManager = null;
/**
 * Extension names known to the Reader. registerExtension() appends to the
 * 'feed' and 'entry' lists and isRegistered() searches them.
 * NOTE(review): the initial contents of this array are not visible in this
 * view - confirm them against the full file.
 */
75 protected static $extensions = array(
/**
 * Get the feed cache
 *
 * @return CacheStorage|null The storage passed to setCache(), or null when
 *                           none has been set
 */
public static function getCache()
{
    return static::$cache;
}
/**
 * Set the feed cache
 *
 * The storage is used by import() to keep fetched feed bodies (and, for
 * conditional GET, their ETag / Last-Modified values).
 *
 * @param  CacheStorage $cache
 * @return void
 */
public static function setCache(CacheStorage $cache)
{
    static::$cache = $cache;
}
/**
 * Set the HTTP client instance
 *
 * Sets the HTTP client object to use for retrieving the feeds.
 *
 * @param  ZendHttp\Client $httpClient
 * @return void
 */
public static function setHttpClient(ZendHttp\Client $httpClient)
{
    static::$httpClient = $httpClient;
}
/**
 * Gets the HTTP client object. If none is set, a new ZendHttp\Client is
 * created, remembered for later calls, and returned.
 *
 * @return ZendHttp\Client
 */
public static function getHttpClient()
{
    if (static::$httpClient instanceof ZendHttp\Client) {
        return static::$httpClient;
    }

    // Nothing usable stored yet: build the default client once and keep it.
    static::$httpClient = new ZendHttp\Client();

    return static::$httpClient;
}
/**
 * Toggle using POST instead of PUT and DELETE HTTP methods
 *
 * Some feed implementations do not accept PUT and DELETE HTTP methods, or
 * they can't be used because of proxies or other measures. The flag is
 * intended to switch such requests to POST with an X-Method-Override
 * header carrying PUT or DELETE.
 *
 * This method only records the flag; the code that honours it is not part
 * of this class as seen here.
 *
 * @param  bool $override Whether to override PUT and DELETE.
 * @return void
 */
public static function setHttpMethodOverride($override = true)
{
    static::$httpMethodOverride = $override;
}
/**
 * Get the HTTP override state
 *
 * @return bool The value last given to setHttpMethodOverride(), false by
 *              default
 */
public static function getHttpMethodOverride()
{
    return static::$httpMethodOverride;
}
/**
 * Set the flag indicating whether or not to use HTTP conditional GET
 *
 * import() only takes the conditional GET path when this flag is enabled
 * and a cache has been set.
 *
 * @param  bool $bool
 * @return void
 */
public static function useHttpConditionalGet($bool = true)
{
    static::$httpConditionalGet = $bool;
}
/**
 * Import a feed by providing a URI
 *
 * Three strategies are used, depending on configuration:
 *  - conditional GET enabled and a cache set: cached ETag / Last-Modified
 *    values are sent with the request and a 304 response reuses the cached
 *    body;
 *  - only a cache set: a cached body is used when present, otherwise the
 *    feed is fetched and stored;
 *  - no cache: the feed is always fetched.
 *
 * In every case the returned reader is told its original source URI.
 *
 * @param  string $uri The URI to the feed
 * @param  string $etag OPTIONAL Last received ETag for this resource
 * @param  string $lastModified OPTIONAL Last-Modified value for this resource
 * @return Feed\FeedInterface
 * @throws Exception\RuntimeException when the response status is not 200
 *         (or 304 on the conditional GET path)
 */
public static function import($uri, $etag = null, $lastModified = null)
{
    $cache  = self::getCache();
    $client = self::getHttpClient();
    $client->resetParameters();
    $headers = new ZendHttp\Headers();
    $client->setHeaders($headers);
    $client->setUri($uri);
    $cacheId = 'Zend_Feed_Reader_' . md5($uri);

    if (static::$httpConditionalGet && $cache) {
        $data = $cache->getItem($cacheId);

        // Validators are only sent when a cached body exists; otherwise a
        // 304 answer would leave nothing to parse.
        if ($data) {
            if ($etag === null) {
                $etag = $cache->getItem($cacheId . '_etag');
            }
            if ($lastModified === null) {
                $lastModified = $cache->getItem($cacheId . '_lastmodified');
            }
            if ($etag) {
                $headers->addHeaderLine('If-None-Match', $etag);
            }
            if ($lastModified) {
                $headers->addHeaderLine('If-Modified-Since', $lastModified);
            }
        }

        $response   = $client->send();
        $statusCode = (int) $response->getStatusCode();
        if ($statusCode !== 200 && $statusCode !== 304) {
            throw new Exception\RuntimeException('Feed failed to load, got response code ' . $response->getStatusCode());
        }

        if ($statusCode === 304) {
            // Not modified: the cached copy is still current.
            $responseXml = $data;
        } else {
            $responseXml = $response->getBody();
            $cache->setItem($cacheId, $responseXml);

            // Remember the validators so the next request can be conditional.
            $responseHeaders = $response->getHeaders();
            $etagHeader      = $responseHeaders->get('ETag');
            if ($etagHeader) {
                $cache->setItem($cacheId . '_etag', $etagHeader->getFieldValue());
            }
            $lastModifiedHeader = $responseHeaders->get('Last-Modified');
            if ($lastModifiedHeader) {
                $cache->setItem($cacheId . '_lastmodified', $lastModifiedHeader->getFieldValue());
            }
        }
    } elseif ($cache) {
        $responseXml = $cache->getItem($cacheId);
        if (!$responseXml) {
            $response = $client->send();
            if ((int) $response->getStatusCode() !== 200) {
                throw new Exception\RuntimeException('Feed failed to load, got response code ' . $response->getStatusCode());
            }
            $responseXml = $response->getBody();
            $cache->setItem($cacheId, $responseXml);
        }
    } else {
        $response = $client->send();
        if ((int) $response->getStatusCode() !== 200) {
            throw new Exception\RuntimeException('Feed failed to load, got response code ' . $response->getStatusCode());
        }
        $responseXml = $response->getBody();
    }

    // Previously only the uncached path recorded the source URI; do it for
    // every path so cached feeds behave the same as freshly fetched ones.
    $reader = static::importString($responseXml);
    $reader->setOriginalSourceUri($uri);

    return $reader;
}
/**
 * Import a feed from a remote URI
 *
 * Performs similarly to import(), except it uses the HTTP client passed to
 * the method, and does not take into account cached data.
 *
 * Primary purpose is to make it possible to use the Reader with alternate
 * HTTP client implementations.
 *
 * @param  string $uri
 * @param  Http\ClientInterface $client
 * @return Feed\FeedInterface
 * @throws Exception\RuntimeException if response is not an Http\ResponseInterface
 *         or its status code is not 200
 */
public static function importRemoteFeed($uri, Http\ClientInterface $client)
{
    $response = $client->get($uri);

    if (!$response instanceof Http\ResponseInterface) {
        // Describe what was actually returned, by class or by scalar type.
        $received = is_object($response) ? get_class($response) : gettype($response);

        throw new Exception\RuntimeException(sprintf(
            'Did not receive a %s\Http\ResponseInterface from the provided HTTP client; received "%s"',
            __NAMESPACE__,
            $received
        ));
    }

    if ((int) $response->getStatusCode() !== 200) {
        throw new Exception\RuntimeException('Feed failed to load, got response code ' . $response->getStatusCode());
    }

    $reader = static::importString($response->getBody());
    $reader->setOriginalSourceUri($uri);

    return $reader;
}
/**
 * Import a feed from a string
 *
 * The string is parsed with the libxml entity loader disabled and any
 * document carrying a DOCTYPE is rejected. The detected feed type then
 * selects the reader class that wraps the document.
 *
 * @param  string $string
 * @return Feed\FeedInterface
 * @throws Exception\InvalidArgumentException when the XML declares a DOCTYPE
 * @throws Exception\RuntimeException when the XML cannot be parsed or is not
 *         a recognised Atom, RSS or RDF feed
 */
public static function importString($string)
{
    $libxmlErrflag = libxml_use_internal_errors(true);
    $oldValue      = libxml_disable_entity_loader(true);
    $dom           = new DOMDocument;
    $status        = $dom->loadXML(trim($string));

    // Restore the global libxml settings before any exception can be
    // thrown; rejecting a DOCTYPE used to leave both of them altered.
    libxml_disable_entity_loader($oldValue);
    libxml_use_internal_errors($libxmlErrflag);

    foreach ($dom->childNodes as $child) {
        if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
            throw new Exception\InvalidArgumentException(
                'Invalid XML: Detected use of illegal DOCTYPE'
            );
        }
    }

    if (!$status) {
        // Build error message
        $error = libxml_get_last_error();
        if ($error && $error->message) {
            $error->message = trim($error->message);
            $errormsg = "DOMDocument cannot parse XML: {$error->message}";
        } else {
            $errormsg = "DOMDocument cannot parse XML: Please check the XML document's validity";
        }
        throw new Exception\RuntimeException($errormsg);
    }

    $type = static::detectType($dom);

    static::registerCoreExtensions();

    // The type string's prefix ('rss', 'atom') and, for standalone Atom
    // entries, its 'entry' suffix decide which reader wraps the document.
    if (substr($type, 0, 3) === 'rss') {
        $reader = new Feed\Rss($dom, $type);
    } elseif (substr($type, 8, 5) === 'entry') {
        $reader = new Entry\Atom($dom->documentElement, 0, self::TYPE_ATOM_10);
    } elseif (substr($type, 0, 4) === 'atom') {
        $reader = new Feed\Atom($dom, $type);
    } else {
        throw new Exception\RuntimeException('The URI used does not point to a '
            . 'valid Atom, RSS or RDF feed that Zend\Feed\Reader can parse.');
    }

    return $reader;
}
/**
 * Imports a feed from a file located at $filename.
 *
 * Warnings raised while reading the file are captured by the ErrorHandler
 * and attached to the thrown exception as its previous exception.
 *
 * @param  string $filename
 * @throws Exception\RuntimeException when the file cannot be read
 * @return Feed\FeedInterface
 */
public static function importFile($filename)
{
    ErrorHandler::start();
    $contents = file_get_contents($filename);
    $readError = ErrorHandler::stop();

    if (false === $contents) {
        throw new Exception\RuntimeException("File '{$filename}' could not be loaded", 0, $readError);
    }

    return static::importString($contents);
}
/**
 * Find feed links in the HTML document served at a URI
 *
 * Fetches the page with the shared HTTP client, parses it as HTML and
 * hands every <link> element to a FeedSet together with the page URI.
 *
 * @param  string $uri
 * @return FeedSet
 * @throws Exception\RuntimeException when the response status is not 200 or
 *         the body cannot be parsed as HTML
 */
public static function findFeedLinks($uri)
{
    $client = static::getHttpClient();
    $client->setUri($uri);
    $response = $client->send();
    if ($response->getStatusCode() !== 200) {
        throw new Exception\RuntimeException("Failed to access $uri, got response code " . $response->getStatusCode());
    }

    $html = trim($response->getBody());

    // Parse with libxml errors collected internally and the entity loader
    // off, then put both settings back the way they were.
    $previousErrorMode   = libxml_use_internal_errors(true);
    $previousLoaderState = libxml_disable_entity_loader(true);
    $document = new DOMDocument;
    $loaded   = $document->loadHTML($html);
    libxml_disable_entity_loader($previousLoaderState);
    libxml_use_internal_errors($previousErrorMode);

    if (!$loaded) {
        // Build error message
        $lastError = libxml_get_last_error();
        if ($lastError && $lastError->message) {
            $lastError->message = trim($lastError->message);
            $message = "DOMDocument cannot parse HTML: {$lastError->message}";
        } else {
            $message = "DOMDocument cannot parse HTML: Please check the XML document's validity";
        }
        throw new Exception\RuntimeException($message);
    }

    $feedSet = new FeedSet;
    $feedSet->addLinks($document->getElementsByTagName('link'), $uri);

    return $feedSet;
}
/**
 * Detect the feed type of the provided feed
 *
 * @param  Feed\AbstractFeed|DOMDocument|string $feed
 * @param  bool $specOnly When true, a standalone Atom 1.0 entry is reported
 *                        as TYPE_ATOM_10 instead of TYPE_ATOM_10_ENTRY
 * @return string One of the TYPE_* constants; TYPE_ANY when nothing matches
 * @throws Exception\InvalidArgumentException when $feed is of an unsupported
 *         type or is XML that declares a DOCTYPE
 * @throws Exception\RuntimeException when a string $feed cannot be parsed
 */
public static function detectType($feed, $specOnly = false)
{
    if ($feed instanceof Feed\AbstractFeed) {
        $dom = $feed->getDomDocument();
    } elseif ($feed instanceof DOMDocument) {
        $dom = $feed;
    } elseif (is_string($feed) && !empty($feed)) {
        ErrorHandler::start(E_NOTICE|E_WARNING);
        $oldValue = libxml_disable_entity_loader(true);
        $dom      = new DOMDocument;
        $status   = $dom->loadXML($feed);

        // Undo the global changes before anything below can throw; the
        // DOCTYPE rejection used to leave the entity loader disabled and
        // the error handler started.
        libxml_disable_entity_loader($oldValue);
        $parseError = ErrorHandler::stop();

        foreach ($dom->childNodes as $child) {
            if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
                throw new Exception\InvalidArgumentException(
                    'Invalid XML: Detected use of illegal DOCTYPE'
                );
            }
        }

        if (!$status) {
            // Report the warning captured by the ErrorHandler. The old code
            // read a local variable that nothing ever assigned, so the real
            // parser message was always lost.
            if ($parseError) {
                $phpErrormsg = $parseError->getMessage();
            } elseif (function_exists('xdebug_is_enabled')) {
                $phpErrormsg = '(error message not available, when XDebug is running)';
            } else {
                $phpErrormsg = '(error message not available)';
            }

            throw new Exception\RuntimeException("DOMDocument cannot parse XML: $phpErrormsg");
        }
    } else {
        throw new Exception\InvalidArgumentException('Invalid object/scalar provided: must'
            . ' be of type Zend\Feed\Reader\Feed, DomDocument or string');
    }

    $xpath = new DOMXPath($dom);

    if ($xpath->query('/rss')->length) {
        $type    = self::TYPE_RSS_ANY;
        $version = $xpath->evaluate('string(/rss/@version)');

        if (strlen($version) > 0) {
            // switch keeps PHP's loose comparison of the version attribute.
            switch ($version) {
                case '2.0':
                    $type = self::TYPE_RSS_20;
                    break;

                case '0.94':
                    $type = self::TYPE_RSS_094;
                    break;

                case '0.93':
                    $type = self::TYPE_RSS_093;
                    break;

                case '0.92':
                    $type = self::TYPE_RSS_092;
                    break;

                case '0.91':
                    $type = self::TYPE_RSS_091;
                    break;
            }
        }

        return $type;
    }

    $xpath->registerNamespace('rdf', self::NAMESPACE_RDF);

    if ($xpath->query('/rdf:RDF')->length) {
        // RDF-based feeds are told apart by the namespace of their children:
        // try RSS 1.0 first, then rebind the prefix and try RSS 0.90.
        $xpath->registerNamespace('rss', self::NAMESPACE_RSS_10);

        if ($xpath->query('/rdf:RDF/rss:channel')->length
            || $xpath->query('/rdf:RDF/rss:image')->length
            || $xpath->query('/rdf:RDF/rss:item')->length
            || $xpath->query('/rdf:RDF/rss:textinput')->length
        ) {
            return self::TYPE_RSS_10;
        }

        $xpath->registerNamespace('rss', self::NAMESPACE_RSS_090);

        if ($xpath->query('/rdf:RDF/rss:channel')->length
            || $xpath->query('/rdf:RDF/rss:image')->length
            || $xpath->query('/rdf:RDF/rss:item')->length
            || $xpath->query('/rdf:RDF/rss:textinput')->length
        ) {
            return self::TYPE_RSS_090;
        }
    }

    $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_10);

    if ($xpath->query('//atom:feed')->length) {
        return self::TYPE_ATOM_10;
    }

    if ($xpath->query('//atom:entry')->length) {
        if ($specOnly == true) {
            return self::TYPE_ATOM_10;
        }

        return self::TYPE_ATOM_10_ENTRY;
    }

    $xpath->registerNamespace('atom', self::NAMESPACE_ATOM_03);

    if ($xpath->query('//atom:feed')->length) {
        return self::TYPE_ATOM_03;
    }

    return self::TYPE_ANY;
}
/**
 * Set plugin manager for use with Extensions
 *
 * @param  ExtensionManagerInterface $extensionManager
 * @return void
 */
public static function setExtensionManager(ExtensionManagerInterface $extensionManager)
{
    static::$extensionManager = $extensionManager;
}
/**
 * Get plugin manager for use with Extensions
 *
 * When no manager has been set, a default ExtensionManager is created and
 * stored first.
 *
 * @return ExtensionManagerInterface
 */
public static function getExtensionManager()
{
    if (static::$extensionManager === null) {
        static::setExtensionManager(new ExtensionManager());
    }

    return static::$extensionManager;
}
/**
 * Register an Extension by name
 *
 * The name is expanded to "<name>\Feed" and "<name>\Entry"; whichever of the
 * two the extension manager knows is appended to the matching list.
 *
 * @param  string $name
 * @return void
 * @throws Exception\RuntimeException if unable to resolve Extension class
 */
public static function registerExtension($name)
{
    $feedName  = $name . '\Feed';
    $entryName = $name . '\Entry';
    $manager   = static::getExtensionManager();

    // Already registered and still resolvable: nothing to do.
    if (static::isRegistered($name)
        && ($manager->has($feedName) || $manager->has($entryName))
    ) {
        return;
    }

    if (!($manager->has($feedName) || $manager->has($entryName))) {
        throw new Exception\RuntimeException('Could not load extension: ' . $name
            . ' using Plugin Loader. Check prefix paths are configured and extension exists.');
    }

    if ($manager->has($feedName)) {
        static::$extensions['feed'][] = $feedName;
    }

    if ($manager->has($entryName)) {
        static::$extensions['entry'][] = $entryName;
    }
}
/**
 * Is a given named Extension registered?
 *
 * An extension counts as registered when either its "\Feed" or its "\Entry"
 * name appears in the corresponding extension list.
 *
 * @param  string $extensionName
 * @return bool
 */
public static function isRegistered($extensionName)
{
    $feedName  = $extensionName . '\Feed';
    $entryName = $extensionName . '\Entry';

    return in_array($feedName, static::$extensions['feed'])
        || in_array($entryName, static::$extensions['entry']);
}
/**
 * Get a list of extensions
 *
 * @return array The extension lists exactly as currently stored
 */
public static function getExtensions()
{
    return static::$extensions;
}
609 * Reset class state to defaults
// Puts the static configuration of the class back to its initial state.
613 public static function reset()
// Forget any cache and HTTP client that were set.
615 static::$cache = null;
616 static::$httpClient = null;
// Switch both HTTP behaviour flags back off.
617 static::$httpMethodOverride = false;
618 static::$httpConditionalGet = false;
// Cleared so that getExtensionManager() builds a fresh default manager.
619 static::$extensionManager = null;
// NOTE(review): the entries of this array literal are not visible in this view - confirm they match the declared default of $extensions.
620 static::$extensions = array(
/**
 * Register core (default) extensions
 *
 * Each name is passed to registerExtension() in the order listed.
 *
 * @return void
 */
protected static function registerCoreExtensions()
{
    $coreExtensions = array(
        'DublinCore',
        'Content',
        'Atom',
        'Slash',
        'WellFormedWeb',
        'Thread',
        'Podcast',
    );

    foreach ($coreExtensions as $extensionName) {
        static::registerExtension($extensionName);
    }
}
/**
 * Utility method to apply array_unique operation to a multidimensional
 * array.
 *
 * Every element is serialized so that nested arrays can be compared as
 * strings, duplicates are dropped (the first occurrence and its key win),
 * and the survivors are unserialized again.
 *
 * @param  array $array
 * @return array
 */
public static function arrayUnique(array $array)
{
    $serialized = array_map('serialize', $array);
    $distinct   = array_unique($serialized);

    return array_map('unserialize', $distinct);
}