update porting to new machine
[wikipedia-parser-hphp.git] / Parser_LinkHooks.php
blob2b3069334b55d8ba4a57f0afd6bb06b28ad4cc31
1 <?php
/**
 * Parser with LinkHooks experiment
 * @ingroup Parser
 */
class Parser_LinkHooks extends Parser
{
    /**
     * Update this version number when the ParserOutput format
     * changes in an incompatible way, so the parser cache
     * can automatically discard old data.
     */
    const VERSION = '1.6.4';

    # Flags for Parser::setLinkHook
    # Also available as global constants from Defines.php
    const SLH_PATTERN = 1;

    # Constants needed for external link processing
    # Everything except bracket, space, or control characters
    const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F]';
    const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)([^][<>"\\x00-\\x20\\x7F]+)
        \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)gif|png|jpg|jpeg)$/Sx';

    /**#@+
     * @private
     */
    # Persistent:
    # Map of hook key (namespace index, or regex pattern string) to
    # array( callback, flags ), as stored by setLinkHook().
    var $mLinkHooks;

    /**#@-*/

    /**
     * Constructor
     *
     * @public
     * @param array $conf Configuration passed through to the parent constructor
     */
    function __construct( $conf = array() ) {
        parent::__construct( $conf );
        $this->mLinkHooks = array();
    }

    /**
     * Do various kinds of initialisation on the first call of the parser
     */
    function firstCallInit() {
        # NOTE(review): this re-runs the parent constructor without $conf on
        # every call, before the mFirstCall guard is evaluated. It looks like
        # it may have been meant as parent::firstCallInit() or nothing at all;
        # left unchanged because the parent class is not visible here - confirm.
        parent::__construct();
        if ( !$this->mFirstCall ) {
            return;
        }
        $this->mFirstCall = false;

        wfProfileIn( __METHOD__ );

        $this->setHook( 'pre', array( $this, 'renderPreTag' ) );
        CoreParserFunctions::register( $this );
        CoreLinkFunctions::register( $this );
        $this->initialiseVariables();

        # Pass a local alias by reference rather than &$this, which newer
        # PHP versions do not allow.
        $parser = $this;
        wfRunHooks( 'ParserFirstCallInit', array( &$parser ) );
        wfProfileOut( __METHOD__ );
    }

    /**
     * Create a link hook, e.g. [[Namespace:...|display]]
     * The callback function should have the form:
     *    function myLinkCallback( $parser, $holders, $markers,
     *      Title $title, $titleText, &$paramText = null, &$leadingColon = false ) { ... }
     *
     * Or with SLH_PATTERN:
     *    function myLinkCallback( $parser, $holders, $markers,
     *      $titleText, &$paramText = null, &$leadingColon = false ) { ... }
     *
     * The callback may return one of several possible values:
     *    String) Text result of the link
     *    True) (Treat as link) Parse the link according to normal link rules
     *    False) (Bad link) Just output the raw wikitext (You may modify the text first)
     *
     * @public
     *
     * @param integer|string $ns The Namespace ID or regex pattern if SLH_PATTERN is set
     * @param mixed $callback The callback function (and object) to use
     * @param integer $flags a combination of the following flags:
     *                SLH_PATTERN   Use a regex link pattern rather than a namespace
     *
     * @return The old callback function for this name, if any
     * @throws MWException if $ns does not have the type required by $flags
     */
    function setLinkHook( $ns, $callback, $flags = 0 ) {
        $isPattern = (bool)( $flags & self::SLH_PATTERN );
        if ( $isPattern && !is_string( $ns ) ) {
            throw new MWException( __METHOD__.'() expecting a regex string pattern.' );
        } elseif ( !$isPattern && !is_int( $ns ) ) {
            # The namespace check must only apply when SLH_PATTERN is NOT set;
            # otherwise every valid pattern string would be rejected here.
            throw new MWException( __METHOD__.'() expecting a namespace index.' );
        }
        $oldVal = isset( $this->mLinkHooks[$ns] ) ? $this->mLinkHooks[$ns][0] : null;
        $this->mLinkHooks[$ns] = array( $callback, $flags );
        return $oldVal;
    }

    /**
     * Get all registered link hook identifiers
     *
     * @return array Keys of the registered hooks (namespace indexes or patterns)
     */
    function getLinkHooks() {
        return array_keys( $this->mLinkHooks );
    }

    /**
     * Process [[ ]] wikilinks
     *
     * Scans $s for matching [[ ... ]] pairs, replaces every pair whose title
     * part consists of legal title characters with a link marker, and then
     * expands all markers through replaceInternalLinksCallback().
     *
     * @param string $s Wikitext to process; modified in place
     * @return LinkHolderArray
     * @throws MWException if $this->mTitle is null
     *
     * @private
     */
    function replaceInternalLinks2( &$s ) {
        global $wgContLang;

        wfProfileIn( __METHOD__ );

        wfProfileIn( __METHOD__.'-setup' );
        static $tc = false, $titleRegex;
        if ( !$tc ) {
            # the % is needed to support urlencoded titles as well
            $tc = Title::legalChars() . '#%';
            # Match a valid plain title
            $titleRegex = "/^([{$tc}]+)$/sD";
        }

        # NOTE(review): $sk, $nottalk and $selflink are not read anywhere in
        # this method; kept in case the calls have side effects - confirm and
        # remove if they do not.
        $sk = $this->mOptions->getSkin();
        $holders = new LinkHolderArray( $this );

        if ( is_null( $this->mTitle ) ) {
            # Close the profiling sections innermost-first
            wfProfileOut( __METHOD__.'-setup' );
            wfProfileOut( __METHOD__ );
            throw new MWException( __METHOD__.": \$this->mTitle is null\n" );
        }
        $nottalk = !$this->mTitle->isTalkPage();

        if ( $wgContLang->hasVariants() ) {
            $selflink = $wgContLang->convertLinkToAllVariants( $this->mTitle->getPrefixedText() );
        } else {
            $selflink = array( $this->mTitle->getPrefixedText() );
        }
        wfProfileOut( __METHOD__.'-setup' );

        $offset = 0;
        $offsetStack = array();
        $markers = new LinkMarkerReplacer( $this, $holders, array( $this, 'replaceInternalLinksCallback' ) );
        while ( true ) {
            $startBracketOffset = strpos( $s, '[[', $offset );
            $endBracketOffset = strpos( $s, ']]', $offset );
            # Finish when there are no more brackets
            if ( $startBracketOffset === false && $endBracketOffset === false ) {
                break;
            }
            # Determine if the bracket is a starting or ending bracket
            if ( $startBracketOffset !== false && $endBracketOffset !== false ) {
                # When we find both, use the first one
                $isStart = $startBracketOffset <= $endBracketOffset;
            } else {
                # When we only found one, check which it is
                $isStart = $startBracketOffset !== false;
            }
            $bracketOffset = $isStart ? $startBracketOffset : $endBracketOffset;
            if ( $isStart ) {
                /** Opening bracket **/
                # Just push our current offset in the string onto the stack
                $offsetStack[] = $startBracketOffset;
            } else {
                /** Closing bracket **/
                # Pop the start pos for our current link zone off the stack
                # (null when there is no unmatched opening bracket)
                $startBracketOffset = array_pop( $offsetStack );
                # Just to clean up the code, lets place offsets on the outer ends
                $endBracketOffset += 2;

                # Only do logic if we actually have a opening bracket for this
                if ( $startBracketOffset !== null ) {
                    # Extract text inside the link; $paramText is null when
                    # the link has no "|" part
                    list( $titleText, $paramText ) = array_pad(
                        explode( '|',
                            substr( $s, $startBracketOffset+2, $endBracketOffset-$startBracketOffset-4 ), 2 ),
                        2, null );
                    # Create markers only for valid links
                    if ( preg_match( $titleRegex, $titleText ) ) {
                        # Store the text for the marker
                        $marker = $markers->addMarker( $titleText, $paramText );
                        # Replace the current link with the marker
                        $s = substr( $s, 0, $startBracketOffset ).
                            $marker.
                            substr( $s, $endBracketOffset );
                        # We have modified $s, because of this we need to set the
                        # offset manually since the end position is different now
                        $offset = $startBracketOffset+strlen( $marker );
                        continue;
                    }
                    # ToDo: Some LinkHooks may allow recursive links inside of
                    # the link text, create a regex that also matches our
                    # <!-- LINKMARKER ### --> sequence in titles
                    # ToDo: Some LinkHooks use patterns rather than namespaces
                    # these need to be tested at this point here
                }
            }
            # Bump our offset to after our current bracket
            $offset = $bracketOffset+2;
        }

        # Now expand our tree
        wfProfileIn( __METHOD__.'-expand' );
        $s = $markers->expand( $s );
        wfProfileOut( __METHOD__.'-expand' );

        wfProfileOut( __METHOD__ );
        return $holders;
    }

    /**
     * Expand a single link marker into its final text.
     *
     * Invoked by LinkMarkerReplacer for every marker created in
     * replaceInternalLinks2(). Looks up a link hook for the namespace of the
     * title, runs it, and falls back to CoreLinkFunctions::defaultLinkHook
     * when there is no hook or the hook returns true.
     *
     * @param Parser $parser
     * @param LinkHolderArray $holders
     * @param LinkMarkerReplacer $markers
     * @param string $titleText Text before the first "|" of the link
     * @param string|null $paramText Text after the first "|", or null if absent
     * @return string Replacement text for the link
     * @throws MWException if the registered hook is not callable
     */
    function replaceInternalLinksCallback( $parser, $holders, $markers, $titleText, $paramText ) {
        wfProfileIn( __METHOD__ );
        $wt = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
        wfProfileIn( __METHOD__."-misc" );
        # Don't allow internal links to pages containing
        # PROTO: where PROTO is a valid URL protocol; these
        # should be external links.
        if ( preg_match( '/^\b(?:' . wfUrlProtocols() . ')/', $titleText ) ) {
            wfProfileOut( __METHOD__."-misc" );
            wfProfileOut( __METHOD__ );
            return $wt;
        }

        # Make subpage if necessary
        if ( $this->areSubpagesAllowed() ) {
            $titleText = $this->maybeDoSubpageLink( $titleText, $paramText );
        }

        # Check for a leading colon and strip it if it is there
        $leadingColon = $titleText !== '' && $titleText[0] == ':';
        if ( $leadingColon ) {
            $titleText = substr( $titleText, 1 );
        }

        wfProfileOut( __METHOD__."-misc" );
        # Make title object
        wfProfileIn( __METHOD__."-title" );
        $title = Title::newFromText( $this->mStripState->unstripNoWiki( $titleText ) );
        if ( !$title ) {
            wfProfileOut( __METHOD__."-title" );
            wfProfileOut( __METHOD__ );
            return $wt;
        }
        $ns = $title->getNamespace();
        wfProfileOut( __METHOD__."-title" );

        # Default for Namespaces is a default link
        # ToDo: Default for patterns is plain wikitext
        $return = true;
        if ( isset( $this->mLinkHooks[$ns] ) ) {
            list( $callback, $flags ) = $this->mLinkHooks[$ns];
            # $paramText and $leadingColon are passed by reference so the
            # hook may modify them
            if ( $flags & self::SLH_PATTERN ) {
                $args = array( $parser, $holders, $markers, $titleText, &$paramText, &$leadingColon );
            } else {
                $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon );
            }
            # Workaround for PHP bug 35229 and similar
            if ( !is_callable( $callback ) ) {
                wfProfileOut( __METHOD__ );
                throw new MWException( "Link hook for namespace $ns is not callable\n" );
            }
            $return = call_user_func_array( $callback, $args );
        }
        if ( $return === true ) {
            # True (treat as plain link) was returned, call the defaultLinkHook
            $args = array( $parser, $holders, $markers, $title, $titleText, &$paramText, &$leadingColon );
            $return = call_user_func_array( array( 'CoreLinkFunctions', 'defaultLinkHook' ), $args );
        }
        if ( $return === false ) {
            # False (no link) was returned, output plain wikitext
            # Build it again as the hook is allowed to modify $paramText
            $return = isset( $paramText ) ? "[[$titleText|$paramText]]" : "[[$titleText]]";
        }
        # Content was returned (or rebuilt above), return it
        wfProfileOut( __METHOD__ );
        return $return;
    }
}
/**
 * Hands out "<!-- LINKMARKER n -->" placeholders for link texts and later
 * expands each placeholder by invoking a callback with the stored texts.
 */
class LinkMarkerReplacer {

    protected $markers, $nextId, $parser, $holders, $callback;

    /**
     * @param mixed $parser Passed through as first argument of $callback
     * @param mixed $holders Passed through as second argument of $callback
     * @param callback $callback Invoked by callback() as
     *        ( $parser, $holders, $this, $titleText, $paramText )
     */
    function __construct( $parser, $holders, $callback ) {
        $this->nextId = 0;
        $this->markers = array();
        $this->parser = $parser;
        $this->holders = $holders;
        $this->callback = $callback;
    }

    /**
     * Store a title/parameter pair and return the marker that stands for it.
     *
     * @param string $titleText
     * @param string|null $paramText
     * @return string Marker of the form "<!-- LINKMARKER n -->"
     */
    function addMarker( $titleText, $paramText ) {
        $id = $this->nextId++;
        $this->markers[$id] = array( $titleText, $paramText );
        return "<!-- LINKMARKER $id -->";
    }

    /**
     * Tell whether $string contains anything shaped like a link marker.
     *
     * @param string $string
     * @return bool
     */
    function findMarker( $string ) {
        return (bool) preg_match( '/<!-- LINKMARKER [0-9]+ -->/', $string );
    }

    /**
     * Replace every marker in $string via callback().
     *
     * @param string $string
     * @return string
     */
    function expand( $string ) {
        return StringUtils::delimiterReplaceCallback( "<!-- LINKMARKER ", " -->", array( $this, 'callback' ), $string );
    }

    /**
     * Expand one marker match.
     *
     * @param array $m Match array: $m[0] is the full marker text, $m[1] the
     *        text between the delimiters
     * @return mixed Result of the callback, or $m[0] unchanged when $m[1] is
     *         not the id of a marker created by addMarker()
     */
    function callback( $m ) {
        $id = intval( $m[1] );
        # intval() maps any non-numeric payload to 0; require an exact
        # round-trip so that only ids in the form we issued are expanded.
        if ( (string)$id !== (string)$m[1] || !array_key_exists( $id, $this->markers ) ) {
            return $m[0];
        }
        $args = $this->markers[$id];
        array_unshift( $args, $this );
        array_unshift( $args, $this->holders );
        array_unshift( $args, $this->parser );
        return call_user_func_array( $this->callback, $args );
    }
}