1 package WikiLinkParser
;
6 use HTML
::TreeBuilder
5 -weak
;
10 =head2 get_url_by_text
14 - a string of the wiki text, $wiki_text
15 - a MediaWiki API object, $mw
25 while ($text =~ m{\[\[(.*?)\]\]}g) {
26 push @urls, $self->_parse($1,$mw);
28 while ($text =~ m
<{{(.*?
)[\
||}}]>g
) {
29 push @urls, $self->_parse("Template:$1",$mw);
# Percent-decode a URI-escaped string, then decode the resulting bytes as
# UTF-8, and hand back the result.
#
#   $escaped - the percent-encoded input string (first positional argument)
#
# Returns the unescaped string after the UTF-8 decoding attempt.
# utf8::decode modifies its argument in place; its success/failure result
# is deliberately not inspected here, so whatever is left in the variable
# is what gets returned.
sub uri_unescape_utf8 {
    my ($escaped) = @_;

    my $decoded = uri_unescape($escaped);
    utf8::decode($decoded);

    return $decoded;
}
40 my $info_ref = $mw->api ( {
43 text
=> "{{fullurl:$text}}",
44 } ) or die $mw->{error
}->{code
} . ': ' . $mw->{error
}->{details
};
45 my $html = $info_ref->{parse
}{text
}{'*'};
47 my $tree = HTML
::TreeBuilder
->new_from_content($html);
48 my $url = uri_unescape_utf8
('https:'.$tree->look_down('_tag','p')->content_array_ref->[0]);
49 if ($url =~ m{^https:\/\/}) {return $url;}