LJSUP-17669: Login.bml form refactoring
[livejournal.git] / cgi-bin / BotCheck.pm
blobe280138989bd0cc126afee82257638d8044e7c65
1 package BotCheck;
3 use strict;
4 use warnings;
# Compiled pattern of User-Agent fragments that identify crawlers, feed
# readers, HTTP libraries and other non-browser clients.
#
# Matching rules:
#   * /i - fragments are compared case-insensitively.
#   * /x - whitespace and "#" comments inside the pattern are ignored, which
#          is why every literal space in a fragment is backslash-escaped.
#   * A fragment matches anywhere in the string, except the entries that
#     start with "^" (NIF, NewsGator), which must be at the very beginning.
#
# The pattern interpolates no variables, so the obsolete /o modifier would
# have no effect and is intentionally not used.
#
# To add an agent, insert a new "fragment |" line before the final entry and
# escape any regex metacharacters and spaces it contains.
my $crawler_agents = qr{
    sindice-fetcher | # http://sindice.com/developers/bot
    Yandex |
    bot |
    libwww\-perl | # Comes from sup and other bots.
    Apple\-PubSub |
    Yahoo\!\ Slurp |
    Mediapartners\-Google |
    Jakarta\ Commons\-HttpClient | # comes from independent
    aggregator | # robots from spinn3r.com
    crawler |
    Feed |
    Yahoo\ Pipes |
    AppEngine\-Google |
    spider |
    lm114\@nyu\.edu | # http://www.nyu.edu; lm114@nyu.edu
    Akregator |
    Rome\ Client | # https://rome.dev.java.net/
    RSS |
    Python\-urllib |
    JetBrains\ Omea |
    www\.fetch\.com | # www.fetch.com
    Java |
    AppleSyndication |
    Surphace\ Scout |
    DoCoMo |
    PostRank | # http://postrank.com
    NetNewsWire |
    Liferea |
    Incutio\ XML\-RPC |
    Vienna | # http://www.vienna-rss.org
    Wget |
    centerim | # http://www.centerim.org/index.php/User_Manual#LiveJournal
    Subscribe\.Ru |
    Support\ Search\ Agent | # This is our own abusebot
    SimplePie |
    NewsFire |
    webcollage |
    lwp\-trivial | # Comes from perl module LWP::Simple (script/bot)
    BuzzTracker | # http://www.buzztracker.com
    R6\_Primer |
    bestpersons\.ru |
    GreatNews |
    Flexum | # Flexum.ru search service
    LucidMedia\ ClickSense | # comes from amazonaws
    Nutch | # http://lucene.apache.org/nutch/about.html
    BlogScope |
    Snarfer |
    Top\-Indexer | # Top-Indexer; http://www.artlebedev.ru; gregory@artlebedev.ru
    ActiveRefresh |
    relevantnoise\.com | # http://relevantnoise.com
    Ravelry\.com |
    MailRu\-LJImporter |
    LJpoisk\.ru | # RU Search Engine
    Virtual\ Reach\ Newsclip\ Collector |
    liveinternet\.ru |
    Fever |
    libcurl |
    Netvibes |
    URI\:\:Fetch |
    OutlookConnector |
    Bloglovin | # http://www.bloglovin.com/
    LJ\:\:Simple |
    SOAP\:\:Lite |
    LJ\.Rossia\.org |
    Smokeping | # http://oss.oetiker.ch/smokeping/
    SharpReader |
    Gregarius | # http://devlog.gregarius.net/docs/ua
    blogged\_crawl | # Nothing found on Google for this.
    LjSEEK | # http://www.ljseek.com/ or http://ljsearch.net
    WWW\-Mechanize |
    larbin | # http://www.webmasterworld.com/forum11/2926.htm
    PycURL |
    LeapTag | # http://leaptag.com/leaptag.php
    Syndic8 |
    online\@monitoring\.ru |
    Python\-httplib |
    gooblog | # http://help.goo.ne.jp/contact/
    facebookexternalhit |
    heritrix | # www.kit.edu
    web\.archive\.org |
    Perl\-ljsm |
    Tumblr |
    LWP\:\:Simple |
    Megite | # http://www.megite.com/
    WebryReader |
    Snoopy |
    BTWebClient | # utorrent.com
    Attensa |
    Amazon\.com\ Blog\ Parser |
    nestreader |
    Plagger |
    Headline\-Reader |
    Microsoft\ URL\ Control |
    DELCO\ READER |
    NewsLife |
    CaRP | # http://www.geckotribe.com/rss/carp/
    Awasu |
    LJSearch | # http://www.ljseek.com/ or http://ljsearch.net
    ^NIF | # http://www.newsisfree.com/robot.php
    StackRambler | # Russian Search Engine: http://www.rambler.ru/
    Mail\.ru |
    ^NewsGator |
    Sphere\ Scout | # scout at sphere dot com
    OpenISearch | # http://www.openisearch.com/faq.html
    CyberPsy | # http://avalon.departament.com/lj-cyberpsy/disclaimer.html
    WWWC | # http://www.nakka.com/wwwc/
    Filer\.pro | # Nothing found on Google for this.
    Yacy | # http://yacy.net/bot.html
    Teleport\ Pro | # http://www.tenmax.com/teleport/pro/home.htm
    ShopWiki | # http://www.shopwiki.com/wiki/Help:Bot
    pirst
}ix;
122 sub is_bot {
123 my ($class, $useragent) = @_;
125 return defined $useragent && $useragent =~ $crawler_agents;