From 368307484507c292c100c99a82e48acf2e6c01ac Mon Sep 17 00:00:00 2001 From: Kazuki Przyborowski Date: Sat, 13 Feb 2016 05:28:33 -0600 Subject: [PATCH] Added more user agents. --- pymotherless-demo.py | 10 ++++++++++ pymotherless.py | 16 +++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/pymotherless-demo.py b/pymotherless-demo.py index 89477f0..8547f64 100755 --- a/pymotherless-demo.py +++ b/pymotherless-demo.py @@ -19,7 +19,9 @@ from __future__ import division, absolute_import, print_function; import re, os, sys, pymotherless, argparse; +__project__ = pymotherless.__project__; __program_name__ = pymotherless.__program_name__; +__project_url__ = pymotherless.__project_url__; __version_info__ = pymotherless.__version_info__; __version_date_info__ = pymotherless.__version_date_info__; __version_date__ = pymotherless.__version_date__; @@ -34,12 +36,20 @@ geturls_ua_seamonkey_windows7 = pymotherless.geturls_ua_seamonkey_windows7; geturls_ua_chrome_windows7 = pymotherless.geturls_ua_chrome_windows7; geturls_ua_chromium_windows7 = pymotherless.geturls_ua_chromium_windows7; geturls_ua_internet_explorer_windows7 = pymotherless.geturls_ua_internet_explorer_windows7; +geturls_ua_pymotherless_python = pymotherless.geturls_ua_pymotherless_python; +geturls_ua_pymotherless_python_alt = pymotherless.geturls_ua_pymotherless_python_alt; +geturls_ua_googlebot_google = pymotherless.geturls_ua_googlebot_google; +geturls_ua_googlebot_google_old = pymotherless.geturls_ua_googlebot_google_old; geturls_headers = pymotherless.geturls_headers; geturls_headers_firefox_windows7 = pymotherless.geturls_headers_firefox_windows7; geturls_headers_seamonkey_windows7 = pymotherless.geturls_headers_seamonkey_windows7; geturls_headers_chrome_windows7 = pymotherless.geturls_headers_chrome_windows7; geturls_headers_chromium_windows7 = pymotherless.geturls_headers_chromium_windows7; geturls_headers_internet_explorer_windows7 = pymotherless.geturls_headers_internet_explorer_windows7; +geturls_headers_pymotherless_python = pymotherless.geturls_headers_pymotherless_python; +geturls_headers_pymotherless_python_alt = pymotherless.geturls_headers_pymotherless_python_alt; +geturls_headers_googlebot_google = pymotherless.geturls_headers_googlebot_google; +geturls_headers_googlebot_google_old = pymotherless.geturls_headers_googlebot_google_old; geturls_download_sleep = pymotherless.geturls_download_sleep; parser = argparse.ArgumentParser(description="get urls of images/videos from motherless.com", conflict_handler="resolve", add_help=True); diff --git a/pymotherless.py b/pymotherless.py index 2f0164c..ae90fde 100755 --- a/pymotherless.py +++ b/pymotherless.py @@ -17,7 +17,7 @@ ''' from __future__ import division, absolute_import, print_function; -import re, os, sys, urllib, gzip, time, datetime, argparse, cgi, subprocess, imp; +import re, os, sys, platform, urllib, gzip, time, datetime, argparse, cgi, subprocess, imp; haverequests = False; try: imp.find_module('requests'); @@ -46,6 +46,8 @@ if(sys.version[0]>="3"): import http.cookiejar as cookielib; __program_name__ = "PyMotherless"; +__project__ = __program_name__; +__project_url__ = "https://github.com/GameMaker2k/PyMotherless"; __version_info__ = (0, 3, 7, "RC 1", 1); __version_date_info__ = (2016, 2, 8, "RC 1", 1); __version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2); @@ -64,12 +66,24 @@ geturls_ua_seamonkey_windows7 = "Mozilla/5.0 (Windows NT 6.1; rv:42.0) Gecko/201 geturls_ua_chrome_windows7 = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36"; geturls_ua_chromium_windows7 = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/48.0.2564.82 Chrome/48.0.2564.82 Safari/537.36"; geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko"; +geturls_ua_pymotherless_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__); +if(platform.python_implementation()!=""): + geturls_ua_pymotherless_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=platform.python_implementation(), pyver=platform.python_version(), proname=__project__, prover=__version__); +if(platform.python_implementation()==""): + geturls_ua_pymotherless_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp="Python", pyver=platform.python_version(), proname=__project__, prover=__version__); +geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"; +geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"; geturls_ua = geturls_ua_firefox_windows7; geturls_headers_firefox_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}; geturls_headers_seamonkey_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}; geturls_headers_chrome_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}; geturls_headers_chromium_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}; geturls_headers_internet_explorer_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}; +geturls_headers_pymotherless_python = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_pymotherless_python, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}; +geturls_headers_pymotherless_python_alt = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_pymotherless_python_alt, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}; +geturls_headers_googlebot_google = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}; +geturls_headers_googlebot_google_old = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}; + geturls_headers = geturls_headers_firefox_windows7; geturls_download_sleep = 0; -- 2.11.4.GIT