added pygooglesearch.py and commit modify
[archive.git] / Apkawa / tsru / torrentru.py
blob9b4528bd0f62df3aa542ce2b8175a5e584f4583b
1 # -*- coding: utf-8 -*-
2 import urllib2, httplib,re,sys, time
3 import optparse
4 from BeautifulSoup import BeautifulSoup as bfsoup
5 from BeautifulSoup import BeautifulStoneSoup as BfSS
7 '''
8 ----------------------------
9 Имя пользователя: testdesu_2
10 Пароль: qazqaz
11 ----------------------------
12 '''
# Value sent as the User-Agent header of the login request.
USER_AGENT = (
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12) '
    'Gecko/20050922 Firefox/1.0.7 (Debian package 1.0.7-1)'
)
class tsru:
    """Small client for the torrents.ru forum: login, page fetch, search parsing.

    Attributes:
        user:   login name posted by auth().
        paswd:  password posted by auth().
        url:    base URL of the forum, used to build absolute links.
        cookie: raw Set-Cookie header value stored by auth(); None until
                a login succeeded.
    """

    def __init__(self):
        self.user = ''
        self.paswd = ''
        self.url = 'http://torrents.ru/forum'
        # Initialised here so get_html() can be called before auth()
        # without failing on a missing attribute.
        self.cookie = None

    def auth(self):
        """Post the login form repeatedly until the server returns a cookie.

        The first attempt is made immediately; each following attempt is
        made after a 5 second pause. The attempt number is written to
        stderr. The cookie is stored in ``self.cookie``. Returns None.
        """
        def get_cookie():
            body = 'login_username=%s&login_password=%s&autologin=1&login=%%C2%%F5%%EE%%E4'%(self.user, self.paswd)
            conn = httplib.HTTPConnection('torrents.ru')
            try:
                conn.putrequest('POST', '/forum/login.php')
                conn.putheader('Content-Length', str(len(body)))
                conn.putheader('Referer', 'http://torrents.ru/forum/index.php')
                conn.putheader('User-Agent', USER_AGENT)
                conn.putheader('Content-Type', 'application/x-www-form-urlencoded')
                conn.endheaders()
                conn.send(body)
                self.cookie = conn.getresponse().getheader('set-cookie')
            finally:
                # Release the socket whether or not the request succeeded.
                conn.close()
            return self.cookie

        attempt = 0
        while True:
            attempt += 1
            sys.stderr.write('Попытка авторизации №%d\r' % attempt)
            if get_cookie():
                return
            # Pause only between retries, never before the first attempt.
            time.sleep(5)

    def get_html(self, url, size=None):
        """Fetch ``url`` and return the response body.

        Args:
            url:  address to request.
            size: maximum number of bytes to read; None reads everything.

        The stored cookie is sent only when one is available.
        """
        req = urllib2.Request(url)
        if self.cookie:
            req.add_header('Cookie', self.cookie)
        response = urllib2.urlopen(req)
        try:
            # Do not hand None to read(); call it without a limit instead.
            if size is None:
                return response.read()
            return response.read(size)
        finally:
            response.close()

    def profile(self):
        """Locate the profile link on the forum index page and print the
        profile table found on the linked page."""
        index_page = bfsoup(self.get_html('http://torrents.ru/forum/index.php'))
        top_menu = index_page.find('div', attrs={'class': 'topmenu'})
        profile_link = top_menu.find(
            'a', {'href': re.compile(r'profile\.php\?mode=viewprofile')})
        profile_url = self.url + '/' + profile_link.get('href')
        profile_page = BfSS(self.get_html(profile_url))
        html_profile = profile_page.find(
            'table', {'class': 'user_profile bordered w100'})
        print(html_profile)

    def test(self, file):
        """Return the full contents of the file at path ``file``."""
        f = open(file)
        try:
            return f.read()
        finally:
            # Close the handle even when read() raises.
            f.close()

    def search(self, word):
        """Parse a tracker result page and return formatted result strings.

        NOTE: the live query is commented out below; the page is read
        from the local file 'search.html' and ``word`` is not used.

        Returns:
            The list produced by output() for the parsed rows.
        """
        def parse(html):
            def extract(tBody):
                # The status icon class decides whether the row is a torrent.
                status_cell = tBody.find(attrs={'class': 'row1 tCenter'})
                status_span = None
                if status_cell is not None:
                    status_span = status_cell.find('span')
                status = ''
                if status_span is not None:
                    status = status_span.get('class')
                if status not in ('tor-icon tor-not-approved',
                                  'tor-icon tor-approved'):
                    return False

                topic = tBody.find(attrs={'class': 'genmed tLink'})
                added_cell = tBody.find(
                    attrs={'class': 'row4 small nowrap', 'title': True})
                row = {
                    # href starts with './'; dropping the dot leaves '/...'.
                    'topic_link': self.url + topic.get('href')[1:],
                    'topic_text': topic.find(text=True),
                    'status': status,
                    'author': tBody.find(attrs={'class': 'med'}).find(text=True),
                    'torrent': self.url + tBody.find(attrs={'class': 'med dLink'}).get('href')[1:],
                    'size': tBody.find(attrs={'class': 'row4 small nowrap'}).find(text=True),
                    'seed': tBody.find(attrs={'class': 'row4 seedmed'}).find(text=True),
                    'leech': tBody.find(attrs={'class': 'row4 leechmed'}).find(text=True),
                    'downloaded': tBody.find(attrs={'class': 'row4 small'}).find(text=True),
                    'added': ' '.join([p.find(text=True)
                                       for p in added_cell.findAll('p')]),
                }
                # Re-parse every value so HTML entities are converted.
                for key in list(row):
                    row[key] = BfSS(row[key], convertEntities='html')
                return row

            soup = bfsoup(html)
            # Rows that are not torrents stay in the list as False.
            return [extract(body) for body in soup.findAll('tbody')]

        # Live query, currently disabled in favour of a saved page:
        #url=self.url+'/tracker.php'
        #quere = '%s?max=1&to=1&nm=%s'%(url,urllib2.quote(word))
        #html = self.get_html(quere)
        html = self.test('search.html')
        return self.output(parse(html))

    def output(self, result):
        """Format parsed rows as numbered, human-readable strings.

        Args:
            result: iterable of row dicts as built by search(); falsy
                    entries are skipped and do not consume a number.

        Returns:
            A list of strings, numbered from 1. The title is cut to 69
            characters when its first content node is longer than that.
        """
        out_format_base = ' | %s |\n[%s] S: [%s] L: [%s] D: [%s] A: [%s]\n %s\n'
        full_format = '%d) %s' + out_format_base
        short_format = '%d) %.69s' + out_format_base
        lines = []
        for row in result:
            if not row:
                continue
            contents = row['topic_text'].contents
            # An empty title has no first node; treat it as short.
            too_long = bool(contents) and len(contents[0]) > 69
            fmt = short_format if too_long else full_format
            lines.append(fmt % (len(lines) + 1,
                                row['topic_text'],
                                row['author'],
                                row['size'],
                                row['seed'],
                                row['leech'],
                                row['downloaded'],
                                row['added'],
                                row['topic_link']))
        return lines
def optparse(args):
    """Placeholder for command-line handling; ignores ``args``, returns None.

    NOTE(review): if this is defined at module level it rebinds the name
    ``optparse`` and hides the imported module of the same name - confirm
    and consider renaming once callers are known.
    """
    return None
if __name__ == '__main__':
    # Script entry point: the command-line arguments form the search phrase.
    s = tsru()
    #s.auth()
    # search() returns the formatted entries; write them out instead of
    # discarding them. Each entry already ends with a newline.
    for entry in s.search(' '.join(sys.argv[1:])):
        sys.stdout.write(entry)
    #s.profile()
138 ----------------------------
139 Имя пользователя: geeqie_001
140 Пароль: qaz
141 ----------------------------
144 Cookie: bb_data=a%3A3%3A%7Bs%3A2%3A%22uk%22%3BN%3Bs%3A3%3A%22uid%22%3Bi%3A314002%3Bs%3A3%3A%22sid%22%3Bs%3A20%3A%22dBI3Kxtsy5fJTOdS5iBi%22%3B%7D; bb_sid=dBI3Kxtsy5fJTOdS5iBi; bb_isl=0
145 Auth
147 POST /forum/login.php HTTP/1.1
148 Host: torrents.ru
149 User-Agent: Mozilla/5.0 (X11; U; Linux i686; ru-RU; rv:1.9.0.2pre) Gecko/2008072703 Firefox/3.0.2pre (Swiftfox)
150 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
151 Accept-Language: ru,en-us;q=0.7,en;q=0.3
152 Accept-Encoding: gzip,deflate
153 Accept-Charset: windows-1251,utf-8;q=0.7,*;q=0.7
154 Keep-Alive: 300
155 Proxy-Connection: keep-alive
156 Referer: http://torrents.ru/forum/viewforum.php?f=1813
157 Content-Type: application/x-www-form-urlencoded
158 Content-Length: 64
159 login_username=Apkawa&login_password=xerosexo&login=%C2%F5%EE%E4
161 search
163 POST /forum/tracker.php HTTP/1.1
164 Host: torrents.ru
165 User-Agent: Mozilla/5.0 (X11; U; Linux i686; ru-RU; rv:1.9.0.2pre) Gecko/2008072703 Firefox/3.0.2pre (Swiftfox)
166 Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8
167 Accept-Language: ru,en-us;q=0.7,en;q=0.3
168 Accept-Encoding: gzip,deflate
169 Accept-Charset: windows-1251,utf-8;q=0.7,*;q=0.7
170 Keep-Alive: 300
171 Proxy-Connection: keep-alive
172 Referer: http://torrents.ru/forum/index.php
173 Cookie: bb_data=a%3A3%3A%7Bs%3A2%3A%22uk%22%3BN%3Bs%3A3%3A%22uid%22%3Bi%3A314002%3Bs%3A3%3A%22sid%22%3Bs%3A20%3A%22dBI3Kxtsy5fJTOdS5iBi%22%3B%7D; bb_sid=dBI3Kxtsy5fJTOdS5iBi; bb_isl=0
174 Content-Type: application/x-www-form-urlencoded
175 Content-Length: 24
176 max=1&to=1&nm=soul+eater
178 example
180 <tbody id="tor_11248324">
181 <tr class="tCenter">
182 <td class="row1"><img src="http://static.torrents.ru/templates/default/images/icon_minipost.gif" class="icon1" alt="post" /></td>
183 <td class="row1 tCenter" title="не проверено"><span class="tor-icon tor-not-approved">*</span></td>
184 <td class="row1"><a class="gen" href="tracker.php?f=1389">Аниме (основной подраздел)</a></td>
185 <td class="row4 med tLeft">
186 <div>
187 <a class="genmed tLink" href="./viewtopic.php?t=1024117"><b>Пожиратель душ / Soul Eater (Игараси Такуя) [JAP+SUB][2008,приключ<wbr>ения, комедия, фэнтези, сёнэн, TVrip]</wbr></b></a>
188 </div>
189 </td>
190 <td class="row1"><a class="med" href="tracker.php?pid=981210">a-f14</a></td>
191 <td class="row4 med nowrap"><a class="med dLink" href="./download.php?id=873028">[<span class="dlSp"> </span><span class="bold" onclick="this.className='normal'">DL</span><span class="dlSp"> </span>]</a></td>
192 <td class="row4 small nowrap">1.47&nbsp;GB</td>
193 <td class="row4 seedmed" title="Seeders"><b>4</b></td>
194 <td class="row4 leechmed" title="Leechers"><b>5</b></td>
195 <td class="row4 small">3</td>
196 <td class="row4 small nowrap" style="padding: 1px 3px 2px;" title="Добавлен">
197 <p>23:29</p>
198 <p>8-Сен-08</p>
199 </td>
200 </tr>
201 </tbody>
def utf8_cp1251(text):
    """Re-encode a UTF-8 byte string as cp1251.

    Args:
        text: byte string holding UTF-8 encoded text.

    Returns:
        The same text as a cp1251 encoded byte string.

    Raises:
        UnicodeDecodeError: ``text`` is not valid UTF-8.
        UnicodeEncodeError: a character has no cp1251 representation.
    """
    # bytes.decode() replaces the Python-2-only unicode() builtin and gives
    # the same result for byte strings.
    return text.decode('utf8').encode('cp1251')
def cp1251_utf8(text):
    """Re-encode a cp1251 byte string as UTF-8.

    Args:
        text: byte string holding cp1251 encoded text.

    Returns:
        The same text as a UTF-8 encoded byte string.

    Raises:
        UnicodeDecodeError: ``text`` contains a byte undefined in cp1251.
    """
    # bytes.decode() replaces the Python-2-only unicode() builtin and gives
    # the same result for byte strings.
    return text.decode('cp1251').encode('utf8')
213 console color
214 if [ "$USECOLOR" = "YES" -o "$USECOLOR" = "yes" ]; then
215 C_MAIN="\033[1;37;40m" # main text
216 C_OTHER="\033[1;34;40m" # prefix & brackets
217 C_SEPARATOR="\033[1;30;40m" # separator
219 C_BUSY="\033[0;36;40m" # busy
220 C_FAIL="\033[1;31;40m" # failed
221 C_DONE="\033[1;37;40m" # completed
222 C_BKGD="\033[1;35;40m" # backgrounded
224 C_H1="\033[1;37;40m" # highlight text 1
225 C_H2="\033[1;36;40m" # highlight text 2
227 C_CLEAR="\033[1;0m"