1 /** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Copyright 2008 Ledermueller Achim
4 * serc is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * **/
20 import java
.util
.zip
.GZIPInputStream
;
// Working state shared by the fetch logic; each field below is assigned or read in getPage.
// host: name passed to the Socket constructor and sent in the "Host:" request header.
// filename: request target; taken from url.getFile(), extended with the transformed query,
//           and written on the "GET" request line.
25 private String host
, filename
;
// port: port used for the socket; getPage substitutes 80 when url.getPort() returns -1.
// bytes_read: number of bytes returned by the most recent from_server.read(buffer) call.
26 private int port
, bytes_read
;
// Scratch buffer for reading the response; getPage allocates it with 4096 bytes.
27 private byte[] buffer
;
// Stream the response is read from; getPage assigns either a GZIPInputStream over
// url.openStream() or the socket's input stream.
28 private InputStream from_server
;
// Accumulates the decoded response text; getPage returns output.toString().
29 private StringBuffer output
;
// Writer over the socket's output stream, used to send the HTTP request lines.
30 private PrintWriter to_server
;
// Base URLs of the supported search engines. Each entry ends with an open query
// parameter ("...q=", "...search_string=", "...q1=", "...full=").
// transformQuery identifies the engine by comparing its argument against
// searchEngines[0] .. searchEngines[4], so the order of the entries is significant.
34 public static String
[] searchEngines
= {
35 "http://scholar.google.com/scholar?q=",
36 "http://www.scientificcommons.org/opensearch?search_string=",
37 "http://quod.lib.umich.edu/cgi/b/bib/bib-idx?type=boolean&c=oaister&rgn1=entire+record&op2=and&rgn2=entire+record&sort=weighted+hit+frequency&submit2=search&sourceid=Mozilla-search&q1=",
38 "http://www.scirus.com/srsapp/search?q=",
39 "http://eprints.rclis.org/perl/search/simple?title_srchtype=ALL&_satisfyall=ALL&full_merge=ALL&_order=byyear&_action_search=Submit&full="
45 output
= new StringBuffer();
/**
 * Fetches a search-result page over HTTP and returns the response as text.
 *
 * The request target is built from url.getFile() plus the result of
 * transformQuery(page, query, number). An HTTP/1.1 GET request is written to a
 * socket opened on (host, port), and the response is read in 4096-byte chunks
 * into the output buffer.
 *
 * NOTE(review): several statements of this method are not visible in this view
 * (the declaration of url, the assignment of host, the branch that selects the
 * input stream, and the handler that declares e). Comments below are hedged
 * wherever they depend on those lines.
 *
 * @param page   search-engine base URL; forwarded to transformQuery as its
 *               searchEngine argument
 * @param query  search terms; forwarded to transformQuery
 * @param number requested number of results; forwarded to transformQuery
 * @param gzip   not referenced in the visible lines; presumably selects the
 *               GZIPInputStream path (the existing comments say this is only
 *               for scientificcommons) - TODO confirm
 * @return the accumulated contents of output as a String
 */
48 // Returns the content of 'page' + 'query' as a String.
49 // gzip is only for scientificcommons!
50 public String
getPage(String page
, String query
, int number
, boolean gzip
) {
// Only plain "http" URLs are accepted.
55 if(!url
.getProtocol().equals("http"))
56 throw new IllegalArgumentException("Protocol must be 'http:'");
// url.getPort() returns -1 when the URL carries no explicit port; fall back to 80.
60 if((port
= url
.getPort()) == -1) port
= 80;
62 // Required because scientificcommons sends gzipped content even when gzip;q=0 is set.
// NOTE(review): the statement controlled by this if is not visible here; presumably it
// supplies a default path when the URL has none - confirm.
63 if((filename
= url
.getFile()).equals(""))
// Append the engine-specific, encoded query to the request target.
66 filename
+= transformQuery(page
, query
, number
);
// Debug output of the final request target.
67 System
.out
.println("Filename: " + filename
);
69 Socket socket
= new Socket(host
, port
);
73 // Only for scientificcommons!
// NOTE(review): from_server is assigned twice below; the condition choosing between the
// two assignments is not visible in this view (presumably the gzip flag) - confirm.
74 from_server
= new GZIPInputStream(url
.openStream());
76 from_server
= socket
.getInputStream();
78 to_server
= new PrintWriter(socket
.getOutputStream());
// Write the request line and headers by hand.
// NOTE(review): every line is terminated with "\n" only, whereas HTTP/1.1 specifies CRLF
// ("\r\n") line terminators; strict servers may reject this.
80 to_server
.print("GET " + filename
+ " HTTP/1.1"+"\n");
81 to_server
.print("Host: " + host
+ "\n");
82 to_server
.print("User-Agent: my\n");
83 to_server
.print("Accept: text/plain,text/html\n");
84 to_server
.print("Accept-Language: en-us,en;q=0.5\n");
// q=0 on every coding asks the server not to compress the response.
85 to_server
.print("Accept-Encoding: gzip;q=0,deflate;q=0,compress;q=0\n");
86 to_server
.print("Accept-Charset: ISO-8859-15\n");
87 //to_server.print("Keep-Alive: 300\n");
88 //to_server.print("Connection: keep-alive\n");
// The empty line terminates the header section.
89 to_server
.print("\n");
// Read until end of stream (read returns -1), appending each chunk to output.
// NOTE(review): new String(byte[], int, int) decodes with the platform default charset,
// and decoding chunk by chunk can split a multi-byte character across two reads.
92 buffer
= new byte[4096];
93 while((bytes_read
= from_server
.read(buffer
)) != -1)
94 output
.append(new String(buffer
, 0, bytes_read
));
// Reports e on stderr; e is presumably the exception of an enclosing catch clause that is
// not visible in this view - confirm.
99 System
.err
.println(e
);
101 //System.out.println(output.toString());
// NOTE(review): no reset of output is visible in this method, so repeated calls may
// accumulate earlier responses - confirm against the missing lines.
102 return output
.toString();
105 //Returns the query compatible to the in searchEngine specified Searchengine
106 private String
transformQuery(String searchEngine
, String query
, int number
){
108 if(searchEngine
.equals(Fetcher
.searchEngines
[0])){
109 query
= query
.replaceAll("\\s", "+");
110 query
+= "&num=" + number
; //max 100, if you need more use &num=100&start={0,100,200,300.....} and a the while(goOn)-loop
111 //goOn = false; //set true if you need the while(goOn)-loop
113 //scientificcommons.org/opensearch
114 else if(searchEngine
.equals(Fetcher
.searchEngines
[1])) {
115 query
= query
.replaceAll("\\s", "%20");
117 query
+= "&start="+ (++page
);
119 //gets 2 pages.... == 30 results
120 if(page
< (number
/15))
122 //www.openaoister.com
123 } else if(searchEngine
.equals(Fetcher
.searchEngines
[2])) {
124 query
= query
.replaceAll("\\s", "+");
125 query
+= "&size=" + number
;
127 } else if(searchEngine
.equals(Fetcher
.searchEngines
[3])) {
128 query
= query
.replaceAll("\\s", "+");
129 query
+= "&p=" + page
;
135 } else if(searchEngine
.equals(Fetcher
.searchEngines
[4])) {
136 query
= query
.replaceAll("\\s", "+");
137 query
+= "&_offset=" + page
;