drop md4 i?86 specific asm implementations
[mldonkey.git] / src / utils / net / geoip.ml
blob6b88d3f92a7525de16b3f7c2f104feab6023c92a
1 (* Copyright 2006 z *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 (* This product includes GeoIP data created by MaxMind, available from http://maxmind.com/ *)
21 open Int64ops
22 open Printf2
23 open Gettext
24 open Bigarray
26 let _s x = _s "GeoIp" x
27 let _b x = _b "GeoIp" x
29 let verbose = ref false
31 let log_prefix = "[Geo]"
33 let lprintf_nl fmt =
34 lprintf_nl2 log_prefix fmt
36 let country_begin = 16776960
37 let state_begin_rev0 = 16700000
38 let state_begin_rev1 = 16000000
39 let structure_info_max_size = 20
41 let segment_record_length = 3
42 let standard_record_length = 3
43 let org_record_length = 4
44 let max_record_length = 4
46 type database_type =
47 DatabaseInfo_UNKNOWN
48 | DatabaseInfo_COUNTRY_EDITION
49 | DatabaseInfo_REGION_EDITION_REV0
50 | DatabaseInfo_REGION_EDITION_REV1
51 | DatabaseInfo_CITY_EDITION_REV0
52 | DatabaseInfo_CITY_EDITION_REV1
53 | DatabaseInfo_ORG_EDITION
54 | DatabaseInfo_ISP_EDITION
55 | DatabaseInfo_PROXY_EDITION
56 | DatabaseInfo_ASNUM_EDITION
57 | DatabaseInfo_NETSPEED_EDITION
58 | DatabaseInfo_DOMAIN_EDITION
60 let database_type_of_int =
61 [| DatabaseInfo_UNKNOWN;
62 DatabaseInfo_COUNTRY_EDITION;
63 DatabaseInfo_CITY_EDITION_REV1;
64 DatabaseInfo_REGION_EDITION_REV1;
65 DatabaseInfo_ISP_EDITION;
66 DatabaseInfo_ORG_EDITION;
67 DatabaseInfo_CITY_EDITION_REV0;
68 DatabaseInfo_REGION_EDITION_REV0;
69 DatabaseInfo_PROXY_EDITION;
70 DatabaseInfo_ASNUM_EDITION;
71 DatabaseInfo_NETSPEED_EDITION;
72 DatabaseInfo_DOMAIN_EDITION |]
74 let database_name dbtype =
75 match dbtype with
76 | DatabaseInfo_COUNTRY_EDITION -> "country edition"
77 | DatabaseInfo_REGION_EDITION_REV0 -> "region edition v0"
78 | DatabaseInfo_REGION_EDITION_REV1 -> "region edition v1"
79 | DatabaseInfo_CITY_EDITION_REV0 -> "city edition v0"
80 | DatabaseInfo_CITY_EDITION_REV1 -> "city edition v1"
81 | DatabaseInfo_ORG_EDITION -> "org edition"
82 | DatabaseInfo_ISP_EDITION -> "isp edition"
83 | DatabaseInfo_PROXY_EDITION -> "proxy edition"
84 | DatabaseInfo_ASNUM_EDITION -> "asnum edition"
85 | DatabaseInfo_NETSPEED_EDITION -> "netspeed edition"
86 | DatabaseInfo_DOMAIN_EDITION -> "domain edition"
87 | DatabaseInfo_UNKNOWN -> "unknown edition"
89 let country_code_array = [|
90 "--";"AP";"EU";"AD";"AE";"AF";"AG";"AI";"AL";"AM";"AN";"AO";"AQ";"AR";
91 "AS";"AT";"AU";"AW";"AZ";"BA";"BB";"BD";"BE";"BF";"BG";"BH";"BI";"BJ";
92 "BM";"BN";"BO";"BR";"BS";"BT";"BV";"BW";"BY";"BZ";"CA";"CC";"CD";"CF";
93 "CG";"CH";"CI";"CK";"CL";"CM";"CN";"CO";"CR";"CU";"CV";"CX";"CY";"CZ";
94 "DE";"DJ";"DK";"DM";"DO";"DZ";"EC";"EE";"EG";"EH";"ER";"ES";"ET";"FI";
95 "FJ";"FK";"FM";"FO";"FR";"FX";"GA";"GB";"GD";"GE";"GF";"GH";"GI";"GL";
96 "GM";"GN";"GP";"GQ";"GR";"GS";"GT";"GU";"GW";"GY";"HK";"HM";"HN";"HR";
97 "HT";"HU";"ID";"IE";"IL";"IN";"IO";"IQ";"IR";"IS";"IT";"JM";"JO";"JP";
98 "KE";"KG";"KH";"KI";"KM";"KN";"KP";"KR";"KW";"KY";"KZ";"LA";"LB";"LC";
99 "LI";"LK";"LR";"LS";"LT";"LU";"LV";"LY";"MA";"MC";"MD";"MG";"MH";"MK";
100 "ML";"MM";"MN";"MO";"MP";"MQ";"MR";"MS";"MT";"MU";"MV";"MW";"MX";"MY";
101 "MZ";"NA";"NC";"NE";"NF";"NG";"NI";"NL";"NO";"NP";"NR";"NU";"NZ";"OM";
102 "PA";"PE";"PF";"PG";"PH";"PK";"PL";"PM";"PN";"PR";"PS";"PT";"PW";"PY";
103 "QA";"RE";"RO";"RU";"RW";"SA";"SB";"SC";"SD";"SE";"SG";"SH";"SI";"SJ";
104 "SK";"SL";"SM";"SN";"SO";"SR";"ST";"SV";"SY";"SZ";"TC";"TD";"TF";"TG";
105 "TH";"TJ";"TK";"TM";"TN";"TO";"TL";"TR";"TT";"TV";"TW";"TZ";"UA";"UG";
106 "UM";"US";"UY";"UZ";"VA";"VC";"VE";"VG";"VI";"VN";"VU";"WF";"WS";"YE";
107 "YT";"RS";"ZA";"ZM";"ME";"ZW";"A1";"A2";"O1";"AX";"GG";"IM";"JE";"BL";
108 "MF"
111 let country_name_array = [|
112 "N/A";"Asia/Pacific Region";"Europe";"Andorra";"United Arab Emirates";
113 "Afghanistan";"Antigua and Barbuda";"Anguilla";"Albania";"Armenia";
114 "Netherlands Antilles";"Angola";"Antarctica";"Argentina";"American Samoa";
115 "Austria";"Australia";"Aruba";"Azerbaijan";"Bosnia and Herzegovina";
116 "Barbados";"Bangladesh";"Belgium";"Burkina Faso";"Bulgaria";"Bahrain";
117 "Burundi";"Benin";"Bermuda";"Brunei Darussalam";"Bolivia";"Brazil";"Bahamas";
118 "Bhutan";"Bouvet Island";"Botswana";"Belarus";"Belize";"Canada";
119 "Cocos (Keeling) Islands";"Congo; The Democratic Republic of the";
120 "Central African Republic";"Congo";"Switzerland";"Cote D'Ivoire";
121 "Cook Islands";"Chile";"Cameroon";"China";"Colombia";"Costa Rica";"Cuba";
122 "Cape Verde";"Christmas Island";"Cyprus";"Czech Republic";"Germany";
123 "Djibouti";"Denmark";"Dominica";"Dominican Republic";"Algeria";"Ecuador";
124 "Estonia";"Egypt";"Western Sahara";"Eritrea";"Spain";"Ethiopia";"Finland";
125 "Fiji";"Falkland Islands (Malvinas)";"Micronesia; Federated States of";
126 "Faroe Islands";"France";"France; Metropolitan";"Gabon";"United Kingdom";
127 "Grenada";"Georgia";"French Guiana";"Ghana";"Gibraltar";"Greenland";"Gambia";
128 "Guinea";"Guadeloupe";"Equatorial Guinea";"Greece";
129 "South Georgia and the South Sandwich Islands";"Guatemala";"Guam";
130 "Guinea-Bissau";"Guyana";"Hong Kong";"Heard Island and McDonald Islands";
131 "Honduras";"Croatia";"Haiti";"Hungary";"Indonesia";"Ireland";"Israel";"India";
132 "British Indian Ocean Territory";"Iraq";"Iran; Islamic Republic of";
133 "Iceland";"Italy";"Jamaica";"Jordan";"Japan";"Kenya";"Kyrgyzstan";"Cambodia";
134 "Kiribati";"Comoros";"Saint Kitts and Nevis";
135 "Korea; Democratic People's Republic of";"Korea; Republic of";"Kuwait";
136 "Cayman Islands";"Kazakstan";"Lao People's Democratic Republic";"Lebanon";
137 "Saint Lucia";"Liechtenstein";"Sri Lanka";"Liberia";"Lesotho";"Lithuania";
138 "Luxembourg";"Latvia";"Libyan Arab Jamahiriya";"Morocco";"Monaco";
139 "Moldova; Republic of";"Madagascar";"Marshall Islands";
140 "Macedonia";"Mali";"Myanmar";"Mongolia";
141 "Macau";"Northern Mariana Islands";"Martinique";"Mauritania";"Montserrat";
142 "Malta";"Mauritius";"Maldives";"Malawi";"Mexico";"Malaysia";"Mozambique";
143 "Namibia";"New Caledonia";"Niger";"Norfolk Island";"Nigeria";"Nicaragua";
144 "Netherlands";"Norway";"Nepal";"Nauru";"Niue";"New Zealand";"Oman";"Panama";
145 "Peru";"French Polynesia";"Papua New Guinea";"Philippines";"Pakistan";
146 "Poland";"Saint Pierre and Miquelon";"Pitcairn Islands";"Puerto Rico";
147 "Palestinian Territory; Occupied";"Portugal";"Palau";"Paraguay";"Qatar";
148 "Reunion";"Romania";"Russian Federation";"Rwanda";"Saudi Arabia";
149 "Solomon Islands";"Seychelles";"Sudan";"Sweden";"Singapore";"Saint Helena";
150 "Slovenia";"Svalbard and Jan Mayen";"Slovakia";"Sierra Leone";"San Marino";
151 "Senegal";"Somalia";"Suriname";"Sao Tome and Principe";"El Salvador";
152 "Syrian Arab Republic";"Swaziland";"Turks and Caicos Islands";"Chad";
153 "French Southern Territories";"Togo";"Thailand";"Tajikistan";"Tokelau";
154 "Turkmenistan";"Tunisia";"Tonga";"Timor-Leste";"Turkey";"Trinidad and Tobago";
155 "Tuvalu";"Taiwan";"Tanzania; United Republic of";"Ukraine";"Uganda";
156 "United States Minor Outlying Islands";"United States";"Uruguay";"Uzbekistan";
157 "Holy See (Vatican City State)";"Saint Vincent and the Grenadines";
158 "Venezuela";"Virgin Islands; British";"Virgin Islands; U.S.";"Vietnam";
159 "Vanuatu";"Wallis and Futuna";"Samoa";"Yemen";"Mayotte";"Serbia";
160 "South Africa";"Zambia";"Montenegro";"Zimbabwe";"Anonymous Proxy";
161 "Satellite Provider";"Other";"Aland Islands";"Guernsey";"Isle of Man";"Jersey";
162 "Saint Barthelemy";"Saint Martin"
165 let country_continent_code_array = [| "--";
166 "AS";"EU";"EU";"AS";"AS";"SA";"SA";"EU";"AS";"SA";
167 "AF";"AN";"SA";"OC";"EU";"OC";"SA";"AS";"EU";"SA";
168 "AS";"EU";"AF";"EU";"AS";"AF";"AF";"SA";"AS";"SA";
169 "SA";"SA";"AS";"AF";"AF";"EU";"SA";"NA";"AS";"AF";
170 "AF";"AF";"EU";"AF";"OC";"SA";"AF";"AS";"SA";"SA";
171 "SA";"AF";"AS";"AS";"EU";"EU";"AF";"EU";"SA";"SA";
172 "AF";"SA";"EU";"AF";"AF";"AF";"EU";"AF";"EU";"OC";
173 "SA";"OC";"EU";"EU";"EU";"AF";"EU";"SA";"AS";"SA";
174 "AF";"EU";"SA";"AF";"AF";"SA";"AF";"EU";"SA";"SA";
175 "OC";"AF";"SA";"AS";"AF";"SA";"EU";"SA";"EU";"AS";
176 "EU";"AS";"AS";"AS";"AS";"AS";"EU";"EU";"SA";"AS";
177 "AS";"AF";"AS";"AS";"OC";"AF";"SA";"AS";"AS";"AS";
178 "SA";"AS";"AS";"AS";"SA";"EU";"AS";"AF";"AF";"EU";
179 "EU";"EU";"AF";"AF";"EU";"EU";"AF";"OC";"EU";"AF";
180 "AS";"AS";"AS";"OC";"SA";"AF";"SA";"EU";"AF";"AS";
181 "AF";"NA";"AS";"AF";"AF";"OC";"AF";"OC";"AF";"SA";
182 "EU";"EU";"AS";"OC";"OC";"OC";"AS";"SA";"SA";"OC";
183 "OC";"AS";"AS";"EU";"SA";"OC";"SA";"AS";"EU";"OC";
184 "SA";"AS";"AF";"EU";"EU";"AF";"AS";"OC";"AF";"AF";
185 "EU";"AS";"AF";"EU";"EU";"EU";"AF";"EU";"AF";"AF";
186 "SA";"AF";"SA";"AS";"AF";"SA";"AF";"AF";"AF";"AS";
187 "AS";"OC";"AS";"AF";"OC";"AS";"AS";"SA";"OC";"AS";
188 "AF";"EU";"AF";"OC";"NA";"SA";"AS";"EU";"SA";"SA";
189 "SA";"SA";"AS";"OC";"OC";"OC";"AS";"AF";"EU";"AF";
190 "AF";"EU";"AF";"--";"--";"--";"EU";"EU";"EU";"EU";
191 "SA";"SA"
194 let country_continent_name_array =
195 Array.make (Array.length country_continent_code_array) "N/A"
197 let country_index = Hashtbl.create 250
198 let () =
199 Array.iteri (fun i cc ->
200 Hashtbl.add country_index cc i
201 ) country_code_array;
202 Array.iteri (fun i ccc ->
203 country_continent_name_array.(i) <- (
204 match ccc with
205 | "AF" -> "Africa"
206 | "AN" -> "Antarctica"
207 | "AS" -> "Asia"
208 | "EU" -> "Europe"
209 | "NA" -> "North America"
210 | "OC" -> "Oceania"
211 | "SA" -> "South America"
212 | _ -> "N/A"
213 )) country_continent_code_array
215 let unknown_country = ("--", "N/A")
217 type geoip_database = {
218 file: in_channel;
219 dbtype: database_type;
220 segments: int;
221 record_length: int;
222 map: (int, int8_unsigned_elt, c_layout) Array1.t;
225 let unpack filename =
226 let ext = String.lowercase (Filename2.extension filename) in
227 let last_ext = String.lowercase (Filename2.last_extension filename) in
228 let real_ext = if last_ext = ".zip" then last_ext else ext in
229 match real_ext with
230 | ".zip" ->
231 (try
232 let file =
233 Unix2.tryopen_read_zip filename (fun ic ->
235 Zip.find_entry ic "GeoIP.dat"
236 with e ->
237 lprintf_nl "Exception %s while extracting geoip.dat"
238 (Printexc2.to_string e);
239 raise e) in
241 ignore(Misc.archive_extract filename "zip");
242 let geo_file = Filename.concat "web_infos" "GeoIP.dat" in
243 (try Sys.remove geo_file with _ -> ());
244 Unix2.rename file.Zip.filename geo_file;
245 geo_file
246 with e ->
247 lprintf_nl "Exception %s while extracting geoip.dat"
248 (Printexc2.to_string e);
249 raise e
250 with e ->
251 lprintf_nl "Exception %s while opening %s"
252 (Printexc2.to_string e) filename;
253 raise Not_found)
255 | ".dat.gz" | ".dat.bz2" | ".gz" | ".bz2" ->
256 let filetype =
257 if ext = ".bz2" || ext = ".dat.bz2" then "bz2" else "gz" in
258 (try
259 let geo_file = Filename.concat "web_infos" "GeoIP.dat" in
260 let s = Misc.archive_extract filename filetype in
261 (try Sys.remove geo_file with _ -> ());
262 Unix2.rename s geo_file;
263 geo_file
264 with e ->
265 lprintf_nl "Exception %s while extracting"
266 (Printexc2.to_string e);
267 raise Not_found)
268 (* if file is not a supported archive type try loading that file anyway *)
269 | _ -> filename
271 let close_geoip_db geoip_db =
272 close_in geoip_db.file
274 let open_geoip_db filename =
276 let f = open_in filename in
277 let size = in_channel_length f in
278 let map = Misc2.map_file f in
280 let new_database ?offset dbtype =
281 let read_segment_size () =
282 match offset with
283 | None -> failwith "Can't read variable segment size without a signature"
284 | Some offset ->
285 let result = ref 0 in
286 for j = 0 to segment_record_length - 1 do
287 let k = map.{offset + j} lsl (j * 8) in
288 result := !result + k
289 done;
290 !result in
292 match dbtype with
293 | DatabaseInfo_UNKNOWN -> assert false
294 | DatabaseInfo_DOMAIN_EDITION ->
295 None; (* Missing in previous implementation! *)
296 | _ ->
297 Some {
298 file = f;
299 dbtype = dbtype;
300 segments =
301 (match dbtype with
302 | DatabaseInfo_COUNTRY_EDITION
303 | DatabaseInfo_PROXY_EDITION
304 | DatabaseInfo_NETSPEED_EDITION ->
305 country_begin
306 | DatabaseInfo_REGION_EDITION_REV0 ->
307 state_begin_rev0
308 | DatabaseInfo_REGION_EDITION_REV1 ->
309 state_begin_rev1
310 | _ ->
311 read_segment_size ());
312 record_length =
313 (match dbtype with
314 | DatabaseInfo_ORG_EDITION
315 | DatabaseInfo_ISP_EDITION
316 | DatabaseInfo_ASNUM_EDITION ->
317 org_record_length
318 | _ ->
319 standard_record_length);
320 map = map;
321 } in
323 let rec setup_types i =
324 if i >= structure_info_max_size then
325 new_database DatabaseInfo_COUNTRY_EDITION
326 else
327 let offset = size - 3 - i in
328 if map.{offset} <> 255 ||
329 map.{offset + 1} <> 255 ||
330 map.{offset + 2} <> 255 then setup_types (i + 1)
331 else
332 let type_byte = map.{offset + 3} in
333 let type_byte = if type_byte >= 106 then
334 type_byte - 105 else type_byte in
335 if type_byte < 1 || type_byte >= Array.length database_type_of_int
336 then setup_types (i + 1)
337 else
338 let dbtype = database_type_of_int.(type_byte) in
339 new_database ~offset:(offset + 4) dbtype
341 setup_types 0
342 with e ->
343 lprintf_nl "Exception %s while opening"
344 (Printexc2.to_string e);
345 None
347 let seek_country db ip =
348 let ip_long = Ip.to_int64 ip in
350 let rec dive depth offset =
351 if depth < 0 then 0 else
352 let update i =
353 match db.record_length with
354 | 3 -> (* specialized code for common case *)
355 let offset = 6 * offset + 3 * i in
356 db.map.{offset} +
357 (db.map.{offset + 1} lsl 8) +
358 (db.map.{offset + 2} lsl 16)
359 | _ ->
360 let offset = (2 * offset + i) * db.record_length in
361 let tmp = ref 0 in
362 for j = 0 to db.record_length - 1 do
363 let y = db.map.{offset + j} in
364 tmp := !tmp + (y lsl (j * 8));
365 done;
366 !tmp in
368 let swim i =
369 if i >= db.segments then i - country_begin
370 else dive (depth - 1) i in
372 let bit = if (and64 ip_long (left64 1L depth)) = 0L then 0 else 1 in
373 swim (update bit) in
375 dive 31 0
377 let current_db = ref (None: geoip_database option)
379 let active () =
380 !current_db <> None
382 let close () =
383 match !current_db with
384 | None -> ()
385 | Some db ->
386 close_geoip_db db;
387 current_db := None
389 let init filename =
390 close ();
391 current_db := open_geoip_db filename;
392 (match !current_db with
393 | None -> lprintf_nl (_b "database not loaded")
394 | Some db -> lprintf_nl (_b "%s database loaded") (database_name db.dbtype))
396 let get_country_code ip =
397 if !verbose then lprintf_nl "get_country_code %s" (Ip.to_string ip);
398 if ip = Ip.null then 0
399 else
400 match !current_db with
401 | None -> 0
402 | Some db ->
403 try seek_country db ip
404 with _ -> 0
406 let get_country_code_option ip =
407 if !verbose then lprintf_nl "get_country_code_option %s" (Ip.to_string ip);
408 if ip = Ip.null then None
409 else
410 match !current_db with
411 | None -> None
412 | Some db ->
414 let cc = seek_country db ip in
415 if cc = 0 then None else Some cc
416 with _ -> None
418 let get_country ip =
419 if !verbose then lprintf_nl "get_country %s" (Ip.to_string ip);
420 if ip = Ip.null then unknown_country
421 else
422 match !current_db with
423 | None -> unknown_country
424 | Some db ->
425 try
426 let ret = seek_country db ip in
427 if ret = 0 then unknown_country
428 else country_code_array.(ret), country_name_array.(ret);
429 with _ -> unknown_country
431 let get_country_code_name cc =
432 match cc with
433 | Some cc ->
434 country_code_array.(cc),
435 country_name_array.(cc)
436 | None -> unknown_country
438 let _ =
439 Heap.add_memstat "GeoIp" (fun level buf ->
440 match !current_db with
441 | Some db ->
442 Printf.bprintf buf " countries: %d\n" (Array.length country_code_array);
443 Printf.bprintf buf " database_type: %s\n" (database_name db.dbtype);
444 Printf.bprintf buf " map size: %d\n" (Array1.dim db.map);
445 | None -> Printf.bprintf buf " module not active\n"