separate udp trackers code to bTUdpTracker.mlp
[mldonkey.git] / src / networks / bittorrent / bTProtocol.ml
blob4da844e72bef42d8e806b362ad553059c2b9353a
1 (* Copyright 2001, 2002 b8_bavard, b8_fee_carabine, INRIA *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 1. Download the .torrent file
23 *****************************
25 "announce" = "http://sucs.org:6969/announce";
26 "info" = {
27 "files" = [
29 "length" = 682164224;
30 "path" = [ "Mandrake91-cd1-inst.i586.iso"; ]
34 "length" = 681279488;
35 "path" = [
36 "Mandrake91-cd2-ext.i586.iso";
42 "length" = 681574400;
43 "path" = [
44 "Mandrake91-cd3-i18n.i586.iso";
51 "name" = "mandrake9.1";
52 "piece length" = 262144;
53 "pieces" = "[EAd\155ã´gÛ ÓþËf\134Ê«\025\016ô͵,1U\150À
54 \132\147îª\n%ù\\é,\012ÿC\008GÈÓd!æ¾öuL!\134Ô\016\152&\017¾\008³¢d\029Ë3\031Ï\134
55 #»×\025\137¡=¢.®\019§´\138î.ñ\151O\137Ùÿ,£ç&\019Àۢç\156.ù\150<Eªª\153\018\145\
56 149d\147[+J=º\155l\139Î\028¡dVÉ\000-\017°Å¤\013\154¼>A¹Ã5ïIt\007\020©ãÚÀÈÈ\014O®
57 ô1\152UÄ\026K\021^ãúì5Í¿ü \026\149\131q\024\015¸]Òþ£\027&\148\\ã-©\028WMÂ5...";
62 2. Extract BitTorrent information needed:
63 *****************************************
65 Metainfo files are bencoded dictionaries with the following keys -
67 'announce'
68 The url of the tracker.
70 'info'
71 This maps to a dictionary, with keys described below.
73 The 'name' key maps to a string which is the suggested name to save
74 the file (or directory) as. It is purely advisory.
76 'piece length' maps to the number of bytes in each piece the file is
77 split into. For the purposes of transfer, files are split into
78 fixed-size pieces which are all the same length except for possibly
79 the last one which may be truncated. Piece length is almost always a
80 power of two, most commonly 2^20 .
82 'pieces' maps to a string whose length is a multiple of 20. It is to
83 be subdivided into strings of length 20, each of which is the sha1
84 hash of the piece at the corresponding index.
86 There is also a key 'length' or a key 'files', but not both or
87 neither. If 'length' is present then the download represents a
88 single file, otherwise it represents a set of files which go in a
89 directory structure.
91 In the single file case, 'length' maps to the length of the file in
92 bytes.
94 For the purposes of the other keys, the multi-file case is treated
95 as only having a single file by concatenating the files in the order
96 they appear in the files list. The files list is the value 'files'
97 maps to, and is a list of dictionaries containing the following keys -
99 'length'
100 The length of the file, in bytes. 'path'
101 A list of strings corresponding to subdirectory names, the last of
102 which is the actual file name (a zero length list is an error case).
104 In the single file case, the 'name' key is the name of a file, in the
105 multiple file case, it's the name of a directory.
108 3. Contact the tracker regularly to update file information
109 ***********************************************************
111 Tracker GET requests have the following keys by HTTP:
113 'info_hash'
114 The 20 byte sha1 hash of the bencoded form of the 'info' value from
115 the metainfo file. Note that this is a substring of the metainfo
116 file. This value will almost certainly have to be escaped.
118 'peer_id'
119 A string of length 20 which this downloader uses as its id. Each
120 downloader generates its own id at random at the start of a new
121 download. This value will also almost certainly have to be escaped.
123 'ip'
124 An optional parameter giving the ip (or dns name) which this peer is
125 at. Generally used for the origin if it's on the same machine as the
126 tracker.
128 'port'
129 The port number this peer is listening on. Common behavior is for a
130 downloader to try to listen on port 6881 and if that port is taken
131 try 6882, then 6883, etc. and give up after 6889.
133 'uploaded'
134 The total amount uploaded so far, encoded in base ten ascii.
136 'downloaded'
137 The total amount downloaded so far, encoded in base ten ascii.
139 'left'
140 The number of bytes this peer still has to download, encoded in base
141 ten ascii. Note that this can't be computed from downloaded and the
142 file length since it might be a resume, and there's a chance that
143 some of the downloaded data failed an integrity check and had to be
144 re-downloaded.
146 'event'
147 This is an optional key which maps to 'started', 'completed', or
148 'stopped' (or '', which is the same as not being present).
150 ---> bencoded reply:
151 { 'failure reason' = ... }
154 'interval' = ....; (* before next request to tracker *)
155 'peers' = [
157 'peer id' = ....;
158 'ip' = ....;
159 'port' = ....;
161 ....
165 4. Contact every peer regularly
166 *******************************
168 Handshake:
170 type int = BigEndian.int32
172 --->
173 string8 (prefixed by length): "BitTorrent protocol"
174 int8[8]: reserved(zeros)
175 int8[20 bytes]: Sha1.string (Bencode.encode file.file_info)
176 int8[20 bytes]: peer id
178 <---
179 string8 (prefixed by length): "BitTorrent protocol"
180 int8[8]: reserved(zeros)
181 int8[20 bytes]: Sha1.string (Bencode.encode file.file_info)
182 int8[20 bytes]: peer id
184 ----> disconnect if sha1 don't match, or if peer id is unexpected
186 msg:
187 int: len of message (byte+payload) 0 -> keepalive sent every 2 minutes
188 byte8: opcode of message
189 int8[..]: payload
191 opcodes:
192 Connections start out choked and not interested.
194 No payload:
195 * 0 - choke: you have been blocked
196 * 1 - unchoke: you have been unblocked
197 * 2 - interested: I'm interested in downloading this file now
198 * 3 - not interested: I'm not interested in downloading this file now
199 With binary (not bencoded) payload:
200 * 4 - have
201 int : index of new completed chunk
202 * 5 - bitfield:
203 string: a bitfield of bit 1 for downloaded chunks
204 byte: bits are inverted 0....7 ---> 7 .... 0
205 * 6 - request
206 int: index
207 int: begin
208 int: length (power of 2, 2 ^ 15)
209 * 7 - piece
210 int: index
211 int: begin
212 string: piece
213 * 8 - cancel: cancel a request
214 int: index
215 int: begin
216 int: length (power of 2, 2 ^ 15)
218 Choke/unchoke every 10 seconds
221 open BasicSocket
222 open CommonTypes
223 open Printf2
224 open CommonOptions
225 open Options
226 open Md4
227 open CommonGlobals
228 open BigEndian
229 open TcpBufferedSocket
230 open AnyEndian
231 open BTTypes
(* Tag handed to [lprintf_nl2] for every log line written through
   [lprintf_nl] below. *)
let log_prefix = "[BT]"

(* Module-local logging entry point: forwards the format [fmt] to
   [lprintf_nl2] together with [log_prefix].  The explicit [fmt] argument is
   kept so the function stays polymorphic in its format. *)
let lprintf_nl fmt = lprintf_nl2 log_prefix fmt

(* Mutable integer, initially 0.  [handlers] embeds its current value in the
   "azureus_rand_%d" answer it sends back to a "NATCHECK_HANDSHAKE" probe. *)
let azureus_porttest_random = ref 0
(* How incoming bytes on a connection are currently consumed (see [handlers]):
   - [BTHeader h]: the handshake has not been consumed yet; once enough bytes
     are buffered, [h] is called with (protocol string, 8 reserved bytes,
     info hash).
   - [Reader h]: [h] is called directly whenever buffered data is present. *)
type ghandler =
  | BTHeader of
      (gconn -> TcpBufferedSocket.t -> (string * string * Sha1.t) -> unit)
  | Reader of (gconn -> TcpBufferedSocket.t -> unit)

(* Per-connection state shared by the callbacks installed in [set_bt_sock]. *)
and gconn = {
  (* Current reader; mutable so it can be replaced while the connection
     is in use. *)
  mutable gconn_handler : ghandler;
  (* Refill callbacks: the head is invoked on refill and dropped on
     WRITE_DONE. *)
  mutable gconn_refill : (TcpBufferedSocket.t -> unit) list;
  (* When set and the refill list runs empty, the socket lifetime is
     shortened to 30 seconds. *)
  mutable gconn_close_on_write : bool;
}
module TcpMessages = struct

    (* Messages exchanged with a peer over TCP.
       [Ping] is the zero-length keep-alive; [PeerID] is an internal
       pseudo-message (opcode -1 in [parsing]) carrying the 20 peer id bytes
       that follow the handshake. *)
    type msg =
    | Choke
    | Unchoke
    | Interested
    | NotInterested
    | Have of int64
    | BitField of string
    | Request of int * int64 * int64
    (* piece index, offset, backing string, start in string, data length *)
    | Piece of int * int64 * string * int * int
    | Cancel of int * int64 * int64
    | Ping
    | PeerID of string

    (* Human readable rendering of [msg] for logs.  Binary strings are
       escaped; the data of a [Piece] is not printed, only its length. *)
    let to_string msg =
      match msg with
      | Choke -> "Choke"
      | Unchoke -> "Unchoke"
      | Interested -> "Interested"
      | NotInterested -> "NotInterested"
      | Have n -> Printf.sprintf "Have %Ld" n
      | BitField s -> Printf.sprintf "BitField %s" (String.escaped s)
      | Request (index, offset, len) ->
          Printf.sprintf "Request %d %Ld[%Ld]" index offset len
      | Piece (index, offset, _, _, len) ->
          Printf.sprintf "Piece %d %Ld[%d]" index offset len
      | Cancel (index, offset, len) ->
          Printf.sprintf "Cancel %d %Ld[%Ld]" index offset len
      | Ping -> "Ping"
      | PeerID s -> Printf.sprintf "PeerID [%s]" (String.escaped s)

    (* Decode the payload [m] of a message with the given [opcode].
       For [Piece] the payload string is kept as is and the data is described
       by the (start, length) pair (8, length - 8).
       Raises [Not_found] on an unknown opcode; a payload that is too short
       is reported by whatever the integer readers raise. *)
    let parsing opcode m =
      match opcode with
      | 0 -> Choke
      | 1 -> Unchoke
      | 2 -> Interested
      | 3 -> NotInterested
      | 4 -> Have (get_uint64_32 m 0)
      | 5 -> BitField m
      | 6 -> Request (get_int m 0, get_uint64_32 m 4, get_uint64_32 m 8)
      | 7 -> Piece (get_int m 0, get_uint64_32 m 4, m, 8, String.length m - 8)
      | 8 -> Cancel (get_int m 0, get_uint64_32 m 4, get_uint64_32 m 8)
      | -1 -> PeerID m
      | _ -> raise Not_found

    (* Scratch buffer shared by every call to [write]. *)
    let buf = Buffer.create 100

    (* Serialise [msg] as a length-prefixed frame and return it.
       A 4-byte placeholder is written first and patched with the real
       length (frame size minus the prefix) once the body is known.
       [Ping] and [PeerID] have no body, so they yield a bare zero length,
       i.e. a keep-alive. *)
    let write msg =
      Buffer.reset buf;
      begin
        buf_int buf 0;
        match msg with
        | Choke -> buf_int8 buf 0
        | Unchoke -> buf_int8 buf 1
        | Interested -> buf_int8 buf 2
        | NotInterested -> buf_int8 buf 3
        | Have i -> buf_int8 buf 4; buf_int64_32 buf i
        | BitField string -> buf_int8 buf 5; Buffer.add_string buf string
        | Request (index, pos, len) ->
            buf_int8 buf 6;
            buf_int buf index; buf_int64_32 buf pos; buf_int64_32 buf len
        | Piece (num, index, s, pos, len) ->
            buf_int8 buf 7;
            buf_int buf num;
            buf_int64_32 buf index;
            Buffer.add_substring buf s pos len
        | Cancel (index, pos, len) ->
            (* Previously nothing was written here, so a cancel went out as
               a keep-alive.  Same layout as Request, mirroring what
               [parsing] reads back for opcode 8. *)
            buf_int8 buf 8;
            buf_int buf index; buf_int64_32 buf pos; buf_int64_32 buf len
        | PeerID _ -> ()
        | Ping -> ()
      end;
      let s = Buffer.contents buf in
      str_int s 0 (String.length s - 4);
      s
  end
328 (*************************************************************************)
329 (* *)
330 (* UdpMessages *)
331 (* *)
332 (*************************************************************************)
module UdpMessages = struct

    (* UDP packets, distinguished by their first byte:
       0x27 -> PingReq, 0x28 -> SupernodePongReq, 0x29 -> NodePongReq;
       anything else is kept verbatim in UnknownReq together with that
       byte. *)
    type t =
      PingReq of int * string * string
    | SupernodePongReq of int * string * string
    | NodePongReq of int * string
    | UnknownReq of int * string

    (* Read the NUL-terminated string starting at [pos] in [s].
       Returns the string (without the terminator) and the position of the
       first byte after the terminator.  The second component used to be
       [pos + 1], which does not depend on the string length at all.
       Raises [Not_found] when no NUL byte follows [pos]. *)
    let extract_string s pos =
      let end_pos = String.index_from s pos '\000' in
      String.sub s pos (end_pos - pos), end_pos + 1

    (* Decode the raw packet [p].
       Exceptions from the string accessors ([Invalid_argument] on a packet
       that is too short, [Not_found] on a missing terminator) are left to
       the caller. *)
    let parse p =
      match int_of_char p.[0] with
      | 0x27 ->
          let min_enc_type = get_int p 1 in
          let unknown = String.sub p 5 1 in
          let netname, _ = extract_string p 6 in
          PingReq (min_enc_type, unknown, netname)
      | 0x28 ->
          let min_enc_type = get_int p 1 in
          let unknown = String.sub p 5 6 in
          let netname, _ = extract_string p 11 in
          SupernodePongReq (min_enc_type, unknown, netname)
      | 0x29 ->
          let min_enc_type = get_int p 1 in
          let unknown = String.sub p 5 (String.length p - 5) in
          NodePongReq (min_enc_type, unknown)
      | n -> UnknownReq (n, p)

    (* Encode a packet; the inverse of [parse].  For [UnknownReq] the stored
       bytes are emitted unchanged (they already contain the opcode byte). *)
    let write p =
      let b = Buffer.create 100 in
      begin
        match p with
        | PingReq (min_enc_type, unknown, netname) ->
            buf_int8 b 0x27;
            buf_int b min_enc_type;
            Buffer.add_string b unknown;
            Buffer.add_string b netname;
            buf_int8 b 0x00
        | SupernodePongReq (min_enc_type, unknown, netname) ->
            buf_int8 b 0x28;
            buf_int b min_enc_type;
            Buffer.add_string b unknown;
            Buffer.add_string b netname;
            buf_int8 b 0x00
        | NodePongReq (min_enc_type, unknown) ->
            buf_int8 b 0x29;
            buf_int b min_enc_type;
            Buffer.add_string b unknown
        | UnknownReq (_, unknown) ->
            Buffer.add_string b unknown
      end;
      Buffer.contents b

    (* Printable description of a packet for the UDP debug log. *)
    let to_string p =
      let b = Buffer.create 100 in
      begin
        match p with
        | PingReq (min_enc_type, unknown, netname) ->
            Printf.bprintf b "Ping (%d, " min_enc_type;
            bprint_ints b unknown;
            Printf.bprintf b ", %s)" netname
        | SupernodePongReq (min_enc_type, unknown, netname) ->
            Printf.bprintf b "SupernodePong (%d, " min_enc_type;
            bprint_ints b unknown;
            Printf.bprintf b ", %s)" netname
        | NodePongReq (min_enc_type, unknown) ->
            Printf.bprintf b "NodePong (%d, " min_enc_type;
            bprint_ints b unknown;
            Printf.bprintf b ")"
        | UnknownReq (_, unknown) ->
            Printf.bprintf b "Unknown \n    ";
            bprint_ints b unknown;
            Printf.bprintf b  "\n    ";
            bprint_chars b unknown;
            Printf.bprintf b "\n"
      end;
      Buffer.contents b

    (* Encode [msg] and send it to [ip]:[port] on the UDP socket [t].
       The packet is logged first when [verbose_udp] is set.  Failures while
       encoding or sending are logged and swallowed (best effort). *)
    let udp_send t ip port ping msg =

      if !verbose_udp then begin
          lprintf "Message UDP to %s:%d\n%s\n" (Ip.to_string ip) port
            (to_string msg);
        end;

      try
        let s = write msg in
        UdpSocket.write t ping s ip port
      with e ->
          lprintf "FT: Exception %s in udp_send\n" (Printexc2.to_string e)
  end
(* Raised inside [bt_handler] to stop processing until more bytes arrive.
   The payload names the stage that ran short and is only used for
   logging. *)
exception Wait_for_more of string

(* Reader for an established peer connection.
   [parse_fun opcode payload] decodes one message and [handler sock msg]
   consumes it; [c] is the client record whose [client_received_peer_id]
   flag tells whether the 20 peer id bytes were already taken off the
   stream.  All exceptions are caught and logged here. *)
let bt_handler parse_fun handler c sock =
  try
    let b = TcpBufferedSocket.buf sock in
    if not c.client_received_peer_id then begin
      (* we get and parse the peer_id here because it may
         not be sent from trackers that test us for NAT
         (they just wait for our handshake response and
         then drop the connection) *)
      if b.len < 20 then raise (Wait_for_more "peer_id");
      let peer_id_bytes = String.sub b.buf b.pos 20 in
      let peer_id_msg = parse_fun (-1) peer_id_bytes in
      buf_used b 20;
      c.client_received_peer_id <- true;
      (try
         handler sock peer_id_msg
       with e ->
         lprintf_nl "Exception %s in BTProtocol.parse_fun while handling peer_id"
           (Printexc2.to_string e);
         dump peer_id_bytes;
         buf_used b b.len;
         close sock Closed_by_user);
      (* must break the loop even if there is data, because the socket
         could be closed beneath our feet and then b.buf seems to be zero
         length regardless of what b.len tells (this is a bug somewhere in
         tcpBufferedSocket i think) *)
      raise (Wait_for_more "after_peer_id")
    end;
    while b.len >= 4 do
      let msg_len = get_int b.buf b.pos in
      if msg_len < 0 then begin
        let (ip, port) = TcpBufferedSocket.peer_addr sock in
        lprintf_nl "BT: Unknown message from %s:%d dropped!! peerid:%b data_len:%i msg_len:%i software: %s"
          (Ip.to_string ip) port c.client_received_peer_id b.len msg_len
          (brand_to_string c.client_brand);
        dump (String.sub b.buf b.pos (min b.len 30));
        (* discard everything buffered; this also ends the loop *)
        buf_used b b.len;
        close sock Closed_by_user
      end
      else if msg_len > 20000 then begin
        (* We NEVER request pieces greater than size 20000, this client is
           trying to waste our bandwidth ? *)
        let (ip, port) = TcpBufferedSocket.peer_addr sock in
        lprintf_nl "btprotocol.bt_handler: closed connection from %s:%d because of too much data!! data_len:%i msg_len:%i software: %s"
          (Ip.to_string ip) port b.len msg_len (brand_to_string c.client_brand);
        dump (String.sub b.buf b.pos (min b.len 30));
        buf_used b b.len;
        close sock Closed_by_user
      end
      else if b.len < 4 + msg_len then
        (* frame not complete yet *)
        raise (Wait_for_more "message")
      else begin
        buf_used b 4;
        if msg_len = 0 then
          (* received a ping *)
          set_lifetime sock 130.
        else begin
          let opcode = get_int8 b.buf b.pos in
          let payload = String.sub b.buf (b.pos+1) (msg_len-1) in
          buf_used b msg_len;
          try
            (* We use opcodes < 0 internally and
               they do not occur in the spec *)
            if opcode < 0 then raise Not_found;
            let decoded = parse_fun opcode payload in
            handler sock decoded
          with e ->
            lprintf_nl "Exception %s in BTProtocol.parse_fun while handling message with opcode: %d"
              (Printexc2.to_string e) opcode;
            dump payload
        end
      end
    done;
    (* fewer than 4 bytes left over: wait for the rest of the length *)
    if b.len <> 0 then raise (Wait_for_more "loop")
  with
  | Wait_for_more stage ->
      if closed sock && stage <> "after_peer_id" then
        lprintf_nl "bt_handler: Socket was closed while waiting for more data in %s" stage
  | e ->
      lprintf_nl "Exception %s in bt_handler"
        (Printexc2.to_string e)
(* Build the socket reader for the connection state [gconn].
   While [gconn.gconn_handler] is [BTHeader h] the reader waits for the
   handshake prefix: 1 length byte, [slen] protocol bytes, 8 reserved bytes
   and the 20-byte info hash (slen + 29 bytes in total), then consumes it
   and calls [h] with (protocol, reserved bytes, info hash).
   With [Reader h] the buffered data is handed to [h] directly.
   [info] is not used by this function. *)
let handlers info gconn =
  let iter_read sock _nread =
    let b = TcpBufferedSocket.buf sock in
    if b.len > 0 then
      match gconn.gconn_handler with
      | BTHeader h ->
          let slen = get_int8 b.buf b.pos in
          if slen + 29 <= b.len then
            begin
              (* get proto and file_id from handshake,
                 peer_id is not fetched here because
                 it might be late or not present *)
              let file_id = Sha1.direct_of_string
                  (String.sub b.buf (b.pos+9+slen) 20) in
              let proto, _ = get_string8 b.buf b.pos in
              (* The reserved bytes sit right after the protocol string.
                 Address them from [slen], like [file_id] above, instead of
                 adding the position returned by [get_string8] to [b.pos],
                 which is only right if that position is relative. *)
              let rbits = String.sub b.buf (b.pos+1+slen) 8 in
              buf_used b (slen+29);
              h gconn sock (proto, rbits, file_id)
            end
          else if (String.sub b.buf b.pos (min b.len 100)) = "NATCHECK_HANDSHAKE" then
            write_string sock (Printf.sprintf "azureus_rand_%d" !azureus_porttest_random)
          else if (TcpBufferedSocket.closed sock) then
            begin
              let (ip,port) = (TcpBufferedSocket.peer_addr sock) in
              lprintf_nl "bt-handshake: closed sock from %s:%d  b.len:%i slen:%i"
                (Ip.to_string ip) port b.len slen
            end
      | Reader h ->
          h gconn sock
  in
  iter_read
(* Attach the BitTorrent callbacks to [sock].
   A fresh [gconn] starting with [ghandler] is created and shared by three
   callbacks: the reader built by [handlers], a refill hook that runs the
   first pending refill callback, and a WRITE_DONE hook that drops that
   callback and moves on to the next one. *)
let set_bt_sock sock info ghandler =
  let gconn = {
      gconn_handler = ghandler;
      gconn_refill = [];
      gconn_close_on_write = false;
    } in
  let on_refill sock =
    match gconn.gconn_refill with
    | first :: _ -> first sock
    | [] -> ()
  in
  let on_write_done sock =
    match gconn.gconn_refill with
    | [] -> ()
    | _ :: pending ->
        gconn.gconn_refill <- pending;
        (match pending with
         | next :: _ -> next sock
         | [] ->
             (* nothing left to send: let the socket expire soon instead of
                closing it right away *)
             if gconn.gconn_close_on_write then
               set_lifetime sock 30.)
  in
  TcpBufferedSocket.set_reader sock (handlers info gconn);
  TcpBufferedSocket.set_refill sock on_refill;
  TcpBufferedSocket.set_handler sock TcpBufferedSocket.WRITE_DONE on_write_done
590 No payload:
591 * 0 - choke: you have been blocked
592 * 1 - unchoke: you have been unblocked
593 * 2 - interested: I'm interested in downloading this file now
594 * 3 - not interested: I'm not interested in downloading this file now
595 With binary (not bencoded) payload:
596 * 4 - have
597 int : index of new completed chunk
598 * 5 - bitfield:
599 string: a bitfield of bit 1 for downloaded chunks
600 byte: bits are inverted 0....7 ---> 7 .... 0
601 * 6 - request
602 int: index
603 int: begin
604 int: length (power of 2, 2 ^ 15)
605 * 7 - piece
606 int: index
607 int: begin
608 string: piece
609 * 8 - cancel: cancel a request
610 int: index
611 int: begin
612 int: length (power of 2, 2 ^ 15)
(* Send [msg] to the peer behind [client_sock]; the callback is run through
   [do_if_connected].  The message is encoded before it is logged (when
   [verbose_msg_clients] is set) and then written to the socket.  Any
   exception is logged and swallowed. *)
let send_client client_sock msg =
  let deliver sock =
    try
      let frame = TcpMessages.write msg in
      if !verbose_msg_clients then
        lprintf_nl "send message: %s" (TcpMessages.to_string msg);
      (* dump frame; *)
      write_string sock frame
    with e ->
      lprintf_nl "CLIENT : Error %s in send_client"
        (Printexc2.to_string e)
  in
  do_if_connected client_sock deliver
(* The 8 reserved handshake bytes, all zero. *)
let zero8 = String.make 8 '\000'

(* Write the handshake to [sock]: the length-prefixed protocol name,
   the reserved bytes, the raw bytes of [file_id] and finally the raw bytes
   of [client_uid]. *)
let send_init client_uid file_id sock =
  let handshake = Buffer.create 100 in
  buf_string8 handshake "BitTorrent protocol";
  List.iter (Buffer.add_string handshake)
    [ zero8;
      Sha1.direct_to_string file_id;
      Sha1.direct_to_string client_uid ];
  write_string sock (Buffer.contents handshake)