From 6e01602d47df8913a610fa01be6f0bbcfdadbfc7 Mon Sep 17 00:00:00 2001 From: spiralvoice Date: Sun, 7 Nov 2010 15:01:39 +0000 Subject: [PATCH] patch #6012 --- config/Makefile.in | 56 +-- distrib/ChangeLog | 3 + src/utils/cdk/gzip.ml | 139 ++++---- src/utils/cdk/gzip.mli | 28 +- src/utils/cdk/tar.mlcpp | 6 +- src/utils/cdk/unix2.ml | 4 +- src/utils/extlib/IO.ml | 771 ++++++++++++++++++++++++++++++++++++++++++ src/utils/extlib/IO.mli | 323 ++++++++++++++++++ src/utils/net/http_client.ml | 63 +++- src/utils/net/http_client.mli | 1 + 10 files changed, 1285 insertions(+), 109 deletions(-) create mode 100644 src/utils/extlib/IO.ml create mode 100644 src/utils/extlib/IO.mli diff --git a/config/Makefile.in b/config/Makefile.in index 0ba512ea..1172353a 100644 --- a/config/Makefile.in +++ b/config/Makefile.in @@ -67,6 +67,7 @@ else endif +EXTLIB=src/utils/extlib CDK=src/utils/cdk BITSTRING=src/utils/bitstring LIB=src/utils/lib @@ -89,7 +90,7 @@ SRC_SOULSEEK=src/networks/soulseek SRC_DIRECTCONNECT=src/networks/direct_connect SRC_FILETP=src/networks/fileTP -SUBDIRS=$(CDK) $(BITSTRING) $(LIB) $(RSS) $(XML) $(NET) tools \ +SUBDIRS=$(EXTLIB) $(CDK) $(BITSTRING) $(LIB) $(RSS) $(XML) $(NET) tools \ $(COMMON) $(DRIVER) $(MP3) src/config/$(OS_FILES) INCLUDES += $(foreach file, $(SUBDIRS), -I $(file)) -I +camlp4 @@ -154,7 +155,9 @@ CDK_SRCS+= $(LIB)/fifo.ml $(CDK)/arg2.ml $(LIB)/syslog.ml $(CDK)/printf2.ml \ $(CDK)/filename2.ml $(CDK)/list2.ml $(CDK)/hashtbl2.ml \ $(CDK)/unix2.ml $(CDK)/file.ml \ $(CDK)/heap_c.c $(CDK)/array2.ml - + +EXTLIB_SRCS += $(EXTLIB)/IO.ml + ifneq ("$(PTHREAD_CFLAGS)" , "") CFLAGS += $(PTHREAD_CFLAGS) LIBS_flags += -ccopt "$(PTHREAD_CFLAGS)" @@ -508,33 +511,33 @@ DIRECT_CONNECT_SRCS= \ OBSERVER_SRCS = \ - $(CDK_SRCS) $(LIB_SRCS) $(NET_SRCS) $(MP3TAG_SRCS) \ + $(EXTLIB_SRCS) $(CDK_SRCS) $(LIB_SRCS) $(NET_SRCS) $(MP3TAG_SRCS) \ $(COMMON_SRCS) $(COMMON_CLIENT_SRCS) $(DONKEY_SRCS) \ tools/observer.ml MLD_HASH_SRCS = \ - $(CDK_SRCS) $(LIB_SRCS) $(NET_SRCS) $(MP3TAG_SRCS) \ + $(EXTLIB_SRCS) $(CDK_SRCS) $(LIB_SRCS) $(NET_SRCS) $(MP3TAG_SRCS) \ tools/mld_hash.ml OCAMLPP_SRCS = \ tools/ocamlpp.ml4 COPYSOURCES_SRCS = \ - $(CDK_SRCS) $(LIB_SRCS) tools/copysources.ml + $(EXTLIB_SRCS) $(CDK_SRCS) $(LIB_SRCS) tools/copysources.ml SUBCONV_SRCS = \ - $(CDK_SRCS) $(LIB_SRCS) tools/subconv.ml + $(EXTLIB_SRCS) $(CDK_SRCS) $(LIB_SRCS) tools/subconv.ml MLSPLIT_SRCS = \ - $(CDK_SRCS) $(LIB_SRCS) tools/mlsplit.ml + $(EXTLIB_SRCS) $(CDK_SRCS) $(LIB_SRCS) tools/mlsplit.ml MAKE_TORRENT_SRCS = \ - $(MAGIC_SRCS) $(CDK_SRCS) $(LIB_SRCS) $(NET_SRCS) $(MP3TAG_SRCS) \ + $(MAGIC_SRCS) $(EXTLIB_SRCS) $(CDK_SRCS) $(LIB_SRCS) $(NET_SRCS) $(MP3TAG_SRCS) \ $(COMMON_SRCS) $(COMMON_CLIENT_SRCS) $(BITSTRING_SRCS) $(BITTORRENT_SRCS) \ tools/make_torrent.ml GET_RANGE_SRCS = \ - $(CDK_SRCS) $(LIB_SRCS) $(NET_SRCS) $(MP3TAG_SRCS) \ + $(EXTLIB_SRCS) $(CDK_SRCS) $(LIB_SRCS) $(NET_SRCS) $(MP3TAG_SRCS) \ tools/get_range.ml ifeq ("$(OPENFT)" , "yes") @@ -585,18 +588,18 @@ BITSTRING_CMXA=bitstring.cmxa BITSTRING_CMA=bitstring.cma endif MLNET_SRCS+= $(MAIN_SRCS) -MLNET_CMXA=$(CDK_CMXA) $(BITSTRING_CMXA) magic.cmxa common.cmxa client.cmxa core.cmxa driver.cmxa +MLNET_CMXA=extlib.cmxa $(CDK_CMXA) $(BITSTRING_CMXA) magic.cmxa common.cmxa client.cmxa core.cmxa driver.cmxa TESTS_CMXA=$(CDK_CMXA) magic.cmxa common.cmxa client.cmxa core.cmxa TESTS_SRCS=tools/tests.ml ifeq ("$(GUI)", "newgui2") mlnet+gui_CMXA= \ - $(BITSTRING_CMXA) magic.cmxa cdk.cmxa common.cmxa client.cmxa core.cmxa driver.cmxa \ + $(BITSTRING_CMXA) magic.cmxa extlib.cmxa cdk.cmxa common.cmxa client.cmxa core.cmxa driver.cmxa \ icons.cmxa guibase.cmxa gui.cmxa else mlnet+gui_CMXA= \ - $(BITSTRING_CMXA) magic.cmxa cdk.cmxa common.cmxa client.cmxa core.cmxa driver.cmxa \ + $(BITSTRING_CMXA) magic.cmxa extlib.cmxa cdk.cmxa common.cmxa client.cmxa core.cmxa driver.cmxa \ gmisc.cmxa icons.cmxa guibase.cmxa gui.cmxa endif @@ -610,8 +613,7 @@ mlnet+gui_SRCS=$(MAIN_SRCS) ####################################################################### -TESTRSS_SRCS= \ - $(CDK_SRCS) $(LIB_SRCS) tools/testrss.ml +TESTRSS_SRCS= $(EXTLIB_SRCS) $(CDK_SRCS) $(LIB_SRCS) tools/testrss.ml ####################################################################### @@ -658,7 +660,7 @@ ifneq ("$(GUI)" , "no") endif SVG_CONVERTER_SRCS = \ - $(CDK_SRCS) $(LIB_SRCS) tools/svg_converter.ml + $(EXTLIB_SRCS) $(CDK_SRCS) $(LIB_SRCS) tools/svg_converter.ml CURSES_LIBS_byte=-cclib -lncurses CURSES_LIBS_opt=-cclib -lncurses @@ -1080,25 +1082,25 @@ OLDGUI_SRCS= \ GUI_SRCS= $($(GUI_CODE)_SRCS) ifeq ("$(GUI)", "newgui2") - MLDONKEYGUI_CMXA= cdk.cmxa common.cmxa icons.cmxa guibase.cmxa gui.cmxa + MLDONKEYGUI_CMXA= extlib.cmxa cdk.cmxa common.cmxa icons.cmxa guibase.cmxa gui.cmxa else - MLDONKEYGUI_CMXA= cdk.cmxa gmisc.cmxa common.cmxa icons.cmxa guibase.cmxa gui.cmxa + MLDONKEYGUI_CMXA= extlib.cmxa cdk.cmxa gmisc.cmxa common.cmxa icons.cmxa guibase.cmxa gui.cmxa endif MLDONKEYGUI_SRCS= $(MAIN_SRCS) ifeq ("$(GUI)", "newgui2") - STARTER_CMXA=cdk.cmxa common.cmxa icons.cmxa guibase.cmxa + STARTER_CMXA=extlib.cmxa cdk.cmxa common.cmxa icons.cmxa guibase.cmxa STARTER_SRCS= $(SRC_GUI)/guiStarter.ml else - STARTER_CMXA=cdk.cmxa + STARTER_CMXA=extlib.cmxa cdk.cmxa STARTER_SRCS= $(SRC_GUI)/gui_starter.ml endif ifeq ("$(GUI)", "newgui2") - INSTALLER_CMXA= cdk.cmxa common.cmxa icons.cmxa guibase.cmxa + INSTALLER_CMXA= extlib.cmxa cdk.cmxa common.cmxa icons.cmxa guibase.cmxa else - INSTALLER_CMXA= cdk.cmxa gmisc.cmxa common.cmxa icons.cmxa guibase.cmxa + INSTALLER_CMXA= extlib.cmxa cdk.cmxa gmisc.cmxa common.cmxa icons.cmxa guibase.cmxa endif ifeq ("$(GUI)", "newgui2") @@ -1109,7 +1111,7 @@ else $(SRC_GUI)/gui_installer_base.zog $(SRC_GUI)/gui_installer.ml endif -MLPROGRESS_CMXA= cdk.cmxa gmisc.cmxa common.cmxa icons.cmxa guibase.cmxa +MLPROGRESS_CMXA= extlib.cmxa cdk.cmxa gmisc.cmxa common.cmxa icons.cmxa guibase.cmxa MLPROGRESS_SRCS = \ $(PROGRESS_SRCS) $(MAIN_SRCS) @@ -1127,7 +1129,7 @@ top: mldonkeytop runtop: top ./mldonkeytop $(INCLUDES) -TOP_CMXA+=$(BITSTRING_CMA) cdk.cmxa magic.cmxa common.cmxa client.cmxa core.cmxa +TOP_CMXA+=$(BITSTRING_CMA) extlib.cmxa cdk.cmxa magic.cmxa common.cmxa client.cmxa core.cmxa TOP_SRCS= define([[EXPAND_LIB]],[[ @@ -1180,18 +1182,18 @@ $1+gui_CMXA+= $(BITSTRING_CMXA) endif endif -$1_CMXA+= cdk.cmxa magic.cmxa common.cmxa client.cmxa $1.cmxa driver.cmxa +$1_CMXA+= extlib.cmxa cdk.cmxa magic.cmxa common.cmxa client.cmxa $1.cmxa driver.cmxa $1_SRCS+= $(MAIN_SRCS) EXPAND_LIB($2,$1) ifeq ("$(GUI)", "newgui2") -$1+gui_CMXA+=cdk.cmxa \ +$1+gui_CMXA+=extlib.cmxa cdk.cmxa \ magic.cmxa common.cmxa client.cmxa $1.cmxa driver.cmxa \ icons.cmxa guibase.cmxa gui.cmxa else -$1+gui_CMXA+=cdk.cmxa \ +$1+gui_CMXA+=extlib.cmxa cdk.cmxa \ magic.cmxa common.cmxa client.cmxa $1.cmxa driver.cmxa \ gmisc.cmxa icons.cmxa guibase.cmxa gui.cmxa endif @@ -1210,6 +1212,7 @@ EXPAND_DRIVER(mlbt,BITTORRENT,bittorrent) EXPAND_DRIVER(mldonkey,DONKEY,donkey) EXPAND_DRIVER(mlslsk,SOULSEEK,soulseek) +libextlib_SRCS= $(EXTLIB_SRCS) libcdk_SRCS= $(CDK_SRCS) $(LIB_SRCS) $(NET_SRCS) $(MP3TAG_SRCS) libmagic_SRCS= $(MAGIC_SRCS) libbitstring_SRCS= $(BITSTRING_SRCS) @@ -1225,6 +1228,7 @@ libgui_SRCS= $(GUI_SRCS) libgui3_SRCS= $(GUI3_SRCS) libicons_SRCS= $(ALL_ICONS_SRCS) +EXPAND_LIB(libextlib,extlib) EXPAND_LIB(libicons,icons) EXPAND_LIB(libcdk,cdk) EXPAND_LIB(libmagic,magic) diff --git a/distrib/ChangeLog b/distrib/ChangeLog index ceb31b1b..6ac959ac 100644 --- a/distrib/ChangeLog +++ b/distrib/ChangeLog @@ -14,6 +14,9 @@ http://mldonkey.sourceforge.net/Windows#MinGW_Installation ChangeLog ========= +2010/11/07 +6012: http_client: Support gzip accept-encoding + content-encoding (ygrek) + 2010/11/03 7372: GTK2 GUI: Compile with lablgtk-2.14.2 by default diff --git a/src/utils/cdk/gzip.ml b/src/utils/cdk/gzip.ml index 3f2b98b4..cb956215 100644 --- a/src/utils/cdk/gzip.ml +++ b/src/utils/cdk/gzip.ml @@ -6,62 +6,64 @@ (* *) (* Copyright 2001 Institut National de Recherche en Informatique et *) (* en Automatique. All rights reserved. This file is distributed *) -(* under the terms of the GNU Library General Public License. *) +(* under the terms of the GNU Library General Public License, with *) +(* the special exception on linking described in file LICENSE. *) (* *) (***********************************************************************) -(* $Id$ *) +(* Origin: $ Id: gzip.ml,v 1.2 2006/04/04 08:29:07 xleroy Exp $ *) -(* Module [Gzip]: reading and writing to/from [gzip] compressed files *) +(* Module [Gzip]: reading and writing to/from [gzip] compressed streams *) exception Error of string let buffer_size = 1024 type in_channel = - { in_chan: Pervasives.in_channel; + { in_chan: IO.input; in_buffer: string; mutable in_pos: int; mutable in_avail: int; mutable in_eof: bool; in_stream: Zlib.stream; mutable in_size: int32; - mutable in_crc: int32 } + mutable in_crc: int32; + char_buffer: string } -let open_in_chan ic = +let open_in ic = (* Superficial parsing of header *) begin try - let id1 = input_byte ic in - let id2 = input_byte ic in + let id1 = IO.read_byte ic in + let id2 = IO.read_byte ic in if id1 <> 0x1F || id2 <> 0x8B then raise(Error("bad magic number, not a gzip file")); - let cm = input_byte ic in + let cm = IO.read_byte ic in if cm <> 8 then raise(Error("unknown compression method")); - let flags = input_byte ic in + let flags = IO.read_byte ic in if flags land 0xE0 <> 0 then raise(Error("bad flags, not a gzip file")); - for i = 1 to 6 do ignore(input_byte ic) done; + for i = 1 to 6 do ignore(IO.read_byte ic) done; if flags land 0x04 <> 0 then begin (* Skip extra data *) - let len1 = input_byte ic in - let len2 = input_byte ic in - for i = 1 to len1 + len2 lsl 8 do ignore(input_byte ic) done + let len1 = IO.read_byte ic in + let len2 = IO.read_byte ic in + for i = 1 to len1 + len2 lsl 8 do ignore(IO.read_byte ic) done end; if flags land 0x08 <> 0 then begin (* Skip original file name *) - while input_byte ic <> 0 do () done + while IO.read_byte ic <> 0 do () done end; if flags land 0x10 <> 0 then begin (* Skip comment *) - while input_byte ic <> 0 do () done + while IO.read_byte ic <> 0 do () done end; if flags land 0x02 <> 0 then begin (* Skip header CRC *) - ignore(input_byte ic); ignore(input_byte ic) + ignore(IO.read_byte ic); ignore(IO.read_byte ic) end - with End_of_file -> - raise(Error("premature end of file, not a gzip file")) + with IO.No_more_input -> + raise(Error("premature end of input, not a gzip stream")) end; { in_chan = ic; in_buffer = String.create buffer_size; @@ -70,19 +72,19 @@ let open_in_chan ic = in_eof = false; in_stream = Zlib.inflate_init false; in_size = Int32.zero; - in_crc = Int32.zero } + in_crc = Int32.zero; + char_buffer = String.create 1 } -let open_in filename = +let open_in_file filename = let ic = Pervasives.open_in_bin filename in try - open_in_chan ic + open_in (IO.input_channel ic) with e -> Pervasives.close_in ic; raise e let read_byte iz = if iz.in_avail = 0 then begin - let n = Pervasives.input iz.in_chan iz.in_buffer 0 + let n = IO.input iz.in_chan iz.in_buffer 0 (String.length iz.in_buffer) in - if n = 0 then raise End_of_file; iz.in_pos <- 0; iz.in_avail <- n end; @@ -103,12 +105,13 @@ let read_int32 iz = let rec input iz buf pos len = if pos < 0 || len < 0 || pos + len > String.length buf then - invalid_arg "Gzip.input"; + invalid_arg "Gzip_stream.input"; if iz.in_eof then 0 else begin if iz.in_avail = 0 then begin - let n = Pervasives.input iz.in_chan iz.in_buffer 0 - (String.length iz.in_buffer) in - if n = 0 then raise(Error("truncated file")); + let n = try IO.input iz.in_chan iz.in_buffer 0 + (String.length iz.in_buffer) + with IO.No_more_input -> raise(Error("truncated stream")) + in iz.in_pos <- 0; iz.in_avail <- n end; @@ -132,8 +135,8 @@ let rec input iz buf pos len = raise(Error("size mismatch, data corrupted")); iz.in_eof <- true; used_out - with End_of_file -> - raise(Error("truncated file")) + with IO.No_more_input -> + raise(Error("truncated stream")) end else if used_out = 0 then input iz buf pos len @@ -148,10 +151,8 @@ let rec really_input iz buf pos len = really_input iz buf (pos + n) (len - n) end -let char_buffer = String.create 1 - let input_char iz = - if input iz char_buffer 0 1 = 0 then raise End_of_file else char_buffer.[0] + if input iz iz.char_buffer 0 1 = 0 then raise End_of_file else iz.char_buffer.[0] let input_byte iz = Char.code (input_char iz) @@ -162,44 +163,50 @@ let dispose iz = let close_in iz = dispose iz; - Pervasives.close_in iz.in_chan + IO.close_in iz.in_chan -type out_channel = - { out_chan: Pervasives.out_channel; +type 'a out_channel = + { out_chan: 'a IO.output; out_buffer: string; mutable out_pos: int; mutable out_avail: int; out_stream: Zlib.stream; mutable out_size: int32; - mutable out_crc: int32 } + mutable out_crc: int32; + char_buffer: string } -let open_out_chan ?(level = 6) oc = - if level < 1 || level > 9 then invalid_arg "Gzip.open_out: bad level"; +let open_out ?(level = 6) oc = + if level < 1 || level > 9 then invalid_arg "Gzip_stream.open_output: bad level"; (* Write minimal header *) - output_byte oc 0x1F; (* ID1 *) - output_byte oc 0x8B; (* ID2 *) - output_byte oc 8; (* compression method *) - output_byte oc 0; (* flags *) - for i = 1 to 4 do output_byte oc 0 done; (* mtime *) - output_byte oc 0; (* xflags *) - output_byte oc 0xFF; (* OS (unknown) *) + IO.write_byte oc 0x1F; (* ID1 *) + IO.write_byte oc 0x8B; (* ID2 *) + IO.write_byte oc 8; (* compression method *) + IO.write_byte oc 0; (* flags *) + for i = 1 to 4 do IO.write_byte oc 0 done; (* mtime *) + IO.write_byte oc 0; (* xflags *) + IO.write_byte oc 0xFF; (* OS (unknown) *) { out_chan = oc; out_buffer = String.create buffer_size; out_pos = 0; out_avail = buffer_size; out_stream = Zlib.deflate_init level false; out_size = Int32.zero; - out_crc = Int32.zero } + out_crc = Int32.zero; + char_buffer = String.create 1 } -let open_out ?(level = 6) filename = - open_out_chan ~level (Pervasives.open_out_bin filename) +let open_out_file ?level filename = + let oc = Pervasives.open_out_bin filename in + try + open_out ?level (IO.output_channel oc) + with + exn -> Pervasives.close_out oc; raise exn let rec output oz buf pos len = if pos < 0 || len < 0 || pos + len > String.length buf then - invalid_arg "Gzip.output"; + invalid_arg "Gzip_stream.output"; (* If output buffer is full, flush it *) if oz.out_avail = 0 then begin - Pervasives.output oz.out_chan oz.out_buffer 0 oz.out_pos; + ignore (IO.really_output oz.out_chan oz.out_buffer 0 oz.out_pos); oz.out_pos <- 0; oz.out_avail <- String.length oz.out_buffer end; @@ -217,8 +224,8 @@ let rec output oz buf pos len = if used_in < len then output oz buf (pos + used_in) (len - used_in) let output_char oz c = - char_buffer.[0] <- c; - output oz char_buffer 0 1 + oz.char_buffer.[0] <- c; + output oz oz.char_buffer 0 1 let output_byte oz b = output_char oz (Char.unsafe_chr b) @@ -226,7 +233,7 @@ let output_byte oz b = let write_int32 oc n = let r = ref n in for i = 1 to 4 do - Pervasives.output_byte oc (Int32.to_int !r); + IO.write_byte oc (Int32.to_int !r); r := Int32.shift_right_logical !r 8 done @@ -234,7 +241,7 @@ let flush oz = let rec do_flush () = (* If output buffer is full, flush it *) if oz.out_avail = 0 then begin - Pervasives.output oz.out_chan oz.out_buffer 0 oz.out_pos; + ignore (IO.really_output oz.out_chan oz.out_buffer 0 oz.out_pos); oz.out_pos <- 0; oz.out_avail <- String.length oz.out_buffer end; @@ -248,7 +255,7 @@ let flush oz = do_flush(); (* Final data flush *) if oz.out_pos > 0 then - Pervasives.output oz.out_chan oz.out_buffer 0 oz.out_pos; + ignore (IO.really_output oz.out_chan oz.out_buffer 0 oz.out_pos); (* Write CRC and size *) write_int32 oz.out_chan oz.out_crc; write_int32 oz.out_chan oz.out_size; @@ -257,5 +264,23 @@ let flush oz = let close_out oz = flush oz; - Pervasives.close_out oz.out_chan + IO.close_out oz.out_chan + +let input_io io = + let iz = open_in io in + IO.create_in + ~read:(fun () -> input_char iz) + ~input:(input iz) + ~close:(fun () -> close_in iz) + +let output_io io = + let oz = open_out io in + IO.create_out + ~write:(output_char oz) + ~output:(fun s o l -> output oz s o l; l) + ~flush:(fun () -> IO.flush io) + ~close:(fun () -> close_out oz) + +let input_channel ch = input_io (IO.input_channel ch) +let output_channel ch = output_io (IO.output_channel ch) diff --git a/src/utils/cdk/gzip.mli b/src/utils/cdk/gzip.mli index 9c33ab14..9065b337 100644 --- a/src/utils/cdk/gzip.mli +++ b/src/utils/cdk/gzip.mli @@ -22,12 +22,12 @@ type in_channel (* Abstract type representing a channel opened for reading from a compressed file. *) -val open_in: string -> in_channel - (* Open a compressed file for reading. The argument is the file - name. *) -val open_in_chan: Pervasives.in_channel -> in_channel +val open_in: IO.input -> in_channel (* Open a compressed file for reading. The argument is a regular file channel already opened on the compressed file. *) +val open_in_file: string -> in_channel + (* Open a compressed file for reading. The argument is the file + name. *) val input_char: in_channel -> char (* Uncompress one character from the given channel, and return it. Raise [End_of_file] if no more compressed data is available. *) @@ -73,10 +73,10 @@ val dispose: in_channel -> unit (*** Writing to compressed files *) -type out_channel +type 'a out_channel (* Abstract type representing a channel opened for writing to a compressed file. *) -val open_out: ?level:int -> string -> out_channel +val open_out_file: ?level:int -> string -> unit out_channel (* Open a compressed file for writing. The argument is the file name. The file is created if it does not exist, or truncated to zero length if it exists. @@ -85,28 +85,28 @@ val open_out: ?level:int -> string -> out_channel (but fastest) compression and 9 being the strongest (but slowest) compression. The default level is 6 (medium compression). *) -val open_out_chan: ?level:int -> Pervasives.out_channel -> out_channel +val open_out: ?level:int -> 'a IO.output -> 'a out_channel (* Open a compressed file for writing. The argument is a regular file channel already opened on the compressed file. The optional [level] argument sets the compression level as documented for [Gzip.open_out]. *) -val output_char: out_channel -> char -> unit +val output_char: 'a out_channel -> char -> unit (* Output one character to the given compressed channel. *) -val output_byte: out_channel -> int -> unit +val output_byte: 'a out_channel -> int -> unit (* Same as [Gzip.output_char], but the output character is given by its code. The given integer is taken modulo 256. *) -val output: out_channel -> string -> int -> int -> unit +val output: 'a out_channel -> string -> int -> int -> unit (* [output oc buf pos len] compresses and writes [len] characters from string [buf], starting at offset [pos], and writes the compressed data to the channel [oc]. Raise [Invalid_argument "Gzip.output"] if [pos] and [len] do not designate a valid substring of [buf]. *) -val close_out: out_channel -> unit +val close_out: 'a out_channel -> 'a (* Close the given output channel. If the channel was created with [Gzip.open_out_chan], the underlying regular file channel (of type [Pervasives.out_channel]) is also closed. Do not apply any of the functions above to a closed channel. *) -val flush: out_channel -> unit +val flush: 'a out_channel -> unit (* Same as [Gzip.close_out], but do not close the underlying regular file channel (of type [Pervasives.out_channel]); just flush all pending compressed data and @@ -119,3 +119,7 @@ val flush: out_channel -> unit exception Error of string (* Exception raised by the functions above to signal errors during compression or decompression, or ill-formed input files. *) + +val input_io : IO.input -> IO.input +val output_io : 'a IO.output -> 'a IO.output + diff --git a/src/utils/cdk/tar.mlcpp b/src/utils/cdk/tar.mlcpp index a9bccb5c..d254afab 100644 --- a/src/utils/cdk/tar.mlcpp +++ b/src/utils/cdk/tar.mlcpp @@ -75,7 +75,7 @@ end let open_inchan comp chan = match comp with | `Plain -> new in_chan chan - | `Gzip -> new gzin_chan (Gzip.open_in_chan chan) + | `Gzip -> new gzin_chan (Gzip.open_in (IO.input_channel chan)) | `Bzip2 -> #ifdef USE_BZIP2 new bzin_chan (Bzip2.open_in_chan chan) @@ -286,7 +286,7 @@ end class gzout_chan o = object method output str pos len = Gzip.output o str pos len method flush () = Gzip.flush o - method close () = Gzip.close_out o + method close () = (Gzip.close_out o : unit) end #ifdef USE_BZIP2 @@ -301,7 +301,7 @@ end let open_outchan comp chan = match comp with | `Plain -> new out_chan chan - | `Gzip -> new gzout_chan (Gzip.open_out_chan chan) + | `Gzip -> new gzout_chan (Gzip.open_out (IO.output_channel chan)) | `Bzip2 -> #ifdef USE_BZIP2 new bzout_chan (Bzip2.open_out_chan chan) diff --git a/src/utils/cdk/unix2.ml b/src/utils/cdk/unix2.ml index c9bf4a45..a7939056 100644 --- a/src/utils/cdk/unix2.ml +++ b/src/utils/cdk/unix2.ml @@ -52,9 +52,9 @@ let tryopen_read_tar fn f = let tryopen_write_tar ?compress fn f = tryopen (Tar.open_out ?compress) Tar.close_out fn f let tryopen_read_gzip fn f = - tryopen Gzip.open_in Gzip.close_in fn f + tryopen Gzip.open_in_file Gzip.close_in fn f let tryopen_write_gzip ?level fn f = - tryopen (Gzip.open_out ?level) Gzip.close_out fn f + tryopen (Gzip.open_out_file ?level) Gzip.close_out fn f let tryopen_umask temp_umask f = (* Unix.umask is not implemented on MinGW *) let safe_umask umask = try Unix.umask umask with Invalid_argument _ -> 0 in diff --git a/src/utils/extlib/IO.ml b/src/utils/extlib/IO.ml new file mode 100644 index 00000000..3b78d10f --- /dev/null +++ b/src/utils/extlib/IO.ml @@ -0,0 +1,771 @@ +(* + * IO - Abstract input/output + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +type input = { + mutable in_read : unit -> char; + mutable in_input : string -> int -> int -> int; + mutable in_close : unit -> unit; +} + +type 'a output = { + mutable out_write : char -> unit; + mutable out_output : string -> int -> int -> int; + mutable out_close : unit -> 'a; + mutable out_flush : unit -> unit; +} + +exception No_more_input +exception Input_closed +exception Output_closed + +(* -------------------------------------------------------------- *) +(* API *) + +let default_close = (fun () -> ()) + +let create_in ~read ~input ~close = + { + in_read = read; + in_input = input; + in_close = close; + } + +let create_out ~write ~output ~flush ~close = + { + out_write = write; + out_output = output; + out_close = close; + out_flush = flush; + } + +let read i = i.in_read() + +let nread i n = + if n < 0 then invalid_arg "IO.nread"; + if n = 0 then + "" + else + let s = String.create n in + let l = ref n in + let p = ref 0 in + try + while !l > 0 do + let r = i.in_input s !p !l in + if r = 0 then raise No_more_input; + p := !p + r; + l := !l - r; + done; + s + with + No_more_input as e -> + if !p = 0 then raise e; + String.sub s 0 !p + +let really_output o s p l' = + let sl = String.length s in + if p + l' > sl || p < 0 || l' < 0 then invalid_arg "IO.really_output"; + let l = ref l' in + let p = ref p in + while !l > 0 do + let w = o.out_output s !p !l in + if w = 0 then raise Sys_blocked_io; + p := !p + w; + l := !l - w; + done; + l' + +let input i s p l = + let sl = String.length s in + if p + l > sl || p < 0 || l < 0 then invalid_arg "IO.input"; + if l = 0 then + 0 + else + i.in_input s p l + +let really_input i s p l' = + let sl = String.length s in + if p + l' > sl || p < 0 || l' < 0 then invalid_arg "IO.really_input"; + let l = ref l' in + let p = ref p in + while !l > 0 do + let r = i.in_input s !p !l in + if r = 0 then raise Sys_blocked_io; + p := !p + r; + l := !l - r; + done; + l' + +let really_nread i n = + if n < 0 then invalid_arg "IO.really_nread"; + if n = 0 then "" + else + let s = String.create n + in + ignore(really_input i s 0 n); + s + +let close_in i = + let f _ = raise Input_closed in + i.in_close(); + i.in_read <- f; + i.in_input <- f; + i.in_close <- f + +let write o x = o.out_write x + +let nwrite o s = + let p = ref 0 in + let l = ref (String.length s) in + while !l > 0 do + let w = o.out_output s !p !l in + if w = 0 then raise Sys_blocked_io; + p := !p + w; + l := !l - w; + done + +let output o s p l = + let sl = String.length s in + if p + l > sl || p < 0 || l < 0 then invalid_arg "IO.output"; + o.out_output s p l + +let printf o fmt = + Printf.kprintf (fun s -> nwrite o s) fmt + +let flush o = o.out_flush() + +let close_out o = + let f _ = raise Output_closed in + let r = o.out_close() in + o.out_write <- f; + o.out_output <- f; + o.out_close <- f; + o.out_flush <- f; + r + +let read_all i = + let maxlen = 1024 in + let str = ref [] in + let pos = ref 0 in + let rec loop() = + let s = nread i maxlen in + str := (s,!pos) :: !str; + pos := !pos + String.length s; + loop() + in + try + loop() + with + No_more_input -> + let buf = String.create !pos in + List.iter (fun (s,p) -> + String.unsafe_blit s 0 buf p (String.length s) + ) !str; + buf + +let pos_in i = + let p = ref 0 in + { + in_read = (fun () -> + let c = i.in_read() in + incr p; + c + ); + in_input = (fun s sp l -> + let n = i.in_input s sp l in + p := !p + n; + n + ); + in_close = i.in_close + } , (fun () -> !p) + +let pos_out o = + let p = ref 0 in + { + out_write = (fun c -> + o.out_write c; + incr p + ); + out_output = (fun s sp l -> + let n = o.out_output s sp l in + p := !p + n; + n + ); + out_close = o.out_close; + out_flush = o.out_flush; + } , (fun () -> !p) + +(* -------------------------------------------------------------- *) +(* Standard IO *) + +let input_string s = + let pos = ref 0 in + let len = String.length s in + { + in_read = (fun () -> + if !pos >= len then raise No_more_input; + let c = String.unsafe_get s !pos in + incr pos; + c + ); + in_input = (fun sout p l -> + if !pos >= len then raise No_more_input; + let n = (if !pos + l > len then len - !pos else l) in + String.unsafe_blit s !pos sout p n; + pos := !pos + n; + n + ); + in_close = (fun () -> ()); + } + +let output_string() = + let b = Buffer.create 0 in + { + out_write = (fun c -> + Buffer.add_char b c + ); + out_output = (fun s p l -> + Buffer.add_substring b s p l; + l + ); + out_close = (fun () -> Buffer.contents b); + out_flush = (fun () -> ()); + } + +let input_channel ch = + { + in_read = (fun () -> + try + input_char ch + with + End_of_file -> raise No_more_input + ); + in_input = (fun s p l -> + let n = Pervasives.input ch s p l in + if n = 0 then raise No_more_input; + n + ); + in_close = (fun () -> Pervasives.close_in ch); + } + +let output_channel ch = + { + out_write = (fun c -> output_char ch c); + out_output = (fun s p l -> Pervasives.output ch s p l; l); + out_close = (fun () -> Pervasives.close_out ch); + out_flush = (fun () -> Pervasives.flush ch); + } + +(* +let input_enum e = + let pos = ref 0 in + { + in_read = (fun () -> + match Enum.get e with + | None -> raise No_more_input + | Some c -> + incr pos; + c + ); + in_input = (fun s p l -> + let rec loop p l = + if l = 0 then + 0 + else + match Enum.get e with + | None -> l + | Some c -> + String.unsafe_set s p c; + loop (p + 1) (l - 1) + in + let k = loop p l in + if k = l then raise No_more_input; + l - k + ); + in_close = (fun () -> ()); + } + +let output_enum() = + let b = Buffer.create 0 in + { + out_write = (fun x -> + Buffer.add_char b x + ); + out_output = (fun s p l -> + Buffer.add_substring b s p l; + l + ); + out_close = (fun () -> + let s = Buffer.contents b in + ExtString.String.enum s + ); + out_flush = (fun () -> ()); + } +*) + +let pipe() = + let input = ref "" in + let inpos = ref 0 in + let output = Buffer.create 0 in + let flush() = + input := Buffer.contents output; + inpos := 0; + Buffer.reset output; + if String.length !input = 0 then raise No_more_input + in + let read() = + if !inpos = String.length !input then flush(); + let c = String.unsafe_get !input !inpos in + incr inpos; + c + in + let input s p l = + if !inpos = String.length !input then flush(); + let r = (if !inpos + l > String.length !input then String.length !input - !inpos else l) in + String.unsafe_blit !input !inpos s p r; + inpos := !inpos + r; + r + in + let write c = + Buffer.add_char output c + in + let output s p l = + Buffer.add_substring output s p l; + l + in + let input = { + in_read = read; + in_input = input; + in_close = (fun () -> ()); + } in + let output = { + out_write = write; + out_output = output; + out_close = (fun () -> ()); + out_flush = (fun () -> ()); + } in + input , output + +external cast_output : 'a output -> unit output = "%identity" + +(* -------------------------------------------------------------- *) +(* BINARY APIs *) + +exception Overflow of string + +let read_byte i = int_of_char (i.in_read()) + +let read_signed_byte i = + let c = int_of_char (i.in_read()) in + if c land 128 <> 0 then + c - 256 + else + c + +let read_string i = + let b = Buffer.create 8 in + let rec loop() = + let c = i.in_read() in + if c <> '\000' then begin + Buffer.add_char b c; + loop(); + end; + in + loop(); + Buffer.contents b + +let read_line i = + let b = Buffer.create 8 in + let cr = ref false in + let rec loop() = + let c = i.in_read() in + match c with + | '\n' -> + () + | '\r' -> + cr := true; + loop() + | _ when !cr -> + cr := false; + Buffer.add_char b '\r'; + Buffer.add_char b c; + loop(); + | _ -> + Buffer.add_char b c; + loop(); + in + try + loop(); + Buffer.contents b + with + No_more_input -> + if !cr then Buffer.add_char b '\r'; + if Buffer.length b > 0 then + Buffer.contents b + else + raise No_more_input + +let read_ui16 i = + let ch1 = read_byte i in + let ch2 = read_byte i in + ch1 lor (ch2 lsl 8) + +let read_i16 i = + let ch1 = read_byte i in + let ch2 = read_byte i in + let n = ch1 lor (ch2 lsl 8) in + if ch2 land 128 <> 0 then + n - 65536 + else + n + +let read_i32 ch = + let ch1 = read_byte ch in + let ch2 = read_byte ch in + let ch3 = read_byte ch in + let ch4 = read_byte ch in + if ch4 land 128 <> 0 then begin + if ch4 land 64 = 0 then raise (Overflow "read_i32"); + ch1 lor (ch2 lsl 8) lor (ch3 lsl 16) lor ((ch4 land 127) lsl 24) + end else begin + if ch4 land 64 <> 0 then raise (Overflow "read_i32"); + ch1 lor (ch2 lsl 8) lor (ch3 lsl 16) lor (ch4 lsl 24) + end + +let read_real_i32 ch = + let ch1 = read_byte ch in + let ch2 = read_byte ch in + let ch3 = read_byte ch in + let base = Int32.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in + let big = Int32.shift_left (Int32.of_int (read_byte ch)) 24 in + Int32.logor base big + +let read_i64 ch = + let ch1 = read_byte ch in + let ch2 = read_byte ch in + let ch3 = read_byte ch in + let ch4 = read_byte ch in + let base = Int64.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in + let small = Int64.logor base (Int64.shift_left (Int64.of_int ch4) 24) in + let big = Int64.of_int32 (read_real_i32 ch) in + Int64.logor (Int64.shift_left big 32) small + +let read_double ch = + Int64.float_of_bits (read_i64 ch) + +let write_byte o n = + (* doesn't test bounds of n in order to keep semantics of Pervasives.output_byte *) + write o (Char.unsafe_chr (n land 0xFF)) + +let write_string o s = + nwrite o s; + write o '\000' + +let write_line o s = + nwrite o s; + write o '\n' + +let write_ui16 ch n = + if n < 0 || n > 0xFFFF then raise (Overflow "write_ui16"); + write_byte ch n; + write_byte ch (n lsr 8) + +let write_i16 ch n = + if n < -0x8000 || n > 0x7FFF then raise (Overflow "write_i16"); + if n < 0 then + write_ui16 ch (65536 + n) + else + write_ui16 ch n + +let write_i32 ch n = + write_byte ch n; + write_byte ch (n lsr 8); + write_byte ch (n lsr 16); + write_byte ch (n asr 24) + +let write_real_i32 ch n = + let base = Int32.to_int n in + let big = Int32.to_int (Int32.shift_right_logical n 24) in + write_byte ch base; + write_byte ch (base lsr 8); + write_byte ch (base lsr 16); + write_byte ch big + +let write_i64 ch n = + write_real_i32 ch (Int64.to_int32 n); + write_real_i32 ch (Int64.to_int32 (Int64.shift_right_logical n 32)) + +let write_double ch f = + write_i64 ch (Int64.bits_of_float f) + +(* -------------------------------------------------------------- *) +(* Big Endians *) + +module BigEndian = struct + +let read_ui16 i = + let ch2 = read_byte i in + let ch1 = read_byte i in + ch1 lor (ch2 lsl 8) + +let read_i16 i = + let ch2 = read_byte i in + let ch1 = read_byte i in + let n = ch1 lor (ch2 lsl 8) in + if ch2 land 128 <> 0 then + n - 65536 + else + n + +let read_i32 ch = + let ch4 = read_byte ch in + let ch3 = read_byte ch in + let ch2 = read_byte ch in + let ch1 = read_byte ch in + if ch4 land 128 <> 0 then begin + if ch4 land 64 = 0 then raise (Overflow "read_i32"); + ch1 lor (ch2 lsl 8) lor (ch3 lsl 16) lor ((ch4 land 127) lsl 24) + end else begin + if ch4 land 64 <> 0 then raise (Overflow "read_i32"); + ch1 lor (ch2 lsl 8) lor (ch3 lsl 16) lor (ch4 lsl 24) + end + +let read_real_i32 ch = + let big = Int32.shift_left (Int32.of_int (read_byte ch)) 24 in + let ch3 = read_byte ch in + let ch2 = read_byte ch in + let ch1 = read_byte ch in + let base = Int32.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in + Int32.logor base big + +let read_i64 ch = + let big = Int64.of_int32 (read_real_i32 ch) in + let ch4 = read_byte ch in + let ch3 = read_byte ch in + let ch2 = read_byte ch in + let ch1 = read_byte ch in + let base = Int64.of_int (ch1 lor (ch2 lsl 8) lor (ch3 lsl 16)) in + let small = Int64.logor base (Int64.shift_left (Int64.of_int ch4) 24) in + Int64.logor (Int64.shift_left big 32) small + +let read_double ch = + Int64.float_of_bits (read_i64 ch) + +let write_ui16 ch n = + if n < 0 || n > 0xFFFF then raise (Overflow "write_ui16"); + write_byte ch (n lsr 8); + write_byte ch n + +let write_i16 ch n = + if n < -0x8000 || n > 0x7FFF then raise (Overflow "write_i16"); + if n < 0 then + write_ui16 ch (65536 + n) + else + write_ui16 ch n + +let write_i32 ch n = + write_byte ch (n asr 24); + write_byte ch (n lsr 16); + write_byte ch (n lsr 8); + write_byte ch n + +let write_real_i32 ch n = + let base = Int32.to_int n in + let big = Int32.to_int (Int32.shift_right_logical n 24) in + write_byte ch big; + write_byte ch (base lsr 16); + write_byte ch (base lsr 8); + write_byte ch base + +let write_i64 ch n = + write_real_i32 ch (Int64.to_int32 (Int64.shift_right_logical n 32)); + write_real_i32 ch (Int64.to_int32 n) + +let write_double ch f = + write_i64 ch (Int64.bits_of_float f) + +end + +(* -------------------------------------------------------------- *) +(* Bits API *) + +type 'a bc = { + ch : 'a; + mutable nbits : int; + mutable bits : int; +} + +type in_bits = input bc +type out_bits = unit output bc + +exception Bits_error + +let input_bits ch = + { + ch = ch; + nbits = 0; + bits = 0; + } + +let output_bits ch = + { + ch = cast_output ch; + nbits = 0; + bits = 0; + } + +let rec read_bits b n = + if b.nbits >= n then begin + let c = b.nbits - n in + let k = (b.bits asr c) land ((1 lsl n) - 1) in + b.nbits <- c; + k + end else begin + let k = read_byte b.ch in + if b.nbits >= 24 then begin + if n >= 31 then raise Bits_error; + let c = 8 + b.nbits - n in + let d = b.bits land ((1 lsl b.nbits) - 1) in + let d = (d lsl (8 - c)) lor (k lsr c) in + b.bits <- k; + b.nbits <- c; + d + end else begin + b.bits <- (b.bits lsl 8) lor k; + b.nbits <- b.nbits + 8; + read_bits b n; + end + end + +let drop_bits b = + b.nbits <- 0 + +let rec write_bits b ~nbits x = + let n = nbits in + if n + b.nbits >= 32 then begin + if n > 31 then raise Bits_error; + let n2 = 32 - b.nbits - 1 in + let n3 = n - n2 in + write_bits b ~nbits:n2 (x asr n3); + write_bits b ~nbits:n3 (x land ((1 lsl n3) - 1)); + end else begin + if n < 0 then raise Bits_error; + if (x < 0 || x > (1 lsl n - 1)) && n <> 31 then raise Bits_error; + b.bits <- (b.bits lsl n) lor x; + b.nbits <- b.nbits + n; + while b.nbits >= 8 do + b.nbits <- b.nbits - 8; + write_byte b.ch (b.bits asr b.nbits) + done + end + +let flush_bits b = + if b.nbits > 0 then write_bits b (8 - b.nbits) 0 + +(* -------------------------------------------------------------- *) +(* Generic IO *) + +class in_channel ch = + object + method input s pos len = input ch s pos len + method close_in() = close_in ch + end + +class out_channel ch = + object + method output s pos len = output ch s pos len + method flush() = flush ch + method close_out() = ignore(close_out ch) + end + +class in_chars ch = + object + method get() = try read ch with No_more_input -> raise End_of_file + method close_in() = close_in ch + end + +class out_chars ch = + object + method put t = write ch t + method flush() = flush ch + method close_out() = ignore(close_out ch) + end + +let from_in_channel ch = + let cbuf = String.create 1 in + let read() = + try + if ch#input cbuf 0 1 = 0 then raise Sys_blocked_io; + String.unsafe_get cbuf 0 + with + End_of_file -> raise No_more_input + in + let input s p l = + ch#input s p l + in + create_in + ~read + ~input + ~close:ch#close_in + +let from_out_channel ch = + let cbuf = String.create 1 in + let write c = + String.unsafe_set cbuf 0 c; + if ch#output cbuf 0 1 = 0 then raise Sys_blocked_io; + in + let output s p l = + ch#output s p l + in + create_out + ~write + ~output + ~flush:ch#flush + ~close:ch#close_out + +let from_in_chars ch = + let input s p l = + let i = ref 0 in + try + while !i < l do + String.unsafe_set s (p + !i) (ch#get()); + incr i + done; + l + with + End_of_file when !i > 0 -> + !i + in + create_in + ~read:ch#get + ~input + ~close:ch#close_in + +let from_out_chars ch = + let output s p l = + for i = p to p + l - 1 do + ch#put (String.unsafe_get s i) + done; + l + in + create_out + ~write:ch#put + ~output + ~flush:ch#flush + ~close:ch#close_out diff --git a/src/utils/extlib/IO.mli b/src/utils/extlib/IO.mli new file mode 100644 index 00000000..fe3522dd --- /dev/null +++ b/src/utils/extlib/IO.mli @@ -0,0 +1,323 @@ +(* + * IO - Abstract input/output + * Copyright (C) 2003 Nicolas Cannasse + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version, + * with the special exception on linking described in file LICENSE. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + *) + +(** High-order abstract I/O. + + IO module simply deals with abstract inputs/outputs. It provides a + set of methods for working with these IO as well as several + constructors that enable to write to an underlying channel, buffer, + or enum. +*) + +type input +(** The abstract input type. *) + +type 'a output +(** The abstract output type, ['a] is the accumulator data, it is returned + when the [close_out] function is called. *) + +exception No_more_input +(** This exception is raised when reading on an input with the [read] or + [nread] functions while there is no available token to read. *) + +exception Input_closed +(** This exception is raised when reading on a closed input. *) + +exception Output_closed +(** This exception is raised when reading on a closed output. *) + +(** {6 Standard API} *) + +val read : input -> char +(** Read a single char from an input or raise [No_more_input] if + no input available. *) + +val nread : input -> int -> string +(** [nread i n] reads a string of size up to [n] from an input. + The function will raise [No_more_input] if no input is available. + It will raise [Invalid_argument] if [n] < 0. *) + +val really_nread : input -> int -> string +(** [really_nread i n] reads a string of exactly [n] characters + from the input. Raises [No_more_input] if at least [n] characters are + not available. Raises [Invalid_argument] if [n] < 0. *) + +val input : input -> string -> int -> int -> int +(** [input i s p l] reads up to [l] characters from the given input, storing + them in string [s], starting at character number [p]. It returns the actual + number of characters read or raise [No_more_input] if no character can be + read. It will raise [Invalid_argument] if [p] and [l] do not designate a + valid substring of [s]. *) + +val really_input : input -> string -> int -> int -> int +(** [really_input i s p l] reads exactly [l] characters from the given input, + storing them in the string [s], starting at position [p]. For consistency with + {!IO.input} it returns [l]. Raises [No_more_input] if at [l] characters are + not available. Raises [Invalid_argument] if [p] and [l] do not designate a + valid substring of [s]. *) + +val close_in : input -> unit +(** Close the input. It can no longer be read from. *) + +val write : 'a output -> char -> unit +(** Write a single char to an output. *) + +val nwrite : 'a output -> string -> unit +(** Write a string to an output. *) + +val output : 'a output -> string -> int -> int -> int +(** [output o s p l] writes up to [l] characters from string [s], starting at + offset [p]. It returns the number of characters written. It will raise + [Invalid_argument] if [p] and [l] do not designate a valid substring of [s]. *) + +val really_output : 'a output -> string -> int -> int -> int +(** [really_output o s p l] writes exactly [l] characters from string [s] onto + the the output, starting with the character at offset [p]. For consistency with + {!IO.output} it returns [l]. Raises [Invalid_argument] if [p] and [l] do not + designate a valid substring of [s]. *) + +val flush : 'a output -> unit +(** Flush an output. *) + +val close_out : 'a output -> 'a +(** Close the output and return its accumulator data. + It can no longer be written. *) + +(** {6 Creation of IO Inputs/Outputs} *) + +val input_string : string -> input +(** Create an input that will read from a string. *) + +val output_string : unit -> string output +(** Create an output that will write into a string in an efficient way. + When closed, the output returns all the data written into it. *) + +val input_channel : in_channel -> input +(** Create an input that will read from a channel. *) + +val output_channel : out_channel -> unit output +(** Create an output that will write into a channel. *) + +(* +val input_enum : char Enum.t -> input +(** Create an input that will read from an [enum]. *) + +val output_enum : unit -> char Enum.t output +(** Create an output that will write into an [enum]. The + final enum is returned when the output is closed. *) +*) + +val create_in : + read:(unit -> char) -> + input:(string -> int -> int -> int) -> close:(unit -> unit) -> input +(** Fully create an input by giving all the needed functions. *) + +val create_out : + write:(char -> unit) -> + output:(string -> int -> int -> int) -> + flush:(unit -> unit) -> close:(unit -> 'a) -> 'a output +(** Fully create an output by giving all the needed functions. *) + +(** {6 Utilities} *) + +val printf : 'a output -> ('b, unit, string, unit) format4 -> 'b +(** The printf function works for any output. *) + +val read_all : input -> string +(** read all the contents of the input until [No_more_input] is raised. *) + +val pipe : unit -> input * unit output +(** Create a pipe between an input and an ouput. Data written from + the output can be read from the input. *) + +val pos_in : input -> input * (unit -> int) +(** Create an input that provide a count function of the number of bytes + read from it. *) + +val pos_out : 'a output -> 'a output * (unit -> int) +(** Create an output that provide a count function of the number of bytes + written through it. *) + +external cast_output : 'a output -> unit output = "%identity" +(** You can safely transform any output to an unit output in a safe way + by using this function. *) + +(** {6 Binary files API} + + Here is some API useful for working with binary files, in particular + binary files generated by C applications. By default, encoding of + multibyte integers is low-endian. The BigEndian module provide multibyte + operations with other encoding. +*) + +exception Overflow of string +(** Exception raised when a read or write operation cannot be completed. *) + +val read_byte : input -> int +(** Read an unsigned 8-bit integer. *) + +val read_signed_byte : input -> int +(** Read an signed 8-bit integer. *) + +val read_ui16 : input -> int +(** Read an unsigned 16-bit word. *) + +val read_i16 : input -> int +(** Read a signed 16-bit word. *) + +val read_i32 : input -> int +(** Read a signed 32-bit integer. Raise [Overflow] if the + read integer cannot be represented as a Caml 31-bit integer. *) + +val read_real_i32 : input -> int32 +(** Read a signed 32-bit integer as an OCaml int32. *) + +val read_i64 : input -> int64 +(** Read a signed 64-bit integer as an OCaml int64. *) + +val read_double : input -> float +(** Read an IEEE double precision floating point value. *) + +val read_string : input -> string +(** Read a null-terminated string. *) + +val read_line : input -> string +(** Read a LF or CRLF terminated string. *) + +val write_byte : 'a output -> int -> unit +(** Write an unsigned 8-bit byte. *) + +val write_ui16 : 'a output -> int -> unit +(** Write an unsigned 16-bit word. *) + +val write_i16 : 'a output -> int -> unit +(** Write a signed 16-bit word. *) + +val write_i32 : 'a output -> int -> unit +(** Write a signed 32-bit integer. *) + +val write_real_i32 : 'a output -> int32 -> unit +(** Write an OCaml int32. *) + +val write_i64 : 'a output -> int64 -> unit +(** Write an OCaml int64. *) + +val write_double : 'a output -> float -> unit +(** Write an IEEE double precision floating point value. *) + +val write_string : 'a output -> string -> unit +(** Write a string and append an null character. *) + +val write_line : 'a output -> string -> unit +(** Write a line and append a LF (it might be converted + to CRLF on some systems depending on the underlying IO). *) + +(** Same as operations above, but use big-endian encoding *) +module BigEndian : +sig + + val read_ui16 : input -> int + val read_i16 : input -> int + val read_i32 : input -> int + val read_real_i32 : input -> int32 + val read_i64 : input -> int64 + val read_double : input -> float + + val write_ui16 : 'a output -> int -> unit + val write_i16 : 'a output -> int -> unit + val write_i32 : 'a output -> int -> unit + val write_real_i32 : 'a output -> int32 -> unit + val write_i64 : 'a output -> int64 -> unit + val write_double : 'a output -> float -> unit + +end + +(** {6 Bits API} + + This enable you to read and write from an IO bit-by-bit or several bits + at the same time. +*) + +type in_bits +type out_bits + +exception Bits_error + +val input_bits : input -> in_bits +(** Read bits from an input *) + +val output_bits : 'a output -> out_bits +(** Write bits to an output *) + +val read_bits : in_bits -> int -> int +(** Read up to 31 bits, raise Bits_error if n < 0 or n > 31 *) + +val write_bits : out_bits -> nbits:int -> int -> unit +(** Write up to 31 bits represented as a value, raise Bits_error if nbits < 0 + or nbits > 31 or the value representation excess nbits. *) + +val flush_bits : out_bits -> unit +(** Flush remaining unwritten bits, adding up to 7 bits which values 0. *) + +val drop_bits : in_bits -> unit +(** Drop up to 7 buffered bits and restart to next input character. *) + +(** {6 Generic IO Object Wrappers} + + Theses OO Wrappers have been written to provide easy support of ExtLib + IO by external librairies. If you want your library to support ExtLib + IO without actually requiring ExtLib to compile, you can should implement + the classes [in_channel], [out_channel], [poly_in_channel] and/or + [poly_out_channel] which are the common IO specifications established + for ExtLib, OCamlNet and Camomile. + + (see http://www.ocaml-programming.de/tmp/IO-Classes.html for more details). +*) + +class in_channel : input -> + object + method input : string -> int -> int -> int + method close_in : unit -> unit + end + +class out_channel : 'a output -> + object + method output : string -> int -> int -> int + method flush : unit -> unit + method close_out : unit -> unit + end + +class in_chars : input -> + object + method get : unit -> char + method close_in : unit -> unit + end + +class out_chars : 'a output -> + object + method put : char -> unit + method flush : unit -> unit + method close_out : unit -> unit + end + +val from_in_channel : #in_channel -> input +val from_out_channel : #out_channel -> unit output +val from_in_chars : #in_chars -> input +val from_out_chars : #out_chars -> unit output diff --git a/src/utils/net/http_client.ml b/src/utils/net/http_client.ml index 70348586..305f0865 100644 --- a/src/utils/net/http_client.ml +++ b/src/utils/net/http_client.ml @@ -46,6 +46,7 @@ type request = { req_accept : string; req_proxy : (string * int * (string * string) option) option; (* (host,port,(login,password)) *) mutable req_url : url; + mutable req_gzip : bool; mutable req_save_to_file_time : float; req_request : http_request; req_referer : Url.url option; @@ -68,6 +69,7 @@ let basic_request = { req_referer = None; req_save_to_file_time = 0.; req_request = GET; + req_gzip = false; req_proxy = None; req_headers = []; req_user_agent = "Wget 1.4"; @@ -103,6 +105,7 @@ let make_full_request r = List.iter (fun (a,b) -> Printf.bprintf res "%s: %s\r\n" a b ) r.req_headers; + Printf.bprintf res "Accept-Encoding: gzip\r\n"; Printf.bprintf res "User-Agent: %s\r\n" r.req_user_agent; Printf.bprintf res "Accept: %s\r\n" r.req_accept; Printf.bprintf res "Connection: close\r\n"; @@ -274,9 +277,15 @@ let rec get_page r content_handler f ferr = ok := true; let content_length = ref (-1L) in List.iter (fun (name, content) -> - if String.lowercase name = "content-length" then - try content_length := Int64.of_string content - with _ -> lprintf_nl "bad content length [%s]" content; + match String.lowercase name with + | "content-length" -> + (try + content_length := Int64.of_string content + with _ -> + lprintf_nl "bad content length [%s]" content) + | "content-encoding" -> + if String.lowercase content = "gzip" then r.req_gzip <- true + | _ -> () ) headers; let location = "Location", Url.to_string old_url in let content_handler = content_handler !content_length (location::headers) in @@ -366,12 +375,24 @@ let rec get_page r content_handler f ferr = raise Not_found in get_url 0 r - + +(** Copy all data from [input] to [output] *) +let io_copy input output = + try + let size = 16 * 1024 in + let s = String.create size in + while true do + let n = IO.input input s 0 size in + if n = 0 then raise IO.No_more_input; + ignore (IO.really_output output s 0 n) + done + with IO.No_more_input -> () + let wget r f = - + let file_buf = Buffer.create 1000 in let file_size = ref 0L in - + try get_page r (fun maxlen headers sock nread -> (* lprintf "received %d\n" nread; *) @@ -413,7 +434,20 @@ let wget r f = let filename = Filename.concat webinfos_dir base in if !verbose then lprintf_nl "Filename: %s" filename; - Unix2.tryopen_write_bin filename (fun oc -> output_string oc s); + if r.req_gzip then + begin + try + Unix2.tryopen_write_bin filename begin fun oc -> + let gz = Gzip.input_io (IO.input_string s) in + io_copy gz (IO.output_channel oc) + end + with e -> + lprintf_nl "Exception %s while uncompressing content from %s" (Printexc2.to_string e) (Url.to_string r.req_url); + Sys.remove filename; + raise Not_found + end + else + Unix2.tryopen_write_bin filename (fun oc -> output_string oc s); if r.req_save_to_file_time <> 0. then Unix.utimes filename r.req_save_to_file_time r.req_save_to_file_time; try @@ -462,8 +496,19 @@ let wget_string r f ?(ferr=def_ferr) progress = if nread > left then TcpBufferedSocket.close sock Closed_by_user end) - (fun _ -> - f (Buffer.contents file_buf) + (fun _ -> + let content = + if r.req_gzip then + try + let io = IO.input_string (Buffer.contents file_buf) in + IO.read_all io + with e -> + lprintf_nl "Exception %s while uncompressing content from %s" (Printexc2.to_string e) (Url.to_string r.req_url); + raise Not_found + else + Buffer.contents file_buf + in + f content ) ferr diff --git a/src/utils/net/http_client.mli b/src/utils/net/http_client.mli index e82cb97f..0c3c868f 100644 --- a/src/utils/net/http_client.mli +++ b/src/utils/net/http_client.mli @@ -37,6 +37,7 @@ type request = { req_accept : string; req_proxy : (string * int * (string * string) option) option; (** (host,port,(login,password)) *) mutable req_url : Url.url; + mutable req_gzip : bool; mutable req_save_to_file_time : float; (* re-download a saved file only if newer *) req_request : http_request; -- 2.11.4.GIT