Skip to content

Commit

Permalink
Add {In,Out}_channel.with_open_{bin,text,gen} and In_channel.input_all (
Browse files Browse the repository at this point in the history
  • Loading branch information
nojb committed Sep 29, 2021
1 parent 64023f5 commit 07dfba8
Show file tree
Hide file tree
Showing 7 changed files with 252 additions and 0 deletions.
5 changes: 5 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ Working version
(Nicolás Ojeda Bär, review by John Whitington, Daniel Bünzli, David Allsopp
and Xavier Leroy)

- #10596: Add with_open_bin, with_open_text and with_open_gen to In_channel and
Out_channel. Also, add In_channel.input_all.
(Nicolás Ojeda Bär, review by Daniel Bünzli, Jérémie Dimino, Damien Doligez
and Xavier Leroy)

### Other libraries:

- #10192: Add support for Unix domain sockets on Windows and use them
Expand Down
8 changes: 8 additions & 0 deletions stdlib/.depend
Original file line number Diff line number Diff line change
Expand Up @@ -360,10 +360,16 @@ stdlib__Hashtbl.cmx : hashtbl.ml \
stdlib__Hashtbl.cmi : hashtbl.mli \
stdlib__Seq.cmi
stdlib__In_channel.cmo : in_channel.ml \
stdlib__Sys.cmi \
stdlib.cmi \
stdlib__Fun.cmi \
stdlib__Bytes.cmi \
stdlib__In_channel.cmi
stdlib__In_channel.cmx : in_channel.ml \
stdlib__Sys.cmx \
stdlib.cmx \
stdlib__Fun.cmx \
stdlib__Bytes.cmx \
stdlib__In_channel.cmi
stdlib__In_channel.cmi : in_channel.mli \
stdlib.cmi
Expand Down Expand Up @@ -506,9 +512,11 @@ stdlib__Option.cmi : option.mli \
stdlib__Seq.cmi
stdlib__Out_channel.cmo : out_channel.ml \
stdlib.cmi \
stdlib__Fun.cmi \
stdlib__Out_channel.cmi
stdlib__Out_channel.cmx : out_channel.ml \
stdlib.cmx \
stdlib__Fun.cmx \
stdlib__Out_channel.cmi
stdlib__Out_channel.cmi : out_channel.mli \
stdlib.cmi
Expand Down
107 changes: 107 additions & 0 deletions stdlib/in_channel.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,21 @@ let stdin = Stdlib.stdin
let open_bin = Stdlib.open_in_bin
let open_text = Stdlib.open_in
let open_gen = Stdlib.open_in_gen

let with_open openfun s f =
let ic = openfun s in
Fun.protect ~finally:(fun () -> Stdlib.close_in_noerr ic)
(fun () -> f ic)

let with_open_bin s f =
with_open Stdlib.open_in_bin s f

let with_open_text s f =
with_open Stdlib.open_in s f

let with_open_gen flags perm s f =
with_open (Stdlib.open_in_gen flags perm) s f

let seek = Stdlib.LargeFile.seek_in
let pos = Stdlib.LargeFile.pos_in
let length = Stdlib.LargeFile.in_channel_length
Expand Down Expand Up @@ -63,4 +78,96 @@ let really_input_string ic len =
| s -> Some s
| exception End_of_file -> None

(* Read up to [len] bytes into [buf], starting at [ofs]. Return total bytes
read. *)
let read_upto ic buf ofs len =
let rec loop ofs len =
if len = 0 then ofs
else begin
let r = Stdlib.input ic buf ofs len in
if r = 0 then
ofs
else
loop (ofs + r) (len - r)
end
in
loop ofs len - ofs

(* Best effort attempt to return a buffer with >= (ofs + n) bytes of storage,
and such that it coincides with [buf] at indices < [ofs].
The returned buffer is equal to [buf] itself if it already has sufficient
free space.
The returned buffer may have *fewer* than [ofs + n] bytes of storage if this
number is > [Sys.max_string_length]. However the returned buffer will
*always* have > [ofs] bytes of storage. In the limiting case when [ofs = len
= Sys.max_string_length] (so that it is not possible to resize the buffer at
all), an exception is raised. *)

let ensure buf ofs n =
let len = Bytes.length buf in
if len >= ofs + n then buf
else begin
let new_len = ref len in
while !new_len < ofs + n do
new_len := 2 * !new_len + 1
done;
let new_len = !new_len in
let new_len =
if new_len <= Sys.max_string_length then
new_len
else if ofs < Sys.max_string_length then
Sys.max_string_length
else
failwith "In_channel.input_all: channel content \
is larger than maximum string length"
in
let new_buf = Bytes.create new_len in
Bytes.blit buf 0 new_buf 0 ofs;
buf
end

let input_all ic =
let chunk_size = 65536 in (* IO_BUFFER_SIZE *)
let initial_size =
try
Stdlib.in_channel_length ic - Stdlib.pos_in ic
with Sys_error _ ->
-1
in
let initial_size = if initial_size < 0 then chunk_size else initial_size in
let initial_size =
if initial_size <= Sys.max_string_length then
initial_size
else
Sys.max_string_length
in
let buf = Bytes.create initial_size in
let nread = read_upto ic buf 0 initial_size in
if nread < initial_size then (* EOF reached, buffer partially filled *)
Bytes.sub_string buf 0 nread
else begin (* nread = initial_size, maybe EOF reached *)
match Stdlib.input_char ic with
| exception End_of_file ->
(* EOF reached, buffer is completely filled *)
Bytes.unsafe_to_string buf
| c ->
(* EOF not reached *)
let rec loop buf ofs =
let buf = ensure buf ofs chunk_size in
let rem = Bytes.length buf - ofs in
(* [rem] can be < [chunk_size] if buffer size close to
[Sys.max_string_length] *)
let r = read_upto ic buf ofs rem in
if r < rem then (* EOF reached *)
Bytes.sub_string buf 0 (ofs + r)
else (* r = rem *)
loop buf (ofs + rem)
in
let buf = ensure buf nread (chunk_size + 1) in
Bytes.set buf nread c;
loop buf (nread + 1)
end

let set_binary_mode = Stdlib.set_binary_mode_in
16 changes: 16 additions & 0 deletions stdlib/in_channel.mli
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,19 @@ val open_gen : open_flag list -> int -> string -> t
file permissions. {!open_text} and {!open_bin} are special cases of this
function. *)

val with_open_bin : string -> (t -> 'a) -> 'a
(** [with_open_bin fn f] opens a channel [ic] on file [fn] and returns [f
ic]. After [f] returns, either with a value or by raising an exception, [ic]
is guaranteed to be closed. *)

val with_open_text : string -> (t -> 'a) -> 'a
(** Like {!with_open_bin}, but the channel is opened in text mode (see
{!open_text}). *)

val with_open_gen : open_flag list -> int -> string -> (t -> 'a) -> 'a
(** Like {!with_open_bin}, but can specify the opening mode and file permission,
in case the file must be created (see {!open_gen}). *)

val seek : t -> int64 -> unit
(** [seek chan pos] sets the current reading position to [pos] for channel
[chan]. This works only for regular files. On files of other kinds, the
Expand Down Expand Up @@ -123,6 +136,9 @@ val really_input_string : t -> int -> string option
returns them in a new string. Returns [None] if the end of file is reached
before [len] characters have been read. *)

val input_all : t -> string
(** [input_all ic] reads all remaining data from [ic]. *)

val set_binary_mode : t -> bool -> unit
(** [set_binary_mode ic true] sets the channel [ic] to binary mode: no
translations take place during input.
Expand Down
15 changes: 15 additions & 0 deletions stdlib/out_channel.ml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ let stderr = Stdlib.stderr
let open_bin = Stdlib.open_out_bin
let open_text = Stdlib.open_out
let open_gen = Stdlib.open_out_gen

let with_open openfun s f =
let oc = openfun s in
Fun.protect ~finally:(fun () -> Stdlib.close_out_noerr oc)
(fun () -> f oc)

let with_open_bin s f =
with_open Stdlib.open_out_bin s f

let with_open_text s f =
with_open Stdlib.open_out s f

let with_open_gen flags perm s f =
with_open (Stdlib.open_out_gen flags perm) s f

let seek = Stdlib.LargeFile.seek_out
let pos = Stdlib.LargeFile.pos_out
let length = Stdlib.LargeFile.out_channel_length
Expand Down
13 changes: 13 additions & 0 deletions stdlib/out_channel.mli
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,19 @@ val open_gen : open_flag list -> int -> string -> t
created. {!open_text} and {!open_bin} are special cases of this
function. *)

val with_open_bin : string -> (t -> 'a) -> 'a
(** [with_open_bin fn f] opens a channel [oc] on file [fn] and returns [f
oc]. After [f] returns, either with a value or by raising an exception, [oc]
is guaranteed to be closed. *)

val with_open_text : string -> (t -> 'a) -> 'a
(** Like {!with_open_bin}, but the channel is opened in text mode (see
{!open_text}). *)

val with_open_gen : open_flag list -> int -> string -> (t -> 'a) -> 'a
(** Like {!with_open_bin}, but can specify the opening mode and file permission,
in case the file must be created (see {!open_gen}). *)

val seek : t -> int64 -> unit
(** [seek chan pos] sets the current writing position to [pos] for channel
[chan]. This works only for regular files. On files of other kinds (such as
Expand Down
88 changes: 88 additions & 0 deletions testsuite/tests/lib-channels/input_all.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
(* TEST
readonly_files = "input_all.ml"
*)

let data_file =
"data.txt"

let random_string size =
String.init size (fun _ -> Char.chr (Random.int 256))

(* various sizes, binary mode *)

let check size =
let data = random_string size in
Out_channel.with_open_bin data_file (fun oc -> Out_channel.output_string oc data);
let read_data = In_channel.with_open_bin data_file In_channel.input_all in
assert (data = read_data)

let () =
List.iter check [ 0; 1; 65536; 65536 + 1; 2 * 65536 ]

(* binary mode; non-zero starting position *)

let data_size = 65536

let check midpoint =
let data = random_string data_size in
Out_channel.with_open_bin data_file
(fun oc -> Out_channel.output_string oc data);
let contents =
In_channel.with_open_bin data_file
(fun ic ->
let s1 = Option.get (In_channel.really_input_string ic midpoint) in
let s2 = In_channel.input_all ic in
s1 ^ s2
)
in
assert (contents = data)

let () =
List.iter check [0; 1; 100; data_size]

(* text mode *)

(* translates into LF *)
let dos2unix inp out =
let s = In_channel.with_open_text inp In_channel.input_all in
Out_channel.with_open_bin out
(fun oc -> Out_channel.output_string oc s)

(* translates into CRLF *)
let unix2dos inp out =
let s = In_channel.with_open_text inp In_channel.input_all in
Out_channel.with_open_text out
(fun oc -> Out_channel.output_string oc s)

let source_fn =
"input_all.ml"

let source_fn_lf =
source_fn ^ ".lf"

let source_fn_crlf =
source_fn ^ ".crlf"

let () =
dos2unix source_fn source_fn_lf

let () =
unix2dos source_fn source_fn_crlf

let raw_contents =
In_channel.with_open_bin source_fn_lf
(fun ic -> Stdlib.really_input_string ic (Stdlib.in_channel_length ic))

let check midpoint =
let contents =
In_channel.with_open_text source_fn_crlf
(fun ic ->
let s1 = Option.get (In_channel.really_input_string ic midpoint) in
let s2 = In_channel.input_all ic in
s1 ^ s2
)
in
assert (contents = raw_contents)

let () =
List.iter check [0; 1; String.length raw_contents]

0 comments on commit 07dfba8

Please sign in to comment.