Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add {In,Out}_channel.with_open_{bin,text,gen} and In_channel.input_all #10596

Merged
merged 20 commits into from
Sep 29, 2021
Merged
5 changes: 5 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ Working version
(Nicolás Ojeda Bär, review by John Whitington, Daniel Bünzli, David Allsopp
and Xavier Leroy)

- #10596: Add with_open_bin, with_open_text and with_open_gen to In_channel and
Out_channel. Also, add In_channel.input_all.
(Nicolás Ojeda Bär, review by Daniel Bünzli, Jérémie Dimino, Damien Doligez
and Xavier Leroy)

### Other libraries:

- #10192: Add support for Unix domain sockets on Windows and use them
Expand Down
8 changes: 8 additions & 0 deletions stdlib/.depend
Original file line number Diff line number Diff line change
Expand Up @@ -360,10 +360,16 @@ stdlib__Hashtbl.cmx : hashtbl.ml \
stdlib__Hashtbl.cmi : hashtbl.mli \
stdlib__Seq.cmi
stdlib__In_channel.cmo : in_channel.ml \
stdlib__Sys.cmi \
stdlib.cmi \
stdlib__Fun.cmi \
stdlib__Bytes.cmi \
stdlib__In_channel.cmi
stdlib__In_channel.cmx : in_channel.ml \
stdlib__Sys.cmx \
stdlib.cmx \
stdlib__Fun.cmx \
stdlib__Bytes.cmx \
stdlib__In_channel.cmi
stdlib__In_channel.cmi : in_channel.mli \
stdlib.cmi
Expand Down Expand Up @@ -506,9 +512,11 @@ stdlib__Option.cmi : option.mli \
stdlib__Seq.cmi
stdlib__Out_channel.cmo : out_channel.ml \
stdlib.cmi \
stdlib__Fun.cmi \
stdlib__Out_channel.cmi
stdlib__Out_channel.cmx : out_channel.ml \
stdlib.cmx \
stdlib__Fun.cmx \
stdlib__Out_channel.cmi
stdlib__Out_channel.cmi : out_channel.mli \
stdlib.cmi
Expand Down
107 changes: 107 additions & 0 deletions stdlib/in_channel.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,21 @@ let stdin = Stdlib.stdin
let open_bin = Stdlib.open_in_bin
let open_text = Stdlib.open_in
let open_gen = Stdlib.open_in_gen

let with_open openfun s f =
let ic = openfun s in
Fun.protect ~finally:(fun () -> Stdlib.close_in_noerr ic)
(fun () -> f ic)

let with_open_bin s f =
with_open Stdlib.open_in_bin s f

let with_open_text s f =
with_open Stdlib.open_in s f

let with_open_gen flags perm s f =
with_open (Stdlib.open_in_gen flags perm) s f

let seek = Stdlib.LargeFile.seek_in
let pos = Stdlib.LargeFile.pos_in
let length = Stdlib.LargeFile.in_channel_length
Expand Down Expand Up @@ -63,4 +78,96 @@ let really_input_string ic len =
| s -> Some s
| exception End_of_file -> None

(* Read up to [len] bytes into [buf], starting at [ofs]. Return total bytes
read. *)
let read_upto ic buf ofs len =
let rec loop ofs len =
if len = 0 then ofs
else begin
let r = Stdlib.input ic buf ofs len in
if r = 0 then
ofs
else
loop (ofs + r) (len - r)
end
in
loop ofs len - ofs

(* Best effort attempt to return a buffer with >= (ofs + n) bytes of storage,
and such that it coincides with [buf] at indices < [ofs].

The returned buffer is equal to [buf] itself if it already has sufficient
free space.

The returned buffer may have *fewer* than [ofs + n] bytes of storage if this
number is > [Sys.max_string_length]. However the returned buffer will
*always* have > [ofs] bytes of storage. In the limiting case when [ofs = len
= Sys.max_string_length] (so that it is not possible to resize the buffer at
all), an exception is raised. *)

let ensure buf ofs n =
let len = Bytes.length buf in
if len >= ofs + n then buf
else begin
let new_len = ref len in
while !new_len < ofs + n do
new_len := 2 * !new_len + 1
done;
This conversation was marked as resolved.
Show resolved Hide resolved
let new_len = !new_len in
let new_len =
if new_len <= Sys.max_string_length then
new_len
else if ofs < Sys.max_string_length then
Sys.max_string_length
This conversation was marked as resolved.
Show resolved Hide resolved
else
failwith "In_channel.input_all: channel content \
is larger than maximum string length"
in
let new_buf = Bytes.create new_len in
Bytes.blit buf 0 new_buf 0 ofs;
buf
end

let input_all ic =
let chunk_size = 65536 in (* IO_BUFFER_SIZE *)
let initial_size =
try
Stdlib.in_channel_length ic - Stdlib.pos_in ic
with Sys_error _ ->
-1
in
let initial_size = if initial_size < 0 then chunk_size else initial_size in
let initial_size =
if initial_size <= Sys.max_string_length then
initial_size
else
Sys.max_string_length
in
let buf = Bytes.create initial_size in
let nread = read_upto ic buf 0 initial_size in
if nread < initial_size then (* EOF reached, buffer partially filled *)
Bytes.sub_string buf 0 nread
else begin (* nread = initial_size, maybe EOF reached *)
match Stdlib.input_char ic with
| exception End_of_file ->
(* EOF reached, buffer is completely filled *)
Bytes.unsafe_to_string buf
| c ->
(* EOF not reached *)
let rec loop buf ofs =
let buf = ensure buf ofs chunk_size in
let rem = Bytes.length buf - ofs in
(* [rem] can be < [chunk_size] if buffer size close to
[Sys.max_string_length] *)
let r = read_upto ic buf ofs rem in
if r < rem then (* EOF reached *)
Bytes.sub_string buf 0 (ofs + r)
else (* r = rem *)
loop buf (ofs + rem)
in
let buf = ensure buf nread (chunk_size + 1) in
Bytes.set buf nread c;
loop buf (nread + 1)
end

let set_binary_mode = Stdlib.set_binary_mode_in
16 changes: 16 additions & 0 deletions stdlib/in_channel.mli
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,19 @@ val open_gen : open_flag list -> int -> string -> t
file permissions. {!open_text} and {!open_bin} are special cases of this
function. *)

val with_open_bin : string -> (t -> 'a) -> 'a
(** [with_open_bin fn f] opens a channel [ic] on file [fn] and returns [f
ic]. After [f] returns, either with a value or by raising an exception, [ic]
is guaranteed to be closed. *)

val with_open_text : string -> (t -> 'a) -> 'a
(** Like {!with_open_bin}, but the channel is opened in text mode (see
{!open_text}). *)

val with_open_gen : open_flag list -> int -> string -> (t -> 'a) -> 'a
(** Like {!with_open_bin}, but can specify the opening mode and file permission,
in case the file must be created (see {!open_gen}). *)

val seek : t -> int64 -> unit
(** [seek chan pos] sets the current reading position to [pos] for channel
[chan]. This works only for regular files. On files of other kinds, the
Expand Down Expand Up @@ -123,6 +136,9 @@ val really_input_string : t -> int -> string option
returns them in a new string. Returns [None] if the end of file is reached
before [len] characters have been read. *)

val input_all : t -> string
(** [input_all ic] reads all remaining data from [ic]. *)

val set_binary_mode : t -> bool -> unit
(** [set_binary_mode ic true] sets the channel [ic] to binary mode: no
translations take place during input.
Expand Down
15 changes: 15 additions & 0 deletions stdlib/out_channel.ml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ let stderr = Stdlib.stderr
let open_bin = Stdlib.open_out_bin
let open_text = Stdlib.open_out
let open_gen = Stdlib.open_out_gen

let with_open openfun s f =
let oc = openfun s in
Fun.protect ~finally:(fun () -> Stdlib.close_out_noerr oc)
(fun () -> f oc)

let with_open_bin s f =
with_open Stdlib.open_out_bin s f

let with_open_text s f =
with_open Stdlib.open_out s f

let with_open_gen flags perm s f =
with_open (Stdlib.open_out_gen flags perm) s f

let seek = Stdlib.LargeFile.seek_out
let pos = Stdlib.LargeFile.pos_out
let length = Stdlib.LargeFile.out_channel_length
Expand Down
13 changes: 13 additions & 0 deletions stdlib/out_channel.mli
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,19 @@ val open_gen : open_flag list -> int -> string -> t
created. {!open_text} and {!open_bin} are special cases of this
function. *)

val with_open_bin : string -> (t -> 'a) -> 'a
(** [with_open_bin fn f] opens a channel [oc] on file [fn] and returns [f
oc]. After [f] returns, either with a value or by raising an exception, [oc]
is guaranteed to be closed. *)

val with_open_text : string -> (t -> 'a) -> 'a
(** Like {!with_open_bin}, but the channel is opened in text mode (see
{!open_text}). *)

val with_open_gen : open_flag list -> int -> string -> (t -> 'a) -> 'a
(** Like {!with_open_bin}, but can specify the opening mode and file permission,
in case the file must be created (see {!open_gen}). *)

val seek : t -> int64 -> unit
(** [seek chan pos] sets the current writing position to [pos] for channel
[chan]. This works only for regular files. On files of other kinds (such as
Expand Down
88 changes: 88 additions & 0 deletions testsuite/tests/lib-channels/input_all.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
(* TEST
readonly_files = "input_all.ml"
*)

let data_file =
"data.txt"

let random_string size =
String.init size (fun _ -> Char.chr (Random.int 256))

(* various sizes, binary mode *)

let check size =
let data = random_string size in
Out_channel.with_open_bin data_file (fun oc -> Out_channel.output_string oc data);
let read_data = In_channel.with_open_bin data_file In_channel.input_all in
assert (data = read_data)

let () =
List.iter check [ 0; 1; 65536; 65536 + 1; 2 * 65536 ]

(* binary mode; non-zero starting position *)

let data_size = 65536

let check midpoint =
let data = random_string data_size in
Out_channel.with_open_bin data_file
(fun oc -> Out_channel.output_string oc data);
let contents =
In_channel.with_open_bin data_file
(fun ic ->
let s1 = Option.get (In_channel.really_input_string ic midpoint) in
let s2 = In_channel.input_all ic in
s1 ^ s2
)
in
assert (contents = data)

let () =
List.iter check [0; 1; 100; data_size]

(* text mode *)

(* translates into LF *)
let dos2unix inp out =
let s = In_channel.with_open_text inp In_channel.input_all in
Out_channel.with_open_bin out
(fun oc -> Out_channel.output_string oc s)

(* translates into CRLF *)
let unix2dos inp out =
let s = In_channel.with_open_text inp In_channel.input_all in
Out_channel.with_open_text out
(fun oc -> Out_channel.output_string oc s)

let source_fn =
"input_all.ml"
This conversation was marked as resolved.
Show resolved Hide resolved

let source_fn_lf =
source_fn ^ ".lf"

let source_fn_crlf =
source_fn ^ ".crlf"

let () =
dos2unix source_fn source_fn_lf

let () =
unix2dos source_fn source_fn_crlf

let raw_contents =
In_channel.with_open_bin source_fn_lf
(fun ic -> Stdlib.really_input_string ic (Stdlib.in_channel_length ic))

let check midpoint =
let contents =
In_channel.with_open_text source_fn_crlf
(fun ic ->
let s1 = Option.get (In_channel.really_input_string ic midpoint) in
let s2 = In_channel.input_all ic in
s1 ^ s2
)
in
assert (contents = raw_contents)

let () =
List.iter check [0; 1; String.length raw_contents]