Skip to content

Commit

Permalink
Add binary integer decoding functions to String
Browse files Browse the repository at this point in the history
  • Loading branch information
dra27 committed Apr 15, 2020
1 parent f7b43e9 commit 776d300
Show file tree
Hide file tree
Showing 6 changed files with 379 additions and 2 deletions.
5 changes: 3 additions & 2 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,9 @@ Working version
and Gabriel Scherer)

- #9448: Add String.{empty,cat} as dual of Bytes.{empty,cat},
String.{of,to}_bytes as aliases of Bytes.{to,of}_string, and
Bytes.split_on_char as dual of String.split_on_char.
String.{of,to}_bytes as aliases of Bytes.{to,of}_string,
Bytes.split_on_char as dual of String.split_on_char, and binary decoding
functions in String to match those in Bytes.
(David Allsopp, review by ???)

### Other libraries:
Expand Down
2 changes: 2 additions & 0 deletions stdlib/bytes.ml
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,8 @@ let of_seq i =

(** {6 Binary encoding/decoding of integers} *)

(* The get_ functions are all duplicated in string.ml *)

external get_uint8 : bytes -> int -> int = "%bytes_safe_get"
external get_uint16_ne : bytes -> int -> int = "%caml_bytes_get16"
external get_int32_ne : bytes -> int -> int32 = "%caml_bytes_get32"
Expand Down
18 changes: 18 additions & 0 deletions stdlib/string.ml
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,21 @@ let to_seq s = bos s |> B.to_seq
let to_seqi s = bos s |> B.to_seqi

let of_seq g = B.of_seq g |> bts

(** {6 Binary encoding/decoding of integers} *)

external get_uint8 : string -> int -> int = "%string_safe_get"
external get_uint16_ne : string -> int -> int = "%caml_string_get16"
external get_int32_ne : string -> int -> int32 = "%caml_string_get32"
external get_int64_ne : string -> int -> int64 = "%caml_string_get64"

let get_int8 s i = B.get_int8 (bos s) i
let get_uint16_le s i = B.get_uint16_le (bos s) i
let get_uint16_be s i = B.get_uint16_be (bos s) i
let get_int16_ne s i = B.get_int16_ne (bos s) i
let get_int16_le s i = B.get_int16_le (bos s) i
let get_int16_be s i = B.get_int16_be (bos s) i
let get_int32_le s i = B.get_int32_le (bos s) i
let get_int32_be s i = B.get_int32_be (bos s) i
let get_int64_le s i = B.get_int64_le (bos s) i
let get_int64_be s i = B.get_int64_be (bos s) i
120 changes: 120 additions & 0 deletions stdlib/string.mli
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,126 @@ val of_seq : char Seq.t -> t
(** Create a string from the generator
@since 4.07 *)

(** {1 Binary decoding of integers} *)

(** The functions in this section binary decode integers from strings.
All following functions raise [Invalid_argument] if the characters
needed at index [i] to decode the integer are not available.
Little-endian (resp. big-endian) encoding means that least
(resp. most) significant bytes are stored first. Big-endian is
also known as network byte order. Native-endian encoding is
either little-endian or big-endian depending on {!Sys.big_endian}.
32-bit and 64-bit integers are represented by the [int32] and
[int64] types, which can be interpreted either as signed or
unsigned numbers.
8-bit and 16-bit integers are represented by the [int] type,
which has more bits than the binary encoding. These extra bits
are sign-extended (or zero-extended) for functions which decode 8-bit
or 16-bit integers and represented them with [int] values.
*)

val get_uint8 : string -> int -> int
(** [get_uint8 b i] is [b]'s unsigned 8-bit integer starting at character
index [i].
@since 4.11.0
*)

val get_int8 : string -> int -> int
(** [get_int8 b i] is [b]'s signed 8-bit integer starting at character
index [i].
@since 4.11.0
*)

val get_uint16_ne : string -> int -> int
(** [get_uint16_ne b i] is [b]'s native-endian unsigned 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_uint16_be : string -> int -> int
(** [get_uint16_be b i] is [b]'s big-endian unsigned 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_uint16_le : string -> int -> int
(** [get_uint16_le b i] is [b]'s little-endian unsigned 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int16_ne : string -> int -> int
(** [get_int16_ne b i] is [b]'s native-endian signed 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int16_be : string -> int -> int
(** [get_int16_be b i] is [b]'s big-endian signed 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int16_le : string -> int -> int
(** [get_int16_le b i] is [b]'s little-endian signed 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int32_ne : string -> int -> int32
(** [get_int32_ne b i] is [b]'s native-endian 32-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int32_be : string -> int -> int32
(** [get_int32_be b i] is [b]'s big-endian 32-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int32_le : string -> int -> int32
(** [get_int32_le b i] is [b]'s little-endian 32-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int64_ne : string -> int -> int64
(** [get_int64_ne b i] is [b]'s native-endian 64-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int64_be : string -> int -> int64
(** [get_int64_be b i] is [b]'s big-endian 64-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int64_le : string -> int -> int64
(** [get_int64_le b i] is [b]'s little-endian 64-bit integer
starting at character index [i].
@since 4.11.0
*)

(**/**)

(* The following is for system use only. Do not call directly. *)
Expand Down
120 changes: 120 additions & 0 deletions stdlib/stringLabels.mli
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,126 @@ val of_seq : char Seq.t -> t
(** Create a string from the generator
@since 4.07 *)

(** {1 Binary decoding of integers} *)

(** The functions in this section binary decode integers from strings.
All following functions raise [Invalid_argument] if the characters
needed at index [i] to decode the integer are not available.
Little-endian (resp. big-endian) encoding means that least
(resp. most) significant bytes are stored first. Big-endian is
also known as network byte order. Native-endian encoding is
either little-endian or big-endian depending on {!Sys.big_endian}.
32-bit and 64-bit integers are represented by the [int32] and
[int64] types, which can be interpreted either as signed or
unsigned numbers.
8-bit and 16-bit integers are represented by the [int] type,
which has more bits than the binary encoding. These extra bits
are sign-extended (or zero-extended) for functions which decode 8-bit
or 16-bit integers and represented them with [int] values.
*)

val get_uint8 : string -> int -> int
(** [get_uint8 b i] is [b]'s unsigned 8-bit integer starting at character
index [i].
@since 4.11.0
*)

val get_int8 : string -> int -> int
(** [get_int8 b i] is [b]'s signed 8-bit integer starting at character
index [i].
@since 4.11.0
*)

val get_uint16_ne : string -> int -> int
(** [get_uint16_ne b i] is [b]'s native-endian unsigned 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_uint16_be : string -> int -> int
(** [get_uint16_be b i] is [b]'s big-endian unsigned 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_uint16_le : string -> int -> int
(** [get_uint16_le b i] is [b]'s little-endian unsigned 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int16_ne : string -> int -> int
(** [get_int16_ne b i] is [b]'s native-endian signed 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int16_be : string -> int -> int
(** [get_int16_be b i] is [b]'s big-endian signed 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int16_le : string -> int -> int
(** [get_int16_le b i] is [b]'s little-endian signed 16-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int32_ne : string -> int -> int32
(** [get_int32_ne b i] is [b]'s native-endian 32-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int32_be : string -> int -> int32
(** [get_int32_be b i] is [b]'s big-endian 32-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int32_le : string -> int -> int32
(** [get_int32_le b i] is [b]'s little-endian 32-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int64_ne : string -> int -> int64
(** [get_int64_ne b i] is [b]'s native-endian 64-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int64_be : string -> int -> int64
(** [get_int64_be b i] is [b]'s big-endian 64-bit integer
starting at character index [i].
@since 4.11.0
*)

val get_int64_le : string -> int -> int64
(** [get_int64_le b i] is [b]'s little-endian 64-bit integer
starting at character index [i].
@since 4.11.0
*)

(**/**)

(* The following is for system use only. Do not call directly. *)
Expand Down

0 comments on commit 776d300

Please sign in to comment.