git: 7af6dde54995 - main - textproc/ocaml-csv: the port had been improved (+)

Alexey Dokuchaev danfe at
Fri Sep 10 05:00:20 UTC 2021

The branch main has been updated by danfe:


commit 7af6dde54995fa1784440b52be51499b7dff070b
Author:     Alexey Dokuchaev <danfe at>
AuthorDate: 2021-09-10 04:58:16 +0000
Commit:     Alexey Dokuchaev <danfe at>
CommitDate: 2021-09-10 04:58:18 +0000

    textproc/ocaml-csv: the port had been improved (+)
    - Unbreak against immutable strings in OCaml 4.06+
    - Define LICENSE (GNU LGPL version 2.1 or later)
    - Drop unused dependency on `devel/ocaml-extlib'
    - Hook the provided test target to our framework
    - Fix wording and grammar of the port description
 textproc/ocaml-csv/Makefile                |   3 +-
 textproc/ocaml-csv/files/patch-src_csv.ml  | 235 +++++++++++++++++++++++++++++
 textproc/ocaml-csv/files/patch-src_csv.mli |  20 +++
 textproc/ocaml-csv/pkg-descr               |   4 +-
 4 files changed, 259 insertions(+), 3 deletions(-)

diff --git a/textproc/ocaml-csv/Makefile b/textproc/ocaml-csv/Makefile
index 4a9841f25bc7..bdc0c1a94232 100644
--- a/textproc/ocaml-csv/Makefile
+++ b/textproc/ocaml-csv/Makefile
@@ -9,7 +9,7 @@ MASTER_SITES=
 MAINTAINER=	tcrimi at
 COMMENT=	OCaml library to read and write CSV files
-BUILD_DEPENDS=	${LOCALBASE}/${OCAML_SITELIBDIR}/extlib:devel/ocaml-extlib
 USES=		gmake
@@ -17,6 +17,7 @@ MAKE_ARGS=	byte native
diff --git a/textproc/ocaml-csv/files/patch-src_csv.ml b/textproc/ocaml-csv/files/patch-src_csv.ml
new file mode 100644
index 000000000000..dbbc419be7dc
--- /dev/null
+++ b/textproc/ocaml-csv/files/patch-src_csv.ml
@@ -0,0 +1,235 @@
+--- src/csv.ml.orig	2010-06-14 11:17:06 UTC
++++ src/csv.ml
+@@ -50,13 +50,13 @@ let max i j = if (i:int) < j then j else i
+ class type in_obj_channel =
+ object
+-  method input : string -> int -> int -> int
++  method input : bytes -> int -> int -> int
+   method close_in : unit -> unit
+ end
+ class type out_obj_channel =
+ object
+-  method output : string -> int -> int -> int
++  method output : bytes -> int -> int -> int
+   method close_out : unit -> unit
+ end
+@@ -81,7 +81,7 @@ let buffer_len = 0x1FFF
+    FIXME: This is not made for non-blocking channels.  Can we fix it? *)
+ type in_channel = {
+   in_chan : in_obj_channel;
+-  in_buf : string;
++  in_buf : bytes;
+   (* The data in the in_buf is at indexes i s.t. in0 <= i < in1.
+      Invariant: 0 <= in0 ; in1 <= buffer_len in1 < 0 indicates a
+      closed channel. *)
+@@ -156,12 +156,12 @@ object
+   val ic = ic
+   method input buf ofs len =
+-    if ofs < 0 || len < 0 || ofs + len > String.length buf
++    if ofs < 0 || len < 0 || ofs + len > Bytes.length buf
+     then invalid_arg "Csv.to_in_obj#input";
+     if ic.in1 < 0 then raise(Sys_error "Bad file descriptor");
+     fill_in_buf ic;
+     let r = min len (ic.in1 - ic.in0) in
+-    String.blit ic.in_buf ic.in0 buf ofs r;
++    Bytes.blit ic.in_buf ic.in0 buf ofs r;
+     ic.in0 <- ic.in0 + r;
+     r
+@@ -185,8 +185,8 @@ let strip_contents buf =
+    assumed the substring parameters are valid. *)
+ let strip_substring buf ofs len =
+   let n = ref(ofs + len - 1) in
+-  while !n >= ofs && is_space(String.unsafe_get buf !n) do decr n done;
+-  String.sub buf ofs (!n - ofs + 1)
++  while !n >= ofs && is_space(Bytes.unsafe_get buf !n) do decr n done;
++  Bytes.sub_string buf ofs (!n - ofs + 1)
+ (* Skip the possible '\n' following a '\r'.  Reaching End_of_file is
+@@ -194,7 +194,7 @@ let strip_substring buf ofs len =
+ let skip_CR ic =
+   try
+     fill_in_buf ic;
+-    if String.unsafe_get ic.in_buf ic.in0 = '\n' then ic.in0 <- ic.in0 + 1
++    if Bytes.unsafe_get ic.in_buf ic.in0 = '\n' then ic.in0 <- ic.in0 + 1
+   with End_of_file -> ()
+@@ -205,19 +205,19 @@ let skip_CR ic =
+ let rec seek_unquoted_separator ic i =
+   if i >= ic.in1 then (
+     (* End not found, need to look at the next chunk *)
+-    Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
++    Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+     ic.in0 <- i;
+     fill_in_buf ic; (* or raise End_of_file *)
+     seek_unquoted_separator ic 0
+   )
+   else
+-    let c = String.unsafe_get ic.in_buf i in
++    let c = Bytes.unsafe_get ic.in_buf i in
+     if c = ic.separator || c = '\n' || c = '\r' then (
+       if Buffer.length ic.current_field = 0 then
+         (* Avoid copying the string to the buffer if unnecessary *)
+         ic.record <- strip_substring ic.in_buf ic.in0 (i - ic.in0) :: ic.record
+       else (
+-        Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
++        Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+         ic.record <- strip_contents ic.current_field :: ic.record;
+       );
+       ic.in0 <- i + 1;
+@@ -236,7 +236,7 @@ let add_unquoted_field ic =
+    follow, [false] if the record is complete. *)
+ let rec seek_quoted_separator ic field_no =
+   fill_in_buf ic; (* or raise End_of_file *)
+-  let c = String.unsafe_get ic.in_buf ic.in0 in
++  let c = Bytes.unsafe_get ic.in_buf ic.in0 in
+   ic.in0 <- ic.in0 + 1;
+   if c = ic.separator || c = '\n' || c = '\r' then (
+     ic.record <- Buffer.contents ic.current_field :: ic.record;
+@@ -249,13 +249,13 @@ let rec seek_quoted_separator ic field_no =
+ let rec examine_quoted_field ic field_no after_quote i =
+   if i >= ic.in1 then (
+     (* End of field not found, need to look at the next chunk *)
+-    Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
++    Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+     ic.in0 <- i;
+     fill_in_buf ic; (* or raise End_of_file *)
+     examine_quoted_field ic field_no after_quote 0
+   )
+   else
+-    let c = String.unsafe_get ic.in_buf i in
++    let c = Bytes.unsafe_get ic.in_buf i in
+     if !after_quote then (
+       if c = '\"' then (
+         after_quote := false;
+@@ -278,7 +278,7 @@ let rec examine_quoted_field ic field_no after_quote i
+     else if c = '\"' then (
+       after_quote := true;
+       (* Save the field so far, without the quote *)
+-      Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
++      Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+       ic.in0 <- i + 1; (* skip the quote *)
+       examine_quoted_field ic field_no after_quote ic.in0
+     )
+@@ -298,12 +298,12 @@ let add_quoted_field ic field_no =
+ let skip_spaces ic =
+   let is_space = if ic.separator = '\t' then is_real_space else is_space in
+   (* Skip spaces: after this [in0] is a non-space char. *)
+-  while ic.in0 < ic.in1 && is_space(String.unsafe_get ic.in_buf ic.in0) do
++  while ic.in0 < ic.in1 && is_space(Bytes.unsafe_get ic.in_buf ic.in0) do
+     ic.in0 <- ic.in0 + 1
+   done;
+   while ic.in0 >= ic.in1 do
+     fill_in_buf ic;
+-    while ic.in0 < ic.in1 && is_space(String.unsafe_get ic.in_buf ic.in0) do
++    while ic.in0 < ic.in1 && is_space(Bytes.unsafe_get ic.in_buf ic.in0) do
+       ic.in0 <- ic.in0 + 1
+     done;
+   done
+@@ -320,7 +320,7 @@ let add_next_field ic field_no =
+   try
+     skip_spaces ic;
+     (* Now, in0 < in1 or End_of_file was raised *)
+-    let c = String.unsafe_get ic.in_buf ic.in0 in
++    let c = Bytes.unsafe_get ic.in_buf ic.in0 in
+     if c = '\"' then (
+       ic.in0 <- ic.in0 + 1;
+       add_quoted_field ic field_no
+@@ -329,7 +329,7 @@ let add_next_field ic field_no =
+       ic.in0 <- ic.in0 + 1; (* mark '=' as read *)
+       try
+         fill_in_buf ic;
+-        if String.unsafe_get ic.in_buf ic.in0 = '\"' then (
++        if Bytes.unsafe_get ic.in_buf ic.in0 = '\"' then (
+           (* Excel trick ="..." to prevent spaces around the field
+              to be removed. *)
+           ic.in0 <- ic.in0 + 1; (* skip '"' *)
+@@ -414,14 +414,14 @@ let load_rows ?separator ?excel_tricks f ch =
+ type out_channel = {
+   out_chan : out_obj_channel;
+   out_separator : char;
+-  out_separator_string : string;
++  out_separator_bytes : bytes;
+   out_excel_tricks : bool;
+ }
+ let to_out_obj ?(separator=',') ?(excel_tricks=false) out_chan = {
+   out_chan = out_chan;
+   out_separator = separator;
+-  out_separator_string = String.make 1 separator;
++  out_separator_bytes = Bytes.make 1 separator;
+   out_excel_tricks = excel_tricks;
+ }
+@@ -437,6 +437,16 @@ let rec really_output oc s ofs len =
+   let w = oc.out_chan#output s ofs len in
+   if w < len then really_output oc s (ofs+w) (len-w)
++let quote_bytes = Bytes.make 1 '\"'
++let output_quote oc = really_output oc quote_bytes 0 1
++let equal_quote_bytes = Bytes.make 2 '='
++let () = Bytes.unsafe_set equal_quote_bytes 1 '\"'
++let output_equal_quote oc = really_output oc equal_quote_bytes 0 2
++let newline_bytes = Bytes.make 1 '\n'
++let output_newline oc = really_output oc newline_bytes 0 1
+ (* Determine whether the string s must be quoted and how many chars it
+    must be extended to contain the escaped values.  Return -1 if there
+    is no need to quote.  It is assumed that the string length [len]
+@@ -466,10 +476,11 @@ let write_escaped oc field =
+     let use_excel_trick = oc.out_excel_tricks && need_excel_trick field len
+     and n = must_quote oc.out_separator oc.out_excel_tricks field len in
+     if n < 0 && not use_excel_trick then
+-      really_output oc field 0 len
++      (* [really_output] does not mutate the [bytes] argument. *)
++      really_output oc (Bytes.unsafe_of_string field) 0 len
+     else (
+       let field =
+-        if n = 0 then field
++        if n = 0 then Bytes.unsafe_of_string field
+         else (* There are some quotes to escape *)
+           let s = String.create (len + n) in
+           let j = ref 0 in
+@@ -487,26 +498,26 @@ let write_escaped oc field =
+           done;
+           s
+       in
+-      if use_excel_trick then really_output oc "=\"" 0 2
+-      else really_output oc "\"" 0 1;
+-      really_output oc field 0 (String.length field);
+-      really_output oc "\"" 0 1
++      if use_excel_trick then output_equal_quote oc
++      else output_quote oc;
++      really_output oc field 0 (Bytes.length field);
++      output_quote oc
+     )
+   end
+ let output_record oc = function
+   | [] ->
+-      really_output oc "\n" 0 1
++      output_newline oc
+   | [f] ->
+       write_escaped oc f;
+-      really_output oc "\n" 0 1
++      output_newline oc
+   | f :: tl ->
+       write_escaped oc f;
+       List.iter (fun f ->
+-                   really_output oc oc.out_separator_string 0 1;
++                   really_output oc oc.out_separator_bytes 0 1;
+                    write_escaped oc f;
+                 ) tl;
+-      really_output oc "\n" 0 1
++      output_newline oc
+ let output_all oc t =
+   List.iter (fun r -> output_record oc r) t
diff --git a/textproc/ocaml-csv/files/patch-src_csv.mli b/textproc/ocaml-csv/files/patch-src_csv.mli
new file mode 100644
index 000000000000..ecccf681add9
--- /dev/null
+++ b/textproc/ocaml-csv/files/patch-src_csv.mli
@@ -0,0 +1,20 @@
+--- src/csv.mli.orig	2010-06-14 11:17:06 UTC
++++ src/csv.mli
+@@ -37,7 +37,7 @@ type t = string list list
+ (** The most basic input object for best interoperability. *)
+ class type in_obj_channel =
+ object
+-  method input : string -> int -> int -> int
++  method input : bytes -> int -> int -> int
+     (** [input buf ofs len] reads up to [len] octets from the channel
+         and puts them in the substring [buf.[ofs .. ofs+len-1]].
+         Returns the number of octets actually read (and stored).  When
+@@ -52,7 +52,7 @@ end
+ (** The most basic output object for best interoperability. *)
+ class type out_obj_channel =
+ object
+-  method output : string -> int -> int -> int
++  method output : bytes -> int -> int -> int
+     (** [output s ofs len] writes up to [len] bytes of the substring
+         [s.[ofs .. ofs+len-1]].  Return the number of bytes actually
+         written.  When the channel is non-blocking, and there are
diff --git a/textproc/ocaml-csv/pkg-descr b/textproc/ocaml-csv/pkg-descr
index 846a4ce73995..8fdd50978cbd 100644
--- a/textproc/ocaml-csv/pkg-descr
+++ b/textproc/ocaml-csv/pkg-descr
@@ -1,4 +1,4 @@
-This library can read and write CSV files, including all extensions used by
-Excel - eg. quotes, newlines, 8 bit characters in fields, "0 etc.
+This library can read and write CSV files, including all extensions used by
+Microsoft Excel, e.g. quotes, newlines, 8-bit field characters, "0, etc.

