summaryrefslogtreecommitdiff
path: root/textproc/ocaml-csv/files/patch-src_csv.ml
blob: dbbc419be7dcda441937a05a7bd7f10309810307 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
--- src/csv.ml.orig	2010-06-14 11:17:06 UTC
+++ src/csv.ml
@@ -50,13 +50,13 @@ let max i j = if (i:int) < j then j else i
 
 class type in_obj_channel =
 object
-  method input : string -> int -> int -> int
+  method input : bytes -> int -> int -> int
   method close_in : unit -> unit
 end
 
 class type out_obj_channel =
 object
-  method output : string -> int -> int -> int
+  method output : bytes -> int -> int -> int
   method close_out : unit -> unit
 end
 
@@ -81,7 +81,7 @@ let buffer_len = 0x1FFF
    FIXME: This is not made for non-blocking channels.  Can we fix it? *)
 type in_channel = {
   in_chan : in_obj_channel;
-  in_buf : string;
+  in_buf : bytes;
   (* The data in the in_buf is at indexes i s.t. in0 <= i < in1.
      Invariant: 0 <= in0 ; in1 <= buffer_len in1 < 0 indicates a
      closed channel. *)
@@ -156,12 +156,12 @@ object
   val ic = ic
 
   method input buf ofs len =
-    if ofs < 0 || len < 0 || ofs + len > String.length buf
+    if ofs < 0 || len < 0 || ofs + len > Bytes.length buf
     then invalid_arg "Csv.to_in_obj#input";
     if ic.in1 < 0 then raise(Sys_error "Bad file descriptor");
     fill_in_buf ic;
     let r = min len (ic.in1 - ic.in0) in
-    String.blit ic.in_buf ic.in0 buf ofs r;
+    Bytes.blit ic.in_buf ic.in0 buf ofs r;
     ic.in0 <- ic.in0 + r;
     r
 
@@ -185,8 +185,8 @@ let strip_contents buf =
    assumed the substring parameters are valid. *)
 let strip_substring buf ofs len =
   let n = ref(ofs + len - 1) in
-  while !n >= ofs && is_space(String.unsafe_get buf !n) do decr n done;
-  String.sub buf ofs (!n - ofs + 1)
+  while !n >= ofs && is_space(Bytes.unsafe_get buf !n) do decr n done;
+  Bytes.sub_string buf ofs (!n - ofs + 1)
 
 
 (* Skip the possible '\n' following a '\r'.  Reaching End_of_file is
@@ -194,7 +194,7 @@ let strip_substring buf ofs len =
 let skip_CR ic =
   try
     fill_in_buf ic;
-    if String.unsafe_get ic.in_buf ic.in0 = '\n' then ic.in0 <- ic.in0 + 1
+    if Bytes.unsafe_get ic.in_buf ic.in0 = '\n' then ic.in0 <- ic.in0 + 1
   with End_of_file -> ()
 
 
@@ -205,19 +205,19 @@ let skip_CR ic =
 let rec seek_unquoted_separator ic i =
   if i >= ic.in1 then (
     (* End not found, need to look at the next chunk *)
-    Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+    Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
     ic.in0 <- i;
     fill_in_buf ic; (* or raise End_of_file *)
     seek_unquoted_separator ic 0
   )
   else
-    let c = String.unsafe_get ic.in_buf i in
+    let c = Bytes.unsafe_get ic.in_buf i in
     if c = ic.separator || c = '\n' || c = '\r' then (
       if Buffer.length ic.current_field = 0 then
         (* Avoid copying the string to the buffer if unnecessary *)
         ic.record <- strip_substring ic.in_buf ic.in0 (i - ic.in0) :: ic.record
       else (
-        Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+        Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
         ic.record <- strip_contents ic.current_field :: ic.record;
       );
       ic.in0 <- i + 1;
@@ -236,7 +236,7 @@ let add_unquoted_field ic =
    follow, [false] if the record is complete. *)
 let rec seek_quoted_separator ic field_no =
   fill_in_buf ic; (* or raise End_of_file *)
-  let c = String.unsafe_get ic.in_buf ic.in0 in
+  let c = Bytes.unsafe_get ic.in_buf ic.in0 in
   ic.in0 <- ic.in0 + 1;
   if c = ic.separator || c = '\n' || c = '\r' then (
     ic.record <- Buffer.contents ic.current_field :: ic.record;
@@ -249,13 +249,13 @@ let rec seek_quoted_separator ic field_no =
 let rec examine_quoted_field ic field_no after_quote i =
   if i >= ic.in1 then (
     (* End of field not found, need to look at the next chunk *)
-    Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+    Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
     ic.in0 <- i;
     fill_in_buf ic; (* or raise End_of_file *)
     examine_quoted_field ic field_no after_quote 0
   )
   else
-    let c = String.unsafe_get ic.in_buf i in
+    let c = Bytes.unsafe_get ic.in_buf i in
     if !after_quote then (
       if c = '\"' then (
         after_quote := false;
@@ -278,7 +278,7 @@ let rec examine_quoted_field ic field_no after_quote i
     else if c = '\"' then (
       after_quote := true;
       (* Save the field so far, without the quote *)
-      Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+      Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
       ic.in0 <- i + 1; (* skip the quote *)
       examine_quoted_field ic field_no after_quote ic.in0
     )
@@ -298,12 +298,12 @@ let add_quoted_field ic field_no =
 let skip_spaces ic =
   let is_space = if ic.separator = '\t' then is_real_space else is_space in
   (* Skip spaces: after this [in0] is a non-space char. *)
-  while ic.in0 < ic.in1 && is_space(String.unsafe_get ic.in_buf ic.in0) do
+  while ic.in0 < ic.in1 && is_space(Bytes.unsafe_get ic.in_buf ic.in0) do
     ic.in0 <- ic.in0 + 1
   done;
   while ic.in0 >= ic.in1 do
     fill_in_buf ic;
-    while ic.in0 < ic.in1 && is_space(String.unsafe_get ic.in_buf ic.in0) do
+    while ic.in0 < ic.in1 && is_space(Bytes.unsafe_get ic.in_buf ic.in0) do
       ic.in0 <- ic.in0 + 1
     done;
   done
@@ -320,7 +320,7 @@ let add_next_field ic field_no =
   try
     skip_spaces ic;
     (* Now, in0 < in1 or End_of_file was raised *)
-    let c = String.unsafe_get ic.in_buf ic.in0 in
+    let c = Bytes.unsafe_get ic.in_buf ic.in0 in
     if c = '\"' then (
       ic.in0 <- ic.in0 + 1;
       add_quoted_field ic field_no
@@ -329,7 +329,7 @@ let add_next_field ic field_no =
       ic.in0 <- ic.in0 + 1; (* mark '=' as read *)
       try
         fill_in_buf ic;
-        if String.unsafe_get ic.in_buf ic.in0 = '\"' then (
+        if Bytes.unsafe_get ic.in_buf ic.in0 = '\"' then (
           (* Excel trick ="..." to prevent spaces around the field
              to be removed. *)
           ic.in0 <- ic.in0 + 1; (* skip '"' *)
@@ -414,14 +414,14 @@ let load_rows ?separator ?excel_tricks f ch =
 type out_channel = {
   out_chan : out_obj_channel;
   out_separator : char;
-  out_separator_string : string;
+  out_separator_bytes : bytes;
   out_excel_tricks : bool;
 }
 
 let to_out_obj ?(separator=',') ?(excel_tricks=false) out_chan = {
   out_chan = out_chan;
   out_separator = separator;
-  out_separator_string = String.make 1 separator;
+  out_separator_bytes = Bytes.make 1 separator;
   out_excel_tricks = excel_tricks;
 }
 
@@ -437,6 +437,16 @@ let rec really_output oc s ofs len =
   let w = oc.out_chan#output s ofs len in
   if w < len then really_output oc s (ofs+w) (len-w)
 
+let quote_bytes = Bytes.make 1 '\"'
+let output_quote oc = really_output oc quote_bytes 0 1
+
+let equal_quote_bytes = Bytes.make 2 '='
+let () = Bytes.unsafe_set equal_quote_bytes 1 '\"'
+let output_equal_quote oc = really_output oc equal_quote_bytes 0 2
+
+let newline_bytes = Bytes.make 1 '\n'
+let output_newline oc = really_output oc newline_bytes 0 1
+
 (* Determine whether the string s must be quoted and how many chars it
    must be extended to contain the escaped values.  Return -1 if there
    is no need to quote.  It is assumed that the string length [len]
@@ -466,10 +476,11 @@ let write_escaped oc field =
     let use_excel_trick = oc.out_excel_tricks && need_excel_trick field len
     and n = must_quote oc.out_separator oc.out_excel_tricks field len in
     if n < 0 && not use_excel_trick then
-      really_output oc field 0 len
+      (* No copy: safe as long as [out_chan#output] only reads the buffer. *)
+      really_output oc (Bytes.unsafe_of_string field) 0 len
     else (
       let field =
-        if n = 0 then field
+        if n = 0 then Bytes.unsafe_of_string field
         else (* There are some quotes to escape *)
           let s = String.create (len + n) in
           let j = ref 0 in
@@ -487,26 +498,26 @@ let write_escaped oc field =
           done;
           s
       in
-      if use_excel_trick then really_output oc "=\"" 0 2
-      else really_output oc "\"" 0 1;
-      really_output oc field 0 (String.length field);
-      really_output oc "\"" 0 1
+      if use_excel_trick then output_equal_quote oc
+      else output_quote oc;
+      really_output oc field 0 (Bytes.length field);
+      output_quote oc
     )
   end
 
 let output_record oc = function
   | [] ->
-      really_output oc "\n" 0 1
+      output_newline oc
   | [f] ->
       write_escaped oc f;
-      really_output oc "\n" 0 1
+      output_newline oc
   | f :: tl ->
       write_escaped oc f;
       List.iter (fun f ->
-                   really_output oc oc.out_separator_string 0 1;
+                   really_output oc oc.out_separator_bytes 0 1;
                    write_escaped oc f;
                 ) tl;
-      really_output oc "\n" 0 1
+      output_newline oc
 
 let output_all oc t =
   List.iter (fun r -> output_record oc r) t