1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
|
--- src/csv.ml.orig 2010-06-14 11:17:06 UTC
+++ src/csv.ml
@@ -50,13 +50,13 @@ let max i j = if (i:int) < j then j else i
class type in_obj_channel =
object
- method input : string -> int -> int -> int
+ method input : bytes -> int -> int -> int
method close_in : unit -> unit
end
class type out_obj_channel =
object
- method output : string -> int -> int -> int
+ method output : bytes -> int -> int -> int
method close_out : unit -> unit
end
@@ -81,7 +81,7 @@ let buffer_len = 0x1FFF
FIXME: This is not made for non-blocking channels. Can we fix it? *)
type in_channel = {
in_chan : in_obj_channel;
- in_buf : string;
+ in_buf : bytes;
(* The data in the in_buf is at indexes i s.t. in0 <= i < in1.
Invariant: 0 <= in0 ; in1 <= buffer_len in1 < 0 indicates a
closed channel. *)
@@ -156,12 +156,12 @@ object
val ic = ic
method input buf ofs len =
- if ofs < 0 || len < 0 || ofs + len > String.length buf
+ if ofs < 0 || len < 0 || ofs + len > Bytes.length buf
then invalid_arg "Csv.to_in_obj#input";
if ic.in1 < 0 then raise(Sys_error "Bad file descriptor");
fill_in_buf ic;
let r = min len (ic.in1 - ic.in0) in
- String.blit ic.in_buf ic.in0 buf ofs r;
+ Bytes.blit ic.in_buf ic.in0 buf ofs r;
ic.in0 <- ic.in0 + r;
r
@@ -185,8 +185,8 @@ let strip_contents buf =
assumed the substring parameters are valid. *)
let strip_substring buf ofs len =
let n = ref(ofs + len - 1) in
- while !n >= ofs && is_space(String.unsafe_get buf !n) do decr n done;
- String.sub buf ofs (!n - ofs + 1)
+ while !n >= ofs && is_space(Bytes.unsafe_get buf !n) do decr n done;
+ Bytes.sub_string buf ofs (!n - ofs + 1)
(* Skip the possible '\n' following a '\r'. Reaching End_of_file is
@@ -194,7 +194,7 @@ let strip_substring buf ofs len =
let skip_CR ic =
try
fill_in_buf ic;
- if String.unsafe_get ic.in_buf ic.in0 = '\n' then ic.in0 <- ic.in0 + 1
+ if Bytes.unsafe_get ic.in_buf ic.in0 = '\n' then ic.in0 <- ic.in0 + 1
with End_of_file -> ()
@@ -205,19 +205,19 @@ let skip_CR ic =
let rec seek_unquoted_separator ic i =
if i >= ic.in1 then (
(* End not found, need to look at the next chunk *)
- Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+ Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
ic.in0 <- i;
fill_in_buf ic; (* or raise End_of_file *)
seek_unquoted_separator ic 0
)
else
- let c = String.unsafe_get ic.in_buf i in
+ let c = Bytes.unsafe_get ic.in_buf i in
if c = ic.separator || c = '\n' || c = '\r' then (
if Buffer.length ic.current_field = 0 then
(* Avoid copying the string to the buffer if unnecessary *)
ic.record <- strip_substring ic.in_buf ic.in0 (i - ic.in0) :: ic.record
else (
- Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+ Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
ic.record <- strip_contents ic.current_field :: ic.record;
);
ic.in0 <- i + 1;
@@ -236,7 +236,7 @@ let add_unquoted_field ic =
follow, [false] if the record is complete. *)
let rec seek_quoted_separator ic field_no =
fill_in_buf ic; (* or raise End_of_file *)
- let c = String.unsafe_get ic.in_buf ic.in0 in
+ let c = Bytes.unsafe_get ic.in_buf ic.in0 in
ic.in0 <- ic.in0 + 1;
if c = ic.separator || c = '\n' || c = '\r' then (
ic.record <- Buffer.contents ic.current_field :: ic.record;
@@ -249,13 +249,13 @@ let rec seek_quoted_separator ic field_no =
let rec examine_quoted_field ic field_no after_quote i =
if i >= ic.in1 then (
(* End of field not found, need to look at the next chunk *)
- Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+ Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
ic.in0 <- i;
fill_in_buf ic; (* or raise End_of_file *)
examine_quoted_field ic field_no after_quote 0
)
else
- let c = String.unsafe_get ic.in_buf i in
+ let c = Bytes.unsafe_get ic.in_buf i in
if !after_quote then (
if c = '\"' then (
after_quote := false;
@@ -278,7 +278,7 @@ let rec examine_quoted_field ic field_no after_quote i
else if c = '\"' then (
after_quote := true;
(* Save the field so far, without the quote *)
- Buffer.add_substring ic.current_field ic.in_buf ic.in0 (i - ic.in0);
+ Buffer.add_subbytes ic.current_field ic.in_buf ic.in0 (i - ic.in0);
ic.in0 <- i + 1; (* skip the quote *)
examine_quoted_field ic field_no after_quote ic.in0
)
@@ -298,12 +298,12 @@ let add_quoted_field ic field_no =
let skip_spaces ic =
let is_space = if ic.separator = '\t' then is_real_space else is_space in
(* Skip spaces: after this [in0] is a non-space char. *)
- while ic.in0 < ic.in1 && is_space(String.unsafe_get ic.in_buf ic.in0) do
+ while ic.in0 < ic.in1 && is_space(Bytes.unsafe_get ic.in_buf ic.in0) do
ic.in0 <- ic.in0 + 1
done;
while ic.in0 >= ic.in1 do
fill_in_buf ic;
- while ic.in0 < ic.in1 && is_space(String.unsafe_get ic.in_buf ic.in0) do
+ while ic.in0 < ic.in1 && is_space(Bytes.unsafe_get ic.in_buf ic.in0) do
ic.in0 <- ic.in0 + 1
done;
done
@@ -320,7 +320,7 @@ let add_next_field ic field_no =
try
skip_spaces ic;
(* Now, in0 < in1 or End_of_file was raised *)
- let c = String.unsafe_get ic.in_buf ic.in0 in
+ let c = Bytes.unsafe_get ic.in_buf ic.in0 in
if c = '\"' then (
ic.in0 <- ic.in0 + 1;
add_quoted_field ic field_no
@@ -329,7 +329,7 @@ let add_next_field ic field_no =
ic.in0 <- ic.in0 + 1; (* mark '=' as read *)
try
fill_in_buf ic;
- if String.unsafe_get ic.in_buf ic.in0 = '\"' then (
+ if Bytes.unsafe_get ic.in_buf ic.in0 = '\"' then (
(* Excel trick ="..." to prevent spaces around the field
to be removed. *)
ic.in0 <- ic.in0 + 1; (* skip '"' *)
@@ -414,14 +414,14 @@ let load_rows ?separator ?excel_tricks f ch =
type out_channel = {
out_chan : out_obj_channel;
out_separator : char;
- out_separator_string : string;
+ out_separator_bytes : bytes;
out_excel_tricks : bool;
}
let to_out_obj ?(separator=',') ?(excel_tricks=false) out_chan = {
out_chan = out_chan;
out_separator = separator;
- out_separator_string = String.make 1 separator;
+ out_separator_bytes = Bytes.make 1 separator;
out_excel_tricks = excel_tricks;
}
@@ -437,6 +437,16 @@ let rec really_output oc s ofs len =
let w = oc.out_chan#output s ofs len in
if w < len then really_output oc s (ofs+w) (len-w)
+let quote_bytes = Bytes.make 1 '\"'
+let output_quote oc = really_output oc quote_bytes 0 1
+
+let equal_quote_bytes = Bytes.make 2 '='
+let () = Bytes.unsafe_set equal_quote_bytes 1 '\"'
+let output_equal_quote oc = really_output oc equal_quote_bytes 0 2
+
+let newline_bytes = Bytes.make 1 '\n'
+let output_newline oc = really_output oc newline_bytes 0 1
+
(* Determine whether the string s must be quoted and how many chars it
must be extended to contain the escaped values. Return -1 if there
is no need to quote. It is assumed that the string length [len]
@@ -466,10 +476,11 @@ let write_escaped oc field =
let use_excel_trick = oc.out_excel_tricks && need_excel_trick field len
and n = must_quote oc.out_separator oc.out_excel_tricks field len in
if n < 0 && not use_excel_trick then
- really_output oc field 0 len
+ (* Alias [field] as bytes without copying; sound as long as [out_chan#output] only reads the buffer. *)
+ really_output oc (Bytes.unsafe_of_string field) 0 len
else (
let field =
- if n = 0 then field
+ if n = 0 then Bytes.unsafe_of_string field
else (* There are some quotes to escape *)
let s = String.create (len + n) in
let j = ref 0 in
@@ -487,26 +498,26 @@ let write_escaped oc field =
done;
s
in
- if use_excel_trick then really_output oc "=\"" 0 2
- else really_output oc "\"" 0 1;
- really_output oc field 0 (String.length field);
- really_output oc "\"" 0 1
+ if use_excel_trick then output_equal_quote oc
+ else output_quote oc;
+ really_output oc field 0 (Bytes.length field);
+ output_quote oc
)
end
let output_record oc = function
| [] ->
- really_output oc "\n" 0 1
+ output_newline oc
| [f] ->
write_escaped oc f;
- really_output oc "\n" 0 1
+ output_newline oc
| f :: tl ->
write_escaped oc f;
List.iter (fun f ->
- really_output oc oc.out_separator_string 0 1;
+ really_output oc oc.out_separator_bytes 0 1;
write_escaped oc f;
) tl;
- really_output oc "\n" 0 1
+ output_newline oc
let output_all oc t =
List.iter (fun r -> output_record oc r) t
|