/*  Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (c)  2009-2017, VU University Amsterdam
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.

    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in
       the documentation and/or other materials provided with the
       distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.
*/

:- module(csv,
          [ csv//1,                     % +Rows
            csv//2,                     % +Rows, +Options

            csv_read_file/2,            % +File, -Data
            csv_read_file/3,            % +File, -Data, +Options
            csv_read_file_row/3,        % +File, -Row, +Options
            csv_read_row/3,             % +Stream, -Row, +CompiledOptions
            csv_options/2,              % -Compiled, +Options

            csv_write_file/2,           % +File, +Data
            csv_write_file/3,           % +File, +Data, +Options
            csv_write_stream/3          % +Stream, +Data, +Options
          ]).
:- use_module(library(record)).
:- use_module(library(error)).
:- use_module(library(pure_input)).
:- use_module(library(debug)).
:- use_module(library(option)).
:- predicate_options(csv//2, 2,
                     [ separator(nonneg),       % must be code
                       strip(boolean),
                       ignore_quotes(boolean),
                       convert(boolean),
                       case(oneof([down,preserve,up])),
                       functor(atom),
                       arity(-nonneg),          % actually ?nonneg
                       match_arity(boolean)
                     ]).
:- predicate_options(csv_read_file/3, 3,
                     [ pass_to(csv//2, 2),
                       pass_to(phrase_from_file/3, 3)
                     ]).
:- predicate_options(csv_read_file_row/3, 3,
                     [ pass_to(csv//2, 2),
                       pass_to(open/4, 4)
                     ]).
:- predicate_options(csv_write_file/3, 3,
                     [ pass_to(csv//2, 2),
                       pass_to(open/4, 4)
                     ]).
:- predicate_options(csv_write_stream/3, 3,
                     [ pass_to(csv//2, 2)
                     ]).


:- record
    csv_options(separator:integer=0',,
                strip:boolean=false,
                ignore_quotes:boolean=false,
                convert:boolean=true,
                case:oneof([down,preserve,up])=preserve,
                functor:atom=row,
                arity:integer,
                match_arity:boolean=true).


csv_read_file/2 and csv_read_file/3 read a CSV file into a list of
rows.  Each row is a Prolog term with the same arity.  Options is
handed to csv//2; remaining options are processed by phrase_from_file/3.
The default separator depends on the file name extension and is a tab
(\t) for .tsv files and a comma (,) otherwise.
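As a quick illustration of this rule, consider the sketch below; the
file name data.tsv and the predicate name read_tsv/1 are assumptions
made up for this example, not part of the library.

% Hypothetical example: because the extension is .tsv, the separator
% defaults to a tab, so no separator(Code) option is needed.  Passing
% an explicit separator(Code) option would override the default.
read_tsv(Rows) :-
    csv_read_file('data.tsv', Rows, []).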
Suppose we want to create a predicate table/6 from a CSV file that we
know contains 6 fields per record.  This can be done using the code
below.  Without the option arity(6), this would generate a predicate
table/N, where N is the number of fields per record in the data.
?- csv_read_file(File, Rows, [functor(table), arity(6)]), maplist(assert, Rows).
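The call above can be wrapped in a small helper.  This is a hedged
sketch; the name load_table/1 is invented here, and assertz/1 is used
rather than the deprecated assert/1.

% Hypothetical helper: read File and assert its rows as table/6 facts.
load_table(File) :-
    csv_read_file(File, Rows, [functor(table), arity(6)]),
    maplist(assertz, Rows).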
csv_read_file(File, Rows) :-
    csv_read_file(File, Rows, []).

csv_read_file(File, Rows, Options) :-
    default_separator(File, Options, Options1),
    make_csv_options(Options1, Record, RestOptions),
    phrase_from_file(csv_roptions(Rows, Record), File, RestOptions).


default_separator(File, Options0, Options) :-
    (   option(separator(_), Options0)
    ->  Options = Options0
    ;   file_name_extension(_, Ext0, File),
        downcase_atom(Ext0, Ext),
        ext_separator(Ext, Sep)
    ->  Options = [separator(Sep)|Options0]
    ;   Options = Options0
    ).

ext_separator(csv, 0',).
ext_separator(tsv, 0'\t).
csv//1 and csv//2 are DCG rules to read/write CSV data.  csv//2
processes the following options:

  separator(+Code)
    The field separator.  Must be a character code; the default is the
    comma.  E.g., separator(0';) parses a semicolon separated file.
  ignore_quotes(+Boolean)
    If true (default false), treat double quotes as a normal
    character.
  strip(+Boolean)
    If true (default false), strip leading and trailing blank space.
    RFC4180 says that blank space is part of the data.
  convert(+Boolean)
    If true (default), use name/2 on the field data.  This translates
    the field into a number if possible.
  case(+Action)
    If down, downcase atomic values.  If up, upcase them and if
    preserve (default), do not change the case.
  functor(+Atom)
    Functor used for row terms.  Default is row.
  arity(?Arity)
    Number of fields per row.  Raises a
    domain_error(row_arity(Expected), Found) if a row is found with a
    different arity.
  match_arity(+Boolean)
    If false (default true), do not reject CSV files where lines
    provide a varying number of fields (columns).  This can be a
    work-around to use some incorrect CSV files.

csv(Rows) -->
    csv(Rows, []).

csv(Rows, Options) -->
    { make_csv_options(Options, Record, _) },
    csv_roptions(Rows, Record).

csv_roptions(Rows, Record) -->
    { ground(Rows) },
    !,
    emit_csv(Rows, Record).
csv_roptions(Rows, Record) -->
    csv_data(Rows, Record).

csv_data([], _) -->
    eof,
    !.
csv_data([Row|More], Options) -->
    row(Row, Options),
    !,
    { debug(csv, 'Row: ~p', [Row]) },
    csv_data(More, Options).

eof([], []).

row(Row, Options) -->
    fields(Fields, Options),
    { csv_options_functor(Options, Functor),
      Row =.. [Functor|Fields],
      functor(Row, _, Arity),
      check_arity(Options, Arity)
    }.

check_arity(Options, Arity) :-
    csv_options_arity(Options, Arity),
    !.
check_arity(Options, _) :-
    csv_options_match_arity(Options, false),
    !.
check_arity(Options, Arity) :-
    csv_options_arity(Options, Expected),
    domain_error(row_arity(Expected), Arity).

fields([F|T], Options) -->
    field(F, Options),
    (   separator(Options)
    ->  fields(T, Options)
    ;   end_of_record
    ->  { T = [] }
    ).

field(Value, Options) -->
    "\"",
    { csv_options_ignore_quotes(Options, false) },
    !,
    string_codes(Codes),
    { make_value(Codes, Value, Options) }.
field(Value, Options) -->
    { csv_options_strip(Options, true) },
    !,
    stripped_field(Value, Options).
field(Value, Options) -->
    { csv_options_separator(Options, Sep) },
    field_codes(Codes, Sep),
    { make_value(Codes, Value, Options) }.


stripped_field(Value, Options) -->
    ws,
    (   "\"",
        { csv_options_ignore_quotes(Options, false) }
    ->  string_codes(Codes),
        ws
    ;   { csv_options_separator(Options, Sep) },
        field_codes(Codes0, Sep),
        { strip_trailing_ws(Codes0, Codes) }
    ),
    { make_value(Codes, Value, Options) }.

ws --> " ", !, ws.
ws --> "\t", !, ws.
ws --> "".

strip_trailing_ws(List, Stripped) :-
    append(Stripped, WS, List),
    all_ws(WS).

all_ws([]).
all_ws([32|T]) :- all_ws(T).
all_ws([9|T]) :- all_ws(T).
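For example, the options can be exercised by running csv//2 on an
in-memory code list.  The sketch below uses made-up sample data and an
invented predicate name parse_demo/1, and relies on the default
convert(true), so numeric fields become numbers.

% Hypothetical demo: parse semicolon separated text.  Expected result:
% Rows = [row(a, 1), row(b, 2)].
parse_demo(Rows) :-
    string_codes("a;1\nb;2\n", Codes),
    phrase(csv(Rows, [separator(0';)]), Codes).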
string_codes(List) -->
    [H],
    (   { H == 0'" }
    ->  (   "\""
        ->  { List = [H|T] },
            string_codes(T)
        ;   { List = [] }
        )
    ;   { List = [H|T] },
        string_codes(T)
    ).

field_codes([], Sep), [Sep] --> [Sep], !.
field_codes([], _), "\n" --> "\r\n", !.
field_codes([], _), "\n" --> "\n", !.
field_codes([], _), "\n" --> "\r", !.
field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep).
field_codes([], _) --> [].              % unterminated last record
make_value(Codes, Value, Options) :-
    csv_options_convert(Options, Convert),
    csv_options_case(Options, Case),
    make_value(Convert, Case, Codes, Value).

make_value(true, preserve, Codes, Value) :-
    !,
    name(Value, Codes).
make_value(true, Case, Codes, Value) :-
    !,
    (   number_string(Value, Codes)
    ->  true
    ;   make_value(false, Case, Codes, Value)
    ).
make_value(false, preserve, Codes, Value) :-
    !,
    atom_codes(Value, Codes).
make_value(false, down, Codes, Value) :-
    !,
    string_codes(String, Codes),
    downcase_atom(String, Value).
make_value(false, up, Codes, Value) :-
    string_codes(String, Codes),
    upcase_atom(String, Value).

separator(Options) -->
    { csv_options_separator(Options, Sep) },
    [Sep].

end_of_record --> "\n".                 % Unix files
end_of_record --> "\r\n".               % DOS files
end_of_record --> "\r".                 % MacOS files
end_of_record --> eof.                  % unterminated last record
In addition to the options of csv_read_file/3, this predicate processes
the option:

  line(-Line)
    Line is unified with the line from which Row is read.
csv_read_file_row(File, Row, Options) :-
    default_separator(File, Options, Options1),
    make_csv_options(Options1, RecordOptions, Options2),
    select_option(line(Line), Options2, RestOptions, _),
    setup_call_cleanup(
        open(File, read, Stream, RestOptions),
        csv_read_stream_row(Stream, Row, Line, RecordOptions),
        close(Stream)).

csv_read_stream_row(Stream, Row, Line, Options) :-
    between(1, infinite, Line),
    (   csv_read_row(Stream, Row0, Options),
        Row0 \== end_of_file
    ->  Row = Row0
    ;   !,
        fail
    ).
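A hedged usage sketch: csv_read_file_row/3 enumerates rows on
backtracking, so combined with the line(-Line) option it can report
where each row came from.  The predicate name print_rows/1 is an
assumption made up for this example.

% Hypothetical example: print every row of File with its row number.
print_rows(File) :-
    forall(csv_read_file_row(File, Row, [line(Line)]),
           format('~w: ~p~n', [Line, Row])).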
csv_read_row/3 reads the next record from Stream and unifies the result
with Row.  CompiledOptions is created from the csv//2 options using
csv_options/2.  Row is unified with end_of_file upon reaching the end
of the input.

csv_read_row(Stream, Row, _Record) :-
    at_end_of_stream(Stream),
    !,
    Row = end_of_file.
csv_read_row(Stream, Row, Record) :-
    read_lines_to_codes(Stream, Codes, Record, even),
    phrase(row(Row0, Record), Codes),
    !,
    Row = Row0.

read_lines_to_codes(Stream, Codes, Options, QuoteQuantity) :-
    read_line_to_codes(Stream, Codes0),
    Codes0 \== end_of_file,
    (   (   csv_options_ignore_quotes(Options, true)
        ;   check_quotes(Codes0, QuoteQuantity, even)
        )
    ->  Codes = Codes0
    ;   append(Codes0, [0'\n|Tail], Codes),
        read_lines_to_codes(Stream, Tail, Options, odd)
    ).

check_quotes([], QuoteQuantity, QuoteQuantity) :-
    !.
check_quotes([0'"|T], odd, Result) :-
    !,
    check_quotes(T, even, Result).
check_quotes([0'"|T], even, Result) :-
    !,
    check_quotes(T, odd, Result).
check_quotes([_|T], QuoteQuantity, Result) :-
    check_quotes(T, QuoteQuantity, Result).
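A minimal sketch of stream-based reading, assuming Stream is already
open and process_row/1 is a user-supplied (hypothetical) callback; the
predicate names read_rows/2 and read_rows_/2 are invented here.

% Compile the options once, then read rows until end_of_file.
read_rows(Stream, Options) :-
    csv_options(Compiled, Options),
    read_rows_(Stream, Compiled).

read_rows_(Stream, Compiled) :-
    csv_read_row(Stream, Row, Compiled),
    (   Row == end_of_file
    ->  true
    ;   process_row(Row),               % hypothetical callback
        read_rows_(Stream, Compiled)
    ).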
csv_options(-Compiled, +Options) compiles the csv//2 options in Options
into an opaque record that can be passed to csv_read_row/3.

csv_options(Compiled, Options) :-
    make_csv_options(Options, Compiled, _Ignored).


                 /*******************************
                 *            OUTPUT            *
                 *******************************/

csv_write_file/2 and csv_write_file/3 write a list of Prolog terms to a
CSV file.  Options are given to csv//2; remaining options are given to
open/4.  The default separator depends on the file name extension and
is a tab (\t) for .tsv files and a comma (,) otherwise.

csv_write_file(File, Data) :-
    csv_write_file(File, Data, []).

csv_write_file(File, Data, Options) :-
    must_be(list, Data),
    default_separator(File, Options, Options1),
    make_csv_options(Options1, Record, RestOptions),
    phrase(emit_csv(Data, Record), String),
    setup_call_cleanup(
        open(File, write, Out, RestOptions),
        format(Out, '~s', [String]),
        close(Out)).


emit_csv([], _) --> [].
emit_csv([H|T], Options) -->
    emit_row(H, Options), "\r\n",       % RFC 4180 demands \r\n
    emit_csv(T, Options).

emit_row(Row, Options) -->
    { Row =.. [_|Fields] },
    emit_fields(Fields, Options).

emit_fields([H|T], Options) -->
    emit_field(H, Options),
    (   { T == [] }
    ->  []
    ;   { csv_options_separator(Options, Sep) },
        [Sep],
        emit_fields(T, Options)
    ).

emit_field(H, Options) -->
    { (   atom(H)
      ->  atom_codes(H, Codes)
      ;   string(H)
      ->  string_codes(H, Codes)
      )
    },
    !,
    (   { needs_quotes(H, Options) }
    ->  "\"", emit_string(Codes), "\""
    ;   emit_codes(Codes)
    ).
emit_field([], _) -->
    !,
    { atom_codes('[]', Codes) },
    emit_codes(Codes).
emit_field(H, _) -->
    { number_codes(H, Codes) },
    emit_codes(Codes).

needs_quotes(Atom, _) :-
    sub_atom(Atom, _, _, _, '"'),
    !.
needs_quotes(Atom, _) :-
    sub_atom(Atom, _, _, _, '\n'),
    !.
needs_quotes(Atom, _) :-
    sub_atom(Atom, _, _, _, '\r'),
    !.
needs_quotes(Atom, Options) :-
    csv_options_separator(Options, Sep),
    char_code(Char, Sep),
    sub_atom(Atom, _, _, _, Char),
    !.

emit_string([]) --> "".
emit_string([0'"|T]) --> !, "\"\"", emit_string(T).
emit_string([H|T]) --> [H], emit_string(T).

emit_codes([]) --> "".
emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T).
emit_codes([H|T]) --> [H], emit_codes(T).
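A hedged writing sketch: the rows, the predicate name write_demo/0 and
the file name out.csv are made up for illustration.

% Hypothetical example: write three rows to an assumed file.  The
% .csv extension selects the comma separator.
write_demo :-
    Rows = [ row(name, age),
             row(alice, 41),
             row(bob, 39)
           ],
    csv_write_file('out.csv', Rows, []).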
csv_write_stream/3 writes the rows in Data to Stream.  This is similar
to csv_write_file/3, but it can deal with data that is produced
incrementally.  The example below saves all answers from the predicate
data/3 to File.

save_data(File) :-
    setup_call_cleanup(
        open(File, write, Out),
        forall(data(C1,C2,C3),
               csv_write_stream(Out, [row(C1,C2,C3)], [])),
        close(Out)).
csv_write_stream(Stream, Data, Options) :-
    must_be(list, Data),
    make_csv_options(Options, Record, _),
    phrase(emit_csv(Data, Record), String),
    format(Stream, '~s', [String]).
Process CSV (Comma-Separated Values) data
This library parses and generates CSV data. CSV data is represented in Prolog as a list of rows. Each row is a compound term, where all rows have the same name and arity.
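For instance, a table with a header row and two data rows could be
represented as follows (the values are made up for illustration):

[ row(country, capital),
  row('Netherlands', 'Amsterdam'),
  row('France', 'Paris')
]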