1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker and Matt Lilley 4 E-mail: J.Wielemaker@cs.vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2012-2016, VU University Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(archive, 36 [ archive_open/3, % +Stream, -Archive, +Options 37 archive_open/4, % +Stream, +Mode, -Archive, +Options 38 archive_create/3, % +OutputFile, +InputFileList, +Options 39 archive_close/1, % +Archive 40 archive_property/2, % +Archive, ?Property 41 archive_next_header/2, % +Archive, -Name 42 archive_open_entry/2, % +Archive, -EntryStream 43 archive_header_property/2, % +Archive, ?Property 44 archive_set_header_property/2, % +Archive, +Property 45 archive_extract/3, % +Archive, +Dir, +Options 46 47 archive_entries/2, % +Archive, -Entries 48 archive_data_stream/3 % +Archive, -DataStream, +Options 49 ]). 50:- use_module(library(error)). 51:- use_module(library(option)). 52:- use_module(library(filesex)). 53 54/** <module> Access several archive formats 55 56This library uses _libarchive_ to access a variety of archive formats. 57The following example lists the entries in an archive: 58 59 == 60 list_archive(File) :- 61 archive_open(File, Archive, []), 62 repeat, 63 ( archive_next_header(Archive, Path) 64 -> format('~w~n', [Path]), 65 fail 66 ; !, 67 archive_close(Archive) 68 ). 69 == 70 71@see http://code.google.com/p/libarchive/ 72*/ 73 74:- use_foreign_library(foreign(archive4pl)). 75 76%! archive_open(+Data, -Archive, +Options) is det. 77% 78% Wrapper around archive_open/4 that opens the archive in read mode. 79 80archive_open(Stream, Archive, Options) :- 81 archive_open(Stream, read, Archive, Options). 82 83:- predicate_options(archive_open/4, 4, 84 [ close_parent(boolean), 85 filter(oneof([all,bzip2,compress,gzip,grzip,lrzip, 86 lzip,lzma,lzop,none,rpm,uu,xz])), 87 format(oneof([all,'7zip',ar,cab,cpio,empty,gnutar, 88 iso9660,lha,mtree,rar,raw,tar,xar,zip])) 89 ]). 90:- predicate_options(archive_create/3, 3, 91 [ directory(atom), 92 pass_to(archive_open/4, 4) 93 ]). 94 95%! archive_open(+Data, +Mode, -Archive, +Options) is det. 96% 97% Open the archive in Data and unify Archive with a handle to the 98% opened archive. Data is either a file or a stream that contains 99% a valid archive. Details are controlled by Options. Typically, 100% the option close_parent(true) is used to close stream if the 101% archive is closed using archive_close/1. For other options, the 102% defaults are typically fine. The option format(raw) must be used 103% to process compressed streams that do not contain explicit 104% entries (e.g., gzip'ed data) unambibuously. The =raw= format 105% creates a _pseudo archive_ holding a single member named =data=. 106% 107% * close_parent(+Boolean) 108% If this option is =true= (default =false=), Stream is closed 109% if archive_close/1 is called on Archive. 110% 111% * compression(+Compression) 112% Synomym for filter(Compression). Deprecated. 113% 114% * filter(+Filter) 115% Support the indicated filter. This option may be 116% used multiple times to support multiple filters. In read mode, 117% If no filter options are provided, =all= is assumed. In write 118% mode, none is assumed. 119% Supported values are =all=, =bzip2=, =compress=, =gzip=, 120% =grzip=, =lrzip=, =lzip=, =lzma=, =lzop=, =none=, =rpm=, =uu= 121% and =xz=. The value =all= is default for read, =none= for write. 122% 123% * format(+Format) 124% Support the indicated format. This option may be used 125% multiple times to support multiple formats in read mode. 126% In write mode, you must supply a single format. If no format 127% options are provided, =all= is assumed for read mode. Note that 128% =all= does *not* include =raw=. To open both archive 129% and non-archive files, _both_ format(all) and 130% format(raw) must be specified. Supported values are: =all=, 131% =7zip=, =ar=, =cab=, =cpio=, =empty=, =gnutar=, =iso9660=, 132% =lha=, =mtree=, =rar=, =raw=, =tar=, =xar= and =zip=. The 133% value =all= is default for read. 134% 135% Note that the actually supported compression types and formats 136% may vary depending on the version and installation options of 137% the underlying libarchive library. This predicate raises a 138% domain error if the (explicitly) requested format is not 139% supported. 140% 141% @error domain_error(filter, Filter) if the requested 142% filter is not supported. 143% @error domain_error(format, Format) if the requested 144% format type is not supported. 145 146archive_open(stream(Stream), Mode, Archive, Options) :- 147 !, 148 archive_open_stream(Stream, Mode, Archive, Options). 149archive_open(Stream, Mode, Archive, Options) :- 150 is_stream(Stream), 151 !, 152 archive_open_stream(Stream, Mode, Archive, Options). 153archive_open(File, Mode, Archive, Options) :- 154 open(File, Mode, Stream, [type(binary)]), 155 catch(archive_open_stream(Stream, Mode, Archive, [close_parent(true)|Options]), 156 E, (close(Stream, [force(true)]), throw(E))). 157 158 159%! archive_close(+Archive) is det. 160% 161% Close the archive. If close_parent(true) is specified, the 162% underlying stream is closed too. If there is an entry opened 163% with archive_open_entry/2, actually closing the archive is 164% delayed until the stream associated with the entry is closed. 165% This can be used to open a stream to an archive entry without 166% having to worry about closing the archive: 167% 168% == 169% archive_open_named(ArchiveFile, EntryName, Stream) :- 170% archive_open(ArchiveFile, Handle, []), 171% archive_next_header(Handle, Name), 172% archive_open_entry(Handle, Stream), 173% archive_close(Archive). 174% == 175 176 177%! archive_property(+Handle, ?Property) is nondet. 178% 179% True when Property is a property of the archive Handle. Defined 180% properties are: 181% 182% * filters(List) 183% True when the indicated filters are applied before reaching 184% the archive format. 185 186archive_property(Handle, Property) :- 187 defined_archive_property(Property), 188 Property =.. [Name,Value], 189 archive_property(Handle, Name, Value). 190 191defined_archive_property(filter(_)). 192 193 194%! archive_next_header(+Handle, -Name) is semidet. 195% 196% Forward to the next entry of the archive for which Name unifies 197% with the pathname of the entry. Fails silently if the name of 198% the archive is reached before success. Name is typically 199% specified if a single entry must be accessed and unbound 200% otherwise. The following example opens a Prolog stream to a 201% given archive entry. Note that _Stream_ must be closed using 202% close/1 and the archive must be closed using archive_close/1 203% after the data has been used. See also setup_call_cleanup/3. 204% 205% == 206% open_archive_entry(ArchiveFile, Entry, Stream) :- 207% open(ArchiveFile, read, In, [type(binary)]), 208% archive_open(In, Archive, [close_parent(true)]), 209% archive_next_header(Archive, Entry), 210% archive_open_entry(Archive, Stream). 211% == 212% 213% @error permission_error(next_header, archive, Handle) if a 214% previously opened entry is not closed. 215 216%! archive_open_entry(+Archive, -Stream) is det. 217% 218% Open the current entry as a stream. Stream must be closed. 219% If the stream is not closed before the next call to 220% archive_next_header/2, a permission error is raised. 221 222 223%! archive_set_header_property(+Archive, +Property) 224% 225% Set Property of the current header. Write-mode only. Defined 226% properties are: 227% 228% * filetype(-Type) 229% Type is one of =file=, =link=, =socket=, =character_device=, 230% =block_device=, =directory= or =fifo=. It appears that this 231% library can also return other values. These are returned as 232% an integer. 233% * mtime(-Time) 234% True when entry was last modified at time. 235% * size(-Bytes) 236% True when entry is Bytes long. 237% * link_target(-Target) 238% Target for a link. Currently only supported for symbolic 239% links. 240 241%! archive_header_property(+Archive, ?Property) 242% 243% True when Property is a property of the current header. Defined 244% properties are: 245% 246% * filetype(-Type) 247% Type is one of =file=, =link=, =socket=, =character_device=, 248% =block_device=, =directory= or =fifo=. It appears that this 249% library can also return other values. These are returned as 250% an integer. 251% * mtime(-Time) 252% True when entry was last modified at time. 253% * size(-Bytes) 254% True when entry is Bytes long. 255% * link_target(-Target) 256% Target for a link. Currently only supported for symbolic 257% links. 258% * format(-Format) 259% Provides the name of the archive format applicable to the 260% current entry. The returned value is the lowercase version 261% of the output of archive_format_name(). 262% * permissions(-Integer) 263% True when entry has the indicated permission mask. 264 265archive_header_property(Archive, Property) :- 266 ( nonvar(Property) 267 -> true 268 ; header_property(Property) 269 ), 270 archive_header_prop_(Archive, Property). 271 272header_property(filetype(_)). 273header_property(mtime(_)). 274header_property(size(_)). 275header_property(link_target(_)). 276header_property(format(_)). 277header_property(permissions(_)). 278 279 280%! archive_extract(+ArchiveFile, +Dir, +Options) 281% 282% Extract files from the given archive into Dir. Supported 283% options: 284% 285% * remove_prefix(+Prefix) 286% Strip Prefix from all entries before extracting 287% * exclude(+ListOfPatterns) 288% Ignore members that match one of the given patterns. 289% Patterns are handed to wildcard_match/2. 290% 291% @error existence_error(directory, Dir) if Dir does not exist 292% or is not a directory. 293% @error domain_error(path_prefix(Prefix), Path) if a path in 294% the archive does not start with Prefix 295% @tbd Add options 296 297archive_extract(Archive, Dir, Options) :- 298 ( exists_directory(Dir) 299 -> true 300 ; existence_error(directory, Dir) 301 ), 302 setup_call_cleanup( 303 archive_open(Archive, Handle, Options), 304 extract(Handle, Dir, Options), 305 archive_close(Handle)). 306 307extract(Archive, Dir, Options) :- 308 archive_next_header(Archive, Path), 309 !, 310 ( archive_header_property(Archive, filetype(file)), 311 \+ excluded(Path, Options) 312 -> archive_header_property(Archive, permissions(Perm)), 313 ( option(remove_prefix(Remove), Options) 314 -> ( atom_concat(Remove, ExtractPath, Path) 315 -> true 316 ; domain_error(path_prefix(Remove), Path) 317 ) 318 ; ExtractPath = Path 319 ), 320 directory_file_path(Dir, ExtractPath, Target), 321 file_directory_name(Target, FileDir), 322 make_directory_path(FileDir), 323 setup_call_cleanup( 324 archive_open_entry(Archive, In), 325 setup_call_cleanup( 326 open(Target, write, Out, [type(binary)]), 327 copy_stream_data(In, Out), 328 close(Out)), 329 close(In)), 330 set_permissions(Perm, Target) 331 ; true 332 ), 333 extract(Archive, Dir, Options). 334extract(_, _, _). 335 336excluded(Path, Options) :- 337 option(exclude(Patterns), Options), 338 split_string(Path, "/", "/", Parts), 339 member(Segment, Parts), 340 Segment \== "", 341 member(Pattern, Patterns), 342 wildcard_match(Pattern, Segment). 343 344 345%! set_permissions(+Perm:integer, +Target:atom) 346% 347% Restore the permissions. Currently only restores the executable 348% permission. 349 350set_permissions(Perm, Target) :- 351 Perm /\ 0o100 =\= 0, 352 !, 353 '$mark_executable'(Target). 354set_permissions(_, _). 355 356 357 /******************************* 358 * HIGH LEVEL PREDICATES * 359 *******************************/ 360 361%! archive_entries(+Archive, -Paths) is det. 362% 363% True when Paths is a list of pathnames appearing in Archive. 364 365archive_entries(Archive, Paths) :- 366 setup_call_cleanup( 367 archive_open(Archive, Handle, []), 368 contents(Handle, Paths), 369 archive_close(Handle)). 370 371contents(Handle, [Path|T]) :- 372 archive_next_header(Handle, Path), 373 !, 374 contents(Handle, T). 375contents(_, []). 376 377%! archive_data_stream(+Archive, -DataStream, +Options) is nondet. 378% 379% True when DataStream is a stream to a data object inside 380% Archive. This predicate transparently unpacks data inside 381% _possibly nested_ archives, e.g., a _tar_ file inside a _zip_ 382% file. It applies the appropriate decompression filters and thus 383% ensures that Prolog reads the plain data from DataStream. 384% DataStream must be closed after the content has been processed. 385% Backtracking opens the next member of the (nested) archive. This 386% predicate processes the following options: 387% 388% - meta_data(-Data:list(dict)) 389% If provided, Data is unified with a list of filters applied to 390% the (nested) archive to open the current DataStream. The first 391% element describes the outermost archive. Each Data dict 392% contains the header properties (archive_header_property/2) as 393% well as the keys: 394% 395% - filters(Filters:list(atom)) 396% Filter list as obtained from archive_property/2 397% - name(Atom) 398% Name of the entry. 399% 400% Non-archive files are handled as pseudo-archives that hold a 401% single stream. This is implemented by using archive_open/3 with 402% the options `[format(all),format(raw)]`. 403 404archive_data_stream(Archive, DataStream, Options) :- 405 option(meta_data(MetaData), Options, _), 406 archive_content(Archive, DataStream, MetaData, []). 407 408archive_content(Archive, Entry, [EntryMetadata|PipeMetadataTail], PipeMetadata2) :- 409 archive_property(Archive, filter(Filters)), 410 repeat, 411 ( archive_next_header(Archive, EntryName) 412 -> findall(EntryProperty, 413 archive_header_property(Archive, EntryProperty), 414 EntryProperties), 415 dict_create(EntryMetadata, archive_meta_data, 416 [ filters(Filters), 417 name(EntryName) 418 | EntryProperties 419 ]), 420 ( EntryMetadata.filetype == file 421 -> archive_open_entry(Archive, Entry0), 422 ( EntryName == data, 423 EntryMetadata.format == raw 424 -> % This is the last entry in this nested branch. 425 % We therefore close the choicepoint created by repeat/0. 426 % Not closing this choicepoint would cause 427 % archive_next_header/2 to throw an exception. 428 !, 429 PipeMetadataTail = PipeMetadata2, 430 Entry = Entry0 431 ; PipeMetadataTail = PipeMetadata1, 432 open_substream(Entry0, 433 Entry, 434 PipeMetadata1, 435 PipeMetadata2) 436 ) 437 ; fail 438 ) 439 ; !, 440 fail 441 ). 442 443open_substream(In, Entry, ArchiveMetadata, PipeTailMetadata) :- 444 setup_call_cleanup( 445 archive_open(stream(In), 446 Archive, 447 [ close_parent(true), 448 format(all), 449 format(raw) 450 ]), 451 archive_content(Archive, Entry, ArchiveMetadata, PipeTailMetadata), 452 archive_close(Archive)). 453 454 455%! archive_create(+OutputFile, +InputFiles, +Options) is det. 456% 457% Convenience predicate to create an archive in OutputFile with 458% data from a list of InputFiles and the given Options. 459% 460% Besides options supported by archive_open/4, the following 461% options are supported: 462% 463% * directory(+Directory) 464% Changes the directory before adding input files. If this is 465% specified, paths of input files must be relative to 466% Directory and archived files will not have Directory 467% as leading path. This is to simulate =|-C|= option of 468% the =tar= program. 469% 470% * format(+Format) 471% Write mode supports the following formats: `7zip`, `cpio`, 472% `gnutar`, `iso9660`, `xar` and `zip`. Note that a particular 473% installation may support only a subset of these, depending on 474% the configuration of `libarchive`. 475 476archive_create(OutputFile, InputFiles, Options) :- 477 must_be(list(text), InputFiles), 478 option(directory(BaseDir), Options, '.'), 479 setup_call_cleanup( 480 archive_open(OutputFile, write, Archive, Options), 481 archive_create_1(Archive, BaseDir, BaseDir, InputFiles, top), 482 archive_close(Archive)). 483 484archive_create_1(_, _, _, [], _) :- !. 485archive_create_1(Archive, Base, Current, ['.'|Files], sub) :- 486 !, 487 archive_create_1(Archive, Base, Current, Files, sub). 488archive_create_1(Archive, Base, Current, ['..'|Files], Where) :- 489 !, 490 archive_create_1(Archive, Base, Current, Files, Where). 491archive_create_1(Archive, Base, Current, [File|Files], Where) :- 492 directory_file_path(Current, File, Filename), 493 archive_create_2(Archive, Base, Filename), 494 archive_create_1(Archive, Base, Current, Files, Where). 495 496archive_create_2(Archive, Base, Directory) :- 497 exists_directory(Directory), 498 !, 499 entry_name(Base, Directory, Directory0), 500 archive_next_header(Archive, Directory0), 501 time_file(Directory, Time), 502 archive_set_header_property(Archive, mtime(Time)), 503 archive_set_header_property(Archive, filetype(directory)), 504 archive_open_entry(Archive, EntryStream), 505 close(EntryStream), 506 directory_files(Directory, Files), 507 archive_create_1(Archive, Base, Directory, Files, sub). 508archive_create_2(Archive, Base, Filename) :- 509 entry_name(Base, Filename, Filename0), 510 archive_next_header(Archive, Filename0), 511 size_file(Filename, Size), 512 time_file(Filename, Time), 513 archive_set_header_property(Archive, size(Size)), 514 archive_set_header_property(Archive, mtime(Time)), 515 setup_call_cleanup( 516 archive_open_entry(Archive, EntryStream), 517 setup_call_cleanup( 518 open(Filename, read, DataStream, [type(binary)]), 519 copy_stream_data(DataStream, EntryStream), 520 close(DataStream)), 521 close(EntryStream)). 522 523entry_name('.', Name, Name) :- !. 524entry_name(Base, Name, EntryName) :- 525 directory_file_path(Base, EntryName, Name)