View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2003-2017, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_db,
   37          [ rdf_version/1,              % -Version
   38
   39            rdf/3,                      % ?Subject, ?Predicate, ?Object
   40            rdf/4,                      % ?Subject, ?Predicate, ?Object, ?DB
   41            rdf_has/3,                  % ?Subject, +Pred, ?Obj
   42            rdf_has/4,                  % ?Subject, +Pred, ?Obj, -RealPred
   43            rdf_reachable/3,            % ?Subject, +Pred, ?Object
   44            rdf_reachable/5,            % ?Subject, +Pred, ?Object, +MaxD, ?D
   45            rdf_resource/1,             % ?Resource
   46            rdf_subject/1,              % ?Subject
   47
   48            rdf_member_property/2,      % ?Property, ?Index
   49
   50            rdf_assert/3,               % +Subject, +Predicate, +Object
   51            rdf_assert/4,               % +Subject, +Predicate, +Object, +DB
   52            rdf_retractall/3,           % ?Subject, ?Predicate, ?Object
   53            rdf_retractall/4,           % ?Subject, ?Predicate, ?Object, +DB
   54            rdf_update/4,               % +Subject, +Predicate, +Object, +Act
   55            rdf_update/5,               % +Subject, +Predicate, +Object, +Src, +Act
   56            rdf_set_predicate/2,        % +Predicate, +Property
   57            rdf_predicate_property/2,   % +Predicate, ?Property
   58            rdf_current_predicate/1,    % -Predicate
   59            rdf_current_literal/1,      % -Literal
   60            rdf_transaction/1,          % :Goal
   61            rdf_transaction/2,          % :Goal, +Id
   62            rdf_transaction/3,          % :Goal, +Id, +Options
   63            rdf_active_transaction/1,   % ?Id
   64
   65            rdf_monitor/2,              % :Goal, +Options
   66
   67            rdf_save_db/1,              % +File
   68            rdf_save_db/2,              % +File, +DB
   69            rdf_load_db/1,              % +File
   70            rdf_reset_db/0,
   71
   72            rdf_node/1,                 % -Id
   73            rdf_bnode/1,                % -Id
   74            rdf_is_bnode/1,             % +Id
   75
   76            rdf_is_resource/1,          % +Term
   77            rdf_is_literal/1,           % +Term
   78            rdf_literal_value/2,        % +Term, -Value
   79
   80            rdf_load/1,                 % +File
   81            rdf_load/2,                 % +File, +Options
   82            rdf_save/1,                 % +File
   83            rdf_save/2,                 % +File, +Options
   84            rdf_unload/1,               % +File
   85            rdf_unload_graph/1,         % +Graph
   86
   87            rdf_md5/2,                  % +DB, -MD5
   88            rdf_atom_md5/3,             % +Text, +Times, -MD5
   89
   90            rdf_create_graph/1,         % ?Graph
   91            rdf_graph_property/2,       % ?Graph, ?Property
   92            rdf_set_graph/2,            % +Graph, +Property
   93            rdf_graph/1,                % ?Graph
   94            rdf_source/1,               % ?File
   95            rdf_source/2,               % ?DB, ?SourceURL
   96            rdf_make/0,                 % Reload modified databases
   97            rdf_gc/0,                   % Garbage collection
   98
   99            rdf_source_location/2,      % +Subject, -Source
  100            rdf_statistics/1,           % -Key
  101            rdf_set/1,                  % +Term
  102            rdf_generation/1,           % -Generation
  103            rdf_snapshot/1,             % -Snapshot
  104            rdf_delete_snapshot/1,      % +Snapshot
  105            rdf_current_snapshot/1,     % +Snapshot
  106            rdf_estimate_complexity/4,  % +S,+P,+O,-Count
  107
  108            rdf_save_subject/3,         % +Stream, +Subject, +DB
  109            rdf_save_header/2,          % +Out, +Options
  110            rdf_save_footer/1,          % +Out
  111
  112            rdf_equal/2,                % ?Resource, ?Resource
  113            lang_equal/2,               % +Lang1, +Lang2
  114            lang_matches/2,             % +Lang, +Pattern
  115
  116            rdf_prefix/2,               % :Alias, +URI
  117            rdf_current_prefix/2,       % :Alias, ?URI
  118            rdf_register_prefix/2,      % +Alias, +URI
  119            rdf_register_prefix/3,      % +Alias, +URI, +Options
  120            rdf_current_ns/2,           % :Alias, ?URI
  121            rdf_register_ns/2,          % +Alias, +URI
  122            rdf_register_ns/3,          % +Alias, +URI, +Options
  123            rdf_global_id/2,            % ?NS:Name, :Global
  124            rdf_global_object/2,        % +Object, :NSExpandedObject
  125            rdf_global_term/2,          % +Term, :WithExpandedNS
  126
  127            rdf_compare/3,              % -Dif, +Object1, +Object2
  128            rdf_match_label/3,          % +How, +String, +Label
  129            rdf_split_url/3,            % ?Base, ?Local, ?URL
  130            rdf_url_namespace/2,        % +URL, ?Base
  131
  132            rdf_warm_indexes/0,
  133            rdf_warm_indexes/1,         % +Indexed
  134            rdf_update_duplicates/0,
  135
  136            rdf_debug/1,                % Set verbosity
  137
  138            rdf_new_literal_map/1,      % -Handle
  139            rdf_destroy_literal_map/1,  % +Handle
  140            rdf_reset_literal_map/1,    % +Handle
  141            rdf_insert_literal_map/3,   % +Handle, +Key, +Literal
  142            rdf_insert_literal_map/4,   % +Handle, +Key, +Literal, -NewKeys
  143            rdf_delete_literal_map/3,   % +Handle, +Key, +Literal
  144            rdf_delete_literal_map/2,   % +Handle, +Key
  145            rdf_find_literal_map/3,     % +Handle, +KeyList, -Literals
  146            rdf_keys_in_literal_map/3,  % +Handle, +Spec, -Keys
  147            rdf_statistics_literal_map/2, % +Handle, +Name(-Arg...)
  148
  149            rdf_graph_prefixes/2,       % ?Graph, -Prefixes
  150            rdf_graph_prefixes/3,       % ?Graph, -Prefixes, :Filter
  151
  152            (rdf_meta)/1,               % +Heads
  153            op(1150, fx, (rdf_meta))
  154          ]).  155:- use_module(library(rdf)).  156:- use_module(library(lists)).  157:- use_module(library(pairs)).  158:- use_module(library(shlib)).  159:- use_module(library(gensym)).  160:- use_module(library(sgml)).  161:- use_module(library(sgml_write)).  162:- use_module(library(option)).  163:- use_module(library(error)).  164:- use_module(library(uri)).  165:- use_module(library(debug)).  166:- use_module(library(apply)).  167:- use_module(library(xsdp_types)).  168:- if(exists_source(library(thread))).  169:- use_module(library(thread)).  170:- endif.  171:- use_module(library(semweb/rdf_cache)).  172
  173:- use_foreign_library(foreign(rdf_db)).  174:- public rdf_print_predicate_cloud/2.  % print matrix of reachable predicates
  175
  176:- meta_predicate
  177    rdf_current_prefix(:, -),
  178    rdf_current_ns(:, -),
  179    rdf_global_id(?, :),
  180    rdf_global_term(+, :),
  181    rdf_global_object(+, :),
  182    rdf_transaction(0),
  183    rdf_transaction(0, +),
  184    rdf_transaction(0, +, +),
  185    rdf_monitor(1, +),
  186    rdf_save(+, :),
  187    rdf_load(+, :).  188
  189:- predicate_options(rdf_graph_prefixes/3, 3,
  190                     [expand(callable), filter(callable), min_count(nonneg)]).  191:- predicate_options(rdf_load/2, 2,
  192                     [ base_uri(atom),
  193                       cache(boolean),
  194                       concurrent(positive_integer),
  195                       db(atom),
  196                       format(oneof([xml,triples,turtle,trig,nquads,ntriples])),
  197                       graph(atom),
  198                       if(oneof([true,changed,not_loaded])),
  199                       modified(-float),
  200                       silent(boolean),
  201                       register_namespaces(boolean)
  202                     ]).  203:- predicate_options(rdf_register_ns/3, 3, [force(boolean), keep(boolean)]).  204:- predicate_options(rdf_save/2, 2,
  205                     [ graph(atom),
  206                       db(atom),
  207                       anon(boolean),
  208                       base_uri(atom),
  209                       write_xml_base(boolean),
  210                       convert_typed_literal(callable),
  211                       encoding(encoding),
  212                       document_language(atom),
  213                       namespaces(list(atom)),
  214                       xml_attributes(boolean),
  215                       inline(boolean)
  216                     ]).  217:- predicate_options(rdf_save_header/2, 2,
  218                     [ graph(atom),
  219                       db(atom),
  220                       namespaces(list(atom))
  221                     ]).  222:- predicate_options(rdf_save_subject/3, 3,
  223                     [ graph(atom),
  224                       base_uri(atom),
  225                       convert_typed_literal(callable),
  226                       document_language(atom)
  227                     ]).  228:- predicate_options(rdf_transaction/3, 3,
  229                     [ snapshot(any)
  230                     ]).  231
  232:- multifile ns/2.  233:- dynamic   ns/2.                      % ID, URL
  234:- discontiguous
  235    term_expansion/2.  236
  237/** <module> Core RDF database
  238
  239The file library(semweb/rdf_db) provides the core  of the SWI-Prolog RDF
  240store.
  241
  242@deprecated     New applications should use library(semweb/rdf11), which
  243                provides a much more intuitive API to the RDF store, notably
  244                for handling literals.  The library(semweb/rdf11) runs
  245                currently on top of this library and both can run side-by-side
  246                in the same application.  Terms retrieved from the database
  247                however have a different shape and can not be exchanged without
  248                precautions.
  249*/
  250
  251                 /*******************************
  252                 *           PREFIXES           *
  253                 *******************************/
  254
  255%!  rdf_current_prefix(:Alias, ?URI) is nondet.
  256%
  257%   Query   predefined   prefixes   and    prefixes   defined   with
  258%   rdf_register_prefix/2   and   local   prefixes    defined   with
  259%   rdf_prefix/2. If Alias is unbound and one   URI is the prefix of
  260%   another, the longest is returned first.   This  allows turning a
  261%   resource into a prefix/local couple using the simple enumeration
  262%   below. See rdf_global_id/2.
  263%
  264%     ==
  265%     rdf_current_prefix(Prefix, Expansion),
  266%     atom_concat(Expansion, Local, URI),
  267%     ==
  268
  269rdf_current_prefix(Module:Alias, URI) :-
  270    nonvar(Alias),
  271    !,
  272    rdf_current_prefix(Module, Alias, URI),
  273    !.
  274rdf_current_prefix(Module:Alias, URI) :-
  275    rdf_current_prefix(Module, Alias, URI).
  276
  277rdf_current_prefix(system, Alias, URI) :-
  278    !,
  279    ns(Alias, URI).
  280rdf_current_prefix(Module, Alias, URI) :-
  281    default_module(Module, M),
  282    (   M == system
  283    ->  ns(Alias, URI)
  284    ;   '$flushed_predicate'(M:'rdf prefix'(_,_)),
  285        call(M:'rdf prefix'(Alias,URI))
  286    ).
  287
  288%!  rdf_prefix(:Alias, +URI) is det.
  289%
  290%   Register a _local_ prefix.  This   declaration  takes precedence
  291%   over globally defined prefixes   using  rdf_register_prefix/2,3.
  292%   Module local prefixes are notably required   to deal with SWISH,
  293%   where users need to  be  able   to  have  independent  namespace
  294%   declarations.
  295
  296rdf_prefix(Alias, URI) :-
  297    throw(error(context_error(nodirective, rdf_prefix(Alias, URI)), _)).
  298
  299system:term_expansion((:- rdf_prefix(AliasSpec, URI)), Clauses) :-
  300    prolog_load_context(module, Module),
  301    strip_module(Module:AliasSpec, TM, Alias),
  302    must_be(atom, Alias),
  303    must_be(atom, URI),
  304    (   rdf_current_prefix(TM:Alias, URI)
  305    ->  Clauses = []
  306    ;   TM == Module
  307    ->  Clauses = 'rdf prefix'(Alias, URI)
  308    ;   Clauses = TM:'rdf prefix'(Alias, URI)
  309    ).
  310
  311%!  ns(?Alias, ?URI) is nondet.
  312%
  313%   Dynamic and multifile predicate that   maintains  the registered
  314%   namespace aliases.
  315%
  316%   @deprecated New code  must  modify   the  namespace  table using
  317%   rdf_register_ns/3 and query using rdf_current_ns/2.
  318
  319ns(dc,      'http://purl.org/dc/elements/1.1/').
  320ns(dcterms, 'http://purl.org/dc/terms/').
  321ns(eor,     'http://dublincore.org/2000/03/13/eor#').
  322ns(foaf,    'http://xmlns.com/foaf/0.1/').
  323ns(owl,     'http://www.w3.org/2002/07/owl#').
  324ns(rdf,     'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
  325ns(rdfs,    'http://www.w3.org/2000/01/rdf-schema#').
  326ns(serql,   'http://www.openrdf.org/schema/serql#').
  327ns(skos,    'http://www.w3.org/2004/02/skos/core#').
  328ns(void,    'http://rdfs.org/ns/void#').
  329ns(xsd,     'http://www.w3.org/2001/XMLSchema#').
  330
  331%!  rdf_register_prefix(+Prefix, +URI) is det.
  332%!  rdf_register_prefix(+Prefix, +URI, +Options) is det.
  333%
  334%   Register Prefix as an abbreviation for URI. Options:
  335%
  336%           * force(Boolean)
  337%           If =true=, Replace existing namespace alias. Please note
  338%           that replacing a namespace is dangerous as namespaces
  339%           affect preprocessing. Make sure all code that depends on
  340%           a namespace is compiled after changing the registration.
  341%
  342%           * keep(Boolean)
  343%           If =true= and Alias is already defined, keep the
  344%           original binding for Prefix and succeed silently.
  345%
  346%   Without options, an attempt  to  redefine   an  alias  raises  a
  347%   permission error.
  348%
  349%   Predefined prefixes are:
  350%
  351%   | **Alias** | **IRI prefix**                              |
  352%   | dc        | http://purl.org/dc/elements/1.1/            |
  353%   | dcterms   | http://purl.org/dc/terms/                   |
  354%   | eor       | http://dublincore.org/2000/03/13/eor#       |
  355%   | foaf      | http://xmlns.com/foaf/0.1/                  |
  356%   | owl       | http://www.w3.org/2002/07/owl#              |
  357%   | rdf       | http://www.w3.org/1999/02/22-rdf-syntax-ns# |
  358%   | rdfs      | http://www.w3.org/2000/01/rdf-schema#       |
  359%   | serql     | http://www.openrdf.org/schema/serql#        |
  360%   | skos      | http://www.w3.org/2004/02/skos/core#        |
  361%   | void      | http://rdfs.org/ns/void#                    |
  362%   | xsd       | http://www.w3.org/2001/XMLSchema#           |
  363
  364
  365rdf_register_prefix(Alias, URI) :-
  366    rdf_register_prefix(Alias, URI, []).
  367
  368rdf_register_prefix(Alias, URI, Options) :-
  369    must_be(atom, Alias),
  370    must_be(atom, URI),
  371    (   rdf_current_prefix(system:Alias, URI)
  372    ->  true
  373    ;   register_global_prefix(Alias, URI, Options)
  374    ).
  375
  376%!  register_global_prefix(+Alias, +URI, +Options)
  377%
  378%   Register a global prefix.
  379
  380register_global_prefix(Alias, URI, Options) :-
  381    ns(Alias, _),
  382    !,
  383    (   option(force(true), Options, false)
  384    ->  retractall(ns(Alias, _)),
  385        rdf_register_prefix(Alias, URI, Options),
  386        rdf_empty_prefix_cache
  387    ;   option(keep(true), Options, false)
  388    ->  true
  389    ;   throw(error(permission_error(register, namespace, Alias),
  390                    context(_, 'Already defined')))
  391    ).
  392register_global_prefix(Alias, URI, _) :-
  393    findall(P-U, prefix_conflict(URI, P, U), Pairs),
  394    order_prefixes([Alias-URI|Pairs], Ordered),
  395    forall(member(P-U, Pairs), retract(ns(P,U))),
  396    forall(member(P-U, Ordered), assert(ns(P,U))).
  397
  398prefix_conflict(URI, P, U) :-
  399    ns(P,U),
  400    (   sub_atom(URI, 0, _, _, U)
  401    ->  true
  402    ;   sub_atom(U, 0, _, _, URI)
  403    ).
  404
  405order_prefixes(Pairs, Sorted) :-
  406    map_list_to_pairs(prefix_uri_length, Pairs, ByLen),
  407    sort(1, >=, ByLen, SortedByLen),
  408    pairs_values(SortedByLen, Sorted).
  409
  410prefix_uri_length(_-URI, Len) :-
  411    atom_length(URI, Len).
  412
  413%!  rdf_current_ns(:Prefix, ?URI) is nondet.
  414%
  415%   @deprecated.  Use rdf_current_prefix/2.
  416
  417rdf_current_ns(Prefix, URI) :-
  418    rdf_current_prefix(Prefix, URI).
  419
  420%!  rdf_register_ns(:Prefix, ?URI) is det.
  421%!  rdf_register_ns(:Prefix, ?URI, +Options) is det.
  422%
  423%   Register an RDF prefix.
  424%
  425%   @deprecated. Use rdf_register_prefix/2 or rdf_register_prefix/3.
  426
  427rdf_register_ns(Prefix, URI) :-
  428    rdf_register_prefix(Prefix, URI).
  429rdf_register_ns(Prefix, URI, Options) :-
  430    rdf_register_prefix(Prefix, URI, Options).
  431
  432
  433%!  register_file_ns(+Map:list(pair)) is det.
  434%
  435%   Register a namespace as encounted in   the  namespace list of an
  436%   RDF document. We only register if  both the abbreviation and URL
  437%   are not already known. Is there a   better  way? This code could
  438%   also do checks on the consistency   of  RDF and other well-known
  439%   namespaces.
  440%
  441%   @tbd    Better error handling
  442
  443register_file_ns([]) :- !.
  444register_file_ns([Decl|T]) :-
  445    !,
  446    register_file_ns(Decl),
  447    register_file_ns(T).
  448register_file_ns([]=_) :- !.            % xmlns= (overall default)
  449register_file_ns(NS=URL) :-            % compatibility
  450    !,
  451    register_file_ns(NS-URL).
  452register_file_ns(NS-URL) :-
  453    (   ns(NS, URL)
  454    ->  true
  455    ;   ns(NS, _)
  456    ->  true                        % redefined abbreviation
  457    ;   ns(_, URL)
  458    ->  true                        % redefined URL
  459    ;   rdf_register_ns(NS, URL)
  460    ).
  461
  462
  463%!  rdf_global_id(?IRISpec, :IRI) is semidet.
  464%
  465%   Convert between Prefix:Local and full IRI   (an atom). If IRISpec is
  466%   an atom, it  is  simply  unified   with  IRI.  This  predicate fails
  467%   silently if IRI is an RDF literal.
  468%
  469%   Note that this predicate is a meta-predicate on its output argument.
  470%   This is necessary to get the module context while the first argument
  471%   may be of the form (:)/2. The above mode description is correct, but
  472%   should be interpreted as (?,?).
  473%
  474%   @error existence_error(rdf_prefix, Prefix)
  475%   @see   rdf_equal/2 provides a compile time alternative
  476%   @see   The rdf_meta/1 directive asks for compile time expansion
  477%          of arguments.
  478%   @bug   Error handling is incomplete.  In its current implementation
  479%	   the same code is used for compile-time expansion and to
  480%	   facilitate runtime conversion and checking.  These use cases
  481%	   have different requirements.
  482
  483rdf_global_id(Id, Module:Global) :-
  484    rdf_global_id(Id, Global, Module).
  485
  486rdf_global_id(NS:Local, Global, Module) :-
  487    global(NS, Local, Global, Module),
  488    !.
  489rdf_global_id(Global, Global, _).
  490
  491
  492%!  rdf_global_object(+Object, :GlobalObject) is semidet.
  493%!  rdf_global_object(-Object, :GlobalObject) is semidet.
  494%
  495%   Same as rdf_global_id/2,  but  intended   for  dealing  with the
  496%   object part of a  triple,  in   particular  the  type  for typed
  497%   literals. Note that the predicate  is   a  meta-predicate on the
  498%   output argument. This is necessary  to   get  the module context
  499%   while the first argument may be of the form (:)/2.
  500%
  501%   @error  existence_error(rdf_prefix, Prefix)
  502
  503rdf_global_object(Object, Module:GlobalObject) :-
  504    rdf_global_object(Object, GlobalObject, Module).
  505
  506rdf_global_object(Var, Global, _M) :-
  507    var(Var),
  508    !,
  509    Global = Var.
  510rdf_global_object(Prefix:Local, Global, M) :-
  511    global(Prefix, Local, Global, M),
  512    !.
  513rdf_global_object(literal(type(Prefix:Local, Value)),
  514                  literal(type(Global, Value)), M) :-
  515    global(Prefix, Local, Global, M),
  516    !.
  517rdf_global_object(^^(Value,Prefix:Local),
  518                  ^^(Value,Global), M) :-
  519    global(Prefix, Local, Global, M),
  520    !.
  521rdf_global_object(literal(Query0, type(Prefix:Local, Value)),
  522                  literal(Query1, type(Global, Value)), M) :-
  523    global(Prefix, Local, Global, M),
  524    !,
  525    rdf_global_term(Query0, Query1, M).
  526rdf_global_object(literal(Query0, Value),
  527                  literal(Query1, Value), M) :-
  528    !,
  529    rdf_global_term(Query0, Query1, M).
  530rdf_global_object(Global, Global, _).
  531
  532global(Prefix, Local, Global, Module) :-
  533    (   atom(Global)
  534    ->  rdf_current_prefix(Module:Prefix, Full),
  535        atom_concat(Full, Local, Global)
  536    ;   atom(Prefix), atom(Local), var(Global)
  537    ->  (   rdf_current_prefix(Module:Prefix, Full)
  538        *-> atom_concat(Full, Local, Global)
  539        ;   current_prolog_flag(xref, true)
  540        ->  Global = Prefix:Local
  541        ;   existence_error(rdf_prefix, Prefix)
  542        )
  543    ).
  544
  545
  546%!  rdf_global_term(+TermIn, :GlobalTerm) is det.
  547%
  548%   Performs rdf_global_id/2 on predixed IRIs and rdf_global_object/2 on
  549%   RDF literals, by recursively  analysing  the   term.  Note  that the
  550%   predicate is a meta-predicate  on  the   output  argument.  This  is
  551%   necessary to get the module context while  the first argument may be
  552%   of the form (:)/2.
  553%
  554%   Terms of the form `Prefix:Local`  that   appear  in TermIn for which
  555%   `Prefix` is not defined are not replaced. Unlike rdf_global_id/2 and
  556%   rdf_global_object/2, no error is raised.
  557
  558rdf_global_term(TermIn, Module:TermOut) :-
  559    rdf_global_term(TermIn, TermOut, Module).
  560
  561rdf_global_term(Var, Var, _M) :-
  562    var(Var),
  563    !.
  564rdf_global_term(Prefix:Local, Global, Module) :-
  565    atom(Prefix), atom(Local),
  566    rdf_current_prefix(Module:Prefix, Full),
  567    !,
  568    atom_concat(Full, Local, Global).
  569rdf_global_term([H0|T0], [H|T], M) :-
  570    !,
  571    rdf_global_term(H0, H, M),
  572    rdf_global_term(T0, T, M).
  573rdf_global_term(Term0, Term, M) :-
  574    compound(Term0),
  575    !,
  576    Term0 =.. [H|L0],
  577    rdf_global_term(L0, L, M),
  578    Term =.. [H|L].
  579rdf_global_term(Term, Term, _).
  580
  581%!  rdf_global_graph(+TermIn, -GlobalTerm, +Module) is det.
  582%
  583%   Preforms rdf_global_id/2 on rdf/4, etc graph arguments
  584
  585rdf_global_graph(Prefix:Local, Global, Module) :-
  586    atom(Prefix), atom(Local),
  587    !,
  588    global(Prefix, Local, Global, Module).
  589rdf_global_graph(G, G, _).
  590
  591
  592                 /*******************************
  593                 *            EXPANSION         *
  594                 *******************************/
  595
  596:- multifile
  597    system:term_expansion/2,
  598    system:goal_expansion/2.  599
  600system:term_expansion((:- rdf_meta(Heads)), Clauses) :-
  601    prolog_load_context(module, M),
  602    phrase(mk_clauses(Heads, M), Clauses).
  603
  604mk_clauses((A,B), M) -->
  605    mk_clause(A, M),
  606    mk_clauses(B, M).
  607mk_clauses(A, M) -->
  608    mk_clause(A, M).
  609
  610mk_clause(Head0, M0) -->
  611    { strip_module(M0:Head0, Module, Head),
  612      valid_rdf_meta_head(Head),
  613      functor(Head, Name, Arity),
  614      functor(Unbound, Name, Arity),
  615      qualify(Module, 'rdf meta specification'/2, Decl)
  616    },
  617    [ (:- multifile(Decl)),
  618      Module:'rdf meta specification'(Unbound, Head)
  619    ].
  620
  621qualify(Module, Decl, Decl) :-
  622    prolog_load_context(module, Module),
  623    !.
  624qualify(Module, Decl, Module:Decl).
  625
  626
  627valid_rdf_meta_head(Head) :-
  628    callable(Head),
  629    !,
  630    Head =.. [_|Args],
  631    valid_args(Args).
  632valid_rdf_meta_head(Head) :-
  633    throw(error(type_error(callable, Head), _)).
  634
  635valid_args([]).
  636valid_args([H|T]) :-
  637    valid_arg(H),
  638    !,
  639    valid_args(T).
  640
  641valid_arg(:).                           % meta argument
  642valid_arg(+).                           % non-var
  643valid_arg(-).                           % var
  644valid_arg(?).                           % either var or non-var
  645valid_arg(@).                           % not modified
  646valid_arg(r).                           % RDF resource
  647valid_arg(o).                           % RDF object
  648valid_arg(t).                           % term with RDF resources
  649valid_arg(g).                           % graph argument
  650valid_arg(A) :-
  651    throw(error(type_error(rdf_meta_argument, A), _)).
  652
  653%!  rdf_meta(+Heads)
  654%
  655%   This  directive  defines  the  argument    types  of  the  named
  656%   predicates, which will force compile   time  namespace expansion
  657%   for these predicates. Heads is a coma-separated list of callable
  658%   terms. Defined argument properties are:
  659%
  660%     $ : :
  661%     Argument is a goal. The goal is processed using expand_goal/2,
  662%     recursively applying goal transformation on the argument.
  663%
  664%     $ + :
  665%     The argument is instantiated at entry. Nothing is changed.
  666%
  667%     $ - :
  668%     The argument is not instantiated at entry. Nothing is changed.
  669%
  670%     $ ? :
  671%     The argument is unbound or instantiated at entry. Nothing is
  672%     changed.
  673%
  674%     $ @ :
  675%     The argument is not changed.
  676%
  677%     $ r :
  678%     The argument must be a resource. If it is a term
  679%     _prefix_:_local_ it is translated.
  680%
  681%     $ o :
  682%     The argument is an object or resource. See
  683%     rdf_global_object/2.
  684%
  685%     $ t :
  686%     The argument is a term that must be translated. Expansion will
  687%     translate all occurences of _prefix_:_local_ appearing
  688%     anywhere in the term. See rdf_global_term/2.
  689%
  690%   As it is subject to term_expansion/2, the rdf_meta/1 declaration
  691%   can only be used as a directive. The directive must be processed
  692%   before the definition of  the  predicates   as  well  as  before
  693%   compiling code that  uses  the   rdf  meta-predicates.  The atom
  694%   =rdf_meta=  is  declared   as   an    operator   exported   from
  695%   library(semweb/rdf_db). Files using rdf_meta/1  must explicitely
  696%   load this library.
  697%
  698%   Beginning with SWI-Prolog 7.3.17, the   low-level  RDF interface
  699%   (rdf/3,  rdf_assert/3,  etc.)  perform    runtime  expansion  of
  700%   `Prefix:Local` terms. This eliminates the   need  for rdf_meta/1
  701%   for  simple  cases.  However,  runtime   expansion  comes  at  a
  702%   significant overhead and having two  representations for IRIs (a
  703%   plain atom and  a  term   `Prefix:Local`)  implies  that  simple
  704%   operations such as comparison of IRIs   no  longer map to native
  705%   Prolog operations such as `IRI1 == IRI2`.
  706
  707rdf_meta(Heads) :-
  708    throw(error(context_error(nodirective, rdf_meta(Heads)), _)).
  709
  710%!  rdf_meta_specification(+General, +Module, -Spec) is semidet.
  711%
  712%   True when Spec is the RDF meta specification for Module:General.
  713%
  714%   @arg    General is the term Spec with all arguments replaced with
  715%           variables.
  716
  717rdf_meta_specification(Unbounded, Module, Spec) :-
  718    '$flushed_predicate'(Module:'rdf meta specification'(_,_)),
  719    call(Module:'rdf meta specification'(Unbounded, Spec)).
  720
  721system:goal_expansion(G, Expanded) :-
  722    \+ predicate_property(G, iso),
  723    prolog_load_context(module, LM),
  724    predicate_property(LM:G, implementation_module(IM)),
  725    rdf_meta_specification(G, IM, Spec),
  726    rdf_expand(G, Spec, Expanded, LM).
  727
  728system:term_expansion(Fact, Expanded) :-
  729    prolog_load_context(module, Module),
  730    rdf_meta_specification(Fact, Module, Spec),
  731    rdf_expand(Fact, Spec, Expanded, Module),
  732    Fact \== Expanded.
  733system:term_expansion((Head :- Body), (Expanded :- Body)) :-
  734    prolog_load_context(module, Module),
  735    rdf_meta_specification(Head, Module, Spec),
  736    rdf_expand(Head, Spec, Expanded, Module),
  737    Head \== Expanded.
  738
  739rdf_expand(G, Spec, Expanded, M) :-
  740    functor(G, Name, Arity),
  741    functor(Expanded, Name, Arity),
  742    rdf_expand_args(0, Arity, G, Spec, Expanded, M).
  743
  744rdf_expand_args(Arity, Arity, _, _, _, _) :- !.
  745rdf_expand_args(I0, Arity, Goal, Spec, Expanded, M) :-
  746    I is I0 + 1,
  747    arg(I, Goal, GA),
  748    arg(I, Spec, SA),
  749    arg(I, Expanded, EA),
  750    rdf_expand_arg(SA, GA, EA, M),
  751    rdf_expand_args(I, Arity, Goal, Spec, Expanded, M).
  752
  753rdf_expand_arg(r, A, E, M) :-
  754    mk_global(A, E, M),
  755    !.
  756rdf_expand_arg(o, A, E, M) :-
  757    rdf_global_object(A, E, M),
  758    !.
  759rdf_expand_arg(t, A, E, M) :-
  760    rdf_global_term(A, E, M),
  761    !.
  762rdf_expand_arg(g, A, E, M) :-
  763    rdf_global_graph(A, E, M),
  764    !.
  765rdf_expand_arg(:, A, E, _M) :-
  766    !,
  767    expand_goal(A, E).
  768rdf_expand_arg(_, A, A, _M).
  769
  770%!  mk_global(+Src, -Resource, +Module)
  771%
  772%   Realised rdf_global_id(+, -), but adds compiletime checking,
  773%   notably to see whether a namespace is not yet defined.
  774
  775mk_global(X, X, _) :-
  776    var(X),
  777    !.
  778mk_global(X, X, _) :-
  779    atom(X),
  780    !.
  781mk_global(Prefix:Local, Global, Module) :-
  782    must_be(atom, Prefix),
  783    must_be(atom, Local),
  784    (   rdf_current_prefix(Module:Prefix, Full)
  785    ->  atom_concat(Full, Local, Global)
  786    ;   current_prolog_flag(xref, true)
  787    ->  Global = Prefix:Local
  788    ;   existence_error(rdf_prefix, Prefix)
  789    ).
  790
  791:- rdf_meta
  792    rdf(r,r,o),
  793    rdf_has(r,r,o,r),
  794    rdf_has(r,r,o),
  795    rdf_assert(r,r,o),
  796    rdf_retractall(r,r,o),
  797    rdf(r,r,o,?),
  798    rdf_assert(r,r,o,+),
  799    rdf_retractall(r,r,o,?),
  800    rdf_reachable(r,r,o),
  801    rdf_reachable(r,r,o,+,?),
  802    rdf_update(r,r,o,t),
  803    rdf_update(r,r,o,+,t),
  804    rdf_equal(o,o),
  805    rdf_source_location(r,-),
  806    rdf_resource(r),
  807    rdf_subject(r),
  808    rdf_create_graph(r),
  809    rdf_graph(r),
  810    rdf_graph_property(r,?),
  811    rdf_set_graph(r,+),
  812    rdf_unload_graph(r),
  813    rdf_set_predicate(r, t),
  814    rdf_predicate_property(r, -),
  815    rdf_estimate_complexity(r,r,r,-),
  816    rdf_print_predicate_cloud(r,+).  817
  818%!  rdf_equal(?Resource1, ?Resource2)
  819%
  820%   Simple equality test to exploit goal-expansion
  821
  822rdf_equal(Resource, Resource).
  823
  824%!  lang_equal(+Lang1, +Lang2) is semidet.
  825%
  826%   True if two RFC language specifiers denote the same language
  827%
  828%   @see lang_matches/2.
  829
  830lang_equal(Lang, Lang) :- !.
  831lang_equal(Lang1, Lang2) :-
  832    downcase_atom(Lang1, LangCannon),
  833    downcase_atom(Lang2, LangCannon).
  834
  835%!  lang_matches(+Lang, +Pattern) is semidet.
  836%
  837%   True if Lang  matches  Pattern.   This  implements  XML language
  838%   matching  conform  RFC  4647.   Both    Lang   and  Pattern  are
  839%   dash-separated strings of  identifiers  or   (for  Pattern)  the
  840%   wildcard *. Identifiers are  matched   case-insensitive  and a *
  841%   matches any number of identifiers. A   short pattern is the same
  842%   as *.
  843
  844
  845                 /*******************************
  846                 *     BASIC TRIPLE QUERIES     *
  847                 *******************************/
  848
  849%!  rdf(?Subject, ?Predicate, ?Object) is nondet.
  850%
  851%   Elementary query for triples. Subject   and  Predicate are atoms
  852%   representing the fully qualified URL of  the resource. Object is
  853%   either an atom representing a resource  or literal(Value) if the
  854%   object  is  a  literal  value.   If    a   value   of  the  form
  855%   NameSpaceID:LocalName is provided it  is   expanded  to a ground
  856%   atom  using  expand_goal/2.  This  implies   you  can  use  this
  857%   construct in compiled code without paying a performance penalty.
  858%   Literal values take one of the following forms:
  859%
  860%     * Atom
  861%     If the value is a simple atom it is the textual representation
  862%     of a string literal without explicit type or language
  863%     qualifier.
  864%
  865%     * lang(LangID, Atom)
  866%     Atom represents the text of a string literal qualified with
  867%     the given language.
  868%
  869%     * type(TypeID, Value)
  870%     Used for attributes qualified using the =|rdf:datatype|=
  871%     TypeID. The Value is either the textual representation or a
  872%     natural Prolog representation. See the option
  873%     convert_typed_literal(:Convertor) of the parser. The storage
  874%     layer provides efficient handling of atoms, integers (64-bit)
  875%     and floats (native C-doubles). All other data is represented
  876%     as a Prolog record.
  877%
  878%   For literal querying purposes, Object can be of the form
  879%   literal(+Query, -Value), where Query is one of the terms below.
  880%   If the Query takes a literal argument and the value has a
  881%   numeric type numerical comparison is performed.
  882%
  883%     * plain(+Text)
  884%     Perform exact match and demand the language or type qualifiers
  885%     to match. This query is fully indexed.
  886%
  887%     * icase(+Text)
  888%     Perform a full but case-insensitive match. This query is
  889%     fully indexed.
  890%
  891%     * exact(+Text)
  892%     Same as icase(Text).  Backward compatibility.
  893%
  894%     * substring(+Text)
  895%     Match any literal that contains Text as a case-insensitive
  896%     substring. The query is not indexed on Object.
  897%
  898%     * word(+Text)
  899%     Match any literal that contains Text delimited by a non
  900%     alpha-numeric character, the start or end of the string. The
  901%     query is not indexed on Object.
  902%
  903%     * prefix(+Text)
  904%     Match any literal that starts with Text. This call is intended
  905%     for completion. The query is indexed using the skip list of
  906%     literals.
  907%
  908%     * ge(+Literal)
  909%     Match any literal that is equal or larger than Literal in the
  910%     ordered set of literals.
  911%
  912%     * gt(+Literal)
  913%     Match any literal that is larger than Literal in the ordered set
  914%     of literals.
  915%
  916%     * eq(+Literal)
  917%     Match any literal that is equal to Literal in the ordered set
  918%     of literals.
  919%
  920%     * le(+Literal)
  921%     Match any literal that is equal or smaller than Literal in the
  922%     ordered set of literals.
  923%
  924%     * lt(+Literal)
  925%     Match any literal that is smaller than Literal in the ordered set
  926%     of literals.
  927%
  928%     * between(+Literal1, +Literal2)
  929%     Match any literal that is between Literal1 and Literal2 in the
  930%     ordered set of literals. This may include both Literal1 and
  931%     Literal2.
  932%
  933%     * like(+Pattern)
  934%     Match any literal that matches Pattern case insensitively,
  935%     where the `*' character in Pattern matches zero or more
  936%     characters.
  937%
  938%   Backtracking never returns duplicate triples.  Duplicates can be
  939%   retrieved using rdf/4. The predicate   rdf/3 raises a type-error
  940%   if called with improper arguments.  If   rdf/3  is called with a
  941%   term  literal(_)  as  Subject  or   Predicate  object  it  fails
  942%   silently.  This  allows   for   graph    matching   goals   like
  943%   rdf(S,P,O),rdf(O,P2,O2) to proceed without errors.
  944
  945%!  rdf(?Subject, ?Predicate, ?Object, ?Source) is nondet.
  946%
  947%   As rdf/3 but in addition query  the   graph  to which the triple
  948%   belongs. Unlike rdf/3, this predicate does not remove duplicates
  949%   from the result set.
  950%
  951%   @param Source is a term Graph:Line.  If Source is instantiated,
  952%   passing an atom is the same as passing Atom:_.
  953
  954
  955%!  rdf_has(?Subject, +Predicate, ?Object) is nondet.
  956%
  957%   Succeeds if the triple rdf(Subject,   Predicate, Object) is true
  958%   exploiting the rdfs:subPropertyOf predicate as   well as inverse
  959%   predicates   declared   using   rdf_set_predicate/2   with   the
  960%   =inverse_of= property.
  961
  962%!  rdf_has(?Subject, +Predicate, ?Object, -RealPredicate) is nondet.
  963%
  964%   Same as rdf_has/3, but RealPredicate is   unified  to the actual
  965%   predicate that makes this relation   true. RealPredicate must be
  966%   Predicate or an rdfs:subPropertyOf  Predicate.   If  an  inverse
  967%   match is found, RealPredicate is the term inverse_of(Pred).
  968
  969%!  rdf_reachable(?Subject, +Predicate, ?Object) is nondet.
  970%
  971%   Is true if Object can  be   reached  from  Subject following the
  972%   transitive predicate Predicate or a  sub-property thereof, while
  973%   respecting the symmetric(true) or inverse_of(P2) properties.
  974%
  975%   If used with either Subject or  Object unbound, it first returns
  976%   the origin, followed by  the   reachable  nodes  in breadth-first
  977%   search-order. The implementation internally   looks one solution
  978%   ahead and succeeds deterministically on  the last solution. This
  979%   predicate never generates the same  node   twice  and  is robust
  980%   against cycles in the transitive relation.
  981%
  982%   With all arguments instantiated,   it succeeds deterministically
  983%   if a path can be found from  Subject to Object. Searching starts
  984%   at Subject, assuming the branching factor   is normally lower. A
  985%   call  with  both  Subject   and    Object   unbound   raises  an
  986%   instantiation  error.  The  following    example  generates  all
  987%   subclasses of rdfs:Resource:
  988%
  989%     ==
  990%     ?- rdf_reachable(X, rdfs:subClassOf, rdfs:'Resource').
  991%     X = 'http://www.w3.org/2000/01/rdf-schema#Resource' ;
  992%     X = 'http://www.w3.org/2000/01/rdf-schema#Class' ;
  993%     X = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Property' ;
  994%     ...
  995%     ==
  996
  997
  998%!  rdf_reachable(?Subject, +Predicate, ?Object, +MaxD, -D) is nondet.
  999%
 1000%   Same as rdf_reachable/3, but in addition, MaxD limits the number
 1001%   of edges expanded and D is   unified with the `distance' between
 1002%   Subject and Object. Distance 0 means  Subject and Object are the
 1003%   same resource. MaxD can be the  constant =infinite= to impose no
 1004%   distance-limit.
 1005
 1006%!  rdf_subject(?Resource) is nondet.
 1007%
 1008%   True if Resource appears as a   subject. This query respects the
 1009%   visibility rules implied by the logical update view.
 1010%
 1011%   @see rdf_resource/1.
 1012
 1013rdf_subject(Resource) :-
 1014    rdf_resource(Resource),
 1015    ( rdf(Resource, _, _) -> true ).
 1016
 1017%!  rdf_resource(?Resource) is nondet.
 1018%
 1019%   True when Resource is a resource used as a subject or object in
 1020%   a triple.
 1021%
 1022%   This predicate is primarily intended  as   a  way to process all
 1023%   resources without processing resources twice.   The user must be
 1024%   aware that some of the returned resources  may not appear in any
 1025%   _visible_ triple.
 1026
 1027
 1028                 /*******************************
 1029                 *     TRIPLE MODIFICATIONS     *
 1030                 *******************************/
 1031
 1032%!  rdf_assert(+Subject, +Predicate, +Object) is det.
 1033%
 1034%   Assert a new triple into  the   database.  This is equivalent to
 1035%   rdf_assert/4 using Graph  =user=.  Subject   and  Predicate  are
 1036%   resources. Object is either a resource or a term literal(Value).
 1037%   See rdf/3 for an explanation  of   Value  for typed and language
 1038%   qualified literals. All arguments  are   subject  to  name-space
 1039%   expansion. Complete duplicates (including  the   same  graph and
 1040%   `line' and with a compatible `lifespan')   are  not added to the
 1041%   database.
 1042
 1043%!  rdf_assert(+Subject, +Predicate, +Object, +Graph) is det.
 1044%
 1045%   As rdf_assert/3, adding the  predicate   to  the indicated named
 1046%   graph.
 1047%
 1048%   @param Graph is either the name of a   graph (an atom) or a term
 1049%   Graph:Line, where Line is an integer that denotes a line number.
 1050
 1051%!  rdf_retractall(?Subject, ?Predicate, ?Object) is det.
 1052%
 1053%   Remove   all   matching   triples   from    the   database.   As
 1054%   rdf_retractall/4 using an unbound graph.
 1055
 1056%!  rdf_retractall(?Subject, ?Predicate, ?Object, ?Graph) is det.
 1057%
 1058%   As rdf_retractall/3, also matching Graph.   This  is particularly
 1059%   useful to remove all triples coming from a loaded file. See also
 1060%   rdf_unload/1.
 1061
 1062%!  rdf_update(+Subject, +Predicate, +Object, +Action) is det.
 1063%
 1064%   Replaces one of  the  three  fields   on  the  matching  triples
 1065%   depending on Action:
 1066%
 1067%     * subject(Resource)
 1068%     Changes the first field of the triple.
 1069%     * predicate(Resource)
 1070%     Changes the second field of the triple.
 1071%     * object(Object)
 1072%     Changes the last field of the triple to the given resource or
 1073%     literal(Value).
 1074%     * graph(Graph)
 1075%     Moves the triple from its current named graph to Graph.
 1076
 1077%!  rdf_update(+Subject, +Predicate, +Object, +Graph, +Action) is det
 1078%
 1079%   As rdf_update/4 but allows for specifying the graph.
 1080
 1081
 1082                 /*******************************
 1083                 *          COLLECTIONS         *
 1084                 *******************************/
 1085
 1086%!  rdf_member_property(?Prop, ?Index)
 1087%
 1088%   Deal with the rdf:_1, ... properties.
 1089
 1090term_expansion(member_prefix(x),
 1091               member_prefix(Prefix)) :-
 1092    rdf_db:ns(rdf, NS),
 1093    atom_concat(NS, '_', Prefix).
 1094member_prefix(x).
 1095
 1096rdf_member_property(P, N) :-
 1097    integer(N),
 1098    !,
 1099    member_prefix(Prefix),
 1100    atom_concat(Prefix, N, P).
 1101rdf_member_property(P, N) :-
 1102    member_prefix(Prefix),
 1103    atom_concat(Prefix, Sub, P),
 1104    atom_number(Sub, N).
 1105
 1106
 1107                 /*******************************
 1108                 *      ANONYMOUS SUBJECTS      *
 1109                 *******************************/
 1110
 1111%!  rdf_node(-Id)
 1112%
 1113%   Generate a unique blank node identifier for a subject.
 1114%
 1115%   @deprecated     New code should use rdf_bnode/1.
 1116
 1117rdf_node(Resource) :-
 1118    rdf_bnode(Resource).
 1119
 1120%!  rdf_bnode(-Id)
 1121%
 1122%   Generate a unique anonymous identifier for a subject.
 1123
 1124rdf_bnode(Value) :-
 1125    repeat,
 1126    gensym('_:genid', Value),
 1127    \+ rdf(Value, _, _),
 1128    \+ rdf(_, _, Value),
 1129    \+ rdf(_, Value, _),
 1130    !.
 1131
 1132
 1133
 1134                 /*******************************
 1135                 *             TYPES            *
 1136                 *******************************/
 1137
 1138%!  rdf_is_bnode(+Id)
 1139%
 1140%   Tests if a resource is  a  blank   node  (i.e.  is  an anonymous
 1141%   resource). A blank node is represented   as  an atom that starts
 1142%   with =|_:|=. For backward compatibility   reason, =|__|= is also
 1143%   considered to be a blank node.
 1144%
 1145%   @see rdf_bnode/1.
 1146
 1147%!  rdf_is_resource(@Term) is semidet.
 1148%
 1149%   True if Term is an RDF  resource.   Note  that  this is merely a
 1150%   type-test; it does not mean  this   resource  is involved in any
 1151%   triple.  Blank nodes are also considered resources.
 1152%
 1153%   @see rdf_is_bnode/1
 1154
 1155rdf_is_resource(Term) :-
 1156    atom(Term).
 1157
 1158%!  rdf_is_literal(@Term) is semidet.
 1159%
 1160%   True if Term is an RDF literal object. Currently only checks for
 1161%   groundness and the literal functor.
 1162
 1163rdf_is_literal(literal(Value)) :-
 1164    ground(Value).
 1165
 1166                 /*******************************
 1167                 *             LITERALS         *
 1168                 *******************************/
 1169
 1170%!  rdf_current_literal(-Literal) is nondet.
 1171%
 1172%   True when Literal is a currently  known literal. Enumerates each
 1173%   unique literal exactly once. Note that   it is possible that the
 1174%   literal only appears in already deleted triples. Deleted triples
 1175%   may be locked due to active   queries, transactions or snapshots
 1176%   or may not yet be reclaimed by the garbage collector.
 1177
 1178
 1179%!  rdf_literal_value(+Literal, -Value) is semidet.
 1180%
 1181%   True when value is  the   appropriate  Prolog  representation of
 1182%   Literal in the RDF _|value space|_.  Current mapping:
 1183%
 1184%     | Plain literals              | Atom                    |
 1185%     | Language tagged literal     | Atom holding plain text |
 1186%     | xsd:string                  | Atom                    |
 1187%     | rdf:XMLLiteral              | XML DOM Tree            |
 1188%     | Numeric XSD type            | Number                  |
 1189%
 1190%   @tbd    Well, this is the long-term idea.
 1191%   @tbd    Add mode (-,+)
 1192
 1193:- rdf_meta
 1194    rdf_literal_value(o, -),
 1195    typed_value(r, +, -),
 1196    numeric_value(r, +, -). 1197
 1198rdf_literal_value(literal(String), Value) :-
 1199    atom(String),
 1200    !,
 1201    Value = String.
 1202rdf_literal_value(literal(lang(_Lang, String)), String).
 1203rdf_literal_value(literal(type(Type, String)), Value) :-
 1204    typed_value(Type, String, Value).
 1205
 1206typed_value(Numeric, String, Value) :-
 1207    xsdp_numeric_uri(Numeric, NumType),
 1208    !,
 1209    numeric_value(NumType, String, Value).
 1210typed_value(xsd:string, String, String).
 1211typed_value(rdf:'XMLLiteral', Value, DOM) :-
 1212    (   atom(Value)
 1213    ->  setup_call_cleanup(
 1214            ( atom_to_memory_file(Value, MF),
 1215              open_memory_file(MF, read, In, [free_on_close(true)])
 1216            ),
 1217            load_structure(stream(In), DOM, [dialect(xml)]),
 1218            close(In))
 1219    ;   DOM = Value
 1220    ).
 1221
 1222numeric_value(xsd:integer, String, Value) :-
 1223    atom_number(String, Value),
 1224    integer(Value).
 1225numeric_value(xsd:float, String, Value) :-
 1226    atom_number(String, Number),
 1227    Value is float(Number).
 1228numeric_value(xsd:double, String, Value) :-
 1229    atom_number(String, Number),
 1230    Value is float(Number).
 1231numeric_value(xsd:decimal, String, Value) :-
 1232    atom_number(String, Value).
 1233
 1234
 1235                 /*******************************
 1236                 *            SOURCE            *
 1237                 *******************************/
 1238
 1239%!  rdf_source_location(+Subject, -Location) is nondet.
 1240%
 1241%   True when triples for Subject are loaded from Location.
 1242%
 1243%   @param Location is a term File:Line.
 1244
 1245rdf_source_location(Subject, Source) :-
 1246    findall(Source, rdf(Subject, _, _, Source), Sources),
 1247    sort(Sources, Unique),
 1248    member(Source, Unique).
 1249
 1250
 1251                 /*******************************
 1252                 *       GARBAGE COLLECT        *
 1253                 *******************************/
 1254
 1255%!  rdf_create_gc_thread
 1256%
 1257%   Create the garbage collection thread.
 1258
 1259:- public
 1260    rdf_create_gc_thread/0. 1261
 1262rdf_create_gc_thread :-
 1263    thread_create(rdf_gc_loop, _,
 1264                  [ alias('__rdf_GC')
 1265                  ]).
 1266
 1267%!  rdf_gc_loop
 1268%
 1269%   Take care of running the RDF garbage collection.  This predicate
 1270%   is called from a thread started by creating the RDF DB.
 1271
 1272rdf_gc_loop :-
 1273    catch(rdf_gc_loop(0), E, recover_gc(E)).
 1274
 1275recover_gc('$aborted') :-
 1276    !,
 1277    thread_self(Me),
 1278    thread_detach(Me).
 1279recover_gc(Error) :-
 1280    print_message(error, Error),
 1281    rdf_gc_loop.
 1282
 1283rdf_gc_loop(CPU) :-
 1284    repeat,
 1285    (   consider_gc(CPU)
 1286    ->  rdf_gc(CPU1),
 1287        sleep(CPU1)
 1288    ;   sleep(0.1)
 1289    ),
 1290    fail.
 1291
 1292%!  rdf_gc(-CPU) is det.
 1293%
 1294%   Run RDF GC one time. CPU is  the   amount  of CPU time spent. We
 1295%   update this in Prolog because portable access to thread specific
 1296%   CPU is really hard in C.
 1297
 1298rdf_gc(CPU) :-
 1299    statistics(cputime, CPU0),
 1300    (   rdf_gc_
 1301    ->  statistics(cputime, CPU1),
 1302        CPU is CPU1-CPU0,
 1303        rdf_add_gc_time(CPU)
 1304    ;   CPU = 0.0
 1305    ).
 1306
 1307%!  rdf_gc is det.
 1308%
 1309%   Run the RDF-DB garbage collector until   no  garbage is left and
 1310%   all  tables  are  fully  optimized.  Under  normal  operation  a
 1311%   seperate thread with  identifier   =__rdf_GC=  performs  garbage
 1312%   collection as long as it is considered `useful'.
 1313%
 1314%   Using rdf_gc/0 should only be  needed   to  ensure a fully clean
 1315%   database for analysis purposes such as leak detection.
 1316
 1317rdf_gc :-
 1318    has_garbage,
 1319    !,
 1320    rdf_gc(_),
 1321    rdf_gc.
 1322rdf_gc.
 1323
 1324%!  has_garbage is semidet.
 1325%
 1326%   True if there is something to gain using GC.
 1327
 1328has_garbage :-
 1329    rdf_gc_info_(Info),
 1330    has_garbage(Info),
 1331    !.
 1332
 1333has_garbage(Info) :- arg(2, Info, Garbage),     Garbage > 0.
 1334has_garbage(Info) :- arg(3, Info, Reindexed),   Reindexed > 0.
 1335has_garbage(Info) :- arg(4, Info, Optimizable), Optimizable > 0.
 1336
 1337%!  consider_gc(+CPU) is semidet.
 1338%
 1339%   @param CPU is the amount of CPU time spent in the most recent
 1340%   GC.
 1341
 1342consider_gc(_CPU) :-
 1343    (   rdf_gc_info_(gc_info(Triples,       % Total #triples in DB
 1344                             Garbage,       % Garbage triples in DB
 1345                             Reindexed,     % Reindexed & not reclaimed
 1346                             Optimizable,   % Non-optimized tables
 1347                             _KeepGen,      % Oldest active generation
 1348                             _LastGCGen,    % Oldest active gen at last GC
 1349                             _ReindexGen,
 1350                             _LastGCReindexGen))
 1351    ->  (   (Garbage+Reindexed) * 5 > Triples
 1352        ;   Optimizable > 4
 1353        )
 1354    ;   print_message(error, rdf(invalid_gc_info)),
 1355        sleep(10)
 1356    ),
 1357    !.
 1358
 1359
 1360                 /*******************************
 1361                 *           STATISTICS         *
 1362                 *******************************/
 1363
 1364%!  rdf_statistics(?KeyValue) is nondet.
 1365%
 1366%   Obtain statistics on the RDF database.  Defined statistics are:
 1367%
 1368%     * graphs(-Count)
 1369%     Number of named graphs
 1370%
 1371%     * triples(-Count)
 1372%     Total number of triples in the database.  This is the number
 1373%     of asserted triples minus the number of retracted ones.  The
 1374%     number of _visible_ triples in a particular context may be
 1375%     different due to visibility rules defined by the logical
 1376%     update view and transaction isolation.
 1377%
 1378%     * resources(-Count)
 1379%     Number of resources that appear as subject or object in a
 1380%     triple.  See rdf_resource/1.
 1381%
 1382%     * properties(-Count)
 1383%     Number of current predicates.  See rdf_current_predicate/1.
 1384%
 1385%     * literals(-Count)
 1386%     Number of current literals.  See rdf_current_literal/1.
 1387%
 1388%     * gc(GCCount, ReclaimedTriples, ReindexedTriples, Time)
 1389%     Information about the garbage collector.
 1390%
 1391%     * searched_nodes(-Count)
 1392%     Number of nodes expanded by rdf_reachable/3 and
 1393%     rdf_reachable/5.
 1394%
 1395%     * lookup(rdf(S,P,O,G), Count)
 1396%     Number of queries that have been performed for this particular
 1397%     instantiation pattern.  Each of S,P,O,G is either + or -.
 1398%     Fails in case the number of performed queries is zero.
 1399%
 1400%     * hash_quality(rdf(S,P,O,G), Buckets, Quality, PendingResize)
 1401%     Statistics on the index for this pattern.  Indices are created
 1402%     lazily on the first relevant query.
 1403%
 1404%     * triples_by_graph(Graph, Count)
 1405%     This statistic is produced for each named graph. See
 1406%     =triples= for the interpretation of this value.
 1407
 1408rdf_statistics(graphs(Count)) :-
 1409    rdf_statistics_(graphs(Count)).
 1410rdf_statistics(triples(Count)) :-
 1411    rdf_statistics_(triples(Count)).
 1412rdf_statistics(duplicates(Count)) :-
 1413    rdf_statistics_(duplicates(Count)).
 1414rdf_statistics(lingering(Count)) :-
 1415    rdf_statistics_(lingering(Count)).
 1416rdf_statistics(resources(Count)) :-
 1417    rdf_statistics_(resources(Count)).
 1418rdf_statistics(properties(Count)) :-
 1419    rdf_statistics_(predicates(Count)).
 1420rdf_statistics(literals(Count)) :-
 1421    rdf_statistics_(literals(Count)).
 1422rdf_statistics(gc(Count, Reclaimed, Reindexed, Time)) :-
 1423    rdf_statistics_(gc(Count, Reclaimed, Reindexed, Time)).
 1424rdf_statistics(searched_nodes(Count)) :-
 1425    rdf_statistics_(searched_nodes(Count)).
 1426rdf_statistics(lookup(Index, Count)) :-
 1427    functor(Indexed, indexed, 16),
 1428    rdf_statistics_(Indexed),
 1429    index(Index, I),
 1430    Arg is I + 1,
 1431    arg(Arg, Indexed, Count),
 1432    Count \== 0.
 1433rdf_statistics(hash_quality(Index, Size, Quality,Optimize)) :-
 1434    rdf_statistics_(hash_quality(List)),
 1435    member(hash(Place,Size,Quality,Optimize), List),
 1436    index(Index, Place).
 1437rdf_statistics(triples_by_graph(Graph, Count)) :-
 1438    rdf_graph_(Graph, Count).
 1439
 1440index(rdf(-,-,-,-), 0).
 1441index(rdf(+,-,-,-), 1).
 1442index(rdf(-,+,-,-), 2).
 1443index(rdf(+,+,-,-), 3).
 1444index(rdf(-,-,+,-), 4).
 1445index(rdf(+,-,+,-), 5).
 1446index(rdf(-,+,+,-), 6).
 1447index(rdf(+,+,+,-), 7).
 1448
 1449index(rdf(-,-,-,+), 8).
 1450index(rdf(+,-,-,+), 9).
 1451index(rdf(-,+,-,+), 10).
 1452index(rdf(+,+,-,+), 11).
 1453index(rdf(-,-,+,+), 12).
 1454index(rdf(+,-,+,+), 13).
 1455index(rdf(-,+,+,+), 14).
 1456index(rdf(+,+,+,+), 15).
 1457
 1458
 1459                 /*******************************
 1460                 *           PREDICATES         *
 1461                 *******************************/
 1462
 1463%!  rdf_current_predicate(?Predicate) is nondet.
 1464%
 1465%   True when Predicate is a   currently known predicate. Predicates
 1466%   are created if a triple is created  that uses this predicate or
 1467%   a property of the predicate   is  set using rdf_set_predicate/2.
 1468%   The predicate may (no longer) have triples associated with it.
 1469%
 1470%   Note that resources that have  =|rdf:type|= =|rdf:Property|= are
 1471%   not automatically included in the  result-set of this predicate,
 1472%   while _all_ resources that appear as   the  second argument of a
 1473%   triple _are_ included.
 1474%
 1475%   @see rdf_predicate_property/2.
 1476
 1477rdf_current_predicate(P, DB) :-
 1478    rdf_current_predicate(P),
 1479    (   rdf(_,P,_,DB)
 1480    ->  true
 1481    ).
 1482
 1483%!  rdf_predicate_property(?Predicate, ?Property)
 1484%
 1485%   Query properties of  a  defined   predicate.  Currently  defined
 1486%   properties are given below.
 1487%
 1488%     * symmetric(Bool)
 1489%     True if the predicate is defined to be symmetric. I.e., {A} P
 1490%     {B} implies {B} P {A}. Setting symmetric is equivalent to
 1491%     inverse_of(Self).
 1492%
 1493%     * inverse_of(Inverse)
 1494%     True if this predicate is the inverse of Inverse. This
 1495%     property is used by rdf_has/3, rdf_has/4, rdf_reachable/3 and
 1496%     rdf_reachable/5.
 1497%
 1498%     * transitive(Bool)
 1499%     True if this predicate is transitive. This predicate is
 1500%     currently not used. It might be used to make rdf_has/3 imply
 1501%     rdf_reachable/3 for transitive predicates.
 1502%
 1503%     * triples(Triples)
 1504%     Unify Triples with the number of existing triples using this
 1505%     predicate as second argument. Reporting the number of triples
 1506%     is intended to support query optimization.
 1507%
 1508%     * rdf_subject_branch_factor(-Float)
 1509%     Unify Float with the average number of triples associated with
 1510%     each unique value for the subject-side of this relation. If
 1511%     there are no triples the value 0.0 is returned. This value is
 1512%     cached with the predicate and recomputed only after
 1513%     substantial changes to the triple set associated to this
 1514%     relation. This property is intended for path optimalisation
 1515%     when solving conjunctions of rdf/3 goals.
 1516%
 1517%     * rdf_object_branch_factor(-Float)
 1518%     Unify Float with the average number of triples associated with
 1519%     each unique value for the object-side of this relation. In
 1520%     addition to the comments with the subject_branch_factor
 1521%     property, uniqueness of the object value is computed from the
 1522%     hash key rather than the actual values.
 1523%
 1524%     * rdfs_subject_branch_factor(-Float)
 1525%     Same as =rdf_subject_branch_factor=, but also considering
 1526%     triples of `subPropertyOf' this relation. See also rdf_has/3.
 1527%
 1528%     * rdfs_object_branch_factor(-Float)
 1529%     Same as =rdf_object_branch_factor=, but also considering
 1530%     triples of `subPropertyOf' this relation. See also rdf_has/3.
 1531%
 1532%   @see rdf_set_predicate/2.
 1533
 1534rdf_predicate_property(P, Prop) :-
 1535    var(P),
 1536    !,
 1537    rdf_current_predicate(P),
 1538    rdf_predicate_property_(P, Prop).
 1539rdf_predicate_property(P, Prop) :-
 1540    rdf_predicate_property_(P, Prop).
 1541
 1542%!  rdf_set_predicate(+Predicate, +Property) is det.
 1543%
 1544%   Define a property of  the   predicate.  This predicate currently
 1545%   supports the following properties:
 1546%
 1547%       - symmetric(+Boolean)
 1548%       Set/unset the predicate as being symmetric.  Using
 1549%       symmetric(true) is the same as inverse_of(Predicate),
 1550%       i.e., creating a predicate that is the inverse of
 1551%       itself.
 1552%       - transitive(+Boolean)
 1553%       Sets the transitive property.
 1554%       - inverse_of(+Predicate2)
 1555%       Define Predicate as the inverse of Predicate2. An inverse
 1556%       relation is deleted using inverse_of([]).
 1557%
 1558%   The `transitive` property is currently not used. The `symmetric`
 1559%   and `inverse_of` properties are considered   by  rdf_has/3,4 and
 1560%   rdf_reachable/3.
 1561%
 1562%   @tbd    Maintain these properties based on OWL triples.
 1563
 1564
 1565                 /*******************************
 1566                 *            SNAPSHOTS         *
 1567                 *******************************/
 1568
 1569%!  rdf_snapshot(-Snapshot) is det.
 1570%
 1571%   Take a snapshot of the current state   of  the RDF store. Later,
 1572%   goals may be executed in the  context   of  the database at this
 1573%   moment using rdf_transaction/3 with  the   =snapshot=  option. A
 1574%   snapshot created outside  a  transaction   exists  until  it  is
 1575%   deleted. Snapshots taken inside a transaction   can only be used
 1576%   inside this transaction.
 1577
 1578%!  rdf_delete_snapshot(+Snapshot) is det.
 1579%
 1580%   Delete a snapshot as obtained   from  rdf_snapshot/1. After this
 1581%   call, resources used for maintaining the snapshot become subject
 1582%   to garbage collection.
 1583
 1584%!  rdf_current_snapshot(?Term) is nondet.
 1585%
 1586%   True when Term is a currently known snapshot.
 1587%
 1588%   @bug    Enumeration of snapshots is slow.
 1589
 1590rdf_current_snapshot(Term) :-
 1591    current_blob(Term, rdf_snapshot).
 1592
 1593
 1594                 /*******************************
 1595                 *          TRANSACTION         *
 1596                 *******************************/
 1597
 1598%!  rdf_transaction(:Goal) is semidet.
 1599%
 1600%   Same as rdf_transaction(Goal, user, []).  See rdf_transaction/3.
 1601
 1602%!  rdf_transaction(:Goal, +Id) is semidet.
 1603%
 1604%   Same as rdf_transaction(Goal, Id, []).  See rdf_transaction/3.
 1605
 1606%!  rdf_transaction(:Goal, +Id, +Options) is semidet.
 1607%
 1608%   Run Goal in an RDF  transaction.   Compared to the ACID model,
 1609%   RDF transactions have the following properties:
 1610%
 1611%     1. Modifications inside the transactions become all atomically
 1612%        visible to the outside world if Goal succeeds or remain
 1613%        invisible if Goal fails or throws an exception.  I.e.,
%        the _atomicity_ property is fully supported.
 1615%     2. _Consistency_ is not guaranteed. Later versions may
 1616%        implement consistency constraints that will be checked
 1617%        serialized just before the actual commit of a transaction.
%     3. Concurrently executing transactions do not influence each
 1619%        other.  I.e., the _isolation_ property is fully supported.
 1620%     4. _Durability_ can be activated by loading
 1621%        library(semweb/rdf_persistency).
 1622%
 1623%   Processed options are:
 1624%
 1625%     * snapshot(+Snapshot)
 1626%     Execute Goal using the state of the RDF store as stored in
 1627%     Snapshot.  See rdf_snapshot/1.  Snapshot can also be the
 1628%     atom =true=, which implies that an anonymous snapshot is
 1629%     created at the current state of the store.  Modifications
 1630%     due to executing Goal are only visible to Goal.
 1631
 1632rdf_transaction(Goal) :-
 1633    rdf_transaction(Goal, user, []).
 1634rdf_transaction(Goal, Id) :-
 1635    rdf_transaction(Goal, Id, []).
 1636
 1637%!  rdf_active_transaction(?Id) is nondet.
 1638%
 1639%   True if Id is the identifier of  a transaction in the context of
 1640%   which  this  call  is  executed.  If  Id  is  not  instantiated,
 1641%   backtracking yields transaction identifiers   starting  with the
 1642%   innermost nested transaction. Transaction   identifier terms are
 1643%   not copied, need not be ground   and  can be instantiated during
 1644%   the transaction.
 1645
 1646rdf_active_transaction(Id) :-
 1647    rdf_active_transactions_(List),
 1648    member(Id, List).
 1649
 1650%!  rdf_monitor(:Goal, +Options)
 1651%
 1652%   Call Goal if specified actions occur on the database.
 1653
 1654rdf_monitor(Goal, Options) :-
 1655    monitor_mask(Options, 0xffff, Mask),
 1656    rdf_monitor_(Goal, Mask).
 1657
 1658monitor_mask([], Mask, Mask).
 1659monitor_mask([H|T], Mask0, Mask) :-
 1660    update_mask(H, Mask0, Mask1),
 1661    monitor_mask(T, Mask1, Mask).
 1662
 1663update_mask(-X, Mask0, Mask) :-
 1664    !,
 1665    monitor_mask(X, M),
 1666    Mask is Mask0 /\ \M.
 1667update_mask(+X, Mask0, Mask) :-
 1668    !,
 1669    monitor_mask(X, M),
 1670    Mask is Mask0 \/ M.
 1671update_mask(X, Mask0, Mask) :-
 1672    monitor_mask(X, M),
 1673    Mask is Mask0 \/ M.
 1674
 1675%!  monitor_mask(Name, Mask)
 1676%
 1677%   Mask bit for the monitor events.  Note that this must be kept
 1678%   consistent with the enum broadcast_id defined in rdf_db.c
 1679
 1680                                        % C-defined broadcasts
monitor_mask(assert,       0x0001).
monitor_mask(assert(load), 0x0002).
monitor_mask(retract,      0x0004).
monitor_mask(update,       0x0008).
monitor_mask(new_literal,  0x0010).
monitor_mask(old_literal,  0x0020).
monitor_mask(transaction,  0x0040).
monitor_mask(load,         0x0080).
monitor_mask(create_graph, 0x0100).
monitor_mask(reset,        0x0200).
                                        % prolog defined broadcasts
monitor_mask(parse,        0x1000).
monitor_mask(unload,       0x1000).     % FIXME: Duplicate: same bit as
                                        % `parse`, so the two cannot be
                                        % enabled/disabled independently
                                        % mask for all: every bit of the
                                        % 16-bit event mask is set
monitor_mask(all,          0xffff).
 1696
 1697%rdf_broadcast(Term, MaskName) :-
 1698%%      monitor_mask(MaskName, Mask),
 1699%%      rdf_broadcast_(Term, Mask).
 1700
 1701
 1702                 /*******************************
 1703                 *            WARM              *
 1704                 *******************************/
 1705
 1706%!  rdf_warm_indexes
 1707%
 1708%   Warm all indexes.  See rdf_warm_indexes/1.
 1709
 1710rdf_warm_indexes :-
 1711    findall(Index, rdf_index(Index), Indexes),
 1712    rdf_warm_indexes(Indexes).
 1713
 1714rdf_index(s).
 1715rdf_index(p).
 1716rdf_index(o).
 1717rdf_index(sp).
 1718rdf_index(o).
 1719rdf_index(po).
 1720rdf_index(spo).
 1721rdf_index(g).
 1722rdf_index(sg).
 1723rdf_index(pg).
 1724
 1725%!  rdf_warm_indexes(+Indexes) is det.
 1726%
 1727%   Create the named indexes.  Normally,   the  RDF database creates
%   indexes lazily the first time they are needed. This predicate
 1729%   serves two purposes: it provides an   explicit  way to make sure
 1730%   that the required indexes  are   present  and  creating multiple
 1731%   indexes at the same time is more efficient.
 1732
 1733
 1734                 /*******************************
 1735                 *          DUPLICATES          *
 1736                 *******************************/
 1737
 1738%!  rdf_update_duplicates is det.
 1739%
 1740%   Update the duplicate administration of the RDF store. This marks
%   every triple that is potentially   a   duplicate  of another as
 1742%   duplicate. Being potentially a  duplicate   means  that subject,
 1743%   predicate and object are equivalent and   the  life-times of the
 1744%   two triples overlap.
 1745%
 1746%   The duplicates marks are used to  reduce the administrative load
 1747%   of avoiding duplicate answers.  Normally,   the  duplicates  are
 1748%   marked using a background thread that   is  started on the first
 1749%   query that produces a substantial amount of duplicates.
 1750
 1751:- public
 1752    rdf_update_duplicates_thread/0. 1753
 1754%!  rdf_update_duplicates_thread
 1755%
 1756%   Start a thread to initialize the duplicate administration.
 1757
 1758rdf_update_duplicates_thread :-
 1759    thread_create(rdf_update_duplicates, _,
 1760                  [ detached(true),
 1761                    alias('__rdf_duplicate_detecter')
 1762                  ]).
 1763
 1764%!  rdf_update_duplicates is det.
 1765%
%   Update the duplicate administration. If  this  administration is
%   up-to-date, each triple that _may_ have a duplicate is flagged.
 1768%   The predicate rdf/3 uses this administration to speedup checking
 1769%   for duplicate answers.
 1770%
 1771%   This predicate is normally  executed   from  a background thread
 1772%   named =__rdf_duplicate_detecter= which is created   when a query
 1773%   discovers that checking for duplicates becomes too expensive.
 1774
 1775
 1776                 /*******************************
 1777                 *    QUICK BINARY LOAD/SAVE    *
 1778                 *******************************/
 1779
 1780%!  rdf_save_db(+File) is det.
 1781%!  rdf_save_db(+File, +Graph) is det.
 1782%
 1783%   Save triples into File in a   quick-to-load binary format. If Graph
 1784%   is supplied only triples flagged to originate from that database
 1785%   are  added.  Files  created  this  way    can  be  loaded  using
 1786%   rdf_load_db/1.
 1787
 1788:- create_prolog_flag(rdf_triple_format, 3, [type(integer)]). 1789
 1790rdf_save_db(File) :-
 1791    current_prolog_flag(rdf_triple_format, Version),
 1792    setup_call_cleanup(
 1793        open(File, write, Out, [type(binary)]),
 1794        ( set_stream(Out, record_position(false)),
 1795          rdf_save_db_(Out, _, Version)
 1796        ),
 1797        close(Out)).
 1798
 1799
 1800rdf_save_db(File, Graph) :-
 1801    current_prolog_flag(rdf_triple_format, Version),
 1802    setup_call_cleanup(
 1803        open(File, write, Out, [type(binary)]),
 1804        ( set_stream(Out, record_position(false)),
 1805          rdf_save_db_(Out, Graph, Version)
 1806        ),
 1807        close(Out)).
 1808
 1809
 1810%!  rdf_load_db_no_admin(+File, +Id, -Graphs) is det.
 1811%
 1812%   Load triples from a  .trp  file   without  updating  the  source
 1813%   administration. Id is  handled  to   monitor  action.  Graphs is
 1814%   a list of graph-names encountered in File.
 1815
 1816rdf_load_db_no_admin(File, Id, Graphs) :-
 1817    open(File, read, In, [type(binary)]),
 1818    set_stream(In, record_position(false)),
 1819    call_cleanup(rdf_load_db_(In, Id, Graphs), close(In)).
 1820
 1821
 1822%!  check_loaded_cache(+Graph, +Graphs, +Modified) is det.
 1823%
 1824%   Verify the loaded cache file and optionally fix the modification
 1825%   time (new versions save this along with the snapshot).
 1826%
 1827%   @tbd    What to do if there is a cache mismatch? Delete the loaded
 1828%           graphs and fail?
 1829
 1830check_loaded_cache(DB, [DB], _Modified) :- !.
 1831check_loaded_cache(DB, Graphs, _) :-
 1832    print_message(warning, rdf(inconsistent_cache(DB, Graphs))).
 1833
 1834
 1835%!  rdf_load_db(+File) is det.
 1836%
 1837%   Load triples from a file created using rdf_save_db/2.
 1838
 1839rdf_load_db(File) :-
 1840    uri_file_name(URL, File),
 1841    rdf_load_db_no_admin(File, URL, _Graphs).
 1842
 1843
 1844                 /*******************************
 1845                 *          LOADING RDF         *
 1846                 *******************************/
 1847
 1848:- multifile
 1849    rdf_open_hook/8,
 1850    rdf_open_decode/4,              % +Encoding, +File, -Stream, -Cleanup
 1851    rdf_load_stream/3,              % +Format, +Stream, +Options
 1852    rdf_file_type/2,                % ?Extension, ?Format
 1853    rdf_storage_encoding/2,         % ?Extension, ?Encoding
 1854    url_protocol/1.                 % ?Protocol
 1855
 1856%!  rdf_load(+FileOrList) is det.
 1857%
 1858%   Same as rdf_load(FileOrList, []).  See rdf_load/2.
 1859
 1860%!  rdf_load(+FileOrList, :Options) is det.
 1861%
 1862%   Load RDF data. Options provides   additional processing options.
 1863%   Defined options are:
 1864%
 1865%       * blank_nodes(+ShareMode)
 1866%       How to handle equivalent blank nodes.  If =share= (default),
 1867%       equivalent blank nodes are shared in the same resource.
 1868%
 1869%       * base_uri(+URI)
 1870%       URI that is used for rdf:about="" and other RDF constructs
 1871%       that are relative to the base uri.  Default is the source
 1872%       URL.
 1873%
 1874%       * concurrent(+Jobs)
 1875%       If FileOrList is a list of files, process the input files
%       using Jobs threads concurrently.  Default is the minimum
 1877%       of the number of cores and the number of inputs.  Higher
 1878%       values can be useful when loading inputs from (slow)
 1879%       network connections.  Using 1 (one) does not use
 1880%       separate worker threads.
 1881%
 1882%       * format(+Format)
 1883%       Specify the source format explicitly. Normally this is
 1884%       deduced from the filename extension or the mime-type. The
 1885%       core library understands the formats xml (RDF/XML) and
 1886%       triples (internal quick load and cache format).  Plugins,
 1887%       such as library(semweb/turtle) extend the set of recognised
 1888%       extensions.
 1889%
 1890%       * graph(?Graph)
 1891%       Named graph in which to load the data.  It is *not* allowed
 1892%       to load two sources into the same named graph.  If Graph is
 1893%       unbound, it is unified to the graph into which the data is
 1894%       loaded.  The default graph is a =file://= URL when loading
 1895%       a file or, if the specification is a URL, its normalized
 1896%       version without the optional _|#fragment|_.
 1897%
 1898%       * if(Condition)
 1899%       When to load the file. One of =true=, =changed= (default) or
 1900%       =not_loaded=.
 1901%
 1902%       * modified(-Modified)
 1903%       Unify Modified with one of =not_modified=, cached(File),
 1904%       last_modified(Stamp) or =unknown=.
 1905%
 1906%       * cache(Bool)
 1907%       If =false=, do not use or create a cache file.
 1908%
 1909%       * register_namespaces(Bool)
 1910%       If =true= (default =false=), register =xmlns= namespace
 1911%       declarations or Turtle =|@prefix|= prefixes using
 1912%       rdf_register_prefix/3 if there is no conflict.
 1913%
 1914%       * silent(+Bool)
 1915%       If =true=, the message reporting completion is printed using
 1916%       level =silent=. Otherwise the level is =informational=. See
 1917%       also print_message/2.
 1918%
 1919%   Other  options  are  forwarded  to  process_rdf/3.  By  default,
 1920%   rdf_load/2 only loads RDF/XML from files.  It can be extended to
 1921%   load data from other formats and   locations  using plugins. The
 1922%   full set of plugins relevant to   support  different formats and
 1923%   locations is below:
 1924%
 1925%     ==
 1926%     :- use_module(library(semweb/turtle)).        % Turtle and TRiG
 1927%     :- use_module(library(semweb/rdf_ntriples)).
 1928%     :- use_module(library(semweb/rdf_zlib_plugin)).
 1929%     :- use_module(library(semweb/rdf_http_plugin)).
 1930%     :- use_module(library(http/http_ssl_plugin)).
 1931%     ==
 1932%
%   @see    rdf_db:rdf_open_hook/8, library(semweb/rdf_persistency) and
 1934%           library(semweb/rdf_cache)
 1935
 1936:- dynamic
 1937    rdf_loading/3.                          % Graph, Queue, Thread
 1938
 1939rdf_load(Spec) :-
 1940    rdf_load(Spec, []).
 1941
 1942:- if(\+current_predicate(concurrent/3)). 1943concurrent(_, Goals, _) :-
 1944    forall(member(G, Goals), call(G)).
 1945:- endif. 1946
 1947% Note that we kill atom garbage collection.  This improves performance
 1948% with about 15% loading the LUBM Univ_50 benchmark.
 1949
 1950rdf_load(Spec, M:Options) :-
 1951    must_be(list, Options),
 1952    current_prolog_flag(agc_margin, Old),
 1953    setup_call_cleanup(
 1954        set_prolog_flag(agc_margin, 0),
 1955        rdf_load_noagc(Spec, M, Options),
 1956        set_prolog_flag(agc_margin, Old)).
 1957
 1958rdf_load_noagc(List, M, Options) :-
 1959    is_list(List),
 1960    !,
 1961    flatten(List, Inputs),          % Compatibility: allow nested lists
 1962    maplist(must_be(ground), Inputs),
 1963    length(Inputs, Count),
 1964    load_jobs(Count, Jobs, Options),
 1965    (   Jobs =:= 1
 1966    ->  forall(member(Spec, Inputs),
 1967               rdf_load_one(Spec, M, Options))
 1968    ;   maplist(load_goal(Options, M), Inputs, Goals),
 1969        concurrent(Jobs, Goals, [])
 1970    ).
 1971rdf_load_noagc(One, M, Options) :-
 1972    must_be(ground, One),
 1973    rdf_load_one(One, M, Options).
 1974
 1975load_goal(Options, M, Spec, rdf_load_one(Spec, M, Options)).
 1976
 1977load_jobs(_, Jobs, Options) :-
 1978    option(concurrent(Jobs), Options),
 1979    !,
 1980    must_be(positive_integer, Jobs).
 1981load_jobs(Count, Jobs, _) :-
 1982    current_prolog_flag(cpu_count, CPUs),
 1983    CPUs > 0,
 1984    !,
 1985    Jobs is max(1, min(CPUs, Count)).
 1986load_jobs(_, 1, _).
 1987
 1988
 1989rdf_load_one(Spec, M, Options) :-
 1990    source_url(Spec, Protocol, SourceURL),
 1991    load_graph(SourceURL, Graph, Options),
 1992    setup_call_cleanup(
 1993        with_mutex(rdf_load_file,
 1994                   rdf_start_load(SourceURL, Loading)),
 1995        rdf_load_file(Loading, Spec, SourceURL, Protocol,
 1996                      Graph, M, Options),
 1997        rdf_end_load(Loading)).
 1998
 1999%!  rdf_start_load(+SourceURL, -WhatToDo) is det.
 2000%!  rdf_end_load(+WhatToDo) is det.
 2001%!  rdf_load_file(+WhatToDo, +Spec, +SourceURL, +Protocol, +Graph,
 2002%!                +Module, +Options) is det.
 2003%
 2004%   Of these three predicates, rdf_load_file/7   does the real work.
 2005%   The others deal with the  possibility   that  the graph is being
 2006%   loaded by another thread. In that case,   we  wait for the other
 2007%   thread to complete the work.
 2008%
 2009%   @tbd    What if both threads disagree on what is loaded into the
 2010%           graph?
 2011%   @see    Code is modelled closely after how concurrent loading
 2012%           is handled in SWI-Prolog's boot/init.pl
 2013
 2014rdf_start_load(SourceURL, queue(Queue)) :-
 2015    rdf_loading(SourceURL, Queue, LoadThread),
 2016    \+ thread_self(LoadThread),
 2017    !,
 2018    debug(rdf(load), '~p is being loaded by thread ~w; waiting ...',
 2019          [ SourceURL, LoadThread]).
 2020rdf_start_load(SourceURL, Ref) :-
 2021    thread_self(Me),
 2022    message_queue_create(Queue),
 2023    assertz(rdf_loading(SourceURL, Queue, Me), Ref).
 2024
 2025rdf_end_load(queue(_)) :- !.
 2026rdf_end_load(Ref) :-
 2027    clause(rdf_loading(_, Queue, _), _, Ref),
 2028    erase(Ref),
 2029    thread_send_message(Queue, done),
 2030    message_queue_destroy(Queue).
 2031
 2032rdf_load_file(queue(Queue), _Spec, _SourceURL, _Protocol, _Graph, _M, _Options) :-
 2033    !,
 2034    catch(thread_get_message(Queue, _), _, true).
 2035rdf_load_file(_Ref, _Spec, SourceURL, Protocol, Graph, M, Options) :-
 2036    debug(rdf(load), 'RDF: Loading ~q into ~q', [SourceURL, Graph]),
 2037    statistics(cputime, T0),
 2038    rdf_open_input(SourceURL, Protocol, Graph,
 2039                   In, Cleanup, Modified, Format, Options),
 2040    supported_format(Format, Cleanup),
 2041    return_modified(Modified, Options),
 2042    (   Modified == not_modified
 2043    ->  Action = none
 2044    ;   Modified = cached(CacheFile)
 2045    ->  do_unload(Graph),
 2046        catch(rdf_load_db_no_admin(CacheFile, cache(Graph), Graphs), _, fail),
 2047        check_loaded_cache(Graph, Graphs, Modified),
 2048        Action = load
 2049    ;   option(base_uri(BaseURI), Options, Graph),
 2050        (   var(BaseURI)
 2051        ->  BaseURI = SourceURL
 2052        ;   true
 2053        ),
 2054        once(phrase(derived_options(Options, NSList), Extra)),
 2055        merge_options([ base_uri(BaseURI),
 2056                        graph(Graph),
 2057                        format(Format)
 2058                      | Extra
 2059                      ], Options, RDFOptions),
 2060        do_unload(Graph),
 2061        graph_modified(Modified, ModifiedStamp),
 2062        rdf_set_graph_source(Graph, SourceURL, ModifiedStamp),
 2063        call_cleanup(rdf_load_stream(Format, In, M:RDFOptions),
 2064                     Cleanup),
 2065        save_cache(Graph, SourceURL, Options),
 2066        register_file_ns(NSList),
 2067        format_action(Format, Action)
 2068    ),
 2069    rdf_statistics_(triples(Graph, Triples)),
 2070    report_loaded(Action, SourceURL, Graph, Triples, T0, Options).
 2071
 2072supported_format(Format, _Cleanup) :-
 2073    rdf_file_type(_, Format),
 2074    !.
 2075supported_format(Format, Cleanup) :-
 2076    call(Cleanup),
 2077    existence_error(rdf_format_plugin, Format).
 2078
 2079format_action(triples, load) :- !.
 2080format_action(_, parsed).
 2081
 2082save_cache(Graph, SourceURL, Options) :-
 2083    option(cache(true), Options, true),
 2084    rdf_cache_file(SourceURL, write, CacheFile),
 2085    !,
 2086    catch(save_cache(Graph, CacheFile), E,
 2087          print_message(warning, E)).
 2088save_cache(_, _, _).
 2089
 2090derived_options([], _) -->
 2091    [].
 2092derived_options([H|T], NSList) -->
 2093    (   {   H == register_namespaces(true)
 2094        ;   H == (register_namespaces = true)
 2095        }
 2096    ->  [ namespaces(NSList) ]
 2097    ;   []
 2098    ),
 2099    derived_options(T, NSList).
 2100
 2101graph_modified(last_modified(Stamp), Stamp).
 2102graph_modified(unknown, Stamp) :-
 2103    get_time(Stamp).
 2104
 2105return_modified(Modified, Options) :-
 2106    option(modified(M0), Options),
 2107    !,
 2108    M0 = Modified.
 2109return_modified(_, _).
 2110
 2111
 2112                 /*******************************
 2113                 *        INPUT HANDLING        *
 2114                 *******************************/
 2115
 2116/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 2117This section deals with pluggable input sources.  The task of the input
 2118layer is
 2119
 2120    * Decide on the graph-name
 2121    * Decide on the source-location
 2122    * Decide whether loading is needed (if-modified)
 2123    * Decide on the serialization in the input
 2124
 2125The protocol must ensure minimal  overhead,   in  particular for network
 2126protocols. E.g. for HTTP we want to make a single call on the server and
use If-modified-since to verify that we need not reload this file.
 2128- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 2129
 2130%!  rdf_open_input(+SourceURL, +Protocol, +Graph,
 2131%!                 -Stream, -Cleanup, -Modified, -Format, +Options)
 2132%
 2133%   Open an input source.
 2134%
 2135%   Options processed:
 2136%
 2137%       * graph(Graph)
 2138%       * db(Graph)
 2139%       * if(Condition)
 2140%       * cache(Cache)
 2141%       * format(Format)
 2142%
 2143%   @param  Modified is one of =not_modified=, last_modified(Time),
 2144%           cached(CacheFile) or =unknown=
 2145
 2146rdf_open_input(SourceURL, Protocol, Graph,
 2147               Stream, Cleanup, Modified, Format, Options) :-
 2148    option(if(If), Options, changed),
 2149    (   If == true
 2150    ->  true
 2151    ;   rdf_graph_source_(Graph, SourceURL, HaveModified)
 2152    ->  true
 2153    ;   option(cache(true), Options, true),
 2154        rdf_cache_file(SourceURL, read, CacheFile)
 2155    ->  time_file(CacheFile, HaveModified)
 2156    ;   true
 2157    ),
 2158    option(format(Format), Options, _),
 2159    open_input_if_modified(Protocol, SourceURL, HaveModified,
 2160                           Stream, Cleanup, Modified0, Format, Options),
 2161    (   Modified0 == not_modified
 2162    ->  (   nonvar(CacheFile)
 2163        ->  Modified = cached(CacheFile)
 2164        ;   Modified = not_modified
 2165        )
 2166    ;   Modified = Modified0
 2167    ).
 2168
 2169
 2170%!  source_url(+Spec, -Class, -SourceURL) is det.
 2171%
 2172%   Determine class and url of the source.  Class is one of
 2173%
 2174%       * stream(Stream)
 2175%       * file
 2176%       * a url-protocol (e.g., =http=)
 2177
 2178source_url(stream(In), stream(In), SourceURL) :-
 2179    !,
 2180    (   stream_property(In, file_name(File))
 2181    ->  to_url(File, SourceURL)
 2182    ;   gensym('stream://', SourceURL)
 2183    ).
 2184source_url(Stream, Class, SourceURL) :-
 2185    is_stream(Stream),
 2186    !,
 2187    source_url(stream(Stream), Class, SourceURL).
 2188source_url(Spec, Protocol, SourceURL) :-
 2189    compound(Spec),
 2190    !,
 2191    source_file(Spec, Protocol, SourceURL).
 2192source_url(FileURL, Protocol, SourceURL) :-             % or return FileURL?
 2193    uri_file_name(FileURL, File),
 2194    !,
 2195    source_file(File, Protocol, SourceURL).
 2196source_url(SourceURL0, Protocol, SourceURL) :-
 2197    is_url(SourceURL0, Protocol, SourceURL),
 2198    !.
 2199source_url(File, Protocol, SourceURL) :-
 2200    source_file(File, Protocol, SourceURL).
 2201
 2202source_file(Spec, file(SExt), SourceURL) :-
 2203    findall(Ext, valid_extension(Ext), Exts),
 2204    absolute_file_name(Spec, File, [access(read), extensions([''|Exts])]),
 2205    storage_extension(_Plain, SExt, File),
 2206    uri_file_name(SourceURL, File).
 2207
 2208to_url(URL, URL) :-
 2209    uri_is_global(URL),
 2210    !.
 2211to_url(File, URL) :-
 2212    absolute_file_name(File, Path),
 2213    uri_file_name(URL, Path).
 2214
 2215storage_extension(Plain, SExt, File) :-
 2216    file_name_extension(Plain, SExt, File),
 2217    SExt \== '',
 2218    rdf_storage_encoding(SExt, _),
 2219    !.
 2220storage_extension(File, '', File).
 2221
 2222%!  load_graph(+SourceURL, -Graph, +Options) is det.
 2223%
 2224%   Graph is the graph into which  we   load  the  data. Tries these
 2225%   options:
 2226%
 2227%     1. The graph(Graph) option
 2228%     2. The db(Graph) option (backward compatibility)
 2229%     3. The base_uri(BaseURI) option
 2230%     4. The source URL
 2231
 2232load_graph(Source, Graph, Options) :-
 2233    (   option(graph(Graph), Options)
 2234    ;   option(db(Graph), Options)
 2235    ),
 2236    !,
 2237    load_graph2(Source, Graph, Options).
 2238load_graph(Source, Graph, Options) :-
 2239    load_graph2(Source, Graph, Options).
 2240
 2241load_graph2(_, Graph, _) :-
 2242    ground(Graph),
 2243    !.
 2244load_graph2(_Source, Graph, Options) :-
 2245    option(base_uri(Graph), Options),
 2246    Graph \== [],
 2247    ground(Graph),
 2248    !.
 2249load_graph2(Source, Graph, _) :-
 2250    load_graph(Source, Graph).
 2251
 2252load_graph(SourceURL, BaseURI) :-
 2253    file_name_extension(BaseURI, Ext, SourceURL),
 2254    rdf_storage_encoding(Ext, _),
 2255    !.
 2256load_graph(SourceURL, SourceURL).
 2257
 2258
 2259open_input_if_modified(stream(In), SourceURL, _, In, true,
 2260                       unknown, Format, _) :-
 2261    !,
 2262    (   var(Format)
 2263    ->  guess_format(SourceURL, Format)
 2264    ;   true
 2265    ).
 2266open_input_if_modified(file(SExt), SourceURL, HaveModified, Stream, Cleanup,
 2267                       Modified, Format, _) :-
 2268    !,
 2269    uri_file_name(SourceURL, File),
 2270    (   SExt == '' -> Plain = File; file_name_extension(Plain, SExt, File)),
 2271    time_file(File, LastModified),
 2272    (   nonvar(HaveModified),
 2273        HaveModified >= LastModified
 2274    ->  Modified = not_modified,
 2275        Cleanup = true
 2276    ;   storage_open(SExt, File, Stream, Cleanup),
 2277        Modified = last_modified(LastModified),
 2278        (   var(Format)
 2279        ->  guess_format(Plain, Format)
 2280        ;   true
 2281        )
 2282    ).
 2283open_input_if_modified(file, SourceURL, HaveModified, Stream, Cleanup,
 2284                       Modified, Format, Options) :-
 2285    !,
 2286    open_input_if_modified(file(''), SourceURL, HaveModified,
 2287                           Stream, Cleanup,
 2288                           Modified, Format, Options).
 2289open_input_if_modified(Protocol, SourceURL, HaveModified, Stream, Cleanup,
 2290                       Modified, Format, Options) :-
 2291    rdf_open_hook(Protocol, SourceURL, HaveModified, Stream, Cleanup,
 2292                  Modified, Format, Options).
 2293
 2294guess_format(File, Format) :-
 2295    file_name_extension(_, Ext, File),
 2296    (   rdf_file_type(Ext, Format)
 2297    ->  true
 2298    ;   Format = xml,
 2299        print_message(warning, rdf(guess_format(Ext)))
 2300    ).
 2301
 2302%!  storage_open(+Extension, +File, -Stream, -Cleanup)
 2303%
 2304%   Open the low-level storage. Note  that   the  file  is opened as
 2305%   binary. This is the same  as   for  HTTP  resources. The correct
 2306%   encoding will be set by the XML parser or the Turtle parser.
 2307
 2308storage_open('', File, Stream, close(Stream)) :-
 2309    !,
 2310    open(File, read, Stream, [type(binary)]).
 2311storage_open(Ext, File, Stream, Cleanup) :-
 2312    rdf_storage_encoding(Ext, Encoding),
 2313    rdf_open_decode(Encoding, File, Stream, Cleanup).
 2314
 2315valid_extension(Ext) :-
 2316    rdf_file_type(Ext, _).
 2317valid_extension(Ext) :-
 2318    rdf_storage_encoding(Ext, _).
 2319
 2320%!  is_url(@Term, -Scheme, -URL) is semidet.
 2321%
 2322%   True if Term is an atom denoting URL of the given Scheme. URL is
 2323%   normalized  (see  uri_normalized/2)  and   a  possible  fragment
 2324%   identifier (#fragment) is removed. This  predicate only succeeds
 2325%   if  the  scheme  is   registered    using   the  multifile  hook
 2326%   url_protocol/1.
 2327
 2328is_url(URL, Scheme, FetchURL) :-
 2329    atom(URL),
 2330    uri_is_global(URL),
 2331    uri_normalized(URL, URL1),              % case normalization
 2332    uri_components(URL1, Components),
 2333    uri_data(scheme, Components, Scheme0),
 2334    url_protocol(Scheme0),
 2335    !,
 2336    Scheme = Scheme0,
 2337    uri_data(fragment, Components, _, Components1),
 2338    uri_components(FetchURL, Components1).
 2339
% Schemes accepted by is_url/3.  This predicate is declared multifile
% above, so other modules can add further schemes.
url_protocol(file).                     % built-in
 2341
 2342%!  rdf_file_type(+Extension, -Format) is semidet.
 2343%
 2344%   True if Format  is  the  format   belonging  to  the  given file
 2345%   extension.  This predicate is multifile and can thus be extended
 2346%   by plugins.
 2347
% Extensions loaded with the `xml` format
rdf_file_type(xml,   xml).
rdf_file_type(rdf,   xml).
rdf_file_type(rdfs,  xml).
rdf_file_type(owl,   xml).
% Extensions loaded with the `xhtml` format
rdf_file_type(htm,   xhtml).
rdf_file_type(html,  xhtml).
rdf_file_type(xhtml, xhtml).
% Extension loaded with the `triples` format (see rdf_load_stream/3)
rdf_file_type(trp,   triples).
 2356
 2357
%!  rdf_storage_encoding(+Extension, -Format) is semidet.
%
%   True if Format describes the storage encoding of a file with the
%   given Extension. The empty extension maps to `plain`. This
%   predicate is declared multifile above, so other modules can add
%   further encodings.

rdf_storage_encoding('', plain).
 2363
 2364
 2365%!  rdf_load_stream(+Format, +Stream, :Options)
 2366%
 2367%   Load RDF data from Stream.
 2368%
 2369%   @tbd    Handle mime-types?
 2370
 2371rdf_load_stream(xml, Stream, Options) :-
 2372    !,
 2373    graph(Options, Graph),
 2374    rdf_transaction(load_stream(Stream, Options),
 2375                    parse(Graph)).
 2376rdf_load_stream(xhtml, Stream, M:Options) :-
 2377    !,
 2378    graph(Options, Graph),
 2379    rdf_transaction(load_stream(Stream, M:[embedded(true)|Options]),
 2380                    parse(Graph)).
 2381rdf_load_stream(triples, Stream, Options) :-
 2382    !,
 2383    graph(Options, Graph),
 2384    rdf_load_db_(Stream, Graph, _Graphs).
 2385
 2386load_stream(Stream, M:Options) :-
 2387    process_rdf(Stream, assert_triples, M:Options),
 2388    option(graph(Graph), Options),
 2389    rdf_graph_clear_modified_(Graph).
 2390
 2391
 2392%!  report_loaded(+Action, +Source, +DB, +Triples, +StartCPU, +Options)
 2393
 2394report_loaded(none, _, _, _, _, _) :- !.
 2395report_loaded(Action, Source, DB, Triples, T0, Options) :-
 2396    statistics(cputime, T1),
 2397    Time is T1 - T0,
 2398    (   option(silent(true), Options)
 2399    ->  Level = silent
 2400    ;   Level = informational
 2401    ),
 2402    print_message(Level,
 2403                  rdf(loaded(Action, Source, DB, Triples, Time))).
 2404
 2405
 2406%!  rdf_unload(+Source) is det.
 2407%
 2408%   Identify the graph loaded from Source and use rdf_unload_graph/1
 2409%   to erase this graph.
 2410%
 2411%   @deprecated     For compatibility, this predicate also accepts a
 2412%                   graph name instead of a source specification.
 2413%                   Please update your code to use
 2414%                   rdf_unload_graph/1.
 2415
 2416rdf_unload(Spec) :-
 2417    source_url(Spec, _Protocol, SourceURL),
 2418    rdf_graph_source_(Graph, SourceURL, _),
 2419    !,
 2420    rdf_unload_graph(Graph).
 2421rdf_unload(Graph) :-
 2422    atom(Graph),
 2423    rdf_graph(Graph),
 2424    !,
 2425    warn_deprecated_unload(Graph),
 2426    rdf_unload_graph(Graph).
 2427rdf_unload(_).
 2428
 2429:- dynamic
 2430    warned/0. 2431
 2432warn_deprecated_unload(_) :-
 2433    warned,
 2434    !.
 2435warn_deprecated_unload(Graph) :-
 2436    assertz(warned),
 2437    print_message(warning, rdf(deprecated(rdf_unload(Graph)))).
 2438
 2439
 2440%!  rdf_unload_graph(+Graph) is det.
 2441%
 2442%   Remove Graph from the RDF store.  Succeeds silently if the named
 2443%   graph does not exist.
 2444
 2445rdf_unload_graph(Graph) :-
 2446    must_be(atom, Graph),
 2447    (   rdf_graph(Graph)
 2448    ->  rdf_transaction(do_unload(Graph), unload(Graph))
 2449    ;   true
 2450    ).
 2451
 2452do_unload(Graph) :-
 2453    (   rdf_graph_(Graph, Triples),
 2454        Triples > 0
 2455    ->  rdf_retractall(_,_,_,Graph)
 2456    ;   true
 2457    ),
 2458    rdf_destroy_graph(Graph).
 2459
 2460                 /*******************************
 2461                 *         GRAPH QUERIES        *
 2462                 *******************************/
 2463
 2464%!  rdf_create_graph(+Graph) is det.
 2465%
 2466%   Create an RDF graph without triples.   Succeeds  silently if the
 2467%   graph already exists.
 2468
 2469
 2470%!  rdf_graph(?Graph) is nondet.
 2471%
 2472%   True when Graph is an existing graph.
 2473
 2474rdf_graph(Graph) :-
 2475    rdf_graph_(Graph, _Triples).
 2476
 2477%!  rdf_source(?Graph, ?SourceURL) is nondet.
 2478%
 2479%   True if named Graph is loaded from SourceURL.
 2480%
 2481%   @deprecated Use rdf_graph_property(Graph, source(SourceURL)).
 2482
 2483rdf_source(Graph, SourceURL) :-
 2484    rdf_graph(Graph),
 2485    rdf_graph_source_(Graph, SourceURL, _Modified).
 2486
 2487%!  rdf_source(?Source)
 2488%
 2489%   True if Source is a loaded source.
 2490%
 2491%   @deprecated     Use rdf_graph/1 or rdf_source/2.
 2492
 2493rdf_source(SourceURL) :-
 2494    rdf_source(_Graph, SourceURL).
 2495
 2496%!  rdf_make
 2497%
 2498%   Reload all loaded files that have been modified since the last
 2499%   time they were loaded.
 2500
 2501rdf_make :-
 2502    findall(Source-Graph, modified_graph(Source, Graph), Modified),
 2503    forall(member(Source-Graph, Modified),
 2504           catch(rdf_load(Source, [graph(Graph), if(changed)]), E,
 2505                 print_message(error, E))).
 2506
 2507modified_graph(SourceURL, Graph) :-
 2508    rdf_graph(Graph),
 2509    rdf_graph_source_(Graph, SourceURL, Modified),
 2510    \+ sub_atom(SourceURL, 0, _, _, 'stream://'),
 2511    Modified > 0.
 2512
 2513%!  rdf_graph_property(?Graph, ?Property) is nondet.
 2514%
 2515%   True when Property is a property of Graph.  Defined properties
 2516%   are:
 2517%
 2518%       * hash(Hash)
 2519%       Hash is the (MD5-)hash for the content of Graph.
 2520%       * modified(Boolean)
 2521%       True if the graph is modified since it was loaded or
 2522%       rdf_set_graph/2 was called with modified(false).
 2523%       * source(Source)
 2524%       The graph is loaded from the Source (a URL)
 2525%       * source_last_modified(?Time)
 2526%       Time is the last-modified timestamp of Source at the moment
 2527%       that the graph was loaded from Source.
 2528%       * triples(Count)
 2529%       True when Count is the number of triples in Graph.
 2530%
 2531%    Additional graph properties can be added  by defining rules for
 2532%    the multifile predicate  property_of_graph/2.   Currently,  the
 2533%    following extensions are defined:
 2534%
 2535%       - library(semweb/rdf_persistency)
 2536%         - persistent(Boolean)
 2537%           Boolean is =true= if the graph is persistent.
 2538
 2539rdf_graph_property(Graph, Property) :-
 2540    rdf_graph(Graph),
 2541    property_of_graph(Property, Graph).
 2542
 2543:- multifile
 2544    property_of_graph/2. 2545
 2546property_of_graph(hash(Hash), Graph) :-
 2547    rdf_md5(Graph, Hash).
 2548property_of_graph(modified(Boolean), Graph) :-
 2549    rdf_graph_modified_(Graph, Boolean, _).
 2550property_of_graph(source(URL), Graph) :-
 2551    rdf_graph_source_(Graph, URL, _).
 2552property_of_graph(source_last_modified(Time), Graph) :-
 2553    rdf_graph_source_(Graph, _, Time),
 2554    Time > 0.0.
 2555property_of_graph(triples(Count), Graph) :-
 2556    rdf_graph_(Graph, Count).
 2557
 2558%!  rdf_set_graph(+Graph, +Property) is det.
 2559%
 2560%   Set properties of Graph.  Defined properties are:
 2561%
 2562%       * modified(false)
 2563%       Set the modified state of Graph to false.
 2564
 2565rdf_set_graph(Graph, modified(Modified)) :-
 2566    must_be(oneof([false]), Modified),
 2567    rdf_graph_clear_modified_(Graph).
 2568
 2569
 2570%!  save_cache(+DB, +Cache) is det.
 2571%
 2572%   Save triples belonging to DB in the file Cache.
 2573
 2574save_cache(DB, Cache) :-
 2575    current_prolog_flag(rdf_triple_format, Version),
 2576    setup_call_cleanup(
 2577        catch(open(Cache, write, CacheStream, [type(binary)]), _, fail),
 2578        rdf_save_db_(CacheStream, DB, Version),
 2579        close(CacheStream)).
 2580
 2581%!  assert_triples(+Triples, +Source)
 2582%
 2583%   Assert a list of triples into the database. Foir security
 2584%   reasons we check we aren't inserting anything but nice RDF
 2585%   triples.
 2586
 2587assert_triples([], _).
 2588assert_triples([rdf(S,P,O)|T], DB) :-
 2589    !,
 2590    rdf_assert(S, P, O, DB),
 2591    assert_triples(T, DB).
 2592assert_triples([H|_], _) :-
 2593    throw(error(type_error(rdf_triple, H), _)).
 2594
 2595
 2596                 /*******************************
 2597                 *             RESET            *
 2598                 *******************************/
 2599
 2600%!  rdf_reset_db
 2601%
 2602%   Remove all triples from the RDF database and reset all its
 2603%   statistics.
 2604%
 2605%   @bug    This predicate checks for active queries, but this check is
 2606%           not properly synchronized and therefore the use of this
 2607%           predicate is unsafe in multi-threaded contexts. It is
 2608%           mainly used to run functionality tests that need to
 2609%           start with an empty database.
 2610
 2611rdf_reset_db :-
 2612    reset_gensym('_:genid'),
 2613    rdf_reset_db_.
 2614
 2615
 2616                 /*******************************
 2617                 *           SAVE RDF           *
 2618                 *******************************/
 2619
 2620%!  rdf_save(+Out) is det.
 2621%
 2622%   Same as rdf_save(Out, []).  See rdf_save/2 for details.
 2623
 2624%!  rdf_save(+Out, :Options) is det.
 2625%
 2626%   Write RDF data as RDF/XML. Options is a list of one or more of
 2627%   the following options:
 2628%
 2629%           * graph(+Graph)
 2630%           Save only triples associated to the given named Graph.
 2631%
 2632%           * anon(Bool)
 2633%           If false (default true) do not save blank nodes that do
 2634%           not appear (indirectly) as object of a named resource.
 2635%
 2636%           * base_uri(URI)
 2637%           BaseURI used. If present, all URIs that can be
 2638%           represented relative to this base are written using
 2639%           their shorthand.  See also =write_xml_base= option
 2640%
 2641%           * convert_typed_literal(:Convertor)
 2642%           Call Convertor(-Type, -Content, +RDFObject), providing
 2643%           the opposite for the convert_typed_literal option of
 2644%           the RDF parser.
 2645%
 2646%           * document_language(+Lang)
 2647%           Initial xml:lang saved with rdf:RDF element
 2648%
 2649%           * encoding(Encoding)
 2650%           Encoding for the output.  Either utf8 or iso_latin_1
 2651%
 2652%           * inline(+Bool)
 2653%           If =true= (default =false=), inline resources when
 2654%           encountered for the first time. Normally, only bnodes
 2655%           are handled this way.
 2656%
 2657%           * namespaces(+List)
 2658%           Explicitly specify saved namespace declarations. See
 2659%           rdf_save_header/2 option namespaces for details.
 2660%
 2661%           * sorted(+Boolean)
 2662%           If =true= (default =false=), emit subjects sorted on
 2663%           the full URI.  Useful to make file comparison easier.
 2664%
 2665%           * write_xml_base(Bool)
 2666%           If =false=, do _not_ include the =|xml:base|=
 2667%           declaration that is written normally when using the
 2668%           =base_uri= option.
 2669%
 2670%           * xml_attributes(+Bool)
 2671%           If =false= (default =true=), never use xml attributes to
 2672%           save plain literal attributes, i.e., always use an XML
 2673%           element as in =|<name>Joe</name>|=.
 2674%
 2675%   @param Out      Location to save the data.  This can also be a
 2676%                   file-url (=|file://path|=) or a stream wrapped
 2677%                   in a term stream(Out).
 2678%   @see rdf_save_db/1
 2679
 2680:- thread_local
 2681    named_anon/2,                   % +Resource, -Id
 2682    inlined/1.                      % +Resource
 2683
 2684rdf_save(File) :-
 2685    rdf_save2(File, []).
 2686
 2687rdf_save(Spec, M:Options0) :-
 2688    is_list(Options0),
 2689    !,
 2690    meta_options(save_meta_option, M:Options0, Options),
 2691    to_file(Spec, File),
 2692    rdf_save2(File, Options).
 2693rdf_save(Spec, _:DB) :-
 2694    atom(DB),                      % backward compatibility
 2695    !,
 2696    to_file(Spec, File),
 2697    rdf_save2(File, [graph(DB)]).
 2698
 2699save_meta_option(convert_typed_literal).
 2700
 2701to_file(URL, File) :-
 2702    atom(URL),
 2703    uri_file_name(URL, File),
 2704    !.
 2705to_file(File, File).
 2706
 2707rdf_save2(File, Options) :-
 2708    option(encoding(Encoding), Options, utf8),
 2709    valid_encoding(Encoding),
 2710    open_output(File, Encoding, Out, Close),
 2711    flag(rdf_db_saved_subjects, OSavedSubjects, 0),
 2712    flag(rdf_db_saved_triples, OSavedTriples, 0),
 2713    call_cleanup(rdf_do_save(Out, Options),
 2714                 Reason,
 2715                 cleanup_save(Reason,
 2716                              File,
 2717                              OSavedSubjects,
 2718                              OSavedTriples,
 2719                              Close)).
 2720
 2721open_output(stream(Out), Encoding, Out,
 2722            set_stream(Out, encoding(Old))) :-
 2723    !,
 2724    stream_property(Out, encoding(Old)),
 2725    set_stream(Out, encoding(Encoding)).
 2726open_output(File, Encoding, Out,
 2727            close(Out)) :-
 2728    open(File, write, Out, [encoding(Encoding)]).
 2729
 2730valid_encoding(Enc) :-
 2731    (   xml_encoding_name(Enc, _)
 2732    ->  true
 2733    ;   throw(error(domain_error(encoding, Enc), _))
 2734    ).
 2735
 2736
 2737cleanup_save(Reason,
 2738             File,
 2739             OSavedSubjects,
 2740             OSavedTriples,
 2741             Close) :-
 2742    call(Close),
 2743    flag(rdf_db_saved_subjects, SavedSubjects, OSavedSubjects),
 2744    flag(rdf_db_saved_triples, SavedTriples, OSavedTriples),
 2745    retractall(named_anon(_, _)),
 2746    retractall(inlined(_)),
 2747    (   Reason == exit
 2748    ->  print_message(informational,
 2749                      rdf(saved(File, SavedSubjects, SavedTriples)))
 2750    ;   format(user_error, 'Reason = ~w~n', [Reason])
 2751    ).
 2752
 2753rdf_do_save(Out, Options0) :-
 2754    rdf_save_header(Out, Options0, Options),
 2755    graph(Options, DB),
 2756    (   option(sorted(true), Options, false)
 2757    ->  (   var(DB)
 2758        ->  setof(Subject, rdf_subject(Subject), Subjects)
 2759        ;   findall(Subject, rdf(Subject, _, _, DB:_), SubjectList),
 2760            sort(SubjectList, Subjects)
 2761        ),
 2762        forall(member(Subject, Subjects),
 2763               rdf_save_non_anon_subject(Out, Subject, Options))
 2764    ;   forall(rdf_subject_in_graph(Subject, DB),
 2765               rdf_save_non_anon_subject(Out, Subject, Options))
 2766    ),
 2767    rdf_save_footer(Out),
 2768    !.                                  % dubious cut; without the
 2769                                        % cleanup handlers isn't called!?
 2770
 2771%!  rdf_subject_in_graph(-Subject, ?DB) is nondet.
 2772%
 2773%   True when Subject is a subject in the   graph  DB. If DB is unbound,
 2774%   all  subjects  are  enumerated.  Otherwise   we  have  two  options:
 2775%   enumerate all subjects and filter by graph or collect all triples of
 2776%   the graph and get the unique subjects.   The  first is attractive if
 2777%   the graph is big compared  to  the   DB,  also  because  it does not
 2778%   require memory, the second if the graph is small compared to the DB.
 2779
 2780rdf_subject_in_graph(Subject, DB) :-
 2781    var(DB),
 2782    !,
 2783    rdf_subject(Subject).
 2784rdf_subject_in_graph(Subject, DB) :-
 2785    rdf_statistics(triples(AllTriples)),
 2786    rdf_graph_property(DB, triples(DBTriples)),
 2787    DBTriples > AllTriples // 10,
 2788    !,
 2789    rdf_resource(Subject),
 2790    (   rdf(Subject, _, _, DB:_)
 2791    ->  true
 2792    ).
 2793rdf_subject_in_graph(Subject, DB) :-
 2794    findall(Subject, rdf(Subject, _, _, DB:_), SubjectList),
 2795    list_to_set(SubjectList, Subjects),
 2796    member(Subject, Subjects).
 2797
 2798
 2799graph(Options0, DB) :-
 2800    strip_module(Options0, _, Options),
 2801    (   memberchk(graph(DB0), Options)
 2802    ->  DB = DB0
 2803    ;   memberchk(db(DB0), Options)
 2804    ->  DB = DB0
 2805    ;   true                            % leave unbound
 2806    ).
 2807
 2808
 2809%!  rdf_save_header(+Fd, +Options)
 2810%
 2811%   Save XML document header, doctype and open the RDF environment.
 2812%   This predicate also sets up the namespace notation.
 2813%
 2814%   Save an RDF header, with the XML header, DOCTYPE, ENTITY and
 2815%   opening the rdf:RDF element with appropriate namespace
 2816%   declarations. It uses the primitives from section 3.5 to
 2817%   generate the required namespaces and desired short-name. Options
 2818%   is one of:
 2819%
 2820%     * graph(+URI)
 2821%     Only search for namespaces used in triples that belong to the
 2822%     given named graph.
 2823%
 2824%     * namespaces(+List)
 2825%     Where List is a list of namespace abbreviations. With this
 2826%     option, the expensive search for all namespaces that may be
 2827%     used by your data is omitted. The namespaces =rdf= and =rdfs=
 2828%     are added to the provided List. If a namespace is not
 2829%     declared, the resource is emitted in non-abbreviated form.
 2830
 2831rdf_save_header(Out, Options) :-
 2832    rdf_save_header(Out, Options, _).
 2833
 2834rdf_save_header(Out, Options, OptionsOut) :-
 2835    is_list(Options),
 2836    !,
 2837    stream_property(Out, encoding(Enc)),
 2838    xml_encoding(Enc, Encoding),
 2839    format(Out, '<?xml version=\'1.0\' encoding=\'~w\'?>~n', [Encoding]),
 2840    format(Out, '<!DOCTYPE rdf:RDF [', []),
 2841    header_namespaces(Options, NSIdList),
 2842    nsmap(NSIdList, NsMap),
 2843    append(Options, [nsmap(NsMap)], OptionsOut),
 2844    forall(member(Id=URI, NsMap),
 2845           (   xml_quote_attribute(URI, NSText0, Enc),
 2846               xml_escape_parameter_entity(NSText0, NSText),
 2847               format(Out, '~N    <!ENTITY ~w \'~w\'>', [Id, NSText])
 2848           )),
 2849    format(Out, '~N]>~n~n', []),
 2850    format(Out, '<rdf:RDF', []),
 2851    (   member(Id, NSIdList),
 2852        format(Out, '~N    xmlns:~w="&~w;"~n', [Id, Id]),
 2853        fail
 2854    ;   true
 2855    ),
 2856    (   option(base_uri(Base), Options),
 2857        option(write_xml_base(true), Options, true)
 2858    ->  xml_quote_attribute(Base, BaseText, Enc),
 2859        format(Out, '~N    xml:base="~w"~n', [BaseText])
 2860    ;   true
 2861    ),
 2862    (   memberchk(document_language(Lang), Options)
 2863    ->  format(Out, '~N    xml:lang="~w"', [Lang])
 2864    ;   true
 2865    ),
 2866    format(Out, '>~n', []).
 2867rdf_save_header(Out, FileRef, OptionsOut) :-    % compatibility
 2868    atom(FileRef),
 2869    rdf_save_header(Out, [graph(FileRef)], OptionsOut).
 2870
 2871xml_encoding(Enc, Encoding) :-
 2872    (   xml_encoding_name(Enc, Encoding)
 2873    ->  true
 2874    ;   throw(error(domain_error(rdf_encoding, Enc), _))
 2875    ).
 2876
 2877xml_encoding_name(ascii,       'US-ASCII').
 2878xml_encoding_name(iso_latin_1, 'ISO-8859-1').
 2879xml_encoding_name(utf8,        'UTF-8').
 2880
 2881%!  nsmap(+NSIds, -Map:list(id=uri)) is det.
 2882%
 2883%   Create a namespace-map that is compatible to xml_write/2
 2884%   for dealing with XML-Literals
 2885
 2886nsmap([], []).
 2887nsmap([Id|T0], [Id=URI|T]) :-
 2888    ns(Id, URI),
 2889    nsmap(T0, T).
 2890
 2891%!  xml_escape_parameter_entity(+In, -Out) is det.
 2892%
 2893%   Escape % as &#37; for entity declarations.
 2894
 2895xml_escape_parameter_entity(In, Out) :-
 2896    sub_atom(In, _, _, _, '%'),
 2897    !,
 2898    atom_codes(In, Codes),
 2899    phrase(escape_parent(Codes), OutCodes),
 2900    atom_codes(Out, OutCodes).
 2901xml_escape_parameter_entity(In, In).
 2902
 2903escape_parent([]) --> [].
 2904escape_parent([H|T]) -->
 2905    (   { H == 37 }
 2906    ->  "&#37;"
 2907    ;   [H]
 2908    ),
 2909    escape_parent(T).
 2910
 2911
 2912%!  header_namespaces(Options, -List)
 2913%
 2914%   Get namespaces we will define as entities
 2915
 2916header_namespaces(Options, List) :-
 2917    memberchk(namespaces(NSL0), Options),
 2918    !,
 2919    sort([rdf,rdfs|NSL0], List).
 2920header_namespaces(Options, List) :-
 2921    graph(Options, DB),
 2922    used_namespace_entities(List, DB).
 2923
 2924%!  rdf_graph_prefixes(?Graph, -List:ord_set) is det.
 2925%!  rdf_graph_prefixes(?Graph, -List:ord_set, :Options) is det.
 2926%
 2927%   List is a sorted list of  prefixes (namespaces) in Graph. Options
 2928%   defined are:
 2929%
 2930%       * filter(:Filter)
 2931%       optional Filter argument is used to filter the results. It
 2932%       is called with 3 additional arguments:
 2933%
 2934%           ==
 2935%           call(Filter, Where, Prefix, URI)
 2936%           ==
 2937%
 2938%       The Where argument gives the location of the prefix and is
 2939%       one of =subject=, =predicate=, =object= or =type=. The
 2940%       Prefix argument is the potentially new prefix and URI is
 2941%       the full URI that is being processed.
 2942%
 2943%       * expand(:Goal)
 2944%       Hook to generate the graph.  Called using
 2945%
 2946%           ==
 2947%           call(Goal,S,P,O,Graph)
 2948%           ==
 2949%
 2950%       * min_count(+Count)
 2951%       Only include prefixes that appear at least N times.  Default
 2952%       is 1. Declared prefixes are always returned if found at
 2953%       least one time.
 2954%
 2955%       * get_prefix(:GetPrefix)
 2956%       Predicate to extract the candidate prefix from an IRI.  Default
 2957%       is iri_xml_namespace/2.
 2958
 2959
 2960:- thread_local
 2961    graph_prefix/3. 2962:- meta_predicate
 2963    rdf_graph_prefixes(?, -, :). 2964
 2965rdf_graph_prefixes(Graph, List) :-
 2966    rdf_graph_prefixes(Graph, List, []).
 2967
 2968rdf_graph_prefixes(Graph, List, M:QOptions) :-
 2969    is_list(QOptions),
 2970    !,
 2971    meta_options(is_meta, M:QOptions, Options),
 2972    option(filter(Filter), Options, true),
 2973    option(expand(Expand), Options, rdf_db),
 2974    option(min_count(MinCount), Options, 1),
 2975    option(get_prefix(GetPrefix), Options, iri_xml_namespace),
 2976    call_cleanup(prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix),
 2977                 retractall(graph_prefix(_,_,_))),
 2978    sort(Prefixes, List).
 2979rdf_graph_prefixes(Graph, List, M:Filter) :-
 2980    rdf_graph_prefixes(Graph, List, M:[filter(Filter)]).
 2981
 2982is_meta(filter).
 2983is_meta(expand).
 2984is_meta(get_prefix).
 2985
 2986
 2987prefixes(Expand, Graph, Prefixes, Filter, MinCount, GetPrefix) :-
 2988    (   call(Expand, S, P, O, Graph),
 2989        add_ns(subject, GetPrefix, Filter, S, MinCount, s(S)),
 2990        add_ns(predicate, GetPrefix, Filter, P, MinCount, sp(S,P)),
 2991        add_ns_obj(GetPrefix, Filter, O, MinCount, spo(S,P,O)),
 2992        fail
 2993    ;   true
 2994    ),
 2995    findall(Prefix, graph_prefix(Prefix, MinCount, _), Prefixes).
 2996
 2997add_ns(Where, GetPrefix, Filter, S, MinCount, Context) :-
 2998    \+ rdf_is_bnode(S),
 2999    call(GetPrefix, S, Full),
 3000    Full \== '',
 3001    !,
 3002    (   graph_prefix(Full, MinCount, _)
 3003    ->  true
 3004    ;   Filter == true
 3005    ->  add_ns(Full, Context)
 3006    ;   call(Filter, Where, Full, S)
 3007    ->  add_ns(Full, Context)
 3008    ;   true
 3009    ).
 3010add_ns(_, _, _, _, _, _).
 3011
 3012add_ns(Full, Context) :-
 3013    graph_prefix(Full, _, Contexts),
 3014    memberchk(Context, Contexts),
 3015    !.
 3016add_ns(Full, Context) :-
 3017    retract(graph_prefix(Full, C0, Contexts)),
 3018    !,
 3019    C1 is C0+1,
 3020    asserta(graph_prefix(Full, C1, [Context|Contexts])).
 3021add_ns(Full, _) :-
 3022    ns(_, Full),
 3023    !,
 3024    asserta(graph_prefix(Full, _, _)).
 3025add_ns(Full, Context) :-
 3026    asserta(graph_prefix(Full, 1, [Context])).
 3027
 3028
 3029add_ns_obj(GetPrefix, Filter, O, MinCount, Context) :-
 3030    atom(O),
 3031    !,
 3032    add_ns(object, GetPrefix, Filter, O, MinCount, Context).
 3033add_ns_obj(GetPrefix, Filter, literal(type(Type, _)), MinCount, _) :-
 3034    atom(Type),
 3035    !,
 3036    add_ns(type, GetPrefix, Filter, Type, MinCount, t(Type)).
 3037add_ns_obj(_, _, _, _, _).
 3038
 3039
 3040%!  used_namespace_entities(-List, ?Graph) is det.
 3041%
 3042%   Return the namespace aliases that are actually used in Graph. In
 3043%   addition, this predicate creates ns<N>   aliases  for namespaces
 3044%   used in predicates because RDF/XML cannot write predicates other
 3045%   than as an XML name.
 3046
 3047used_namespace_entities(List, Graph) :-
 3048    decl_used_predicate_ns(Graph),
 3049    used_namespaces(List, Graph).
 3050
 3051used_namespaces(List, DB) :-
 3052    rdf_graph_prefixes(DB, FullList),
 3053    ns_abbreviations(FullList, List0),
 3054    sort([rdf|List0], List).
 3055
 3056ns_abbreviations([], []).
 3057ns_abbreviations([H0|T0], [H|T]) :-
 3058    ns(H, H0),
 3059    !,
 3060    ns_abbreviations(T0, T).
 3061ns_abbreviations([_|T0], T) :-
 3062    ns_abbreviations(T0, T).
 3063
 3064
 3065/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 3066For every URL used as a predicate  we   *MUST*  define a namespace as we
 3067cannot use names holding /, :, etc. as XML identifiers.
 3068- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 3069
 3070:- thread_local
 3071    predicate_ns/2. 3072
 3073decl_used_predicate_ns(DB) :-
 3074    retractall(predicate_ns(_,_)),
 3075    (   rdf_current_predicate(P, DB),
 3076        decl_predicate_ns(P),
 3077        fail
 3078    ;   true
 3079    ).
 3080
 3081decl_predicate_ns(Pred) :-
 3082    predicate_ns(Pred, _),
 3083    !.
 3084decl_predicate_ns(Pred) :-
 3085    rdf_global_id(NS:Local, Pred),
 3086    xml_name(Local),
 3087    !,
 3088    assert(predicate_ns(Pred, NS)).
 3089decl_predicate_ns(Pred) :-
 3090    atom_codes(Pred, Codes),
 3091    append(NSCodes, LocalCodes, Codes),
 3092    xml_codes(LocalCodes),
 3093    !,
 3094    (   NSCodes \== []
 3095    ->  atom_codes(NS, NSCodes),
 3096        (   ns(Id, NS)
 3097        ->  assert(predicate_ns(Pred, Id))
 3098        ;   between(1, infinite, N),
 3099            atom_concat(ns, N, Id),
 3100            \+ ns(Id, _)
 3101        ->  rdf_register_ns(Id, NS),
 3102            print_message(informational,
 3103                          rdf(using_namespace(Id, NS)))
 3104        ),
 3105        assert(predicate_ns(Pred, Id))
 3106    ;   assert(predicate_ns(Pred, -)) % no namespace used
 3107    ).
 3108
 3109xml_codes([]).
 3110xml_codes([H|T]) :-
 3111    xml_code(H),
 3112    xml_codes(T).
 3113
 3114xml_code(X) :-
 3115    code_type(X, csym),
 3116    !.
 3117xml_code(0'-).                          % Match 0'-
 3118
 3119
 3120%!  rdf_save_footer(Out:stream) is det.
 3121%
 3122%   Finish XML generation and write the document footer.
 3123%
 3124%   @see rdf_save_header/2, rdf_save_subject/3.
 3125
 3126rdf_save_footer(Out) :-
 3127    retractall(named_anon(_, _)),
 3128    retractall(inlined(_)),
 3129    format(Out, '</rdf:RDF>~n', []).
 3130
 3131%!  rdf_save_non_anon_subject(+Out, +Subject, +Options)
 3132%
 3133%   Save an object.  Anonymous objects not saved if anon(false)
 3134%   is present in the Options list.
 3135
 3136rdf_save_non_anon_subject(_Out, Subject, Options) :-
 3137    rdf_is_bnode(Subject),
 3138    (   memberchk(anon(false), Options)
 3139    ;   graph(Options, DB),
 3140        rdf_db(_, _, Subject, DB)
 3141    ),
 3142    !.
 3143rdf_save_non_anon_subject(Out, Subject, Options) :-
 3144    rdf_save_subject(Out, Subject, Options),
 3145    flag(rdf_db_saved_subjects, X, X+1).
 3146
 3147
 3148%!  rdf_save_subject(+Out, +Subject:resource, +Options) is det.
 3149%
 3150%   Save the triples associated to Subject to Out. Options:
 3151%
 3152%     * graph(+Graph)
 3153%     Only save properties from Graph.
 3154%     * base_uri(+URI)
 3155%     * convert_typed_literal(:Goal)
 3156%     * document_language(+XMLLang)
 3157%
 3158%   @see rdf_save/2 for a description of these options.
 3159
 3160rdf_save_subject(Out, Subject, Options) :-
 3161    is_list(Options),
 3162    !,
 3163    option(base_uri(BaseURI), Options, '-'),
 3164    (   rdf_save_subject(Out, Subject, BaseURI, 0, Options)
 3165    ->  format(Out, '~n', [])
 3166    ;   throw(error(rdf_save_failed(Subject), 'Internal error'))
 3167    ).
 3168rdf_save_subject(Out, Subject, DB) :-
 3169    (   var(DB)
 3170    ->  rdf_save_subject(Out, Subject, [])
 3171    ;   rdf_save_subject(Out, Subject, [graph(DB)])
 3172    ).
 3173
 3174
 3175%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 3176%!                   +Indent:int, +Options) is det.
 3177%
 3178%   Save properties of Subject.
 3179%
 3180%   @param Indent   Current indentation
 3181
 3182rdf_save_subject(_, Subject, _, _, _) :-
 3183    inlined(Subject),
 3184    !.
 3185rdf_save_subject(Out, Subject, BaseURI, Indent, Options) :-
 3186    do_save_subject(Out, Subject, BaseURI, Indent, Options).
 3187
 3188do_save_subject(Out, Subject, BaseURI, Indent, Options) :-
 3189    graph(Options, DB),
 3190    findall(Pred=Object, rdf_db(Subject, Pred, Object, DB), Atts0),
 3191    sort(Atts0, Atts),              % remove duplicates
 3192    length(Atts, L),
 3193    (   length(Atts0, L0),
 3194        Del is L0-L,
 3195        Del > 0
 3196    ->  print_message(informational,
 3197                      rdf(save_removed_duplicates(Del, Subject)))
 3198    ;   true
 3199    ),
 3200    rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options),
 3201    flag(rdf_db_saved_triples, X, X+L).
 3202
 3203rdf_db(Subject, Pred, Object, DB) :-
 3204    var(DB),
 3205    !,
 3206    rdf(Subject, Pred, Object).
 3207rdf_db(Subject, Pred, Object, DB) :-
 3208    rdf(Subject, Pred, Object, DB:_).
 3209
 3210%!  rdf_save_subject(+Out:stream, +Subject:resource, +BaseURI,
 3211%!                   +Atts:list(Pred=Obj), +Indent:int, +Options) is det.
 3212%
 3213%   Save triples defined by Atts on Subject.
 3214
 3215rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
 3216    rdf_equal(rdf:type, RdfType),
 3217    select(RdfType=Type, Atts, Atts1),
 3218    \+ rdf_is_bnode(Type),
 3219    rdf_id(Type, BaseURI, TypeId),
 3220    xml_is_name(TypeId),
 3221    !,
 3222    format(Out, '~*|<', [Indent]),
 3223    rdf_write_id(Out, TypeId),
 3224    save_about(Out, BaseURI, Subject),
 3225    save_attributes(Atts1, BaseURI, Out, TypeId, Indent, Options).
 3226rdf_save_subject(Out, Subject, BaseURI, Atts, Indent, Options) :-
 3227    format(Out, '~*|<rdf:Description', [Indent]),
 3228    save_about(Out, BaseURI, Subject),
 3229    save_attributes(Atts, BaseURI, Out, rdf:'Description', Indent, Options).
 3230
 3231xml_is_name(_NS:Atom) :-
 3232    !,
 3233    xml_name(Atom).
 3234xml_is_name(Atom) :-
 3235    xml_name(Atom).
 3236
 3237%!  save_about(+Out, +BaseURI, +Subject) is det.
 3238%
 3239%   Save the rdf:about. If Subject is a  blank node, save the nodeID
 3240%   if any.
 3241
 3242save_about(Out, _, Subject) :-
 3243    rdf_is_bnode(Subject),
 3244    !,
 3245    (   named_anon(Subject, NodeID)
 3246    ->  format(Out, ' rdf:nodeID="~w"', [NodeID])
 3247    ;   true
 3248    ).
 3249save_about(Out, BaseURI, Subject) :-
 3250    stream_property(Out, encoding(Encoding)),
 3251    rdf_value(Subject, BaseURI, QSubject, Encoding),
 3252    format(Out, ' rdf:about="~w"', [QSubject]).
 3253
 3254%!  save_attributes(+List, +BaseURI, +Stream, +Element, +Indent, +Options)
 3255%
 3256%   Save the attributes.  Short literal attributes are saved in the
 3257%   tag.  Others as the content of the description element.  The
 3258%   begin tag has already been filled.
 3259
 3260save_attributes(Atts, BaseURI, Out, Element, Indent, Options) :-
 3261    split_attributes(Atts, InTag, InBody, Options),
 3262    SubIndent is Indent + 2,
 3263    save_attributes2(InTag, BaseURI, tag, Out, SubIndent, Options),
 3264    (   InBody == []
 3265    ->  format(Out, '/>~n', [])
 3266    ;   format(Out, '>~n', []),
 3267        save_attributes2(InBody, BaseURI, body, Out, SubIndent, Options),
 3268        format(Out, '~N~*|</', [Indent]),
 3269        rdf_write_id(Out, Element),
 3270        format(Out, '>~n', [])
 3271    ).
 3272
 3273%!  split_attributes(+Attributes, -HeadAttrs, -BodyAttr, Options)
 3274%
 3275%   Split attribute (Name=Value) list into attributes for the head
 3276%   and body. Attributes can only be in the head if they are literal
 3277%   and appear only one time in the attribute list.
 3278
 3279split_attributes(Atts, [], Atts, Options) :-
 3280    option(xml_attributes(false), Options),
 3281    !.
 3282split_attributes(Atts, HeadAttr, BodyAttr, _) :-
 3283    duplicate_attributes(Atts, Dupls, Singles),
 3284    simple_literal_attributes(Singles, HeadAttr, Rest),
 3285    append(Dupls, Rest, BodyAttr).
 3286
 3287%!  duplicate_attributes(+Attrs, -Duplicates, -Singles)
 3288%
 3289%   Extract attributes that appear more than onces as we cannot
 3290%   dublicate an attribute in the head according to the XML rules.
 3291
 3292duplicate_attributes([], [], []).
 3293duplicate_attributes([H|T], Dupls, Singles) :-
 3294    H = (Name=_),
 3295    named_attributes(Name, T, D, R),
 3296    D \== [],
 3297    append([H|D], Dupls2, Dupls),
 3298    !,
 3299    duplicate_attributes(R, Dupls2, Singles).
 3300duplicate_attributes([H|T], Dupls2, [H|Singles]) :-
 3301    duplicate_attributes(T, Dupls2, Singles).
 3302
 3303named_attributes(_, [], [], []) :- !.
 3304named_attributes(Name, [H|T], D, R) :-
 3305    (   H = (Name=_)
 3306    ->  D = [H|DT],
 3307        named_attributes(Name, T, DT, R)
 3308    ;   R = [H|RT],
 3309        named_attributes(Name, T, D, RT)
 3310    ).
 3311
 3312%!  simple_literal_attributes(+Attributes, -Inline, -Body)
 3313%
 3314%   Split attributes for (literal) attributes to be used in the
 3315%   begin-tag and ones that have to go into the body of the description.
 3316
 3317simple_literal_attributes([], [], []).
 3318simple_literal_attributes([H|TA], [H|TI], B) :-
 3319    in_tag_attribute(H),
 3320    !,
 3321    simple_literal_attributes(TA, TI, B).
 3322simple_literal_attributes([H|TA], I, [H|TB]) :-
 3323    simple_literal_attributes(TA, I, TB).
 3324
 3325in_tag_attribute(_=literal(Text)) :-
 3326    atom(Text),                     % may not have lang qualifier
 3327    atom_length(Text, Len),
 3328    Len < 60.
 3329
 3330%!  save_attributes(+List, +BaseURI, +TagOrBody, +Stream)
 3331%
 3332%   Save a list of attributes.
 3333
 3334save_attributes2([], _, _, _, _, _).
 3335save_attributes2([H|T], BaseURI, Where, Out, Indent, Options) :-
 3336    save_attribute(Where, H, BaseURI, Out, Indent, Options),
 3337    save_attributes2(T, BaseURI, Where, Out, Indent, Options).
 3338
 3339save_attribute(tag, Name=literal(Value), BaseURI, Out, Indent, _DB) :-
 3340    AttIndent is Indent + 2,
 3341    rdf_id(Name, BaseURI, NameText),
 3342    stream_property(Out, encoding(Encoding)),
 3343    xml_quote_attribute(Value, QVal, Encoding),
 3344    format(Out, '~N~*|', [AttIndent]),
 3345    rdf_write_id(Out, NameText),
 3346    format(Out, '="~w"', [QVal]).
 3347save_attribute(body, Name=literal(Literal0), BaseURI, Out, Indent, Options) :-
 3348    !,
 3349    rdf_id(Name, BaseURI, NameText),
 3350    (   memberchk(convert_typed_literal(Converter), Options),
 3351        call(Converter, Type, Content, Literal0)
 3352    ->  Literal = type(Type, Content)
 3353    ;   Literal = Literal0
 3354    ),
 3355    save_body_literal(Literal, NameText, BaseURI, Out, Indent, Options).
 3356save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
 3357    rdf_is_bnode(Value),
 3358    !,
 3359    rdf_id(Name, BaseURI, NameText),
 3360    format(Out, '~N~*|<', [Indent]),
 3361    rdf_write_id(Out, NameText),
 3362    (   named_anon(Value, NodeID)
 3363    ->  format(Out, ' rdf:nodeID="~w"/>', [NodeID])
 3364    ;   (   rdf(S1, Name, Value),
 3365            rdf(S2, P2, Value),
 3366            (S1 \== S2 ; Name \== P2)
 3367        ->  predicate_property(named_anon(_,_), number_of_clauses(N)),
 3368            atom_concat('bn', N, NodeID),
 3369            assertz(named_anon(Value, NodeID))
 3370        ;   true
 3371        ),
 3372        SubIndent is Indent + 2,
 3373        (   rdf_collection(Value)
 3374        ->  save_about(Out, BaseURI, Value),
 3375            format(Out, ' rdf:parseType="Collection">~n', []),
 3376            rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
 3377        ;   format(Out, '>~n', []),
 3378            rdf_save_subject(Out, Value, BaseURI, SubIndent, Options)
 3379        ),
 3380        format(Out, '~N~*|</', [Indent]),
 3381        rdf_write_id(Out, NameText),
 3382        format(Out, '>~n', [])
 3383    ).
 3384save_attribute(body, Name=Value, BaseURI, Out, Indent, Options) :-
 3385    option(inline(true), Options),
 3386    has_attributes(Value, Options),
 3387    \+ inlined(Value),
 3388    !,
 3389    assertz(inlined(Value)),
 3390    rdf_id(Name, BaseURI, NameText),
 3391    format(Out, '~N~*|<', [Indent]),
 3392    rdf_write_id(Out, NameText),
 3393    SubIndent is Indent + 2,
 3394    (   rdf_collection(Value)
 3395    ->  save_about(Out, BaseURI, Value),
 3396        format(Out, ' rdf:parseType="Collection">~n', []),
 3397        rdf_save_list(Out, Value, BaseURI, SubIndent, Options)
 3398    ;   format(Out, '>~n', []),
 3399        do_save_subject(Out, Value, BaseURI, SubIndent, Options)
 3400    ),
 3401    format(Out, '~N~*|</', [Indent]),
 3402    rdf_write_id(Out, NameText),
 3403    format(Out, '>~n', []).
 3404save_attribute(body, Name=Value, BaseURI, Out, Indent, _DB) :-
 3405    stream_property(Out, encoding(Encoding)),
 3406    rdf_value(Value, BaseURI, QVal, Encoding),
 3407    rdf_id(Name, BaseURI, NameText),
 3408    format(Out, '~N~*|<', [Indent]),
 3409    rdf_write_id(Out, NameText),
 3410    format(Out, ' rdf:resource="~w"/>', [QVal]).
 3411
 3412has_attributes(URI, Options) :-
 3413    graph(Options, DB),
 3414    rdf_db(URI, _, _, DB),
 3415    !.
 3416
 3417%!  save_body_literal(+Literal, +NameText, +BaseURI,
 3418%!                    +Out, +Indent, +Options).
 3419
 3420save_body_literal(lang(Lang, Value),
 3421                  NameText, BaseURI, Out, Indent, Options) :-
 3422    !,
 3423    format(Out, '~N~*|<', [Indent]),
 3424    rdf_write_id(Out, NameText),
 3425    (   memberchk(document_language(Lang), Options)
 3426    ->  write(Out, '>')
 3427    ;   rdf_id(Lang, BaseURI, LangText),
 3428        format(Out, ' xml:lang="~w">', [LangText])
 3429    ),
 3430    save_attribute_value(Value, Out, Indent),
 3431    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 3432save_body_literal(type(Type, DOM),
 3433                  NameText, _BaseURI, Out, Indent, Options) :-
 3434    rdf_equal(Type, rdf:'XMLLiteral'),
 3435    !,
 3436    (   atom(DOM)
 3437    ->  format(Out, '~N~*|<', [Indent]),
 3438        rdf_write_id(Out, NameText),
 3439        format(Out, ' rdf:parseType="Literal">~w</', [DOM]),
 3440        rdf_write_id(Out, NameText), write(Out, '>')
 3441    ;   save_xml_literal(DOM, NameText, Out, Indent, Options)
 3442    ).
 3443save_body_literal(type(Type, Value),
 3444                  NameText, BaseURI, Out, Indent, _) :-
 3445    !,
 3446    format(Out, '~N~*|<', [Indent]),
 3447    rdf_write_id(Out, NameText),
 3448    stream_property(Out, encoding(Encoding)),
 3449    rdf_value(Type, BaseURI, QVal, Encoding),
 3450    format(Out, ' rdf:datatype="~w">', [QVal]),
 3451    save_attribute_value(Value, Out, Indent),
 3452    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 3453save_body_literal(Literal,
 3454                  NameText, _, Out, Indent, _) :-
 3455    atomic(Literal),
 3456    !,
 3457    format(Out, '~N~*|<', [Indent]),
 3458    rdf_write_id(Out, NameText),
 3459    write(Out, '>'),
 3460    save_attribute_value(Literal, Out, Indent),
 3461    write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>').
 3462save_body_literal(DOM,
 3463                  NameText, BaseURI, Out, Indent, Options) :-
 3464    rdf_equal(Type, rdf:'XMLLiteral'),
 3465    save_body_literal(type(Type, DOM),
 3466                      NameText, BaseURI, Out, Indent, Options).
 3467
 3468save_attribute_value(Value, Out, _) :-  % strings
 3469    atom(Value),
 3470    !,
 3471    stream_property(Out, encoding(Encoding)),
 3472    xml_quote_cdata(Value, QVal, Encoding),
 3473    write(Out, QVal).
 3474save_attribute_value(Value, Out, _) :-  % numbers
 3475    number(Value),
 3476    !,
 3477    writeq(Out, Value).             % quoted: preserve floats
 3478save_attribute_value(Value, _Out, _) :-
 3479    throw(error(save_attribute_value(Value), _)).
 3480
 3481%!  save_xml_literal(+DOM, +Attr, +Out, +Indent, +Options) is det.
 3482%
 3483%   Save an XMLLiteral value. We already emitted
 3484%
 3485%           ==
 3486%           <prop parseType="literal"
 3487%           ==
 3488%
 3489%   but  not  the  terminating  =|>|=.  We  need  to  establish  the
 3490%   namespaces used in the DOM. The   namespaces in the rdf document
 3491%   are in the nsmap-option of Options.
 3492
 3493save_xml_literal(DOM, Attr, Out, Indent, Options) :-
 3494    xml_is_dom(DOM),
 3495    !,
 3496    memberchk(nsmap(NsMap), Options),
 3497    id_to_atom(Attr, Atom),
 3498    xml_write(Out,
 3499              element(Atom, ['rdf:parseType'='Literal'], DOM),
 3500              [ header(false),
 3501                indent(Indent),
 3502                nsmap(NsMap)
 3503              ]).
 3504save_xml_literal(NoDOM, _, _, _, _) :-
 3505    must_be(xml_dom, NoDOM).
 3506
 3507id_to_atom(NS:Local, Atom) :-
 3508    !,
 3509    atomic_list_concat([NS,Local], :, Atom).
 3510id_to_atom(ID, ID).
 3511
 3512
 3513%!  rdf_collection(+URI) is semidet.
 3514%
 3515%   True  if  URI  represents  an  RDF    list  that  fits  the  RDF
 3516%   parseType=collection syntax. This means it is   a linked list of
 3517%   bnode-cells with a rdf:first that is   a  resource, optionally a
 3518%   rdf:type that is an rdf:list and the list ends in an rdf:nil.
 3519
 3520:- rdf_meta
 3521    rdf_collection(r),
 3522    collection_p(r,r). 3523
 3524rdf_collection(rdf:nil) :- !.
 3525rdf_collection(Cell) :-
 3526    rdf_is_bnode(Cell),
 3527    findall(F, rdf(Cell, rdf:first, F), [_]),
 3528    findall(F, rdf(Cell, rdf:rest, F), [Rest]),
 3529    forall(rdf(Cell, P, V),
 3530           collection_p(P, V)),
 3531    rdf_collection(Rest).
 3532
 3533collection_p(rdf:first, V) :- atom(V).
 3534collection_p(rdf:rest, _).
 3535collection_p(rdf:type, rdf:'List').
 3536
 3537
 3538%!  rdf_save_list(+Out, +List, +BaseURI, +Indent, +Options)
 3539
 3540rdf_save_list(_, List, _, _, _) :-
 3541    rdf_equal(List, rdf:nil),
 3542    !.
 3543rdf_save_list(Out, List, BaseURI, Indent, Options) :-
 3544    rdf_has(List, rdf:first, First),
 3545    (   rdf_is_bnode(First)
 3546    ->  nl(Out),
 3547        rdf_save_subject(Out, First, BaseURI, Indent, Options)
 3548    ;   stream_property(Out, encoding(Encoding)),
 3549        rdf_value(First, BaseURI, QVal, Encoding),
 3550        format(Out, '~N~*|<rdf:Description rdf:about="~w"/>',
 3551               [Indent, QVal])
 3552    ),
 3553    flag(rdf_db_saved_triples, X, X+3),
 3554    (   rdf_has(List, rdf:rest, List2),
 3555        \+ rdf_equal(List2, rdf:nil)
 3556    ->  rdf_save_list(Out, List2, BaseURI, Indent, Options)
 3557    ;   true
 3558    ).
 3559
 3560
 3561%!  rdf_id(+Resource, +BaseURI, -NSLocal)
 3562%
 3563%   Generate a NS:Local  name  for   Resource  given  the  indicated
 3564%   default namespace. This call is used for elements.
 3565
 3566rdf_id(Id, BaseURI, Local) :-
 3567    assertion(atom(BaseURI)),
 3568    atom_concat(BaseURI, Local, Id),
 3569    sub_atom(Local, 0, 1, _, #),
 3570    !.
 3571rdf_id(Id, _, NS:Local) :-
 3572    iri_xml_namespace(Id, Full, Local),
 3573    ns(NS, Full),
 3574    !.
 3575rdf_id(Id, _, NS:Local) :-
 3576    ns(NS, Full),
 3577    Full \== '',
 3578    atom_concat(Full, Local, Id),
 3579    !.
 3580rdf_id(Id, _, Id).
 3581
 3582
 3583%!  rdf_write_id(+Out, +NSLocal) is det.
 3584%
 3585%   Write an identifier. We cannot use native write on it as both NS
 3586%   and Local can be operators.
 3587
 3588rdf_write_id(Out, NS:Local) :-
 3589    !,
 3590    format(Out, '~w:~w', [NS, Local]).
 3591rdf_write_id(Out, Atom) :-
 3592    write(Out, Atom).
 3593
 3594%!  rdf_value(+Resource, +BaseURI, -Text, +Encoding)
 3595%
 3596%   According  to  "6.4  RDF  URI  References"  of  the  RDF  Syntax
 3597%   specification, a URI reference is  UNICODE string not containing
 3598%   control sequences, represented as  UTF-8   and  then  as escaped
 3599%   US-ASCII.
 3600
 3601rdf_value(Base, Base, '', _) :- !.
 3602rdf_value(V, Base, Text, Encoding) :-
 3603    atom_concat(Base, Local, V),
 3604    sub_atom(Local, 0, _, _, #),
 3605    !,
 3606    xml_quote_attribute(Local, Text, Encoding).
 3607rdf_value(V, _, Text, Encoding) :-
 3608    ns(NS, Full),
 3609    atom_concat(Full, Local, V),
 3610    xml_is_name(Local),
 3611    !,
 3612    xml_quote_attribute(Local, QLocal, Encoding),
 3613    atomic_list_concat(['&', NS, (';'), QLocal], Text).
 3614rdf_value(V, _, Q, Encoding) :-
 3615    xml_quote_attribute(V, Q, Encoding).
 3616
 3617
 3618                 /*******************************
 3619                 *       MATCH AND COMPARE      *
 3620                 *******************************/
 3621
 3622%!  rdf_compare(-Dif, +Object1, +Object2) is det.
 3623%
 3624%   Compare  two  object  terms.  Where  SPARQL  defines  a  partial
 3625%   ordering, we define a complete ordering   of terms. The ordering
%   is defined as:
 3627%
 3628%     - Blank nodes < IRIs < Literals
 3629%     - Numeric literals < other literals
 3630%     - Numeric literals are compared by value and then by type,
 3631%       where Integer < Decimal < Double
%     - Other literals are compared lexically, case insensitive.
%       If equal, uppercase precedes lowercase.  If still equal,
%       the types are compared lexically.
 3635
 3636%!  rdf_match_label(+How, +Pattern, +Label) is semidet.
 3637%
 3638%   True if Label matches Pattern according to   How.  How is one of
 3639%   `icase`, `substring`, `word`, `prefix` or   `like`. For backward
 3640%   compatibility, `exact` is a synonym for `icase`.
 3641
 3642
 3643                 /*******************************
 3644                 *      DEPRECATED MATERIAL     *
 3645                 *******************************/
 3646
 3647%!  rdf_split_url(+Prefix, +Local, -URL) is det.
 3648%!  rdf_split_url(-Prefix, -Local, +URL) is det.
 3649%
 3650%   Split/join a URL.  This functionality is moved to library(sgml).
 3651%
 3652%   @deprecated Use iri_xml_namespace/3. Note that the argument
 3653%   order is iri_xml_namespace(+IRI, -Namespace, -Localname).
 3654
 3655rdf_split_url(Prefix, Local, URL) :-
 3656    atomic(URL),
 3657    !,
 3658    iri_xml_namespace(URL, Prefix, Local).
 3659rdf_split_url(Prefix, Local, URL) :-
 3660    atom_concat(Prefix, Local, URL).
 3661
 3662%!  rdf_url_namespace(+URL, -Namespace)
 3663%
 3664%   Namespace is the namespace of URL.
 3665%
 3666%   @deprecated Use iri_xml_namespace/2
 3667
 3668rdf_url_namespace(URL, Prefix) :-
 3669    iri_xml_namespace(URL, Prefix).
 3670
 3671
 3672                 /*******************************
 3673                 *            LITERALS          *
 3674                 *******************************/
 3675
 3676%!  rdf_new_literal_map(-Map) is det.
 3677%
 3678%   Create a new literal map, returning an opaque handle.
 3679
 3680%!  rdf_destroy_literal_map(+Map) is det.
 3681%
 3682%   Destroy a literal map. After this call,   further use of the Map
 3683%   handle is illegal. Additional synchronisation  is needed if maps
 3684%   that are shared between threads are   destroyed to guarantee the
 3685%   handle    is    no    longer    used.    In    some    scenarios
 3686%   rdf_reset_literal_map/1 provides a safe alternative.
 3687
 3688%!  rdf_reset_literal_map(+Map) is det.
 3689%
 3690%   Delete all content from the literal map.
 3691
 3692%!  rdf_insert_literal_map(+Map, +Key, +Value) is det.
 3693%
 3694%   Add a relation between  Key  and  Value   to  the  map.  If this
 3695%   relation already exists no action is performed.
 3696
 3697%!  rdf_insert_literal_map(+Map, +Key, +Value, -KeyCount) is det.
 3698%
 3699%   As rdf_insert_literal_map/3. In addition, if Key is a new key in
 3700%   Map, unify KeyCount with the number of  keys in Map. This serves
 3701%   two purposes. Derived maps, such as  the stem and metaphone maps
 3702%   need to know about new  keys   and  it avoids additional foreign
 3703%   calls for doing the progress in rdf_litindex.pl.
 3704
 3705%!  rdf_delete_literal_map(+Map, +Key) is det.
 3706%
 3707%   Delete Key and all associated values from the map.
 3708
 3709%!  rdf_delete_literal_map(+Map, +Key, +Value) is det.
 3710%
 3711%   Delete the association between Key and Value from the map.
 3712
 3713%!  rdf_find_literal_map(+Map, +KeyList, -ValueList) is det.
 3714%
 3715%   Unify ValueList with an ordered set  of values associated to all
 3716%   keys from KeyList. Each key in  KeyList   is  either an atom, an
 3717%   integer or a term not(Key).  If   not-terms  are provided, there
%   must be at least one positive keyword. The negations are tested
 3719%   after establishing the positive matches.
 3720
 3721%!  rdf_keys_in_literal_map(+Map, +Spec, -Answer) is det.
 3722%
 3723%   Realises various queries on the key-set:
 3724%
 3725%     * all
 3726%
 3727%     Unify Answer with an ordered list of all keys.
 3728%     * key(+Key)
 3729%
 3730%     Succeeds if Key is a key in the map and unify Answer with the
 3731%     number of values associated with the key. This provides a fast
 3732%     test of existence without fetching the possibly large
 3733%     associated value set as with rdf_find_literal_map/3.
 3734%
 3735%     * prefix(+Prefix)
 3736%     Unify Answer with an ordered set of all keys that have the
 3737%     given prefix. See section 3.1 for details on prefix matching.
 3738%     Prefix must be an atom. This call is intended for
 3739%     auto-completion in user interfaces.
 3740%
 3741%     * ge(+Min)
 3742%     Unify Answer with all keys that are larger or equal to the
 3743%     integer Min.
 3744%
 3745%     * le(+Max)
 3746%     Unify Answer with all keys that are smaller or equal to the integer
 3747%     Max.
 3748%
%     * between(+Min, +Max)
%     Unify Answer with all keys between Min and Max (inclusive).
 3751
 3752%!  rdf_statistics_literal_map(+Map, -KeyValue)
 3753%
%   Query some statistics of the map. Provided KeyValue are:
 3755%
 3756%     * size(-Keys, -Relations)
 3757%     Unify Keys with the total key-count of the index and Relation
 3758%     with the total Key-Value count.
 3759
 3760
 3761
 3762                 /*******************************
 3763                 *             MISC             *
 3764                 *******************************/
 3765
 3766%!  rdf_version(-Version) is det.
 3767%
 3768%   True when Version is the numerical version-id of this library.
 3769%   The version is computed as
 3770%
 3771%           Major*10000 + Minor*100 + Patch.
 3772
 3773%!  rdf_set(+Term) is det.
 3774%
 3775%   Set properties of the RDF store.  Currently defines:
 3776%
 3777%     * hash(+Hash, +Parameter, +Value)
 3778%     Set properties for a triple index.  Hash is one of =s=,
 3779%     =p=, =sp=, =o=, =po=, =spo=, =g=, =sg= or =pg=.  Parameter
 3780%     is one of:
 3781%
 3782%       - size
 3783%       Value defines the number of entries in the hash-table.
 3784%       Value is rounded _down_ to a power of 2.  After setting
 3785%       the size explicitly, auto-sizing for this table is
 3786%       disabled.  Setting the size smaller than the current
 3787%       size results in a =permission_error= exception.
 3788%
 3789%       - average_chain_len
 3790%       Set maximum average collision number for the hash.
 3791%
 3792%       - optimize_threshold
 3793%       Related to resizing hash-tables.  If 0, all triples are
 3794%       moved to the new size by the garbage collector.  If more
%       than zero, those of the last Value resize steps remain at
 3796%       their current location.  Leaving cells at their current
 3797%       location reduces memory fragmentation and slows down
 3798%       access.
 3799
 3800%!  rdf_md5(+Graph, -MD5) is det.
 3801%
 3802%   True when MD5 is the MD5 hash for  all triples in graph. The MD5
 3803%   digest itself is represented as an   atom holding a 32-character
 3804%   hexadecimal   string.   The   library   maintains   the   digest
 3805%   incrementally on rdf_load/[1,2], rdf_load_db/1, rdf_assert/[3,4]
 3806%   and  rdf_retractall/[3,4].  Checking  whether   the  digest  has
 3807%   changed since the last rdf_load/[1,2]  call provides a practical
 3808%   means for checking whether the file needs to be saved.
 3809%
 3810%   @deprecated New code should use rdf_graph_property(Graph,
 3811%   hash(Hash)).
 3812
 3813%!  rdf_generation(-Generation) is det.
 3814%
 3815%   True when Generation is the current  generation of the database.
 3816%   Each modification to the database  increments the generation. It
 3817%   can be used to check the validity of cached results deduced from
 3818%   the database. Committing a non-empty  transaction increments the
 3819%   generation by one.
 3820%
 3821%   When inside a transaction,  Generation  is   unified  to  a term
 3822%   _TransactionStartGen_ + _InsideTransactionGen_. E.g.,  4+3 means
 3823%   that the transaction was started at   generation 4 of the global
 3824%   database and we have  created  3   new  generations  inside  the
 3825%   transaction. Note that this choice  of representation allows for
 3826%   comparing  generations  using  Prolog  arithmetic.  Comparing  a
 3827%   generation in one  transaction  with   a  generation  in another
 3828%   transaction is meaningless.
 3829
 3830%!  rdf_estimate_complexity(?Subject, ?Predicate, ?Object, -Complexity)
 3831%
 3832%   Return the number of alternatives as   indicated by the database
 3833%   internal hashed indexing. This is a rough measure for the number
 3834%   of alternatives we can expect for   an  rdf_has/3 call using the
 3835%   given three arguments. When  called   with  three variables, the
 3836%   total number of triples is returned.   This  estimate is used in
 3837%   query  optimisation.  See  also    rdf_predicate_property/2  and
 3838%   rdf_statistics/1 for additional information to help optimizers.
 3839
 3840%!  rdf_debug(+Level) is det.
 3841%
%   Set debugging to Level.  Level is an integer 0..9.  Default is
%   0 (no debugging).
 3844
 3845%!  rdf_atom_md5(+Text, +Times, -MD5) is det.
 3846%
 3847%   Computes the MD5 hash from Text, which is an atom, string or list of
 3848%   character codes. Times is  an  integer  >=   1.  When  >  0, the MD5
 3849%   algorithm is repeated Times times on the generated hash. This can be
 3850%   used for password encryption algorithms   to  make generate-and-test
 3851%   loops slow.
 3852%
 3853%   @deprecated Obviously, password hash  primitives   do  not belong in
 3854%   this library. The  library(crypto)  from   the  \const{ssl}  package
 3855%   provides extensive support for  hashes.   The  \const{clib}  package
 3856%   provides library(crypt) to  access  the   OS  (Unix)  password  hash
 3857%   implementation as well as  lightweight   implementations  of several
 3858%   popular hashes.
 3859
 3860
 3861                 /*******************************
 3862                 *             MESSAGES         *
 3863                 *******************************/
 3864
 3865:- multifile
 3866    prolog:message//1. 3867
 3868prolog:message(rdf(Term)) -->
 3869    message(Term).
 3870
 3871message(loaded(How, What, BaseURI, Triples, Time)) -->
 3872    how(How),
 3873    source(What),
 3874    into(What, BaseURI),
 3875    in_time(Triples, Time).
 3876message(save_removed_duplicates(N, Subject)) -->
 3877    [ 'Removed ~d duplicate triples about "~p"'-[N,Subject] ].
 3878message(saved(File, SavedSubjects, SavedTriples)) -->
 3879    [ 'Saved ~D triples about ~D subjects into ~p'-
 3880      [SavedTriples, SavedSubjects, File]
 3881    ].
 3882message(using_namespace(Id, NS)) -->
 3883    [ 'Using namespace id ~w for ~w'-[Id, NS] ].
 3884message(inconsistent_cache(DB, Graphs)) -->
 3885    [ 'RDF cache file for ~w contains the following graphs'-[DB], nl,
 3886      '~t~8|~p'-[Graphs]
 3887    ].
 3888message(guess_format(Ext)) -->
 3889    [ 'Unknown file-extension: ~w.  Assuming RDF/XML'-[Ext] ].
 3890message(meta(not_expanded(G))) -->
 3891    [ 'rdf_meta/1: ~p is not expanded'-[G] ].
 3892message(deprecated(rdf_unload(Graph))) -->
 3893    [ 'rdf_unload/1: Use ~q'-[rdf_unload_graph(Graph)] ].
 3894
 3895
 3896how(load)   --> [ 'Loaded' ].
 3897how(parsed) --> [ 'Parsed' ].
 3898
 3899source(SourceURL) -->
 3900    { uri_file_name(SourceURL, File),
 3901      !,
 3902      file_base_name(File, Base)    % TBD: relative file?
 3903    },
 3904    [ ' "~w"'-[Base] ].
 3905source(SourceURL) -->
 3906    [ ' "~w"'-[SourceURL] ].
 3907
 3908into(_, _) --> [].                      % TBD
 3909
 3910in_time(Triples, ParseTime) -->
 3911    [ ' in ~2f sec; ~D triples'-[ParseTime, Triples]
 3912    ]