35
36:- module(rdf_litindex,
37 [ rdf_set_literal_index_option/1, 38 rdf_tokenize_literal/2, 39 rdf_find_literal/2, 40 rdf_find_literals/2, 41 rdf_token_expansions/2, 42 rdf_stopgap_token/1, 43
44 rdf_literal_index/2, 45 rdf_delete_literal_index/1 46 ]). 47:- use_module(rdf_db). 48:- use_module(library(debug)). 49:- use_module(library(lists)). 50:- use_module(library(error)). 51:- use_module(library(apply)). 52:- if(exists_source(library(snowball))). 53:- use_module(library(snowball)). 54:- else. 55:- use_module(library(porter_stem)). 56:- endif. 57:- use_module(library(double_metaphone)). 58
66
67:- dynamic
68 literal_map/2, 69 map_building/2, 70 new_token/2, 71 setting/1,
72 stopgap/1. 73:- volatile
74 literal_map/2. 75:- multifile
76 tokenization/2, 77 exclude_from_index/2. 78
79
%!  setting(?Option) is nondet.
%
%   Default values of the literal-index options.  set_option/1
%   replaces the fact with the same name and arity when an option is
%   changed through rdf_set_literal_index_option/1.

setting(verbose(false)).              % progress output (see verbose/2)
setting(index_threads(1)).            % read by make_literal_index/0
setting(index(thread(1))).            % read by register_token_updater/0
setting(stopgap_threshold(50000)).    % read by add_tokens/4

106
%!  rdf_set_literal_index_option(+Options) is det.
%
%   Set one option or a list of options.  Every element of a list is
%   passed to set_option/1 in order; any non-list argument is treated
%   as a single option.

rdf_set_literal_index_option(Options) :-
    (   Options = []
    ->  true
    ;   Options = [First|Rest]
    ->  set_option(First),
        rdf_set_literal_index_option(Rest)
    ;   set_option(Options)
    ).
114
%!  set_option(+Option) is det.
%
%   Validate Option with check_option/1 and store it as the one and
%   only setting/1 fact with that name and arity.

set_option(Option) :-
    check_option(Option),
    functor(Option, Name, Arity),
    functor(Template, Name, Arity),     % most general term of the same shape
    retractall(setting(Template)),
    assertz(setting(Option)).
121
%!  check_option(+Option) is det.
%
%   Validate a single literal-index option.  Succeeds silently for a
%   valid option and raises an error otherwise.
%
%   Accepted options are verbose(Boolean), index_threads(NonNeg),
%   stopgap_threshold(NonNeg) and index(How), where How is one of
%   `default`, `self` or thread(N) with N a positive integer.
%
%   @error instantiation_error if Option is unbound.
%   @error whatever must_be/2 raises when an option value is invalid.
%   @error domain_error(literal_option, Option) for unknown options.

check_option(Option) :-
    var(Option),
    !,
    instantiation_error(Option).
check_option(verbose(Bool)) :-
    !,
    must_be(boolean, Bool).
check_option(index_threads(Count)) :-
    !,
    must_be(nonneg, Count).
check_option(stopgap_threshold(Count)) :-
    !,
    must_be(nonneg, Count).
check_option(index(How)) :-
    !,
    (   nonvar(How),
        How = thread(Count)
    ->  % The count is later used as the upper bound of between/3
        % when the update threads are started; reject anything that
        % is not a usable thread count up front.
        must_be(positive_integer, Count)
    ;   must_be(oneof([default,thread(_),self]), How)
    ).
check_option(Option) :-
    domain_error(literal_option, Option).
140
141
142 145
175
%!  rdf_find_literal(+Spec, -Literal) is nondet.
%
%   Enumerate, on backtracking, the members of the list computed by
%   rdf_find_literals/2 for Spec.

rdf_find_literal(Spec, Literal) :-
    rdf_find_literals(Spec, Matches),
    member(Literal, Matches).
179
%!  rdf_find_literals(+Spec, -Literals) is semidet.
%
%   Literals is the sorted, duplicate-free list of literals found in
%   the token index for Spec.  Spec is compiled by compile_spec/2;
%   the call fails if it compiles to @(stopgap).

rdf_find_literals(Spec, Literals) :-
    compile_spec(Spec, Compiled),
    Compiled \== @(stopgap),
    token_index(TokenMap),
    lookup(Compiled, TokenMap, _Conditions, PerDisjunct),
    flatten(PerDisjunct, Unsorted),
    sort(Unsorted, Literals).
187
192
%!  rdf_token_expansions(+Spec, -Expansions) is nondet.
%
%   Relate a search specification to the tokens it expands to.
%
%   The first three clauses answer directly from an index for a bare
%   prefix(P), sounds(Word) or stem(Word).  The last clause compiles
%   an arbitrary Spec, looks it up and groups the tagged conditions
%   that contributed to a non-empty result (see join_expansions/2).
%   As none of the clauses cuts, the last clause is also tried on
%   backtracking for the first three forms.
%
%   For sounds(Word) and stem(Word) the word is first converted to
%   the key used by the corresponding index (double metaphone and
%   stem for language `en`), exactly as expand_fuzzy/2 does.  The
%   direct clauses only apply when Word is an atom.

rdf_token_expansions(prefix(Prefix), [prefix(Prefix, Tokens)]) :-
    token_index(Map),
    rdf_keys_in_literal_map(Map, prefix(Prefix), Tokens).
rdf_token_expansions(sounds(Like), [sounds(Like, Tokens)]) :-
    atom(Like),
    metaphone_index(Map),
    double_metaphone(Like, Key),        % index is keyed by metaphone code
    rdf_find_literal_map(Map, [Key], Tokens).
rdf_token_expansions(stem(Like), [stem(Like, Tokens)]) :-
    atom(Like),
    stem_index(Map),
    stem(Like, en, Key),                % index is keyed by stem
    rdf_find_literal_map(Map, [Key], Tokens).
rdf_token_expansions(Spec, Expansions) :-
    compile_spec(Spec, DNF),
    token_index(Map),
    lookup(DNF, Map, SCS, _),
    flatten(SCS, CS),
    sort(CS, Expansions0),
    join_expansions(Expansions0, Expansions).
209
%!  join_expansions(+Tagged, -Joined) is det.
%
%   Group a list of tagged tokens by their tag.  Each run of adjacent
%   elements that share the tag returned by untag/3 becomes one term:
%   the tag extended with a final argument holding the list of values
%   of that run.

join_expansions([], []).
join_expansions([First|Rest], [Group|Groups]) :-
    untag(First, Tag, Value),
    Tag =.. TagParts,
    append(TagParts, [[Value|MoreValues]], GroupParts),
    Group =.. GroupParts,
    join_expansions_by_tag(Rest, Tag, Remaining, MoreValues),
    join_expansions(Remaining, Groups).

%!  join_expansions_by_tag(+Tagged, +Tag, -Remaining, -Values) is det.
%
%   Values are the values of the leading elements of Tagged that
%   carry Tag; Remaining is what follows them.

join_expansions_by_tag(Tagged, Tag, Remaining, Values) :-
    (   Tagged = [Head|Tail],
        Values = [Value|MoreValues],
        untag(Head, Tag, Value)
    ->  join_expansions_by_tag(Tail, Tag, Remaining, MoreValues)
    ;   Remaining = Tagged,
        Values = []
    ).
224
%!  lookup(+DNF, +Map, -Conditions, -Literals) is det.
%
%   Evaluate a disjunctive normal form on Map.  Both outputs are
%   lists (nested along the or/2 structure) with one entry per
%   conjunction, each produced by lookup1/4.  @(false) yields two
%   empty lists.

lookup(@(false), _, [], []) :-
    !.
lookup(or(Left, Right), Map, [LeftConds|RightConds], [LeftLits|RightLits]) :-
    !,
    lookup(Left, Map, LeftConds, LeftLits),
    lookup(Right, Map, RightConds, RightLits).
lookup(Conj, Map, [Cond], [Lits]) :-
    lookup1(Conj, Map, Cond, Lits).
232
%!  lookup1(+Conj, +Map, -Cond, -Literals) is det.
%
%   Evaluate a single conjunction on Map.  Literals is the result of
%   rdf_find_literal_map/3 on the keys of Conj.  Cond is the list of
%   tagged conditions of Conj when Literals is non-empty and [] in
%   every other case.
%
%   If Conj cannot be turned into a key list (conj_to_list//1 fails
%   on @(false)), both Cond and Literals are [].  Cond is bound
%   explicitly so that callers which flatten the conditions never
%   see an unbound variable.

lookup1(Conj, Map, Cond, Literals) :-
    phrase(conj_to_list(Conj), List),
    !,
    rdf_find_literal_map(Map, List, Literals),
    (   Literals \== []
    ->  phrase(conj_to_cond(Conj), Cond)
    ;   Cond = []
    ).
lookup1(_, _, [], []).
242
%!  conj_to_list(+Conj)// is semidet.
%
%   Emit the keys of a conjunction: the members of nested and/2
%   terms, with tagged terms replaced by their token (untag/2) and
%   everything else emitted unchanged.  Fails if @(false) is part of
%   the conjunction.

conj_to_list(and(Left, Right)) -->
    !,
    conj_to_list(Left),
    conj_to_list(Right).
conj_to_list(@(false)) -->
    !,
    { fail }.
conj_to_list(Tagged) -->
    { untag(Tagged, Token) },
    !,
    [ Token ].
conj_to_list(Plain) -->
    [ Plain ].

256
257
%!  conj_to_cond(+Conj)// is det.
%
%   Emit the tagged members (those accepted by untag/2) of a
%   conjunction, in order.  Untagged members are dropped.

conj_to_cond(and(Left, Right)) -->
    !,
    conj_to_cond(Left),
    conj_to_cond(Right).
conj_to_cond(Tagged) -->
    { untag(Tagged, _Token) },
    !,
    [ Tagged ].
conj_to_cond(_Untagged) -->
    [].

268
269
273
%!  compile_spec(+Spec, -DNF) is semidet.
%
%   Compile a search specification: expand the fuzzy operators
%   (expand_fuzzy/2), push negations inward (nnf/2) and distribute
%   into disjunctive normal form (dnf/2).

compile_spec(Spec, DNF) :-
    expand_fuzzy(Spec, Expanded),
    nnf(Expanded, Normalised),
    dnf(Normalised, DNF).

278
279
%!  expand_fuzzy(+Spec, -Expanded) is semidet.
%
%   Replace the fuzzy operators in Spec by a disjunction of the
%   concrete tokens found in the indexes.  Each token is wrapped by
%   tag/3 so the operator that produced it can be recovered.  An
%   operator that matches no token expands to @(false); an atomic
%   token for which rdf_stopgap_token/1 holds expands to @(stopgap).
%   and/2 and or/2 results are passed through simplify/2.
%
%   For sounds/1, stem/1,2, prefix/1 and case/1 a non-atom argument
%   is itself expanded as a specification.
%
%   @error instantiation_error if (a sub term of) Spec is unbound.
%   @error type_error(boolean_expression, Term) if Term is not a
%          valid specification.

expand_fuzzy(Var, _) :-
    var(Var),
    !,
    instantiation_error(Var).
expand_fuzzy(sounds(Like), Or) :-
    !,
    (   atom(Like)
    ->  metaphone_index(Map),
        double_metaphone(Like, Key),
        rdf_find_literal_map(Map, [Key], Tokens),
        list_to_or(Tokens, sounds(Like), Or)
    ;   expand_fuzzy(Like, Or)
    ).
expand_fuzzy(stem(Like), Or) :-
    !,
    expand_fuzzy(stem(Like, en), Or).   % stem/1 defaults to language `en`
expand_fuzzy(stem(Like, Lang), Or) :-
    !,
    (   atom(Like)
    ->  stem_index(Map),
        stem(Like, Lang, Key),
        rdf_find_literal_map(Map, [Key], Tokens),
        list_to_or(Tokens, stem(Like), Or)
    ;   expand_fuzzy(Like, Or)
    ).
expand_fuzzy(prefix(Prefix), Or) :-
    !,
    (   atom(Prefix)
    ->  token_index(Map),
        rdf_keys_in_literal_map(Map, prefix(Prefix), Tokens),
        list_to_or(Tokens, prefix(Prefix), Or)
    ;   expand_fuzzy(Prefix, Or)
    ).
expand_fuzzy(case(String), Or) :-
    !,
    (   atom(String)
    ->  token_index(Map),
        rdf_keys_in_literal_map(Map, case(String), Tokens),
        list_to_or(Tokens, case(String), Or)
    ;   expand_fuzzy(String, Or)
    ).
expand_fuzzy(or(A0, B0), E) :-
    !,
    expand_fuzzy(A0, A),
    expand_fuzzy(B0, B),
    simplify(or(A,B), E).
expand_fuzzy(and(A0, B0), E) :-
    !,
    expand_fuzzy(A0, A),
    expand_fuzzy(B0, B),
    simplify(and(A,B), E).
expand_fuzzy(not(A0), not(A)) :-
    !,
    expand_fuzzy(A0, A).
expand_fuzzy(between(Low, High), Or) :-
    !,
    token_index(Map),
    rdf_keys_in_literal_map(Map, between(Low, High), Tokens),
    list_to_or(Tokens, between(Low, High), Or).
expand_fuzzy(le(High), Or) :-
    !,
    token_index(Map),
    rdf_keys_in_literal_map(Map, le(High), Tokens),
    list_to_or(Tokens, le(High), Or).
expand_fuzzy(ge(Low), Or) :-
    !,
    token_index(Map),
    rdf_keys_in_literal_map(Map, ge(Low), Tokens),
    list_to_or(Tokens, ge(Low), Or).
expand_fuzzy(Token, Result) :-
    atomic(Token),
    !,
    (   rdf_stopgap_token(Token)
    ->  Result = @(stopgap)
    ;   Result = Token
    ).
expand_fuzzy(Token, _) :-
    % ISO argument order: expected type first, culprit second.
    type_error(boolean_expression, Token).
358
%!  simplify(+Expr0, -Expr) is det.
%
%   Apply the first matching rule of simple/2 to the top of Expr0.
%   Expr is Expr0 itself if no rule applies.

simplify(Expr0, Expr) :-
    (   simple(Expr0, Expr)
    ->  true
    ;   Expr = Expr0
    ).

%!  simple(+Expr0, -Expr) is nondet.
%
%   Rewrite rules for and/2 and or/2 that have @(false) or @(stopgap)
%   as one of their arguments.  simplify/2 uses the first rule that
%   matches, so the order of the rules matters.

simple(and(@(false), _),       @(false)).
simple(and(_, @(false)),       @(false)).
simple(and(@(stopgap), Token), Token).
simple(and(Token, @(stopgap)), Token).
simple(or(@(false), X),        X).
simple(or(X, @(false)),        X).
simple(or(@(stopgap), Token),  Token).
simple(or(Token, @(stopgap)),  Token).
372
373
%!  list_to_or(+Tokens, +How, -Or) is semidet.
%
%   Turn a list of tokens into a right-nested or/2 term of tokens
%   tagged with How (see tag/3).  The empty list gives @(false) and
%   a one-element list gives the single tagged token.

list_to_or([], _, @(false)) :-
    !.
list_to_or([Token|Rest], How, Or) :-
    (   Rest = []
    ->  tag(How, Token, Or)
    ;   Or = or(Tagged, RestOr),
        tag(How, Token, Tagged),
        list_to_or(Rest, How, RestOr)
    ).

%!  tag(+How, +Token, -Tagged) is semidet.
%
%   Tagged is the operator How extended with Token as extra last
%   argument.

tag(sounds(X),    Y, sounds(X,Y)).
tag(stem(X),      Y, stem(X,Y)).
tag(prefix(X),    Y, prefix(X,Y)).
tag(case(X),      Y, case(X,Y)).
tag(between(L,H), Y, between(L,H,Y)).
tag(ge(L),        Y, ge(L,Y)).
tag(le(H),        Y, le(H,Y)).
389
%!  untag(+Tagged, -Token) is semidet.
%
%   Token is the last argument of a term created by tag/3.  Fails
%   for any other term.

untag(sounds(_, Token),     Token).
untag(stem(_, Token),       Token).
untag(prefix(_, Token),     Token).
untag(case(_, Token),       Token).
untag(between(_, _, Token), Token).
untag(le(_, Token),         Token).
untag(ge(_, Token),         Token).

%!  untag(+Tagged, -How, -Token) is semidet.
%
%   Split a term created by tag/3 into the operator How and the
%   Token it was tagged with.

untag(sounds(Like, Token),       sounds(Like),       Token).
untag(stem(Like, Token),         stem(Like),         Token).
untag(prefix(Prefix, Token),     prefix(Prefix),     Token).
untag(case(String, Token),       case(String),       Token).
untag(between(Low, High, Token), between(Low, High), Token).
untag(ge(Low, Token),            ge(Low),            Token).
untag(le(High, Token),           le(High),           Token).
405
406
411
%!  nnf(+Formula, -NNF) is det.
%
%   Rewrite Formula to negation normal form: double negations are
%   removed and negated and/2 and or/2 terms are rewritten using De
%   Morgan's laws, so that not/1 only remains around terms that are
%   not and/2, or/2 or not/1.
%
%   The rewrite descends into positive and/2 and or/2 terms as well,
%   so a negation nested below a conjunction or disjunction (for
%   example and(not(not(a)), b)) is normalised too.

nnf(not(not(A0)), A) :-
    !,
    nnf(A0, A).
nnf(not(and(A0,B0)), or(A,B)) :-
    !,
    nnf(not(A0), A),
    nnf(not(B0), B).
nnf(not(or(A0,B0)), and(A,B)) :-
    !,
    nnf(not(A0), A),
    nnf(not(B0), B).
nnf(and(A0,B0), and(A,B)) :-
    !,
    nnf(A0, A),
    nnf(B0, B).
nnf(or(A0,B0), or(A,B)) :-
    !,
    nnf(A0, A),
    nnf(B0, B).
nnf(A, A).
424
425
429
%!  dnf(+NNF, -DNF) is det.
%
%   Convert a formula to disjunctive normal form.  Both sides of an
%   or/2 are converted independently; for an and/2 both sides are
%   converted first and the conjunction is then distributed over
%   any disjunctions by dnf1/2.  Other terms are left as they are.

dnf(or(Left0, Right0), or(Left, Right)) :-
    !,
    dnf(Left0, Left),
    dnf(Right0, Right).
dnf(and(Left0, Right0), DNF) :-
    !,
    dnf(Left0, Left),
    dnf(Right0, Right),
    dnf1(and(Left, Right), DNF).
dnf(Leaf, Leaf).

%!  dnf1(+Conj, -DNF) is det.
%
%   Distribute a conjunction over a disjunction that appears as one
%   of its two arguments.  When the disjunction is the first
%   argument, the other conjunct is moved to the front of the
%   resulting conjunctions.

dnf1(and(Conjunct, or(D1, D2)), or(Out1, Out2)) :-
    !,
    dnf1(and(Conjunct, D1), Out1),
    dnf1(and(Conjunct, D2), Out2).
dnf1(and(or(D1, D2), Conjunct), or(Out1, Out2)) :-
    !,
    dnf1(and(Conjunct, D1), Out1),
    dnf1(and(Conjunct, D2), Out2).
dnf1(Done, Done).
450
451
452 455
461
%!  token_index(-Map) is det.
%
%   Map is the literal map registered as literal_map(token, Map).
%   If there is none yet, a new map is created and registered, the
%   update monitor is installed (register_token_updater/0) and a
%   detached thread with alias `__rdf_tokenizer` is started to run
%   make_literal_index/1.  In both cases the call returns only after
%   wait_for_map/1 finds no map_building(token, _) fact.

token_index(Map) :-
    (   literal_map(token, Map)
    ->  true
    ;   rdf_new_literal_map(Map),
        assertz(literal_map(token, Map)),
        register_token_updater,
        message_queue_create(Queue),
        assertz(map_building(token, Queue)),
        thread_create(make_literal_index(Queue), _,
                      [ alias('__rdf_tokenizer'),
                        detached(true)
                      ])
    ),
    wait_for_map(token).
477
%!  register_token_updater is det.
%
%   Install the rdf_monitor/2 hook for the events `reset`,
%   `new_literal` and `old_literal`.  With setting index(default)
%   (one thread) or index(thread(N)) the update threads are created
%   and thread_monitor_literal/1 is installed; otherwise
%   monitor_literal/1 is installed.

register_token_updater :-
    Events = [ reset,
               new_literal,
               old_literal
             ],
    (   (   setting(index(default))
        ->  Threads = 1
        ;   setting(index(thread(Threads)))
        )
    ->  create_update_literal_thread(Threads),
        rdf_monitor(thread_monitor_literal, Events)
    ;   rdf_monitor(monitor_literal, Events)
    ).
491
%!  make_literal_index(+Queue) is semidet.
%
%   Run make_literal_index/0.  When it terminates, Queue is
%   destroyed and the map_building(token, _) facts are removed;
%   wait_for_map/1 blocks on this queue and fact.

make_literal_index(Queue) :-
    setup_call_cleanup(
        true,
        make_literal_index,
        ( message_queue_destroy(Queue),
          retractall(map_building(token, _))
        )).
497
501
%!  make_literal_index is semidet.
%
%   Build the token index with threaded_literal_index/1.  The thread
%   count comes from setting index_threads(N) or, if there is no
%   such setting, from the Prolog flag `cpu_count`.

make_literal_index :-
    (   setting(index_threads(Threads))
    ->  true
    ;   current_prolog_flag(cpu_count, Threads)
    ),
    threaded_literal_index(Threads),
    verbose('~N', []).
511
%!  threaded_literal_index(+Threads) is det.
%
%   Register every rdf_current_literal/1.  With more than one thread
%   the literals are sent over a message queue (max_size 1000) to
%   Threads workers, followed by one done(true) message per worker,
%   after which the workers are joined.  Otherwise the literals are
%   registered in the calling thread.

threaded_literal_index(Threads) :-
    Threads > 1,
    !,
    message_queue_create(Queue, [max_size(1000)]),
    create_index_threads(Threads, Queue, WorkerIds),
    forall(rdf_current_literal(Literal),
           thread_send_message(Queue, Literal)),
    forall(between(1, Threads, _),
           thread_send_message(Queue, done(true))),
    maplist(thread_join, WorkerIds, _).
threaded_literal_index(_) :-
    forall(rdf_current_literal(Literal),
           register_literal(Literal)).
525
%!  create_index_threads(+Count, +Queue, -Ids) is det.
%
%   Start threads running index_worker(Queue) while Count, decreased
%   by one per thread, is above zero.  Ids are the thread
%   identifiers.

create_index_threads(Count, Queue, Ids) :-
    (   Count > 0
    ->  Ids = [Id|MoreIds],
        thread_create(index_worker(Queue), Id, []),
        Left is Count - 1,
        create_index_threads(Left, Queue, MoreIds)
    ;   Ids = []
    ).
533
%!  index_worker(+Queue)
%
%   Failure-driven loop: fetch a message from Queue and hand it to
%   work/1 until work/1 succeeds.

index_worker(Queue) :-
    repeat,
        thread_get_message(Queue, Message),
        work(Message).

%!  work(+Message) is semidet.
%
%   Succeeds for done(true), ending the loop of index_worker/1.  Any
%   other message is registered as a literal, after which the call
%   fails to make the loop continue.

work(Message) :-
    (   Message = done(true)
    ->  true
    ;   register_literal(Message),
        fail
    ).
543
544
548
%!  clean_token_index is det.
%
%   Reset every registered literal map (of any type) and forget all
%   stopgap/1 facts.

clean_token_index :-
    forall(literal_map(_Type, Map),
           rdf_reset_literal_map(Map)),
    retractall(stopgap(_)).
553
557
%!  rdf_delete_literal_index(+Type) is semidet.
%
%   Remove the registration of the literal map of Type and reset
%   that map.  Fails if no map of Type is registered.
%
%   @error type_error(atom, Type) (from must_be/2) if Type is not an atom.

rdf_delete_literal_index(Type) :-
    must_be(atom, Type),
    once(retract(literal_map(Type, Map))),
    rdf_reset_literal_map(Map).
563
564 567
577
%!  create_update_literal_thread(+Threads) is det.
%
%   Create the message queue with alias `rdf_literal_monitor_queue`
%   (max_size 50000) and start Threads workers in state `initial`
%   using create_index_worker/1.

create_update_literal_thread(Threads) :-
    QueueOptions = [ alias(rdf_literal_monitor_queue),
                     max_size(50000)
                   ],
    message_queue_create(_, QueueOptions),
    forall(between(1, Threads, _),
           create_index_worker(initial)).
585
586:- dynamic
587 index_worker_id/1,
588 extra_worker_count/1. 589
%!  create_index_worker(+Status) is det.
%
%   Start a thread running monitor_literals(Status).  The thread
%   alias is `rdf_literal_monitor_` followed by the next worker
%   number; index_worker_id/1 holds the number for the following
%   worker and numbering starts at 1.  Status is also passed to
%   inc_extra_worker_count/1.

create_index_worker(Status) :-
    (   retract(index_worker_id(ThisId))
    ->  true
    ;   ThisId = 1
    ),
    succ(ThisId, NextId),
    assertz(index_worker_id(NextId)),
    atom_concat(rdf_literal_monitor_, ThisId, Alias),
    inc_extra_worker_count(Status),
    thread_create(monitor_literals(Status), _, [alias(Alias)]).
602
%!  monitor_literals(+Status)
%
%   Body of an update worker.  Both variants set the flag
%   `agc_margin` to 0 and then register the literals that arrive on
%   `rdf_literal_monitor_queue` in a failure-driven loop.
%
%     - initial
%       Waits for messages without timeout; the loop has no exit.
%     - extra
%       Waits with option timeout(1).  When no message arrives in
%       time the loop is left, the extra-worker count is decremented
%       under mutex `create_index_worker` and the thread detaches
%       itself.

monitor_literals(initial) :-
    set_prolog_flag(agc_margin, 0),
    repeat,
        thread_get_message(rdf_literal_monitor_queue, Literal),
        register_literal(Literal),
        fail.
monitor_literals(extra) :-
    set_prolog_flag(agc_margin, 0),
    repeat,
        (   thread_get_message(rdf_literal_monitor_queue, Literal,
                               [ timeout(1)
                               ])
        ->  register_literal(Literal),
            fail
        ;   !                           % timed out: leave the loop
        ),
    with_mutex(create_index_worker, dec_extra_worker_count),
    thread_self(Self),
    thread_detach(Self).
622
%!  thread_monitor_literal(+Action) is semidet.
%
%   Monitor hook for threaded updates.  new_literal(Literal) events
%   are queued on `rdf_literal_monitor_queue`; every other event is
%   handled directly by monitor_literal/1.

thread_monitor_literal(Action) :-
    (   Action = new_literal(Literal)
    ->  thread_send_message(rdf_literal_monitor_queue, Literal)
    ;   monitor_literal(Action)
    ).
629
634
%!  check_index_workers(+Alias, +Keys) is det.
%
%   Start one extra worker (under mutex `create_index_worker`) if
%   max_extra_workers/1 allows at least one, the queue with the
%   given Alias holds more than 10000 messages and the number of
%   extra workers is still below the maximum.  Succeeds without
%   doing anything otherwise.  Keys is only used in the debug
%   message.

check_index_workers(Alias, Keys) :-
    max_extra_workers(Limit),
    Limit > 0,
    message_queue_property(Queue, alias(Alias)),
    message_queue_property(Queue, size(Pending)),
    Pending > 10000,
    \+ ( extra_worker_count(Running),
         Running >= Limit
       ),
    !,
    debug(rdf_litindex,
          'Creating extra literal indexer (Queue=~D, Keys=~D)',
          [Pending, Keys]),
    with_mutex(create_index_worker, create_index_worker(extra)).
check_index_workers(_, _).
650
%!  inc_extra_worker_count(+Status) is det.
%
%   Add one to the counter kept in extra_worker_count/1 when Status
%   is `extra`, starting at 1 if there is no counter yet.  Any other
%   Status leaves the counter untouched.

inc_extra_worker_count(extra) :-
    !,
    (   retract(extra_worker_count(C0))
    ->  C is C0+1
    ;   C = 1
    ),
    asserta(extra_worker_count(C)).
inc_extra_worker_count(_).
659
%!  dec_extra_worker_count is det.
%
%   Subtract one from the counter kept in extra_worker_count/1.
%   Succeeds without effect if there is no counter.

dec_extra_worker_count :-
    retract(extra_worker_count(C0)),
    !,
    C is C0-1,
    asserta(extra_worker_count(C)).
dec_extra_worker_count.
666
%!  max_extra_workers(-Max) is semidet.
%
%   Max is half the value of the Prolog flag `cpu_count`, rounded
%   down.  Fails if the flag is not defined.

max_extra_workers(Max) :-
    current_prolog_flag(cpu_count, Count),
    Max is Count//2.
670
671
672 675
%!  monitor_literal(+Action) is semidet.
%
%   Monitor hook that updates the token index in the calling thread.
%
%     - new_literal(Literal)
%       Register Literal.
%     - old_literal(Literal)
%       Unregister Literal.
%     - transaction(begin, reset)
%       Stop listening to old_literal events and wipe the indexes.
%     - transaction(end, reset)
%       Listen to old_literal events again.
%
%   NOTE(review): the reset clauses always (un)register events for
%   `monitor_literal`, also when this predicate is reached through
%   thread_monitor_literal/1 -- confirm that this is intended.

monitor_literal(new_literal(Literal)) :-
    register_literal(Literal).
monitor_literal(old_literal(Literal)) :-
    unregister_literal(Literal).
monitor_literal(transaction(begin, reset)) :-
    rdf_monitor(monitor_literal, [-old_literal]),
    clean_token_index.
monitor_literal(transaction(end, reset)) :-
    rdf_monitor(monitor_literal, [+old_literal]).
685
689
%!  register_literal(+Literal) is semidet.
%
%   Add the tokens of Literal to the token map.  Literals that
%   rdf_tokenize_literal/2 cannot tokenize are silently ignored.

register_literal(Literal) :-
    rdf_tokenize_literal(Literal, RawTokens),
    !,
    sort(RawTokens, Tokens),            % also removes duplicate tokens
    text_of(Literal, Lang, Text),
    literal_map(token, Map),
    add_tokens(Tokens, Lang, Text, Map).
register_literal(_).
698
%!  add_tokens(+Tokens, +Lang, +Literal, +Map) is det.
%
%   Insert Literal into Map under each of Tokens.
%
%   If rdf_insert_literal_map/4 leaves its last argument unbound
%   (presumably: the token was already a key -- confirm against the
%   rdf_db documentation) the number of literals for the token is
%   compared with setting stopgap_threshold(T); above the threshold
%   the token is recorded as stopgap/1 and removed from Map.
%   Otherwise all new_token/2 hooks are run for the token and, at
%   every 1000th key, progress is reported; at every 10000th key
%   check_index_workers/2 is called as well.

add_tokens([], _, _, _).
add_tokens([Token|Rest], Lang, Literal, Map) :-
    rdf_insert_literal_map(Map, Token, Literal, KeyCount),
    (   var(KeyCount)
    ->  (   rdf_keys_in_literal_map(Map, key(Token), Count),
            setting(stopgap_threshold(Threshold)),
            Count > Threshold
        ->  assertz(stopgap(Token)),
            rdf_delete_literal_map(Map, Token)
        ;   true
        )
    ;   forall(new_token(Token, Lang), true),
        (   KeyCount mod 1000 =:= 0
        ->  progress(Map, 'Tokens'),
            (   KeyCount mod 10000 =:= 0
            ->  check_index_workers(rdf_literal_monitor_queue, KeyCount)
            ;   true
            )
        ;   true
        )
    ),
    add_tokens(Rest, Lang, Literal, Map).
721
722
728
%!  unregister_literal(+Literal) is semidet.
%
%   Remove the text of Literal from the token map, unless some
%   triple still has literal(Text) as object.

unregister_literal(Literal) :-
    text_of(Literal, _Lang, Text),
    (   \+ rdf(_, _, literal(Text))
    ->  rdf_tokenize_literal(Literal, RawTokens),
        sort(RawTokens, Tokens),
        literal_map(token, Map),
        del_tokens(Tokens, Text, Map)
    ;   true                            % text is still in use
    ).
738
%!  del_tokens(+Tokens, +Literal, +Map) is semidet.
%
%   Delete Literal from Map under each of Tokens, in order.  Fails
%   as soon as one deletion fails.

del_tokens(Tokens, Literal, Map) :-
    forall(member(Token, Tokens),
           rdf_delete_literal_map(Map, Token, Literal)).
743
744
749
%!  rdf_tokenize_literal(+Literal, -Tokens) is semidet.
%
%   Tokens is the list of index tokens for Literal.  The multifile
%   hook tokenization/2 takes precedence.  Otherwise the text of the
%   literal must be an atom; it is split by tokenize_atom/2 and
%   filtered by select_tokens/2.

rdf_tokenize_literal(Literal, Tokens) :-
    (   tokenization(Literal, Tokens)
    ->  true
    ;   text_of(Literal, _Lang, Text),
        atom(Text),
        tokenize_atom(Text, AllTokens),
        select_tokens(AllTokens, Tokens)
    ).
758
%!  select_tokens(+Tokens, -Selected) is det.
%
%   Selected holds the tokens worth indexing, in their original
%   order.  A token is dropped if, tested in this order,
%
%     - exclude_from_index(token, Token) holds;
%     - it is a number other than an integer in the range
%       -1073741824..1073741823;
%     - it has length 1;
%     - it is a default_stopgap/1 or a stopgap/1.

select_tokens([], []).
select_tokens([Token|Rest], Selected) :-
    (   exclude_from_index(token, Token)
    ->  Keep = false
    ;   number(Token)
    ->  (   integer(Token),
            between(-1073741824, 1073741823, Token)
        ->  Keep = true
        ;   Keep = false
        )
    ;   atom_length(Token, 1)
    ->  Keep = false
    ;   default_stopgap(Token)
    ->  Keep = false
    ;   stopgap(Token)
    ->  Keep = false
    ;   Keep = true
    ),
    (   Keep == true
    ->  Selected = [Token|More]
    ;   Selected = More
    ),
    select_tokens(Rest, More).
779
790
%!  rdf_stopgap_token(?Token) is nondet.
%
%   True when Token is not indexed, as defined by
%   rdf_stopgap_token2/1.  Enumerates on backtracking if Token is
%   unbound and succeeds at most once if it is bound.

rdf_stopgap_token(Token) :-
    (   var(Token)
    ->  rdf_stopgap_token2(Token)
    ;   once(rdf_stopgap_token2(Token))
    ).
796
%!  rdf_stopgap_token2(?Token) is nondet.
%
%   The sources of stopgap tokens, tried in this order: the hook
%   exclude_from_index/2, default_stopgap/1, atoms of length 1 and
%   the recorded stopgap/1 facts.

rdf_stopgap_token2(Token) :-
    (   exclude_from_index(token, Token)
    ;   default_stopgap(Token)
    ;   atom(Token),
        atom_length(Token, 1)
    ;   stopgap(Token)
    ).
806
813
%!  default_stopgap(?Token) is nondet.
%
%   Built-in list of tokens that are never indexed.

default_stopgap(and).
default_stopgap(an).
default_stopgap(or).
default_stopgap(of).
default_stopgap(on).
default_stopgap(in).
default_stopgap(this).
default_stopgap(the).
822
823
831
%!  text_of(+Literal, -Lang, -Text) is semidet.
%
%   Split the argument of an RDF literal into its text and a
%   language.  The language is `en` for plain atoms and for literals
%   typed xsd:string, the given language for lang(Lang, Text) and
%   `-` for other typed literals and for integers.

text_of(type(xsd:string, Text), en, Text) :-
    !.
text_of(type(_Type, Text), -, Text) :-
    !.
text_of(lang(Lang, Text), Lang, Text) :-
    !.
text_of(Atom, en, Atom) :-
    atom(Atom),
    !.
text_of(Integer, -, Integer) :-
    integer(Integer).
837
838
839 842
848
%!  stem_index(-Map) is det.
%
%   Map is the literal map registered as literal_map(stem, Map).  If
%   there is none yet, a new map is created and registered, a
%   new_token/2 clause calling add_stem/3 is added and a detached
%   thread with alias `__rdf_stemmer` is started to run
%   fill_stem_index/2.  In both cases the call returns only after
%   wait_for_map/1 finds no map_building(stem, _) fact.

stem_index(Map) :-
    (   literal_map(stem, Map)
    ->  true
    ;   rdf_new_literal_map(Map),
        assertz(literal_map(stem, Map)),
        assertz((new_token(Token, Lang) :- add_stem(Token, Lang, Map))),
        message_queue_create(Queue),
        assertz(map_building(stem, Queue)),
        thread_create(fill_stem_index(Map, Queue), _,
                      [ alias('__rdf_stemmer'),
                        detached(true)
                      ])
    ),
    wait_for_map(stem).
864
%!  wait_for_map(+MapName) is det.
%
%   Block for as long as a map_building(MapName, Queue) fact exists.
%   Waiting is done by reading from Queue; errors raised by the read
%   are ignored, after which the fact is checked again.

wait_for_map(MapName) :-
    map_building(MapName, Queue),
    !,
    catch(thread_get_message(Queue, _), _, true),
    wait_for_map(MapName).
wait_for_map(_).
871
%!  fill_stem_index(+StemMap, +Queue) is det.
%
%   Add the stems of the tokens of every rdf_current_literal/1 to
%   StemMap.  When done, Queue is destroyed and the
%   map_building(stem, _) facts are removed.

fill_stem_index(StemMap, Queue) :-
    setup_call_cleanup(
        true,
        forall(rdf_current_literal(Literal),
               stem_literal_tokens(Literal, StemMap)),
        ( message_queue_destroy(Queue),
          retractall(map_building(stem, _))
        )).
878
%!  stem_literal_tokens(+Literal, +StemMap) is semidet.
%
%   Insert the stems of the tokens of Literal into StemMap, using
%   the language of the literal.  Literals that cannot be tokenized
%   are ignored.

stem_literal_tokens(Literal, StemMap) :-
    (   rdf_tokenize_literal(Literal, RawTokens)
    ->  sort(RawTokens, Tokens),
        text_of(Literal, Lang, _Text),
        insert_tokens_stem(Tokens, Lang, StemMap)
    ;   true
    ).
886
%!  insert_tokens_stem(+Tokens, +Lang, +Map) is det.
%
%   For every atom in Tokens that stem/3 can stem for Lang, insert
%   the token into Map under its stem.  Other tokens are skipped.
%   Progress is reported when the key count returned by
%   rdf_insert_literal_map/4 is a multiple of 1000.

insert_tokens_stem([], _, _).
insert_tokens_stem([Token|Rest], Lang, Map) :-
    (   atom(Token),
        stem(Token, Lang, Stem)
    ->  rdf_insert_literal_map(Map, Stem, Token, KeyCount),
        (   integer(KeyCount),
            KeyCount mod 1000 =:= 0
        ->  progress(Map, 'Stem')
        ;   true
        )
    ;   true
    ),
    insert_tokens_stem(Rest, Lang, Map).

902
903
%!  add_stem(+Token, +Lang, +Map) is semidet.
%
%   Hook body for new_token/2 (installed by stem_index/1): insert a
%   newly indexed Token into the stem map under its stem for Lang.
%   Fails, without changing Map, for tokens that are not atoms or
%   that cannot be stemmed; insert_tokens_stem/3 skips the same
%   tokens when the stem map is filled initially.

add_stem(Token, Lang, Map) :-
    atom(Token),
    stem(Token, Lang, Stem),            % stem(+Token, +Lang, -Stem)
    rdf_insert_literal_map(Map, Stem, Token, _).

907
%!  stem(+Token, +LangSpec, -Stem) is semidet.
%
%   Stem is the stem of the lowercase version of Token.  If
%   snowball/3 is available it is called with the language obtained
%   from LangSpec by main_lang/2; the call fails if snowball/3
%   raises an exception.  Otherwise porter_stem/2 is used and the
%   language is ignored.

:- if(current_predicate(snowball/3)).
stem(Token, LangSpec, Stem) :-
    main_lang(LangSpec, Language),
    downcase_atom(Token, LowerCase),
    catch(snowball(Language, LowerCase, Stem), _, fail).
:- else.
stem(Token, _LangSpec, Stem) :-
    downcase_atom(Token, LowerCase),
    porter_stem(LowerCase, Stem).
:- endif.

%!  main_lang(+LangSpec, -Lang) is det.
%
%   Lang is the lowercase primary language of LangSpec: the part
%   before the first `-` if there is one and all of LangSpec
%   otherwise.  For example, both 'EN-us' and 'en' give `en`.  The
%   result is converted to lowercase in both cases.

main_lang(LangSpec, Lang) :-
    (   sub_atom(LangSpec, Before, _, _, -)
    ->  sub_atom(LangSpec, 0, Before, _, Primary)
    ;   Primary = LangSpec
    ),
    downcase_atom(Primary, Lang).
925
926
927 930
931
%!  metaphone_index(-Map) is det.
%
%   Map is the literal map registered as literal_map(metaphone,
%   Map).  If there is none yet, a new map is created and
%   registered, a new_token/2 clause calling add_metaphone/3 is
%   added and a detached thread with alias `__rdf_metaphone_indexer`
%   is started to run fill_metaphone_index/2.  In both cases the
%   call returns only after wait_for_map/1 finds no
%   map_building(metaphone, _) fact.

metaphone_index(Map) :-
    (   literal_map(metaphone, Map)
    ->  true
    ;   rdf_new_literal_map(Map),
        assertz(literal_map(metaphone, Map)),
        assertz((new_token(Token, Lang) :- add_metaphone(Token, Lang, Map))),
        message_queue_create(Queue),
        assertz(map_building(metaphone, Queue)),
        thread_create(fill_metaphone_index(Map, Queue), _,
                      [ alias('__rdf_metaphone_indexer'),
                        detached(true)
                      ])
    ),
    wait_for_map(metaphone).
947
%!  fill_metaphone_index(+MetaphoneMap, +Queue) is semidet.
%
%   Run fill_metaphone_index/1.  When it terminates, Queue is
%   destroyed and the map_building(metaphone, _) facts are removed.

fill_metaphone_index(MetaphoneMap, Queue) :-
    setup_call_cleanup(
        true,
        fill_metaphone_index(MetaphoneMap),
        ( message_queue_destroy(Queue),
          retractall(map_building(metaphone, _))
        )).

%!  fill_metaphone_index(+MetaphoneMap) is semidet.
%
%   Feed all keys of the token index to metaphone/2.

fill_metaphone_index(MetaphoneMap) :-
    token_index(TokenMap),
    rdf_keys_in_literal_map(TokenMap, all, AllTokens),
    metaphone(AllTokens, MetaphoneMap).
958
%!  metaphone(+Tokens, +Map) is det.
%
%   For every atom in Tokens for which double_metaphone/2 succeeds,
%   insert the token into Map under its metaphone key.  Other tokens
%   are skipped.  Progress is reported when the key count returned
%   by rdf_insert_literal_map/4 is a multiple of 1000.

metaphone([], _).
metaphone([Token|Rest], Map) :-
    (   atom(Token)
    ->  (   double_metaphone(Token, SoundEx)
        ->  rdf_insert_literal_map(Map, SoundEx, Token, KeyCount),
            (   integer(KeyCount),
                KeyCount mod 1000 =:= 0
            ->  progress(Map, 'Metaphone')
            ;   true
            )
        ;   true
        )
    ;   true
    ),
    metaphone(Rest, Map).

972
973
%!  add_metaphone(+Token, +Lang, +Map) is semidet.
%
%   Hook body for new_token/2 (installed by metaphone_index/1):
%   insert an atom Token into Map under its double metaphone key.
%   Tokens that are not atoms are ignored; Lang is not used.

add_metaphone(Token, _Lang, Map) :-
    (   atom(Token)
    ->  double_metaphone(Token, SoundEx),
        rdf_insert_literal_map(Map, SoundEx, Token)
    ;   true
    ).
980
997
%!  rdf_literal_index(+Type, -Map) is det.
%
%   Map is the literal map of the index Type, one of `token`, `stem`
%   or `metaphone`.  The work is delegated to token_index/1,
%   stem_index/1 and metaphone_index/1.
%
%   @error domain_error(literal_index, Type) for any other Type.

rdf_literal_index(Type, Map) :-
    (   Type = token
    ->  token_index(Map)
    ;   Type = stem
    ->  stem_index(Map)
    ;   Type = metaphone
    ->  metaphone_index(Map)
    ;   domain_error(literal_index, Type)
    ).
1009
1010
1011 1014
%!  verbose(+Format, +Args) is semidet.
%
%   Print a formatted message to `user_error` if setting
%   verbose(true) is active.  Does nothing otherwise.

verbose(Format, Args) :-
    (   setting(verbose(true))
    ->  format(user_error, Format, Args)
    ;   true
    ).
1020
1021progress(Map, Which) :-
1022 setting(verbose(true)),
1023 !,
1024 rdf_statistics_literal_map(Map, size(Keys, Values)),
1025 format(user_error,
1026 '\r~t~w: ~12|Keys: ~t~D~15+; Values: ~t~D~20+',
1027 [Which, Keys, Values]).
1028progress(_,_)