Bug 10363: FIX regression - prevent duplicate
[koha.git] / C4 / Tags.pm
blob619c692bfea5ef7396f1a69aaec2462d0e750ce1
1 package C4::Tags;
3 # Copyright Liblime 2008
4 # Parts Copyright ACPL 2011
6 # This file is part of Koha.
8 # Koha is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3 of the License, or
11 # (at your option) any later version.
13 # Koha is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with Koha; if not, see <http://www.gnu.org/licenses>.
21 use strict;
22 use warnings;
23 use Carp;
24 use Exporter;
26 use C4::Context;
27 use C4::Debug;
28 #use Data::Dumper;
29 use constant TAG_FIELDS => qw(tag_id borrowernumber biblionumber term language date_created);
30 use constant TAG_SELECT => "SELECT " . join(',', TAG_FIELDS) . "\n FROM tags_all\n";
32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
33 our $ext_dict;
35 BEGIN {
36 $VERSION = 3.07.00.049;
37 @ISA = qw(Exporter);
38 @EXPORT_OK = qw(
39 &get_tag &get_tags &get_tag_rows
40 &add_tags &add_tag
41 &delete_tag_row_by_id
42 &remove_tag
43 &delete_tag_rows_by_ids
44 &get_approval_rows
45 &blacklist
46 &whitelist
47 &is_approved
48 &approval_counts
49 &get_count_by_tag_status
50 &get_filters
51 stratify_tags
53 # %EXPORT_TAGS = ();
54 $ext_dict = C4::Context->preference('TagsExternalDictionary');
55 if ($debug) {
56 require Data::Dumper;
57 import Data::Dumper qw(:DEFAULT);
58 print STDERR __PACKAGE__ . " external dictionary = " . ($ext_dict||'none') . "\n";
60 if ($ext_dict) {
61 require Lingua::Ispell;
62 import Lingua::Ispell qw(spellcheck add_word_lc save_dictionary);
66 =head1 C4::Tags.pm - Support for user tagging of biblios.
68 More verose debugging messages are sent in the presence of non-zero $ENV{"DEBUG"}.
70 =cut
72 INIT {
73 $ext_dict and $Lingua::Ispell::path = $ext_dict;
74 $debug and print STDERR "\$Lingua::Ispell::path = $Lingua::Ispell::path\n";
77 sub get_filters {
78 my $query = "SELECT * FROM tags_filters ";
79 my ($sth);
80 if (@_) {
81 $sth = C4::Context->dbh->prepare($query . " WHERE filter_id = ? ");
82 $sth->execute(shift);
83 } else {
84 $sth = C4::Context->dbh->prepare($query);
85 $sth->execute;
87 return $sth->fetchall_arrayref({});
90 # (SELECT count(*) FROM tags_all ) as tags_all,
91 # (SELECT count(*) FROM tags_index ) as tags_index,
93 sub approval_counts {
94 my $query = "SELECT
95 (SELECT count(*) FROM tags_approval WHERE approved= 1) as approved_count,
96 (SELECT count(*) FROM tags_approval WHERE approved=-1) as rejected_count,
97 (SELECT count(*) FROM tags_approval WHERE approved= 0) as unapproved_count
99 my $sth = C4::Context->dbh->prepare($query);
100 $sth->execute;
101 my $result = $sth->fetchrow_hashref();
102 $result->{approved_total} = $result->{approved_count} + $result->{rejected_count} + $result->{unapproved_count};
103 $debug and warn "counts returned: " . Dumper $result;
104 return $result;
107 =head2 get_count_by_tag_status
109 get_count_by_tag_status($status);
111 Takes a status and gets a count of tags with that status
113 =cut
115 sub get_count_by_tag_status {
116 my ($status) = @_;
117 my $dbh = C4::Context->dbh;
118 my $query =
119 "SELECT count(*) FROM tags_approval WHERE approved=?";
120 my $sth = $dbh->prepare($query);
121 $sth->execute( $status );
122 return $sth->fetchrow;
125 sub remove_tag {
126 my $tag_id = shift or return;
127 my $user_id = (@_) ? shift : undef;
128 my $rows = (defined $user_id) ?
129 get_tag_rows({tag_id=>$tag_id, borrowernumber=>$user_id}) :
130 get_tag_rows({tag_id=>$tag_id}) ;
131 $rows or return 0;
132 (scalar(@$rows) == 1) or return; # should never happen (duplicate ids)
133 my $row = shift(@$rows);
134 ($tag_id == $row->{tag_id}) or return 0;
135 my $tags = get_tags({term=>$row->{term}, biblionumber=>$row->{biblionumber}});
136 my $index = shift(@$tags);
137 $debug and print STDERR
138 sprintf "remove_tag: tag_id=>%s, biblionumber=>%s, weight=>%s, weight_total=>%s\n",
139 $row->{tag_id}, $row->{biblionumber}, $index->{weight}, $index->{weight_total};
140 if ($index->{weight} <= 1) {
141 delete_tag_index($row->{term},$row->{biblionumber});
142 } else {
143 decrement_weight($row->{term},$row->{biblionumber});
145 if ($index->{weight_total} <= 1) {
146 delete_tag_approval($row->{term});
147 } else {
148 decrement_weight_total($row->{term});
150 delete_tag_row_by_id($tag_id);
153 sub delete_tag_index {
154 (@_) or return;
155 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_index WHERE term = ? AND biblionumber = ? LIMIT 1");
156 $sth->execute(@_);
157 return $sth->rows || 0;
159 sub delete_tag_approval {
160 (@_) or return;
161 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_approval WHERE term = ? LIMIT 1");
162 $sth->execute(shift);
163 return $sth->rows || 0;
165 sub delete_tag_row_by_id {
166 (@_) or return;
167 my $sth = C4::Context->dbh->prepare("DELETE FROM tags_all WHERE tag_id = ? LIMIT 1");
168 $sth->execute(shift);
169 return $sth->rows || 0;
171 sub delete_tag_rows_by_ids {
172 (@_) or return;
173 my $i=0;
174 foreach(@_) {
175 $i += delete_tag_row_by_id($_);
177 ($i == scalar(@_)) or
178 warn sprintf "delete_tag_rows_by_ids tried %s tag_ids, only succeeded on $i", scalar(@_);
179 return $i;
182 sub get_tag_rows {
183 my $hash = shift || {};
184 my @ok_fields = TAG_FIELDS;
185 push @ok_fields, 'limit'; # push the limit! :)
186 my $wheres;
187 my $limit = "";
188 my @exe_args = ();
189 foreach my $key (keys %$hash) {
190 $debug and print STDERR "get_tag_rows arg. '$key' = ", $hash->{$key}, "\n";
191 unless (length $key) {
192 carp "Empty argument key to get_tag_rows: ignoring!";
193 next;
195 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
196 carp "get_tag_rows received unreconized argument key '$key'.";
197 next;
199 if ($key eq 'limit') {
200 my $val = $hash->{$key};
201 unless ($val =~ /^(\d+,)?\d+$/) {
202 carp "Non-nuerical limit value '$val' ignored!";
203 next;
205 $limit = " LIMIT $val\n";
206 } else {
207 $wheres .= ($wheres) ? " AND $key = ?\n" : " WHERE $key = ?\n";
208 push @exe_args, $hash->{$key};
211 my $query = TAG_SELECT . ($wheres||'') . $limit;
212 $debug and print STDERR "get_tag_rows query:\n $query\n",
213 "get_tag_rows query args: ", join(',', @exe_args), "\n";
214 my $sth = C4::Context->dbh->prepare($query);
215 if (@exe_args) {
216 $sth->execute(@exe_args);
217 } else {
218 $sth->execute;
220 return $sth->fetchall_arrayref({});
223 sub get_tags { # i.e., from tags_index
224 my $hash = shift || {};
225 my @ok_fields = qw(term biblionumber weight limit sort approved);
226 my $wheres;
227 my $limit = "";
228 my $order = "";
229 my @exe_args = ();
230 foreach my $key (keys %$hash) {
231 $debug and print STDERR "get_tags arg. '$key' = ", $hash->{$key}, "\n";
232 unless (length $key) {
233 carp "Empty argument key to get_tags: ignoring!";
234 next;
236 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
237 carp "get_tags received unreconized argument key '$key'.";
238 next;
240 if ($key eq 'limit') {
241 my $val = $hash->{$key};
242 unless ($val =~ /^(\d+,)?\d+$/) {
243 carp "Non-nuerical limit value '$val' ignored!";
244 next;
246 $limit = " LIMIT $val\n";
247 } elsif ($key eq 'sort') {
248 foreach my $by (split /\,/, $hash->{$key}) {
249 unless (
250 $by =~ /^([-+])?(term)/ or
251 $by =~ /^([-+])?(biblionumber)/ or
252 $by =~ /^([-+])?(weight)/
254 carp "get_tags received illegal sort order '$by'";
255 next;
257 if ($order) {
258 $order .= ", ";
259 } else {
260 $order = " ORDER BY ";
262 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
265 } else {
266 my $whereval = $hash->{$key};
267 my $longkey = ($key eq 'term' ) ? 'tags_index.term' :
268 ($key eq 'approved') ? 'tags_approval.approved' : $key;
269 my $op = ($whereval =~ s/^(>=|<=)// or
270 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
271 $wheres .= ($wheres) ? " AND $longkey $op ?\n" : " WHERE $longkey $op ?\n";
272 push @exe_args, $whereval;
275 my $query = "
276 SELECT tags_index.term as term,biblionumber,weight,weight_total
277 FROM tags_index
278 LEFT JOIN tags_approval
279 ON tags_index.term = tags_approval.term
280 " . ($wheres||'') . $order . $limit;
281 $debug and print STDERR "get_tags query:\n $query\n",
282 "get_tags query args: ", join(',', @exe_args), "\n";
283 my $sth = C4::Context->dbh->prepare($query);
284 if (@exe_args) {
285 $sth->execute(@exe_args);
286 } else {
287 $sth->execute;
289 return $sth->fetchall_arrayref({});
292 sub get_approval_rows { # i.e., from tags_approval
293 my $hash = shift || {};
294 my @ok_fields = qw(term approved date_approved approved_by weight_total limit sort borrowernumber);
295 my $wheres;
296 my $limit = "";
297 my $order = "";
298 my @exe_args = ();
299 foreach my $key (keys %$hash) {
300 $debug and print STDERR "get_approval_rows arg. '$key' = ", $hash->{$key}, "\n";
301 unless (length $key) {
302 carp "Empty argument key to get_approval_rows: ignoring!";
303 next;
305 unless (1 == scalar grep {/^ $key $/x} @ok_fields) {
306 carp "get_approval_rows received unreconized argument key '$key'.";
307 next;
309 if ($key eq 'limit') {
310 my $val = $hash->{$key};
311 unless ($val =~ /^(\d+,)?\d+$/) {
312 carp "Non-numerical limit value '$val' ignored!";
313 next;
315 $limit = " LIMIT $val\n";
316 } elsif ($key eq 'sort') {
317 foreach my $by (split /\,/, $hash->{$key}) {
318 unless (
319 $by =~ /^([-+])?(term)/ or
320 $by =~ /^([-+])?(biblionumber)/ or
321 $by =~ /^([-+])?(borrowernumber)/ or
322 $by =~ /^([-+])?(weight_total)/ or
323 $by =~ /^([-+])?(approved(_by)?)/ or
324 $by =~ /^([-+])?(date_approved)/
326 carp "get_approval_rows received illegal sort order '$by'";
327 next;
329 if ($order) {
330 $order .= ", ";
331 } else {
332 $order = " ORDER BY " unless $order;
334 $order .= $2 . " " . ((!$1) ? '' : $1 eq '-' ? 'DESC' : $1 eq '+' ? 'ASC' : '') . "\n";
337 } else {
338 my $whereval = $hash->{$key};
339 my $op = ($whereval =~ s/^(>=|<=)// or
340 $whereval =~ s/^(>|=|<)// ) ? $1 : '=';
341 $wheres .= ($wheres) ? " AND $key $op ?\n" : " WHERE $key $op ?\n";
342 push @exe_args, $whereval;
345 my $query = "
346 SELECT tags_approval.term AS term,
347 tags_approval.approved AS approved,
348 tags_approval.date_approved AS date_approved,
349 tags_approval.approved_by AS approved_by,
350 tags_approval.weight_total AS weight_total,
351 CONCAT(borrowers.surname, ', ', borrowers.firstname) AS approved_by_name
352 FROM tags_approval
353 LEFT JOIN borrowers
354 ON tags_approval.approved_by = borrowers.borrowernumber ";
355 $query .= ($wheres||'') . $order . $limit;
356 $debug and print STDERR "get_approval_rows query:\n $query\n",
357 "get_approval_rows query args: ", join(',', @exe_args), "\n";
358 my $sth = C4::Context->dbh->prepare($query);
359 if (@exe_args) {
360 $sth->execute(@exe_args);
361 } else {
362 $sth->execute;
364 return $sth->fetchall_arrayref({});
367 sub is_approved {
368 my $term = shift or return;
369 my $sth = C4::Context->dbh->prepare("SELECT approved FROM tags_approval WHERE term = ?");
370 $sth->execute($term);
371 unless ($sth->rows) {
372 $ext_dict and return (spellcheck($term) ? 0 : 1); # spellcheck returns empty on OK word
373 return 0;
375 return $sth->fetchrow;
378 sub get_tag_index {
379 my $term = shift or return;
380 my $sth;
381 if (@_) {
382 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?");
383 $sth->execute($term,shift);
384 } else {
385 $sth = C4::Context->dbh->prepare("SELECT * FROM tags_index WHERE term = ?");
386 $sth->execute($term);
388 return $sth->fetchrow_hashref;
391 sub whitelist {
392 my $operator = shift;
393 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
394 if ($ext_dict) {
395 foreach (@_) {
396 spellcheck($_) or next;
397 add_word_lc($_);
400 foreach (@_) {
401 my $aref = get_approval_rows({term=>$_});
402 if ($aref and scalar @$aref) {
403 mod_tag_approval($operator,$_,1);
404 } else {
405 add_tag_approval($_,$operator);
408 return scalar @_;
410 # note: there is no "unwhitelist" operation because there is no remove for Ispell.
411 # The blacklist regexps should operate "in front of" the whitelist, so if you approve
412 # a term mistakenly, you can still reverse it. But there is no going back to "neutral".
413 sub blacklist {
414 my $operator = shift;
415 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
416 foreach (@_) {
417 my $aref = get_approval_rows({term=>$_});
418 if ($aref and scalar @$aref) {
419 mod_tag_approval($operator,$_,-1);
420 } else {
421 add_tag_approval($_,$operator,-1);
424 return scalar @_;
426 sub add_filter {
427 my $operator = shift;
428 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
429 my $query = "INSERT INTO tags_blacklist (regexp,y,z) VALUES (?,?,?)";
430 # my $sth = C4::Context->dbh->prepare($query);
431 return scalar @_;
433 sub remove_filter {
434 my $operator = shift;
435 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
436 my $query = "REMOVE FROM tags_blacklist WHERE blacklist_id = ?";
437 # my $sth = C4::Context->dbh->prepare($query);
438 # $sth->execute($term);
439 return scalar @_;
442 sub add_tag_approval { # or disapproval
443 $debug and warn "add_tag_approval(" . join(", ",map {defined($_) ? $_ : 'UNDEF'} @_) . ")";
444 my $term = shift or return;
445 my $query = "SELECT * FROM tags_approval WHERE term = ?";
446 my $sth = C4::Context->dbh->prepare($query);
447 $sth->execute($term);
448 ($sth->rows) and return increment_weight_total($term);
449 my $operator = shift || 0;
450 my $approval = (@_ ? shift : 0); # default is unapproved
451 my @exe_args = ($term); # all 3 queries will use this argument
452 if ($operator) {
453 $query = "INSERT INTO tags_approval (term,approved_by,approved,date_approved) VALUES (?,?,?,NOW())";
454 push @exe_args, $operator, $approval;
455 } elsif ($approval) {
456 $query = "INSERT INTO tags_approval (term,approved,date_approved) VALUES (?,?,NOW())";
457 push @exe_args, $approval;
458 } else {
459 $query = "INSERT INTO tags_approval (term,date_approved) VALUES (?,NOW())";
461 $debug and print STDERR "add_tag_approval query: $query\nadd_tag_approval args: (" . join(", ", @exe_args) . ")\n";
462 $sth = C4::Context->dbh->prepare($query);
463 $sth->execute(@exe_args);
464 return $sth->rows;
467 sub mod_tag_approval {
468 my $operator = shift;
469 defined $operator or return; # have to test defined to allow =0 (kohaadmin)
470 my $term = shift or return;
471 my $approval = (scalar @_ ? shift : 1); # default is to approve
472 my $query = "UPDATE tags_approval SET approved_by=?, approved=?, date_approved=NOW() WHERE term = ?";
473 $debug and print STDERR "mod_tag_approval query: $query\nmod_tag_approval args: ($operator,$approval,$term)\n";
474 my $sth = C4::Context->dbh->prepare($query);
475 $sth->execute($operator,$approval,$term);
478 sub add_tag_index {
479 my $term = shift or return;
480 my $biblionumber = shift or return;
481 my $query = "SELECT * FROM tags_index WHERE term = ? AND biblionumber = ?";
482 my $sth = C4::Context->dbh->prepare($query);
483 $sth->execute($term,$biblionumber);
484 ($sth->rows) and return increment_weight($term,$biblionumber);
485 $query = "INSERT INTO tags_index (term,biblionumber) VALUES (?,?)";
486 $debug and print STDERR "add_tag_index query: $query\nadd_tag_index args: ($term,$biblionumber)\n";
487 $sth = C4::Context->dbh->prepare($query);
488 $sth->execute($term,$biblionumber);
489 return $sth->rows;
492 sub get_tag { # by tag_id
493 (@_) or return;
494 my $sth = C4::Context->dbh->prepare(TAG_SELECT . "WHERE tag_id = ?");
495 $sth->execute(shift);
496 return $sth->fetchrow_hashref;
499 sub increment_weights {
500 increment_weight(@_);
501 increment_weight_total(shift);
503 sub decrement_weights {
504 decrement_weight(@_);
505 decrement_weight_total(shift);
507 sub increment_weight_total {
508 _set_weight_total('weight_total+1',shift);
510 sub increment_weight {
511 _set_weight('weight+1',shift,shift);
513 sub decrement_weight_total {
514 _set_weight_total('weight_total-1',shift);
516 sub decrement_weight {
517 _set_weight('weight-1',shift,shift);
519 sub _set_weight_total {
520 my $sth = C4::Context->dbh->prepare("
521 UPDATE tags_approval
522 SET weight_total=" . (shift) . "
523 WHERE term=?
524 "); # note: CANNOT use "?" for weight_total (see the args above).
525 $sth->execute(shift); # just the term
527 sub _set_weight {
528 my $dbh = C4::Context->dbh;
529 my $sth = $dbh->prepare("
530 UPDATE tags_index
531 SET weight=" . (shift) . "
532 WHERE term=?
533 AND biblionumber=?
535 $sth->execute(@_);
538 sub add_tag { # biblionumber,term,[borrowernumber,approvernumber]
539 my $biblionumber = shift or return;
540 my $term = shift or return;
541 my $borrowernumber = (@_) ? shift : 0; # the user, default to kohaadmin
542 $term =~ s/^\s+//;
543 $term =~ s/\s+$//;
544 ($term) or return; # must be more than whitespace
545 my $rows = get_tag_rows({biblionumber=>$biblionumber, borrowernumber=>$borrowernumber, term=>$term, limit=>1});
546 my $query = "INSERT INTO tags_all
547 (borrowernumber,biblionumber,term,date_created)
548 VALUES (?,?,?,NOW())";
549 $debug and print STDERR "add_tag query: $query\n",
550 "add_tag query args: ($borrowernumber,$biblionumber,$term)\n";
551 if (scalar @$rows) {
552 $debug and carp "Duplicate tag detected. Tag not added.";
553 return;
555 # add to tags_all regardless of approaval
556 my $sth = C4::Context->dbh->prepare($query);
557 $sth->execute($borrowernumber,$biblionumber,$term);
559 # then
560 if (scalar @_) { # if arg remains, it is the borrowernumber of the approver: tag is pre-approved.
561 my $approver = shift;
562 $debug and print STDERR "term '$term' pre-approved by borrower #$approver\n";
563 add_tag_approval($term,$approver,1);
564 add_tag_index($term,$biblionumber,$approver);
565 } elsif (is_approved($term) >= 1) {
566 $debug and print STDERR "term '$term' approved by whitelist\n";
567 add_tag_approval($term,0,1);
568 add_tag_index($term,$biblionumber,1);
569 } else {
570 $debug and print STDERR "term '$term' NOT approved (yet)\n";
571 add_tag_approval($term);
572 add_tag_index($term,$biblionumber);
576 # This takes a set of tags, as returned by C<get_approval_rows> and divides
577 # them up into a number of "strata" based on their weight. This is useful
578 # to display them in a number of different sizes.
580 # Usage:
581 # ($min, $max) = stratify_tags($strata, $tags);
582 # $stratum: the number of divisions you want
583 # $tags: the tags, as provided by get_approval_rows
584 # $min: the minimum stratum value
585 # $max: the maximum stratum value. This may be the same as $min if there
586 # is only one weight. Beware of divide by zeros.
587 # This will add a field to the tag called "stratum" containing the calculated
588 # value.
589 sub stratify_tags {
590 my ( $strata, $tags ) = @_;
591 return (0,0) if !@$tags;
592 my ( $min, $max );
593 foreach (@$tags) {
594 my $w = $_->{weight_total};
595 $min = $w if ( !defined($min) || $min > $w );
596 $max = $w if ( !defined($max) || $max < $w );
599 # normalise min to zero
600 $max = $max - $min;
601 my $orig_min = $min;
602 $min = 0;
604 # if min and max are the same, just make it 1
605 my $span = ( $strata - 1 ) / ( $max || 1 );
606 foreach (@$tags) {
607 my $w = $_->{weight_total};
608 $_->{stratum} = int( ( $w - $orig_min ) * $span );
610 return ( $min, $max );
614 __END__
616 =head2 add_tag(biblionumber,term[,borrowernumber])
618 =head3 TO DO: Add real perldoc
620 =cut
622 =head2 External Dictionary (Ispell) [Recommended]
624 An external dictionary can be used as a means of "pre-populating" and tracking
625 allowed terms based on the widely available Ispell dictionary. This can be the system
626 dictionary or a personal version, but in order to support whitelisting, it must be
627 editable to the process running Koha.
629 To enable, enter the absolute path to the ispell dictionary in the system
630 preference "TagsExternalDictionary".
632 Using external Ispell is recommended for both ease of use and performance. Note that any
633 language version of Ispell can be installed. It is also possible to modify the dictionary
634 at the command line to affect the desired content.
636 WARNING: The default Ispell dictionary includes (properly spelled) obscenities! Users
637 should build their own wordlist and recompile Ispell based on it. See man ispell for
638 instructions.
640 =head2 Table Structure
642 The tables used by tags are:
643 tags_all
644 tags_index
645 tags_approval
646 tags_blacklist
648 Your first thought may be that this looks a little complicated. It is, but only because
649 it has to be. I'll try to explain.
651 tags_all - This table would be all we really need if we didn't care about moderation or
652 performance or tags disappearing when borrowers are removed. Too bad, we do. Otherwise
653 though, it contains all the relevant info about a given tag:
654 tag_id - unique id number for it
655 borrowernumber - user that entered it
656 biblionumber - book record it is attached to
657 term - tag "term" itself
658 language - perhaps used later to influence weighting
659 date_created - date and time it was created
661 tags_approval - Since we need to provide moderation, this table is used to track it. If no
662 external dictionary is used, this table is the sole reference for approval and rejection.
663 With an external dictionary, it tracks pending terms and past whitelist/blacklist actions.
664 This could be called an "approved terms" table. See above regarding the External Dictionary.
665 term - tag "term" itself
666 approved - Negative, 0 or positive if tag is rejected, pending or approved.
667 date_approved - date of last action
668 approved_by - staffer performing the last action
669 weight_total - total occurrence of term in any biblio by any users
671 tags_index - This table is for performance, because by far the most common operation will
672 be fetching tags for a list of search results. We will have a set of biblios, and we will
673 want ONLY their approved tags and overall weighting. While we could implement a query that
674 would traverse tags_all filtered against tags_approval, the performance implications of
675 trying to calculate that and the "weight" (number of times a tag appears) on the fly are drastic.
676 term - approved term as it appears in tags_approval
677 biblionumber - book record it is attached to
678 weight - number of times tag applied by any user
680 tags_blacklist - A set of regular expression filters. Unsurprisingly, these should be perl-
681 compatible (PCRE) for your version of perl. Since this is a blacklist, a term will be
682 blocked if it matches any of the given patterns. WARNING: do not add blacklist regexps
683 if you do not understand their operation and interaction. It is quite easy to define too
684 simple or too complex a regexp and effectively block all terms. The blacklist operation is
685 fairly resource intensive, since every line of tags_blacklist will need to be read and compared.
686 It is recommended that tags_blacklist be used minimally, and only by an administrator with an
687 understanding of regular expression syntax and performance.
689 So the best way to think about the different tables is that they are each tailored to a certain
690 use. Note that tags_approval and tags_index do not rely on the user's borrower mapping, so
691 the tag population can continue to grow even if a user (along with their corresponding
692 rows in tags_all) is removed.
694 =head2 Tricks
696 If you want to auto-populate some tags for debugging, do something like this:
698 mysql> select biblionumber from biblio where title LIKE "%Health%";
699 +--------------+
700 | biblionumber |
701 +--------------+
702 | 18 |
703 | 22 |
704 | 24 |
705 | 30 |
706 | 44 |
707 | 45 |
708 | 46 |
709 | 49 |
710 | 111 |
711 | 113 |
712 | 128 |
713 | 146 |
714 | 155 |
715 | 518 |
716 | 522 |
717 | 524 |
718 | 530 |
719 | 544 |
720 | 545 |
721 | 546 |
722 | 549 |
723 | 611 |
724 | 613 |
725 | 628 |
726 | 646 |
727 | 655 |
728 +--------------+
729 26 rows in set (0.00 sec)
731 Then, take those numbers and type/pipe them into this perl command line:
732 perl -ne 'use C4::Tags qw(get_tags add_tag); use Data::Dumper;chomp; add_tag($_,"health",51,1); print Dumper get_tags({limit=>5,term=>"health",});'
734 Note, the borrowernumber in this example is 51. Use your own or any arbitrary valid borrowernumber.
736 =cut