# Namespace for the search-index utility functions defined in this file.
package Koha::Indexer::Utils;
# Copyright (c) 2012 Equinox Software, Inc.
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# Koha; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
=head1 Koha::Indexer::Utils

Koha::Indexer::Utils - utility functions for managing search indexes

This module contains utility functions for managing various aspects
of Koha's bibliographic and authority search indexes.

=head2 zebra_record_abs_to_dom

    $dom_config = Koha::Indexer::Utils::zebra_record_abs_to_dom($record_abs_config, $marcflavour);

Given a string containing the contents of a records.abs configuration file as
used by Zebra's GRS-1 filter, emit an equivalent DOM configuration.

=cut

# XML namespace URI applied to the generated index-definition document and
# to every element created for it by the functions in this file.
our $idxNS = 'http://www.koha-community.org/schemas/index-defs';
# Convert the text of a Zebra GRS-1 records.abs file into an equivalent DOM
# index-definition document.
#
# Arguments (positional):
#   $grs1_cfg    - string holding the contents of the records.abs file
#   $marcflavour - passed through unchanged to _append_grs1_def_to_dom_cfg
#
# Returns the generated XML document serialized as a string
# (via toString(1)).
sub zebra_record_abs_to_dom {
    my $grs1_cfg    = shift;
    my $marcflavour = shift;

    # Treat a missing configuration as an empty one rather than warning.
    $grs1_cfg = '' unless defined $grs1_cfg;

    # The -1 limit keeps trailing empty lines, so every physical line of
    # the input has a slot and the recorded line numbers stay accurate.
    my @grs1_cfg_lines = split /\n/, $grs1_cfg, -1;

    # generate an arrayref of structures representing
    # each records.abs line
    my $grs1_defs = [];
    for my $i (0 .. $#grs1_cfg_lines) {
        my $line = $grs1_cfg_lines[$i];
        next if _can_ignore_grs1_cfg_line($line);

        my $grs1_def = _parse_grs1_cfg_line($line);
        $grs1_def->{orig_def} = $line;
        $grs1_def->{lineno}   = $i + 1;    # 1-based, for the emitted comments
        push @{$grs1_defs}, $grs1_def;
    }

    # map the index definitions to a DOM tree representing
    # the index definitions -- if you squint hard, you
    # can see the beginnings of a more general definition language
    # for Koha index definitions
    my $dom_cfg = XML::LibXML::Document->new('1.0', 'utf-8');
    my $root    = $dom_cfg->createElement('index_defs');
    $root->setNamespace($idxNS, 'kohaidx');
    foreach my $grs1_def (@{$grs1_defs}) {
        _append_grs1_def_to_dom_cfg($dom_cfg, $root, $grs1_def, $marcflavour);
    }

    # and emit the result as a string
    $dom_cfg->setDocumentElement($root);
    return $dom_cfg->toString(1);
}
# Private helper functions used by zebra_record_abs_to_dom.
# Decide whether a records.abs line carries no index definition and can be
# skipped during conversion.
#
# Arguments:
#   $line - one line of the records.abs file (without its newline)
#
# Returns 1 when the line should be skipped, 0 otherwise.
sub _can_ignore_grs1_cfg_line {
    my $line = shift;

    return 1 unless defined $line;

    return 1
        if $line =~ /^\s*$/       # blank line
        or $line =~ /^\s*#/       # comment line
        or $line =~ /^(encoding|name|attset|esetname|marc|systag|xpath)/
        or $line =~ /^all/;       # DOM filter automatically indexes all
                                  # tokens, so no need to deal with
                                  # 'all any' lines in record.abs

    return 0;
}
# Parse one records.abs line into a hashref describing it.
#
# Only "melm ..." lines and "xelm /record/..." lines are recognized; the
# text following the directive is handed to _parse_xelm_melm.
#
# Arguments:
#   $line - one line of the records.abs file
#
# Returns the hashref produced by _parse_xelm_melm, or an empty hashref
# when the line is not a recognized directive.
sub _parse_grs1_cfg_line {
    my $line = shift;

    my $definition;
    if ($line =~ /^melm\s+(.*)/) {
        $definition = $1;
    }
    elsif ($line =~ m!^xelm /record/(.*)!) {
        $definition = $1;
    }

    return {} unless defined $definition;
    return _parse_xelm_melm($definition);
}
# Parse the body of a melm/xelm line: a field specification followed by
# whitespace and a comma-separated list of index definitions.
#
# Arguments:
#   $line - directive body, e.g. 'TAG$SUBFIELD name:type,name:type'
#
# Returns a hashref with the keys:
#   tag        - the part of the field specification before the first '$'
#   subfield   - the part after the first '$' (undef when there is no '$')
#   index_defs - arrayref of hashrefs built by _parse_grs1_index_def
sub _parse_xelm_melm {
    my $line = shift;

    my ($field, $index_defs) = split /\s+/, $line, 2;

    # A line may name a field without listing any indexes; normalize the
    # missing pieces so the operations below do not warn on undef.
    $field      = '' unless defined $field;
    $index_defs = '' unless defined $index_defs;

    # munge fixed field range indicators; this must happen before the
    # list is split on commas because range(...) itself contains commas
    $index_defs =~ s/range\(data,(\d+),(\d+)\)/$1:$2/g;

    # Drop surrounding whitespace so it cannot leak into the first index
    # name or the last index type.
    $index_defs =~ s/^\s+//;
    $index_defs =~ s/\s+$//;

    my ($tag, $subfield) = split /\$/, $field, 2;

    return {
        tag        => $tag,
        subfield   => $subfield,
        index_defs => [
            map { _parse_grs1_index_def($_) } split /\s*,\s*/, $index_defs
        ],
    };
}
# Parse a single index definition of the form
# 'name[:index_type[:offset[:length]]]'.
#
# Arguments:
#   $index_def - one colon-separated index definition
#
# Returns a hashref with the keys name, index_type, offset and length;
# components absent from the definition are undef, except index_type.
sub _parse_grs1_index_def {
    my $index_def = shift;

    # -1 keeps empty trailing components instead of dropping them
    my ($name, $index_type, $offset, $length) = split /:/, $index_def, -1;

    # if the original index definition didn't specify an index
    # type, set it 'w' -- the DOM filter needs the index type
    # to be specified explicitly.  An empty type ('name:') counts as
    # unspecified as well.
    $index_type = 'w' unless defined $index_type && length $index_type;

    return {
        name       => $name,
        index_type => $index_type,
        offset     => $offset,
        length     => $length,
    };
}
# Append the DOM representation of one parsed records.abs line to the
# index-definition tree.
#
# Arguments (positional):
#   $dom_cfg     - document object used to create nodes
#   $root        - element that receives the new nodes
#   $grs1_def    - hashref for one line (tag, subfield, index_defs,
#                  lineno, orig_def)
#   $marcflavour - accepted for interface compatibility; not consulted here
#
# A comment quoting the source line is always appended.  After that, at
# most one kind of definition is added, chosen from the tag/subfield.
sub _append_grs1_def_to_dom_cfg {
    my $dom_cfg     = shift;
    my $root        = shift;
    my $grs1_def    = shift;
    my $marcflavour = shift;

    # Record where this definition came from.
    my $comment = $dom_cfg->createComment(
        'record.abs line ' . $grs1_def->{lineno} . ': ' . $grs1_def->{orig_def}
    );
    $root->appendChild($comment);

    my $tag      = $grs1_def->{tag};
    my $subfield = $grs1_def->{subfield};

    # Lines that did not parse into a tag contribute only the comment.
    return unless defined $tag;

    if (defined $subfield) {
        # specific subfield(s) of a field
        my $dom_def = $dom_cfg->createElementNS($idxNS, 'index_subfields');
        $dom_def->setAttribute('tag',       $tag);
        $dom_def->setAttribute('subfields', $subfield);
        _append_target_indexes($dom_cfg, $dom_def, $grs1_def);
        $root->appendChild($dom_def);
    }
    elsif ($tag eq 'leader') {
        _append_grs1_defs_for_leader($dom_cfg, $root, $grs1_def);
    }
    elsif ($tag < 10) {
        # we're a control field
        # NOTE(review): a non-numeric tag other than 'leader' numifies to 0
        # and therefore also lands here (with a warning when warnings are
        # enabled) -- confirm whether such tags can occur.
        _append_grs1_defs_for_control_field($dom_cfg, $root, $grs1_def);
    }
    else {
        # we're indexing an entire variable data field
        my $dom_def = $dom_cfg->createElementNS($idxNS, 'index_data_field');
        $dom_def->setAttribute('tag', $tag);
        _append_target_indexes($dom_cfg, $dom_def, $grs1_def);
        $root->appendChild($dom_def);
    }

    return;
}
# Append one target_index child to $dom_def for every index definition
# attached to a parsed records.abs line.
#
# Arguments (positional):
#   $dom_cfg  - document object used to create nodes
#   $dom_def  - element that receives the target_index children
#   $grs1_def - hashref whose index_defs entry is an arrayref of parsed
#               index definitions
sub _append_target_indexes {
    my $dom_cfg  = shift;
    my $dom_def  = shift;
    my $grs1_def = shift;

    foreach my $index_def (@{ $grs1_def->{index_defs} }) {
        _append_one_target_index($dom_cfg, $dom_def, $index_def);
    }

    return;
}
# Append a single target_index element, whose text is 'name:index_type',
# to $dom_def.
#
# Arguments (positional):
#   $dom_cfg   - document object used to create the element
#   $dom_def   - element that receives the new child
#   $index_def - hashref with at least the keys name and index_type
sub _append_one_target_index {
    my $dom_cfg   = shift;
    my $dom_def   = shift;
    my $index_def = shift;

    my $tgt_idx    = $dom_cfg->createElementNS($idxNS, 'target_index');
    my $index_name = "$index_def->{name}:$index_def->{index_type}";
    $tgt_idx->appendText($index_name);
    $dom_def->appendChild($tgt_idx);

    return;
}
# Append one index_leader element to $root for each index definition on a
# 'leader' line.
#
# Arguments (positional):
#   $dom_cfg  - document object used to create nodes
#   $root     - element that receives the index_leader elements
#   $grs1_def - hashref whose index_defs entry lists the parsed index
#               definitions
sub _append_grs1_defs_for_leader {
    my $dom_cfg  = shift;
    my $root     = shift;
    my $grs1_def = shift;

    foreach my $index_def (@{ $grs1_def->{index_defs} }) {
        my $dom_def = $dom_cfg->createElementNS($idxNS, 'index_leader');

        # offset/length are emitted only when the definition has both
        if (defined $index_def->{offset} && defined $index_def->{length}) {
            $dom_def->setAttribute('offset', $index_def->{offset});
            $dom_def->setAttribute('length', $index_def->{length});
        }

        _append_one_target_index($dom_cfg, $dom_def, $index_def);
        $root->appendChild($dom_def);
    }

    return;
}
# Append one index_control_field element to $root for each index
# definition on a control-field line.
#
# Arguments (positional):
#   $dom_cfg  - document object used to create nodes
#   $root     - element that receives the index_control_field elements
#   $grs1_def - hashref providing tag and index_defs
sub _append_grs1_defs_for_control_field {
    my $dom_cfg  = shift;
    my $root     = shift;
    my $grs1_def = shift;

    foreach my $index_def (@{ $grs1_def->{index_defs} }) {
        my $dom_def = $dom_cfg->createElementNS($idxNS, 'index_control_field');
        $dom_def->setAttribute('tag', $grs1_def->{tag});

        # offset/length are emitted only when the definition has both
        if (defined $index_def->{offset} && defined $index_def->{length}) {
            $dom_def->setAttribute('offset', $index_def->{offset});
            $dom_def->setAttribute('length', $index_def->{length});
        }

        _append_one_target_index($dom_cfg, $dom_def, $index_def);
        $root->appendChild($dom_def);
    }

    return;
}