[Patches] [PATCH] Bug 5166 - Reintroduce a zebraqueue daemon, make several rebuild_zebra.pl functions a lib
koha-patchbot at kohaaloha.com
koha-patchbot at kohaaloha.com
Thu Dec 22 03:05:03 NZDT 2011
From: Tomas Cohen Arazi <tomascohen at gmail.com>
Date: Fri, 25 Nov 2011 19:14:12 -0300
Subject: [PATCH] Bug 5166 - Reintroduce a zebraqueue daemon, make several rebuild_zebra.pl functions a lib
Refactorized. Removed per-language databases scripts. Created atomicupdate file to
be introduced later in updatedatabase.pl by the QA/RM.
Is was refactored against origin/master.
Anything I could do to help on this bug just let me know so there are no further delays.
Regards
To+
---
C4/Catalog/Zebra.pm | 474 ++++++++++++++++++
.../bug_5166-zebraqueuedaemon_is_back.pl | 14 +
installer/data/mysql/sysprefs.sql | 5 +
.../en/modules/admin/preferences/searching.pref | 33 ++
misc/bin/koha-zebraqueue-ctl.sh | 6 +-
misc/bin/zebraqueue_daemon.pl | 518 +++-----------------
6 files changed, 604 insertions(+), 446 deletions(-)
create mode 100644 C4/Catalog/Zebra.pm
create mode 100644 installer/data/mysql/atomicupdate/bug_5166-zebraqueuedaemon_is_back.pl
diff --git a/C4/Catalog/Zebra.pm b/C4/Catalog/Zebra.pm
new file mode 100644
index 0000000..458694c
--- /dev/null
+++ b/C4/Catalog/Zebra.pm
@@ -0,0 +1,474 @@
+package C4::Catalog::Zebra;
+#
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+
+# Derived from rebuild_zebra.pl (2005-08-11) Paul Poulain and others
+# Rewriten 02/03/2011 by Tomas Cohen Arazi (tomascohen at gmail.com)
+# Universidad Nacional de Cordoba / Argentina
+
+# Library for managing updates in zebra, usually from zebraqueue
+
+use strict;
+use warnings;
+use C4::Context;
+use Getopt::Long;
+use File::Temp qw/ tempdir /;
+use File::Path;
+use Time::HiRes qw(time);
+use C4::Biblio;
+use C4::AuthoritiesMarc;
+
+use vars qw($VERSION @ISA @EXPORT);
+
+BEGIN {
+ # set the version for version checking
+ $VERSION = 0.01;
+
+ require Exporter;
+ @ISA = qw(Exporter);
+ @EXPORT = qw(
+ &UpdateAuths
+ &UpdateBiblios
+ &UpdateAuthsAndBiblios
+ &IndexZebraqueueRecords
+ );
+}
+
+
+=head1 NAME
+
+C4::Catalog::Zebra
+
+Comment:
+ This should be used when merging the rest of the rebuild_zebra.pl indexing logic
+ my $nosanitize = (C4::Context->preference('ZebraNoSanitize')) ? 1 : 0;
+
+
+=head2 UpdateAuths
+
+ ( $num_records_updated ) = &UpdateAuths ();
+
+returns the number of updated+deleted authority records
+
+=cut
+
+sub UpdateAuths
+{
+ # Update authorities
+ return IndexZebraqueueRecords('authority');
+}
+
+=head2 UpdateBiblios
+
+ ( $num_records_updated ) = &UpdateBiblios ();
+
+returns the number of updated+deleted biblio records
+
+=cut
+
+sub UpdateBiblios
+{
+ # Update authorities
+ return IndexZebraqueueRecords('biblio');
+}
+
+=head2 UpdateAuthsAndBiblios
+
+ ( $num_records_updated ) = &UpdateAuthsAndBiblios ();
+
+returns the number of updated+deleted authority and biblio records
+
+=cut
+
+sub UpdateAuthsAndBiblios
+{
+ my $ret;
+ # Update authorities
+ $ret = UpdateAuths();
+
+ # Update biblios
+ $ret += UpdateBiblios();
+
+ return $ret;
+}
+
+=head2 IndexZebraqueueRecords
+
+ ( $num_records_updated ) = &IndexZebraqueueRecords ($record_type);
+
+returns the number of updated+deleted $record_type records
+
+Comment :
+$record_type can be either 'biblio' or 'authority'
+
+=cut
+
+sub IndexZebraqueueRecords
+{
+ my ($record_type) = @_;
+ my $as_xml = (C4::Context->preference('ZebraUseXml')) ? 1 : 0;
+ my $noxml = ($as_xml) ? 0 : 1;
+ my $record_format = ($as_xml) ? 'marcxml' : 'iso2709' ;
+
+ my ($num_records_updated,$num_records_deleted);
+
+ $num_records_deleted = (IndexZebraqueueByAction('deleted',$record_type,$record_format,$as_xml,$noxml)||0);
+ $num_records_updated = (IndexZebraqueueByAction('updated',$record_type,$record_format,$as_xml,$noxml)||0);
+
+ return $num_records_deleted + $num_records_updated;
+}
+
+=head2 IndexZebraqueueByAction
+
+ ( $num_records_updated ) = &IndexZebraqueueByAction ($action,$record_type,
+ $record_format,$as_xml,$noxml);
+
+returns the number of updated+deleted $record_type records
+
+Comment :
+$record_type can be 'biblio' or 'authority'
+$record_format can be 'marcxml' or 'iso2709'
+$action can be 'updated' or 'deleted'
+$as_xml and $noxml are maintained for legacy reasons, one is enough. They
+indicate whether to use marcxml for indexing in zebra or iso2709. They should
+all be deduced from C4::Context->preference('ZebraUseXml').
+
+=cut
+
+sub IndexZebraqueueByAction
+{
+ my ($action,$record_type,$record_format,$as_xml,$noxml) = @_;
+ my ($num_records_exported,$ret,$zaction);
+
+ if ($action eq 'updated' or $action eq 'deleted') {
+ # get records by action
+ my $entries = select_zebraqueue_records($record_type, $action);
+ # Create tmp dir
+ my $directory = File::Temp->newdir();
+
+ # get records from zebraqueue, export to file for zebraidx
+ if ($action eq 'updated') {
+ $zaction = 'update';
+ $num_records_exported = export_marc_records_from_list($record_type,
+ $entries, "$directory", $as_xml, $noxml);
+ } else {
+ # $action eq 'deleted'
+ $zaction = 'delete';
+ $num_records_exported = generate_deleted_marc_records($record_type,
+ $entries, "$directory", $as_xml);
+ }
+
+ if ($num_records_exported) {
+ # log export
+ my $time = localtime(time);
+ print "$time $num_records_exported $record_type record(s) exported for $zaction\n";
+ # TODO error handling / and better logging
+ $ret = DoIndexing($record_type,$zaction,"$directory",$record_format);
+ if ($ret) {
+ print "$time $num_records_exported $record_type record(s) $action\n";
+ mark_zebraqueue_batch_done($entries);
+ print "$time $num_records_exported $record_type record(s) marked done in zebraqueue\n";
+ }
+ # /TODO
+ }
+ } else {
+ # Wrong action
+ $ret = -1;
+ }
+
+ return $ret;
+}
+
+
+sub select_zebraqueue_records {
+ my ($record_type, $update_type) = @_;
+
+ my $dbh = C4::Context->dbh;
+ my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
+ my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';
+
+ my $sth = $dbh->prepare(<<'SQL');
+ SELECT id, biblio_auth_number
+ FROM zebraqueue
+ WHERE server = ?
+ AND operation = ?
+ AND done = 0
+ ORDER BY id DESC;
+SQL
+
+ $sth->execute($server, $op);
+ my $entries = $sth->fetchall_arrayref({});
+}
+
+sub mark_zebraqueue_batch_done {
+ my ($entries) = @_;
+
+ my $dbh = C4::Context->dbh;
+
+ $dbh->{AutoCommit} = 0;
+ my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
+ $dbh->commit();
+ foreach my $id (map { $_->{id} } @$entries) {
+ $sth->execute($id);
+ }
+ $dbh->{AutoCommit} = 1;
+}
+
+sub export_marc_records_from_list {
+ my ($record_type, $entries, $directory, $as_xml, $noxml) = @_;
+ my $verbose_logging = (C4::Context->preference('ZebraqueueVerboseLogging')) ? 1 : 0;
+
+ my $num_exported = 0;
+ open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
+ my $i = 0;
+ my %found = ();
+ foreach my $record_number ( map { $_->{biblio_auth_number} }
+ grep { !$found{ $_->{biblio_auth_number} }++ }
+ @$entries ) {
+ print "." if ( $verbose_logging );
+ print "\r$i" unless ($i++ %100 or !$verbose_logging);
+ my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
+ if (defined $marc) {
+ # FIXME - when more than one record is exported and $as_xml is true,
+ # the output file is not valid XML - it's just multiple <record> elements
+ # strung together with no single root element. zebraidx doesn't seem
+ # to care, though, at least if you're using the GRS-1 filter. It does
+ # care if you're using the DOM filter, which requires valid XML file(s).
+ print OUT ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc();
+ $num_exported++;
+ }
+ }
+ print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
+ close OUT;
+ return $num_exported;
+}
+
+sub generate_deleted_marc_records {
+ my ($record_type, $entries, $directory, $as_xml) = @_;
+ my $verbose_logging = (C4::Context->preference('ZebraqueueVerboseLogging')) ? 1 : 0;
+
+ my $num_exported = 0;
+ open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
+ my $i = 0;
+ foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
+ print "\r$i" unless ($i++ %100 or !$verbose_logging);
+ print "." if ( $verbose_logging );
+
+ my $marc = MARC::Record->new();
+ if ($record_type eq 'biblio') {
+ fix_biblio_ids($marc, $record_number, $record_number);
+ } else {
+ fix_authority_id($marc, $record_number);
+ }
+ if (C4::Context->preference("marcflavour") eq "UNIMARC") {
+ fix_unimarc_100($marc);
+ }
+
+ print OUT ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc();
+ $num_exported++;
+ }
+ print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
+ close OUT;
+ return $num_exported;
+}
+
+sub get_corrected_marc_record {
+ my ($record_type, $record_number, $noxml) = @_;
+
+ my $marc = get_raw_marc_record($record_type, $record_number, $noxml);
+
+ if (defined $marc) {
+ fix_leader($marc);
+ if ($record_type eq 'biblio') {
+ my $succeeded = fix_biblio_ids($marc, $record_number);
+ return unless $succeeded;
+ } else {
+ fix_authority_id($marc, $record_number);
+ }
+ if (C4::Context->preference("marcflavour") eq "UNIMARC") {
+ fix_unimarc_100($marc);
+ }
+ }
+
+ return $marc;
+}
+
+sub get_raw_marc_record {
+ my ($record_type, $record_number, $noxml) = @_;
+ my $dbh = C4::Context->dbh;
+
+ my $marc;
+ if ($record_type eq 'biblio') {
+ if ($noxml) {
+ my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
+ $fetch_sth->execute($record_number);
+ if (my ($blob) = $fetch_sth->fetchrow_array) {
+ $marc = MARC::Record->new_from_usmarc($blob);
+ $fetch_sth->finish();
+ } else {
+ return; # failure to find a bib is not a problem -
+ # a delete could have been done before
+ # trying to process a record update
+ }
+ } else {
+ eval { $marc = GetMarcBiblio($record_number,1); };
+ if ($@) {
+ # here we do warn since catching an exception
+ # means that the bib was found but failed
+ # to be parsed
+ warn "error retrieving biblio $record_number";
+ return;
+ }
+ }
+ } else {
+ eval { $marc = GetAuthority($record_number); };
+ if ($@) {
+ warn "error retrieving authority $record_number";
+ return;
+ }
+ }
+ return $marc;
+}
+
+sub fix_leader {
+ # FIXME - this routine is suspect
+ # It blanks the Leader/00-05 and Leader/12-16 to
+ # force them to be recalculated correct when
+ # the $marc->as_usmarc() or $marc->as_xml() is called.
+ # But why is this necessary? It would be a serious bug
+ # in MARC::Record (definitely) and MARC::File::XML (arguably)
+ # if they are emitting incorrect leader values.
+ my $marc = shift;
+
+ my $leader = $marc->leader;
+ substr($leader, 0, 5) = ' ';
+ substr($leader, 10, 7) = '22 ';
+ $marc->leader(substr($leader, 0, 24));
+}
+
+sub fix_biblio_ids {
+ # FIXME - it is essential to ensure that the biblionumber is present,
+ # otherwise, Zebra will choke on the record. However, this
+ # logic belongs in the relevant C4::Biblio APIs.
+ my $marc = shift;
+ my $biblionumber = shift;
+ my $biblioitemnumber;
+ my $dbh = C4::Context->dbh;
+
+ if (@_) {
+ $biblioitemnumber = shift;
+ } else {
+ my $sth = $dbh->prepare(
+ "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
+ $sth->execute($biblionumber);
+ ($biblioitemnumber) = $sth->fetchrow_array;
+ $sth->finish;
+ unless ($biblioitemnumber) {
+ warn "failed to get biblioitemnumber for biblio $biblionumber";
+ return 0;
+ }
+ }
+
+ # FIXME - this is cheating on two levels
+ # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
+ # 2. Making sure that the biblionumber and biblioitemnumber are correct and
+ # present in the MARC::Record object ought to be part of GetMarcBiblio.
+ #
+ # On the other hand, this better for now than what rebuild_zebra.pl used to
+ # do, which was duplicate the code for inserting the biblionumber
+ # and biblioitemnumber
+ C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);
+
+ return 1;
+}
+
+sub fix_authority_id {
+ # FIXME - as with fix_biblio_ids, the authid must be present
+ # for Zebra's sake. However, this really belongs
+ # in C4::AuthoritiesMarc.
+ my ($marc, $authid) = @_;
+ unless ($marc->field('001') and $marc->field('001')->data() eq $authid){
+ $marc->delete_field($marc->field('001'));
+ $marc->insert_fields_ordered(MARC::Field->new('001',$authid));
+ }
+}
+
+sub fix_unimarc_100 {
+ # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
+ my $marc = shift;
+
+ my $string;
+ if ( length($marc->subfield( 100, "a" )) == 35 ) {
+ $string = $marc->subfield( 100, "a" );
+ my $f100 = $marc->field(100);
+ $marc->delete_field($f100);
+ }
+ else {
+ $string = POSIX::strftime( "%Y%m%d", localtime );
+ $string =~ s/\-//g;
+ $string = sprintf( "%-*s", 35, $string );
+ }
+ substr( $string, 22, 6, "frey50" );
+ unless ( length($marc->subfield( 100, "a" )) == 35 ) {
+ $marc->delete_field($marc->field(100));
+ $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
+ }
+}
+
+=head2 DoIndexing
+
+ ( $error_code ) = &DoIndexing($record_type,$op,$record_dir,$record_format);
+
+returns the corresponding zebraidx error code
+
+Comment :
+$record_type can be 'biblio' or 'authority'
+$zaction can be 'delete' or 'update'
+$record_dir is the directory where the exported records are
+$record_format can be 'marcxml' or 'iso2709'
+
+=cut
+
+sub DoIndexing {
+ my ($record_type, $zaction, $record_dir, $record_format) = @_;
+ my $zebra_server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
+ my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
+ my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
+ my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
+ my $noshadow = (C4::Context->preference('ZebraNoshadow')) ? '-n' : '';
+ my $zebraidx_log_opt = " -v none,fatal ";
+
+ # TODO better error handling!!
+ system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $zaction $record_dir");
+ system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
+ # /TODO
+
+ return 1;
+}
+
+
+END { }
+
+1;
+__END__
+
+=head1 AUTHOR
+
+Koha Development Team <http://koha-community.org/>
+
+Tomas Cohen Arazi tomascohen at gmail.com
+
+=cut
diff --git a/installer/data/mysql/atomicupdate/bug_5166-zebraqueuedaemon_is_back.pl b/installer/data/mysql/atomicupdate/bug_5166-zebraqueuedaemon_is_back.pl
new file mode 100644
index 0000000..e7e92df
--- /dev/null
+++ b/installer/data/mysql/atomicupdate/bug_5166-zebraqueuedaemon_is_back.pl
@@ -0,0 +1,14 @@
+#! /usr/bin/perl
+use strict;
+use warnings;
+use C4::Context;
+my $dbh=C4::Context->dbh;
+
+
+$dbh->do("INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraUseXml','1','Tell Zebra to use MARCXML instead of ISO2907 for indexing. Very important for libraries with records bigger than the allowed by ISO2907 (e.g. with lots of items in a single record).',NULL,'YesNo')");
+$dbh->do("INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraNoshadow','0','Tell Zebra to use shadow records when updating. Prevents locking on records while updating the database. Refer to zebra documentation for more info on the drawbacks.',NULL,'YesNo')");
+$dbh->do("INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraqueueVerboseLogging','0','Tell zebraqueue daemon to be more verbose on logging.',NULL,'YesNo')");
+$dbh->do("INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraBiblioUpdateRatio','6','By default, tell zebraqueue daemon to search for updates every ZebraBiblioUpdateRatio*ZebraAuthUpdateRatio seconds in the biblios database.',NULL,'Integer')");
+$dbh->do("INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraAuthUpdateRatio','10','By default, tell zebraqueue daemon to search for updates every ZebraAuthUpdateRatio seconds in the authorities database.',NULL,'Integer')");
+
+print "Upgrade done (Add sysprefs to control zebraqueue_daemon scripts: ZebraUseXml, ZebraNoshadow, ZebraqueueVerboseLogging, ZebraBiblioUpdateRatio, ZebraAuthUpdateRatio)\n";
diff --git a/installer/data/mysql/sysprefs.sql b/installer/data/mysql/sysprefs.sql
index 128c9b2..465c14f 100755
--- a/installer/data/mysql/sysprefs.sql
+++ b/installer/data/mysql/sysprefs.sql
@@ -328,4 +328,9 @@ INSERT INTO systempreferences (variable,value,explanation,options,type) VALUES('
INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('OpacKohaUrl','1',"Show 'Powered by Koha' text on OPAC footer.",NULL,NULL);
INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES('EasyAnalyticalRecords','0','If on, display in the catalogue screens tools to easily setup analytical record relationships','','YesNo');
INSERT INTO systempreferences (variable,value,explanation,options,type) VALUES('OpacShowRecentComments',0,'If ON a link to recent comments will appear in the OPAC masthead',NULL,'YesNo');
+INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraUseXml','1','Tell Zebra to use MARCXML instead of ISO2907 for indexing. Very important for libraries with records bigger than the allowed by ISO2907 (e.g. with lots of items in a single record).',NULL,'YesNo');
+INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraNoshadow','0','Tell Zebra to use shadow records when updating. Prevents locking on records while updating the database. Refer to zebra documentation for more info on the drawbacks.',NULL,'YesNo');
+INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraqueueVerboseLogging','0','Tell zebraqueue daemon to be more verbose on logging.',NULL,'YesNo');
+INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraBiblioUpdateRatio','6','By default, tell zebraqueue daemon to search for updates every ZebraBiblioUpdateRatio*ZebraAuthUpdateRatio seconds in the biblios database.',NULL,'Integer');
+INSERT INTO `systempreferences` (variable,value,explanation,options,type) VALUES ('ZebraAuthUpdateRatio','10','By default, tell zebraqueue daemon to search for updates every ZebraAuthUpdateRatio seconds in the authorities database.',NULL,'Integer');
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/searching.pref b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/searching.pref
index 12075ec..ab62c5c 100644
--- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/searching.pref
+++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/searching.pref
@@ -69,6 +69,39 @@ Searching:
yes: Include
no: "Don't include"
- subdivisions for searches generated by clicking on subject tracings.
+ -
+ - pref: ZebraUseXml
+ type: boolean
+ choices:
+ no: "Don't use"
+ yes: Use
+ - MARCXML instead of ISO2907 for indexing in Zebra. Very important for libraries with records bigger than the allowed by ISO2907 (e.g. with lots of items in a single record).
+ -
+ - pref: ZebraNoshadow
+ type: boolean
+ choices:
+ yes: "Don't use"
+ no: Use
+ - shadow records when updating Zebra indexes. Prevents locking on records while updating the database. Refer to zebra documentation for more info on the drawbacks.
+ -
+ - Set to
+ - pref: ZebraAuthUpdateRatio
+ class: integer
+ default: 10
+ - seconds the interval for searching authority record updates.
+ -
+ - Set to
+ - pref: ZebraBiblioUpdateRatio
+ class: integer
+ default: 6
+ - x ZebraAuthUpdateRatio seconds the interval for searching record updates.
+ -
+ - pref: ZebraqueueVerboseLogging
+ type: boolean
+ choices:
+ no: "Don't use"
+ yes: Use
+ - verbose logging in zebraqueue daemon.
Search Form:
-
- Show checkboxes to search by
diff --git a/misc/bin/koha-zebraqueue-ctl.sh b/misc/bin/koha-zebraqueue-ctl.sh
index d3444f0..4f3b26b 100755
--- a/misc/bin/koha-zebraqueue-ctl.sh
+++ b/misc/bin/koha-zebraqueue-ctl.sh
@@ -23,15 +23,15 @@ fi
case "$1" in
start)
echo "Starting Zebraqueue Daemon"
- daemon --name=$NAME --errlog=$ERRLOG --stdout=$STDOUT --output=$OUTPUT --verbose=1 --respawn --delay=30 $OTHERUSER -- perl -I $PERL5LIB $ZEBRAQUEUE -f $KOHA_CONF
+ daemon --name=$NAME --errlog=$ERRLOG --stdout=$STDOUT --output=$OUTPUT --verbose=1 --respawn --delay=30 $OTHERUSER -- perl -I $PERL5LIB $ZEBRAQUEUE
;;
stop)
echo "Stopping Zebraqueue Daemon"
- daemon --name=$NAME --errlog=$ERRLOG --stdout=$STDOUT --output=$OUTPUT --verbose=1 --respawn --delay=30 $OTHERUSER --stop -- perl -I $PERL5LIB $ZEBRAQUEUE -f $KOHA_CONF
+ daemon --name=$NAME --errlog=$ERRLOG --stdout=$STDOUT --output=$OUTPUT --verbose=1 --respawn --delay=30 $OTHERUSER --stop -- perl -I $PERL5LIB $ZEBRAQUEUE
;;
restart)
echo "Restarting the Zebraqueue Daemon"
- daemon --name=$NAME --errlog=$ERRLOG --stdout=$STDOUT --output=$OUTPUT --verbose=1 --respawn --delay=30 $OTHERUSER --restart -- perl -I $PERL5LIB $ZEBRAQUEUE -f $KOHA_CONF
+ daemon --name=$NAME --errlog=$ERRLOG --stdout=$STDOUT --output=$OUTPUT --verbose=1 --respawn --delay=30 $OTHERUSER --restart -- perl -I $PERL5LIB $ZEBRAQUEUE
;;
*)
echo "Usage: /etc/init.d/$NAME {start|stop|restart}"
diff --git a/misc/bin/zebraqueue_daemon.pl b/misc/bin/zebraqueue_daemon.pl
index 6181a94..94525d7 100755
--- a/misc/bin/zebraqueue_daemon.pl
+++ b/misc/bin/zebraqueue_daemon.pl
@@ -1,475 +1,107 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
-# daemon to watch the zebraqueue and update zebra as needed
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+# Suite 330, Boston, MA 02111-1307 USA
+
+# Writen 02/03/2011 by Tomas Cohen Arazi (tomascohen at gmail.com)
+# Universidad Nacional de Cordoba / Argentina
+
+# Daemon to watch the zebraqueue table and update zebra indexes as needed
use strict;
-#use warnings; FIXME - Bug 2505
BEGIN {
# find Koha's Perl modules
# test carefully before changing this
use FindBin;
- eval { require "$FindBin::Bin/kohalib.pl" };
+ eval { require "$FindBin::Bin/../kohalib.pl" };
}
-
-use POE qw(Wheel::SocketFactory Wheel::ReadWrite Filter::Stream Driver::SysRW);
-use Unix::Syslog qw(:macros);
-
+use POE;
+use Time::HiRes qw(time);
use C4::Context;
-use C4::Biblio;
-use C4::Search;
-use C4::AuthoritiesMarc;
-use XML::Simple;
-use POSIX;
-use utf8;
-
-
-# wait periods governing connection attempts
-my $min_connection_wait = 1; # start off at 1 second
-my $max_connection_wait = 1024; # max about 17 minutes
-
-# keep separate wait period for bib and authority Zebra databases
-my %zoom_connection_waits = ();
-
-my $db_connection_wait = $min_connection_wait;
-
-# ZOOM and Z39.50 errors that are potentially
-# resolvable by connecting again and retrying
-# the operation
-my %retriable_zoom_errors = (
- 10000 => 'ZOOM_ERROR_CONNECT',
- 10001 => 'ZOOM_ERROR_MEMORY',
- 10002 => 'ZOOM_ERROR_ENCODE',
- 10003 => 'ZOOM_ERROR_DECODE',
- 10004 => 'ZOOM_ERROR_CONNECTION_LOST',
- 10005 => 'ZOOM_ERROR_INIT',
- 10006 => 'ZOOM_ERROR_INTERNAL',
- 10007 => 'ZOOM_ERROR_TIMEOUT',
-);
-
-# structure to store updates that have
-# failed and are to be retrieved. The
-# structure is a hashref of hashrefs,
-# e.g.,
-#
-# $postoned_updates->{$server}->{$record_number} = 1;
-#
-# If an operation is attempted and fails because
-# of a retriable error (see above), the daemon
-# will try several times to recover as follows:
-#
-# 1. close and reopen the connection to the
-# Zebra server, unless the error was a timeout,
-# in which case
-# 2. retry the operation
-#
-# If, after trying this five times, the operation still
-# fails, the daemon will mark the record number as
-# postponed, and try to process other entries in
-# zebraqueue. When an update is postponed, the
-# error will be reported to syslog.
-#
-# If more than 100 postponed updates are
-# accumulated, the daemon will assume that
-# something is seriously wrong, complain loudly,
-# and abort. If running under the daemon(1) command,
-# this means that the daemon will respawn.
-#
-my $num_postponed_updates = 0;
-my $postponed_updates = {};
-
-my $max_operation_attempts = 5;
-my $max_postponed_updates = 100;
-
-# Zebra connection timeout
-my $zconn_timeout = 30;
-my $zconn_timeout_multiplier = 1.5;
-my $max_zconn_timeout = 120;
-
-my $ident = "Koha Zebraqueue ";
-
-my $debug = 0;
-Unix::Syslog::openlog $ident, LOG_PID, LOG_LOCAL0;
-
-Unix::Syslog::syslog LOG_INFO, "Starting Zebraqueue log at " . scalar localtime(time) . "\n";
-
-sub handler_start {
-
- # Starts session. Only ever called once only really used to set an alias
- # for the POE kernel
- my ( $kernel, $heap, $session ) = @_[ KERNEL, HEAP, SESSION ];
-
- my $time = localtime(time);
- Unix::Syslog::syslog LOG_INFO, "$time POE Session ", $session->ID, " has started.\n";
-
- # check status
-# $kernel->yield('status_check');
- $kernel->yield('sleep');
-}
-
-sub handler_sleep {
-
- # can be used to slow down loop execution if needed
- my ( $kernel, $heap, $session ) = @_[ KERNEL, HEAP, SESSION ];
- use Time::HiRes qw (sleep);
- Time::HiRes::sleep(0.5);
- #sleep 1;
- $kernel->yield('status_check');
-}
-
-sub handler_check {
- # check if we need to do anything, at the moment just checks the zebraqueue, it could check other things too
- my ( $kernel, $heap, $session ) = @_[ KERNEL, HEAP, SESSION ];
- my $dbh = get_db_connection();
- my $sth = $dbh->prepare("SELECT count(*) AS opcount FROM zebraqueue WHERE done = 0");
- $sth->execute;
- my $data = $sth->fetchrow_hashref();
- if ($data->{'opcount'} > 0) {
- Unix::Syslog::syslog LOG_INFO, "$data->{'opcount'} operations waiting to be run\n";
- $sth->finish();
- $dbh->commit(); # needed so that we get current state of zebraqueue next time
- # we enter handler_check
- $kernel->yield('do_ops');
- }
- else {
- $sth->finish();
- $dbh->commit(); # needed so that we get current state of zebraqueue next time
- # we enter handler_check
- $kernel->yield('sleep');
- }
-}
-
-sub zebraop {
- # execute operations waiting in the zebraqueue
- my ( $kernel, $heap, $session ) = @_[ KERNEL, HEAP, SESSION ];
- my $dbh = get_db_connection();
- my $readsth = $dbh->prepare("SELECT id, biblio_auth_number, operation, server FROM zebraqueue WHERE done = 0 ORDER BY id DESC");
- $readsth->execute();
- Unix::Syslog::syslog LOG_INFO, "Executing zebra operations\n";
-
- my $completed_updates = {};
- ZEBRAQUEUE: while (my $data = $readsth->fetchrow_hashref()) {
- warn "Inside while loop" if $debug;
-
- my $id = $data->{'id'};
- my $op = $data->{'operation'};
- $op = 'recordDelete' if $op =~ /delete/i; # delete ops historically have been coded
- # either delete_record or recordDelete
- my $record_number = $data->{'biblio_auth_number'};
- my $server = $data->{'server'};
-
- next ZEBRAQUEUE if exists $postponed_updates->{$server}->{$record_number};
- next ZEBRAQUEUE if exists $completed_updates->{$server}->{$record_number}->{$op};
-
- my $ok = 0;
- my $record;
- if ($op eq 'recordDelete') {
- $ok = process_delete($dbh, $server, $record_number);
- }
- else {
- $ok = process_update($dbh, $server, $record_number, $id);
- }
- if ($ok == 1) {
- mark_done($dbh, $record_number, $op, $server);
- $completed_updates->{$server}->{$record_number}->{$op} = 1;
- if ($op eq 'recordDelete') {
- $completed_updates->{$server}->{$record_number}->{'specialUpdate'} = 1;
- }
- }
- }
- $readsth->finish();
- $dbh->commit();
- $kernel->yield('sleep');
-}
-
-sub process_delete {
- my $dbh = shift;
- my $server = shift;
- my $record_number = shift;
-
- my $record;
- my $ok = 0;
- eval {
- warn "Searching for record to delete" if $debug;
- # 1st read the record in zebra, we have to get it from zebra as its no longer in the db
- my $Zconn = get_zebra_connection($server);
- my $results = $Zconn->search_pqf( '@attr 1=Local-number '.$record_number);
- $results->option(elementSetName => 'marcxml');
- $record = $results->record(0)->raw();
- };
- if ($@) {
- # this doesn't exist, so no need to wail on zebra to delete it
- if ($@->code() eq 13) {
- $ok = 1;
- } else {
- # caught a ZOOM::Exception
- my $message = _format_zoom_error_message($@);
- postpone_update($server, $record_number, $message);
- }
- } else {
- # then, delete the record
- warn "Deleting record" if $debug;
- $ok = zebrado($record, 'recordDelete', $server, $record_number);
- }
- return $ok;
-}
-
-sub process_update {
- my $dbh = shift;
- my $server = shift;
- my $record_number = shift;
- my $id = shift;
-
- my $record;
- my $ok = 0;
-
- warn "Updating record" if $debug;
- # get the XML
- my $marcxml;
- if ($server eq "biblioserver") {
- my $marc = GetMarcBiblio($record_number);
- $marcxml = $marc->as_xml_record() if $marc;
- }
- elsif ($server eq "authorityserver") {
- $marcxml = C4::AuthoritiesMarc::GetAuthorityXML($record_number);
- }
- # check it's XML, just in case
- eval {
- my $hashed = XMLin($marcxml);
- }; ### is it a proper xml? broken xml may crash ZEBRA- slow but safe
- ## it's Broken XML-- Should not reach here-- but if it does -lets protect ZEBRA
- if ($@) {
- Unix::Syslog::syslog LOG_ERR, "$server record $record_number is malformed: $@";
- mark_done_by_id($dbh, $id, $server);
- $ok = 0;
- } else {
- # ok, we have everything, do the operation in zebra !
- $ok = zebrado($marcxml, 'specialUpdate', $server, $record_number);
- }
- return $ok;
-}
+use C4::Catalog::Zebra;
-sub mark_done_by_id {
- my $dbh = shift;
- my $id = shift;
- my $server = shift;
- my $delsth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ? AND server = ? AND done = 0");
- $delsth->execute($id, $server);
-}
-
-sub mark_done {
- my $dbh = shift;
- my $record_number = shift;
- my $op = shift;
- my $server = shift;
-
- my $delsth;
- if ($op eq 'recordDelete') {
- # if it's a deletion, we can delete every request on this biblio : in case the user
- # did a modif (or item deletion) just before biblio deletion, there are some specialUpdate
- # that are pending and can't succeed, as we don't have the XML anymore
- # so, delete everything for this biblionumber
- $delsth = $dbh->prepare_cached("UPDATE zebraqueue SET done = 1
- WHERE biblio_auth_number = ?
- AND server = ?
- AND done = 0");
- $delsth->execute($record_number, $server);
- } else {
- # if it's not a deletion, delete every pending specialUpdate for this biblionumber
- # in case the user add biblio, then X items, before this script runs
- # this avoid indexing X+1 times where just 1 is enough.
- $delsth = $dbh->prepare("UPDATE zebraqueue SET done = 1
- WHERE biblio_auth_number = ?
- AND operation = 'specialUpdate'
- AND server = ?
- AND done = 0");
- $delsth->execute($record_number, $server);
- }
-}
-sub zebrado {
- ###Accepts a $server variable thus we can use it to update biblios, authorities or other zebra dbs
- my ($record, $op, $server, $record_number) = @_;
+my $authUpdateRatio;
+my $biblioUpdateRatio;
+my $tickCounter;
- unless ($record) {
- my $message = "error updating index for $server $record $record_number: no source record";
- postpone_update($server, $record_number, $message);
- return 0;
- }
- my $attempts = 0;
- my $ok = 0;
- ATTEMPT: while ($attempts < $max_operation_attempts) {
- $attempts++;
- warn "Attempt $attempts for $op for $server $record_number" if $debug;
- my $Zconn = get_zebra_connection($server);
+sub handler_start
+{
+ my ( $kernel, $heap, $session ) = @_[ KERNEL, HEAP, SESSION ];
+ my $time = localtime(time);
- my $Zpackage = $Zconn->package();
- $Zpackage->option(action => $op);
- $Zpackage->option(record => $record);
+ print "$time Zebraqueue daemon started\n";
- eval { $Zpackage->send("update") };
- if ($@ && $@->isa("ZOOM::Exception")) {
- my $message = _format_zoom_error_message($@);
- my $error = $@->code();
- if (exists $retriable_zoom_errors{$error}) {
- warn "reattempting operation $op for $server $record_number" if $debug;
- warn "last Zebra error was $message" if $debug;
- $Zpackage->destroy();
-
- if ($error == 10007 and $zconn_timeout < $max_zconn_timeout) {
- # bump up connection timeout
- $zconn_timeout = POSIX::ceil($zconn_timeout * $zconn_timeout_multiplier);
- $zconn_timeout = $max_zconn_timeout if $zconn_timeout > $max_zconn_timeout;
- Unix::Syslog::syslog LOG_INFO, "increased Zebra connection timeout to $zconn_timeout\n";
- warn "increased Zebra connection timeout to $zconn_timeout" if $debug;
- }
- next ATTEMPT;
- } else {
- postpone_update($server, $record_number, $message);
- }
- }
- # FIXME - would be more efficient to send a ES commit
- # after a batch of records, rather than commiting after
- # each one - Zebra handles updates relatively slowly.
- eval { $Zpackage->send('commit'); };
- if ($@) {
- # operation succeeded, but commit
- # did not - we have a problem
- my $message = _format_zoom_error_message($@);
- postpone_update($server, $record_number, $message);
- } else {
- $ok = 1;
- last ATTEMPT;
- }
- }
-
- unless ($ok) {
- my $message = "Made $attempts attempts to index $server record $record_number without success";
- postpone_update($server, $record_number, $message);
- }
-
- return $ok;
-}
+ # Initialize counter
+ $tickCounter = 0;
-sub postpone_update {
- my ($server, $record_number, $message) = @_;
- warn $message if $debug;
- $message .= "\n" unless $message =~ /\n$/;
- Unix::Syslog::syslog LOG_ERR, $message;
- $postponed_updates->{$server}->{$record_number} = 1;
+ # Get timer settings
+ $authUpdateRatio = (C4::Context->preference("ZebraAuthUpdateRatio")||10);
+ $biblioUpdateRatio = (C4::Context->preference("ZebraBiblioUpdateRatio")||6);
- $num_postponed_updates++;
- if ($num_postponed_updates > $max_postponed_updates) {
- warn "exiting, over $max_postponed_updates postponed indexing updates";
- Unix::Syslog::syslog LOG_ERR, "exiting, over $max_postponed_updates postponed indexing updates";
- Unix::Syslog::closelog;
- exit;
- }
-}
+ # Log
+ my $authPrefsString = (C4::Context->preference("ZebraAuthUpdateRatio") ? 'syspref' : 'default');
+ print "$time Authorities update ratio (secs): $authUpdateRatio ($authPrefsString)\n";
+ my $biblioUpdateSecs = $biblioUpdateRatio * $authUpdateRatio;
+ my $biblioPrefsString = (C4::Context->preference("ZebraBiblioUpdateRatio") ? 'syspref' : 'default');
+ print "$time Biblios update ratio (secs): $biblioUpdateSecs ($biblioPrefsString)\n";
-sub handler_stop {
- my $heap = $_[HEAP];
- my $time = localtime(time);
- Unix::Syslog::syslog LOG_INFO, "$time Session ", $_[SESSION]->ID, " has stopped.\n";
- delete $heap->{session};
+ $kernel->delay(tick => $authUpdateRatio);
}
-# get a DB connection
-sub get_db_connection {
- my $dbh;
-
- $db_connection_wait = $min_connection_wait unless defined $db_connection_wait;
- while (1) {
- eval {
- # note that C4::Context caches the
- # DB handle; C4::Context->dbh() will
- # check that handle first before returning
- # it. If the connection is bad, it
- # then tries (once) to create a new one.
- $dbh = C4::Context->dbh();
- };
- unless ($@) {
- # C4::Context->dbh dies if it cannot
- # establish a connection
- $db_connection_wait = $min_connection_wait;
- $dbh->{AutoCommit} = 0; # do this to reduce number of
- # commits to zebraqueue
- return $dbh;
- }
-
- # connection failed
- my $error = "failed to connect to DB: $DBI::errstr";
- warn $error if $debug;
- Unix::Syslog::syslog LOG_ERR, $error;
- sleep $db_connection_wait;
- $db_connection_wait *= 2 unless $db_connection_wait >= $max_connection_wait;
- }
+sub handler_stop
+{
+ my $heap = $_[HEAP];
+ my $time = localtime(time);
+ # Log
+ print "$time Zebraqueue daemon stopped - POE Session ended\n";
+ delete $heap->{session};
}
-# get a Zebra connection
-sub get_zebra_connection {
- my $server = shift;
-
- # start connection retry wait queue if necessary
- $zoom_connection_waits{$server} = $min_connection_wait unless exists $zoom_connection_waits{$server};
- # try to connect to Zebra forever until we succeed
- while (1) {
- # what follows assumes that C4::Context->Zconn
- # makes only one attempt to create a new connection;
- my $Zconn = C4::Context->Zconn($server, 0, 1, '', 'xml');
- $Zconn->option('timeout' => $zconn_timeout);
+sub handler_tick
+{
+ my ( $kernel, $heap, $session ) = @_[ KERNEL, HEAP, SESSION ];
+ my $ret = 0;
+ $tickCounter = $tickCounter + 1;
- # it is important to note that if the existing connection
- # stored by C4::Context has an error (any type of error)
- # from the last transaction, C4::Context->Zconn closes
- # it and establishes a new one. Therefore, the
- # following check will succeed if we have a new, good
- # connection or we're using a previously established
- # connection that has experienced no errors.
- if ($Zconn->errcode() == 0) {
- $zoom_connection_waits{$server} = $min_connection_wait;
- return $Zconn;
- }
- # connection failed
- my $error = _format_zoom_error_message($Zconn);
- warn $error if $debug;
- Unix::Syslog::syslog LOG_ERR, $error;
- sleep $zoom_connection_waits{$server};
- $zoom_connection_waits{$server} *= 2 unless $zoom_connection_waits{$server} >= $max_connection_wait;
- }
-}
-
-# given a ZOOM::Exception or
-# ZOOM::Connection object, generate
-# a human-reaable error message
-sub _format_zoom_error_message {
- my $err = shift;
+ # Calculate if we have to update biblios too
+ # Check: biblioUpdateRatio ?= tickCounter
+ if ($biblioUpdateRatio == $tickCounter) {
+ # Update biblios and auths
+ $ret = C4::Catalog::Zebra::UpdateAuthsAndBiblios();
+ # Reset counter
+ $tickCounter = 0;
+ } else {
+ # Update only auths
+ $ret = C4::Catalog::Zebra::UpdateAuths();
+ }
- my $message = "";
- if (ref($err) eq 'ZOOM::Connection') {
- $message = $err->errmsg() . " (" . $err->diagset . " " . $err->errcode() . ") " . $err->addinfo();
- } elsif (ref($err) eq 'ZOOM::Exception') {
- $message = $err->message() . " (" . $err->diagset . " " . $err->code() . ") " . $err->addinfo();
- }
- return $message;
+ $kernel->delay(tick => $authUpdateRatio);
}
POE::Session->create(
- inline_states => {
- _start => \&handler_start,
- sleep => \&handler_sleep,
- status_check => \&handler_check,
- do_ops => \&zebraop,
- _stop => \&handler_stop,
- },
+ inline_states => {
+ _start => \&handler_start,
+ tick => \&handler_tick,
+ _stop => \&handler_stop,
+ },
);
-# start the kernel
-$poe_kernel->run();
-
-Unix::Syslog::closelog;
-
-exit;
+POE::Kernel->run();
+exit 0;
--
1.7.0.4
More information about the Patches
mailing list