[Patches] [PATCH] Bug 7286: fix rebuild_zebra.pl to add rebuild_zebra_sliced.zsh
koha-patchbot at kohaaloha.com
koha-patchbot at kohaaloha.com
Sat Dec 3 14:16:32 NZDT 2011
From: =?UTF-8?q?St=C3=A9phane=20Delaune?= <stephane.delaune at biblibre.com>
Date: Fri, 2 Dec 2011 18:10:10 +0100
Subject: [PATCH] Bug 7286: fix rebuild_zebra.pl to add rebuild_zebra_sliced.zsh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Stéphane Delaune <stephane.delaune at biblibre.com>
---
misc/migration_tools/rebuild_zebra.pl | 54 ++++++++++-
misc/migration_tools/rebuild_zebra_sliced.zsh | 127 +++++++++++++++++++++++++
2 files changed, 176 insertions(+), 5 deletions(-)
create mode 100755 misc/migration_tools/rebuild_zebra_sliced.zsh
diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl
index fa87f22..8264121 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -34,6 +34,10 @@ my $want_help;
my $as_xml;
my $process_zebraqueue;
my $do_not_clear_zebraqueue;
+my $item_limit;
+my $min;
+my $where;
+my $ofset;
my $verbose_logging;
my $zebraidx_log_opt = " -v none,fatal,warn ";
my $result = GetOptions(
@@ -51,6 +55,10 @@ my $result = GetOptions(
'x' => \$as_xml,
'y' => \$do_not_clear_zebraqueue,
'z' => \$process_zebraqueue,
+ 'l:i' => \$item_limit,
+ 'where:s' => \$where,
+ 'min:i' => \$min,
+ 'ofset:i' => \$ofset,
'v' => \$verbose_logging,
);
@@ -78,6 +86,12 @@ if ( !$as_xml and $nosanitize ) {
die $msg;
}
+if ( $nosanitize and $item_limit ) {
+ my $msg = "Cannot specify both -item_limit and -nosanitize\n";
+ $msg .= "Please do '$0 --help' to see usage.\n";
+ die $msg;
+}
+
if ($process_zebraqueue and ($skip_export or $reset)) {
my $msg = "Cannot specify -r or -s if -z is specified\n";
$msg .= "Please do '$0 --help' to see usage.\n";
@@ -294,13 +308,21 @@ sub select_all_records {
}
sub select_all_authorities {
- my $sth = $dbh->prepare("SELECT authid FROM auth_header");
+ my $strsth = qq{SELECT authid FROM auth_header} . ( $where ? qq{ WHERE $where } : q{} ); # NOTE(review): --where is interpolated verbatim; trusted command-line input only
+ $strsth .= qq{ ORDER BY authid}; # fixed order, so successive -min/-ofset slices neither overlap nor skip rows
+ $strsth .= qq{ LIMIT $min} if ( $min && !$ofset ); # -min alone: return at most $min rows
+ $strsth .= q{ LIMIT } . ( $min || 0 ) . qq{,$ofset} if ($ofset); # -ofset: skip $min rows (0 when -min is 0 or absent), then return $ofset rows
+ my $sth = $dbh->prepare($strsth);
$sth->execute();
return $sth;
}
sub select_all_biblios {
- my $sth = $dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber");
+ my $strsth = qq{SELECT biblionumber FROM biblioitems} . ( $where ? qq{ WHERE $where } : q{} ); # NOTE(review): --where is interpolated verbatim; trusted command-line input only
+ $strsth .= qq{ ORDER BY biblionumber}; # keep the ordering of the replaced query, so -min/-ofset slices are stable
+ $strsth .= qq{ LIMIT $min} if ( $min && !$ofset ); # -min alone: return at most $min rows
+ $strsth .= q{ LIMIT } . ( $min || 0 ) . qq{,$ofset} if ($ofset); # -ofset: skip $min rows (0 when -min is 0 or absent), then return $ofset rows
+ my $sth = $dbh->prepare($strsth);
$sth->execute();
return $sth;
}
@@ -406,6 +428,7 @@ sub generate_deleted_marc_records {
my $marc = MARC::Record->new();
if ($record_type eq 'biblio') {
fix_biblio_ids($marc, $record_number, $record_number);
+ fix_biblio_items( $marc ) if $item_limit;
} else {
fix_authority_id($marc, $record_number);
}
@@ -431,10 +454,14 @@ sub get_corrected_marc_record {
if (defined $marc) {
fix_leader($marc);
- if ($record_type eq 'authority') {
- fix_authority_id($marc, $record_number);
+ if ( $record_type eq 'biblio' ) {
+ my $succeeded = fix_biblio_ids( $marc, $record_number );
+ fix_biblio_items( $marc ) if $item_limit;
+ return unless $succeeded;
+ } else {
+ fix_authority_id( $marc, $record_number );
}
- if (C4::Context->preference("marcflavour") eq "UNIMARC") {
+ if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
fix_unimarc_100($marc);
}
}
@@ -498,6 +525,18 @@ sub fix_leader {
$marc->leader(substr($leader, 0, 24));
}
+sub fix_biblio_items {
+ my ($record) = @_;
+ # Trim $record in place so that it keeps at most $item_limit item fields.
+ my ($item_tag) = GetMarcFromKohaField('items.itemnumber','');
+ # Snapshot the item fields first, then delete every one at index >= $item_limit.
+ my @item_fields = $record->field($item_tag);
+ for my $idx ( 0 .. $#item_fields ) {
+ next if $idx < $item_limit;
+ $record->delete_field( $item_fields[$idx] );
+ }
+}
+
sub fix_biblio_ids {
# FIXME - it is essential to ensure that the biblionumber is present,
# otherwise, Zebra will choke on the record. However, this
@@ -635,9 +674,14 @@ Parameters:
the same records - specify -y to override this.
Cannot be used with -z.
+ -l set a maximum number of exported items per biblio.
+ Doesn't work with -nosanitize.
+
-v increase the amount of logging. Normally only
warnings and errors from the indexing are shown.
+ -min 1234 minimum biblionumber
+ -ofset 1243 count biblios to process
-munge-config Deprecated option to try
to fix Zebra config files.
--help or -h show this message.
diff --git a/misc/migration_tools/rebuild_zebra_sliced.zsh b/misc/migration_tools/rebuild_zebra_sliced.zsh
new file mode 100755
index 0000000..1e9554d
--- /dev/null
+++ b/misc/migration_tools/rebuild_zebra_sliced.zsh
@@ -0,0 +1,127 @@
+#!/usr/bin/zsh
+
+# Copyright 2011 BibLibre SARL
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+#
+# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with Koha; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# Usage: rebuild_zebra_sliced.zsh [INCREMENT [BIBLIOSTART [BIBLIOEND]]]
+INCREMENT=$1
+BIBLIOSTART=$2
+BIBLIOEND=$3
+
+#echo " | $INCREMENT , $BIBLIOSTART , $BIBLIOEND | ";
+# This script rebuilds the Zebra index slice by slice, calling itself recursively on slices that fail.
+# BIBLIOSTART is the record number to start on (default 0).
+# BIBLIOEND is the record number to end on (default: the highest biblionumber in the biblio table).
+# INCREMENT specifies how many records to try at once (default 10000).
+# At the end of each "chunk", this script checks whether the indexing process went OK;
+# if it did not, the slice is split in 10 and the reindexing is called again on each smaller chunk.
+# If the increment goes down to 1, records were reindexed one by one, so a failure points at a bad record.
+
+# Logs are stored in a logs/ directory next to this script; it is created if missing and emptied otherwise.
+# NOTE(review): every invocation, including the recursive ones, empties logs/, so logs of earlier slices are lost.
+# Once the script has finished, run (from the logs/ directory):
+#grep -l "previous transaction" `ls rebuild1.*.err`
+# The result lists all the biblios that have not been indexed.
+# WARNING: the numbers are not biblionumbers but record (row) numbers; the biblionumber can be found with:
+# SELECT biblionumber FROM biblio LIMIT YourNumberHere,1;
+
+# EXAMPLE: to run the script on a database of 800 000 biblios:
+# ./rebuild_zebra_sliced.zsh 10000 0 800000
+# will reindex the DB, starting with chunks of 10k biblios (INCREMENT must match ^10*$: 1, 10, 100, ...)
+#if { grep -E "previous transaction" `dirname $0`/logs/rebuild$INCREMENT.$i.err } ; then
+# Ask the Koha database for the highest biblionumber; it is the default BIBLIOEND and the upper bound checked below.
+lastbiblionumber=`perl -e '#!/usr/bin/perl
+use C4::Context;
+my $dbh = C4::Context->dbh;
+my $querylastbiblionumber = "SELECT max(biblionumber) FROM biblio;";
+my $sthlastbiblionumber = $dbh->prepare($querylastbiblionumber);
+$sthlastbiblionumber->execute();
+my ( $lastbiblionumber ) = $sthlastbiblionumber->fetchrow_array; print $lastbiblionumber;'`
+#echo $lastbiblionumber;
+let "maxbiblionumber = $lastbiblionumber + 1" # exclusive upper bound accepted for BIBLIOEND
+if [ $# = 2 ]
+then
+ BIBLIOEND=$lastbiblionumber
+elif [ $# = 1 ]
+then
+ BIBLIOSTART=0
+ BIBLIOEND=$lastbiblionumber
+elif [ $# = 0 ]
+then
+ INCREMENT=10000
+ BIBLIOSTART=0
+ BIBLIOEND=$lastbiblionumber
+fi
+if [[ $INCREMENT =~ ^10*$ ]] # accepts only 1, 10, 100, ... so that dividing by 10 always ends at 1
+then
+else
+ echo "The first argument (INCREMENT) must be 1 or a multiple of 10"
+ exit 2
+fi
+if [[ $BIBLIOSTART =~ ^[0-9]*$ ]]
+then
+else
+ echo "The second argument (BIBLIOSTART) must be an integer"
+ exit 2
+fi
+if [[ $BIBLIOEND =~ ^[0-9]*$ ]]
+then
+else
+ echo "The third argument (BIBLIOEND) must be an integer"
+ exit 2
+fi
+if [ $BIBLIOSTART -lt $BIBLIOEND ]
+then
+else
+ echo "The second argument (BIBLIOSTART) must be lower than the third argument (BIBLIOEND)"
+ exit 2
+fi
+if [ $BIBLIOEND -lt $maxbiblionumber ] # a BIBLIOEND above the highest biblionumber prints "end" and exits with status 1
+then
+else
+ echo "end"
+ exit 1
+fi
+ ls `dirname $0`/logs/ >/dev/null 2>&1
+ if [ $? != 0 ]
+ then
+ mkdir `dirname $0`/logs
+ else
+ rm `dirname $0`/logs/*
+ fi
+ #/home/koha/src/misc/migration_tools/rebuild_zebra.pl -r -b -v -x -nosanitize -ofset 1 -min 1
+ for ((i=$BIBLIOSTART ; i<$BIBLIOEND ; i=i+$INCREMENT))
+ do
+ echo "I = " $i "with increment " $INCREMENT
+ `dirname $0`/rebuild_zebra.pl -b -v -x -nosanitize -d /tmp/rebuild -k -ofset $INCREMENT -min $i > `dirname $0`/logs/rebuild$INCREMENT.$i.log 2> `dirname $0`/logs/rebuild$INCREMENT.$i.err
+ if (($INCREMENT >1 ));
+ then
+ if { grep -q "previous transaction" `dirname $0`/logs/rebuild$INCREMENT.$i.err } ; # this slice's error log mentions "previous transaction"
+ then
+ echo "I must split $i (increment $INCREMENT) because previous transaction didn't reach commit"
+ ((subincrement=$INCREMENT/10))
+ ((newBIBLIOEND=$i+$INCREMENT))
+ $0 $subincrement $i $newBIBLIOEND
+ elif { ! grep -q "Records: $INCREMENT" `dirname $0`/logs/rebuild$INCREMENT.$i.err } ; # error log lacks the expected "Records: <INCREMENT>" line
+ then
+ echo "I must split $i (increment $INCREMENT) because index was uncomplete, less than $INCREMENT records indexed"
+ ((subincrement=$INCREMENT/10))
+ ((newBIBLIOEND=$i+$INCREMENT))
+ $0 $subincrement $i $newBIBLIOEND
+ fi
+ fi
+ done
+exit 0
--
1.7.0.4
More information about the Patches
mailing list