added unset_zone_id context for clients
[voldemort/jeffpc.git] / bin / rebalance-shuffle.sh
blob4dcf015c249ba0351ecffcaf9693656793e26658
1 #!/bin/bash -e
4 # Copyright 2013 LinkedIn, Inc
6 # Licensed under the Apache License, Version 2.0 (the "License");
7 # you may not use this file except in compliance with the License.
8 # You may obtain a copy of the License at
10 # http://www.apache.org/licenses/LICENSE-2.0
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS,
14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 # See the License for the specific language governing permissions and
16 # limitations under the License.
18 # This script shuffles a current cluster.xml (to achieve a more balanced cluster) and then
19 # outputs a final-cluster.xml corresponding to it. It also outputs the plan and the cost
20 # to achieve this final cluster config. Eg.
22 # Argument = -c current_cluster -s current_stores -o output dir
23 # The final cluster is placed in output_dir/
25 # The script also exposes an optional argument -m <max contiguous partitions>.
26 # When invoked with the -m option the script first splits runs of partitions that are bigger than
27 # <max contiguous partitions> and then repartitions the cluster.
29 # Eg. Argument = -c current_cluster -s current_stores -o output dir -m 3
31 # This script can be used again on its own output. That is, if first attempt only gets you half-way
32 # to where you want to go in terms of repartitioning, then take output final-cluster xml and use as
33 # input to this tool again.
35 # This script uses getopts which means only single character switches are allowed.
36 # Using getopt would allow for multi-character switch names but would come at the
37 # cost of not being cross-platform compatible.
#######################################
# Print an optional error line followed by the usage text, then exit.
# Arguments:
#   $1 - (optional) error description. The "ERROR:" line is skipped when it
#        is missing or empty, so callers that only want the usage text
#        (e.g. -h) no longer produce a bogus "ERROR: ." line.
# Outputs:
#   Writes the error line and the usage text to stdout.
# Returns:
#   Never returns; always exits the shell with status 1.
#######################################
usage_and_exit() {
  local message="${1:-}"

  if [[ -n "$message" ]]; then
    # A trailing period is appended here, so callers should not add their own.
    printf 'ERROR: %s.\n' "$message"
  fi

  cat <<EOF

Usage: $0 options
OPTIONS:
 -h     Show this message
 -c     Current cluster that describes the cluster.
 -s     Current stores that describes the store. If you do not have info about the stores yet, look
        under 'voldemort_home/config/tools/' for some store examples.
 -o     Output dir where all interim and final files will be stored.
        The directory will be created if it does not exist yet.
 -m     Max allowed contiguous partition IDs within a zone (optional argument)
EOF
  exit 1
}
# Initialize the variables that the command line options populate.
current_cluster=""
current_stores=""
output_dir=""
# -1 is the sentinel for "-m was not given"; Step 0 only runs for other values.
max_contiguous_partitions=-1

# Figure out voldemort home directory: the parent of the directory that
# contains this script.
dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
vold_home="$(dirname "$dir")"

#######################################
# Parse the command line options into the globals initialized above.
# Globals:
#   current_cluster, current_stores, output_dir,
#   max_contiguous_partitions (all written)
# Arguments:
#   The script's command line ("$@").
# Outputs:
#   One "[rebalance-shuffle]" line on stdout per recognised option.
# Side effects:
#   Creates the directory given with -o if it does not exist yet.
#######################################
parse_options() {
  local OPTION
  # getopts keeps its cursor in OPTIND; a local copy makes the function
  # safe to call more than once.
  local OPTIND=1

  # Plain ASCII quotes: typographic quotes would become part of the optstring.
  while getopts "hc:s:o:m:" OPTION; do
    case "$OPTION" in
      h)
        usage_and_exit
        ;;
      c)
        current_cluster=$OPTARG
        echo "[rebalance-shuffle] Will rebalance on the cluster described in '$current_cluster'."
        ;;
      s)
        current_stores=$OPTARG
        echo "[rebalance-shuffle] Will rebalance on the stores described in '$current_stores'."
        ;;
      o)
        output_dir=$OPTARG
        # Quoted so that directories containing spaces are created as one path.
        mkdir -p -- "$output_dir"
        echo "[rebalance-shuffle] Using '$output_dir' for all interim and final files generated."
        ;;
      m)
        max_contiguous_partitions=$OPTARG
        echo "[rebalance-shuffle] Using '$max_contiguous_partitions' for max_contiguous_partitions."
        ;;
      \?)
        # getopts has already reported the offending option on stderr.
        usage_and_exit "Invalid option or missing option argument"
        ;;
    esac
  done
}

# Parse options
parse_options "$@"
# Validate the parsed options: -c, -s and -o are all mandatory, and the
# cluster and stores files must exist before any tool is invoked.
if [[ -z "$current_cluster" || -z "$current_stores" || -z "$output_dir" ]]; then
  printf "\n"
  echo "[rebalance-shuffle] Missing argument. Check again."
  # usage_and_exit terminates the script, so no explicit exit is needed after it.
  usage_and_exit "Missing required argument (-c, -s and -o are mandatory)"
fi

# Paths are quoted so that names containing spaces are tested as a single path.
# The messages carry no trailing period because usage_and_exit appends one.
if [[ ! -e "$current_cluster" ]]; then
  usage_and_exit "File '$current_cluster' does not exist"
fi

if [[ ! -e "$current_stores" ]]; then
  usage_and_exit "File '$current_stores' does not exist"
fi
# The final cluster.xml after shuffling is generated as described in the steps below:
# Step 0: This step is executed only if max_contiguous_partitions is passed to the
#         script. In this step, lengthy runs of contiguous partitions are broken down
#         to a maximum run of max_contiguous_partitions. The final-cluster.xml
#         from this step is then fed into step 1.
# Step 1: The current cluster.xml is fed to the repartitioner along with random swap
#         attempts. The repartitioner randomly swaps the partitions
#         and tries to balance the ring.
# Step 2: A plan is generated on how to get from the original cluster topology to
#         the one that is generated in step 1.

# Passed to the repartitioner in step 1 as --random-swap-attempts.
swap_attempts=250
# Passed to the repartitioner in step 0 as --attempts.
max_contiguous_attempts=50
# Passed to the repartitioner in step 1 as --attempts.
attempts=10
# Passed to the repartitioner in step 1 as --random-swap-successes.
swap_successes=250
# Step 0: only when -m was given (the default of -1 means "not given").
# Break up long runs of contiguous partitions, then use the resulting
# cluster as the input of the following steps.
# A string comparison is used so that a non-numeric -m value reaches the
# repartitioner instead of making the test itself fail and skip the step.
if [[ "$max_contiguous_partitions" != "-1" ]]; then
  mkdir -p -- "$output_dir/step0/"
  "$vold_home/bin/run-class.sh" voldemort.tools.RepartitionerCLI \
    --current-cluster "$current_cluster" \
    --current-stores "$current_stores" \
    --max-contiguous-partitions "$max_contiguous_partitions" \
    --attempts "$max_contiguous_attempts" \
    --output-dir "$output_dir/step0/"

  if [[ ! -e "$output_dir/step0/final-cluster.xml" ]]; then
    # Report the real path; the former message expanded the undefined
    # variable \$final and printed '-cluster.xml'.
    usage_and_exit "File '$output_dir/step0/final-cluster.xml' does not exist"
  fi

  current_cluster="$output_dir/step0/final-cluster.xml"
  echo "[rebalance-shuffle] Will rebalance on the cluster in '$current_cluster'."
fi
# Step 1: run the repartitioner with random swaps enabled on the current
# cluster; it writes its result into <output_dir>/step1/.
mkdir -p -- "$output_dir/step1"
"$vold_home/bin/run-class.sh" voldemort.tools.RepartitionerCLI \
  --current-cluster "$current_cluster" \
  --current-stores "$current_stores" \
  --output-dir "$output_dir/step1/" \
  --enable-random-swaps \
  --attempts "$attempts" \
  --random-swap-attempts "$swap_attempts" \
  --random-swap-successes "$swap_successes"

if [[ ! -e "$output_dir/step1/final-cluster.xml" ]]; then
  # Report the real path; the former message expanded the undefined
  # variable \$final and printed '-cluster.xml'.
  usage_and_exit "File '$output_dir/step1/final-cluster.xml' does not exist"
fi
# Step 2: generate the plan that leads from the current cluster to the
# final cluster produced by step 1; output goes to <output_dir>/step2/.
# All paths are quoted so that directories containing spaces stay one argument.
mkdir -p -- "$output_dir/step2/"
"$vold_home/bin/run-class.sh" voldemort.tools.RebalancePlanCLI \
  --current-cluster "$current_cluster" \
  --current-stores "$current_stores" \
  --final-cluster "$output_dir/step1/final-cluster.xml" \
  --output-dir "$output_dir/step2/"
# Copy the results of the individual steps to the top of the output
# directory. Operands are quoted so paths with spaces are copied correctly.
echo "[rebalance-shuffle] Placing final-cluster.xml in '$output_dir'"
cp -- "$output_dir/step2/final-cluster.xml" "$output_dir/final-cluster.xml"
echo "[rebalance-shuffle] Placing plan.out in '$output_dir'"
cp -- "$output_dir/step2/plan.out" "$output_dir/plan.out"
echo "[rebalance-shuffle] Placing final-cluster.xml.analysis in '$output_dir'"
cp -- "$output_dir/step1/final-cluster.xml.analysis" "$output_dir/final-cluster.xml.analysis"