4 # Copyright 2013 LinkedIn, Inc
6 # Licensed under the Apache License, Version 2.0 (the "License");
7 # you may not use this file except in compliance with the License.
8 # You may obtain a copy of the License at
10 # http://www.apache.org/licenses/LICENSE-2.0
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS,
14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 # See the License for the specific language governing permissions and
16 # limitations under the License.
18 # Python 2.7+ required
19 # This script encapsulates the cluster.xml generation for a zoned and a non-zoned
20 # cluster. Passing the --zones <num of zones> switch to the script generates a zoned
21 # cluster config; otherwise a non-zoned cluster config is generated.
23 # The newly generated cluster.xml file is placed in the output dir.
25 # Example use for a zoned cluster :
26 # python generate_cluster_xml.py --file <file with host names, one host per line>
27 # --name <name of the cluster>
28 # --nodes <number of nodes>
29 # --partitions <number of partitions>
30 # --socket-port <port no>
31 # --admin-port <port no>
32 # --http-port <port no>
33 # --current-stores <current_stores.xml>
34 # --output-dir <output directory>
35 # --zones <number of zones>
37 # For non zoned cluster use :
38 # python generate_cluster_xml.py --file <file with host names, one host per line>
39 # --name <name of the cluster>
40 # --nodes <number of nodes>
41 # --partitions <number of partitions>
42 # --socket-port <port no>
43 # --admin-port <port no>
44 # --http-port <port no>
45 # --current-stores <current_stores.xml>
46 # --output-dir <output directory>
48 # Note the absence of the --zones switch for the non zoned cluster use case.
59 print "Python 2.7 or higher is needed"
# Default for --seed: a fresh random integer on every run. The lower bound is
# written as a plain 1; the zero-padded spelling is an octal literal in
# Python 2 and a syntax error in Python 3.
rseed = random.randint(1, 99999999999)

# Set up the argument parser.
parser = argparse.ArgumentParser(description='Build a voldemort cluster.xml.')

# Add supported arguments.
parser.add_argument('-f', '--file', type=str, dest='file',
                    help='the file of the list of hosts(one per line)')
parser.add_argument('-N', '--name', type=str, default='voldemort', dest='name',
                    help='the name you want to give the cluster')
parser.add_argument('-n', '--nodes', type=int, default=6, dest='nodes',
                    help='the number of nodes in the cluster')
parser.add_argument('-p', '--partitions', type=int, default=1500,
                    dest='partitions', help='number of partitions')
parser.add_argument('-sp', '--socket-port', type=int, default=6666,
                    dest='sock_port', help='socket port number')
parser.add_argument('-ap', '--admin-port', type=int, default=6667,
                    dest='admin_port', help='admin port number')
parser.add_argument('-hp', '--http-port', type=int, default=6665,
                    dest='http_port', help='http port number')
# Adjacent string literals are concatenated verbatim, so the separating space
# has to live inside one of the pieces.
parser.add_argument('-s', '--current-stores', type=str,
                    default="config/tools/dummy-stores-3zoned.xml",
                    dest='current_stores',
                    help='Path to current stores xml. If you do not have info about the stores yet, '
                         'use config/tools/dummy-stores.xml from the root voldemort home folder.')
# NOTE(review): no default is given for the output directory, yet the script
# later hands args.output_dir to os.makedirs -- confirm whether this option
# should be marked as required.
parser.add_argument('-o', '--output-dir', type=str, dest='output_dir',
                    help='output directory location')
parser.add_argument('-z', '--zones', type=int, dest='zones',
                    help='For non zoned clusters do not provide this argument. '
                         'For zoned clusters provide this argument with at least two zones.')

# --seed is registered through a mutually exclusive group; it is the only
# member of that group visible here.
genType = parser.add_mutually_exclusive_group()
genType.add_argument('--seed', type=int, default=rseed, dest='seed',
                     help='seed for randomizing partition distribution')
97 args
= parser
.parse_args()
99 # Check if the input file exists
101 with
open(args
.file): pass
103 print 'File does not exist'
105 # create output-dir if it does not exist
107 os
.makedirs(args
.output_dir
)
108 except OSError as exception
:
109 if exception
.errno
!= errno
.EEXIST
:
112 # Open a new file named cluster.xml
113 clusterXMLFilePath
= os
.path
.join(os
.path
.abspath(args
.output_dir
), 'cluster.xml')
114 fileHandle
= open(clusterXMLFilePath
, 'w')
116 # TODO : It would be ideal to have the script accept a list of zone ids.
120 print "For non zoned clusters do not provide this argument."
121 print "For zoned clusters provide this argument with at least two zones."
123 if (args
.nodes
% zones
) != 0:
124 print "Number of nodes must be evenly divisible by number of zones"
129 hostList
= open(args
.file).readlines()
130 nodes
= len(hostList
)
133 partitions
= args
.partitions
135 http_port
= args
.http_port
136 sock_port
= args
.sock_port
137 admin_port
= args
.admin_port
139 vold_home
= os
.pardir
140 current_stores
= os
.path
.join(vold_home
, args
.current_stores
);
142 # Generate the full list of partition IDs
143 part_ids
= range(partitions
)
145 print 'Warning : The number of partitions seems to be low. Assuming max of 3 zones and 50 nodes ' \
146 'per zone, a partition value of 1500 is recommended as it ensures an average of 10 ' \
147 'partitions per node.'
148 print 'Warning : The number of partitions seems to be low. Recommended value is 1500 or more.'
150 # Generate full list of zone IDs
152 zone_ids
= range(zones
)
155 # Shuffle up the partitions
157 random
.shuffle(part_ids
)
159 # Printing cluster.xml
160 print >> fileHandle
, "<cluster>"
161 print >> fileHandle
, " <name>%s</name>" % name
164 for i
in range(args
.zones
):
165 print >> fileHandle
, " <zone>"
166 print >> fileHandle
, " <zone-id>%d</zone-id>" % i
167 proximityList
= list()
168 for j
in range(1, len(zone_ids
) ):
169 proximityList
.append(zone_ids
[(i
+j
)%len(zone_ids
)])
170 print >> fileHandle
, " <proximity-list>%s</proximity-list>" % str(proximityList
).strip('[]')
171 print >> fileHandle
, " </zone>"
173 # TODO : Currently, random partitions are assigned to the nodes in a round robin fashion.
174 # A better approach would be to have some intelligence in the allocation such that
175 # consecutive partition-ids do not land on the same node.
177 for i
in xrange(nodes
):
179 node_partitions
= list()
180 while j
< len(part_ids
):
181 node_partitions
.append(str(part_ids
[j
]))
183 partitionslist
= ", ".join(node_partitions
);
185 print >> fileHandle
, " <server>"
186 print >> fileHandle
, " <id>%d</id>" % i
188 print >> fileHandle
, " <host>%s</host>" % hostList
[i
].strip()
190 print >> fileHandle
, " <host>host%d</host>" % i
191 print >> fileHandle
, " <http-port>%d</http-port>" % http_port
192 print >> fileHandle
, " <socket-port>%d</socket-port>" % sock_port
193 print >> fileHandle
, " <admin-port>%d</admin-port>" % admin_port
194 print >> fileHandle
, " <partitions>%s</partitions>" % partitionslist
195 # If zones are being used, assign a zone-id
197 print >> fileHandle
, " <zone-id>%d</zone-id>" % zone_id
198 if zone_id
== (zones
- 1):
202 print >> fileHandle
, " </server>"
203 print >> fileHandle
, "</cluster>"
207 # For zoned clusters call rebalance-new-zoned-cluster.sh
209 scriptPath
= vold_home
+ '/bin/rebalance-new-zoned-cluster.sh'
210 cmd
= [scriptPath
, '-v', vold_home
, '-c', clusterXMLFilePath
, '-s', current_stores
,
211 '-o', os
.path
.abspath(args
.output_dir
)]