# Checking in changes prior to tagging of version 2.73.
# [MogileFS-Server.git] / lib / MogileFS / Worker / JobMaster.pm
# blob d70eb395d720e7a44cfcc036bb820687afa57db0
package MogileFS::Worker::JobMaster;
# manages/monitors the internal queues for various workers.
# decided to have one of these per tracker instead of have workers
# all elect one per job type... should be able to reuse more code, and avoid
# relying on too many database locks.

use strict;
# NOTE(review): consider adding 'use warnings' once the helpers below are
# confirmed warning-clean under load.
use base 'MogileFS::Worker';
use fields (
            'fsck_queue_limit',
            'repl_queue_limit',
            'dele_queue_limit',
            'rebl_queue_limit',
            );
use MogileFS::Util qw(every error debug encode_url_args);
use MogileFS::Config;
use MogileFS::Server;

# Hard ceilings / per-round injection rates for the fsck and rebalance
# queues; each can be overridden via server settings (see the _inject_*
# subs below).
use constant DEF_FSCK_QUEUE_MAX => 20_000;
use constant DEF_FSCK_QUEUE_INJECT => 1000;

use constant DEF_REBAL_QUEUE_MAX => 10_000;
use constant DEF_REBAL_QUEUE_INJECT => 500;
# Constructor; builds the restricted (fields-based) hash and delegates
# the rest of the setup to MogileFS::Worker::new with the parent socket.
sub new {
    my ($class, $psock) = @_;
    my $self = fields::new($class);
    $self->SUPER::new($psock);

    return $self;
}
33 sub watchdog_timeout { 120; }
# heartbeat all of the queues constantly.
# if a queue drops below a watermark, check for more work.
# NOTE: Uh. now that I think about it, should queue_check just return
# the status for all queues in one roundtrip? :(
# It's separate in case future workers want to manage their own queues, or
# this gets split up...
sub work {
    my $self = shift;

    # per-queue DB fetch limits; grown/reset dynamically by the
    # _check_* helpers via queue_depth_check().
    $self->{fsck_queue_limit} = 100;
    $self->{repl_queue_limit} = 100;
    $self->{dele_queue_limit} = 100;
    $self->{rebl_queue_limit} = 100;

    # watch the parent socket so commands from the parent are processed
    # inside the event loop.
    Danga::Socket->AddOtherFds($self->psock_fd, sub{ $self->read_from_parent });

    # kick off the initial run
    $self->check_queues;
    Danga::Socket->EventLoop;
}
# 'pings' parent and populates all queues.
# Reschedules itself via a Danga::Socket timer: immediately if any queue
# fetched work this round, else after a one-second sleep.
sub check_queues {
    my $self = shift;

    my $active = 0;
    if ($self->validate_dbh) {
        $self->send_to_parent("queue_depth all");
        my $sto = Mgd::get_store();
        $self->parent_ping;
        $active += $self->_check_replicate_queues($sto);
        $active += $self->_check_delete_queues($sto);
        $active += $self->_check_fsck_queues($sto);
        $active += $self->_check_rebal_queues($sto);
    }

    # don't sleep if active (just avoid recursion)
    Danga::Socket->AddTimer($active ? 0 : 1, sub { $self->check_queues });
}
# Refill the parent's delete queue when it runs low.
# Returns 1 if work was fetched and sent to the parent, 0 otherwise
# (explicit 0 so check_queues' "$active +=" never adds undef).
sub _check_delete_queues {
    my $self = shift;
    my $sto = shift;
    my ($need_fetch, $new_limit) =
        queue_depth_check($self->queue_depth('delete'),
            $self->{dele_queue_limit});
    return 0 unless $need_fetch;
    my @to_del = $sto->grab_files_to_delete2($new_limit);
    # keep the grown limit only while fetches keep returning work;
    # reset to the 100 default once the DB queue is drained.
    $self->{dele_queue_limit} = @to_del ? $new_limit : 100;
    return 0 unless @to_del;
    for my $todo (@to_del) {
        $self->send_to_parent("queue_todo delete " .
            encode_url_args($todo));
    }
    return 1;
}
# NOTE: we only maintain one queue per worker, but we can easily
# give specialized work per worker by tagging the $todo href.
# in the case of replication, we want a normal "replication" queue,
# but also "drain" and "rebalance" queues. So use $todo->{type} or something.
# Drain/rebalance will be way awesomer with a queue attached:
# "drain 5% of devid 5" or "drain 10G off devids 7,8,9"
# hell, drain barely works if you encounter errors. Using a work queue
# should fix that.
# FIXME: Don't hardcode the min queue depth.
#
# Returns 1 if work was fetched and sent to the parent, 0 otherwise
# (explicit 0 so check_queues' "$active +=" never adds undef).
sub _check_replicate_queues {
    my $self = shift;
    my $sto = shift;
    my ($need_fetch, $new_limit) =
        queue_depth_check($self->queue_depth('replicate'),
            $self->{repl_queue_limit});
    return 0 unless $need_fetch;
    my @to_repl = $sto->grab_files_to_replicate($new_limit);
    # keep the grown limit while fetches return work; reset once drained.
    $self->{repl_queue_limit} = @to_repl ? $new_limit : 100;
    return 0 unless @to_repl;
    # don't need to shuffle or sort, since we're the only tracker to get this
    # list.
    for my $todo (@to_repl) {
        $todo->{_type} = 'replicate'; # could be 'drain', etc.
        $self->send_to_parent("queue_todo replicate " .
            encode_url_args($todo));
    }
    return 1;
}
# FSCK is going to be a little odd... We still need a single "global"
# fsck worker to do the queue injection, but need to locally poll data.
# Returns 1 if work was fetched and sent to the parent, 0 otherwise
# (explicit 0 so check_queues' "$active +=" never adds undef).
sub _check_fsck_queues {
    my $self = shift;
    my $sto = shift;
    # only the elected fsck host injects new fids into the DB queue;
    # every tracker then polls that queue below.
    my $fhost = MogileFS::Config->server_setting_cached('fsck_host');
    if ($fhost && $fhost eq MogileFS::Config->hostname) {
        $self->_inject_fsck_queues($sto);
    }

    # Queue depth algorithm:
    # if internal queue is less than 30% full, fetch more.
    # if internal queue bottomed out, increase fetch limit by 50.
    # fetch more work
    # if no work fetched, reset limit to 100 (default)
    my ($need_fetch, $new_limit) =
        queue_depth_check($self->queue_depth('fsck'),
            $self->{fsck_queue_limit});
    return 0 unless $need_fetch;
    my @to_fsck = $sto->grab_files_to_queued(FSCK_QUEUE,
        'type, flags', $new_limit);
    $self->{fsck_queue_limit} = @to_fsck ? $new_limit : 100;
    return 0 unless @to_fsck;
    for my $todo (@to_fsck) {
        $self->send_to_parent("queue_todo fsck " . encode_url_args($todo));
    }
    return 1;
}
# Inject the next batch of fids into the DB-side fsck queue.
# Only runs on the elected fsck host (see _check_fsck_queues).
# Walks fids from 'fsck_highest_fid_checked' toward 'fsck_fid_at_end';
# when no fids remain, clears fsck_host/records fsck_stop_time to mark
# the run complete.
sub _inject_fsck_queues {
    my $self = shift;
    my $sto = shift;

    $sto->fsck_log_summarize;
    # don't overfill the DB queue; back off until workers drain it.
    my $queue_size = $sto->file_queue_length(FSCK_QUEUE);
    my $max_queue =
        MogileFS::Config->server_setting_cached('queue_size_for_fsck') ||
        DEF_FSCK_QUEUE_MAX;
    return if ($queue_size >= $max_queue);

    my $max_checked = MogileFS::Config->server_setting('fsck_highest_fid_checked') || 0;
    my $fid_at_end = MogileFS::Config->server_setting('fsck_fid_at_end');
    my $to_inject =
        MogileFS::Config->server_setting_cached('queue_rate_for_fsck') ||
        DEF_FSCK_QUEUE_INJECT;
    my $fids = $sto->get_fidids_between($max_checked, $fid_at_end, $to_inject);
    unless (@$fids) {
        MogileFS::Config->set_server_setting('fsck_highest_fid_checked',
            $max_checked);

        # set these last since tests/scripts may rely on these to
        # determine when fsck (injection) is complete
        $sto->set_server_setting("fsck_host", undef);
        $sto->set_server_setting("fsck_stop_time", $sto->get_db_unixtime);
        return;
    }

    $sto->enqueue_many_for_todo($fids, FSCK_QUEUE, 0);

    # remember the highest fid queued so the next round resumes after it.
    my $nmax = $fids->[-1];
    MogileFS::Config->set_server_setting('fsck_highest_fid_checked', $nmax);
}
# Refill the parent's rebalance queue when it runs low; the elected
# rebal host additionally injects new work into the DB-side queue.
# Returns 1 if work was fetched and sent to the parent, 0 otherwise
# (explicit 0 so check_queues' "$active +=" never adds undef).
sub _check_rebal_queues {
    my $self = shift;
    my $sto = shift;
    my $rhost = MogileFS::Config->server_setting_cached('rebal_host');
    if ($rhost && $rhost eq MogileFS::Config->hostname) {
        $self->_inject_rebalance_queues($sto);
    }

    my ($need_fetch, $new_limit) =
        queue_depth_check($self->queue_depth('rebalance'),
            $self->{rebl_queue_limit});
    return 0 unless $need_fetch;
    my @to_rebal = $sto->grab_files_to_queued(REBAL_QUEUE,
        'type, flags, devid, arg', $new_limit);
    $self->{rebl_queue_limit} = @to_rebal ? $new_limit : 100;
    return 0 unless @to_rebal;
    for my $todo (@to_rebal) {
        $todo->{_type} = 'rebalance';
        $self->send_to_parent("queue_todo rebalance " . encode_url_args($todo));
    }
    return 1;
}
# Inject the next batch of devid/fid work into the DB-side rebalance
# queue. Only runs on the elected rebal host. Loads (or initializes)
# the persisted rebalance state, honors a 'stop' signal, and persists
# updated state after each round.
sub _inject_rebalance_queues {
    my $self = shift;
    my $sto = shift;

    # don't overfill the DB queue; back off until workers drain it.
    my $queue_size = $sto->file_queue_length(REBAL_QUEUE);
    my $max_queue =
        MogileFS::Config->server_setting_cached('queue_size_for_rebal') ||
        DEF_REBAL_QUEUE_MAX;
    return if ($queue_size >= $max_queue);

    my $to_inject =
        MogileFS::Config->server_setting_cached('queue_rate_for_rebal') ||
        DEF_REBAL_QUEUE_INJECT;

    # TODO: Cache the rebal object. Requires explicitly blowing it up at the
    # end of a run or ... I guess whenever the host sees it's not the rebal
    # host.
    my $rebal = MogileFS::Rebalance->new;
    my $signal = MogileFS::Config->server_setting('rebal_signal');
    my $rebal_pol = MogileFS::Config->server_setting('rebal_policy');
    my $rebal_state = MogileFS::Config->server_setting('rebal_state');
    $rebal->policy($rebal_pol);

    my @devs = Mgd::device_factory()->get_all;
    if ($rebal_state) {
        $rebal->load_state($rebal_state);
    } else {
        $rebal->init(\@devs);
    }

    # Stopping is done via signal so we can note stop time in the state,
    # and un-drain any devices that should be un-drained.
    if ($signal && $signal eq 'stop') {
        $rebal->stop;
        $rebal_state = $rebal->save_state;
        $sto->set_server_setting('rebal_signal', undef);
        $sto->set_server_setting("rebal_host", undef);
        $sto->set_server_setting('rebal_state', $rebal_state);
        return;
    }

    my $devfids = $rebal->next_fids_to_rebalance(\@devs, $sto, $to_inject);

    # undefined means there's no work left.
    if (! defined $devfids) {
        # Append some info to a rebalance log table?
        # Leave state in the system for inspection post-run.
        # TODO: Emit some sort of syslog/status line.
        $rebal->finish;
        $rebal_state = $rebal->save_state;
        $sto->set_server_setting('rebal_state', $rebal_state);
        $sto->set_server_setting("rebal_host", undef);
        return;
    }

    # Empty means nothing to queue this round.
    if (@$devfids) {
        # I wish there was less data serialization in the world.
        # (flatten each entry's devid list into a CSV string in-place;
        # was a void-context map, now a plain for loop)
        $_->[2] = join(',', @{$_->[2]}) for @$devfids;
        $sto->enqueue_many_for_todo($devfids, REBAL_QUEUE, 0);
    }

    $rebal_state = $rebal->save_state;
    MogileFS::Config->set_server_setting("rebal_state", $rebal_state);
}
# takes the current queue depth and fetch limit
# returns whether or not to fetch, and new fetch limit.
# TODO: separate a fetch limit from a queue limit...
# so we don't hammer the DB with giant transactions, but loop
# fast trying to keep the queue full.
#
# Rules: an empty queue grows the limit by 50 (capped at the
# 'internal_queue_limit' setting, default 500) and fetches; a queue
# under 70% full fetches at the current limit; otherwise no fetch.
sub queue_depth_check {
    my $max_limit =
        MogileFS::Config->server_setting_cached('internal_queue_limit')
        || 500;

    my ($depth, $limit) = @_;
    if ($depth == 0) {
        $limit += 50 unless $limit >= $max_limit;
        return (1, $limit);
    } elsif ($depth / $limit < 0.70) {
        return (1, $limit);
    }
    return (0, $limit);
}
295 # Local Variables:
296 # mode: perl
297 # c-basic-indent: 4
298 # indent-tabs-mode: nil
299 # End: