Giant awful patch to enable new object monitoring
[MogileFS-Server.git] / lib / MogileFS / Worker / JobMaster.pm
blobb9c2f66528653cf7cc6ea3ac78dccfbe28bcd167
1 package MogileFS::Worker::JobMaster;
# Manages/monitors the internal queues for the various workers.
# We decided to have one of these per tracker instead of having the workers
# all elect one per job type... this should let us reuse more code, and avoid
# relying on too many database locks.
7 use strict;
8 use base 'MogileFS::Worker';
9 use fields (
10 'fsck_queue_limit',
11 'repl_queue_limit',
12 'dele_queue_limit',
13 'rebl_queue_limit',
15 use MogileFS::Util qw(every error debug encode_url_args);
16 use MogileFS::Config;
18 use constant DEF_FSCK_QUEUE_MAX => 20_000;
19 use constant DEF_FSCK_QUEUE_INJECT => 1000;
21 use constant DEF_REBAL_QUEUE_MAX => 10_000;
22 use constant DEF_REBAL_QUEUE_INJECT => 500;
# Constructor.
#   $class - package to instantiate (allocated through fields::new).
#   $psock - handed unchanged to the parent class constructor.
# Returns the new object.
sub new {
    my $class = shift;
    my $psock = shift;

    my $self = fields::new($class);
    $self->SUPER::new($psock);

    return $self;
}
# Watchdog timeout for this worker: always 120.
# NOTE(review): the unit is presumably seconds -- confirm against the base
# worker class.
sub watchdog_timeout {
    return 120;
}
# Main loop: heartbeat all of the queues constantly.
# If a queue drops below a watermark, check for more work.
# NOTE: Now that I think about it, should queue_check just return the status
# for all queues in one roundtrip? :(
# It's separate in case future workers want to manage their own queues, or
# this gets split up...
sub work {
    my ($self) = @_;

    # Every queue starts out with the same fetch limit.
    $self->{$_} = 100
        for qw(fsck_queue_limit repl_queue_limit dele_queue_limit rebl_queue_limit);

    # Queue checkers, run in exactly this order on every pass.
    my @checkers = qw(
        _check_replicate_queues
        _check_delete_queues
        _check_fsck_queues
        _check_rebal_queues
    );

    every(1, sub {
        # First argument supplied by every(); it is invoked with 0 below
        # whenever at least one queue received work on this pass.
        # NOTE(review): presumably this shortens the next sleep -- confirm
        # against MogileFS::Util::every.
        my $tick = $_[0];

        # 'pings' parent and populates all queues.
        $self->send_to_parent("queue_depth all");
        $self->validate_dbh;
        my $sto = Mgd::get_store();
        $self->read_from_parent(1);

        # Each checker yields 1 when it queued work and nothing otherwise.
        my $active = 0;
        for my $checker (@checkers) {
            $active += $self->$checker($sto);
        }
        $tick->(0) if $active;
    });
}
# Tops up the internal 'delete' queue from the store.
#   $sto - store object used to grab pending deletes.
# Returns 1 when at least one item was sent to the parent, otherwise an
# empty return (no fetch needed, or nothing available).
sub _check_delete_queues {
    my ($self, $sto) = @_;

    my ($need_fetch, $new_limit) =
        queue_depth_check($self->queue_depth('delete'),
                          $self->{dele_queue_limit});
    return unless $need_fetch;

    my @to_del = $sto->grab_files_to_delete2($new_limit);
    if (!@to_del) {
        # Nothing to do: fall back to the default fetch limit.
        $self->{dele_queue_limit} = 100;
        return;
    }
    $self->{dele_queue_limit} = $new_limit;

    for my $job (@to_del) {
        my $encoded = encode_url_args($job);
        $self->send_to_parent("queue_todo delete " . $encoded);
    }
    return 1;
}
# NOTE: we only maintain one queue per worker, but we can easily give
# specialized work per worker by tagging the $todo href.
# In the case of replication, we want a normal "replication" queue, but also
# "drain" and "rebalance" queues. So use $todo->{type} or something.
# Drain/rebalance will be way awesomer with a queue attached:
# "drain 5% of devid 5" or "drain 10G off devids 7,8,9".
# Hell, drain barely works if you encounter errors. Using a work queue
# should fix that.
# FIXME: Don't hardcode the min queue depth.
#
# Tops up the internal 'replicate' queue from the store.
#   $sto - store object used to grab files awaiting replication.
# Returns 1 when at least one item was sent to the parent, otherwise an
# empty return.
sub _check_replicate_queues {
    my ($self, $sto) = @_;

    my ($need_fetch, $new_limit) =
        queue_depth_check($self->queue_depth('replicate'),
                          $self->{repl_queue_limit});
    return unless $need_fetch;

    my @to_repl = $sto->grab_files_to_replicate($new_limit);
    if (!@to_repl) {
        # Nothing to do: fall back to the default fetch limit.
        $self->{repl_queue_limit} = 100;
        return;
    }
    $self->{repl_queue_limit} = $new_limit;

    # No need to shuffle or sort, since we're the only tracker to get this
    # list.
    for my $job (@to_repl) {
        $job->{_type} = 'replicate'; # could be 'drain', etc.
        my $encoded = encode_url_args($job);
        $self->send_to_parent("queue_todo replicate " . $encoded);
    }
    return 1;
}
# FSCK is going to be a little odd... We still need a single "global"
# fsck worker to do the queue injection, but need to locally poll data.
#
#   $sto - store object used for both injection and fetching.
# Returns 1 when at least one item was sent to the parent, otherwise an
# empty return.
sub _check_fsck_queues {
    my ($self, $sto) = @_;

    # Only the host named in the 'fsck_host' setting injects into the
    # shared fsck queue.
    my $fhost = MogileFS::Config->server_setting('fsck_host');
    $self->_inject_fsck_queues($sto)
        if $fhost && $fhost eq MogileFS::Config->hostname;

    # Queue depth algorithm (implemented by queue_depth_check):
    # if internal queue is less than 70% full, fetch more.
    # if internal queue bottomed out, increase fetch limit by 50.
    # fetch more work
    # if no work fetched, reset limit to 100 (default)
    my ($need_fetch, $new_limit) =
        queue_depth_check($self->queue_depth('fsck'),
                          $self->{fsck_queue_limit});
    return unless $need_fetch;

    my @to_fsck = $sto->grab_files_to_queued(FSCK_QUEUE,
        'type, flags', $new_limit);
    if (!@to_fsck) {
        $self->{fsck_queue_limit} = 100;
        return;
    }
    $self->{fsck_queue_limit} = $new_limit;

    for my $job (@to_fsck) {
        my $encoded = encode_url_args($job);
        $self->send_to_parent("queue_todo fsck " . $encoded);
    }
    return 1;
}
# Feeds the shared fsck queue with the next batch of fids.
#   $sto - store object holding the queue and the server settings.
# Does nothing once the queue already holds 'queue_size_for_fsck' entries
# (default DEF_FSCK_QUEUE_MAX). When no fids remain above the high-water
# mark, clears 'fsck_host', records 'fsck_stop_time' and stops.
sub _inject_fsck_queues {
    my ($self, $sto) = @_;

    $sto->fsck_log_summarize;

    my $queued = $sto->file_queue_length(FSCK_QUEUE);
    my $queue_cap =
        MogileFS::Config->server_setting_cached('queue_size_for_fsck', 60)
        || DEF_FSCK_QUEUE_MAX;
    return if $queued >= $queue_cap;

    # Highest fid already handed to fsck; start from 0 when unset.
    my $high_water =
        MogileFS::Config->server_setting('fsck_highest_fid_checked') || 0;
    my $batch_size =
        MogileFS::Config->server_setting_cached('queue_rate_for_fsck', 60)
        || DEF_FSCK_QUEUE_INJECT;

    my $fids = $sto->get_fidids_above_id($high_water, $batch_size);
    if (!@$fids) {
        # Ran out of fids: this fsck run is over.
        $sto->set_server_setting("fsck_host", undef);
        $sto->set_server_setting("fsck_stop_time", $sto->get_db_unixtime);
        MogileFS::Config->set_server_setting('fsck_highest_fid_checked',
            $high_water);
        return;
    }

    $sto->enqueue_many_for_todo($fids, FSCK_QUEUE, 0);

    # The last fid of the batch becomes the new high-water mark.
    MogileFS::Config->set_server_setting('fsck_highest_fid_checked',
        $fids->[-1]);
}
# Tops up the internal 'rebalance' queue; on the host named in the
# 'rebal_host' setting it first injects new work into the shared queue.
#   $sto - store object used for both injection and fetching.
# Returns 1 when at least one item was sent to the parent, otherwise an
# empty return.
sub _check_rebal_queues {
    my ($self, $sto) = @_;

    my $rhost = MogileFS::Config->server_setting('rebal_host');
    $self->_inject_rebalance_queues($sto)
        if $rhost && $rhost eq MogileFS::Config->hostname;

    my ($need_fetch, $new_limit) =
        queue_depth_check($self->queue_depth('rebalance'),
                          $self->{rebl_queue_limit});
    return unless $need_fetch;

    my @to_rebal = $sto->grab_files_to_queued(REBAL_QUEUE,
        'type, flags, devid, arg', $new_limit);
    if (!@to_rebal) {
        # Nothing to do: fall back to the default fetch limit.
        $self->{rebl_queue_limit} = 100;
        return;
    }
    $self->{rebl_queue_limit} = $new_limit;

    for my $job (@to_rebal) {
        $job->{_type} = 'rebalance';
        my $encoded = encode_url_args($job);
        $self->send_to_parent("queue_todo rebalance " . $encoded);
    }
    return 1;
}
# Feeds the shared rebalance queue with the next batch of work.
#   $sto - store object holding the queue and the server settings.
#
# Steps:
#   1. Bail out if the queue already holds 'queue_size_for_rebal' entries
#      (default DEF_REBAL_QUEUE_MAX).
#   2. Build a MogileFS::Rebalance object from the stored 'rebal_policy' and
#      'rebal_state' (or initialise it from the device list if no state).
#   3. Honour a 'stop' value in 'rebal_signal': stop, persist state, clear
#      the signal and 'rebal_host', and return.
#   4. Ask the rebalancer for up to 'queue_rate_for_rebal' items (default
#      DEF_REBAL_QUEUE_INJECT); an undefined answer means the run is
#      finished, an empty one means nothing to queue this round.
#   5. Enqueue what was returned and persist the rebalance state.
sub _inject_rebalance_queues {
    my $self = shift;
    my $sto  = shift;

    my $queue_size = $sto->file_queue_length(REBAL_QUEUE);
    my $max_queue =
        MogileFS::Config->server_setting_cached('queue_size_for_rebal', 60) ||
            DEF_REBAL_QUEUE_MAX;
    return if ($queue_size >= $max_queue);

    my $to_inject =
        MogileFS::Config->server_setting_cached('queue_rate_for_rebal', 60) ||
            DEF_REBAL_QUEUE_INJECT;

    # TODO: Cache the rebal object. Requires explicitly blowing it up at the
    # end of a run or ... I guess whenever the host sees it's not the rebal
    # host.
    my $rebal       = MogileFS::Rebalance->new;
    my $signal      = MogileFS::Config->server_setting('rebal_signal');
    my $rebal_pol   = MogileFS::Config->server_setting('rebal_policy');
    my $rebal_state = MogileFS::Config->server_setting('rebal_state');
    $rebal->policy($rebal_pol);

    my @devs = MogileFS::Device->devices;
    if ($rebal_state) {
        $rebal->load_state($rebal_state);
    } else {
        $rebal->init(\@devs);
    }

    # Stopping is done via signal so we can note stop time in the state,
    # and un-drain any devices that should be un-drained.
    if ($signal && $signal eq 'stop') {
        $rebal->stop;
        $rebal_state = $rebal->save_state;
        $sto->set_server_setting('rebal_signal', undef);
        $sto->set_server_setting("rebal_host", undef);
        $sto->set_server_setting('rebal_state', $rebal_state);
        return;
    }

    my $devfids = $rebal->next_fids_to_rebalance(\@devs, $sto, $to_inject);

    # undefined means there's no work left.
    if (! defined $devfids) {
        # Append some info to a rebalance log table?
        # Leave state in the system for inspection post-run.
        # TODO: Emit some sort of syslog/status line.
        $rebal->finish;
        $rebal_state = $rebal->save_state;
        $sto->set_server_setting('rebal_state', $rebal_state);
        $sto->set_server_setting("rebal_host", undef);
        return;
    }

    # Empty means nothing to queue this round.
    if (@$devfids) {
        # I wish there was less data serialization in the world.
        # Element [2] of each entry is an array ref; flatten it in place to
        # a comma-separated string before enqueueing. A plain loop is used
        # because this is done purely for its side effect.
        for my $entry (@$devfids) {
            $entry->[2] = join(',', @{ $entry->[2] });
        }
        $sto->enqueue_many_for_todo($devfids, REBAL_QUEUE, 0);
    }

    $rebal_state = $rebal->save_state;
    MogileFS::Config->set_server_setting("rebal_state", $rebal_state);
}
# Takes the current queue depth and fetch limit.
# Returns whether or not to fetch, and the new fetch limit, as a two-element
# list: (need_fetch, limit).
#
#   - depth == 0: always fetch; the limit grows by 50 but never beyond the
#     'internal_queue_limit' server setting (default 500). A limit that is
#     already at or above that cap is left untouched.
#   - depth below 70% of the limit: fetch, limit unchanged.
#   - otherwise: do not fetch, limit unchanged.
#
# A limit of 0 (or less) with a non-empty queue is reported as "do not
# fetch" rather than dying on a division by zero.
#
# TODO: separate a fetch limit from a queue limit...
# so we don't hammer the DB with giant transactions, but loop
# fast trying to keep the queue full.
sub queue_depth_check {
    my ($depth, $limit) = @_;

    my $max_limit =
        MogileFS::Config->server_setting_cached('internal_queue_limit', 120)
            || 500;

    if ($depth == 0) {
        if ($limit < $max_limit) {
            $limit += 50;
            # Do not overshoot the configured cap when the remaining gap is
            # smaller than one 50-step.
            $limit = $max_limit if $limit > $max_limit;
        }
        return (1, $limit);
    }

    # Guard the ratio against a zero limit.
    if ($limit > 0 && $depth / $limit < 0.70) {
        return (1, $limit);
    }

    return (0, $limit);
}
279 # Local Variables:
280 # mode: perl
281 # c-basic-indent: 4
282 # indent-tabs-mode: nil
283 # End: