MDL-63303 message: fix bugs in message drawer
[moodle.git] / analytics / tests / prediction_test.php
blob34d82f7ab3d4f199bddc142001e3532fa264071c
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
17 /**
18 * Unit tests for evaluation, training and prediction.
20 * @package core_analytics
21 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 defined('MOODLE_INTERNAL') || die();
27 global $CFG;
28 require_once(__DIR__ . '/fixtures/test_indicator_max.php');
29 require_once(__DIR__ . '/fixtures/test_indicator_min.php');
30 require_once(__DIR__ . '/fixtures/test_indicator_fullname.php');
31 require_once(__DIR__ . '/fixtures/test_indicator_random.php');
32 require_once(__DIR__ . '/fixtures/test_target_shortname.php');
33 require_once(__DIR__ . '/fixtures/test_static_target_shortname.php');
35 require_once(__DIR__ . '/../../course/lib.php');
37 /**
38 * Unit tests for evaluation, training and prediction.
40 * @package core_analytics
41 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
42 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
44 class core_analytics_prediction_testcase extends advanced_testcase {
/**
 * Checks the predictions returned by a static model.
 *
 * The model is "trained" (nothing should be stored), then two hidden courses are
 * created and predicted, and finally the prediction is repeated to confirm that
 * no extra files or records are generated.
 *
 * @return void
 */
public function test_static_prediction() {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminuser();

    $model = $this->add_perfect_model('test_static_target_shortname');
    $model->enable('\core\analytics\time_splitting\no_splitting');
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    // Static models are not trained, so training must not leave any record behind.
    $model->train();
    $this->assertEmpty($DB->get_records('analytics_train_samples', ['modelid' => $model->get_id()]));
    $trainedfilter = ['modelid' => $model->get_id(), 'action' => 'trained'];
    $this->assertEmpty($DB->count_records('analytics_used_files', $trainedfilter));

    // Two hidden courses; according to this test only hidden courses get predictions.
    $generator = $this->getDataGenerator();
    $hiddena = $generator->create_course(['shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0]);
    $hiddenb = $generator->create_course(['shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0]);

    $outcome = $model->predict();

    // The 'a' course is expected to be predicted as 1 and the 'b' course as 0.
    $expected = [$hiddena->id => 1, $hiddenb->id => 0];
    foreach ($outcome->predictions as $uniquesampleid => $predictiondata) {
        // Only the sample id matters here; the range index is irrelevant for this check.
        list($sampleid) = $model->get_time_splitting()->infer_sample_info($uniquesampleid);
        $this->assertEquals($expected[$sampleid], $predictiondata->prediction);
    }

    // Two predicted sample records, one prediction file and two predictions are stored.
    $modelfilter = ['modelid' => $model->get_id()];
    $predictedfilter = ['modelid' => $model->get_id(), 'action' => 'predicted'];
    $this->assertCount(2, $DB->get_records('analytics_predict_samples', $modelfilter));
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfilter));
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));

    // Predicting again without new courses must not generate new files or records.
    $model->predict();
    $modelfilter = ['modelid' => $model->get_id()];
    $predictedfilter = ['modelid' => $model->get_id(), 'action' => 'predicted'];
    $this->assertCount(2, $DB->get_records('analytics_predict_samples', $modelfilter));
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfilter));
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));
}
/**
 * Trains a machine learning model and checks the predictions it returns.
 *
 * Runs once per combination of time splitting method and predictions processor; it is
 * skipped when the predictions processor reports that it is not ready.
 *
 * @dataProvider provider_ml_training_and_prediction
 * @param string $timesplittingid Time splitting method class name.
 * @param int $predictedrangeindex Index of the single range that is expected to be predicted.
 * @param int $nranges Number of time ranges of the time splitting method.
 * @param string $predictionsprocessorclass Predictions processor class name.
 * @return void
 */
public function test_ml_training_and_prediction($timesplittingid, $predictedrangeindex, $nranges, $predictionsprocessorclass) {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminUser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    // Number of courses generated for each of the two name prefixes ('a' and 'b').
    $ncourses = 10;

    // Generate training data.
    $params = array(
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    );
    for ($i = 0; $i < $ncourses; $i++) {
        $name = 'a' . random_string(10);
        $courseparams = array('shortname' => $name, 'fullname' => $name) + $params;
        $this->getDataGenerator()->create_course($courseparams);
    }
    for ($i = 0; $i < $ncourses; $i++) {
        $name = 'b' . random_string(10);
        $courseparams = array('shortname' => $name, 'fullname' => $name) + $params;
        $this->getDataGenerator()->create_course($courseparams);
    }

    // We repeat the test for all prediction processors.
    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    $model = $this->add_perfect_model();
    $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));

    // No samples trained yet.
    $this->assertEquals(0, $DB->count_records('analytics_train_samples', array('modelid' => $model->get_id())));

    $model->train();
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    // All training courses * the 3 model indicators * the number of time ranges of this time splitting method.
    // The course count is derived from $ncourses so both values can not get out of sync.
    $ntrainingcourses = $ncourses * 2;
    $indicatorcalc = $ntrainingcourses * 3 * $nranges;
    $this->assertEquals($indicatorcalc, $DB->count_records('analytics_indicator_calc'));

    // 1 training file was created.
    $trainedsamples = $DB->get_records('analytics_train_samples', array('modelid' => $model->get_id()));
    $this->assertCount(1, $trainedsamples);
    $samples = json_decode(reset($trainedsamples)->sampleids, true);
    $this->assertCount($ntrainingcourses, $samples);
    $this->assertEquals(1, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'trained')));
    // Check that analysable files for training are stored under labelled filearea.
    $fs = get_file_storage();
    $this->assertCount(1, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::LABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $this->assertEmpty($fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::UNLABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));

    // Two hidden courses: this test expects hidden courses to be the ones that receive predictions.
    $courseparams = $params + array('shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0);
    $course1 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + array('shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0);
    $course2 = $this->getDataGenerator()->create_course($courseparams);

    $result = $model->predict();

    // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
    $correct = array($course1->id => 1, $course2->id => 0);
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        list($sampleid, $rangeindex) = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

        // The range index is not important here, both ranges prediction will be the same.
        $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
    }

    // 1 range will be predicted.
    $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        $sampleids = json_decode($predictedrange->sampleids, true);
        $this->assertCount(2, $sampleids);
        $this->assertContains($course1->id, $sampleids);
        $this->assertContains($course2->id, $sampleids);
    }
    $this->assertEquals(1, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'predicted')));
    // 2 predictions.
    $this->assertEquals(2, $DB->count_records('analytics_predictions',
        array('modelid' => $model->get_id())));

    // Check that analysable files to get predictions are stored under unlabelled filearea.
    $this->assertCount(1, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::LABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $this->assertCount(1, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::UNLABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));

    // No new generated files nor records as there are no new courses available.
    $model->predict();
    $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
    }
    $this->assertEquals(1, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'predicted')));
    $this->assertEquals(2, $DB->count_records('analytics_predictions',
        array('modelid' => $model->get_id())));

    // New samples that can be used for prediction.
    $courseparams = $params + array('shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0);
    $course3 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + array('shortname' => 'dddddd', 'fullname' => 'dddddd', 'visible' => 0);
    $course4 = $this->getDataGenerator()->create_course($courseparams);

    $model->predict();

    $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        $sampleids = json_decode($predictedrange->sampleids, true);
        $this->assertCount(4, $sampleids);
        $this->assertContains($course1->id, $sampleids);
        $this->assertContains($course2->id, $sampleids);
        $this->assertContains($course3->id, $sampleids);
        $this->assertContains($course4->id, $sampleids);
    }
    $this->assertEquals(2, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'predicted')));
    $this->assertEquals(4, $DB->count_records('analytics_predictions',
        array('modelid' => $model->get_id())));
    $this->assertCount(1, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::LABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $this->assertCount(2, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::UNLABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));

    // New visible courses (for training).
    $this->getDataGenerator()->create_course(array('shortname' => 'aaa', 'fullname' => 'aa'));
    $this->getDataGenerator()->create_course();
    $model->train();
    $this->assertEquals(2, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'trained')));
    $this->assertCount(2, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::LABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $this->assertCount(2, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::UNLABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));

    // Disable the log stores again and reload the log manager.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Data provider for test_ml_training_and_prediction.
 *
 * Each case lists the time splitting class name, the expected predicted range index
 * and the number of ranges. Cases are then expanded by add_prediction_processors().
 *
 * @return array
 */
public function provider_ml_training_and_prediction() {
    $nosplitting = ['\core\analytics\time_splitting\no_splitting', 0, 1];
    $quarters = ['\core\analytics\time_splitting\quarters', 3, 4];

    // Every case has to be run against all the system prediction processors.
    return $this->add_prediction_processors([
        'no_splitting' => $nosplitting,
        'quarters' => $quarters,
    ]);
}
/**
 * Test the system classifiers returns.
 *
 * This test checks that all mlbackend plugins in the system are able to return proper status codes
 * even under weird situations.
 *
 * @dataProvider provider_ml_classifiers_return
 * @param string $success Expected outcome: 'yes', 'no' or 'maybe' (any other value is treated as 'maybe').
 * @param int $nsamples Total number of samples in the dataset; must be divisible by the number of classes.
 * @param array $classes Target class values that get samples in the dataset.
 * @param string $predictionsprocessorclass Predictions processor class name.
 * @return void
 */
public function test_ml_classifiers_return($success, $nsamples, $classes, $predictionsprocessorclass) {
    $this->resetAfterTest();

    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    if ($nsamples % count($classes) !== 0) {
        throw new \coding_exception('The number of samples should be divisible by the number of classes');
    }
    $samplesperclass = $nsamples / count($classes);

    // Metadata (we pass 2 classes even if $classes only provides 1 class samples as we want to test
    // what the backend does in this case).
    $csv = "nfeatures,targetclasses,targettype" . PHP_EOL;
    $csv .= "3,\"[0,1]\",\"discrete\"" . PHP_EOL;

    // Headers.
    $csv .= "feature1,feature2,feature3,target" . PHP_EOL;

    // Same feature values for every sample, only the target class changes.
    foreach ($classes as $class) {
        for ($i = 0; $i < $samplesperclass; $i++) {
            $csv .= "1,0,1,$class" . PHP_EOL;
        }
    }

    $trainingfile = array(
        'contextid' => \context_system::instance()->id,
        'component' => 'analytics',
        'filearea' => 'labelled',
        'itemid' => 123,
        'filepath' => '/',
        'filename' => 'whocares.csv'
    );
    $fs = get_file_storage();
    // The file object is kept apart from the CSV text so that each variable holds a single type.
    $datasetfile = $fs->create_file_from_string($trainingfile, $csv);

    // Training should work correctly if at least 1 sample of each class is included.
    $dir = make_request_directory();
    $result = $predictionsprocessor->train_classification('whatever', $datasetfile, $dir);

    switch ($success) {
        case 'yes':
            $this->assertEquals(\core_analytics\model::OK, $result->status);
            break;
        case 'no':
            $this->assertNotEquals(\core_analytics\model::OK, $result->status);
            break;
        case 'maybe':
        default:
            // We just check that an object is returned so we don't have an empty check,
            // what we really want to check is that an exception was not thrown.
            $this->assertInstanceOf(\stdClass::class, $result);
            break;
    }
}
/**
 * Data provider for test_ml_classifiers_return.
 *
 * The cases can not be very specific because test_ml_classifiers_return only checks that
 * mlbackend plugins behave as expected and properly control backend errors, even
 * under weird situations.
 *
 * @return array
 */
public function provider_ml_classifiers_return() {
    // The first value of each case is a verbose flag rather than a boolean, for readability.
    $cases = [];
    $cases['1-samples'] = ['maybe', 1, [0]];
    $cases['2-samples-same-class'] = ['maybe', 2, [0]];
    $cases['2-samples-different-classes'] = ['yes', 2, [0, 1]];
    $cases['4-samples-different-classes'] = ['yes', 4, [0, 1]];

    // Every case has to be run against all the system prediction processors.
    return $this->add_prediction_processors($cases);
}
/**
 * Basic test to check that prediction processors work as expected.
 *
 * @dataProvider provider_ml_test_evaluation
 * @param string $modelquality Either 'perfect' or 'random'; any other value throws a coding_exception.
 * @param int $ncourses Number of courses generated for each of the two name prefixes ('a' and 'b').
 * @param array $expected Status code the evaluation result should include, indexed by time splitting class name.
 * @param string $predictionsprocessorclass Predictions processor class name.
 * @return void
 */
public function test_ml_evaluation($modelquality, $ncourses, $expected, $predictionsprocessorclass) {
    $this->resetAfterTest(true);
    $this->setAdminUser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $sometimesplittings = '\core\analytics\time_splitting\weekly,' .
        '\core\analytics\time_splitting\single_range,' .
        '\core\analytics\time_splitting\quarters';
    set_config('timesplittings', $sometimesplittings, 'analytics');

    if ($modelquality === 'perfect') {
        $model = $this->add_perfect_model();
    } else if ($modelquality === 'random') {
        $model = $this->add_random_model();
    } else {
        throw new \coding_exception('Only perfect and random accepted as $modelquality values');
    }

    // Generate training data.
    $params = array(
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    );
    // The shared $params array is left untouched; each course gets its own parameters array.
    for ($i = 0; $i < $ncourses; $i++) {
        $name = 'a' . random_string(10);
        $courseparams = array('shortname' => $name, 'fullname' => $name) + $params;
        $this->getDataGenerator()->create_course($courseparams);
    }
    for ($i = 0; $i < $ncourses; $i++) {
        $name = 'b' . random_string(10);
        $courseparams = array('shortname' => $name, 'fullname' => $name) + $params;
        $this->getDataGenerator()->create_course($courseparams);
    }

    // We repeat the test for all prediction processors.
    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    $model->update(false, false, false, get_class($predictionsprocessor));
    $results = $model->evaluate();

    // We check that the returned status includes at least the expected code of each time splitting method.
    foreach ($results as $timesplitting => $result) {
        $message = 'The returned status code ' . $result->status . ' should include ' . $expected[$timesplitting];
        // Bitwise AND keeps only the bits of the status that are part of the expected code.
        $filtered = $result->status & $expected[$timesplitting];
        $this->assertEquals($expected[$timesplitting], $filtered, $message);
    }

    // Disable the log stores again and reload the log manager.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Checks that samples with existing calculations are not calculated again.
 *
 * The indicator's calculate_sample method is mocked and expected never to be called
 * when every sample id already has a value in the existing calculations.
 *
 * @return void
 */
public function test_read_indicator_calculations() {
    global $DB;

    $this->resetAfterTest(true);

    $origin = 'whatever';
    $from = 123;
    $to = 321;

    // Mock that fails the test if calculate_sample gets called.
    $builder = $this->getMockBuilder('test_indicator_max');
    $indicator = $builder->setMethods(['calculate_sample'])->getMock();
    $indicator->expects($this->never())->method('calculate_sample');

    // Both samples already have a stored calculation.
    $samples = [111 => 111, 222 => 222];
    $stored = [111 => 1, 222 => 0.5];
    list($values, $unused) = $indicator->calculate($samples, $origin, $from, $to, $stored);
}
/**
 * Checks which samples are kept in the dataset depending on their indicator values.
 *
 * Samples having at least one not null indicator value are expected in the dataset;
 * samples whose indicator values are all null are expected to be left out.
 *
 * @return void
 */
public function test_not_null_samples() {
    $this->resetAfterTest(true);

    $classname = '\core\analytics\time_splitting\quarters';
    $splitter = \core_analytics\manager::get_time_splitting($classname);
    $splitter->set_analysable(new \core_analytics\site());

    $ranges = [
        ['start' => 111, 'end' => 222, 'time' => 222],
        ['start' => 222, 'end' => 333, 'time' => 333],
    ];
    $samples = [123 => 123, 321 => 321];

    // Dataset keys checked below: one for each sample and range combination.
    $datasetkeys = ['123-0', '123-1', '321-0', '321-1'];

    // Indicator whose sample calculation always returns null.
    $nullindicator = $this->getMockBuilder('test_indicator_max')->setMethods(['calculate_sample'])->getMock();
    $nullindicator->method('calculate_sample')->willReturn(null);

    $minindicator = \core_analytics\manager::get_indicator('test_indicator_min');

    // Samples with at least 1 not null value are returned.
    $args = [$samples, 'whatever', [$nullindicator, $minindicator], $ranges];
    $dataset = phpunit_util::call_internal_method($splitter, 'calculate_indicators', $args, $classname);
    foreach ($datasetkeys as $datasetkey) {
        $this->assertArrayHasKey($datasetkey, $dataset);
    }

    // Samples with only null values are not returned.
    $args = [$samples, 'whatever', [$nullindicator], $ranges];
    $dataset = phpunit_util::call_internal_method($splitter, 'calculate_indicators', $args, $classname);
    foreach ($datasetkeys as $datasetkey) {
        $this->assertArrayNotHasKey($datasetkey, $dataset);
    }
}
/**
 * Data provider for test_ml_evaluation.
 *
 * Provides a 'bad' case (random model) and a 'good' case (perfect model), each one with
 * the status code expected for every time splitting method. Cases are then expanded by
 * add_prediction_processors().
 *
 * @return array
 */
public function provider_ml_test_evaluation() {
    $weekly = '\core\analytics\time_splitting\weekly';
    $singlerange = '\core\analytics\time_splitting\single_range';
    $quarters = '\core\analytics\time_splitting\quarters';

    $cases = [];

    $cases['bad'] = [
        'modelquality' => 'random',
        'ncourses' => 50,
        'expectedresults' => [
            // The courses are too long to be processed on a weekly basis.
            $weekly => \core_analytics\model::NO_DATASET,
            $singlerange => \core_analytics\model::LOW_SCORE,
            $quarters => \core_analytics\model::LOW_SCORE,
        ],
    ];

    $cases['good'] = [
        'modelquality' => 'perfect',
        'ncourses' => 50,
        'expectedresults' => [
            // The courses are too long to be processed on a weekly basis.
            $weekly => \core_analytics\model::NO_DATASET,
            $singlerange => \core_analytics\model::OK,
            $quarters => \core_analytics\model::OK,
        ],
    ];

    return $this->add_prediction_processors($cases);
}
/**
 * Creates a model for test_target_shortname that includes the random test indicator.
 *
 * @return \core_analytics\model
 */
protected function add_random_model() {
    $indicators = array_map(
        function($indicatorclass) {
            return \core_analytics\manager::get_indicator($indicatorclass);
        },
        ['test_indicator_max', 'test_indicator_min', 'test_indicator_random']
    );
    $target = \core_analytics\manager::get_target('test_target_shortname');

    $created = \core_analytics\model::create($target, $indicators);

    // A new instance is built from the id so that the db defaults are loaded as well.
    return new \core_analytics\model($created->get_id());
}
/**
 * Creates a model for the given target that includes the fullname test indicator.
 *
 * @param string $targetclass Target class name.
 * @return \core_analytics\model
 */
protected function add_perfect_model($targetclass = 'test_target_shortname') {
    $indicators = array_map(
        function($indicatorclass) {
            return \core_analytics\manager::get_indicator($indicatorclass);
        },
        ['test_indicator_max', 'test_indicator_min', 'test_indicator_fullname']
    );
    $target = \core_analytics\manager::get_target($targetclass);

    $created = \core_analytics\model::create($target, $indicators);

    // A new instance is built from the id so that the db defaults are loaded as well.
    return new \core_analytics\model($created->get_id());
}
/**
 * Repeats every test case once for each prediction processor in the system.
 *
 * The returned cases are indexed by "<case key>-<processor class>" and get the processor
 * class name appended under the 'predictionsprocessorclass' key.
 *
 * @param array $cases Test cases indexed by case name.
 * @return array
 */
protected function add_prediction_processors($cases) {
    $expanded = [];

    // All the system prediction processors have to be tested.
    $processors = \core_analytics\manager::get_all_prediction_processors();
    foreach ($processors as $processorclass => $notused) {
        foreach ($cases as $casename => $casedata) {
            $expandedname = $casename . '-' . $processorclass;
            $expanded[$expandedname] = $casedata + ['predictionsprocessorclass' => $processorclass];
        }
    }

    return $expanded;
}