MDL-72395 composer: Update to newer dependencies
[moodle.git] / analytics / tests / prediction_test.php
blob43af3a6cb3331ace8910a9b4257831a8eb53acd7
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
17 /**
18 * Unit tests for evaluation, training and prediction.
20 * NOTE: in order to execute this test using a separate server for the
21 * python ML backend you need to define these variables in your config.php file:
23 * define('TEST_MLBACKEND_PYTHON_HOST', '127.0.0.1');
24 * define('TEST_MLBACKEND_PYTHON_PORT', 5000);
25 * define('TEST_MLBACKEND_PYTHON_USERNAME', 'default');
26 * define('TEST_MLBACKEND_PYTHON_PASSWORD', 'sshhhh');
28 * @package core_analytics
29 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
30 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
33 defined('MOODLE_INTERNAL') || die();
35 global $CFG;
36 require_once(__DIR__ . '/fixtures/test_indicator_max.php');
37 require_once(__DIR__ . '/fixtures/test_indicator_min.php');
38 require_once(__DIR__ . '/fixtures/test_indicator_null.php');
39 require_once(__DIR__ . '/fixtures/test_indicator_fullname.php');
40 require_once(__DIR__ . '/fixtures/test_indicator_random.php');
41 require_once(__DIR__ . '/fixtures/test_indicator_multiclass.php');
42 require_once(__DIR__ . '/fixtures/test_target_shortname.php');
43 require_once(__DIR__ . '/fixtures/test_target_shortname_multiclass.php');
44 require_once(__DIR__ . '/fixtures/test_static_target_shortname.php');
46 require_once(__DIR__ . '/../../course/lib.php');
48 /**
49 * Unit tests for evaluation, training and prediction.
51 * @package core_analytics
52 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
53 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
55 class core_analytics_prediction_testcase extends advanced_testcase {
/**
 * Purge all the mlbackend outputs.
 *
 * This is done automatically for mlbackends using the web server dataroot but
 * other mlbackends may store files elsewhere and these files need to be removed,
 * so every registered model is deleted explicitly after each test.
 *
 * @return void
 */
public function tearDown(): void {
    // Run the cleanup as the admin user.
    $this->setAdminUser();

    foreach (\core_analytics\manager::get_all_models() as $model) {
        $model->delete();
    }

    // Chain to the parent fixture teardown so framework-level cleanup is not bypassed.
    parent::tearDown();
}
/**
 * A static model is reported as trained without training data and still generates predictions.
 *
 * @return void
 */
public function test_static_prediction() {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminuser();

    $model = $this->add_perfect_model('test_static_target_shortname');
    $model->enable('\core\analytics\time_splitting\no_splitting');
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    $bymodel = ['modelid' => $model->get_id()];

    // No training for static models: train() must not record samples nor used files.
    $model->train();
    $this->assertEmpty($DB->get_records('analytics_train_samples', $bymodel));
    $this->assertEmpty($DB->count_records('analytics_used_files', $bymodel + ['action' => 'trained']));

    // Now we create 2 hidden courses (only hidden courses are getting predictions).
    $generator = $this->getDataGenerator();
    $course1 = $generator->create_course(['shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0]);
    $course2 = $generator->create_course(['shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0]);

    $result = $model->predict();

    // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
    $expected = [$course1->id => 1, $course2->id => 0];
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // The range index is not important here, both ranges prediction will be the same.
        [$sampleid] = $model->get_time_splitting()->infer_sample_info($uniquesampleid);
        $this->assertEquals($expected[$sampleid], $predictiondata->prediction);
    }

    // 1 range for each analysable.
    $this->assertCount(2, $DB->get_records('analytics_predict_samples', $bymodel));
    // 2 predictions for each range.
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $bymodel));

    // No new generated records as there are no new courses available.
    $model->predict();
    $this->assertCount(2, $DB->get_records('analytics_predict_samples', $bymodel));
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $bymodel));
}
/**
 * Predictions are limited to the contexts the model has been restricted to.
 */
public function test_model_contexts() {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminuser();

    $nosplitting = '\core\analytics\time_splitting\no_splitting';
    $generator = $this->getDataGenerator();

    $misc = $DB->get_record('course_categories', ['name' => 'Miscellaneous']);
    $miscctx = \context_coursecat::instance($misc->id);

    $category = $generator->create_category();
    $categoryctx = \context_coursecat::instance($category->id);

    // One course per category.
    $course1 = $generator->create_course([
        'shortname' => 'aaaaaa',
        'fullname' => 'aaaaaa',
        'visible' => 0,
        'category' => $category->id,
    ]);
    $course1ctx = \context_course::instance($course1->id);
    $generator->create_course([
        'shortname' => 'bbbbbb',
        'fullname' => 'bbbbbb',
        'visible' => 0,
        'category' => $misc->id,
    ]);

    $model = $this->add_perfect_model('test_static_target_shortname');

    // Just 1 category.
    $model->update(true, false, $nosplitting, false, [$categoryctx->id]);
    $this->assertCount(1, $model->predict()->predictions);

    // Now with 2 categories: only the courses in the newly added category are processed.
    $model->update(true, false, false, false, [$categoryctx->id, $miscctx->id]);
    $this->assertCount(1, $model->predict()->predictions);

    // Clear the predictions generated by the model and predict() again.
    $model->clear();
    $this->assertCount(2, $model->predict()->predictions);

    // Course context restriction.
    $model->update(true, false, $nosplitting, false, [$course1ctx->id]);

    // Nothing new as the course was already analysed.
    $result = $model->predict();
    $this->assertTrue(empty($result->predictions));

    $model->clear();
    $this->assertCount(1, $model->predict()->predictions);
}
/**
 * Trains a perfect model and checks the predictions, database records and dataset files it generates.
 *
 * @dataProvider provider_ml_training_and_prediction
 * @param string $timesplittingid Time splitting method class name
 * @param int $predictedrangeindex Index of the range expected to receive predictions
 * @param int $nranges Number of time ranges of the time splitting method
 * @param string $predictionsprocessorclass Predictions processor class name
 * @param array $forcedconfig Config values forced for this run
 * @return void
 */
public function test_ml_training_and_prediction($timesplittingid, $predictedrangeindex, $nranges, $predictionsprocessorclass,
        $forcedconfig) {
    global $DB;

    $this->resetAfterTest(true);

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $this->setAdminuser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    // Generate training data. Twice $ncourses samples are expected to be trained (asserted below),
    // so every count is derived from $ncourses instead of being hard-coded.
    $ncourses = 10;
    $this->generate_courses($ncourses);
    $ntrainingcourses = $ncourses * 2;

    $model = $this->add_perfect_model();
    $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));

    $modelid = $model->get_id();
    $bymodel = ['modelid' => $modelid];
    $labelled = \core_analytics\dataset_manager::LABELLED_FILEAREA;
    $unlabelled = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;
    $fs = get_file_storage();
    $syscontextid = \context_system::instance()->id;

    // Files stored for this model under /analysable/ in the given analytics file area.
    $analysablefiles = function(string $filearea) use ($fs, $syscontextid, $modelid) {
        return $fs->get_directory_files($syscontextid, 'analytics', $filearea, $modelid, '/analysable/', true, false);
    };
    // Number of files this model has already used for the given action.
    $usedfiles = function(string $action) use ($DB, $bymodel) {
        return $DB->count_records('analytics_used_files', $bymodel + ['action' => $action]);
    };

    // No samples trained yet.
    $this->assertEquals(0, $DB->count_records('analytics_train_samples', $bymodel));

    $model->train();
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    // Training courses * the 3 model indicators * the number of time ranges of this time splitting method.
    $indicatorcalc = $ntrainingcourses * 3 * $nranges;
    $this->assertEquals($indicatorcalc, $DB->count_records('analytics_indicator_calc'));

    // 1 training file was created.
    $trainedsamples = $DB->get_records('analytics_train_samples', $bymodel);
    $this->assertCount(1, $trainedsamples);
    $samples = json_decode(reset($trainedsamples)->sampleids, true);
    $this->assertCount($ntrainingcourses, $samples);
    $this->assertEquals(1, $usedfiles('trained'));
    // Check that analysable files for training are stored under labelled filearea.
    $this->assertCount(1, $analysablefiles($labelled));
    $this->assertEmpty($analysablefiles($unlabelled));

    $params = [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];
    $generator = $this->getDataGenerator();
    $course1 = $generator->create_course($params + ['shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0]);
    $course2 = $generator->create_course($params + ['shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0]);

    // They will not be skipped for prediction though.
    $result = $model->predict();

    // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
    $correct = [$course1->id => 1, $course2->id => 0];
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // The range index is not important here, both ranges prediction will be the same.
        [$sampleid] = $model->get_time_splitting()->infer_sample_info($uniquesampleid);
        $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
    }

    // 1 range will be predicted.
    $predictedranges = $DB->get_records('analytics_predict_samples', $bymodel);
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        $sampleids = json_decode($predictedrange->sampleids, true);
        $this->assertCount(2, $sampleids);
        $this->assertContainsEquals($course1->id, $sampleids);
        $this->assertContainsEquals($course2->id, $sampleids);
    }
    $this->assertEquals(1, $usedfiles('predicted'));
    // 2 predictions.
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $bymodel));

    // Check that analysable files to get predictions are stored under unlabelled filearea.
    $this->assertCount(1, $analysablefiles($labelled));
    $this->assertCount(1, $analysablefiles($unlabelled));

    // No new generated files nor records as there are no new courses available.
    $model->predict();
    $predictedranges = $DB->get_records('analytics_predict_samples', $bymodel);
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
    }
    $this->assertEquals(1, $usedfiles('predicted'));
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $bymodel));

    // New samples that can be used for prediction.
    $course3 = $generator->create_course($params + ['shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0]);
    $course4 = $generator->create_course($params + ['shortname' => 'dddddd', 'fullname' => 'dddddd', 'visible' => 0]);

    $model->predict();

    $predictedranges = $DB->get_records('analytics_predict_samples', $bymodel);
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        $sampleids = json_decode($predictedrange->sampleids, true);
        $this->assertCount(4, $sampleids);
        $this->assertContainsEquals($course1->id, $sampleids);
        $this->assertContainsEquals($course2->id, $sampleids);
        $this->assertContainsEquals($course3->id, $sampleids);
        $this->assertContainsEquals($course4->id, $sampleids);
    }
    $this->assertEquals(2, $usedfiles('predicted'));
    $this->assertEquals(4, $DB->count_records('analytics_predictions', $bymodel));
    $this->assertCount(1, $analysablefiles($labelled));
    $this->assertCount(2, $analysablefiles($unlabelled));

    // New visible course (for training).
    $generator->create_course(['shortname' => 'aaa', 'fullname' => 'aa']);
    $generator->create_course();
    $model->train();
    $this->assertEquals(2, $usedfiles('trained'));
    $this->assertCount(2, $analysablefiles($labelled));
    $this->assertCount(2, $analysablefiles($unlabelled));

    // Confirm that the files associated to the model are deleted on clear and on delete. The ML backend deletion
    // processes will be triggered by these actions and any exception there would result in a failed test.
    $model->clear();
    $this->assertEquals(0, $usedfiles('trained'));
    $this->assertCount(0, $analysablefiles($labelled));
    $this->assertCount(0, $analysablefiles($unlabelled));
    $model->delete();

    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Data provider for test_ml_training_and_prediction.
 *
 * Each base case lists the time splitting class, the index of the predicted range
 * and the number of ranges; the cases are then passed through add_prediction_processors().
 *
 * @return array
 */
public function provider_ml_training_and_prediction() {
    $basecases = [
        'no_splitting' => ['\core\analytics\time_splitting\no_splitting', 0, 1],
        'quarters' => ['\core\analytics\time_splitting\quarters', 3, 4],
    ];

    // We need to test all system prediction processors.
    return $this->add_prediction_processors($basecases);
}
/**
 * An exported trained model can be imported and predicts the same as the original one.
 *
 * @param string $predictionsprocessorclass The class name
 * @param array $forcedconfig Config values forced for this run
 * @dataProvider provider_ml_processors
 * @return void
 */
public function test_ml_export_import($predictionsprocessorclass, $forcedconfig) {
    $this->resetAfterTest(true);

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $this->setAdminuser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    // Generate training data.
    $ncourses = 10;
    $this->generate_courses($ncourses);

    $model = $this->add_perfect_model();
    $model->update(true, false, '\core\analytics\time_splitting\quarters', get_class($predictionsprocessor));

    $model->train();
    $this->assertTrue($model->trained_locally());

    // Hidden courses to get predictions for.
    $this->generate_courses(10, ['visible' => 0]);

    $originalresults = $model->predict();

    // Export including the ML backend data.
    $zipfilename = 'model-zip-' . microtime() . '.zip';
    $zipfilepath = $model->export_model($zipfilename);

    $modelconfig = new \core_analytics\model_config();
    list($modelconfig, $mlbackend) = $modelconfig->extract_import_contents($zipfilepath);
    $this->assertNotFalse($mlbackend);

    $importmodel = \core_analytics\model::import_model($zipfilepath);
    $importmodel->enable();

    // Now predict using the imported model without prior training.
    $importedmodelresults = $importmodel->predict();

    foreach ($originalresults->predictions as $sampleid => $prediction) {
        // Fail with a clear assertion, rather than an undefined index error, if a sample is missing.
        $this->assertArrayHasKey($sampleid, $importedmodelresults->predictions);
        // The original model prediction is the expected value, the imported model one is the actual value.
        $this->assertEquals($prediction->prediction, $importedmodelresults->predictions[$sampleid]->prediction);
    }

    $this->assertFalse($importmodel->trained_locally());

    // Export again, this time passing false as the second export_model() argument:
    // the extracted contents must not include ML backend data.
    $zipfilename = 'model-zip-' . microtime() . '.zip';
    $zipfilepath = $model->export_model($zipfilename, false);

    $modelconfig = new \core_analytics\model_config();
    list($modelconfig, $mlbackend) = $modelconfig->extract_import_contents($zipfilepath);
    $this->assertFalse($mlbackend);

    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Data provider with a single argument-less case, passed through add_prediction_processors().
 *
 * @return array
 */
public function provider_ml_processors() {
    // We need to test all system prediction processors.
    return $this->add_prediction_processors(['case' => []]);
}
/**
 * Test the system classifiers returns.
 *
 * This test checks that all mlbackend plugins in the system are able to return proper status codes
 * even under weird situations.
 *
 * @dataProvider provider_ml_classifiers_return
 * @param string $success Expected outcome: 'yes', 'no' or 'maybe'
 * @param int $nsamples Total number of samples written to the training dataset
 * @param int[] $classes Target classes the samples are evenly split into
 * @param string $predictionsprocessorclass Predictions processor class name
 * @param array $forcedconfig Config values forced for this run
 * @return void
 */
public function test_ml_classifiers_return($success, $nsamples, $classes, $predictionsprocessorclass, $forcedconfig) {
    $this->resetAfterTest();

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $nclasses = count($classes);
    if ($nclasses === 0) {
        // Guard against a modulo by zero below.
        throw new \coding_exception('At least one class should be provided');
    }
    if ($nsamples % $nclasses != 0) {
        throw new \coding_exception('The number of samples should be divisible by the number of classes');
    }
    $samplesperclass = intdiv($nsamples, $nclasses);

    // Metadata (we pass 2 classes even if $classes only provides 1 class samples as we want to test
    // what the backend does in this case.
    $csv = "nfeatures,targetclasses,targettype" . PHP_EOL;
    $csv .= "3,\"[0,1]\",\"discrete\"" . PHP_EOL;

    // Headers.
    $csv .= "feature1,feature2,feature3,target" . PHP_EOL;
    foreach ($classes as $class) {
        for ($i = 0; $i < $samplesperclass; $i++) {
            $csv .= "1,0,1,$class" . PHP_EOL;
        }
    }

    $trainingfile = array(
        'contextid' => \context_system::instance()->id,
        'component' => 'analytics',
        'filearea' => 'labelled',
        'itemid' => 123,
        'filepath' => '/',
        'filename' => 'whocares.csv'
    );
    $fs = get_file_storage();
    $dataset = $fs->create_file_from_string($trainingfile, $csv);

    // Training should work correctly if at least 1 sample of each class is included.
    $dir = make_request_directory();
    $modeluniqueid = 'whatever' . microtime();
    $result = $predictionsprocessor->train_classification($modeluniqueid, $dataset, $dir);

    switch ($success) {
        case 'yes':
            $this->assertEquals(\core_analytics\model::OK, $result->status);
            break;
        case 'no':
            $this->assertNotEquals(\core_analytics\model::OK, $result->status);
            break;
        case 'maybe':
        default:
            // We just check that an object is returned so we don't have an empty check,
            // what we really want to check is that an exception was not thrown.
            $this->assertInstanceOf(\stdClass::class, $result);
    }

    // Purge the directory used in this test (useful in case the mlbackend is storing files
    // somewhere out of the default moodledata/models dir.
    $predictionsprocessor->delete_output_dir($dir, $modeluniqueid);
}
/**
 * Data provider for test_ml_classifiers_return.
 *
 * We can not be very specific here as test_ml_classifiers_return only checks that
 * mlbackend plugins behave as expected and control properly backend errors even
 * under weird situations.
 *
 * @return array
 */
public function provider_ml_classifiers_return() {
    // Using verbose options as the first argument for readability.
    $basecases = [
        '1-samples' => ['maybe', 1, [0]],
        '2-samples-same-class' => ['maybe', 2, [0]],
        '2-samples-different-classes' => ['yes', 2, [0, 1]],
        '4-samples-different-classes' => ['yes', 4, [0, 1]],
    ];

    // We need to test all system prediction processors.
    return $this->add_prediction_processors($basecases);
}
/**
 * Tests correct multi-classification.
 *
 * @dataProvider provider_test_multi_classifier
 * @param string $timesplittingid
 * @param string $predictionsprocessorclass
 * @param array|null $forcedconfig
 * @throws coding_exception
 * @throws moodle_exception
 */
public function test_ml_multi_classifier($timesplittingid, $predictionsprocessorclass, $forcedconfig) {
    $this->resetAfterTest(true);
    $this->setAdminuser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $this->set_forced_config($forcedconfig);

    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    $ready = $predictionsprocessor->is_ready();
    if ($ready !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    // Generate training courses.
    $ncourses = 5;
    $this->generate_courses_multiclass($ncourses);
    $model = $this->add_multiclass_model();
    $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));
    $model->train();

    $dates = [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];
    $generator = $this->getDataGenerator();
    $course1 = $generator->create_course($dates + ['shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0]);
    $course2 = $generator->create_course($dates + ['shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0]);
    $course3 = $generator->create_course($dates + ['shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0]);

    // They will not be skipped for prediction though.
    $result = $model->predict();

    // The $course1 predictions should be 0 == 'a', $course2 should be 1 == 'b' and $course3 should be 2 == 'c'.
    $expected = [$course1->id => 0, $course2->id => 1, $course3->id => 2];
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // The range index is not important here, only the sample id is needed.
        [$sampleid] = $model->get_time_splitting()->infer_sample_info($uniquesampleid);
        $this->assertEquals($expected[$sampleid], $predictiondata->prediction);
    }

    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Provider for the multi_classification test.
 *
 * @return array
 */
public function provider_test_multi_classifier() {
    $basecases = [
        'notimesplitting' => ['\core\analytics\time_splitting\no_splitting'],
    ];

    // Add all system prediction processors.
    return $this->add_prediction_processors($basecases);
}
/**
 * Basic test to check that prediction processors work as expected.
 *
 * @coversNothing
 * @dataProvider provider_ml_test_evaluation_configuration
 * @param string $modelquality Either 'perfect' or 'random'
 * @param int $ncourses Value passed to generate_courses()
 * @param array $expected Expected status code, indexed by time splitting class name
 * @param string $predictionsprocessorclass Predictions processor class name
 * @param array $forcedconfig Config values forced for this run
 * @return void
 */
public function test_ml_evaluation_configuration($modelquality, $ncourses, $expected, $predictionsprocessorclass,
        $forcedconfig) {
    $this->resetAfterTest(true);

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $this->setAdminuser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $sometimesplittings = '\core\analytics\time_splitting\single_range,' .
        '\core\analytics\time_splitting\quarters';
    set_config('defaulttimesplittingsevaluation', $sometimesplittings, 'analytics');

    switch (true) {
        case $modelquality === 'perfect':
            $model = $this->add_perfect_model();
            break;
        case $modelquality === 'random':
            $model = $this->add_random_model();
            break;
        default:
            throw new \coding_exception('Only perfect and random accepted as $modelquality values');
    }

    // Generate training data.
    $this->generate_courses($ncourses);

    $model->update(false, false, false, get_class($predictionsprocessor));
    $results = $model->evaluate();

    $options = ['evaluation' => true, 'reuseprevanalysed' => true];
    foreach ($results as $timesplitting => $evaluation) {
        // We check that the returned status includes at least the expected code.
        $expectedcode = $expected[$timesplitting];
        $message = 'The returned status code ' . $evaluation->status . ' should include ' . $expectedcode;
        $this->assertEquals($expectedcode, $evaluation->status & $expectedcode, $message);

        // A cached analysis file should be retrievable for each evaluated time splitting method.
        $resultfile = new \core_analytics\local\analysis\result_file($model->get_id(), true, $options);
        $timesplittingobj = \core_analytics\manager::get_time_splitting($timesplitting);
        $analysable = new \core_analytics\site();
        $cachedanalysis = $resultfile->retrieve_cached_result($timesplittingobj, $analysable);
        $this->assertInstanceOf(\stored_file::class, $cachedanalysis);
    }

    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Tests the evaluation of already trained models.
 *
 * @coversNothing
 * @dataProvider provider_ml_processors
 * @param string $predictionsprocessorclass Predictions processor class name
 * @param array $forcedconfig Config values forced for this run
 * @return void
 */
public function test_ml_evaluation_trained_model($predictionsprocessorclass, $forcedconfig) {
    $this->resetAfterTest(true);

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $this->setAdminuser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $quarters = '\\core\\analytics\\time_splitting\\quarters';

    $model = $this->add_perfect_model();

    // Generate training data.
    $this->generate_courses(50);

    $model->update(true, false, $quarters, get_class($predictionsprocessor));
    $model->train();

    // Export the trained model and import it back.
    $zipfilepath = $model->export_model('model-zip-' . microtime() . '.zip');
    $importmodel = \core_analytics\model::import_model($zipfilepath);

    // Evaluate the imported model in 'trainedmodel' mode.
    $results = $importmodel->evaluate(['mode' => 'trainedmodel']);
    $evaluation = $results[$quarters];
    $this->assertEquals(0, $evaluation->status);
    $this->assertEquals(1, $evaluation->score);

    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Samples with existing calculations must not be calculated again.
 *
 * The only assertion is the mock expectation: calculate_sample() must never be called
 * when every sample id already has an entry in the existing calculations.
 *
 * @return void
 */
public function test_read_indicator_calculations() {
    $this->resetAfterTest(true);

    $indicator = $this->getMockBuilder('test_indicator_max')
        ->onlyMethods(['calculate_sample'])
        ->getMock();
    $indicator->expects($this->never())->method('calculate_sample');

    $sampleids = [111 => 111, 222 => 222];
    $existingcalcs = [111 => 1, 222 => -1];
    $sampleorigin = 'whatever';
    $starttime = 123;
    $endtime = 321;

    $indicator->calculate($sampleids, $sampleorigin, $starttime, $endtime, $existingcalcs);
}
/**
 * Samples are only part of the calculated dataset when at least one indicator value is not null.
 */
public function test_not_null_samples() {
    $this->resetAfterTest(true);

    $timesplitting = \core_analytics\manager::get_time_splitting('\core\analytics\time_splitting\quarters');
    $timesplitting->set_analysable(new \core_analytics\site());

    $ranges = [
        ['start' => 111, 'end' => 222, 'time' => 222],
        ['start' => 222, 'end' => 333, 'time' => 333],
    ];
    $samples = [123 => 123, 321 => 321];
    $samplekeys = ['123-0', '123-1', '321-0', '321-1'];

    $target = \core_analytics\manager::get_target('test_target_shortname');

    // Creates a model with the given indicators and returns what calculate_indicators() produces for it.
    $calculate = function(array $indicatorclasses) use ($target, $timesplitting, $samples, $ranges) {
        $indicators = [];
        foreach ($indicatorclasses as $classname) {
            $indicators[] = \core_analytics\manager::get_indicator($classname);
        }
        $model = \core_analytics\model::create($target, $indicators, '\core\analytics\time_splitting\no_splitting');

        $analyser = $model->get_analyser();
        $result = new \core_analytics\local\analysis\result_array($model->get_id(), false, $analyser->get_options());
        $analysis = new \core_analytics\analysis($analyser, false, $result);

        return phpunit_util::call_internal_method($analysis, 'calculate_indicators',
            [$timesplitting, $samples, $ranges], '\core_analytics\analysis');
    };

    // Samples with at least 1 not null value are returned.
    $dataset = $calculate(['test_indicator_null', 'test_indicator_min']);
    foreach ($samplekeys as $samplekey) {
        $this->assertArrayHasKey($samplekey, $dataset);
    }

    // Samples with only null values are not returned.
    $dataset = $calculate(['test_indicator_null']);
    foreach ($samplekeys as $samplekey) {
        $this->assertArrayNotHasKey($samplekey, $dataset);
    }
}
/**
 * Data provider for test_ml_evaluation_configuration.
 *
 * Pairs a model quality with the status code expected for each evaluated time splitting method.
 *
 * @return array
 */
public function provider_ml_test_evaluation_configuration() {
    $singlerange = '\core\analytics\time_splitting\single_range';
    $quarters = '\core\analytics\time_splitting\quarters';

    $basecases = [
        'bad' => [
            'modelquality' => 'random',
            'ncourses' => 50,
            'expectedresults' => [
                $singlerange => \core_analytics\model::LOW_SCORE,
                $quarters => \core_analytics\model::LOW_SCORE,
            ],
        ],
        'good' => [
            'modelquality' => 'perfect',
            'ncourses' => 50,
            'expectedresults' => [
                $singlerange => \core_analytics\model::OK,
                $quarters => \core_analytics\model::OK,
            ],
        ],
    ];

    return $this->add_prediction_processors($basecases);
}
/**
 * Creates a model for the test_target_shortname target that uses the
 * test_indicator_max, test_indicator_min and test_indicator_random indicators.
 *
 * @return \core_analytics\model
 */
protected function add_random_model() {
    $target = \core_analytics\manager::get_target('test_target_shortname');

    // Resolve every indicator class name into its indicator instance.
    $indicators = array_map(
        function($classname) {
            return \core_analytics\manager::get_indicator($classname);
        },
        ['test_indicator_max', 'test_indicator_min', 'test_indicator_random']
    );

    $created = \core_analytics\model::create($target, $indicators);

    // A new instance is built from the id so that db defaults are loaded as well.
    return new \core_analytics\model($created->get_id());
}
/**
 * Creates a model for the given target that uses the test_indicator_max,
 * test_indicator_min and test_indicator_fullname indicators.
 *
 * @param string $targetclass Target class name handed to \core_analytics\manager::get_target().
 * @return \core_analytics\model
 */
protected function add_perfect_model($targetclass = 'test_target_shortname') {
    $target = \core_analytics\manager::get_target($targetclass);

    // Resolve every indicator class name into its indicator instance.
    $indicators = array_map(
        function($classname) {
            return \core_analytics\manager::get_indicator($classname);
        },
        ['test_indicator_max', 'test_indicator_min', 'test_indicator_fullname']
    );

    $created = \core_analytics\model::create($target, $indicators);

    // A new instance is built from the id so that db defaults are loaded as well.
    return new \core_analytics\model($created->get_id());
}
/**
 * Creates a model for multi-classification.
 *
 * The model uses the test_indicator_fullname and test_indicator_multiclass indicators and
 * is returned as a new instance built from the created model's id.
 *
 * @param string $targetclass Target class name handed to \core_analytics\manager::get_target().
 * @return \core_analytics\model
 * @throws coding_exception
 * @throws moodle_exception
 */
public function add_multiclass_model($targetclass = 'test_target_shortname_multiclass') {
    $target = \core_analytics\manager::get_target($targetclass);

    // Resolve every indicator class name into its indicator instance.
    $indicators = array_map(
        function($classname) {
            return \core_analytics\manager::get_indicator($classname);
        },
        ['test_indicator_fullname', 'test_indicator_multiclass']
    );

    $created = \core_analytics\model::create($target, $indicators);

    return new \core_analytics\model($created->get_id());
}
/**
 * Generates courses in two batches of $ncourses each.
 *
 * The first batch gets names starting with 'a' and the second one names starting with 'b';
 * the rest of each name is a 10 characters long random string that is used both as
 * shortname and fullname.
 *
 * @param int $ncourses The number of courses to be generated in each batch.
 * @param array $params Course params. 'startdate' and 'enddate' get fixed values when not provided.
 * @return void
 */
protected function generate_courses($ncourses, array $params = []) {

    // Values provided by the caller win over these dates.
    $params += [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];

    foreach (['a', 'b'] as $prefix) {
        for ($created = 0; $created < $ncourses; $created++) {
            $name = $prefix . random_string(10);
            $record = ['shortname' => $name, 'fullname' => $name] + $params;
            $this->getDataGenerator()->create_course($record);
        }
    }
}
/**
 * Generates courses for multi-classification in three batches of $ncourses each.
 *
 * The batches get names starting with 'a', 'b' and 'c' respectively; the rest of each name
 * is a 10 characters long random string that is used both as shortname and fullname.
 *
 * @param int $ncourses The number of courses to be generated in each batch.
 * @param array $params Course params. 'startdate' and 'enddate' get fixed values when not provided.
 * @return void
 */
protected function generate_courses_multiclass($ncourses, array $params = []) {

    // Values provided by the caller win over these dates.
    $params += [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];

    foreach (['a', 'b', 'c'] as $prefix) {
        for ($created = 0; $created < $ncourses; $created++) {
            $name = $prefix . random_string(10);
            $record = ['shortname' => $name, 'fullname' => $name] + $params;
            $this->getDataGenerator()->create_course($record);
        }
    }
}
/**
 * Resets the prediction processors and applies the provided configuration values.
 *
 * The reset always happens; nothing else is done when $forcedconfig is empty.
 *
 * @param array $forcedconfig Config values indexed by plugin name and then by config name.
 */
protected function set_forced_config($forcedconfig) {
    \core_analytics\manager::reset_prediction_processors();

    if (!empty($forcedconfig)) {
        foreach ($forcedconfig as $plugin => $settings) {
            foreach ($settings as $name => $value) {
                set_config($name, $value, $plugin);
            }
        }
    }
}
/**
 * Returns the requested predictions processor, marking the test as skipped if it is not ready.
 *
 * The processor is ready when its is_ready() method returns exactly true; any other value
 * is appended to the skip message.
 *
 * @param string $predictionsprocessorclass
 * @return \core_analytics\predictor
 */
protected function is_predictions_processor_ready(string $predictionsprocessorclass) {
    // This check is repeated for every prediction processor under test.
    $processor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);

    $status = $processor->is_ready();
    if (true !== $status) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready: ' . $status);
    }

    return $processor;
}
/**
 * Combines every case with every prediction processor available in the system.
 *
 * Each resulting dataset is the original case plus two extra elements: 'predictionsprocessor'
 * (the processor class name) and 'forcedconfig'. Datasets are keyed by "<case key>-<processor class>".
 *
 * When TEST_MLBACKEND_PYTHON_HOST, TEST_MLBACKEND_PYTHON_PORT, TEST_MLBACKEND_PYTHON_USERNAME and
 * TEST_MLBACKEND_PYTHON_PASSWORD are all defined, the datasets of \mlbackend_python\processor get a
 * '-server' key suffix and a 'forcedconfig' built from these constants; in any other case
 * 'forcedconfig' is null.
 *
 * @param array $cases
 * @return array
 */
protected function add_prediction_processors($cases) {

    $return = [];

    // All four constants are read below, so all four must be defined. The password constant has to be
    // part of this check as reading an undefined constant is an error.
    $testpythonserver = defined('TEST_MLBACKEND_PYTHON_HOST') && defined('TEST_MLBACKEND_PYTHON_PORT')
        && defined('TEST_MLBACKEND_PYTHON_USERNAME') && defined('TEST_MLBACKEND_PYTHON_PASSWORD');

    // We need to test all prediction processors in the system.
    $predictionprocessors = \core_analytics\manager::get_all_prediction_processors();
    foreach ($predictionprocessors as $classfullname => $predictionsprocessor) {
        foreach ($cases as $key => $case) {

            if (!$predictionsprocessor instanceof \mlbackend_python\processor || !$testpythonserver) {
                $extraparams = ['predictionsprocessor' => $classfullname, 'forcedconfig' => null];
                $return[$key . '-' . $classfullname] = $case + $extraparams;
            } else {

                // We want the configuration to be forced during the test as things like importing models create new
                // instances of ML backend processors during the process.
                $forcedconfig = [
                    'mlbackend_python' => [
                        'useserver' => true,
                        'host' => TEST_MLBACKEND_PYTHON_HOST,
                        'port' => TEST_MLBACKEND_PYTHON_PORT,
                        'secure' => false,
                        'username' => TEST_MLBACKEND_PYTHON_USERNAME,
                        'password' => TEST_MLBACKEND_PYTHON_PASSWORD,
                    ],
                ];
                $casekey = $key . '-' . $classfullname . '-server';
                $return[$casekey] = $case + ['predictionsprocessor' => $classfullname, 'forcedconfig' => $forcedconfig];
            }
        }
    }

    return $return;
}