Moodle release 3.11.12
[moodle.git] / analytics / tests / prediction_test.php
blob4ba5df7c361d18a1fa6cb49745affe29e69542d5
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
17 namespace core_analytics;
19 defined('MOODLE_INTERNAL') || die();
21 global $CFG;
22 require_once(__DIR__ . '/fixtures/test_indicator_max.php');
23 require_once(__DIR__ . '/fixtures/test_indicator_min.php');
24 require_once(__DIR__ . '/fixtures/test_indicator_null.php');
25 require_once(__DIR__ . '/fixtures/test_indicator_fullname.php');
26 require_once(__DIR__ . '/fixtures/test_indicator_random.php');
27 require_once(__DIR__ . '/fixtures/test_indicator_multiclass.php');
28 require_once(__DIR__ . '/fixtures/test_target_shortname.php');
29 require_once(__DIR__ . '/fixtures/test_target_shortname_multiclass.php');
30 require_once(__DIR__ . '/fixtures/test_static_target_shortname.php');
32 require_once(__DIR__ . '/../../course/lib.php');
34 /**
35 * Unit tests for evaluation, training and prediction.
37 * NOTE: in order to execute this test using a separate server for the
38 * python ML backend you need to define these variables in your config.php file:
40 * define('TEST_MLBACKEND_PYTHON_HOST', '127.0.0.1');
41 * define('TEST_MLBACKEND_PYTHON_PORT', 5000);
42 * define('TEST_MLBACKEND_PYTHON_USERNAME', 'default');
43 * define('TEST_MLBACKEND_PYTHON_PASSWORD', 'sshhhh');
45 * @package core_analytics
46 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
47 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
49 class prediction_test extends \advanced_testcase {
/**
 * Purge all the mlbackend outputs.
 *
 * This is done automatically for mlbackends using the web server dataroot but
 * other mlbackends may store files elsewhere and these files need to be removed.
 *
 * @return void
 */
public function tearDown(): void {
    // Switch to the admin user before removing the models.
    $this->setAdminUser();

    // Delete every model returned by the analytics manager.
    foreach (\core_analytics\manager::get_all_models() as $model) {
        $model->delete();
    }
}
/**
 * Checks the predictions returned by a static model.
 *
 * The model is enabled without any training, then two hidden courses are created and
 * the predictions and the records stored for them are verified.
 *
 * @return void
 */
public function test_static_prediction() {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminUser();

    $model = $this->add_perfect_model('test_static_target_shortname');
    $model->enable('\core\analytics\time_splitting\no_splitting');
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    // No training for static models.
    $model->train();
    $trainedsamples = $DB->get_records('analytics_train_samples', array('modelid' => $model->get_id()));
    $this->assertEmpty($trainedsamples);
    $this->assertEmpty($DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'trained')));

    // Now we create 2 hidden courses (only hidden courses are getting predictions).
    $courseparams = array('shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0);
    $course1 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = array('shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0);
    $course2 = $this->getDataGenerator()->create_course($courseparams);

    $result = $model->predict();

    // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
    $correct = array($course1->id => 1, $course2->id => 0);
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // Only the sample id is needed; the range index does not affect the expected prediction.
        list($sampleid) = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

        $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
    }

    // 1 range for each analysable.
    $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
    $this->assertCount(2, $predictedranges);
    // 2 predictions stored for the model.
    $this->assertEquals(2, $DB->count_records('analytics_predictions',
        array('modelid' => $model->get_id())));

    // No new generated records as there are no new courses available.
    $model->predict();
    $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
    $this->assertCount(2, $predictedranges);
    $this->assertEquals(2, $DB->count_records('analytics_predictions',
        array('modelid' => $model->get_id())));
}
/**
 * Checks that a model restricted to a set of contexts only predicts for courses in them.
 *
 * @return void
 */
public function test_model_contexts() {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminUser();

    $misc = $DB->get_record('course_categories', ['name' => 'Miscellaneous']);
    $miscctx = \context_coursecat::instance($misc->id);

    $category = $this->getDataGenerator()->create_category();
    $categoryctx = \context_coursecat::instance($category->id);

    // One course per category.
    $courseparams = array('shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0,
        'category' => $category->id);
    $course1 = $this->getDataGenerator()->create_course($courseparams);
    $course1ctx = \context_course::instance($course1->id);
    $courseparams = array('shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0,
        'category' => $misc->id);
    $this->getDataGenerator()->create_course($courseparams);

    $model = $this->add_perfect_model('test_static_target_shortname');

    // Just 1 category.
    $model->update(true, false, '\core\analytics\time_splitting\no_splitting', false, [$categoryctx->id]);
    $this->assertCount(1, $model->predict()->predictions);

    // Now with 2 categories.
    $model->update(true, false, false, false, [$categoryctx->id, $miscctx->id]);

    // The courses in the new category are processed.
    $this->assertCount(1, $model->predict()->predictions);

    // Clear the predictions generated by the model and predict() again.
    $model->clear();
    $this->assertCount(2, $model->predict()->predictions);

    // Course context restriction.
    $model->update(true, false, '\core\analytics\time_splitting\no_splitting', false, [$course1ctx->id]);

    // Nothing new as the course was already analysed.
    // NOTE(review): empty() is kept on purpose, it also passes if 'predictions' is not set at all - confirm
    // against model::predict() before switching to assertEmpty().
    $result = $model->predict();
    $this->assertTrue(empty($result->predictions));

    $model->clear();
    $this->assertCount(1, $model->predict()->predictions);
}
/**
 * Trains a model and checks the predictions, database records and files it generates.
 *
 * @dataProvider provider_ml_training_and_prediction
 * @param string $timesplittingid Time splitting method the model is updated to use.
 * @param int $predictedrangeindex Range index expected in the analytics_predict_samples records.
 * @param int $nranges Number of time ranges used to compute the expected indicator calculations.
 * @param string $predictionsprocessorclass Predictions processor class name.
 * @param array $forcedconfig Value passed to set_forced_config().
 * @return void
 */
public function test_ml_training_and_prediction($timesplittingid, $predictedrangeindex, $nranges, $predictionsprocessorclass,
        $forcedconfig) {
    global $DB;

    $this->resetAfterTest(true);

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $this->setAdminUser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    // Generate training data.
    $ncourses = 10;
    $this->generate_courses($ncourses);

    $model = $this->add_perfect_model();

    $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));

    // No samples trained yet.
    $this->assertEquals(0, $DB->count_records('analytics_train_samples', array('modelid' => $model->get_id())));

    $model->train();
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    // The training courses ($ncourses * 2) * the 3 model indicators * the number of time ranges of this
    // time splitting method.
    $indicatorcalc = ($ncourses * 2) * 3 * $nranges;
    $this->assertEquals($indicatorcalc, $DB->count_records('analytics_indicator_calc'));

    // 1 training file was created.
    $trainedsamples = $DB->get_records('analytics_train_samples', array('modelid' => $model->get_id()));
    $this->assertCount(1, $trainedsamples);
    $samples = json_decode(reset($trainedsamples)->sampleids, true);
    $this->assertCount($ncourses * 2, $samples);
    $this->assertEquals(1, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'trained')));
    // Check that analysable files for training are stored under labelled filearea.
    $fs = get_file_storage();
    $this->assertCount(1, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::LABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $this->assertEmpty($fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::UNLABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));

    $params = [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];
    $courseparams = $params + array('shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0);
    $course1 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + array('shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0);
    $course2 = $this->getDataGenerator()->create_course($courseparams);

    // They will not be skipped for prediction though.
    $result = $model->predict();

    // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
    $correct = array($course1->id => 1, $course2->id => 0);
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // Only the sample id is needed; the range index does not affect the expected prediction.
        list($sampleid) = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

        $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
    }

    // 1 range will be predicted.
    $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        $sampleids = json_decode($predictedrange->sampleids, true);
        $this->assertCount(2, $sampleids);
        $this->assertContainsEquals($course1->id, $sampleids);
        $this->assertContainsEquals($course2->id, $sampleids);
    }
    $this->assertEquals(1, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'predicted')));
    // 2 predictions.
    $this->assertEquals(2, $DB->count_records('analytics_predictions',
        array('modelid' => $model->get_id())));

    // Check that analysable files to get predictions are stored under unlabelled filearea.
    $this->assertCount(1, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::LABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $this->assertCount(1, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::UNLABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));

    // No new generated files nor records as there are no new courses available.
    $model->predict();
    $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
    }
    $this->assertEquals(1, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'predicted')));
    $this->assertEquals(2, $DB->count_records('analytics_predictions',
        array('modelid' => $model->get_id())));

    // New samples that can be used for prediction.
    $courseparams = $params + array('shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0);
    $course3 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + array('shortname' => 'dddddd', 'fullname' => 'dddddd', 'visible' => 0);
    $course4 = $this->getDataGenerator()->create_course($courseparams);

    $model->predict();

    $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        $sampleids = json_decode($predictedrange->sampleids, true);
        $this->assertCount(4, $sampleids);
        $this->assertContainsEquals($course1->id, $sampleids);
        $this->assertContainsEquals($course2->id, $sampleids);
        $this->assertContainsEquals($course3->id, $sampleids);
        $this->assertContainsEquals($course4->id, $sampleids);
    }
    $this->assertEquals(2, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'predicted')));
    $this->assertEquals(4, $DB->count_records('analytics_predictions',
        array('modelid' => $model->get_id())));
    $this->assertCount(1, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::LABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $this->assertCount(2, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::UNLABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));

    // New visible course (for training).
    $this->getDataGenerator()->create_course(array('shortname' => 'aaa', 'fullname' => 'aa'));
    $this->getDataGenerator()->create_course();
    $model->train();
    $this->assertEquals(2, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'trained')));
    $this->assertCount(2, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::LABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $this->assertCount(2, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::UNLABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));

    // Confirm that the files associated to the model are deleted on clear and on delete. The ML backend deletion
    // processes will be triggered by these actions and any exception there would result in a failed test.
    $model->clear();
    $this->assertEquals(0, $DB->count_records('analytics_used_files',
        array('modelid' => $model->get_id(), 'action' => 'trained')));
    $this->assertCount(0, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::LABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $this->assertCount(0, $fs->get_directory_files(\context_system::instance()->id, 'analytics',
        \core_analytics\dataset_manager::UNLABELLED_FILEAREA, $model->get_id(), '/analysable/', true, false));
    $model->delete();

    // Disable the log stores again and refresh the log manager.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Data provider for test_ml_training_and_prediction.
 *
 * Each base case holds the time splitting class, the expected predicted range index
 * and the number of ranges; the cases are then passed through add_prediction_processors().
 *
 * @return array
 */
public function provider_ml_training_and_prediction() {
    $cases = array(
        'no_splitting' => array('\core\analytics\time_splitting\no_splitting', 0, 1),
        'quarters' => array('\core\analytics\time_splitting\quarters', 3, 4)
    );

    // We need to test all system prediction processors.
    return $this->add_prediction_processors($cases);
}
/**
 * Checks that an exported model can be imported and predicts like the original one.
 *
 * @param string $predictionsprocessorclass The class name
 * @param array $forcedconfig Value passed to set_forced_config().
 * @dataProvider provider_ml_processors
 * @return void
 */
public function test_ml_export_import($predictionsprocessorclass, $forcedconfig) {
    $this->resetAfterTest(true);

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $this->setAdminUser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    // Generate training data.
    $ncourses = 10;
    $this->generate_courses($ncourses);

    $model = $this->add_perfect_model();

    $model->update(true, false, '\core\analytics\time_splitting\quarters', get_class($predictionsprocessor));

    $model->train();
    $this->assertTrue($model->trained_locally());

    // Hidden courses to get predictions for.
    $this->generate_courses(10, ['visible' => 0]);

    $originalresults = $model->predict();

    // Export with the default arguments; the extracted contents are expected to include the ML backend part.
    $zipfilename = 'model-zip-' . microtime() . '.zip';
    $zipfilepath = $model->export_model($zipfilename);

    $modelconfig = new \core_analytics\model_config();
    list(, $mlbackend) = $modelconfig->extract_import_contents($zipfilepath);
    $this->assertNotFalse($mlbackend);

    $importmodel = \core_analytics\model::import_model($zipfilepath);
    $importmodel->enable();

    // Now predict using the imported model without prior training.
    $importedmodelresults = $importmodel->predict();

    // The original model predictions are the expected values for the imported model.
    foreach ($originalresults->predictions as $sampleid => $prediction) {
        $this->assertEquals($prediction->prediction, $importedmodelresults->predictions[$sampleid]->prediction);
    }

    $this->assertFalse($importmodel->trained_locally());

    // Export passing false as second argument; no ML backend part is expected in the extracted contents.
    $zipfilename = 'model-zip-' . microtime() . '.zip';
    $zipfilepath = $model->export_model($zipfilename, false);

    $modelconfig = new \core_analytics\model_config();
    list(, $mlbackend) = $modelconfig->extract_import_contents($zipfilepath);
    $this->assertFalse($mlbackend);

    // Disable the log stores again and refresh the log manager.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Data provider with a single empty base case passed through add_prediction_processors().
 *
 * @return array
 */
public function provider_ml_processors() {
    $cases = [
        'case' => [],
    ];

    // We need to test all system prediction processors.
    return $this->add_prediction_processors($cases);
}
/**
 * Test the system classifiers returns.
 *
 * This test checks that all mlbackend plugins in the system are able to return proper status codes
 * even under weird situations.
 *
 * @dataProvider provider_ml_classifiers_return
 * @param string $success Expected outcome: 'yes', 'no' or 'maybe'.
 * @param int $nsamples Total number of samples written to the dataset.
 * @param array $classes Target classes included in the dataset.
 * @param string $predictionsprocessorclass Predictions processor class name.
 * @param array $forcedconfig Value passed to set_forced_config().
 * @return void
 */
public function test_ml_classifiers_return($success, $nsamples, $classes, $predictionsprocessorclass, $forcedconfig) {
    $this->resetAfterTest();

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $nclasses = count($classes);
    if ($nsamples % $nclasses != 0) {
        throw new \coding_exception('The number of samples should be divisible by the number of classes');
    }
    $samplesperclass = $nsamples / $nclasses;

    // Metadata (we pass 2 classes even if $classes only provides 1 class samples as we want to test
    // what the backend does in this case).
    $csv = "nfeatures,targetclasses,targettype" . PHP_EOL;
    $csv .= "3,\"[0,1]\",\"discrete\"" . PHP_EOL;

    // Headers.
    $csv .= "feature1,feature2,feature3,target" . PHP_EOL;
    foreach ($classes as $class) {
        for ($i = 0; $i < $samplesperclass; $i++) {
            $csv .= "1,0,1,$class" . PHP_EOL;
        }
    }

    $trainingfile = array(
        'contextid' => \context_system::instance()->id,
        'component' => 'analytics',
        'filearea' => 'labelled',
        'itemid' => 123,
        'filepath' => '/',
        'filename' => 'whocares.csv'
    );
    $fs = get_file_storage();
    $dataset = $fs->create_file_from_string($trainingfile, $csv);

    // Training should work correctly if at least 1 sample of each class is included.
    $dir = make_request_directory();
    $modeluniqueid = 'whatever' . microtime();
    $result = $predictionsprocessor->train_classification($modeluniqueid, $dataset, $dir);

    switch ($success) {
        case 'yes':
            $this->assertEquals(\core_analytics\model::OK, $result->status);
            break;
        case 'no':
            $this->assertNotEquals(\core_analytics\model::OK, $result->status);
            break;
        case 'maybe':
        default:
            // We just check that an object is returned so we don't have an empty check,
            // what we really want to check is that an exception was not thrown.
            $this->assertInstanceOf(\stdClass::class, $result);
    }

    // Purge the directory used in this test (useful in case the mlbackend is storing files
    // somewhere out of the default moodledata/models dir).
    $predictionsprocessor->delete_output_dir($dir, $modeluniqueid);
}
/**
 * test_ml_classifiers_return provider
 *
 * We can not be very specific here as test_ml_classifiers_return only checks that
 * mlbackend plugins behave as expected and properly control backend errors even
 * under weird situations.
 *
 * @return array
 */
public function provider_ml_classifiers_return() {
    // Using verbose options as the first argument for readability.
    $cases = array(
        '1-samples' => array('maybe', 1, [0]),
        '2-samples-same-class' => array('maybe', 2, [0]),
        '2-samples-different-classes' => array('yes', 2, [0, 1]),
        '4-samples-different-classes' => array('yes', 4, [0, 1])
    );

    // We need to test all system prediction processors.
    return $this->add_prediction_processors($cases);
}
/**
 * Tests correct multi-classification.
 *
 * @dataProvider provider_test_multi_classifier
 * @param string $timesplittingid Time splitting method the model is updated to use.
 * @param string $predictionsprocessorclass Predictions processor class name.
 * @param array|null $forcedconfig Value passed to set_forced_config().
 * @throws \coding_exception
 * @throws \moodle_exception
 */
public function test_ml_multi_classifier($timesplittingid, $predictionsprocessorclass, $forcedconfig) {
    $this->resetAfterTest(true);
    $this->setAdminUser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $this->set_forced_config($forcedconfig);

    // Same readiness check the other tests in this class rely on.
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    // Generate training courses.
    $ncourses = 5;
    $this->generate_courses_multiclass($ncourses);
    $model = $this->add_multiclass_model();
    $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));
    $model->train();

    $params = [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];
    $courseparams = $params + array('shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0);
    $course1 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + array('shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0);
    $course2 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + array('shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0);
    $course3 = $this->getDataGenerator()->create_course($courseparams);

    // They will not be skipped for prediction though.
    $result = $model->predict();
    // The $course1 predictions should be 0 == 'a', $course2 should be 1 == 'b' and $course3 should be 2 == 'c'.
    $correct = array($course1->id => 0, $course2->id => 1, $course3->id => 2);
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // Only the sample id is needed; the range index does not affect the expected prediction.
        list($sampleid) = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

        $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
    }

    // Disable the log stores again and refresh the log manager.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Provider for the multi_classification test.
 *
 * The single base case only sets the time splitting class; add_prediction_processors()
 * receives it to add the prediction processors.
 *
 * @return array
 */
public function provider_test_multi_classifier() {
    $cases = array(
        'notimesplitting' => array('\core\analytics\time_splitting\no_splitting'),
    );

    // Add all system prediction processors.
    return $this->add_prediction_processors($cases);
}
/**
 * Basic test to check that prediction processors work as expected.
 *
 * @coversNothing
 * @dataProvider provider_ml_test_evaluation_configuration
 * @param string $modelquality Either 'perfect' or 'random'.
 * @param int $ncourses Value passed to generate_courses().
 * @param array $expected Expected status code, indexed by time splitting class name.
 * @param string $predictionsprocessorclass Predictions processor class name.
 * @param array $forcedconfig Value passed to set_forced_config().
 * @return void
 */
public function test_ml_evaluation_configuration($modelquality, $ncourses, $expected, $predictionsprocessorclass,
        $forcedconfig) {
    $this->resetAfterTest(true);

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $this->setAdminUser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $sometimesplittings = '\core\analytics\time_splitting\single_range,' .
        '\core\analytics\time_splitting\quarters';
    set_config('defaulttimesplittingsevaluation', $sometimesplittings, 'analytics');

    if ($modelquality === 'perfect') {
        $model = $this->add_perfect_model();
    } else if ($modelquality === 'random') {
        $model = $this->add_random_model();
    } else {
        throw new \coding_exception('Only perfect and random accepted as $modelquality values');
    }

    // Generate training data.
    $this->generate_courses($ncourses);

    $model->update(false, false, false, get_class($predictionsprocessor));
    $results = $model->evaluate();

    // We check that the returned status includes at least the $expected code of each time splitting method.
    foreach ($results as $timesplitting => $result) {
        $message = 'The returned status code ' . $result->status . ' should include ' . $expected[$timesplitting];
        $filtered = $result->status & $expected[$timesplitting];
        $this->assertEquals($expected[$timesplitting], $filtered, $message);

        // A separate variable is used so the evaluation result being iterated is not overwritten.
        $options = ['evaluation' => true, 'reuseprevanalysed' => true];
        $resultfile = new \core_analytics\local\analysis\result_file($model->get_id(), true, $options);
        $timesplittingobj = \core_analytics\manager::get_time_splitting($timesplitting);
        $analysable = new \core_analytics\site();
        $cachedanalysis = $resultfile->retrieve_cached_result($timesplittingobj, $analysable);
        $this->assertInstanceOf(\stored_file::class, $cachedanalysis);
    }

    // Disable the log stores again and refresh the log manager.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Tests the evaluation of already trained models.
 *
 * @coversNothing
 * @dataProvider provider_ml_processors
 * @param string $predictionsprocessorclass Predictions processor class name.
 * @param array $forcedconfig Value passed to set_forced_config().
 * @return void
 */
public function test_ml_evaluation_trained_model($predictionsprocessorclass, $forcedconfig) {
    $this->resetAfterTest(true);

    $this->set_forced_config($forcedconfig);
    $predictionsprocessor = $this->is_predictions_processor_ready($predictionsprocessorclass);

    $this->setAdminUser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $model = $this->add_perfect_model();

    // Generate training data.
    $this->generate_courses(50);

    // Used both to configure the model and to index the evaluation results.
    $timesplittingid = '\\core\\analytics\\time_splitting\\quarters';

    $model->update(true, false, $timesplittingid, get_class($predictionsprocessor));
    $model->train();

    $zipfilename = 'model-zip-' . microtime() . '.zip';
    $zipfilepath = $model->export_model($zipfilename);
    $importmodel = \core_analytics\model::import_model($zipfilepath);

    $results = $importmodel->evaluate(['mode' => 'trainedmodel']);
    $this->assertEquals(0, $results[$timesplittingid]->status);
    $this->assertEquals(1, $results[$timesplittingid]->score);

    // Disable the log stores again and refresh the log manager.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
/**
 * Checks that calculate_sample() is not called when existing calculations are supplied for all samples.
 *
 * The only expectation is the mock's never() constraint on calculate_sample().
 *
 * @return void
 */
public function test_read_indicator_calculations() {
    $this->resetAfterTest(true);

    $starttime = 123;
    $endtime = 321;
    $sampleorigin = 'whatever';

    $indicator = $this->getMockBuilder('test_indicator_max')->onlyMethods(['calculate_sample'])->getMock();
    $indicator->expects($this->never())->method('calculate_sample');

    // Every sample id has an entry in $existingcalcs.
    $existingcalcs = array(111 => 1, 222 => -1);
    $sampleids = array(111 => 111, 222 => 222);
    $indicator->calculate($sampleids, $sampleorigin, $starttime, $endtime, $existingcalcs);
}
/**
 * Checks which samples calculate_indicators() returns depending on their indicator values.
 *
 * Samples with at least one not null value must be part of the dataset, samples with
 * only null values must not.
 *
 * @return void
 */
public function test_not_null_samples() {
    $this->resetAfterTest(true);

    $timesplitting = \core_analytics\manager::get_time_splitting('\core\analytics\time_splitting\quarters');
    $timesplitting->set_analysable(new \core_analytics\site());

    $ranges = array(
        array('start' => 111, 'end' => 222, 'time' => 222),
        array('start' => 222, 'end' => 333, 'time' => 333)
    );
    $samples = array(123 => 123, 321 => 321);

    $target = \core_analytics\manager::get_target('test_target_shortname');

    // Creates a model using the provided indicators and returns what analysis::calculate_indicators()
    // calculates for the samples and ranges defined above.
    $calculate = function(array $indicatorclasses) use ($target, $timesplitting, $samples, $ranges) {
        $indicators = array();
        foreach ($indicatorclasses as $key => $indicatorclass) {
            $indicators[$key] = \core_analytics\manager::get_indicator($indicatorclass);
        }
        $model = \core_analytics\model::create($target, $indicators, '\core\analytics\time_splitting\no_splitting');

        $analyser = $model->get_analyser();
        $result = new \core_analytics\local\analysis\result_array($model->get_id(), false, $analyser->get_options());
        $analysis = new \core_analytics\analysis($analyser, false, $result);

        $params = array(
            $timesplitting,
            $samples,
            $ranges
        );
        return \phpunit_util::call_internal_method($analysis, 'calculate_indicators', $params,
            '\core_analytics\analysis');
    };

    // Samples with at least 1 not null value are returned.
    $dataset = $calculate(array('test_indicator_null', 'test_indicator_min'));
    $this->assertArrayHasKey('123-0', $dataset);
    $this->assertArrayHasKey('123-1', $dataset);
    $this->assertArrayHasKey('321-0', $dataset);
    $this->assertArrayHasKey('321-1', $dataset);

    // Samples with only null values are not returned.
    $dataset = $calculate(array('test_indicator_null'));
    $this->assertArrayNotHasKey('123-0', $dataset);
    $this->assertArrayNotHasKey('123-1', $dataset);
    $this->assertArrayNotHasKey('321-0', $dataset);
    $this->assertArrayNotHasKey('321-1', $dataset);
}
/**
 * Data provider for test_ml_evaluation_configuration.
 *
 * Each base case sets the model quality, the number of courses and the status code
 * expected for each time splitting method; add_prediction_processors() receives them
 * to add the prediction processors.
 *
 * @return array
 */
public function provider_ml_test_evaluation_configuration() {

    $cases = array(
        'bad' => array(
            'modelquality' => 'random',
            'ncourses' => 50,
            'expectedresults' => array(
                '\core\analytics\time_splitting\single_range' => \core_analytics\model::LOW_SCORE,
                '\core\analytics\time_splitting\quarters' => \core_analytics\model::LOW_SCORE,
            )
        ),
        'good' => array(
            'modelquality' => 'perfect',
            'ncourses' => 50,
            'expectedresults' => array(
                '\core\analytics\time_splitting\single_range' => \core_analytics\model::OK,
                '\core\analytics\time_splitting\quarters' => \core_analytics\model::OK,
            )
        )
    );
    return $this->add_prediction_processors($cases);
}
/**
 * Creates a model for the test_target_shortname target using the max, min and random test indicators.
 *
 * @return \core_analytics\model The model, re-instantiated from its id.
 */
protected function add_random_model() {
    $target = \core_analytics\manager::get_target('test_target_shortname');

    // Resolve every indicator class name into its indicator instance.
    $indicators = array_map(
        function($indicatorclass) {
            return \core_analytics\manager::get_indicator($indicatorclass);
        },
        ['test_indicator_max', 'test_indicator_min', 'test_indicator_random']
    );

    $created = \core_analytics\model::create($target, $indicators);

    // A new instance is built from the id so the db defaults get loaded as well.
    return new \core_analytics\model($created->get_id());
}
/**
 * Creates a model for the given target using the max, min and fullname test indicators.
 *
 * @param string $targetclass Target class name, passed to \core_analytics\manager::get_target().
 * @return \core_analytics\model The model, re-instantiated from its id.
 */
protected function add_perfect_model($targetclass = 'test_target_shortname') {
    $target = \core_analytics\manager::get_target($targetclass);

    $indicators = [];
    foreach (['test_indicator_max', 'test_indicator_min', 'test_indicator_fullname'] as $indicatorclass) {
        $indicators[] = \core_analytics\manager::get_indicator($indicatorclass);
    }

    $created = \core_analytics\model::create($target, $indicators);

    // A new instance is built from the id so the db defaults get loaded as well.
    return new \core_analytics\model($created->get_id());
}
/**
 * Creates a model for multi-classification.
 *
 * The model uses the fullname and multiclass test indicators.
 *
 * @param string $targetclass Target class name, passed to \core_analytics\manager::get_target().
 * @return \core_analytics\model The model, re-instantiated from its id.
 * @throws \coding_exception
 * @throws \moodle_exception
 */
public function add_multiclass_model($targetclass = 'test_target_shortname_multiclass') {
    $target = \core_analytics\manager::get_target($targetclass);

    $indicatorclasses = ['test_indicator_fullname', 'test_indicator_multiclass'];
    $indicators = array_map(
        function($indicatorclass) {
            return \core_analytics\manager::get_indicator($indicatorclass);
        },
        $indicatorclasses
    );

    $modelid = \core_analytics\model::create($target, $indicators)->get_id();

    return new \core_analytics\model($modelid);
}
/**
 * Generates two batches of $ncourses courses each.
 *
 * The first batch gets names starting with 'a' and the second batch names starting
 * with 'b'; each name is used as both shortname and fullname.
 *
 * @param int $ncourses The number of courses to be generated per batch.
 * @param array $params Course params; startdate and enddate defaults are added when missing.
 * @return void
 */
protected function generate_courses($ncourses, array $params = []) {
    // Values supplied by the caller win over these defaults.
    $params += [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];

    foreach (['a', 'b'] as $prefix) {
        for ($created = 0; $created < $ncourses; $created++) {
            $name = $prefix . random_string(10);
            $record = ['shortname' => $name, 'fullname' => $name] + $params;
            $this->getDataGenerator()->create_course($record);
        }
    }
}
/**
 * Generates three batches of $ncourses courses each for multi-classification.
 *
 * The batches get names starting with 'a', 'b' and 'c' respectively; each name is
 * used as both shortname and fullname.
 *
 * @param int $ncourses The number of courses to be generated per batch.
 * @param array $params Course params; startdate and enddate defaults are added when missing.
 * @return void
 */
protected function generate_courses_multiclass($ncourses, array $params = []) {
    // Values supplied by the caller win over these defaults.
    $params += [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];

    foreach (['a', 'b', 'c'] as $prefix) {
        for ($created = 0; $created < $ncourses; $created++) {
            $name = $prefix . random_string(10);
            $record = ['shortname' => $name, 'fullname' => $name] + $params;
            $this->getDataGenerator()->create_course($record);
        }
    }
}
/**
 * Forces some configuration values.
 *
 * The prediction processors are always reset first. When $forcedconfig is empty
 * nothing else happens; otherwise every value is stored with set_config() under its plugin.
 *
 * @param array $forcedconfig Config values indexed by plugin name and then by setting name.
 */
protected function set_forced_config($forcedconfig) {
    \core_analytics\manager::reset_prediction_processors();

    if (!empty($forcedconfig)) {
        foreach ($forcedconfig as $plugin => $settings) {
            foreach ($settings as $settingname => $settingvalue) {
                set_config($settingname, $settingvalue, $plugin);
            }
        }
    }
}
/**
 * Is the provided processor ready using the current configuration in the site?
 *
 * The current test is marked as skipped when the processor does not report being ready.
 *
 * @param string $predictionsprocessorclass
 * @return \core_analytics\predictor
 */
protected function is_predictions_processor_ready(string $predictionsprocessorclass) {
    $processor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);

    // Anything other than a strict true is used as the reason in the skip message.
    $status = $processor->is_ready();
    if ($status === true) {
        return $processor;
    }

    $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready: ' . $status);

    return $processor;
}
/**
 * Expands the provided cases so there is one case per prediction processor in the system.
 *
 * Every returned case is the original case plus a 'predictionsprocessor' key (the processor
 * class name) and a 'forcedconfig' key. For \mlbackend_python\processor, when all the
 * TEST_MLBACKEND_PYTHON_HOST, _PORT, _USERNAME and _PASSWORD constants are defined, the case
 * key gets a '-server' suffix and 'forcedconfig' holds the mlbackend_python server settings;
 * in any other situation 'forcedconfig' is null.
 *
 * @param array $cases Cases indexed by case name.
 * @return array Cases indexed by '<case name>-<processor class>' (plus '-server' for server cases).
 */
protected function add_prediction_processors($cases) {

    $return = array();

    // All four constants are read below when building the forced configuration, so all of them are required.
    $testpythonserver = defined('TEST_MLBACKEND_PYTHON_HOST') && defined('TEST_MLBACKEND_PYTHON_PORT')
        && defined('TEST_MLBACKEND_PYTHON_USERNAME') && defined('TEST_MLBACKEND_PYTHON_PASSWORD');

    // We need to test all prediction processors in the system.
    $predictionprocessors = \core_analytics\manager::get_all_prediction_processors();
    foreach ($predictionprocessors as $classfullname => $predictionsprocessor) {
        foreach ($cases as $key => $case) {

            if (!$predictionsprocessor instanceof \mlbackend_python\processor || !$testpythonserver) {
                $extraparams = ['predictionsprocessor' => $classfullname, 'forcedconfig' => null];
                $return[$key . '-' . $classfullname] = $case + $extraparams;
            } else {

                // We want the configuration to be forced during the test as things like importing models create new
                // instances of ML backend processors during the process.
                $forcedconfig = ['mlbackend_python' => ['useserver' => true, 'host' => TEST_MLBACKEND_PYTHON_HOST,
                    'port' => TEST_MLBACKEND_PYTHON_PORT, 'secure' => false, 'username' => TEST_MLBACKEND_PYTHON_USERNAME,
                    'password' => TEST_MLBACKEND_PYTHON_PASSWORD]];
                $casekey = $key . '-' . $classfullname . '-server';
                $return[$casekey] = $case + ['predictionsprocessor' => $classfullname, 'forcedconfig' => $forcedconfig];
            }
        }
    }

    return $return;
}