1 # {{{ include statements
3 start include statements
9 use ext
::Math
::MatrixReal
;
11 if ( $PsN::config
-> {'_'} -> {'use_database'} ) {
15 end include statements
17 # }}} include statements
23 foreach my $attribute ( 'logfile', 'raw_results_file' ) {
24 if ( not( ref($this -> {$attribute}) eq 'ARRAY' or
25 ref($this -> {$attribute}) eq 'HASH' ) ) {
26 my $tmp = $this -> {$attribute};
27 if ( not defined $tmp and $attribute eq 'logfile' ) {
30 $this -> {$attribute} = [];
31 for ( my $i = 1; $i <= scalar @
{$this -> {'models'}}; $i++ ) {
33 if ( $name =~ /\./ ) {
40 OSspecific
::absolute_path
( $this -> {'directory'}, $name );
41 push ( @
{$this -> {$attribute}}, $ldir.$name ) ;
45 if ( $PsN::config
-> {'_'} -> {'use_database'} ) {
46 my( $found_log, $found_cdd_id ) = $this -> read_cdd_log
;
48 $this -> register_cdd_in_database
unless ( $found_cdd_id );
50 $this -> log_object
unless ( $found_log and $found_cdd_id );
51 print "Found ",$this -> {'cdd_id'},"\n";
58 # {{{ register_cdd_in_database
60 start register_cdd_in_database
62 if ( $PsN::config
-> {'_'} -> {'use_database'} ) {
63 my $dbh = DBI
-> connect("DBI:mysql:host=".$PsN::config
-> {'_'} -> {'database_server'}.
64 ";databse=".$PsN::config
-> {'_'} -> {'project'},
65 $PsN::config
-> {'_'} -> {'user'},
66 $PsN::config
-> {'_'} -> {'password'},
69 # bins and case_column can be defined for more than one model. Skip
70 # registration of these for now.
71 # $sth = $dbh -> prepare("INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
72 # ".cdd ( tool_id, bins, case_column ) ".
73 # "VALUES (".$self -> {'tool_id'}.", '".$self -> {'bins'}.
74 # "', '".$self -> {'case_column'}."' )");
75 $sth = $dbh -> prepare
("INSERT INTO ".$PsN::config
-> {'_'} -> {'project'}.
77 "VALUES (".$self -> {'tool_id'}." )");
79 $self -> {'cdd_id'} = $sth->{'mysql_insertid'};
84 end register_cdd_in_database
86 # }}} register_cdd_in_database
88 # {{{ register_mfit_results
90 start register_mfit_results
92 if ( $PsN::config
-> {'_'} -> {'use_database'} ) {
93 my $dbh = DBI
-> connect("DBI:mysql:host=".$PsN::config
-> {'_'} -> {'database_server'}.
94 ";databse=".$PsN::config
-> {'_'} -> {'project'},
95 $PsN::config
-> {'_'} -> {'user'},
96 $PsN::config
-> {'_'} -> {'password'},
98 $dbh -> do( "LOCK TABLES ".$PsN::config
-> {'_'} -> {'project'}.
99 ".cdd_modelfit_results WRITE" );
100 my $sth = $dbh -> prepare
( "SELECT MAX(cdd_modelfit_results_id)".
101 " FROM ".$PsN::config
-> {'_'} -> {'project'}.
102 ".cdd_modelfit_results" );
103 $sth -> execute
or debug
-> die( message
=> $sth->errstr ) ;
104 my $select_arr = $sth -> fetchall_arrayref
;
105 $first_res_id = defined $select_arr -> [0][0] ?
($select_arr -> [0][0] + 1) : 0;
106 $last_res_id = $first_res_id + $#cook_score;
110 for( my $i = 0; $i <= $#cook_score; $i++ ) {
111 $insert_values = $insert_values."," if ( defined $insert_values );
112 $insert_values = $insert_values.
113 "('".$self -> {'cdd_id'}."', '".$self -> {'model_ids'}[$model_number-1].
114 "', '".$self -> {'prepared_model_ids'}[($model_number-1)*($#cook_score+1)+$i].
116 "','$cook_score[$i]', '$covariance_ratio[$i]', '$projections[$i][0]', '$projections[$i][01]', '$outside_n_sd[$i]' )";
118 $dbh -> do("INSERT INTO ".$PsN::config
-> {'_'} -> {'project'}.
119 ".cdd_modelfit_results ".
120 "( cdd_id, orig_model_id, model_id, ".
121 "bin, cook_score, covariance_ratio, ".
122 "pca_component_1, pca_component_2, outside_n_sd ) ".
123 "VALUES $insert_values");
124 $dbh -> do( "UNLOCK TABLES" );
128 end register_mfit_results
130 # }}} register_mfit_results
135 if( -e
$self -> {'directory'}.'object.txt' ) {
137 open( OLOG
, '<'.$self -> {'directory'}.'object.txt' );
140 for ( my $i = 1; $i < $#olog; $i++ ) {
141 $str = $str.$olog[$i];
144 my %tmp = eval( $str );
146 if( exists $tmp{'cdd_id'} ) {
147 $self -> {'cdd_id'} = $tmp{'cdd_id'};
156 # {{{ llp_pre_fork_setup
158 start llp_pre_fork_setup
160 $self -> modelfit_pre_fork_setup
;
162 end llp_pre_fork_setup
164 # }}} llp_pre_fork_setup
166 # {{{ modelfit_pre_fork_setup
168 start modelfit_pre_fork_setup
170 # These attributes can be given as a
171 # 1. A scalar : used for all models and problems
172 # 2. A 1-dim. array : specified per problem but same for all models
173 # 3. A 2-dim. array : specified per problem and model
174 my $bins = $self -> {'bins'};
175 # my $idxs = $self -> {'grouping_indexes'};
176 my $case_columns = $self -> {'case_columns'};
178 if ( defined $bins ) {
179 unless ( ref( \
$bins ) eq 'SCALAR' or
180 ( ref( $bins ) eq 'ARRAY' and scalar @
{$bins} > 0 ) ) {
181 debug
-> die( message
=> "Attribute bins is ",
182 "defined as a ",ref( $bins ),
183 "and is neither a scalar or a non-zero size array." );
184 } elsif ( ref( \
$bins ) eq 'SCALAR' ) {
186 foreach my $model ( @
{$self -> {'models'}} ) {
188 foreach my $problem ( @
{$model -> problems
} ) {
189 push( @pr_bins, $bins );
191 push( @mo_bins, \
@pr_bins );
193 $self -> {'bins'} = \
@mo_bins;
194 } elsif ( ref( $bins ) eq 'ARRAY' ) {
195 unless ( ref( \
$bins -> [0] ) eq 'SCALAR' or
196 ( ref( $bins -> [0] ) eq 'ARRAY' and scalar @
{$bins -> [0]} > 0 ) ) {
197 debug
-> die( message
=> "Attribute bins is ",
198 "defined as a ",ref( $bins -> [0] ),
199 "and is neither a scalar or a non-zero size array." );
200 } elsif ( ref(\
$bins -> [0]) eq 'SCALAR' ) {
202 foreach my $model ( @
{$self -> {'models'}} ) {
203 push( @mo_bins, $bins );
205 $self -> {'bins'} = \
@mo_bins;
210 foreach my $model ( @
{$self -> {'models'}} ) {
212 foreach my $data ( @
{$model -> datas
} ) {
213 push( @pr_bins, $data -> count_ind
);
215 push( @mo_bins, \
@pr_bins );
217 $self -> {'bins'} = \
@mo_bins;
220 if ( defined $case_columns ) {
221 if ( defined $case_columns ) {
222 unless ( ref( \
$case_columns ) eq 'SCALAR' or
223 ( ref( $case_columns ) eq 'ARRAY' and scalar @
{$case_columns} > 0 ) ) {
224 debug
-> die( message
=> "Attribute case_columns is ",
225 "defined as a ",ref( $case_columns ),
226 "and is neither a scalar or a non-zero size array." );
227 } elsif ( ref( \
$case_columns ) eq 'SCALAR' ) {
229 my @mo_case_columns = ();
230 foreach my $model ( @
{$self -> {'models'}} ) {
231 my @pr_case_columns = ();
232 for( my $i = 1; $i <= scalar @
{$model -> problems
}; $i++ ) {
233 if ( not $case_columns =~ /^\d/ ) {
235 my ( $junk, $column_position ) = $model ->
236 _get_option_val_pos
( name
=> $case_columns,
237 record_name
=> 'input',
238 problem_numbers
=> [$i] );
239 # We assume that there is no duplicate column names
240 push ( @pr_case_columns, $column_position->[0][0] );
243 push ( @pr_case_columns, $case_columns );
246 push( @mo_case_columns, \
@pr_case_columns );
248 $self -> {'case_columns'} = \
@mo_case_columns;
249 } elsif ( ref( $case_columns ) eq 'ARRAY' ) {
251 unless ( ref( \
$case_columns -> [0] ) eq 'SCALAR' or
252 ( ref( $case_columns -> [0] ) eq 'ARRAY' and
253 scalar @
{$case_columns -> [0]} > 0 ) ) {
254 debug
-> die( message
=> "Second dimension of attribute case_columns is ",
255 "defined as a ",ref( $case_columns -> [0]),
256 "and is neither a scalar or a non-zero size array." );
257 } elsif ( ref(\
$case_columns -> [0]) eq 'SCALAR' ) {
259 my @mo_case_columns = ();
260 foreach my $model ( @
{$self -> {'models'}} ) {
261 my @pr_case_columns = ();
262 for( my $i = 1; $i <= scalar @
{$model -> problems
}; $i++ ) {
263 if ( not $case_columns =~ /^\d/ ) {
265 my ( $junk, $column_position ) = $model ->
266 _get_option_val_pos
( name
=> $case_columns->[$i-1],
267 record_name
=> 'input',
268 problem_numbers
=> [$i] );
269 # We assume that there is no duplicate column names
270 push ( @pr_case_columns, $column_position->[0][0] );
273 push ( @pr_case_columns, $case_columns -> [$i-1] );
276 push( @mo_case_columns, \
@pr_case_columns );
278 $self -> {'case_columns'} = \
@mo_case_columns;
279 } elsif ( ref($case_columns -> [0]) eq 'ARRAY' ) {
281 my @mo_case_columns = ();
282 for( my $j = 0; $j < scalar @
{$self -> {'models'}}; $j++ ) {
283 my @pr_case_columns = ();
284 for( my $i = 1; $i <= scalar @
{$self -> {'models'} -> problems
}; $i++ ) {
285 if ( not $case_columns =~ /^\d/ ) {
287 my ( $junk, $column_position ) = $self -> {'models'} -> [$j] ->
288 _get_option_val_pos
( name
=> $case_columns->[$j]->[$i-1],
289 record_name
=> 'input',
290 problem_numbers
=> [$i] );
291 # We assume that there is no duplicate column names
292 push ( @pr_case_columns, $column_position->[0][0] );
295 push ( @pr_case_columns, $case_columns -> [$j]->[$i-1] );
298 push( @mo_case_columns, \
@pr_case_columns );
300 $self -> {'case_columns'} = \
@mo_case_columns;
304 debug
-> die( message
=> "case_columns is not specified for model $model_number" );
308 end modelfit_pre_fork_setup
310 # }}} modelfit_pre_fork_setup
316 my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
317 $self -> {'threads'} -> [1]:$self -> {'threads'};
318 $self -> general_setup
( model_number
=> $model_number,
319 class => 'tool::modelfit',
320 subm_threads
=> $subm_threads );
330 if (ref( $self -> {'threads'} ) eq 'ARRAY') {
331 @subm_threads = @
{$self -> {'threads'}};
332 unshift(@subm_threads);
334 @subm_threads = ($self -> {'threads'});
336 $self -> general_setup
( model_number
=> $model_number,
337 class => 'tool::llp',
338 subm_threads
=> \
@subm_threads );
347 # Sub tool threads can be given as scalar or reference to an array?
348 my $subm_threads = $parm{'subm_threads'};
349 my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
350 $self -> {'threads'} -> [0]:$self -> {'threads'};
351 # case_column names are matched in the model, not the data!
353 my $model = $self -> {'models'} -> [$model_number-1];
354 my @bins = @
{$self -> {'bins'} -> [$model_number-1]};
356 # Check which models that hasn't been run and run them
357 # This will be performed each step but will only result in running
358 # models at the first step, if at all.
360 # If more than one process is used, there is a VERY high risk of interaction
361 # between the processes when creating directories for model fits. Therefore
362 # the {'directory'} attribute is given explicitly below.
365 unless ( $model -> is_run
) {
367 # ----------------------- Run original run ------------------------------
372 if ( defined $self -> {'subtool_arguments'} ) {
373 %subargs = %{$self -> {'subtool_arguments'}};
375 if( $self -> {'nonparametric_etas'} or
376 $self -> {'nonparametric_marginals'} ) {
377 $model -> add_nonparametric_code
;
380 my $orig_fit = tool
::modelfit
->
381 new
( reference_object
=> $self,
383 threads
=> $subm_threads,
384 directory
=> $self -> {'directory'}.'/orig_modelfit_dir'.$model_number,
386 parent_threads
=> $own_threads,
387 parent_tool_id
=> $self -> {'tool_id'},
389 raw_results
=> undef,
390 prepared_models
=> undef,
394 # $Data::Dumper::Maxdepth=1;
395 # die Dumper $orig_fit;
396 # ( models => [$model],
398 # run_on_lsf => $self -> {'run_on_lsf'},
399 # lsf_queue => $self -> {'lsf_queue'},
400 # lsf_options => $self -> {'lsf_options'},
401 # lsf_job_name => $self -> {'lsf_job_name'},
402 # lsf_project_name => $self -> {'lsf_project_name'},
404 # run_on_nordugrid => $self -> {'run_on_nordugrid'},
405 # cpu_time => $self -> {'cpu_time'},
407 # parent_tool_id => $self -> {'tool_id'},
410 # nm_version => $self -> {'nm_version'},
411 # picky => $self -> {'picky'},
412 # compress => $self -> {'compress'},
413 # threads => $subm_threads,
414 # retries => $self -> {'retries'},
415 # base_directory => $self -> {'directory'},
416 # directory => $self -> {'directory'}.'/orig_modelfit_dir'.$model_number,
417 # parent_threads => $own_threads );
419 ui
-> print( category
=> 'cdd',
420 message
=> 'Executing base model.' );
429 # ------------------------ Print a log-header -----------------------------
433 open( LOG
, ">>".$self -> {'logfile'}[$model_number-1] );
434 my $ui_text = sprintf("%-5s",'RUN').','.sprintf("%20s",'FILENAME ').',';
435 print LOG
sprintf("%-5s",'RUN'),',',sprintf("%20s",'FILENAME '),',';
436 foreach my $param ( 'ofv', 'theta', 'omega', 'sigma' ) {
437 my $accessor = $param eq 'ofv' ?
$param : $param.'s';
438 my $orig_ests = $model -> outputs
-> [0] -> $accessor;
440 if( defined $orig_ests ){
441 for ( my $j = 0; $j < scalar @
{$orig_ests}; $j++ ) {
442 if ( $param eq 'ofv' ) {
443 my $label = uc($param)."_".($j+1);
444 $ui_text = $ui_text.sprintf("%12s",$label).',';
445 print LOG
sprintf("%12s",$label),',';
447 # Loop the parameter numbers (skip sub problem level)
448 if( defined $orig_ests -> [$j] and
449 defined $orig_ests -> [$j][0] ){
450 for ( my $num = 1; $num <= scalar @
{$orig_ests -> [$j][0]}; $num++ ) {
451 my $label = uc($param).$num."_".($j+1);
452 $ui_text = $ui_text.sprintf("%12s",$label).',';
453 print LOG
sprintf("%12s",$label),',';
465 # ------------------------ Log original run -------------------------------
469 open( LOG
, ">>".$self -> {'logfile'}[$model_number-1] );
470 $ui_text = sprintf("%5s",'0').','.sprintf("%20s",$model -> filename
).',';
471 print LOG
sprintf("%5s",'0'),',',sprintf("%20s",$model -> filename
),',';
472 foreach my $param ( 'ofv', 'theta', 'omega', 'sigma' ) {
473 my $accessor = $param eq 'ofv' ?
$param : $param.'s';
474 my $orig_ests = $model -> outputs
-> [0] -> $accessor;
476 if( defined $orig_ests ) {
477 for ( my $j = 0; $j < scalar @
{$orig_ests}; $j++ ) {
478 if ( $param eq 'ofv' ) {
479 $ui_text = $ui_text.sprintf("%12f",$orig_ests -> [$j][0]).',';
480 print LOG
sprintf("%12f",$orig_ests -> [$j][0]),',';
482 # Loop the parameter numbers (skip sub problem level)
483 if( defined $orig_ests -> [$j] and
484 defined $orig_ests -> [$j][0] ){
485 for ( my $num = 0; $num < scalar @
{$orig_ests -> [$j][0]}; $num++ ) {
486 $ui_text = $ui_text.sprintf("%12f",$orig_ests -> [$j][0][$num]).',';
487 print LOG
sprintf("%12f",$orig_ests -> [$j][0][$num]),',';
499 # --------------------- Initiate some variables ----------------------------
503 my $case_column = $self -> {'case_columns'} -> [$model_number-1] -> [0];
505 # Case-deletion Diagnostics will only work for models with one problem.
506 my $orig_data = $model -> datas
-> [0];
508 if ( not defined $orig_data ) {
509 debug
-> die( message
=> "No data file to resample from found in ".$model -> full_name
);
512 my @problems = @
{$model -> problems
};
515 my ( @skipped_ids, @skipped_keys, @skipped_values );
517 my %orig_factors = %{$orig_data -> factors
( column
=> $case_column )};
518 my $maxbins = scalar keys %orig_factors;
519 my $pr_bins = ( defined $bins[0] and $bins[0] <= $maxbins ) ?
522 my $done = ( -e
$self -> {'directory'}."/m$model_number/done" ) ?
1 : 0;
524 my ( @seed, $new_datas, $skip_ids, $skip_keys, $skip_values, $remainders );
530 # -------------- Create new case-deleted data sets ----------------------
534 # Keep the new sample data objects i memory and write them to disk later
535 # with a proper name.
537 ( $new_datas, $skip_ids, $skip_keys, $skip_values, $remainders )
538 = $orig_data -> case_deletion
( case_column
=> $case_column,
539 selection
=> $self -> {'selection_method'},
542 directory
=> $self -> {'directory'}.'/m'.$model_number );
543 my $ndatas = scalar @
{$new_datas};
544 open( DB
, ">".$self -> {'directory'}."m$model_number/done.database.models" );
546 for ( my $j = 1; $j <= $ndatas; $j++ ) {
547 my @names = ( 'cdd_'.$j, 'rem_'.$j );
548 my @datasets = ( $new_datas -> [$j-1], $remainders -> [$j-1] );
549 foreach my $i ( 0, 1 ) {
550 my $dataset = $datasets[$i];
551 my ($data_dir, $data_file) = OSspecific
::absolute_path
( $self -> {'directory'}.'/m'.$model_number,
553 # $dataset -> directory( $data_dir );
554 # $dataset -> filename( $data_file );
556 my $newmodel = $model -> copy
( filename
=> $data_dir.$names[$i].'.mod',
559 $newmodel -> ignore_missing_files
(1);
560 $newmodel -> datafiles
( new_names
=> [$names[$i].'.dta'] );
561 $newmodel -> outputfile
( $data_dir.$names[$i].".lst" );
562 $newmodel -> datas
-> [0] = $dataset;
564 # set MAXEVAL=0. Again, CDD will only work for one $PROBLEM
565 $newmodel -> maxeval
( new_values
=> [[0]] );
568 if( $self -> {'nonparametric_etas'} or
569 $self -> {'nonparametric_marginals'} ) {
570 $newmodel -> add_nonparametric_code
;
574 push( @
{$new_models[$i]}, $newmodel );
575 $model_ids[$j*$i+$j-1] = $newmodel -> register_in_database
( force
=> 1 ) ;
576 print DB
$model_ids[$j*$i+$j-1],"\n";
577 $self -> {'prepared_model_ids'}[($model_number-1)*$ndatas*2+$j*$i+$j-1] =
578 $model_ids[$j*$i+$j-1];
582 # my $new_data = $new_datas -> [$j-1];
583 # my ($data_dir, $data_file) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number,
584 # 'cdd_'.$j.'.dta' );
585 # $new_data -> directory( $data_dir );
586 # $new_data -> filename( $data_file );
587 # $new_data -> flush;
588 # my $newmodel = $model -> copy( filename => $data_dir."cdd_$j.mod",
591 # $newmodel -> ignore_missing_files(1);
592 # $newmodel -> datafiles( new_names => ["cdd_$j.dta"] );
593 # $newmodel -> outputfile( $data_dir."cdd_$j.lst" );
594 # $newmodel -> datas -> [0] = $new_data;
595 # $newmodel -> _write;
596 # push( @new_models, $newmodel );
597 # $model_ids[$j-1] = $newmodel -> register_in_database( force => 1 );
598 # print DB $model_ids[$j-1],"\n";
599 # $self -> {'prepared_model_ids'}[($model_number-1)*$ndatas+$j-1] =
603 if ( not -e
$self -> {'directory'}."m$model_number/done.database.tool_models" ) {
604 $self -> register_tm_relation
( model_ids
=> \
@model_ids,
605 prepared_models
=> 1 );
606 open( DB
, ">".$self -> {'directory'}."m$model_number/done.database.tool_models" );
610 # Create a checkpoint. Log the samples and individuals.
611 open( DONE
, ">".$self -> {'directory'}."/m$model_number/done" ) ;
612 print DONE
"Sampling from ",$orig_data -> filename
, " performed\n";
613 print DONE
"$pr_bins bins\n";
614 print DONE
"Skipped individuals:\n";
615 for( my $k = 0; $k < scalar @
{$skip_ids}; $k++ ) {
616 print DONE
join(',',@
{$skip_ids -> [$k]}),"\n";
618 print DONE
"Skipped keys:\n";
619 for( my $k = 0; $k < scalar @
{$skip_keys}; $k++ ) {
620 print DONE
join(',',@
{$skip_keys -> [$k]}),"\n";
622 print DONE
"Skipped values:\n";
623 for( my $k = 0; $k < scalar @
{$skip_values}; $k++ ) {
624 print DONE
join(',',@
{$skip_values -> [$k]}),"\n";
626 @seed = random_get_seed
;
627 print DONE
"seed: @seed\n";
630 open( SKIP
, ">".$self -> {'directory'}."skipped_individuals".$model_number.".csv" ) ;
631 for( my $k = 0; $k < scalar @
{$skip_ids}; $k++ ) {
632 print SKIP
join(',',@
{$skip_ids -> [$k]}),"\n";
635 open( SKIP
, ">".$self -> {'directory'}."skipped_keys".$model_number.".csv" ) ;
636 for( my $k = 0; $k < scalar @
{$skip_keys}; $k++ ) {
637 print SKIP
join(',',@
{$skip_keys -> [$k]}),"\n";
645 # --------- Recreate the datasets and models from a checkpoint ----------
649 ui
-> print( category
=> 'cdd',
650 message
=> "Recreating models from a previous run" );
651 open( DONE
, $self -> {'directory'}."/m$model_number/done" );
654 my ( $junk, $junk, $stored_filename, $junk ) = split(' ',$rows[0],4);
655 my ( $stored_bins, $junk ) = split(' ',$rows[1],2);
656 my ( @stored_ids, @stored_keys, @stored_values );
657 for ( my $k = 3; $k < 3+$stored_bins; $k++ ) {
659 my @bin_ids = split(',', $rows[$k] );
660 push( @stored_ids, \
@bin_ids );
662 for ( my $k = 4+$stored_bins; $k < 4+2*$stored_bins; $k++ ) {
664 my @bin_keys = split(',', $rows[$k] );
665 push( @stored_keys, \
@bin_keys );
667 for ( my $k = 5+2*$stored_bins; $k < 5+3*$stored_bins; $k++ ) {
669 my @bin_values = split(',', $rows[$k] );
670 push( @stored_values, \
@bin_values );
672 @seed = split(' ',$rows[5+3*$stored_bins]);
673 $skip_ids = \
@stored_ids;
674 $skip_keys = \
@stored_keys;
675 $skip_values = \
@stored_values;
676 shift( @seed ); # get rid of 'seed'-word
678 # Reinitiate the model objects
680 my $reg_relations = 0;
681 if ( -e
$self -> {'directory'}."m$model_number/done.database.models" ) {
682 open( DB
, $self -> {'directory'}."m$model_number/done.database.models" );
686 open( DB
, ">".$self -> {'directory'}."m$model_number/done.database.models" );
689 for ( my $j = 1; $j <= $stored_bins; $j++ ) {
690 my @names = ( 'cdd_'.$j, 'rem_'.$j );
691 foreach my $i ( 0, 1 ) {
692 my ($model_dir, $filename) = OSspecific
::absolute_path
( $self -> {'directory'}.'/m'.
695 my ($out_dir, $outfilename) = OSspecific
::absolute_path
( $self -> {'directory'}.'/m'.
698 my $new_mod = model
->
699 new
( directory
=> $model_dir,
700 filename
=> $filename,
701 outputfile
=> $outfilename,
702 extra_files
=> $model -> extra_files
,
704 ignore_missing_files
=> 1,
706 model_id
=> $model_ids[$j*$i+$j-1] );
707 push( @
{$new_models[$i]}, $new_mod );
709 if( not defined $model_ids[$j*$i+$j-1] ) {
710 $model_ids[$j*$i+$j-1] = $new_mod -> register_in_database
;
711 print DB
$model_ids[$j-1],"\n";
713 $self -> {'prepared_model_ids'}[($model_number-1)*
714 $stored_bins+$j*$i+$j-1] =
715 $model_ids[$j*$i+$j-1];
718 # my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
720 # 'cdd_'.$j.'.mod' );
721 # my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
723 # '/cdd_'.$j.'.lst' );
724 # my $new_mod = model ->
725 # new( directory => $model_dir,
726 # filename => $filename,
727 # outputfile => $outfilename,
728 # extra_files => $model -> extra_files,
730 # ignore_missing_files => 1,
732 # model_id => $model_ids[$j-1] );
733 # push( @new_models, $new_mod );
735 # if( not defined $model_ids[$j-1] ) {
736 # $model_ids[$j-1] = $new_mod -> register_in_database;
737 # print DB $model_ids[$j-1],"\n";
739 # $self -> {'prepared_model_ids'}[($model_number-1)*$stored_bins+$j-1] =
741 my $nl = $j == $stored_bins ?
"" : "\r";
742 ui
-> print( category
=> 'cdd',
743 message
=> ui
-> status_bar
( sofar
=> $j+1,
744 goal
=> $stored_bins+1 ).$nl,
749 if ( not -e
$self -> {'directory'}."m$model_number/done.database.tool_models" ) {
750 $self -> register_tm_relation
( model_ids
=> \
@model_ids,
751 prepared_models
=> 1 ) if ( $reg_relations );
752 open( DB
, ">".$self -> {'directory'}."m$model_number/done.database.tool_models" );
756 ui
-> print( category
=> 'cdd',
757 message
=> " ... done." );
758 random_set_seed
( @seed );
759 ui
-> print( category
=> 'cdd',
760 message
=> "Using $stored_bins previously sampled case-deletion sets ".
761 "from $stored_filename" )
762 unless $self -> {'parent_threads'} > 1;
767 push( @skipped_ids, $skip_ids );
768 push( @skipped_keys, $skip_keys );
769 push( @skipped_values, $skip_values );
771 # Use only the first half (the case-deleted) of the data sets.
772 $self -> {'prepared_models'}[$model_number-1]{'own'} = $new_models[0];
774 # The remainders are left until the analyze step
775 $self -> {'prediction_models'}[$model_number-1]{'own'} = $new_models[1];
777 # --------------------- Create the sub tools ------------------------------
782 $subdir =~ s/tool:://;
783 my @subtools = @
{$self -> {'subtools'}};
786 if ( defined $self -> {'subtool_arguments'} ) {
787 %subargs = %{$self -> {'subtool_arguments'}};
789 push( @
{$self -> {'tools'}},
791 new
( reference_object
=> $self,
792 models
=> $new_models[0],
793 threads
=> $subm_threads,
794 directory
=> $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
795 _raw_results_callback
=> $self ->
796 _modelfit_raw_results_callback
( model_number
=> $model_number ),
797 subtools
=> \
@subtools,
798 parent_threads
=> $own_threads,
799 parent_tool_id
=> $self -> {'tool_id'},
801 raw_results
=> undef,
802 prepared_models
=> undef,
807 # ( models => $new_models[0],
808 # nm_version => $self -> {'nm_version'},
809 # run_on_lsf => $self -> {'run_on_lsf'},
810 # lsf_queue => $self -> {'lsf_queue'},
811 # lsf_options => $self -> {'lsf_options'},
812 # lsf_job_name => $self -> {'lsf_job_name'},
813 # lsf_project_name => $self -> {'lsf_project_name'},
815 # run_on_nordugrid => $self -> {'run_on_nordugrid'},
816 # cpu_time => $self -> {'cpu_time'},
818 # parent_tool_id => $self -> {'tool_id'},
820 # picky => $self -> {'picky'},
821 # compress => $self -> {'compress'},
822 # threads => $subm_threads,
823 # retries => $self -> {'retries'},
824 # base_directory => $self -> {'directory'},
825 # directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
826 # _raw_results_callback => $self ->
827 # _modelfit_raw_results_callback( model_number => $model_number ),
828 # subtools => \@subtools,
829 # parent_threads => $own_threads,
834 open( SKIP
, ">".$self -> {'directory'}."skipped_values".$model_number.".csv" ) ;
835 for( my $k = 0; $k < scalar @
{$skip_values}; $k++ ) {
836 print SKIP
join(',',@
{$skip_values -> [$k]}),"\n";
851 # $proc_results{'skipped.section.identifiers'} = $self -> {'skipped.section.identifiers'};
852 # $proc_results{'skipped_ids'} = $self -> {'skipped_ids'};
853 # $proc_results{'skipped_keys'} = $self -> {'skipped_keys'};
854 # $proc_results{'skipped_values'} = $self -> {'skipped_values'};
856 push( @
{$self -> {'results'} -> {'own'}}, \
%proc_results );
862 # {{{ _modelfit_raw_results_callback
864 start _modelfit_raw_results_callback
867 # Use the cdd's raw_results file.
868 # The cdd and the bootstrap's callback methods are identical
869 # in the beginning, then the cdd callback adds cook.scores and
873 OSspecific
::absolute_path
( $self -> {'directory'},
874 $self -> {'raw_results_file'}[$model_number-1] );
875 my $orig_mod = $self -> {'models'}[$model_number-1];
876 my $xv = $self -> {'cross_validate'};
878 my $modelfit = shift;
880 my %max_hash = %{$mh_ref};
881 $modelfit -> raw_results_file
( $dir.$file );
882 if( $cross_validation_set ) {
883 $modelfit -> raw_results_append
( 1 ) if( not $self -> {'bca_mode'} ); # overwrite when doing a jackknife
884 my ( @new_header, %param_names );
885 foreach my $row ( @
{$modelfit -> {'raw_results'}} ) {
886 unshift( @
{$row}, 'cross_validation' );
888 $modelfit -> {'raw_results_header'} = undef; # May be a bit silly to do...
893 my ($raw_results_row,$nonp_rows) = $self -> create_raw_results_rows
( max_hash
=> $mh_ref,
895 raw_line_structure
=> \
%dummy );
897 $orig_mod -> outputs
-> [0] -> flush
;
899 unshift( @
{$modelfit -> {'raw_results'}}, @
{$raw_results_row} );
901 &{$self -> {'_raw_results_callback'}}( $self, $modelfit )
902 if ( defined $self -> {'_raw_results_callback'} );
904 if( $xv and not $self -> {'bca_mode'} ) {
905 foreach my $row ( @
{$modelfit -> {'raw_results'}} ) {
906 unshift( @
{$row}, 'cdd' );
908 unshift( @
{$modelfit -> {'raw_results_header'}}, 'method' );
914 end _modelfit_raw_results_callback
916 # }}} _modelfit_raw_results_callback
918 # {{{ modelfit_analyze
920 start modelfit_analyze
922 # Only valid for one problem and one sub problem.
924 if ( $self -> {'cross_validate'} ) {
926 # --- Evaluate the models on the remainder data sets ----
931 $i < scalar @
{$self -> {'prediction_models'}[$model_number-1]{'own'}};
933 $self -> {'prediction_models'}[$model_number-1]{'own'}[$i] ->
934 update_inits
( from_model
=> $self ->
935 {'prepared_models'}[$model_number-1]{'own'}[$i]);
936 $self -> {'prediction_models'}[$model_number-1]{'own'}[$i] ->
937 remove_records
( type
=> 'covariance' );
938 $self -> {'prediction_models'}[$model_number-1]{'own'}[$i] -> _write
;
941 OSspecific
::absolute_path
( $self -> {'directory'},
942 $self -> {'raw_results_file'}[$model_number-1] );
943 my $xv_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
944 $self -> {'threads'} -> [1]:$self -> {'threads'};
945 my $mod_eval = tool
::modelfit
->
946 new
( reference_object
=> $self,
948 {'prediction_models'}[$model_number-1]{'own'},
949 base_directory
=> $self -> {'directory'},
950 directory
=> $self -> {'directory'}.
951 'evaluation_dir'.$model_number,
952 threads
=> $xv_threads,
953 _raw_results_callback
=> $self ->
954 _modelfit_raw_results_callback
( model_number
=> $model_number,
955 cross_validation_set
=> 1 ),
956 parent_tool_id
=> $self -> {'tool_id'},
958 raw_results
=> undef,
959 prepared_models
=> undef,
962 $Data::Dumper
::Maxdepth
= 2;
963 print "Running xv runs\n";
970 # ------------ Cook-scores and Covariance-Ratios ----------
972 # {{{ Cook-scores and Covariance-Ratios
974 # ---------------------- Cook-score -----------------------
978 my ( @cook_score, @cov_ratio );
979 if( $self -> models
-> [$model_number-1] ->
980 outputs
-> [0] -> covariance_step_successful
-> [0][0]) {
982 ui
-> print( category
=> 'cdd',
983 message
=> "Calculating the cook-scores" );
987 my $output_harvest = $self ->
988 harvest_output
( accessors
=> ['est_thetas','est_omegas','est_sigmas'],
989 search_output
=> 1 );
991 # Calculate the changes
992 foreach my $param ( 'est_thetas', 'est_omegas', 'est_sigmas' ) {
993 my $orig_est = $self -> models
-> [$model_number-1] -> outputs
-> [0] -> $param;
995 my $est = defined $output_harvest -> {$param} ?
996 $output_harvest -> {$param} -> [$model_number-1]{'own'} : [];
998 for ( my $i = 0; $i < scalar @
{$est}; $i++ ) {
999 if( defined $est->[$i][0][0] ) {
1000 my $n_par = scalar @
{$est->[$i][0][0]};
1001 # Since we use the _estimated_ parameters there should be no undefined elements
1002 # Not sure what to do if we find one... /Lasse
1003 for( my $j = 0; $j < $n_par; $j++ ) {
1004 push( @
{$changes[$i]}, $orig_est->[0][0][$j]-$est->[$i][0][0][$j]);
1011 my $inverse_covariance_matrix = $self -> models
-> [$model_number-1] ->
1012 outputs
-> [0] -> inverse_covariance_matrix
-> [0][0];
1014 # Equation: sqrt((orig_est-est(i))'*inv_cov_matrix*(orig_est-est(i)))
1015 for ( my $i = 0; $i <= $#changes; $i++ ) {
1016 if( defined $changes[$i] and
1017 scalar @
{$changes[$i]} > 0 and
1018 defined $inverse_covariance_matrix ) {
1019 my $vec_changes = Math
::MatrixReal
->
1020 new_from_cols
( [$changes[$i]] );
1021 $cook_score[$i] = $inverse_covariance_matrix*$vec_changes;
1022 $cook_score[$i] = ~$vec_changes*$cook_score[$i];
1024 $cook_score[$i] = undef;
1027 my $nl = $i == $#changes ?
"" : "\r";
1028 ui
-> print( category
=> 'cdd',
1029 message
=> ui
-> status_bar
( sofar
=> $i+1,
1030 goal
=> $#changes+1 ).$nl,
1035 # Calculate the square root
1036 # The matrixreal object holds a 1x1 matrix in the first position of its array.
1037 for ( my $i = 0; $i <= $#cook_score; $i++ ) {
1038 if( defined $cook_score[$i] and
1039 $cook_score[$i][0][0][0] >= 0 ) {
1040 $cook_score[$i] = sqrt($cook_score[$i][0][0][0]);
1042 open( LOG
, ">>".$self -> {'logfile'}[$model_number-1] );
1044 if( defined $cook_score[$i] ) {
1045 $mes = "Negative squared cook-score ",$cook_score[$i][0][0][0];
1047 $mes = "Undefined squared cook-score";
1049 $mes .= "; can't take the square root.\n",
1050 "The cook-score for model $model_number and cdd bin $i was set to zero\n";
1053 debug
-> warn( level
=> 1,
1056 $cook_score[$i] = 0;
1057 # $cook_score[$i] = undef;
1062 $self -> {'cook_scores'} = \
@cook_score;
1064 ui
-> print( category
=> 'cdd',
1065 message
=> " ... done." );
1069 # ------------------- Covariance Ratio --------------------
1071 # {{{ Covariance Ratio
1073 if( $self -> models
-> [$model_number-1] ->
1074 outputs
-> [0] -> covariance_step_successful
-> [0][0]) {
1076 # {{{ sub clear dots
1080 my @matrix = @
{$m_ref};
1081 # get rid of '........'
1083 foreach ( @matrix ) {
1084 push( @clear, $_ ) unless ( $_ eq '.........' );
1086 # print Dumper \@clear;
1092 # {{{ sub make square
1096 my @matrix = @
{$m_ref};
1097 # Make the matrix square:
1098 my $elements = scalar @matrix; # = M*(M+1)/2
1099 my $M = -0.5 + sqrt( 0.25 + 2 * $elements );
1101 for ( my $m = 1; $m <= $M; $m++ ) {
1102 for ( my $n = 1; $n <= $m; $n++ ) {
1103 push( @
{$square[$m-1]}, $matrix[($m-1)*$m/2 + $n - 1] );
1104 unless ( $m == $n ) {
1105 push( @
{$square[$n-1]}, $matrix[($m-1)*$m/2 + $n - 1] );
1114 ui
-> print( category
=> 'cdd',
1115 message
=> "Calculating the covariance-ratios" );
1117 # Equation: sqrt(det(cov_matrix(i))/det(cov_matrix(orig)))
1118 my $cov_linear = $self -> models
-> [$model_number-1] ->
1119 outputs
-> [0] -> raw_covmatrix
-> [0][0];
1121 if( defined $cov_linear ) {
1122 my $orig_cov = Math
::MatrixReal
->
1123 new_from_cols
( make_square
( clear_dots
( $cov_linear ) ) );
1124 $orig_det = $orig_cov -> det
();
1126 # AUTOLOAD: raw_covmatrix
1128 my $output_harvest = $self -> harvest_output
( accessors
=> ['raw_covmatrix'],
1129 search_output
=> 1 );
1131 my $est_cov = defined $output_harvest -> {'raw_covmatrix'} ?
$output_harvest -> {'raw_covmatrix'} -> [$model_number-1]{'own'} : [];
1133 my $mods = scalar @
{$est_cov};
1134 for ( my $i = 0; $i < scalar @
{$est_cov}; $i++ ) {
1135 if ( $orig_det != 0 and defined $est_cov->[$i][0][0] ) {
1136 my $cov = Math
::MatrixReal
->
1137 new_from_cols
( make_square
( clear_dots
( $est_cov->[$i][0][0] ) ) );
1138 my $ratio = $cov -> det
() / $orig_det;
1140 push( @cov_ratio, sqrt( $ratio ) );
1142 open( LOG
, ">>".$self -> {'logfile'}[$model_number-1] );
1143 print LOG
"Negative covariance ratio ",$ratio,
1144 "; can't take the square root.\n",
1145 "The covariance ratio for model $model_number and cdd bin $i was set to one (1)\n";
1146 # "The covariance ratio for model $model_number and cdd bin $i was set to undef\n";
1148 push( @cov_ratio, 1 );
1149 # push( @cov_ratio, undef );
1152 open( LOG
, ">>".$self -> {'logfile'}[$model_number-1] );
1153 print LOG
"The determinant of the cov-matrix of the original run was zero\n",
1154 "or the determinant of cdd bin $i was undefined\n",
1155 "The covariance ratio for model $model_number and cdd bin $i was set to one (1)\n";
1156 # "The covariance ratio for model $model_number and cdd bin $i was set to undef\n";
1158 push( @cov_ratio, 1 );
1159 # push( @cov_ratio, undef );
1162 my $nl = $i == $mods-1 ?
"" : "\r";
1163 ui
-> print( category
=> 'cdd',
1164 message
=> ui
-> status_bar
( sofar
=> $i+1,
1165 goal
=> $mods ).$nl,
1171 $self -> {'covariance_ratios'} = \
@cov_ratio;
1173 ui
-> print( category
=> 'cdd',
1174 message
=> " ... done." );
1176 # }}} Covariance Ratio
1178 # - Perform a PCA on the cook-score:covariance-ratio data --
1182 my ( @outside_n_sd, $eig_ref, $eig_vec_ref, $proj_ref, $std_ref );
1184 if( $self -> models
-> [$model_number-1] ->
1185 outputs
-> [0] -> covariance_step_successful
-> [0][0]) {
1187 ( $eig_ref, $eig_vec_ref, $proj_ref, $std_ref ) =
1188 # die Dumper [\@cook_score,\@cov_ratio];
1189 $self -> pca
( data_matrix
=> [\
@cook_score,\
@cov_ratio] );
1190 my @projections = @
{$proj_ref};
1191 my @standard_deviation = @
{$std_ref};
1195 # ---- Mark the runs with CS-CR outside N standard deviations of the PCA ----
1199 for( my $i = 0; $i <= $#projections; $i++ ) {
1200 my $vector_length = 0;
1201 for( my $j = 0; $j <= 1; $j++ ) {
1202 $vector_length += $projections[$i][$j]**2;
1204 $vector_length = sqrt( $vector_length );
1206 for( my $j = 0; $j <= 1; $j++ ) {
1207 $n_sd += (($projections[$i][$j]/$vector_length)*$standard_deviation[$j])**2;
1209 $n_sd = ( $self -> {'outside_n_sd_check'} * sqrt( $n_sd ) );
1210 $outside_n_sd[$i] = $vector_length > $n_sd ?
1 : 0;
1214 $self -> {'outside_n_sd'} = \
@outside_n_sd;
1218 my %covariance_return_section;
1219 $covariance_return_section{'name'} = 'Diagnostics';
1220 $covariance_return_section{'labels'} = [[],['cook.scores','covariance.ratios','outside.n.sd']];
1223 for( my $i = 0; $i <= $#cov_ratio; $i ++ ){
1224 push( @res_array , [$cook_score[$i],$cov_ratio[$i],$outside_n_sd[$i]] );
1227 $covariance_return_section{'values'} = \
@res_array;
1229 push( @
{$self -> {'results'}[$model_number-1]{'own'}},\
%covariance_return_section );
1233 # --------- Relative estimate change and Jackknife bias ----------
1235 # {{{ Relative change of the parameter estimates
1237 my $output_harvest = $self -> harvest_output
( accessors
=> ['ofv', 'thetas', 'omegas', 'sigmas','sethetas', 'seomegas', 'sesigmas'],
1238 search_output
=> 1 );
1241 $return_section{'name'} = 'relative.changes';
1242 $return_section{'labels'} = [[],[]];
1244 my %bias_return_section;
1245 $bias_return_section{'name'} = 'Jackknife.bias.estimate';
1246 $bias_return_section{'labels'} = [['bias','relative.bias'],[]];
1248 my ( @bias, @bias_num, @b_orig, @rel_bias );
1250 foreach my $param ( 'thetas', 'omegas', 'sigmas' ) {
1251 my $orig_est = $self -> {'models'} -> [$model_number-1] -> outputs
-> [0] -> $param;
1252 if ( defined $orig_est->[0][0] ) {
1253 for ( my $j = 0; $j < scalar @
{$orig_est->[0][0]}; $j++ ) {
1254 $b_orig[$k++] = $orig_est->[0][0][$j];
1261 for ( my $i = 0; $i < scalar @
{$output_harvest -> {'ofv'} -> [$model_number-1]{'own'}}; $i++ ) {
1264 foreach my $param ( 'ofv', 'thetas', 'omegas', 'sigmas',
1265 'sethetas', 'seomegas', 'sesigmas',) {
1267 my $orig_est = $self -> {'models'} -> [$model_number-1] -> outputs
-> [0] -> $param;
1268 my $est = defined $output_harvest -> {$param} ?
$output_harvest -> {$param} -> [$model_number-1]{'own'} : [];
1270 if ( $param eq 'ofv' ) {
1271 if ( defined $orig_est->[0][0] and $orig_est->[0][0] != 0 ) {
1272 push( @values, ($est->[$i][0][0]-$orig_est->[0][0])/$orig_est->[0][0]*100 );
1274 push( @values, 'INF' );
1277 push( @
{$return_section{'labels'} -> [1]}, $param );
1281 if( defined $est->[$i][0][0] ){
1282 for ( my $j = 0; $j < scalar @
{$est->[$i][0][0]}; $j++ ) {
1283 if ( defined $orig_est->[0][0][$j] and $orig_est->[0][0][$j] != 0 ) {
1284 push( @values, ($est->[$i][0][0][$j]-$orig_est->[0][0][$j])/$orig_est->[0][0][$j]*100);
1285 if( substr($param,0,2) ne 'se' ) {
1286 $bias[$k] += $est->[$i][0][0][$j];
1290 push( @values, 'INF' );
1291 if( substr($param,0,2) ne 'se' ) {
1295 if( substr($param,0,2) eq 'se' ) {
1296 push( @
{$return_section{'labels'} -> [1]}, uc(substr($param,0,4)).($j+1) );
1298 my $lbl = uc(substr($param,0,2)).($j+1);
1299 push( @
{$bias_return_section{'labels'} -> [1]}, $lbl );
1300 push( @
{$return_section{'labels'} -> [1]}, $lbl );
1308 push( @rel_ests, \
@values );
1313 for( my $i = 0; $i <= $#bias_num; $i++ ) {
1314 # The [0] is there to handle the fact that thw bins
1315 # attribute is restructured per model and problem
1316 next if( not defined $bias[$i] );
1317 $bias[$i] = ($self -> {'bins'}[$model_number-1][0]-1)*
1318 ($bias[$i]/$bias_num[$i]-$b_orig[$i]);
1319 if( defined $b_orig[$i] and $b_orig[$i] != 0 ) {
1320 $rel_bias[$i] = $bias[$i]/$b_orig[$i]*100;
1322 $rel_bias[$i] = undef;
1325 $bias_return_section{'values'} = [\
@bias,\
@rel_bias];
1327 $return_section{'values'} = \
@rel_ests ;
1328 push( @
{$self -> {'results'}[$model_number-1]{'own'}},\
%return_section );
1329 push( @
{$self -> {'results'}[$model_number-1]{'own'}},\
%bias_return_section );
1331 # }}} Relative change of the parameter estimates
1334 $self -> update_raw_results
(model_number
=> $model_number);
1336 # ------------- Register the results in a Database ----------------
1338 if( not -e
$self -> {'directory'}."m$model_number/done.database.results" ) {
1339 open( DB
, ">".$self -> {'directory'}."m$model_number/done.database.results" );
1340 my ( $start_id, $last_id ) = $self ->
1341 register_mfit_results
( model_number
=> $model_number,
1342 cook_score
=> \
@cook_score,
1343 covariance_ratio
=> \
@cov_ratio,
1344 projections
=> $proj_ref,
1345 outside_n_sd
=> \
@outside_n_sd );
1346 print DB
"$start_id-$last_id\n";
1350 # experimental: to save memory
1351 $self -> {'prepared_models'}[$model_number-1]{'own'} = undef;
1352 if( defined $PsN::config
-> {'_'} -> {'R'} and
1353 -e
$PsN::lib_dir
. '/R-scripts/cdd.R' ) {
1354 # copy the cdd R-script
1355 cp
( $PsN::lib_dir
. '/R-scripts/cdd.R', $self -> {'directory'} );
1356 # Execute the script
1357 system( $PsN::config
-> {'_'} -> {'R'}." CMD BATCH cdd.R" );
1360 end modelfit_analyze
1362 # }}} modelfit_analyze
1367 my $D = Math
::MatrixReal
->
1368 new_from_rows
( \
@data_matrix );
1369 my @n_dim = @
{$data_matrix[0]};
1370 my @d_dim = @data_matrix;
1371 my $n = scalar @n_dim;
1372 my $d = scalar @d_dim;
1373 map( $_=(1/$n), @n_dim );
1374 my $frac_vec_n = Math
::MatrixReal
->
1375 new_from_cols
( [\
@n_dim] );
1376 map( $_=1, @n_dim );
1377 map( $_=1, @d_dim );
1378 my $one_vec_n = Math
::MatrixReal
->
1379 new_from_cols
( [\
@n_dim] );
1380 my $one_vec_d = Math
::MatrixReal
->
1381 new_from_cols
( [\
@d_dim] );
1382 my $one_vec_d_n = $one_vec_d * ~$one_vec_n;
1383 my $M = $D*$frac_vec_n;
1384 my $M_matrix = $M * ~$one_vec_n;
1386 # Calculate the mean-subtracted data
1387 my $S = $D-$M_matrix;
1389 # compue the empirical covariance matrix
1392 # compute the eigenvalues and vectors
1393 my ($l, $V) = $C -> sym_diagonalize
();
1395 # Project the original data on the eigenvectors
1399 # l, V and projections are all MatrixReal objects.
1400 # We need to return the normal perl equivalents.
1401 @eigenvalues = @
{$l->[0]};
1402 @eigenvectors = @
{$V->[0]};
1403 @std = @
{$self -> std
( data_matrix
=> $P -> [0] )};
1404 # Make $P a n * d matrix
1406 @projections = @
{$P->[0]};
1414 my ( @sum, @pow_2_sum );
1415 if ( defined $data_matrix[0] ) {
1416 my $n = scalar @
{$data_matrix[0]};
1417 for( my $i = 0; $i <= $#data_matrix; $i++ ) {
1418 for( my $j = 0; $j < $n; $j++ ) {
1419 $sum[$i] = $sum[$i]+$data_matrix[$i][$j];
1420 $pow_2_sum[$i] += $data_matrix[$i][$j]*$data_matrix[$i][$j];
1422 $std[$i] = sqrt( ( $n*$pow_2_sum[$i] - $sum[$i]*$sum[$i] ) / ($n*$n) );
1429 # {{{ modelfit_post_fork_analyze
1431 start modelfit_post_fork_analyze
1433 # my @modelfit_results = @{ $self -> {'results'} -> {'subtools'} };
1434 my @modelfit_results = @
{ $self -> {'results'} };
1436 ui
-> print( category
=> 'cdd',
1437 message
=> "Soon done" );
1439 end modelfit_post_fork_analyze
1441 # }}} modelfit_post_fork_analyze
1443 # {{{ modelfit_results
1445 start modelfit_results
1447 my @orig_models = @
{$self -> {'models'}};
1448 my @orig_raw_results = ();
1449 foreach my $orig_model ( @orig_models ) {
1450 my $orig_output = $orig_model -> outputs
-> [0];
1451 push( @orig_raw_results, $orig_output -> $accessor );
1453 # my @models = @{$self -> {'prepared_models'}};
1454 my @outputs = @
{$self -> {'results'}};
1456 my @raw_results = ();
1458 foreach my $mod ( @outputs ) {
1460 foreach my $output ( @
{$mod -> {'subset_outputs'}} ) {
1461 push( @raw_inner, $output -> $accessor );
1463 push( @raw_results, \
@raw_inner );
1465 if ( $format eq 'relative' or $format eq 'relative_percent' ) {
1467 for ( my $i = 0; $i <= $#orig_raw_results; $i++ ) {
1468 print "Model\t$i\n";
1469 my @rel_subset = ();
1470 for ( my $i2 = 0; $i2 < scalar @
{$raw_results[$i]}; $i2++ ) {
1471 print "Subset Model\t$i2\n";
1473 for ( my $j = 0; $j < scalar @
{$orig_raw_results[$i]}; $j++ ) {
1474 print "Problem\t$j\n";
1475 if( ref( $orig_raw_results[$i][$j] ) eq 'ARRAY' ) {
1476 my @rel_subprob = ();
1477 for ( my $k = 0; $k < scalar @
{$orig_raw_results[$i][$j]}; $k++ ) {
1478 print "Subprob\t$k\n";
1479 if( ref( $orig_raw_results[$i][$j][$k] ) eq 'ARRAY' ) {
1480 my @rel_instance = ();
1481 for ( my $l = 0; $l < scalar @
{$orig_raw_results[$i][$j][$k]}; $l++ ) {
1482 print "Instance\t$l\n";
1483 my $orig = $orig_raw_results[$i][$j][$k][$l];
1484 my $res = $raw_results[$i][$i2][$j][$k][$l];
1485 if( defined $orig and ! $orig == 0 ) {
1486 print "ORIGINAL $orig\n";
1487 print "SUBSET $res\n";
1488 print "RELATIVE ",$res/$orig,"\n";
1489 if ( $format eq 'relative_percent' ) {
1490 push( @rel_instance, ($res/$orig-1)*100 );
1492 push( @rel_instance, $res/$orig );
1495 push( @rel_instance, 'NA' );
1497 push( @rel_subprob,\
@rel_instance );
1499 } elsif( ref( $orig_raw_results[$i][$j][$k] ) eq 'SCALAR' ) {
1500 print "One instance per problem\n";
1501 my $orig = $orig_raw_results[$i][$j][$k];
1502 my $res = $raw_results[$i][$i2][$j][$k];
1503 if( defined $orig and ! $orig == 0 ) {
1504 print "ORIGINAL $orig\n";
1505 print "SUBSET $res\n";
1506 print "RELATIVE ",$res/$orig,"\n";
1507 if ( $format eq 'relative_percent' ) {
1508 push( @rel_subprob, ($res/$orig-1)*100 );
1510 push( @rel_subprob, $res/$orig );
1513 push( @rel_subprob, 'NA' );
1516 print "WARNING: tool::cdd -> modelfit_results: neither\n\t".
1517 "array or scalar reference found at layer 4 in result data\n\t".
1518 "structure (found ",ref( $orig_raw_results[$i][$j][$k] ),")\n";
1521 push( @rel_prob, \
@rel_subprob );
1522 } elsif( ref( $orig_raw_results[$i][$j] ) eq 'SCALAR' ) {
1523 print "One instance per problem\n";
1524 my $orig = $orig_raw_results[$i][$j];
1525 my $res = $raw_results[$i][$i2][$j];
1526 if( defined $orig and ! $orig == 0 ) {
1527 print "ORIGINAL $orig\n";
1528 print "SUBSET $res\n";
1529 print "RELATIVE ",$res/$orig,"\n";
1530 if ( $format eq 'relative_percent' ) {
1531 push( @rel_prob, ($res/$orig-1)*100 );
1533 push( @rel_prob, $res/$orig );
1536 push( @rel_prob, 'NA' );
1539 print "WARNING: tool::cdd -> modelfit_results: neither\n\t".
1540 "array or scalar reference found at layer 3 in result data\n\t".
1541 "structure (found ",ref( $orig_raw_results[$i][$j] ),")\n";
1544 push( @rel_subset, \
@rel_prob );
1546 push( @results, \
@rel_subset );
1549 @results = @raw_results;
1552 end modelfit_results
1554 # }}} modelfit_results
1556 # {{{ relative_estimates
1558 start relative_estimates
1560 my $accessor = $parameter.'s';
1561 my @params = $self -> $accessor;
1563 # print "Parameter: $parameter\n";
1564 # sub process_inner_results {
1565 # my $res_ref = shift;
1568 # foreach my $res ( @{$res_ref} ) {
1569 # if ( ref ( $res ) eq 'ARRAY' ) {
1570 # process_inner_results( $res, $pad );
1572 # print "RELEST $pad\t$res\n";
1576 # process_inner_results( \@params, 0 );
1578 my @orig_params = $self -> $accessor( original_models
=> 1 );
1579 # [?][model][prob][subp][#]
1580 # print "ORIG TH1: ",$orig_params[0][0][0][0][0],"\n";
1581 for ( my $i = 0; $i < scalar @params; $i++ ) {
1584 for ( my $j = 0; $j < scalar @
{$params[$i]}; $j++ ) {
1585 # Loop over data sets
1587 for ( my $k = 1; $k < scalar @
{$params[$i]->[$j]}; $k++ ) {
1588 # Loop over problems (sort of, at least)
1590 for ( my $l = 0; $l < scalar @
{$params[$i]->[$j]->[$k]}; $l++ ) {
1591 # Loop over sub problems (sort of, at least)
1593 for ( my $m = 0; $m < scalar @
{$params[$i]->[$j]->[$k]->[$l]}; $m++ ) {
1594 # Loop over the params
1596 for ( my $n = 0; $n < scalar @
{$params[$i][$j][$k][$l][$m]}; $n++ ) {
1597 my $orig = $orig_params[$i][$j][$l][$m][$n];
1598 # my $orig = $params[$i][$j][0][$l][$m][$n];
1599 my $prep = $params[$i][$j][$k][$l][$m][$n];
1601 if ( $percentage ) {
1602 push( @par, ($prep/$orig*100)-100 );
1604 push( @par, $prep/$orig );
1607 push( @par, $PsN::out_miss_data
);
1610 push( @sub, \
@par );
1612 push( @prob, \
@sub );
1614 push( @prep, \
@prob );
1616 push( @mod, \
@prep );
1618 push( @relative_estimates, \
@mod );
1621 end relative_estimates
1623 # }}} relative_estimates
1625 # {{{ relative_confidence_limits
1627 start relative_confidence_limits
1629 my @params = @
{$self -> confidence_limits
( class => 'tool::llp',
1630 parameter
=> $parameter )};
1631 for ( my $i = 0; $i < scalar @params; $i++ ) {
1634 for ( my $j = 1; $j < scalar @
{$params[$i]}; $j++ ) {
1635 # Loop over data sets
1637 my @nums = sort {$a <=> $b} keys %{$params[$i][$j]};
1638 foreach my $num ( @nums ) {
1640 for ( my $n = 0; $n < scalar @
{$params[$i][$j]->{$num}}; $n++ ) {
1642 for ( my $o = 0; $o < scalar @
{$params[$i][$j]->{$num}->[$n]}; $o++ ) {
1643 # OBS: the [0] in the $j position points at the first element i.e
1644 # the results of the tool run on the original model
1645 my $orig = $params[$i][0]->{$num}->[$n][$o];
1646 my $prep = $params[$i][$j]->{$num}->[$n][$o];
1647 print "ORIG: $orig, PREP: $prep\n";
1649 if ( $percentage ) {
1650 push( @side_lim, ($prep/$orig*100)-100 );
1652 push( @side_lim, $prep/$orig );
1655 push( @side_lim, $PsN::out_miss_data
);
1658 push( @prob_lim, \
@side_lim );
1660 $num_lim{$num} = \
@prob_lim;
1662 push( @mod, \
%num_lim );
1664 push( @relative_limits, \
@mod );
1667 end relative_confidence_limits
1669 # }}} relative_confidence_limits
1671 # {{{ llp_print_results
1673 start llp_print_results
1675 # NOTE! Only valid for models with one problem and one sub problem!
1677 my %relative_values;
1678 $relative_values{'theta_cis'} = $self ->
1679 relative_confidence_limits
( parameter
=> 'theta',
1681 $relative_values{'omega_cis'} = $self ->
1682 relative_confidence_limits
( parameter
=> 'omega',
1684 $relative_values{'sigma_cis'} = $self ->
1685 relative_confidence_limits
( parameter
=> 'sigma',
1687 $relative_values{'thetas'} = $self ->
1688 relative_estimates
( parameter
=> 'theta',
1690 $relative_values{'omegas'} = $self ->
1691 relative_estimates
( parameter
=> 'omega',
1693 $relative_values{'sigmas'} = $self ->
1694 relative_estimates
( parameter
=> 'sigma',
1696 $relative_values{'sethetas'} = $self ->
1697 relative_estimates
( parameter
=> 'setheta',
1699 $relative_values{'seomegas'} = $self ->
1700 relative_estimates
( parameter
=> 'seomega',
1702 $relative_values{'sesigmas'} = $self ->
1703 relative_estimates
( parameter
=> 'sesigma',
1707 $prep_values{'theta_cis'} = $self -> confidence_limits
( class => 'tool::llp',
1708 parameter
=> 'theta' );;
1709 $prep_values{'omega_cis'} = $self -> confidence_limits
( class => 'tool::llp',
1710 parameter
=> 'omega' );;
1711 $prep_values{'sigma_cis'} = $self -> confidence_limits
( class => 'tool::llp',
1712 parameter
=> 'sigma' );;
1713 $prep_values{'thetas'} = $self -> thetas
;
1714 $prep_values{'omegas'} = $self -> omegas
;
1715 $prep_values{'sigmas'} = $self -> sigmas
;
1716 $prep_values{'sethetas'} = $self -> sethetas
;
1717 $prep_values{'seomegas'} = $self -> seomegas
;
1718 $prep_values{'sesigmas'} = $self -> sesigmas
;
1722 open( RES
, ">".$self -> {'results_file'} );
1723 print RES
"Case-Deletion Diagnostic with Log-Likelihood Profiling\n";
1725 for ( my $i = 0; $i < scalar @
{$relative_values{'theta_cis'}}; $i++ ) {
1726 print RES
"MODEL:;",$i+1,"\n";
1727 foreach my $param ( 'theta_cis', 'omega_cis', 'sigma_cis' ) {
1728 print RES
"\n",uc($param),":\n";
1729 # Loop over data sets
1731 my @nums = sort {$a <=> $b} keys %{$relative_values{$param}[$i][0]};
1733 foreach my $num ( @nums ) {
1734 printf RES
"$num;;;;";
1737 foreach my $num ( @nums ) {
1738 for ( my $o = 0; $o < scalar @
{$relative_values{$param}[$i][0]->{$num}[0]}; $o++ ) {
1739 my $side = $o == 0 ?
'lower' : 'upper';
1740 printf RES
";$side;rel diff (%)";
1745 foreach my $num ( @nums ) {
1746 for ( my $o = 0; $o < scalar @
{$relative_values{$param}[$i][0]->{$num}[0]}; $o++ ) {
1747 printf RES
";%7.5f",$prep_values{$param}[$i][0]->{$num}[0][$o];
1752 for ( my $j = 0; $j < scalar @
{$relative_values{$param}[$i]}; $j++ ) {
1753 printf RES
"%-7d",$j+1;
1754 my @nums = sort {$a <=> $b} keys %{$relative_values{$param}[$i][$j]};
1755 foreach my $num ( @nums ) {
1756 for ( my $n = 0; $n < scalar @
{$relative_values{$param}[$i][$j]->{$num}}; $n++ ) {
1757 for ( my $o = 0; $o < scalar @
{$relative_values{$param}[$i][$j]->{$num}[$n]}; $o++ ) {
1758 my $rel = $relative_values{$param}[$i][$j]->{$num}[$n][$o];
1759 my $prep = $prep_values{$param}[$i][$j+1]->{$num}[$n][$o];
1760 printf RES
";%7.5f",$prep;
1761 printf RES
";%3.0f",$rel;
1769 # Skipped id's, keys and values:
1773 # sub process_inner_results {
1774 # my $res_ref = shift;
1777 # foreach my $res ( @{$res_ref} ) {
1778 # if ( ref ( $res ) eq 'ARRAY' ) {
1779 # print "$pad ARRAY size ",scalar @{$res},"\n";
1780 # process_inner_results( $res, $pad );
1781 # } elsif ( ref ( $res ) eq 'HASH' ) {
1782 # print "$pad HASH keys ",keys %{$res},"\n";
1784 # print "$pad OTHER\n";
1788 # process_inner_results( $self -> {'results'}, 0 );
1791 foreach my $own ( @
{$self -> {'results'} -> {'own'}} ) {
1792 # print "REF1: ",ref($mod),"\n";
1793 # foreach my $prob ( @{$mod} ) {
1794 # print "REF2: ",ref($prob),"\n";
1795 # foreach my $subprob ( @{$prob} ) {
1796 # print "REF3: ",ref($subprob),"\n";
1797 # print "KEYS: ",keys %{$subprob},"\n";
1800 print RES
"MODEL $i\n";
1801 foreach my $param ( 'skipped_ids', 'skipped_keys', 'skipped_values' ) {
1802 print RES
uc($param),"\n";
1804 foreach my $prep ( @
{$own -> {$param}} ) {
1805 print RES
"Bin no;$j;";
1806 foreach my $val ( @
{$prep} ) {
1817 # for ( my $j = 0; $j < scalar @{$relative_values{'thetas_cis'}->[$i]}; $j++ ) {
1818 # print RES "MODEL:;",$j+1,"\n";
1819 # # Loop over problems (sort of, at least)
1820 # for ( my $l = 0; $l < scalar @{$relative_values{'thetas_cis'}->[$i]->[$j]->[0]}; $l++ ) {
1821 # # Loop over sub problems (sort of, at least)
1822 # for ( my $m = 0; $m < scalar @{$relative_values{'thetas_cis'}->[$i]->[$j]->[0]->[$l]}; $m++ ) {
1823 # # foreach my $param ( 'thetas', 'omegas', 'sigmas',
1824 # # 'sethetas', 'seomegas', 'sesigmas' ) {
1825 # foreach my $param ( 'theta_cis' ) {
1826 # print RES uc($param),":\n\n";
1827 # # Here one could add printing of parameter names, i.e. 'CL V...' or 'TH1 TH2...'
1828 # # Loop over data sets
1829 # for ( my $k = 0; $k < scalar @{$relative_values{$param}->[$i]->[$j]}; $k++ ) {
1830 # printf RES "%-7d",$k+1;
1831 # for ( my $n = 0; $n < scalar @{$relative_values{$param}[$i][$j][$k][$l][$m]}; $n++ ) {
1832 # for ( my $o = 0; $o < scalar @{$relative_values{$param}[$i][$j][$k][$l][$m][$n]}; $o++ ) {
1833 # my $rel = $relative_values{$param}->[$i][$j][$k][$l][$m][$n];
1834 # my $prep = $prep_values{$param}->[$j][$k][$l][$m][$n];
1835 # printf RES ";%7.5f",$prep;
1836 # printf RES ";%3.0f",$rel;
1852 end llp_print_results
1854 # }}} llp_print_results
1856 # {{{ general_print_results
1858 start general_print_results
1860 unless ( defined $self -> {'results'} ) {
1861 print "WARNING: cdd->general_print_results: no return values defined;\n"
1862 ."cannot print results\n";
1865 my %results = %{$self -> {'results'}};
1867 open( RES
, ">".$self -> {'results_file'} );
1868 print RES
"Case-Deletion Diagnostic\n";
1872 unless ( defined $results{'own'} ) {
1873 print "WARNING: cdd->general_print_results: no own return values defined;\n"
1874 ."cannot print results\n";
1879 my @own_results = @
{$results{'own'}};
1880 foreach my $result_unit ( @own_results ) {
1881 print RES
$result_unit -> {'name'},"\n";
1882 print RES
$result_unit -> {'comment'},"\n";
1883 my @values = defined $result_unit{'values'} ? @
{$result_unit{'values'}} : ();
1884 my @labels = defined $result_unit{'labels'} ? @
{$result_unit{'labels'}} : ();
1886 for ( my $i = 0; $i <= $#values; $i++ ) {
1888 for ( my $j = 0; $j <= $#values[$i]; $j++ ) {
1889 # Loop the sub problems
1890 for ( my $k = 0; $k <= $#values[$i][$j]; $k++ ) {
1891 # Loop the first result dimension
1892 for (my $l = 0; $l <= $#values[$i][$j][$k]; $l++ ) {
1893 # Loop the second result dimension
1894 for ( my $m = 0; $m <= $#values[$i][$j][$k][$l]; $m++ ) {
1895 # Loop the second result dimension
1896 for ( my $m = 0; $m <= $#values[$i][$j]$k][$l]; $m++ ) {
1897 foreach my $model_res ( @values ) {
1898 foreach my $prob_res ( @
{$model_unit} ) {
1899 foreach my $subprob_res ( @
{$prob_unit} ) {
1900 foreach my $subprob_res ( @
{$prob_unit} ) {
1904 end general_print_results
1906 # }}} general_print_results
1908 # {{{ modelfit_print_results
1910 start modelfit_print_results
1912 my @parameters = ( 'theta', 'omega', 'sigma',
1913 'setheta', 'seomega', 'sesigma' );
1914 my %relative_values;
1917 foreach my $parameter ( @parameters ) {
1918 my $accessor = $parameter.'s';
1919 $relative_values{$parameter} = $self ->
1920 relative_estimates
( parameter
=> $parameter,
1922 $prep_values{$parameter} = $self -> $accessor;
1923 $orig_values{$parameter} = $self -> $accessor( original_models
=> 1 );
1925 # sub process_results {
1926 # my $res_ref = shift;
1929 # foreach my $res ( @{$res_ref} ) {
1930 # if ( ref ( $res ) eq 'ARRAY' ) {
1931 # process_results( $res, $pad );
1933 # print "final $pad\t$res\n";
1937 # process_results( $relative_values{'thetas'}, 0 );
1941 print "Calling nthetas\n";
1942 $nparam{'thetas'} = $self -> nthetas
( original_models
=> 1 );
1943 print "Done that\n";
1944 open( RES
, ">".$self -> {'results_file'} );
1945 print RES
"Case-Deletion Diagnostic\n";
1946 # Date information to be added
1947 # print RES "Date:;;;;",$self -> {'date'},"\n";
1949 print RES
"Modelfiles:";
1950 foreach my $model ( @
{$self -> {'models'}} ) {
1951 print RES
";;;;",$model -> filename
,"\n";
1954 # Based on columns and number of datasets might better be shown if split by
1955 # model and problem:
1956 print RES
"Based on columns:";
1957 my $vars = $self -> {'case_columns'};
1958 if ( ref( $vars ) eq 'ARRAY' ) {
1959 foreach my $vars2 ( @
{$vars} ) {
1960 if ( ref( $vars2 ) eq 'ARRAY' ) {
1961 foreach my $vars3 ( @
{$vars2} ) {
1962 print RES
";;;;$vars3\n";
1965 print RES
";;;;$vars2\n";
1969 print RES
";;;;$vars\n";
1972 print RES
"Number of data sets:";
1973 my $bins = $self -> {'bins'};
1974 if ( ref( $bins ) eq 'ARRAY' ) {
1975 foreach my $bins2 ( @
{$bins} ) {
1976 if ( ref( $bins2 ) eq 'ARRAY' ) {
1977 foreach my $bins3 ( @
{$bins2} ) {
1978 print RES
";;;;$bins3\n";
1981 print RES
";;;;$bins2\n";
1985 print RES
";;;;$bins\n";
1988 print RES
"Selection:;;;;",$self-> {'selection_method'},"\n";
1989 if ( defined $self -> {'seed'} ) {
1990 print RES
"Seed number:;;;;",$self -> {'seed'},"\n";
1992 print RES
"No seed number specified\n";
1995 # TODO: $skip_keys etc from data->case_deletion must be transferred back to
1996 # the main process and appropriate attributes set.
1998 print RES
"\n\n\n\n";
2000 # process_results( $relative_values{'thetas'}->[0]->[0]->[0], 0 );
2002 for ( my $i = 0; $i < scalar @
{$relative_values{'theta'}}; $i++ ) {
2004 for ( my $j = 0; $j < scalar @
{$relative_values{'theta'}->[$i]}; $j++ ) {
2005 print RES
"MODEL:;",$j+1,"\n";
2006 # Loop over problems (sort of, at least)
2007 for ( my $l = 0; $l < scalar @
{$relative_values{'theta'}->[$i]->[$j]->[0]}; $l++ ) {
2008 # Loop over sub problems (sort of, at least)
2009 for ( my $m = 0; $m < scalar @
{$relative_values{'theta'}->[$i]->[$j]->[0]->[$l]}; $m++ ) {
2010 foreach my $param ( @parameters ) {
2011 print RES
uc($param),":\n\n";
2012 # Here one could add printing of parameter names, i.e. 'CL V...' or 'TH1 TH2...'
2013 # Loop over data sets
2015 for ( my $n = 1; $n <=scalar @
{$relative_values{$param}[$i][$j][0][$l][$m]}; $n++ ) {
2016 printf RES
"estimate;rel diff (%);";
2019 for ( my $n = 1; $n <= scalar @
{$relative_values{$param}[$i][$j][0][$l][$m]}; $n++ ) {
2024 for ( my $n = 0; $n < scalar @
{$relative_values{$param}[$i][$j][0][$l][$m]}; $n++ ) {
2025 printf RES
";%7.5f",$orig_values{$param}[$j][$l][$m][$n];
2029 for ( my $k = 0; $k < scalar @
{$relative_values{$param}->[$i]->[$j]}; $k++ ) {
2030 printf RES
"%-7d",$k+1;
2031 for ( my $n = 0; $n < scalar @
{$relative_values{$param}[$i][$j][$k][$l][$m]}; $n++ ) {
2032 my $rel = $relative_values{$param}->[$i][$j][$k][$l][$m][$n];
2033 my $prep = $prep_values{$param}->[$j][$k+1][$l][$m][$n];
2034 printf RES
";%7.5f",$prep;
2035 printf RES
";%3.0f",$rel;
2048 # sub process_inner_results {
2049 # my $res_ref = shift;
2052 # foreach my $res ( @{$res_ref} ) {
2053 # if ( ref ( $res ) eq 'ARRAY' ) {
2054 # print "$pad ARRAY size ",scalar @{$res},"\n";
2055 # process_inner_results( $res, $pad );
2056 # } elsif ( ref ( $res ) eq 'HASH' ) {
2057 # print "$pad HASH keys ",keys %{$res},"\n";
2059 # print "$pad OTHER\n";
2063 # process_inner_results( $self -> {'results'}, 0 );
2065 # Skipped id's, keys and values:
2068 foreach my $own ( @
{$self -> {'results'} -> {'own'}} ) {
2069 print RES
"MODEL $i\n";
2070 foreach my $param ( 'skipped_ids', 'skipped_keys', 'skipped_values' ) {
2071 print RES
uc($param),"\n";
2073 foreach my $prep ( @
{$own -> {$param}} ) {
2074 print RES
"Bin no;$j;";
2075 foreach my $val ( @
{$prep} ) {
2087 end modelfit_print_results
2089 # }}} modelfit_print_results
2091 # {{{ prepare_results
2093 start prepare_results
2095 if ( not defined $self -> {'raw_results'} ) {
2096 $self -> read_raw_results
();
2106 my ($outside_n_sd, $cook, $covrat );
2107 for( my $i = 0; $i < scalar @
{$self -> {'raw_results_header'} -> [0]} ; $i++) {
2108 if( $self -> {'raw_results_header'} -> [0][$i] eq 'outside.n.sd' ){
2111 if( $self -> {'raw_results_header'} -> [0][$i] eq 'cook.scores' ){
2114 if( $self -> {'raw_results_header'} -> [0][$i] eq 'cov.ratios' ){
2121 my $outcome = shift;
2123 my $l = (7 - length( $outcome ))/2;
2124 my $text = sprintf( "%-66s%2s%7s%-5s", $name, '[ ', $outcome. ' ' x
$l, ' ]' );
2125 print $text, "\n\n";
2126 print $file $text if defined $file;
2129 my ( @num, @cs, @cr ) ;
2130 for( my $model_i = 0; $model_i <= $#{$self -> {'raw_results'}}; $model_i++ ){
2131 for( my $prep_mod_i = 0; $prep_mod_i <= $#{$self -> {'raw_results'} -> [$model_i]}; $prep_mod_i++ ){
2132 my $test_val = $self -> {'raw_results'} -> [$model_i] -> [$prep_mod_i] -> [$outside_n_sd];
2133 if( defined($test_val) and $test_val == 1 ){
2134 push( @num, ($prep_mod_i) ); # prep_mod_i includes the original run as 0
2135 push( @cs, $self -> {'raw_results'} -> [$model_i] -> [$prep_mod_i] -> [$cook] );
2136 push( @cr, $self -> {'raw_results'} -> [$model_i] -> [$prep_mod_i] -> [$covrat] );
2142 acknowledge
( 'No outlying case-deleted data set was found', 'OK' );
2145 acknowledge
( (scalar @num).' case-deleted data sets were marked as outliers', 'WARNING' );
2146 printf( "\t%-20s%20s\t%20s\n", 'Data set', 'cook-score', 'covariance-ratio' );
2147 for( my $i = 0; $i <= $#num; $i++ ) {
2148 printf( "\t%-20s%14s%3.3f\t%14s%3.3f\n", $num[$i], ' ', $cs[$i], ' ', $cr[$i] );
2157 # {{{ update_raw_results
2159 start update_raw_results
2165 # foreach my $section( @{$self -> {'results'}[0] -> {'own'}} ){
2166 # if( $section -> {'name'} eq 'cook.scores' ){
2167 # $cook_scores = $section -> {'values'};
2169 # if( $section -> {'name'} eq 'cov.ratio' ){
2170 # $cov_ratios = $section -> {'values'};
2172 # if( $section -> {'name'} eq 'outside.n.sd' ){
2173 # $outside_n_sd = $section -> {'values'};
2178 OSspecific
::absolute_path
( $self -> {'directory'},
2179 $self -> {'raw_results_file'}[$model_number-1] );
2180 open( RRES
, $dir.$file );
2183 open( RRES
, '>',$dir.$file );
2186 print RRES
$rres[0] . ",cook.scores,cov.ratios,outside.n.sd\n";
2188 print RRES
$rres[1] . ",0,1,0\n";
2191 for( my $i = 2 ; $i <= $#rres; $i ++ ) {
2192 my $row_str = $rres[$i];
2194 $row_str .= sprintf( ",%.5f,%.5f,%1f\n" ,
2195 $self -> {'cook_scores'} -> [$i-2],
2196 $self -> {'covariance_ratios'} -> [$i-2],
2197 $self -> {'outside_n_sd'} -> [$i-2] );
2198 print RRES
$row_str;
2202 end update_raw_results
2204 # }}} update_raw_results
2206 # {{{ create_R_scripts
2207 start create_R_scripts
2209 unless( -e
$PsN::lib_dir
. '/R-scripts/cdd.R' ){
2210 'debug' -> die( message
=> 'CDD R-script are not installed, no matlab scripts will be generated.' );
2213 cp
( $PsN::lib_dir
. '/R-scripts/cdd.R', $self -> {'directory'} );
2215 end create_R_scripts