bin/nonp_bootstrap_v2

   1 #!/usr/local/bin/perl
   2
   3 # Only for Development
   4 #use FindBin qw($Bin);
   5 #use lib "$Bin/../PsN_2_2_0_patches_serial/lib";
   6
   7 # Don't edit the line below, it must look exactly like this.
   8 # Everything above this line will be replaced #
   9
  10 # Perl includes #
  11 use strict;
  12 use Getopt::Long;
  13 # External modules #
  14 use Math::Random;
  15 # PsN includes #
  16 use PsN;
  17 use tool::modelfit;
  18 use tool::bootstrap;
  19 use model;
  20 use debug;
  21 use common_options;
  22 use ui;
  23 use File::Copy 'cp';
  24 use table_file;
  25
  26 ################################################################################
  27 #
  28 # This is the nonp_bootstrap version 2. First the command line options
  29 # are parsed using Getopt::Long. PsN "common options" are used to fine
  30 # tune the running.
  31 #
  32 # Prerequisites for the nonp_bootstrap is a that the first $TABLE
  33 # record prints the wished value and its probability per individual.
  34 #
  35 ################################################################################
  36
  37 # {{{ options handling
  38
  39 my $cmd_line = $0 . " " . join( " ", @ARGV );
  40
  41 my %options;
  42
  43 my %optional_options = ('theta_names:s' => '',
  44                         'samples:i' => '',
  45                         'result_file' => '',
  46                         "bca"=>'',
  47                         "mplots"=>'',
  48                         "rplots"=>'',
  49                         "sample_size:i"=>'',
  50                         "stratify_on:s"=>'',
  51                         "skip_minimization_terminated"=>'',
  52                         "skip_covariance_step_terminated"=>'',
  53                         "skip_with_covstep_warnings"=>'',
  54                         "skip_estimate_near_boundary"=>'',
  55                         "minimization_successful_limit:f"=>'',
  56                         "covariance_step_successful_limit:f"=>'',
  57                         "covariance_step_warnings_limit:f"=>'',
  58                         "estimate_near_boundary_limit:f"=>'',
  59                         "estimate_near_boundary_limit:f"=>'',
  60                         "se_confidence_intervals_level:f"=>'',
  61                         "percentile_confidence_intervals_level:f"=>'',
  62                         "bca_confidence_intervals_level:f"=>'',
  63                         "se_confidence_intervals_check:f"=>'',
  64                         "percentile_confidence_intervals_check:f"=>'',
  65                         "bca_confidence_intervals_check:f"=>'',
  66                         "large_bias_limit:f"=>''
  67                         );
  68
  69 my $res = GetOptions( \%options,
  70                       @common_options::get_opt_strings,
  71                       keys(%optional_options) );
  72
  73 exit unless $res;
  74
  75 common_options::set_globals( \%options, 'nonp_bootstrap' );
  76 common_options::get_defaults( \%options, 'nonp_bootstrap' );
  77 common_options::sanity_checks( \%options, 'nonp_bootstrap' );
  78 common_options::online_help('nonp_bootstrap', \%options, undef,{},{});
  79
  80 unless( $options{'samples'} ){
  81   $options{'samples'} = 200;
  82 }
  83
  84 unless( $options{'result_file'} ){
  85   $options{'result_file'} = 'result.csv';
  86 }
  87
  88 if ( scalar( @ARGV ) < 1 ) {
  89   print "A model file must be specified. Use 'nonp_bootstrap -h' for help.\n";
  90   exit;
  91 }
  92
  93 my $eval_string = common_options::model_parameters(\%options);
  94
  95 # }}}
  96
  97 ################################################################################
  98 #
  99 # The original model file is parsed and bootstrapped.
 100 #
 101 ################################################################################
 102
 103 # {{{ orig+bootstrap
 104
 105 my $model;
 106
 107 $model = model -> new ( eval( $eval_string ),
 108                         extra_output                => ['fort.80'],
 109                         filename                    => $ARGV[0],
 110                         ignore_missing_output_files => 1 );
 111
 112 my( $base_dir, $dummy) = OSspecific::absolute_path( '', '' );
 113
 114 my $directory ;
 115
 116 if ( defined $options{'directory'} ) {
 117   my $dummy;
 118   ( $directory, $dummy ) = OSspecific::absolute_path( $options{'directory'}, '');
 119 } else {
 120   $directory = OSspecific::unique_path( 'nonp_bootstrap_dir' ,
 121                                         $base_dir );
 122 }
 123
 124 unless( -d $directory ){
 125   mkdir( $directory );
 126
 127   open(CMD, ">", $directory . "/command.txt");
 128   print CMD $cmd_line, "\n";
 129   close(CMD);
 130 }
 131
 132 chdir( $directory );
 133
 134 my $type = defined $options{'bca'} ? 'bca' : undef;
 135
 136 my $bs = tool::bootstrap ->
 137     new ( eval( $common_options::parameters ),
 138           directory => 'bootstrap',
 139           models                                     => [ $model ],
 140           samples                                    => $options{'samples'},
 141           subjects                           => $options{'sample_size'},
 142           type                               => $type,
 143           stratify_on                          => $options{'stratify_on'},
 144           skip_minimization_terminated         => $options{'skip_minimization_terminated'},
 145           skip_covariance_step_terminated      => $options{'skip_covariance_step_terminated'},
 146           skip_with_covstep_warnings           => $options{'skip_with_covstep_warnings'},
 147           skip_estimate_near_boundary          => $options{'skip_estimate_near_boundary'},
 148           minimization_successful_limit        => $options{'minimization_successful_limit'},
 149           covariance_step_successful_limit     => $options{'covariance_step_successful_limit'},
 150           covariance_step_warnings_limit       => $options{'covariance_step_warnings_limit'},
 151           estimate_near_boundary_limit         => $options{'estimate_near_boundary_limit'},
 152           estimate_near_boundary_limit         => $options{'estimate_near_boundary_limit'},
 153           se_confidence_intervals_level         => $options{'se_confidence_intervals_level'},
 154           percentile_confidence_intervals_level => $options{'percentile_confidence_intervals_level'},
 155           bca_confidence_intervals_level        => $options{'bca_confidence_intervals_level'},
 156           se_confidence_intervals_check         => $options{'se_confidence_intervals_check'},
 157           percentile_confidence_intervals_check => $options{'percentile_confidence_intervals_check'},
 158           bca_confidence_intervals_check        => $options{'bca_confidence_intervals_check'},
 159           large_bias_limit                     => $options{'large_bias_limit'} );
 160
 161 $bs -> run;
 162 $bs -> prepare_results();
 163 $bs -> print_results();
 164
 165 # }}}
 166
 167 ################################################################################
 168 #
 169 # Collect the bootstrap results and create $samples number of new
 170 # models. Each new model should have MAXE=0, $NONP and initial
 171 # estimates fixed also it should use the original dataset. This is to
 172 # get table files with P in them.
 173 #
 174 ################################################################################
 175
 176 # {{{ read bootstrap + create new models
 177
 178 my @bs_models = @{$bs -> {'prepared_models'}[0]{'own'}};
 179 my @new_bs_models;
 180
 181 my $id = 1;
 182
 183 my $nr_of_old_thetas = $model -> nthetas;
 184
 185 foreach my $bs_model ( @bs_models ) {
 186   my $copy = $bs_model -> copy( filename => 'bs_model_'.$id++.'.mod' );
 187
 188   $copy -> update_inits( from_output => $bs_model -> outputs -> [0] );
 189
 190   $copy -> datafiles( new_names => $model -> datafiles(absolute_path => 1) );
 191
 192   $copy -> maxeval( new_values => [[0]] );
 193
 194   my $nomegas = $copy -> nomegas -> [0];
 195
 196   $copy -> add_records( type => 'nonparametric',
 197                         record_strings => ['MARGINALS UNCONDITIONAL'] );
 198
 199   $copy -> set_option( record_name => 'abbreviated',
 200                        option_name => 'COMRES',
 201                        option_value => ($nomegas+1) );
 202
 203   my $table_string;
 204
 205   my $code_block;
 206   my $use_pk = 0;
 207
 208   if( defined $copy -> pk ){
 209     $code_block = $copy -> pk;
 210     $use_pk = 1;
 211   } elsif( defined $copy -> pred ){
 212     $code_block = $copy -> pred;
 213   } else {
 214     die "Error: No \$PK or \$PRED found... \n";
 215   }
 216
 217   push( @{$code_block},'   JD = DEN_');
 218
 219   for( 1..$nomegas ){
 220     $table_string = 'ETA' . ($nomegas - $_ + 1) ." ". $table_string . " DN$_";
 221     push(@{$code_block},"   DN$_=CDEN_($_)" );
 222   }
 223
 224   if( $use_pk ){
 225     $copy -> pk( new_pk => $code_block );
 226   } else {
 227     $copy -> pred( new_pred => $code_block );
 228   }
 229
 230   if( defined $options{'theta_names'} ){
 231     my @names = split( /,/ , $options{'theta_names'} );
 232     my $nr_names = scalar( @names );
 233     unless( $nr_names == $nr_of_old_thetas ){
 234       die "There are $nr_of_old_thetas thetas in the model, but you only specified $nr_names in the 'theta_names' options\n" ;
 235     }
 236     $table_string .= ' ' . join( ' ', @names );
 237   } else {
 238     my $theta_labels = $model -> labels( 'parameter_type' => 'theta' );
 239
 240     foreach ( @{$theta_labels -> [0]} ){
 241       if( $id == 2 and /TH\d+/ ){
 242         print "\nWarning: There is a generic theta label($_) in the model.(you might want to use the 'theta_names' option).\n\n" ;
 243       }
 244     }
 245
 246     $table_string .= ' ' . join( ' ', @{$theta_labels -> [0]} );
 247   }
 248
 249   $copy -> add_records( type => 'table',
 250                         record_strings => ["ID JD $table_string NOPRINT ONEHEADER FIRSTONLY FILE=patab"] );
 251
 252   $copy -> fixed( parameter_type => 'theta',
 253                   new_values => [[(1) x $copy -> nthetas ]] );
 254   $copy -> fixed( parameter_type => 'omega',
 255                   new_values => [[(1) x $nomegas ]] );
 256   $copy -> fixed( parameter_type => 'sigma',
 257                   new_values => [[(1) x $copy -> nsigmas -> [0] ]] );
 258
 259   $copy -> remove_records( type => 'covariance' );
 260
 261   $copy -> extra_output( ['patab'] );
 262
 263   $copy -> _write;
 264
 265   push( @new_bs_models, $copy );
 266 }
 267
 268 my $modelfit;
 269
 270 $modelfit = tool::modelfit ->
 271     new ( eval( $common_options::parameters ),
 272           prepend_model_file_name => 1,
 273           directory => 'tables',
 274           models => \@new_bs_models );
 275
 276 $modelfit -> run;
 277
 278 # }}}
 279
 280 ################################################################################
 281 #
 282 # Do one run per individual initial clearence and get fort.80 from
 283 # each run.
 284 #
 285 ################################################################################
 286
 287 # {{{ Fort.80 creation
 288
 289 open( KEYS, "<", $bs -> directory() . "/included_keys1.csv" );
 290
 291 my @parameter_P_sums;
 292 my @result_header;
 293 my $batch_nr = 0;
 294
 295 # {{{ create iofvconf
 296
 297 unless( -e 'iofvcont.f' ){
 298   open(IOFVCONT , '>', 'iofvcont.f');
 299
 300   print IOFVCONT <<'EOF';
 301       subroutine contr (icall,cnt,ier1,ier2)
 302       parameter (no=1000)
 303       common /rocm1/ y(no),data(no,3),nobs
 304       integer nobs, un
 305       double precision cnt,y
 306       DATA un /80/
 307       if (icall.le.1) return
 308       call ncontr (cnt,ier1,ier2,l2r)
 309 C     individual obj. funct. value for indiv. jj = cnt
 310       write(un,10) data(1,1),cnt
 311    10 FORMAT(1F6.2,1F19.4)
 312       return
 313       end
 314 EOF
 315    close( IOFVCONT );
 316 }
 317
 318 # }}}
 319
 320 # TODO update all initial values
 321
 322 foreach my $ofv_model ( @new_bs_models ) {
 323   $batch_nr++;
 324   my @new_ofv_models;
 325
 326   unless ( -e "ofv_b$batch_nr" ){
 327     mkdir( "ofv_b$batch_nr" );
 328   }
 329   chdir( "ofv_b$batch_nr" );
 330   cp( '../iofvcont.f', './iofvcont.f' ) or print "Warning: unable to copy iofvcont.f\n";
 331
 332   my $table = table_file -> new( filename => "bs_model_$batch_nr.patab",
 333                                  directory => '../' );
 334
 335   my $nomegas = $ofv_model -> nomegas -> [0];
 336
 337   my @individual_etas;
 338   my @individual_etas_probabilities;
 339
 340   my $joint_densities = $table -> column_to_array( column => 1 ); # 1 is JD
 341
 342   for( 0..($nomegas-1) ){
 343
 344     my $individual_eta = $table -> column_to_array( column => $_ + 1 + 1 ); # first plus 1 is for ID column, second is for JD
 345
 346     push( @individual_etas , $individual_eta );
 347     push( @individual_etas_probabilities , $joint_densities );
 348
 349   }
 350
 351
 352   my @individual_thetas;
 353
 354   for( 0..($nr_of_old_thetas-1) ){
 355     my $individual_theta = $table -> column_to_array( column => ($nomegas) + ($nomegas) + 1 + 1 + $_ );
 356
 357     push( @individual_thetas, $individual_theta );
 358
 359   }
 360
 361   my $number_of_individuals = @{$individual_etas[0]};
 362
 363   unless( -e 'model.done' ){
 364     foreach my $id( 0..($number_of_individuals-1) ){
 365
 366       my $copy = $ofv_model -> copy( filename => 'ofv_model_'.($id+1).'.mod' );
 367
 368       $copy -> update_inits( from_output => $ofv_model -> outputs -> [0] );
 369
 370
 371       my @theta_inits;
 372
 373       for( 0..($nr_of_old_thetas-1) ) {
 374         push( @theta_inits, $individual_thetas[$_]->[$id] );
 375       }
 376
 377       $copy -> lower_bounds( parameter_type => 'theta',
 378                              parameter_numbers => [[1..$nr_of_old_thetas]],
 379                              new_values => [[(-10000) x $nr_of_old_thetas]]) ;
 380
 381       $copy -> upper_bounds( parameter_type => 'theta',
 382                              parameter_numbers => [[1..$nr_of_old_thetas]],
 383                              new_values => [[(10000) x $nr_of_old_thetas]]) ;
 384
 385       $copy -> initial_values( parameter_type => 'theta',
 386                                parameter_numbers => [[1..$nr_of_old_thetas]],
 387                                new_values => [\@theta_inits]) ;
 388
 389       $copy -> add_records( type => 'contr',
 390                             record_strings => ['DATA=(ID)'] );
 391
 392       $copy -> add_option( record_name => 'subroutine',
 393                            option_name => 'CONTR',
 394                            option_value => 'iofvcont.f' );
 395
 396       $copy -> lower_bounds( parameter_type => 'omega',
 397                              parameter_numbers => [[1..$copy -> nomegas -> [0]]],
 398                              new_values => [[(-1) x $copy -> nomegas -> [0]]] );
 399
 400       $copy -> initial_values( parameter_type => 'omega',
 401                                parameter_numbers => [[1..$copy -> nomegas -> [0]]],
 402                                new_values => [[(0) x $copy -> nomegas -> [0]]] );
 403
 404       $copy -> extra_output( ['fort.80'] );
 405
 406       $copy -> remove_records( type => 'table' );
 407       $copy -> remove_records( type => 'abbreviated' );
 408       $copy -> remove_records( type => 'nonparametric' );
 409
 410       $copy -> _write;
 411
 412       push( @new_ofv_models, $copy );
 413     }
 414
 415     my $new_modelfit = tool::modelfit ->
 416         new ( eval( $common_options::parameters ),
 417               prepend_model_file_name => 1,
 418               directory => 'modelfit',
 419               models => \@new_ofv_models );
 420
 421     open(CMD, ">", $new_modelfit -> directory . "/command.txt");
 422     print CMD $cmd_line, "\n";
 423     close(CMD);
 424
 425     $new_modelfit -> run;
 426
 427     open( TMP, ">", 'models.done' );
 428     print TMP "1";
 429     close( TMP );
 430   }
 431   # Post process fort.80
 432
 433   my $id = 1;
 434   my @all_ind_ofv;
 435
 436   foreach( 1..$number_of_individuals ){
 437     open (IND, '<', "ofv_model_$id.fort.80") || die("Couldn't open ofv_model_$id.mod.fort.80 for reading: $! \n");
 438     $id++;
 439     my @ind_ofv;
 440     while(<IND>) {
 441       chomp;
 442       my ($junk,$ind_ofv) = split(' ',$_);
 443       push @ind_ofv, $ind_ofv;
 444     }
 445     close IND;
 446     @ind_ofv = splice(@ind_ofv,-$number_of_individuals);
 447     push( @all_ind_ofv, \@ind_ofv );
 448   }
 449
 450   dump_csv( "ind_ofv.csv", \@all_ind_ofv );
 451
 452   dump_csv( "ind_etas.csv", \@individual_etas_probabilities );
 453
 454   foreach my $parameter( 0..($nomegas - 1) ){
 455
 456     my @result;
 457     my @columns;
 458     for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
 459
 460       my $p = $individual_etas_probabilities[$parameter] -> [$ind_ofv];
 461
 462       for( my $individual=0;$individual < $number_of_individuals; $individual++) {
 463         my $ofv = $all_ind_ofv[$ind_ofv] -> [$individual];
 464         my $LP = exp((-$ofv)/2)*$p;
 465         $result[$ind_ofv] -> [$individual] = $LP;
 466         push(@{$columns[$individual]},$LP);
 467       }
 468
 469     }
 470
 471     my @column_sums;
 472
 473     foreach my $column( @columns ){
 474       my $bad_sum=0;
 475       foreach my $val ( @{$column} ){
 476         $bad_sum+=$val;
 477       }
 478       my $sum=0;
 479       foreach my $val( sort( {$a <=> $b} @{$column} ) ) {
 480         $sum+=$val;
 481       }
 482
 483       push(@column_sums, $sum);
 484
 485     }
 486
 487     dump_csv( "LP_sum_$parameter.csv",[\@column_sums]);
 488
 489     dump_csv( "L_times_P_ETA$parameter.csv", \@result );
 490
 491     for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
 492
 493       for( my $individual=0;$individual <= $#{$result[$ind_ofv]}; $individual++ ) {
 494         if( $column_sums[$individual] == 0 ){
 495           print "Warning: Sum of probabilties is zero\n";
 496           $result[$ind_ofv] -> [$individual] = 'NaN';
 497         } else {
 498           $result[$ind_ofv] -> [$individual] = (($result[$ind_ofv] -> [$individual] / $column_sums[$individual])
 499                                                 / $number_of_individuals);
 500         }
 501       }
 502     }
 503
 504     dump_csv( "P_values_ETA$parameter.csv", \@result );
 505
 506     my $included_keys_string = <KEYS>;
 507
 508     my @included_keys = split( /,/ , $included_keys_string );
 509
 510     my @bootstrapped_set;
 511     for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
 512       for( my $id = 0; $id < $number_of_individuals ; $id ++ ) {
 513         $bootstrapped_set[$ind_ofv]->[$id] = $result[$ind_ofv]->[$included_keys[$id]];
 514       }
 515     }
 516
 517     dump_csv( "bootstrapped_P_values_ETA$parameter.csv", \@bootstrapped_set );
 518
 519     my @parameter_P_sum;
 520
 521     for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
 522       for( my $id = 0 ; $id < $number_of_individuals ; $id++ ){
 523         $parameter_P_sum[$ind_ofv] += $bootstrapped_set[$ind_ofv]->[$id];
 524       }
 525     }
 526
 527
 528     # Sort bootstrapped_set together with parameter_P_sums
 529
 530     my %index;
 531
 532     for( 0 .. $number_of_individuals-1 ){
 533       $index{$_} = $individual_etas[$parameter] -> [$_];
 534     }
 535
 536     my @order = sort( {$index{$a} <=> $index{$b}} keys %index );
 537
 538     my $previous_probability = 0;
 539
 540     my (@new_p_sum, @new_etas);
 541
 542     my $p_cumulative = 0;
 543
 544     foreach my $idx ( @order ){
 545       $p_cumulative += $parameter_P_sum[$idx];
 546
 547       push( @new_p_sum , $p_cumulative );
 548       push( @new_etas , $individual_etas[$parameter] -> [$idx] );
 549     }
 550
 551     @parameter_P_sum = @new_p_sum;
 552     @individual_etas[$parameter] = \@new_etas;
 553
 554     # Add header and push on big results table
 555
 556     unshift( @new_etas, "BS$batch_nr ETA$parameter" );
 557     unshift( @new_p_sum, "BS$batch_nr P_sum" );
 558     push( @parameter_P_sums, \@new_etas );
 559     push( @parameter_P_sums, \@new_p_sum );
 560
 561   }
 562   chdir( '..' );
 563 }
 564
 565 dump_T_csv( $options{'result_file'}, \@parameter_P_sums );
 566
 567 close( KEYS );
 568
 569 # }}}
 570
 571 # {{{ helper routines
 572
 573 sub dump_csv {
 574   my $file_name = shift;
 575   my $arr = shift;
 576
 577   open( FILE, ">", $file_name );
 578
 579   foreach my $sub_arr( @{$arr} ){
 580     print( FILE join( ',', @{$sub_arr} ), "\n" );
 581   }
 582
 583   close( FILE );
 584
 585 }
 586
 587 sub dump_T_csv {
 588   my $file_name = shift;
 589   my $arr = shift;
 590
 591   open( FILE, ">", $file_name );
 592
 593   for( my $x = 0;$x < @{$arr->[0]}; $x++ ){
 594     for( my $y = 0;$y < @{$arr}; $y++ ){
 595       if( $y > 0 and $y < @{$arr} ){
 596         print( FILE ',' );
 597       }
 598       print( FILE $arr->[$y][$x]  );
 599     }
 600     print( FILE "\n" );
 601   }
 602
 603   close( FILE );
 604 }
 605
 606 sub dump_arrays {
 607   my ($filename,$header,$ids,$first,$second) = @_;
 608
 609   open FILE , ">", $filename;
 610   print FILE $header;
 611   for( 0..$#{$ids}  ){
 612     print FILE join(',',(($ids->[$_]+1), $first -> [$_], $second -> [$_])), "\n";
 613   }
 614   close FILE;
 615 }
 616
 617 # }}}