Fixed $PRIOR support and bugfix in shrinkage results handling
[PsN.git] / bin / bootstrap
blobc2cfe21823f15436eb7e15bee2ebe041200369bf
1 #!/usr/local/bin/perl
3 use FindBin qw($Bin);
4 use lib "$Bin/../lib";
6 # Don't edit the line below, it must look exactly like this.
7 # Everything above this line will be replaced #
9 use PsN;
10 use model;
11 use tool::bootstrap;
12 use strict;
13 use debug;
14 use ui;
15 use Getopt::Long;
16 use common_options;
17 use Data::Dumper;
19 my $cmd_line = $0 . " " . join( " ", @ARGV );
21 ## Configure the command line parsing
22 Getopt::Long::config("auto_abbrev");
24 ## Declare the options
25 my %options;
27 my %required_options = ();
28 my %optional_options = ("samples:i"=>'',
29 "sample_size:s"=>'',
30 "stratify_on:s"=>'',
31 "bca"=>'',
32 "mplots"=>'',
33 "rplots"=>'',
34 "skip_minimization_terminated!"=>'',
35 "skip_covariance_step_terminated!"=>'',
36 "skip_with_covstep_warnings!"=>'',
37 "skip_estimate_near_boundary!"=>'',
38 "minimization_successful_limit:f"=>'',
39 "covariance_step_successful_limit:f"=>'',
40 "covariance_step_warnings_limit:f"=>'',
41 "estimate_near_boundary_limit:f"=>'',
42 "estimate_near_boundary_limit:f"=>'',
43 "se_confidence_intervals_level:f"=>'',
44 "percentile_confidence_intervals_level:f"=>'',
45 "bca_confidence_intervals_level:f"=>'',
46 "se_confidence_intervals_check:f"=>'',
47 "percentile_confidence_intervals_check:f"=>'',
48 "bca_confidence_intervals_check:f"=>'',
49 "large_bias_limit:f"=>'' );
51 my $res = GetOptions( \%options,
52 @common_options::get_opt_strings,
53 keys(%required_options),
54 keys(%optional_options) );
56 exit unless $res;
58 common_options::set_globals( \%options, 'bootstrap' );
59 common_options::get_defaults( \%options, 'bootstrap' );
60 common_options::sanity_checks( \%options, 'bootstrap' );
62 my %help_text;
63 $help_text{Pre_help_message} = <<'EOF';
64 <h3 class="heading1">bootstrap</h3>
66 Perl script for non-parametric bootstrap of NONMEM runs.
68 <h3 class="heading1">Usage:</h3>
69 EOF
71 $help_text{Description} = <<'EOF';
72 <h3 class="heading1">Description:</h3>
74 The Bootstrap can be used to calculate bias, standard errors and
75 confidence intervals. It does so by resampling with replacement
76 from the data, see Efron B, An Introduction to the Bootstrap,
77 Chap. & Hall, London UK, 1993. To compute standard errors for
78 all parameters of a model using the non-parametric bootstrap
79 implemented here, roughly 200 model fits are necessary. To assess
80 95% confidence intervals approximatly 2000 runs will suffice.
81 EOF
82 $help_text{Examples} = <<'EOF';
83 <h3 class="heading1">Example:</h3>
85 <p class="option">bootstrap -samples=200 run89.mod</p>
87 This will run a non-parametric bootstrap of 200 samples and give
88 you good estimates of the standard errors of the parameter
89 estimates. You may get some estimates for the confidence
90 intervals too, but they will generally not be of high quality.
93 <p class="option">bootstrap -samples=2000 -bca run89.mod</p>
95 This will run a non-parametric bootstrap using the BCa technique
96 (See An introduction to the bootstrap, Efron, 1993). The BCa is
97 intended for caclulation of second-order correct confidence
98 intervals.
101 <p class="option">bootstrap -samples=2000 -bca -stratify_on=5 run89.mod </p>
103 This is the same BCa approach as above but with stratification
104 on the factors of column five.
107 $help_text{Options} = <<'EOF';
108 <h3 class="heading1">Options:</h3>
110 The options are given here in their long form. Any option may be
111 abbreviated to any nonconflicting prefix. The -threads option
112 may be abbreviated to <span class="option">-t</span> (or even <span class="option">-thr</span>) but -<span class="option">debug</span> may not be
113 abbreviated to <span class="option">-d</span> because it conflicts with <span class="option">-debug_packages</span> and
114 <span class="option">-debug_subroutines</span>.
116 The following options are valid:
119 $help_text{-samples} = <<'EOF';
120 <p class="option">-samples</p>
122 The number of bootstrap samples.
125 $help_text{-sample_size} = <<'EOF';
126 <p class="option">-sample_size</p>
128 The number of subjects in each bootstrap data set. The default
129 value is set to the number of individuals in the original data
130 set.
132 When the resampling is stratified, the sample_size option can be
133 used to specify the exact number of samples that should be drawn
134 from each strata. Below follows an example of the syntax that
135 should be used in such a case. Stratification is here done based
136 on the study number, STUD, with the values 1001, 1002 and 1003.
138 -sample_size='1001=>12,1002=>24,1003=>10'
140 This example specifies that the bootstrap should use 12 samples
141 from study 1001, 24 samples from 1002 and 10 from study 1003.
143 If only one sample size is used together with stratified
144 resampling (the default case; sample_size=number of individuals
145 in the data set), the strata are assigned samples in proportion
146 to their size in the data set. Please note that this usage of
147 the sample_size option does not guarantee that the sum of the
148 samples of the strata is equal to the given sample_size since
149 PsN needs to round the figures to the closest integer. For a
150 sample size equal to the number of individuals in the data set,
151 the sum will however always be correct.
155 $help_text{-bca} = <<'EOF';
156 <p class="option">-bca</p>
158 Using the <span class="option">-bca </span>option, the bootstrap
159 utility will calculate the confidence intervals through the BCa
160 method. The default approach however, is not to use the BCa (see
161 Efron B, An introduction to the Bootstrap, 1993). The BCa is
162 intended for calculation of second-order correct confidence
163 intervals.
166 $help_text{-stratify_on} = <<'EOF';
167 <p class="option">-stratify_on=integer|string</p>
169 It may be necessary to use stratification in the resampling
170 procedure. For example, if the original data consists of two
171 groups of patients - say 10 patients with full pharmacokinetic
172 profiles and 90 patients with sparse steady state concentration
173 measurements - it may be wise to restrict the resampling
174 procedure to resample within the two groups, producing bootstrap
175 data sets that all contain 10 rich + 90 sparse data patients but
176 with different compositions. The default is not to use
177 stratification. Set <span class="option">-stratify_on</span> to
178 the column that defines the two groups. If a string is used with
179 stratify_on the header in the datafile is used to map the string
180 to a column number.
182 Note that the option sample_size has a different behavior when
183 stratified resampling is used.
186 $help_text{-skip_covariance_step_terminated} = <<'EOF';
187 <p class="option">-skip_covariance_step_terminated</p>
189 With this option enabled, the bootstrap will skip all samples
190 where the NONMEM run terminated the covariance step.
193 $help_text{-skip_with_covstep_warnings} = <<'EOF';
194 <p class="option">-skip_with_covstep_warnings</p>
196 With this option enabled, the bootstrap will skip all samples
197 where the NONMEM run had warnings from the covariance step.
200 $help_text{-skip_minimization_terminated} = <<'EOF';
201 <p class="option">-skip_minimization_terminated</p>
203 With this option enabled, the bootstrap will skip all samples
204 where the NONMEM run terminated the minimization step.
207 $help_text{-skip_estimate_near_boundary} = <<'EOF';
208 <p class="option">-skip_estimate_near_boundary</p>
210 With this option enabled, the bootstrap will skip all samples
211 where the NONMEM run signal that some estimates are near its
212 boundary.
215 $help_text{-estimate_near_boundary_limit} = <<'EOF';
216 <p class="option">-estimate_near_boundary_limit='number'</p>
218 If the <span class="option">-summarize</span> or <span class="option">-summary</span> options are set, the bootstrap
219 will do a set of diagnostics checks. Among other things it
220 checks the ratio between runs with estimates near their
221 boundaries and those without. If the ratio is to high ( by
222 default 20% ) a warning is printed to the screen. You can change
223 the ratio with <span class="option">-estimate_near_boundary_limit</span>.
226 $help_text{-covariance_step_successful_limit} = <<'EOF';
227 <p class="option">-covariance_step_successful_limit='number'</p>
229 If the <span class="option">-summarize</span> or <span class="option">-summary</span> options are set, the bootstrap
230 will do a set of diagnostics checks. Among other things it
231 checks the ratio between runs with covariance step succesful and
232 those without. If the ratio is to high ( by default 80% ) a
233 warning is printed to the screen. You can change the ratio with
234 <span class="option">-covariance_step_successful_limit</span>.
237 $help_text{-covariance_step_warnings_limit} = <<'EOF';
238 <p class="option">-covariance_step_warnings_limit='number'</p>
240 If the <span class="option">-summarize</span> or <span class="option">-summary</span> options are set, the bootstrap
241 will do a set of diagnostics checks. Among other things it
242 checks the ratio between runs with warnings in the covariance
243 step and those without. If the ratio is to high (by default 20%)
244 a warning is printed to the screen. You can change the ratio
245 with <span class="option">-covariance_step_warnings_limit</span>.
248 $help_text{-minimization_successful_limit} = <<'EOF';
249 <p class="option">-minimization_successful_limit='number'</p>
251 If the <span class="option">-summarize</span> or <span class="option">-summary</span> options are set, the bootstrap
252 will do a set of diagnostics checks. Among other things it
253 checks the ratio between runs with successful minimizations and
254 those without. If the ratio is to high (by default 80%) a
255 warning is printed to the screen. You can change the ratio with
256 <span class="option">-minimization_successful_limit</span>.
259 $help_text{-mplots} = <<'EOF';
260 <p class="option">-mplots</p>
262 Generate matlab scripts for making various plots of the result.
265 $help_text{-rplots} = <<'EOF';
266 <p class="option">-rplots</p>
268 Generate R scripts for making various plots of the result.
270 $help_text{Post_help_message} = <<'EOF';
271 Also see 'execute -help' for a description of common options.
274 common_options::online_help('bootstrap',\%options, \%help_text, \%required_options, \%optional_options);
277 ## Check that we do have a model file
278 if ( scalar(@ARGV) < 1 ) {
279 print "A model file must be specified. Use 'bootstrap -h' for help.\n";
280 exit;
283 if( scalar(@ARGV) > 1 ){
284 print "Bootstrap can only handle one modelfile. Use 'bootstrap -h' for help.\n";
285 exit;
288 my $eval_string = common_options::model_parameters(\%options);
290 my $model = model -> new ( eval( $eval_string ),
291 filename => @ARGV[0],
292 ignore_missing_output_files => 1 );
294 if( $options{'shrinkage'} ) {
295 $model -> shrinkage_stats( enabled => 1 );
298 my $type = defined $options{'bca'} ? 'bca' : undef;
301 if( defined $options{'sample_size'} ) {
302 $options{'sample_size'} = 'default=>'.$options{'sample_size'} if( $options{'sample_size'} =~ /^\d+$/ );
303 } else {
304 $options{'sample_size'} = 'default=>'.$model -> datas() -> [0] -> count_ind();
306 my %subj_hash = eval($options{'sample_size'});
307 if( $@ ) {
308 die $@."\nThe sample_size option must be either a single digit or of the format 'strata1=>12,strata2=>34, ...' etc" ;
311 ## Create new Bootstrap object:
312 my $bs = tool::bootstrap ->
313 new ( eval( $common_options::parameters ),
314 top_tool => 1,
315 prepend_model_file_name => 1,
316 models => [ $model ],
317 samples => $options{'samples'},
318 subjects => \%subj_hash,
319 type => $type,
320 stratify_on => $options{'stratify_on'},
321 skip_minimization_terminated => $options{'skip_minimization_terminated'},
322 skip_covariance_step_terminated => $options{'skip_covariance_step_terminated'},
323 skip_with_covstep_warnings => $options{'skip_with_covstep_warnings'},
324 skip_estimate_near_boundary => $options{'skip_estimate_near_boundary'},
325 minimization_successful_limit => $options{'minimization_successful_limit'},
326 covariance_step_successful_limit => $options{'covariance_step_successful_limit'},
327 covariance_step_warnings_limit => $options{'covariance_step_warnings_limit'},
328 estimate_near_boundary_limit => $options{'estimate_near_boundary_limit'},
329 estimate_near_boundary_limit => $options{'estimate_near_boundary_limit'},
330 se_confidence_intervals_level => $options{'se_confidence_intervals_level'},
331 percentile_confidence_intervals_level => $options{'percentile_confidence_intervals_level'},
332 bca_confidence_intervals_level => $options{'bca_confidence_intervals_level'},
333 se_confidence_intervals_check => $options{'se_confidence_intervals_check'},
334 percentile_confidence_intervals_check => $options{'percentile_confidence_intervals_check'},
335 bca_confidence_intervals_check => $options{'bca_confidence_intervals_check'},
336 large_bias_limit => $options{'large_bias_limit'} );
338 open(CMD, ">", $bs -> directory . "/command.txt");
339 print CMD $cmd_line, "\n";
340 close(CMD);
342 if ( $options{'summarize'} ) {
343 $bs -> prepare_results();
344 $bs -> print_results();
345 $bs -> print_summary();
346 } else {
347 $bs -> run;
348 $bs -> prepare_results();
349 $bs -> print_results();
350 # $bs -> print_summary();
353 if( $options{'mplots'} ){
354 $bs -> create_matlab_scripts();
357 if( $options{'rplots'} ){
358 $bs -> create_R_scripts();
361 ui -> print( category => 'bootstrap',
362 message => "Bootstrap done.");