2 * analysis-kaplan-meier.c:
6 * Andreas J. Guelzow <aguelzow@pyrshep.ca>
8 * (C) Copyright 2008 by Andreas J. Guelzow <aguelzow@pyrshep.ca>
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, see <https://www.gnu.org/licenses/>.
25 #include <gnumeric-config.h>
26 #include <glib/gi18n-lib.h>
28 #include <tools/analysis-kaplan-meier.h>
29 #include <tools/analysis-tools.h>
35 #include <sheet-object-graph.h>
37 #include <goffice/goffice.h>
41 analysis_tool_kaplan_meier_engine_run (data_analysis_output_t
*dao
,
42 analysis_tools_data_kaplan_meier_t
*info
)
45 int std_err_col
= info
->censored
? 4 : 3;
46 int prob_col
= info
->censored
? 3 : 2;
47 int repetitions
= ((info
->group_list
== NULL
) ? 1
48 : g_slist_length (info
->group_list
));
49 int colspan
= ((info
->std_err
? 4 : 3) + (info
->censored
? 1 : 0));
51 int logrank_test_y_offset
= 0;
53 GnmExpr
const *expr_data
;
54 GnmExpr
const *expr_group_data
= NULL
;
55 GnmExpr
const *expr_small
;
56 GnmExpr
const *expr_time
;
57 GnmExpr
const *expr_at_risk
;
58 GnmExpr
const *expr_deaths
;
59 GnmExpr
const *expr_censures
= NULL
;
60 GnmExpr
const *expr_prob_zero
;
61 GnmExpr
const *expr_prob
;
62 GnmExpr
const *expr_std_err
= NULL
;
68 GnmFunc
*fd_sqrt
= NULL
;
69 GnmFunc
*fd_min
= NULL
;
71 GogGraph
*graph
= NULL
;
76 GSList
*gl
= info
->group_list
;
78 fd_small
= gnm_func_lookup_or_add_placeholder ("SMALL");
79 gnm_func_inc_usage (fd_small
);
80 fd_if
= gnm_func_lookup_or_add_placeholder ("IF");
81 gnm_func_inc_usage (fd_if
);
82 fd_iserror
= gnm_func_lookup_or_add_placeholder ("ISERROR");
83 gnm_func_inc_usage (fd_iserror
);
84 fd_sum
= gnm_func_lookup_or_add_placeholder ("SUM");
85 gnm_func_inc_usage (fd_sum
);
88 fd_sqrt
= gnm_func_lookup_or_add_placeholder ("SQRT");
89 gnm_func_inc_usage (fd_sqrt
);
92 fd_min
= gnm_func_lookup_or_add_placeholder ("MIN");
93 gnm_func_inc_usage (fd_min
);
96 rows
= info
->base
.range_1
->v_range
.cell
.b
.row
97 - info
->base
.range_1
->v_range
.cell
.a
.row
+ 1;
99 dao_set_italic (dao
, 0, 0, 0, 0);
100 dao_set_cell (dao
, 0, 0, _("Kaplan-Meier"));
106 graph
= g_object_new (GOG_TYPE_GRAPH
, NULL
);
107 chart
= GOG_CHART (gog_object_add_by_name (
108 GOG_OBJECT (graph
), "Chart", NULL
));
110 plot
= gog_plot_new_by_name ("GogXYPlot");
111 go_object_set_property (G_OBJECT (plot
), "interpolation",
112 "Default interpolation", "step-start",
115 gog_object_add_by_name (GOG_OBJECT (chart
),
116 "Plot", GOG_OBJECT (plot
));
117 times
= dao_go_data_vector (dao
, 0, 2, 0, 1+rows
);
120 dao_set_italic (dao
, 0, 1, 0, 1);
121 dao_set_cell (dao
, 0, 1, _("Time"));
123 expr_data
= gnm_expr_new_constant (value_dup (info
->base
.range_1
));
125 if (info
->group_list
!= NULL
&& info
->range_3
!= NULL
)
126 expr_group_data
= gnm_expr_new_constant (value_dup (info
->range_3
));
128 expr_small
= gnm_expr_new_funcall2 (fd_small
,
129 gnm_expr_new_funcall3
132 (gnm_expr_copy (expr_data
),
134 make_cellref (0, -1)),
135 gnm_expr_copy (expr_data
),
136 gnm_expr_new_constant (value_new_string ("N/A"))),
137 gnm_expr_new_constant (value_new_int (1)));
138 expr_time
= gnm_expr_new_funcall3 (fd_if
,
139 gnm_expr_new_funcall1 (fd_iserror
,
140 gnm_expr_copy (expr_small
)),
141 gnm_expr_new_constant (value_new_string ("")),
144 dao_set_cell_int (dao
, 0, 2, 0);
145 for (row
= 1; row
< rows
; row
++)
146 dao_set_cell_array_expr (dao
, 0, 2+row
, gnm_expr_copy (expr_time
));
148 gnm_expr_free (expr_time
);
154 /* Repeated Info start */
155 for (i
= 0; i
< repetitions
; i
++) {
156 GnmExpr
const *expr_group
= NULL
;
158 if (gl
!= NULL
&& gl
->data
!= NULL
) {
159 analysis_tools_kaplan_meier_group_t
*gd
= gl
->data
;
160 if (gd
->name
!= NULL
) {
161 dao_set_italic (dao
, 0, 0, 0, 0);
162 dao_set_cell (dao
, 0, 0, gd
->name
);
164 if (gd
->group_from
== gd
->group_to
)
165 expr_group
= gnm_expr_new_funcall3
168 (gnm_expr_copy (expr_group_data
),
170 gnm_expr_new_constant (value_new_int (gd
->group_from
))),
171 gnm_expr_new_constant (value_new_int (1)),
172 gnm_expr_new_constant (value_new_int (0)));
174 expr_group
= gnm_expr_new_binary
175 (gnm_expr_new_funcall3
178 (gnm_expr_copy (expr_group_data
),
180 gnm_expr_new_constant (value_new_int (gd
->group_from
))),
181 gnm_expr_new_constant (value_new_int (1)),
182 gnm_expr_new_constant (value_new_int (0))),
184 gnm_expr_new_funcall3
187 (gnm_expr_copy (expr_group_data
),
189 gnm_expr_new_constant (value_new_int (gd
->group_to
))),
190 gnm_expr_new_constant (value_new_int (1)),
191 gnm_expr_new_constant (value_new_int (0))));
195 if (expr_group
== NULL
)
196 expr_group
= gnm_expr_new_constant (value_new_int (1));
198 dao_set_italic (dao
, 0, 1, prob_col
, 1);
200 set_cell_text_row (dao
, 0, 1,
206 set_cell_text_row (dao
, 0, 1,
211 dao_set_italic (dao
, std_err_col
, 1, std_err_col
, 1);
212 dao_set_cell (dao
, std_err_col
, 1, _("Standard Error"));
215 expr_at_risk
= gnm_expr_new_funcall3
217 gnm_expr_new_binary (make_cellref (-1-i
*colspan
, 0),
219 gnm_expr_new_constant (value_new_string (""))),
220 gnm_expr_new_constant (value_new_string ("")),
221 gnm_expr_new_funcall1 (fd_sum
,
222 gnm_expr_new_binary (
223 gnm_expr_new_funcall3
226 (gnm_expr_copy (expr_data
),
228 make_cellref (-1-i
*colspan
, 0)),
229 gnm_expr_new_constant (value_new_int (0)),
230 gnm_expr_new_constant (value_new_int (1))),
232 gnm_expr_copy (expr_group
))));
234 if (info
->censored
) {
235 GnmExpr
const *expr_censor
;
237 if (info
->censor_mark
== info
->censor_mark_to
)
238 expr_censor
= gnm_expr_new_funcall3
241 (gnm_expr_new_constant (value_dup (info
->base
.range_2
)),
243 gnm_expr_new_constant (value_new_int (info
->censor_mark
))),
244 gnm_expr_new_constant (value_new_int (1)),
245 gnm_expr_new_constant (value_new_int (0)));
247 expr_censor
= gnm_expr_new_binary
248 (gnm_expr_new_funcall3
251 (gnm_expr_new_constant (value_dup (info
->base
.range_2
)),
253 gnm_expr_new_constant (value_new_int (info
->censor_mark
))),
254 gnm_expr_new_constant (value_new_int (1)),
255 gnm_expr_new_constant (value_new_int (0))),
257 gnm_expr_new_funcall3
260 (gnm_expr_new_constant (value_dup (info
->base
.range_2
)),
262 gnm_expr_new_constant (value_new_int (info
->censor_mark_to
))),
263 gnm_expr_new_constant (value_new_int (1)),
264 gnm_expr_new_constant (value_new_int (0))));
267 expr_deaths
= gnm_expr_new_funcall3
269 gnm_expr_new_binary (make_cellref (-1, 0),
271 gnm_expr_new_constant (value_new_string (""))),
272 gnm_expr_new_constant (value_new_string ("")),
273 gnm_expr_new_funcall1 (fd_sum
,
275 (gnm_expr_copy (expr_group
),
278 (gnm_expr_new_funcall3
281 (gnm_expr_copy (expr_data
),
283 make_cellref (-2-i
*colspan
, 0)),
284 gnm_expr_new_constant (value_new_int (1)),
285 gnm_expr_new_constant (value_new_int (0))),
288 (gnm_expr_new_constant (value_new_int (1)),
290 gnm_expr_copy (expr_censor
))))));
291 expr_censures
= gnm_expr_new_funcall3
293 gnm_expr_new_binary (make_cellref (-1, 0),
295 gnm_expr_new_constant (value_new_string (""))),
296 gnm_expr_new_constant (value_new_string ("")),
297 gnm_expr_new_funcall1 (fd_sum
,
299 (gnm_expr_copy (expr_group
),
302 (gnm_expr_new_funcall3
305 (gnm_expr_copy (expr_data
),
307 make_cellref (-3-i
*colspan
, 0)),
308 gnm_expr_new_constant (value_new_int (1)),
309 gnm_expr_new_constant (value_new_int (0))),
313 expr_deaths
= gnm_expr_new_funcall3
315 gnm_expr_new_binary (make_cellref (-1, 0),
317 gnm_expr_new_constant (value_new_string (""))),
318 gnm_expr_new_constant (value_new_string ("")),
319 gnm_expr_new_funcall1 (fd_sum
,
321 (gnm_expr_copy (expr_group
),
323 gnm_expr_new_funcall3
326 (gnm_expr_copy (expr_data
),
328 make_cellref (-2-i
*colspan
, 0)),
329 gnm_expr_new_constant (value_new_int (1)),
330 gnm_expr_new_constant (value_new_int (0))))));
332 expr_prob_zero
= gnm_expr_new_binary (gnm_expr_new_binary (make_cellref ( - prob_col
, 0),
334 make_cellref (1 - prob_col
, 0)),
336 make_cellref ( - prob_col
, 0));
337 expr_prob
= gnm_expr_new_funcall3 (fd_if
,
339 (make_cellref (1 - prob_col
, 0),
341 gnm_expr_new_constant (value_new_string (""))),
342 gnm_expr_new_constant (value_new_string ("")),
343 gnm_expr_new_binary (gnm_expr_copy (expr_prob_zero
),
345 make_cellref (0, -1)));
348 expr_std_err
= gnm_expr_new_funcall3 (fd_if
,
350 (make_cellref (-1, 0),
352 gnm_expr_new_constant (value_new_string (""))),
353 gnm_expr_new_constant (value_new_string ("")),
354 gnm_expr_new_binary (make_cellref (-1, 0),
356 gnm_expr_new_funcall1
360 (gnm_expr_new_constant
363 make_cellref (-1, 0)),
366 ( - std_err_col
, 0)))));
368 dao_set_format (dao
, std_err_col
, 2, std_err_col
, rows
+ 1, "0.0000");
369 dao_set_cell_expr (dao
, std_err_col
, 2, gnm_expr_copy (expr_std_err
));
372 dao_set_format (dao
, prob_col
, 2, prob_col
, rows
+ 1, "0.00%");
374 dao_set_cell_array_expr (dao
, 0, 2, gnm_expr_copy (expr_at_risk
));
375 dao_set_cell_array_expr (dao
, 1, 2, gnm_expr_copy (expr_deaths
));
376 dao_set_cell_expr (dao
, prob_col
, 2, expr_prob_zero
);
378 if (expr_censures
!= NULL
)
379 dao_set_cell_array_expr (dao
, 2, 2, gnm_expr_copy (expr_censures
));
381 for (row
= 1; row
< rows
; row
++) {
382 dao_set_cell_array_expr (dao
, 0, 2+row
, gnm_expr_copy (expr_at_risk
));
383 dao_set_cell_array_expr (dao
, 1, 2+row
, gnm_expr_copy (expr_deaths
));
384 if (expr_censures
!= NULL
)
385 dao_set_cell_array_expr (dao
, 2, 2+row
, gnm_expr_copy (expr_censures
));
386 dao_set_cell_array_expr (dao
, prob_col
, 2+row
, gnm_expr_copy (expr_prob
));
388 dao_set_cell_expr (dao
, std_err_col
, 2+row
, gnm_expr_copy (expr_std_err
));
391 gnm_expr_free (expr_at_risk
);
392 gnm_expr_free (expr_deaths
);
393 gnm_expr_free (expr_prob
);
394 if (expr_censures
!= NULL
) {
395 gnm_expr_free (expr_censures
);
396 expr_censures
= NULL
;
398 if (expr_std_err
!= NULL
) {
399 gnm_expr_free (expr_std_err
);
403 /* Create Chart if requested */
406 GOData
*probabilities
;
409 probabilities
= dao_go_data_vector (dao
, prob_col
, 2, prob_col
, 1+rows
);
411 g_object_ref (times
);
412 series
= gog_plot_new_series (plot
);
413 gog_series_set_dim (series
, 0, times
, NULL
);
414 gog_series_set_dim (series
, 1, probabilities
, NULL
);
416 style
= go_styled_object_get_style (GO_STYLED_OBJECT (series
));
417 style
->marker
.auto_shape
= FALSE
;
418 go_marker_set_shape (style
->marker
.mark
, GO_MARKER_NONE
);
419 go_styled_object_set_style (GO_STYLED_OBJECT (series
), style
);
421 if (info
->censored
&& info
->ticks
) {
425 expr
= gnm_expr_new_binary
426 (gnm_expr_new_binary (dao_get_rangeref (dao
, prob_col
, 2, prob_col
, 1+rows
),
428 dao_get_rangeref (dao
, 2, 2, 2, 1+rows
)),
430 dao_get_rangeref (dao
, 2, 2, 2, 1+rows
));
432 censures
= gnm_go_data_vector_new_expr (dao
->sheet
, gnm_expr_top_new (expr
));
434 series
= gog_plot_new_series (plot
);
435 g_object_ref (times
);
436 gog_series_set_dim (series
, 0, times
, NULL
);
437 gog_series_set_dim (series
, 1, censures
, NULL
);
439 style
= go_styled_object_get_style (GO_STYLED_OBJECT (series
));
440 style
->marker
.auto_shape
= FALSE
;
441 go_marker_set_shape (style
->marker
.mark
, GO_MARKER_TRIANGLE_DOWN
);
442 style
->line
.dash_type
= GO_LINE_NONE
;
443 style
->line
.auto_dash
= FALSE
;
444 style
->line
.width
= 0;
445 go_styled_object_set_style (GO_STYLED_OBJECT (series
), style
);
449 gnm_expr_free (expr_group
);
451 dao
->offset_col
+= colspan
;
458 so
= sheet_object_graph_new (graph
);
459 g_object_unref (graph
);
460 g_object_unref (times
);
462 dao_set_sheet_object (dao
, 0, 1, so
);
466 dao_set_italic (dao
, 1, 1, 1, 1);
467 dao_set_cell (dao
, 1, 1, _("Median"));
469 dao
->offset_col
+= 2;
470 gl
= info
->group_list
;
472 for (i
= 0; i
< repetitions
; i
++) {
473 /* the next involves (colspan-1) since the median field moves to the right. */
474 gint prob_dx
= - (repetitions
- i
)* (colspan
- 1) - 1;
475 gint times_dx
= - colspan
* repetitions
- i
- 3;
476 GnmExpr
const *expr_median
;
478 dao_set_italic (dao
, 0, 0, 0, 0);
480 if (gl
!= NULL
&& gl
->data
!= NULL
) {
481 analysis_tools_kaplan_meier_group_t
*gd
= gl
->data
;
482 if (gd
->name
!= NULL
) {
483 dao_set_cell (dao
, 0, 0, gd
->name
);
488 expr_prob
= gnm_expr_new_binary
489 (gnm_expr_new_funcall3
492 (make_rangeref(prob_dx
, 1, prob_dx
, rows
),
494 gnm_expr_new_constant (value_new_float (0.5))),
495 gnm_expr_new_constant (value_new_string ("NA")),
496 gnm_expr_new_constant (value_new_int (1))),
498 make_rangeref (times_dx
, 1, times_dx
, rows
));
500 expr_median
= gnm_expr_new_funcall1
502 gnm_expr_new_funcall3
504 gnm_expr_new_funcall1
506 gnm_expr_copy (expr_prob
)),
507 gnm_expr_new_constant (value_new_string ("NA")),
510 dao_set_cell_array_expr (dao
, 0, 1,expr_median
);
512 dao
->offset_col
+= 1;
514 logrank_test_y_offset
= 5;
515 dao
->offset_col
-= (2 + repetitions
);
518 if (info
->logrank_test
) {
520 GnmExpr
const *expr_statistic
= gnm_expr_new_constant (value_new_int (0));
521 GnmExpr
const *expr_p
;
522 GnmExpr
const *expr_n_total
= gnm_expr_new_constant (value_new_int (0));
523 GnmExpr
const *expr_death_total
= gnm_expr_new_constant (value_new_int (0));
525 fd_chidist
= gnm_func_lookup_or_add_placeholder ("CHIDIST");
526 gnm_func_inc_usage (fd_chidist
);
528 dao_set_italic (dao
, 1, logrank_test_y_offset
, 1, logrank_test_y_offset
+3);
529 set_cell_text_col (dao
, 1, logrank_test_y_offset
,
532 "/Degrees of Freedom"
536 for (i
= 0; i
< repetitions
; i
++) {
537 gint atrisk_dx
= - (repetitions
- i
)* colspan
- 2;
538 expr_n_total
= gnm_expr_new_binary
539 (expr_n_total
, GNM_EXPR_OP_ADD
,
540 make_rangeref ( atrisk_dx
,
541 - logrank_test_y_offset
+ 1,
543 - logrank_test_y_offset
+ rows
));
544 expr_death_total
= gnm_expr_new_binary
545 (expr_death_total
, GNM_EXPR_OP_ADD
,
546 make_rangeref ( atrisk_dx
+ 1,
547 - logrank_test_y_offset
+ 1,
549 - logrank_test_y_offset
+ rows
));
552 for (i
= 0; i
< repetitions
; i
++) {
553 GnmExpr
const *expr_expect
;
554 gint atrisk_dx
= - (repetitions
- i
)* colspan
- 2;
556 expr_expect
= gnm_expr_new_binary
558 (gnm_expr_copy (expr_death_total
),
560 make_rangeref (atrisk_dx
,
561 - logrank_test_y_offset
+ 1,
563 - logrank_test_y_offset
+ rows
)),
565 gnm_expr_copy (expr_n_total
));
567 expr_expect
= gnm_expr_new_funcall3 (
569 gnm_expr_new_funcall1 (fd_iserror
,
570 gnm_expr_copy (expr_expect
)),
571 gnm_expr_new_constant (value_new_int (0)),
573 expr_expect
= gnm_expr_new_funcall1 (fd_sum
,
575 expr_expect
= gnm_expr_new_binary (
576 gnm_expr_new_binary (
577 gnm_expr_new_binary (
578 gnm_expr_new_funcall1 (
582 - logrank_test_y_offset
+ 1,
584 - logrank_test_y_offset
+ rows
)),
586 gnm_expr_copy (expr_expect
)),
588 gnm_expr_new_constant (value_new_int (2))),
591 expr_statistic
= gnm_expr_new_binary (
592 expr_statistic
, GNM_EXPR_OP_ADD
, expr_expect
);
594 gnm_expr_free (expr_n_total
);
595 gnm_expr_free (expr_death_total
);
597 dao_set_cell_array_expr (dao
, 2, logrank_test_y_offset
+ 1, expr_statistic
);
599 /* Degree of Freedoms */
600 dao_set_cell_int (dao
, 2, logrank_test_y_offset
+ 2, repetitions
- 1);
603 expr_p
= gnm_expr_new_funcall2 (fd_chidist
,
605 make_cellref (0,-1));
606 dao_set_cell_expr (dao
, 2, logrank_test_y_offset
+ 3, expr_p
);
608 gnm_func_dec_usage (fd_chidist
);
615 gnm_expr_free (expr_data
);
616 if (expr_group_data
!= NULL
)
617 gnm_expr_free (expr_group_data
);
619 gnm_func_dec_usage (fd_small
);
620 gnm_func_dec_usage (fd_if
);
621 gnm_func_dec_usage (fd_iserror
);
622 gnm_func_dec_usage (fd_sum
);
624 gnm_func_dec_usage (fd_sqrt
);
626 gnm_func_dec_usage (fd_min
);
628 dao_redraw_respan (dao
);
634 analysis_tool_kaplan_meier_clear_gl_cb (gpointer data
, G_GNUC_UNUSED gpointer user_data
)
636 analysis_tools_kaplan_meier_group_t
*group
= data
;
638 g_return_if_fail (data
!= NULL
);
640 g_free (group
->name
);
645 analysis_tool_kaplan_meier_engine (G_GNUC_UNUSED GOCmdContext
*gcc
, data_analysis_output_t
*dao
, gpointer specs
,
646 analysis_tool_engine_t selector
, gpointer result
)
648 analysis_tools_data_kaplan_meier_t
*info
= specs
;
653 case TOOL_ENGINE_UPDATE_DESCRIPTOR
:
654 return (dao_command_descriptor (dao
,
655 _("Kaplan-Meier (%s)"),
658 case TOOL_ENGINE_UPDATE_DAO
:
659 multiple
= ((info
->group_list
== NULL
) ? 1 : g_slist_length (info
->group_list
));
660 median
= (info
->median
? (2 + multiple
) : 0);
661 if (median
== 0 && info
->logrank_test
)
663 dao_adjust (dao
, median
+ 1 + multiple
* ((info
->std_err
? 4 : 3) + (info
->censored
? 1 : 0)),
664 info
->base
.range_1
->v_range
.cell
.b
.row
665 - info
->base
.range_1
->v_range
.cell
.a
.row
+ 3);
667 case TOOL_ENGINE_CLEAN_UP
:
668 value_release (info
->range_3
);
669 info
->range_3
= NULL
;
670 g_slist_foreach (info
->group_list
, analysis_tool_kaplan_meier_clear_gl_cb
, NULL
);
671 g_slist_free (info
->group_list
);
672 info
->group_list
= NULL
;
673 return analysis_tool_generic_b_clean (specs
);
674 case TOOL_ENGINE_LAST_VALIDITY_CHECK
:
676 case TOOL_ENGINE_PREPARE_OUTPUT_RANGE
:
677 dao_prepare_output (NULL
, dao
, _("Kaplan-Meier Estimates"));
679 case TOOL_ENGINE_FORMAT_OUTPUT_RANGE
:
680 return dao_format_output (dao
, _("Kaplan-Meier Estimates"));
681 case TOOL_ENGINE_PERFORM_CALC
:
683 return analysis_tool_kaplan_meier_engine_run (dao
, specs
);