lin regr example; must debug, factor into regression.lisp
[CommonLispStat.git] / src / stat-models / R-lmfit.lisp
blobfea199a987917e2ac7049155fcda237e35c3ff9f
1 #|
2 # File src/library/stats/R/lm.R
3 # Part of the R package, http://www.R-project.org
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # A copy of the GNU General Public License is available at
16 # http://www.r-project.org/Licenses/
19 lm <- function (formula, data, subset, weights, na.action,
20 method = "qr", model = TRUE, x = FALSE, y = FALSE,
21 qr = TRUE, singular.ok = TRUE, contrasts = NULL,
22 offset, ...)
24 ret.x <- x
25 ret.y <- y
26 cl <- match.call()
27 mf <- match.call(expand.dots = FALSE)
28 m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"),
29 names(mf), 0L)
30 mf <- mf[c(1L, m)]
31 mf$drop.unused.levels <- TRUE
32 mf[[1L]] <- as.name("model.frame")
33 mf <- eval(mf, parent.frame())
34 if (method == "model.frame")
35 return(mf)
36 else if (method != "qr")
37 warning(gettextf("method = '%s' is not supported. Using 'qr'", method),
38 domain = NA)
39 mt <- attr(mf, "terms") # allow model.frame to update it
40 y <- model.response(mf, "numeric")
41 ## avoid any problems with 1D or nx1 arrays by as.vector.
42 w <- as.vector(model.weights(mf))
43 if(!is.null(w) && !is.numeric(w))
44 stop("'weights' must be a numeric vector")
45 offset <- as.vector(model.offset(mf))
46 if(!is.null(offset)) {
47 if(length(offset) == 1) offset <- rep(offset, NROW(y))
48 else if(length(offset) != NROW(y))
49 stop(gettextf("number of offsets is %d, should equal %d (number of observations)",
50 length(offset), NROW(y)), domain = NA)
53 if (is.empty.model(mt)) {
54 x <- NULL
55 z <- list(coefficients = if (is.matrix(y))
56 matrix(,0,3) else numeric(0), residuals = y,
57 fitted.values = 0 * y, weights = w, rank = 0L,
58 df.residual = if (is.matrix(y)) nrow(y) else length(y))
59 if(!is.null(offset)) {
60 z$fitted.values <- offset
61 z$residuals <- y - offset
64 else {
65 x <- model.matrix(mt, mf, contrasts)
66 z <- if(is.null(w)) lm.fit(x, y, offset = offset,
67 singular.ok=singular.ok, ...)
68 else lm.wfit(x, y, w, offset = offset, singular.ok=singular.ok, ...)
70 class(z) <- c(if(is.matrix(y)) "mlm", "lm")
71 z$na.action <- attr(mf, "na.action")
72 z$offset <- offset
73 z$contrasts <- attr(x, "contrasts")
74 z$xlevels <- .getXlevels(mt, mf)
75 z$call <- cl
76 z$terms <- mt
77 if (model)
78 z$model <- mf
79 if (ret.x)
80 z$x <- x
81 if (ret.y)
82 z$y <- y
83 if (!qr) z$qr <- NULL
87 ## lm.fit() and lm.wfit() have *MUCH* in common [say ``code re-use !'']
88 lm.fit <- function (x, y, offset = NULL, method = "qr", tol = 1e-07,
89 singular.ok = TRUE, ...)
91 if (is.null(n <- nrow(x))) stop("'x' must be a matrix")
92 if(n == 0L) stop("0 (non-NA) cases")
93 p <- ncol(x)
94 if (p == 0L) {
95 ## oops, null model
96 return(list(coefficients = numeric(0), residuals = y,
97 fitted.values = 0 * y, rank = 0,
98 df.residual = length(y)))
100 ny <- NCOL(y)
101 ## treat one-col matrix as vector
102 if(is.matrix(y) && ny == 1)
103 y <- drop(y)
104 if(!is.null(offset))
105 y <- y - offset
106 if (NROW(y) != n)
107 stop("incompatible dimensions")
108 if(method != "qr")
109 warning(gettextf("method = '%s' is not supported. Using 'qr'", method),
110 domain = NA)
111 if(length(list(...)))
112 warning("extra arguments ", paste(names(list(...)), sep=", "),
113 " are just disregarded.")
114 storage.mode(x) <- "double"
115 storage.mode(y) <- "double"
116 z <- .Fortran("dqrls",
117 qr = x, n = n, p = p,
118 y = y, ny = ny,
119 tol = as.double(tol),
120 coefficients = mat.or.vec(p, ny),
121 residuals = y, effects = y, rank = integer(1),
122 pivot = 1:p, qraux = double(p), work = double(2*p),
123 PACKAGE="base")
124 if(!singular.ok && z$rank < p) stop("singular fit encountered")
125 coef <- z$coefficients
126 pivot <- z$pivot
127 ## careful here: the rank might be 0
128 r1 <- seq_len(z$rank)
129 dn <- colnames(x); if(is.null(dn)) dn <- paste("x", 1L:p, sep="")
130 nmeffects <- c(dn[pivot[r1]], rep.int("", n - z$rank))
131 r2 <- if(z$rank < p) (z$rank+1L):p else integer(0L)
132 if (is.matrix(y)) {
133 coef[r2, ] <- NA
134 coef[pivot, ] <- coef
135 dimnames(coef) <- list(dn, colnames(y))
136 dimnames(z$effects) <- list(nmeffects, colnames(y))
137 } else {
138 coef[r2] <- NA
139 coef[pivot] <- coef
140 names(coef) <- dn
141 names(z$effects) <- nmeffects
143 z$coefficients <- coef
144 r1 <- y - z$residuals ; if(!is.null(offset)) r1 <- r1 + offset
145 qr <- z[c("qr", "qraux", "pivot", "tol", "rank")]
146 colnames(qr$qr) <- colnames(x)[qr$pivot]
147 c(z[c("coefficients", "residuals", "effects", "rank")],
148 list(fitted.values = r1, assign = attr(x, "assign"),
149 qr = structure(qr, class="qr"),
150 df.residual = n - z$rank))
153 lm.wfit <- function (x, y, w, offset = NULL, method = "qr", tol = 1e-7,
154 singular.ok = TRUE, ...)
156 if(is.null(n <- nrow(x))) stop("'x' must be a matrix")
157 if(n == 0) stop("0 (non-NA) cases")
158 ny <- NCOL(y)
159 ## treat one-col matrix as vector
160 if(is.matrix(y) && ny == 1L)
161 y <- drop(y)
162 if(!is.null(offset))
163 y <- y - offset
164 if (NROW(y) != n | length(w) != n)
165 stop("incompatible dimensions")
166 if (any(w < 0 | is.na(w)))
167 stop("missing or negative weights not allowed")
168 if(method != "qr")
169 warning(gettextf("method = '%s' is not supported. Using 'qr'", method),
170 domain = NA)
171 if(length(list(...)))
172 warning("extra arguments ", paste(names(list(...)), sep=", "),
173 " are just disregarded.")
174 x.asgn <- attr(x, "assign")# save
175 zero.weights <- any(w == 0)
176 if (zero.weights) {
177 save.r <- y
178 save.f <- y
179 save.w <- w
180 ok <- w != 0
181 nok <- !ok
182 w <- w[ok]
183 x0 <- x[!ok, , drop = FALSE]
184 x <- x[ok, , drop = FALSE]
185 n <- nrow(x)
186 y0 <- if (ny > 1L) y[!ok, , drop = FALSE] else y[!ok]
187 y <- if (ny > 1L) y[ ok, , drop = FALSE] else y[ok]
189 p <- ncol(x)
190 if (p == 0) {
191 ## oops, null model
192 return(list(coefficients = numeric(0), residuals = y,
193 fitted.values = 0 * y, weights = w, rank = 0L,
194 df.residual = length(y)))
196 storage.mode(y) <- "double"
197 wts <- sqrt(w)
198 z <- .Fortran("dqrls",
199 qr = x * wts, n = n, p = p,
200 y = y * wts, ny = ny,
201 tol = as.double(tol),
202 coefficients = mat.or.vec(p, ny), residuals = y,
203 effects = mat.or.vec(n, ny),
204 rank = integer(1), pivot = 1:p, qraux = double(p),
205 work = double(2 * p),
206 PACKAGE="base")
207 if(!singular.ok && z$rank < p) stop("singular fit encountered")
208 coef <- z$coefficients
209 pivot <- z$pivot
210 r1 <- seq_len(z$rank)
211 dn <- colnames(x); if(is.null(dn)) dn <- paste("x", 1L:p, sep="")
212 nmeffects <- c(dn[pivot[r1]], rep.int("", n - z$rank))
213 r2 <- if(z$rank < p) (z$rank+1L):p else integer(0)
214 if (is.matrix(y)) {
215 coef[r2, ] <- NA
216 coef[pivot, ] <- coef
217 dimnames(coef) <- list(dn, colnames(y))
218 dimnames(z$effects) <- list(nmeffects,colnames(y))
219 } else {
220 coef[r2] <- NA
221 coef[pivot] <- coef
222 names(coef) <- dn
223 names(z$effects) <- nmeffects
225 z$coefficients <- coef
226 z$residuals <- z$residuals/wts
227 z$fitted.values <- y - z$residuals
228 z$weights <- w
229 if (zero.weights) {
230 coef[is.na(coef)] <- 0
231 f0 <- x0 %*% coef
232 if (ny > 1) {
233 save.r[ok, ] <- z$residuals
234 save.r[nok, ] <- y0 - f0
235 save.f[ok, ] <- z$fitted.values
236 save.f[nok, ] <- f0
238 else {
239 save.r[ok] <- z$residuals
240 save.r[nok] <- y0 - f0
241 save.f[ok] <- z$fitted.values
242 save.f[nok] <- f0
244 z$residuals <- save.r
245 z$fitted.values <- save.f
246 z$weights <- save.w
248 if(!is.null(offset))
249 z$fitted.values <- z$fitted.values + offset
250 qr <- z[c("qr", "qraux", "pivot", "tol", "rank")]
251 colnames(qr$qr) <- colnames(x)[qr$pivot]
252 c(z[c("coefficients", "residuals", "fitted.values", "effects",
253 "weights", "rank")],
254 list(assign = x.asgn,
255 qr = structure(qr, class="qr"),
256 df.residual = n - z$rank))
259 print.lm <- function(x, digits = max(3, getOption("digits") - 3), ...)
261 cat("\nCall:\n",deparse(x$call),"\n\n",sep="")
262 if(length(coef(x))) {
263 cat("Coefficients:\n")
264 print.default(format(coef(x), digits=digits),
265 print.gap = 2, quote = FALSE)
266 } else cat("No coefficients\n")
267 cat("\n")
268 invisible(x)
271 summary.lm <- function (object, correlation = FALSE, symbolic.cor = FALSE, ...)
273 z <- object
274 p <- z$rank
275 if (p == 0) {
276 r <- z$residuals
277 n <- length(r)
278 w <- z$weights
279 if (is.null(w)) {
280 rss <- sum(r^2)
281 } else {
282 rss <- sum(w * r^2)
283 r <- sqrt(w) * r
285 resvar <- rss/(n - p)
286 ans <- z[c("call", "terms")]
287 class(ans) <- "summary.lm"
288 ans$aliased <- is.na(coef(object)) # used in print method
289 ans$residuals <- r
290 ans$df <- c(0L, n, length(ans$aliased))
291 ans$coefficients <- matrix(NA, 0L, 4L)
292 dimnames(ans$coefficients)<-
293 list(NULL, c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
294 ans$sigma <- sqrt(resvar)
295 ans$r.squared <- ans$adj.r.squared <- 0
296 return(ans)
298 Qr <- object$qr
299 if (is.null(z$terms) || is.null(Qr))
300 stop("invalid \'lm\' object: no 'terms' nor 'qr' component")
301 n <- NROW(Qr$qr)
302 rdf <- n - p
303 if(is.na(z$df.residual) || rdf != z$df.residual)
304 warning("residual degrees of freedom in object suggest this is not an \"lm\" fit")
305 p1 <- 1:p
306 ## do not want missing values substituted here
307 r <- z$residuals
308 f <- z$fitted.values
309 w <- z$weights
310 if (is.null(w)) {
311 mss <- if (attr(z$terms, "intercept"))
312 sum((f - mean(f))^2) else sum(f^2)
313 rss <- sum(r^2)
314 } else {
315 mss <- if (attr(z$terms, "intercept")) {
316 m <- sum(w * f /sum(w))
317 sum(w * (f - m)^2)
318 } else sum(w * f^2)
319 rss <- sum(w * r^2)
320 r <- sqrt(w) * r
322 resvar <- rss/rdf
323 R <- chol2inv(Qr$qr[p1, p1, drop = FALSE])
324 se <- sqrt(diag(R) * resvar)
325 est <- z$coefficients[Qr$pivot[p1]]
326 tval <- est/se
327 ans <- z[c("call", "terms")]
328 ans$residuals <- r
329 ans$coefficients <-
330 cbind(est, se, tval, 2*pt(abs(tval), rdf, lower.tail = FALSE))
331 dimnames(ans$coefficients)<-
332 list(names(z$coefficients)[Qr$pivot[p1]],
333 c("Estimate", "Std. Error", "t value", "Pr(>|t|)"))
334 ans$aliased <- is.na(coef(object)) # used in print method
335 ans$sigma <- sqrt(resvar)
336 ans$df <- c(p, rdf, NCOL(Qr$qr))
337 if (p != attr(z$terms, "intercept")) {
338 df.int <- if (attr(z$terms, "intercept")) 1L else 0L
339 ans$r.squared <- mss/(mss + rss)
340 ans$adj.r.squared <- 1 - (1 - ans$r.squared) * ((n - df.int)/rdf)
341 ans$fstatistic <- c(value = (mss/(p - df.int))/resvar,
342 numdf = p - df.int, dendf = rdf)
343 } else ans$r.squared <- ans$adj.r.squared <- 0
344 ans$cov.unscaled <- R
345 dimnames(ans$cov.unscaled) <- dimnames(ans$coefficients)[c(1,1)]
346 if (correlation) {
347 ans$correlation <- (R * resvar)/outer(se, se)
348 dimnames(ans$correlation) <- dimnames(ans$cov.unscaled)
349 ans$symbolic.cor <- symbolic.cor
351 if(!is.null(z$na.action)) ans$na.action <- z$na.action
352 class(ans) <- "summary.lm"
356 print.summary.lm <-
357 function (x, digits = max(3, getOption("digits") - 3),
358 symbolic.cor = x$symbolic.cor,
359 signif.stars= getOption("show.signif.stars"), ...)
361 cat("\nCall:\n")#S: ' ' instead of '\n'
362 cat(paste(deparse(x$call), sep="\n", collapse = "\n"), "\n\n", sep="")
363 resid <- x$residuals
364 df <- x$df
365 rdf <- df[2L]
366 cat(if(!is.null(x$w) && diff(range(x$w))) "Weighted ",
367 "Residuals:\n", sep="")
368 if (rdf > 5L) {
369 nam <- c("Min", "1Q", "Median", "3Q", "Max")
370 rq <- if (length(dim(resid)) == 2)
371 structure(apply(t(resid), 1, quantile),
372 dimnames = list(nam, dimnames(resid)[[2]]))
373 else structure(quantile(resid), names = nam)
374 print(rq, digits = digits, ...)
376 else if (rdf > 0L) {
377 print(resid, digits = digits, ...)
378 } else { # rdf == 0 : perfect fit!
379 cat("ALL", df[1], "residuals are 0: no residual degrees of freedom!\n")
381 if (length(x$aliased) == 0L) {
382 cat("\nNo Coefficients\n")
383 } else {
384 if (nsingular <- df[3L] - df[1L])
385 cat("\nCoefficients: (", nsingular,
386 " not defined because of singularities)\n", sep = "")
387 else cat("\nCoefficients:\n")
388 coefs <- x$coefficients
389 if(!is.null(aliased <- x$aliased) && any(aliased)) {
390 cn <- names(aliased)
391 coefs <- matrix(NA, length(aliased), 4, dimnames=list(cn, colnames(coefs)))
392 coefs[!aliased, ] <- x$coefficients
395 printCoefmat(coefs, digits=digits, signif.stars=signif.stars, na.print="NA", ...)
398 cat("\nResidual standard error:",
399 format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n")
400 if(nzchar(mess <- naprint(x$na.action))) cat(" (",mess, ")\n", sep="")
401 if (!is.null(x$fstatistic)) {
402 cat("Multiple R-squared:", formatC(x$r.squared, digits=digits))
403 cat(",\tAdjusted R-squared:",formatC(x$adj.r.squared,digits=digits),
404 "\nF-statistic:", formatC(x$fstatistic[1], digits=digits),
405 "on", x$fstatistic[2], "and",
406 x$fstatistic[3], "DF, p-value:",
407 format.pval(pf(x$fstatistic[1L], x$fstatistic[2L],
408 x$fstatistic[3L], lower.tail = FALSE), digits=digits),
409 "\n")
411 correl <- x$correlation
412 if (!is.null(correl)) {
413 p <- NCOL(correl)
414 if (p > 1L) {
415 cat("\nCorrelation of Coefficients:\n")
416 if(is.logical(symbolic.cor) && symbolic.cor) {# NULL < 1.7.0 objects
417 print(symnum(correl, abbr.colnames = NULL))
418 } else {
419 correl <- format(round(correl, 2), nsmall = 2, digits = digits)
420 correl[!lower.tri(correl)] <- ""
421 print(correl[-1, -p, drop=FALSE], quote = FALSE)
425 cat("\n")#- not in S
426 invisible(x)
429 residuals.lm <-
430 function(object,
431 type = c("working","response", "deviance","pearson", "partial"),
432 ...)
434 type <- match.arg(type)
435 r <- object$residuals
436 res <- switch(type,
437 working =, response = r,
438 deviance=, pearson =
439 if(is.null(object$weights)) r else r * sqrt(object$weights),
440 partial = r
442 res<-naresid(object$na.action, res)
443 if (type=="partial") ## predict already does naresid
444 res<-res+predict(object,type="terms")
448 simulate.lm <- function(object, nsim = 1, seed = NULL, ...)
450 if(!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
451 runif(1) # initialize the RNG if necessary
452 if(is.null(seed))
453 RNGstate <- get(".Random.seed", envir = .GlobalEnv)
454 else {
455 R.seed <- get(".Random.seed", envir = .GlobalEnv)
456 set.seed(seed)
457 RNGstate <- structure(seed, kind = as.list(RNGkind()))
458 on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
460 ftd <- fitted(object)
461 ans <-
462 as.data.frame(ftd +
463 matrix(rnorm(length(ftd) * nsim,
464 sd = sqrt(deviance(object)/
465 df.residual(object))),
466 nrow = length(ftd)))
468 attr(ans, "seed") <- RNGstate
472 #fitted.lm <- function(object, ...)
473 # napredict(object$na.action, object$fitted.values)
475 # coef.lm <- function(object, ...) object$coefficients
477 ## need this for results of lm.fit() in drop1():
478 weights.default <- function(object, ...)
479 naresid(object$na.action, object$weights)
482 deviance.lm <- function(object, ...)
483 sum(weighted.residuals(object)^2, na.rm=TRUE)
485 formula.lm <- function(x, ...)
487 form <- x$formula
488 if( !is.null(form) ) {
489 form <- formula(x$terms) # has . expanded
490 environment(form) <- environment(x$formula)
491 form
492 } else formula(x$terms)
495 family.lm <- function(object, ...) { gaussian() }
497 model.frame.lm <- function(formula, ...)
499 dots <- list(...)
500 nargs <- dots[match(c("data", "na.action", "subset"), names(dots), 0)]
501 if (length(nargs) || is.null(formula$model)) {
502 fcall <- formula$call
503 fcall$method <- "model.frame"
504 fcall[[1]] <- as.name("lm")
505 fcall[names(nargs)] <- nargs
506 # env <- environment(fcall$formula) # always NULL
507 env <- environment(formula$terms)
508 if (is.null(env)) env <- parent.frame()
509 eval(fcall, env, parent.frame())
511 else formula$model
514 variable.names.lm <- function(object, full = FALSE, ...)
516 if(full) dimnames(object$qr$qr)[[2]]
517 else if(object$rank) dimnames(object$qr$qr)[[2]][seq_len(object$rank)]
518 else character(0)
521 case.names.lm <- function(object, full = FALSE, ...)
523 w <- weights(object)
524 dn <- names(residuals(object))
525 if(full || is.null(w)) dn else dn[w!=0]
528 anova.lm <- function(object, ...)
530 if(length(list(object, ...)) > 1L)
531 return(anova.lmlist(object, ...))
532 w <- object$weights
533 ssr <- sum(if(is.null(w)) object$residuals^2 else w*object$residuals^2)
534 dfr <- df.residual(object)
535 p <- object$rank
536 if(p > 0L) {
537 p1 <- 1L:p
538 comp <- object$effects[p1]
539 asgn <- object$assign[object$qr$pivot][p1]
540 nmeffects <- c("(Intercept)", attr(object$terms, "term.labels"))
541 tlabels <- nmeffects[1 + unique(asgn)]
542 ss <- c(unlist(lapply(split(comp^2,asgn), sum)), ssr)
543 df <- c(unlist(lapply(split(asgn, asgn), length)), dfr)
544 } else {
545 ss <- ssr
546 df <- dfr
547 tlabels <- character(0)
549 ms <- ss/df
550 f <- ms/(ssr/dfr)
551 P <- pf(f, df, dfr, lower.tail = FALSE)
552 table <- data.frame(df, ss, ms, f, P)
553 table[length(P), 4:5] <- NA
554 dimnames(table) <- list(c(tlabels, "Residuals"),
555 c("Df","Sum Sq", "Mean Sq", "F value", "Pr(>F)"))
556 if(attr(object$terms,"intercept")) table <- table[-1, ]
557 structure(table, heading = c("Analysis of Variance Table\n",
558 paste("Response:", deparse(formula(object)[[2]]))),
559 class= c("anova", "data.frame"))# was "tabular"
562 anova.lmlist <- function (object, ..., scale = 0, test = "F")
564 objects <- list(object, ...)
565 responses <- as.character(lapply(objects,
566 function(x) deparse(x$terms[[2]])))
567 sameresp <- responses == responses[1]
568 if (!all(sameresp)) {
569 objects <- objects[sameresp]
570 warning("models with response ",
571 deparse(responses[!sameresp]),
572 " removed because response differs from ", "model 1")
575 ns <- sapply(objects, function(x) length(x$residuals))
576 if(any(ns != ns[1L]))
577 stop("models were not all fitted to the same size of dataset")
579 ## calculate the number of models
580 nmodels <- length(objects)
581 if (nmodels == 1)
582 return(anova.lm(object))
584 ## extract statistics
586 resdf <- as.numeric(lapply(objects, df.residual))
587 resdev <- as.numeric(lapply(objects, deviance))
589 ## construct table and title
591 table <- data.frame(resdf, resdev, c(NA, -diff(resdf)),
592 c(NA, -diff(resdev)) )
593 variables <- lapply(objects, function(x)
594 paste(deparse(formula(x)), collapse="\n") )
595 dimnames(table) <- list(1:nmodels,
596 c("Res.Df", "RSS", "Df", "Sum of Sq"))
598 title <- "Analysis of Variance Table\n"
599 topnote <- paste("Model ", format(1:nmodels),": ",
600 variables, sep="", collapse="\n")
602 ## calculate test statistic if needed
604 if(!is.null(test)) {
605 bigmodel <- order(resdf)[1L]
606 scale <- if(scale > 0) scale else resdev[bigmodel]/resdf[bigmodel]
607 table <- stat.anova(table = table, test = test,
608 scale = scale,
609 df.scale = resdf[bigmodel],
610 n = length(objects[bigmodel$residuals]))
612 structure(table, heading = c(title, topnote),
613 class = c("anova", "data.frame"))
616 ## code originally from John Maindonald 26Jul2000
617 predict.lm <-
618 function(object, newdata, se.fit = FALSE, scale = NULL, df = Inf,
619 interval = c("none", "confidence", "prediction"),
620 level = .95, type = c("response", "terms"),
621 terms = NULL, na.action = na.pass, pred.var = res.var/weights,
622 weights = 1, ...)
624 tt <- terms(object)
625 if(missing(newdata) || is.null(newdata)) {
626 mm <- X <- model.matrix(object)
627 mmDone <- TRUE
628 offset <- object$offset
630 else {
631 Terms <- delete.response(tt)
632 m <- model.frame(Terms, newdata, na.action = na.action,
633 xlev = object$xlevels)
634 if(!is.null(cl <- attr(Terms, "dataClasses"))) .checkMFClasses(cl, m)
635 X <- model.matrix(Terms, m, contrasts.arg = object$contrasts)
636 offset <- if (!is.null(off.num <- attr(tt, "offset")))
637 eval(attr(tt, "variables")[[off.num+1]], newdata)
638 else if (!is.null(object$offset))
639 eval(object$call$offset, newdata)
640 mmDone <- FALSE
642 n <- length(object$residuals) # NROW(object$qr$qr)
643 p <- object$rank
644 p1 <- seq_len(p)
645 piv <- object$qr$pivot[p1]
646 if(p < ncol(X) && !(missing(newdata) || is.null(newdata)))
647 warning("prediction from a rank-deficient fit may be misleading")
648 ### NB: Q[p1,] %*% X[,piv] = R[p1,p1]
649 beta <- object$coefficients
650 predictor <- drop(X[, piv, drop = FALSE] %*% beta[piv])
651 if (!is.null(offset))
652 predictor <- predictor + offset
654 interval <- match.arg(interval)
655 if (interval == "prediction") {
656 if (missing(newdata))
657 warning("Predictions on current data refer to _future_ responses\n")
658 if (missing(newdata) && missing(weights))
660 w <- weights.default(object)
661 if (!is.null(w)) {
662 weights <- w
663 warning("Assuming prediction variance inversely proportional to weights used for fitting\n")
666 if (!missing(newdata) && missing(weights) && !is.null(object$weights) && missing(pred.var))
667 warning("Assuming constant prediction variance even though model fit is weighted\n")
668 if (inherits(weights, "formula")){
669 if (length(weights) != 2L)
670 stop("'weights' as formula should be one-sided")
671 d <- if(missing(newdata) || is.null(newdata))
672 model.frame(object)
673 else
674 newdata
675 weights <- eval(weights[[2L]], d, environment(weights))
679 type <- match.arg(type)
680 if(se.fit || interval != "none") {
681 res.var <-
682 if (is.null(scale)) {
683 r <- object$residuals
684 w <- object$weights
685 rss <- sum(if(is.null(w)) r^2 else r^2 * w)
686 df <- n - p
687 rss/df
688 } else scale^2
689 if(type != "terms") {
690 if(p > 0) {
691 XRinv <-
692 if(missing(newdata) && is.null(w))
693 qr.Q(object$qr)[, p1, drop = FALSE]
694 else
695 X[, piv] %*% qr.solve(qr.R(object$qr)[p1, p1])
696 # NB:
697 # qr.Q(object$qr)[, p1, drop = FALSE] / sqrt(w)
698 # looks faster than the above, but it's slower, and doesn't handle zero
699 # weights properly
701 ip <- drop(XRinv^2 %*% rep(res.var, p))
702 } else ip <- rep(0, n)
706 if (type == "terms") { ## type == "terms" ------------
708 if(!mmDone) { mm <- model.matrix(object); mmDone <- TRUE }
709 ## asgn <- attrassign(mm, tt) :
710 aa <- attr(mm, "assign")
711 ll <- attr(tt, "term.labels")
712 hasintercept <- attr(tt, "intercept") > 0L
713 if (hasintercept)
714 ll <- c("(Intercept)", ll)
715 aaa <- factor(aa, labels = ll)
716 asgn <- split(order(aa), aaa)
717 if (hasintercept) {
718 asgn$"(Intercept)" <- NULL
719 if(!mmDone) { mm <- model.matrix(object); mmDone <- TRUE }
720 avx <- colMeans(mm)
721 termsconst <- sum(avx[piv] * beta[piv])
723 nterms <- length(asgn)
724 if(nterms > 0) {
725 predictor <- matrix(ncol = nterms, nrow = NROW(X))
726 dimnames(predictor) <- list(rownames(X), names(asgn))
728 if (se.fit || interval != "none") {
729 ip <- matrix(ncol = nterms, nrow = NROW(X))
730 dimnames(ip) <- list(rownames(X), names(asgn))
731 Rinv <- qr.solve(qr.R(object$qr)[p1, p1])
733 if(hasintercept)
734 X <- sweep(X, 2L, avx, check.margin=FALSE)
735 unpiv <- rep.int(0L, NCOL(X))
736 unpiv[piv] <- p1
737 ## Predicted values will be set to 0 for any term that
738 ## corresponds to columns of the X-matrix that are
739 ## completely aliased with earlier columns.
740 for (i in seq.int(1L, nterms, length.out = nterms)) {
741 iipiv <- asgn[[i]] # Columns of X, ith term
742 ii <- unpiv[iipiv] # Corresponding rows of Rinv
743 iipiv[ii == 0L] <- 0L
744 predictor[, i] <-
745 if(any(iipiv > 0L)) X[, iipiv, drop = FALSE] %*% beta[iipiv]
746 else 0
747 if (se.fit || interval != "none")
748 ip[, i] <-
749 if(any(iipiv > 0L))
750 as.matrix(X[, iipiv, drop = FALSE] %*%
751 Rinv[ii, , drop = FALSE])^2 %*% rep.int(res.var, p)
752 else 0
754 if (!is.null(terms)) {
755 predictor <- predictor[, terms, drop = FALSE]
756 if (se.fit)
757 ip <- ip[, terms, drop = FALSE]
759 } else { # no terms
760 predictor <- ip <- matrix(0, n,0)
762 attr(predictor, 'constant') <- if (hasintercept) termsconst else 0
765 ### Now construct elements of the list that will be returned
767 if(interval != "none") {
768 tfrac <- qt((1 - level)/2, df)
769 hwid <- tfrac * switch(interval,
770 confidence = sqrt(ip),
771 prediction = sqrt(ip+pred.var)
773 if(type != "terms") {
774 predictor <- cbind(predictor, predictor + hwid %o% c(1, -1))
775 colnames(predictor) <- c("fit", "lwr", "upr")
777 else {
778 lwr <- predictor + hwid
779 upr <- predictor - hwid
782 if(se.fit || interval != "none") se <- sqrt(ip)
783 if(missing(newdata) && !is.null(na.act <- object$na.action)) {
784 predictor <- napredict(na.act, predictor)
785 if(se.fit) se <- napredict(na.act, se)
787 if(type == "terms" && interval != "none") {
788 if(missing(newdata) && !is.null(na.act)) {
789 lwr <- napredict(na.act, lwr)
790 upr <- napredict(na.act, upr)
792 list(fit = predictor, se.fit = se, lwr = lwr, upr = upr,
793 df = df, residual.scale = sqrt(res.var))
794 } else if (se.fit)
795 list(fit = predictor, se.fit = se,
796 df = df, residual.scale = sqrt(res.var))
797 else predictor
800 effects.lm <- function(object, set.sign = FALSE, ...)
802 eff <- object$effects
803 if(is.null(eff)) stop("'object' has no 'effects' component")
804 if(set.sign) {
805 dd <- coef(object)
806 if(is.matrix(eff)) {
807 r <- 1:dim(dd)[1L]
808 eff[r, ] <- sign(dd) * abs(eff[r, ])
809 } else {
810 r <- 1:length(dd)
811 eff[r] <- sign(dd) * abs(eff[r])
814 structure(eff, assign = object$assign, class = "coef")
817 ## plot.lm --> now in ./plot.lm.R
819 model.matrix.lm <- function(object, ...)
821 if(n_match <- match("x", names(object), 0L)) object[[n_match]]
822 else {
823 data <- model.frame(object, xlev = object$xlevels, ...)
824 NextMethod("model.matrix", data = data,
825 contrasts.arg = object$contrasts)
829 ##---> SEE ./mlm.R for more methods, etc. !!
830 predict.mlm <-
831 function(object, newdata, se.fit = FALSE, na.action = na.pass, ...)
833 if(missing(newdata)) return(object$fitted.values)
834 if(se.fit)
835 stop("the 'se.fit' argument is not yet implemented for \"mlm\" objects")
836 if(missing(newdata)) {
837 X <- model.matrix(object)
838 offset <- object$offset
840 else {
841 tt <- terms(object)
842 Terms <- delete.response(tt)
843 m <- model.frame(Terms, newdata, na.action = na.action,
844 xlev = object$xlevels)
845 if(!is.null(cl <- attr(Terms, "dataClasses"))) .checkMFClasses(cl, m)
846 X <- model.matrix(Terms, m, contrasts.arg = object$contrasts)
847 offset <- if (!is.null(off.num <- attr(tt, "offset")))
848 eval(attr(tt, "variables")[[off.num+1]], newdata)
849 else if (!is.null(object$offset))
850 eval(object$call$offset, newdata)
852 piv <- object$qr$pivot[seq(object$rank)]
853 pred <- X[, piv, drop = FALSE] %*% object$coefficients[piv,]
854 if ( !is.null(offset) ) pred <- pred + offset
855 if(inherits(object, "mlm")) pred else pred[, 1L]
858 ## from base/R/labels.R
859 labels.lm <- function(object, ...)
861 tl <- attr(object$terms, "term.labels")
862 asgn <- object$assign[object$qr$pivot[1:object$rank]]
863 tl[unique(asgn)]
867 (defun lm.fit (x y (offset nil) (method 'qr) (tol 1e-07) (singular.ok t) &varargs)
869 (if (not (typep x 'matrix)) (error "x must by a matrix."))
870 (if (= (nrow-non-na x) 0) (error "need >0 non-NA cases (data)"))
871 (if (= (ncol-non-na x) 0) (error "need >0 non-NA vars (model)"))
873 (let ((p (ncol x))
874 (ny (length y))