#!/usr/bin/env Rscript
# Xavier Fernández i Marín
# August 2004
# http://xavier-fim.net
#
# Some code examples for logistic regression.
#
# The script builds a small synthetic data set (a binary dependent variable
# and three independent variables, two of which contain NA values), removes
# the rows with missing predictors, fits a logit model with glm() and shows
# a few summaries of the fit.

# How many cases do you want?
n <- 100  # let's start with n = 100. You can play and try other values

# Create a dummy dependent variable: 1 when the random draw is above 0.5,
# 0 otherwise.
pre.dv <- rnorm(n)
dv <- as.numeric(pre.dv > 0.5)

# Create three independent variables with some NA values.
# var1: repeat a 5-value pattern, append a few filler values so that at
# least n values are available, then keep exactly n cases.
pre.var1 <- rep(c(NA, 3, 5, 10, 12), n %/% 5)
pre.var1 <- c(pre.var1, 3, 5, 29, 18)
var1 <- pre.var1[1:n]

# var2: a simple index without missing values.
var2 <- seq(1, n, by = 1)

# var3: repeat a 4-value pattern (NA in every other position), append one
# more copy of the pattern so that at least n values are available, then
# keep exactly n cases.
pre.var3 <- c(NA, 50, NA, 280)
pre.var3.b <- rep(pre.var3, n %/% 4)
pre.var3.b <- c(pre.var3.b, pre.var3)
var3 <- pre.var3.b[1:n]

# Create the dataframe with all the values
df.with.na <- data.frame(dv, var1, var2, var3)

# From here on, there is a fast way or an elegant way to drop incomplete rows:

## FAST
# df <- subset(df.with.na, !(is.na(var1) | is.na(var2) | is.na(var3)))

## ELEGANT
# Flag every case that has NA in at least one of the independent variables.
null.indep <- is.na(var1) | is.na(var2) | is.na(var3)

# Attach this newly created variable to the previous dataset
df.with.na <- cbind(df.with.na, null.indep)

# Now we know which cases have NA values, and we create our main dataframe
# from the remaining ones.
df <- subset(df.with.na, !null.indep)

#### Logistic regression
# The data frame is passed explicitly through `data =` instead of using
# attach(): the global vectors dv, var1, var2 and var3 have the same names as
# the columns of df and would mask an attached copy.
mod1 <- glm(dv ~ var1 + var2 + var3, family = binomial("logit"), data = df)

# Some information that you can get from the model
# (only very tentative)
summary(mod1)
anova(mod1, test = "Chisq")
logLik(mod1)

# Prediction
# Also very tentative, and the output can be improved...
# Fitted probabilities are split at 0.5 and cross-tabulated against the
# observed values of the dependent variable.
fit1.group <- cut(fitted(mod1), c(0, 0.5, 1))
tab1 <- table(fit1.group, df$dv)
prop.table(tab1)

# Model fitting
AIC(mod1)

# For further development
# Diagnostics of the model
# Package 'car' is required for Anova(); it is skipped when not installed so
# that the rest of the example still runs.
if (requireNamespace("car", quietly = TRUE)) {
  library(car)
  Anova(mod1)
} else {
  message("Package 'car' is not installed; skipping Anova(mod1).")
}