1 #!/usr/local/bin/R
 2 
 3 
 4 # Xavier Fernández i Marín
 5 # August 2004
 6 # http://xavier-fim.net
 7 
 8 # some code examples for logistic regression
 9 
10 # How many cases do you want?
11 n <- 100        # let's start with n=100. You can play and try other values
12 
13 
14 # Create a dummy dependent variable
15 pre.dv <- rnorm(n)
16 dv <- rep(NA, n)
17 
18 dv[pre.dv>0.5] <- 1
19 dv[pre.dv<=0.5] <- 0
20 
21 
22 # Create three independent variables with some NA values
23 pre.var1 <- rep(c(NA, 3, 5, 10, 12), n/5)       # repeat until n is reached
24 pre.var1 <- c(pre.var1, 3, 5, 29, 18)           # just to be sure that are
25 var1 <- pre.var1[1:n]                           # exactly n cases
26 
27 var2 <- seq(1,n,1)
28 
29 pre.var3 <- c(NA, 50, NA, 280)
30 pre.var3.b <- rep(pre.var3, n/4)
31 pre.var3.b <- c(pre.var3.b, pre.var3)
32 var3 <- pre.var3.b[1:n]
33 
34 
35 # Create the dataframe with all the values
36 df.with.na <- data.frame(dv, var1, var2, var3)
37 
38 
39 # Up to here, we have a fast way or an elegant way:
40 ##FAST
41 #df <- subset(df.with.na, subset=(is.na(var1) | is.na(var2) | is.na(var3))
42 
43 ##ELEGANT
44 # Now we want to delete from the dataframe all the values that have
45 # NA in at least one of the independent variables 
46 null.indep <- rep(FALSE, n)
47 null.indep[is.na(var1) | is.na(var2) | is.na(var3)] <- TRUE
48 null.indep[is.na(paste("var", 1:3, sep=""))] <- TRUE
49 
50 
51 # attach this newly created variable to de previous dataset
52 df.with.na <- cbind(df.with.na, null.indep)
53 
54 # Now we know the values that are NA
55 # and we create our main dataframe
56 df <- subset(df.with.na, null.indep==FALSE)
57 
58 
59 
60 #### Logistic regression
61 attach(df)
62 mod1 <- glm(dv ~ var1 + var2 + var3, family=binomial("logit"))
63 
64 # Some information that you can get from
65 # only very temptative 
66 summary(mod1)
67 anova(mod1, test="Chisq")
68 logLik(mod1)
69 
70 # predicition
71 # also very temptative and the output can be improved...
72 fit1.group <- cut(fitted(mod1), c(0,0.5,1))
73 tab1 <- table(fit1.group, df$dv)
74 prop.table(tab1)
75 
76 # model fitting
77 AIC(mod1)
78 
79 # for further development
80 
81 # Diagnostics of the model
82 # library (car) required
83 library(car)
84 Anova(mod1)