#!/usr/local/bin/R
# Xavier Fernández i Marín
# December 2004
# http://xavier-fim.net
# Basic regression
# Simulate data
sim <- 500 # number of observations
# Three independent standard-normal predictors. Their length is taken from
# `sim` so that changing the sample size in one place keeps every vector
# the same length.
var1 <- rnorm(sim)
var2 <- rnorm(sim)
var3 <- rnorm(sim)
# Simulate a process that links the data with some other variable
# and add some random noise:
#   dependent = 1 + 0.5 * var1 - 3 * var2 + 2 * var3 + N(0, 1) noise
# (the scalar intercept is recycled across all observations)
dependent <- 1 + 0.5 * var1 - 3 * var2 + 2 * var3 + rnorm(sim)
# Link the four vectors into a single data frame; the columns are named
# after the vectors (dependent, var1, var2, var3)
df <- data.frame(dependent, var1, var2, var3)
# Scatterplot matrix of every pair of columns in the data frame
pairs(df)
# basic model specification
# The variables are looked up in `df` (via `data =`) rather than in the
# global environment, so the fit is tied to the data frame built above.
basic <- lm(dependent ~ var1 + var2 + var3, data = df)
# Inspect the output generated by the linear model object
basic             # call and estimated coefficients
summary(basic)    # coefficient table, standard errors, R-squared, F test
attributes(basic) # component names and class of the fitted object
# model search
anova(basic)      # sequential analysis-of-variance table for the terms
step(basic)       # stepwise model selection based on AIC
# Complex model specification
# (each comment describes the model fitted on the line below it)

# var1, var2 and the interaction between them
complex1 <- lm(dependent ~ var1 * var2, data = df)

# var1 and the interaction between var1 and var2 (no main effect of var2)
complex2 <- lm(dependent ~ var1 + var1:var2, data = df)

# var1 logged, var2, var3 and the interaction between var2 and var3
# NOTE(review): var1 is drawn with rnorm(), so it contains negative values;
# log() yields NaN for those (with a warning) and, under the default
# na.action, lm() drops the affected rows. Kept only to illustrate that a
# transformation can be written directly in the formula.
complex3 <- lm(dependent ~ log(var1) + var2 * var3, data = df)

# var1, var2 and the square of var2
# Inside a formula `^` means "cross terms up to this degree", not
# arithmetic power, so the squaring has to happen inside I(). Writing
# I(var2)^2 collapses to I(var2), which duplicates var2 and gets an NA
# coefficient.
complex4 <- lm(dependent ~ var1 + var2 + I(var2^2), data = df)
# Plot regression models
# Drawn one at a time: in an interactive session R will ask you to hit
# <Return> to see each plot in the same window
plot(basic)
# We can plot the four diagnostic plots in the same window instead.
# par() returns the previous settings; keep them so they can be restored.
old_par <- par(mfrow = c(2, 2))
plot(basic)
# It is really easy to do a basic inspection of residuals
# Partial plots of the residuals against each predictor, laid out side by
# side in an explicit 1 x 3 grid rather than in the leftover 2 x 2 layout
par(mfrow = c(1, 3))
plot(var1, residuals(basic), xlab = "var1", ylab = "Residuals")
plot(var2, residuals(basic), xlab = "var2", ylab = "Residuals")
plot(var3, residuals(basic), xlab = "var3", ylab = "Residuals")
# Restore the graphics settings that were active before this section
par(old_par)