#!/usr/local/bin/R

# Xavier Fernández i Marín
# December 2004
# http://xavier-fim.net


# Basic regression

# Simulate data
# Fix the random seed so that every run of the script produces the same
# numbers (remove this line to get fresh draws each time).
set.seed(2004)

sim <- 500             # number of observations

# Three independent standard-normal predictors. Their length is taken from
# `sim`, so changing the number of observations above is enough to resize
# the whole simulation.
var1 <- rnorm(sim)
var2 <- rnorm(sim)
var3 <- rnorm(sim)

# Simulate a process that links the predictors with a response variable:
# intercept 1, slopes 0.5, -3 and 2, plus standard-normal random noise.
# The scalar intercept is recycled across all observations.
dependent <- 1 + 0.5 * var1 - 3 * var2 + 2 * var3 + rnorm(sim)


# Gather the response and the three predictors as the columns of a single
# data frame (one row per observation).
df <- data.frame(
  dependent = dependent,
  var1 = var1,
  var2 = var2,
  var3 = var3
)

# Scatterplot matrix: one panel for every pair of columns.
pairs(df)


# Basic model specification
# The variables are looked up in `df` (via `data =`) instead of relying on
# loose vectors in the workspace, so the fitted object is tied to the data
# frame it was estimated on.
basic <- lm(dependent ~ var1 + var2 + var3, data = df)

# Inspect the output generated by the linear model object.
# print() is called explicitly so that the results are also shown when the
# script is run through source(), which does not auto-print bare expressions.
print(basic)              # the call and the estimated coefficients
print(summary(basic))     # coefficient table, residual summary, R-squared
print(attributes(basic))  # component names and class of the lm object

# Model search
print(anova(basic))       # sequential analysis-of-variance table
print(step(basic))        # stepwise selection by AIC, then the chosen model


# Complex model specification
# All models are fitted on `df`, which holds dependent, var1, var2 and var3.

# `a * b` expands to a + b + a:b, so this includes var1, var2 and the
# interaction between them.
complex1 <- lm(dependent ~ var1 * var2, data = df)

# `a:b` on its own adds only the interaction, so this includes var1 and the
# var1-by-var2 interaction, but no main effect for var2.
complex2 <- lm(dependent ~ var1 + var1:var2, data = df)

# Functions can be applied to variables inside a formula. var1 comes from
# rnorm() and therefore takes negative values, for which log() returns NaN;
# lm() would then drop those observations. The variable is shifted so that
# its minimum is 1 before taking the log.
# Includes shifted var1 logged, var2, var3 and the var2-by-var3 interaction.
complex3 <- lm(
  dependent ~ log(var1 - min(var1) + 1) + var2 * var3,
  data = df
)

# Arithmetic on a variable has to happen INSIDE I(). Writing `I(var2)^2`
# applies the formula crossing operator `^` to the term I(var2), which
# collapses to I(var2) itself: a duplicate of var2 whose coefficient is NA.
# Includes var1, var2 and the square of var2.
complex4 <- lm(dependent ~ var1 + var2 + I(var2^2), data = df)

# Plot regression models
# plot() on an lm object draws its diagnostic plots one after another; in an
# interactive session R asks to hit <return> before showing each one.
plot(basic)

# The four diagnostic plots can be shown together in a 2 x 2 grid, which
# makes a basic inspection of the residuals really easy. par() returns the
# previous settings; they are restored afterwards so the grid layout does
# not leak into later plots.
old_par <- par(mfrow = c(2, 2))
plot(basic)
par(old_par)

# Residuals against each predictor, side by side in a 1 x 3 layout.
# residuals() is the accessor for the residuals of a fitted model.
old_par <- par(mfrow = c(1, 3))
plot(var1, residuals(basic))
plot(var2, residuals(basic))
plot(var3, residuals(basic))
par(old_par)