#!/usr/local/bin/R

# Xavier Fernández i Marín 
# December 2004
# http://xavier-fim.net


### Creation and manipulation of basic vectors

ls()			# list the objects currently in the workspace

z <- c(1, 6, 5, 7)		# 'c' concatenates different objects
y <- rep(1, 4)			# create repeated objects
x <- seq(1, 2, by = 0.25)	# create a sequence from 1 to 2 in steps of 0.25
x
x <- x[1:4]			# x will be only the first four elements of 'x'
x
x <- x[seq_along(z)]		# another way to do it: as many elements as 'z' has
x

length(z)		# number of elements stored in object 'z'


k <- z + y + x		# sum the vectors (works if they are of equal length)
k

labels <- c("label1", "label2", "label3", "label4")
labels
	# we can create vectors of other values than numbers

labels.easy <- paste0("label", 1:4)
labels.easy
	# as above, but shows the potentials of the language:
	# 'paste0' glues "label" to each of the numbers 1 to 4.

z[z > 5]		# get values of 'z' greater than 5
z > 5			# is each element of 'z' greater than 5, true or false

sum(z)		# total sum
z^2		# square
sd(z)		# standard deviation
sqrt(z)		# square root
mean(z)		# mean
log(z)		# logarithm
sort(z)		# sort
sort(z, decreasing = TRUE)	# inverse sort
order(z)	# positions of the elements of 'z' in sorted order

easy.test <- t.test(z, mu = 8)		# one-sample t-test of 'z' against a mean of 8
easy.test




### Creation and manipulation of basic factors

# Numeric education codes with the matching labels stored in a "levels"
# attribute.  Setting that attribute does not turn the vector into a factor:
# 'education' stays a plain numeric vector that merely carries the labels.
education <- structure(
  c(0, 1, 2, 3),
  levels = c("none", "primary", "secondary", "university degree")
)
education

as.numeric(education)	# coercion drops the attribute and returns the bare codes


### Creation and manipulation of lists

attributes(easy.test)          # the t-test result is stored as a list
easy.test[["statistic"]]       # pull single components out of the list by name
easy.test[["p.value"]]
easy.test[["conf.int"]]
easy.test[["conf.int"]][1]     # first element of the confidence interval


# advanced example to show the power of the language
  # We want to run one t-test of 'z' for every hypothesised mean from 4 to 9
  values.t.test <- 4:9

  # create a matrix of missing values to store the results,
  # one row per tested mean and one column per stored quantity
  matrix.t.tests <- matrix(data = NA_real_,
                           nrow = length(values.t.test),
                           ncol = 3)

  # program the t-tests
  for (i in seq_along(values.t.test)) {
    # 'i' is only the row index; the hypothesised mean is the i-th
    # element of 'values.t.test' (4, 5, ..., 9), not 'i' itself
    temporal.t.test <- t.test(z, mu = values.t.test[i])

    matrix.t.tests[i, 1] <- temporal.t.test$p.value
                                         # store the p.value in the first
                                         # column of the matrix
    matrix.t.tests[i, 2] <- temporal.t.test$conf.int[1]
    matrix.t.tests[i, 3] <- temporal.t.test$conf.int[2]
  }
  t.tests <- as.data.frame(matrix.t.tests)
  names(t.tests) <- c("p.value", "conf.int.low", "conf.int.high")
                                         # coerce the matrix to a data frame
                                         # and name its columns, only to get
                                         # a nice output.
  t.tests




### Data frames

df <- data.frame(z, y, x, k, education)		# bundle the vectors as columns
df
df[1, ]			# first row
df[, 1]			# first column
df[3, 2]		# the cell in the third row, second column
df[["education"]]	# a single variable, picked by name
df[["z"]][df[["x"]] >= 1.5]
			# values of 'z' on the rows where 'x' is at least 1.5

cov(df)		# covariance matrix of the columns
cor(df)		# correlation matrix of the columns

q <- c(7, 9, 8, 15)
cbind(df, q)		# column bind: show 'df' with 'q' added as a new variable


rm(z, y, x, k, q, education)
			# delete the loose vectors from the workspace; from here on
			# the values only exist as columns of the data frame


# The columns are reached through 'df' itself ('df$variable', or by bare name
# inside 'subset' and 'transform', which evaluate their arguments within the
# data frame).  'df' is deliberately not attach()ed: that would leave it on
# the search path after the script ends.

subset(df, education != 0)	# extract a subset of the data frame: 'education'
				# holds numeric codes, and 0 is the code this
				# script pairs with the label "none"
transform(df, log.z = log(z))	# create a new value in the data frame
				# that is the logarithm of 'z'


o <- order(df$z)
df[o, ]			# sort 'df' with the order defined by 'z'


# apply operations to all variables in a dataframe
vapply(df, mean, numeric(1), na.rm = TRUE)
			# for all variables in df, compute the mean;
			# 'numeric(1)' declares that every result must be a
			# single number, and 'na.rm = TRUE' is handed on to 'mean'


mean.df <- vapply(df, mean, numeric(1), na.rm = TRUE)	# mean of each variable
sd.df   <- vapply(df, sd, numeric(1), na.rm = TRUE)	# standard deviation of each


#### plots, an overview

plot(mean.df, sd.df)	# a basic plot between two variables

# the same plot with symbol, colour, axis labels and title set explicitly;
# a command can be split across several lines
plot(mean.df,
     sd.df,
     pch = 2,
     col = "red",
     xlab = "Mean",
     ylab = "Standard Deviation",
     main = "Nice plot, isn't it?")


summary(df)
plot(df)	# the plot of an object 'data.frame' is a nice shortcut



# final commands
save.image()          # save an image of our workspace.
                      # If we want to work again with the same objects,
                      # restore them with 'load(".RData")', the default
                      # file written by 'save.image()'.

ls()                  # list the objects we have created
rm(list=ls())         # remove every object that 'ls()' lists from the workspace