1 #!/usr/local/bin/R
  2 
  3 # Xavier Fernández i Marín 
  4 # December 2004
  5 # http://xavier-fim.net
  6 
  7 
  8 ### Creation and manipulation of basic vectors
  9 
 10 ls()                    # how many objects do we have in the workspace?
 11 
 12 z <- c(1,6,5,7)         # 'c' concatenates different objects
 13 y <- rep(1,4)           # create repeated objects
 14 x <- seq(1,2,0.25)      # create a sequence from 1 to 2 by 0.5
 15 x
 16 x <- c(x[1:4])          # x will be only the first four elements of 'x'
 17 x
 18 x <- c(x[1:length(z)])  # another way to do it.
 19 x
 20 
 21 length(z)               # objects stored in object 'z'
 22 
 23 
 24 k <- z + y + x          # sum the vectors (works if they are of equal length)
 25 k
 26 
 27 labels <- c("label1", "label2", "label3", "label4")
 28 labels
 29         # we can create vectors of other values than numbers
 30 
 31 labels.easy <- c(paste("label", 1:4, sep=""))
 32 labels.easy
 33         # as above, but shows the potentials of the language.
 34 
 35 z[z>5]                  # get values of 'z' greater than 5
 36 z>5                     # is 'z' object greater than 5, true or false
 37 
 38 sum(z)          # total sum
 39 z^2             # square
 40 sd(z)           # standard deviation
 41 sqrt(z)         # square root
 42 mean(z)         # mean
 43 log(z)          # logarithm
 44 sort(z)         # sort
 45 sort(z, decreasing=TRUE)        # inverse sort
 46 order(z)        # shows which is the order of the values in 'z' if sorted
 47 
 48 easy.test <- t.test(z, mu=8)            # t-test
 49 easy.test
 50 
 51 
 52 
 53 
 54 ### Creation and manipulation of basic factors
 55 
 56 education <- c(0,1,2,3)
 57 levels(education) <- c("none", "primary", "secondary", "university degree")
 58 education
 59 
 60 as.numeric(education)   # objects can be coerced to other types
 61 
 62 
 63 ### Creation and manipulation of lists
 64 
 65 attributes(easy.test)    # t.test is stored as a list
 66 easy.test$statistic      # get specific values of the list
 67 easy.test$p.value
 68 easy.test$conf.int
 69 easy.test$conf.int[1]
 70 
 71 
 72 # advanced example to show the power of the language
 73   # We want to make t.tests for values from 4 to 9
 74   values.t.test <- 4:9
 75 
 76   # create a matrix with missing data to store the data
 77   matrix.t.tests <- matrix(data=NA,
 78                            ncol=3,
 79                            nrow=length(values.t.test),
 80                            byrow=TRUE)
 81 
 82   # program the t-tests
 83   for (i in 1:length(4:9)) {
 84     temporal.t.test <- t.test(z, mu=i)   # we substitute mu=8 by 'i'
 85                                          # in each loop
 86     matrix.t.tests[i,1] <- temporal.t.test$p.value
 87                                          # store the p.value in the first
 88                                          # column of the matrix
 89     matrix.t.tests[i,2] <- temporal.t.test$conf.int[1]
 90     matrix.t.tests[i,3] <- temporal.t.test$conf.int[2]
 91   }
 92   t.tests <- as.data.frame(matrix.t.tests)
 93   names(t.tests) <- c("p.value", "conf.int.low", "conf.int.high")
 94                                          # coerce object 't.tests' to be
 95                                          # a dataframe, instead of a matrix
 96                                          # only to get a nice output.
 97                                          # Is is not possible to have
 98                                          # column names in matrices.
 99   t.tests
100 
101 
102 
103 
104 ### Data frames
105 
106 df <- data.frame(z, y, x, k, education)         # create a data frame
107 df
108 df[1,]                  # get first row
109 df[,1]                  # get first column
110 df[3,2]                 # get third row, second column
111 df$education            # get specific variable from a data frame
112 df$z[df$x>=1.5]         # get values of 'z' when 'x' holds the condition
113 
114 cov(df)         # covariance matrix
115 cor(df)         # correlations
116 
117 q <- c(7,9,8,15)
118 cbind(df, q)            # add a variable to the data frame, colume bind
119 
120 
121 rm(list=c("z","y","x","k","q","education"))
122                         # delete values into the workspace except the data frame
123 
124 
125 attach(df)      # put 'df' into the working path
126                 # now we can refer to the variables into the data frame
127                 # without having to use 'df$variable'
128 
129 subset(df, education!="none")   # extract a subset of the data frame
130 transform(df, log.z=log(z))     # create a new value in the data frame
131                                 # that is the logarithm of 'z'
132 
133 
134 o <- order(z)
135 df[o,]                  # sort 'df' with the order defined by 'z'
136 
137 
138 # apply operations to all variables in a dataframe
139 sapply(df, mean, na.rm=T)
140                         # for all variables in df, compute the mean
141                         # with the options specified in the third parameter
142 
143 
144 mean.df <- sapply(df, mean, na.rm=T)
145 sd.df   <- sapply(df, sd, na.rm=T)
146 
147 
#### plots, an overview
#
# Draws a scatter plot of the per-variable standard deviations against the
# per-variable means ('mean.df' and 'sd.df' are computed earlier in the
# script), first with defaults and then customised, and finally summarises
# and plots the whole data frame 'df'.

plot(mean.df, sd.df)    # a basic plot between two variables
plot(mean.df,
     sd.df,
     pch = 2,                           # plotting symbol: triangle
     col = "red",
     xlab = "Mean",
     ylab = "Standard Deviation",
     main = "Nice plot, isn't it?")     # no trailing comma after the last
                                        # argument: it would pass an empty
                                        # extra argument to plot()
# a single command can be split across several lines


summary(df)
plot(df)        # plotting a whole data frame is a shortcut for a
                # scatter plot matrix of all its variables
164 
165 
166 
# final commands

# Save an image of the whole workspace. With no arguments the image is
# written to the file ".RData" in the current working directory; to work
# with the same objects again later, restore them with load(".RData").
save.image()

# List the objects created during the session ...
ls()

# ... and remove all of them: the character vector returned by ls() is
# handed directly to rm() through its 'list' argument.
rm(list = ls())
174