# R code from lecture 01 - uses stats202log.txt
#
# Walks through reading a log file, creating small data structures by hand,
# indexing, element-wise arithmetic, summary statistics, and writing a CSV.

#
# Read data in from a file
#

# Folder that holds stats202log.txt and receives the CSV written below.
# Defaults to the current working directory. Edit this value (or start R in
# that folder) instead of calling setwd() with a machine-specific path.
data_dir <- "."
log_file <- file.path(data_dir, "stats202log.txt")

if (file.exists(log_file)) {
  # Values are printed explicitly in this section because R does not
  # auto-print expressions nested inside a braced block.

  # Tell R the file is space-separated (sep = " ") and has no header row.
  # stringsAsFactors = TRUE is spelled out because the default became FALSE
  # in R 4.0; without it the factor checks below would always be FALSE.
  data <- read.csv(
    log_file,
    sep = " ",
    header = FALSE,
    stringsAsFactors = TRUE
  )

  # See what is in the 1st through 5th rows
  print(data[1:5, ])

  # See what is in the 1st column
  print(data[, 1])

  # Find out which data are factors
  print(is.factor(data[, 1]))

  # If column 1 was read as a factor, R warns and returns NA here:
  # arithmetic is not meaningful for factors.
  print(data[, 1] + 10)

  print(data[, 8])
  print(is.factor(data[, 8]))
  print(is.numeric(data[, 8]))

  # Tell R that "-" means there is no data for that entry, then re-run the
  # same checks on column 8 to see how its type changes.
  data <- read.csv(
    log_file,
    sep = " ",
    header = FALSE,
    stringsAsFactors = TRUE,
    na.strings = "-"
  )
  print(is.factor(data[, 8]))
  print(is.numeric(data[, 8]))
} else {
  # Skip rather than abort, so the sections below (which do not need the
  # log file) still run.
  message(
    "Skipping the file-reading section: '", log_file, "' was not found. ",
    "Set data_dir to the folder containing stats202log.txt."
  )
}

#
# Creating Data in R
#

# Zip codes are labels, not quantities; keeping them as a factor built from
# strings preserves leading zeros such as "00123".
zip_codes <- as.factor(c("94550", "00123", "43614"))
is.factor(zip_codes)

aa <- c(1, 10, 12)
aa

# Manipulating data - simple operations:
aa + 10
length(aa)

bb <- c(2, 6, 79)
my_data_set <- data.frame(attributeA = aa, attributeB = bb)
my_data_set

# Indexing data - note [row, column]
my_data_set[, 1]
my_data_set[1, ]
my_data_set[3, 2]
my_data_set[1:2, ]
my_data_set[c(1, 3), ]

# Element-wise arithmetic on vectors (in Matlab this is aa ./ bb)
aa / bb

#
# Summary Statistics
#
mean(my_data_set[, 1])
median(my_data_set[, 1])
# sd() is the sample standard deviation, i.e. sqrt(var(x))
sd(my_data_set[, 1])

#
# Write a file out
#
write.csv(my_data_set, file.path(data_dir, "my_data_set_file.csv"))

#
# Get a help file
#

# Only open the help page in an interactive session; in a batch run there is
# nobody to read it.
if (interactive()) {
  print(help("write.csv"))
}