R Quick Reference - Justin L Ross

General Notes

Packages:
install.packages('package_name') # install a custom package
library(package_name) # load a custom package
rvest # package for scraping websites in R
dplyr # package for manipulating data

vector_name <- c(1,2,3) # Create a vector called vector_name, assign values 1,2,3 to it.
names(vector_name) <- c('a','b','c') # Give the values in vector_name names, a for the first value, b for the second value and c for the third value.
mat <- matrix(1:25,nrow = 5) # Create a matrix from 1 to 25, with 5 rows.
# Template showing the general shape of a function definition.
#   a_variable: required argument; returned unchanged as the function output.
#   b_variable: required argument; overwritten inside the body to show local assignment.
#   c_constant: optional argument with a default value of 1 (unused in this template).
function_name <- function(a_variable, b_variable, c_constant = 1) {
  b_variable <- 2 # assign variable a value.
  # function code goes here.
  return(a_variable) # function output; return() is a function, so it needs parentheses
}

Exercises

### Vector Source Code ###
> vector <- c(1,2,3) #Assigning vector values
> vector
[1] 1 2 3
> names(vector) <- c('a','b','c') #Assigning names to vector values
> vector
a b c 
1 2 3 
> vector < 2 #Checking for values less than 2
    a     b     c 
 TRUE FALSE FALSE 
> vector[vector < 2] #Checking for values less than 2 and displaying vector names and values associated, rather than true false values
a 
1 

### Matrix Source Code ###
> A <- c(1,2,3)> B <- c(4,5,6)> cbind(A,B) #This binds A and B to columns A B[1,] 1 4[2,] 2 5[3,] 3 6> rbind(A,B) #This binds A and B to rows [,1] [,2] [,3]A 1 2 3B 4 5 6> 1:9 # creates sequence of integers 1 to 9[1] 1 2 3 4 5 6 7 8 9> matrix(1:9,byrow =TRUE,nrow=3) #This creates a matrix, 1:9 inputs values 1 to 9, byrow means that the input values are assigned horizontaly, nrow is the number of rows the matrix has. [,1] [,2] [,3][1,] 1 2 3[2,] 4 5 6[3,] 7 8 9> mat <- matrix(1:9,byrow=TRUE,nrow=3)> mat [,1] [,2] [,3][1,] 1 2 3[2,] 4 5 6[3,] 7 8 9> is.matrix(mat) #is.matrix checks to see if a given variable, ‘mat’, is a matrix[1] TRUE> mat2 <- matrix(1:25,byrow = T,nrow = 5)> mat2 [,1] [,2] [,3] [,4] [,5][1,] 1 2 3 4 5[2,] 6 7 8 9 10[3,] 11 12 13 14 15[4,] 16 17 18 19 20[5,] 21 22 23 24 25
> mat2[2:3,2:3] #Collects values [7,8] and [12,13] using index notation [,1] [,2][1,] 7 8[2,] 12 13> mat2[4:5,4:5] #Collects values [19,20] and [24,25] using index notation. This is a square [,1] [,2][1,] 19 20[2,] 24 25> sum(mat2) #Add up all values in mat2 matrix.[1] 325> colsums(mat2) #Sums up columns.Error in colsums(mat2) : could not find function “colsums”> colSums(mat2) #Sums up columns.[1] 55 60 65 70 75> rowSums(mat2)[1] 15 40 65 90 115

> help(runif) #This checks the help for the runif function> runif(20,min=0,max=100) #pass in 20 random values, between a min of 0 and a max of 100 [1] 31.146192 41.287946 53.108591 82.716658 15.522419 [6] 70.172945 44.466671 62.904256 48.862113 34.627040[11] 30.257460 46.095758 10.462524 1.403101 21.049189[16] 87.411352 74.211253 5.480736 40.986765 82.275979> matrix(runif(20),nrow = 4) #This creates a matrix of 20 random values, the matrix has 4 rows [,1] [,2] [,3] [,4][1,] 0.9924826 0.9146828 0.16621811 0.3512262[2,] 0.9180270 0.7426404 0.28068563 0.2033677[3,] 0.2680327 0.4161611 0.91767330 0.7807159[4,] 0.3011400 0.1761156 0.04639861 0.3383925 [,5][1,] 0.76602837[2,] 0.00637992[3,] 0.80063581[4,] 0.53229669

### Data Frames ###
> Name <- c('Sam','Frank','Amy')
> Age <- c(22,25,26)
> Weight <- c(150,165,120)
> Sex <- c('M','M','F')
> df <- data.frame(row.names = Name, Age,Weight,Sex) #Create a data frame called df, the row names are the Name vector, and columns are age,weight and sex.
> print(df)
      Age Weight Sex
Sam    22    150   M
Frank  25    165   M
Amy    26    120   F
> is.data.frame(mtcars) #Check if mtcars is a dataframe.
[1] TRUE

> mat <- matrix(1:25,nrow = 5)> as.data.frame(mat) #display the matrix mat as a data frame. V1 V2 V3 V4 V51 1 6 11 16 212 2 7 12 17 223 3 8 13 18 234 4 9 14 19 245 5 10 15 20 25> mat [,1] [,2] [,3] [,4] [,5][1,] 1 6 11 16 21[2,] 2 7 12 17 22[3,] 3 8 13 18 23[4,] 4 9 14 19 24[5,] 5 10 15 20 25

> df <- mtcars> head(df) #Counting from the top, choose the first 6 values in the table mpg cyl disp hp drat wt qsec vs am gear carbMazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1> head(df,2) #Choose the first 2 values in the table mpg cyl disp hp drat wt qsec vs am gear carbMazda RX4 21 6 160 110 3.9 2.620 16.46 0 1 4 4Mazda RX4 Wag 21 6 160 110 3.9 2.875 17.02 0 1 4 4

> df$mpg #Grab all the mpg values from the df dataframe. [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4[16] 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 15.8 19.7[31] 15.0 21.4> mean(df$mpg) #Average all the mpg values from the df data frame.[1] 20.09062

> df[df$cyl == 6,] #Select entries from the df dataframe where values in the cyl column are equal to 6. mpg cyl disp hp drat wt qsec vs am gear carbMazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
> subset(df,cyl==6) #Alternative method to select entries from the df dataframe where values in the cyl column are equal to 6. mpg cyl disp hp drat wt qsec vs am gear carbMazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6

> df[,’am’] [1] 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 1 1 1 1 1> df[,c(‘am’,’gear’,’carb’)] am gear carbMazda RX4 1 4 4Mazda RX4 Wag 1 4 4Datsun 710 1 4 1Hornet 4 Drive 0 3 1Hornet Sportabout 0 3 2Valiant 0 3 1Duster 360 0 3 4Merc 240D 0 4 2Merc 230 0 4 2Merc 280 0 4 4Merc 280C 0 4 4Merc 450SE 0 3 3Merc 450SL 0 3 3Merc 450SLC 0 3 3Cadillac Fleetwood 0 3 4Lincoln Continental 0 3 4Chrysler Imperial 0 3 4Fiat 128 1 4 1Honda Civic 1 4 2Toyota Corolla 1 4 1Toyota Corona 0 3 1Dodge Challenger 0 3 2AMC Javelin 0 3 2Camaro Z28 0 3 4Pontiac Firebird 0 3 2Fiat X1-9 1 4 1Porsche 914-2 1 5 2Lotus Europa 1 5 2Ford Pantera L 1 5 4Ferrari Dino 1 5 6Maserati Bora 1 5 8Volvo 142E 1 4 2
> df$performance <- df$hp/df$wt #Add a column to a table called performance, and make it up of hp divided by wt> head(df) mpg cyl disp hp drat wt qsec vs am gear carb performanceMazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 41.98473Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 38.26087Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 40.08621Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 34.21462Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 50.87209Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 30.34682
> df$performance <- round(df$performance, digits = 2) > head(df) mpg cyl disp hp drat wt qsec vs am gear carbMazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 performanceMazda RX4 41.98Mazda RX4 Wag 38.26Datsun 710 40.09Hornet 4 Drive 34.21Hornet Sportabout 50.87Valiant 30.35
> subset(df,hp > 100 & wt > 2.5) #Select a subset of the df dataframe where hp is greater than 100 AND wt is greater than 2.5 mpg cyl disp hp drat wt qsec vs am gearMazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 carb performanceMazda RX4 4 41.98Mazda RX4 Wag 4 38.26Hornet 4 Drive 1 34.21Hornet Sportabout 2 50.87Valiant 1 30.35Duster 360 4 68.63Merc 280 4 35.76Merc 280C 4 35.76Merc 450SE 3 44.23Merc 450SL 3 48.26Merc 450SLC 3 47.62Cadillac Fleetwood 4 39.05Lincoln Continental 4 39.64Chrysler Imperial 4 43.03Dodge Challenger 2 42.61AMC Javelin 2 43.67Camaro Z28 4 63.80Pontiac Firebird 2 45.51Ford Pantera L 4 83.28Ferrari Dino 6 63.18Maserati Bora 8 93.84Volvo 142E 2 39.21> subset(df,hp > 100 & wt > 2.5)$mpg [1] 21.0 21.0 21.4 18.7 18.1 14.3 19.2 17.8 16.4 17.3 15.2 10.4 10.4[14] 14.7 15.5 15.2 13.3 19.2 15.8 19.7 15.0 21.4> mean(subset(df,hp > 100 & wt > 2.5)$mpg)[1] 16.86364

> df[df$hp > 100 & df$wt > 2.5,]$mpg [1] 21.0 21.0 21.4 18.7 18.1 14.3 19.2 17.8 16.4 17.3 15.2 10.4 10.4[14] 14.7 15.5 15.2 13.3 19.2 15.8 19.7 15.0 21.4> mean(df[df$hp > 100 & df$wt > 2.5,]$mpg)[1] 16.86364

> df[‘Hornet Sportabout’,] #Select just the Hornet Sportabout row from the data frame. mpg cyl disp hp drat wt qsec vs am gear carb performanceHornet Sportabout 18.7 8 360 175 3.15 3.44 17.02 0 0 3 2 50.87> df[‘Hornet Sportabout’,]$mpg #Select just the mpg from the Hornet Sportabout row from the data frame.[1] 18.7

### Conditionals Exercises ###
> x <- 2
> if(x%%2 == 0){
+   print("EVEN")
+ }else{
+   print("NOT EVEN")
+ }
[1] "EVEN"
> x <- matrix()
> if (is.matrix(x)){
+   print('IS A MATRIX')
+ }else{
+   print('Not a matrix')
+ }
[1] "IS A MATRIX"
> x <- c(3,7,1)
> # first and second place
> if (x[1] > x[2]){
+   fir <- x[1]
+   sec <- x[2]
+ }else{
+   fir <- x[2]
+   sec <- x[1]
+ 
+ }
> # 3rd was largest
> if (x[3] > fir & x[3] > sec){
+   thi <- sec
+   sec <- fir
+   fir <- x[3]
+   # 3rd was smallest
+ }else if (x[3] < fir & x[3] <sec){
+   thi <- x[3]
+ }else{
+   thi <- sec
+   sec <- x[3]
+ }
> 
> print(paste(fir,sec,thi))
[1] "7 3 1"

> x <- c(1,100,2)
> 
> if (x[1]>x[2] & x[1]> x[3]){
+   print(x[1])
+ }else if (x[2] > x[3]){
+   print(x[2])
+ }else{
+   print(x[3])
+ }
[1] 100

### Function Exercise ###
> prod <- function(num1,num2){ #prod is function name, num1 and num2 are local var
+   return(num1*num2) #return the values associated with num1*num2
+ }
> print(prod(3,4)) #print the returned value of the prod function with num1 = 3 and num2 = 4.
[1] 12

> num_check <- function(num,v){
+   for (ele in v){ #for every element in the vector, v
+     if (ele == num){ #if one of the elements is equal to the num variable
+       return(TRUE) #return the boolean TRUE value
+     }
+   }
+   return(FALSE) #Else return the boolean FALSE value
+ }
> print(num_check(2,c(1,4,3))) #print the return value of the function when num = 2 and v = c(1,4,3). As there is no 2 in the 1,4,3 vector this will be FALSE.
[1] FALSE

### Advanced R Programming ###
:: built in r functions
seq() # Create a sequence
seq(0,10,by=2) # 0 is the start, 10 is the end of the sequence and 2 is the interval between them. This would output 0 2 4 6 8 10.

sort() # Sort a vector
a_vector <- c(5,20,1) # A vector with a random set of numbers.
sort(a_vector) # This will sort the numbers of a_vector, defaults to ascending so will be 1,5,20.
sort(a_vector,decreasing = TRUE) # This will sort the numbers of a_vector, in this case in descending order, so 20,5,1.
# Sort also works on strings, and sorts based on alphabetical order. Capitalization is meaningless, basically. lowercase a = uppercase A.
rev() # Reverse elements in an object
rev(a_vector) # This will reverse the contents of the vector, so 5,20,1 will become 1,20,5.
str() # Show an objects structure. Whether it is integer, string, etc and its general contents.
append() # Merge objects together, such as vectors and lists.
a_second_vector <- c(50,51,52)
append(a_vector,a_second_vector) # This will add a_second_vector to the end of a_vector to make 5,20,1,50,51,52.
is.matrix(a_vector) # This will check to see if a_vector is a matrix or not.
as.matrix(a_vector) # This will convert a_vector into a matrix.

:: Apply
print(sample(x = 1:10,1)) # This prints a random number between 1 and 10, with jumps of 1 so only integers.

# addrandom: add one random integer between 1 and 100 to x.
#   x: a number (if x is a vector, the same random value is added to every element).
#   Returns x plus the random value.
addrandom <- function(x) {
  ran <- sample(1:100, 1) # draw a single random integer from 1 to 100
  x + ran
}
print(addrandom(10)) # This function adds a random number to 10.

# lapply stands for 'list apply'. It applies the addrandom function to every element in the a_vector vector. So basically it will run the addrandom function once for each item in a_vector and return the results as a list.
a_vector <- 1:5
result <- lapply(a_vector, addrandom)
print(result)

# sapply stands for 'simplified lapply': it runs lapply and then simplifies the result where it can.
# TODO: revisit sapply in more depth. The main difference is the shape of the output: lapply always returns a list, while sapply returns a plain vector (or a matrix) when every result has the same length.

:: Math Functions

abs(2) # Gives absolute value, ie distance from 0. In this case it is 2, it would also be 2 if it were negative.
sum() # Performs basic addition
mean() # Gives the average
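round(2.3453,digits = 2) # This will round 2.3453 to 2 decimal places, giving 2.35 (the remainder .0053 is above the halfway point, so it rounds up). An exact trailing 5 is rounded to the even digit, e.g. round(2.5) gives 2.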
The R reference card is a PDF listing commonly used R functions, link is: https://cran.r-project.org/doc/contrib/Short-refcard.pdf

:: Expressions
a_string <- "This string contains the word Zulu"
grepl('Zulu',a_string) # grepl will search the string a_string and check it for the string 'Zulu'. If it finds it, it will return TRUE, otherwise FALSE.
grep('Zulu',a_string) # this will return the index of each element of the vector that contains 'Zulu' (here 1, because a_string holds a single string), not the position of 'Zulu' inside the string.

:: Dates and Timestamps
1998-01-26 # This is the standard format of date class in R. 1998 is the year, 01 is the month and 26 is the day.
Sys.Date() # Outputs systems date, in other words the date of the PC being used.
new.date <- as.Date("Dec-02-98",format = "%b-%d-%y") # Convert a string or other data type into a date. This assigns 98 to the %y or year value, 02 to the %d or day value and Dec to the %b or abbreviated month value. Basically the %b, %d and %y tell as.Date how to read each part of the string.
%d # Day of the month.
%m # Decimal number month.
%b # Abbreviated month name.
%B # Full name month.
%y # 2 digit year.
%Y # 4 digit year.

??? There is some further boring stuff with POSIXct and something else, but I just couldn't be bothered ???

### Data Manipulation ###
Dplyr package: For manipulating data, simple syntax.
Tidyr package: Used for tidying data.
:: Dplyr ::
Those familiar with SQL will find dplyr has a lot in common with it in terms of how it functions.
install.packages('dplyr')
library(dplyr)
filter(data_frame_name,column1==2,column2=='string',etc) # filter is almost like a 'display if' statement in which observations/rows from data_frame_name are displayed if a specific condition is met, ie column1==2 would display items only if that condition were met.
slice(data_frame_name, 1:10) # Select the rows 1 to 10.
arrange(data_frame_name,column1,column2) # This controls the way in which data is ordered. In this case it would be ordered first by column1 and if they were the same then it would be ordered by column2.
select(data_frame_name,column1) # This will just display column1 from data_frame_name and no other columns.
rename(data_frame_name,new_column_name = column1) # When displayed this renames column1 to new_column_name.
distinct(data_frame_name,column1) # Select observations from data_frame_name where column1 has distinct values, which means unique values so no duplicate values.
mutate(data_frame_name,new_column_name = column1-column2) # Basically you can create a new column, in this case new_column_name, which is a function of existing columns, namely column1 minus column2.
transmute(data_frame_name,new_column_name = column1-column2) # Like mutate, but it will only return the newly created column. So only new_column_name would be displayed.
summarise(data_frame_name,new_column_name = mean(column1)) # This will output the overall average value of column1, listed under new_column_name. The expression assigned to new_column_name needs to be an aggregate function that returns a single value, like mean, sum, etc.
sample_n(data_frame_name,10) # This returns 10 random rows.
sample_frac(data_frame_name,.1) # This returns a random 10% of the rows.

Pipe operator: %>%
?? Start here at the pipe operator

Common Issues

fff

iNZight – graphical user interface for R.