Irucka Embry, EIT [Cherokee Nation Technology Solutions (CNTS) United States Geological Survey (USGS) Contractor] gave this tutorial to his USGS colleagues on Friday, 28 August 2015, as part of a group R tutorial. This tutorial has been modified from its original presentation.



Useful Resources

R Resource Web page: http://www.ecoccs.com/RandUSGS.html (R Resources provided by Irucka Embry)

That Web page contains information related to this series of R Tutorials as well as many other useful resources for the USGS



Tutorial

# Install and load the necessary R packages prior to beginning this tutorial

# If you are unsure if you have all of the packages already installed
install.packages(install.load) # install the install.load package maintained by Irucka Embry
install.load::install_load("openxlsx", "readxl", "lubridate", "ggplot2", "htmlTable") # install and/or load the following packages and dependencies



Download this file (http://www.ecoccs.com/R_Tutorial/28_August_2015/WForkStonesRiver03428200.xlsx) to your working directory on your computer



# If you are sure that you have all of the packages installed
# install.packages(install.load) # install the install.load package maintained by Irucka Embry
install.load::load_package("openxlsx", "readxl", "lubridate", "ggplot2", "htmlTable") # load the packages and dependencies


file <- "WForkStonesRiver03428200.xlsx" # tk_choose.files() # obtain the file using the file dialog
# the function tk_choose.files() was used in the original tutorial, but won't be used here
file # this will print the filename
## [1] "WForkStonesRiver03428200.xlsx"
# Importing Data
stone <- read.xlsx(file, startRow = 2) # read in the spreadsheet chosen with the file dialog starting at Row 2
stone <- stone[-1, ] # remove Row 1
stone[, 3] <- ymd(stone[, 3]) # change Column 3 (datetime) from character class to POSIXct (date format)
stone[, 4] <- as.numeric(stone[, 4]) # change Column 4 [Discharge, cubic feet per second (Mean)] from character class to numeric class
htmlTable(summary(stone[, 4], na.rm = TRUE), header = c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.", "NA's"), rnames = FALSE, align = "l", align.header = "l", css.cell = "padding-left: 0.5em; padding-right: 3em;") # print out a statistical summary for Column 4
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
4.7 37 109 298.1 295 21200 1432
# select only the Rows where the data are Approved (A)
Agrepl <- grepl("A", stone[, 5]) # find all "A"s in Column 5 of stone
Agreplwhich <- which(grepl("A", stone[, 5])) # what are the Row numbers where there is an "A" in Column 5 of stone
stoneA <- stone[which(grepl("A", stone[, 5])), 1:5] # select only the Rows in Columns 1:5 where the data are Approved
htmlTable(head(stoneA, 30), rnames = FALSE, align = "l", align.header = "l", css.cell = "padding-left: 0.5em; padding-right: 8em;") # only print the first 30 rows
agency_cd site_no datetime X01_00060_00003 X01_00060_00003_cd
USGS 03428200 1972-07-20 25 A
USGS 03428200 1972-07-21 20 A
USGS 03428200 1972-07-22 16 A
USGS 03428200 1972-07-23 15 A
USGS 03428200 1972-07-24 16 A
USGS 03428200 1972-07-25 14 A
USGS 03428200 1972-07-26 13 A
USGS 03428200 1972-07-27 13 A
USGS 03428200 1972-07-28 1360 A
USGS 03428200 1972-07-29 854 A
USGS 03428200 1972-07-30 754 A
USGS 03428200 1972-07-31 417 A
USGS 03428200 1972-08-01 210 A
USGS 03428200 1972-08-02 147 A
USGS 03428200 1972-08-03 114 A
USGS 03428200 1972-08-04 88 A
USGS 03428200 1972-08-05 69 A
USGS 03428200 1972-08-06 57 A
USGS 03428200 1972-08-07 60 A
USGS 03428200 1972-08-08 100 A
USGS 03428200 1972-08-09 367 A
USGS 03428200 1972-08-10 232 A
USGS 03428200 1972-08-11 131 A
USGS 03428200 1972-08-12 102 A
USGS 03428200 1972-08-13 137 A
USGS 03428200 1972-08-14 109 A
USGS 03428200 1972-08-15 80 A
USGS 03428200 1972-08-16 64 A
USGS 03428200 1972-08-17 52 A
USGS 03428200 1972-08-18 45 A
# select only the Rows where the data are Provisional (P)
Pgrepl <- grepl("P", stone[, 5]) # find all "P"s in Column 5 of stone
Pgreplwhich <- which(grepl("P", stone[, 5])) # what are the Row numbers where there is an "P" in Column 5 of stone
stoneP <- stone[which(grepl("P", stone[, 5])), 1:5] # select only the Rows in Columns 1:5 where the data are Provisional
htmlTable(head(stoneP, 30), rnames = FALSE, align = "l", align.header = "l", css.cell = "padding-left: 0.5em; padding-right: 8em;") # only print the first 30 rows
agency_cd site_no datetime X01_00060_00003 X01_00060_00003_cd
USGS 03428200 2014-12-18 177 P
USGS 03428200 2014-12-19 170 P
USGS 03428200 2014-12-20 162 P
USGS 03428200 2014-12-21 151 P
USGS 03428200 2014-12-22 140 P
USGS 03428200 2014-12-23 214 P
USGS 03428200 2014-12-24 2690 P
USGS 03428200 2014-12-25 1020 P
USGS 03428200 2014-12-26 591 P
USGS 03428200 2014-12-27 482 P
USGS 03428200 2014-12-28 2090 P
USGS 03428200 2014-12-29 1550 P
USGS 03428200 2014-12-30 878 P
USGS 03428200 2014-12-31 603 P
USGS 03428200 2015-01-01 484 P
USGS 03428200 2015-01-02 413 P
USGS 03428200 2015-01-03 426 P
USGS 03428200 2015-01-04 2140 P
USGS 03428200 2015-01-05 841 P
USGS 03428200 2015-01-06 578 P
USGS 03428200 2015-01-07 459 P
USGS 03428200 2015-01-08 367 P
USGS 03428200 2015-01-09 323 P
USGS 03428200 2015-01-10 276 P
USGS 03428200 2015-01-11 243 P
USGS 03428200 2015-01-12 1010 P
USGS 03428200 2015-01-13 1260 P
USGS 03428200 2015-01-14 625 P
USGS 03428200 2015-01-15 478 P
USGS 03428200 2015-01-16 398 P
# the name of column 3 is datetime
colnames(stone[3])
## [1] "datetime"
# or
names(stone[3])
## [1] "datetime"
# the name of column 4 is X01_00060_00003
colnames(stone[4])
## [1] "X01_00060_00003"
# or
names(stone[4])
## [1] "X01_00060_00003"
# plot the time series using base R plot

plot(stone[, 3], stone[, 4], type = "l", main = "Time Series of West Stone Forks River", xlab = "Year", ylab = "Mean discharge (cfs)") # plot Columns 3 and 4 as a line plot

# Note the last 2 plots are the same, I have just presented 2 different ways of creating the same plot
# plot the time series using ggplot2

ggplot(stone, aes(x = datetime, y = X01_00060_00003)) + geom_line() + labs(x = "Year", y = "Mean discharge (cfs)", title = "Time Series of West Stone Forks River")