I like sports! Since mid-2011 I have logged my sports activities on the super Merv’s running site. Here I make a little breakdown of that data using R, Markdown and knitr.
Get libraries:
library(lubridate)
library(plyr)
Which data to read?
# Directory that holds the exported CSV files; the read.csv() calls further
# down build their file paths from it.
datadir <- "20170113"
Load the file, skipping the first two lines. Keep the date as a string.
# Read the swim log, skipping the first two lines and keeping strings as
# character. file.path() builds the path; "," is already read.csv's default
# field separator, so it is not repeated here.
swimfile <- read.csv(
  file.path(datadir, "Swim.csv"),
  skip = 2,
  stringsAsFactors = FALSE
)
Simplify:
# Keep only columns 1 and 4 and name them in the same step.
swimsimple <- setNames(swimfile[, c(1, 4)], c("Date", "distInMiles"))
Add a distance in km column:
# Miles -> km (factor 1.60934), rounded to one decimal place.
swimsimple[["distInKm"]] <- round(
  swimsimple[["distInMiles"]] * 1.60934,
  digits = 1
)
where round(x, 1) rounds to one decimal place, i.e. to the nearest 100 m. Next, parse the date string into a proper date and add a year column:
# Parse the year-month-day strings with lubridate, then extract the year
# from the parsed dates.
swimsimple[["Date"]] <- ymd(swimsimple[["Date"]])
swimsimple[["Year"]] <- year(swimsimple[["Date"]])
Sum km by year:
# Sum distInKm within each Year (one output row per year), then print it.
SwimsByYear <- aggregate( distInKm ~ Year , data = swimsimple , FUN = sum )
SwimsByYear
## Year distInKm
## 1 2011 42.7
## 2 2012 145.5
## 3 2013 36.8
## 4 2014 60.0
## 5 2015 42.0
## 6 2016 4.8
So how many times did I swim each year?
# Number of logged swims per year (plyr's frequency count over Year).
plyr::count(swimsimple, vars = "Year")
## Year freq
## 1 2011 20
## 2 2012 67
## 3 2013 22
## 4 2014 39
## 5 2015 24
## 6 2016 3
Let’s try runs:
# Run log: read the CSV (skipping its first two lines), keep columns 1 and 4
# as Date / distInMiles, convert to km, and total the distance per year.
runfile <- read.csv(
  file.path(datadir, "Run.csv"),
  skip = 2,
  stringsAsFactors = FALSE
)
runsimple <- setNames(runfile[c(1, 4)], c("Date", "distInMiles"))
# Miles -> km (factor 1.60934), rounded to one decimal place.
runsimple$distInKm <- round(runsimple$distInMiles * 1.60934, 1)
# Parse the date strings, then extract the year used for grouping.
runsimple$Date <- ymd(runsimple$Date)
runsimple$Year <- year(runsimple$Date)
RunsByYear <- aggregate(distInKm ~ Year, data = runsimple, FUN = sum)
RunsByYear
## Year distInKm
## 1 2011 234.4
## 2 2012 475.6
## 3 2013 854.2
## 4 2014 826.4
## 5 2015 809.9
## 6 2016 693.3
## 7 2017 32.0
# Number of logged runs per year (plyr's frequency count over Year).
plyr::count(runsimple, vars = "Year")
## Year freq
## 1 2011 44
## 2 2012 87
## 3 2013 112
## 4 2014 128
## 5 2015 119
## 6 2016 107
## 7 2017 4
Bikes:
# Bike log: read the CSV (skipping its first two lines), keep columns 1 and 4
# as Date / distInMiles, convert to km, and total the distance per year.
bikefile <- read.csv(
  file.path(datadir, "Bike.csv"),
  skip = 2,
  stringsAsFactors = FALSE
)
bikesimple <- setNames(bikefile[c(1, 4)], c("Date", "distInMiles"))
# Miles -> km (factor 1.60934), rounded to one decimal place.
bikesimple$distInKm <- round(bikesimple$distInMiles * 1.60934, 1)
# Parse the date strings, then extract the year used for grouping.
bikesimple$Date <- ymd(bikesimple$Date)
bikesimple$Year <- year(bikesimple$Date)
BikesByYear <- aggregate(distInKm ~ Year, data = bikesimple, FUN = sum)
BikesByYear
## Year distInKm
## 1 2012 1209.9
## 2 2013 1296.0
## 3 2014 1132.4
## 4 2015 627.8
## 5 2016 2816.3
## 6 2017 60.0
# Number of logged rides per year (plyr's frequency count over Year).
plyr::count(bikesimple, vars = "Year")
## Year freq
## 1 2012 41
## 2 2013 31
## 3 2014 21
## 4 2015 16
## 5 2016 112
## 6 2017 3
All the distances in a nice summary table:
# Combine the three per-year tables by joining on Year instead of padding the
# columns by position. Each table only has rows for years that contain at
# least one entry, so positional padding breaks (length error or silently
# shifted rows) as soon as the set of years for any sport changes.
ByYear <- Reduce(
  function(left, right) merge(left, right, by = "Year", all = TRUE),
  list(RunsByYear, SwimsByYear, BikesByYear)
)
colnames(ByYear) <- c("Year", "Run (km)", "Swim (km)", "Bike (km)")
# A year absent from one sport comes out of the outer join as NA; report 0 km.
ByYear[is.na(ByYear)] <- 0
ByYear
## Year Run (km) Swim (km) Bike (km)
## 1 2011 234.4 42.7 0.0
## 2 2012 475.6 145.5 1209.9
## 3 2013 854.2 36.8 1296.0
## 4 2014 826.4 60.0 1132.4
## 5 2015 809.9 42.0 627.8
## 6 2016 693.3 4.8 2816.3
## 7 2017 32.0 0.0 60.0