I like sports! Since mid-2011 I have logged my sports activities on the super Merv’s running site. Here I make a little breakdown of this data using R, Markdown and knitr.

Get libraries:

library(lubridate)
library(plyr)

Which data to read?

# Directory holding the exported CSV files; the read.csv() calls below build
# their paths from it.
datadir <- "20170113"

Load the file, skipping the first two lines. Keep the date as a string.

# Read the swim export. skip = 2 drops the first two lines of the file, and
# stringsAsFactors = FALSE keeps text columns (the date included) as character.
swimfile <- read.csv(
  file.path(datadir, "Swim.csv"),
  sep = ",",
  skip = 2,
  stringsAsFactors = FALSE
)

Simplify:

# Keep only columns 1 and 4 of the export and give them readable names.
swimsimple <- setNames(swimfile[c(1, 4)], c("Date", "distInMiles"))

Add a distance in km column:

# Convert miles to kilometres (factor 1.60934), rounded to one decimal place.
swimsimple$distInKm <- round(swimsimple$distInMiles * 1.60934, 1)

where round(x, 1) rounds to one decimal place, i.e. to the nearest 100 m. Make a nicer date and year column:

# Parse the date strings (year-month-day order) with lubridate, then extract
# the calendar year so the swims can be grouped by it.
swimsimple$Date <- ymd(swimsimple$Date)
swimsimple$Year <- year(swimsimple$Date)

Sum km by year:

# Total swim distance (km) for each year, then print the table.
SwimsByYear <- aggregate(distInKm ~ Year, data = swimsimple, FUN = sum)
SwimsByYear
##   Year distInKm
## 1 2011     42.7
## 2 2012    145.5
## 3 2013     36.8
## 4 2014     60.0
## 5 2015     42.0
## 6 2016      4.8

So how many times did I swim each year?

# Number of logged swims per year (one row of swimsimple per swim).
plyr::count(swimsimple, vars = "Year")
##   Year freq
## 1 2011   20
## 2 2012   67
## 3 2013   22
## 4 2014   39
## 5 2015   24
## 6 2016    3

Let’s try runs:

# Read the run export: skip the first two lines, keep text columns as strings.
runfile <- read.csv(
  file.path(datadir, "Run.csv"),
  sep = ",",
  skip = 2,
  stringsAsFactors = FALSE
)

# Keep only columns 1 and 4 and give them readable names.
runsimple <- runfile[c(1, 4)]
colnames(runsimple) <- c("Date", "distInMiles")

# Miles to kilometres (factor 1.60934), rounded to one decimal place.
runsimple$distInKm <- round(runsimple$distInMiles * 1.60934, 1)

# Parse the year-month-day date strings and extract the year for grouping.
runsimple$Date <- ymd(runsimple$Date)
runsimple$Year <- year(runsimple$Date)

# Total run distance (km) for each year, then print the table.
RunsByYear <- aggregate(distInKm ~ Year, data = runsimple, FUN = sum)
RunsByYear
##   Year distInKm
## 1 2011    234.4
## 2 2012    475.6
## 3 2013    854.2
## 4 2014    826.4
## 5 2015    809.9
## 6 2016    693.3
## 7 2017     32.0
# Number of logged runs per year (one row of runsimple per run).
plyr::count(runsimple, vars = "Year")
##   Year freq
## 1 2011   44
## 2 2012   87
## 3 2013  112
## 4 2014  128
## 5 2015  119
## 6 2016  107
## 7 2017    4

Bikes:

# Read the bike export: skip the first two lines, keep text columns as strings.
bikefile <- read.csv(
  file.path(datadir, "Bike.csv"),
  sep = ",",
  skip = 2,
  stringsAsFactors = FALSE
)

# Keep only columns 1 and 4 and give them readable names.
bikesimple <- bikefile[c(1, 4)]
colnames(bikesimple) <- c("Date", "distInMiles")

# Miles to kilometres (factor 1.60934), rounded to one decimal place.
bikesimple$distInKm <- round(bikesimple$distInMiles * 1.60934, 1)

# Parse the year-month-day date strings and extract the year for grouping.
bikesimple$Date <- ymd(bikesimple$Date)
bikesimple$Year <- year(bikesimple$Date)

# Total bike distance (km) for each year, then print the table.
BikesByYear <- aggregate(distInKm ~ Year, data = bikesimple, FUN = sum)
BikesByYear
##   Year distInKm
## 1 2012   1209.9
## 2 2013   1296.0
## 3 2014   1132.4
## 4 2015    627.8
## 5 2016   2816.3
## 6 2017     60.0
# Number of logged bike rides per year (one row of bikesimple per ride).
plyr::count(bikesimple, vars = "Year")
##   Year freq
## 1 2012   41
## 2 2013   31
## 3 2014   21
## 4 2015   16
## 5 2016  112
## 6 2017    3

All the distances in a nice summary table:

# Join the yearly totals on Year instead of padding the columns by hand. The
# sports do not cover the same years (this export has no bike rows for 2011
# and no swim rows for 2017), and a positional cbind() silently misaligns the
# columns as soon as that coverage changes.
ByYear <- merge(RunsByYear, SwimsByYear, by = "Year", all = TRUE,
                suffixes = c(".run", ".swim"))
ByYear <- merge(ByYear, BikesByYear, by = "Year", all = TRUE)
colnames(ByYear) <- c("Year", "Run (km)", "Swim (km)", "Bike (km)")

# A year with no rows for a sport means nothing was logged: report it as 0 km.
ByYear[is.na(ByYear)] <- 0
ByYear
##   Year Run (km) Swim (km) Bike (km)
## 1 2011    234.4      42.7       0.0
## 2 2012    475.6     145.5    1209.9
## 3 2013    854.2      36.8    1296.0
## 4 2014    826.4      60.0    1132.4
## 5 2015    809.9      42.0     627.8
## 6 2016    693.3       4.8    2816.3
## 7 2017     32.0       0.0      60.0