# Create some simple statistics around bus arrival times. # input data : # Trip Time Date # a 15:55 05/15/07 # m 5:47 05/16/07 # a 15:54 05/16/07 # m 5:47 05/17/07 # a 15:59 05/17/07 # m 5:50 05/21/07 # m 5:50 05/22/07 # a 16:00 05/22/07 # m 5:48 05/23/07 # m 5:50 05/24/07 # a 16:00 05/24/07 # m 5:48 05/25/07 # m 5:48 05/29/07 # a 15:59 05/29/07 # m 5:46 05/30/07 # m 5:45 05/31/07 # # To run, start up R and type # >> source("BusStats.R") # # Have your input data in a file called input.dat # # Copyright Alan Jackson, June 2007 # BSD open sourrce license. Feel free to use and modify, only give # me a little credit. # # Thanks to Gabor Grothendieck for helpful suggestions on improving the code. # # require(chron); require(MASS); keep=par(mfrow=c(2,2)) # read input data DF = read.table("input.dat", header=T, as.is=TRUE); # Create date-time and chron objects # add 30 seconds to times since they are truncated to the minute, this will # remove a bias in the numbers DF$Time = chron(times=paste(DF$Time, "30",sep=":")); DF$Date = chron(DF$Date); DF$DateTime = chron(DF$Date, DF$Time); # Put it all into a data frame # Create time of day array tod.morn = as.numeric(subset(DF, Trip == "m")$Time); tod.aft = as.numeric(subset(DF, Trip == "a")$Time); time.morn = subset(DF, Trip=="m")$Time; time.aft = subset(DF, Trip=="a")$Time; date.morn = subset(DF, Trip=="m")$Date; date.aft = subset(DF, Trip=="a")$Date; ############### morning histogram # create base histogram and harvest data hm = hist(tod.morn, xaxt = "n", main = "Morning Bus Arrival Times", xlab = "Time", col = "blue"); Xlabels = sub(":00$", "", chron(times=trunc((hm$mids+.5/60/24)*60*24)/60/24)); axis(1, hm$mids, Xlabels); # fit Poisson distribution deltamids = hm$mids[2]-hm$mids[1]; lambda = as.numeric(fitdistr(c(0,hm$counts,0), 'poisson')$estimate); yhist<-c(0,hm$counts,0); xfit<-round(seq(0,(max(tod.morn)-min(tod.morn)+2*deltamids)*60*24)); yfit<-dpois(xfit,lambda); yscale = max(yhist)/max(yfit); # plot distribution lines(spline(xfit/(24*60)+min(tod.morn),yfit*yscale), col="red", lwd=2); # fit Normal distribution norm.morn = fitdistr(((hm$mids-min(hm$mids))), 'normal'); mean.morn = as.numeric(norm.morn$estimate[1]); sd.morn = as.numeric(norm.morn$estimate[2]); x = c(hm$mids[1] - deltamids, hm$mids, hm$mids[-1]+deltamids); yfit<-dnorm(x-min(hm$mids), mean.morn, sd.morn); yscale = max(yhist)/max(yfit); # plot distribution lines(spline(x,yfit*yscale), col="green", lwd=2); # calculate probabilities of <5% chance of missing bus morn.pmin = mean(time.morn) - qpois(.95, lambda=lambda)/60/24; morn.min = mean(time.morn) - qnorm(.95)*sd.morn; morn.mean = mean(time.morn) ; # add text and legend to plot text(mean(time.morn)+1.4*sd(time.morn), y=max(hm$counts)*0.8, labels=paste("< 5% miss bus\n","norm ", as.character(morn.min),"\npoisson ",as.character(morn.pmin),"\nMean ",as.character(morn.mean),"\nMin ",as.character(min(time.morn)))) ; #legend(mean(time.morn)-1.4*sd(time.morn), y=max(hm$counts)*0.8, legend=c("Normal","Poisson"), col=c("green","red"), lty=1); ############### afternoon histogram ha = hist(tod.aft, xaxt = "n", main = "Afternoon Bus Arrival Times", xlab = "Time", col = "blue"); Xlabels = sub(":00$", "", chron(times=trunc((ha$mids+.5/60/24)*60*24)/60/24)); axis(1, ha$mids, Xlabels); # fit Poisson distribution deltamids = ha$mids[2]-ha$mids[1]; lambda = as.numeric(fitdistr(c(0,ha$counts,0), 'poisson')$estimate); yhist<-c(0,ha$counts,0); xfit<-round(seq(0,(max(tod.aft)-min(tod.aft)+2*deltamids)*60*24)); yfit<-dpois(xfit,lambda); yscale = max(yhist)/max(yfit); # plot distribution lines(spline(xfit/(24*60)+min(tod.aft),yfit*yscale), col="red", lwd=2); # fit Normal distribution norm.aft = fitdistr(((ha$mids-min(ha$mids))), 'normal'); mean.aft = as.numeric(norm.aft$estimate[1]); sd.aft = as.numeric(norm.aft$estimate[2]); x = c(ha$mids[1] - deltamids, ha$mids, ha$mids[-1]+deltamids); yfit<-dnorm(x-min(ha$mids), mean.aft, sd.aft); yscale = max(yhist)/max(yfit); # plot distribution lines(spline(x,yfit*yscale), col="green", lwd=2); # calculate probabilities of <5% chance of missing bus aft.pmin = mean(time.aft) - qpois(.95, lambda=lambda)/60/24; aft.min = mean(time.aft) - qnorm(.95)*sd.aft; aft.mean = mean(time.aft) ; # add text and legend to plot text(mean(time.aft)+1.4*sd(time.aft), y=max(ha$counts)*0.8, labels=paste("< 5% miss bus\n","norm ", as.character(aft.min),"\npoisson ",as.character(aft.pmin),"\nMean ",as.character(aft.mean),"\nMin ",as.character(min(time.aft)))) ; #legend(mean(time.aft)-1.4*sd(time.aft), y=max(ha$counts)*0.8, legend=c("Normal","Poisson"), col=c("green","red"), lty=1); ############### Morning Trend plot plot(date.morn, time.morn, pch=20, type='b', main="Morning Bus Trip Trends", xlab="Date", ylab="Arrival Time"); loessline = loess.smooth(as.numeric(date.morn),time.morn, span=.5); lines(loessline, col="blue", lty=2, lwd=3); ############### Afternoon Trend plot plot(date.aft, time.aft, pch=20, type='b', main="Afternoon Bus Trip Trends", xlab="Date", ylab="Arrival Time"); loessline = loess.smooth(as.numeric(date.aft),time.aft, span=.5); lines(loessline, col="blue", lty=2, lwd=3); ##################### par(keep)