Aliases: aggregate.zoo split.zoo
Keywords: ts
### ** Examples ## averaging over values in a month: # x.date is jan 1,3,5,7; feb 9,11,13; mar 15,17; apr 19 x.date <- as.Date(paste(2004, rep(1:4, 4:1), seq(1,20,2), sep = "-")); x.date
[1] "2004-01-01" "2004-01-03" "2004-01-05" "2004-01-07" "2004-02-09" [6] "2004-02-11" "2004-02-13" "2004-03-15" "2004-03-17" "2004-04-19"
# one random value per date in the index (the index has 10 dates)
x <- zoo(rnorm(10), x.date); x
2004-01-01 2004-01-03 2004-01-05 2004-01-07 2004-02-09 2004-02-11 0.26758992 -0.20333383 -0.07921181 0.78892774 -1.59444641 -1.55243485 2004-02-13 2004-03-15 2004-03-17 2004-04-19 0.41343957 -0.74059832 -0.76113172 -1.43842639
# coarser dates - jan 1 (4 times), feb 1 (3 times), mar 1 (2 times), apr 1 (1 time) x.date2 <- as.Date(paste(2004, rep(1:4, 4:1), 1, sep = "-")); x.date2
[1] "2004-01-01" "2004-01-01" "2004-01-01" "2004-01-01" "2004-02-01" [6] "2004-02-01" "2004-02-01" "2004-03-01" "2004-03-01" "2004-04-01"
x2 <- aggregate(x, x.date2, mean); x2
2004-01-01 2004-02-01 2004-03-01 2004-04-01 0.1934930 -0.9111472 -0.7508650 -1.4384264
# same - uses as.yearmon x2a <- aggregate(x, as.Date(as.yearmon(time(x))), mean); x2a
2004-01-01 2004-02-01 2004-03-01 2004-04-01 0.1934930 -0.9111472 -0.7508650 -1.4384264
# same - uses by function x2b <- aggregate(x, function(tt) as.Date(as.yearmon(tt)), mean); x2b
2004-01-01 2004-02-01 2004-03-01 2004-04-01 0.1934930 -0.9111472 -0.7508650 -1.4384264
# same - uses cut x2c <- aggregate(x, as.Date(cut(time(x), "month")), mean); x2c
2004-01-01 2004-02-01 2004-03-01 2004-04-01 0.1934930 -0.9111472 -0.7508650 -1.4384264
# almost same but times of x2d have yearmon class rather than Date class x2d <- aggregate(x, as.yearmon, mean); x2d
Jan 2004 Feb 2004 Mar 2004 Apr 2004 0.1934930 -0.9111472 -0.7508650 -1.4384264
# compare time series plot(x) lines(x2, col = 2)
## aggregate a daily time series to a quarterly series # create zoo series tt <- as.Date("2000-1-1") + 0:300 z.day <- zoo(0:300, tt) # function which returns corresponding first "Date" of quarter first.of.quarter <- function(tt) as.Date(as.yearqtr(tt)) # average z over quarters # 1. via "yearqtr" index (regular) # 2. via "Date" index (not regular) z.qtr1 <- aggregate(z.day, as.yearqtr, mean) z.qtr2 <- aggregate(z.day, first.of.quarter, mean) # The last one used the first day of the quarter but suppose # we want the first day of the quarter that exists in the series # (and the series does not necessarily start on the first day # of the quarter). z.day[!duplicated(as.yearqtr(time(z.day)))]
2000-01-01 2000-04-01 2000-07-01 2000-10-01 0 91 182 274
# This is the same except it uses the last day of the quarter. # It requires R 2.6.0 or later, which introduced the fromLast= argument. ## Not run: ##D z.day[!duplicated(as.yearqtr(time(z.day)), fromLast = TRUE)] ## End(Not run) # The aggregated series above are of class "zoo" (because z.day # was "zoo"). To create a regular series of class "zooreg", # the frequency can be automatically chosen zr.qtr1 <- aggregate(z.day, as.yearqtr, mean, regular = TRUE) # or specified explicitly zr.qtr2 <- aggregate(z.day, as.yearqtr, mean, frequency = 4) ## aggregate on month and extend to monthly time series if(require(chron)) { y <- zoo(matrix(11:15, nrow = 5, ncol = 2), chron(c(15, 20, 80, 100, 110))) colnames(y) <- c("A", "B") # aggregate by month using first of month as times for coarser series # using first day of month as representative time y2 <- aggregate(y, as.Date(as.yearmon(time(y))), head, 1) # fill in missing months by merging with an empty series containing # a complete set of 1st of the months yrt2 <- range(time(y2)) y0 <- zoo(,seq(from = yrt2[1], to = yrt2[2], by = "month")) merge(y2, y0) }
A B 1970-01-01 11 11 1970-02-01 NA NA 1970-03-01 13 13 1970-04-01 14 14
# given daily series keep only first point in each month at # day 21 or more z <- zoo(101:200, as.Date("2000-01-01") + seq(0, length = 100, by = 2)) zz <- z[as.numeric(format(time(z), "%d")) >= 21] zz[!duplicated(as.yearmon(time(zz)))]
2000-01-21 2000-02-22 2000-03-21 2000-04-22 2000-05-22 2000-06-21 111 127 141 157 172 187
# same except times are of "yearmon" class aggregate(zz, as.yearmon, head, 1)
Jan 2000 Feb 2000 Mar 2000 Apr 2000 May 2000 Jun 2000 111 127 141 157 172 187
# aggregate POSIXct seconds data every 10 minutes Sys.setenv(TZ = "GMT") tt <- seq(10, 2000, 10) x <- zoo(tt, structure(tt, class = c("POSIXt", "POSIXct"))) aggregate(x, time(x) - as.numeric(time(x)) %% 600, mean)
1970-01-01 00:00:00 1970-01-01 00:10:00 1970-01-01 00:20:00 1970-01-01 00:30:00 300 895 1495 1900
# aggregate weekly series to a series with frequency of 52 per year suppressWarnings(RNGversion("3.5.0")) set.seed(1) z <- zooreg(1:100 + rnorm(100), start = as.Date("2001-01-01"), deltat = 7) # new.freq() converts dates to a grid of freq points per year # yd is sequence of dates of firsts of years # yy is years of the same sequence # last line interpolates so dates, d, are transformed to year + frac of year # so first week of 2001 is 2001.0, second week is 2001 + 1/52, third week # is 2001 + 2/52, etc. new.freq <- function(d, freq = 52) { y <- as.Date(cut(range(d), "years")) + c(0, 367) yd <- seq(y[1], y[2], "year") yy <- as.numeric(format(yd, "%Y")) floor(freq * approx(yd, yy, xout = d)$y) / freq } # take last point in each period aggregate(z, new.freq, tail, 1)
2001(1) 2001(2) 2001(3) 2001(4) 2001(5) 2001(6) 2001(7) 2001(8) 2.183643 2.164371 5.595281 5.329508 5.179532 7.487429 8.738325 9.575781 2001(9) 2001(10) 2001(11) 2001(12) 2001(13) 2001(14) 2001(15) 2001(16) 9.694612 12.511781 12.389843 12.378759 11.785300 16.124931 15.955066 16.983810 2001(17) 2001(18) 2001(19) 2001(20) 2001(21) 2001(22) 2001(23) 2001(24) 18.943836 19.821221 20.593901 21.918977 22.782136 23.074565 22.010648 25.619826 2001(25) 2001(26) 2001(27) 2001(28) 2001(29) 2001(30) 2001(31) 2001(32) 25.943871 26.844204 26.529248 28.521850 30.417942 32.358680 31.897212 33.387672 2001(33) 2001(34) 2001(35) 2001(36) 2001(37) 2001(38) 2001(39) 2001(40) 33.946195 33.622940 35.585005 36.605710 37.940687 40.100025 40.763176 40.835476 2001(41) 2001(42) 2001(43) 2001(44) 2001(45) 2001(46) 2001(47) 2001(48) 41.746638 43.696963 44.556663 44.311244 45.292505 47.364582 48.768533 48.887654 2001(49) 2001(50) 2001(51) 2001(52) 2002(1) 2002(2) 2002(3) 2002(4) 50.881108 51.398106 51.387974 53.341120 52.870637 56.433024 57.980400 56.632779 2002(5) 2002(6) 2002(7) 2002(8) 2002(9) 2002(10) 2002(11) 2002(12) 56.955865 59.569720 59.864945 63.401618 61.960760 63.689739 64.028002 64.256727 2002(13) 2002(14) 2002(15) 2002(16) 2002(17) 2002(18) 2002(19) 2002(20) 66.188792 65.195041 69.465555 69.153253 72.172612 71.475510 71.290054 73.610726 2002(21) 2002(22) 2002(23) 2002(24) 2002(25) 2002(26) 2002(27) 2002(28) 73.065902 73.746367 76.291446 76.556708 78.001105 79.074341 79.410479 80.431331 2002(29) 2002(30) 2002(31) 2002(32) 2002(33) 2002(34) 2002(35) 2002(36) 81.864821 84.178087 82.476433 85.593946 86.332950 88.063100 87.695816 89.370019 2002(37) 2002(38) 2002(39) 2002(40) 2002(41) 2002(42) 2002(43) 2002(44) 90.267099 90.457480 93.207868 94.160403 94.700214 96.586833 96.558486 95.723408 2002(45) 2002(46) 2002(47) 97.426735 97.775387 99.526599
# or, take mean of all points in each period aggregate(z, new.freq, mean)
2001(1) 2001(2) 2001(3) 2001(4) 2001(5) 2001(6) 2001(7) 2001(8) 1.278595 2.164371 5.595281 5.329508 5.179532 7.487429 8.738325 9.575781 2001(9) 2001(10) 2001(11) 2001(12) 2001(13) 2001(14) 2001(15) 2001(16) 9.694612 12.511781 12.389843 12.378759 11.785300 16.124931 15.955066 16.983810 2001(17) 2001(18) 2001(19) 2001(20) 2001(21) 2001(22) 2001(23) 2001(24) 18.943836 19.821221 20.593901 21.918977 22.782136 23.074565 22.010648 25.619826 2001(25) 2001(26) 2001(27) 2001(28) 2001(29) 2001(30) 2001(31) 2001(32) 25.943871 26.844204 26.529248 28.521850 30.417942 32.358680 31.897212 33.387672 2001(33) 2001(34) 2001(35) 2001(36) 2001(37) 2001(38) 2001(39) 2001(40) 33.946195 33.622940 35.585005 36.605710 37.940687 40.100025 40.763176 40.835476 2001(41) 2001(42) 2001(43) 2001(44) 2001(45) 2001(46) 2001(47) 2001(48) 41.746638 43.696963 44.556663 44.311244 45.292505 47.364582 48.768533 48.887654 2001(49) 2001(50) 2001(51) 2001(52) 2002(1) 2002(2) 2002(3) 2002(4) 50.881108 51.398106 51.387974 53.341120 52.870637 56.433024 57.980400 56.632779 2002(5) 2002(6) 2002(7) 2002(8) 2002(9) 2002(10) 2002(11) 2002(12) 56.955865 59.569720 59.864945 63.401618 61.960760 63.689739 64.028002 64.256727 2002(13) 2002(14) 2002(15) 2002(16) 2002(17) 2002(18) 2002(19) 2002(20) 66.188792 65.195041 69.465555 69.153253 72.172612 71.475510 71.290054 73.610726 2002(21) 2002(22) 2002(23) 2002(24) 2002(25) 2002(26) 2002(27) 2002(28) 73.065902 73.746367 76.291446 76.556708 78.001105 79.074341 79.410479 80.431331 2002(29) 2002(30) 2002(31) 2002(32) 2002(33) 2002(34) 2002(35) 2002(36) 81.864821 84.178087 82.476433 85.593946 86.332950 88.063100 87.695816 89.370019 2002(37) 2002(38) 2002(39) 2002(40) 2002(41) 2002(42) 2002(43) 2002(44) 90.267099 90.457480 93.207868 94.160403 94.700214 96.586833 96.558486 95.723408 2002(45) 2002(46) 2002(47) 97.426735 97.775387 99.526599
# example of taking means in the presence of NAs z.na <- zooreg(c(1:364, NA), start = as.Date("2001-01-01")) aggregate(z.na, as.yearqtr, mean, na.rm = TRUE)
2001 Q1 2001 Q2 2001 Q3 2001 Q4 45.5 136.0 227.5 319.0
# Find the sd of all days that lie in any Jan, all days that lie in # any Feb, ..., all days that lie in any Dec (i.e. output is vector with # 12 components) aggregate(z, format(time(z), "%m"), sd)
01 02 03 04 05 06 07 08 27.931987 27.985392 27.679111 27.774779 27.328400 27.840695 27.520893 28.058606 09 10 11 12 27.806512 28.066187 27.406884 1.589453