Below is the uncorrected machine-read text of this chapter, intended to provide our own search engines and external engines with highly rich, chapter-representative searchable text of each book. Because it is UNCORRECTED material, please consider the following text as a useful but insufficient proxy for the authoritative book pages.
Appendix E R Code used in Report Figures 3-5 and 5-3 R CODE FOR FIGURE 3-5 ## Code to reproduce Figure 3-5. ## Author: Christopher Paciorek (UC Berkeley) ## R version: 4.3.2 library(evd) # Version 2.3-6.1 ## Location and scale parameter estimates from GEV fit to GHCN daily ## precipitation data for for Berkeley, California, but qualitative ## results are similar for parameters from GEV fits for other US locations. location <- 4.5 # in centimeters scale <- 1.5 # in centimeters ## Generate grid of shape parameter values for plotting. shape_grid <- c(seq(-0.25, 0, length = 30), seq(0, 0.2, length = 20)) bounded_shape_grid <- shape_grid[shape_grid < 0] ## Calculate return values as quantiles of GEV distribution. rv_0.0001 <- sapply(shape_grid, function(shape) qgev(1-1/1e4, location, scale, shape)) rv_0.00001 <- sapply(shape_grid, function(shape) qgev(1-1/1e5, location, scale, shape)) rv_0.000001 <- sapply(shape_grid, function(shape) qgev(1-1/1e6, location, scale, shape)) ## Calculate upper bounds for negative shape parameter values. ub <- sapply(bounded_shape_grid, function(shape) location-scale/shape) png('plot-shape-rv.png', height = 500, width = 600) plot(shape_grid, rv_0.000001, type = 'l', col = 'red', xlab = 'shape parameter', ylab = 'AEP level or upper bound (cm)', ylim = c(0,120)) lines(shape_grid, rv_0.00001, col = 'blue') lines(shape_grid, rv_0.0001, col = 'green') lines(bounded_shape_grid, ub, col = 'black') legend('topleft', legend = c('p = 0.0001', 'p = 0.00001', 'p = 0.000001', 'upper bound'), col = c('green', 'blue','red','black'), lty = rep(1,4), bty='n') text(x = .1, y = 0, "no upper bound") text(x = -.1, y = 0, "upper bound present") dev.off() R CODE FOR SAMPLE SIZE CALCULATION (FIGURE 5-3) ## R version: 4.3.2 library(evd) # Version 2.3-6.1 Prepublication copy 193
194 Modernizing Probable Maximum Precipitation Estimation library(pracma) # Version 2.4.4 calc_sample_size <- function(location = 0, scale = 1, shape = .1, return_period = 100, se_proportion = 0.125, upper_bound = FALSE, verbose = FALSE) { ## Calculates sample size needed to limit the standard error of the estimated ## return value (or upper bound) to `se_proportion` of the estimate, using ## the expected information matrix for the GEV distribution and the delta method ## to approximate the variance of the estimate as a function of the estimated ## parameters. ## If the `se_proportion` is 0.125, that corresponds to a confidence interval ## whose length is half (50% = 0.125*4) the magnitude of the return value (or upper bound). ## Arguments: ## location: location parameter of GEV model. ## scale: scale parameter of GEV model. ## shape: shape parameter of GEV model. ## return_period: desired return period (years); not used if upper_bound is TRUE. ## se_proportion: desired magnitude of standard error of return value (or upper bound) ## as a proportion of the return value (or upper bound) estimate. ## upper_bound: determine sample size for the upper bound rather than return value. ## Output: estimate of the sample size (in years for GEV block-maxima estimation and ## number of exceedances for threshold exceedance-based estimation). ## Authors: Daniel Cooley (Colorado State University) and Christopher Paciorek (UC Berkeley). if(shape >= 1/2){ stop("Method not supported if variance is infinite (shape >= 1/2)") } if(upper_bound & shape >= 0){ stop("Cannot have upper bound if shape >= 0") } ## Calculate the expected information matrix. ## Set up grid of observation values based on quantiles. y_low <- qgev(1e-5, loc = location, scale = scale, shape = shape) y_high <- qgev(1 - 1e-5, loc = location, scale = scale, shape = shape) y_len <- 2000 y <- seq(y_low, y_high, length.out = y_len) ## Evaluate density. dens_values <- dgev(y, loc = location, scale = scale, shape = shape) dens_array <- array(dens_values, dim = c(y_len,3,3)) Prepublication copy
Appendix E 195 ## Use numerical differentiation to obtain Hessian. param_vec <- c(location, scale, shape) hess_array <- array(dim = c(y_len,3,3)) loglik <- function(par, y){ # Wrapper providing param vec as first argument. return(dgev(y, loc = par[1], scale = par[2], shape = par[3], log = T)) } for(i in seq(1, y_len)){ hess_array[i,,] <- hessian(loglik, param_vec, y = y[i]) } ## Alternative to avoid the loop: ## tmp <- sapply(y, function(val) hessian(loglik, param_vec, y =val)) ## hess_array <- array(t(tmp), c(2000,3,3)) ## Use trapezoidal rule to estimate integral over the density. integrand_array <- -hess_array * dens_array diff_array <- array(diff(y), dim = c((y_len-1),3,3)) trapezoids <- (integrand_array[-1,,] + integrand_array[-y_len,,])/2 * diff_array exp_info_mtx <- apply(trapezoids, c(2,3), sum) cov_mtx <- solve(exp_info_mtx) # Asymptotic var-cov matrix of parameter estimates. ## Use the information matrix to estimate the sample size. if(upper_bound) { ub_fun <- function(par) { return(par[1]-par[2]/par[3]) } ub <- ub_fun(c(location,scale,shape)) se <- se_proportion * ub if(verbose) print(paste("upper bound is", ub_fun(c(location,scale,shape)))) grad_vec <- grad(ub_fun, param_vec) } else { prob <- 1-1/return_period qTile <- qgev(prob, loc = location, scale = scale, shape = shape) se <- se_proportion * qTile if(verbose) print(paste(prob, "quantile is", round(qTile, 2))) qTileFn <- function(par, p){ return(qgev(p, loc = par[1], scale = par[2], shape = par[3])) } grad_vec <- grad(qTileFn, param_vec, p = prob) } ## Calculate variance of return value based on delta method. delta_var <- t(grad_vec) %*% cov_mtx %*% grad_vec ## Invert to determine sample size. n <- ceiling(delta_var/se^2) return(n) Prepublication copy
196 Modernizing Probable Maximum Precipitation Estimation } ## Example usage. ## Sample size for depth corresponding to annual exceedance probability for p=0.0001. location <- 4.5 scale <- 1.5 shape <- 0 calc_sample_size(location, scale, shape, return_period = 10000, se_proportion = 0.125) ## Sample size for estimating upper bound. location <- 4.5 scale <- 1.5 shape <- -0.1 calc_sample_size(location, scale, shape, upper_bound = TRUE, se_proportion = 0.125) Prepublication copy