Guided exercise using calibration parameter from literature
This script is organised in 2 parts:
Part 1: (Step 1 to 7) provides examples of the application of LAI-NDVI (esponential relationship) and LAI-WDVI (CLAIR) models using parametrization from literature
Part 2: (Step 8): develop a LAI-WDVI model parameter set by inverting the CLAIR model.
Part 3: (EXERCISE): Study the LAI NDVI relationshio and develop a function to estimate LAI from NDVI using the training data. Use the testing data to assess the error.
Please note that the Leaf Area Index data are provided under a Data Disclosure Policy: any use of the data provided with this case study, beyond the scope of this case study, is restricted and it must be communicated to francesco.vuolo@boku.ac.at
First of all, we will load in the required in-situ LAI measurements and the corresponding Sentinel-2 reflectance data.
The reflectance data in the “./CLAIR/LAI_dataset.csv” file are organised in the following order
column number, S2 band name, wavelength, corresponding spectral region
LAI from is-situ measurements
# Load data from csv file
data<-read.csv(file='./CLAIR/LAI_dataset.csv',header = TRUE,sep = ',')
#explore the first lines of the dataset
head(data)
## Crop LAI_insitu Standard_Error_insitu SMP LAI.S2.BOKU B02..490nm.
## 1 Maize 1.310 0.0490 22 1.2742500 630.2500
## 2 Carrot 4.200 0.2720 23 4.1026667 285.3333
## 3 Winter cereal 1.810 0.0875 23 1.3933333 333.0000
## 4 Winter cereal 3.260 0.0748 23 3.0370000 204.3333
## 5 Beet 0.896 0.1740 23 0.8090000 581.2500
## 6 Maize 1.380 0.0342 23 0.7613333 939.0000
## B03.560nm. B04.665nm. B05.705nm. B06.740nm. B07.783nm. B08..842. B08A.865nm.
## 1 947.7500 796.0000 1065.0000 2835.000 3393.000 3406.000 3614.000
## 2 805.0000 296.3333 1340.0000 5100.667 5920.000 5819.000 6039.333
## 3 638.3333 554.0000 1104.0000 2307.333 2657.667 2751.333 2851.667
## 4 415.3333 255.6667 731.6667 2611.333 3449.000 3524.000 3595.000
## 5 1068.2500 940.2500 1988.5000 3299.000 3550.000 3562.750 3789.500
## 6 1253.3333 1248.6667 1810.6667 3256.000 3845.000 3803.667 4117.333
## B11.1610nm. B12.2190nm.
## 1 2232.000 1398.0000
## 2 2375.000 1237.3333
## 3 1651.000 1058.3333
## 4 1304.000 687.3333
## 5 2712.500 1894.0000
## 6 3205.333 2397.0000
# explore the explerimental data
lai <- data[,2]
reflectance <- data[,c(6:12,14,15)]
# plot reflectance in 6 different LAI groups
par(mfrow=c(2,3)) # set the plotting area into a 1*2 array
matplot( t(reflectance[lai < 1 ,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= ' LAI < 1 ', col = 1)
matplot( t(reflectance[lai >= 1 & lai < 1.5,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= ' 1 <= LAI < 1.5', col = 1)
matplot( t(reflectance[lai >= 1.5 & lai < 2,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= '1.5 <= LAI < 2', col = 1)
matplot( t(reflectance[lai >= 2 & lai < 2.5,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= ' 2 <= LAI < 2.5', col = 1)
matplot( t(reflectance[lai >= 2.5 & lai < 3,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= '2.5 <= LAI < 3', col = 1)
matplot( t(reflectance[lai >= 3 ,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= ' LAI > 3', col = 1)
# We will split the dataset in two parts (50/50) in Part 2.
# For Part 1, we will use all data together
data_training <- data[1:20,] #[1:50,]
data_testing <- data[21:100,] #[51:100,]
#plost an histogram of the LAI insitu data
par(mfrow=c(1,2)) # set the plotting area into a 1*2 array
hist(data_testing$LAI_insitu)
hist(data_training$LAI_insitu)
LAI-NDVI
# Calculate NDVI (usng NIR band B08 and RED B04)
NDVI <- (data_testing$B08..842. - data_testing$B04.665nm.) / (data_testing$B08..842. + data_testing$B04.665nm.)
# Verify the NDVI distribution and LAI (in-situ measurements) vs NDVI relationship
hist.default(NDVI,breaks = 30,xlim=c(0,1))
#Calculate LAI using empirical equaition from literature (model and coefficients are obtained from a previous calibration exercise)
#repalce "xxx" with the model parameter from literature.
LAI_NDVI <- 0.158 * exp (3.51 * NDVI)
# Study the LAI distribution and compore it to measuremenets
hist.default(LAI_NDVI,breaks = 30,xlim=c(0,6))
# Fitting Linear Models (we do this to calculate how strong the correlation between testing data and estimate is)
fit <- lm(LAI_NDVI~data_testing$LAI_insitu)
# Extract Coefficients of linear fitting (intercept and slope of linear fitting function)
coefs <- coef(fit)
# Calculate root mean square error
rmse <- round(sqrt(mean((data_testing$LAI_insitu-LAI_NDVI)^2)), 2)
coefs <- coef(fit)
intercept <- round(coefs[1], 3)
slope <- round(coefs[2],3)
# Coefficient of correlation R-squared
r2 <- round(summary(fit)$r.squared, 3)
# Prepare a text to add to the plot
eqn <- bquote(~~ r^2 == .(r2) * "," ~~ RMSE == .(rmse))
# Plot results
par(pty="s")
plot(data_testing$LAI_insitu,LAI_NDVI,xlim=c(0,8),ylim=c(0,8),xlab="LAI measurements", ylab="LAI NDVI Sentinel-2", pch=3, col=3)
# add linear fitting model
abline(fit, lty = 2)
# add the R-squared and RMSE to the plot
text(3, 8, eqn, pos = 1.2)
pl_LAI_NDVI <- recordPlot()
# http://r-statistics.co/Linear-Regression.html
LAI-WDIV CLAIR model
# Soil line slope and alfa from literature
SLS <- 1.1
alfa <- 0.35
WDVI_inf <- 7000
# Calculate WDVI
# WDVI = ρ_nir - ρ_red * ρ_(s nir)/ρ_(s red)
WDVI <- data_testing$B08..842. - data_testing$B04.665nm.*SLS
# study the WDVI distribution and take the maximum value to set the WDVI infinitive (WDVI_inf) parameter
hist.default(WDVI,breaks = 30,xlim=c(0,8000))
# Calculate LAI using the LAI - WDVI semi-empirical model
# LAI_WDVI = -1 / α * ln ( 1 - WDVI / WDVI∞ )
LAI_WDVI <- -1 / alfa * log(1 - WDVI / WDVI_inf)
# study the LAI distribution and compore to measuremenets
hist.default(LAI_WDVI,breaks = 20,xlim=c(0,8))
# Fitting Linear Models
fit <- lm(LAI_WDVI~data_testing$LAI_insitu)
# Extract Coefficients of linear fitting
coefs <- coef(fit)
# Calculate root mean square error
rmse <- round(sqrt(mean((data_testing$LAI_insitu-LAI_WDVI)^2)), 2)
coefs <- coef(fit)
intercept <- round(coefs[1], 3)
slope <- round(coefs[2],3)
# Coefficient of correlation
r2 <- round(summary(fit)$r.squared, 3)
# Prepare a text to add to the plot
eqn <- bquote(~~ r^2 == .(r2) * "," ~~ RMSE == .(rmse))
# Plot results
par(pty="s")
plot(data_testing$LAI_insitu,LAI_WDVI,xlim=c(0,8),ylim=c(0,8),xlab="LAI measurements", ylab="LAI WDVI Sentinel-2 ", pch=3, col=4)
abline(fit, lty = 2)
text(3, 8, eqn, pos = 1.2)
pl_LAI_WDVI <- recordPlot()
LAI in-situ vs LAI BOKU (s2.boku.eodc.eu)
The LAI BOKU (s2.boku.ac.at) is derived using an Artificial Neural Network (ANN) and atmospherically corrected Sentinel-2 reflectance data (using all spectral bands). The advantage is that the approch is global and does not require site specific calibration. In this example, we take the LAI data from the S2.BOKU portal and we compare it to the in-situ LAI measurements.
The LAI BOKU data are provided directly into the CSV file we loaded in Step 1 (data_testing$LAI.S2.BOKU). Here we only do the comparison and look at results.
Raster data: LAI-NDVI
Until now, we have been working with data from the CSV file. How can we apply the existing LAI-NDVI model to raster data?
# ========================================================
# Open raster (S2A_T33UXP_20170528T095031...)
# ========================================================
# Load raster data provided with the exercise
infls <- list.files(path= './raster_data/Surface_Reflectance_L2A/', pattern='.tif$',full.names=TRUE)
# create a layer stack
rasterData <- raster::stack(infls) # uses raster package
# rename the layer bands in the raster object
names(rasterData)<-c('B02','B03','B04','B05','B06','B07','B08','B11','B12','B8A')
show(rasterData)
## class : RasterStack
## dimensions : 860, 2440, 2098400, 10 (nrow, ncol, ncell, nlayers)
## resolution : 10, 10 (x, y)
## extent : 619200, 643600, 5335800, 5344400 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=33 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0
## names : B02, B03, B04, B05, B06, B07, B08, B11, B12, B8A
## min values : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
## max values : 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535
# ========================================================
# Calculate LAI based on NDVI
# ========================================================
# Calculate NDVI
NDVI <- (rasterData$B08 - rasterData$B04) / (rasterData$B08 + rasterData$B04)
# Calculate LAI
LAI_NDVI <- 0.158 * exp (3.51 * NDVI)
hist.default(getValues(NDVI),breaks = 100,xlim=c(-1,1))
cuts=c(0.5, 1, 2, 2.5, 2.75, 3, 3.25, 3.5, 3.75, 4, 5, 6) #set breaks
pal <- colorRampPalette(c("brown","green"))
plot(LAI_NDVI,breaks=cuts, col = pal(12))
# Save raster
# uncomment to execute this line
raster::writeRaster(NDVI,'./results/S2A_T33UXP_20170528T095031_NDVI.tif',format="GTiff",datatype='FLT4S',overwrite=TRUE)
raster::writeRaster(LAI_NDVI,'./results/S2A_T33UXP_20170528T095031_LAI_NDVI.tif',format="GTiff",datatype='FLT4S',overwrite=TRUE)
# Save data using a scaling factor of 1000 and Integer format to save disk space
LAI_NDVI <- LAI_NDVI * 1000
raster::writeRaster(LAI_NDVI,'./results/S2A_T33UXP_20170528T095031_LAI_NDVI_sf1000.tif',format="GTiff",datatype='INT2S',overwrite=TRUE)
Raster data: LAI-WDIV CLAIR model
# ========================================================
# Calculate the Weighted difference vegetation index (WDVI)
# ========================================================
r_B08 <- rasterData$B08
r_B04 <- rasterData$B04
# create a random sample
s = sample(10000)
# plot the feature space (RED vs NIR) for all pixel types
plot(r_B04[s],r_B08[s])
# select only the bare soil pixels (quick and dirty way)
rs_B08 <- rasterData$B08[NDVI > 0.1 & NDVI < 0.25]
rs_B04 <- rasterData$B04[NDVI > 0.1 & NDVI < 0.25]
# plot only the bare soil pixels
plot(rs_B04[s],rs_B08[s])
# find the soil line slope
fit <- lm(rs_B08[s]~rs_B04[s])
coefs <- coef(fit)
slope <- round(coefs[2],3)
plot(rs_B04[s],rs_B08[s])
abline(fit, lty = 1, col = 2)
# obtain the soil line slope
SLS <- slope # from soil line slope analysis
SLS
## rs_B04[s]
## 1.249
# Calculate the WDVI
WDVI <- rasterData$B08 - rasterData$B04 * SLS
# obtain the WDVIinf value
hist.default(getValues(WDVI),breaks = 100,xlim=c(0,8000))
summary(WDVI)
## Warning in .local(object, ...): summary is an estimate based on a sample of 1e+05 cells (4.77% of all cells)
## layer
## Min. -2194.544
## 1st Qu. 733.095
## Median 2667.972
## 3rd Qu. 3859.963
## Max. 6710.095
## NA's 0.000
WDVI_inf <- mean(getValues(WDVI)) + 3 * sd(getValues(WDVI))
WDVI_inf
## [1] 7204.347
# ========================================================
# Calculate the Leaf Area Index, based on the CLAIR model
# ========================================================
alfa <- 0.34 # from literature
LAI_WDVI <- - 1 / alfa * log(1 - WDVI / WDVI_inf)
## Warning in setValues(r, log(values(x), base = base)): NaNs produced
plot(LAI_WDVI,breaks=cuts, col = pal(12))
# ========================================================
# Removing out-of-range LAI values
# ========================================================
LAI_WDVI_cut <- LAI_WDVI
LAI_WDVI_cut[LAI_WDVI_cut < 0] <- 9999
LAI_WDVI_cut[LAI_WDVI_cut > 7] <- 9999
LAI_WDVI_cut <- LAI_WDVI_cut * 1000
raster::writeRaster(LAI_WDVI_cut,'./results/S2A_T33UXP_20170528T095031_LAI_WDVI_sf1000.tif',format="GTiff",datatype='INT2S',overwrite=TRUE)
Compare LAI-NDVI with the LAI BOKU
LAI_ANN <- raster::raster('./raster_data/S2A_T33UXP_20170528T095031_LAI_10m.tif')
LAI_NDVI <- raster::raster('./results/S2A_T33UXP_20170528T095031_LAI_NDVI_sf1000.tif')
# select a random sample of pixels to plot
s = sample(10000)
par(pty="s")
plot(LAI_ANN[s],LAI_NDVI[s],xlim=c(0,8000),ylim=c(0,8000),xlab="LAI_ANN", ylab="LAI_NDVI", pch=3, col=10)
# Analyse where most differences are geographically located
plot( (LAI_ANN-LAI_NDVI) )
You can explore this further in QGIS
Get the α value of the CLAIR model
We looked at how to derive the soil line slope (SLS) and the WDVIinf. What about the α value?
We will derive the α coefficient using an unconstrained nonlinear optimization method, which minimizes the Root Mean Square Error (RMSE) (our cost function) between measurements and predictions of LAI values.
Measurements of LAI are provided (CSV file).
Predictions are generated using the reflectance data extractred in correspondance of the LAI measurements. An initial α value of 0.35 will be used as first guess based on literature data.
# First, let´s derive the precise Soil Line Slope
# We prepared a selection of about 60 sample points of bare soils (obtained from visual inspection) and the reflectance of these points is used for calculation of the soil line slope.
data_soils<-read.csv(file='./CLAIR/BareSoil_dataset.csv',header = TRUE,sep = ',')
# Fitting Linear Models
fit <- lm(data_soils$B8~data_soils$B4-1)
# Extract Coefficients of linear fitting
coefs <- coef(fit)
# slope of the linear fitting (specific Soil Line Slope)
SLS <- as.numeric(round(coefs[1], 3))
# Plot feature space x-axis: Red and y-axis: NIR
# from the raster experiment (quick and dirty)
plot(rs_B04[s],rs_B08[s],pch=1,col=5)
# from precise bare soil selection
points(data_soils$B4,data_soils$B8,col="brown",xlim=c(0,6000),ylim=c(0,6000),xlab="Refl. [RED]", ylab="Refl. [NIR]", pch=3)
# Add Straight Lines to a Plot
abline(fit, lty = 2)
# new soil line slope
SLS
## [1] 1.322
Calibrate the CLAIR model parameter (alfa) using the training dataset
Get the WDVI first
# Calculate WDVI (note that we are using the training data).
WDVI <- data_training$B08A.865nm. - data_training$B04.665nm. * SLS
WDVI_inf<-7200
Calibrate the CLAIR model parameter (alfa) using the training dataset
Get the optimal alfa value
# LAI insitu data from the traning part of the data
LAI_training <- data_training$LAI_insitu
n_training <- length(LAI_training)
# Invert the LAI CLAIR model to find alfa. This is written in the form of a function that we can optimise in R.
alfa_search <- function(alfa) sqrt(sum((LAI_training -((-log(1-(WDVI/WDVI_inf)))*(1/alfa)))^2)/ n_training )
# Search the interval from lower to upper for a minimum in the function alfa_search
a<-optimise(f = alfa_search,lower = 0.1, upper = 1)
a$minimum
## [1] 0.2957987
# the result will be the new alfa value that minimizes the error between estimated and measured LAI values.
new_alfa <- a$minimum
new_alfa
## [1] 0.2957987
# alternative method package "optimization"
output<-optimization::optim_nm( alfa_search , start = c(1), maximum = FALSE, trace = TRUE)
plot(output)
output$par
##
## 0.2958984
This concludes the guided tour.
LAI-NDVI, LAI-WDVI, LAI-BOKU and LAI-WDVI optimized
# Plot LAI NDVI
replayPlot(pl_LAI_NDVI)
# Plot LAI WDVI
replayPlot(pl_LAI_WDVI)
# Plot LAI WDVI with optimized alfa
replayPlot(pl_LAI_WDVI_cal)
# Plot LAI BOKU data
replayPlot(pl_LAI_BOKU)
Study the LAI-NDVI relationship and develop a function to estimte LAI from NDVI
# LAI_insitu
LAI_training <- data_training$LAI_insitu
LAI_testing <- data_testing$LAI_insitu
# Calculate NDVI (usng NIR band B08 and RED B04)
NDVI_training <- (data_training$B08..842. - data_training$B04.665nm.) / (data_training$B08..842. + data_training$B04.665nm.)
NDVI_testing <- ( data_testing$B08..842. - data_testing$B04.665nm.) / ( data_testing$B08..842. + data_testing$B04.665nm.)
# scatterplot NDVI vs LAI
par(pty="s")
plot(NDVI_training,LAI_training,xlim=c(0,1),ylim=c(0,8),xlab=" NDVI ", ylab="LAI measurements (training set)", pch=3, col=1)
# which function type would best fit the trend in the data?
# Fitting a Linear Model
model_linear <- lm(LAI_training~NDVI_training)
coefs <- coef(model_linear)
intercept <- coefs[1]
slope <- coefs[2]
# fit non-linear model
model_nonlinear <- nls(LAI_training ~ exp(a + b * NDVI_training), start = list(a = 0, b = 0))
coefs <- coef(model_nonlinear)
a <- coefs[1]
b <- coefs[2]
# Plot results
par(pty="s")
plot( slope * NDVI_testing + intercept , LAI_testing,xlim=c(0,8),ylim=c(0,8),xlab="LAI NDVI (linear model)", ylab="LAI measurements", pch=3, col=1)
plot( exp(a + b * NDVI_testing) , LAI_testing,xlim=c(0,8),ylim=c(0,8),xlab="LAI NDVI (nonlinear model)", ylab="LAI measurements", pch=3, col=2)
Calculate and report the r-squared and the RMSE for the results of the: