Estimation of Leaf Area Index

Start

Guided exercise using calibration parameter from literature

This script is organised in 2 parts:

  • Part 1: (Step 1 to 7) provides examples of the application of LAI-NDVI (esponential relationship) and LAI-WDVI (CLAIR) models using parametrization from literature

  • Part 2: (Step 8): develop a LAI-WDVI model parameter set by inverting the CLAIR model.

  • Part 3: (EXERCISE): Study the LAI NDVI relationshio and develop a function to estimate LAI from NDVI using the training data. Use the testing data to assess the error.

Please note that the Leaf Area Index data are provided under a Data Disclosure Policy: any use of the data provided with this case study, beyond the scope of this case study, is restricted and it must be communicated to

First of all, we will load in the required in-situ LAI measurements and the corresponding Sentinel-2 reflectance data.

The reflectance data in the “./CLAIR/LAI_dataset.csv” file are organised in the following order

column number, S2 band name, wavelength, corresponding spectral region

  • 6 B2 490, Blue
  • 7 B3 560, Green
  • 8 B4 665, Red
  • 9 B5 705, red-edge 1
  • 10 B6 740, red-edge 2
  • 11 B7 783, red-edge 3
  • 12 B8 842, NIR 1
  • 13 B8a 865, NIR 2
  • 14 B11 1610, SWIR 1
  • 15 B12 2190, SWIR 2

Step 1

LAI from is-situ measurements

# Load data from csv file
data<-read.csv(file='./CLAIR/LAI_dataset.csv',header = TRUE,sep = ',')

#explore the first lines of the dataset
head(data)
##            Crop LAI_insitu Standard_Error_insitu SMP LAI.S2.BOKU B02..490nm.
## 1         Maize      1.310                0.0490  22   1.2742500    630.2500
## 2        Carrot      4.200                0.2720  23   4.1026667    285.3333
## 3 Winter cereal      1.810                0.0875  23   1.3933333    333.0000
## 4 Winter cereal      3.260                0.0748  23   3.0370000    204.3333
## 5          Beet      0.896                0.1740  23   0.8090000    581.2500
## 6         Maize      1.380                0.0342  23   0.7613333    939.0000
##   B03.560nm. B04.665nm. B05.705nm. B06.740nm. B07.783nm. B08..842. B08A.865nm.
## 1   947.7500   796.0000  1065.0000   2835.000   3393.000  3406.000    3614.000
## 2   805.0000   296.3333  1340.0000   5100.667   5920.000  5819.000    6039.333
## 3   638.3333   554.0000  1104.0000   2307.333   2657.667  2751.333    2851.667
## 4   415.3333   255.6667   731.6667   2611.333   3449.000  3524.000    3595.000
## 5  1068.2500   940.2500  1988.5000   3299.000   3550.000  3562.750    3789.500
## 6  1253.3333  1248.6667  1810.6667   3256.000   3845.000  3803.667    4117.333
##   B11.1610nm. B12.2190nm.
## 1    2232.000   1398.0000
## 2    2375.000   1237.3333
## 3    1651.000   1058.3333
## 4    1304.000    687.3333
## 5    2712.500   1894.0000
## 6    3205.333   2397.0000
# explore the explerimental data
lai         <- data[,2]
reflectance <- data[,c(6:12,14,15)]

# plot reflectance in 6 different LAI groups
par(mfrow=c(2,3))    # set the plotting area into a 1*2 array
matplot( t(reflectance[lai < 1 ,]),             ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= '       LAI < 1  ', col = 1)
matplot( t(reflectance[lai >= 1 & lai < 1.5,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= '  1 <= LAI < 1.5', col = 1)
matplot( t(reflectance[lai >= 1.5 & lai < 2,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= '1.5 <= LAI < 2', col = 1)
matplot( t(reflectance[lai >= 2 & lai < 2.5,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= '  2 <= LAI < 2.5', col = 1)
matplot( t(reflectance[lai >= 2.5 & lai < 3,]), ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= '2.5 <= LAI < 3', col = 1)
matplot( t(reflectance[lai >= 3 ,]),            ylim=c(0,6000),type='l', xlab="S2 bands", ylab="Reflectance",main= '        LAI > 3', col = 1)

# We will split the dataset in two parts (50/50) in Part 2. 
# For Part 1, we will use all data together

  data_training <- data[1:20,] #[1:50,] 
  data_testing  <- data[21:100,] #[51:100,]

#plost an histogram of the LAI insitu data
  par(mfrow=c(1,2))    # set the plotting area into a 1*2 array

  hist(data_testing$LAI_insitu)
  hist(data_training$LAI_insitu)

Step 2

LAI-NDVI

# Calculate NDVI (usng NIR band B08 and RED B04)
  NDVI <- (data_testing$B08..842. - data_testing$B04.665nm.) / (data_testing$B08..842. + data_testing$B04.665nm.)

# Verify the NDVI distribution and LAI (in-situ measurements) vs NDVI relationship 
  hist.default(NDVI,breaks = 30,xlim=c(0,1))

#Calculate LAI using empirical equaition from literature (model and coefficients are obtained from a previous calibration exercise)

#repalce "xxx" with the model parameter from literature. 
  LAI_NDVI <- 0.158 * exp (3.51 * NDVI)

# Study the LAI distribution and compore it to measuremenets   
  hist.default(LAI_NDVI,breaks = 30,xlim=c(0,6))

# Fitting Linear Models (we do this to calculate how strong the correlation between testing data and estimate is)
  fit <- lm(LAI_NDVI~data_testing$LAI_insitu)

#  Extract Coefficients of linear fitting (intercept and slope of linear fitting function)
  coefs <- coef(fit)

# Calculate root mean square error
  rmse <- round(sqrt(mean((data_testing$LAI_insitu-LAI_NDVI)^2)), 2)
  coefs <- coef(fit)
  intercept <- round(coefs[1], 3)
  slope <- round(coefs[2],3)
  
# Coefficient of correlation R-squared 
  r2 <- round(summary(fit)$r.squared, 3)
  
# Prepare a text to add to the plot
  eqn <- bquote(~~ r^2 == .(r2) * "," ~~ RMSE == .(rmse))
  
# Plot results
  par(pty="s")
  plot(data_testing$LAI_insitu,LAI_NDVI,xlim=c(0,8),ylim=c(0,8),xlab="LAI measurements", ylab="LAI NDVI Sentinel-2", pch=3, col=3)
  # add linear fitting model
  abline(fit, lty = 2)
  # add the R-squared and RMSE to the plot
  text(3, 8, eqn, pos = 1.2)

  pl_LAI_NDVI <- recordPlot()
  
  # http://r-statistics.co/Linear-Regression.html

Step 3

LAI-WDIV CLAIR model

# Soil line slope and alfa from literature
  SLS      <- 1.1
  alfa     <- 0.35
  WDVI_inf <- 7000
  
# Calculate WDVI
# WDVI = ρ_nir - ρ_red * ρ_(s nir)/ρ_(s red)
  
  WDVI <- data_testing$B08..842. - data_testing$B04.665nm.*SLS

# study the WDVI distribution and take the maximum value to set the WDVI infinitive (WDVI_inf) parameter
  hist.default(WDVI,breaks = 30,xlim=c(0,8000))

# Calculate LAI using the LAI - WDVI semi-empirical model
# LAI_WDVI =  -1 / α * ln ( 1 - WDVI / WDVI∞ )
  
  LAI_WDVI <- -1 / alfa * log(1 - WDVI / WDVI_inf)
  
# study the LAI distribution and compore to measuremenets   
  hist.default(LAI_WDVI,breaks = 20,xlim=c(0,8))

#  Fitting Linear Models  
  fit <- lm(LAI_WDVI~data_testing$LAI_insitu)

#  Extract Coefficients of linear fitting
  coefs <- coef(fit)

# Calculate root mean square error
  
  rmse <- round(sqrt(mean((data_testing$LAI_insitu-LAI_WDVI)^2)), 2)
  coefs <- coef(fit)
  intercept <- round(coefs[1], 3)
  slope <- round(coefs[2],3)

# Coefficient of correlation
  r2 <- round(summary(fit)$r.squared, 3)

# Prepare a text to add to the plot
  eqn <- bquote(~~ r^2 == .(r2) * "," ~~ RMSE == .(rmse))
  
# Plot results

  par(pty="s")
  plot(data_testing$LAI_insitu,LAI_WDVI,xlim=c(0,8),ylim=c(0,8),xlab="LAI measurements", ylab="LAI WDVI Sentinel-2 ", pch=3, col=4)
  abline(fit, lty = 2)
  text(3, 8, eqn, pos = 1.2)

  pl_LAI_WDVI <- recordPlot()

Step 4

LAI in-situ vs LAI BOKU (s2.boku.eodc.eu)

The LAI BOKU (s2.boku.ac.at) is derived using an Artificial Neural Network (ANN) and atmospherically corrected Sentinel-2 reflectance data (using all spectral bands). The advantage is that the approch is global and does not require site specific calibration. In this example, we take the LAI data from the S2.BOKU portal and we compare it to the in-situ LAI measurements.

The LAI BOKU data are provided directly into the CSV file we loaded in Step 1 (data_testing$LAI.S2.BOKU). Here we only do the comparison and look at results.

Step 5

Raster data: LAI-NDVI

Until now, we have been working with data from the CSV file. How can we apply the existing LAI-NDVI model to raster data?

# ========================================================
# Open raster (S2A_T33UXP_20170528T095031...)
# ========================================================

# Load raster data provided with the exercise
  infls <- list.files(path= './raster_data/Surface_Reflectance_L2A/', pattern='.tif$',full.names=TRUE)

# create a layer stack
  rasterData <- raster::stack(infls) # uses raster package

# rename the layer bands in the raster object
  names(rasterData)<-c('B02','B03','B04','B05','B06','B07','B08','B11','B12','B8A')

  show(rasterData)
## class      : RasterStack 
## dimensions : 860, 2440, 2098400, 10  (nrow, ncol, ncell, nlayers)
## resolution : 10, 10  (x, y)
## extent     : 619200, 643600, 5335800, 5344400  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=33 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## names      :   B02,   B03,   B04,   B05,   B06,   B07,   B08,   B11,   B12,   B8A 
## min values :     0,     0,     0,     0,     0,     0,     0,     0,     0,     0 
## max values : 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535
# ========================================================
# Calculate LAI based on NDVI
# ========================================================
    
# Calculate NDVI
  NDVI <- (rasterData$B08 - rasterData$B04) / (rasterData$B08 + rasterData$B04)
  
# Calculate LAI
  LAI_NDVI <- 0.158 * exp (3.51 * NDVI)

  hist.default(getValues(NDVI),breaks = 100,xlim=c(-1,1))

  cuts=c(0.5, 1, 2, 2.5, 2.75, 3, 3.25, 3.5, 3.75, 4, 5, 6) #set breaks
  pal <- colorRampPalette(c("brown","green"))

  plot(LAI_NDVI,breaks=cuts, col = pal(12))

# Save raster 
# uncomment to execute this line
raster::writeRaster(NDVI,'./results/S2A_T33UXP_20170528T095031_NDVI.tif',format="GTiff",datatype='FLT4S',overwrite=TRUE)
raster::writeRaster(LAI_NDVI,'./results/S2A_T33UXP_20170528T095031_LAI_NDVI.tif',format="GTiff",datatype='FLT4S',overwrite=TRUE)
  
# Save data using a scaling factor of 1000 and Integer format to save disk space 
LAI_NDVI <- LAI_NDVI * 1000
raster::writeRaster(LAI_NDVI,'./results/S2A_T33UXP_20170528T095031_LAI_NDVI_sf1000.tif',format="GTiff",datatype='INT2S',overwrite=TRUE)

Step 6

Raster data: LAI-WDIV CLAIR model

# ========================================================
# Calculate the Weighted difference vegetation index (WDVI) 
# ========================================================
  
  r_B08 <- rasterData$B08
  r_B04 <- rasterData$B04

# create a random sample 
  s = sample(10000)

# plot the feature space (RED vs NIR) for all pixel types
  plot(r_B04[s],r_B08[s])

# select only the bare soil pixels (quick and dirty way)
  rs_B08 <- rasterData$B08[NDVI > 0.1 & NDVI < 0.25]
  rs_B04 <- rasterData$B04[NDVI > 0.1 & NDVI < 0.25]
  
# plot only the bare soil pixels
  plot(rs_B04[s],rs_B08[s])
  
# find the soil line slope
  fit <- lm(rs_B08[s]~rs_B04[s])
  
  coefs <- coef(fit)
  slope <- round(coefs[2],3)
  
  plot(rs_B04[s],rs_B08[s])
  abline(fit, lty = 1, col = 2)

# obtain the soil line slope
  SLS  <- slope # from soil line slope analysis
  
  SLS
## rs_B04[s] 
##     1.249
# Calculate the WDVI
  WDVI <- rasterData$B08 - rasterData$B04 * SLS
  
# obtain the WDVIinf value
  hist.default(getValues(WDVI),breaks = 100,xlim=c(0,8000))

  summary(WDVI)
## Warning in .local(object, ...): summary is an estimate based on a sample of 1e+05 cells (4.77% of all cells)
##             layer
## Min.    -2194.544
## 1st Qu.   733.095
## Median   2667.972
## 3rd Qu.  3859.963
## Max.     6710.095
## NA's        0.000
  WDVI_inf <- mean(getValues(WDVI)) + 3 * sd(getValues(WDVI))
  
  WDVI_inf
## [1] 7204.347
# ========================================================
# Calculate the Leaf Area Index, based on the CLAIR model
# ========================================================
  
  alfa     <- 0.34 # from literature 
  
  LAI_WDVI <- - 1 / alfa * log(1 - WDVI / WDVI_inf)
## Warning in setValues(r, log(values(x), base = base)): NaNs produced
  plot(LAI_WDVI,breaks=cuts, col = pal(12))

# ========================================================
# Removing out-of-range LAI values
# ========================================================
  
  LAI_WDVI_cut <- LAI_WDVI
  
  LAI_WDVI_cut[LAI_WDVI_cut < 0] <- 9999
  LAI_WDVI_cut[LAI_WDVI_cut > 7] <- 9999
  
LAI_WDVI_cut <- LAI_WDVI_cut * 1000
raster::writeRaster(LAI_WDVI_cut,'./results/S2A_T33UXP_20170528T095031_LAI_WDVI_sf1000.tif',format="GTiff",datatype='INT2S',overwrite=TRUE)

Step 7

Compare LAI-NDVI with the LAI BOKU

LAI_ANN <- raster::raster('./raster_data/S2A_T33UXP_20170528T095031_LAI_10m.tif')
LAI_NDVI <- raster::raster('./results/S2A_T33UXP_20170528T095031_LAI_NDVI_sf1000.tif')

# select a random sample of pixels to plot
s = sample(10000)

  par(pty="s")
  plot(LAI_ANN[s],LAI_NDVI[s],xlim=c(0,8000),ylim=c(0,8000),xlab="LAI_ANN", ylab="LAI_NDVI", pch=3, col=10)

  # Analyse where most differences are geographically located 
  plot( (LAI_ANN-LAI_NDVI) )

You can explore this further in QGIS

Step 8

Get the α value of the CLAIR model

We looked at how to derive the soil line slope (SLS) and the WDVIinf. What about the α value?

We will derive the α coefficient using an unconstrained nonlinear optimization method, which minimizes the Root Mean Square Error (RMSE) (our cost function) between measurements and predictions of LAI values.

Measurements of LAI are provided (CSV file).

Predictions are generated using the reflectance data extractred in correspondance of the LAI measurements. An initial α value of 0.35 will be used as first guess based on literature data.

# First, let´s derive the precise Soil Line Slope 
# We prepared a selection of about 60 sample points of bare soils (obtained from visual inspection) and the reflectance of  these points is used for calculation of the soil line slope.

  data_soils<-read.csv(file='./CLAIR/BareSoil_dataset.csv',header = TRUE,sep = ',')

#  Fitting Linear Models  
  fit <- lm(data_soils$B8~data_soils$B4-1)

#  Extract Coefficients of linear fitting
  coefs <- coef(fit)
  
# slope of the linear fitting (specific Soil Line Slope)
  SLS <- as.numeric(round(coefs[1], 3))

# Plot feature space x-axis: Red and y-axis: NIR  
  # from the raster experiment (quick and dirty)
  plot(rs_B04[s],rs_B08[s],pch=1,col=5)
  
  # from precise bare soil selection
  points(data_soils$B4,data_soils$B8,col="brown",xlim=c(0,6000),ylim=c(0,6000),xlab="Refl. [RED]", ylab="Refl. [NIR]", pch=3)
#  Add Straight Lines to a Plot  
  abline(fit, lty = 2)

# new soil line slope 
  SLS
## [1] 1.322

Calibrate the CLAIR model parameter (alfa) using the training dataset

Get the WDVI first

# Calculate WDVI (note that we are using the training data). 
WDVI <- data_training$B08A.865nm. - data_training$B04.665nm. * SLS

WDVI_inf<-7200

Calibrate the CLAIR model parameter (alfa) using the training dataset

Get the optimal alfa value

# LAI insitu data from the traning part of the data
  LAI_training <- data_training$LAI_insitu

  n_training <- length(LAI_training)
  
# Invert the LAI CLAIR model to find alfa. This is written in the form of a function that we can optimise in R. 
  alfa_search <- function(alfa) sqrt(sum((LAI_training -((-log(1-(WDVI/WDVI_inf)))*(1/alfa)))^2)/ n_training )

# Search the interval from lower to upper for a minimum in the function alfa_search
  a<-optimise(f = alfa_search,lower = 0.1, upper = 1)

  a$minimum
## [1] 0.2957987
# the result will be the new alfa value that minimizes the error between estimated and measured LAI values.

new_alfa <- a$minimum

new_alfa
## [1] 0.2957987
# alternative method package "optimization"
output<-optimization::optim_nm( alfa_search , start = c(1), maximum = FALSE, trace = TRUE)
plot(output)

output$par
##           
## 0.2958984

This concludes the guided tour.

Summary

LAI-NDVI, LAI-WDVI, LAI-BOKU and LAI-WDVI optimized

# Plot LAI NDVI
replayPlot(pl_LAI_NDVI)

# Plot LAI WDVI
replayPlot(pl_LAI_WDVI)

# Plot LAI WDVI with optimized alfa
replayPlot(pl_LAI_WDVI_cal)

# Plot LAI BOKU data
replayPlot(pl_LAI_BOKU)

Exercise

Study the LAI-NDVI relationship and develop a function to estimte LAI from NDVI

# LAI_insitu
LAI_training <- data_training$LAI_insitu
LAI_testing  <- data_testing$LAI_insitu

# Calculate NDVI (usng NIR band B08 and RED B04)
  NDVI_training <- (data_training$B08..842. - data_training$B04.665nm.) / (data_training$B08..842. + data_training$B04.665nm.)
  NDVI_testing  <- ( data_testing$B08..842. -  data_testing$B04.665nm.) / ( data_testing$B08..842. +  data_testing$B04.665nm.)

# scatterplot NDVI vs LAI 
  par(pty="s")
  plot(NDVI_training,LAI_training,xlim=c(0,1),ylim=c(0,8),xlab=" NDVI ", ylab="LAI measurements (training set)", pch=3, col=1)

# which function type would best fit the trend in the data? 
  
#  Fitting a Linear Model  
  model_linear <- lm(LAI_training~NDVI_training)
  
  coefs <- coef(model_linear)
  intercept <- coefs[1]
  slope     <- coefs[2]
  
# fit non-linear model
  model_nonlinear <- nls(LAI_training ~ exp(a + b * NDVI_training), start = list(a = 0, b = 0))

  coefs <- coef(model_nonlinear)
  a  <- coefs[1]
  b  <- coefs[2]
  
# Plot results
  par(pty="s")
  plot( slope * NDVI_testing + intercept , LAI_testing,xlim=c(0,8),ylim=c(0,8),xlab="LAI NDVI (linear model)", ylab="LAI measurements", pch=3, col=1)

  plot( exp(a + b * NDVI_testing)        , LAI_testing,xlim=c(0,8),ylim=c(0,8),xlab="LAI NDVI (nonlinear model)", ylab="LAI measurements", pch=3, col=2)

Calculate and report the r-squared and the RMSE for the results of the:

  • linear model (LAI NDVI (linear) vs LAI measuremnets)
  • linear model (LAI NDVI (nonlinear) vs LAI measuremnets)