# The algorithm is written in R. The lines starting with or followed by "#" are comments for functions, command lines, or delimiters of separate functions.
###### Load packages 
###### Author: Qingyuan Yang and Marcus Bursik
###### License: GPL. Use at your own risk.
###### Requires: to run the examples, the appropriate CSV files are needed.
library(gstat)
library(sp)
library(rgdal)
library(RandomFields)
library(maptools)
library(raster)
library(deldir)
library(ggplot2)
library(segmented)
library(poweRlaw)
library(segmented)
library(geoR)
library(combinat)


###### Workflow #########################################################################################################################################
######(1) Calculate the distances, downwind distances of sample sites with respect to the input       <------ function "prep"				######
######    source vent location and wind direction.	 															######
######																								######
######(2) Apply segmented/simple linear regression to the datasets: the log-thickness is the linear   <------ function "sample.reg"			######
######    combination of distance and downwind distance plus a constant.													######
######																								######
######(3) Record the fitted values and residuals from the fitting in step (2)                         <------ implemented by command lines		######
######																								######
######(4) Form a grid for mapping of the trend and the final result		            		          <------ function "form.grid"			######
######																								######
######(5) Form the trend model 											                              <------ function "prep2" and "trendmodel"	######
######																								######
######(6) Form and fit the variogram function of the residuals recorded in step (3)	                  <------ function "var.read"			######
######																								######
######(7) Implemnt ordinary kriging using the residuals in step (3) and the variogram models          <------ function "prediction"			######
######    from step (6) to form the local thickness variation. We take the sum of 			                    							######
######    the trend model in step (5) and the local variation to form the final thickness distribution								######
######																								######
###### End of workflow ##################################################################################################################################



###### Functions ########################################################################################################################################
prep <- function(sv, td, ang){	
											
### See step (1) in the workflow above
### Input variables: 									  ### Data type			  ### Column naes
###		sv: the source vent location							dataframe				"x" and "y"
###		td: the tephra thickness dataset						dataframe				"x", "y", "rz" (real thickness), "z"(log-transformed thickness)
###		ang: the inferred wind direction (from north clockwise)		a single value			     	   -

### Intermediate variables:
###		pts: the coordinates of sample sites
###		nr: the number of observations
###		sv.mtx: the matrix prepared for the computation of distance and downwind distance
###		disp.mtx: the unit vector pointing along the wind direction for the computation of downwind distance
###		dist, dd, cd: computed distances, downwind distances, and crosswind distances from the sample sites to
###		the source vent with the inferred wind direction

### Output:
###		The output is a dataframe that contains the original thickness dataset and the calculated distances, downwind
###		distances, and the crosswind distances of the sample sites.


### Method:
###		This function uses a simple dot product to calculate the downwind distance

	pts <- as.matrix(td[,c(1,2)])									### Extract the coordinates of sample sites
	sv <- as.matrix(sv)
	nr <- nrow(pts)	


	sv.mtx <- matrix(ncol=2, nrow=nr)								### Repeated rows of the coordinates of the source vent
	sv.mtx[,1] <- rep(sv[1], nr)
	sv.mtx[,2] <- rep(sv[2], nr)


	disp.mtx <- c(sin(ang*pi/180),cos(ang*pi/180))						### Form the unit vector pointing to the wind direction

	
	pts.mtx <- pts-sv.mtx										### Calculate the distances, downwind distances, and crosswind distances
	pts.sq <- (pts.mtx)^2										### of the sample sites
	dist<- (pts.sq[,1]+pts.sq[,2])^0.5
	dd <- pts.mtx %*% disp.mtx
	cd <-(dist^2-dd^2)^0.5
      

	output <- as.data.frame(cbind(pts, dist, dd, cd, td[, c(3,4)]))			### Reorganize the data for output
	colnames(output) <- c("x", "y", "dist", "dd", "cd", "rz", "z")
		

	return(output)
}

###-----------------

prep2 <- function(sv, td, ang){

### This function is written for the computation of the trend model, which requires distances and 
### downwind distances of all the cells within the region of interest. Therefore this function works
### exactly the same way as function "prep", except that the trend thicknesses of these cells are to
### be computed. This function is called in the function "trendmodel" for further calculation of trend thickness.


### Input variables: 									  ### Data type			  ### Column names
###		sv: the source vent location			     				dataframe				"x" and "y"
###		td: the coordinates of the cells of the grid				dataframe or matrix		does not matter, as they are transfered into matrix in this function
###		ang: the inferred wind direction (from north clockwise)		a single value			     -


### Output variables:
### 		The output is a dataframe that records the coordinates, distances, downwind distances, and crosswind distances
###		of the cells of the grid.

	pts <- as.matrix(td[,c(1,2)])
	sv <- as.matrix(sv)
	nr <- nrow(pts)	


	sv.mtx <- matrix(ncol=2, nrow=nr)
	sv.mtx[,1] <- rep(sv[1], nr)
	sv.mtx[,2] <- rep(sv[2], nr)
	disp.mtx <- c(sin(ang*pi/180),cos(ang*pi/180))

	
	pts.mtx <- pts-sv.mtx
	pts.sq <- (pts.mtx)^2
	dist<- (pts.sq[,1]+pts.sq[,2])^0.5
	dd <- pts.mtx %*% disp.mtx
	cd <-(dist^2-dd^2)^0.5
      

	output <- as.data.frame(cbind(pts, dist, dd, cd))
	colnames(output) <- c("x", "y", "dist", "dd", "cd")
		

	return(output)
}

###-----------------

sample.reg <- function(td, segdist, segdd){

### See step (2) of the workflow above
### This function provides two alternative fitting schemes for the fitting of thickness with distance and downwind distance:
### 	(1) Use linear regression with the "segmented" fitting function to simulate
###	    the different decay rates of tephra thickness within and outside the plume corner.

###	(2) Use simple linear regression for the fitting.

###	This method will tend to use the segmented fitting option as it is better reconciled with the physics of tephra settling.
###   If error occurs, meaning that the data are inappropriate for option(1), a "tryCatch" function is set up to switch to 
###	option(2).

###	Input variables:
###		td: the output from function "prep"
###		segdist: the inferred breakpoint for distance, this can be estimated by plotting the thickness against distance of the observations.
###		segdd: the inferred breakpoint for downwind distance.

### 	Intermediate functions:
###		segfit: segmented fitting function
###		simplefit: simple linear regression

### 	Output variables:
###		An object that can be regarded as the fitted model. It contains the fitted values, residuals and the
###		coefficients of the fitting. It can be used to predict the trend thickness for the grid and also store the residuals 
###		for ordinary kriging.

###	*** NOTES ***
###		The segemented fitting runs iterative processes. Sometimes even with the same "segdist" and "segdd" inputs, it still
###		comes up with different breakpoints and consequently slightly different coefficients. Most of the time, such variation
###		in the models is negligible, but we still suggest users to be careful when dealing with the segmented fitting.
###		The goodness of fit can be examined by printing or plotting the residuals, which are stored in the output.
### 		If option (1) is being used, the output also contains the values of the breakpoints for the two variables.
		
	segfit <-function(segdist, segdd){															### Segmented fitting function
		reg.mod <- lm(z~ dist + dd,data=td)
			seg.lmmodel <- segmented(reg.mod, seg.Z = ~dist + dd, psi = list(dist = c(segdist), dd = c(segdd)))
      	return(seg.lmmodel)
	}


	simplefit <-function(){																	### Simple linear regression function
		reg.mod <- lm(z ~ dist + dd ,data=td)
      	return(reg.mod)
	}

	
	result = tryCatch(segfit(segdist, segdd),error=function(e){simplefit()})									### TryCatch structure to automatically switch to simple linear regression
	

  return(result)																			### when error occurs
  }

###-----------------

form.grid <- function(td, cellsize, displacement.coef, expanding.coef){
### See step (4) in the workflow above
### This function first finds the bounding box of the sample sites. It can be used as the extent of the grid.
### However, sometimes one needs a larger extent. To do that, "displacement.coef" and "expanding.coef" are set.
### "displacement.coef" determines if the lowerleft corner of the bounding box should be further shifted to 
### the lower left. "expanding.coef" determines if the length and width of the bouding box should be expanded.
### These two values are all ratios. For example, for a 500*500 squared bounding box, if the 'displacement.coef"
### is set to be 1, then the lowerleft corner (if it was 0,0), will be moved to -500, -500. If the "expanding.coef"
### is set to be 2, then the size of the new bounding box will be 1000*1000.

### Input variables:
### td: the output from function "prep"
### cellsize: cellsize (a single value, as they are squared cells)
### displacement.coef: explained above (a single value)
### expanding.coef: explained above (a single value)


### Output variables:
### An empty grid


  td.sp <- SpatialPoints(cbind(td$x, td$y))
  bbox <- bbox(td.sp)
  cs <- c(cellsize, cellsize)
  cc <- as.numeric(ceiling(bbox[,1] + (cs / 2) - diff(t(bbox)) * displacement.coef))
  cd <- ceiling(diff(t(bbox)) / cs) * expanding.coef
  td.temp.grid <- GridTopology(cellcentre.offset = cc, cellsize = cs, cells.dim = cd)
  td.grid <- SpatialGrid(td.temp.grid)
  return(td.grid)
} 

###-----------------

trendmodel <- function(td, sv, grid, ang, ftng.rslt){
### See step (5) in the workflow above
### This function calculates the distances, downwind distances of the cells of the grid, and makes the
### prediction of trend thickness using the model from function "sample.reg".

### Input variables:
###		td: the output from function "prep"
###		sv: the source vent coordinate
###		grid: the output from function "form.grid"
###		ang: the inferred wind direction (from north clockwise)
###		ftng.rslt: the output from function "sample.reg"


### Output variables:
### A dataframe that contains the coordinates, distances, downwind and crosswind distances, trend thicknesses and 
### trend thicknesses in logarithm form for all the cells within the grid.

	td.grid <- SpatialPoints(as(grid, "SpatialPixels"))
	td.grid.coord <- coordinates(td.grid)


		grided<- prep2(sv, td.grid.coord, ang)

	
		gridz <- predict(ftng.rslt, data.frame(cbind(dist = grided$dist, dd = grided$dd)))
		gridrz <- 10^gridz

		
	output <- as.data.frame(cbind(grided, gridrz, gridz))
	colnames(output) <- c("x", "y", "dist", "dd", "cd", "zr", "z")

	
	return(output)
}

###-----------------

var.read <- function(td, width.val, cutoff.val, var.par, var.model){
### This function reads the locations of the sample sites and the residuals from the fitting in function "sample.reg" to 
### form and fit a variogram model. This function serves as a preparation for ordinary kriging.

### Input variables:
###		td: the output from function "prep"
###		width.val & cutoff.val: the parameters to calculate the experimental variogram. "width.val" denotes the
###		averaged lag for the computation, and "cutoff.val" is the maximum lag that is taken into consideration for the 
###		calculation.
###		var.par: the parameters required for fitting variogram models. It includes three values, namely sill, range, and nugget.
###		Detailed explanations of these parameters can be found in any geostatistics reference.
###		var.model: a text variable which describes the shape of different variogram models, type the command line "show.vgms()" to view a list
###		of variogram models.

### Output variables:
###		The output is a list, the first object of the list is the variogram model, which is the main output.
###		The second object is the plot of the experimental variogram and the fitted variogram curve.
###		The third  object is the values of the experimental variogram

  td.sp <- SpatialPointsDataFrame(cbind(td$x, td$y), td)
  td.var <- variogram(res ~ 1, td.sp, width=width.val, cutoff = cutoff.val)



  td.vgm <- vgm(psill=var.par[1], model=var.model, range=var.par[2], nugget=var.par[3])
  td.fit <- fit.variogram(td.var, td.vgm, fit.sills = c(TRUE, TRUE)) 
  return(list(td.fit,plot(td.var,td.fit),td.var))
}

###-----------------

prediction <- function(td, max, min, maxdist, grid, trend, variog){
### This function first implements ordinary kriging on the residuals to construct the local variation map, and then
### takes the sum of trend and local variation to present the final result.

### Input variables:
###		td: the output from function "prep"
###		max:
###		min:
###		maxdist:
###			The "max" and "min" variables describe the maximum and minimum number of observations taken into account 
###			for the kriging estimation at a single cell. "maxdist" is used to define a maximum radius beyond which the 
###			sample sites are assumed non-related to a particular cell.
###		grid: the output from function "form.grid"
###		trend: the output from function "trendmodel"
###		variog: the first object of the output from function "var.read"

### Output variables:
###		The output is a dataframe that contains the columns of the output of function "trendmodel". In addition to that,
###		it also has the predicted thickness and its logarithm, and the variance of kriging estimation.
###		Here the variance only represents the variance from kriging the log-residuals of the observations and thus cannot
###		be used to characterize the total variance of this method.


	td.sp <- SpatialPointsDataFrame(cbind(td$x, td$y), td)  
	td.temp.k <- krige(res ~ 1, td.sp, nmax = max, nmin = min, maxdist = maxdist, grid, variog)
	k.result <- td.temp.k$var1.pred
	k.result[is.na(k.result)] <- 0
	k.variance <- td.temp.k$var1.var
	k.rz <- 10^(k.result + trend$z)


	output <- cbind(trend, k.result, k.variance, k.rz)


return(output)
}


###### End of functions ####################################################################################################



###### Demo ################################################################################################################

setwd("C:\\Users\\working directory")										### Set working directory, "\" has to be replaced by "\\".  Change to "/" for *nix, mac.
td <- read.table("thickness dataset.csv", header=TRUE, sep=",")						### Read the thickness dataset
sv <- read.table("source vent.csv", header=TRUE, sep=",")							### Read source vent location

td <- prep(sv, td, 20)													### Calculate the distances, downwind distances, and crosswind distances of the sample sites,
																### here the wind direction is 20 degree from north clockwise.

model <- sample.reg(td, 3000,3000)											### Form either the segmented or the simple linear regression model of thickness against
																### distance and downwind distance. The breakpoints for the two variables are set as
																### 3000 and 3000 meters.

td$fit <- model$fitted.values												### Expand "td" by storing the fitted values and
td$res <- model$residuals												### residuals from the fitting in the last command line

grid <- form.grid(td, 500, 0.1, 1.2)											### Form the grid.

trend <- trendmodel(td, sv, grid, 20, model)									### Form the trend. Note the wind direction has to be set again for
																### the calculation of grid cells, and the two wind directions have to be the same.
	
var <- var.read(td, 300,10000, c(0.04,5000,0.01), "Cir")							### Form the variogram model. The parameters are explained in the 
																### description of function "var.read"

result <- prediction(td, 50,20, 5000, grid,trend, var[[1]])							### Calculate the local variation, and take its sum and trend thickness for final result.


result.sp <- SpatialPixelsDataFrame(cbind(trend$x, trend$y), result)					### Turn the final result into "SpatialPixelsDataFrame" for visualization.
spplot(result.sp["k.rz"])	or	spplot(result.sp["zr"])							### Plot the final result or the trend model.

levels <- c(0,250,500,1000,2000,3000,5000,10000,20000)								### Set up the levels for plotting the isopach map.
isomap <- ContourLines2SLDF(contourLines(as.image.SpatialGridDataFrame(result.sp["k.rz"]), levels = levels))
																### Form the isopach map

plot(isomap)															### Plot the isopach map
																### Note: "isomap" is a "SpatialLinesDataFrame" and can be used in GIS programs for further mapping purposes.
																### Geo-referencing can be done in R or GIS programs.

																### ***Notes on infer the extent of tephra fall deposits:
																### (1) Add one unit thickness to all the observations and log-transform them.
																### (2) Apply these transformed log-thickness to the method
																### (3) Specify the "levels <- c(1)" and apply it to "ContourLines2SLDF" function to find the unit thickness isopach
																### (4) The resultant contour is the inferred extent.
																### ***
																### The resultant contour is strongly influenced by the trend model,
																### as normally fewer observations are made on the distal and thin parts of a tephra fall deposit


###### End of demo ##########################################################################################################