library(terra)
library(dplyr)
library(corrplot)
library(pROC)
library(sf)

rm(list=ls())


############################################################
# 1. Load occurrence data
############################################################

iucn <- st_read("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1/GIS/IUCN/Pelodytes_ibericus.shp")
# Convert polygon to terra
iucn_vect <- vect(iucn)

plot(iucn_vect)

occ <- read.csv("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1/GIS/GBIF/Pelodytes_punctatus_20260227.csv")
head(occ)

occ <- occ %>%
  select(species, decimalLongitude, decimalLatitude)

head(occ)


# Convert to spatial vector object (WGS84)
occ_sp <- vect(
  occ,
  geom = c("decimalLongitude", "decimalLatitude"),
  crs = "EPSG:4326"
)

plot(occ_sp, add=T)


# keep points inside polygon IUCN
occ_sp <- occ_sp[iucn_vect, ]



############################################################
# 2. Load environmental predictors
############################################################

all_bioclim <- terra::rast("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1_20260302/Env_var/climate/wc2.1_country/ESP_wc2.1_30s_bio.tif")
plot(all_bioclim)

predictor_dir <- "C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1_20260302/Env_var/Bioclim"

bio_1  <- rast(file.path(predictor_dir, "bio_1.asc"))
bio_5  <- rast(file.path(predictor_dir, "bio_5.asc"))
bio_12 <- rast(file.path(predictor_dir, "bio_12.asc"))
bio_14 <- rast(file.path(predictor_dir, "bio_14.asc"))


setwd("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1_20260302/HRL")

forest_r <- rast(
  "forest.asc"
)

names(forest_r) <- "forest_r"
crs(forest_r) <- crs(bio_14)


# Stack predictors
predictors <- c(bio_1, bio_5, bio_12, bio_14, forest_r)

plot(predictors)



############################################################
# 3. Extract predictor values at occurrence points
############################################################

occ_vals <- extract(predictors, occ_sp, cells = TRUE)


# Remove duplicates by spatial cell
presence_data <- occ_vals %>%
  na.omit() %>%
  group_by(cell) %>%
  slice_head(n = 1) %>%
  mutate(presence = 1)



############################################################
# 4. Generate pseudo-absences
############################################################

absences <- spatSample(
  bio_1,
  size = 5000,
  method = "random",
  replace = FALSE,
  na.rm = TRUE,
  cells = TRUE,
  as.points = TRUE
)

absences

# Remove pseudo-absences overlapping presences
library(sf)
library(terra)
library(dplyr)

absences <- absences %>%
  dplyr::filter(!cell %in% presence_data$cell) 

abs_vals <- extract(predictors, absences, cells = TRUE)

abs_data <- abs_vals %>%
  na.omit() %>%
  mutate(presence = 0)

head(abs_data)


############################################################
# 5. Build modelling dataset
############################################################

sdm_data <- bind_rows(
  presence_data,
  abs_data
)

head(sdm_data)
table(sdm_data$presence)


# Balance classes (optional but good for SDM)
sdm_data <- sdm_data %>%
  group_by(presence) %>%
  sample_n(min(table(sdm_data$presence)))

table(sdm_data$presence)



############################################################
# 6. Check predictor correlation
############################################################

cor_mat <- cor(
  sdm_data[, c("bio_1","bio_5","bio_12","bio_14", "forest_r")],
  method = "spearman",
  use = "complete.obs"
)


corrplot.mixed(
  cor_mat,
  tl.cex = 0.6,
  number.cex = 0.8,
  addCoefasPercent = TRUE
)



############################################################
# 7. Cross-validation setup
############################################################

k <- 5

folds <- sample(
  rep(1:k, length.out = nrow(sdm_data))
)

auc_values <- numeric(k)



############################################################
# 8. Cross-validated logistic regression SDM
############################################################

for(i in 1:k){
  
  # Split training and testing data
  train_data <- sdm_data[folds != i, ]
  test_data  <- sdm_data[folds == i, ]
  
  
  # Fit additive logistic model
  model <- glm(
    presence ~ bio_1 + bio_5 + bio_12 + forest_r,
    family = binomial,
    data = train_data
  )
  
  
  # Predict on test fold
  test_data$pred <- predict(
    model,
    newdata = test_data,
    type = "response"
  )
  
  # Evaluate AUC
  roc_obj <- roc(
    test_data$presence,
    test_data$pred
  )
  
  auc_values[i] <- auc(roc_obj)
}




############################################################
# 9. Cross-validation performance
############################################################

print("Cross-validated AUC values:")
print(auc_values)

cat("\nMean CV AUC:", mean(auc_values), "\n")



############################################################
# 10. Fit final model using all data
############################################################

final_model <- glm(
  presence ~ bio_1 + bio_5 + bio_12 + forest_r,
  family = binomial,
  data = sdm_data
)

summary(final_model)



############################################################
# 11. Predict habitat suitability map
############################################################

prediction_map <- predict(
  predictors,
  final_model,
  type = "response"
)


plot(
  prediction_map,
  main = "Predicted Frog Habitat Suitability"
)




############################################################
# 12. ROC curve on full dataset (optional)
############################################################

sdm_data$pred_prob <- predict(
  final_model,
  type = "response"
)

roc_obj <- roc(
  sdm_data$presence,
  sdm_data$pred_prob
)

plot(roc_obj, main = "ROC Curve - SDM")
auc(roc_obj)
coords(roc_obj, "best", best.method = "youden")

threshold <- 0.51

binary_map <- prediction_map >= threshold
plot(binary_map)


setwd("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1_20260302/outputs")




############################################################
# 13. Predict the future
############################################################
Iberia <- st_read("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1_20260302/Administrative/Iberia.shp")
Iberia
Iberia4326 <- st_transform(Iberia, 4326)



bio_future_2030 <- rast("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1_20260302/Env_var/Future_climate/worldclim_2.1/wc2.1_2.5m_bioc_HadGEM3-GC31-LL_ssp245_2021-2040.tif")
names(bio_future_2030) <- paste0("bio_",1:19)

bio_future_2030 <- crop(bio_future_2030, Iberia4326, mask=T) # takes a while, 2'
plot(bio_future_2030)

bio_future_2030 <- project(bio_future_2030, predictors)
bio_future_2030 <- c(bio_future_2030, forest_r)

prediction_map_2030 <- predict(
  bio_future_2030,
  final_model,
  type = "response"
)

binary_map_2030 <- prediction_map_2030 >= threshold
plot(binary_map_2030)


plot(
  c(prediction_map, prediction_map_2030),
  main = c("2000", "2030")
)

plot(
  c(binary_map, binary_map_2030),
  main = c("2000", "2030")
)




############################################################
# save rasters
############################################################

writeRaster(prediction_map, "prediction_map.tif", overwrite=T)
writeRaster(binary_map, "binary_map.tif", overwrite=T)