library(terra)
library(dplyr)
library(corrplot)
library(pROC)


rm(list=ls())



############################################################
# 1. Load occurrence data
############################################################

iucn <- st_read("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1/GIS/IUCN/Pelodytes_ibericus.shp")
# Convert polygon to terra
iucn_vect <- vect(iucn)


occ <- read.csv("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1/GIS/GBIF/Pelodytes_punctatus_20260227.csv")

occ <- occ %>%
  select(species, decimalLongitude, decimalLatitude)

# Convert to spatial vector object (WGS84)
occ_sp <- vect(
  occ,
  geom = c("decimalLongitude", "decimalLatitude"),
  crs = "EPSG:4326"
)

# keep points inside polygon IUCN
occ_sp <- occ_sp[iucn_vect, ]




############################################################
# 2. Load environmental predictors
############################################################

predictor_dir <- "C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1_20260302/Env_var/Bioclim"

bio_1  <- rast(file.path(predictor_dir, "bio_1.asc"))
bio_5  <- rast(file.path(predictor_dir, "bio_5.asc"))
bio_12 <- rast(file.path(predictor_dir, "bio_12.asc"))
bio_14 <- rast(file.path(predictor_dir, "bio_14.asc"))


setwd("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1_20260302/HRL")

forest_r <- rast(
  "forest.asc"
)

names(forest_r) <- "forest_r"
crs(forest_r) <- crs(bio_14)


# Stack predictors
predictors <- c(bio_1, bio_5, bio_12, bio_14, forest_r)

plot(predictors)



############################################################
# 3. Extract predictor values at occurrence points
############################################################

occ_vals <- extract(predictors, occ_sp, cells = TRUE)


# Remove duplicates by spatial cell
presence_data <- occ_vals %>%
  na.omit() %>%
  group_by(cell) %>%
  slice_head(n = 1) %>%
  mutate(presence = 1)



############################################################
# 4. Generate pseudo-absences
############################################################


absences <- spatSample(
  bio_1,
  size = 5000,
  method = "random",
  replace = FALSE,
  na.rm = TRUE,
  cells = TRUE,
  as.points = TRUE
)


# Remove pseudo-absences overlapping presences
absences <- absences %>%
  filter(!cell %in% presence_data$cell) 

abs_vals <- extract(predictors, absences, cells = TRUE)

abs_data <- abs_vals %>%
  na.omit() %>%
  mutate(presence = 0)

head(abs_data)


############################################################
# 5. Build modelling dataset
############################################################

sdm_data <- bind_rows(
  presence_data,
  abs_data
)

head(sdm_data)
table(sdm_data$presence)


# Balance classes (optional but good for SDM)
sdm_data <- sdm_data %>%
  group_by(presence) %>%
  sample_n(min(table(sdm_data$presence)))

table(sdm_data$presence)



############################################################
# 6. Check predictor correlation
############################################################

cor_mat <- cor(
  sdm_data[, c("bio_1","bio_5","bio_12","bio_14", "forest_r")],
  method = "spearman",
  use = "complete.obs"
)

corrplot.mixed(
  cor_mat,
  tl.cex = 0.6,
  number.cex = 0.8,
  addCoefasPercent = TRUE
)



############################################################
# 7. Cross-validation setup
############################################################

k <- 5

folds <- sample(
  rep(1:k, length.out = nrow(sdm_data))
)

auc_values <- numeric(k)



############################################################
# 8. Cross-validated logistic regression SDM
############################################################

for(i in 1:k){
  
  # Split training and testing data
  train_data <- sdm_data[folds != i, ]
  test_data  <- sdm_data[folds == i, ]
  
  
  # Fit additive logistic model
  model <- glm(
    presence ~ bio_1 + bio_5 + bio_12 + forest_r,
    family = binomial,
    data = train_data
  )
  
  
  # Predict on test fold
  test_data$pred <- predict(
    model,
    newdata = test_data,
    type = "response"
  )
  
  # Evaluate AUC
  roc_obj <- roc(
    test_data$presence,
    test_data$pred
  )
  
  auc_values[i] <- auc(roc_obj)
}




############################################################
# 9. Cross-validation performance
############################################################

print("Cross-validated AUC values:")
print(auc_values)

cat("\nMean CV AUC:", mean(auc_values), "\n")



############################################################
# 10. Fit final model using all data
############################################################

final_model <- glm(
  presence ~ bio_1 + bio_5 + bio_12 + forest_r,
  family = binomial,
  data = sdm_data
)

summary(final_model)



############################################################
# 11. Predict habitat suitability map
############################################################

prediction_map <- predict(
  predictors,
  final_model,
  type = "response"
)


plot(
  prediction_map,
  main = "Predicted Frog Habitat Suitability"
)




############################################################
# 12. ROC curve on full dataset (optional)
############################################################

sdm_data$pred_prob <- predict(
  final_model,
  type = "response"
)

roc_obj <- roc(
  sdm_data$presence,
  sdm_data$pred_prob
)

plot(roc_obj, main = "ROC Curve - SDM")
auc(roc_obj)
coords(roc_obj, "best", best.method = "youden")

threshold <- 0.51

binary_map <- prediction_map >= threshold
plot(binary_map)


setwd("C:/Users/fjascensao/OneDrive - Universidade de Lisboa/Aulas/MIBC/Classes/Class_1_20260302/outputs")

# save raster
writeRaster(prediction_map, "prediction_map.tif", overwrite=T)
writeRaster(binary_map, "binary_map.tif", overwrite=T)
