This file provides all the code permitting the reproduction of examples in the article submitted. More specifically, we display here the maps and curves of the data, and we seek to detect multi-pollutant clusters: 1) on the data from which we have removed the temporal component by averaging over time, 2) on the NO2 pollutant only but taking into account the temporal variability, and 3) on all pollutants by taking into account the temporal variability.

As the computation time of some spatial scan statistics can be a bit long in this file, we also provide a file “frevent_restricted_time.R” which is a version with fewer permutations for the p-value estimation, that is 99 instead of the default value which is 999 (version of this file). In the generated html file, the time taken by each method is indicated at the end of each section.

library(HDSpatialScan)

set.seed(123)

Load the data

####################
#  Load the data   #
####################
data("map_sites")
data("multi_data")
data("funi_data")
data("fmulti_data")

Visualize the data

#########################
#  Visualize the data   #
#########################

library(dplyr)
library(RColorBrewer)
library(sp)

mean_data_no2 <- data.frame(canton = map_sites$identifiant, mean_no2 = multi_data[,1])
mean_data_o3 <- data.frame(canton = map_sites$identifiant, mean_o3 = multi_data[,2])
mean_data_pm10 <- data.frame(canton = map_sites$identifiant, mean_pm10 = multi_data[,3])
mean_data_pm2.5 <- data.frame(canton = map_sites$identifiant, mean_pm2.5 = multi_data[,4])

map_sites@data <- left_join(map_sites@data, mean_data_no2, by = c("identifiant" = "canton"))
map_sites@data <- left_join(map_sites@data, mean_data_o3, by = c("identifiant" = "canton"))
map_sites@data <- left_join(map_sites@data, mean_data_pm10, by = c("identifiant" = "canton"))
map_sites@data <- left_join(map_sites@data, mean_data_pm2.5, by = c("identifiant" = "canton"))

# Cut the mean concentrations in 5 classes
classes_no2 <- quantile(multi_data[,1], probs = c(0,0.2,0.4,0.6,0.8,1))
classes_o3 <- quantile(multi_data[,2], probs = c(0,0.2,0.4,0.6,0.8,1))
classes_pm10 <- quantile(multi_data[,3], probs = c(0,0.2,0.4,0.6,0.8,1))
classes_pm2.5 <- quantile(multi_data[,4], probs = c(0,0.2,0.4,0.6,0.8,1))

# Choice of colors
palette <- brewer.pal(n = 5, name = "YlOrRd")

map_sites@data$classe_no2 <- as.character(cut(map_sites@data$mean_no2, breaks = classes_no2, labels = palette, include.lowest = TRUE))
map_sites@data$classe_o3 <- as.character(cut(map_sites@data$mean_o3, breaks = classes_o3, labels = palette, include.lowest = TRUE))
map_sites@data$classe_pm10 <- as.character(cut(map_sites@data$mean_pm10, breaks = classes_pm10, labels = palette, include.lowest = TRUE))
map_sites@data$classe_pm2.5 <- as.character(cut(map_sites@data$mean_pm2.5, breaks = classes_pm2.5, labels = palette, include.lowest = TRUE))

legend_no2 <- as.character(levels(cut(map_sites@data$mean_no2, breaks = classes_no2, include.lowest = TRUE)))
legend_o3 <- as.character(levels(cut(map_sites@data$mean_o3, breaks = classes_o3, include.lowest = TRUE)))
legend_pm10 <- as.character(levels(cut(map_sites@data$mean_pm10, breaks = classes_pm10, include.lowest = TRUE)))
legend_pm2.5 <- as.character(levels(cut(map_sites@data$mean_pm2.5, breaks = classes_pm2.5, include.lowest = TRUE)))
plot(map_sites, col = map_sites@data$classe_no2, border = "black", lwd = 0.5)
legend("topleft", legend = legend_no2, fill = palette, cex=1,
       title = expression(paste("Average ", NO[2], " concentration in ", mu, "g/", m^3)),
       bty = "n")

plot of chunk unnamed-chunk-4

plot(map_sites, col = map_sites@data$classe_o3, border = "black", lwd = 0.5)
legend("topleft", legend = legend_o3, fill = palette, cex=1,
       title = expression(paste("Average ", O[3], " concentration in ", mu, "g/", m^3)),
       bty = "n")

plot of chunk unnamed-chunk-5

plot(map_sites, col = map_sites@data$classe_pm10, border = "black", lwd = 0.5)
legend("topleft", legend = legend_pm10, fill = palette, cex=1,
       title = expression(paste("Average ", PM[10], " concentration in ", mu, "g/", m^3)),
       bty = "n")

plot of chunk unnamed-chunk-6

plot(map_sites, col = map_sites@data$classe_pm2.5, border = "black", lwd = 0.5)
legend("topleft", legend = legend_pm2.5, fill = palette, cex=1,
       title = expression(paste("Average ", PM[2.5], " concentration in ", mu, "g/", m^3)),
       bty = "n")

plot of chunk unnamed-chunk-7

# The curves
times <- c("2020-05-01", "2020-05-02", "2020-05-03", "2020-05-04","2020-05-05","2020-05-06","2020-05-07","2020-05-08","2020-05-09","2020-05-10","2020-05-11","2020-05-12","2020-05-13","2020-05-14","2020-05-15","2020-05-16","2020-05-17","2020-05-18","2020-05-19","2020-05-20","2020-05-21","2020-05-22","2020-05-23","2020-05-24","2020-05-25","2020-05-26","2020-05-27","2020-05-28","2020-05-29","2020-05-30","2020-05-31","2020-06-01","2020-06-02","2020-06-03","2020-06-04","2020-06-05","2020-06-06","2020-06-07","2020-06-08","2020-06-09","2020-06-10","2020-06-11","2020-06-12","2020-06-13","2020-06-14","2020-06-15","2020-06-16","2020-06-17","2020-06-18","2020-06-19","2020-06-20","2020-06-21","2020-06-22","2020-06-23","2020-06-24","2020-06-25")
temp_no2 <- matrix(nrow = length(fmulti_data), ncol = length(times))
temp_o3 <- matrix(nrow = length(fmulti_data), ncol = length(times))
temp_pm10 <- matrix(nrow = length(fmulti_data), ncol = length(times))
temp_pm2.5 <- matrix(nrow = length(fmulti_data), ncol = length(times))

for(i in 1:length(fmulti_data)){
  temp_no2[i,] <- fmulti_data[[i]][1,]
  temp_o3[i,] <- fmulti_data[[i]][2,]
  temp_pm10[i,] <- fmulti_data[[i]][3,]
  temp_pm2.5[i,] <- fmulti_data[[i]][4,]
}

mini <- min(temp_no2)
maxi <- max(temp_no2)
plot(NULL, xlim = c(1,ncol(temp_no2)), ylim = c(mini, maxi), xlab = "", xaxt = 'n', ylab = expression(paste(NO[2], " concentration in ", mu, "g/", m^3)), cex.axis = 0.85, cex.lab = 0.85)
axis(1, at = c(1:ncol(temp_no2))[which(c(1:ncol(temp_no2))%%5==1)], labels = times[which(c(1:ncol(temp_no2))%%5==1)], cex.axis = 0.85, cex.lab = 0.85, las = 3)
for(i in 1:nrow(temp_no2)){
  lines(x = c(1:ncol(temp_no2)), y = temp_no2[i,], col = "black", lwd = 1)
}

plot of chunk unnamed-chunk-8

mini <- min(temp_o3)
maxi <- max(temp_o3)
plot(NULL, xlim = c(1,ncol(temp_o3)), ylim = c(mini, maxi), xlab = "", xaxt = 'n', ylab = expression(paste(O[3], " concentration in ", mu, "g/", m^3)), cex.axis = 0.85, cex.lab = 0.85)
axis(1, at = c(1:ncol(temp_o3))[which(c(1:ncol(temp_o3))%%5==1)], labels = times[which(c(1:ncol(temp_o3))%%5==1)], cex.axis = 0.85, cex.lab = 0.85, las = 3)
for(i in 1:nrow(temp_o3)){
  lines(x = c(1:ncol(temp_o3)), y = temp_o3[i,], col = "black", lwd = 1)
}

plot of chunk unnamed-chunk-9

mini <- min(temp_pm10)
maxi <- max(temp_pm10)
plot(NULL, xlim = c(1,ncol(temp_pm10)), ylim = c(mini, maxi), xlab = "", xaxt = 'n', ylab = expression(paste(PM[10], " concentration in ", mu, "g/", m^3)), cex.axis = 0.85, cex.lab = 0.85)
axis(1, at = c(1:ncol(temp_pm10))[which(c(1:ncol(temp_pm10))%%5==1)], labels = times[which(c(1:ncol(temp_pm10))%%5==1)], cex.axis = 0.85, cex.lab = 0.85, las = 3)
for(i in 1:nrow(temp_pm10)){
  lines(x = c(1:ncol(temp_pm10)), y = temp_pm10[i,], col = "black", lwd = 1)
}

plot of chunk unnamed-chunk-10

mini <- min(temp_pm2.5)
maxi <- max(temp_pm2.5)
plot(NULL, xlim = c(1,ncol(temp_pm2.5)), ylim = c(mini, maxi), xlab = "", xaxt = 'n', ylab = expression(paste(PM[2.5], " concentration in ", mu, "g/", m^3)), cex.axis = 0.85, cex.lab = 0.85)
axis(1, at = c(1:ncol(temp_pm2.5))[which(c(1:ncol(temp_pm2.5))%%5==1)], labels = times[which(c(1:ncol(temp_pm2.5))%%5==1)], cex.axis = 0.85, cex.lab = 0.85, las = 3)
for(i in 1:nrow(temp_pm2.5)){
  lines(x = c(1:ncol(temp_pm2.5)), y = temp_pm2.5[i,], col = "black", lwd = 1)
}

plot of chunk unnamed-chunk-11

Apply a multivariate spatial scan statistic

##################################################
#  Apply a multivariate spatial scan statistic   #
##################################################

Is the data gaussian at least for each component ?

hist(multi_data[,1])

plot of chunk unnamed-chunk-13

hist(multi_data[,2])

plot of chunk unnamed-chunk-13

hist(multi_data[,3])

plot of chunk unnamed-chunk-13

hist(multi_data[,4])

plot of chunk unnamed-chunk-13

qqnorm(multi_data[,1])
qqline(multi_data[,1], col = "red")

plot of chunk unnamed-chunk-13

qqnorm(multi_data[,2])
qqline(multi_data[,2], col = "red")

plot of chunk unnamed-chunk-13

qqnorm(multi_data[,3])
qqline(multi_data[,3], col = "red")

plot of chunk unnamed-chunk-13

qqnorm(multi_data[,4])
qqline(multi_data[,4], col = "red")

plot of chunk unnamed-chunk-13

not really : we apply the MNP

coords <- coordinates(map_sites)

time_start <- Sys.time()
res_mnp <- SpatialScan(method = "MNP", data = multi_data, sites_coord = coords, system = "WGS84", mini = 1, maxi = nrow(coords)/2, type_minimaxi = "sites/indiv", mini_post = 0, maxi_post = 10, type_minimaxi_post = "radius", nbCPU = 7, variable_names = c("NO2", "O3", "PM10", "PM2.5"))$MNP
## ##### Multivariate nonparametric scan statistic ##### 
## Computation of the scan statistic 
## ---Done--- 
## Estimation of the statistical significance 
## ---Done--- 
## Finalization 
## ======================================================
time_stop <- Sys.time()

# print
print(res_mnp)
## MNP scan procedure 
## #################### 
## 8 significant clusters have been detected by the scan procedure with p-values of 0.001, 0.001, 0.001, 0.005, 0.006, 0.012, 0.012, 0.024
# see the location of the most likely cluster
plot(x = res_mnp, type = "schema", system_conv = "+init=epsg:2154", only.MLC = TRUE)

plot of chunk unnamed-chunk-14

plot(x = res_mnp, type = "map", spobject = map_sites, only.MLC = TRUE)

plot of chunk unnamed-chunk-14

plot(x = res_mnp, type = "map2", spobject = map_sites, only.MLC = TRUE)

plot of chunk unnamed-chunk-14

# characteristics
summary(res_mnp, type_summ = "nparam", only.MLC = TRUE)
## $basic_summary
##         Cluster 1
## p-value     0.001
## Radius      9.999
## 
## $complete_summary
##                 Overall Inside cluster 1 Outside cluster 1
## Number of sites 169.000           12.000           157.000
## Q25 NO2           8.673           11.327             8.635
## Median NO2        9.183           11.721             9.075
## Q75 NO2           9.848           12.382             9.692
## Q25 O3           66.778           67.527            66.721
## Median O3        67.895           67.609            67.961
## Q75 O3           68.564           67.922            68.658
## Q25 PM10         16.397           17.483            16.205
## Median PM10      16.970           17.877            16.933
## Q75 PM10         17.372           17.962            17.266
## Q25 PM2.5         9.132           10.584             9.113
## Median PM2.5      9.833           10.678             9.790
## Q75 PM2.5        10.213           10.919            10.107
plotSummary(res_mnp, type = "median", only.MLC = TRUE)

plot of chunk unnamed-chunk-16

The computation of this spatial scan statistic (MNP) took 1.89 min

Apply a functional univariate spatial scan statistic

###########################################################
#  Apply a functional univariate spatial scan statistic   #
###########################################################

time_start <- Sys.time()
res_urbfss <- SpatialScan(method = "URBFSS", data = funi_data, sites_coord = coords, system = "WGS84", mini = 1, maxi = nrow(coords)/2, type_minimaxi = "sites/indiv", mini_post = 0, maxi_post = 10, type_minimaxi_post = "radius", nbCPU = 7)$URBFSS
## ##### Univariate rank-based functional scan statistic ##### 
## Computation of the scan statistic 
## ---Done--- 
## Estimation of the statistical significance 
## ---Done--- 
## Finalization 
## ======================================================
time_stop <- Sys.time()

# print
print(res_urbfss)
## URBFSS scan procedure 
## ####################### 
## 3 significant clusters have been detected by the scan procedure with p-values of 0.001, 0.002, 0.015
# see the location of the most likely cluster
plot(res_urbfss, type = "map2", spobject = map_sites, only.MLC = TRUE)

plot of chunk unnamed-chunk-17

# characteristics
summary(res_urbfss, type_summ = "nparam", only.MLC = TRUE)
## $basic_summary
##         Cluster 1
## p-value     0.001
## Radius      9.999
## 
## $complete_summary
##                 Overall Inside cluster 1 Outside cluster 1
## Number of sites 169.000           15.000           154.000
## Q25               6.909            8.742             6.811
## Median            9.036           11.194             8.844
## Q75              11.575           13.981            11.323
plotCurves(res_urbfss, add_median = TRUE, only.MLC = TRUE)

plot of chunk unnamed-chunk-19

plotSummary(res_urbfss, type = "median", only.MLC = TRUE)

plot of chunk unnamed-chunk-20

The computation of this spatial scan statistic (URBFSS) took 2.54 min

Apply a functional multivariate spatial scan statistic

#############################################################
#  Apply a functional multivariate spatial scan statistic   #
#############################################################

time_start <- Sys.time()
res_mrbfss <- SpatialScan(method = "MRBFSS", data = fmulti_data, sites_coord = coords, system = "WGS84", mini = 1, maxi = nrow(coords)/2, type_minimaxi = "sites/indiv", mini_post = 0, maxi_post = 10, type_minimaxi_post = "radius", nbCPU = 7, variable_names = c("NO2", "O3", "PM10", "PM2.5"))$MRBFSS
## ##### Multivariate rank-based functional scan statistic ##### 
## Computation of the scan statistic 
## ---Done--- 
## Estimation of the statistical significance 
## ---Done--- 
## Finalization 
## ======================================================
time_stop <- Sys.time()

# print
print(res_mrbfss)
## MRBFSS scan procedure 
## ####################### 
## 10 significant clusters have been detected by the scan procedure with p-values of 0.001, 0.001, 0.001, 0.001, 0.001, 0.005, 0.006, 0.032, 0.039, 0.042
# see the location of the most likely cluster
plot(res_mrbfss, type = "map2", spobject = map_sites, only.MLC = TRUE)

plot of chunk unnamed-chunk-21

# characteristics
summary(res_mrbfss, type_summ = "nparam", only.MLC = TRUE)
## $basic_summary
##         Cluster 1
## p-value     0.001
## Radius      9.999
## 
## $complete_summary
##                 Overall Inside cluster 1 Outside cluster 1
## Number of sites 169.000           15.000           154.000
## Q25 NO2           6.909            8.742             6.811
## Median NO2        9.036           11.194             8.844
## Q75 NO2          11.575           13.981            11.323
## Q25 O3           58.800           57.459            58.905
## Median O3        66.787           66.247            66.853
## Q75 O3           74.723           74.924            74.698
## Q25 PM10         12.131           13.788            12.070
## Median PM10      16.092           17.565            16.014
## Q75 PM10         19.775           20.754            19.632
## Q25 PM2.5         6.212            7.566             6.129
## Median PM2.5      8.308            9.069             8.211
## Q75 PM2.5        10.926           12.140            10.829
plotCurves(res_mrbfss, add_median = TRUE, only.MLC = TRUE)

plot of chunk unnamed-chunk-23plot of chunk unnamed-chunk-23plot of chunk unnamed-chunk-23plot of chunk unnamed-chunk-23

plotSummary(res_mrbfss, type = "median", only.MLC = TRUE)

plot of chunk unnamed-chunk-24plot of chunk unnamed-chunk-24plot of chunk unnamed-chunk-24plot of chunk unnamed-chunk-24

The computation of this spatial scan statistic (MRBFSS) took 35.28 min

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Gentoo/Linux
## 
## Matrix products: default
## BLAS:   /usr/lib64/libblas.so.3.10.0
## LAPACK: /usr/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C         LC_TIME=C           
##  [4] LC_COLLATE=C         LC_MONETARY=C        LC_MESSAGES=C       
##  [7] LC_PAPER=C           LC_NAME=C            LC_ADDRESS=C        
## [10] LC_TELEPHONE=C       LC_MEASUREMENT=C     LC_IDENTIFICATION=C 
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] sp_1.4-6            RColorBrewer_1.1-2  dplyr_1.0.9        
## [4] HDSpatialScan_1.0.3
## 
## loaded via a namespace (and not attached):
##  [1] xfun_0.31          tidyselect_1.1.1   purrr_0.3.4       
##  [4] pbapply_1.5-0      sf_1.0-4           lattice_0.20-41   
##  [7] vctrs_0.4.1        generics_0.1.1     testthat_3.1.0    
## [10] htmltools_0.5.2    utf8_1.2.2         blob_1.2.2        
## [13] rlang_1.0.3        e1071_1.7-9        pillar_1.6.4      
## [16] glue_1.6.2         withr_2.5.0        DBI_1.1.1         
## [19] fmsb_0.7.0         matrixStats_0.61.0 lifecycle_1.0.1   
## [22] stringr_1.4.0      rgeos_0.5-8        htmlwidgets_1.5.3 
## [25] SpatialNP_1.1-4    evaluate_0.15      knitr_1.39        
## [28] fastmap_1.1.0      parallel_4.0.5     class_7.3-18      
## [31] markdown_1.1       fansi_0.5.0        highr_0.9         
## [34] Rcpp_1.0.7         KernSmooth_2.23-18 DT_0.16           
## [37] classInt_0.4-3     plotrix_3.7-8      desc_1.4.0        
## [40] pkgload_1.1.0      mime_0.12          swfscMisc_1.4.3   
## [43] mapdata_2.3.0      TeachingDemos_2.12 digest_0.6.28     
## [46] stringi_1.7.5      grid_4.0.5         rprojroot_2.0.2   
## [49] rgdal_1.5-28       cli_3.3.0          tools_4.0.5       
## [52] magrittr_2.0.3     maps_3.4.0         proxy_0.4-26      
## [55] tibble_3.1.6       crayon_1.4.2       pkgconfig_2.0.3   
## [58] ellipsis_0.3.2     assertthat_0.2.1   rstudioapi_0.13   
## [61] R6_2.5.1           units_0.7-2        compiler_4.0.5