From 2af868e78dea1fb3ce7f9f21414113869c0afcd4 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Mon, 6 Jan 2025 14:47:21 +0100 Subject: [PATCH 01/14] =?UTF-8?q?fix:=20correction=20du=20echarts4r=20qui?= =?UTF-8?q?=20ne=20groupait=20plus=20par=20ann=C3=A9e=20automatiquement?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- maquette_espece.qmd | 3 ++- maquette_espece_page.qmd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/maquette_espece.qmd b/maquette_espece.qmd index 6a4167a..5b600b6 100644 --- a/maquette_espece.qmd +++ b/maquette_espece.qmd @@ -182,6 +182,7 @@ vec_col = c(colfunc(length(unique(df_sp$annee))-1), "#f40b0b") ### Phénologie ```{r} aes_echarts(plot_e = df_freq_rel %>% + group_by(annee) %>% e_charts(num_semaine) %>% e_line(freq_rel, symbol='none'), xlab = "Semaine de participation", @@ -213,7 +214,7 @@ histo_grega(df_grega = df_gregarite, ### Indice de grégarité par espèce ```{r} -names_grega = unique(df_gregarite_all %>% arrange(classif) %>% select(nom_espece) %>% as.vector()) +names_grega = unique(df_gregarite_all %>% arrange(classif) %>% select(nom_espece) %>% as.data.frame()) color_txt = rep("black", length(names_grega$nom_espece)) position_sp = which(unique(names_grega$nom_espece) == sp_name) color_txt[position_sp] = "red" diff --git a/maquette_espece_page.qmd b/maquette_espece_page.qmd index c1bb55d..be5c3d6 100644 --- a/maquette_espece_page.qmd +++ b/maquette_espece_page.qmd @@ -134,7 +134,7 @@ carte_point_jardin(france = france, df_jp = df_jardin_point, strftime(max(df_sp$date_collection), "%Y"))) ``` -* **Titre** : Carte des jardins participants à l’Opéraion papillons ayant observé l’espèce au moins une fois +* **Titre** : Carte des jardins participants à l’Opération papillons ayant observé l’espèce au moins une fois * **Explication** : Où observe-t-on cette espèce ? Chaque point sur la carte représente un jardin participant à l’Opération papillons. Les points rouges représentent les jardins où l’espèce n’a pas été observée, et les points verts représentent les jardins ayant observé au moins une fois cette espèce. @@ -212,6 +212,7 @@ vec_col = c(colfunc(length(unique(df_sp$annee))-1), "#f40b0b") ### Phénologie ```{r} aes_echarts(plot_e = df_freq_rel %>% + group_by(annee) %>% e_charts(num_semaine) %>% e_line(freq_rel, symbol='none'), xlab = "Semaine de participation", -- GitLab From b58bc0a96cc098780b7a2ef21edd0ba15c2c7c81 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Mon, 6 Jan 2025 14:47:59 +0100 Subject: [PATCH 02/14] refactor: suppression des warnings de dplyr::summarise en ajoutant .groups = 'drop' --- fonctions/create_df_one_sp.R | 49 ++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/fonctions/create_df_one_sp.R b/fonctions/create_df_one_sp.R index c68b934..0b6fb45 100644 --- a/fonctions/create_df_one_sp.R +++ b/fonctions/create_df_one_sp.R @@ -68,8 +68,7 @@ nb_jardin_obs = length(unique(df_sp_ab$jardin_id)) # Abondance maximale (calculée en groupant sur les années et les départements) nb_max_ab = df_sp %>% group_by(annee, nom_departement, nom_region) %>% - summarise(sum_ab = sum(abondance)) %>% - ungroup() %>% + summarise(sum_ab = sum(abondance), .groups = 'drop') %>% filter(sum_ab == max(sum_ab)) %>% as.data.frame() @@ -84,7 +83,7 @@ df_dep = df_sp %>% group_by(dept_code) %>% summarise(n = sum(abondance), nb_participation = n_distinct(participation_id), - nb_jard = n_distinct(jardin_id)) %>% + nb_jard = n_distinct(jardin_id), .groups = 'drop') %>% mutate(ab_moy = n/nb_jard, ab_rel = n/nb_participation, cl_ab = case_when(n == 0 ~ "0", @@ -127,7 +126,8 @@ nb_idv_cpt = sum(df_sp_ab$abondance) df_repartition = df_all_sp %>% group_by(nom_espece) %>% summarise(sum_ab = sum(abondance), - rel_ab = sum(abondance)/sum(df_all_sp$abondance)) %>% + rel_ab = sum(abondance)/sum(df_all_sp$abondance), + .groups = 'drop') %>% arrange(sum_ab) %>% mutate(couleur = c(rep("#3138cc", 10), rep("#6893fc", 9), rep("#90d3ff", 9)), couleur = if_else(nom_espece == sp_name, color_flag, couleur)) @@ -144,7 +144,8 @@ df_repartition = df_all_sp %>% df_nb_obs_date <- df_sp %>% mutate(date = as.Date(date_collection)) %>% group_by(date) %>% - summarise(n = n()) %>% + summarise(n = n(), + .groups = 'drop') %>% arrange(date) #----- Abondance moyenne par département -----# @@ -153,7 +154,8 @@ df_nb_obs_date <- df_sp %>% df_dep_y = df_sp %>% group_by(dept_code, annee) %>% summarise(n = sum(abondance), - nb_jard = n_distinct(jardin_id)) %>% + nb_jard = n_distinct(jardin_id), + .groups = 'drop') %>% mutate(ab_moy = n/nb_jard, cl_ab = case_when(n == 0 ~ "0", n >= 1 & n <= 25 ~ "1-25", @@ -179,13 +181,15 @@ cat_carte_moy = c("0", "0-1", "2-5", "6-10", "+ de 10") nb_part_par_sem = df_all_sp %>% mutate(num_semaine = as.integer(num_semaine)) %>% group_by(annee, num_semaine) %>% - summarise(nb_part = n_distinct(participation_id)) + summarise(nb_part = n_distinct(participation_id), + .groups = 'drop') # Abondance relative df_ab_rel <- df_sp %>% mutate(num_semaine = as.integer(num_semaine)) %>% group_by(annee, num_semaine, date_collection) %>% - summarise(sum_ab = sum(abondance)) %>% # Somme des abondances + summarise(sum_ab = sum(abondance), + .groups = 'drop') %>% # Somme des abondances left_join(nb_part_par_sem, by = c("annee" = "annee", "num_semaine" = "num_semaine")) %>% mutate(sum_ab_rel = sum_ab/nb_part) %>% # Division par le nombre de participations @@ -200,7 +204,8 @@ df_ab_rel <- df_sp %>% df_freq_rel <- df_sp_ab %>% mutate(num_semaine = as.integer(num_semaine)) %>% group_by(annee, num_semaine) %>% - summarise(sum_obs = n()) %>% # Somme des observations + summarise(sum_obs = n(), + .groups = 'drop') %>% # Somme des observations full_join(nb_part_par_sem, by = c("annee" = "annee", "num_semaine" = "num_semaine")) %>% mutate(freq_rel = if_else(is.na(sum_obs), 0, sum_obs/nb_part)) %>% # Division par le nombre de participations @@ -211,7 +216,7 @@ df_date_wm = df_sp %>% filter(abondance !=0, annee != strftime(Sys.Date()+365/2, "%Y")) %>% mutate(semaine = as.integer(strftime(date_collection, '%V'))) %>% group_by(annee) %>% - summarise(sum_sp = weighted.mean(semaine, abondance)) + summarise(sum_sp = weighted.mean(semaine, abondance), .groups = 'drop') df_date_wm_sqrt = df_sp %>% filter(abondance !=0, annee != strftime(Sys.Date()+365/2, "%Y")) %>% @@ -220,7 +225,7 @@ df_date_wm_sqrt = df_sp %>% mutate(minus = abondance*((semaine - sum_sp)^2) ) %>% group_by(annee, sum_sp) %>% summarise(sum_minus = sum(minus), - n = n()) %>% + n = n(), .groups = 'drop') %>% mutate(rmse = sqrt(sum_minus/n)) ######################################### @@ -231,7 +236,8 @@ df_date_wm_sqrt = df_sp %>% df_moyenne_greg = df_all_sp %>% filter(abondance!= 0) %>% group_by(nom_espece) %>% - summarise(m_abn = mean(abondance), n = n()) %>% + summarise(m_abn = mean(abondance), n = n(), + .groups = 'drop') %>% mutate(sd = 1.96*sqrt(m_abn/n)) %>% arrange(desc(m_abn)) %>% as.data.frame() @@ -251,7 +257,7 @@ df_gregarite_all = df_all_sp %>% mutate(ab_grega = factor(if_else(abondance == 1, "1 individu", "+ de 1 individu"), levels = c("1 individu", "+ de 1 individu"))) %>% group_by(nom_espece, ab_grega) %>% - summarise(n = n()) %>% + summarise(n = n(), .groups = 'drop') %>% group_by(nom_espece) %>% mutate(sum_n = sum(n)) %>% ungroup() %>% @@ -302,7 +308,7 @@ lst_param = list(bois, champ, prairie, environnement) # Df des jardins positionnés sur la carte df_jardin_point = df_sp %>% group_by(jardin_id, latitude, longitude) %>% - summarise(sum_ab = sum(abondance)) %>% + summarise(sum_ab = sum(abondance), .groups = 'drop') %>% filter(!is.na(latitude)) %>% mutate(Présence = if_else(sum_ab == 0, "Espèce non observée", "Espèce observée"), alpha = if_else(sum_ab == 0, 0.7, 1)) %>% @@ -315,16 +321,15 @@ bary_function <- function(df, df <- df %>% group_by(!!!syms(gb1)) %>% # On groupe selon les paramètres de gb1 - summarise(sum_ab = sum(abondance)) %>% + summarise(sum_ab = sum(abondance), .groups = 'drop') %>% filter(!is.na(latitude)) %>% - ungroup() %>% mutate(lat_pond = latitude*sum_ab, # Calcul des latitudes et long_pond = longitude*sum_ab) %>% # longitudes pondérées group_by(!!!syms(gb2)) %>% # On groupe selon les paramètres de gb2 # Pour chaque année, on somme les latitudes et longitudes pondérées # et l'abondance totale summarise(across(matches("*_pond"), \(x) sum(x, na.rm = TRUE)), - across(matches("sum_ab"), sum)) %>% + across(matches("sum_ab"), sum), .groups = 'drop') %>% # On divise la latitude et la longitude pondérée de chaque année par # la pondération (donc la somme des abondances) mutate(latitude = lat_pond/sum_ab, @@ -336,14 +341,14 @@ bary_function <- function(df, # Df barycentre de tous les jardins chaque année df_bary_base<- df_all_sp %>% group_by(annee, jardin_id, latitude, longitude) %>% - summarise(sum_ab = n()) %>% + summarise(sum_ab = n(), .groups = 'drop') %>% filter(!is.na(latitude)) %>% ungroup() %>% mutate(lat_pond = latitude*sum_ab, long_pond = longitude*sum_ab) %>% group_by(annee) %>% summarise(across(matches("*_pond"), \(x) sum(x, na.rm = TRUE)), - across(matches("sum_ab"), sum)) %>% + across(matches("sum_ab"), sum), .groups = 'drop') %>% mutate(latitude = lat_pond/sum_ab, longitude = long_pond/sum_ab, nom_espece = "Jardins", @@ -413,7 +418,7 @@ vec_name = c(sp_name, (df_oui %>% arrange(desc(corr)))$nom[1:5]) df_coocc = df_all_sp %>% filter(nom_espece %in% vec_name) %>% group_by(nom_espece, an_sem) %>% - summarise(sum_ab = sum(abondance)) %>% + summarise(sum_ab = sum(abondance), .groups = 'drop') %>% group_by(nom_espece) %>% mutate(sum_sp = sum(sum_ab), sum_ab_norm = sum_ab/sum_sp, @@ -425,7 +430,7 @@ df_coocc = df_all_sp %>% df_nbsp_all = df_all_sp %>% filter(abondance != 0) %>% group_by(nom_espece) %>% - summarise(n = n()) + summarise(n = n(), .groups = 'drop') if (file.exists("data/rdata/df_heatmap.rds") & Sys.Date()-as.Date(file.info("data/rdata/df_heatmap.rds")$ctime) <= 1) { @@ -469,7 +474,7 @@ df_histo_test = df_all_sp %>% abondance > 9 ~ "+ de 10"), levels = c("1", "2 à 4", "5 à 9", "+ de 10"))) %>% group_by(nom_espece, ab_grega) %>% - summarise(n = n()) %>% + summarise(n = n(), .groups = 'drop') %>% group_by(nom_espece) %>% mutate(sum_n = sum(n)) %>% ungroup() %>% -- GitLab From 0e2d26a1a58636fd436895cf6109be02f9bfa5a5 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Wed, 12 Feb 2025 16:17:16 +0100 Subject: [PATCH 03/14] refactor: rename df_all_sp into df_opj --- fonctions/create_df_one_sp.R | 245 ++++++++++++++---- .../{create_df_all_sp.R => create_df_opj.R} | 12 +- maquette_espece_page.qmd | 5 +- 3 files changed, 200 insertions(+), 62 deletions(-) rename fonctions/{create_df_all_sp.R => create_df_opj.R} (71%) diff --git a/fonctions/create_df_one_sp.R b/fonctions/create_df_one_sp.R index 0b6fb45..b6876eb 100644 --- a/fonctions/create_df_one_sp.R +++ b/fonctions/create_df_one_sp.R @@ -22,33 +22,73 @@ if (!exists("sp_name")) { sp_name = "Amaryllis" } +if (!exists("is.histo")) { + is.histo = FALSE +} + ######################################### #---------- Dataframe initial ----------# ######################################### # Df de toutes les espèces -if (!exists("df_all_sp")) { - source("fonctions/create_df_all_sp.R") +if (!exists("df_opj")) { + source("fonctions/create_df_opj.R") } -df_all_sp = df_all_sp %>% +df_opj = df_opj %>% filter(!is.na(dept_code), # suppression des départements nuls str_length(dept_code)==2, # suppression des drom-com annee >= 2019, nom_espece %in% liste_principale) %>% # suppression des données avant 2019 - mutate(an_sem = if_else(as.numeric(num_semaine) < 10, + mutate(date = as.Date(date_collection), + an_sem = if_else(as.numeric(num_semaine) < 10, paste0(annee, "-S0", num_semaine), paste0(annee, "-S", num_semaine))) %>% left_join(reg_dep, by = c("dept_code" = "code_departement")) # ajout des départements # Df de l'historique -if (file.exists("data/history/donnees_opj_hebdo_ok.csv")) { - df_history = read.csv2(file = "data/history/donnees_opj_hebdo_ok.csv", - header = F, encoding = "latin-1") +if (is.histo) { + df_old_data = readRDS(file = "data/rdata/df_old_data.rds") + df_opj = bind_rows(df_opj, df_old_data) + rm("df_old_data") + + df_opj_new = df_opj %>% + filter(annee >= 2019) + + df_opj_old = df_opj %>% + filter(annee < 2019) + + df_sp_new = df_opj_new %>% + filter(nom_espece == sp_name) + + df_sp_old = df_opj_old %>% + filter(nom_espece == sp_name) + + df_sp_ab_new = df_sp_new %>% + filter(abondance != 0) + + df_sp_ab_old = df_sp_old %>% + filter(abondance != 0) +}else{ + df_opj_new = df_opj + + df_opj_old = df_opj + + df_sp_new = df_opj %>% + filter(nom_espece == sp_name) + + df_sp_old = df_opj %>% + filter(nom_espece == sp_name) + + df_sp_ab_new = df_sp %>% + filter(abondance != 0) + + df_sp_ab_old = df_sp %>% + filter(abondance != 0) } # Df d'une espèce -df_sp = df_all_sp %>% +df_sp = df_opj %>% filter(nom_espece == sp_name) df_sp_ab = df_sp %>% @@ -62,7 +102,7 @@ france <- read_sf(paste0("carte/contour-des-departements.geojson")) ######################################### # Nombre de jardins participant aux observations -nb_jardin = length(unique(df_all_sp$jardin_id)) +nb_jardin = length(unique(df_opj$jardin_id)) # Nombre de jardins où un individu a été observé nb_jardin_obs = length(unique(df_sp_ab$jardin_id)) # Abondance maximale (calculée en groupant sur les années et les départements) @@ -78,35 +118,57 @@ nb_max_ab = df_sp %>% #----- Carte d'abondance -----# -# Df abondance sur toutes les données -df_dep = df_sp %>% - group_by(dept_code) %>% - summarise(n = sum(abondance), - nb_participation = n_distinct(participation_id), - nb_jard = n_distinct(jardin_id), .groups = 'drop') %>% - mutate(ab_moy = n/nb_jard, - ab_rel = n/nb_participation, - cl_ab = case_when(n == 0 ~ "0", - n > 0 & n <= 50 ~ "1-50", - n > 50 & n <= 100 ~ "51-100", - n > 100 & n <= 300 ~ "101-300", - n > 300 & n <= 500 ~ "301-500", - n > 500 ~ "+ de 500"), - cl_moy = case_when(ab_moy == 0 ~ "0", - ab_moy > 0 & ab_moy <= 2 ~ "1-2", - ab_moy > 2 & ab_moy <= 5 ~ "3-5", - ab_moy > 5 & ab_moy <= 10 ~ "6-10", - ab_moy > 10 ~ "+ de 10"), - cl_qual = case_when(ab_rel == 0 ~ "Pas de détection", - ab_rel > 0 & ab_rel <= 0.2 ~ "Peu abondant", - ab_rel > 0.2 & ab_rel <= 0.4 ~ "Abondant", - ab_rel > 0.4 & ab_rel <= 0.6 ~ "Très abondant", - ab_rel > 0.6 ~ "Extrêmement abondant")) +# Df abondance sur toutes les données (all_data) +# fct df abondance +fct_df_abondance <- function(df){ + return(df_abondance <- df %>% + filter(!is.na(jardin_id)) %>% + group_by(dept_code) %>% + summarise(n = sum(abondance), + nb_participation = n_distinct(participation_id), + nb_jard = n_distinct(jardin_id), + nb_j_nul = sum(sapply(split(abondance, jardin_id), + function(x) all(x == 0))), + nb_j_non_nul = sum(sapply(split(abondance, jardin_id), + function(x) any(x > 0))), + .groups = 'drop') %>% + mutate(ab_moy = n/nb_jard, + ab_rel = n/nb_participation, + prc_vu = nb_j_non_nul / nb_jard, + cl_ab = case_when(n == 0 ~ "0", + n > 0 & n <= 50 ~ "1-50", + n > 50 & n <= 100 ~ "51-100", + n > 100 & n <= 300 ~ "101-300", + n > 300 & n <= 500 ~ "301-500", + n > 500 ~ "+ de 500"), + cl_moy = case_when(ab_moy == 0 ~ "0", + ab_moy > 0 & ab_moy <= 2 ~ "1-2", + ab_moy > 2 & ab_moy <= 5 ~ "3-5", + ab_moy > 5 & ab_moy <= 10 ~ "6-10", + ab_moy > 10 ~ "+ de 10"), + cl_qual = case_when(ab_rel == 0 ~ "Pas de détection", + ab_rel > 0 & ab_rel <= 0.2 ~ "Peu abondant", + ab_rel > 0.2 & ab_rel <= 0.4 ~ "Abondant", + ab_rel > 0.4 & ab_rel <= 0.6 ~ "Très abondant", + ab_rel > 0.6 ~ "Extrêmement abondant"), + cl_jard = case_when(prc_vu == 0 ~ "0%", + prc_vu > 0 & prc_vu <= 0.2 ~ "0%-20%", + prc_vu > 0.2 & prc_vu <= 0.4 ~ "20%-40%", + prc_vu > 0.4 & prc_vu <= 0.6 ~ "40%-60%", + prc_vu > 0.6 ~ "60%-100%")) ) +} + +df_dep = fct_df_abondance(df = df_sp) +df_dep_old = fct_df_abondance(df = df_sp_old) +df_dep_new = fct_df_abondance(df = df_sp_new) df_dep = df_dep[c(1:6, 29:30, 7:28, 31:96),] +df_dep_old = df_dep_old[c(1:6, 29:30, 7:28, 31:96),] +df_dep_new = df_dep_new[c(1:6, 29:30, 7:28, 31:96),] cat_carte_all = c("0", "1-50", "51-100", "101-300", "301-500", "+ de 500") cat_carte_all_moy = c("0", "1-2", "3-5", "6-10", "+ de 10") +cat_carte_jard = c("0%", "0%-20%", "20%-40%", "40%-60%", "60%-100%") cat_carte_tendance_moy = c("Pas de détection", "Peu abondant", "Abondant", "Très abondant", "Extrêmement abondant") couleurs = c("#7f7f7f", "#ffef6c", "#f7b905", "#ff7400", "#ff0000", "#950000") @@ -122,11 +184,31 @@ nb_idv_cpt = sum(df_sp_ab$abondance) #----- Graphiques -----# -# Df abondance par espèce -df_repartition = df_all_sp %>% +# Df abondance par espèce (post 2019) +df_repartition = df_opj_new %>% + group_by(nom_espece) %>% + summarise(sum_ab = sum(abondance), + rel_ab = sum(abondance)/sum(df_opj_new$abondance), + .groups = 'drop') %>% + arrange(sum_ab) %>% + mutate(couleur = c(rep("#3138cc", 10), rep("#6893fc", 9), rep("#90d3ff", 9)), + couleur = if_else(nom_espece == sp_name, color_flag, couleur)) + +# Df abondance par espèce (old data) +df_repartition_old = df_opj_old %>% group_by(nom_espece) %>% summarise(sum_ab = sum(abondance), - rel_ab = sum(abondance)/sum(df_all_sp$abondance), + rel_ab = sum(abondance)/sum(df_opj_old$abondance), + .groups = 'drop') %>% + arrange(sum_ab) %>% + mutate(couleur = c(rep("#3138cc", 10), rep("#6893fc", 9), rep("#90d3ff", 9)), + couleur = if_else(nom_espece == sp_name, color_flag, couleur)) + +# Df abondance par espèce (all data) +df_repartition_new_old = df_opj %>% + group_by(nom_espece) %>% + summarise(sum_ab = sum(abondance), + rel_ab = sum(abondance)/sum(df_opj$abondance), .groups = 'drop') %>% arrange(sum_ab) %>% mutate(couleur = c(rep("#3138cc", 10), rep("#6893fc", 9), rep("#90d3ff", 9)), @@ -142,7 +224,6 @@ df_repartition = df_all_sp %>% # Semaine df_nb_obs_date <- df_sp %>% - mutate(date = as.Date(date_collection)) %>% group_by(date) %>% summarise(n = n(), .groups = 'drop') %>% @@ -178,16 +259,31 @@ cat_carte_moy = c("0", "0-1", "2-5", "6-10", "+ de 10") #----- Indicateurs relatifs -----# # Calcul du nombre de participations sur toute l'opération par semaine -nb_part_par_sem = df_all_sp %>% +nb_part_par_sem = df_opj %>% + mutate(num_semaine = as.integer(num_semaine)) %>% + group_by(annee, num_semaine) %>% + summarise(nb_part = n_distinct(participation_id), + .groups = 'drop') + +# New +nb_part_par_sem_new = df_opj_new %>% + mutate(num_semaine = as.integer(num_semaine)) %>% + group_by(annee, num_semaine) %>% + summarise(nb_part = n_distinct(participation_id), + .groups = 'drop') + +# Old +nb_part_par_sem_old = df_opj_old %>% mutate(num_semaine = as.integer(num_semaine)) %>% group_by(annee, num_semaine) %>% summarise(nb_part = n_distinct(participation_id), .groups = 'drop') # Abondance relative +# All data df_ab_rel <- df_sp %>% mutate(num_semaine = as.integer(num_semaine)) %>% - group_by(annee, num_semaine, date_collection) %>% + group_by(annee, num_semaine, date) %>% summarise(sum_ab = sum(abondance), .groups = 'drop') %>% # Somme des abondances left_join(nb_part_par_sem, by = c("annee" = "annee", @@ -201,6 +297,7 @@ df_ab_rel <- df_sp %>% ungroup() # Phénologie +# All data df_freq_rel <- df_sp_ab %>% mutate(num_semaine = as.integer(num_semaine)) %>% group_by(annee, num_semaine) %>% @@ -211,16 +308,38 @@ df_freq_rel <- df_sp_ab %>% mutate(freq_rel = if_else(is.na(sum_obs), 0, sum_obs/nb_part)) %>% # Division par le nombre de participations arrange(num_semaine) +# New data +df_freq_rel_new <- df_sp_ab_new %>% + mutate(num_semaine = as.integer(num_semaine)) %>% + group_by(annee, num_semaine) %>% + summarise(sum_obs = n(), + .groups = 'drop') %>% # Somme des observations + full_join(nb_part_par_sem_new, by = c("annee" = "annee", + "num_semaine" = "num_semaine")) %>% + mutate(freq_rel = if_else(is.na(sum_obs), 0, sum_obs/nb_part)) %>% # Division par le nombre de participations + arrange(num_semaine) + +# Old data +df_freq_rel_old <- df_sp_ab_old %>% + mutate(num_semaine = as.integer(num_semaine)) %>% + group_by(annee, num_semaine) %>% + summarise(sum_obs = n(), + .groups = 'drop') %>% # Somme des observations + full_join(nb_part_par_sem_old, by = c("annee" = "annee", + "num_semaine" = "num_semaine")) %>% + mutate(freq_rel = if_else(is.na(sum_obs), 0, sum_obs/nb_part)) %>% # Division par le nombre de participations + arrange(num_semaine) + # Présence moyenne df_date_wm = df_sp %>% filter(abondance !=0, annee != strftime(Sys.Date()+365/2, "%Y")) %>% - mutate(semaine = as.integer(strftime(date_collection, '%V'))) %>% + mutate(semaine = as.integer(strftime(date, '%V'))) %>% group_by(annee) %>% summarise(sum_sp = weighted.mean(semaine, abondance), .groups = 'drop') df_date_wm_sqrt = df_sp %>% filter(abondance !=0, annee != strftime(Sys.Date()+365/2, "%Y")) %>% - mutate(semaine = as.integer(strftime(date_collection, '%V'))) %>% + mutate(semaine = as.integer(strftime(date, '%V'))) %>% left_join(df_date_wm, by = c("annee" = "annee")) %>% mutate(minus = abondance*((semaine - sum_sp)^2) ) %>% group_by(annee, sum_sp) %>% @@ -233,7 +352,7 @@ df_date_wm_sqrt = df_sp %>% ######################################### # Moyenne d'abondance -df_moyenne_greg = df_all_sp %>% +df_moyenne_greg = df_opj %>% filter(abondance!= 0) %>% group_by(nom_espece) %>% summarise(m_abn = mean(abondance), n = n(), @@ -252,7 +371,7 @@ df_gregarite = data.frame(nb_idv = as.numeric(names(summary(as.factor(df_sp_ab$a nb_idv >= 10 ~ "10 et +")) # Toutes les espèces -df_gregarite_all = df_all_sp %>% +df_gregarite_all = df_opj %>% filter(abondance!= 0) %>% mutate(ab_grega = factor(if_else(abondance == 1, "1 individu", "+ de 1 individu"), levels = c("1 individu", "+ de 1 individu"))) %>% @@ -305,8 +424,26 @@ lst_param = list(bois, champ, prairie, environnement) #----- Position + barycentre -----# -# Df des jardins positionnés sur la carte -df_jardin_point = df_sp %>% +# Df des jardins positionnés sur la carte (new data) +df_jardin_point = df_sp_new %>% + group_by(jardin_id, latitude, longitude) %>% + summarise(sum_ab = sum(abondance), .groups = 'drop') %>% + filter(!is.na(latitude)) %>% + mutate(Présence = if_else(sum_ab == 0, "Espèce non observée", "Espèce observée"), + alpha = if_else(sum_ab == 0, 0.7, 1)) %>% + arrange(Présence) + +# Df des jardins positionnés sur la carte (old data) +df_jardin_point_old = df_sp_old %>% + group_by(jardin_id, latitude, longitude) %>% + summarise(sum_ab = sum(abondance), .groups = 'drop') %>% + filter(!is.na(latitude)) %>% + mutate(Présence = if_else(sum_ab == 0, "Espèce non observée", "Espèce observée"), + alpha = if_else(sum_ab == 0, 0.7, 1)) %>% + arrange(Présence) + +# Df des jardins positionnés sur la carte (all data) +df_jardin_point_new_old = df_sp %>% group_by(jardin_id, latitude, longitude) %>% summarise(sum_ab = sum(abondance), .groups = 'drop') %>% filter(!is.na(latitude)) %>% @@ -339,7 +476,7 @@ bary_function <- function(df, } # Df barycentre de tous les jardins chaque année -df_bary_base<- df_all_sp %>% +df_bary_base<- df_opj %>% group_by(annee, jardin_id, latitude, longitude) %>% summarise(sum_ab = n(), .groups = 'drop') %>% filter(!is.na(latitude)) %>% @@ -361,7 +498,7 @@ df_bary_one_sp <- cbind(bary_function(df = df_sp), color = "red")) # Df des barycentres pour toutes les espèces -df_bary_all_sp <- bary_function(df = df_all_sp, +df_bary_all_sp <- bary_function(df = df_opj, gb1 = c("annee", "jardin_id", "latitude", "longitude", "nom_espece"), gb2 = c("annee", "nom_espece")) %>% @@ -377,10 +514,10 @@ df_bary_all_sp <- bary_function(df = df_all_sp, #------------ Co-occurence -------------# ######################################### -df_co = df_all_sp %>% +df_co = df_opj %>% filter(participation_id %in% unique(df_sp_ab$participation_id)) -df_occurence = df_all_sp %>% +df_occurence = df_opj %>% select(participation_id, an_sem, annee, nom_espece, abondance) %>% pivot_wider(names_from = nom_espece, values_from = abondance)%>% filter(!!sym(sp_name) != 0)%>% @@ -393,7 +530,7 @@ df_oui = apply(df_occurence, 2, function(x){return(length(which(x=="OUI"))/lengt df_oui = sort(round(df_oui*100, digits = 2), decreasing = TRUE) df_oui = data.frame(nom = names(df_oui), corr = as.numeric(df_oui)) -all_names = unique(df_all_sp$nom_espece) +all_names = unique(df_opj$nom_espece) names_no_sp = all_names[-which(all_names == sp_name)] df_oui = df_oui %>% dplyr::arrange(nom) @@ -415,7 +552,7 @@ df_tab = df_oui %>% vec_name = c(sp_name, (df_oui %>% arrange(desc(corr)))$nom[1:5]) -df_coocc = df_all_sp %>% +df_coocc = df_opj %>% filter(nom_espece %in% vec_name) %>% group_by(nom_espece, an_sem) %>% summarise(sum_ab = sum(abondance), .groups = 'drop') %>% @@ -427,7 +564,7 @@ df_coocc = df_all_sp %>% relocate(nom_espece, .before = an_sem) # Nombre d'observations totales de chaque espèce -df_nbsp_all = df_all_sp %>% +df_nbsp_all = df_opj %>% filter(abondance != 0) %>% group_by(nom_espece) %>% summarise(n = n(), .groups = 'drop') @@ -440,7 +577,7 @@ if (file.exists("data/rdata/df_heatmap.rds") & df_heatmap = data.frame() for (name in rev(df_repartition$nom_espece)) { - df_tmp = df_all_sp %>% + df_tmp = df_opj %>% select(participation_id, an_sem, annee, nom_espece, abondance) %>% arrange(factor(nom_espece, levels = rev(df_repartition$nom_espece))) %>% pivot_wider(names_from = nom_espece, values_from = abondance) %>% @@ -466,7 +603,7 @@ if (file.exists("data/rdata/df_heatmap.rds") & -df_histo_test = df_all_sp %>% +df_histo_test = df_opj %>% filter(abondance!= 0) %>% mutate(ab_grega = factor(case_when(abondance == 1 ~ "1", abondance <= 4 ~ "2 à 4", diff --git a/fonctions/create_df_all_sp.R b/fonctions/create_df_opj.R similarity index 71% rename from fonctions/create_df_all_sp.R rename to fonctions/create_df_opj.R index 15cd218..3f43cf1 100644 --- a/fonctions/create_df_all_sp.R +++ b/fonctions/create_df_opj.R @@ -6,7 +6,7 @@ # # Date: 2024-03-26 # -# Script Name: fonctions/create_df_all_sp.R +# Script Name: fonctions/create_df_opj.R # # Script Description: Création du data frame opération papillons avec toutes # les espèces. Interrogation de la base de données mosaic et enregistrement @@ -30,21 +30,21 @@ source("fonctions/var.R") # ----------------------------------------------- # Mise à jour -if (!file.exists("data/rdata/df_all_sp.rds") | # Si le fichier n'existe pas OU +if (!file.exists("data/rdata/df_opj.rds") | # Si le fichier n'existe pas OU (strftime(Sys.Date(), "%A") == "lundi" & # [que la date du jour est un lundi ET - Sys.Date()-as.Date(file.info("data/rdata/df_all_sp.rds")$ctime) > 5)) { # que le fichier a plus de 5 jours] + Sys.Date()-as.Date(file.info("data/rdata/df_opj.rds")$ctime) > 5)) { # que le fichier a plus de 5 jours] # Lecture depuis la base mosaic - df_all_sp = import_from_mosaic(query = read_sql_query("SQL/export_a_plat_OPJ.sql"), + df_opj = import_from_mosaic(query = read_sql_query("SQL/export_a_plat_OPJ.sql"), database_name = "spgp") # On sauvegarde si on ne se trouve pas sur le serveur gitlab if (Sys.getenv("CI") != "true") { # Sauvegarde du df en format RDS - saveRDS(object = df_all_sp, file = "data/rdata/df_all_sp.rds") + saveRDS(object = df_opj, file = "data/rdata/df_opj.rds") } }else{ # Lecture du fichier RDS - df_all_sp = readRDS("data/rdata/df_all_sp.rds") + df_opj = readRDS("data/rdata/df_opj.rds") } diff --git a/maquette_espece_page.qmd b/maquette_espece_page.qmd index be5c3d6..8ae277c 100644 --- a/maquette_espece_page.qmd +++ b/maquette_espece_page.qmd @@ -22,6 +22,7 @@ execute: # Variables constantes sp_name = params$sp_name source("fonctions/var.R") +is.histo = FALSE ``` @@ -40,7 +41,7 @@ if (Sys.getenv("CI") != "true") { source("fonctions/function_graphics.R") source("fonctions/function_import_from_mosaic.R") # Création des data frame -source("fonctions/create_df_all_sp.R") +source("fonctions/create_df_opj.R") source("fonctions/create_df_one_sp.R") ``` @@ -83,7 +84,7 @@ Nombre de fois où au moins un individu a été observé : ## Infobulle 2 (abondance) ::: {.valuebox icon=none color="#5bbdd6"} -Les participants ont compté `r paste0(label_number(accuracy = 10)(nb_idv_cpt), " ", sp_name, " sur les ", label_number(accuracy = 10)(sum(df_all_sp$abondance)), " papillons au total.")` +Les participants ont compté `r paste0(label_number(accuracy = 10)(nb_idv_cpt), " ", sp_name, " sur les ", label_number(accuracy = 10)(sum(df_opj$abondance)), " papillons au total.")` ::: -- GitLab From f91375e63393f496042c7549f75935b4712a94ad Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Wed, 12 Feb 2025 16:17:59 +0100 Subject: [PATCH 04/14] feat: modification pour l'ajout du df historique --- fonctions/function_graphics.R | 8 +- maquette_espece.qmd | 185 ++++++++++++++++++++++++++++++++-- 2 files changed, 179 insertions(+), 14 deletions(-) diff --git a/fonctions/function_graphics.R b/fonctions/function_graphics.R index 804ce70..befe876 100644 --- a/fonctions/function_graphics.R +++ b/fonctions/function_graphics.R @@ -325,10 +325,10 @@ graph_pic <- function(df_pic, x = "annee", y = "sum_sp", ecart = "rmse", geom_hline(yintercept = 25, color = "#606060", linetype = "dashed") + geom_hline(yintercept = 38, color = "#606060", linetype = "dashed") + geom_hline(yintercept = 51, color = "#606060", linetype = "dashed") + - annotate("text", x = 2018.5, y = 5.5, label = "Hiver", color = "#234aa6") + - annotate("text", x = 2018.5, y = 18.5, label = "Printemps", color = "#5cda30") + - annotate("text", x = 2018.5, y = 31.5, label = "Été", color = "#da4c30") + - annotate("text", x = 2018.5, y = 44.5, label = "Automne", color = "#e7972a") + + annotate("text", x = 2005.5, y = 5.5, label = "Hiver", color = "#234aa6") + + annotate("text", x = 2005.5, y = 18.5, label = "Printemps", color = "#5cda30") + + annotate("text", x = 2005.5, y = 31.5, label = "Été", color = "#da4c30") + + annotate("text", x = 2005.5, y = 44.5, label = "Automne", color = "#e7972a") + geom_errorbar(aes(x=!!sym(x), ymin= !!sym(y)-!!sym(ecart), ymax=!!sym(y)+!!sym(ecart)), width=0.4, colour="#bb680e", alpha=0.9, linewidth=1.3) + geom_point(colour = "#ffa600", size = 3) + diff --git a/maquette_espece.qmd b/maquette_espece.qmd index 5b600b6..11b50cf 100644 --- a/maquette_espece.qmd +++ b/maquette_espece.qmd @@ -22,6 +22,7 @@ execute: # Variables constantes sp_name = params$sp_name source("fonctions/var.R") +is.histo = TRUE ``` @@ -42,7 +43,7 @@ if (Sys.getenv("CI") != "true") { source("fonctions/function_graphics.R") source("fonctions/function_import_from_mosaic.R") # Création des data frame -source("fonctions/create_df_all_sp.R") +source("fonctions/create_df_opj.R") source("fonctions/create_df_one_sp.R") ``` @@ -74,7 +75,7 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor i # Chiffres clés {orientation="columns"} -## Texte {width="40%"} +## Texte {width="20%"} ### Infobulle @@ -87,7 +88,7 @@ Nombre de fois où au moins un individu a été observé : ### Infobulle ::: {.valuebox icon=none color="#5bbdd6"} -Les participants ont compté `r paste0(label_number(accuracy = 10)(nb_idv_cpt), " ", sp_name, " sur les ", label_number(accuracy = 10)(sum(df_all_sp$abondance)), " papillons au total.")` +Les participants ont compté `r paste0(label_number(accuracy = 10)(nb_idv_cpt), " ", sp_name, " sur les ", label_number(accuracy = 10)(sum(df_opj$abondance)), " papillons au total.")` ::: ### Explication {width="60%"} @@ -96,8 +97,10 @@ Classement : `r paste0(which(rev(df_repartition$nom_espece) == sp_name), "/", le Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -## Graphiques {height="60%"} +## Graphiques {height="80%"} +:::panel-tabset +### 2019-2024 ```{r} gg_histo_plotly(df_hp = df_repartition, limits = df_repartition$nom_espece, couleur = df_repartition$couleur) @@ -105,6 +108,23 @@ gg_histo_plotly(df_hp = df_repartition, limits = df_repartition$nom_espece, # print("plotly repartition espece") ``` +### 2006-2018 +```{r} +gg_histo_plotly(df_hp = df_repartition_old, limits = df_repartition_old$nom_espece, + couleur = df_repartition_old$couleur) + +# print("plotly repartition espece") +``` + +### 2006-2024 +```{r} +gg_histo_plotly(df_hp = df_repartition_new_old, limits = df_repartition_new_old$nom_espece, + couleur = df_repartition_new_old$couleur) + +# print("plotly repartition espece") +``` +::: + # Cartographie {orientation="columns"} ## Textes {width="30%"} @@ -123,10 +143,13 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor i ## Graphiques {width="70%"} -:::panel-tabset +::::panel-tabset ### Jardins +:::panel-tabset + +#### 2019-2024 ```{r} carte_point_jardin(france = france, df_jp = df_jardin_point, title = paste0("Positions des jardins ayant participé entre ", @@ -134,28 +157,135 @@ carte_point_jardin(france = france, df_jp = df_jardin_point, strftime(max(df_sp$date_collection), "%Y"))) ``` +#### 2006-2018 +```{r} +carte_point_jardin(france = france, df_jp = df_jardin_point_old, + title = paste0("Positions des jardins ayant participé entre ", + strftime(min(df_sp$date_collection), "%Y"), " et ", + strftime(max(df_sp$date_collection), "%Y"))) +``` + +#### 2006-2024 +```{r} +carte_point_jardin(france = france, df_jp = df_jardin_point_new_old, + title = paste0("Positions des jardins ayant participé entre ", + strftime(min(df_sp$date_collection), "%Y"), " et ", + strftime(max(df_sp$date_collection), "%Y"))) +``` +::: + ### Abondance +:::panel-tabset +#### 2019-2024 ```{r} -print(carte_ab(shape_map = france, fill_map = df_dep$cl_qual, fill_color = couleurs, +print(carte_ab(shape_map = france, fill_map = df_dep_new$cl_qual, fill_color = couleurs, fill_cat = cat_carte_tendance_moy, fill_title = "Abondance relative entre 2019 et 2024", map_title = paste0("Carte de distribution de l'abondance relative de l'espèce ", sp_name))) ``` +#### 2006-2018 +```{r} +print(carte_ab(shape_map = france, fill_map = df_dep_old$cl_qual, fill_color = couleurs, + fill_cat = cat_carte_tendance_moy, fill_title = "Abondance relative entre 2006 et 2018", + map_title = paste0("Carte de distribution de l'abondance relative de l'espèce ", sp_name))) + +``` + +#### 2006-2024 +```{r} +print(carte_ab(shape_map = france, fill_map = df_dep$cl_qual, fill_color = couleurs, + fill_cat = cat_carte_tendance_moy, fill_title = "Abondance relative entre 2006 et 2024", + map_title = paste0("Carte de distribution de l'abondance relative de l'espèce ", sp_name))) + +``` + +::: + +### % jardins + +:::panel-tabset +#### 2019-2024 +```{r} +print(carte_ab(shape_map = france, fill_map = df_dep_new$cl_jard, fill_color = couleurs, + fill_cat = cat_carte_jard, fill_title = "% jardins observés entre 2019 et 2024", + map_title = paste0("Carte de distribution des jardins avec présence de l'espèce ", sp_name))) + +``` + +#### 2006-2018 +```{r} +print(carte_ab(shape_map = france, fill_map = df_dep_old$cl_jard, fill_color = couleurs, + fill_cat = cat_carte_jard, fill_title = "% jardins observés entre 2006 et 2018", + map_title = paste0("Carte de distribution des jardins avec présence de l'espèce ", sp_name))) + + +``` + +#### 2006-2024 +```{r} +print(carte_ab(shape_map = france, fill_map = df_dep$cl_jard, fill_color = couleurs, + fill_cat = cat_carte_jard, fill_title = "% jardins observés entre 2006 et 2024", + map_title = paste0("Carte de distribution des jardins avec présence de l'espèce ", sp_name))) + +``` + ::: +:::: + # Phénologie inter-annuelle ## Abondance relative +:::panel-tabset +### 2019-2024 +```{r} +#| title: "Abondance de l'espèce et pression d'échantillonnage" + +datemin = as.Date("2019-01-01") - 7 +datemax = max(df_sp$date) + 7 + +gg1 <- gg_histo(df_histo = df_ab_rel %>% filter(annee >= 2019), + y = "sum_ab_rel", dmin = datemin, dmax = datemax, + ytxt = "Abondance relative", title = "Abondance relative par semaine") + +gg2 <- gg_line(df_line = df_nb_obs_date %>% filter(date >= "2019-01-01"), + dmin = datemin, dmax = datemax, ytxt = "Sessions", + title = "Nombre de sessions d'observation par semaine") + +grid.arrange(gg1, gg2, ncol = 1, heights=c(2, 1)) + +``` + +### 2006-2018 ```{r} #| title: "Abondance de l'espèce et pression d'échantillonnage" -datemin = min(as.Date(df_sp$date_collection)) - 7 -datemax = max(as.Date(df_sp$date_collection)) + 7 +datemin = min(df_sp$date) - 7 +datemax = as.Date("2019-01-01") + 7 -gg1 <- gg_histo(df_histo = df_ab_rel %>% mutate(date = as.Date(date_collection)) , +gg1 <- gg_histo(df_histo = df_ab_rel %>% filter(annee < 2019), + y = "sum_ab_rel", dmin = datemin, dmax = datemax, + ytxt = "Abondance relative", title = "Abondance relative par semaine") + +gg2 <- gg_line(df_line = df_nb_obs_date %>% filter(date < "2019-01-01"), + dmin = datemin, dmax = datemax, ytxt = "Sessions", + title = "Nombre de sessions d'observation par semaine") + +grid.arrange(gg1, gg2, ncol = 1, heights=c(2, 1)) + +``` + +### 2006-2024 +```{r} +#| title: "Abondance de l'espèce et pression d'échantillonnage" + +datemin = min(df_sp$date) - 7 +datemax = max(df_sp$date) + 7 + +gg1 <- gg_histo(df_histo = df_ab_rel, y = "sum_ab_rel", dmin = datemin, dmax = datemax, ytxt = "Abondance relative", title = "Abondance relative par semaine") @@ -166,6 +296,8 @@ grid.arrange(gg1, gg2, ncol = 1, heights=c(2, 1)) ``` +::: + ## Texte explicatif Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. @@ -176,11 +308,43 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor i ```{r} colfunc <- colorRampPalette(c("#a4dbff", "#0b6cab")) -vec_col = c(colfunc(length(unique(df_sp$annee))-1), "#f40b0b") ``` ### Phénologie +:::panel-tabset +#### 2019-2024 +```{r} +vec_col = c(colfunc(length(unique(df_sp_new$annee))-1), "#f40b0b") +aes_echarts(plot_e = df_freq_rel_new %>% + group_by(annee) %>% + e_charts(num_semaine) %>% + e_line(freq_rel, symbol='none'), + xlab = "Semaine de participation", + ylab = "Fréquence relative", + title = "Nombre d'observations par jardin chaque semaine selon les années", + line_color = vec_col) + +# print("Echarts phénologie") +``` + +#### 2006-2018 ```{r} +vec_col = c(colfunc(length(unique(df_sp_old$annee))-1), "#f40b0b") +aes_echarts(plot_e = df_freq_rel_old %>% + group_by(annee) %>% + e_charts(num_semaine) %>% + e_line(freq_rel, symbol='none'), + xlab = "Semaine de participation", + ylab = "Fréquence relative", + title = "Nombre d'observations par jardin chaque semaine selon les années", + line_color = vec_col) + +# print("Echarts phénologie") +``` + +#### 2006-2024 +```{r} +vec_col = c(colfunc(length(unique(df_sp$annee))-1), "#f40b0b") aes_echarts(plot_e = df_freq_rel %>% group_by(annee) %>% e_charts(num_semaine) %>% @@ -193,6 +357,7 @@ aes_echarts(plot_e = df_freq_rel %>% # print("Echarts phénologie") ``` +::: ## Texte {width="30%"} -- GitLab From 2efecde05d173b79c48b8122e1b71bf6e96e7d61 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Wed, 12 Feb 2025 16:18:27 +0100 Subject: [PATCH 05/14] refactor: modification requete --- SQL/export_a_plat_OPJ.sql | 159 +++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 78 deletions(-) diff --git a/SQL/export_a_plat_OPJ.sql b/SQL/export_a_plat_OPJ.sql index c1f11fc..0f11e46 100644 --- a/SQL/export_a_plat_OPJ.sql +++ b/SQL/export_a_plat_OPJ.sql @@ -1,79 +1,82 @@ -SELECT - pp.id as participation_id, - pp.startDate as date_collection, - year(pp.startDate) as annee, - po.nbTaxons as abondance, - st_x(soa.geopoint) as longitude, - st_y(soa.geopoint) as latitude, - soa.deptCode as dept_code, - soa.postalCode as code_postal, - soa.userId as user_id, - soay.observationAreaId as jardin_id, - t.value as num_semaine, - t2.value as nom_espece, - t3.value as freq_passage, - t4.value as type_environnement, - t5.value as surface, - t6.value as distance_bois, - t7.value as distance_champs, - t8.value as distance_prairie, - t9.value as type_Engrais, - t10.value as type_Insecticide, - t11.value as type_Herbicide, - t12.value as type_Fongicide, - t13.value as type_AntiLimace, - t14.value as type_BouillieBordelaise, - t15.value as frequence_Engrais, - t16.value as frequence_Insecticide, - t17.value as frequence_Herbicide, - t18.value as frequence_Fongicide, - t19.value as frequence_AntiLimace, - t20.value as frequence_BouillieBordelaise -FROM - spgp.pj_observation po -LEFT JOIN spgp.pj_participation pp on - pp.id = po.participationId -LEFT JOIN spgp.spj_observation_area_year soay on - pp.observationAreaYearId = soay.id -LEFT JOIN spgp.spj_observation_area soa on - soay.observationAreaId = soa.id -LEFT JOIN thesaurus t on - pp.weekId = t.id -LEFT JOIN thesaurus t2 on - po.taxonId = t2.id -LEFT JOIN thesaurus t3 on - pp.frequencePassageId = t3.id -LEFT JOIN thesaurus t4 on - soay.environmentId = t4.id -LEFT JOIN thesaurus t5 on - soay.surfaceId = t5.id -LEFT JOIN thesaurus t6 on - soay.distanceBoisId = t6.id -LEFT JOIN thesaurus t7 on - soay.distanceChampId = t7.id -LEFT JOIN thesaurus t8 on - soay.distancePrairieId = t8.id -LEFT JOIN thesaurus t9 on - soay.typeEngraisId = t9.id -LEFT JOIN thesaurus t10 on - soay.typeInsecticideId = t10.id -LEFT JOIN thesaurus t11 on - soay.typeHerbicideId = t11.id -LEFT JOIN thesaurus t12 on - soay.typeFongicideId = t12.id -LEFT JOIN thesaurus t13 on - soay.typeAntiLimaceId = t13.id -LEFT JOIN thesaurus t14 on - soay.typeBouillieBordelaiseId = t14.id -LEFT JOIN thesaurus t15 on - soay.frequenceEngraisId = t15.id -LEFT JOIN thesaurus t16 on - soay.frequenceInsecticideId = t16.id -LEFT JOIN thesaurus t17 on - soay.frequenceHerbicideId = t17.id -LEFT JOIN thesaurus t18 on - soay.frequenceFongicideId = t18.id -LEFT JOIN thesaurus t19 on - soay.frequenceAntiLimaceId = t19.id -LEFT JOIN thesaurus t20 on +SELECT + pp.id as participation_id, + pp.startDate as date_collection, + year(pp.startDate) as annee, + po.nbTaxons as abondance, + st_x(soa.geopoint) as longitude, + st_y(soa.geopoint) as latitude, + soa.deptCode as dept_code, + soa.postalCode as code_postal, + soa.userId as user_id, + usr.email as email, + soay.observationAreaId as jardin_id, + t.value as num_semaine, + t2.value as nom_espece, + t3.value as freq_passage, + t4.value as type_environnement, + t5.value as surface, + t6.value as distance_bois, + t7.value as distance_champs, + t8.value as distance_prairie, + t9.value as type_Engrais, + t10.value as type_Insecticide, + t11.value as type_Herbicide, + t12.value as type_Fongicide, + t13.value as type_AntiLimace, + t14.value as type_BouillieBordelaise, + t15.value as frequence_Engrais, + t16.value as frequence_Insecticide, + t17.value as frequence_Herbicide, + t18.value as frequence_Fongicide, + t19.value as frequence_AntiLimace, + t20.value as frequence_BouillieBordelaise +FROM + spgp.pj_observation po +LEFT JOIN spgp.pj_participation pp on + pp.id = po.participationId +LEFT JOIN spgp.spj_observation_area_year soay on + pp.observationAreaYearId = soay.id +LEFT JOIN spgp.spj_observation_area soa on + soay.observationAreaId = soa.id +LEFT JOIN spgp.users usr on + soa.userId = usr.id +LEFT JOIN thesaurus t on + pp.weekId = t.id +LEFT JOIN thesaurus t2 on + po.taxonId = t2.id +LEFT JOIN thesaurus t3 on + pp.frequencePassageId = t3.id +LEFT JOIN thesaurus t4 on + soay.environmentId = t4.id +LEFT JOIN thesaurus t5 on + soay.surfaceId = t5.id +LEFT JOIN thesaurus t6 on + soay.distanceBoisId = t6.id +LEFT JOIN thesaurus t7 on + soay.distanceChampId = t7.id +LEFT JOIN thesaurus t8 on + soay.distancePrairieId = t8.id +LEFT JOIN thesaurus t9 on + soay.typeEngraisId = t9.id +LEFT JOIN thesaurus t10 on + soay.typeInsecticideId = t10.id +LEFT JOIN thesaurus t11 on + soay.typeHerbicideId = t11.id +LEFT JOIN thesaurus t12 on + soay.typeFongicideId = t12.id +LEFT JOIN thesaurus t13 on + soay.typeAntiLimaceId = t13.id +LEFT JOIN thesaurus t14 on + soay.typeBouillieBordelaiseId = t14.id +LEFT JOIN thesaurus t15 on + soay.frequenceEngraisId = t15.id +LEFT JOIN thesaurus t16 on + soay.frequenceInsecticideId = t16.id +LEFT JOIN thesaurus t17 on + soay.frequenceHerbicideId = t17.id +LEFT JOIN thesaurus t18 on + soay.frequenceFongicideId = t18.id +LEFT JOIN thesaurus t19 on + soay.frequenceAntiLimaceId = t19.id +LEFT JOIN thesaurus t20 on soay.frequenceBouillieBordelaiseId = t20.id \ No newline at end of file -- GitLab From 4ad9df059f48afc484fea279793a6a6011492be8 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Wed, 12 Feb 2025 16:19:12 +0100 Subject: [PATCH 06/14] =?UTF-8?q?feat:=20ajout=20de=20la=20fonction=20pour?= =?UTF-8?q?=20extraire=20les=20donn=C3=A9es=20historiques?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fonctions/create_df_old.R | 121 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 fonctions/create_df_old.R diff --git a/fonctions/create_df_old.R b/fonctions/create_df_old.R new file mode 100644 index 0000000..7ff7d09 --- /dev/null +++ b/fonctions/create_df_old.R @@ -0,0 +1,121 @@ +# HEADER -------------------------------------------- +# +# Author: Maël Pretet +# Copyright Copyright 2025 - Maël Pretet +# Email: mael.pretet1@mnhn.fr +# +# Date: 2025-01-10 +# +# Script Name: fonctions/create_df_old.R +# +# Script Description: Création du dataframe des données historiques de opj +# +# +# ------------------------------------ + +source("fonctions/library.R") +source("fonctions/var.R") + +# Tables historiques +obpj_observations <- read.csv2(paste0("data/donnees_historiques/", + "observations_OBJ_mensuelles_20250107"), sep = "\t") +obpj_compo <- read.csv2(paste0("data/donnees_historiques/", + "composition_jardin_papillons_bourdons"), sep = "\t") +obpj_participations <- read.csv2(paste0("data/donnees_historiques/", + "participations_OBJ_mensuelles_20250107"), sep = "\t") +obpj_pratiques <- read.csv2(paste0("data/donnees_historiques/", + "pratiques_jardin_papillons_bourdons"), sep = "\t") + +# Traitement du dataframe des pratiques +obpj_pratiques_trait = obpj_pratiques %>% + pivot_wider(names_from = X.column., values_from = engrais) %>% + rename(type_Engrais = `type d'engrais`, + type_Insecticide = `type d'insecticides`, + type_Herbicide = `type d'herbicide`, + type_Fongicide = `type de fongicides`, + type_AntiLimace = `type d'antilimace`, + type_BouillieBordelaise = `type de bouillie bordelaise`, + frequence_Engrais = engrais, + frequence_Insecticide = insecticides, + frequence_Herbicide = herbicides, + frequence_Fongicide = fongicides, + frequence_AntiLimace = antilimaces, + frequenceBouillieBordelaise = `bouillie bordelaise`) %>% + group_by(zfk) %>% + filter(annee == max(annee)) %>% + ungroup() + +# Liste user à partir de 2019 +# source("fonctions/create_df_all_sp.R") +# df_all_sp %>% +# select(c(id, email, jardin_id, latitude, longitude)) + +table_user_2019 = read.csv2(file = "data/table_user_2019.csv") + + +# Jointure des tables +df_join_obs_part = obpj_participations %>% + mutate(ytmp = as.integer(strftime(date, "%Y"))) %>% + full_join(obpj_observations, by = c("participationpk" = "participation_fk")) %>% + left_join(obpj_pratiques_trait, by = c("zpk" = "zfk")) %>% + select(-ytmp) + +# Traitement des anciennes données pour adapter au format des nouvelles +df_old_data = df_join_obs_part %>% + # On supprime les observations sans participation correspondante + # On supprime les observations avec des NA rentrées en \N + filter(!is.na(codepostal), + codepostal != "", + !is.na(date), + long != "\\N", + lat != "\\N", + lat != 0) %>% + # On renomme les colonnes selon les métadonnées de 2019 et + + dplyr::rename(code_postal = codepostal, + participation_id = participationpk, + latitude = lat, + longitude = long, + abondance = nb_individus, + type_environnement = environnement, + nom_espece = sp_name, + jardin_id = zpk) %>% + mutate(nom_espece = if_else(nom_espece == "Cuivrés", "Cuivré", nom_espece)) %>% + # On supprime les données pour les espèces absentes de la liste principale + filter(nom_espece %in% liste_principale) %>% + # On modifie le type ou le contenu de certaines colonnes + mutate(date = as.Date(date), + annee = as.integer(strftime(date, "%Y")), + # adaptation pour les département en Corse + dept_code = if_else(substr(code_postal, 1, 2)=="20", + if_else(code_postal < "20200", + "2A", "2B"), + substr(code_postal, 1, 2)), + num_semaine = strftime(date, "%V"), + user_id = -dense_rank(email), + jardin_id = -jardin_id, + longitude = as.double(longitude), + latitude = as.double(latitude), + an_sem = paste0(annee, "-S", num_semaine), + type_environnement = case_when(is.na(type_environnement) ~ NA, + type_environnement == "" ~ NA, + type_environnement == "urbain" ~ "Urbain", + type_environnement == "peri-urbain" ~ "Péri-urbain", + type_environnement == "rural" ~ "Rural"), + participation_id = -(participation_id))%>% + # Pré 2014, les dates d'observations sont en milieu de mois, et post 2014 + # elles sont en début de mois. Il semble y avoir eu une transition créant des + # dates en début de mois entre 2011 et 2014 qu'on supprime + filter(!( (annee <= 2013 & strftime(date, "%d") == "01") | + (annee == 2014 & strftime(date, "%d")=="15") )) %>% + left_join(reg_dep, by = c("dept_code" = "code_departement")) %>% + # Les users toujours actifs récupèrent l'id actuelle + left_join(table_user_2019, by = c("email" = "email")) %>% + mutate(user_id = if_else(!is.na(id), id, user_id)) %>% + # On supprime les colonnes qui ne sont plus présentes + select(-c(nom_user, name, adresse, ville, id, type, + distance_champ_cultive, frequence)) + +# Sauvegarde des données historiques dans un fichier RDS +saveRDS(object = df_old_data, file = "data/rdata/df_old_data.rds") + + -- GitLab From 12d3c8664dcf38fecf97b74098442ad298275fc4 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Wed, 12 Feb 2025 16:30:28 +0100 Subject: [PATCH 07/14] maj: ajout des test (dossier ou fichier) au gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index f7c49ef..aa4f3a2 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,8 @@ data/history/ **/*.html html/ out/graphiques/ +*/*test* +*/test*/ # Ignorer tous les fichiers .jpg data/photos/*.jpg data/photos/*.png -- GitLab From 2309154adcba9858ef7b347712408b556fea84c6 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Mon, 3 Mar 2025 17:17:29 +0100 Subject: [PATCH 08/14] maj: ajout des geojson dans le gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index aa4f3a2..706f6e6 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ data/photos/*.jpg data/photos/*.png # Ne pas ignorer Amaryllis.jpg !data/photos/Amaryllis.jpg +*/*.geojson data/img/* !data/img/papillon.png -- GitLab From 97a27a57acee989165a89d3e3bde16a75eb0e590 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Mon, 31 Mar 2025 15:03:19 +0200 Subject: [PATCH 09/14] refactor: modification des noms de colonnes avec le nnouvel export standard --- SQL/export_a_plat_OPJ.sql | 23 +-- fonctions/create_df_one_sp.R | 271 ++++++++++++++++++----------------- maquette_espece.qmd | 54 +++---- 3 files changed, 175 insertions(+), 173 deletions(-) diff --git a/SQL/export_a_plat_OPJ.sql b/SQL/export_a_plat_OPJ.sql index 0f11e46..f36d010 100644 --- a/SQL/export_a_plat_OPJ.sql +++ b/SQL/export_a_plat_OPJ.sql @@ -1,20 +1,21 @@ SELECT - pp.id as participation_id, - pp.startDate as date_collection, - year(pp.startDate) as annee, - po.nbTaxons as abondance, + po.id as observation_id, + pp.id as session_id, + pp.startDate as session_date, + year(pp.startDate) as session_year, + t.value as session_week, + soa.userId as user_id, + soay.observationAreaId as jardin_id, + ST_AsText(soa.geopoint) as geometry, st_x(soa.geopoint) as longitude, st_y(soa.geopoint) as latitude, soa.deptCode as dept_code, - soa.postalCode as code_postal, - soa.userId as user_id, - usr.email as email, - soay.observationAreaId as jardin_id, - t.value as num_semaine, - t2.value as nom_espece, + soa.postalCode as session_zip_code, + t2.value as taxon, + po.nbTaxons as taxon_count, t3.value as freq_passage, t4.value as type_environnement, - t5.value as surface, + t5.value as jardin_surface, t6.value as distance_bois, t7.value as distance_champs, t8.value as distance_prairie, diff --git a/fonctions/create_df_one_sp.R b/fonctions/create_df_one_sp.R index b6876eb..afedcfb 100644 --- a/fonctions/create_df_one_sp.R +++ b/fonctions/create_df_one_sp.R @@ -38,12 +38,13 @@ if (!exists("df_opj")) { df_opj = df_opj %>% filter(!is.na(dept_code), # suppression des départements nuls str_length(dept_code)==2, # suppression des drom-com - annee >= 2019, - nom_espece %in% liste_principale) %>% # suppression des données avant 2019 - mutate(date = as.Date(date_collection), - an_sem = if_else(as.numeric(num_semaine) < 10, - paste0(annee, "-S0", num_semaine), - paste0(annee, "-S", num_semaine))) %>% + session_year >= 2019, + taxon %in% liste_principale) %>% # suppression des données avant 2019 + mutate(date = as.Date(session_date), + session_date = as.Date(session_date), + an_sem = if_else(as.numeric(session_week) < 10, + paste0(session_year, "-S0", session_week), + paste0(session_year, "-S", session_week))) %>% left_join(reg_dep, by = c("dept_code" = "code_departement")) # ajout des départements # Df de l'historique @@ -53,46 +54,46 @@ if (is.histo) { rm("df_old_data") df_opj_new = df_opj %>% - filter(annee >= 2019) + filter(session_year >= 2019) df_opj_old = df_opj %>% - filter(annee < 2019) + filter(session_year < 2019) df_sp_new = df_opj_new %>% - filter(nom_espece == sp_name) + filter(taxon == sp_name) df_sp_old = df_opj_old %>% - filter(nom_espece == sp_name) + filter(taxon == sp_name) df_sp_ab_new = df_sp_new %>% - filter(abondance != 0) + filter(taxon_count != 0) df_sp_ab_old = df_sp_old %>% - filter(abondance != 0) + filter(taxon_count != 0) }else{ df_opj_new = df_opj df_opj_old = df_opj df_sp_new = df_opj %>% - filter(nom_espece == sp_name) + filter(taxon == sp_name) df_sp_old = df_opj %>% - filter(nom_espece == sp_name) + filter(taxon == sp_name) - df_sp_ab_new = df_sp %>% - filter(abondance != 0) + df_sp_ab_new = df_opj_new %>% + filter(taxon_count != 0) - df_sp_ab_old = df_sp %>% - filter(abondance != 0) + df_sp_ab_old = df_opj_old %>% + filter(taxon_count != 0) } # Df d'une espèce df_sp = df_opj %>% - filter(nom_espece == sp_name) + filter(taxon == sp_name) df_sp_ab = df_sp %>% - filter(abondance != 0) + filter(taxon_count != 0) # Carte de france en objet sf france <- read_sf(paste0("carte/contour-des-departements.geojson")) @@ -107,8 +108,8 @@ nb_jardin = length(unique(df_opj$jardin_id)) nb_jardin_obs = length(unique(df_sp_ab$jardin_id)) # Abondance maximale (calculée en groupant sur les années et les départements) nb_max_ab = df_sp %>% - group_by(annee, nom_departement, nom_region) %>% - summarise(sum_ab = sum(abondance), .groups = 'drop') %>% + group_by(session_year, nom_departement, nom_region) %>% + summarise(sum_ab = sum(taxon_count), .groups = 'drop') %>% filter(sum_ab == max(sum_ab)) %>% as.data.frame() @@ -124,12 +125,12 @@ fct_df_abondance <- function(df){ return(df_abondance <- df %>% filter(!is.na(jardin_id)) %>% group_by(dept_code) %>% - summarise(n = sum(abondance), - nb_participation = n_distinct(participation_id), + summarise(n = sum(taxon_count), + nb_participation = n_distinct(session_id), nb_jard = n_distinct(jardin_id), - nb_j_nul = sum(sapply(split(abondance, jardin_id), + nb_j_nul = sum(sapply(split(taxon_count, jardin_id), function(x) all(x == 0))), - nb_j_non_nul = sum(sapply(split(abondance, jardin_id), + nb_j_non_nul = sum(sapply(split(taxon_count, jardin_id), function(x) any(x > 0))), .groups = 'drop') %>% mutate(ab_moy = n/nb_jard, @@ -180,39 +181,39 @@ couleurs = c("#7f7f7f", "#ffef6c", "#f7b905", "#ff7400", "#ff0000", "#950000") # Nombre de fois où l'individu est observé nb_obs_idv = nrow(df_sp_ab) # Nombre total d'individus observés (somme de l'abondance) -nb_idv_cpt = sum(df_sp_ab$abondance) +nb_idv_cpt = sum(df_sp_ab$taxon_count) #----- Graphiques -----# # Df abondance par espèce (post 2019) df_repartition = df_opj_new %>% - group_by(nom_espece) %>% - summarise(sum_ab = sum(abondance), - rel_ab = sum(abondance)/sum(df_opj_new$abondance), + group_by(taxon) %>% + summarise(sum_ab = sum(taxon_count), + rel_ab = sum(taxon_count)/sum(df_opj_new$taxon_count), .groups = 'drop') %>% arrange(sum_ab) %>% mutate(couleur = c(rep("#3138cc", 10), rep("#6893fc", 9), rep("#90d3ff", 9)), - couleur = if_else(nom_espece == sp_name, color_flag, couleur)) + couleur = if_else(taxon == sp_name, color_flag, couleur)) # Df abondance par espèce (old data) df_repartition_old = df_opj_old %>% - group_by(nom_espece) %>% - summarise(sum_ab = sum(abondance), - rel_ab = sum(abondance)/sum(df_opj_old$abondance), + group_by(taxon) %>% + summarise(sum_ab = sum(taxon_count), + rel_ab = sum(taxon_count)/sum(df_opj_old$taxon_count), .groups = 'drop') %>% arrange(sum_ab) %>% mutate(couleur = c(rep("#3138cc", 10), rep("#6893fc", 9), rep("#90d3ff", 9)), - couleur = if_else(nom_espece == sp_name, color_flag, couleur)) + couleur = if_else(taxon == sp_name, color_flag, couleur)) # Df abondance par espèce (all data) df_repartition_new_old = df_opj %>% - group_by(nom_espece) %>% - summarise(sum_ab = sum(abondance), - rel_ab = sum(abondance)/sum(df_opj$abondance), + group_by(taxon) %>% + summarise(sum_ab = sum(taxon_count), + rel_ab = sum(taxon_count)/sum(df_opj$taxon_count), .groups = 'drop') %>% arrange(sum_ab) %>% mutate(couleur = c(rep("#3138cc", 10), rep("#6893fc", 9), rep("#90d3ff", 9)), - couleur = if_else(nom_espece == sp_name, color_flag, couleur)) + couleur = if_else(taxon == sp_name, color_flag, couleur)) ######################################### #-------- Variations annuelles ---------# @@ -233,8 +234,8 @@ df_nb_obs_date <- df_sp %>% # Df abondance par année df_dep_y = df_sp %>% - group_by(dept_code, annee) %>% - summarise(n = sum(abondance), + group_by(dept_code, session_year) %>% + summarise(n = sum(taxon_count), nb_jard = n_distinct(jardin_id), .groups = 'drop') %>% mutate(ab_moy = n/nb_jard, @@ -260,37 +261,37 @@ cat_carte_moy = c("0", "0-1", "2-5", "6-10", "+ de 10") # Calcul du nombre de participations sur toute l'opération par semaine nb_part_par_sem = df_opj %>% - mutate(num_semaine = as.integer(num_semaine)) %>% - group_by(annee, num_semaine) %>% - summarise(nb_part = n_distinct(participation_id), + mutate(session_week = as.integer(session_week)) %>% + group_by(session_year, session_week) %>% + summarise(nb_part = n_distinct(session_id), .groups = 'drop') # New nb_part_par_sem_new = df_opj_new %>% - mutate(num_semaine = as.integer(num_semaine)) %>% - group_by(annee, num_semaine) %>% - summarise(nb_part = n_distinct(participation_id), + mutate(session_week = as.integer(session_week)) %>% + group_by(session_year, session_week) %>% + summarise(nb_part = n_distinct(session_id), .groups = 'drop') # Old nb_part_par_sem_old = df_opj_old %>% - mutate(num_semaine = as.integer(num_semaine)) %>% - group_by(annee, num_semaine) %>% - summarise(nb_part = n_distinct(participation_id), + mutate(session_week = as.integer(session_week)) %>% + group_by(session_year, session_week) %>% + summarise(nb_part = n_distinct(session_id), .groups = 'drop') # Abondance relative # All data df_ab_rel <- df_sp %>% - mutate(num_semaine = as.integer(num_semaine)) %>% - group_by(annee, num_semaine, date) %>% - summarise(sum_ab = sum(abondance), + mutate(session_week = as.integer(session_week)) %>% + group_by(session_year, session_week, date) %>% + summarise(sum_ab = sum(taxon_count), .groups = 'drop') %>% # Somme des abondances - left_join(nb_part_par_sem, by = c("annee" = "annee", - "num_semaine" = "num_semaine")) %>% + left_join(nb_part_par_sem, by = c("session_year" = "session_year", + "session_week" = "session_week")) %>% mutate(sum_ab_rel = sum_ab/nb_part) %>% # Division par le nombre de participations - arrange(num_semaine) %>% - group_by(annee, num_semaine) %>% + arrange(session_week) %>% + group_by(session_year, session_week) %>% mutate(col_sup = if_else(n() == 2 & sum_ab == 0, 1, 0)) %>% filter(col_sup == 0) %>% select(!col_sup) %>% @@ -299,50 +300,50 @@ df_ab_rel <- df_sp %>% # Phénologie # All data df_freq_rel <- df_sp_ab %>% - mutate(num_semaine = as.integer(num_semaine)) %>% - group_by(annee, num_semaine) %>% + mutate(session_week = as.integer(session_week)) %>% + group_by(session_year, session_week) %>% summarise(sum_obs = n(), .groups = 'drop') %>% # Somme des observations - full_join(nb_part_par_sem, by = c("annee" = "annee", - "num_semaine" = "num_semaine")) %>% + full_join(nb_part_par_sem, by = c("session_year" = "session_year", + "session_week" = "session_week")) %>% mutate(freq_rel = if_else(is.na(sum_obs), 0, sum_obs/nb_part)) %>% # Division par le nombre de participations - arrange(num_semaine) + arrange(session_week) # New data df_freq_rel_new <- df_sp_ab_new %>% - mutate(num_semaine = as.integer(num_semaine)) %>% - group_by(annee, num_semaine) %>% + mutate(session_week = as.integer(session_week)) %>% + group_by(session_year, session_week) %>% summarise(sum_obs = n(), .groups = 'drop') %>% # Somme des observations - full_join(nb_part_par_sem_new, by = c("annee" = "annee", - "num_semaine" = "num_semaine")) %>% + full_join(nb_part_par_sem_new, by = c("session_year" = "session_year", + "session_week" = "session_week")) %>% mutate(freq_rel = if_else(is.na(sum_obs), 0, sum_obs/nb_part)) %>% # Division par le nombre de participations - arrange(num_semaine) + arrange(session_week) # Old data df_freq_rel_old <- df_sp_ab_old %>% - mutate(num_semaine = as.integer(num_semaine)) %>% - group_by(annee, num_semaine) %>% + mutate(session_week = as.integer(session_week)) %>% + group_by(session_year, session_week) %>% summarise(sum_obs = n(), .groups = 'drop') %>% # Somme des observations - full_join(nb_part_par_sem_old, by = c("annee" = "annee", - "num_semaine" = "num_semaine")) %>% + full_join(nb_part_par_sem_old, by = c("session_year" = "session_year", + "session_week" = "session_week")) %>% mutate(freq_rel = if_else(is.na(sum_obs), 0, sum_obs/nb_part)) %>% # Division par le nombre de participations - arrange(num_semaine) + arrange(session_week) # Présence moyenne df_date_wm = df_sp %>% - filter(abondance !=0, annee != strftime(Sys.Date()+365/2, "%Y")) %>% + filter(taxon_count !=0, session_year != strftime(Sys.Date()+365/2, "%Y")) %>% mutate(semaine = as.integer(strftime(date, '%V'))) %>% - group_by(annee) %>% - summarise(sum_sp = weighted.mean(semaine, abondance), .groups = 'drop') + group_by(session_year) %>% + summarise(sum_sp = weighted.mean(semaine, taxon_count), .groups = 'drop') df_date_wm_sqrt = df_sp %>% - filter(abondance !=0, annee != strftime(Sys.Date()+365/2, "%Y")) %>% + filter(taxon_count !=0, session_year != strftime(Sys.Date()+365/2, "%Y")) %>% mutate(semaine = as.integer(strftime(date, '%V'))) %>% - left_join(df_date_wm, by = c("annee" = "annee")) %>% - mutate(minus = abondance*((semaine - sum_sp)^2) ) %>% - group_by(annee, sum_sp) %>% + left_join(df_date_wm, by = c("session_year" = "session_year")) %>% + mutate(minus = taxon_count*((semaine - sum_sp)^2) ) %>% + group_by(session_year, sum_sp) %>% summarise(sum_minus = sum(minus), n = n(), .groups = 'drop') %>% mutate(rmse = sqrt(sum_minus/n)) @@ -353,17 +354,17 @@ df_date_wm_sqrt = df_sp %>% # Moyenne d'abondance df_moyenne_greg = df_opj %>% - filter(abondance!= 0) %>% - group_by(nom_espece) %>% - summarise(m_abn = mean(abondance), n = n(), + filter(taxon_count!= 0) %>% + group_by(taxon) %>% + summarise(m_abn = mean(taxon_count), n = n(), .groups = 'drop') %>% mutate(sd = 1.96*sqrt(m_abn/n)) %>% arrange(desc(m_abn)) %>% as.data.frame() # Espèce seule -df_gregarite = data.frame(nb_idv = as.numeric(names(summary(as.factor(df_sp_ab$abondance)))), - frequence = summary(as.factor(df_sp_ab$abondance))) %>% +df_gregarite = data.frame(nb_idv = as.numeric(names(summary(as.factor(df_sp_ab$taxon_count)))), + frequence = summary(as.factor(df_sp_ab$taxon_count))) %>% mutate(freq_prc = frequence/sum(frequence), class_idv = case_when(nb_idv < 2 ~ "1", nb_idv >= 2 & nb_idv < 5 ~ "2 à 4", @@ -372,16 +373,16 @@ df_gregarite = data.frame(nb_idv = as.numeric(names(summary(as.factor(df_sp_ab$a # Toutes les espèces df_gregarite_all = df_opj %>% - filter(abondance!= 0) %>% - mutate(ab_grega = factor(if_else(abondance == 1, "1 individu", "+ de 1 individu"), + filter(taxon_count!= 0) %>% + mutate(ab_grega = factor(if_else(taxon_count == 1, "1 individu", "+ de 1 individu"), levels = c("1 individu", "+ de 1 individu"))) %>% - group_by(nom_espece, ab_grega) %>% + group_by(taxon, ab_grega) %>% summarise(n = n(), .groups = 'drop') %>% - group_by(nom_espece) %>% + group_by(taxon) %>% mutate(sum_n = sum(n)) %>% ungroup() %>% mutate(prop_grega = n/sum_n) %>% - group_by(nom_espece) %>% + group_by(taxon) %>% mutate(sqrt_n = sqrt(prod(prop_grega)/sum_n), classif = prop_grega[2]) %>% ungroup() @@ -427,7 +428,7 @@ lst_param = list(bois, champ, prairie, environnement) # Df des jardins positionnés sur la carte (new data) df_jardin_point = df_sp_new %>% group_by(jardin_id, latitude, longitude) %>% - summarise(sum_ab = sum(abondance), .groups = 'drop') %>% + summarise(sum_ab = sum(taxon_count), .groups = 'drop') %>% filter(!is.na(latitude)) %>% mutate(Présence = if_else(sum_ab == 0, "Espèce non observée", "Espèce observée"), alpha = if_else(sum_ab == 0, 0.7, 1)) %>% @@ -436,7 +437,7 @@ df_jardin_point = df_sp_new %>% # Df des jardins positionnés sur la carte (old data) df_jardin_point_old = df_sp_old %>% group_by(jardin_id, latitude, longitude) %>% - summarise(sum_ab = sum(abondance), .groups = 'drop') %>% + summarise(sum_ab = sum(taxon_count), .groups = 'drop') %>% filter(!is.na(latitude)) %>% mutate(Présence = if_else(sum_ab == 0, "Espèce non observée", "Espèce observée"), alpha = if_else(sum_ab == 0, 0.7, 1)) %>% @@ -445,7 +446,7 @@ df_jardin_point_old = df_sp_old %>% # Df des jardins positionnés sur la carte (all data) df_jardin_point_new_old = df_sp %>% group_by(jardin_id, latitude, longitude) %>% - summarise(sum_ab = sum(abondance), .groups = 'drop') %>% + summarise(sum_ab = sum(taxon_count), .groups = 'drop') %>% filter(!is.na(latitude)) %>% mutate(Présence = if_else(sum_ab == 0, "Espèce non observée", "Espèce observée"), alpha = if_else(sum_ab == 0, 0.7, 1)) %>% @@ -453,12 +454,12 @@ df_jardin_point_new_old = df_sp %>% # Fonction à appliquer aux df pour les barycentres bary_function <- function(df, - gb1 = c("annee", "jardin_id", "latitude", "longitude"), - gb2 = c("annee")){ + gb1 = c("session_year", "jardin_id", "latitude", "longitude"), + gb2 = c("session_year")){ df <- df %>% group_by(!!!syms(gb1)) %>% # On groupe selon les paramètres de gb1 - summarise(sum_ab = sum(abondance), .groups = 'drop') %>% + summarise(sum_ab = sum(taxon_count), .groups = 'drop') %>% filter(!is.na(latitude)) %>% mutate(lat_pond = latitude*sum_ab, # Calcul des latitudes et long_pond = longitude*sum_ab) %>% # longitudes pondérées @@ -477,32 +478,32 @@ bary_function <- function(df, # Df barycentre de tous les jardins chaque année df_bary_base<- df_opj %>% - group_by(annee, jardin_id, latitude, longitude) %>% + group_by(session_year, jardin_id, latitude, longitude) %>% summarise(sum_ab = n(), .groups = 'drop') %>% filter(!is.na(latitude)) %>% ungroup() %>% mutate(lat_pond = latitude*sum_ab, long_pond = longitude*sum_ab) %>% - group_by(annee) %>% + group_by(session_year) %>% summarise(across(matches("*_pond"), \(x) sum(x, na.rm = TRUE)), across(matches("sum_ab"), sum), .groups = 'drop') %>% mutate(latitude = lat_pond/sum_ab, longitude = long_pond/sum_ab, - nom_espece = "Jardins", + taxon = "Jardins", color = "#0baaff") %>% as.data.frame() # Df du barycentre pour une espèce df_bary_one_sp <- cbind(bary_function(df = df_sp), - data.frame(nom_espece = sp_name, + data.frame(taxon = sp_name, color = "red")) # Df des barycentres pour toutes les espèces df_bary_all_sp <- bary_function(df = df_opj, - gb1 = c("annee", "jardin_id", "latitude", - "longitude", "nom_espece"), - gb2 = c("annee", "nom_espece")) %>% - mutate(nom_esp_min = if_else(nom_espece == sp_name, sp_name, "Autres")) + gb1 = c("session_year", "jardin_id", "latitude", + "longitude", "taxon"), + gb2 = c("session_year", "taxon")) %>% + mutate(nom_esp_min = if_else(taxon == sp_name, sp_name, "Autres")) @@ -515,22 +516,22 @@ df_bary_all_sp <- bary_function(df = df_opj, ######################################### df_co = df_opj %>% - filter(participation_id %in% unique(df_sp_ab$participation_id)) + filter(session_id %in% unique(df_sp_ab$session_id)) df_occurence = df_opj %>% - select(participation_id, an_sem, annee, nom_espece, abondance) %>% - pivot_wider(names_from = nom_espece, values_from = abondance)%>% + select(session_id, an_sem, session_year, taxon, taxon_count) %>% + pivot_wider(names_from = taxon, values_from = taxon_count)%>% filter(!!sym(sp_name) != 0)%>% - mutate(participation_id = as.character(participation_id), - annee = as.character(annee)) %>% + mutate(session_id = as.character(session_id), + session_year = as.character(session_year)) %>% mutate_if(~ any(is.numeric(.)), ~ if_else(.==0, "NON", "OUI")) %>% - select(!c(participation_id, an_sem, annee, !!sym(sp_name))) + select(!c(session_id, an_sem, session_year, !!sym(sp_name))) df_oui = apply(df_occurence, 2, function(x){return(length(which(x=="OUI"))/length(x))}) df_oui = sort(round(df_oui*100, digits = 2), decreasing = TRUE) df_oui = data.frame(nom = names(df_oui), corr = as.numeric(df_oui)) -all_names = unique(df_opj$nom_espece) +all_names = unique(df_opj$taxon) names_no_sp = all_names[-which(all_names == sp_name)] df_oui = df_oui %>% dplyr::arrange(nom) @@ -553,20 +554,20 @@ df_tab = df_oui %>% vec_name = c(sp_name, (df_oui %>% arrange(desc(corr)))$nom[1:5]) df_coocc = df_opj %>% - filter(nom_espece %in% vec_name) %>% - group_by(nom_espece, an_sem) %>% - summarise(sum_ab = sum(abondance), .groups = 'drop') %>% - group_by(nom_espece) %>% + filter(taxon %in% vec_name) %>% + group_by(taxon, an_sem) %>% + summarise(sum_ab = sum(taxon_count), .groups = 'drop') %>% + group_by(taxon) %>% mutate(sum_sp = sum(sum_ab), sum_ab_norm = sum_ab/sum_sp, - nom_espece = factor(nom_espece, levels = vec_name)) %>% + taxon = factor(taxon, levels = vec_name)) %>% arrange(an_sem) %>% - relocate(nom_espece, .before = an_sem) + relocate(taxon, .before = an_sem) # Nombre d'observations totales de chaque espèce df_nbsp_all = df_opj %>% - filter(abondance != 0) %>% - group_by(nom_espece) %>% + filter(taxon_count != 0) %>% + group_by(taxon) %>% summarise(n = n(), .groups = 'drop') if (file.exists("data/rdata/df_heatmap.rds") & @@ -576,22 +577,22 @@ if (file.exists("data/rdata/df_heatmap.rds") & }else{ df_heatmap = data.frame() - for (name in rev(df_repartition$nom_espece)) { + for (name in rev(df_repartition$taxon)) { df_tmp = df_opj %>% - select(participation_id, an_sem, annee, nom_espece, abondance) %>% - arrange(factor(nom_espece, levels = rev(df_repartition$nom_espece))) %>% - pivot_wider(names_from = nom_espece, values_from = abondance) %>% + select(session_id, an_sem, session_year, taxon, taxon_count) %>% + arrange(factor(taxon, levels = rev(df_repartition$taxon))) %>% + pivot_wider(names_from = taxon, values_from = taxon_count) %>% filter(!!sym(name) != 0) %>% - mutate(participation_id = as.character(participation_id), - annee = as.character(annee)) %>% + mutate(session_id = as.character(session_id), + session_year = as.character(session_year)) %>% mutate_if(~ any(is.numeric(.)), ~ if_else(.==0, 0, 1)) %>% - select(!c(participation_id, an_sem, annee)) %>% + select(!c(session_id, an_sem, session_year)) %>% summarise(across(where(is.numeric), \(x) sum(x, na.rm = TRUE))) - df_tmp = df_tmp / (df_nbsp_all %>% filter(nom_espece == name))$n + df_tmp = df_tmp / (df_nbsp_all %>% filter(taxon == name))$n df_heatmap = rbind(df_heatmap, df_tmp) } - rownames(df_heatmap) = rev(df_repartition$nom_espece) + rownames(df_heatmap) = rev(df_repartition$taxon) df_heatmap = as.matrix(df_heatmap) saveRDS(object = df_heatmap, file = "data/rdata/df_heatmap.rds") @@ -604,19 +605,19 @@ if (file.exists("data/rdata/df_heatmap.rds") & df_histo_test = df_opj %>% - filter(abondance!= 0) %>% - mutate(ab_grega = factor(case_when(abondance == 1 ~ "1", - abondance <= 4 ~ "2 à 4", - abondance <= 9 ~ "5 à 9", - abondance > 9 ~ "+ de 10"), + filter(taxon_count!= 0) %>% + mutate(ab_grega = factor(case_when(taxon_count == 1 ~ "1", + taxon_count <= 4 ~ "2 à 4", + taxon_count <= 9 ~ "5 à 9", + taxon_count > 9 ~ "+ de 10"), levels = c("1", "2 à 4", "5 à 9", "+ de 10"))) %>% - group_by(nom_espece, ab_grega) %>% + group_by(taxon, ab_grega) %>% summarise(n = n(), .groups = 'drop') %>% - group_by(nom_espece) %>% + group_by(taxon) %>% mutate(sum_n = sum(n)) %>% ungroup() %>% mutate(prop_grega = n/sum_n) %>% - full_join(df_moyenne_greg %>% select(nom_espece, m_abn), by = c("nom_espece" = "nom_espece")) + full_join(df_moyenne_greg %>% select(taxon, m_abn), by = c("taxon" = "taxon")) diff --git a/maquette_espece.qmd b/maquette_espece.qmd index 11b50cf..8d6009b 100644 --- a/maquette_espece.qmd +++ b/maquette_espece.qmd @@ -7,7 +7,7 @@ format: scrolling: false nav-buttons: - icon: gitlab - href: https://outils-patrinat.mnhn.fr/gitlab/mpretet/fiche_espece + href: https://outils-patrinat.mnhn.fr/gitlab/vigie-nature/fiche_espece_opj - icon: arrow-bar-left href: ../programs/accueil.html embed-resources: true @@ -88,12 +88,12 @@ Nombre de fois où au moins un individu a été observé : ### Infobulle ::: {.valuebox icon=none color="#5bbdd6"} -Les participants ont compté `r paste0(label_number(accuracy = 10)(nb_idv_cpt), " ", sp_name, " sur les ", label_number(accuracy = 10)(sum(df_opj$abondance)), " papillons au total.")` +Les participants ont compté `r paste0(label_number(accuracy = 10)(nb_idv_cpt), " ", sp_name, " sur les ", label_number(accuracy = 10)(sum(df_opj$taxon_count)), " papillons au total.")` ::: ### Explication {width="60%"} -Classement : `r paste0(which(rev(df_repartition$nom_espece) == sp_name), "/", length(df_repartition$nom_espece))` +Classement : `r paste0(which(rev(df_repartition$taxon) == sp_name), "/", length(df_repartition$taxon))` Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. @@ -102,7 +102,7 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor i :::panel-tabset ### 2019-2024 ```{r} -gg_histo_plotly(df_hp = df_repartition, limits = df_repartition$nom_espece, +gg_histo_plotly(df_hp = df_repartition, limits = df_repartition$taxon, couleur = df_repartition$couleur) # print("plotly repartition espece") @@ -110,7 +110,7 @@ gg_histo_plotly(df_hp = df_repartition, limits = df_repartition$nom_espece, ### 2006-2018 ```{r} -gg_histo_plotly(df_hp = df_repartition_old, limits = df_repartition_old$nom_espece, +gg_histo_plotly(df_hp = df_repartition_old, limits = df_repartition_old$taxon, couleur = df_repartition_old$couleur) # print("plotly repartition espece") @@ -118,7 +118,7 @@ gg_histo_plotly(df_hp = df_repartition_old, limits = df_repartition_old$nom_espe ### 2006-2024 ```{r} -gg_histo_plotly(df_hp = df_repartition_new_old, limits = df_repartition_new_old$nom_espece, +gg_histo_plotly(df_hp = df_repartition_new_old, limits = df_repartition_new_old$taxon, couleur = df_repartition_new_old$couleur) # print("plotly repartition espece") @@ -153,24 +153,24 @@ Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor i ```{r} carte_point_jardin(france = france, df_jp = df_jardin_point, title = paste0("Positions des jardins ayant participé entre ", - strftime(min(df_sp$date_collection), "%Y"), " et ", - strftime(max(df_sp$date_collection), "%Y"))) + strftime(min(df_sp$session_date), "%Y"), " et ", + strftime(max(df_sp$session_date), "%Y"))) ``` #### 2006-2018 ```{r} carte_point_jardin(france = france, df_jp = df_jardin_point_old, title = paste0("Positions des jardins ayant participé entre ", - strftime(min(df_sp$date_collection), "%Y"), " et ", - strftime(max(df_sp$date_collection), "%Y"))) + strftime(min(df_sp$session_date), "%Y"), " et ", + strftime(max(df_sp$session_date), "%Y"))) ``` #### 2006-2024 ```{r} carte_point_jardin(france = france, df_jp = df_jardin_point_new_old, title = paste0("Positions des jardins ayant participé entre ", - strftime(min(df_sp$date_collection), "%Y"), " et ", - strftime(max(df_sp$date_collection), "%Y"))) + strftime(min(df_sp$session_date), "%Y"), " et ", + strftime(max(df_sp$session_date), "%Y"))) ``` ::: @@ -247,7 +247,7 @@ print(carte_ab(shape_map = france, fill_map = df_dep$cl_jard, fill_color = coule datemin = as.Date("2019-01-01") - 7 datemax = max(df_sp$date) + 7 -gg1 <- gg_histo(df_histo = df_ab_rel %>% filter(annee >= 2019), +gg1 <- gg_histo(df_histo = df_ab_rel %>% filter(session_year >= 2019), y = "sum_ab_rel", dmin = datemin, dmax = datemax, ytxt = "Abondance relative", title = "Abondance relative par semaine") @@ -266,7 +266,7 @@ grid.arrange(gg1, gg2, ncol = 1, heights=c(2, 1)) datemin = min(df_sp$date) - 7 datemax = as.Date("2019-01-01") + 7 -gg1 <- gg_histo(df_histo = df_ab_rel %>% filter(annee < 2019), +gg1 <- gg_histo(df_histo = df_ab_rel %>% filter(session_year < 2019), y = "sum_ab_rel", dmin = datemin, dmax = datemax, ytxt = "Abondance relative", title = "Abondance relative par semaine") @@ -314,10 +314,10 @@ colfunc <- colorRampPalette(c("#a4dbff", "#0b6cab")) :::panel-tabset #### 2019-2024 ```{r} -vec_col = c(colfunc(length(unique(df_sp_new$annee))-1), "#f40b0b") +vec_col = c(colfunc(length(unique(df_sp_new$session_year))-1), "#f40b0b") aes_echarts(plot_e = df_freq_rel_new %>% - group_by(annee) %>% - e_charts(num_semaine) %>% + group_by(session_year) %>% + e_charts(session_week) %>% e_line(freq_rel, symbol='none'), xlab = "Semaine de participation", ylab = "Fréquence relative", @@ -329,10 +329,10 @@ aes_echarts(plot_e = df_freq_rel_new %>% #### 2006-2018 ```{r} -vec_col = c(colfunc(length(unique(df_sp_old$annee))-1), "#f40b0b") +vec_col = c(colfunc(length(unique(df_sp_old$session_year))-1), "#f40b0b") aes_echarts(plot_e = df_freq_rel_old %>% - group_by(annee) %>% - e_charts(num_semaine) %>% + group_by(session_year) %>% + e_charts(session_week) %>% e_line(freq_rel, symbol='none'), xlab = "Semaine de participation", ylab = "Fréquence relative", @@ -344,10 +344,10 @@ aes_echarts(plot_e = df_freq_rel_old %>% #### 2006-2024 ```{r} -vec_col = c(colfunc(length(unique(df_sp$annee))-1), "#f40b0b") +vec_col = c(colfunc(length(unique(df_sp$session_year))-1), "#f40b0b") aes_echarts(plot_e = df_freq_rel %>% - group_by(annee) %>% - e_charts(num_semaine) %>% + group_by(session_year) %>% + e_charts(session_week) %>% e_line(freq_rel, symbol='none'), xlab = "Semaine de participation", ylab = "Fréquence relative", @@ -379,9 +379,9 @@ histo_grega(df_grega = df_gregarite, ### Indice de grégarité par espèce ```{r} -names_grega = unique(df_gregarite_all %>% arrange(classif) %>% select(nom_espece) %>% as.data.frame()) -color_txt = rep("black", length(names_grega$nom_espece)) -position_sp = which(unique(names_grega$nom_espece) == sp_name) +names_grega = unique(df_gregarite_all %>% arrange(classif) %>% select(taxon) %>% as.data.frame()) +color_txt = rep("black", length(names_grega$taxon)) +position_sp = which(unique(names_grega$taxon) == sp_name) color_txt[position_sp] = "red" histo_indice_greg(df_greg_all = df_gregarite_all, color_txt = color_txt) @@ -425,7 +425,7 @@ for(i in 1:4) { as.data.frame() df_obs = df_sp %>% - filter(abondance != 0) %>% + filter(taxon_count != 0) %>% group_by(!!sym(lst_param[[i]][[1]])) %>% summarise(nobs = n()) %>% as.data.frame() -- GitLab From 356bd5fd5bfcd745f8cb745db58f3769a9c859ba Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Mon, 31 Mar 2025 15:03:53 +0200 Subject: [PATCH 10/14] refactor: suppression des messages de warnings des library --- fonctions/library.R | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/fonctions/library.R b/fonctions/library.R index b3d6182..f4e7502 100644 --- a/fonctions/library.R +++ b/fonctions/library.R @@ -1,18 +1,19 @@ -library(cowplot) -library(dplyr) -library(echarts4r) -library(ggimage) -library(ggplot2) -library(gridExtra) -library(here) -library(knitr) -library(lubridate) -library(magick) -library(plotly) -library(RMySQL) -library(rlang) -library(scales) -library(sf) -library(stringr) -library(tibble) -library(tidyr) \ No newline at end of file +# R 4.4.2 +suppressWarnings(suppressMessages(library(cowplot))) # 1.1.3 +suppressWarnings(suppressMessages(library(dplyr))) # 1.1.4 +suppressWarnings(suppressMessages(library(echarts4r))) # 0.4.5 +suppressWarnings(suppressMessages(library(ggimage))) # 0.3.3 +suppressWarnings(suppressMessages(library(ggplot2))) # 3.5.1 +suppressWarnings(suppressMessages(library(gridExtra))) # 2.3 +library(here) # 1.0.1 +library(knitr) # 1.49 +suppressWarnings(suppressMessages(library(lubridate))) # 1.9.3 +suppressWarnings(suppressMessages(library(magick))) # 2.8.5 +suppressWarnings(suppressMessages(library(plotly))) # 4.10.4 +suppressWarnings(suppressMessages(library(RMySQL))) # 0.10.29 +library(rlang) # 1.1.4 +library(scales) # 1.3.0 +suppressWarnings(suppressMessages(library(sf))) # 1.0-19 +library(stringr) # 1.5.1 +library(tibble) # 3.2.1 +library(tidyr) # 1.3.1 \ No newline at end of file -- GitLab From e9e2ba45e7e9b07e2a7fee5da93625d7c52e9fa8 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Mon, 31 Mar 2025 15:05:05 +0200 Subject: [PATCH 11/14] =?UTF-8?q?feat:=20ajout=20de=20la=20fonction=20is?= =?UTF-8?q?=5Fintranet=20pour=20la=20cr=C3=A9ation=20du=20data=20frame=20i?= =?UTF-8?q?nitial?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fonctions/create_df_opj.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/fonctions/create_df_opj.R b/fonctions/create_df_opj.R index 3f43cf1..74526fd 100644 --- a/fonctions/create_df_opj.R +++ b/fonctions/create_df_opj.R @@ -24,15 +24,23 @@ if (Sys.getenv("CI") != "true") { } source("fonctions/function_import_from_mosaic.R") source("fonctions/var.R") - +is_intranet <- function(url = "https://virtualianet.mnhn.fr/") { + tryCatch({ + response <- GET(url) + return(status_code(response) == 200) + }, error = function(e) { + return(FALSE) + }) +} ### Dataframe des données pour toutes les espèces # ----------------------------------------------- # Mise à jour -if (!file.exists("data/rdata/df_opj.rds") | # Si le fichier n'existe pas OU +if ((!file.exists("data/rdata/df_opj.rds") | # Si le fichier n'existe pas OU (strftime(Sys.Date(), "%A") == "lundi" & # [que la date du jour est un lundi ET - Sys.Date()-as.Date(file.info("data/rdata/df_opj.rds")$ctime) > 5)) { # que le fichier a plus de 5 jours] + Sys.Date()-as.Date(file.info("data/rdata/df_opj.rds")$ctime) > 5)) & + is_intranet()) { # que le fichier a plus de 5 jours] # Lecture depuis la base mosaic df_opj = import_from_mosaic(query = read_sql_query("SQL/export_a_plat_OPJ.sql"), database_name = "spgp") -- GitLab From 2f8e72cf079ce72bb4269631908903ab4bb2c38a Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Mon, 31 Mar 2025 15:06:49 +0200 Subject: [PATCH 12/14] refactor: modification des noms de colonnes avec le format standard --- fonctions/create_df_old.R | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/fonctions/create_df_old.R b/fonctions/create_df_old.R index 7ff7d09..ec93991 100644 --- a/fonctions/create_df_old.R +++ b/fonctions/create_df_old.R @@ -29,7 +29,8 @@ obpj_pratiques <- read.csv2(paste0("data/donnees_historiques/", # Traitement du dataframe des pratiques obpj_pratiques_trait = obpj_pratiques %>% pivot_wider(names_from = X.column., values_from = engrais) %>% - rename(type_Engrais = `type d'engrais`, + rename(session_year = annee, + type_Engrais = `type d'engrais`, type_Insecticide = `type d'insecticides`, type_Herbicide = `type d'herbicide`, type_Fongicide = `type de fongicides`, @@ -40,9 +41,9 @@ obpj_pratiques_trait = obpj_pratiques %>% frequence_Herbicide = herbicides, frequence_Fongicide = fongicides, frequence_AntiLimace = antilimaces, - frequenceBouillieBordelaise = `bouillie bordelaise`) %>% + frequence_BouillieBordelaise = `bouillie bordelaise`) %>% group_by(zfk) %>% - filter(annee == max(annee)) %>% + filter(session_year == max(session_year)) %>% ungroup() # Liste user à partir de 2019 @@ -71,42 +72,43 @@ df_old_data = df_join_obs_part %>% lat != "\\N", lat != 0) %>% # On renomme les colonnes selon les métadonnées de 2019 et + - dplyr::rename(code_postal = codepostal, - participation_id = participationpk, + dplyr::rename(session_zip_code = codepostal, + session_id = participationpk, latitude = lat, longitude = long, - abondance = nb_individus, + taxon_count = nb_individus, type_environnement = environnement, - nom_espece = sp_name, + taxon = sp_name, jardin_id = zpk) %>% - mutate(nom_espece = if_else(nom_espece == "Cuivrés", "Cuivré", nom_espece)) %>% + mutate(taxon = if_else(taxon == "Cuivrés", "Cuivré", taxon)) %>% # On supprime les données pour les espèces absentes de la liste principale - filter(nom_espece %in% liste_principale) %>% + filter(taxon %in% liste_principale) %>% # On modifie le type ou le contenu de certaines colonnes mutate(date = as.Date(date), - annee = as.integer(strftime(date, "%Y")), + session_date = as.Date(date), + session_year = as.integer(strftime(date, "%Y")), # adaptation pour les département en Corse - dept_code = if_else(substr(code_postal, 1, 2)=="20", - if_else(code_postal < "20200", + dept_code = if_else(substr(session_zip_code, 1, 2)=="20", + if_else(session_zip_code < "20200", "2A", "2B"), - substr(code_postal, 1, 2)), - num_semaine = strftime(date, "%V"), + substr(session_zip_code, 1, 2)), + session_week = strftime(date, "%V"), user_id = -dense_rank(email), jardin_id = -jardin_id, longitude = as.double(longitude), latitude = as.double(latitude), - an_sem = paste0(annee, "-S", num_semaine), + an_sem = paste0(session_year, "-S", session_week), type_environnement = case_when(is.na(type_environnement) ~ NA, type_environnement == "" ~ NA, type_environnement == "urbain" ~ "Urbain", type_environnement == "peri-urbain" ~ "Péri-urbain", type_environnement == "rural" ~ "Rural"), - participation_id = -(participation_id))%>% + session_id = -(session_id))%>% # Pré 2014, les dates d'observations sont en milieu de mois, et post 2014 # elles sont en début de mois. Il semble y avoir eu une transition créant des # dates en début de mois entre 2011 et 2014 qu'on supprime - filter(!( (annee <= 2013 & strftime(date, "%d") == "01") | - (annee == 2014 & strftime(date, "%d")=="15") )) %>% + filter(!( (session_year <= 2013 & strftime(date, "%d") == "01") | + (session_year == 2014 & strftime(date, "%d")=="15") )) %>% left_join(reg_dep, by = c("dept_code" = "code_departement")) %>% # Les users toujours actifs récupèrent l'id actuelle left_join(table_user_2019, by = c("email" = "email")) %>% -- GitLab From 3e67a82bc086bd18cd8c3dabc3f9c9dc08de25e7 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Mon, 31 Mar 2025 15:07:27 +0200 Subject: [PATCH 13/14] refactor: modification des noms de colonnes dans les fonctions pour les adapter au standard VN --- fonctions/function_graphics.R | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/fonctions/function_graphics.R b/fonctions/function_graphics.R index befe876..cd63a9f 100644 --- a/fonctions/function_graphics.R +++ b/fonctions/function_graphics.R @@ -39,8 +39,8 @@ #' value = c(0.23, 0.11, 0.219)), #' x = "nom", y = "value", fill = "nom", title = "Proportions", #' limits = c("B", "C", "A"), couleur = c("red", "grey", "grey")) -gg_histo_plotly <- function(df_hp, x = "nom_espece", y = "rel_ab", - fill = "nom_espece", +gg_histo_plotly <- function(df_hp, x = "taxon", y = "rel_ab", + fill = "taxon", title = "Proportion d'abondance de chaque espèce parmi toutes les observations", ytxt = "% d'abondance", limits, couleur, percent = TRUE){ @@ -316,7 +316,7 @@ aes_echarts <- function(plot_e, xlab, ylab, title, line_color, one_y = TRUE){ # Pic d'activité -graph_pic <- function(df_pic, x = "annee", y = "sum_sp", ecart = "rmse", +graph_pic <- function(df_pic, x = "session_year", y = "sum_sp", ecart = "rmse", xlab = "Année", ylab = "Semaine de participation", title = "Semaine du pic d'activité et son écart-type chaque année"){ @@ -352,7 +352,7 @@ graph_pic <- function(df_pic, x = "annee", y = "sum_sp", ecart = "rmse", ######################################### # Abondance moyenne par espèce -histo_ab_mean <- function(df_mg, x = "nom_espece", w = "m_abn", sd = "sd", +histo_ab_mean <- function(df_mg, x = "taxon", w = "m_abn", sd = "sd", color_txt = "black", xlab = "Espèce", ylab = "Nombre d'individus", order = "m_abn", title = "Moyenne de l'abondance pour chaque espèce"){ @@ -412,7 +412,7 @@ histo_grega <- function(df_grega, x = "class_idv", y = "freq_prc", } # Indice de grégarité par espèce -histo_indice_greg <- function(df_greg_all, x = "nom_espece", order = "classif", +histo_indice_greg <- function(df_greg_all, x = "taxon", order = "classif", fill = "ab_grega", w = "prop_grega", mean = "classif", sqrt = "sqrt_n", lab_fill = "Grégarité", @@ -518,8 +518,8 @@ carte_point_jardin <- function(france, df_jp, x = "longitude", y = "latitude", # Barycentre carte_bary_one_df <- function(france, df_bary, x = "longitude", y = "latitude", - color = "nom_espece", frame ="annee", col_val, - txt = "nom_espece", + color = "taxon", frame ="session_year", col_val, + txt = "taxon", title = "Barycentre des jardins et de l'espèce"){ gg = ggplot(france) + geom_sf(fill = "#f0f0f0", color = "#a0a0a0") + @@ -540,8 +540,8 @@ carte_bary_one_df <- function(france, df_bary, x = "longitude", y = "latitude", # Barycentre (toutes les espèces) carte_bary_two_df <- function(france, df_bary, df_bary2, x = "longitude", y = "latitude", - color = "nom_esp_min", frame = "annee", txt = "nom_espece", - customdata ="nom_espece", col_man, + color = "nom_esp_min", frame = "session_year", txt = "taxon", + customdata ="taxon", col_man, title = "Barycentre des jardins et des espèces"){ gg = ggplot(france) + @@ -589,10 +589,10 @@ gg_carte_mois = function(month, df_sp, france){ longitude = NA, sum_ab = 0) df_migration = df_sp %>% - mutate(mois = strftime(date_collection, "%m")) %>% + mutate(mois = strftime(session_date, "%m")) %>% filter(mois == month) %>% group_by(mois, jardin_id, latitude, longitude) %>% - summarise(sum_ab = sum(abondance)) %>% + summarise(sum_ab = sum(taxon_count)) %>% filter(sum_ab != 0) %>% dplyr::union(df_mois) -- GitLab From 541fac0f51b6aa1e72cc63e808fd4cb44f380cd5 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Mon, 31 Mar 2025 15:15:11 +0200 Subject: [PATCH 14/14] =?UTF-8?q?rename:=20changement=20du=20nom=20du=20r?= =?UTF-8?q?=C3=A9pertoire=20SQL=20en=20sql?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- {SQL => sql}/export_a_plat_OPJ.sql | 0 sql/export_a_plat_OPJ_old.sql | 82 ++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) rename {SQL => sql}/export_a_plat_OPJ.sql (100%) create mode 100644 sql/export_a_plat_OPJ_old.sql diff --git a/SQL/export_a_plat_OPJ.sql b/sql/export_a_plat_OPJ.sql similarity index 100% rename from SQL/export_a_plat_OPJ.sql rename to sql/export_a_plat_OPJ.sql diff --git a/sql/export_a_plat_OPJ_old.sql b/sql/export_a_plat_OPJ_old.sql new file mode 100644 index 0000000..0f11e46 --- /dev/null +++ b/sql/export_a_plat_OPJ_old.sql @@ -0,0 +1,82 @@ +SELECT + pp.id as participation_id, + pp.startDate as date_collection, + year(pp.startDate) as annee, + po.nbTaxons as abondance, + st_x(soa.geopoint) as longitude, + st_y(soa.geopoint) as latitude, + soa.deptCode as dept_code, + soa.postalCode as code_postal, + soa.userId as user_id, + usr.email as email, + soay.observationAreaId as jardin_id, + t.value as num_semaine, + t2.value as nom_espece, + t3.value as freq_passage, + t4.value as type_environnement, + t5.value as surface, + t6.value as distance_bois, + t7.value as distance_champs, + t8.value as distance_prairie, + t9.value as type_Engrais, + t10.value as type_Insecticide, + t11.value as type_Herbicide, + t12.value as type_Fongicide, + t13.value as type_AntiLimace, + t14.value as type_BouillieBordelaise, + t15.value as frequence_Engrais, + t16.value as frequence_Insecticide, + t17.value as frequence_Herbicide, + t18.value as frequence_Fongicide, + t19.value as frequence_AntiLimace, + t20.value as frequence_BouillieBordelaise +FROM + spgp.pj_observation po +LEFT JOIN spgp.pj_participation pp on + pp.id = po.participationId +LEFT JOIN spgp.spj_observation_area_year soay on + pp.observationAreaYearId = soay.id +LEFT JOIN spgp.spj_observation_area soa on + soay.observationAreaId = soa.id +LEFT JOIN spgp.users usr on + soa.userId = usr.id +LEFT JOIN thesaurus t on + pp.weekId = t.id +LEFT JOIN thesaurus t2 on + po.taxonId = t2.id +LEFT JOIN thesaurus t3 on + pp.frequencePassageId = t3.id +LEFT JOIN thesaurus t4 on + soay.environmentId = t4.id +LEFT JOIN thesaurus t5 on + soay.surfaceId = t5.id +LEFT JOIN thesaurus t6 on + soay.distanceBoisId = t6.id +LEFT JOIN thesaurus t7 on + soay.distanceChampId = t7.id +LEFT JOIN thesaurus t8 on + soay.distancePrairieId = t8.id +LEFT JOIN thesaurus t9 on + soay.typeEngraisId = t9.id +LEFT JOIN thesaurus t10 on + soay.typeInsecticideId = t10.id +LEFT JOIN thesaurus t11 on + soay.typeHerbicideId = t11.id +LEFT JOIN thesaurus t12 on + soay.typeFongicideId = t12.id +LEFT JOIN thesaurus t13 on + soay.typeAntiLimaceId = t13.id +LEFT JOIN thesaurus t14 on + soay.typeBouillieBordelaiseId = t14.id +LEFT JOIN thesaurus t15 on + soay.frequenceEngraisId = t15.id +LEFT JOIN thesaurus t16 on + soay.frequenceInsecticideId = t16.id +LEFT JOIN thesaurus t17 on + soay.frequenceHerbicideId = t17.id +LEFT JOIN thesaurus t18 on + soay.frequenceFongicideId = t18.id +LEFT JOIN thesaurus t19 on + soay.frequenceAntiLimaceId = t19.id +LEFT JOIN thesaurus t20 on + soay.frequenceBouillieBordelaiseId = t20.id \ No newline at end of file -- GitLab