From d2d5e06785f9e2699717f0620e675d4e5005d896 Mon Sep 17 00:00:00 2001 From: sbenateau Date: Tue, 15 Apr 2025 14:26:00 +0200 Subject: [PATCH 01/12] rename participation functions --- fonctions/{stats_globales.R => fct_participation.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename fonctions/{stats_globales.R => fct_participation.R} (100%) diff --git a/fonctions/stats_globales.R b/fonctions/fct_participation.R similarity index 100% rename from fonctions/stats_globales.R rename to fonctions/fct_participation.R -- GitLab From 6a5d4bbc3c2a3a029d506269ecf0ada405a40f9a Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Tue, 15 Apr 2025 14:28:52 +0200 Subject: [PATCH 02/12] =?UTF-8?q?feat:=20fonction=20de=20v=C3=A9rification?= =?UTF-8?q?=20des=20colonnes=20dans=20un=20data=20frame?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fonctions/fct_check_columns.R | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 fonctions/fct_check_columns.R diff --git a/fonctions/fct_check_columns.R b/fonctions/fct_check_columns.R new file mode 100644 index 0000000..24e27ed --- /dev/null +++ b/fonctions/fct_check_columns.R @@ -0,0 +1,34 @@ +#' Title +#' +#' @param df +#' +#' @return +#' @export +#' +#' @examples +column_check <- function(df, vec_colnames = c("observation_id", "session_date")){ + + missing_columns = c() + #---- Vérification de la présence des colonnes obligatoires + # On vérifie pour chaque nom dans vec_colnames s'il n'apparaît pas dans les + # noms de colonne du dataframe en entrée. Cela renvoie un vecteur de TRUE / FALSE. + # On récupère les noms de vec_colnames qui n'apparaissent pas dans les colonnes + # du dataframe avec ce vecteur de TRUE / FALSE + for (i in length(vec_colnames)) { + if (!(vec_colnames[i] %in% colnames(df)) ) { + missing_columns = c(missing_columns, vec_colnames[i]) + } + } + + if (length(missing_columns)==0) { + msg = "Aucune colonne obligatoire manquante.\n" + } else { + msg = paste0("Colonne(s) obligatoire(s) manquante(s) : ", + paste(missing_columns, collapse = " / "), + "\n") + } + + # On renvoie le message résultant de la vérification + return(cat(sprintf(msg))) + +} \ No newline at end of file -- GitLab From 86de7c62b4e704ab658414ebe682a397ac57a51f Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Tue, 15 Apr 2025 14:29:56 +0200 Subject: [PATCH 03/12] feat: add sample_pipeline script -> example for users --- sample_pipeline.R | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 sample_pipeline.R diff --git a/sample_pipeline.R b/sample_pipeline.R new file mode 100644 index 0000000..b941fa6 --- /dev/null +++ b/sample_pipeline.R @@ -0,0 +1,10 @@ +# First step : download data + + + +# Second step : summary statistics + + + +# Third step : graphics + -- GitLab From 50175ed2e56ad2b26852aca2bf05b723eabe628f Mon Sep 17 00:00:00 2001 From: sbenateau Date: Tue, 15 Apr 2025 14:29:48 +0200 Subject: [PATCH 04/12] add a sample pipeline to test with users --- sample_pipeline.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 sample_pipeline.R diff --git a/sample_pipeline.R b/sample_pipeline.R new file mode 100644 index 0000000..b2a962e --- /dev/null +++ b/sample_pipeline.R @@ -0,0 +1,12 @@ +# + +source("fonctions/fct_ftp.R") +source("fonctions/fct_participation.R") + +# Import data +download_from_ftp(file_folder_server = "/Vigie-Nature/",file_to_download = "export_vne_vdt.csv", destination_folder = "data/") +# load data +data_export <- data.table::fread("data/export_vne_vdt.csv") + +# get participation information +stats_globales(data_export) -- GitLab From fd9ea69f6aa4828642bcbfac8d863ff03f751098 Mon Sep 17 00:00:00 2001 From: sbenateau Date: Tue, 15 Apr 2025 14:31:32 +0200 Subject: [PATCH 05/12] minor fix - packages --- fonctions/fct_import_database.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fonctions/fct_import_database.R b/fonctions/fct_import_database.R index 530a327..53a3f94 100644 --- a/fonctions/fct_import_database.R +++ b/fonctions/fct_import_database.R @@ -6,7 +6,6 @@ readRenviron(".env") #' @param filepath the path of the query as sql file #' #' @return -#' @export #' #' @examples #' @@ -75,6 +74,9 @@ import_from_vne <- function (query){ #' @export #' #' @examples +#' +#' +#' import_from_mosaic <- function(query, database_name, force_UTF8 = FALSE){ library(RMySQL) @@ -89,15 +91,15 @@ import_from_mosaic <- function(query, database_name, force_UTF8 = FALSE){ dbname = database_name, host = db_host, port = db_port) raw_query_result <- dbSendQuery(mydb, query) - query_result <- fetch(raw_query_result, n = -1) + query_result <- DBI::fetch(raw_query_result, n = -1) # Force UTF8 encoding if column is char if(force_UTF8) { - query_result <- query_result %>% - mutate_if(is.character, + query_result <- query_result |> + dplyr::mutate_if(is.character, function(x) {Encoding(x) <- "UTF-8" return(x) })} - on.exit(dbDisconnect(mydb)) + on.exit(DBI::dbDisconnect(mydb)) return(query_result) } -- GitLab From 67c29dc7fdd5f5b0d31f283fd97e065ae3db82b1 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Tue, 15 Apr 2025 14:37:27 +0200 Subject: [PATCH 06/12] maj: add data repository to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a52f149..34469bc 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ .RData .Ruserdata *.Rproj -.env \ No newline at end of file +.env +data/ \ No newline at end of file -- GitLab From ec49fe043fe3f9ff65b793bf21ff3f6bfea7fbc4 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Tue, 15 Apr 2025 17:47:27 +0200 Subject: [PATCH 07/12] refactor: modification of scale_fill optionnal (add of breaks in scale_fill) --- fonctions/fct_time_series.R | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/fonctions/fct_time_series.R b/fonctions/fct_time_series.R index acc93f8..6aadcd6 100644 --- a/fonctions/fct_time_series.R +++ b/fonctions/fct_time_series.R @@ -21,34 +21,49 @@ #' barplot_time_series(df = df_test, x = "x1", y = "y1", fill = "fill1", #' fix_ratio = TRUE, c_fix = 0.5) #' -barplot_time_series <- function(df, x = "year", y = NA, color_bar = NA, +barplot_time_series <- function(df, x = "session_year", y = NA, color_bar = NA, fill = "column_for_colour", lab_title = "lab_for_colour", xlab = "Years", ylab = "y_title", position = "stack", - scale_fill = c('#009ef8', '#76ea02'), + values_fill = c(), breaks_fill = c(), modif_x_axis = FALSE, fix_ratio = FALSE, c_fix = 0.15){ + # Graphique initial en barplot if (is.na(y)) { + ## Avec comptage automatique pour l'axe y selon l'axe x gg <- ggplot(df, aes(x = !!sym(x), fill = !!sym(fill))) stat = "count" }else{ + ## Avec renseignement de l'axe y en plus de l'axe x gg <- ggplot(df, aes(x = !!sym(x), y = !!sym(y), fill = !!sym(fill))) stat = "identity" } + # Création du geom_bar avec titres sur les axes x, y et la légende gg <- gg + geom_bar(stat = stat, position = position, color = color_bar)+ - scale_fill_manual(values=scale_fill)+ ylab(ylab) + xlab(xlab) + labs(fill = lab_title) + theme_cowplot() + # Modifications esthétiques du graphique + + ## Modification des couleurs et de l'ordre d'attribution des couleurs + if (length(values_fill) > 0) { + if (length(breaks_fill) > 0) { + gg <- gg + scale_fill_manual(values = values_fill, breaks = breaks_fill) + }else{ + gg <- gg + scale_fill_manual(values = values_fill) + } + } + ## Ajustement du texte de l'axe x if (modif_x_axis) { gg <- gg + theme(axis.text.x = element_text(angle = 45, size = 10, hjust = 1)) } + ## Réajustement du ratio hauteur / longueur du graphique if (fix_ratio) { gg <- gg + coord_fixed(ratio = c_fix) -- GitLab From e0643c852c49b7656c741130dc4359e9a0d0c3c2 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Tue, 15 Apr 2025 17:48:05 +0200 Subject: [PATCH 08/12] refactor: commentaries -> french to english --- fonctions/fct_check_columns.R | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/fonctions/fct_check_columns.R b/fonctions/fct_check_columns.R index 24e27ed..5ba502b 100644 --- a/fonctions/fct_check_columns.R +++ b/fonctions/fct_check_columns.R @@ -1,19 +1,18 @@ #' Title #' #' @param df +#' @param vec_colnames #' #' @return #' @export #' #' @examples -column_check <- function(df, vec_colnames = c("observation_id", "session_date")){ +check_column <- function(df, vec_colnames = c("observation_id", "session_date")){ missing_columns = c() - #---- Vérification de la présence des colonnes obligatoires - # On vérifie pour chaque nom dans vec_colnames s'il n'apparaît pas dans les - # noms de colonne du dataframe en entrée. Cela renvoie un vecteur de TRUE / FALSE. - # On récupère les noms de vec_colnames qui n'apparaissent pas dans les colonnes - # du dataframe avec ce vecteur de TRUE / FALSE + #---- Check presence of mandatory columns + # We check for each name in vec_colnames whether or not it appears in the + # column names of the input dataframe. If not, it is stored in missing_columns for (i in length(vec_colnames)) { if (!(vec_colnames[i] %in% colnames(df)) ) { missing_columns = c(missing_columns, vec_colnames[i]) @@ -21,14 +20,12 @@ column_check <- function(df, vec_colnames = c("observation_id", "session_date")) } if (length(missing_columns)==0) { - msg = "Aucune colonne obligatoire manquante.\n" + cat(sprintf("No required columns missing.\n")) + return(TRUE) } else { - msg = paste0("Colonne(s) obligatoire(s) manquante(s) : ", - paste(missing_columns, collapse = " / "), - "\n") + cat(sprintf(paste0("Missing column(s): ", + paste(missing_columns, collapse = " / "), "\n"))) + return(FALSE) } - # On renvoie le message résultant de la vérification - return(cat(sprintf(msg))) - -} \ No newline at end of file +} -- GitLab From 11329f54d57509343066e562f9535cd12d120a90 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Tue, 15 Apr 2025 17:48:21 +0200 Subject: [PATCH 09/12] feat: add third step to create a graph --- sample_pipeline.R | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/sample_pipeline.R b/sample_pipeline.R index 09b2314..5283783 100644 --- a/sample_pipeline.R +++ b/sample_pipeline.R @@ -1,6 +1,10 @@ +source("fonctions/library.R") + +source("fonctions/fct_check_columns.R") source("fonctions/fct_ftp.R") source("fonctions/fct_participation.R") +source("fonctions/fct_time_series.R") # First step: Import data download_from_ftp(file_folder_server = "/Vigie-Nature/",file_to_download = "export_vne_vdt.csv", destination_folder = "data/") @@ -8,4 +12,40 @@ download_from_ftp(file_folder_server = "/Vigie-Nature/",file_to_download = "expo data_export <- data.table::fread("data/export_vne_vdt.csv") # Second step: get participation information -stats_globales(data_export) +stats_globales(data_export, count = "sessions", selectAnnee = "table") + +# Third step : graph +## - Verification of required columns +required_columns = c("session_id", "session_date") +## - the graph is displayed only if the columns are present +if (check_column(df = data_export, vec_colnames = required_columns)) { + ## - Data frame modification : + ## -> creation of session_year + ## -> creation of session_season + ## -> keeping only columns of interest + ## -> delete duplicated lines + data_graphic = data_export %>% + dplyr::mutate(session_year = strftime(session_date, "%Y"), + session_month = strftime(session_date, "%m"), + session_season = dplyr::case_when( + session_month %in% c("01", "02", "03") ~ "Winter", + session_month %in% c("04", "05", "06") ~ "Spring", + session_month %in% c("07", "08", "09") ~ "Summer", + session_month %in% c("10", "11", "12") ~ "Autumn")) %>% + select(session_id, session_year, session_season) %>% + unique() + + ## - Display graph + barplot_time_series(df = data_graphic, x = "session_year", fill = "session_season", + lab_title = "Saison", ylab = "Nombre de sessions", + xlab = "Années", modif_x_axis = T, + values_fill = c("#DA7422", "#B8D4E3", "#9BC53D", "#FDE74C"), + breaks_fill = c("Autumn", "Winter", "Spring", "Summer") ) +} + + + + + + + -- GitLab From f53e72a2fe41bf626e76efcfc9fc6b1600550c46 Mon Sep 17 00:00:00 2001 From: "mael.pretet" Date: Tue, 15 Apr 2025 17:48:43 +0200 Subject: [PATCH 10/12] feat: add library file --- fonctions/library.R | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 fonctions/library.R diff --git a/fonctions/library.R b/fonctions/library.R new file mode 100644 index 0000000..874f0fc --- /dev/null +++ b/fonctions/library.R @@ -0,0 +1,6 @@ +suppressWarnings(suppressMessages(library(cowplot))) +suppressWarnings(suppressMessages(library(dplyr))) +suppressWarnings(suppressMessages(library(ggplot2))) +suppressWarnings(suppressMessages(library(RMySQL))) +suppressWarnings(suppressMessages(library(RPostgreSQL))) +suppressWarnings(suppressMessages(library(stringr))) \ No newline at end of file -- GitLab From cf0e13c07e867c8c57377f1ed2223cc52866022d Mon Sep 17 00:00:00 2001 From: sbenateau Date: Wed, 16 Apr 2025 14:42:46 +0200 Subject: [PATCH 11/12] merge sample_pipeline --- fonctions/fct_description.R | 48 +++++++++++++++++++++++++++++++++++++ sample_pipeline.R | 18 ++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 fonctions/fct_description.R diff --git a/fonctions/fct_description.R b/fonctions/fct_description.R new file mode 100644 index 0000000..2a9bce9 --- /dev/null +++ b/fonctions/fct_description.R @@ -0,0 +1,48 @@ +#' Species repartition in the data +#' +#' @param data a data.frame with the Vigie-Nature export format +#' @param index_type choose the index to calculate. It can take the followinf values : 'frequence', 'sum_presence', 'mean_taxon_count', 'sum_taxon_count' +#' +#' @returns +#' @import data.table +#' +#' @examples +#' +#' calculate_species_occurences(df) +#' calculate_species_occurences(df, index_type = "sum_taxon_count") +#' +calculate_species_occurences <- function(data, index_type = "frequence"){ + + freq_only <- typeof(data_export$taxon_count) == "character" + + # common errors + if ((!"taxon_count" %in% colnames(data) | freq_only) & index_type != "frequence") + stop("index_type can only be a frequence if taxon_count is absent from data, please use 'frequence' or 'sum_presence' as index_type or add a taxon_count variable") + if (!index_type %in% c("frequence", "sum_presence", "mean_taxon_count", "sum_taxon_count")) + stop("index_type can only take one of these values: 'frequence', 'sum_presence', 'mean_taxon_count', 'sum_taxon_count'") + + # calculate indices relative to the species + if(index_type %in% c("frequence", "sum_presence")){ + if ("taxon_count" %in% colnames(data)) { + res <- data[, .(indice = sum(taxon_count > 0)), by = taxon] + } else { + # remove potential duplicates for frequencies for exemple "mouche difficile à determiner" in spipoll + data <- unique(data[,.(session_id, taxon)]) + res <- data[, .(indice = .N), by = taxon] + } + } else { + res <- data[, .(indice = sum(taxon_count)), by = taxon] + } + + + if (index_type %in% c("frequence", "mean_taxon_count")){ + res <- res[, indice := indice/unlist(stats_globales(data))] + } + data.table::setnames(res, "indice", index_type) + return(res) +} + + +graph_species_occurences <- function(data){ + +} \ No newline at end of file diff --git a/sample_pipeline.R b/sample_pipeline.R index 5283783..9378f49 100644 --- a/sample_pipeline.R +++ b/sample_pipeline.R @@ -1,3 +1,8 @@ +# Example of an easy summary pipeline +# get a graph of the evolution participation +# get a graph of the species frequence + + source("fonctions/library.R") @@ -5,13 +10,17 @@ source("fonctions/fct_check_columns.R") source("fonctions/fct_ftp.R") source("fonctions/fct_participation.R") source("fonctions/fct_time_series.R") +source("fonctions/fct_description.R") -# First step: Import data +# First step: Import data ---- download_from_ftp(file_folder_server = "/Vigie-Nature/",file_to_download = "export_vne_vdt.csv", destination_folder = "data/") + + # load data data_export <- data.table::fread("data/export_vne_vdt.csv") -# Second step: get participation information + +# Second step: get participation information ---- stats_globales(data_export, count = "sessions", selectAnnee = "table") # Third step : graph @@ -46,6 +55,11 @@ if (check_column(df = data_export, vec_colnames = required_columns)) { +# Third step: get species repartitions ---- +species_repartition <- calculate_species_occurences(data_export) + +# remove the file from disk because it was a sample +file.remove("data/export_spipoll.csv") -- GitLab From 02387c155e613a50700910aebbd1c242d3793885 Mon Sep 17 00:00:00 2001 From: sbenateau Date: Wed, 16 Apr 2025 14:29:39 +0200 Subject: [PATCH 12/12] fix old syntax --- fonctions/fct_participation.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fonctions/fct_participation.R b/fonctions/fct_participation.R index 0e835ad..604048d 100644 --- a/fonctions/fct_participation.R +++ b/fonctions/fct_participation.R @@ -53,7 +53,7 @@ stats_globales <- function(df, count = "sessions", selectAnnee = "all", selectDe # if no table function with group_by if (!"table" %in% c(selectAnnee, selectDepartement)){ result <- df |> - dplyr::select_at(dplyr::all_of(variable_to_count)) |> + dplyr::select(dplyr::all_of(variable_to_count)) |> dplyr::distinct()|> tidyr::drop_na() |> dplyr::summarise(nombre = dplyr::n()) @@ -61,7 +61,7 @@ stats_globales <- function(df, count = "sessions", selectAnnee = "all", selectDe result } else { result <- df |> - dplyr::group_by_at(dplyr::all_of(group)) |> + dplyr::group_by(dplyr::across(dplyr::all_of(group))) |> dplyr::select(all_of(variable_to_count)) |> dplyr::distinct()|> tidyr::drop_na() |> -- GitLab