vignettes/all_of_us_example.Rmd
all_of_us_example.Rmd
Author: Patrick Wu
Date: 2021-04-05
Note: In this vignette, individual-level data are used for demonstration purposes only, and were not derived from real-world patients.
Install required packages
# Install janitor from the configured package repository; it is used later in
# this vignette to clean column names on import.
install.packages(pkgs = "janitor")
# Install DrugRepurposingToolKit from its GitHub repository.
devtools::install_github(repo = "pwatrick/DrugRepurposingToolKit")
Import packages
Define variables for functions
# Concept ID of the drug under study; passed to extract_clinical_data() below
# (presumably amlodipine, judging by the table and file names -- confirm).
drug_concept_id <- "1332418"
# Concept ID of the biomarker; passed to extract_clinical_data() below
# (presumably systolic blood pressure -- confirm).
biomarker_concept_id <- "3004249"
indication_drug_concept_ids <-
glue([1139 chars quoted with '"'])
Download covariates data
## Create SQL query
# NOTE(review): table_name is "hypertension_amlodipine_drugs" here although
# table_type is "covariates" -- confirm this is the intended table name.
covariates_query <-
  DrugRepurposingToolKit::extract_clinical_data(
    drug_concept_id,
    biomarker_concept_id,
    table_name = "hypertension_amlodipine_drugs",
    table_type = "covariates",
    indication_drug_concept_ids
  )

## Execute query
# The dataset and billing project are read from the workspace environment
# variables WORKSPACE_CDR and GOOGLE_PROJECT.
hypertension_amlodipine_covariates <-
  bigrquery::bq_table_download(
    bigrquery::bq_dataset_query(
      Sys.getenv("WORKSPACE_CDR"),
      covariates_query,
      billing = Sys.getenv("GOOGLE_PROJECT")
    )
  )

## Save `hypertension_amlodipine_covariates` to CSV file
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
covariates_file <- "hypertension_amlodipine_covariates.csv"
write_csv(hypertension_amlodipine_covariates, covariates_file)
# Copy the local CSV into the workspace bucket's data/ folder, then list it to
# confirm the upload. The file name is taken from `covariates_file` in both
# commands so the two cannot drift apart.
system(
  paste0("gsutil cp ./", covariates_file, " ", my_bucket, "/data/"),
  intern = TRUE
)
system(
  paste0("gsutil ls ", my_bucket, "/data/", covariates_file),
  intern = TRUE
)
Example hypertension_amlodipine_covariates
table
Download drugs data
## Create SQL query
drugs_query <-
  DrugRepurposingToolKit::extract_clinical_data(
    drug_concept_id,
    biomarker_concept_id,
    table_name = "hypertension_amlodipine_drugs",
    table_type = "drugs",
    indication_drug_concept_ids
  )

## Execute query
# The dataset and billing project are read from the workspace environment
# variables WORKSPACE_CDR and GOOGLE_PROJECT.
hypertension_amlodipine_drugs <-
  bigrquery::bq_table_download(
    bigrquery::bq_dataset_query(
      Sys.getenv("WORKSPACE_CDR"),
      drugs_query,
      billing = Sys.getenv("GOOGLE_PROJECT")
    )
  )

## Save `hypertension_amlodipine_drugs` to CSV file
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
drugs_file <- "hypertension_amlodipine_drugs.csv"
write_csv(hypertension_amlodipine_drugs, drugs_file)
# Copy the local CSV into the workspace bucket's data/ folder, then list it to
# confirm the upload. The file name is taken from `drugs_file` in both commands
# so the two cannot drift apart.
system(
  paste0("gsutil cp ./", drugs_file, " ", my_bucket, "/data/"),
  intern = TRUE
)
system(
  paste0("gsutil ls ", my_bucket, "/data/", drugs_file),
  intern = TRUE
)
Example hypertension_amlodipine_drugs
table
Download biomarkers data
## Create SQL query
# NOTE(review): table_name is "hypertension_amlodipine_drugs" here although
# table_type is "biomarkers" -- confirm this is the intended table name.
biomarkers_query <-
  DrugRepurposingToolKit::extract_clinical_data(
    drug_concept_id,
    biomarker_concept_id,
    table_name = "hypertension_amlodipine_drugs",
    table_type = "biomarkers",
    indication_drug_concept_ids
  )

## Execute query
# The dataset and billing project are read from the workspace environment
# variables WORKSPACE_CDR and GOOGLE_PROJECT.
hypertension_amlodipine_biomarkers <-
  bigrquery::bq_table_download(
    bigrquery::bq_dataset_query(
      Sys.getenv("WORKSPACE_CDR"),
      biomarkers_query,
      billing = Sys.getenv("GOOGLE_PROJECT")
    )
  )

## Save `hypertension_amlodipine_biomarkers` to CSV file
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
biomarkers_file <- "hypertension_amlodipine_biomarkers.csv"
write_csv(hypertension_amlodipine_biomarkers, biomarkers_file)
# Copy the local CSV into the workspace bucket's data/ folder, then list it to
# confirm the upload. The file name is taken from `biomarkers_file` in both
# commands so the two cannot drift apart.
system(
  paste0("gsutil cp ./", biomarkers_file, " ", my_bucket, "/data/"),
  intern = TRUE
)
system(
  paste0("gsutil ls ", my_bucket, "/data/", biomarkers_file),
  intern = TRUE
)
Example hypertension_amlodipine_biomarkers
table
Process the data to create the table used for the paired two-tailed t-test
# Import data from saved CSV files
# `.name_repair` is spelled out in full rather than relying on partial
# argument matching of `.name`; column names are cleaned with
# janitor::make_clean_names. `col_types = cols()` is passed unchanged from the
# original call.
r_c <- vroom::vroom(
  "hypertension_amlodipine_covariates.csv",
  .name_repair = janitor::make_clean_names,
  col_types = cols()
)
r_d <- vroom::vroom(
  "hypertension_amlodipine_drugs.csv",
  .name_repair = janitor::make_clean_names,
  col_types = cols()
)
r_b <- vroom::vroom(
  "hypertension_amlodipine_biomarkers.csv",
  .name_repair = janitor::make_clean_names,
  col_types = cols()
)

# Prepare data for paired two-tailed t-test
drug <- "amlodipine"
phenotype <- "Hypertension"
biomarker <- "Systolic Blood Pressure"
indication_drugs <- DrugRepurposingToolKit::drugsHypertension
# Concept IDs handed to clean_process_clinical_data() as exclusions.
concept_id_exclusions <- c(2212451)

# Combine the covariates, drugs and biomarkers tables into the analysis table,
# then label every row with the drug name.
htn_amlodipine_processed_data <-
  DrugRepurposingToolKit::clean_process_clinical_data(
    drug, phenotype, biomarker, indication_drugs, concept_id_exclusions,
    r_c, r_d, r_b
  ) %>%
  mutate(drug = "amlodipine")
Example htn_amlodipine_processed_data
table
# Run paired two-tailed t-test
# Compares each row's on-treatment biomarker value against its baseline value.
htest <- t.test(
  x = htn_amlodipine_processed_data$biomarker_treatment_value,
  y = htn_amlodipine_processed_data$biomarker_baseline_value,
  paired = TRUE,
  conf.level = 0.95
)

# Flatten the test result into a one-row table, attach descriptive labels and
# the standard error, and put the columns in reporting order.
tidy_htest <- tidy(htest) %>%
  mutate(
    drug = drug,
    se = htest$stderr,
    biomarker = "Systolic Blood Pressure",
    phenotype = "Hypertension",
    source = "All of Us"
  ) %>%
  select(
    source, drug, phenotype, biomarker,
    estimate, se, statistic, p.value, parameter,
    conf.low, conf.high, method, alternative
  )

tidy_htest
Example tidy_htest
table
Workspace:
Dataset:
Cloud compute profile:
Time and cost to run notebook (without installing packages):