Clinical Validation Study Example in the NIH *All of Us* Research Program Database • DrugRepurposingToolKit

Author: Patrick Wu
Date: 2021-04-05

Note: The individual-level data shown in this vignette are for demonstration purposes only and were not derived from real-world patients.

Set up packages

Install required packages

# One-time setup: `janitor` comes from CRAN, the toolkit itself from GitHub.
install.packages(pkgs = "janitor")
devtools::install_github(repo = "pwatrick/DrugRepurposingToolKit")

Import packages

suppressPackageStartupMessages({
  library(glue); 
  library(lubridate); 
  library(tidyverse);
  library(vroom);
  library(broom);
  library(DrugRepurposingToolKit)
}) 
set.seed(1)

Download data

Define variables for functions

# Concept ID (stored as a string) passed as the drug argument to
# `extract_clinical_data()` in the download steps below.
# NOTE(review): presumably amlodipine, the drug named later in this
# vignette -- confirm against the concept table.
drug_concept_id <- "1332418"
# Concept ID (stored as a string) passed as the biomarker argument to
# `extract_clinical_data()` in the download steps below.
# NOTE(review): presumably systolic blood pressure, the biomarker named later
# in this vignette -- confirm against the concept table.
biomarker_concept_id <- "3004249"
indication_drug_concept_ids <-
        glue([1139 chars quoted with '"'])

Download covariates data

## Create SQL query
## NOTE(review): `table_name` is "hypertension_amlodipine_drugs" for every
## `table_type` used in this vignette; kept as-is -- confirm this is intended.
covariates_query <-
  DrugRepurposingToolKit::extract_clinical_data(
    drug_concept_id,
    biomarker_concept_id,
    table_name = "hypertension_amlodipine_drugs",
    table_type = "covariates",
    indication_drug_concept_ids
  )

## Execute query: the dataset comes from WORKSPACE_CDR and the billing
## project from GOOGLE_PROJECT (both read from environment variables)
hypertension_amlodipine_covariates <-
  bigrquery::bq_table_download(
    bigrquery::bq_dataset_query(
      Sys.getenv("WORKSPACE_CDR"),
      covariates_query,
      billing = Sys.getenv("GOOGLE_PROJECT")
    )
  )

## Save `hypertension_amlodipine_covariates` to a local CSV file, copy the
## file into the workspace bucket, then list it to confirm the upload
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
covariates_file <- "hypertension_amlodipine_covariates.csv"
write_csv(hypertension_amlodipine_covariates, covariates_file)
system(paste0("gsutil cp ./", covariates_file, " ", my_bucket, "/data/"),
       intern = TRUE)
# Reuse `covariates_file` so the listed path always matches the uploaded file
system(paste0("gsutil ls ", my_bucket, "/data/", covariates_file),
       intern = TRUE)

Example hypertension_amlodipine_covariates table

Download drugs data

## Create SQL query
drugs_query <-
  DrugRepurposingToolKit::extract_clinical_data(
    drug_concept_id,
    biomarker_concept_id,
    table_name = "hypertension_amlodipine_drugs",
    table_type = "drugs",
    indication_drug_concept_ids
  )

## Execute query: the dataset comes from WORKSPACE_CDR and the billing
## project from GOOGLE_PROJECT (both read from environment variables)
hypertension_amlodipine_drugs <-
  bigrquery::bq_table_download(
    bigrquery::bq_dataset_query(
      Sys.getenv("WORKSPACE_CDR"),
      drugs_query,
      billing = Sys.getenv("GOOGLE_PROJECT")
    )
  )

## Save `hypertension_amlodipine_drugs` to a local CSV file, copy the file
## into the workspace bucket, then list it to confirm the upload
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
drugs_file <- "hypertension_amlodipine_drugs.csv"
write_csv(hypertension_amlodipine_drugs, drugs_file)
system(paste0("gsutil cp ./", drugs_file, " ", my_bucket, "/data/"),
       intern = TRUE)
# Reuse `drugs_file` so the listed path always matches the uploaded file
system(paste0("gsutil ls ", my_bucket, "/data/", drugs_file),
       intern = TRUE)

Example hypertension_amlodipine_drugs table

Download biomarkers data

## Create SQL query
## NOTE(review): `table_name` is "hypertension_amlodipine_drugs" for every
## `table_type` used in this vignette; kept as-is -- confirm this is intended.
biomarkers_query <-
  DrugRepurposingToolKit::extract_clinical_data(
    drug_concept_id,
    biomarker_concept_id,
    table_name = "hypertension_amlodipine_drugs",
    table_type = "biomarkers",
    indication_drug_concept_ids
  )

## Execute query: the dataset comes from WORKSPACE_CDR and the billing
## project from GOOGLE_PROJECT (both read from environment variables)
hypertension_amlodipine_biomarkers <-
  bigrquery::bq_table_download(
    bigrquery::bq_dataset_query(
      Sys.getenv("WORKSPACE_CDR"),
      biomarkers_query,
      billing = Sys.getenv("GOOGLE_PROJECT")
    )
  )

## Save `hypertension_amlodipine_biomarkers` to a local CSV file, copy the
## file into the workspace bucket, then list it to confirm the upload
my_bucket <- Sys.getenv("WORKSPACE_BUCKET")
biomarkers_file <- "hypertension_amlodipine_biomarkers.csv"
write_csv(hypertension_amlodipine_biomarkers, biomarkers_file)
system(paste0("gsutil cp ./", biomarkers_file, " ", my_bucket, "/data/"),
       intern = TRUE)
# Reuse `biomarkers_file` so the listed path always matches the uploaded file
system(paste0("gsutil ls ", my_bucket, "/data/", biomarkers_file),
       intern = TRUE)

Example hypertension_amlodipine_biomarkers table

Process data

Process the data into the table used for the paired two-tailed t-test

# Import data from the CSV files saved in the download steps.
# `.name_repair` is spelled out in full: the shorter `.name` only worked
# through R's partial argument matching. Column names are cleaned with
# `janitor::make_clean_names`; `col_types = cols()` leaves every column type
# to be guessed.
r_c <- vroom::vroom("hypertension_amlodipine_covariates.csv",
                    .name_repair = janitor::make_clean_names,
                    col_types = cols())
r_d <- vroom::vroom("hypertension_amlodipine_drugs.csv",
                    .name_repair = janitor::make_clean_names,
                    col_types = cols())
r_b <- vroom::vroom("hypertension_amlodipine_biomarkers.csv",
                    .name_repair = janitor::make_clean_names,
                    col_types = cols())


# Prepare data for the paired two-tailed t-test.
# Labels and reference data describing the drug / phenotype / biomarker
# combination under study.
drug <- "amlodipine"
phenotype <- "Hypertension"
biomarker <- "Systolic Blood Pressure"
indication_drugs <- DrugRepurposingToolKit::drugsHypertension
# NOTE(review): single concept ID handed to the cleaning step as an
# exclusion -- confirm what it identifies.
concept_id_exclusions <- 2212451

# Run the toolkit's cleaning step on the covariates (r_c), drugs (r_d) and
# biomarkers (r_b) tables, then label every row with the drug name.
htn_amlodipine_processed_data <- mutate(
  DrugRepurposingToolKit::clean_process_clinical_data(
    drug,
    phenotype,
    biomarker,
    indication_drugs,
    concept_id_exclusions,
    r_c,
    r_d,
    r_b
  ),
  drug = "amlodipine"
)

Example htn_amlodipine_processed_data table

Run hypothesis test

# Paired two-tailed t-test of on-treatment versus baseline biomarker values
# (95% confidence interval).
htest <- t.test(
  x = htn_amlodipine_processed_data$biomarker_treatment_value,
  y = htn_amlodipine_processed_data$biomarker_baseline_value,
  paired = TRUE,
  conf.level = 0.95
)

# Column order of the one-row summary table.
htest_columns <- c(
  "source", "drug", "phenotype", "biomarker", "estimate", "se", "statistic",
  "p.value", "parameter", "conf.low", "conf.high", "method", "alternative"
)

# Tidy the test result, attach study labels and the standard error, and
# arrange the columns for reporting.
tidy_htest <- broom::tidy(htest) %>%
  mutate(
    drug = drug,
    se = htest$stderr,
    biomarker = "Systolic Blood Pressure",
    phenotype = "Hypertension",
    source = "All of Us"
  ) %>%
  select(all_of(htest_columns))

tidy_htest

Example tidy_htest table

Metadata

Workspace:

Name: Drug Repurposing Validation Study
Owner: patrickwu@researchallofus.org

Dataset:

All of Us Dataset v4
Data access level: Registered

Cloud compute profile:

CPUs: 4
RAM (GB): 15
Disk (GB): 100
Compute type: Standard VM

Time and cost to run notebook (without installing packages):

Time: < 1 hour
Cost: < $0.20 USD

Clinical Validation Study Example in the NIH All of Us Research Program Database

Set up packages

Download data

Process data

Run hypothesis test

Metadata