Data Retrieval
This vignette describes how the user can retrieve data from the csv files using tcplLite or from the ToxCast databases using tcpl. Listed below are case examples that extract summary information about the chemical, Bisphenol A, and the assay, NVS_NR_hTRa. The multiple concentration table for level 5 can be downloaded from the publicly available ToxCast data https://www.epa.gov/chemical-research/exploring-toxcast-data-downloadable-data.
A. Using TcplLite
In this example, we will identify the positive responses for the chemical Bisphenol A.
## Load the multiple-concentration (mc) tables for the levels of interest.
## Each call reads one level; the results are reused in the steps below.
mc4 <- tcplLoadData(lvl = 4, type = "mc")
mc5 <- tcplLoadData(lvl = 5, type = "mc")
mc6 <- tcplLoadData(lvl = 6, type = "mc")
## Find all sample IDs (spids) associated with Bisphenol A
chnm <- 'Bisphenol A'

## Read the chemical table shipped with the package. `ch` is not used again
## in the steps shown here; it is kept so the table can be inspected.
ch <- fread(system.file("/csv/chemical.csv",
                        package = "tcpl"),
            sep = ",",
            header = TRUE)

## Look the chemical up by name, then keep only the level 4 rows whose
## spid belongs to it.
chem <- tcplLoadChem('chnm', chnm)
dat4spid <- mc4[mc4$spid %in% chem$spid]
Now that we have isolated all the sample IDs (spids) for the chemical of interest, we can start mapping data from the multiple-concentration tables: level 4 (mc4), level 5 (mc5), and level 6 (mc6) from the local csv directory. Level 4 includes the spids, and level 5 specifies whether the chemical is active or inactive. Level 6 indicates flags reflecting the quality of the data or the analysis.
Next, we match the m5ids that correspond to the spids of interest. From those, we select the positive responses, with hit call = 1.
## Keep the level 5 rows whose m4id appears among the Bisphenol A level 4 rows
dat5spid <- mc5[mc5$m4id %in% dat4spid$m4id]
## Positive responses only: hit call (hitc) equal to 1
dat5hit <- dat5spid[hitc == 1]
To identify the flags for the Bisphenol A samples, we match the flag columns in the mc6 table by the corresponding m5ids.
## Collapse all level 6 flags for one m5id into a single ";"-separated string
mc6_flags <- mc6[ , .(flag = paste(flag, collapse = ";")),
                  by = m5id]
## Attach the flags to the positive hits; all.y = TRUE keeps hits that have
## no matching flag row.
dat5dat6 <- merge(x = mc6_flags,
                  y = dat5hit,
                  by = "m5id", all.y = TRUE)
B. Using Tcpl
We illustrate here the necessary steps for extracting information about Bisphenol A using tcpl rather than tcplLite.
First, we change the driver in tcpl settings to MySQL, and the database to invitrodb:
## Configure the tcpl connection: MySQL driver, invitrodb database.
## The 'XXXXXXX' values stand in for the real host and credentials.
tcplConf(drvr = 'MySQL',
         db   = 'invitrodb',
         host = 'XXXXXXX',
         user = 'XXXXXXX',
         pass = 'XXXXXXX')
Next, we define the chemical of interest and load the related spids:
## Chemical of interest, looked up by chemical name (chnm)
chnm <- 'Bisphenol A'
chem <- tcplLoadChem('chnm', chnm)
Then, we load the data from different levels to summarize positive responses for this chemical.
## Level 5 data for every spid of the chemical, passed through tcplPrepOtpt
dat5 <- tcplPrepOtpt(tcplLoadData(lvl = 5, fld = 'spid',
                                  val = chem$spid, type = 'mc'))
## For positives only, hit call (hitc) should equal 1
dat5 <- dat5[hitc == 1]

## Level 6 data for the same spids
dat6 <- tcplPrepOtpt(tcplLoadData(lvl = 6, fld = 'spid',
                                  val = chem$spid, type = 'mc'))

## One row per m4id: level 6 method ids joined with ","
mc6_mthds <- dat6[ , .(mc6_mthd_id =
                         paste(mc6_mthd_id, collapse = ",")),
                   by = m4id]
## One row per m4id: level 6 flags joined with ";"
mc6_flags <- dat6[ , .(flag =
                         paste(flag, collapse = ";")),
                   by = m4id]
Then, we can generate all level 6 plots for positive responses for this chemical:
## m4ids of the positive responses to plot
m4ids <- dat5[ , m4id]

## Close any open graphics devices, then write all plots to one PDF in the
## working directory, named mc6_<chemical>_<yymmdd>.pdf
graphics.off()
pdf(file = file.path(getwd(),
                     paste("mc6",
                           paste(chnm, collapse = "."),
                           format(Sys.Date(),
                                  "%y%m%d.pdf"),
                           sep = "_")),
    height = 6,
    width = 10,
    pointsize = 10)
tcplPlotM4ID(m4ids, lvl = 6)
## Closing the device finalizes the PDF file
graphics.off()
In the following example, we will obtain summary information about the example assay NVS_NR_hTRa_Antagonist using tcpl:
## List the assay source IDs
tcplLoadAsid()
## Find the assay source (NVS)
nvs.assays <- tcplLoadAeid(fld = 'asid', val = 5)
## Find the assay name (hTRa)
aeids <- nvs.assays[grep("hTRa", aenm)]$aeid
## Load the mc5 to determine hit call and summary information
dat5 <- tcplPrepOtpt(tcplLoadData(lvl = 5, type = 'mc',
                                  fld = 'aeid', val = aeids))
dat5 <- dat5[hitc == 1]
## Make the level 6 plots for the positive responses
assay <- 'NVS_NR_hTRa_Antagonist'
m4ids <- dat5[ , m4id]

## Close any open graphics devices, then write all plots to one PDF in the
## working directory, named mc6_<assay>_<yymmdd>.pdf
graphics.off()
pdf(file = file.path(getwd(),
                     paste("mc6",
                           paste(assay, collapse = "."),
                           format(Sys.Date(),
                                  "%y%m%d.pdf"),
                           sep = "_")),
    height = 6,
    width = 10,
    pointsize = 10)
tcplPlotM4ID(m4ids, lvl = 6)
## Closing the device finalizes the PDF file
graphics.off()
Moreover, we can extract the mc3 data used for plots:
## Distinct sample IDs among the positive responses
spids <- unique(dat5[ , spid])
## logc = log10concentration, starting with micromolar
## units (x-axis), resp = normalized response value (y-axis)
mc3 <- tcplPrepOtpt(tcplLoadData(lvl = 3, type = 'mc', fld = 'spid',
                                 val = spids))
## The load above is keyed by spid only, so restrict to the assay
## endpoints of interest.
mc3 <- mc3[aeid %in% aeids]
Then, we can visualize the normalized mc3 data without tcpl curve-fitting:
library(ggplot2)

## Close any open graphics devices, then write one page per sample to a PDF
## in the working directory, named mc3_<assay>_<yymmdd>.pdf
graphics.off()
pdf(file = file.path(getwd(),
                     paste("mc3",
                           paste(assay, collapse = "."),
                           format(Sys.Date(),
                                  "%y%m%d.pdf"),
                           sep = "_")),
    height = 6,
    width = 10,
    pointsize = 10)

## One scatter plot of resp against logc per spid. A ggplot object is only
## drawn when printed, so print it explicitly inside the function; wrapping
## the by() call in invisible() stops the returned plots from being printed
## a second time at top level.
invisible(by(mc3, mc3$spid, function(x) {
  p <- ggplot(x, aes(x = logc, y = resp)) +
    geom_point(aes(group = spid, color = spid)) +
    theme(axis.ticks.x = element_blank(),
          axis.text.x = element_text(size = rel(0.5)),
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank()) +
    xlab("logc") + ylab("resp")
  print(p)
}))

## Closing the device finalizes the PDF file
graphics.off()
The tcplLoadData function can be used to load data from level 7. The added level allows for estimating the uncertainty in the fitted parameters, such as AC50.