Reproducing drc::plot.drc with ggplot2
Asked Answered
H

1

4

I want to reproduce the following drc::plot.drc graphs with ggplot2.

enter image description here

df1 <-
      structure(list(TempV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L, 13L, 13L, 13L, 
    13L, 13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
    11L, 11L, 11L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L, 14L, 14L, 14L, 
    14L, 14L, 14L, 14L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
    12L), .Label = c("22.46FH-142", "27.59FH-142", "26.41FH-142", 
    "29.71FH-142", "31.66FH-142", "34.11FH-142", "33.22FH-142", "22.46FH-942", 
    "27.59FH-942", "26.41FH-942", "29.71FH-942", "31.66FH-942", "34.11FH-942", 
    "33.22FH-942"), class = "factor"), Start = c(0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 
    192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 
    0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 
    48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 
    192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 
    0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 
    48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L), End = c(24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf), 
        Germinated = c(0L, 0L, 0L, 0L, 3L, 67L, 46L, 12L, 101L, 221L, 
        0L, 0L, 0L, 0L, 57L, 50L, 44L, 31L, 32L, 236L, 0L, 0L, 0L, 
        31L, 68L, 50L, 31L, 34L, 29L, 207L, 0L, 0L, 8L, 30L, 31L, 
        55L, 27L, 22L, 4L, 273L, 0L, 0L, 46L, 64L, 16L, 8L, 15L, 
        15L, 20L, 266L, 0L, 0L, 0L, 0L, 4L, 13L, 63L, 51L, 147L, 
        172L, 0L, 0L, 4L, 26L, 92L, 31L, 91L, 14L, 7L, 185L, 0L, 
        0L, 0L, 0L, 0L, 32L, 59L, 36L, 50L, 273L, 0L, 0L, 0L, 4L, 
        13L, 32L, 42L, 52L, 42L, 265L, 0L, 0L, 0L, 6L, 22L, 40L, 
        57L, 44L, 73L, 208L, 0L, 1L, 2L, 24L, 55L, 41L, 68L, 24L, 
        33L, 202L, 0L, 0L, 18L, 31L, 26L, 30L, 61L, 25L, 58L, 201L, 
        0L, 0L, 36L, 54L, 33L, 55L, 12L, 27L, 55L, 178L, 0L, 0L, 
        6L, 28L, 26L, 31L, 53L, 48L, 33L, 225L)), .Names = c("TempV", 
    "Start", "End", "Germinated"), row.names = c(NA, -140L), class = "data.frame")

library(data.table)

dt1 <- data.table(df1)

library(drc)

dt1fm1 <- 
  drm(
        formula   = Germinated ~ Start + End
      , curveid   = TempV
  #   , pmodels   = 
  #   , weights   = 
      , data      = dt1
  #   , subset    = 
      , fct       = LL.2()
      , type      = "event"
      , bcVal     = NULL
      , bcAdd     = 0
  #   , start     =
      , na.action = na.fail
      , robust    = "mean"
      , logDose   = NULL
      , control   = drmc(
                            constr      = FALSE
                            , errorm      = TRUE
                            , maxIt       = 1500
                            , method      = "BFGS"
                            , noMessage   = FALSE
                            , relTol      = 1e-07
                            , rmNA        = FALSE
                            , useD        = FALSE
                            , trace       = FALSE
                            , otrace      = FALSE
                            , warnVal     = -1
                            , dscaleThres = 1e-15
                            , rscaleThres = 1e-15
                            )
      , lowerl    = NULL
      , upperl    = NULL
      , separate  = FALSE
      , pshifts   = NULL 
      )



## ----dt1fm1Plot1----
plot(
        x      = dt1fm1
    , xlab     = "Time (Hours)"
    , ylab     = "Proportion Germinated (\\%)"    
  # , ylab     = "Proportion Germinated (%)"    
    , add      = FALSE
    , level    = NULL
    , type     = "average" # c("average", "all", "bars", "none", "obs", "confidence")
    , broken   = FALSE
  # , bp
    , bcontrol = NULL
    , conName  = NULL
    , axes     = TRUE
    , gridsize = 100
    , log      = ""
  # , xtsty
    , xttrim   = TRUE
    , xt       = NULL
    , xtField    = NULL
    , xField     = "Time (Hours)"
    , xlim     = c(0, 200)
    , yt       = NULL
    , ytField    = NULL
    , yField     = "Proportion Germinated"
    , ylim     = c(0, 1.05)
    , lwd      = 1
    , cex      = 1.2
    , cex.axis = 1
    , col      = TRUE
  # , lty
  # , pch
    , legend     = TRUE
  # , legendText  
    , legendPos  = c(40, 1.1)
    , cex.legend = 0.6
    , normal     = FALSE
    , normRef    = 1
    , confidence.level = 0.95
    )


## ----dt1fm1Plot2----
dt1fm1Means1 <- dt1[, .(Germinated=mean(Germinated)/450), by=.(TempV, Start, End)]
dt1fm1Means2 <- dt1fm1Means1[, .(Start=Start, End=End, Cum_Germinated=cumsum(Germinated)), by=.(TempV)]
dt1fm1Means  <- data.table(dt1fm1Means2[End!=Inf], Pred=predict(object=dt1fm1))

dt1fm1Plot2 <- 
       ggplot(data= dt1fm1Means, mapping=aes(x=End, y=Cum_Germinated, group=TempV, color=TempV, shape=TempV)) + 
        geom_point() +
        geom_line(aes(y = Pred)) +
        scale_shape_manual(values=seq(0, 13)) +
        labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
        theme_bw() +
        scale_x_continuous(expand = c(0, 0), breaks = c(0, unique(dt1fm1Means$End))) +
        scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(100*x,"\\%")) +
      # scale_y_continuous(expand = c(0, 0), labels = percent) +
        expand_limits(x = c(0, max(dt1fm1Means$End)+20), y = c(0, max(dt1fm1Means$Pred)+0.1)) +
        theme(axis.title.x = element_text(size = 12, hjust = 0.54, vjust = 0),
              axis.title.y = element_text(size = 12, angle = 90,  vjust = 0.25))
print(dt1fm1Plot2)

enter image description here

Question

There are few discrepancies in ggplot2 output. These discrepancies occur because the predict function gives output in different pattern than the given levels in the data.

Edited

Actually drm function changed the order of levels of TempV and this is clear from summary(dt1fm1) output and the graph of drc::plot.drc output.

Hasseman answered 10/7, 2016 at 7:27 Comment(3)
I've had a similar question before: #36780857. In brief, I was able to use the stat_smooth function to plot a drm model directly but for some reason this behaviour does not repeat on all machines I tested. The other option is to add predicted values to your data set and plot these.Sarre
biomiha: With the help of @dww, I figured out how to plot drc model with ggplot2 for one curve (See here). However, I am struggling when curveid = TempV is being used. Any thoughts, please.Hasseman
MYaseen208: What exactly is the issue with curveid? Is it just the order of the legend items? You can change that by transforming your TempV values as factors and defining the levels (#6919525)Sarre
D
3

As noted in the question, there is an issue related to drm shuffling the order of factor levels. Un-shuffling this mess proved more tricky than I expected.

In the end I approached this by calling the drm function once per factor level to build up a table of results one factor level at a time.

Doing it this long-winded way uncovered the fact that your 1st plot from plot.drc and the ggplot version are both incorrect.

Let's start by wrapping your function call to drm() inside another wrapper function, to facilitate calling it repeatedly for each trace:

drcmod <- function(dt1){
  drm(formula   = Germinated ~ Start + End
    , curveid   = TempV
    , data      = dt1
    , fct       = LL.2()
    , type      = "event"
    , bcVal     = NULL
    , bcAdd     = 0
    , na.action = na.fail
    , robust    = "mean"
    , logDose   = NULL
    , control   = drmc(
      constr      = FALSE
      , errorm      = TRUE
      , maxIt       = 1500
      , method      = "BFGS"
      , noMessage   = FALSE
      , relTol      = 1e-07
      , rmNA        = FALSE
      , useD        = FALSE
      , trace       = FALSE
      , otrace      = FALSE
      , warnVal     = -1
      , dscaleThres = 1e-15
      , rscaleThres = 1e-15
    )
    , lowerl    = NULL
    , upperl    = NULL
    , separate  = FALSE
    , pshifts   = NULL 
  )
}

Now we can use this wrapper to fit the drc model to each factor level in turn:

dt2 <- data.table()
for (i in 1:nlevels(dt1$TempV)) {
  dt <- dt1[TempV==levels(TempV)[i]]
  dt[, TempV:=as.character(TempV)]
  dt[, Germ_frac := mean(Germinated)/450, by=.(Start)]
  dt[, cum_Germinated := cumsum(Germ_frac)]
  dt[, Pred := c(predict(object=drcmod(dt)), NA)] 
  dt2 <- rbind(dt2, dt)
}

and plot:

ggplot(dt2[End != Inf], aes(x=End, y=cum_Germinated, group=TempV, color=TempV, shape=TempV)) + 
  geom_point() +
  geom_line(aes(y = Pred)) +
  scale_shape_manual(values=seq(0, 13)) +
  labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
  theme_bw()

enter image description here

Edit

If we run the original code in the question using a subset of the data with fewer factor levels, for example using

dt1 <- dt1[TempV %in% levels(TempV)[1:5],]
dt1 <- droplevels(dt1)

all the plots (the 2 versions in OP, and the version in this answer) give the same result. The discrepancies only seem to arise when a large number of factor levels are used. The fact that both the ggplot and the plot.drc in OP give incorrect matching of traces to factor levels indicates that the problem is most likely to be in the drm() function, rather than in plot.drc.

Deflation answered 31/8, 2016 at 18:59 Comment(3)
Thanks @Deflation for your time and effort. What I understood from your answer that plot.drc bug, if that is true please report it. ThanksHasseman
@myaseen208 See my edit to this answer. After looking again, it appears that the problem seems to be in the drm function rather than in plot.drc. and it only seems to arise when a large number of factor levels are used. My recommendation is to run the drm model one trace at a time, as i do in my answer, until this can be fixed by the package maintainers. I suggest also that you raise this with the package maintainers, and point them towards this SO page. in the mean time, i think my work-around should work for you.Deflation
Thanks @Deflation for your efforts. I have written an email to the maintainer of drc package and looking forward to his notice and reply. ThanksHasseman

© 2022 - 2024 — McMap. All rights reserved.