To enhance reproducibility of my multi-state models, I'm running E-SURGE through R using snippets of code from the the R2ESURGE workshop notes found here: R2ESURGE.pdf and Answers2019.docx.
Here is a preview of my code to run one of my models (in this case a transience model that considers individual heterogeneity in detectability class, for example, see Schmaltz et al. 2015):
- Code: Select all
library(R.matlab)
library(here)
library(tidyverse)
# open the E-SURGE GUI, and select "Java R server (R2ESURGE)" from the Start menu,
# then back in R run the following line to establish a connection with R
matlab2 <- Matlab(port=9999); open(matlab2)
# import capture history and specify the data structure (in my case,
# the data type (2 = inp MARK format), number of groups (8), number of
# age classes (2), threshold to defined cluster (50; not sure what this does),
# number of states (5), and number of events (2)
R.matlab::evaluate(matlab2, paste0("[his,eff,autre]=esurge_data_from_file('", paste0(here(), "/data/"), "','", "HC_age_sex_morph.inp", "',", 2,",", 8,",", 2,",", 50,",", 5,",", 2,")"))
# define the GEPAT structure (each matrix is defined row by row, with
# each row being seperated by a semi colon)
R.matlab::evaluate(matlab2,
"PsiI = {}; PsiI{1} = {'i','-','*','-'};
PsiI{1}")
R.matlab::evaluate(matlab2,
"PsiT = {}; PsiT{1} = {'*','-','-','-','t';
'-','*','-','-','-';
'-','-','*','-','t';
'-','-','-','*','-';
'-','-','-','-','*'};
PsiT{2} = {'s','-','-','-','*';
'-','s','-','-','*';
'-','-','s','-','*';
'-','-','-','s','*';
'-','-','-','-','*'};
PsiT{3} = {'p','*','-','-','-';
'p','*','-','-','-';
'-','-','p','*','-';
'-','-','p','*','-';
'-','-','-','-','*'};
PsiT{1:3}")
R.matlab::evaluate(matlab2,
"PsiE = {}; PsiE{1} = {'-','*';
'*','-';
'-','*';
'*','-';
'*','-'};
PsiE{1}")
# label the parameters
R.matlab::evaluate(matlab2,
"labelI = {'Initial State', 'IS'};
labelT = {'Transition', 't', 's', 'p'};
labelE = {'Event', 'E'};")
# call GEPAT
R.matlab::evaluate(matlab2,
"[pat, modelchapeau, model, mpar, spar] = esurge_gepat(PsiI, PsiT, PsiE, labelI, labelT, labelE, autre)")
# no unequal time interval (uti=0)
R.matlab::evaluate(matlab2,
"uti = 0; coeftime = []; nuti = [];")
R.matlab::evaluate(matlab2,
"modeldef = { pat, modelchapeau, model, uti, coeftime, nuti, mpar, spar };")
# definition of the sentence
R.matlab::evaluate(matlab2,
paste0(
"strI = {'", "t", "'}; ",
"strT = {'", "f(1 3).to(5).[g(5 7).a(1)]+others", "', ",
"'", "[f(1 2,3 4).t]+others", "', ",
"'", "f(1 2, 3 4).to(1 3).t+others", "'};",
"strE = {'firste+nexte'} ;",
"NumSC = 0; strSC = {}; list_cluster = {}, filenamex = 'defaultfile'"))
# call GEMACO
R.matlab::evaluate(matlab2,
"[modeldef, sep] = esurge_gemaco(modeldef, strI, strT, strE, NumSC, strSC, list_cluster, filenamex)")
# fix any parameters in the IVFV (in this case none)
R.matlab::evaluate(matlab2,
"indR = []; indB = []; valR = []; valB = [];")
# call IVFV
R.matlab::evaluate(matlab2, "[modeldef, ivfvdef] = esurge_ivfv(modeldef, sep, indR, indB, valR, valB, initvalues)")
# specify chat (calcuated via R2UCARE)
chat = 1.646538
# condition on the first occasion and set the chat
R.matlab::evaluate(matlab2,
paste0(" nt = 1; mecond = 0; chat = ", chat, ";"))
# fit the model, while also estimating 95% confidence intervals
R.matlab::evaluate(matlab2,
"[dev, para, modeldef, sigma, sigmaplus, sigmamoins, outrun] = esurge_run(his, eff, autre, modeldef, nt, mecond, 'FIT CI RANK', chat);")
# extract the betas and real estimates
R.matlab::evaluate(matlab2, "[strbiol, strmath] = esurge_display_results(modeldef, ivfvdef, outrun, sigma, sigmamoins, sigmaplus, chat)")
output <- R.matlab::getVariable(matlab2, c("strbiol", "strmath"))
# wrangle the betas
strmath = unlist(output$strmath)
strmath <-
as.data.frame(unlist(output$strmath)) %>%
rename(v = `unlist(output$strmath)`)
strmath <-
str_split_fixed(string = strmath$v, pattern = "#", n = 3)
strmath_1 <-
str_split_fixed(string = strmath[,1], pattern = " ", n = 10)
strmath_2 <-
as.data.frame(strmath[,2]) %>%
rename(v = `strmath[, 2]`) %>%
mutate(v = as.numeric(v))
strmath_3 <-
str_split_fixed(string = strmath[,3], pattern = " ", n = 10)
strmath_1[strmath_1 == ""] <- NA
strmath_3[strmath_3 == ""] <- NA
strmath_1 <- strmath_1[ , colSums(is.na(strmath_1))==0] %>% as.data.frame()
strmath_3 <- strmath_3[ , colSums(is.na(strmath_3))==0]
strmath_3 <- strmath_3[ , colSums(strmath_3 == "|")==0] %>% as.data.frame()
strmath <-
bind_cols(strmath_1, strmath_2, strmath_3)
strmath <-
strmath %>%
rename(Parameter = 1,
Index = 3,
Value = 4,
LCI = 5,
UCI = 6,
SE = 7) %>%
select(-2) %>%
mutate(Value = as.numeric(Value),
LCI = as.numeric(LCI),
UCI = as.numeric(UCI),
SE = as.numeric(SE))
# wrangle the reals
strbiol = unlist(output$strbiol)
strbiol <-
as.data.frame(unlist(output$strbiol)) %>%
rename(v = `unlist(output$strbiol)`)
strbiol <-
str_split_fixed(string = strbiol$v, pattern = "#", n = 3)
strbiol <-
str_split_fixed(string = strbiol[,3], pattern = " ", n = 20)
strbiol[strbiol == ""] <- NA
strbiol <- strbiol[ , colSums(is.na(strbiol))==0] %>% as.data.frame()
strbiol[] <- lapply(strbiol, gsub, pattern='\\(', replacement='')
strbiol[] <- lapply(strbiol, gsub, pattern='\\)', replacement='')
strbiol[] <- lapply(strbiol, gsub, pattern=',', replacement='')
strbiol[] <- lapply(strbiol, gsub, pattern=' ', replacement='')
strbiol <-
strbiol %>%
rename(Parameters = 1,
From = 2,
To = 3,
Time = 4,
Age = 5,
Group = 6,
Step = 7,
Estimates = 9,
LCI = 10,
UCI = 11,
SE = 12) %>%
select(-8) %>%
mutate(Estimates = as.numeric(Estimates),
LCI = as.numeric(LCI),
UCI = as.numeric(UCI),
SE = as.numeric(SE))
# extract AIC results
res <- getVariable(matlab2, c("dev", "outrun"))
# store model name
AIC_table$model_name[i] = model_list$model_name[i]
# store npar
AIC_table$npar[i] = unlist(res$outrun[[4]])
# store QAIC
AIC_table$QAIC[i] = unlist(res$outrun[[3]])
# store deviance
AIC_table$deviance[i] = res$dev
# store model output
model_results_list[[i]] <-
list(reals = strbiol,
beta = strmath,
name = model_list$model_name[i],
npar = unlist(res$outrun[[4]]),
QAICc = unlist(res$outrun[[3]]),
deviance = res$dev)
The above code nicely reproduces the results when I run the same model via the E-SURGE GUI using the default settings. However, my models are having convergence issues, and so I need to change the default settings (i.e., the settings found in the "Advanced Numerical Options" box of the E-SURGE GUI). Specifically, I'd like to add the appropriate code to the esurge_run command in R2ESURGE do the following things:
1) change the Non-linear solver to "EM(20)+Quasi-Newton"
2) specify that "Multiple Random" Initial Values are used (in my case I'd like 3 sets)
3) specify that the model should "Continue(after n cycles)" to deal with Convergence (in my case I'd like 3 cycles considered)
Specifying these options is not covered in the aforementioned R2ESURGE workshop notes. The notes do show one how to "Compute C-I(Hessian)", which is a setting found in the "Advanced Numerical Options" box of the E-SURGE GUI by adding
- Code: Select all
'FIT CI RANK'
into the evalutate command that runs the pre-defined model via the esurge_run function in R2ESURGE, for example:
- Code: Select all
R.matlab::evaluate(matlab2,"[dev, para, modeldef, sigma, sigmaplus, sigmamoins, outrun] = esurge_run(his, eff, autre, modeldef, nt, mecond, 'FIT CI RANK', chat);")
Can anyone assist me on what to add to the esurge_run command in R2ESURGE to specify these non-default advanced settings in R2ESURGE?
Thanks in advance!
Cheers,
Luke