ブロックとZIPとnegbinと [統計]
ブロックのあるデータを負の二項分布やZIPで解析してみるとどうなるかテストしてみた。
set.seed(1234) n.b <- 10 # number of blocks sd.b <- 1.0 # random effect m <- 0.8 # real population size for each plot n <- 50 # sample size for each plot parameters <- function(n.b, sd.b, m, n) { y <- rep(NA, n.b * n) b <- rep(NA, n.b * n) mu <- m * exp(rnorm(n.b, 0, sd.b)) for (i in 1:n.b) { b[((i-1)*n+1):(i*n)] <- i y[((i-1)*n+1):(i*n)] <- rpois(n, mu[i]) } nz <- ifelse(y == 0, 0, 1) rbind(b, y, nz) } rep <- 100 params <- array(NA, dim = c(rep, 3, n.b * n)) for (i in 1:rep) { params[i,,] <- parameters(n.b, sd.b, m, n) } ## ZIP with WinBUGS library(R2WinBUGS) test.zip <- function(n, n.b, y, nz) { winepath <- "/Applications/Darwine/Wine.bundle/Contents/bin/winepath" wine <- "/Applications/Darwine/Wine.bundle/Contents/bin/wine" init1 <- list(alpha = -10, beta = -10) init2 <- list(alpha = 0, beta = 0) init3 <- list(alpha = 10, beta = 10) samples <- bugs(data = list(n = n * n.b, y = y, nz = nz), inits = list(init1, init2, init3), parameters = c("alpha", "beta"), model = "zip.bug", n.chains = 3, n.iter = 5000, useWINE = TRUE, WINEPATH = winepath, WINE = wine, bugs.directory = "c:/Program Files/WinBUGS14/", working.directory = getwd(), debug = FALSE) samples.list <- mcmc.list(lapply(1:(samples$n.chain), function(chain) as.mcmc(samples$sims.array[,chain,]))) summary(samples.list)$statistics[,1] } r.zip <- matrix(NA, rep, 3) for (i in 1:rep) { y <- params[i,2,] nz <- params[i,3,] r.zip[i,] <- test.zip(n, n.b, y, nz) } print(mean(exp(r.zip[,1])/(1+exp(r.zip[,1])))) print(mean(exp(r.zip[,2]))) print(mean(exp(r.zip[,2])*exp(r.zip[,1])/(1+exp(r.zip[,1])))) ## glm.nb library(MASS) test.glm.nb <- function(y) { s <- glm.nb(y ~ 1) coef(s) } r.glm.nb <- rep(NA, rep) for (i in 1:rep) { y <- params[i,2,] r.glm.nb[i] <- test.glm.nb(y) } print(mean(exp(r.glm.nb))) ## glmmML library(glmmML) test.glmm <- function(y, b) { s <- glmmML(y ~ 1, family = poisson, cluster = as.factor(b)) if(s$converged) coef(s) else NA } r.glmm <- rep(NA, rep) for (i in 1:rep) { b <- params[i,1,] y <- params[i,2,] r.glmm[i] <- test.glmm(y, b) } print(mean(exp(r.glmm))) ## save data save(r.zip, r.glm.nb, r.glmm, file = "zip_test2.RData")
データは相当の過分散になる。
> mean(c(params[,2,])) [1] 1.33546 > var(c(params[,2,])) [1] 4.784462
結果
ZIP> print(mean(exp(r.zip[,1])/(1+exp(r.zip[,1])))) [1] 0.7135664 > print(mean(exp(r.zip[,2]))) [1] 1.963107 > print(mean(exp(r.zip[,2])*exp(r.zip[,1])/(1+exp(r.zip[,1])))) [1] 1.339746負の二項分布
> print(mean(exp(r.glm.nb))) [1] 1.33546GLMM
> print(mean(exp(r.glmm))) [1] 0.836766
さて、どう解釈すべきか。
# しかし、WinBUGSをループさせるとウインドウが開いたり閉じたりでうっとおしいことこのうえない。JAGSでやりたかったのだが、このモデルはJAGSでは動かないのであった。
コメント 0