SSブログ

Stan: 実数に対応したポアソン分布と二項分布 [統計]

OpenBUGSのy ~ dpois(lambda)y ~ dbin(p, N)は、yが実数でもよくて、BPA 11章ではそれをつかっているので、Stanでも同等の分布を定義してみました。

real_Poisson.stan

functions {
  /**
   * Return log probability of Poisson distribution.
   * outcomes n may be real values; for compatibility with OpenBUGS.
   *
   * @param n      Outcomes
   * @theta lambda Mean
   *
   * @return Log probability
   */
  real poisson_log(vector n, real lambda) {
    real lp;

    lp <- 0.0;
    if (lambda < 0) {
      reject("lambda must not be negative; found lambda=", lambda);
    } else {
      for (i in 1:rows(n)) {
        if (n[i] < 0.0) {
          reject("n must not be negative; found n=", n[i]);
        }
        lp <- lp + n[i] * log(lambda) - lambda - lgamma(n[i] + 1);
      }
    }
    return lp;
  }
}

data {
  int N;
  vector<lower=0>[N] y;
}

parameters {
  real<lower=0,upper=10000> lambda;
}

model {
  lambda ~ uniform(0, 10000);
  y ~ poisson(lambda);
}

real_binomial.stan

functions {
  /**
   * Return log probability of binomial distribution.
   * Outcomes n may be real values; for compatibility with OpenBUGS.
   *
   * @param n     Outcomes
   * @param N     Size
   * @theta theta Probability
   *
   * @return Log probability
   */
  real binomial_log(vector n, real N, real theta) {
    real lp;

    lp <- 0.0;
    if (N < 0) {
      reject("N must be non-negative; found N=", N);
    } else if (theta < 0 || theta > 1) {
      reject("theta must be in [0,1]; found theta=", theta);
    } else {
      for (i in 1:rows(n)) {
        if (n[i] < 0 || n[i] > N) {
          reject("n must be in [0:N]; found n=", n[i]);
        }
        lp <- lp + binomial_coefficient_log(N, n[i]) +
              n[i] * log(theta) + (N - n[i]) * log(1.0 - theta);
      }
    }
    return lp;
  }
}

data {
  int N;
  int M;
  vector<lower=0>[N] y;
}

parameters {
  real<lower=0,upper=1> theta;
}

model {
  y ~ binomial(M, theta);
}

OpenBUGSと比較してみます。

library(R2OpenBUGS)
library(rstan)

## Poisson
set.seed(123)
N <- 100
lambda <- 5
y <- rpois(N, lambda) + 0.3

## OpenBUGS settings
WINE <- system("which wine", intern = TRUE)
WINEPATH <- system("which winepath", intern = TRUE)
OpenBUGS <- paste(Sys.getenv("HOME"),
                  ".wine/drive_c/Program Files",
                  "OpenBUGS/OpenBUGS323/OpenBUGS.exe", sep = "/")

## MCMC settings
nc <- 4
ni <- 2000
nb <- 1000
nt <- 1
inits <- lapply(1:nc, function(i) list(lambda = runif(1, 0, 10)))

## OpenBUGS
bugs_fit <- bugs(model.file = "real_poisson_bug.txt",
                 data = list(N = N, y = y),
                 inits = inits,
                 parameters.to.save = c("lambda"),
                 n.chains = nc, n.iter = ni, n.burnin = nb, n.thin = nt,
                 OpenBUGS.pgm = OpenBUGS,
                 useWINE = TRUE,
                 WINE = WINE,
                 WINEPATH = WINEPATH,
                 debug = FALSE)
print(bugs_fit, digits = 3)

## Stan
stan_fit <- stan("real_poisson.stan",
                 data = list(N = N, y = y),
                 init = inits,
                 chains = nc, iter = ni, warmup = nb, thin = nt,
                 seed = 1)
print(stan_fit, digits = 3)

## Binomial
set.seed(123)
N <- 100
M <- 20
theta <- 0.4
y <- rbinom(N, M, theta) + 0.3

## MCMC settings
nc <- 4
ni <- 2000
nb <- 1000
nt <- 1
inits <- lapply(1:nc, function(i) list(theta = runif(1, 0, 1)))

## OpenBUGS
bugs_fit <- bugs(model.file = "real_binomial_bug.txt",
                 data = list(N = N, M = M, y = y),
                 inits = inits,
                 parameters.to.save = c("theta"),
                 n.chains = nc, n.iter = ni, n.burnin = nb, n.thin = nt,
                 OpenBUGS = OpenBUGS,
                 useWINE = TRUE,
                 WINE = WINE,
                 WINEPATH = WINEPATH,
                 debug = FALSE)
print(bugs_fit, digits = 3)

## Stan
stan_fit <- stan("real_binomial.stan",
                 data = list(N = N, M = M, y = y),
                 init = inits,
                 chains = nc, iter = ni, warmup = nb, thin = nt,
                 seed = 1)
print(stan_fit, digits = 3)

なお、OpenBUGSのコードは以下のようにしています。

var
 N,
 y[N],
 lambda;

model {
  lambda ~ dunif(0, 10000);

  for (i in 1:N) {
    y[i] ~ dpois(lambda);
  }
}
var
 N,
 M,
 y[N],
 theta;

model {
  theta ~ dunif(0, 1);

  for (i in 1:N) {
    y[i] ~ dbin(theta, M);
  }
}

結果です。まずはポアソン分布。

> print(bugs_fit, digits = 3)
Inference for Bugs model at "real_poisson_bug.txt", 
Current: 4 chains, each with 2000 iterations (first 1000 discarded)
Cumulative: n.sims = 4000 iterations saved
            mean    sd    2.5%     25%     50%     75%   97.5%  Rhat n.eff
lambda     5.321 0.235   4.882   5.158   5.315   5.477   5.791 1.001  3300
deviance 435.546 3.108 432.700 433.200 434.500 436.800 443.800 1.001  4000

For each parameter, n.eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor (at convergence, Rhat=1).

DIC info (using the rule, pD = Dbar-Dhat)
pD = 1.0 and DIC = 436.5
DIC is an estimate of expected predictive error (lower deviance is better).
> print(stan_fit, digits = 3)
Inference for Stan model: real_poisson.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

           mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff Rhat
lambda    5.319   0.006 0.222    4.903    5.164    5.310    5.468    5.769  1510    1
lp__   -215.527   0.016 0.650 -217.359 -215.679 -215.285 -215.113 -215.067  1706    1

Samples were drawn using NUTS(diag_e) at Sat Feb 27 08:20:22 2016.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

二項分布の結果です。

> print(bugs_fit, digits = 3)
Inference for Bugs model at "real_binomial_bug.txt", 
Current: 4 chains, each with 2000 iterations (first 1000 discarded)
Cumulative: n.sims = 4000 iterations saved
            mean    sd    2.5%     25%     50%     75%   97.5%  Rhat n.eff
theta      0.416 0.011   0.395   0.408   0.416   0.423   0.437 1.001  4000
deviance 441.265 1.343 440.300 440.400 440.800 441.600 445.200 1.001  4000

For each parameter, n.eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor (at convergence, Rhat=1).

DIC info (using the rule, pD = Dbar-Dhat)
pD = 1.0 and DIC = 442.2
DIC is an estimate of expected predictive error (lower deviance is better).
> print(stan_fit, digits = 3)
Inference for Stan model: real_binomial.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

          mean se_mean    sd     2.5%      25%      50%      75%    97.5% n_eff  Rhat
theta    0.416   0.000 0.011    0.394    0.408    0.416    0.423    0.437  1397 1.001
lp__  -222.048   0.016 0.694 -224.010 -222.215 -221.771 -221.608 -221.560  1940 1.000

Samples were drawn using NUTS(diag_e) at Sat Feb 27 08:21:20 2016.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).

ほぼ同様の結果がえられたようです。


タグ:STAn
nice!(2)  コメント(0)  トラックバック(0) 
共通テーマ:日記・雑感

nice! 2

コメント 0

コメントを書く

お名前:
URL:
コメント:
画像認証:
下の画像に表示されている文字を入力してください。

Facebook コメント

トラックバック 0