Skip to content

Commit

Permalink
Merge branch 'develop' into feature/refactor_main
Browse files Browse the repository at this point in the history
  • Loading branch information
JasonAHendry authored Jan 22, 2025
2 parents ac33f8c + 2dc78e5 commit 97b029d
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 136 deletions.
21 changes: 14 additions & 7 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,11 @@ int main(int argc, char* argv[])
int n_pi_bins = 1000; // No. of WSAF bins in betabinomial lookup

// MCMC parameters
double w_proposal_sd = 0.1; // Titres sampled from ~N(0, w_propsal_sd)
int n_temps = 5; // Number of temperature levels for PT-MCMC
int n_burn_iters = 100; // Number of iterations in burn-in phase
int n_sample_iters = 900; // Number of iterations in sampling phase
int n_temps = 5; // Number of temperature levels for PT-MCMC
double target_acceptance = 0.44; // Target acceptance rate per MCMC rung
int swap_freq = 1; // Number of iterations between proposing Metropolis-coupling swaps

// OPTIONS
// Filter
Expand Down Expand Up @@ -90,12 +93,16 @@ int main(int argc, char* argv[])
cmd_infer->add_option("-b, --n_wsaf_bins", n_pi_bins, "Number of WSAF bins in Betabin lookup table.")
->group("Model Hyperparameters")
->check(CLI::Range(100, 10'000));
cmd_infer->add_option("-w, --w_proposal", w_proposal_sd, "Controls variance in proportion proposals.")
->group("MCMC Parameters")
->check(CLI::PositiveNumber);

cmd_infer->add_option("-t, --temps", n_temps, "Number of temperature levels in PT-MCMC.")
->group("MCMC Parameters")
->check(CLI::Range(5, 100));
cmd_infer->add_option("-B, --burnin", n_burn_iters, "Number of burn-in iterations.")
->group("MCMC Parameters")
->check(CLI::PositiveNumber);
cmd_infer->add_option("-S, --sampling", n_sample_iters, "Number of sampling iterations.")
->group("MCMC Parameters")
->check(CLI::PositiveNumber);

// Parse
CLI11_PARSE(app, argc, argv);
Expand Down Expand Up @@ -129,13 +136,13 @@ int main(int argc, char* argv[])
std::string K_output_dir = output_dir + "/K" + std::to_string(k);

// Creation
Parameters params(k, e_0, e_1, v, rho, G, w_proposal_sd, n_pi_bins);
Parameters params(k, e_0, e_1, v, rho, G, n_pi_bins, target_acceptance, swap_freq);
ProposalEngine proposal_engine(params);
Model model(params, data, betabin_lookup);

// Run MCMC
std::cout << " Runnning MCMC..." << std::endl;
MCMC mcmc(params, model, proposal_engine, n_temps);
MCMC mcmc(params, model, proposal_engine, n_burn_iters, n_sample_iters, n_temps);
mcmc.run();

// Write MCMC outputs
Expand Down
236 changes: 125 additions & 111 deletions src/mcmcs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,128 +24,38 @@ MCMC::MCMC(
const Parameters& params,
const Model& model,
ProposalEngine& proposal_engine,
int n_burn_iters,
int n_sample_iters,
int n_temps)
: params(params),
model(model),
proposal_engine(proposal_engine),
ix(0),
n_burn_iters(100),
n_sample_iters(900),
n_burn_iters(n_burn_iters),
n_sample_iters(n_sample_iters),
n_total_iters(n_burn_iters + n_sample_iters),
acceptance_rate_cumul(0.0),
acceptance_trace(n_total_iters),
loglike_trace(n_total_iters),
logprior_trace(n_total_iters),
particle_trace(n_total_iters, params.K), // TODO: Here is where space gets allocated. Double check.
n_temps(n_temps),
swap_freq(10), // For now just set as constant
swap_freq(params.swap_freq),
particles(n_temps), // Not exactly sure how this looks
temps(create_temp_levels(particles)),
loglikelihoods(MatrixXd::Constant(n_total_iters, n_temps, -9999)),
n_swap_attempts(0.0),
n_swaps(ArrayXd::Constant(n_temps - 1, 0.0)),
n_swap_attempts(0),
swap_rate_cumul(ArrayXd::Constant(n_temps - 1, 0.0)),
swap_rates(MatrixXd::Constant(n_total_iters, n_temps - 1, 0.0))
{};


void MCMC::write_output(
const string& output_dir,
const ParticleWriter& particle_writer) const
{

// Create the directory if it doesn't exist
fs::create_directories(output_dir); // TODO: not sure this is cross-platform

// Prepare file paths
std::string mcmc_csv = output_dir + "/mcmc.trace.csv";
std::string particles_csv = output_dir + "/mcmc.parameters.csv";

// Write MCMC diagnostics
std::ofstream csv_file(mcmc_csv);
if (!csv_file.is_open()) {
throw std::invalid_argument("Could not open output file.");
}
csv_file << "iter,phase,loglike,logprior,acceptance_rate\n";
for (int i = 0; i < ix; ++i) {
csv_file << i << ",";
csv_file << (i < n_burn_iters ? "burn" : "sample") << ",";
csv_file << loglike_trace[i] << ",";
csv_file << logprior_trace[i] << ",";
csv_file << acceptance_trace[i] << "\n";
}
csv_file.close();

// Write MCMC particles
particle_writer.write_particle_trace(
particles_csv,
particle_trace
);

// Prepare file paths
std::string llk_csv = output_dir + "/mcmc.likelihood.csv";
std::string beta_csv = output_dir + "/mcmc.betas.csv";
std::string swap_csv = output_dir + "/mcmc.swap_rates.csv";

// Write loglikelihood matrix
const static Eigen::IOFormat CSVFormat(6, Eigen::DontAlignCols, ",", "\n");
std::ofstream llk_file(llk_csv);
if (!llk_file.is_open()) {
throw std::invalid_argument("Could not open output file.");
}
for (int j = 0; j < n_temps; ++j) {
llk_file << "level" << j << (j < n_temps - 1 ? "," : "\n");
}
llk_file << loglikelihoods.format(CSVFormat);
llk_file.close();

// Write beta values
std::ofstream beta_file(beta_csv);
if (!beta_file.is_open()) {
throw std::invalid_argument("Could not open output file.");
}
beta_file << "level,beta\n";
for (int j = 0; j < n_temps; ++j) {
beta_file << j << "," << temps[j].beta << "\n";
}
beta_file.close();

// Quick write of swap rates; should add to mcmc.trace.csv
std::ofstream swap_file(swap_csv);
if (!swap_file.is_open()) {
throw std::invalid_argument("Could not open output file.");
}
for (int j = 0; j < n_temps - 1; ++j) {
swap_file << "swap_" << j << "-" << j+1 << (j < n_temps - 2 ? "," : "\n");
}
swap_file << swap_rates.format(CSVFormat);
swap_file.close();

}


Particle MCMC::get_map_particle() const
{
// Find iteration that achieved the highest joint probability
int i_max = 0;
double logjoint_max = loglike_trace[0] + logprior_trace[0];
for (int i = 0; i < loglike_trace.size(); ++i) {
double logjoint = loglike_trace[i] + logprior_trace[i];
if (logjoint > logjoint_max) {
i_max = i;
logjoint_max = logjoint;
}
}
Particle map_particle = particle_trace[i_max];
std::sort(map_particle.ws.begin(), map_particle.ws.end());
return map_particle;
}


MCMC::TemperatureLevel::TemperatureLevel()
: beta(0.0),
particle_ptr(NULL),
loglike(-9999),
logprior(-9999)
logprior(-9999),
w_prop_sd(1.0)
{}


Expand All @@ -158,7 +68,8 @@ std::vector<MCMC::TemperatureLevel> MCMC::create_temp_levels(
int n_temps = particles.size();
std::vector<MCMC::TemperatureLevel> temp_levels(n_temps);

// Populate with a sequence from 0 to 1, raised to the power beta_skew
// Assign particle pointers and set beta values for each rung
// beta values follow a linear sequence from 0 to 1, raised to the power beta_skew
for (int j = 0; j < n_temps; ++j) {
temp_levels[j].particle_ptr = &particles[j];
temp_levels[j].beta = pow(j / double(n_temps - 1), beta_skew);
Expand All @@ -167,6 +78,11 @@ std::vector<MCMC::TemperatureLevel> MCMC::create_temp_levels(
return temp_levels;
}

void MCMC::run()
{
run_burn();
run_sampling();
}

void MCMC::run_burn()
{
Expand All @@ -175,7 +91,11 @@ void MCMC::run_burn()
particles[j] = proposal_engine.create_particle();
temps[j].loglike = model.calc_loglikelihood(particles[j]);
temps[j].logprior = model.calc_logprior(particles[j]);

// Store the loglikelihood for TI
loglikelihoods(ix, j) = temps[j].loglike;
}

// Store cold chain
particle_trace[ix] = *temps[n_temps - 1].particle_ptr;
loglike_trace[ix] = temps[n_temps - 1].loglike;
Expand All @@ -184,7 +104,7 @@ void MCMC::run_burn()
acceptance_trace[ix] = 1.0;
++ix;

run_iterations(n_burn_iters - 1);
run_iterations(n_burn_iters - 1, true);
}


Expand All @@ -194,8 +114,7 @@ void MCMC::run_sampling()
}



void MCMC::run_iterations(int n)
void MCMC::run_iterations(int n, bool adaptive_on)
{
int N = ix + n;
for (; ix < N; ++ix) {
Expand All @@ -205,7 +124,7 @@ void MCMC::run_iterations(int n)

// Propose a particle
TemperatureLevel& temp_level = temps[j];
Particle proposed_particle = proposal_engine.propose_particle(*temp_level.particle_ptr, params.w_proposal_sd);
Particle proposed_particle = proposal_engine.propose_particle(*temp_level.particle_ptr, temp_level.w_prop_sd);

// Compute proposed likelihood and prior
double proposed_loglike = model.calc_loglikelihood(proposed_particle);
Expand All @@ -220,6 +139,17 @@ void MCMC::run_iterations(int n)
*temp_level.particle_ptr = proposed_particle; // change the particle's value
temp_level.loglike = proposed_loglike;
temp_level.logprior = proposed_logprior;

// Robbins-Monroe positive update
if (adaptive_on) {
temp_level.w_prop_sd = std::exp( std::log(temp_level.w_prop_sd) + (1.0 - params.target_acceptance) / sqrt(ix + 1.0) );
temp_level.w_prop_sd = (temp_level.w_prop_sd > 1.0 ? 1.0 : temp_level.w_prop_sd);
}
} else {
// Robbins-Monroe negative update
if (adaptive_on) {
temp_level.w_prop_sd = std::exp( std::log(temp_level.w_prop_sd) - params.target_acceptance / sqrt(ix + 1.0) );
}
}

// Store the loglikelihood for TI
Expand All @@ -229,6 +159,7 @@ void MCMC::run_iterations(int n)
if (j == (n_temps - 1)) {
acceptance_rate_cumul += (A < 0.0 ? std::exp(A) : 1.0);
}

}

// Between-temperature swaps
Expand All @@ -244,16 +175,15 @@ void MCMC::run_iterations(int n)
std::swap(temps[j].particle_ptr, temps[j-1].particle_ptr);
std::swap(temps[j].loglike, temps[j-1].loglike);
std::swap(temps[j].logprior, temps[j-1].logprior);

// Increment if swapped
++n_swaps(j-1);
}

swap_rate_cumul(j-1) += (A < 0.0 ? std::exp(A) : 1.0);
}
++n_swap_attempts;
}

// Compute running swap rates
swap_rates.row(ix) = n_swaps / n_swap_attempts;
swap_rates.row(ix) = swap_rate_cumul / double(n_swap_attempts);

// Store cold chain for this iteration
particle_trace[ix] = *temps[n_temps - 1].particle_ptr;
Expand All @@ -264,11 +194,95 @@ void MCMC::run_iterations(int n)
}


void MCMC::run()
void MCMC::write_output(
const string& output_dir,
const ParticleWriter& particle_writer) const
{
run_burn();
run_sampling();

// Create the directory if it doesn't exist
fs::create_directories(output_dir); // TODO: not sure this is cross-platform

// Prepare file paths
std::string mcmc_csv = output_dir + "/mcmc.trace.csv";
std::string particles_csv = output_dir + "/mcmc.parameters.csv";

// Write MCMC diagnostics
std::ofstream csv_file(mcmc_csv);
if (!csv_file.is_open()) {
throw std::invalid_argument("Could not open output file.");
}
csv_file << "iter,phase,loglike,logprior,acceptance_rate\n";
for (int i = 0; i < ix; ++i) {
csv_file << i << ",";
csv_file << (i < n_burn_iters ? "burn" : "sample") << ",";
csv_file << loglike_trace[i] << ",";
csv_file << logprior_trace[i] << ",";
csv_file << acceptance_trace[i] << "\n";
}
csv_file.close();

// Write MCMC particles
particle_writer.write_particle_trace(
particles_csv,
particle_trace
);

// Prepare file paths
std::string llk_csv = output_dir + "/mcmc.likelihood.csv";
std::string beta_csv = output_dir + "/mcmc.betas.csv";
std::string swap_csv = output_dir + "/mcmc.swap_rates.csv";

// Write loglikelihood matrix
const static Eigen::IOFormat CSVFormat(6, Eigen::DontAlignCols, ",", "\n");
std::ofstream llk_file(llk_csv);
if (!llk_file.is_open()) {
throw std::invalid_argument("Could not open output file.");
}
for (int j = 0; j < n_temps; ++j) {
llk_file << "level" << j << (j < n_temps - 1 ? "," : "\n");
}
llk_file << loglikelihoods.format(CSVFormat);
llk_file.close();

// Write beta values
std::ofstream beta_file(beta_csv);
if (!beta_file.is_open()) {
throw std::invalid_argument("Could not open output file.");
}
beta_file << "level,beta\n";
for (int j = 0; j < n_temps; ++j) {
beta_file << j << "," << temps[j].beta << "\n";
}
beta_file.close();

// Quick write of swap rates; should add to mcmc.trace.csv
std::ofstream swap_file(swap_csv);
if (!swap_file.is_open()) {
throw std::invalid_argument("Could not open output file.");
}
for (int j = 0; j < n_temps - 1; ++j) {
swap_file << "swap_" << j << "-" << j+1 << (j < n_temps - 2 ? "," : "\n");
}
swap_file << swap_rates.format(CSVFormat);
swap_file.close();

}


Particle MCMC::get_map_particle() const
{
// Find iteration that achieved the highest joint probability
int i_max = 0;
double logjoint_max = loglike_trace[0] + logprior_trace[0];
for (int i = 0; i < loglike_trace.size(); ++i) {
double logjoint = loglike_trace[i] + logprior_trace[i];
if (logjoint > logjoint_max) {
i_max = i;
logjoint_max = logjoint;
}
}
Particle map_particle = particle_trace[i_max];
std::sort(map_particle.ws.begin(), map_particle.ws.end());
return map_particle;
}

Loading

0 comments on commit 97b029d

Please sign in to comment.