Commit

update simpleExample to be simpler and quicker to run

bcdaniels committed Jun 13, 2022
1 parent 383f135 commit 23b1a86
Showing 7 changed files with 1,636 additions and 10,562 deletions.
11,794 changes: 1,420 additions & 10,374 deletions simpleExample.ipynb

Large diffs are not rendered by default.

163 changes: 94 additions & 69 deletions simpleExample.py
@@ -1,50 +1,53 @@

#!/usr/bin/env python
# coding: utf-8

# *Note: This file is provided in two formats:
# Python (simpleExample.py) and a Jupyter
# notebook (simpleExample.ipynb). The
# Jupyter notebook opens in a web browser and
# includes plots in an interactive format. To
# open the .ipynb file, run:*
#
# jupyter notebook simpleExample.ipynb
#
# *To run the .py file in iPython at the command line, run:*
#
# ipython
# %run simpleExample.py
# plt.show()

# simpleExample.ipynb
# -------------------
#
# - Bryan Daniels
#
# - 1.29.2014
# - updated 1.18.2019
# - Last updated 2022/6/13
#
# - Uses a simple exponential decay example to demonstrate usage of SirIsaac

# In[1]:

import numpy as np
from matplotlib import pyplot as plt
from SirIsaac import fittingProblem


# Load example data
# -----------------

# In the example data file, we have five columns, each with 100 data points, listing:
#
# * Initial condition *x_init*
# * Input condition *tau*
# * Measurement time *t*
# * Measurement value *x*
# * Measurement uncertainty (standard deviation)

# In[2]:

data = np.loadtxt('simpleExample_data.csv',delimiter=',')
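
# As a quick sanity check, the loaded array should contain the 100 rows and five columns described above:

print(data.shape)   # expect (100, 5)
print(data[0])      # one row: x_init, tau, t, x, uncertainty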


# We now put this in a format compatible with SirIsaac. First we make a list of input values (in this case the initial condition *x_init* and input condition *tau* for each measurement):
@@ -53,18 +56,21 @@

# In[3]:

indepParamsList = [ [ expt[0],expt[1] ] for expt in data ]


# In[4]:


indepParamsList[:3]


# Next, we have a corresponding list of data taken at each of those input values, in the format below. In this case, we only have one variable *x*. (Note: In general, multiple timepoints could also be measured at each input value; in all of our examples, we measure all variables at a single timepoint per input value.)

# In[5]:


# [ {'var1': { time0: ( value, uncertainty ) },
# 'var2': { time0: ( value, uncertainty ) },
# ... },
@@ -76,43 +82,49 @@

# In[6]:


sirIsaacData = []
for expt in data:
    sirIsaacData.append( { 'x': { expt[2]: ( expt[3], expt[4] ) } } )


# In[7]:


sirIsaacData[:3]
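
# (If multiple timepoints were measured at each input value, each variable's dictionary would simply contain one entry per timepoint; e.g., with hypothetical values, { 'x': { 0.1: (1.2, 0.05), 0.2: (0.9, 0.05) } }.)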


# Finally, SirIsaac will need to know what to call the input and output values. In this case, the input corresponds to the initial value of *x*. The way to indicate this to SirIsaac is by using the name 'x_init', where 'x' is the name of the corresponding variable.
#
# Here we have two inputs and one output:

# In[8]:


outputNames = ['x']
indepParamNames = ['x_init','tau']
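# (For example, if a second variable *y* were also measured and given its own initial condition, we would use outputNames = ['x','y'] and add 'y_init' to indepParamNames.)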


# Create SirIsaac FittingProblem
# ------------------------------

# We'll attempt to fit a model in the "continuous time sigmoidal network" (CTSN) class. To do this, we'll create an instance of a CTSNFittingProblem. Here we set up its arguments and create it:

# In[9]:


# complexityList lists which models in the model class may be tested.
# (Note that by default SirIsaac will still stop once 3 successive
# models have smaller estimated log-likelihood than the best so far.)
complexityStepsize = 2 # increase complexity with steps of size 2
complexityMax = 25 # don't try models with complexity > 25
complexityList = range(0,complexityMax,complexityStepsize)
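# (here: complexities 0, 2, 4, ..., 24)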

# ensGen controls the generation of the initial ensemble of
# parameter starting points.
# (We use a small number for totalSteps here so that the example
# runs faster; a more typical value for totalSteps is 1e3)
totalSteps = 20 #1e3
keepSteps = 10
seeds = (1,1) # use a fixed random seed
ensTemperature = 100.
@@ -126,13 +138,13 @@
# priorSigma controls the width of priors on all parameters
priorSigma = 3.

# If you have mpi4py installed, you can run on multiple processors
numprocs = 4 #10

# We'll only use a subset of our data to make the example run faster
N = 20

p = fittingProblem.CTSNFittingProblem( complexityList,
    sirIsaacData[:N], indepParamsList=indepParamsList[:N],
    outputNames=outputNames, indepParamNames=indepParamNames,
    ensGen=ensGen, avegtol=avegtol, maxiter=maxiter,
@@ -142,20 +154,22 @@
# Run parameter fitting
# ---------------------

# The bulk of computation time is used to fit the parameters of each model to the data. Uncomment the following lines to run the parameter fitting, which takes an hour or two using 4 processors. Or skip ahead to load a version that has already been fit.

# In[16]:


## Uncomment to run parameter fitting.
#p.fitAll()
#
#fittingProblem.save(p,'simpleExample_savedFittingProblem.pkl')


# In[10]:


# Load saved version of fittingProblem that has already been fit.
p = fittingProblem.load('simpleExample_savedFittingProblem.pkl')
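
# As a quick check that the load succeeded, we can ask which model was selected (we examine this in more detail below):

print(p.maxLogLikelihoodName())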


# Analyze the selected model
@@ -165,15 +179,17 @@

# In[11]:

plt.figure(figsize=(20,2))
p.plotBestModelResults(plotInitialConditions=True,indices=range(10));


# And now for out-of-sample data:

# In[12]:

plt.figure(figsize=(20,2))
m = p.getBestModel()
m.plotResults(sirIsaacData[20:30],indepParamsList[20:30],
    plotInitialConditions=True,plotFittingData=True);
@@ -183,6 +199,7 @@

# In[13]:


m = p.getBestModel()
print(m.getParameters())

@@ -191,6 +208,7 @@

# In[14]:


m = p.getBestModel()
fittingProblem.IO.eqns_TeX_file(m.net,filename='simpleExample_selectedModel.tex')
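
# (The resulting simpleExample_selectedModel.tex contains the selected model's equations typeset in LaTeX; view it directly or render it with LaTeX.)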

Expand All @@ -202,93 +220,100 @@

# In[15]:

plt.figure(figsize=(20,4))
m = p.getBestModel()
m.plotResults(p.fittingData[:10],
    p.indepParamsList[:10],
    plotInitialConditions=True,
    plotHiddenNodes=True);


# We have access to raw trajectories using evaluateVec. Here we use this to plot a projection of trajectories in phase space for the first in-sample initial conditions:

# In[17]:


plt.figure(figsize=(4,4))
times = np.linspace(0,1,1000)
xdata = m.evaluateVec(times,'x',p.indepParamsList[0])
X2data = m.evaluateVec(times,'X_2',p.indepParamsList[0])
fittingProblem.Plotting.plot(xdata,X2data)
plt.xlabel('x')
plt.ylabel('X_2');


# We can also look at other models that SirIsaac fit in searching for the best one. In this case, 'Model 4' was selected because it has the largest estimated log-likelihood:

# In[18]:


for name in p.fittingModelNames:
    if name in p.logLikelihoodDict.keys():
        print('{}: #species = {}, #params = {}, L = {}'.format(name,
            len(p.fittingModelDict[name].speciesNames),
            p.numParametersDict[name],
            p.logLikelihoodDict[name]))
print()
print('Selected model:',p.maxLogLikelihoodName())


# A model with more parameters fits in-sample data better but out-of-sample data worse:

# In[19]:


plt.figure(figsize=(20,2))
m2 = p.fittingModelDict['Model 7']
m2.plotResults(sirIsaacData[:10],indepParamsList[:10],
    plotInitialConditions=True,plotFittingData=True);


# Also potentially useful is the Hessian at the best-fit parameters:

# In[20]:


hess = p.HessianDict[p.maxLogLikelihoodName()]
u,singVals,vt = np.linalg.svd( hess )
np.sort(singVals)
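
# (A wide spread in these singular values is typical of such fits: directions in parameter space with small singular values are only loosely constrained by the data.)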


# Other details about what happened during parameter fitting are stored within each fittingModel:

# In[21]:


m = p.getBestModel()
print("Acceptance ratio for initial parameter ensemble =",m.acceptanceRatio)
c = sum(scipy.array(m.currentResiduals(p.fittingData,p.indepParamsList,includePriors=False))**2)
print("Sum of squared residuals at best-fit (without priors) =",c)
print("Convergence flags for local fits:",m.convFlagList)
print("Number of cost evaluations for local fits:",m.numCostCallsList)
print("Number of gradient evaluations for local fits:",m.numGradCallsList)
print("Acceptance ratio for initial parameter ensemble = {}".format(m.acceptanceRatio))
c = sum(np.array(m.currentResiduals(p.fittingData,p.indepParamsList,includePriors=False))**2)
print("Sum of squared residuals at best-fit (without priors) = {}".format(c))
print("Convergence flags for local fits: {}".format(m.convFlagList))
print("Number of cost evaluations for local fits: {}".format(m.numCostCallsList))
print("Number of gradient evaluations for local fits: {}".format(m.numGradCallsList))


# Finally, since in this case we know the function used to create the data, we can compare:

# In[22]:


plt.figure(figsize=(20,2))
indicesToPlot = range(10)
axArray = p.plotBestModelResults(plotInitialConditions=True,indices=indicesToPlot)

# compare to model that generated the data (see simpleExample_makeData.py)
f = lambda x0,tau,t: x0 * np.exp(-t/tau)

for i,indepParams in enumerate(np.array(indepParamsList)[indicesToPlot]):
    times = np.linspace(0,1,100)
    x0,tau = indepParams
    fittingProblem.Plotting.sca(axArray[0][i])
    fittingProblem.Plotting.plot(times,f(x0,tau,times),'k:')


# In[ ]:



