conditionals.py

# Credit to GPflow
import tensorflow as tf  # uses TensorFlow 1.x graph-mode APIs

def base_conditional(Kmn, Kmm, Knn, f, *, full_cov=False, q_sqrt=None, white=False):
    r"""
    Given jointly Gaussian g1 and g2 with
        p(g2) = N(g2; 0, Kmm)
        p(g1) = N(g1; 0, Knn)
        cov(g1, g2) = Knm,
    and an (approximate posterior) distribution
        q(g2) = N(g2; f, q_sqrt q_sqrt^T),
    this method computes the mean and (co)variance of
        q(g1) = \int p(g1|g2) q(g2) dg2.

    :param Kmn: M x N
    :param Kmm: M x M
    :param Knn: N x N or N
    :param f: M x R
    :param full_cov: bool
    :param q_sqrt: None, M x R (standard deviations), or R x M x M
        (lower triangular)
    :param white: bool
    :return: mean, size N x R; variance, size N x R (full_cov=False)
        or R x N x N (full_cov=True)
    """
    # compute kernel quantities
    num_func = tf.shape(f)[1]  # R
    Lm = tf.cholesky(Kmm)

    # compute the projection matrix A = Lm^{-1} Kmn
    A = tf.matrix_triangular_solve(Lm, Kmn, lower=True)

    # compute the covariance due to the conditioning
    if full_cov:
        fvar = Knn - tf.matmul(A, A, transpose_a=True)
        fvar = tf.tile(fvar[None, :, :], [num_func, 1, 1])  # R x N x N
    else:
        fvar = Knn - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(fvar[None, :], [num_func, 1])  # R x N

    # another backsubstitution in the unwhitened case: A = Kmm^{-1} Kmn
    if not white:
        A = tf.matrix_triangular_solve(tf.transpose(Lm), A, lower=False)

    # construct the conditional mean
    fmean = tf.matmul(A, f, transpose_a=True)

    # add the contribution of q_sqrt (uncertainty about f) to the variance
    if q_sqrt is not None:
        if q_sqrt.get_shape().ndims == 2:
            LTA = A * tf.expand_dims(tf.transpose(q_sqrt), 2)  # R x M x N
        elif q_sqrt.get_shape().ndims == 3:
            L = q_sqrt
            A_tiled = tf.tile(tf.expand_dims(A, 0), tf.stack([num_func, 1, 1]))
            LTA = tf.matmul(L, A_tiled, transpose_a=True)  # R x M x N
        else:  # pragma: no cover
            raise ValueError("Bad dimension for q_sqrt: %s" %
                             str(q_sqrt.get_shape().ndims))
        if full_cov:
            fvar = fvar + tf.matmul(LTA, LTA, transpose_a=True)  # R x N x N
        else:
            fvar = fvar + tf.reduce_sum(tf.square(LTA), 1)  # R x N

    if not full_cov:
        fvar = tf.transpose(fvar)  # N x R

    return fmean, fvar  # N x R, R x N x N or N x R
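

# --- Illustrative reference (added sketch, not part of the original) -------
# A minimal NumPy transcription of base_conditional for the simplest case
# (q_sqrt=None, white=False, full_cov=True), to make the algebra above
# concrete:
#     mean = Kmn^T Kmm^{-1} f
#     cov  = Knn - Kmn^T Kmm^{-1} Kmn
# The function name and its use of plain np.linalg.solve are illustrative
# assumptions, not GPflow API.
def _np_base_conditional_reference(Kmn, Kmm, Knn, f):
    import numpy as np
    Lm = np.linalg.cholesky(Kmm)    # Kmm = Lm Lm^T
    A = np.linalg.solve(Lm, Kmn)    # A = Lm^{-1} Kmn
    fvar = Knn - A.T @ A            # Knn - Kmn^T Kmm^{-1} Kmn
    A = np.linalg.solve(Lm.T, A)    # now A = Kmm^{-1} Kmn
    fmean = A.T @ f                 # Kmn^T Kmm^{-1} f
    return fmean, fvar              # N x R, N x N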
def conditional(Xnew, X, kern, f, *, full_cov=False, q_sqrt=None, white=False):
    """
    Given f, representing the GP at the points X, produce the mean and
    (co-)variance of the GP at the points Xnew.

    Additionally, there may be Gaussian uncertainty about f as represented by
    q_sqrt. In this case `f` represents the mean of the distribution and
    q_sqrt the square-root of the covariance.

    Additionally, the GP may have been centered (whitened) so that
        p(v) = N(0, I)
        f = L v,
    thus
        p(f) = N(0, LL^T) = N(0, K).
    In this case `f` represents the values taken by v.

    The method can either return the diagonals of the covariance matrix for
    each output (default) or the full covariance matrix (full_cov=True).
    We assume R independent GPs, represented by the columns of f (and the
    first dimension of q_sqrt).

    :param Xnew: data matrix, size N x D. Evaluate the GP at these new points.
    :param X: data points, size M x D.
    :param kern: GPflow kernel.
    :param f: data matrix, M x R, representing the function values at X,
        for R functions.
    :param full_cov: bool, whether to return the full covariance.
    :param q_sqrt: matrix of standard deviations or Cholesky matrices,
        size M x R or R x M x M.
    :param white: boolean of whether to use the whitened representation as
        described above.
    :return:
        - mean: N x R
        - variance: N x R (full_cov=False) or R x N x N (full_cov=True)
    """
    num_data = tf.shape(X)[0]  # M
    # add jitter to the diagonal for numerical stability of the Cholesky
    Kmm = kern.K(X) + tf.eye(num_data, dtype=tf.float64) * 1e-7
    Kmn = kern.K(X, Xnew)
    if full_cov:
        Knn = kern.K(Xnew)
    else:
        Knn = kern.Kdiag(Xnew)
    mean, var = base_conditional(Kmn, Kmm, Knn, f, full_cov=full_cov,
                                 q_sqrt=q_sqrt, white=white)
    return mean, var  # N x R, N x R or R x N x N
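

# --- Usage sketch (added example, not part of the original) ----------------
# A self-contained, hedged demonstration of `conditional`. _ToyRBF below is
# a hypothetical stand-in exposing the K(X, X2=None) / Kdiag(X) interface the
# code above expects from a GPflow kernel; it is not GPflow's implementation.
# TF 1.x graph mode and a Session are assumed, matching the code above.
class _ToyRBF:
    """Hypothetical RBF kernel stand-in with K(X, X2=None) and Kdiag(X)."""

    def __init__(self, variance=1.0, lengthscale=1.0):
        self.variance = variance
        self.lengthscale = lengthscale

    def K(self, X, X2=None):
        X2 = X if X2 is None else X2
        # squared distances: |x|^2 + |x'|^2 - 2 x.x'
        d2 = (tf.reduce_sum(tf.square(X), 1)[:, None]
              + tf.reduce_sum(tf.square(X2), 1)[None, :]
              - 2.0 * tf.matmul(X, X2, transpose_b=True))
        return self.variance * tf.exp(-0.5 * d2 / self.lengthscale ** 2)

    def Kdiag(self, X):
        return self.variance * tf.ones(tf.shape(X)[:1], dtype=X.dtype)


if __name__ == "__main__":
    import numpy as np
    X = np.random.randn(5, 1)                    # M x D training inputs
    Xnew = np.linspace(-2.0, 2.0, 7)[:, None]    # N x D test inputs
    f = np.zeros((5, 1))                         # M x R function values at X
    mean, var = conditional(tf.constant(Xnew), tf.constant(X), _ToyRBF(),
                            tf.constant(f), full_cov=False)
    with tf.Session() as sess:
        m, v = sess.run([mean, var])
    print(m.shape, v.shape)                      # (7, 1) (7, 1)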