A library for probabilistic modeling, inference, and criticism. http://edwardlib.org
Greetings, I have been trying to implement a Dirichlet process mixture model for data that follow a mixture of multivariate Bernoulli distributions. I used Independent and Mixture to construct the multivariate Bernoulli and the mixture of multivariate Bernoullis, respectively, and I used KLqp for inference. Unfortunately, I always get nans after a few iterations. My TensorFlow version is 1.7.0 and my Edward version is 1.3.5. Any suggestions would be very much appreciated.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import edward as ed
import matplotlib.cm as cm
import numpy as np
import seaborn as sns
import tensorflow as tf
from edward.models import (
Categorical, Mixture, Gamma, Beta,
Bernoulli, Independent)
def build_toy_dataset2(N):
  # Three latent groups, each with its own Bernoulli probability per dimension.
  probs = np.array([[0.9, 0.9], [0.1, 0.9], [0.1, 0.1]])
  x = np.zeros((N, 2), dtype=np.float32)
  num_group1 = num_group2 = int(N / 3)
  num_group3 = N - num_group1 - num_group2
  start = 0
  for g, size in enumerate([num_group1, num_group2, num_group3]):
    rows = np.arange(start, start + size)
    for d in range(2):
      x[rows, d] = np.random.choice(
          [1, 0], size=size, p=[probs[g, d], 1 - probs[g, d]])
    start += size
  return x
ed.set_seed(42)
N = 500
D = 2
T = K = 5 # truncation level in DP
alpha = 0.5
x_train = build_toy_dataset2(N)
# MODEL
# DP stick-breaking construction: beta_k ~ Beta(1, alpha) and
# pi_k = beta_k * prod_{j<k} (1 - beta_j), truncated at K components.
alpha = Gamma(concentration=6.0, rate=1.0, name="alpha")
beta = Beta(concentration1=1.0, concentration0=tf.ones(K) * alpha, name="beta")
pi = tf.concat(
    [tf.reshape(beta[0], [1]),
     tf.reshape(tf.multiply(beta[1:], tf.cumprod(1 - beta[:-1])), [K - 1])],
    0, name="pi")
alpha0 = tf.Variable(tf.ones(D), dtype=tf.float32, trainable=False)
beta0 = tf.Variable(tf.ones(D), dtype=tf.float32, trainable=False)
prob = Beta(alpha0, beta0, sample_shape=K)
cat = Categorical(probs=pi, sample_shape=N)
# Each component is a multivariate Bernoulli: Independent sums the
# per-dimension Bernoulli log-probabilities over the last axis.
comps = [Independent(distribution=Bernoulli(probs=prob[k]),
                     reinterpreted_batch_ndims=1, sample_shape=N)
         for k in range(K)]
x = Mixture(cat=cat, components=comps, sample_shape=N)
# INFERENCE
qalpha = Gamma(
    concentration=tf.Variable(tf.constant(1.0), name="qalpha_concentration"),
    rate=tf.Variable(tf.constant(1.0), name="qalpha_rate"))
qbeta = Beta(tf.nn.softplus(tf.Variable(tf.random_normal([T]))) + 1e-5,
             tf.nn.softplus(tf.Variable(tf.random_normal([T]))) + 1e-5)
qalpha0 = tf.nn.softplus(tf.Variable(tf.random_normal([D]))) + 1e-5
qbeta0 = tf.nn.softplus(tf.Variable(tf.random_normal([D]))) + 1e-5
qprob = Beta(qalpha0, qbeta0, sample_shape=K)
inference = ed.KLqp({alpha: qalpha, beta: qbeta, prob: qprob},
                    data={x: x_train})
learning_rate = 1e-3
optimizer = tf.train.AdamOptimizer(learning_rate)
n_iter=1000
n_samples=10
inference.initialize(n_iter=n_iter, n_print=0, n_samples=n_samples,
                     optimizer=optimizer)
sess = ed.get_session()
init = tf.global_variables_initializer()
init.run()
t_ph = tf.placeholder(tf.int32, [])
running_cluster_means = tf.reduce_mean(qbeta.mean()[:t_ph], 0)
for _ in range(inference.n_iter):
  info_dict = inference.update()
  inference.print_progress(info_dict)
  t = info_dict['t']
  # Guard against n_print == 0, which would otherwise make the modulo fail.
  if inference.n_print and t % inference.n_print == 0:
    print(sess.run(qprob.mean()))
    print(sess.run(qbeta.mean()))
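For reference, here is a minimal standalone check of the Independent construction used above. It is only a sketch (assuming the TensorFlow 1.x distributions that Edward 1.3.5 wraps) showing that reinterpreted_batch_ndims=1 sums the per-dimension Bernoulli log-probabilities into a single multivariate Bernoulli log-probability:
import numpy as np
import tensorflow as tf
from edward.models import Bernoulli, Independent

p = tf.constant([0.9, 0.1])  # per-dimension success probabilities
mv_bern = Independent(distribution=Bernoulli(probs=p),
                      reinterpreted_batch_ndims=1)

obs = np.array([1., 0.], dtype=np.float32)
with tf.Session() as sess:
  # log p([1, 0]) should equal log(0.9) + log(1 - 0.1).
  joint = sess.run(mv_bern.log_prob(obs))
  parts = sess.run(Bernoulli(probs=p).log_prob(obs))
  print(joint, parts.sum())  # the two numbers should match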
Hi all, in the implementation of stochastic gradient Hamiltonian Monte Carlo [@chen2014stochastic] in Edward, we found that the gradient update always uses the full dataset rather than a minibatch, as the paper suggests. Are we missing something, or does the current implementation simply use the full dataset?
Code example: https://github.com/blei-lab/edward/blob/152c19f3080be0826b60fdb57c6d60724e044f2e/edward/inferences/sghmc.py#L110
Paper reference: http://proceedings.mlr.press/v32/cheni14.pdf
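For context, the usual way to expose a minibatch gradient to an Edward inference is the data-subsampling pattern sketched below. This is only a sketch with made-up model variables (a Bayesian linear regression with weights w and batch size M), and it does not by itself settle whether sghmc.py rescales the gradient the way the paper intends:
import tensorflow as tf
import edward as ed
from edward.models import Empirical, Normal

N, M, D = 10000, 128, 5  # full data size, minibatch size, number of features

# Placeholders are fed one minibatch per update() call.
x_ph = tf.placeholder(tf.float32, [M, D])
y_ph = tf.placeholder(tf.float32, [M])

w = Normal(loc=tf.zeros(D), scale=tf.ones(D))
y = Normal(loc=ed.dot(x_ph, w), scale=1.0)

T = 5000  # number of posterior samples to keep
qw = Empirical(params=tf.Variable(tf.zeros([T, D])))

inference = ed.SGHMC({w: qw}, data={y: y_ph})
# scale reweights the minibatch log-likelihood by N / M, which is how the
# stochastic gradient of the full-data log joint is approximated.
inference.initialize(scale={y: float(N) / M}, step_size=1e-3)

sess = ed.get_session()
tf.global_variables_initializer().run()
# Inside the training loop, one would feed a fresh minibatch each step, e.g.
#   inference.update(feed_dict={x_ph: x_batch, y_ph: y_batch})
# where x_batch, y_batch come from a hypothetical batching helper.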
Hi, I'm looking into issue #271, which is about implementing IS / SMC inference, and I was considering the two following options:
It feels like the first option is more of a hack, but it is easier to implement than the second option, which would require refactoring some existing code. Please let me know which of these two options makes more sense, or if you have any comments about them.
ed.models.Empirical. Does anyone know what the corresponding thing in Edward2 or TensorFlow Probability is? Maybe as_random_variable?
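For context, in Edward 1 the Empirical random variable is typically used as the approximating family whose params variable stores Monte Carlo samples. A minimal sketch of that usage (the toy model and variable names are made up):
import numpy as np
import tensorflow as tf
import edward as ed
from edward.models import Empirical, Normal

# Toy model: latent mean z with ten observed data points.
z = Normal(loc=0.0, scale=1.0)
x = Normal(loc=z, scale=1.0, sample_shape=10)

# Empirical holds T posterior samples as trainable state; sampling-based
# inferences such as HMC write their draws into it.
T = 1000
qz = Empirical(params=tf.Variable(tf.zeros(T)))

inference = ed.HMC({z: qz}, data={x: np.zeros(10, dtype=np.float32)})
inference.run()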
softplus, though. I can reread this, so thanks for taking the time to respond.
I have an architecture vaguely similar to an auto-encoder, but I want the encoder to be probabilistic.
I think I need this because the loss function I'm optimizing has a few 'hot spots' (good initial conditions) and many 'cold spots' (zero gradient).
So if I treat the encoder output as deterministic, then just by the luck of initialization very few (maybe zero) of the outputs may land on hot spots. But if the output is treated as a distribution with enough variance to cover the hot spots, I should be able to sample good encodings to find a good trajectory to optimize, and push the encoder distribution further towards the hot spots during training.
Does this sound suited to probabilistic programming, and does anyone have any advice based on this description?
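One common way to make an encoder probabilistic is to have it output the parameters of a distribution and draw samples with the reparameterization trick, so the sampling stays differentiable. Below is a minimal sketch in plain TensorFlow 1.x; the layer sizes, names, and input width are made up for illustration:
import tensorflow as tf

def stochastic_encoder(x, code_dim=8):
  # Encoder that outputs a Normal over codes instead of a point estimate.
  h = tf.layers.dense(x, 64, activation=tf.nn.relu)
  loc = tf.layers.dense(h, code_dim)
  scale = tf.nn.softplus(tf.layers.dense(h, code_dim)) + 1e-5

  # Reparameterization trick: sample = loc + scale * eps is differentiable
  # with respect to the encoder weights, so training can push the code
  # distribution toward the loss function's hot spots.
  eps = tf.random_normal(tf.shape(loc))
  sample = loc + scale * eps
  return sample, loc, scale

x_ph = tf.placeholder(tf.float32, [None, 32])  # hypothetical input width
code, code_loc, code_scale = stochastic_encoder(x_ph)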
Greetings, I'm trying to use the Multinomial distribution in Edward to predict multiclass labels (3 classes) with a neural network. I'm confused about two things:
1) How should I shape the label dataset? I chose to convert labels such as [[0],[1],[0],[2]] into [[1,0,0],[0,1,0],[1,0,0],[0,0,2]].
2) Under what conditions should the total_count argument of Multinomial not equal 1 when I use probs instead of logits?
I'm not too familiar with Multinomial; I used Bernoulli easily, but I can't handle the multiclass network.
After training, I get predictions for the test data, but when I try to evaluate mse I get this error:
ValueError: Dimensions must be equal, but are 145 and 3 for 'sub_2' (op: 'Sub') with input shapes: [145], [145,3].
Here is my code; if you see anything I'm missing, I would be very happy to get advice.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
import edward as ed
from edward.models import Normal, Multinomial
num_labels = 3
(n_samples, n_iter) = (30, 2500)
symbol = 'A'
dataFrequency = '10'
# X and Y are assumed to be loaded earlier; the data loading is omitted here.
X, Y = np.array(X), np.array(Y)
Y =(np.arange(num_labels) == Y[:,None]).astype(np.float32)
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
# X_train.shape (578, 120)
# y_train.shape (578, 3)
# X_test.shape (145, 120)
# y_test.shape (145, 3)
def neural_network(x):
  # Four-layer network; the softmax output is used as the Multinomial probs.
  h = tf.tanh(tf.matmul(x, W_0) + b_0)
  h = tf.tanh(tf.matmul(h, W_1) + b_1)
  h = tf.tanh(tf.matmul(h, W_2) + b_2)
  h = tf.matmul(h, W_3) + b_3
  return tf.nn.softmax(h)
D = X_train.shape[1]
N = X_train.shape[0]
N2 = X_test.shape[0]
W_0 = Normal(loc=tf.zeros([D, 10]), scale=tf.ones([D, 10]))
W_1 = Normal(loc=tf.zeros([10, 10]), scale=tf.ones([10, 10]))
W_2 = Normal(loc=tf.zeros([10, 5]), scale=tf.ones([10, 5]))
W_3 = Normal(loc=tf.zeros([5, 3]), scale=tf.ones([5, 3]))
b_0 = Normal(loc=tf.zeros(10), scale=tf.ones(10))
b_1 = Normal(loc=tf.zeros(10), scale=tf.ones(10))
b_2 = Normal(loc=tf.zeros(5), scale=tf.ones(5))
b_3 = Normal(loc=tf.zeros(3), scale=tf.ones(3))
x_ph = tf.placeholder(tf.float32, [None, D])
y = Multinomial(probs=neural_network(x_ph), total_count=1.)
qw_0 = Normal(loc=tf.get_variable("qw_0/loc", [D, 10]),
scale=tf.nn.softplus(tf.get_variable("qw_0/scale", [D, 10])))
qb_0 = Normal(loc=tf.get_variable("qb_0/loc", [10]),
scale=tf.nn.softplus(tf.get_variable("qb_0/scale", [10])))
qw_1 = Normal(loc=tf.get_variable("qw_1/loc", [10, 10]),
scale=tf.nn.softplus(tf.get_variable("qw_1/scale", [10, 10])))
qb_1 = Normal(loc=tf.get_variable("qb_1/loc", [10]),
scale=tf.nn.softplus(tf.get_variable("qb_1/scale", [10])))
qw_2 = Normal(loc=tf.get_variable("qw_2/loc", [10, 5]),
scale=tf.nn.softplus(tf.get_variable("qw_2/scale", [10, 5])))
qb_2 = Normal(loc=tf.get_variable("qb_2/loc", [5]),
scale=tf.nn.softplus(tf.get_variable("qb_2/scale", [5])))
qw_3 = Normal(loc=tf.get_variable("qw_3/loc", [5, 3]),
scale=tf.nn.softplus(tf.get_variable("qw_3/scale", [5, 3])))
qb_3 = Normal(loc=tf.get_variable("qb_3/loc", [3]),
scale=tf.nn.softplus(tf.get_variable("qb_3/scale", [3])))
inference = ed.KLqp({
W_0: qw_0, b_0: qb_0,
W_1: qw_1, b_1: qb_1,
W_2: qw_2, b_2: qb_2,
W_3: qw_3, b_3: qb_3,
}, data={x_ph: X_train, y: y_train})
inference.run(n_samples=n_samples, n_iter=n_iter,
logdir='log/{}/{}/{}/{}'.format(symbol,
dataFrequency,
n_samples,
n_iter)
)
y_post = ed.copy(y, {
W_0: qw_0, b_0: qb_0,
W_1: qw_1, b_1: qb_1,
W_2: qw_2, b_2: qb_2,
W_3: qw_3, b_3: qb_3,
})
sess = ed.get_session()
predictions = sess.run(y_post, feed_dict={x_ph: X_test})
print('mse: ', ed.evaluate('mse', data={x_ph: X_test, y: y_test}))
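For what it's worth, the error message suggests a length-145 vector is being subtracted from the [145, 3] one-hot labels inside the mse computation. One way to compare multiclass predictions outside of ed.evaluate is to collapse both the sampled one-hot predictions and the one-hot labels to class indices; this is only a sketch and assumes predictions has the same [145, 3] shape as y_test:
# Collapse one-hot vectors to class indices and compare them directly.
pred_classes = np.argmax(predictions, axis=1)  # shape [145]
true_classes = np.argmax(y_test, axis=1)       # shape [145]
print('accuracy:', np.mean(pred_classes == true_classes))
If I remember correctly, ed.evaluate also offers classification metrics such as 'categorical_accuracy' that may fit one-hot labels better than 'mse', but I have not checked that against version 1.3.5.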