gpytorch: Help with fitting a multidimensional GP regression with IndexKernel (and maybe AR1?)
Hi everyone,
I’m trying to fit a multidimensional regression where I have GPs on 3 dimensions:

Y_{l,a,t} = b1*X1 + b2*X2 + (GP by L) + (GP by A) + (GP by T)

where I’m treating `X1` and `X2` as fixed effects (and hence entering with a linear kernel), and `L`, `A`, and `T` are all integers denoting location, age, and time. I’d like to have, let’s say, a Matern on `T` and an RBF on `A` (planning to eventually have an AR1 on `A`), and an IID-like kernel on `L`. I found that `IndexKernel` seems to fit the IID structure best. I think I’ve implemented everything except the `IndexKernel` part in my code below (I know that the sim is wrong; I’m just trying to learn about fitting the model first!), but running it produces a type error:
```python
import torch
import pandas as pd
import numpy as np
import gpytorch
import gpytorch.kernels as kern

def cartesian(*arrays):
    mesh = np.meshgrid(*arrays)                    # standard numpy meshgrid
    dim = len(mesh)                                # number of dimensions
    elements = mesh[0].size                        # number of elements, any index will do
    flat = np.concatenate(mesh).ravel()            # flatten the whole meshgrid
    reshape = np.reshape(flat, (dim, elements)).T  # reshape and transpose
    return reshape

def prep_data(name_list, *arrays):
    cartprod = cartesian(*arrays)
    dout = pd.DataFrame(cartprod)
    dout.columns = name_list
    return dout

## Simulate data placeholders
L = [i for i in range(20)]
A = [i for i in range(5)]
T = [i for i in range(30)]
dtt = prep_data(['L', 'A', 'T'], L, A, T)

## Simulate random values
beta_x1 = 0.4
beta_x2 = 0.3
LL_noise = 0.25
dtt['x1'] = np.random.uniform(size=dtt.shape[0])
dtt['x2'] = np.random.normal(size=dtt.shape[0])
dtt['y'] = beta_x1*dtt['x1'] + beta_x2*dtt['x2'] + np.random.normal(scale=LL_noise, size=dtt.shape[0])

## Create train x and y tensors
train_x = torch.from_numpy(dtt[['x1', 'x2', 'A', 'T', 'L']].values.astype(np.double)).to(torch.float)
train_y = torch.from_numpy(dtt[['y']].values[:, 0].astype(np.double)).to(torch.float)

class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
        # SKI requires a grid size hyperparameter. This util can help with that
        # grid_size = gpytorch.utils.grid.choose_grid_size(train_x)
        # Mean function
        self.mean_module = gpytorch.means.ConstantMean()
        # x1 and x2 (enter as linear)
        covar_module_zero = kern.LinearKernel(num_dimensions=2, active_dims=[0, 1])
        # location
        covar_module_one = kern.GridInterpolationKernel(kern.IndexKernel(num_tasks=1),
                                                        grid_size=7, active_dims=[4], num_dims=1)
        # age
        covar_module_two = kern.GridInterpolationKernel(kern.ScaleKernel(kern.RBFKernel()),
                                                        grid_size=7, active_dims=[2], num_dims=1)
        # time
        covar_module_three = kern.GridInterpolationKernel(kern.ScaleKernel(kern.MaternKernel()),
                                                          grid_size=7, active_dims=[3], num_dims=1)
        self.covar_module = covar_module_zero + covar_module_one + \
                            covar_module_two + covar_module_three

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = GPRegressionModel(train_x, train_y, likelihood)

# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam([
    {'params': model.parameters()},  # Includes GaussianLikelihood parameters
], lr=0.1)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

def train():
    training_iterations = 30
    for i in range(training_iterations):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
        optimizer.step()

train()  # this is where the error below is raised
```
and the error trace I get (which goes away if I comment out the IndexKernel part) is as follows:
```
<ipython-input-182-a138dc3fa672> in train()
16 optimizer.zero_grad()
17 output = model(train_x)
---> 18 loss = -mll(output, train_y)
19 loss.backward()
20 print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
/opt/conda/lib/python3.6/site-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
179
180 def __call__(self, *inputs, **kwargs):
--> 181 outputs = self.forward(*inputs, **kwargs)
182
183 if isinstance(outputs, tuple):
/opt/conda/lib/python3.6/site-packages/gpytorch/mlls/exact_marginal_log_likelihood.py in forward(self, output, target)
47
48 # Get log determininat and first part of quadratic form
---> 49 inv_quad, log_det = covar.inv_quad_log_det(inv_quad_rhs=(target - mean).unsqueeze(-1), log_det=True)
50
51 # Add terms for SGPR / when inducing points are learned
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/lazy_tensor.py in inv_quad_log_det(self, inv_quad_rhs, log_det)
621 batch_size = self.size(0) if self.ndimension() == 3 else None
622
--> 623 args = lazy_tsr.representation()
624 if inv_quad_rhs is not None:
625 args = [inv_quad_rhs] + list(args)
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/lazy_tensor.py in representation(self)
801 representation.append(arg)
802 elif isinstance(arg, LazyTensor):
--> 803 representation += list(arg.representation())
804 else:
805 raise RuntimeError("Representation of a LazyTensor should consist only of Tensors")
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in representation(self)
161
162 def representation(self):
--> 163 return self.evaluate_kernel().representation()
164
165 def representation_tree(self):
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in evaluate_kernel(self)
143
144 self._cached_kernel_eval = super(Kernel, self.kernel).__call__(
--> 145 x1, x2, diag=False, batch_dims=self.batch_dims, **self.params
146 )
147 if self.squeeze_row:
/opt/conda/lib/python3.6/site-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
179
180 def __call__(self, *inputs, **kwargs):
--> 181 outputs = self.forward(*inputs, **kwargs)
182
183 if isinstance(outputs, tuple):
/opt/conda/lib/python3.6/site-packages/gpytorch/kernels/kernel.py in forward(self, x1, x2, **params)
325 next_term = kern(x1, x2, **params)
326 if isinstance(next_term, LazyEvaluatedKernelTensor):
--> 327 next_term = next_term.evaluate_kernel()
328 res = res + next_term
329 return res
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in evaluate_kernel(self)
143
144 self._cached_kernel_eval = super(Kernel, self.kernel).__call__(
--> 145 x1, x2, diag=False, batch_dims=self.batch_dims, **self.params
146 )
147 if self.squeeze_row:
/opt/conda/lib/python3.6/site-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
179
180 def __call__(self, *inputs, **kwargs):
--> 181 outputs = self.forward(*inputs, **kwargs)
182
183 if isinstance(outputs, tuple):
/opt/conda/lib/python3.6/site-packages/gpytorch/kernels/kernel.py in forward(self, x1, x2, **params)
325 next_term = kern(x1, x2, **params)
326 if isinstance(next_term, LazyEvaluatedKernelTensor):
--> 327 next_term = next_term.evaluate_kernel()
328 res = res + next_term
329 return res
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in evaluate_kernel(self)
143
144 self._cached_kernel_eval = super(Kernel, self.kernel).__call__(
--> 145 x1, x2, diag=False, batch_dims=self.batch_dims, **self.params
146 )
147 if self.squeeze_row:
/opt/conda/lib/python3.6/site-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
179
180 def __call__(self, *inputs, **kwargs):
--> 181 outputs = self.forward(*inputs, **kwargs)
182
183 if isinstance(outputs, tuple):
/opt/conda/lib/python3.6/site-packages/gpytorch/kernels/kernel.py in forward(self, x1, x2, **params)
325 next_term = kern(x1, x2, **params)
326 if isinstance(next_term, LazyEvaluatedKernelTensor):
--> 327 next_term = next_term.evaluate_kernel()
328 res = res + next_term
329 return res
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in evaluate_kernel(self)
143
144 self._cached_kernel_eval = super(Kernel, self.kernel).__call__(
--> 145 x1, x2, diag=False, batch_dims=self.batch_dims, **self.params
146 )
147 if self.squeeze_row:
/opt/conda/lib/python3.6/site-packages/gpytorch/module.py in __call__(self, *inputs, **kwargs)
179
180 def __call__(self, *inputs, **kwargs):
--> 181 outputs = self.forward(*inputs, **kwargs)
182
183 if isinstance(outputs, tuple):
/opt/conda/lib/python3.6/site-packages/gpytorch/kernels/grid_interpolation_kernel.py in forward(self, x1, x2, batch_dims, **params)
177 self.update_inducing_points_and_grid(inducing_points, grid)
178
--> 179 base_lazy_tsr = self._inducing_forward(batch_dims=batch_dims, **params)
180 if x1.size(0) > 1:
181 base_lazy_tsr = base_lazy_tsr.repeat(x1.size(0), 1, 1)
/opt/conda/lib/python3.6/site-packages/gpytorch/kernels/grid_interpolation_kernel.py in _inducing_forward(self, batch_dims, **params)
146 def _inducing_forward(self, batch_dims, **params):
147 return super(GridInterpolationKernel, self).forward(
--> 148 self.inducing_points, self.inducing_points, batch_dims=batch_dims, **params
149 )
150
/opt/conda/lib/python3.6/site-packages/gpytorch/kernels/grid_kernel.py in forward(self, x1, x2, diag, batch_dims, **params)
74 first_item = grid[:, 0:1]
75 covar_columns = self.base_kernel(first_item, grid, diag=False, batch_dims=(0, 2), **params)
---> 76 covar_columns = covar_columns.evaluate().squeeze(-2)
77 if batch_dims == (0, 2):
78 covars = [ToeplitzLazyTensor(covar_columns)]
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/lazy_evaluated_kernel_tensor.py in evaluate(self)
167
168 def evaluate(self):
--> 169 return self.evaluate_kernel().evaluate()
170
171 def exact_predictive_mean(self, full_mean, train_labels, num_train, likelihood, precomputed_cache=None):
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/lazy_tensor.py in evaluate(self)
416 if batch_mode:
417 eye = eye.unsqueeze(0).expand(batch_size, num_rows, num_rows)
--> 418 return self.transpose(1, 2).matmul(eye).transpose(1, 2).contiguous()
419 else:
420 return self.t().matmul(eye).t().contiguous()
/opt/conda/lib/python3.6/site-packages/gpytorch/lazy/interpolated_lazy_tensor.py in matmul(self, tensor)
531 # right_interp^T * tensor
532 base_size = self.base_lazy_tensor.size(-1)
--> 533 right_interp_res = left_t_interp(self.right_interp_indices, self.right_interp_values, tensor, base_size)
534
535 # base_lazy_tensor * right_interp^T * tensor
/opt/conda/lib/python3.6/site-packages/gpytorch/utils/interpolation.py in left_t_interp(interp_indices, interp_values, rhs, output_dim)
237 column_indices = column_indices.repeat(batch_size, 1).view(1, -1)
238
--> 239 summing_matrix_indices = torch.cat([batch_indices, flat_interp_indices, column_indices])
240 summing_matrix_values = torch.ones(
241 batch_size * n_data * n_interp, dtype=interp_values.dtype, device=interp_values.device
RuntimeError: Expected object of scalar type Long but got scalar type Float for sequence elment 1 in sequence argument at position #1 'tensors'
```
I feel like I am not coercing something I should for the `IndexKernel`. For instance, should I be using it directly on the integer location column rather than wrapping it in `GridInterpolationKernel`? Something like this untested guess:
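```python
# Untested guess on my part: drop the SKI wrapper and let IndexKernel act
# directly on the location column, with num_tasks equal to the number of
# locations (20 here) rather than 1. IndexKernel looks entries up in a learned
# covariance matrix, so presumably the column it sees has to be integer-valued
# (long), not float, for the indexing to work.
covar_module_one = kern.IndexKernel(num_tasks=20, rank=1, active_dims=[4])
```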
I basically have two questions:

1) Could someone help me with this particular error, if it's clear what's going wrong?
2) Has anyone implemented a first-order autoregressive (AR1) kernel in GPyTorch yet? I feel like that'd be an amazing addition to have if you guys have done it. In the meantime, my understanding is that an AR1 on an evenly spaced grid is equivalent to an exponential (Matern nu=1/2) kernel, so something like the snippet below could stand in.
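```python
# Assumed stand-in for AR1: on an evenly spaced integer grid, the AR1 covariance
# sigma^2 * rho^|t - t'| equals an exponential (Matern nu=1/2) kernel with
# rho = exp(-1 / lengthscale), so the learned lengthscale plays the role of rho.
ar1_like = kern.ScaleKernel(kern.MaternKernel(nu=0.5, active_dims=[2]))
```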
Thanks!!
@sadatnfs I’ll take a look once I’m back from traveling and see if I can help out!
Given that you are trying to add a single learned noise component that is independent of the actual location, I might just initialize this as a model parameter and add it in `forward`. Something like this (a minimal sketch; the parameter name, the softplus transform, and the stand-in kernel are illustrative):
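```python
import torch
import gpytorch
import gpytorch.kernels as kern

class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = kern.ScaleKernel(kern.RBFKernel())  # stand-in for the kernel sum above
        # a single learned noise component, shared across all locations
        self.register_parameter("raw_loc_noise", torch.nn.Parameter(torch.zeros(1)))

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        # softplus keeps the learned noise positive
        noise = torch.nn.functional.softplus(self.raw_loc_noise)
        covar_x = covar_x + noise * torch.eye(x.size(0), dtype=x.dtype, device=x.device)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
```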
If you want different noises for the different discrete locations, you can make the parameter a vector, and then do something like this (again a sketch; it assumes the integer location codes sit in the last column of `x`):
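```python
class GPRegressionModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, num_locations=20):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = kern.ScaleKernel(kern.RBFKernel())  # stand-in for the kernel sum above
        # one learned noise per discrete location
        self.register_parameter("raw_loc_noise", torch.nn.Parameter(torch.zeros(num_locations)))

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        loc_idx = x[:, -1].long()  # integer location codes, assumed in the last column
        noise = torch.nn.functional.softplus(self.raw_loc_noise)[loc_idx]
        covar_x = covar_x + torch.diag(noise)  # heteroscedastic diagonal
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
```

Either way, the `GaussianLikelihood` noise is still learned on top of this extra diagonal term.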
Do you think this is a reasonable solution for your use case?