import numpy as np
import theano as th
import theano.tensor as tt
from .hidden import HiddenLayer
from .weights import borrow, is_shared_var
# Theano's configured default float dtype; used below as the dtype of newly
# created center arrays.
float_x = th.config.floatX
############################### Output Layer ##################################
class OutputLayer(object):
    """Mixin with the loss functions and error statistics of an output layer.

    The methods only read attributes; the concrete layer class is expected to
    have set them beforehand: ``loss``, ``logprob``, ``output``, ``n_out``,
    ``features``, ``y_preds``, ``kind`` and one of ``probs`` / ``bitprob``.
    In every method ``y`` is used as an integer index into the second axis of
    the per-sample score matrix, one entry per row (``y.shape[0]`` rows).
    """

    def cost(self, y):
        """Return the symbolic cost selected by ``self.loss``.

        Recognised values of ``self.loss``:

        * ``"nll"``    -- mean negative log-likelihood.
        * ``"nllsq"``  -- mean squared log-likelihood.
        * ``"nll.."``  -- any other string starting with ``"nll"``: the last
          two characters are parsed as an integer percentage and used as the
          truncation threshold (``"nll50"`` -> 0.5).  If they do not parse,
          a message is printed and a threshold of 1.0 is used.
        * ``"hinge"``  -- multi-class hinge loss.

        Raises:
            NotImplementedError: for any other value of ``self.loss``.
        """
        loss = self.loss

        if loss == "nll":
            return self.neg_log_likli(y)

        if loss == "nllsq":
            return self.neg_log_likli_sq(y)

        if loss.startswith("nll"):
            try:
                # Float divisor on purpose: with Python 2 semantics
                # ``int / 100`` floors every two-digit percentage to 0.
                threshold = int(loss[-2:]) / 100.0
            except ValueError:
                print("Did not understand {}, using plain NLL".format(loss))
                threshold = 1.0
            else:
                threshold = np.clip(threshold, 0, 1)
            return self.neg_log_likli_trunc(y, threshold)

        if loss == "hinge":
            return self.hinge(y)

        raise NotImplementedError("Loss : " + loss)

    def neg_log_likli_sq(self, y):
        """Return the mean of the squared log-probability of the targets."""
        target_logprob = self.logprob[tt.arange(y.shape[0]), y]
        return tt.mean(target_logprob ** 2)

    def neg_log_likli_trunc(self, y, threshold):
        """Return the mean of ``max(0, log(threshold) - logprob[target])``.

        Samples whose target log-probability already reaches
        ``log(threshold)`` contribute zero.  With ``threshold == 1`` this is
        the plain negative log-likelihood clipped at zero.  Prints the
        threshold being used.
        """
        print("Using threshold: ", threshold)
        logthreshold = np.log(threshold)  # A negative number (0 for 1.0)
        target_logprob = self.logprob[tt.arange(y.shape[0]), y]
        return tt.mean(tt.maximum(0, logthreshold - target_logprob))

    def neg_log_likli(self, y):
        """Return the mean negative log-probability of the targets."""
        return -tt.mean(self.logprob[tt.arange(y.shape[0]), y])

    def hinge(self, y):
        """Return the mean multi-class hinge loss over ``self.output``.

        For each row the loss is
        ``max(0, 1 + max(scores of the other classes) - score of y)``.
        Prints a notice that the hinge loss is in use.
        """
        print("Using Hinge Loss!!!")

        def step(out, y_):
            # Scores of every class except the target one.
            others = tt.concatenate((out[:y_], out[y_ + 1:self.n_out]))
            return tt.maximum(0, 1 + tt.max(others) - out[y_])

        # scan iterates over the leading axis of output and y in lockstep.
        losses, _ = th.scan(step, sequences=[self.output, y])
        return tt.mean(losses)

    def features_and_predictions(self):
        """Return the pair ``(self.features, self.y_preds)``."""
        return self.features, self.y_preds

    def sym_and_oth_err_rate(self, y):
        """Return ``(symbol error rate, second statistic)`` for targets ``y``.

        The symbol error rate is the mean of ``y_preds != y``.  The second
        statistic depends on ``self.kind``: for ``'LOGIT'`` it is the mean of
        ``bitprob[target] < .5`` (bit error rate); otherwise it is the mean
        of ``probs[target]``.
        """
        sym_err_rate = tt.mean(tt.neq(self.y_preds, y))
        rows = tt.arange(y.shape[0])

        if self.kind == 'LOGIT':
            # Bit error rate
            second_stat = tt.mean(self.bitprob[rows, y] < .5)
        else:
            # Likelihood of MLE
            second_stat = tt.mean(self.probs[rows, y])

        return sym_err_rate, second_stat
class SoftmaxLayer(HiddenLayer, OutputLayer):
    """Output layer that applies a 'Softmax' activation via HiddenLayer.

    ``self.output``, ``self.n_in``, ``self.n_out``, ``self.reg``, ``self.w``
    and ``self.b`` are read here but not assigned in this class; presumably
    they are set by ``HiddenLayer.__init__`` (not visible here -- confirm).
    """

    def __init__(self, inpt, wts, rand_gen=None, n_in=None, n_out=None,
                 reg=(),
                 loss="nll"):
        """Build the layer.

        Args:
            inpt, wts, rand_gen, n_in, n_out, reg: forwarded unchanged to
                ``HiddenLayer.__init__``.
            loss: name of the loss that ``OutputLayer.cost`` dispatches on.
        """
        HiddenLayer.__init__(self, inpt, wts, rand_gen, n_in, n_out,
                             actvn='Softmax', reg=reg,
                             pdrop=0)

        self.probs = self.output
        self.logprob = tt.log(self.probs)
        self.features = self.logprob
        self.y_preds = tt.argmax(self.output, axis=1)
        self.kind = 'SOFTMAX'
        self.loss = loss

        self.representation = (
            "Softmax In:{:3d} Out:{:3d} Loss:{}"
            "\n\t L1:{L1} L2:{L2} Momentum:{momentum} Max Norm:{maxnorm} "
            "Rate:{rate}".format(self.n_in, self.n_out,
                                 self.loss, **self.reg))

    def TestVersion(self, inpt):
        """Return a SoftmaxLayer on ``inpt`` that reuses this layer's weights.

        The configured ``loss`` is carried over so that ``cost`` on the test
        version matches the training layer.
        """
        return SoftmaxLayer(inpt, (self.w, self.b), loss=self.loss)
class HingeLayer(HiddenLayer, OutputLayer):
    """Output layer with a 'linear' activation, reported as kind 'SVM'.

    The raw layer output is used directly as ``logprob``, ``probs`` and
    ``features`` (no softmax is applied).  ``self.output``, ``self.n_in``,
    ``self.n_out``, ``self.reg``, ``self.w`` and ``self.b`` are presumably
    set by ``HiddenLayer.__init__`` (not visible here -- confirm).
    """

    def __init__(self, inpt, wts, rand_gen=None, n_in=None, n_out=None,
                 reg=(),
                 loss="hinge"):
        """Build the layer.

        Args:
            inpt, wts, rand_gen, n_in, n_out, reg: forwarded unchanged to
                ``HiddenLayer.__init__``.
            loss: name of the loss that ``OutputLayer.cost`` dispatches on.
        """
        HiddenLayer.__init__(self, inpt, wts, rand_gen, n_in, n_out,
                             actvn='linear', reg=reg,
                             pdrop=0)

        self.logprob = self.output
        self.probs = self.output  # tt.nnet.softmax(self.output)
        self.features = self.logprob
        self.y_preds = tt.argmax(self.output, axis=1)
        self.kind = 'SVM'
        self.loss = loss

        self.representation = (
            "SVM In:{:3d} Out:{:3d} Loss:{}"
            "\n\t L1:{L1} L2:{L2} Momentum:{momentum} Max Norm:{maxnorm} "
            "Rate:{rate}".format(self.n_in, self.n_out,
                                 self.loss, **self.reg))

    def TestVersion(self, inpt):
        """Return a HingeLayer on ``inpt`` that reuses this layer's weights.

        The configured ``loss`` is carried over so that ``cost`` on the test
        version matches the training layer.
        """
        return HingeLayer(inpt, (self.w, self.b), loss=self.loss)
# Maps each supported CenteredOutLayer `kind` to the activation name that is
# passed to HiddenLayer as `actvn`.
activs = {'LOGIT': 'sigmoid', 'RBF': 'scaled_tanh'}
class CenteredOutLayer(HiddenLayer, OutputLayer):
    """Output layer that scores classes by comparing features to centers.

    A HiddenLayer maps the input to ``n_features`` values; each class owns a
    center of the same width, and classes are scored against those centers:

    * ``kind='LOGIT'``: features are treated as per-bit probabilities and the
      class score is the summed log-probability of the center's bits.
      ``self.probs`` is not defined for this kind.
    * ``kind='RBF'``: the class score is the softmax of the negative squared
      distance to each center, with one extra "junk" column at constant
      distance ``junk_dist`` appended after the real classes.

    ``self.output``, ``self.w``, ``self.b`` and ``self.params`` are presumably
    set by ``HiddenLayer.__init__`` (not visible here -- confirm).
    """

    def __init__(self, inpt, wts, centers, rand_gen=None,
                 n_in=None, n_features=None, n_classes=None,
                 kind='LOGIT', learn_centers=False, junk_dist=np.inf,
                 reg=(), loss="nll"):
        """Build the layer.

        Args:
            inpt, wts, rand_gen, n_in, reg: forwarded to
                ``HiddenLayer.__init__``; ``wts`` is (n_in x n_features).
            centers: (n_classes x n_features) array or shared variable, or
                ``None`` to draw random centers from ``rand_gen``.
            n_features: width of the hidden output (HiddenLayer's ``n_out``).
            n_classes: number of centers; needed when ``centers`` is None.
            kind: ``'LOGIT'`` or ``'RBF'`` (a key of ``activs``).
            learn_centers: append the centers to ``self.params``; only
                allowed for ``kind='RBF'``.
            junk_dist: constant distance of the extra junk column (RBF only).
            loss: name of the loss that ``OutputLayer.cost`` dispatches on.
        """
        # wts (n_in x n_features)
        # centers (n_classesx n_features)
        assert kind in activs
        # Compare against None explicitly: the truth value of a multi-element
        # NumPy array is ambiguous and would raise instead of validating.
        assert n_in or wts is not None
        assert n_features or wts is not None or centers is not None
        assert n_classes or centers is not None
        assert kind == 'RBF' or not learn_centers

        HiddenLayer.__init__(self, inpt, wts, rand_gen, n_in, n_out=n_features,
                             actvn=activs[kind], pdrop=0, reg=reg)

        # Read by OutputLayer.cost; without it cost() raises AttributeError.
        self.loss = loss

        # Initialize centers
        if centers is None:
            if kind == 'LOGIT':
                centers_vals = rand_gen.binomial(
                    n=1, p=.5, size=(n_classes, n_features))
            elif kind == 'RBF':
                centers_vals = rand_gen.uniform(
                    low=0, high=1, size=(n_classes, n_features))
            centers = np.asarray(centers_vals, dtype=float_x)

        if is_shared_var(centers):
            self.centers = centers
        else:
            self.centers = th.shared(centers, name='centers', borrow=True)

        if learn_centers:
            self.params.append(self.centers)

        # Populate various n's based on weights
        if not n_in or not n_features:
            n_in, n_features = borrow(self.w).shape
        if not n_features or not n_classes:
            n_classes, n_features = borrow(self.centers).shape

        # c = centers; v = output of hidden layer = calculated features
        self.features = self.output  # Refers to the output of HiddenLayer
        c = self.centers.dimshuffle('x', 0, 1)
        v = self.features.dimshuffle(0, 'x', 1)
        self.kind = kind
        self.junk_dist = junk_dist

        if kind == 'LOGIT':
            # BATCH_SZ x nClasses x nFeatures >> BATCH_SZ x nClasses >> BATCH_SZ
            # Squeeze v into [epsilon, 1 - epsilon] so the log below is finite.
            epsilon = .001
            v = v * (1 - 2 * epsilon) + epsilon
            self.bitprob = c * v + (1 - c) * (1 - v)
            self.logprob = tt.sum(tt.log(self.bitprob), axis=2)
            self.y_preds = tt.argmax(self.logprob, axis=1)

        elif kind == 'RBF':
            dists = tt.sum((v - c) ** 2, axis=2)  # BATCH_SZ x nClasses
            # Column 0 always exists; column 1 would be out of range when
            # there is only a single class.
            junk_col = junk_dist + tt.zeros_like(dists[:, 0]).dimshuffle(0, 'x')
            self.dists = tt.concatenate([dists, junk_col], axis=1)
            self.probs = tt.nnet.softmax(-self.dists)  # BATCH_SZ x nClasses+1
            self.logprob = tt.log(self.probs)
            self.y_preds = tt.argmax(self.probs, axis=1)

        self.representation = (
            'CenteredOut Kind:{} In:{:3d} Hidden:{:3d} '
            'Out:{:3d} learn_centers:{} junk_dist:{}'.format(
                kind, n_in, n_features, n_classes, learn_centers, junk_dist))

    def TestVersion(self, inpt):
        """Return a CenteredOutLayer on ``inpt`` sharing weights and centers.

        ``kind``, ``junk_dist`` and ``loss`` are carried over; centers are
        never learnable in the test version.
        """
        return CenteredOutLayer(inpt, (self.w, self.b), self.centers,
                                kind=self.kind, junk_dist=self.junk_dist,
                                loss=self.loss)