# Source code for theanet.layer.outlayers

import numpy as np
import theano as th
import theano.tensor as tt
from .hidden import HiddenLayer
from .weights import borrow, is_shared_var

# Float dtype name taken from Theano's configuration (``floatX``); used in this
# module as the dtype of freshly initialised class centers.
float_x = th.config.floatX
############################### Output Layer  ##################################


class OutputLayer(object):
    """Mixin providing loss functions and error statistics for output layers.

    The class defines no ``__init__``; its methods read attributes that the
    concrete layer is expected to have set:

    * ``loss`` -- name of the loss, used by :meth:`cost`.
    * ``logprob`` -- symbolic matrix indexed as ``[sample, class]``.
    * ``output`` and ``n_out`` -- read by :meth:`hinge`.
    * ``features`` and ``y_preds`` -- returned by
      :meth:`features_and_predictions`.
    * ``kind`` plus ``bitprob`` (``'LOGIT'``) or ``probs`` (anything else) --
      read by :meth:`sym_and_oth_err_rate`.
    """

    @staticmethod
    def _trunc_threshold(loss):
        """Parse the truncation threshold out of a loss name such as "nll90".

        The last two characters are read as a percentage and clipped to
        ``[0, 1]``.  A name that cannot be parsed, or one that yields a
        threshold of zero (e.g. ``"nll00"`` or ``"nll100"``), falls back to
        ``1.0``: ``log(1) == 0`` makes the truncated loss equal to plain NLL,
        whereas ``log(0) == -inf`` would make the loss identically zero.

        Returns:
            A Python float in ``(0, 1]``.
        """
        try:
            # ``100.`` keeps this a true division on Python 2 as well.
            threshold = float(np.clip(int(loss[-2:]) / 100., 0, 1))
        except ValueError:
            threshold = 0.

        if threshold <= 0:
            print("Did not understand {}, using plain NLL".format(loss))
            threshold = 1.0

        return threshold

    def cost(self, y):
        """Return the symbolic cost selected by ``self.loss`` for labels ``y``.

        * ``"nll"`` -- :meth:`neg_log_likli`
        * ``"nllsq"`` -- :meth:`neg_log_likli_sq`
        * any other name starting with ``"nll"`` -- :meth:`neg_log_likli_trunc`
          with the threshold encoded in the name's last two characters
        * ``"hinge"`` -- :meth:`hinge`

        Raises:
            NotImplementedError: if ``self.loss`` is none of the above.
        """
        loss = self.loss

        if loss == "nll":
            return self.neg_log_likli(y)

        if loss == "nllsq":
            return self.neg_log_likli_sq(y)

        if loss.startswith("nll"):
            return self.neg_log_likli_trunc(y, self._trunc_threshold(loss))

        if loss == "hinge":
            return self.hinge(y)

        raise NotImplementedError("Loss : " + loss)

    def neg_log_likli_sq(self, y):
        """Return the mean of the squared ``logprob[i, y[i]]`` over samples."""
        return tt.mean(self.logprob[tt.arange(y.shape[0]), y]**2)

    def neg_log_likli_trunc(self, y, threshold):
        """Return the truncated negative log-likelihood.

        Computes ``mean(max(0, log(threshold) - logprob[i, y[i]]))``, so a
        sample whose log-probability is already at least ``log(threshold)``
        contributes nothing.  ``threshold`` should be positive; zero makes
        ``log(threshold)`` equal to ``-inf``.
        """
        print("Using threshold: ", threshold)
        logthreshold = np.log(threshold)    # A negative number
        return tt.mean(tt.maximum(0, logthreshold
                                  -self.logprob[tt.arange(y.shape[0]), y]))

    def neg_log_likli(self, y):
        """Return minus the mean of ``logprob[i, y[i]]`` over samples."""
        return -tt.mean(self.logprob[tt.arange(y.shape[0]), y])

    def hinge(self, y):
        """Return the mean multi-class hinge loss of ``self.output``.

        For each row ``out`` of ``self.output`` with label ``y_`` the loss is
        ``max(0, 1 + max(out[j] for j != y_) - out[y_])``; rows are processed
        one at a time with ``theano.scan``.
        """
        print("Using Hinge Loss!!!")

        def step(out, y_):
            # Highest score among the classes other than the labelled one.
            others = tt.concatenate((out[:y_], out[y_+1:self.n_out]))
            return tt.maximum(0, 1 + tt.max(others) - out[y_])

        losses, _ = th.scan(step, sequences=[self.output, y])
        return tt.mean(losses)

    def features_and_predictions(self):
        """Return the pair ``(self.features, self.y_preds)``."""
        return self.features, self.y_preds

    def sym_and_oth_err_rate(self, y):
        """Return ``(symbol error rate, second statistic)`` for labels ``y``.

        The symbol error rate is the mean of ``y_preds != y``.  The second
        statistic depends on ``self.kind``: for ``'LOGIT'`` it is the fraction
        of entries of ``bitprob[i, y[i]]`` below 0.5 (bit error rate);
        otherwise it is the mean of ``probs[i, y[i]]``.
        """
        sym_err_rate = tt.mean(tt.neq(self.y_preds, y))

        if self.kind == 'LOGIT':
            # Bit error rate
            second_stat = tt.mean(self.bitprob[tt.arange(y.shape[0]), y] < .5)

        else:
            # Likelihood of MLE
            second_stat = tt.mean(self.probs[tt.arange(y.shape[0]), y])

        return sym_err_rate, second_stat


class SoftmaxLayer(HiddenLayer, OutputLayer):
    """Output layer built from a ``HiddenLayer`` with ``'Softmax'`` activation.

    The layer output is exposed as ``probs``; its logarithm is exposed both as
    ``logprob`` and as ``features``; ``y_preds`` is the arg-max over axis 1.
    """

    def __init__(self, inpt, wts, rand_gen=None, n_in=None, n_out=None,
                 reg=(),
                 loss="nll"):
        """Build the layer.

        Args:
            inpt, wts, rand_gen, n_in, n_out, reg: forwarded unchanged to
                ``HiddenLayer.__init__`` (``TestVersion`` passes ``wts`` as a
                ``(w, b)`` pair).
            loss: loss name later interpreted by ``OutputLayer.cost``.
        """
        HiddenLayer.__init__(self, inpt, wts, rand_gen, n_in, n_out,
                             actvn='Softmax', reg=reg,
                             pdrop=0)
        self.kind = 'SOFTMAX'
        self.loss = loss
        self.probs = self.output
        self.y_preds = tt.argmax(self.output, axis=1)
        self.logprob = tt.log(self.probs)
        self.features = self.logprob

        # ``self.reg`` is not assigned in this class (presumably by
        # HiddenLayer); it must supply the keys named in the template.
        template = ("Softmax In:{:3d} Out:{:3d} Loss:{}"
                    "\n\t  L1:{L1} L2:{L2} Momentum:{momentum} "
                    "Max Norm:{maxnorm} Rate:{rate}")
        self.representation = template.format(self.n_in, self.n_out,
                                              self.loss, **self.reg)

    def TestVersion(self, inpt):
        """Return a new ``SoftmaxLayer`` on ``inpt`` reusing ``self.w``/``self.b``.

        NOTE(review): ``loss`` and ``reg`` are not forwarded, so the returned
        layer uses the constructor defaults -- confirm this is intended.
        """
        return SoftmaxLayer(inpt, (self.w, self.b))

class HingeLayer(HiddenLayer, OutputLayer):
    """Output layer built from a ``HiddenLayer`` with ``'linear'`` activation.

    The raw layer output is exposed unchanged as ``logprob``, ``probs`` and
    ``features`` -- none of them is normalised here -- and ``y_preds`` is the
    arg-max over axis 1.  ``kind`` is ``'SVM'``.
    """

    def __init__(self, inpt, wts, rand_gen=None, n_in=None, n_out=None,
                 reg=(),
                 loss="hinge"):
        """Build the layer.

        Args:
            inpt, wts, rand_gen, n_in, n_out, reg: forwarded unchanged to
                ``HiddenLayer.__init__`` (``TestVersion`` passes ``wts`` as a
                ``(w, b)`` pair).
            loss: loss name later interpreted by ``OutputLayer.cost``.
        """
        HiddenLayer.__init__(self, inpt, wts, rand_gen, n_in, n_out,
                             actvn='linear', reg=reg,
                             pdrop=0)
        self.kind = 'SVM'
        self.loss = loss
        self.y_preds = tt.argmax(self.output, axis=1)
        # Raw scores stand in for both attributes; no softmax is applied.
        self.logprob = self.output
        self.probs = self.output
        self.features = self.logprob

        # ``self.reg`` is not assigned in this class (presumably by
        # HiddenLayer); it must supply the keys named in the template.
        template = ("SVM In:{:3d} Out:{:3d} Loss:{}"
                    "\n\t  L1:{L1} L2:{L2} Momentum:{momentum} "
                    "Max Norm:{maxnorm} Rate:{rate}")
        self.representation = template.format(self.n_in, self.n_out,
                                              self.loss, **self.reg)

    def TestVersion(self, inpt):
        """Return a new ``HingeLayer`` on ``inpt`` reusing ``self.w``/``self.b``.

        NOTE(review): ``loss`` and ``reg`` are not forwarded, so the returned
        layer uses the constructor defaults -- confirm this is intended.
        """
        return HingeLayer(inpt, (self.w, self.b))

# Maps each supported CenteredOutLayer ``kind`` to the activation name that is
# passed to HiddenLayer as ``actvn``.
activs = {'LOGIT': 'sigmoid', 'RBF': 'scaled_tanh'}


class CenteredOutLayer(HiddenLayer, OutputLayer):
    """Output layer that scores classes by comparing features with centers.

    A ``HiddenLayer`` maps the input to ``n_features`` values per sample and
    every class owns one row of ``centers`` (``n_classes x n_features``).
    ``kind`` selects the comparison:

    * ``'LOGIT'`` -- features come from a sigmoid; ``logprob`` is the sum over
      features of ``log(c * v + (1 - c) * (1 - v))``.
    * ``'RBF'`` -- features come from a scaled tanh; squared distances to the
      centers, plus one extra "junk" column holding ``junk_dist``, are negated
      and passed through a softmax.
    """

    def __init__(self, inpt, wts, centers, rand_gen=None,
                 n_in=None, n_features=None, n_classes=None,
                 kind='LOGIT', learn_centers=False, junk_dist=np.inf,
                 reg=(), loss="nll"):
        """Build the layer.

        Args:
            inpt, wts, rand_gen, n_in, reg: forwarded to
                ``HiddenLayer.__init__``; ``wts`` is n_in x n_features.
            centers: n_classes x n_features array or shared variable, or
                ``None`` to draw random centers from ``rand_gen``.
            n_features: passed to ``HiddenLayer.__init__`` as ``n_out``.
            n_classes: number of rows of ``centers`` when they are generated.
            kind: ``'LOGIT'`` or ``'RBF'`` (a key of ``activs``).
            learn_centers: append ``centers`` to ``self.params``; only
                allowed for ``'RBF'``.
            junk_dist: value filling the extra distance column (``'RBF'``).
            loss: loss name later interpreted by ``OutputLayer.cost``.

        Raises:
            AssertionError: if ``kind`` is unknown, if the sizes cannot be
                determined from the arguments, or if ``learn_centers`` is
                requested for a kind other than ``'RBF'``.
        """
        # wts (n_in x n_features)
        # centers (n_classes x n_features)

        # Compare against None: the truth value of a multi-element numpy
        # array is ambiguous and raises ValueError.
        have_wts = wts is not None
        have_centers = centers is not None
        assert kind in activs
        assert n_in or have_wts
        assert n_features or have_wts or have_centers
        assert n_classes or have_centers
        assert kind == 'RBF' or not learn_centers

        HiddenLayer.__init__(self, inpt, wts, rand_gen, n_in, n_out=n_features,
                             actvn=activs[kind], pdrop=0, reg=reg)

        # Initialize centers
        if centers is None:
            if kind == 'LOGIT':
                # Random binary codes, one per class.
                centers_vals = rand_gen.binomial(n=1, p=.5,
                                                 size=(n_classes, n_features))
            elif kind == 'RBF':
                centers_vals = rand_gen.uniform(low=0, high=1,
                                                size=(n_classes, n_features))
            centers = np.asarray(centers_vals, dtype=float_x)

        if is_shared_var(centers):
            self.centers = centers
        else:
            self.centers = th.shared(centers, name='centers', borrow=True)

        if learn_centers:
            self.params.append(self.centers)

        # Populate various n's based on weights
        if not n_in or not n_features:
            n_in, n_features = borrow(self.w).shape
        if not n_features or not n_classes:
            n_classes, n_features = borrow(self.centers).shape

        # c = centers; v = output of hidden layer = calculated features
        self.features = self.output  # Refers to the output of HiddenLayer
        c = self.centers.dimshuffle('x', 0, 1)
        v = self.features.dimshuffle(0, 'x', 1)
        self.kind = kind
        self.junk_dist = junk_dist
        # OutputLayer.cost reads self.loss; nothing else in this class sets it.
        self.loss = loss

        if kind == 'LOGIT':
            # BATCH_SZ x nClasses x nFeatures >> BATCH_SZ x nClasses >> BATCH_SZ
            # Squeeze v into [epsilon, 1 - epsilon] so the log stays finite.
            epsilon = .001
            v = v * (1 - 2 * epsilon) + epsilon
            self.bitprob = c * v + (1 - c) * (1 - v)
            self.logprob = tt.sum(tt.log(self.bitprob), axis=2)
            self.y_preds = tt.argmax(self.logprob, axis=1)
        elif kind == 'RBF':
            dists = tt.sum((v - c) ** 2, axis=2)  # BATCH_SZ x nClasses
            # Column 0 always exists, so this also works with a single class.
            junk_col = junk_dist + tt.zeros_like(dists[:, 0]).dimshuffle(0, 'x')
            self.dists = tt.concatenate([dists, junk_col], axis=1)
            self.probs = tt.nnet.softmax(-self.dists)  # BATCH_SZ x nClasses+1
            self.logprob = tt.log(self.probs)
            self.y_preds = tt.argmax(self.probs, axis=1)

        self.representation = ('CenteredOut Kind:{} In:{:3d} Hidden:{:3d} '
                               'Out:{:3d} learn_centers:{} junk_dist:{}'.format(
            kind, n_in, n_features, n_classes, learn_centers, junk_dist))

    def TestVersion(self, inpt):
        """Return a new ``CenteredOutLayer`` on ``inpt`` sharing this layer's
        weights and centers, with the same ``kind``, ``junk_dist`` and
        ``loss``."""
        return CenteredOutLayer(inpt, (self.w, self.b), self.centers,
                                kind=self.kind, junk_dist=self.junk_dist,
                                loss=self.loss)