Python theano.tensor.patternbroadcast() Examples
The following are 30 code examples of theano.tensor.patternbroadcast(). You can go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module theano.tensor.
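For orientation, here is a minimal sketch of what patternbroadcast itself does (the variable names are illustrative, not taken from any of the projects below): it returns a view of a variable whose broadcastable pattern is forced to the one you pass, without changing the underlying data. An axis may only be marked broadcastable when it genuinely has length 1 at runtime.

import theano.tensor as T

x = T.matrix('x')                       # broadcastable pattern: (False, False)
# Assert that the second axis always has length 1, so elementwise ops may
# broadcast along it; Theano cannot infer this from a symbolic shape alone.
y = T.patternbroadcast(x, (False, True))
print(y.broadcastable)                  # -> (False, True)

This is the pattern the examples below rely on: when an operation such as zeros, binomial, or a cuDNN convolution produces a variable whose inferred broadcast pattern is too strict or too loose, patternbroadcast restores the pattern the surrounding graph expects.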
Example #1
Source File: opt.py From D-VAE with MIT License

def local_gpualloc_memset_0(node):
    if isinstance(node.op, GpuAlloc) and not node.op.memset_0:
        inp = node.inputs[0]
        if (isinstance(inp, CudaNdarrayConstant) and
                inp.data.size == 1 and
                (numpy.asarray(inp.data) == 0).all()):
            new_out = GpuAlloc(memset_0=True)(*node.inputs)
            old_bcast = node.outputs[0].type.broadcastable
            if new_out.type.broadcastable != old_bcast:
                # check that we did not try discarding a broadcastable
                # dimension
                assert not any(b_old and not b_new
                               for b_old, b_new in zip(old_bcast,
                                                       new_out.type.broadcastable))
                # force old broadcasting pattern; we must not change it here
                new_out = tensor.patternbroadcast(new_out, old_bcast)
            return [new_out]
Example #2
Source File: recurrent.py From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License

def init_state(self, network):
    super(SimpleRecurrentNode, self).init_state(network)
    num_units = network.find_hyperparameter(["num_units"])
    # FIXME use batch_axis instead of batch_size
    batch_size = network.find_hyperparameter(["batch_size"])
    if batch_size is None:
        shape = (num_units,)
    else:
        shape = (batch_size, num_units)
    zeros = T.zeros(shape)
    # unfortunately, theano.tensor.zeros makes the result broadcastable
    # if the shape of any dimension is 1, so we have to undo this
    value = T.patternbroadcast(zeros, (False,) * len(shape))
    network.set_hyperparameter(self._name + "_initialstate",
                               "constant_value",
                               value)
Example #3
Source File: dnn.py From attention-lvcsr with MIT License

def local_conv_dnn(node):
    if not dnn_available():
        return
    if isinstance(node.op, GpuConv):
        if node.op.border_mode not in ['full', 'valid']:
            return
        img, kern = node.inputs
        border_mode = node.op.border_mode
        subsample = node.op.subsample
        direction_hint = node.op.direction_hint
        rval = dnn_conv(img, kern,
                        border_mode=border_mode, subsample=subsample,
                        direction_hint=direction_hint)
        if node.outputs[0].broadcastable != rval.broadcastable:
            rval = tensor.patternbroadcast(
                rval, node.outputs[0].type.broadcastable)
        return [rval]


# This optimizer is registered in opt.py as part of the meta-optimizer.
# It tries exactly the opposite code path of what local_conv_dnn() uses,
# because for some input/kernel shape configurations, this is faster.
Example #4
Source File: abstract_conv.py From attention-lvcsr with MIT License

def grad(self, inp, grads):
    weights, top = inp[:2]
    bottom, = grads
    d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
                                           self.border_mode,
                                           self.subsample)(
        bottom, top, weights.shape[-2:])
    d_top = AbstractConv2d(self.imshp, self.kshp,
                           self.border_mode, self.subsample)(
        bottom, weights)
    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    d_weights = weights.type.filter_variable(d_weights)
    d_top = patternbroadcast(d_top, top.broadcastable)
    d_top = top.type.filter_variable(d_top)

    d_height_width = (theano.gradient.DisconnectedType()(),)
    return (d_weights, d_top) + d_height_width
Example #5
Source File: recurrent.py From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License

def init_state(self, network):
    super(SimpleRecurrentNode, self).init_state(network)
    num_units = network.find_hyperparameter(["num_units"])
    # FIXME use batch_axis instead of batch_size
    batch_size = network.find_hyperparameter(["batch_size"])
    if batch_size is None:
        shape = (num_units,)
    else:
        shape = (batch_size, num_units)
    zeros = T.zeros(shape)
    # unfortunately, theano.tensor.zeros makes the result broadcastable
    # if the shape of any dimension is 1, so we have to undo this
    value = T.patternbroadcast(zeros, (False,) * len(shape))
    network.set_hyperparameter(self._name + "_initialstate",
                               "constant_value",
                               value)
Example #6
Source File: abstract_conv.py From attention-lvcsr with MIT License

def grad(self, inp, grads):
    bottom, weights = inp
    top, = grads
    d_bottom = AbstractConv2d_gradInputs(self.imshp, self.kshp,
                                         self.border_mode, self.subsample,
                                         self.filter_flip)(
        weights, top, bottom.shape[-2:])
    d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
                                           self.border_mode, self.subsample,
                                           self.filter_flip)(
        bottom, top, weights.shape[-2:])
    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
    d_bottom = bottom.type.filter_variable(d_bottom)
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    d_weights = weights.type.filter_variable(d_weights)
    return d_bottom, d_weights
Example #7
Source File: layers.py From kusanagi with MIT License

def sample_noise(self, input):
    # get noise_shape
    noise_shape = self.input_shape
    if any(s is None for s in noise_shape):
        noise_shape = input.shape

    # respect shared axes
    if self.shared_axes:
        shared_axes = tuple(a if a >= 0 else a + input.ndim
                            for a in self.shared_axes)
        noise_shape = tuple(1 if a in shared_axes else s
                            for a, s in enumerate(noise_shape))

    one = tt.constant(1)
    retain_prob = one - self.p
    noise = self._srng.binomial(noise_shape, p=retain_prob,
                                dtype=floatX)

    if self.shared_axes:
        bcast = tuple(bool(s == 1) for s in noise_shape)
        noise = tt.patternbroadcast(noise, bcast)
    return noise
Example #8
Source File: layers.py From kusanagi with MIT License

def sample_noise(self, input, mean=0, std=1):
    # get noise_shape
    noise_shape = input.shape

    # respect shared axes
    if self.shared_axes:
        shared_axes = tuple(a if a >= 0 else a + input.ndim
                            for a in self.shared_axes)
        noise_shape = tuple(1 if a in shared_axes else s
                            for a, s in enumerate(noise_shape))

    noise = self._srng.normal(noise_shape, avg=mean, std=std,
                              dtype=floatX)

    if self.shared_axes:
        bcast = tuple(bool(s == 1) for s in noise_shape)
        noise = tt.patternbroadcast(noise, bcast)
    return noise
Example #9
Source File: recurrent.py From treeano with Apache License 2.0

def init_state(self, network):
    super(SimpleRecurrentNode, self).init_state(network)
    num_units = network.find_hyperparameter(["num_units"])
    # FIXME use batch_axis instead of batch_size
    batch_size = network.find_hyperparameter(["batch_size"])
    if batch_size is None:
        shape = (num_units,)
    else:
        shape = (batch_size, num_units)
    zeros = T.zeros(shape)
    # unfortunately, theano.tensor.zeros makes the result broadcastable
    # if the shape of any dimension is 1, so we have to undo this
    value = T.patternbroadcast(zeros, (False,) * len(shape))
    network.set_hyperparameter(self._name + "_initialstate",
                               "constant_value",
                               value)
Example #10
Source File: layers.py From kusanagi with MIT License

def sample_noise(self, input, a=1e-5, b=1-1e-5):
    # get noise_shape
    noise_shape = input.shape

    # respect shared axes
    if self.shared_axes:
        shared_axes = tuple(a if a >= 0 else a + input.ndim
                            for a in self.shared_axes)
        noise_shape = tuple(1 if a in shared_axes else s
                            for a, s in enumerate(noise_shape))

    noise = self._srng.uniform(noise_shape, low=a, high=b,
                               dtype=floatX)

    if self.shared_axes:
        bcast = tuple(bool(s == 1) for s in noise_shape)
        noise = tt.patternbroadcast(noise, bcast)
    return noise
Example #11
Source File: recurrent.py From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License

def init_state(self, network):
    super(SimpleRecurrentNode, self).init_state(network)
    num_units = network.find_hyperparameter(["num_units"])
    # FIXME use batch_axis instead of batch_size
    batch_size = network.find_hyperparameter(["batch_size"])
    if batch_size is None:
        shape = (num_units,)
    else:
        shape = (batch_size, num_units)
    zeros = T.zeros(shape)
    # unfortunately, theano.tensor.zeros makes the result broadcastable
    # if the shape of any dimension is 1, so we have to undo this
    value = T.patternbroadcast(zeros, (False,) * len(shape))
    network.set_hyperparameter(self._name + "_initialstate",
                               "constant_value",
                               value)
Example #12
Source File: opt.py From D-VAE with MIT License

def local_abstractconv_gradweight_gemm(node):
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not isinstance(img.type, CudaNdarrayType) or \
            not isinstance(topgrad.type, CudaNdarrayType):
        return None

    rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
                                 subsample=node.op.subsample)(
        gpu_contiguous(img), gpu_contiguous(topgrad), shape)
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1]
    rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    rval = as_cuda_ndarray_variable(rval)
    return [rval]
Example #13
Source File: recurrent.py From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License

def init_state(self, network):
    super(SimpleRecurrentNode, self).init_state(network)
    num_units = network.find_hyperparameter(["num_units"])
    # FIXME use batch_axis instead of batch_size
    batch_size = network.find_hyperparameter(["batch_size"])
    if batch_size is None:
        shape = (num_units,)
    else:
        shape = (batch_size, num_units)
    zeros = T.zeros(shape)
    # unfortunately, theano.tensor.zeros makes the result broadcastable
    # if the shape of any dimension is 1, so we have to undo this
    value = T.patternbroadcast(zeros, (False,) * len(shape))
    network.set_hyperparameter(self._name + "_initialstate",
                               "constant_value",
                               value)
Example #14
Source File: WordDropout.py From neural-dep-srl with Apache License 2.0

def get_output_for(self, input, deterministic=False, **kwargs):
    if deterministic or self.alpha == 0:
        return T.ones_like(self.retain, dtype=input.dtype)
    else:
        # use nonsymbolic shape for dropout mask if possible
        mask_shape = self.input_shape
        if any(s is None for s in mask_shape):
            mask_shape = input.shape

        # apply dropout, respecting shared axes
        if self.shared_axes:
            shared_axes = tuple(a if a >= 0 else a + input.ndim
                                for a in self.shared_axes)
            mask_shape = tuple(1 if a in shared_axes else s
                               for a, s in enumerate(mask_shape))
        mask = self._srng.binomial(mask_shape, p=self.retain,
                                   dtype=input.dtype)
        if self.shared_axes:
            bcast = tuple(bool(s == 1) for s in mask_shape)
            mask = T.patternbroadcast(mask, bcast)
        return mask
Example #15
Source File: opt.py From attention-lvcsr with MIT License

def local_gpualloc_memset_0(node):
    if isinstance(node.op, GpuAlloc) and not node.op.memset_0:
        inp = node.inputs[0]
        if (isinstance(inp, CudaNdarrayConstant) and
                inp.data.size == 1 and
                (numpy.asarray(inp.data) == 0).all()):
            new_out = GpuAlloc(memset_0=True)(*node.inputs)
            old_bcast = node.outputs[0].type.broadcastable
            if new_out.type.broadcastable != old_bcast:
                # check that we did not try discarding a broadcastable
                # dimension
                assert not any(b_old and not b_new
                               for b_old, b_new in zip(old_bcast,
                                                       new_out.type.broadcastable))
                # force old broadcasting pattern; we must not change it here
                new_out = tensor.patternbroadcast(new_out, old_bcast)
            return [new_out]
Example #16
Source File: dnn.py From D-VAE with MIT License

def local_conv_dnn(node):
    if not dnn_available():
        return
    if isinstance(node.op, GpuConv):
        if node.op.border_mode not in ['full', 'valid']:
            return
        img, kern = node.inputs
        border_mode = node.op.border_mode
        subsample = node.op.subsample
        direction_hint = node.op.direction_hint
        rval = dnn_conv(img, kern,
                        border_mode=border_mode, subsample=subsample,
                        direction_hint=direction_hint)
        if node.outputs[0].broadcastable != rval.broadcastable:
            rval = tensor.patternbroadcast(
                rval, node.outputs[0].type.broadcastable)
        return [rval]


# This optimizer is registered in opt.py as part of the meta-optimizer.
# It tries exactly the opposite code path of what local_conv_dnn() uses,
# because for some input/kernel shape configurations, this is faster.
Example #17
Source File: abstract_conv.py From D-VAE with MIT License

def grad(self, inp, grads):
    weights, top = inp[:2]
    bottom, = grads
    d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
                                           self.border_mode,
                                           self.subsample)(
        bottom, top, weights.shape[-2:])
    d_top = AbstractConv2d(self.imshp, self.kshp,
                           self.border_mode, self.subsample)(
        bottom, weights)
    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    d_weights = weights.type.filter_variable(d_weights)
    d_top = patternbroadcast(d_top, top.broadcastable)
    d_top = top.type.filter_variable(d_top)

    d_height_width = (theano.gradient.DisconnectedType()(),)
    return (d_weights, d_top) + d_height_width
Example #18
Source File: opt.py From attention-lvcsr with MIT License

def local_abstractconv_gradweight_gemm(node):
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None
    img, topgrad, shape = node.inputs
    if not isinstance(img.type, CudaNdarrayType) or \
            not isinstance(topgrad.type, CudaNdarrayType):
        return None

    rval = GpuCorrMM_gradWeights(border_mode=node.op.border_mode,
                                 subsample=node.op.subsample)(
        gpu_contiguous(img), gpu_contiguous(topgrad), shape)
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1]
    rval = tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    rval = as_cuda_ndarray_variable(rval)
    return [rval]
Example #19
Source File: abstract_conv.py From D-VAE with MIT License

def grad(self, inp, grads):
    bottom, weights = inp
    top, = grads
    d_bottom = AbstractConv2d_gradInputs(self.imshp, self.kshp,
                                         self.border_mode, self.subsample,
                                         self.filter_flip)(
        weights, top, bottom.shape[-2:])
    d_weights = AbstractConv2d_gradWeights(self.imshp, self.kshp,
                                           self.border_mode, self.subsample,
                                           self.filter_flip)(
        bottom, top, weights.shape[-2:])
    # Make sure that the broadcastable pattern of the inputs is used
    # for the gradients, even if the grad opts are not able to infer
    # that the dimensions are broadcastable.
    # Also make sure that the gradient lives on the same device as
    # the corresponding input.
    d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
    d_bottom = bottom.type.filter_variable(d_bottom)
    d_weights = patternbroadcast(d_weights, weights.broadcastable)
    d_weights = weights.type.filter_variable(d_weights)
    return d_bottom, d_weights
Example #20
Source File: recurrent.py From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License

def init_state(self, network):
    super(SimpleRecurrentNode, self).init_state(network)
    num_units = network.find_hyperparameter(["num_units"])
    # FIXME use batch_axis instead of batch_size
    batch_size = network.find_hyperparameter(["batch_size"])
    if batch_size is None:
        shape = (num_units,)
    else:
        shape = (batch_size, num_units)
    zeros = T.zeros(shape)
    # unfortunately, theano.tensor.zeros makes the result broadcastable
    # if the shape of any dimension is 1, so we have to undo this
    value = T.patternbroadcast(zeros, (False,) * len(shape))
    network.set_hyperparameter(self._name + "_initialstate",
                               "constant_value",
                               value)
Example #21
Source File: theano_backend.py From KerasNeuralFingerprint with MIT License

def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".
    '''
    broadcastable = x.broadcastable[:axis] + x.broadcastable[axis+1:]
    x = T.patternbroadcast(x, [i == axis for i in range(x.type.ndim)])
    x = T.squeeze(x)
    x = T.patternbroadcast(x, broadcastable)
    return x
Example #22
Source File: prelu.py From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License

def compute_output(self, network, in_vw):
    # gather hyperparameters
    initial_alpha = network.find_hyperparameter(
        ["initial_alpha"],
        0.25)
    # calculate_shape
    ndim = in_vw.ndim
    parameter_axes = treeano.utils.find_axes(
        network,
        ndim,
        positive_keys=["parameter_axes"],
        negative_keys=["non_parameter_axes"],
        positive_default=[treeano.utils.nth_non_batch_axis(network, 0)])
    broadcastable = tuple([i not in parameter_axes for i in range(ndim)])
    shape = tuple([1 if b else s
                   for b, s in zip(broadcastable, in_vw.shape)])
    # create state
    alpha_vw = network.create_vw(
        "alpha",
        is_shared=True,
        shape=shape,
        tags={"parameter", "bias"},
        default_inits=[treeano.inits.ConstantInit(initial_alpha)],
    )
    alpha = T.patternbroadcast(alpha_vw.variable, broadcastable)
    # return output
    network.create_vw(
        "default",
        variable=treeano.utils.rectify(in_vw.variable,
                                       negative_coefficient=alpha),
        shape=in_vw.shape,
        tags={"output"},
    )
Example #23
Source File: prelu.py From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License

def compute_output(self, network, in_vw):
    # gather hyperparameters
    initial_alpha = network.find_hyperparameter(
        ["initial_alpha"],
        0.25)
    # calculate_shape
    ndim = in_vw.ndim
    parameter_axes = treeano.utils.find_axes(
        network,
        ndim,
        positive_keys=["parameter_axes"],
        negative_keys=["non_parameter_axes"],
        positive_default=[treeano.utils.nth_non_batch_axis(network, 0)])
    broadcastable = tuple([i not in parameter_axes for i in range(ndim)])
    shape = tuple([1 if b else s
                   for b, s in zip(broadcastable, in_vw.shape)])
    # create state
    alpha_vw = network.create_vw(
        "alpha",
        is_shared=True,
        shape=shape,
        tags={"parameter", "bias"},
        default_inits=[treeano.inits.ConstantInit(initial_alpha)],
    )
    alpha = T.patternbroadcast(alpha_vw.variable, broadcastable)
    # return output
    network.create_vw(
        "default",
        variable=treeano.utils.rectify(in_vw.variable,
                                       negative_coefficient=alpha),
        shape=in_vw.shape,
        tags={"output"},
    )
Example #24
Source File: prelu.py From u24_lymphocyte with BSD 3-Clause "New" or "Revised" License

def compute_output(self, network, in_vw):
    # gather hyperparameters
    initial_alpha = network.find_hyperparameter(
        ["initial_alpha"],
        0.25)
    # calculate_shape
    ndim = in_vw.ndim
    parameter_axes = treeano.utils.find_axes(
        network,
        ndim,
        positive_keys=["parameter_axes"],
        negative_keys=["non_parameter_axes"],
        positive_default=[treeano.utils.nth_non_batch_axis(network, 0)])
    broadcastable = tuple([i not in parameter_axes for i in range(ndim)])
    shape = tuple([1 if b else s
                   for b, s in zip(broadcastable, in_vw.shape)])
    # create state
    alpha_vw = network.create_vw(
        "alpha",
        is_shared=True,
        shape=shape,
        tags={"parameter", "bias"},
        default_inits=[treeano.inits.ConstantInit(initial_alpha)],
    )
    alpha = T.patternbroadcast(alpha_vw.variable, broadcastable)
    # return output
    network.create_vw(
        "default",
        variable=treeano.utils.rectify(in_vw.variable,
                                       negative_coefficient=alpha),
        shape=in_vw.shape,
        tags={"output"},
    )
Example #25
Source File: theano_backend.py From DeepLearning_Wavelet-LSTM with MIT License

def dropout(x, level, noise_shape=None, seed=None):
    """Sets entries in `x` to zero at random, while scaling the entire tensor.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor
            that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism.
    """
    if level < 0. or level >= 1:
        raise ValueError('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)
    if isinstance(noise_shape, list):
        noise_shape = tuple(noise_shape)

    rng = RandomStreams(seed=seed)
    retain_prob = 1. - level

    if noise_shape is None:
        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    else:
        random_tensor = rng.binomial(noise_shape, p=retain_prob,
                                     dtype=x.dtype)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])

    x *= random_tensor
    x /= retain_prob
    return x
Example #26
Source File: theano_backend.py From DeepLearning_Wavelet-LSTM with MIT License

def pattern_broadcast(x, broadcastable):
    return T.patternbroadcast(x, broadcastable)


# VALUE MANIPULATION
Example #27
Source File: theano_backend.py From DeepLearning_Wavelet-LSTM with MIT License

def dropout(x, level, noise_shape=None, seed=None):
    """Sets entries in `x` to zero at random, while scaling the entire tensor.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor
            that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism.
    """
    if level < 0. or level >= 1:
        raise ValueError('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)
    if isinstance(noise_shape, list):
        noise_shape = tuple(noise_shape)

    rng = RandomStreams(seed=seed)
    retain_prob = 1. - level

    if noise_shape is None:
        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    else:
        random_tensor = rng.binomial(noise_shape, p=retain_prob,
                                     dtype=x.dtype)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])

    x *= random_tensor
    x /= retain_prob
    return x
Example #28
Source File: theano_backend.py From DeepLearning_Wavelet-LSTM with MIT License

def pattern_broadcast(x, broadcastable):
    return T.patternbroadcast(x, broadcastable)


# VALUE MANIPULATION
Example #29
Source File: theano_backend.py From DeepLearning_Wavelet-LSTM with MIT License

def pattern_broadcast(x, broadcastable):
    return T.patternbroadcast(x, broadcastable)


# VALUE MANIPULATION
Example #30
Source File: theano_backend.py From DeepLearning_Wavelet-LSTM with MIT License

def dropout(x, level, noise_shape=None, seed=None):
    """Sets entries in `x` to zero at random, while scaling the entire tensor.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor
            that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism.
    """
    if level < 0. or level >= 1:
        raise ValueError('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)
    if isinstance(noise_shape, list):
        noise_shape = tuple(noise_shape)

    rng = RandomStreams(seed=seed)
    retain_prob = 1. - level

    if noise_shape is None:
        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
    else:
        random_tensor = rng.binomial(noise_shape, p=retain_prob,
                                     dtype=x.dtype)
        random_tensor = T.patternbroadcast(random_tensor,
                                           [dim == 1 for dim in noise_shape])

    x *= random_tensor
    x /= retain_prob
    return x