Python blocks.graph.ComputationGraph() Examples

The following are 29 code examples of blocks.graph.ComputationGraph(), drawn from open-source projects. The source file, project, and license are noted above each example.
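Before the examples, here is a minimal orientation sketch (the variable names are illustrative and not taken from any project below): a ComputationGraph wraps one or more Theano output variables and exposes the pieces of the graph behind them.

import numpy
import theano
from theano import tensor
from blocks.graph import ComputationGraph

# Build a small Theano expression and wrap it in a ComputationGraph.
x = tensor.matrix('x')
W = theano.shared(numpy.ones((3, 2), dtype=theano.config.floatX), name='W')
cost = (x.dot(W) ** 2).sum()

cg = ComputationGraph(cost)
print(cg.inputs)            # free inputs of the graph: [x]
print(cg.shared_variables)  # shared variables reachable from the outputs: [W]
print(cg.variables)         # all variables, ready for VariableFilter queries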
Example #1
Source File: recognizer.py    From attention-lvcsr with MIT License
def init_beam_search(self, beam_size):
        """Compile beam search and set the beam size.

        See Blocks issue #500.

        """
        if hasattr(self, '_beam_search') and self.beam_size == beam_size:
            # Only recompile if the user wants a different beam size
            return
        self.beam_size = beam_size
        generated = self.get_generate_graph(use_mask=False, n_steps=3)
        cg = ComputationGraph(generated.values())
        samples, = VariableFilter(
            applications=[self.generator.generate], name="outputs")(cg)
        self._beam_search = BeamSearch(beam_size, samples)
        self._beam_search.compile() 
Example #2
Source File: models.py    From dl4mt-multi with BSD 3-Clause "New" or "Revised" License
def get_costs(self, probs, y, y_mask,
                  decay_cs=None, opt_rets=None):
        """
        probs : dict, mapping cg_name to probabilities
        y : theano tensor variable
        y_mask : theano tensor variable
        decay_cs : list of l2 regularization weights
        opt_rets : dict, mapping cg_name to optional returned variables
        """
        costs = self.decoder.costs(probs, y, y_mask)

        if decay_cs is not None:
            for name, cost in costs.items():
                if decay_cs[name] > 0.:
                    decay_c = theano.shared(numpy.float32(decay_cs[name]),
                                            name='decay_c')
                    weight_decay = 0.
                    for pp in ComputationGraph(cost).parameters:
                        weight_decay += (pp ** 2).sum()
                    weight_decay *= decay_c
                    costs[name] += weight_decay
                    costs[name].name = name

        return costs 
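The L2 weight-decay idiom above relies on ComputationGraph(cost).parameters, which collects the shared variables tagged with the PARAMETER role (bricks attach that role automatically; a bare theano.shared does not carry it). A minimal self-contained sketch of the same pattern, using an illustrative Linear brick:

import numpy
import theano
from theano import tensor
from blocks.bricks import Linear
from blocks.graph import ComputationGraph
from blocks.initialization import Constant

# A small brick so that ComputationGraph(cost).parameters is non-empty.
linear = Linear(input_dim=4, output_dim=2,
                weights_init=Constant(1), biases_init=Constant(0))
linear.initialize()
x = tensor.matrix('x')
cost = (linear.apply(x) ** 2).sum()

# Penalize every parameter reachable from the cost, as in the example above.
decay_c = theano.shared(numpy.float32(1e-4), name='decay_c')
weight_decay = 0.
for pp in ComputationGraph(cost).parameters:
    weight_decay += (pp ** 2).sum()
cost = cost + decay_c * weight_decay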
Example #3
Source File: run.py    From ladder with MIT License
def setup_model(p):
    ladder = LadderAE(p)
    # Setup inputs
    input_type = TensorType('float32', [False] * (len(p.encoder_layers[0]) + 1))
    x_only = input_type('features_unlabeled')
    x = input_type('features_labeled')
    y = theano.tensor.lvector('targets_labeled')
    ladder.apply(x, y, x_only)

    # Load parameters if requested
    if p.get('load_from'):
        with open(p.load_from + '/trained_params.npz', 'rb') as f:
            loaded = numpy.load(f)
            cg = ComputationGraph([ladder.costs.total])
            current_params = VariableFilter(roles=[PARAMETER])(cg.variables)
            logger.info('Loading parameters: %s' % ', '.join(loaded.keys()))
            for param in current_params:
                assert param.get_value().shape == loaded[param.name].shape
                param.set_value(loaded[param.name])

    return ladder 
Example #4
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def test_saved_inner_graph():
    """Make sure that the original inner graph is saved."""
    x = tensor.tensor3()
    recurrent = SimpleRecurrent(dim=3, activation=Tanh())
    y = recurrent.apply(x)

    application_call = get_application_call(y)
    assert application_call.inner_inputs
    assert application_call.inner_outputs

    cg = ComputationGraph(application_call.inner_outputs)
    # Check that the inner scan graph is annotated
    # with `recurrent.apply`
    assert len(VariableFilter(applications=[recurrent.apply])(cg)) == 3
    # Check that the inner graph is equivalent to the one
    # produced by a stand-alone of `recurrent.apply`
    assert is_same_graph(application_call.inner_outputs[0],
                         recurrent.apply(*application_call.inner_inputs,
                                         iterate=False)) 
Example #5
Source File: test_evaluators.py    From attention-lvcsr with MIT License
def test_dataset_evaluators():
    X = theano.tensor.matrix('X')
    brick = TestBrick(name='test_brick')
    Y = brick.apply(X)
    graph = ComputationGraph([Y])
    monitor_variables = [v for v in graph.auxiliary_variables]
    validator = DatasetEvaluator(monitor_variables)

    data = [numpy.arange(1, 5, dtype=theano.config.floatX).reshape(2, 2),
            numpy.arange(10, 16, dtype=theano.config.floatX).reshape(3, 2)]
    data_stream = IterableDataset(dict(X=data)).get_example_stream()

    values = validator.evaluate(data_stream)
    assert values['test_brick_apply_V_squared'] == 4
    numpy.testing.assert_allclose(
        values['test_brick_apply_mean_row_mean'], numpy.vstack(data).mean())
    per_batch_mean = numpy.mean([batch.mean() for batch in data])
    numpy.testing.assert_allclose(
        values['test_brick_apply_mean_batch_element'], per_batch_mean)

    with assert_raises(Exception) as ar:
        data_stream = IterableDataset(dict(X2=data)).get_example_stream()
        validator.evaluate(data_stream)
    assert "Not all data sources" in ar.exception.args[0] 
Example #6
Source File: evaluators.py    From attention-lvcsr with MIT License
def __init__(self, variables, use_take_last=False):
        self.variables = variables
        self.use_take_last = use_take_last

        self.variable_names = [v.name for v in self.variables]
        if len(set(self.variable_names)) < len(self.variables):
            duplicates = []
            for vname in set(self.variable_names):
                if self.variable_names.count(vname) > 1:
                    duplicates.append(vname)
            raise ValueError("variables should have different names!"
                             " Duplicates: {}".format(', '.join(duplicates)))
        self._computation_graph = ComputationGraph(self.variables)
        self.inputs = self._computation_graph.inputs

        self._initialized = False
        self._create_aggregators()
        self._compile() 
Example #7
Source File: search.py    From attention-lvcsr with MIT License
def __init__(self, beam_size, samples):
        self.beam_size = beam_size

        # Extracting information from the sampling computation graph
        cg = ComputationGraph(samples)
        self.inputs = cg.inputs
        self.generator = get_brick(samples)
        if not isinstance(self.generator, BaseSequenceGenerator):
            raise ValueError
        self.generate_call = get_application_call(samples)
        if (self.generate_call.application !=
                self.generator.generate):
            raise ValueError
        self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

        # Fetching names from the sequence generator
        self.context_names = self.generator.generate.contexts
        self.state_names = self.generator.generate.states

        # Parsing the inner computation graph of sampling scan
        self.contexts = [
            VariableFilter(bricks=[self.generator],
                           name=name,
                           roles=[INPUT])(self.inner_cg)[0]
            for name in self.context_names]
        self.input_states = []
        # Includes only those state names that were actually used
        # in 'generate'
        self.input_state_names = []
        for name in self.generator.generate.states:
            var = VariableFilter(
                bricks=[self.generator], name=name,
                roles=[INPUT])(self.inner_cg)
            if var:
                self.input_state_names.append(name)
                self.input_states.append(var[0])

        self.compiled = False 
Example #8
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_batches):
    mlp = MLP([Tanh(), Identity()], [1, 10, 1],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0), seed=1)
    mlp.initialize()
    x = tensor.vector('numbers')
    y = tensor.vector('roots')
    cost = SquaredError().apply(y[:, None], mlp.apply(x[:, None]))
    cost.name = "cost"

    main_loop = MainLoop(
        GradientDescent(
            cost=cost, parameters=ComputationGraph(cost).parameters,
            step_rule=Scale(learning_rate=0.001)),
        get_data_stream(range(100)),
        model=Model(cost),
        extensions=[
            Timing(),
            FinishAfter(after_n_batches=num_batches),
            DataStreamMonitoring(
                [cost], get_data_stream(range(100, 200)),
                prefix="test"),
            TrainingDataMonitoring([cost], after_epoch=True),
            Checkpoint(save_to),
            Printing()])
    main_loop.run()
    return main_loop 
Example #9
Source File: theano_util.py    From cpae with MIT License
def merge_duplicates(cg):
    shared_variables = [var for var in cg
                        if isinstance(var, theano.tensor.sharedvar.SharedVariable)]
    inputs = cg.inputs + shared_variables
    outputs = cg.outputs
    new_inputs, new_outputs = clone(inputs, outputs, copy_inputs=False, copy_orphans=True)
    assert all([x is y for x, y in zip(inputs, new_inputs)])
    fg = FunctionGraph(inputs, new_outputs, clone=False)
    MergeOptimizer().optimize(fg)
    cg = ComputationGraph(fg.outputs)
    fg.disown()
    return cg 
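A brief note on the trick above: clone copies the graph while keeping the original inputs (copy_inputs=False), Theano's MergeOptimizer then collapses structurally identical subgraphs inside the temporary FunctionGraph, and fg.disown() releases the FunctionGraph's references to the variables so the optimized outputs can be wrapped in a fresh ComputationGraph.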
Example #10
Source File: extensions.py    From cpae with MIT License
def do(self, *args, **kwargs):
        logger.info("Computation graph statistics:")
        cost_cg = ComputationGraph(self.main_loop.algorithm.cost)
        updates_cg = ComputationGraph(
            [u[1] for u in self.main_loop.algorithm.updates
             if isinstance(u[1], theano.Variable)])
        cost_nodes = io_toposort(cost_cg.inputs, cost_cg.outputs)
        updates_nodes = io_toposort(updates_cg.inputs, updates_cg.outputs)

        cost_scan_nodes = [
            node for node in cost_nodes
            if isinstance(node.op, Scan)]
        updates_scan_nodes = [
            node for node in updates_nodes
            if isinstance(node.op, Scan)]
        final_scan_nodes = [
            node for node in self.main_loop.algorithm._function.maker.fgraph.apply_nodes
            if isinstance(node.op, Scan)]

        logger.info("SCAN NODES IN THE COST GRAPH:")
        for n in cost_scan_nodes:
            logger.info(n.op.name)
        logger.info("SCAN NODES IN THE UPDATES GRAPH:")
        for n in updates_scan_nodes:
            logger.info(n.op.name)
        logger.info("SCAN NODES IN THE FINAL GRAPH:")
        for n in final_scan_nodes:
            logger.info(n.op.name) 
Example #11
Source File: predict.py    From blocks-extras with MIT License
def __init__(self, data_stream, variables, path=None, **kwargs):
        self.data_stream = data_stream
        self.variables = variables
        self.path = path
        self.prediction = None

        kwargs.setdefault('after_training', True)
        super(PredictDataStream, self).__init__(**kwargs)

        cg = ComputationGraph(variables)
        self.theano_function = cg.get_theano_function() 
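get_theano_function() compiles the graph into a callable whose arguments are the graph's free inputs, which is what lets the extension above run predictions over a data stream. A minimal sketch with illustrative variables:

import numpy
import theano
from theano import tensor
from blocks.graph import ComputationGraph

# Compile a graph into a callable over its free inputs.
x = tensor.vector('x')
y = 2 * x + 1
fn = ComputationGraph([y]).get_theano_function()
print(fn(numpy.arange(3, dtype=theano.config.floatX)))  # [array([1., 3., 5.])]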
Example #12
Source File: beam_search.py    From blocks-extras with MIT License
def __init__(self, samples):
        # Extracting information from the sampling computation graph
        self.cg = ComputationGraph(samples)
        self.inputs = self.cg.inputs
        self.generator = get_brick(samples)
        if not isinstance(self.generator, SequenceGenerator):
            raise ValueError
        self.generate_call = get_application_call(samples)
        if (self.generate_call.application !=
                self.generator.generate):
            raise ValueError
        self.inner_cg = ComputationGraph(self.generate_call.inner_outputs)

        # Fetching names from the sequence generator
        self.context_names = self.generator.generate.contexts
        self.state_names = self.generator.generate.states

        # Parsing the inner computation graph of sampling scan
        self.contexts = [
            VariableFilter(bricks=[self.generator],
                           name=name,
                           roles=[INPUT])(self.inner_cg)[0]
            for name in self.context_names]
        self.input_states = []
        # Includes only those state names that were actually used
        # in 'generate'
        self.input_state_names = []
        for name in self.generator.generate.states:
            var = VariableFilter(
                bricks=[self.generator], name=name,
                roles=[INPUT])(self.inner_cg)
            if var:
                self.input_state_names.append(name)
                self.input_states.append(var[0])

        self.compiled = False 
Example #13
Source File: sketch.py    From sketch with MIT License
def __init__(self, generator, N=8, steps=1200, path='samples', **kwargs):
        self.N = N
        self.path = path
        super(Sample, self).__init__(**kwargs)

        batch_size = self.N * self.N

        self.sample = ComputationGraph(generator.generate(
            n_steps=steps, batch_size=batch_size, iterate=True)
        ).get_theano_function() 
Example #14
Source File: models.py    From dl4mt-multi with BSD 3-Clause "New" or "Revised" License
def get_computational_graphs(self, costs):
        """
        costs : dict, mapping cg_name to cost
        """
        cgs = OrderedDict()
        for name, cost in costs.items():
            cg = ComputationGraph(cost)
            cgs[name] = cg
        return cgs 
Example #15
Source File: recognizer.py    From attention-lvcsr with MIT License
def get_cost_graph(self, batch=True,
                       prediction=None, prediction_mask=None):

        if batch:
            inputs = self.inputs
            inputs_mask = self.inputs_mask
            groundtruth = self.labels
            groundtruth_mask = self.labels_mask
        else:
            inputs, inputs_mask = self.bottom.single_to_batch_inputs(
                self.single_inputs)
            groundtruth = self.single_labels[:, None]
            groundtruth_mask = None

        if prediction is None:
            prediction = groundtruth
        if prediction_mask is None:
            prediction_mask = groundtruth_mask

        cost = self.cost(inputs_mask=inputs_mask,
                         labels=prediction,
                         labels_mask=prediction_mask,
                         **inputs)
        cost_cg = ComputationGraph(cost)
        if self.criterion['name'].startswith("mse"):
            placeholder, = VariableFilter(theano_name='groundtruth')(cost_cg)
            cost_cg = cost_cg.replace({placeholder: groundtruth})
        return cost_cg 
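ComputationGraph.replace does not mutate the graph; it returns a new ComputationGraph with the substitutions applied, which is why the snippet above rebinds cost_cg. A minimal sketch (illustrative variables) of swapping a placeholder for the real input:

from theano import tensor
from blocks.graph import ComputationGraph

# Build a graph around a placeholder, then substitute the real variable.
placeholder = tensor.matrix('placeholder')
real_input = tensor.matrix('real_input')
cost = (placeholder ** 2).sum()

cg = ComputationGraph(cost)
new_cg = cg.replace({placeholder: real_input})
print(new_cg.inputs)  # [real_input]; the original cg is unchanged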
Example #16
Source File: recognizer.py    From attention-lvcsr with MIT License
def init_generate(self):
        generated = self.get_generate_graph(use_mask=False)
        cg = ComputationGraph(generated['outputs'])
        self._do_generate = cg.get_theano_function() 
Example #17
Source File: nn.py    From ladder with MIT License
def _get_updates(self, bn_ps, bn_share):
        cg = ComputationGraph(bn_ps)
        # Only store updates that relate to params or the counter
        updates = OrderedDict([(up, cg.updates[up]) for up in
                               cg.updates if up.name == 'counter' or
                               up in bn_share])
        assert self._counter == self._param_from_updates(cg.updates, 'counter')
        assert self._counter_max == self._param_from_updates(cg.updates,
                                                             'counter_max')
        assert len(updates) == len(bn_ps) + 1, \
            'Counter or var missing from update'
        return updates 
Example #18
Source File: nn.py    From ladder with MIT License
def _get_bn_params(self, output_vars):
        # Pick out the nodes with batch normalization vars
        cg = ComputationGraph(output_vars)
        var_filter = VariableFilter(roles=[BNPARAM])
        bn_ps = var_filter(cg.variables)

        if len(bn_ps) == 0:
            logger.warning('No batch normalization parameters found - is' +
                           ' batch normalization turned off?')
            self._bn = False
            self._counter = None
            self._counter_max = None
            bn_share = []
            output_vars_replaced = output_vars
        else:
            self._bn = True
            assert len(set([p.name for p in bn_ps])) == len(bn_ps), \
                'Some batch norm params have the same name'
            logger.info('Batch norm parameters: %s' % ', '.join([p.name for p in bn_ps]))

            # Filter out the shared variables from the model updates
            def filter_share(par):
                lst = [up for up in cg.updates if up.name == 'shared_%s' % par.name]
                assert len(lst) == 1
                return lst[0]
            bn_share = [filter_share(bn_p) for bn_p in bn_ps]

            # Replace the BN coefficients in the test data model - Replace the
            # theano variables in the test graph with the shareds
            output_vars_replaced = cg.replace(zip(bn_ps, bn_share)).outputs

            # Pick out the counter
            self._counter = self._param_from_updates(cg.updates, 'counter')
            self._counter_max = self._param_from_updates(cg.updates, 'counter_max')

        return bn_ps, bn_share, output_vars_replaced 
Example #19
Source File: evaluators.py    From attention-lvcsr with MIT License
def __init__(self, quantities):
        self.quantities = quantities
        requires = []
        for quantity in quantities:
            requires += quantity.requires
        self.requires = list(set(requires))
        self._initialized = False

        self.quantity_names = [q.name for q in self.quantities]
        self._computation_graph = ComputationGraph(self.requires)
        self.inputs = self._computation_graph.inputs 
Example #20
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def test_many_steps(self):
        x = tensor.tensor3('x')
        gi = tensor.tensor3('gi')
        mask = tensor.matrix('mask')
        h = self.reset_only.apply(x, gi, mask=mask)
        calc_h = theano.function(inputs=[x, gi, mask], outputs=[h])

        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=theano.config.floatX)
        x_val = numpy.ones((24, 4, 3),
                           dtype=theano.config.floatX) * x_val[..., None]
        ri_val = 0.3 - x_val
        zi_val = 2 * ri_val
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        W = self.reset_only.state_to_state.get_value()
        Wz = self.reset_only.state_to_gates.get_value()[:, :3]
        Wr = self.reset_only.state_to_gates.get_value()[:, 3:]

        for i in range(1, 25):
            z_val = numpy.tanh(h_val[i - 1].dot(Wz) + zi_val[i - 1])
            r_val = numpy.tanh(h_val[i - 1].dot(Wr) + ri_val[i - 1])
            h_val[i] = numpy.tanh((r_val * h_val[i - 1]).dot(W) +
                                  x_val[i - 1])
            h_val[i] = z_val * h_val[i] + (1 - z_val) * h_val[i - 1]
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        # TODO Figure out why this tolerance needs to be so big
        assert_allclose(
            h_val,
            calc_h(x_val, numpy.concatenate(
                [zi_val, ri_val], axis=2), mask_val)[0],
            1e-04)

        # Also test that initial state is a parameter
        initial_state, = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial_state)
        assert initial_state.name == 'initial_state' 
Example #21
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def test_many_steps(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        h = self.simple.apply(x, mask=mask, iterate=True)
        calc_h = theano.function(inputs=[x, mask], outputs=[h])

        x_val = 0.1 * numpy.asarray(list(itertools.permutations(range(4))),
                                    dtype=theano.config.floatX)
        x_val = numpy.ones((24, 4, 3),
                           dtype=theano.config.floatX) * x_val[..., None]
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        for i in range(1, 25):
            h_val[i] = numpy.tanh(h_val[i - 1].dot(
                2 * numpy.ones((3, 3))) + x_val[i - 1])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
        h_val = h_val[1:]
        assert_allclose(h_val, calc_h(x_val, mask_val)[0], rtol=1e-04)

        # Also test that initial state is a parameter
        initial_state, = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial_state)
        assert initial_state.name == 'initial_state' 
Example #22
Source File: test_variable_filter.py    From attention-lvcsr with MIT License
def test_variable_filter_applications_error():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')

    x = tensor.vector()
    h1 = brick1.apply(x)
    cg = ComputationGraph(h1)
    VariableFilter(applications=brick1.apply)(cg.variables) 
Example #23
Source File: test_variable_filter.py    From attention-lvcsr with MIT License
def test_variable_filter_roles_error():
    # Creating computation graph
    brick1 = Linear(input_dim=2, output_dim=2, name='linear1')

    x = tensor.vector()
    h1 = brick1.apply(x)
    cg = ComputationGraph(h1)
    # testing role error
    VariableFilter(roles=PARAMETER)(cg.variables) 
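Both tests above deliberately pass bare values where VariableFilter expects lists, to exercise its argument checking. The correct calls wrap the arguments in lists; a minimal sketch:

from theano import tensor
from blocks.bricks import Linear
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.roles import PARAMETER

# VariableFilter expects list-valued `roles` and `applications` arguments.
brick = Linear(input_dim=2, output_dim=2, name='linear')
x = tensor.vector()
h = brick.apply(x)
cg = ComputationGraph(h)
parameters = VariableFilter(roles=[PARAMETER])(cg.variables)        # [W, b]
outputs = VariableFilter(applications=[brick.apply])(cg.variables)  # apply's variables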
Example #24
Source File: __init__.py    From attention-lvcsr with MIT License
def __init__(self, cost, parameters):
        self.cost = cost
        self.parameters = parameters
        self._cost_computation_graph = ComputationGraph(self.cost)
        self._updates = [] 
Example #25
Source File: evaluators.py    From attention-lvcsr with MIT License
def _compile(self):
        """Compiles Theano functions.

        .. todo::

            The current compilation method does not account for updates
            attached to `ComputationGraph` elements. Compiling should
            be out-sourced to `ComputationGraph` to deal with it.

        """
        inputs = []
        outputs = []
        updates = None
        if self.theano_buffer.accumulation_updates:
            updates = OrderedDict()
            updates.update(self.theano_buffer.accumulation_updates)
            inputs += self.theano_buffer.inputs
        if self.updates:
            # Handle the case in which we dont have any theano variables
            # to evaluate but we do have MonitoredQuantity
            # that may require an update of their own
            if updates is None:
                updates = self.updates
            else:
                updates.update(self.updates)
        inputs += self.monitored_quantities_buffer.inputs
        outputs = self.monitored_quantities_buffer.requires

        if inputs != []:
            self.unique_inputs = list(set(inputs))
            self._accumulate_fun = theano.function(self.unique_inputs,
                                                   outputs,
                                                   updates=updates)
        else:
            self._accumulate_fun = None 
Example #26
Source File: test_recurrent.py    From attention-lvcsr with MIT License
def test_many_steps(self):
        x = tensor.tensor3('x')
        mask = tensor.matrix('mask')
        h, c = self.lstm.apply(x, mask=mask, iterate=True)
        calc_h = theano.function(inputs=[x, mask], outputs=[h])

        x_val = (0.1 * numpy.asarray(
            list(itertools.islice(itertools.permutations(range(12)), 0, 24)),
            dtype=theano.config.floatX))
        x_val = numpy.ones((24, 4, 12),
                           dtype=theano.config.floatX) * x_val[:, None, :]
        mask_val = numpy.ones((24, 4), dtype=theano.config.floatX)
        mask_val[12:24, 3] = 0
        h_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        c_val = numpy.zeros((25, 4, 3), dtype=theano.config.floatX)
        W_state_val = 2 * numpy.ones((3, 12), dtype=theano.config.floatX)
        W_cell_to_in = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_out = 2 * numpy.ones((3,), dtype=theano.config.floatX)
        W_cell_to_forget = 2 * numpy.ones((3,), dtype=theano.config.floatX)

        def sigmoid(x):
            return 1. / (1. + numpy.exp(-x))

        for i in range(1, 25):
            activation = numpy.dot(h_val[i-1], W_state_val) + x_val[i-1]
            i_t = sigmoid(activation[:, :3] + c_val[i-1] * W_cell_to_in)
            f_t = sigmoid(activation[:, 3:6] + c_val[i-1] * W_cell_to_forget)
            c_val[i] = f_t * c_val[i-1] + i_t * numpy.tanh(activation[:, 6:9])
            o_t = sigmoid(activation[:, 9:12] +
                          c_val[i] * W_cell_to_out)
            h_val[i] = o_t * numpy.tanh(c_val[i])
            h_val[i] = (mask_val[i - 1, :, None] * h_val[i] +
                        (1 - mask_val[i - 1, :, None]) * h_val[i - 1])
            c_val[i] = (mask_val[i - 1, :, None] * c_val[i] +
                        (1 - mask_val[i - 1, :, None]) * c_val[i - 1])

        h_val = h_val[1:]
        assert_allclose(h_val, calc_h(x_val, mask_val)[0], rtol=1e-04)

        # Also test that initial state is a parameter
        initial1, initial2 = VariableFilter(roles=[INITIAL_STATE])(
            ComputationGraph(h))
        assert is_shared_variable(initial1)
        assert is_shared_variable(initial2)
        assert {initial1.name, initial2.name} == {
            'initial_state', 'initial_cells'} 
Example #27
Source File: __init__.py    From blocks-examples with MIT License
def main(save_to, num_epochs):
    mlp = MLP([Tanh(), Softmax()], [784, 100, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = mlp.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    cg = ComputationGraph([cost])
    W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
    cost = cost + .00005 * (W1 ** 2).sum() + .00005 * (W2 ** 2).sum()
    cost.name = 'final_cost'

    mnist_train = MNIST(("train",))
    mnist_test = MNIST(("test",))

    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=Scale(learning_rate=0.1))
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      Flatten(
                          DataStream.default_stream(
                              mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 500)),
                          which_sources=('features',)),
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  Printing()]

    if BLOCKS_EXTRAS_AVAILABLE:
        extensions.append(Plot(
            'MNIST example',
            channels=[
                ['test_final_cost',
                 'test_misclassificationrate_apply_error_rate'],
                ['train_total_gradient_norm']]))

    main_loop = MainLoop(
        algorithm,
        Flatten(
            DataStream.default_stream(
                mnist_train,
                iteration_scheme=SequentialScheme(
                    mnist_train.num_examples, 50)),
            which_sources=('features',)),
        model=Model(cost),
        extensions=extensions)

    main_loop.run() 
Example #28
Source File: pacgan_task.py    From PacGAN with MIT License
def create_models(self):
        gan = self.create_model_brick()
        x = tensor.matrix('features')
        zs = []
        for i in range(self._config["num_packing"]):
            z = circle_gaussian_mixture(num_modes=self._config["num_zmode"], num_samples=x.shape[0], dimension=self._config["num_zdim"], r=self._config["z_mode_r"], std=self._config["z_mode_std"])
            zs.append(z)

        def _create_model(with_dropout):
            cg = ComputationGraph(gan.compute_losses(x, zs))
            if with_dropout:
                inputs = VariableFilter(
                    bricks=gan.discriminator.children[1:],
                    roles=[INPUT])(cg.variables)
                cg = apply_dropout(cg, inputs, 0.5)
                inputs = VariableFilter(
                    bricks=[gan.discriminator],
                    roles=[INPUT])(cg.variables)
                cg = apply_dropout(cg, inputs, 0.2)
            return Model(cg.outputs)

        model = _create_model(with_dropout=False)
        with batch_normalization(gan):
            bn_model = _create_model(with_dropout=False)

        pop_updates = list(set(get_batch_normalization_updates(bn_model, allow_duplicates=True)))
            
        # merge same variables
        names = []
        counts = []
        pop_update_merges = []
        pop_update_merges_finals = []
        for pop_update in pop_updates:
            b = False
            for i in range(len(names)):
                if (pop_update[0].auto_name == names[i]):
                    counts[i] += 1
                    pop_update_merges[i][1] += pop_update[1]
                    b = True
                    break
            if not b:
                names.append(pop_update[0].auto_name)
                counts.append(1)
                pop_update_merges.append([pop_update[0], pop_update[1]])
        for i in range(len(pop_update_merges)):
            pop_update_merges_finals.append((pop_update_merges[i][0], pop_update_merges[i][1] / counts[i]))
        
        bn_updates = [(p, m * 0.05 + p * 0.95) for p, m in pop_update_merges_finals]

        return model, bn_model, bn_updates 
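Like replace, apply_dropout returns a modified copy of the graph rather than changing it in place, so each call in _create_model above rebinds cg. A minimal self-contained sketch (with an illustrative MLP) of the filter-then-dropout idiom:

from theano import tensor
from blocks.bricks import MLP, Tanh, Identity
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph, apply_dropout
from blocks.initialization import Constant
from blocks.roles import INPUT

# Filter the INPUT variables of the bricks to regularize, then rebuild the
# graph with dropout applied to them.
mlp = MLP([Tanh(), Identity()], [4, 8, 2],
          weights_init=Constant(0.1), biases_init=Constant(0))
mlp.initialize()
x = tensor.matrix('x')
cost = (mlp.apply(x) ** 2).sum()

cg = ComputationGraph(cost)
inputs = VariableFilter(bricks=mlp.linear_transformations,
                        roles=[INPUT])(cg.variables)
dropped_cg = apply_dropout(cg, inputs, 0.5)  # new graph with dropout inserted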
Example #29
Source File: pacgan_task.py    From PacGAN with MIT License
def create_models(self):
        gan = self.create_model_brick()
        x = tensor.matrix('features')
        zs = []
        for i in range(self._config["num_packing"]):
            z = circle_gaussian_mixture(num_modes=self._config["num_zmode"], num_samples=x.shape[0], dimension=self._config["num_zdim"], r=self._config["z_mode_r"], std=self._config["z_mode_std"])
            zs.append(z)

        def _create_model(with_dropout):
            cg = ComputationGraph(gan.compute_losses(x, zs))
            if with_dropout:
                inputs = VariableFilter(
                    bricks=gan.discriminator.children[1:],
                    roles=[INPUT])(cg.variables)
                cg = apply_dropout(cg, inputs, 0.5)
                inputs = VariableFilter(
                    bricks=[gan.discriminator],
                    roles=[INPUT])(cg.variables)
                cg = apply_dropout(cg, inputs, 0.2)
            return Model(cg.outputs)

        model = _create_model(with_dropout=False)
        with batch_normalization(gan):
            bn_model = _create_model(with_dropout=False)

        pop_updates = list(set(get_batch_normalization_updates(bn_model, allow_duplicates=True)))
            
        # merge same variables
        names = []
        counts = []
        pop_update_merges = []
        pop_update_merges_finals = []
        for pop_update in pop_updates:
            b = False
            for i in range(len(names)):
                if (pop_update[0].auto_name == names[i]):
                    counts[i] += 1
                    pop_update_merges[i][1] += pop_update[1]
                    b = True
                    break
            if not b:
                names.append(pop_update[0].auto_name)
                counts.append(1)
                pop_update_merges.append([pop_update[0], pop_update[1]])
        for i in range(len(pop_update_merges)):
            pop_update_merges_finals.append((pop_update_merges[i][0], pop_update_merges[i][1] / counts[i]))
        
        bn_updates = [(p, m * 0.05 + p * 0.95) for p, m in pop_update_merges_finals]

        return model, bn_model, bn_updates