Python data_utils.load_data() Examples
The following are 6 code examples of data_utils.load_data(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module data_utils, or try the search function.
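The signature differs from project to project: the PyTorch samples return train/test DataLoaders, the Keras sample returns feature/label arrays, and the motion-prediction project returns data dictionaries. A minimal sketch of the two most common calling patterns follows; the argument values are illustrative assumptions, not defaults from any of the projects.

# Sketch only: assumes one of the projects' data_utils modules is importable.
import data_utils

# Pattern used in Examples #1 and #3 (PyTorch): returns train/test DataLoaders.
train_loader, test_loader = data_utils.load_data(0.2, 16)  # test_split, batch_size

# Pattern used in Example #2 (Keras): takes the parsed args object and returns arrays.
# train_features, test_features, train_labels, test_labels = data_utils.load_data(args)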
Example #1
Source File: task.py From cloudml-samples with Apache License 2.0 | 6 votes |
def train_model(args): """Load the data, train the model, test the model, export / save the model """ torch.manual_seed(args.seed) # Open our dataset train_loader, test_loader = data_utils.load_data(args.test_split, args.batch_size) # Create the model net = model.SonarDNN().double() optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, nesterov=False) # Train / Test the model for epoch in range(1, args.epochs + 1): train(net, train_loader, optimizer, epoch) test(net, test_loader) # Export the trained model torch.save(net.state_dict(), args.model_name) if args.model_dir: # Save the model to GCS data_utils.save_model(args.model_dir, args.model_name)
Example #2
Source File: task.py From cloudml-samples with Apache License 2.0 | 6 votes |
def train_model(args):
    train_features, test_features, train_labels, test_labels = \
        data_utils.load_data(args)

    sonar_model = model.sonar_model()

    sonar_model.fit(train_features, train_labels,
                    epochs=args.epochs,
                    batch_size=args.batch_size)

    score = sonar_model.evaluate(test_features, test_labels,
                                 batch_size=args.batch_size)
    print(score)

    # Export the trained model
    sonar_model.save(args.model_name)

    if args.model_dir:
        # Save the model to GCS
        data_utils.save_model(args.model_dir, args.model_name)
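Because this variant saves a full Keras model rather than raw weights, it can be reloaded later for inference. A short sketch, assuming the model was built with tf.keras; the file name, feature width, and batch size are placeholders, not values from the sample.

from tensorflow import keras
import numpy as np

# Sketch only: 'sonar_model.h5' stands in for args.model_name above, and the
# random array stands in for the held-out test features (assumed width of 60).
reloaded = keras.models.load_model('sonar_model.h5')
test_features = np.random.rand(8, 60)
predictions = reloaded.predict(test_features, batch_size=8)
print(predictions)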
Example #3
Source File: task.py From cloudml-samples with Apache License 2.0 | 5 votes |
def train_model(args): """Load the data, train the model, test the model, export / save the model """ torch.manual_seed(args.seed) # Open our dataset train_loader, test_loader = data_utils.load_data( args.test_split, args.seed, args.batch_size) # Create the model net = model.SonarDNN().double() optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum, nesterov=False) # Train / Test the model latest_accuracy = 0.0 for epoch in range(1, args.epochs + 1): train(net, train_loader, optimizer) latest_accuracy = test(net, test_loader) # The default name of the metric is training/hptuning/metric. # We recommend that you assign a custom name. The only functional # difference is that if you use a custom name, you must set the # hyperparameterMetricTag value in the HyperparameterSpec object in your # job request to match your chosen name. # https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#HyperparameterSpec hpt = hypertune.HyperTune() hpt.report_hyperparameter_tuning_metric( hyperparameter_metric_tag='my_accuracy_tag', metric_value=latest_accuracy, global_step=args.epochs) # Export the trained model torch.save(net.state_dict(), args.model_name) if args.job_dir: # Save the model to GCS data_utils.save_model(args.job_dir, args.model_name) else: print('Accuracy: {:.0f}%'.format(latest_accuracy))
Example #4
Source File: DataLoader.py From Convolutional-Sequence-to-Sequence-Model-for-Human-Dynamics with MIT License | 4 votes |
def read_all_data(self, actions, data_dir, one_hot=False):
    """
    Loads data for training/testing and normalizes it.

    Args
      actions: list of strings (actions) to load
      data_dir: directory to load the data from
      one_hot: whether to use one-hot encoding per action
    Returns
      train_set: dictionary with normalized training data
      test_set: dictionary with test data
      data_mean: d-long vector with the mean of the training data
      data_std: d-long vector with the standard dev of the training data
      dim_to_ignore: dimensions that are not used because stdev is too small
      dim_to_use: dimensions that we are actually using in the model
    """
    train_subject_ids = [1, 6, 7, 8, 9, 11]
    test_subject_ids = [5]

    train_set, complete_train = data_utils.load_data(
        data_dir, train_subject_ids, actions, one_hot)
    test_set, complete_test = data_utils.load_data(
        data_dir, test_subject_ids, actions, one_hot)

    # Compute normalization stats
    data_mean, data_std, dim_to_ignore, dim_to_use = \
        data_utils.normalization_stats(complete_train)

    # Normalize -- subtract mean, divide by stdev
    train_set = data_utils.normalize_data(
        train_set, data_mean, data_std, dim_to_use, actions, one_hot)
    test_set = data_utils.normalize_data(
        test_set, data_mean, data_std, dim_to_use, actions, one_hot)
    print("done reading data.")

    self.train_set = train_set
    self.test_set = test_set
    self.data_mean = data_mean
    self.data_std = data_std
    self.dim_to_ignore = dim_to_ignore
    self.dim_to_use = dim_to_use
    self.train_keys = list(self.train_set.keys())
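A hedged usage sketch. Only read_all_data and its arguments come from the example above; the class name, a no-argument constructor, the action list, and the data directory are assumptions for illustration.

# Sketch only: assumes the enclosing class is named DataLoader and can be
# constructed without arguments, which may not match the actual project.
actions = ["walking", "eating", "smoking"]  # assumed subset of action names
loader = DataLoader()
loader.read_all_data(actions, data_dir="./data/h3.6m/dataset", one_hot=False)
print(len(loader.train_keys), "training sequences loaded")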
Example #5
Source File: trainDepthMap.py From Pix2Depth with GNU General Public License v3.0 | 4 votes |
def trainDepthMap(**kwargs):
    """
    Train model

    Load the whole train data in memory for faster operations

    args: **kwargs (dict) keyword arguments that specify the model hyperparameters
    """
    # Roll out the parameters
    batch_size = kwargs["batch_size"]
    nb_train_samples = kwargs["nb_train_samples"]
    nb_validation_samples = kwargs["nb_validation_samples"]
    epochs = kwargs["nb_epoch"]
    model_name = kwargs["model_name"]
    lastLayerActivation = kwargs["lastLayerActivation"]
    PercentageOfTrianable = kwargs["PercentageOfTrianable"]
    SpecificPathStr = kwargs["SpecificPathStr"]
    lossFunction = kwargs["lossFunction"]
    if kwargs["bnAtTheend"] != "True":
        bnAtTheend = False
    else:
        bnAtTheend = True

    # Setup environment (logging directory etc)
    # general_utils.setup_logging(model_name)

    # Load and rescale data
    # X_full_train, X_sketch_train, X_full_val, X_sketch_val = data_utils.load_data(dset, image_data_format)
    img_dim = (256, 256, 3)  # Manual entry

    try:
        generator_model = CreatErrorMapModel(
            input_shape=img_dim,
            lastLayerActivation=lastLayerActivation,
            PercentageOfTrianable=PercentageOfTrianable,
            bnAtTheend=bnAtTheend,
            lossFunction=lossFunction)

        # Set up fresh log and model directories for this run
        logpath = os.path.join(
            '../../log',
            'DepthMapWith' + lastLayerActivation + str(PercentageOfTrianable) + 'UnTr' + SpecificPathStr)
        modelPath = os.path.join(
            '../../models',
            'DepthMapwith' + lastLayerActivation + str(PercentageOfTrianable) + 'Untr' + SpecificPathStr)
        shutil.rmtree(logpath, ignore_errors=True)
        shutil.rmtree(modelPath, ignore_errors=True)
        os.makedirs(logpath, exist_ok=True)
        os.makedirs(modelPath, exist_ok=True)

        # Pre-training the depth map
        batchSize = batch_size
        history = generator_model.fit_generator(
            data_utils.facades_generator(img_dim, batch_size=batch_size),
            samples_per_epoch=nb_train_samples,
            epochs=epochs,
            verbose=1,
            validation_data=data_utils.facades_generator(img_dim, batch_size=batch_size),
            nb_val_samples=nb_validation_samples,
            callbacks=[
                keras.callbacks.ModelCheckpoint(
                    os.path.join(modelPath, 'DepthMap_weightsBestLoss.h5'),
                    monitor='val_loss', verbose=1, save_best_only=True),
                keras.callbacks.ModelCheckpoint(
                    os.path.join(modelPath, 'DepthMap_weightsBestAcc.h5'),
                    monitor='acc', verbose=1, save_best_only=True),
                keras.callbacks.ReduceLROnPlateau(
                    monitor='loss', factor=0.1, patience=2, verbose=1,
                    mode='auto', epsilon=0.0001, cooldown=0, min_lr=0),
                keras.callbacks.TensorBoard(
                    log_dir=logpath, histogram_freq=0, batch_size=batchSize,
                    write_graph=True, write_grads=False, write_images=False,
                    embeddings_freq=0, embeddings_layer_names=None,
                    embeddings_metadata=None)])

        ErrorMap_weights_path = os.path.join(modelPath, 'DepthMap_weights.h5')
        generator_model.save_weights(ErrorMap_weights_path, overwrite=True)

        plt.plot(history.history['loss'])
        plt.savefig(logpath + "/history.png", bbox_inches='tight')
    except KeyboardInterrupt:
        pass
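A hedged example of how trainDepthMap might be called. The keyword names are exactly the keys the function reads above, while the values are placeholders rather than settings from the Pix2Depth project.

# Illustrative call only: the values below are assumptions, not project defaults.
params = {
    "batch_size": 4,
    "nb_train_samples": 1000,
    "nb_validation_samples": 200,
    "nb_epoch": 20,
    "model_name": "depth_map",
    "lastLayerActivation": "sigmoid",
    "PercentageOfTrianable": 50,
    "SpecificPathStr": "run1",
    "lossFunction": "mse",
    "bnAtTheend": "False",
}
trainDepthMap(**params)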
Example #6
Source File: prepro_std.py From mt-dnn with MIT License | 4 votes |
def main(args):
    # Hyperparameters
    do_lower_case = args.do_lower_case
    root = args.root_dir
    assert os.path.exists(root)

    literal_model_type = args.model.split('-')[0].upper()
    encoder_model = EncoderModelType[literal_model_type]
    literal_model_type = literal_model_type.lower()
    mt_dnn_suffix = literal_model_type
    if 'base' in args.model:
        mt_dnn_suffix += "_base"
    elif 'large' in args.model:
        mt_dnn_suffix += "_large"

    config_class, model_class, tokenizer_class = MODEL_CLASSES[literal_model_type]
    tokenizer = tokenizer_class.from_pretrained(args.model, do_lower_case=do_lower_case)

    if 'uncased' in args.model:
        mt_dnn_suffix = '{}_uncased'.format(mt_dnn_suffix)
    else:
        mt_dnn_suffix = '{}_cased'.format(mt_dnn_suffix)

    if do_lower_case:
        mt_dnn_suffix = '{}_lower'.format(mt_dnn_suffix)

    mt_dnn_root = os.path.join(root, mt_dnn_suffix)
    if not os.path.isdir(mt_dnn_root):
        os.mkdir(mt_dnn_root)

    task_defs = TaskDefs(args.task_def)

    for task in task_defs.get_task_names():
        task_def = task_defs.get_task_def(task)
        logger.info("Task %s" % task)
        for split_name in task_def.split_names:
            file_path = os.path.join(root, "%s_%s.tsv" % (task, split_name))
            if not os.path.exists(file_path):
                logger.warning("File %s does not exist" % file_path)
                sys.exit(1)
            rows = load_data(file_path, task_def)
            dump_path = os.path.join(mt_dnn_root, "%s_%s.json" % (task, split_name))
            logger.info(dump_path)
            build_data(
                rows,
                dump_path,
                tokenizer,
                task_def.data_type,
                encoderModelType=encoder_model,
                lab_dict=task_def.label_vocab)
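A hedged sketch of invoking this preprocessing step programmatically. The attribute names mirror what main() reads above, but the paths and model name are assumptions for illustration, not values shipped with mt-dnn.

from argparse import Namespace

# Illustrative only: root_dir and task_def point at assumed locations.
args = Namespace(
    do_lower_case=True,
    root_dir="data/canonical_data",
    model="bert-base-uncased",
    task_def="experiments/glue/glue_task_def.yml",
)
main(args)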