org.datavec.api.split.partition.Partitioner Java Examples

The following examples show how to use org.datavec.api.split.partition.Partitioner. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PartitionerTests.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises stream creation on a {@link NumberOfRecordsPartitioner} whose
 * {@code RECORDS_PER_FILE_CONFIG} setting is "5".
 *
 * <p>After five updated records are reported the partitioner must ask for a new
 * partition, and {@code openNewStream()} must hand back a non-null stream on
 * two consecutive rounds.
 *
 * @throws Exception if initialising the partitioner or opening/closing a stream fails
 */
@Test
public void testInputAddFile() throws Exception {
    Partitioner partitioner = new NumberOfRecordsPartitioner();
    File tmpDir = Files.createTempDir();
    FileSplit fileSplit = new FileSplit(tmpDir);
    assertTrue(fileSplit.needsBootstrapForWrite());
    fileSplit.bootStrapForWrite();
    Configuration configuration = new Configuration();
    configuration.set(NumberOfRecordsPartitioner.RECORDS_PER_FILE_CONFIG,String.valueOf(5));
    partitioner.init(configuration,fileSplit);
    partitioner.updatePartitionInfo(PartitionMetaData.builder().numRecordsUpdated(5).build());
    assertTrue(partitioner.needsNewPartition());
    // Assert inside the try block: the null check now runs before close(), so a
    // null stream fails the assertion instead of throwing a NullPointerException,
    // and the stream is still closed if the assertion fails.
    try (OutputStream os = partitioner.openNewStream()) {
        assertNotNull(os);
    }
    //run more than once to ensure output stream creation works properly
    partitioner.updatePartitionInfo(PartitionMetaData.builder().numRecordsUpdated(5).build());
    try (OutputStream os = partitioner.openNewStream()) {
        assertNotNull(os);
    }
}
 
Example #2
Source File: PartitionerTests.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises stream creation on a {@link NumberOfRecordsPartitioner} whose
 * {@code RECORDS_PER_FILE_CONFIG} setting is "5".
 *
 * <p>After five updated records are reported the partitioner must ask for a new
 * partition, and {@code openNewStream()} must hand back a non-null stream on
 * two consecutive rounds.
 *
 * @throws Exception if initialising the partitioner or opening/closing a stream fails
 */
@Test
public void testInputAddFile() throws Exception {
    Partitioner partitioner = new NumberOfRecordsPartitioner();
    File tmpDir = Files.createTempDir();
    FileSplit fileSplit = new FileSplit(tmpDir);
    assertTrue(fileSplit.needsBootstrapForWrite());
    fileSplit.bootStrapForWrite();
    Configuration configuration = new Configuration();
    configuration.set(NumberOfRecordsPartitioner.RECORDS_PER_FILE_CONFIG,String.valueOf(5));
    partitioner.init(configuration,fileSplit);
    partitioner.updatePartitionInfo(PartitionMetaData.builder().numRecordsUpdated(5).build());
    assertTrue(partitioner.needsNewPartition());
    // Assert inside the try block: the null check now runs before close(), so a
    // null stream fails the assertion instead of throwing a NullPointerException,
    // and the stream is still closed if the assertion fails.
    try (OutputStream os = partitioner.openNewStream()) {
        assertNotNull(os);
    }
    //run more than once to ensure output stream creation works properly
    partitioner.updatePartitionInfo(PartitionMetaData.builder().numRecordsUpdated(5).build());
    try (OutputStream os = partitioner.openNewStream()) {
        assertNotNull(os);
    }
}
 
Example #3
Source File: ExcelRecordWriter.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Sets this writer up from an input split and a partitioner, without a
 * caller-supplied configuration.
 *
 * @param inputSplit  the split handed to {@code partitioner.init(...)}
 * @param partitioner the partitioner that is stored on this writer and that
 *                    supplies the current output stream
 * @throws Exception if the partitioner, the stream or {@code initPoi()} fails
 */
@Override
public void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception {
    // No configuration was passed in, so start from a fresh one.
    this.conf = new Configuration();
    this.partitioner = partitioner;
    this.partitioner.init(inputSplit);
    this.out = new DataOutputStream(this.partitioner.currentOutputStream());
    this.initPoi();
}
 
Example #4
Source File: ExcelRecordWriter.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Sets this writer up from a configuration, an input split and a partitioner.
 *
 * <p>The worksheet name and file type are read from {@code configuration},
 * falling back to {@code DEFAULT_WORKSHEET_NAME} and {@code DEFAULT_FILE_TYPE}.
 *
 * @param configuration the configuration to read settings from; also stored as {@code conf}
 * @param split         the split handed to {@code partitioner.init(...)}
 * @param partitioner   the partitioner that supplies the current output stream
 * @throws Exception if the partitioner, the stream or {@code initPoi()} fails
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {
    this.workBookName = configuration.get(WORKSHEET_NAME,DEFAULT_WORKSHEET_NAME);
    this.fileTypeToUse = configuration.get(FILE_TYPE,DEFAULT_FILE_TYPE);
    this.conf = configuration;
    // Keep a reference to the partitioner, as the two-argument overload does;
    // without it the field stays unset when this overload is the entry point.
    this.partitioner = partitioner;
    partitioner.init(split);
    out = new DataOutputStream(partitioner.currentOutputStream());
    initPoi();
}
 
Example #5
Source File: PartitionerTests.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@link NumberOfRecordsPartitioner} initialised with a
 * bootstrapped {@link FileSplit} over a temporary directory reports exactly
 * one partition.
 */
@Test
public void testRecordsPerFilePartition() {
    Partitioner underTest = new NumberOfRecordsPartitioner();

    // Build a split over a temporary directory and prepare it for writing.
    File scratchDir = Files.createTempDir();
    FileSplit split = new FileSplit(scratchDir);
    assertTrue(split.needsBootstrapForWrite());
    split.bootStrapForWrite();

    underTest.init(split);
    assertEquals(1, underTest.numPartitions());
}
 
Example #6
Source File: FileRecordWriter.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Sets this writer up from an input split and a partitioner.
 *
 * @param inputSplit  the split handed to {@code partitioner.init(...)}
 * @param partitioner the partitioner that supplies the current output stream
 *                    and is then stored on this writer
 * @throws Exception if initialising the partitioner or obtaining the stream fails
 */
@Override
public void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception {
    partitioner.init(inputSplit);
    // Wrap whatever stream the partitioner currently exposes.
    this.out = new DataOutputStream(partitioner.currentOutputStream());
    // Stored last, after initialisation and stream creation have succeeded.
    this.partitioner = partitioner;
}
 
Example #7
Source File: FileRecordWriter.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Sets this writer up from an input split and a partitioner.
 *
 * @param inputSplit  the split handed to {@code partitioner.init(...)}
 * @param partitioner the partitioner that supplies the current output stream
 *                    and is then stored on this writer
 * @throws Exception if initialising the partitioner or obtaining the stream fails
 */
@Override
public void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception {
    partitioner.init(inputSplit);
    // Wrap whatever stream the partitioner currently exposes.
    this.out = new DataOutputStream(partitioner.currentOutputStream());
    // Stored last, after initialisation and stream creation have succeeded.
    this.partitioner = partitioner;
}
 
Example #8
Source File: PartitionerTests.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@link NumberOfRecordsPartitioner} initialised with a
 * bootstrapped {@link FileSplit} over a temporary directory reports exactly
 * one partition.
 */
@Test
public void testRecordsPerFilePartition() {
    Partitioner underTest = new NumberOfRecordsPartitioner();

    // Build a split over a temporary directory and prepare it for writing.
    File scratchDir = Files.createTempDir();
    FileSplit split = new FileSplit(scratchDir);
    assertTrue(split.needsBootstrapForWrite());
    split.bootStrapForWrite();

    underTest.init(split);
    assertEquals(1, underTest.numPartitions());
}
 
Example #9
Source File: ExcelRecordWriter.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Sets this writer up from a configuration, an input split and a partitioner.
 *
 * <p>The worksheet name and file type are read from {@code configuration},
 * falling back to {@code DEFAULT_WORKSHEET_NAME} and {@code DEFAULT_FILE_TYPE}.
 *
 * @param configuration the configuration to read settings from; also stored as {@code conf}
 * @param split         the split handed to {@code partitioner.init(...)}
 * @param partitioner   the partitioner that supplies the current output stream
 * @throws Exception if the partitioner, the stream or {@code initPoi()} fails
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {
    this.workBookName = configuration.get(WORKSHEET_NAME,DEFAULT_WORKSHEET_NAME);
    this.fileTypeToUse = configuration.get(FILE_TYPE,DEFAULT_FILE_TYPE);
    this.conf = configuration;
    // Keep a reference to the partitioner, as the two-argument overload does;
    // without it the field stays unset when this overload is the entry point.
    this.partitioner = partitioner;
    partitioner.init(split);
    out = new DataOutputStream(partitioner.currentOutputStream());
    initPoi();
}
 
Example #10
Source File: ExcelRecordWriter.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Sets this writer up from an input split and a partitioner, without a
 * caller-supplied configuration.
 *
 * @param inputSplit  the split handed to {@code partitioner.init(...)}
 * @param partitioner the partitioner that is stored on this writer and that
 *                    supplies the current output stream
 * @throws Exception if the partitioner, the stream or {@code initPoi()} fails
 */
@Override
public void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception {
    // No configuration was passed in, so start from a fresh one.
    this.conf = new Configuration();
    this.partitioner = partitioner;
    this.partitioner.init(inputSplit);
    this.out = new DataOutputStream(this.partitioner.currentOutputStream());
    this.initPoi();
}
 
Example #11
Source File: LocalExecuteExample.java    From Java-Deep-Learning-Cookbook with MIT License 4 votes vote down vote up
/**
 * Reads every record from a CSV file, runs a {@link TransformProcess} over the
 * records with {@link LocalTransformExecutor}, and writes the transformed
 * records out through a {@link CSVRecordWriter}.
 *
 * <p>The input and output paths are placeholders that must be edited before
 * running. An {@link IllegalArgumentException} is caught and reported on
 * standard output with a hint about the paths.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if reading, transforming or writing fails for any other reason
 */
public static void main(String[] args) throws Exception {
    try {
        File file = new File("Path/to/titanic.csv-file");
        // try-with-resources: the reader is closed on every path
        // (the original never closed it).
        try (RecordReader recordReader = new CSVRecordReader(1,',')) {
            recordReader.initialize(new FileSplit(file));

            Schema schema = new Schema.Builder()
                    .addColumnInteger("Survived")
                    .addColumnCategorical("Pclass", Arrays.asList("1","2","3"))
                    .addColumnString("Name")
                    .addColumnCategorical("Sex", Arrays.asList("male","female"))
                    .addColumnsInteger("Age","Siblings/Spouses Aboard","Parents/Children Aboard")
                    .addColumnDouble("Fare")
                    .build();
            TransformProcess transformProcess = new TransformProcess.Builder(schema)
                    .removeColumns("Name","Fare")
                    .categoricalToInteger("Sex")
                    .categoricalToOneHot("Pclass")
                    .removeColumns("Pclass[1]")
                    .build();

            List<List<Writable>> outputData = new ArrayList<>();

            // The writer is likewise closed even if reading, transforming or writing throws.
            try (RecordWriter recordWriter = new CSVRecordWriter()) {
                Partitioner partitioner = new NumberOfRecordsPartitioner();
                recordWriter.initialize(new FileSplit(new File("/Path/To/LocalExecuteExample.csv/file")),partitioner);

                // Load all records into memory before transforming them in one batch.
                while(recordReader.hasNext()){
                    outputData.add(recordReader.next());
                }
                List<List<Writable>> transformedOutput=LocalTransformExecutor.execute(outputData,transformProcess);
                recordWriter.writeBatch(transformedOutput);
            }
        }
    } catch (IllegalArgumentException e) {
        System.out.println("Please provide proper file paths for titanic.csv & fle in place of: Path/to/titanic.csv-file && /Path/To/LocalExecuteExample.csv");
        System.out.println("You need to create an empty CSV file and mention the file path in place of /Path/To/LocalExecuteExample.csv");
    }
}
 
Example #12
Source File: FileRecordWriter.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Sets this writer up from a configuration, an input split and a partitioner.
 *
 * @param configuration the configuration passed to {@code setConf} and to the partitioner
 * @param split         the split the partitioner is initialised with
 * @param partitioner   the partitioner to initialise and hand to the two-argument overload
 * @throws Exception if the partitioner or the delegated initialisation fails
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {
    this.setConf(configuration);
    // Configure the partitioner first, then delegate the remaining setup
    // to the overload that takes no configuration.
    partitioner.init(configuration, split);
    this.initialize(split, partitioner);
}
 
Example #13
Source File: ArrowRecordWriter.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the configuration and the partitioner on this writer.
 *
 * @param configuration the configuration passed to {@code setConf}
 * @param split         not used by this method
 * @param partitioner   the partitioner to store on this writer
 * @throws Exception declared by the overridden signature
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {
    // NOTE(review): the partitioner is stored but not initialised with the
    // split here - confirm that callers do not rely on it being initialised.
    this.setConf(configuration);
    this.partitioner = partitioner;
}
 
Example #14
Source File: ArrowRecordWriter.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the partitioner on this writer and initialises it with the given split.
 *
 * @param inputSplit  the split handed to the partitioner's {@code init}
 * @param partitioner the partitioner to store and initialise
 * @throws Exception if initialising the partitioner fails
 */
@Override
public void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception {
    this.partitioner = partitioner;
    // Same object as the argument; initialised through the freshly set field.
    this.partitioner.init(inputSplit);
}
 
Example #15
Source File: FileRecordWriter.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Sets this writer up from a configuration, an input split and a partitioner.
 *
 * @param configuration the configuration passed to {@code setConf} and to the partitioner
 * @param split         the split the partitioner is initialised with
 * @param partitioner   the partitioner to initialise and hand to the two-argument overload
 * @throws Exception if the partitioner or the delegated initialisation fails
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {
    this.setConf(configuration);
    // Configure the partitioner first, then delegate the remaining setup
    // to the overload that takes no configuration.
    partitioner.init(configuration, split);
    this.initialize(split, partitioner);
}
 
Example #16
Source File: ArrowRecordWriter.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the configuration and the partitioner on this writer.
 *
 * @param configuration the configuration passed to {@code setConf}
 * @param split         not used by this method
 * @param partitioner   the partitioner to store on this writer
 * @throws Exception declared by the overridden signature
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {
    // NOTE(review): the partitioner is stored but not initialised with the
    // split here - confirm that callers do not rely on it being initialised.
    this.setConf(configuration);
    this.partitioner = partitioner;
}
 
Example #17
Source File: ArrowRecordWriter.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the partitioner on this writer and initialises it with the given split.
 *
 * @param inputSplit  the split handed to the partitioner's {@code init}
 * @param partitioner the partitioner to store and initialise
 * @throws Exception if initialising the partitioner fails
 */
@Override
public void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception {
    this.partitioner = partitioner;
    // Same object as the argument; initialised through the freshly set field.
    this.partitioner.init(inputSplit);
}
 
Example #18
Source File: LocalExecuteExample.java    From Java-Deep-Learning-Cookbook with MIT License 4 votes vote down vote up
/**
 * Reads every record from a CSV file, runs a {@link TransformProcess} over the
 * records with {@link LocalTransformExecutor}, and writes the transformed
 * records out through a {@link CSVRecordWriter}.
 *
 * <p>The input and output paths are placeholders that must be edited before
 * running. An {@link IllegalArgumentException} is caught and reported on
 * standard output with a hint about the paths.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if reading, transforming or writing fails for any other reason
 */
public static void main(String[] args) throws Exception {
    try {
        File file = new File("Path/to/titanic.csv-file");
        // try-with-resources: the reader is closed on every path
        // (the original never closed it).
        try (RecordReader recordReader = new CSVRecordReader(1,',')) {
            recordReader.initialize(new FileSplit(file));

            Schema schema = new Schema.Builder()
                    .addColumnInteger("Survived")
                    .addColumnCategorical("Pclass", Arrays.asList("1","2","3"))
                    .addColumnString("Name")
                    .addColumnCategorical("Sex", Arrays.asList("male","female"))
                    .addColumnsInteger("Age","Siblings/Spouses Aboard","Parents/Children Aboard")
                    .addColumnDouble("Fare")
                    .build();
            TransformProcess transformProcess = new TransformProcess.Builder(schema)
                    .removeColumns("Name","Fare")
                    .categoricalToInteger("Sex")
                    .categoricalToOneHot("Pclass")
                    .removeColumns("Pclass[1]")
                    .build();

            List<List<Writable>> outputData = new ArrayList<>();

            // The writer is likewise closed even if reading, transforming or writing throws.
            try (RecordWriter recordWriter = new CSVRecordWriter()) {
                Partitioner partitioner = new NumberOfRecordsPartitioner();
                recordWriter.initialize(new FileSplit(new File("/Path/To/LocalExecuteExample.csv/file")),partitioner);

                // Load all records into memory before transforming them in one batch.
                while(recordReader.hasNext()){
                    outputData.add(recordReader.next());
                }
                List<List<Writable>> transformedOutput=LocalTransformExecutor.execute(outputData,transformProcess);
                recordWriter.writeBatch(transformedOutput);
            }
        }
    } catch (IllegalArgumentException e) {
        System.out.println("Please provide proper file paths for titanic.csv & fle in place of: Path/to/titanic.csv-file && /Path/To/LocalExecuteExample.csv");
        System.out.println("You need to create an empty CSV file and mention the file path in place of /Path/To/LocalExecuteExample.csv");
    }
}
 
Example #19
Source File: MapFileRecordWriter.java    From DataVec with Apache License 2.0 2 votes vote down vote up
/**
 * No-op: this override has an empty body and ignores all three arguments.
 *
 * @param configuration unused
 * @param split         unused
 * @param partitioner   unused
 * @throws Exception declared by the overridden signature; nothing is thrown here
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {

}
 
Example #20
Source File: MapFileRecordWriter.java    From DataVec with Apache License 2.0 2 votes vote down vote up
/**
 * No-op: this override has an empty body and ignores both arguments.
 *
 * @param inputSplit  unused
 * @param partitioner unused
 * @throws Exception declared by the overridden signature; nothing is thrown here
 */
@Override
public void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception {

}
 
Example #21
Source File: MapFileRecordWriter.java    From deeplearning4j with Apache License 2.0 2 votes vote down vote up
/**
 * No-op: this override has an empty body and ignores both arguments.
 *
 * @param inputSplit  unused
 * @param partitioner unused
 * @throws Exception declared by the overridden signature; nothing is thrown here
 */
@Override
public void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception {

}
 
Example #22
Source File: MapFileRecordWriter.java    From deeplearning4j with Apache License 2.0 2 votes vote down vote up
/**
 * No-op: this override has an empty body and ignores all three arguments.
 *
 * @param configuration unused
 * @param split         unused
 * @param partitioner   unused
 * @throws Exception declared by the overridden signature; nothing is thrown here
 */
@Override
public void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception {

}
 
Example #23
Source File: RecordWriter.java    From DataVec with Apache License 2.0 2 votes vote down vote up
/**
 * Initialize the record writer with the given configuration,
 * {@link InputSplit} and {@link Partitioner}.
 *
 * @param configuration the configuration to initialize with
 * @param split the split to use
 * @param partitioner the partitioner to use
 * @throws Exception if initialization fails
 */
void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception;
 
Example #24
Source File: RecordWriter.java    From deeplearning4j with Apache License 2.0 2 votes vote down vote up
/**
 * Initialize a record writer with the given input split and {@link Partitioner}.
 *
 * @param inputSplit the input split to initialize with
 * @param partitioner the partitioner to use
 * @throws Exception if initialization fails
 */
void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception;
 
Example #25
Source File: RecordWriter.java    From deeplearning4j with Apache License 2.0 2 votes vote down vote up
/**
 * Initialize the record writer with the given configuration,
 * {@link InputSplit} and {@link Partitioner}.
 *
 * @param configuration the configuration to initialize with
 * @param split the split to use
 * @param partitioner the partitioner to use
 * @throws Exception if initialization fails
 */
void initialize(Configuration configuration, InputSplit split, Partitioner partitioner) throws Exception;
 
Example #26
Source File: RecordWriter.java    From DataVec with Apache License 2.0 2 votes vote down vote up
/**
 * Initialize a record writer with the given input split and {@link Partitioner}.
 *
 * @param inputSplit the input split to initialize with
 * @param partitioner the partitioner to use
 * @throws Exception if initialization fails
 */
void initialize(InputSplit inputSplit, Partitioner partitioner) throws Exception;