Java Code Examples for org.datavec.api.records.reader.RecordReader#initialize()
The following examples show how to use org.datavec.api.records.reader.RecordReader#initialize().
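Before the individual examples, here is a minimal sketch of the pattern they all share: construct a concrete RecordReader, point it at an InputSplit, call initialize(), then iterate over the records. This sketch is not taken from any of the projects below; the CSV file path is a placeholder.

import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.records.reader.impl.csv.CSVRecordReader;
import org.datavec.api.split.FileSplit;
import org.datavec.api.writable.Writable;

import java.io.File;
import java.util.List;

public class RecordReaderInitializeSketch {
    public static void main(String[] args) throws Exception {
        // Skip 0 header lines, comma-delimited; the path below is only a placeholder.
        RecordReader reader = new CSVRecordReader(0, ',');
        reader.initialize(new FileSplit(new File("data/some.csv"))); // bind the reader to the split
        while (reader.hasNext()) {
            List<Writable> record = reader.next(); // one parsed line per call
            System.out.println(record);
        }
        reader.close();
    }
}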
Example 1
Source File: ExcelRecordReaderTest.java From DataVec with Apache License 2.0
@Test
public void testSimple() throws Exception {
    RecordReader excel = new ExcelRecordReader();
    excel.initialize(new FileSplit(new ClassPathResource("testsheet.xlsx").getFile()));
    assertTrue(excel.hasNext());
    List<Writable> next = excel.next();
    assertEquals(3, next.size());

    // Reader configured to skip one header line
    RecordReader headerReader = new ExcelRecordReader(1);
    headerReader.initialize(new FileSplit(new ClassPathResource("testsheetheader.xlsx").getFile()));
    assertTrue(headerReader.hasNext());
    List<Writable> next2 = headerReader.next();
    assertEquals(3, next2.size());
}
Example 2
Source File: RecordReaderMultiDataSetIteratorTest.java From deeplearning4j with Apache License 2.0
@Test
public void testsBasicMeta() throws Exception {
    //As per testBasic - but also loading metadata
    RecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(Resources.asFile("iris.txt")));

    RecordReaderMultiDataSetIterator rrmdsi = new RecordReaderMultiDataSetIterator.Builder(10)
            .addReader("reader", rr2).addInput("reader", 0, 3).addOutputOneHot("reader", 4, 3).build();

    rrmdsi.setCollectMetaData(true);

    int count = 0;
    while (rrmdsi.hasNext()) {
        MultiDataSet mds = rrmdsi.next();
        MultiDataSet fromMeta = rrmdsi.loadFromMetaData(mds.getExampleMetaData(RecordMetaData.class));
        assertEquals(mds, fromMeta);
        count++;
    }
    assertEquals(150 / 10, count);
}
Example 3
Source File: HyperParameterTuningArbiterUiExample.java From Java-Deep-Learning-Cookbook with MIT License
public RecordReader dataPreprocess() throws IOException, InterruptedException {
    //Schema Definitions
    Schema schema = new Schema.Builder()
            .addColumnsString("RowNumber")
            .addColumnInteger("CustomerId")
            .addColumnString("Surname")
            .addColumnInteger("CreditScore")
            .addColumnCategorical("Geography", Arrays.asList("France", "Spain", "Germany"))
            .addColumnCategorical("Gender", Arrays.asList("Male", "Female"))
            .addColumnsInteger("Age", "Tenure", "Balance", "NumOfProducts", "HasCrCard",
                               "IsActiveMember", "EstimatedSalary", "Exited").build();

    //Schema Transformation
    TransformProcess transformProcess = new TransformProcess.Builder(schema)
            .removeColumns("RowNumber", "Surname", "CustomerId")
            .categoricalToInteger("Gender")
            .categoricalToOneHot("Geography")
            .removeColumns("Geography[France]")
            .build();

    //CSVReader - Reading from file and applying transformation
    RecordReader reader = new CSVRecordReader(1, ',');
    reader.initialize(new FileSplit(new ClassPathResource("Churn_Modelling.csv").getFile()));
    RecordReader transformProcessRecordReader = new TransformProcessRecordReader(reader, transformProcess);
    return transformProcessRecordReader;
}
Example 4
Source File: MultipleEpochsIteratorTest.java From deeplearning4j with Apache License 2.0
@Test
public void testNextAndReset() throws Exception {
    int epochs = 3;

    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(Resources.asFile("iris.txt")));
    DataSetIterator iter = new RecordReaderDataSetIterator(rr, 150);
    MultipleEpochsIterator multiIter = new MultipleEpochsIterator(epochs, iter);

    assertTrue(multiIter.hasNext());
    while (multiIter.hasNext()) {
        DataSet path = multiIter.next();
        assertFalse(path == null);
    }
    assertEquals(epochs, multiIter.epochs);
}
Example 5
Source File: LineReaderTest.java From DataVec with Apache License 2.0
@Test
public void testLineReaderWithInputStreamInputSplit() throws Exception {
    String tempDir = System.getProperty("java.io.tmpdir");
    File tmpdir = new File(tempDir, "tmpdir");
    tmpdir.mkdir();

    File tmp1 = new File(tmpdir, "tmp1.txt.gz");

    OutputStream os = new GZIPOutputStream(new FileOutputStream(tmp1, false));
    IOUtils.writeLines(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9"), null, os);
    os.flush();
    os.close();

    InputSplit split = new InputStreamInputSplit(new GZIPInputStream(new FileInputStream(tmp1)));

    RecordReader reader = new LineRecordReader();
    reader.initialize(split);

    int count = 0;
    while (reader.hasNext()) {
        assertEquals(1, reader.next().size());
        count++;
    }

    assertEquals(9, count);

    try {
        FileUtils.deleteDirectory(tmpdir);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 6
Source File: ModelGenerator.java From arabic-characters-recognition with Apache License 2.0
private static DataSetIterator readCSVDataset(String csvFileClasspath, int BATCH_SIZE, int LABEL_INDEX, int numClasses)
        throws IOException, InterruptedException {
    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(new File(csvFileClasspath)));
    DataSetIterator iterator = new RecordReaderDataSetIterator(rr, BATCH_SIZE, LABEL_INDEX, numClasses);
    return iterator;
}
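For context, a caller could consume the iterator returned by this helper like any other DataSetIterator. The sketch below is illustrative only; the path, batch size, label index, and class count are placeholders, not values from the original project.

// Illustrative call with placeholder arguments.
DataSetIterator iter = readCSVDataset("data/train.csv", 10, 4, 3);
while (iter.hasNext()) {
    DataSet batch = iter.next();
    // Each batch holds a features matrix and a one-hot labels matrix.
    System.out.println(java.util.Arrays.toString(batch.getFeatures().shape())
            + " -> " + java.util.Arrays.toString(batch.getLabels().shape()));
}
iter.reset(); // rewind before handing the iterator to a training loop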
Example 7
Source File: TestAnalyzeLocal.java From deeplearning4j with Apache License 2.0
@Test
public void testAnalysisBasic() throws Exception {

    RecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(new ClassPathResource("iris.txt").getFile()));

    Schema s = new Schema.Builder()
            .addColumnsDouble("0", "1", "2", "3")
            .addColumnInteger("label")
            .build();

    DataAnalysis da = AnalyzeLocal.analyze(s, rr);

    System.out.println(da);

    //Compare:
    List<List<Writable>> list = new ArrayList<>();
    rr.reset();
    while (rr.hasNext()) {
        list.add(rr.next());
    }

    INDArray arr = RecordConverter.toMatrix(DataType.DOUBLE, list);
    INDArray mean = arr.mean(0);
    INDArray std = arr.std(0);

    for (int i = 0; i < 5; i++) {
        double m = ((NumericalColumnAnalysis) da.getColumnAnalysis().get(i)).getMean();
        double stddev = ((NumericalColumnAnalysis) da.getColumnAnalysis().get(i)).getSampleStdev();
        assertEquals(mean.getDouble(i), m, 1e-3);
        assertEquals(std.getDouble(i), stddev, 1e-3);
    }
}
Example 8
Source File: RecordReaderDataSetiteratorTest.java From deeplearning4j with Apache License 2.0
@Test
public void testRecordReaderMetaData() throws Exception {

    RecordReader csv = new CSVRecordReader();
    csv.initialize(new FileSplit(Resources.asFile("iris.txt")));

    int batchSize = 10;
    int labelIdx = 4;
    int numClasses = 3;

    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(csv, batchSize, labelIdx, numClasses);
    rrdsi.setCollectMetaData(true);

    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        int i = 0;
        for (RecordMetaData m : meta) {
            Record r = csv.loadFromMetaData(m);
            INDArray row = ds.getFeatures().getRow(i);
            // if(i <= 3) {
            //     System.out.println(m.getLocation() + "\t" + r.getRecord() + "\t" + row);
            // }

            for (int j = 0; j < 4; j++) {
                double exp = r.getRecord().get(j).toDouble();
                double act = row.getDouble(j);
                assertEquals("Failed on idx: " + j, exp, act, 1e-6);
            }
            i++;
        }
        // System.out.println();

        DataSet fromMeta = rrdsi.loadFromMetaData(meta);
        assertEquals(ds, fromMeta);
    }
}
Example 9
Source File: JacksonRecordReaderTest.java From DataVec with Apache License 2.0
@Test
public void testReadingYaml() throws Exception {
    //Exact same information as JSON format, but in YAML format

    ClassPathResource cpr = new ClassPathResource("yaml/yaml_test_0.txt");
    String path = cpr.getFile().getAbsolutePath().replace("0", "%d");

    InputSplit is = new NumberedFileInputSplit(path, 0, 2);

    RecordReader rr = new JacksonRecordReader(getFieldSelection(), new ObjectMapper(new YAMLFactory()));
    rr.initialize(is);

    testJacksonRecordReader(rr);
}
Example 10
Source File: RegexRecordReaderTest.java From DataVec with Apache License 2.0
@Test
public void testRegexLineRecordReaderMeta() throws Exception {
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    RecordReader rr = new RegexLineRecordReader(regex, 1);
    rr.initialize(new FileSplit(new ClassPathResource("/logtestdata/logtestfile0.txt").getFile()));

    List<List<Writable>> list = new ArrayList<>();
    while (rr.hasNext()) {
        list.add(rr.next());
    }
    assertEquals(3, list.size());

    List<Record> list2 = new ArrayList<>();
    List<List<Writable>> list3 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    rr.reset();
    int count = 1; //Start by skipping 1 line
    while (rr.hasNext()) {
        Record r = rr.nextRecord();
        list2.add(r);
        list3.add(r.getRecord());
        meta.add(r.getMetaData());

        assertEquals(count++, ((RecordMetaDataLine) r.getMetaData()).getLineNumber());
    }

    List<Record> fromMeta = rr.loadFromMetaData(meta);

    assertEquals(list, list3);
    assertEquals(list2, fromMeta);
}
Example 11
Source File: LFWLoader.java From DataVec with Apache License 2.0
public RecordReader getRecordReader(int batchSize, int numExamples, int[] imgDim, int numLabels,
                                    PathLabelGenerator labelGenerator, boolean train, double splitTrainTest,
                                    Random rng) {
    load(batchSize, numExamples, numLabels, labelGenerator, splitTrainTest, rng);
    RecordReader recordReader =
            new ImageRecordReader(imgDim[0], imgDim[1], imgDim[2], labelGenerator, imageTransform);

    try {
        InputSplit data = train ? inputSplit[0] : inputSplit[1];
        recordReader.initialize(data);
    } catch (IOException | InterruptedException e) {
        e.printStackTrace();
    }
    return recordReader;
}
Example 12
Source File: JacksonLineRecordReaderTest.java From deeplearning4j with Apache License 2.0
@Test
public void testReadJSON() throws Exception {

    RecordReader rr = new JacksonLineRecordReader(getFieldSelection(), new ObjectMapper(new JsonFactory()));
    rr.initialize(new FileSplit(new ClassPathResource("datavec-api/json/json_test_3.txt").getFile()));

    testJacksonRecordReader(rr);
}
Example 13
Source File: MatlabInputFormat.java From DataVec with Apache License 2.0
@Override
public RecordReader createReader(InputSplit split) throws IOException, InterruptedException {
    RecordReader reader = new MatlabRecordReader();
    reader.initialize(split);
    return reader;
}
Example 14
Source File: WavInputFormat.java From deeplearning4j with Apache License 2.0
@Override
public RecordReader createReader(InputSplit split) throws IOException, InterruptedException {
    RecordReader waveRecordReader = new WavFileRecordReader();
    waveRecordReader.initialize(split);
    return waveRecordReader;
}
Example 15
Source File: RecordReaderMultiDataSetIteratorTest.java From deeplearning4j with Apache License 2.0
@Test
public void testSplittingCSV() throws Exception {
    //Here's the idea: take Iris, and split it up into 2 inputs and 2 output arrays
    //Inputs: columns 0 and 1-2
    //Outputs: columns 3, and 4->OneHot

    //need to manually extract
    RecordReader rr = new CSVRecordReader(0, ',');
    rr.initialize(new FileSplit(Resources.asFile("iris.txt")));
    RecordReaderDataSetIterator rrdsi = new RecordReaderDataSetIterator(rr, 10, 4, 3);

    RecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(Resources.asFile("iris.txt")));

    MultiDataSetIterator rrmdsi = new RecordReaderMultiDataSetIterator.Builder(10).addReader("reader", rr2)
            .addInput("reader", 0, 0).addInput("reader", 1, 2).addOutput("reader", 3, 3)
            .addOutputOneHot("reader", 4, 3).build();

    while (rrdsi.hasNext()) {
        DataSet ds = rrdsi.next();
        INDArray fds = ds.getFeatures();
        INDArray lds = ds.getLabels();

        MultiDataSet mds = rrmdsi.next();
        assertEquals(2, mds.getFeatures().length);
        assertEquals(2, mds.getLabels().length);
        assertNull(mds.getFeaturesMaskArrays());
        assertNull(mds.getLabelsMaskArrays());
        INDArray[] fmds = mds.getFeatures();
        INDArray[] lmds = mds.getLabels();
        assertNotNull(fmds);
        assertNotNull(lmds);
        for (int i = 0; i < fmds.length; i++)
            assertNotNull(fmds[i]);
        for (int i = 0; i < lmds.length; i++)
            assertNotNull(lmds[i]);

        //Get the subsets of the original iris data
        INDArray expIn1 = fds.get(all(), interval(0, 0, true));
        INDArray expIn2 = fds.get(all(), interval(1, 2, true));
        INDArray expOut1 = fds.get(all(), interval(3, 3, true));
        INDArray expOut2 = lds;

        assertEquals(expIn1, fmds[0]);
        assertEquals(expIn2, fmds[1]);
        assertEquals(expOut1, lmds[0]);
        assertEquals(expOut2, lmds[1]);
    }
    assertFalse(rrmdsi.hasNext());
}
Example 16
Source File: DataSetIteratorHelper.java From Java-Deep-Learning-Cookbook with MIT License
public static RecordReader generateReader(File file) throws IOException, InterruptedException {
    final RecordReader recordReader = new CSVRecordReader(1, ',');
    recordReader.initialize(new FileSplit(file));
    final RecordReader transformProcessRecordReader = applyTransform(recordReader, generateSchema());
    return transformProcessRecordReader;
}
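The applyTransform and generateSchema helpers are not shown in this listing. As a rough sketch only (an assumption modeled on Example 3, not the project's actual code), applyTransform presumably builds a TransformProcess from the schema and wraps the raw reader in a TransformProcessRecordReader:

// Hypothetical helper; the real implementation in DataSetIteratorHelper is not shown here.
private static RecordReader applyTransform(RecordReader recordReader, Schema schema) {
    final TransformProcess transformProcess = new TransformProcess.Builder(schema)
            .removeColumns("RowNumber", "Surname", "CustomerId") // illustrative transform steps
            .categoricalToInteger("Gender")
            .categoricalToOneHot("Geography")
            .build();
    return new TransformProcessRecordReader(recordReader, transformProcess);
}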
Example 17
Source File: TextInputFormat.java From DataVec with Apache License 2.0
@Override
public RecordReader createReader(InputSplit split, Configuration conf) throws IOException, InterruptedException {
    RecordReader reader = new TfidfRecordReader();
    reader.initialize(conf, split);
    return reader;
}
Example 18
Source File: RecordReaderDataSetiteratorTest.java From deeplearning4j with Apache License 2.0
@Test
public void testCSVLoadingRegression() throws Exception {
    int nLines = 30;
    int nFeatures = 5;
    int miniBatchSize = 10;
    int labelIdx = 0;

    String path = "rr_csv_test_rand.csv";
    Pair<double[][], File> p = makeRandomCSV(path, nLines, nFeatures);
    double[][] data = p.getFirst();
    RecordReader testReader = new CSVRecordReader();
    testReader.initialize(new FileSplit(p.getSecond()));

    DataSetIterator iter = new RecordReaderDataSetIterator(testReader, miniBatchSize, labelIdx, labelIdx, true);
    int miniBatch = 0;
    while (iter.hasNext()) {
        DataSet test = iter.next();
        INDArray features = test.getFeatures();
        INDArray labels = test.getLabels();
        assertArrayEquals(new long[] {miniBatchSize, nFeatures}, features.shape());
        assertArrayEquals(new long[] {miniBatchSize, 1}, labels.shape());

        int startRow = miniBatch * miniBatchSize;
        for (int i = 0; i < miniBatchSize; i++) {
            double labelExp = data[startRow + i][labelIdx];
            double labelAct = labels.getDouble(i);
            assertEquals(labelExp, labelAct, 1e-5f);

            int featureCount = 0;
            for (int j = 0; j < nFeatures + 1; j++) {
                if (j == labelIdx)
                    continue;
                double featureExp = data[startRow + i][j];
                double featureAct = features.getDouble(i, featureCount++);
                assertEquals(featureExp, featureAct, 1e-5f);
            }
        }

        miniBatch++;
    }
    assertEquals(nLines / miniBatchSize, miniBatch);
}
Example 19
Source File: CustomerRetentionPredictionExample.java From Java-Deep-Learning-Cookbook with MIT License
private static RecordReader generateReader(File file) throws IOException, InterruptedException {
    final RecordReader recordReader = new CSVRecordReader(1, ',');
    recordReader.initialize(new FileSplit(file));
    final RecordReader transformProcessRecordReader = applyTransform(recordReader, generateSchema());
    return transformProcessRecordReader;
}
Example 20
Source File: JacksonRecordReaderTest.java From DataVec with Apache License 2.0
@Test
public void testAppendingLabels() throws Exception {
    ClassPathResource cpr = new ClassPathResource("json/json_test_0.txt");
    String path = cpr.getFile().getAbsolutePath().replace("0", "%d");
    InputSplit is = new NumberedFileInputSplit(path, 0, 2);

    //Insert at the end:
    RecordReader rr = new JacksonRecordReader(getFieldSelection(), new ObjectMapper(new JsonFactory()), false, -1,
            new LabelGen());
    rr.initialize(is);

    List<Writable> exp0 = Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0"),
            new IntWritable(0));
    assertEquals(exp0, rr.next());

    List<Writable> exp1 = Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1"),
            new IntWritable(1));
    assertEquals(exp1, rr.next());

    List<Writable> exp2 = Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX"),
            new IntWritable(2));
    assertEquals(exp2, rr.next());

    //Insert at position 0:
    rr = new JacksonRecordReader(getFieldSelection(), new ObjectMapper(new JsonFactory()), false, -1, new LabelGen(), 0);
    rr.initialize(is);

    exp0 = Arrays.asList((Writable) new IntWritable(0), new Text("aValue0"), new Text("bValue0"), new Text("cxValue0"));
    assertEquals(exp0, rr.next());

    exp1 = Arrays.asList((Writable) new IntWritable(1), new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1"));
    assertEquals(exp1, rr.next());

    exp2 = Arrays.asList((Writable) new IntWritable(2), new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX"));
    assertEquals(exp2, rr.next());
}