org.datavec.api.records.metadata.RecordMetaData Java Examples
The following examples show how to use
org.datavec.api.records.metadata.RecordMetaData.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: RecordReaderMultiDataSetIteratorTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test public void testsBasicMeta() throws Exception { //As per testBasic - but also loading metadata RecordReader rr2 = new CSVRecordReader(0, ','); rr2.initialize(new FileSplit(Resources.asFile("iris.txt"))); RecordReaderMultiDataSetIterator rrmdsi = new RecordReaderMultiDataSetIterator.Builder(10) .addReader("reader", rr2).addInput("reader", 0, 3).addOutputOneHot("reader", 4, 3).build(); rrmdsi.setCollectMetaData(true); int count = 0; while (rrmdsi.hasNext()) { MultiDataSet mds = rrmdsi.next(); MultiDataSet fromMeta = rrmdsi.loadFromMetaData(mds.getExampleMetaData(RecordMetaData.class)); assertEquals(mds, fromMeta); count++; } assertEquals(150 / 10, count); }
Example #2
Source File: TfidfRecordReader.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Load the raw records for the given metadata, then re-apply the TF-IDF
 * transform so callers get the same vectorized form as nextRecord().
 */
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<Record> result = new ArrayList<>();
    for (Record raw : super.loadFromMetaData(recordMetaDatas)) {
        INDArray vector = tfidfVectorizer.transform(raw);
        List<Writable> writables = new ArrayList<>(Collections.<Writable>singletonList(new NDArrayWritable(vector)));
        org.datavec.api.records.impl.Record transformed = new org.datavec.api.records.impl.Record(
                writables,
                new RecordMetaDataURI(raw.getMetaData().getURI(), TfidfRecordReader.class));
        if (appendLabel) {
            // Carry the label (last writable of the raw record) across the transform.
            transformed.getRecord().add(raw.getRecord().get(raw.getRecord().size() - 1));
        }
        result.add(transformed);
    }
    return result;
}
Example #3
Source File: VasttextTextFileReader.java From scava with Eclipse Public License 2.0 | 6 votes |
/**
 * Load the raw records for the given metadata and vectorize each one,
 * mirroring what the streaming read path produces.
 */
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<Record> results = new ArrayList<>();
    for (Record raw : super.loadFromMetaData(recordMetaDatas)) {
        INDArray vector = vasttextTextVectorizer.transform(raw);
        List<Writable> writables = new ArrayList<>(Collections.<Writable>singletonList(new NDArrayWritable(vector)));
        org.datavec.api.records.impl.Record vectorized = new org.datavec.api.records.impl.Record(
                writables,
                new RecordMetaDataURI(raw.getMetaData().getURI(), VasttextTextFileReader.class));
        if (labelled) {
            // Preserve the label, stored as the last writable of the raw record.
            vectorized.getRecord().add(raw.getRecord().get(raw.getRecord().size() - 1));
        }
        results.add(vectorized);
    }
    return results;
}
Example #4
Source File: RegexLineRecordReader.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override public Record nextRecord() { List<Writable> next = next(); URI uri = (locations == null || locations.length < 1 ? null : locations[splitIndex]); RecordMetaData meta = new RecordMetaDataLine(this.lineIndex - 1, uri, RegexLineRecordReader.class); //-1 as line number has been incremented already... return new org.datavec.api.records.impl.Record(next, meta); }
Example #5
Source File: CSVRecordReader.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Load records from metadata. The superclass yields each record as one raw
 * CSV line; parse that line into its individual writables here.
 */
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<Record> records = super.loadFromMetaData(recordMetaDatas);
    for (Record record : records) {
        String rawLine = record.getRecord().get(0).toString();
        record.setRecord(parseLine(rawLine));
    }
    return records;
}
Example #6
Source File: VasttextMemoryRecordReader.java From scava with Eclipse Public License 2.0 | 5 votes |
@Override public Record nextRecord() { List<Writable> next = next(); //As we increase in next the listIndex, then we need to reduce in one the value of listIndex RecordMetaData meta = new RecordMetaDataIndex(listIndex-1, null, VasttextMemoryRecordReader.class); return new org.datavec.api.records.impl.Record(next, meta); }
Example #7
Source File: CSVSequenceRecordReaderTest.java From DataVec with Apache License 2.0 | 5 votes |
@Test
public void testMetaData() throws Exception {
    CSVSequenceRecordReader seqReader = new CSVSequenceRecordReader(1, ",");
    seqReader.initialize(new TestInputSplit());

    // First pass: read every sequence directly via sequenceRecord().
    List<List<List<Writable>>> l = new ArrayList<>();
    while (seqReader.hasNext()) {
        List<List<Writable>> sequence = seqReader.sequenceRecord();
        assertEquals(4, sequence.size()); //4 lines, plus 1 header line
        Iterator<List<Writable>> timeStepIter = sequence.iterator();
        int lineCount = 0;
        while (timeStepIter.hasNext()) {
            timeStepIter.next();
            lineCount++;
        }
        assertEquals(4, lineCount);
        l.add(sequence);
    }

    // Second pass: read again via nextSequence(), collecting metadata per sequence.
    List<SequenceRecord> l2 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    seqReader.reset();
    while (seqReader.hasNext()) {
        SequenceRecord sr = seqReader.nextSequence();
        l2.add(sr);
        meta.add(sr.getMetaData());
    }
    assertEquals(3, l2.size());

    // Reload from metadata: all three views of the data must agree.
    List<SequenceRecord> fromMeta = seqReader.loadSequenceFromMetaData(meta);
    for (int i = 0; i < 3; i++) {
        assertEquals(l.get(i), l2.get(i).getSequenceRecord());
        assertEquals(l.get(i), fromMeta.get(i).getSequenceRecord());
    }
}
Example #8
Source File: CSVVariableSlidingWindowRecordReader.java From deeplearning4j with Apache License 2.0 | 5 votes |
/** Return the next sliding-window sequence with its line-interval metadata. */
@Override
public SequenceRecord nextSequence() {
    int firstLine = lineIndex;
    List<List<Writable>> sequence = sequenceRecord();
    // Lines still buffered in the queue also belong to this sequence's span.
    int lineAfterSpan = lineIndex + queue.size();
    URI uri = (locations == null || locations.length < 1) ? null : locations[splitIndex];
    RecordMetaData meta = new RecordMetaDataLineInterval(firstLine, lineAfterSpan - 1, uri,
            CSVVariableSlidingWindowRecordReader.class);
    return new org.datavec.api.records.impl.SequenceRecord(sequence, meta);
}
Example #9
Source File: VasttextExtraMemoryReader.java From scava with Eclipse Public License 2.0 | 5 votes |
private Record processNextRecord() { List<Writable> next = super.next(); //As we increase in next the listIndex, then we need to reduce in one the value of listIndex RecordMetaData meta = new RecordMetaDataIndex(listIndex-1, null, VasttextExtraMemoryReader.class); return new org.datavec.api.records.impl.Record(next, meta); }
Example #10
Source File: CSVSequenceRecordReader.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Reload sequences from metadata: each entry's URI points at the source file,
 * which is re-read in full as one sequence.
 */
@Override
public List<SequenceRecord> loadSequenceFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<SequenceRecord> sequences = new ArrayList<>();
    for (RecordMetaData meta : recordMetaDatas) {
        File sourceFile = new File(meta.getURI());
        // loadAndClose takes ownership of the stream and closes it.
        List<List<Writable>> sequence = loadAndClose(new FileInputStream(sourceFile));
        sequences.add(new org.datavec.api.records.impl.SequenceRecord(sequence, meta));
    }
    return sequences;
}
Example #11
Source File: RegexRecordReaderTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testRegexSequenceRecordReaderMeta() throws Exception {
    // Groups: timestamp, numeric id, log level, free-text message.
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    // Copy the bundled log files into a temporary folder.
    ClassPathResource cpr = new ClassPathResource("datavec-api/logtestdata/");
    File f = testDir.newFolder();
    cpr.copyDirectory(f);
    String path = new File(f, "logtestfile%d.txt").getAbsolutePath();

    InputSplit is = new NumberedFileInputSplit(path, 0, 1);

    SequenceRecordReader rr = new RegexSequenceRecordReader(regex, 1);
    rr.initialize(is);

    // First pass: plain sequenceRecord() reads.
    List<List<List<Writable>>> out = new ArrayList<>();
    while (rr.hasNext()) {
        out.add(rr.sequenceRecord());
    }
    assertEquals(2, out.size());

    // Second pass: nextSequence(), collecting metadata as we go.
    List<List<List<Writable>>> out2 = new ArrayList<>();
    List<SequenceRecord> out3 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    rr.reset();
    while (rr.hasNext()) {
        SequenceRecord seqr = rr.nextSequence();
        out2.add(seqr.getSequenceRecord());
        out3.add(seqr);
        meta.add(seqr.getMetaData());
    }

    // Loading from metadata must reproduce both the data and the records.
    List<SequenceRecord> fromMeta = rr.loadSequenceFromMetaData(meta);
    assertEquals(out, out2);
    assertEquals(out3, fromMeta);
}
Example #12
Source File: JDBCRecordReaderTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testNextRecordAndRecover() throws Exception {
    try (JDBCRecordReader reader = getInitializedReader("SELECT * FROM Coffee")) {
        Record original = reader.nextRecord();
        List<Writable> fields = original.getRecord();
        RecordMetaData meta = original.getMetaData();
        // A row recovered from its own metadata must match field-for-field.
        Record recovered = reader.loadFromMetaData(meta);
        List<Writable> fieldsRecovered = recovered.getRecord();
        assertEquals(fields.size(), fieldsRecovered.size());
        for (int i = 0; i < fields.size(); i++) {
            assertEquals(fields.get(i), fieldsRecovered.get(i));
        }
    }
}
Example #13
Source File: ArrowRecordReader.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Load a single record from index-based metadata by re-opening the source
 * file and seeking to the stored index.
 *
 * @throws IllegalArgumentException if the metadata carries no index
 */
@Override
public Record loadFromMetaData(RecordMetaData recordMetaData) {
    if (!(recordMetaData instanceof RecordMetaDataIndex)) {
        throw new IllegalArgumentException("Unable to load from meta data. No index specified for record");
    }
    RecordMetaDataIndex indexMeta = (RecordMetaDataIndex) recordMetaData;
    initialize(new FileSplit(new File(indexMeta.getURI())));
    this.currIdx = (int) indexMeta.getIndex();
    return nextRecord();
}
Example #14
Source File: ArrowRecordReader.java From DataVec with Apache License 2.0 | 5 votes |
@Override public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) { Map<String,List<RecordMetaData>> metaDataByUri = new HashMap<>(); //gather all unique locations for the metadata //this will prevent initialization multiple times of the record for(RecordMetaData recordMetaData : recordMetaDatas) { if(!(recordMetaData instanceof RecordMetaDataIndex)) { throw new IllegalArgumentException("Unable to load from meta data. No index specified for record"); } List<RecordMetaData> recordMetaData1 = metaDataByUri.get(recordMetaData.getURI().toString()); if(recordMetaData1 == null) { recordMetaData1 = new ArrayList<>(); metaDataByUri.put(recordMetaData.getURI().toString(),recordMetaData1); } recordMetaData1.add(recordMetaData); } List<Record> ret = new ArrayList<>(); for(String uri : metaDataByUri.keySet()) { List<RecordMetaData> metaData = metaDataByUri.get(uri); InputSplit fileSplit = new FileSplit(new File(URI.create(uri))); initialize(fileSplit); for(RecordMetaData index : metaData) { RecordMetaDataIndex index2 = (RecordMetaDataIndex) index; this.currIdx = (int) index2.getIndex(); ret.add(nextRecord()); } } return ret; }
Example #15
Source File: ArrowConverterTest.java From DataVec with Apache License 2.0 | 5 votes |
@Test public void testRecordReaderMetaDataList() throws Exception { val recordsToWrite = recordToWrite(); //send file File tmp = tmpDataFile(recordsToWrite); RecordReader recordReader = new ArrowRecordReader(); RecordMetaDataIndex recordMetaDataIndex = new RecordMetaDataIndex(0,tmp.toURI(),ArrowRecordReader.class); recordReader.loadFromMetaData(Arrays.<RecordMetaData>asList(recordMetaDataIndex)); Record record = recordReader.nextRecord(); assertEquals(2,record.getRecord().size()); }
Example #16
Source File: MapFileSequenceRecordReader.java From deeplearning4j with Apache License 2.0 | 5 votes |
/** Batch variant: delegates to the single-metadata overload per entry. */
@Override
public List<SequenceRecord> loadSequenceFromMetaData(@NonNull List<RecordMetaData> recordMetaDatas) throws IOException {
    // Pre-size the result to avoid resizing during the loop.
    List<SequenceRecord> sequences = new ArrayList<>(recordMetaDatas.size());
    for (RecordMetaData meta : recordMetaDatas) {
        sequences.add(loadSequenceFromMetaData(meta));
    }
    return sequences;
}
Example #17
Source File: RecordReaderMultiDataSetIteratorTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test public void testSplittingCSVSequenceMeta() throws Exception { //Idea: take CSV sequences, and split "csvsequence_i.txt" into two separate inputs; keep "csvSequencelables_i.txt" // as standard one-hot output //need to manually extract File rootDir = temporaryFolder.newFolder(); for (int i = 0; i < 3; i++) { new ClassPathResource(String.format("csvsequence_%d.txt", i)).getTempFileFromArchive(rootDir); new ClassPathResource(String.format("csvsequencelabels_%d.txt", i)).getTempFileFromArchive(rootDir); new ClassPathResource(String.format("csvsequencelabelsShort_%d.txt", i)).getTempFileFromArchive(rootDir); } String featuresPath = FilenameUtils.concat(rootDir.getAbsolutePath(), "csvsequence_%d.txt"); String labelsPath = FilenameUtils.concat(rootDir.getAbsolutePath(), "csvsequencelabels_%d.txt"); SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ","); SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ","); featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2)); labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2)); SequenceRecordReader featureReader2 = new CSVSequenceRecordReader(1, ","); SequenceRecordReader labelReader2 = new CSVSequenceRecordReader(1, ","); featureReader2.initialize(new NumberedFileInputSplit(featuresPath, 0, 2)); labelReader2.initialize(new NumberedFileInputSplit(labelsPath, 0, 2)); RecordReaderMultiDataSetIterator srrmdsi = new RecordReaderMultiDataSetIterator.Builder(1) .addSequenceReader("seq1", featureReader2).addSequenceReader("seq2", labelReader2) .addInput("seq1", 0, 1).addInput("seq1", 2, 2).addOutputOneHot("seq2", 0, 4).build(); srrmdsi.setCollectMetaData(true); int count = 0; while (srrmdsi.hasNext()) { MultiDataSet mds = srrmdsi.next(); MultiDataSet fromMeta = srrmdsi.loadFromMetaData(mds.getExampleMetaData(RecordMetaData.class)); assertEquals(mds, fromMeta); count++; } assertEquals(3, count); }
Example #18
Source File: CSVLineSequenceRecordReader.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Load the flat records for the given metadata, then convert each one into
 * its per-column sequence form.
 */
@Override
public List<SequenceRecord> loadSequenceFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<SequenceRecord> sequences = new ArrayList<>();
    for (Record flat : super.loadFromMetaData(recordMetaDatas)) {
        sequences.add(convert(flat));
    }
    return sequences;
}
Example #19
Source File: JDBCRecordReaderTest.java From DataVec with Apache License 2.0 | 5 votes |
@Test
public void testNextRecordAndRecover() throws Exception {
    try (JDBCRecordReader reader = getInitializedReader("SELECT * FROM Coffee")) {
        Record r = reader.nextRecord();
        List<Writable> fields = r.getRecord();
        RecordMetaData meta = r.getMetaData();
        // Recover the same row from its metadata and compare element-wise.
        List<Writable> fieldsRecovered = reader.loadFromMetaData(meta).getRecord();
        assertEquals(fields.size(), fieldsRecovered.size());
        int i = 0;
        for (Writable field : fields) {
            assertEquals(field, fieldsRecovered.get(i++));
        }
    }
}
Example #20
Source File: CSVNLinesSequenceRecordReader.java From DataVec with Apache License 2.0 | 5 votes |
/** Return the next N-line sequence with its line-interval metadata. */
@Override
public SequenceRecord nextSequence() {
    int firstLine = lineIndex;
    List<List<Writable>> sequence = sequenceRecord();
    int nextLine = lineIndex;
    URI uri = (locations == null || locations.length < 1) ? null : locations[splitIndex];
    // The sequence covers lines [firstLine, nextLine - 1] of the source.
    RecordMetaData meta = new RecordMetaDataLineInterval(firstLine, nextLine - 1, uri,
            CSVNLinesSequenceRecordReader.class);
    return new org.datavec.api.records.impl.SequenceRecord(sequence, meta);
}
Example #21
Source File: TransformProcessSequenceRecordReader.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Load multiple records for the given list of {@link RecordMetaData} instances,
 * applying this reader's transform process to each loaded record.
 *
 * @param recordMetaDatas metadata for the records to load
 * @return the loaded records with the transform process applied
 * @throws IOException if an I/O error occurs during loading
 */
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<Record> records = sequenceRecordReader.loadFromMetaData(recordMetaDatas);
    for (Record record : records) {
        // Mutate in place: replace the raw writables with their transformed form.
        record.setRecord(transformProcess.execute(record.getRecord()));
    }
    return records;
}
Example #22
Source File: FileRecordReader.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Reload records from metadata: each entry's URI identifies the file whose
 * contents form the record.
 */
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<Record> records = new ArrayList<>();
    for (RecordMetaData meta : recordMetaDatas) {
        List<Writable> contents = loadFromFile(new File(meta.getURI()));
        records.add(new org.datavec.api.records.impl.Record(contents, meta));
    }
    return records;
}
Example #23
Source File: RecordReaderDataSetiteratorTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testSequenceRecordReaderMeta() throws Exception {
    File rootDir = temporaryFolder.newFolder();
    // Need to manually extract the bundled sequence/label files.
    for (int i = 0; i < 3; i++) {
        FileUtils.copyFile(Resources.asFile(String.format("csvsequence_%d.txt", i)),
                        new File(rootDir, String.format("csvsequence_%d.txt", i)));
        FileUtils.copyFile(Resources.asFile(String.format("csvsequencelabels_%d.txt", i)),
                        new File(rootDir, String.format("csvsequencelabels_%d.txt", i)));
    }
    String featuresPath = FilenameUtils.concat(rootDir.getAbsolutePath(), "csvsequence_%d.txt");
    String labelsPath = FilenameUtils.concat(rootDir.getAbsolutePath(), "csvsequencelabels_%d.txt");

    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));

    SequenceRecordReaderDataSetIterator iter =
                    new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false);
    iter.setCollectMetaData(true);

    assertEquals(3, iter.inputColumns());
    assertEquals(4, iter.totalOutcomes());

    // Every DataSet must be reproducible from its collected metadata.
    while (iter.hasNext()) {
        DataSet ds = iter.next();
        List<RecordMetaData> meta = ds.getExampleMetaData(RecordMetaData.class);
        DataSet fromMeta = iter.loadFromMetaData(meta);
        assertEquals(ds, fromMeta);
    }
}
Example #24
Source File: SVMLightRecordReader.java From DataVec with Apache License 2.0 | 5 votes |
/** * Return next Record. * * @return */ @Override public Record nextRecord() { List<Writable> next = next(); URI uri = (locations == null || locations.length < 1 ? null : locations[splitIndex]); RecordMetaData meta = new RecordMetaDataLine(this.lineIndex - 1, uri, SVMLightRecordReader.class); //-1 as line number has been incremented already... return new org.datavec.api.records.impl.Record(next, meta); }
Example #25
Source File: TestImageRecordReader.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testMetaData() throws IOException {
    // Extract the bundled test images into a temporary folder.
    File parentDir = testDir.newFolder();
    new ClassPathResource("datavec-data-image/testimages/").copyDirectory(parentDir);

    ParentPathLabelGenerator labelMaker = new ParentPathLabelGenerator();
    ImageRecordReader rr = new ImageRecordReader(32, 32, 3, labelMaker);
    rr.initialize(new FileSplit(parentDir));

    // First pass: plain next() reads; each record is [image, label].
    List<List<Writable>> out = new ArrayList<>();
    while (rr.hasNext()) {
        List<Writable> l = rr.next();
        out.add(l);
        assertEquals(2, l.size());
    }
    assertEquals(6, out.size());

    rr.reset();

    // Second pass: nextRecord(), collecting metadata per record.
    List<List<Writable>> out2 = new ArrayList<>();
    List<Record> out3 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    while (rr.hasNext()) {
        Record r = rr.nextRecord();
        out2.add(r.getRecord());
        out3.add(r);
        meta.add(r.getMetaData());
    }
    assertEquals(out, out2);

    // Loading from metadata must reproduce the records exactly.
    List<Record> fromMeta = rr.loadFromMetaData(meta);
    assertEquals(out3, fromMeta);
}
Example #26
Source File: LineRecordReader.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override public Record nextRecord() { List<Writable> next = next(); URI uri = (locations == null || locations.length < 1 ? null : locations[splitIndex]); RecordMetaData meta = new RecordMetaDataLine(this.lineIndex - 1, uri, LineRecordReader.class); //-1 as line number has been incremented already... return new org.datavec.api.records.impl.Record(next, meta); }
Example #27
Source File: BaseCodecRecordReader.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Reload sequences from metadata by opening a fresh stream per entry's URI.
 * Each stream is closed via try-with-resources.
 */
@Override
public List<SequenceRecord> loadSequenceFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    List<SequenceRecord> sequences = new ArrayList<>();
    for (RecordMetaData meta : recordMetaDatas) {
        try (InputStream stream = streamCreatorFn.apply(meta.getURI())) {
            List<List<Writable>> data = loadData(null, stream);
            sequences.add(new org.datavec.api.records.impl.SequenceRecord(data, meta));
        }
    }
    return sequences;
}
Example #28
Source File: CSVNLinesSequenceRecordReaderTest.java From DataVec with Apache License 2.0 | 5 votes |
@Test
public void testCSVNlinesSequenceRecordReaderMetaData() throws Exception {
    int nLinesPerSequence = 10;

    SequenceRecordReader seqRR = new CSVNLinesSequenceRecordReader(nLinesPerSequence);
    seqRR.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    // NOTE(review): rr is initialized but never used in this test.
    CSVRecordReader rr = new CSVRecordReader();
    rr.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    // First pass: plain sequenceRecord() reads.
    List<List<List<Writable>>> out = new ArrayList<>();
    while (seqRR.hasNext()) {
        List<List<Writable>> next = seqRR.sequenceRecord();
        out.add(next);
    }

    seqRR.reset();

    // Second pass: nextSequence(), collecting metadata per sequence.
    List<List<List<Writable>>> out2 = new ArrayList<>();
    List<SequenceRecord> out3 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    while (seqRR.hasNext()) {
        SequenceRecord seq = seqRR.nextSequence();
        out2.add(seq.getSequenceRecord());
        meta.add(seq.getMetaData());
        out3.add(seq);
    }
    assertEquals(out, out2);

    // Sequences loaded back from metadata must match the collected records.
    List<SequenceRecord> out4 = seqRR.loadSequenceFromMetaData(meta);
    assertEquals(out3, out4);
}
Example #29
Source File: FeatureRecordReader.java From FancyBing with GNU General Public License v3.0 | 5 votes |
@Override public Record nextRecord() { List<Writable> next = next(); URI uri = (locations == null || locations.length < 1 ? null : locations[splitIndex]); RecordMetaData meta = new RecordMetaDataLine(this.lineIndex - 1, uri, LineRecordReader.class); //-1 as line number has been incremented already... return new org.datavec.api.records.impl.Record(next, meta); }
Example #30
Source File: VasttextExtraMemoryReader.java From scava with Eclipse Public License 2.0 | 4 votes |
/**
 * Loading a single record from metadata is not implemented for this reader.
 * <p>
 * The previous implementation silently returned {@code null}, deferring the
 * failure to an opaque NullPointerException at the call site; fail fast with
 * an explicit exception instead.
 *
 * @param recordMetaData metadata for the record to load (ignored)
 * @throws UnsupportedOperationException always
 */
@Override
public Record loadFromMetaData(RecordMetaData recordMetaData) throws IOException {
    throw new UnsupportedOperationException(
            "loadFromMetaData(RecordMetaData) is not supported by VasttextExtraMemoryReader");
}