org.datavec.api.records.metadata.RecordMetaDataIndex Java Examples

The following examples show how to use org.datavec.api.records.metadata.RecordMetaDataIndex. They are taken from open source projects; the source file, originating project, and license are noted above each example.
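
Before the individual snippets, here is a minimal, self-contained sketch of the typical round trip with RecordMetaDataIndex, assembled from the ArrowRecordReader examples below: construct the metadata with a record index, a source URI, and the reader class, then hand it back to the reader's loadFromMetaData to re-read that record. The file name data.arrow, the assumption that it already contains Arrow-serialized records, and the package path used for ArrowRecordReader are illustrative assumptions, not part of the original examples.

// A minimal sketch, not taken verbatim from any project below.
// Assumes "data.arrow" already holds Arrow-serialized records and that ArrowRecordReader
// lives in org.datavec.arrow.recordreader, as in recent DataVec/deeplearning4j versions.
import java.io.File;

import org.datavec.api.records.Record;
import org.datavec.api.records.metadata.RecordMetaDataIndex;
import org.datavec.api.records.reader.RecordReader;
import org.datavec.arrow.recordreader.ArrowRecordReader;

public class RecordMetaDataIndexSketch {
    public static void main(String[] args) throws Exception {
        File arrowFile = new File("data.arrow"); // hypothetical input file

        // Metadata for record 0 of the file, re-readable by ArrowRecordReader.
        RecordMetaDataIndex metaData =
                new RecordMetaDataIndex(0, arrowFile.toURI(), ArrowRecordReader.class);

        // Load exactly that record back through its metadata.
        RecordReader reader = new ArrowRecordReader();
        Record record = reader.loadFromMetaData(metaData);

        // The record carries its metadata, so the index can be recovered later.
        long idx = ((RecordMetaDataIndex) record.getMetaData()).getIndex();
        System.out.println("Reloaded record " + idx + " with " + record.getRecord().size() + " columns");
    }
}
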
Example #1
Source File: CollectionSequenceRecordReader.java    From DataVec with Apache License 2.0
@Override
public List<SequenceRecord> loadSequenceFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    Set<Integer> toLoad = new LinkedHashSet<>();
    for (RecordMetaData recordMetaData : recordMetaDatas) {
        if (!(recordMetaData instanceof RecordMetaDataIndex)) {
            throw new IllegalArgumentException("Expected RecordMetaDataIndex; got: " + recordMetaData);
        }
        long idx = ((RecordMetaDataIndex) recordMetaData).getIndex();
        if (idx >= original.size()) {
            throw new IllegalStateException(
                            "Cannot get index " + idx + " from collection: contains " + original + " elements");
        }
        toLoad.add((int) idx);
    }

    List<SequenceRecord> out = new ArrayList<>();
    Iterator<? extends Collection<? extends Collection<Writable>>> iter = original.iterator();
    int i = 0;
    while (iter.hasNext()) {
        Collection<? extends Collection<Writable>> c = iter.next();
        if (!toLoad.contains(i++)) {
            continue;
        }
        List<List<Writable>> record = toList(c);
        SequenceRecord r = new org.datavec.api.records.impl.SequenceRecord(record,
                        new RecordMetaDataIndex(i - 1, null, CollectionSequenceRecordReader.class));
        out.add(r);
    }
    return out;
}
 
Example #2
Source File: CollectionSequenceRecordReader.java    From deeplearning4j with Apache License 2.0
@Override
public List<SequenceRecord> loadSequenceFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException {
    Set<Integer> toLoad = new LinkedHashSet<>();
    for (RecordMetaData recordMetaData : recordMetaDatas) {
        if (!(recordMetaData instanceof RecordMetaDataIndex)) {
            throw new IllegalArgumentException("Expected RecordMetaDataIndex; got: " + recordMetaData);
        }
        long idx = ((RecordMetaDataIndex) recordMetaData).getIndex();
        if (idx >= original.size()) {
            throw new IllegalStateException(
                            "Cannot get index " + idx + " from collection: contains " + original + " elements");
        }
        toLoad.add((int) idx);
    }

    List<SequenceRecord> out = new ArrayList<>();
    Iterator<? extends Collection<? extends Collection<Writable>>> iter = original.iterator();
    int i = 0;
    while (iter.hasNext()) {
        Collection<? extends Collection<Writable>> c = iter.next();
        if (!toLoad.contains(i++)) {
            continue;
        }
        List<List<Writable>> record = toList(c);
        SequenceRecord r = new org.datavec.api.records.impl.SequenceRecord(record,
                        new RecordMetaDataIndex(i - 1, null, CollectionSequenceRecordReader.class));
        out.add(r);
    }
    return out;
}
 
Example #3
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRecordReaderMetaData() throws Exception {
    val recordsToWrite = recordToWrite();
    //write the records to a temporary file
    File tmp = tmpDataFile(recordsToWrite);
    RecordReader recordReader = new ArrowRecordReader();
    RecordMetaDataIndex recordMetaDataIndex = new RecordMetaDataIndex(0,tmp.toURI(),ArrowRecordReader.class);
    recordReader.loadFromMetaData(recordMetaDataIndex);

    Record record = recordReader.nextRecord();
    assertEquals(2,record.getRecord().size());
}
 
Example #4
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testRecordReaderMetaDataList() throws Exception {
    val recordsToWrite = recordToWrite();
    //write the records to a temporary file
    File tmp = tmpDataFile(recordsToWrite);
    RecordReader recordReader = new ArrowRecordReader();
    RecordMetaDataIndex recordMetaDataIndex = new RecordMetaDataIndex(0,tmp.toURI(),ArrowRecordReader.class);
    recordReader.loadFromMetaData(Arrays.<RecordMetaData>asList(recordMetaDataIndex));

    Record record = recordReader.nextRecord();
    assertEquals(2,record.getRecord().size());

}
 
Example #5
Source File: ArrowRecordReader.java    From deeplearning4j with Apache License 2.0
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) {
    Map<String,List<RecordMetaData>> metaDataByUri = new HashMap<>();
    //group the metadata by URI so that each underlying file is initialized only once
    for(RecordMetaData recordMetaData : recordMetaDatas) {
        if(!(recordMetaData instanceof RecordMetaDataIndex)) {
            throw new IllegalArgumentException("Unable to load from meta data. No index specified for record");
        }

        List<RecordMetaData> recordMetaData1 = metaDataByUri.get(recordMetaData.getURI().toString());
        if(recordMetaData1 == null) {
            recordMetaData1 = new ArrayList<>();
            metaDataByUri.put(recordMetaData.getURI().toString(),recordMetaData1);
        }

        recordMetaData1.add(recordMetaData);

    }

    List<Record> ret = new ArrayList<>();
    for(String uri : metaDataByUri.keySet()) {
        List<RecordMetaData> metaData = metaDataByUri.get(uri);
        InputSplit fileSplit = new FileSplit(new File(URI.create(uri)));
        initialize(fileSplit);
        for(RecordMetaData index : metaData) {
            RecordMetaDataIndex index2 = (RecordMetaDataIndex) index;
            this.currIdx = (int) index2.getIndex();
            ret.add(nextRecord());
        }

    }

    return ret;
}
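
As a hedged illustration of how a caller might exercise the list overload above, the sketch below iterates once while remembering each record's metadata, then reloads a subset through loadFromMetaData. The file name data.arrow, the choice to reload only the first and last records, and the ArrowRecordReader package path are assumptions for illustration, not taken from the original sources.

// Caller-side sketch for the list variant of loadFromMetaData (illustrative assumptions noted above).
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.datavec.api.records.Record;
import org.datavec.api.records.metadata.RecordMetaData;
import org.datavec.api.records.reader.RecordReader;
import org.datavec.api.split.FileSplit;
import org.datavec.arrow.recordreader.ArrowRecordReader;

public class LoadFromMetaDataListSketch {
    public static void main(String[] args) throws Exception {
        File arrowFile = new File("data.arrow"); // hypothetical file that already contains records

        RecordReader reader = new ArrowRecordReader();
        reader.initialize(new FileSplit(arrowFile));

        // Remember the metadata of every record while iterating once.
        List<RecordMetaData> remembered = new ArrayList<>();
        while (reader.hasNext()) {
            remembered.add(reader.nextRecord().getMetaData()); // a RecordMetaDataIndex for this reader
        }

        // Later, reload just the first and last records through their metadata.
        List<Record> reloaded = reader.loadFromMetaData(
                Arrays.asList(remembered.get(0), remembered.get(remembered.size() - 1)));
        System.out.println("Reloaded " + reloaded.size() + " records");
    }
}
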
 
Example #6
Source File: ArrowRecordReader.java    From deeplearning4j with Apache License 2.0
@Override
public Record loadFromMetaData(RecordMetaData recordMetaData) {
    if(!(recordMetaData instanceof RecordMetaDataIndex)) {
        throw new IllegalArgumentException("Unable to load from meta data. No index specified for record");
    }

    RecordMetaDataIndex index = (RecordMetaDataIndex) recordMetaData;
    InputSplit fileSplit = new FileSplit(new File(index.getURI()));
    initialize(fileSplit);
    this.currIdx = (int) index.getIndex();
    return nextRecord();
}
 
Example #7
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0
@Test
public void testRecordReaderMetaData() throws Exception {
    val recordsToWrite = recordToWrite();
    //write the records to a temporary file
    File tmp = tmpDataFile(recordsToWrite);
    RecordReader recordReader = new ArrowRecordReader();
    RecordMetaDataIndex recordMetaDataIndex = new RecordMetaDataIndex(0,tmp.toURI(),ArrowRecordReader.class);
    recordReader.loadFromMetaData(recordMetaDataIndex);

    Record record = recordReader.nextRecord();
    assertEquals(2,record.getRecord().size());
}
 
Example #8
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0
@Test
public void testRecordReaderMetaDataList() throws Exception {
    val recordsToWrite = recordToWrite();
    //write the records to a temporary file
    File tmp = tmpDataFile(recordsToWrite);
    RecordReader recordReader = new ArrowRecordReader();
    RecordMetaDataIndex recordMetaDataIndex = new RecordMetaDataIndex(0,tmp.toURI(),ArrowRecordReader.class);
    recordReader.loadFromMetaData(Arrays.<RecordMetaData>asList(recordMetaDataIndex));

    Record record = recordReader.nextRecord();
    assertEquals(2,record.getRecord().size());

}
 
Example #9
Source File: ArrowRecordReader.java    From DataVec with Apache License 2.0
@Override
public List<Record> loadFromMetaData(List<RecordMetaData> recordMetaDatas) {
    Map<String,List<RecordMetaData>> metaDataByUri = new HashMap<>();
    //group the metadata by URI so that each underlying file is initialized only once
    for(RecordMetaData recordMetaData : recordMetaDatas) {
        if(!(recordMetaData instanceof RecordMetaDataIndex)) {
            throw new IllegalArgumentException("Unable to load from meta data. No index specified for record");
        }

        List<RecordMetaData> recordMetaData1 = metaDataByUri.get(recordMetaData.getURI().toString());
        if(recordMetaData1 == null) {
            recordMetaData1 = new ArrayList<>();
            metaDataByUri.put(recordMetaData.getURI().toString(),recordMetaData1);
        }

        recordMetaData1.add(recordMetaData);

    }

    List<Record> ret = new ArrayList<>();
    for(String uri : metaDataByUri.keySet()) {
        List<RecordMetaData> metaData = metaDataByUri.get(uri);
        InputSplit fileSplit = new FileSplit(new File(URI.create(uri)));
        initialize(fileSplit);
        for(RecordMetaData index : metaData) {
            RecordMetaDataIndex index2 = (RecordMetaDataIndex) index;
            this.currIdx = (int) index2.getIndex();
            ret.add(nextRecord());
        }

    }

    return ret;
}
 
Example #10
Source File: ArrowRecordReader.java    From DataVec with Apache License 2.0
@Override
public Record loadFromMetaData(RecordMetaData recordMetaData) {
    if(!(recordMetaData instanceof RecordMetaDataIndex)) {
        throw new IllegalArgumentException("Unable to load from meta data. No index specified for record");
    }

    RecordMetaDataIndex index = (RecordMetaDataIndex) recordMetaData;
    InputSplit fileSplit = new FileSplit(new File(index.getURI()));
    initialize(fileSplit);
    this.currIdx = (int) index.getIndex();
    return nextRecord();
}
 
Example #11
Source File: VasttextExtraMemoryReader.java    From scava with Eclipse Public License 2.0
private Record processNextRecord() {
    List<Writable> next = super.next();
    //next() has already advanced listIndex, so the record just read is at listIndex - 1
    RecordMetaData meta = new RecordMetaDataIndex(listIndex - 1, null, VasttextExtraMemoryReader.class);
    return new org.datavec.api.records.impl.Record(next, meta);
}
 
Example #12
Source File: VasttextMemoryRecordReader.java    From scava with Eclipse Public License 2.0
@Override
public Record nextRecord() {
    List<Writable> next = next();
    //next() has already advanced listIndex, so the record just read is at listIndex - 1
    RecordMetaData meta = new RecordMetaDataIndex(listIndex - 1, null, VasttextMemoryRecordReader.class);
    return new org.datavec.api.records.impl.Record(next, meta);
}
 
Example #13
Source File: CollectionRecordReader.java    From DataVec with Apache License 2.0
@Override
public Record nextRecord() {
    return new org.datavec.api.records.impl.Record(next(),
                    new RecordMetaDataIndex(count - 1, null, CollectionRecordReader.class));
}
 
Example #14
Source File: CollectionSequenceRecordReader.java    From DataVec with Apache License 2.0
@Override
public SequenceRecord nextSequence() {
    return new org.datavec.api.records.impl.SequenceRecord(sequenceRecord(),
                    new RecordMetaDataIndex(count - 1, null, CollectionSequenceRecordReader.class));
}
 
Example #15
Source File: VasttextTextMemoryReader.java    From scava with Eclipse Public License 2.0
public Record processNextRecord() {
    List<Writable> nextProcessed = new ArrayList<>();
    List<Writable> next = next();
    int label;

    //Text part
    String text = next.get(0).toString();
    nextProcessed.add(new Text(text));

    //Label part
    if (labelled) {
        if (multilabel) {
            String[] labels = next.get(1).toString().split(" ");
            //Build a vector of label indices
            List<Double> transformedLabels = new ArrayList<Double>();
            boolean seenLabels = false;
            for (int i = 0; i < labels.length; i++) {
                label = vasttextTextVectorizer.getLabelIndex(labels[i]);
                if (label == -1 && vasttextTextVectorizer.hasfitFinished()) {
                    System.err.println("WARNING: The label \"" + next.get(1).toString()
                            + "\" wasn't seen during training. It has been removed from entry "
                            + (listIndex - 1));
                    continue;
                } else if (label == -1) {
                    throw new UnsupportedOperationException(
                            "Unknown error from the vectorizer. Returned a label of -1 while fitting the dictionary.");
                }
                transformedLabels.add((double) label);
                seenLabels = true;
            }
            //None of the labels was seen during training
            if (!seenLabels) {
                return null;
            }
            INDArray transformed = Nd4j.create(transformedLabels.stream().mapToDouble(d -> d).toArray(),
                    new int[] {transformedLabels.size(), 1});
            nextProcessed.add(new NDArrayWritable(transformed));
        } else {
            label = vasttextTextVectorizer.getLabelIndex(next.get(1).toString());
            if (label == -1 && vasttextTextVectorizer.hasfitFinished()) {
                System.err.println(
                        "WARNING: The label \"" + next.get(1).toString() + "\" wasn't seen during training.");
                return null;
            } else if (label == -1) {
                throw new UnsupportedOperationException(
                        "Unknown error from the vectorizer. Returned a label of -1 while fitting the dictionary.");
            }
            nextProcessed.add(new IntWritable(label));
        }
    }
    //next() has already advanced listIndex, so the record just read is at listIndex - 1
    RecordMetaData meta = new RecordMetaDataIndex(listIndex - 1, null, VasttextMemoryRecordReader.class);
    return new org.datavec.api.records.impl.Record(nextProcessed, meta);
}
 
Example #16
Source File: MapFileSequenceRecordReader.java    From DataVec with Apache License 2.0
@Override
public SequenceRecord loadSequenceFromMetaData(@NonNull RecordMetaData recordMetaData) throws IOException {
    long idx = ((RecordMetaDataIndex) recordMetaData).getIndex();
    return new org.datavec.api.records.impl.SequenceRecord(mapFileReader.getRecord(idx).getSequenceRecord(),
                    recordMetaData);
}
 
Example #17
Source File: MapFileSequenceRecordReader.java    From deeplearning4j with Apache License 2.0
@Override
public SequenceRecord loadSequenceFromMetaData(@NonNull RecordMetaData recordMetaData) throws IOException {
    long idx = ((RecordMetaDataIndex) recordMetaData).getIndex();
    return new org.datavec.api.records.impl.SequenceRecord(mapFileReader.getRecord(idx).getSequenceRecord(),
                    recordMetaData);
}
 
Example #18
Source File: ArrowRecord.java    From deeplearning4j with Apache License 2.0
@Override
public RecordMetaData getMetaData() {
    RecordMetaData ret = new RecordMetaDataIndex(index,recordUri,ArrowRecordReader.class);
    return ret;
}
 
Example #19
Source File: ArrowRecord.java    From DataVec with Apache License 2.0
@Override
public RecordMetaData getMetaData() {
    RecordMetaData ret = new RecordMetaDataIndex(index,recordUri,ArrowRecordReader.class);
    return ret;
}
 
Example #20
Source File: CollectionRecordReader.java    From deeplearning4j with Apache License 2.0
@Override
public Record nextRecord() {
    return new org.datavec.api.records.impl.Record(next(),
                    new RecordMetaDataIndex(count - 1, null, CollectionRecordReader.class));
}
 
Example #21
Source File: CollectionSequenceRecordReader.java    From deeplearning4j with Apache License 2.0
@Override
public SequenceRecord nextSequence() {
    return new org.datavec.api.records.impl.SequenceRecord(sequenceRecord(),
                    new RecordMetaDataIndex(count - 1, null, CollectionSequenceRecordReader.class));
}