Java Code Examples for org.datavec.api.records.reader.SequenceRecordReader#sequenceRecord()

The following examples show how to use org.datavec.api.records.reader.SequenceRecordReader#sequenceRecord(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CodecReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one sequence from the video resource with {@code sequenceRecord()}, reads it again
 * with {@code nextSequence()} after a reset, and checks that both agree and that the sequence
 * can be loaded once more from the metadata attached to it.
 */
@Test
public void testCodecReaderMeta() throws Exception {
    File video = new ClassPathResource("fire_lowres.mp4").getFile();

    Configuration settings = new Configuration();
    settings.set(CodecRecordReader.RAVEL, "true");
    settings.set(CodecRecordReader.START_FRAME, "160");
    settings.set(CodecRecordReader.TOTAL_FRAMES, "500");
    settings.set(CodecRecordReader.ROWS, "80");
    settings.set(CodecRecordReader.COLUMNS, "46");

    SequenceRecordReader codecReader = new CodecRecordReader();
    codecReader.initialize(new FileSplit(video));
    codecReader.setConf(settings);
    assertTrue(codecReader.hasNext());

    // One list entry per frame; TOTAL_FRAMES is configured as 500 above
    List<List<Writable>> frames = codecReader.sequenceRecord();
    assertEquals(500, frames.size());

    // Second pass: same data, this time wrapped together with its metadata
    codecReader.reset();
    SequenceRecord withMeta = codecReader.nextSequence();
    assertEquals(frames, withMeta.getSequenceRecord());

    RecordMetaData metaData = withMeta.getMetaData();
    String location = metaData.getURI().toString();
    assertTrue(location.endsWith("fire_lowres.mp4"));

    // Loading via the metadata must give back an equal SequenceRecord
    SequenceRecord reloaded = codecReader.loadSequenceFromMetaData(metaData);
    assertEquals(withMeta, reloaded);
}
 
Example 2
Source File: AnalyzeLocal.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Analyze the data quality of sequence data - provides a report on missing values, values that
 * don't comply with schema, etc.
 * <p>
 * Every time step of every sequence returned by the reader is passed through a
 * {@code QualityAnalysisAddFunction}; the resulting states are then converted to
 * {@code ColumnQuality} entries.
 *
 * @param schema Schema for data
 * @param data   Data to analyze; it is read until {@code hasNext()} returns false
 * @return DataQualityAnalysis object built from the schema and the collected column qualities
 */
public static DataQualityAnalysis analyzeQualitySequence(Schema schema, SequenceRecordReader data) {
    final int columnCount = schema.numColumns();
    QualityAnalysisAddFunction accumulate = new QualityAnalysisAddFunction(schema);

    // The add function returns the updated state list, so it is re-assigned on every step
    List<QualityAnalysisState> columnStates = new ArrayList<>();
    while (data.hasNext()) {
        for (List<Writable> timeStep : data.sequenceRecord()) {
            columnStates = accumulate.apply(columnStates, timeStep);
        }
    }

    List<ColumnQuality> qualities = new ArrayList<>(columnCount);
    for (QualityAnalysisState state : columnStates) {
        qualities.add(state.getColumnQuality());
    }

    return new DataQualityAnalysis(schema, qualities);
}
 
Example 3
Source File: CodecReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a sequence read with {@code sequenceRecord()} equals the one returned by
 * {@code nextSequence()} after a reset, that the metadata URI ends with the name of the input
 * file, and that the sequence can be loaded again from that metadata.
 */
@Test
public void testCodecReaderMeta() throws Exception {
    File video = new ClassPathResource("datavec-data-codec/fire_lowres.mp4").getFile();

    Configuration settings = new Configuration();
    settings.set(CodecRecordReader.RAVEL, "true");
    settings.set(CodecRecordReader.START_FRAME, "160");
    settings.set(CodecRecordReader.TOTAL_FRAMES, "500");
    settings.set(CodecRecordReader.ROWS, "80");
    settings.set(CodecRecordReader.COLUMNS, "46");

    SequenceRecordReader codecReader = new CodecRecordReader();
    codecReader.initialize(new FileSplit(video));
    codecReader.setConf(settings);
    assertTrue(codecReader.hasNext());

    // One list entry per frame; TOTAL_FRAMES is configured as 500 above
    List<List<Writable>> frames = codecReader.sequenceRecord();
    assertEquals(500, frames.size());

    // Second pass: same data, this time wrapped together with its metadata
    codecReader.reset();
    SequenceRecord withMeta = codecReader.nextSequence();
    assertEquals(frames, withMeta.getSequenceRecord());

    RecordMetaData metaData = withMeta.getMetaData();
    String location = metaData.getURI().toString();
    assertTrue(location.endsWith(video.getName()));

    // Loading via the metadata must give back an equal SequenceRecord
    SequenceRecord reloaded = codecReader.loadSequenceFromMetaData(metaData);
    assertEquals(withMeta, reloaded);
}
 
Example 4
Source File: CodecReaderTest.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one sequence from the video resource and checks the shape of its first step:
 * a single writable whose length is rows x columns x 3 (80 x 46 x 3).
 */
@Test
public void testCodecReader() throws Exception {
    File video = new ClassPathResource("datavec-data-codec/fire_lowres.mp4").getFile();

    Configuration settings = new Configuration();
    settings.set(CodecRecordReader.RAVEL, "true");
    settings.set(CodecRecordReader.START_FRAME, "160");
    settings.set(CodecRecordReader.TOTAL_FRAMES, "500");
    settings.set(CodecRecordReader.ROWS, "80");
    settings.set(CodecRecordReader.COLUMNS, "46");

    SequenceRecordReader codecReader = new CodecRecordReader();
    codecReader.initialize(new FileSplit(video));
    codecReader.setConf(settings);
    assertTrue(codecReader.hasNext());

    List<List<Writable>> frames = codecReader.sequenceRecord();
    List<Writable> firstFrame = frames.iterator().next();

    // The first step holds exactly one writable ...
    assertEquals(1, firstFrame.size());

    // ... and that writable is an array of the expected size: 80x46x3
    ArrayWritable pixels = (ArrayWritable) firstFrame.iterator().next();
    assertEquals(80 * 46 * 3, pixels.length());
}
 
Example 5
Source File: CodecReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Same check as the non-native codec reader test, using {@code NativeCodecRecordReader}:
 * the first step of the sequence must hold a single writable of length 80 x 46 x 3.
 * The test is currently disabled via {@code @Ignore}.
 */
@Ignore
@Test
public void testNativeCodecReader() throws Exception {
    File video = new ClassPathResource("fire_lowres.mp4").getFile();

    Configuration settings = new Configuration();
    settings.set(CodecRecordReader.RAVEL, "true");
    settings.set(CodecRecordReader.START_FRAME, "160");
    settings.set(CodecRecordReader.TOTAL_FRAMES, "500");
    settings.set(CodecRecordReader.ROWS, "80");
    settings.set(CodecRecordReader.COLUMNS, "46");

    SequenceRecordReader nativeReader = new NativeCodecRecordReader();
    nativeReader.initialize(new FileSplit(video));
    nativeReader.setConf(settings);
    assertTrue(nativeReader.hasNext());

    List<List<Writable>> frames = nativeReader.sequenceRecord();
    List<Writable> firstFrame = frames.iterator().next();

    // The first step holds exactly one writable ...
    assertEquals(1, firstFrame.size());

    // ... and that writable is an array of the expected size: 80x46x3
    ArrayWritable pixels = (ArrayWritable) firstFrame.iterator().next();
    assertEquals(80 * 46 * 3, pixels.length());
}
 
Example 6
Source File: CodecReaderTest.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Reads one sequence from the video resource and checks the shape of its first step:
 * a single writable whose length is rows x columns x 3 (80 x 46 x 3).
 */
@Test
public void testCodecReader() throws Exception {
    File video = new ClassPathResource("fire_lowres.mp4").getFile();

    Configuration settings = new Configuration();
    settings.set(CodecRecordReader.RAVEL, "true");
    settings.set(CodecRecordReader.START_FRAME, "160");
    settings.set(CodecRecordReader.TOTAL_FRAMES, "500");
    settings.set(CodecRecordReader.ROWS, "80");
    settings.set(CodecRecordReader.COLUMNS, "46");

    SequenceRecordReader codecReader = new CodecRecordReader();
    codecReader.initialize(new FileSplit(video));
    codecReader.setConf(settings);
    assertTrue(codecReader.hasNext());

    List<List<Writable>> frames = codecReader.sequenceRecord();
    List<Writable> firstFrame = frames.iterator().next();

    // The first step holds exactly one writable ...
    assertEquals(1, firstFrame.size());

    // ... and that writable is an array of the expected size: 80x46x3
    ArrayWritable pixels = (ArrayWritable) firstFrame.iterator().next();
    assertEquals(80 * 46 * 3, pixels.length());
}
 
Example 7
Source File: CSVNLinesSequenceRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every sequence twice - first as plain data, then (after a reset) together with its
 * metadata - and checks that both passes agree and that loading from the collected metadata
 * reproduces the same {@code SequenceRecord}s.
 */
@Test
public void testCSVNlinesSequenceRecordReaderMetaData() throws Exception {
    int nLinesPerSequence = 10;

    SequenceRecordReader seqRR = new CSVNLinesSequenceRecordReader(nLinesPerSequence);
    seqRR.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    // First pass: plain sequence data only
    List<List<List<Writable>>> out = new ArrayList<>();
    while (seqRR.hasNext()) {
        List<List<Writable>> next = seqRR.sequenceRecord();
        out.add(next);
    }

    // Second pass: data plus metadata for every sequence
    seqRR.reset();
    List<List<List<Writable>>> out2 = new ArrayList<>();
    List<SequenceRecord> out3 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    while (seqRR.hasNext()) {
        SequenceRecord seq = seqRR.nextSequence();
        out2.add(seq.getSequenceRecord());
        meta.add(seq.getMetaData());
        out3.add(seq);
    }

    assertEquals(out, out2);

    // Re-loading from the metadata list must yield the records seen in the second pass
    List<SequenceRecord> out4 = seqRR.loadSequenceFromMetaData(meta);
    assertEquals(out3, out4);
}
 
Example 8
Source File: CodecReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the same video once through a {@code FileSplit} and once through a raw
 * {@code DataInputStream}, and checks that both reads produce the same sequence.
 * The test is currently disabled via {@code @Ignore}.
 */
@Ignore
@Test
public void testNativeViaDataInputStream() throws Exception {

    File file = new ClassPathResource("fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new NativeCodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");

    // Separate copy of the settings for the second, stream-based reader
    Configuration conf2 = new Configuration(conf);

    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> expected = reader.sequenceRecord();


    SequenceRecordReader reader2 = new NativeCodecRecordReader();
    reader2.setConf(conf2);

    // try-with-resources so the file handle is released even when reading or an assertion fails
    List<List<Writable>> actual;
    try (DataInputStream dataInputStream = new DataInputStream(new FileInputStream(file))) {
        actual = reader2.sequenceRecord(null, dataInputStream);
    }

    assertEquals(expected, actual);
}
 
Example 9
Source File: CSVVariableSlidingWindowRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Iterates over all windows produced by a {@code CSVVariableSlidingWindowRecordReader} with a
 * maximum of 3 lines per window and spot-checks window sizes and contents at selected positions,
 * then checks the total number of windows.
 */
@Test
public void testCSVVariableSlidingWindowRecordReader() throws Exception {
    int maxLinesPerSequence = 3;

    SequenceRecordReader seqRR = new CSVVariableSlidingWindowRecordReader(maxLinesPerSequence);
    seqRR.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    // Plain line-by-line reader over the same file, used to build the expected window
    CSVRecordReader lineReader = new CSVRecordReader();
    lineReader.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    int windowIdx = 0;
    while (seqRR.hasNext()) {
        List<List<Writable>> window = seqRR.sequenceRecord();

        if (windowIdx == 0) {
            // first seq should be length 1
            assertEquals(1, window.size());
        }
        if (windowIdx == maxLinesPerSequence - 1) {
            // Expected window: the first maxLinesPerSequence lines, newest line first
            LinkedList<List<Writable>> newestFirst = new LinkedList<>();
            for (int line = 0; line < maxLinesPerSequence; line++) {
                newestFirst.addFirst(lineReader.next());
            }
            assertEquals(newestFirst, window);
        }
        if (windowIdx == maxLinesPerSequence) {
            assertEquals(maxLinesPerSequence, window.size());
        }
        // NOTE(review): if the final assertEquals(152, ...) holds, the index never exceeds 151
        // inside this loop, so this "last seq should be length 1" check looks unreachable - confirm
        // the intended index.
        if (windowIdx > 151) {
            assertEquals(1, window.size());
        }

        windowIdx++;
    }

    assertEquals(152, windowIdx);
}
 
Example 10
Source File: CSVNLinesSequenceRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@code CSVNLinesSequenceRecordReader} groups the file into sequences of
 * 10 consecutive lines, by comparing every sequence with lines read from a plain
 * {@code CSVRecordReader} over the same file.
 */
@Test
public void testCSVNLinesSequenceRecordReader() throws Exception {
    int nLinesPerSequence = 10;

    SequenceRecordReader seqRR = new CSVNLinesSequenceRecordReader(nLinesPerSequence);
    seqRR.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    // Reference reader: yields the same file one line at a time
    CSVRecordReader lineReader = new CSVRecordReader();
    lineReader.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    int sequencesSeen = 0;
    while (seqRR.hasNext()) {
        List<List<Writable>> actual = seqRR.sequenceRecord();

        // Build the expected sequence from the next nLinesPerSequence plain lines
        List<List<Writable>> wanted = new ArrayList<>();
        int line = 0;
        while (line < nLinesPerSequence) {
            wanted.add(lineReader.next());
            line++;
        }

        assertEquals(10, actual.size());
        assertEquals(wanted, actual);

        sequencesSeen++;
    }

    assertEquals(150 / nLinesPerSequence, sequencesSeen);
}
 
Example 11
Source File: CSVNLinesSequenceRecordReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every sequence twice - first as plain data, then (after a reset) together with its
 * metadata - and checks that both passes agree and that loading from the collected metadata
 * reproduces the same {@code SequenceRecord}s.
 */
@Test
public void testCSVNlinesSequenceRecordReaderMetaData() throws Exception {
    int nLinesPerSequence = 10;

    SequenceRecordReader seqRR = new CSVNLinesSequenceRecordReader(nLinesPerSequence);
    seqRR.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    // First pass: plain sequence data only
    List<List<List<Writable>>> out = new ArrayList<>();
    while (seqRR.hasNext()) {
        List<List<Writable>> next = seqRR.sequenceRecord();
        out.add(next);
    }

    // Second pass: data plus metadata for every sequence
    seqRR.reset();
    List<List<List<Writable>>> out2 = new ArrayList<>();
    List<SequenceRecord> out3 = new ArrayList<>();
    List<RecordMetaData> meta = new ArrayList<>();
    while (seqRR.hasNext()) {
        SequenceRecord seq = seqRR.nextSequence();
        out2.add(seq.getSequenceRecord());
        meta.add(seq.getMetaData());
        out3.add(seq);
    }

    assertEquals(out, out2);

    // Re-loading from the metadata list must yield the records seen in the second pass
    List<SequenceRecord> out4 = seqRR.loadSequenceFromMetaData(meta);
    assertEquals(out3, out4);
}
 
Example 12
Source File: CodecReaderTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the same video once through a {@code FileSplit} and once through a raw
 * {@code DataInputStream}, and checks that both reads produce the same sequence.
 */
@Test
public void testViaDataInputStream() throws Exception {

    File file = new ClassPathResource("fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new CodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");

    // Separate copy of the settings for the second, stream-based reader
    Configuration conf2 = new Configuration(conf);

    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> expected = reader.sequenceRecord();


    SequenceRecordReader reader2 = new CodecRecordReader();
    reader2.setConf(conf2);

    // try-with-resources so the file handle is released even when reading or an assertion fails
    List<List<Writable>> actual;
    try (DataInputStream dataInputStream = new DataInputStream(new FileInputStream(file))) {
        actual = reader2.sequenceRecord(null, dataInputStream);
    }

    assertEquals(expected, actual);
}
 
Example 13
Source File: CodecReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the same video once through a {@code FileSplit} and once through a raw
 * {@code DataInputStream}, and checks that both reads produce the same sequence.
 */
@Test
public void testViaDataInputStream() throws Exception {

    File file = new ClassPathResource("datavec-data-codec/fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new CodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");

    // Separate copy of the settings for the second, stream-based reader
    Configuration conf2 = new Configuration(conf);

    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> expected = reader.sequenceRecord();


    SequenceRecordReader reader2 = new CodecRecordReader();
    reader2.setConf(conf2);

    // try-with-resources so the file handle is released even when reading or an assertion fails
    List<List<Writable>> actual;
    try (DataInputStream dataInputStream = new DataInputStream(new FileInputStream(file))) {
        actual = reader2.sequenceRecord(null, dataInputStream);
    }

    assertEquals(expected, actual);
}
 
Example 14
Source File: CSVLineSequenceRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a two-line CSV file and checks that {@code CSVLineSequenceRecordReader} returns each
 * line as one sequence whose steps are the individual values of that line. The read is repeated
 * three times with a {@code reset()} in between.
 */
@Test
public void test() throws Exception {

    File dir = testDir.newFolder();
    File csv = new File(dir, "temp.csv");
    String content = "a,b,c\n1,2,3,4";
    FileUtils.writeStringToFile(csv, content, StandardCharsets.UTF_8);

    SequenceRecordReader rr = new CSVLineSequenceRecordReader();
    rr.initialize(new FileSplit(csv));

    // Expected sequence for the first line: one single-value step per field
    List<List<Writable>> firstLine = Arrays.asList(
            Collections.<Writable>singletonList(new Text("a")),
            Collections.<Writable>singletonList(new Text("b")),
            Collections.<Writable>singletonList(new Text("c")));

    // Expected sequence for the second line (one field longer than the first)
    List<List<Writable>> secondLine = Arrays.asList(
            Collections.<Writable>singletonList(new Text("1")),
            Collections.<Writable>singletonList(new Text("2")),
            Collections.<Writable>singletonList(new Text("3")),
            Collections.<Writable>singletonList(new Text("4")));

    for (int pass = 0; pass < 3; pass++) {
        int sequencesSeen = 0;
        while (rr.hasNext()) {
            List<List<Writable>> actual = rr.sequenceRecord();
            // Only the very first sequence of a pass is compared with the first line
            List<List<Writable>> wanted = (sequencesSeen == 0) ? firstLine : secondLine;
            sequencesSeen++;
            assertEquals(wanted, actual);
        }

        assertEquals(2, sequencesSeen);

        rr.reset();
    }
}
 
Example 15
Source File: CodecReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a sequence read with {@code sequenceRecord()} equals the one returned by
 * {@code nextSequence()} after a reset, that the metadata URI ends with the name of the input
 * file, and that the sequence can be loaded again from that metadata.
 * The test is currently disabled via {@code @Ignore}.
 */
@Ignore
@Test
public void testNativeCodecReaderMeta() throws Exception {
    File file = new ClassPathResource("datavec-data-codec/fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new NativeCodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");
    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> record = reader.sequenceRecord();
    assertEquals(500, record.size()); //500 frames

    // Second pass: same data, this time wrapped together with its metadata
    reader.reset();
    SequenceRecord seqR = reader.nextSequence();
    assertEquals(record, seqR.getSequenceRecord());
    RecordMetaData meta = seqR.getMetaData();
    // Compare against the name of the file actually handed to the reader rather than a
    // hard-coded literal, so the check does not depend on how the resource was resolved
    assertTrue(meta.getURI().toString().endsWith(file.getName()));

    // Loading via the metadata must give back an equal SequenceRecord
    SequenceRecord fromMeta = reader.loadSequenceFromMetaData(meta);
    assertEquals(seqR, fromMeta);
}
 
Example 16
Source File: CodecReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the same video once through a {@code FileSplit} and once through a raw
 * {@code DataInputStream}, and checks that both reads produce the same sequence.
 * The test is currently disabled via {@code @Ignore}.
 */
@Ignore
@Test
public void testNativeViaDataInputStream() throws Exception {

    File file = new ClassPathResource("datavec-data-codec/fire_lowres.mp4").getFile();
    SequenceRecordReader reader = new NativeCodecRecordReader();
    Configuration conf = new Configuration();
    conf.set(CodecRecordReader.RAVEL, "true");
    conf.set(CodecRecordReader.START_FRAME, "160");
    conf.set(CodecRecordReader.TOTAL_FRAMES, "500");
    conf.set(CodecRecordReader.ROWS, "80");
    conf.set(CodecRecordReader.COLUMNS, "46");

    // Separate copy of the settings for the second, stream-based reader
    Configuration conf2 = new Configuration(conf);

    reader.initialize(new FileSplit(file));
    reader.setConf(conf);
    assertTrue(reader.hasNext());
    List<List<Writable>> expected = reader.sequenceRecord();


    SequenceRecordReader reader2 = new NativeCodecRecordReader();
    reader2.setConf(conf2);

    // try-with-resources so the file handle is released even when reading or an assertion fails
    List<List<Writable>> actual;
    try (DataInputStream dataInputStream = new DataInputStream(new FileInputStream(file))) {
        actual = reader2.sequenceRecord(null, dataInputStream);
    }

    assertEquals(expected, actual);
}
 
Example 17
Source File: AnalyzeLocal.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Get a list of unique values from the specified column of a sequence.
 * <p>
 * The reader is consumed until {@code hasNext()} returns false; the value at the column's
 * index is taken from every time step of every sequence.
 *
 * @param columnName      Name of the column to get unique values from
 * @param schema          Data schema, used to look up the index of {@code columnName}
 * @param sequenceData    Sequence data to get unique values from
 * @return Set of the distinct values found in that column across all sequences and steps
 */
public static Set<Writable> getUniqueSequence(String columnName, Schema schema,
                                               SequenceRecordReader sequenceData) {
    final int columnIndex = schema.getIndexOfColumn(columnName);
    Set<Writable> distinct = new HashSet<>();
    while (sequenceData.hasNext()) {
        for (List<Writable> timeStep : sequenceData.sequenceRecord()) {
            distinct.add(timeStep.get(columnIndex));
        }
    }
    return distinct;
}
 
Example 18
Source File: CSVVariableSlidingWindowRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Iterates over all windows produced by a {@code CSVVariableSlidingWindowRecordReader} with a
 * maximum of 3 lines per window and spot-checks window sizes and contents at selected positions,
 * then checks the total number of windows.
 */
@Test
public void testCSVVariableSlidingWindowRecordReader() throws Exception {
    int maxLinesPerSequence = 3;

    SequenceRecordReader seqRR = new CSVVariableSlidingWindowRecordReader(maxLinesPerSequence);
    seqRR.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    // Plain line-by-line reader over the same file, used to build the expected window
    CSVRecordReader lineReader = new CSVRecordReader();
    lineReader.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    int windowIdx = 0;
    while (seqRR.hasNext()) {
        List<List<Writable>> window = seqRR.sequenceRecord();

        if (windowIdx == 0) {
            // first seq should be length 1
            assertEquals(1, window.size());
        }
        if (windowIdx == maxLinesPerSequence - 1) {
            // Expected window: the first maxLinesPerSequence lines, newest line first
            LinkedList<List<Writable>> newestFirst = new LinkedList<>();
            for (int line = 0; line < maxLinesPerSequence; line++) {
                newestFirst.addFirst(lineReader.next());
            }
            assertEquals(newestFirst, window);
        }
        if (windowIdx == maxLinesPerSequence) {
            assertEquals(maxLinesPerSequence, window.size());
        }
        // NOTE(review): if the final assertEquals(152, ...) holds, the index never exceeds 151
        // inside this loop, so this "last seq should be length 1" check looks unreachable - confirm
        // the intended index.
        if (windowIdx > 151) {
            assertEquals(1, window.size());
        }

        windowIdx++;
    }

    assertEquals(152, windowIdx);
}
 
Example 19
Source File: CSVNLinesSequenceRecordReaderTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@code CSVNLinesSequenceRecordReader} groups the file into sequences of
 * 10 consecutive lines, by comparing every sequence with lines read from a plain
 * {@code CSVRecordReader} over the same file.
 */
@Test
public void testCSVNLinesSequenceRecordReader() throws Exception {
    int nLinesPerSequence = 10;

    SequenceRecordReader seqRR = new CSVNLinesSequenceRecordReader(nLinesPerSequence);
    seqRR.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    // Reference reader: yields the same file one line at a time
    CSVRecordReader lineReader = new CSVRecordReader();
    lineReader.initialize(new FileSplit(new ClassPathResource("datavec-api/iris.dat").getFile()));

    int sequencesSeen = 0;
    while (seqRR.hasNext()) {
        List<List<Writable>> actual = seqRR.sequenceRecord();

        // Build the expected sequence from the next nLinesPerSequence plain lines
        List<List<Writable>> wanted = new ArrayList<>();
        int line = 0;
        while (line < nLinesPerSequence) {
            wanted.add(lineReader.next());
            line++;
        }

        assertEquals(10, actual.size());
        assertEquals(wanted, actual);

        sequencesSeen++;
    }

    assertEquals(150 / nLinesPerSequence, sequencesSeen);
}
 
Example 20
Source File: CSVVariableSlidingWindowRecordReaderTest.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Iterates over all windows produced by a {@code CSVVariableSlidingWindowRecordReader} with a
 * maximum of 3 lines per window and a stride of 2, spot-checks window sizes and contents at
 * selected positions, then checks the total number of windows.
 */
@Test
public void testCSVVariableSlidingWindowRecordReaderStride() throws Exception {
    int maxLinesPerSequence = 3;
    int stride = 2;

    SequenceRecordReader seqRR = new CSVVariableSlidingWindowRecordReader(maxLinesPerSequence, stride);
    seqRR.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    // Plain line-by-line reader over the same file, used to build the expected window
    CSVRecordReader lineReader = new CSVRecordReader();
    lineReader.initialize(new FileSplit(new ClassPathResource("iris.dat").getFile()));

    int windowIdx = 0;
    while (seqRR.hasNext()) {
        List<List<Writable>> window = seqRR.sequenceRecord();

        if (windowIdx == 0) {
            // first seq should be length 2
            assertEquals(2, window.size());
        }
        if (windowIdx == maxLinesPerSequence - 1) {
            // Each pass starts a fresh list, so only the last maxLinesPerSequence lines read
            // from the reference reader remain in it (newest line first)
            LinkedList<List<Writable>> newestFirst = new LinkedList<>();
            for (int pass = 0; pass < stride; pass++) {
                newestFirst = new LinkedList<>();
                for (int line = 0; line < maxLinesPerSequence; line++) {
                    newestFirst.addFirst(lineReader.next());
                }
            }
            assertEquals(newestFirst, window);
        }
        if (windowIdx == maxLinesPerSequence) {
            assertEquals(maxLinesPerSequence, window.size());
        }
        // NOTE(review): if the final assertEquals(76, ...) holds, the index never exceeds 75
        // inside this loop, so this "last seq should be length 1" check looks unreachable - confirm
        // the intended index.
        if (windowIdx > 151) {
            assertEquals(1, window.size());
        }

        windowIdx++;
    }

    assertEquals(76, windowIdx);
}