org.datavec.api.split.StringSplit Java Examples
The following examples show how to use
org.datavec.api.split.StringSplit.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FeatureRecordReader.java From FancyBing with GNU General Public License v3.0 | 6 votes |
@Override public boolean hasNext() { if (iter != null && iter.hasNext()) { return true; } else { if (locations != null && !(inputSplit instanceof StringSplit) && splitIndex < locations.length - 1) { splitIndex++; lineIndex = 0; //New split -> reset line count try { close(); iter = lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream())); onLocationOpen(locations[splitIndex]); } catch (IOException e) { e.printStackTrace(); } return iter.hasNext(); } return false; } }
Example #2
Source File: LineRecordReader.java From DataVec with Apache License 2.0 | 6 votes |
@Override public boolean hasNext() { if (iter != null && iter.hasNext()) { return true; } else { if (locations != null && !(inputSplit instanceof StringSplit) && splitIndex < locations.length - 1) { splitIndex++; lineIndex = 0; //New split -> reset line count try { close(); iter = IOUtils.lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream())); onLocationOpen(locations[splitIndex]); } catch (IOException e) { e.printStackTrace(); } return iter.hasNext(); } return false; } }
Example #3
Source File: LineRecordReader.java From DataVec with Apache License 2.0 | 6 votes |
/**
 * Builds the line iterator for the configured input split.
 *
 * <ul>
 *   <li>{@code StringSplit}: a single-element iterator over the split's data string.</li>
 *   <li>{@code InputStreamInputSplit}: a line iterator over the split's stream, if it is non-null.</li>
 *   <li>Any other split: {@code locations} is refreshed from the split and the entry at index
 *       {@code location} is opened as a URL stream.</li>
 * </ul>
 *
 * @param location index into the split's locations; only used for location-based splits
 * @return an iterator over the lines of the selected source
 * @throws RuntimeException wrapping the {@link IOException} raised while opening a location
 * @throws UnsupportedOperationException if no iterator could be created (null stream, or
 *         null/empty locations)
 */
protected Iterator<String> getIterator(int location) {
    if (inputSplit instanceof StringSplit) {
        final StringSplit stringSplit = (StringSplit) inputSplit;
        return Collections.singletonList(stringSplit.getData()).listIterator();
    }

    if (inputSplit instanceof InputStreamInputSplit) {
        final InputStream is = ((InputStreamInputSplit) inputSplit).getIs();
        if (is != null) {
            return IOUtils.lineIterator(new InputStreamReader(is));
        }
    } else {
        this.locations = inputSplit.locations();
        if (locations != null && locations.length > 0) {
            final InputStream inputStream;
            try {
                inputStream = locations[location].toURL().openStream();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            return IOUtils.lineIterator(new InputStreamReader(inputStream));
        }
    }

    // Reached when the stream was null or there were no locations to open.
    throw new UnsupportedOperationException("Unknown input split: " + inputSplit);
}
Example #4
Source File: LineRecordReader.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override public boolean hasNext() { Preconditions.checkState(initialized, "Record reader has not been initialized"); if (iter != null && iter.hasNext()) { return true; } else { if (locations != null && !(inputSplit instanceof StringSplit) && splitIndex < locations.length - 1) { splitIndex++; lineIndex = 0; //New split -> reset line count try { close(); iter = getIterator(splitIndex); onLocationOpen(locations[splitIndex]); } catch (IOException e) { log.error("",e); } return iter.hasNext(); } return false; } }
Example #5
Source File: StringToDataSetExportFunction.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Converts every incoming string into a record and hands the growing batch to
 * {@code processBatchIfRequired}.
 *
 * <p>{@code uid} is set to the current thread id followed by at most the first eight characters
 * of the JVM UID. Each string is parsed by re-initializing {@code recordReader} with a
 * {@code StringSplit} and reading one record.
 *
 * @param stringIterator source of the raw strings to convert
 * @throws Exception if the reader or the batch processing fails
 */
@Override
public void call(Iterator<String> stringIterator) throws Exception {
    final String jvmuid = UIDProvider.getJVMUID();
    final int prefixLength = Math.min(8, jvmuid.length());
    uid = Thread.currentThread().getId() + jvmuid.substring(0, prefixLength);

    final List<List<Writable>> list = new ArrayList<>(batchSize);
    while (stringIterator.hasNext()) {
        final String rawRecord = stringIterator.next();
        recordReader.initialize(new StringSplit(rawRecord));
        list.add(recordReader.next());

        // The flag tells the batch processor that no further input will follow.
        final boolean inputExhausted = !stringIterator.hasNext();
        processBatchIfRequired(list, inputExhausted);
    }
}
Example #6
Source File: FeatureRecordReader.java From FancyBing with GNU General Public License v3.0 | 5 votes |
@Override public List<Writable> next() { List<Writable> ret = new ArrayList<>(); if (count % 1000 == 0) { log.info((new Date()) + " Data " + count); } count++; if (iter.hasNext()) { ret = iter.next(); invokeListeners(ret); lineIndex++; return ret; } else { if (!(inputSplit instanceof StringSplit) && splitIndex < locations.length - 1) { splitIndex++; lineIndex = 0; try { close(); iter = lineIterator(new InputStreamReader(locations[splitIndex].toURL().openStream())); onLocationOpen(locations[splitIndex]); } catch (IOException e) { e.printStackTrace(); } lineIndex = 0; //New split opened -> reset line index if (iter.hasNext()) { ret = iter.next(); invokeListeners(ret); lineIndex++; return ret; } } throw new NoSuchElementException("No more elements found!"); } }
Example #7
Source File: StringToWritablesFunction.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Parses a single string into one record.
 *
 * <p>The shared {@code recordReader} is re-initialized with a {@code StringSplit} wrapping
 * {@code s}; the record it returns is handed back as-is when it already is a {@link List},
 * otherwise it is copied into a new {@link ArrayList}.
 *
 * @param s raw text of one record
 * @return the parsed record as a list
 * @throws Exception if initializing or reading from the record reader fails
 */
@Override
public List<Writable> call(String s) throws Exception {
    recordReader.initialize(new StringSplit(s));
    final Collection<Writable> record = recordReader.next();

    if (!(record instanceof List)) {
        return new ArrayList<>(record);
    }
    return (List<Writable>) record;
}
Example #8
Source File: CSVRecordReaderTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Reads one line through a {@code CSVRegexRecordReader} and checks the resulting entries.
 *
 * <p>The test expects the second comma-separated column ("1.2.3.4 space separator") to be
 * expanded into three entries by the capture groups of the supplied regex, giving four entries
 * in total. A record counter makes sure the loop body actually ran, so an empty reader can no
 * longer let the test pass vacuously.
 */
@Test
public void testRegex() throws Exception {
    CSVRecordReader reader = new CSVRegexRecordReader(0, ",", null, new String[] {null, "(.+) (.+) (.+)"});
    reader.initialize(new StringSplit("normal,1.2.3.4 space separator"));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 4, vals.size());
        assertEquals("normal", vals.get(0).toString());
        assertEquals("1.2.3.4", vals.get(1).toString());
        assertEquals("space", vals.get(2).toString());
        assertEquals("separator", vals.get(3).toString());
    }
    // The single-line input must produce exactly one record.
    assertEquals("Record count", 1, recordCount);
}
Example #9
Source File: CSVRecordReaderTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Checks quote handling of {@code CSVRecordReader}.
 *
 * <p>The input line contains a quoted field with an embedded comma and a field consisting of
 * doubled quotes; the test expects six entries, the quoted name kept in one piece, and the last
 * entry to be a single quote character. A record counter ensures the assertions were executed
 * at least once instead of the loop being skipped silently.
 */
@Test
public void testWithQuotes() throws Exception {
    CSVRecordReader reader = new CSVRecordReader(0, ',', '\"');
    reader.initialize(new StringSplit("1,0,3,\"Braund, Mr. Owen Harris\",male,\"\"\"\""));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 6, vals.size());
        assertEquals("1", vals.get(0).toString());
        assertEquals("0", vals.get(1).toString());
        assertEquals("3", vals.get(2).toString());
        assertEquals("Braund, Mr. Owen Harris", vals.get(3).toString());
        assertEquals("male", vals.get(4).toString());
        assertEquals("\"", vals.get(5).toString());
    }
    // The single-line input must produce exactly one record.
    assertEquals("Record count", 1, recordCount);
}
Example #10
Source File: CSVRecordReaderTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Verifies that empty fields, including trailing ones, are kept as entries.
 *
 * <p>The input line has 23 comma-separated fields, most of them empty, and the test expects 23
 * entries. The record counter guards against the loop never executing, which previously would
 * have let the test pass without checking anything.
 */
@Test
public void testEmptyEntries() throws Exception {
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,"));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 23, vals.size());
    }
    // The single-line input must produce exactly one record.
    assertEquals("Record count", 1, recordCount);
}
Example #11
Source File: CSVRecordReaderTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Reads a line with many empty fields and checks the entry count and the last entry.
 *
 * <p>The test expects 23 entries and a final {@code Text} entry of length 1 (the trailing
 * "1"). The defensive list copy of the record was dropped because the record is only read, and
 * a record counter now fails the test if the reader produces no record at all.
 */
@Test
public void testNext() throws Exception {
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1"));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 23, vals.size());
        Text lastEntry = (Text) vals.get(vals.size() - 1);
        assertEquals("Last entry garbage", 1, lastEntry.getLength());
    }
    // The single-line input must produce exactly one record.
    assertEquals("Record count", 1, recordCount);
}
Example #12
Source File: LineRecordReader.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Prepares the reader for the given split.
 *
 * <p>After delegating to the superclass, location-based splits (anything other than
 * {@code StringSplit} or {@code InputStreamInputSplit}) have all their URIs collected into
 * {@code locations}. The iterator for index 0 is then created and the reader is flagged as
 * initialized.
 *
 * @param split the input to read from
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit split) throws IOException, InterruptedException {
    super.initialize(split);

    final boolean locationBased =
                    !(inputSplit instanceof StringSplit) && !(inputSplit instanceof InputStreamInputSplit);
    if (locationBased) {
        // Drain the split's location iterator into an array for indexed access.
        final ArrayList<URI> collected = new ArrayList<>();
        for (final Iterator<URI> it = inputSplit.locationsIterator(); it.hasNext(); ) {
            collected.add(it.next());
        }
        this.locations = collected.toArray(new URI[0]);
    }

    this.iter = getIterator(0);
    this.initialized = true;
}
Example #13
Source File: LineRecordReaderFunction.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Turns one line of text into a record using the shared {@code recordReader}.
 *
 * @param s the line to parse
 * @return the record read after initializing the reader with {@code s}
 * @throws IllegalStateException wrapping any exception raised while initializing the reader
 */
@Override
public List<Writable> apply(String s) {
    try {
        final StringSplit singleLine = new StringSplit(s);
        recordReader.initialize(singleLine);
    } catch (Exception initFailure) {
        throw new IllegalStateException(initFailure);
    }
    // Reading happens outside the try block, so its exceptions propagate unwrapped.
    final List<Writable> record = recordReader.next();
    return record;
}
Example #14
Source File: StringToWritablesFunction.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Converts a string into a record by feeding it to {@code recordReader} as a
 * {@code StringSplit}.
 *
 * @param s raw text of one record
 * @return the reader's result if it is a {@link List}, otherwise a new {@link ArrayList}
 *         holding the same elements
 * @throws Exception if initializing or reading from the record reader fails
 */
@Override
public List<Writable> call(String s) throws Exception {
    final StringSplit singleRecord = new StringSplit(s);
    recordReader.initialize(singleRecord);

    final Collection<Writable> parsed = recordReader.next();
    final boolean alreadyList = parsed instanceof List;
    if (alreadyList) {
        return (List<Writable>) parsed;
    } else {
        return new ArrayList<>(parsed);
    }
}
Example #15
Source File: CSVRecordReaderTest.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Exercises {@code CSVRegexRecordReader} on a single line.
 *
 * <p>Four entries are expected: the first column unchanged and the second column split into
 * three parts by the regex capture groups. The trailing record-count assertion prevents the
 * test from passing when the reader returns nothing and the loop body never runs.
 */
@Test
public void testRegex() throws Exception {
    CSVRecordReader reader = new CSVRegexRecordReader(0, ",", null, new String[] {null, "(.+) (.+) (.+)"});
    reader.initialize(new StringSplit("normal,1.2.3.4 space separator"));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 4, vals.size());
        assertEquals("normal", vals.get(0).toString());
        assertEquals("1.2.3.4", vals.get(1).toString());
        assertEquals("space", vals.get(2).toString());
        assertEquals("separator", vals.get(3).toString());
    }
    // The single-line input must produce exactly one record.
    assertEquals("Record count", 1, recordCount);
}
Example #16
Source File: CSVRecordReaderTest.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Exercises quoted-field parsing of {@code CSVRecordReader}.
 *
 * <p>Expected result: six entries, with the quoted name (which contains a comma) preserved as
 * one entry and the final field reduced to a single quote character. The trailing record-count
 * assertion prevents a vacuous pass when the reader yields no record.
 */
@Test
public void testWithQuotes() throws Exception {
    CSVRecordReader reader = new CSVRecordReader(0, ',', '\"');
    reader.initialize(new StringSplit("1,0,3,\"Braund, Mr. Owen Harris\",male,\"\"\"\""));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 6, vals.size());
        assertEquals("1", vals.get(0).toString());
        assertEquals("0", vals.get(1).toString());
        assertEquals("3", vals.get(2).toString());
        assertEquals("Braund, Mr. Owen Harris", vals.get(3).toString());
        assertEquals("male", vals.get(4).toString());
        assertEquals("\"", vals.get(5).toString());
    }
    // The single-line input must produce exactly one record.
    assertEquals("Record count", 1, recordCount);
}
Example #17
Source File: CSVRecordReaderTest.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Confirms that empty fields are reported as entries.
 *
 * <p>The line holds 23 comma-separated fields and 23 entries are expected. The trailing
 * record-count assertion makes the test fail, rather than pass silently, if the reader
 * produces no record.
 */
@Test
public void testEmptyEntries() throws Exception {
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,"));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 23, vals.size());
    }
    // The single-line input must produce exactly one record.
    assertEquals("Record count", 1, recordCount);
}
Example #18
Source File: CSVRecordReaderTest.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Checks entry count and the final entry for a line with many empty fields.
 *
 * <p>23 entries are expected and the last one must be a {@code Text} of length 1. The
 * unnecessary copy of the record list was removed, and a record-count assertion now catches
 * the case where the reader returns no record and the loop is skipped.
 */
@Test
public void testNext() throws Exception {
    CSVRecordReader reader = new CSVRecordReader();
    reader.initialize(new StringSplit("1,1,8.0,,,,14.0,,,,15.0,,,,,,,,,,,,1"));

    int recordCount = 0;
    while (reader.hasNext()) {
        List<Writable> vals = reader.next();
        recordCount++;
        assertEquals("Entry count", 23, vals.size());
        Text lastEntry = (Text) vals.get(vals.size() - 1);
        assertEquals("Last entry garbage", 1, lastEntry.getLength());
    }
    // The single-line input must produce exactly one record.
    assertEquals("Record count", 1, recordCount);
}
Example #19
Source File: LineRecordReaderFunction.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Parses a single line into a record.
 *
 * <p>The shared {@code recordReader} is pointed at a {@code StringSplit} containing {@code s}
 * and then asked for its next record.
 *
 * @param s the line to parse
 * @return the record produced by the reader
 * @throws IllegalStateException if initializing the reader throws; the original exception is
 *         kept as the cause
 */
@Override
public List<Writable> apply(String s) {
    try {
        final StringSplit lineSplit = new StringSplit(s);
        recordReader.initialize(lineSplit);
    } catch (Exception cause) {
        throw new IllegalStateException(cause);
    }
    // Deliberately outside the try block: failures while reading are not wrapped.
    final List<Writable> parsed = recordReader.next();
    return parsed;
}
Example #20
Source File: LineRecordReaderFunction.java From DataVec with Apache License 2.0 | 4 votes |
/**
 * Parses one line of text into a record with the shared {@code recordReader}.
 *
 * @param s the line to parse
 * @return the record read after initializing the reader with a {@code StringSplit} of {@code s}
 * @throws Exception if initializing or reading from the record reader fails
 */
@Override
public List<Writable> call(String s) throws Exception {
    final StringSplit singleLine = new StringSplit(s);
    recordReader.initialize(singleLine);

    final List<Writable> record = recordReader.next();
    return record;
}
Example #21
Source File: LineRecordReaderFunction.java From deeplearning4j with Apache License 2.0 | 4 votes |
/**
 * Feeds a single string to {@code recordReader} and returns the record it yields.
 *
 * @param s text of one record
 * @return the next record from the freshly initialized reader
 * @throws Exception if the reader cannot be initialized or read
 */
@Override
public List<Writable> call(String s) throws Exception {
    final StringSplit lineSplit = new StringSplit(s);
    recordReader.initialize(lineSplit);

    final List<Writable> parsed = recordReader.next();
    return parsed;
}