org.datavec.api.writable.Text Java Examples
The following examples show how to use
org.datavec.api.writable.Text.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CSVSparkTransformTest.java From DataVec with Apache License 2.0 | 6 votes |
@Test public void testTransformerBatch() throws Exception { List<Writable> input = new ArrayList<>(); input.add(new DoubleWritable(1.0)); input.add(new DoubleWritable(2.0)); Schema schema = new Schema.Builder().addColumnDouble("1.0").addColumnDouble("2.0").build(); List<Writable> output = new ArrayList<>(); output.add(new Text("1.0")); output.add(new Text("2.0")); TransformProcess transformProcess = new TransformProcess.Builder(schema).convertToString("1.0").convertToString("2.0").build(); CSVSparkTransform csvSparkTransform = new CSVSparkTransform(transformProcess); String[] values = new String[] {"1.0", "2.0"}; SingleCSVRecord record = csvSparkTransform.transform(new SingleCSVRecord(values)); BatchCSVRecord batchCSVRecord = new BatchCSVRecord(); for (int i = 0; i < 3; i++) batchCSVRecord.add(record); //data type is string, unable to convert BatchCSVRecord batchCSVRecord1 = csvSparkTransform.transform(batchCSVRecord); /* Base64NDArrayBody body = csvSparkTransform.toArray(batchCSVRecord1); INDArray fromBase64 = Nd4jBase64.fromBase64(body.getNdarray()); assertTrue(fromBase64.isMatrix()); System.out.println("Base 64ed array " + fromBase64); */ }
Example #2
Source File: TestGeoTransforms.java From DataVec with Apache License 2.0 | 6 votes |
/**
 * Verifies that {@code CoordinatesDistanceTransform} appends a Double "dist" column to a schema of
 * three String columns, and checks the mapped output for a single-value input and for a
 * '|'-separated two-value input.
 */
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema inputSchema = new Schema.Builder()
            .addColumnString("point")
            .addColumnString("mean")
            .addColumnString("stddev")
            .build();

    // The last argument is presumably the delimiter regex for the coordinate strings.
    Transform distance = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    distance.setInputSchema(inputSchema);

    // Schema check: the original three columns are kept and "dist" is added as a Double.
    Schema outputSchema = distance.transform(inputSchema);
    assertEquals(4, outputSchema.numColumns());
    assertEquals(
            Arrays.asList("point", "mean", "stddev", "dist"),
            outputSchema.getColumnNames());
    assertEquals(
            Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
            outputSchema.getColumnTypes());

    // Single-value coordinates.
    assertEquals(
            Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"),
                    new DoubleWritable(5.0)),
            distance.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));

    // Two-value coordinates separated by '|'.
    assertEquals(
            Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                    new DoubleWritable(Math.sqrt(160))),
            distance.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                    new Text("10|5"))));
}
Example #3
Source File: JacksonRecordReaderTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
private static void testJacksonRecordReader(RecordReader rr) { List<Writable> json0 = rr.next(); List<Writable> exp0 = Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0")); assertEquals(exp0, json0); List<Writable> json1 = rr.next(); List<Writable> exp1 = Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1")); assertEquals(exp1, json1); List<Writable> json2 = rr.next(); List<Writable> exp2 = Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX")); assertEquals(exp2, json2); assertFalse(rr.hasNext()); //Test reset rr.reset(); assertEquals(exp0, rr.next()); assertEquals(exp1, rr.next()); assertEquals(exp2, rr.next()); assertFalse(rr.hasNext()); }
Example #4
Source File: TestGeoTransforms.java From DataVec with Apache License 2.0 | 6 votes |
/**
 * Verifies that {@code CoordinatesDistanceTransform} appends a Double "dist" column to a schema of
 * three String columns, and checks the mapped output for a single-value input and for a
 * '|'-separated two-value input.
 */
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema inputSchema = new Schema.Builder()
            .addColumnString("point")
            .addColumnString("mean")
            .addColumnString("stddev")
            .build();

    // The last argument is presumably the delimiter regex for the coordinate strings.
    Transform distance = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    distance.setInputSchema(inputSchema);

    // Schema check: the original three columns are kept and "dist" is added as a Double.
    Schema outputSchema = distance.transform(inputSchema);
    assertEquals(4, outputSchema.numColumns());
    assertEquals(
            Arrays.asList("point", "mean", "stddev", "dist"),
            outputSchema.getColumnNames());
    assertEquals(
            Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
            outputSchema.getColumnTypes());

    // Single-value coordinates.
    assertEquals(
            Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"),
                    new DoubleWritable(5.0)),
            distance.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));

    // Two-value coordinates separated by '|'.
    assertEquals(
            Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                    new DoubleWritable(Math.sqrt(160))),
            distance.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                    new Text("10|5"))));
}
Example #5
Source File: CSVRecordReaderTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Checks that a {@code CSVRecordReader} constructed with {@code numLines - 1} as its first
 * argument returns the last line of a {@code numLines}-line CSV file as its first record.
 *
 * @throws IOException if the temporary CSV file cannot be created or written
 * @throws InterruptedException as declared by the original signature (presumably from reader set-up)
 */
@Test
public void testCsvSkipAllButOneLine() throws IOException, InterruptedException {
    final int numLines = 4;
    final List<Writable> lineList = Arrays.<Writable>asList(
            new Text(Integer.toString(numLines - 1)),
            new Text("one"), new Text("two"), new Text("three"));

    // Every line is "<index>,one,two,three".
    String header = ",one,two,three";
    List<String> lines = new ArrayList<>();
    for (int i = 0; i < numLines; i++) {
        lines.add(Integer.toString(i) + header);
    }

    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    // Do not leave the fixture behind in the temp directory once the JVM exits.
    tempFile.deleteOnExit();
    FileUtils.writeLines(tempFile, lines);

    CSVRecordReader rr = new CSVRecordReader(numLines - 1, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();

    assertTrue(rr.hasNext());
    // Expected value first, so that a failure message labels the two sides correctly.
    assertEquals(lineList, rr.next());
}
Example #6
Source File: JDBCRecordReaderTest.java From DataVec with Apache License 2.0 | 6 votes |
/**
 * Reads every record returned by "SELECT * FROM Coffee" and checks the first three values of the
 * first record.
 */
@Test
public void testSimpleIter() throws Exception {
    try (JDBCRecordReader coffeeReader = getInitializedReader("SELECT * FROM Coffee")) {
        // Drain the reader completely before asserting anything.
        List<List<Writable>> rows = new ArrayList<>();
        for (; coffeeReader.hasNext(); ) {
            List<Writable> row = coffeeReader.next();
            rows.add(row);
        }

        assertFalse(rows.isEmpty());

        List<Writable> firstRow = rows.get(0);
        assertEquals(new Text("Bolivian Dark"), firstRow.get(0));
        assertEquals(new Text("14-001"), firstRow.get(1));
        assertEquals(new DoubleWritable(8.95), firstRow.get(2));
    }
}
Example #7
Source File: ExcelRecordReader.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Converts one spreadsheet row into a record, adding one writable per cell iterated from the row.
 *
 * <p>The first row seen fixes {@code numColumns}; every later row must report the same
 * {@code getLastCellNum()}.
 *
 * <p>Cell mapping: BLANK becomes an empty {@code Text}; STRING becomes a {@code Text} holding the
 * formatted cell value; BOOLEAN becomes a {@code BooleanWritable}; NUMERIC becomes a
 * {@code DoubleWritable} parsed from the formatted value; any other type becomes a {@code Text}
 * of the formatted value.
 *
 * @param currRow the row to convert
 * @return the writables for the row, in cell iteration order
 * @throws IllegalStateException if the row's last cell number differs from the first row's
 * @throws NumberFormatException if a NUMERIC cell's formatted value is not parseable as a double
 */
private List<Writable> rowToRecord(Row currRow) {
    if (numColumns < 0) {
        numColumns = currRow.getLastCellNum();
    }

    if (currRow.getLastCellNum() != numColumns) {
        throw new IllegalStateException("Invalid number of columns for row. First number of columns found was " + numColumns + " but row " + currRow.getRowNum() + " was " + currRow.getLastCellNum());
    }

    List<Writable> ret = new ArrayList<>(currRow.getLastCellNum());
    for (Cell cell : currRow) {
        String cellValue = dataFormatter.formatCellValue(cell);
        switch (cell.getCellTypeEnum()) {
            case BLANK:
                ret.add(new Text(""));
                break;
            case STRING:
                // String cells must carry their text, not an empty placeholder.
                ret.add(new Text(cellValue));
                break;
            case BOOLEAN:
                ret.add(new BooleanWritable(Boolean.valueOf(cellValue)));
                break;
            case NUMERIC:
                ret.add(new DoubleWritable(Double.parseDouble(cellValue)));
                break;
            default:
                ret.add(new Text(cellValue));
        }
    }

    return ret;
}
Example #8
Source File: TestGeoTransforms.java From DataVec with Apache License 2.0 | 6 votes |
/**
 * Verifies that {@code CoordinatesDistanceTransform} appends a Double "dist" column to a schema of
 * three String columns, and checks the mapped output for a single-value input and for a
 * '|'-separated two-value input.
 */
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema inputSchema = new Schema.Builder()
            .addColumnString("point")
            .addColumnString("mean")
            .addColumnString("stddev")
            .build();

    // The last argument is presumably the delimiter regex for the coordinate strings.
    Transform distance = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    distance.setInputSchema(inputSchema);

    // Schema check: the original three columns are kept and "dist" is added as a Double.
    Schema outputSchema = distance.transform(inputSchema);
    assertEquals(4, outputSchema.numColumns());
    assertEquals(
            Arrays.asList("point", "mean", "stddev", "dist"),
            outputSchema.getColumnNames());
    assertEquals(
            Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
            outputSchema.getColumnTypes());

    // Single-value coordinates.
    assertEquals(
            Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"),
                    new DoubleWritable(5.0)),
            distance.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));

    // Two-value coordinates separated by '|'.
    assertEquals(
            Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                    new DoubleWritable(Math.sqrt(160))),
            distance.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                    new Text("10|5"))));
}
Example #9
Source File: CategoricalQualityAddFunction.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Folds one value into a running {@code CategoricalQuality} tally.
 *
 * <p>The total always grows by one. The value is then counted as exactly one of: valid (per
 * {@code meta.isValid}), missing (a {@code NullWritable}, or a {@code Text} whose string is null
 * or empty), or invalid (everything else).
 *
 * @param v1 the tally so far
 * @param writable the value to classify
 * @return a new tally including {@code writable}
 */
@Override
public CategoricalQuality apply(CategoricalQuality v1, Writable writable) {
    long validCount = v1.getCountValid();
    long invalidCount = v1.getCountInvalid();
    long missingCount = v1.getCountMissing();
    long totalCount = v1.getCountTotal() + 1;

    if (meta.isValid(writable)) {
        validCount++;
    } else {
        boolean isNull = writable instanceof NullWritable;
        boolean isEmptyText = !isNull
                && writable instanceof Text
                && (writable.toString() == null || writable.toString().isEmpty());
        if (isNull || isEmptyText) {
            missingCount++;
        } else {
            invalidCount++;
        }
    }

    return new CategoricalQuality(validCount, invalidCount, missingCount, totalCount);
}
Example #10
Source File: ExcelRecordReader.java From DataVec with Apache License 2.0 | 6 votes |
/**
 * Converts one spreadsheet row into a record, adding one writable per cell iterated from the row.
 *
 * <p>The first row seen fixes {@code numColumns}; every later row must report the same
 * {@code getLastCellNum()}.
 *
 * <p>Cell mapping: BLANK becomes an empty {@code Text}; STRING becomes a {@code Text} holding the
 * formatted cell value; BOOLEAN becomes a {@code BooleanWritable}; NUMERIC becomes a
 * {@code DoubleWritable} parsed from the formatted value; any other type becomes a {@code Text}
 * of the formatted value.
 *
 * @param currRow the row to convert
 * @return the writables for the row, in cell iteration order
 * @throws IllegalStateException if the row's last cell number differs from the first row's
 * @throws NumberFormatException if a NUMERIC cell's formatted value is not parseable as a double
 */
private List<Writable> rowToRecord(Row currRow) {
    if (numColumns < 0) {
        numColumns = currRow.getLastCellNum();
    }

    if (currRow.getLastCellNum() != numColumns) {
        throw new IllegalStateException("Invalid number of columns for row. First number of columns found was " + numColumns + " but row " + currRow.getRowNum() + " was " + currRow.getLastCellNum());
    }

    List<Writable> ret = new ArrayList<>(currRow.getLastCellNum());
    for (Cell cell : currRow) {
        String cellValue = dataFormatter.formatCellValue(cell);
        switch (cell.getCellTypeEnum()) {
            case BLANK:
                ret.add(new Text(""));
                break;
            case STRING:
                // String cells must carry their text, not an empty placeholder.
                ret.add(new Text(cellValue));
                break;
            case BOOLEAN:
                ret.add(new BooleanWritable(Boolean.valueOf(cellValue)));
                break;
            case NUMERIC:
                ret.add(new DoubleWritable(Double.parseDouble(cellValue)));
                break;
            default:
                ret.add(new Text(cellValue));
        }
    }

    return ret;
}
Example #11
Source File: TestUI.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test @Ignore public void testSequencePlot() throws Exception { Schema schema = new SequenceSchema.Builder().addColumnDouble("sinx") .addColumnCategorical("cat", "s0", "s1", "s2").addColumnString("stringcol").build(); int nSteps = 100; List<List<Writable>> sequence = new ArrayList<>(nSteps); for (int i = 0; i < nSteps; i++) { String c = "s" + i % 3; sequence.add(Arrays.<Writable>asList(new DoubleWritable(Math.sin(i / 10.0)), new Text(c), new Text(String.valueOf(i)))); } String tempDir = System.getProperty("java.io.tmpdir"); String outPath = FilenameUtils.concat(tempDir, "datavec_seqplot_test.html"); // System.out.println(outPath); File f = new File(outPath); f.deleteOnExit(); HtmlSequencePlotting.createHtmlSequencePlotFile("Title!", schema, sequence, f); }
Example #12
Source File: CSVRegexRecordReader.java From deeplearning4j with Apache License 2.0 | 6 votes |
protected List<Writable> parseLine(String line) { String[] split = line.split(delimiter, -1); List<Writable> ret = new ArrayList<>(); for (int i = 0; i < split.length; i++) { String s = split[i]; if (quote != null && s.startsWith(quote) && s.endsWith(quote)) { int n = quote.length(); s = s.substring(n, s.length() - n).replace(quote + quote, quote); } if (regexs != null && regexs[i] != null) { Matcher m = patterns[i].matcher(s); if (m.matches()) { for (int j = 1; j <= m.groupCount(); j++) { //Note: Matcher.group(0) is the entire sequence; we only care about groups 1 onward ret.add(new Text(m.group(j))); } } else { throw new IllegalStateException("Invalid line: value does not match regex (regex=\"" + regexs[i] + "\"; value=\"" + s + "\""); } } else { ret.add(new Text(s)); } } return ret; }
Example #13
Source File: JsonExpanderTransformStepStepRunnerTest.java From konduit-serving with Apache License 2.0 | 6 votes |
/**
 * Feeds a one-element JSON array (an object with a numeric "first" and a string "second" field)
 * through {@code JsonExpanderTransformStepRunner} and checks that the single output record holds
 * the two field values.
 */
@Test
public void testJsonExpansionObjectArray() {
    JsonExpanderTransformStepRunner runner =
            new JsonExpanderTransformStepRunner(new JsonExpanderTransformStep());

    // Payload: [ { "first": 1.0, "second": "hello world" } ]
    JsonObject element = new JsonObject();
    element.put("first", 1.0);
    element.put("second", "hello world");
    JsonArray payload = new JsonArray();
    payload.add(element);

    Record[] input = new Record[1];
    input[0] = new org.datavec.api.records.impl.Record(
            Arrays.asList(new Text(payload.encodePrettily())),
            null);

    Record[] expanded = runner.transform(input);

    assertEquals(1, expanded.length);
    assertEquals(2, expanded[0].getRecord().size());
    assertEquals(1.0, expanded[0].getRecord().get(0).toDouble(), 1e-1);
    assertEquals("hello world", expanded[0].getRecord().get(1).toString());
}
Example #14
Source File: CSVRegexRecordReader.java From DataVec with Apache License 2.0 | 6 votes |
protected List<Writable> parseLine(String line) { String[] split = line.split(delimiter, -1); List<Writable> ret = new ArrayList<>(); for (int i = 0; i < split.length; i++) { String s = split[i]; if (quote != null && s.startsWith(quote) && s.endsWith(quote)) { int n = quote.length(); s = s.substring(n, s.length() - n).replace(quote + quote, quote); } if (regexs != null && regexs[i] != null) { Matcher m = patterns[i].matcher(s); if (m.matches()) { for (int j = 1; j <= m.groupCount(); j++) { //Note: Matcher.group(0) is the entire sequence; we only care about groups 1 onward ret.add(new Text(m.group(j))); } } else { throw new IllegalStateException("Invalid line: value does not match regex (regex=\"" + regexs[i] + "\"; value=\"" + s + "\""); } } else { ret.add(new Text(s)); } } return ret; }
Example #15
Source File: WordPieceTokenizerStepTest.java From konduit-serving with Apache License 2.0 | 6 votes |
/**
 * Checks that the word-piece tokenizer step produces a non-empty feature array for a sample
 * sentence, and that transforming a single one-column record yields exactly one record.
 */
@Test
public void testWordPieceStepInference() throws Exception {
    String sampleText = "These pages provide further information about the dictionary, its content and how it's kept up-to-date.";
    WordPieceTokenizerStepRunner runner = new WordPieceTokenizerStepRunner(wordPieceTokenizerStep);

    // Direct tokenization must produce at least one feature value.
    BertIterator tokens = runner.getToken(sampleText);
    assertNotEquals(0, tokens.next().getFeatures(0).length());

    // Same sentence wrapped as a one-column record.
    List<Writable> fields = new ArrayList<>();
    fields.add(new Text(sampleText));
    Record[] input = new Record[] {
        new org.datavec.api.records.impl.Record(fields, null)
    };

    Record[] tokenizedSentence = runner.transform(input);
    assertEquals(1, tokenizedSentence.length);
}
Example #16
Source File: TestTransformProcess.java From DataVec with Apache License 2.0 | 6 votes |
/**
 * Executes a one-step {@code TransformProcess} (add 1.0 to "col2") on a single record and checks
 * that the string column is unchanged and the double column goes from 2.0 to 3.0.
 */
@Test
public void testExecution() {
    Schema.Builder schemaBuilder = new Schema.Builder();
    Schema schema = schemaBuilder
            .addColumnsString("col")
            .addColumnsDouble("col2")
            .build();

    // Retained from the original test; the result is not used here.
    Map<Character, Integer> m = defaultCharIndex();

    TransformProcess addOne = new TransformProcess.Builder(schema)
            .doubleMathOp("col2", MathOp.Add, 1.0)
            .build();

    List<Writable> record = Arrays.<Writable>asList(new Text("Text"), new DoubleWritable(2.0));
    List<Writable> expected = Arrays.<Writable>asList(new Text("Text"), new DoubleWritable(3.0));

    List<Writable> actual = addOne.execute(record);
    assertEquals(expected, actual);
}
Example #17
Source File: RegexRecordReaderTest.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test public void testRegexLineRecordReader() throws Exception { String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)"; RecordReader rr = new RegexLineRecordReader(regex, 1); rr.initialize(new FileSplit(new ClassPathResource("datavec-api/logtestdata/logtestfile0.txt").getFile())); List<Writable> exp0 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.001"), new Text("1"), new Text("DEBUG"), new Text("First entry message!")); List<Writable> exp1 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.002"), new Text("2"), new Text("INFO"), new Text("Second entry message!")); List<Writable> exp2 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.003"), new Text("3"), new Text("WARN"), new Text("Third entry message!")); assertEquals(exp0, rr.next()); assertEquals(exp1, rr.next()); assertEquals(exp2, rr.next()); assertFalse(rr.hasNext()); //Test reset: rr.reset(); assertEquals(exp0, rr.next()); assertEquals(exp1, rr.next()); assertEquals(exp2, rr.next()); assertFalse(rr.hasNext()); }
Example #18
Source File: ExecutionTest.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Runs a two-step transform (categorical-to-integer on "col1", add 10.0 to "col2") over three
 * records via {@code SparkTransformExecutor} and compares the output, sorted by the first
 * column, with the expected records.
 */
@Test
public void testExecutionSimple() {
    Schema schema = new Schema.Builder()
            .addColumnInteger("col0")
            .addColumnCategorical("col1", "state0", "state1", "state2")
            .addColumnDouble("col2")
            .build();

    TransformProcess tp = new TransformProcess.Builder(schema)
            .categoricalToInteger("col1")
            .doubleMathOp("col2", MathOp.Add, 10.0)
            .build();

    List<List<Writable>> inputData = new ArrayList<>();
    inputData.add(Arrays.<Writable>asList(new IntWritable(0), new Text("state2"), new DoubleWritable(0.1)));
    inputData.add(Arrays.<Writable>asList(new IntWritable(1), new Text("state1"), new DoubleWritable(1.1)));
    inputData.add(Arrays.<Writable>asList(new IntWritable(2), new Text("state0"), new DoubleWritable(2.1)));

    JavaRDD<List<Writable>> rdd = sc.parallelize(inputData);
    List<List<Writable>> actual = new ArrayList<>(SparkTransformExecutor.execute(rdd, tp).collect());

    // Sort the collected output by the integer in column 0 before comparing.
    Comparator<List<Writable>> byFirstColumn = new Comparator<List<Writable>>() {
        @Override
        public int compare(List<Writable> o1, List<Writable> o2) {
            int left = o1.get(0).toInt();
            int right = o2.get(0).toInt();
            return Integer.compare(left, right);
        }
    };
    Collections.sort(actual, byFirstColumn);

    List<List<Writable>> expected = new ArrayList<>();
    expected.add(Arrays.<Writable>asList(new IntWritable(0), new IntWritable(2), new DoubleWritable(10.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(1), new IntWritable(1), new DoubleWritable(11.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(2), new IntWritable(0), new DoubleWritable(12.1)));

    assertEquals(expected, actual);
}
Example #19
Source File: TestConvertToSequence.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Converts three independent records to sequences with {@code convertToSequence()} and checks
 * that the result contains three sequences, each being a single-step wrapper of one input record.
 */
@Test
public void testConvertToSequenceLength1() {
    Schema s = new Schema.Builder()
            .addColumnsString("string")
            .addColumnLong("long")
            .build();

    List<Writable> exampleA = Arrays.<Writable>asList(new Text("a"), new LongWritable(0));
    List<Writable> exampleB = Arrays.<Writable>asList(new Text("b"), new LongWritable(1));
    List<Writable> exampleC = Arrays.<Writable>asList(new Text("c"), new LongWritable(2));
    List<List<Writable>> allExamples = Arrays.asList(exampleA, exampleB, exampleC);

    TransformProcess tp = new TransformProcess.Builder(s)
            .convertToSequence()
            .build();

    JavaRDD<List<Writable>> rdd = sc.parallelize(allExamples);
    JavaRDD<List<List<Writable>>> sequencesRdd = SparkTransformExecutor.executeToSequence(rdd, tp);
    List<List<List<Writable>>> sequences = sequencesRdd.collect();

    assertEquals(3, sequences.size());
    // Membership check only; the order of the collected sequences is not asserted.
    for (List<Writable> example : allExamples) {
        assertTrue(sequences.contains(Collections.singletonList(example)));
    }
}
Example #20
Source File: TestWritablesToStringFunctions.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code SequenceWritablesToStringFunction(",")} joins the values of each time step
 * with "," and joins the time steps with a newline.
 */
@Test
public void testSequenceWritablesToString() throws Exception {
    List<Writable> firstStep = Arrays.<Writable>asList(new DoubleWritable(1.5), new Text("someValue"));
    List<Writable> secondStep = Arrays.<Writable>asList(new DoubleWritable(2.5), new Text("otherValue"));
    List<List<Writable>> l = Arrays.asList(firstStep, secondStep);

    String firstLine = firstStep.get(0).toString() + "," + firstStep.get(1).toString();
    String secondLine = secondStep.get(0).toString() + "," + secondStep.get(1).toString();
    String expected = firstLine + "\n" + secondLine;

    assertEquals(expected, new SequenceWritablesToStringFunction(",").apply(l));
}
Example #21
Source File: StringMapTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Maps a value through the configured string-to-string {@code map}.
 *
 * @param writable the value whose string form is looked up
 * @return a new {@code Text} of the mapped string if the map contains the value's string form;
 *         otherwise the value itself when it already is a {@code Text}, or a new {@code Text}
 *         of its string form
 */
@Override
public Text map(Writable writable) {
    String key = writable.toString();
    if (!map.containsKey(key)) {
        // No mapping: pass the value through, converting to Text only when necessary.
        return (writable instanceof Text) ? (Text) writable : new Text(writable.toString());
    }
    return new Text(map.get(key));
}
Example #22
Source File: FileRecordReader.java From DataVec with Apache License 2.0 | 5 votes |
@Override public List<Writable> record(URI uri, DataInputStream dataInputStream) throws IOException { invokeListeners(uri); //Here: reading the entire file to a Text writable BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream)); StringBuilder sb = new StringBuilder(); String line; while ((line = br.readLine()) != null) { sb.append(line).append("\n"); } return Collections.singletonList((Writable) new Text(sb.toString())); }
Example #23
Source File: RealQualityAddFunction.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Folds one value into a running {@code DoubleQuality} tally.
 *
 * <p>The total always grows by one. The value is counted as exactly one of valid (per
 * {@code meta.isValid}), missing (a {@code NullWritable}, or a {@code Text} whose string is null
 * or empty) or invalid. Independently of that, the value's string form is parsed as a double:
 * NaN and infinite results bump their own counters, and a string that does not parse bumps the
 * non-real counter.
 *
 * @param v1 the tally so far
 * @param writable the value to classify
 * @return a new tally including {@code writable}
 */
@Override
public DoubleQuality call(DoubleQuality v1, Writable writable) throws Exception {
    long validCount = v1.getCountValid();
    long invalidCount = v1.getCountInvalid();
    long missingCount = v1.getCountMissing();
    long totalCount = v1.getCountTotal() + 1;
    long nonRealCount = v1.getCountNonReal();
    long nanCount = v1.getCountNaN();
    long infiniteCount = v1.getCountInfinite();

    if (meta.isValid(writable)) {
        validCount++;
    } else {
        boolean isNull = writable instanceof NullWritable;
        boolean isEmptyText = !isNull
                && writable instanceof Text
                && (writable.toString() == null || writable.toString().isEmpty());
        if (isNull || isEmptyText) {
            missingCount++;
        } else {
            invalidCount++;
        }
    }

    // The numeric checks run for every value, whatever bucket it landed in above.
    try {
        double parsed = Double.parseDouble(writable.toString());
        if (Double.isNaN(parsed)) {
            nanCount++;
        }
        if (Double.isInfinite(parsed)) {
            infiniteCount++;
        }
    } catch (NumberFormatException e) {
        nonRealCount++;
    }

    return new DoubleQuality(validCount, invalidCount, missingCount, totalCount, nonRealCount,
            nanCount, infiniteCount);
}
Example #24
Source File: TestTransformProcess.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Runs a {@code TransformProcess} that converts a single "action" string into a sequence of
 * character indices, and checks that each time step holds the index of the matching character
 * from {@code defaultCharIndex()}.
 */
@Test
public void testExecuteToSequence() {
    Schema schema = new Schema.Builder()
            .addColumnsString("action")
            .build();

    Map<Character, Integer> m = defaultCharIndex();

    TransformProcess toCharIndices = new TransformProcess.Builder(schema)
            .removeAllColumnsExceptFor("action")
            .convertToSequence()
            .transform(new TextToCharacterIndexTransform("action", "action_sequence", m, true))
            .build();

    String s = "in text";
    List<Writable> input = Collections.<Writable>singletonList(new Text(s));

    // One single-column time step per character of the input string.
    List<List<Writable>> expSeq = new ArrayList<>(s.length());
    for (char c : s.toCharArray()) {
        expSeq.add(Collections.<Writable>singletonList(new IntWritable(m.get(c))));
    }

    List<List<Writable>> actual = toCharIndices.executeToSequence(input);
    assertEquals(expSeq, actual);
}
Example #25
Source File: GeographicMidpointReduction.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Produces the reduced midpoint as a single {@code Text} of the form
 * {@code latitude + delim + longitude}.
 *
 * <p>The accumulated sums are averaged over {@code count}, converted to radians with
 * {@code atan2}, and then divided by {@code PI_180} (presumably to give degrees).
 *
 * @return a singleton list holding the formatted midpoint
 * @throws IllegalStateException if no data points were accumulated, if the averaged vector is
 *         within {@code EDGE_CASE_EPS} of the origin on every axis, or if a final coordinate
 *         is NaN
 */
@Override
public List<Writable> get() {
    // Check the precondition before dividing by count, so that no division by zero is attempted.
    if (count == 0) {
        throw new IllegalStateException("Cannot calculate geographic midpoint: no datapoints were added to be reduced");
    }

    double x = sumx / count;
    double y = sumy / count;
    double z = sumz / count;

    if (Math.abs(x) < EDGE_CASE_EPS && Math.abs(y) < EDGE_CASE_EPS && Math.abs(z) < EDGE_CASE_EPS) {
        throw new IllegalStateException("No Geographic midpoint exists: midpoint is center of the earth");
    }

    double longRad = Math.atan2(y, x);
    double hyp = Math.sqrt(x * x + y * y);
    double latRad = Math.atan2(z, hyp);

    double latDeg = latRad / PI_180;
    double longDeg = longRad / PI_180;

    Preconditions.checkState(!Double.isNaN(latDeg), "Final latitude is NaN");
    Preconditions.checkState(!Double.isNaN(longDeg), "Final longitude is NaN");

    String str = latDeg + delim + longDeg;
    return Collections.<Writable>singletonList(new Text(str));
}
Example #26
Source File: ConcatenateStringColumns.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Appends one new {@code Text} column built by joining the configured columns' values with
 * {@code delimiter}, in the order given by {@code columnsToConcatenate}.
 *
 * @param writables the input record; it is copied, not modified
 * @return a copy of the input record with the concatenated column added at the end
 */
@Override
public List<Writable> map(List<Writable> writables) {
    StringBuilder joined = new StringBuilder();
    List<Writable> result = new ArrayList<>(writables);

    boolean first = true;
    for (String columnName : columnsToConcatenate) {
        // The delimiter goes between values only, never before the first one.
        if (!first) {
            joined.append(delimiter);
        }
        first = false;

        int columnIdx = inputSchema.getIndexOfColumn(columnName);
        joined.append(writables.get(columnIdx));
    }

    result.add(new Text(joined.toString()));
    return result;
}
Example #27
Source File: RecordReaderBytesFunction.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Parses one (location, bytes) pair into a record using the wrapped {@code recordReader}.
 *
 * @param v1 pair whose left element is the {@code Text} used to build the URI (presumably the
 *           source path) and whose right element holds the raw bytes
 * @return the record produced by {@code recordReader.record(uri, stream)}
 * @throws IllegalStateException if the record reader throws an {@code IOException}
 */
@Override
public List<Writable> apply(Pair<Text, BytesWritable> v1) {
    // The URI comes from the Text element of the pair; the BytesWritable element is the payload.
    URI uri = URI.create(v1.getLeft().toString());
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(v1.getRight().getContent()));
    try {
        return recordReader.record(uri, dis);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
Example #28
Source File: CSVSparkTransformTest.java From DataVec with Apache License 2.0 | 5 votes |
@Test public void testSingleBatchSequence() throws Exception { List<Writable> input = new ArrayList<>(); input.add(new DoubleWritable(1.0)); input.add(new DoubleWritable(2.0)); Schema schema = new Schema.Builder().addColumnDouble("1.0").addColumnDouble("2.0").build(); List<Writable> output = new ArrayList<>(); output.add(new Text("1.0")); output.add(new Text("2.0")); TransformProcess transformProcess = new TransformProcess.Builder(schema).convertToString("1.0").convertToString("2.0").build(); CSVSparkTransform csvSparkTransform = new CSVSparkTransform(transformProcess); String[] values = new String[] {"1.0", "2.0"}; SingleCSVRecord record = csvSparkTransform.transform(new SingleCSVRecord(values)); BatchCSVRecord batchCSVRecord = new BatchCSVRecord(); for (int i = 0; i < 3; i++) batchCSVRecord.add(record); BatchCSVRecord batchCSVRecord1 = csvSparkTransform.transform(batchCSVRecord); SequenceBatchCSVRecord sequenceBatchCSVRecord = new SequenceBatchCSVRecord(); sequenceBatchCSVRecord.add(Arrays.asList(batchCSVRecord)); Base64NDArrayBody sequenceArray = csvSparkTransform.transformSequenceArray(sequenceBatchCSVRecord); INDArray outputBody = Nd4jBase64.fromBase64(sequenceArray.getNdarray()); //ensure accumulation sequenceBatchCSVRecord.add(Arrays.asList(batchCSVRecord)); sequenceArray = csvSparkTransform.transformSequenceArray(sequenceBatchCSVRecord); assertArrayEquals(new long[]{2,2,3},Nd4jBase64.fromBase64(sequenceArray.getNdarray()).shape()); SequenceBatchCSVRecord transformed = csvSparkTransform.transformSequence(sequenceBatchCSVRecord); assertNotNull(transformed.getRecords()); System.out.println(transformed); }
Example #29
Source File: TestGeoReduction.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test public void testCustomReductions() { List<List<Writable>> inputs = new ArrayList<>(); inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("1#5"))); inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("2#6"))); inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("3#7"))); inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("4#8"))); List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new Text("10.0#26.0")); Schema schema = new Schema.Builder().addColumnString("key").addColumnString("coord").build(); Reducer reducer = new Reducer.Builder(ReduceOp.Count).keyColumns("key") .customReduction("coord", new CoordinatesReduction("coordSum", ReduceOp.Sum, "#")).build(); reducer.setInputSchema(schema); IAggregableReduceOp<List<Writable>, List<Writable>> aggregableReduceOp = reducer.aggregableReducer(); for (List<Writable> l : inputs) aggregableReduceOp.accept(l); List<Writable> out = aggregableReduceOp.get(); assertEquals(2, out.size()); assertEquals(expected, out); //Check schema: String[] expNames = new String[] {"key", "coordSum"}; ColumnType[] expTypes = new ColumnType[] {ColumnType.String, ColumnType.String}; Schema outSchema = reducer.transform(schema); assertEquals(2, outSchema.numColumns()); for (int i = 0; i < 2; i++) { assertEquals(expNames[i], outSchema.getName(i)); assertEquals(expTypes[i], outSchema.getType(i)); } }
Example #30
Source File: FilterWritablesBySchemaFunction.java From DataVec with Apache License 2.0 | 5 votes |
@Override public Boolean call(Writable v1) throws Exception { boolean valid = meta.isValid(v1); if (excludeMissing && (v1 instanceof NullWritable || v1 instanceof Text && (v1.toString() == null || v1.toString().isEmpty()))) return false; //Remove (spark) if (keepValid) return valid; //Spark: return true to keep else return !valid; }