org.datavec.api.writable.Text Java Examples

Source File: CSVSparkTransformTest.java From DataVec with Apache License 2.0

6 votes

/**
 * Pushes one CSV record, and then a batch holding three copies of its transformed
 * form, through a {@link CSVSparkTransform} whose process converts both double
 * columns to strings.
 */
@Test
public void testTransformerBatch() throws Exception {
    List<Writable> doubles = new ArrayList<>();
    doubles.add(new DoubleWritable(1.0));
    doubles.add(new DoubleWritable(2.0));

    Schema twoDoubleColumns = new Schema.Builder()
            .addColumnDouble("1.0")
            .addColumnDouble("2.0")
            .build();

    List<Writable> strings = new ArrayList<>();
    strings.add(new Text("1.0"));
    strings.add(new Text("2.0"));

    TransformProcess toStrings = new TransformProcess.Builder(twoDoubleColumns)
            .convertToString("1.0")
            .convertToString("2.0")
            .build();
    CSVSparkTransform transformer = new CSVSparkTransform(toStrings);

    String[] row = new String[] {"1.0", "2.0"};
    SingleCSVRecord transformedRow = transformer.transform(new SingleCSVRecord(row));

    BatchCSVRecord batch = new BatchCSVRecord();
    int remaining = 3;
    while (remaining-- > 0) {
        batch.add(transformedRow);
    }

    // The columns are strings at this point ("data type is string, unable to convert"),
    // so the Base64 NDArray round-trip check that used to follow is disabled.
    BatchCSVRecord transformedBatch = transformer.transform(batch);
}

Source File: TestGeoTransforms.java From DataVec with Apache License 2.0

6 votes

/**
 * Checks the output schema of {@link CoordinatesDistanceTransform} and the distance
 * column it appends, for a one-component and a two-component ("|"-delimited) input.
 */
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema inputSchema = new Schema.Builder()
                    .addColumnString("point")
                    .addColumnString("mean")
                    .addColumnString("stddev")
                    .build();

    Transform distance = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    distance.setInputSchema(inputSchema);

    // Schema: the three string inputs are kept and a double "dist" column is appended.
    Schema outputSchema = distance.transform(inputSchema);
    assertEquals(4, outputSchema.numColumns());
    assertEquals(
                    Arrays.asList("point", "mean", "stddev", "dist"),
                    outputSchema.getColumnNames());
    assertEquals(
                    Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
                    outputSchema.getColumnTypes());

    // Single-component coordinates.
    assertEquals(
                    Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"),
                                    new DoubleWritable(5.0)),
                    distance.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));

    // Two-component coordinates.
    assertEquals(
                    Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                                    new DoubleWritable(Math.sqrt(160))),
                    distance.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                                    new Text("10|5"))));
}

Source File: JacksonRecordReaderTest.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Reads the three expected records from {@code rr}, verifies the reader is then
 * exhausted, and repeats the same checks after a reset.
 *
 * @param rr an initialized record reader positioned at its first record
 */
private static void testJacksonRecordReader(RecordReader rr) {
    List<Writable> firstExpected =
                    Arrays.asList((Writable) new Text("aValue0"), new Text("bValue0"), new Text("cxValue0"));
    List<Writable> secondExpected =
                    Arrays.asList((Writable) new Text("aValue1"), new Text("MISSING_B"), new Text("cxValue1"));
    List<Writable> thirdExpected =
                    Arrays.asList((Writable) new Text("aValue2"), new Text("bValue2"), new Text("MISSING_CX"));

    // First pass over the data.
    assertEquals(firstExpected, rr.next());
    assertEquals(secondExpected, rr.next());
    assertEquals(thirdExpected, rr.next());
    assertFalse(rr.hasNext());

    // Second pass: reset must return the reader to the first record.
    rr.reset();
    assertEquals(firstExpected, rr.next());
    assertEquals(secondExpected, rr.next());
    assertEquals(thirdExpected, rr.next());
    assertFalse(rr.hasNext());
}

Source File: TestGeoTransforms.java From DataVec with Apache License 2.0

6 votes

/**
 * Checks the output schema of {@link CoordinatesDistanceTransform} and the distance
 * column it appends, for a one-component and a two-component ("|"-delimited) input.
 */
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema inputSchema = new Schema.Builder()
                    .addColumnString("point")
                    .addColumnString("mean")
                    .addColumnString("stddev")
                    .build();

    Transform distance = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    distance.setInputSchema(inputSchema);

    // Schema: the three string inputs are kept and a double "dist" column is appended.
    Schema outputSchema = distance.transform(inputSchema);
    assertEquals(4, outputSchema.numColumns());
    assertEquals(
                    Arrays.asList("point", "mean", "stddev", "dist"),
                    outputSchema.getColumnNames());
    assertEquals(
                    Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
                    outputSchema.getColumnTypes());

    // Single-component coordinates.
    assertEquals(
                    Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"),
                                    new DoubleWritable(5.0)),
                    distance.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));

    // Two-component coordinates.
    assertEquals(
                    Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                                    new DoubleWritable(Math.sqrt(160))),
                    distance.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                                    new Text("10|5"))));
}

Source File: CSVRecordReaderTest.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Writes {@code numLines} CSV lines to a temp file, configures the reader to skip
 * all but the last one, and checks that the single remaining record is that last line.
 */
@Test
public void testCsvSkipAllButOneLine() throws IOException, InterruptedException {
    final int numLines = 4;
    final List<Writable> lineList = Arrays.<Writable>asList(new Text(Integer.toString(numLines - 1)),
            new Text("one"), new Text("two"), new Text("three"));
    // Despite its name this is the common tail of every line; the line index is prepended.
    String header = ",one,two,three";
    List<String> lines = new ArrayList<>();
    for (int i = 0; i < numLines; i++)
        lines.add(Integer.toString(i) + header);
    File tempFile = File.createTempFile("csvSkipLines", ".csv");
    // Do not leave the fixture behind in the temp directory after the test JVM exits.
    tempFile.deleteOnExit();
    FileUtils.writeLines(tempFile, lines);

    CSVRecordReader rr = new CSVRecordReader(numLines - 1, ',');
    rr.initialize(new FileSplit(tempFile));
    rr.reset();
    assertTrue(rr.hasNext());
    // Expected value goes first so a failure message labels the two sides correctly.
    assertEquals(lineList, rr.next());
}

Source File: JDBCRecordReaderTest.java From DataVec with Apache License 2.0

6 votes

/**
 * Drains a reader over the Coffee table and checks the first three fields of the
 * first record returned.
 */
@Test
public void testSimpleIter() throws Exception {
    try (JDBCRecordReader coffeeReader = getInitializedReader("SELECT * FROM Coffee")) {
        List<List<Writable>> rows = new ArrayList<>();
        while (coffeeReader.hasNext()) {
            rows.add(coffeeReader.next());
        }

        assertFalse(rows.isEmpty());

        List<Writable> firstRow = rows.get(0);
        assertEquals(new Text("Bolivian Dark"), firstRow.get(0));
        assertEquals(new Text("14-001"), firstRow.get(1));
        assertEquals(new DoubleWritable(8.95), firstRow.get(2));
    }
}

Source File: ExcelRecordReader.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Converts one spreadsheet row into a record, one writable per cell visited.
 *
 * <p>The first row seen fixes {@code numColumns}; every later row must report the
 * same last-cell number. Blank cells become empty {@link Text}, string cells become
 * {@link Text} holding the formatted cell value, boolean and numeric cells are
 * parsed from the formatted value, and any other cell type falls back to
 * {@link Text} of the formatted value.
 *
 * @param currRow the row to convert
 * @return the writables for the cells of {@code currRow}, in iteration order
 * @throws IllegalStateException if the row's last-cell number differs from the
 *         column count established by the first row
 */
private List<Writable> rowToRecord(Row currRow) {
    if(numColumns < 0) {
        numColumns = currRow.getLastCellNum();
    }

    if(currRow.getLastCellNum() != numColumns) {
        throw new IllegalStateException("Invalid number of columns for row. First number of columns found was " + numColumns + " but row " + currRow.getRowNum() + " was " + currRow.getLastCellNum());
    }

    List<Writable> ret = new ArrayList<>(currRow.getLastCellNum());
    for(Cell cell: currRow) {
        String cellValue = dataFormatter.formatCellValue(cell);
        switch(cell.getCellTypeEnum()) {
            case BLANK: ret.add(new Text("")); break;
            // Fixed: string cells previously produced an empty Text, dropping their content.
            case STRING: ret.add(new Text(cellValue)); break;
            case BOOLEAN: ret.add(new BooleanWritable(Boolean.valueOf(cellValue))); break;
            // NOTE(review): parses the display-formatted text; presumably fails for cells
            // formatted with grouping separators, percents or dates - confirm.
            case NUMERIC: ret.add(new DoubleWritable(Double.parseDouble(cellValue))); break;
            default: ret.add(new Text(cellValue));
        }
    }

    return ret;

}

Source File: TestGeoTransforms.java From DataVec with Apache License 2.0

6 votes

/**
 * Checks the output schema of {@link CoordinatesDistanceTransform} and the distance
 * column it appends, for a one-component and a two-component ("|"-delimited) input.
 */
@Test
public void testCoordinatesDistanceTransform() throws Exception {
    Schema inputSchema = new Schema.Builder()
                    .addColumnString("point")
                    .addColumnString("mean")
                    .addColumnString("stddev")
                    .build();

    Transform distance = new CoordinatesDistanceTransform("dist", "point", "mean", "stddev", "\\|");
    distance.setInputSchema(inputSchema);

    // Schema: the three string inputs are kept and a double "dist" column is appended.
    Schema outputSchema = distance.transform(inputSchema);
    assertEquals(4, outputSchema.numColumns());
    assertEquals(
                    Arrays.asList("point", "mean", "stddev", "dist"),
                    outputSchema.getColumnNames());
    assertEquals(
                    Arrays.asList(ColumnType.String, ColumnType.String, ColumnType.String, ColumnType.Double),
                    outputSchema.getColumnTypes());

    // Single-component coordinates.
    assertEquals(
                    Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"),
                                    new DoubleWritable(5.0)),
                    distance.map(Arrays.asList((Writable) new Text("-30"), new Text("20"), new Text("10"))));

    // Two-component coordinates.
    assertEquals(
                    Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"), new Text("10|5"),
                                    new DoubleWritable(Math.sqrt(160))),
                    distance.map(Arrays.asList((Writable) new Text("50|40"), new Text("10|-20"),
                                    new Text("10|5"))));
}

Source File: CategoricalQualityAddFunction.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Folds one writable into a running {@link CategoricalQuality} tally.
 *
 * <p>The total always grows by one. The value is counted as valid when the column
 * metadata accepts it; otherwise as missing when it is a {@link NullWritable} or a
 * {@link Text} whose string form is null or empty; otherwise as invalid.
 *
 * @param v1       the tally accumulated so far
 * @param writable the value to classify
 * @return a new tally including {@code writable}
 */
@Override
public CategoricalQuality apply(CategoricalQuality v1, Writable writable) {
    long validCount = v1.getCountValid();
    long invalidCount = v1.getCountInvalid();
    long missingCount = v1.getCountMissing();
    long totalCount = v1.getCountTotal() + 1;

    if (meta.isValid(writable)) {
        validCount++;
    } else {
        boolean missing = writable instanceof NullWritable;
        if (!missing && writable instanceof Text) {
            String text = writable.toString();
            missing = text == null || text.isEmpty();
        }

        if (missing) {
            missingCount++;
        } else {
            invalidCount++;
        }
    }

    return new CategoricalQuality(validCount, invalidCount, missingCount, totalCount);
}

Source File: ExcelRecordReader.java From DataVec with Apache License 2.0

6 votes

/**
 * Converts one spreadsheet row into a record, one writable per cell visited.
 *
 * <p>The first row seen fixes {@code numColumns}; every later row must report the
 * same last-cell number. Blank cells become empty {@link Text}, string cells become
 * {@link Text} holding the formatted cell value, boolean and numeric cells are
 * parsed from the formatted value, and any other cell type falls back to
 * {@link Text} of the formatted value.
 *
 * @param currRow the row to convert
 * @return the writables for the cells of {@code currRow}, in iteration order
 * @throws IllegalStateException if the row's last-cell number differs from the
 *         column count established by the first row
 */
private List<Writable> rowToRecord(Row currRow) {
    if(numColumns < 0) {
        numColumns = currRow.getLastCellNum();
    }

    if(currRow.getLastCellNum() != numColumns) {
        throw new IllegalStateException("Invalid number of columns for row. First number of columns found was " + numColumns + " but row " + currRow.getRowNum() + " was " + currRow.getLastCellNum());
    }

    List<Writable> ret = new ArrayList<>(currRow.getLastCellNum());
    for(Cell cell: currRow) {
        String cellValue = dataFormatter.formatCellValue(cell);
        switch(cell.getCellTypeEnum()) {
            case BLANK: ret.add(new Text("")); break;
            // Fixed: string cells previously produced an empty Text, dropping their content.
            case STRING: ret.add(new Text(cellValue)); break;
            case BOOLEAN: ret.add(new BooleanWritable(Boolean.valueOf(cellValue))); break;
            // NOTE(review): parses the display-formatted text; presumably fails for cells
            // formatted with grouping separators, percents or dates - confirm.
            case NUMERIC: ret.add(new DoubleWritable(Double.parseDouble(cellValue))); break;
            default: ret.add(new Text(cellValue));
        }
    }

    return ret;

}

Source File: TestUI.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Builds a 100-step sequence (sine value, cycling category, step index as string)
 * and renders it to an HTML plot file in the system temp directory. Ignored by
 * default; the file is registered for deletion on JVM exit.
 */
@Test
@Ignore
public void testSequencePlot() throws Exception {
    final int stepCount = 100;

    Schema plotSchema = new SequenceSchema.Builder()
                    .addColumnDouble("sinx")
                    .addColumnCategorical("cat", "s0", "s1", "s2")
                    .addColumnString("stringcol")
                    .build();

    List<List<Writable>> steps = new ArrayList<>(stepCount);
    for (int t = 0; t < stepCount; t++) {
        String category = "s" + (t % 3);
        steps.add(Arrays.<Writable>asList(
                        new DoubleWritable(Math.sin(t / 10.0)),
                        new Text(category),
                        new Text(String.valueOf(t))));
    }

    String htmlPath = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "datavec_seqplot_test.html");
    File htmlFile = new File(htmlPath);
    htmlFile.deleteOnExit();
    HtmlSequencePlotting.createHtmlSequencePlotFile("Title!", plotSchema, steps, htmlFile);
}

Source File: CSVRegexRecordReader.java From deeplearning4j with Apache License 2.0

6 votes

/**
 * Splits a line on {@code delimiter} and converts each field to writables.
 *
 * <p>A field wrapped in {@code quote} has the surrounding quotes removed and doubled
 * quotes collapsed to one. If a regex is configured for the field's position, the
 * (unquoted) field must match it entirely and each capture group (1 onward) is
 * emitted as its own {@link Text}; otherwise the field is emitted as a single
 * {@link Text}.
 *
 * @param line the raw line to parse
 * @return the writables produced from all fields, in order
 * @throws IllegalStateException if a field does not match the regex for its position
 */
protected List<Writable> parseLine(String line) {
    String[] split = line.split(delimiter, -1);
    List<Writable> ret = new ArrayList<>();
    for (int i = 0; i < split.length; i++) {
        String s = split[i];
        if (quote != null) {
            int n = quote.length();
            // The length guard keeps a field that is just one quote token (e.g. a lone ")
            // from satisfying both startsWith and endsWith and then making
            // substring(n, length - n) throw StringIndexOutOfBoundsException.
            if (s.length() >= 2 * n && s.startsWith(quote) && s.endsWith(quote)) {
                s = s.substring(n, s.length() - n).replace(quote + quote, quote);
            }
        }
        if (regexs != null && regexs[i] != null) {
            Matcher m = patterns[i].matcher(s);
            if (m.matches()) {
                for (int j = 1; j <= m.groupCount(); j++) { //Note: Matcher.group(0) is the entire sequence; we only care about groups 1 onward
                    ret.add(new Text(m.group(j)));
                }
            } else {
                // Fixed: the message now closes the parenthesis it opens.
                throw new IllegalStateException("Invalid line: value does not match regex (regex=\"" + regexs[i]
                                + "\"; value=\"" + s + "\")");
            }
        } else {
            ret.add(new Text(s));
        }
    }
    return ret;
}

Source File: JsonExpanderTransformStepStepRunnerTest.java From konduit-serving with Apache License 2.0

6 votes

@Test
public void testJsonExpansionObjectArray() {
    JsonExpanderTransformStepRunner runner = new JsonExpanderTransformStepRunner(new JsonExpanderTransformStep());
    Record[] input = new Record[1];
    JsonArray inputArraysJson = new JsonArray();
    JsonObject jsonObject = new JsonObject();
    jsonObject.put("first", 1.0);
    jsonObject.put("second", "hello world");
    inputArraysJson.add(jsonObject);

    input[0] = new org.datavec.api.records.impl.Record(
            Arrays.asList(new Text(inputArraysJson.encodePrettily()))
            , null);

    Record[] transform = runner.transform(input);
    assertEquals(1, transform.length);
    assertEquals(2, transform[0].getRecord().size());
    assertEquals(1.0, transform[0].getRecord().get(0).toDouble(), 1e-1);
    assertEquals("hello world", transform[0].getRecord().get(1).toString());
}

Source File: CSVRegexRecordReader.java From DataVec with Apache License 2.0

6 votes

/**
 * Splits a line on {@code delimiter} and converts each field to writables.
 *
 * <p>A field wrapped in {@code quote} has the surrounding quotes removed and doubled
 * quotes collapsed to one. If a regex is configured for the field's position, the
 * (unquoted) field must match it entirely and each capture group (1 onward) is
 * emitted as its own {@link Text}; otherwise the field is emitted as a single
 * {@link Text}.
 *
 * @param line the raw line to parse
 * @return the writables produced from all fields, in order
 * @throws IllegalStateException if a field does not match the regex for its position
 */
protected List<Writable> parseLine(String line) {
    String[] split = line.split(delimiter, -1);
    List<Writable> ret = new ArrayList<>();
    for (int i = 0; i < split.length; i++) {
        String s = split[i];
        if (quote != null) {
            int n = quote.length();
            // The length guard keeps a field that is just one quote token (e.g. a lone ")
            // from satisfying both startsWith and endsWith and then making
            // substring(n, length - n) throw StringIndexOutOfBoundsException.
            if (s.length() >= 2 * n && s.startsWith(quote) && s.endsWith(quote)) {
                s = s.substring(n, s.length() - n).replace(quote + quote, quote);
            }
        }
        if (regexs != null && regexs[i] != null) {
            Matcher m = patterns[i].matcher(s);
            if (m.matches()) {
                for (int j = 1; j <= m.groupCount(); j++) { //Note: Matcher.group(0) is the entire sequence; we only care about groups 1 onward
                    ret.add(new Text(m.group(j)));
                }
            } else {
                // Fixed: the message now closes the parenthesis it opens.
                throw new IllegalStateException("Invalid line: value does not match regex (regex=\"" + regexs[i]
                                + "\"; value=\"" + s + "\")");
            }
        } else {
            ret.add(new Text(s));
        }
    }
    return ret;
}

Source File: WordPieceTokenizerStepTest.java From konduit-serving with Apache License 2.0

6 votes

@Test
public void testWordPieceStepInference() throws Exception
{
    String sampleText = "These pages provide further information about the dictionary, its content and how it's kept up-to-date.";

    WordPieceTokenizerStepRunner step = new WordPieceTokenizerStepRunner(wordPieceTokenizerStep);

    BertIterator iterator = step.getToken(sampleText);

    assertNotEquals(0, iterator.next().getFeatures(0).length());

    List<Writable> ret = new ArrayList<>();
    ret.add(new Text(sampleText));

    Record[] tokenizedSentence = step.transform(new Record[]{
            new org.datavec.api.records.impl.Record(ret, null)
    });

    assertEquals(1, tokenizedSentence.length);
}

Source File: TestTransformProcess.java From DataVec with Apache License 2.0

6 votes

@Test
public void testExecution(){

    Schema schema = new Schema.Builder()
            .addColumnsString("col")
            .addColumnsDouble("col2")
            .build();

    Map<Character,Integer> m = defaultCharIndex();
    TransformProcess transformProcess = new TransformProcess.Builder(schema)
            .doubleMathOp("col2", MathOp.Add, 1.0)
            .build();

    List<Writable> in = Arrays.<Writable>asList(new Text("Text"), new DoubleWritable(2.0));
    List<Writable> exp = Arrays.<Writable>asList(new Text("Text"), new DoubleWritable(3.0));

    List<Writable> out = transformProcess.execute(in);
    assertEquals(exp, out);
}

Source File: RegexRecordReaderTest.java From deeplearning4j with Apache License 2.0

6 votes

@Test
public void testRegexLineRecordReader() throws Exception {
    String regex = "(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}\\.\\d{3}) (\\d+) ([A-Z]+) (.*)";

    RecordReader rr = new RegexLineRecordReader(regex, 1);
    rr.initialize(new FileSplit(new ClassPathResource("datavec-api/logtestdata/logtestfile0.txt").getFile()));

    List<Writable> exp0 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.001"), new Text("1"),
                    new Text("DEBUG"), new Text("First entry message!"));
    List<Writable> exp1 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.002"), new Text("2"),
                    new Text("INFO"), new Text("Second entry message!"));
    List<Writable> exp2 = Arrays.asList((Writable) new Text("2016-01-01 23:59:59.003"), new Text("3"),
                    new Text("WARN"), new Text("Third entry message!"));
    assertEquals(exp0, rr.next());
    assertEquals(exp1, rr.next());
    assertEquals(exp2, rr.next());
    assertFalse(rr.hasNext());

    //Test reset:
    rr.reset();
    assertEquals(exp0, rr.next());
    assertEquals(exp1, rr.next());
    assertEquals(exp2, rr.next());
    assertFalse(rr.hasNext());
}

Source File: ExecutionTest.java From DataVec with Apache License 2.0

5 votes

@Test
public void testExecutionSimple() {
    Schema schema = new Schema.Builder().addColumnInteger("col0")
                    .addColumnCategorical("col1", "state0", "state1", "state2").addColumnDouble("col2").build();

    TransformProcess tp = new TransformProcess.Builder(schema).categoricalToInteger("col1")
                    .doubleMathOp("col2", MathOp.Add, 10.0).build();

    List<List<Writable>> inputData = new ArrayList<>();
    inputData.add(Arrays.<Writable>asList(new IntWritable(0), new Text("state2"), new DoubleWritable(0.1)));
    inputData.add(Arrays.<Writable>asList(new IntWritable(1), new Text("state1"), new DoubleWritable(1.1)));
    inputData.add(Arrays.<Writable>asList(new IntWritable(2), new Text("state0"), new DoubleWritable(2.1)));

    JavaRDD<List<Writable>> rdd = sc.parallelize(inputData);

    List<List<Writable>> out = new ArrayList<>(SparkTransformExecutor.execute(rdd, tp).collect());

    Collections.sort(out, new Comparator<List<Writable>>() {
        @Override
        public int compare(List<Writable> o1, List<Writable> o2) {
            return Integer.compare(o1.get(0).toInt(), o2.get(0).toInt());
        }
    });

    List<List<Writable>> expected = new ArrayList<>();
    expected.add(Arrays.<Writable>asList(new IntWritable(0), new IntWritable(2), new DoubleWritable(10.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(1), new IntWritable(1), new DoubleWritable(11.1)));
    expected.add(Arrays.<Writable>asList(new IntWritable(2), new IntWritable(0), new DoubleWritable(12.1)));

    assertEquals(expected, out);
}

Source File: TestConvertToSequence.java From DataVec with Apache License 2.0

5 votes

@Test
public void testConvertToSequenceLength1(){

    Schema s = new Schema.Builder()
            .addColumnsString("string")
            .addColumnLong("long")
            .build();

    List<List<Writable>> allExamples = Arrays.asList(
            Arrays.<Writable>asList(new Text("a"), new LongWritable(0)),
            Arrays.<Writable>asList(new Text("b"), new LongWritable(1)),
            Arrays.<Writable>asList(new Text("c"), new LongWritable(2)));

    TransformProcess tp = new TransformProcess.Builder(s)
            .convertToSequence()
            .build();

    JavaRDD<List<Writable>> rdd = sc.parallelize(allExamples);

    JavaRDD<List<List<Writable>>> out = SparkTransformExecutor.executeToSequence(rdd, tp);

    List<List<List<Writable>>> out2 = out.collect();

    assertEquals(3, out2.size());

    for( int i=0; i<3; i++ ){
        assertTrue(out2.contains(Collections.singletonList(allExamples.get(i))));
    }
}

Source File: TestWritablesToStringFunctions.java From DataVec with Apache License 2.0

5 votes

@Test
public void testSequenceWritablesToString() throws Exception {

    List<List<Writable>> l = Arrays.asList(Arrays.<Writable>asList(new DoubleWritable(1.5), new Text("someValue")),
                    Arrays.<Writable>asList(new DoubleWritable(2.5), new Text("otherValue")));

    String expected = l.get(0).get(0).toString() + "," + l.get(0).get(1).toString() + "\n"
                    + l.get(1).get(0).toString() + "," + l.get(1).get(1).toString();

    assertEquals(expected, new SequenceWritablesToStringFunction(",").apply(l));
}

Source File: StringMapTransform.java From deeplearning4j with Apache License 2.0

5 votes

@Override
public Text map(Writable writable) {
    String orig = writable.toString();
    if (map.containsKey(orig)) {
        return new Text(map.get(orig));
    }

    if (writable instanceof Text)
        return (Text) writable;
    else
        return new Text(writable.toString());
}

Source File: FileRecordReader.java From DataVec with Apache License 2.0

5 votes

@Override
public List<Writable> record(URI uri, DataInputStream dataInputStream) throws IOException {
    invokeListeners(uri);
    //Here: reading the entire file to a Text writable
    BufferedReader br = new BufferedReader(new InputStreamReader(dataInputStream));
    StringBuilder sb = new StringBuilder();
    String line;
    while ((line = br.readLine()) != null) {
        sb.append(line).append("\n");
    }
    return Collections.singletonList((Writable) new Text(sb.toString()));
}

Source File: RealQualityAddFunction.java From DataVec with Apache License 2.0

5 votes

@Override
public DoubleQuality call(DoubleQuality v1, Writable writable) throws Exception {

    long valid = v1.getCountValid();
    long invalid = v1.getCountInvalid();
    long countMissing = v1.getCountMissing();
    long countTotal = v1.getCountTotal() + 1;
    long nonReal = v1.getCountNonReal();
    long nan = v1.getCountNaN();
    long infinite = v1.getCountInfinite();

    if (meta.isValid(writable))
        valid++;
    else if (writable instanceof NullWritable
                    || writable instanceof Text && (writable.toString() == null || writable.toString().isEmpty()))
        countMissing++;
    else
        invalid++;

    String str = writable.toString();
    double d;
    try {
        d = Double.parseDouble(str);
        if (Double.isNaN(d))
            nan++;
        if (Double.isInfinite(d))
            infinite++;
    } catch (NumberFormatException e) {
        nonReal++;
    }

    return new DoubleQuality(valid, invalid, countMissing, countTotal, nonReal, nan, infinite);
}

Source File: TestTransformProcess.java From DataVec with Apache License 2.0

5 votes

@Test
public void testExecuteToSequence() {

    Schema schema = new Schema.Builder()
            .addColumnsString("action")
            .build();

    Map<Character,Integer> m = defaultCharIndex();
    TransformProcess transformProcess = new TransformProcess.Builder(schema)
            .removeAllColumnsExceptFor("action")
            .convertToSequence()
            .transform(new TextToCharacterIndexTransform("action", "action_sequence", m, true))
            .build();

    String s = "in text";
    List<Writable> input = Collections.<Writable>singletonList(new Text(s));

    List<List<Writable>> expSeq = new ArrayList<>(s.length());
    for( int i = 0; i<s.length(); i++) {
        expSeq.add(Collections.<Writable>singletonList(new IntWritable(m.get(s.charAt(i)))));
    }


    List<List<Writable>> out = transformProcess.executeToSequence(input);

    assertEquals(expSeq, out);
}

Source File: GeographicMidpointReduction.java From deeplearning4j with Apache License 2.0

5 votes

@Override
public List<Writable> get() {
    double x = sumx / count;
    double y = sumy / count;
    double z = sumz / count;

    if(count == 0){
        throw new IllegalStateException("Cannot calculate geographic midpoint: no datapoints were added to be reduced");
    }

    if(Math.abs(x) < EDGE_CASE_EPS && Math.abs(y) < EDGE_CASE_EPS && Math.abs(z) < EDGE_CASE_EPS ){
        throw new IllegalStateException("No Geographic midpoint exists: midpoint is center of the earth");
    }

    double longRad = Math.atan2(y,x);
    double hyp = Math.sqrt(x*x + y*y);
    double latRad = Math.atan2(z, hyp);

    double latDeg = latRad / PI_180;
    double longDeg = longRad / PI_180;

    Preconditions.checkState(!Double.isNaN(latDeg), "Final latitude is NaN");
    Preconditions.checkState(!Double.isNaN(longDeg), "Final longitude is NaN");

    String str = latDeg + delim + longDeg;
    return Collections.<Writable>singletonList(new Text(str));
}

Source File: ConcatenateStringColumns.java From deeplearning4j with Apache License 2.0

5 votes

@Override
public List<Writable> map(List<Writable> writables) {
    StringBuilder newColumnText = new StringBuilder();
    List<Writable> out = new ArrayList<>(writables);
    int i = 0;
    for (String columnName : columnsToConcatenate) {
        if (i++ > 0)
            newColumnText.append(delimiter);
        int columnIdx = inputSchema.getIndexOfColumn(columnName);
        newColumnText.append(writables.get(columnIdx));
    }
    out.add(new Text(newColumnText.toString()));
    return out;
}

Source File: RecordReaderBytesFunction.java From deeplearning4j with Apache License 2.0

5 votes

@Override
public List<Writable> apply(Pair<Text, BytesWritable> v1) {
    URI uri = URI.create(v1.getRight().toString());
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(v1.getRight().getContent()));
    try {
        return recordReader.record(uri, dis);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }

}

Source File: CSVSparkTransformTest.java From DataVec with Apache License 2.0

5 votes

@Test
public void testSingleBatchSequence() throws Exception {
    List<Writable> input = new ArrayList<>();
    input.add(new DoubleWritable(1.0));
    input.add(new DoubleWritable(2.0));

    Schema schema = new Schema.Builder().addColumnDouble("1.0").addColumnDouble("2.0").build();
    List<Writable> output = new ArrayList<>();
    output.add(new Text("1.0"));
    output.add(new Text("2.0"));

    TransformProcess transformProcess =
            new TransformProcess.Builder(schema).convertToString("1.0").convertToString("2.0").build();
    CSVSparkTransform csvSparkTransform = new CSVSparkTransform(transformProcess);
    String[] values = new String[] {"1.0", "2.0"};
    SingleCSVRecord record = csvSparkTransform.transform(new SingleCSVRecord(values));
    BatchCSVRecord batchCSVRecord = new BatchCSVRecord();
    for (int i = 0; i < 3; i++)
        batchCSVRecord.add(record);
    BatchCSVRecord batchCSVRecord1 = csvSparkTransform.transform(batchCSVRecord);
    SequenceBatchCSVRecord sequenceBatchCSVRecord = new SequenceBatchCSVRecord();
    sequenceBatchCSVRecord.add(Arrays.asList(batchCSVRecord));
    Base64NDArrayBody sequenceArray = csvSparkTransform.transformSequenceArray(sequenceBatchCSVRecord);
    INDArray outputBody = Nd4jBase64.fromBase64(sequenceArray.getNdarray());


     //ensure accumulation
    sequenceBatchCSVRecord.add(Arrays.asList(batchCSVRecord));
    sequenceArray = csvSparkTransform.transformSequenceArray(sequenceBatchCSVRecord);
    assertArrayEquals(new long[]{2,2,3},Nd4jBase64.fromBase64(sequenceArray.getNdarray()).shape());

    SequenceBatchCSVRecord transformed = csvSparkTransform.transformSequence(sequenceBatchCSVRecord);
    assertNotNull(transformed.getRecords());
    System.out.println(transformed);


}

Source File: TestGeoReduction.java From deeplearning4j with Apache License 2.0

5 votes

@Test
public void testCustomReductions() {

    List<List<Writable>> inputs = new ArrayList<>();
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("1#5")));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("2#6")));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("3#7")));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("4#8")));

    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new Text("10.0#26.0"));

    Schema schema = new Schema.Builder().addColumnString("key").addColumnString("coord").build();

    Reducer reducer = new Reducer.Builder(ReduceOp.Count).keyColumns("key")
                    .customReduction("coord", new CoordinatesReduction("coordSum", ReduceOp.Sum, "#")).build();

    reducer.setInputSchema(schema);

    IAggregableReduceOp<List<Writable>, List<Writable>> aggregableReduceOp = reducer.aggregableReducer();
    for (List<Writable> l : inputs)
        aggregableReduceOp.accept(l);
    List<Writable> out = aggregableReduceOp.get();

    assertEquals(2, out.size());
    assertEquals(expected, out);

    //Check schema:
    String[] expNames = new String[] {"key", "coordSum"};
    ColumnType[] expTypes = new ColumnType[] {ColumnType.String, ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(2, outSchema.numColumns());
    for (int i = 0; i < 2; i++) {
        assertEquals(expNames[i], outSchema.getName(i));
        assertEquals(expTypes[i], outSchema.getType(i));
    }
}

Source File: FilterWritablesBySchemaFunction.java From DataVec with Apache License 2.0

5 votes

@Override
public Boolean call(Writable v1) throws Exception {
    boolean valid = meta.isValid(v1);
    if (excludeMissing && (v1 instanceof NullWritable
                    || v1 instanceof Text && (v1.toString() == null || v1.toString().isEmpty())))
        return false; //Remove (spark)
    if (keepValid)
        return valid; //Spark: return true to keep
    else
        return !valid;
}

org.datavec.api.writable.Text Java Examples