Java Code Examples for org.apache.flink.types.Row#of()
The following examples show how to use org.apache.flink.types.Row#of().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BatchSQLTestProgram.java From flink with Apache License 2.0 | 6 votes |
/**
 * Returns the next generated row and advances the generator state.
 *
 * <p>Each row holds the current key index, a UTC timestamp derived from
 * {@code ms + offsetMs}, and a fixed payload string. Once the key index reaches
 * {@code numKeys} it wraps back to zero and the base time advances by {@code stepMs}.
 *
 * @return the newly created row
 * @throws NoSuchElementException if {@code hasNext()} reports that no rows remain
 */
@Override
public Row next() {
    if (!hasNext()) {
        throw new NoSuchElementException();
    }

    final LocalDateTime timestamp =
            LocalDateTime.ofInstant(Instant.ofEpochMilli(ms + offsetMs), ZoneOffset.UTC);
    final Row result = Row.of(keyIndex, timestamp, "Some payload...");

    // Move on to the next key; after the last key, start over at the next time step.
    keyIndex++;
    if (keyIndex >= numKeys) {
        keyIndex = 0;
        ms += stepMs;
    }
    return result;
}
Example 2
Source File: TableSummaryTest.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Feeds four mixed-type rows (including null cells) into a {@code TableSummarizer}
 * and returns the resulting summary.
 *
 * @return the summary produced after visiting every row
 */
private TableSummary testVisit() {
    String[] selectedColNames = {"f_string", "f_long", "f_int", "f_double", "f_boolean"};
    int[] numberIdxs = {1, 2, 3};
    TableSummarizer summarizer = new TableSummarizer(selectedColNames, numberIdxs, false);

    Row[] data = {
        Row.of("a", 1L, 1, 2.0, true),
        Row.of(null, 2L, 2, -3.0, true),
        Row.of("c", null, null, 2.0, false),
        Row.of("a", 0L, 0, null, null),
    };

    for (int i = 0; i < data.length; i++) {
        summarizer.visit(data[i]);
    }
    return summarizer.toSummary();
}
Example 3
Source File: CsvRowDeSerializationSchemaTest.java From flink with Apache License 2.0 | 6 votes |
private <T> void testField( TypeInformation<T> fieldInfo, String csvValue, T value, Consumer<CsvRowSerializationSchema.Builder> serializationConfig, Consumer<CsvRowDeserializationSchema.Builder> deserializationConfig, String fieldDelimiter) throws Exception { final TypeInformation<Row> rowInfo = Types.ROW(Types.STRING, fieldInfo, Types.STRING); final String expectedCsv = "BEGIN" + fieldDelimiter + csvValue + fieldDelimiter + "END\n"; final Row expectedRow = Row.of("BEGIN", value, "END"); // serialization final CsvRowSerializationSchema.Builder serSchemaBuilder = new CsvRowSerializationSchema.Builder(rowInfo); serializationConfig.accept(serSchemaBuilder); final byte[] serializedRow = serialize(serSchemaBuilder, expectedRow); assertEquals(expectedCsv, new String(serializedRow)); // deserialization final CsvRowDeserializationSchema.Builder deserSchemaBuilder = new CsvRowDeserializationSchema.Builder(rowInfo); deserializationConfig.accept(deserSchemaBuilder); final Row deserializedRow = deserialize(deserSchemaBuilder, expectedCsv); assertEquals(expectedRow, deserializedRow); }
Example 4
Source File: DocHashCountVectorizerModelMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Loads a WORD_COUNT model into the mapper and checks the vector produced for
 * the sentence {@code "a b c d a a "}.
 *
 * @throws Exception if loading the model or mapping the row fails
 */
@Test
public void testWordCount() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"numFeatures\":\"20\",\"minTF\":\"1.0\",\"featureType\":\"\\\"WORD_COUNT\\\"\"}"),
        Row.of(1048576L,
            "{\"16\":0.4054651081081644,\"7\":0.0,\"13\":0.4054651081081644,\"14\":-0.5108256237659907,"
                + "\"15\":-0.2876820724517809}")
    };
    List<Row> model = Arrays.asList(rows);

    Params params = new Params()
        .set(DocHashCountVectorizerPredictParams.SELECTED_COL, "sentence");

    DocHashCountVectorizerModelMapper mapper =
        new DocHashCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    // Expected value goes first so a failure message labels the two sides correctly.
    assertEquals(
        new SparseVector(20, new int[] {7, 13, 14, 15}, new double[] {1.0, 1.0, 3.0, 1.0}),
        mapper.map(Row.of("a b c d a a ")).getField(0));
}
Example 5
Source File: LibSvmSourceSinkTest.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Writes two labelled rows through {@code LibSvmSinkBatchOp} and checks that the
 * output file contains two lines.
 *
 * @throws Exception if the batch job or reading the file fails
 */
@Test
public void testLibSvmBatchSink() throws Exception {
    Row[] rows = new Row[]{
        Row.of(1, "0:1 1:1"),
        Row.of(-1, "1:1 3:1"),
    };
    String fn = path + "libsvm1.txt";

    MemSourceBatchOp source = new MemSourceBatchOp(rows, new String[]{"label", "features"});
    new LibSvmSinkBatchOp().setFilePath(fn)
        .setLabelCol("label").setVectorCol("features").setOverwriteSink(true).linkFrom(source);
    BatchOperator.execute();

    List<String> lines = Files.readAllLines(Paths.get(fn));
    // Expected value goes first so a failure message labels the two sides correctly.
    Assert.assertEquals(2, lines.size());
}
Example 6
Source File: ClusterEvaluationUtilTest.java From Alink with Apache License 2.0 | 6 votes |
@Test public void getClusterStatisticsEuclideanTest() { Row[] rows0 = new Row[] { Row.of(0, "0,0,0"), Row.of(0, "0.1,0.1,0.1"), Row.of(0, "0.2,0.2,0.2") }; ClusterMetricsSummary clusterMetricsSummary = ClusterEvaluationUtil.getClusterStatistics(Arrays.asList(rows0), new EuclideanDistance()); Assert.assertEquals(clusterMetricsSummary.k, 1); //Tuple6<String, Integer, Double, Double, Double, DenseVector> t = clusterMetricsSummary.map.get(0); Assert.assertEquals(clusterMetricsSummary.clusterId.get(0), "0"); Assert.assertEquals(clusterMetricsSummary.clusterCnt.get(0).intValue(), 3); Assert.assertEquals(clusterMetricsSummary.compactness.get(0), 0.115, 0.001); Assert.assertEquals(clusterMetricsSummary.distanceSquareSum.get(0), 0.06, 0.01); Assert.assertEquals(clusterMetricsSummary.vectorNormL2Sum.get(0), 0.15, 0.01); Assert.assertEquals(clusterMetricsSummary.meanVector.get(0), new DenseVector(new double[]{0.1, 0.1, 0.1})); Assert.assertEquals(clusterMetricsSummary.k, 1); Assert.assertEquals(clusterMetricsSummary.total, 3); }
Example 7
Source File: CsvFormatterTest.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Formats a five-field row to CSV text, parses the text back, and checks that
 * the parsed row matches the original in arity and in every field.
 *
 * @throws Exception if formatting or parsing fails
 */
@Test
public void testFormatter() throws Exception {
    TypeInformation[] types = new TypeInformation[]{
        Types.STRING, Types.DOUBLE, Types.LONG, Types.BOOLEAN, Types.SQL_TIMESTAMP};

    Row row = Row.of("string", 1.0, 1L, true, new java.sql.Timestamp(System.currentTimeMillis()));
    CsvFormatter formatter = new CsvFormatter(types, ",", '"');
    CsvParser parser = new CsvParser(types, ",", '"');

    String text = formatter.format(row);
    Row parsed = parser.parse(text).f1;

    // The original row is the expected side; the parsed row is the value under test.
    Assert.assertEquals(row.getArity(), parsed.getArity());
    for (int i = 0; i < parsed.getArity(); i++) {
        Assert.assertEquals(row.getField(i), parsed.getField(i));
    }
}
Example 8
Source File: GenerateData.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Builds a stream table of four rows with string, long, int, double and boolean
 * columns; some cells are null.
 *
 * @return the stream table created by the default ML environment
 */
public static Table getMultiTypeStreamTable() {
    final String[] colNames = {"f_string", "f_long", "f_int", "f_double", "f_boolean"};
    return MLEnvironmentFactory.getDefault().createStreamTable(
        Arrays.asList(
            Row.of("a", 1L, 1, 2.0, true),
            Row.of(null, 2L, 2, -3.0, true),
            Row.of("c", null, null, 2.0, false),
            Row.of("a", 0L, 0, null, null)),
        colNames);
}
Example 9
Source File: SummarizerBatchOpTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Summarizes the {@code f_double} and {@code f_int} columns of a four-row
 * in-memory source and checks the collected statistics.
 */
@Test
public void test() {
    Row[] testArray = new Row[]{
        Row.of("a", 1L, 1, 2.0, true),
        Row.of(null, 2L, 2, -3.0, true),
        Row.of("c", null, null, 2.0, false),
        Row.of("a", 0L, 0, null, null),
    };
    String[] colNames = new String[]{"f_string", "f_long", "f_int", "f_double", "f_boolean"};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);
    SummarizerBatchOp summarizer = new SummarizerBatchOp()
        .setSelectedCols("f_double", "f_int");
    summarizer.linkFrom(source);

    TableSummary srt = summarizer.collectSummary();
    System.out.println(srt);

    // Expected values go first so failure messages label the two sides correctly.
    Assert.assertEquals(2, srt.getColNames().length);
    Assert.assertEquals(4, srt.count());
    Assert.assertEquals(1, srt.numMissingValue("f_double"), 10e-4);
    Assert.assertEquals(3, srt.numValidValue("f_double"), 10e-4);
    Assert.assertEquals(2.0, srt.max("f_double"), 10e-4);
    Assert.assertEquals(0.0, srt.min("f_int"), 10e-4);
    Assert.assertEquals(0.3333333333333333, srt.mean("f_double"), 10e-4);
    Assert.assertEquals(8.333333333333334, srt.variance("f_double"), 10e-4);
    Assert.assertEquals(2.886751345948129, srt.standardDeviation("f_double"), 10e-4);
    Assert.assertEquals(7.0, srt.normL1("f_double"), 10e-4);
    Assert.assertEquals(4.123105625617661, srt.normL2("f_double"), 10e-4);
}
Example 10
Source File: VectorSummarizerBatchOpTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Summarizes a three-row vector column and checks the statistics collected for
 * vector index 0.
 */
@Test
public void test() {
    Row[] testArray = new Row[]{
        Row.of("1.0 2.0"),
        Row.of("-1.0 -3.0"),
        Row.of("4.0 2.0"),
    };
    String selectedColName = "vec";
    String[] colNames = new String[]{selectedColName};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);
    VectorSummarizerBatchOp summarizer = new VectorSummarizerBatchOp()
        .setSelectedCol("vec");
    summarizer.linkFrom(source);

    BaseVectorSummary srt = summarizer.collectVectorSummary();
    System.out.println(srt);

    // Expected values go first so failure messages label the two sides correctly.
    Assert.assertEquals(2, srt.vectorSize());
    Assert.assertEquals(3, srt.count());
    Assert.assertEquals(4.0, srt.max(0), 10e-4);
    Assert.assertEquals(-1.0, srt.min(0), 10e-4);
    Assert.assertEquals(1.3333333333333333, srt.mean(0), 10e-4);
    Assert.assertEquals(6.333333333333334, srt.variance(0), 10e-4);
    Assert.assertEquals(2.5166114784235836, srt.standardDeviation(0), 10e-4);
    Assert.assertEquals(6.0, srt.normL1(0), 10e-4);
    Assert.assertEquals(4.242640687119285, srt.normL2(0), 10e-4);
}
Example 11
Source File: EvaluationUtil.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Converts a metrics summary to metrics, serializes them, and pairs the first
 * serialized field with {@code funtionName} in a two-field row.
 *
 * @param baseMetricsSummary the summary to convert
 * @return a row holding {@code funtionName} and the first serialized field
 * @throws Exception if the conversion or serialization fails
 */
@Override
public Row map(BaseMetricsSummary baseMetricsSummary) throws Exception {
    final Row serialized = baseMetricsSummary.toMetrics().serialize();
    return Row.of(funtionName, serialized.getField(0));
}
Example 12
Source File: GenerateData.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Builds a single-column ({@code "vec"}) stream table with three dense vector
 * strings, one empty string, and one row whose only field is null.
 *
 * @return the stream table created by the default ML environment
 */
public static Table getDenseStream() {
    final String[] colNames = {"vec"};
    return MLEnvironmentFactory.getDefault().createStreamTable(
        Arrays.asList(
            Row.of("1.0 2.0"),
            Row.of("-1.0 -3.0"),
            Row.of("4.0 2.0"),
            Row.of(""),
            // An explicit Object[] is needed to get a one-field row holding null.
            Row.of(new Object[]{null})),
        colNames);
}
Example 13
Source File: TableSummarizerTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Creates four sample rows of (string, long, int, double, boolean) values,
 * some of which are null.
 *
 * @return a new array holding the four rows
 */
private Row[] geneData() {
    final Row[] rows = new Row[4];
    rows[0] = Row.of("a", 1L, 1, 2.0, true);
    rows[1] = Row.of(null, 2L, 2, -3.0, true);
    rows[2] = Row.of("c", null, null, 2.0, false);
    rows[3] = Row.of("a", 0L, 0, null, null);
    return rows;
}
Example 14
Source File: RandomForestTrainBatchOpTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Trains a three-tree random forest in "partition" tree mode on a seven-row
 * in-memory source, prints the model, then prints predictions for the same rows.
 *
 * @throws Exception if training, prediction or printing fails
 */
@Test
public void linkFrom7() throws Exception {
    final String[] colNames = {"col0", "col1", "label"};
    final Row[] samples = {
        Row.of(1, 2, 0.8),
        Row.of(1, 2, 0.7),
        Row.of(0, 3, 0.4),
        Row.of(0, 2, 0.4),
        Row.of(1, 3, 0.6),
        Row.of(4, 3, 0.2),
        Row.of(4, 4, 0.3)
    };
    final MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(samples), colNames);

    final RandomForestTrainBatchOp trainer = new RandomForestTrainBatchOp()
        .setLabelCol(colNames[2])
        .setFeatureCols(colNames[0], colNames[1])
        .setNumTrees(3)
        .setTreeType("partition")
        .setTreePartition("1,2")
        .setCategoricalCols(colNames[0], colNames[1]);
    trainer.linkFrom(source).print();

    final RandomForestPredictBatchOp predictor = new RandomForestPredictBatchOp()
        .setPredictionCol("pred_result");
    // The trainer is linked to the source a second time here, as in the original flow.
    predictor.linkFrom(trainer.linkFrom(source), source).print();
}
Example 15
Source File: FileSystemLookupFunction.java From flink with Apache License 2.0 | 5 votes |
/**
 * Looks up the cached rows matching the given key values and collects each match.
 *
 * <p>The key values are converted to their external form into a fresh array, so
 * an array passed in by the caller is left unmodified.
 *
 * @param values lookup key values; the count must equal the number of lookup columns
 */
public void eval(Object... values) {
    Preconditions.checkArgument(values.length == lookupCols.length,
            "Number of values and lookup keys mismatch");
    checkCacheReload();

    // Convert into a separate array instead of overwriting the caller's arguments.
    final Object[] externalValues = new Object[values.length];
    for (int i = 0; i < values.length; i++) {
        externalValues[i] = converters[i].toExternal(values[i]);
    }

    Row probeKey = Row.of(externalValues);
    List<RowData> matchedRows = cache.get(probeKey);
    if (matchedRows != null) {
        for (RowData matchedRow : matchedRows) {
            collect(matchedRow);
        }
    }
}
Example 16
Source File: VectorCorrelationBatchOpTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Computes the Pearson correlation of a three-row vector column and checks the
 * flattened 2x2 correlation matrix.
 */
@Test
public void test() {
    Row[] testArray = new Row[]{
        Row.of("1.0 2.0"),
        Row.of("-1.0 -3.0"),
        Row.of("4.0 2.0"),
    };
    String selectedColName = "vec";
    String[] colNames = new String[]{selectedColName};

    MemSourceBatchOp source = new MemSourceBatchOp(Arrays.asList(testArray), colNames);
    VectorCorrelationBatchOp corr = new VectorCorrelationBatchOp()
        .setSelectedCol("vec")
        .setMethod("pearson");
    corr.linkFrom(source);

    CorrelationResult corrMat = corr.collectCorrelation();
    System.out.println(corrMat);

    // Expected array goes first so a failure message labels the two sides correctly.
    Assert.assertArrayEquals(
        new double[] {1.0, 0.802955068546966, 0.802955068546966, 1.0},
        corrMat.getCorrelationMatrix().getArrayCopy1D(true),
        10e-4
    );
}
Example 17
Source File: DocCountVectorizerModelMapperTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Loads a TF_IDF model into the mapper and checks the vector produced for the
 * sentence {@code "a b c d e"}.
 *
 * @throws Exception if loading the model or mapping the row fails
 */
@Test
public void testTFIDFType() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"minTF\":\"1.0\",\"featureType\":\"\\\"TF_IDF\\\"\"}"),
        Row.of(1048576L, "{\"f0\":\"i\",\"f1\":0.6931471805599453,\"f2\":6}"),
        Row.of(2097152L, "{\"f0\":\"e\",\"f1\":0.1823215567939546,\"f2\":2}"),
        Row.of(3145728L, "{\"f0\":\"a\",\"f1\":0.4054651081081644,\"f2\":0}"),
        Row.of(4194304L, "{\"f0\":\"b\",\"f1\":0.1823215567939546,\"f2\":1}"),
        Row.of(5242880L, "{\"f0\":\"c\",\"f1\":0.6931471805599453,\"f2\":7}"),
        Row.of(6291456L, "{\"f0\":\"h\",\"f1\":0.4054651081081644,\"f2\":3}"),
        Row.of(7340032L, "{\"f0\":\"d\",\"f1\":0.6931471805599453,\"f2\":4}"),
        Row.of(8388608L, "{\"f0\":\"j\",\"f1\":0.6931471805599453,\"f2\":5}"),
        Row.of(9437184L, "{\"f0\":\"g\",\"f1\":0.6931471805599453,\"f2\":8}"),
        Row.of(10485760L, "{\"f0\":\"n\",\"f1\":1.0986122886681098,\"f2\":9}"),
        Row.of(11534336L, "{\"f0\":\"f\",\"f1\":1.0986122886681098,\"f2\":10}")
    };
    List<Row> model = Arrays.asList(rows);

    Params params = new Params()
        .set(DocCountVectorizerPredictParams.SELECTED_COL, "sentence");

    DocCountVectorizerModelMapper mapper =
        new DocCountVectorizerModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    // Expected value goes first so a failure message labels the two sides correctly.
    assertEquals(
        new SparseVector(11, new int[] {0, 1, 2, 4, 7},
            new double[] {0.08109302162163289, 0.03646431135879092, 0.03646431135879092,
                0.13862943611198905, 0.13862943611198905}),
        mapper.map(Row.of("a b c d e")).getField(0));
}
Example 18
Source File: GenerateData.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Builds a single-column ({@code "vec"}) batch table with three sparse vector
 * strings and one empty string.
 *
 * @return the batch table created by the default ML environment
 */
public static Table getSparseBatch() {
    final String[] colNames = {"vec"};
    return MLEnvironmentFactory.getDefault().createBatchTable(
        Arrays.asList(
            Row.of("0:1.0 1:2.0"),
            Row.of("0:-1.0 1:-3.0"),
            Row.of("0:4.0 1:2.0"),
            Row.of("")),
        colNames);
}
Example 19
Source File: RandomForestTrainBatchOpTest.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Trains a regression decision tree in "parallel" tree-creation mode, predicts on
 * the training rows, and checks the RMSE reported by the regression evaluation.
 *
 * @throws Exception if training, prediction or evaluation fails
 */
@Test
public void linkFromDecisionTreeModeParallel() throws Exception {
    Row[] testArray = new Row[] {
        Row.of(1, 2, 0.8),
        Row.of(1, 2, 0.7),
        Row.of(0, 3, 0.4),
        Row.of(0, 2, 0.4),
        Row.of(1, 3, 0.6),
        Row.of(4, 3, 0.2),
        Row.of(4, 4, 0.3)
    };
    String[] colNames = new String[] {"col0", "col1", "label"};

    MemSourceBatchOp memSourceBatchOp = new MemSourceBatchOp(Arrays.asList(testArray), colNames);

    DecisionTreeRegTrainBatchOp decisionTreeRegTrainBatchOp = new DecisionTreeRegTrainBatchOp()
        .setLabelCol(colNames[2])
        .setFeatureCols(colNames[0], colNames[1])
        .setMinSamplesPerLeaf(1)
        .setMaxDepth(4)
        .setMaxMemoryInMB(1)
        .setCreateTreeMode("parallel");

    DecisionTreeRegPredictBatchOp decisionTreeRegPredictBatchOp = new DecisionTreeRegPredictBatchOp()
        .setPredictionCol("pred");

    EvalRegressionBatchOp eval = new EvalRegressionBatchOp()
        .setLabelCol(colNames[2])
        .setPredictionCol("pred");

    // Expected value goes first so a failure message labels the two sides correctly.
    Assert.assertEquals(
        0.026726,
        new RegressionMetrics(
            decisionTreeRegPredictBatchOp
                .linkFrom(
                    decisionTreeRegTrainBatchOp.linkFrom(memSourceBatchOp),
                    memSourceBatchOp
                )
                .linkTo(eval)
                .collect()
                .get(0)
        ).getRmse(),
        1e-6);
}
Example 20
Source File: BaseMetrics.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Converts all params to their JSON string and wraps it in a single-field row.
 *
 * @return a row whose only field is the JSON form of the params
 */
public Row serialize() {
    final String json = this.params.toJson();
    return Row.of(json);
}