Java Code Examples for org.apache.spark.sql.Row#get()
The following examples show how to use org.apache.spark.sql.Row#get(). Each example is drawn from an open-source project; the header above it names the source file, project, and license.
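As a quick orientation before the examples: Row#get(int) returns the field at the given ordinal as an untyped Object, so callers cast the result (or use a typed accessor such as getDouble) and guard against SQL NULLs with isNullAt. The short sketch below is illustrative only; the class name and sample values are invented for this page and are not taken from any of the projects referenced here.

// Minimal sketch of the Row#get() access pattern (hypothetical example).
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;

public class RowGetSketch {
  public static void main(String[] args) {
    // RowFactory.create builds a schemaless Row; fields are addressed by ordinal.
    Row row = RowFactory.create("alice", 42, null);

    // get(int) returns Object, so the caller supplies the cast.
    String name = (String) row.get(0);
    int age = (int) row.get(1); // unboxes the Integer stored at ordinal 1

    // Guard against SQL NULLs before dereferencing a value.
    Object maybe = row.isNullAt(2) ? "<absent>" : row.get(2);

    System.out.println(name + ", " + age + ", " + maybe);
  }
}

On rows that carry a schema (for example, rows collected from a Dataset<Row>), fieldIndex(String) resolves a column name to an ordinal, which gives the row.get(row.fieldIndex(name)) pattern that appears in several of the examples below.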
Example 1
Source File: RowUtils.java From envelope with Apache License 2.0 | 6 votes |
public static boolean different(Row first, Row second, List<String> valueFieldNames) {
  for (String valueFieldName : valueFieldNames) {
    Object firstValue = first.get(first.fieldIndex(valueFieldName));
    Object secondValue = second.get(second.fieldIndex(valueFieldName));

    if (firstValue != null && secondValue != null && !firstValue.equals(secondValue)) {
      return true;
    }

    if ((firstValue != null && secondValue == null) ||
        (firstValue == null && secondValue != null)) {
      return true;
    }
  }

  return false;
}
Example 2
Source File: TestAppendPlanner.java From envelope with Apache License 2.0 | 6 votes |
@Test(expected = IllegalArgumentException.class)
public void testNoLastUpdated() {
  Config config = ConfigFactory.empty();
  AppendPlanner ap = new AppendPlanner();
  assertNoValidationFailures(ap, config);
  ap.configure(config);

  List<Tuple2<MutationType, Dataset<Row>>> planned = ap.planMutationsForSet(dataFrame);

  assertEquals(planned.size(), 1);

  Dataset<Row> plannedDF = planned.get(0)._2();

  assertEquals(planned.get(0)._1(), MutationType.INSERT);
  assertEquals(plannedDF.count(), 1);

  Row plannedRow = plannedDF.collectAsList().get(0);
  plannedRow.get(plannedRow.fieldIndex("lastupdated"));
}
Example 3
Source File: NestDeriver.java From envelope with Apache License 2.0 | 6 votes |
@Override
public Row call(Tuple2<Iterable<Row>, Iterable<Row>> cogrouped) throws Exception {
  // There should only be one 'into' record per key
  Row intoRow = cogrouped._1().iterator().next();
  Row[] fromRows = Iterables.toArray(cogrouped._2(), Row.class);
  int intoRowNumFields = intoRow.size();

  Object[] nestedValues = new Object[intoRowNumFields + 1];
  for (int i = 0; i < intoRowNumFields; i++) {
    nestedValues[i] = intoRow.get(i);
  }
  nestedValues[intoRowNumFields] = fromRows;

  Row nested = RowFactory.create(nestedValues);

  return nested;
}
Example 4
Source File: RowUtils.java From envelope with Apache License 2.0 | 5 votes |
public static Row subsetRow(Row row, StructType subsetSchema) {
  Object[] values = new Object[subsetSchema.length()];

  int i = 0;
  for (String fieldName : subsetSchema.fieldNames()) {
    values[i] = row.get(row.fieldIndex(fieldName));
    i++;
  }

  Row subset = new RowWithSchema(subsetSchema, values);

  return subset;
}
Example 5
Source File: MLContextTest.java From systemds with Apache License 2.0 | 5 votes |
@Test
public void testOutputDataFrameOfVectorsDML() {
  System.out.println("MLContextTest - output DataFrame of vectors DML");

  String s = "m=matrix('1 2 3 4',rows=2,cols=2);";
  Script script = dml(s).out("m");
  MLResults results = ml.execute(script);
  Dataset<Row> df = results.getDataFrame("m", true);
  Dataset<Row> sortedDF = df.sort(RDDConverterUtils.DF_ID_COLUMN);

  // verify column types
  StructType schema = sortedDF.schema();
  StructField[] fields = schema.fields();
  StructField idColumn = fields[0];
  StructField vectorColumn = fields[1];
  Assert.assertTrue(idColumn.dataType() instanceof DoubleType);
  Assert.assertTrue(vectorColumn.dataType() instanceof VectorUDT);

  List<Row> list = sortedDF.collectAsList();

  Row row1 = list.get(0);
  Assert.assertEquals(1.0, row1.getDouble(0), 0.0);
  Vector v1 = (DenseVector) row1.get(1);
  double[] arr1 = v1.toArray();
  Assert.assertArrayEquals(new double[] { 1.0, 2.0 }, arr1, 0.0);

  Row row2 = list.get(1);
  Assert.assertEquals(2.0, row2.getDouble(0), 0.0);
  Vector v2 = (DenseVector) row2.get(1);
  double[] arr2 = v2.toArray();
  Assert.assertArrayEquals(new double[] { 3.0, 4.0 }, arr2, 0.0);
}
Example 6
Source File: HashDeriver.java From envelope with Apache License 2.0 | 5 votes |
@Override
public Row call(Row toHash) {
  sb.setLength(0);

  for (int fieldNum = 0; fieldNum < toHash.schema().size(); fieldNum++) {
    if (includeInConcatenation(toHash, fieldNum)) {
      Object value = toHash.get(fieldNum);
      sb.append(value != null ? value : nullString);
      sb.append(delimiter);
    }
  }

  return RowUtils.append(toHash, sb.toString().getBytes());
}
Example 7
Source File: ToRecord.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public List<Writable> call(Row v1) throws Exception {
  List<Writable> ret = new ArrayList<>();
  if (v1.size() != schema.numColumns())
    throw new IllegalArgumentException("Invalid number of columns for row " + v1.size()
        + " should have matched schema columns " + schema.numColumns());
  for (int i = 0; i < v1.size(); i++) {
    if (v1.get(i) == null)
      throw new IllegalStateException("Row item " + i + " is null");

    switch (schema.getType(i)) {
      case Double:
        ret.add(new DoubleWritable(v1.getDouble(i)));
        break;
      case Float:
        ret.add(new FloatWritable(v1.getFloat(i)));
        break;
      case Integer:
        ret.add(new IntWritable(v1.getInt(i)));
        break;
      case Long:
        ret.add(new LongWritable(v1.getLong(i)));
        break;
      default:
        throw new IllegalStateException("Illegal type");
    }
  }
  return ret;
}
Example 8
Source File: RandomForestRegressionModelInfoAdapterBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test
public void testRandomForestRegressionWithPipeline() {
  // Load the data stored in LIBSVM format as a DataFrame.
  DataFrame data = sqlContext.read().format("libsvm").load("src/test/resources/regression_test.libsvm");

  // Split the data into training and test sets (30% held out for testing)
  DataFrame[] splits = data.randomSplit(new double[]{0.7, 0.3});
  DataFrame trainingData = splits[0];
  DataFrame testData = splits[1];

  // Train a RandomForest model.
  RandomForestRegressionModel regressionModel = new RandomForestRegressor()
      .setFeaturesCol("features").fit(trainingData);

  Pipeline pipeline = new Pipeline()
      .setStages(new PipelineStage[]{regressionModel});

  // Train model. This also runs the indexer.
  PipelineModel sparkPipeline = pipeline.fit(trainingData);

  // Export this model
  byte[] exportedModel = ModelExporter.export(sparkPipeline, null);

  // Import and get Transformer
  Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

  Row[] sparkOutput = sparkPipeline.transform(testData).select("features", "prediction").collect();

  // compare predictions
  for (Row row : sparkOutput) {
    Vector v = (Vector) row.get(0);
    double actual = row.getDouble(1);

    Map<String, Object> inputData = new HashMap<String, Object>();
    inputData.put(transformer.getInputKeys().iterator().next(), v.toArray());
    transformer.transform(inputData);
    double predicted = (double) inputData.get(transformer.getOutputKeys().iterator().next());

    assertEquals(actual, predicted, EPSILON);
  }
}
Example 9
Source File: RowUtils.java From envelope with Apache License 2.0 | 5 votes |
public static Row set(Row row, String fieldName, Object replacement) {
  Object[] values = new Object[row.length()];

  for (int i = 0; i < row.schema().fields().length; i++) {
    if (i == row.fieldIndex(fieldName)) {
      values[i] = replacement;
    } else {
      values[i] = row.get(i);
    }
  }

  return new RowWithSchema(row.schema(), values);
}
Example 10
Source File: SQLRowId.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public void read(Row row, int ordinal) throws StandardException {
  if (row.isNullAt(ordinal))
    setToNull();
  else {
    isNull = false;
    bytes = (byte[]) row.get(ordinal);
  }
}
Example 11
Source File: TestHelpers.java From iceberg with Apache License 2.0 | 5 votes |
private static Object getPrimitiveValue(Row row, int ord, Type type) {
  if (row.isNullAt(ord)) {
    return null;
  }
  switch (type.typeId()) {
    case BOOLEAN:
      return row.getBoolean(ord);
    case INTEGER:
      return row.getInt(ord);
    case LONG:
      return row.getLong(ord);
    case FLOAT:
      return row.getFloat(ord);
    case DOUBLE:
      return row.getDouble(ord);
    case STRING:
      return row.getString(ord);
    case BINARY:
    case FIXED:
    case UUID:
      return row.get(ord);
    case DATE:
      return row.getDate(ord);
    case TIMESTAMP:
      return row.getTimestamp(ord);
    case DECIMAL:
      return row.getDecimal(ord);
    default:
      throw new IllegalArgumentException("Unhandled type " + type);
  }
}
Example 12
Source File: TestHelpers.java From iceberg with Apache License 2.0 | 5 votes |
public static void assertEqualsSafe(Types.StructType struct, Record rec, Row row) {
  List<Types.NestedField> fields = struct.fields();
  for (int i = 0; i < fields.size(); i += 1) {
    Type fieldType = fields.get(i).type();

    Object expectedValue = rec.get(i);
    Object actualValue = row.get(i);

    assertEqualsSafe(fieldType, expectedValue, actualValue);
  }
}
Example 13
Source File: DecisionTreeRegressionModelBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test
public void testDecisionTreeRegression() {
  // Load the data stored in LIBSVM format as a DataFrame.
  DataFrame data = sqlContext.read().format("libsvm").load("src/test/resources/regression_test.libsvm");

  // Split the data into training and test sets (30% held out for testing)
  DataFrame[] splits = data.randomSplit(new double[]{0.7, 0.3});
  DataFrame trainingData = splits[0];
  DataFrame testData = splits[1];

  // Train a DecisionTree model.
  DecisionTreeRegressionModel regressionModel = new DecisionTreeRegressor()
      .setFeaturesCol("features").fit(trainingData);

  byte[] exportedModel = ModelExporter.export(regressionModel, null);

  Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

  Row[] sparkOutput = regressionModel.transform(testData).select("features", "prediction").collect();

  // compare predictions
  for (Row row : sparkOutput) {
    Vector v = (Vector) row.get(0);
    double actual = row.getDouble(1);

    Map<String, Object> inputData = new HashMap<String, Object>();
    inputData.put(transformer.getInputKeys().iterator().next(), v.toArray());
    transformer.transform(inputData);
    double predicted = (double) inputData.get(transformer.getOutputKeys().iterator().next());

    System.out.println(actual + ", " + predicted);
    assertEquals(actual, predicted, EPSILON);
  }
}
Example 14
Source File: UserType.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public void read(Row row, int ordinal) throws StandardException {
  if (row.isNullAt(ordinal))
    setToNull();
  else {
    isNull = false;
    Object object = row.get(ordinal);
    if (object instanceof byte[]) {
      value = SerializationUtils.deserialize((byte[]) object);
    } else {
      value = object;
    }
  }
}
Example 15
Source File: SQLBinary.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public void read(Row row, int ordinal) throws StandardException {
  if (row.isNullAt(ordinal))
    setToNull();
  else {
    isNull = false;
    dataValue = (byte[]) row.get(ordinal);
  }
}
Example 16
Source File: GenericsHelpers.java From iceberg with Apache License 2.0 | 5 votes |
public static void assertEqualsSafe(Types.StructType struct, Record expected, Row actual) {
  List<Types.NestedField> fields = struct.fields();
  for (int i = 0; i < fields.size(); i += 1) {
    Type fieldType = fields.get(i).type();

    Object expectedValue = expected.get(i);
    Object actualValue = actual.get(i);

    assertEqualsSafe(fieldType, expectedValue, actualValue);
  }
}
Example 17
Source File: DecisionTreeRegressionModelBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test
public void testDecisionTreeRegressionWithPipeline() {
  // Load the data stored in LIBSVM format as a DataFrame.
  DataFrame data = sqlContext.read().format("libsvm").load("src/test/resources/regression_test.libsvm");

  // Split the data into training and test sets (30% held out for testing)
  DataFrame[] splits = data.randomSplit(new double[]{0.7, 0.3});
  DataFrame trainingData = splits[0];
  DataFrame testData = splits[1];

  // Train a DecisionTree model.
  DecisionTreeRegressor dt = new DecisionTreeRegressor()
      .setFeaturesCol("features");

  Pipeline pipeline = new Pipeline()
      .setStages(new PipelineStage[]{dt});

  // Train model. This also runs the indexer.
  PipelineModel sparkPipeline = pipeline.fit(trainingData);

  // Export this model
  byte[] exportedModel = ModelExporter.export(sparkPipeline, null);

  // Import and get Transformer
  Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

  Row[] sparkOutput = sparkPipeline.transform(testData).select("features", "prediction").collect();

  // compare predictions
  for (Row row : sparkOutput) {
    Vector v = (Vector) row.get(0);
    double actual = row.getDouble(1);

    Map<String, Object> inputData = new HashMap<String, Object>();
    inputData.put(transformer.getInputKeys().iterator().next(), v.toArray());
    transformer.transform(inputData);
    double predicted = (double) inputData.get(transformer.getOutputKeys().iterator().next());

    assertEquals(actual, predicted, EPSILON);
  }
}
Example 18
Source File: DecisionTreeClassificationModelBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test
public void testDecisionTreeClassificationRawPrediction() {
  // Load the data stored in LIBSVM format as a DataFrame.
  DataFrame data = sqlContext.read().format("libsvm").load("src/test/resources/classification_test.libsvm");

  StringIndexerModel stringIndexerModel = new StringIndexer()
      .setInputCol("label")
      .setOutputCol("labelIndex")
      .fit(data);

  data = stringIndexerModel.transform(data);

  // Split the data into training and test sets (30% held out for testing)
  DataFrame[] splits = data.randomSplit(new double[]{0.7, 0.3});
  DataFrame trainingData = splits[0];
  DataFrame testData = splits[1];

  // Train a DecisionTree model.
  DecisionTreeClassificationModel classificationModel = new DecisionTreeClassifier()
      .setLabelCol("labelIndex")
      .setFeaturesCol("features")
      .setRawPredictionCol("rawPrediction")
      .setPredictionCol("prediction")
      .fit(trainingData);

  byte[] exportedModel = ModelExporter.export(classificationModel, null);

  Transformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(exportedModel);

  Row[] sparkOutput = classificationModel.transform(testData)
      .select("features", "prediction", "rawPrediction").collect();

  // compare predictions
  for (Row row : sparkOutput) {
    Vector inp = (Vector) row.get(0);
    double actual = row.getDouble(1);
    double[] actualRaw = ((Vector) row.get(2)).toArray();

    Map<String, Object> inputData = new HashMap<>();
    inputData.put(transformer.getInputKeys().iterator().next(), inp.toArray());
    transformer.transform(inputData);
    double predicted = (double) inputData.get(transformer.getOutputKeys().iterator().next());
    double[] rawPrediction = (double[]) inputData.get("rawPrediction");

    assertEquals(actual, predicted, EPSILON);
    assertArrayEquals(actualRaw, rawPrediction, EPSILON);
  }
}
Example 19
Source File: DecisionTreeClassificationModelBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test
public void testDecisionTreeClassificationWithPipeline() {
  // Load the data stored in LIBSVM format as a DataFrame.
  DataFrame data = sqlContext.read().format("libsvm").load("src/test/resources/classification_test.libsvm");

  // Split the data into training and test sets (30% held out for testing)
  DataFrame[] splits = data.randomSplit(new double[]{0.7, 0.3});
  DataFrame trainingData = splits[0];
  DataFrame testData = splits[1];

  StringIndexer indexer = new StringIndexer()
      .setInputCol("label")
      .setOutputCol("labelIndex");

  // Train a DecisionTree model.
  DecisionTreeClassifier classificationModel = new DecisionTreeClassifier()
      .setLabelCol("labelIndex")
      .setFeaturesCol("features");

  Pipeline pipeline = new Pipeline()
      .setStages(new PipelineStage[]{indexer, classificationModel});

  // Train model. This also runs the indexer.
  PipelineModel sparkPipeline = pipeline.fit(trainingData);

  // Export this model
  byte[] exportedModel = ModelExporter.export(sparkPipeline, null);

  // Import and get Transformer
  Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

  Row[] sparkOutput = sparkPipeline.transform(testData).select("label", "features", "prediction").collect();

  // compare predictions
  for (Row row : sparkOutput) {
    Vector v = (Vector) row.get(1);
    double actual = row.getDouble(2);

    Map<String, Object> inputData = new HashMap<String, Object>();
    inputData.put("features", v.toArray());
    inputData.put("label", row.get(0).toString());
    transformer.transform(inputData);
    double predicted = (double) inputData.get("prediction");

    assertEquals(actual, predicted, EPSILON);
  }
}