Java Code Examples for org.apache.spark.sql.Row#schema()
The following examples show how to use org.apache.spark.sql.Row#schema().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
@SuppressWarnings("serial") public static FlatMapFunction<Row, Row> morphlineMapper(final String morphlineFile, final String morphlineId, final StructType outputSchema, final boolean errorOnEmpty) { return new FlatMapFunction<Row, Row>() { @Override public Iterator<Row> call(Row row) throws Exception { // Retrieve the Command pipeline via ThreadLocal Pipeline pipeline = MorphlineUtils.getPipeline(morphlineFile, morphlineId); if (null == pipeline) { pipeline = MorphlineUtils.setPipeline(morphlineFile, morphlineId, new Collector(), true); } // Convert each Row into a Record StructType inputSchema = row.schema(); if (null == inputSchema) { throw new RuntimeException("Row does not have an associated StructType schema"); } Record inputRecord = new Record(); String[] fieldNames = inputSchema.fieldNames(); // TODO : Confirm nested object conversion for (int i = 0; i < fieldNames.length; i++) { inputRecord.put(fieldNames[i], row.get(i)); } // Process each Record via the Command pipeline List<Record> outputRecords = MorphlineUtils.executePipeline(pipeline, inputRecord, errorOnEmpty); // Convert each Record into a new Row List<Row> outputRows = Lists.newArrayListWithCapacity(outputRecords.size()); for (Record record : outputRecords) { outputRows.add(MorphlineUtils.convertToRow(outputSchema, record)); } return outputRows.iterator(); } }; }
Example 2
Source File: RawTranslator.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Copies every field of the raw message, looked up by field name, into a new
 * {@code RowWithSchema} that reuses the message's schema.
 *
 * @param message the raw message to translate
 * @return a single-element collection holding the copied row
 */
@Override
public Iterable<Row> translate(Row message) {
  StructField[] fields = message.schema().fields();
  Object[] copied = new Object[fields.length];
  for (int i = 0; i < fields.length; i++) {
    // Values are read by name rather than by position, as in the schema's field order
    copied[i] = message.getAs(fields[i].name());
  }
  return Collections.singleton((Row) new RowWithSchema(message.schema(), copied));
}
Example 3
Source File: TestMorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Checks the mapper when a pipeline is already available: the Row's two fields must be copied
 * into the Record handed to {@code executePipeline}, and an empty pipeline result must yield
 * no output Rows.
 */
@Test
public void morphlineMapper(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  new Expectations(MorphlineUtils.class) {{
    // An existing pipeline is returned, so no pipeline creation is expected
    MorphlineUtils.getPipeline("file", "id");
    result = pipeline;
    times = 1;

    // The pipeline produces no output records
    MorphlineUtils.executePipeline(pipeline, (Record) any, true);
    result = Lists.newArrayList();
    times = 1;

    // A two-field Row: one value read per field name
    row.schema();
    result = schema;
    row.get(anyInt);
    returns("val1", "val2");
    times = 2;
    schema.fieldNames();
    result = new String[] { "one", "two"};
  }};

  FlatMapFunction<Row, Row> mapper = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  Iterator<Row> mapped = mapper.call(row);

  assertEquals("Invalid number of Rows returned", 0, Lists.newArrayList(mapped).size());

  new Verifications() {{
    // Capture the Record that was passed to the pipeline and inspect its contents
    Record captured;
    MorphlineUtils.executePipeline(pipeline, captured = withCapture(), true);
    assertEquals(2, captured.getFields().size());
    assertEquals("val1", captured.get("one").get(0));
  }};
}
Example 4
Source File: TestMorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Checks the mapper when no pipeline is available yet: {@code getPipeline} returns null, so
 * {@code setPipeline} must be called exactly once and its result used to process the Record.
 */
@Test
public void morphlineMapperNoPipeline(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  new Expectations(MorphlineUtils.class) {{
    // No pipeline is found on lookup ...
    MorphlineUtils.getPipeline("file", "id");
    result = null;
    times = 1;

    // ... so one must be set up exactly once
    MorphlineUtils.setPipeline("file", "id", (MorphlineUtils.Collector) any, true);
    result = pipeline;
    times = 1;

    // The pipeline produces no output records
    MorphlineUtils.executePipeline(pipeline, (Record) any, true);
    result = Lists.newArrayList();
    times = 1;

    // A two-field Row: one value read per field name
    row.schema();
    result = schema;
    row.get(anyInt);
    returns("val1", "val2");
    times = 2;
    schema.fieldNames();
    result = new String[] { "one", "two"};
  }};

  FlatMapFunction<Row, Row> mapper = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  Iterator<Row> mapped = mapper.call(row);

  assertEquals("Invalid number of Rows returned", 0, Lists.newArrayList(mapped).size());

  new Verifications() {{
    // Capture the Record that was passed to the pipeline and inspect its contents
    Record captured;
    MorphlineUtils.executePipeline(pipeline, captured = withCapture(), true);
    assertEquals(2, captured.getFields().size());
    assertEquals("val1", captured.get("one").get(0));
  }};
}
Example 5
Source File: TestMorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Checks that the mapper rejects a Row without a schema: with {@code row.schema()} returning
 * null, calling the mapper is expected to throw a {@link RuntimeException}.
 */
@Test (expected = RuntimeException.class)
public void morphlineMapperNoSchema(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Row row,
    final @Mocked StructType schema
) throws Exception {

  new Expectations(MorphlineUtils.class) {{
    // The pipeline lookup succeeds ...
    MorphlineUtils.getPipeline("file", "id");
    result = pipeline;
    times = 1;

    // ... but the Row has no schema attached
    row.schema();
    result = null;
  }};

  FlatMapFunction<Row, Row> mapper = MorphlineUtils.morphlineMapper("file", "id", schema, true);
  mapper.call(row);
}
Example 6
Source File: RowUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Returns a copy of {@code row} in which the value of {@code fieldName} is replaced.
 *
 * <p>The input row is not modified; a new {@code RowWithSchema} sharing the input's schema is
 * returned.
 *
 * @param row the row to copy; it is asked for its schema and for the index of {@code fieldName}
 * @param fieldName the name of the field whose value is replaced
 * @param replacement the new value for that field
 * @return a new row with the same schema and the one value swapped
 */
public static Row set(Row row, String fieldName, Object replacement) {
  // Resolve the target position once instead of on every loop iteration
  int targetIndex = row.fieldIndex(fieldName);

  // Size the array and bound the loop from the same source so they cannot disagree
  Object[] values = new Object[row.length()];
  for (int i = 0; i < values.length; i++) {
    values[i] = (i == targetIndex) ? replacement : row.get(i);
  }

  return new RowWithSchema(row.schema(), values);
}
Example 7
Source File: TranslateFunction.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Validates a raw message before translation.
 *
 * @param message the raw message to check
 * @throws RuntimeException if the message has no schema, or if {@code hasValueField} reports
 *         that it lacks the {@code Translator.VALUE_FIELD_NAME} field
 */
private void validateMessageSchema(Row message) {
  boolean schemaMissing = message.schema() == null;
  if (schemaMissing) {
    throw new RuntimeException("Translator must be provided raw messages with an embedded schema");
  }

  // Only checked once a schema is known to be present
  boolean valueFieldPresent = hasValueField(message);
  if (!valueFieldPresent) {
    throw new RuntimeException("Translator must be provided raw messages with a '" +
        Translator.VALUE_FIELD_NAME + "' field");
  }
}
Example 8
Source File: TestRowUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code RowUtils.toRowValue} turns a Row into a map-of-maps when asked for a
 * nested map type: the outer map has the key "outer" whose value is the inner map
 * {"field1": 1, "field2": 2}.
 */
@Test
public void testToRowValueMapRowNested(
    final @Mocked Row inputRow,
    final @Mocked StructType innerSchema,
    final @Mocked StructType outerSchema
) {
  // Target type: map of string -> (map of string -> integer)
  DataType nestedMapType = DataTypes.createMapType(DataTypes.StringType,
      DataTypes.createMapType(DataTypes.StringType, DataTypes.IntegerType, true)
  );

  Map<Object, Object> innerExpected = Maps.newHashMap();
  innerExpected.put("field1", 1);
  innerExpected.put("field2", 2);

  Map<Object, Object> outerExpected = Maps.newHashMap();
  outerExpected.put("outer", innerExpected);

  new Expectations() {{
    // The same mocked Row plays both roles: the first schema() call answers as the outer row,
    // the second as the nested row
    inputRow.schema();
    returns(outerSchema, innerSchema);

    outerSchema.fieldNames();
    result = new String[] {"outer"};
    innerSchema.fieldNames();
    result = new String[] {"field1", "field2"};

    // get(0) first yields the nested row, then the inner value 1
    inputRow.get(0);
    returns(inputRow, 1);
    inputRow.get(1);
    result = 2;
  }};

  assertEquals("Invalid list of values", outerExpected, RowUtils.toRowValue(inputRow, nestedMapType));
}
Example 9
Source File: ZooKeeperOutput.java From envelope with Apache License 2.0 | 4 votes |
@Override public void applyRandomMutations(List<Row> planned) throws Exception { if (planned.size() > 1000) { throw new RuntimeException( "ZooKeeper output does not support applying more than 1000 mutations at a time. " + "This is to prevent misuse of ZooKeeper as a regular data store. " + "Do not use ZooKeeper for storing anything more than small pieces of metadata."); } ZooKeeper zk; try { zk = connection.getZooKeeper(); } catch (Exception e) { throw new RuntimeException("Could not connect to ZooKeeper output", e); } for (Row plan : planned) { if (plan.schema() == null) { throw new RuntimeException("Mutation row provided to ZooKeeper output must contain a schema"); } MutationType mutationType = PlannerUtils.getMutationType(plan); plan = PlannerUtils.removeMutationTypeField(plan); Row key = RowUtils.subsetRow(plan, SchemaUtils.subsetSchema(plan.schema(), keyFieldNames)); String znode = znodesForFilter(zk, key).iterator().next(); // There can only be one znode per full key byte[] value = serializeRow(RowUtils.subsetRow(plan, SchemaUtils.subtractSchema(plan.schema(), keyFieldNames))); switch (mutationType) { case DELETE: zk.delete(znode, -1); break; case UPSERT: prepareZnode(zk, znode); zk.setData(znode, value, -1); break; default: throw new RuntimeException("ZooKeeper output does not support mutation type: " + PlannerUtils.getMutationType(plan)); } } }
Example 10
Source File: EventTimeUpsertPlanner.java From envelope with Apache License 2.0 | 4 votes |
@Override public List<Row> planMutationsForKey(Row key, List<Row> arrivingForKey, List<Row> existingForKey) { resetCurrentSystemTime(); if (key.schema() == null) { throw new RuntimeException("Key sent to event time upsert planner does not contain a schema"); } List<Row> planned = Lists.newArrayList(); if (arrivingForKey.size() > 1) { Collections.sort(arrivingForKey, Collections.reverseOrder(eventTimeModel)); } Row arriving = arrivingForKey.get(0); if (arriving.schema() == null) { throw new RuntimeException("Arriving row sent to event time upsert planner does not contain a schema"); } arriving = PlannerUtils.appendMutationTypeField(arriving); if (hasLastUpdatedField()) { arriving = lastUpdatedTimeModel.appendFields(arriving); } Row existing = null; if (!existingForKey.isEmpty()) { existing = existingForKey.get(0); if (arriving.schema() == null) { throw new RuntimeException("Existing row sent to event time upsert planner does not contain a schema"); } } if (existing == null) { if (hasLastUpdatedField()) { arriving = lastUpdatedTimeModel.setCurrentSystemTime(arriving); } if (hasSurrogateKeyField()) { arriving = PlannerUtils.appendSurrogateKey(arriving, getSurrogateKeyFieldName()); } planned.add(PlannerUtils.setMutationType(arriving, MutationType.INSERT)); } else if (PlannerUtils.before(eventTimeModel, arriving, existing)) { // We do nothing because the arriving record is older than the existing record } else if ((PlannerUtils.simultaneous(eventTimeModel, arriving, existing) || PlannerUtils.after(eventTimeModel, arriving, existing)) && RowUtils.different(arriving, existing, valueFieldNames)) { if (hasLastUpdatedField()) { arriving = lastUpdatedTimeModel.setCurrentSystemTime(arriving); } planned.add(PlannerUtils.setMutationType(arriving, MutationType.UPDATE)); } return planned; }
Example 11
Source File: TranslateFunction.java From envelope with Apache License 2.0 | 4 votes |
/**
 * Validates a row produced by the translator.
 *
 * @param translationResult the translated row to check
 * @throws RuntimeException if the row has no schema
 */
private void validateTranslatedSchema(Row translationResult) {
  boolean schemaPresent = translationResult.schema() != null;
  if (schemaPresent) {
    return;
  }
  throw new RuntimeException("Translator must translate to rows with an embedded schema");
}