Java Code Examples for org.apache.beam.sdk.values.KV#getValue()
The following examples show how to use
org.apache.beam.sdk.values.KV#getValue().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KVStringStringToKettleRowFn.java From kettle-beam with Apache License 2.0 | 7 votes |
@ProcessElement public void processElement( ProcessContext processContext ) { try { KV<String,String> kv = processContext.element(); inputCounter.inc(); Object[] outputRow = RowDataUtil.allocateRowData( rowMeta.size() ); outputRow[ 0 ] = kv.getKey(); // String outputRow[ 1 ] = kv.getValue(); // String processContext.output( new KettleRow( outputRow ) ); writtenCounter.inc(); } catch ( Exception e ) { numErrors.inc(); LOG.error( "Error in KV<Long,String> to Kettle Row conversion function", e ); throw new RuntimeException( "Error in KV<Long,String> to Kettle Row conversion function", e ); } }
Example 2
Source File: KVLongStringToHopRowFn.java From hop with Apache License 2.0 | 6 votes |
/**
 * Turns one {@code KV<Long,String>} element into a Hop row and emits it.
 *
 * <p>Field 0 receives the key and field 1 the value. {@code inputCounter} is bumped once the
 * element is read, {@code writtenCounter} once the row is emitted.
 *
 * @param processContext context supplying the input element and receiving the output row
 * @throws RuntimeException wrapping any exception raised during the conversion; the failure is
 *     also counted in {@code numErrors} and logged
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {
    final KV<Long,String> pair = processContext.element();
    inputCounter.inc();

    final Object[] fields = RowDataUtil.allocateRowData( rowMeta.size() );
    fields[ 0 ] = pair.getKey();
    fields[ 1 ] = pair.getValue();

    final HopRow hopRow = new HopRow( fields );
    processContext.output( hopRow );
    writtenCounter.inc();
  } catch ( Exception e ) {
    numErrors.inc();
    final String message = "Error in KV<Long,String> to Hop Row conversion function";
    LOG.error( message, e );
    throw new RuntimeException( message, e );
  }
}
Example 3
Source File: DLPTextToBigQueryStreaming.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Builds a table schema from the row carried as the value of {@code destination}.
 *
 * <p>One field is created per cell returned by {@code getF()}. The field name is the first key
 * of the cell, passed through {@code checkHeaderName}; the field type is always {@code STRING}.
 *
 * @param destination pair whose value is the row the schema is derived from
 * @return a schema with one {@code STRING} field per cell of the row
 */
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  final TableRow row = destination.getValue();
  final List<TableFieldSchema> columns = new ArrayList<TableFieldSchema>();

  // Every BigQuery column is currently typed as STRING.
  for (final Map<String, Object> cell : row.getF()) {
    final String header = cell.keySet().iterator().next();
    columns.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING"));
  }

  final TableSchema schema = new TableSchema();
  schema.setFields(columns);
  return schema;
}
Example 4
Source File: SplunkEventWriter.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Buffers the event carried by {@code input} and flushes the buffer once the number of buffered
 * events reaches {@code batchCount}.
 *
 * @param input pair whose value is the event to buffer
 * @param receiver receiver handed to {@code flush} when a batch is flushed
 * @param window the current window (not used in this method body)
 * @param bufferState bag state accumulating events
 * @param countState value state holding the number of buffered events; a missing value is
 *     read as zero
 * @param timer timer that is set with an offset of {@code DEFAULT_FLUSH_DELAY} seconds on every
 *     call
 * @throws IOException declared by the method; presumably raised by {@code flush} (defined
 *     elsewhere)
 */
@ProcessElement
public void processElement(
    @Element KV<Integer, SplunkEvent> input,
    OutputReceiver<SplunkWriteError> receiver,
    BoundedWindow window,
    @StateId(BUFFER_STATE_NAME) BagState<SplunkEvent> bufferState,
    @StateId(COUNT_STATE_NAME) ValueState<Long> countState,
    @TimerId(TIME_ID_NAME) Timer timer)
    throws IOException {

  // Read the counter first; an unset counter counts as zero buffered events.
  final Long previouslyBuffered = MoreObjects.<Long>firstNonNull(countState.read(), 0L);

  final SplunkEvent incoming = input.getValue();
  INPUT_COUNTER.inc();
  bufferState.add(incoming);

  final Long buffered = previouslyBuffered + 1;
  countState.write(buffered);

  timer.offset(Duration.standardSeconds(DEFAULT_FLUSH_DELAY)).setRelative();

  if (buffered >= batchCount) {
    LOG.info("Flushing batch of {} events", buffered);
    flush(receiver, bufferState, countState);
  }
}
Example 5
Source File: DLPTextToBigQueryStreaming.java From dlp-dataflow-deidentification with Apache License 2.0 | 6 votes |
/**
 * Derives a table schema from the row stored as the value of {@code destination}.
 *
 * <p>Each cell of the row contributes one field: its first key, passed through
 * {@code checkHeaderName}, becomes the field name, and the type is fixed to {@code STRING}.
 *
 * @param destination pair whose value is the row to inspect
 * @return the schema holding one {@code STRING} field per cell
 */
@Override
public TableSchema getSchema(KV<String, TableRow> destination) {
  final List<TableFieldSchema> schemaFields = new ArrayList<TableFieldSchema>();

  // All BigQuery data types are currently set to STRING.
  for (final TableCell tableCell : destination.getValue().getF()) {
    final Map<String, Object> cellContents = tableCell;
    final String headerName = checkHeaderName(cellContents.keySet().iterator().next());
    schemaFields.add(new TableFieldSchema().setName(headerName).setType("STRING"));
  }

  final TableSchema result = new TableSchema();
  result.setFields(schemaFields);
  return result;
}
Example 6
Source File: BigQueryDynamicConverters.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@Override public TableSchema getSchema(KV<TableId, TableRow> destination) { TableRow bqRow = destination.getValue(); TableSchema schema = new TableSchema(); List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>(); List<TableCell> cells = bqRow.getF(); for (int i = 0; i < cells.size(); i++) { Map<String, Object> object = cells.get(i); String header = object.keySet().iterator().next(); /** currently all BQ data types are set to String */ // Why do we use checkHeaderName here and not elsewhere, TODO if we add this back in // fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING")); fields.add(new TableFieldSchema().setName(header).setType("STRING")); } schema.setFields(fields); return schema; }
Example 7
Source File: KVLongStringToKettleRowFn.java From kettle-beam with Apache License 2.0 | 6 votes |
/**
 * Turns one {@code KV<Long,String>} element into a Kettle row and emits it.
 *
 * <p>Field 0 receives the key and field 1 the value. {@code inputCounter} is bumped once the
 * element is read, {@code writtenCounter} once the row is emitted.
 *
 * @param processContext context supplying the input element and receiving the output row
 * @throws RuntimeException wrapping any exception raised during the conversion; the failure is
 *     also counted in {@code numErrors} and logged
 */
@ProcessElement
public void processElement( ProcessContext processContext ) {
  try {
    final KV<Long,String> pair = processContext.element();
    inputCounter.inc();

    final Object[] fields = RowDataUtil.allocateRowData( rowMeta.size() );
    fields[ 0 ] = pair.getKey();
    fields[ 1 ] = pair.getValue();

    final KettleRow kettleRow = new KettleRow( fields );
    processContext.output( kettleRow );
    writtenCounter.inc();
  } catch ( Exception e ) {
    numErrors.inc();
    final String message = "Error in KV<Long,String> to Kettle Row conversion function";
    LOG.error( message, e );
    throw new RuntimeException( message, e );
  }
}
Example 8
Source File: BigQueryDynamicConverters.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@Override public TableSchema getSchema(KV<TableId, TableRow> destination) { TableRow bqRow = destination.getValue(); TableSchema schema = new TableSchema(); List<TableFieldSchema> fields = new ArrayList<TableFieldSchema>(); List<TableCell> cells = bqRow.getF(); for (int i = 0; i < cells.size(); i++) { Map<String, Object> object = cells.get(i); String header = object.keySet().iterator().next(); /** currently all BQ data types are set to String */ // Why do we use checkHeaderName here and not elsewhere, TODO if we add this back in // fields.add(new TableFieldSchema().setName(checkHeaderName(header)).setType("STRING")); fields.add(new TableFieldSchema().setName(header).setType("STRING")); } schema.setFields(fields); return schema; }
Example 9
Source File: KVStringStringToHopRowFn.java From hop with Apache License 2.0 | 6 votes |
@ProcessElement public void processElement( ProcessContext processContext ) { try { KV<String,String> kv = processContext.element(); inputCounter.inc(); Object[] outputRow = RowDataUtil.allocateRowData( rowMeta.size() ); outputRow[ 0 ] = kv.getKey(); // String outputRow[ 1 ] = kv.getValue(); // String processContext.output( new HopRow( outputRow ) ); writtenCounter.inc(); } catch ( Exception e ) { numErrors.inc(); LOG.error( "Error in KV<Long,String> to Hop Row conversion function", e ); throw new RuntimeException( "Error in KV<Long,String> to Hop Row conversion function", e ); } }
Example 10
Source File: GroupByKeyAndWindowDoFnTransformTest.java From incubator-nemo with Apache License 2.0 | 5 votes |
private void checkOutput(final KV<String, List<String>> expected, final KV<String, Iterable<String>> result) { // check key assertEquals(expected.getKey(), result.getKey()); // check value final List<String> resultValue = new ArrayList<>(); final List<String> expectedValue = new ArrayList<>(expected.getValue()); result.getValue().iterator().forEachRemaining(resultValue::add); Collections.sort(resultValue); Collections.sort(expectedValue); assertEquals(expectedValue, resultValue); }
Example 11
Source File: PubsubMessageToTableRow.java From gcp-ingestion with Mozilla Public License 2.0 | 5 votes |
/**
 * Converts the message held in {@code kv} into a {@link TableRow} suitable for
 * {@link org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO}.
 *
 * <p>The destination in the key supplies the project, dataset and table ids; the message in the
 * value supplies the attribute map and payload. {@code format} is created on first use.
 *
 * @param kv pair of destination (key) and message (value)
 * @return the formatted message content as a table row
 */
public TableRow kvToTableRow(KV<TableDestination, PubsubMessage> kv) {
  // Lazily create the format the first time it is needed.
  if (format == null) {
    format = createFormat();
  }

  final TableReference tableRef = kv.getKey().getTableReference();
  final TableId destinationTable =
      TableId.of(tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId());

  final PubsubMessage pubsubMessage = kv.getValue();
  return Json.asTableRow(
      format.apply(
          destinationTable, pubsubMessage.getAttributeMap(), pubsubMessage.getPayload()));
}
Example 12
Source File: TextRowToMutation.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Parses the CSV text in the element value and emits an insert-or-update mutation for the
 * table named by the element key.
 *
 * <p>Nothing is emitted when parsing yields no record. The parser is closed as soon as the
 * records have been read.
 *
 * @param c context supplying the element, the {@code ddlView} and {@code tableColumnsView}
 *     side inputs, and receiving the output
 * @throws IOException if the CSV parser fails to read or close
 * @throws RuntimeException if the value parses into more than one record, or if
 *     {@code parseRow} throws {@link IllegalArgumentException}
 */
@ProcessElement
public void processElement(ProcessContext c) throws IOException {
  KV<String, String> kv = c.element();
  String tableName = kv.getKey();
  Ddl ddl = c.sideInput(ddlView);
  Map<String, List<TableManifest.Column>> tableColumnsMap = c.sideInput(tableColumnsView);
  Table table = ddl.table(tableName);

  Reader in = new StringReader(kv.getValue());
  CSVFormat csvFormat =
      CSVFormat.newFormat(columnDelimiter.get())
          .withQuote(fieldQualifier.get())
          .withIgnoreEmptyLines(true)
          .withTrailingDelimiter(trailingDelimiter.get())
          .withEscape(escape.get())
          .withNullString(nullString.get());

  // The input string is one line, but the Apache CSVParser processes multiple lines, so the
  // result comes back as a list of which only the first item is used.
  List<CSVRecord> list;
  try (CSVParser parser = new CSVParser(in, csvFormat)) {
    list = parser.getRecords();
  }

  if (list.isEmpty()) {
    return;
  }
  if (list.size() > 1) {
    throw new RuntimeException("Unable to parse this row: " + c.element());
  }
  CSVRecord row = list.get(0);

  writeBuilder = Mutation.newInsertOrUpdateBuilder(table.name());
  try {
    c.output(parseRow(writeBuilder, row, table, tableColumnsMap.get(tableName)));
  } catch (IllegalArgumentException e) {
    throw new RuntimeException(
        String.format("Error to parseRow. row: %s, table: %s", row, table), e);
  }
}
Example 13
Source File: CSVContentProcessorDoFn.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
/**
 * Emits one keyed batch table for every offset that can be claimed from {@code tracker}.
 *
 * <p>For each claimed offset the first entry of the element value is split on commas into
 * headers and the remaining entries are treated as rows. {@code createStartEnd} supplies the
 * line range for the offset, and the selected rows are turned into a table with
 * {@code Util.createDLPTable}. Tables with no rows are not emitted. The output key is
 * {@code <fileName>_<offset>}.
 *
 * @param c context supplying the element and receiving the output
 * @param tracker tracker whose current restriction gives the starting offset and which must
 *     grant each offset before it is processed
 */
@ProcessElement
public void processElement(ProcessContext c, OffsetRangeTracker tracker) {
  long position = tracker.currentRestriction().getFrom();
  while (tracker.tryClaim(position)) {
    final String fileName = c.element().getKey();
    final String outputKey = String.format("%s_%d", fileName, position);

    // Entry 0 is the header line; everything after it is data.
    final List<String> dataRows =
        c.element().getValue().stream().skip(1).collect(Collectors.toList());
    final String[] headerNames = c.element().getValue().get(0).split(",");
    final List<FieldId> headers =
        Arrays.stream(headerNames)
            .map(name -> FieldId.newBuilder().setName(name).build())
            .collect(Collectors.toList());

    final KV<Integer, Integer> lineRange = createStartEnd(dataRows.size(), position);
    final int startOfLine = lineRange.getKey();
    final int endOfLine = lineRange.getValue();

    // The loop starts at startOfLine - 1 and stops before endOfLine.
    final List<String> batchLines = new ArrayList<>();
    int rowIndex = startOfLine - 1;
    while (rowIndex < endOfLine) {
      batchLines.add(dataRows.get(rowIndex));
      rowIndex++;
    }

    final Table batchData = Util.createDLPTable(headers, batchLines);
    if (batchData.getRowsCount() > 0) {
      LOG.info(
          "Current Restriction From: {}, Current Restriction To: {}, StartofLine: {}, End Of Line {}, BatchData {}",
          tracker.currentRestriction().getFrom(),
          tracker.currentRestriction().getTo(),
          startOfLine,
          endOfLine,
          batchData.getRowsCount());
      c.output(KV.of(outputKey, batchData));
      batchLines.clear();
    }

    ++position;
  }
}
Example 14
Source File: BeamWordCount.java From incubator-nemo with Apache License 2.0 | 4 votes |
/**
 * Formats a key/value pair as {@code "<key>: <value>"}.
 *
 * @param input the pair to format
 * @return the key and value joined by a colon and a space
 */
@Override
public String apply(final KV<String, Long> input) {
  final String word = input.getKey();
  final Long count = input.getValue();
  return word + ": " + count;
}
Example 15
Source File: WordCount.java From deployment-examples with MIT License | 4 votes |
/**
 * Renders a key/value pair as text in the form {@code "<key>: <value>"}.
 *
 * @param input the pair to render
 * @return the key, a colon and a space, then the value
 */
@Override
public String apply(KV<String, Long> input) {
  return String.format("%s: %s", input.getKey(), input.getValue());
}
Example 16
Source File: BigQueryMappers.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/**
 * Returns the row stored as the value of {@code input}; the key is not used.
 *
 * @param input pair of table id (key) and row (value)
 * @return the value of {@code input}
 */
@Override
public TableRow getTableRow(KV<TableId, TableRow> input) {
  final TableRow row = input.getValue();
  return row;
}
Example 17
Source File: ErrorConverters.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/**
 * Builds a {@code PubsubMessage} from a key/value pair.
 *
 * <p>The key is encoded with {@code payloadCoder()} to form the first constructor argument and
 * the value map is passed as the second.
 *
 * @param kv pair of payload object (key) and string map (value)
 * @return a message built from the encoded key and the value map
 */
@Override
public PubsubMessage apply(KV<T, Map<String, String>> kv) {
  final byte[] payload = encode(payloadCoder(), kv.getKey());
  final Map<String, String> attributes = kv.getValue();
  return new PubsubMessage(payload, attributes);
}
Example 18
Source File: BigQueryMappers.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/**
 * Extracts the table row from {@code input}. The key of the pair is ignored.
 *
 * @param input pair of table id (key) and row (value)
 * @return the row held as the value of the pair
 */
@Override
public TableRow getTableRow(KV<TableId, TableRow> input) {
  final TableRow tableRow = input.getValue();
  return tableRow;
}
Example 19
Source File: AssemblerFn.java From hop with Apache License 2.0 | 4 votes |
@ProcessElement public void processElement( ProcessContext processContext ) { try { KV<HopRow, KV<HopRow, HopRow>> element = processContext.element(); KV<HopRow, HopRow> value = element.getValue(); HopRow key = element.getKey(); HopRow leftValue = value.getKey(); HopRow rightValue = value.getValue(); Object[] outputRow = RowDataUtil.allocateRowData( outputRowMeta.size() ); int index = 0; // Hop style, first the left values // if (leftValue.allNull()) { index+=leftVRowMeta.size(); } else { for ( int i = 0; i < leftVRowMeta.size(); i++ ) { outputRow[ index++ ] = leftValue.getRow()[ i ]; } } // Now the left key // if (leftValue.allNull()) { index+=leftKRowMeta.size(); } else { for ( int i = 0; i < leftKRowMeta.size(); i++ ) { outputRow[ index++ ] = key.getRow()[ i ]; } } // Then the right key // if (rightValue.allNull()) { // No right key given if the value is null // index+=leftKRowMeta.size(); } else { for ( int i = 0; i < leftKRowMeta.size(); i++ ) { outputRow[ index++ ] = key.getRow()[ i ]; } } // Finally the right values // if (rightValue.allNull()) { index+=rightVRowMeta.size(); } else { for ( int i = 0; i < rightVRowMeta.size(); i++ ) { outputRow[ index++ ] = rightValue.getRow()[ i ]; } } // System.out.println("Assembled row : "+outputRowMeta.getString(outputRow)); processContext.output( new HopRow( outputRow ) ); writtenCounter.inc(); } catch(Exception e) { errorCounter.inc(); LOG.error( "Error assembling rows", e); throw new RuntimeException( "Error assembling output KV<row, KV<row, row>>", e ); } }
Example 20
Source File: WordCount.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/**
 * Produces the text {@code "<key>: <value>"} for a key/value pair.
 *
 * @param input the pair to format
 * @return the key followed by a colon, a space and the value
 */
@Override
public String apply(KV<String, Long> input) {
  final StringBuilder line = new StringBuilder();
  line.append(input.getKey());
  line.append(": ");
  line.append(input.getValue());
  return line.toString();
}