Java Code Examples for org.apache.beam.sdk.values.KV#of()
The following examples show how to use
org.apache.beam.sdk.values.KV#of().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CSVStreamingPipelineTest.java From dlp-dataflow-deidentification with Apache License 2.0 | 6 votes |
/**
 * Checks the initial restriction returned by {@link CSVContentProcessorDoFn} (constructed with
 * the static value 2) for two inputs: four lines are expected to give from=1/to=3, and six lines
 * from=1/to=4.
 */
@Test
public void testCSVStreamingInitialRestriction() {
  CSVContentProcessorDoFn csv =
      new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(2));
  String[] lines1 = {"line1", "line2", "line3", "line4"};
  String[] lines2 = {"line1", "line2", "line3", "line4", "line5", "line6"};
  KV<String, List<String>> input1 = KV.of("FileName", Arrays.asList(lines1));
  KV<String, List<String>> input2 = KV.of("FileName", Arrays.asList(lines2));

  // The expected value goes first so a failing assertion reports expected/actual the right way
  // round.
  OffsetRange rangeResult1 = csv.getInitialRestriction(input1);
  assertEquals(1, rangeResult1.getFrom());
  assertEquals(3, rangeResult1.getTo());

  OffsetRange rangeResult2 = csv.getInitialRestriction(input2);
  assertEquals(1, rangeResult2.getFrom());
  assertEquals(4, rangeResult2.getTo());
}
Example 2
Source File: MultinomialLogisticRegression.java From incubator-nemo with Apache License 2.0 | 6 votes |
/** * Method for parsing lines of inputs. * * @param input input line. * @return the parsed key-value pair. */ private KV<Integer, Pair<ArrayList<Integer>, ArrayList<Double>>> parseLine(final String input) { final String text = input.trim(); if (text.startsWith("#") || text.length() == 0) { // comments or newline return null; } final String[] split = text.split("\\s+|:"); final Integer output = Integer.parseInt(split[0]); final ArrayList<Integer> indices = new ArrayList<>(split.length / 2); final ArrayList<Double> data = new ArrayList<>(split.length / 2); for (Integer index = 0; index < split.length / 2; ++index) { indices.add(index, Integer.parseInt(split[2 * index + 1]) - 1); data.add(index, Double.parseDouble(split[2 * index + 2])); } return KV.of(output, Pair.of(indices, data)); }
Example 3
Source File: DataGeneratorReader.java From scotty-window-processor with Apache License 2.0 | 6 votes |
@Override public boolean advance() throws IOException { //Generate with limit if (throughputLimit != 0) { now = System.currentTimeMillis(); if (this.counter < this.throughputLimit && now < this.lastTime + 1000) { this.counter++; this.current = KV.of(key, random.nextInt()); this. currentTimestamp = this.source.timestampFn.apply(current.getValue()); return true; } else { if (now > this.lastTime + 1000) { lastTime = now; counter = 0; } return false; } } else { this.counter++; this.current = KV.of(key, random.nextInt()); this.currentTimestamp = source.timestampFn.apply(current.getValue()); return true; } }
Example 4
Source File: DLPTextToBigQueryStreaming.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
@Override public KV<String, TableRow> getDestination(ValueInSingleWindow<KV<String, TableRow>> element) { String key = element.getValue().getKey(); String tableName = String.format("%s:%s.%s", projectId.get(), datasetName.get(), key); // Strip the file name to only the letters and numbers so that it is a valid BQ table id. tableName = tableName.replaceAll("[^a-zA-Z0-9]", ""); LOG.debug("Table Name {}", tableName); return KV.of(tableName, element.getValue().getValue()); }
Example 5
Source File: BigQueryMappers.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Pairs the result of {@code getTableId(input)} with the result of {@code getTableRow(input)}.
 *
 * @param input the row to map
 * @return the table id as key and the table row as value
 */
@Override
public KV<TableId, TableRow> getOutputObject(TableRow input) {
  return KV.of(getTableId(input), getTableRow(input));
}
Example 6
Source File: CSVStreamingPipelineTest.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
/**
 * Calls {@code splitRestriction} with an {@code OffsetRange(2, 5)} over a six-line input and
 * verifies that the mocked receiver is given exactly three {@link OffsetRange} outputs.
 */
@Test
public void testSplitRestriction() {
  final List<String> fileLines =
      Arrays.asList("line1", "line2", "line3", "line4", "line5", "line6");
  final KV<String, List<String>> element = KV.of("FileName", fileLines);
  final OffsetRange restriction = new OffsetRange(2, 5);
  final DoFn.OutputReceiver receiver = mock(DoFn.OutputReceiver.class);
  final CSVContentProcessorDoFn doFn =
      new CSVContentProcessorDoFn(ValueProvider.StaticValueProvider.of(2));

  doFn.splitRestriction(element, restriction, receiver);

  verify(receiver, times(3)).output(any(OffsetRange.class));
}
Example 7
Source File: KettleKeyValueFn.java From kettle-beam with Apache License 2.0 | 5 votes |
@ProcessElement public void processElement( ProcessContext processContext ) { try { // Get an input row // KettleRow inputKettleRow = processContext.element(); readCounter.inc(); Object[] inputRow = inputKettleRow.getRow(); // Copy over the data... // Object[] keyRow = RowDataUtil.allocateRowData( keyIndexes.length ); for ( int i = 0; i< keyIndexes.length; i++) { keyRow[i] = inputRow[ keyIndexes[i]]; } // Copy over the values... // Object[] valueRow = RowDataUtil.allocateRowData( valueIndexes.length ); for ( int i = 0; i< valueIndexes.length; i++) { valueRow[i] = inputRow[ valueIndexes[i]]; } KV<KettleRow, KettleRow> keyValue = KV.of( new KettleRow(keyRow), new KettleRow( valueRow ) ); processContext.output( keyValue ); } catch(Exception e) { errorCounter.inc(); LOG.error("Error splitting row into key and value", e); throw new RuntimeException( "Unable to split row into key and value", e ); } }
Example 8
Source File: DLPTextToBigQueryStreaming.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Builds the destination for an element: a table name formatted as {@code project:dataset.key}
 * paired with the element's {@link TableRow}.
 *
 * @param element windowed element whose key is used as the last component of the table name
 * @return the formatted table name as key and the element's row as value
 */
@Override
public KV<String, TableRow> getDestination(ValueInSingleWindow<KV<String, TableRow>> element) {
  final KV<String, TableRow> keyedRow = element.getValue();
  final String tableName =
      String.format("%s:%s.%s", projectId.get(), datasetName.get(), keyedRow.getKey());
  LOG.debug("Table Name {}", tableName);
  return KV.of(tableName, keyedRow.getValue());
}
Example 9
Source File: HopKeyValueFn.java From hop with Apache License 2.0 | 5 votes |
@ProcessElement public void processElement( ProcessContext processContext ) { try { // Get an input row // HopRow inputHopRow = processContext.element(); readCounter.inc(); Object[] inputRow = inputHopRow.getRow(); // Copy over the data... // Object[] keyRow = RowDataUtil.allocateRowData( keyIndexes.length ); for ( int i = 0; i< keyIndexes.length; i++) { keyRow[i] = inputRow[ keyIndexes[i]]; } // Copy over the values... // Object[] valueRow = RowDataUtil.allocateRowData( valueIndexes.length ); for ( int i = 0; i< valueIndexes.length; i++) { valueRow[i] = inputRow[ valueIndexes[i]]; } KV<HopRow, HopRow> keyValue = KV.of( new HopRow(keyRow), new HopRow( valueRow ) ); processContext.output( keyValue ); } catch(Exception e) { errorCounter.inc(); LOG.error("Error splitting row into key and value", e); throw new RuntimeException( "Unable to split row into key and value", e ); } }
Example 10
Source File: DataGeneratorReader.java From scotty-window-processor with Apache License 2.0 | 5 votes |
public DataGeneratorReader(int throughputLimit, DataGeneratorSource source, Checkpoint mark) { this.source = source; this.throughputLimit = throughputLimit; if (mark == null) { // Because we have not emitted an element yet, and start() calls advance, we need to // "un-advance" so that start() produces the correct output. this.current = KV.of(key, random.nextInt()); } else { this.current = KV.of(mark.getLastEmittedKey(), mark.getLastEmittedValue()); this.firstStarted = mark.getStartTime(); } }
Example 11
Source File: Max.java From scotty-window-processor with Apache License 2.0 | 4 votes |
/**
 * Combines two partial aggregates by keeping the larger of their values.
 *
 * @param partialAggregate1 first partial aggregate; its key is used for the result
 * @param partialAggregate2 second partial aggregate
 * @return the first aggregate's key paired with the maximum of both values
 */
@Override
public KV<Integer, Integer> combine(KV<Integer, Integer> partialAggregate1,
                                    KV<Integer, Integer> partialAggregate2) {
  final Integer key = partialAggregate1.getKey();
  final int larger = Math.max(partialAggregate1.getValue(), partialAggregate2.getValue());
  return KV.of(key, larger);
}
Example 12
Source File: TestUtils.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/**
 * Creates a row entry for the given key with an empty list of mutations.
 *
 * @param key row key, converted with {@code toByteString}
 * @return the converted key paired with a new, empty mutation list
 */
static KV<ByteString, Iterable<Mutation>> createBigtableRowMutations(String key) {
  final ByteString rowKey = toByteString(key);
  final Iterable<Mutation> noMutations = new ArrayList<>();
  return KV.of(rowKey, noMutations);
}
Example 13
Source File: DynamicJdbcIOTest.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/**
 * Maps the current row of the result set to a pair of its {@code "id"} and {@code "name"}
 * columns.
 *
 * @param resultSet result set positioned on the row to read
 * @return the {@code "id"} column as key and the {@code "name"} column as value
 * @throws Exception if reading either column fails
 */
@Override
public KV<Integer, String> mapRow(ResultSet resultSet) throws Exception {
  final int id = resultSet.getInt("id");
  final String name = resultSet.getString("name");
  return KV.of(id, name);
}
Example 14
Source File: Count.java From scotty-window-processor with Apache License 2.0 | 4 votes |
/**
 * Removes a value from the current aggregate by subtraction.
 *
 * @param currentAggregate aggregate to remove from; its key is used for the result
 * @param toRemove aggregate whose value is subtracted
 * @return the current key paired with the current value minus the removed value
 */
@Override
public KV<Integer, Integer> invert(KV<Integer, Integer> currentAggregate,
                                   KV<Integer, Integer> toRemove) {
  final Integer key = currentAggregate.getKey();
  final int remaining = currentAggregate.getValue() - toRemove.getValue();
  return KV.of(key, remaining);
}
Example 15
Source File: Sum.java From scotty-window-processor with Apache License 2.0 | 4 votes |
/**
 * Removes a value from the current aggregate by subtraction.
 *
 * @param currentAggregate aggregate to remove from; its key is used for the result
 * @param toRemove aggregate whose value is subtracted
 * @return the current key paired with the difference of the two values
 */
@Override
public KV<Integer, Integer> invert(
    KV<Integer, Integer> currentAggregate, KV<Integer, Integer> toRemove) {
  final Integer key = currentAggregate.getKey();
  final Integer before = currentAggregate.getValue();
  final Integer removed = toRemove.getValue();
  return KV.of(key, before - removed);
}
Example 16
Source File: Mean.java From scotty-window-processor with Apache License 2.0 | 4 votes |
/**
 * Combines two partial aggregates by adding their {@code sum} fields and their {@code count}
 * fields.
 *
 * @param partialAggregate1 first partial aggregate; its key is used for the result
 * @param partialAggregate2 second partial aggregate
 * @return the first aggregate's key paired with a new {@code Pair} holding the added sums and
 *     counts
 */
@Override
public KV<Integer, Pair> combine(KV<Integer, Pair> partialAggregate1,
                                 KV<Integer, Pair> partialAggregate2) {
  final Pair left = partialAggregate1.getValue();
  final Pair right = partialAggregate2.getValue();
  final Pair merged = new Pair(left.sum + right.sum, left.count + right.count);
  return KV.of(partialAggregate1.getKey(), merged);
}
Example 17
Source File: KafkaToBigQueryTest.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/** Tests the {@link KafkaToBigQuery} pipeline end-to-end. */ @Test public void testKafkaToBigQueryE2E() throws Exception { // Test input final String key = "{\"id\": \"1001\"}"; final String badKey = "{\"id\": \"1002\"}"; final String payload = "{\"ticker\": \"GOOGL\", \"price\": 1006.94}"; final String badPayload = "{\"tickets\": \"AMZ\", \"proctor\": 007"; final KV<String, String> message = KV.of(key, payload); final KV<String, String> badMessage = KV.of(badKey, badPayload); final Instant timestamp = new DateTime(2022, 2, 22, 22, 22, 22, 222, DateTimeZone.UTC).toInstant(); final FailsafeElementCoder<KV<String, String>, String> coder = FailsafeElementCoder.of( KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()), StringUtf8Coder.of()); CoderRegistry coderRegistry = pipeline.getCoderRegistry(); coderRegistry.registerCoderForType(coder.getEncodedTypeDescriptor(), coder); KafkaToBigQuery.Options options = PipelineOptionsFactory.create().as(KafkaToBigQuery.Options.class); options.setJavascriptTextTransformGcsPath(TRANSFORM_FILE_PATH); options.setJavascriptTextTransformFunctionName("transform"); // Build pipeline PCollectionTuple transformOut = pipeline .apply( "CreateInput", Create.of(message) .withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of()))) .apply("ConvertMessageToTableRow", new MessageToTableRow(options)); // Assert PAssert.that(transformOut.get(KafkaToBigQuery.UDF_DEADLETTER_OUT)).empty(); PAssert.that(transformOut.get(KafkaToBigQuery.TRANSFORM_DEADLETTER_OUT)).empty(); PAssert.that(transformOut.get(KafkaToBigQuery.TRANSFORM_OUT)) .satisfies( collection -> { TableRow result = collection.iterator().next(); assertThat(result.get("ticker"), is(equalTo("GOOGL"))); assertThat(result.get("price"), is(equalTo(1006.94))); return null; }); // Execute pipeline pipeline.run(); // Build pipeline with malformed payload PCollectionTuple badTransformOut = pipeline .apply( "CreateBadInput", Create.of(badMessage) .withCoder(KvCoder.of(StringUtf8Coder.of(), 
StringUtf8Coder.of()))) .apply("ConvertMessageToTableRow", new MessageToTableRow(options)); // Assert PAssert.that(badTransformOut.get(KafkaToBigQuery.UDF_DEADLETTER_OUT)) .satisfies( collection -> { FailsafeElement badResult = collection.iterator().next(); assertThat(badResult.getOriginalPayload(), is(equalTo(badMessage))); assertThat(badResult.getPayload(), is(equalTo(badPayload))); return null; }); PAssert.that(badTransformOut.get(KafkaToBigQuery.TRANSFORM_DEADLETTER_OUT)).empty(); PAssert.that(badTransformOut.get(KafkaToBigQuery.TRANSFORM_OUT)).empty(); // Execute pipeline pipeline.run(); }
Example 18
Source File: Sum.java From scotty-window-processor with Apache License 2.0 | 4 votes |
/**
 * Combines two partial aggregates by adding their values.
 *
 * @param partialAggregate1 first partial aggregate; its key is used for the result
 * @param partialAggregate2 second partial aggregate
 * @return the first aggregate's key paired with the sum of both values
 */
@Override
public KV<Integer, Integer> combine(
    KV<Integer, Integer> partialAggregate1, KV<Integer, Integer> partialAggregate2) {
  final Integer key = partialAggregate1.getKey();
  final int total = partialAggregate1.getValue() + partialAggregate2.getValue();
  return KV.of(key, total);
}
Example 19
Source File: Quantile.java From scotty-window-processor with Apache License 2.0 | 4 votes |
/**
 * Lifts an input tuple into a partial aggregate holding a new {@code QuantileTreeMap}.
 *
 * @param inputTuple input tuple; its key is carried over and its value is passed to the new map
 * @return the input key paired with a {@code QuantileTreeMap} built from the input value and
 *     {@code quantile}
 */
@Override
public KV<Integer, QuantileTreeMap> lift(KV<Integer, Integer> inputTuple) {
  final Integer key = inputTuple.getKey();
  final int firstValue = Math.toIntExact(inputTuple.getValue());
  return KV.of(key, new QuantileTreeMap(firstValue, quantile));
}
Example 20
Source File: Quantile.java From scotty-window-processor with Apache License 2.0 | 4 votes |
/**
 * Copies a partial aggregate by cloning its {@code QuantileTreeMap} value.
 *
 * @param partialAggregate aggregate to copy
 * @return the same key paired with the result of calling {@code clone()} on the value
 */
@Override
public KV<Integer, QuantileTreeMap> clone(KV<Integer, QuantileTreeMap> partialAggregate) {
  final Integer key = partialAggregate.getKey();
  final QuantileTreeMap copy = partialAggregate.getValue().clone();
  return KV.of(key, copy);
}