Java Code Examples for org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage#getPayload()
The following examples show how to use
org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage#getPayload().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DecompressPayload.java From gcp-ingestion with Mozilla Public License 2.0 | 6 votes |
@Override public PubsubMessage apply(PubsubMessage message) { if (enabled.isAccessible() && !enabled.get()) { // Compression has been explicitly turned off in options, so return unchanged message. return message; } else { try { ByteArrayInputStream payloadStream = new ByteArrayInputStream(message.getPayload()); GZIPInputStream gzipStream = new GZIPInputStream(payloadStream); ByteArrayOutputStream decompressedStream = new ByteArrayOutputStream(); // Throws IOException IOUtils.copy(gzipStream, decompressedStream); compressedInput.inc(); return new PubsubMessage(decompressedStream.toByteArray(), message.getAttributeMap()); } catch (IOException ignore) { // payload wasn't valid gzip, assume it wasn't compressed uncompressedInput.inc(); return message; } } }
Example 2
Source File: NormalizeAttributes.java From gcp-ingestion with Mozilla Public License 2.0 | 6 votes |
@Override public PubsubMessage apply(PubsubMessage message) { message = PubsubConstraints.ensureNonNull(message); Map<String, String> attributes = new HashMap<>(message.getAttributeMap()); Optional.ofNullable(attributes.get(Attribute.APP_UPDATE_CHANNEL)) .map(NormalizeAttributes::normalizeChannel) .ifPresent(v -> attributes.put(Attribute.NORMALIZED_CHANNEL, v)); Optional.ofNullable(attributes.get(Attribute.OS)) // .map(NormalizeAttributes::normalizeOs) // .ifPresent(v -> attributes.put(Attribute.NORMALIZED_OS, v)); Optional.ofNullable(attributes.get(Attribute.OS_VERSION)) .map(NormalizeAttributes::normalizeOsVersion) .ifPresent(v -> attributes.put(Attribute.NORMALIZED_OS_VERSION, v)); Optional.ofNullable(attributes.get(Attribute.APP_NAME)) .map(NormalizeAttributes::normalizeAppName) .ifPresent(v -> attributes.put(Attribute.NORMALIZED_APP_NAME, v)); Optional.ofNullable(attributes.get(Attribute.GEO_COUNTRY)) .map(NormalizeAttributes::normalizeCountryCode) .ifPresent(v -> attributes.put(Attribute.NORMALIZED_COUNTRY_CODE, v)); return new PubsubMessage(message.getPayload(), attributes); }
Example 3
Source File: ErrorConverters.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@ProcessElement public void processElement(ProcessContext context) { FailsafeElement<PubsubMessage, String> failsafeElement = context.element(); final PubsubMessage message = failsafeElement.getOriginalPayload(); // Format the timestamp for insertion String timestamp = TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC)); // Build the table row final TableRow failedRow = new TableRow() .set("timestamp", timestamp) .set("attributes", attributeMapToTableRows(message.getAttributeMap())) .set("errorMessage", failsafeElement.getErrorMessage()) .set("stacktrace", failsafeElement.getStacktrace()); // Only set the payload if it's populated on the message. if (message.getPayload() != null) { failedRow .set("payloadString", new String(message.getPayload())) .set("payloadBytes", message.getPayload()); } context.output(failedRow); }
Example 4
Source File: ErrorConverters.java From DataflowTemplates with Apache License 2.0 | 6 votes |
@ProcessElement public void processElement(ProcessContext context) { FailsafeElement<PubsubMessage, String> failsafeElement = context.element(); PubsubMessage pubsubMessage = failsafeElement.getOriginalPayload(); String message = pubsubMessage.getPayload().length > 0 ? new String(pubsubMessage.getPayload()) : pubsubMessage.getAttributeMap().toString(); // Format the timestamp for insertion String timestamp = TIMESTAMP_FORMATTER.print(context.timestamp().toDateTime(DateTimeZone.UTC)); // Build the table row TableRow failedRow = new TableRow() .set("timestamp", timestamp) .set("errorMessage", failsafeElement.getErrorMessage()) .set("stacktrace", failsafeElement.getStacktrace()) .set("payloadString", message) .set("payloadBytes", message.getBytes(StandardCharsets.UTF_8)); context.output(failedRow); }
Example 5
Source File: RemoveAttributes.java From gcp-ingestion with Mozilla Public License 2.0 | 6 votes |
@Override public PubsubMessage apply(PubsubMessage message) { message = PubsubConstraints.ensureNonNull(message); final Map<String, String> attributes = message.getAttributeMap(); Map<String, String> strippedAttributes = new HashMap<>(); // Use geo_country to match IP privacy v1 dataset attributes.put(Attribute.GEO_COUNTRY, message.getAttribute(Attribute.NORMALIZED_COUNTRY_CODE)); ATTRIBUTES_TO_KEEP.forEach(name -> Optional.ofNullable(attributes.get(name)) .ifPresent(value -> strippedAttributes.put(name, value))); return new PubsubMessage(message.getPayload(), strippedAttributes); }
Example 6
Source File: ParseIp.java From gcp-ingestion with Mozilla Public License 2.0 | 6 votes |
@Override public PubsubMessage apply(PubsubMessage message) { Map<String, String> attributes = new HashMap<>(message.getAttributeMap()); String ip; String xff = attributes.get(Attribute.X_FORWARDED_FOR); if (xff != null) { // Google's load balancer will append the immediate sending client IP and a global // forwarding rule IP to any existing content in X-Forwarded-For as documented in: // https://cloud.google.com/load-balancing/docs/https/#components // // In practice, many of the "first" addresses are bogus or internal, // so we target the immediate sending client IP. String[] ips = xff.split("\\s*,\\s*"); ip = ips[Math.max(ips.length - 2, 0)]; } else { ip = attributes.getOrDefault(Attribute.REMOTE_ADDR, ""); } attributes.put(Attribute.CLIENT_IP, ip); return new PubsubMessage(message.getPayload(), attributes); }
Example 7
Source File: PubsubMessageToRow.java From beam with Apache License 2.0 | 5 votes |
private Row parsePayload(PubsubMessage pubsubMessage) { String payloadJson = new String(pubsubMessage.getPayload(), StandardCharsets.UTF_8); // Retrieve nested payload schema. Schema payloadSchema = messageSchema.getField(PAYLOAD_FIELD).getType().getRowSchema(); if (objectMapper == null) { objectMapper = newObjectMapperWith(RowJsonDeserializer.forSchema(payloadSchema)); } return RowJsonUtils.jsonToRow(objectMapper, payloadJson); }
Example 8
Source File: PubsubMessageToRow.java From beam with Apache License 2.0 | 5 votes |
/**
 * Parses the UTF-8 JSON payload of {@code pubsubMessage} into a {@link Row}.
 *
 * <p>The {@code objectMapper} field is created on first use from {@code payloadSchema}.
 */
private Row parsePayload(PubsubMessage pubsubMessage) {
  final byte[] rawPayload = pubsubMessage.getPayload();
  final String json = new String(rawPayload, StandardCharsets.UTF_8);

  if (objectMapper == null) {
    objectMapper = newObjectMapperWith(RowJsonDeserializer.forSchema(payloadSchema));
  }

  return RowJsonUtils.jsonToRow(objectMapper, json);
}
Example 9
Source File: ErrorConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@Test @Category(NeedsRunner.class) public void transformConvertsBigQueryInsertErrorToPubsubMessageWithTruncatedMessage() throws IOException { GenericRecord expectedRecord = BigQueryConvertersTest.generateNestedAvroRecord(); String errorMessage = Strings.repeat("a", 1000); BigQueryInsertError bigQueryInsertError = getBigQueryInsertError(expectedRecord, errorMessage); ErrorConverters.BigQueryInsertErrorToPubsubMessage<GenericRecord> converter = getConverter(expectedRecord.getSchema(), AvroCoder.of(expectedRecord.getSchema())); PCollection<PubsubMessage> output = pipeline .apply(Create.of(bigQueryInsertError) .withCoder(BigQueryInsertErrorCoder.of())) .apply(converter); // Expecting a truncated message with a truncation indicator suffix. String expectedErrorMessage = Ascii.truncate( bigQueryInsertError.getError().toString(), /* maxLength= */ 512, /* truncationIndicator= */ "..."); PubsubMessage expectedMessage = getPubsubMessage(expectedRecord, expectedErrorMessage); byte[] expectedPayload = expectedMessage.getPayload(); Map<String, String> expectedAttributes = expectedMessage.getAttributeMap(); PAssert.thatSingleton(output) .satisfies(input -> { assertThat(input.getPayload()).isEqualTo(expectedPayload); assertThat(input.getAttributeMap()).isEqualTo(expectedAttributes); return null; }); pipeline.run(); }
Example 10
Source File: ErrorConvertersTest.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Checks that converting a {@link BigQueryInsertError} built with a short error message yields a
 * single {@link PubsubMessage} equal to the one built from the full error text.
 */
@Test
@Category(NeedsRunner.class)
public void transformConvertsBigQueryInsertErrorToPubsubMessage() throws IOException {
  final GenericRecord record = BigQueryConvertersTest.generateNestedAvroRecord();
  final String shortErrorMessage = "small-test-message";
  final BigQueryInsertError insertError = getBigQueryInsertError(record, shortErrorMessage);

  final ErrorConverters.BigQueryInsertErrorToPubsubMessage<GenericRecord> converter =
      getConverter(record.getSchema(), AvroCoder.of(record.getSchema()));

  final PCollection<PubsubMessage> converted =
      pipeline
          .apply(Create.of(insertError).withCoder(BigQueryInsertErrorCoder.of()))
          .apply(converter);

  final PubsubMessage expected = getPubsubMessage(record, insertError.getError().toString());

  // The assertion lambda captures only the payload bytes and the attribute map, not the
  // expected message object itself.
  final byte[] expectedPayload = expected.getPayload();
  final Map<String, String> expectedAttributes = expected.getAttributeMap();

  PAssert.thatSingleton(converted)
      .satisfies(
          actual -> {
            assertThat(actual.getPayload()).isEqualTo(expectedPayload);
            assertThat(actual.getAttributeMap()).isEqualTo(expectedAttributes);
            return null;
          });

  pipeline.run();
}
Example 11
Source File: PubSubToMongoDB.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@ProcessElement public void processElement(ProcessContext context) { PubsubMessage pubsubMessage = context.element().getOriginalPayload(); JsonObject messageObject = new JsonObject(); try { if (pubsubMessage.getPayload().length > 0) { messageObject = gson.fromJson(new String(pubsubMessage.getPayload()), JsonObject.class); } // If message attributes are present they will be serialized along with the message payload if (pubsubMessage.getAttributeMap() != null) { pubsubMessage.getAttributeMap().forEach(messageObject::addProperty); } context.output( FailsafeElement.of(pubsubMessage, messageObject.toString())); successCounter.inc(); } catch (JsonSyntaxException e) { context.output( TRANSFORM_DEADLETTER_OUT, FailsafeElement.of(context.element()) .setErrorMessage(e.getMessage()) .setStacktrace(Throwables.getStackTraceAsString(e))); failedCounter.inc(); } }
Example 12
Source File: PubSubToElasticsearch.java From DataflowTemplates with Apache License 2.0 | 5 votes |
@ProcessElement public void processElement(ProcessContext context) { PubsubMessage pubsubMessage = context.element().getOriginalPayload(); JsonObject messageObject = new JsonObject(); try { if (pubsubMessage.getPayload().length > 0) { messageObject = gson.fromJson(new String(pubsubMessage.getPayload()), JsonObject.class); } // If message attributes are present they will be serialized along with the message payload if (pubsubMessage.getAttributeMap() != null) { pubsubMessage.getAttributeMap().forEach(messageObject::addProperty); } context.output( FailsafeElement.of(pubsubMessage, messageObject.toString())); successCounter.inc(); } catch (JsonSyntaxException e) { context.output( TRANSFORM_DEADLETTER_OUT, FailsafeElement.of(context.element()) .setErrorMessage(e.getMessage()) .setStacktrace(Throwables.getStackTraceAsString(e))); failedCounter.inc(); } }
Example 13
Source File: Json.java From gcp-ingestion with Mozilla Public License 2.0 | 5 votes |
/**
 * Deserializes a {@link PubsubMessage} from its string form using {@code MAPPER}.
 *
 * @param data the text to parse
 * @return the parsed message, which is non-null and has a non-null payload
 * @throws IOException if {@code data} does not contain a valid {@link PubsubMessage}
 */
public static PubsubMessage readPubsubMessage(String data) throws IOException {
  final PubsubMessage parsed = MAPPER.readValue(data, PubsubMessage.class);
  // Reject results that are unusable downstream: no message at all, or one without a payload.
  if (parsed == null) {
    throw new IOException("not a valid PubsubMessage: null");
  }
  if (parsed.getPayload() == null) {
    throw new IOException("not a valid PubsubMessage.payload: null");
  }
  return parsed;
}
Example 14
Source File: BinaryRecordFormatter.java From gcp-ingestion with Mozilla Public License 2.0 | 5 votes |
/**
 * Decodes the payload of {@code element} as a binary-encoded record of the given schema.
 *
 * @param element the message whose payload holds the encoded record
 * @param schema the schema used to read the record
 * @return the decoded record
 * @throws UncheckedIOException if reading the record raises an {@link IOException}
 */
@Override
public GenericRecord formatRecord(PubsubMessage element, Schema schema) {
  final InputStream payloadStream = new ByteArrayInputStream(element.getPayload());
  final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema);
  final Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(payloadStream, null);
  final GenericRecord record;
  try {
    record = datumReader.read(null, binaryDecoder);
  } catch (IOException ioe) {
    throw new UncheckedIOException(ioe);
  }
  return record;
}
Example 15
Source File: FailureMessage.java From gcp-ingestion with Mozilla Public License 2.0 | 5 votes |
/**
 * Builds a copy of {@code message} whose attributes are extended with the entries returned by
 * {@code errorAttributes(caller, e)}.
 *
 * <p>A null attribute map on {@code message} is treated as empty. Error attributes overwrite
 * existing attributes with the same name.
 *
 * @param caller passed through to {@code errorAttributes}
 * @param message the message that failed
 * @param e the failure to describe
 * @return a new message with the original payload and the merged attributes
 */
public static PubsubMessage of(Object caller, PubsubMessage message, Throwable e) {
  final Map<String, String> existing = message.getAttributeMap();
  final Map<String, String> merged =
      existing == null ? new HashMap<String, String>() : new HashMap<String, String>(existing);
  merged.putAll(errorAttributes(caller, e));
  return new PubsubMessage(message.getPayload(), merged);
}
Example 16
Source File: PubsubConstraints.java From gcp-ingestion with Mozilla Public License 2.0 | 5 votes |
/**
 * Guarantees a message with a non-null payload and a non-null attribute map.
 *
 * <p>A null message becomes an empty message. A null payload is replaced by an empty byte array
 * and a null attribute map by an empty map. A message that already has both is returned as is.
 */
public static PubsubMessage ensureNonNull(PubsubMessage message) {
  if (message == null) {
    return new PubsubMessage(new byte[] {}, new HashMap<>());
  }
  final boolean payloadMissing = message.getPayload() == null;
  final boolean attributesMissing = message.getAttributeMap() == null;
  if (!payloadMissing && !attributesMissing) {
    return message;
  }
  // Rebuild once, substituting an empty value for whichever component is missing.
  return new PubsubMessage(
      payloadMissing ? new byte[] {} : message.getPayload(),
      attributesMissing ? new HashMap<String, String>() : message.getAttributeMap());
}
Example 17
Source File: GeoIspLookup.java From gcp-ingestion with Mozilla Public License 2.0 | 4 votes |
@Override public PubsubMessage apply(PubsubMessage message) { message = PubsubConstraints.ensureNonNull(message); try { if (ispReader == null) { loadResourcesOnFirstMessage(); } if (message.getAttributeMap().containsKey(Attribute.ISP_NAME)) { // Return early since ISP lookup has already been performed. countIspAlreadyApplied.inc(); return message; } // copy attributes Map<String, String> attributes = new HashMap<String, String>(message.getAttributeMap()); // Determine client ip String ip; String xff = attributes.get(Attribute.X_FORWARDED_FOR); if (xff != null) { // Google's load balancer will append the immediate sending client IP and a global // forwarding rule IP to any existing content in X-Forwarded-For as documented in: // https://cloud.google.com/load-balancing/docs/https/#components // // In practice, many of the "first" addresses are bogus or internal, // so we target the immediate sending client IP. String[] ips = xff.split("\\s*,\\s*"); ip = ips[Math.max(ips.length - 2, 0)]; countIpForwarded.inc(); } else { ip = attributes.getOrDefault(Attribute.REMOTE_ADDR, ""); countIpRemoteAddr.inc(); } try { attributes.put(Attribute.ISP_DB_VERSION, DateTimeFormatter.ISO_INSTANT .format(Instant.ofEpochMilli(ispReader.getMetadata().getBuildDate().getTime()))); // Throws UnknownHostException InetAddress ipAddress = InetAddress.getByName(ip); foundIp.inc(); IspResponse response = ispReader.isp(ipAddress); foundIsp.inc(); attributes.put(Attribute.ISP_NAME, response.getIsp()); attributes.put(Attribute.ISP_ORGANIZATION, response.getOrganization()); } catch (UnknownHostException | GeoIp2Exception ignore) { // ignore these exceptions } // remove null attributes because the coder can't handle them attributes.values().removeIf(Objects::isNull); return new PubsubMessage(message.getPayload(), attributes); } catch (IOException e) { // Re-throw unchecked, so that the pipeline will fail at run time if it occurs throw new UncheckedIOException(e); } }
Example 18
Source File: ParseProxy.java From gcp-ingestion with Mozilla Public License 2.0 | 4 votes |
@Override public PubsubMessage apply(PubsubMessage message) { // Prevent null pointer exception message = PubsubConstraints.ensureNonNull(message); // Copy attributes Map<String, String> attributes = new HashMap<>(message.getAttributeMap()); String xpp = attributes.get(Attribute.X_PIPELINE_PROXY); if (xpp != null) { // Check if X-Pipeline-Proxy is a timestamp final Instant proxyInstant = Time.parseAsInstantOrNull(xpp); if (proxyInstant != null) { // Record the difference between submission and proxy times as tee latency. final String submissionTimestamp = attributes.get(Attribute.SUBMISSION_TIMESTAMP); final Instant submissionInstant = Time.parseAsInstantOrNull(submissionTimestamp); if (submissionInstant != null) { teeLatencyTimer.update(submissionInstant.toEpochMilli() - proxyInstant.toEpochMilli()); } // Rename submission timestamp to proxy timestamp attributes.put(Attribute.PROXY_TIMESTAMP, submissionTimestamp); // Use submission timestamp from X-Pipeline-Proxy attributes.put(Attribute.SUBMISSION_TIMESTAMP, xpp); } // Drop extra IP from X-Forwarded-For String xff = attributes.get(Attribute.X_FORWARDED_FOR); if (xff != null) { attributes.put(Attribute.X_FORWARDED_FOR, xff.substring(0, Math.max(xff.lastIndexOf(","), 0))); } // Remove the proxy attribute attributes.remove(Attribute.X_PIPELINE_PROXY); // remove null attributes because the coder can't handle them attributes.values().removeIf(Objects::isNull); // Report proxied message countPipelineProxy.inc(); } // Return new message return new PubsubMessage(message.getPayload(), attributes); }