Java Code Examples for org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage#getAttributeMap()

The following examples show how to use org.apache.beam.sdk.io.gcp.pubsub.PubsubMessage#getAttributeMap(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DecompressPayload.java    From gcp-ingestion with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Attempts to gunzip the payload of {@code message}.
 *
 * <p>When the {@code enabled} option is accessible and set to false the message is returned
 * untouched. Otherwise the payload is run through a gzip decoder: on success a new message with
 * the decompressed bytes and the original attribute map is returned; if decoding fails with an
 * {@link IOException} the payload is assumed to be uncompressed and the original message is
 * returned.
 *
 * @param message the message whose payload may be gzip-compressed
 * @return the decompressed message, or {@code message} itself when it is left unchanged
 */
@Override
public PubsubMessage apply(PubsubMessage message) {
  if (enabled.isAccessible() && !enabled.get()) {
    // Compression has been explicitly turned off in options, so return unchanged message.
    return message;
  }

  ByteArrayOutputStream decompressedStream = new ByteArrayOutputStream();
  // try-with-resources guarantees the gzip stream (and the Inflater it owns) is closed on both
  // the success path and the failure path instead of waiting for garbage collection.
  try (GZIPInputStream gzipStream = new GZIPInputStream(
      new ByteArrayInputStream(message.getPayload()))) {
    // Throws IOException
    IOUtils.copy(gzipStream, decompressedStream);
  } catch (IOException ignored) {
    // payload wasn't valid gzip, assume it wasn't compressed
    uncompressedInput.inc();
    return message;
  }

  compressedInput.inc();
  return new PubsubMessage(decompressedStream.toByteArray(), message.getAttributeMap());
}
 
Example 2
Source File: NormalizeAttributes.java    From gcp-ingestion with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Returns a copy of {@code message} whose attributes additionally carry normalized variants of
 * the update channel, OS, OS version, app name and geo country attributes.
 *
 * <p>A normalized attribute is only written when the source attribute is present and the
 * corresponding normalizer returns a non-null value. The payload is passed through unchanged.
 */
@Override
public PubsubMessage apply(PubsubMessage message) {
  message = PubsubConstraints.ensureNonNull(message);
  final Map<String, String> attributes = new HashMap<>(message.getAttributeMap());

  final String channel = attributes.get(Attribute.APP_UPDATE_CHANNEL);
  if (channel != null) {
    final String normalizedChannel = NormalizeAttributes.normalizeChannel(channel);
    if (normalizedChannel != null) {
      attributes.put(Attribute.NORMALIZED_CHANNEL, normalizedChannel);
    }
  }

  final String os = attributes.get(Attribute.OS);
  if (os != null) {
    final String normalizedOs = NormalizeAttributes.normalizeOs(os);
    if (normalizedOs != null) {
      attributes.put(Attribute.NORMALIZED_OS, normalizedOs);
    }
  }

  final String osVersion = attributes.get(Attribute.OS_VERSION);
  if (osVersion != null) {
    final String normalizedOsVersion = NormalizeAttributes.normalizeOsVersion(osVersion);
    if (normalizedOsVersion != null) {
      attributes.put(Attribute.NORMALIZED_OS_VERSION, normalizedOsVersion);
    }
  }

  final String appName = attributes.get(Attribute.APP_NAME);
  if (appName != null) {
    final String normalizedAppName = NormalizeAttributes.normalizeAppName(appName);
    if (normalizedAppName != null) {
      attributes.put(Attribute.NORMALIZED_APP_NAME, normalizedAppName);
    }
  }

  final String country = attributes.get(Attribute.GEO_COUNTRY);
  if (country != null) {
    final String normalizedCountry = NormalizeAttributes.normalizeCountryCode(country);
    if (normalizedCountry != null) {
      attributes.put(Attribute.NORMALIZED_COUNTRY_CODE, normalizedCountry);
    }
  }

  return new PubsubMessage(message.getPayload(), attributes);
}
 
Example 3
Source File: RemoveAttributes.java    From gcp-ingestion with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Returns a new message carrying the original payload and only the attributes named in
 * {@code ATTRIBUTES_TO_KEEP}.
 *
 * <p>Before filtering, the geo country attribute is replaced by the value of the normalized
 * country code attribute. Attributes whose value is null are not carried over.
 *
 * @param message the incoming message; null components are filled in by
 *     {@code PubsubConstraints.ensureNonNull}
 * @return a message with the same payload and the reduced attribute set
 */
@Override
public PubsubMessage apply(PubsubMessage message) {
  message = PubsubConstraints.ensureNonNull(message);

  // Work on a private copy: writing into the map returned by getAttributeMap() would modify the
  // input message itself, and fails outright if that map happens to be unmodifiable.
  final Map<String, String> attributes = new HashMap<>(message.getAttributeMap());

  // Use geo_country to match IP privacy v1 dataset
  attributes.put(Attribute.GEO_COUNTRY,
      message.getAttribute(Attribute.NORMALIZED_COUNTRY_CODE));

  final Map<String, String> strippedAttributes = new HashMap<>();
  ATTRIBUTES_TO_KEEP.forEach(name -> Optional.ofNullable(attributes.get(name))
      .ifPresent(value -> strippedAttributes.put(name, value)));

  return new PubsubMessage(message.getPayload(), strippedAttributes);
}
 
Example 4
Source File: ParseIp.java    From gcp-ingestion with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Returns a copy of {@code message} with the client IP attribute set.
 *
 * <p>When an X-Forwarded-For attribute is present, the second-to-last entry of its
 * comma-separated list is used (or the only entry when there is just one). Otherwise the remote
 * address attribute is used, defaulting to the empty string.
 *
 * @param message the incoming message
 * @return a new message with the same payload and the client IP attribute added
 */
@Override
public PubsubMessage apply(PubsubMessage message) {
  Map<String, String> attributes = new HashMap<>(message.getAttributeMap());

  String ip;
  String xff = attributes.get(Attribute.X_FORWARDED_FOR);
  if (xff != null) {
    // Google's load balancer will append the immediate sending client IP and a global
    // forwarding rule IP to any existing content in X-Forwarded-For as documented in:
    // https://cloud.google.com/load-balancing/docs/https/#components
    //
    // In practice, many of the "first" addresses are bogus or internal,
    // so we target the immediate sending client IP.
    String[] ips = xff.split("\\s*,\\s*");
    // split() drops trailing empty strings, so a header made up only of separators (e.g. ",")
    // yields a zero-length array; fall back to the empty string instead of indexing into it.
    ip = ips.length == 0 ? "" : ips[Math.max(ips.length - 2, 0)];
  } else {
    ip = attributes.getOrDefault(Attribute.REMOTE_ADDR, "");
  }
  attributes.put(Attribute.CLIENT_IP, ip);
  return new PubsubMessage(message.getPayload(), attributes);
}
 
Example 5
Source File: CompressPayload.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Compresses the payload of {@code message} with the configured compression.
 *
 * <p>If the compressed result is larger than {@code maxCompressedBytes}, the uncompressed payload
 * is cut down to at most {@code maxCompressedBytes} bytes, the truncation counter is incremented
 * and the shortened payload is compressed instead. Attributes are passed through unchanged.
 *
 * @param message the message to compress; null components are filled in by
 *     {@code PubsubConstraints.ensureNonNull}
 * @return a new message holding the compressed bytes and the original attribute map
 */
@VisibleForTesting
PubsubMessage compress(PubsubMessage message) {
  message = PubsubConstraints.ensureNonNull(message);
  final byte[] payload = message.getPayload();
  byte[] compressedBytes = compress(payload, compression.get());
  if (compressedBytes.length > maxCompressedBytes) {
    // Clamp the end index to the payload length: Arrays.copyOfRange pads the copy with zero
    // bytes when the end index lies beyond the source array, which would append bogus data to
    // a small payload whose compressed form happens to exceed the limit.
    final int truncatedLength = Math.min(payload.length, maxCompressedBytes);
    byte[] truncated = Arrays.copyOfRange(payload, 0, truncatedLength);
    truncationCounter.inc();
    compressedBytes = compress(truncated, compression.get());
  }
  return new PubsubMessage(compressedBytes, message.getAttributeMap());
}
 
Example 6
Source File: PubsubConstraints.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Guarantees that the returned message, its payload and its attribute map are all non-null.
 *
 * <p>A null message becomes an empty message; a null payload is replaced with an empty byte
 * array and a null attribute map with an empty {@link HashMap}. A message that is already
 * complete is returned as-is.
 */
public static PubsubMessage ensureNonNull(PubsubMessage message) {
  if (message == null) {
    return new PubsubMessage(new byte[] {}, new HashMap<>());
  }
  if (message.getPayload() == null) {
    // Substitute an empty payload, then re-check so a null attribute map is handled as well.
    final PubsubMessage withPayload = new PubsubMessage(new byte[] {},
        message.getAttributeMap());
    return ensureNonNull(withPayload);
  }
  if (message.getAttributeMap() == null) {
    final PubsubMessage withAttributes = new PubsubMessage(message.getPayload(),
        new HashMap<>());
    return ensureNonNull(withAttributes);
  }
  return message;
}
 
Example 7
Source File: FailureMessage.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Builds a copy of {@code message} whose attributes are extended with the entries produced by
 * {@code errorAttributes(caller, e)}.
 *
 * <p>Existing attributes are copied first (a null attribute map is treated as empty), so error
 * attributes win when keys collide. The payload is reused unchanged.
 */
public static PubsubMessage of(Object caller, PubsubMessage message, Throwable e) {
  final Map<String, String> merged = new HashMap<>();

  final Map<String, String> existing = message.getAttributeMap();
  if (existing != null) {
    merged.putAll(existing);
  }

  // Added last so that error details overwrite any same-named original attribute.
  merged.putAll(errorAttributes(caller, e));

  return new PubsubMessage(message.getPayload(), merged);
}
 
Example 8
Source File: PioneerBenchmarkGenerator.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Wraps the payload of {@code message} in an encrypted envelope and serializes the result.
 *
 * <p>The envelope is parsed from {@code exampleData}; its payload node receives the encrypted
 * data, a key id and the schema namespace/name/version taken from the message's document
 * attributes. The message attributes are then rewritten to namespace "telemetry", type
 * "pioneer-study" and version "4".
 *
 * @return the JSON string of the enveloped message, or an empty Optional when an
 *     {@link IOException} or {@code JoseException} occurs (its stack trace is printed)
 */
public static Optional<String> transform(PubsubMessage message, PublicKey key,
    byte[] exampleData) {
  try {
    final HashMap<String, String> attrs = new HashMap<>(message.getAttributeMap());
    final ObjectNode envelope = Json.readObjectNode(exampleData);
    final ObjectNode envelopePayload = (ObjectNode) envelope.get(FieldName.PAYLOAD);

    envelopePayload.put(DecryptPioneerPayloads.ENCRYPTED_DATA,
        encrypt(message.getPayload(), key));

    // The document namespace doubles as both the key id and the schema namespace.
    final String namespace = attrs.get(Attribute.DOCUMENT_NAMESPACE);
    envelopePayload.put(DecryptPioneerPayloads.ENCRYPTION_KEY_ID, namespace);
    envelopePayload.put(DecryptPioneerPayloads.SCHEMA_NAMESPACE, namespace);

    final String docType = attrs.get(Attribute.DOCUMENT_TYPE);
    envelopePayload.put(DecryptPioneerPayloads.SCHEMA_NAME, docType);

    final int docVersion = Integer.parseInt(attrs.get(Attribute.DOCUMENT_VERSION));
    envelopePayload.put(DecryptPioneerPayloads.SCHEMA_VERSION, docVersion);

    // Only after the original values have been read are the routing attributes overwritten.
    attrs.put(Attribute.DOCUMENT_NAMESPACE, "telemetry");
    attrs.put(Attribute.DOCUMENT_TYPE, "pioneer-study");
    attrs.put(Attribute.DOCUMENT_VERSION, "4");

    final byte[] envelopeBytes = Json.asString(envelope).getBytes(Charsets.UTF_8);
    final PubsubMessage enveloped = new PubsubMessage(envelopeBytes, attrs);
    return Optional.of(Json.asString(enveloped));
  } catch (IOException | JoseException e) {
    e.printStackTrace();
    return Optional.empty();
  }
}
 
Example 9
Source File: Write.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Encodes the current element as an Avro record and emits it to {@code successTag}.
 *
 * <p>The schema is looked up from the element's attributes. Any exception raised while resolving
 * the schema, formatting or encoding routes the original message, wrapped by
 * {@code FailureMessage.of}, to {@code errorTag} instead.
 */
@ProcessElement
public void processElement(ProcessContext ctx) {
  final PubsubMessage input = ctx.element();
  final Map<String, String> attrs = input.getAttributeMap();
  try {
    final Schema avroSchema = getStore().getSchema(attrs);
    final byte[] encoded = binaryEncoder.encodeRecord(
        formatter.formatRecord(input, avroSchema), avroSchema);
    ctx.output(successTag, new PubsubMessage(encoded, attrs));
  } catch (Exception e) {
    ctx.output(errorTag, FailureMessage.of(this, input, e));
  }
}
 
Example 10
Source File: PubSubToElasticsearch.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the original Pub/Sub message of the current element into a JSON string.
 *
 * <p>A non-empty payload is parsed as a JSON object; message attributes, when present, are added
 * to that object as properties. The result is emitted as a {@code FailsafeElement} and the
 * success counter is incremented. If the payload is not valid JSON, the element is sent to
 * {@code TRANSFORM_DEADLETTER_OUT} with the error message and stack trace attached, and the
 * failure counter is incremented.
 */
@ProcessElement
public void processElement(ProcessContext context) {
  PubsubMessage pubsubMessage = context.element().getOriginalPayload();

  JsonObject messageObject = new JsonObject();

  try {
    if (pubsubMessage.getPayload().length > 0) {
      // Decode explicitly as UTF-8; new String(byte[]) would use the platform default charset
      // and could corrupt non-ASCII text on workers configured differently.
      JsonObject parsed = gson.fromJson(
          new String(pubsubMessage.getPayload(), java.nio.charset.StandardCharsets.UTF_8),
          JsonObject.class);
      // Gson returns null for an empty document (e.g. a whitespace-only payload); keep the
      // empty object in that case rather than hitting a NullPointerException below.
      if (parsed != null) {
        messageObject = parsed;
      }
    }

    // If message attributes are present they will be serialized along with the message payload
    if (pubsubMessage.getAttributeMap() != null) {
      pubsubMessage.getAttributeMap().forEach(messageObject::addProperty);
    }

    context.output(
            FailsafeElement.of(pubsubMessage, messageObject.toString()));
    successCounter.inc();

  } catch (JsonSyntaxException e) {
    context.output(
            TRANSFORM_DEADLETTER_OUT,
            FailsafeElement.of(context.element())
                    .setErrorMessage(e.getMessage())
                    .setStacktrace(Throwables.getStackTraceAsString(e)));
    failedCounter.inc();
  }
}
 
Example 11
Source File: PubSubToMongoDB.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the original Pub/Sub message of the current element into a JSON string.
 *
 * <p>A non-empty payload is parsed as a JSON object; message attributes, when present, are added
 * to that object as properties. The result is emitted as a {@code FailsafeElement} and the
 * success counter is incremented. If the payload is not valid JSON, the element is sent to
 * {@code TRANSFORM_DEADLETTER_OUT} with the error message and stack trace attached, and the
 * failure counter is incremented.
 */
@ProcessElement
public void processElement(ProcessContext context) {
  PubsubMessage pubsubMessage = context.element().getOriginalPayload();

  JsonObject messageObject = new JsonObject();

  try {
    if (pubsubMessage.getPayload().length > 0) {
      // Decode explicitly as UTF-8; new String(byte[]) would use the platform default charset
      // and could corrupt non-ASCII text on workers configured differently.
      JsonObject parsed = gson.fromJson(
          new String(pubsubMessage.getPayload(), java.nio.charset.StandardCharsets.UTF_8),
          JsonObject.class);
      // Gson returns null for an empty document (e.g. a whitespace-only payload); keep the
      // empty object in that case rather than hitting a NullPointerException below.
      if (parsed != null) {
        messageObject = parsed;
      }
    }

    // If message attributes are present they will be serialized along with the message payload
    if (pubsubMessage.getAttributeMap() != null) {
      pubsubMessage.getAttributeMap().forEach(messageObject::addProperty);
    }

    context.output(
            FailsafeElement.of(pubsubMessage, messageObject.toString()));
    successCounter.inc();

  } catch (JsonSyntaxException e) {
    context.output(
            TRANSFORM_DEADLETTER_OUT,
            FailsafeElement.of(context.element())
                    .setErrorMessage(e.getMessage())
                    .setStacktrace(Throwables.getStackTraceAsString(e)));
    failedCounter.inc();
  }
}
 
Example 12
Source File: ErrorConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a {@code BigQueryInsertError} built from a nested Avro record and a short error
 * message is converted into a single {@code PubsubMessage} whose payload and attributes equal
 * those of the message produced by {@code getPubsubMessage}.
 */
@Test
@Category(NeedsRunner.class)
public void transformConvertsBigQueryInsertErrorToPubsubMessage()
    throws IOException {

  // Arrange: a failed insert and the converter under test.
  final GenericRecord record = BigQueryConvertersTest.generateNestedAvroRecord();
  final String shortError = "small-test-message";
  final BigQueryInsertError insertError = getBigQueryInsertError(record, shortError);
  final ErrorConverters.BigQueryInsertErrorToPubsubMessage<GenericRecord> transform =
      getConverter(record.getSchema(), AvroCoder.of(record.getSchema()));

  // Act: run the single error through the converter.
  final PCollection<PubsubMessage> converted =
      pipeline
          .apply(Create.of(insertError).withCoder(BigQueryInsertErrorCoder.of()))
          .apply(transform);

  // Assert: payload and attributes are pulled out into locals before the lambda and compared
  // individually.
  final PubsubMessage reference =
      getPubsubMessage(record, insertError.getError().toString());
  final byte[] wantPayload = reference.getPayload();
  final Map<String, String> wantAttributes = reference.getAttributeMap();
  PAssert.thatSingleton(converted)
      .satisfies(actual -> {
        assertThat(actual.getPayload()).isEqualTo(wantPayload);
        assertThat(actual.getAttributeMap()).isEqualTo(wantAttributes);
        return null;
      });
  pipeline.run();
}
 
Example 13
Source File: ErrorConvertersTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the conversion of a {@code BigQueryInsertError} carrying a 1000-character error
 * message: the resulting {@code PubsubMessage} must match the one built from the error text
 * truncated to 512 characters with a "..." suffix.
 */
@Test
@Category(NeedsRunner.class)
public void transformConvertsBigQueryInsertErrorToPubsubMessageWithTruncatedMessage()
    throws IOException {

  // Arrange: a failed insert with an oversized error message and the converter under test.
  final GenericRecord record = BigQueryConvertersTest.generateNestedAvroRecord();
  final String longError = Strings.repeat("a", 1000);
  final BigQueryInsertError insertError = getBigQueryInsertError(record, longError);
  final ErrorConverters.BigQueryInsertErrorToPubsubMessage<GenericRecord> transform =
      getConverter(record.getSchema(), AvroCoder.of(record.getSchema()));

  // Act: run the single error through the converter.
  final PCollection<PubsubMessage> converted =
      pipeline
          .apply(Create.of(insertError).withCoder(BigQueryInsertErrorCoder.of()))
          .apply(transform);

  // Expecting a truncated message with a truncation indicator suffix.
  final String truncatedError =
      Ascii.truncate(
          insertError.getError().toString(),
          /* maxLength= */ 512,
          /* truncationIndicator= */ "...");
  final PubsubMessage reference = getPubsubMessage(record, truncatedError);

  // Assert: payload and attributes are pulled out into locals before the lambda and compared
  // individually.
  final byte[] wantPayload = reference.getPayload();
  final Map<String, String> wantAttributes = reference.getAttributeMap();
  PAssert.thatSingleton(converted)
      .satisfies(actual -> {
        assertThat(actual.getPayload()).isEqualTo(wantPayload);
        assertThat(actual.getAttributeMap()).isEqualTo(wantAttributes);
        return null;
      });
  pipeline.run();
}
 
Example 14
Source File: DecryptPioneerPayloads.java    From gcp-ingestion with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Decrypts a pioneer-study envelope and re-emits the contained document.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>lazily create the key store and the validator/envelope schema on first use;</li>
 *   <li>parse the payload as JSON and validate it against the envelope schema;</li>
 *   <li>look up the private key named by the envelope and decrypt the encrypted data;</li>
 *   <li>optionally decompress the decrypted bytes, depending on {@code decompressPayload};</li>
 *   <li>merge the pioneer id and study name into the decrypted payload as metadata;</li>
 *   <li>overwrite the document namespace/type/version attributes with the schema values from
 *       the envelope.</li>
 * </ol>
 *
 * @param message the enveloped message; null components are filled in by
 *     {@code PubsubConstraints.ensureNonNull}
 * @return a single-element list holding the decrypted message
 * @throws IOException if the envelope cannot be read or no key exists for the envelope's key id
 * @throws JoseException declared for the decryption step
 * @throws ValidationException declared for the envelope schema validation step
 */
@Override
public Iterable<PubsubMessage> apply(PubsubMessage message)
    throws IOException, JoseException, ValidationException {
  message = PubsubConstraints.ensureNonNull(message);

  if (keyStore == null) {
    // If configured resources aren't available, this throws UncheckedIOException;
    // this is unretryable so we allow it to bubble up and kill the worker and eventually fail
    // the pipeline.
    keyStore = KeyStore.of(metadataLocation.get(), kmsEnabled.get());
  }

  if (validator == null || envelopeSchema == null) {
    // Load the schema before publishing the validator: if reading the resource throws, neither
    // field is left half-initialized and the next call retries the whole initialization
    // instead of validating against a null schema.
    byte[] data = Resources
        .toByteArray(Resources.getResource("telemetry.pioneer-study.4.schema.json"));
    envelopeSchema = JSONSchemaStore.readSchema(data);
    validator = new JsonValidator();
  }

  ObjectNode json = Json.readObjectNode(message.getPayload());
  validator.validate(envelopeSchema, json);
  JsonNode payload = json.get(FieldName.PAYLOAD);

  String encryptionKeyId = payload.get(ENCRYPTION_KEY_ID).asText();
  PrivateKey key = keyStore.getKey(encryptionKeyId);
  if (key == null) {
    // Is this really an IOException?
    throw new IOException(String.format("encryptionKeyId not found: %s", encryptionKeyId));
  }

  final byte[] decrypted = decrypt(key, payload.get(ENCRYPTED_DATA).asText());

  byte[] payloadData;
  if (decompressPayload.get()) {
    payloadData = GzipUtil.maybeDecompress(decrypted);
  } else {
    // don't bother decompressing
    payloadData = decrypted;
  }

  // insert top-level metadata into the payload
  ObjectNode metadata = Json.createObjectNode();
  metadata.put(PIONEER_ID, payload.get(PIONEER_ID).asText());
  metadata.put(STUDY_NAME, payload.get(STUDY_NAME).asText());
  final byte[] merged = AddMetadata.mergedPayload(payloadData, Json.asBytes(metadata));

  // Redirect messages via attributes
  Map<String, String> attributes = new HashMap<>(message.getAttributeMap());
  attributes.put(Attribute.DOCUMENT_NAMESPACE, payload.get(SCHEMA_NAMESPACE).asText());
  attributes.put(Attribute.DOCUMENT_TYPE, payload.get(SCHEMA_NAME).asText());
  attributes.put(Attribute.DOCUMENT_VERSION, payload.get(SCHEMA_VERSION).asText());
  return Collections.singletonList(new PubsubMessage(merged, attributes));
}
 
Example 15
Source File: ParseProxy.java    From gcp-ingestion with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Rewrites the attributes of a message that carries an X-Pipeline-Proxy attribute.
 *
 * <p>Messages without that attribute are returned as a copy with unchanged attributes. For
 * proxied messages: when the proxy value parses as a timestamp, the current submission timestamp
 * is moved to the proxy timestamp attribute and the proxy value becomes the submission timestamp
 * (the difference is recorded in {@code teeLatencyTimer} when both parse); the last entry of
 * X-Forwarded-For is dropped; the proxy attribute and any null-valued attributes are removed;
 * and the proxied-message counter is incremented.
 */
@Override
public PubsubMessage apply(PubsubMessage message) {
  // Prevent null pointer exception
  message = PubsubConstraints.ensureNonNull(message);

  // Copy attributes
  final Map<String, String> attrs = new HashMap<>(message.getAttributeMap());

  final String proxyValue = attrs.get(Attribute.X_PIPELINE_PROXY);
  if (proxyValue == null) {
    // Not a proxied message: nothing to rewrite.
    return new PubsubMessage(message.getPayload(), attrs);
  }

  // Check if X-Pipeline-Proxy is a timestamp
  final Instant proxyTime = Time.parseAsInstantOrNull(proxyValue);
  if (proxyTime != null) {
    final String submitted = attrs.get(Attribute.SUBMISSION_TIMESTAMP);
    final Instant submittedTime = Time.parseAsInstantOrNull(submitted);
    // Record the difference between submission and proxy times as tee latency.
    if (submittedTime != null) {
      teeLatencyTimer.update(submittedTime.toEpochMilli() - proxyTime.toEpochMilli());
    }
    // Rename submission timestamp to proxy timestamp
    attrs.put(Attribute.PROXY_TIMESTAMP, submitted);
    // Use submission timestamp from X-Pipeline-Proxy
    attrs.put(Attribute.SUBMISSION_TIMESTAMP, proxyValue);
  }

  // Drop extra IP from X-Forwarded-For
  final String forwardedFor = attrs.get(Attribute.X_FORWARDED_FOR);
  if (forwardedFor != null) {
    final int cutAt = Math.max(forwardedFor.lastIndexOf(","), 0);
    attrs.put(Attribute.X_FORWARDED_FOR, forwardedFor.substring(0, cutAt));
  }

  // Remove the proxy attribute
  attrs.remove(Attribute.X_PIPELINE_PROXY);

  // remove null attributes because the coder can't handle them
  attrs.values().removeIf(Objects::isNull);

  // Report proxied message
  countPipelineProxy.inc();

  // Return new message
  return new PubsubMessage(message.getPayload(), attrs);
}
 
Example 16
Source File: GeoIspLookup.java    From gcp-ingestion with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Adds ISP attributes to {@code message} based on the client IP.
 *
 * <p>Resources are loaded on the first message. A message that already has an ISP name attribute
 * is returned unchanged. Otherwise the client IP is taken from the second-to-last entry of
 * X-Forwarded-For (or the only entry), falling back to the remote address attribute, and looked
 * up in the ISP database. The database build date is always recorded; the ISP name and
 * organization are added when the lookup succeeds. Unknown hosts and GeoIP lookup failures are
 * ignored. Null-valued attributes are removed before the new message is built.
 *
 * @param message the incoming message; null components are filled in by
 *     {@code PubsubConstraints.ensureNonNull}
 * @return the original message when the lookup was already applied, otherwise a new message
 *     with the added attributes
 * @throws UncheckedIOException wrapping any other {@link IOException} raised during processing
 */
@Override
public PubsubMessage apply(PubsubMessage message) {
  message = PubsubConstraints.ensureNonNull(message);

  try {
    if (ispReader == null) {
      loadResourcesOnFirstMessage();
    }

    if (message.getAttributeMap().containsKey(Attribute.ISP_NAME)) {
      // Return early since ISP lookup has already been performed.
      countIspAlreadyApplied.inc();
      return message;
    }

    // copy attributes
    Map<String, String> attributes = new HashMap<>(message.getAttributeMap());

    // Determine client ip
    String ip;
    String xff = attributes.get(Attribute.X_FORWARDED_FOR);

    if (xff != null) {
      // Google's load balancer will append the immediate sending client IP and a global
      // forwarding rule IP to any existing content in X-Forwarded-For as documented in:
      // https://cloud.google.com/load-balancing/docs/https/#components
      //
      // In practice, many of the "first" addresses are bogus or internal,
      // so we target the immediate sending client IP.
      String[] ips = xff.split("\\s*,\\s*");
      // split() drops trailing empty strings, so a header made up only of separators
      // (e.g. ",") yields a zero-length array; fall back to the empty string instead of
      // indexing into it.
      ip = ips.length == 0 ? "" : ips[Math.max(ips.length - 2, 0)];
      countIpForwarded.inc();
    } else {
      ip = attributes.getOrDefault(Attribute.REMOTE_ADDR, "");
      countIpRemoteAddr.inc();
    }

    try {
      attributes.put(Attribute.ISP_DB_VERSION, DateTimeFormatter.ISO_INSTANT
          .format(Instant.ofEpochMilli(ispReader.getMetadata().getBuildDate().getTime())));

      // Throws UnknownHostException
      InetAddress ipAddress = InetAddress.getByName(ip);
      foundIp.inc();

      IspResponse response = ispReader.isp(ipAddress);
      foundIsp.inc();

      attributes.put(Attribute.ISP_NAME, response.getIsp());
      attributes.put(Attribute.ISP_ORGANIZATION, response.getOrganization());
    } catch (UnknownHostException | GeoIp2Exception ignore) {
      // ignore these exceptions
    }

    // remove null attributes because the coder can't handle them
    attributes.values().removeIf(Objects::isNull);

    return new PubsubMessage(message.getPayload(), attributes);
  } catch (IOException e) {
    // Re-throw unchecked, so that the pipeline will fail at run time if it occurs
    throw new UncheckedIOException(e);
  }
}