Java Code Examples for com.google.privacy.dlp.v2.Table#Row

The following examples show how to use com.google.privacy.dlp.v2.Table#Row. Each example is drawn from an open-source project; the source file and project are noted above each snippet.
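Before diving into the examples, here is a minimal, self-contained sketch (not taken from any of the projects below; the header and value strings are placeholders) showing how a Table.Row is built and wrapped in a DLP Table:

import com.google.privacy.dlp.v2.FieldId;
import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Value;

// Build a single row with one string value.
Table.Row row =
    Table.Row.newBuilder()
        .addValues(Value.newBuilder().setStringValue("alice@example.com").build())
        .build();

// Wrap the row in a Table whose header names the column.
Table table =
    Table.newBuilder()
        .addHeaders(FieldId.newBuilder().setName("email").build())
        .addRows(row)
        .build();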
Example 1
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) {

  Table tokenizedData = c.element().getValue();
  List<String> headers =
      tokenizedData.getHeadersList().stream()
          .map(FieldId::getName)
          .collect(Collectors.toList());
  List<Table.Row> outputRows = tokenizedData.getRowsList();
  if (!outputRows.isEmpty()) {
    for (Table.Row outputRow : outputRows) {
      if (outputRow.getValuesCount() != headers.size()) {
        throw new IllegalArgumentException(
            "CSV file's header count must exactly match the data element count");
      }
      c.output(
          KV.of(
              c.element().getKey(),
              createBqRow(outputRow, headers.toArray(new String[headers.size()]))));
    }
  }
}
 
Example 2
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0
private static TableRow createBqRow(Table.Row tokenizedValue, String[] headers) {
  TableRow bqRow = new TableRow();
  AtomicInteger headerIndex = new AtomicInteger(0);
  List<TableCell> cells = new ArrayList<>();
  tokenizedValue
      .getValuesList()
      .forEach(
          value -> {
            String checkedHeaderName =
                checkHeaderName(headers[headerIndex.getAndIncrement()]);
            bqRow.set(checkedHeaderName, value.getStringValue());
            cells.add(new TableCell().set(checkedHeaderName, value.getStringValue()));
          });
  bqRow.setF(cells);
  return bqRow;
}
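Note that TableRow and TableCell in this example are the BigQuery API model classes (com.google.api.services.bigquery.model), not DLP types; the helper converts a de-identified DLP Table.Row into a BigQuery row keyed by the checked header names.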
 
Example 3
Source File: MapStringToDlpRowTest.java    From beam with Apache License 2.0
@Test
public void mapsDelimitedStringToRow() {
  PCollection<KV<String, Table.Row>> rowCollection =
      testPipeline
          .apply(Create.of(KV.of("key", "value,secondValue")))
          .apply(ParDo.of(new MapStringToDlpRow(",")));
  PAssert.that(rowCollection)
      .containsInAnyOrder(
          KV.of(
              "key",
              Table.Row.newBuilder()
                  .addValues(Value.newBuilder().setStringValue("value").build())
                  .addValues(Value.newBuilder().setStringValue("secondValue").build())
                  .build()));
  testPipeline.run().waitUntilFinish();
}
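MapStringToDlpRow itself is internal to Beam's DLP extension and is not shown on this page. Based purely on the behavior asserted in this test (and in Example 10), an equivalent DoFn might look roughly like the following sketch; the class name is hypothetical:

import com.google.privacy.dlp.v2.Table;
import com.google.privacy.dlp.v2.Value;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.DoFn.ProcessElement;
import org.apache.beam.sdk.values.KV;

// Hypothetical equivalent of MapStringToDlpRow: splits a delimited string into
// one Table.Row value per field; a null delimiter yields a single-value row.
// Note that String.split treats the delimiter as a regex.
static class MapStringToDlpRowSketch extends DoFn<KV<String, String>, KV<String, Table.Row>> {
  private final String delimiter;

  MapStringToDlpRowSketch(String delimiter) {
    this.delimiter = delimiter;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    Table.Row.Builder rowBuilder = Table.Row.newBuilder();
    String line = c.element().getValue();
    if (delimiter == null) {
      rowBuilder.addValues(Value.newBuilder().setStringValue(line).build());
    } else {
      for (String field : line.split(delimiter)) {
        rowBuilder.addValues(Value.newBuilder().setStringValue(field).build());
      }
    }
    c.output(KV.of(c.element().getKey(), rowBuilder.build()));
  }
}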
 
Example 4
Source File: DLPTokenizationDoFnTest.java    From dlp-dataflow-deidentification with Apache License 2.0
@Test
public void testConvertTableRowToRow() {
  DLPTokenizationDoFn dlp =
      new DLPTokenizationDoFn(
          "Project Name",
          ValueProvider.StaticValueProvider.of("DeidentifyTemplateName"),
          ValueProvider.StaticValueProvider.of("IdentifyTemplateName"));

  String[] header = {"header0", "header1"};
  String key = "Key name";
  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  tableRowBuilder.addValues(0, Value.newBuilder().setStringValue("value0"));
  tableRowBuilder.addValues(1, Value.newBuilder().setStringValue("value1"));
  Table.Row row = tableRowBuilder.build();

  Row result = dlp.convertTableRowToRow(header, key, row);
  assertEquals(key, result.getTableId());
  assertEquals("header0", result.getHeader()[0]);
  assertEquals("value1", result.getValue()[1]);
}
 
Example 5
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0
private static TableRow createBqRow(Table.Row tokenizedValue, String[] headers) {
  TableRow bqRow = new TableRow();
  AtomicInteger headerIndex = new AtomicInteger(0);
  List<TableCell> cells = new ArrayList<>();
  tokenizedValue
      .getValuesList()
      .forEach(
          value -> {
            String checkedHeaderName =
                checkHeaderName(headers[headerIndex.getAndIncrement()]);
            bqRow.set(checkedHeaderName, value.getStringValue());
            cells.add(new TableCell().set(checkedHeaderName, value.getStringValue()));
          });
  bqRow.setF(cells);
  return bqRow;
}
 
Example 6
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) {

  Table tokenizedData = c.element().getValue();
  List<String> headers =
      tokenizedData.getHeadersList().stream()
          .map(FieldId::getName)
          .collect(Collectors.toList());
  List<Table.Row> outputRows = tokenizedData.getRowsList();
  if (!outputRows.isEmpty()) {
    for (Table.Row outputRow : outputRows) {
      if (outputRow.getValuesCount() != headers.size()) {
        throw new IllegalArgumentException(
            "CSV file's header count must exactly match the data element count");
      }
      c.output(
          KV.of(
              c.element().getKey(),
              createBqRow(outputRow, headers.toArray(new String[headers.size()]))));
    }
  }
}
 
Example 7
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0
private Table.Row convertCsvRowToTableRow(CSVRecord csvRow) {
  // Convert a CSV record into a DLP Table.Row.
  Iterator<String> valueIterator = csvRow.iterator();
  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  while (valueIterator.hasNext()) {
    String value = valueIterator.next();
    if (value != null) {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build());
    } else {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue("").build());
    }
  }

  return tableRowBuilder.build();
}
 
Example 8
Source File: BatchRequestForDlpTest.java    From beam with Apache License 2.0
@Override
public Void apply(Iterable<KV<String, Iterable<Table.Row>>> input) {
  List<KV<String, Iterable<Table.Row>>> itemList = new ArrayList<>();
  input.forEach(itemList::add);
  assertEquals(1, itemList.size());
  return null;
}
 
Example 9
Source File: BatchRequestForDlpTest.java    From beam with Apache License 2.0
@Test
public void batchesRequests() {
  PCollection<KV<String, Iterable<Table.Row>>> batchedRows =
      testPipeline
          .apply(Create.of(KV.of("key", "value1"), KV.of("key", "value2")))
          .apply(ParDo.of(new MapStringToDlpRow(null)))
          .apply(ParDo.of(new BatchRequestForDLP(524000)));
  PAssert.that(batchedRows).satisfies(new VerifyPCollectionSize());
  testPipeline.run().waitUntilFinish();
}
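The batch size of 524000 bytes appears chosen to sit just under the DLP API's 0.5 MB (524288-byte) limit on table content per request, leaving a little headroom for the rest of the request envelope.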
 
Example 10
Source File: MapStringToDlpRowTest.java    From beam with Apache License 2.0
@Test
public void mapsStringToRow() {
  PCollection<KV<String, Table.Row>> rowCollection =
      testPipeline
          .apply(Create.of(KV.of("key", "value")))
          .apply(ParDo.of(new MapStringToDlpRow(null)));
  PAssert.that(rowCollection)
      .containsInAnyOrder(
          KV.of(
              "key",
              Table.Row.newBuilder()
                  .addValues(Value.newBuilder().setStringValue("value").build())
                  .build()));
  testPipeline.run().waitUntilFinish();
}
 
Example 11
Source File: BatchRequestForDLP.java    From beam with Apache License 2.0
/**
 * Outputs the elements buffered in the elements bag in batches of the desired size.
 *
 * @param elementsBag the element buffer.
 * @param output receiver for the batched output elements.
 */
@OnTimer("eventTimer")
public void onTimer(
    @StateId("elementsBag") BagState<KV<String, Table.Row>> elementsBag,
    OutputReceiver<KV<String, Iterable<Table.Row>>> output) {
  if (elementsBag.read().iterator().hasNext()) {
    String key = elementsBag.read().iterator().next().getKey();
    AtomicInteger bufferSize = new AtomicInteger();
    List<Table.Row> rows = new ArrayList<>();
    elementsBag
        .read()
        .forEach(
            element -> {
              int elementSize = element.getValue().getSerializedSize();
              boolean clearBuffer = bufferSize.intValue() + elementSize > batchSizeBytes;
              if (clearBuffer) {
                LOG.debug(
                    "Clear buffer of {} bytes, Key {}", bufferSize.intValue(), element.getKey());
                numberOfRowsBagged.inc(rows.size());
                output.output(KV.of(element.getKey(), rows));
                rows.clear();
                bufferSize.set(0);
              }
              rows.add(element.getValue());
              bufferSize.getAndAdd(element.getValue().getSerializedSize());
            });
    if (!rows.isEmpty()) {
      LOG.debug("Outputting remaining {} rows.", rows.size());
      numberOfRowsBagged.inc(rows.size());
      output.output(KV.of(key, rows));
    }
  }
}
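Note the flush ordering: the buffer is emitted before adding the element that would overflow it, so each emitted batch stays at or under batchSizeBytes unless a single row by itself exceeds the limit; any final partial batch is emitted after the loop.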
 
Example 12
Source File: BatchRequestForDLP.java    From beam with Apache License 2.0
@ProcessElement
public void process(
    @Element KV<String, Table.Row> element,
    @StateId("elementsBag") BagState<KV<String, Table.Row>> elementsBag,
    @TimerId("eventTimer") Timer eventTimer,
    BoundedWindow w) {
  elementsBag.add(element);
  eventTimer.set(w.maxTimestamp());
}
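The timer is set to the window's maximum timestamp, so the onTimer method shown in Example 11 fires once per key and window, after all elements for that window have been buffered.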
 
Example 13
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0
private Table.Row convertCsvRowToTableRow(CSVRecord csvRow) {
  // Convert a CSV record into a DLP Table.Row.
  Iterator<String> valueIterator = csvRow.iterator();
  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  while (valueIterator.hasNext()) {
    String value = valueIterator.next();
    if (value != null) {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build());
    } else {
      tableRowBuilder.addValues(Value.newBuilder().setStringValue("").build());
    }
  }

  return tableRowBuilder.build();
}
 
Example 14
Source File: UtilTest.java    From dlp-dataflow-deidentification with Apache License 2.0
@Test
public void testConvertCsvRowToTableRow() {
  String csvRow = "this,is,a,sentence";
  Table.Row result = Util.convertCsvRowToTableRow(csvRow);

  assertEquals(4, result.getValuesCount());
  assertEquals("this", result.getValues(0).getStringValue());
  assertEquals("is", result.getValues(1).getStringValue());
  assertEquals("a", result.getValues(2).getStringValue());
  assertEquals("sentence", result.getValues(3).getStringValue());
}
 
Example 15
Source File: DeIdentifyWithDateShift.java    From java-docs-samples with Apache License 2.0
public static Table.Row parseLineAsRow(String line) {
  List<String> values = Splitter.on(",").splitToList(line);
  Value name = Value.newBuilder().setStringValue(values.get(0)).build();
  Value birthDate = Value.newBuilder().setDateValue(parseAsDate(values.get(1))).build();
  Value creditCardNumber = Value.newBuilder().setStringValue(values.get(2)).build();
  Value registerDate = Value.newBuilder().setDateValue(parseAsDate(values.get(3))).build();
  return Table.Row.newBuilder()
      .addValues(name)
      .addValues(birthDate)
      .addValues(creditCardNumber)
      .addValues(registerDate)
      .build();
}
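The parseAsDate helper is not shown in this excerpt. A plausible sketch, assuming a MM/dd/yyyy input format (the real sample's format may differ), converts the string into the com.google.type.Date proto that Value.setDateValue expects:

import com.google.type.Date;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

// Hypothetical helper: parse "MM/dd/yyyy" text into the Date proto used by Value.setDateValue.
static Date parseAsDate(String s) {
  LocalDate parsed = LocalDate.parse(s.trim(), DateTimeFormatter.ofPattern("MM/dd/yyyy"));
  return Date.newBuilder()
      .setYear(parsed.getYear())
      .setMonth(parsed.getMonthValue())
      .setDay(parsed.getDayOfMonth())
      .build();
}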
 
Example 16
Source File: Util.java    From dlp-dataflow-deidentification with Apache License 2.0
public static Table.Row convertCsvRowToTableRow(String row) {
  String[] values = row.split(",");
  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  for (String value : values) {
    tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build());
  }

  return tableRowBuilder.build();
}
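Because this helper uses a plain String.split(","), it does not handle quoted fields, escaped commas, or trailing empty values; the CSVRecord-based converters in Examples 7 and 13 are more robust for real CSV input.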
 
Example 17
Source File: DLPTokenizationDoFn.java    From dlp-dataflow-deidentification with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c) {

  String key = c.element().getKey();
  Table nonEncryptedData = c.element().getValue();
  setInspectTemplateExist();
  ContentItem tableItem = ContentItem.newBuilder().setTable(nonEncryptedData).build();

  DeidentifyContentResponse response;
  DeidentifyContentRequest request = buildDeidentifyContentRequest(tableItem);
  response = dlpServiceClient.deidentifyContent(request);
  Table encryptedData = response.getItem().getTable();
  LOG.info(
      "Request Size Successfully Tokenized:{} rows {} bytes ",
      encryptedData.getRowsList().size(),
      request.toByteString().size());

  List<String> outputHeaders =
      encryptedData.getHeadersList().stream().map(FieldId::getName).collect(Collectors.toList());
  String[] header = new String[outputHeaders.size()];

  for (int i = 0; i < header.length; i++) {
    header[i] = Util.checkHeaderName(outputHeaders.get(i));
  }
  List<Table.Row> outputRows = encryptedData.getRowsList();

  for (Table.Row outputRow : outputRows) {
    Row row = convertTableRowToRow(header, key, outputRow);
    c.output(row);
  }
}
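The buildDeidentifyContentRequest helper is not shown in this excerpt. A minimal sketch, assuming the DoFn stores its constructor arguments (see Example 4) as projectName, deidentifyTemplateName, and inspectTemplateName fields, might look like this:

import com.google.privacy.dlp.v2.ContentItem;
import com.google.privacy.dlp.v2.DeidentifyContentRequest;
import com.google.privacy.dlp.v2.ProjectName;

// Hypothetical reconstruction: field names mirror the constructor arguments in Example 4.
private DeidentifyContentRequest buildDeidentifyContentRequest(ContentItem tableItem) {
  DeidentifyContentRequest.Builder request =
      DeidentifyContentRequest.newBuilder()
          .setParent(ProjectName.of(projectName).toString())
          .setDeidentifyTemplateName(deidentifyTemplateName.get())
          .setItem(tableItem);
  if (inspectTemplateExist) {
    request.setInspectTemplateName(inspectTemplateName.get());
  }
  return request.build();
}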
 
Example 18
Source File: DLPTokenizationDoFn.java    From dlp-dataflow-deidentification with Apache License 2.0
public Row convertTableRowToRow(String[] header, String key, Table.Row outputRow) {
  String dlpRow =
      outputRow.getValuesList().stream()
          .map(Value::getStringValue)
          .collect(Collectors.joining(","));
  String[] values = dlpRow.split(",");
  return new Row(key, header, values);
}
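Joining the values with commas and immediately re-splitting works only when no value itself contains a comma; mapping getValuesList() directly to a String[] would avoid that round-trip entirely.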
 
Example 19
Source File: DLPTextToBigQueryStreaming.java    From DataflowTemplates with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker)
    throws IOException {
  for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
    String fileKey = c.element().getKey();
    try (BufferedReader br = getReader(c.element().getValue())) {

      csvHeaders = getHeaders(c.sideInput(headerMap), fileKey);
      if (csvHeaders != null) {
        List<FieldId> dlpTableHeaders =
            csvHeaders.stream()
                .map(header -> FieldId.newBuilder().setName(header).build())
                .collect(Collectors.toList());
        List<Table.Row> rows = new ArrayList<>();
        Table dlpTable = null;
        // Find the end line for this restriction so that we know the start line.
        int endOfLine = (int) (i * batchSize.get().intValue());
        int startOfLine = (endOfLine - batchSize.get().intValue());
        // Skip all the rows that are not part of this restriction.
        br.readLine();
        Iterator<CSVRecord> csvRows =
            CSVFormat.DEFAULT.withSkipHeaderRecord().parse(br).iterator();
        for (int line = 0; line < startOfLine; line++) {
          if (csvRows.hasNext()) {
            csvRows.next();
          }
        }
        // Loop through the buffered reader, creating DLP Table.Rows up to the batch size.
        while (csvRows.hasNext() && lineCount <= batchSize.get()) {

          CSVRecord csvRow = csvRows.next();
          rows.add(convertCsvRowToTableRow(csvRow));
          lineCount += 1;
        }
        // Create the DLP table and output it for the next transformation.
        dlpTable = Table.newBuilder().addAllHeaders(dlpTableHeaders).addAllRows(rows).build();
        c.output(KV.of(fileKey, dlpTable));

        LOG.debug(
            "Current Restriction From: {}, Current Restriction To: {},"
                + " StartofLine: {}, End Of Line {}, BatchData {}",
            tracker.currentRestriction().getFrom(),
            tracker.currentRestriction().getTo(),
            startOfLine,
            endOfLine,
            dlpTable.getRowsCount());

      } else {

        throw new RuntimeException("Header Values Can't be found For file Key " + fileKey);
      }
    }
  }
}
 
Example 20
Source File: DLPTextToBigQueryStreaming.java    From dlp-dataflow-deidentification with Apache License 2.0
@ProcessElement
public void processElement(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker)
    throws IOException {
  for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
    String fileKey = c.element().getKey();
    try (BufferedReader br = getReader(c.element().getValue())) {

      csvHeaders = getHeaders(c.sideInput(headerMap), fileKey);
      if (csvHeaders != null) {
        List<FieldId> dlpTableHeaders =
            csvHeaders.stream()
                .map(header -> FieldId.newBuilder().setName(header).build())
                .collect(Collectors.toList());
        List<Table.Row> rows = new ArrayList<>();
        Table dlpTable = null;
        // Find the end line for this restriction so that we know the start line.
        int endOfLine = (int) (i * batchSize.get().intValue());
        int startOfLine = (endOfLine - batchSize.get().intValue());
        // Skip all the rows that are not part of this restriction.
        br.readLine();
        Iterator<CSVRecord> csvRows =
            CSVFormat.DEFAULT.withSkipHeaderRecord().parse(br).iterator();
        for (int line = 0; line < startOfLine; line++) {
          if (csvRows.hasNext()) {
            csvRows.next();
          }
        }
        // Loop through the buffered reader, creating DLP Table.Rows up to the batch size.
        while (csvRows.hasNext() && lineCount <= batchSize.get()) {

          CSVRecord csvRow = csvRows.next();
          rows.add(convertCsvRowToTableRow(csvRow));
          lineCount += 1;
        }
        // Create the DLP table and output it for the next transformation.
        dlpTable = Table.newBuilder().addAllHeaders(dlpTableHeaders).addAllRows(rows).build();
        c.output(KV.of(fileKey, dlpTable));

        LOG.debug(
            "Current Restriction From: {}, Current Restriction To: {},"
                + " StartofLine: {}, End Of Line {}, BatchData {}",
            tracker.currentRestriction().getFrom(),
            tracker.currentRestriction().getTo(),
            startOfLine,
            endOfLine,
            dlpTable.getRowsCount());

      } else {

        throw new RuntimeException("Header Values Can't be found For file Key " + fileKey);
      }
    }
  }
}