Java Code Examples for com.google.privacy.dlp.v2.Table#Row
The following examples show how to use
com.google.privacy.dlp.v2.Table#Row.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DLPTextToBigQueryStreaming.java From dlp-dataflow-deidentification with Apache License 2.0 | 6 votes |
/**
 * Converts every row of the tokenized DLP table into a BigQuery row and emits it under the key
 * of the incoming element.
 *
 * @param c process context; the element's value is the tokenized table and its key is reused
 *     for every emitted row
 * @throws IllegalArgumentException if a row's value count differs from the header count
 */
@ProcessElement
public void processElement(ProcessContext c) {
  Table tokenizedData = c.element().getValue();
  // Resolve the header names once: they are the same for every row of this table, so there is
  // no need to rebuild the array on each loop iteration.
  String[] headers =
      tokenizedData.getHeadersList().stream()
          .map(fid -> fid.getName())
          .toArray(String[]::new);

  // Iterating an empty list is a no-op, so no separate emptiness check is required.
  for (Table.Row outputRow : tokenizedData.getRowsList()) {
    if (outputRow.getValuesCount() != headers.length) {
      throw new IllegalArgumentException(
          "CSV file's header count must exactly match with data element count");
    }
    c.output(KV.of(c.element().getKey(), createBqRow(outputRow, headers)));
  }
}
Example 2
Source File: DLPTextToBigQueryStreaming.java From dlp-dataflow-deidentification with Apache License 2.0 | 6 votes |
/**
 * Builds a BigQuery row from a DLP table row, pairing each value with the header at the same
 * position.
 *
 * @param tokenizedValue DLP row whose values are read as strings
 * @param headers column names, positionally aligned with the row's values
 * @return a row that carries each value both as a named field and as an entry of its cell list
 */
private static TableRow createBqRow(Table.Row tokenizedValue, String[] headers) {
  TableRow bqRow = new TableRow();
  List<TableCell> cells = new ArrayList<>();
  // A plain indexed loop keeps header and value aligned without a mutable counter captured by
  // a lambda.
  for (int i = 0; i < tokenizedValue.getValuesCount(); i++) {
    String checkedHeaderName = checkHeaderName(headers[i]);
    String cellValue = tokenizedValue.getValues(i).getStringValue();
    bqRow.set(checkedHeaderName, cellValue);
    cells.add(new TableCell().set(checkedHeaderName, cellValue));
  }
  bqRow.setF(cells);
  return bqRow;
}
Example 3
Source File: MapStringToDlpRowTest.java From beam with Apache License 2.0 | 6 votes |
/** Verifies that a string mapped with a "," delimiter yields one row value per field. */
@Test
public void mapsDelimitedStringToRow() {
  Table.Row expectedRow =
      Table.Row.newBuilder()
          .addValues(Value.newBuilder().setStringValue("value").build())
          .addValues(Value.newBuilder().setStringValue("secondValue").build())
          .build();

  PCollection<KV<String, Table.Row>> rowCollection =
      testPipeline
          .apply(Create.of(KV.of("key", "value,secondValue")))
          .apply(ParDo.of(new MapStringToDlpRow(",")));

  PAssert.that(rowCollection).containsInAnyOrder(KV.of("key", expectedRow));
  testPipeline.run().waitUntilFinish();
}
Example 4
Source File: DLPTokenizationDoFnTest.java From dlp-dataflow-deidentification with Apache License 2.0 | 6 votes |
/** Checks that a DLP table row is converted into a Row carrying the key, header and values. */
@Test
public void testConvertTableRowToRow() {
  DLPTokenizationDoFn dlp =
      new DLPTokenizationDoFn(
          "Project Name",
          ValueProvider.StaticValueProvider.of("DeidentifyTemplateName"),
          ValueProvider.StaticValueProvider.of("IdentifyTemplateName"));
  String[] header = {"header0", "header1"};
  String key = "Key name";

  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  tableRowBuilder.addValues(0, Value.newBuilder().setStringValue("value0"));
  tableRowBuilder.addValues(1, Value.newBuilder().setStringValue("value1"));
  Table.Row row = tableRowBuilder.build();

  Row result = dlp.convertTableRowToRow(header, key, row);

  // assertEquals takes (expected, actual); with that order a failure message reports the
  // correct side as the expected value.
  assertEquals(key, result.getTableId());
  assertEquals("header0", result.getHeader()[0]);
  assertEquals("value1", result.getValue()[1]);
}
Example 5
Source File: DLPTextToBigQueryStreaming.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Creates the BigQuery representation of a DLP table row by matching each value with the
 * header found at the same index.
 *
 * @param tokenizedValue DLP row whose values are read as strings
 * @param headers column names, positionally aligned with the row's values
 * @return a row holding every value as a named field and as an entry of its cell list
 */
private static TableRow createBqRow(Table.Row tokenizedValue, String[] headers) {
  TableRow bqRow = new TableRow();
  List<TableCell> cells = new ArrayList<>();
  // Index-based iteration replaces the AtomicInteger counter that the lambda version needed
  // only to work around the effectively-final rule.
  for (int column = 0; column < tokenizedValue.getValuesCount(); column++) {
    String checkedHeaderName = checkHeaderName(headers[column]);
    String cellValue = tokenizedValue.getValues(column).getStringValue();
    bqRow.set(checkedHeaderName, cellValue);
    cells.add(new TableCell().set(checkedHeaderName, cellValue));
  }
  bqRow.setF(cells);
  return bqRow;
}
Example 6
Source File: DLPTextToBigQueryStreaming.java From DataflowTemplates with Apache License 2.0 | 6 votes |
/**
 * Emits one BigQuery row per row of the tokenized DLP table, keyed by the incoming element's
 * key.
 *
 * @param c process context; the element's value is the tokenized table and its key is reused
 *     for every emitted row
 * @throws IllegalArgumentException if a row's value count differs from the header count
 */
@ProcessElement
public void processElement(ProcessContext c) {
  Table tokenizedData = c.element().getValue();
  // The header names do not change between rows, so the array is built a single time instead
  // of once per row.
  String[] headers =
      tokenizedData.getHeadersList().stream()
          .map(fid -> fid.getName())
          .toArray(String[]::new);

  // An empty row list simply skips the loop; no explicit size check is needed.
  for (Table.Row outputRow : tokenizedData.getRowsList()) {
    if (outputRow.getValuesCount() != headers.length) {
      throw new IllegalArgumentException(
          "CSV file's header count must exactly match with data element count");
    }
    c.output(KV.of(c.element().getKey(), createBqRow(outputRow, headers)));
  }
}
Example 7
Source File: DLPTextToBigQueryStreaming.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
/**
 * Converts a parsed CSV record into a DLP table row, one string value per field.
 *
 * @param csvRow record to convert
 * @return row whose values mirror the record's fields; a null field becomes an empty string
 */
private Table.Row convertCsvRowToTableRow(CSVRecord csvRow) {
  Table.Row.Builder rowBuilder = Table.Row.newBuilder();
  for (String field : csvRow) {
    String text = (field == null) ? "" : field;
    rowBuilder.addValues(Value.newBuilder().setStringValue(text).build());
  }
  return rowBuilder.build();
}
Example 8
Source File: BatchRequestForDlpTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Asserts that the supplied iterable contains exactly one element.
 *
 * @param input elements to count
 * @return always {@code null}
 */
@Override
public Void apply(Iterable<KV<String, Iterable<Table.Row>>> input) {
  int itemCount = 0;
  for (KV<String, Iterable<Table.Row>> ignored : input) {
    itemCount++;
  }
  assertEquals(1, itemCount);
  return null;
}
Example 9
Source File: BatchRequestForDlpTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Sends two strings sharing one key through row mapping and batching, then hands the batched
 * output to {@code VerifyPCollectionSize}.
 */
@Test
public void batchesRequests() {
  PCollection<KV<String, String>> keyedStrings =
      testPipeline.apply(Create.of(KV.of("key", "value1"), KV.of("key", "value2")));
  PCollection<KV<String, Table.Row>> keyedRows =
      keyedStrings.apply(ParDo.of(new MapStringToDlpRow(null)));
  PCollection<KV<String, Iterable<Table.Row>>> batchedRows =
      keyedRows.apply(ParDo.of(new BatchRequestForDLP(524000)));

  PAssert.that(batchedRows).satisfies(new VerifyPCollectionSize());
  testPipeline.run().waitUntilFinish();
}
Example 10
Source File: MapStringToDlpRowTest.java From beam with Apache License 2.0 | 5 votes |
/** Verifies that a string mapped with a null delimiter becomes a row with a single value. */
@Test
public void mapsStringToRow() {
  Table.Row expectedRow =
      Table.Row.newBuilder()
          .addValues(Value.newBuilder().setStringValue("value").build())
          .build();

  PCollection<KV<String, Table.Row>> rowCollection =
      testPipeline
          .apply(Create.of(KV.of("key", "value")))
          .apply(ParDo.of(new MapStringToDlpRow(null)));

  PAssert.that(rowCollection).containsInAnyOrder(KV.of("key", expectedRow));
  testPipeline.run().waitUntilFinish();
}
Example 11
Source File: BatchRequestForDLP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Outputs the elements buffered in the elementsBag in batches of desired size.
 *
 * <p>A batch is flushed as soon as adding the next row would push the accumulated serialized
 * size above {@code batchSizeBytes}; whatever remains after the last row is flushed at the end.
 * Every emitted batch owns its list: the buffer is replaced, not cleared, after a flush, so a
 * value that has already been output is never modified afterwards.
 *
 * @param elementsBag element buffer.
 * @param output Batched input elements.
 */
@OnTimer("eventTimer")
public void onTimer(
    @StateId("elementsBag") BagState<KV<String, Table.Row>> elementsBag,
    OutputReceiver<KV<String, Iterable<Table.Row>>> output) {
  if (elementsBag.read().iterator().hasNext()) {
    String key = elementsBag.read().iterator().next().getKey();
    int bufferSize = 0;
    List<Table.Row> rows = new ArrayList<>();
    for (KV<String, Table.Row> element : elementsBag.read()) {
      int elementSize = element.getValue().getSerializedSize();
      boolean clearBuffer = bufferSize + elementSize > batchSizeBytes;
      if (clearBuffer) {
        LOG.debug("Clear buffer of {} bytes, Key {}", bufferSize, element.getKey());
        numberOfRowsBagged.inc(rows.size());
        output.output(KV.of(element.getKey(), rows));
        // Start a fresh list: clearing the one just emitted would empty the batch that was
        // handed downstream and make all batches share a single list instance.
        rows = new ArrayList<>();
        bufferSize = 0;
      }
      rows.add(element.getValue());
      bufferSize += elementSize;
    }
    if (!rows.isEmpty()) {
      LOG.debug("Outputting remaining {} rows.", rows.size());
      numberOfRowsBagged.inc(rows.size());
      output.output(KV.of(key, rows));
    }
  }
}
Example 12
Source File: BatchRequestForDLP.java From beam with Apache License 2.0 | 5 votes |
/**
 * Buffers the incoming element in the {@code elementsBag} state and sets the {@code eventTimer}
 * timer to the maximum timestamp of the given window.
 *
 * @param element keyed row to buffer
 * @param elementsBag bag state the element is added to
 * @param eventTimer timer that is set on every processed element
 * @param w window whose maximum timestamp is used as the timer's target
 */
@ProcessElement public void process( @Element KV<String, Table.Row> element, @StateId("elementsBag") BagState<KV<String, Table.Row>> elementsBag, @TimerId("eventTimer") Timer eventTimer, BoundedWindow w) { elementsBag.add(element); eventTimer.set(w.maxTimestamp()); }
Example 13
Source File: DLPTextToBigQueryStreaming.java From DataflowTemplates with Apache License 2.0 | 5 votes |
/**
 * Turns a parsed CSV record into a DLP table row containing one string value per field.
 *
 * @param csvRow record to convert
 * @return row with the record's fields in order; null fields are stored as empty strings
 */
private Table.Row convertCsvRowToTableRow(CSVRecord csvRow) {
  Table.Row.Builder rowBuilder = Table.Row.newBuilder();
  for (String field : csvRow) {
    String text = (field == null) ? "" : field;
    rowBuilder.addValues(Value.newBuilder().setStringValue(text).build());
  }
  return rowBuilder.build();
}
Example 14
Source File: UtilTest.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
/** Checks that a comma-separated line is converted into a row with one value per field. */
@Test
public void testConvertCsvRowToTableRow() {
  String csvRow = "this,is,a,sentence";

  Table.Row result = Util.convertCsvRowToTableRow(csvRow);

  // assertEquals takes (expected, actual); with that order a failure message reports the
  // correct side as the expected value.
  assertEquals(4, result.getValuesCount());
  assertEquals("this", result.getValues(0).getStringValue());
  assertEquals("is", result.getValues(1).getStringValue());
  assertEquals("a", result.getValues(2).getStringValue());
  assertEquals("sentence", result.getValues(3).getStringValue());
}
Example 15
Source File: DeIdentifyWithDateShift.java From java-docs-samples with Apache License 2.0 | 5 votes |
/**
 * Parses one comma-separated line into a DLP table row.
 *
 * <p>Fields are read by position: indexes 0 and 2 are stored as string values, indexes 1 and 3
 * are passed through {@code parseAsDate} and stored as date values.
 *
 * @param line comma-separated input line
 * @return row with four values in the order the fields appear in the line
 */
public static Table.Row parseLineAsRow(String line) {
  List<String> fields = Splitter.on(",").splitToList(line);
  return Table.Row.newBuilder()
      .addValues(Value.newBuilder().setStringValue(fields.get(0)).build())
      .addValues(Value.newBuilder().setDateValue(parseAsDate(fields.get(1))).build())
      .addValues(Value.newBuilder().setStringValue(fields.get(2)).build())
      .addValues(Value.newBuilder().setDateValue(parseAsDate(fields.get(3))).build())
      .build();
}
Example 16
Source File: Util.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
/**
 * Converts a comma-separated line into a DLP table row with one string value per field.
 *
 * <p>The line is split on every comma; quoted fields that contain commas are not recognised.
 * Empty fields, including trailing ones, are kept as empty string values so the number of
 * values always equals the number of commas plus one.
 *
 * @param row comma-separated line
 * @return row holding the fields in order
 */
public static Table.Row convertCsvRowToTableRow(String row) {
  // A negative limit stops split() from discarding trailing empty strings, which would
  // otherwise make a line such as "a,b," produce two values instead of three.
  String[] values = row.split(",", -1);
  Table.Row.Builder tableRowBuilder = Table.Row.newBuilder();
  for (String value : values) {
    tableRowBuilder.addValues(Value.newBuilder().setStringValue(value).build());
  }
  return tableRowBuilder.build();
}
Example 17
Source File: DLPTokenizationDoFn.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
/**
 * Sends the element's table to the DLP de-identify call and emits one Row per row of the
 * returned table.
 *
 * @param c process context; the element's key is passed on to every emitted row and its value
 *     is the table submitted for de-identification
 */
@ProcessElement
public void processElement(ProcessContext c) {
  String key = c.element().getKey();
  Table nonEncryptedData = c.element().getValue();
  setInspectTemplateExist();

  ContentItem tableItem = ContentItem.newBuilder().setTable(nonEncryptedData).build();
  DeidentifyContentRequest request = buildDeidentifyContentRequest(tableItem);
  DeidentifyContentResponse response = dlpServiceClient.deidentifyContent(request);
  Table encryptedData = response.getItem().getTable();
  LOG.info(
      "Request Size Successfully Tokenized:{} rows {} bytes ",
      encryptedData.getRowsList().size(),
      request.toByteString().size());

  // Header names of the response, each passed through Util.checkHeaderName.
  String[] header =
      encryptedData.getHeadersList().stream()
          .map(FieldId::getName)
          .map(name -> Util.checkHeaderName(name))
          .toArray(String[]::new);

  for (Table.Row outputRow : encryptedData.getRowsList()) {
    c.output(convertTableRowToRow(header, key, outputRow));
  }
}
Example 18
Source File: DLPTokenizationDoFn.java From dlp-dataflow-deidentification with Apache License 2.0 | 5 votes |
/**
 * Wraps a DLP table row in a Row together with its key and header.
 *
 * <p>The string form of each value is copied positionally, so a value that itself contains a
 * comma stays a single entry and empty values are preserved.
 *
 * @param header column names for the row
 * @param key identifier passed to the Row constructor
 * @param outputRow DLP row whose values are read as strings
 * @return a new Row built from the key, the header and the row's string values
 */
public Row convertTableRowToRow(String[] header, String key, Table.Row outputRow) {
  // Map the values straight to an array; joining them with "," and splitting again would break
  // any value containing a comma and drop trailing empty values.
  String[] values =
      outputRow.getValuesList().stream()
          .map(value -> value.getStringValue())
          .toArray(String[]::new);
  return new Row(key, header, values);
}
Example 19
Source File: DLPTextToBigQueryStreaming.java From DataflowTemplates with Apache License 2.0 | 4 votes |
/**
 * Builds a DLP table from a slice of the element's CSV content for every claimed restriction
 * position and emits it keyed by the file key.
 *
 * <p>For each position {@code i} claimed from the tracker, the method opens a reader on the
 * element's value, looks up the headers for the element's key via the {@code headerMap} side
 * input, reads and discards one line from the reader, and parses the remainder as CSV. It then
 * skips {@code startOfLine} records (with {@code endOfLine = i * batchSize} and {@code
 * startOfLine = endOfLine - batchSize}), converts the following records to DLP rows while
 * {@code lineCount <= batchSize}, and outputs the resulting table.
 *
 * <p>NOTE(review): {@code csvHeaders} and {@code lineCount} are not declared in this method, so
 * they are presumably instance fields; {@code lineCount} is only incremented here - confirm
 * where it is initialized and reset.
 *
 * @param c process context supplying the element and the {@code headerMap} side input
 * @param tracker restriction tracker used to claim the positions to process
 * @throws IOException declared by the method; presumably raised while reading the file -
 *     confirm
 * @throws RuntimeException if no headers are found for the file key
 */
@ProcessElement public void processElement(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) throws IOException { for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) { String fileKey = c.element().getKey(); try (BufferedReader br = getReader(c.element().getValue())) { csvHeaders = getHeaders(c.sideInput(headerMap), fileKey); if (csvHeaders != null) { List<FieldId> dlpTableHeaders = csvHeaders.stream() .map(header -> FieldId.newBuilder().setName(header).build()) .collect(Collectors.toList()); List<Table.Row> rows = new ArrayList<>(); Table dlpTable = null; /** finding out EOL for this restriction so that we know the SOL */ int endOfLine = (int) (i * batchSize.get().intValue()); int startOfLine = (endOfLine - batchSize.get().intValue()); /** skipping all the rows that's not part of this restriction */ br.readLine(); Iterator<CSVRecord> csvRows = CSVFormat.DEFAULT.withSkipHeaderRecord().parse(br).iterator(); for (int line = 0; line < startOfLine; line++) { if (csvRows.hasNext()) { csvRows.next(); } } /** looping through buffered reader and creating DLP Table Rows equals to batch */ while (csvRows.hasNext() && lineCount <= batchSize.get()) { CSVRecord csvRow = csvRows.next(); rows.add(convertCsvRowToTableRow(csvRow)); lineCount += 1; } /** creating DLP table and output for next transformation */ dlpTable = Table.newBuilder().addAllHeaders(dlpTableHeaders).addAllRows(rows).build(); c.output(KV.of(fileKey, dlpTable)); LOG.debug( "Current Restriction From: {}, Current Restriction To: {}," + " StartofLine: {}, End Of Line {}, BatchData {}", tracker.currentRestriction().getFrom(), tracker.currentRestriction().getTo(), startOfLine, endOfLine, dlpTable.getRowsCount()); } else { throw new RuntimeException("Header Values Can't be found For file Key " + fileKey); } } } }
Example 20
Source File: DLPTextToBigQueryStreaming.java From dlp-dataflow-deidentification with Apache License 2.0 | 4 votes |
/**
 * Emits, for every claimed restriction position, a DLP table built from a slice of the
 * element's CSV content, keyed by the file key.
 *
 * <p>For each position {@code i} claimed from the tracker, the method opens a reader on the
 * element's value, looks up the headers for the element's key via the {@code headerMap} side
 * input, reads and discards one line from the reader, and parses the remainder as CSV. It then
 * skips {@code startOfLine} records (with {@code endOfLine = i * batchSize} and {@code
 * startOfLine = endOfLine - batchSize}), converts the following records to DLP rows while
 * {@code lineCount <= batchSize}, and outputs the resulting table.
 *
 * <p>NOTE(review): {@code csvHeaders} and {@code lineCount} are not declared in this method, so
 * they are presumably instance fields; {@code lineCount} is only incremented here - confirm
 * where it is initialized and reset.
 *
 * @param c process context supplying the element and the {@code headerMap} side input
 * @param tracker restriction tracker used to claim the positions to process
 * @throws IOException declared by the method; presumably raised while reading the file -
 *     confirm
 * @throws RuntimeException if no headers are found for the file key
 */
@ProcessElement public void processElement(ProcessContext c, RestrictionTracker<OffsetRange, Long> tracker) throws IOException { for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) { String fileKey = c.element().getKey(); try (BufferedReader br = getReader(c.element().getValue())) { csvHeaders = getHeaders(c.sideInput(headerMap), fileKey); if (csvHeaders != null) { List<FieldId> dlpTableHeaders = csvHeaders.stream() .map(header -> FieldId.newBuilder().setName(header).build()) .collect(Collectors.toList()); List<Table.Row> rows = new ArrayList<>(); Table dlpTable = null; /** finding out EOL for this restriction so that we know the SOL */ int endOfLine = (int) (i * batchSize.get().intValue()); int startOfLine = (endOfLine - batchSize.get().intValue()); /** skipping all the rows that's not part of this restriction */ br.readLine(); Iterator<CSVRecord> csvRows = CSVFormat.DEFAULT.withSkipHeaderRecord().parse(br).iterator(); for (int line = 0; line < startOfLine; line++) { if (csvRows.hasNext()) { csvRows.next(); } } /** looping through buffered reader and creating DLP Table Rows equals to batch */ while (csvRows.hasNext() && lineCount <= batchSize.get()) { CSVRecord csvRow = csvRows.next(); rows.add(convertCsvRowToTableRow(csvRow)); lineCount += 1; } /** creating DLP table and output for next transformation */ dlpTable = Table.newBuilder().addAllHeaders(dlpTableHeaders).addAllRows(rows).build(); c.output(KV.of(fileKey, dlpTable)); LOG.debug( "Current Restriction From: {}, Current Restriction To: {}," + " StartofLine: {}, End Of Line {}, BatchData {}", tracker.currentRestriction().getFrom(), tracker.currentRestriction().getTo(), startOfLine, endOfLine, dlpTable.getRowsCount()); } else { throw new RuntimeException("Header Values Can't be found For file Key " + fileKey); } } } }