Java Code Examples for org.kitesdk.morphline.api.Record#getFirstValue()

The following examples show how to use org.kitesdk.morphline.api.Record#getFirstValue(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AbstractParser.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Processes the attachment of the given record, provided the record has at least one attachment
 * and the MIME type stored under {@code Fields.ATTACHMENT_MIME_TYPE} is supported.
 *
 * @param record the record whose attachment is to be processed
 * @return {@code false} if the record has no attachment or its MIME type is not supported;
 *         otherwise the result of {@code doProcess(record, stream)}
 * @throws MorphlineRuntimeException wrapping any {@link IOException} raised while processing
 */
@Override
protected boolean doProcess(Record record) {
  // TODO: make field for stream configurable
  boolean processable = hasAtLeastOneAttachment(record)
      && isMimeTypeSupported((String) record.getFirstValue(Fields.ATTACHMENT_MIME_TYPE), record);
  if (!processable) {
    return false;
  }

  InputStream attachment = getAttachmentInputStream(record);
  try {
    return doProcess(record, attachment);
  } catch (IOException ioe) {
    throw new MorphlineRuntimeException(ioe);
  } finally {
    // release the stream whether or not processing succeeded
    Closeables.closeQuietly(attachment);
  }
}
 
Example 2
Source File: ExtractProtobufPathsBuilder.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code extractPath} once per entry of {@code stepMap} against the record's first
 * {@code Fields.ATTACHMENT_BODY} value, writing into a copy of the input record, and then
 * passes that copy to the child command.
 *
 * @param inputRecord the record carrying the attachment body; it is copied, not modified here
 * @return {@code false} if any extraction throws (the exception is logged); otherwise the
 *         result of the child command
 */
@Override
protected boolean doProcess(Record inputRecord) {
  Object body = inputRecord.getFirstValue(Fields.ATTACHMENT_BODY);
  Preconditions.checkNotNull(body);
  Record result = inputRecord.copy();

  for (Map.Entry<String, Collection<String>> pathSpec : stepMap.entrySet()) {
    // the cast is kept outside the try block so that a ClassCastException still propagates
    List<String> steps = (List<String>) pathSpec.getValue();
    String targetField = pathSpec.getKey();
    try {
      extractPath(body, targetField, steps, result, 0);
    } catch (Exception ex) {
      LOG.error(ex.getMessage(), ex);
      return false;
    }
  }

  // pass record to next command in chain:
  return getChild().process(result);
}
 
Example 3
Source File: ExtractAvroPathsBuilder.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code extractPath} once per entry of {@code stepMap} against the record's first
 * {@code Fields.ATTACHMENT_BODY} value (cast to {@code GenericContainer}) and its schema,
 * writing into a copy of the input record, and then passes that copy to the child command.
 *
 * @param inputRecord the record carrying the attachment body; it is copied, not modified here
 * @return the result of the child command
 */
@Override
protected boolean doProcess(Record inputRecord) {
  // Preconditions.checkState(ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE.equals(inputRecord.getFirstValue(Fields.ATTACHMENT_MIME_TYPE)));
  GenericContainer container = (GenericContainer) inputRecord.getFirstValue(Fields.ATTACHMENT_BODY);
  Preconditions.checkNotNull(container);
  Preconditions.checkNotNull(container.getSchema());
  Record result = inputRecord.copy();

  for (Map.Entry<String, Collection<String>> pathSpec : stepMap.entrySet()) {
    String targetField = pathSpec.getKey();
    List<String> steps = (List<String>) pathSpec.getValue();
    extractPath(container, container.getSchema(), targetField, steps, result, 0);
  }

  // pass record to next command in chain:
  return getChild().process(result);
}
 
Example 4
Source File: ExtractHBaseCellsBuilder.java    From hbase-indexer with Apache License 2.0 5 votes vote down vote up
/**
 * Applies every entry of {@code mappings} to the record's first
 * {@code Fields.ATTACHMENT_BODY} value (cast to {@code Result}), after calling
 * {@code removeAttachments} on the record, and then delegates to the superclass.
 *
 * @param record the record to read the attachment body from and to hand to each mapping
 * @return the result of {@code super.doProcess(record)}
 */
@Override
protected boolean doProcess(Record record) {
    Result body = (Result) record.getFirstValue(Fields.ATTACHMENT_BODY);
    Preconditions.checkNotNull(body);

    // the body reference is captured above, before removeAttachments is called on the record
    removeAttachments(record);
    for (Mapping each : mappings) {
        each.apply(body, record);
    }

    // pass record to next command in chain:
    return super.doProcess(record);
}
 
Example 5
Source File: AbstractParser.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the record's first {@code Fields.ATTACHMENT_BODY} value as a stream.
 *
 * @param record the record to read the attachment body from
 * @return a {@link ByteArrayInputStream} over the body if it is a {@code byte[]}; otherwise the
 *         body itself, cast to {@link InputStream}
 */
private InputStream getAttachmentInputStream(Record record) {
  Object attachment = record.getFirstValue(Fields.ATTACHMENT_BODY);
  Preconditions.checkNotNull(attachment);
  return (attachment instanceof byte[])
      ? new ByteArrayInputStream((byte[]) attachment)
      : (InputStream) attachment;
}
 
Example 6
Source File: ParseTextMyWritableBuilder.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Copies the input record, replaces the values of {@code keyField} and {@code valueField} in
 * the copy with {@code MyWritable.keyStr(...)} and {@code MyWritable.valueStr(...)} of the
 * input record's first values for those fields, and delegates the copy to the superclass.
 *
 * @param inputRecord the record to read the key and value from; it is copied, not modified here
 * @return the result of {@code super.doProcess} on the rewritten copy
 */
@Override
protected boolean doProcess(Record inputRecord) {
  Record rewritten = inputRecord.copy();

  // change key
  Text rawKey = (Text) inputRecord.getFirstValue(this.keyField);
  String keyText = MyWritable.keyStr(rawKey);
  rewritten.replaceValues(this.keyField, keyText);

  // change value
  MyWritable rawValue = (MyWritable) inputRecord.getFirstValue(this.valueField);
  String valueText = MyWritable.valueStr(rawValue);
  rewritten.replaceValues(this.valueField, valueText);

  return super.doProcess(rewritten);
}
 
Example 7
Source File: ReadRCFileBuilder.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code Path} from {@code STREAM_PROTOCOL} followed by the record's first
 * {@code Fields.ATTACHMENT_NAME} value, using {@code "UNKNOWN"} when no name is present.
 *
 * @param record the record to read the attachment name from
 * @return the path built from the protocol prefix and the (possibly defaulted) name
 */
private Path getAttachmentPath(Record record) {
  // We have more meaningful RCFile error messages if we have an attachment name
  String declaredName = (String) record.getFirstValue(Fields.ATTACHMENT_NAME);
  String effectiveName = (declaredName != null) ? declaredName : "UNKNOWN";
  return new Path(STREAM_PROTOCOL + effectiveName);
}
 
Example 8
Source File: ExtractJsonPathsBuilder.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code extractPath} once per entry of {@code stepMap} against the record's first
 * {@code Fields.ATTACHMENT_BODY} value (cast to {@code JsonNode}), writing into a copy of the
 * input record, and then passes that copy to the child command.
 *
 * @param inputRecord the record carrying the attachment body; it is copied, not modified here
 * @return the result of the child command
 */
@Override
protected boolean doProcess(Record inputRecord) {
  JsonNode root = (JsonNode) inputRecord.getFirstValue(Fields.ATTACHMENT_BODY);
  Preconditions.checkNotNull(root);
  Record result = inputRecord.copy();

  for (Map.Entry<String, Collection<String>> pathSpec : stepMap.entrySet()) {
    String targetField = pathSpec.getKey();
    List<String> steps = (List<String>) pathSpec.getValue();
    extractPath(root, targetField, steps, result, 0);
  }

  // pass record to next command in chain:
  return getChild().process(result);
}
 
Example 9
Source File: MaxmindMorphlineTest.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds a copy of {@code input} through the morphline and verifies the outcome.
 *
 * <p>On expected failure, asserts that no record was collected. On expected success, asserts
 * that exactly one record was collected, that it equals {@code input} once its
 * {@code Fields.ATTACHMENT_BODY} values are removed, and that the removed body is a
 * {@code JsonNode} whose {@code "country"} and {@code "city"} children match the expected ids.
 *
 * @param input the record to process; a copy is handed to the morphline
 * @param isSuccess the expected return value of {@code morphline.process}
 * @param isSame not read by this method
 * @param countryGeoNameId expected {@code geoname_id} of the {@code "country"} node, or
 *        {@code null} if no such node is expected
 * @param cityGeoNameId expected {@code geoname_id} of the {@code "city"} node, or {@code null}
 *        if no such node is expected
 */
private void processAndVerifySuccess2(final Record input, final boolean isSuccess, boolean isSame,
    final Integer countryGeoNameId, final Integer cityGeoNameId) {
  final Record inputCopy = input.copy();
  collector.reset();
  startSession();
  assertEquals(1, collector.getNumStartEvents());
  assertEquals(isSuccess, morphline.process(inputCopy));

  if (!isSuccess) {
    assertEquals(0, collector.getRecords().size());
    return;
  }

  assertEquals(1, collector.getRecords().size());
  final Record actual = collector.getFirstRecord();

  // take the body out first so that the rest of the record can be compared against the input
  final Object body = actual.getFirstValue(Fields.ATTACHMENT_BODY);
  actual.removeAll(Fields.ATTACHMENT_BODY);
  assertEquals(input, actual);

  assertTrue(body instanceof JsonNode);
  final JsonNode jsonNode = (JsonNode) body;
  assertNotNull(jsonNode);

  final JsonNode countryNode = jsonNode.get("country");
  if (countryGeoNameId == null) {
    assertNull(countryNode);
  } else {
    assertNotNull(countryNode);
    assertEquals(countryGeoNameId.intValue(), countryNode.get("geoname_id").asInt());
  }

  final JsonNode cityNode = jsonNode.get("city");
  if (cityGeoNameId == null) {
    assertNull(cityNode);
  } else {
    assertNotNull(cityNode);
    assertEquals(cityGeoNameId.intValue(), cityNode.get("geoname_id").asInt());
  }
}
 
Example 10
Source File: ExtractAvroTreeBuilder.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Calls {@code extractTree} on the record's first {@code Fields.ATTACHMENT_BODY} value (cast to
 * {@code GenericContainer}) and its schema, writing into a copy of the input record with
 * {@code outputFieldPrefix}, and then passes that copy to the child command.
 *
 * @param inputRecord the record carrying the attachment body; it is copied, not modified here
 * @return the result of the child command
 */
@Override
protected boolean doProcess(Record inputRecord) {
  // Preconditions.checkState(ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE.equals(inputRecord.getFirstValue(Fields.ATTACHMENT_MIME_TYPE)));
  GenericContainer container = (GenericContainer) inputRecord.getFirstValue(Fields.ATTACHMENT_BODY);
  Preconditions.checkNotNull(container);
  Preconditions.checkNotNull(container.getSchema());

  Record result = inputRecord.copy();
  extractTree(container, container.getSchema(), result, outputFieldPrefix);

  // pass record to next command in chain:
  return getChild().process(result);
}
 
Example 11
Source File: AvroMorphlineTest.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Test that schema caching in readAvroContainer works even if the Avro writer schema of each input
 * file is different (yet compatible). Test writer schema A before B and B before A.
 */
@Test
public void testReadAvroContainerWithMultipleSchemas() throws IOException {
  final String[] expectedUids = new String[] {"sdfsdf", "fhgfgh", "werwer", "345trgt", "dfgdg"};
  final int numRecords = 5;

  for (int reverse = 0; reverse < 2; reverse++) {
    // a fresh morphline per direction; it is reused across the ten runs below
    morphline = createMorphline("test-morphlines/readAvroContainer");
    for (int run = 0; run < 10; run++) {
      collector.reset();

      // alternate between file 0 and file 1; reverse == 1 swaps which one comes first
      int version = (run % 2 + reverse) % 2;
      File avroFile =
          new File(RESOURCES_DIR + "/test-documents/avroContainerWithWriterschema" + version + ".avro");
      byte[] fileContents = Files.toByteArray(avroFile);

      Record inputRecord = new Record();
      inputRecord.put(Fields.ATTACHMENT_BODY, fileContents);
      assertTrue(morphline.process(inputRecord));

      assertEquals(numRecords, collector.getRecords().size());
      for (int i = 0; i < numRecords; i++) {
        Record emitted = collector.getRecords().get(i);
        GenericData.Record avroRecord =
            (GenericData.Record) emitted.getFirstValue(Fields.ATTACHMENT_BODY);
        assertEquals(expectedUids[i], avroRecord.get("sc_uid").toString());
      }
    }
  }
}