Java Code Examples for org.kitesdk.morphline.api.Record#put()

The following examples show how to use org.kitesdk.morphline.api.Record#put(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ReadJsonBuilder.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Reads successive values from the given stream via {@code reader} and forwards one
 * record per value to the child command.
 *
 * <p>Each outgoing record is a copy of the input record (after {@code removeAttachments}
 * has been applied to it) with the parsed value stored under
 * {@link Fields#ATTACHMENT_BODY} and {@code MIME_TYPE} stored under
 * {@link Fields#ATTACHMENT_MIME_TYPE}.
 *
 * @param inputRecord the record whose fields seed every outgoing record
 * @param in the stream to parse
 * @return {@code false} as soon as the child command rejects a record, {@code true} otherwise
 * @throws IOException if reading from the stream fails
 */
@Override
protected boolean doProcess(Record inputRecord, InputStream in) throws IOException {
  Record base = inputRecord.copy();
  removeAttachments(base);
  MappingIterator values = reader.readValues(in);
  try {
    boolean accepted = true;
    // stop pulling values the moment the downstream command refuses one
    while (accepted && values.hasNextValue()) {
      Object parsed = values.nextValue();
      incrementNumRecords();
      LOG.trace("jsonObject: {}", parsed);

      Record next = base.copy();
      next.put(Fields.ATTACHMENT_BODY, parsed);
      next.put(Fields.ATTACHMENT_MIME_TYPE, MIME_TYPE);

      // hand the record to the next command in the chain
      accepted = getChild().process(next);
    }
    return accepted;
  } finally {
    values.close();
  }
}
 
Example 2
Source File: ReadAvroBuilder.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes datums from the given stream one after another and passes each one, together
 * with a template record, to {@code extract}.
 *
 * <p>The template is a copy of the input record, after {@code removeAttachments} has been
 * applied, with {@link Fields#ATTACHMENT_MIME_TYPE} set to
 * {@code ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE}. The stream is closed before returning.
 *
 * @param inputRecord the record used to build the template
 * @param in the stream to decode; closed by this method
 * @return {@code false} if {@code extract} returns {@code false} for a datum,
 *         {@code true} once reading ends with an {@link EOFException}
 * @throws IOException if decoding or closing the stream fails
 */
@Override
protected boolean doProcess(Record inputRecord, InputStream in) throws IOException {
  Record prototype = inputRecord.copy();
  removeAttachments(prototype);
  prototype.put(Fields.ATTACHMENT_MIME_TYPE, ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE);
  Decoder avroDecoder = prepare(in);
  try {
    for (;;) {
      GenericContainer next = datumReader.read(null, avroDecoder);
      if (!extract(next, prototype)) {
        return false;
      }
    }
  } catch (EOFException endOfInput) {
    // the read loop has no other normal exit: EOF is how the end of input is signalled
    return true;
  } finally {
    in.close();
  }
}
 
Example 3
Source File: AvroMorphlineTest.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the given morphline three times over the sample tweet Avro container file and
 * compares every collected record with the record read directly from the file.
 *
 * @param morphlineConfigFile the morphline configuration to load
 * @param fieldNames the field names handed to {@code assertTweetEquals} for comparison
 * @throws Exception if the morphline cannot be created or the file cannot be read
 */
private void runTweetContainer(String morphlineConfigFile, String[] fieldNames) throws Exception {
  File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433-medium.avro");
  morphline = createMorphline(morphlineConfigFile);
  for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
    Record record = new Record();
    byte[] body = Files.toByteArray(file);
    record.put(Fields.ATTACHMENT_BODY, body);
    collector.reset();
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
    assertEquals(2104, collector.getRecords().size());

    FileReader<GenericData.Record> reader =
        new DataFileReader<GenericData.Record>(file, new GenericDatumReader<GenericData.Record>());
    try {
      int i = 0;
      while (reader.hasNext()) {
        Record actual = collector.getRecords().get(i);
        GenericData.Record expected = reader.next();
        assertTweetEquals(expected, actual, fieldNames, i);
        i++;
      }
      assertEquals(collector.getRecords().size(), i);
    } finally {
      // the reader holds an open file; release it even when an assertion above fails
      reader.close();
    }
  }
}
 
Example 4
Source File: ExampleMorphlineTest.java    From kite-examples with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds {@code arrays.json} through the {@code extractJsonPathsFlattened} morphline and
 * checks the values collected for the {@code /price}, {@code /price/[]} and
 * {@code /unknownField} paths.
 */
@Test
public void testExtractJsonPathsFlattened() throws Exception {
  morphline = createMorphline("test-morphlines/extractJsonPathsFlattened");
  File file = new File(RESOURCES_DIR + "/test-documents/arrays.json");
  InputStream in = new BufferedInputStream(new FileInputStream(file));
  try {
    Record record = new Record();
    record.put(Fields.ATTACHMENT_BODY, in);

    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(record));

    assertEquals(1, collector.getRecords().size());
    List expected = Arrays.asList(1, 2, 3, 4, 5, 10, 20, 100, 200);
    assertEquals(expected, collector.getFirstRecord().get("/price"));
    assertEquals(expected, collector.getFirstRecord().get("/price/[]"));
    assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
  } finally {
    // close the input even when an assertion above fails
    in.close();
  }
}
 
Example 5
Source File: ReadRCFileTest.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Processes a file prepared by {@code setupRCFile} with the {@code rcFileMorphlineRow}
 * morphline and checks the collected records against the records that
 * {@code setupRCFile} returned.
 */
@Test
public void testRCFileRowWise() throws Exception {
  final String rcFileName = "testRCFileRowWise.rc";
  morphline = createMorphline("test-morphlines/rcFileMorphlineRow");

  List<Record> expected = setupRCFile(rcFileName, NUM_RECORDS, NUM_COLUMNS, true);
  Path inputFile = dfs.makeQualified(new Path(testDirectory, rcFileName));

  // the input record carries both the file name and whatever readPath yields for it
  Record input = new Record();
  input.put(Fields.ATTACHMENT_NAME, inputFile.toString());
  input.put(Fields.ATTACHMENT_BODY, readPath(inputFile));

  startSession();
  assertEquals(1, collector.getNumStartEvents());
  assertTrue(morphline.process(input));

  boolean matches = areFieldsEqual(expected, collector.getRecords(), NUM_COLUMNS, true);
  assertTrue(matches);
}
 
Example 6
Source File: MaxmindMorphlineTest.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the {@code geoIP} morphline on a record holding an IPv6 address and verifies the
 * country and location fields expected in the output record.
 */
@Test
public void testIPv6() throws Exception {
  morphline = createMorphline("test-morphlines/geoIP");
  final String ip = "2001:620::1";

  // expected output: the original address plus the looked-up country and location fields
  Record expected = new Record();
  expected.put("ip", ip);
  expected.put("/country/iso_code", "CH");
  expected.put("/country/names/en", "Switzerland");
  expected.put("/country/names/zh-CN", "瑞士");
  expected.put("/location/latitude", 47.00016);
  expected.put("/location/longitude", 8.01427);
  expected.put("/location/latitude_longitude", "47.00016,8.01427");
  expected.put("/location/longitude_latitude", "8.01427,47.00016");

  Record input = new Record();
  input.put("ip", ip);

  processAndVerifySuccess(input, expected, false);
}
 
Example 7
Source File: TestMorphlineUtils.java    From envelope with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code MorphlineUtils.convertToRow} fails when the schema declares a
 * non-nullable field that the record does not contain, and that
 * {@code RowUtils.toRowValue} is never invoked in that case.
 */
@Test
public void convertToRowMissingColumnNotNullable(
    final @Mocked RowUtils utils
) throws Exception {

  // schema with a single non-nullable string field named "field1"
  StructType schema = DataTypes.createStructType(
      Lists.newArrayList(DataTypes.createStructField("field1", DataTypes.StringType, false)));

  // the record only has "foo", so "field1" is missing
  Record record = new Record();
  record.put("foo", "one");

  try {
    MorphlineUtils.convertToRow(schema, record);
    fail("Did not throw a RuntimeException");
  } catch (Exception e) {
    String message = e.getMessage();
    assertThat(message, CoreMatchers.containsString("DataType cannot contain 'null'"));
  }

  new Verifications() {{
    RowUtils.toRowValue(any, (DataType) any); times = 0;
  }};
}
 
Example 8
Source File: ReadSequenceFileTest.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code numRecords} key/value pairs to a sequence file at {@code file} and
 * returns the records a reader of that file is expected to produce.
 *
 * <p>Entry {@code i} has the key {@code "key" + i} and a {@code MyWritable} value built
 * from {@code "value"} and {@code i}.
 *
 * @param file the file to write the sequence file to
 * @param numRecords the number of key/value pairs to write
 * @return a mapping of expected keys -> records
 * @throws IOException if the file cannot be opened or written
 */
private HashMap<String, Record> createMyWritableSequenceFile(File file, int numRecords) throws IOException {
  HashMap<String, Record> map = new HashMap<String, Record>();
  SequenceFile.Metadata metadata = new SequenceFile.Metadata(getMetadataForSequenceFile());
  FSDataOutputStream out = new FSDataOutputStream(new FileOutputStream(file), null);
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(new Configuration(), out, Text.class, ParseTextMyWritableBuilder.MyWritable.class,
      SequenceFile.CompressionType.NONE, null, metadata);
    for (int i = 0; i < numRecords; ++i) {
      Text key = new Text("key" + i);
      ParseTextMyWritableBuilder.MyWritable value = new ParseTextMyWritableBuilder.MyWritable("value", i);
      writer.append(key, value);
      Record record = new Record();
      record.put("key", key);
      record.put("value", value);
      map.put(key.toString(), record);
    }
  } finally {
    Closeables.closeQuietly(writer);
    // the stream was opened here and only handed to the writer, so release it here too;
    // this also covers the case where createWriter itself fails
    Closeables.closeQuietly(out);
  }
  return map;
}
 
Example 9
Source File: ExampleMorphlineTest.java    From kite-examples with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the {@code grokSyslogNgCisco} morphline on a Cisco-style syslog line and verifies
 * the fields extracted into the output record.
 */
@Test
public void testGrokSyslogNgCisco() throws Exception {
  morphline = createMorphline("test-morphlines/grokSyslogNgCisco");
  final String msg = "<179>Jun 10 04:42:51 www.foo.com Jun 10 2013 04:42:51 : %myproduct-3-mysubfacility-251010: " +
      "Health probe failed for server 1.2.3.4 on port 8083, connection refused by server";

  // expected output: the untouched message plus the extracted cisco_* and syslog fields
  Record expected = new Record();
  expected.put(Fields.MESSAGE, msg);
  expected.put("cisco_message_code", "%myproduct-3-mysubfacility-251010");
  expected.put("cisco_product", "myproduct");
  expected.put("cisco_level", "3");
  expected.put("cisco_subfacility", "mysubfacility");
  expected.put("cisco_message_id", "251010");
  expected.put("syslog_message", "%myproduct-3-mysubfacility-251010: Health probe failed for server 1.2.3.4 " +
      "on port 8083, connection refused by server");

  Record input = new Record();
  input.put(Fields.MESSAGE, msg);
  assertTrue(morphline.process(input));

  Record produced = collector.getFirstRecord();
  assertEquals(expected, produced);
  // the collected record must be a different object from the one passed in
  assertNotSame(input, collector.getFirstRecord());
}
 
Example 10
Source File: SplitBuilder.java    From sequenceiq-samples with Apache License 2.0 6 votes vote down vote up
@Override
protected boolean doProcess(Record record) {
    ListIterator iter = record.get(fieldName).listIterator();
    while (iter.hasNext()) {
        String[] segments = iter.next().toString().split(separator);
        iter.remove();
        for (int i = 0; i < segments.length; i++) {
            if (i < newFields.size()) {
                record.put(newFields.get(i), trimIfNeeded(segments[i]));
            } else {
                if (!dropUndeclaredField) {
                   record.put(String.valueOf(i), trimIfNeeded(segments[i]));
                }
            }
        }
    }
    return super.doProcess(record);
}
 
Example 11
Source File: SolrMorphlineTest.java    From kite with Apache License 2.0 6 votes vote down vote up
@Test
public void testTokenizeText() throws Exception {
  morphline = createMorphline("test-morphlines" + File.separator + "tokenizeText");
  for (int i = 0; i < 3; i++) {
    Record record = new Record();
    record.put(Fields.MESSAGE, "Hello World!");
    record.put(Fields.MESSAGE, "\[email protected] #%()123");
    Record expected = record.copy();
    expected.getFields().putAll("tokens", Arrays.asList("hello", "world", "foo", "bar.com", "123"));
    collector.reset();
    startSession();
    Notifications.notifyBeginTransaction(morphline);
    assertTrue(morphline.process(record));
    assertEquals(1, collector.getNumStartEvents());
    Notifications.notifyCommitTransaction(morphline);
    assertEquals(expected, collector.getFirstRecord());
  }
}
 
Example 12
Source File: SaxonMorphlineTest.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the {@code xslt-helloworld-sequence} morphline on {@code helloworld.xml} and
 * verifies the two output records it is expected to produce.
 */
@Test
public void testXsltHelloWorldSequence() throws Exception {
  morphline = createMorphline("test-morphlines/xslt-helloworld-sequence");
  InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/helloworld.xml"));
  try {
    Record record = new Record();
    record.put("id", "123");
    record.put(Fields.ATTACHMENT_BODY, in);
    processAndVerifySuccess(record,
        ImmutableMultimap.of("id", "123", "attr", "foo", "HEAD", "title1", "BODY", "Hello, World!Paragraph1aParagraph1b"),
        ImmutableMultimap.of("id", "123", "HEAD", "title2", "BODY", "Hello, World!Paragraph2aParagraph2b")
        );
  } finally {
    // close the input even when verification fails
    in.close();
  }
}
 
Example 13
Source File: ExampleMorphlineTest.java    From kite-examples with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the {@code myToLowerCase} morphline on the message {@code "Hello"} and expects
 * the message {@code "olleh"} in the output.
 */
@Test
public void testMyLowerCase() throws Exception {
  morphline = createMorphline("test-morphlines/myToLowerCase");

  Record input = new Record();
  input.put("message", "Hello");

  Record wanted = new Record();
  wanted.put("message", "olleh");

  processAndVerifySuccess(input, wanted);
}
 
Example 14
Source File: AvroMorphlineTest.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Test that schema caching in readAvroContainer works even if the Avro writer schema of each input
 * file is different (yet compatible). Test writer schema A before B and B before A.
 */
@Test
public void testReadAvroContainerWithMultipleSchemas() throws IOException {
  final int numRecords = 5;
  final String[] expectedUids = {"sdfsdf", "fhgfgh", "werwer", "345trgt", "dfgdg"};

  for (int reverse = 0; reverse < 2; reverse++) {
    morphline = createMorphline("test-morphlines/readAvroContainer");
    for (int run = 0; run < 10; run++) {
      collector.reset();
      // alternate between schema 0 and 1; reverse == 1 flips which one comes first
      int version = ((run % 2) + reverse) % 2;
      File source = new File(RESOURCES_DIR + "/test-documents/avroContainerWithWriterschema" + version + ".avro");
      byte[] fileContents = Files.toByteArray(source);

      Record inputRecord = new Record();
      inputRecord.put(Fields.ATTACHMENT_BODY, fileContents);
      assertTrue(morphline.process(inputRecord));

      assertEquals(numRecords, collector.getRecords().size());

      int position = 0;
      while (position < numRecords) {
        Record record = collector.getRecords().get(position);
        Object body = record.getFirstValue(Fields.ATTACHMENT_BODY);
        GenericData.Record avroRecord = (GenericData.Record) body;
        assertEquals(expectedUids[position], avroRecord.get("sc_uid").toString());
        position++;
      }
    }
  }
}
 
Example 15
Source File: SaxonMorphlineTest.java    From kite with Apache License 2.0 5 votes vote down vote up
@Test
public void testXQueryAtomicValues() throws Exception {
  morphline = createMorphline("test-morphlines/xquery-atomic-values");    
  InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433.xml"));
  Record record = new Record();
  record.put(Fields.ATTACHMENT_BODY, in);
  processAndVerifySuccess(record); 
  in.close();
}
 
Example 16
Source File: SaxonMorphlineTest.java    From kite with Apache License 2.0 5 votes vote down vote up
@Test
public void testXsltIdentityHelloWorld() throws Exception {
  morphline = createMorphline("test-morphlines/xslt-helloworld-identity");    
  InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/helloworld.xml"));
  Record record = new Record();
  record.put(Fields.ATTACHMENT_BODY, in);
  processAndVerifySuccess(record, 
      ImmutableMultimap.of("description", "An XSLT Morphline", "welcome", "Hello, World!", "id", "2")
      );    
  in.close();
}
 
Example 17
Source File: JsonMorphlineTest.java    From kite with Apache License 2.0 5 votes vote down vote up
@Test
public void testComplexDocuments() throws Exception {
  morphline = createMorphline("test-morphlines/extractJsonPaths");    
  File file = new File(RESOURCES_DIR + "/test-documents/complex.json");
  InputStream in = new FileInputStream(file);
  Record record = new Record();
  record.put(Fields.ATTACHMENT_BODY, in);
  
  startSession();
  assertEquals(1, collector.getNumStartEvents());
  assertTrue(morphline.process(record));    
  
  assertEquals(1, collector.getRecords().size());
  JsonNode rootNode = (JsonNode) new ObjectMapper().reader(JsonNode.class).readValues(file).next();
  JsonNodeFactory factory = new JsonNodeFactory(false);

  assertEquals(Arrays.asList(10), collector.getFirstRecord().get("/docId"));
  assertEquals(Arrays.asList(rootNode.get("links")), collector.getFirstRecord().get("/links"));
  
  assertEquals(Arrays.asList(factory.arrayNode()), collector.getFirstRecord().get("/links/backward"));
  assertEquals(factory.arrayNode(), rootNode.get("links").get("backward"));
  
  List expected = Arrays.asList(factory.arrayNode().add(20).add(40).add(60).add(true).add(false).add(32767).add(2147483647).add(9223372036854775807L).add(1.23).add(1.7976931348623157E308));
  assertEquals(expected, collector.getFirstRecord().get("/links/forward"));
  assertEquals(expected, collector.getFirstRecord().get("/links/forward/[]"));
  assertEquals(expected, collector.getFirstRecord().get("/links/forward[]"));
  assertEquals(Arrays.asList(rootNode.get("name")), collector.getFirstRecord().get("/name"));
  assertEquals(Arrays.asList("en-us", "en", "en-gb"), collector.getFirstRecord().get("/name/[]/language/[]/code"));
  assertEquals(Arrays.asList("en-us", "en", "en-gb"), collector.getFirstRecord().get("/name[]/language[]/code"));
  assertEquals(Arrays.asList("us", "gb"), collector.getFirstRecord().get("/name/[]/language/[]/country"));
  assertEquals(Arrays.asList("us", "gb"), collector.getFirstRecord().get("/name[]/language[]/country"));
  assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
  assertEquals(Arrays.asList(true), collector.getFirstRecord().get("/links/bool"));
  assertEquals(Arrays.asList(32767), collector.getFirstRecord().get("/links/short"));
  assertEquals(Arrays.asList(2147483647), collector.getFirstRecord().get("/links/int"));
  assertEquals(Arrays.asList(9223372036854775807L), collector.getFirstRecord().get("/links/long"));
  assertEquals(Arrays.asList(1.7976931348623157E308), collector.getFirstRecord().get("/links/double"));    

  in.close();    
}
 
Example 18
Source File: ExtractURIComponentBuilder.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Stores {@code value} under {@code outputFieldName} in the record; a {@code null}
 * value is skipped.
 *
 * @param record the record to add the value to
 * @param value the value to store, or {@code null} to store nothing
 */
private void addValue(Record record, Object value) {
  if (value == null) {
    return;
  }
  record.put(outputFieldName, value);
}
 
Example 19
Source File: ExtractURIComponentsBuilder.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Stores {@code value} in the record under the field name obtained from
 * {@code concat(outputFieldPrefix, name)}; a {@code null} value is skipped.
 *
 * @param record the record to add the value to
 * @param name the name passed to {@code concat} together with {@code outputFieldPrefix}
 * @param value the value to store, or {@code null} to store nothing
 */
private void addValue(Record record, String name, Object value) {
  if (value == null) {
    return;
  }
  String fieldName = concat(outputFieldPrefix, name);
  record.put(fieldName, value);
}
 
Example 20
Source File: ReadSequenceFileBuilder.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Reads key/value pairs from a sequence file supplied as a stream and forwards one
 * record per pair to the child command.
 *
 * <p>Each outgoing record is a copy of the input record (after {@code removeAttachments}
 * has been applied to it) with the key under {@code keyField}, the value under
 * {@code valueField}, and {@code OUTPUT_MEDIA_TYPE} under
 * {@link Fields#ATTACHMENT_MIME_TYPE}. When {@code includeMetaData} is set and the file
 * metadata is non-null, it is added under {@code SEQUENCE_FILE_META_DATA}.
 *
 * @param inputRecord the record whose fields seed every outgoing record
 * @param in the stream holding the sequence file
 * @return {@code false} as soon as the child command rejects a record, {@code true} otherwise
 * @throws IOException if reading the sequence file fails
 */
@Override
protected boolean doProcess(Record inputRecord, final InputStream in) throws IOException {
  SequenceFile.Metadata fileMetaData = null;
  SequenceFile.Reader reader = null;
  try {
    FSDataInputStream seekableIn = new FSDataInputStream(new ForwardOnlySeekable(in));
    reader = new SequenceFile.Reader(conf, SequenceFile.Reader.stream(seekableIn));

    if (includeMetaData) {
      fileMetaData = reader.getMetadata();
    }
    Class keyClass = reader.getKeyClass();
    Class valueClass = reader.getValueClass();
    Record base = inputRecord.copy();
    removeAttachments(base);

    for (;;) {
      // fresh key and value instances for every pair
      Writable key = (Writable)ReflectionUtils.newInstance(keyClass, conf);
      Writable val = (Writable)ReflectionUtils.newInstance(valueClass, conf);

      boolean gotPair;
      try {
        gotPair = reader.next(key, val);
      } catch (EOFException ex) {
        // SequenceFile.Reader will throw an EOFException after reading
        // all the data, if it doesn't know the length.  Since we are
        // passing in an InputStream, we hit this case;
        LOG.trace("Received expected EOFException", ex);
        gotPair = false;
      }
      if (!gotPair) {
        break;
      }

      incrementNumRecords();
      Record next = base.copy();
      next.put(keyField, key);
      next.put(valueField, val);
      next.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE);
      if (includeMetaData && fileMetaData != null) {
        next.put(SEQUENCE_FILE_META_DATA, fileMetaData);
      }

      // hand the record to the next command in the chain
      if (!getChild().process(next)) {
        return false;
      }
    }
  } finally {
    Closeables.closeQuietly(reader);
  }
  return true;
}