org.kitesdk.morphline.api.Record Java Examples
The following examples show how to use
org.kitesdk.morphline.api.Record.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JsonMorphlineTest.java From kite with Apache License 2.0 | 6 votes |
@Test public void testReadJsonWithMap() throws Exception { morphline = createMorphline("test-morphlines/readJsonWithMap"); for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/stream.json")); Record record = new Record(); record.put(Fields.ATTACHMENT_BODY, in); collector.reset(); startSession(); assertEquals(1, collector.getNumStartEvents()); assertTrue(morphline.process(record)); Iterator<Record> iter = collector.getRecords().iterator(); assertTrue(iter.hasNext()); Map node = (Map) iter.next().getFirstValue(Fields.ATTACHMENT_BODY); assertEquals(ImmutableMap.of("firstObject", "foo"), node); assertTrue(iter.hasNext()); node = (Map) iter.next().getFirstValue(Fields.ATTACHMENT_BODY); assertEquals(ImmutableMap.of("secondObject", "bar"), node); assertFalse(iter.hasNext()); in.close(); } }
Example #2
Source File: AbstractParser.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record record) { if (!hasAtLeastOneAttachment(record)) { return false; } // TODO: make field for stream configurable String streamMediaType = (String) record.getFirstValue(Fields.ATTACHMENT_MIME_TYPE); if (!isMimeTypeSupported(streamMediaType, record)) { return false; } InputStream stream = getAttachmentInputStream(record); try { return doProcess(record, stream); } catch (IOException e) { throw new MorphlineRuntimeException(e); } finally { Closeables.closeQuietly(stream); } }
Example #3
Source File: AvroMorphlineTest.java From kite with Apache License 2.0 | 6 votes |
/**
 * Feeds an ArrayInUnionTestRecord through the "extractAvroPathsArrayInUnion" morphline and
 * checks that both the "/items[]" and "/itemsInUnion[]" output fields hold a single value:
 * the original list of items.
 */
@Test
public void testExtractAvroPathsArrayInUnion() throws Exception {
  List<String> items = Arrays.asList("a", "b", "c");
  ArrayInUnionTestRecord avroRecord = new ArrayInUnionTestRecord(items, items);

  morphline = createMorphline("test-morphlines/extractAvroPathsArrayInUnion");
  deleteAllDocuments();

  Record input = new Record();
  input.put(Fields.ATTACHMENT_BODY, avroRecord);
  startSession();

  boolean processed = morphline.process(input);
  assertTrue(processed);
  assertEquals(1, collector.getRecords().size());

  // Each extracted field is expected to contain exactly one value, namely the whole list.
  String[] extractedFields = {"/items[]", "/itemsInUnion[]"};
  for (String extractedField : extractedFields) {
    assertEquals(Arrays.asList(items), collector.getFirstRecord().get(extractedField));
  }
}
Example #4
Source File: TokenizeTextBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record record) { try { List outputValues = record.get(outputFieldName); for (Object value : record.get(inputFieldName)) { reader.setValue(value.toString()); TokenStream tokenStream = analyzer.tokenStream("content", reader); tokenStream.reset(); while (tokenStream.incrementToken()) { if (token.length() > 0) { // incrementToken() updates the token! String tokenStr = new String(token.buffer(), 0, token.length()); outputValues.add(tokenStr); } } tokenStream.end(); tokenStream.close(); } } catch (IOException e) { throw new MorphlineRuntimeException(e); } // pass record to next command in chain: return super.doProcess(record); }
Example #5
Source File: ReadAvroBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record inputRecord, InputStream in) throws IOException { Record template = inputRecord.copy(); removeAttachments(template); template.put(Fields.ATTACHMENT_MIME_TYPE, ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE); Decoder decoder = prepare(in); try { while (true) { GenericContainer datum = datumReader.read(null, decoder); if (!extract(datum, template)) { return false; } } } catch (EOFException e) { ; // ignore } finally { in.close(); } return true; }
Example #6
Source File: ReadSequenceFileTest.java From kite with Apache License 2.0 | 6 votes |
/**
 * Test that Solr queries on a parsed SequenceFile document
 * return the expected content and fields.  Don't pass
 * in our own parser via the context.
 *
 * A text sequence file with five records is written, fed through the
 * "sequenceFileMorphlineSimple" morphline, and the collected records are compared
 * against the records that were written.
 */
@Test
public void testSequenceFileContentSimple() throws Exception {
  morphline = createMorphline("test-morphlines/sequenceFileMorphlineSimple");
  String path = RESOURCES_DIR;
  File sequenceFile = new File(path, "testSequenceFileContentSimple.seq");
  int numRecords = 5;
  HashMap<String, Record> expected = createTextSequenceFile(sequenceFile, numRecords);
  InputStream in = new FileInputStream(sequenceFile.getAbsolutePath());
  try {
    Record record = new Record();
    record.put(Fields.ATTACHMENT_BODY, in);
    startSession();

    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(record));
    assertTrue(areFieldsEqual(expected, collector.getRecords()));
  } finally {
    // The stream used to be left open; close it whether or not the assertions pass.
    in.close();
  }
}
Example #7
Source File: ExtractProtobufPathsBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record inputRecord) { Object datum = inputRecord.getFirstValue(Fields.ATTACHMENT_BODY); Preconditions.checkNotNull(datum); Record outputRecord = inputRecord.copy(); for (Map.Entry<String, Collection<String>> entry : stepMap.entrySet()) { String fieldName = entry.getKey(); List<String> steps = (List<String>) entry.getValue(); try { extractPath(datum, fieldName, steps, outputRecord, 0); } catch (Exception e) { LOG.error(e.getMessage(), e); return false; } } // pass record to next command in chain: return getChild().process(outputRecord); }
Example #8
Source File: TestMorphlineUtils.java From envelope with Apache License 2.0 | 6 votes |
/**
 * With RowUtils.toRowValue("one", StringType) mocked to return "success", checks that
 * MorphlineUtils.convertToRow() places that mocked value in column 0 of the row built
 * for a single non-nullable string column "field1".
 */
@Test
public void convertToRowValidValue(
    final @Mocked RowUtils utils
) throws Exception {

  StructType schema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, false))
  );

  Record input = new Record();
  input.put("field1", "one");

  // Record the expectation: converting "one" as a string yields "success".
  new Expectations() {{
    RowUtils.toRowValue("one", DataTypes.StringType);
    result = "success";
  }};

  Object firstColumn = MorphlineUtils.convertToRow(schema, input).get(0);
  assertEquals("Invalid conversion", "success", firstColumn);
}
Example #9
Source File: TranslateBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override @SuppressWarnings("unchecked") protected boolean doProcess(Record record) { ListIterator iter = record.get(fieldName).listIterator(); while (iter.hasNext()) { String key = iter.next().toString(); Object value = dictionary.get(key); if (value != null) { iter.set(value); } else if (fallback != null) { iter.set(fallback); } else { LOG.debug("No match found for key: {}", key); return false; } } // pass record to next command in chain: return super.doProcess(record); }
Example #10
Source File: JsonMorphlineTest.java From kite with Apache License 2.0 | 6 votes |
/**
 * Runs the "extractJsonPaths" morphline over arrays.json and checks that a single record
 * is produced whose "/price" and "/price/[]" fields both equal the "price" array node read
 * independently with Jackson, and whose "/unknownField" field is empty.
 */
@Test
public void testExtractJsonPaths() throws Exception {
  morphline = createMorphline("test-morphlines/extractJsonPaths");
  File file = new File(RESOURCES_DIR + "/test-documents/arrays.json");
  InputStream in = new FileInputStream(file);
  try {
    Record record = new Record();
    record.put(Fields.ATTACHMENT_BODY, in);

    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertTrue(morphline.process(record));

    assertEquals(1, collector.getRecords().size());
    JsonNode rootNode = (JsonNode) new ObjectMapper().reader(JsonNode.class).readValues(file).next();
    assertTrue(rootNode.get("price").isArray());
    List<JsonNode> expected = Arrays.asList(rootNode.get("price"));
    assertEquals(expected, collector.getFirstRecord().get("/price"));
    assertEquals(expected, collector.getFirstRecord().get("/price/[]"));
    assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
  } finally {
    // Close the input even when an assertion fails, so the file handle is not leaked.
    in.close();
  }
}
Example #11
Source File: ReadRCFileTest.java From kite with Apache License 2.0 | 6 votes |
/**
 * Writes an RC file via setupRCFile(), runs it through the "rcFileMorphlineRow" morphline
 * and checks that the collected records match the expected ones.
 */
@Test
public void testRCFileWithNull() throws Exception {
  morphline = createMorphline("test-morphlines/rcFileMorphlineRow");

  String fileName = "testRCFileRowWise.rc";
  List<Record> expectedRecords = setupRCFile(fileName, NUM_RECORDS, NUM_COLUMNS, true, true);
  Path rcFile = dfs.makeQualified(new Path(testDirectory, fileName));

  // The record carries both the file's name and its content.
  Record inputRecord = new Record();
  inputRecord.put(Fields.ATTACHMENT_NAME, rcFile.toString());
  inputRecord.put(Fields.ATTACHMENT_BODY, readPath(rcFile));

  startSession();
  assertEquals(1, collector.getNumStartEvents());

  boolean processed = morphline.process(inputRecord);
  assertTrue(processed);

  boolean fieldsMatch = areFieldsEqual(expectedRecords, collector.getRecords(), NUM_COLUMNS, true);
  assertTrue(fieldsMatch);
}
Example #12
Source File: AbstractParser.java From kite with Apache License 2.0 | 6 votes |
private boolean isMimeTypeSupported(String mediaTypeStr, Record record) { if (supportedMimeTypes == null) { return true; } if (!hasAtLeastOneMimeType(record)) { return false; } MediaType mediaType = parseMimeType(mediaTypeStr); if (supportedMimeTypes.contains(mediaType)) { return true; // fast path } // wildcard matching for (MediaType rangePattern : supportedMimeTypes) { if (isMimeTypeMatch(mediaType, rangePattern)) { return true; } } if (LOG.isDebugEnabled()) { LOG.debug("No supported MIME type found for " + Fields.ATTACHMENT_MIME_TYPE + "=" + mediaTypeStr); } return false; }
Example #13
Source File: ExampleMorphlineTest.java From kite-examples with Apache License 2.0 | 6 votes |
@Test public void testGrokEmail() throws Exception { morphline = createMorphline("test-morphlines/grokEmail"); Record record = new Record(); byte[] bytes = Files.toByteArray(new File(RESOURCES_DIR + "/test-documents/email.txt")); record.put(Fields.ATTACHMENT_BODY, bytes); assertTrue(morphline.process(record)); Record expected = new Record(); String msg = new String(bytes, "UTF-8"); //.replaceAll("(\r)?\n", "\n"); expected.put(Fields.MESSAGE, msg); expected.put("message_id", "12345.6789.JavaMail.foo@bar"); expected.put("date", "Wed, 6 Feb 2012 06:06:05 -0800"); expected.put("from", "[email protected]"); expected.put("to", "[email protected]"); expected.put("subject", "WEDNESDAY WEATHER HEADLINES"); expected.put("from_names", "Foo Bar <[email protected]>@xxx"); expected.put("to_names", "'Weather News Distribution' <[email protected]>"); expected.put("text", "Average 1 to 3- degrees above normal: Mid-Atlantic, Southern Plains.." + "\nAverage 4 to 6-degrees above normal: Ohio Valley, Rockies, Central Plains"); assertEquals(expected, collector.getFirstRecord()); assertNotSame(record, collector.getFirstRecord()); }
Example #14
Source File: ExtractProtobufPathsBuilder.java From kite with Apache License 2.0 | 6 votes |
/**
 * Adds the given datum to the record under fieldName.
 *
 * A null datum is ignored. A datum of a "common" type (per isCommonType()) is stored as
 * is. A List is resolved element by element, recursively. Anything else is first
 * converted with extractValue() and the converted value is stored.
 *
 * @param datum the value to store; may be null
 * @param record the record that receives the value(s)
 * @param fieldName the name of the field to add to
 */
private void resolve(Object datum, Record record, String fieldName) throws NoSuchMethodException,
    SecurityException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {
  if (datum == null) {
    return;
  }

  Class<?> type = datum.getClass();
  if (isCommonType(type)) {
    record.put(fieldName, datum);
    return;
  }

  if (List.class.isAssignableFrom(type)) {
    List<?> elements = (List<?>) datum;
    for (Object element : elements) {
      resolve(element, record, fieldName);
    }
    return;
  }

  record.put(fieldName, extractValue(datum, type));
}
Example #15
Source File: SanitizeUnknownSolrFieldsBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record record) { Collection<Map.Entry> entries = new ArrayList<Map.Entry>(record.getFields().asMap().entrySet()); for (Map.Entry<String, Collection<Object>> entry : entries) { String key = entry.getKey(); if (schema.getFieldOrNull(key) == null && !LoadSolrBuilder.LOAD_SOLR_DELETE_BY_ID.equals(key) && !LoadSolrBuilder.LOAD_SOLR_DELETE_BY_QUERY.equals(key) && !LoadSolrBuilder.LOAD_SOLR_CHILD_DOCUMENTS.equals(key)) { LOG.debug("Sanitizing unknown Solr field: {}", key); Collection values = entry.getValue(); if (renameToPrefix != null) { record.getFields().putAll(renameToPrefix + key, values); } values.clear(); // implicitly removes key from record } } // pass record to next command in chain: return super.doProcess(record); }
Example #16
Source File: SplitBuilder.java From sequenceiq-samples with Apache License 2.0 | 6 votes |
/**
 * Splits every value of the configured field on the separator, removes the original value,
 * and stores segment i under newFields.get(i). Segments beyond the declared field names
 * are stored under their index as a string, unless dropUndeclaredField is set.
 *
 * @param record the record to split in place
 * @return the result of the next command in the chain
 */
@Override
protected boolean doProcess(Record record) {
  for (ListIterator values = record.get(fieldName).listIterator(); values.hasNext(); ) {
    String[] segments = values.next().toString().split(separator);
    values.remove();

    int index = 0;
    for (String segment : segments) {
      if (index < newFields.size()) {
        record.put(newFields.get(index), trimIfNeeded(segment));
      } else if (!dropUndeclaredField) {
        record.put(String.valueOf(index), trimIfNeeded(segment));
      }
      index++;
    }
  }
  return super.doProcess(record);
}
Example #17
Source File: StartReportingMetricsToJMXBuilder.java From kite with Apache License 2.0 | 6 votes |
/**
 * On each SHUTDOWN lifecycle event found in the notification, removes the JMX reporter
 * registered for this command's metric registry and domain (if there is one) and stops it.
 * The notification is then forwarded to the superclass.
 *
 * @param notification the notification to inspect and forward
 */
@Override
protected void doNotify(Record notification) {
  for (Object event : Notifications.getLifecycleEvents(notification)) {
    if (event != Notifications.LifecycleEvent.SHUTDOWN) {
      continue;
    }
    synchronized (REGISTRIES) {
      Map<String, JmxReporter> reportersByDomain = REGISTRIES.get(getContext().getMetricRegistry());
      JmxReporter reporter = (reportersByDomain == null) ? null : reportersByDomain.remove(domain);
      if (reporter != null) {
        reporter.stop();
      }
    }
  }
  super.doNotify(notification);
}
Example #18
Source File: UnpackBuilder.java From kite with Apache License 2.0 | 6 votes |
private boolean parseEntry(ArchiveInputStream archive, ArchiveEntry entry, EmbeddedExtractor extractor, Record record) { String name = entry.getName(); if (archive.canReadEntryData(entry)) { Record entrydata = new Record(); // TODO: or pass myself? //Record entrydata = record.copy(); // For detectors to work, we need a mark/reset supporting // InputStream, which ArchiveInputStream isn't, so wrap TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tis = TikaInputStream.get(archive, tmp); return extractor.parseEmbedded(tis, entrydata, name, getChild()); } finally { try { tmp.dispose(); } catch (TikaException e) { LOG.warn("Cannot dispose of tmp Tika resources", e); } } } else { return false; } }
Example #19
Source File: TestMorphlineUtils.java From envelope with Apache License 2.0 | 6 votes |
/**
 * Converts a record that has no value for the nullable column "field1" and verifies that
 * RowUtils.toRowValue() is never invoked during the conversion.
 */
@Test
public void convertToRowMissingColumnNullable(
    final @Mocked RowUtils utils
) throws Exception {

  StructType schema = DataTypes.createStructType(Lists.newArrayList(
      DataTypes.createStructField("field1", DataTypes.StringType, true))
  );

  // The record only has a field "foo", which is not part of the schema.
  Record input = new Record();
  input.put("foo", "one");

  MorphlineUtils.convertToRow(schema, input);

  new Verifications() {{
    RowUtils.toRowValue(any, (DataType) any);
    times = 0;
  }};
}
Example #20
Source File: AvroMorphlineTest.java From kite with Apache License 2.0 | 5 votes |
@Test /** * Test that schema caching in readAvroContainer works even if the Avro writer schema of each input * file is different (yet compatible). Test writer schema A before B and B before A. */ public void testReadAvroContainerWithMultipleSchemas() throws IOException { for (int reverse = 0; reverse < 2; reverse++) { morphline = createMorphline("test-morphlines/readAvroContainer"); for (int run = 0; run < 10; run++) { collector.reset(); int version = run % 2; version = (version + reverse) % 2; // reverse direction with reverse == 1: 0 -> 1 as well as 1 -> 0 byte[] fileContents = Files.toByteArray( new File(RESOURCES_DIR + "/test-documents/avroContainerWithWriterschema" + version + ".avro")); Record inputRecord = new Record(); inputRecord.put(Fields.ATTACHMENT_BODY, fileContents); assertTrue(morphline.process(inputRecord)); int numRecords = 5; assertEquals(numRecords, collector.getRecords().size()); String[] expectedUids = new String[] {"sdfsdf", "fhgfgh", "werwer", "345trgt", "dfgdg"}; for (int i = 0; i < numRecords; i++) { Record record = collector.getRecords().get(i); GenericData.Record avroRecord = (GenericData.Record)record.getFirstValue(Fields.ATTACHMENT_BODY); assertEquals(expectedUids[i], avroRecord.get("sc_uid").toString()); } } } }
Example #21
Source File: SaxonMorphlineTest.java From kite with Apache License 2.0 | 5 votes |
/**
 * Runs the "xquery-atom-feeds" morphline over atom.xml and verifies, via
 * processAndVerifySuccess(), the id, summary, title and generator of three expected
 * output records.
 */
@Test
public void testXQueryAtomFeeds() throws Exception {
  morphline = createMorphline("test-morphlines/xquery-atom-feeds");
  InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/atom.xml"));
  try {
    Record record = new Record();
    record.put(Fields.ATTACHMENT_BODY, in);
    processAndVerifySuccess(record,
        ImmutableMultimap.of(
            "id", "tag:blogger.com,1999:blog-10832468.post-112136653221060965",
            "summary", "A Great Place To Pick Up Cast Iron Pan Or Circulon Cookware On The Web You don't have to wait to get the cast iron pan that is right for you. Everything you need to know about cast iron pan is online. All this came to me as I was looking out the window. You decide what cast iron pan is right for you. It is so easy and fast! Cast Iron Pan : Cast Iron Pan",
            "title", "Cast Iron Pan",
            "generator", "Blogger"),
        ImmutableMultimap.of(
            "id", "tag:blogger.com,1999:blog-10832468.post-112135176551133849",
            "summary", "A Great Place To Shop For Soapstone Cookware Or Roll Pan Cheap The best part about it is, it's so easy. You will always have your soapstone cookware. Go over to Google and type in soapstone cookware in the search form. soapstone cookware popped right out in front of me. Just try a single search for soapstone cookware. Soapstone Cookware : Soapstone Cookware",
            "title", "Soapstone Cookware",
            "generator", "Blogger"),
        ImmutableMultimap.of(
            "id", "tag:blogger.com,1999:blog-10832468.post-112133988275976426",
            "summary", "The Best Place To Obtain Air Core Cookware Set Or Cookware Stores On The Internet There is no better way to get air core cookware set faster. Everything you need to know about air core cookware set is online. The internet is the place to find it. This is not just local info, you literally have access to worldwide solutions for air core cookware set. The online forms to get my air core",
            "title", "Air Core Cookware Set",
            "generator", "Blogger"));
  } finally {
    // Close the input even when verification fails, so the file handle is not leaked.
    in.close();
  }
}
Example #22
Source File: SimpleCSVTokenizer.java From kite with Apache License 2.0 | 5 votes |
private void put(String line, int start, int i, int j, Record record) { if (j >= columnNames.size()) { columnNames.add("column" + j); } String columnName = columnNames.get(j); if (columnName.length() != 0) { // empty column name indicates omit this field on output String col = line.substring(start, i); col = trim ? col.trim() : col; if (col.length() > 0 || addEmptyStrings) { record.put(columnName, col); } } }
Example #23
Source File: RemoveFieldsBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Removes from the record every field whose name is matched by nameMatcher.
 *
 * @param record the record to strip in place
 */
private void doProcessSlow(Record record) {
  // Removal goes through the iterator so the key set can be modified while it is walked.
  for (Iterator<String> names = record.getFields().asMap().keySet().iterator(); names.hasNext(); ) {
    String fieldName = names.next();
    if (nameMatcher.matches(fieldName)) {
      names.remove();
    }
  }
}
Example #24
Source File: DetectMimeTypesTest.java From kite with Apache License 2.0 | 5 votes |
private String detect(Record event, boolean includeMetaData, boolean excludeParameters) throws IOException { List key = Arrays.asList(includeMetaData, excludeParameters); Command cachedMorphline = morphlineCache.get(key); if (cachedMorphline == null) { // avoid recompiling time and again (performance) Config override = ConfigFactory.parseString("INCLUDE_META_DATA : " + includeMetaData + "\nEXCLUDE_PARAMETERS : " + excludeParameters); cachedMorphline = createMorphline("test-morphlines/detectMimeTypesWithDefaultMimeTypesAndFile", override); morphlineCache.put(key, cachedMorphline); } collector.reset(); assertTrue(cachedMorphline.process(event)); String mimeType = (String) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_MIME_TYPE); return mimeType; }
Example #25
Source File: ExampleMorphlineTest.java From kite-examples with Apache License 2.0 | 5 votes |
/**
 * Checks that the "myToLowerCase" morphline turns the message "Hello" into "olleh".
 */
@Test
public void testMyLowerCase() throws Exception {
  morphline = createMorphline("test-morphlines/myToLowerCase");

  Record expectedOutput = new Record();
  expectedOutput.put("message", "olleh");

  Record input = new Record();
  input.put("message", "Hello");

  processAndVerifySuccess(input, expectedOutput);
}
Example #26
Source File: SolrCellBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Tells whether the record has an ATTACHMENT_MIME_TYPE field, logging at debug level
 * when it does not.
 *
 * @param record the record to inspect
 * @return true if the record's fields contain the ATTACHMENT_MIME_TYPE key
 */
private boolean hasAtLeastOneMimeType(Record record) {
  boolean hasMimeType = record.getFields().containsKey(Fields.ATTACHMENT_MIME_TYPE);
  if (!hasMimeType) {
    LOG.debug("Command failed because of missing MIME type for record: {}", record);
  }
  return hasMimeType;
}
Example #27
Source File: SeparateAttachmentsBuilder.java From kite with Apache License 2.0 | 5 votes |
@Override protected boolean doProcess(Record record) { List attachments = record.get(Fields.ATTACHMENT_BODY); List mimeTypes = record.get(Fields.ATTACHMENT_MIME_TYPE); List charsets = record.get(Fields.ATTACHMENT_CHARSET); List names = record.get(Fields.ATTACHMENT_NAME); for (int i = 0; i < attachments.size(); i++) { Record outputRecord = record.copy(); outputRecord.getFields().replaceValues(Fields.ATTACHMENT_BODY, Collections.singletonList(attachments.get(i))); List<Object> replacement; replacement = i < mimeTypes.size() ? Collections.singletonList(mimeTypes.get(i)) : Collections.emptyList(); outputRecord.getFields().replaceValues(Fields.ATTACHMENT_MIME_TYPE, replacement); replacement = i < charsets.size() ? Collections.singletonList(charsets.get(i)) : Collections.emptyList(); outputRecord.getFields().replaceValues(Fields.ATTACHMENT_CHARSET, replacement); replacement = i < names.size() ? Collections.singletonList(names.get(i)) : Collections.emptyList(); outputRecord.getFields().replaceValues(Fields.ATTACHMENT_NAME, replacement); // pass record to next command in chain: if (!super.doProcess(outputRecord)) { return false; } } return true; }
Example #28
Source File: ExampleMorphlineTest.java From kite-examples with Apache License 2.0 | 5 votes |
/**
 * Processes the input with the current morphline and asserts that processing succeeded and
 * that the first collected record equals the expected one.
 *
 * @param input the record to process
 * @param expected the record the first collected record must equal
 * @param isSame whether the first collected record must be the very same instance as the
 *        input (true) or a different instance (false)
 */
private void processAndVerifySuccess(Record input, Record expected, boolean isSame) {
  collector.reset();
  startSession();
  assertEquals(1, collector.getNumStartEvents());

  boolean processed = morphline.process(input);
  assertTrue(processed);
  assertEquals(expected, collector.getFirstRecord());

  if (!isSame) {
    assertNotSame(input, collector.getFirstRecord());
    return;
  }
  assertSame(input, collector.getFirstRecord());
}
Example #29
Source File: SaxonMorphlineTest.java From kite with Apache License 2.0 | 5 votes |
/**
 * Generates a three-row test table at target/test-table.xml, runs the "xquery-join"
 * morphline over helloworld.xml with id "123", and verifies via processAndVerifySuccess()
 * that the output carries id "123", outputId "2" and outputText "Hello, World!".
 */
@Test
public void testXQueryJoin() throws Exception {
  File table = new File("target/test-table.xml");
  generateTestTable(table, 3);
  morphline = createMorphline("test-morphlines/xquery-join");
  InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/helloworld.xml"));
  try {
    Record record = new Record();
    record.put("id", "123");
    record.put(Fields.ATTACHMENT_BODY, in);
    processAndVerifySuccess(record,
        ImmutableMultimap.of("id", "123", "outputId", "2", "outputText", "Hello, World!")
    );
  } finally {
    // Close the input even when verification fails, so the file handle is not leaked.
    in.close();
  }
}
Example #30
Source File: ExtractJsonPathsBuilder.java From kite with Apache License 2.0 | 5 votes |
private void resolve(JsonNode datum, Record record, String fieldName) { if (datum == null) { return; } if (flatten) { flatten(datum, record.get(fieldName)); return; } if (datum.isObject()) { record.put(fieldName, datum); } else if (datum.isArray()) { record.put(fieldName, datum); } else if (datum.isTextual()) { record.put(fieldName, datum.asText()); } else if (datum.isBoolean()) { record.put(fieldName, datum.asBoolean()); } else if (datum.isInt()) { record.put(fieldName, datum.asInt()); } else if (datum.isLong()) { record.put(fieldName, datum.asLong()); } else if (datum.isShort()) { record.put(fieldName, datum.shortValue()); } else if (datum.isDouble()) { record.put(fieldName, datum.asDouble()); } else if (datum.isFloat()) { record.put(fieldName, datum.floatValue()); } else if (datum.isBigInteger()) { record.put(fieldName, datum.bigIntegerValue()); } else if (datum.isBigDecimal()) { record.put(fieldName, datum.decimalValue()); } else if (datum.isNull()) { ; // ignore } else { record.put(fieldName, datum.toString()); } }