Java Code Examples for org.kitesdk.morphline.api.Record#copy()
The following examples show how to use
org.kitesdk.morphline.api.Record#copy().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ReadClobBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record inputRecord, InputStream stream) throws IOException { if (counter++ % 8192 == 0) { clob = new StringBuilder(); // periodically gc memory from large outlier strings } incrementNumRecords(); Charset detectedCharset = detectCharset(inputRecord, charset); Reader reader = new InputStreamReader(stream, detectedCharset); clob.setLength(0); int len; while ((len = reader.read(buffer)) >= 0) { clob.append(buffer, 0, len); } Record outputRecord = inputRecord.copy(); removeAttachments(outputRecord); outputRecord.replaceValues(outputFieldName, clob.toString()); // pass record to next command in chain: return getChild().process(outputRecord); }
Example 2
Source File: ReadAvroBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record inputRecord, InputStream in) throws IOException { Record template = inputRecord.copy(); removeAttachments(template); template.put(Fields.ATTACHMENT_MIME_TYPE, ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE); Decoder decoder = prepare(in); try { while (true) { GenericContainer datum = datumReader.read(null, decoder); if (!extract(datum, template)) { return false; } } } catch (EOFException e) { ; // ignore } finally { in.close(); } return true; }
Example 3
Source File: XSLTBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess2(Record inputRecord, InputStream stream) throws SaxonApiException, XMLStreamException { incrementNumRecords(); for (Fragment fragment : fragments) { Record outputRecord = inputRecord.copy(); removeAttachments(outputRecord); XdmNode document = parseXmlDocument(stream); LOG.trace("XSLT input document: {}", document); XsltTransformer evaluator = fragment.transformer; evaluator.setInitialContextNode(document); XMLStreamWriter morphlineWriter = new MorphlineXMLStreamWriter(getChild(), outputRecord); evaluator.setDestination(new XMLStreamWriterDestination(morphlineWriter)); evaluator.transform(); // run the query and push into child via RecordXMLStreamWriter } return true; }
Example 4
Source File: ReadProtobufBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record inputRecord, InputStream in) throws IOException { Object protoObjectInstance; try { protoObjectInstance = parseMethod.invoke(null, in); } catch (Exception e) { throw new IOException("Instance creation of '" + outputClass.getName() + "' from inputStream failed. " + e.getMessage(), e); } incrementNumRecords(); LOG.trace("protoObject: {}", protoObjectInstance); Record outputRecord = inputRecord.copy(); removeAttachments(outputRecord); outputRecord.put(Fields.ATTACHMENT_BODY, protoObjectInstance); outputRecord.put(Fields.ATTACHMENT_MIME_TYPE, MIME_TYPE); // pass record to next command in chain: if (!getChild().process(outputRecord)) { return false; } return true; }
Example 5
Source File: ExtractAvroPathsBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record inputRecord) { // Preconditions.checkState(ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE.equals(inputRecord.getFirstValue(Fields.ATTACHMENT_MIME_TYPE))); GenericContainer datum = (GenericContainer) inputRecord.getFirstValue(Fields.ATTACHMENT_BODY); Preconditions.checkNotNull(datum); Preconditions.checkNotNull(datum.getSchema()); Record outputRecord = inputRecord.copy(); for (Map.Entry<String, Collection<String>> entry : stepMap.entrySet()) { String fieldName = entry.getKey(); List<String> steps = (List<String>) entry.getValue(); extractPath(datum, datum.getSchema(), fieldName, steps, outputRecord, 0); } // pass record to next command in chain: return getChild().process(outputRecord); }
Example 6
Source File: ConvertHTMLBuilder.java From kite with Apache License 2.0 | 6 votes |
private boolean doProcess2(Record inputRecord, InputStream stream) throws IOException, SAXException { ByteArrayOutputStream out = new ByteArrayOutputStream(16 * 1024); XMLWriter xmlWriter = new XMLWriter(new BufferedWriter(new OutputStreamWriter(out, Charsets.UTF_8))); xmlWriter.setOutputProperty(XMLWriter.ENCODING, "UTF-8"); if (omitXMLDeclaration) { xmlWriter.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, "yes"); } xmlReader.setContentHandler(xmlWriter); Charset detectedCharset = detectCharset(inputRecord, charset); InputSource source = new InputSource(new BufferedReader(new InputStreamReader(stream, detectedCharset))); xmlReader.parse(source); // push the HTML through tagsoup into the output byte array Record outputRecord = inputRecord.copy(); removeAttachments(outputRecord); outputRecord.replaceValues(Fields.ATTACHMENT_BODY, out.toByteArray()); incrementNumRecords(); // pass record to next command in chain: if (!getChild().process(outputRecord)) { return false; } return true; }
Example 7
Source File: SolrMorphlineTest.java From kite with Apache License 2.0 | 6 votes |
@Test public void testTokenizeText() throws Exception { morphline = createMorphline("test-morphlines" + File.separator + "tokenizeText"); for (int i = 0; i < 3; i++) { Record record = new Record(); record.put(Fields.MESSAGE, "Hello World!"); record.put(Fields.MESSAGE, "\[email protected] #%()123"); Record expected = record.copy(); expected.getFields().putAll("tokens", Arrays.asList("hello", "world", "foo", "bar.com", "123")); collector.reset(); startSession(); Notifications.notifyBeginTransaction(morphline); assertTrue(morphline.process(record)); assertEquals(1, collector.getNumStartEvents()); Notifications.notifyCommitTransaction(morphline); assertEquals(expected, collector.getFirstRecord()); } }
Example 8
Source File: ToAvroMapBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record inputRecord) { Record outputRecord = inputRecord.copy(); AbstractParser.removeAttachments(outputRecord); Map<String, Collection<Object>> map = inputRecord.getFields().asMap(); map = new HashMap<String, Collection<Object>>(map); // make it mutable Field field = schema.getFields().get(0); Object avroResult = AvroConversions.toAvro(map, field.schema()); if (avroResult == AvroConversions.ERROR) { LOG.debug("Cannot convert record: {} to schema: {}", inputRecord, schema); return false; } IndexedRecord avroRecord = new GenericData.Record(schema); avroRecord.put(field.pos(), avroResult); outputRecord.put(Fields.ATTACHMENT_BODY, avroRecord); // pass record to next command in chain: return super.doProcess(outputRecord); }
Example 9
Source File: ReadCSVBuilder.java From kite with Apache License 2.0 | 6 votes |
@Override protected boolean doProcess(Record inputRecord, InputStream stream) throws IOException { Record template = inputRecord.copy(); removeAttachments(template); Charset detectedCharset = detectCharset(inputRecord, charset); BufferedReader reader = new BufferedReader( new InputStreamReader(stream, detectedCharset), getBufferSize(stream)); if (ignoreFirstLine) { reader.readLine(); } while (true) { Record outputRecord = readNext(reader, template); if (outputRecord == null) { break; } incrementNumRecords(); // pass record to next command in chain: if (!getChild().process(outputRecord)) { return false; } } return true; }
Example 10
Source File: XQueryBuilder.java From kite with Apache License 2.0 | 5 votes |
@Override protected boolean doProcess2(Record inputRecord, InputStream stream) throws SaxonApiException, XMLStreamException { incrementNumRecords(); for (Fragment fragment : fragments) { Record template = inputRecord.copy(); removeAttachments(template); XdmNode document = parseXmlDocument(stream); LOG.trace("XQuery input document: {}", document); XQueryEvaluator evaluator = fragment.xQueryEvaluator; evaluator.setContextItem(document); int i = 0; for (XdmItem item : evaluator) { i++; if (LOG.isTraceEnabled()) { LOG.trace("XQuery result sequence item #{} is of class: {} with value: {}", new Object[] { i, item.getUnderlyingValue().getClass().getName(), item }); } if (item.isAtomicValue()) { LOG.debug("Ignoring atomic value in result sequence: {}", item); continue; } XdmNode node = (XdmNode) item; Record outputRecord = template.copy(); boolean isNonEmpty = addRecordValues(node, Axis.SELF, XdmNodeKind.ATTRIBUTE, outputRecord); isNonEmpty = addRecordValues(node, Axis.ATTRIBUTE, XdmNodeKind.ATTRIBUTE, outputRecord) || isNonEmpty; isNonEmpty = addRecordValues(node, Axis.CHILD, XdmNodeKind.ELEMENT, outputRecord) || isNonEmpty; if (isNonEmpty) { // pass record to next command in chain if (!getChild().process(outputRecord)) { return false; } } } } return true; }
Example 11
Source File: WriteAvroToByteArrayBuilder.java From kite with Apache License 2.0 | 5 votes |
@Override protected boolean doProcess(Record inputRecord) { Record outputRecord = inputRecord.copy(); AbstractParser.removeAttachments(outputRecord); ByteArrayOutputStream bout = new ByteArrayOutputStream(1024); if (format == Format.container) { writeContainer(inputRecord, bout); } else { writeContainerless(inputRecord, bout); } outputRecord.put(Fields.ATTACHMENT_BODY, bout.toByteArray()); // pass record to next command in chain: return super.doProcess(outputRecord); }
Example 12
Source File: ReadRCFileBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Opens an {@code RCFile.Reader} over the stream and delegates to {@code readRowWise}
 * or {@code readColumnWise} according to {@code readMode}.
 *
 * @param record record used as the template (attachments removed, MIME type set,
 *               file metadata added when {@code includeMetaData} is set and metadata exists)
 * @param in     stream wrapped in a {@code SingleStreamFileSystem}
 * @return whatever the selected read method returns
 * @throws MorphlineRuntimeException wrapping any {@link IOException} raised while processing
 * @throws IllegalStateException     if {@code readMode} is neither {@code row} nor {@code column}
 */
@Override
protected boolean doProcess(Record record, InputStream in) throws IOException {
    Path attachmentPath = getAttachmentPath(record);
    SingleStreamFileSystem singleFileFs = new SingleStreamFileSystem(in, attachmentPath);
    RCFile.Reader rcReader = null;
    try {
        rcReader = new RCFile.Reader(singleFileFs, attachmentPath, conf);

        Record template = record.copy();
        removeAttachments(template);
        template.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE);

        SequenceFile.Metadata fileMetadata = includeMetaData ? rcReader.getMetadata() : null;
        if (fileMetadata != null) {
            template.put(RC_FILE_META_DATA, fileMetadata);
        }

        switch (readMode) {
            case row:
                return readRowWise(rcReader, template);
            case column:
                return readColumnWise(rcReader, template);
            default:
                throw new IllegalStateException();
        }
    } catch (IOException e) {
        throw new MorphlineRuntimeException(
                "IOException while processing attachment " + attachmentPath.getName(), e);
    } finally {
        if (rcReader != null) {
            rcReader.close();
        }
    }
}
Example 13
Source File: ReadRCFileBuilder.java From kite with Apache License 2.0 | 5 votes |
private boolean readRowWise(final RCFile.Reader reader, final Record record) throws IOException { LongWritable rowID = new LongWritable(); while (true) { boolean next; try { next = reader.next(rowID); } catch (EOFException ex) { // We have hit EOF of the stream break; } if (!next) { break; } incrementNumRecords(); Record outputRecord = record.copy(); BytesRefArrayWritable rowBatchBytes = new BytesRefArrayWritable(); rowBatchBytes.resetValid(columns.size()); reader.getCurrentRow(rowBatchBytes); // Read all the columns configured and set it in the output record for (RCFileColumn rcColumn : columns) { BytesRefWritable columnBytes = rowBatchBytes.get(rcColumn.getInputField()); outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, columnBytes)); } // pass record to next command in chain: if (!getChild().process(outputRecord)) { return false; } } return true; }
Example 14
Source File: ReadAvroParquetFileBuilder.java From kite with Apache License 2.0 | 5 votes |
private boolean extract(GenericContainer datum, Record inputRecord) { incrementNumRecords(); Record outputRecord = inputRecord.copy(); outputRecord.put(Fields.ATTACHMENT_BODY, datum); // pass record to next command in chain: return getChild().process(outputRecord); }
Example 15
Source File: AvroMorphlineTest.java From kite with Apache License 2.0 | 5 votes |
/**
 * Builds a one-field "Doc" schema whose "price" field is a union of {@code types}, then
 * checks via {@code processAndVerifySuccess} that a record carrying {@code input} yields
 * an Avro document whose "price" is {@code expected}.
 *
 * @param input    value put into the morphline record's "price" field
 * @param expected value put into the expected Avro document's "price" field
 * @param types    member schemas of the union
 */
private void processAndVerifyUnion(Object input, Object expected, List<Schema> types) {
    Schema docSchema = Schema.createRecord("Doc", "adoc", null, false);
    Field priceField = new Field("price", Schema.createUnion(types), null, null);
    docSchema.setFields(Arrays.asList(priceField));

    GenericData.Record expectedDoc = new GenericData.Record(docSchema);
    expectedDoc.put("price", expected);

    Record inputRecord = new Record();
    inputRecord.put("_dataset_descriptor_schema", docSchema);
    inputRecord.put("price", input);

    Record expectedRecord = inputRecord.copy();
    expectedRecord.put(Fields.ATTACHMENT_BODY, expectedDoc);

    processAndVerifySuccess(inputRecord, expectedRecord, false);
}
Example 16
Source File: ExtractAvroTreeBuilder.java From kite with Apache License 2.0 | 5 votes |
@Override protected boolean doProcess(Record inputRecord) { // Preconditions.checkState(ReadAvroBuilder.AVRO_MEMORY_MIME_TYPE.equals(inputRecord.getFirstValue(Fields.ATTACHMENT_MIME_TYPE))); GenericContainer datum = (GenericContainer) inputRecord.getFirstValue(Fields.ATTACHMENT_BODY); Preconditions.checkNotNull(datum); Preconditions.checkNotNull(datum.getSchema()); Record outputRecord = inputRecord.copy(); extractTree(datum, datum.getSchema(), outputRecord, outputFieldPrefix); // pass record to next command in chain: return getChild().process(outputRecord); }
Example 17
Source File: MaxmindMorphlineTest.java From kite with Apache License 2.0 | 5 votes |
/**
 * Processes a copy of {@code input} through the morphline and verifies the outcome.
 * On success, the single collected record must equal {@code input} once its attachment
 * body is removed, and the body must be a {@code JsonNode} whose "country" and "city"
 * nodes match the given geoname ids.
 *
 * @param input            record to process; a copy is what the morphline receives
 * @param isSuccess        expected return value of {@code morphline.process}
 * @param isSame           not read by this method
 * @param countryGeoNameId expected "geoname_id" of the "country" node, or {@code null}
 *                         if that node must be absent
 * @param cityGeoNameId    expected "geoname_id" of the "city" node, or {@code null}
 *                         if that node must be absent
 */
private void processAndVerifySuccess2(final Record input, final boolean isSuccess, boolean isSame,
        final Integer countryGeoNameId, final Integer cityGeoNameId) {
    final Record inputCopy = input.copy();
    collector.reset();
    startSession();
    assertEquals(1, collector.getNumStartEvents());
    assertEquals(isSuccess, morphline.process(inputCopy));

    if (!isSuccess) {
        assertEquals(0, collector.getRecords().size());
        return;
    }

    assertEquals(1, collector.getRecords().size());
    final Record actual = collector.getFirstRecord();
    final Object body = actual.getFirstValue(Fields.ATTACHMENT_BODY);
    actual.removeAll(Fields.ATTACHMENT_BODY);
    assertEquals(input, actual);

    assertTrue(body instanceof JsonNode);
    final JsonNode root = (JsonNode) body;
    assertNotNull(root);

    final JsonNode country = root.get("country");
    if (countryGeoNameId == null) {
        assertNull(country);
    } else {
        assertNotNull(country);
        assertEquals(countryGeoNameId.intValue(), country.get("geoname_id").asInt());
    }

    final JsonNode city = root.get("city");
    if (cityGeoNameId == null) {
        assertNull(city);
    } else {
        assertNotNull(city);
        assertEquals(cityGeoNameId.intValue(), city.get("geoname_id").asInt());
    }
}
Example 18
Source File: ReadLineBuilder.java From kite with Apache License 2.0 | 5 votes |
@Override protected boolean doProcess(Record inputRecord, InputStream stream) throws IOException { Record template = inputRecord.copy(); removeAttachments(template); template.removeAll(Fields.MESSAGE); Charset detectedCharset = detectCharset(inputRecord, charset); Reader reader = new InputStreamReader(stream, detectedCharset); BufferedReader lineReader = new BufferedReader(reader, getBufferSize(stream)); boolean isFirst = true; String line; while ((line = lineReader.readLine()) != null) { if (isFirst && ignoreFirstLine) { isFirst = false; continue; // ignore first line } if (line.length() == 0) { continue; // ignore empty lines } if (commentPrefix != null && line.startsWith(commentPrefix)) { continue; // ignore comments } Record outputRecord = template.copy(); outputRecord.put(Fields.MESSAGE, line); incrementNumRecords(); // pass record to next command in chain: if (!getChild().process(outputRecord)) { return false; } } return true; }
Example 19
Source File: ReadSequenceFileBuilder.java From kite with Apache License 2.0 | 4 votes |
@Override protected boolean doProcess(Record inputRecord, final InputStream in) throws IOException { SequenceFile.Metadata sequenceFileMetaData = null; SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(conf, SequenceFile.Reader.stream(new FSDataInputStream(new ForwardOnlySeekable(in)))); if (includeMetaData) { sequenceFileMetaData = reader.getMetadata(); } Class keyClass = reader.getKeyClass(); Class valueClass = reader.getValueClass(); Record template = inputRecord.copy(); removeAttachments(template); while (true) { Writable key = (Writable)ReflectionUtils.newInstance(keyClass, conf); Writable val = (Writable)ReflectionUtils.newInstance(valueClass, conf); try { if (!reader.next(key, val)) { break; } } catch (EOFException ex) { // SequenceFile.Reader will throw an EOFException after reading // all the data, if it doesn't know the length. Since we are // passing in an InputStream, we hit this case; LOG.trace("Received expected EOFException", ex); break; } incrementNumRecords(); Record outputRecord = template.copy(); outputRecord.put(keyField, key); outputRecord.put(valueField, val); outputRecord.put(Fields.ATTACHMENT_MIME_TYPE, OUTPUT_MEDIA_TYPE); if (includeMetaData && sequenceFileMetaData != null) { outputRecord.put(SEQUENCE_FILE_META_DATA, sequenceFileMetaData); } // pass record to next command in chain: if (!getChild().process(outputRecord)) { return false; } } } finally { Closeables.closeQuietly(reader); } return true; }
Example 20
Source File: XSLTBuilder.java From kite with Apache License 2.0 | 4 votes |
/**
 * Creates a writer bound to a child command and a template record.
 *
 * @param child    command stored in the {@code child} field
 * @param template record stored as-is in {@code template}; a copy of it becomes
 *                 the initial {@code record}
 */
public MorphlineXMLStreamWriter(Command child, Record template) {
    this.record = template.copy();
    this.template = template;
    this.child = child;
}