org.kitesdk.morphline.api.Command Java Examples
The following examples show how to use
org.kitesdk.morphline.api.Command.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FindReplaceBuilder.java From kite with Apache License 2.0 | 6 votes |
/**
 * Builds the command from its morphline configuration.
 *
 * Reads the "replacement" and "pattern" strings plus the optional "isRegex" and "replaceFirst"
 * flags (both default to false). When "isRegex" is set, the pattern is compiled through the
 * grok dictionaries and a matcher over the empty string is stored; otherwise the pattern is
 * stored as a literal and no matcher is created.
 */
public FindReplace(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  GrokDictionaries dictionaries = new GrokDictionaries(config, getConfigs());
  String rawReplacement = getConfigs().getString(config, "replacement");
  String patternText = getConfigs().getString(config, "pattern");
  boolean useRegex = getConfigs().getBoolean(config, "isRegex", false);
  if (!useRegex) {
    // literal mode: the pattern is kept verbatim and the replacement is used as configured
    this.matcher = null;
    this.literalPattern = patternText;
    this.replacement = rawReplacement;
  } else {
    // regex mode: the replacement is passed through the compiled pattern's replaceProperties()
    Pattern compiled = dictionaries.compileExpression(patternText);
    this.matcher = compiled.pattern().matcher("");
    this.replacement = compiled.replaceProperties(rawReplacement);
    this.literalPattern = null;
  }
  this.replaceFirst = getConfigs().getBoolean(config, "replaceFirst", false);
  validateArguments();
}
Example #2
Source File: TokenizeTextBuilder.java From kite with Apache License 2.0 | 6 votes |
public TokenizeText(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.inputFieldName = getConfigs().getString(config, "inputField"); this.outputFieldName = getConfigs().getString(config, "outputField"); String solrFieldType = getConfigs().getString(config, "solrFieldType"); Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); SolrLocator locator = new SolrLocator(solrLocatorConfig, context); LOG.debug("solrLocator: {}", locator); IndexSchema schema = locator.getIndexSchema(); FieldType fieldType = schema.getFieldTypeByName(solrFieldType); if (fieldType == null) { throw new MorphlineCompilationException("Missing Solr field type in schema.xml for name: " + solrFieldType, config); } this.analyzer = fieldType.getIndexAnalyzer(); Preconditions.checkNotNull(analyzer); // register CharTermAttribute for later (implicit) reuse this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class); Preconditions.checkNotNull(token); validateArguments(); }
Example #3
Source File: GenerateUUIDBuilder.java From kite with Apache License 2.0 | 6 votes |
/**
 * Builds the command from its morphline configuration.
 *
 * Reads the target field name (default {@code Fields.ID}), the preserve-existing flag
 * (default true), the prefix (default empty) and the generator "type" (default
 * {@code Type.secure}). For the secure type no PRNG is kept; for any other type a
 * {@code Well19937c} generator is seeded with 624 ints drawn from a {@link SecureRandom}.
 */
public GenerateUUID(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.fieldName = getConfigs().getString(config, FIELD_NAME, Fields.ID);
  this.preserveExisting = getConfigs().getBoolean(config, PRESERVE_EXISTING_NAME, true);
  this.prefix = getConfigs().getString(config, PREFIX_NAME, "");
  Validator<Type> typeValidator = new Validator<Type>();
  String typeName = getConfigs().getString(config, "type", Type.secure.toString());
  Type type = typeValidator.validateEnum(config, typeName, Type.class);
  if (type != Type.secure) {
    Random seeder = new SecureRandom();
    int[] seed = new int[624];
    int i = 0;
    while (i < seed.length) {
      seed[i] = seeder.nextInt();
      i++;
    }
    prng = new Well19937c(seed); // non-secure & fast
  } else {
    prng = null; // secure & slow
  }
  validateArguments();
}
Example #4
Source File: AbstractCommand.java From kite with Apache License 2.0 | 6 votes |
/**
 * Deprecated; will be removed in the next release.
 *
 * Stores the given collaborators after rejecting any that is null, derives the command name
 * from the short class name, and sets up the process/notify call meters.
 */
@Deprecated
protected AbstractCommand(Config config, Command parent, Command child, MorphlineContext context) {
  // checkNotNull returns its argument, so each null check doubles as the assignment
  this.config = Preconditions.checkNotNull(config);
  this.parent = Preconditions.checkNotNull(parent);
  this.child = Preconditions.checkNotNull(child);
  this.context = Preconditions.checkNotNull(context);
  this.name = "morphline." + getShortClassName(getClass());
  this.configs = new Configs();
  this.numProcessCallsMeter = getMeter(Metrics.NUM_PROCESS_CALLS);
  this.numNotifyCallsMeter = getMeter(Metrics.NUM_NOTIFY_CALLS);
}
Example #5
Source File: DownloadHdfsFileBuilder.java From kite with Apache License 2.0 | 6 votes |
/**
 * Builds the command and immediately downloads the configured files.
 *
 * Reads "inputFiles" (default empty), "outputDir" (default "."), the optional default file
 * system URI "fs" and the optional list of extra Hadoop resources "conf", then calls
 * {@code download(...)} with the resulting settings.
 *
 * @throws IOException declared by this constructor; presumably raised by download — verify
 */
public DownloadHdfsFile(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context)
    throws IOException {
  super(builder, config, parent, child, context);
  List<String> inputUris = getConfigs().getStringList(config, "inputFiles", Collections.<String>emptyList());
  String outputDirName = getConfigs().getString(config, "outputDir", ".");
  File targetRootDir = new File(outputDirName);
  Configuration hadoopConf = new Configuration();
  String fsUri = getConfigs().getString(config, "fs", null);
  if (fsUri != null) {
    FileSystem.setDefaultUri(hadoopConf, fsUri); // see Hadoop's GenericOptionsParser
  }
  List<String> resourcePaths = getConfigs().getStringList(config, "conf", Collections.<String>emptyList());
  for (String resourcePath : resourcePaths) {
    hadoopConf.addResource(new Path(resourcePath)); // see Hadoop's GenericOptionsParser
  }
  validateArguments();
  download(inputUris, hadoopConf, targetRootDir);
}
Example #6
Source File: ReadLineBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Builds the command from its morphline configuration.
 *
 * Reads "charset" (default null), "ignoreFirstLine" (default false) and "commentPrefix"
 * (default empty). An empty comment prefix is stored as null.
 *
 * @throws MorphlineCompilationException if the comment prefix is longer than one character
 */
public ReadLine(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.charset = getConfigs().getCharset(config, "charset", null);
  this.ignoreFirstLine = getConfigs().getBoolean(config, "ignoreFirstLine", false);
  String prefix = getConfigs().getString(config, "commentPrefix", "");
  int prefixLength = prefix.length();
  if (prefixLength > 1) {
    throw new MorphlineCompilationException(
        "commentPrefix must be at most one character long: " + prefix, config);
  }
  if (prefixLength == 0) {
    this.commentPrefix = null;
  } else {
    this.commentPrefix = prefix;
  }
  validateArguments();
}
Example #7
Source File: ExtractJsonPathsBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Builds the command from its morphline configuration.
 *
 * Reads the "flatten" flag (default true) and the "paths" sub-config. Each path expression is
 * trimmed, stripped of one leading and one trailing slash, and split on "/" into steps. A step
 * that ends with {@code ARRAY_TOKEN} (and is longer than it) is recorded as the normalized
 * step followed by a separate {@code ARRAY_TOKEN} entry.
 *
 * @throws MorphlineCompilationException if a path contains "//" (descendant axis)
 */
public ExtractJsonPaths(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  ListMultimap<String, String> stepsByField = ArrayListMultimap.create();
  this.flatten = getConfigs().getBoolean(config, "flatten", true);
  Config pathsConfig = getConfigs().getConfig(config, "paths");
  for (Map.Entry<String, Object> pathEntry : new Configs().getEntrySet(pathsConfig)) {
    String field = pathEntry.getKey();
    String expression = pathEntry.getValue().toString().trim();
    if (expression.contains("//")) {
      throw new MorphlineCompilationException("No support for descendant axis available yet", config);
    }
    if (expression.startsWith("/")) {
      expression = expression.substring(1);
    }
    if (expression.endsWith("/")) {
      expression = expression.substring(0, expression.length() - 1);
    }
    expression = expression.trim();
    String[] rawSteps = expression.split("/");
    for (int i = 0; i < rawSteps.length; i++) {
      String step = rawSteps[i].trim();
      boolean isArrayStep = step.length() > ARRAY_TOKEN.length() && step.endsWith(ARRAY_TOKEN);
      if (isArrayStep) {
        // drop the array suffix; it is recorded as its own step below
        step = step.substring(0, step.length() - ARRAY_TOKEN.length());
      }
      stepsByField.put(field, normalize(step));
      if (isArrayStep) {
        stepsByField.put(field, ARRAY_TOKEN);
      }
    }
  }
  this.stepMap = stepsByField.asMap();
  LOG.debug("stepMap: {}", stepMap);
  validateArguments();
}
Example #8
Source File: JavaBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Creates a {@code Java} command for the given configuration.
 *
 * @throws MorphlineCompilationException wrapping the {@link ScriptException} raised while
 *         constructing the command
 */
@Override
public Command build(Config config, Command parent, Command child, MorphlineContext context) {
  final Command command;
  try {
    command = new Java(this, config, parent, child, context);
  } catch (ScriptException e) {
    throw new MorphlineCompilationException("Cannot compile script", config, e);
  }
  return command;
}
Example #9
Source File: MorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
/** * * @param morphlineFile * @param morphlineId * @param collector * @param isProduction * @return */ public static Pipeline setPipeline(String morphlineFile, String morphlineId, Collector collector, boolean isProduction) { LOG.debug("Constructing Pipeline[{}#{}]", morphlineFile, morphlineId); // Set up the Morphline context and handler MorphlineContext context = new MorphlineContext.Builder() .setExceptionHandler(new FaultTolerance(isProduction, false)) .build(); // Compile the Morphline process Command morphline; try { morphline = new Compiler().compile( new File(morphlineFile), morphlineId, context, collector); } catch (Exception e) { throw new MorphlineCompilationException("Morphline compilation error", null, e); } // Create the pipeline wrapper Pipeline pipeline = new Pipeline(morphline, collector); // Ensure shutdown notification to Morphline commands esp in streaming environments JVMUtils.closeAtShutdown(pipeline); // Prep the pipeline Notifications.notifyBeginTransaction(pipeline.getMorphline()); // Register the pipeline into the cache if (null == pipelineCache.get()) { pipelineCache.set(new HashMap<String, Pipeline>()); } pipelineCache.get().put(morphlineFile + SEPARATOR + morphlineId, pipeline); LOG.trace("Pipeline[{}#{}] prepared", morphlineFile, morphlineId); return pipeline; }
Example #10
Source File: ToUpperCaseBuilder.java From sequenceiq-samples with Apache License 2.0 | 5 votes |
public ToUpperCase(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.fieldName = getConfigs().getString(config, "field"); this.locale = getConfigs().getLocale(config, "locale", Locale.ROOT); LOG.debug("fieldName: {}", fieldName); validateArguments(); }
Example #11
Source File: TestMorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
/**
 * Expects {@code executePipeline} to raise a MorphlineRuntimeException when the mocked
 * morphline reports a failed {@code process} call.
 */
@Test (expected = MorphlineRuntimeException.class)
public void executePipelineProcessError(
    final @Mocked MorphlineUtils.Pipeline pipeline,
    final @Mocked Command morphline
) throws Exception {
  final Record record = new Record();

  // Record the expectation: processing this record returns false
  new Expectations() {{
    morphline.process(record);
    result = false;
  }};

  MorphlineUtils.executePipeline(pipeline, record);
}
Example #12
Source File: ToLowerCaseBuilder.java From sequenceiq-samples with Apache License 2.0 | 5 votes |
public ToLowerCase(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.fieldName = getConfigs().getString(config, "field"); this.locale = getConfigs().getLocale(config, "locale", Locale.ROOT); LOG.debug("fieldName: {}", fieldName); validateArguments(); }
Example #13
Source File: OpenHdfsFileTest.java From kite with Apache License 2.0 | 5 votes |
/**
 * Compiles the morphline stored at {@code RESOURCES_DIR/<file>.conf}, passing a null morphline
 * id, a fresh morphline context and the test collector.
 */
private Command createMorphline(String file) {
  String configPath = RESOURCES_DIR + "/" + file + ".conf";
  File configFile = new File(configPath);
  Compiler compiler = new Compiler();
  return compiler.compile(configFile, null, createMorphlineContext(), collector);
}
Example #14
Source File: UnpackBuilder.java From kite with Apache License 2.0 | 5 votes |
public Unpack(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); if (!config.hasPath(SUPPORTED_MIME_TYPES)) { // for (MediaType mediaType : new PackageParser().getSupportedTypes(new ParseContext())) { for (MediaType mediaType : SUPPORTED_TYPES) { addSupportedMimeType(mediaType.toString()); } addSupportedMimeType(GTAR); // apparently not already included in PackageParser.getSupportedTypes() } validateArguments(); }
Example #15
Source File: EmbeddedExtractor.java From kite with Apache License 2.0 | 5 votes |
public boolean parseEmbedded(InputStream stream, Record record, String name, Command child) { // Use the delegate parser to parse this entry TemporaryResources tmp = new TemporaryResources(); try { final TikaInputStream newStream = TikaInputStream.get(new CloseShieldInputStream(stream), tmp); if (stream instanceof TikaInputStream) { final Object container = ((TikaInputStream) stream).getOpenContainer(); if (container != null) { newStream.setOpenContainer(container); } } record = record.copy(); record.replaceValues(Fields.ATTACHMENT_BODY, newStream); record.removeAll(Fields.ATTACHMENT_MIME_TYPE); record.removeAll(Fields.ATTACHMENT_CHARSET); record.removeAll(Fields.ATTACHMENT_NAME); if (name != null && name.length() > 0) { record.put(Fields.ATTACHMENT_NAME, name); } return child.process(record); // } catch (RuntimeException e) { // // // THIS IS THE DIFF WRT ParsingEmbeddedDocumentExtractor // throw new MorphlineRuntimeException(e); // // // TODO: can we log a warning somehow? // // Could not parse the entry, just skip the content } finally { Closeables.closeQuietly(tmp); } }
Example #16
Source File: ReadRCFileBuilder.java From kite with Apache License 2.0 | 5 votes |
public ReadRCFile(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.includeMetaData = getConfigs().getBoolean(config, "includeMetaData", false); this.readMode = new Validator<RCFileReadMode>() .validateEnum( config, getConfigs().getString(config, "readMode", RCFileReadMode.row.name()), RCFileReadMode.class); for (Config columnConfig : getConfigs().getConfigList(config, "columns")) { columns.add(new RCFileColumn(columnConfig, conf)); } validateArguments(); }
Example #17
Source File: DropRecordBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Creates the command for the given configuration: a {@code DevNull} bound to the parent when
 * no configuration is supplied, otherwise a {@code DropRecord}.
 */
@Override
public Command build(Config config, Command parent, Command child, MorphlineContext context) {
  if (config != null) {
    return new DropRecord(this, config, parent, child, context);
  }
  return new DevNull(parent);
}
Example #18
Source File: HashDigestBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Builds the command from its morphline configuration: input and output field names, the
 * hash algorithm name, the preserve-existing flag and the charset (default UTF-8). A
 * {@link MessageDigest} is obtained for the configured algorithm.
 *
 * @throws MorphlineCompilationException if no digest exists for the configured hash type
 */
public HashDigest(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  this.inputFieldName = getConfigs().getString(config, INPUT_FIELD);
  this.outputFieldName = getConfigs().getString(config, OUTPUT_FIELD);
  this.hashType = getConfigs().getString(config, HASH_TYPE);
  this.preserveExisting = getConfigs().getBoolean(config, PRESERVE_EXISTING_NAME, PRESERVE_EXISTING_DEFAULT);
  this.charset = getConfigs().getCharset(config, CHARSET_FIELD, Charsets.UTF_8);

  try {
    this.digest = MessageDigest.getInstance(hashType);
  } catch (NoSuchAlgorithmException e) {
    throw new MorphlineCompilationException("Unable to initialise digest", config, e);
  }

  validateArguments();

  // Nothing left to do unless trace logging is on
  if (!LOG.isTraceEnabled()) {
    return;
  }
  LOG.trace("inputField: {}", inputFieldName);
  LOG.trace("outputField: {}", outputFieldName);
  LOG.trace("hashType: {}", hashType);
  LOG.trace("preserveExisting: {}", preserveExisting);
}
Example #19
Source File: ReadSequenceFileBuilder.java From kite with Apache License 2.0 | 5 votes |
public ReadSequenceFile(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.includeMetaData = getConfigs().getBoolean(config, "includeMetaData", false); this.keyField = getConfigs().getString(config, CONFIG_KEY_FIELD, Fields.ATTACHMENT_NAME); this.valueField = getConfigs().getString(config, CONFIG_VALUE_FIELD, Fields.ATTACHMENT_BODY); validateArguments(); }
Example #20
Source File: AbstractCommand.java From kite with Apache License 2.0 | 5 votes |
/**
 * Factory method that builds the chain of commands listed under {@code configKey} in
 * {@code rootConfig}. Consecutive commands are linked through {@code Connector} instances and
 * the connector of the last command feeds records into {@code finalChild}.
 *
 * @param rootConfig
 *          the config holding the command list; must not be null
 * @param configKey
 *          the key of the command list inside rootConfig; must not be null. A missing key
 *          yields an empty chain.
 * @param finalChild
 *          the command that receives the output of the last command; must not be null
 * @param ignoreNotifications
 *          if true indicates don't forward notifications at the end of the chain of commands.
 *          This is a feature that multi-branch commands like tryRules and ifThenElse need to
 *          avoid sending a notification multiple times to finalChild, once from each branch.
 * @return the commands of the chain, in configuration order
 */
protected List<Command> buildCommandChain(Config rootConfig, String configKey, Command finalChild, boolean ignoreNotifications) {
  Preconditions.checkNotNull(rootConfig);
  Preconditions.checkNotNull(configKey);
  Preconditions.checkNotNull(finalChild);

  List<? extends Config> stepConfigs =
      new Configs().getConfigList(rootConfig, configKey, Collections.<Config>emptyList());
  List<Command> chain = Lists.newArrayList();
  Command upstream = this;
  Connector previousConnector = null;
  int lastIndex = stepConfigs.size() - 1;

  for (int index = 0; index <= lastIndex; index++) {
    boolean atEnd = (index == lastIndex);
    Connector connector = new Connector(ignoreNotifications && atEnd);
    if (atEnd) {
      connector.setChild(finalChild);
    }

    Command command = buildCommand(stepConfigs.get(index), upstream, connector);
    chain.add(command);

    // From the second command on, the previous connector forwards into this command
    if (previousConnector != null) {
      previousConnector.setChild(command);
    }
    connector.setParent(command);

    upstream = connector;
    previousConnector = connector;
  }
  return chain;
}
Example #21
Source File: LogCommand.java From kite with Apache License 2.0 | 5 votes |
/**
 * Builds the command from its morphline configuration: the required "format" string and the
 * optional "args" list (default empty), each entry of which becomes a {@code FieldExpression}.
 */
public LogCommand(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.format = getConfigs().getString(config, "format");
  List<String> args = getConfigs().getStringList(config, "args", Collections.<String>emptyList());
  int argCount = args.size();
  this.expressions = new FieldExpression[argCount];
  int slot = 0;
  while (slot < argCount) {
    String arg = args.get(slot);
    this.expressions[slot] = new FieldExpression(arg, getConfig());
    slot++;
  }
  validateArguments();
}
Example #22
Source File: ReadMultiLineBuilder.java From kite with Apache License 2.0 | 5 votes |
public ReadMultiLine(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); this.regex = Pattern.compile(getConfigs().getString(config, "regex")).matcher(""); this.negate = getConfigs().getBoolean(config, "negate", false); this.charset = getConfigs().getCharset(config, "charset", null); this.what = new Validator<What>().validateEnum( config, getConfigs().getString(config, "what", What.previous.toString()), What.class); validateArguments(); }
Example #23
Source File: ReadCSVBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Builds the command from its morphline configuration and selects the tokenizer.
 *
 * Settings read: "separator" (default ","), "columns", "charset" (default null),
 * "ignoreFirstLine" (default false), "trim" (default true), "addEmptyStrings" (default true),
 * "quoteChar" (default empty), "commentPrefix" (default empty), "maxCharactersPerRecord"
 * (default 1000 * 1000) and "onMaxCharactersPerRecord" (default throwException).
 * An empty quote character selects {@code SimpleCSVTokenizer}; otherwise
 * {@code QuotedCSVTokenizer} is used.
 *
 * @throws MorphlineCompilationException if the separator is not exactly one character, if the
 *         quote character or comment prefix is longer than one character, or if the quote
 *         character equals the separator
 */
public ReadCSV(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  String separatorText = getConfigs().getString(config, "separator", ",");
  if (separatorText.length() != 1) {
    throw new MorphlineCompilationException(
        "CSV separator must be one character only: " + separatorText, config);
  }
  this.separatorChar = separatorText.charAt(0);

  this.columnNames = getConfigs().getStringList(config, "columns");
  this.charset = getConfigs().getCharset(config, "charset", null);
  this.ignoreFirstLine = getConfigs().getBoolean(config, "ignoreFirstLine", false);
  this.trim = getConfigs().getBoolean(config, "trim", true);
  this.addEmptyStrings = getConfigs().getBoolean(config, "addEmptyStrings", true);

  this.quoteChar = getConfigs().getString(config, "quoteChar", "");
  if (quoteChar.length() > 1) {
    throw new MorphlineCompilationException(
        "Quote character must not have a length of more than one character: " + quoteChar, config);
  }
  if (quoteChar.equals(String.valueOf(separatorChar))) {
    throw new MorphlineCompilationException(
        "Quote character must not be the same as separator: " + quoteChar, config);
  }

  this.commentPrefix = getConfigs().getString(config, "commentPrefix", "");
  if (commentPrefix.length() > 1) {
    throw new MorphlineCompilationException(
        "Comment prefix must not have a length of more than one character: " + commentPrefix, config);
  }

  this.maxCharactersPerRecord = getConfigs().getInt(config, "maxCharactersPerRecord", 1000 * 1000);

  Validator<OnMaxCharactersPerRecord> policyValidator = new Validator<OnMaxCharactersPerRecord>();
  String policyName = getConfigs().getString(
      config, "onMaxCharactersPerRecord", OnMaxCharactersPerRecord.throwException.toString());
  OnMaxCharactersPerRecord policy =
      policyValidator.validateEnum(config, policyName, OnMaxCharactersPerRecord.class);
  this.ignoreTooLongRecords = (policy == OnMaxCharactersPerRecord.ignoreRecord);

  // No quote character configured: use the simple tokenizer
  if (quoteChar.length() == 0) {
    this.tokenizer = new SimpleCSVTokenizer(separatorChar, trim, addEmptyStrings, columnNames);
  } else {
    this.tokenizer = new QuotedCSVTokenizer(
        separatorChar, trim, addEmptyStrings, columnNames,
        maxCharactersPerRecord, ignoreTooLongRecords, quoteChar.charAt(0));
  }
  validateArguments();
}
Example #24
Source File: OpenHdfsFileBuilder.java From kite with Apache License 2.0 | 5 votes |
/**
 * Builds the command and its Hadoop {@code Configuration}: the optional "fs" setting becomes
 * the default file system URI, and each entry of the optional "conf" list is added as a
 * configuration resource.
 */
public OpenHdfsFile(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);
  this.conf = new Configuration();
  String fsUri = getConfigs().getString(config, "fs", null);
  if (fsUri != null) {
    FileSystem.setDefaultUri(conf, fsUri); // see Hadoop's GenericOptionsParser
  }
  List<String> resourcePaths = getConfigs().getStringList(config, "conf", Collections.<String>emptyList());
  for (String resourcePath : resourcePaths) {
    conf.addResource(new Path(resourcePath)); // see Hadoop's GenericOptionsParser
  }
  validateArguments();
}
Example #25
Source File: DetectMimeTypesTest.java From kite with Apache License 2.0 | 5 votes |
private String detect(Record event, boolean includeMetaData, boolean excludeParameters) throws IOException { List key = Arrays.asList(includeMetaData, excludeParameters); Command cachedMorphline = morphlineCache.get(key); if (cachedMorphline == null) { // avoid recompiling time and again (performance) Config override = ConfigFactory.parseString("INCLUDE_META_DATA : " + includeMetaData + "\nEXCLUDE_PARAMETERS : " + excludeParameters); cachedMorphline = createMorphline("test-morphlines/detectMimeTypesWithDefaultMimeTypesAndFile", override); morphlineCache.put(key, cachedMorphline); } collector.reset(); assertTrue(cachedMorphline.process(event)); String mimeType = (String) collector.getFirstRecord().getFirstValue(Fields.ATTACHMENT_MIME_TYPE); return mimeType; }
Example #26
Source File: AbstractParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Deprecated; will be removed in the next release.
 *
 * Registers every MIME type listed under {@code SUPPORTED_MIME_TYPES} (default: none) and
 * sets up the record-count meter.
 */
@Deprecated
protected AbstractParser(Config config, Command parent, Command child, MorphlineContext context) {
  super(config, parent, child, context);
  for (String supportedType
      : getConfigs().getStringList(config, SUPPORTED_MIME_TYPES, Collections.<String>emptyList())) {
    addSupportedMimeType(supportedType);
  }
  this.numRecordsMeter = getMeter(Metrics.NUM_RECORDS);
}
Example #27
Source File: SaxonCommand.java From kite with Apache License 2.0 | 5 votes |
/**
 * Builds the Saxon processor and document builder from the morphline configuration.
 *
 * Settings read: "isTracing" (default false), "isLicensedSaxonEdition" (default false), the
 * optional "features" sub-config (each entry is applied as a processor configuration
 * property) and the optional "extensionFunctions" list of class names. Each listed class is
 * instantiated reflectively and registered with the processor.
 *
 * @throws MorphlineCompilationException if an extension function class cannot be instantiated
 *         (the underlying exception is attached as the cause) or if it does not implement
 *         {@code ExtensionFunction}
 */
public SaxonCommand(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) {
  super(builder, config, parent, child, context);

  this.isTracing = getConfigs().getBoolean(config, "isTracing", false);
  boolean isLicensedSaxonEdition = getConfigs().getBoolean(config, "isLicensedSaxonEdition", false);
  this.processor = new Processor(isLicensedSaxonEdition);
  this.documentBuilder = processor.newDocumentBuilder();

  Config features = getConfigs().getConfig(config, "features", ConfigFactory.empty());
  for (Map.Entry<String, Object> entry : new Configs().getEntrySet(features)) {
    processor.setConfigurationProperty(entry.getKey(), entry.getValue());
  }

  for (String clazz : getConfigs().getStringList(config, "extensionFunctions", Collections.<String>emptyList())) {
    Object function;
    try {
      function = Class.forName(clazz).newInstance();
    } catch (Exception e) {
      // Keep the original exception as the cause so the reason for the failure
      // (class not found, no accessible constructor, ...) is not lost.
      throw new MorphlineCompilationException("Cannot instantiate extension function: " + clazz, config, e);
    }

    // Only ExtensionFunction implementations are accepted;
    // ExtensionFunctionDefinition instances are not registered.
    if (function instanceof ExtensionFunction) {
      processor.registerExtensionFunction((ExtensionFunction) function);
    } else {
      throw new MorphlineCompilationException("Extension function has wrong class: " + clazz, config);
    }
  }
}
Example #28
Source File: MorphlineUtils.java From envelope with Apache License 2.0 | 5 votes |
public static List<Record> executePipeline(Pipeline pipeline, Record inputRecord, boolean errorOnEmpty) { Command morphline = pipeline.getMorphline(); try { LOG.trace("Input Record: {}", inputRecord); // Process the Record Notifications.notifyStartSession(morphline); boolean success = morphline.process(inputRecord); Notifications.notifyCommitTransaction(morphline); if (!success) { throw new MorphlineRuntimeException("Morphline failed to process incoming Record: " + inputRecord); } // Collect the output List<Record> outputRecords = pipeline.getCollector().getRecords(); if (errorOnEmpty && !outputRecords.iterator().hasNext()) { throw new MorphlineRuntimeException("Morphline did not produce output Record(s)"); } LOG.trace("Output Record(s): {}", outputRecords); return outputRecords; } catch (RuntimeException e) { Notifications.notifyRollbackTransaction(morphline); // TODO : Review exception handling LOG.warn("Morphline failed to execute properly on incoming Record: " + inputRecord, e); throw e; } }
Example #29
Source File: LogErrorBuilder.java From kite with Apache License 2.0 | 4 votes |
/** Creates the command; all arguments are passed straight to the superclass constructor. */
public LogError(
    CommandBuilder builder,
    Config config,
    Command parent,
    Command child,
    MorphlineContext context) {
  super(builder, config, parent, child, context);
}
Example #30
Source File: LoadSolrBuilder.java From kite with Apache License 2.0 | 4 votes |
/** Creates a {@code LoadSolr} command for the given configuration, parent, child and context. */
@Override
public Command build(Config config, Command parent, Command child, MorphlineContext context) {
  Command loadSolr = new LoadSolr(this, config, parent, child, context);
  return loadSolr;
}