cascading.flow.FlowDef Java Examples
The following examples show how to use cascading.flow.FlowDef. Each example notes the original project, source file, and license it was taken from.
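Most of the examples below follow the same pattern: create source and sink Taps, build a pipe assembly, bind them together in a FlowDef, then hand the FlowDef to a FlowConnector and run the resulting Flow. Here is a minimal sketch of that pattern using Cascading's local mode; the class name FlowDefSketch and the file paths in.txt and out.txt are illustrative placeholders, not part of any example below.

import cascading.flow.Flow;
import cascading.flow.FlowDef;
import cascading.flow.local.LocalFlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.local.TextLine;
import cascading.tap.Tap;
import cascading.tap.local.FileTap;
import cascading.tuple.Fields;

public class FlowDefSketch {

  public static void main(String[] args) {
    // source and sink taps; the paths are placeholders
    Tap in = new FileTap(new TextLine(new Fields("line")), "in.txt");
    Tap out = new FileTap(new TextLine(new Fields("line")), "out.txt");

    // an identity pipe: tuples pass through unchanged
    Pipe pipe = new Pipe("copy");

    // bind the taps to the head and tail of the pipe assembly
    FlowDef flowDef = FlowDef.flowDef()
        .setName("copy")
        .addSource(pipe, in)
        .addTailSink(pipe, out);

    // plan and run the flow with the local (in-process) connector
    Flow flow = new LocalFlowConnector().connect(flowDef);
    flow.complete();
  }
}

Because the FlowDef only describes the flow, the same definition can be handed to a different FlowConnector (HadoopFlowConnector, FlinkConnector, and so on) to run on another platform, as Examples #2, #6, and #7 show.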
Example #1
Source File: CommonCrawlIndexTest.java From aws-big-data-blog with Apache License 2.0
@Test
public void testCreateCommonCrawlFlowDef() throws Exception {
  Properties properties = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);

  String sourcePath = properties.getProperty("inPath");
  String sinkPath = properties.getProperty("testCreateCommonCrawlFlowDefOutput");
  String sinkValidationPath = properties.getProperty("testCreateCommonCrawlFlowDefOutputValidation");

  // create the Cascading "source" (input) tap to read the commonCrawl WAT file(s)
  Tap source = new FileTap(new TextLine(new Fields("line")), sourcePath);

  // create the Cascading "sink" (output) tap to dump the results
  Tap sink = new FileTap(new TextLine(new Fields("line")), sinkPath);

  // build the Cascading flow definition
  FlowDef flowDef = CommonCrawlIndex.createCommonCrawlFlowDef(source, sink);

  new LocalFlowConnector(properties).connect(flowDef).complete();
  Assert.sameContent(sinkPath, sinkValidationPath);
}
Example #2
Source File: WordCount.java From cascading-flink with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 2) {
    throw new IllegalArgumentException("Please specify input and output paths as arguments.");
  }

  Fields token = new Fields("token", String.class);
  Fields text = new Fields("text");
  RegexSplitGenerator splitter = new RegexSplitGenerator(token, "\\s+");

  // only returns "token"
  Pipe docPipe = new Each("token", text, splitter, Fields.RESULTS);

  Pipe wcPipe = new Pipe("wc", docPipe);
  wcPipe = new AggregateBy(wcPipe, token, new CountBy(new Fields("count")));

  Tap inTap = new Hfs(new TextDelimited(text, "\n"), args[0]);
  Tap outTap = new Hfs(new TextDelimited(false, "\n"), args[1], SinkMode.REPLACE);

  FlowDef flowDef = FlowDef.flowDef().setName("wc")
      .addSource(docPipe, inTap)
      .addTailSink(wcPipe, outTap);

  FlowConnector flowConnector = new FlinkConnector();
  Flow wcFlow = flowConnector.connect(flowDef);
  wcFlow.complete();
}
Example #3
Source File: Main.java From aws-big-data-blog with Apache License 2.0
public static void main(String[] args) {
  Properties properties = null;
  try {
    properties = new ConfigReader().renderProperties(Main.class);
    // guard against a missing argument before reading args[0]
    if (args.length > 0 && args[0] != null && args[0].length() > 0) {
      properties.put("inPath", args[0]);
    }
  } catch (IOException e) {
    System.out.println("Could not read your config.properties file");
    e.printStackTrace();
  }
  FlowDef flowDef = buildFlowDef(properties);
  new HadoopFlowConnector(properties).connect(flowDef).complete();
}
Example #4
Source File: CommonCrawlIndexTest.java From aws-big-data-blog with Apache License 2.0
@Test
public void testMain() throws IOException {
  Properties properties = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);
  FlowDef flowDef = CommonCrawlIndex.buildFlowDef(properties);

  if ("LOCAL".equals(properties.getProperty("platform"))) {
    // use the Cascading local connector to exclude Hadoop and just test the logic
    new LocalFlowConnector(properties).connect(flowDef).complete();
  } else {
    new HadoopFlowConnector(properties).connect(flowDef).complete();
  }
}
Example #5
Source File: SortTest.java From plunger with Apache License 2.0
@Test
public void testComplete() throws Exception {
  Bucket sink = new Bucket();
  Fields inFields = Fields.join(FIELD_S, FIELD_X, FIELD_Y);
  TupleListTap source = new DataBuilder(inFields)
      .addTuple("A", "a", "za")
      .addTuple("B", "b", "zb")
      .addTuple("AA", "aa", "zaa")
      .addTuple("BB", "bb", "zbb")
      .toTap();

  FlowDef flowDef = defineFlow(source, sink);
  new LocalFlowConnector().connect(flowDef).complete();

  List<TupleEntry> tupleEntries = sink.result().asTupleEntryList();
  assertThat(tupleEntries.get(0).getString(FIELD_S), is("A"));
  assertThat(tupleEntries.get(0).getString(FIELD_Y), is("za"));
  assertThat(tupleEntries.get(0).getString(FIELD_V), is("a"));
  assertThat(tupleEntries.get(1).getString(FIELD_S), is("AA"));
  assertThat(tupleEntries.get(1).getString(FIELD_Y), is("zaa"));
  assertThat(tupleEntries.get(1).getString(FIELD_V), is("aa"));
  assertThat(tupleEntries.get(2).getString(FIELD_S), is("B"));
  assertThat(tupleEntries.get(3).getString(FIELD_S), is("BB"));
  assertThat(tupleEntries.get(3).getString(FIELD_Y), is("zbb"));
  assertThat(tupleEntries.get(3).getString(FIELD_V), is("bb"));
}
Example #6
Source File: FlinkConnector.java From cascading-flink with Apache License 2.0
@Override
public Flow connect(FlowDef flowDef) {
  // remember the flow's classpath entries before delegating to the default connect
  classPath.addAll(flowDef.getClassPath());
  return super.connect(flowDef);
}
Example #7
Source File: FlinkPlanner.java From cascading-flink with Apache License 2.0
@Override
protected FlinkFlow createFlow(FlowDef flowDef) {
  return new FlinkFlow(getPlatformInfo(), flowDef, getDefaultProperties(), getDefaultConfig());
}
Example #8
Source File: JoinFilterExampleCascading.java From hadoop-arch-book with Apache License 2.0
public static void main(String[] args) {
  String fooInputPath = args[0];
  String barInputPath = args[1];
  String outputPath = args[2];
  int fooValMax = Integer.parseInt(args[3]);
  int joinValMax = Integer.parseInt(args[4]);
  int numberOfReducers = Integer.parseInt(args[5]);

  Properties properties = new Properties();
  AppProps.setApplicationJarClass(properties, JoinFilterExampleCascading.class);
  properties.setProperty("mapred.reduce.tasks", Integer.toString(numberOfReducers));
  properties.setProperty("mapreduce.job.reduces", Integer.toString(numberOfReducers));

  SpillableProps props = SpillableProps.spillableProps()
      .setCompressSpill(true)
      .setMapSpillThreshold(50 * 1000);

  HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

  // create source and sink taps
  Fields fooFields = new Fields("fooId", "fooVal", "foobarId");
  Tap fooTap = new Hfs(new TextDelimited(fooFields, "|"), fooInputPath);
  Fields barFields = new Fields("barId", "barVal");
  Tap barTap = new Hfs(new TextDelimited(barFields, "|"), barInputPath);
  Tap outputTap = new Hfs(new TextDelimited(false, "|"), outputPath);

  // join keys for the two inputs
  Fields joinFooFields = new Fields("foobarId");
  Fields joinBarFields = new Fields("barId");

  Pipe fooPipe = new Pipe("fooPipe");
  Pipe barPipe = new Pipe("barPipe");

  // filter the foo side, then hash-join it with the bar side
  Pipe fooFiltered = new Each(fooPipe, fooFields, new FooFilter(fooValMax));
  Pipe joinedPipe = new HashJoin(fooFiltered, joinFooFields, barPipe, joinBarFields);
  props.setProperties(joinedPipe.getConfigDef(), Mode.REPLACE);

  // filter the joined tuples
  Fields joinFields = new Fields("fooId", "fooVal", "foobarId", "barVal");
  Pipe joinedFilteredPipe = new Each(joinedPipe, joinFields, new JoinedFilter(joinValMax));

  FlowDef flowDef = FlowDef.flowDef().setName("wc")
      .addSource(fooPipe, fooTap)
      .addSource(barPipe, barTap)
      .addTailSink(joinedFilteredPipe, outputTap);

  Flow wcFlow = flowConnector.connect(flowDef);
  wcFlow.writeDOT("dot/wc.dot"); // write the flow plan for inspection
  wcFlow.complete();
}
Example #9
Source File: PlungerFlow.java From plunger with Apache License 2.0
/** Constructs a new plunger flow. */
PlungerFlow() {
  flowDef = new FlowDef();
}
Example #10
Source File: PlungerFlow.java From plunger with Apache License 2.0
/** Gets the underlying FlowDef. */
FlowDef getFlowDef() {
  return flowDef;
}
Example #11
Source File: SortTest.java From plunger with Apache License 2.0
private static FlowDef defineFlow(Tap in, Tap out) {
  Pipe pipe = new Pipe("pipe");
  pipe = new SortAssembly(pipe);
  return FlowDef.flowDef().addSource(pipe, in).addTailSink(pipe, out);
}
Example #12
Source File: FlinkFlow.java From cascading-flink with Apache License 2.0
public FlinkFlow(PlatformInfo platformInfo, FlowDef flowDef, Map<Object, Object> properties, Configuration defaultConfig) {
  super(platformInfo, properties, defaultConfig, flowDef);
}