cascading.flow.FlowDef Java Examples
The following examples show how to use cascading.flow.FlowDef. Each example notes the original project, source file, and license it was taken from.
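Most of the examples below follow the same pattern: create source and sink Taps, build a pipe assembly, bind them together in a FlowDef, then hand the FlowDef to a FlowConnector and run the resulting Flow. Here is a minimal sketch of that pattern using Cascading's local mode; the class name FlowDefSketch and the file paths in.txt and out.txt are illustrative placeholders, not part of any example below.

import cascading.flow.Flow;
import cascading.flow.FlowDef;
import cascading.flow.local.LocalFlowConnector;
import cascading.pipe.Pipe;
import cascading.scheme.local.TextLine;
import cascading.tap.Tap;
import cascading.tap.local.FileTap;
import cascading.tuple.Fields;

public class FlowDefSketch {

  public static void main(String[] args) {
    // source and sink taps; the paths are placeholders
    Tap in = new FileTap(new TextLine(new Fields("line")), "in.txt");
    Tap out = new FileTap(new TextLine(new Fields("line")), "out.txt");

    // an identity pipe: tuples pass through unchanged
    Pipe pipe = new Pipe("copy");

    // bind the taps to the head and tail of the pipe assembly
    FlowDef flowDef = FlowDef.flowDef()
        .setName("copy")
        .addSource(pipe, in)
        .addTailSink(pipe, out);

    // plan and run the flow with the local (in-process) connector
    Flow flow = new LocalFlowConnector().connect(flowDef);
    flow.complete();
  }
}

Because the FlowDef only describes the flow, the same definition can be handed to a different FlowConnector (HadoopFlowConnector, FlinkConnector, and so on) to run on another platform, as Examples #2, #6, and #7 show.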
Example #1
Source File: CommonCrawlIndexTest.java From aws-big-data-blog with Apache License 2.0
@Test
public void testCreateCommonCrawlFlowDef() throws Exception {
  Properties properties = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);

  String sourcePath = properties.getProperty("inPath");
  String sinkPath = properties.getProperty("testCreateCommonCrawlFlowDefOutput");
  String sinkValidationPath = properties.getProperty("testCreateCommonCrawlFlowDefOutputValidation");

  // create the Cascading "source" (input) tap to read the commonCrawl WAT file(s)
  Tap source = new FileTap(new TextLine(new Fields("line")), sourcePath);

  // create the Cascading "sink" (output) tap to dump the results
  Tap sink = new FileTap(new TextLine(new Fields("line")), sinkPath);

  // build the Cascading flow definition
  FlowDef flowDef = CommonCrawlIndex.createCommonCrawlFlowDef(source, sink);

  new LocalFlowConnector(properties).connect(flowDef).complete();
  Assert.sameContent(sinkPath, sinkValidationPath);
}
Example #2
Source File: WordCount.java From cascading-flink with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 2) {
    throw new IllegalArgumentException("Please specify input and output paths as arguments.");
  }

  Fields token = new Fields("token", String.class);
  Fields text = new Fields("text");
  RegexSplitGenerator splitter = new RegexSplitGenerator(token, "\\s+");

  // only returns "token"
  Pipe docPipe = new Each("token", text, splitter, Fields.RESULTS);

  Pipe wcPipe = new Pipe("wc", docPipe);
  wcPipe = new AggregateBy(wcPipe, token, new CountBy(new Fields("count")));

  Tap inTap = new Hfs(new TextDelimited(text, "\n"), args[0]);
  Tap outTap = new Hfs(new TextDelimited(false, "\n"), args[1], SinkMode.REPLACE);

  FlowDef flowDef = FlowDef.flowDef().setName("wc")
      .addSource(docPipe, inTap)
      .addTailSink(wcPipe, outTap);

  FlowConnector flowConnector = new FlinkConnector();
  Flow wcFlow = flowConnector.connect(flowDef);
  wcFlow.complete();
}
Example #3
Source File: Main.java From aws-big-data-blog with Apache License 2.0
public static void main(String[] args) {
  Properties properties = null;
  try {
    properties = new ConfigReader().renderProperties(Main.class);
    // guard against a missing argument before reading args[0]
    if (args.length > 0 && args[0] != null && args[0].length() > 0) {
      properties.put("inPath", args[0]);
    }
  } catch (IOException e) {
    System.out.println("Could not read your config.properties file");
    e.printStackTrace();
  }
  FlowDef flowDef = buildFlowDef(properties);
  new HadoopFlowConnector(properties).connect(flowDef).complete();
}
Example #4
Source File: CommonCrawlIndexTest.java From aws-big-data-blog with Apache License 2.0
@Test
public void testMain() throws IOException {
  Properties properties = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);
  FlowDef flowDef = CommonCrawlIndex.buildFlowDef(properties);

  if ("LOCAL".equals(properties.getProperty("platform"))) {
    // use the Cascading local connector to exclude Hadoop and just test the logic
    new LocalFlowConnector(properties).connect(flowDef).complete();
  } else {
    new HadoopFlowConnector(properties).connect(flowDef).complete();
  }
}
Example #5
Source File: SortTest.java From plunger with Apache License 2.0
@Test
public void testComplete() throws Exception {
  Bucket sink = new Bucket();
  Fields inFields = Fields.join(FIELD_S, FIELD_X, FIELD_Y);
  TupleListTap source = new DataBuilder(inFields)
      .addTuple("A", "a", "za")
      .addTuple("B", "b", "zb")
      .addTuple("AA", "aa", "zaa")
      .addTuple("BB", "bb", "zbb")
      .toTap();

  FlowDef flowDef = defineFlow(source, sink);
  new LocalFlowConnector().connect(flowDef).complete();

  List<TupleEntry> tupleEntries = sink.result().asTupleEntryList();
  assertThat(tupleEntries.get(0).getString(FIELD_S), is("A"));
  assertThat(tupleEntries.get(0).getString(FIELD_Y), is("za"));
  assertThat(tupleEntries.get(0).getString(FIELD_V), is("a"));
  assertThat(tupleEntries.get(1).getString(FIELD_S), is("AA"));
  assertThat(tupleEntries.get(1).getString(FIELD_Y), is("zaa"));
  assertThat(tupleEntries.get(1).getString(FIELD_V), is("aa"));
  assertThat(tupleEntries.get(2).getString(FIELD_S), is("B"));
  assertThat(tupleEntries.get(3).getString(FIELD_S), is("BB"));
  assertThat(tupleEntries.get(3).getString(FIELD_Y), is("zbb"));
  assertThat(tupleEntries.get(3).getString(FIELD_V), is("bb"));
}
Example #6
Source File: FlinkConnector.java From cascading-flink with Apache License 2.0
@Override
public Flow connect(FlowDef flowDef) {
  // remember the flow's classpath entries before delegating to the default connect
  classPath.addAll(flowDef.getClassPath());
  return super.connect(flowDef);
}
Example #7
Source File: FlinkPlanner.java From cascading-flink with Apache License 2.0
@Override
protected FlinkFlow createFlow(FlowDef flowDef) {
  return new FlinkFlow(getPlatformInfo(), flowDef, getDefaultProperties(), getDefaultConfig());
}
Example #8
Source File: JoinFilterExampleCascading.java From hadoop-arch-book with Apache License 2.0
public static void main(String[] args) {
  String fooInputPath = args[0];
  String barInputPath = args[1];
  String outputPath = args[2];
  int fooValMax = Integer.parseInt(args[3]);
  int joinValMax = Integer.parseInt(args[4]);
  int numberOfReducers = Integer.parseInt(args[5]);

  Properties properties = new Properties();
  AppProps.setApplicationJarClass(properties, JoinFilterExampleCascading.class);
  properties.setProperty("mapred.reduce.tasks", Integer.toString(numberOfReducers));
  properties.setProperty("mapreduce.job.reduces", Integer.toString(numberOfReducers));

  SpillableProps props = SpillableProps.spillableProps()
      .setCompressSpill(true)
      .setMapSpillThreshold(50 * 1000);

  HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

  // create source and sink taps
  Fields fooFields = new Fields("fooId", "fooVal", "foobarId");
  Tap fooTap = new Hfs(new TextDelimited(fooFields, "|"), fooInputPath);
  Fields barFields = new Fields("barId", "barVal");
  Tap barTap = new Hfs(new TextDelimited(barFields, "|"), barInputPath);
  Tap outputTap = new Hfs(new TextDelimited(false, "|"), outputPath);

  // join keys for the two inputs
  Fields joinFooFields = new Fields("foobarId");
  Fields joinBarFields = new Fields("barId");

  Pipe fooPipe = new Pipe("fooPipe");
  Pipe barPipe = new Pipe("barPipe");

  // filter the foo side, then hash-join it with the bar side
  Pipe fooFiltered = new Each(fooPipe, fooFields, new FooFilter(fooValMax));
  Pipe joinedPipe = new HashJoin(fooFiltered, joinFooFields, barPipe, joinBarFields);
  props.setProperties(joinedPipe.getConfigDef(), Mode.REPLACE);

  // filter the joined tuples
  Fields joinFields = new Fields("fooId", "fooVal", "foobarId", "barVal");
  Pipe joinedFilteredPipe = new Each(joinedPipe, joinFields, new JoinedFilter(joinValMax));

  FlowDef flowDef = FlowDef.flowDef().setName("wc")
      .addSource(fooPipe, fooTap)
      .addSource(barPipe, barTap)
      .addTailSink(joinedFilteredPipe, outputTap);

  Flow wcFlow = flowConnector.connect(flowDef);
  wcFlow.writeDOT("dot/wc.dot"); // write the flow plan for inspection
  wcFlow.complete();
}
Example #9
Source File: PlungerFlow.java From plunger with Apache License 2.0
/** Constructs a new plunger flow. */
PlungerFlow() {
  flowDef = new FlowDef();
}
Example #10
Source File: PlungerFlow.java From plunger with Apache License 2.0
/** Gets the underlying FlowDef. */
FlowDef getFlowDef() {
  return flowDef;
}
Example #11
Source File: SortTest.java From plunger with Apache License 2.0
private static FlowDef defineFlow(Tap in, Tap out) {
  Pipe pipe = new Pipe("pipe");
  pipe = new SortAssembly(pipe);
  return FlowDef.flowDef().addSource(pipe, in).addTailSink(pipe, out);
}
Example #12
Source File: FlinkFlow.java From cascading-flink with Apache License 2.0
public FlinkFlow(PlatformInfo platformInfo, FlowDef flowDef, Map<Object, Object> properties, Configuration defaultConfig) {
  super(platformInfo, properties, defaultConfig, flowDef);
}