org.apache.hadoop.examples.terasort.TeraGen Java Examples

The following examples show how to use org.apache.hadoop.examples.terasort.TeraGen. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExampleDriver.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the example programs with a {@code ProgramDriver} and hands the
 * command-line arguments to it.
 *
 * <p>The process exits with the value returned by {@code ProgramDriver.run}.
 * If anything is thrown during registration or the run, the stack trace is
 * printed and the process exits with -1.
 *
 * @param argv command-line arguments, passed unchanged to the driver
 */
public static void main(String[] argv) {
  // Stays at -1 unless the driver's run call returns normally.
  int status = -1;
  final ProgramDriver programDriver = new ProgramDriver();
  try {
    // Word statistics examples.
    programDriver.addClass(
        "wordcount",
        WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    programDriver.addClass(
        "wordmean",
        WordMean.class,
        "A map/reduce program that counts the average length of the words in the input files.");
    programDriver.addClass(
        "wordmedian",
        WordMedian.class,
        "A map/reduce program that counts the median length of the words in the input files.");
    programDriver.addClass(
        "wordstandarddeviation",
        WordStandardDeviation.class,
        "A map/reduce program that counts the standard deviation of the length of the words in the input files.");
    programDriver.addClass(
        "aggregatewordcount",
        AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    programDriver.addClass(
        "aggregatewordhist",
        AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    programDriver.addClass(
        "grep",
        Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");

    // Random data writers and the sort that consumes their output.
    programDriver.addClass(
        "randomwriter",
        RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    programDriver.addClass(
        "randomtextwriter",
        RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    programDriver.addClass(
        "sort",
        Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");

    // These examples carry their own description constants.
    programDriver.addClass("pi", QuasiMonteCarlo.class, QuasiMonteCarlo.DESCRIPTION);
    programDriver.addClass("bbp", BaileyBorweinPlouffe.class, BaileyBorweinPlouffe.DESCRIPTION);
    programDriver.addClass("distbbp", DistBbp.class, DistBbp.DESCRIPTION);

    programDriver.addClass(
        "pentomino",
        DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    programDriver.addClass(
        "secondarysort",
        SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    programDriver.addClass("sudoku", Sudoku.class, "A sudoku solver.");
    programDriver.addClass(
        "join",
        Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    programDriver.addClass(
        "multifilewc",
        MultiFileWordCount.class,
        "A job that counts words from several files.");
    programDriver.addClass(
        "dbcount",
        DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite: generate, sort, validate.
    programDriver.addClass("teragen", TeraGen.class, "Generate data for the terasort");
    programDriver.addClass("terasort", TeraSort.class, "Run the terasort");
    programDriver.addClass("teravalidate", TeraValidate.class, "Checking results of terasort");

    status = programDriver.run(argv);
  } catch (Throwable failure) {
    failure.printStackTrace();
  }

  System.exit(status);
}
 
Example #2
Source File: ExampleDriver.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the example programs with a {@code ProgramDriver} and hands the
 * command-line arguments to {@code ProgramDriver.driver}.
 *
 * <p>The process exits with 0 when the driver call returns normally. If
 * anything is thrown during registration or the driver call, the stack trace
 * is printed and the process exits with -1.
 *
 * @param argv command-line arguments, passed unchanged to the driver
 */
public static void main(String[] argv) {
  // Stays at -1 unless the driver call below completes without throwing.
  int status = -1;

  final ProgramDriver programDriver = new ProgramDriver();
  try {
    programDriver.addClass(
        "wordcount",
        WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    programDriver.addClass(
        "aggregatewordcount",
        AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    programDriver.addClass(
        "aggregatewordhist",
        AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    programDriver.addClass(
        "grep",
        Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");
    programDriver.addClass(
        "randomwriter",
        RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    programDriver.addClass(
        "randomtextwriter",
        RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    programDriver.addClass(
        "sort",
        Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");
    programDriver.addClass(
        "pi",
        PiEstimator.class,
        "A map/reduce program that estimates Pi using monte-carlo method.");
    programDriver.addClass(
        "pentomino",
        DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    programDriver.addClass(
        "secondarysort",
        SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    programDriver.addClass("sudoku", Sudoku.class, "A sudoku solver.");
    programDriver.addClass(
        "sleep",
        SleepJob.class,
        "A job that sleeps at each map and reduce task.");
    programDriver.addClass(
        "join",
        Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    programDriver.addClass(
        "multifilewc",
        MultiFileWordCount.class,
        "A job that counts words from several files.");
    programDriver.addClass(
        "dbcount",
        DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite: generate, sort, validate.
    programDriver.addClass("teragen", TeraGen.class, "Generate data for the terasort");
    programDriver.addClass("terasort", TeraSort.class, "Run the terasort");
    programDriver.addClass("teravalidate", TeraValidate.class, "Checking results of terasort");

    // Examples registered by this driver beyond the ones above.
    programDriver.addClass("kmeans", Kmeans.class, "Kmeans on movies data");
    programDriver.addClass(
        "classification",
        Classification.class,
        "Classify movies into clusters");
    programDriver.addClass(
        "histogram_movies",
        HistogramMovies.class,
        "A map/reduce program that gives a histogram of movies based on ratings.");
    programDriver.addClass(
        "histogram_ratings",
        HistogramRatings.class,
        "A map/reduce program that gives a histogram of users ratings on movies.");
    programDriver.addClass(
        "selfjoin",
        SelfJoin.class,
        "A map/reduce program that creates k+1 associations given set of k-field associations");
    programDriver.addClass(
        "invertedindex",
        InvertedIndex.class,
        "A map/reduce program that creates an inverted index of documents.");
    programDriver.addClass(
        "adjlist",
        AdjList.class,
        "A map/reduce program that finds adjacency list of graph nodes.");
    programDriver.addClass(
        "termvectorperhost",
        TermVectorPerHost.class,
        "A map/reduce program that creates the term-vectors (frequency of words) per document.");
    programDriver.addClass(
        "sequencecount",
        SequenceCount.class,
        "A map/reduce program that counts the occurrence of consecutive words in the input files.");
    programDriver.addClass(
        "rankedinvertedindex",
        RankedInvertedIndex.class,
        "A map/reduce program that creates the top k document lists per word");

    programDriver.driver(argv);

    // Reaching this point means nothing was thrown.
    status = 0;
  } catch (Throwable failure) {
    failure.printStackTrace();
  }

  System.exit(status);
}
 
Example #3
Source File: HadoopTeraSortTest.java    From ignite with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the data generation stage: removes the output directory, runs
 * {@code TeraGen} through {@code ToolRunner} with the local framework
 * setting, and then asserts that the files found in the output directory
 * add up to {@code dataSizeBytes()} bytes.
 *
 * @throws IllegalStateException if the configured data size yields less than one line
 * @throws Exception if a file system operation or the TeraGen run fails
 */
private void teraGenerate() throws Exception {
    System.out.println("TeraGenerate ===============================================================");

    // Start from a clean output directory (recursive delete).
    getFileSystem().delete(new Path(generateOutDir), true);

    // TeraGen makes 100 bytes per line.
    final long lineCount = dataSizeBytes() / 100;

    if (lineCount < 1) {
        throw new IllegalStateException("Data size is too small: " + dataSizeBytes());
    }

    // Generate input data:
    final String[] teraGenArgs = {
        "-Dmapreduce.framework.name=local",
        String.valueOf(lineCount),
        generateOutDir
    };
    int exitCode = ToolRunner.run(new Configuration(), new TeraGen(), teraGenArgs);

    assertEquals(0, exitCode);

    FileStatus[] generatedFiles = getFileSystem().listStatus(new Path(generateOutDir));

    // Add up the length of every entry listed in the output directory.
    long totalBytes = 0;
    for (FileStatus entry : generatedFiles) {
        totalBytes += entry.getLen();
    }

    // Ensure correct size data is generated.
    assertEquals(dataSizeBytes(), totalBytes);
}
 
Example #4
Source File: ExampleDriver.java    From RDFS with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the example programs with a {@code ProgramDriver} and hands the
 * command-line arguments to {@code ProgramDriver.driver}.
 *
 * <p>The process exits with 0 when the driver call returns normally. If
 * anything is thrown during registration or the driver call, the stack trace
 * is printed and the process exits with -1.
 *
 * @param argv command-line arguments, passed unchanged to the driver
 */
public static void main(String[] argv) {
  // Stays at -1 unless the driver call below completes without throwing.
  int status = -1;
  final ProgramDriver programDriver = new ProgramDriver();
  try {
    programDriver.addClass(
        "wordcount",
        WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    programDriver.addClass(
        "aggregatewordcount",
        AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    programDriver.addClass(
        "aggregatewordhist",
        AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    programDriver.addClass(
        "grep",
        Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");
    programDriver.addClass(
        "randomwriter",
        RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    programDriver.addClass(
        "randomtextwriter",
        RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    programDriver.addClass(
        "sort",
        Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");
    programDriver.addClass(
        "pi",
        PiEstimator.class,
        "A map/reduce program that estimates Pi using monte-carlo method.");
    programDriver.addClass(
        "pentomino",
        DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    programDriver.addClass(
        "secondarysort",
        SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    programDriver.addClass("sudoku", Sudoku.class, "A sudoku solver.");
    programDriver.addClass(
        "sleep",
        SleepJob.class,
        "A job that sleeps at each map and reduce task.");
    programDriver.addClass(
        "join",
        Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    programDriver.addClass(
        "multifilewc",
        MultiFileWordCount.class,
        "A job that counts words from several files.");
    programDriver.addClass(
        "dbcount",
        DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite: generate, sort, validate.
    programDriver.addClass("teragen", TeraGen.class, "Generate data for the terasort");
    programDriver.addClass("terasort", TeraSort.class, "Run the terasort");
    programDriver.addClass("teravalidate", TeraValidate.class, "Checking results of terasort");

    programDriver.driver(argv);

    // Reaching this point means nothing was thrown.
    status = 0;
  } catch (Throwable failure) {
    failure.printStackTrace();
  }

  System.exit(status);
}
 
Example #5
Source File: ExampleDriver.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the example programs with a {@code ProgramDriver} and hands the
 * command-line arguments to {@code ProgramDriver.driver}.
 *
 * <p>The process exits with 0 when the driver call returns normally. If
 * anything is thrown during registration or the driver call, the stack trace
 * is printed and the process exits with -1.
 *
 * @param argv command-line arguments, passed unchanged to the driver
 */
public static void main(String[] argv) {
  // Stays at -1 unless the driver call below completes without throwing.
  int status = -1;
  final ProgramDriver programDriver = new ProgramDriver();
  try {
    programDriver.addClass(
        "wordcount",
        WordCount.class,
        "A map/reduce program that counts the words in the input files.");
    programDriver.addClass(
        "aggregatewordcount",
        AggregateWordCount.class,
        "An Aggregate based map/reduce program that counts the words in the input files.");
    programDriver.addClass(
        "aggregatewordhist",
        AggregateWordHistogram.class,
        "An Aggregate based map/reduce program that computes the histogram of the words in the input files.");
    programDriver.addClass(
        "grep",
        Grep.class,
        "A map/reduce program that counts the matches of a regex in the input.");
    programDriver.addClass(
        "randomwriter",
        RandomWriter.class,
        "A map/reduce program that writes 10GB of random data per node.");
    programDriver.addClass(
        "randomtextwriter",
        RandomTextWriter.class,
        "A map/reduce program that writes 10GB of random textual data per node.");
    programDriver.addClass(
        "sort",
        Sort.class,
        "A map/reduce program that sorts the data written by the random writer.");
    programDriver.addClass(
        "pi",
        PiEstimator.class,
        "A map/reduce program that estimates Pi using monte-carlo method.");
    programDriver.addClass(
        "pentomino",
        DistributedPentomino.class,
        "A map/reduce tile laying program to find solutions to pentomino problems.");
    programDriver.addClass(
        "secondarysort",
        SecondarySort.class,
        "An example defining a secondary sort to the reduce.");
    programDriver.addClass("sudoku", Sudoku.class, "A sudoku solver.");
    programDriver.addClass(
        "sleep",
        SleepJob.class,
        "A job that sleeps at each map and reduce task.");
    programDriver.addClass(
        "join",
        Join.class,
        "A job that effects a join over sorted, equally partitioned datasets");
    programDriver.addClass(
        "multifilewc",
        MultiFileWordCount.class,
        "A job that counts words from several files.");
    programDriver.addClass(
        "dbcount",
        DBCountPageView.class,
        "An example job that count the pageview counts from a database.");

    // TeraSort suite: generate, sort, validate.
    programDriver.addClass("teragen", TeraGen.class, "Generate data for the terasort");
    programDriver.addClass("terasort", TeraSort.class, "Run the terasort");
    programDriver.addClass("teravalidate", TeraValidate.class, "Checking results of terasort");

    programDriver.driver(argv);

    // Reaching this point means nothing was thrown.
    status = 0;
  } catch (Throwable failure) {
    failure.printStackTrace();
  }

  System.exit(status);
}