org.apache.hadoop.mapred.ClusterStatus Java Examples

Source File: GenericMRLoadGenerator.java From hadoop with Apache License 2.0

6 votes

/**
 * Configures {@code job} to synthesize random input when no input directory
 * was specified.
 *
 * <p>Installs {@code RandomInputFormat} and {@code RandomMapOutput}, then
 * derives the number of map tasks from the configured byte budgets. When no
 * total is configured it defaults to maps-per-host x bytes-per-map x the
 * task-tracker count reported by the cluster.
 *
 * @param job the job whose configuration is updated in place
 * @throws IOException propagated from the cluster-status lookup, or thrown
 *         when the per-map byte count is configured as 0
 */
protected static void confRandom(Job job)
    throws IOException {
  // from RandomWriter
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapOutput.class);

  final Configuration jobConf = job.getConfiguration();
  final ClusterStatus status = new JobClient(jobConf).getClusterStatus();
  final int mapsPerHost = jobConf.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
  final long bytesPerMap =
    jobConf.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024);
  if (bytesPerMap == 0) {
    throw new IOException(
        "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
  }

  // Default budget used only when no explicit total is configured.
  final long defaultTotal =
    mapsPerHost * bytesPerMap * status.getTaskTrackers();
  final long totalBytes =
    jobConf.getLong(RandomTextWriter.TOTAL_BYTES, defaultTotal);

  int mapCount = (int)(totalBytes / bytesPerMap);
  if (mapCount == 0 && totalBytes > 0) {
    // Less than one map's worth of data: run a single, smaller map.
    mapCount = 1;
    jobConf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytes);
  }
  jobConf.setInt(MRJobConfig.NUM_MAPS, mapCount);
}

Source File: GenerateData.java From RDFS with Apache License 2.0

6 votes

/**
 * Builds one generation split per active task tracker, dividing the requested
 * byte count evenly by the number of task trackers the cluster reports.
 *
 * @param jobCtxt job context supplying the configuration; it must define
 *        {@code GRIDMIX_GEN_BYTES} with a non-negative value
 * @return one {@code GenSplit} for each active tracker whose name matches
 *         {@code tracker_HOST:...}, located on that host; trackers with
 *         non-matching names are skipped
 * @throws IOException if the byte count is missing or negative, or if the
 *         cluster reports no task trackers to spread the data over
 */
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  if (nTrackers <= 0) {
    // Without this guard the division below fails with an opaque
    // ArithmeticException ("/ by zero").
    throw new IOException(
        "No task trackers available to generate data on: " + nTrackers);
  }
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  // Tracker names look like "tracker_<host>:<rest>"; group 1 is the host.
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}

Source File: JobTrackerJspHelper.java From RDFS with Apache License 2.0

6 votes

/**
 * Writes an XML fragment summarizing the JobTracker's state: running map and
 * reduce task counts, total submissions, node count, map/reduce capacity and
 * the average task capacity per node.
 *
 * @param out writer receiving the XML fragment in a single print call
 * @param tracker JobTracker whose cluster status is summarized
 * @throws IOException if writing to {@code out} fails
 */
public void generateSummaryTable(JspWriter out,
                                 JobTracker tracker) throws IOException {
  final ClusterStatus cluster = tracker.getClusterStatus();
  final int mapCapacity = cluster.getMaxMapTasks();
  final int reduceCapacity = cluster.getMaxReduceTasks();
  final int nodeCount = cluster.getTaskTrackers();

  // Average slot capacity per node; "-" when there is no node to divide by.
  final String avgTasksPerNode;
  if (nodeCount <= 0) {
    avgTasksPerNode = "-";
  } else {
    avgTasksPerNode = percentFormat.format(
        (double) (mapCapacity + reduceCapacity) / (double) nodeCount);
  }

  final StringBuilder xml = new StringBuilder();
  xml.append("<maps>").append(cluster.getMapTasks()).append("</maps>\n");
  xml.append("<reduces>").append(cluster.getReduceTasks()).append("</reduces>\n");
  xml.append("<total_submissions>").append(tracker.getTotalSubmissions())
     .append("</total_submissions>\n");
  xml.append("<nodes>").append(nodeCount).append("</nodes>\n");
  xml.append("<map_task_capacity>").append(mapCapacity)
     .append("</map_task_capacity>\n");
  xml.append("<reduce_task_capacity>").append(reduceCapacity)
     .append("</reduce_task_capacity>\n");
  xml.append("<avg_tasks_per_node>").append(avgTasksPerNode)
     .append("</avg_tasks_per_node>\n");
  out.print(xml.toString());
}

Source File: InfrastructureAnalyzer.java From systemds with Apache License 2.0

6 votes

/**
 * Reads the cluster status through a {@code JobClient} built from the cached
 * job configuration and, when a status is available, records the task-tracker
 * count and the map/reduce task capacities before analyzing the plain
 * configuration properties.
 *
 * @throws RuntimeException wrapping any {@code IOException} raised while
 *         querying the cluster
 */
private static void analyzeHadoopCluster() {
	try {
		JobClient jobClient = new JobClient(ConfigurationManager.getCachedJobConf());
		ClusterStatus clusterInfo = jobClient.getClusterStatus();
		if( clusterInfo == null ) { //not in cluster mode, nothing to analyze
			return;
		}

		//analyze cluster status
		_remotePar = clusterInfo.getTaskTrackers();
		_remoteParMap = clusterInfo.getMaxMapTasks();
		_remoteParReduce = clusterInfo.getMaxReduceTasks();

		//analyze pure configuration properties
		analyzeHadoopConfiguration();
	}
	catch (IOException e) {
		throw new RuntimeException("Unable to analyze infrastructure.",e);
	}
}

Source File: StressJobFactory.java From big-c with Apache License 2.0

6 votes

/**
 * Listener callback for new cluster statistics. Refreshes the tracked
 * map/reduce capacities and recomputes the job load as
 * {@code (int) (maxJobTrackerRatio * trackers) - runningJobs}.
 *
 * <p>Any exception raised while updating is logged and not rethrown.
 *
 * @param item the cluster statistics carrying the latest cluster status and
 *        the number of running jobs
 */
@Override
public void update(Statistics.ClusterStats item) {
  final ClusterStatus snapshot = item.getStatus();
  try {
    // update the max cluster map/reduce task capacity
    loadStatus.updateMapCapacity(snapshot.getMaxMapTasks());
    loadStatus.updateReduceCapacity(snapshot.getMaxReduceTasks());

    // Job budget scales with the tracker count; subtract what already runs.
    final int jobBudget =
      (int) (maxJobTrackerRatio * snapshot.getTaskTrackers());
    loadStatus.updateJobLoad(jobBudget - item.getNumRunningJob());
  } catch (Exception e) {
    LOG.error("Couldn't get the new Status",e);
  }
}

Source File: GenerateData.java From big-c with Apache License 2.0

6 votes

/**
 * Creates one generation split for every active task tracker, giving each an
 * equal share of the requested bytes (total bytes divided by the tracker
 * count reported by the cluster).
 *
 * @param jobCtxt job context whose configuration must define a non-negative
 *        {@code GRIDMIX_GEN_BYTES}
 * @return a {@code GenSplit} per active tracker named
 *         {@code tracker_HOST:...}, located on that host; other tracker
 *         names are reported on stderr and skipped
 * @throws IOException if the byte count is missing or negative, or if the
 *         cluster reports no task trackers
 */
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  if (nTrackers <= 0) {
    // Fail with a clear message instead of an ArithmeticException from the
    // per-tracker division below.
    throw new IOException(
        "No task trackers available to generate data on: " + nTrackers);
  }
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  // Tracker names look like "tracker_<host>:<rest>"; group 1 is the host.
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}

Source File: GenericMRLoadGenerator.java From big-c with Apache License 2.0

6 votes

/**
 * Sets {@code job} up to generate random data because no input directory was
 * given.
 *
 * <p>The map count is total bytes divided by bytes per map. The total
 * defaults to maps-per-host x bytes-per-map x the number of task trackers
 * reported by the cluster. A positive total smaller than one map's share
 * yields a single map writing exactly that total.
 *
 * @param job job to configure; its configuration is modified in place
 * @throws IOException propagated from the cluster-status lookup, or thrown
 *         when bytes-per-map is configured as 0
 */
protected static void confRandom(Job job)
    throws IOException {
  // from RandomWriter
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapOutput.class);

  final Configuration settings = job.getConfiguration();
  final ClusterStatus clusterInfo = new JobClient(settings).getClusterStatus();
  final int perHostMaps = settings.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
  final long perMapBytes =
    settings.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024);
  if (perMapBytes == 0) {
    throw new IOException(
        "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
  }

  final long overallBytes = settings.getLong(RandomTextWriter.TOTAL_BYTES,
       perHostMaps * perMapBytes * clusterInfo.getTaskTrackers());
  int maps = (int)(overallBytes / perMapBytes);
  final boolean lessThanOneMap = maps == 0 && overallBytes > 0;
  if (lessThanOneMap) {
    // Shrink the single map to the requested total.
    maps = 1;
    settings.setLong(RandomTextWriter.BYTES_PER_MAP, overallBytes);
  }
  settings.setInt(MRJobConfig.NUM_MAPS, maps);
}

Source File: StressJobFactory.java From hadoop with Apache License 2.0

6 votes

/**
 * Called when new cluster statistics arrive. Pushes the current maximum map
 * and reduce capacities into {@code loadStatus} and recomputes the job load
 * from the tracker count, {@code maxJobTrackerRatio} and the number of
 * running jobs.
 *
 * <p>Failures are logged via {@code LOG.error} and otherwise ignored.
 *
 * @param item cluster statistics holding the latest status and the number
 *        of running jobs
 */
@Override
public void update(Statistics.ClusterStats item) {
  final ClusterStatus latest = item.getStatus();
  try {
    // update the max cluster map/reduce task capacity
    final int mapSlots = latest.getMaxMapTasks();
    loadStatus.updateMapCapacity(mapSlots);

    final int reduceSlots = latest.getMaxReduceTasks();
    loadStatus.updateReduceCapacity(reduceSlots);

    final int trackerCount = latest.getTaskTrackers();
    final int allowedJobs = (int) (maxJobTrackerRatio * trackerCount);
    loadStatus.updateJobLoad(allowedJobs - item.getNumRunningJob());
  } catch (Exception e) {
    LOG.error("Couldn't get the new Status",e);
  }
}

Source File: GenerateData.java From hadoop with Apache License 2.0

6 votes

/**
 * Produces the input splits for data generation: one {@code GenSplit} per
 * active task tracker, each sized as the requested bytes divided by the
 * number of task trackers the cluster reports.
 *
 * @param jobCtxt job context; its configuration must carry a non-negative
 *        {@code GRIDMIX_GEN_BYTES}
 * @return splits located on the host parsed from each active tracker name
 *         ({@code tracker_HOST:...}); unparsable names are skipped
 * @throws IOException if the byte count is missing or negative, or if no
 *         task trackers are reported by the cluster
 */
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  if (nTrackers <= 0) {
    // Dividing by a zero tracker count would otherwise surface as an
    // unexplained ArithmeticException.
    throw new IOException(
        "No task trackers available to generate data on: " + nTrackers);
  }
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  // Tracker names look like "tracker_<host>:<rest>"; group 1 is the host.
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}

Source File: InfrastructureAnalyzer.java From systemds with Apache License 2.0

6 votes

/**
 * Queries the cluster status using the cached job configuration. If a status
 * is returned, stores the number of task trackers and the maximum map and
 * reduce task counts, then analyzes the configuration properties.
 *
 * @throws RuntimeException if querying the cluster raises an
 *         {@code IOException} (kept as the cause)
 */
private static void analyzeHadoopCluster() {
	try {
		JobConf cachedConf = ConfigurationManager.getCachedJobConf();
		ClusterStatus status = new JobClient(cachedConf).getClusterStatus();
		if( status == null ) { //no status: not in cluster mode
			return;
		}

		//analyze cluster status
		_remotePar = status.getTaskTrackers();
		_remoteParMap = status.getMaxMapTasks();
		_remoteParReduce = status.getMaxReduceTasks();

		//analyze pure configuration properties
		analyzeHadoopConfiguration();
	}
	catch (IOException e) {
		throw new RuntimeException("Unable to analyze infrastructure.",e);
	}
}

Source File: TestCluster.java From imputationserver with GNU Affero General Public License v3.0

5 votes

/**
 * Starts the local test cluster.
 *
 * <p>Recreates {@code WORKING_DIRECTORY} from scratch on every call. The
 * mini clusters themselves are only started when {@code cluster} is still
 * {@code null}: first a {@code MiniDFSCluster}, then a {@code MiniMRCluster}
 * pointed at that file system, after which the active tracker names are
 * printed.
 *
 * @throws IOException if creating a cluster or querying its status fails
 */
public void start() throws IOException {

		// Wipe any leftovers from a previous run and start with an empty directory.
		File testCluster = new File(WORKING_DIRECTORY);
		if (testCluster.exists()) {
			FileUtil.deleteDirectory(testCluster);
		}
		testCluster.mkdirs();
		
		File testClusterData = new File(WORKING_DIRECTORY + "/data");
		File testClusterLog = new File(WORKING_DIRECTORY + "/logs");

		
		if (cluster == null) {

			// HDFS first: store its data below the working directory.
			conf = new HdfsConfiguration();		
			conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR,
					testClusterData.getAbsolutePath());
			cluster = new MiniDFSCluster.Builder(conf).build();
			fs = cluster.getFileSystem();

			// set mincluster as default config
			HdfsUtil.setDefaultConfiguration(conf);
			// The log directory property is set before the MR cluster is created.
			System.setProperty("hadoop.log.dir", testClusterLog.getAbsolutePath());
			// NOTE(review): the two 1s are presumably the tracker and directory
			// counts - confirm against the MiniMRCluster constructor.
			MiniMRCluster mrCluster = new MiniMRCluster(1, fs.getUri()
					.toString(), 1, null, null, new JobConf(conf));
			JobConf mrClusterConf = mrCluster.createJobConf();
			// Replace the default configuration with the MR cluster's one.
			HdfsUtil.setDefaultConfiguration(new Configuration(mrClusterConf));

			System.out.println("------");

			// Print the active trackers of the freshly started MR cluster.
			JobClient client = new JobClient(mrClusterConf);
			ClusterStatus status = client.getClusterStatus(true);
			System.out.println(status.getActiveTrackerNames());
		}
	}

Source File: Statistics.java From hadoop with Apache License 2.0

5 votes

/**
 * Stores {@code clusterStatus} in the object returned by
 * {@code ClusterStats.getClusterStats()} and hands that object to every
 * registered cluster-stat listener.
 *
 * @param clusterStatus the latest cluster status to publish
 */
private void updateAndNotifyClusterStatsListeners(
  ClusterStatus clusterStatus) {
  final ClusterStats snapshot = ClusterStats.getClusterStats();
  snapshot.setClusterMetric(clusterStatus);
  for (final StatListener<ClusterStats> subscriber : clusterStatlisteners) {
    subscriber.update(snapshot);
  }
}

Source File: Statistics.java From big-c with Apache License 2.0

5 votes

/**
 * Publishes a new cluster status: records it on the {@code ClusterStats}
 * obtained from {@code ClusterStats.getClusterStats()} and then calls
 * {@code update} on each listener in {@code clusterStatlisteners}.
 *
 * @param clusterStatus the cluster status to record and broadcast
 */
private void updateAndNotifyClusterStatsListeners(
  ClusterStatus clusterStatus) {
  final ClusterStats current = ClusterStats.getClusterStats();
  current.setClusterMetric(clusterStatus);
  for (final StatListener<ClusterStats> observer : clusterStatlisteners) {
    observer.update(current);
  }
}

Source File: RandomWriter.java From incubator-tez with Apache License 2.0

4 votes

/**
 * This is the main routine for launching a distributed random write job.
 * By default it runs 10 maps per host and each map writes 1 GiB of data to
 * the output directory. There is no reduce phase (0 reduce tasks).
 *
 * @param args command-line arguments; {@code args[0]} is the output directory
 * @return 2 if no output directory was given, -2 if {@code BYTES_PER_MAP} is
 *         configured as 0, otherwise 0 if the job succeeded and 1 if it failed
 * @throws Exception if querying the cluster or running the job fails
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  
  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    // Fixed: the message previously ran "have" and the key name together.
    System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  // Default total: maps per host x bytes per map x number of task trackers.
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data: run one map writing the total.
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = new Job(conf);
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);        
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}

Source File: RandomWriter.java From hadoop with Apache License 2.0

4 votes

/**
 * This is the main routine for launching a distributed random write job.
 * By default it runs 10 maps per host and each map writes 1 GiB of data to
 * the output directory. There is no reduce phase (0 reduce tasks).
 *
 * @param args command-line arguments; {@code args[0]} is the output directory
 * @return 2 if no output directory was given, -2 if {@code BYTES_PER_MAP} is
 *         configured as 0, otherwise 0 if the job succeeded and 1 if it failed
 * @throws Exception if querying the cluster or running the job fails
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  
  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    // Fixed: the message previously ran "have" and the key name together.
    System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  // Default total: maps per host x bytes per map x number of task trackers.
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data: run one map writing the total.
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = Job.getInstance(conf);
  
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);        
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}

Source File: RandomTextWriter.java From RDFS with Apache License 2.0

4 votes

/**
 * This is the main routine for launching a distributed random text write job.
 * By default it runs 10 maps per host and each map writes 1 GiB of data to
 * the output directory. There is no reduce phase (0 reduce tasks).
 *
 * <p>Arguments: an optional {@code -outFormat <class>} pair selecting the
 * output format, and the output directory as the first remaining argument.
 *
 * @param args command-line arguments as described above
 * @return the result of {@code printUsage()} when arguments are missing,
 *         -2 if {@code test.randomtextwrite.bytes_per_map} is 0, otherwise 0
 *         after the job has run
 * @throws Exception if the output format class cannot be loaded, or if
 *         querying the cluster or running the job fails
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  JobConf job = new JobConf(getConf());
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormat(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(Map.class);        
  
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomtextwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0");
    return -2;
  }
  // Default total: maps per host x bytes per map x number of task trackers.
  long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes", 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data: run one map writing the total.
    numMaps = 1;
    job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite);
  }
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    if ("-outFormat".equals(args[i])) {
      // The option needs a value; check explicitly rather than relying on
      // an ArrayIndexOutOfBoundsException.
      if (++i == args.length) {
        System.out.println("ERROR: Required parameter missing from " +
            args[i-1]);
        return printUsage(); // exits
      }
      outputFormatClass = 
        Class.forName(args[i]).asSubclass(OutputFormat.class);
    } else {
      otherArgs.add(args[i]);
    }
  }
  if (otherArgs.isEmpty()) {
    // Only options were given; otherArgs.get(0) below would throw.
    System.out.println("ERROR: Output directory not specified");
    return printUsage();
  }

  job.setOutputFormat(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return 0;
}

Source File: RandomWriter.java From RDFS with Apache License 2.0

4 votes

/**
 * Entry point that configures and launches the distributed random-writer job.
 * Defaults are 10 maps per host and 1 GiB written per map; the job runs with
 * zero reduce tasks.
 *
 * @param args command-line arguments; {@code args[0]} is the output directory
 * @return -1 if no output directory was given, -2 if
 *         {@code test.randomwrite.bytes_per_map} is 0, otherwise 0 after the
 *         job has run
 * @throws Exception if querying the cluster or running the job fails
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }
  
  final Path outputDir = new Path(args[0]);
  final JobConf jobConf = new JobConf(getConf());
  
  // Job identity and output location.
  jobConf.setJarByClass(RandomWriter.class);
  jobConf.setJobName("random-writer");
  FileOutputFormat.setOutputPath(jobConf, outputDir);
  
  // Key/value types and the classes doing the work.
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class);
  jobConf.setMapperClass(Map.class);        
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  
  // Size the job from the cluster and the configured byte budgets.
  final ClusterStatus clusterInfo = new JobClient(jobConf).getClusterStatus();
  final int mapsPerHost = jobConf.getInt("test.randomwriter.maps_per_host", 10);
  final long bytesPerMap = jobConf.getLong("test.randomwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (bytesPerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }
  final long totalBytes = jobConf.getLong("test.randomwrite.total_bytes", 
       mapsPerHost*bytesPerMap*clusterInfo.getTaskTrackers());
  int mapCount = (int) (totalBytes / bytesPerMap);
  if (mapCount == 0 && totalBytes > 0) {
    // Less than one map's worth of data: run one map writing the total.
    mapCount = 1;
    jobConf.setLong("test.randomwrite.bytes_per_map", totalBytes);
  }
  
  jobConf.setNumMapTasks(mapCount);
  System.out.println("Running " + mapCount + " maps.");
  
  // reducer NONE
  jobConf.setNumReduceTasks(0);
  
  final Date began = new Date();
  System.out.println("Job started: " + began);
  JobClient.runJob(jobConf);
  final Date finished = new Date();
  System.out.println("Job ended: " + finished);
  final long elapsedSeconds = (finished.getTime() - began.getTime()) /1000;
  System.out.println("The job took " + elapsedSeconds + " seconds.");
  
  return 0;
}

Source File: RandomTextWriter.java From hadoop with Apache License 2.0

4 votes

/**
 * This is the main routine for launching a distributed random text write job.
 * By default it runs 10 maps per host and each map writes 1 GiB of data to
 * the output directory. There is no reduce phase (0 reduce tasks).
 *
 * <p>Arguments: an optional {@code -outFormat <class>} pair selecting the
 * output format, and the output directory as the first remaining argument.
 *
 * @param args command-line arguments as described above
 * @return the result of {@code printUsage()} when arguments are missing,
 *         -2 if {@code BYTES_PER_MAP} is configured as 0, otherwise 0 if the
 *         job succeeded and 1 if it failed
 * @throws Exception if the output format class cannot be loaded, or if
 *         querying the cluster or running the job fails
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
    return -2;
  }
  // Default total: maps per host x bytes per map x number of task trackers.
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data: run one map writing the total.
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  
  Job job = Job.getInstance(conf);
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);        
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    if ("-outFormat".equals(args[i])) {
      // The option needs a value; check explicitly rather than relying on
      // an ArrayIndexOutOfBoundsException.
      if (++i == args.length) {
        System.out.println("ERROR: Required parameter missing from " +
            args[i-1]);
        return printUsage(); // exits
      }
      outputFormatClass = 
        Class.forName(args[i]).asSubclass(OutputFormat.class);
    } else {
      otherArgs.add(args[i]);
    }
  }
  if (otherArgs.isEmpty()) {
    // Only options were given; otherArgs.get(0) below would throw.
    System.out.println("ERROR: Output directory not specified");
    return printUsage();
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}

Source File: RandomTextWriter.java From incubator-tez with Apache License 2.0

4 votes

/**
 * This is the main routine for launching a distributed random text write job.
 * By default it runs 10 maps per host and each map writes 1 GiB of data to
 * the output directory. There is no reduce phase (0 reduce tasks).
 *
 * <p>Arguments: an optional {@code -outFormat <class>} pair selecting the
 * output format, and the output directory as the first remaining argument.
 *
 * @param args command-line arguments as described above
 * @return the result of {@code printUsage()} when arguments are missing,
 *         -2 if {@code BYTES_PER_MAP} is configured as 0, otherwise 0 if the
 *         job succeeded and 1 if it failed
 * @throws Exception if the output format class cannot be loaded, or if
 *         querying the cluster or running the job fails
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
    return -2;
  }
  // Default total: maps per host x bytes per map x number of task trackers.
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data: run one map writing the total.
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  
  Job job = new Job(conf);
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);        
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    if ("-outFormat".equals(args[i])) {
      // The option needs a value; check explicitly rather than relying on
      // an ArrayIndexOutOfBoundsException.
      if (++i == args.length) {
        System.out.println("ERROR: Required parameter missing from " +
            args[i-1]);
        return printUsage(); // exits
      }
      outputFormatClass = 
        Class.forName(args[i]).asSubclass(OutputFormat.class);
    } else {
      otherArgs.add(args[i]);
    }
  }
  if (otherArgs.isEmpty()) {
    // Only options were given; otherArgs.get(0) below would throw.
    System.out.println("ERROR: Output directory not specified");
    return printUsage();
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}

Source File: GenerateDistCacheData.java From big-c with Apache License 2.0

4 votes

/**
 * Splits the distributed-cache file list into {@code FileSplit}s so that
 * each split covers entries whose accumulated key values (treated as the
 * sizes of the files to generate) stay near a target size.
 *
 * <p>The target is the larger of {@code totalSize / numSplits} and
 * {@code DistributedCacheEmulator.AVG_BYTES_PER_MAP}, where
 * {@code numSplits} is the tracker count times the configured map slots per
 * tracker.
 *
 * @param jobCtxt job context supplying the configuration (file count, total
 *        byte count and the path of the file list)
 * @return consecutive splits over the file list; a final split covers any
 *         remaining bytes
 * @throws IOException if the cluster or file system cannot be read, or if
 *         the cluster offers no map slots to size the splits with
 * @throws RuntimeException if the file count, total size or file list path
 *         is missing from the configuration
 */
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;
  if (numSplits <= 0) {
    // Guard the division below: with no trackers (or zero slots per tracker)
    // it would fail with an unexplained ArithmeticException.
    throw new IOException("Cannot compute splits: no map slots available"
        + " (task trackers: " + numTrackers + ", map slots per tracker: "
        + numMapSlotsPerTracker + ")");
  }

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  // Whatever was not handed out above forms the last split.
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}

Source File: RandomWriter.java From hadoop-book with Apache License 2.0

4 votes

/**
 * Configures and runs the distributed random-writer job. With default
 * settings it uses 10 maps per host, each writing 1 GiB, and no reduce tasks.
 *
 * @param args command-line arguments; {@code args[0]} is the output directory
 * @return -1 if no output directory was given, -2 if
 *         {@code test.randomwrite.bytes_per_map} is 0, otherwise 0 after the
 *         job has run
 * @throws Exception if querying the cluster or running the job fails
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("Usage: writer <out-dir>");
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    final Path target = new Path(args[0]);
    final JobConf settings = new JobConf(getConf());

    settings.setJarByClass(RandomWriter.class);
    settings.setJobName("random-writer");
    FileOutputFormat.setOutputPath(settings, target);

    settings.setOutputKeyClass(BytesWritable.class);
    settings.setOutputValueClass(BytesWritable.class);

    settings.setInputFormat(RandomInputFormat.class);
    settings.setMapperClass(Map.class);
    settings.setReducerClass(IdentityReducer.class);
    settings.setOutputFormat(SequenceFileOutputFormat.class);

    // Derive the number of maps from the cluster size and byte budgets.
    final JobClient jobClient = new JobClient(settings);
    final ClusterStatus status = jobClient.getClusterStatus();
    final int perHostMaps = settings.getInt("test.randomwriter.maps_per_host", 10);
    final long perMapBytes = settings.getLong("test.randomwrite.bytes_per_map",
            1 * 1024 * 1024 * 1024);
    if (perMapBytes == 0) {
        System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
        return -2;
    }
    final long defaultTotal = perHostMaps * perMapBytes * status.getTaskTrackers();
    final long overallBytes =
            settings.getLong("test.randomwrite.total_bytes", defaultTotal);
    int maps = (int) (overallBytes / perMapBytes);
    if (maps == 0 && overallBytes > 0) {
        // Less than one map's worth of data: run one map writing the total.
        maps = 1;
        settings.setLong("test.randomwrite.bytes_per_map", overallBytes);
    }

    settings.setNumMapTasks(maps);
    System.out.println("Running " + maps + " maps.");

    // reducer NONE
    settings.setNumReduceTasks(0);

    final Date startedAt = new Date();
    System.out.println("Job started: " + startedAt);
    JobClient.runJob(settings);
    final Date endedAt = new Date();
    System.out.println("Job ended: " + endedAt);
    final long tookSeconds = (endedAt.getTime() - startedAt.getTime()) / 1000;
    System.out.println("The job took " + tookSeconds + " seconds.");

    return 0;
}

Source File: RandomTextWriter.java From tez with Apache License 2.0

4 votes

/**
 * Configures and launches the distributed random text write job.
 *
 * <p>Unless overridden in the configuration, the job is sized at 10 maps per
 * task tracker with 1 GiB written by each map. The job is map-only: the
 * number of reduce tasks is set to 0.
 *
 * <p>An optional {@code -outFormat <class>} argument pair selects the output
 * format (default {@code SequenceFileOutputFormat}); the first remaining
 * argument is used as the output directory.
 *
 * @param args command-line arguments as described above
 * @return 0 if the job completed successfully, 1 if it did not, -2 if
 *         {@code BYTES_PER_MAP} is configured as 0, or the result of
 *         {@code printUsage()} when the arguments are unusable
 * @throws Exception any failure raised while querying the cluster, loading
 *         the output format class, or running the job is propagated
 */
@SuppressWarnings("deprecation")
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    return printUsage();
  }

  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
    return -2;
  }
  // Default total: maps per host * bytes per map * number of task trackers.
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data requested: run a single map that
    // writes exactly the requested total.
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = new Job(conf);

  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);

  Class<? extends OutputFormat> outputFormatClass =
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass =
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      // args[++i] ran off the end: the option at args[i-1] had no value.
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  // Only options were supplied (e.g. "-outFormat X"): without this check
  // otherArgs.get(0) below would throw IndexOutOfBoundsException.
  if (otherArgs.isEmpty()) {
    System.out.println("ERROR: Missing output directory argument");
    return printUsage();
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
                     (endTime.getTime() - startTime.getTime()) /1000 +
                     " seconds.");

  return ret;
}

Source File: RandomWriter.java From tez with Apache License 2.0

4 votes

/**
 * Configures and launches the distributed random write job.
 *
 * <p>Unless overridden in the configuration, the job is sized at 10 maps per
 * task tracker with 1 GiB written by each map. The job is map-only: the
 * number of reduce tasks is set to 0.
 *
 * @param args command-line arguments; {@code args[0]} is the output directory
 * @return 0 if the job completed successfully, 1 if it did not, 2 if no
 *         arguments were given, or -2 if {@code BYTES_PER_MAP} is
 *         configured as 0
 * @throws Exception any failure raised while querying the cluster or
 *         running the job is propagated
 */
@SuppressWarnings("deprecation")
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    // Fixed: the original message lacked the space before the key name.
    System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  // Default total: maps per host * bytes per map * number of task trackers.
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data requested: run a single map that
    // writes exactly the requested total.
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = new Job(conf);

  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
                     (endTime.getTime() - startTime.getTime()) /1000 +
                     " seconds.");

  return ret;
}

Source File: RandomTextWriter.java From hadoop-gpu with Apache License 2.0

4 votes

/**
 * Configures and launches the distributed random text write job.
 *
 * <p>Unless overridden in the configuration, the job is sized at 10 maps per
 * task tracker with 1 GiB written by each map. The job is map-only: the
 * number of reduce tasks is set to 0.
 *
 * <p>An optional {@code -outFormat <class>} argument pair selects the output
 * format (default {@code SequenceFileOutputFormat}); the first remaining
 * argument is used as the output directory.
 *
 * @param args command-line arguments as described above
 * @return 0 once the job has run, -2 if
 *         {@code test.randomtextwrite.bytes_per_map} is configured as 0, or
 *         the result of {@code printUsage()} when the arguments are unusable
 * @throws Exception any failure raised while querying the cluster, loading
 *         the output format class, or running the job is propagated
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    return printUsage();
  }

  JobConf job = new JobConf(getConf());

  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormat(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(Map.class);

  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomtextwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0");
    return -2;
  }
  // Default total: maps per host * bytes per map * number of task trackers.
  long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes",
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data requested: run a single map that
    // writes exactly the requested total.
    numMaps = 1;
    job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite);
  }

  Class<? extends OutputFormat> outputFormatClass =
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass =
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      // args[++i] ran off the end: the option at args[i-1] had no value.
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  // Only options were supplied (e.g. "-outFormat X"): without this check
  // otherArgs.get(0) below would throw IndexOutOfBoundsException.
  if (otherArgs.isEmpty()) {
    System.out.println("ERROR: Missing output directory argument");
    return printUsage();
  }

  job.setOutputFormat(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
                     (endTime.getTime() - startTime.getTime()) /1000 +
                     " seconds.");

  return 0;
}

Source File: RandomWriter.java From hadoop-gpu with Apache License 2.0

4 votes

/**
 * Sets up and runs the distributed random write job.
 *
 * <p>With no configuration overrides the job uses 10 maps per task tracker
 * and 1 GiB per map. The number of reduce tasks is set to 0.
 *
 * @param args command-line arguments; {@code args[0]} is the output directory
 * @return 0 once the job has run, -1 if no arguments were given, or -2 if
 *         {@code test.randomwrite.bytes_per_map} is configured as 0
 * @throws Exception any failure raised while querying the cluster or
 *         running the job is propagated
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  final Path outputDir = new Path(args[0]);
  final JobConf jobConf = new JobConf(getConf());

  // Job identity and output location.
  jobConf.setJarByClass(RandomWriter.class);
  jobConf.setJobName("random-writer");
  FileOutputFormat.setOutputPath(jobConf, outputDir);

  // Key/value types and the classes making up the pipeline.
  jobConf.setOutputKeyClass(BytesWritable.class);
  jobConf.setOutputValueClass(BytesWritable.class);
  jobConf.setInputFormat(RandomInputFormat.class);
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(IdentityReducer.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);

  final ClusterStatus status = new JobClient(jobConf).getClusterStatus();
  final int mapsPerHost = jobConf.getInt("test.randomwriter.maps_per_host", 10);
  // 1L << 30 == 1 GiB, the default amount written by each map.
  final long bytesPerMap =
      jobConf.getLong("test.randomwrite.bytes_per_map", 1L << 30);
  if (bytesPerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }

  // Default total: maps per host * bytes per map * number of task trackers.
  final long defaultTotalBytes =
      mapsPerHost * bytesPerMap * status.getTaskTrackers();
  final long totalBytes =
      jobConf.getLong("test.randomwrite.total_bytes", defaultTotalBytes);

  int mapCount = (int) (totalBytes / bytesPerMap);
  if (totalBytes > 0 && mapCount == 0) {
    // Less than one map's worth of data requested: run a single map that
    // writes exactly the requested total.
    mapCount = 1;
    jobConf.setLong("test.randomwrite.bytes_per_map", totalBytes);
  }

  jobConf.setNumMapTasks(mapCount);
  System.out.println("Running " + mapCount + " maps.");

  // No reducers.
  jobConf.setNumReduceTasks(0);

  final Date started = new Date();
  System.out.println("Job started: " + started);
  JobClient.runJob(jobConf);
  final Date finished = new Date();
  System.out.println("Job ended: " + finished);

  final long elapsedSeconds = (finished.getTime() - started.getTime()) / 1000;
  System.out.println("The job took " + elapsedSeconds + " seconds.");

  return 0;
}

Source File: RandomTextWriter.java From hadoop with Apache License 2.0

4 votes

/**
 * Configures and launches the distributed random text write job.
 *
 * <p>Unless overridden in the configuration, the job is sized at 10 maps per
 * task tracker with 1 GiB written by each map. The job is map-only: the
 * number of reduce tasks is set to 0.
 *
 * <p>An optional {@code -outFormat <class>} argument pair selects the output
 * format (default {@code SequenceFileOutputFormat}); the first remaining
 * argument is used as the output directory.
 *
 * @param args command-line arguments as described above
 * @return 0 if the job completed successfully, 1 if it did not, -2 if
 *         {@code BYTES_PER_MAP} is configured as 0, or the result of
 *         {@code printUsage()} when the arguments are unusable
 * @throws Exception any failure raised while querying the cluster, loading
 *         the output format class, or running the job is propagated
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    return printUsage();
  }

  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
    return -2;
  }
  // Default total: maps per host * bytes per map * number of task trackers.
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data requested: run a single map that
    // writes exactly the requested total.
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = Job.getInstance(conf);

  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);

  Class<? extends OutputFormat> outputFormatClass =
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass =
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      // args[++i] ran off the end: the option at args[i-1] had no value.
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  // Only options were supplied (e.g. "-outFormat X"): without this check
  // otherArgs.get(0) below would throw IndexOutOfBoundsException.
  if (otherArgs.isEmpty()) {
    System.out.println("ERROR: Missing output directory argument");
    return printUsage();
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));

  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
                     (endTime.getTime() - startTime.getTime()) /1000 +
                     " seconds.");

  return ret;
}

Source File: Statistics.java From hadoop with Apache License 2.0

4 votes

/**
 * Stores the given cluster status in this object's {@code status} field.
 *
 * @param metrics the cluster status to keep
 */
void setClusterMetric(ClusterStatus metrics) {
  status = metrics;
}

Source File: Statistics.java From big-c with Apache License 2.0

4 votes

/**
 * Returns the cluster status currently held in the {@code status} field.
 *
 * @return the stored cluster status
 */
public ClusterStatus getStatus() {
  return this.status;
}

Source File: Statistics.java From big-c with Apache License 2.0

4 votes

/**
 * Stores the given cluster status in this object's {@code status} field.
 *
 * @param metrics the cluster status to keep
 */
void setClusterMetric(ClusterStatus metrics) {
  status = metrics;
}

Source File: RandomWriter.java From hadoop with Apache License 2.0

4 votes

/**
 * Configures and launches the distributed random write job.
 *
 * <p>Unless overridden in the configuration, the job is sized at 10 maps per
 * task tracker with 1 GiB written by each map. The job is map-only: the
 * number of reduce tasks is set to 0.
 *
 * @param args command-line arguments; {@code args[0]} is the output directory
 * @return 0 if the job completed successfully, 1 if it did not, 2 if no
 *         arguments were given, or -2 if {@code BYTES_PER_MAP} is
 *         configured as 0
 * @throws Exception any failure raised while querying the cluster or
 *         running the job is propagated
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }

  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    // Fixed: the original message lacked the space before the key name.
    System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  // Default total: maps per host * bytes per map * number of task trackers.
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    // Less than one map's worth of data requested: run a single map that
    // writes exactly the requested total.
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);

  Job job = Job.getInstance(conf);

  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
                     (endTime.getTime() - startTime.getTime()) /1000 +
                     " seconds.");

  return ret;
}

org.apache.hadoop.mapred.ClusterStatus Java Examples