org.apache.hadoop.mapred.ClusterStatus Java Examples
The following examples show how to use
org.apache.hadoop.mapred.ClusterStatus.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GenericMRLoadGenerator.java From hadoop with Apache License 2.0 | 6 votes |
/** * When no input dir is specified, generate random data. */ protected static void confRandom(Job job) throws IOException { // from RandomWriter job.setInputFormatClass(RandomInputFormat.class); job.setMapperClass(RandomMapOutput.class); Configuration conf = job.getConfiguration(); final ClusterStatus cluster = new JobClient(conf).getClusterStatus(); int numMapsPerHost = conf.getInt(RandomTextWriter.MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { throw new IOException( "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0"); } long totalBytesToWrite = conf.getLong(RandomTextWriter.TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); }
Example #2
Source File: GenerateData.java From RDFS with Apache License 2.0 | 6 votes |
@Override public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException { final JobClient client = new JobClient(new JobConf(jobCtxt.getConfiguration())); ClusterStatus stat = client.getClusterStatus(true); final long toGen = jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1); if (toGen < 0) { throw new IOException("Invalid/missing generation bytes: " + toGen); } final int nTrackers = stat.getTaskTrackers(); final long bytesPerTracker = toGen / nTrackers; final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers); final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*"); final Matcher m = trackerPattern.matcher(""); for (String tracker : stat.getActiveTrackerNames()) { m.reset(tracker); if (!m.find()) { System.err.println("Skipping node: " + tracker); continue; } final String name = m.group(1); splits.add(new GenSplit(bytesPerTracker, new String[] { name })); } return splits; }
Example #3
Source File: JobTrackerJspHelper.java From RDFS with Apache License 2.0 | 6 votes |
/** * Generates an XML-formatted block that summarizes the state of the JobTracker. */ public void generateSummaryTable(JspWriter out, JobTracker tracker) throws IOException { ClusterStatus status = tracker.getClusterStatus(); int maxMapTasks = status.getMaxMapTasks(); int maxReduceTasks = status.getMaxReduceTasks(); int numTaskTrackers = status.getTaskTrackers(); String tasksPerNodeStr; if (numTaskTrackers > 0) { double tasksPerNodePct = (double) (maxMapTasks + maxReduceTasks) / (double) numTaskTrackers; tasksPerNodeStr = percentFormat.format(tasksPerNodePct); } else { tasksPerNodeStr = "-"; } out.print("<maps>" + status.getMapTasks() + "</maps>\n" + "<reduces>" + status.getReduceTasks() + "</reduces>\n" + "<total_submissions>" + tracker.getTotalSubmissions() + "</total_submissions>\n" + "<nodes>" + status.getTaskTrackers() + "</nodes>\n" + "<map_task_capacity>" + status.getMaxMapTasks() + "</map_task_capacity>\n" + "<reduce_task_capacity>" + status.getMaxReduceTasks() + "</reduce_task_capacity>\n" + "<avg_tasks_per_node>" + tasksPerNodeStr + "</avg_tasks_per_node>\n"); }
Example #4
Source File: InfrastructureAnalyzer.java From systemds with Apache License 2.0 | 6 votes |
/** * Analyzes properties of hadoop cluster and configuration. */ private static void analyzeHadoopCluster() { try { JobConf job = ConfigurationManager.getCachedJobConf(); JobClient client = new JobClient(job); ClusterStatus stat = client.getClusterStatus(); if( stat != null ) { //if in cluster mode //analyze cluster status _remotePar = stat.getTaskTrackers(); _remoteParMap = stat.getMaxMapTasks(); _remoteParReduce = stat.getMaxReduceTasks(); //analyze pure configuration properties analyzeHadoopConfiguration(); } } catch (IOException e) { throw new RuntimeException("Unable to analyze infrastructure.",e); } }
Example #5
Source File: StressJobFactory.java From big-c with Apache License 2.0 | 6 votes |
/** * STRESS Once you get the notification from StatsCollector.Collect the * clustermetrics. Update current loadStatus with new load status of JT. * * @param item */ @Override public void update(Statistics.ClusterStats item) { ClusterStatus clusterStatus = item.getStatus(); try { // update the max cluster map/reduce task capacity loadStatus.updateMapCapacity(clusterStatus.getMaxMapTasks()); loadStatus.updateReduceCapacity(clusterStatus.getMaxReduceTasks()); int numTrackers = clusterStatus.getTaskTrackers(); int jobLoad = (int) (maxJobTrackerRatio * numTrackers) - item.getNumRunningJob(); loadStatus.updateJobLoad(jobLoad); } catch (Exception e) { LOG.error("Couldn't get the new Status",e); } }
Example #6
Source File: GenerateData.java From big-c with Apache License 2.0 | 6 votes |
@Override public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException { final JobClient client = new JobClient(new JobConf(jobCtxt.getConfiguration())); ClusterStatus stat = client.getClusterStatus(true); final long toGen = jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1); if (toGen < 0) { throw new IOException("Invalid/missing generation bytes: " + toGen); } final int nTrackers = stat.getTaskTrackers(); final long bytesPerTracker = toGen / nTrackers; final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers); final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*"); final Matcher m = trackerPattern.matcher(""); for (String tracker : stat.getActiveTrackerNames()) { m.reset(tracker); if (!m.find()) { System.err.println("Skipping node: " + tracker); continue; } final String name = m.group(1); splits.add(new GenSplit(bytesPerTracker, new String[] { name })); } return splits; }
Example #7
Source File: GenericMRLoadGenerator.java From big-c with Apache License 2.0 | 6 votes |
/** * When no input dir is specified, generate random data. */ protected static void confRandom(Job job) throws IOException { // from RandomWriter job.setInputFormatClass(RandomInputFormat.class); job.setMapperClass(RandomMapOutput.class); Configuration conf = job.getConfiguration(); final ClusterStatus cluster = new JobClient(conf).getClusterStatus(); int numMapsPerHost = conf.getInt(RandomTextWriter.MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { throw new IOException( "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0"); } long totalBytesToWrite = conf.getLong(RandomTextWriter.TOTAL_BYTES, numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); }
Example #8
Source File: StressJobFactory.java From hadoop with Apache License 2.0 | 6 votes |
/** * STRESS Once you get the notification from StatsCollector.Collect the * clustermetrics. Update current loadStatus with new load status of JT. * * @param item */ @Override public void update(Statistics.ClusterStats item) { ClusterStatus clusterStatus = item.getStatus(); try { // update the max cluster map/reduce task capacity loadStatus.updateMapCapacity(clusterStatus.getMaxMapTasks()); loadStatus.updateReduceCapacity(clusterStatus.getMaxReduceTasks()); int numTrackers = clusterStatus.getTaskTrackers(); int jobLoad = (int) (maxJobTrackerRatio * numTrackers) - item.getNumRunningJob(); loadStatus.updateJobLoad(jobLoad); } catch (Exception e) { LOG.error("Couldn't get the new Status",e); } }
Example #9
Source File: GenerateData.java From hadoop with Apache License 2.0 | 6 votes |
@Override public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException { final JobClient client = new JobClient(new JobConf(jobCtxt.getConfiguration())); ClusterStatus stat = client.getClusterStatus(true); final long toGen = jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1); if (toGen < 0) { throw new IOException("Invalid/missing generation bytes: " + toGen); } final int nTrackers = stat.getTaskTrackers(); final long bytesPerTracker = toGen / nTrackers; final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers); final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*"); final Matcher m = trackerPattern.matcher(""); for (String tracker : stat.getActiveTrackerNames()) { m.reset(tracker); if (!m.find()) { System.err.println("Skipping node: " + tracker); continue; } final String name = m.group(1); splits.add(new GenSplit(bytesPerTracker, new String[] { name })); } return splits; }
Example #10
Source File: InfrastructureAnalyzer.java From systemds with Apache License 2.0 | 6 votes |
/** * Analyzes properties of hadoop cluster and configuration. */ private static void analyzeHadoopCluster() { try { JobConf job = ConfigurationManager.getCachedJobConf(); JobClient client = new JobClient(job); ClusterStatus stat = client.getClusterStatus(); if( stat != null ) { //if in cluster mode //analyze cluster status _remotePar = stat.getTaskTrackers(); _remoteParMap = stat.getMaxMapTasks(); _remoteParReduce = stat.getMaxReduceTasks(); //analyze pure configuration properties analyzeHadoopConfiguration(); } } catch (IOException e) { throw new RuntimeException("Unable to analyze infrastructure.",e); } }
Example #11
Source File: TestCluster.java From imputationserver with GNU Affero General Public License v3.0 | 5 votes |
public void start() throws IOException { File testCluster = new File(WORKING_DIRECTORY); if (testCluster.exists()) { FileUtil.deleteDirectory(testCluster); } testCluster.mkdirs(); File testClusterData = new File(WORKING_DIRECTORY + "/data"); File testClusterLog = new File(WORKING_DIRECTORY + "/logs"); if (cluster == null) { conf = new HdfsConfiguration(); conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testClusterData.getAbsolutePath()); cluster = new MiniDFSCluster.Builder(conf).build(); fs = cluster.getFileSystem(); // set mincluster as default config HdfsUtil.setDefaultConfiguration(conf); System.setProperty("hadoop.log.dir", testClusterLog.getAbsolutePath()); MiniMRCluster mrCluster = new MiniMRCluster(1, fs.getUri() .toString(), 1, null, null, new JobConf(conf)); JobConf mrClusterConf = mrCluster.createJobConf(); HdfsUtil.setDefaultConfiguration(new Configuration(mrClusterConf)); System.out.println("------"); JobClient client = new JobClient(mrClusterConf); ClusterStatus status = client.getClusterStatus(true); System.out.println(status.getActiveTrackerNames()); } }
Example #12
Source File: Statistics.java From hadoop with Apache License 2.0 | 5 votes |
private void updateAndNotifyClusterStatsListeners( ClusterStatus clusterStatus) { ClusterStats stats = ClusterStats.getClusterStats(); stats.setClusterMetric(clusterStatus); for (StatListener<ClusterStats> listener : clusterStatlisteners) { listener.update(stats); } }
Example #13
Source File: Statistics.java From big-c with Apache License 2.0 | 5 votes |
private void updateAndNotifyClusterStatsListeners( ClusterStatus clusterStatus) { ClusterStats stats = ClusterStats.getClusterStats(); stats.setClusterMetric(clusterStatus); for (StatListener<ClusterStats> listener : clusterStatlisteners) { listener.update(stats); } }
Example #14
Source File: RandomWriter.java From incubator-tez with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: writer <out-dir>"); ToolRunner.printGenericCommandUsage(System.out); return 2; } Path outDir = new Path(args[0]); Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0"); return -2; } long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); Job job = new Job(conf); job.setJarByClass(RandomWriter.class); job.setJobName("random-writer"); FileOutputFormat.setOutputPath(job, outDir); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormatClass(RandomInputFormat.class); job.setMapperClass(RandomMapper.class); job.setReducerClass(Reducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return ret; }
Example #15
Source File: RandomWriter.java From hadoop with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: writer <out-dir>"); ToolRunner.printGenericCommandUsage(System.out); return 2; } Path outDir = new Path(args[0]); Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0"); return -2; } long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); Job job = Job.getInstance(conf); job.setJarByClass(RandomWriter.class); job.setJobName("random-writer"); FileOutputFormat.setOutputPath(job, outDir); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormatClass(RandomInputFormat.class); job.setMapperClass(RandomMapper.class); job.setReducerClass(Reducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return ret; }
Example #16
Source File: RandomTextWriter.java From RDFS with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { return printUsage(); } JobConf job = new JobConf(getConf()); job.setJarByClass(RandomTextWriter.class); job.setJobName("random-text-writer"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(RandomWriter.RandomInputFormat.class); job.setMapperClass(Map.class); JobClient client = new JobClient(job); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10); long numBytesToWritePerMap = job.getLong("test.randomtextwrite.bytes_per_map", 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0"); return -2; } long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes", numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite); } Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; List<String> otherArgs = new ArrayList<String>(); for(int i=0; i < args.length; ++i) { try { if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else { otherArgs.add(args[i]); } } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i-1]); return printUsage(); // exits } } job.setOutputFormat(outputFormatClass); FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0))); job.setNumMapTasks(numMaps); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(job); Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return 0; }
Example #17
Source File: RandomWriter.java From RDFS with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: writer <out-dir>"); ToolRunner.printGenericCommandUsage(System.out); return -1; } Path outDir = new Path(args[0]); JobConf job = new JobConf(getConf()); job.setJarByClass(RandomWriter.class); job.setJobName("random-writer"); FileOutputFormat.setOutputPath(job, outDir); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormat(RandomInputFormat.class); job.setMapperClass(Map.class); job.setReducerClass(IdentityReducer.class); job.setOutputFormat(SequenceFileOutputFormat.class); JobClient client = new JobClient(job); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10); long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map", 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0"); return -2; } long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite); } job.setNumMapTasks(numMaps); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(job); Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return 0; }
Example #18
Source File: RandomTextWriter.java From hadoop with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { return printUsage(); } Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0"); return -2; } long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); Job job = Job.getInstance(conf); job.setJarByClass(RandomTextWriter.class); job.setJobName("random-text-writer"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(RandomWriter.RandomInputFormat.class); job.setMapperClass(RandomTextMapper.class); Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; List<String> otherArgs = new ArrayList<String>(); for(int i=0; i < args.length; ++i) { try { if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else { otherArgs.add(args[i]); } } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i-1]); return printUsage(); // exits } } job.setOutputFormatClass(outputFormatClass); FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0))); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return ret; }
Example #19
Source File: RandomTextWriter.java From incubator-tez with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { return printUsage(); } Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0"); return -2; } long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); Job job = new Job(conf); job.setJarByClass(RandomTextWriter.class); job.setJobName("random-text-writer"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(RandomWriter.RandomInputFormat.class); job.setMapperClass(RandomTextMapper.class); Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; List<String> otherArgs = new ArrayList<String>(); for(int i=0; i < args.length; ++i) { try { if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else { otherArgs.add(args[i]); } } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i-1]); return printUsage(); // exits } } job.setOutputFormatClass(outputFormatClass); FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0))); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return ret; }
Example #20
Source File: GenerateDistCacheData.java From big-c with Apache License 2.0 | 4 votes |
@Override public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException { final JobConf jobConf = new JobConf(jobCtxt.getConfiguration()); final JobClient client = new JobClient(jobConf); ClusterStatus stat = client.getClusterStatus(true); int numTrackers = stat.getTaskTrackers(); final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1); // Total size of distributed cache files to be generated final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1); // Get the path of the special file String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST); if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) { throw new RuntimeException("Invalid metadata: #files (" + fileCount + "), total_size (" + totalSize + "), filelisturi (" + distCacheFileList + ")"); } Path sequenceFile = new Path(distCacheFileList); FileSystem fs = sequenceFile.getFileSystem(jobConf); FileStatus srcst = fs.getFileStatus(sequenceFile); // Consider the number of TTs * mapSlotsPerTracker as number of mappers. int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2); int numSplits = numTrackers * numMapSlotsPerTracker; List<InputSplit> splits = new ArrayList<InputSplit>(numSplits); LongWritable key = new LongWritable(); BytesWritable value = new BytesWritable(); // Average size of data to be generated by each map task final long targetSize = Math.max(totalSize / numSplits, DistributedCacheEmulator.AVG_BYTES_PER_MAP); long splitStartPosition = 0L; long splitEndPosition = 0L; long acc = 0L; long bytesRemaining = srcst.getLen(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, sequenceFile, jobConf); while (reader.next(key, value)) { // If adding this file would put this split past the target size, // cut the last split and put this file in the next split. if (acc + key.get() > targetSize && acc != 0) { long splitSize = splitEndPosition - splitStartPosition; splits.add(new FileSplit( sequenceFile, splitStartPosition, splitSize, (String[])null)); bytesRemaining -= splitSize; splitStartPosition = splitEndPosition; acc = 0L; } acc += key.get(); splitEndPosition = reader.getPosition(); } } finally { if (reader != null) { reader.close(); } } if (bytesRemaining != 0) { splits.add(new FileSplit( sequenceFile, splitStartPosition, bytesRemaining, (String[])null)); } return splits; }
Example #21
Source File: RandomWriter.java From hadoop-book with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. It * runs 10 maps/node and each node writes 1 gig of data to a DFS file. The * reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: writer <out-dir>"); ToolRunner.printGenericCommandUsage(System.out); return -1; } Path outDir = new Path(args[0]); JobConf job = new JobConf(getConf()); job.setJarByClass(RandomWriter.class); job.setJobName("random-writer"); FileOutputFormat.setOutputPath(job, outDir); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormat(RandomInputFormat.class); job.setMapperClass(Map.class); job.setReducerClass(IdentityReducer.class); job.setOutputFormat(SequenceFileOutputFormat.class); JobClient client = new JobClient(job); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10); long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map", 1 * 1024 * 1024 * 1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0"); return -2; } long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite); } job.setNumMapTasks(numMaps); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(job); Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) / 1000 + " seconds."); return 0; }
Example #22
Source File: RandomTextWriter.java From tez with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ @SuppressWarnings("deprecation") public int run(String[] args) throws Exception { if (args.length == 0) { return printUsage(); } Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0"); return -2; } long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); Job job = new Job(conf); job.setJarByClass(RandomTextWriter.class); job.setJobName("random-text-writer"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(RandomWriter.RandomInputFormat.class); job.setMapperClass(RandomTextMapper.class); Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; List<String> otherArgs = new ArrayList<String>(); for(int i=0; i < args.length; ++i) { try { if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else { otherArgs.add(args[i]); } } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i-1]); return printUsage(); // exits } } job.setOutputFormatClass(outputFormatClass); FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0))); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return ret; }
Example #23
Source File: RandomWriter.java From tez with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ @SuppressWarnings("deprecation") public int run(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: writer <out-dir>"); ToolRunner.printGenericCommandUsage(System.out); return 2; } Path outDir = new Path(args[0]); Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0"); return -2; } long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); Job job = new Job(conf); job.setJarByClass(RandomWriter.class); job.setJobName("random-writer"); FileOutputFormat.setOutputPath(job, outDir); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormatClass(RandomInputFormat.class); job.setMapperClass(RandomMapper.class); job.setReducerClass(Reducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return ret; }
Example #24
Source File: RandomTextWriter.java From hadoop-gpu with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { return printUsage(); } JobConf job = new JobConf(getConf()); job.setJarByClass(RandomTextWriter.class); job.setJobName("random-text-writer"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(RandomWriter.RandomInputFormat.class); job.setMapperClass(Map.class); JobClient client = new JobClient(job); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = job.getInt("test.randomtextwrite.maps_per_host", 10); long numBytesToWritePerMap = job.getLong("test.randomtextwrite.bytes_per_map", 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have test.randomtextwrite.bytes_per_map set to 0"); return -2; } long totalBytesToWrite = job.getLong("test.randomtextwrite.total_bytes", numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong("test.randomtextwrite.bytes_per_map", totalBytesToWrite); } Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; List<String> otherArgs = new ArrayList<String>(); for(int i=0; i < args.length; ++i) { try { if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else { otherArgs.add(args[i]); } } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i-1]); return printUsage(); // exits } } job.setOutputFormat(outputFormatClass); FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0))); job.setNumMapTasks(numMaps); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(job); Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return 0; }
Example #25
Source File: RandomWriter.java From hadoop-gpu with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: writer <out-dir>"); ToolRunner.printGenericCommandUsage(System.out); return -1; } Path outDir = new Path(args[0]); JobConf job = new JobConf(getConf()); job.setJarByClass(RandomWriter.class); job.setJobName("random-writer"); FileOutputFormat.setOutputPath(job, outDir); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormat(RandomInputFormat.class); job.setMapperClass(Map.class); job.setReducerClass(IdentityReducer.class); job.setOutputFormat(SequenceFileOutputFormat.class); JobClient client = new JobClient(job); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10); long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map", 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0"); return -2; } long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite); } job.setNumMapTasks(numMaps); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); JobClient.runJob(job); Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return 0; }
Example #26
Source File: RandomTextWriter.java From hadoop with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { return printUsage(); } Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0"); return -2; } long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); Job job = Job.getInstance(conf); job.setJarByClass(RandomTextWriter.class); job.setJobName("random-text-writer"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(RandomWriter.RandomInputFormat.class); job.setMapperClass(RandomTextMapper.class); Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class; List<String> otherArgs = new ArrayList<String>(); for(int i=0; i < args.length; ++i) { try { if ("-outFormat".equals(args[i])) { outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class); } else { otherArgs.add(args[i]); } } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i-1]); return printUsage(); // exits } } job.setOutputFormatClass(outputFormatClass); FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0))); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return ret; }
Example #27
Source File: Statistics.java From hadoop with Apache License 2.0 | 4 votes |
/** * @param metrics */ void setClusterMetric(ClusterStatus metrics) { this.status = metrics; }
Example #28
Source File: Statistics.java From big-c with Apache License 2.0 | 4 votes |
/** * @return metrics */ public ClusterStatus getStatus() { return status; }
Example #29
Source File: Statistics.java From big-c with Apache License 2.0 | 4 votes |
/** * @param metrics */ void setClusterMetric(ClusterStatus metrics) { this.status = metrics; }
Example #30
Source File: RandomWriter.java From hadoop with Apache License 2.0 | 4 votes |
/** * This is the main routine for launching a distributed random write job. * It runs 10 maps/node and each node writes 1 gig of data to a DFS file. * The reduce doesn't do anything. * * @throws IOException */ public int run(String[] args) throws Exception { if (args.length == 0) { System.out.println("Usage: writer <out-dir>"); ToolRunner.printGenericCommandUsage(System.out); return 2; } Path outDir = new Path(args[0]); Configuration conf = getConf(); JobClient client = new JobClient(conf); ClusterStatus cluster = client.getClusterStatus(); int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10); long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024); if (numBytesToWritePerMap == 0) { System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0"); return -2; } long totalBytesToWrite = conf.getLong(TOTAL_BYTES, numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers()); int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap); if (numMaps == 0 && totalBytesToWrite > 0) { numMaps = 1; conf.setLong(BYTES_PER_MAP, totalBytesToWrite); } conf.setInt(MRJobConfig.NUM_MAPS, numMaps); Job job = Job.getInstance(conf); job.setJarByClass(RandomWriter.class); job.setJobName("random-writer"); FileOutputFormat.setOutputPath(job, outDir); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormatClass(RandomInputFormat.class); job.setMapperClass(RandomMapper.class); job.setReducerClass(Reducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); System.out.println("Running " + numMaps + " maps."); // reducer NONE job.setNumReduceTasks(0); Date startTime = new Date(); System.out.println("Job started: " + startTime); int ret = job.waitForCompletion(true) ? 0 : 1; Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + (endTime.getTime() - startTime.getTime()) /1000 + " seconds."); return ret; }