org.apache.hadoop.mapreduce.server.tasktracker.TTConfig Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.server.tasktracker.TTConfig.
Each example names the open-source project and source file it was taken from. Where the same code appears verbatim in more than one project, the additional projects are noted alongside the primary source.
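TTConfig collects the String constants that name the TaskTracker-side configuration keys; every example below passes one of these constants (TT_MAP_SLOTS, TT_INDEX_CACHE, or TT_RESOURCE_CALCULATOR_PLUGIN) to a Configuration getter together with a fallback default. A minimal sketch of that shared pattern, assuming only the hadoop-mapreduce-client artifacts on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;

public class TTConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // TT_MAP_SLOTS names the per-tracker map-slot capacity key; fall
    // back to 4 when the cluster configuration leaves it unset (the
    // same default TeraScheduler uses in Example #1 below).
    int slotsPerHost = conf.getInt(TTConfig.TT_MAP_SLOTS, 4);
    System.out.println("map slots per host: " + slotsPerHost);
  }
}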
Example #1
Source File: TeraScheduler.java From hadoop with Apache License 2.0 (identical code also appears in pravega-samples, big-c, and incubator-tez)
public TeraScheduler(FileSplit[] realSplits, Configuration conf) throws IOException {
  this.realSplits = realSplits;
  this.slotsPerHost = conf.getInt(TTConfig.TT_MAP_SLOTS, 4);
  Map<String, Host> hostTable = new HashMap<String, Host>();
  splits = new Split[realSplits.length];
  for (FileSplit realSplit : realSplits) {
    Split split = new Split(realSplit.getPath().toString());
    splits[remainingSplits++] = split;
    for (String hostname : realSplit.getLocations()) {
      Host host = hostTable.get(hostname);
      if (host == null) {
        host = new Host(hostname);
        hostTable.put(hostname, host);
        hosts.add(host);
      }
      host.splits.add(split);
      split.locations.add(host);
    }
  }
}
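The constructor wires up a bipartite index: each Split records the Hosts holding a replica of it, and each Host records the Splits local to it, which the scheduling pass presumably consumes when assigning work. TTConfig.TT_MAP_SLOTS only caps how many splits a host may be given, with 4 as the fallback when the key is unset.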
Example #2
Source File: LoadJob.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
ResourceUsageMatcherRunner(final TaskInputOutputContext context,
                           ResourceUsageMetrics metrics) {
  Configuration conf = context.getConfiguration();

  // Set the resource calculator plugin.
  Class<? extends ResourceCalculatorPlugin> clazz =
      conf.getClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
                    null, ResourceCalculatorPlugin.class);
  ResourceCalculatorPlugin plugin =
      ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);

  // Set the other parameters.
  this.sleepTime = conf.getLong(SLEEP_CONFIG, DEFAULT_SLEEP_TIME);
  progress = new BoostingProgress(context);

  // Instantiate a resource-usage matcher.
  matcher = new ResourceUsageMatcher();
  matcher.configure(conf, plugin, metrics, progress);
}
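The getClass lookup only succeeds if something registered a plugin under the same key earlier; Example #7 below shows that producer side in a test. A minimal sketch of the registration, reusing the DummyResourceCalculatorPlugin test class from Example #7:

Configuration conf = new Configuration();
// Register a ResourceCalculatorPlugin implementation under the TTConfig
// key so the runner's getClass lookup above can find it; the dummy test
// plugin stands in for a real platform-specific implementation.
conf.setClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
    DummyResourceCalculatorPlugin.class,
    ResourceCalculatorPlugin.class);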
Example #3
Source File: TestIndexCache.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);

  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
        UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
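The corruption is deliberate: records for every third part go through dout, whose CheckedOutputStream feeds the CRC32, while the remaining records bypass the checksum by writing to out directly, so the checksum value appended at the end cannot match the bytes on disk. The test then asserts that getIndexInformation reports this as an IOException caused by a ChecksumException instead of silently returning bad index records.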
Example #4
Source File: IndexCache.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
public IndexCache(JobConf conf) {
  this.conf = conf;
  totalMemoryAllowed = conf.getInt(TTConfig.TT_INDEX_CACHE, 10) * 1024 * 1024;
  LOG.info("IndexCache created with max memory = " + totalMemoryAllowed);
}
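TT_INDEX_CACHE is read in megabytes and converted to bytes. A minimal sizing sketch (the 50 MB figure is an arbitrary assumption, and since IndexCache is an internal class, code like this lives inside Hadoop's own org.apache.hadoop.mapred package, as the tests do):

JobConf conf = new JobConf();
// The key is interpreted in megabytes: 50 here allows
// 50 * 1024 * 1024 = 52,428,800 bytes of cached index data.
conf.setInt(TTConfig.TT_INDEX_CACHE, 50);
IndexCache cache = new IndexCache(conf);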
Example #5
Source File: TestIndexCache.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
public void testRemoveMap() throws Exception {
  // This test uses two threads to call getIndexInformation and
  // removeMap concurrently, in order to construct a race condition.
  // The failure may not reproduce in every run, but before
  // MAPREDUCE-2541 it failed essentially every time, so it is
  // repeatable in practice.
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 10);
  // Make a big file so removeMapThread almost surely runs faster than
  // getInfoThread.
  final int partsPerMap = 100000;
  final int bytesPerFile = partsPerMap * 24;
  final IndexCache cache = new IndexCache(conf);

  final Path big = new Path(p, "bigIndex");
  final String user =
      UserGroupInformation.getCurrentUser().getShortUserName();
  writeFile(fs, big, bytesPerFile, partsPerMap);

  // Run multiple times.
  for (int i = 0; i < 20; ++i) {
    Thread getInfoThread = new Thread() {
      @Override
      public void run() {
        try {
          cache.getIndexInformation("bigIndex", partsPerMap, big, user);
        } catch (Exception e) {
          // should not be here
        }
      }
    };
    Thread removeMapThread = new Thread() {
      @Override
      public void run() {
        cache.removeMap("bigIndex");
      }
    };
    if (i % 2 == 0) {
      getInfoThread.start();
      removeMapThread.start();
    } else {
      removeMapThread.start();
      getInfoThread.start();
    }
    getInfoThread.join();
    removeMapThread.join();
    assertEquals(true, cache.checkTotalMemoryUsed());
  }
}
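Alternating which thread starts first, and joining both before the next iteration, makes each of the 20 rounds a fresh interleaving of the reader and the remover; cache.checkTotalMemoryUsed() then verifies that the cache's memory accounting survived the race, the invariant that MAPREDUCE-2541 repaired.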
Example #6
Source File: GenerateDistCacheData.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated.
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file.
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task.
  final long targetSize = Math.max(totalSize / numSplits,
      DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {
      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(sequenceFile, splitStartPosition,
            splitSize, (String[]) null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(sequenceFile, splitStartPosition,
        bytesRemaining, (String[]) null));
  }
  return splits;
}
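As a worked example of the sizing (all figures hypothetical): 10 task trackers at the default of 2 map slots each give numSplits = 20, so a totalSize of 2 GB yields a targetSize of roughly 100 MB, unless DistributedCacheEmulator.AVG_BYTES_PER_MAP is larger, in which case that floor wins. The reader loop then packs whole files greedily, cutting a split as soon as the next file would push it past targetSize.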
Example #7
Source File: TestResourceUsageEmulators.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
/**
 * Test {@link LoadJob.ResourceUsageMatcherRunner}.
 */
@Test
@SuppressWarnings("unchecked")
public void testResourceUsageMatcherRunner() throws Exception {
  Configuration conf = new Configuration();
  FakeProgressive progress = new FakeProgressive();

  // Set the resource calculator plugin.
  conf.setClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
      DummyResourceCalculatorPlugin.class,
      ResourceCalculatorPlugin.class);

  // Set the resource-usage emulation plugin implementation class.
  conf.setClass(ResourceUsageMatcher.RESOURCE_USAGE_EMULATION_PLUGINS,
      TestResourceUsageEmulatorPlugin.class,
      ResourceUsageEmulatorPlugin.class);

  long currentTime = System.currentTimeMillis();

  // Initialize the matcher class.
  TaskAttemptID id = new TaskAttemptID("test", 1, TaskType.MAP, 1, 1);
  StatusReporter reporter = new DummyReporter(progress);
  TaskInputOutputContext context =
      new MapContextImpl(conf, id, null, null, null, reporter, null);
  FakeResourceUsageMatcherRunner matcher =
      new FakeResourceUsageMatcherRunner(context, null);

  // Check if the matcher initialized the plugin.
  String identifier = TestResourceUsageEmulatorPlugin.DEFAULT_IDENTIFIER;
  long initTime =
      TestResourceUsageEmulatorPlugin.testInitialization(identifier, conf);
  assertTrue("ResourceUsageMatcherRunner failed to initialize the"
      + " configured plugin", initTime > currentTime);

  // Check the progress.
  assertEquals("Progress mismatch in ResourceUsageMatcherRunner",
      0, progress.getProgress(), 0D);

  // Call match() and check progress.
  progress.setProgress(0.01f);
  currentTime = System.currentTimeMillis();
  matcher.test();
  long emulateTime =
      TestResourceUsageEmulatorPlugin.testEmulation(identifier, conf);
  assertTrue("ProgressBasedResourceUsageMatcher failed to load and emulate"
      + " the configured plugin", emulateTime > currentTime);
}
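The assertions use a timestamp handshake: TestResourceUsageEmulatorPlugin records when it was initialized and when it emulated, and the test merely checks that each recorded time falls after the currentTime snapshot taken beforehand, which demonstrates that the runner actually drove the configured plugin.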