org.apache.hadoop.mapreduce.server.tasktracker.TTConfig Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.server.tasktracker.TTConfig.
Each example names the open-source project and source file it was taken from. Where the same code appears verbatim in more than one project, the additional projects are noted alongside the primary source.
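TTConfig collects the String constants that name the TaskTracker-side configuration keys; every example below passes one of these constants (TT_MAP_SLOTS, TT_INDEX_CACHE, or TT_RESOURCE_CALCULATOR_PLUGIN) to a Configuration getter together with a fallback default. A minimal sketch of that shared pattern, assuming only the hadoop-mapreduce-client artifacts on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;

public class TTConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // TT_MAP_SLOTS names the per-tracker map-slot capacity key; fall
    // back to 4 when the cluster configuration leaves it unset (the
    // same default TeraScheduler uses in Example #1 below).
    int slotsPerHost = conf.getInt(TTConfig.TT_MAP_SLOTS, 4);
    System.out.println("map slots per host: " + slotsPerHost);
  }
}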
Example #1
Source File: TeraScheduler.java From hadoop with Apache License 2.0 (identical code also appears in pravega-samples, big-c, and incubator-tez)
public TeraScheduler(FileSplit[] realSplits, Configuration conf) throws IOException {
  this.realSplits = realSplits;
  this.slotsPerHost = conf.getInt(TTConfig.TT_MAP_SLOTS, 4);
  Map<String, Host> hostTable = new HashMap<String, Host>();
  splits = new Split[realSplits.length];
  for (FileSplit realSplit : realSplits) {
    Split split = new Split(realSplit.getPath().toString());
    splits[remainingSplits++] = split;
    for (String hostname : realSplit.getLocations()) {
      Host host = hostTable.get(hostname);
      if (host == null) {
        host = new Host(hostname);
        hostTable.put(hostname, host);
        hosts.add(host);
      }
      host.splits.add(split);
      split.locations.add(host);
    }
  }
}
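The constructor wires up a bipartite index: each Split records the Hosts holding a replica of it, and each Host records the Splits local to it, which the scheduling pass presumably consumes when assigning work. TTConfig.TT_MAP_SLOTS only caps how many splits a host may be given, with 4 as the fallback when the key is unset.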
Example #2
Source File: LoadJob.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
ResourceUsageMatcherRunner(final TaskInputOutputContext context,
                           ResourceUsageMetrics metrics) {
  Configuration conf = context.getConfiguration();

  // Set the resource calculator plugin.
  Class<? extends ResourceCalculatorPlugin> clazz =
      conf.getClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
                    null, ResourceCalculatorPlugin.class);
  ResourceCalculatorPlugin plugin =
      ResourceCalculatorPlugin.getResourceCalculatorPlugin(clazz, conf);

  // Set the other parameters.
  this.sleepTime = conf.getLong(SLEEP_CONFIG, DEFAULT_SLEEP_TIME);
  progress = new BoostingProgress(context);

  // Instantiate a resource-usage matcher.
  matcher = new ResourceUsageMatcher();
  matcher.configure(conf, plugin, metrics, progress);
}
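The getClass lookup only succeeds if something registered a plugin under the same key earlier; Example #7 below shows that producer side in a test. A minimal sketch of the registration, reusing the DummyResourceCalculatorPlugin test class from Example #7:

Configuration conf = new Configuration();
// Register a ResourceCalculatorPlugin implementation under the TTConfig
// key so the runner's getClass lookup above can find it; the dummy test
// plugin stands in for a real platform-specific implementation.
conf.setClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
    DummyResourceCalculatorPlugin.class,
    ResourceCalculatorPlugin.class);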
Example #3
Source File: TestIndexCache.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);

  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
        UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
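The corruption is deliberate: records for every third part go through dout, whose CheckedOutputStream feeds the CRC32, while the remaining records bypass the checksum by writing to out directly, so the checksum value appended at the end cannot match the bytes on disk. The test then asserts that getIndexInformation reports this as an IOException caused by a ChecksumException instead of silently returning bad index records.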
Example #4
Source File: IndexCache.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
public IndexCache(JobConf conf) {
  this.conf = conf;
  totalMemoryAllowed = conf.getInt(TTConfig.TT_INDEX_CACHE, 10) * 1024 * 1024;
  LOG.info("IndexCache created with max memory = " + totalMemoryAllowed);
}
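TT_INDEX_CACHE is read in megabytes and converted to bytes. A minimal sizing sketch (the 50 MB figure is an arbitrary assumption, and since IndexCache is an internal class, code like this lives inside Hadoop's own org.apache.hadoop.mapred package, as the tests do):

JobConf conf = new JobConf();
// The key is interpreted in megabytes: 50 here allows
// 50 * 1024 * 1024 = 52,428,800 bytes of cached index data.
conf.setInt(TTConfig.TT_INDEX_CACHE, 50);
IndexCache cache = new IndexCache(conf);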
Example #5
Source File: TestIndexCache.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
public void testRemoveMap() throws Exception {
  // This test uses two threads to call getIndexInformation and
  // removeMap concurrently, in order to construct a race condition.
  // The failure may not reproduce in every run, but before
  // MAPREDUCE-2541 it failed essentially every time, so it is
  // repeatable in practice.
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 10);
  // Make a big file so removeMapThread almost surely runs faster than
  // getInfoThread.
  final int partsPerMap = 100000;
  final int bytesPerFile = partsPerMap * 24;
  final IndexCache cache = new IndexCache(conf);

  final Path big = new Path(p, "bigIndex");
  final String user =
      UserGroupInformation.getCurrentUser().getShortUserName();
  writeFile(fs, big, bytesPerFile, partsPerMap);

  // Run multiple times.
  for (int i = 0; i < 20; ++i) {
    Thread getInfoThread = new Thread() {
      @Override
      public void run() {
        try {
          cache.getIndexInformation("bigIndex", partsPerMap, big, user);
        } catch (Exception e) {
          // should not be here
        }
      }
    };
    Thread removeMapThread = new Thread() {
      @Override
      public void run() {
        cache.removeMap("bigIndex");
      }
    };
    if (i % 2 == 0) {
      getInfoThread.start();
      removeMapThread.start();
    } else {
      removeMapThread.start();
      getInfoThread.start();
    }
    getInfoThread.join();
    removeMapThread.join();
    assertEquals(true, cache.checkTotalMemoryUsed());
  }
}
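Alternating which thread starts first, and joining both before the next iteration, makes each of the 20 rounds a fresh interleaving of the reader and the remover; cache.checkTotalMemoryUsed() then verifies that the cache's memory accounting survived the race, the invariant that MAPREDUCE-2541 repaired.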
Example #6
Source File: GenerateDistCacheData.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated.
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file.
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task.
  final long targetSize = Math.max(totalSize / numSplits,
      DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {
      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(sequenceFile, splitStartPosition,
            splitSize, (String[]) null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(sequenceFile, splitStartPosition,
        bytesRemaining, (String[]) null));
  }
  return splits;
}
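As a worked example of the sizing (all figures hypothetical): 10 task trackers at the default of 2 map slots each give numSplits = 20, so a totalSize of 2 GB yields a targetSize of roughly 100 MB, unless DistributedCacheEmulator.AVG_BYTES_PER_MAP is larger, in which case that floor wins. The reader loop then packs whole files greedily, cutting a split as soon as the next file would push it past targetSize.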
Example #7
Source File: TestResourceUsageEmulators.java From hadoop with Apache License 2.0 (identical code also appears in big-c)
/**
 * Test {@link LoadJob.ResourceUsageMatcherRunner}.
 */
@Test
@SuppressWarnings("unchecked")
public void testResourceUsageMatcherRunner() throws Exception {
  Configuration conf = new Configuration();
  FakeProgressive progress = new FakeProgressive();

  // Set the resource calculator plugin.
  conf.setClass(TTConfig.TT_RESOURCE_CALCULATOR_PLUGIN,
      DummyResourceCalculatorPlugin.class,
      ResourceCalculatorPlugin.class);

  // Set the resource-usage emulation plugin implementation class.
  conf.setClass(ResourceUsageMatcher.RESOURCE_USAGE_EMULATION_PLUGINS,
      TestResourceUsageEmulatorPlugin.class,
      ResourceUsageEmulatorPlugin.class);

  long currentTime = System.currentTimeMillis();

  // Initialize the matcher class.
  TaskAttemptID id = new TaskAttemptID("test", 1, TaskType.MAP, 1, 1);
  StatusReporter reporter = new DummyReporter(progress);
  TaskInputOutputContext context =
      new MapContextImpl(conf, id, null, null, null, reporter, null);
  FakeResourceUsageMatcherRunner matcher =
      new FakeResourceUsageMatcherRunner(context, null);

  // Check if the matcher initialized the plugin.
  String identifier = TestResourceUsageEmulatorPlugin.DEFAULT_IDENTIFIER;
  long initTime =
      TestResourceUsageEmulatorPlugin.testInitialization(identifier, conf);
  assertTrue("ResourceUsageMatcherRunner failed to initialize the"
      + " configured plugin", initTime > currentTime);

  // Check the progress.
  assertEquals("Progress mismatch in ResourceUsageMatcherRunner",
      0, progress.getProgress(), 0D);

  // Call match() and check progress.
  progress.setProgress(0.01f);
  currentTime = System.currentTimeMillis();
  matcher.test();
  long emulateTime =
      TestResourceUsageEmulatorPlugin.testEmulation(identifier, conf);
  assertTrue("ProgressBasedResourceUsageMatcher failed to load and emulate"
      + " the configured plugin", emulateTime > currentTime);
}
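The assertions use a timestamp handshake: TestResourceUsageEmulatorPlugin records when it was initialized and when it emulated, and the test merely checks that each recorded time falls after the currentTime snapshot taken beforehand, which demonstrates that the runner actually drove the configured plugin.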