Java Code Examples for org.apache.hadoop.mapreduce.InputSplit#getLocations()
The following examples show how to use org.apache.hadoop.mapreduce.InputSplit#getLocations().
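An InputSplit's getLocations() returns the names of the nodes where the split's data resides. The MapReduce scheduler uses these names as locality hints when assigning tasks, and the array may legitimately be empty. As a rough sketch of the contract (the class name HostListSplit and its fields are hypothetical, not taken from any of the projects below), a custom split might look like this:

import java.io.IOException;
import org.apache.hadoop.mapreduce.InputSplit;

// Hypothetical split that reports a fixed set of preferred hosts.
public class HostListSplit extends InputSplit {

    private final long length;
    private final String[] hosts;

    public HostListSplit(long length, String[] hosts) {
        this.length = length;
        this.hosts = hosts;
    }

    @Override
    public long getLength() throws IOException, InterruptedException {
        // Size of the split's data in bytes.
        return length;
    }

    @Override
    public String[] getLocations() throws IOException, InterruptedException {
        // Locality hints only; the framework may still run the task elsewhere.
        return hosts;
    }
}

Note that a real split normally also implements Writable (or is otherwise serializable) so the framework can ship it to tasks. The examples below show how production code builds these location arrays, typically by merging the locations of child splits.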
Example 1
Source File: TabletSplitSplit.java From datawave with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 *
 * @throws InterruptedException
 */
public String[] getLocations() throws IOException, InterruptedException {
    HashSet<String> hosts = new HashSet<>();
    for (InputSplit s : splits) {
        String[] hints = s.getLocations();
        if (hints != null && hints.length > 0) {
            Collections.addAll(hosts, hints);
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
Example 2
Source File: RedisHashRecordReader.java From Redis-4.x-Cookbook with MIT License
public void initialize(InputSplit split, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    host = split.getLocations()[0];
    prefix = ((RedisHashInputSplit) split).getPrefix();
    key = ((RedisHashInputSplit) split).getKey();
    String hashKey = prefix + ":" + key;
    jedis = new Jedis(host);
    log.info("Connect to " + host);
    jedis.connect();
    jedis.getClient().setTimeoutInfinite();
    totalKVs = jedis.hlen(hashKey);
    keyValueMapIter = jedis.hgetAll(hashKey).entrySet().iterator();
}
Example 3
Source File: CompositeInputSplit.java From hadoop with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 */
public String[] getLocations() throws IOException, InterruptedException {
    HashSet<String> hosts = new HashSet<String>();
    for (InputSplit s : splits) {
        String[] hints = s.getLocations();
        if (hints != null && hints.length > 0) {
            for (String host : hints) {
                hosts.add(host);
            }
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
Example 4
Source File: JobSplit.java From hadoop with Apache License 2.0
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
    try {
        this.locations = split.getLocations();
        this.inputDataLength = split.getLength();
        this.startOffset = startOffset;
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    }
}
Example 5
Source File: CombineDocumentSplit.java From marklogic-contentpump with Apache License 2.0
public CombineDocumentSplit(List<FileSplit> splits)
        throws IOException, InterruptedException {
    this.splits = splits;
    locations = new HashSet<String>();
    for (InputSplit split : splits) {
        length += split.getLength();
        for (String loc : split.getLocations()) {
            if (!locations.contains(loc)) {
                locations.add(loc);
            }
        }
    }
}
Example 6
Source File: CompositeInputSplit.java From big-c with Apache License 2.0
/**
 * Collect a set of hosts from all child InputSplits.
 */
public String[] getLocations() throws IOException, InterruptedException {
    HashSet<String> hosts = new HashSet<String>();
    for (InputSplit s : splits) {
        String[] hints = s.getLocations();
        if (hints != null && hints.length > 0) {
            for (String host : hints) {
                hosts.add(host);
            }
        }
    }
    return hosts.toArray(new String[hosts.size()]);
}
Example 7
Source File: JobSplit.java From big-c with Apache License 2.0
public SplitMetaInfo(InputSplit split, long startOffset) throws IOException {
    try {
        this.locations = split.getLocations();
        this.inputDataLength = split.getLength();
        this.startOffset = startOffset;
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    }
}
Example 8
Source File: PigSplit.java From spork with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public String[] getLocations() throws IOException, InterruptedException {
    if (locations == null) {
        HashMap<String, Long> locMap = new HashMap<String, Long>();
        Long lenInMap;
        for (InputSplit split : wrappedSplits) {
            String[] locs = split.getLocations();
            for (String loc : locs) {
                if ((lenInMap = locMap.get(loc)) == null)
                    locMap.put(loc, split.getLength());
                else
                    locMap.put(loc, lenInMap + split.getLength());
            }
        }
        Set<Map.Entry<String, Long>> entrySet = locMap.entrySet();
        Map.Entry<String, Long>[] hostSize =
            entrySet.toArray(new Map.Entry[entrySet.size()]);
        Arrays.sort(hostSize, new Comparator<Map.Entry<String, Long>>() {
            @Override
            public int compare(Entry<String, Long> o1, Entry<String, Long> o2) {
                long diff = o1.getValue() - o2.getValue();
                if (diff < 0) return 1;
                if (diff > 0) return -1;
                return 0;
            }
        });
        // maximum 5 locations are in list: refer to PIG-1648 for more details
        int nHost = Math.min(hostSize.length, 5);
        locations = new String[nHost];
        for (int i = 0; i < nHost; ++i) {
            locations[i] = hostSize[i].getKey();
        }
    }
    return locations;
}
Example 9
Source File: TestCombineFileInputFormat.java From hadoop with Apache License 2.0
@Test
public void testNodeDistribution() throws IOException, InterruptedException {
    DummyInputFormat inFormat = new DummyInputFormat();
    int numBlocks = 60;
    long totLength = 0;
    long blockSize = 100;
    int numNodes = 10;

    long minSizeNode = 50;
    long minSizeRack = 50;
    int maxSplitSize = 200; // 4 blocks per split.

    String[] locations = new String[numNodes];
    for (int i = 0; i < numNodes; i++) {
        locations[i] = "h" + i;
    }
    String[] racks = new String[0];
    Path path = new Path("hdfs://file");

    OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
    int hostCountBase = 0;
    // Generate block list. Replication 3 per block.
    for (int i = 0; i < numBlocks; i++) {
        int localHostCount = hostCountBase;
        String[] blockHosts = new String[3];
        for (int j = 0; j < 3; j++) {
            int hostNum = localHostCount % numNodes;
            blockHosts[j] = "h" + hostNum;
            localHostCount++;
        }
        hostCountBase++;
        blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts, racks);
        totLength += blockSize;
    }

    List<InputSplit> splits = new ArrayList<InputSplit>();
    HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
    HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
    HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
    Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();

    OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes);

    inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
        maxSplitSize, minSizeNode, minSizeRack, splits);

    int expectedSplitCount = (int) (totLength / maxSplitSize);
    assertEquals(expectedSplitCount, splits.size());

    // Ensure 90+% of the splits have node local blocks.
    // 100% locality may not always be achieved.
    int numLocalSplits = 0;
    for (InputSplit inputSplit : splits) {
        assertEquals(maxSplitSize, inputSplit.getLength());
        if (inputSplit.getLocations().length == 1) {
            numLocalSplits++;
        }
    }
    assertTrue(numLocalSplits >= 0.9 * splits.size());
}
Example 10
Source File: JobSplit.java From hadoop with Apache License 2.0
public TaskSplitMetaInfo(InputSplit split, long startOffset)
        throws InterruptedException, IOException {
    this(new TaskSplitIndex("", startOffset), split.getLocations(),
        split.getLength());
}
Example 11
Source File: TestCombineFileInputFormat.java From big-c with Apache License 2.0
@Test
public void testNodeDistribution() throws IOException, InterruptedException {
    DummyInputFormat inFormat = new DummyInputFormat();
    int numBlocks = 60;
    long totLength = 0;
    long blockSize = 100;
    int numNodes = 10;

    long minSizeNode = 50;
    long minSizeRack = 50;
    int maxSplitSize = 200; // 4 blocks per split.

    String[] locations = new String[numNodes];
    for (int i = 0; i < numNodes; i++) {
        locations[i] = "h" + i;
    }
    String[] racks = new String[0];
    Path path = new Path("hdfs://file");

    OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
    int hostCountBase = 0;
    // Generate block list. Replication 3 per block.
    for (int i = 0; i < numBlocks; i++) {
        int localHostCount = hostCountBase;
        String[] blockHosts = new String[3];
        for (int j = 0; j < 3; j++) {
            int hostNum = localHostCount % numNodes;
            blockHosts[j] = "h" + hostNum;
            localHostCount++;
        }
        hostCountBase++;
        blocks[i] = new OneBlockInfo(path, i * blockSize, blockSize, blockHosts, racks);
        totLength += blockSize;
    }

    List<InputSplit> splits = new ArrayList<InputSplit>();
    HashMap<String, Set<String>> rackToNodes = new HashMap<String, Set<String>>();
    HashMap<String, List<OneBlockInfo>> rackToBlocks = new HashMap<String, List<OneBlockInfo>>();
    HashMap<OneBlockInfo, String[]> blockToNodes = new HashMap<OneBlockInfo, String[]>();
    Map<String, Set<OneBlockInfo>> nodeToBlocks = new TreeMap<String, Set<OneBlockInfo>>();

    OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes, nodeToBlocks, rackToNodes);

    inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,
        maxSplitSize, minSizeNode, minSizeRack, splits);

    int expectedSplitCount = (int) (totLength / maxSplitSize);
    assertEquals(expectedSplitCount, splits.size());

    // Ensure 90+% of the splits have node local blocks.
    // 100% locality may not always be achieved.
    int numLocalSplits = 0;
    for (InputSplit inputSplit : splits) {
        assertEquals(maxSplitSize, inputSplit.getLength());
        if (inputSplit.getLocations().length == 1) {
            numLocalSplits++;
        }
    }
    assertTrue(numLocalSplits >= 0.9 * splits.size());
}
Example 12
Source File: JobSplit.java From big-c with Apache License 2.0
public TaskSplitMetaInfo(InputSplit split, long startOffset)
        throws InterruptedException, IOException {
    this(new TaskSplitIndex("", startOffset), split.getLocations(),
        split.getLength());
}
Example 13
Source File: CqlRecordReader.java From stratio-cassandra with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null)
            return;

        // create a Cluster instance
        String[] locations = split.getLocations();
        cluster = CqlConfigHelper.getInputCluster(locations, conf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    if (cluster != null)
        session = cluster.connect(quote(keyspace));

    if (session == null)
        throw new RuntimeException("Can't create connection session");

    // get negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion();

    // If the user provides a CQL query then we will use it without validation,
    // otherwise we will fall back to building a query using the:
    //   inputColumns
    //   whereClauses
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    // validate that the user hasn't tried to give us a custom query along with
    // input columns and where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns)
                || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }

    if (StringUtils.isEmpty(cqlQuery))
        cqlQuery = buildQuery();
    logger.debug("cqlQuery {}", cqlQuery);

    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}