org.apache.hadoop.mapreduce.Partitioner Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.Partitioner.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TableShardCountCollapserTest.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
/**
 * Writes a one-document Lucene index for shard {@code i} into {@code path}.
 *
 * <p>Before the document is added, the method asserts that a {@link HashPartitioner}
 * maps the key {@code i} to partition {@code i} when there are {@code totalShardCount}
 * partitions.
 *
 * @param configuration configuration handed to the {@code HdfsDirectory}
 * @param i shard index; also used as the partitioner key and passed to {@code getDoc}
 * @param path location of the shard's index directory
 * @param totalShardCount number of partitions given to the partitioner
 * @throws IOException if the index cannot be opened, written or closed
 */
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount) throws IOException {
    HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
    mergePolicy.setUseCompoundFile(false);
    IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);
    try {
        Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
        int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
        assertEquals(i, partition);
        Document doc = getDoc(i);
        indexWriter.addDocument(doc);
    } finally {
        // Close the writer even when the assertion or addDocument throws, so the
        // writer is not left open behind a failed test.
        indexWriter.close();
    }
}
Example #2
Source File: DelegatingPartitioner.java From datawave with Apache License 2.0 | 6 votes |
@Override // delegates partitioning public int getPartition(BulkIngestKey key, Value value, int numPartitions) { Text tableName = key.getTableName(); Partitioner<BulkIngestKey,Value> partitioner = partitionerCache.getPartitioner(tableName); int partition = partitioner.getPartition(key, value, numPartitions); Integer offset = this.tableOffsets.get(tableName); if (null != offset) { return (offset + partition) % numPartitions; } else { return partition % numPartitions; } }
Example #3
Source File: PartitionerCache.java From datawave with Apache License 2.0 | 6 votes |
/**
 * Filters a list of table names, returning only ones with valid partitioners.
 *
 * <p>For every table that has a partitioner override, the partitioner is created and
 * cached and the job is initialized with it. If that fails, a warning is logged and the
 * default partitioner is lazily initialized instead. Tables without an override also
 * trigger lazy initialization of the default partitioner.
 *
 * @param tableNames
 *            an array of table names, it's expected to include non-configured table names
 * @param job
 *            the job handed to {@code initializeJob} and {@code lazyInitializeDefaultPartitioner}
 * @return only the table names that were configured with valid partitioners.
 */
public List<String> validatePartitioners(String[] tableNames, Job job) {
    List<String> validTableNames = new ArrayList<>();
    for (String tableName : tableNames) {
        if (hasPartitionerOverride(new Text(tableName))) {
            try {
                Partitioner<BulkIngestKey,Value> partitionerForTable = cachePartitioner(new Text(tableName));
                initializeJob(job, partitionerForTable);
                validTableNames.add(tableName);
            } catch (Exception e) {
                // Deliberately best-effort: a broken per-table configuration must not
                // abort validation of the remaining tables.
                log.warn("Unable to create the partitioner for " + tableName + " despite its configuration. "
                                + "Will use the default partitioner for this table.", e);
                lazyInitializeDefaultPartitioner(job);
            }
        } else {
            lazyInitializeDefaultPartitioner(job);
        }
    }
    return validTableNames;
}
Example #4
Source File: TableShardCountCollapserTest.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
/**
 * Verifies that every document stored in each shard belongs to that shard.
 *
 * <p>For each shard index below {@code totalShardCount}, the shard's index is opened and
 * the numeric {@code "id"} field of every document is run through a
 * {@link HashPartitioner}. The resulting partition must equal the shard index.
 *
 * @param totalShardCount number of shards to inspect and number of partitions given to the partitioner
 * @throws IOException if a shard index cannot be opened or read
 */
private void assertData(int totalShardCount) throws IOException {
    Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
    for (int i = 0; i < totalShardCount; i++) {
        HdfsDirectory directory = new HdfsDirectory(configuration, new Path(path, ShardUtil.getShardName(i)));
        DirectoryReader reader = DirectoryReader.open(directory);
        try {
            int numDocs = reader.numDocs();
            for (int d = 0; d < numDocs; d++) {
                Document document = reader.document(d);
                IndexableField field = document.getField("id");
                Integer id = (Integer) field.numericValue();
                int partition = partitioner.getPartition(new IntWritable(id), null, totalShardCount);
                assertEquals(i, partition);
            }
        } finally {
            // Release the reader even when an assertion fails part-way through the shard.
            reader.close();
        }
    }
}
Example #5
Source File: BalancedShardPartitionerTest.java From datawave with Apache License 2.0 | 6 votes |
public static void assertExpectedCollisions(Partitioner partitionerIn, int daysBack, int expectedCollisions) { String formattedDay = formatDay(daysBack); TreeSet<Integer> partitionsUsed = new TreeSet<>(); int collisions = 0; for (int i = 1; i < SHARDS_PER_DAY; i++) { String shardId = formattedDay + ("_" + i); int partition = partitionerIn.getPartition(new BulkIngestKey(new Text(TableName.SHARD), new Key(shardId)), new Value(), NUM_REDUCE_TASKS); if (partitionsUsed.contains(partition)) { collisions++; } partitionsUsed.add(partition); } // 9 is what we get by hashing the shardId Assert.assertTrue("For " + daysBack + " days ago, we had a different number of collisions: " + collisions, expectedCollisions >= collisions); // this // has // more to // do with // the // random // assignment // of the // tablets }
Example #6
Source File: HadoopTeraSortTest.java From ignite with Apache License 2.0 | 6 votes |
/**
 * Looks up the nested {@code TotalOrderPartitioner} class declared inside
 * {@code TeraSort} by its simple name.
 *
 * @return The class.
 * @throws IllegalStateException if {@code TeraSort} declares no nested class with that simple name
 */
private Class<? extends Partitioner> getTeraSortTotalOrderPartitioner() {
    for (Class<?> nested : TeraSort.class.getDeclaredClasses()) {
        if ("TotalOrderPartitioner".equals(nested.getSimpleName()))
            return (Class<? extends Partitioner>)nested;
    }

    throw new IllegalStateException("Failed to find TeraSort total order partitioner class.");
}
Example #7
Source File: HFileGenerator.java From terrapin with Apache License 2.0 | 5 votes |
/**
 * Generate hfiles for testing purpose.
 *
 * <p>One writer is opened per partition. Keys are the zero-padded six-digit decimal
 * strings {@code 0 .. numOfKeys-1}; the value is empty for keys 0 and 1 and
 * {@code "v" + (i + 1)} otherwise. Each key/value is appended to the writer selected by
 * the partitioner. Keys and values are encoded with {@code Bytes.toBytes(String)} so the
 * generated bytes do not depend on the platform default charset.
 *
 * @param sourceFileSystem source file system
 * @param conf configuration for hfile
 * @param outputFolder output folder for generated hfiles
 * @param partitionerType partitioner type
 * @param numOfPartitions number of partitions
 * @param numOfKeys number of keys
 * @return list of generated hfiles
 * @throws IOException if hfile creation goes wrong
 */
public static List<Path> generateHFiles(FileSystem sourceFileSystem, Configuration conf,
                                        File outputFolder, PartitionerType partitionerType,
                                        int numOfPartitions, int numOfKeys) throws IOException {
    StoreFile.Writer[] writers = new StoreFile.Writer[numOfPartitions];
    for (int i = 0; i < numOfPartitions; i++) {
        writers[i] = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), sourceFileSystem, 4096)
            .withFilePath(new Path(String.format("%s/%s", outputFolder.getAbsoluteFile(),
                TerrapinUtil.formatPartitionName(i))))
            .withCompression(Compression.Algorithm.NONE)
            .build();
    }
    Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
    for (int i = 0; i < numOfKeys; i++) {
        // Bytes.toBytes(String) replaces String.getBytes(), which uses the platform charset.
        byte[] key = Bytes.toBytes(String.format("%06d", i));
        byte[] value;
        if (i <= 1) {
            value = Bytes.toBytes("");
        } else {
            value = Bytes.toBytes("v" + (i + 1));
        }
        KeyValue kv = new KeyValue(key, Bytes.toBytes("cf"), Bytes.toBytes(""), value);
        int partition = partitioner.getPartition(new BytesWritable(key), new BytesWritable(value),
            numOfPartitions);
        writers[partition].append(kv);
    }
    for (int i = 0; i < numOfPartitions; i++) {
        writers[i].close();
    }
    return Lists.transform(Lists.newArrayList(writers), new Function<StoreFile.Writer, Path>() {
        @Override
        public Path apply(StoreFile.Writer writer) {
            return writer.getPath();
        }
    });
}
Example #8
Source File: PartitionerCache.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Resolves the partitioner class registered under {@code prefixCategoryPartitioner + identifier}
 * and returns a configured instance of it.
 *
 * @param prefixCategoryPartitioner prefix of the lookup key
 * @param identifier suffix of the lookup key; also passed to {@code createConfiguredPartitioner}
 * @return the configured partitioner instance
 * @throws ClassNotFoundException declared by this method; propagated from the class lookup
 */
private Partitioner<BulkIngestKey,Value> getConfiguredPartitioner(String prefixCategoryPartitioner, String identifier)
                throws ClassNotFoundException {
    final String lookupKey = prefixCategoryPartitioner + identifier;
    final Class<? extends Partitioner<BulkIngestKey,Value>> resolvedClass = getPartitionerClass(lookupKey);
    if (log.isDebugEnabled()) {
        log.debug("Found partitioner for " + lookupKey + ": " + resolvedClass);
    }
    return createConfiguredPartitioner(resolvedClass, identifier);
}
Example #9
Source File: MapFileOutputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Get an entry from output generated by this class.
 *
 * <p>The partitioner picks which reader to consult, using {@code readers.length} as the
 * partition count; the lookup is then delegated to that reader.
 *
 * @param readers one reader per partition
 * @param partitioner partitioner used to select the reader for {@code key}
 * @param key the key to look up
 * @param value the value object passed to the reader's {@code get}
 * @return whatever the selected reader's {@code get(key, value)} returns
 * @throws IOException if the reader fails
 */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers,
                      Partitioner<K, V> partitioner, K key, V value) throws IOException {
    final int readerCount = readers.length;
    final MapFile.Reader selected = readers[partitioner.getPartition(key, value, readerCount)];
    return selected.get(key, value);
}
Example #10
Source File: JobContextImpl.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Get the {@link Partitioner} class for the job.
 *
 * <p>Reads {@code PARTITIONER_CLASS_ATTR} from the configuration, falling back to
 * {@link HashPartitioner} as the default passed to {@code getClass}.
 *
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass()
    throws ClassNotFoundException {
    final Class<?> configured = conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
    return (Class<? extends Partitioner<?,?>>) configured;
}
Example #11
Source File: MapFileOutputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Get an entry from output generated by this class.
 *
 * <p>Asks the partitioner which of the {@code readers.length} partitions holds
 * {@code key}, then performs the lookup on the reader at that index.
 *
 * @param readers one reader per partition
 * @param partitioner partitioner used to choose the reader
 * @param key the key to look up
 * @param value the value object handed to the reader's {@code get}
 * @return the result of {@code get(key, value)} on the chosen reader
 * @throws IOException if the reader fails
 */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers,
                      Partitioner<K, V> partitioner, K key, V value) throws IOException {
    final int index = partitioner.getPartition(key, value, readers.length);
    final MapFile.Reader chosen = readers[index];
    return chosen.get(key, value);
}
Example #12
Source File: TablePartitionerOffsets.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Builds a sorted map from table name to the number of partitions its partitioner reports.
 *
 * <p>Tables whose partitioner is a {@code DelegatePartitioner} contribute the value of
 * {@code getNumPartitions()}; all other tables are recorded as {@link Integer#MAX_VALUE}.
 *
 * @param tableNames the tables to look up in the partitioner cache
 * @return a map keyed by table name
 * @throws ClassNotFoundException declared by this method's signature
 */
private TreeMap<Text,Integer> getMaxNumPartitionsPerTable(List<Text> tableNames) throws ClassNotFoundException {
    // Diamond instead of the raw TreeMap, which produced an unchecked-conversion warning.
    TreeMap<Text,Integer> maxPartitionsByTable = new TreeMap<>();
    for (Text tableName : tableNames) {
        Partitioner<BulkIngestKey,Value> partitioner = partitionerCache.getPartitioner(tableName);
        if (partitioner instanceof DelegatePartitioner) {
            maxPartitionsByTable.put(tableName, ((DelegatePartitioner) partitioner).getNumPartitions());
        } else {
            maxPartitionsByTable.put(tableName, Integer.MAX_VALUE);
        }
    }
    return maxPartitionsByTable;
}
Example #13
Source File: TerrapinUtil.java From terrapin with Apache License 2.0 | 5 votes |
/**
 * Returns the decimal string form of the partition that the given partitioner type
 * assigns to {@code key}.
 *
 * @param key buffer holding the key bytes, read via {@code BytesUtil.readBytesFromByteBufferWithoutConsume}
 * @param partitionerType selects the partitioner from {@code PartitionerFactory}
 * @param numPartitions total number of partitions
 * @return the partition number as a string
 */
public static String getPartitionName(ByteBuffer key, PartitionerType partitionerType,
                                      int numPartitions) {
    final Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
    final BytesWritable writableKey =
        new BytesWritable(BytesUtil.readBytesFromByteBufferWithoutConsume(key));
    // The value argument is passed as null.
    final int partition = partitioner.getPartition(writableKey, null, numPartitions);
    return String.valueOf(partition);
}
Example #14
Source File: JobContextImpl.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Get the {@link Partitioner} class for the job.
 *
 * <p>The class is read from the configuration under {@code PARTITIONER_CLASS_ATTR};
 * {@link HashPartitioner} is supplied as the default.
 *
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass()
    throws ClassNotFoundException {
    final Class<?> partitionerClass = conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
    return (Class<? extends Partitioner<?,?>>) partitionerClass;
}
Example #15
Source File: HadoopFormats.java From beam with Apache License 2.0 | 5 votes |
/**
 * Creates new instance of {@link Partitioner} by class specified in hadoop {@link Configuration}.
 *
 * <p>Instantiation is delegated to {@code createInstanceFromConfig}, keyed by
 * {@code MRJobConfig.PARTITIONER_CLASS_ATTR} with {@code DEFAULT_PARTITIONER_CLASS_ATTR}
 * as the second lookup argument.
 *
 * @param conf hadoop Configuration
 * @param <KeyT> KeyType of {@link Partitioner}
 * @param <ValueT> ValueType of {@link Partitioner}
 * @return new {@link Partitioner}
 */
@SuppressWarnings("unchecked")
static <KeyT, ValueT> Partitioner<KeyT, ValueT> getPartitioner(Configuration conf) {
    final Object instance =
        createInstanceFromConfig(
            conf,
            MRJobConfig.PARTITIONER_CLASS_ATTR,
            DEFAULT_PARTITIONER_CLASS_ATTR,
            Partitioner.class);
    return (Partitioner<KeyT, ValueT>) instance;
}
Example #16
Source File: PartitionerFactory.java From terrapin with Apache License 2.0 | 5 votes |
/**
 * Get the partitioner for a partitioner type.
 *
 * <p>{@code PartitionerType.CASCADING} yields {@code CASCADING_PARTITIONER} and
 * {@code PartitionerType.MODULUS} yields {@code HASH_PARTITIONER}. Any other type is
 * rejected with a {@link RuntimeException}.
 *
 * @param type the partitioner type to resolve
 * @return the shared partitioner instance for {@code type}
 * @throws RuntimeException if {@code type} is neither CASCADING nor MODULUS
 */
public static Partitioner getPartitioner(PartitionerType type) {
    if (type.equals(PartitionerType.CASCADING)) {
        return CASCADING_PARTITIONER;
    }
    if (type.equals(PartitionerType.MODULUS)) {
        return HASH_PARTITIONER;
    }
    throw new RuntimeException("Unsupported ShardFunction." + type);
}
Example #17
Source File: BaseUploader.java From terrapin with Apache License 2.0 | 5 votes |
/** * Validates the first non-empty partition hfile has right partitioning function. * It reads several keys, then calculates the partition according to the partitioning function * client offering. If the calculated partition number is different with actual partition number * an exception is thrown. If all partition hfiles are empty, an exception is thrown. * * @param parts full absolute path for all partitions * @param partitionerType type of paritioning function * @param numShards total number of partitions * @throws IOException if something goes wrong when reading the hfiles * @throws IllegalArgumentException if the partitioner type is wrong or all partitions are empty */ public void validate(List<Path> parts, PartitionerType partitionerType, int numShards) throws IOException { boolean hasNonEmptyPartition = false; HColumnDescriptor columnDescriptor = new HColumnDescriptor(); // Disable block cache to ensure it reads the actual file content. columnDescriptor.setBlockCacheEnabled(false); for (int shardIndex = 0; shardIndex < parts.size(); shardIndex++) { Path fileToBeValidated = parts.get(shardIndex); HFile.Reader reader = null; try { FileSystem fs = FileSystem.newInstance(fileToBeValidated.toUri(), conf); CacheConfig cc = new CacheConfig(conf, columnDescriptor); reader = HFile.createReader(fs, fileToBeValidated, cc); Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType); byte[] rowKey = reader.getFirstRowKey(); if (rowKey == null) { LOG.warn(String.format("empty partition %s", fileToBeValidated.toString())); reader.close(); continue; } hasNonEmptyPartition = true; BytesWritable key = new BytesWritable(rowKey); int partition = partitioner.getPartition(key, null, numShards); if (partition != shardIndex) { throw new IllegalArgumentException( String.format("wrong partition type %s for key %s in partition %d, expected %d", partitionerType.toString(), new String(key.getBytes()), shardIndex, partition) ); } } finally { if (reader != null) { 
reader.close(); } } } if (!hasNonEmptyPartition) { throw new IllegalArgumentException("all partitions are empty"); } }
Example #18
Source File: PartitionerCache.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Creates the partitioner configured for a category and caches it under both the
 * category name and the table name.
 *
 * @param tableName table to register the partitioner for
 * @param categoryName category whose configuration selects the partitioner
 * @return the newly created partitioner, shared by both cache entries
 * @throws ClassNotFoundException propagated from {@code getConfiguredPartitioner}
 */
private Partitioner<BulkIngestKey,Value> cachePartitionerForCategoryAndTable(Text tableName, Text categoryName)
                throws ClassNotFoundException {
    final Partitioner<BulkIngestKey,Value> shared =
                    getConfiguredPartitioner(PREFIX_CATEGORY_PARTITIONER, categoryName.toString());
    addToCache(categoryName, shared);
    addToCache(tableName, shared);
    return shared;
}
Example #19
Source File: JobContextImpl.java From tez with Apache License 2.0 | 5 votes |
/**
 * Get the {@link Partitioner} class for the job.
 *
 * <p>Looks up {@code PARTITIONER_CLASS_ATTR} in the configuration, passing
 * {@link HashPartitioner} as the default class.
 *
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass()
    throws ClassNotFoundException {
    final Class<?> fromConf = conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
    return (Class<? extends Partitioner<?,?>>) fromConf;
}
Example #20
Source File: PartitionerCache.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Returns the partitioner cached for a table, or the default partitioner when the
 * cache has no entry for it.
 *
 * @param tableName
 *            the table to look up
 * @return the cached partitioner for this table name (which may be a dedicated or shared partitioner)
 */
public Partitioner<BulkIngestKey,Value> getPartitioner(Text tableName) {
    if (log.isDebugEnabled()) {
        log.debug("Looking up partitioner for " + tableName);
    }

    final Partitioner<BulkIngestKey,Value> hit = configuredPartitionerCache.get(tableName);
    if (hit == null) {
        return getDefaultPartitioner();
    }

    if (log.isTraceEnabled()) {
        log.trace("Found partitioner in cache for table " + tableName + ": " + hit.getClass().getName());
    }
    return hit;
}
Example #21
Source File: PartitionerCache.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Returns the default delegate partitioner, creating it on first use.
 *
 * <p>The class comes from {@code getPartitionerClass} using
 * {@code DEFAULT_DELEGATE_PARTITIONER}, with {@code MultiTableRangePartitioner} passed as
 * the default. The instance is created with a {@code null} prefix.
 *
 * @return the cached instance
 */
private Partitioner<BulkIngestKey,Value> getDefaultPartitioner() {
    if (defaultDelegatePartitioner != null) {
        return defaultDelegatePartitioner;
    }

    final Class<? extends Partitioner<BulkIngestKey,Value>> defaultClass =
                    getPartitionerClass(DEFAULT_DELEGATE_PARTITIONER, MultiTableRangePartitioner.class, Partitioner.class);
    defaultDelegatePartitioner = createConfiguredPartitioner(defaultClass, null);
    log.info("Created default Partitioner: " + defaultClass.getName());
    return defaultDelegatePartitioner;
}
Example #22
Source File: PartitionerCache.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Creates (or reuses) and caches the partitioner for a table.
 *
 * <p>Category membership is checked first, then a dedicated partitioner. A table that
 * has neither is rejected.
 *
 * @param tableName the table to cache a partitioner for
 * @return the partitioner now cached for {@code tableName}
 * @throws ClassNotFoundException propagated from the cache-update helpers
 * @throws IllegalStateException if the table is neither in a category nor has a dedicated partitioner
 */
private Partitioner<BulkIngestKey,Value> cachePartitioner(Text tableName) throws ClassNotFoundException {
    if (isMemberOfACategory(tableName)) {
        final Text category = getCategory(conf, tableName);
        return updateCacheForCategoryMember(tableName, category);
    }
    if (hasDedicatedPartitioner(tableName)) {
        return updateCacheForDedicatedPartitioner(tableName);
    }
    throw new IllegalStateException(
                    tableName + " is not configured properly for a partitioner. " + "It shouldn't have made it into the list at all.");
}
Example #23
Source File: PartitionerCache.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Ensures a table that belongs to a category is mapped to that category's partitioner.
 *
 * <p>If the category already has a cached partitioner, it is registered for the table
 * when missing. Otherwise a new partitioner is created and cached for both the category
 * and the table.
 *
 * @param tableName the member table
 * @param categoryName the category the table belongs to
 * @return the category's partitioner
 * @throws ClassNotFoundException propagated from {@code cachePartitionerForCategoryAndTable}
 */
private Partitioner<BulkIngestKey,Value> updateCacheForCategoryMember(Text tableName, Text categoryName)
                throws ClassNotFoundException {
    final Partitioner<BulkIngestKey,Value> cached = configuredPartitionerCache.get(new Text(categoryName));
    if (cached == null) {
        return cachePartitionerForCategoryAndTable(tableName, categoryName);
    }
    addPartitionerForTableIfMissing(tableName, cached);
    return cached;
}
Example #24
Source File: JobContextImpl.java From incubator-tez with Apache License 2.0 | 5 votes |
/**
 * Get the {@link Partitioner} class for the job.
 *
 * <p>The configuration entry {@code PARTITIONER_CLASS_ATTR} is consulted, with
 * {@link HashPartitioner} given as the default class.
 *
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass()
    throws ClassNotFoundException {
    final Class<?> jobPartitioner = conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
    return (Class<? extends Partitioner<?,?>>) jobPartitioner;
}
Example #25
Source File: PartitionerCache.java From datawave with Apache License 2.0 | 5 votes |
private Partitioner<BulkIngestKey,Value> createConfiguredPartitioner(Class<? extends Partitioner<BulkIngestKey,Value>> clazz, String prefix) { try { Partitioner<BulkIngestKey,Value> partitioner = clazz.newInstance(); if (partitioner instanceof Configurable) { ((Configurable) partitioner).setConf(conf); } // If this supports by-table configurations, attempt to use it if (prefix != null && partitioner instanceof DelegatePartitioner) { ((DelegatePartitioner) partitioner).configureWithPrefix(prefix); } return partitioner; } catch (Exception e) { throw new RuntimeException("Unable to instantiate delegate partitioner class: " + e.getMessage(), e); } }
Example #26
Source File: MockupMapContext.java From kylin with Apache License 2.0 | 4 votes |
/**
 * Not supported by this context; every call throws.
 *
 * @throws NotImplementedException always
 */
@Override
public Class<? extends Partitioner<?, ?>> getPartitionerClass()
        throws ClassNotFoundException {
    throw new NotImplementedException();
}
Example #27
Source File: TestContext.java From Cubert with Apache License 2.0 | 4 votes |
/**
 * This context has no partitioner class to report.
 *
 * @return always {@code null}
 */
@Override
public Class<? extends Partitioner<?, ?>> getPartitionerClass()
        throws ClassNotFoundException {
    return null;
}
Example #28
Source File: MockupMapContext.java From kylin with Apache License 2.0 | 4 votes |
/**
 * Unsupported operation in this context; calling it always fails.
 *
 * @throws NotImplementedException always
 */
@Override
public Class<? extends Partitioner<?, ?>> getPartitionerClass()
        throws ClassNotFoundException {
    throw new NotImplementedException();
}
Example #29
Source File: NativeMapContext.java From geowave with Apache License 2.0 | 4 votes |
/**
 * Forwards the request to the wrapped {@code context}.
 *
 * @return the partitioner class reported by the wrapped context
 * @throws ClassNotFoundException if the wrapped context throws it
 */
@Override
public Class<? extends Partitioner<?, ?>> getPartitionerClass()
        throws ClassNotFoundException {
    final Class<? extends Partitioner<?, ?>> delegated = context.getPartitionerClass();
    return delegated;
}
Example #30
Source File: MapReduceExecutionUtil.java From jumbune with GNU Lesser General Public License v3.0 | 4 votes |
/**
 * Supplies the initial value for this holder.
 *
 * @return always {@code null}
 */
@Override
protected Partitioner initialValue() {
    return null;
}