org.apache.spark.RangePartitioner Java Examples

The following examples show how to use org.apache.spark.RangePartitioner. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestPartitionerFactory.java    From envelope with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that a config with {@code type = "range"} makes
 * {@code PartitionerFactory.create} return a {@link RangePartitioner}
 * reporting 10 partitions for the dummy RDD built by {@code getDummyRDD(10)}.
 */
@Test
public void testRange() {
  Map<String, Object> configMap = Maps.newHashMap();
  configMap.put("type", "range");
  Config config = ConfigFactory.parseMap(configMap);

  // NOTE(review): the argument 10 is presumably the partition count of the
  // dummy RDD; confirm against getDummyRDD.
  JavaPairRDD<Row, Row> base = getDummyRDD(10);

  Partitioner p = PartitionerFactory.create(config, base);

  assertTrue(p instanceof RangePartitioner);
  // JUnit's contract is assertEquals(expected, actual); keeping that order
  // makes the failure message read correctly.
  assertEquals(10, p.numPartitions());
}
 
Example #2
Source File: Partitioning.java    From Apache-Spark-2x-for-Java-Developers with MIT License 2 votes vote down vote up
/**
 * Demonstrates range partitioning of a pair RDD.
 *
 * <p>Builds an eight-element {@code (Integer, String)} pair RDD in 3 slices,
 * prints its partition count, repartitions it with a 4-partition
 * {@link RangePartitioner}, and prints which partition each key landed in.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
		// NOTE(review): hard-coded, machine-specific Windows path for winutils;
		// adjust or remove on other environments.
		System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
		SparkConf conf = new SparkConf().setMaster("local").setAppName("Partitioning");

		// JavaSparkContext is Closeable: try-with-resources guarantees the context
		// is stopped even if the job below throws, instead of leaking it.
		try (JavaSparkContext jsc = new JavaSparkContext(conf)) {

			JavaPairRDD<Integer, String> pairRdd = jsc.parallelizePairs(
					Arrays.asList(new Tuple2<Integer, String>(1, "A"), new Tuple2<Integer, String>(2, "B"),
							new Tuple2<Integer, String>(3, "C"), new Tuple2<Integer, String>(4, "D"),
							new Tuple2<Integer, String>(5, "E"), new Tuple2<Integer, String>(6, "F"),
							new Tuple2<Integer, String>(7, "G"), new Tuple2<Integer, String>(8, "H")), 3);

			// RangePartitioner is a Scala API and needs the underlying Scala RDD.
			RDD<Tuple2<Integer, String>> rdd = JavaPairRDD.toRDD(pairRdd);

			System.out.println(pairRdd.getNumPartitions());

			// Alternative for comparison: hash partitioning into 2 partitions.
//			JavaPairRDD<Integer, String> hashPartitioned = pairRdd.partitionBy(new HashPartitioner(2));
//			System.out.println(hashPartitioned.getNumPartitions());

			// Raw type kept deliberately: Scala's Ordering.Int$ and the ClassTag do not
			// line up with Java generics without unchecked casts. Arguments are the
			// partition count, the RDD, the ascending flag, the key ordering and the
			// key class tag.
			RangePartitioner rangePartitioner = new RangePartitioner(4, rdd, true,
					scala.math.Ordering.Int$.MODULE$, scala.reflect.ClassTag$.MODULE$.apply(Integer.class));

			JavaPairRDD<Integer, String> rangePartitioned = pairRdd.partitionBy(rangePartitioner);

			// Tag every key with the index of the partition that holds it.
			JavaRDD<String> mapPartitionsWithIndex = rangePartitioned.mapPartitionsWithIndex((index, tupleIterator) -> {
				List<String> list = new ArrayList<>();
				while (tupleIterator.hasNext()) {
					list.add("Partition number:" + index + ",key:" + tupleIterator.next()._1());
				}
				return list.iterator();
			}, true);

			System.out.println(mapPartitionsWithIndex.collect());
		}
	}