Java Code Examples for org.apache.spark.TaskContext#getPartitionId()
The following examples show how to use org.apache.spark.TaskContext#getPartitionId(). The method returns the id of the partition that the currently running task is computing; if no TaskContext is active (for example, when called on the driver or during local execution), it returns 0.
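Before the project examples, here is a minimal, self-contained sketch of the pattern they all share (a hypothetical demo class, assuming Spark 2.x or later, where mapPartitions functions return an Iterator): the call is only meaningful inside a function executed as part of a task, such as a mapPartitions closure.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.spark.TaskContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class PartitionIdDemo {
    public static void main(String[] args) {
        try (JavaSparkContext sc = new JavaSparkContext("local[2]", "partition-id-demo")) {
            JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6), 3);
            JavaRDD<String> tagged = numbers.mapPartitions(it -> {
                // Runs on the executor inside a task, so the id identifies
                // the partition this task is processing.
                int partitionId = TaskContext.getPartitionId();
                List<String> out = new ArrayList<>();
                while (it.hasNext()) {
                    out.add("partition-" + partitionId + ": " + it.next());
                }
                return out.iterator();
            });
            tagged.collect().forEach(System.out::println);
        }
    }
}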
Example 1
Source File: StreamNodeLoader.java From sylph with Apache License 2.0
public static Iterator<Row> transFunction(Iterator<Row> partition, RealTimeTransForm realTimeTransForm)
{
    Exception errorOrNull = null;
    Schema schema = realTimeTransForm.getSchema(); // if not null
    List<Row> list = new ArrayList<>();
    try {
        int partitionId = TaskContext.getPartitionId();
        if (realTimeTransForm.open(partitionId, 0)) {
            partition.forEachRemaining(row -> {
                realTimeTransForm.process(SparkRecord.make(row), (transOutrow) -> {
                    //TODO: SparkRow.parserRow(x) with schema ?
                    list.add(SparkRecord.parserRow(transOutrow));
                });
            });
        }
    }
    catch (Exception e) {
        errorOrNull = e; // the transform failed; discard this entire batch of data
    }
    finally {
        realTimeTransForm.close(errorOrNull); //destroy()
    }
    return list.iterator();
}
Example 2
Source File: ExpKeyFilenameMap.java From incubator-retired-pirk with Apache License 2.0
@Override
public Iterator<Tuple2<Integer, String>> call(Iterator<Tuple2<Integer, Iterable<Tuple2<Integer, BigInteger>>>> iter) throws Exception
{
    List<Tuple2<Integer, String>> keyFileList = new ArrayList<>();

    FileSystem fs = FileSystem.get(new Configuration());

    // Form the filename for the exp table portion that corresponds to this partition
    int taskId = TaskContext.getPartitionId();
    logger.info("taskId = " + taskId);

    String fileName = expOutDir + "/exp-" + String.format("%05d", taskId);
    logger.info("fileName = " + fileName);

    // Iterate over the elements of the partition
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(fileName), true)));
    while (iter.hasNext()) {
        // <queryHash, <<power>,<element^power mod N^2>>
        Tuple2<Integer, Iterable<Tuple2<Integer, BigInteger>>> expTuple = iter.next();
        int queryHash = expTuple._1;

        // Record the queryHash -> fileName
        keyFileList.add(new Tuple2<>(queryHash, fileName));

        // Write the partition elements to the corresponding exp table file
        // each line: queryHash,<power>-<element^power mod N^2>
        for (Tuple2<Integer, BigInteger> modPow : expTuple._2) {
            String lineOut = queryHash + "," + modPow._1 + "-" + modPow._2;
            bw.write(lineOut);
            bw.newLine();
        }
    }
    bw.close();

    return keyFileList.iterator();
}
Example 3
Source File: SparkFactDistinct.java From kylin-on-parquet-v2 with Apache License 2.0
private void init() throws IOException {
    taskId = TaskContext.getPartitionId();
    kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
        cubeDesc = cubeInstance.getDescriptor();
        cubeConfig = cubeInstance.getConfig();
        reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);

        result = Lists.newArrayList();

        if (reducerMapping.isCuboidRowCounterReducer(taskId)) {
            // hll
            isStatistics = true;
            baseCuboidId = cubeInstance.getCuboidScheduler().getBaseCuboidId();
            baseCuboidRowCountInMappers = Lists.newArrayList();
            cuboidHLLMap = Maps.newHashMap();
            logger.info("Partition {} handling stats", taskId);
        } else {
            // normal col
            col = reducerMapping.getColForReducer(taskId);
            Preconditions.checkNotNull(col);

            isDimensionCol = cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col) && col.getType().needCompare();
            isDictCol = cubeDesc.getAllColumnsNeedDictionaryBuilt().contains(col);

            // local build dict
            buildDictInReducer = kConfig.isBuildDictInReducerEnabled();
            if (cubeDesc.getDictionaryBuilderClass(col) != null) { // only works with default dictionary builder
                buildDictInReducer = false;
            }

            if (reducerMapping.getReducerNumForDimCol(col) > 1) {
                buildDictInReducer = false; // only works if this is the only reducer of a dictionary column
            }

            if (buildDictInReducer) {
                builder = DictionaryGenerator.newDictionaryBuilder(col.getType());
                builder.init(null, 0, null);
            }
            logger.info("Partition {} handling column {}, buildDictInReducer={}", taskId, col, buildDictInReducer);
        }

        initialized = true;
    }
}
Example 4
Source File: SparkTaskContextSupplier.java From hudi with Apache License 2.0
public Supplier<Integer> getPartitionIdSupplier() {
    return () -> TaskContext.getPartitionId();
}
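The Supplier indirection means the partition id is looked up lazily on the executor rather than captured eagerly on the driver. Below is a sketch of how such a supplier might be consumed; PartitionIdSupplierDemo is a hypothetical stand-in for Hudi's class, written as Serializable (as Hudi's TaskContextSupplier is) so the object can travel with a Spark closure.

import java.io.Serializable;
import java.util.Arrays;
import java.util.function.Supplier;

import org.apache.spark.TaskContext;
import org.apache.spark.api.java.JavaSparkContext;

public class PartitionIdSupplierDemo implements Serializable {

    // Mirrors the Hudi method above: the TaskContext lookup is deferred
    // until get() is invoked, which should happen inside a task.
    public Supplier<Integer> getPartitionIdSupplier() {
        return () -> TaskContext.getPartitionId();
    }

    public static void main(String[] args) {
        PartitionIdSupplierDemo contextSupplier = new PartitionIdSupplierDemo();
        try (JavaSparkContext sc = new JavaSparkContext("local[2]", "supplier-demo")) {
            sc.parallelize(Arrays.asList("a", "b", "c", "d"), 2)
              .foreachPartition(it -> {
                  // Evaluated on the executor, where a TaskContext is active.
                  int partitionId = contextSupplier.getPartitionIdSupplier().get();
                  it.forEachRemaining(s -> System.out.println("partition-" + partitionId + ": " + s));
              });
        }
    }
}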