Java Code Examples for org.apache.hadoop.mapred.JobConf#setInt()
The following examples show how to use org.apache.hadoop.mapred.JobConf#setInt().
Each example is drawn from an open-source project; the source file and license are noted above it.
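Before the project examples, here is a minimal sketch of the method itself: JobConf#setInt(String, int) stores an integer under a configuration key, and the matching getInt(String, int) reads it back, returning the supplied default when the key is unset. The key names and default values below are illustrative only, not taken from any of the projects.

import org.apache.hadoop.mapred.JobConf;

public class SetIntSketch {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Store an int under an application-defined key (hypothetical name).
    job.setInt("example.max.retries", 5);
    // Read it back; the second argument is the default if the key is absent.
    int retries = job.getInt("example.max.retries", 1);
    int missing = job.getInt("example.not.set", 1); // returns the default, 1
    System.out.println(retries + " " + missing);
  }
}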
Example 1
Source File: TestDatamerge.java From hadoop with Apache License 2.0
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/" + jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr",
      CompositeInputFormat.compose(jointype, SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));
  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Example 2
Source File: TestMiniCoronaRunJob.java From RDFS with Apache License 2.0
public void testMemoryLimit() throws Exception {
  LOG.info("Starting testMemoryLimit");
  JobConf conf = new JobConf();
  conf.setInt(CoronaConf.NODE_RESERVED_MEMORY_MB, Integer.MAX_VALUE);
  corona = new MiniCoronaCluster.Builder().conf(conf).numTaskTrackers(2).build();
  final JobConf jobConf = corona.createJobConf();
  long start = System.currentTimeMillis();
  FutureTask<Boolean> task = submitSleepJobFutureTask(jobConf);
  checkTaskNotDone(task, 10);
  NodeManager nm = corona.getClusterManager().getNodeManager();
  nm.getResourceLimit().setNodeReservedMemoryMB(0);
  Assert.assertTrue(task.get());
  long end = System.currentTimeMillis();
  LOG.info("Task Done. Verifying");
  new ClusterManagerMetricsVerifier(corona.getClusterManager(),
      1, 1, 1, 1, 1, 1, 0, 0).verifyAll();
  LOG.info("Time spent for testMemoryLimit:" + (end - start));
}
Example 3
Source File: MRTask.java From incubator-tez with Apache License 2.0
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION,
      taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());

  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);

  Path outputPath = FileOutputFormat.getOutputPath(jobConf);
  if (outputPath != null) {
    if ((committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(jobConf,
          ((FileOutputCommitter) committer).getTaskAttemptPath(taskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
    }
  }
}
Example 4
Source File: TeraSort.java From hadoop-book with Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
      "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  JobClient.runJob(job);
  LOG.info("done");
  return 0;
}
Example 5
Source File: MRTask.java From tez with Apache License 2.0
public void localizeConfiguration(JobConf jobConf)
    throws IOException, InterruptedException {
  jobConf.set(JobContext.TASK_ID, taskAttemptId.getTaskID().toString());
  jobConf.set(JobContext.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(JobContext.TASK_PARTITION,
      taskAttemptId.getTaskID().getId());
  jobConf.set(JobContext.ID, taskAttemptId.getJobID().toString());

  jobConf.setBoolean(MRJobConfig.TASK_ISMAP, isMap);

  Path outputPath = FileOutputFormat.getOutputPath(jobConf);
  if (outputPath != null) {
    if ((committer instanceof FileOutputCommitter)) {
      FileOutputFormat.setWorkOutputPath(jobConf,
          ((FileOutputCommitter) committer).getTaskAttemptPath(taskAttemptContext));
    } else {
      FileOutputFormat.setWorkOutputPath(jobConf, outputPath);
    }
  }
}
Example 6
Source File: TestDeprecatedKeys.java From incubator-tez with Apache License 2.0
@Test
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);

  MRHelpers.translateVertexConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000, jobConf.getInt(TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0), 0.01f);
  assertEquals(0.22f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f, jobConf.getFloat(
      TezJobConfig.TEZ_RUNTIME_INPUT_BUFFER_PERCENT, 0), 0.01f);
}
Example 7
Source File: AbstractMROldApiSaveTest.java From elasticsearch-hadoop with Apache License 2.0
@Parameters
public static Collection<Object[]> configs() throws Exception {
  JobConf conf = HdpBootstrap.hadoopConfig();
  conf.setInputFormat(SplittableTextInputFormat.class);
  conf.setOutputFormat(EsOutputFormat.class);
  conf.setReducerClass(IdentityReducer.class);
  HadoopCfgUtils.setGenericOptions(conf);
  conf.setNumMapTasks(2);
  conf.setInt("actual.splits", 2);
  conf.setNumReduceTasks(0);

  JobConf standard = new JobConf(conf);
  standard.setMapperClass(TabMapper.class);
  standard.setMapOutputValueClass(LinkedMapWritable.class);
  standard.set(ConfigurationOptions.ES_INPUT_JSON, "false");
  FileInputFormat.setInputPaths(standard,
      new Path(MRSuite.testData.sampleArtistsDat(conf)));

  JobConf json = new JobConf(conf);
  json.setMapperClass(IdentityMapper.class);
  json.setMapOutputValueClass(Text.class);
  json.set(ConfigurationOptions.ES_INPUT_JSON, "true");
  FileInputFormat.setInputPaths(json,
      new Path(MRSuite.testData.sampleArtistsJson(conf)));

  return Arrays.asList(new Object[][] { { standard, "" }, { json, "json-" } });
}
Example 8
Source File: ValueAggregatorJob.java From hadoop with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job,
    Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  // specify the aggregator descriptors
  for (int i = 0; i < descriptors.length; i++) {
    job.set("aggregator.descriptor." + i,
        "UserDefined," + descriptors[i].getName());
  }
}
Example 9
Source File: TeraSort.java From RDFS with Apache License 2.0
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
      "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  long startTime = System.currentTimeMillis();
  JobClient.runJob(job);
  long endTime = System.currentTimeMillis();
  System.out.println((float) (endTime - startTime) / 1000);
  LOG.info("done");
  return 0;
}
Example 10
Source File: TestMRHelpers.java From tez with Apache License 2.0
@Test(timeout = 5000)
public void testContainerResourceConstruction() {
  JobConf conf = new JobConf(new Configuration());
  Resource mapResource = MRHelpers.getResourceForMRMapper(conf);
  Resource reduceResource = MRHelpers.getResourceForMRReducer(conf);

  Assert.assertEquals(MRJobConfig.DEFAULT_MAP_CPU_VCORES,
      mapResource.getVirtualCores());
  Assert.assertEquals(MRJobConfig.DEFAULT_MAP_MEMORY_MB,
      mapResource.getMemory());
  Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_CPU_VCORES,
      reduceResource.getVirtualCores());
  Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_MEMORY_MB,
      reduceResource.getMemory());

  conf.setInt(MRJobConfig.MAP_CPU_VCORES, 2);
  conf.setInt(MRJobConfig.MAP_MEMORY_MB, 123);
  conf.setInt(MRJobConfig.REDUCE_CPU_VCORES, 20);
  conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 1234);
  mapResource = MRHelpers.getResourceForMRMapper(conf);
  reduceResource = MRHelpers.getResourceForMRReducer(conf);

  Assert.assertEquals(2, mapResource.getVirtualCores());
  Assert.assertEquals(123, mapResource.getMemory());
  Assert.assertEquals(20, reduceResource.getVirtualCores());
  Assert.assertEquals(1234, reduceResource.getMemory());
}
Example 11
Source File: TestEncryptedShuffle.java From big-c with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
    throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
        KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
        useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
        new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();
    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
Example 12
Source File: TestEncryptedShuffle.java From hadoop with Apache License 2.0
private void encryptedShuffleWithCerts(boolean useClientCerts)
    throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
        KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
        useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
        new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();
    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
Example 13
Source File: TestKeyFieldBasedPartitioner.java From hadoop with Apache License 2.0
/**
 * Test that key-field-based partitioning works with an empty key.
 */
@Test
public void testEmptyKey() throws Exception {
  KeyFieldBasedPartitioner<Text, Text> kfbp =
      new KeyFieldBasedPartitioner<Text, Text>();
  JobConf conf = new JobConf();
  conf.setInt("num.key.fields.for.partition", 10);
  kfbp.configure(conf);
  assertEquals("Empty key should map to 0th partition",
      0, kfbp.getPartition(new Text(), new Text(), 10));
}
Example 14
Source File: MneMapredBufferDataTest.java From mnemonic with Apache License 2.0
@BeforeClass
public void setUp() throws IOException {
  m_workdir = new Path(
      System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
  m_conf = new JobConf();
  m_rand = Utils.createRandom();
  m_partfns = new ArrayList<String>();

  try {
    m_fs = FileSystem.getLocal(m_conf).getRaw();
    m_fs.delete(m_workdir, true);
    m_fs.mkdirs(m_workdir);
  } catch (IOException e) {
    throw new IllegalStateException("bad fs init", e);
  }

  m_conf.setInt(JobContext.TASK_PARTITION, TASK_PARTITION);

  MneConfigHelper.setDir(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
  MneConfigHelper.setBaseOutputName(m_conf, null, "mapred-buffer-data");

  MneConfigHelper.setMemServiceName(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
      new DurableType[] {DurableType.BUFFER});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[] {});

  MneConfigHelper.setMemServiceName(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
  MneConfigHelper.setSlotKeyId(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
  MneConfigHelper.setMemPoolSize(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
      1024L * 1024 * 1024 * 4);
  MneConfigHelper.setDurableTypes(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
      new DurableType[] {DurableType.BUFFER});
  MneConfigHelper.setEntityFactoryProxies(m_conf,
      MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[] {});
}
Example 15
Source File: ValueAggregatorJob.java From RDFS with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job,
    Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  // specify the aggregator descriptors
  for (int i = 0; i < descriptors.length; i++) {
    job.set("aggregator.descriptor." + i,
        "UserDefined," + descriptors[i].getName());
  }
}
Example 16
Source File: TestMapProcessor.java From tez with Apache License 2.0
@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);
  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
      new Path(workDir, "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");
  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      InputDescriptor.create(MRInputLegacy.class.getName())
          .setUserPayload(UserPayload.create(ByteBuffer.wrap(
              MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                  .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
                  .build().toByteArray()))),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
      1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(
      localFs, workDir, jobConf, 0, new Path(workDir, "map0"),
      new TestUmbilical(), dagName, vertexName,
      Collections.singletonList(mapInputSpec),
      Collections.singletonList(mapOutputSpec), sharedExecutor);

  ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
  Thread monitorProgress = new Thread(new Runnable() {
    @Override
    public void run() {
      float prog = task.getProgress();
      if (prog > 0.0f && prog < 1.0f)
        progressUpdate = prog;
    }
  });

  task.initialize();
  scheduler.scheduleAtFixedRate(monitorProgress, 0, 1, TimeUnit.MILLISECONDS);
  task.run();
  Assert.assertTrue("Progress Updates should be captured!",
      progressUpdate > 0.0f && progressUpdate < 1.0f);
  task.close();
  sharedExecutor.shutdownNow();
}
Example 17
Source File: TestReduceTaskFetchFail.java From RDFS with Apache License 2.0
@SuppressWarnings("deprecation") @Test public void testcheckAndInformJobTracker() throws Exception { //mock creation TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class); TaskReporter mockTaskReporter = mock(TaskReporter.class); JobConf conf = new JobConf(); conf.setUser("testuser"); conf.setJobName("testJob"); conf.setSessionId("testSession"); TaskAttemptID tid = new TaskAttemptID(); TestReduceTask rTask = new TestReduceTask(); rTask.setConf(conf); ReduceTask.ReduceCopier reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter); reduceCopier.checkAndInformJobTracker(1, tid, false); verify(mockTaskReporter, never()).progress(); reduceCopier.checkAndInformJobTracker(10, tid, false); verify(mockTaskReporter, times(1)).progress(); // Test the config setting conf.setInt("mapreduce.reduce.shuffle.maxfetchfailures", 3); rTask.setConf(conf); reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter); reduceCopier.checkAndInformJobTracker(1, tid, false); verify(mockTaskReporter, times(1)).progress(); reduceCopier.checkAndInformJobTracker(3, tid, false); verify(mockTaskReporter, times(2)).progress(); reduceCopier.checkAndInformJobTracker(5, tid, false); verify(mockTaskReporter, times(2)).progress(); reduceCopier.checkAndInformJobTracker(6, tid, false); verify(mockTaskReporter, times(3)).progress(); // test readError and its config reduceCopier.checkAndInformJobTracker(7, tid, true); verify(mockTaskReporter, times(4)).progress(); conf.setBoolean("mapreduce.reduce.shuffle.notify.readerror", false); rTask.setConf(conf); reduceCopier = rTask.new TestReduceCopier(mockUmbilical, conf, mockTaskReporter); reduceCopier.checkAndInformJobTracker(7, tid, true); verify(mockTaskReporter, times(4)).progress(); }
Example 18
Source File: DataFsck.java From RDFS with Apache License 2.0
List<JobContext> submitJobs(BufferedReader inputReader, int filesPerJob) throws IOException { boolean done = false; JobClient jClient = new JobClient(createJobConf()); List<JobContext> submitted = new ArrayList<JobContext>(); Random rand = new Random(); do { JobConf jobConf = createJobConf(); final String randomId = Integer.toString(rand.nextInt(Integer.MAX_VALUE), 36); Path jobDir = new Path(jClient.getSystemDir(), NAME + "_" + randomId); jobConf.set(JOB_DIR_LABEL, jobDir.toString()); Path log = new Path(jobDir, "_logs"); FileOutputFormat.setOutputPath(jobConf, log); LOG.info("log=" + log); // create operation list FileSystem fs = jobDir.getFileSystem(jobConf); Path opList = new Path(jobDir, "_" + OP_LIST_LABEL); jobConf.set(OP_LIST_LABEL, opList.toString()); int opCount = 0, synCount = 0; SequenceFile.Writer opWriter = null; try { opWriter = SequenceFile.createWriter(fs, jobConf, opList, Text.class, Text.class, SequenceFile.CompressionType.NONE); String f = null; do { f = inputReader.readLine(); if (f == null) { done = true; break; } opWriter.append(new Text(f), new Text(f)); opCount++; if (++synCount > SYNC_FILE_MAX) { opWriter.sync(); synCount = 0; } } while (opCount < filesPerJob); } finally { if (opWriter != null) { opWriter.close(); } fs.setReplication(opList, OP_LIST_REPLICATION); // increase replication for control file } jobConf.setInt(OP_COUNT_LABEL, opCount); RunningJob rJob = jClient.submitJob(jobConf); JobContext ctx = new JobContext(rJob, jobConf); submitted.add(ctx); } while (!done); return submitted; }
Example 19
Source File: TableMapReduceUtil.java From hbase with Apache License 2.0
/**
 * Sets the number of rows to return and cache with each scanner iteration.
 * Higher caching values will enable faster mapreduce jobs at the expense of
 * requiring more heap to contain the cached rows.
 *
 * @param job The current job configuration to adjust.
 * @param batchSize The number of rows to return in batch with each scanner
 *   iteration.
 */
public static void setScannerCaching(JobConf job, int batchSize) {
  job.setInt("hbase.client.scanner.caching", batchSize);
}
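As a hedged call-site sketch of the helper above (the job class name and the caching value of 500 are hypothetical, not from the source):

JobConf job = new JobConf(HBaseConfiguration.create(), MyTableJob.class); // MyTableJob is hypothetical
// Cache 500 rows per scanner RPC: fewer round trips at the cost of more client heap.
TableMapReduceUtil.setScannerCaching(job, 500);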
Example 20
Source File: HadoopTeraSortTest.java From ignite with Apache License 2.0
/**
 * Runs the actual TeraSort test job through the Ignite API.
 *
 * @param gzip Whether to use GZIP.
 */
protected final void teraSort(boolean gzip) throws Exception {
  System.out.println("TeraSort ===============================================================");

  getFileSystem().delete(new Path(sortOutDir), true);

  final JobConf jobConf = new JobConf();

  jobConf.setUser(getUser());
  jobConf.set("fs.defaultFS", getFsBase());

  log().info("Desired number of reduces: " + numReduces());
  jobConf.set("mapreduce.job.reduces", String.valueOf(numReduces()));

  log().info("Desired number of maps: " + numMaps());
  final long splitSize = dataSizeBytes() / numMaps();
  log().info("Desired split size: " + splitSize);

  // Force the split to be of the desired size:
  jobConf.set("mapred.min.split.size", String.valueOf(splitSize));
  jobConf.set("mapred.max.split.size", String.valueOf(splitSize));

  jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MAPPER_STRIPED_OUTPUT.propertyName(), true);
  jobConf.setInt(HadoopJobProperty.SHUFFLE_MSG_SIZE.propertyName(), 4096);

  if (gzip)
    jobConf.setBoolean(HadoopJobProperty.SHUFFLE_MSG_GZIP.propertyName(), true);

  jobConf.set(HadoopJobProperty.JOB_PARTIALLY_RAW_COMPARATOR.propertyName(),
      TextPartiallyRawComparator.class.getName());

  Job job = setupConfig(jobConf);

  HadoopJobId jobId = new HadoopJobId(UUID.randomUUID(), 1);

  IgniteInternalFuture<?> fut = grid(0).hadoop().submit(jobId,
      createJobInfo(job.getConfiguration(), null));

  fut.get();
}