Java Code Examples for org.apache.hadoop.mapred.JobConf#set()
The following examples show how to use org.apache.hadoop.mapred.JobConf#set().
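Before the project-specific examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what JobConf#set() does: it stores a string property in the job configuration, and the value can later be read back with JobConf#get(), for example inside a task. The property name "my.custom.property" is a placeholder chosen only for illustration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetSketch {
  public static void main(String[] args) {
    // JobConf#set(String, String) stores an arbitrary string property in the
    // job configuration; JobConf#get(String) reads it back later.
    // "my.custom.property" is a hypothetical key used only for this sketch.
    JobConf conf = new JobConf();
    conf.set("my.custom.property", "some-value");
    System.out.println(conf.get("my.custom.property")); // prints "some-value"
  }
}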
Example 1
Source File: TestTaskTrackerInstrumentation.java From RDFS with Apache License 2.0
@Test
public void testCreateInstrumentationWithMultipleClasses() {
  // Set up configuration to create two dummy instrumentation objects
  JobConf conf = new JobConf();
  String dummyClass = DummyTaskTrackerInstrumentation.class.getName();
  String classList = dummyClass + "," + dummyClass;
  conf.set("mapred.tasktracker.instrumentation", classList);
  TaskTracker tracker = new TaskTracker();

  // Check that a composite instrumentation object is created
  TaskTrackerInstrumentation inst =
      TaskTracker.createInstrumentation(tracker, conf);
  assertEquals(CompositeTaskTrackerInstrumentation.class.getName(),
      inst.getClass().getName());

  // Check that each member of the composite is a dummy instrumentation
  CompositeTaskTrackerInstrumentation comp =
      (CompositeTaskTrackerInstrumentation) inst;
  List<TaskTrackerInstrumentation> insts = comp.getInstrumentations();
  assertEquals(2, insts.size());
  assertEquals(DummyTaskTrackerInstrumentation.class.getName(),
      insts.get(0).getClass().getName());
  assertEquals(DummyTaskTrackerInstrumentation.class.getName(),
      insts.get(1).getClass().getName());
}
Example 2
Source File: OfficeFormatHadoopExcelLowFootPrintStaXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013SingleSheetEncryptedNegativeLowFootprint()
    throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013encrypt.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  job.set("hadoopoffice.read.lowFootprint", "true");
  job.set("hadoopoffice.read.lowFootprint.parser", "stax");
  // for decryption simply set the password
  job.set("hadoopoffice.read.security.crypt.password", "test2");
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader =
      format.getRecordReader(inputSplits[0], job, reporter);
  assertNull(reader, "Null record reader implies invalid password");
}
Example 3
Source File: TestMRAppMaster.java From big-c with Apache License 2.0
@Test
public void testMRAppMasterMissingStaging() throws IOException,
    InterruptedException {
  String applicationAttemptIdStr = "appattempt_1317529182569_0004_000002";
  String containerIdStr = "container_1317529182569_0004_000002_1";
  String userName = "TestAppMasterUser";
  JobConf conf = new JobConf();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  ApplicationAttemptId applicationAttemptId = ConverterUtils
      .toApplicationAttemptId(applicationAttemptIdStr);

  // Delete the staging directory
  File dir = new File(stagingDir);
  if (dir.exists()) {
    FileUtils.deleteDirectory(dir);
  }

  ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
  MRAppMaster appMaster =
      new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
          System.currentTimeMillis(), false, false);
  boolean caught = false;
  try {
    MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
  } catch (IOException e) {
    // The IO Exception is expected
    LOG.info("Caught expected Exception", e);
    caught = true;
  }
  assertTrue(caught);
  assertTrue(appMaster.errorHappenedShutDown);
  // Copying the history file is disabled, but it is not really visible from
  // here
  assertEquals(JobStateInternal.ERROR, appMaster.forcedState);
  appMaster.stop();
}
Example 4
Source File: AbstractMROldApiSaveTest.java From elasticsearch-hadoop with Apache License 2.0
@Test
public void testUpsertScript() throws Exception {
  JobConf conf = createJobConf();
  conf.set(ConfigurationOptions.ES_RESOURCE,
      resource("mroldapi-upsert-script", "data", clusterInfo.getMajorVersion()));
  conf.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, "yes");
  conf.set(ConfigurationOptions.ES_WRITE_OPERATION, "upsert");
  conf.set(ConfigurationOptions.ES_MAPPING_ID, "number");
  conf.set(ConfigurationOptions.ES_UPDATE_SCRIPT_INLINE, "counter = 1");

  runJob(conf);
}
Example 5
Source File: HoodieMergeOnReadTestUtils.java From hudi with Apache License 2.0
private static void setPropsForInputFormat(FileInputFormat inputFormat,
    JobConf jobConf, Schema schema, String basePath) {
  List<Schema.Field> fields = schema.getFields();
  String names = fields.stream().map(f -> f.name().toString())
      .collect(Collectors.joining(","));
  String postions = fields.stream().map(f -> String.valueOf(f.pos()))
      .collect(Collectors.joining(","));
  Configuration conf = HoodieTestUtils.getDefaultHadoopConf();
  String hiveColumnNames = fields.stream()
      .filter(field -> !field.name().equalsIgnoreCase("datestr"))
      .map(Schema.Field::name).collect(Collectors.joining(","));
  hiveColumnNames = hiveColumnNames + ",datestr";

  String hiveColumnTypes = HoodieAvroUtils
      .addMetadataColumnTypes(HoodieTestDataGenerator.TRIP_HIVE_COLUMN_TYPES);
  hiveColumnTypes = hiveColumnTypes + ",string";

  jobConf.set(hive_metastoreConstants.META_TABLE_COLUMNS, hiveColumnNames);
  jobConf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, hiveColumnTypes);
  jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
  jobConf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions);
  jobConf.set(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, "datestr");

  conf.set(hive_metastoreConstants.META_TABLE_COLUMNS, hiveColumnNames);
  conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, names);
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions);
  conf.set(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, "datestr");
  conf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, hiveColumnTypes);

  // Hoodie Input formats are also configurable
  Configurable configurable = (Configurable) inputFormat;
  configurable.setConf(conf);
  jobConf.addResource(conf);
}
Example 6
Source File: TestReduceProcessor.java From tez with Apache License 2.0
public void setUpJobConf(JobConf job) {
  job.set(TezRuntimeFrameworkConfigs.LOCAL_DIRS, workDir.toString());
  job.set(MRConfig.LOCAL_DIR, workDir.toString());
  job.setClass(
      Constants.TEZ_RUNTIME_TASK_OUTPUT_MANAGER,
      TezTaskOutputFiles.class,
      TezTaskOutput.class);
  job.set(TezRuntimeConfiguration.TEZ_RUNTIME_PARTITIONER_CLASS,
      MRPartitioner.class.getName());
  job.setNumReduceTasks(1);
  job.setInt(MRJobConfig.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, 1);
}
Example 7
Source File: TestKeyFieldBasedPartitioner.java From hadoop with Apache License 2.0
@Test
public void testMultiConfigure() {
  KeyFieldBasedPartitioner<Text, Text> kfbp =
      new KeyFieldBasedPartitioner<Text, Text>();
  JobConf conf = new JobConf();
  conf.set(KeyFieldBasedPartitioner.PARTITIONER_OPTIONS, "-k1,1");
  kfbp.setConf(conf);
  Text key = new Text("foo\tbar");
  Text val = new Text("val");
  int partNum = kfbp.getPartition(key, val, 4096);
  kfbp.configure(conf);
  assertEquals(partNum, kfbp.getPartition(key, val, 4096));
}
Example 8
Source File: ValueAggregatorJob.java From hadoop-gpu with Apache License 2.0
public static void setAggregatorDescriptors(JobConf job,
    Class<? extends ValueAggregatorDescriptor>[] descriptors) {
  job.setInt("aggregator.descriptor.num", descriptors.length);
  // specify the aggregator descriptors
  for (int i = 0; i < descriptors.length; i++) {
    job.set("aggregator.descriptor." + i,
        "UserDefined," + descriptors[i].getName());
  }
}
Example 9
Source File: TestMRAppMaster.java From big-c with Apache License 2.0
@Test
public void testMRAppMasterMidLock() throws IOException,
    InterruptedException {
  String applicationAttemptIdStr = "appattempt_1317529182569_0004_000002";
  String containerIdStr = "container_1317529182569_0004_000002_1";
  String userName = "TestAppMasterUser";
  JobConf conf = new JobConf();
  conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
  ApplicationAttemptId applicationAttemptId = ConverterUtils
      .toApplicationAttemptId(applicationAttemptIdStr);
  JobId jobId = TypeConverter.toYarn(
      TypeConverter.fromYarn(applicationAttemptId.getApplicationId()));
  Path start = MRApps.getStartJobCommitFile(conf, userName, jobId);
  FileSystem fs = FileSystem.get(conf);
  // Create the file, but no end file so we should unregister with an error.
  fs.create(start).close();
  ContainerId containerId = ConverterUtils.toContainerId(containerIdStr);
  MRAppMaster appMaster =
      new MRAppMasterTest(applicationAttemptId, containerId, "host", -1, -1,
          System.currentTimeMillis(), false, false);
  boolean caught = false;
  try {
    MRAppMaster.initAndStartAppMaster(appMaster, conf, userName);
  } catch (IOException e) {
    // The IO Exception is expected
    LOG.info("Caught expected Exception", e);
    caught = true;
  }
  assertTrue(caught);
  assertTrue(appMaster.errorHappenedShutDown);
  assertEquals(JobStateInternal.ERROR, appMaster.forcedState);
  appMaster.stop();

  // verify the final status is FAILED
  verifyFailedStatus((MRAppMasterTest) appMaster, "FAILED");
}
Example 10
Source File: AbstractMROldApiSaveTest.java From elasticsearch-hadoop with Apache License 2.0
@Test(expected = IOException.class)
public void testUpdateWithoutUpsert() throws Exception {
  JobConf conf = createJobConf();
  conf.set(ConfigurationOptions.ES_WRITE_OPERATION, "update");
  conf.set(ConfigurationOptions.ES_MAPPING_ID, "number");
  conf.set(ConfigurationOptions.ES_RESOURCE,
      resource("mroldapi-updatewoupsert", "data", clusterInfo.getMajorVersion()));

  runJob(conf);
}
Example 11
Source File: AbstractMROldApiSaveTest.java From elasticsearch-hadoop with Apache License 2.0
@Test
public void testCreateWithId() throws Exception {
  JobConf conf = createJobConf();
  conf.set(ConfigurationOptions.ES_WRITE_OPERATION, "create");
  conf.set(ConfigurationOptions.ES_MAPPING_ID, "number");
  conf.set(ConfigurationOptions.ES_RESOURCE,
      resource("mroldapi-createwithid", "data", clusterInfo.getMajorVersion()));

  runJob(conf);
}
Example 12
Source File: LinkRank.java From anthelion with Apache License 2.0
/**
 * Runs the link analysis job. The link analysis job applies the link rank
 * formula to create a score per url and stores that score in the NodeDb.
 *
 * Typically the link analysis job is run a number of times to allow the link
 * rank scores to converge.
 *
 * @param nodeDb The node database from which we are getting previous link
 * rank scores.
 * @param inverted The inverted inlinks
 * @param output The link analysis output.
 * @param iteration The current iteration number.
 * @param numIterations The total number of link analysis iterations
 *
 * @throws IOException If an error occurs during link analysis.
 */
private void runAnalysis(Path nodeDb, Path inverted, Path output,
    int iteration, int numIterations, float rankOne) throws IOException {

  JobConf analyzer = new NutchJob(getConf());
  analyzer.set("link.analyze.iteration", String.valueOf(iteration + 1));
  analyzer.setJobName("LinkAnalysis Analyzer, iteration " + (iteration + 1)
      + " of " + numIterations);
  FileInputFormat.addInputPath(analyzer, nodeDb);
  FileInputFormat.addInputPath(analyzer, inverted);
  FileOutputFormat.setOutputPath(analyzer, output);
  analyzer.set("link.analyze.rank.one", String.valueOf(rankOne));
  analyzer.setMapOutputKeyClass(Text.class);
  analyzer.setMapOutputValueClass(ObjectWritable.class);
  analyzer.setInputFormat(SequenceFileInputFormat.class);
  analyzer.setMapperClass(Analyzer.class);
  analyzer.setReducerClass(Analyzer.class);
  analyzer.setOutputKeyClass(Text.class);
  analyzer.setOutputValueClass(Node.class);
  analyzer.setOutputFormat(MapFileOutputFormat.class);
  analyzer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  LOG.info("Starting analysis job");
  try {
    JobClient.runJob(analyzer);
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished analysis job.");
}
Example 13
Source File: SparkUtil.java From spork with Apache License 2.0
public static JobConf newJobConf(PigContext pigContext) throws IOException {
  JobConf jobConf = new JobConf(
      ConfigurationUtil.toConfiguration(pigContext.getProperties()));
  jobConf.set("pig.pigContext", ObjectSerializer.serialize(pigContext));
  UDFContext.getUDFContext().serialize(jobConf);
  jobConf.set("udf.import.list",
      ObjectSerializer.serialize(PigContext.getPackageImportList()));
  return jobConf;
}
Example 14
Source File: OfficeFormatHadoopExcelLowFootPrintSAXTest.java From hadoopoffice with Apache License 2.0
@Test
public void readExcelInputFormatExcel2013MultiSheetAllLowFootPrint() throws IOException {
  JobConf job = new JobConf(defaultConf);
  ClassLoader classLoader = getClass().getClassLoader();
  String fileName = "excel2013testmultisheet.xlsx";
  String fileNameSpreadSheet = classLoader.getResource(fileName).getFile();
  Path file = new Path(fileNameSpreadSheet);
  FileInputFormat.setInputPaths(job, file);
  // set locale to the one of the test data
  job.set("hadoopoffice.read.locale.bcp47", "de");
  // low footprint
  job.set("hadoopoffice.read.lowFootprint", "true");
  job.set("hadoopoffice.read.lowFootprint.parser", "sax");
  ExcelFileInputFormat format = new ExcelFileInputFormat();
  format.configure(job);
  InputSplit[] inputSplits = format.getSplits(job, 1);
  assertEquals(1, inputSplits.length, "Only one split generated for Excel file");
  RecordReader<Text, ArrayWritable> reader =
      format.getRecordReader(inputSplits[0], job, reporter);
  assertNotNull(reader, "Format returned null RecordReader");
  Text spreadSheetKey = new Text();
  ArrayWritable spreadSheetValue = new ArrayWritable(SpreadSheetCellDAO.class);

  assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
      "Input Split for Excel file contains row 1 (first sheet)");
  assertEquals("[excel2013testmultisheet.xlsx]Sheet1!A1", spreadSheetKey.toString(),
      "Input Split for Excel file has keyname == \"[excel2013testmultisheet.xlsx]Sheet1!A1\"");
  assertEquals(4, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 1 with 4 columns");
  assertEquals("test1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 1 == \"test1\"");
  assertEquals("Sheet1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getSheetName(),
      "Input Split for Excel file contains row 1 with cell 1 sheetname == \"Sheet1\"");
  assertEquals("A1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getAddress(),
      "Input Split for Excel file contains row 1 with cell 1 address == \"A1\"");
  assertEquals("test2", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 2 == \"test2\"");
  assertEquals("test3", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 3 == \"test3\"");
  assertEquals("test4", ((SpreadSheetCellDAO) spreadSheetValue.get()[3]).getFormattedValue(),
      "Input Split for Excel file contains row 1 with cell 4 == \"test4\"");

  assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
      "Input Split for Excel file contains row 2 (first sheet)");
  assertEquals(1, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 2 with 1 column");
  assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 2 with cell 1 == \"4\"");

  assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
      "Input Split for Excel file contains row 3 (first sheet)");
  assertEquals(5, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 3 with 5 columns");
  assertEquals("31/12/99", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 3 with cell 1 == \"31/12/99\"");
  assertEquals("5", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
      "Input Split for Excel file contains row 3 with cell 2 == \"5\"");
  assertNull(spreadSheetValue.get()[2],
      "Input Split for Excel file contains row 3 with cell 3 == null");
  assertNull(spreadSheetValue.get()[3],
      "Input Split for Excel file contains row 3 with cell 4 == null");
  assertEquals("null", ((SpreadSheetCellDAO) spreadSheetValue.get()[4]).getFormattedValue(),
      "Input Split for Excel file contains row 3 with cell 5 == \"null\"");

  assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
      "Input Split for Excel file contains row 4 (first sheet)");
  assertEquals(1, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 4 with 1 column");
  assertEquals("1", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 4 with cell 1 == \"1\"");

  assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
      "Input Split for Excel file contains row 5 (first sheet)");
  assertEquals(3, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 5 with 3 columns");
  assertEquals("2", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 5 with cell 1 == \"2\"");
  assertEquals("6", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
      "Input Split for Excel file contains row 5 with cell 2 == \"6\"");
  assertEquals("10", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
      "Input Split for Excel file contains row 5 with cell 3 == \"10\"");

  assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
      "Input Split for Excel file contains row 6 (first sheet)");
  assertEquals(3, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 6 with 3 columns");
  assertEquals("3", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 6 with cell 1 == \"3\"");
  assertEquals("4", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
      "Input Split for Excel file contains row 6 with cell 2 == \"4\"");
  assertEquals("15", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
      "Input Split for Excel file contains row 6 with cell 3 == \"15\"");

  assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
      "Input Split for Excel file contains row 7 (second sheet)");
  assertEquals("8", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 7 with cell 1 == \"8\"");
  assertEquals("99", ((SpreadSheetCellDAO) spreadSheetValue.get()[1]).getFormattedValue(),
      "Input Split for Excel file contains row 7 with cell 2 == \"99\"");
  assertEquals(2, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 7 with 2 columns");

  assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
      "Input Split for Excel file contains row 8 (second sheet)");
  assertEquals(1, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 8 with 1 column");
  assertEquals("test", ((SpreadSheetCellDAO) spreadSheetValue.get()[0]).getFormattedValue(),
      "Input Split for Excel file contains row 8 with cell 1 == \"test\"");

  assertTrue(reader.next(spreadSheetKey, spreadSheetValue),
      "Input Split for Excel file contains row 9 (second sheet)");
  assertEquals(3, spreadSheetValue.get().length,
      "Input Split for Excel file contains row 9 with 3 columns");
  assertNull(spreadSheetValue.get()[0],
      "Input Split for Excel file contains row 9 with cell 1 == null");
  assertNull(spreadSheetValue.get()[1],
      "Input Split for Excel file contains row 9 with cell 2 == null");
  assertEquals("seven", ((SpreadSheetCellDAO) spreadSheetValue.get()[2]).getFormattedValue(),
      "Input Split for Excel file contains row 9 with cell 3 == \"seven\"");
}
Example 15
Source File: TestTaskAttempt.java From hadoop with Apache License 2.0
@Test
public void testAppDiognosticEventOnNewTask() throws Exception {
  ApplicationId appId = ApplicationId.newInstance(1, 2);
  ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(
      appId, 0);
  JobId jobId = MRBuilderUtils.newJobId(appId, 1);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
  TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
  Path jobFile = mock(Path.class);

  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptListener taListener = mock(TaskAttemptListener.class);
  when(taListener.getAddress()).thenReturn(
      new InetSocketAddress("localhost", 0));

  JobConf jobConf = new JobConf();
  jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
  jobConf.setBoolean("fs.file.impl.disable.cache", true);
  jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
  jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");

  TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
  when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });

  AppContext appCtx = mock(AppContext.class);
  ClusterInfo clusterInfo = mock(ClusterInfo.class);
  Resource resource = mock(Resource.class);
  when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
  when(resource.getMemory()).thenReturn(1024);

  TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler,
      jobFile, 1, splits, jobConf, taListener,
      new Token(), new Credentials(), new SystemClock(), appCtx);

  NodeId nid = NodeId.newInstance("127.0.0.1", 0);
  ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
  Container container = mock(Container.class);
  when(container.getId()).thenReturn(contId);
  when(container.getNodeId()).thenReturn(nid);
  when(container.getNodeHttpAddress()).thenReturn("localhost:0");

  taImpl.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptId,
      "Task got killed"));
  assertFalse(
      "InternalError occurred trying to handle TA_DIAGNOSTICS_UPDATE on assigned task",
      eventHandler.internalError);
}
Example 16
Source File: AbstractExtraMRTests.java From elasticsearch-hadoop with Apache License 2.0
private void runJob(JobConf conf) throws Exception {
  String string = conf.get(ConfigurationOptions.ES_RESOURCE);
  string = indexPrefix + (string.startsWith("/") ? string.substring(1) : string);
  conf.set(ConfigurationOptions.ES_RESOURCE, string);
  JobClient.runJob(conf);
}
Example 17
Source File: CloudBurst.java From emr-sample-apps with Apache License 2.0
public static RunningJob alignall(String refpath, String qrypath,
    String outpath, int MIN_READ_LEN, int MAX_READ_LEN, int K,
    int ALLOW_DIFFERENCES, boolean FILTER_ALIGNMENTS, int NUM_MAP_TASKS,
    int NUM_REDUCE_TASKS, int BLOCK_SIZE, int REDUNDANCY)
    throws IOException, Exception {
  int SEED_LEN = MIN_READ_LEN / (K + 1);
  int FLANK_LEN = MAX_READ_LEN - SEED_LEN + K;

  System.out.println("refpath: " + refpath);
  System.out.println("qrypath: " + qrypath);
  System.out.println("outpath: " + outpath);
  System.out.println("MIN_READ_LEN: " + MIN_READ_LEN);
  System.out.println("MAX_READ_LEN: " + MAX_READ_LEN);
  System.out.println("K: " + K);
  System.out.println("SEED_LEN: " + SEED_LEN);
  System.out.println("FLANK_LEN: " + FLANK_LEN);
  System.out.println("ALLOW_DIFFERENCES: " + ALLOW_DIFFERENCES);
  System.out.println("FILTER_ALIGNMENTS: " + FILTER_ALIGNMENTS);
  System.out.println("NUM_MAP_TASKS: " + NUM_MAP_TASKS);
  System.out.println("NUM_REDUCE_TASKS: " + NUM_REDUCE_TASKS);
  System.out.println("BLOCK_SIZE: " + BLOCK_SIZE);
  System.out.println("REDUNDANCY: " + REDUNDANCY);

  JobConf conf = new JobConf(MerReduce.class);
  conf.setJobName("CloudBurst");
  conf.setNumMapTasks(NUM_MAP_TASKS);
  conf.setNumReduceTasks(NUM_REDUCE_TASKS);

  FileInputFormat.addInputPath(conf, new Path(refpath));
  FileInputFormat.addInputPath(conf, new Path(qrypath));

  conf.set("refpath", refpath);
  conf.set("qrypath", qrypath);
  conf.set("MIN_READ_LEN", Integer.toString(MIN_READ_LEN));
  conf.set("MAX_READ_LEN", Integer.toString(MAX_READ_LEN));
  conf.set("K", Integer.toString(K));
  conf.set("SEED_LEN", Integer.toString(SEED_LEN));
  conf.set("FLANK_LEN", Integer.toString(FLANK_LEN));
  conf.set("ALLOW_DIFFERENCES", Integer.toString(ALLOW_DIFFERENCES));
  conf.set("BLOCK_SIZE", Integer.toString(BLOCK_SIZE));
  conf.set("REDUNDANCY", Integer.toString(REDUNDANCY));
  conf.set("FILTER_ALIGNMENTS", (FILTER_ALIGNMENTS ? "1" : "0"));

  conf.setMapperClass(MapClass.class);
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setMapOutputKeyClass(BytesWritable.class);
  conf.setMapOutputValueClass(BytesWritable.class);

  conf.setReducerClass(ReduceClass.class);
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(BytesWritable.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);

  Path oPath = new Path(outpath);
  FileOutputFormat.setOutputPath(conf, oPath);
  System.err.println(" Removing old results");
  FileSystem.get(conf).delete(oPath);

  RunningJob rj = JobClient.runJob(conf);
  System.err.println("CloudBurst Finished");
  return rj;
}
Example 18
Source File: TestPipeApplication.java From hadoop with Apache License 2.0
/**
 * test org.apache.hadoop.mapred.pipes.Application
 * test a internal functions: MessageType.REGISTER_COUNTER, INCREMENT_COUNTER, STATUS, PROGRESS...
 *
 * @throws Throwable
 */
@Test
public void testApplication() throws Throwable {
  JobConf conf = new JobConf();

  RecordReader<FloatWritable, NullWritable> rReader = new Reader();

  // client for test
  File fCommand = getFileCommand("org.apache.hadoop.mapred.pipes.PipeApplicationStub");

  TestTaskReporter reporter = new TestTaskReporter();

  File[] psw = cleanTokenPasswordFile();
  try {
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskName);
    conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());

    // token for authorization
    Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>(
        "user".getBytes(), "password".getBytes(), new Text("kind"), new Text(
            "service"));

    TokenCache.setJobToken(token, conf.getCredentials());
    FakeCollector output = new FakeCollector(new Counters.Counter(),
        new Progress());
    FileSystem fs = new RawLocalFileSystem();
    fs.setConf(conf);
    Writer<IntWritable, Text> wr = new Writer<IntWritable, Text>(conf, fs.create(
        new Path(workSpace.getAbsolutePath() + File.separator + "outfile")),
        IntWritable.class, Text.class, null, null, true);
    output.setWriter(wr);
    conf.set(Submitter.PRESERVE_COMMANDFILE, "true");

    initStdOut(conf);

    Application<WritableComparable<IntWritable>, Writable, IntWritable, Text> application =
        new Application<WritableComparable<IntWritable>, Writable, IntWritable, Text>(
            conf, rReader, output, reporter, IntWritable.class, Text.class);
    application.getDownlink().flush();

    application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));

    application.getDownlink().flush();

    application.waitForFinish();

    wr.close();

    // test getDownlink().mapItem();
    String stdOut = readStdOut(conf);
    assertTrue(stdOut.contains("key:3"));
    assertTrue(stdOut.contains("value:txt"));

    // reporter test counter, and status should be sended
    // test MessageType.REGISTER_COUNTER and INCREMENT_COUNTER
    assertEquals(1.0, reporter.getProgress(), 0.01);
    assertNotNull(reporter.getCounter("group", "name"));
    // test status MessageType.STATUS
    assertEquals(reporter.getStatus(), "PROGRESS");
    stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator
        + "outfile"));
    // check MessageType.PROGRESS
    assertEquals(0.55f, rReader.getProgress(), 0.001);
    application.getDownlink().close();
    // test MessageType.OUTPUT
    Entry<IntWritable, Text> entry = output.getCollect().entrySet()
        .iterator().next();
    assertEquals(123, entry.getKey().get());
    assertEquals("value", entry.getValue().toString());
    try {
      // try to abort
      application.abort(new Throwable());
      fail();
    } catch (IOException e) {
      // abort works ?
      assertEquals("pipe child exception", e.getMessage());
    }
  } finally {
    if (psw != null) {
      // remove password files
      for (File file : psw) {
        file.deleteOnExit();
      }
    }
  }
}
Example 19
Source File: TestPipesNonJavaInputFormat.java From hadoop with Apache License 2.0
/**
 * test PipesNonJavaInputFormat
 */
@Test
public void testFormat() throws IOException {

  PipesNonJavaInputFormat inputFormat = new PipesNonJavaInputFormat();
  JobConf conf = new JobConf();

  Reporter reporter = mock(Reporter.class);
  RecordReader<FloatWritable, NullWritable> reader = inputFormat
      .getRecordReader(new FakeSplit(), conf, reporter);
  assertEquals(0.0f, reader.getProgress(), 0.001);

  // input and output files
  File input1 = new File(workSpace + File.separator + "input1");
  if (!input1.getParentFile().exists()) {
    Assert.assertTrue(input1.getParentFile().mkdirs());
  }
  if (!input1.exists()) {
    Assert.assertTrue(input1.createNewFile());
  }

  File input2 = new File(workSpace + File.separator + "input2");
  if (!input2.exists()) {
    Assert.assertTrue(input2.createNewFile());
  }
  // set data for splits
  conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
      StringUtils.escapeString(input1.getAbsolutePath()) + ","
          + StringUtils.escapeString(input2.getAbsolutePath()));
  InputSplit[] splits = inputFormat.getSplits(conf, 2);
  assertEquals(2, splits.length);

  PipesNonJavaInputFormat.PipesDummyRecordReader dummyRecordReader =
      new PipesNonJavaInputFormat.PipesDummyRecordReader(conf, splits[0]);
  // empty dummyRecordReader
  assertNull(dummyRecordReader.createKey());
  assertNull(dummyRecordReader.createValue());
  assertEquals(0, dummyRecordReader.getPos());
  assertEquals(0.0, dummyRecordReader.getProgress(), 0.001);
  // test method next
  assertTrue(dummyRecordReader.next(new FloatWritable(2.0f), NullWritable.get()));
  assertEquals(2.0, dummyRecordReader.getProgress(), 0.001);
  dummyRecordReader.close();
}
Example 20
Source File: Submitter.java From big-c with Apache License 2.0
/**
 * Set the configuration, if it doesn't already have a value for the given
 * key.
 * @param conf the configuration to modify
 * @param key the key to set
 * @param value the new "default" value to set
 */
private static void setIfUnset(JobConf conf, String key, String value) {
  if (conf.get(key) == null) {
    conf.set(key, value);
  }
}