Java Code Examples for org.apache.hadoop.mapreduce.Job#setJobName()
The following examples show how to use org.apache.hadoop.mapreduce.Job#setJobName().
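As a primer, here is a minimal, self-contained sketch of the pattern the examples below share. The class name and paths are placeholders, not taken from any of the projects that follow:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetJobNameExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);

    // The job name is a display label only: it appears in the
    // ResourceManager/JobHistory UI and in logs, and has no effect on
    // execution. Job.getInstance(conf, "name") is an equivalent shortcut
    // for setting it at creation time.
    job.setJobName("set-job-name-example");
    job.setJarByClass(SetJobNameExample.class);

    // With no mapper/reducer configured, Hadoop runs identity map and
    // reduce tasks, so this job simply copies its input through.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}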
Example 1
Source File: TestChainErrors.java From hadoop with Apache License 2.0
/**
 * Tests Reducer throwing exception.
 *
 * @throws Exception
 */
public void testReducerFail() throws Exception {
  Configuration conf = createJobConf();

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, 1, 1, input);
  job.setJobName("chain");

  ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  ChainReducer.setReducer(job, FailReduce.class, LongWritable.class,
      Text.class, LongWritable.class, Text.class, null);

  ChainReducer.addMapper(job, Mapper.class, LongWritable.class, Text.class,
      LongWritable.class, Text.class, null);

  job.waitForCompletion(true);
  assertTrue("Job Not failed", !job.isSuccessful());
}
Example 2
Source File: FailJob.java From big-c with Apache License 2.0
public Job createJob(boolean failMappers, boolean failReducers, Path inputFile)
    throws IOException {
  Configuration conf = getConf();
  conf.setBoolean(FAIL_MAP, failMappers);
  conf.setBoolean(FAIL_REDUCE, failReducers);
  Job job = Job.getInstance(conf, "fail");
  job.setJarByClass(FailJob.class);
  job.setMapperClass(FailMapper.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setReducerClass(FailReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setSpeculativeExecution(false);
  job.setJobName("Fail job");
  FileInputFormat.addInputPath(job, inputFile);
  return job;
}
Example 3
Source File: TeraChecksum.java From incubator-tez with Apache License 2.0
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSum");
  job.setJarByClass(TeraChecksum.class);
  job.setMapperClass(ChecksumMapper.class);
  job.setReducerClass(ChecksumReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Unsigned16.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 4
Source File: XflowStatic.java From bigdata-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 3) {
    System.err.println("Usage: xflowstatic <type> <in> <out>");
    System.exit(2);
  }
  conf.set(TYPE_KEY, otherArgs[0]);
  // Pass conf to the job so the TYPE_KEY setting above is visible to it
  // (the original created the job without the configuration).
  Job job = Job.getInstance(conf);
  job.setJobName("xflowstatic");
  job.setJarByClass(XflowStatic.class);
  job.setMapperClass(XflowMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 5
Source File: TeraGen.java From hadoop with Apache License 2.0
/**
 * @param args the cli arguments
 */
public int run(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  setNumberOfRows(job, parseHumanLong(args[0]));
  Path outputDir = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setJobName("TeraGen");
  job.setJarByClass(TeraGen.class);
  job.setMapperClass(SortGenMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(RangeInputFormat.class);
  job.setOutputFormatClass(TeraOutputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 6
Source File: MultiFileWordCount.java From big-c with Apache License 2.0
public int run(String[] args) throws Exception {
  if (args.length < 2) {
    printUsage();
    return 2;
  }

  Job job = Job.getInstance(getConf());
  job.setJobName("MultiFileWordCount");
  job.setJarByClass(MultiFileWordCount.class);

  // set the InputFormat of the job to our InputFormat
  job.setInputFormatClass(MyInputFormat.class);

  // the keys are words (strings)
  job.setOutputKeyClass(Text.class);
  // the values are counts (ints)
  job.setOutputValueClass(IntWritable.class);

  // use the defined mapper
  job.setMapperClass(MapClass.class);
  // use the WordCount Reducer
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);

  FileInputFormat.addInputPaths(job, args[0]);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  return job.waitForCompletion(true) ? 0 : 1;
}
Example 7
Source File: Phase3Step4LocalDeDuplication.java From dkpro-c4corpus with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());

  job.setJarByClass(Phase3Step4LocalDeDuplication.class);
  job.setJobName(Phase3Step4LocalDeDuplication.class.getName());

  // paths
  String inputPath = args[0];
  // text files of ids to be deleted
  String outputPath = args[1];

  // input: reading max N lines for each mapper
  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.addInputPath(job, new Path(inputPath));
  job.getConfiguration().setInt("mapreduce.input.lineinputformat.linespermap", LINES);

  // mapper
  job.setMapperClass(LocalGreedyDeDuplicationMapper.class);

  LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

  // reducer
  job.setReducerClass(IDCollectorReducer.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  return job.waitForCompletion(true) ? 0 : 1;
}
Example 8
Source File: WordCounterExample.java From dkpro-c4corpus with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

  // Pass conf to the job so the options parsed above reach it
  // (the original created the job without the configuration).
  Job job = Job.getInstance(conf);
  job.setJarByClass(WordCounterExample.class);
  job.setJobName(WordCounterExample.class.getName());

  // mapper
  job.setMapperClass(WordCounterMapper.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(LongWritable.class);

  // combiner + reducer
  job.setCombinerClass(TextLongCountingReducer.class);
  job.setReducerClass(TextLongCountingReducer.class);

  job.setInputFormatClass(WARCInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // paths
  String commaSeparatedInputFiles = otherArgs[0];
  String outputPath = otherArgs[1];

  FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  return job.waitForCompletion(true) ? 0 : 1;
}
Example 9
Source File: Phase3Step3NearDupTuplesCreation.java From dkpro-c4corpus with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());

  job.setJarByClass(Phase3Step3NearDupTuplesCreation.class);
  job.setJobName(Phase3Step3NearDupTuplesCreation.class.getName());

  // mapper
  job.setMapperClass(CreateTuplesMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(TreeSet.class);

  job.setInputFormatClass(TextInputFormat.class);
  LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

  // paths
  String commaSeparatedInputFiles = args[0];
  String outputPath = args[1];

  FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  job.setNumReduceTasks(0); // must be set or the mapper won't be called

  return job.waitForCompletion(true) ? 0 : 1;
}
Example 10
Source File: ConfigurationHelper.java From dkpro-c4corpus with Apache License 2.0
/**
 * Job configurator
 *
 * @param job job instance
 * @param jarByClass class of the jar
 * @param mapperClass mapper
 * @param reducerClass reducer
 * @param commaSeparatedInputFiles input paths
 * @param outputPath output
 * @throws IOException I/O exception
 */
public static void configureJob(Job job, Class<?> jarByClass,
    Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
    String commaSeparatedInputFiles, String outputPath)
    throws IOException {
  job.setJarByClass(jarByClass);
  job.setJobName(jarByClass.getName());

  // mapper
  job.setMapperClass(mapperClass);

  // reducer
  job.setReducerClass(reducerClass);

  // input-output is warc
  job.setInputFormatClass(WARCInputFormat.class);
  // prevent producing empty files
  LazyOutputFormat.setOutputFormatClass(job, WARCOutputFormat.class);

  // intermediate data
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(WARCWritable.class);

  // output data
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(WARCWritable.class);

  // set output compression to GZip
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
}
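For context, a hypothetical call site for this helper might look like the following sketch; the driver, mapper, and reducer class names and the paths are invented placeholders, not taken from the project:

// Inside a Tool#run() implementation:
Job job = Job.getInstance(getConf());
ConfigurationHelper.configureJob(job, MyC4CorpusDriver.class,
    MyWarcMapper.class, MyWarcReducer.class,
    "/input/part1.warc.gz,/input/part2.warc.gz",
    "/output/warc-processed");
return job.waitForCompletion(true) ? 0 : 1;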
Example 11
Source File: GeoWaveInputLoadJobRunner.java From geowave with Apache License 2.0
@Override
public void configure(final Job job) throws Exception {
  job.setMapperClass(Mapper.class);
  job.setReducerClass(InputToOutputKeyReducer.class);
  job.setMapOutputKeyClass(GeoWaveInputKey.class);
  job.setMapOutputValueClass(ObjectWritable.class);
  job.setOutputKeyClass(GeoWaveOutputKey.class);
  job.setOutputValueClass(Object.class);
  job.setSpeculativeExecution(false);
  job.setJobName("GeoWave Input to Output");
  job.setReduceSpeculativeExecution(false);
}
Example 12
Source File: Phase3Step1ExtractNearDupInfo.java From dkpro-c4corpus with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());

  job.setJarByClass(Phase3Step1ExtractNearDupInfo.class);
  job.setJobName(Phase3Step1ExtractNearDupInfo.class.getName());

  // mapper
  job.setMapperClass(MapperClass.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DocumentInfo.class);

  // reducer
  job.setReducerClass(DeDuplicationTextOutputReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(List.class);

  job.setInputFormatClass(WARCInputFormat.class);
  LazyOutputFormat.setOutputFormatClass(job, DocumentInfoOutputFormat.class);

  // paths
  String commaSeparatedInputFiles = args[0];
  String outputPath = args[1];

  FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
  FileOutputFormat.setOutputPath(job, new Path(outputPath));

  return job.waitForCompletion(true) ? 0 : 1;
}
Example 13
Source File: TeraSort.java From pravega-samples with Apache License 2.0
/**
 * The original run() has been modified to:
 * - take command parameters for running terasort on Pravega streams
 * - use special mapper and reducer to convert data type required by
 *   Pravega hadoop connector
 */
public int run(String[] args) throws Exception {
  if (args.length != 6) {
    usage();
    return 2;
  }
  LOG.info("starting");

  Path inputDir = new Path(args[0]);
  Path outputDir = new Path(args[1]);

  getConf().setStrings(INPUT_URI_STRING, args[2]);
  getConf().setStrings(INPUT_SCOPE_NAME, args[3]);
  getConf().setStrings(INPUT_STREAM_NAME, args[4]);
  getConf().setStrings(INPUT_DESERIALIZER, TextSerializer.class.getName());

  getConf().setStrings(OUTPUT_SCOPE_NAME, args[3]);
  getConf().setStrings(OUTPUT_URI_STRING, args[2]);
  getConf().setStrings(OUTPUT_DESERIALIZER, TextSerializer.class.getName());
  getConf().setStrings(OUTPUT_STREAM_PREFIX, args[5]);

  Job job = Job.getInstance(getConf());
  boolean useSimplePartitioner = getUseSimplePartitioner(job);

  TeraInputFormat.setInputPaths(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setMapperClass(TeraSortMapper.class);
  job.setReducerClass(TeraSortReducer.class);
  job.setInputFormatClass(PravegaInputFormat.class);
  job.setOutputFormatClass(PravegaTeraSortOutputFormat.class);

  if (useSimplePartitioner) {
    job.setPartitionerClass(SimplePartitioner.class);
  } else {
    long start = System.currentTimeMillis();
    Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME);
    URI partitionUri = new URI(partitionFile.toString()
        + "#" + TeraInputFormat.PARTITION_FILENAME);
    try {
      TeraInputFormat.writePartitionFile(job, partitionFile);
    } catch (Throwable e) {
      LOG.error(e.getMessage());
      return -1;
    }
    job.addCacheFile(partitionUri);
    long end = System.currentTimeMillis();
    LOG.info("Spent " + (end - start) + "ms computing partitions.");
    job.setPartitionerClass(TotalOrderPartitioner.class);
  }

  job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
  int ret = job.waitForCompletion(true) ? 0 : 1;
  LOG.info("done");
  return ret;
}
Example 14
Source File: MapAlgebra.java From mrgeo with Apache License 2.0
@Override @SuppressWarnings("squid:S1166") // Exception caught and error message printed @SuppressFBWarnings(value = "PATH_TRAVERSAL_IN", justification = "File used for reading script") public int run(final CommandLine line, final Configuration conf, final ProviderProperties providerProperties) throws ParseException { System.out.println(log.getClass().getName()); String expression = line.getOptionValue("e"); String output = line.getOptionValue("o"); String script = line.getOptionValue("s"); if (expression == null && script == null) { throw new ParseException("Either an expression or script must be specified."); } try { if (script != null) { File f = new File(script); int total = (int) f.length(); byte[] buffer = new byte[total]; int read = 0; try (FileInputStream fis = new FileInputStream(f)) { while (read < total) { read += fis.read(buffer, read, total - read); } expression = new String(buffer); } } String protectionLevel = line.getOptionValue("pl"); log.debug("expression: " + expression); log.debug("output: " + output); Job job = new Job(); job.setJobName("MapAlgebra"); MrsImageDataProvider dp = DataProviderFactory.getMrsImageDataProvider(output, AccessMode.OVERWRITE, providerProperties); String useProtectionLevel = ProtectionLevelUtils.getAndValidateProtectionLevel(dp, protectionLevel); boolean valid = org.mrgeo.mapalgebra.MapAlgebra.validate(expression, providerProperties); if (valid) { if (org.mrgeo.mapalgebra.MapAlgebra.mapalgebra(expression, output, conf, providerProperties, useProtectionLevel)) { if (line.hasOption("b")) { System.out.println("Building pyramids..."); if (!BuildPyramid.build(output, new MeanAggregator(), conf, providerProperties)) { System.out.println("Building pyramids failed. See YARN logs for more information."); } } } } } catch (IOException e) { System.out.println("Failure while running map algebra " + e.getMessage()); return -1; } return 0; }
Example 15
Source File: StockProtocolBuffersMapReduce.java From hiped2 with Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
  Cli cli = Cli.builder().setArgs(args).addOptions(Opts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  File localStocksFile = new File(cli.getArgValueAsString(Opts.INPUT));
  Path inputPath = new Path(cli.getArgValueAsString(Opts.PB_INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(Opts.OUTPUT));

  Configuration conf = super.getConf();

  if (!inputPath.getName().endsWith(".lzo")) {
    throw new Exception("HDFS stock file must have a .lzo suffix");
  }

  generateInput(conf, localStocksFile, inputPath);

  Job job = new Job(conf);
  job.setJobName(StockProtocolBuffersMapReduce.class.getName());

  job.setJarByClass(StockProtocolBuffersMapReduce.class);
  job.setMapperClass(PBMapper.class);
  job.setReducerClass(PBReducer.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(ProtobufStockWritable.class);

  MultiInputFormat.setClassConf(Stock.class, job.getConfiguration());
  LzoProtobufBlockOutputFormat.setClassConf(StockAvg.class, job.getConfiguration());

  job.setInputFormatClass(LzoProtobufBlockInputFormat.class);
  job.setOutputFormatClass(LzoProtobufBlockOutputFormat.class);

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
Example 16
Source File: ImportJobBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0
/**
 * Run an import job to read a table into HDFS.
 *
 * @param tableName the database table to read; may be null if a free-form
 * query is specified in the SqoopOptions, and the ImportJobBase subclass
 * supports free-form queries.
 * @param ormJarFile the Jar file to insert into the dcache classpath.
 * (may be null)
 * @param splitByCol the column of the database table to use to split
 * the import
 * @param conf A fresh Hadoop Configuration to use to build an MR job.
 * @throws IOException if the job encountered an IO problem
 * @throws ImportException if the job failed unexpectedly or was
 * misconfigured.
 */
public void runImport(String tableName, String ormJarFile, String splitByCol,
    Configuration conf) throws IOException, ImportException {
  // Check if there are runtime error checks to do
  if (isHCatJob && options.isDirect()
      && !context.getConnManager().isDirectModeHCatSupported()) {
    throw new IOException("Direct import is not compatible with "
        + "HCatalog operations using the connection manager "
        + context.getConnManager().getClass().getName()
        + ". Please remove the parameter --direct");
  }
  if (options.getAccumuloTable() != null && options.isDirect()
      && !getContext().getConnManager().isDirectModeAccumuloSupported()) {
    throw new IOException("Direct mode is incompatible with "
        + "Accumulo. Please remove the parameter --direct");
  }
  if (options.getHBaseTable() != null && options.isDirect()
      && !getContext().getConnManager().isDirectModeHBaseSupported()) {
    throw new IOException("Direct mode is incompatible with "
        + "HBase. Please remove the parameter --direct");
  }

  if (null != tableName) {
    LOG.info("Beginning import of " + tableName);
  } else {
    LOG.info("Beginning query import.");
  }
  String tableClassName = null;
  if (!getContext().getConnManager().isORMFacilitySelfManaged()) {
    tableClassName = new TableClassName(options).getClassForTable(tableName);
  }
  // For ORM self managed, we leave the tableClassName to null so that
  // we don't check for non-existing classes.

  loadJars(conf, ormJarFile, tableClassName);

  Job job = createJob(conf);
  try {
    // Set the external jar to use for the job.
    job.getConfiguration().set("mapred.jar", ormJarFile);
    if (options.getMapreduceJobName() != null) {
      job.setJobName(options.getMapreduceJobName());
    }

    propagateOptionsToJob(job);
    configureInputFormat(job, tableName, tableClassName, splitByCol);
    configureOutputFormat(job, tableName, tableClassName);
    configureMapper(job, tableName, tableClassName);
    configureNumTasks(job);
    cacheJars(job, getContext().getConnManager());

    jobSetup(job);
    setJob(job);
    boolean success = runJob(job);
    if (!success) {
      throw new ImportException("Import job failed!");
    }

    completeImport(job);

    if (options.isValidationEnabled()) {
      validateImport(tableName, conf, job);
    }
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  } catch (ClassNotFoundException cnfe) {
    throw new IOException(cnfe);
  } finally {
    unloadJars();
    jobTeardown(job);
  }
}
Example 17
Source File: MetricsIngester.java From datawave with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  _configure(args);

  final Configuration conf = getConf();
  String type = conf.get(MetricsConfig.TYPE);

  /*
   * if the type is "errors", we want to process all of the errors from the
   * metrics files first and then run the regular ingest metrics process
   */
  // MetricsServer.setServerConf(conf);
  // MetricsServer.initInstance();
  if ("errors".equals(type)) {
    try {
      launchErrorsJob(Job.getInstance(conf), conf);
    } catch (Exception e) {
      log.info("Failed to launch errors job", e);
    }
    type = "ingest";
    conf.set(MetricsConfig.TYPE, type);
  }

  /* Type logic so I can differentiate between loader and ingest metrics jobs */
  Class<? extends Mapper<?,?,?,?>> mapperClass;
  String outTable;

  Path inputDirectoryPath = new Path(conf.get(MetricsConfig.INPUT_DIRECTORY));
  FileSystem fs = FileSystem.get(inputDirectoryPath.toUri(), conf);
  FileStatus[] fstats = fs.listStatus(inputDirectoryPath);
  Path[] files = FileUtil.stat2Paths(fstats);
  Path[] fileBuffer = new Path[MAX_FILES];
  for (int i = 0; i < files.length;) {
    Job job = Job.getInstance(getConf());
    job.setJarByClass(this.getClass());
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    if ("ingest".equalsIgnoreCase(type)) {
      mapperClass = IngestMetricsMapper.class;
      outTable = conf.get(MetricsConfig.INGEST_TABLE, MetricsConfig.DEFAULT_INGEST_TABLE);
      job.setInputFormatClass(SequenceFileInputFormat.class);
    } else if ("loader".equalsIgnoreCase(type)) {
      mapperClass = LoaderMetricsMapper.class;
      outTable = conf.get(MetricsConfig.LOADER_TABLE, MetricsConfig.DEFAULT_LOADER_TABLE);
      job.setInputFormatClass(SequenceFileInputFormat.class);
    } else if ("flagmaker".equalsIgnoreCase(type)) {
      mapperClass = FlagMakerMetricsMapper.class;
      outTable = conf.get(MetricsConfig.FLAGMAKER_TABLE, MetricsConfig.DEFAULT_FLAGMAKER_TABLE);
      job.setInputFormatClass(SequenceFileInputFormat.class);
    } else {
      log.error(type + " is not a valid job type. Please use <ingest|loader|flagmaker>.");
      return -1;
    }

    job.setJobName("MetricsIngester-" + type);

    if (files.length - i > MAX_FILES) {
      System.arraycopy(files, i, fileBuffer, 0, MAX_FILES);
      i += MAX_FILES;
    } else {
      fileBuffer = new Path[files.length - i];
      System.arraycopy(files, i, fileBuffer, 0, fileBuffer.length);
      i += files.length - i;
    }

    SequenceFileInputFormat.setInputPaths(job, fileBuffer);

    job.setMapperClass(mapperClass);
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.setConnectorInfo(job, conf.get(MetricsConfig.USER),
        new PasswordToken(conf.get(MetricsConfig.PASS, "").getBytes()));
    AccumuloOutputFormat.setCreateTables(job, createTables);
    AccumuloOutputFormat.setDefaultTableName(job, outTable);
    log.info("zookeepers = " + conf.get(MetricsConfig.ZOOKEEPERS));
    log.info("instance = " + conf.get(MetricsConfig.INSTANCE));
    log.info("clientConfiguration = "
        + ClientConfiguration.loadDefault().withInstance(conf.get(MetricsConfig.INSTANCE))
            .withZkHosts(conf.get(MetricsConfig.ZOOKEEPERS)));
    AccumuloOutputFormat.setZooKeeperInstance(job,
        ClientConfiguration.loadDefault().withInstance(conf.get(MetricsConfig.INSTANCE))
            .withZkHosts(conf.get(MetricsConfig.ZOOKEEPERS)));
    AccumuloOutputFormat.setBatchWriterOptions(job,
        new BatchWriterConfig().setMaxLatency(25, TimeUnit.MILLISECONDS));

    job.submit();

    job.waitForCompletion(true);

    if (job.isSuccessful()) {
      for (Path p : fileBuffer) {
        fs.delete(p, true);
      }
    }
  }

  return 0;
}
Example 18
Source File: ExportHBaseTableToDelimiteredTxt.java From HBase-ToHDFS with Apache License 2.0
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out.println("ExportHBaseTableToDelimiteredTxt {tableName} {ColumnFamily} "
        + "{outputPath} {shouldCompressWithGz} {schemaLocationOnHdfs} "
        + "{delimiter} {rowKeyColumn.Optional}");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String shouldCompression = args[3];
  String schemaFilePath = args[4];
  String delimiter = args[5];

  String rowKeyColumn = "";
  if (args.length > 6) {
    rowKeyColumn = args[6];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  HBaseConfiguration.addHbaseResources(job.getConfiguration());
  job.getConfiguration().set(SHOULD_COMPRESSION_CONF, shouldCompression);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
  job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
  job.getConfiguration().set(DELIMITER_CONF, delimiter);

  job.setJarByClass(ExportHBaseTableToDelimiteredTxt.class);
  job.setJobName("ExportHBaseTableToDelimiteredTxt");

  Scan scan = new Scan();
  scan.setCaching(500);       // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(
      table,          // input HBase table name
      scan,           // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null,           // mapper output key
      null,           // mapper output value
      job);

  // because we aren't emitting anything from the mapper
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
Example 19
Source File: PiEstimator.java From big-c with Apache License 2.0
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
@SuppressWarnings("finally")
public static BigDecimal estimate(int numMaps, long numPoints, Job job)
    throws IOException {
  // setup job conf
  job.setJobName(PiEstimator.class.getSimpleName());

  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setOutputKeyClass(BooleanWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  job.setMapperClass(PiMapper.class);
  job.setReducerClass(PiReducer.class);
  job.setNumReduceTasks(1);

  // turn off speculative execution, because DFS doesn't handle
  // multiple writers to the same file.
  job.setSpeculativeExecution(false);

  // setup input/output directories
  //final Path inDir = new Path(TMP_DIR, "in");
  final Path inDir = new Path("/home/hadoop1/tmp_dir", "in");
  System.out.println("inDir =" + inDir.toString());
  //final Path outDir = new Path(TMP_DIR, "out");
  final Path outDir = new Path("/home/hadoop1/tmp_dir", "out");
  System.out.println("outDir =" + outDir.toString());
  FileInputFormat.addInputPath(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);

  final FileSystem fs = FileSystem.get(job.getConfiguration());
  if (fs.exists(TMP_DIR)) {
    throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
        + " already exists. Please remove it first.");
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Cannot create input directory " + inDir);
  }

  try {
    // generate an input file for each map task
    for (int i = 0; i < numMaps; ++i) {
      final Path file = new Path(inDir, "part" + i);
      final LongWritable offset = new LongWritable(i * numPoints);
      final LongWritable size = new LongWritable(numPoints);
      final SequenceFile.Writer writer = SequenceFile.createWriter(
          fs, job.getConfiguration(), file,
          LongWritable.class, LongWritable.class, CompressionType.NONE);
      try {
        writer.append(offset, size);
      } finally {
        writer.close();
      }
      System.out.println("Wrote input for Map #" + i);
    }

    // start a map/reduce job
    System.out.println("Starting Job");
    final long startTime = System.currentTimeMillis();
    Boolean waitforCompletion = job.waitForCompletion(true);
    final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job Finished in " + duration + " seconds");

    // read outputs
    Path inFile = new Path(outDir, "reduce-out");
    LongWritable numInside = new LongWritable();
    LongWritable numOutside = new LongWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, job.getConfiguration());
    try {
      reader.next(numInside, numOutside);
    } finally {
      reader.close();
    }

    // compute estimated value
    return BigDecimal.valueOf(4).setScale(20)
        .multiply(BigDecimal.valueOf(numInside.get()))
        .divide(BigDecimal.valueOf(numMaps))
        .divide(BigDecimal.valueOf(numPoints));
  } catch (InterruptedException e) {
    System.out.println("Job Exception " + e.getMessage());
  } finally {
    fs.delete(TMP_DIR, true);
    return BigDecimal.valueOf(4);
  }
}
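A note on the arithmetic in the final expression (an explanation, not part of the source): each of the numMaps map tasks draws numPoints random samples from the unit square, and a sample lands inside the quarter circle with probability π/4. The estimate is therefore π ≈ 4 × numInside / (numMaps × numPoints), which is exactly what the chain of BigDecimal multiply/divide calls computes.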
Example 20
Source File: ParseLogJob.java From 163-bigdate-note with GNU General Public License v3.0
public int run(String[] args) throws Exception {
  // create the job
  Configuration config = getConf();
  // add custom configuration
  config.addResource("mr.xml");
  Job job = Job.getInstance(config);

  // set job parameters
  job.setJarByClass(ParseLogJob.class);
  job.setJobName("parselog");
  job.setMapperClass(LogMapper.class);
  job.setReducerClass(LogReducer.class);
  job.setMapOutputKeyClass(TextLongWritable.class);
  job.setGroupingComparatorClass(TextLongGroupComparator.class);
  job.setPartitionerClass(TextLongPartition.class);
  job.setMapOutputValueClass(LogWritable.class);
  job.setOutputValueClass(Text.class);

  // use CombineTextInputFormat to combine small input files
  job.setInputFormatClass(CombineTextInputFormat.class);

  // add the IP lookup file to the distributed cache
  job.addCacheFile(new URI(config.get("ip.file.path")));

  // set input and output paths
  FileInputFormat.addInputPath(job, new Path(args[0]));
  Path outputPath = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputPath);

  // output compression (disabled)
  // FileOutputFormat.setCompressOutput(job, true);
  // FileOutputFormat.setOutputCompressorClass(job, LzopCodec.class);

  FileSystem fs = FileSystem.get(config);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  // run the job
  if (!job.waitForCompletion(true)) {
    throw new RuntimeException(job.getJobName() + " failed!");
  }
  return 0;
}