Java Code Examples for org.apache.hadoop.mapreduce.lib.input.FileInputFormat#addInputPaths()
The following examples show how to use
org.apache.hadoop.mapreduce.lib.input.FileInputFormat#addInputPaths().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LeftJoin.java From BigData-In-Practice with Apache License 2.0 | 6 votes |
/**
 * Configures and runs the "leftjoin" MapReduce job.
 *
 * <p>Generic options are parsed from {@code args}; the input paths, output
 * directory and reducer count are then read from the {@code input_dir},
 * {@code output_dir} and {@code reduce_num} (default 1) configuration properties.
 *
 * @param args command-line arguments handed to {@link GenericOptionsParser}
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
    conf = optionparser.getConfiguration();
    Job job = Job.getInstance(conf, "leftjoin");
    job.setJarByClass(LeftJoin.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path out = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(conf.getInt("reduce_num", 1));
    job.setMapperClass(LeftJoinMapper.class);
    job.setReducerClass(LeftJoinReduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // Job.getInstance copies the Configuration, so the separator has to be set on
    // the job's own configuration; setting it on conf at this point is ignored.
    job.getConfiguration().set("mapred.textoutputformat.separator", ",");
    return (job.waitForCompletion(true) ? 0 : 1);
}
Example 2
Source File: Phase3Step3NearDupTuplesCreation.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs a map-only job that uses {@code CreateTuplesMapper}.
 *
 * @param args {@code args[0]} is the comma-separated list of input paths,
 *             {@code args[1]} is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Class<?> driver = Phase3Step3NearDupTuplesCreation.class;
    final Job job = Job.getInstance(getConf());
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // map side
    job.setMapperClass(CreateTuplesMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(TreeSet.class);

    // formats; the output format is wrapped in LazyOutputFormat
    job.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // input and output locations
    final String inputs = args[0];
    final String output = args[1];
    FileInputFormat.addInputPaths(job, inputs);
    FileOutputFormat.setOutputPath(job, new Path(output));

    // zero reducers; the original author notes the mapper is not called without this
    job.setNumReduceTasks(0);

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 3
Source File: TopDomainCounter.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the top-domain counting job over WARC input.
 *
 * @param args command-line arguments; after generic options are removed, the
 *             first remaining argument is the comma-separated list of input
 *             paths and the second is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Build the job from the parsed configuration; the no-arg Job.getInstance()
    // would start from a default Configuration and drop the generic options.
    Job job = Job.getInstance(conf);
    job.setJarByClass(TopDomainCounter.class);
    job.setJobName(TopDomainCounter.class.getName());
    // mapper
    job.setMapperClass(DomainMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // combiner + reducer
    job.setCombinerClass(TextLongCountingReducer.class);
    job.setReducerClass(TextLongCountingReducer.class);
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];
    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true) ? 0 : 1;
}
Example 4
Source File: WordDistributionStatisticsCollector.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the word-distribution statistics job.
 *
 * <p>The mapper and input format are obtained from {@code getMapperClass()} and
 * {@code getInputFormatClass()}.
 *
 * @param args {@code args[0]} is the comma-separated list of input paths,
 *             {@code args[1]} is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Class<?> driver = WordDistributionStatisticsCollector.class;
    final Job job = Job.getInstance(getConf());
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // map side
    job.setMapperClass(getMapperClass());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // reduce side
    job.setReducerClass(SumReducer.class);

    // formats
    job.setInputFormatClass(getInputFormatClass());
    job.setOutputFormatClass(TextOutputFormat.class);

    // input and output locations
    final String inputs = args[0];
    final String output = args[1];
    FileInputFormat.addInputPaths(job, inputs);
    FileOutputFormat.setOutputPath(job, new Path(output));

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 5
Source File: ContentTypeAndSizeDistribution.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the content-type and size distribution job over WARC input.
 *
 * @param args {@code args[0]} is the comma-separated list of input paths,
 *             {@code args[1]} is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Class<?> driver = ContentTypeAndSizeDistribution.class;
    final Job job = Job.getInstance(getConf());
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // map side
    job.setMapperClass(ContentAndSizeMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // reduce side
    job.setReducerClass(TextLongCountingReducer.class);

    // formats
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // input and output locations
    final String inputs = args[0];
    final String output = args[1];
    FileInputFormat.addInputPaths(job, inputs);
    FileOutputFormat.setOutputPath(job, new Path(output));

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 6
Source File: TextToSentencesSplitter.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the text-to-sentences job over WARC input.
 *
 * @param args command-line arguments; after generic options are removed, the
 *             first remaining argument is the comma-separated list of input
 *             paths and the second is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Configuration configuration = getConf();
    final GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
    final String[] remaining = parser.getRemainingArgs();

    final Class<?> driver = TextToSentencesSplitter.class;
    final Job job = Job.getInstance(configuration);
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // map side
    job.setMapperClass(TextToSentencesSplitter.MapperClass.class);
    job.setInputFormatClass(WARCInputFormat.class);

    // reduce side
    job.setReducerClass(ReducerClass.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // input and output locations
    final String inputs = remaining[0];
    final String output = remaining[1];
    FileInputFormat.addInputPaths(job, inputs);
    FileOutputFormat.setOutputPath(job, new Path(output));

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 7
Source File: PagesByURLExtractor.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the pages-by-URL extraction job (WARC in, compressed WARC out).
 *
 * <p>Every entry of the job configuration is printed to standard output before
 * the job is set up.
 *
 * @param args {@code args[0]} is the comma-separated list of input paths,
 *             {@code args[1]} is the output path and {@code args[2]} is passed
 *             to {@code loadURLs}
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Job job = Job.getInstance(getConf());

    // dump the effective configuration
    for (Map.Entry<String, String> entry : job.getConfiguration()) {
        System.out.println(entry.getKey() + ": " + entry.getValue());
    }

    final Class<?> driver = PagesByURLExtractor.class;
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // map side
    job.setMapperClass(MapperClass.class);

    // WARC input
    job.setInputFormatClass(WARCInputFormat.class);

    // compressed WARC output
    job.setOutputFormatClass(WARCOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);
    FileOutputFormat.setCompressOutput(job, true);

    // input and output locations
    final String inputs = args[0];
    final String output = args[1];

    // hand the loaded URLs to the mappers through the job configuration
    final String urls = loadURLs(args[2]);
    job.getConfiguration().set(MAPREDUCE_MAPPER_URLS, urls);

    FileInputFormat.addInputPaths(job, inputs);
    FileOutputFormat.setOutputPath(job, new Path(output));

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 8
Source File: URIExtractor.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the URI extraction job: WARC input, gzip-compressed output
 * with {@code Text} keys and {@code NullWritable} values.
 *
 * @param args {@code args[0]} is the input path list, {@code args[1]} is the
 *             output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Class<?> driver = URIExtractor.class;
    final Job job = Job.getInstance(getConf());

    // jar and job name
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // mapper and reducer
    job.setMapperClass(URIExtractorMapper.class);
    job.setReducerClass(URIExtractorReducer.class);

    // WARC input
    job.setInputFormatClass(WARCInputFormat.class);

    // original author's note: the output classes are set explicitly so that
    // Hadoop does not mix the map input format up
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // gzip-compressed output
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 9
Source File: Phase3Step2DistinctDataJob.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the job built from {@code RemoveRedundantDataMapper} and
 * {@code RemoveRedundantDataReducer} over text input.
 *
 * @param args {@code args[0]} is the comma-separated list of input paths,
 *             {@code args[1]} is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Class<?> driver = Phase3Step2DistinctDataJob.class;
    final Job job = Job.getInstance(getConf());
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // map side
    job.setMapperClass(RemoveRedundantDataMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // reduce side
    job.setReducerClass(RemoveRedundantDataReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    // command-line locations
    final String inputs = args[0];
    final String output = args[1];

    // formats; the output format is wrapped in LazyOutputFormat
    job.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // register input and output paths
    FileInputFormat.addInputPaths(job, inputs);
    FileOutputFormat.setOutputPath(job, new Path(output));

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 10
Source File: Phase1FullJob.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the phase-1 job: WARC in, gzip-compressed WARC out, with
 * Snappy-compressed map output.
 *
 * @param args {@code args[0]} is the input path list, {@code args[1]} is the
 *             output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Class<?> driver = Phase1FullJob.class;
    final Job job = Job.getInstance(getConf());

    // jar and job name
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // map side
    job.setMapperClass(MapperClass.class);

    // compress the intermediate map output with Snappy
    job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration().setClass(
            Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    // reduce side
    job.setReducerClass(SimpleWarcWriterReducer.class);

    // WARC on both ends
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(WARCOutputFormat.class);

    // intermediate key/value types
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // gzip-compressed final output
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 11
Source File: Phase2ExactMatchDeDuplication.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the phase-2 job: WARC in, gzip-compressed WARC out, with
 * Snappy-compressed map output and no combiner.
 *
 * @param args {@code args[0]} is the input path list, {@code args[1]} is the
 *             output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Class<?> driver = Phase2ExactMatchDeDuplication.class;
    final Job job = Job.getInstance(getConf());

    // jar and job name
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // map side
    job.setMapperClass(ExactMatchDetectionMapper.class);

    // compress the intermediate map output with Snappy
    job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration().setClass(
            Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    // reduce side; no combiner is registered because, per the original
    // author's note, mapper and reducer output classes differ
    job.setReducerClass(UniqueWarcWriterReducer.class);

    // WARC on both ends
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(WARCOutputFormat.class);

    // intermediate key/value types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // gzip-compressed final output
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 12
Source File: Phase3Step1ExtractNearDupInfo.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the job that reads WARC input and writes through
 * {@code DocumentInfoOutputFormat}.
 *
 * @param args {@code args[0]} is the comma-separated list of input paths,
 *             {@code args[1]} is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    final Class<?> driver = Phase3Step1ExtractNearDupInfo.class;
    final Job job = Job.getInstance(getConf());
    job.setJarByClass(driver);
    job.setJobName(driver.getName());

    // map side
    job.setMapperClass(MapperClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DocumentInfo.class);

    // reduce side
    job.setReducerClass(DeDuplicationTextOutputReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(List.class);

    // formats; the output format is wrapped in LazyOutputFormat
    job.setInputFormatClass(WARCInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, DocumentInfoOutputFormat.class);

    // input and output locations
    final String inputs = args[0];
    final String output = args[1];
    FileInputFormat.addInputPaths(job, inputs);
    FileOutputFormat.setOutputPath(job, new Path(output));

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}
Example 13
Source File: ConfigurationHelper.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Applies the shared WARC-to-WARC settings to a job: jar and name, mapper and
 * reducer, WARC input, lazily-created gzip-compressed WARC output, and the
 * intermediate and final key/value classes.
 *
 * @param job                      job to configure
 * @param jarByClass               class used to locate the jar; its name also
 *                                 becomes the job name
 * @param mapperClass              mapper implementation
 * @param reducerClass             reducer implementation
 * @param commaSeparatedInputFiles comma-separated input paths
 * @param outputPath               output directory
 * @throws IOException I/O exception
 */
public static void configureJob(Job job, Class<?> jarByClass,
        Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
        String commaSeparatedInputFiles, String outputPath)
        throws IOException
{
    final String jobName = jarByClass.getName();
    job.setJarByClass(jarByClass);
    job.setJobName(jobName);

    // processing classes
    job.setMapperClass(mapperClass);
    job.setReducerClass(reducerClass);

    // WARC input
    job.setInputFormatClass(WARCInputFormat.class);

    // lazy WARC output, so that empty files are not produced
    LazyOutputFormat.setOutputFormatClass(job, WARCOutputFormat.class);

    // map output types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);

    // gzip-compressed output
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    // input and output locations
    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    final Path destination = new Path(outputPath);
    FileOutputFormat.setOutputPath(job, destination);
}
Example 14
Source File: WordCounterExample.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the word-counting example job over WARC input.
 *
 * @param args command-line arguments; after generic options are removed, the
 *             first remaining argument is the comma-separated list of input
 *             paths and the second is the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Build the job from the parsed configuration; the no-arg Job.getInstance()
    // would start from a default Configuration and drop the generic options.
    Job job = Job.getInstance(conf);
    job.setJarByClass(WordCounterExample.class);
    job.setJobName(WordCounterExample.class.getName());
    // mapper
    job.setMapperClass(WordCounterMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // combiner + reducer
    job.setCombinerClass(TextLongCountingReducer.class);
    job.setReducerClass(TextLongCountingReducer.class);
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];
    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true) ? 0 : 1;
}
Example 15
Source File: SimpleTextSearch.java From dkpro-c4corpus with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the text-search job over WARC input.
 *
 * @param args command-line arguments; after generic options are removed, the
 *             remaining arguments are, in order: the comma-separated list of
 *             input paths, the output path, and the regular expression stored
 *             under {@code MAPREDUCE_MAP_REGEX} in the job configuration
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args)
        throws Exception
{
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Build the job from the parsed configuration; the no-arg Job.getInstance()
    // would start from a default Configuration and drop the generic options.
    Job job = Job.getInstance(conf);
    job.setJarByClass(SimpleTextSearch.class);
    job.setJobName(SimpleTextSearch.class.getName());
    // mapper
    job.setMapperClass(TextSearchMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // combiner + reducer
    job.setCombinerClass(TextLongCountingReducer.class);
    job.setReducerClass(TextLongCountingReducer.class);
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];
    // regex with a phrase to be searched for
    String regex = otherArgs[2];
    job.getConfiguration().set(MAPREDUCE_MAP_REGEX, regex);
    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true) ? 0 : 1;
}
Example 16
Source File: MultiFileWordCount.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the "MultiFileWordCount" job.
 *
 * @param args {@code args[0]} is the input path list, {@code args[1]} is the
 *             output path
 * @return 2 (after printing usage) if fewer than two arguments are given;
 *         otherwise 0 if the job completed successfully and 1 if it did not
 * @throws Exception if the job cannot be configured or run
 */
public int run(String[] args) throws Exception {
    final boolean enoughArgs = args.length >= 2;
    if (!enoughArgs) {
        printUsage();
        return 2;
    }

    final Job wordCount = Job.getInstance(getConf());
    wordCount.setJobName("MultiFileWordCount");
    wordCount.setJarByClass(MultiFileWordCount.class);

    // custom input format defined for this job
    wordCount.setInputFormatClass(MyInputFormat.class);
    // output keys are words, output values are integer counts
    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);

    // mapper, then the summing reducer used as both combiner and reducer
    wordCount.setMapperClass(MapClass.class);
    wordCount.setCombinerClass(IntSumReducer.class);
    wordCount.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(wordCount, args[0]);
    FileOutputFormat.setOutputPath(wordCount, new Path(args[1]));

    if (wordCount.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
Example 17
Source File: MultiFileWordCount.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Configures and runs the "MultiFileWordCount" job.
 *
 * @param args {@code args[0]} is the input path list, {@code args[1]} is the
 *             output path
 * @return 2 (after printing usage) if fewer than two arguments are given;
 *         otherwise 0 if the job completed successfully and 1 if it did not
 * @throws Exception if the job cannot be configured or run
 */
public int run(String[] args) throws Exception {
    final boolean enoughArgs = args.length >= 2;
    if (!enoughArgs) {
        printUsage();
        return 2;
    }

    final Job wordCount = Job.getInstance(getConf());
    wordCount.setJobName("MultiFileWordCount");
    wordCount.setJarByClass(MultiFileWordCount.class);

    // custom input format defined for this job
    wordCount.setInputFormatClass(MyInputFormat.class);
    // output keys are words, output values are integer counts
    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);

    // mapper, then the summing reducer used as both combiner and reducer
    wordCount.setMapperClass(MapClass.class);
    wordCount.setCombinerClass(IntSumReducer.class);
    wordCount.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(wordCount, args[0]);
    FileOutputFormat.setOutputPath(wordCount, new Path(args[1]));

    if (wordCount.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
Example 18
Source File: GenericMRLoadGenerator.java From big-c with Apache License 2.0 | 4 votes |
/**
 * Applies command-line options to a job.
 *
 * <p>Arguments are consumed as option/value pairs. Recognised options are
 * {@code -r}, {@code -inFormat}, {@code -outFormat}, {@code -outKey},
 * {@code -outValue}, {@code -keepmap}, {@code -keepred}, {@code -outdir},
 * {@code -indir} and {@code -inFormatIndirect}.
 *
 * @param argv option/value pairs
 * @param job  job to configure
 * @return {@code true} when every pair was applied; otherwise usage is printed
 *         and the result is {@code 0 == printUsage()}
 * @throws IOException wrapping any exception other than a
 *                     {@link NumberFormatException} raised while applying an option
 */
public static boolean parseArgs(String[] argv, Job job) throws IOException {
    if (argv.length < 1) {
        return 0 == printUsage();
    }
    int pos = 0;
    while (pos < argv.length) {
        final String option = argv[pos];
        // every option needs a value after it
        if (pos + 1 == argv.length) {
            System.out.println("ERROR: Required parameter missing from " +
                    option);
            return 0 == printUsage();
        }
        final String value = argv[pos + 1];
        try {
            if ("-r".equals(option)) {
                job.setNumReduceTasks(Integer.parseInt(value));
            } else if ("-inFormat".equals(option)) {
                job.setInputFormatClass(
                        Class.forName(value).asSubclass(InputFormat.class));
            } else if ("-outFormat".equals(option)) {
                job.setOutputFormatClass(
                        Class.forName(value).asSubclass(OutputFormat.class));
            } else if ("-outKey".equals(option)) {
                job.setOutputKeyClass(
                        Class.forName(value).asSubclass(WritableComparable.class));
            } else if ("-outValue".equals(option)) {
                job.setOutputValueClass(
                        Class.forName(value).asSubclass(Writable.class));
            } else if ("-keepmap".equals(option)) {
                job.getConfiguration().set(MAP_PRESERVE_PERCENT, value);
            } else if ("-keepred".equals(option)) {
                job.getConfiguration().set(REDUCE_PRESERVE_PERCENT, value);
            } else if ("-outdir".equals(option)) {
                FileOutputFormat.setOutputPath(job, new Path(value));
            } else if ("-indir".equals(option)) {
                FileInputFormat.addInputPaths(job, value);
            } else if ("-inFormatIndirect".equals(option)) {
                job.getConfiguration().setClass(INDIRECT_INPUT_FORMAT,
                        Class.forName(value).asSubclass(InputFormat.class),
                        InputFormat.class);
                job.setInputFormatClass(IndirectInputFormat.class);
            } else {
                System.out.println("Unexpected argument: " + option);
                return 0 == printUsage();
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + value);
            return 0 == printUsage();
        } catch (Exception e) {
            throw (IOException)new IOException().initCause(e);
        }
        // move on to the next option/value pair
        pos += 2;
    }
    return true;
}
Example 19
Source File: AbstractBulkLoadTool.java From phoenix with Apache License 2.0 | 4 votes |
/**
 * Submits the import job to the cluster and, when it succeeds, loads the
 * generated HFiles onto the respective tables and removes the output directory.
 *
 * <p>When {@code hasLocalIndexes} is set, the table's region start keys are
 * recorded before the job and compared with the start keys afterwards; on a
 * mismatch an error is logged and -1 is returned without loading the HFiles
 * or deleting the output directory.
 *
 * @param conf configuration used to create the job, to run the bulk load and
 *             to resolve the output file system
 * @param qualifiedTableName table name used in the job name and for the region lookups
 * @param inputPaths input paths handed to {@code FileInputFormat.addInputPaths}
 * @param outputPath job output directory; deleted recursively after a successful load
 * @param tablesToBeLoaded target tables passed to the output format and the bulk load
 * @param hasLocalIndexes whether the split-key check described above is performed
 * @return 0 on success; -1 if the job fails or the split keys changed during the job
 * @throws Exception if job setup, job execution or the bulk load fails
 */
public int submitJob(final Configuration conf, final String qualifiedTableName,
final String inputPaths, final Path outputPath, List<TargetTableRef> tablesToBeLoaded, boolean hasLocalIndexes) throws Exception {
Job job = Job.getInstance(conf, "Phoenix MapReduce import for " + qualifiedTableName);
FileInputFormat.addInputPaths(job, inputPaths);
FileOutputFormat.setOutputPath(job, outputPath);
job.setInputFormatClass(PhoenixTextInputFormat.class);
job.setMapOutputKeyClass(TableRowkeyPair.class);
job.setMapOutputValueClass(ImmutableBytesWritable.class);
job.setOutputKeyClass(TableRowkeyPair.class);
job.setOutputValueClass(KeyValue.class);
job.setReducerClass(FormatToKeyValueReducer.class);
// region start keys captured before the job; stays null without local indexes
byte[][] splitKeysBeforeJob = null;
// the HBase connection stays open for the whole submission and is closed on exit
try(org.apache.hadoop.hbase.client.Connection hbaseConn =
ConnectionFactory.createConnection(job.getConfiguration())) {
RegionLocator regionLocator = null;
if(hasLocalIndexes) {
try{
regionLocator = hbaseConn.getRegionLocator(
TableName.valueOf(qualifiedTableName));
splitKeysBeforeJob = regionLocator.getStartKeys();
} finally {
if (regionLocator != null) regionLocator.close();
}
}
MultiHfileOutputFormat.configureIncrementalLoad(job, tablesToBeLoaded);
// pass the table names and logical names to the job as JSON configuration values
final String tableNamesAsJson = TargetTableRefFunctions.NAMES_TO_JSON
.apply(tablesToBeLoaded);
final String logicalNamesAsJson = TargetTableRefFunctions.LOGICAL_NAMES_TO_JSON
.apply(tablesToBeLoaded);
job.getConfiguration().set(FormatToBytesWritableMapper.TABLE_NAMES_CONFKEY,
tableNamesAsJson);
job.getConfiguration().set(FormatToBytesWritableMapper.LOGICAL_NAMES_CONFKEY,
logicalNamesAsJson);
// give subclasses their hook
setupJob(job);
LOGGER.info("Running MapReduce import job from {} to {}", inputPaths, outputPath);
boolean success = job.waitForCompletion(true);
if (success) {
if (hasLocalIndexes) {
try {
// look the start keys up again and compare them with the pre-job snapshot
regionLocator = hbaseConn.getRegionLocator(
TableName.valueOf(qualifiedTableName));
if(!IndexUtil.matchingSplitKeys(splitKeysBeforeJob,
regionLocator.getStartKeys())) {
LOGGER.error("The table " + qualifiedTableName + " has local indexes and"
+ " there is split key mismatch before and after running"
+ " bulkload job. Please rerun the job otherwise there may be"
+ " inconsistencies between actual data and index data.");
// bail out before the bulk load; the output directory is left in place
return -1;
}
} finally {
if (regionLocator != null) regionLocator.close();
}
}
LOGGER.info("Loading HFiles from {}", outputPath);
completebulkload(conf,outputPath,tablesToBeLoaded);
LOGGER.info("Removing output directory {}", outputPath);
// a failed delete is only logged; the method still reports success
if(!outputPath.getFileSystem(conf).delete(outputPath, true)) {
LOGGER.error("Failed to delete the output directory {}", outputPath);
}
return 0;
} else {
return -1;
}
}
}
Example 20
Source File: GenericMRLoadGenerator.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * Applies command-line options to a job.
 *
 * <p>Arguments are consumed as option/value pairs. Recognised options are
 * {@code -r}, {@code -inFormat}, {@code -outFormat}, {@code -outKey},
 * {@code -outValue}, {@code -keepmap}, {@code -keepred}, {@code -outdir},
 * {@code -indir} and {@code -inFormatIndirect}.
 *
 * @param argv option/value pairs
 * @param job  job to configure
 * @return {@code true} when every pair was applied; otherwise usage is printed
 *         and the result is {@code 0 == printUsage()}
 * @throws IOException wrapping any exception other than a
 *                     {@link NumberFormatException} raised while applying an option
 */
public static boolean parseArgs(String[] argv, Job job) throws IOException {
    if (argv.length < 1) {
        return 0 == printUsage();
    }
    int pos = 0;
    while (pos < argv.length) {
        final String option = argv[pos];
        // every option needs a value after it
        if (pos + 1 == argv.length) {
            System.out.println("ERROR: Required parameter missing from " +
                    option);
            return 0 == printUsage();
        }
        final String value = argv[pos + 1];
        try {
            if ("-r".equals(option)) {
                job.setNumReduceTasks(Integer.parseInt(value));
            } else if ("-inFormat".equals(option)) {
                job.setInputFormatClass(
                        Class.forName(value).asSubclass(InputFormat.class));
            } else if ("-outFormat".equals(option)) {
                job.setOutputFormatClass(
                        Class.forName(value).asSubclass(OutputFormat.class));
            } else if ("-outKey".equals(option)) {
                job.setOutputKeyClass(
                        Class.forName(value).asSubclass(WritableComparable.class));
            } else if ("-outValue".equals(option)) {
                job.setOutputValueClass(
                        Class.forName(value).asSubclass(Writable.class));
            } else if ("-keepmap".equals(option)) {
                job.getConfiguration().set(MAP_PRESERVE_PERCENT, value);
            } else if ("-keepred".equals(option)) {
                job.getConfiguration().set(REDUCE_PRESERVE_PERCENT, value);
            } else if ("-outdir".equals(option)) {
                FileOutputFormat.setOutputPath(job, new Path(value));
            } else if ("-indir".equals(option)) {
                FileInputFormat.addInputPaths(job, value);
            } else if ("-inFormatIndirect".equals(option)) {
                job.getConfiguration().setClass(INDIRECT_INPUT_FORMAT,
                        Class.forName(value).asSubclass(InputFormat.class),
                        InputFormat.class);
                job.setInputFormatClass(IndirectInputFormat.class);
            } else {
                System.out.println("Unexpected argument: " + option);
                return 0 == printUsage();
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + value);
            return 0 == printUsage();
        } catch (Exception e) {
            throw (IOException)new IOException().initCause(e);
        }
        // move on to the next option/value pair
        pos += 2;
    }
    return true;
}