Java Code Examples for org.apache.hadoop.hbase.HBaseConfiguration#addHbaseResources()
The following examples show how to use org.apache.hadoop.hbase.HBaseConfiguration#addHbaseResources(). You can vote up the examples you find useful or vote down the ones you don't, and follow the links above each example to go to the original project or source file.
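As background for the examples below, HBaseConfiguration.addHbaseResources(Configuration) merges hbase-default.xml and hbase-site.xml from the classpath into an existing Hadoop Configuration and returns that same instance. Here is a minimal sketch of the call on its own; the class name and the printed property are illustrative only, not taken from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class AddHbaseResourcesSketch {
    public static void main(String[] args) {
        // Start from a plain Hadoop Configuration, e.g. a tool or job configuration.
        Configuration conf = new Configuration();

        // Pull hbase-default.xml and hbase-site.xml from the classpath into it;
        // the same Configuration instance is returned, which allows call chaining.
        Configuration merged = HBaseConfiguration.addHbaseResources(conf);

        // The merged configuration now carries HBase settings such as the ZooKeeper quorum.
        System.out.println("hbase.zookeeper.quorum = " + merged.get("hbase.zookeeper.quorum"));
    }
}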
Example 1
Source File: IndexUpgradeTool.java From phoenix with Apache License 2.0 | 6 votes |
@VisibleForTesting
public int executeTool() {
    Configuration conf = HBaseConfiguration.addHbaseResources(getConf());
    try (Connection conn = getConnection(conf)) {
        ConnectionQueryServices queryServices = conn.unwrap(PhoenixConnection.class)
                .getQueryServices();
        boolean status = extractTablesAndIndexes(conn.unwrap(PhoenixConnection.class));
        if (status) {
            return executeTool(conn, queryServices, conf);
        }
    } catch (SQLException e) {
        LOGGER.severe("Something went wrong in executing tool " + e);
    }
    return -1;
}
Example 2
Source File: CsvBulkLoadTool.java From phoenix with Apache License 2.0 | 6 votes |
@Override
public int run(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.addHbaseResources(getConf());
    CommandLine cmdLine = null;
    try {
        cmdLine = parseOptions(args);
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
    }
    Class.forName(DriverManager.class.getName());
    Connection conn = DriverManager.getConnection(
            getJdbcUrl(cmdLine.getOptionValue(ZK_QUORUM_OPT.getOpt())));
    return loadData(conf, cmdLine, conn);
}
Example 3
Source File: IndexRebuildTask.java From phoenix with Apache License 2.0 | 5 votes |
@Override
public TaskRegionObserver.TaskResult checkCurrentResult(Task.TaskRecord taskRecord)
        throws Exception {
    String jobID = getJobID(taskRecord.getData());
    if (jobID != null) {
        Configuration conf = HBaseConfiguration.create(env.getConfiguration());
        Configuration configuration = HBaseConfiguration.addHbaseResources(conf);
        Cluster cluster = new Cluster(configuration);
        Job job = cluster.getJob(org.apache.hadoop.mapreduce.JobID.forName(jobID));
        if (job == null) {
            return new TaskRegionObserver.TaskResult(TaskRegionObserver.TaskResultCode.SKIPPED, "");
        }
        if (job != null && job.isComplete()) {
            if (job.isSuccessful()) {
                LOGGER.warn("IndexRebuildTask checkCurrentResult job is successful "
                        + taskRecord.getTableName());
                return new TaskRegionObserver.TaskResult(TaskRegionObserver.TaskResultCode.SUCCESS, "");
            } else {
                return new TaskRegionObserver.TaskResult(TaskRegionObserver.TaskResultCode.FAIL,
                        "Index is DISABLED");
            }
        }
    }
    return null;
}
Example 4
Source File: IndexTool.java From phoenix with Apache License 2.0 | 5 votes |
@Override
public int run(String[] args) throws Exception {
    CommandLine cmdLine;
    try {
        cmdLine = parseOptions(args);
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
        return -1;
    }
    configuration = HBaseConfiguration.addHbaseResources(getConf());
    populateIndexToolAttributes(cmdLine);
    if (tenantId != null) {
        configuration.set(PhoenixRuntime.TENANT_ID_ATTRIB, tenantId);
    }
    try (Connection conn = getConnection(configuration)) {
        createIndexToolTables(conn);
        if (dataTable != null && indexTable != null) {
            setupIndexAndDataTable(conn);
            checkIfFeatureApplicable(startTime, endTime, lastVerifyTime, pDataTable, isLocalIndexBuild);
            if (shouldDeleteBeforeRebuild) {
                deleteBeforeRebuild(conn);
            }
        }
        preSplitIndexTable(cmdLine, conn);
        boolean result = submitIndexToolJob(conn, configuration);
        if (result) {
            return 0;
        } else {
            LOGGER.error("IndexTool job failed! Check logs for errors..");
            return -1;
        }
    } catch (Exception ex) {
        LOGGER.error("An exception occurred while performing the indexing job: "
                + ExceptionUtils.getMessage(ex) + " at:\n" + ExceptionUtils.getStackTrace(ex));
        return -1;
    }
}
Example 5
Source File: ExportHBaseTableToDelimiteredTxt.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length == 0) {
        System.out
                .println("ExportHBaseTableToDelimiteredTxt {tableName} {ColumnFamily} {outputPath} {shouldCompressWithGz} {schemaLocationOnHdfs} {delimiter} {rowKeyColumn.Optional}");
        return;
    }

    String table = args[0];
    String columnFamily = args[1];
    String outputPath = args[2];
    String shouldCompression = args[3];
    String schemaFilePath = args[4];
    String delimiter = args[5];

    String rowKeyColumn = "";
    if (args.length > 6) {
        rowKeyColumn = args[6];
    }

    Job job = Job.getInstance();
    job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    job.getConfiguration().set(SHOULD_COMPRESSION_CONF, shouldCompression);
    job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
    job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
    job.getConfiguration().set(DELIMITER_CONF, delimiter);

    job.setJarByClass(ExportHBaseTableToDelimiteredTxt.class);
    job.setJobName("ExportHBaseTableToDelimiteredTxt ");

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.addFamily(Bytes.toBytes(columnFamily));

    TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            MyMapper.class, // mapper
            null, // mapper output key
            null, // mapper output value
            job);
    job.setOutputFormatClass(NullOutputFormat.class); // because we aren't emitting anything from mapper

    job.setNumReduceTasks(0);

    boolean b = job.waitForCompletion(true);
}
Example 6
Source File: ExportHBaseTableToParquet.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length == 0) {
        System.out
                .println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional");
        return;
    }

    String table = args[0];
    String columnFamily = args[1];
    String outputPath = args[2];
    String compressionCodec = args[3];
    String schemaFilePath = args[4];

    String rowKeyColumn = "";
    if (args.length > 5) {
        rowKeyColumn = args[5];
    }

    Job job = Job.getInstance();
    job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
    job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

    HBaseConfiguration.addHbaseResources(job.getConfiguration());

    job.setJarByClass(ExportHBaseTableToParquet.class);
    job.setJobName("ExportHBaseTableToParquet ");

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.addFamily(Bytes.toBytes(columnFamily));

    TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            MyMapper.class, // mapper
            null, // mapper output key
            null, // mapper output value
            job);
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

    Schema.Parser parser = new Schema.Parser();
    FileSystem fs = FileSystem.get(job.getConfiguration());
    AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

    if (compressionCodec.equals("snappy")) {
        AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
    } else if (compressionCodec.equals("gzip")) {
        AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        // nothing
    }

    job.setNumReduceTasks(0);

    boolean b = job.waitForCompletion(true);
}
Example 7
Source File: PopulateTable.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("PopulateSmallTable {numberOfMappers} {numberOfRecords} {tmpOutputPath} {tableName} {columnFamily} {runID}");
        return;
    }

    String numberOfMappers = args[0];
    String numberOfRecords = args[1];
    String outputPath = args[2];
    String tableName = args[3];
    String columnFamily = args[4];
    String runID = args[5];

    // Create job
    Job job = Job.getInstance();
    HBaseConfiguration.addHbaseResources(job.getConfiguration());

    job.setJarByClass(PopulateTable.class);
    job.setJobName("PopulateTable: " + runID);
    job.getConfiguration().set(NUMBER_OF_RECORDS, numberOfRecords);
    job.getConfiguration().set(TABLE_NAME, tableName);
    job.getConfiguration().set(COLUMN_FAMILY, columnFamily);
    job.getConfiguration().set(RUN_ID, runID);

    // Define input format and path
    job.setInputFormatClass(NMapInputFormat.class);
    NMapInputFormat.setNumMapTasks(job.getConfiguration(), Integer.parseInt(numberOfMappers));

    Configuration config = HBaseConfiguration.create();
    HTable hTable = new HTable(config, tableName);

    // Auto configure partitioner and reducer
    HFileOutputFormat.configureIncrementalLoad(job, hTable);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Define the mapper and reducer
    job.setMapperClass(CustomMapper.class);
    // job.setReducerClass(CustomReducer.class);

    // Define the key and value format
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    // Exit
    job.waitForCompletion(true);
    FileSystem hdfs = FileSystem.get(config);

    // Must all HBase to have write access to HFiles
    HFileUtils.changePermissionR(outputPath, hdfs);

    LoadIncrementalHFiles load = new LoadIncrementalHFiles(config);
    load.doBulkLoad(new Path(outputPath), hTable);
}
Example 8
Source File: ExportHBaseTableToDelimiteredSeq.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length == 0) {
        System.out
                .println("ExportHBaseTableToDelimiteredSeq {tableName} {ColumnFamily} {outputPath} {compressionCodec} {schemaLocationOnLocal} {delimiter} {rowKeyColumn.optional");
        return;
    }

    String table = args[0];
    String columnFamily = args[1];
    String outputPath = args[2];
    String compressionCodec = args[3];
    String schemaFilePath = args[4];
    String delimiter = args[5];

    String rowKeyColumn = "";
    if (args.length > 6) {
        rowKeyColumn = args[6];
    }

    Job job = Job.getInstance();
    job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
    HBaseConfiguration.addHbaseResources(job.getConfiguration());
    job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);
    job.getConfiguration().set(OUTPUT_PATH_CONF, outputPath);
    job.getConfiguration().set(DELIMITER_CONF, delimiter);

    job.setJarByClass(ExportHBaseTableToDelimiteredSeq.class);
    job.setJobName("ExportHBaseTableToDelimiteredSeq ");

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.addFamily(Bytes.toBytes(columnFamily));

    TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            MyMapper.class, // mapper
            null, // mapper output key
            null, // mapper output value
            job);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));

    if (compressionCodec.equals("snappy")) {
        SequenceFileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
    } else if (compressionCodec.equals("gzip")) {
        SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        // nothing
    }

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(0);

    boolean b = job.waitForCompletion(true);
}
Example 9
Source File: ExportHBaseTableToAvro.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length == 0) {
        System.out.println("ExportHBaseTableToAvro {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowKeyColumn.Optional}");
        return;
    }

    String table = args[0];
    String columnFamily = args[1];
    String outputPath = args[2];
    String compressionCodec = args[3];
    String schemaFilePath = args[4];

    String rowKeyColumn = "";
    if (args.length > 5) {
        rowKeyColumn = args[5];
    }

    Job job = Job.getInstance();
    HBaseConfiguration.addHbaseResources(job.getConfiguration());

    job.setJarByClass(ExportHBaseTableToAvro.class);
    job.setJobName("ExportHBaseTableToAvro ");
    job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
    job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.addFamily(Bytes.toBytes(columnFamily));

    TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            MyMapper.class, // mapper
            null, // mapper output key
            null, // mapper output value
            job);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    AvroKeyOutputFormat.setOutputPath(job, new Path(outputPath));

    Schema.Parser parser = new Schema.Parser();
    FileSystem fs = FileSystem.get(job.getConfiguration());
    AvroJob.setOutputKeySchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

    if (compressionCodec.equals("snappy")) {
        AvroKeyOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
    } else if (compressionCodec.equals("gzip")) {
        AvroKeyOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        // nothing
    }

    job.setNumReduceTasks(0);

    boolean b = job.waitForCompletion(true);
}
Example 10
Source File: CreateTable.java From HBase-ToHDFS with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    if (args.length == 0) {
        System.out.println("CreateTables {tableName} {columnFamilyName} {RegionCount}");
        return;
    }

    String tableName = args[0];
    String columnFamilyName = args[1];
    String regionCount = args[2];

    long regionMaxSize = 107374182400l;

    Configuration config = HBaseConfiguration.addHbaseResources(new Configuration());

    HBaseAdmin admin = new HBaseAdmin(config);

    createTable(tableName, columnFamilyName, Short.parseShort(regionCount), regionMaxSize, admin);

    admin.close();
    System.out.println("Done");
}
Example 11
Source File: HalyardStats.java From Halyard with Apache License 2.0 | 4 votes |
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    String targetGraph = cmd.getOptionValue('g');
    String graphContext = cmd.getOptionValue('c');
    String thresh = cmd.getOptionValue('r');
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardStats " + source + (target == null ? " update" : " -> " + target));
    job.getConfiguration().set(SOURCE, source);
    if (target != null) job.getConfiguration().set(TARGET, target);
    if (targetGraph != null) job.getConfiguration().set(TARGET_GRAPH, targetGraph);
    if (graphContext != null) job.getConfiguration().set(GRAPH_CONTEXT, graphContext);
    if (thresh != null) job.getConfiguration().setLong(THRESHOLD, Long.parseLong(thresh));
    job.setJarByClass(HalyardStats.class);
    TableMapReduceUtil.initCredentials(job);
    Scan scan = HalyardTableUtils.scan(null, null);
    if (graphContext != null) { //restricting stats to scan given graph context only
        List<RowRange> ranges = new ArrayList<>();
        byte[] gcHash = HalyardTableUtils.hashKey(SimpleValueFactory.getInstance().createIRI(graphContext));
        ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX, gcHash));
        ranges.add(rowRange(HalyardTableUtils.CPOS_PREFIX, gcHash));
        ranges.add(rowRange(HalyardTableUtils.COSP_PREFIX, gcHash));
        if (target == null) { //add stats context to the scanned row ranges (when in update mode) to delete the related stats during MapReduce
            ranges.add(rowRange(HalyardTableUtils.CSPO_PREFIX, HalyardTableUtils.hashKey(targetGraph == null ? HALYARD.STATS_GRAPH_CONTEXT : SimpleValueFactory.getInstance().createIRI(targetGraph))));
        }
        scan.setFilter(new MultiRowRangeFilter(ranges));
    }
    TableMapReduceUtil.initTableMapperJob(source, scan, StatsMapper.class, ImmutableBytesWritable.class, LongWritable.class, job);
    job.setPartitionerClass(StatsPartitioner.class);
    job.setReducerClass(StatsReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("Stats Generation Completed..");
        return 0;
    }
    return -1;
}
Example 12
Source File: HalyardBulkUpdate.java From Halyard with Apache License 2.0 | 4 votes |
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String queryFiles = cmd.getOptionValue('q');
    String workdir = cmd.getOptionValue('w');
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
    if (cmd.hasOption('i')) getConf().set(ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setStrings(TABLE_NAME_PROPERTY, source);
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    int stages = 1;
    for (int stage = 0; stage < stages; stage++) {
        Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + workdir + " -> " + source + " stage #" + stage);
        job.getConfiguration().setInt(STAGE_PROPERTY, stage);
        job.setJarByClass(HalyardBulkUpdate.class);
        job.setMapperClass(SPARQLUpdateMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);
        job.setInputFormatClass(QueryInputFormat.class);
        job.setSpeculativeExecution(false);
        job.setReduceSpeculativeExecution(false);
        try (HTable hTable = HalyardTableUtils.getTable(getConf(), source, false, 0)) {
            HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
            QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, true, stage);
            Path outPath = new Path(workdir, "stage" + stage);
            FileOutputFormat.setOutputPath(job, outPath);
            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.initCredentials(job);
            if (stage == 0) { //count real number of stages
                for (InputSplit is : new QueryInputFormat().getSplits(job)) {
                    QueryInputFormat.QueryInputSplit qis = (QueryInputFormat.QueryInputSplit) is;
                    int updates = QueryParserUtil.parseUpdate(QueryLanguage.SPARQL, qis.getQuery(), null).getUpdateExprs().size();
                    if (updates > stages) {
                        stages = updates;
                    }
                    LOG.log(Level.INFO, "{0} contains {1} stages of the update sequence.", new Object[]{qis.getQueryName(), updates});
                }
                LOG.log(Level.INFO, "Bulk Update will process {0} MapReduce stages.", stages);
            }
            if (job.waitForCompletion(true)) {
                new LoadIncrementalHFiles(getConf()).doBulkLoad(outPath, hTable);
                LOG.log(Level.INFO, "Stage #{0} of {1} completed..", new Object[]{stage, stages});
            } else {
                return -1;
            }
        }
    }
    LOG.info("Bulk Update Completed..");
    return 0;
}
Example 13
Source File: HalyardBulkLoad.java From Halyard with Apache License 2.0 | 4 votes |
@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String workdir = cmd.getOptionValue('w');
    String target = cmd.getOptionValue('t');
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    getConf().setBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, cmd.hasOption('d'));
    getConf().setBoolean(TRUNCATE_PROPERTY, cmd.hasOption('r'));
    getConf().setInt(SPLIT_BITS_PROPERTY, Integer.parseInt(cmd.getOptionValue('b', "3")));
    if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
    if (cmd.hasOption('m')) getConf().setLong("mapreduce.input.fileinputformat.split.maxsize", Long.parseLong(cmd.getOptionValue('m')));
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + workdir + " -> " + target);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), target, true, getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, source);
        FileOutputFormat.setOutputPath(job, new Path(workdir));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
                HalyardTableUtils.truncateTable(hTable).close();
            }
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(workdir), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}
Example 14
Source File: HalyardPreSplit.java From Halyard with Apache License 2.0 | 4 votes |
@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    try (Connection con = ConnectionFactory.createConnection(getConf())) {
        try (Admin admin = con.getAdmin()) {
            if (admin.tableExists(TableName.valueOf(target))) {
                LOG.log(Level.WARNING, "Pre-split cannot modify already existing table {0}", target);
                return -1;
            }
        }
    }
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    if (cmd.hasOption('g')) getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    getConf().setInt(DECIMATION_FACTOR_PROPERTY, Integer.parseInt(cmd.getOptionValue('d', String.valueOf(DEFAULT_DECIMATION_FACTOR))));
    getConf().setLong(SPLIT_LIMIT_PROPERTY, Long.parseLong(cmd.getOptionValue('l', String.valueOf(DEFAULT_SPLIT_LIMIT))));
    Job job = Job.getInstance(getConf(), "HalyardPreSplit -> " + target);
    job.getConfiguration().set(TABLE_PROPERTY, target);
    job.setJarByClass(HalyardPreSplit.class);
    job.setMapperClass(RDFDecimatingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, source);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    job.setReducerClass(PreSplitReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("PreSplit Calculation Completed..");
        return 0;
    }
    return -1;
}
Example 15
Source File: HalyardSummary.java From Halyard with Apache License 2.0 | 4 votes |
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardSummary " + source + (target == null ? " update" : " -> " + target));
    job.getConfiguration().set(SOURCE, source);
    if (target != null) job.getConfiguration().set(TARGET, target);
    if (cmd.hasOption('g')) job.getConfiguration().set(TARGET_GRAPH, cmd.getOptionValue('g'));
    if (cmd.hasOption('d')) job.getConfiguration().setInt(DECIMATION_FACTOR, Integer.parseInt(cmd.getOptionValue('d')));
    job.setJarByClass(HalyardSummary.class);
    TableMapReduceUtil.initCredentials(job);
    Scan scan = HalyardTableUtils.scan(new byte[]{HalyardTableUtils.POS_PREFIX}, new byte[]{HalyardTableUtils.POS_PREFIX + 1});
    TableMapReduceUtil.initTableMapperJob(source, scan, SummaryMapper.class, ImmutableBytesWritable.class, LongWritable.class, job);
    job.setNumReduceTasks(1);
    job.setCombinerClass(SummaryCombiner.class);
    job.setReducerClass(SummaryReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("Summary Generation Completed..");
        return 0;
    }
    return -1;
}
Example 16
Source File: HalyardBulkDelete.java From Halyard with Apache License 2.0 | 4 votes |
@Override
public int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('t');
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardDelete " + source);
    if (cmd.hasOption('s')) {
        job.getConfiguration().set(SUBJECT, cmd.getOptionValue('s'));
    }
    if (cmd.hasOption('p')) {
        job.getConfiguration().set(PREDICATE, cmd.getOptionValue('p'));
    }
    if (cmd.hasOption('o')) {
        job.getConfiguration().set(OBJECT, cmd.getOptionValue('o'));
    }
    if (cmd.hasOption('g')) {
        job.getConfiguration().setStrings(CONTEXTS, cmd.getOptionValues('g'));
    }
    job.setJarByClass(HalyardBulkDelete.class);
    TableMapReduceUtil.initCredentials(job);
    Scan scan = HalyardTableUtils.scan(null, null);
    TableMapReduceUtil.initTableMapperJob(source, scan, DeleteMapper.class, ImmutableBytesWritable.class, LongWritable.class, job);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setSpeculativeExecution(false);
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), source, false, 0)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileOutputFormat.setOutputPath(job, new Path(cmd.getOptionValue('f')));
        TableMapReduceUtil.addDependencyJars(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(cmd.getOptionValue('f')), hTable);
            LOG.info("Bulk Delete Completed..");
            return 0;
        }
    }
    return -1;
}
Example 17
Source File: HalyardBulkExport.java From Halyard with Apache License 2.0 | 4 votes |
@Override
protected int run(CommandLine cmd) throws Exception {
    if (!cmd.getArgList().isEmpty()) throw new HalyardExport.ExportException("Unknown arguments: " + cmd.getArgList().toString());
    String source = cmd.getOptionValue('s');
    String queryFiles = cmd.getOptionValue('q');
    String target = cmd.getOptionValue('t');
    if (!target.contains("{0}")) {
        throw new HalyardExport.ExportException("Bulk export target must contain '{0}' to be replaced by stripped filename of the actual SPARQL query.");
    }
    getConf().set(SOURCE, source);
    getConf().set(TARGET, target);
    String driver = cmd.getOptionValue('c');
    if (driver != null) {
        getConf().set(JDBC_DRIVER, driver);
    }
    String props[] = cmd.getOptionValues('p');
    if (props != null) {
        for (int i = 0; i < props.length; i++) {
            props[i] = Base64.encodeBase64String(props[i].getBytes(StandardCharsets.UTF_8));
        }
        getConf().setStrings(JDBC_PROPERTIES, props);
    }
    if (cmd.hasOption('i')) getConf().set(HalyardBulkUpdate.ELASTIC_INDEX_URL, cmd.getOptionValue('i'));
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    String cp = cmd.getOptionValue('l');
    if (cp != null) {
        String jars[] = cp.split(":");
        StringBuilder newCp = new StringBuilder();
        for (int i = 0; i < jars.length; i++) {
            if (i > 0) newCp.append(':');
            newCp.append(addTmpFile(jars[i])); //append classpath entries to tmpfiles and trim paths from the classpath
        }
        getConf().set(JDBC_CLASSPATH, newCp.toString());
    }
    Job job = Job.getInstance(getConf(), "HalyardBulkExport " + source + " -> " + target);
    job.setJarByClass(HalyardBulkExport.class);
    job.setMaxMapAttempts(1);
    job.setMapperClass(BulkExportMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Void.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(QueryInputFormat.class);
    QueryInputFormat.setQueriesFromDirRecursive(job.getConfiguration(), queryFiles, false, 0);
    job.setOutputFormatClass(NullOutputFormat.class);
    TableMapReduceUtil.initCredentials(job);
    if (job.waitForCompletion(true)) {
        LOG.info("Bulk Export Completed..");
        return 0;
    }
    return -1;
}