Java Code Examples for org.apache.hadoop.mapred.JobConf#setBoolean()
The following examples show how to use org.apache.hadoop.mapred.JobConf#setBoolean().
Each example is taken from an open-source project; the source file, project, and license are noted above it.
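Before the project examples, a minimal, self-contained sketch of the call itself may help: setBoolean(String, boolean) stores a flag under a configuration key, and getBoolean(String, boolean) reads it back with a default. The key "example.feature.enabled" below is a made-up name used only for illustration, not a real Hadoop property.

import org.apache.hadoop.mapred.JobConf;

public class SetBooleanSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // Store a boolean flag under a configuration key.
    // "example.feature.enabled" is a hypothetical key, not a real Hadoop property.
    conf.setBoolean("example.feature.enabled", true);

    // Read it back; the second argument is the default returned when the key is absent.
    boolean enabled = conf.getBoolean("example.feature.enabled", false);
    System.out.println("enabled = " + enabled); // prints: enabled = true
  }
}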
Example 1
Source File: TestFileSystem.java From RDFS with Apache License 2.0
public static void readTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example 2
Source File: LinkDbMerger.java From anthelion with Apache License 2.0
public static JobConf createMergeJob(Configuration config, Path linkDb,
    boolean normalize, boolean filter) {
  Path newLinkDb = new Path("linkdb-merge-" +
      Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf job = new NutchJob(config);
  job.setJobName("linkdb merge " + linkDb);

  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(LinkDbFilter.class);
  job.setBoolean(LinkDbFilter.URL_NORMALIZING, normalize);
  job.setBoolean(LinkDbFilter.URL_FILTERING, filter);
  job.setReducerClass(LinkDbMerger.class);

  FileOutputFormat.setOutputPath(job, newLinkDb);
  job.setOutputFormat(MapFileOutputFormat.class);
  job.setBoolean("mapred.output.compress", true);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Inlinks.class);

  // https://issues.apache.org/jira/browse/NUTCH-1069
  job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  return job;
}
Example 3
Source File: RegressionPrepare.java From ml-ease with Apache License 2.0
@Override
public void run() throws Exception {
  JobConfig config = super.getJobConfig();
  JobConf conf = super.createJobConf(RegressionPrepareMapper.class,
                                     RegressionPrepareOutput.SCHEMA$);
  String mapKey = config.getString(MAP_KEY, "");
  conf.set(MAP_KEY, mapKey);
  conf.setInt(NUM_CLICK_REPLICATES, config.getInt(NUM_CLICK_REPLICATES, 1));
  conf.setBoolean(IGNORE_FEATURE_VALUE, config.getBoolean(IGNORE_FEATURE_VALUE, false));
  int nblocks = config.getInt(NUM_BLOCKS, 0);
  conf.setInt(NUM_BLOCKS, nblocks);
  _logger.info("Running the preparation job of admm with map.key = " + mapKey
      + " and num.blocks=" + nblocks);
  AvroUtils.runAvroJob(conf);
}
Example 4
Source File: TestConfigTranslationMRToTez.java From tez with Apache License 2.0
@Test(timeout = 5000)
public void testMRToTezKeyTranslation() {
  JobConf confVertex1 = new JobConf();
  confVertex1.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, IntWritable.class.getName());
  confVertex1.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, LongWritable.class.getName());
  confVertex1.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true);

  MRHelpers.translateMRConfToTez(confVertex1);

  // Verify translation
  assertEquals(IntWritable.class.getName(),
      ConfigUtils.getIntermediateOutputKeyClass(confVertex1).getName());
  assertEquals(LongWritable.class.getName(),
      ConfigUtils.getIntermediateOutputValueClass(confVertex1).getName());
  assertEquals(IntWritable.class.getName(),
      ConfigUtils.getIntermediateInputKeyClass(confVertex1).getName());
  assertEquals(LongWritable.class.getName(),
      ConfigUtils.getIntermediateInputValueClass(confVertex1).getName());
  assertTrue(ConfigUtils.shouldCompressIntermediateOutput(confVertex1));
  assertTrue(ConfigUtils.isIntermediateInputCompressed(confVertex1));
}
Example 5
Source File: TestFetcher.java From hadoop with Apache License 2.0
@Before
@SuppressWarnings("unchecked") // mocked generics
public void setup() {
  LOG.info(">>>> " + name.getMethodName());
  job = new JobConf();
  job.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false);
  jobWithRetry = new JobConf();
  jobWithRetry.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true);
  id = TaskAttemptID.forName("attempt_0_1_r_1_1");
  ss = mock(ShuffleSchedulerImpl.class);
  mm = mock(MergeManagerImpl.class);
  r = mock(Reporter.class);
  metrics = mock(ShuffleClientMetrics.class);
  except = mock(ExceptionReporter.class);
  key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
  connection = mock(HttpURLConnection.class);

  allErrs = mock(Counters.Counter.class);
  when(r.getCounter(anyString(), anyString())).thenReturn(allErrs);

  ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
  maps.add(map1ID);
  maps.add(map2ID);
  when(ss.getMapsForHost(host)).thenReturn(maps);
}
Example 6
Source File: TestDeprecatedKeys.java From tez with Apache License 2.0
@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);
  jobConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);

  MRHelpers.translateMRConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0), 0.01f);
  assertEquals(0.22f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0), 0.01f);
  assertEquals(false, jobConf.getBoolean(TezConfiguration.TEZ_USER_CLASSPATH_FIRST, true));
}
Example 7
Source File: TestChild.java From big-c with Apache License 2.0
private Job submitAndValidateJob(JobConf conf, int numMaps, int numReds,
    boolean oldConfigs) throws IOException, InterruptedException,
    ClassNotFoundException {
  conf.setBoolean(OLD_CONFIGS, oldConfigs);
  if (oldConfigs) {
    conf.set(JobConf.MAPRED_TASK_JAVA_OPTS, TASK_OPTS_VAL);
  } else {
    conf.set(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, MAP_OPTS_VAL);
    conf.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, REDUCE_OPTS_VAL);
  }
  conf.set(JobConf.MAPRED_MAP_TASK_LOG_LEVEL, Level.OFF.toString());
  conf.set(JobConf.MAPRED_REDUCE_TASK_LOG_LEVEL, Level.OFF.toString());

  Job job = MapReduceTestUtil.createJob(conf, inDir, outDir, numMaps, numReds);
  job.setMapperClass(MyMapper.class);
  job.setReducerClass(MyReducer.class);

  assertFalse("Job already has a job tracker connection, before it's submitted",
      job.isConnected());
  job.submit();
  assertTrue("Job doesn't have a job tracker connection, even though it's been submitted",
      job.isConnected());
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());

  // Check output directory
  FileSystem fs = FileSystem.get(conf);
  assertTrue("Job output directory doesn't exist!", fs.exists(outDir));
  FileStatus[] list = fs.listStatus(outDir, new OutputFilter());
  int numPartFiles = numReds == 0 ? numMaps : numReds;
  assertTrue("Number of part-files is " + list.length + " and not " + numPartFiles,
      list.length == numPartFiles);
  return job;
}
Example 8
Source File: LinkRank.java From nutch-htmlunit with Apache License 2.0
/**
 * Runs the initializer job. The initializer job sets up the nodes with a
 * default starting score for link analysis.
 *
 * @param nodeDb The node database to use.
 * @param output The job output directory.
 *
 * @throws IOException If an error occurs while running the initializer job.
 */
private void runInitializer(Path nodeDb, Path output) throws IOException {
  // configure the initializer
  JobConf initializer = new NutchJob(getConf());
  initializer.setJobName("LinkAnalysis Initializer");
  FileInputFormat.addInputPath(initializer, nodeDb);
  FileOutputFormat.setOutputPath(initializer, output);
  initializer.setInputFormat(SequenceFileInputFormat.class);
  initializer.setMapperClass(Initializer.class);
  initializer.setMapOutputKeyClass(Text.class);
  initializer.setMapOutputValueClass(Node.class);
  initializer.setOutputKeyClass(Text.class);
  initializer.setOutputValueClass(Node.class);
  initializer.setOutputFormat(MapFileOutputFormat.class);
  initializer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  // run the initializer
  LOG.info("Starting initialization job");
  try {
    JobClient.runJob(initializer);
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished initialization job.");
}
Example 9
Source File: IndexingJob.java From nutch-htmlunit with Apache License 2.0
public void index(Path crawlDb, Path linkDb, List<Path> segments,
    boolean noCommit, boolean deleteGone, String params,
    boolean filter, boolean normalize) throws IOException {

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("Indexer: starting at " + sdf.format(start));

  final JobConf job = new NutchJob(getConf());
  job.setJobName("Indexer");

  LOG.info("Indexer: deleting gone documents: " + deleteGone);
  LOG.info("Indexer: URL filtering: " + filter);
  LOG.info("Indexer: URL normalizing: " + normalize);

  IndexWriters writers = new IndexWriters(getConf());
  LOG.info(writers.describe());

  IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

  // NOW PASSED ON THE COMMAND LINE AS A HADOOP PARAM
  // job.set(SolrConstants.SERVER_URL, solrUrl);

  job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
  job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
  job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);

  if (params != null) {
    job.set(IndexerMapReduce.INDEXER_PARAMS, params);
  }

  job.setReduceSpeculativeExecution(false);

  final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
      new Random().nextInt());

  FileOutputFormat.setOutputPath(job, tmp);
  try {
    JobClient.runJob(job);
    // do the commits once and for all the reducers in one go
    if (!noCommit) {
      writers.open(job, "commit");
      writers.commit();
    }
    long end = System.currentTimeMillis();
    LOG.info("Indexer: finished at " + sdf.format(end) + ", elapsed: "
        + TimingUtil.elapsedTime(start, end));
  } finally {
    FileSystem.get(job).delete(tmp, true);
  }
}
Example 10
Source File: GenericMRLoadJobCreator.java From RDFS with Apache License 2.0
public static JobConf createJob(String[] argv, boolean mapoutputCompressed,
    boolean outputCompressed) throws Exception {

  JobConf job = new JobConf();
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return null;
  }

  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormat(NullOutputFormat.class);
  }

  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != job.getClass("mapred.indirect.input.format", null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(job);
    Path sysdir = jClient.getSystemDir();
    Random r = new Random();
    Path indirInputFile = new Path(sysdir,
        Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
    job.set("mapred.indirect.input.file", indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        sysdir.getFileSystem(job), job, indirInputFile,
        LongWritable.class, Text.class, SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(job);
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDir()) {
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              writer.sync();
              writer.append(new LongWritable(stat.getLen()),
                  new Text(stat.getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  job.setCompressMapOutput(mapoutputCompressed);
  job.setBoolean("mapred.output.compress", outputCompressed);
  return job;
}
Example 11
Source File: TradesHdfsDataVerifier.java From gemfirexd-oss with Apache License 2.0
public int run(String[] args) throws Exception {
  GfxdDataSerializable.initTypes();

  JobConf conf = new JobConf(getConf());
  conf.setJobName("TradesHdfsDataVerifier");

  String hdfsHomeDir = args[0];
  String url = args[1];
  String tableName = args[2];

  System.out.println("TradesHdfsDataVerifier.run() invoked with "
      + " hdfsHomeDir = " + hdfsHomeDir
      + " url = " + url
      + " tableName = " + tableName);

  // Job-specific params
  conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
  conf.set(RowInputFormat.INPUT_TABLE, tableName);
  conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

  conf.setInputFormat(RowInputFormat.class);
  conf.setMapperClass(HdfsDataMapper.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(TradesRow.class);

  conf.setReducerClass(HdfsDataReducer.class);
  conf.set(RowOutputFormat.OUTPUT_TABLE, tableName + "_HDFS");
  //conf.set(GfxdOutputFormat.OUTPUT_SCHEMA, "APP");
  conf.set(RowOutputFormat.OUTPUT_URL, url);
  conf.setOutputFormat(RowOutputFormat.class);
  conf.setOutputKeyClass(Key.class);
  conf.setOutputValueClass(TradeOutputObject.class);

  StringBuffer aStr = new StringBuffer();
  aStr.append("HOME_DIR = " + conf.get(RowInputFormat.HOME_DIR) + " ");
  aStr.append("INPUT_TABLE = " + conf.get(RowInputFormat.INPUT_TABLE) + " ");
  aStr.append("OUTPUT_TABLE = " + conf.get(RowOutputFormat.OUTPUT_TABLE) + " ");
  aStr.append("OUTPUT_URL = " + conf.get(RowOutputFormat.OUTPUT_URL) + " ");
  System.out.println("VerifyHdfsData running with the following conf: " + aStr.toString());

  FileOutputFormat.setOutputPath(conf, new Path("" + System.currentTimeMillis()));

  JobClient.runJob(conf);
  return 0;
}
Example 12
Source File: TopBusyAirportGemfirexd.java From gemfirexd-oss with Apache License 2.0
public int run(String[] args) throws Exception {
  GfxdDataSerializable.initTypes();

  JobConf conf = new JobConf(getConf());
  conf.setJobName("Busy Airport Count");

  Path outputPath = new Path(args[0]);
  Path intermediateOutputPath = new Path(args[0] + "_int");
  String hdfsHomeDir = args[1];
  String tableName = args[2];

  outputPath.getFileSystem(conf).delete(outputPath, true);
  intermediateOutputPath.getFileSystem(conf).delete(intermediateOutputPath, true);

  conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
  conf.set(RowInputFormat.INPUT_TABLE, tableName);
  conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

  conf.setInputFormat(RowInputFormat.class);
  conf.setMapperClass(SampleMapper.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);

  conf.setReducerClass(SampleReducer.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  FileOutputFormat.setOutputPath(conf, intermediateOutputPath);

  int rc = JobClient.runJob(conf).isSuccessful() ? 0 : 1;
  if (rc == 0) {
    JobConf topConf = new JobConf(getConf());
    topConf.setJobName("Top Busy Airport");

    String hdfsFS = topConf.get("fs.defaultFS");
    URI hdfsUri = URI.create(hdfsFS);
    hdfsUri.getHost();

    // Assume that SqlFire locator is running alongside the namenode
    topConf.set(RowOutputFormat.OUTPUT_URL,
        "jdbc:gemfirexd://" + hdfsUri.getHost() + ":1527");
    //topConf.set(ddGfxdOutputFormat.OUTPUT_SCHEMA, "APP");
    //topConf.set(GfxdOutputFormat.OUTPUT_TABLE, "BUSY_AIRPORT");
    topConf.set(RowOutputFormat.OUTPUT_TABLE, "APP.BUSY_AIRPORT");

    // Only run a single reducer
    topConf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(topConf, intermediateOutputPath);

    topConf.setInputFormat(TextInputFormat.class);
    topConf.setMapperClass(TopBusyAirportMapper.class);
    topConf.setMapOutputKeyClass(Text.class);
    topConf.setMapOutputValueClass(StringIntPair.class);

    topConf.setReducerClass(TopBusyAirportReducer.class);
    topConf.setOutputKeyClass(Key.class);
    topConf.setOutputValueClass(BusyAirportModel.class);
    topConf.setOutputFormat(RowOutputFormat.class);

    rc = JobClient.runJob(topConf).isSuccessful() ? 0 : 1;
  }
  return rc;
}
Example 13
Source File: VerifyHdfsDataUsingMR.java From gemfirexd-oss with Apache License 2.0
public int run(String[] args) throws Exception {
  // todo@lhughes -- why do we need this?
  GfxdDataSerializable.initTypes();

  JobConf conf = new JobConf(getConf());
  conf.setJobName("hdfsMapReduce");

  String hdfsHomeDir = args[0];
  String url = args[1];
  String tableName = args[2];

  System.out.println("VerifyHdfsData.run() invoked with "
      + " hdfsHomeDir = " + hdfsHomeDir
      + " url = " + url
      + " tableName = " + tableName);

  // Job-specific params
  conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
  conf.set(RowInputFormat.INPUT_TABLE, tableName);
  conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

  conf.setInputFormat(RowInputFormat.class);
  conf.setMapperClass(HdfsDataMapper.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(MyRow.class);

  conf.setReducerClass(HdfsDataReducer.class);
  conf.set(RowOutputFormat.OUTPUT_TABLE, "TRADE.HDFS_CUSTOMERS");
  //conf.set(GfxdOutputFormat.OUTPUT_SCHEMA, "APP");
  conf.set(RowOutputFormat.OUTPUT_URL, url);
  conf.setOutputFormat(RowOutputFormat.class);
  conf.setOutputKeyClass(Key.class);
  conf.setOutputValueClass(DataObject.class);

  StringBuffer aStr = new StringBuffer();
  aStr.append("HOME_DIR = " + conf.get(RowInputFormat.HOME_DIR) + " ");
  aStr.append("INPUT_TABLE = " + conf.get(RowInputFormat.INPUT_TABLE) + " ");
  aStr.append("OUTPUT_TABLE = " + conf.get(RowOutputFormat.OUTPUT_TABLE) + " ");
  aStr.append("OUTPUT_URL = " + conf.get(RowOutputFormat.OUTPUT_URL) + " ");
  System.out.println("VerifyHdfsData running with the following conf: " + aStr.toString());

  // not planning to use this, but I get an NPE without it
  FileOutputFormat.setOutputPath(conf, new Path("" + System.currentTimeMillis()));

  JobClient.runJob(conf);
  return 0;
}
Example 14
Source File: TestTaskAttempt.java From hadoop with Apache License 2.0
@Test
public void testAppDiognosticEventOnNewTask() throws Exception {
  ApplicationId appId = ApplicationId.newInstance(1, 2);
  ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0);
  JobId jobId = MRBuilderUtils.newJobId(appId, 1);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
  TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
  Path jobFile = mock(Path.class);

  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptListener taListener = mock(TaskAttemptListener.class);
  when(taListener.getAddress()).thenReturn(
      new InetSocketAddress("localhost", 0));

  JobConf jobConf = new JobConf();
  jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
  jobConf.setBoolean("fs.file.impl.disable.cache", true);
  jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
  jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");

  TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
  when(splits.getLocations()).thenReturn(new String[] { "127.0.0.1" });

  AppContext appCtx = mock(AppContext.class);
  ClusterInfo clusterInfo = mock(ClusterInfo.class);
  Resource resource = mock(Resource.class);
  when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
  when(resource.getMemory()).thenReturn(1024);

  TaskAttemptImpl taImpl = new MapTaskAttemptImpl(taskId, 1, eventHandler,
      jobFile, 1, splits, jobConf, taListener,
      new Token(), new Credentials(), new SystemClock(), appCtx);

  NodeId nid = NodeId.newInstance("127.0.0.1", 0);
  ContainerId contId = ContainerId.newContainerId(appAttemptId, 3);
  Container container = mock(Container.class);
  when(container.getId()).thenReturn(contId);
  when(container.getNodeId()).thenReturn(nid);
  when(container.getNodeHttpAddress()).thenReturn("localhost:0");

  taImpl.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptId, "Task got killed"));
  assertFalse(
      "InternalError occurred trying to handle TA_DIAGNOSTICS_UPDATE on assigned task",
      eventHandler.internalError);
}
Example 15
Source File: EventInputFormatTest.java From gemfirexd-oss with Apache License 2.0
private void doTestRowSerDe(boolean concurrencyChecks) throws Exception {
  getConnection();
  Connection conn = startNetserverAndGetLocalNetConnection();
  final long statTS = System.currentTimeMillis();
  Statement st = conn.createStatement();
  st.execute("create hdfsstore myhdfs namenode 'localhost' homedir '" +
      HDFS_DIR + "' batchtimeinterval 5000 milliseconds");
  String concurrency = "persistent ENABLE CONCURRENCY CHECKS";
  st.execute("create table app.mytab1 (col1 int primary key, col2 varchar(100)) " +
      "partition by primary key buckets 1 hdfsstore (myhdfs) " +
      (concurrencyChecks ? concurrency : ""));

  PreparedStatement ps = conn.prepareStatement("insert into mytab1 values (?, ?)");
  ps.setInt(1, 1);
  ps.setString(2, "Value-1");
  ps.execute();

  // Wait for data to get to HDFS...
  String qname = HDFSStoreFactoryImpl.getEventQueueName("/APP/MYTAB1");
  st.execute("CALL SYS.WAIT_FOR_SENDER_QUEUE_FLUSH('" + qname + "', 1, 0)");

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  FileStatus[] list = fs.listStatus(new Path(HDFS_DIR + "/APP_MYTAB1/0/"));
  assertEquals(1, list.length);

  conf.set(RowInputFormat.INPUT_TABLE, "MYTAB1");
  conf.set(RowInputFormat.HOME_DIR, HDFS_DIR);

  JobConf job = new JobConf(conf);
  job.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  RowInputFormat ipformat = new RowInputFormat();
  InputSplit[] splits = ipformat.getSplits(job, 2);
  assertEquals(1, splits.length);
  RecordReader<Key, Row> rr = ipformat.getRecordReader(splits[0], job, null);
  Key key = rr.createKey();
  Row value = rr.createValue();
  assertTrue(rr.next(key, value));
  assertEquals(1, value.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", value.getRowAsResultSet().getString(2));
  assertTrue(value.getTimestamp() > statTS);
  assertFalse(value.getRowAsResultSet().next());

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream dos = new DataOutputStream(baos);
  value.write(dos);
  dos.close();

  byte[] buf = baos.toByteArray();
  DataInputStream dis = new DataInputStream(new ByteArrayInputStream(buf));
  Row row = new Row();
  row.readFields(dis);
  dis.close();

  assertEquals(1, row.getRowAsResultSet().getInt(1));
  assertEquals("Value-1", row.getRowAsResultSet().getString(2));
  assertFalse(value.getRowAsResultSet().next());
  TestUtil.shutDown();
}
Example 16
Source File: SolrIndexer.java From anthelion with Apache License 2.0
public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
    List<Path> segments, boolean noCommit, boolean deleteGone,
    String solrParams, boolean filter, boolean normalize) throws IOException {

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("SolrIndexer: starting at " + sdf.format(start));

  final JobConf job = new NutchJob(getConf());
  job.setJobName("index-solr " + solrUrl);

  LOG.info("SolrIndexer: deleting gone documents: " + deleteGone);
  LOG.info("SolrIndexer: URL filtering: " + filter);
  LOG.info("SolrIndexer: URL normalizing: " + normalize);

  IndexerMapReduce.initMRJob(crawlDb, linkDb, segments, job);

  job.set(SolrConstants.SERVER_URL, solrUrl);
  job.setBoolean(IndexerMapReduce.INDEXER_DELETE, deleteGone);
  job.setBoolean(IndexerMapReduce.URL_FILTERING, filter);
  job.setBoolean(IndexerMapReduce.URL_NORMALIZING, normalize);

  if (solrParams != null) {
    job.set(SolrConstants.PARAMS, solrParams);
  }

  NutchIndexWriterFactory.addClassToConf(job, SolrWriter.class);
  job.setReduceSpeculativeExecution(false);

  final Path tmp = new Path("tmp_" + System.currentTimeMillis() + "-" +
      new Random().nextInt());

  FileOutputFormat.setOutputPath(job, tmp);
  try {
    JobClient.runJob(job);
    // do the commits once and for all the reducers in one go
    SolrServer solr = SolrUtils.getCommonsHttpSolrServer(job);
    if (!noCommit) {
      solr.commit();
    }
    long end = System.currentTimeMillis();
    LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: "
        + TimingUtil.elapsedTime(start, end));
  } catch (Exception e) {
    LOG.error(e.toString());
  } finally {
    FileSystem.get(job).delete(tmp, true);
  }
}
Example 17
Source File: Submitter.java From RDFS with Apache License 2.0
/**
 * Set whether to keep the command file for debugging
 * @param conf the configuration to modify
 * @param keep the new value
 */
public static void setKeepCommandFile(JobConf conf, boolean keep) {
  conf.setBoolean("hadoop.pipes.command-file.keep", keep);
}
Example 18
Source File: Submitter.java From hadoop-gpu with Apache License 2.0
/**
 * Set whether the job will use a Java RecordWriter.
 * @param conf the configuration to modify
 * @param value the new value to set
 */
public static void setIsJavaRecordWriter(JobConf conf, boolean value) {
  conf.setBoolean("hadoop.pipes.java.recordwriter", value);
}
Example 19
Source File: Submitter.java From big-c with Apache License 2.0
/**
 * Set whether the Mapper is written in Java.
 * @param conf the configuration to modify
 * @param value the new value
 */
public static void setIsJavaMapper(JobConf conf, boolean value) {
  conf.setBoolean(Submitter.IS_JAVA_MAP, value);
}
Example 20
Source File: Submitter.java From hadoop with Apache License 2.0
/**
 * Set whether the job is using a Java RecordReader.
 * @param conf the configuration to modify
 * @param value the new value
 */
public static void setIsJavaRecordReader(JobConf conf, boolean value) {
  conf.setBoolean(Submitter.IS_JAVA_RR, value);
}
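The four Submitter examples above are thin wrappers around JobConf#setBoolean(). As a closing illustration, here is a minimal sketch of how they might be combined when configuring a Pipes job that runs a native mapper with Java-side record I/O. It assumes the Apache Hadoop org.apache.hadoop.mapred.pipes.Submitter class, where all four setters live together; the class name PipesConfSketch is made up for this example.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.pipes.Submitter;

public class PipesConfSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();

    // Java classes handle record reading and writing...
    Submitter.setIsJavaRecordReader(conf, true);
    Submitter.setIsJavaRecordWriter(conf, true);

    // ...while the map logic itself is an external Pipes binary.
    Submitter.setIsJavaMapper(conf, false);

    // Keep the generated command file on disk for debugging.
    Submitter.setKeepCommandFile(conf, true);
  }
}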