Example #1
Source File: From pentaho-hadoop-shims with Apache License 2.0 | 9 votes |
/** * Add an file path to the current set of classpath entries. It adds the file to cache as well. * <p/> * This is copied from Hadoop 0.20.2 o.a.h.filecache.DistributedCache so we can inject the correct path separator for * the environment the cluster is executing in. See {@link #getClusterPathSeparator()}. * * @param file Path of the file to be added * @param conf Configuration that contains the classpath setting */ public void addFileToClassPath( Path file, Configuration conf ) throws IOException { // Save off the classloader, to make sure the version info can be loaded successfully from the hadoop-common JAR ClassLoader cl = Thread.currentThread().getContextClassLoader(); Thread.currentThread().setContextClassLoader( VersionInfo.class.getClassLoader() ); // Restore the original classloader Thread.currentThread().setContextClassLoader( cl ); String classpath = conf.get( "mapred.job.classpath.files" ); conf.set( "mapred.job.classpath.files", classpath == null ? file.toString() : classpath + getClusterPathSeparator() + file.toString() ); FileSystem fs = FileSystem.get( conf ); URI uri = fs.makeQualified( file ).toUri(); org.apache.hadoop.mapreduce.filecache.DistributedCache.addCacheFile( uri, conf ); }
Example #2
Source File: From parquet-mr with Apache License 2.0 | 6 votes |
public WriteContext init(Configuration configuration) {
if (this.protocolFactory == null) {
try {
this.protocolFactory = getTProtocolFactoryClass(configuration).newInstance();
} catch (InstantiationException | IllegalAccessException e) {
throw new RuntimeException(e);
if (thriftClass != null) {
TBaseWriteSupport.setThriftClass(configuration, thriftClass);
} else {
thriftClass = TBaseWriteSupport.getThriftClass(configuration);
this.thriftStruct = ThriftSchemaConverter.toStructType(thriftClass);
this.schema = ThriftSchemaConverter.convertWithoutProjection(thriftStruct);
if (buffered) {
readToWrite = new BufferedProtocolReadToWrite(thriftStruct, errorHandler);
} else {
readToWrite = new ProtocolReadToWrite();
return thriftWriteSupport.init(configuration);
Example #3
Source File: From Bats with Apache License 2.0 | 6 votes |
private void setTokenRefreshCredentials(LogicalPlan dag, Configuration conf) throws IOException
String principal = conf.get(StramClientUtils.TOKEN_REFRESH_PRINCIPAL, StramUserLogin.getPrincipal());
String keytabPath = conf.get(StramClientUtils.TOKEN_REFRESH_KEYTAB, conf.get(StramClientUtils.KEY_TAB_FILE));
if (keytabPath == null) {
String keytab = StramUserLogin.getKeytab();
if (keytab != null) {
Path localKeyTabPath = new Path(keytab);
try (FileSystem fs = StramClientUtils.newFileSystemInstance(conf)) {
Path destPath = new Path(StramClientUtils.getApexDFSRootDir(fs, conf), localKeyTabPath.getName());
if (!fs.exists(destPath)) {
fs.copyFromLocalFile(false, false, localKeyTabPath, destPath);
keytabPath = destPath.toString();
LOG.debug("User principal is {}, keytab is {}", principal, keytabPath);
if ((principal != null) && (keytabPath != null)) {
dag.setAttribute(LogicalPlan.PRINCIPAL, principal);
dag.setAttribute(LogicalPlan.KEY_TAB_FILE, keytabPath);
} else {
LOG.warn("Credentials for refreshing tokens not available, application may not be able to run indefinitely");
Example #4
Source File: From hadoop with Apache License 2.0 | 6 votes |
/**
 * Construct an IFile Reader.
 *
 * <p>The raw stream is wrapped in an {@code IFileInputStream}; when a codec is given and a
 * decompressor can be obtained from the {@code CodecPool}, the data is read through the codec's
 * input stream, otherwise (no codec, or no decompressor available, which is logged as a warning)
 * the checksummed stream is used directly. The buffer size is read from
 * {@code io.file.buffer.size} only when {@code conf} is non-null.
 *
 * <p>NOTE(review): this snippet appears damaged by text extraction - the left-hand sides of three
 * assignments ({@code = codec.createInputStream(...)}, {@code = checksumIn;}) and the argument of
 * {@code new DataInputStream(} are missing. Restore them from the upstream source before compiling.
 *
 * @param conf Configuration File
 * @param in The input stream
 * @param length Length of the data in the stream, including the checksum
 *               bytes.
 * @param codec codec, or {@code null} to read the data without decompression
 * @param readsCounter Counter for records read from disk
 * @throws IOException declared by the stream construction calls
 */
public Reader(Configuration conf, FSDataInputStream in, long length, CompressionCodec codec, Counters.Counter readsCounter) throws IOException { readRecordsCounter = readsCounter; checksumIn = new IFileInputStream(in,length, conf); if (codec != null) { decompressor = CodecPool.getDecompressor(codec); if (decompressor != null) { = codec.createInputStream(checksumIn, decompressor); } else { LOG.warn("Could not obtain decompressor from CodecPool"); = checksumIn; } } else { = checksumIn; } this.dataIn = new DataInputStream(; this.fileLength = length; if (conf != null) { bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE); } }
Example #5
Source File: From hbase with Apache License 2.0 | 6 votes |
/** Callers must afterward call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}. */
// Creates a region and its WAL for a test via HBaseTestingUtility.createRegionAndWAL, passing the
// block cache through when one is supplied.
// NOTE(review): this snippet appears damaged by text extraction - several expressions are cut
// short and the closing braces are absent; restore from the upstream source before compiling.
private HRegion initHRegion(byte[] tableName, String callingMethod, Configuration conf,
String family, BlockCache blockCache) throws IOException {
// NOTE(review): the initializer of `builder` is missing.
TableDescriptorBuilder builder =
for (int i = 0; i < BLOOM_TYPE.length; i++) {
BloomType bloomType = BLOOM_TYPE[i];
// A column family descriptor is started per bloom type, named "<family>_<bloomType>".
// NOTE(review): the remainder of this builder chain is missing.
ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(family + "_" + bloomType))
RegionInfo info = RegionInfoBuilder.newBuilder(TableName.valueOf(tableName)).build();
// The region directory is DIR followed by the name of the calling method.
Path path = new Path(DIR + callingMethod);
if (blockCache != null) {
// NOTE(review): an argument is missing after `conf,` in both calls below.
return HBaseTestingUtility.createRegionAndWAL(info, path, conf,, blockCache);
} else {
return HBaseTestingUtility.createRegionAndWAL(info, path, conf,;
Example #6
Source File: From datawave with Apache License 2.0 | 6 votes |
/**
 * Expects an {@link IOException} when the shard-id-to-location mapping is requested for a table
 * after the sharded table map files were set up against an empty temporary work directory.
 */
@Test(expected = IOException.class)
public void testGetAllShardedTableMapFilesWithoutPath() throws Exception {
    // Use a fresh temporary directory as the default file system and working directory.
    Configuration configuration = new Configuration();
    File scratchDir = Files.createTempDir();
    configuration.set(FileSystem.FS_DEFAULT_NAME_KEY, scratchDir.toURI().toString());
    FileSystem fileSystem = FileSystem.get(scratchDir.toURI(), configuration);
    fileSystem.setWorkingDirectory(new Path(scratchDir.toString()));

    // Split work directory: "work", qualified against that file system.
    Path splitWorkDir = fileSystem.makeQualified(new Path("work"));
    configuration.set(ShardedTableMapFile.SPLIT_WORK_DIR, splitWorkDir.toString());

    // Three sharded table names are declared, but only TABLE_NAME is listed as a table.
    configuration.set(ShardedDataTypeHandler.SHARDED_TNAMES,
                    "shard_ingest_unit_test_table_1,shard_ingest_unit_test_table_2,shard_ingest_unit_test_table_3");
    String[] requestedTables = {TABLE_NAME};
    configuration.set(ShardedTableMapFile.TABLE_NAMES, StringUtils.join(",", requestedTables));

    ShardedTableMapFile.setupFile(configuration);
    ShardedTableMapFile.getShardIdToLocations(configuration, TABLE_NAME);
}
Example #7
Source File: From hadoop with Apache License 2.0 | 6 votes |
/**
 * Refresh all user-to-groups mappings.
 * @param conf the configuration the static user-to-groups mapping is re-parsed from
 */
// Re-parses the static user-to-groups mapping from the configuration and then refreshes the
// groups cache, logging a warning if that refresh fails with an IOException.
// NOTE(review): this snippet appears damaged by text extraction - the calls that own the two
// string literals below (presumably log statements), the body of the try block, the body of the
// negative-cache branch and all closing braces are missing.
public void refresh(Configuration conf) {"reload staticUserToGroupsMap");
staticUserToGroupsMap = parseStaticMapping(conf);"clearing userToGroupsMap cache");
try {
// NOTE(review): the statement(s) that can throw IOException are missing here.
} catch (IOException e) {
LOG.warn("Error refreshing groups cache", e);
// NOTE(review): the action taken when the negative cache is enabled is missing.
if(isNegativeCacheEnabled()) {
Example #8
Source File: From marklogic-contentpump with Apache License 2.0 | 6 votes |
/**
 * Command-line entry point: configures and runs the "link count hdfs" job.
 *
 * <p>Generic Hadoop options are parsed into the configuration first; the two remaining
 * arguments are the input directory and the output directory. The usage check is applied to the
 * remaining arguments rather than to the raw command line, so a command line that only reaches
 * two tokens because of generic options is rejected with the usage message instead of failing
 * later with an {@link ArrayIndexOutOfBoundsException}.
 *
 * <p>Exits with status 2 on a usage error, 0 when the job succeeds and 1 when it fails.
 *
 * @param args generic Hadoop options followed by {@code inputDir outputDir}
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Strip generic options before validating, so only application arguments are counted.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: LinkCountHDFS inputDir outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf, "link count hdfs");
    job.setJarByClass(LinkCountHDFS.class);

    // Map side: references are emitted as (Text, IntWritable) pairs.
    job.setInputFormatClass(HDFSInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Reduce side: counts are summed and written as text.
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    HDFSInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example #9
Source File: From hadoop with Apache License 2.0 | 6 votes |
/** Get an array of FilterConfiguration specified in the conf */
private static FilterInitializer[] getFilterInitializers(Configuration conf) {
if (conf == null) {
return null;
Class<?>[] classes = conf.getClasses(FILTER_INITIALIZER_PROPERTY);
if (classes == null) {
return null;
FilterInitializer[] initializers = new FilterInitializer[classes.length];
for(int i = 0; i < classes.length; i++) {
initializers[i] = (FilterInitializer)ReflectionUtils.newInstance(
classes[i], conf);
return initializers;
Example #10
Source File: From gemfirexd-oss with Apache License 2.0 | 6 votes |
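/**
 * Creates a {@code SequenceFile.Writer} for the given path.
 *
 * @param path the file the writer is created for
 * @param conf the configuration passed to {@code SequenceFile.createWriter}
 * @param logger the logger used while creating the writer
 * @param version - is being used only for testing. Should be passed as null for other purposes.
 * @return SequenceFile.Writer
 * @throws IOException
 */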
// Builds a SequenceFile.Writer for `path` whose keys and values are BytesWritable, with the
// compression option from withCompression(logger) and a metadata entry carrying the version ordinal.
// NOTE(review): the closing brace of this method is absent from the snippet.
public static SequenceFile.Writer getSequenceFileWriter(Path path,
Configuration conf, LogWriterI18n logger, Version version) throws IOException {
Option optPath = SequenceFile.Writer.file(path);
Option optKey = SequenceFile.Writer.keyClass(BytesWritable.class);
Option optVal = SequenceFile.Writer.valueClass(BytesWritable.class);
Option optCom = withCompression(logger);
logger.fine("Started creating hoplog " + path);
// A null version means "use the current version".
if (version == null)
version = Version.CURRENT;
//Create a metadata option with the gemfire version, for future versioning
//of the key and value format
SequenceFile.Metadata metadata = new SequenceFile.Metadata();
// NOTE(review): the metadata key (first argument of `new Text(`) is missing - it looks like it
// was lost in text extraction; the value is the ordinal of the version.
metadata.set(new Text(, new Text(String.valueOf(version.ordinal())));
Option optMeta = SequenceFile.Writer.metadata(metadata);
SequenceFile.Writer writer = SequenceFile.createWriter(conf, optPath, optKey, optVal, optCom, optMeta);
return writer;
Example #11
Source File: From big-c with Apache License 2.0 | 6 votes |
// Sets up the test fixtures: a MockHistoryContext as the application context and a Mockito mock
// of HsWebApp as the web application.
// NOTE(review): the snippet ends here without a closing brace; any further statements of the
// original method (for example bindings) are not visible.
protected void configureServlets() {
appContext = new MockHistoryContext(0, 1, 2, 1, false);
webApp = mock(HsWebApp.class);
Example #12
Source File: From RDFS with Apache License 2.0 | 6 votes |
// Constructs the skipping iterator: delegates to the superclass constructor, stores the umbilical,
// key/value classes and reporter, and obtains the iterator over the ranges to skip.
// NOTE(review): this snippet appears damaged by text extraction - the right-hand sides of the
// skipGroupCounter and skipRecCounter assignments, the second operand of the toWriteSkipRecs
// expression and the closing brace are missing; restore from the upstream source.
public SkippingReduceValuesIterator(RawKeyValueIterator in,
RawComparator<KEY> comparator, Class<KEY> keyClass,
Class<VALUE> valClass, Configuration conf, TaskReporter reporter,
TaskUmbilicalProtocol umbilical) throws IOException {
super(in, comparator, keyClass, valClass, conf, reporter);
this.umbilical = umbilical;
this.skipGroupCounter =
this.skipRecCounter =
this.toWriteSkipRecs = toWriteSkipRecs() &&
this.keyClass = keyClass;
this.valClass = valClass;
this.reporter = reporter;
// Iterator over the record ranges that are to be skipped.
skipIt = getSkipRanges().skipRangeIterator();
Example #13
Source File: From eagle with Apache License 2.0 | 6 votes |
// Replaces `hdfs` with a Mockito mock of FileSystem whose listStatus calls return canned entries
// under /user/history/done/2016/12/09/000508.
// NOTE(review): the snippet ends without a closing brace and the two mocked paths at the end are
// never used in the visible code - the remainder of the method is presumably missing.
private void mockHdfs() throws Exception {
hdfs = mock(FileSystem.class);
// listStatus(Path) yields a single entry for the 000508 directory.
FileStatus fileDirStatus = new FileStatus(100l, true, 3, 1000l, new Date().getTime(), new Path("/user/history/done/2016/12/09/000508"));
when(hdfs.listStatus(any(Path.class))).thenReturn(new FileStatus[] {fileDirStatus});
// listStatus(Path, PathFilter) yields a single .jhist entry inside that directory.
FileStatus filePartitionStatus = new FileStatus(100l, false, 3, 1000l, new Date().getTime(), new Path("/user/history/done/2016/12/09/000508/job_1479206441898_508949-1481299030929-testhistory.jhist"));
when(hdfs.listStatus(any(Path.class), any(PathFilter.class))).thenReturn(new FileStatus[] {filePartitionStatus});
Path historyFilePath = mock(Path.class);
Path historyConfPath = mock(Path.class);
Example #14
Source File: From elasticsearch-hadoop with Apache License 2.0 | 6 votes |
// Prepares a save that goes through an ingest pipeline: registers a pipeline and points the
// job configuration at it.
// NOTE(review): the snippet ends without running anything or closing the method - the rest of the
// test is not visible here.
public void testSaveWithIngest() throws Exception {
// Only applicable on Elasticsearch 5.x and above, where ingest is supported.
EsAssume.versionOnOrAfter(EsMajorVersion.V_5_X, "Ingest Supported in 5.x and above only");
Configuration conf = createConf();
RestUtils.ExtendedRestClient client = new RestUtils.ExtendedRestClient();
String prefix = "mrnewapi";
// Pipeline with a single `set` processor that forces the field pipeTEST to true.
String pipeline = "{\"description\":\"Test Pipeline\",\"processors\":[{\"set\":{\"field\":\"pipeTEST\",\"value\":true,\"override\":true}}]}";
// Registered as "/_ingest/pipeline/mrnewapi-pipeline".
client.put("/_ingest/pipeline/" + prefix + "-pipeline", StringUtils.toUTF(pipeline));
conf.set(ConfigurationOptions.ES_RESOURCE, resource("mrnewapi-ingested", "data", clusterInfo.getMajorVersion()));
conf.set(ConfigurationOptions.ES_INGEST_PIPELINE, "mrnewapi-pipeline");
conf.set(ConfigurationOptions.ES_NODES_INGEST_ONLY, "true");
Example #15
Source File: From tez with Apache License 2.0 | 5 votes |
/**
 * With a single custom launcher descriptor configured, the router must create exactly that
 * launcher (neither the YARN nor the uber launcher) and hand it the configured user payload.
 */
@Test(timeout = 5000)
public void testCustomLauncherSpecified() throws IOException, TezException {
    Configuration configuration = new Configuration(false);
    AppContext mockedContext = mock(AppContext.class);
    TaskCommunicatorManagerInterface mockedCommunicator = mock(TaskCommunicatorManagerInterface.class);

    // Four-byte payload holding the int 3.
    ByteBuffer payloadBytes = ByteBuffer.allocate(4);
    payloadBytes.putInt(0, 3);
    UserPayload payload = UserPayload.create(payloadBytes);

    String launcherName = "customLauncher";
    List<NamedEntityDescriptor> descriptors = new LinkedList<>();
    descriptors.add(
        new NamedEntityDescriptor(launcherName, FakeContainerLauncher.class.getName())
            .setUserPayload(payload));

    ContainerLaucherRouterForMultipleLauncherTest router =
        new ContainerLaucherRouterForMultipleLauncherTest(mockedContext, mockedCommunicator, null,
            descriptors, true);
    try {
        router.init(configuration);
        router.start();

        assertEquals(1, router.getNumContainerLaunchers());
        assertFalse(router.getYarnContainerLauncherCreated());
        assertFalse(router.getUberContainerLauncherCreated());
        assertEquals(launcherName, router.getContainerLauncherName(0));
        assertEquals(payloadBytes,
            router.getContainerLauncherContext(0).getInitialUserPayload().getPayload());
    } finally {
        router.stop();
    }
}
Example #16
Source File: From datawave with Apache License 2.0 | 5 votes |
/**
 * Configures a maximum partition count one above the number of reducers and checks the
 * partitions produced by the partitioner with {@code assertPartitionsUnderMax}.
 */
@Test
public void testHigherMaxThanReducers() throws IllegalAccessException, InstantiationException {
    Configuration configuration = new Configuration();
    configuration.setInt(PartitionLimiter.MAX_PARTITIONS_PROPERTY, NUM_REDUCERS + 1);

    LimitedKeyPartitioner limitedPartitioner = LimitedKeyPartitioner.class.newInstance();
    limitedPartitioner.setConf(configuration);

    assertPartitionsUnderMax(limitedPartitioner, NUM_REDUCERS);
}
Example #17
Source File: From mr4c with Apache License 2.0 | 5 votes |
/**
 * Applies the limit under test to an empty configuration and checks that extracting it again
 * yields the same resource and value, tagged with {@code LimitSource.CONFIG}.
 */
@Test
public void testConfigurationUpdate() {
    Configuration configuration = new Configuration(false);
    m_limit.applyTo(configuration);

    ResourceLimit expectedLimit = new ResourceLimit(m_resource, m_value, LimitSource.CONFIG);
    ResourceLimit extractedLimit = ResourceLimit.extractFrom(m_resource, configuration);

    assertEquals(expectedLimit, extractedLimit);
}
Example #18
Source File: From tez with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") @Override public Builder setFromConfigurationUnfiltered(Configuration conf) { // Maybe ensure this is the first call ? Otherwise this can end up overriding other parameters Preconditions.checkArgument(conf != null, "Configuration cannot be null"); ConfigUtils.mergeConfs(this.conf, conf); return this; }
Example #19
Source File: From hbase with Apache License 2.0 | 5 votes |
/** * Test if out-of-scope observers are deregistered on GC. */ @Test public void testDeregisterOnOutOfScope() { Configuration conf = new Configuration(); ConfigurationManager cm = new ConfigurationManager(); boolean outOfScopeObserversDeregistered = false; // On my machine, I was able to cause a GC after around 5 iterations. // If we do not cause a GC in 100k iterations, which is very unlikely, // there might be something wrong with the GC. for (int i = 0; i < 100000; i++) { registerLocalObserver(cm); cm.notifyAllObservers(conf); // 'Suggest' the system to do a GC. We should be able to cause GC // atleast once in the 2000 iterations. System.gc(); // If GC indeed happened, all the observers (which are all out of scope), // should have been deregistered. if (cm.getNumObservers() <= i) { outOfScopeObserversDeregistered = true; break; } } if (!outOfScopeObserversDeregistered) { LOG.warn("Observers were not GC-ed! Something seems to be wrong."); } assertTrue(outOfScopeObserversDeregistered); }
Example #20
Source File: From phoenix with Apache License 2.0 | 5 votes |
@Test public void testMaxThreadsNonZero() { Configuration conf = new Configuration(false); String key = name.getTableNameString()+"-key"; ThreadPoolBuilder builder = new ThreadPoolBuilder(name.getTableNameString(), conf); assertTrue("core threads not set, but failed return", builder.getMaxThreads() > 0); // set an negative value builder.setMaxThread(key, -1); assertTrue("core threads not set, but failed return", builder.getMaxThreads() > 0); // set a positive value builder.setMaxThread(key, 1234); assertEquals("core threads not set, but failed return", 1234, builder.getMaxThreads()); }
Example #21
Source File: From big-c with Apache License 2.0 | 5 votes |
/**
 * Starts a server with the instrumentation, scheduler and file-system-access services and
 * checks that the {@code FileSystemAccess} service can be looked up before shutting it down.
 */
@Test
@TestDir
public void simpleSecurity() throws Exception {
  String testDir = TestDirHelper.getTestDir().getAbsolutePath();

  String serviceClasses = StringUtils.join(",",
    Arrays.asList(InstrumentationService.class.getName(),
                  SchedulerService.class.getName(),
                  FileSystemAccessService.class.getName()));

  Configuration configuration = new Configuration(false);
  // NOTE(review): the property key is an empty string here - it looks like the real key was lost
  // when this snippet was extracted; confirm against the upstream test.
  configuration.set("", serviceClasses);

  // The same test directory is passed for all four directory arguments of the server.
  Server server = new Server("server", testDir, testDir, testDir, testDir, configuration);
  server.init();
  Assert.assertNotNull(server.get(FileSystemAccess.class));
  server.destroy();
}
Example #22
Source File: From RDFS with Apache License 2.0 | 5 votes |
// Loads the scheduler settings from the configuration: the allowed load difference (default 0.0)
// and the default maximum map/reduce slots (default Integer.MAX_VALUE), plus the per-CPU slot
// tables returned by loadCpuToMaxSlots.
// NOTE(review): this snippet appears damaged by text extraction - the calls that own the trailing
// string expressions (presumably log statements) and the closing brace are missing.
public void setConf(Configuration conf) {
maxDiff = conf.getFloat("mapred.fairscheduler.load.max.diff", 0.0f);
// NOTE(review): the property key below is empty; by analogy with the reduce setting it was
// presumably a "...map.tasks.maximum" key - confirm against the upstream source.
defaultMaxMapSlots = conf.getInt(
"", Integer.MAX_VALUE);
defaultMaxReduceSlots = conf.getInt(
"mapred.fairscheduler.reduce.tasks.maximum", Integer.MAX_VALUE);
defaultCpuToMaxMapSlots = loadCpuToMaxSlots(TaskType.MAP);
defaultCpuToMaxReduceSlots = loadCpuToMaxSlots(TaskType.REDUCE);"Allowed load difference between TaskTrackers = " + maxDiff);"Default slots per node: Maps:" + defaultMaxMapSlots +
" Reduces:" + defaultMaxReduceSlots);
Example #23
Source File: From phoenix with Apache License 2.0 | 5 votes |
/**
 * Logs in from a keytab using the "Client" application login configuration.
 *
 * <p>Scans the "Client" configuration entries for the options {@code principal} and
 * {@code keyTab} (a later entry overrides an earlier one) and, when both are present, calls
 * {@code UserGroupInformation.loginUserFromKeytab(principal, keyTabPath)}. Nothing happens when
 * there are no entries or either option is absent.
 *
 * <p>NOTE(review): this snippet appears damaged by text extraction - the receiver of
 * {@code .getAppConfigurationEntry("Client")} and the calls that own the "Security - ..." string
 * expressions (presumably log statements) are missing, and the inline {@code //} comment now
 * swallows the rest of the collapsed line. Restore from the upstream source before compiling.
 *
 * @throws IOException declared by the keytab login
 */
private void enableKeyTabSecurity() throws IOException { final String PRINCIPAL = "principal"; final String KEYTAB = "keyTab"; // Login with the credentials from the keytab to retrieve the TGT . The // renewal of the TGT happens in a Zookeeper thread String principal = null; String keyTabPath = null; AppConfigurationEntry entries[] = .getAppConfigurationEntry("Client");"Security - Fetched App Login Configuration Entries"); if (entries != null) { for (AppConfigurationEntry entry : entries) { if (entry.getOptions().get(PRINCIPAL) != null) { principal = (String) entry.getOptions().get(PRINCIPAL); } if (entry.getOptions().get(KEYTAB) != null) { keyTabPath = (String) entry.getOptions().get(KEYTAB); } }"Security - Got Principal = " + principal + ""); if (principal != null && keyTabPath != null) {"Security - Retreiving the TGT with principal:" + principal + " and keytab:" + keyTabPath); UserGroupInformation.loginUserFromKeytab(principal, keyTabPath);"Security - Retrieved TGT with principal:" + principal + " and keytab:" + keyTabPath); } } }
Example #24
Source File: From streamx with Apache License 2.0 | 5 votes |
/**
 * Constructs a reader from a set of options.
 *
 * <p>Exactly one of the file option and the input-stream option must be given. With a file, the
 * length defaults to the file's length as reported by its file system and the buffer size
 * defaults to {@code getBufferSize(conf)}; with a stream, the length defaults to
 * {@link Long#MAX_VALUE} and a buffer size may not be specified. The start position defaults
 * to 0. Supplying the only-header option makes the reader initialise in header-only mode.
 *
 * @param conf the configuration used to resolve the file system and default buffer size
 * @param opts the reader options (file or stream, start, length, buffer size, only-header)
 * @throws IllegalArgumentException if both or neither of file and stream are given, or if a
 *         buffer size is given without a file
 * @throws IOException declared by the file system lookups, opening the file and initialisation
 */
public Reader(Configuration conf, Option... opts) throws IOException {
  // Look up the options, these are null if not set
  FileOption fileOpt = Options.getOption(FileOption.class, opts);
  InputStreamOption streamOpt =
    Options.getOption(InputStreamOption.class, opts);
  StartOption startOpt = Options.getOption(StartOption.class, opts);
  LengthOption lenOpt = Options.getOption(LengthOption.class, opts);
  BufferSizeOption bufOpt = Options.getOption(BufferSizeOption.class, opts);
  OnlyHeaderOption headerOnly =
    Options.getOption(OnlyHeaderOption.class, opts);

  // check for consistency: the two null-tests are equal when both or neither option is set
  if ((fileOpt == null) == (streamOpt == null)) {
    throw new
      IllegalArgumentException("File or stream option must be specified");
  }
  if (fileOpt == null && bufOpt != null) {
    throw new IllegalArgumentException("buffer size can only be set when" +
                                       " a file is specified.");
  }

  // figure out the real values
  Path filename = null;
  FSDataInputStream file;
  final long len;
  if (fileOpt != null) {
    filename = fileOpt.getValue();
    FileSystem fs = filename.getFileSystem(conf);
    int bufSize = bufOpt == null ? getBufferSize(conf) : bufOpt.getValue();
    len = null == lenOpt
      ? fs.getFileStatus(filename).getLen()
      : lenOpt.getValue();
    file = openFile(fs, filename, bufSize, len);
  } else {
    len = null == lenOpt ? Long.MAX_VALUE : lenOpt.getValue();
    file = streamOpt.getValue();
  }
  long start = startOpt == null ? 0 : startOpt.getValue();

  // really set up
  initialize(filename, file, start, len, conf, headerOnly != null);
}
Example #25
Source File: From big-c with Apache License 2.0 | 5 votes |
/** * NameNode should load the edits correctly if the applicable edits are * present in the BKJM. */ @Test public void testNameNodeMultipleSwitchesUsingBKJM() throws Exception { MiniDFSCluster cluster = null; try { Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil .createJournalURI("/correctEditLogSelection").toString()); BKJMUtil.addJournalManagerDefinition(conf); cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0) .manageNameDfsSharedDirs(false).build(); NameNode nn1 = cluster.getNameNode(0); NameNode nn2 = cluster.getNameNode(1); cluster.waitActive(); cluster.transitionToActive(0); nn1.getRpcServer().rollEditLog(); // Roll Edits from current Active. // Transition to standby current active gracefully. cluster.transitionToStandby(0); // Make the other Active and Roll edits multiple times cluster.transitionToActive(1); nn2.getRpcServer().rollEditLog(); nn2.getRpcServer().rollEditLog(); // Now One more failover. So NN1 should be able to failover successfully. cluster.transitionToStandby(1); cluster.transitionToActive(0); } finally { if (cluster != null) { cluster.shutdown(); } } }
Example #26
Source File: From hbase with Apache License 2.0 | 5 votes |
// Creates a reader for the given context, then rebuilds the context on the same file system and
// path and returns a second reader created from the rebuilt context (sharing the same HFileInfo).
// NOTE(review): this snippet appears damaged by text extraction - the ReaderContextBuilder chain
// is not terminated (further builder calls and the build() call are missing), `preadReader` is
// never used in the visible code, and the closing brace is absent.
public static Reader createReaderFromStream(ReaderContext context, CacheConfig cacheConf,
Configuration conf) throws IOException {
HFileInfo fileInfo = new HFileInfo(context, conf);
Reader preadReader = HFile.createReader(context, fileInfo, cacheConf, conf);
context = new ReaderContextBuilder()
.withFileSystemAndPath(context.getFileSystem(), context.getFilePath())
Reader streamReader = HFile.createReader(context, fileInfo, cacheConf, conf);
return streamReader;
Example #27
Source File: From big-c with Apache License 2.0 | 5 votes |
// Test fixture: creates a fresh Configuration and a MiniDFSCluster with REPLICATION datanodes,
// then caches the cluster's namesystem and file system.
// NOTE(review): this snippet appears damaged by text extraction - the builder chain is not
// terminated (the build() call is missing) and the closing brace is absent.
public void setUp() throws Exception {
conf = new Configuration();
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION)
fsn = cluster.getNamesystem();
hdfs = cluster.getFileSystem();
Example #28
Source File: From deep-spark with Apache License 2.0 | 5 votes |
// Returns the configured beginning of the input number range (INPUT_NUMRANGE_BEGIN), which
// defaults to INVALID_LONG; a missing value is rejected with UnsupportedOperationException when
// the input operation is "numrange".
// NOTE(review): this snippet appears damaged by text extraction - the call that owns the
// "using ..." string expression (presumably a log statement) and the closing brace are missing.
public static long getInputNumRangeBegin(Configuration conf) {
long begin = conf.getLong(INPUT_NUMRANGE_BEGIN, INVALID_LONG);
if (begin == INVALID_LONG && getInputOperation(conf).equals("numrange"))
throw new UnsupportedOperationException
("missing input numrange begin");"using " + INPUT_NUMRANGE_BEGIN + " = " + begin);
return begin;
Example #29
Source File: From hadoop-connectors with Apache License 2.0 | 5 votes |
// For each URI without a bucket, initializing an in-memory GoogleHadoopFileSystem is expected to
// fail with an IllegalArgumentException whose message starts with
// "No bucket specified in GCS URI:".
// NOTE(review): this snippet appears damaged by text extraction - the call that takes the
// message and the lambda (presumably an assertThrows-style helper) and all closing braces are
// missing.
public void testInitializePath_failure_bucketNotSpecified() throws Exception {
List<String> invalidPaths = Arrays.asList("gs:/", "gs:/foo", "gs:/foo/bar", "gs:///");
for (String path : invalidPaths) {
URI uri = new URI(path);
// A new file system instance per path, closed at the end of each iteration.
try (GoogleHadoopFileSystem testGhfs = createInMemoryGoogleHadoopFileSystem()) {
IllegalArgumentException e =
"Path '" + path + "' should be invalid",
() -> testGhfs.initialize(uri, new Configuration()));
assertThat(e).hasMessageThat().startsWith("No bucket specified in GCS URI:");
Example #30
Source File: From hadoop with Apache License 2.0 | 5 votes |
private static void checkPaths(Configuration conf, List<Path> paths) throws
IOException {
for (Path p : paths) {
FileSystem fs = p.getFileSystem(conf);
if (!fs.exists(p)) {
throw new FileNotFoundException("Source " + p + " does not exist.");