Java Code Examples for org.apache.hadoop.conf.Configuration#setBoolean()
The following examples show how to use org.apache.hadoop.conf.Configuration#setBoolean(). Each example comes from an open-source project; the source file, project, and license are noted above the snippet.
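Before the project examples, a minimal sketch of the call itself may help: setBoolean(name, value) stores a boolean under a configuration key, and getBoolean(name, defaultValue) reads it back. The property name my.feature.enabled below is a made-up placeholder, not a key used by any of the projects that follow.

import org.apache.hadoop.conf.Configuration;

public class SetBooleanSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Stores the value under the (hypothetical) key "my.feature.enabled".
    conf.setBoolean("my.feature.enabled", true);

    // Reads it back; the second argument is the default used when the key is unset.
    boolean enabled = conf.getBoolean("my.feature.enabled", false);
    System.out.println("my.feature.enabled = " + enabled);
  }
}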
Example 1
Source File: IntegrationTestRegionReplicaReplication.java From hbase with Apache License 2.0

@Override
public void setConf(Configuration conf) {
  conf.setIfUnset(
      String.format("%s.%s", TEST_NAME, LoadTestTool.OPT_REGION_REPLICATION),
      String.valueOf(DEFAULT_REGION_REPLICATION));

  conf.setIfUnset(
      String.format("%s.%s", TEST_NAME, LoadTestTool.OPT_COLUMN_FAMILIES),
      StringUtils.join(",", DEFAULT_COLUMN_FAMILIES));

  conf.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, true);

  // enable async wal replication to region replicas for unit tests
  conf.setBoolean(ServerRegionReplicaUtil.REGION_REPLICA_REPLICATION_CONF_KEY, true);

  conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024L * 1024 * 4); // flush every 4 MB
  conf.setInt("hbase.hstore.blockingStoreFiles", 100);

  super.setConf(conf);
}
Example 2
Source File: TestAuditLogger.java From hadoop with Apache License 2.0

/**
 * Tests that TopAuditLogger can be disabled
 */
@Test
public void testDisableTopAuditLogger() throws IOException {
  Configuration conf = new HdfsConfiguration();
  conf.setBoolean(NNTOP_ENABLED_KEY, false);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  try {
    cluster.waitClusterUp();
    List<AuditLogger> auditLoggers =
        cluster.getNameNode().getNamesystem().getAuditLoggers();
    for (AuditLogger auditLogger : auditLoggers) {
      assertFalse(
          "top audit logger is still hooked in after it is disabled",
          auditLogger instanceof TopAuditLogger);
    }
  } finally {
    cluster.shutdown();
  }
}
Example 3
Source File: TestUnorderedPartitionedKVWriter.java From incubator-tez with Apache License 2.0

private Configuration createConfiguration(TezOutputContext outputContext,
    Class<? extends Writable> keyClass, Class<? extends Writable> valClass,
    boolean shouldCompress, int maxSingleBufferSizeBytes,
    Class<? extends Partitioner> partitionerClass) {
  Configuration conf = new Configuration(false);
  conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, outputContext.getWorkDirs());
  conf.set(TezJobConfig.TEZ_RUNTIME_KEY_CLASS, keyClass.getName());
  conf.set(TezJobConfig.TEZ_RUNTIME_VALUE_CLASS, valClass.getName());
  conf.set(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS, partitionerClass.getName());
  if (maxSingleBufferSizeBytes >= 0) {
    conf.setInt(TezJobConfig.TEZ_RUNTIME_UNORDERED_OUTPUT_MAX_PER_BUFFER_SIZE_BYTES,
        maxSingleBufferSizeBytes);
  }
  conf.setBoolean(TezJobConfig.TEZ_RUNTIME_COMPRESS, shouldCompress);
  if (shouldCompress) {
    conf.set(TezJobConfig.TEZ_RUNTIME_COMPRESS_CODEC, DefaultCodec.class.getName());
  }
  return conf;
}
Example 4
Source File: TestDatanodeRestart.java From RDFS with Apache License 2.0

public void testRbwReplicas() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.block.size", 1024L);
  conf.setInt("dfs.write.packet.size", 512);
  conf.setBoolean("dfs.support.append", true);
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
  cluster.waitActive();
  try {
    testRbwReplicas(cluster, false);
    testRbwReplicas(cluster, true);
  } finally {
    cluster.shutdown();
  }
}
Example 5
Source File: JournalNodeRpcServer.java From hadoop with Apache License 2.0

JournalNodeRpcServer(Configuration conf, JournalNode jn) throws IOException {
  this.jn = jn;

  Configuration confCopy = new Configuration(conf);

  // Ensure that nagling doesn't kick in, which could cause latency issues.
  confCopy.setBoolean(
      CommonConfigurationKeysPublic.IPC_SERVER_TCPNODELAY_KEY, true);

  InetSocketAddress addr = getAddress(confCopy);
  RPC.setProtocolEngine(confCopy, QJournalProtocolPB.class,
      ProtobufRpcEngine.class);
  QJournalProtocolServerSideTranslatorPB translator =
      new QJournalProtocolServerSideTranslatorPB(this);
  BlockingService service = QJournalProtocolService
      .newReflectiveBlockingService(translator);

  this.server = new RPC.Builder(confCopy)
      .setProtocol(QJournalProtocolPB.class)
      .setInstance(service)
      .setBindAddress(addr.getHostName())
      .setPort(addr.getPort())
      .setNumHandlers(HANDLER_COUNT)
      .setVerbose(false)
      .build();

  // set service-level authorization security policy
  if (confCopy.getBoolean(
      CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION, false)) {
    server.refreshServiceAcl(confCopy, new HDFSPolicyProvider());
  }
}
Example 6
Source File: TestSnapshotDeletion.java From hadoop with Apache License 2.0

@Test
public void testDeleteSnapshotWithPermissionsDisabled() throws Exception {
  cluster.shutdown();
  Configuration newConf = new Configuration(conf);
  newConf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  cluster = new MiniDFSCluster.Builder(newConf).numDataNodes(0).build();
  cluster.waitActive();
  hdfs = cluster.getFileSystem();

  final Path path = new Path("/dir");
  hdfs.mkdirs(path);
  hdfs.allowSnapshot(path);
  hdfs.mkdirs(new Path(path, "/test"));
  hdfs.createSnapshot(path, "s1");
  UserGroupInformation anotherUser = UserGroupInformation
      .createRemoteUser("anotheruser");
  anotherUser.doAs(new PrivilegedAction<Object>() {
    @Override
    public Object run() {
      DistributedFileSystem anotherUserFS = null;
      try {
        anotherUserFS = cluster.getFileSystem();
        anotherUserFS.deleteSnapshot(path, "s1");
      } catch (IOException e) {
        fail("Failed to delete snapshot : " + e.getLocalizedMessage());
      } finally {
        IOUtils.closeStream(anotherUserFS);
      }
      return null;
    }
  });
}
Example 7
Source File: TestDistCpWithAcls.java From hadoop with Apache License 2.0

/**
 * Initialize the cluster, wait for it to become active, and get FileSystem.
 *
 * @param format if true, format the NameNode and DataNodes before starting up
 * @param aclsEnabled if true, ACL support is enabled
 * @throws Exception if any step fails
 */
private static void initCluster(boolean format, boolean aclsEnabled)
    throws Exception {
  conf = new Configuration();
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, aclsEnabled);
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "stubfs:///");
  conf.setClass("fs.stubfs.impl", StubFileSystem.class, FileSystem.class);
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(format)
      .build();
  cluster.waitActive();
  fs = cluster.getFileSystem();
}
Example 8
Source File: AccumuloRecordWriter.java From datawave with Apache License 2.0

public static void setZooKeeperInstance(Configuration conf, String instanceName,
    String zooKeepers) {
  if (conf.getBoolean(INSTANCE_HAS_BEEN_SET, false)) {
    throw new IllegalStateException("Instance info can only be set once per job");
  }
  conf.setBoolean(INSTANCE_HAS_BEEN_SET, true);

  ArgumentChecker.notNull(instanceName, zooKeepers);
  conf.set(INSTANCE_NAME, instanceName);
  conf.set(ZOOKEEPERS, zooKeepers);
}
Example 9
Source File: TestRecoveryParser.java From tez with Apache License 2.0

@Test(timeout=5000)
public void testRecoverableSummary_VertexGroupInCommitting() throws IOException {
  ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
  TezDAGID dagID = TezDAGID.getInstance(appId, 1);
  AppContext appContext = mock(AppContext.class);
  when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath+"/1"));
  when(appContext.getClock()).thenReturn(new SystemClock());
  when(mockDAGImpl.getID()).thenReturn(dagID);
  when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
  when(appContext.getApplicationID()).thenReturn(appId);

  RecoveryService rService = new RecoveryService(appContext);
  Configuration conf = new Configuration();
  conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
  rService.init(conf);
  rService.start();

  DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
  // write a DAGSubmittedEvent first to initialize summaryStream
  rService.handle(new DAGHistoryEvent(dagID,
      new DAGSubmittedEvent(dagID, 1L, dagPlan,
          ApplicationAttemptId.newInstance(appId, 1),
          null, "user", new Configuration(), null, null)));
  // It should be fine to skip other events, just for testing.
  rService.handle(new DAGHistoryEvent(dagID,
      new VertexGroupCommitStartedEvent(dagID, "group_1",
          Lists.newArrayList(TezVertexID.getInstance(dagID, 0),
              TezVertexID.getInstance(dagID, 1)), 0L)));
  rService.stop();

  DAGRecoveryData dagData = parser.parseRecoveryData();
  assertEquals(dagID, dagData.recoveredDagID);
  assertTrue(dagData.nonRecoverable);
  assertTrue(dagData.reason.contains("Vertex Group Commit was in progress"));
}
Example 10
Source File: TestFastCopyCLI.java From RDFS with Apache License 2.0

@BeforeClass
public static void setUpClass() throws Exception {
  conf = new Configuration();
  remoteConf = new Configuration();
  conf.setBoolean("dfs.datanode.blkcopy.hardlink", false);
  remoteConf.setBoolean("dfs.datanode.blkcopy.hardlink", false);
  FastCopySetupUtil.setUpClass();
  // Each file is prefixed with this.
  srcPrefix = "/testFastCopyShellGlob/";
  generateFilesForGlobTesting(srcPrefix);
}
Example 11
Source File: TestLocalRunner.java From RDFS with Apache License 2.0

/**
 * Run a test with several mappers in parallel, operating at different
 * speeds. Verify that the correct amount of output is created.
 */
@Test
public void testMultiMaps() throws Exception {
  Path inputPath = createMultiMapsInput();
  Path outputPath = getOutputPath();

  Configuration conf = new Configuration();
  conf.setBoolean("mapred.localrunner.sequential", false);
  conf.setBoolean("mapred.localrunner.debug", true);
  conf.setInt(LocalJobRunner.LOCAL_RUNNER_SLOTS, 6);
  conf.set(JobConf.MAPRED_TASK_JAVA_OPTS, "-DtestProperty=testValue");
  Job job = new Job(conf);
  job.setMapperClass(StressMapper.class);
  job.setReducerClass(CountingReducer.class);
  job.setNumReduceTasks(1);
  job.getConfiguration().set("io.sort.record.pct", "0.50");
  job.getConfiguration().set("io.sort.mb", "25");

  FileInputFormat.addInputPath(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  FileSystem fs = FileSystem.getLocal(conf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }

  job.waitForCompletion(true);

  verifyOutput(outputPath);
}
Example 12
Source File: MiniAvatarCluster.java From RDFS with Apache License 2.0

public void initClientConf(Configuration conf) {
  clientConf = new Configuration(conf);
  clientConf.set("fs.default.name", "hdfs://localhost:" + nnPort);
  clientConf.set("fs.default.name0", "hdfs://localhost:" + nn0Port);
  clientConf.set("fs.default.name1", "hdfs://localhost:" + nn1Port);
  clientConf.set("dfs.namenode.dn-address", "localhost:" + nnDnPort);
  clientConf.set("dfs.namenode.dn-address0", "localhost:" + nnDn0Port);
  clientConf.set("dfs.namenode.dn-address1", "localhost:" + nnDn1Port);
  clientConf.set("fs.hdfs.impl",
      "org.apache.hadoop.hdfs.DistributedAvatarFileSystem");
  clientConf.setBoolean("fs.hdfs.impl.disable.cache", true);
  // Lower the number of retries to close connections quickly.
  clientConf.setInt("ipc.client.connect.max.retries", 3);
}
Example 13
Source File: TestPipelinesFailover.java From big-c with Apache License 2.0

/**
 * Tests lease recovery if a client crashes. This approximates the
 * use case of HBase WALs being recovered after a NN failover.
 */
@Test(timeout=30000)
public void testLeaseRecoveryAfterFailover() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(3)
      .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    stm.hflush();

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    assertTrue(fs.exists(TEST_PATH));

    FileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    loopRecoverLease(fsOtherUser, TEST_PATH);

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);

    // Fail back to ensure that the block locations weren't lost on the
    // original node.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 14
Source File: MiniDFSCluster.java From hadoop with Apache License 2.0

private void initMiniDFSCluster(
    Configuration conf,
    int numDataNodes, StorageType[][] storageTypes, boolean format,
    boolean manageNameDfsDirs, boolean manageNameDfsSharedDirs,
    boolean enableManagedDfsDirsRedundancy, boolean manageDataDfsDirs,
    StartupOption startOpt, StartupOption dnStartOpt, String[] racks,
    String[] hosts, long[][] storageCapacities, long[] simulatedCapacities,
    String clusterId, boolean waitSafeMode, boolean setupHostsFile,
    MiniDFSNNTopology nnTopology, boolean checkExitOnShutdown,
    boolean checkDataNodeAddrConfig, boolean checkDataNodeHostConfig,
    Configuration[] dnConfOverlays, boolean skipFsyncForTesting)
    throws IOException {
  boolean success = false;
  try {
    ExitUtil.disableSystemExit();

    // Re-enable symlinks for tests, see HADOOP-10020 and HADOOP-10052
    FileSystem.enableSymlinks();

    synchronized (MiniDFSCluster.class) {
      instanceId = instanceCount++;
    }

    this.conf = conf;
    base_dir = new File(determineDfsBaseDir());
    data_dir = new File(base_dir, "data");
    this.waitSafeMode = waitSafeMode;
    this.checkExitOnShutdown = checkExitOnShutdown;

    int replication = conf.getInt(DFS_REPLICATION_KEY, 3);
    conf.setInt(DFS_REPLICATION_KEY, Math.min(replication, numDataNodes));
    int safemodeExtension = conf.getInt(
        DFS_NAMENODE_SAFEMODE_EXTENSION_TESTING_KEY, 0);
    conf.setInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, safemodeExtension);
    conf.setInt(DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, 3); // 3 second
    conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
        StaticMapping.class, DNSToSwitchMapping.class);

    // In an HA cluster, in order for the StandbyNode to perform checkpoints,
    // it needs to know the HTTP port of the Active. So, if ephemeral ports
    // are chosen, disable checkpoints for the test.
    if (!nnTopology.allHttpPortsSpecified() && nnTopology.isHA()) {
      LOG.info("MiniDFSCluster disabling checkpointing in the Standby node " +
          "since no HTTP ports have been specified.");
      conf.setBoolean(DFS_HA_STANDBY_CHECKPOINTS_KEY, false);
    }
    if (!nnTopology.allIpcPortsSpecified() && nnTopology.isHA()) {
      LOG.info("MiniDFSCluster disabling log-roll triggering in the "
          + "Standby node since no IPC ports have been specified.");
      conf.setInt(DFS_HA_LOGROLL_PERIOD_KEY, -1);
    }

    EditLogFileOutputStream.setShouldSkipFsyncForTesting(skipFsyncForTesting);

    federation = nnTopology.isFederated();
    try {
      createNameNodesAndSetConf(
          nnTopology, manageNameDfsDirs, manageNameDfsSharedDirs,
          enableManagedDfsDirsRedundancy,
          format, startOpt, clusterId, conf);
    } catch (IOException ioe) {
      LOG.error("IOE creating namenodes. Permissions dump:\n" +
          createPermissionsDiagnosisString(data_dir), ioe);
      throw ioe;
    }
    if (format) {
      if (data_dir.exists() && !FileUtil.fullyDelete(data_dir)) {
        throw new IOException("Cannot remove data directory: " + data_dir +
            createPermissionsDiagnosisString(data_dir));
      }
    }

    if (startOpt == StartupOption.RECOVER) {
      return;
    }

    // Start the DataNodes
    startDataNodes(conf, numDataNodes, storageTypes, manageDataDfsDirs,
        dnStartOpt != null ? dnStartOpt : startOpt,
        racks, hosts, storageCapacities, simulatedCapacities, setupHostsFile,
        checkDataNodeAddrConfig, checkDataNodeHostConfig, dnConfOverlays);
    waitClusterUp();
    //make sure ProxyUsers uses the latest conf
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
    success = true;
  } finally {
    if (!success) {
      shutdown();
    }
  }
}
Example 15
Source File: TestNodeRefresh.java From RDFS with Apache License 2.0

private void startCluster(int numHosts, int numTrackerPerHost,
    int numExcluded, Configuration conf) throws IOException {
  try {
    conf.setBoolean("dfs.replication.considerLoad", false);

    // prepare hosts info
    hosts = new String[numHosts];
    for (int i = 1; i <= numHosts; ++i) {
      hosts[i - 1] = getHostname(i);
    }

    // start dfs
    dfs = new MiniDFSCluster(conf, 1, true, null, hosts);
    dfs.waitActive();
    dfs.startDataNodes(conf, numHosts, true, null, null, hosts, null);
    dfs.waitActive();

    namenode = (dfs.getFileSystem()).getUri().getHost() + ":"
        + (dfs.getFileSystem()).getUri().getPort();

    // create tracker hosts
    trackerHosts = new String[numHosts * numTrackerPerHost];
    for (int i = 1; i <= (numHosts * numTrackerPerHost); ++i) {
      trackerHosts[i - 1] = getHostname(i);
    }

    // start mini mr
    JobConf jtConf = new JobConf(conf);
    mr = new MiniMRCluster(0, 0, numHosts * numTrackerPerHost, namenode, 1,
        null, trackerHosts, null, jtConf, numExcluded * numTrackerPerHost);

    jt = mr.getJobTrackerRunner().getJobTracker();

    // check if trackers from all the desired hosts have connected
    Set<String> hostsSeen = new HashSet<String>();
    for (TaskTrackerStatus status : jt.taskTrackers()) {
      hostsSeen.add(status.getHost());
    }
    assertEquals("Not all hosts are up", numHosts - numExcluded,
        hostsSeen.size());
  } catch (IOException ioe) {
    stopCluster();
  }
}
Example 16
Source File: TestRecoveryParser.java From tez with Apache License 2.0

@Test(timeout=5000)
public void testRecoverableSummary_VertexGroupFinishCommitting() throws IOException {
  ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
  TezDAGID dagID = TezDAGID.getInstance(appId, 1);
  AppContext appContext = mock(AppContext.class);
  when(appContext.getCurrentRecoveryDir()).thenReturn(new Path(recoveryPath+"/1"));
  when(appContext.getClock()).thenReturn(new SystemClock());
  when(mockDAGImpl.getID()).thenReturn(dagID);
  when(appContext.getHadoopShim()).thenReturn(new DefaultHadoopShim());
  when(appContext.getApplicationID()).thenReturn(appId);

  RecoveryService rService = new RecoveryService(appContext);
  Configuration conf = new Configuration();
  conf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, true);
  rService.init(conf);
  rService.start();

  DAGPlan dagPlan = TestDAGImpl.createTestDAGPlan();
  // write a DAGSubmittedEvent first to initialize summaryStream
  rService.handle(new DAGHistoryEvent(dagID,
      new DAGSubmittedEvent(dagID, 1L, dagPlan,
          ApplicationAttemptId.newInstance(appId, 1),
          null, "user", new Configuration(), null, null)));
  // It should be fine to skip other events, just for testing.
  TezVertexID v0 = TezVertexID.getInstance(dagID, 0);
  TezVertexID v1 = TezVertexID.getInstance(dagID, 1);
  rService.handle(new DAGHistoryEvent(dagID,
      new VertexGroupCommitStartedEvent(dagID, "group_1",
          Lists.newArrayList(v0, v1), 0L)));
  rService.handle(new DAGHistoryEvent(dagID,
      new VertexGroupCommitFinishedEvent(dagID, "group_1",
          Lists.newArrayList(v0, v1), 0L)));
  // also write VertexFinishedEvent, otherwise it is still non-recoverable
  // when checking with non-summary event
  rService.handle(new DAGHistoryEvent(dagID,
      new VertexFinishedEvent(v0, "v1", 10, 0L, 0L, 0L, 0L, 0L,
          VertexState.SUCCEEDED, "", null, null, null, null)));
  rService.handle(new DAGHistoryEvent(dagID,
      new VertexFinishedEvent(v1, "v1", 10, 0L, 0L, 0L, 0L, 0L,
          VertexState.SUCCEEDED, "", null, null, null, null)));
  rService.stop();

  DAGRecoveryData dagData = parser.parseRecoveryData();
  assertEquals(dagID, dagData.recoveredDagID);
  assertFalse(dagData.nonRecoverable);
}
Example 17
Source File: TestStringBehavior.java From parquet-mr with Apache License 2.0

@Test
public void testReflect() throws IOException {
  Schema reflectSchema = ReflectData.get()
      .getSchema(ReflectRecord.class);

  ReflectRecord avroRecord;
  try (DataFileReader<ReflectRecord> avro = new DataFileReader<>(
      avroFile, new ReflectDatumReader<>(reflectSchema))) {
    avroRecord = avro.next();
  }

  ReflectRecord parquetRecord;
  Configuration conf = new Configuration();
  conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
  AvroReadSupport.setAvroDataSupplier(conf, ReflectDataSupplier.class);
  AvroReadSupport.setAvroReadSchema(conf, reflectSchema);
  try (ParquetReader<ReflectRecord> parquet = AvroParquetReader
      .<ReflectRecord>builder(parquetFile)
      .withConf(conf)
      .build()) {
    parquetRecord = parquet.read();
  }

  Assert.assertEquals("Avro default string class should be String",
      String.class, avroRecord.default_class.getClass());
  Assert.assertEquals("Parquet default string class should be String",
      String.class, parquetRecord.default_class.getClass());

  Assert.assertEquals("Avro avro.java.string=String class should be String",
      String.class, avroRecord.string_class.getClass());
  Assert.assertEquals("Parquet avro.java.string=String class should be String",
      String.class, parquetRecord.string_class.getClass());

  Assert.assertEquals("Avro stringable class should be BigDecimal",
      BigDecimal.class, avroRecord.stringable_class.getClass());
  Assert.assertEquals("Parquet stringable class should be BigDecimal",
      BigDecimal.class, parquetRecord.stringable_class.getClass());
  Assert.assertEquals("Should have the correct BigDecimal value",
      BIG_DECIMAL, parquetRecord.stringable_class);

  Assert.assertEquals("Avro map default string class should be String",
      String.class, keyClass(avroRecord.default_map));
  Assert.assertEquals("Parquet map default string class should be String",
      String.class, keyClass(parquetRecord.default_map));

  Assert.assertEquals("Avro map avro.java.string=String class should be String",
      String.class, keyClass(avroRecord.string_map));
  Assert.assertEquals("Parquet map avro.java.string=String class should be String",
      String.class, keyClass(parquetRecord.string_map));

  Assert.assertEquals("Avro map stringable class should be BigDecimal",
      BigDecimal.class, keyClass(avroRecord.stringable_map));
  Assert.assertEquals("Parquet map stringable class should be BigDecimal",
      BigDecimal.class, keyClass(parquetRecord.stringable_map));
}
Example 18
Source File: TestScannerWithBulkload.java From hbase with Apache License 2.0

@Test
public void testBulkLoadNativeHFile() throws Exception {
  final TableName tableName = TableName.valueOf(name.getMethodName());
  long l = System.currentTimeMillis();
  Admin admin = TEST_UTIL.getAdmin();
  createTable(admin, tableName);
  Scan scan = createScan();
  final Table table = init(admin, l, scan, tableName);
  // use bulkload
  final Path hfilePath = writeToHFile(l, "/temp/testBulkLoadNativeHFile/",
      "/temp/testBulkLoadNativeHFile/col/file", true);
  Configuration conf = TEST_UTIL.getConfiguration();
  conf.setBoolean("hbase.mapreduce.bulkload.assign.sequenceNumbers", true);
  BulkLoadHFiles.create(conf).bulkLoad(tableName, hfilePath);
  ResultScanner scanner = table.getScanner(scan);
  Result result = scanner.next();
  // We had 'version0', 'version1' for 'row1,col:q' in the table.
  // Bulk load added 'version2' scanner should be able to see 'version2'
  result = scanAfterBulkLoad(scanner, result, "version2");
  Put put0 = new Put(Bytes.toBytes("row1"));
  put0.add(new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("col"), Bytes.toBytes("q"), l,
      Bytes.toBytes("version3")));
  table.put(put0);
  admin.flush(tableName);
  scanner = table.getScanner(scan);
  result = scanner.next();
  while (result != null) {
    List<Cell> cells = result.getColumnCells(Bytes.toBytes("col"), Bytes.toBytes("q"));
    for (Cell _c : cells) {
      if (Bytes.toString(_c.getRowArray(), _c.getRowOffset(), _c.getRowLength())
          .equals("row1")) {
        System.out
            .println(Bytes.toString(_c.getRowArray(), _c.getRowOffset(), _c.getRowLength()));
        System.out.println(Bytes.toString(_c.getQualifierArray(), _c.getQualifierOffset(),
            _c.getQualifierLength()));
        System.out.println(
            Bytes.toString(_c.getValueArray(), _c.getValueOffset(), _c.getValueLength()));
        Assert.assertEquals("version3", Bytes.toString(_c.getValueArray(),
            _c.getValueOffset(), _c.getValueLength()));
      }
    }
    result = scanner.next();
  }
  scanner.close();
  table.close();
}
Example 19
Source File: TradeTxHistoryHdfsDataVerifierV2.java From gemfirexd-oss with Apache License 2.0

public int run(String[] args) throws Exception {
  GfxdDataSerializable.initTypes();

  Configuration conf = getConf();

  String hdfsHomeDir = args[0];
  String url = args[1];
  String tableName = args[2];

  System.out.println("TradeTxHistoryHdfsDataVerifierV2.run() invoked with "
      + " hdfsHomeDir = " + hdfsHomeDir
      + " url = " + url
      + " tableName = " + tableName);

  // Job-specific params
  conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
  conf.set(RowInputFormat.INPUT_TABLE, tableName);
  conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);
  conf.set(RowOutputFormat.OUTPUT_TABLE, tableName + "_HDFS");
  conf.set(RowOutputFormat.OUTPUT_URL, url);

  Job job = Job.getInstance(conf, "TradeTxHistoryHdfsDataVerifierV2");
  job.setJobName("TradeTxHistoryHdfsDataVerifierV2");
  job.setInputFormatClass(RowInputFormat.class);
  job.setOutputFormatClass(RowOutputFormat.class);

  job.setMapperClass(HdfsDataMapper.class);
  job.setMapOutputKeyClass(Key.class);
  job.setMapOutputValueClass(TradeTxHistoryRow.class);

  job.setReducerClass(HdfsDataReducer.class);
  job.setOutputKeyClass(Key.class);
  job.setOutputValueClass(TradeTxHistoryOutputObject.class);

  StringBuffer aStr = new StringBuffer();
  aStr.append("HOME_DIR = " + conf.get(RowInputFormat.HOME_DIR) + " ");
  aStr.append("INPUT_TABLE = " + conf.get(RowInputFormat.INPUT_TABLE) + " ");
  aStr.append("OUTPUT_TABLE = " + conf.get(RowOutputFormat.OUTPUT_TABLE) + " ");
  aStr.append("OUTPUT_URL = " + conf.get(RowOutputFormat.OUTPUT_URL) + " ");
  System.out.println("VerifyHdfsData running with the following conf: " + aStr.toString());

  return job.waitForCompletion(false) ? 0 : 1;
}
Example 20
Source File: NativeCodeLoader.java From flink with Apache License 2.0

/**
 * Set if native hadoop libraries, if present, can be used for this job.
 *
 * @param conf configuration
 * @param loadNativeLibraries can native hadoop libraries be loaded
 */
public void setLoadNativeLibraries(Configuration conf,
    boolean loadNativeLibraries) {
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY,
      loadNativeLibraries);
}