org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration

Source File: OraOopOutputFormatBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0

6 votes

/**
 * Overrides the shared JDBC URL property with the URL configured for one
 * specific mapper, if such a URL exists.
 *
 * <p>The override has to be in place before the OraOopDBRecordWriter's
 * (ancestral) constructor reads the configuration to open its database
 * connection via DBConfiguration.getConnection().
 *
 * @param context  task attempt context whose configuration is read and,
 *                 when a mapper-specific URL is present, modified
 * @param mapperId identifier of the mapper whose JDBC URL is looked up
 */
protected void applyMapperJdbcUrl(TaskAttemptContext context, int mapperId) {

  final Configuration jobConf = context.getConfiguration();

  // Name of the property that holds this particular mapper's JDBC URL.
  final String urlKey =
      OraOopUtilities.getMapperJdbcUrlPropertyName(mapperId, jobConf);

  // The URL itself; null when nothing was configured for this mapper.
  final String urlForMapper = jobConf.get(urlKey, null);

  String urlForLog = "<null>";
  if (urlForMapper != null) {
    urlForLog = urlForMapper;
  }
  LOG.debug(String.format("Mapper %d has a JDBC URL of: %s", mapperId,
      urlForLog));

  // Nothing configured: leave the shared URL property untouched.
  if (urlForMapper == null) {
    return;
  }
  jobConf.set(DBConfiguration.URL_PROPERTY, urlForMapper);
}

Source File: DBOutputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0

6 votes

/**
 * {@inheritDoc}
 *
 * <p>Opens a database connection from the job configuration, prepares the
 * statement produced by {@code constructQuery} for the configured output
 * table and fields, and hands both to a new {@code DBRecordWriter}.
 *
 * @param context task attempt context supplying the job configuration
 * @return a record writer bound to the opened connection and statement
 * @throws IOException wrapping any failure raised while connecting,
 *         preparing the statement or constructing the writer
 */
@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
  String tableName = dbConf.getOutputTableName();
  String[] fieldNames = dbConf.getOutputFieldNames();

  // No explicit field names: fall back to an array sized by the field count.
  if (fieldNames == null) {
    fieldNames = new String[dbConf.getOutputFieldCount()];
  }

  Connection connection = null;
  PreparedStatement statement = null;
  try {
    connection = dbConf.getConnection();
    statement = connection.prepareStatement(
                  constructQuery(tableName, fieldNames));
    return new com.cloudera.sqoop.mapreduce.db.DBOutputFormat.DBRecordWriter(
                   connection, statement);
  } catch (Exception ex) {
    // The writer never took ownership, so release whatever was opened here;
    // otherwise the connection would stay open for the life of the task.
    if (statement != null) {
      try {
        statement.close();
      } catch (Exception ignored) {
        // The original failure is the one worth reporting.
      }
    }
    if (connection != null) {
      try {
        connection.close();
      } catch (Exception ignored) {
        // The original failure is the one worth reporting.
      }
    }
    throw new IOException(ex);
  }
}

Source File: CRAMRecordReader.java From Hadoop-BAM with MIT License

6 votes

/**
 * Prepares this reader for one split of a CRAM file: opens the file as a
 * seekable stream, records the split's start and length, and creates the
 * {@code CRAMIterator} restricted to that range.
 *
 * @param split   the split to read; cast to {@code FileSplit}
 * @param context task attempt context supplying the job configuration
 * @throws IOException if closing a previous state or opening the file fails
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
  // A reader that was initialised before is closed prior to being set up again.
  if (isInitialized) {
    close();
  }
  isInitialized = true;

  final Configuration jobConf = context.getConfiguration();
  final FileSplit cramSplit = (FileSplit) split;
  final Path cramFile = cramSplit.getPath();

  // The reference source path is optional; a missing property yields a null path.
  final String referenceLocation = jobConf.get(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY);
  final ReferenceSource referenceSource = new ReferenceSource(
      referenceLocation == null ? null : NIOFileUtil.asPath(referenceLocation));

  seekableStream = WrapSeekable.openPath(jobConf, cramFile);
  start = cramSplit.getStart();
  length = cramSplit.getLength();

  // CRAMIterator right shifts boundaries by 16, so the offsets are shifted
  // left here. Its boundaries are inclusive, hence the "- 1" on the upper one.
  final long exclusiveEnd = start + length;
  final long lowerBoundary = start << 16;
  final long upperBoundary = (exclusiveEnd - 1) << 16;
  final long[] boundaries = {lowerBoundary, upperBoundary};

  final ValidationStringency stringency = SAMHeaderReader.getValidationStringency(jobConf);
  cramIterator = new CRAMIterator(seekableStream, referenceSource, boundaries, stringency);
}

Source File: BinaryReader.java From marklogic-contentpump with Apache License 2.0

5 votes

/**
 * Creates the record writer for a task attempt.
 *
 * @param context task attempt context; passed to {@code getOutputPath} and
 *                used as the source of the job configuration
 * @return a new {@code BinaryWriter} built from the output path and the
 *         task's configuration
 */
@Override
public RecordWriter<DocumentURI, BytesWritable> getRecordWriter(
        TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new BinaryWriter(getOutputPath(context), 
            context.getConfiguration());
}

Source File: IndirectBigQueryOutputFormat.java From hadoop-connectors with Apache License 2.0

5 votes

/**
 * Builds the committer for this output format by asking the delegate
 * (resolved from the task's configuration) for its committer and wrapping
 * the result in a {@link IndirectBigQueryOutputCommitter}.
 *
 * @param context task attempt context supplying the configuration
 * @return the wrapping committer
 * @throws IOException if the delegate or the wrapper cannot be created
 */
@Override
public OutputCommitter createCommitter(TaskAttemptContext context) throws IOException {
  OutputCommitter wrapped =
      getDelegate(context.getConfiguration()).getOutputCommitter(context);
  return new IndirectBigQueryOutputCommitter(context, wrapped);
}

Source File: KeyValueOutputFormat.java From marklogic-contentpump with Apache License 2.0

5 votes

/**
 * Creates a {@code KeyValueWriter} for a task attempt.
 *
 * <p>The host handed to the writer is chosen by
 * {@code InternalUtilities.getHost} from the hosts that {@code getHosts}
 * derives from the job configuration.
 *
 * @param context task attempt context supplying the job configuration
 * @return a writer constructed from the configuration and the chosen host
 */
@Override
public RecordWriter<KEYOUT, VALUEOUT> getRecordWriter(
        TaskAttemptContext context) throws IOException, InterruptedException {
    final Configuration jobConf = context.getConfiguration();
    final String targetHost = InternalUtilities.getHost(getHosts(jobConf));
    return new KeyValueWriter<KEYOUT, VALUEOUT>(jobConf, targetHost);
}

Source File: MainframeDatasetRecordReader.java From aliyun-maxcompute-data-collectors with Apache License 2.0

5 votes

/**
 * Initialises this reader for one split: stores the split and the task's
 * configuration, resolves the record class from the configuration, and
 * resets the reader's key, record and counters.
 *
 * @param inputSplit         the split to read; cast to
 *                           {@code MainframeDatasetInputSplit}
 * @param taskAttemptContext context supplying the job configuration
 */
@Override
public void initialize(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {

  split = (MainframeDatasetInputSplit)inputSplit;
  conf = taskAttemptContext.getConfiguration();
  // Unchecked cast; the class comes from DBConfiguration.INPUT_CLASS_PROPERTY
  // and is null when that property is not set.
  inputClass = (Class<T>) (conf.getClass(
              DBConfiguration.INPUT_CLASS_PROPERTY, null));
  // Start from a clean state: no current key/record and zeroed counters.
  key = null;
  datasetRecord = null;
  numberRecordRead = 0;
  datasetProcessed = 0;
}

Source File: DatabaseTransformOutputFormat.java From marklogic-contentpump with Apache License 2.0

5 votes

/**
 * Creates the {@code DatabaseTransformWriter} for a task attempt.
 *
 * <p>As a side effect the {@code fastLoad} field is refreshed from the
 * {@code OUTPUT_FAST_LOAD} configuration entry before the source map is
 * resolved.
 *
 * @param context task attempt context supplying the job configuration
 * @return a writer built from the configuration, the source map returned by
 *         {@code getSourceMap}, the fast-load flag and the {@code am} field
 */
@Override
public RecordWriter<DocumentURI, DatabaseDocumentWithMeta> getRecordWriter(
    TaskAttemptContext context) throws IOException, InterruptedException {
    final Configuration jobConf = context.getConfiguration();
    fastLoad = Boolean.valueOf(jobConf.get(OUTPUT_FAST_LOAD));
    // Arguments are evaluated left to right, so the source map is resolved
    // before the fastLoad and am fields are read for the constructor call.
    return new DatabaseTransformWriter<DatabaseDocumentWithMeta>(
        jobConf, getSourceMap(fastLoad, context), fastLoad, am);
}

Source File: IndexedStorage.java From spork with Apache License 2.0

5 votes

/**
 * Creates the record writer for a task attempt: opens the default work file
 * for output, creates the accompanying index file through an
 * {@code IndexManager}, and returns an {@code IndexedStorageRecordWriter}
 * that writes to both.
 *
 * @param context task attempt context supplying the configuration and the
 *                default work file location
 * @return a writer owning the opened output stream and the index manager
 * @throws IOException if the file system, the work file or the index file
 *         cannot be opened
 */
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(
        TaskAttemptContext context) throws IOException,
        InterruptedException {

    Configuration conf = context.getConfiguration();

    FileSystem fs = FileSystem.get(conf);
    Path file = this.getDefaultWorkFile(context, "");
    FSDataOutputStream fileOut = fs.create(file, false);

    // Until the writer exists nobody else owns fileOut, so it must be closed
    // here if index creation or writer construction fails.
    boolean handedOff = false;
    try {
        IndexManager indexManager = new IndexManager(offsetsToIndexKeys);
        indexManager.createIndexFile(fs, file);
        RecordWriter<WritableComparable, Tuple> writer =
                new IndexedStorageRecordWriter(fileOut, this.fieldDelimiter, indexManager);
        handedOff = true;
        return writer;
    } finally {
        if (!handedOff) {
            try {
                fileOut.close();
            } catch (IOException ignored) {
                // Let the original failure propagate rather than the close error.
            }
        }
    }
}

Source File: AtomErrorDataTypeHandler.java From datawave with Apache License 2.0

5 votes

/**
 * Sets up this handler for a task attempt: runs the superclass setup, then
 * resolves the ingest helper registered for the "error" type, stores the
 * task's configuration and creates the marking functions.
 *
 * @param context task attempt context supplying the configuration
 */
@Override
public void setup(TaskAttemptContext context) {
    super.setup(context);
    
    // The helper of the "error" type is cast to ErrorShardedIngestHelper.
    this.errorHelper = (ErrorShardedIngestHelper) (TypeRegistry.getType("error").getIngestHelper(context.getConfiguration()));
    
    this.conf = context.getConfiguration();
    markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();
}

Source File: ArchiveOutputFormat.java From marklogic-contentpump with Apache License 2.0

5 votes

/**
 * Creates the {@code ArchiveWriter} for a task attempt.
 *
 * @param context task attempt context; its configuration supplies the output
 *                file path under {@code ConfigConstants.CONF_OUTPUT_FILEPATH}
 * @return a writer targeting that path
 */
@Override
public RecordWriter<DocumentURI, MarkLogicDocument> getRecordWriter(
    TaskAttemptContext context) throws IOException, InterruptedException {
    final String outputLocation =
        context.getConfiguration().get(ConfigConstants.CONF_OUTPUT_FILEPATH);
    return new ArchiveWriter(new Path(outputLocation), context);
}

Source File: GenerateData.java From big-c with Apache License 2.0

5 votes

/**
 * Creates the record writer for a task attempt.
 *
 * @param job task attempt context supplying the default work file and the
 *            job configuration
 * @return a new {@code ChunkWriter} built from the default work file
 *         (requested with an empty extension) and the configuration
 */
@Override
public RecordWriter<NullWritable,BytesWritable> getRecordWriter(
    TaskAttemptContext job) throws IOException {

  return new ChunkWriter(getDefaultWorkFile(job, ""),
      job.getConfiguration());
}

Source File: OraOopOutputFormatBase.java From aliyun-maxcompute-data-collectors with Apache License 2.0

5 votes

/**
 * Adjusts the batching settings in the task's configuration for use with the
 * Oracle APPEND_VALUES hint.
 *
 * <p>The number of statements per transaction is forced to 1, and the number
 * of records per statement is raised to the minimum reported by
 * {@code OraOopUtilities.getMinAppendValuesBatchSize} when it is lower.
 * Each change is logged at INFO level.
 *
 * @param context task attempt context whose configuration is updated in place
 */
protected void updateBatchSizeInConfigurationToAllowOracleAppendValuesHint(
    TaskAttemptContext context) {

  final Configuration jobConf = context.getConfiguration();

  // With APPEND_VALUES every batch-insert is committed on its own.
  final int requiredBatchesPerCommit = 1;
  final int configuredBatchesPerCommit =
      jobConf.getInt(AsyncSqlOutputFormat.STATEMENTS_PER_TRANSACTION_KEY, 0);
  if (configuredBatchesPerCommit != requiredBatchesPerCommit) {
    jobConf.setInt(AsyncSqlOutputFormat.STATEMENTS_PER_TRANSACTION_KEY,
        requiredBatchesPerCommit);
    final String commitMessage = String.format(
        "The number of batch-inserts to perform per commit has been changed "
            + "from %d to %d. This is in response to the Oracle "
            + "APPEND_VALUES hint being used.",
        configuredBatchesPerCommit, requiredBatchesPerCommit);
    LOG.info(commitMessage);
  }

  // The batch size may only be raised to the minimum, never lowered.
  final int configuredBatchSize =
      jobConf.getInt(AsyncSqlOutputFormat.RECORDS_PER_STATEMENT_KEY, 0);
  final int smallestAllowedBatchSize =
      OraOopUtilities.getMinAppendValuesBatchSize(jobConf);
  if (configuredBatchSize < smallestAllowedBatchSize) {
    jobConf.setInt(AsyncSqlOutputFormat.RECORDS_PER_STATEMENT_KEY,
        smallestAllowedBatchSize);
    final String batchMessage = String.format(
        "The number of rows per batch-insert has been changed from %d to %d. "
            + "This is in response to the Oracle APPEND_VALUES hint "
            + "being used.",
        configuredBatchSize, smallestAllowedBatchSize);
    LOG.info(batchMessage);
  }
}

Source File: Warp10OutputFormat.java From warp10-platform with Apache License 2.0

5 votes

/**
 * Creates a {@code Warp10RecordWriter} configured from the job configuration.
 *
 * <p>Each writer setting is looked up through
 * {@code Warp10InputFormat.getProperty} using this format's suffix and
 * copied into a {@link Properties} object, with a fallback value applied
 * when the setting is absent.
 *
 * @param context task attempt context supplying the job configuration
 * @return a writer initialised with the collected properties
 */
@Override
public RecordWriter<Writable, Writable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {

  final Configuration jobConf = context.getConfiguration();

  // Setting name paired with the fallback used when it is not configured.
  final String[][] settingsWithFallback = {
    { Warp10RecordWriter.WARP10_GZIP, "false" },
    { Warp10RecordWriter.WARP10_ENDPOINT, "" },
    { Warp10RecordWriter.WARP10_TOKEN, "" },
    { Warp10RecordWriter.WARP10_MAXRATE, Long.toString(Long.MAX_VALUE) },
  };

  final Properties writerProps = new Properties();
  for (String[] setting : settingsWithFallback) {
    final String name = setting[0];
    final String fallback = setting[1];
    writerProps.setProperty(name, Warp10InputFormat.getProperty(jobConf, this.suffix, name, fallback));
  }

  return new Warp10RecordWriter(writerProps);
}

Source File: AtomDataTypeHandler.java From datawave with Apache License 2.0

4 votes

/**
 * Configures this handler from the task's configuration.
 *
 * <p>Reads the table name, field names, field aliases, field value overrides
 * and category sub-field settings, builds the {@code subCategories} map from
 * the {@code field:value} pairs, and verifies that the names, aliases and
 * overrides arrays all have the same length.
 *
 * @param context task attempt context supplying the configuration
 * @throws IllegalArgumentException if fieldNames, fieldAliases and
 *         fieldOverrides do not all have the same length
 */
@Override
public void setup(TaskAttemptContext context) {
    conf = context.getConfiguration();
    tableName = ConfigurationHelper.isNull(context.getConfiguration(), ATOM_TABLE_NAME, String.class);
    categoryTableName = tableName + "Categories";
    subCategories = new HashMap<>();
    markingFunctions = MarkingFunctions.Factory.createMarkingFunctions();

    TypeRegistry.getInstance(context.getConfiguration());
    // The value is not used here; the lookup is kept because it was always
    // performed. NOTE(review): presumably it validates that the property is
    // set - confirm against ConfigurationHelper.isNull.
    ConfigurationHelper.isNull(context.getConfiguration(), ATOM_TYPES_TO_PROCESS, String[].class);

    fieldNames = ConfigurationHelper.isNull(context.getConfiguration(), ATOM_FIELD_NAMES, String[].class);
    // Configuration.getStrings() eats empty values, we don't want to do that. Split it ourselves.
    String aliases = ConfigurationHelper.isNull(context.getConfiguration(), ATOM_FIELD_ALIASES, String.class);
    fieldAliases = StringUtils.split(aliases, ',', true); // keeps empty elements
    String overrides = ConfigurationHelper.isNull(context.getConfiguration(), ATOM_FIELD_VALUE_OVERRIDES, String.class);
    fieldOverrides = StringUtils.split(overrides, ',', true); // keeps empty elements

    sCategories = StringUtils.split(ConfigurationHelper.isNull(context.getConfiguration(), ATOM_CATEGORY_SUB_FIELD, String.class), ',', false);

    // Each entry is expected as "field:value"; anything else is skipped.
    for (String s : sCategories) {
        String[] fieldValue = StringUtils.split(s, ':', false);
        if (fieldValue.length != 2 || Strings.isNullOrEmpty(fieldValue[0]) || Strings.isNullOrEmpty(fieldValue[1])) {
            continue;
        }

        // Group the values by field, creating the set on first use.
        Set<String> values = subCategories.get(fieldValue[0]);
        if (values == null) {
            values = new HashSet<>();
            subCategories.put(fieldValue[0], values);
        }

        System.err.println("Value: " + fieldValue[0] + " " + fieldValue[1]);
        values.add(fieldValue[1]);
    }

    // Make sure these 3 arrays are all the same size. A mismatch with EITHER
    // of the other two arrays is an error, hence "||" rather than "&&".
    if (fieldNames.length != fieldAliases.length || fieldNames.length != fieldOverrides.length) {
        throw new IllegalArgumentException("AtomDataTypeHandler, configured fieldNames, fieldAliases, and fieldOverrides are different lengtsh.  "
                        + "Please fix the configuration. " + fieldNames.length + "," + fieldAliases.length + "," + fieldOverrides.length);
    }
}

Source File: KeyValueInputFormat.java From marklogic-contentpump with Apache License 2.0

4 votes

/**
 * Creates the record reader for a split.
 *
 * @param split   the split to read; not used by this method itself
 * @param context task attempt context supplying the job configuration
 * @return a new {@code KeyValueReader} built from the task's configuration
 */
@Override
public RecordReader<KEYIN, VALUEIN> createRecordReader(InputSplit split,
        TaskAttemptContext context) 
throws IOException, InterruptedException {
    return new KeyValueReader<KEYIN, VALUEIN>(context.getConfiguration());
}

Source File: GryoOutputFormat.java From tinkerpop with Apache License 2.0

4 votes

/**
 * Creates the record writer for a task attempt.
 *
 * @param job task attempt context; passed to {@code getDataOutputStream} and
 *            used as the source of the job configuration
 * @return a new {@code GryoRecordWriter} built from the data output stream
 *         and the task's configuration
 */
@Override
public RecordWriter<NullWritable, VertexWritable> getRecordWriter(final TaskAttemptContext job) throws IOException, InterruptedException {
    return new GryoRecordWriter(getDataOutputStream(job), job.getConfiguration());
}

Source File: RowOutputFormat.java From gemfirexd-oss with Apache License 2.0

4 votes

/**
 * {@inheritDoc}
 *
 * <p>This implementation returns a new {@code GfxdRecordWriter} constructed
 * from the configuration of the given task attempt context.
 */
@Override
public RecordWriter<Key, VALUE> getRecordWriter(TaskAttemptContext context)
    throws IOException {
  return new GfxdRecordWriter(context.getConfiguration());
}

Source File: RDFReader.java From marklogic-contentpump with Apache License 2.0

4 votes

/**
 * Initialises this reader for one input split.
 *
 * <p>In order, it derives graph support from the server version, reads the
 * RDF tuning options and the output graph/collection settings from the
 * configuration, and creates the value instance. It then resolves the input
 * file (stepping into the first file when the split is a directory), opens
 * the stream and sets up the default permissions.
 *
 * @param inSplit the split to read; cast to {@code FileSplit}
 * @param context task attempt context supplying the job configuration
 * @throws IOException if the server version is null, or if the input cannot
 *         be opened
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    if (version == null)
        throw new IOException("Server Version is null");
    // Everything before the first '.' is taken as the major version.
    // NOTE(review): a version string without a '.' would make substring fail
    // here - confirm the version format.
    String majorVersion = version.substring(0, version.indexOf('.'));
    graphSupported = Integer.valueOf(majorVersion) >= 8;
    conf = context.getConfiguration();

    // Optional overrides; each field keeps its current value when unset.
    String rdfopt = conf.get(ConfigConstants.RDF_STREAMING_MEMORY_THRESHOLD);
    if (rdfopt != null) {
        INMEMORYTHRESHOLD = Long.parseLong(rdfopt);
    }

    rdfopt = conf.get(ConfigConstants.RDF_TRIPLES_PER_DOCUMENT);
    if (rdfopt != null) {
        MAXTRIPLESPERDOCUMENT = Integer.parseInt(rdfopt);
    }

    String fnAsColl = conf.get(ConfigConstants.CONF_OUTPUT_FILENAME_AS_COLLECTION);
    if (fnAsColl != null) {
        LOG.warn("The -filename_as_collection has no effect with input_type RDF, use -output_collections instead.");
    }

    String[] collections = conf.getStrings(MarkLogicConstants.OUTPUT_COLLECTION);
    outputGraph = conf.get(MarkLogicConstants.OUTPUT_GRAPH);
    outputOverrideGraph = conf.get(MarkLogicConstants.OUTPUT_OVERRIDE_GRAPH);
    // if no default graph is set and output_collections is set,
    // or if an override graph is set
    ignoreCollectionQuad = (outputGraph == null && collections != null)
        || outputOverrideGraph != null;
    hasOutputCol = (collections != null);

    Class<? extends Writable> valueClass = RDFWritable.class;

    // The value instance is always an RDFWritable, created reflectively.
    @SuppressWarnings("unchecked")
    VALUEIN localValue = (VALUEIN) ReflectionUtils.newInstance(valueClass, 
            conf);

    value = localValue;
    encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
            DEFAULT_ENCODING);

    setFile(((FileSplit) inSplit).getPath());
    fs = file.getFileSystem(context.getConfiguration());
    
    // A directory split is replaced by the first split the iterator yields.
    FileStatus status = fs.getFileStatus(file);
    if(status.isDirectory()) {
        iterator = new FileIterator((FileSplit)inSplit, context);
        inSplit = iterator.next();
    }

    try {
        initStream(inSplit);
    } catch (IOException e ){
        LOG.error("Invalid input: " + file.getName() + ": " + e.getMessage());
        throw e;
    }
    // Explicitly configured permissions win; otherwise fall back to defaults.
    String[] perms = conf.getStrings(MarkLogicConstants.OUTPUT_PERMISSION);
    if(perms!=null) {
        // The target array is sized to half the number of configured strings.
        // NOTE(review): presumably they come as role/capability pairs - confirm.
        defaultPerms = PermissionUtil.getPermissions(perms).toArray(
            new ContentPermission[perms.length>>1]);
    } else {
        List<ContentPermission> tmp = PermissionUtil.getDefaultPermissions(conf,roleMap);
        if(tmp!=null)
            defaultPerms = tmp.toArray(new ContentPermission[tmp.size()]);
    }
        
    if (roleMapExists) 
        initExistingMapPerms();
}

Source File: TreeMergeOutputFormat.java From examples with Apache License 2.0

4 votes

/**
 * Merges all source shard indexes into one index under {@code workDir}.
 *
 * <p>The method writes the shard number file, adds every shard directory to
 * a freshly created index writer (the "logical" merge), optionally forces a
 * merge down to the configured maximum number of segments, and closes the
 * writer. Elapsed times are logged in seconds, and recorded in the Solr
 * counters when debug logging is enabled. The heart beater is always
 * cancelled and closed, even on failure.
 *
 * @param context task attempt context supplying the configuration, status
 *                reporting and counters
 * @throws IOException if any index or file system operation fails
 */
@Override
public void close(TaskAttemptContext context) throws IOException {
  // System.nanoTime() deltas are in nanoseconds; convert with these factors.
  // (The expression 10^9 is a bitwise XOR in Java and evaluates to 3.)
  final float nanosPerSecond = 1e9f;
  final long nanosPerMilli = 1000000L;

  LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
  writeShardNumberFile(context);
  heartBeater.needHeartBeat();
  try {
    Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());

    // TODO: shouldn't we pull the Version from the solrconfig.xml?
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
        .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
        //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
        //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
        ;

    if (LOG.isDebugEnabled()) {
      writerConfig.setInfoStream(System.out);
    }

    // disable compound file to improve performance
    // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
    // also see defaults in SolrIndexConfig
    MergePolicy mergePolicy = writerConfig.getMergePolicy();
    LOG.debug("mergePolicy was: {}", mergePolicy);
    if (mergePolicy instanceof TieredMergePolicy) {
      ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
    } else if (mergePolicy instanceof LogMergePolicy) {
      ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
    }
    LOG.info("Using mergePolicy: {}", mergePolicy);

    IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);

    Directory[] indexes = new Directory[shards.size()];
    for (int i = 0; i < shards.size(); i++) {
      indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
    }

    context.setStatus("Logically merging " + shards.size() + " shards into one shard");
    LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
    long start = System.nanoTime();

    writer.addIndexes(indexes);
    // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename)
    // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
    // See https://issues.apache.org/jira/browse/LUCENE-4746

    long elapsedNanos = System.nanoTime() - start;
    if (LOG.isDebugEnabled()) {
      // Both ends of the interval must come from the same clock.
      // NOTE(review): milliseconds assumed for the counter because the old
      // code used currentTimeMillis() - confirm against SolrCounters.
      context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment(elapsedNanos / nanosPerMilli);
    }
    float secs = elapsedNanos / nanosPerSecond;
    LOG.info("Logical merge took {} secs", secs);

    int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
    context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
    LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
    start = System.nanoTime();
    // Integer.MAX_VALUE is the "not configured" default: no forced merge.
    if (maxSegments < Integer.MAX_VALUE) {
      writer.forceMerge(maxSegments);
      // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
      // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
    }
    elapsedNanos = System.nanoTime() - start;
    if (LOG.isDebugEnabled()) {
      context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment(elapsedNanos / nanosPerMilli);
    }
    secs = elapsedNanos / nanosPerSecond;
    LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);

    start = System.nanoTime();
    LOG.info("Optimizing Solr: Closing index writer");
    writer.close();
    secs = (System.nanoTime() - start) / nanosPerSecond;
    LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
    context.setStatus("Done");
  } finally {
    heartBeater.cancelHeartBeat();
    heartBeater.close();
  }
}

Java Code Examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()