Java Code Examples for org.apache.hadoop.mapred.Reporter#progress()
The following examples show how to use org.apache.hadoop.mapred.Reporter#progress().
The source file, originating project, and license are noted above each example.
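Reporter#progress() tells the MapReduce framework that the current task is still alive and making progress, which keeps long-running record processing from being killed once the configured task timeout elapses without any output, status, or counter updates. Before the project examples below, here is a minimal, hypothetical sketch of a mapper in the old org.apache.hadoop.mapred API that calls progress() periodically during slow per-record work; the class name, the processChunk helper, and the chunk count are illustrative and not taken from any of the listed projects.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical mapper: processes each record in chunks and pings the
// framework after every chunk so the task is not marked as failed for
// inactivity while no key/value pairs are being emitted.
public class SlowRecordMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, LongWritable> {

  @Override
  public void map(LongWritable key, Text value,
      OutputCollector<Text, LongWritable> output, Reporter reporter)
      throws IOException {
    // Simulated expensive per-record work split into chunks.
    for (int chunk = 0; chunk < 100; chunk++) {
      processChunk(value, chunk); // placeholder for real work
      // Report liveness without updating any counters or status text.
      reporter.progress();
    }
    output.collect(value, key);
  }

  private void processChunk(Text value, int chunk) {
    // placeholder: real implementations would do I/O or CPU-heavy work here
  }
}

The real-world examples that follow use the same call inside record readers, reducers, copy loops, and shuffle implementations.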
Example 1
Source File: CqlInputFormat.java From stratio-cassandra with Apache License 2.0
public RecordReader<Long, Row> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
    throws IOException {
  TaskAttemptContext tac = new TaskAttemptContext(jobConf, TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
    @Override
    public void progress() {
      reporter.progress();
    }
  };

  CqlRecordReader recordReader = new CqlRecordReader();
  recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
  return recordReader;
}
Example 2
Source File: HadoopArchives.java From RDFS with Apache License 2.0
public void reduce(IntWritable key, Iterator<Text> values,
    OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
  keyVal = key.get();
  while (values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes());
    written++;
    if (written > numIndexes - 1) {
      // every 1000 indexes we report status
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos + " "
          + indexStream.getPos() + " \n";
      outStream.write(masterWrite.getBytes());
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
Example 3
Source File: HadoopArchives.java From hadoop with Apache License 2.0
public void reduce(IntWritable key, Iterator<Text> values,
    OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
  keyVal = key.get();
  while (values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes(Charsets.UTF_8));
    written++;
    if (written > numIndexes - 1) {
      // every 1000 indexes we report status
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos + " "
          + indexStream.getPos() + " \n";
      outStream.write(masterWrite.getBytes(Charsets.UTF_8));
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
Example 4
Source File: HiveDynamoDBInputFormat.java From emr-dynamodb-connector with Apache License 2.0
@Override
public RecordReader<Text, DynamoDBItemWritable> getRecordReader(InputSplit split, JobConf conf,
    Reporter reporter) throws IOException {
  reporter.progress();

  Map<String, String> columnMapping =
      HiveDynamoDBUtil.fromJsonString(conf.get(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING));
  Map<String, String> hiveTypeMapping = HiveDynamoDBUtil.extractHiveTypeMapping(conf);
  DynamoDBQueryFilter queryFilter = getQueryFilter(conf, columnMapping, hiveTypeMapping);
  DynamoDBSplit bbSplit = (DynamoDBSplit) split;
  bbSplit.setDynamoDBFilterPushdown(queryFilter);

  Collection<String> attributes = (columnMapping == null ? null : columnMapping.values());
  DynamoDBRecordReaderContext context =
      buildHiveDynamoDBRecordReaderContext(bbSplit, conf, reporter, attributes);
  return new DefaultDynamoDBRecordReader(context);
}
Example 5
Source File: HadoopArchives.java From big-c with Apache License 2.0
public void reduce(IntWritable key, Iterator<Text> values,
    OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
  keyVal = key.get();
  while (values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes(Charsets.UTF_8));
    written++;
    if (written > numIndexes - 1) {
      // every 1000 indexes we report status
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos + " "
          + indexStream.getPos() + " \n";
      outStream.write(masterWrite.getBytes(Charsets.UTF_8));
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
Example 6
Source File: HadoopArchives.java From big-c with Apache License 2.0
public void map(LongWritable key, HarEntry value,
    OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = encodeProperties(srcStatus);
  if (value.isDir()) {
    towrite = encodeName(relPath.toString()) + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child : value.children) {
      sbuff.append(encodeName(child) + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  } else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = encodeName(relPath.toString()) + " file " + partname + " " + startPos
        + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
Example 7
Source File: HadoopArchives.java From RDFS with Apache License 2.0
public void map(LongWritable key, HarEntry value,
    OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = URLEncoder.encode(
      srcStatus.getModificationTime() + " "
          + srcStatus.getAccessTime() + " "
          + srcStatus.getPermission().toShort() + " "
          + URLEncoder.encode(srcStatus.getOwner(), "UTF-8") + " "
          + URLEncoder.encode(srcStatus.getGroup(), "UTF-8"),
      "UTF-8");
  if (value.isDir()) {
    towrite = URLEncoder.encode(relPath.toString(), "UTF-8") + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child : value.children) {
      sbuff.append(URLEncoder.encode(child, "UTF-8") + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  } else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = URLEncoder.encode(relPath.toString(), "UTF-8") + " file " + partname + " "
        + startPos + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
Example 8
Source File: HadoopArchives.java From RDFS with Apache License 2.0
public void copyData(Path input, FSDataInputStream fsin, FSDataOutputStream fout,
    Reporter reporter) throws IOException {
  try {
    for (int cbread = 0; (cbread = fsin.read(buffer)) >= 0;) {
      fout.write(buffer, 0, cbread);
      reporter.progress();
    }
  } finally {
    fsin.close();
  }
}
Example 9
Source File: UDFWithOptions.java From incubator-hivemall with Apache License 2.0
protected static void reportProgress(@Nonnull Reporter reporter) {
  if (reporter != null) {
    synchronized (reporter) {
      reporter.progress();
    }
  }
}
Example 10
Source File: UDTFWithOptions.java From incubator-hivemall with Apache License 2.0
protected static void reportProgress(@Nullable Reporter reporter) {
  if (reporter != null) {
    synchronized (reporter) {
      reporter.progress();
    }
  }
}
Example 11
Source File: DynamoDBFibonacciRetryer.java From emr-dynamodb-connector with Apache License 2.0
private void incrementRetryCounter(Reporter reporter, PrintCounter retryCounter) {
  if (reporter != null) {
    if (retryCounter != null) {
      reporter.incrCounter(retryCounter.getGroup(), retryCounter.getName(), 1);
    } else {
      reporter.progress();
    }
  }
}
Example 12
Source File: HadoopArchives.java From hadoop-gpu with Apache License 2.0
public void map(LongWritable key, Text value,
    OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
  String line = value.toString();
  MapStat mstat = new MapStat(line);
  Path srcPath = new Path(mstat.pathname);
  String towrite = null;
  Path relPath = makeRelative(srcPath);
  int hash = HarFileSystem.getHarHash(relPath);
  long startPos = partStream.getPos();
  if (mstat.isDir) {
    towrite = relPath.toString() + " " + "dir none " + 0 + " " + 0 + " ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child : mstat.children) {
      sbuff.append(child + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  } else {
    FileSystem srcFs = srcPath.getFileSystem(conf);
    FileStatus srcStatus = srcFs.getFileStatus(srcPath);
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = relPath.toString() + " file " + partname + " " + startPos + " "
        + srcStatus.getLen() + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
Example 13
Source File: HadoopArchives.java From hadoop-gpu with Apache License 2.0
public void copyData(Path input, FSDataInputStream fsin, FSDataOutputStream fout,
    Reporter reporter) throws IOException {
  try {
    for (int cbread = 0; (cbread = fsin.read(buffer)) >= 0;) {
      fout.write(buffer, 0, cbread);
      reporter.progress();
    }
  } finally {
    fsin.close();
  }
}
Example 14
Source File: OnDiskMapOutput.java From hadoop with Apache License 2.0
@Override
public void shuffle(MapHost host, InputStream input,
    long compressedLength, long decompressedLength,
    ShuffleClientMetrics metrics, Reporter reporter) throws IOException {
  input = new IFileInputStream(input, compressedLength, conf);
  // Copy data to local-disk
  long bytesLeft = compressedLength;
  try {
    final int BYTES_TO_READ = 64 * 1024;
    byte[] buf = new byte[BYTES_TO_READ];
    while (bytesLeft > 0) {
      int n = ((IFileInputStream) input).readWithChecksum(buf, 0,
          (int) Math.min(bytesLeft, BYTES_TO_READ));
      if (n < 0) {
        throw new IOException("read past end of stream reading " + getMapId());
      }
      disk.write(buf, 0, n);
      bytesLeft -= n;
      metrics.inputBytes(n);
      reporter.progress();
    }

    LOG.info("Read " + (compressedLength - bytesLeft)
        + " bytes from map-output for " + getMapId());

    disk.close();
  } catch (IOException ioe) {
    // Close the streams
    IOUtils.cleanup(LOG, input, disk);

    // Re-throw
    throw ioe;
  }

  // Sanity check
  if (bytesLeft != 0) {
    throw new IOException("Incomplete map output received for " + getMapId()
        + " from " + host.getHostName() + " (" + bytesLeft
        + " bytes missing of " + compressedLength + ")");
  }
  this.compressedSize = compressedLength;
}
Example 15
Source File: Loops.java From nutch-htmlunit with Apache License 2.0
/**
 * Performs a single loop pass looking for loop cycles within routes. If
 * this is not the last loop cycle then the url will be mapped for further
 * passes.
 */
public void reduce(Text key, Iterator<ObjectWritable> values,
    OutputCollector<Text, Route> output, Reporter reporter) throws IOException {

  List<Route> routeList = new ArrayList<Route>();
  Set<String> outlinkUrls = new LinkedHashSet<String>();
  int numValues = 0;

  // aggregate all routes and outlinks for a given url
  while (values.hasNext()) {
    ObjectWritable next = values.next();
    Object value = next.get();
    if (value instanceof Route) {
      routeList.add(WritableUtils.clone((Route) value, conf));
    } else if (value instanceof Text) {
      String outlinkUrl = ((Text) value).toString();
      if (!outlinkUrls.contains(outlinkUrl)) {
        outlinkUrls.add(outlinkUrl);
      }
    }

    // specify progress, could be a lot of routes
    numValues++;
    if (numValues % 100 == 0) {
      reporter.progress();
    }
  }

  // loop through the route list
  Iterator<Route> routeIt = routeList.listIterator();
  while (routeIt.hasNext()) {

    // removing the route for space concerns, could be a lot of routes
    // if the route is already found, meaning it is a loop, just collect it
    // urls with no outlinks that are not found will fall off
    Route route = routeIt.next();
    routeIt.remove();
    if (route.isFound()) {
      output.collect(key, route);
    } else {
      // if the route start url is found, set route to found and collect
      String lookingFor = route.getLookingFor();
      if (outlinkUrls.contains(lookingFor)) {
        route.setFound(true);
        output.collect(key, route);
      } else if (!last) {
        // setup for next pass through the loop
        for (String outlink : outlinkUrls) {
          output.collect(new Text(outlink), route);
        }
      }
    }
  }
}
Example 16
Source File: TestShufflePlugin.java From big-c with Apache License 2.0
@Test
/**
 * A testing method verifying availability and accessibility of API that is needed
 * for sub-classes of ShuffleConsumerPlugin
 */
public void testConsumerApi() {
  JobConf jobConf = new JobConf();
  ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShuffleConsumerPlugin<K, V>();

  //mock creation
  ReduceTask mockReduceTask = mock(ReduceTask.class);
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  Reporter mockReporter = mock(Reporter.class);
  FileSystem mockFileSystem = mock(FileSystem.class);
  Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = jobConf.getCombinerClass();
  @SuppressWarnings("unchecked")  // needed for mock with generic
  CombineOutputCollector<K, V> mockCombineOutputCollector =
      (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
  org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
      mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
  LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
  CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
  Counter mockCounter = mock(Counter.class);
  TaskStatus mockTaskStatus = mock(TaskStatus.class);
  Progress mockProgress = mock(Progress.class);
  MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
  Task mockTask = mock(Task.class);
  try {
    String[] dirs = jobConf.getLocalDirs();
    // verify that these APIs are available through super class handler
    ShuffleConsumerPlugin.Context<K, V> context =
        new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, jobConf, mockFileSystem,
            mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec,
            combinerClass, mockCombineOutputCollector, mockCounter, mockCounter,
            mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus,
            mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    shuffleConsumerPlugin.init(context);
    shuffleConsumerPlugin.run();
    shuffleConsumerPlugin.close();
  } catch (Exception e) {
    assertTrue("Threw exception:" + e, false);
  }

  // verify that these APIs are available for 3rd party plugins
  mockReduceTask.getTaskID();
  mockReduceTask.getJobID();
  mockReduceTask.getNumMaps();
  mockReduceTask.getPartition();
  mockReporter.progress();
}
Example 17
Source File: OnDiskMapOutput.java From big-c with Apache License 2.0
@Override
public void shuffle(MapHost host, InputStream input,
    long compressedLength, long decompressedLength,
    ShuffleClientMetrics metrics, Reporter reporter) throws IOException {
  input = new IFileInputStream(input, compressedLength, conf);
  // Copy data to local-disk
  long bytesLeft = compressedLength;
  try {
    final int BYTES_TO_READ = 64 * 1024;
    byte[] buf = new byte[BYTES_TO_READ];
    while (bytesLeft > 0) {
      int n = ((IFileInputStream) input).readWithChecksum(buf, 0,
          (int) Math.min(bytesLeft, BYTES_TO_READ));
      if (n < 0) {
        throw new IOException("read past end of stream reading " + getMapId());
      }
      disk.write(buf, 0, n);
      bytesLeft -= n;
      metrics.inputBytes(n);
      reporter.progress();
    }

    LOG.info("Read " + (compressedLength - bytesLeft)
        + " bytes from map-output for " + getMapId());

    disk.close();
  } catch (IOException ioe) {
    // Close the streams
    IOUtils.cleanup(LOG, input, disk);

    // Re-throw
    throw ioe;
  }

  // Sanity check
  if (bytesLeft != 0) {
    throw new IOException("Incomplete map output received for " + getMapId()
        + " from " + host.getHostName() + " (" + bytesLeft
        + " bytes missing of " + compressedLength + ")");
  }
  this.compressedSize = compressedLength;
}
Example 18
Source File: HiveCassandraStandardColumnInputFormat.java From Hive-Cassandra with Apache License 2.0
@Override
public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split,
    JobConf jobConf, final Reporter reporter) throws IOException {
  HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

  List<String> columns = AbstractColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
  isTransposed = AbstractColumnSerDe.isTransposed(columns);

  List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

  if (columns.size() < readColIDs.size()) {
    throw new IOException("Cannot read more columns than the given table contains.");
  }

  org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
  Job job = new Job(jobConf);

  TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
    @Override
    public void progress() {
      reporter.progress();
    }
  };

  SlicePredicate predicate = new SlicePredicate();

  if (isTransposed || readColIDs.size() == columns.size() || readColIDs.size() == 0) {
    SliceRange range = new SliceRange();
    AbstractType comparator = BytesType.instance;
    String comparatorType = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR);
    if (comparatorType != null && !comparatorType.equals("")) {
      try {
        comparator = TypeParser.parse(comparatorType);
      } catch (Exception ex) {
        throw new IOException("Comparator class not found.");
      }
    }

    String sliceStart = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START);
    String sliceEnd = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH);
    String reversed = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED);

    range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart));
    range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd));
    range.setReversed(reversed == null ? false : reversed.equals("true"));
    range.setCount(cassandraSplit.getSlicePredicateSize());
    predicate.setSlice_range(range);
  } else {
    int iKey = columns.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);
    predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs));
  }

  try {
    ConfigHelper.setInputColumnFamily(tac.getConfiguration(),
        cassandraSplit.getKeyspace(), cassandraSplit.getColumnFamily());

    ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
    ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
    ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
    ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
    ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
    // Set Split Size
    ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

    CassandraHiveRecordReader rr = null;

    if (isTransposed && tac.getConfiguration().getBoolean(
        AbstractColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) {
      rr = new CassandraHiveRecordReader(new ColumnFamilyWideRowRecordReader(), isTransposed);
    } else {
      rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(), isTransposed);
    }
    rr.initialize(cfSplit, tac);

    return rr;
  } catch (Exception ie) {
    throw new IOException(ie);
  }
}
Example 19
Source File: ArcSegmentCreator.java From anthelion with Apache License 2.0
/**
 * <p>Runs the Map job to translate an arc record into output for Nutch
 * segments.</p>
 *
 * @param key The arc record header.
 * @param bytes The arc record raw content bytes.
 * @param output The output collector.
 * @param reporter The progress reporter.
 */
public void map(Text key, BytesWritable bytes,
    OutputCollector<Text, NutchWritable> output, Reporter reporter) throws IOException {

  String[] headers = key.toString().split("\\s+");
  String urlStr = headers[0];
  String version = headers[2];
  String contentType = headers[3];

  // arcs start with a file description. for now we ignore this as it is not
  // a content record
  if (urlStr.startsWith("filedesc://")) {
    LOG.info("Ignoring file header: " + urlStr);
    return;
  }
  LOG.info("Processing: " + urlStr);

  // get the raw bytes from the arc file, create a new crawldatum
  Text url = new Text();
  CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_DB_FETCHED, interval, 1.0f);
  String segmentName = getConf().get(Nutch.SEGMENT_NAME_KEY);

  // normalize and filter the urls
  try {
    urlStr = normalizers.normalize(urlStr, URLNormalizers.SCOPE_FETCHER);
    urlStr = urlFilters.filter(urlStr); // filter the url
  } catch (Exception e) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Skipping " + url + ":" + e);
    }
    urlStr = null;
  }

  // if still a good url then process
  if (urlStr != null) {
    url.set(urlStr);
    try {
      // set the protocol status to success and the crawl status to success
      // create the content from the normalized url and the raw bytes from
      // the arc file, TODO: currently this doesn't handle text of errors
      // pages (i.e. 404, etc.). We assume we won't get those.
      ProtocolStatus status = ProtocolStatus.STATUS_SUCCESS;
      Content content = new Content(urlStr, urlStr, bytes.getBytes(), contentType,
          new Metadata(), getConf());

      // set the url version into the metadata
      content.getMetadata().set(URL_VERSION, version);
      ParseStatus pstatus = null;
      pstatus = output(output, segmentName, url, datum, content, status,
          CrawlDatum.STATUS_FETCH_SUCCESS);
      reporter.progress();
    } catch (Throwable t) { // unexpected exception
      logError(url, t);
      output(output, segmentName, url, datum, null, null, CrawlDatum.STATUS_FETCH_RETRY);
    }
  }
}
Example 20
Source File: InMemoryMapOutput.java From hadoop with Apache License 2.0
@Override
public void shuffle(MapHost host, InputStream input,
    long compressedLength, long decompressedLength,
    ShuffleClientMetrics metrics, Reporter reporter) throws IOException {
  IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, conf);

  input = checksumIn;

  // Are map-outputs compressed?
  if (codec != null) {
    decompressor.reset();
    input = codec.createInputStream(input, decompressor);
  }

  try {
    IOUtils.readFully(input, memory, 0, memory.length);
    metrics.inputBytes(memory.length);
    reporter.progress();
    LOG.info("Read " + memory.length + " bytes from map-output for " + getMapId());

    /**
     * We've gotten the amount of data we were expecting. Verify the
     * decompressor has nothing more to offer. This action also forces the
     * decompressor to read any trailing bytes that weren't critical
     * for decompression, which is necessary to keep the stream
     * in sync.
     */
    if (input.read() >= 0) {
      throw new IOException("Unexpected extra bytes from input stream for " + getMapId());
    }
  } catch (IOException ioe) {
    // Close the streams
    IOUtils.cleanup(LOG, input);

    // Re-throw
    throw ioe;
  } finally {
    CodecPool.returnDecompressor(decompressor);
  }
}