Java Code Examples for org.apache.hadoop.io.Text#getLength()
The following examples show how to use org.apache.hadoop.io.Text#getLength().
Each example is drawn from an open-source project; the project, source file, and license are noted above the code.
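As a quick orientation before the examples: getLength() returns the number of valid bytes in the Text's internal UTF-8 buffer, which is not the same as the character count of the decoded string, and the array returned by getBytes() may be longer than that valid length. The standalone sketch below (not taken from any of the projects that follow) illustrates both points.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.Text;

public class TextGetLengthDemo {
    public static void main(String[] args) {
        Text t = new Text("héllo");                 // 'é' takes two bytes in UTF-8
        System.out.println(t.getLength());          // 6 -- byte length of the UTF-8 encoding
        System.out.println(t.toString().length());  // 5 -- character count

        // getBytes() exposes the backing array, which may contain stale bytes
        // beyond getLength() after a set(), so always pair the two calls.
        t.set("hi");
        byte[] raw = t.getBytes();
        String valid = new String(raw, 0, t.getLength(), StandardCharsets.UTF_8);
        System.out.println(valid);                  // "hi"
    }
}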
Example 1
Source File: RangeKeyDistributionMapper.java From Kylin with Apache License 2.0
@Override
public void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    lastKey = key;

    int bytesLength = key.getLength() + value.getLength();
    bytesRead += bytesLength;

    if (bytesRead >= ONE_MEGA_BYTES) {
        outputValue.set(bytesRead);
        context.write(key, outputValue);

        // reset bytesRead
        bytesRead = 0;
    }
}
Example 2
Source File: QseqInputFormat.java From Hadoop-BAM with MIT License
private void setFieldPositionsAndLengths(Text line) {
    int pos = 0;     // the byte position within the record
    int fieldno = 0; // the field index within the record
    while (pos < line.getLength() && fieldno < NUM_QSEQ_COLS) // iterate over each field
    {
        int endpos = line.find(Delim, pos); // the field's end position
        if (endpos < 0)
            endpos = line.getLength();

        fieldPositions[fieldno] = pos;
        fieldLengths[fieldno] = endpos - pos;

        pos = endpos + 1; // the next starting position is the current end + 1
        fieldno += 1;
    }

    if (fieldno != NUM_QSEQ_COLS)
        throw new FormatException("found " + fieldno + " fields instead of 11 at " +
            makePositionMessage(this.pos - line.getLength()) + ". Line: " + line);
}
Example 3
Source File: AccumuloUtils.java From mrgeo with Apache License 2.0
/**
 * Convert a Text object of a tileId back to a long.
 *
 * @param rowId Text object to convert.
 * @return the long value from the Text object.
 */
public static long toLong(Text rowId) {
    byte[] outB = new byte[8];
    for (int x = 0; x < outB.length; x++) {
        if (x >= rowId.getLength()) {
            outB[x] = 0x0;
        } else {
            outB[x] = rowId.getBytes()[x];
        }
    }
    return ByteBuffer.wrap(outB).getLong();
}
Example 4
Source File: ShardedTableTabletBalancer.java From datawave with Apache License 2.0
private byte[] retrieveDate(KeyExtent extent) {
    Text endRow = extent.getEndRow();
    if (endRow == null)
        endRow = extent.getPrevEndRow();
    if (endRow == null) {
        log.warn("Attempting to retrieve date from empty extent " + extent + ". Is your sharded table pre-split?");
        return "null".getBytes();
    } else {
        int idx = endRow.find("_");
        if (idx <= 0) {
            idx = endRow.getLength();
            log.warn("Extent " + extent + " does not conform to sharded date scheme yyyyMMdd_num");
        }
        return Arrays.copyOf(endRow.getBytes(), idx);
    }
}
Example 5
Source File: TeraSort.java From hadoop-gpu with Apache License 2.0
/**
 * Given a sorted set of cut points, build a trie that will find the correct
 * partition quickly.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that we have already checked against
 * @param maxDepth the maximum depth we will build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper, Text prefix,
                                  int maxDepth) {
    int depth = prefix.getLength();
    if (depth >= maxDepth || lower == upper) {
        return new LeafTrieNode(depth, splits, lower, upper);
    }
    InnerTrieNode result = new InnerTrieNode(depth);
    Text trial = new Text(prefix);
    // append an extra byte on to the prefix
    trial.append(new byte[1], 0, 1);
    int currentBound = lower;
    for (int ch = 0; ch < 255; ++ch) {
        trial.getBytes()[depth] = (byte) (ch + 1);
        lower = currentBound;
        while (currentBound < upper) {
            if (splits[currentBound].compareTo(trial) >= 0) {
                break;
            }
            currentBound += 1;
        }
        trial.getBytes()[depth] = (byte) ch;
        result.child[ch] = buildTrie(splits, lower, currentBound, trial, maxDepth);
    }
    // pick up the rest
    trial.getBytes()[depth] = 127;
    result.child[255] = buildTrie(splits, currentBound, upper, trial, maxDepth);
    return result;
}
Example 6
Source File: ToISO8601UDF.java From occurrence with Apache License 2.0
public Text evaluate(Text field) {
    if (field == null || field.getLength() == 0) {
        return null;
    } else {
        try {
            text.set(DownloadUtils.ISO_8601_ZONED.format(
                Instant.ofEpochMilli(Long.parseLong(field.toString())).atZone(ZoneOffset.UTC)));
            return text;
        } catch (NumberFormatException e) {
            return null;
        }
    }
}
Example 7
Source File: EventDataScanNestedIterator.java From datawave with Apache License 2.0
boolean isEventKey(Key k) {
    Text cf = k.getColumnFamily();
    return cf.getLength() > 0 && cf.find("\u0000") != -1
            && !((cf.charAt(0) == 'f' && cf.charAt(1) == 'i' && cf.charAt(2) == 0)
                    || (cf.getLength() == 1 && cf.charAt(0) == 'd')
                    || (cf.getLength() == 2 && cf.charAt(0) == 't' && cf.charAt(1) == 'f'));
}
Example 8
Source File: XmlSerDe.java From Hive-XML-SerDe with Apache License 2.0
/**
 * @see org.apache.hadoop.hive.serde2.Deserializer#deserialize(org.apache.hadoop.io.Writable)
 */
@Override
public Object deserialize(Writable writable) throws SerDeException {
    Text text = (Text) writable;
    if (text == null || text.getLength() == 0) {
        return (Object) null;
    }
    try {
        return this.xmlProcessor.parse(text.toString());
    } catch (Exception e) {
        throw new SerDeException(e);
    }
}
Example 9
Source File: TeraSort.java From RDFS with Apache License 2.0
int findPartition(Text key) {
    int level = getLevel();
    if (key.getLength() <= level) {
        return child[0].findPartition(key);
    }
    return child[key.getBytes()[level]].findPartition(key);
}
Example 10
Source File: TopKDataGen.java From sequenceiq-samples with Apache License 2.0
@Override
public void run() throws Exception {
    KeyValueWriter streamOutputWriter = (KeyValueWriter) getOutputs().get(OUTPUT).getWriter();
    long sizeLarge = 0;
    while (sizeLarge < streamOutputFileSize) {
        String str = createRowString();
        Text text = new Text(str);
        int size = text.getLength();
        streamOutputWriter.write(text, NullWritable.get());
        sizeLarge += size;
    }
}
Example 11
Source File: DeflateUDF.java From incubator-hivemall with Apache License 2.0
@Override
public BytesWritable evaluate(DeferredObject[] arguments) throws HiveException {
    if (codec == null) {
        this.codec = new DeflateCodec(true, false);
    }
    Object arg0 = arguments[0].get();
    if (arg0 == null) {
        return null;
    }
    Text text = stringOI.getPrimitiveWritableObject(arg0);
    byte[] original = text.getBytes();
    final int len = text.getLength();
    final byte[] compressed;
    try {
        compressed = codec.compress(original, 0, len, compressionLevel);
    } catch (IOException e) {
        throw new HiveException("Failed to compress", e);
    }
    original = null;
    if (result == null) {
        this.result = new BytesWritable(compressed);
    } else {
        result.set(compressed, 0, compressed.length);
    }
    return result;
}
Example 12
Source File: LineRecordReader.java From hadoop with Apache License 2.0
private int skipUtfByteOrderMark(Text value) throws IOException {
    // Strip BOM(Byte Order Mark)
    // Text only support UTF-8, we only need to check UTF-8 BOM
    // (0xEF,0xBB,0xBF) at the start of the text stream.
    int newMaxLineLength = (int) Math.min(3L + (long) maxLineLength,
        Integer.MAX_VALUE);
    int newSize = in.readLine(value, newMaxLineLength, maxBytesToConsume(pos));
    // Even we read 3 extra bytes for the first line,
    // we won't alter existing behavior (no backwards incompat issue).
    // Because the newSize is less than maxLineLength and
    // the number of bytes copied to Text is always no more than newSize.
    // If the return size from readLine is not less than maxLineLength,
    // we will discard the current line and read the next line.
    pos += newSize;
    int textLength = value.getLength();
    byte[] textBytes = value.getBytes();
    if ((textLength >= 3) && (textBytes[0] == (byte) 0xEF) &&
        (textBytes[1] == (byte) 0xBB) && (textBytes[2] == (byte) 0xBF)) {
        // find UTF-8 BOM, strip it.
        LOG.info("Found UTF-8 BOM and skipped it");
        textLength -= 3;
        newSize -= 3;
        if (textLength > 0) {
            // It may work to use the same buffer and not do the copyBytes
            textBytes = value.copyBytes();
            value.set(textBytes, 3, textLength);
        } else {
            value.clear();
        }
    }
    return newSize;
}
Example 13
Source File: ShardedTableTabletBalancer.java From datawave with Apache License 2.0
@Override
public String apply(KeyExtent extent) {
    String date = "null"; // Don't return null
    if (extent != null) {
        Text endRow = extent.getEndRow();
        if (endRow != null) {
            int sepIdx = endRow.find("_");
            if (sepIdx < 0)
                sepIdx = endRow.getLength();
            date = new String(endRow.getBytes(), 0, sepIdx);
        }
    }
    return date;
}
Example 14
Source File: TeraSort.java From RDFS with Apache License 2.0
/**
 * Given a sorted set of cut points, build a trie that will find the correct
 * partition quickly.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that we have already checked against
 * @param maxDepth the maximum depth we will build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper, Text prefix,
                                  int maxDepth) {
    int depth = prefix.getLength();
    if (depth >= maxDepth || lower == upper) {
        return new LeafTrieNode(depth, splits, lower, upper);
    }
    InnerTrieNode result = new InnerTrieNode(depth);
    Text trial = new Text(prefix);
    // append an extra byte on to the prefix
    trial.append(new byte[1], 0, 1);
    int currentBound = lower;
    for (int ch = 0; ch < 255; ++ch) {
        trial.getBytes()[depth] = (byte) (ch + 1);
        lower = currentBound;
        while (currentBound < upper) {
            if (splits[currentBound].compareTo(trial) >= 0) {
                break;
            }
            currentBound += 1;
        }
        trial.getBytes()[depth] = (byte) ch;
        result.child[ch] = buildTrie(splits, lower, currentBound, trial, maxDepth);
    }
    // pick up the rest
    trial.getBytes()[depth] = 127;
    result.child[255] = buildTrie(splits, currentBound, upper, trial, maxDepth);
    return result;
}
Example 15
Source File: TreeExportUDF.java From incubator-hivemall with Apache License 2.0
@Nonnull
public Text export(@Nonnull Text model, @Nullable String[] featureNames,
        @Nullable String[] classNames) throws HiveException {
    int length = model.getLength();
    byte[] b = model.getBytes();
    b = Base91.decode(b, 0, length);

    final String exported;
    if (regression) {
        exported = exportRegressor(b, featureNames);
    } else {
        exported = exportClassifier(b, featureNames, classNames);
    }
    return new Text(exported);
}
Example 16
Source File: RandomTextWriterJob.java From big-c with Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(Text key, Text value, Context context)
        throws IOException, InterruptedException {
    int itemCount = 0;
    while (numBytesToWrite > 0) {
        // Generate the key/value
        int noWordsKey = minWordsInKey +
            (wordsInKeyRange != 0 ? random.nextInt(wordsInKeyRange) : 0);
        int noWordsValue = minWordsInValue +
            (wordsInValueRange != 0 ? random.nextInt(wordsInValueRange) : 0);
        Text keyWords = generateSentence(noWordsKey);
        Text valueWords = generateSentence(noWordsValue);

        // Write the sentence
        context.write(keyWords, valueWords);

        numBytesToWrite -= (keyWords.getLength() + valueWords.getLength());

        // Update counters, progress etc.
        context.getCounter(Counters.BYTES_WRITTEN).increment(
            keyWords.getLength() + valueWords.getLength());
        context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
        if (++itemCount % 200 == 0) {
            context.setStatus("wrote record " + itemCount + ". " +
                numBytesToWrite + " bytes left.");
        }
    }
    context.setStatus("done with " + itemCount + " records.");
}
Example 17
Source File: RandomTextWriter.java From tez with Apache License 2.0
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(Text key, Text value, Context context)
        throws IOException, InterruptedException {
    int itemCount = 0;
    while (numBytesToWrite > 0) {
        // Generate the key/value
        int noWordsKey = minWordsInKey +
            (wordsInKeyRange != 0 ? random.nextInt(wordsInKeyRange) : 0);
        int noWordsValue = minWordsInValue +
            (wordsInValueRange != 0 ? random.nextInt(wordsInValueRange) : 0);
        Text keyWords = generateSentence(noWordsKey);
        Text valueWords = generateSentence(noWordsValue);

        // Write the sentence
        context.write(keyWords, valueWords);

        numBytesToWrite -= (keyWords.getLength() + valueWords.getLength());

        // Update counters, progress etc.
        context.getCounter(Counters.BYTES_WRITTEN).increment(
            keyWords.getLength() + valueWords.getLength());
        context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
        if (++itemCount % 200 == 0) {
            context.setStatus("wrote record " + itemCount + ". " +
                numBytesToWrite + " bytes left.");
        }
    }
    context.setStatus("done with " + itemCount + " records.");
}
Example 18
Source File: PigStorage.java From spork with Apache License 2.0
@Override
public Tuple getNext() throws IOException {
    mProtoTuple = new ArrayList<Object>();
    if (!mRequiredColumnsInitialized) {
        if (signature != null) {
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p.getProperty(signature));
        }
        mRequiredColumnsInitialized = true;
    }
    // Prepend input source path if source tagging is enabled
    if (tagFile) {
        mProtoTuple.add(new DataByteArray(sourcePath.getName()));
    } else if (tagPath) {
        mProtoTuple.add(new DataByteArray(sourcePath.toString()));
    }

    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] buf = value.getBytes();
        int len = value.getLength();
        int start = 0;
        int fieldID = 0;
        for (int i = 0; i < len; i++) {
            if (buf[i] == fieldDel) {
                if (mRequiredColumns == null ||
                        (mRequiredColumns.length > fieldID && mRequiredColumns[fieldID]))
                    addTupleValue(mProtoTuple, buf, start, i);
                start = i + 1;
                fieldID++;
            }
        }
        // pick up the last field
        if (start <= len && (mRequiredColumns == null ||
                (mRequiredColumns.length > fieldID && mRequiredColumns[fieldID]))) {
            addTupleValue(mProtoTuple, buf, start, len);
        }
        Tuple t = mTupleFactory.newTupleNoCopy(mProtoTuple);
        return dontLoadSchema ? t : applySchema(t);
    } catch (InterruptedException e) {
        int errCode = 6018;
        String errMsg = "Error while reading input";
        throw new ExecException(errMsg, errCode, PigException.REMOTE_ENVIRONMENT, e);
    }
}
Example 19
Source File: QseqInputFormat.java From Hadoop-BAM with MIT License
private void scanQseqLine(Text line, Text key, SequencedFragment fragment) {
    setFieldPositionsAndLengths(line);

    // Build the key. We concatenate all fields from 0 to 5 (machine to y-pos)
    // and then the read number, replacing the tabs with colons.
    key.clear();
    // append up and including field[5]
    key.append(line.getBytes(), 0, fieldPositions[5] + fieldLengths[5]);
    // replace tabs with :
    byte[] bytes = key.getBytes();
    int temporaryEnd = key.getLength();
    for (int i = 0; i < temporaryEnd; ++i)
        if (bytes[i] == '\t')
            bytes[i] = ':';
    // append the read number
    key.append(line.getBytes(), fieldPositions[7] - 1, fieldLengths[7] + 1); // +/- 1 to catch the preceding tab.
    // convert the tab preceding the read number into a :
    key.getBytes()[temporaryEnd] = ':';

    // now the fragment
    try {
        fragment.clear();
        fragment.setInstrument(Text.decode(line.getBytes(), fieldPositions[0], fieldLengths[0]));
        fragment.setRunNumber(Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[1], fieldLengths[1])));
        //fragment.setFlowcellId();
        fragment.setLane(Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[2], fieldLengths[2])));
        fragment.setTile(Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[3], fieldLengths[3])));
        fragment.setXpos(Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[4], fieldLengths[4])));
        fragment.setYpos(Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[5], fieldLengths[5])));
        fragment.setRead(Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[7], fieldLengths[7])));
        fragment.setFilterPassed(line.getBytes()[fieldPositions[10]] != '0');
        //fragment.setControlNumber();
        if (fieldLengths[6] > 0 && line.getBytes()[fieldPositions[6]] == '0') // 0 is a null index sequence
            fragment.setIndexSequence(null);
        else
            fragment.setIndexSequence(Text.decode(line.getBytes(), fieldPositions[6], fieldLengths[6]).replace('.', 'N'));
    } catch (CharacterCodingException e) {
        throw new FormatException("Invalid character format at " +
            makePositionMessage(this.pos - line.getLength()) + "; line: " + line);
    }

    fragment.getSequence().append(line.getBytes(), fieldPositions[8], fieldLengths[8]);
    fragment.getQuality().append(line.getBytes(), fieldPositions[9], fieldLengths[9]);
}
Example 20
Source File: ShardedDataTypeHandler.java From datawave with Apache License 2.0
/**
 * Create Key from input parameters.
 *
 * For global index keys, the granularity of the timestamp is to the millisecond, where the semantics of the index
 * record is to the day. This makes MapReduce unable to reduce all index keys together unless they occurred at the
 * same millisecond. If we truncate the timestamp to the day, we should reduce the number of keys output from a job.
 *
 * @param row
 * @param colf
 * @param colq
 * @param vis
 * @param ts
 * @param delete
 * @return Accumulo Key object
 */
protected Key createIndexKey(byte[] row, Text colf, Text colq, byte[] vis, long ts, boolean delete) {
    // Truncate the timestamp to the day
    long tsToDay = (ts / MS_PER_DAY) * MS_PER_DAY;

    Key k = new Key(row, 0, row.length, colf.getBytes(), 0, colf.getLength(), colq.getBytes(), 0, colq.getLength(),
                    vis, 0, vis.length, tsToDay);
    k.setDeleted(delete);

    return k;
}