Java Code Examples for org.apache.hadoop.io.Text#encode()
The following examples show how to use
org.apache.hadoop.io.Text#encode().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SequenceFileInputFilter.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Samples records by the MD5 hash of their key: a key is accepted when its
 * hash is an exact multiple of {@code frequency}.
 *
 * <p>{@link Text} and {@link BytesWritable} keys are hashed directly; any
 * other key is hashed over the bytes {@code Text.encode} produces for its
 * {@code toString()} value.
 *
 * @param key the record key to test
 * @return {@code true} if the key's hash is divisible by {@code frequency},
 *         {@code false} otherwise
 * @throws RuntimeException wrapping any exception raised while hashing or
 *         testing the key (the exception is logged at warn level first)
 * @see Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    final long hashcode;
    if (key instanceof Text) {
      hashcode = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      hashcode = MD5Hashcode((BytesWritable) key);
    } else {
      ByteBuffer encoded = Text.encode(key.toString());
      hashcode = MD5Hashcode(encoded.array(), 0, encoded.limit());
    }
    // Divisibility test; a zero frequency raises ArithmeticException, handled below.
    return hashcode % frequency == 0;
  } catch (Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
}
Example 2
Source File: SequenceFileInputFilter.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Filtering method: returns {@code true} when MD5(key) is evenly divisible
 * by {@code frequency}, and {@code false} otherwise.
 *
 * <p>The hash is taken straight from {@link Text} or {@link BytesWritable}
 * keys; for any other key type the key's {@code toString()} value is run
 * through {@code Text.encode} and the resulting bytes are hashed.
 *
 * @param key the key to evaluate
 * @return whether the key passes the filter
 * @throws RuntimeException if anything fails while evaluating the key; the
 *         underlying exception is logged and attached as the cause
 * @see Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    long digest;
    if (key instanceof Text) {
      digest = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      digest = MD5Hashcode((BytesWritable) key);
    } else {
      ByteBuffer keyBytes = Text.encode(key.toString());
      digest = MD5Hashcode(keyBytes.array(), 0, keyBytes.limit());
    }
    long remainder = digest % frequency;
    return remainder == 0;
  } catch (Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
}
Example 3
Source File: SequenceFileInputFilter.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Accepts a key when its MD5 hash is an exact multiple of {@code frequency}.
 *
 * <p>{@link Text} and {@link BytesWritable} keys are hashed as they are.
 * Every other key is converted with {@code toString()}, encoded through
 * {@code Text.encode}, and hashed over the encoded bytes.
 *
 * @param key the record key under consideration
 * @return {@code true} if MD5(key) % frequency == 0; {@code false} otherwise
 * @throws RuntimeException wrapping whatever exception occurred during the
 *         check, after logging it as a warning
 * @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    final long hashcode;
    if (key instanceof Text) {
      hashcode = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      hashcode = MD5Hashcode((BytesWritable) key);
    } else {
      ByteBuffer encoded = Text.encode(key.toString());
      hashcode = MD5Hashcode(encoded.array(), 0, encoded.limit());
    }
    return hashcode % frequency == 0;
  } catch (Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
}
Example 4
Source File: SequenceFileInputFilter.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Frequency-based key filter: a key passes when MD5(key) leaves no remainder
 * after division by {@code frequency}.
 *
 * <p>Hashing depends on the key's runtime type: {@link Text} and
 * {@link BytesWritable} are hashed directly, anything else is hashed over
 * the bytes returned by {@code Text.encode(key.toString())}.
 *
 * @param key the key to test
 * @return {@code true} when the key passes, {@code false} when it does not
 * @throws RuntimeException if the check fails for any reason; the original
 *         exception is logged and kept as the cause
 * @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    long digest;
    if (key instanceof Text) {
      digest = MD5Hashcode((Text) key);
    } else if (key instanceof BytesWritable) {
      digest = MD5Hashcode((BytesWritable) key);
    } else {
      ByteBuffer keyBytes = Text.encode(key.toString());
      digest = MD5Hashcode(keyBytes.array(), 0, keyBytes.limit());
    }
    long remainder = digest % frequency;
    return remainder == 0;
  } catch (Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
}
Example 5
Source File: TextUtil.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Encodes {@code s} as UTF-8 via {@code Text.encode} and appends the encoded
 * bytes to the end of {@code t}.
 *
 * @param t the {@link Text} to append to
 * @param s the string whose encoded bytes are appended
 * @param replaceBadChar passed through unchanged to {@code Text.encode}
 * @throws IllegalArgumentException if encoding fails with a
 *         {@link CharacterCodingException}, which is attached as the cause
 */
public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) {
  final ByteBuffer encoded;
  try {
    encoded = Text.encode(s, replaceBadChar);
  } catch (CharacterCodingException cce) {
    throw new IllegalArgumentException(cce);
  }
  // Only the first limit() bytes of the backing array hold encoded data.
  t.append(encoded.array(), 0, encoded.limit());
}
Example 6
Source File: TextUtil.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Converts the given string to its UTF-8 bytes using Hadoop's
 * {@code Text.encode}. The result is a fresh array sized exactly to the
 * encoded length.
 *
 * <p>NOTE(review): the original author states this is much faster than
 * {@link String#getBytes(String)}; that claim is not verified here.
 *
 * @param string the string to convert
 * @return the UTF-8 representation of the string
 * @throws IllegalArgumentException if the string cannot be encoded; the
 *         {@link CharacterCodingException} is attached as the cause
 */
public static byte[] toUtf8(String string) {
  try {
    ByteBuffer encoded = Text.encode(string, false);
    int length = encoded.limit();
    byte[] utf8 = new byte[length];
    // The backing array may be longer than the encoded data; copy only the valid prefix.
    System.arraycopy(encoded.array(), 0, utf8, 0, length);
    return utf8;
  } catch (CharacterCodingException cce) {
    throw new IllegalArgumentException(cce);
  }
}
Example 7
Source File: ColumnPrefixes.java From rya with Apache License 2.0 | 5 votes |
/**
 * Builds a new {@link Text} from {@code prefix} and appends the bytes that
 * {@code Text.encode} produces for {@code str}.
 *
 * @param prefix the leading content of the result
 * @param str the string whose encoded bytes follow the prefix
 * @return a newly created {@link Text} holding prefix followed by str
 * @throws IllegalArgumentException if {@code str} cannot be encoded; the
 *         {@link CharacterCodingException} is attached as the cause
 */
private static Text concat(Text prefix, String str) {
  final Text combined = new Text(prefix);
  final ByteBuffer encoded;
  try {
    encoded = Text.encode(str, false);
  } catch (CharacterCodingException cce) {
    throw new IllegalArgumentException(cce);
  }
  combined.append(encoded.array(), 0, encoded.limit());
  return combined;
}
Example 8
Source File: QseqOutputFormat.java From Hadoop-BAM with MIT License | 4 votes |
public void write(Text ignored_key, SequencedFragment seq) throws IOException { sBuilder.delete(0, sBuilder.length()); // clear sBuilder.append( seq.getInstrument() == null ? "" : seq.getInstrument() ).append(delim); sBuilder.append( seq.getRunNumber() == null ? "" : seq.getRunNumber().toString() ).append(delim); sBuilder.append( seq.getLane() == null ? "" : seq.getLane().toString() ).append(delim); sBuilder.append( seq.getTile() == null ? "" : seq.getTile().toString() ).append(delim); sBuilder.append( seq.getXpos() == null ? "" : seq.getXpos().toString() ).append(delim); sBuilder.append( seq.getYpos() == null ? "" : seq.getYpos().toString() ).append(delim); String index; if (seq.getIndexSequence() == null || seq.getIndexSequence().isEmpty()) index = "0"; else index = seq.getIndexSequence().replace('N', '.'); sBuilder.append( index ).append(delim); sBuilder.append( seq.getRead() == null ? "" : seq.getRead().toString() ).append(delim); // here we also replace 'N' with '.' sBuilder.append( seq.getSequence() == null ? "" : seq.getSequence().toString().replace('N', '.')).append(delim); //////// quality may have to be re-coded if (seq.getQuality() == null) sBuilder.append(""); else { int startPos = sBuilder.length(); sBuilder.append(seq.getQuality().toString()); if (baseQualityFormat == BaseQualityEncoding.Sanger) { // do nothing } else if (baseQualityFormat == BaseQualityEncoding.Illumina) { // recode the quality in-place for (int i = startPos; i < sBuilder.length(); ++i) { // cast to avoid warning about possible loss of precision for assigning a char from an int. char newValue = (char)(sBuilder.charAt(i) + 31); // 64 - 33 = 31: difference between illumina and sanger encoding if (newValue > 126) throw new RuntimeException("output quality score over allowed range. Maybe you meant to write in Sanger format?"); sBuilder.setCharAt(i, newValue); } } else throw new RuntimeException("BUG! 
Unknown base quality format value " + baseQualityFormat + " in QseqRecordWriter"); } sBuilder.append(delim); ///////// sBuilder.append((seq.getFilterPassed() == null || seq.getFilterPassed() ) ? 1 : 0); try { ByteBuffer buf = Text.encode(sBuilder.toString()); out.write(buf.array(), 0, buf.limit()); } catch (java.nio.charset.CharacterCodingException e) { throw new RuntimeException("Error encoding qseq record: " + seq); } out.write(newLine, 0, newLine.length); }