Java Code Examples for org.apache.hadoop.io.Text#charAt()

The following examples show how to use org.apache.hadoop.io.Text#charAt(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DocumentDataIterator.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Tests whether the column family of {@code k} qualifies the key as an event key.
 * <p>
 * The column family must be non-empty and contain a NUL character, and it must not match any of the
 * excluded shapes: starting with {@code 'f'}, {@code 'i'}, NUL; being exactly {@code "d"}; or being
 * exactly {@code "tf"}.
 *
 * @param k
 *            the key whose column family is inspected
 * @return {@code true} when the column family passes every check above
 */
protected boolean isEventKey(Key k) {
    Text cf = k.getColumnFamily();

    // Must be non-empty and contain a NUL somewhere.
    if (cf.getLength() <= 0 || cf.find("\u0000") == -1) {
        return false;
    }
    // Excluded: column families that begin with "fi" followed by NUL.
    if (cf.charAt(0) == 'f' && cf.charAt(1) == 'i' && cf.charAt(2) == 0) {
        return false;
    }
    // Excluded: the single-character column family "d".
    if (cf.getLength() == 1 && cf.charAt(0) == 'd') {
        return false;
    }
    // Excluded: the two-character column family "tf".
    return !(cf.getLength() == 2 && cf.charAt(0) == 't' && cf.charAt(1) == 'f');
}
 
Example 2
Source File: EventDataScanNestedIterator.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether {@code k} looks like an event key, judged solely by its column family.
 * <p>
 * A key is accepted when its column family is non-empty, contains a NUL character, and is none of:
 * a value starting with {@code 'f'}, {@code 'i'}, NUL; the one-character value {@code "d"}; the
 * two-character value {@code "tf"}.
 *
 * @param k
 *            the key to classify
 * @return {@code true} if the column family satisfies the conditions above, {@code false} otherwise
 */
boolean isEventKey(Key k) {
    final Text family = k.getColumnFamily();

    final boolean containsNul = family.getLength() > 0 && family.find("\u0000") != -1;
    if (!containsNul) {
        return false;
    }

    final boolean startsWithFiNul = family.charAt(0) == 'f' && family.charAt(1) == 'i' && family.charAt(2) == 0;
    if (startsWithFiNul) {
        return false;
    }

    final boolean isD = family.getLength() == 1 && family.charAt(0) == 'd';
    if (isD) {
        return false;
    }

    final boolean isTf = family.getLength() == 2 && family.charAt(0) == 't' && family.charAt(1) == 'f';
    return !isTf;
}
 
Example 3
Source File: MetricAgeOffIterator.java    From timely with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether {@code nextRow} starts with the bytes held in {@code prevMetricBytes} and is
 * immediately followed by a {@code 0x00} character.
 *
 * @param nextRow
 *            the row to compare against the previously recorded metric bytes
 * @return {@code true} if {@code nextRow} is longer than the previous metric, begins with the same
 *         bytes, and has {@code 0x00} at the position right after that prefix
 */
private boolean isNextMetricTheSame(Text nextRow) {
    int prefixLength = prevMetricBytes.getLength();
    // Text.getBytes() hands back the backing array, which may be longer than the valid data;
    // the valid length is getLength(), so that is what must be compared against the prefix length.
    return nextRow.getLength() > prefixLength
            && 0 == prevMetricBytes.compareTo(nextRow.getBytes(), 0, prefixLength)
            && nextRow.charAt(prefixLength) == 0x00;
}
 
Example 4
Source File: L13.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Splits the values for {@code key} into two groups by their leading character and emits the
 * cross product of the two groups.
 * <p>
 * Values whose first character is {@code '1'} go to the first group, all others to the second; the
 * leading character is stripped in both cases. Nothing is emitted when the first group is empty.
 * When the second group is empty, each first-group entry is emitted once with empty trailing
 * columns.
 *
 * @param key
 *            the key shared by all values
 * @param iter
 *            the values to partition
 * @param oc
 *            receives one tab-separated record per pairing, with a {@code null} output key
 * @param reporter
 *            receives an {@code "OK"} status after every value and once more after the scan
 * @throws IOException
 *             if the collector fails
 */
public void reduce(
        Text key,
        Iterator<Text> iter, 
        OutputCollector<Text, Text> oc,
        Reporter reporter) throws IOException {
    List<String> first = new ArrayList<String>();
    List<String> second = new ArrayList<String>();

    // Route each value to its group based on the leading tag character, dropping the tag.
    while (iter.hasNext()) {
        Text txt = iter.next();
        List<String> bucket = (txt.charAt(0) == '1') ? first : second;
        bucket.add(txt.toString().substring(1));
        reporter.setStatus("OK");
    }

    reporter.setStatus("OK");

    if (first.isEmpty()) {
        return;
    }
    // A single null placeholder makes the loop below emit one record per first-group entry.
    if (second.isEmpty()) {
        second.add(null);
    }

    // Emit every (first, second) pairing.
    for (String s1 : first) {
        for (String s2 : second) {
            String record;
            if (s2 == null) {
                record = key + "\t" + s1 + "\t\t";
            } else {
                record = key + "\t" + s1 + "\t" + key + "\t" + s2;
            }
            oc.collect(null, new Text(record));
        }
    }
    first.clear();
}
 
Example 5
Source File: WritableUtil.java    From datawave with Apache License 2.0 4 votes vote down vote up
/**
 * Parses the contents of a {@link Text} as a signed {@code long} in the given radix, reading the
 * characters directly from the {@code Text} rather than converting it to a {@code String} first.
 * <p>
 * An optional leading {@code '-'} marks a negative value; every remaining position must be a valid
 * digit for {@code radix} according to {@link Character#digit(int, int)}.
 * 
 * @param text
 *            the text to parse
 * @param radix
 *            the radix to use, between {@link Character#MIN_RADIX} and {@link Character#MAX_RADIX}
 * @return the parsed value
 * @throws NumberFormatException
 *             if {@code text} is {@code null}, empty, or only {@code "-"}; if {@code radix} is out of range; if a non-digit is encountered; or if the
 *             value does not fit in a {@code long}
 */
public static long parseLong(Text text, int radix) throws NumberFormatException {
    if (text == null) {
        throw new NumberFormatException("null");
    }
    if (radix < Character.MIN_RADIX) {
        throw new NumberFormatException("radix " + radix + " less than Character.MIN_RADIX");
    }
    if (radix > Character.MAX_RADIX) {
        throw new NumberFormatException("radix " + radix + " greater than Character.MAX_RADIX");
    }
    
    final int length = text.getLength();
    if (length <= 0) {
        throw new NumberFormatException();
    }
    
    final boolean negative = text.charAt(0) == '-';
    // The value is accumulated as a negative number, so the bound is the most negative result allowed.
    final long limit = negative ? Long.MIN_VALUE : -Long.MAX_VALUE;
    final long multmin = limit / radix;
    
    long accumulated = 0;
    int pos = negative ? 1 : 0;
    // Starting from zero, the overflow checks can never trip on the first digit, so it needs no special case.
    while (pos < length) {
        final int digit = Character.digit(text.charAt(pos++), radix);
        if (digit < 0) {
            throw new NumberFormatException();
        }
        // Multiplying by the radix would overflow.
        if (accumulated < multmin) {
            throw new NumberFormatException();
        }
        accumulated *= radix;
        // Subtracting the digit would overflow.
        if (accumulated < limit + digit) {
            throw new NumberFormatException();
        }
        accumulated -= digit;
    }
    
    if (!negative) {
        return -accumulated;
    }
    if (pos <= 1) {
        // Only a "-" was supplied.
        throw new NumberFormatException();
    }
    return accumulated;
}
 
Example 6
Source File: FieldIndexCountingIterator.java    From datawave with Apache License 2.0 4 votes vote down vote up
/**
 * Tests whether the column family of {@code key} is at least three characters long and begins with
 * {@code 'f'}, {@code 'i'}, NUL.
 *
 * @param key
 *            the key whose column family is inspected
 * @return {@code true} if the column family has that prefix
 */
private boolean isFieldIndexKey(Key key) {
    Text cf = key.getColumnFamily();
    if (cf.getLength() < 3) {
        return false;
    }
    return cf.charAt(0) == 'f' && cf.charAt(1) == 'i' && cf.charAt(2) == '\0';
}
 
Example 7
Source File: EventDataScanNestedIterator.java    From datawave with Apache License 2.0 4 votes vote down vote up
/**
 * Clears {@code topKey} and advances the underlying source until either a key is accepted as the
 * new {@code topKey} or the source has no more keys.
 * <p>
 * Keys rejected by {@code isEventKey} are skipped: column families starting with {@code "fi\0"},
 * equal to {@code "d"}, or equal to {@code "tf"} are jumped over with a seek, anything else with
 * {@code next()}. Keys accepted by {@code isEventKey} become {@code topKey} if they pass
 * {@code dataTypeFilter}; otherwise the source is re-seeked from {@code nextStartKey}.
 *
 * @throws RuntimeException
 *             wrapping any {@link IOException} raised by the source
 */
protected void findNextDocument() {
    topKey = null;
    
    try {
        Text cf = new Text();
        
        // Loop until a top key has been chosen or the source runs out of keys.
        while (topKey == null && source.hasTop()) {
            Key k = source.getTopKey();
            if (log.isTraceEnabled()) {
                log.trace("Sought to " + k);
            }
            k.getColumnFamily(cf);
            
            if (isEventKey(k)) {
                if (dataTypeFilter.apply(source.getTopKey())) {
                    this.topKey = source.getTopKey();
                } else {
                    // Filtered out: restart from the key computed by nextStartKey.
                    Range nextCF = new Range(nextStartKey(source.getTopKey()), true, totalRange.getEndKey(), totalRange.isEndKeyInclusive());
                    source.seek(nextCF, columnFamilies, inclusive);
                }
                continue;
            }
            
            // Not an event key: pick the column family to seek past, or null to simply step forward.
            String skipTo;
            if (cf.find("fi\0") == 0) {
                if (log.isDebugEnabled()) {
                    log.debug("Seeking over 'fi')");
                }
                // Try to do an optimized jump over the field index
                skipTo = "fi\1";
            } else if (cf.getLength() == 1 && cf.charAt(0) == 'd') {
                if (log.isDebugEnabled()) {
                    log.debug("Seeking over 'd'");
                }
                // Try to do an optimized jump over the raw documents
                skipTo = "d\0";
            } else if (cf.getLength() == 2 && cf.charAt(0) == 't' && cf.charAt(1) == 'f') {
                if (log.isDebugEnabled()) {
                    log.debug("Seeking over 'tf'");
                }
                // Try to do an optimized jump over the term frequencies
                skipTo = "tf\0";
            } else {
                if (log.isDebugEnabled()) {
                    log.debug("Next()'ing over the current key");
                }
                skipTo = null;
            }
            
            if (skipTo == null) {
                source.next();
            } else {
                cf.set(skipTo);
                Key seekStart = new Key(source.getTopKey().getRow(), cf);
                source.seek(new Range(seekStart, false, totalRange.getEndKey(), totalRange.isEndKeyInclusive()), columnFamilies, inclusive);
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not seek in findNextDocument", e);
    }
}
 
Example 8
Source File: FieldIndexCountingIteratorPerVisibility.java    From datawave with Apache License 2.0 4 votes vote down vote up
/**
 * Determines whether {@code key} has a column family that starts with the three characters
 * {@code 'f'}, {@code 'i'}, NUL.
 *
 * @param key
 *            the key to examine
 * @return {@code true} when the column family is at least three characters long and has that
 *         prefix, {@code false} otherwise
 */
private boolean isFieldIndexKey(Key key) {
    final Text family = key.getColumnFamily();
    final boolean longEnough = family.getLength() >= 3;
    return longEnough && family.charAt(0) == 'f' && family.charAt(1) == 'i' && family.charAt(2) == '\0';
}