org.apache.hadoop.util.UTF8ByteArrayUtils Java Examples

Source File: TextOutputReader.java From hadoop with Apache License 2.0

6 votes

/**
 * Splits the first {@code length} bytes of {@code line} into a key and a
 * value.
 *
 * <p>The split point is the {@code numKeyFields}-th occurrence of
 * {@code separator}. When the line holds fewer occurrences than that, the
 * whole line becomes the key and the value is set to the empty string.
 *
 * @param line   buffer holding the bytes of the line
 * @param length number of bytes of {@code line} to consider
 * @param key    receives the key part
 * @param val    receives the value part
 * @throws IOException if a {@link CharacterCodingException} is raised while
 *         filling {@code key}/{@code val}; the original exception is
 *         attached as the cause
 */
private void splitKeyVal(byte[] line, int length, Text key, Text val)
  throws IOException {
  // Need to find numKeyFields separators
  int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  for (int k = 1; k < numKeyFields && pos != -1; k++) {
    // Resume the search just past the separator that was found last.
    pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length,
      length, separator);
  }
  try {
    if (pos == -1) {
      // Not enough separators: everything is key, value is empty.
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos,
        separator.length);
    }
  } catch (CharacterCodingException e) {
    // Same message as before, but chain the cause so the original exception
    // object stays reachable through getCause().
    throw new IOException(StringUtils.stringifyException(e), e);
  }
}

Source File: TextOutputReader.java From big-c with Apache License 2.0

6 votes

/**
 * Splits the first {@code length} bytes of {@code line} into a key and a
 * value.
 *
 * <p>The split point is the {@code numKeyFields}-th occurrence of
 * {@code separator}. When the line holds fewer occurrences than that, the
 * whole line becomes the key and the value is set to the empty string.
 *
 * @param line   buffer holding the bytes of the line
 * @param length number of bytes of {@code line} to consider
 * @param key    receives the key part
 * @param val    receives the value part
 * @throws IOException if a {@link CharacterCodingException} is raised while
 *         filling {@code key}/{@code val}; the original exception is
 *         attached as the cause
 */
private void splitKeyVal(byte[] line, int length, Text key, Text val)
  throws IOException {
  // Need to find numKeyFields separators
  int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
  for (int k = 1; k < numKeyFields && pos != -1; k++) {
    // Resume the search just past the separator that was found last.
    pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length,
      length, separator);
  }
  try {
    if (pos == -1) {
      // Not enough separators: everything is key, value is empty.
      key.set(line, 0, length);
      val.set("");
    } else {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos,
        separator.length);
    }
  } catch (CharacterCodingException e) {
    // Same message as before, but chain the cause so the original exception
    // object stays reachable through getCause().
    throw new IOException(StringUtils.stringifyException(e), e);
  }
}

Source File: PipeMapRed.java From RDFS with Apache License 2.0

6 votes

/**
 * Splits a line into a key and a value.
 *
 * <p>The split happens at the N-th occurrence of the field separator, where
 * N is the value of {@code getNumOfKeyFields()} and the separator comes from
 * {@code getFieldSeparator()}. If the line has fewer occurrences, the whole
 * line is stored in {@code key} and {@code val} is set to the empty string.
 * A {@link CharacterCodingException} raised while storing the result is
 * logged at warn level and not rethrown.
 *
 * @param line a byte array of line containing UTF-8 bytes
 * @param length number of bytes of {@code line} to consider
 * @param key key of a record
 * @param val value of a record
 * @throws IOException
 */
void splitKeyVal(byte[] line, int length, Text key, Text val)
throws IOException {
  final int keyFieldCount = getNumOfKeyFields();
  final byte[] sep = getFieldSeparator();

  // Walk forward one separator at a time until enough have been seen
  // or the line runs out of separators.
  int splitAt = UTF8ByteArrayUtils.findBytes(line, 0, length, sep);
  int seen = 1;
  while (splitAt != -1 && seen < keyFieldCount) {
    splitAt = UTF8ByteArrayUtils.findBytes(line, splitAt + sep.length,
        length, sep);
    seen++;
  }

  try {
    if (splitAt != -1) {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, splitAt, sep.length);
    } else {
      // Too few separators: the entire line is the key.
      key.set(line, 0, length);
      val.set("");
    }
  } catch (CharacterCodingException e) {
    LOG.warn(StringUtils.stringifyException(e));
  }
}

Source File: PipeMapRed.java From hadoop-gpu with Apache License 2.0

6 votes

/**
 * Splits a line into a key and a value.
 *
 * <p>The split happens at the N-th occurrence of the field separator, where
 * N is the value of {@code getNumOfKeyFields()} and the separator comes from
 * {@code getFieldSeparator()}. If the line has fewer occurrences, the whole
 * line is stored in {@code key} and {@code val} is set to the empty string.
 * A {@link CharacterCodingException} raised while storing the result is
 * logged at warn level and not rethrown.
 *
 * @param line a byte array of line containing UTF-8 bytes
 * @param length number of bytes of {@code line} to consider
 * @param key key of a record
 * @param val value of a record
 * @throws IOException
 */
void splitKeyVal(byte[] line, int length, Text key, Text val)
throws IOException {
  final int keyFieldCount = getNumOfKeyFields();
  final byte[] sep = getFieldSeparator();

  // Walk forward one separator at a time until enough have been seen
  // or the line runs out of separators.
  int splitAt = UTF8ByteArrayUtils.findBytes(line, 0, length, sep);
  int seen = 1;
  while (splitAt != -1 && seen < keyFieldCount) {
    splitAt = UTF8ByteArrayUtils.findBytes(line, splitAt + sep.length,
        length, sep);
    seen++;
  }

  try {
    if (splitAt != -1) {
      StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, splitAt, sep.length);
    } else {
      // Too few separators: the entire line is the key.
      key.set(line, 0, length);
      val.set("");
    }
  } catch (CharacterCodingException e) {
    LOG.warn(StringUtils.stringifyException(e));
  }
}

Source File: KeyFieldHelper.java From hadoop with Apache License 2.0

5 votes

/**
 * Computes the byte length of each field in {@code b[start, end)}, where
 * fields are delimited by {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" and a single-space separator,
 * the returned array begins with {@code [4, 5, 3, 3, 3]}: element 0 is the
 * number of fields and elements 1..n are the field lengths. The array may be
 * longer than n + 1; entries past index n are zero.
 *
 * <p>If no key spec has been seen, the whole key is treated as one word and
 * the result is the one-element array {@code {1}} (no length entry).
 *
 * @param b     buffer holding the key bytes
 * @param start index of the first byte of the key
 * @param end   index one past the last byte of the key
 * @return the field count followed by the per-field byte lengths
 */
public int[] getWordLengths(byte []b, int start, int end) {
  if (!keySpecSeen) {
    //if there were no key specs, then the whole key is one word
    return new int[] {1};
  }
  int[] lengths = new int[10];
  int currLenLengths = lengths.length;
  int idx = 1;
  int pos;
  while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
      keyFieldSeparator)) != -1) {
    // Double the array before idx can run past its last slot.
    if (++idx == currLenLengths) {
      int[] temp = lengths;
      lengths = new int[(currLenLengths = currLenLengths*2)];
      System.arraycopy(temp, 0, lengths, 0, temp.length);
    }
    lengths[idx - 1] = pos - start;
    // Skip the whole separator, not just one byte: the separator is a byte
    // array and may be longer than one byte, in which case stepping by 1
    // would count its trailing bytes as part of the next field.
    start = pos + keyFieldSeparator.length;
  }

  if (start != end) {
    // Bytes after the last separator form the final field.
    lengths[idx] = end - start;
  }
  lengths[0] = idx; //number of words is the first element
  return lengths;
}

Source File: KeyFieldHelper.java From big-c with Apache License 2.0

5 votes

/**
 * Computes the byte length of each field in {@code b[start, end)}, where
 * fields are delimited by {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" and a single-space separator,
 * the returned array begins with {@code [4, 5, 3, 3, 3]}: element 0 is the
 * number of fields and elements 1..n are the field lengths. The array may be
 * longer than n + 1; entries past index n are zero.
 *
 * <p>If no key spec has been seen, the whole key is treated as one word and
 * the result is the one-element array {@code {1}} (no length entry).
 *
 * @param b     buffer holding the key bytes
 * @param start index of the first byte of the key
 * @param end   index one past the last byte of the key
 * @return the field count followed by the per-field byte lengths
 */
public int[] getWordLengths(byte []b, int start, int end) {
  if (!keySpecSeen) {
    //if there were no key specs, then the whole key is one word
    return new int[] {1};
  }
  int[] lengths = new int[10];
  int currLenLengths = lengths.length;
  int idx = 1;
  int pos;
  while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
      keyFieldSeparator)) != -1) {
    // Double the array before idx can run past its last slot.
    if (++idx == currLenLengths) {
      int[] temp = lengths;
      lengths = new int[(currLenLengths = currLenLengths*2)];
      System.arraycopy(temp, 0, lengths, 0, temp.length);
    }
    lengths[idx - 1] = pos - start;
    // Skip the whole separator, not just one byte: the separator is a byte
    // array and may be longer than one byte, in which case stepping by 1
    // would count its trailing bytes as part of the next field.
    start = pos + keyFieldSeparator.length;
  }

  if (start != end) {
    // Bytes after the last separator form the final field.
    lengths[idx] = end - start;
  }
  lengths[0] = idx; //number of words is the first element
  return lengths;
}

Source File: KeyFieldHelper.java From RDFS with Apache License 2.0

5 votes

/**
 * Computes the byte length of each field in {@code b[start, end)}, where
 * fields are delimited by {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" and a single-space separator,
 * the returned array begins with {@code [4, 5, 3, 3, 3]}: element 0 is the
 * number of fields and elements 1..n are the field lengths. The array may be
 * longer than n + 1; entries past index n are zero.
 *
 * <p>If no key spec has been seen, the whole key is treated as one word and
 * the result is the one-element array {@code {1}} (no length entry).
 *
 * @param b     buffer holding the key bytes
 * @param start index of the first byte of the key
 * @param end   index one past the last byte of the key
 * @return the field count followed by the per-field byte lengths
 */
public int[] getWordLengths(byte []b, int start, int end) {
  if (!keySpecSeen) {
    //if there were no key specs, then the whole key is one word
    return new int[] {1};
  }
  int[] lengths = new int[10];
  int currLenLengths = lengths.length;
  int idx = 1;
  int pos;
  while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
      keyFieldSeparator)) != -1) {
    // Double the array before idx can run past its last slot.
    if (++idx == currLenLengths) {
      int[] temp = lengths;
      lengths = new int[(currLenLengths = currLenLengths*2)];
      System.arraycopy(temp, 0, lengths, 0, temp.length);
    }
    lengths[idx - 1] = pos - start;
    // Skip the whole separator, not just one byte: the separator is a byte
    // array and may be longer than one byte, in which case stepping by 1
    // would count its trailing bytes as part of the next field.
    start = pos + keyFieldSeparator.length;
  }

  if (start != end) {
    // Bytes after the last separator form the final field.
    lengths[idx] = end - start;
  }
  lengths[0] = idx; //number of words is the first element
  return lengths;
}

Source File: KeyFieldHelper.java From hadoop-gpu with Apache License 2.0

5 votes

/**
 * Computes the byte length of each field in {@code b[start, end)}, where
 * fields are delimited by {@code keyFieldSeparator}.
 *
 * <p>Given a string like "hello how are you" and a single-space separator,
 * the returned array begins with {@code [4, 5, 3, 3, 3]}: element 0 is the
 * number of fields and elements 1..n are the field lengths. The array may be
 * longer than n + 1; entries past index n are zero.
 *
 * <p>If no key spec has been seen, the whole key is treated as one word and
 * the result is the one-element array {@code {1}} (no length entry).
 *
 * @param b     buffer holding the key bytes
 * @param start index of the first byte of the key
 * @param end   index one past the last byte of the key
 * @return the field count followed by the per-field byte lengths
 */
public int[] getWordLengths(byte []b, int start, int end) {
  if (!keySpecSeen) {
    //if there were no key specs, then the whole key is one word
    return new int[] {1};
  }
  int[] lengths = new int[10];
  int currLenLengths = lengths.length;
  int idx = 1;
  int pos;
  while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end,
      keyFieldSeparator)) != -1) {
    // Double the array before idx can run past its last slot.
    if (++idx == currLenLengths) {
      int[] temp = lengths;
      lengths = new int[(currLenLengths = currLenLengths*2)];
      System.arraycopy(temp, 0, lengths, 0, temp.length);
    }
    lengths[idx - 1] = pos - start;
    // Skip the whole separator, not just one byte: the separator is a byte
    // array and may be longer than one byte, in which case stepping by 1
    // would count its trailing bytes as part of the next field.
    start = pos + keyFieldSeparator.length;
  }

  if (start != end) {
    // Bytes after the last separator form the final field.
    lengths[idx] = end - start;
  }
  lengths[0] = idx; //number of words is the first element
  return lengths;
}

org.apache.hadoop.util.UTF8ByteArrayUtils Java Examples