org.apache.lucene.store.DataOutput#writeByte

Source File: XAnalyzingSuggester.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Serializes this suggester to {@code output}: the FST first, then
 * {@code maxAnalyzedPathsForOneInput} as a vInt, then a single byte that is
 * 1 when payloads are present and 0 otherwise.
 * <p>
 * The stream is closed before this method returns, including when nothing
 * was written or when writing failed.
 *
 * @param output stream to write to; always closed by this method
 * @return {@code false} if there is no FST to store, {@code true} otherwise
 * @throws IOException if writing to or closing the stream fails
 */
@Override
public boolean store(OutputStream output) throws IOException {
  final DataOutput dataOut = new OutputStreamDataOutput(output);
  try {
    if (fst != null) {
      fst.save(dataOut);
      dataOut.writeVInt(maxAnalyzedPathsForOneInput);
      final byte payloadFlag = hasPayloads ? (byte) 1 : (byte) 0;
      dataOut.writeByte(payloadFlag);
      return true;
    }
    // nothing was built, so there is nothing to persist
    return false;
  } finally {
    IOUtils.close(output);
  }
}

Source File: CompressingStoredFieldsWriter.java From lucene-solr with Apache License 2.0

6 votes

/** 
 * Writes a double in a variable-length format.  Writes between one and 
 * nine bytes. Small integral values typically take fewer bytes.
 * <p>
 * ZDouble --&gt; Header, Bytes*?
 * <ul>
 *    <li>Header --&gt; {@link DataOutput#writeByte Uint8}. When it is
 *       equal to 0xFF then the value is negative and stored in the next
 *       8 bytes. When it is equal to 0xFE then the value is stored as a
 *       float in the next 4 bytes. Otherwise if the first bit is set
 *       then the other bits in the header encode the value plus one and
 *       no other bytes are read. Otherwise, the value is a positive double
 *       value whose first byte is the header, and 7 bytes need to be read
 *       to complete it.
 *    <li>Bytes --&gt; Potential additional bytes to read depending on the
 *       header.
 * </ul>
 *
 * @param out destination to write the encoded value to
 * @param d value to encode
 * @throws IOException if writing to {@code out} fails
 */
static void writeZDouble(DataOutput out, double d) throws IOException {
  final int intVal = (int) d;
  final long doubleBits = Double.doubleToLongBits(d);
  
  if (d == intVal &&
      intVal >= -1 && 
      intVal <= 0x7C &&
      doubleBits != NEGATIVE_ZERO_DOUBLE) {
    // small integer value [-1..124]: single byte
    // (upper bound is 124 so that the header never collides with 0xFE / 0xFF;
    // -0.0 is excluded because it compares equal to 0 but has different bits)
    out.writeByte((byte) (0x80 | (intVal + 1)));
  } else if (d == (float) d) {
    // d has an accurate float representation: 5 bytes
    out.writeByte((byte) 0xFE);
    out.writeInt(Float.floatToIntBits((float) d));
  } else if ((doubleBits >>> 63) == 0) {
    // other positive doubles: 8 bytes
    // (sign bit is clear, so the first byte cannot be mistaken for a header)
    out.writeLong(doubleBits);
  } else {
    // other negative doubles: 9 bytes
    out.writeByte((byte) 0xFF);
    out.writeLong(doubleBits);
  }
}

Source File: CompressingStoredFieldsWriter.java From lucene-solr with Apache License 2.0

6 votes

/** 
 * Writes a float using a compact, variable-length encoding of one to five
 * bytes. Small integral values are the cheapest to store.
 * <p>
 * ZFloat --&gt; Header, Bytes*?
 * <ul>
 *    <li>Header --&gt; {@link DataOutput#writeByte Uint8}. A header of 0xFF
 *       means the value is negative and its bits follow in the next 4 bytes.
 *       Otherwise, when the first bit of the header is set, the remaining
 *       bits hold the value plus one and nothing else follows. Otherwise
 *       the header is the first byte of a positive float and the 3 bytes
 *       that follow complete it.
 *    <li>Bytes --&gt; Optional additional bytes, as dictated by the header.
 * </ul>
 *
 * @param out destination to write the encoded value to
 * @param f value to encode
 * @throws IOException if writing to {@code out} fails
 */
static void writeZFloat(DataOutput out, float f) throws IOException {
  final int bits = Float.floatToIntBits(f);
  final int truncated = (int) f;

  // -0.0f compares equal to 0 but must keep its own bit pattern
  final boolean smallInteger =
      bits != NEGATIVE_ZERO_FLOAT
          && truncated >= -1
          && truncated <= 0x7D
          && f == truncated;

  if (smallInteger) {
    // small integer value [-1..125]: single byte
    out.writeByte((byte) (0x80 | (truncated + 1)));
    return;
  }

  if (bits < 0) {
    // sign bit set: negative floats get a 0xFF marker, 5 bytes in total
    out.writeByte((byte) 0xFF);
  }
  // raw IEEE bits: 4 bytes (all that is written for other positive floats)
  out.writeInt(bits);
}

Source File: JaspellLookup.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Writes {@code node} and then its low, equal and high children, in that
 * order. For each node it writes the split character as a one-character
 * string, a mask byte flagging which children and whether a value exist,
 * and, when present, the value as a long.
 *
 * @param out destination to write to
 * @param node root of the subtree to write; {@code null} writes nothing
 * @throws IOException if writing to {@code out} fails
 */
private void writeRecursively(DataOutput out, TSTNode node) throws IOException {
  if (node == null) {
    return;
  }

  final TSTNode loKid = node.relatives[TSTNode.LOKID];
  final TSTNode eqKid = node.relatives[TSTNode.EQKID];
  final TSTNode hiKid = node.relatives[TSTNode.HIKID];
  final boolean hasValue = node.data != null;

  out.writeString(String.valueOf(node.splitchar));

  byte mask = 0;
  if (loKid != null) {
    mask |= LO_KID;
  }
  if (eqKid != null) {
    mask |= EQ_KID;
  }
  if (hiKid != null) {
    mask |= HI_KID;
  }
  if (hasValue) {
    mask |= HAS_VALUE;
  }
  out.writeByte(mask);

  if (hasValue) {
    out.writeLong(((Number) node.data).longValue());
  }

  // absent children hit the null guard at the top and write nothing
  writeRecursively(out, loKid);
  writeRecursively(out, eqKid);
  writeRecursively(out, hiKid);
}

Source File: TSTLookup.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Writes {@code node} and then its existing low, equal and high children,
 * in that order. For each node it writes the split character as a
 * one-character string, a mask byte flagging which children, token and
 * value are present, then the token (if any) and the value as a long (if any).
 *
 * @param out destination to write to
 * @param node root of the subtree to write; dereferenced without a null check
 * @throws IOException if writing to {@code out} fails
 */
private void writeRecursively(DataOutput out, TernaryTreeNode node) throws IOException {
  final boolean hasLo = node.loKid != null;
  final boolean hasEq = node.eqKid != null;
  final boolean hasHi = node.hiKid != null;
  final boolean hasToken = node.token != null;
  final boolean hasValue = node.val != null;

  // the node itself: split character first
  out.writeString(String.valueOf(node.splitchar));

  // then a bit mask telling the reader what follows
  byte mask = 0;
  if (hasEq) {
    mask |= EQ_KID;
  }
  if (hasLo) {
    mask |= LO_KID;
  }
  if (hasHi) {
    mask |= HI_KID;
  }
  if (hasToken) {
    mask |= HAS_TOKEN;
  }
  if (hasValue) {
    mask |= HAS_VALUE;
  }
  out.writeByte(mask);

  if (hasToken) {
    out.writeString(node.token);
  }
  if (hasValue) {
    out.writeLong(((Number) node.val).longValue());
  }

  // finally the children that exist, low / equal / high
  if (hasLo) {
    writeRecursively(out, node.loKid);
  }
  if (hasEq) {
    writeRecursively(out, node.eqKid);
  }
  if (hasHi) {
    writeRecursively(out, node.hiKid);
  }
}

Source File: BinaryDictionaryWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Writes the part-of-speech dictionary to {@code path}, creating parent
 * directories first. The file holds a codec header, the number of entries
 * as a vInt, then one record per entry: the three CSV fields as strings,
 * or three zero bytes for a {@code null} entry.
 *
 * @param path file to write
 * @throws IOException if the file cannot be created or written
 * @throws IllegalArgumentException if an entry does not parse into exactly three fields
 */
private void writePosDict(Path path) throws IOException {
  Files.createDirectories(path.getParent());
  try (OutputStream os = Files.newOutputStream(path);
       OutputStream bos = new BufferedOutputStream(os)) {
    final DataOutput out = new OutputStreamDataOutput(bos);
    CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
    out.writeVInt(posDict.size());
    for (String entry : posDict) {
      if (entry != null) {
        final String[] fields = CSVUtil.parse(entry);
        if (fields.length != 3) {
          throw new IllegalArgumentException("Malformed pos/inflection: " + entry + "; expected 3 characters");
        }
        for (String field : fields) {
          out.writeString(field);
        }
        continue;
      }
      // missing entry: one zero byte per field
      // NOTE(review): presumably read back as three empty strings; confirm against the reader
      for (int k = 0; k < 3; k++) {
        out.writeByte((byte) 0);
      }
    }
  }
}

Source File: CharacterDefinitionWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Writes the character definition data below {@code baseDir}, at a path
 * derived from the {@link CharacterDefinition} class name plus
 * {@code CharacterDefinition.FILENAME_SUFFIX}. Parent directories are created
 * first. The file holds a codec header, the category map, and one flag byte
 * per character class (bit 0x01 from {@code invokeMap}, bit 0x02 from
 * {@code groupMap}).
 *
 * @param baseDir directory under which the file is created
 * @throws IOException if the file cannot be created or written
 */
public void write(Path baseDir) throws IOException {
  final String relativeName =
      CharacterDefinition.class.getName().replace('.', '/') + CharacterDefinition.FILENAME_SUFFIX;
  final Path target = baseDir.resolve(relativeName);
  Files.createDirectories(target.getParent());
  try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(target))) {
    final DataOutput out = new OutputStreamDataOutput(os);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);
    for (int classId = 0; classId < CharacterDefinition.CLASS_COUNT; classId++) {
      int flags = 0x00;
      if (invokeMap[classId]) {
        flags |= 0x01;
      }
      if (groupMap[classId]) {
        flags |= 0x02;
      }
      out.writeByte((byte) flags);
    }
  }
}

Source File: BinaryDictionaryWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Writes the part-of-speech dictionary to {@code path}, creating parent
 * directories first. The file holds a codec header, the number of entries
 * as a vInt, then one byte per entry: the ordinal of the {@code POS.Tag}
 * named by the first CSV field, or of {@code POS.Tag.UNKNOWN} for a
 * {@code null} entry.
 *
 * @param path file to write
 * @throws IOException if the file cannot be created or written
 * @throws IllegalArgumentException if an entry does not parse into exactly
 *         two fields (or, per {@code Enum.valueOf}, names no tag)
 */
private void writePosDict(Path path) throws IOException {
  Files.createDirectories(path.getParent());
  try (OutputStream os = Files.newOutputStream(path);
       OutputStream bos = new BufferedOutputStream(os)) {
    final DataOutput out = new OutputStreamDataOutput(bos);
    CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
    out.writeVInt(posDict.size());
    for (String entry : posDict) {
      final POS.Tag tag;
      if (entry == null) {
        tag = POS.Tag.UNKNOWN;
      } else {
        final String[] fields = CSVUtil.parse(entry);
        if (fields.length != 2) {
          throw new IllegalArgumentException("Malformed pos/inflection: " + entry + "; expected 2 characters");
        }
        // only the first field is persisted; the second is validated but not written
        tag = POS.Tag.valueOf(fields[0]);
      }
      out.writeByte((byte) tag.ordinal());
    }
  }
}

Source File: CharacterDefinitionWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Serializes the character definitions to a file under {@code baseDir}. The
 * file location is the {@link CharacterDefinition} class name with dots
 * replaced by slashes, followed by {@code CharacterDefinition.FILENAME_SUFFIX}.
 * Missing parent directories are created. The output is a codec header, the
 * raw category map, and then one byte per character class combining the
 * invoke flag (0x01) and the group flag (0x02).
 *
 * @param baseDir root directory of the output
 * @throws IOException if the file cannot be created or written
 */
public void write(Path baseDir) throws IOException {
  final String classPath = CharacterDefinition.class.getName().replace('.', '/');
  final Path outputFile = baseDir.resolve(classPath + CharacterDefinition.FILENAME_SUFFIX);
  Files.createDirectories(outputFile.getParent());
  try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(outputFile))) {
    final DataOutput out = new OutputStreamDataOutput(os);
    CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
    out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);
    int index = 0;
    while (index < CharacterDefinition.CLASS_COUNT) {
      final int invokeBit = invokeMap[index] ? 0x01 : 0x00;
      final int groupBit = groupMap[index] ? 0x02 : 0x00;
      out.writeByte((byte) (invokeBit | groupBit));
      index++;
    }
  }
}

Source File: LZ4.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes a literal run: the token byte, then (only when {@code literalLen}
 * is at least 15) the excess length {@code literalLen - 15} via
 * {@code encodeLen}, then {@code literalLen} bytes of {@code bytes}
 * starting at {@code anchor}.
 *
 * @param bytes source buffer holding the literals
 * @param token token value; only its low 8 bits are written
 * @param anchor offset of the first literal byte in {@code bytes}
 * @param literalLen number of literal bytes to copy
 * @param out destination to write to
 * @throws IOException if writing to {@code out} fails
 */
private static void encodeLiterals(byte[] bytes, int token, int anchor, int literalLen, DataOutput out) throws IOException {
  final boolean needsExtendedLength = literalLen >= 0x0F;

  out.writeByte((byte) token);

  if (needsExtendedLength) {
    // the part of the length beyond 15 is written separately
    encodeLen(literalLen - 0x0F, out);
  }

  // the literal bytes themselves
  out.writeBytes(bytes, anchor, literalLen);
}

Source File: LZ4.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes {@code l} as a run of 0xFF bytes, one per full 255 contained in
 * the value, followed by a final byte holding the remainder.
 *
 * @param l length to encode
 * @param out destination to write to
 * @throws IOException if writing to {@code out} fails
 */
private static void encodeLen(int l, DataOutput out) throws IOException {
  int remaining = l;
  for (; remaining >= 0xFF; remaining -= 0xFF) {
    out.writeByte((byte) 0xFF);
  }
  // whatever is left is below 255 and terminates the sequence
  out.writeByte((byte) remaining);
}

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Expert: checks that {@code in} starts with an index header whose segment
 * ID equals {@code expectedID}, then re-emits that header to {@code out}.
 * This is useful when building compound files.
 * <p>
 * The codec name, version and suffix cannot be validated here and are
 * copied through unchanged.
 *
 * @param in Input stream, positioned where the index header was previously
 *        written. Typically this is the beginning of the file.
 * @param out Output stream that receives the copied header.
 * @param expectedID Expected segment ID
 * @throws CorruptIndexException If the file is too small to hold a header
 *         and footer, if the first four bytes are not {@link #CODEC_MAGIC},
 *         or if the <code>expectedID</code> does not match.
 * @throws IOException If there is an I/O error reading from the underlying medium.
 *
 * @lucene.internal 
 */
public static void verifyAndCopyIndexHeader(IndexInput in, DataOutput out, byte[] expectedID) throws IOException {
  // the file must at least have room for an empty-named header plus a footer
  final int minimumLength = footerLength() + headerLength("");
  if (in.length() < minimumLength) {
    throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: file is too small (" + in.length() + " bytes)", in);
  }

  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: codec header mismatch: actual header=" + magic + " vs expected header=" + CodecUtil.CODEC_MAGIC, in);
  }

  // codec name and version cannot be checked here: read them for pass-through
  final String codecName = in.readString();
  final int codecVersion = in.readInt();

  // the segment id is the one thing we can check
  checkIndexHeaderID(in, expectedID);

  // the suffix cannot be checked either: read it for pass-through
  final int suffixLen = in.readByte() & 0xFF;
  final byte[] suffix = new byte[suffixLen];
  in.readBytes(suffix, 0, suffixLen);

  // replay the verified header onto the output
  out.writeInt(CodecUtil.CODEC_MAGIC);
  out.writeString(codecName);
  out.writeInt(codecVersion);
  out.writeBytes(expectedID, 0, expectedID.length);
  out.writeByte((byte) suffixLen);
  out.writeBytes(suffix, 0, suffixLen);
}

Source File: CompressingStoredFieldsWriter.java From lucene-solr with Apache License 2.0

5 votes

/** 
 * Writes a long in a variable-length format, using between one and ten
 * bytes. Small values, and timestamps with day, hour or second precision,
 * typically need fewer bytes.
 * <p>
 * ZLong --&gt; Header, Bytes*?
 * <ul>
 *    <li>Header --&gt; The first two bits indicate the compression scheme:
 *       <ul>
 *          <li>00 - uncompressed
 *          <li>01 - multiple of 1000 (second)
 *          <li>10 - multiple of 3600000 (hour)
 *          <li>11 - multiple of 86400000 (day)
 *       </ul>
 *       The next bit is a continuation bit telling whether more bytes
 *       follow, and the last 5 bits are the lower bits of the encoded
 *       value. To reconstruct the value, combine the 5 lower bits of the
 *       header with a vLong read from the following bytes (if the
 *       continuation bit is set), undo the
 *       {@link BitUtil#zigZagEncode(long) zigzag encoding}, and finally
 *       multiply by the multiple of the compression scheme.
 *    <li>Bytes --&gt; Potential additional bytes to read depending on the
 *       header.
 * </ul>
 *
 * @param out destination to write the encoded value to
 * @param l value to encode
 * @throws IOException if writing to {@code out} fails
 */
// T for "timestamp"
static void writeTLong(DataOutput out, long l) throws IOException {
  final int scheme;
  final long scaled;
  if (l % SECOND != 0) {
    // not a whole number of seconds: store as is
    scheme = 0;
    scaled = l;
  } else if (l % DAY == 0) {
    // timestamp with day precision
    scheme = DAY_ENCODING;
    scaled = l / DAY;
  } else if (l % HOUR == 0) {
    // timestamp with hour precision, or day precision with a timezone
    scheme = HOUR_ENCODING;
    scaled = l / HOUR;
  } else {
    // timestamp with second precision
    scheme = SECOND_ENCODING;
    scaled = l / SECOND;
  }

  final long zigZag = BitUtil.zigZagEncode(scaled);
  final long highBits = zigZag >>> 5;
  final boolean hasMore = highBits != 0;

  int header = scheme | (int) (zigZag & 0x1F); // last 5 bits
  if (hasMore) {
    header |= 0x20; // continuation bit
  }
  out.writeByte((byte) header);
  if (hasMore) {
    out.writeVLong(highBits);
  }
}

Source File: AnalyzingSuggester.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Serializes this suggester to {@code output}. The entry count is always
 * written first as a vLong. If an FST exists it is saved next, followed by
 * {@code maxAnalyzedPathsForOneInput} as a vInt and a single byte that is
 * 1 when payloads are present and 0 otherwise.
 *
 * @param output destination to write to
 * @return {@code false} if there is no FST (only the count was written),
 *         {@code true} otherwise
 * @throws IOException if writing to {@code output} fails
 */
@Override
public boolean store(DataOutput output) throws IOException {
  output.writeVLong(count);

  final boolean hasFst = fst != null;
  if (hasFst) {
    fst.save(output, output);
    output.writeVInt(maxAnalyzedPathsForOneInput);
    output.writeByte(hasPayloads ? (byte) 1 : (byte) 0);
  }
  return hasFst;
}

Source File: AbstractBlockPackedWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes {@code i} in groups of 7 bits, least significant group first, each
 * with the high bit set as a continuation marker. At most 8 such groups are
 * written; whatever remains is written as one final byte, so no value takes
 * more than 9 bytes.
 *
 * @param out destination to write to
 * @param i value to encode, treated as unsigned
 * @throws IOException if writing to {@code out} fails
 */
static void writeVLong(DataOutput out, long i) throws IOException {
  long remaining = i;
  for (int groups = 0; (remaining >>> 7) != 0L && groups < 8; groups++) {
    out.writeByte((byte) (0x80L | (remaining & 0x7FL)));
    remaining >>>= 7;
  }
  // last byte: either fewer than 8 significant bits, or the top 8 bits of a 64-bit value
  out.writeByte((byte) remaining);
}

Source File: SimpleTextUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Copies the bytes of {@code b} to {@code out}, writing {@code ESCAPE}
 * before every byte that equals {@code NEWLINE} or {@code ESCAPE}.
 *
 * @param out destination to write to
 * @param b bytes to write; the slice from {@code b.offset} of length
 *        {@code b.length} is used
 * @throws IOException if writing to {@code out} fails
 */
public static void write(DataOutput out, BytesRef b) throws IOException {
  final int end = b.offset + b.length;
  for (int pos = b.offset; pos < end; pos++) {
    final byte current = b.bytes[pos];
    final boolean needsEscape = current == NEWLINE || current == ESCAPE;
    if (needsEscape) {
      out.writeByte(ESCAPE);
    }
    out.writeByte(current);
  }
}

Source File: DocIdsWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes {@code count} doc IDs from {@code docIds}, starting at index
 * {@code start}, using one of three layouts announced by a leading byte:
 * <ul>
 *   <li>0 - IDs are in non-decreasing order; each is written as a vInt
 *       delta from the previous one (the first relative to 0)
 *   <li>24 - every ID fits in 24 bits; each is written as a short holding
 *       the upper bits followed by a byte holding the low 8 bits
 *   <li>32 - each ID is written as a full int
 * </ul>
 *
 * @param docIds array holding the doc IDs
 * @param start index of the first doc ID to write
 * @param count number of doc IDs to write
 * @param out destination to write to
 * @throws IOException if writing to {@code out} fails
 */
static void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException {
  // docs can be sorted either when all docs in a block have the same value
  // or when a segment is sorted
  boolean sorted = true;
  int i = 1;
  while (sorted && i < count) {
    sorted = docIds[start + i - 1] <= docIds[start + i];
    i++;
  }

  if (sorted) {
    out.writeByte((byte) 0);
    int previous = 0;
    for (int k = 0; k < count; ++k) {
      final int doc = docIds[start + k];
      out.writeVInt(doc - previous);
      previous = doc;
    }
    return;
  }

  // OR all ids together: the result shows the highest bit used by any of them
  long allBits = 0;
  for (int k = 0; k < count; ++k) {
    allBits |= Integer.toUnsignedLong(docIds[start + k]);
  }

  if (allBits > 0xffffff) {
    out.writeByte((byte) 32);
    for (int k = 0; k < count; ++k) {
      out.writeInt(docIds[start + k]);
    }
  } else {
    out.writeByte((byte) 24);
    for (int k = 0; k < count; ++k) {
      final int doc = docIds[start + k];
      out.writeShort((short) (doc >>> 8));
      out.writeByte((byte) doc);
    }
  }
}

Source File: SimpleTextUtil.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Writes the {@code NEWLINE} byte to {@code out}.
 *
 * @param out destination to write to
 * @throws IOException if writing to {@code out} fails
 */
public static void writeNewline(DataOutput out) throws IOException {
  out.writeByte(NEWLINE);
}

Source File: SimplePrimaryNode.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Serves indexing commands arriving on a socket until the client signals
 * completion, the input reaches end-of-file, or {@code stop} is set while
 * no input is pending.
 * <p>
 * Each iteration polls {@code is} every 10 ms until data is available,
 * reads one command byte, runs the matching operation, then acknowledges it
 * by writing a single byte 1 and flushing {@code bos}.
 *
 * @param socket the client connection; only used in the start-up message
 * @param stop flag checked while waiting for input; when set, the method returns
 * @param is raw input stream, used to check whether data is available
 * @param in input from which commands (and their arguments) are read
 * @param out output to which acknowledgements are written
 * @param bos stream flushed after every acknowledgement
 * @throws IOException if reading, writing or an indexing operation fails
 * @throws InterruptedException if interrupted while waiting for input
 * @throws IllegalArgumentException if an unknown command byte is received
 */
private void handleIndexing(Socket socket, AtomicBoolean stop, InputStream is, DataInput in, DataOutput out, BufferedOutputStream bos) throws IOException, InterruptedException {
  Thread.currentThread().setName("indexing");
  message("start handling indexing socket=" + socket);

  boolean done = false;
  while (!done) {
    // wait for the next command, giving up as soon as a stop is requested
    while (is.available() <= 0) {
      if (stop.get()) {
        return;
      }
      Thread.sleep(10);
    }

    byte cmd;
    try {
      cmd = in.readByte();
    } catch (EOFException eofe) {
      // done
      return;
    }

    if (cmd == CMD_ADD_DOC) {
      handleAddDocument(in, out);
    } else if (cmd == CMD_UPDATE_DOC) {
      handleUpdateDocument(in, out);
    } else if (cmd == CMD_DELETE_DOC) {
      handleDeleteDocument(in, out);
    } else if (cmd == CMD_DELETE_ALL_DOCS) {
      writer.deleteAll();
    } else if (cmd == CMD_FORCE_MERGE) {
      writer.forceMerge(1);
    } else if (cmd == CMD_INDEXING_DONE) {
      // acknowledged below like any other command, then the loop ends
      done = true;
    } else {
      throw new IllegalArgumentException("cmd must be add, update or delete; got " + cmd);
    }

    // every recognised command is acknowledged the same way
    out.writeByte((byte) 1);
    bos.flush();
  }
}

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

3 votes

/**
 * Writes a codec header for an index file. The header records a string
 * identifying the file format, a version number, and data identifying the
 * file instance (an ID and an auxiliary suffix such as a generation).
 * <p>
 * The header can be parsed and validated with 
 * {@link #checkIndexHeader(DataInput, String, int, int, byte[], String) checkIndexHeader()}.
 * <p>
 * IndexHeader --&gt; CodecHeader,ObjectID,ObjectSuffix
 * <ul>
 *    <li>CodecHeader   --&gt; {@link #writeHeader}
 *    <li>ObjectID     --&gt; {@link DataOutput#writeByte byte}<sup>16</sup>
 *    <li>ObjectSuffix --&gt; SuffixLength,SuffixBytes
 *    <li>SuffixLength  --&gt; {@link DataOutput#writeByte byte}
 *    <li>SuffixBytes   --&gt; {@link DataOutput#writeByte byte}<sup>SuffixLength</sup>
 * </ul>
 * <p>
 * The length of an index header depends only on the codec name and the
 * suffix, so it can be computed at any time with
 * {@link #indexHeaderLength(String,String)}.
 * 
 * @param out Output stream
 * @param codec String to identify the format of this file. It should be simple ASCII, 
 *              less than 128 characters in length.
 * @param version Version number
 * @param id Unique identifier for this particular file instance.
 * @param suffix auxiliary suffix information for the file. It should be simple ASCII,
 *              less than 256 characters in length.
 * @throws IOException If there is an I/O error writing to the underlying medium.
 * @throws IllegalArgumentException If the codec name is not simple ASCII, or 
 *         is more than 127 characters in length, or if id is invalid,
 *         or if the suffix is not simple ASCII, or more than 255 characters
 *         in length.
 */
public static void writeIndexHeader(DataOutput out, String codec, int version, byte[] id, String suffix) throws IOException {
  if (id.length != StringHelper.ID_LENGTH) {
    throw new IllegalArgumentException("Invalid id: " + StringHelper.idToString(id));
  }

  writeHeader(out, codec, version);
  out.writeBytes(id, 0, id.length);

  final BytesRef encodedSuffix = new BytesRef(suffix);
  final int encodedLength = encodedSuffix.length;
  // a mismatch between the encoded length and the char count means the suffix is not plain ASCII
  final boolean sameLengthAsChars = encodedLength == suffix.length();
  if (!sameLengthAsChars || encodedLength >= 256) {
    throw new IllegalArgumentException("suffix must be simple ASCII, less than 256 characters in length [got " + suffix + "]");
  }

  // the length fits in one byte thanks to the check above
  out.writeByte((byte) encodedLength);
  out.writeBytes(encodedSuffix.bytes, encodedSuffix.offset, encodedLength);
}

Java Code Examples for org.apache.lucene.store.DataOutput#writeByte()