Java Code Examples for org.apache.hadoop.fs.FSDataOutputStream#writeLong()
The following examples show how to use org.apache.hadoop.fs.FSDataOutputStream#writeLong().
You can vote up the examples you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
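FSDataOutputStream extends java.io.DataOutputStream, so writeLong(long) writes the value as eight big-endian bytes at the stream's current position; the matching read call is FSDataInputStream#readLong(). Before the project examples, here is a minimal, self-contained sketch of that round trip (the class name, demo path, and record count are arbitrary choices for illustration, not taken from any project below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteLongExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);        // swap in FileSystem.get(conf) to target HDFS
    Path path = new Path("/tmp/writelong-demo.bin");  // hypothetical demo path

    // Write ten longs; each writeLong() call emits 8 big-endian bytes.
    try (FSDataOutputStream out = fs.create(path, true)) {
      for (long i = 0; i < 10; i++) {
        out.writeLong(i);
      }
    }

    // Read them back with the matching DataInput call.
    try (FSDataInputStream in = fs.open(path)) {
      for (long i = 0; i < 10; i++) {
        System.out.println(in.readLong());
      }
    }

    fs.delete(path, false);
  }
}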
Example 1
Source File: TestShuffleHandler.java From hadoop with Apache License 2.0 | 6 votes |
private static void createIndexFile(File indexFile, Configuration conf) throws IOException {
  if (indexFile.exists()) {
    System.out.println("Deleting existing file");
    indexFile.delete();
  }
  indexFile.createNewFile();
  FSDataOutputStream output = FileSystem.getLocal(conf).getRaw().append(
      new Path(indexFile.getAbsolutePath()));
  Checksum crc = new PureJavaCrc32();
  crc.reset();
  CheckedOutputStream chk = new CheckedOutputStream(output, crc);
  String msg = "Writing new index file. This file will be used only " +
      "for the testing.";
  chk.write(Arrays.copyOf(msg.getBytes(),
      MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH));
  output.writeLong(chk.getChecksum().getValue());
  output.close();
}
Example 2
Source File: TestShuffleHandler.java From big-c with Apache License 2.0 | 6 votes |
private static void createIndexFile(File indexFile, Configuration conf) throws IOException {
  if (indexFile.exists()) {
    System.out.println("Deleting existing file");
    indexFile.delete();
  }
  indexFile.createNewFile();
  FSDataOutputStream output = FileSystem.getLocal(conf).getRaw().append(
      new Path(indexFile.getAbsolutePath()));
  Checksum crc = new PureJavaCrc32();
  crc.reset();
  CheckedOutputStream chk = new CheckedOutputStream(output, crc);
  String msg = "Writing new index file. This file will be used only " +
      "for the testing.";
  chk.write(Arrays.copyOf(msg.getBytes(),
      MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH));
  output.writeLong(chk.getChecksum().getValue());
  output.close();
}
Example 3
Source File: TestIndexCache.java From tez with Apache License 2.0 | 5 votes |
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
Example 4
Source File: SpoolingRawBatchBuffer.java From Bats with Apache License 2.0 | 5 votes |
public void writeToStream(FSDataOutputStream stream) throws IOException {
  Stopwatch watch = Stopwatch.createStarted();
  available = false;
  check = ThreadLocalRandom.current().nextLong();
  start = stream.getPos();
  logger.debug("Writing check value {} at position {}", check, start);
  stream.writeLong(check);
  batch.getHeader().writeDelimitedTo(stream);
  ByteBuf buf = batch.getBody();
  if (buf != null) {
    bodyLength = buf.capacity();
  } else {
    bodyLength = 0;
  }
  if (bodyLength > 0) {
    buf.getBytes(0, stream, bodyLength);
  }
  stream.hsync();
  FileStatus status = fs.getFileStatus(path);
  long len = status.getLen();
  logger.debug("After spooling batch, stream at position {}. File length {}", stream.getPos(), len);
  batch.sendOk();
  latch.countDown();
  long t = watch.elapsed(TimeUnit.MICROSECONDS);
  logger.debug("Took {} us to spool {} to disk. Rate {} mb/s", t, bodyLength, bodyLength / t);
  if (buf != null) {
    buf.release();
  }
}
Example 5
Source File: TestIndexCache.java From tez with Apache License 2.0 | 5 votes |
@Test
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(INDEX_CACHE_MB, 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
        UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 6
Source File: TestIndexCache.java From RDFS with Apache License 2.0 | 5 votes |
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
Example 7
Source File: TestIndexCache.java From RDFS with Apache License 2.0 | 5 votes |
public void testBadIndex() throws Exception {
  final int parts = 30;
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.getLocal(conf).getRaw();
  Path p = new Path(System.getProperty("test.build.data", "/tmp"),
      "cache").makeQualified(fs);
  fs.delete(p, true);
  conf.setInt("mapred.tasktracker.indexcache.mb", 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f);
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 8
Source File: HdfsDirectory.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
private void writeFileCache(FSDataOutputStream outputStream) throws IOException {
  Set<Entry<String, FStat>> entrySet = _cache.entrySet();
  // Record count first, then one (name, lastMod, length) entry per cached file.
  outputStream.writeInt(_cache.size());
  for (Entry<String, FStat> e : entrySet) {
    String name = e.getKey();
    FStat fstat = e.getValue();
    writeString(outputStream, name);
    outputStream.writeLong(fstat._lastMod);
    outputStream.writeLong(fstat._length);
  }
}
Example 9
Source File: TestUtils.java From succinct with Apache License 2.0 | 5 votes |
public static FSDataInputStream getStream(LongBuffer buf) throws IOException {
  File tmpDir = Files.createTempDir();
  Path filePath = new Path(tmpDir.getAbsolutePath() + "/testOut");
  FileSystem fs = FileSystem.get(filePath.toUri(), new Configuration());
  FSDataOutputStream fOut = fs.create(filePath);
  // Spill the buffer contents to the file, one 8-byte long at a time.
  buf.rewind();
  while (buf.hasRemaining()) {
    fOut.writeLong(buf.get());
  }
  fOut.close();
  buf.rewind();
  return fs.open(filePath);
}
Example 10
Source File: TestIndexCache.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void testBadIndex() throws Exception {
  final int parts = 30;
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.getLocal(conf).getRaw();
  Path p = new Path(System.getProperty("test.build.data", "/tmp"),
      "cache").makeQualified(fs);
  fs.delete(p, true);
  conf.setInt("mapred.tasktracker.indexcache.mb", 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f);
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 11
Source File: TestIndexCache.java From big-c with Apache License 2.0 | 5 votes |
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
Example 12
Source File: TestIndexCache.java From big-c with Apache License 2.0 | 5 votes |
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
        UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 13
Source File: TestIndexCache.java From hadoop with Apache License 2.0 | 5 votes |
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
Example 14
Source File: TestIndexCache.java From hadoop with Apache License 2.0 | 5 votes |
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
        UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 15
Source File: TestHoodieLogFormat.java From hudi with Apache License 2.0 | 4 votes |
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testAvroLogRecordReaderWithMixedInsertsCorruptsAndRollback(boolean readBlocksLazily)
    throws IOException, URISyntaxException, InterruptedException {
  // Write 3 data blocks with the same InstantTime (written in the same batch)
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  // Set a small threshold so that every block is a new version
  Writer writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();

  // Write 1
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieDataBlock dataBlock = getDataBlock(records1, header);
  writer = writer.appendBlock(dataBlock);
  writer = writer.appendBlock(dataBlock);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(100);
  outputStream.flush();
  outputStream.close();

  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(100);
  outputStream.flush();
  outputStream.close();

  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(100);
  outputStream.flush();
  outputStream.close();

  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();

  // Write 1 rollback block for the last commit instant
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
  header.put(HeaderMetadataType.TARGET_INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
      String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
  HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
  writer = writer.appendBlock(commandBlock);
  writer.close();

  List<String> allLogFiles =
      FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
          .map(s -> s.getPath().toString()).collect(Collectors.toList());

  HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101",
      10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
  assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
}
Example 16
Source File: TestHoodieLogFormat.java From hudi with Apache License 2.0 | 4 votes |
@Test
public void testAvroLogRecordReaderWithRollbackPartialBlock()
    throws IOException, URISyntaxException, InterruptedException {
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  // Set a small threshold so that every block is a new version
  Writer writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();

  // Write 1
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<IndexedRecord> copyOfRecords1 = records1.stream()
      .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieDataBlock dataBlock = getDataBlock(records1, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Write 2
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  // Write out a length that does not conform with the content
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  // Write out some header
  outputStream.write(HoodieLogBlock.getLogMetadataBytes(header));
  outputStream.writeLong("something-random".getBytes().length);
  outputStream.write("something-random".getBytes());
  outputStream.flush();
  outputStream.close();

  // Rollback the last write
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "102");
  header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "101");
  header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
      String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
  HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  writer = writer.appendBlock(commandBlock);

  // Write 3
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "103");
  List<IndexedRecord> records3 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<IndexedRecord> copyOfRecords3 = records3.stream()
      .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  dataBlock = getDataBlock(records3, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  List<String> allLogFiles =
      FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
          .map(s -> s.getPath().toString()).collect(Collectors.toList());

  HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "103",
      10240L, true, false, bufferSize, BASE_OUTPUT_PATH);
  assertEquals(200, scanner.getTotalLogRecords(), "We would read 200 records");
  Set<String> readKeys = new HashSet<>(200);
  scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
  assertEquals(200, readKeys.size(), "Stream collect should return all 200 records");
  copyOfRecords1.addAll(copyOfRecords3);
  Set<String> originalKeys =
      copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
          .collect(Collectors.toSet());
  assertEquals(originalKeys, readKeys, "CompositeAvroLogReader should return 200 records from 2 versions");
}
Example 17
Source File: TestHoodieLogFormat.java From hudi with Apache License 2.0 | 4 votes |
@Test
public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxException, InterruptedException {
  Writer writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  HoodieDataBlock dataBlock = getDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  // Write out a length that does not conform with the content
  outputStream.writeLong(474);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(400);
  // Write out incomplete content
  outputStream.write("something-random".getBytes());
  outputStream.flush();
  outputStream.close();

  // Append a proper block that is of the missing length of the corrupted block
  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  records = SchemaTestUtil.generateTestRecords(0, 10);
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  dataBlock = getDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // First round of reads - we should be able to read the first block and then EOF
  Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema());
  assertTrue(reader.hasNext(), "First block should be available");
  reader.next();
  assertTrue(reader.hasNext(), "We should have corrupted block next");
  HoodieLogBlock block = reader.next();
  assertEquals(HoodieLogBlockType.CORRUPT_BLOCK, block.getBlockType(), "The read block should be a corrupt block");
  assertTrue(reader.hasNext(), "Third block should be available");
  reader.next();
  assertFalse(reader.hasNext(), "There should be no more block left");
  reader.close();

  // Simulate another failure back to back
  outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  // Write out a length that does not conform with the content
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(500);
  // Write out some bytes
  outputStream.write("something-else-random".getBytes());
  outputStream.flush();
  outputStream.close();

  // Should be able to append a new block
  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  records = SchemaTestUtil.generateTestRecords(0, 100);
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  dataBlock = getDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Second round of reads - we should be able to read the first and last block
  reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema());
  assertTrue(reader.hasNext(), "First block should be available");
  reader.next();
  assertTrue(reader.hasNext(), "We should get the 1st corrupted block next");
  reader.next();
  assertTrue(reader.hasNext(), "Third block should be available");
  reader.next();
  assertTrue(reader.hasNext(), "We should get the 2nd corrupted block next");
  block = reader.next();
  assertEquals(HoodieLogBlockType.CORRUPT_BLOCK, block.getBlockType(), "The read block should be a corrupt block");
  assertTrue(reader.hasNext(), "We should get the last block next");
  reader.next();
  assertFalse(reader.hasNext(), "We should have no more blocks left");
  reader.close();
}
Example 18
Source File: TestRecoveryHdfs.java From lucene-solr with Apache License 2.0 | 4 votes |
@Test
public void testTruncatedLog() throws Exception {
  try {
    TestInjection.skipIndexWriterCommitOnClose = true;
    final Semaphore logReplay = new Semaphore(0);
    final Semaphore logReplayFinish = new Semaphore(0);
    UpdateLog.testing_logReplayHook = () -> {
      try {
        assertTrue(logReplay.tryAcquire(TIMEOUT, TimeUnit.SECONDS));
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    };
    UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();
    String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

    clearIndex();
    assertU(commit());

    assertU(adoc("id","F1"));
    assertU(adoc("id","F2"));
    assertU(adoc("id","F3"));

    h.close();
    String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
    Arrays.sort(files);

    FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length-1]));
    dos.writeLong(0xffffffffffffffffL);
    dos.writeChars("This should be appended to a good log file, representing a bad partially written record.");
    dos.close();

    logReplay.release(1000);
    logReplayFinish.drainPermits();
    ignoreException("OutOfBoundsException");  // this is what the corrupted log currently produces... subject to change.
    createCore();
    assertTrue(logReplayFinish.tryAcquire(TIMEOUT, TimeUnit.SECONDS));
    resetExceptionIgnores();
    assertJQ(req("q","*:*"), "/response/numFound==3");

    //
    // Now test that the bad log file doesn't mess up retrieving latest versions
    //
    updateJ(jsonAdd(sdoc("id","F4", "_version_","104")), params(DISTRIB_UPDATE_PARAM,FROM_LEADER));
    updateJ(jsonAdd(sdoc("id","F5", "_version_","105")), params(DISTRIB_UPDATE_PARAM,FROM_LEADER));
    updateJ(jsonAdd(sdoc("id","F6", "_version_","106")), params(DISTRIB_UPDATE_PARAM,FROM_LEADER));

    // This currently skips the bad log file and also returns the version of the clearIndex (del *:*)
    // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]");
    assertJQ(req("qt","/get", "getVersions","3"), "/versions==[106,105,104]");
  } finally {
    UpdateLog.testing_logReplayHook = null;
    UpdateLog.testing_logReplayFinishHook = null;
  }
}
Example 19
Source File: SSTableIndexIndex.java From hadoop-sstable with Apache License 2.0 | 4 votes |
/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db and generate chunks
 * (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {
  final Configuration configuration = fileSystem.getConf();

  final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
      HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

  final Closer closer = Closer.create();

  final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
  final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

  boolean success = false;
  try {
    final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

    final TLongArrayList splitOffsets = new TLongArrayList();
    long currentStart = 0;
    long currentEnd = 0;
    final IndexOffsetScanner index = closer.register(new IndexOffsetScanner(sstablePath, fileSystem));

    while (index.hasNext()) {
      // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
      // This should be good enough since it's only used for sorting splits by size in hadoop land.
      while (currentEnd - currentStart < splitSize && index.hasNext()) {
        currentEnd = index.next();
        splitOffsets.add(currentEnd);
      }

      // Record the split
      final long[] offsets = splitOffsets.toArray();
      os.writeLong(offsets[0]); // Start
      os.writeLong(offsets[offsets.length - 1]); // End

      // Clear the offsets
      splitOffsets.clear();

      if (index.hasNext()) {
        currentStart = index.next();
        currentEnd = currentStart;
        splitOffsets.add(currentStart);
      }
    }

    success = true;
  } finally {
    closer.close();

    if (!success) {
      fileSystem.delete(inProgressOutputPath, false);
    } else {
      fileSystem.rename(inProgressOutputPath, outputPath);
    }
  }
}
Example 20
Source File: HadoopIgfs20FileSystemAbstractSelfTest.java From ignite with Apache License 2.0 | 3 votes |
/** @throws Exception If failed. */
@Test
public void testAppend() throws Exception {
  Path fsHome = new Path(primaryFsUri);
  Path file = new Path(fsHome, "someFile");

  int cnt = 1024;

  FSDataOutputStream out = fs.create(file, EnumSet.noneOf(CreateFlag.class),
      Options.CreateOpts.perms(FsPermission.getDefault()));

  for (int i = 0; i < cnt; i++)
    out.writeLong(i);

  out.close();

  out = fs.create(file, EnumSet.of(CreateFlag.APPEND),
      Options.CreateOpts.perms(FsPermission.getDefault()));

  for (int i = cnt; i < cnt * 2; i++)
    out.writeLong(i);

  out.close();

  FSDataInputStream in = fs.open(file, 1024);

  for (int i = 0; i < cnt * 2; i++)
    assertEquals(i, in.readLong());

  in.close();
}