Java Code Examples for org.apache.hadoop.fs.FSDataOutputStream#writeLong()
The following examples show how to use org.apache.hadoop.fs.FSDataOutputStream#writeLong().
You can vote up the examples you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
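FSDataOutputStream extends java.io.DataOutputStream, so writeLong(long) writes the value as eight big-endian bytes at the stream's current position; the matching read call is FSDataInputStream#readLong(). Before the project examples, here is a minimal, self-contained sketch of that round trip (the class name, demo path, and record count are arbitrary choices for illustration, not taken from any project below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteLongExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);        // swap in FileSystem.get(conf) to target HDFS
    Path path = new Path("/tmp/writelong-demo.bin");  // hypothetical demo path

    // Write ten longs; each writeLong() call emits 8 big-endian bytes.
    try (FSDataOutputStream out = fs.create(path, true)) {
      for (long i = 0; i < 10; i++) {
        out.writeLong(i);
      }
    }

    // Read them back with the matching DataInput call.
    try (FSDataInputStream in = fs.open(path)) {
      for (long i = 0; i < 10; i++) {
        System.out.println(in.readLong());
      }
    }

    fs.delete(path, false);
  }
}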
Example 1
Source File: TestShuffleHandler.java From hadoop with Apache License 2.0 | 6 votes |
private static void createIndexFile(File indexFile, Configuration conf) throws IOException {
  if (indexFile.exists()) {
    System.out.println("Deleting existing file");
    indexFile.delete();
  }
  indexFile.createNewFile();
  FSDataOutputStream output = FileSystem.getLocal(conf).getRaw().append(
      new Path(indexFile.getAbsolutePath()));
  Checksum crc = new PureJavaCrc32();
  crc.reset();
  CheckedOutputStream chk = new CheckedOutputStream(output, crc);
  String msg = "Writing new index file. This file will be used only " +
      "for the testing.";
  chk.write(Arrays.copyOf(msg.getBytes(),
      MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH));
  output.writeLong(chk.getChecksum().getValue());
  output.close();
}
Example 2
Source File: TestShuffleHandler.java From big-c with Apache License 2.0 | 6 votes |
private static void createIndexFile(File indexFile, Configuration conf) throws IOException {
  if (indexFile.exists()) {
    System.out.println("Deleting existing file");
    indexFile.delete();
  }
  indexFile.createNewFile();
  FSDataOutputStream output = FileSystem.getLocal(conf).getRaw().append(
      new Path(indexFile.getAbsolutePath()));
  Checksum crc = new PureJavaCrc32();
  crc.reset();
  CheckedOutputStream chk = new CheckedOutputStream(output, crc);
  String msg = "Writing new index file. This file will be used only " +
      "for the testing.";
  chk.write(Arrays.copyOf(msg.getBytes(),
      MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH));
  output.writeLong(chk.getChecksum().getValue());
  output.close();
}
Example 3
Source File: TestIndexCache.java From tez with Apache License 2.0 | 5 votes |
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
Example 4
Source File: SpoolingRawBatchBuffer.java From Bats with Apache License 2.0 | 5 votes |
public void writeToStream(FSDataOutputStream stream) throws IOException {
  Stopwatch watch = Stopwatch.createStarted();
  available = false;
  check = ThreadLocalRandom.current().nextLong();
  start = stream.getPos();
  logger.debug("Writing check value {} at position {}", check, start);
  stream.writeLong(check);
  batch.getHeader().writeDelimitedTo(stream);
  ByteBuf buf = batch.getBody();
  if (buf != null) {
    bodyLength = buf.capacity();
  } else {
    bodyLength = 0;
  }
  if (bodyLength > 0) {
    buf.getBytes(0, stream, bodyLength);
  }
  stream.hsync();
  FileStatus status = fs.getFileStatus(path);
  long len = status.getLen();
  logger.debug("After spooling batch, stream at position {}. File length {}", stream.getPos(), len);
  batch.sendOk();
  latch.countDown();
  long t = watch.elapsed(TimeUnit.MICROSECONDS);
  logger.debug("Took {} us to spool {} to disk. Rate {} mb/s", t, bodyLength, bodyLength / t);
  if (buf != null) {
    buf.release();
  }
}
Example 5
Source File: TestIndexCache.java From tez with Apache License 2.0 | 5 votes |
@Test
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(INDEX_CACHE_MB, 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < Constants.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
        UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 6
Source File: TestIndexCache.java From RDFS with Apache License 2.0 | 5 votes |
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
Example 7
Source File: TestIndexCache.java From RDFS with Apache License 2.0 | 5 votes |
public void testBadIndex() throws Exception {
  final int parts = 30;
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.getLocal(conf).getRaw();
  Path p = new Path(System.getProperty("test.build.data", "/tmp"),
      "cache").makeQualified(fs);
  fs.delete(p, true);
  conf.setInt("mapred.tasktracker.indexcache.mb", 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f);
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 8
Source File: HdfsDirectory.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
private void writeFileCache(FSDataOutputStream outputStream) throws IOException {
  Set<Entry<String, FStat>> entrySet = _cache.entrySet();
  // Record count first, then one (name, lastMod, length) entry per cached file.
  outputStream.writeInt(_cache.size());
  for (Entry<String, FStat> e : entrySet) {
    String name = e.getKey();
    FStat fstat = e.getValue();
    writeString(outputStream, name);
    outputStream.writeLong(fstat._lastMod);
    outputStream.writeLong(fstat._length);
  }
}
Example 9
Source File: TestUtils.java From succinct with Apache License 2.0 | 5 votes |
public static FSDataInputStream getStream(LongBuffer buf) throws IOException {
  File tmpDir = Files.createTempDir();
  Path filePath = new Path(tmpDir.getAbsolutePath() + "/testOut");
  FileSystem fs = FileSystem.get(filePath.toUri(), new Configuration());
  FSDataOutputStream fOut = fs.create(filePath);
  // Spill the buffer contents to the file, one 8-byte long at a time.
  buf.rewind();
  while (buf.hasRemaining()) {
    fOut.writeLong(buf.get());
  }
  fOut.close();
  buf.rewind();
  return fs.open(filePath);
}
Example 10
Source File: TestIndexCache.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void testBadIndex() throws Exception {
  final int parts = 30;
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.getLocal(conf).getRaw();
  Path p = new Path(System.getProperty("test.build.data", "/tmp"),
      "cache").makeQualified(fs);
  fs.delete(p, true);
  conf.setInt("mapred.tasktracker.indexcache.mb", 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f);
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 11
Source File: TestIndexCache.java From big-c with Apache License 2.0 | 5 votes |
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
Example 12
Source File: TestIndexCache.java From big-c with Apache License 2.0 | 5 votes |
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
        UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 13
Source File: TestIndexCache.java From hadoop with Apache License 2.0 | 5 votes |
private static void writeFile(FileSystem fs, Path f, long fill, int parts)
    throws IOException {
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      dout.writeLong(fill);
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
}
Example 14
Source File: TestIndexCache.java From hadoop with Apache License 2.0 | 5 votes |
public void testBadIndex() throws Exception {
  final int parts = 30;
  fs.delete(p, true);
  conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
  IndexCache cache = new IndexCache(conf);
  Path f = new Path(p, "badindex");
  FSDataOutputStream out = fs.create(f, false);
  CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
  DataOutputStream dout = new DataOutputStream(iout);
  for (int i = 0; i < parts; ++i) {
    for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
      if (0 == (i % 3)) {
        dout.writeLong(i);
      } else {
        out.writeLong(i);
      }
    }
  }
  out.writeLong(iout.getChecksum().getValue());
  dout.close();
  try {
    cache.getIndexInformation("badindex", 7, f,
        UserGroupInformation.getCurrentUser().getShortUserName());
    fail("Did not detect bad checksum");
  } catch (IOException e) {
    if (!(e.getCause() instanceof ChecksumException)) {
      throw e;
    }
  }
}
Example 15
Source File: TestHoodieLogFormat.java From hudi with Apache License 2.0 | 4 votes |
@ParameterizedTest
@ValueSource(booleans = {true, false})
public void testAvroLogRecordReaderWithMixedInsertsCorruptsAndRollback(boolean readBlocksLazily)
    throws IOException, URISyntaxException, InterruptedException {
  // Write 3 data blocks with the same InstantTime (written in the same batch)
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  // Set a small threshold so that every block is a new version
  Writer writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();

  // Write 1
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieDataBlock dataBlock = getDataBlock(records1, header);
  writer = writer.appendBlock(dataBlock);
  writer = writer.appendBlock(dataBlock);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(100);
  outputStream.flush();
  outputStream.close();

  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(100);
  outputStream.flush();
  outputStream.close();

  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(100);
  outputStream.flush();
  outputStream.close();

  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();

  // Write 1 rollback block for the last commit instant
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
  header.put(HeaderMetadataType.TARGET_INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
      String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
  HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
  writer = writer.appendBlock(commandBlock);
  writer.close();

  List<String> allLogFiles =
      FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
          .map(s -> s.getPath().toString()).collect(Collectors.toList());

  HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "101",
      10240L, readBlocksLazily, false, bufferSize, BASE_OUTPUT_PATH);
  assertEquals(0, scanner.getTotalLogRecords(), "We would read 0 records");
}
Example 16
Source File: TestHoodieLogFormat.java From hudi with Apache License 2.0 | 4 votes |
@Test
public void testAvroLogRecordReaderWithRollbackPartialBlock()
    throws IOException, URISyntaxException, InterruptedException {
  Schema schema = HoodieAvroUtils.addMetadataFields(getSimpleSchema());
  // Set a small threshold so that every block is a new version
  Writer writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();

  // Write 1
  List<IndexedRecord> records1 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<IndexedRecord> copyOfRecords1 = records1.stream()
      .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieDataBlock dataBlock = getDataBlock(records1, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Write 2
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101");
  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  // Write out a length that does not conform with the content
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  // Write out some header
  outputStream.write(HoodieLogBlock.getLogMetadataBytes(header));
  outputStream.writeLong("something-random".getBytes().length);
  outputStream.write("something-random".getBytes());
  outputStream.flush();
  outputStream.close();

  // Rollback the last write
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "102");
  header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "101");
  header.put(HoodieLogBlock.HeaderMetadataType.COMMAND_BLOCK_TYPE,
      String.valueOf(HoodieCommandBlock.HoodieCommandBlockTypeEnum.ROLLBACK_PREVIOUS_BLOCK.ordinal()));
  HoodieCommandBlock commandBlock = new HoodieCommandBlock(header);
  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  writer = writer.appendBlock(commandBlock);

  // Write 3
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "103");
  List<IndexedRecord> records3 = SchemaTestUtil.generateHoodieTestRecords(0, 100);
  List<IndexedRecord> copyOfRecords3 = records3.stream()
      .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList());
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  dataBlock = getDataBlock(records3, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  List<String> allLogFiles =
      FSUtils.getAllLogFiles(fs, partitionPath, "test-fileid1", HoodieLogFile.DELTA_EXTENSION, "100")
          .map(s -> s.getPath().toString()).collect(Collectors.toList());

  HoodieMergedLogRecordScanner scanner = new HoodieMergedLogRecordScanner(fs, basePath, allLogFiles, schema, "103",
      10240L, true, false, bufferSize, BASE_OUTPUT_PATH);
  assertEquals(200, scanner.getTotalLogRecords(), "We would read 200 records");
  Set<String> readKeys = new HashSet<>(200);
  scanner.forEach(s -> readKeys.add(s.getKey().getRecordKey()));
  assertEquals(200, readKeys.size(), "Stream collect should return all 200 records");
  copyOfRecords1.addAll(copyOfRecords3);
  Set<String> originalKeys =
      copyOfRecords1.stream().map(s -> ((GenericRecord) s).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString())
          .collect(Collectors.toSet());
  assertEquals(originalKeys, readKeys, "CompositeAvroLogReader should return 200 records from 2 versions");
}
Example 17
Source File: TestHoodieLogFormat.java From hudi with Apache License 2.0 | 4 votes |
@Test
public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxException, InterruptedException {
  Writer writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  HoodieDataBlock dataBlock = getDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  // Write out a length that does not conform with the content
  outputStream.writeLong(474);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(400);
  // Write out incomplete content
  outputStream.write("something-random".getBytes());
  outputStream.flush();
  outputStream.close();

  // Append a proper block that is of the missing length of the corrupted block
  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  records = SchemaTestUtil.generateTestRecords(0, 10);
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  dataBlock = getDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // First round of reads - we should be able to read the first block and then EOF
  Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema());
  assertTrue(reader.hasNext(), "First block should be available");
  reader.next();
  assertTrue(reader.hasNext(), "We should have corrupted block next");
  HoodieLogBlock block = reader.next();
  assertEquals(HoodieLogBlockType.CORRUPT_BLOCK, block.getBlockType(), "The read block should be a corrupt block");
  assertTrue(reader.hasNext(), "Third block should be available");
  reader.next();
  assertFalse(reader.hasNext(), "There should be no more block left");
  reader.close();

  // Simulate another failure back to back
  outputStream = fs.append(writer.getLogFile().getPath());
  // create a block with
  outputStream.write(HoodieLogFormat.MAGIC);
  // Write out a length that does not conform with the content
  outputStream.writeLong(1000);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  outputStream.writeInt(HoodieLogFormat.CURRENT_VERSION);
  // Write out a length that does not conform with the content
  outputStream.writeLong(500);
  // Write out some bytes
  outputStream.write("something-else-random".getBytes());
  outputStream.flush();
  outputStream.close();

  // Should be able to append a new block
  writer =
      HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION)
          .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build();
  records = SchemaTestUtil.generateTestRecords(0, 100);
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  dataBlock = getDataBlock(records, header);
  writer = writer.appendBlock(dataBlock);
  writer.close();

  // Second round of reads - we should be able to read the first and last block
  reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema());
  assertTrue(reader.hasNext(), "First block should be available");
  reader.next();
  assertTrue(reader.hasNext(), "We should get the 1st corrupted block next");
  reader.next();
  assertTrue(reader.hasNext(), "Third block should be available");
  reader.next();
  assertTrue(reader.hasNext(), "We should get the 2nd corrupted block next");
  block = reader.next();
  assertEquals(HoodieLogBlockType.CORRUPT_BLOCK, block.getBlockType(), "The read block should be a corrupt block");
  assertTrue(reader.hasNext(), "We should get the last block next");
  reader.next();
  assertFalse(reader.hasNext(), "We should have no more blocks left");
  reader.close();
}
Example 18
Source File: TestRecoveryHdfs.java From lucene-solr with Apache License 2.0 | 4 votes |
@Test
public void testTruncatedLog() throws Exception {
  try {
    TestInjection.skipIndexWriterCommitOnClose = true;
    final Semaphore logReplay = new Semaphore(0);
    final Semaphore logReplayFinish = new Semaphore(0);
    UpdateLog.testing_logReplayHook = () -> {
      try {
        assertTrue(logReplay.tryAcquire(TIMEOUT, TimeUnit.SECONDS));
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    };
    UpdateLog.testing_logReplayFinishHook = () -> logReplayFinish.release();
    String logDir = h.getCore().getUpdateHandler().getUpdateLog().getLogDir();

    clearIndex();
    assertU(commit());

    assertU(adoc("id","F1"));
    assertU(adoc("id","F2"));
    assertU(adoc("id","F3"));

    h.close();
    String[] files = HdfsUpdateLog.getLogList(fs, new Path(logDir));
    Arrays.sort(files);

    FSDataOutputStream dos = fs.append(new Path(logDir, files[files.length-1]));
    dos.writeLong(0xffffffffffffffffL);
    dos.writeChars("This should be appended to a good log file, representing a bad partially written record.");
    dos.close();

    logReplay.release(1000);
    logReplayFinish.drainPermits();
    ignoreException("OutOfBoundsException");  // this is what the corrupted log currently produces... subject to change.
    createCore();
    assertTrue(logReplayFinish.tryAcquire(TIMEOUT, TimeUnit.SECONDS));
    resetExceptionIgnores();
    assertJQ(req("q","*:*"), "/response/numFound==3");

    //
    // Now test that the bad log file doesn't mess up retrieving latest versions
    //
    updateJ(jsonAdd(sdoc("id","F4", "_version_","104")), params(DISTRIB_UPDATE_PARAM,FROM_LEADER));
    updateJ(jsonAdd(sdoc("id","F5", "_version_","105")), params(DISTRIB_UPDATE_PARAM,FROM_LEADER));
    updateJ(jsonAdd(sdoc("id","F6", "_version_","106")), params(DISTRIB_UPDATE_PARAM,FROM_LEADER));

    // This currently skips the bad log file and also returns the version of the clearIndex (del *:*)
    // assertJQ(req("qt","/get", "getVersions","6"), "/versions==[106,105,104]");
    assertJQ(req("qt","/get", "getVersions","3"), "/versions==[106,105,104]");
  } finally {
    UpdateLog.testing_logReplayHook = null;
    UpdateLog.testing_logReplayFinishHook = null;
  }
}
Example 19
Source File: SSTableIndexIndex.java From hadoop-sstable with Apache License 2.0 | 4 votes |
/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db and generate chunks
 * (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {
  final Configuration configuration = fileSystem.getConf();

  final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
      HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

  final Closer closer = Closer.create();

  final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
  final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

  boolean success = false;
  try {
    final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

    final TLongArrayList splitOffsets = new TLongArrayList();
    long currentStart = 0;
    long currentEnd = 0;
    final IndexOffsetScanner index = closer.register(new IndexOffsetScanner(sstablePath, fileSystem));

    while (index.hasNext()) {
      // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
      // This should be good enough since it's only used for sorting splits by size in hadoop land.
      while (currentEnd - currentStart < splitSize && index.hasNext()) {
        currentEnd = index.next();
        splitOffsets.add(currentEnd);
      }

      // Record the split
      final long[] offsets = splitOffsets.toArray();
      os.writeLong(offsets[0]); // Start
      os.writeLong(offsets[offsets.length - 1]); // End

      // Clear the offsets
      splitOffsets.clear();

      if (index.hasNext()) {
        currentStart = index.next();
        currentEnd = currentStart;
        splitOffsets.add(currentStart);
      }
    }

    success = true;
  } finally {
    closer.close();

    if (!success) {
      fileSystem.delete(inProgressOutputPath, false);
    } else {
      fileSystem.rename(inProgressOutputPath, outputPath);
    }
  }
}
Example 20
Source File: HadoopIgfs20FileSystemAbstractSelfTest.java From ignite with Apache License 2.0 | 3 votes |
/** @throws Exception If failed. */
@Test
public void testAppend() throws Exception {
  Path fsHome = new Path(primaryFsUri);
  Path file = new Path(fsHome, "someFile");

  int cnt = 1024;

  FSDataOutputStream out = fs.create(file, EnumSet.noneOf(CreateFlag.class),
      Options.CreateOpts.perms(FsPermission.getDefault()));

  for (int i = 0; i < cnt; i++)
    out.writeLong(i);

  out.close();

  out = fs.create(file, EnumSet.of(CreateFlag.APPEND),
      Options.CreateOpts.perms(FsPermission.getDefault()));

  for (int i = cnt; i < cnt * 2; i++)
    out.writeLong(i);

  out.close();

  FSDataInputStream in = fs.open(file, 1024);

  for (int i = 0; i < cnt * 2; i++)
    assertEquals(i, in.readLong());

  in.close();
}