Java Code Examples for org.apache.avro.file.DataFileReader#close()
The following examples show how to use org.apache.avro.file.DataFileReader#close(). Each example is taken from an open source project; the source file, project, and license are noted above each listing.
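DataFileReader implements java.io.Closeable, so on Java 7 and later the explicit close() calls shown in the examples below can also be written as a try-with-resources block, which closes the reader even if iteration throws. A minimal sketch, assuming a local Avro container file at the hypothetical path users.avro:

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class ReadAvroFile {
  public static void main(String[] args) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    // try-with-resources invokes reader.close() automatically, even on error,
    // which is what the explicit close() calls below guard against
    try (DataFileReader<GenericRecord> reader =
        new DataFileReader<>(new File("users.avro"), datumReader)) { // hypothetical path
      while (reader.hasNext()) {
        System.out.println(reader.next());
      }
    }
  }
}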
Example 1
Source File: AvroRecordWriterTest.java From data-highway with Apache License 2.0
@Test
public void typical() throws Exception {
  Schema schema = SchemaBuilder.builder()
      .record("record")
      .fields()
      .requiredLong("id")
      .requiredString("name")
      .endRecord();
  Record value = new GenericRecordBuilder(schema).set("id", 1L).set("name", "hello").build();

  ByteArrayOutputStream output = new ByteArrayOutputStream();
  Factory factory = new Factory(CodecFactory.nullCodec());
  RecordWriter writer = factory.create(schema, output);
  writer.write(value);
  writer.close();

  SeekableInput input = new SeekableByteArrayInput(output.toByteArray());
  DatumReader<Record> datumReader = new GenericDatumReader<>(schema);
  DataFileReader<Record> dataFileReader = new DataFileReader<>(input, datumReader);
  assertThat(dataFileReader.next(), is(value));
  assertThat(dataFileReader.hasNext(), is(false));
  dataFileReader.close();
}
Example 2
Source File: BinaryAvroSchemaFileReader.java From pxf with Apache License 2.0
@Override
public Schema readSchema(Configuration configuration, String schemaName, HcfsType hcfsType,
                         AvroUtilities.FileSearcher fileSearcher) throws IOException {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  DataFileReader<GenericRecord> fileReader = null;
  try {
    File file = fileSearcher.searchForFile(schemaName);
    if (file == null) {
      final Path path = new Path(hcfsType.getDataUri(configuration, schemaName));
      FsInput inStream = new FsInput(path, configuration);
      fileReader = new DataFileReader<>(inStream, datumReader);
    } else {
      fileReader = new DataFileReader<>(file, datumReader);
    }
    return fileReader.getSchema();
  } finally {
    if (fileReader != null) {
      fileReader.close();
    }
  }
}
Example 3
Source File: Purge.java From Cubert with Apache License 2.0
private void loadMembersToPurge(String filename) throws IOException {
  // TODO: "memberId" column name should be configurable
  DataFileReader<GenericRecord> dataFileReader = createDataFileReader(filename, true);
  while (dataFileReader.hasNext()) {
    GenericRecord record = dataFileReader.next();
    Integer memberId = (Integer) record.get("memberId");
    if (memberId == null) {
      throw new NullPointerException("memberId is null");
    }
    membersToPurge.add(((Number) record.get("memberId")).intValue());
  }
  dataFileReader.close();
}
Example 4
Source File: AvroToRestJsonEntryConverterTest.java From incubator-gobblin with Apache License 2.0
private void testConversion(RestEntry<JsonObject> expected, WorkUnitState actualWorkUnitState)
    throws DataConversionException, IOException, JSONException {
  Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);

  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  tmp.deleteOnExit();
  try {
    FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader);
    GenericRecord avroRecord = dataFileReader.next();

    AvroToRestJsonEntryConverter converter = new AvroToRestJsonEntryConverter();
    RestEntry<JsonObject> actual =
        converter.convertRecord(null, avroRecord, actualWorkUnitState).iterator().next();

    Assert.assertEquals(actual.getResourcePath(), expected.getResourcePath());
    JSONAssert.assertEquals(expected.getRestEntryVal().toString(), actual.getRestEntryVal().toString(), false);

    converter.close();
    dataFileReader.close();
  } finally {
    if (tmp != null) {
      tmp.delete();
    }
  }
}
Example 5
Source File: TestFlumeEventAvroEventSerializer.java From mt-flume with Apache License 2.0
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    ByteBuffer body = (ByteBuffer) record.get("body");
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode(body).toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
Example 6
Source File: TestAvroEventSerializer.java From mt-flume with Apache License 2.0
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    String bodyStr = record.get("message").toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
Example 7
Source File: SinkAvroTest.java From gcp-ingestion with Mozilla Public License 2.0
/**
 * Test for a single doctype being written out to the correct location.
 */
@Test
public void testSingleDocumentType() throws IOException, SchemaNotFoundException {
  String input = Resources.getResource("testdata/avro-message-single-doctype.ndjson").getPath();
  String schemas = Resources.getResource("avro/test-schema.tar.gz").getPath();
  String output = outputPath + "/${document_namespace:-NONE}.${document_type:-NONE}.${document_version:-0}";

  Sink.main(new String[] { "--inputFileFormat=json", "--inputType=file", "--input=" + input,
      "--outputType=avro", "--output=" + output, "--outputFileCompression=UNCOMPRESSED",
      "--schemasLocation=" + schemas, "--errorOutputFileCompression=UNCOMPRESSED",
      "--errorOutputType=stdout" });

  assertThat("output count", getPrefixFileCount(outputPath, "namespace_0"), Matchers.greaterThan(0L));

  AvroSchemaStore store = AvroSchemaStore.of(schemas, null);
  List<Path> paths = Files.walk(Paths.get(outputPath)).filter(Files::isRegularFile)
      .collect(Collectors.toList());

  List<Integer> results = new ArrayList<>();
  for (Path path : paths) {
    Schema schema = store.getSchema("namespace_0/foo/foo.1.avro.json");
    DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
    DataFileReader<GenericRecord> fileReader = new DataFileReader<>(path.toFile(), reader);
    while (fileReader.hasNext()) {
      GenericRecord record = fileReader.next();
      results.add((Integer) record.get("test_int"));
    }
    fileReader.close();
  }
  results.sort(null);
  assertEquals(results, Arrays.asList(1, 2, 3));
}
Example 8
Source File: JdbcAvroJobTest.java From dbeam with Apache License 2.0
private List<GenericRecord> readAvroRecords(File avroFile, Schema schema) throws IOException {
  GenericDatumReader<GenericRecord> datum = new GenericDatumReader<>(schema);
  DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, datum);
  List<GenericRecord> records =
      StreamSupport.stream(dataFileReader.spliterator(), false).collect(Collectors.toList());
  dataFileReader.close();
  return records;
}
Example 9
Source File: Purge.java From Cubert with Apache License 2.0
private void purge(String src, String dst) throws IOException {
  DataFileReader<GenericRecord> dataFileReader = createDataFileReader(src, false);
  DataFileWriter<GenericRecord> writer = createDataFileWriter(dataFileReader);

  numRecords = 0;
  recordsPurged = 0;
  remainingRecords = 0;

  // Copy
  while (dataFileReader.hasNext()) {
    numRecords++;
    GenericRecord record = dataFileReader.next();
    if (record == null) {
      continue;
    }
    Number column = (Number) record.get(columnName);
    if ((column == null) || (!membersToPurge.contains(column.intValue()))) {
      remainingRecords++;
      writer.append(record);
    }
  }
  recordsPurged = numRecords - remainingRecords;
  writer.close();
  dataFileReader.close();
}
Example 10
Source File: GenerateDictionary.java From Cubert with Apache License 2.0
public static Map<String, CodeDictionary> loadDictionary(String path, boolean isHDFS, Configuration conf)
    throws IOException {
  Map<String, CodeDictionary> dictionaries = new HashMap<String, CodeDictionary>();
  Schema schema = getSchema();
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);

  DataFileReader<GenericRecord> dataFileReader;
  if (isHDFS) {
    dataFileReader = new DataFileReader<GenericRecord>(new FsInput(new Path(path), conf), datumReader);
  } else {
    dataFileReader = new DataFileReader<GenericRecord>(new File(path), datumReader);
  }

  GenericRecord record = null;
  while (dataFileReader.hasNext()) {
    record = dataFileReader.next();
    String colName = record.get("colname").toString();
    String colValue = record.get("colvalue").toString();
    int code = (Integer) record.get("code");

    CodeDictionary dict = dictionaries.get(colName);
    if (dict == null) {
      dict = new CodeDictionary();
      dictionaries.put(colName, dict);
    }
    dict.addKeyCode(colValue, code);
  }
  dataFileReader.close();
  return dictionaries;
}
Example 11
Source File: AvroHdfsDataWriterTest.java From incubator-gobblin with Apache License 2.0
@Test
public void testWrite() throws IOException {
  // Write all test records
  for (String record : TestConstants.JSON_RECORDS) {
    this.writer.write(convertRecord(record));
  }
  Assert.assertEquals(this.writer.recordsWritten(), 3);
  this.writer.close();
  this.writer.commit();

  File outputFile = new File(TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath,
      TestConstants.TEST_FILE_NAME);
  DataFileReader<GenericRecord> reader =
      new DataFileReader<>(outputFile, new GenericDatumReader<GenericRecord>());
  Schema fileSchema = reader.getSchema();
  Assert.assertEquals(fileSchema.getProp(TEST_PROPERTY_KEY), TEST_PROPERTY_VALUE);

  // Read the records back and assert they are identical to the ones written
  GenericRecord user1 = reader.next();
  // Strings are in UTF8, so we have to call toString() here and below
  Assert.assertEquals(user1.get("name").toString(), "Alyssa");
  Assert.assertEquals(user1.get("favorite_number"), 256);
  Assert.assertEquals(user1.get("favorite_color").toString(), "yellow");

  GenericRecord user2 = reader.next();
  Assert.assertEquals(user2.get("name").toString(), "Ben");
  Assert.assertEquals(user2.get("favorite_number"), 7);
  Assert.assertEquals(user2.get("favorite_color").toString(), "red");

  GenericRecord user3 = reader.next();
  Assert.assertEquals(user3.get("name").toString(), "Charlie");
  Assert.assertEquals(user3.get("favorite_number"), 68);
  Assert.assertEquals(user3.get("favorite_color").toString(), "blue");

  reader.close();

  FsWriterMetrics metrics = FsWriterMetrics.fromJson(properties.getProp(FsDataWriter.FS_WRITER_METRICS_KEY));
  Assert.assertEquals(metrics.fileInfos.size(), 1);
  FsWriterMetrics.FileInfo fileInfo = metrics.fileInfos.iterator().next();
  Assert.assertEquals(fileInfo.fileName, TestConstants.TEST_FILE_NAME);
  Assert.assertEquals(fileInfo.numRecords, 3);
  Assert.assertNull(metrics.partitionInfo.partitionKey);
  Assert.assertEquals(metrics.partitionInfo.branchId, 0);
}
Example 12
Source File: TestSyslogAvroEventSerializer.java From mt-flume with Apache License 2.0
@Test
public void test() throws FileNotFoundException, IOException {
  // Snappy currently broken on Mac in OpenJDK 7 per FLUME-2012
  Assume.assumeTrue(!"Mac OS X".equals(System.getProperty("os.name"))
      || !System.getProperty("java.version").startsWith("1.7."));

  //Schema schema = new Schema.Parser().parse(schemaFile);

  // create the file, write some data
  OutputStream out = new FileOutputStream(testFile);
  String builderName = SyslogAvroEventSerializer.Builder.class.getName();
  Context ctx = new Context();
  ctx.put("syncInterval", "4096");
  ctx.put("compressionCodec", "snappy");

  EventSerializer serializer = EventSerializerFactory.getInstance(builderName, ctx, out);
  serializer.afterCreate(); // must call this when a file is newly created

  List<Event> events = generateSyslogEvents();
  for (Event e : events) {
    serializer.write(e);
  }
  serializer.flush();
  serializer.beforeClose();
  out.flush();
  out.close();

  // now try to read the file back
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(testFile, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    int facility = (Integer) record.get("facility");
    int severity = (Integer) record.get("severity");
    long timestamp = (Long) record.get("timestamp");
    String hostname = record.get("hostname").toString();
    String message = record.get("message").toString();

    Assert.assertEquals("Facility should be 1", 1, facility);
    System.out.println(timestamp + ": " + message);
    numEvents++;
  }
  fileReader.close();

  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
  FileUtils.forceDelete(testFile);
}
Example 13
Source File: TestJavaAvroEventSerializer.java From flume-plugins with MIT License
@Test
public void test() throws FileNotFoundException, IOException {
  // create the file, write some data
  OutputStream out = new FileOutputStream(testFile);
  String builderName = JavaLogAvroEventSerializer.Builder.class.getName();
  Context ctx = new Context();
  ctx.put("syncInterval", "4096");

  EventSerializer serializer = EventSerializerFactory.getInstance(builderName, ctx, out);
  serializer.afterCreate(); // must call this when a file is newly created

  List<Event> events = generateJavaEvents();
  for (Event e : events) {
    serializer.write(e);
  }
  serializer.flush();
  serializer.beforeClose();
  out.flush();
  out.close();

  // now try to read the file back
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(testFile, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    long timestamp = (Long) record.get("timestamp");
    String datetime = record.get("datetime").toString();
    String classname = record.get("classname").toString();
    String message = record.get("message").toString();

    System.out.println(classname + ": " + message + " (at " + datetime + ")");
    numEvents++;
  }
  fileReader.close();

  Assert.assertEquals("Should have found a total of 4 events", 4, numEvents);
  FileUtils.forceDelete(testFile);
}
Example 14
Source File: TestSyslogAvroEventSerializer.java From flume-plugins with MIT License
@Test
public void test() throws FileNotFoundException, IOException {
  // create the file, write some data
  OutputStream out = new FileOutputStream(testFile);
  String builderName = SyslogAvroEventSerializer.Builder.class.getName();
  Context ctx = new Context();
  ctx.put("syncInterval", "4096");
  ctx.put("path", "src/test/resources/customerToHostsFile.txt");

  EventSerializer serializer = EventSerializerFactory.getInstance(builderName, ctx, out);
  serializer.afterCreate(); // must call this when a file is newly created

  List<Event> events = generateSyslogEvents();
  for (Event e : events) {
    serializer.write(e);
  }
  serializer.flush();
  serializer.beforeClose();
  out.flush();
  out.close();

  // now try to read the file back
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(testFile, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    long timestamp = (Long) record.get("timestamp");
    String datetime = record.get("datetime").toString();
    String hostname = record.get("hostname").toString();
    Map<String, String> headers = (Map<String, String>) record.get("headers");
    String message = record.get("message").toString();

    System.out.println(hostname + " (" + headers + ")" + ": " + message);
    numEvents++;
  }
  fileReader.close();

  Assert.assertEquals("Should have found a total of 6 events", 6, numEvents);
  FileUtils.forceDelete(testFile);
}
Example 15
Source File: TestApacheAvroEventSerializer.java From flume-plugins with MIT License
@Test
public void test() throws FileNotFoundException, IOException {
  // create the file, write some data
  OutputStream out = new FileOutputStream(testFile);
  String builderName = ApacheLogAvroEventSerializer.Builder.class.getName();
  Context ctx = new Context();
  ctx.put("syncInterval", "4096");

  EventSerializer serializer = EventSerializerFactory.getInstance(builderName, ctx, out);
  serializer.afterCreate(); // must call this when a file is newly created

  List<Event> events = generateApacheEvents();
  for (Event e : events) {
    serializer.write(e);
  }
  serializer.flush();
  serializer.beforeClose();
  out.flush();
  out.close();

  // now try to read the file back
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader = new DataFileReader<GenericRecord>(testFile, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    String ip = record.get("ip").toString();
    String uri = record.get("uri").toString();
    Integer statuscode = (Integer) record.get("statuscode");
    String original = record.get("original").toString();
    String connectionstatus = record.get("connectionstatus").toString();

    Assert.assertEquals("Ip should be 80.79.194.3", "80.79.194.3", ip);
    System.out.println("IP " + ip + " requested: " + uri + " with status code " + statuscode
        + " and connectionstatus: " + connectionstatus);
    System.out.println("Original logline: " + original);
    numEvents++;
  }
  fileReader.close();

  // note: the assertion expects 2 events, so the message has been corrected to match
  Assert.assertEquals("Should have found a total of 2 events", 2, numEvents);
  FileUtils.forceDelete(testFile);
}