Java Code Examples for org.kitesdk.data.DatasetWriter#write()
The following examples show how to use org.kitesdk.data.DatasetWriter#write(). The source file, originating project, and license are noted above each example.
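Most examples follow the same pattern: get a DatasetWriter from a Dataset or View, call write() for each entity, and close the writer in a finally block. Below is a minimal, self-contained sketch of that pattern, modeled on Examples 16 and 17; the dataset URI, the user.avsc schema (assumed here to declare only a username field), and the record values are illustrative placeholders, not taken from any one example.

import org.apache.avro.generic.GenericData.Record;
import org.apache.avro.generic.GenericRecordBuilder;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.Datasets;

public class DatasetWriterSketch {
  public static void main(String[] args) {
    // Describe the dataset with an Avro schema; the schema URI and the
    // HDFS path are placeholders for this sketch
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schemaUri("resource:user.avsc")
        .build();
    Dataset<Record> users = Datasets.create(
        "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

    // Open a writer, write one entity per write() call, and always
    // close the writer in a finally block
    DatasetWriter<Record> writer = null;
    try {
      writer = users.newWriter();
      Record record = new GenericRecordBuilder(descriptor.getSchema())
          .set("username", "example-user")
          .build();
      writer.write(record);
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
}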
Example 1
Source File: TestProjection.java (from kite, Apache License 2.0)

@Test
public void testSpecificProjectionLoad() throws IOException {
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  Dataset<SmallEvent> dataset = repo.load(
      "ns", unbounded.getDataset().getName(), SmallEvent.class);

  Set<SmallEvent> expected = Sets.newHashSet(
      toSmallEvent(sepEvent), toSmallEvent(octEvent), toSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
Example 2
Source File: DatasetTestUtilities.java (from kite, Apache License 2.0)

public static void writeTestUsers(View<GenericData.Record> view, int count,
    int start, String... fields) {
  DatasetWriter<GenericData.Record> writer = null;
  try {
    writer = view.newWriter();
    for (int i = start; i < count + start; i++) {
      GenericRecordBuilder recordBuilder = new GenericRecordBuilder(
          view.getDataset().getDescriptor().getSchema())
          .set("username", "test-" + i);
      for (String field : fields) {
        recordBuilder.set(field, field + "-" + i);
      }
      writer.write(recordBuilder.build());
    }
    if (writer instanceof Flushable) {
      ((Flushable) writer).flush();
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
Example 3
Source File: TestPartitionReplacement.java (from kite, Apache License 2.0)

private static void writeTestRecords(View<TestRecord> view) {
  DatasetWriter<TestRecord> writer = null;
  try {
    writer = view.newWriter();
    for (int i = 0; i < 10; i += 1) {
      TestRecord record = new TestRecord();
      record.id = i;
      record.data = "test-" + i;
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
Example 4
Source File: TestProjection.java (from kite, Apache License 2.0)

@Test
public void testMixedProjection() throws IOException {
  Dataset<StandardEvent> original = repo.create("ns", "mixedProjection",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(),
      StandardEvent.class);

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  Dataset<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
Example 5
Source File: PartitionedDatasetWriter.java (from kite, Apache License 2.0)

@Override
public void write(E entity) {
  Preconditions.checkState(state.equals(ReaderWriterState.OPEN),
      "Attempt to write to a writer in state:%s", state);

  accessor.keyFor(entity, provided, reusedKey);

  DatasetWriter<E> writer = cachedWriters.getIfPresent(reusedKey);
  if (writer == null) {
    // avoid checking every time whether the entity belongs in the view by
    // only checking when a new writer is created
    Preconditions.checkArgument(view.includes(entity),
        "View %s does not include entity %s", view, entity);
    // get a new key because it is stored in the cache
    StorageKey key = StorageKey.copy(reusedKey);
    try {
      writer = cachedWriters.getUnchecked(key);
    } catch (UncheckedExecutionException ex) {
      throw new IllegalArgumentException(
          "Problem creating view for entity: " + entity, ex.getCause());
    }
  }

  writer.write(entity);
}
Example 6
Source File: TestProjection.java (from kite, Apache License 2.0)

@Test
public void testReflectProjectionAsType() throws IOException {
  Dataset<StandardEvent> original = repo.create(
      "ns", "reflectProjection",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(),
      StandardEvent.class);

  DatasetWriter<ReflectStandardEvent> writer = null;
  try {
    writer = original.asType(ReflectStandardEvent.class).newWriter();
    writer.write(new ReflectStandardEvent(sepEvent));
    writer.write(new ReflectStandardEvent(octEvent));
    writer.write(new ReflectStandardEvent(novEvent));
  } finally {
    Closeables.close(writer, false);
  }

  final View<ReflectSmallEvent> smallEvents =
      original.asType(ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, smallEvents);

  TestHelpers.assertThrows("Should not be able to write small events",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          smallEvents.newWriter();
        }
      });
}
Example 7
Source File: TestFileSystemUtil.java (from kite, Apache License 2.0)

public void writeUserToView(View<GenericRecord> dataset) {
  DatasetWriter<GenericRecord> writer = null;
  try {
    writer = dataset.newWriter();
    writer.write(USER);
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
Example 8
Source File: TestProjection.java (from kite, Apache License 2.0)

@Test
public void testIncompatibleProjection() throws IOException {
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  TestHelpers.assertThrows(
      "Should not load a dataset with an incompatible class",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          repo.load("ns", unbounded.getDataset().getName(),
              IncompatibleEvent.class);
        }
      });

  TestHelpers.assertThrows("Should reject a schema that can't read or write",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          unbounded.asType(IncompatibleEvent.class);
        }
      });

  TestHelpers.assertThrows("Should reject a schema that can't read or write",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          unbounded.getDataset().asType(IncompatibleEvent.class);
        }
      });
}
Example 9
Source File: TestFileSystemView.java (from kite, Apache License 2.0)

@Test
@SuppressWarnings("unchecked")
public void testUnboundedMoveToTrash() throws Exception {
  // NOTE: this is an un-restricted write so all should succeed
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  final Path root = new Path("target/data/ns/test");
  final Path y2013 = new Path("target/data/ns/test/year=2013");
  final Path sep = new Path("target/data/ns/test/year=2013/month=09");
  final Path sep12 = new Path("target/data/ns/test/year=2013/month=09/day=12");
  final Path oct = new Path("target/data/ns/test/year=2013/month=10");
  final Path oct12 = new Path("target/data/ns/test/year=2013/month=10/day=12");
  final Path nov = new Path("target/data/ns/test/year=2013/month=11");
  final Path nov11 = new Path("target/data/ns/test/year=2013/month=11/day=11");
  assertDirectoriesExist(fs, root, y2013, sep, sep12, oct, oct12, nov, nov11);

  Assert.assertTrue("Delete should return true to indicate data was deleted.",
      unbounded.moveToTrash());

  assertDirectoriesDoNotExist(fs, y2013, sep12, sep, oct12, oct, nov11, nov);
  assertDirectoriesExist(fs, root);
}
Example 10
Source File: TestFileSystemView.java (from kite, Apache License 2.0)

@Test
@SuppressWarnings("unchecked")
public void testUnboundedDelete() throws Exception {
  // NOTE: this is an un-restricted write so all should succeed
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  final Path root = new Path("target/data/ns/test");
  final Path y2013 = new Path("target/data/ns/test/year=2013");
  final Path sep = new Path("target/data/ns/test/year=2013/month=09");
  final Path sep12 = new Path("target/data/ns/test/year=2013/month=09/day=12");
  final Path oct = new Path("target/data/ns/test/year=2013/month=10");
  final Path oct12 = new Path("target/data/ns/test/year=2013/month=10/day=12");
  final Path nov = new Path("target/data/ns/test/year=2013/month=11");
  final Path nov11 = new Path("target/data/ns/test/year=2013/month=11/day=11");
  assertDirectoriesExist(fs, root, y2013, sep, sep12, oct, oct12, nov, nov11);

  Assert.assertTrue("Delete should return true to indicate data was deleted.",
      unbounded.deleteAll());

  assertDirectoriesDoNotExist(fs, y2013, sep12, sep, oct12, oct, nov11, nov);
  assertDirectoriesExist(fs, root);
}
Example 11
Source File: TestProjection.java (from kite, Apache License 2.0)

@Test
public void testReflectProjectionLoad() throws IOException {
  Dataset<ReflectStandardEvent> original = repo.create(
      "ns", "reflectProjection",
      new DatasetDescriptor.Builder()
          .schema(ReflectStandardEvent.class)
          .build(),
      ReflectStandardEvent.class);

  DatasetWriter<ReflectStandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(new ReflectStandardEvent(sepEvent));
    writer.write(new ReflectStandardEvent(octEvent));
    writer.write(new ReflectStandardEvent(novEvent));
  } finally {
    Closeables.close(writer, false);
  }

  View<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
Example 12
Source File: TestCrunchDatasetsHBase.java (from kite, Apache License 2.0)

private void writeRecords(Dataset<GenericRecord> dataset, int count) {
  DatasetWriter<GenericRecord> writer = dataset.newWriter();
  try {
    for (int i = 0; i < count; ++i) {
      GenericRecord entity = HBaseDatasetRepositoryTest.createGenericEntity(i);
      writer.write(entity);
    }
  } finally {
    writer.close();
  }
}
Example 13
Source File: TestPartitionedDatasetWriter.java (from kite, Apache License 2.0)

private static <E> void writeToView(View<E> view, E... entities) {
  DatasetWriter<E> writer = null;
  try {
    writer = view.newWriter();
    for (E entity : entities) {
      writer.write(entity);
    }
    // the finally block handles closing, so no extra close() is needed here
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
Example 14
Source File: DaoViewTest.java (from kite, Apache License 2.0)

@Test
public void testLimitedWriter() {
  final View<TestEntity> range = ds
      .fromAfter(NAMES[0], "1").to(NAMES[0], "5")
      .fromAfter(NAMES[1], "1").to(NAMES[1], "5");
  DatasetWriter<TestEntity> writer = range.newWriter();
  try {
    writer.write(newTestEntity("3", "3"));
    writer.write(newTestEntity("5", "5"));
  } finally {
    writer.close();
  }
}
Example 15
Source File: TestSimpleView.java (from kite, Apache License 2.0)

@Test
public void testRefineIdentity() throws Exception {
  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .identity("user_id")
      .build();
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:standard_event.avsc")
      .partitionStrategy(strategy)
      .build();

  // Create a separate dataset to avoid conflicts with the above.
  Dataset<StandardEvent> identityDataset = repo.create(
      "ns", "test_identity", descriptor);

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = identityDataset.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  assertContentEquals(Sets.newHashSet(sepEvent, novEvent),
      identityDataset.with("user_id", 0L));
}
Example 16
Source File: CreateProductDatasetPojo.java (from kite-examples, Apache License 2.0)

@Override
public int run(String[] args) throws Exception {
  // Create a dataset of products with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(Product.class)
      .build();
  Dataset<Product> products = Datasets.create(
      "dataset:hdfs:/tmp/data/products", descriptor, Product.class);

  // Get a writer for the dataset and write some products to it
  DatasetWriter<Product> writer = null;
  try {
    writer = products.newWriter();
    int i = 0;
    for (String name : names) {
      Product product = new Product();
      product.setName(name);
      product.setId(i++);
      writer.write(product);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
Example 17
Source File: CreateUserDatasetGenericParquet.java (from kite-examples, Apache License 2.0)

@Override
public int run(String[] args) throws Exception {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .format(Formats.PARQUET)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)])
          .build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
Example 18
Source File: TestCrunchDatasets.java (from kite, Apache License 2.0)

@Test
public void testUseReaderSchemaParquet() throws IOException {
  // Create a schema with only a username, so we can test reading it
  // with an enhanced record structure.
  Schema oldRecordSchema = SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
      .fields()
      .requiredString("username")
      .endRecord();

  // create the dataset
  Dataset<Record> in = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .format(Formats.PARQUET).schema(oldRecordSchema).build());
  Dataset<Record> out = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .format(Formats.PARQUET).schema(oldRecordSchema).build());
  Record oldUser = new Record(oldRecordSchema);
  oldUser.put("username", "user");

  DatasetWriter<Record> writer = in.newWriter();
  try {
    writer.write(oldUser);
  } finally {
    writer.close();
  }

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

  // read data from updated dataset that has the new schema.
  // At this point, User class has the old schema
  PCollection<NewUserRecord> data = pipeline.read(
      CrunchDatasets.asSource(in.getUri(), NewUserRecord.class));

  PCollection<NewUserRecord> processed = data.parallelDo(
      new UserRecordIdentityFn(), Avros.records(NewUserRecord.class));

  pipeline.write(processed, CrunchDatasets.asTarget(out));

  DatasetReader reader = out.newReader();

  Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());

  try {
    // there should be one record that is equal to our old user generic record.
    Assert.assertEquals(oldUser, reader.next());
    Assert.assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
Example 19
Source File: TestMapReduce.java (from kite, Apache License 2.0)

private void populateOutputDataset() {
  DatasetWriter<GenericData.Record> writer = outputDataset.newWriter();
  writer.write(newStatsRecord(4, "date"));
  writer.close();
}
Example 20
Source File: TestCrunchDatasets.java (from kite, Apache License 2.0)

@Test
public void testUseReaderSchema() throws IOException {
  // Create a schema with only a username, so we can test reading it
  // with an enhanced record structure.
  Schema oldRecordSchema = SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
      .fields()
      .requiredString("username")
      .endRecord();

  // create the dataset
  Dataset<Record> in = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .schema(oldRecordSchema).build());
  Dataset<Record> out = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .schema(oldRecordSchema).build());
  Record oldUser = new Record(oldRecordSchema);
  oldUser.put("username", "user");

  DatasetWriter<Record> writer = in.newWriter();
  try {
    writer.write(oldUser);
  } finally {
    writer.close();
  }

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

  // read data from updated dataset that has the new schema.
  // At this point, User class has the old schema
  PCollection<NewUserRecord> data = pipeline.read(
      CrunchDatasets.asSource(in.getUri(), NewUserRecord.class));

  PCollection<NewUserRecord> processed = data.parallelDo(
      new UserRecordIdentityFn(), Avros.records(NewUserRecord.class));

  pipeline.write(processed, CrunchDatasets.asTarget(out));

  DatasetReader reader = out.newReader();

  Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());

  try {
    // there should be one record that is equal to our old user generic record.
    Assert.assertEquals(oldUser, reader.next());
    Assert.assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}