Java Code Examples for org.kitesdk.data.DatasetWriter#close()
The following examples show how to use org.kitesdk.data.DatasetWriter#close().
The source file, originating project, and license are noted above each example.
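Every example below closes its DatasetWriter either in a finally block or unconditionally at the end of the method, so that buffered records are flushed and file handles are released even if a write fails. As a minimal sketch, assuming an existing dataset at a placeholder URI and a Kite release in which DatasetWriter implements java.io.Closeable, the same guarantee can be written more compactly with try-with-resources:

import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.Datasets;

public class WriterCloseSketch {
  public static void main(String[] args) {
    // placeholder URI; substitute a dataset that actually exists
    Dataset<GenericRecord> users = Datasets.load(
        "dataset:hdfs:/tmp/data/users", GenericRecord.class);
    // try-with-resources invokes writer.close() even if write() throws
    try (DatasetWriter<GenericRecord> writer = users.newWriter()) {
      // writer.write(record) calls go here
    }
  }
}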
Example 1
Source File: TestFileSystemPartitionView.java From kite with Apache License 2.0
private static void writeTestRecords(View<TestRecord> view) {
  DatasetWriter<TestRecord> writer = null;
  try {
    writer = view.newWriter();
    for (int i = 0; i < 10; i += 1) {
      TestRecord record = new TestRecord();
      record.id = i;
      record.data = "test/-" + i;
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
Example 2
Source File: TestReadCustomGeneric.java From kite with Apache License 2.0
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo = new FileSystemDatasetRepository(
      fs.getConf(), testDirectory);
  Dataset<MyRecord> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(MyRecord.class)
          .build(), MyRecord.class);
  DatasetWriter<MyRecord> writer = writerDataset.newWriter();
  for (int i = 0; i < totalRecords; i++) {
    writer.write(new MyRecord(String.valueOf(i), i));
  }
  writer.close();

  readerDataset = repo.load("ns", "test", TestGenericRecord.class);
}
Example 3
Source File: CreateEvents.java From kite-examples with Apache License 2.0
@Override
public int run(List<String> args) throws Exception {
  Preconditions.checkState(!Datasets.exists(uri),
      "events dataset already exists");

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(StandardEvent.class).build();
  View<StandardEvent> events = Datasets.create(uri, descriptor, StandardEvent.class);
  DatasetWriter<StandardEvent> writer = events.newWriter();
  try {
    while (System.currentTimeMillis() - baseTimestamp < 36000) {
      writer.write(generateRandomEvent());
    }
  } finally {
    writer.close();
  }

  System.out.println("Generated " + counter + " events");
  return 0;
}
Example 4
Source File: TestPartitionReplacement.java From kite with Apache License 2.0
private static void writeTestRecords(View<TestRecord> view) {
  DatasetWriter<TestRecord> writer = null;
  try {
    writer = view.newWriter();
    for (int i = 0; i < 10; i += 1) {
      TestRecord record = new TestRecord();
      record.id = i;
      record.data = "test-" + i;
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
Example 5
Source File: TestHiveExternalDatasetRepository.java From kite with Apache License 2.0
private void writeRecord(Dataset<GenericRecord> dataset, int partition) {
  PartitionKey key = new PartitionKey(partition);
  DatasetWriter<GenericRecord> writer =
      ((PartitionedDataset<GenericRecord>) dataset).getPartition(key, true).newWriter();
  try {
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(
        dataset.getDescriptor().getSchema())
        .set("username", partition + "").set("email", partition + "@example.com");
    writer.write(recordBuilder.build());
  } finally {
    writer.close();
  }
}
Example 6
Source File: TestFileSystemUtil.java From kite with Apache License 2.0
public void writeUserToView(View<GenericRecord> dataset) {
  DatasetWriter<GenericRecord> writer = null;
  try {
    writer = dataset.newWriter();
    writer.write(USER);
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
Example 7
Source File: TestPartitionedDatasetWriter.java From kite with Apache License 2.0
private static <E> void writeToView(View<E> view, E... entities) {
  DatasetWriter<E> writer = null;
  try {
    writer = view.newWriter();
    for (E entity : entities) {
      writer.write(entity);
    }
    writer.close();
  } finally {
    // close() may run a second time here; the java.io.Closeable contract
    // requires close to be a no-op on an already-closed resource
    if (writer != null) {
      writer.close();
    }
  }
}
Example 8
Source File: PartitionedDatasetWriter.java From kite with Apache License 2.0
@Override
public void onRemoval(
    RemovalNotification<StorageKey, DatasetWriter<E>> notification) {
  DatasetWriter<E> writer = notification.getValue();
  LOG.debug("Closing writer:{} for partition:{}", writer, notification.getKey());
  writer.close();
}
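This listener belongs to Kite's PartitionedDatasetWriter, which keeps one open writer per partition. As a hedged sketch of how such a listener is wired up (the field name, size limit, and newWriterForPartition helper are assumptions for illustration, not Kite's actual code), a Guava LoadingCache can cap the number of open writers and close each one as it is evicted:

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;

// Hypothetical sketch: evicted writers are closed by the removal listener,
// so at most maximumSize() writers are ever open at once.
LoadingCache<StorageKey, DatasetWriter<E>> cachedWriters = CacheBuilder.newBuilder()
    .maximumSize(10)  // assumption: at most 10 open per-partition writers
    .removalListener(new RemovalListener<StorageKey, DatasetWriter<E>>() {
      @Override
      public void onRemoval(
          RemovalNotification<StorageKey, DatasetWriter<E>> notification) {
        notification.getValue().close();  // close the evicted writer
      }
    })
    .build(new CacheLoader<StorageKey, DatasetWriter<E>>() {
      @Override
      public DatasetWriter<E> load(StorageKey key) {
        return newWriterForPartition(key);  // hypothetical helper
      }
    });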
Example 9
Source File: DaoViewTest.java From kite with Apache License 2.0
@Test
public void testLimitedWriter() {
  final View<TestEntity> range = ds
      .fromAfter(NAMES[0], "1").to(NAMES[0], "5")
      .fromAfter(NAMES[1], "1").to(NAMES[1], "5");
  DatasetWriter<TestEntity> writer = range.newWriter();
  try {
    writer.write(newTestEntity("3", "3"));
    writer.write(newTestEntity("5", "5"));
  } finally {
    writer.close();
  }
}
Example 10
Source File: TestMapReduce.java From kite with Apache License 2.0
private void populateInputDataset() {
  DatasetWriter<GenericData.Record> writer = inputDataset.newWriter();
  writer.write(newStringRecord("apple"));
  writer.write(newStringRecord("banana"));
  writer.write(newStringRecord("banana"));
  writer.write(newStringRecord("carrot"));
  writer.write(newStringRecord("apple"));
  writer.write(newStringRecord("apple"));
  writer.close();
}
Example 11
Source File: CreateUserDatasetGenericPartitioned.java From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  // Create a partition strategy that partitions on the identity of favoriteColor
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .identity("favoriteColor", "favorite_color")
      .build();

  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
Example 12
Source File: TestCrunchDatasetsHBase.java From kite with Apache License 2.0
private void writeRecords(Dataset<GenericRecord> dataset, int count) {
  DatasetWriter<GenericRecord> writer = dataset.newWriter();
  try {
    for (int i = 0; i < count; ++i) {
      GenericRecord entity = HBaseDatasetRepositoryTest.createGenericEntity(i);
      writer.write(entity);
    }
  } finally {
    writer.close();
  }
}
Example 13
Source File: CreateHiveUserDatasetGeneric.java From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .build();
  Dataset<Record> users = Datasets.create("dataset:hive?dataset=users",
      descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
Example 14
Source File: CreateUserDatasetGenericParquet.java From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .format(Formats.PARQUET)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
Example 15
Source File: CreateUserDatasetGeneric.java From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
Example 16
Source File: TestCrunchDatasets.java From kite with Apache License 2.0
@Test
public void testUseReaderSchema() throws IOException {
  // Create a schema with only a username, so we can test reading it
  // with an enhanced record structure.
  Schema oldRecordSchema = SchemaBuilder.record("org.kitesdk.data.user.OldUserRecord")
      .fields()
      .requiredString("username")
      .endRecord();

  // create the dataset
  Dataset<Record> in = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .schema(oldRecordSchema).build());
  Dataset<Record> out = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .schema(oldRecordSchema).build());
  Record oldUser = new Record(oldRecordSchema);
  oldUser.put("username", "user");

  DatasetWriter<Record> writer = in.newWriter();
  try {
    writer.write(oldUser);
  } finally {
    writer.close();
  }

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);

  // read data from updated dataset that has the new schema.
  // At this point, User class has the old schema
  PCollection<NewUserRecord> data = pipeline.read(
      CrunchDatasets.asSource(in.getUri(), NewUserRecord.class));

  PCollection<NewUserRecord> processed = data.parallelDo(
      new UserRecordIdentityFn(), Avros.records(NewUserRecord.class));

  pipeline.write(processed, CrunchDatasets.asTarget(out));

  DatasetReader reader = out.newReader();

  Assert.assertTrue("Pipeline failed.", pipeline.run().succeeded());

  try {
    // there should be one record that is equal to our old user generic record.
    Assert.assertEquals(oldUser, reader.next());
    Assert.assertFalse(reader.hasNext());
  } finally {
    reader.close();
  }
}
Example 17
Source File: TestMapReduce.java From kite with Apache License 2.0
private void populateOutputDataset() {
  DatasetWriter<GenericData.Record> writer = outputDataset.newWriter();
  writer.write(newStatsRecord(4, "date"));
  writer.close();
}
Example 18
Source File: TestSpark.java From kite with Apache License 2.0
@Test @SuppressWarnings("deprecation") public void testSparkJob() throws Exception { Dataset<Record> inputDataset = repo.create("ns", "in", new DatasetDescriptor.Builder() .property("kite.allow.csv", "true") .schema(TestMapReduce.STRING_SCHEMA) .format(format) .build(), Record.class); DatasetWriter<Record> writer = inputDataset.newWriter(); writer.write(newStringRecord("apple")); writer.write(newStringRecord("banana")); writer.write(newStringRecord("banana")); writer.write(newStringRecord("carrot")); writer.write(newStringRecord("apple")); writer.write(newStringRecord("apple")); writer.close(); Dataset<Record> outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder() .property("kite.allow.csv", "true") .schema(TestMapReduce.STATS_SCHEMA) .format(format) .build(), Record.class); Job job = Job.getInstance(); DatasetKeyInputFormat.configure(job).readFrom(inputDataset); DatasetKeyOutputFormat.configure(job).writeTo(outputDataset); @SuppressWarnings("unchecked") JavaPairRDD<Record, Void> inputData = SparkTestHelper.getSparkContext() .newAPIHadoopRDD(job.getConfiguration(), DatasetKeyInputFormat.class, Record.class, Void.class); JavaPairRDD<String, Integer> mappedData = inputData.mapToPair(new ToJava()); JavaPairRDD<String, Integer> sums = mappedData.reduceByKey(new Sum()); JavaPairRDD<Record, Void> outputData = sums.mapToPair(new ToAvro()); outputData.saveAsNewAPIHadoopDataset(job.getConfiguration()); DatasetReader<Record> reader = outputDataset.newReader(); Map<String, Integer> counts = new HashMap<String, Integer>(); for (Record record : reader) { counts.put(record.get("name").toString(), (Integer) record.get("count")); } reader.close(); Assert.assertEquals(3, counts.get("apple").intValue()); Assert.assertEquals(2, counts.get("banana").intValue()); Assert.assertEquals(1, counts.get("carrot").intValue()); }
Example 19
Source File: TestS3Dataset.java From kite with Apache License 2.0
@Test
public void testBasics3n() {
  // only run this test if credentials are present
  Assume.assumeTrue(ID != null && !ID.isEmpty());

  String uri = "dataset:s3n://" + BUCKET + "/ns/test";

  // make sure the dataset doesn't already exist
  Datasets.delete(uri);

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral("\"string\"")
      .build();
  Dataset<String> dataset = Datasets.create(uri, descriptor, String.class);

  List<String> expected = Lists.newArrayList("a", "b", "time");
  DatasetWriter<String> writer = null;
  try {
    writer = dataset.newWriter();
    for (String s : expected) {
      writer.write(s);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  DatasetReader<String> reader = null;
  try {
    reader = dataset.newReader();
    Assert.assertEquals("Should match written strings",
        expected, Lists.newArrayList((Iterator<String>) reader));
  } finally {
    if (reader != null) {
      reader.close();
    }
  }

  // clean up
  Datasets.delete(uri);
}
Example 20
Source File: TestHiveExternalDatasetRepository.java From kite with Apache License 2.0
@SuppressWarnings("deprecation") @Test public void testDeletedPartitionRemovedFromHive() throws Exception { final String NAME2 = "test2"; // use a multi-item partition strategy to ensure the system // can convert it to the corresponding Hive partition PartitionStrategy partitionStrategy = new PartitionStrategy.Builder() .identity("username") .identity("email").build(); DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schema(testSchema) .partitionStrategy(partitionStrategy) .build(); Dataset<GenericRecord> dataset = repo.create(NAMESPACE, NAME2, descriptor); HiveTestUtils.assertTableExists(client, NAMESPACE, NAME2); HiveTestUtils.assertTableIsExternal(client, NAMESPACE, NAME2); Assert.assertTrue("No partitions yet", client.listPartitionNames(NAMESPACE, NAME2, (short) 10).isEmpty()); GenericData.Record record1 = new GenericRecordBuilder( dataset.getDescriptor().getSchema()) .set("username", "0").set("email", "0").build(); GenericData.Record record2 = new GenericRecordBuilder( dataset.getDescriptor().getSchema()) .set("username", "1").set("email", "1").build(); DatasetWriter<GenericRecord> writer = dataset.newWriter(); try { writer.write(record1); writer.write(record2); } finally { writer.close(); } Assert.assertEquals("Should be two partitions", 2, client.listPartitionNames(NAMESPACE, NAME2, (short) 10).size()); RefinableView view = dataset.with("username", "0").with("email", "0"); view.deleteAll(); Assert.assertEquals("Should be one partition", 1, client.listPartitionNames(NAMESPACE, NAME2, (short) 10).size()); view = dataset.with("username", "1").with("email", "1"); view.deleteAll(); Assert.assertEquals("Should be no partitions", 0, client.listPartitionNames(NAMESPACE, NAME2, (short) 10).size()); }