Example 1
Source File: From kite with Apache License 2.0 | 6 votes |
/**
 * Checks URI resolution against a dataset created without a partition
 * strategy: the {@code file:} URI with the same path as the dataset URI
 * resolves to the dataset itself, and a partition-style relative path is
 * rejected with an {@link IllegalArgumentException}.
 */
@Test
public void testDatasetNotPartitioned() {
  final String datasetUri = "dataset:file:/tmp/datasets/ns/test";

  // Remove any existing dataset at this URI before creating a fresh one.
  Datasets.delete(datasetUri);

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(schema)
      .build();
  final Dataset<GenericRecord> ds = Datasets.create(datasetUri, descriptor);

  Assert.assertEquals("Should work for empty relative directory",
      ds, FileSystemDatasets.viewForUri(ds, "file:/tmp/datasets/ns/test"));

  // Resolving a partition path is deferred so assertThrows can observe the failure.
  Runnable resolvePartitionPath = new Runnable() {
    @Override
    public void run() {
      FileSystemDatasets.viewForUri(ds, "y=2014/m=03/d=14");
    }
  };
  TestHelpers.assertThrows("Should reject paths in a non-partitioned dataset",
      IllegalArgumentException.class, resolvePartitionPath);
}
Example 2
Source File: From kite with Apache License 2.0 | 6 votes |
/**
 * Recreates the two test datasets before each test: one built from a plain
 * descriptor and one whose descriptor adds a hash partition strategy on
 * {@code id}. Test records are then written into both.
 */
@Before
public void createTestDatasets() {
  final String unpartitionedUri = "dataset:file:/tmp/datasets/unpartitioned";
  final String partitionedUri = "dataset:file:/tmp/datasets/partitioned";

  // Delete both datasets first so each create call starts from nothing.
  Datasets.delete(unpartitionedUri);
  Datasets.delete(partitionedUri);

  DatasetDescriptor plainDescriptor = new DatasetDescriptor.Builder()
      .schema(TestRecord.class)
      .build();
  unpartitioned = Datasets.create(unpartitionedUri, plainDescriptor, TestRecord.class);

  // Copy the plain descriptor and add a 4-bucket hash partition on "id".
  DatasetDescriptor hashedDescriptor = new DatasetDescriptor.Builder(plainDescriptor)
      .partitionStrategy(new PartitionStrategy.Builder()
          .hash("id", 4)
          .build())
      .build();
  partitioned = Datasets.create(partitionedUri, hashedDescriptor, TestRecord.class);

  writeTestRecords(unpartitioned);
  writeTestRecords(partitioned);
}
Example 3
Source File: From kite with Apache License 2.0 | 6 votes |
/**
 * Configures a {@code CreateDatasetMojo} with a repository URI, namespace and
 * dataset name, executes it, and asserts that the dataset at
 * {@code DATASET_URI} exists afterwards. The dataset is deleted in all cases.
 *
 * @throws Exception if configuring or executing the mojo fails
 */
@Test
public void testCreateWithRepositoryURI() throws Exception {
  try {
    CreateDatasetMojo mojo = new CreateDatasetMojo();
    mojo.hadoopConfiguration = dfsProps;
    mojo.avroSchemaFile = "schema/user.avsc";
    mojo.repositoryUri = "repo:hdfs:/tmp/data";
    mojo.datasetNamespace = "ns";
    mojo.datasetName = "users";

    mojo.execute();

    boolean created = Datasets.exists(DATASET_URI);
    Assert.assertTrue("Dataset should exist", created);
  } finally {
    // Always clean up, whether or not the mojo or the assertion succeeded.
    Datasets.delete(DATASET_URI);
  }
}
Example 4
Source File: From localization_nifi with Apache License 2.0 | 5 votes |
/**
 * Stores three user records through {@code StoreInKiteDataset} into the
 * {@code dataset:hive:ns/test} dataset and checks that reading the dataset
 * back yields the same records.
 *
 * <p>The dataset is created at the start of the test and deleted in a
 * {@code finally} block, so a failed assertion does not leave it behind for
 * the next {@code Datasets.create} call.
 *
 * @throws IOException declared by the signature; presumably raised while
 *     building the input stream via {@code streamFor} (confirm against helper)
 */
@Test
public void testBasicStoreToHive() throws IOException {
  String datasetUri = "dataset:hive:ns/test";

  Dataset<Record> dataset = Datasets.create(datasetUri, descriptor, Record.class);
  try {
    TestRunner runner = TestRunners.newTestRunner(StoreInKiteDataset.class);
    // Without a dataset URI the processor configuration is expected to be invalid.
    runner.assertNotValid();

    runner.setProperty(StoreInKiteDataset.KITE_DATASET_URI, datasetUri);
    runner.assertValid();

    List<Record> users = Lists.newArrayList(
        user("a", ""),
        user("b", ""),
        user("c", "")
    );

    runner.enqueue(streamFor(users));
    // Enqueueing alone transfers nothing; the processor has to be triggered
    // before any flow file can reach the "success" relationship.
    runner.run();

    runner.assertAllFlowFilesTransferred("success", 1);

    List<Record> stored = Lists.newArrayList(
        (Iterable<Record>) dataset.newReader());
    Assert.assertEquals("Records should match", users, stored);
  } finally {
    Datasets.delete(datasetUri);
  }
}
Example 5
Source File: From sqoop-on-spark with Apache License 2.0 | 5 votes |
/** * Merges a dataset into this. */ public void mergeDataset(String uri) { FileSystemDataset<GenericRecord> update = Datasets.load(uri); if (dataset instanceof FileSystemDataset) { ((FileSystemDataset<GenericRecord>) dataset).merge(update); // And let's completely drop the temporary dataset Datasets.delete(uri); } else { throw new SqoopException( KiteConnectorError.GENERIC_KITE_CONNECTOR_0000, uri); } }
Example 6
Source File: From nifi with Apache License 2.0 | 5 votes |
/**
 * Pushes three user records through {@code StoreInKiteDataset} into the
 * {@code dataset:hive:ns/test} dataset and verifies that the records read
 * back from the dataset equal the records that were sent.
 *
 * <p>Dataset removal happens in a {@code finally} block so that a failing
 * assertion cannot leave the dataset in place and break the next
 * {@code Datasets.create} for the same URI.
 *
 * @throws IOException declared by the signature; presumably raised while
 *     building the input stream via {@code streamFor} (confirm against helper)
 */
@Test
public void testBasicStoreToHive() throws IOException {
  String datasetUri = "dataset:hive:ns/test";

  Dataset<Record> dataset = Datasets.create(datasetUri, descriptor, Record.class);
  try {
    TestRunner runner = TestRunners.newTestRunner(StoreInKiteDataset.class);
    // No dataset URI has been set yet, so validation is expected to fail here.
    runner.assertNotValid();

    runner.setProperty(StoreInKiteDataset.KITE_DATASET_URI, datasetUri);
    runner.assertValid();

    List<Record> users = Lists.newArrayList(
        user("a", ""),
        user("b", ""),
        user("c", "")
    );

    runner.enqueue(streamFor(users));
    // Trigger the processor; an enqueued flow file is not processed until run().
    runner.run();

    runner.assertAllFlowFilesTransferred("success", 1);

    List<Record> stored = Lists.newArrayList(
        (Iterable<Record>) dataset.newReader());
    Assert.assertEquals("Records should match", users, stored);
  } finally {
    Datasets.delete(datasetUri);
  }
}
Example 7
Source File: From kite-examples with Apache License 2.0 | 5 votes |
@Override public int run(String[] args) throws Exception { // Delete the users dataset boolean success = Datasets.delete("dataset:hdfs:/tmp/data/users"); return success ? 0 : 1; }
Example 8
Source File: From kite with Apache License 2.0 | 5 votes |
/**
 * Recreates the shared test dataset once for the class: any existing dataset
 * at the URI is deleted, then a new one is created from {@code SCHEMA} and
 * {@code STRATEGY} and stored in {@code test}.
 */
@BeforeClass
public static void createTestDataset() {
  final String datasetUri = "dataset:file:/tmp/test_name";

  Datasets.delete(datasetUri);

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(SCHEMA)
      .partitionStrategy(STRATEGY)
      .build();
  test = Datasets.create(datasetUri, descriptor);
}
Example 9
Source File: From kite with Apache License 2.0 | 5 votes |
/**
 * Rebuilds the dataset under test before each test, using the {@code schema}
 * and {@code ymd} partition strategy fields, and stores it in {@code dataset}.
 */
@Before
public void createFileSystemDataset() {
  final String datasetUri = "dataset:file:/tmp/datasets/ns/test";
  final DatasetDescriptor partitionedDescriptor = new DatasetDescriptor.Builder()
      .schema(schema)
      .partitionStrategy(ymd)
      .build();

  // Delete first so the create call below does not collide with an existing dataset.
  Datasets.delete(datasetUri);
  this.dataset = Datasets.create(datasetUri, partitionedDescriptor);
}
Example 10
Source File: From kite-examples with Apache License 2.0 | 5 votes |
@Override public int run(String[] args) throws Exception { // Drop the events dataset boolean success = Datasets.delete("dataset:hive:/tmp/data/default/events"); return success ? 0 : 1; }
Example 11
Source File: From kite-examples with Apache License 2.0 | 5 votes |
@Override public int run(String[] args) throws Exception { // Drop the events dataset boolean success = Datasets.delete("dataset:hive:/tmp/data/default/events"); return success ? 0 : 1; }
Example 12
Source File: From kite with Apache License 2.0 | 5 votes |
@Override public void execute() throws MojoExecutionException, MojoFailureException { getConf(); // ensure properties are added to DefaultConfig if (uri != null) { Datasets.delete(uri); } else { LOG.warn( "kite.datasetName is deprecated, instead use kite.uri=<dataset-uri>"); Preconditions.checkArgument(datasetName != null, "kite.datasetName is required if kite.uri is not used"); DatasetRepository repo = getDatasetRepository(); repo.delete(datasetNamespace, datasetName); } }
Example 13
Source File: From kite with Apache License 2.0 | 4 votes |
@Test public void testEscapedURIs() { Datasets.delete("dataset:file:/tmp/datasets/string_partitioned"); // build a new dataset with a string partition field DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .partitionStrategy(new PartitionStrategy.Builder() .identity("data", "d_copy") .build()) .schema(TestRecord.class) .build(); FileSystemDataset<TestRecord> d = Datasets.create( "dataset:file:/tmp/datasets/string_partitioned", descriptor, TestRecord.class); writeTestRecords(d); FileSystemPartitionView<TestRecord> partition = d.getPartitionView( URI.create("file:/tmp/datasets/string_partitioned/d_copy=test%2F-0")); Assert.assertEquals("Should accept escaped full URI", URI.create("file:/tmp/datasets/string_partitioned/d_copy=test%2F-0"), partition.getLocation()); Assert.assertEquals("Should should have correctly escaped relative URI", URI.create("d_copy=test%2F-0"), partition.getRelativeLocation()); Assert.assertEquals("Should have correctly escaped constraints", d.unbounded.getConstraints().with("d_copy", "test/-0"), partition.getConstraints()); partition = d.getPartitionView( new Path("file:/tmp/datasets/string_partitioned/d_copy=test%2F-0")); Assert.assertEquals("Should accept escaped full URI", URI.create("file:/tmp/datasets/string_partitioned/d_copy=test%2F-0"), partition.getLocation()); Assert.assertEquals("Should should have correctly escaped relative URI", URI.create("d_copy=test%2F-0"), partition.getRelativeLocation()); Assert.assertEquals("Should have correctly escaped constraints", d.unbounded.getConstraints().with("d_copy", "test/-0"), partition.getConstraints()); Datasets.delete("dataset:file:/tmp/datasets/string_partitioned"); }
Example 14
Source File: From kite with Apache License 2.0 | 4 votes |
/**
 * Deletes the unpartitioned and partitioned test datasets after each test,
 * in that order.
 */
@After
public void removeTestDatasets() {
  final String[] datasetUris = {
      "dataset:file:/tmp/datasets/unpartitioned",
      "dataset:file:/tmp/datasets/partitioned"
  };
  for (String datasetUri : datasetUris) {
    Datasets.delete(datasetUri);
  }
}
Example 15
Source File: From kite with Apache License 2.0 | 4 votes |
@Test public void testBasics3a() { // only run this test if credentials are present Assume.assumeTrue(ID != null && !ID.isEmpty()); String uri = "dataset:s3a://" + BUCKET + "/ns/test"; // make sure the dataset doesn't already exist Datasets.delete(uri); DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schemaLiteral("\"string\"") .build(); Dataset<String> dataset = Datasets.create(uri, descriptor, String.class); List<String> expected = Lists.newArrayList("a", "b", "time"); DatasetWriter<String> writer = null; try { writer = dataset.newWriter(); for (String s : expected) { writer.write(s); } } finally { if (writer != null) { writer.close(); } } DatasetReader<String> reader = null; try { reader = dataset.newReader(); Assert.assertEquals("Should match written strings", expected, Lists.newArrayList((Iterator<String>) reader)); } finally { if (reader != null) { reader.close(); } } // clean up Datasets.delete(uri); }
Example 16
Source File: From kite with Apache License 2.0 | 4 votes |
/**
 * Deletes the unpartitioned, partitioned and temporary test datasets after
 * each test, in that order.
 */
@After
public void removeTestDatasets() {
  final String[] datasetUris = {
      "dataset:file:/tmp/datasets/unpartitioned",
      "dataset:file:/tmp/datasets/partitioned",
      "dataset:file:/tmp/datasets/temporary"
  };
  for (String datasetUri : datasetUris) {
    Datasets.delete(datasetUri);
  }
}
Example 17
Source File: From nifi with Apache License 2.0 | 4 votes |
/**
 * Runs after each test and deletes the dataset identified by {@code datasetUri}.
 *
 * @throws Exception declared by the signature; the visible body only calls
 *     {@code Datasets.delete}
 */
@After public void deleteDataset() throws Exception { Datasets.delete(datasetUri); }
Example 18
Source File: From nifi with Apache License 2.0 | 4 votes |
/**
 * Test teardown: passes {@code datasetUri} to {@code Datasets.delete} after
 * each test.
 *
 * @throws Exception declared by the signature; the visible body only calls
 *     {@code Datasets.delete}
 */
@After public void deleteDataset() throws Exception { Datasets.delete(datasetUri); }
Example 19
Source File: From sqoop-on-spark with Apache License 2.0 | 4 votes |
/**
 * Physically deletes the current dataset, addressing it by the string form of
 * its own URI.
 */
public void deleteDataset() {
  final String target = dataset.getUri().toString();
  Datasets.delete(target);
}
Example 20
Source File: From kite with Apache License 2.0 | 4 votes |
@Test public void testBasics3n() { // only run this test if credentials are present Assume.assumeTrue(ID != null && !ID.isEmpty()); String uri = "dataset:s3n://" + BUCKET + "/ns/test"; // make sure the dataset doesn't already exist Datasets.delete(uri); DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schemaLiteral("\"string\"") .build(); Dataset<String> dataset = Datasets.create(uri, descriptor, String.class); List<String> expected = Lists.newArrayList("a", "b", "time"); DatasetWriter<String> writer = null; try { writer = dataset.newWriter(); for (String s : expected) { writer.write(s); } } finally { if (writer != null) { writer.close(); } } DatasetReader<String> reader = null; try { reader = dataset.newReader(); Assert.assertEquals("Should match written strings", expected, Lists.newArrayList((Iterator<String>) reader)); } finally { if (reader != null) { reader.close(); } } // clean up Datasets.delete(uri); }