org.kitesdk.data.DatasetDescriptor Java Examples
The following examples show how to use
org.kitesdk.data.DatasetDescriptor.
Each example is drawn from an open source project; its source file and license are noted above the code.
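Most of the examples below follow the same basic pattern: build an immutable DatasetDescriptor with DatasetDescriptor.Builder, then hand it to a repository or to Datasets.create. The following is a minimal sketch of that pattern; the Event schema, partition strategy, and dataset URI are illustrative placeholders, not values taken from any example.

import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.Formats;
import org.kitesdk.data.PartitionStrategy;

public class DescriptorSketch {
  public static void main(String[] args) {
    // build an immutable descriptor: schema, partitioning, and storage format
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(SchemaBuilder.record("Event").fields()
            .requiredLong("timestamp")
            .requiredString("message")
            .endRecord())
        .partitionStrategy(new PartitionStrategy.Builder()
            .year("timestamp")
            .build())
        .format(Formats.AVRO)
        .build();

    // create a dataset from the descriptor; the URI is a placeholder
    Dataset<GenericRecord> events = Datasets.create(
        "dataset:file:/tmp/data/events", descriptor, GenericRecord.class);
    System.out.println("Created dataset at " + events.getDescriptor().getLocation());
  }
}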
Example #1
Source File: TestFileSystemDatasetRepository.java From kite with Apache License 2.0
@Test
public void testUpdateFailsWithFormatChange() {
  Dataset<Record> dataset = repo.create(NAMESPACE, NAME,
      new DatasetDescriptor.Builder(testDescriptor)
          .format(Formats.AVRO)
          .build());
  DatasetDescriptor changed =
      new DatasetDescriptor.Builder(dataset.getDescriptor())
          .format(Formats.PARQUET)
          .build();
  try {
    repo.update(NAMESPACE, NAME, changed);
    Assert.fail("Should fail due to format change");
  } catch (ValidationException e) {
    // expected
  }
  Assert.assertEquals(
      Formats.AVRO,
      repo.load(NAMESPACE, NAME).getDescriptor().getFormat());
}
Example #2
Source File: TestWriteReflectReadGeneric.java From kite with Apache License 2.0
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo =
      new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<MyRecord> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(MyRecord.class)
          .build(),
      MyRecord.class);
  DatasetWriter<MyRecord> writer = writerDataset.newWriter();
  for (int i = 0; i < totalRecords; i++) {
    writer.write(new MyRecord(String.valueOf(i), i));
  }
  writer.close();

  readerDataset = repo.load("ns", "test", GenericRecord.class);
}
Example #3
Source File: HBaseMetadataProvider.java From kite with Apache License 2.0
@Override
public DatasetDescriptor load(String namespace, String name) {
  Preconditions.checkArgument(DEFAULT_NAMESPACE.equals(namespace),
      "Non-default namespaces are not supported");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");
  if (!exists(namespace, name)) {
    throw new DatasetNotFoundException("No such dataset: " + name);
  }
  String tableName = getTableName(name);
  String entityName = getEntityName(name);
  return new DatasetDescriptor.Builder()
      .schemaLiteral(schemaManager.getEntitySchema(tableName, entityName)
          .getRawSchema())
      .build();
}
Example #4
Source File: TestHiveRepositoryURIs.java From kite with Apache License 2.0
@Test
public void testExternalURI() {
  URI hdfsUri = getDFS().getUri();
  URI repoUri = URI.create("repo:hive:/tmp/hive-repo?hdfs:host=" +
      hdfsUri.getHost() + "&hdfs:port=" + hdfsUri.getPort());
  DatasetRepository repo = DatasetRepositories.repositoryFor(repoUri);

  Assert.assertNotNull("Received a repository", repo);
  org.junit.Assert.assertTrue("Repo should be a HCatalogExternalDatasetRepository",
      repo instanceof HiveExternalDatasetRepository);
  Assert.assertEquals("Repository URI", repoUri, repo.getUri());

  // verify location
  DatasetDescriptor created = repo.create("tmp", "test",
      new DatasetDescriptor.Builder()
          .schemaLiteral("\"string\"")
          .build())
      .getDescriptor();
  Assert.assertEquals("Location should be in HDFS",
      "hdfs", created.getLocation().getScheme());
  Assert.assertEquals("Location should have the correct HDFS host",
      hdfsUri.getHost(), created.getLocation().getHost());
  Assert.assertEquals("Location should have the correct HDFS port",
      hdfsUri.getPort(), created.getLocation().getPort());
  Assert.assertTrue("Location should be in the repo path",
      created.getLocation().getPath().startsWith("/tmp/hive-repo"));
}
Example #5
Source File: FileSystemDatasetRepository.java From kite with Apache License 2.0
@Override
public <E> Dataset<E> load(String namespace, String name, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");

  LOG.debug("Loading dataset: {}", name);

  DatasetDescriptor descriptor = metadataProvider.load(namespace, name);

  FileSystemDataset<E> ds = new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(descriptor)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(descriptor.isPartitioned() ? new PartitionKey() : null)
      .partitionListener(getPartitionListener())
      .build();

  LOG.debug("Loaded dataset:{}", ds);

  return ds;
}
Example #6
Source File: TestMetadataProviders.java From kite with Apache License 2.0
@Test
public void testCustomProperties() {
  final String propName = "my.custom.property";
  final String propValue = "string";
  DatasetDescriptor descriptorWithProp =
      new DatasetDescriptor.Builder(testDescriptor)
          .property(propName, propValue)
          .build();

  DatasetDescriptor created = provider.create(NAMESPACE, NAME, descriptorWithProp);
  Assert.assertTrue("Should have custom property",
      created.hasProperty(propName));
  Assert.assertEquals("Should have correct custom property value",
      propValue, created.getProperty(propName));
  Assert.assertTrue("List should contain property name",
      created.listProperties().contains(propName));

  DatasetDescriptor loaded = provider.load(NAMESPACE, NAME);
  Assert.assertTrue("Should have custom property",
      loaded.hasProperty(propName));
  Assert.assertEquals("Should have correct custom property value",
      propValue, loaded.getProperty(propName));
  Assert.assertTrue("List should contain property name",
      loaded.listProperties().contains(propName));
}
Example #7
Source File: TestHiveDatasetURIsCompatibility.java From kite with Apache License 2.0
@Test
public void testLoadChangedAbsolutePathURICompatibility() {
  // this used to be a relative external URI, but is now a managed URI
  String uri = "dataset:hive:/data/ds";

  DatasetRepository repo = DatasetRepositories
      .repositoryFor("repo:hive:/tmp/data");
  DatasetDescriptor withLocation = new DatasetDescriptor.Builder(DESCRIPTOR)
      .location("file:/tmp/data/ds") // old location
      .build();
  Dataset<GenericRecord> expected = repo.create(
      "default", "ds", withLocation, GenericRecord.class);

  Dataset<GenericRecord> actual = Datasets.load(uri);

  Assert.assertEquals("Should load existing dataset default.ds",
      expected, actual);
  Assert.assertEquals("URI should use apparent namespace",
      "dataset:hive:data/ds", actual.getUri().toString());

  Assert.assertTrue(Datasets.delete(uri));
}
Example #8
Source File: TestHiveExternalDatasetRepository.java From kite with Apache License 2.0
@SuppressWarnings("deprecation") @Test public void testNewPartitionIsVisibleToHive() throws Exception { final String NAME2 = "test2"; PartitionStrategy partitionStrategy = new PartitionStrategy.Builder() .hash("username", 2).build(); DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schema(testSchema) .partitionStrategy(partitionStrategy) .build(); Dataset<GenericRecord> dataset = repo.create(NAMESPACE, NAME2, descriptor); HiveTestUtils.assertTableExists(client, NAMESPACE, NAME2); HiveTestUtils.assertTableIsExternal(client, NAMESPACE, NAME2); Assert.assertTrue("No partitions yet", client.listPartitionNames(NAMESPACE, NAME2, (short) 10).isEmpty()); writeRecord(dataset, 0); Assert.assertEquals("Should be one partition", 1, client.listPartitionNames(NAMESPACE, NAME2, (short) 10).size()); }
Example #9
Source File: TestFileSystemDatasetRepository.java From kite with Apache License 2.0
@Test
public void testUpdateFailsWithLocationChange() {
  ensureCreated();
  Dataset<Record> dataset = repo.load(NAMESPACE, NAME);
  URI location = dataset.getDescriptor().getLocation();

  DatasetDescriptor changed =
      new DatasetDescriptor.Builder(dataset.getDescriptor())
          .location(new Path(testDirectory, "newDataLocation").toUri())
          .build();

  try {
    repo.update(NAMESPACE, NAME, changed);
    Assert.fail("Should fail due to data location change");
  } catch (ValidationException ex) {
    // expected
  }

  Assert.assertEquals(
      location,
      repo.load(NAMESPACE, NAME).getDescriptor().getLocation());
}
Example #10
Source File: TestUpdateDatasetCommand.java From kite with Apache License 2.0
@Test
public void testUpdateSchema() throws Exception {
  File avroSchemaFile = new File("target/schema_update.avsc");
  new FileWriter(avroSchemaFile).append(schema2).close();

  command.datasets = Lists.newArrayList("users");
  command.avroSchemaFile = avroSchemaFile.toString();
  command.run();

  DatasetDescriptor updated = new DatasetDescriptor.Builder(original)
      .schemaLiteral(schema2)
      .build();

  verify(repo).load("default", "users"); // need to load the current dataset
  verify(ds).getDescriptor(); // should inspect and use its descriptor
  verify(repo).update(eq("default"), eq("users"), argThat(TestUtil.matches(updated)));
  verify(console).debug(contains("Updated"), eq("users"));
}
Example #11
Source File: TestCreateDatasetCommandCluster.java From kite with Apache License 2.0
@Test
public void testBasicUseLocalSchema() throws Exception {
  String avsc = "target/localUser.avsc";
  FSDataOutputStream out = getFS()
      .create(new Path(avsc), true /* overwrite */);
  ByteStreams.copy(Resources.getResource("test-schemas/user.avsc").openStream(), out);
  out.close();

  command.avroSchemaFile = avsc;
  command.datasets = Lists.newArrayList("users");
  command.run();

  DatasetDescriptor expectedDescriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:test-schemas/user.avsc")
      .build();

  verify(getMockRepo()).create("default", "users", expectedDescriptor);
  verify(console).debug(contains("Created"), eq("users"));
}
Example #12
Source File: TestCrunchDatasetsHBase.java From kite with Apache License 2.0
@Test
public void testGeneric() throws IOException {
  String datasetName = tableName + ".TestGenericEntity";

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(testGenericEntity)
      .build();

  Dataset<GenericRecord> inputDataset = repo.create("default", "in", descriptor);
  Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);

  writeRecords(inputDataset, 10);

  Pipeline pipeline = new MRPipeline(TestCrunchDatasetsHBase.class,
      HBaseTestUtils.getConf());
  PCollection<GenericRecord> data = pipeline.read(
      CrunchDatasets.asSource(inputDataset));
  pipeline.write(data, CrunchDatasets.asTarget(outputDataset), Target.WriteMode.APPEND);
  pipeline.run();

  checkRecords(outputDataset, 10, 0);
}
Example #13
Source File: TestCrunchDatasets.java From kite with Apache License 2.0
@Test
public void testGeneric() throws IOException {
  Dataset<Record> inputDataset = repo.create("ns", "in",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA).build());
  Dataset<Record> outputDataset = repo.create("ns", "out",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA).build());

  // write two files, each of 5 records
  writeTestUsers(inputDataset, 5, 0);
  writeTestUsers(inputDataset, 5, 5);

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(inputDataset));
  pipeline.write(data, CrunchDatasets.asTarget(outputDataset), Target.WriteMode.APPEND);
  pipeline.run();

  checkTestUsers(outputDataset, 10);
}
Example #14
Source File: TestMetadataProviders.java From kite with Apache License 2.0
@Test
public void testCreateWithLocation() throws URISyntaxException {
  Assert.assertFalse("Sanity check", provider.exists(NAMESPACE, NAME));

  String auth = getDFS().getUri().getAuthority();
  URI requestedLocation = new URI("hdfs://" + auth + "/tmp/data/my_data_set");
  DatasetDescriptor requested = new DatasetDescriptor.Builder(testDescriptor)
      .location(requestedLocation)
      .build();

  final DatasetDescriptor created;
  try {
    created = provider.create(NAMESPACE, NAME, requested);
  } catch (UnsupportedOperationException ex) {
    // this is expected if the provider doesn't support requested locations
    return;
  }

  // if supported, the location should be unchanged.
  Assert.assertNotNull("Descriptor should be returned", created);
  Assert.assertTrue("Descriptor should exist", provider.exists(NAMESPACE, NAME));
  Assert.assertEquals("Requested locations should match",
      requestedLocation, created.getLocation());
}
Example #15
Source File: HBaseDatasetReaderTest.java From kite with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  HBaseTestUtils.getMiniCluster();
  // managed table should be created by HBaseDatasetRepository
  HBaseTestUtils.util.deleteTable(Bytes.toBytes(managedTableName));

  HBaseDatasetRepository repo = new HBaseDatasetRepository.Builder()
      .configuration(HBaseTestUtils.getConf()).build();
  String testGenericEntity = AvroUtils.inputStreamToString(
      HBaseDatasetRepositoryTest.class.getResourceAsStream("/TestGenericEntity.avsc"));
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(testGenericEntity)
      .build();
  dataset = repo.create("default", "testtable", descriptor);

  for (int i = 0; i < 10; i++) {
    dataset.put(HBaseDatasetRepositoryTest.createGenericEntity(i));
  }
}
Example #16
Source File: PartitionedDatasetWriter.java From kite with Apache License 2.0
static <E> PartitionedDatasetWriter<E, ?> newWriter(FileSystemView<E> view) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Format format = descriptor.getFormat();
  if (Formats.PARQUET.equals(format)) {
    // by default, Parquet is not durable
    if (DescriptorUtil.isDisabled(
        FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) {
      return new IncrementalPartitionedDatasetWriter<E>(view);
    } else {
      return new NonDurablePartitionedDatasetWriter<E>(view);
    }
  } else if (Formats.AVRO.equals(format) || Formats.CSV.equals(format)) {
    return new IncrementalPartitionedDatasetWriter<E>(view);
  } else {
    return new NonDurablePartitionedDatasetWriter<E>(view);
  }
}
Example #17
Source File: FileSystemWriter.java From kite with Apache License 2.0
private FileSystemWriter(FileSystem fs, Path path, long rollIntervalMillis,
                         long targetFileSize, DatasetDescriptor descriptor,
                         Schema writerSchema) {
  Preconditions.checkNotNull(fs, "File system is not defined");
  Preconditions.checkNotNull(path, "Destination directory is not defined");
  Preconditions.checkNotNull(descriptor, "Descriptor is not defined");

  this.fs = fs;
  this.directory = path;
  this.rollIntervalMillis = rollIntervalMillis;
  this.targetFileSize = targetFileSize;
  this.descriptor = descriptor;
  this.conf = new Configuration(fs.getConf());
  this.state = ReaderWriterState.NEW;
  this.schema = writerSchema;

  // copy file format settings from custom properties to the Configuration
  for (String prop : descriptor.listProperties()) {
    conf.set(prop, descriptor.getProperty(prop));
  }

  // For performance reasons we will skip temp file creation if the file system
  // does not support efficient renaming, and write the file directly.
  this.useTempPath = FileSystemUtil.supportsRename(fs.getUri(), conf);
}
Example #18
Source File: TestProjection.java From kite with Apache License 2.0
@Test
public void testMixedProjection() throws IOException {
  Dataset<StandardEvent> original = repo.create("ns", "mixedProjection",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(),
      StandardEvent.class);

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  Dataset<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent),
      new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
Example #19
Source File: DatasetKeyOutputFormat.java From kite with Apache License 2.0
private static <E> View<E> loadOrCreateTaskAttemptView(TaskAttemptContext taskContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(taskContext);
  Map<String, String> uriOptions = Registration.lookupDatasetUri(
      URI.create(URI.create(conf.get(KITE_OUTPUT_URI)).getSchemeSpecificPart())).second();
  Dataset<E> dataset = loadOrCreateTaskAttemptDataset(taskContext);

  if (dataset instanceof AbstractDataset) {
    DatasetDescriptor descriptor = dataset.getDescriptor();
    Schema schema = descriptor.getSchema();
    PartitionStrategy strategy = null;
    if (descriptor.isPartitioned()) {
      strategy = descriptor.getPartitionStrategy();
    }
    Constraints constraints = Constraints.fromQueryMap(
        schema, strategy, uriOptions);
    return ((AbstractDataset<E>) dataset).filter(constraints);
  } else {
    return dataset;
  }
}
Example #20
Source File: TestHiveUtils.java From kite with Apache License 2.0
@Test
public void testUpdateChangesDDL() throws Exception {
  DatasetDescriptor original = new DatasetDescriptor.Builder()
      .schema(SchemaBuilder.record("Test").fields()
          .requiredLong("id")
          .requiredString("data")
          .endRecord())
      .build();

  boolean external = false;
  Table table = HiveUtils.tableForDescriptor("ns", "test", original, external);

  DatasetDescriptor updated = new DatasetDescriptor.Builder()
      .schema(SchemaBuilder.record("Test").fields()
          .requiredLong("id")
          .requiredString("data")
          .nullableString("data2", "")
          .endRecord())
      .build();

  HiveUtils.updateTableSchema(table, updated);

  Assert.assertEquals("Should update the table DDL",
      table.getSd().getCols(),
      HiveSchemaConverter.convertSchema(updated.getSchema()));
}
Example #21
Source File: Compatibility.java From kite with Apache License 2.0
/**
 * Checks that the {@code existing} {@link DatasetDescriptor} is compatible
 * with {@code test}.
 *
 * @param existing the current {@code DatasetDescriptor} for a dataset
 * @param test a new {@code DatasetDescriptor} for the same dataset
 */
public static void checkCompatible(DatasetDescriptor existing,
                                   DatasetDescriptor test) {
  checkNotChanged("format", existing.getFormat(), test.getFormat());

  checkNotChanged("partitioning",
      existing.isPartitioned(), test.isPartitioned());

  if (existing.isPartitioned()) {
    checkStrategyUpdate(
        existing.getPartitionStrategy(),
        test.getPartitionStrategy(),
        test.getSchema());
  }

  // check can read records written with old schema using new schema
  Schema oldSchema = existing.getSchema();
  Schema testSchema = test.getSchema();
  if (!SchemaValidationUtil.canRead(oldSchema, testSchema)) {
    throw new IncompatibleSchemaException("Schema cannot read data " +
        "written using existing schema. Schema: " + testSchema.toString(true) +
        "\nExisting schema: " + oldSchema.toString(true));
  }
}
Example #22
Source File: CSVFileReader.java From kite with Apache License 2.0
@SuppressWarnings("unchecked") public CSVFileReader(FileSystem fileSystem, Path path, DatasetDescriptor descriptor, EntityAccessor<E> accessor) { this.fs = fileSystem; this.path = path; this.schema = accessor.getReadSchema(); this.recordClass = accessor.getType(); this.state = ReaderWriterState.NEW; this.props = CSVProperties.fromDescriptor(descriptor); // defaults to false: assume that callers will not make defensive copies this.reuseRecords = DescriptorUtil.isEnabled(REUSE_RECORDS, descriptor); Preconditions.checkArgument(Schema.Type.RECORD.equals(schema.getType()), "Schemas for CSV files must be records of primitive types"); }
Example #23
Source File: TestCrunchDatasets.java From kite with Apache License 2.0
@Test
public void testSourceView() throws IOException {
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", 2).build();

  Dataset<Record> inputDataset = repo.create("ns", "in",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());
  Dataset<Record> outputDataset = repo.create("ns", "out",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA).format(Formats.PARQUET).build());

  writeTestUsers(inputDataset, 10);

  View<Record> inputView = inputDataset.with("username", "test-0");
  Assert.assertEquals(1, datasetSize(inputView));

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(inputView));
  pipeline.write(data, CrunchDatasets.asTarget(outputDataset), Target.WriteMode.APPEND);
  pipeline.run();

  Assert.assertEquals(1, datasetSize(outputDataset));
}
Example #24
Source File: TestCompatibilityChecks.java From kite with Apache License 2.0
@Test
public void testAllowedPartitionSchemaCombinations() {
  Compatibility.checkDescriptor(
      new DatasetDescriptor.Builder()
          .schema(schema)
          .partitionStrategy(new PartitionStrategy.Builder()
              .year("timestamp")
              .month("timestamp")
              .day("timestamp")
              .hour("timestamp")
              .minute("timestamp")
              .identity("message", "message_copy")
              .identity("timestamp", "ts")
              .identity("number", "num")
              .hash("message", 48)
              .hash("timestamp", 48)
              .hash("number", 48)
              .hash("payload", 48)
              .hash("float", 48)
              .hash("double", 48)
              .hash("bool", 48)
              .range("number", 5, 10, 15, 20)
              .range("message", "m", "z", "M", "Z")
              .build())
          .build());
}
Example #25
Source File: TestConfigurationProperty.java From nifi with Apache License 2.0
@Before
public void createDataset() throws Exception {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(TestUtil.USER_SCHEMA)
      .build();
  this.datasetUri = "dataset:file:" + temp.newFolder("ns", "temp").toString();
  this.dataset = Datasets.create(datasetUri, descriptor, Record.class);
}
Example #26
Source File: DescriptorUtil.java From kite with Apache License 2.0
/**
 * Returns whether the value of the descriptor property is {@code true}.
 *
 * @param property a String property name
 * @param descriptor a {@link DatasetDescriptor}
 * @return {@code true} if set and "true", {@code false} otherwise.
 */
public static boolean isEnabled(String property, DatasetDescriptor descriptor) {
  if (descriptor.hasProperty(property)) {
    // return true if and only if the property value is "true"
    return Boolean.valueOf(descriptor.getProperty(property));
  }
  return false;
}
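As a companion to isEnabled, here is a minimal hedged sketch showing how a boolean property set through the builder (as in Example #6) is read back; the property name my.feature.enabled is a hypothetical placeholder, not a property defined by Kite.

// "my.feature.enabled" is a hypothetical property name, used only for illustration
DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
    .schemaLiteral("\"string\"")
    .property("my.feature.enabled", "true")
    .build();

// true: the property is present and its value is "true"
boolean enabled = DescriptorUtil.isEnabled("my.feature.enabled", descriptor);

// false: a property that was never set defaults to false
boolean unset = DescriptorUtil.isEnabled("some.other.property", descriptor);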
Example #27
Source File: AvroKeyEntitySchemaParser.java From kite with Apache License 2.0
@Override
public AvroKeySchema parseKeySchema(String rawSchema) {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(rawSchema)
      .build();
  return new AvroKeySchema(
      descriptor.getSchema(), descriptor.getPartitionStrategy());
}
Example #28
Source File: TestFileSystemUtil.java From kite with Apache License 2.0
@Test
public void testMultipleAvroFilesAtDifferentDepths() throws Exception {
  File folder = temp.newFolder("a/b/c/d/e");
  Path root = new Path(temp.getRoot().toURI());
  FileSystem fs = LocalFileSystem.getInstance();

  // create two Avro files under separate folders
  Path parent = new Path(folder.toURI());
  createAvroUserFile(fs, new Path(parent, "part=1"));
  createAvroUserFile(fs, parent);

  DatasetDescriptor descriptor = Iterables.getOnlyElement(
      FileSystemUtil.findPotentialDatasets(fs, root));

  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .provided("part", "int")
      .build();

  Assert.assertTrue("Should flag data at mixed depth in the directory tree",
      DescriptorUtil.isEnabled("kite.filesystem.mixed-depth", descriptor));
  Assert.assertEquals("Should be directly under parent",
      parent.toUri(), descriptor.getLocation());
  Assert.assertEquals("Should use user schema",
      USER_SCHEMA, descriptor.getSchema());
  Assert.assertEquals("Should have Avro format",
      Formats.AVRO, descriptor.getFormat());
  Assert.assertEquals("Should be partitioned by part=int",
      strategy, descriptor.getPartitionStrategy());
}
Example #29
Source File: TestKiteURIHandler.java From kite with Apache License 2.0
@Before
public void setUp() throws IOException, URISyntaxException {
  this.conf = (distributed ?
      MiniDFSTest.getConfiguration() :
      new Configuration());

  this.fs = FileSystem.get(conf);

  this.testDescriptor = new DatasetDescriptor.Builder()
      .format(Formats.AVRO)
      .schema(SchemaBuilder.record("Event").fields()
          .requiredLong("timestamp")
          .requiredString("message")
          .endRecord())
      .partitionStrategy(new PartitionStrategy.Builder()
          .year("timestamp")
          .month("timestamp")
          .day("timestamp")
          .build())
      .build();

  uriHandler = new KiteURIHandler();

  startingConf = DefaultConfiguration.get();

  startingOozieHome = System.getProperty("oozie.home.dir");
}
Example #30
Source File: TestFileSystemDatasetRepository.java From kite with Apache License 2.0
@Test
public void testUpdateFailsWithIncompatibleSchemaChange() {
  Dataset<Record> dataset = repo.create(NAMESPACE, NAME,
      new DatasetDescriptor.Builder()
          .schema(testSchema).build());

  Assert.assertEquals("Dataset name is propagated", NAME,
      dataset.getName());
  Assert.assertEquals("Dataset schema is propagated", testSchema,
      dataset.getDescriptor().getSchema());

  Schema testSchemaV2 = SchemaBuilder.record("user").fields()
      .requiredString("username")
      .requiredString("email")
      .requiredString("favoriteColor") // incompatible - no default
      .endRecord();

  try {
    repo.update(NAMESPACE, NAME, new DatasetDescriptor.Builder(
        dataset.getDescriptor()).schema(testSchemaV2).build());
    Assert.fail("Should fail due to incompatible update");
  } catch (ValidationException e) {
    // expected
  }

  dataset = repo.load(NAMESPACE, NAME);
  Assert.assertEquals("Dataset schema is unchanged", testSchema,
      dataset.getDescriptor().getSchema());
}