org.kitesdk.data.DatasetNotFoundException Java Examples
The following examples show how to use
org.kitesdk.data.DatasetNotFoundException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: HBaseMetadataProvider.java From kite with Apache License 2.0 | 6 votes |
@Override public DatasetDescriptor load(String namespace, String name) { Preconditions.checkArgument(DEFAULT_NAMESPACE.equals(namespace), "Non-default namespaces are not supported"); Preconditions.checkNotNull(name, "Dataset name cannot be null"); if (!exists(namespace, name)) { throw new DatasetNotFoundException("No such dataset: " + name); } String tableName = getTableName(name); String entityName = getEntityName(name); return new DatasetDescriptor.Builder() .schemaLiteral(schemaManager.getEntitySchema(tableName, entityName) .getRawSchema()) .build(); }
Example #2
Source File: HiveAbstractMetadataProvider.java From kite with Apache License 2.0 | 6 votes |
/** * Checks whether the Hive table {@code namespace.name} exists or if * {@code default.name} exists and should be used. * * @param namespace the requested namespace * @param name the table name * @param location location that should match or null to check the default * @return if namespace.name exists, namespace. if not and default.name * exists, then default. {@code null} otherwise. */ protected String resolveNamespace(String namespace, String name, @Nullable URI location) { if (getMetaStoreUtil().exists(namespace, name)) { return namespace; } try { DatasetDescriptor descriptor = HiveUtils.descriptorForTable( conf, getMetaStoreUtil().getTable(URIBuilder.NAMESPACE_DEFAULT, name)); URI expectedLocation = location; if (location == null) { expectedLocation = expectedLocation(namespace, name); } if ((expectedLocation == null) || pathsEquivalent(expectedLocation, descriptor.getLocation())) { // table in the default db has the location that would have been used return URIBuilder.NAMESPACE_DEFAULT; } // fall through and return null } catch (DatasetNotFoundException e) { // fall through and return null } return null; }
Example #3
Source File: FileSystemMetadataProvider.java From kite with Apache License 2.0 | 6 votes |
/** * This method provides backward-compatibility for finding metadata. * <p> * This handles the case where an existing program is opening a * DatasetRepository by URI. For example, the DatasetSink and maven plugin do * this. In that case, the repository URI will directly contain a directory * named for the dataset with .metadata in it. This checks for the updated * scheme and falls back to the old scheme if the namespace is "default". * * @param namespace the requested namespace. * @param name the dataset name. * @return a Path to the correct metadata directory * @throws DatasetNotFoundException if neither location has metadata */ private Path find(String namespace, String name) { Path expectedPath = pathForMetadata(namespace, name); if (DEFAULT_NAMESPACE.equals(namespace)) { // when using the default namespace, the namespace may not be in the path try { checkExists(rootFileSystem, expectedPath); return expectedPath; } catch (DatasetNotFoundException e) { try { Path backwardCompatiblePath = new Path(rootDirectory, new Path( name.replace('.', Path.SEPARATOR_CHAR), METADATA_DIRECTORY)); checkExists(rootFileSystem, backwardCompatiblePath); return backwardCompatiblePath; } catch (DatasetNotFoundException _) { throw e; // throw the original } } } else { // no need to check other locations checkExists(rootFileSystem, expectedPath); return expectedPath; } }
Example #4
Source File: TestManagedExternalHandling.java From kite with Apache License 2.0 | 5 votes |
@Test public void testExternalWithManaged() { HiveAbstractMetadataProvider provider = new HiveManagedMetadataProvider( new HiveConf()); Assert.assertTrue(provider.isManaged("default", "managed")); Dataset<GenericData.Record> dataset = external.load("default", "managed"); Assert.assertNotNull("Should open managed dataset with external", dataset); Assert.assertEquals("Should match managed dataset", managed.load("default", "managed").getDescriptor(), dataset.getDescriptor()); DatasetDescriptor updatedDescriptor = new DatasetDescriptor.Builder(dataset.getDescriptor()) .property("kite.writer.cache-size", "34") .schemaLiteral("\"string\"") .build(); Dataset<GenericData.Record> updated = external .update("default", "managed", updatedDescriptor); Assert.assertNotNull("Should update managed dataset with external", updated); Assert.assertEquals("Should see changes in managed dataset", managed.load("default", "managed").getDescriptor(), updated.getDescriptor()); Assert.assertTrue("Should delete managed tables with external", external.delete("default", "managed")); TestHelpers.assertThrows("Should delete managed table correctly", DatasetNotFoundException.class, new Runnable() { @Override public void run() { managed.load("default", "managed"); } }); }
Example #5
Source File: TestHBaseDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingDataset() { TestHelpers.assertThrows("Should not find dataset: no such dataset", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Dataset<Object> ds = Datasets .<Object, Dataset<Object>>load("dataset:hbase:" + zk + "/nosuchdataset", Object.class); } } ); }
Example #6
Source File: TestHBaseDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingRepository() { TestHelpers.assertThrows("Should not find dataset: unknown storage scheme", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Dataset<Object> ds = Datasets .<Object, Dataset<Object>>load("dataset:unknown:" + zk + "/test", Object.class); } }); }
Example #7
Source File: HiveAbstractMetadataProvider.java From kite with Apache License 2.0 | 5 votes |
@Override public DatasetDescriptor load(String namespace, String name) { Compatibility.checkDatasetName(namespace, name); String resolved = resolveNamespace(namespace, name); if (resolved != null) { return HiveUtils.descriptorForTable( conf, getMetaStoreUtil().getTable(resolved, name)); } throw new DatasetNotFoundException( "Hive table not found: " + namespace + "." + name); }
Example #8
Source File: HiveAbstractMetadataProvider.java From kite with Apache License 2.0 | 5 votes |
@Override public DatasetDescriptor update(String namespace, String name, DatasetDescriptor descriptor) { Compatibility.checkDatasetName(namespace, name); Compatibility.checkDescriptor(descriptor); String resolved = resolveNamespace(namespace, name); if (resolved != null) { Table table = getMetaStoreUtil().getTable(resolved, name); Path managerPath = new Path(new Path(table.getSd().getLocation()), SCHEMA_DIRECTORY); SchemaManager manager = SchemaManager.create(conf, managerPath); DatasetDescriptor newDescriptor; try { URI schemaURI = manager.writeSchema(descriptor.getSchema()); newDescriptor = new DatasetDescriptor.Builder(descriptor) .schemaUri(schemaURI).build(); } catch (IOException e) { throw new DatasetIOException("Unable to create schema", e); } HiveUtils.updateTableSchema(table, newDescriptor); getMetaStoreUtil().alterTable(table); return descriptor; } throw new DatasetNotFoundException( "Hive table not found: " + namespace + "." + name); }
Example #9
Source File: HiveAbstractDatasetRepository.java From kite with Apache License 2.0 | 5 votes |
@Override public boolean delete(String namespace, String name) { try { if (isManaged(namespace, name)) { // avoids calling fsRepository.delete, which deletes the data path return getMetadataProvider().delete(namespace, name); } return super.delete(namespace, name); } catch (DatasetNotFoundException e) { return false; } }
Example #10
Source File: HiveAbstractDatasetRepository.java From kite with Apache License 2.0 | 5 votes |
@Override public boolean moveToTrash(String namespace, String name) { try { if (isManaged(namespace, name)) { // avoids calling fsRepository.delete, which deletes the data path // managed tables by default go to trash if it is enabled so call delete return getMetadataProvider().delete(namespace, name); } return super.moveToTrash(namespace, name); } catch (DatasetNotFoundException e) { return false; } }
Example #11
Source File: TestManagedExternalHandling.java From kite with Apache License 2.0 | 5 votes |
@Test public void testManagedWithExternal() { HiveAbstractMetadataProvider provider = new HiveManagedMetadataProvider( new HiveConf()); Assert.assertTrue(provider.isExternal("ns", "external")); Dataset<GenericData.Record> dataset = managed.load("ns", "external"); Assert.assertNotNull("Should open external dataset with managed", dataset); Assert.assertEquals("Should match external dataset", external.load("ns", "external").getDescriptor(), dataset.getDescriptor()); DatasetDescriptor updatedDescriptor = new DatasetDescriptor.Builder(dataset.getDescriptor()) .property("kite.writer.cache-size", "34") .schemaLiteral("\"string\"") .build(); Dataset<GenericData.Record> updated = managed .update("ns", "external", updatedDescriptor); Assert.assertNotNull("Should update external dataset with managed", updated); Assert.assertEquals("Should see changes in external dataset", external.load("ns", "external").getDescriptor(), updated.getDescriptor()); Assert.assertTrue("Should delete external tables with managed", managed.delete("ns", "external")); TestHelpers.assertThrows("Should delete external table correctly", DatasetNotFoundException.class, new Runnable() { @Override public void run() { external.load("ns", "external"); } }); }
Example #12
Source File: MemoryMetadataProvider.java From kite with Apache License 2.0 | 5 votes |
@Override public DatasetDescriptor update(String namespace, String name, DatasetDescriptor descriptor) { Preconditions.checkNotNull(namespace, "Namespace cannot be null"); Preconditions.checkNotNull(name, "Name cannot be null"); Preconditions.checkNotNull(descriptor, "Descriptor cannot be null"); if (!exists(namespace, name)) { throw new DatasetNotFoundException("Missing dataset:" + name); } descriptors.get(namespace).put(name, descriptor); return descriptor; }
Example #13
Source File: TestExternalBackwardCompatibility.java From kite with Apache License 2.0 | 5 votes |
@Test public void testLoadChecksDefaultNamespace() { Assert.assertNotNull("Should find dataset by checking default db", Datasets.load("dataset:hive:/tmp/datasets/test")); TestHelpers.assertThrows("Should not load dataset (there isn't one)", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Datasets.load("dataset:hive:/tmp/datasets/test2"); } }); }
Example #14
Source File: TestHiveDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingNamespace() { TestHelpers.assertThrows("Should not find namespace: no such namespace", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Datasets.load("dataset:hive:/tmp/data/nosuchnamespace/nosuchdataset?" + hdfsQueryArgs, Object.class); } }); }
Example #15
Source File: TestHiveDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingDataset() { TestHelpers.assertThrows("Should not find dataset: no such dataset", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Datasets.load("dataset:hive:/tmp/data/default/nosuchdataset?" + hdfsQueryArgs, Object.class); } }); }
Example #16
Source File: TestHiveDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testExternalNotEnoughPathComponents() { TestHelpers.assertThrows("Should not match URI pattern", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Datasets.load("dataset:hive:/test", Object.class); } }); }
Example #17
Source File: TestHiveDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingRepository() { TestHelpers.assertThrows("Should not find dataset: unknown storage scheme", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Datasets.load("dataset:unknown://" + hdfsAuth + "/tmp/data/test", Object.class); } }); }
Example #18
Source File: TestHiveDatasetURIsWithDefaultConfiguration.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingDataset() { TestHelpers.assertThrows("Should not find dataset: no such dataset", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Datasets.load("dataset:hive:/tmp/data/ns/nosuchdataset"); } }); }
Example #19
Source File: TestHiveDatasetURIsWithDefaultConfiguration.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingRepository() { TestHelpers.assertThrows("Should not find dataset: unknown storage scheme", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Datasets.load("dataset:unknown:/tmp/data/ns/test"); } }); }
Example #20
Source File: TestLocalDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingNamespace() { TestHelpers.assertThrows("Should not find dataset: no such namespace", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Dataset<Record> ds = Datasets.<Record, Dataset<Record>> load("dataset:file:/tmp/data/nosuchnamespace/test", Record.class); } }); }
Example #21
Source File: TestCSVImportCommand.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingDataset() throws Exception { command.targets = Lists.newArrayList(sample, "notadataset"); TestHelpers.assertThrows("Should complain about missing dataset", DatasetNotFoundException.class, new Callable() { @Override public Object call() throws Exception { command.run(); return null; } } ); verify(console).trace(contains("repo:file:target/data")); verifyNoMoreInteractions(console); }
Example #22
Source File: TestShowRecordsCommand.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingDataset() throws Exception { command.datasets = Lists.newArrayList("notadataset"); TestHelpers.assertThrows("Should complain about missing dataset", DatasetNotFoundException.class, new Callable() { @Override public Object call() throws Exception { command.run(); return null; } } ); verify(console).trace(contains("repo:file:target/data")); verifyNoMoreInteractions(console); }
Example #23
Source File: FileSystemMetadataProvider.java From kite with Apache License 2.0 | 5 votes |
private boolean deleteWithTrash(String namespace, String name, boolean useTrash){ Preconditions.checkNotNull(namespace, "Namespace cannot be null"); Preconditions.checkNotNull(name, "Dataset name cannot be null"); LOG.debug("Deleting remove metadata name: {}", name); Path metadataDirectory; try { metadataDirectory = find(namespace, name); } catch (DatasetNotFoundException _) { return false; } try { if (rootFileSystem.exists(metadataDirectory)) { if(useTrash){ if (Trash.moveToAppropriateTrash(rootFileSystem, metadataDirectory, conf)) { return true; } else { throw new IOException("Failed to trash metadata directory:" + metadataDirectory); } }else { if (rootFileSystem.delete(metadataDirectory, true)) { return true; } else { throw new IOException("Failed to delete metadata directory:" + metadataDirectory); } } } else { return false; } } catch (IOException e) { throw new DatasetIOException( "Unable to find or remove metadata directory:" + metadataDirectory + " for dataset:" + name, e); } }
Example #24
Source File: FileSystemMetadataProvider.java From kite with Apache License 2.0 | 5 votes |
@Override public boolean exists(String namespace, String name) { Preconditions.checkNotNull(namespace, "Namespace cannot be null"); Preconditions.checkNotNull(name, "Dataset name cannot be null"); try { find(namespace, name); return true; } catch (DatasetNotFoundException e) { return false; } }
Example #25
Source File: FileSystemMetadataProvider.java From kite with Apache License 2.0 | 5 votes |
/** * Precondition-style static validation that a dataset exists * * @param fs A FileSystem where the metadata should be stored * @param location The Path where the metadata should be stored * @throws org.kitesdk.data.DatasetNotFoundException if the descriptor location is missing * @throws org.kitesdk.data.DatasetIOException if any IOException is thrown */ private static void checkExists(FileSystem fs, Path location) { try { if (!fs.exists(location)) { throw new DatasetNotFoundException( "Descriptor location does not exist: " + location); } } catch (IOException ex) { throw new DatasetIOException( "Cannot access descriptor location: " + location, ex); } }
Example #26
Source File: Registration.java From kite with Apache License 2.0 | 5 votes |
public static Pair<DatasetRepository, Map<String, String>> lookupDatasetUri(URI datasetUri) { String scheme = datasetUri.getScheme(); List<String> schemeMatches = Lists.newArrayList(); for (URIPattern pattern : DATASET_PATTERNS.keySet()) { Map<String, String> match = pattern.getMatch(datasetUri); if (match != null) { return Pair.of(DATASET_PATTERNS.get(pattern).getFromOptions(match), match); } else if (pattern.getScheme() != null && pattern.getScheme().equals(scheme)) { schemeMatches.add(pattern.getPatternString()); } } String message = "Unknown dataset URI pattern: dataset:" + datasetUri; if (schemeMatches.isEmpty()) { // no known patterns for the scheme, maybe jars are missing message += "\nCheck that JARs for " + scheme + " datasets are on the classpath"; } else { // show the known patterns in case it's a simple error message += "\nKnown patterns for " + scheme + ":\n dataset:" + Joiner.on("\n dataset:").join(schemeMatches); } throw new DatasetNotFoundException(message); }
Example #27
Source File: TestLocalDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingDataset() { TestHelpers.assertThrows("Should not find dataset: no such dataset", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Dataset<Record> ds = Datasets.<Record, Dataset<Record>> load("dataset:file:/tmp/data/ns/nosuchdataset", Record.class); } }); }
Example #28
Source File: HdfsOdpsImportJob.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 5 votes |
@Override protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol) throws ClassNotFoundException, IOException { fileType = getInputFileType(); super.configureInputFormat(job, tableName, tableClassName, splitByCol); if (isHCatJob) { SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(), tableName, job.getConfiguration()); return; } else if (fileType == FileType.AVRO_DATA_FILE) { LOG.debug("Configuring for Avro export"); configureGenericRecordExportInputFormat(job, tableName); } else if (fileType == FileType.PARQUET_FILE) { LOG.debug("Configuring for Parquet export"); configureGenericRecordExportInputFormat(job, tableName); FileSystem fs = FileSystem.get(job.getConfiguration()); String uri = "dataset:" + fs.makeQualified(getInputPath()); Exception caughtException = null; try { DatasetKeyInputFormat.configure(job).readFrom(uri); } catch (DatasetNotFoundException e) { LOG.warn(e.getMessage(), e); LOG.warn("Trying to get data schema from parquet file directly"); caughtException = e; } if (caughtException != null && caughtException instanceof DatasetNotFoundException) { DatasetDescriptor descriptor = getDatasetDescriptorFromParquetFile(job, fs, uri); Dataset dataset = Datasets.create(uri, descriptor, GenericRecord.class); DatasetKeyInputFormat.configure(job).readFrom(dataset); } } FileInputFormat.addInputPath(job, getInputPath()); }
Example #29
Source File: TestLocalDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testNotEnoughPathComponents() { TestHelpers.assertThrows("Should not match URI pattern", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Dataset<Record> ds = Datasets.<Record, Dataset<Record>> load("dataset:file:/test", Record.class); } }); }
Example #30
Source File: TestLocalDatasetURIs.java From kite with Apache License 2.0 | 5 votes |
@Test public void testMissingRepository() { TestHelpers.assertThrows("Should not find dataset: unknown storage scheme", DatasetNotFoundException.class, new Runnable() { @Override public void run() { Dataset<Record> ds = Datasets.<Record, Dataset<Record>> load("dataset:unknown:/tmp/data/test", Record.class); } }); }