org.kitesdk.data.DatasetIOException Java Examples
The following examples show how to use
org.kitesdk.data.DatasetIOException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FileSystemDatasetRepository.java From kite with Apache License 2.0 | 6 votes |
public FileSystemDatasetRepository( Configuration conf, Path rootDirectory, MetadataProvider provider) { Preconditions.checkNotNull(conf, "Configuration cannot be null"); Preconditions.checkNotNull(rootDirectory, "Root directory cannot be null"); Preconditions.checkNotNull(provider, "Metadata provider cannot be null"); try { this.fs = rootDirectory.getFileSystem(conf); } catch (IOException e) { throw new DatasetIOException( "Cannot get FileSystem for repository location: " + rootDirectory, e); } this.conf = conf; this.rootDirectory = fs.makeQualified(rootDirectory); this.repositoryUri = URI.create("repo:" + this.rootDirectory.toUri()); this.metadataProvider = provider; }
Example #2
Source File: JSONFileReader.java From kite with Apache License 2.0 | 6 votes |
/**
 * Closes the underlying input if this reader is open; otherwise does nothing.
 *
 * @throws DatasetIOException if closing the input fails
 */
@Override
public void close() {
  if (state.equals(ReaderWriterState.OPEN)) {
    LOG.debug("Closing reader on path:{}", path);

    // Drop the iterator before releasing the stream it reads from.
    iterator = null;
    try {
      incoming.close();
    } catch (IOException closeFailure) {
      throw new DatasetIOException("Unable to close reader path:" + path, closeFailure);
    }

    state = ReaderWriterState.CLOSED;
  }
}
Example #3
Source File: BaseEntityBatch.java From kite with Apache License 2.0 | 6 votes |
/**
 * Obtains a table from the pool and configures it for batched puts by
 * turning off auto flush and applying the requested write buffer size.
 *
 * @param clientTemplate
 *          The client template to use
 * @param entityMapper
 *          The EntityMapper to use for mapping
 * @param pool
 *          The HBase table pool
 * @param tableName
 *          The name of the HBase table
 * @param writeBufferSize
 *          The batch buffer size in bytes.
 * @throws DatasetIOException
 *           if setting the write buffer size fails
 */
public BaseEntityBatch(HBaseClientTemplate clientTemplate,
    EntityMapper<E> entityMapper, HTablePool pool, String tableName,
    long writeBufferSize) {
  this.table = pool.getTable(tableName);
  this.table.setAutoFlush(false);

  this.clientTemplate = clientTemplate;
  this.entityMapper = entityMapper;
  this.state = ReaderWriterState.NEW;

  // If the writeBufferSize is less than the currentBufferSize, then the
  // buffer will get flushed automatically by HBase. This should never happen,
  // since we're getting a fresh table out of the pool, and the writeBuffer
  // should be empty.
  try {
    table.setWriteBufferSize(writeBufferSize);
  } catch (IOException flushFailure) {
    throw new DatasetIOException(
        "Error flushing commits for table [" + table + "]", flushFailure);
  }
}
Example #4
Source File: Loader.java From kite with Apache License 2.0 | 6 votes |
@Override public void load() { try { // load hdfs-site.xml by loading HdfsConfiguration FileSystem.getLocal(DefaultConfiguration.get()); } catch (IOException e) { throw new DatasetIOException("Cannot load default config", e); } OptionBuilder<DatasetRepository> builder = new URIBuilder(); // username and secret are the same; host is the bucket Registration.register( new URIPattern("s3n:/*path"), new URIPattern("s3n:/*path/:namespace/:dataset"), builder); Registration.register( new URIPattern("s3a:/*path"), new URIPattern("s3a:/*path/:namespace/:dataset"), builder); }
Example #5
Source File: Loader.java From kite with Apache License 2.0 | 6 votes |
@Override public DatasetRepository getFromOptions(Map<String, String> match) { String path = match.get("path"); final Path root = (path == null || path.isEmpty()) ? new Path("/") : new Path("/", path); Configuration conf = DefaultConfiguration.get(); FileSystem fs; try { fs = FileSystem.get(fileSystemURI(match), conf); } catch (IOException e) { // "Incomplete HDFS URI, no host" => add a helpful suggestion if (e.getMessage().startsWith("Incomplete")) { throw new DatasetIOException("Could not get a FileSystem: " + "make sure the credentials for " + match.get(URIPattern.SCHEME) + " URIs are configured.", e); } throw new DatasetIOException("Could not get a FileSystem", e); } return new FileSystemDatasetRepository.Builder() .configuration(new Configuration(conf)) // make a modifiable copy .rootDirectory(fs.makeQualified(root)) .build(); }
Example #6
Source File: FileSystemView.java From kite with Apache License 2.0 | 6 votes |
@Override public long getLastModified() { long lastMod = -1; for (Iterator<Path> i = dirIterator(); i.hasNext(); ) { Path dir = i.next(); try { for (FileStatus st : fs.listStatus(dir)) { if (lastMod < st.getModificationTime()) { lastMod = st.getModificationTime(); } } } catch (IOException e) { throw new DatasetIOException("Cannot find last modified time of of " + dir, e); } } // if view was marked ready more recently count it as the modified time if (signalManager != null) { long readyTimestamp = signalManager.getReadyTimestamp(getConstraints()); if (lastMod < readyTimestamp) { lastMod = readyTimestamp; } } return lastMod; }
Example #7
Source File: CSVFileReader.java From kite with Apache License 2.0 | 6 votes |
/**
 * Closes the wrapped reader when this reader is open; a no-op in any other
 * state.
 *
 * @throws DatasetIOException if the wrapped reader fails to close
 */
@Override
public void close() {
  final boolean isOpen = state.equals(ReaderWriterState.OPEN);
  if (isOpen) {
    LOG.debug("Closing reader on path:{}", path);
    try {
      reader.close();
    } catch (IOException closeFailure) {
      throw new DatasetIOException("Unable to close reader path:" + path, closeFailure);
    }
    // Only reached when the close succeeded.
    state = ReaderWriterState.CLOSED;
  }
}
Example #8
Source File: ParquetFileSystemDatasetReader.java From kite with Apache License 2.0 | 6 votes |
/**
 * Opens the Parquet reader for this path, advances to the first record and
 * marks the reader open. May only be called while the reader is NEW.
 *
 * @throws DatasetIOException if the reader cannot be created
 */
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "A reader may not be opened more than once - current state:%s", state);

  LOG.debug("Opening reader on path:{}", path);

  final Configuration conf = fileSystem.getConf();
  try {
    // Register the read schema on the configuration before building the reader.
    AvroReadSupport.setAvroReadSchema(conf, readerSchema);
    reader = new AvroParquetReader<E>(conf, fileSystem.makeQualified(path));
  } catch (IOException cause) {
    throw new DatasetIOException("Unable to create reader path:" + path, cause);
  }

  advance();

  state = ReaderWriterState.OPEN;
}
Example #9
Source File: ParquetFileSystemDatasetReader.java From kite with Apache License 2.0 | 6 votes |
/**
 * Closes the Parquet reader when open. A failed close moves the reader to
 * the ERROR state before the exception is rethrown.
 *
 * @throws DatasetIOException if the underlying reader fails to close
 */
@Override
public void close() {
  if (state.equals(ReaderWriterState.OPEN)) {
    LOG.debug("Closing reader on path:{}", path);

    try {
      reader.close();
    } catch (IOException closeFailure) {
      this.state = ReaderWriterState.ERROR;
      throw new DatasetIOException("Unable to close reader path:" + path, closeFailure);
    }

    state = ReaderWriterState.CLOSED;
  }
}
Example #10
Source File: TestPartitionStrategyParser.java From kite with Apache License 2.0 | 6 votes |
/**
 * Parsing a partition strategy from a stream whose read() throws an
 * IOException is expected to raise a DatasetIOException.
 */
@Test
public void testInputStreamIOException() {
  final Runnable parseFailingStream = new Runnable() {
    @Override
    public void run() {
      // A stream that fails on the first read.
      final InputStream angryStream = new InputStream() {
        @Override
        public int read() throws IOException {
          throw new IOException("InputStream angry.");
        }
      };
      PartitionStrategyParser.parse(angryStream);
    }
  };

  TestHelpers.assertThrows("Should pass DatasetIOException",
      DatasetIOException.class, parseFailingStream);
}
Example #11
Source File: TestColumnMappingParser.java From kite with Apache License 2.0 | 6 votes |
/**
 * Parsing a column mapping from a stream whose read() throws an IOException
 * is expected to raise a DatasetIOException.
 */
@Test
public void testInputStreamIOException() {
  final Runnable parseFailingStream = new Runnable() {
    @Override
    public void run() {
      // A stream that fails on the first read.
      final InputStream angryStream = new InputStream() {
        @Override
        public int read() throws IOException {
          throw new IOException("InputStream angry.");
        }
      };
      ColumnMappingParser.parse(angryStream);
    }
  };

  TestHelpers.assertThrows("Should pass DatasetIOException",
      DatasetIOException.class, parseFailingStream);
}
Example #12
Source File: SchemaManager.java From kite with Apache License 2.0 | 6 votes |
/**
 * Loads a schema manager for the given schema directory if that directory
 * exists. Returns <code>null</code> if it does not.
 *
 * @param conf the Hadoop configuration
 * @param schemaDirectory directory in which the manager stores schemas.
 *
 * @return a schema manager instance, or <code>null</code> if the given
 * directory does not exist.
 * @throws DatasetIOException if the file system cannot be accessed
 */
public static SchemaManager load(Configuration conf, Path schemaDirectory) {
  try {
    final FileSystem rootFileSystem = schemaDirectory.getFileSystem(conf);

    final boolean directoryPresent = rootFileSystem.exists(schemaDirectory);
    if (!directoryPresent) {
      return null;
    }

    return new SchemaManager(schemaDirectory, rootFileSystem);
  } catch (IOException cause) {
    throw new DatasetIOException
        ("Cannot load schema manager at:" + schemaDirectory, cause);
  }
}
Example #13
Source File: TestFileSystemDatasetReader.java From kite with Apache License 2.0 | 6 votes |
@Test(expected = DatasetIOException.class) public void testEmptyFile() throws IOException { final Path emptyFile = new Path("/tmp/empty-file.avro"); // outside the try block; if this fails then it isn't correct to remove it Assert.assertTrue("Failed to create a new empty file", fileSystem.createNewFile(emptyFile)); try { AbstractDatasetReader<String> reader = new FileSystemDatasetReader<String>( fileSystem, emptyFile, STRING_SCHEMA, String.class); // the reader should not fail until open() Assert.assertNotNull(reader); reader.initialize(); } finally { Assert.assertTrue("Failed to clean up empty file", fileSystem.delete(emptyFile, true)); } }
Example #14
Source File: TaskUtil.java From kite with Apache License 2.0 | 6 votes |
/**
 * Adds each of the given jar paths to the distributed cache, unless the
 * distributed cache is being skipped or the list is null.
 *
 * @param jars A list of jar paths
 * @return this for method chaining
 * @throws DatasetIOException if a jar cannot be added to the cache
 *
 * @since 0.16.0
 */
public ConfigBuilder addJars(List<String> jars) {
  if (skipDistributedCache || jars == null) {
    return this;
  }

  for (String jar : jars) {
    try {
      final File jarFile = new File(jar);

      // Each jar must be an existing, readable regular file.
      Preconditions.checkArgument(jarFile.exists(),
          "Jar files does not exist: " + jar);
      Preconditions.checkArgument(jarFile.isFile(),
          "Not a file: " + jar);
      Preconditions.checkArgument(jarFile.canRead(),
          "Cannot read jar file: " + jar);

      DistCache.addJarToDistributedCache(conf, jarFile);
    } catch (IOException cause) {
      throw new DatasetIOException(
          "Cannot add jar to distributed cache: " + jar, cause);
    }
  }

  return this;
}
Example #15
Source File: InputFormatReader.java From kite with Apache License 2.0 | 6 votes |
/**
 * Marks the reader CLOSED and then closes the current record reader, if any.
 * Does nothing unless the reader is open.
 *
 * @throws DatasetIOException if the current reader fails to close
 */
@Override
public void close() {
  if (state.equals(ReaderWriterState.OPEN)) {
    // The state flips before the close is attempted.
    this.state = ReaderWriterState.CLOSED;

    try {
      if (currentReader != null) {
        currentReader.close();
      }
    } catch (IOException closeFailure) {
      throw new DatasetIOException("Unable to close reader path:" + path, closeFailure);
    }

    this.hasNext = false;
  }
}
Example #16
Source File: SignalManager.java From kite with Apache License 2.0 | 6 votes |
/** * Check the last time the specified constraints have been signaled as ready. * * @param viewConstraints The constraints to check for a signal. * * @return the timestamp of the last time the constraints were signaled as ready. * if the constraints have never been signaled, -1 will be returned. * * @throws DatasetException if the signals could not be accessed. */ public long getReadyTimestamp(Constraints viewConstraints) { String normalizedConstraints = getNormalizedConstraints(viewConstraints); Path signalPath = new Path(signalDirectory, normalizedConstraints); // check if the signal exists try { try { FileStatus signalStatus = rootFileSystem.getFileStatus(signalPath); return signalStatus.getModificationTime(); } catch (final FileNotFoundException ex) { // empty, will be thrown when the signal path doesn't exist } return -1; } catch (IOException e) { throw new DatasetIOException("Could not access signal path: " + signalPath, e); } }
Example #17
Source File: FileSystemDataset.java From kite with Apache License 2.0 | 6 votes |
/**
 * Validates the builder state and creates the dataset.
 *
 * The dataset directory comes from the descriptor's location. When no
 * FileSystem was supplied, one is resolved from that directory using the
 * configuration.
 *
 * @return the new dataset, rooted at the qualified directory
 * @throws DatasetIOException if the FileSystem cannot be accessed
 */
public FileSystemDataset<E> build() {
  Preconditions.checkState(this.namespace != null, "No namespace defined");
  Preconditions.checkState(this.name != null, "No dataset name defined");
  Preconditions.checkState(this.descriptor != null,
      "No dataset descriptor defined");
  Preconditions.checkState(!(conf == null && fileSystem == null),
      "Configuration or FileSystem must be set");
  Preconditions.checkState(type != null, "No type specified");

  final String location = descriptor.getLocation().toString();
  this.directory = new Path(location);

  if (fileSystem == null) {
    try {
      this.fileSystem = directory.getFileSystem(conf);
    } catch (IOException cause) {
      throw new DatasetIOException("Cannot access FileSystem", cause);
    }
  }

  final Path qualifiedDirectory = fileSystem.makeQualified(directory);
  return new FileSystemDataset<E>(
      fileSystem, qualifiedDirectory, namespace, name, descriptor, uri,
      partitionKey, partitionListener, type);
}
Example #18
Source File: PartitionStrategyParser.java From kite with Apache License 2.0 | 6 votes |
/**
 * Serializes a partition strategy to its JSON string form.
 *
 * @param strategy the strategy to serialize
 * @param pretty whether to use the default pretty printer
 * @return the JSON text
 * @throws DatasetIOException if writing to the JSON generator fails
 */
public static String toString(PartitionStrategy strategy, boolean pretty) {
  final StringWriter buffer = new StringWriter();
  try {
    final JsonGenerator generator = new JsonFactory().createGenerator(buffer);
    if (pretty) {
      generator.useDefaultPrettyPrinter();
    }
    generator.setCodec(new ObjectMapper());
    generator.writeTree(toJson(strategy));
    // Closing the generator happens before the buffer is read below.
    generator.close();
  } catch (IOException cause) {
    throw new DatasetIOException("Cannot write to JSON generator", cause);
  }
  return buffer.toString();
}
Example #19
Source File: FileSystemDataset.java From kite with Apache License 2.0 | 6 votes |
/**
 * Recursively deletes the directory for the given partition key.
 *
 * @param key the key of the partition to drop; must not be null
 * @throws DatasetIOException if the delete reports failure or raises an
 *         I/O error
 */
@Override
@SuppressWarnings("deprecation")
public void dropPartition(PartitionKey key) {
  Preconditions.checkState(descriptor.isPartitioned(),
      "Attempt to drop a partition on a non-partitioned dataset (name:%s)",
      name);
  Preconditions.checkNotNull(key, "Partition key may not be null");

  LOG.debug("Dropping partition with key:{} dataset:{}", key, name);

  final Path target = toDirectoryName(directory, key);

  try {
    final boolean removed = fileSystem.delete(target, true);
    if (removed) {
      return;
    }
    // A false result is reported through the same wrapped-exception path.
    throw new IOException("Partition directory " + target
        + " for key " + key + " does not exist");
  } catch (IOException cause) {
    throw new DatasetIOException("Unable to locate or drop dataset partition directory " +
        target, cause);
  }
}
Example #20
Source File: ColumnMappingParser.java From kite with Apache License 2.0 | 6 votes |
/**
 * Serializes a column mapping to its JSON string form.
 *
 * @param mapping the column mapping to serialize
 * @param pretty whether to use the default pretty printer
 * @return the JSON text
 * @throws DatasetIOException if writing to the JSON generator fails
 */
public static String toString(ColumnMapping mapping, boolean pretty) {
  final StringWriter buffer = new StringWriter();
  try {
    final JsonGenerator generator = new JsonFactory().createGenerator(buffer);
    if (pretty) {
      generator.useDefaultPrettyPrinter();
    }
    generator.setCodec(new ObjectMapper());
    generator.writeTree(toJson(mapping));
    // Closing the generator happens before the buffer is read below.
    generator.close();
  } catch (IOException cause) {
    throw new DatasetIOException("Cannot write to JSON generator", cause);
  }
  return buffer.toString();
}
Example #21
Source File: FileSystemDatasetReader.java From kite with Apache License 2.0 | 6 votes |
/**
 * Closes the underlying file reader if this reader is open; otherwise
 * returns without doing anything.
 *
 * @throws DatasetIOException if the file reader fails to close
 */
@Override
public void close() {
  if (state.equals(ReaderWriterState.OPEN)) {
    LOG.debug("Closing reader on path:{}", path);

    try {
      reader.close();
    } catch (IOException closeFailure) {
      throw new DatasetIOException("Unable to close reader path:" + path, closeFailure);
    }

    state = ReaderWriterState.CLOSED;
  }
}
Example #22
Source File: FileSystemDatasetReader.java From kite with Apache License 2.0 | 6 votes |
/**
 * Opens the Avro data file at this reader's path and marks the reader open.
 * May only be called while the reader is NEW.
 *
 * If the data file reader cannot be constructed (for example, the file is
 * not a valid Avro data file), the input stream that was opened for it is
 * closed before the failure propagates, so the stream does not leak.
 *
 * @throws DatasetIOException if the file cannot be opened or read
 */
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "A reader may not be opened more than once - current state:%s", state);

  LOG.debug("Opening reader on path:{}", path);

  AvroFSInput input = null;
  boolean opened = false;
  try {
    // Look up the length first so no stream is open if the status call fails.
    long length = fileSystem.getFileStatus(path).getLen();
    input = new AvroFSInput(fileSystem.open(path), length);
    reader = new DataFileReader<E>(input,
        DataModelUtil.getDatumReaderForType(type, schema));
    opened = true;
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create reader path:" + path, e);
  } finally {
    // On any failure after the stream was opened, release it here because
    // no reader exists to own and close it.
    if (!opened && input != null) {
      try {
        input.close();
      } catch (IOException closeFailure) {
        // Best effort: the original failure is the one worth reporting.
        LOG.debug("Failed to close input for path:{}", path, closeFailure);
      }
    }
  }

  state = ReaderWriterState.OPEN;
}
Example #23
Source File: SchemaManager.java From kite with Apache License 2.0 | 6 votes |
/** * Returns the path of the newest schema file, or null if none exists. */ private Path newestFile() { try { FileStatus[] statuses = rootFileSystem.listStatus(schemaDirectory); // No schema files exist, so return null; if (statuses.length == 0) { return null; } // Sort the schema files and return the newest one. Arrays.sort(statuses, new FileNameComparator()); return statuses[statuses.length - 1].getPath(); } catch (IOException e) { throw new DatasetIOException("Unable to list schema files.", e); } }
Example #24
Source File: ColumnMappingParser.java From kite with Apache License 2.0 | 5 votes |
/**
 * Serializes a single field mapping to its JSON string form.
 *
 * @param mapping the field mapping to serialize
 * @return the JSON text
 * @throws DatasetIOException if writing to the JSON generator fails
 */
public static String toString(FieldMapping mapping) {
  final StringWriter buffer = new StringWriter();
  try {
    final JsonGenerator generator = new JsonFactory().createGenerator(buffer);
    generator.setCodec(new ObjectMapper());
    generator.writeTree(toJson(mapping));
    // Closing the generator happens before the buffer is read below.
    generator.close();
  } catch (IOException cause) {
    throw new DatasetIOException("Cannot write to JSON generator", cause);
  }
  return buffer.toString();
}
Example #25
Source File: BaseEntityBatch.java From kite with Apache License 2.0 | 5 votes |
/**
 * Flushes pending commits, re-enables auto flush and closes the table when
 * this batch is open; otherwise does nothing.
 *
 * @throws DatasetIOException if flushing or closing the table fails
 */
@Override
public void close() {
  if (!state.equals(ReaderWriterState.OPEN)) {
    return;
  }

  try {
    table.flushCommits();
    table.setAutoFlush(true);
    table.close();
  } catch (IOException closeFailure) {
    throw new DatasetIOException("Error closing table [" + table + "]", closeFailure);
  }

  state = ReaderWriterState.CLOSED;
}
Example #26
Source File: HiveUtils.java From kite with Apache License 2.0 | 5 votes |
/**
 * Resolves the FileSystem for a path, converting the checked IOException
 * into a DatasetIOException.
 *
 * @param conf the configuration used to resolve the FileSystem
 * @param path the path whose FileSystem is wanted
 * @return the FileSystem for {@code path}
 * @throws DatasetIOException if the FileSystem cannot be accessed
 */
static FileSystem fsForPath(Configuration conf, Path path) {
  final FileSystem resolved;
  try {
    resolved = path.getFileSystem(conf);
  } catch (IOException cause) {
    throw new DatasetIOException("Cannot access FileSystem for uri:" + path, cause);
  }
  return resolved;
}
Example #27
Source File: Loader.java From kite with Apache License 2.0 | 5 votes |
@Override public DatasetRepository getFromOptions(Map<String, String> match) { LOG.debug("External URI options: {}", match); final Path root; String path = match.get("path"); if (match.containsKey("absolute") && Boolean.valueOf(match.get("absolute"))) { root = (path == null || path.isEmpty()) ? new Path("/") : new Path("/", path); } else { root = (path == null || path.isEmpty()) ? new Path(".") : new Path(path); } // make a modifiable copy (it may be changed) Configuration conf = newHiveConf(DefaultConfiguration.get()); FileSystem fs; try { fs = FileSystem.get(fileSystemURI(match, conf), conf); } catch (IOException e) { // "Incomplete HDFS URI, no host" => add a helpful suggestion if (e.getMessage().startsWith("Incomplete")) { throw new DatasetIOException("Could not get a FileSystem: " + "make sure the default " + match.get(URIPattern.SCHEME) + " URI is configured.", e); } throw new DatasetIOException("Could not get a FileSystem", e); } // setup the MetaStore URI setMetaStoreURI(conf, match); return new HiveManagedDatasetRepository.Builder() .configuration(conf) .rootDirectory(fs.makeQualified(root)) .build(); }
Example #28
Source File: TaskUtil.java From kite with Apache License 2.0 | 5 votes |
/**
 * Locates the jar containing the given class and adds it to the distributed
 * cache configuration, unless the distributed cache is being skipped.
 *
 * @param requiredClass a class required for a MR job
 * @return this for method chaining
 * @throws DatasetIOException if the jar cannot be added to the cache
 */
public ConfigBuilder addJarForClass(Class<?> requiredClass) {
  if (skipDistributedCache) {
    return this;
  }

  final File jar = findJarForClass(requiredClass);
  try {
    DistCache.addJarToDistributedCache(conf, jar);
  } catch (IOException cause) {
    throw new DatasetIOException(
        "Cannot add jar to distributed cache: " + jar, cause);
  }

  return this;
}
Example #29
Source File: FileSystemDatasetRepository.java From kite with Apache License 2.0 | 5 votes |
/**
 * Resolves the FileSystem for a data path, converting the checked
 * IOException into a DatasetIOException.
 *
 * @param dataPath the path whose FileSystem is wanted
 * @param conf the configuration used to resolve the FileSystem
 * @return the FileSystem for {@code dataPath}
 * @throws DatasetIOException if the FileSystem cannot be obtained
 */
private static FileSystem fsForPath(Path dataPath, Configuration conf) {
  final FileSystem resolved;
  try {
    resolved = dataPath.getFileSystem(conf);
  } catch (IOException cause) {
    throw new DatasetIOException(
        "Cannot get FileSystem for descriptor", cause);
  }
  return resolved;
}
Example #30
Source File: HiveExternalMetadataProvider.java From kite with Apache License 2.0 | 5 votes |
/**
 * Creates a metadata provider whose root directory is qualified against
 * the root path's FileSystem.
 *
 * @param conf the configuration, also passed to the superclass
 * @param rootDirectory the root path; must not be null
 * @throws DatasetIOException if the FileSystem for the root cannot be obtained
 */
public HiveExternalMetadataProvider(Configuration conf, Path rootDirectory) {
  super(conf);
  Preconditions.checkNotNull(rootDirectory, "Root cannot be null");

  try {
    this.rootFileSystem = rootDirectory.getFileSystem(conf);
  } catch (IOException cause) {
    throw new DatasetIOException("Could not get FileSystem for root path", cause);
  }

  // Qualification happens once the FileSystem is resolved.
  this.rootDirectory = this.rootFileSystem.makeQualified(rootDirectory);
}