org.kitesdk.data.URIBuilder Java Examples
The following examples show how to use
org.kitesdk.data.URIBuilder.
Each example is taken from an open-source project; the source file, originating project, and license are noted above it.
Example #1
Source File: TestCSVImportCommandCluster.java From kite with Apache License 2.0
@Before
public void setup() throws Exception {
  TestUtil.run("create", datasetName, "-r", repoURI, "-s", avsc);
  this.dataset = Datasets.load(
      URIBuilder.build(repoURI, "default", datasetName),
      GenericData.Record.class);
  this.console = mock(Logger.class);
  this.command = new CSVImportCommand(console);
  command.setConf(new Configuration());
  // set the test repository information
  command.repoURI = repoURI;
  // set up the configuration as it would be with a cluster
  Configuration conf = getConfiguration();
  conf.setBoolean("kite.testing", true);
  command.setConf(conf);
}
Example #2
Source File: HiveAbstractMetadataProvider.java From kite with Apache License 2.0
/**
 * Checks whether the Hive table {@code namespace.name} exists or if
 * {@code default.name} exists and should be used.
 *
 * @param namespace the requested namespace
 * @param name the table name
 * @param location location that should match or null to check the default
 * @return if namespace.name exists, namespace. if not and default.name
 *         exists, then default. {@code null} otherwise.
 */
protected String resolveNamespace(String namespace, String name,
                                  @Nullable URI location) {
  if (getMetaStoreUtil().exists(namespace, name)) {
    return namespace;
  }
  try {
    DatasetDescriptor descriptor = HiveUtils.descriptorForTable(
        conf, getMetaStoreUtil().getTable(URIBuilder.NAMESPACE_DEFAULT, name));
    URI expectedLocation = location;
    if (location == null) {
      expectedLocation = expectedLocation(namespace, name);
    }
    if ((expectedLocation == null) ||
        pathsEquivalent(expectedLocation, descriptor.getLocation())) {
      // table in the default db has the location that would have been used
      return URIBuilder.NAMESPACE_DEFAULT;
    }
    // fall through and return null
  } catch (DatasetNotFoundException e) {
    // fall through and return null
  }
  return null;
}
Example #3
Source File: HBaseDatasetRepository.java From kite with Apache License 2.0
@SuppressWarnings("unchecked") private <E> RandomAccessDataset<E> newCompositeDataset(String namespace, String name, String tableName, List<DatasetDescriptor> descriptors, Class<E> type) { List<Class<SpecificRecord>> subEntityClasses = new ArrayList<Class<SpecificRecord>>(); for (DatasetDescriptor descriptor : descriptors) { try { Class<SpecificRecord> subEntityClass = (Class<SpecificRecord>) Class .forName(descriptor.getSchema().getFullName()); subEntityClasses.add(subEntityClass); } catch (ClassNotFoundException e) { throw new DatasetOperationException("Failed to resolve sub-type", e); } } Dao dao = SpecificAvroDao.buildCompositeDaoWithEntityManager(tablePool, tableName, subEntityClasses, schemaManager); return new DaoDataset<E>(namespace, name, dao, descriptors.get(0), new URIBuilder(repositoryUri, namespace, name).build(), type); }
Example #4
Source File: TestFileSystemDataset.java From kite with Apache License 2.0
@Test
public void signalReadyOnUnboundedDataset() {
  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .format(format)
          .location(testDirectory)
          .build())
      .type(Record.class)
      .uri(URIBuilder.build(URI.create("repo:" + testDirectory.toUri()),
          "ns", "name"))
      .build();
  Assert.assertFalse("Unbounded dataset has not been signaled", ds.isReady());
  ds.signalReady();
  Assert.assertTrue("Unbounded dataset has been signaled and should be ready",
      ds.isReady());
}
Example #5
Source File: DatasetRepositories.java From kite with Apache License 2.0
/**
 * Load a {@link DatasetRepository} for the given dataset, view or repository URI.
 * <p>
 * URI formats are defined by {@code Dataset} implementations, but must begin
 * with "dataset:" or "view:".
 *
 * @param uri a {@code Dataset} or {@code View} URI.
 * @param <R> The type of {@code DatasetRepository} expected.
 * @return a {@code DatasetRepository} responsible for the given URI.
 */
@SuppressWarnings("unchecked")
public static <R extends DatasetRepository> R repositoryFor(URI uri) {
  boolean isRepoUri = URIBuilder.REPO_SCHEME.equals(uri.getScheme());
  Preconditions.checkArgument(isRepoUri ||
      URIBuilder.DATASET_SCHEME.equals(uri.getScheme()) ||
      URIBuilder.VIEW_SCHEME.equals(uri.getScheme()),
      "Not a repository, dataset, or view URI: " + uri);
  Pair<DatasetRepository, Map<String, String>> pair;
  if (URIBuilder.REPO_SCHEME.equals(uri.getScheme())) {
    pair = Registration.lookupRepoUri(
        URI.create(uri.getRawSchemeSpecificPart()));
  } else {
    pair = Registration.lookupDatasetUri(
        URI.create(uri.getRawSchemeSpecificPart()));
  }
  return (R) pair.first();
}
Example #6
Source File: FileSystemDatasetRepository.java From kite with Apache License 2.0
@Override
public <E> Dataset<E> load(String namespace, String name, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");
  LOG.debug("Loading dataset: {}", name);
  DatasetDescriptor descriptor = metadataProvider.load(namespace, name);
  FileSystemDataset<E> ds = new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(descriptor)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(descriptor.isPartitioned() ? new PartitionKey() : null)
      .partitionListener(getPartitionListener())
      .build();
  LOG.debug("Loaded dataset:{}", ds);
  return ds;
}
Example #7
Source File: TestCrunchDatasets.java From kite with Apache License 2.0
@Test
public void testDatasetUris() throws IOException {
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", 2).build();
  Dataset<Record> inputDataset = repo.create("ns", "in",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());
  Dataset<Record> outputDataset = repo.create("ns", "out",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());
  writeTestUsers(inputDataset, 10);
  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(new URIBuilder(repo.getUri(), "ns", "in").build(),
          GenericData.Record.class));
  pipeline.write(data,
      CrunchDatasets.asTarget(new URIBuilder(repo.getUri(), "ns", "out").build()),
      Target.WriteMode.APPEND);
  pipeline.run();
  Assert.assertEquals(10, datasetSize(outputDataset));
}
Example #8
Source File: AbstractKiteProcessor.java From localization_nifi with Apache License 2.0
@Override
public ValidationResult validate(String subject, String uri,
    ValidationContext context) {
  String message = "not set";
  boolean isValid = true;
  if (uri.trim().isEmpty()) {
    isValid = false;
  } else {
    final boolean elPresent = context.isExpressionLanguageSupported(subject)
        && context.isExpressionLanguagePresent(uri);
    if (!elPresent) {
      try {
        new URIBuilder(URI.create(uri)).build();
      } catch (RuntimeException e) {
        message = e.getMessage();
        isValid = false;
      }
    }
  }
  return new ValidationResult.Builder()
      .subject(subject)
      .input(uri)
      .explanation("Dataset URI is invalid: " + message)
      .valid(isValid)
      .build();
}
Example #9
Source File: ConfigUtil.java From sqoop-on-spark with Apache License 2.0
/**
 * Returns a dataset uri, including the filesystem location part, if it is
 * provided separately.
 */
public static String buildDatasetUri(String authority, String uri) {
  if (!Strings.isNullOrEmpty(authority) && !uri.contains("://")) {
    URIBuilder builder = new URIBuilder(uri);
    String[] parts = authority.split(":");
    if (parts.length > 0) {
      builder.with("auth:host", parts[0]);
    }
    if (parts.length > 1) {
      builder.with("auth:port", parts[1]);
    }
    return builder.build().toString().replaceFirst("view:", "dataset:");
  }
  return uri;
}
Example #10
Source File: AbstractKiteProcessor.java From nifi with Apache License 2.0
@Override
public ValidationResult validate(String subject, String uri,
    ValidationContext context) {
  String message = "not set";
  boolean isValid = true;
  if (uri.trim().isEmpty()) {
    isValid = false;
  } else {
    final boolean elPresent = context.isExpressionLanguageSupported(subject)
        && context.isExpressionLanguagePresent(uri);
    if (!elPresent) {
      try {
        new URIBuilder(URI.create(uri)).build();
      } catch (RuntimeException e) {
        message = e.getMessage();
        isValid = false;
      }
    }
  }
  return new ValidationResult.Builder()
      .subject(subject)
      .input(uri)
      .explanation("Dataset URI is invalid: " + message)
      .valid(isValid)
      .build();
}
Example #11
Source File: FileSystemDatasetRepository.java From kite with Apache License 2.0
@Override
public <E> Dataset<E> update(String namespace, String name,
    DatasetDescriptor descriptor, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");
  Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");
  DatasetDescriptor oldDescriptor = metadataProvider.load(namespace, name);
  // oldDescriptor is valid if load didn't throw NoSuchDatasetException
  Compatibility.checkUpdate(oldDescriptor, descriptor);
  DatasetDescriptor updatedDescriptor =
      metadataProvider.update(namespace, name, descriptor);
  LOG.debug("Updated dataset: {} schema: {} location: {}", new Object[] {
      name, updatedDescriptor.getSchema(), updatedDescriptor.getLocation() });
  return new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(updatedDescriptor)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(updatedDescriptor.isPartitioned() ? new PartitionKey() : null)
      .partitionListener(getPartitionListener())
      .build();
}
Example #12
Source File: KiteDatasetExecutor.java From sqoop-on-spark with Apache License 2.0
/**
 * Workaround for managing temporary datasets.
 */
public static String[] listTemporaryDatasetUris(String uri) {
  String repo = URIBuilder.REPO_SCHEME +
      uri.substring(URIBuilder.DATASET_SCHEME.length());
  Set<String> result = new HashSet<String>();
  for (URI match : Datasets.list(repo)) {
    if (match.toString().contains(TEMPORARY_DATASET_PREFIX)) {
      result.add(match.toString());
    }
  }
  return result.toArray(new String[result.size()]);
}
Example #13
Source File: HBaseDatasetRepository.java From kite with Apache License 2.0
@SuppressWarnings("unchecked") private <E> RandomAccessDataset<E> newDataset(String namespace, String name, DatasetDescriptor descriptor, Class<E> type) { // TODO: use descriptor.getFormat() to decide type of DAO (Avro vs. other) String tableName = HBaseMetadataProvider.getTableName(name); String entityName = HBaseMetadataProvider.getEntityName(name); Dao dao; if (isSpecific(descriptor)) { dao = new SpecificAvroDao(tablePool, tableName, entityName, schemaManager); } else { dao = new GenericAvroDao(tablePool, tableName, entityName, schemaManager); } return new DaoDataset(namespace, name, dao, descriptor, new URIBuilder(repositoryUri, namespace, name).build(), type); }
Example #14
Source File: TestFileSystemDataset.java From kite with Apache License 2.0
@Test
public void testReadySignalUpdatesModifiedTime() {
  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .format(format)
          .location(testDirectory)
          .build())
      .type(Record.class)
      .uri(URIBuilder.build(URI.create("repo:" + testDirectory.toUri()),
          "ns", "name"))
      .build();
  Assert.assertFalse("Dataset should not be ready before being signaled",
      ds.isReady());
  // the modified time depends on the filesystem, and may only be granular
  // to the second. signal and check until the modified time is after the
  // current time, or until enough time has passed that the signal should
  // have been distinguishable
  long signaledTime = 0;
  long currentTime = System.currentTimeMillis();
  while (currentTime >= signaledTime &&
      (System.currentTimeMillis() - currentTime) <= 2000) {
    ds.signalReady();
    signaledTime = ds.getLastModified();
  }
  Assert.assertTrue("Dataset should have been signaled as ready", ds.isReady());
  Assert.assertTrue("Signal should update the modified time",
      signaledTime > currentTime);
  Assert.assertFalse("Only the dataset should have been signaled",
      ((Signalable) ds.with("username", "bob")).isReady());
}
Example #15
Source File: AbstractRefinableView.java From kite with Apache License 2.0
@Override
public URI getUri() {
  URIBuilder builder = new URIBuilder(dataset.getUri());
  for (Map.Entry<String, String> entry : constraints.toQueryMap().entrySet()) {
    builder.with(entry.getKey(), entry.getValue());
  }
  return builder.build();
}
Example #16
Source File: TestDeleteCommand.java From kite with Apache License 2.0
@Test
public void testDatasetUriSkipTrash() throws Exception {
  String datasetUri = new URIBuilder(repo.getUri(), "ns", "test")
      .build()
      .toString();
  Assert.assertTrue("Should be a dataset URI",
      datasetUri.startsWith("dataset:"));
  command.targets = Lists.newArrayList(datasetUri);
  command.skipTrash = true;
  command.run();
  verify(repo).delete("ns", "test");
  verify(console).debug(contains("Deleted"), eq(datasetUri));
}
Example #17
Source File: FileSystemDatasetRepository.java From kite with Apache License 2.0
@Override
public <E> Dataset<E> create(String namespace, String name,
    DatasetDescriptor descriptor, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");
  Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");
  // suggest a location for this dataset: <root>/<namespace>/<name>/
  Path suggestedLocation = pathForDataset(namespace, name);
  DatasetDescriptor newDescriptor = descriptor;
  if (descriptor.getLocation() == null) {
    newDescriptor = new DatasetDescriptor.Builder(descriptor)
        .location(suggestedLocation) // may be overridden by MetadataProvider
        .build();
  }
  newDescriptor = metadataProvider.create(namespace, name, newDescriptor);
  FileSystemUtil.ensureLocationExists(newDescriptor, conf);
  LOG.debug("Created dataset: {} schema: {} datasetPath: {}", new Object[] {
      name, newDescriptor.getSchema(), newDescriptor.getLocation() });
  FileSystemDataset<E> dataset = new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(newDescriptor)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(newDescriptor.isPartitioned() ? new PartitionKey() : null)
      .partitionListener(getPartitionListener())
      .build();
  // notify the partition listener about any existing data partitions
  dataset.addExistingPartitions();
  return dataset;
}
Example #18
Source File: TestDeleteCommand.java From kite with Apache License 2.0
@Test
public void testDatasetUri() throws Exception {
  String datasetUri = new URIBuilder(repo.getUri(), "ns", "test")
      .build()
      .toString();
  Assert.assertTrue("Should be a dataset URI",
      datasetUri.startsWith("dataset:"));
  command.targets = Lists.newArrayList(datasetUri);
  command.run();
  verify(repo).moveToTrash("ns", "test");
  verify(console).debug(contains("Deleted"), eq(datasetUri));
}
Example #19
Source File: TestCrunchDatasets.java From kite with Apache License 2.0
@Test
public void testViewUris() throws IOException {
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", 2).build();
  Dataset<Record> inputDataset = repo.create("ns", "in",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());
  Dataset<Record> outputDataset = repo.create("ns", "out",
      new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());
  writeTestUsers(inputDataset, 10);
  URI sourceViewUri = new URIBuilder(repo.getUri(), "ns", "in")
      .with("username", "test-0").build();
  View<Record> inputView = Datasets.<Record, Dataset<Record>>load(
      sourceViewUri, Record.class);
  Assert.assertEquals(1, datasetSize(inputView));
  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(sourceViewUri, GenericData.Record.class));
  URI targetViewUri = new URIBuilder(repo.getUri(), "ns", "out")
      .with("email", "email-0").build();
  pipeline.write(data, CrunchDatasets.asTarget(targetViewUri),
      Target.WriteMode.APPEND);
  pipeline.run();
  Assert.assertEquals(1, datasetSize(outputDataset));
}
Example #20
Source File: TestDeleteCommand.java From kite with Apache License 2.0
@Test
public void testViewUri() throws Exception {
  DatasetDescriptor desc = new DatasetDescriptor.Builder()
      .schema(SchemaBuilder.record("Test").fields()
          .requiredInt("prop").endRecord())
      .build();
  URI actualViewUri = new URIBuilder(repo.getUri(), "ns", "test")
      .with("prop", "34")
      .build();
  String viewUri = actualViewUri.toString();
  Assert.assertTrue("Should be a view URI", viewUri.startsWith("view:"));
  AbstractDataset ds = mock(AbstractDataset.class);
  when(repo.load("ns", "test", GenericRecord.class)).thenReturn(ds);
  when(ds.getDescriptor()).thenReturn(desc);
  AbstractRefinableView view = mock(AbstractRefinableView.class);
  when(ds.filter(any(Constraints.class))).thenReturn(view);
  when(view.getUri()).thenReturn(actualViewUri);
  command.targets = Lists.newArrayList(viewUri);
  command.run();
  verify(repo).load("ns", "test", GenericRecord.class);
  verify(view).moveToTrash();
  verify(console).debug(contains("Deleted"), eq(viewUri));
}
Example #21
Source File: TestTransformCommandCluster.java From kite with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testPartitionedCopyWithNumWriters() throws Exception {
  command.repoURI = repoUri;
  command.numWriters = 3;
  command.datasets = Lists.newArrayList(source, "dest_partitioned");
  URI dsUri = URIBuilder.build("repo:" + repoUri, "default", "dest_partitioned");
  Datasets.<Object, Dataset<Object>>create(dsUri,
      new DatasetDescriptor.Builder()
          .partitionStrategy(new PartitionStrategy.Builder()
              .hash("id", 2)
              .build())
          .schema(SchemaBuilder.record("User").fields()
              .requiredLong("id")
              .optionalString("username")
              .optionalString("email")
              .endRecord())
          .build(), Object.class);
  int rc = command.run();
  Assert.assertEquals("Should return success", 0, rc);
  DatasetRepository repo = DatasetRepositories.repositoryFor("repo:" + repoUri);
  FileSystemDataset<GenericData.Record> ds =
      (FileSystemDataset<GenericData.Record>)
          repo.<GenericData.Record>load("default", "dest_partitioned");
  int size = DatasetTestUtilities.datasetSize(ds);
  Assert.assertEquals("Should contain copied records", 6, size);
  Assert.assertEquals("Should produce 2 partitions",
      2, Iterators.size(ds.pathIterator()));
  verify(console).info("Added {} records to \"{}\"", 6l, "dest_partitioned");
  verifyNoMoreInteractions(console);
}
Example #22
Source File: TestCompactCommandCluster.java From kite with Apache License 2.0
@Before
public void createDatasets() throws Exception {
  repoUri = "hdfs://" + getDFS().getUri().getAuthority() + "/tmp/data";
  TestUtil.run("delete", unpartitioned, "-r", repoUri, "-d", "target/data");
  File csvFile = temp.newFile("users.csv");
  csvFile.delete();
  String csv = csvFile.toString();
  BufferedWriter writer = Files.newWriter(
      csvFile, CSVSchemaCommand.SCHEMA_CHARSET);
  writer.append("id,username,email\n");
  numRecords = 30;
  for (int i = 0; i < numRecords; i++) {
    writer.append(i + ",test" + i + ",test" + i + "@example.com\n");
  }
  writer.close();
  TestUtil.run("-v", "csv-schema", csv, "-o", avsc, "--class", "User");
  TestUtil.run("create", unpartitioned, "-s", avsc, "-r", repoUri,
      "-d", "target/data");
  URI dsUri = URIBuilder.build("repo:" + repoUri, "default", partitioned);
  Datasets.<Object, Dataset<Object>>create(dsUri,
      new DatasetDescriptor.Builder()
          .partitionStrategy(new PartitionStrategy.Builder()
              .hash("id", 2)
              .build())
          .schema(SchemaBuilder.record("User").fields()
              .requiredLong("id")
              .optionalString("username")
              .optionalString("email")
              .endRecord())
          .build(), Object.class);
  TestUtil.run("csv-import", csv, unpartitioned, "-r", repoUri, "-d", "target/data");
  TestUtil.run("csv-import", csv, partitioned, "-r", repoUri, "-d", "target/data");
}
Example #23
Source File: TestCSVImportCommand.java From kite with Apache License 2.0
@Before
public void setup() throws Exception {
  TestUtil.run("-v", "create", datasetName, "--use-local", "-d", "target/data",
      "-s", avsc);
  this.dataset = Datasets.load(
      URIBuilder.build("repo:file:target/data", "default", datasetName),
      GenericData.Record.class);
  this.console = mock(Logger.class);
  this.command = new CSVImportCommand(console);
  command.setConf(new Configuration());
  // set the test repository information
  command.local = true;
  command.directory = "target/data";
}
Example #24
Source File: TestDeleteCommand.java From kite with Apache License 2.0
@Test
public void testViewUriSkipTrash() throws Exception {
  DatasetDescriptor desc = new DatasetDescriptor.Builder()
      .schema(SchemaBuilder.record("Test").fields()
          .requiredInt("prop").endRecord())
      .build();
  URI actualViewUri = new URIBuilder(repo.getUri(), "ns", "test")
      .with("prop", "34")
      .build();
  String viewUri = actualViewUri.toString();
  Assert.assertTrue("Should be a view URI", viewUri.startsWith("view:"));
  AbstractDataset ds = mock(AbstractDataset.class);
  when(repo.load("ns", "test", GenericRecord.class)).thenReturn(ds);
  when(ds.getDescriptor()).thenReturn(desc);
  AbstractRefinableView view = mock(AbstractRefinableView.class);
  when(ds.filter(any(Constraints.class))).thenReturn(view);
  when(view.getUri()).thenReturn(actualViewUri);
  command.targets = Lists.newArrayList(viewUri);
  command.skipTrash = true;
  command.run();
  verify(repo).load("ns", "test", GenericRecord.class);
  verify(view).deleteAll();
  verify(console).debug(contains("Deleted"), eq(viewUri));
}
Example #25
Source File: BaseDatasetCommand.java From kite with Apache License 2.0
String buildDatasetUri(String uriOrName) {
  if (isDatasetOrViewUri(uriOrName)) {
    return uriOrName;
  }
  return new URIBuilder(buildRepoURI(), namespace, uriOrName).build().toString();
}
Example #26
Source File: DatasetSink.java From kite with Apache License 2.0
@Override
public void configure(Context context) {
  // initialize login credentials
  this.login = KerberosUtil.login(
      context.getString(DatasetSinkConstants.AUTH_PRINCIPAL),
      context.getString(DatasetSinkConstants.AUTH_KEYTAB));
  String effectiveUser =
      context.getString(DatasetSinkConstants.AUTH_PROXY_USER);
  if (effectiveUser != null) {
    this.login = KerberosUtil.proxyAs(effectiveUser, login);
  }
  String datasetURI = context.getString(
      DatasetSinkConstants.CONFIG_KITE_DATASET_URI);
  if (datasetURI != null) {
    this.target = URI.create(datasetURI);
    this.datasetName = uriToName(target);
  } else {
    String repositoryURI = context.getString(
        DatasetSinkConstants.CONFIG_KITE_REPO_URI);
    Preconditions.checkNotNull(repositoryURI, "Repository URI is missing");
    this.datasetName = context.getString(
        DatasetSinkConstants.CONFIG_KITE_DATASET_NAME);
    Preconditions.checkNotNull(datasetName, "Dataset name is missing");
    this.target = new URIBuilder(repositoryURI, URIBuilder.NAMESPACE_DEFAULT,
        datasetName).build();
  }
  this.setName(target.toString());
  // other configuration
  this.batchSize = context.getLong(
      DatasetSinkConstants.CONFIG_KITE_BATCH_SIZE,
      DatasetSinkConstants.DEFAULT_BATCH_SIZE);
  this.rollIntervalS = context.getInteger(
      DatasetSinkConstants.CONFIG_KITE_ROLL_INTERVAL,
      DatasetSinkConstants.DEFAULT_ROLL_INTERVAL);
  this.counter = new SinkCounter(datasetName);
}
Example #27
Source File: Log4jAppender.java From kite with Apache License 2.0
@Override
@SuppressWarnings({"unchecked", "deprecation"})
protected void populateAvroHeaders(Map<String, String> hdrs, Schema schema,
    Object message) {
  if (!initialized) {
    // initialize here rather than in activateOptions to avoid initialization
    // cycle in Configuration and log4j
    try {
      URI datasetUri;
      if (datasetNamespace == null) {
        datasetUri = new URIBuilder(datasetRepositoryUri,
            URIBuilder.NAMESPACE_DEFAULT, datasetName).build();
      } else {
        datasetUri = new URIBuilder(datasetRepositoryUri, datasetNamespace,
            datasetName).build();
      }
      Dataset<Object> dataset = Datasets.load(datasetUri, Object.class);
      if (dataset.getDescriptor().isPartitioned()) {
        partitionStrategy = dataset.getDescriptor().getPartitionStrategy();
        accessor = DataModelUtil.accessor(
            dataset.getType(), dataset.getDescriptor().getSchema());
        key = new StorageKey(partitionStrategy);
      }
      URL schemaUrl = dataset.getDescriptor().getSchemaUrl();
      if (schemaUrl != null) {
        setAvroSchemaUrl(schemaUrl.toExternalForm());
      }
    } catch (Exception e) {
      throw new FlumeException(e);
    } finally {
      initialized = true;
    }
  }
  super.populateAvroHeaders(hdrs, schema, message);
  if (partitionStrategy != null) {
    key.reuseFor(message, accessor);
    int i = 0;
    for (FieldPartitioner fp :
        Accessor.getDefault().getFieldPartitioners(partitionStrategy)) {
      hdrs.put(PARTITION_PREFIX + fp.getName(),
          PathConversion.valueToString(fp, key.get(i++)));
    }
  }
}
Example #28
Source File: TestDeleteCommand.java From kite with Apache License 2.0
@Test
public void testViewUriWithTypo() throws Exception {
  DatasetDescriptor desc = new DatasetDescriptor.Builder()
      .schema(SchemaBuilder.record("Test").fields()
          .requiredLong("ts")
          .endRecord())
      .partitionStrategy(new PartitionStrategy.Builder()
          .year("ts")
          .month("ts")
          .day("ts")
          .build())
      .build();
  String viewUri = new URIBuilder(repo.getUri(), "ns", "test")
      .with("year", "2014")
      .with("month", "3")
      .with("dy", "14")
      .build()
      .toString();
  URI actualViewUri = new URIBuilder(repo.getUri(), "ns", "test")
      .with("month", "3")
      .with("year", "2014")
      .build();
  Assert.assertTrue("Should be a view URI", viewUri.startsWith("view:"));
  AbstractDataset ds = mock(AbstractDataset.class);
  when(repo.load("ns", "test", GenericRecord.class)).thenReturn(ds);
  when(ds.getDescriptor()).thenReturn(desc);
  AbstractRefinableView view = mock(AbstractRefinableView.class);
  when(ds.filter(any(Constraints.class))).thenReturn(view);
  when(view.getUri()).thenReturn(actualViewUri);
  command.targets = Lists.newArrayList(viewUri);
  TestHelpers.assertThrows("Should reject a view with missing attribute",
      IllegalArgumentException.class, new Runnable() {
        @Override
        public void run() {
          try {
            command.run();
          } catch (IOException e) {
            throw new RuntimeException("Caught IOException", e);
          }
        }
      });
}