org.kitesdk.data.URIBuilder Java Examples

The following examples show how to use org.kitesdk.data.URIBuilder. They are drawn from open source projects; the source file and originating project are noted above each example.
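Before the examples, here is a minimal sketch of the most common calls. This is a sketch only, assuming kite-data-core is on the classpath; the repository path, namespace, dataset name, and option values are placeholders:

import java.net.URI;
import org.kitesdk.data.URIBuilder;

// Build a dataset URI from a repository URI, namespace, and dataset name.
URI datasetUri = new URIBuilder("repo:hdfs:/tmp/data", "default", "users").build();

// Static shortcut for the same common case.
URI same = URIBuilder.build("repo:hdfs:/tmp/data", "default", "users");

// Adding key-value options refines the result into a view URI,
// e.g. view:hdfs:/tmp/data/default/users?username=test-0
URI viewUri = new URIBuilder(datasetUri).with("username", "test-0").build();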
Example #1
Source File: TestCSVImportCommandCluster.java    From kite with Apache License 2.0
@Before
public void setup() throws Exception {
  TestUtil.run("create", datasetName, "-r", repoURI, "-s", avsc);

  this.dataset = Datasets.load(URIBuilder.build(repoURI, "default", datasetName),
      GenericData.Record.class);
  this.console = mock(Logger.class);
  this.command = new CSVImportCommand(console);
  command.setConf(new Configuration());
  // set the test repository information
  command.repoURI = repoURI;
  // set up the configuration as it would be with a cluster
  Configuration conf = getConfiguration();
  conf.setBoolean("kite.testing", true);
  command.setConf(conf);
}
 
Example #2
Source File: HiveAbstractMetadataProvider.java    From kite with Apache License 2.0
/**
 * Checks whether the Hive table {@code namespace.name} exists or if
 * {@code default.name} exists and should be used.
 *
 * @param namespace the requested namespace
 * @param name the table name
 * @param location location that should match or null to check the default
 * @return {@code namespace} if {@code namespace.name} exists; the default
 *         namespace if {@code default.name} exists with a matching location;
 *         {@code null} otherwise.
 */
protected String resolveNamespace(String namespace, String name,
                                  @Nullable URI location) {
  if (getMetaStoreUtil().exists(namespace, name)) {
    return namespace;
  }
  try {
    DatasetDescriptor descriptor = HiveUtils.descriptorForTable(
        conf, getMetaStoreUtil().getTable(URIBuilder.NAMESPACE_DEFAULT, name));
    URI expectedLocation = location;
    if (location == null) {
      expectedLocation = expectedLocation(namespace, name);
    }
    if ((expectedLocation == null) ||
        pathsEquivalent(expectedLocation, descriptor.getLocation())) {
      // table in the default db has the location that would have been used
      return URIBuilder.NAMESPACE_DEFAULT;
    }
    // fall through and return null
  } catch (DatasetNotFoundException e) {
    // fall through and return null
  }
  return null;
}
 
Example #3
Source File: HBaseDatasetRepository.java    From kite with Apache License 2.0
@SuppressWarnings("unchecked")
private <E> RandomAccessDataset<E> newCompositeDataset(String namespace, String name, String tableName,
    List<DatasetDescriptor> descriptors, Class<E> type) {
  List<Class<SpecificRecord>> subEntityClasses = new ArrayList<Class<SpecificRecord>>();
  for (DatasetDescriptor descriptor : descriptors) {
    try {
      Class<SpecificRecord> subEntityClass = (Class<SpecificRecord>) Class
          .forName(descriptor.getSchema().getFullName());
      subEntityClasses.add(subEntityClass);
    } catch (ClassNotFoundException e) {
      throw new DatasetOperationException("Failed to resolve sub-type", e);
    }
  }
  Dao dao = SpecificAvroDao.buildCompositeDaoWithEntityManager(tablePool,
      tableName, subEntityClasses, schemaManager);
  return new DaoDataset<E>(namespace, name, dao, descriptors.get(0),
      new URIBuilder(repositoryUri, namespace, name).build(), type);
}
 
Example #4
Source File: TestFileSystemDataset.java    From kite with Apache License 2.0
@Test
public void signalReadyOnUnboundedDataset() {
  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(
          new DatasetDescriptor.Builder().schema(USER_SCHEMA).format(format)
              .location(testDirectory).build())
      .type(Record.class)
      .uri(URIBuilder.build(URI.create("repo:" + testDirectory.toUri()), "ns", "name"))
      .build();
  Assert.assertFalse("Unbounded dataset has not been signaled", ds.isReady());
  ds.signalReady();
  Assert.assertTrue("Unbounded dataset has been signaled and should be ready", ds.isReady());
}
 
Example #5
Source File: DatasetRepositories.java    From kite with Apache License 2.0
/**
 * Load a {@link DatasetRepository} for the given dataset, view or repository URI.
 * <p>
 * URI formats are defined by {@code Dataset} implementations, but must begin
 * with "repo:", "dataset:", or "view:".
 *
 * @param uri a repository, {@code Dataset}, or {@code View} URI.
 * @param <R> The type of {@code DatasetRepository} expected.
 * @return a {@code DatasetRepository} responsible for the given URI.
 */
@SuppressWarnings("unchecked")
public static <R extends DatasetRepository> R repositoryFor(URI uri) {
  boolean isRepoUri = URIBuilder.REPO_SCHEME.equals(uri.getScheme());
  Preconditions.checkArgument(isRepoUri ||
      URIBuilder.DATASET_SCHEME.equals(uri.getScheme()) ||
      URIBuilder.VIEW_SCHEME.equals(uri.getScheme()),
      "Not a repository, dataset, or view URI: " + uri);

  Pair<DatasetRepository, Map<String, String>> pair; 
  if (URIBuilder.REPO_SCHEME.equals(uri.getScheme())) {
    pair = Registration.lookupRepoUri(
        URI.create(uri.getRawSchemeSpecificPart()));
  } else {
    pair = Registration.lookupDatasetUri(
        URI.create(uri.getRawSchemeSpecificPart()));
  }

  return (R) pair.first();
}
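A hedged usage sketch for the method above (the HDFS path and dataset name are placeholders): a repository URI and a dataset URI within it both resolve to the repository responsible for the data.

DatasetRepository repo = DatasetRepositories.repositoryFor(
    URI.create("repo:hdfs:/tmp/data"));
DatasetRepository same = DatasetRepositories.repositoryFor(
    URI.create("dataset:hdfs:/tmp/data/default/users"));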
 
Example #6
Source File: FileSystemDatasetRepository.java    From kite with Apache License 2.0
@Override
public <E> Dataset<E> load(String namespace, String name, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");

  LOG.debug("Loading dataset: {}", name);

  DatasetDescriptor descriptor = metadataProvider.load(namespace, name);

  FileSystemDataset<E> ds = new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(descriptor)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(descriptor.isPartitioned() ? new PartitionKey() : null)
      .partitionListener(getPartitionListener())
      .build();

  LOG.debug("Loaded dataset:{}", ds);

  return ds;
}
 
Example #7
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0
@Test
public void testDatasetUris() throws IOException {
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder().hash(
      "username", 2).build();

  Dataset<Record> inputDataset = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());
  Dataset<Record> outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());

  writeTestUsers(inputDataset, 10);

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(new URIBuilder(repo.getUri(), "ns", "in").build(),
          GenericData.Record.class));
  pipeline.write(data, CrunchDatasets.asTarget(
      new URIBuilder(repo.getUri(), "ns", "out").build()), Target.WriteMode.APPEND);
  pipeline.run();

  Assert.assertEquals(10, datasetSize(outputDataset));
}
 
Example #8
Source File: AbstractKiteProcessor.java    From localization_nifi with Apache License 2.0
@Override
public ValidationResult validate(String subject, String uri, ValidationContext context) {
    String message = "not set";
    boolean isValid = true;

    if (uri.trim().isEmpty()) {
        isValid = false;
    } else {
        final boolean elPresent = context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(uri);
        if (!elPresent) {
            try {
                new URIBuilder(URI.create(uri)).build();
            } catch (RuntimeException e) {
                message = e.getMessage();
                isValid = false;
            }
        }
    }

    return new ValidationResult.Builder()
            .subject(subject)
            .input(uri)
            .explanation("Dataset URI is invalid: " + message)
            .valid(isValid)
            .build();
}
 
Example #9
Source File: ConfigUtil.java    From sqoop-on-spark with Apache License 2.0
/**
 * Returns a dataset URI, including the filesystem location part, if it is
 * provided separately.
 */
public static String buildDatasetUri(String authority, String uri) {
  if (!Strings.isNullOrEmpty(authority) && !uri.contains("://")) {
    URIBuilder builder = new URIBuilder(uri);

    String[] parts = authority.split(":");
    if (parts.length > 0) {
      builder.with("auth:host", parts[0]);
    }
    if (parts.length > 1) {
      builder.with("auth:port", parts[1]);
    }

    return builder.build().toString().replaceFirst("view:", "dataset:");
  }

  return uri;
}
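For illustration, a hedged sketch of both branches; the authority and dataset names are made up, and the second call assumes the Hive URI scheme is registered (kite-data-hive):

// The URI already contains "://", so it is returned unchanged:
ConfigUtil.buildDatasetUri("namenode:8020", "dataset:hdfs://other:8020/ns/users");

// No "://": the host and port from the authority are attached as the
// auth:host and auth:port options before the URI is rebuilt:
ConfigUtil.buildDatasetUri("namenode:8020", "dataset:hive:ns/users");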
 
Example #10
Source File: FileSystemDatasetRepository.java    From kite with Apache License 2.0
@Override
public <E> Dataset<E> update(String namespace, String name,
                             DatasetDescriptor descriptor, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");
  Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");

  DatasetDescriptor oldDescriptor = metadataProvider.load(namespace, name);

  // oldDescriptor is valid if load didn't throw DatasetNotFoundException
  Compatibility.checkUpdate(oldDescriptor, descriptor);

  DatasetDescriptor updatedDescriptor = metadataProvider.update(namespace, name, descriptor);

  LOG.debug("Updated dataset: {} schema: {} location: {}", new Object[] {
      name, updatedDescriptor.getSchema(), updatedDescriptor.getLocation() });

  return new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(updatedDescriptor)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(updatedDescriptor.isPartitioned() ? new PartitionKey() : null)
      .partitionListener(getPartitionListener())
      .build();
}
 
Example #11
Source File: KiteDatasetExecutor.java    From sqoop-on-spark with Apache License 2.0
/**
 * Workaround for managing temporary datasets: derives a repository URI from
 * the given dataset URI, then returns the URIs of datasets whose names
 * contain the temporary-dataset prefix.
 */
public static String[] listTemporaryDatasetUris(String uri) {
  String repo = URIBuilder.REPO_SCHEME +
      uri.substring(URIBuilder.DATASET_SCHEME.length());
  Set<String> result = new HashSet<String>();
  for (URI match : Datasets.list(repo)) {
    if (match.toString().contains(TEMPORARY_DATASET_PREFIX)) {
      result.add(match.toString());
    }
  }
  return result.toArray(new String[result.size()]);
}
 
Example #12
Source File: HBaseDatasetRepository.java    From kite with Apache License 2.0
@SuppressWarnings("unchecked")
private <E> RandomAccessDataset<E> newDataset(String namespace, String name, DatasetDescriptor descriptor, Class<E> type) {
  // TODO: use descriptor.getFormat() to decide type of DAO (Avro vs. other)
  String tableName = HBaseMetadataProvider.getTableName(name);
  String entityName = HBaseMetadataProvider.getEntityName(name);
  Dao dao;
  if (isSpecific(descriptor)) {
    dao = new SpecificAvroDao(tablePool, tableName, entityName, schemaManager);
  } else {
    dao = new GenericAvroDao(tablePool, tableName, entityName, schemaManager);
  }
  return new DaoDataset(namespace, name, dao, descriptor,
      new URIBuilder(repositoryUri, namespace, name).build(), type);
}
 
Example #13
Source File: TestFileSystemDataset.java    From kite with Apache License 2.0
@Test
public void testReadySignalUpdatesModifiedTime() {
  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(
          new DatasetDescriptor.Builder().schema(USER_SCHEMA).format(format)
              .location(testDirectory).build())
      .type(Record.class)
      .uri(URIBuilder.build(URI.create("repo:" + testDirectory.toUri()), "ns", "name"))
      .build();

   Assert.assertFalse("Dataset should not be ready before being signaled",
      ds.isReady());

  // the modified time depends on the filesystem, and may only be granular to
  // the second; signal and check until the modified time is after the current
  // time, or until enough time has passed that the signal should have been
  // distinguishable
  long signaledTime = 0;
  long currentTime = System.currentTimeMillis();
  while(currentTime >= signaledTime && (System.currentTimeMillis() - currentTime) <= 2000) {
    ds.signalReady();
    signaledTime = ds.getLastModified();
  }

  Assert.assertTrue("Dataset should have been signaled as ready", ds.isReady());
  Assert.assertTrue("Signal should update the modified time",
      signaledTime > currentTime);
  Assert.assertFalse("Only the dataset should have been signaled",
      ((Signalable)ds.with("username", "bob")).isReady());
}
 
Example #14
Source File: AbstractRefinableView.java    From kite with Apache License 2.0
@Override
public URI getUri() {
  URIBuilder builder = new URIBuilder(dataset.getUri());
  for (Map.Entry<String, String> entry : constraints.toQueryMap().entrySet()) {
    builder.with(entry.getKey(), entry.getValue());
  }
  return builder.build();
}
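As a hedged illustration (URIs assumed), a single constraint of username=test-0 turns a dataset URI into a view URI with a matching query option:

// dataset.getUri() -> dataset:hdfs:/tmp/data/ns/users
// getUri()         -> view:hdfs:/tmp/data/ns/users?username=test-0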
 
Example #15
Source File: TestDeleteCommand.java    From kite with Apache License 2.0
@Test
public void testDatasetUriSkipTrash() throws Exception {
  String datasetUri = new URIBuilder(repo.getUri(), "ns", "test")
      .build()
      .toString();
  Assert.assertTrue("Should be a dataset URI",
      datasetUri.startsWith("dataset:"));
  command.targets = Lists.newArrayList(datasetUri);
  command.skipTrash = true;
  command.run();
  verify(repo).delete("ns", "test");
  verify(console).debug(contains("Deleted"), eq(datasetUri));
}
 
Example #16
Source File: FileSystemDatasetRepository.java    From kite with Apache License 2.0
@Override
public <E> Dataset<E> create(String namespace, String name,
                             DatasetDescriptor descriptor, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");
  Preconditions.checkNotNull(descriptor, "Descriptor cannot be null");

  // suggest a location for this dataset: <root>/<namespace>/<name>/
  Path suggestedLocation = pathForDataset(namespace, name);

  DatasetDescriptor newDescriptor = descriptor;
  if (descriptor.getLocation() == null) {
    newDescriptor = new DatasetDescriptor.Builder(descriptor)
        .location(suggestedLocation) // may be overridden by MetadataProvider
        .build();
  }

  newDescriptor = metadataProvider.create(namespace, name, newDescriptor);

  FileSystemUtil.ensureLocationExists(newDescriptor, conf);

  LOG.debug("Created dataset: {} schema: {} datasetPath: {}", new Object[] {
      name, newDescriptor.getSchema(), newDescriptor.getLocation() });

  FileSystemDataset<E> dataset = new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(newDescriptor)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(newDescriptor.isPartitioned() ? new PartitionKey() : null)
      .partitionListener(getPartitionListener())
      .build();

  // notify the partition listener about any existing data partitions
  dataset.addExistingPartitions();

  return dataset;
}
 
Example #17
Source File: TestDeleteCommand.java    From kite with Apache License 2.0
@Test
public void testDatasetUri() throws Exception {
  String datasetUri = new URIBuilder(repo.getUri(), "ns", "test")
      .build()
      .toString();
  Assert.assertTrue("Should be a dataset URI",
      datasetUri.startsWith("dataset:"));
  command.targets = Lists.newArrayList(datasetUri);
  command.run();
  verify(repo).moveToTrash("ns", "test");
  verify(console).debug(contains("Deleted"), eq(datasetUri));
}
 
Example #18
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0
@Test
public void testViewUris() throws IOException {
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder().hash(
      "username", 2).build();

  Dataset<Record> inputDataset = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());
  Dataset<Record> outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());

  writeTestUsers(inputDataset, 10);

  URI sourceViewUri = new URIBuilder(repo.getUri(), "ns", "in").with("username",
      "test-0").build();
  View<Record> inputView = Datasets.<Record, Dataset<Record>> load(sourceViewUri,
      Record.class);
  Assert.assertEquals(1, datasetSize(inputView));

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(CrunchDatasets
      .asSource(sourceViewUri, GenericData.Record.class));
  URI targetViewUri = new URIBuilder(repo.getUri(), "ns", "out").with(
      "email", "email-0").build();
  pipeline.write(data, CrunchDatasets.asTarget(targetViewUri),
      Target.WriteMode.APPEND);
  pipeline.run();

  Assert.assertEquals(1, datasetSize(outputDataset));
}
 
Example #19
Source File: TestDeleteCommand.java    From kite with Apache License 2.0
@Test
public void testViewUri() throws Exception {
  DatasetDescriptor desc = new DatasetDescriptor.Builder()
      .schema(SchemaBuilder.record("Test").fields().requiredInt("prop").endRecord())
      .build();

  URI actualViewUri = new URIBuilder(repo.getUri(), "ns", "test")
      .with("prop", "34")
      .build();
  String viewUri = actualViewUri.toString();

  Assert.assertTrue("Should be a view URI",
      viewUri.startsWith("view:"));

  AbstractDataset ds = mock(AbstractDataset.class);
  when(repo.load("ns", "test", GenericRecord.class)).thenReturn(ds);
  when(ds.getDescriptor()).thenReturn(desc);
  AbstractRefinableView view = mock(AbstractRefinableView.class);
  when(ds.filter(any(Constraints.class))).thenReturn(view);
  when(view.getUri()).thenReturn(actualViewUri);

  command.targets = Lists.newArrayList(viewUri);
  command.run();

  verify(repo).load("ns", "test", GenericRecord.class);
  verify(view).moveToTrash();
  verify(console).debug(contains("Deleted"), eq(viewUri));
}
 
Example #20
Source File: TestTransformCommandCluster.java    From kite with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testPartitionedCopyWithNumWriters() throws Exception {
  command.repoURI = repoUri;
  command.numWriters = 3;
  command.datasets = Lists.newArrayList(source, "dest_partitioned");
  URI dsUri = URIBuilder.build("repo:" + repoUri, "default", "dest_partitioned");
  Datasets.<Object, Dataset<Object>>create(dsUri, new DatasetDescriptor.Builder()
      .partitionStrategy(new PartitionStrategy.Builder()
          .hash("id", 2)
          .build())
      .schema(SchemaBuilder.record("User").fields()
          .requiredLong("id")
          .optionalString("username")
          .optionalString("email")
          .endRecord())
      .build(), Object.class);

  int rc = command.run();
  Assert.assertEquals("Should return success", 0, rc);

  DatasetRepository repo = DatasetRepositories.repositoryFor("repo:" + repoUri);
  FileSystemDataset<GenericData.Record> ds =
      (FileSystemDataset<GenericData.Record>) repo.<GenericData.Record>
          load("default", "dest_partitioned");
  int size = DatasetTestUtilities.datasetSize(ds);
  Assert.assertEquals("Should contain copied records", 6, size);

  Assert.assertEquals("Should produce 2 partitions",
      2, Iterators.size(ds.pathIterator()));

  verify(console).info("Added {} records to \"{}\"", 6l, "dest_partitioned");
  verifyNoMoreInteractions(console);
}
 
Example #21
Source File: TestCompactCommandCluster.java    From kite with Apache License 2.0
@Before
public void createDatasets() throws Exception {
  repoUri = "hdfs://" + getDFS().getUri().getAuthority() + "/tmp/data";
  TestUtil.run("delete", unpartitioned, "-r", repoUri, "-d", "target/data");

  File csvFile = temp.newFile("users.csv");
  csvFile.delete();
  String csv = csvFile.toString();
  BufferedWriter writer = Files.newWriter(
      csvFile, CSVSchemaCommand.SCHEMA_CHARSET);

  writer.append("id,username,email\n");
  numRecords = 30;
  for(int i = 0; i < numRecords; i++) {
    writer.append(i+",test"+i+",test"+i+"@example.com\n");
  }
  writer.close();

  TestUtil.run("-v", "csv-schema", csv, "-o", avsc, "--class", "User");
  TestUtil.run("create", unpartitioned, "-s", avsc,
      "-r", repoUri, "-d", "target/data");

  URI dsUri = URIBuilder.build("repo:" + repoUri, "default", partitioned);
  Datasets.<Object, Dataset<Object>>create(dsUri, new DatasetDescriptor.Builder()
      .partitionStrategy(new PartitionStrategy.Builder()
          .hash("id", 2)
          .build())
      .schema(SchemaBuilder.record("User").fields()
          .requiredLong("id")
          .optionalString("username")
          .optionalString("email")
          .endRecord())
      .build(), Object.class);

  TestUtil.run("csv-import", csv, unpartitioned, "-r", repoUri, "-d", "target/data");
  TestUtil.run("csv-import", csv, partitioned, "-r", repoUri, "-d", "target/data");
}
 
Example #22
Source File: TestCSVImportCommand.java    From kite with Apache License 2.0
@Before
public void setup() throws Exception {
  TestUtil.run("-v", "create", datasetName,
      "--use-local", "-d", "target/data", "-s", avsc);
  this.dataset = Datasets.load(
      URIBuilder.build("repo:file:target/data", "default", datasetName),
      GenericData.Record.class);
  this.console = mock(Logger.class);
  this.command = new CSVImportCommand(console);
  command.setConf(new Configuration());
  // set the test repository information
  command.local = true;
  command.directory = "target/data";
}
 
Example #23
Source File: TestDeleteCommand.java    From kite with Apache License 2.0
@Test
public void testViewUriSkipTrash() throws Exception {
  DatasetDescriptor desc = new DatasetDescriptor.Builder()
      .schema(SchemaBuilder.record("Test").fields().requiredInt("prop").endRecord())
      .build();

  URI actualViewUri = new URIBuilder(repo.getUri(), "ns", "test")
      .with("prop", "34")
      .build();
  String viewUri = actualViewUri.toString();

  Assert.assertTrue("Should be a view URI",
      viewUri.startsWith("view:"));

  AbstractDataset ds = mock(AbstractDataset.class);
  when(repo.load("ns", "test", GenericRecord.class)).thenReturn(ds);
  when(ds.getDescriptor()).thenReturn(desc);
  AbstractRefinableView view = mock(AbstractRefinableView.class);
  when(ds.filter(any(Constraints.class))).thenReturn(view);
  when(view.getUri()).thenReturn(actualViewUri);

  command.targets = Lists.newArrayList(viewUri);
  command.skipTrash = true;
  command.run();

  verify(repo).load("ns", "test", GenericRecord.class);
  verify(view).deleteAll();
  verify(console).debug(contains("Deleted"), eq(viewUri));
}
 
Example #24
Source File: BaseDatasetCommand.java    From kite with Apache License 2.0
String buildDatasetUri(String uriOrName) {
  if (isDatasetOrViewUri(uriOrName)) {
    return uriOrName;
  }
  return new URIBuilder(buildRepoURI(), namespace, uriOrName).build().toString();
}
 
Example #25
Source File: DatasetSink.java    From kite with Apache License 2.0
@Override
public void configure(Context context) {
  // initialize login credentials
  this.login = KerberosUtil.login(
      context.getString(DatasetSinkConstants.AUTH_PRINCIPAL),
      context.getString(DatasetSinkConstants.AUTH_KEYTAB));
  String effectiveUser =
      context.getString(DatasetSinkConstants.AUTH_PROXY_USER);
  if (effectiveUser != null) {
    this.login = KerberosUtil.proxyAs(effectiveUser, login);
  }

  String datasetURI = context.getString(
      DatasetSinkConstants.CONFIG_KITE_DATASET_URI);
  if (datasetURI != null) {
    this.target = URI.create(datasetURI);
    this.datasetName = uriToName(target);
  } else {
    String repositoryURI = context.getString(
        DatasetSinkConstants.CONFIG_KITE_REPO_URI);
    Preconditions.checkNotNull(repositoryURI, "Repository URI is missing");
    this.datasetName = context.getString(
        DatasetSinkConstants.CONFIG_KITE_DATASET_NAME);
    Preconditions.checkNotNull(datasetName, "Dataset name is missing");

    this.target = new URIBuilder(repositoryURI, URIBuilder.NAMESPACE_DEFAULT,
        datasetName).build();
  }

  this.setName(target.toString());

  // other configuration
  this.batchSize = context.getLong(
      DatasetSinkConstants.CONFIG_KITE_BATCH_SIZE,
      DatasetSinkConstants.DEFAULT_BATCH_SIZE);
  this.rollIntervalS = context.getInteger(
      DatasetSinkConstants.CONFIG_KITE_ROLL_INTERVAL,
      DatasetSinkConstants.DEFAULT_ROLL_INTERVAL);

  this.counter = new SinkCounter(datasetName);
}
 
Example #26
Source File: Log4jAppender.java    From kite with Apache License 2.0
@Override
@SuppressWarnings({"unchecked", "deprecation"})
protected void populateAvroHeaders(Map<String, String> hdrs, Schema schema,
    Object message) {
  if (!initialized) {
    // initialize here rather than in activateOptions to avoid initialization
    // cycle in Configuration and log4j
    try {
      URI datasetUri;
      if (datasetNamespace == null) {
        datasetUri = new URIBuilder(datasetRepositoryUri, URIBuilder.NAMESPACE_DEFAULT, datasetName).build();
      } else {
        datasetUri = new URIBuilder(datasetRepositoryUri, datasetNamespace, datasetName).build();
      }
      Dataset<Object> dataset = Datasets.load(datasetUri, Object.class);
      if (dataset.getDescriptor().isPartitioned()) {
        partitionStrategy = dataset.getDescriptor().getPartitionStrategy();
        accessor = DataModelUtil.accessor(
            dataset.getType(), dataset.getDescriptor().getSchema());
        key = new StorageKey(partitionStrategy);
      }
      URL schemaUrl = dataset.getDescriptor().getSchemaUrl();
      if (schemaUrl != null) {
        setAvroSchemaUrl(schemaUrl.toExternalForm());
      }
    } catch (Exception e) {
      throw new FlumeException(e);
    } finally {
      initialized = true;
    }
  }
  super.populateAvroHeaders(hdrs, schema, message);
  if (partitionStrategy != null) {
    key.reuseFor(message, accessor);
    int i = 0;
    for (FieldPartitioner fp :
        Accessor.getDefault().getFieldPartitioners(partitionStrategy)) {
      hdrs.put(PARTITION_PREFIX + fp.getName(),
          PathConversion.valueToString(fp, key.get(i++)));
    }
  }
}
 
Example #27
Source File: TestDeleteCommand.java    From kite with Apache License 2.0
@Test
public void testViewUriWithTypo() throws Exception {
  DatasetDescriptor desc = new DatasetDescriptor.Builder()
      .schema(SchemaBuilder.record("Test").fields()
          .requiredLong("ts")
          .endRecord())
      .partitionStrategy(new PartitionStrategy.Builder()
          .year("ts")
          .month("ts")
          .day("ts")
          .build())
      .build();

  String viewUri = new URIBuilder(repo.getUri(), "ns", "test")
      .with("year", "2014")
      .with("month", "3")
      .with("dy", "14")
      .build()
      .toString();
  URI actualViewUri = new URIBuilder(repo.getUri(), "ns", "test")
      .with("month", "3")
      .with("year", "2014")
      .build();

  Assert.assertTrue("Should be a view URI",
      viewUri.startsWith("view:"));

  AbstractDataset ds = mock(AbstractDataset.class);
  when(repo.load("ns", "test", GenericRecord.class)).thenReturn(ds);
  when(ds.getDescriptor()).thenReturn(desc);
  AbstractRefinableView view = mock(AbstractRefinableView.class);
  when(ds.filter(any(Constraints.class))).thenReturn(view);
  when(view.getUri()).thenReturn(actualViewUri);

  command.targets = Lists.newArrayList(viewUri);
  TestHelpers.assertThrows("Should reject a view with missing attribute",
      IllegalArgumentException.class, new Runnable() {
        @Override
        public void run() {
          try {
            command.run();
          } catch (IOException e) {
            throw new RuntimeException("Caught IOException", e);
          }
        }
      });
}