Java Code Examples for org.kitesdk.data.Datasets#exists()

The following examples show how to use org.kitesdk.data.Datasets#exists(). They are drawn from open-source projects; each example notes its source file, project, and license.
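Before the project examples, here is a minimal standalone sketch of the check-then-create pattern most of them follow. The URI, record schema, and class name are hypothetical placeholders, not taken from any of the projects below.

import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.Datasets;

public class ExistsSketch {
  public static void main(String[] args) {
    // Hypothetical Hive dataset URI; substitute your own.
    String uri = "dataset:hive?dataset=events";

    // Datasets.exists(uri) is true only if a dataset is registered at that URI.
    if (!Datasets.exists(uri)) {
      // Create the dataset with an inline Avro schema when it is absent.
      Datasets.create(uri, new DatasetDescriptor.Builder()
          .schemaLiteral("{\"type\":\"record\",\"name\":\"Event\","
              + "\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}")
          .build());
    }
  }
}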
Example 1
Source File: CorrelateEvents.java    From kite-examples with Apache License 2.0
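This driver verifies that the input dataset exists, creates the Avro-formatted output dataset if it is missing, and then runs the correlation task.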
@Override
public int run(List<String> args) throws Exception {

  String inputUri = uri;
  String outputUri = "dataset:hive?dataset=correlated_events";

  if (args.size() == 1) {
    outputUri = args.get(0);
  }

  Preconditions.checkState(Datasets.exists(inputUri),
      "input dataset doesn't exists");

  if (!Datasets.exists(outputUri)) {
    Datasets.create(outputUri, new DatasetDescriptor.Builder()
        .format("avro")
        .schema(CorrelatedEvents.class)
        .build());
  }
  CorrelateEventsTask task = new CorrelateEventsTask(inputUri, outputUri);
  task.run();

  return 0;
}
 
Example 2
Source File: TestParquetImport.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
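A test tearDown that cleans up the file-backed dataset, using Datasets.exists() to guard the call to Datasets.delete().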
@Override
public void tearDown() {
  super.tearDown();
  String uri = "dataset:file:" + getTablePath();
  if (Datasets.exists(uri)) {
    Datasets.delete(uri);
  }
}
 
Example 3
Source File: KiteFromInitializer.java    From sqoop-on-spark with Apache License 2.0
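The FROM-side job initializer fails fast with a SqoopException when the source dataset does not exist.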
@Override
public void initialize(InitializerContext context,
    LinkConfiguration linkConfig, FromJobConfiguration fromJobConfig) {
  String uri = ConfigUtil.buildDatasetUri(
      linkConfig.linkConfig, fromJobConfig.fromJobConfig.uri);
  LOG.debug("Constructed dataset URI: " + uri);
  if (!Datasets.exists(uri)) {
    LOG.error("Dataset does not exist");
    throw new SqoopException(KiteConnectorError.GENERIC_KITE_CONNECTOR_0002);
  }
}
 
Example 4
Source File: KiteToInitializer.java    From sqoop-on-spark with Apache License 2.0
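The TO-side initializer makes the inverse check: creating a new job must not overwrite a dataset that already exists.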
@Override
public void initialize(InitializerContext context,
    LinkConfiguration linkConfig, ToJobConfiguration toJobConfig) {
  String uri = ConfigUtil.buildDatasetUri(
      linkConfig.linkConfig, toJobConfig.toJobConfig);
  LOG.debug("Constructed dataset URI: " + uri);
  if (Datasets.exists(uri)) {
    LOG.error("Overwrite an existing dataset is not expected in new create mode.");
    throw new SqoopException(KiteConnectorError.GENERIC_KITE_CONNECTOR_0001);
  }
}
 
Example 5
Source File: ParquetJob.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
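An import-job configurator that branches on Datasets.exists(): an existing dataset is either an error (DEFAULT mode) or loaded and schema-checked for compatibility, while a missing one is created.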
/**
 * Configure the import job. The import process will use a Kite dataset to
 * write data records into Parquet format internally. The input key class is
 * {@link org.apache.sqoop.lib.SqoopRecord}. The output key is
 * {@link org.apache.avro.generic.GenericRecord}.
 */
public static void configureImportJob(JobConf conf, Schema schema,
    String uri, WriteMode writeMode) throws IOException {
  Dataset dataset;

  // Add hive delegation token only if we don't already have one.
  if (uri.startsWith("dataset:hive")) {
    Configuration hiveConf = HiveConfig.getHiveConf(conf);
    if (isSecureMetastore(hiveConf)) {
      // Copy hive configs to job config
      HiveConfig.addHiveConfigs(hiveConf, conf);

      if (conf.getCredentials().getToken(new Text(HIVE_METASTORE_TOKEN_ALIAS)) == null) {
        addHiveDelegationToken(conf);
      }
    }
  }

  if (Datasets.exists(uri)) {
    if (WriteMode.DEFAULT.equals(writeMode)) {
      throw new IOException("Destination exists! " + uri);
    }

    dataset = Datasets.load(uri);
    Schema writtenWith = dataset.getDescriptor().getSchema();
    if (!SchemaValidationUtil.canRead(writtenWith, schema)) {
      throw new IOException(
          String.format("Expected schema: %s%nActual schema: %s",
              writtenWith, schema));
    }
  } else {
    dataset = createDataset(schema, getCompressionType(conf), uri);
  }
  conf.set(CONF_AVRO_SCHEMA, schema.toString());

  DatasetKeyOutputFormat.ConfigBuilder builder =
      DatasetKeyOutputFormat.configure(conf);
  if (WriteMode.OVERWRITE.equals(writeMode)) {
    builder.overwrite(dataset);
  } else if (WriteMode.APPEND.equals(writeMode)) {
    builder.appendTo(dataset);
  } else {
    builder.writeTo(dataset);
  }
}
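For context, a hypothetical call site for this configurator might look like the following; the JobConf, schema literal, and URI are placeholders, and WriteMode is assumed to be the enum referenced in the method above.

// Hypothetical usage; all values below are placeholders.
JobConf conf = new JobConf();
Schema schema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"Row\","
        + "\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
ParquetJob.configureImportJob(conf, schema,
    "dataset:hive?dataset=imports", WriteMode.APPEND);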