org.kitesdk.data.View Java Examples

The following examples show how to use org.kitesdk.data.View. Each example is drawn from an open source project; the source file and license are noted in its header.
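Before the examples, here is a minimal sketch of the typical View workflow: load a Dataset, refine it to a View with with(...), and iterate the matching records with a reader. The URI and the "favoriteColor" field are hypothetical placeholders, not taken from any example on this page.

// Hedged sketch; the dataset URI and field name are placeholders.
Dataset<GenericRecord> users = Datasets.load(
    "dataset:hdfs:/tmp/data/users", GenericRecord.class);

// Refine the dataset to the subset of records with a matching field value.
View<GenericRecord> greenUsers = users.with("favoriteColor", "green");

DatasetReader<GenericRecord> reader = null;
try {
  reader = greenUsers.newReader();
  for (GenericRecord record : reader) {
    System.out.println(record);
  }
} finally {
  if (reader != null) {
    reader.close();
  }
}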
Example #1
Source File: CrunchDatasets.java    From kite with Apache License 2.0
private GetStorageKey(View<E> view, int numPartitionWriters) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  // get serializable versions of transient objects
  this.strategyString = descriptor.getPartitionStrategy()
      .toString(false /* no white space */);
  this.schemaString = descriptor.getSchema()
      .toString(false /* no white space */);
  this.type = view.getType();
  if (view instanceof AbstractRefinableView) {
    this.constraints = ((AbstractRefinableView) view).getConstraints()
        .toQueryMap();
  } else {
    this.constraints = null;
  }
  this.numPartitionWriters = numPartitionWriters > 0 ? numPartitionWriters : 1;
}
 
Example #2
Source File: CreateEvents.java    From kite-examples with Apache License 2.0
@Override
public int run(List<String> args) throws Exception {

  Preconditions.checkState(!Datasets.exists(uri),
      "events dataset already exists");

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(StandardEvent.class).build();

  View<StandardEvent> events = Datasets.create(uri, descriptor, StandardEvent.class);
  DatasetWriter<StandardEvent> writer = events.newWriter();
  try {
    while (System.currentTimeMillis() - baseTimestamp < 36000) {
      writer.write(generateRandomEvent());
    }
  } finally {
    writer.close();
  }

  System.out.println("Generated " + counter + " events");

  return 0;
}
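Reading the generated events back is symmetric; a brief sketch assuming the same uri variable (this part is not in the original example):

// Hedged sketch: load the same dataset and stream the events back out.
View<StandardEvent> events = Datasets.load(uri, StandardEvent.class);
DatasetReader<StandardEvent> reader = events.newReader();
try {
  for (StandardEvent event : reader) {
    System.out.println(event);
  }
} finally {
  reader.close();
}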
 
Example #3
Source File: TestFileSystemDatasets.java    From kite with Apache License 2.0
@Test
public void testViewForMissingPartitionNames() {
  Path path = new Path("2014/3/14");

  // like PathConversion, this uses names from the partition strategy
  // and will accept partitions that don't have a "name=" component
  View<GenericRecord> view = FileSystemDatasets.viewForUri(
      dataset, path.toString());
  Assert.assertEquals("Should create correct view",
      view, dataset.getPartitionView(path));

  Constraints expected = ((AbstractRefinableView<GenericRecord>)
      dataset.with("y", 2014).with("m", 3).with("d", 14)).getConstraints();
  Constraints actual = ((AbstractRefinableView<GenericRecord>) view)
      .getConstraints();
  Assert.assertEquals("Constraints should match expected",
      expected, actual);
}
 
Example #4
Source File: DatasetTestUtilities.java    From kite with Apache License 2.0
public static void writeTestUsers(View<GenericData.Record> view, int count, int start, String... fields) {
  DatasetWriter<GenericData.Record> writer = null;
  try {
    writer = view.newWriter();
    for (int i = start; i < count + start; i++) {
      GenericRecordBuilder recordBuilder = new GenericRecordBuilder(
          view.getDataset().getDescriptor().getSchema()).set("username", "test-" + i);
      for (String field : fields) {
        recordBuilder.set(field, field + "-" + i);
      }
      writer.write(recordBuilder.build());
    }
    if (writer instanceof Flushable) {
      ((Flushable) writer).flush();
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example #5
Source File: TestCrunchDatasetsHBase.java    From kite with Apache License 2.0
@Test
public void testSourceView() throws IOException {
  String datasetName = tableName + ".TestGenericEntity";

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(testGenericEntity)
      .build();

  Dataset<GenericRecord> inputDataset = repo.create("default", "in", descriptor);
  Dataset<GenericRecord> outputDataset = repo.create("default", datasetName, descriptor);

  writeRecords(inputDataset, 10);

  View<GenericRecord> inputView = inputDataset
      .from("part1", new Utf8("part1_2")).to("part1", new Utf8("part1_7"))
      .from("part2", new Utf8("part2_2")).to("part2", new Utf8("part2_7"));
  Assert.assertEquals(6, datasetSize(inputView));

  Pipeline pipeline = new MRPipeline(TestCrunchDatasetsHBase.class, HBaseTestUtils.getConf());
  PCollection<GenericRecord> data = pipeline.read(
      CrunchDatasets.asSource(inputView));
  pipeline.write(data, CrunchDatasets.asTarget(outputDataset), Target.WriteMode.APPEND);
  pipeline.run();

  checkRecords(outputDataset, 6, 2);
}
 
Example #6
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0
@Test
public void testSourceView() throws IOException {
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder().hash(
      "username", 2).build();

  Dataset<Record> inputDataset = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).partitionStrategy(partitionStrategy).build());
  Dataset<Record> outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).format(Formats.PARQUET).build());

  writeTestUsers(inputDataset, 10);

  View<Record> inputView = inputDataset.with("username", "test-0");
  Assert.assertEquals(1, datasetSize(inputView));

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(inputView));
  pipeline.write(data, CrunchDatasets.asTarget(outputDataset), Target.WriteMode.APPEND);
  pipeline.run();

  Assert.assertEquals(1, datasetSize(outputDataset));
}
 
Example #7
Source File: TestFileSystemDatasets.java    From kite with Apache License 2.0
@Test
public void testViewForIncompleteUri() {
  Path path = new Path("/tmp/datasets/ns/test/y=2014/m=03");

  View<GenericRecord> view = FileSystemDatasets.viewForUri(
      dataset, path.toString());
  Assert.assertEquals("Should create correct view",
      view, dataset.getPartitionView(path));

  Constraints expected = ((AbstractRefinableView<GenericRecord>)
      dataset.with("y", 2014).with("m", 3)).getConstraints();
  Constraints actual = ((AbstractRefinableView<GenericRecord>) view)
      .getConstraints();
  Assert.assertEquals("Constraints should match expected",
      expected, actual);
}
 
Example #8
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0
@Test
public void testWriteModeOverwrite() throws IOException {
  Dataset<Record> inputDataset = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).build());
  Dataset<Record> outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).build());

  writeTestUsers(inputDataset, 1, 0);
  writeTestUsers(outputDataset, 1, 1);

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(inputDataset));
  pipeline.write(data, CrunchDatasets.asTarget((View<Record>) outputDataset),
      Target.WriteMode.OVERWRITE);

  pipeline.run();

  checkTestUsers(outputDataset, 1);
}
 
Example #9
Source File: TestFileSystemDatasets.java    From kite with Apache License 2.0
@Test
public void testViewForDifferentPartitionNames() {
  Path path = new Path("year=2014/month=3/day=14");

  // like PathConversion, this uses names from the partition strategy
  // and will accept partitions that have a different "name=" component
  View<GenericRecord> view = FileSystemDatasets.viewForUri(
      dataset, path.toString());
  Assert.assertEquals("Should create correct view",
      view, dataset.getPartitionView(path));

  Constraints expected = ((AbstractRefinableView<GenericRecord>)
      dataset.with("y", 2014).with("m", 3).with("d", 14)).getConstraints();
  Constraints actual = ((AbstractRefinableView<GenericRecord>) view)
      .getConstraints();
  Assert.assertEquals("Constraints should match expected",
      expected, actual);
}
 
Example #10
Source File: TestFileSystemDatasets.java    From kite with Apache License 2.0
@Test
public void testViewForUri() {
  Path path = new Path("/tmp/datasets/ns/test/y=2014/m=03/d=14");

  View<GenericRecord> view = FileSystemDatasets.viewForUri(
      dataset, "file:" + path);
  Assert.assertEquals("Should create correct view",
      view, dataset.getPartitionView(path));

  view = FileSystemDatasets.viewForUri(
      dataset, path.toString());
  Assert.assertEquals("Should create correct view",
      view, dataset.getPartitionView(path));

  Constraints expected = ((AbstractRefinableView<GenericRecord>)
      dataset.with("y", 2014).with("m", 3).with("d", 14)).getConstraints();
  Constraints actual = ((AbstractRefinableView<GenericRecord>) view)
      .getConstraints();
  Assert.assertEquals("Constraints should match expected",
      expected, actual);
}
 
Example #11
Source File: DatasetKeyInputFormat.java    From kite with Apache License 2.0
/**
 * Adds configuration for {@code DatasetKeyInputFormat} to read from the
 * given {@link Dataset} or {@link View} instance.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder readFrom(View<?> view) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  // if this is a partitioned dataset, add the partition location
  if (view instanceof FileSystemDataset) {
    conf.set(KITE_PARTITION_DIR, String.valueOf(descriptor.getLocation()));
  }
  // add descriptor properties to the config
  for (String property : descriptor.listProperties()) {
    conf.set(property, descriptor.getProperty(property));
  }

  if (DataModelUtil.isGeneric(view.getType())) {
    Schema datasetSchema = view.getDataset().getDescriptor().getSchema();
    // only set the read schema if the view is a projection
    if (!datasetSchema.equals(view.getSchema())) {
      withSchema(view.getSchema());
    }
  } else {
    withType(view.getType());
  }

  conf.set(KITE_INPUT_URI, view.getUri().toString());
  return this;
}
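In client code, readFrom is reached through the static configure entry point, as Example #12 below also shows; a minimal hedged sketch (the job setup and view URI are hypothetical placeholders):

// Hedged sketch: configure a MapReduce job to read from a view.
Job job = new Job();
View<GenericRecord> events = Datasets.load(
    "dataset:hdfs:/tmp/data/events", GenericRecord.class);
DatasetKeyInputFormat.configure(job).readFrom(events);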
 
Example #12
Source File: TestMapReduce.java    From kite with Apache License 2.0
@Test
@SuppressWarnings("deprecation")
public void testSignalReadyOutputView() throws Exception {
  Assume.assumeTrue(!Hadoop.isHadoop1());
  populateInputDataset();
  populateOutputDataset(); // existing output will be overwritten

  Job job = new Job();
  DatasetKeyInputFormat.configure(job).readFrom(inputDataset).withType(GenericData.Record.class);

  job.setMapperClass(LineCountMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);

  job.setReducerClass(GenericStatsReducer.class);

  View<Record> outputView = outputDataset.with("name", "apple", "banana", "carrot");
  DatasetKeyOutputFormat.configure(job).appendTo(outputView).withType(GenericData.Record.class);

  Assert.assertTrue(job.waitForCompletion(true));

  Assert.assertFalse("Output dataset should not be signaled ready",
      ((Signalable)outputDataset).isReady());
  Assert.assertTrue("Output view should be signaled ready",
      ((Signalable)outputView).isReady());
}
 
Example #13
Source File: TestFileSystemDatasets.java    From kite with Apache License 2.0
@Test
public void testIgnoresAuthority() {
  Path path = new Path("/tmp/datasets/ns/test/y=2014/m=03/d=14");

  View<GenericRecord> view = FileSystemDatasets.viewForUri(
      dataset, "file://127.0.0.1/tmp/datasets/ns/test/y=2014/m=03/d=14");
  Assert.assertEquals("Should create correct view",
      view, dataset.getPartitionView(path));

  Constraints expected = ((AbstractRefinableView<GenericRecord>)
      dataset.with("y", 2014).with("m", 3).with("d", 14)).getConstraints();
  Constraints actual = ((AbstractRefinableView<GenericRecord>) view)
      .getConstraints();
  Assert.assertEquals("Constraints should match expected",
      expected, actual);
}
 
Example #14
Source File: TestViewUris.java    From kite with Apache License 2.0
@Test
public void testURIStringEquality() {
  for (int i = 0; i < 10; i++) {
    String a = UUID.randomUUID().toString();
    String b = UUID.randomUUID().toString();
    String originalUri = "view:file:/tmp/test_name?color=" + a + "," + b;
    View<GenericRecord> view = Datasets.load(originalUri);
    String afterUri = view.getUri().toString();
    if (!originalUri.equals(afterUri)) {
      System.out.println("Iteration: " + i);
      System.out.println("Start: " + originalUri);
      System.out.println("End  : " + afterUri);
    }
    Assert.assertEquals(originalUri, afterUri);
  }
}
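The view: URIs exercised above encode constraints as query parameters, and loading one yields a View already refined by those constraints. A small hedged sketch reusing the test's URI shape (the color value is a placeholder):

View<GenericRecord> view = Datasets.load(
    "view:file:/tmp/test_name?color=green", GenericRecord.class);
// Roughly equivalent to loading the dataset and refining it:
// Datasets.load("dataset:file:/tmp/test_name").with("color", "green")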
 
Example #15
Source File: FileSystemDataset.java    From kite with Apache License 2.0
@Override
public boolean canReplace(View<E> part) {
  if (part instanceof FileSystemView) {
    return equals(part.getDataset()) &&
        ((FileSystemView) part).getConstraints().alignedWithBoundaries();
  } else if (part instanceof FileSystemDataset) {
    return equals(part);
  }
  return false;
}
 
Example #16
Source File: FileSystemDataset.java    From kite with Apache License 2.0 5 votes vote down vote up
View<E> viewForUri(URI location) {
  Preconditions.checkNotNull(location, "Partition location cannot be null");
  PartitionView<E> view = getPartitionView(location);
  if (view == unbounded) {
    return this;
  }
  return view;
}
 
Example #17
Source File: TestProjection.java    From kite with Apache License 2.0 5 votes vote down vote up
@Test
public void testSpecificProjectionAsType() throws IOException {
  Dataset<GenericRecord> original = Datasets.load(unbounded.getUri());

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = original.asType(StandardEvent.class).newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  final View<SmallEvent> smallEvents = original.asType(SmallEvent.class);

  Set<SmallEvent> expected = Sets.newHashSet(toSmallEvent(sepEvent),
      toSmallEvent(octEvent), toSmallEvent(novEvent));

  assertContentEquals(expected, smallEvents);

  TestHelpers.assertThrows("Should not be able to write small events",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          smallEvents.newWriter();
        }
      });
}
 
Example #18
Source File: DaoViewTest.java    From kite with Apache License 2.0
private void validRange(View<TestEntity> range, int startIdx, int endIdx) {
  int cnt = startIdx;
  DatasetReader<TestEntity> reader = range.newReader();
  try {
    for (TestEntity entity : reader) {
      Assert.assertEquals(Integer.toString(cnt), entity.getPart1());
      Assert.assertEquals(Integer.toString(cnt), entity.getPart2());
      cnt++;
    }
  } finally {
    reader.close();
  }
  Assert.assertEquals(endIdx, cnt);
}
 
Example #19
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
@SuppressWarnings("deprecation")
private static <E> View<E> load(JobContext jobContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  Class<E> type = getType(jobContext);

  String outputUri = conf.get(KITE_OUTPUT_URI);
  return Datasets.<E, View<E>>load(outputUri, type);
}
 
Example #20
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext taskAttemptContext) {
  Configuration conf = Hadoop.TaskAttemptContext.getConfiguration.invoke(taskAttemptContext);
  DefaultConfiguration.init(conf);
  View<E> view = load(taskAttemptContext);
  return usePerTaskAttemptDatasets(view, conf) ?
      new MergeOutputCommitter<E>() : new NullOutputCommitter();
}
 
Example #21
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
@Override
public void checkOutputSpecs(JobContext jobContext) {
  // The committer setup will fail if the output dataset does not exist
  View<E> target = load(jobContext);
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  switch (conf.getEnum(KITE_WRITE_MODE, WriteMode.DEFAULT)) {
    case APPEND:
      break;
    case OVERWRITE:
      // if the merge won't use replace, then delete the existing data
      if (!canReplace(target)) {
        target.deleteAll();
      }
      break;
    default:
    case DEFAULT:
      boolean isReady = false;
      if (target instanceof Signalable) {
        isReady = ((Signalable)target).isReady();
      }
      if (isReady || !target.isEmpty()) {
        throw new DatasetException(
            "View is not empty or has been signaled as ready: " + target);
      }
      break;
  }
}
 
Example #22
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public RecordWriter<E, Void> getRecordWriter(TaskAttemptContext taskAttemptContext) {
  Configuration conf = Hadoop.TaskAttemptContext
      .getConfiguration.invoke(taskAttemptContext);
  View<E> target = load(taskAttemptContext);
  View<E> working;

  if (usePerTaskAttemptDatasets(target, conf)) {
    working = loadOrCreateTaskAttemptView(taskAttemptContext);
  } else {
    working = target;
  }

  boolean copyRecords = conf.getBoolean(KITE_COPY_RECORDS, false);

  String partitionDir = conf.get(KITE_PARTITION_DIR);
  if (working.getDataset().getDescriptor().isPartitioned() &&
      partitionDir != null) {
    if (!(target instanceof FileSystemDataset)) {
      throw new UnsupportedOperationException("Partitions only supported for " +
          "FileSystemDataset. Dataset: " + target);
    }
    FileSystemDataset fsDataset = (FileSystemDataset) target;
    PartitionKey key = fsDataset.keyFromDirectory(new Path(partitionDir));
    if (key != null && !key.getValues().isEmpty()) {
      working = fsDataset.getPartition(key, true);
    }
    return new DatasetRecordWriter<E>(working, copyRecords);
  } else {
    return new DatasetRecordWriter<E>(working, copyRecords);
  }
}
 
Example #23
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public void commitJob(JobContext jobContext) throws IOException {
  Configuration conf = Hadoop.JobContext
      .getConfiguration.invoke(jobContext);
  DatasetRepository repo = getDatasetRepository(jobContext);
  boolean isTemp = repo instanceof TemporaryDatasetRepository;

  String jobDatasetName = getJobDatasetName(jobContext);
  View<E> targetView = load(jobContext);
  Dataset<E> jobDataset = repo.load(TEMP_NAMESPACE, jobDatasetName);
  WriteMode mode = conf.getEnum(KITE_WRITE_MODE, WriteMode.DEFAULT);
  if (mode == WriteMode.OVERWRITE && canReplace(targetView)) {
    ((Replaceable<View<E>>) targetView.getDataset()).replace(targetView, jobDataset);
  } else {
    ((Mergeable<Dataset<E>>) targetView.getDataset()).merge(jobDataset);
  }

  if (targetView instanceof Signalable) {
    ((Signalable)targetView).signalReady();
  }

  if (isTemp) {
    ((TemporaryDatasetRepository) repo).delete();
  } else {
    repo.delete(TEMP_NAMESPACE, jobDatasetName);
  }
}
 
Example #24
Source File: DatasetSink.java    From kite with Apache License 2.0
private DatasetWriter<GenericRecord> newWriter(
    final UserGroupInformation login, final URI uri) {
  View<GenericRecord> view = KerberosUtil.runPrivileged(login,
      new PrivilegedExceptionAction<Dataset<GenericRecord>>() {
        @Override
        public Dataset<GenericRecord> run() {
          return Datasets.load(uri);
        }
      });

  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  String formatName = descriptor.getFormat().getName();
  Preconditions.checkArgument(allowedFormats().contains(formatName),
      "Unsupported format: " + formatName);

  Schema newSchema = descriptor.getSchema();
  if (targetSchema == null || !newSchema.equals(targetSchema)) {
    this.targetSchema = descriptor.getSchema();
    // target dataset schema has changed, invalidate all readers based on it
    readers.invalidateAll();
  }

  this.reuseDatum = !("parquet".equals(formatName));
  this.datasetName = view.getDataset().getName();

  return view.newWriter();
}
 
Example #25
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testReflectProjectionLoad() throws IOException {
  Dataset<ReflectStandardEvent> original = repo.create(
      "ns", "reflectProjection",
      new DatasetDescriptor.Builder()
          .schema(ReflectStandardEvent.class)
          .build(),
      ReflectStandardEvent.class);

  DatasetWriter<ReflectStandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(new ReflectStandardEvent(sepEvent));
    writer.write(new ReflectStandardEvent(octEvent));
    writer.write(new ReflectStandardEvent(novEvent));
  } finally {
    Closeables.close(writer, false);
  }

  View<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
 
Example #26
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
/**
 * Adds configuration for {@code DatasetKeyOutputFormat} to write to the
 * given {@link Dataset} or {@link View} instance.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder writeTo(View<?> view) {
  if (view instanceof FileSystemDataset) {
    FileSystemDataset dataset = (FileSystemDataset) view;
    conf.set(KITE_PARTITION_DIR,
        String.valueOf(dataset.getDescriptor().getLocation()));
  }
  withType(view.getType());
  return writeTo(view.getUri());
}
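The output side mirrors readFrom; a hedged usage sketch (the job setup and URI are hypothetical placeholders; appendTo in Example #12 is the append-mode variant):

// Hedged sketch: configure a MapReduce job to write to a dataset or view.
Job job = new Job();
View<GenericRecord> target = Datasets.load(
    "dataset:hdfs:/tmp/data/out", GenericRecord.class);
DatasetKeyOutputFormat.configure(job).writeTo(target);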
 
Example #27
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0
private void runCheckpointPipeline(View<Record> inputView,
    View<Record> outputView) {
  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(inputView));
  pipeline.write(data, CrunchDatasets.asTarget(outputView),
      Target.WriteMode.CHECKPOINT);
  pipeline.done();
}
 
Example #28
Source File: CompactionTask.java    From kite with Apache License 2.0
@SuppressWarnings("unchecked")
private void checkCompactable(View<T> view) {
  Dataset<T> dataset = view.getDataset();
  if (!(dataset instanceof Replaceable)) {
    throw new IllegalArgumentException("Cannot compact dataset: " + dataset);
  }
  Replaceable<View<T>> replaceable = ((Replaceable<View<T>>) dataset);
  Preconditions.checkArgument(replaceable.canReplace(view),
      "Cannot compact view: " + view);
}
 
Example #29
Source File: TestCrunchDatasets.java    From kite with Apache License 2.0
@Test(expected = CrunchRuntimeException.class)
public void testWriteModeDefaultFailsWithExisting() throws IOException {
  Dataset<Record> inputDataset = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).build());
  Dataset<Record> outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA).build());

  writeTestUsers(inputDataset, 1, 0);
  writeTestUsers(outputDataset, 1, 0);

  Pipeline pipeline = new MRPipeline(TestCrunchDatasets.class);
  PCollection<GenericData.Record> data = pipeline.read(
      CrunchDatasets.asSource(inputDataset));
  pipeline.write(data, CrunchDatasets.asTarget((View<Record>) outputDataset));
}
 
Example #30
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0
@SuppressWarnings("unchecked")
private static boolean canReplace(View<?> view) {
  if (Hadoop.isHadoop1()) {
    // can't use replace because it is called in the OutputCommitter.
    return false;
  }
  Dataset<?> dataset = view.getDataset();
  return (dataset instanceof Replaceable &&
      ((Replaceable<View<?>>) dataset).canReplace(view));
}