org.kitesdk.data.DatasetException Java Examples

The following examples show how to use org.kitesdk.data.DatasetException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SpecificAvroDao.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a composite entity from its sub-entities.
 *
 * <p>A fresh entity is created through {@code entityConstructor}; each
 * non-null sub-entity is then stored in the entity at the position matching
 * its index in the list. Positions whose sub-entity is null are skipped.
 *
 * @param subEntities
 *          The sub-entities, in positional order.
 * @return The newly constructed entity holding the non-null sub-entities.
 * @throws DatasetException
 *           If the entity cannot be instantiated.
 */
@Override
public E compose(List<S> subEntities) {
  E composite;
  try {
    composite = entityConstructor.newInstance();
  } catch (Throwable cause) {
    String message = "Error trying to construct entity of type: "
        + entityClass.getName();
    LOG.error(message, cause);
    throw new DatasetException(cause);
  }

  // The position advances for every element, null or not, so that each
  // sub-entity keeps the slot given by its list index.
  int position = 0;
  for (S part : subEntities) {
    if (part != null) {
      composite.put(position, part);
    }
    position++;
  }
  return composite;
}
 
Example #2
Source File: SchemaTool.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new managed schema, or migrates an existing one if one exists for
 * the table name, entity name pair.
 *
 * <p>The managed schema is always prepared first. The HBase table itself is
 * only touched when that preparation reports that a migration is required
 * and {@code createTableAndFamilies} is true: an available table is
 * modified, otherwise the table is created.
 *
 * @param tableName
 *          The name of the table we'll be creating or migrating a schema for.
 * @param entitySchemaString
 *          The entity schema
 * @param createTableAndFamilies
 *          If true, will create the table for this schema if it doesn't
 *          exist, and will create families if they don't exist.
 * @throws DatasetException
 *           If an IOException occurs while inspecting, creating or modifying
 *           the table.
 * @throws InterruptedException
 *           Declared by this method; the helper that raises it is not
 *           visible from here.
 */
public void createOrMigrateSchema(String tableName, String entitySchemaString,
    boolean createTableAndFamilies) throws InterruptedException {
  boolean migrationRequired = prepareManagedSchema(tableName,
      entitySchemaString);
  if (!migrationRequired || !createTableAndFamilies) {
    // Nothing to do on the HBase side.
    return;
  }

  try {
    HTableDescriptor tableDescriptor = prepareTableDescriptor(tableName,
        entitySchemaString);
    boolean tableAvailable = hbaseAdmin.isTableAvailable(tableName);
    if (tableAvailable) {
      modifyTable(tableName, tableDescriptor);
    } else {
      createTable(tableDescriptor);
    }
  } catch (IOException ioe) {
    throw new DatasetException(ioe);
  }
}
 
Example #3
Source File: DefaultSchemaManager.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Get the schema parser by its classname. This method will cache the
 * constructed schema parsers.
 *
 * <p>If two callers race to construct a parser for the same class name, the
 * instance that was stored in the cache first is the one returned to both.
 *
 * @param schemaParserClassName
 *          The class name of the schema parser
 * @return The constructed schema parser.
 * @throws DatasetException
 *           If the class cannot be loaded or instantiated through its
 *           no-argument constructor.
 */
@SuppressWarnings("unchecked")
private KeyEntitySchemaParser<?, ?> getSchemaParser(
    String schemaParserClassName) {
  // Look the parser up by key. The legacy contains(Object) method on
  // Hashtable-style maps tests *values*, so using it with the class name
  // would never report a cache hit.
  KeyEntitySchemaParser<?, ?> cachedParser = schemaParsers
      .get(schemaParserClassName);
  if (cachedParser != null) {
    return cachedParser;
  }

  try {
    Class<KeyEntitySchemaParser<?, ?>> schemaParserClass = (Class<KeyEntitySchemaParser<?, ?>>) Class
        .forName(schemaParserClassName);
    KeyEntitySchemaParser<?, ?> schemaParser = schemaParserClass
        .getConstructor().newInstance();
    // putIfAbsent returns the previously stored parser when another caller
    // won the race; prefer that one so everybody shares the cached instance.
    KeyEntitySchemaParser<?, ?> existingParser = schemaParsers.putIfAbsent(
        schemaParserClassName, schemaParser);
    return existingParser != null ? existingParser : schemaParser;
  } catch (Exception e) {
    throw new DatasetException(
        "Could not instantiate schema parser class: "
            + schemaParserClassName, e);
  }
}
 
Example #4
Source File: AvroEntityComposer.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Build the appropriate AvroRecordBuilderFactory for this instance. Avro has
 * many different record types, of which we support two: Specific and Generic.
 *
 * <p>In specific mode the record class is loaded by the schema's full name;
 * otherwise a generic factory is built directly from the schema.
 *
 * @param schema
 *          The Avro schema needed to construct the AvroRecordBuilderFactory.
 * @return The constructed AvroRecordBuilderFactory.
 * @throws DatasetException
 *           If, in specific mode, the class named by the schema cannot be
 *           loaded.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
private AvroRecordBuilderFactory<E> buildAvroRecordBuilderFactory(
    Schema schema) {
  if (specific) {
    Class<E> specificClass;
    String className = schema.getFullName();
    try {
      specificClass = (Class<E>) Class.forName(className);
    } catch (ClassNotFoundException e) {
      // Attach the original exception as the cause so the class-loading
      // failure is not lost from the stack trace.
      throw new DatasetException("Could not get Class instance for "
          + className, e);
    }
    return new SpecificAvroRecordBuilderFactory(specificClass);
  } else {
    return (AvroRecordBuilderFactory<E>) new GenericAvroRecordBuilderFactory(
        schema);
  }
}
 
Example #5
Source File: Predicates.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a predicate from its string form.
 *
 * <p>An empty string yields the "exists" predicate. Otherwise the string is
 * offered, in order, to {@code Range}, {@code RegisteredPredicate} and
 * {@code In}; the first non-null result is returned.
 *
 * @param pString the predicate string to parse
 * @param schema the schema passed through to each parser
 * @return the parsed predicate
 * @throws DatasetException if none of the parsers produces a predicate
 */
public static <T> Predicate<T> fromString(String pString, Schema schema) {
  if (0 == pString.length()) {
    return exists();
  }

  Predicate<T> asRange = Range.fromString(pString, schema);
  if (asRange != null) {
    return asRange;
  }

  Predicate<T> asRegistered = RegisteredPredicate.fromString(pString, schema);
  if (asRegistered != null) {
    return asRegistered;
  }

  Predicate<T> asIn = In.fromString(pString, schema);
  if (asIn != null) {
    return asIn;
  }

  throw new DatasetException("Unknown predicate: " + pString);
}
 
Example #6
Source File: Predicates.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Renders a predicate in its normalized string form.
 *
 * <p>{@code Exists} renders as the empty string. An {@code In} predicate
 * whose normalized form is empty renders as {@code "in()"} so that it cannot
 * be confused with {@code Exists}.
 *
 * @param predicate the predicate to render
 * @param schema the schema passed through to the predicate's renderer
 * @return the normalized string form
 * @throws DatasetException if the predicate is of an unrecognized kind
 */
public static <T> String toNormalizedString(Predicate<T> predicate, Schema schema) {
  if (predicate instanceof Exists) {
    return "";
  }
  if (predicate instanceof Range) {
    return ((Range) predicate).toString(schema);
  }
  if (predicate instanceof In) {
    String normalized = ((In) predicate).toNormalizedString(schema);
    // "" is reserved for exists, so an empty set falls back to the named form
    return normalized.length() == 0 ? "in()" : normalized;
  }
  if (predicate instanceof RegisteredPredicate) {
    return RegisteredPredicate.toNormalizedString(
        (RegisteredPredicate) predicate, schema);
  }
  throw new DatasetException("Unknown predicate: " + predicate);
}
 
Example #7
Source File: Predicates.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Renders a predicate in its string form.
 *
 * <p>{@code Exists} renders as the empty string. An {@code In} predicate
 * whose string form is empty renders as {@code "in()"} so that it cannot be
 * confused with {@code Exists}.
 *
 * @param predicate the predicate to render
 * @param schema the schema passed through to the predicate's renderer
 * @return the string form
 * @throws DatasetException if the predicate is of an unrecognized kind
 */
public static <T> String toString(Predicate<T> predicate, Schema schema) {
  if (predicate instanceof Exists) {
    return "";
  }
  if (predicate instanceof Range) {
    return ((Range) predicate).toString(schema);
  }
  if (predicate instanceof In) {
    String rendered = ((In) predicate).toString(schema);
    // "" is reserved for exists, so an empty set falls back to the named form
    return rendered.length() == 0 ? "in()" : rendered;
  }
  if (predicate instanceof RegisteredPredicate) {
    return RegisteredPredicate.toString(
        (RegisteredPredicate) predicate, schema);
  }
  throw new DatasetException("Unknown predicate: " + predicate);
}
 
Example #8
Source File: SpecificAvroRecordBuilderFactory.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Construct the factory, giving it the class of the SpecificRecor the
 * builders will construct.
 * 
 * @param recordClass
 *          The class of the SpecificRecords the builders will construct.
 */
public SpecificAvroRecordBuilderFactory(Class<T> recordClass) {
  this.recordClass = recordClass;
  try {
    // Get the constructor of the class so we don't have to
    // perform this expensive reflection call for every
    // builder constructed.
    this.recordClassConstructor = recordClass.getConstructor();
  } catch (Exception e) {
    // A number of reflection exceptions could be caught here.
    // No good way to handle these types of exceptions, so
    // throw an DatasetException up to the user.
    String msg = "Could not get a default constructor for class: "
        + recordClass.toString();
    LOG.error(msg, e);
    throw new DatasetException(msg, e);
  }
}
 
Example #9
Source File: SpecificAvroDao.java    From kite with Apache License 2.0 6 votes vote down vote up
public SpecificCompositeAvroDao(HTablePool tablePool, String tableName,
    List<EntityMapper<S>> entityMappers, Class<E> entityClass) {

  super(tablePool, tableName, entityMappers);
  this.entityClass = entityClass;
  try {
    entityConstructor = entityClass.getConstructor();
    entitySchema = (Schema) entityClass.getDeclaredField("SCHEMA$").get(
        null);
  } catch (Throwable e) {
    LOG.error(
        "Error getting constructor or schema field for entity of type: "
            + entityClass.getName(), e);
    throw new DatasetException(e);
  }
}
 
Example #10
Source File: BaseEntityMapper.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an HBase {@code Increment} that adds {@code amount} to the column
 * backing the given counter field.
 *
 * <p>When no key SerDe is configured, a single zero byte is used as the row
 * key; otherwise the row key is the serialized partition key.
 *
 * @param key the partition key identifying the row
 * @param fieldName the name of the counter field
 * @param amount the amount to add
 * @return the increment for the field's family and qualifier
 * @throws DatasetException
 *           If the field is not in the schema or is not mapped as a counter.
 */
@Override
public Increment mapToIncrement(PartitionKey key, String fieldName,
    long amount) {
  FieldMapping counterMapping = entitySchema.getColumnMappingDescriptor()
      .getFieldMapping(fieldName);
  if (counterMapping == null) {
    throw new DatasetException("Unknown field in the schema: "
        + fieldName);
  }
  boolean isCounter = counterMapping.getMappingType() == MappingType.COUNTER;
  if (!isCounter) {
    throw new DatasetException("Field is not a counter type: "
        + fieldName);
  }

  byte[] rowKey = (keySerDe != null)
      ? keySerDe.serialize(key)
      : new byte[] { (byte) 0 };

  Increment result = new Increment(rowKey);
  result.addColumn(counterMapping.getFamily(),
      counterMapping.getQualifier(), amount);
  return result;
}
 
Example #11
Source File: DatasetKeyInputFormat.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the delegate input format for a single partition of a
 * file-system dataset.
 *
 * @param dataset the dataset; must be a {@code FileSystemDataset}
 * @param partitionDir the partition directory, converted to a partition key
 * @param conf the configuration handed to the delegate lookup
 * @return the delegate input format for the partition
 * @throws UnsupportedOperationException
 *           If the dataset is not a {@code FileSystemDataset}.
 * @throws DatasetException
 *           If no partition key can be derived from the directory.
 */
private InputFormat<E, Void> getDelegateInputFormatForPartition(Dataset<E> dataset,
    String partitionDir, Configuration conf) {
  boolean fileSystemBacked = dataset instanceof FileSystemDataset;
  if (!fileSystemBacked) {
    throw new UnsupportedOperationException(
        "Partitions only supported for FileSystemDataset. Dataset: " + dataset);
  }

  FileSystemDataset<E> fileSystemDataset = (FileSystemDataset<E>) dataset;
  LOG.debug("Getting delegate input format for dataset {} with partition directory {}",
      dataset, partitionDir);

  PartitionKey partitionKey = fileSystemDataset.keyFromDirectory(new Path(partitionDir));
  LOG.debug("Partition key: {}", partitionKey);
  if (partitionKey == null) {
    throw new DatasetException("Cannot find partition " + partitionDir);
  }

  PartitionedDataset<E> partition = fileSystemDataset.getPartition(partitionKey, false);
  LOG.debug("Partition: {}", partition);
  return getDelegateInputFormat(partition, conf);
}
 
Example #12
Source File: CompositeBaseDao.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a composite entity to a single merged {@code PutAction}.
 *
 * <p>The entity is decomposed into sub-entities, each non-null sub-entity is
 * mapped by the entity mapper at the same index, and the resulting puts are
 * merged. All sub-entity puts must target the same row.
 *
 * @param entity the composite entity to map
 * @return the merged put action for all non-null sub-entities
 * @throws DatasetException
 *           If two sub-entities serialize to different row keys.
 */
@Override
public PutAction mapFromEntity(E entity) {
  List<PutAction> puts = new ArrayList<PutAction>();
  List<S> subEntities = decompose(entity);
  byte[] keyBytes = null;
  for (int i = 0; i < entityMappers.size(); i++) {
    S subEntity = subEntities.get(i);
    if (subEntity == null) {
      continue;
    }
    PutAction put = entityMappers.get(i).mapFromEntity(subEntity);
    byte[] rowBytes = put.getPut().getRow();
    if (keyBytes == null) {
      // The first mapped sub-entity determines the expected row key.
      keyBytes = rowBytes;
    } else if (!Arrays.equals(keyBytes, rowBytes)) {
      throw new DatasetException(
          "Composite entity keys didn't serialize to the same row bytes.");
    }
    // Reuse the put that was just validated instead of mapping the
    // sub-entity a second time.
    puts.add(put);
  }
  return HBaseUtils.mergePutActions(keyBytes, puts);
}
 
Example #13
Source File: SingleFieldEntityFilter.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a filter that compares a single column-mapped field against a
 * value.
 *
 * @param entitySchema the schema used to look up the field's column mapping
 * @param entitySerDe used to serialize {@code filterValue} to column bytes
 * @param fieldName the name of the field to filter on
 * @param filterValue the value to compare the column against
 * @param equalityOperator the comparison operator handed to the HBase filter
 * @throws DatasetException
 *           If the field is not in the schema or is not a COLUMN mapping.
 */
public SingleFieldEntityFilter(EntitySchema entitySchema,
    EntitySerDe<?> entitySerDe, String fieldName, Object filterValue,
    CompareFilter.CompareOp equalityOperator) {
  FieldMapping fieldMapping = entitySchema.getColumnMappingDescriptor()
      .getFieldMapping(fieldName);
  if (fieldMapping == null) {
    // Report an unknown field explicitly rather than failing with a
    // NullPointerException on the mapping-type check below.
    throw new DatasetException("Unknown field in the schema: "
        + fieldName);
  }
  if (fieldMapping.getMappingType() != MappingType.COLUMN) {
    throw new DatasetException(
        "SingleColumnValueFilter only compatible with COLUMN mapping types.");
  }

  byte[] family = fieldMapping.getFamily();
  byte[] qualifier = fieldMapping.getQualifier();
  byte[] comparisonBytes = entitySerDe.serializeColumnValueToBytes(fieldName,
      filterValue);

  this.filter = new SingleColumnValueFilter(family, qualifier,
      equalityOperator, comparisonBytes);
}
 
Example #14
Source File: CompositeDaoTest.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Putting a composite record whose two sub-records differ in
 * {@code keyPart2} is expected to fail with a DatasetException.
 */
@Test(expected = DatasetException.class)
public void testIncompatibleKeys() throws Exception {
  // Composite DAO over the two sub-record schemas
  Dao<Map<String, SpecificRecord>> compositeDao = SpecificAvroDao.buildCompositeDao(
      tablePool, tableName,
      Arrays.asList(subRecord1String, subRecord2String));

  // Two sub-records that agree on keyPart1 but not on keyPart2
  SubRecord1 first = SubRecord1.newBuilder()
      .setKeyPart1("1")
      .setKeyPart2("1")
      .setField1("field1_1")
      .setField2("field1_2")
      .build();
  SubRecord2 second = SubRecord2.newBuilder()
      .setKeyPart1("1")
      .setKeyPart2("unmatched")
      .setField1("field2_1")
      .setField2("field2_2")
      .build();

  Map<String, SpecificRecord> composite = new HashMap<String, SpecificRecord>();
  composite.put("SubRecord1", first);
  composite.put("SubRecord2", second);

  // The put is the operation under test
  compositeDao.put(composite);
}
 
Example #15
Source File: TaskUtil.java    From kite with Apache License 2.0 6 votes vote down vote up
/**
 * Locates the jar (or other code-source location) a class was loaded from.
 *
 * @param requiredClass the class whose code source should be located
 * @return the code-source location as a {@code File}
 * @throws DatasetException
 *           If the class has no code source, the code source has no
 *           location, or the location is not a valid URI.
 */
private static File findJarForClass(Class<?> requiredClass) {
  ProtectionDomain domain = AccessController.doPrivileged(
      new GetProtectionDomain(requiredClass));
  CodeSource codeSource = domain.getCodeSource();
  // A missing code source should only happen for system classes. A code
  // source may also have been created without a location, in which case
  // getLocation() returns null; treat both as "cannot locate".
  if (codeSource == null || codeSource.getLocation() == null) {
    throw new DatasetException(
        "Cannot locate " + requiredClass.getName() + " jar");
  }
  try {
    return new File(codeSource.getLocation().toURI());
  } catch (URISyntaxException e) {
    throw new DatasetException(
        "Cannot locate " + requiredClass.getName() + " jar", e);
  }
}
 
Example #16
Source File: BaseEntityMapper.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the value of a counter field from an increment result.
 *
 * @param result the result to deserialize from
 * @param fieldName the name of the counter field
 * @return the deserialized counter value
 * @throws DatasetException
 *           If the field is not in the schema or is not mapped as a counter.
 */
@Override
public long mapFromIncrementResult(Result result, String fieldName) {
  FieldMapping counterMapping = entitySchema.getColumnMappingDescriptor()
      .getFieldMapping(fieldName);
  if (counterMapping == null) {
    throw new DatasetException("Unknown field in the schema: "
        + fieldName);
  }
  boolean isCounter = counterMapping.getMappingType() == MappingType.COUNTER;
  if (!isCounter) {
    throw new DatasetException("Field is not a counter type: "
        + fieldName);
  }
  Object counterValue = entitySerDe.deserialize(counterMapping, result);
  return (Long) counterValue;
}
 
Example #17
Source File: HiveUtils.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the type name that {@code PrimitiveObjectInspectorUtils} reports
 * for a Java class.
 *
 * @param type the Java class to translate
 * @return the type name for the class
 * @throws DatasetException if no type name is known for the class
 */
private static String getHiveType(Class<?> type) {
  String hiveTypeName =
      PrimitiveObjectInspectorUtils.getTypeNameFromPrimitiveJava(type);
  if (hiveTypeName != null) {
    return hiveTypeName;
  }
  throw new DatasetException("Unsupported FieldPartitioner type: " + type);
}
 
Example #18
Source File: SpecificAvroDao.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the static {@code SCHEMA$} field of an entity class via reflection.
 *
 * @param entityClass the class declaring the {@code SCHEMA$} field
 * @return the schema held by that field
 * @throws DatasetException if the field cannot be read for any reason
 */
private static Schema getSchemaFromEntityClass(Class<?> entityClass) {
  try {
    // A null receiver means the field is read as a static member.
    Object schemaValue = entityClass.getDeclaredField("SCHEMA$").get(null);
    return (Schema) schemaValue;
  } catch (Throwable cause) {
    String message =
        "Error getting schema from entity of type: " + entityClass.getName();
    LOG.error(message, cause);
    throw new DatasetException(cause);
  }
}
 
Example #19
Source File: RegexEntityFilter.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a filter that matches a single column-mapped field against a
 * regular expression.
 *
 * @param entitySchema the schema used to look up the field's column mapping
 * @param entitySerDe not used by this constructor's visible code
 * @param fieldName the name of the field to filter on
 * @param regex the regular expression to match the column value against
 * @param isEqual true to keep matching values, false to keep non-matching
 *          values
 * @throws DatasetException
 *           If the field is not in the schema, is not a COLUMN mapping, or
 *           the underlying filter cannot be constructed.
 */
public RegexEntityFilter(EntitySchema entitySchema,
    EntitySerDe<?> entitySerDe, String fieldName, String regex,
    boolean isEqual) {
  FieldMapping fieldMapping = entitySchema.getColumnMappingDescriptor()
      .getFieldMapping(fieldName);
  if (fieldMapping == null) {
    // Report an unknown field explicitly rather than failing with a
    // NullPointerException on the mapping-type check below.
    throw new DatasetException("Unknown field in the schema: "
        + fieldName);
  }
  if (fieldMapping.getMappingType() != MappingType.COLUMN) {
    throw new DatasetException(
        "SingleColumnValueFilter only compatible with COLUMN mapping types.");
  }

  this.filter = constructFilter(regex, isEqual, fieldMapping);
}
 
Example #20
Source File: RegexEntityFilter.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Reflectively builds a {@code SingleColumnValueFilter} backed by a
 * {@code RegexStringComparator}.
 *
 * @param regex the regular expression handed to the comparator
 * @param isEqual true for an EQUAL comparison, false for NOT_EQUAL
 * @param fieldMapping supplies the column family and qualifier
 * @return the constructed filter
 * @throws DatasetException
 *           If reflection fails or no suitable constructor exists.
 */
private Filter constructFilter(String regex, boolean isEqual, FieldMapping fieldMapping) {
  byte[] family = fieldMapping.getFamily();
  byte[] qualifier = fieldMapping.getQualifier();

  try {
    // To work with both HBase 0.94 and 0.96 we have to use reflection to construct a
    // SingleColumnValueFilter (and a RegexStringComparator) since
    // WritableByteArrayComparable (which RegexStringComparator extends) was renamed
    // to ByteArrayComparable in HBase 0.95 (HBASE-6658)
    Class<?> c = Class.forName("org.apache.hadoop.hbase.filter.SingleColumnValueFilter");
    for (Constructor<?> cons : c.getConstructors()) {
      if (cons.getParameterTypes().length == 4 &&
          !cons.getParameterTypes()[3].isArray()) { // not byte[] as the fourth arg
        Object regexStringComparator = Class.forName(
            "org.apache.hadoop.hbase.filter.RegexStringComparator")
            .getConstructor(String.class).newInstance(regex);
        return (Filter) cons.newInstance(family, qualifier,
            isEqual ? CompareFilter.CompareOp.EQUAL
            : CompareFilter.CompareOp.NOT_EQUAL, regexStringComparator);
      }
    }
  } catch (Exception e) {
    // The failure travels with the DatasetException as its cause, so there
    // is no need to also dump the stack trace to stderr here.
    throw new DatasetException("Cannot create RegexEntityFilter.", e);
  }
  throw new DatasetException("Cannot create RegexEntityFilter (no constructor found).");
}
 
Example #21
Source File: SchemaTool.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Adds to the given table every column family of {@code newDescriptor} that
 * the table does not already have.
 *
 * <p>Only the family name is carried over to the added family. If there is
 * anything to add, the table is disabled while the families are added and
 * is re-enabled afterwards, even if adding a family fails.
 *
 * @param tableName the name of the table to modify
 * @param newDescriptor the descriptor listing the wanted column families
 * @throws DatasetException if an IOException occurs while talking to HBase
 */
private void modifyTable(String tableName, HTableDescriptor newDescriptor) {
  LOG.info("Modifying table " + tableName);
  HColumnDescriptor[] wantedFamilies = newDescriptor.getColumnFamilies();
  try {
    HTableDescriptor existingTable = hbaseAdmin
        .getTableDescriptor(Bytes.toBytes(tableName));

    List<HColumnDescriptor> missingFamilies = Lists.newArrayList();
    for (HColumnDescriptor wanted : wantedFamilies) {
      if (existingTable.hasFamily(wanted.getName())) {
        continue;
      }
      missingFamilies.add(new HColumnDescriptor(wanted.getName()));
    }

    if (missingFamilies.isEmpty()) {
      // Nothing to add, so the table is left enabled and untouched.
      return;
    }

    hbaseAdmin.disableTable(tableName);
    try {
      for (HColumnDescriptor missing : missingFamilies) {
        hbaseAdmin.addColumn(tableName, missing);
      }
    } finally {
      hbaseAdmin.enableTable(tableName);
    }
  } catch (IOException ioe) {
    throw new DatasetException(ioe);
  }
}
 
Example #22
Source File: AvroDaoTest.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Putting a record whose {@code keyPart2} is null is expected to fail with
 * a DatasetException.
 */
@Test(expected = DatasetException.class)
public void testPutWithNullKey() throws Exception {
  Dao<GenericRecord> genericDao = new GenericAvroDao(tablePool, tableName,
      schemaString);

  @SuppressWarnings("deprecation")
  GenericRecord recordWithNullKeyPart =
      new GenericData.Record(Schema.parse(schemaString));
  recordWithNullKeyPart.put("keyPart1", "part1");
  recordWithNullKeyPart.put("keyPart2", null);
  recordWithNullKeyPart.put("field1", "field1");
  recordWithNullKeyPart.put("field2", "field2");

  genericDao.put(recordWithNullKeyPart);
}
 
Example #23
Source File: PartitionFunctions.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an identity field partitioner for values of the named class.
 *
 * @param sourceName passed through to the partitioner
 * @param name passed through to the partitioner
 * @param className the fully qualified name of the value class to load
 * @param buckets passed through to the partitioner
 * @return the identity partitioner
 * @throws DatasetException if the named class cannot be found
 */
public static FieldPartitioner identity(String sourceName, String name, String className, int buckets) {
  try {
    Class<?> valueClass = Class.forName(className);
    return new IdentityFieldPartitioner(sourceName, name, valueClass, buckets);
  } catch (ClassNotFoundException notFound) {
    throw new DatasetException("Cannot find class: " + className, notFound);
  }
}
 
Example #24
Source File: TransformTask.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Passes the input through unchanged after checking that it is a non-null
 * instance of the expected entity class.
 *
 * @param input the object to check
 * @return the same object
 * @throws DatasetException
 *           If the input is null or not assignable to the entity class.
 */
@Override
public E map(E input) {
  if (input == null || !entityClass.isAssignableFrom(input.getClass())) {
    throw new DatasetException(
        "Object does not match expected type " + entityClass +
        ": " + String.valueOf(input));
  }
  return input;
}
 
Example #25
Source File: TestCSVSchemaCommand.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Running the command against {@code target/users_failed.csv} is expected
 * to throw a DatasetException without any interaction with the console.
 */
@Test
public void testInvalidCSVHeaderFail() throws Exception {
  command.samplePaths = Lists.newArrayList("target/users_failed.csv");
  command.recordName = "User";

  Callable<Void> runCommand = new Callable<Void>() {
    @Override
    public Void call() throws Exception {
      command.run();
      return null;
    }
  };
  TestHelpers.assertThrows(
      "Should fail when csv header doesn't follow alphanumeric standards",
      DatasetException.class, runCommand);

  verifyZeroInteractions(console);
}
 
Example #26
Source File: DatasetTarget.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a key converter for the given type, which must be an Avro type.
 *
 * @param ptype the type to create a converter for
 * @return a {@code KeyConverter} wrapping the Avro type
 * @throws DatasetException if the type is not an {@code AvroType}
 */
@Override
@SuppressWarnings("unchecked")
public Converter<?, ?, ?, ?> getConverter(PType<?> ptype) {
  if (!(ptype instanceof AvroType)) {
    throw new DatasetException(
        "Cannot create converter for non-Avro type: " + ptype);
  }
  return new KeyConverter<E>((AvroType<E>) ptype);
}
 
Example #27
Source File: DatasetKeyOutputFormat.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the output target against the configured write mode.
 *
 * <ul>
 * <li>APPEND: no check is made.</li>
 * <li>OVERWRITE: existing data is deleted unless the target can be
 * replaced.</li>
 * <li>Any other mode, including DEFAULT: the target must be empty and must
 * not have been signaled as ready.</li>
 * </ul>
 *
 * @param jobContext the job context used to load the target and its
 *          configuration
 * @throws DatasetException
 *           In the default mode, if the target is not empty or is ready.
 */
@Override
public void checkOutputSpecs(JobContext jobContext) {
  // Loading the target comes first: the committer setup will fail if the
  // output dataset does not exist
  View<E> target = load(jobContext);
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  WriteMode writeMode = conf.getEnum(KITE_WRITE_MODE, WriteMode.DEFAULT);
  switch (writeMode) {
    case APPEND: {
      break;
    }
    case OVERWRITE: {
      // when the merge cannot use replace, clear out the existing data
      boolean replaceable = canReplace(target);
      if (!replaceable) {
        target.deleteAll();
      }
      break;
    }
    default:
    case DEFAULT: {
      boolean signaledReady = target instanceof Signalable
          && ((Signalable) target).isReady();
      if (signaledReady || !target.isEmpty()) {
        throw new DatasetException(
            "View is not empty or has been signaled as ready: " + target);
      }
      break;
    }
  }
}
 
Example #28
Source File: TestMapReduce.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * A job writing to an already populated output dataset is expected to fail
 * with a DatasetException.
 */
@Test(expected = DatasetException.class)
public void testJobFailsWithExisting() throws Exception {
  populateInputDataset();
  // the pre-existing output is what should make the job fail
  populateOutputDataset();

  createJob().waitForCompletion(true);
}
 
Example #29
Source File: TestMapReduce.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * A job writing to an output dataset that is empty but already signaled as
 * ready is expected to fail with a DatasetException. Skipped on Hadoop 1.
 */
@Test(expected = DatasetException.class)
public void testJobFailsWithEmptyButReadyOutput() throws Exception {
  Assume.assumeTrue(!Hadoop.isHadoop1());
  populateInputDataset();

  // leave the output unpopulated and only mark it as ready
  Signalable readyOutput = (Signalable) outputDataset;
  readyOutput.signalReady();

  createJob().waitForCompletion(true);
}
 
Example #30
Source File: AccessorImpl.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the directory iterator of a file-system backed view.
 *
 * @param view a {@code FileSystemView} or {@code FileSystemDataset}
 * @return the directory iterator of the view
 * @throws DatasetException if the view is of neither type
 */
public Iterator<Path> getDirectoryIterator(View view) {
  if (view instanceof FileSystemView) {
    FileSystemView<?> fileSystemView = (FileSystemView<?>) view;
    return fileSystemView.dirIterator();
  }
  if (view instanceof FileSystemDataset) {
    FileSystemDataset<?> fileSystemDataset = (FileSystemDataset<?>) view;
    return fileSystemDataset.dirIterator();
  }
  throw new DatasetException(
      "Underlying Dataset must be a FileSystemDataset");
}