Java Code Examples for org.apache.parquet.Strings#isNullOrEmpty()
The following examples show how to use org.apache.parquet.Strings#isNullOrEmpty(). Each example comes from an open-source project; the source file and project are noted above each snippet.
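Every example relies on the same simple contract: isNullOrEmpty(s) is true exactly when s is null or the empty string. A minimal sketch of that behavior (a hypothetical driver class, using only the parquet-common dependency):

import org.apache.parquet.Strings;

public class IsNullOrEmptyDemo {
  public static void main(String[] args) {
    System.out.println(Strings.isNullOrEmpty(null));   // true
    System.out.println(Strings.isNullOrEmpty(""));     // true
    System.out.println(Strings.isNullOrEmpty(" "));    // false: whitespace is not empty
    System.out.println(Strings.isNullOrEmpty("hdfs")); // false
  }
}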
Example 1
Source File: PathUtils.java From Bats with Apache License 2.0
/**
 * Normalizes the given path eliminating repeated forward slashes.
 *
 * @return normalized path
 */
public static final String normalize(final String path) {
  if (Strings.isNullOrEmpty(Preconditions.checkNotNull(path))) {
    return path;
  }
  final StringBuilder builder = new StringBuilder();
  char last = path.charAt(0);
  builder.append(last);
  for (int i = 1; i < path.length(); i++) {
    char cur = path.charAt(i);
    if (last == '/' && cur == last) {
      continue;
    }
    builder.append(cur);
    last = cur;
  }
  return builder.toString();
}
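A few hypothetical calls (not part of the Bats sources) illustrate the contract. A null path fails fast inside Preconditions.checkNotNull, while the isNullOrEmpty guard lets the empty string pass through unchanged:

PathUtils.normalize("");           // -> "" (the guard short-circuits the scan)
PathUtils.normalize("//a///b/c");  // -> "/a/b/c"
PathUtils.normalize("/a/b/");      // -> "/a/b/" (single slashes, including a trailing one, are kept)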
Example 2
Source File: WholeFileTransformerProcessor.java From datacollector with Apache License 2.0
@Override
protected List<ConfigIssue> init() {
  List<ConfigIssue> issues = super.init();
  if (Strings.isNullOrEmpty(jobConfig.tempDir)) {
    issues.add(getContext().createConfigIssue(
        Groups.JOB.name(),
        JobConfig.TEMPDIR,
        Errors.CONVERT_02
    ));
  }
  this.context = getContext();
  this.errorRecordHandler = new DefaultErrorRecordHandler(getContext());
  tempDirElEval = context.createELEval("tempDir");
  compressionElEval = context.createELEval("compressionCodec");
  rateLimitElEval = FileRefUtil.createElEvalForRateLimit(getContext());
  variables = context.createELVars();
  return issues;
}
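The snippet follows the StreamSets convention of accumulating ConfigIssues rather than throwing on the first bad setting. The same validate-and-collect shape, stripped of the SDK types (a hypothetical standalone helper, not from the datacollector sources):

import java.util.ArrayList;
import java.util.List;

final class RequiredSettings {
  /** Returns one message per missing setting instead of failing fast on the first. */
  static List<String> missing(String tempDir, String compressionCodec) {
    List<String> issues = new ArrayList<>();
    if (tempDir == null || tempDir.isEmpty()) {  // the same test Strings.isNullOrEmpty performs
      issues.add("tempDir must be set");
    }
    if (compressionCodec == null || compressionCodec.isEmpty()) {
      issues.add("compressionCodec must be set");
    }
    return issues;
  }
}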
Example 3
Source File: PathUtils.java From Bats with Apache License 2.0
/**
 * Returns a normalized, combined path out of the given path segments.
 *
 * @param parts path segments to combine
 * @see #normalize(String)
 */
public static final String join(final String... parts) {
  final StringBuilder sb = new StringBuilder();
  for (final String part : parts) {
    Preconditions.checkNotNull(part, "parts cannot contain null");
    if (!Strings.isNullOrEmpty(part)) {
      sb.append(part).append("/");
    }
  }
  if (sb.length() > 0) {
    sb.deleteCharAt(sb.length() - 1);
  }
  final String path = sb.toString();
  return normalize(path);
}
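Hypothetical calls again, assuming the PathUtils class above: empty segments are skipped by the isNullOrEmpty check, and the final normalize(String) pass collapses any doubled slashes the joining introduces:

PathUtils.join("a", "", "b/", "c");  // -> "a/b/c" ("" is skipped; the intermediate "a/b//c" is normalized)
PathUtils.join("/a", "b");           // -> "/a/b"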
Example 4
Source File: SparkExecutableLivy.java From kylin-on-parquet-v2 with Apache License 2.0 (the kylin project carries an identical copy of this method)
@Override
protected void onExecuteStart(ExecutableContext executableContext) {
  final Output output = getOutput();
  if (output.getExtra().containsKey(START_TIME)) {
    final String sparkJobID = output.getExtra().get(ExecutableConstants.SPARK_JOB_ID);
    if (sparkJobID == null) {
      getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
      return;
    }
    try {
      String status = getAppState(sparkJobID);
      if (Strings.isNullOrEmpty(status)
          || LivyStateEnum.dead.name().equalsIgnoreCase(status)
          || LivyStateEnum.error.name().equalsIgnoreCase(status)
          || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) {
        // remove previous MR job info
        super.onExecuteStart(executableContext);
      } else {
        getManager().updateJobOutput(getId(), ExecutableState.RUNNING, null, null);
      }
    } catch (IOException e) {
      logger.warn("error getting hadoop job status", e);
      super.onExecuteStart(executableContext);
    }
  } else {
    super.onExecuteStart(executableContext);
  }
}
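Strings.isNullOrEmpty doubles here as an "unknown state" guard: a blank answer from Livy is treated the same as dead, error, or shutting_down. Since the identical four-way test recurs in the next two examples, it helps to read it as a single predicate (a hypothetical helper, not in the kylin sources):

/** True when Livy reports no state at all, or a terminal one (sketch only). */
private static boolean isFinalOrUnknown(String status) {
  return Strings.isNullOrEmpty(status)
      || LivyStateEnum.dead.name().equalsIgnoreCase(status)
      || LivyStateEnum.error.name().equalsIgnoreCase(status)
      || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status);
}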
Example 5
Source File: SparkExecutableLivy.java From kylin-on-parquet-v2 with Apache License 2.0 (the kylin project carries an identical copy of this method)
@Override
protected ExecuteResult onResumed(String appId, ExecutableManager mgr) throws ExecuteException {
  Map<String, String> info = new HashMap<>();
  try {
    logger.info("livy spark_job_id:" + appId + " resumed");
    info.put(ExecutableConstants.SPARK_JOB_ID, appId);
    // poll Livy every five seconds until the job reaches a terminal state,
    // or the user pauses/discards it
    while (!isPaused() && !isDiscarded()) {
      String status = getAppState(appId);
      if (Strings.isNullOrEmpty(status)
          || LivyStateEnum.dead.name().equalsIgnoreCase(status)
          || LivyStateEnum.error.name().equalsIgnoreCase(status)
          || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) {
        mgr.updateJobOutput(getId(), ExecutableState.ERROR, null, appId + " has failed");
        return new ExecuteResult(ExecuteResult.State.FAILED, appId + " has failed");
      }
      if (LivyStateEnum.success.name().equalsIgnoreCase(status)) {
        mgr.addJobInfo(getId(), info);
        return new ExecuteResult(ExecuteResult.State.SUCCEED, appId + " has finished");
      }
      Thread.sleep(5000);
    }
    killAppRetry(appId);
    if (isDiscarded()) {
      return new ExecuteResult(ExecuteResult.State.DISCARDED, appId + " is discarded");
    } else {
      return new ExecuteResult(ExecuteResult.State.STOPPED, appId + " is stopped");
    }
  } catch (Exception e) {
    logger.error("error running spark job:", e);
    return new ExecuteResult(ExecuteResult.State.ERROR, e.getLocalizedMessage());
  }
}
Example 6
Source File: SparkExecutableLivy.java From kylin-on-parquet-v2 with Apache License 2.0 (the kylin project carries an identical copy of this method)
@Override
protected int killAppRetry(String appId) throws IOException, InterruptedException {
  String status = getAppState(appId);
  if (Strings.isNullOrEmpty(status)
      || LivyStateEnum.dead.name().equalsIgnoreCase(status)
      || LivyStateEnum.error.name().equalsIgnoreCase(status)
      || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) {
    logger.warn(appId + " is in a final state, no need to kill");
    return 0;
  }

  killApp(appId);

  status = getAppState(appId);
  int retry = 0;
  // re-issue the kill until Livy reports a terminal (or unknown) state, at most five more times
  while (!(Strings.isNullOrEmpty(status)
      || LivyStateEnum.dead.name().equalsIgnoreCase(status)
      || LivyStateEnum.error.name().equalsIgnoreCase(status)
      || LivyStateEnum.shutting_down.name().equalsIgnoreCase(status)) && retry < 5) {
    killApp(appId);
    Thread.sleep(1000);
    status = getAppState(appId);
    retry++;
  }

  if (Strings.isNullOrEmpty(status) || LivyStateEnum.dead.name().equalsIgnoreCase(status)) {
    logger.info(appId + " killed successfully");
    return 0;
  } else {
    logger.info(appId + " kill failed");
    return 1;
  }
}
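One detail worth noting: Java's && binds tighter than ||, so the negation and parentheses around the four-way test are what make retry < 5 bound the whole loop; without them, the cap would attach only to the last comparison. A short illustration:

boolean unknown = true, dead = false;
int retry = 9;
System.out.println(unknown || dead && retry < 5);    // true:  parsed as unknown || (dead && retry < 5)
System.out.println((unknown || dead) && retry < 5);  // false: the cap now guards the whole condition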
Example 7
Source File: ThriftReadSupport.java From parquet-mr with Apache License 2.0
public static FieldProjectionFilter getFieldProjectionFilter(Configuration conf) {
  String deprecated = conf.get(THRIFT_COLUMN_FILTER_KEY);
  String strict = conf.get(STRICT_THRIFT_COLUMN_FILTER_KEY);

  if (Strings.isNullOrEmpty(deprecated) && Strings.isNullOrEmpty(strict)) {
    return null;
  }

  if (!Strings.isNullOrEmpty(deprecated) && !Strings.isNullOrEmpty(strict)) {
    throw new ThriftProjectionException(
        "You cannot provide both " + THRIFT_COLUMN_FILTER_KEY + " and " + STRICT_THRIFT_COLUMN_FILTER_KEY + "! "
            + THRIFT_COLUMN_FILTER_KEY + " is deprecated.");
  }

  if (!Strings.isNullOrEmpty(deprecated)) {
    LOG.warn("Using {} is deprecated. Please see the docs for {}!",
        THRIFT_COLUMN_FILTER_KEY, STRICT_THRIFT_COLUMN_FILTER_KEY);
    return new DeprecatedFieldProjectionFilter(deprecated);
  }

  return StrictFieldProjectionFilter.fromSemicolonDelimitedString(strict);
}
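Client code drives this by setting exactly one of the two keys before the read. A sketch (assuming the public constants exposed by ThriftReadSupport in parquet-mr, and a hypothetical semicolon-delimited glob string):

Configuration conf = new Configuration();
// Strict, glob-style projection. Leaving the deprecated key unset matters:
// providing both makes getFieldProjectionFilter throw ThriftProjectionException.
conf.set(ThriftReadSupport.STRICT_THRIFT_COLUMN_FILTER_KEY, "name;address/*");
FieldProjectionFilter filter = ThriftReadSupport.getFieldProjectionFilter(conf);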
Example 8
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0
public static HiveStorageCapabilities getHiveStorageCapabilities(final StorageDescriptor storageDescriptor) {
  final String location = storageDescriptor.getLocation();

  if (null != location) {
    final URI uri;
    try {
      uri = URI.create(location);
    } catch (IllegalArgumentException e) {
      // unknown table source, default to HDFS.
      return HiveStorageCapabilities.DEFAULT_HDFS;
    }

    final String scheme = uri.getScheme();
    if (!Strings.isNullOrEmpty(scheme)) {
      if (scheme.regionMatches(true, 0, "s3", 0, 2)) {
        /* AWS S3 does not support impersonation, last modified times or orc split file ids. */
        return HiveStorageCapabilities.newBuilder()
            .supportsImpersonation(false)
            .supportsLastModifiedTime(false)
            .supportsOrcSplitFileIds(false)
            .build();
      } else if (scheme.regionMatches(true, 0, "wasb", 0, 4)
          || scheme.regionMatches(true, 0, "abfs", 0, 4)
          || scheme.regionMatches(true, 0, "wasbs", 0, 5)
          || scheme.regionMatches(true, 0, "abfss", 0, 5)) {
        /* DX-17365: Azure Storage does not support correct last modified times. Azure returns last
         * modified times, however, the timestamps returned are incorrect: they reference the folder's
         * create time rather than the folder content's last modified time. Please see Prototype.java
         * for Azure storage fs uri schemes. */
        return HiveStorageCapabilities.newBuilder()
            .supportsImpersonation(true)
            .supportsLastModifiedTime(false)
            .supportsOrcSplitFileIds(true)
            .build();
      } else if (!scheme.regionMatches(true, 0, "hdfs", 0, 4)) {
        /* Most Hive-supported non-HDFS file systems allow for impersonation and last modified
         * times, but not orc split file ids. */
        return HiveStorageCapabilities.newBuilder()
            .supportsImpersonation(true)
            .supportsLastModifiedTime(true)
            .supportsOrcSplitFileIds(false)
            .build();
      }
    }
  }

  // Default to HDFS.
  return HiveStorageCapabilities.DEFAULT_HDFS;
}
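The isNullOrEmpty guard on the scheme matters because a relative or schemeless table location yields a null scheme from java.net.URI, which then falls through to the HDFS default. A quick standalone illustration:

import java.net.URI;

public class SchemeProbe {
  public static void main(String[] args) {
    // "s3a" is accepted by the regionMatches(true, 0, "s3", 0, 2) test above
    System.out.println(URI.create("s3a://bucket/warehouse/t").getScheme());  // s3a
    // no scheme at all -> null -> the isNullOrEmpty branch is skipped
    System.out.println(URI.create("/warehouse/t").getScheme());             // null
  }
}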