Java Code Examples for org.apache.hadoop.util.StringUtils#unEscapeString()
The following examples show how to use org.apache.hadoop.util.StringUtils#unEscapeString().
Each example is drawn from an open-source project; the project, source file, and license are noted above the code.
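As a quick orientation before the examples: unEscapeString() reverses StringUtils.escapeString(). With the no-argument-list variants, the escape character is '\' and the escaped character is ','. A minimal sketch (the class name and sample values here are illustrative, not taken from any of the projects below):

import org.apache.hadoop.util.StringUtils;

public class UnEscapeDemo {
  public static void main(String[] args) {
    // Default escape char is '\' and the default escaped char is ','.
    String escaped = StringUtils.escapeString("a,b");      // yields "a\,b"
    String restored = StringUtils.unEscapeString(escaped); // back to "a,b"
    System.out.println(escaped + " -> " + restored);
  }
}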
Example 1
Source File: JobHistory.java from hadoop-gpu, Apache License 2.0
/**
 * Parse a single line of history.
 * @param line
 * @param l
 * @throws IOException
 */
private static void parseLine(String line, Listener l, boolean isEscaped)
    throws IOException {
  // extract the record type
  int idx = line.indexOf(' ');
  String recType = line.substring(0, idx);
  String data = line.substring(idx + 1, line.length());
  Matcher matcher = pattern.matcher(data);
  while (matcher.find()) {
    String tuple = matcher.group(0);
    String[] parts = StringUtils.split(tuple, StringUtils.ESCAPE_CHAR, '=');
    String value = parts[1].substring(1, parts[1].length() - 1);
    if (isEscaped) {
      value = StringUtils.unEscapeString(value, StringUtils.ESCAPE_CHAR,
          charsToEscape);
    }
    parseBuffer.put(Keys.valueOf(parts[0]), value);
  }
  l.handle(RecordTypes.valueOf(recType), parseBuffer);
  parseBuffer.clear();
}
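To make the escape handling inside the loop concrete, here is a hedged sketch of one iteration. The tuple literal and the local charsToEscape array are illustrative assumptions (in the real class, charsToEscape is a field of JobHistory), not the actual job-history format:

// assumption for illustration; JobHistory declares its own charsToEscape field
char[] charsToEscape = new char[] { '"', '=' };
String tuple = "JOBNAME=\"word\\\" count\"";  // i.e. JOBNAME="word\" count"
// split() honors the escape char, so escaped characters stay in the raw token
String[] parts = StringUtils.split(tuple, StringUtils.ESCAPE_CHAR, '=');
String value = parts[1].substring(1, parts[1].length() - 1); // strip the quotes
value = StringUtils.unEscapeString(value, StringUtils.ESCAPE_CHAR, charsToEscape);
// value is now: word" count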
Example 2
Source File: JobHistory.java from RDFS, Apache License 2.0
/**
 * Parse a single line of history.
 * @param line
 * @param l
 * @throws IOException
 */
private static void parseLine(String line, Listener l, boolean isEscaped)
    throws IOException {
  // extract the record type
  int idx = line.indexOf(' ');
  String recType = line.substring(0, idx);
  String data = line.substring(idx + 1, line.length());
  Matcher matcher = pattern.matcher(data);
  Map<Keys, String> parseBuffer = new HashMap<Keys, String>();
  while (matcher.find()) {
    String tuple = matcher.group(0);
    String[] parts = StringUtils.split(tuple, StringUtils.ESCAPE_CHAR, '=');
    String value = parts[1].substring(1, parts[1].length() - 1);
    if (isEscaped) {
      value = StringUtils.unEscapeString(value, StringUtils.ESCAPE_CHAR,
          charsToEscape);
    }
    parseBuffer.put(Keys.valueOf(parts[0]), value);
  }
  l.handle(RecordTypes.valueOf(recType), parseBuffer);
  parseBuffer.clear();
}
Example 3
Source File: QueryInputFormat.java from Halyard, Apache License 2.0
public static void setQueriesFromDirRecursive(Configuration conf, String dirs,
    boolean sparqlUpdate, int stage) throws IOException {
  for (String dir : StringUtils.split(dirs)) {
    Path p = new Path(StringUtils.unEscapeString(dir));
    FileStatus[] matches = p.getFileSystem(conf).globStatus(p);
    if (matches == null) {
      throw new IOException("Input path does not exist: " + p);
    } else if (matches.length == 0) {
      throw new IOException("Input Pattern " + p + " matches 0 files");
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDirectory()) {
          addQueryRecursively(conf, p, sparqlUpdate, stage);
        } else {
          addQuery(conf, globStat, sparqlUpdate, stage);
        }
      }
    }
  }
}
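The split-then-unescape idiom above (also used by the FileInputFormat variants below) is what lets a single comma-separated configuration value carry paths that themselves contain commas. A hedged sketch with a made-up dirs value, assuming the usual Hadoop Path and StringUtils imports:

// "/data/part\,1" keeps its comma because it is escaped;
// the unescaped comma separates the two entries
String dirs = "/data/part\\,1,/data/part2";
for (String dir : StringUtils.split(dirs)) {  // splits on unescaped commas only
  System.out.println(new Path(StringUtils.unEscapeString(dir)));
}
// prints: /data/part,1  then  /data/part2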
Example 4
Source File: FileInputFormat.java from hadoop-gpu, Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
  String dirs = context.getConfiguration().get("mapred.input.dir", "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
Example 5
Source File: MetadataIdParser.java from datawave, Apache License 2.0
/**
 * Parse "..." {, "..."} into an array of string arguments (as Object[])
 *
 * @param args
 * @return Object[] An array of string objects
 * @throws IllegalArgumentException
 */
public static Object[] parseArgs(String args) throws IllegalArgumentException {
  List<String> argList = new ArrayList<>();
  String[] parts = StringUtils.split(args, '\\', ',');
  for (String part : parts) {
    part = part.trim();
    if (part.charAt(0) == '"' && part.charAt(part.length() - 1) == '"') {
      part = StringUtils.unEscapeString(part.substring(1, part.length() - 1));
      argList.add(part);
    } else {
      throw new IllegalArgumentException(
          "Expected a list of strings separated by commas. "
          + "Commas within the strings must be escaped. " + part);
    }
  }
  return argList.toArray();
}
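A possible call, assuming the behavior above: each quoted segment is unescaped after its surrounding quotes are stripped. The input string here is hypothetical:

// hypothetical input; the comma inside the second string is escaped
Object[] parsed = MetadataIdParser.parseArgs("\"alpha\", \"beta\\,gamma\"");
// parsed is { "alpha", "beta,gamma" }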
Example 6
Source File: FileInputFormat.java from hadoop-gpu, Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
  String dirs = conf.get("mapred.input.dir", "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
Example 7
Source File: FileInputFormat.java from RDFS, Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
  String dirs = context.getConfiguration().get("mapred.input.dir", "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
Example 8
Source File: FileInputFormat.java from RDFS, Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
  String dirs = conf.get("mapred.input.dir", "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
Example 9
Source File: FileInputFormat.java from big-c, Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
  String dirs = context.getConfiguration().get(INPUT_DIR, "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
Example 10
Source File: FileInputFormat.java from big-c, Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
  String dirs = conf.get(
      org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
Example 11
Source File: FileInputFormat.java from hadoop, Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
  String dirs = context.getConfiguration().get(INPUT_DIR, "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
Example 12
Source File: FileInputFormat.java from hadoop, Apache License 2.0
/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
  String dirs = conf.get(
      org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "");
  String[] list = StringUtils.split(dirs);
  Path[] result = new Path[list.length];
  for (int i = 0; i < list.length; i++) {
    result[i] = new Path(StringUtils.unEscapeString(list[i]));
  }
  return result;
}
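All of the getInputPaths() variants above decode values written by setInputPaths(), which escapes each path before joining the list with commas. A hedged round-trip sketch (the paths are made up, and setInputPaths() qualifies them against the default file system, so the printed form may gain a scheme prefix):

Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
// the comma in the first path is escaped when stored under INPUT_DIR
FileInputFormat.setInputPaths(job, new Path("/data/a,b"), new Path("/data/c"));
for (Path p : FileInputFormat.getInputPaths(job)) {
  System.out.println(p); // the literal comma survives the round trip
}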
Example 13
Source File: CountersStrings.java from big-c, Apache License 2.0
private static String unescape(String string) {
  return StringUtils.unEscapeString(string, StringUtils.ESCAPE_CHAR,
      charsToEscape);
}
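For context, charsToEscape here is a field of the enclosing class; in Hadoop's counter-string code it holds the bracket characters that delimit groups, counters, and units, so counter names containing them survive a round trip. A hedged sketch, with the array written out locally as an assumption rather than read from the real field:

// assumed to mirror the enclosing class's charsToEscape field
char[] charsToEscape = { '{', '}', '[', ']', '(', ')' };
String name = "bytes {spilled}";
String escaped = StringUtils.escapeString(name, StringUtils.ESCAPE_CHAR,
    charsToEscape);
// escaped: bytes \{spilled\}
String restored = StringUtils.unEscapeString(escaped, StringUtils.ESCAPE_CHAR,
    charsToEscape);
// restored.equals(name) is true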
Example 14
Source File: Counters.java from RDFS, Apache License 2.0
private static String unescape(String string) {
  return StringUtils.unEscapeString(string, StringUtils.ESCAPE_CHAR,
      charsToEscape);
}
Example 15
Source File: LindenJob.java from linden, Apache License 2.0
@Override
public int run(String[] strings) throws Exception {
  Configuration conf = getConf();
  String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
  logger.info("input dir:" + dir);
  Path inputPath = new Path(StringUtils.unEscapeString(dir));
  Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
  String indexPath = conf.get(LindenJobConfig.INDEX_PATH);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  if (fs.exists(new Path(indexPath))) {
    fs.delete(new Path(indexPath), true);
  }
  int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
  Shard[] shards = createShards(indexPath, numShards);
  Shard.setIndexShards(conf, shards);
  // empty trash
  (new Trash(conf)).expunge();

  Job job = Job.getInstance(conf, "linden-hadoop-indexing");
  job.setJarByClass(LindenJob.class);
  job.setMapperClass(LindenMapper.class);
  job.setCombinerClass(LindenCombiner.class);
  job.setReducerClass(LindenReducer.class);
  job.setMapOutputKeyClass(Shard.class);
  job.setMapOutputValueClass(IntermediateForm.class);
  job.setOutputKeyClass(Shard.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(IndexUpdateOutputFormat.class);
  job.setReduceSpeculativeExecution(false);
  job.setNumReduceTasks(numShards);

  String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
  if (lindenSchemaFile == null) {
    throw new IOException("no schema file is found");
  }
  logger.info("Adding schema file: " + lindenSchemaFile);
  job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
  String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
  if (lindenPropertiesFile == null) {
    throw new IOException("no linden properties file is found");
  }
  logger.info("Adding linden properties file: " + lindenPropertiesFile);
  job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));

  FileInputFormat.setInputPaths(job, inputPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  Path[] inputs = FileInputFormat.getInputPaths(job);
  StringBuilder buffer = new StringBuilder(inputs[0].toString());
  for (int i = 1; i < inputs.length; i++) {
    buffer.append(",");
    buffer.append(inputs[i].toString());
  }
  logger.info("mapreduce.input.dir = " + buffer.toString());
  logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
  logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
  logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
  logger.info("mapreduce.input.format.class = " + job.getInputFormatClass());
  logger.info("mapreduce.output.format.class = " + job.getOutputFormatClass());
  logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));

  job.waitForCompletion(true);
  if (!job.isSuccessful()) {
    throw new RuntimeException("Job failed");
  }
  return 0;
}
Example 16
Source File: CountersStrings.java from hadoop, Apache License 2.0
private static String unescape(String string) {
  return StringUtils.unEscapeString(string, StringUtils.ESCAPE_CHAR,
      charsToEscape);
}
Example 17
Source File: Counters.java from hadoop-gpu, Apache License 2.0
private static String unescape(String string) {
  return StringUtils.unEscapeString(string, StringUtils.ESCAPE_CHAR,
      charsToEscape);
}