Java Code Examples for org.apache.hadoop.filecache.DistributedCache#getLocalCacheFiles()
The following examples show how to use org.apache.hadoop.filecache.DistributedCache#getLocalCacheFiles(). Each example is taken from an open-source project; the project and source file it comes from are noted above the snippet.
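All of the snippets below read files that a job driver has previously registered with the distributed cache. For orientation, here is a minimal, hypothetical driver-side sketch of that registration step; the HDFS path, job name, and class name are placeholder assumptions and are not taken from any of the projects listed below.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.Job;

public class DistributedCacheDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Hypothetical HDFS path: register the file with the distributed cache so the
        // framework copies it to the local disk of every task node before tasks start.
        DistributedCache.addCacheFile(new URI("/user/example/lookup.txt"), conf);

        Job job = new Job(conf, "distributed-cache-example");
        // ... set mapper/reducer classes, input and output paths, etc. ...

        // Inside a task, DistributedCache.getLocalCacheFiles(configuration) returns the
        // local filesystem paths of those copies, which is what the examples below use.
        job.waitForCompletion(true);
    }
}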
Example 1
Source File: MapJoin.java From BigData-In-Practice with Apache License 2.0
@Override
protected void setup(Mapper<LongWritable, Text, NullWritable, Emp_Dep>.Context context)
        throws IOException, InterruptedException {
    // Setup: the table to be joined has already been placed in the distributed cache
    Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    // Only one file is cached here, so the first entry is enough; create a BufferedReader to read it
    BufferedReader reader = new BufferedReader(new FileReader(paths[0].toString()));
    String str = null;
    try {
        // Read line by line
        while ((str = reader.readLine()) != null) {
            // Split each record of the cached table
            String[] splits = str.split("\t");
            // Store the useful fields from the array in a Map
            joinData.put(Integer.parseInt(splits[0]), splits[1]);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        reader.close();
    }
}
Example 2
Source File: CrossProductOperation.java From incubator-retired-mrql with Apache License 2.0
@Override
protected void setup ( Context context ) throws IOException, InterruptedException {
    super.setup(context);
    try {
        conf = context.getConfiguration();
        Plan.conf = conf;
        Config.read(Plan.conf);
        Tree code = Tree.parse(conf.get("mrql.reducer"));
        reduce_fnc = functional_argument(conf, code);
        code = Tree.parse(conf.get("mrql.mapper"));
        map_fnc = functional_argument(conf, code);
        if (conf.get("mrql.zero") != null) {
            code = Tree.parse(conf.get("mrql.zero"));
            result = Interpreter.evalE(code);
            code = Tree.parse(conf.get("mrql.accumulator"));
            acc_fnc = functional_argument(conf, code);
        } else
            result = null;
        counter = conf.get("mrql.counter");
        uris = DistributedCache.getCacheFiles(conf);
        local_paths = DistributedCache.getLocalCacheFiles(conf);
        index = 0;
    } catch (Exception e) {
        throw new Error("Cannot setup the crossProduct: " + e);
    }
}
Example 3
Source File: AvroDistributedCacheFileReader.java From ml-ease with Apache License 2.0
@Override
protected List<Path> getPaths(String filePath) throws IOException {
    Path[] localFiles = DistributedCache.getLocalCacheFiles(getConf());
    List<Path> paths = new ArrayList<Path>();
    for (Path file : localFiles) {
        if (!file.toString().contains(filePath)) {
            continue;
        }
        paths.add(file);
    }
    return paths;
}
Example 4
Source File: L2.java From spork with Apache License 2.0
public void configure(JobConf conf) {
    try {
        Path[] paths = DistributedCache.getLocalCacheFiles(conf);
        if (paths == null || paths.length < 1) {
            throw new RuntimeException("DistributedCache no work.");
        }
        // Open the small table
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(paths[0].toString())));
        String line;
        hash = new HashSet<String>(500);
        while ((line = reader.readLine()) != null) {
            if (line.length() < 1) continue;
            String[] fields = line.split("");
            hash.add(fields[0]);
        }
        reader.close();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
Example 5
Source File: ReplicatedUserJoin.java From hadoop-map-reduce-patterns with Apache License 2.0
public void setup(Context context) throws IOException, InterruptedException {
    Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    // Read all files in the DistributedCache
    for (Path p : files) {
        BufferedReader rdr = new BufferedReader(new InputStreamReader(
                new GZIPInputStream(new FileInputStream(new File(p.toString())))));
        String line = null;
        // For each record in the user file
        while ((line = rdr.readLine()) != null) {
            // Get the user ID for this record
            Map<String, String> parsed = MRDPUtils.transformXmlToMap(line);
            String userId = parsed.get("Id");
            // Map the user ID to the record
            userIdToInfo.put(userId, line);
        }
        rdr.close();
    }
    // Get the join type from the configuration
    joinType = context.getConfiguration().get("join.type");
}
Example 6
Source File: MapFeatures.java From hadoop-book with Apache License 2.0
@Override
public void configure(JobConf job) {
    caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
    inputFile = job.get("map.input.file");
    if (job.getBoolean("wordcount.skip.patterns", false)) {
        Path[] patternsFiles = new Path[0];
        try {
            patternsFiles = DistributedCache.getLocalCacheFiles(job);
        } catch (IOException ioe) {
            System.err.println("Caught exception getting cached files: "
                    + StringUtils.stringifyException(ioe));
        }
        for (Path patternsFile : patternsFiles) {
            parseSkipFile(patternsFile);
        }
    }
}
Example 7
Source File: BloomJoin.java From hiped2 with Apache License 2.0
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    // Load the Bloom filter from the first (and only) cached file
    Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    filter = BloomFilterDumper.fromFile(new File(files[0].toString()));
    System.out.println("Filter = " + filter);
}
Example 8
Source File: DistributedCacheHelper.java From datafu with Apache License 2.0
/**
 * Deserializes an object from a path in HDFS.
 *
 * @param conf Hadoop configuration
 * @param path Path to deserialize from
 * @return Deserialized object
 * @throws IOException IOException
 */
public static Object readObject(Configuration conf, org.apache.hadoop.fs.Path path) throws IOException {
    String localPath = null;
    Path[] localCacheFiles = DistributedCache.getLocalCacheFiles(conf);
    for (Path localCacheFile : localCacheFiles) {
        if (localCacheFile.getName().endsWith(path.getName())) {
            localPath = localCacheFile.getName();
            break;
        }
    }
    if (localPath == null) {
        throw new RuntimeException("Could not find " + path + " in local cache");
    }
    FileInputStream inputStream = new FileInputStream(new File(localPath));
    ObjectInputStream objStream = new ObjectInputStream(inputStream);
    try {
        try {
            return objStream.readObject();
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    } finally {
        objStream.close();
        inputStream.close();
    }
}
Example 9
Source File: BasicJobChaining.java From hadoop-map-reduce-patterns with Apache License 2.0
protected void setup(Context context) throws IOException, InterruptedException {
    average = getAveragePostsPerUser(context.getConfiguration());
    mos = new MultipleOutputs<Text, Text>(context);
    try {
        Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
        if (files == null || files.length == 0) {
            throw new RuntimeException("User information is not set in DistributedCache");
        }
        // Read all files in the DistributedCache
        for (Path p : files) {
            BufferedReader rdr = new BufferedReader(new InputStreamReader(
                    new GZIPInputStream(new FileInputStream(new File(p.toString())))));
            String line;
            // For each record in the user file
            while ((line = rdr.readLine()) != null) {
                // Get the user ID and reputation
                Map<String, String> parsed = MRDPUtils.transformXmlToMap(line);
                String userId = parsed.get("Id");
                String reputation = parsed.get("Reputation");
                if (userId != null && reputation != null) {
                    // Map the user ID to the reputation
                    userIdToReputation.put(userId, reputation);
                }
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Example 10
Source File: ReduceSideJoinBloomFilter.java From hadoop-map-reduce-patterns with Apache License 2.0
public void setup(Context context) throws IOException {
    // Deserialize the Bloom filter from the first cached file
    Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    DataInputStream strm = new DataInputStream(
            new FileInputStream(new File(files[0].toString())));
    bfilter.readFields(strm);
}
Example 11
Source File: BloomFilter.java From hadoop-map-reduce-patterns with Apache License 2.0
@Override
public void setup(Context context) throws IOException, InterruptedException {
    Path[] files = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    System.out.println("Reading Bloom filter from: " + files[0]);
    DataInputStream stream = new DataInputStream(
            new FileInputStream(files[0].toString()));
    filter.readFields(stream);
    stream.close();
}
Example 12
Source File: JobLibLoader.java From SpyGlass with Apache License 2.0
public static Path[] getFileFromCache(String libPathStr, Configuration config) {
    Path[] localFiles = null;
    try {
        logger.info("Local Cache => " + DistributedCache.getLocalCacheFiles(config));
        logger.info("Hadoop Cache => " + DistributedCache.getCacheFiles(config));
        if (DistributedCache.getLocalCacheFiles(config) != null) {
            localFiles = DistributedCache.getLocalCacheFiles(config);
        }
        logger.info("LocalFiles => " + localFiles);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return localFiles;
}
Example 13
Source File: FileCache.java From Cubert with Apache License 2.0
public static void initialize(Configuration conf) throws IOException {
    // Remember the configuration and the local paths of all files in the distributed cache
    FileCache.conf = conf;
    cachedFiles = DistributedCache.getLocalCacheFiles(conf);
}
Example 14
Source File: AccumuloMrGeoRangePartitioner.java From mrgeo with Apache License 2.0
@SuppressFBWarnings(value = "PATH_TRAVERSAL_IN", justification = "Cutpoints file generated by code")
private synchronized TileIdWritable[] getCutPoints() throws IOException {
    if (cutPointArray == null) {
        String cutFileName = conf.get(CUTFILE_KEY);
        Path[] cf = DistributedCache.getLocalCacheFiles(conf);
        if (cf != null) {
            for (Path path : cf) {
                if (path.toUri().getPath()
                        .endsWith(cutFileName.substring(cutFileName.lastIndexOf('/')))) {
                    TreeSet<Text> cutPoints = new TreeSet<Text>();
                    try (Scanner in = new Scanner(new BufferedReader(new FileReader(path.toString())))) {
                        while (in.hasNextLine()) {
                            cutPoints.add(new Text(Base64Utils.decodeToString(in.nextLine())));
                        }
                    } catch (ClassNotFoundException e) {
                        throw new IOException("Error decoding cutpoints", e);
                    }
                    cutPointArray = cutPoints.toArray(new Text[cutPoints.size()]);
                    break;
                }
            }
        }
        if (cutPointArray == null) {
            throw new FileNotFoundException(cutFileName + " not found in distributed cache");
        }
    }
    tileIdPointArray = new TileIdWritable[cutPointArray.length];
    for (int x = 0; x < cutPointArray.length; x++) {
        byte[] b = cutPointArray[x].getBytes();
        ByteBuffer buffer = ByteBuffer.wrap(b);
        long k = buffer.getLong();
        tileIdPointArray[x] = new TileIdWritable(k);
    }
    return tileIdPointArray;
}
Example 15
Source File: GroupedKeyRangePartitioner.java From accumulo-recipes with Apache License 2.0
private synchronized Text[] getCutPoints() throws IOException {
    if (cutPointArray == null) {
        Path[] cf = DistributedCache.getLocalCacheFiles(conf);
        if (cf != null) {
            Map<String, String> curFilesAndGroups = getCurFilesAndGroups();
            SortedMap<String, SortedSet<String>> cutPointMap = new TreeMap<String, SortedSet<String>>();
            for (Path path : cf) {
                String group = null;
                for (Map.Entry<String, String> groupSplits : curFilesAndGroups.entrySet()) {
                    if (path.toString().endsWith(groupSplits.getKey()))
                        group = groupSplits.getValue();
                }

                if (group != null) {
                    Scanner in = new Scanner(new BufferedReader(new FileReader(path.toString())));
                    try {
                        while (in.hasNextLine()) {
                            String split = new String(Base64.decodeBase64(in.nextLine().getBytes()));
                            SortedSet<String> splits = cutPointMap.get(group);
                            if (splits == null) {
                                splits = new TreeSet<String>();
                                cutPointMap.put(group, splits);
                            }
                            // Collect the decoded split point under its group
                            splits.add(split);
                        }
                        SortedSet<Text> treeSet = new TreeSet<Text>();
                        for (Map.Entry<String, SortedSet<String>> entry : cutPointMap.entrySet()) {
                            treeSet.add(new Text(entry.getKey() + NULL_BYTE + NULL_BYTE));
                            for (String string : entry.getValue())
                                treeSet.add(new Text(entry.getKey() + NULL_BYTE + string));
                            treeSet.add(new Text(entry.getKey() + NULL_BYTE + END_BYTE));
                        }
                        cutPointArray = treeSet.toArray(new Text[]{});
                    } finally {
                        in.close();
                    }
                    break;
                } else {
                    throw new FileNotFoundException("A file was not found in distribution cache files: " + path.toString());
                }
            }
        }
    }
    return cutPointArray;
}