org.apache.hadoop.io.UTF8 Java Examples
The following examples show how to use org.apache.hadoop.io.UTF8. They are drawn from open source projects; each example is attributed to its original project, source file, and license. Note that UTF8 is deprecated in Hadoop in favor of org.apache.hadoop.io.Text and survives mainly for backward compatibility with older serialized formats.
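Before the project examples, here is a minimal round-trip sketch (not taken from any of the projects below) showing the two static helpers, UTF8.writeString and UTF8.readString, that many of the examples rely on. It assumes only that hadoop-common is on the classpath.

import java.io.*;
import org.apache.hadoop.io.UTF8;

public class UTF8RoundTrip {
  public static void main(String[] args) throws IOException {
    // Write a string as a length-prefixed run of UTF-8 bytes.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    UTF8.writeString(out, "hello, hadoop");

    // Read the same encoding back.
    DataInputStream in = new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray()));
    String back = UTF8.readString(in);
    System.out.println(back); // prints: hello, hadoop
  }
}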
Example #1
Source File: TestFileSystem.java From RDFS with Apache License 2.0 | 6 votes |
public static void readTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #2
Source File: MRBench.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Create the job configuration.
 */
private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setJarByClass(MRBench.class);

  FileInputFormat.addInputPath(jobConf, INPUT_DIR);

  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  jobConf.setOutputValueClass(UTF8.class);
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);

  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  jobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  return jobConf;
}
Example #3
Source File: TestFileSystem.java From RDFS with Apache License 2.0 | 6 votes |
public static void seekTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(SeekMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #4
Source File: TestFileSystem.java From RDFS with Apache License 2.0 | 6 votes |
public static void writeTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #5
Source File: MRBench.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Create the job configuration.
 */
private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setJarByClass(MRBench.class);

  FileInputFormat.addInputPath(jobConf, INPUT_DIR);

  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  jobConf.setOutputValueClass(UTF8.class);
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);

  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  jobConf.setBoolean("mapreduce.job.complete.cancel.delegation.tokens", false);
  return jobConf;
}
Example #6
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public static void writeTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(DATA_DIR, true);
  fs.delete(WRITE_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(WriteMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, WRITE_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #7
Source File: MRBench.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Create the job configuration.
 */
private static JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(MRBench.class);

  FileInputFormat.addInputPath(jobConf, INPUT_DIR);

  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  jobConf.setOutputValueClass(UTF8.class);
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);

  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  return jobConf;
}
Example #8
Source File: KafkaKey.java From HiveKa with Apache License 2.0 | 6 votes |
@Override
public void readFields(DataInput in) throws IOException {
  this.leaderId = UTF8.readString(in);
  this.partition = in.readInt();
  this.beginOffset = in.readLong();
  this.offset = in.readLong();
  this.checksum = in.readLong();
  this.topic = in.readUTF();
  this.time = in.readLong();
  this.server = in.readUTF();  // left for legacy
  this.service = in.readUTF(); // left for legacy
  this.partitionMap = new MapWritable();
  try {
    this.partitionMap.readFields(in);
  } catch (IOException e) {
    this.setServer(this.server);
    this.setService(this.service);
  }
}
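The Writable contract pairs readFields with a write(DataOutput) that emits the same fields in the same order. The actual KafkaKey.write is not shown on this page; a hypothetical counterpart consistent with the read order above might look like this (note it always writes partitionMap, whereas the try/catch above tolerates legacy records that lack it):

@Override
public void write(DataOutput out) throws IOException {
  // Hypothetical sketch mirroring readFields above; not the HiveKa source.
  UTF8.writeString(out, this.leaderId);
  out.writeInt(this.partition);
  out.writeLong(this.beginOffset);
  out.writeLong(this.offset);
  out.writeLong(this.checksum);
  out.writeUTF(this.topic);
  out.writeLong(this.time);
  out.writeUTF(this.server);   // left for legacy
  out.writeUTF(this.service);  // left for legacy
  this.partitionMap.write(out);
}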
Example #9
Source File: MRBench.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Create the job configuration.
 */
private JobConf setupJob(int numMaps, int numReduces, String jarFile) {
  JobConf jobConf = new JobConf(getConf());
  jobConf.setJarByClass(MRBench.class);

  FileInputFormat.addInputPath(jobConf, INPUT_DIR);

  jobConf.setInputFormat(TextInputFormat.class);
  jobConf.setOutputFormat(TextOutputFormat.class);

  jobConf.setOutputValueClass(UTF8.class);
  jobConf.setMapOutputKeyClass(UTF8.class);
  jobConf.setMapOutputValueClass(UTF8.class);

  if (null != jarFile) {
    jobConf.setJar(jarFile);
  }
  jobConf.setMapperClass(Map.class);
  jobConf.setReducerClass(Reduce.class);

  jobConf.setNumMapTasks(numMaps);
  jobConf.setNumReduceTasks(numReduces);
  return jobConf;
}
Example #10
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public static void readTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(ReadMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #11
Source File: TestParquetMetadataConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
Note: OriginalType.UTF8 in this example and the next is Parquet's logical type annotation for binary columns, not org.apache.hadoop.io.UTF8; it is included here because it shares the simple name.

@Test
public void testIgnoreStatsWithSignedSortOrder() {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("z"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY)
      .as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      StatsHelper.V1.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not include min/max: " + convertedStats,
      convertedStats.hasNonNullValue());
  Assert.assertTrue("Stats should have null count: " + convertedStats,
      convertedStats.isNumNullsSet());
  Assert.assertEquals("Stats should have 3 nulls: " + convertedStats,
      3L, convertedStats.getNumNulls());
}
Example #12
Source File: TestParquetMetadataConverter.java From parquet-mr with Apache License 2.0 | 6 votes |
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();

  PrimitiveType binaryType =
      Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      ParquetMetadataConverter.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not be empty: " + convertedStats,
      convertedStats.isEmpty());
  Assert.assertArrayEquals("min == max: " + convertedStats,
      convertedStats.getMaxBytes(),
      convertedStats.getMinBytes());
}
Example #13
Source File: IOMapperBase.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Map file name and offset into statistical data.
 * <p>
 * The map task is to get the <tt>key</tt>, which contains the file name,
 * and the <tt>value</tt>, which is the offset within the file.
 *
 * The parameters are passed to the abstract method
 * {@link #doIO(Reporter,String,long)}, which performs the io operation,
 * usually read or write data, and then
 * {@link #collectStats(OutputCollector,String,long,Object)}
 * is called to prepare stat data for a subsequent reducer.
 */
public void map(UTF8 key, LongWritable value,
                OutputCollector<UTF8, UTF8> output,
                Reporter reporter) throws IOException {
  String name = key.toString();
  long longValue = value.get();

  reporter.setStatus("starting " + name + " ::host = " + hostName);

  long tStart = System.currentTimeMillis();
  Object statValue = doIO(reporter, name, longValue);
  long tEnd = System.currentTimeMillis();
  long execTime = tEnd - tStart;
  collectStats(output, name, execTime, statValue);

  reporter.setStatus("finished " + name + " ::host = " + hostName);
}
Example #14
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public static void seekTest(FileSystem fs, boolean fastCheck) throws Exception {
  fs.delete(READ_DIR, true);

  JobConf job = new JobConf(conf, TestFileSystem.class);
  job.setBoolean("fs.test.fastCheck", fastCheck);

  FileInputFormat.setInputPaths(job, CONTROL_DIR);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(SeekMapper.class);
  job.setReducerClass(LongSumReducer.class);

  FileOutputFormat.setOutputPath(job, READ_DIR);
  job.setOutputKeyClass(UTF8.class);
  job.setOutputValueClass(LongWritable.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Example #15
Source File: NamespaceInfo.java From RDFS with Apache License 2.0 | 5 votes |
public void readFields(DataInput in) throws IOException {
  buildVersion = UTF8.readString(in);
  layoutVersion = in.readInt();
  namespaceID = in.readInt();
  cTime = in.readLong();
  distributedUpgradeVersion = in.readInt();
}
Example #16
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public static void createControlFile(FileSystem fs,
                                     long megaBytes, int numFiles,
                                     long seed) throws Exception {
  LOG.info("creating control file: " + megaBytes + " bytes, " + numFiles + " files");

  Path controlFile = new Path(CONTROL_DIR, "files");
  fs.delete(controlFile, true);
  Random random = new Random(seed);

  SequenceFile.Writer writer =
      SequenceFile.createWriter(fs, conf, controlFile,
                                UTF8.class, LongWritable.class, CompressionType.NONE);

  long totalSize = 0;
  long maxSize = ((megaBytes / numFiles) * 2) + 1;
  try {
    while (totalSize < megaBytes) {
      UTF8 name = new UTF8(Long.toString(random.nextLong()));

      long size = random.nextLong();
      if (size < 0)
        size = -size;
      size = size % maxSize;

      //LOG.info(" adding: name="+name+" size="+size);

      writer.append(name, new LongWritable(size));

      totalSize += size;
    }
  } finally {
    writer.close();
  }
  LOG.info("created control file for: " + totalSize + " bytes");
}
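As a companion to the writer above, here is a minimal sketch (assumed, not part of the test suite) of reading those UTF8/LongWritable pairs back with the old SequenceFile.Reader API:

SequenceFile.Reader reader = new SequenceFile.Reader(fs, controlFile, conf);
try {
  UTF8 name = new UTF8();
  LongWritable size = new LongWritable();
  while (reader.next(name, size)) { // fills key and value in place
    System.out.println(name + " -> " + size.get());
  }
} finally {
  reader.close();
}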
Example #17
Source File: AccumulatingReducer.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void reduce(UTF8 key,
                   Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output,
                   Reporter reporter) throws IOException {
  String field = key.toString();

  reporter.setStatus("starting " + field + " ::host = " + hostName);

  // concatenate strings
  if (field.startsWith("s:")) {
    String sSum = "";
    while (values.hasNext())
      sSum += values.next().toString() + ";";
    output.collect(key, new UTF8(sSum));
    reporter.setStatus("finished " + field + " ::host = " + hostName);
    return;
  }
  // sum float values
  if (field.startsWith("f:")) {
    float fSum = 0;
    while (values.hasNext())
      fSum += Float.parseFloat(values.next().toString());
    output.collect(key, new UTF8(String.valueOf(fSum)));
    reporter.setStatus("finished " + field + " ::host = " + hostName);
    return;
  }
  // sum long values
  if (field.startsWith("l:")) {
    long lSum = 0;
    while (values.hasNext()) {
      lSum += Long.parseLong(values.next().toString());
    }
    output.collect(key, new UTF8(String.valueOf(lSum)));
  }
  reporter.setStatus("finished " + field + " ::host = " + hostName);
}
Example #18
Source File: NamespaceInfo.java From RDFS with Apache License 2.0 | 5 votes |
public void write(DataOutput out) throws IOException {
  UTF8.writeString(out, getBuildVersion());
  out.writeInt(getLayoutVersion());
  out.writeInt(getNamespaceID());
  out.writeLong(getCTime());
  out.writeInt(getDistributedUpgradeVersion());
}
Example #19
Source File: DatanodeDescriptor.java From RDFS with Apache License 2.0 | 5 votes |
/** Serialization for FSEditLog */
void readFieldsFromFSEditLog(DataInput in) throws IOException {
  this.name = UTF8.readString(in);
  this.storageID = UTF8.readString(in);
  this.infoPort = in.readShort() & 0x0000ffff;
  this.capacity = in.readLong();
  this.dfsUsed = in.readLong();
  this.remaining = in.readLong();
  this.lastUpdate = in.readLong();
  this.xceiverCount = in.readInt();
  this.location = Text.readString(in);
  this.hostName = Text.readString(in);
  setAdminState(WritableUtils.readEnum(in, AdminStates.class));
}
Example #20
Source File: FSImageSerialization.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Read the path from the image and convert it to byte[][] directly;
 * this saves an array copy and conversions to and from String.
 * @param in
 * @return the array, each element of which is a byte[] representation
 *         of a path component
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public static byte[][] readPathComponents(DataInputStream in)
    throws IOException {
  UTF8 ustr = TL_DATA.get().U_STR;
  ustr.readFields(in);
  return DFSUtil.bytes2byteArray(ustr.getBytes(),
      ustr.getLength(), (byte) Path.SEPARATOR_CHAR);
}
Example #21
Source File: FSImageSerialization.java From RDFS with Apache License 2.0 | 5 votes |
@SuppressWarnings("deprecation") public static byte[] readBytes(DataInputStream in) throws IOException { UTF8 ustr = TL_DATA.get().U_STR; ustr.readFields(in); int len = ustr.getLength(); byte[] bytes = new byte[len]; System.arraycopy(ustr.getBytes(), 0, bytes, 0, len); return bytes; }
Example #22
Source File: TestFileSystem.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void map(UTF8 key, LongWritable value,
                OutputCollector<UTF8, LongWritable> collector,
                Reporter reporter) throws IOException {
  String name = key.toString();
  long size = value.get();
  long seed = Long.parseLong(name);

  random.setSeed(seed);
  reporter.setStatus("creating " + name);

  // write to temp file initially to permit parallel execution
  Path tempFile = new Path(DATA_DIR, name + suffix);
  OutputStream out = fs.create(tempFile);

  long written = 0;
  try {
    while (written < size) {
      if (fastCheck) {
        Arrays.fill(buffer, (byte) random.nextInt(Byte.MAX_VALUE));
      } else {
        random.nextBytes(buffer);
      }
      long remains = size - written;
      int length = (remains <= buffer.length) ? (int) remains : buffer.length;
      out.write(buffer, 0, length);
      written += length;
      reporter.setStatus("writing " + name + "@" + written + "/" + size);
    }
  } finally {
    out.close();
  }
  // rename to final location
  fs.rename(tempFile, new Path(DATA_DIR, name));

  collector.collect(new UTF8("bytes"), new LongWritable(written));

  reporter.setStatus("wrote " + name);
}
Example #23
Source File: MRBench.java From hadoop with Apache License 2.0 | 5 votes |
public void reduce(UTF8 key, Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output,
                   Reporter reporter) throws IOException {
  while (values.hasNext()) {
    output.collect(key, new UTF8(values.next().toString()));
  }
}
Example #24
Source File: MRBench.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void map(WritableComparable key, Text value,
                OutputCollector<UTF8, UTF8> output,
                Reporter reporter) throws IOException {
  String line = value.toString();
  output.collect(new UTF8(process(line)), new UTF8(""));
}
Example #25
Source File: MRBench.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void reduce(UTF8 key, Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output,
                   Reporter reporter) throws IOException {
  while (values.hasNext()) {
    output.collect(key, new UTF8(values.next().toString()));
  }
}
Example #26
Source File: DatanodeID.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
  name = UTF8.readString(in);
  storageID = UTF8.readString(in);
  // The infoPort read could be negative if the port is a large number
  // (more than 15 bits in storage size, but less than 16 bits), so chop
  // off the upper two bytes (and hence the sign bits) before setting the field.
  this.infoPort = in.readShort() & 0x0000ffff;
}
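The masking idiom above deserves a quick standalone illustration (a sketch, not Hadoop code): writeShort truncates an int to 16 bits, and readShort sign-extends on the way back, so any port above 32767 comes back negative until the mask strips the sign extension.

public class UnsignedShortDemo {
  public static void main(String[] args) {
    int port = 50070;                    // a typical HDFS info port, above Short.MAX_VALUE
    short stored = (short) port;         // what DataOutput.writeShort effectively keeps
    System.out.println(stored);          // prints -15466: the sign bit is now set
    int recovered = stored & 0x0000ffff; // chop off the sign-extended high bytes
    System.out.println(recovered);       // prints 50070: the original port restored
  }
}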
Example #27
Source File: DatanodeDescriptor.java From hadoop-gpu with Apache License 2.0 | 5 votes |
/** Serialization for FSEditLog */
void readFieldsFromFSEditLog(DataInput in) throws IOException {
  this.name = UTF8.readString(in);
  this.storageID = UTF8.readString(in);
  this.infoPort = in.readShort() & 0x0000ffff;
  this.capacity = in.readLong();
  this.dfsUsed = in.readLong();
  this.remaining = in.readLong();
  this.lastUpdate = in.readLong();
  this.xceiverCount = in.readInt();
  this.location = Text.readString(in);
  this.hostName = Text.readString(in);
  setAdminState(WritableUtils.readEnum(in, AdminStates.class));
}
Example #28
Source File: NamespaceInfo.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void write(DataOutput out) throws IOException {
  UTF8.writeString(out, getBuildVersion());
  out.writeInt(getLayoutVersion());
  out.writeInt(getNamespaceID());
  out.writeLong(getCTime());
  out.writeInt(getDistributedUpgradeVersion());
}
Example #29
Source File: NamespaceInfo.java From hadoop-gpu with Apache License 2.0 | 5 votes |
public void readFields(DataInput in) throws IOException {
  buildVersion = UTF8.readString(in);
  layoutVersion = in.readInt();
  namespaceID = in.readInt();
  cTime = in.readLong();
  distributedUpgradeVersion = in.readInt();
}
Example #30
Source File: MRBench.java From RDFS with Apache License 2.0 | 5 votes |
public void reduce(UTF8 key, Iterator<UTF8> values,
                   OutputCollector<UTF8, UTF8> output,
                   Reporter reporter) throws IOException {
  while (values.hasNext()) {
    output.collect(key, new UTF8(values.next().toString()));
  }
}