Java Code Examples for org.apache.hadoop.io.Text#write()
The following examples show how to use
org.apache.hadoop.io.Text#write() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MarkLogicInputSplit.java From marklogic-contentpump with Apache License 2.0 | 6 votes |
@Override public void write(DataOutput out) throws IOException { out.writeLong(start); out.writeLong(length); Text forestIdText = new Text(forestId.toByteArray()); forestIdText.write(out); if (hostName != null && hostName.length > 0) { Text.writeString(out, hostName[0]); } out.writeBoolean(isLastSplit); int replicaSize = (replicas != null) ? replicas.size() : 0; out.writeInt(replicaSize); for (int i=0; i < replicaSize; i++) { Text.writeString(out, replicas.get(i).getForest()); Text.writeString(out, replicas.get(i).getHostName()); } }
Example 2
Source File: TestIndexedSort.java From hadoop-gpu with Apache License 2.0 | 6 votes |
public WritableSortable(int j) throws IOException { seed = r.nextLong(); r.setSeed(seed); Text t = new Text(); StringBuffer sb = new StringBuffer(); indices = new int[j]; offsets = new int[j]; check = new String[j]; DataOutputBuffer dob = new DataOutputBuffer(); for (int i = 0; i < j; ++i) { indices[i] = i; offsets[i] = dob.getLength(); genRandom(t, r.nextInt(15) + 1, sb); t.write(dob); check[i] = t.toString(); } eob = dob.getLength(); bytes = dob.getData(); comparator = WritableComparator.get(Text.class); }
Example 3
Source File: TestIndexedSort.java From RDFS with Apache License 2.0 | 6 votes |
public WritableSortable(int j) throws IOException { seed = r.nextLong(); r.setSeed(seed); Text t = new Text(); StringBuffer sb = new StringBuffer(); indices = new int[j]; offsets = new int[j]; check = new String[j]; DataOutputBuffer dob = new DataOutputBuffer(); for (int i = 0; i < j; ++i) { indices[i] = i; offsets[i] = dob.getLength(); genRandom(t, r.nextInt(15) + 1, sb); t.write(dob); check[i] = t.toString(); } eob = dob.getLength(); bytes = dob.getData(); comparator = WritableComparator.get(Text.class); }
Example 4
Source File: TextSerializerTest.java From incubator-gobblin with Apache License 2.0 | 6 votes |
@Test public void testDeserialize() throws IOException { // Use Hadoop's serializer, verify our deserializer can read the string back for (String textToSerialize : textsToSerialize) { ByteArrayOutputStream bOs = new ByteArrayOutputStream(); DataOutputStream dataOutputStream = new DataOutputStream(bOs); Text hadoopText = new Text(); hadoopText.set(textToSerialize); hadoopText.write(dataOutputStream); dataOutputStream.close(); ByteArrayInputStream bIn = new ByteArrayInputStream(bOs.toByteArray()); DataInputStream dataInputStream = new DataInputStream(bIn); String deserializedString = TextSerializer.readTextAsString(dataInputStream); Assert.assertEquals(deserializedString, textToSerialize); } }
Example 5
Source File: TestIndexedSort.java From big-c with Apache License 2.0 | 6 votes |
public WritableSortable(int j) throws IOException { seed = r.nextLong(); r.setSeed(seed); Text t = new Text(); StringBuilder sb = new StringBuilder(); indices = new int[j]; offsets = new int[j]; check = new String[j]; DataOutputBuffer dob = new DataOutputBuffer(); for (int i = 0; i < j; ++i) { indices[i] = i; offsets[i] = dob.getLength(); genRandom(t, r.nextInt(15) + 1, sb); t.write(dob); check[i] = t.toString(); } eob = dob.getLength(); bytes = dob.getData(); comparator = WritableComparator.get(Text.class); }
Example 6
Source File: TestIndexedSort.java From hadoop with Apache License 2.0 | 6 votes |
public WritableSortable(int j) throws IOException { seed = r.nextLong(); r.setSeed(seed); Text t = new Text(); StringBuilder sb = new StringBuilder(); indices = new int[j]; offsets = new int[j]; check = new String[j]; DataOutputBuffer dob = new DataOutputBuffer(); for (int i = 0; i < j; ++i) { indices[i] = i; offsets[i] = dob.getLength(); genRandom(t, r.nextInt(15) + 1, sb); t.write(dob); check[i] = t.toString(); } eob = dob.getLength(); bytes = dob.getData(); comparator = WritableComparator.get(Text.class); }
Example 7
Source File: TextSerializer.java From pravega-samples with Apache License 2.0 | 5 votes |
@Override public ByteBuffer serialize(Text value) { ByteArrayOutputStream bout = new ByteArrayOutputStream(); ObjectOutputStream oout; try { oout = new ObjectOutputStream(bout); value.write(oout); oout.close(); bout.close(); } catch (IOException e) { throw new RuntimeException(e); } return ByteBuffer.wrap(bout.toByteArray()); }
Example 8
Source File: RDFWritable.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
@Override public void write(DataOutput out) throws IOException { if (graphUri == null) { out.writeByte(0); } else { out.writeByte(1); Text t = new Text(graphUri); t.write(out); } out.writeByte(type); if (value instanceof Text) { ((Text) value).write(out); } else if (value instanceof MarkLogicNode) { ((MarkLogicNode) value).write(out); } else if (value instanceof BytesWritable) { ((BytesWritable) value).write(out); } //serialize permissions if (permissions == null) { out.writeByte(0); } else { out.writeByte(permissions.length); for(int i=0; i<permissions.length; i++) { Text role = new Text(permissions[i].getRole()); Text cap = new Text(permissions[i].getCapability().toString()); role.write(out); cap.write(out); } } }
Example 9
Source File: IEX2LevAMAZON.java From Clusion with GNU General Public License v3.0 | 5 votes |
@Override public void write(DataOutput dataOutput) throws IOException { dataOutput.writeInt(this.values.size()); for (Text text : values) { text.write(dataOutput); } }
Example 10
Source File: CustomerFlowElement.java From WIFIProbe with Apache License 2.0 | 5 votes |
public void write(DataOutput dataOutput) throws IOException { Text text = new Text(wifiProb==null?"":wifiProb); text.write(dataOutput); IntWritable intWritable = new IntWritable(); intWritable.set(inNoOutWifi); intWritable.write(dataOutput); intWritable.set(inNoOutStore); intWritable.write(dataOutput); intWritable.set(outNoInWifi); intWritable.write(dataOutput); intWritable.set(outNoInStore); intWritable.write(dataOutput); intWritable.set(inAndOutWifi); intWritable.write(dataOutput); intWritable.set(inAndOutStore); intWritable.write(dataOutput); intWritable.set(stayInWifi); intWritable.write(dataOutput); intWritable.set(stayInStore); intWritable.write(dataOutput); DoubleWritable doubleWritable = new DoubleWritable(); doubleWritable.set(jumpRate); doubleWritable.write(dataOutput); doubleWritable.set(deepVisit); doubleWritable.write(dataOutput); doubleWritable.set(inStoreRate); doubleWritable.write(dataOutput); }
Example 11
Source File: TaskState.java From incubator-gobblin with Apache License 2.0 | 5 votes |
@Override public void write(DataOutput out) throws IOException { Text text = new Text(); text.set(this.jobId); text.write(out); text.set(this.taskId); text.write(out); out.writeLong(this.startTime); out.writeLong(this.endTime); out.writeLong(this.duration); super.write(out); }
Example 12
Source File: NewOldCustomElement.java From WIFIProbe with Apache License 2.0 | 5 votes |
public void write(DataOutput dataOutput) throws IOException { Text text = new Text(wifiProb==null?"":wifiProb); text.write(dataOutput); LongWritable longWritable = new LongWritable(); longWritable.set(hour); longWritable.write(dataOutput); longWritable.set(newCustomer); longWritable.write(dataOutput); longWritable.set(oldCustomer); longWritable.write(dataOutput); }
Example 13
Source File: TestMRCombiner.java From tez with Apache License 2.0 | 5 votes |
@Override public DataInputBuffer getKey() throws IOException { DataInputBuffer key = new DataInputBuffer(); Text text = new Text(keys[i]); DataOutputBuffer out = new DataOutputBuffer(); text.write(out); key.reset(out.getData(), out.getLength()); return key; }
Example 14
Source File: CustomWritableWithCircle.java From pxf with Apache License 2.0 | 5 votes |
@Override public void write(DataOutput paramDataOutput) throws IOException { IntWritable localIntWritable = new IntWritable(); localIntWritable.set(this.int1); localIntWritable.write(paramDataOutput); Text localText = new Text(); localText.set(this.circle); localText.write(paramDataOutput); }
Example 15
Source File: TeraInputFormat.java From pravega-samples with Apache License 2.0 | 4 votes |
/** * Use the input splits to take samples of the input and generate sample * keys. By default reads 100,000 keys from 10 locations in the input, sorts * them and picks N-1 keys to generate N equally sized partitions. * @param job the job to sample * @param partFile where to write the output file to * @throws Throwable if something goes wrong */ public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable { long t1 = System.currentTimeMillis(); Configuration conf = job.getConfiguration(); //Instead of reading from hdfs, now the input is from Pravega stream final PravegaInputFormat inFormat = new PravegaInputFormat(); final TextSampler sampler = new TextSampler(); int partitions = job.getNumReduceTasks(); long sampleSize = conf.getLong(TeraSortConfigKeys.SAMPLE_SIZE.key(), TeraSortConfigKeys.DEFAULT_SAMPLE_SIZE); final List<InputSplit> splits = inFormat.getSplits(job); long t2 = System.currentTimeMillis(); System.out.println("Computing input splits took " + (t2 - t1) + "ms"); int samples = Math.min(conf.getInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), TeraSortConfigKeys.DEFAULT_NUM_PARTITIONS), splits.size()); System.out.println("Sampling " + samples + " splits of " + splits.size()); final long recordsPerSample = sampleSize / samples; final int sampleStep = splits.size() / samples; Thread[] samplerReader = new Thread[samples]; SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group"); // take N samples from different parts of the input for(int i=0; i < samples; ++i) { final int idx = i; samplerReader[i] = new Thread (threadGroup,"Sampler Reader " + idx) { { setDaemon(true); } public void run() { long records = 0; try { TaskAttemptContext context = new TaskAttemptContextImpl( job.getConfiguration(), new TaskAttemptID()); RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx), context); reader.initialize(splits.get(sampleStep * idx), context); while (reader.nextKeyValue()) { sampler.addKey(new Text(reader.getCurrentValue().toString().substring(0, 10))); records += 1; if (recordsPerSample <= records) { break; } } } catch (IOException ie){ System.err.println("Got an exception while reading splits " + StringUtils.stringifyException(ie)); throw new RuntimeException(ie); } catch (InterruptedException e) { } } }; samplerReader[i].start(); } FileSystem outFs = partFile.getFileSystem(conf); DataOutputStream writer = outFs.create(partFile, true, 64*1024, (short) 10, outFs.getDefaultBlockSize(partFile)); for (int i = 0; i < samples; i++) { try { samplerReader[i].join(); if(threadGroup.getThrowable() != null){ throw threadGroup.getThrowable(); } } catch (InterruptedException e) { } } for(Text split : sampler.createPartitions(partitions)) { split.write(writer); } writer.close(); long t3 = System.currentTimeMillis(); System.out.println("Computing parititions took " + (t3 - t2) + "ms"); }
Example 16
Source File: CustomWritable.java From pxf with Apache License 2.0 | 4 votes |
@Override public void write(DataOutput out) throws IOException { // 0. Timestamp Text tms_text = new Text(tms); tms_text.write(out); // 1. num, int1, int2 IntWritable intw = new IntWritable(); for (int i = 0; i < num.length; i++) { intw.set(num[i]); intw.write(out); } intw.set(int1); intw.write(out); intw.set(int2); intw.write(out); // 2. st1 Text txt = new Text(); for (int i = 0; i < strings.length; i++) { txt.set(strings[i]); txt.write(out); } txt.set(st1); txt.write(out); // 3. doubles DoubleWritable dw = new DoubleWritable(); for (int i = 0; i < dubs.length; i++) { dw.set(dubs[i]); dw.write(out); } dw.set(db); dw.write(out); // 4. floats FloatWritable fw = new FloatWritable(); for (int i = 0; i < fts.length; i++) { fw.set(fts[i]); fw.write(out); } fw.set(ft); fw.write(out); // 5. longs LongWritable lw = new LongWritable(); for (int i = 0; i < lngs.length; i++) { lw.set(lngs[i]); lw.write(out); } lw.set(lng); lw.write(out); // 6. booleans BooleanWritable bw = new BooleanWritable(); for (int i = 0; i < bools.length; ++i) { bw.set(bools[i]); bw.write(out); } bw.set(bool); bw.write(out); // 7. shorts ShortWritable sw = new ShortWritable(); for (int i = 0; i < shrts.length; ++i) { sw.set(shrts[i]); sw.write(out); } sw.set(shrt); sw.write(out); // 8. bytes // BytesWritable btsw = new BytesWritable(bts); // btsw.write(out); BytesWritable btsw = new BytesWritable(); btsw.setCapacity(bts.length); btsw.setSize(bts.length); btsw.set(bts, 0, bts.length); btsw.write(out); }
Example 17
Source File: TeraInputFormat.java From big-c with Apache License 2.0 | 4 votes |
/** * Use the input splits to take samples of the input and generate sample * keys. By default reads 100,000 keys from 10 locations in the input, sorts * them and picks N-1 keys to generate N equally sized partitions. * @param job the job to sample * @param partFile where to write the output file to * @throws Throwable if something goes wrong */ public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable { long t1 = System.currentTimeMillis(); Configuration conf = job.getConfiguration(); final TeraInputFormat inFormat = new TeraInputFormat(); final TextSampler sampler = new TextSampler(); int partitions = job.getNumReduceTasks(); long sampleSize = conf.getLong(SAMPLE_SIZE, 100000); final List<InputSplit> splits = inFormat.getSplits(job); long t2 = System.currentTimeMillis(); System.out.println("Computing input splits took " + (t2 - t1) + "ms"); int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size()); System.out.println("Sampling " + samples + " splits of " + splits.size()); final long recordsPerSample = sampleSize / samples; final int sampleStep = splits.size() / samples; Thread[] samplerReader = new Thread[samples]; SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group"); // take N samples from different parts of the input for(int i=0; i < samples; ++i) { final int idx = i; samplerReader[i] = new Thread (threadGroup,"Sampler Reader " + idx) { { setDaemon(true); } public void run() { long records = 0; try { TaskAttemptContext context = new TaskAttemptContextImpl( job.getConfiguration(), new TaskAttemptID()); RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx), context); reader.initialize(splits.get(sampleStep * idx), context); while (reader.nextKeyValue()) { sampler.addKey(new Text(reader.getCurrentKey())); records += 1; if (recordsPerSample <= records) { break; } } } catch (IOException ie){ System.err.println("Got an exception while reading splits " + StringUtils.stringifyException(ie)); throw new RuntimeException(ie); } catch (InterruptedException e) { } } }; samplerReader[i].start(); } FileSystem outFs = partFile.getFileSystem(conf); DataOutputStream writer = outFs.create(partFile, true, 64*1024, (short) 10, outFs.getDefaultBlockSize(partFile)); for (int i = 0; i < samples; i++) { try { samplerReader[i].join(); if(threadGroup.getThrowable() != null){ throw threadGroup.getThrowable(); } } catch (InterruptedException e) { } } for(Text split : sampler.createPartitions(partitions)) { split.write(writer); } writer.close(); long t3 = System.currentTimeMillis(); System.out.println("Computing parititions took " + (t3 - t2) + "ms"); }
Example 18
Source File: TeraInputFormat.java From hadoop with Apache License 2.0 | 4 votes |
/** * Use the input splits to take samples of the input and generate sample * keys. By default reads 100,000 keys from 10 locations in the input, sorts * them and picks N-1 keys to generate N equally sized partitions. * @param job the job to sample * @param partFile where to write the output file to * @throws Throwable if something goes wrong */ public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable { long t1 = System.currentTimeMillis(); Configuration conf = job.getConfiguration(); final TeraInputFormat inFormat = new TeraInputFormat(); final TextSampler sampler = new TextSampler(); int partitions = job.getNumReduceTasks(); long sampleSize = conf.getLong(SAMPLE_SIZE, 100000); final List<InputSplit> splits = inFormat.getSplits(job); long t2 = System.currentTimeMillis(); System.out.println("Computing input splits took " + (t2 - t1) + "ms"); int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size()); System.out.println("Sampling " + samples + " splits of " + splits.size()); final long recordsPerSample = sampleSize / samples; final int sampleStep = splits.size() / samples; Thread[] samplerReader = new Thread[samples]; SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group"); // take N samples from different parts of the input for(int i=0; i < samples; ++i) { final int idx = i; samplerReader[i] = new Thread (threadGroup,"Sampler Reader " + idx) { { setDaemon(true); } public void run() { long records = 0; try { TaskAttemptContext context = new TaskAttemptContextImpl( job.getConfiguration(), new TaskAttemptID()); RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx), context); reader.initialize(splits.get(sampleStep * idx), context); while (reader.nextKeyValue()) { sampler.addKey(new Text(reader.getCurrentKey())); records += 1; if (recordsPerSample <= records) { break; } } } catch (IOException ie){ System.err.println("Got an exception while reading splits " + StringUtils.stringifyException(ie)); throw new RuntimeException(ie); } catch (InterruptedException e) { } } }; samplerReader[i].start(); } FileSystem outFs = partFile.getFileSystem(conf); DataOutputStream writer = outFs.create(partFile, true, 64*1024, (short) 10, outFs.getDefaultBlockSize(partFile)); for (int i = 0; i < samples; i++) { try { samplerReader[i].join(); if(threadGroup.getThrowable() != null){ throw threadGroup.getThrowable(); } } catch (InterruptedException e) { } } for(Text split : sampler.createPartitions(partitions)) { split.write(writer); } writer.close(); long t3 = System.currentTimeMillis(); System.out.println("Computing parititions took " + (t3 - t2) + "ms"); }
Example 19
Source File: TeraInputFormat.java From incubator-tez with Apache License 2.0 | 4 votes |
/** * Use the input splits to take samples of the input and generate sample * keys. By default reads 100,000 keys from 10 locations in the input, sorts * them and picks N-1 keys to generate N equally sized partitions. * @param job the job to sample * @param partFile where to write the output file to * @throws Throwable if something goes wrong */ public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable { long t1 = System.currentTimeMillis(); Configuration conf = job.getConfiguration(); final TeraInputFormat inFormat = new TeraInputFormat(); final TextSampler sampler = new TextSampler(); int partitions = job.getNumReduceTasks(); long sampleSize = conf.getLong(SAMPLE_SIZE, 100000); final List<InputSplit> splits = inFormat.getSplits(job); long t2 = System.currentTimeMillis(); System.out.println("Computing input splits took " + (t2 - t1) + "ms"); int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size()); System.out.println("Sampling " + samples + " splits of " + splits.size()); final long recordsPerSample = sampleSize / samples; final int sampleStep = splits.size() / samples; Thread[] samplerReader = new Thread[samples]; SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group"); // take N samples from different parts of the input for(int i=0; i < samples; ++i) { final int idx = i; samplerReader[i] = new Thread (threadGroup,"Sampler Reader " + idx) { { setDaemon(true); } public void run() { long records = 0; try { TaskAttemptContext context = new TaskAttemptContextImpl( job.getConfiguration(), new TaskAttemptID()); RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx), context); reader.initialize(splits.get(sampleStep * idx), context); while (reader.nextKeyValue()) { sampler.addKey(new Text(reader.getCurrentKey())); records += 1; if (recordsPerSample <= records) { break; } } } catch (IOException ie){ System.err.println("Got an exception while reading splits " + StringUtils.stringifyException(ie)); throw new RuntimeException(ie); } catch (InterruptedException e) { } } }; samplerReader[i].start(); } FileSystem outFs = partFile.getFileSystem(conf); DataOutputStream writer = outFs.create(partFile, true, 64*1024, (short) 10, outFs.getDefaultBlockSize(partFile)); for (int i = 0; i < samples; i++) { try { samplerReader[i].join(); if(threadGroup.getThrowable() != null){ throw threadGroup.getThrowable(); } } catch (InterruptedException e) { } } for(Text split : sampler.createPartitions(partitions)) { split.write(writer); } writer.close(); long t3 = System.currentTimeMillis(); System.out.println("Computing parititions took " + (t3 - t2) + "ms"); }