org.apache.mahout.math.VectorWritable Java Examples
The following examples show how to use
org.apache.mahout.math.VectorWritable.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: 1000021_CDbwEvaluator_s.java From coming with MIT License | 6 votes |
/**
 * Computes the separation score across all pairs of distinct clusters.
 *
 * <p>The smallest distance (under {@code measure}) between a representative point of one
 * cluster and a representative point of a different cluster is divided by
 * {@code 1.0 + interClusterDensity()}.
 *
 * @return the minimum cross-cluster distance divided by one plus the inter-cluster density;
 *         the minimum remains {@link Double#MAX_VALUE} when no cross-cluster pair is compared
 */
public double separation() {
  double closest = Double.MAX_VALUE;
  for (Map.Entry<Integer, List<VectorWritable>> outer : representativePoints.entrySet()) {
    Integer outerId = outer.getKey();
    List<VectorWritable> outerPoints = outer.getValue();
    for (Map.Entry<Integer, List<VectorWritable>> inner : representativePoints.entrySet()) {
      // Only points that belong to two different clusters are compared.
      if (!outerId.equals(inner.getKey())) {
        List<VectorWritable> innerPoints = inner.getValue();
        for (VectorWritable outerPoint : outerPoints) {
          for (VectorWritable innerPoint : innerPoints) {
            double candidate = measure.distance(outerPoint.get(), innerPoint.get());
            closest = candidate < closest ? candidate : closest;
          }
        }
      }
    }
  }
  return closest / (1.0 + interClusterDensity());
}
Example #2
Source File: LaserMessageConsumer.java From laser with Apache License 2.0 | 6 votes |
/**
 * Creates a consumer and opens one SequenceFile writer for online features and one for
 * offline features.
 *
 * <p>The online writer targets {@code output/ONLINE_FOLDER/<onlineVersion>} and stores
 * {@code Text -> OnlineVectorWritable} records; the offline writer targets
 * {@code output/OFFLINE_FOLDER/<offlineVersion>} and stores {@code Text -> VectorWritable}
 * records.
 *
 * @param collection the collection name kept by this consumer
 * @param output the base output directory under which both folders are created
 * @param fs the file system used to create the writers
 * @param conf the Hadoop configuration used to create the writers
 * @throws IOException if either writer cannot be created
 */
public LaserMessageConsumer(String collection, Path output, FileSystem fs, Configuration conf)
    throws IOException {
  this.collection = collection;
  this.output = output;
  this.fs = fs;
  this.conf = conf;

  Path onlinePath = new Path(output, ONLINE_FOLDER + "/" + Long.toString(onlineVersion));
  onlineWriter = SequenceFile.createWriter(fs, conf, onlinePath, Text.class,
      OnlineVectorWritable.class);

  Path offlinePath = new Path(output, OFFLINE_FOLDER + "/" + Long.toString(offlineVersion));
  boolean offlineOpened = false;
  try {
    offlineWriter = SequenceFile.createWriter(fs, conf, offlinePath, Text.class,
        VectorWritable.class);
    offlineOpened = true;
  } finally {
    if (!offlineOpened) {
      // The constructor is about to fail, so nobody else can ever close the online writer.
      try {
        onlineWriter.close();
      } catch (IOException ignored) {
        // Deliberately ignored so the original failure is the one that propagates.
      }
    }
  }
}
Example #3
Source File: 1000021_TestCDbwEvaluator_s.java From coming with MIT License | 6 votes |
/**
 * Runs Dirichlet clustering on the test data, then the CDbw driver on its output, and finally
 * dumps the representative points produced by each CDbw iteration.
 */
@Test
public void testDirichlet() throws Exception {
  ModelDistribution<VectorWritable> gaussianModels =
      new GaussianClusterDistribution(new VectorWritable(new DenseVector(2)));

  Path dirichletInput = getTestTempDirPath("testdata");
  Path dirichletOutput = getTestTempDirPath("output");
  DirichletDriver.runJob(dirichletInput, dirichletOutput, gaussianModels,
      15, 5, 1.0, 1, true, true, 0, true);

  int numIterations = 2;
  Path output = getTestTempDirPath("output");
  Path clustersIn = new Path(output, "clusters-5");
  Path clusteredPointsIn = new Path(output, "clusteredPoints");
  CDbwDriver.runJob(clustersIn, clusteredPointsIn, output,
      new EuclideanDistanceMeasure(), numIterations, 1);

  checkRefPoints(numIterations);
}
Example #4
Source File: 1000021_TestCDbwEvaluator_s.java From coming with MIT License | 6 votes |
/**
 * Builds the synthetic fixture: four {@code Canopy} clusters with ids 1, 3, 5 and 7 whose
 * centers sit at (+/-dC, +/-dC), each with five representative points (a copy of the center
 * plus the center shifted by (+/-dP, +/-dP)).
 *
 * @param dC a double cluster center offset
 * @param dP a double representative point offset
 * @param measure the DistanceMeasure handed to every cluster
 */
private void initData(double dC, double dP, DistanceMeasure measure) {
  clusters = new HashMap<Integer, Cluster>();
  clusters.put(1, new Canopy(new DenseVector(new double[] { -dC, -dC }), 1, measure));
  clusters.put(3, new Canopy(new DenseVector(new double[] { -dC, dC }), 3, measure));
  clusters.put(5, new Canopy(new DenseVector(new double[] { dC, dC }), 5, measure));
  clusters.put(7, new Canopy(new DenseVector(new double[] { dC, -dC }), 7, measure));

  // Corner offsets, listed in the order the representative points are added.
  double[][] cornerOffsets = {
    { dP, dP },
    { dP, -dP },
    { -dP, -dP },
    { -dP, dP },
  };

  representativePoints = new HashMap<Integer, List<VectorWritable>>();
  for (Cluster cluster : clusters.values()) {
    List<VectorWritable> points = new ArrayList<VectorWritable>();
    representativePoints.put(cluster.getId(), points);
    points.add(new VectorWritable(cluster.getCenter().clone()));
    for (double[] offset : cornerOffsets) {
      points.add(new VectorWritable(cluster.getCenter().plus(new DenseVector(offset))));
    }
  }
}
Example #5
Source File: 1000021_TestCDbwEvaluator_s.java From coming with MIT License | 6 votes |
/**
 * Prints every (cluster id, point) record stored in the {@code representativePoints-<i>}
 * directories of the "output" test directory, for i from 0 to {@code numIterations} inclusive.
 * Files whose name starts with "." are skipped.
 *
 * @param numIterations the highest iteration index to dump
 * @throws IOException if listing or reading the sequence files fails
 */
private void checkRefPoints(int numIterations) throws IOException {
  for (int iteration = 0; iteration <= numIterations; iteration++) {
    Path iterationDir =
        new Path(getTestTempDirPath("output"), "representativePoints-" + iteration);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus status : fs.listStatus(iterationDir)) {
      if (status.getPath().getName().startsWith(".")) {
        continue;
      }
      SequenceFile.Reader reader = new SequenceFile.Reader(fs, status.getPath(), conf);
      try {
        Writable clusterId = new IntWritable(0);
        VectorWritable point = new VectorWritable();
        while (reader.next(clusterId, point)) {
          String formatted = AbstractCluster.formatVector(point.get(), null);
          System.out.println("\tC-" + clusterId + ": " + formatted);
        }
      } finally {
        reader.close();
      }
    }
  }
}
Example #6
Source File: Synthetic2DClusteringPrep.java From hiped2 with Apache License 2.0 | 6 votes |
/**
 * Converts a text file of 2D points into a block-compressed SequenceFile of
 * {@code NullWritable -> VectorWritable} records.
 *
 * <p>Each input line is tokenized with {@code StringUtils.split}; the first two tokens are
 * parsed as doubles and written as a two-element {@code DenseVector}. Lines that yield no
 * tokens (for example a trailing empty line) are skipped.
 *
 * @param inputFile the local text file holding one point per line
 * @param outputPath the SequenceFile to create
 * @throws IOException if the file cannot be read or written, or if a non-empty line holds
 *         fewer than two tokens
 * @throws NumberFormatException if one of the first two tokens is not a valid double
 */
public static void write(File inputFile, Path outputPath) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);

  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outputPath,
      NullWritable.class, VectorWritable.class,
      SequenceFile.CompressionType.BLOCK, new DefaultCodec());
  try {
    int lineNumber = 0;
    for (String line : FileUtils.readLines(inputFile)) {
      lineNumber++;
      String[] parts = StringUtils.split(line);
      if (parts == null || parts.length == 0) {
        // Nothing to parse on this line; previously this crashed on parts[0].
        continue;
      }
      if (parts.length < 2) {
        throw new IOException("Expected two coordinates on line " + lineNumber
            + " of " + inputFile + " but found: " + line);
      }
      // parseDouble avoids the intermediate Double boxing of valueOf.
      double x = Double.parseDouble(parts[0]);
      double y = Double.parseDouble(parts[1]);
      writer.append(NullWritable.get(),
          new VectorWritable(new DenseVector(new double[] { x, y })));
    }
  } finally {
    writer.close();
  }
}
Example #7
Source File: 1000021_TestCDbwEvaluator_t.java From coming with MIT License | 6 votes |
/**
 * End-to-end check: Dirichlet clustering feeds the CDbw driver, after which the representative
 * points written for every CDbw iteration are printed.
 */
@Test
public void testDirichlet() throws Exception {
  VectorWritable prototype = new VectorWritable(new DenseVector(2));
  ModelDistribution<VectorWritable> distribution = new GaussianClusterDistribution(prototype);

  DirichletDriver.runJob(
      getTestTempDirPath("testdata"),
      getTestTempDirPath("output"),
      distribution,
      15,
      5,
      1.0,
      1,
      true,
      true,
      0,
      true);

  int numIterations = 2;
  Path output = getTestTempDirPath("output");
  CDbwDriver.runJob(
      new Path(output, "clusters-5"),
      new Path(output, "clusteredPoints"),
      output,
      new EuclideanDistanceMeasure(),
      numIterations,
      1);

  checkRefPoints(numIterations);
}
Example #8
Source File: 1000021_TestCDbwEvaluator_t.java From coming with MIT License | 6 votes |
/**
 * Populates {@code clusters} with four {@code Canopy} instances (ids 1, 3, 5, 7) centered at
 * (+/-dC, +/-dC) and {@code representativePoints} with five points per cluster: a clone of the
 * center followed by the center displaced by each of the four (+/-dP, +/-dP) corners.
 *
 * @param dC a double cluster center offset
 * @param dP a double representative point offset
 * @param measure the DistanceMeasure
 */
private void initData(double dC, double dP, DistanceMeasure measure) {
  Map<Integer, Cluster> syntheticClusters = new HashMap<Integer, Cluster>();
  syntheticClusters.put(1, new Canopy(new DenseVector(new double[] { -dC, -dC }), 1, measure));
  syntheticClusters.put(3, new Canopy(new DenseVector(new double[] { -dC, dC }), 3, measure));
  syntheticClusters.put(5, new Canopy(new DenseVector(new double[] { dC, dC }), 5, measure));
  syntheticClusters.put(7, new Canopy(new DenseVector(new double[] { dC, -dC }), 7, measure));
  clusters = syntheticClusters;

  Map<Integer, List<VectorWritable>> syntheticPoints =
      new HashMap<Integer, List<VectorWritable>>();
  representativePoints = syntheticPoints;
  for (Cluster cluster : clusters.values()) {
    List<VectorWritable> points = new ArrayList<VectorWritable>();
    syntheticPoints.put(cluster.getId(), points);

    // The center itself comes first, then the four corners around it.
    points.add(new VectorWritable(cluster.getCenter().clone()));
    points.add(new VectorWritable(
        cluster.getCenter().plus(new DenseVector(new double[] { dP, dP }))));
    points.add(new VectorWritable(
        cluster.getCenter().plus(new DenseVector(new double[] { dP, -dP }))));
    points.add(new VectorWritable(
        cluster.getCenter().plus(new DenseVector(new double[] { -dP, -dP }))));
    points.add(new VectorWritable(
        cluster.getCenter().plus(new DenseVector(new double[] { -dP, dP }))));
  }
}
Example #9
Source File: 1000021_TestCDbwEvaluator_t.java From coming with MIT License | 6 votes |
/**
 * Dumps the representative points written for iterations 0 through {@code numIterations}.
 *
 * <p>For each iteration the {@code representativePoints-<i>} directory under the "output" test
 * directory is listed, files whose name starts with "." are ignored, and every remaining
 * sequence file is read record by record and printed to standard output.
 *
 * @param numIterations the last iteration index (inclusive) to print
 * @throws IOException if a directory cannot be listed or a file cannot be read
 */
private void checkRefPoints(int numIterations) throws IOException {
  int iteration = 0;
  while (iteration <= numIterations) {
    Path out = new Path(getTestTempDirPath("output"), "representativePoints-" + iteration);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] entries = fs.listStatus(out);
    for (FileStatus entry : entries) {
      boolean hidden = entry.getPath().getName().startsWith(".");
      if (!hidden) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, entry.getPath(), conf);
        try {
          Writable clusterId = new IntWritable(0);
          VectorWritable point = new VectorWritable();
          while (reader.next(clusterId, point)) {
            System.out.println(
                "\tC-" + clusterId + ": " + AbstractCluster.formatVector(point.get(), null));
          }
        } finally {
          reader.close();
        }
      }
    }
    iteration++;
  }
}
Example #10
Source File: Step5.java From recsys-offline with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { // TODO Auto-generated method stub Configuration conf1 = new Configuration(); Job job1 = new Job(conf1, "wiki job five"); job1.setNumReduceTasks(1); job1.setJarByClass(Step5.class); job1.setInputFormatClass(SequenceFileInputFormat.class); job1.setMapperClass(WikiMapper5.class); job1.setMapOutputKeyClass(VarLongWritable.class); job1.setMapOutputValueClass(VectorWritable.class); job1.setCombinerClass(WiKiCombiner5.class); job1.setReducerClass(WiKiReducer5.class); job1.setOutputKeyClass(VarLongWritable.class); job1.setOutputValueClass(RecommendedItemsWritable.class); // job1.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileInputFormat.addInputPath(job1, new Path(INPUT_PATH)); FileOutputFormat.setOutputPath(job1, new Path(OUTPUT_PATH)); if(!job1.waitForCompletion(true)){ System.exit(1); // run error then exit } }
Example #11
Source File: 1000021_CDbwEvaluator_s.java From coming with MIT License | 6 votes |
private void setStDev(int cI) { List<VectorWritable> repPts = representativePoints.get(cI); //if (repPts == null) { // System.out.println(); //} int s0 = 0; Vector s1 = null; Vector s2 = null; for (VectorWritable vw : repPts) { s0++; Vector v = vw.get(); s1 = s1 == null ? v.clone() : s1.plus(v); s2 = s2 == null ? v.times(v) : s2.plus(v.times(v)); } Vector std = s2.times(s0).minus(s1.times(s1)).assign(new SquareRootFunction()).divide(s0); double d = std.zSum() / std.size(); //System.out.println("stDev[" + cI + "]=" + d); stDevs.put(cI, d); }
Example #12
Source File: 1000021_CDbwEvaluator_s.java From coming with MIT License | 6 votes |
/**
 * Returns the fraction of the representative points of clusters {@code cI} and {@code cJ}
 * that lie within the average of the two clusters' stored spread values from {@code uIJ}.
 *
 * @param uIJ the reference point the distances are measured from
 * @param cI the id of the first cluster
 * @param cJ the id of the second cluster
 * @return the number of points within range divided by the total number of points of both
 *         clusters
 */
double interDensity(Vector uIJ, int cI, int cJ) {
  List<VectorWritable> repI = representativePoints.get(cI);
  List<VectorWritable> repJ = representativePoints.get(cJ);
  double radius = (stDevs.get(cI) + stDevs.get(cJ)) / 2.0;

  int inRange = countWithinRadius(uIJ, repI, radius) + countWithinRadius(uIJ, repJ, radius);
  return (double) inRange / (repI.size() + repJ.size());
}

/** Counts the points whose distance from {@code origin} does not exceed {@code radius}. */
private int countWithinRadius(Vector origin, List<VectorWritable> points, double radius) {
  int hits = 0;
  for (VectorWritable point : points) {
    if (measure.distance(origin, point.get()) <= radius) {
      hits++;
    }
  }
  return hits;
}
Example #13
Source File: 1000021_CDbwEvaluator_t.java From coming with MIT License | 6 votes |
private void setStDev(int cI) { List<VectorWritable> repPts = representativePoints.get(cI); //if (repPts == null) { // System.out.println(); //} int s0 = 0; Vector s1 = null; Vector s2 = null; for (VectorWritable vw : repPts) { s0++; Vector v = vw.get(); s1 = s1 == null ? v.clone() : s1.plus(v); s2 = s2 == null ? v.times(v) : s2.plus(v.times(v)); } if (s0 > 1) { Vector std = s2.times(s0).minus(s1.times(s1)).assign(new SquareRootFunction()).divide(s0); double d = std.zSum() / std.size(); //System.out.println("stDev[" + cI + "]=" + d); stDevs.put(cI, d); } }
Example #14
Source File: 1000021_CDbwEvaluator_t.java From coming with MIT License | 6 votes |
/**
 * Returns the share of the representative points of clusters {@code cI} and {@code cJ} whose
 * distance from {@code uIJ} is at most the mean of {@code getStdev(cI)} and
 * {@code getStdev(cJ)}.
 *
 * @param uIJ the reference point the distances are measured from
 * @param cI the id of the first cluster
 * @param cJ the id of the second cluster
 * @return the count of points within range divided by the combined size of both point lists
 */
double interDensity(Vector uIJ, int cI, int cJ) {
  List<VectorWritable> pointsOfI = representativePoints.get(cI);
  List<VectorWritable> pointsOfJ = representativePoints.get(cJ);
  double threshold = (getStdev(cI) + getStdev(cJ)) / 2.0;

  int closeEnough = 0;
  for (VectorWritable candidate : pointsOfI) {
    double distance = measure.distance(uIJ, candidate.get());
    if (distance <= threshold) {
      closeEnough++;
    }
  }
  for (VectorWritable candidate : pointsOfJ) {
    double distance = measure.distance(uIJ, candidate.get());
    if (distance <= threshold) {
      closeEnough++;
    }
  }

  int totalPoints = pointsOfI.size() + pointsOfJ.size();
  return (double) closeEnough / totalPoints;
}
Example #15
Source File: 1000021_CDbwEvaluator_t.java From coming with MIT License | 6 votes |
/**
 * Computes the separation score: the smallest distance between representative points of two
 * different clusters, divided by {@code 1.0 + interClusterDensity()}.
 *
 * @return the scaled minimum cross-cluster distance; the minimum remains
 *         {@link Double#MAX_VALUE} when no cross-cluster pair is compared
 */
public double separation() {
  double minDistance = Double.MAX_VALUE;
  for (Map.Entry<Integer, List<VectorWritable>> first : representativePoints.entrySet()) {
    for (Map.Entry<Integer, List<VectorWritable>> second : representativePoints.entrySet()) {
      boolean sameCluster = first.getKey().equals(second.getKey());
      if (sameCluster) {
        continue;
      }
      minDistance = closestPairDistance(first.getValue(), second.getValue(), minDistance);
    }
  }
  return minDistance / (1.0 + interClusterDensity());
}

/** Returns the smaller of {@code bestSoFar} and the closest pair distance between the lists. */
private double closestPairDistance(List<VectorWritable> from, List<VectorWritable> to,
    double bestSoFar) {
  double best = bestSoFar;
  for (VectorWritable source : from) {
    for (VectorWritable target : to) {
      double distance = measure.distance(source.get(), target.get());
      if (distance < best) {
        best = distance;
      }
    }
  }
  return best;
}
Example #16
Source File: Step2.java From recsys-offline with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf1 = new Configuration(); Job job1 = new Job(conf1, "wiki job two"); job1.setNumReduceTasks(1); job1.setJarByClass(Step2.class); job1.setInputFormatClass(SequenceFileInputFormat.class); job1.setMapperClass(WikiMapper2.class); job1.setMapOutputKeyClass(IntWritable.class); job1.setMapOutputValueClass(IntWritable.class); job1.setReducerClass(WiKiReducer2.class); job1.setOutputKeyClass(IntWritable.class); job1.setOutputValueClass(VectorWritable.class); job1.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileInputFormat.addInputPath(job1, new Path(INPUT_PATH)); SequenceFileOutputFormat.setOutputPath(job1, new Path(OUTPUT_PATH)); if(!job1.waitForCompletion(true)){ System.exit(1); // run error then exit } }
Example #17
Source File: Step1.java From recsys-offline with Apache License 2.0 | 6 votes |
/**
 * Configures and runs the "step1" MapReduce job: a single reducer,
 * {@code VarLongWritable -> LongAndFloat} map output and
 * {@code VarLongWritable -> VectorWritable} final output written as a sequence file. Exits the
 * JVM with status 1 when the job does not complete successfully.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) throws Exception {
  Configuration configuration = new Configuration();
  Job job = new Job(configuration, "step1");
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setNumReduceTasks(1);
  job.setJarByClass(Step1.class);

  // Map side.
  job.setMapperClass(WikiMapper1.class);
  job.setMapOutputKeyClass(VarLongWritable.class);
  job.setMapOutputValueClass(LongAndFloat.class);

  // Reduce side.
  job.setReducerClass(WiKiReducer1.class);
  job.setOutputKeyClass(VarLongWritable.class);
  job.setOutputValueClass(VectorWritable.class);

  FileInputFormat.addInputPath(job, new Path(INPUT_PATH));
  SequenceFileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

  boolean succeeded = job.waitForCompletion(true);
  if (!succeeded) {
    System.exit(1);
  }
}
Example #18
Source File: Step32.java From recsys-offline with Apache License 2.0 | 6 votes |
/**
 * Emits one {@code (item index, VectorOrPrefWritable(user, preference))} pair for every
 * non-zero element of the user's vector and echoes each pair to standard output.
 *
 * @param key the user id
 * @param value the user's vector; element index is used as the item index and element value as
 *        the preference (narrowed to float)
 * @param context the task context the pairs are written to
 */
public void map(VarLongWritable key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  long userID = key.get();
  Vector userVector = value.get();
  // One writable is reused as the output key for every element.
  IntWritable itemi = new IntWritable();
  for (Vector.Element element : userVector.nonZeroes()) {
    int itemIndex = element.index();
    float preferenceValue = (float) element.get();
    itemi.set(itemIndex);
    VectorOrPrefWritable preference = new VectorOrPrefWritable(userID, preferenceValue);
    context.write(itemi, preference);
    System.out.println("item :" + itemi + ",userand val:" + userID + "," + preferenceValue);
  }
}
Example #19
Source File: Step5.java From recsys-offline with Apache License 2.0 | 5 votes |
public void map(IntWritable key,VectorAndPrefsWritable vectorAndPref,Context context) throws IOException, InterruptedException{ Vector coo=vectorAndPref.getVector(); List<Long> userIds=vectorAndPref.getUserIDs(); List<Float> prefValues=vectorAndPref.getValues(); //System.out.println("alluserids:"+userIds); for(int i=0;i<userIds.size();i++){ long userID=userIds.get(i); float prefValue=prefValues.get(i); Vector par=coo.times(prefValue); context.write(new VarLongWritable(userID), new VectorWritable(par)); System.out.println(",userid:"+userID+",vector:"+par); // if the user id = 3 is the same as my paper then is right } // System.out.println(); }
Example #20
Source File: 1000021_TestCDbwEvaluator_s.java From coming with MIT License | 5 votes |
@Override @Before public void setUp() throws Exception { super.setUp(); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); // Create test data List<VectorWritable> sampleData = TestKmeansClustering.getPointsWritable(REFERENCE); ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf); }
Example #21
Source File: Step5.java From recsys-offline with Apache License 2.0 | 5 votes |
public void reduce(VarLongWritable key, Iterable<VectorWritable> values,Context context) throws IOException, InterruptedException{ Vector partial=null; for(VectorWritable v:values){ partial=partial==null?v.get():partial.plus(v.get()); } context.write(key, new VectorWritable(partial)); System.err.println("userid:"+key.toString()+",vecotr:"+partial);// here also should be the same as my paper's result }
Example #22
Source File: Step5.java From recsys-offline with Apache License 2.0 | 5 votes |
public void reduce(VarLongWritable key, Iterable<VectorWritable> values,Context context) throws IOException, InterruptedException{ int userID=(int)key.get(); Vector rev=null; for(VectorWritable vec:values){ rev=rev==null? vec.get():rev.plus(vec.get()); } Queue<RecommendedItem>topItems=new PriorityQueue<RecommendedItem>( recommendationsPerUser+1, Collections.reverseOrder(ByValueRecommendedItemComparator.getInstance()) ); Iterator<Vector.Element>recommendationVectorIterator= rev.nonZeroes().iterator(); while(recommendationVectorIterator.hasNext()){ Vector.Element e=recommendationVectorIterator.next(); int index=e.index(); System.out.println("Vecotr.element.indxe:"+index); // test here find the index is item id or not ** test result : index is item if(!hasItem(userID,String.valueOf(index))){ float value=(float) e.get(); if(topItems.size()<recommendationsPerUser){ // here only set index topItems.add(new GenericRecommendedItem(index,value)); }else if(value>topItems.peek().getValue()){ topItems.add(new GenericRecommendedItem(index,value)); topItems.poll(); } } } List<RecommendedItem>recom=new ArrayList<RecommendedItem>(topItems.size()); recom.addAll(topItems); Collections.sort(recom,ByValueRecommendedItemComparator.getInstance()); context.write(key, new RecommendedItemsWritable(recom)); }
Example #23
Source File: Step2.java From recsys-offline with Apache License 2.0 | 5 votes |
/**
 * Emits every ordered pair of non-zero element indices of the user's vector (including each
 * index paired with itself) as {@code (index1, index2)}, and logs each first index to standard
 * error.
 *
 * @param userID the user id (not read)
 * @param userVector the user's vector
 * @param context the task context the index pairs are written to
 */
public void map(VarLongWritable userID, VectorWritable userVector, Context context)
    throws IOException, InterruptedException {
  for (Vector.Element first : userVector.get().nonZeroes()) {
    // Copy the index out before the inner pass starts over the same vector.
    int index1 = first.index();
    System.err.println("index1:" + index1);
    for (Vector.Element second : userVector.get().nonZeroes()) {
      int index2 = second.index();
      IntWritable outKey = new IntWritable(index1);
      IntWritable outValue = new IntWritable(index2);
      context.write(outKey, outValue);
    }
  }
}
Example #24
Source File: AdmmIterationMapper.java From laser with Apache License 2.0 | 5 votes |
protected void map(Text key, VectorWritable value, Context context) throws IOException, InterruptedException { // ignore per clustering records if (key.toString().contains("|clustering")) { return; } Vector v = value.get(); if (addIntercept) { v.set(0, 1.0); } inputSplitData.add(v); }
Example #25
Source File: LaserMessageConsumer.java From laser with Apache License 2.0 | 5 votes |
/**
 * Rotates the offline feature file: closes the current offline writer, bumps
 * {@code offlineVersion}, and opens a new writer for the next version.
 *
 * <p>Both the returned path and the newly opened file live under {@code OFFLINE_FOLDER}. The
 * replacement file was previously created under {@code ONLINE_FOLDER}, so the path returned by
 * the following rotation pointed at a file that had never been written.
 *
 * @return the path of the offline file that was just closed
 * @throws IOException if closing the old writer or creating the new one fails
 */
public Path nextOfflinePath() throws IOException {
  // NOTE(review): the monitor is the writer instance that this method replaces, so threads
  // may end up locking different objects; a dedicated final lock field would be safer.
  synchronized (offlineWriter) {
    offlineWriter.close();
    Path ret = new Path(output, OFFLINE_FOLDER + "/" + Long.toString(offlineVersion));
    offlineVersion++;
    Path offlinePath = new Path(output, OFFLINE_FOLDER + "/" + Long.toString(offlineVersion));
    LOG.info("Update offline feature output path, to {}", offlinePath);
    offlineWriter = SequenceFile.createWriter(fs, conf, offlinePath, Text.class,
        VectorWritable.class);
    return ret;
  }
}
Example #26
Source File: Step2.java From recsys-offline with Apache License 2.0 | 5 votes |
public void reduce(IntWritable itemIndex1,Iterable<IntWritable> itemPrefs,Context context) throws IOException, InterruptedException{ // RandomAccessSparseVector(int cardinality, int initialCapacity) Vector itemVector=new RandomAccessSparseVector(Integer.MAX_VALUE,10); for(IntWritable itemPref:itemPrefs){ int itemIndex2=itemPref.get(); itemVector.set(itemIndex2, itemVector.get(itemIndex2)+1.0); } context.write(itemIndex1, new VectorWritable(itemVector)); System.out.println(itemIndex1+" ,"+itemVector); }
Example #27
Source File: Step1.java From recsys-offline with Apache License 2.0 | 5 votes |
/**
 * Builds one sparse vector per user from its (item, preference) pairs and emits
 * {@code (userID, vector)}.
 *
 * <p>For each pair, the string form of the first component is parsed as the {@code int}
 * element index and the string form of the second component as the {@code float} value.
 *
 * @param userID the user id
 * @param itemPrefs the (item, preference) pairs of that user
 * @param context the task context the vector is written to
 */
public void reduce(VarLongWritable userID, Iterable<LongAndFloat> itemPrefs, Context context)
    throws IOException, InterruptedException {
  Vector preferences = new RandomAccessSparseVector(Integer.MAX_VALUE, 10);
  for (LongAndFloat pair : itemPrefs) {
    int itemIndex = Integer.parseInt(pair.getFirst().toString());
    float preference = Float.parseFloat(pair.getSecond().toString());
    preferences.set(itemIndex, preference);
  }
  context.write(userID, new VectorWritable(preferences));
}
Example #28
Source File: 1000021_CDbwEvaluator_s.java From coming with MIT License | 5 votes |
/**
 * For testing only. Stores the given state and computes the spread value of every cluster
 * that has an entry in {@code representativePoints}.
 *
 * @param representativePoints
 *          a {@code Map<Integer,List<VectorWritable>>} of representative points keyed by
 *          clusterId
 * @param clusters
 *          a {@code Map<Integer,Cluster>} of the clusters keyed by clusterId
 * @param measure
 *          an appropriate DistanceMeasure
 */
public CDbwEvaluator(Map<Integer, List<VectorWritable>> representativePoints,
                     Map<Integer, Cluster> clusters,
                     DistanceMeasure measure) {
  this.representativePoints = representativePoints;
  this.clusters = clusters;
  this.measure = measure;
  for (Map.Entry<Integer, List<VectorWritable>> entry : representativePoints.entrySet()) {
    setStDev(entry.getKey());
  }
}
Example #29
Source File: 1000021_CDbwReducer_s.java From coming with MIT License | 5 votes |
@Override protected void reduce(IntWritable key, Iterable<WeightedVectorWritable> values, Context context) throws IOException, InterruptedException { // find the most distant point WeightedVectorWritable mdp = null; for (WeightedVectorWritable dpw : values) { if (mdp == null || mdp.getWeight() < dpw.getWeight()) { mdp = new WeightedVectorWritable(dpw.getWeight(), dpw.getVector()); } } context.write(new IntWritable(key.get()), new VectorWritable(mdp.getVector())); }
Example #30
Source File: 1000021_CDbwReducer_t.java From coming with MIT License | 5 votes |
/**
 * Writes every representative point held in {@code representativePoints}, keyed by its cluster
 * id, and then delegates to the parent clean-up.
 *
 * @param context the task context the points are written to
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Map.Entry<Integer, List<VectorWritable>> clusterEntry : representativePoints.entrySet()) {
    // One key writable is shared by all points of the same cluster.
    IntWritable clusterId = new IntWritable(clusterEntry.getKey());
    List<VectorWritable> points = clusterEntry.getValue();
    for (VectorWritable point : points) {
      context.write(clusterId, point);
    }
  }
  super.cleanup(context);
}