org.apache.commons.math3.distribution.EnumeratedIntegerDistribution Java Examples
The following examples show how to use
org.apache.commons.math3.distribution.EnumeratedIntegerDistribution.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SampleUtil.java From JavaBase with MIT License | 6 votes |
/**
 * Picks elements of {@code list} by index, using the indexes produced by {@code function}.
 *
 * <p>An index-to-element map is built for the list, an enumerated distribution is created
 * from it via {@code generateEnumerated}, and {@code function} is asked for {@code count}
 * indexes drawn from that distribution.
 *
 * @param list     candidate elements; {@code null} or empty yields an empty result
 * @param count    number of indexes requested from {@code function}
 * @param function sampling strategy applied to the distribution and {@code count}
 * @return the elements at the sampled indexes, or an empty list when the input is
 *         {@code null}, empty, or holds fewer than {@code count} elements
 */
private static <T extends SampleAble> List<T> sampleResult(List<T> list, int count,
        BiFunction<EnumeratedIntegerDistribution, Integer, List<Integer>> function) {
    if (list == null || list.isEmpty()) {
        return new ArrayList<>();
    }

    final int available = list.size();
    if (available < count) {
        log.warn("data less than count: data size={} count={}", available, count);
        return new ArrayList<>();
    }

    // position -> element lookup, shared with the distribution factory
    Map<Integer, T> byIndex = IntStream.range(0, available)
            .boxed()
            .collect(Collectors.toMap(position -> position, list::get));

    EnumeratedIntegerDistribution indexDistribution = generateEnumerated(list, byIndex);
    List<Integer> sampledIndexes = function.apply(indexDistribution, count);

    List<T> picked = sampledIndexes.stream()
            .map(byIndex::get)
            .collect(Collectors.toList());
    return picked;
}
Example #2
Source File: SampleUtil.java From JavaBase with MIT License | 6 votes |
/**
 * Draws from {@code distribution} until {@code size} distinct values have been collected.
 *
 * <p>Duplicates are discarded and drawing continues, so the loop only finishes once
 * {@code size} different values have actually been sampled; the caller must make sure the
 * distribution can produce that many distinct outcomes.
 *
 * @param distribution source of samples; {@code null} yields an empty list
 * @param size         number of distinct values wanted; non-positive yields an empty list
 * @return a new list holding the distinct sampled values, in the iteration order of the
 *         backing hash set
 */
private static List<Integer> sampleWithNoRepeated(EnumeratedIntegerDistribution distribution, int size) {
    if (distribution == null || size <= 0) {
        return new ArrayList<>();
    }

    Set<Integer> distinct = new HashSet<>(size);
    int draws = 0;
    for (; distinct.size() < size; draws++) {
        distinct.add(distribution.sample());
    }

    log.debug("loop: count={}", draws);
    return new ArrayList<>(distinct);
}
Example #3
Source File: MarkovChainEvaluator.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates a Markov chain whose transition probabilities are the rows of {@code matrix}.
 *
 * <p>One enumerated distribution over the state indexes {@code 0..n-1} is built per row;
 * row {@code i} supplies the probabilities of moving out of state {@code i}.
 *
 * @param matrix transition matrix; its row count must equal the length of its first row
 * @param state  initial state; any value below zero selects a random initial state
 * @throws IOException if the matrix is not square
 */
public MarkovChain(Matrix matrix, int state) throws IOException {
    double[][] data = matrix.getData();

    if (data.length != data[0].length) {
        throw new IOException("markovChain must be initialized with a square matrix.");
    }

    this.distributions = new EnumeratedIntegerDistribution[data.length];

    if (state > -1) {
        this.state = state;
    } else {
        this.state = new Random().nextInt(data.length);
    }

    // Create the states array needed by the enumerated distribution.
    // It is the same for every row and is never modified here, so it is built once
    // instead of once per loop iteration.
    int[] states = MathArrays.sequence(data.length, 0, 1);

    for (int i = 0; i < data.length; i++) {
        double[] probabilities = data[i];
        distributions[i] = new EnumeratedIntegerDistribution(states, probabilities);
    }
}
Example #4
Source File: SampleUtil.java From JavaBase with MIT License | 5 votes |
/**
 * Samples one element of {@code list} according to the weights used by
 * {@code generateEnumerated}, removes it from the list, and returns it.
 *
 * @param list elements to sample from; the chosen element is removed from this list
 * @return the element that was sampled and removed
 */
private static <T extends SampleAble> T sampleOneWithNoReturn(List<T> list) {
    // index -> element map, required by generateEnumerated's signature
    Map<Integer, T> data = IntStream.range(0, list.size()).boxed()
            .collect(Collectors.toMap(i -> i, list::get));
    EnumeratedIntegerDistribution distribution = generateEnumerated(list, data);

    int index = distribution.sample();

    // index is a primitive int, so this is List.remove(int): it removes by position and
    // hands back the removed element. The previous lookup in (and removal from) the
    // local map was redundant because the map is discarded when this method returns.
    return list.remove(index);
}
Example #5
Source File: SampleUtil.java From JavaBase with MIT License | 5 votes |
/**
 * Draws {@code size} independent samples from {@code distribution}; values may repeat.
 *
 * @param distribution source of samples
 * @param size         number of samples to draw; non-positive yields an empty list
 * @return a new mutable list with the sampled values in draw order
 */
private static List<Integer> sampleWithRepeated(EnumeratedIntegerDistribution distribution, int size) {
    return IntStream.range(0, size)
            .mapToObj(draw -> distribution.sample())
            .collect(Collectors.toCollection(ArrayList::new));
}
Example #6
Source File: SampleUtil.java From JavaBase with MIT License | 5 votes |
/**
 * Builds an enumerated distribution over the keys of {@code tempMap}, where the
 * probability attached to the i-th smallest key is the weight of {@code list.get(i)}
 * divided by the total weight of the list.
 *
 * <p>The callers visible in this file pass a map from list position to element, so the
 * sorted keys are exactly {@code 0..list.size()-1} and each index is paired with the
 * weight of the element at that position.
 *
 * @param list    weighted elements; supplies the probabilities in list order
 * @param tempMap map whose keys are the values the distribution can produce
 * @return a distribution over the map keys weighted by the elements' weights
 */
private static <T extends SampleAble> EnumeratedIntegerDistribution generateEnumerated(
        List<T> list, Map<Integer, T> tempMap) {
    double sum = list.stream().mapToInt(SampleAble::getWeight).sum();

    // Normalised weights, in list order, computed without an intermediate boxed list.
    double[] probabilities = list.stream()
            .mapToDouble(item -> item.getWeight() / sum)
            .toArray();

    // A map's key-set iteration order is not guaranteed to be ascending (hash-based maps
    // may iterate in hash order). Sorting pins key i to probability i, so a weight can
    // never be attached to the wrong index.
    int[] singletons = tempMap.keySet().stream()
            .mapToInt(Integer::intValue)
            .sorted()
            .toArray();

    return new EnumeratedIntegerDistribution(singletons, probabilities);
}
Example #7
Source File: PartitionTest.java From sequence-mining with GNU General Public License v3.0 | 5 votes |
@Test public void testInterleavingGenerator() { final Random random = new Random(1); final Random randomI = new Random(10); final RandomGenerator randomC = new JDKRandomGenerator(); randomC.setSeed(100); final Multiset<Sequence> seqsI = HashMultiset.create(); seqsI.add(new Sequence(1, 2, 3)); seqsI.add(new Sequence(4, 5)); seqsI.add(new Sequence(6)); seqsI.add(new Sequence(7)); final HashMap<Sequence, Double> seqsG = new HashMap<>(); for (final Sequence seq : seqsI.elementSet()) { seqsG.put(seq, 1.0); } final Map<Sequence, EnumeratedIntegerDistribution> countDists = new HashMap<>(); final EnumeratedIntegerDistribution oneRepeat = new EnumeratedIntegerDistribution(randomC, new int[] { 1 }, new double[] { 1.0 }); countDists.put(new Sequence(1, 2, 3), oneRepeat); countDists.put(new Sequence(4, 5), oneRepeat); countDists.put(new Sequence(6), oneRepeat); countDists.put(new Sequence(7), oneRepeat); final HashSet<Transaction> transG = new HashSet<>(); for (int i = 0; i < 700000; i++) transG.add( TransactionGenerator.sampleFromDistribution(random, seqsG, countDists, new HashMap<>(), randomI)); // Note that upper bound is exact when there are no repetitions assertEquals(transG.size(), modP(seqsI.iterator()), EPS); }
Example #8
Source File: BM.java From pyramid with Apache License 2.0 | 5 votes |
/** * sample a vector from the mixture distribution * @return */ public Vector sample(){ Vector vector = new DenseVector(dimension); // first sample cluster int[] clusters = IntStream.range(0,numClusters).toArray(); EnumeratedIntegerDistribution enumeratedIntegerDistribution = new EnumeratedIntegerDistribution(clusters,mixtureCoefficients); int cluster = enumeratedIntegerDistribution.sample(); // then sample each dimension for (int d=0;d<dimension;d++){ vector.set(d,distributions[cluster][d].sample()); } return vector; }
Example #9
Source File: KMeansPlusPlus.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Chooses the initial cluster centres.
 *
 * <p>The first centre is a uniformly chosen data point. Each further centre is sampled
 * from an enumerated distribution over all data point indexes whose probabilities are the
 * current {@code distances}, normalised in place to sum to one after each call to
 * {@code updateDistance()}.
 *
 * @param print whether to report progress on standard output
 */
public void initialize(boolean print){
    if (print){
        System.out.println("initialize");
    }

    int firstIndex = Sampling.intUniform(0, dataSet.getNumDataPoints() - 1);
    recordPickedCenter(firstIndex, print);

    while (centers.size() < numComponents){
        updateDistance();

        // normalise the distances in place so they can serve as probabilities
        double total = MathUtil.arraySum(distances);
        for (int i = 0; i < distances.length; i++){
            distances[i] = distances[i] / total;
        }

        int[] indices = IntStream.range(0, dataSet.getNumDataPoints()).toArray();
        int sample = new EnumeratedIntegerDistribution(indices, distances).sample();
        recordPickedCenter(sample, print);
    }
}

/** Adds the given data point as the next centre, remembers its index, and optionally reports it. */
private void recordPickedCenter(int dataIndex, boolean print){
    centers.add(dataSet.getRow(dataIndex));
    pickedIds.add(dataIndex);
    if (print){
        System.out.println("randomly pick instance "+(dataIndex+1)+" as the initial centroid for cluster "+centers.size());
    }
}
Example #10
Source File: ConsumerVerifier.java From hermes with Apache License 2.0 | 4 votes |
/**
 * Sets up {@code nackRnd} as a two-outcome distribution: value 0 with probability 0.05
 * and value 1 with probability 0.95.
 */
@Before
public void before() {
    nackRnd = new EnumeratedIntegerDistribution(
            new int[] { 0, 1 },
            new double[] { 0.05, 0.95 });
}
Example #11
Source File: ClassifierWeightedSampling.java From AILibs with GNU Affero General Public License v3.0 | 4 votes |
@Override public List<Pair<ILabeledInstance, Double>> calculateAcceptanceThresholdsWithTrainedPilot(final D dataset, final IClassifier pilot) { /* compute mean value and base values the instances must have */ double mid = this.getMean(dataset); double baseValue = 10 * mid + 1; // arbitrary value, there most likely be better one double addForRightClassification = baseValue + 2 * mid; // like baseValue /* determine probability for each index to be chosen */ double[] weights = new double[dataset.size()]; for (int i = 0; i < weights.length; i++) { try { IPrediction prediction = pilot.predict(dataset.get(i)); if (prediction.getLabelWithHighestProbability() == dataset.get(i).getLabel()) { weights[i] = addForRightClassification - prediction.getProbabilityOfLabel(dataset.get(i).getLabel()); } else { weights[i] = baseValue + prediction.getProbabilityOfLabel(prediction.getLabelWithHighestProbability()); } } catch (Exception e) { weights[i] = 0; } } int[] indices = IntStream.range(0, this.getInput().size()).toArray(); EnumeratedIntegerDistribution finalDistribution = new EnumeratedIntegerDistribution(indices, weights); finalDistribution.reseedRandomGenerator(this.rand.nextLong()); /* now draw <number of samples> many indices whose threshold will be set to 1 */ int n = this.getSampleSize(); Set<Integer> consideredIndices = new HashSet<>(); for (int i = 0; i < n; i++) { int index; do { index = finalDistribution.sample(); } while (consideredIndices.contains(index)); consideredIndices.add(index); } /* now create the list of pairs */ List<Pair<ILabeledInstance, Double>> thresholds = new ArrayList<>(); int m = dataset.size(); for (int i = 0; i < m; i++) { ILabeledInstance inst = dataset.get(i); double threshold = consideredIndices.contains(i) ? 1 : 0; thresholds.add(new Pair<>(inst, threshold)); } return thresholds; }
Example #12
Source File: TransactionGenerator.java From sequence-mining with GNU General Public License v3.0 | 4 votes |
/** * Generate transactions from set of interesting sequences * * @return set of sequences added to transaction */ public static HashMap<Sequence, Double> generateTransactionDatabase(final Map<Sequence, Double> sequences, final Table<Sequence, Integer, Double> probabilities, final int noTransactions, final File outFile) throws IOException { // Set random number seeds final Random random = new Random(1); final Random randomI = new Random(10); final RandomGenerator randomC = new JDKRandomGenerator(); randomC.setSeed(100); // Storage for sequences actually added final HashMap<Sequence, Double> addedSequences = new HashMap<>(); // Set output file final PrintWriter out = new PrintWriter(outFile, "UTF-8"); // Add to distribution class for easy sampling final Map<Sequence, EnumeratedIntegerDistribution> dists = new HashMap<>(); for (final Sequence seq : sequences.keySet()) { final List<Integer> singletons = new ArrayList<>(); final List<Double> probs = new ArrayList<>(); for (final Entry<Integer, Double> entry : probabilities.row(seq).entrySet()) { singletons.add(entry.getKey()); probs.add(entry.getValue()); } final EnumeratedIntegerDistribution dist = new EnumeratedIntegerDistribution(randomC, Ints.toArray(singletons), Doubles.toArray(probs)); dists.put(seq, dist); } // Generate transaction database int count = 0; while (count < noTransactions) { // Generate transaction from distribution final Transaction transaction = sampleFromDistribution(random, sequences, dists, addedSequences, randomI); for (final int item : transaction) { out.print(item + " -1 "); } if (!transaction.isEmpty()) { out.print("-2"); out.println(); count++; } } out.close(); // Print file to screen if (VERBOSE) { final FileReader reader = new FileReader(outFile); final LineIterator it = new LineIterator(reader); while (it.hasNext()) { System.out.println(it.nextLine()); } LineIterator.closeQuietly(it); } return addedSequences; }
Example #13
Source File: MultiLabelSynthesizer.java From pyramid with Apache License 2.0 | 4 votes |
/** * y0: w=(0,1) * y1: w=(1,1) * y2: w=(1,0) * y3: w=(1,-1) * @param numData * @return */ public static MultiLabelClfDataSet flipOneNonUniform(int numData){ int numClass = 4; int numFeature = 2; MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature) .numClasses(numClass) .numDataPoints(numData) .build(); // generate weights Vector[] weights = new Vector[numClass]; for (int k=0;k<numClass;k++){ Vector vector = new DenseVector(numFeature); weights[k] = vector; } weights[0].set(0,0); weights[0].set(1,1); weights[1].set(0, 1); weights[1].set(1, 1); weights[2].set(0, 1); weights[2].set(1, 0); weights[3].set(0,1); weights[3].set(1,-1); // generate features for (int i=0;i<numData;i++){ for (int j=0;j<numFeature;j++){ dataSet.setFeatureValue(i,j,Sampling.doubleUniform(-1, 1)); } } // assign labels for (int i=0;i<numData;i++){ for (int k=0;k<numClass;k++){ double dot = weights[k].dot(dataSet.getRow(i)); if (dot>=0){ dataSet.addLabel(i,k); } } } int[] indices = {0,1,2,3}; double[] probs = {0.4,0.2,0.2,0.2}; IntegerDistribution distribution = new EnumeratedIntegerDistribution(indices,probs); // flip for (int i=0;i<numData;i++){ int toChange = distribution.sample(); MultiLabel label = dataSet.getMultiLabels()[i]; if (label.matchClass(toChange)){ label.removeLabel(toChange); } else { label.addLabel(toChange); } } return dataSet; }
Example #14
Source File: MultiLabelSynthesizer.java From pyramid with Apache License 2.0 | 4 votes |
/** * C0, y0: w=(0,1) * C0, y1: w=(1,1) * C1, y0: w=(1,0) * C1, y1: w=(1,-1) * @return */ public static MultiLabelClfDataSet sampleFromMix(){ int numData = 10000; int numClass = 2; int numFeature = 2; int numClusters = 2; double[] proportions = {0.4,0.6}; int[] indices = {0,1}; MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder() .numFeatures(numFeature) .numClasses(numClass) .numDataPoints(numData) .build(); // generate weights Vector[][] weights = new Vector[numClusters][numClass]; for (int c=0;c<numClusters;c++){ for (int l=0;l<numClass;l++){ Vector vector = new DenseVector(numFeature); weights[c][l] = vector; } } weights[0][0].set(0, 0); weights[0][0].set(1, 1); weights[0][1].set(0, 1); weights[0][1].set(1, 1); weights[1][0].set(0, 1); weights[1][0].set(1, 0); weights[1][1].set(0, 1); weights[1][1].set(1,-1); // generate features for (int i=0;i<numData;i++){ for (int j=0;j<numFeature;j++){ dataSet.setFeatureValue(i,j,Sampling.doubleUniform(-1, 1)); } } IntegerDistribution distribution = new EnumeratedIntegerDistribution(indices,proportions); // assign labels for (int i=0;i<numData;i++){ int cluster = distribution.sample(); System.out.println("cluster "+cluster); for (int l=0;l<numClass;l++){ System.out.println("row = "+dataSet.getRow(i)); System.out.println("weight = "+ weights[cluster][l]); double dot = weights[cluster][l].dot(dataSet.getRow(i)); System.out.println("dot = "+dot); if (dot>=0){ dataSet.addLabel(i,l); } } } return dataSet; }
Example #15
Source File: SamplingPrediction.java From pyramid with Apache License 2.0 | 4 votes |
/**
 * Picks one candidate at random, where candidate {@code i} is chosen with probability
 * {@code probabilities[i]}.
 *
 * @param probabilities selection probability per candidate index
 * @param candidates    candidates to choose from, looked up by the sampled index
 * @return the candidate at the sampled index
 */
public static MultiLabel predict(double[] probabilities, List<MultiLabel> candidates){
    int[] candidateIndexes = IntStream.range(0, probabilities.length).toArray();
    int chosen = new EnumeratedIntegerDistribution(candidateIndexes, probabilities).sample();
    return candidates.get(chosen);
}