org.apache.spark.mllib.stat.MultivariateStatisticalSummary Java Examples
The following examples show how to use
org.apache.spark.mllib.stat.MultivariateStatisticalSummary.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You can also explore related API usage in the sidebar.
Example #1
Source File: JavaSummaryStatisticsExample.java From SparkDemo with MIT License | 6 votes |
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaSummaryStatisticsExample"); JavaSparkContext jsc = new JavaSparkContext(conf); // $example on$ JavaRDD<Vector> mat = jsc.parallelize( Arrays.asList( Vectors.dense(1.0, 10.0, 100.0), Vectors.dense(2.0, 20.0, 200.0), Vectors.dense(3.0, 30.0, 300.0) ) ); // an RDD of Vectors // Compute column summary statistics. MultivariateStatisticalSummary summary = Statistics.colStats(mat.rdd()); System.out.println(summary.mean()); // a dense vector containing the mean value for each column System.out.println(summary.variance()); // column-wise variance System.out.println(summary.numNonzeros()); // number of nonzeros in each column // $example off$ jsc.stop(); }
Example #2
Source File: SparkStatistics.java From spliceengine with GNU Affero General Public License v3.0 | 6 votes |
public static void getStatementStatistics(String statement, ResultSet[] resultSets) throws SQLException { try { // Run sql statement Connection con = DriverManager.getConnection("jdbc:default:connection"); PreparedStatement ps = con.prepareStatement(statement); ResultSet rs = ps.executeQuery(); // Convert result set to Java RDD JavaRDD<ExecRow> resultSetRDD = ResultSetToRDD(rs); // Collect column statistics int[] fieldsToConvert = getFieldsToConvert(ps); MultivariateStatisticalSummary summary = getColumnStatisticsSummary(resultSetRDD, fieldsToConvert); IteratorNoPutResultSet resultsToWrap = wrapResults((EmbedConnection) con, getColumnStatistics(ps, summary, fieldsToConvert)); resultSets[0] = new EmbedResultSet40((EmbedConnection)con, resultsToWrap, false, null, true); } catch (StandardException e) { throw new SQLException(Throwables.getRootCause(e)); } }
Example #3
Source File: SparkStatistics.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Builds one nine-column row per entry of {@code fieldsToConvert}, holding
 * (in column order) the column name, min, max, non-zero count, variance,
 * mean, L1 norm, L2 norm and the overall row count taken from {@code summary}.
 *
 * @param ps              prepared statement whose metadata supplies column names
 * @param summary         column statistics to read from
 * @param fieldsToConvert 1-based column positions to report on
 * @return the generated rows, in the order of {@code fieldsToConvert}
 * @throws StandardException wrapping any exception raised while building the rows
 */
private static Iterable<ExecRow> getColumnStatistics(PreparedStatement ps,
                                                     MultivariateStatisticalSummary summary,
                                                     int[] fieldsToConvert) throws StandardException {
    try {
        List<ExecRow> rows = Lists.newArrayList();
        ResultSetMetaData metaData = ps.getMetaData();

        double[] min = summary.min().toArray();
        double[] max = summary.max().toArray();
        double[] mean = summary.mean().toArray();
        double[] nonZeros = summary.numNonzeros().toArray();
        double[] variance = summary.variance().toArray();
        double[] normL1 = summary.normL1().toArray();
        double[] normL2 = summary.normL2().toArray();
        long count = summary.count();

        for (int i = 0; i < fieldsToConvert.length; ++i) {
            int columnPosition = fieldsToConvert[i];
            // NOTE(review): the statistic arrays are indexed by columnPosition - 1, which
            // assumes the summary has one slot per result-set column — confirm against
            // how the vector RDD is built from fieldsToConvert.
            int statIndex = columnPosition - 1;
            String columnName = metaData.getColumnName(columnPosition);

            ExecRow row = new ValueRow(9);
            row.setColumn(1, new SQLVarchar(columnName));
            row.setColumn(2, new SQLDouble(min[statIndex]));
            row.setColumn(3, new SQLDouble(max[statIndex]));
            row.setColumn(4, new SQLDouble(nonZeros[statIndex]));
            row.setColumn(5, new SQLDouble(variance[statIndex]));
            row.setColumn(6, new SQLDouble(mean[statIndex]));
            row.setColumn(7, new SQLDouble(normL1[statIndex]));
            row.setColumn(8, new SQLDouble(normL2[statIndex]));
            row.setColumn(9, new SQLLongint(count));
            rows.add(row);
        }
        return rows;
    } catch (Exception e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        throw StandardException.newException(e.getLocalizedMessage(), e);
    }
}
Example #4
Source File: SparkStatistics.java From spliceengine with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Converts the given rows to vectors over the selected fields and returns
 * the column statistics computed by {@code Statistics.colStats}.
 *
 * @param resultSetRDD    rows to summarize
 * @param fieldsToConvert fields passed through to the row-to-vector conversion
 * @return column statistics for the converted vectors
 * @throws StandardException if the row-to-vector conversion fails
 */
private static MultivariateStatisticalSummary getColumnStatisticsSummary(JavaRDD<ExecRow> resultSetRDD,
                                                                         int[] fieldsToConvert) throws StandardException {
    return Statistics.colStats(
        SparkMLibUtils.locatedRowRDDToVectorRDD(resultSetRDD, fieldsToConvert).rdd());
}