Java Code Examples for org.apache.spark.sql.RowFactory#create()
The following examples show how to use org.apache.spark.sql.RowFactory#create().
Each example notes the original project and source file it was taken from; see those projects for the surrounding context and related API usage.
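Before the project examples, here is a minimal, self-contained sketch of the basic pattern. It is not taken from any of the projects below; the class name, app name, local master setting, schema, and sample values are all illustrative. RowFactory.create(Object... values) builds an untyped Row whose positional fields are only interpreted against a StructType schema when a Dataset is created.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class RowFactoryCreateSketch {          // hypothetical class name
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("RowFactoryCreateSketch")  // illustrative app name
                .master("local[*]")                 // assumption: local test run
                .getOrCreate();

        // RowFactory.create builds untyped rows; field order must match the schema below
        List<Row> rows = Arrays.asList(
                RowFactory.create(1, "alice", 42.0),
                RowFactory.create(2, "bob", 7.5));

        StructType schema = DataTypes.createStructType(new StructField[]{
                DataTypes.createStructField("id", DataTypes.IntegerType, false),
                DataTypes.createStructField("name", DataTypes.StringType, false),
                DataTypes.createStructField("score", DataTypes.DoubleType, false)});

        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.show();

        spark.stop();
    }
}

Note that RowFactory.create performs no type checking itself; a mismatch between the row values and the schema only surfaces when Spark evaluates the DataFrame.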
Example 1
Source File: RDDConverterUtilsExt.java From systemds with Apache License 2.0
@Override
public Row call(Tuple2<Row, Long> arg0) throws Exception {
    int oldNumCols = arg0._1.length();
    Object[] fields = new Object[oldNumCols + 1];
    for (int i = 0; i < oldNumCols; i++) {
        fields[i] = arg0._1.get(i);
    }
    fields[oldNumCols] = new Double(arg0._2 + 1);
    return RowFactory.create(fields);
}
Example 2
Source File: JavaVectorAssemblerExample.java From SparkDemo with MIT License
public static void main(String[] args) {
    SparkSession spark = SparkSession
        .builder()
        .appName("JavaVectorAssemblerExample")
        .getOrCreate();

    // $example on$
    StructType schema = createStructType(new StructField[]{
        createStructField("id", IntegerType, false),
        createStructField("hour", IntegerType, false),
        createStructField("mobile", DoubleType, false),
        createStructField("userFeatures", new VectorUDT(), false),
        createStructField("clicked", DoubleType, false)
    });
    Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0);
    Dataset<Row> dataset = spark.createDataFrame(Arrays.asList(row), schema);

    VectorAssembler assembler = new VectorAssembler()
        .setInputCols(new String[]{"hour", "mobile", "userFeatures"})
        .setOutputCol("features");

    Dataset<Row> output = assembler.transform(dataset);
    System.out.println("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column " +
        "'features'");
    output.select("features", "clicked").show(false);
    // $example off$

    spark.stop();
}
Example 3
Source File: StructureToInteractingResidues.java From mmtf-spark with Apache License 2.0
private List<Row> getDistanceProfile(String structureId, List<Integer> matches, int index,
        List<Integer> groupIndices, List<String> groupNames, StructureDataInterface structure) {

    double cutoffDistanceSq = cutoffDistance * cutoffDistance;

    float[] x = structure.getxCoords();
    float[] y = structure.getyCoords();
    float[] z = structure.getzCoords();

    int first = groupIndices.get(index);
    int last = groupIndices.get(index + 1);

    List<Row> rows = new ArrayList<>();
    for (int i : matches) {
        if (i == index) {
            continue;
        }

        double minDSq = Double.MAX_VALUE;
        int minIndex = -1;

        for (int j = groupIndices.get(i); j < groupIndices.get(i + 1); j++) {
            for (int k = first; k < last; k++) {
                double dx = (x[j] - x[k]);
                double dy = (y[j] - y[k]);
                double dz = (z[j] - z[k]);
                double dSq = dx * dx + dy * dy + dz * dz;

                if (dSq <= cutoffDistanceSq && dSq < minDSq) {
                    minDSq = Math.min(minDSq, dSq);
                    minIndex = i;
                }
            }
        }

        if (minIndex >= 0) {
            // TODO add unique group (and atom?) for each group?
            Row row = RowFactory.create(structureId, groupNames.get(index), index,
                    groupNames.get(minIndex), minIndex, (float) Math.sqrt(minDSq));
            rows.add(row);
        }
    }

    return rows;
}
Example 4
Source File: BiojavaAligner.java From mmtf-spark with Apache License 2.0
/**
 * Calculates a structural alignment and returns alignment metrics.
 *
 * @param alignmentAlgorithm name of the algorithm
 * @param key unique identifier for protein chain pair
 * @param points1 C-alpha positions of chain 1
 * @param points2 C-alpha positions of chain 2
 * @return list of alignment metrics
 */
public static List<Row> getAlignment(String alignmentAlgorithm, String key,
        Point3d[] points1, Point3d[] points2) {

    // create input for BioJava alignment method
    Atom[] ca1 = getCAAtoms(points1);
    Atom[] ca2 = getCAAtoms(points2);

    // calculate the alignment
    AFPChain afp = null;
    try {
        StructureAlignment algorithm = StructureAlignmentFactory.getAlgorithm(alignmentAlgorithm);
        afp = algorithm.align(ca1, ca2);
        double tmScore = AFPChainScorer.getTMScore(afp, ca1, ca2);
        afp.setTMScore(tmScore);
    } catch (StructureException e) {
        e.printStackTrace();
        return Collections.emptyList();
    }

    // TODO add alignments as arrays to results
    // int[][] alignment = afp.getAfpIndex();
    // for (int i = 0; i < alignment.length; i++) {
    //     System.out.println(alignment[i][0] + " - " + alignment[i][1]);
    // }

    // record the alignment metrics
    Row row = RowFactory.create(key, afp.getOptLength(), afp.getCoverage1(), afp.getCoverage2(),
            (float) afp.getTotalRmsdOpt(), (float) afp.getTMScore());

    return Collections.singletonList(row);
}
Example 5
Source File: AtomInteraction.java From mmtf-spark with Apache License 2.0
/**
 * Returns interactions and geometric information in a single row.
 *
 * @return row of interactions and geometric information
 */
public Row getMultipleInteractionsAsRow(int maxInteractions) {
    // pad interaction centers and distances with nulls, if necessary,
    // since each row must be of fixed length
    while (getNumInteractions() < maxInteractions) {
        neighbors.add(new InteractionCenter());
    }

    int length = InteractionCenter.getLength();

    Object[] data = new Object[getNumColumns(maxInteractions)];
    int index = 0;

    data[index++] = structureId;
    data[index++] = getNumberOfPolymerChains();

    calcCoordinationGeometry(maxInteractions);
    data[index++] = q3;
    data[index++] = q4;
    data[index++] = q5;
    data[index++] = q6;

    // copy data for query atom
    System.arraycopy(center.getAsObject(), 0, data, index, length);
    index += length;

    // copy data for interacting atoms
    for (int i = 0; i < neighbors.size(); i++) {
        System.arraycopy(neighbors.get(i).getAsObject(), 0, data, index, length);
        index += length;
        data[index++] = distances[i];
    }

    // copy angles
    System.arraycopy(angles, 0, data, index, angles.length);
    index += length;

    return RowFactory.create(data);
}
Example 6
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(String str) throws Exception {
    String[] strings = str.split(",");
    Double[] doubles = new Double[strings.length];
    for (int i = 0; i < strings.length; i++) {
        doubles[i] = Double.parseDouble(strings[i]);
    }
    return RowFactory.create((Object[]) doubles);
}
Example 7
Source File: InstanceRelationWriter.java From rdf2x with Apache License 2.0
private static Row getAttributeRow(Instance instance, Predicate predicate, Object value) {
    return RowFactory.create(
            instance.getId(),
            predicate.getPredicateIndex(),
            LiteralType.toString(predicate.getLiteralType()),
            predicate.getLanguage(),
            value.toString()
    );
}
Example 8
Source File: MLContextFrameTest.java From systemds with Apache License 2.0
@Test
public void testTransform() {
    System.out.println("MLContextFrameTest - transform");

    Row[] rowsA = {
        RowFactory.create("\"`@(\"(!&", 2, "20news-bydate-train/comp.os.ms-windows.misc/9979"),
        RowFactory.create("\"`@(\"\"(!&\"", 3, "20news-bydate-train/comp.os.ms-windows.misc/9979")};

    JavaRDD<Row> javaRddRowA = sc.parallelize(Arrays.asList(rowsA));

    List<StructField> fieldsA = new ArrayList<>();
    fieldsA.add(DataTypes.createStructField("featureName", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("featureValue", DataTypes.IntegerType, true));
    fieldsA.add(DataTypes.createStructField("id", DataTypes.StringType, true));
    StructType schemaA = DataTypes.createStructType(fieldsA);
    Dataset<Row> dataFrameA = spark.createDataFrame(javaRddRowA, schemaA);

    String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ featureName, id ]}\");";

    Script script = dml(dmlString)
            .in("A", dataFrameA,
                new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
            .out("tA").out("tAM");

    ml.setExplain(true);
    ml.setExplainLevel(ExplainLevel.RECOMPILE_HOPS);
    MLResults results = ml.execute(script);

    double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA");
    Assert.assertEquals(1.0, matrixtA[0][2], 0.0);

    Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF();
    System.out.println("Number of matrix tA rows = " + dataFrame_tA.count());
    dataFrame_tA.printSchema();
    dataFrame_tA.show();

    Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF();
    System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count());
    dataFrame_tAM.printSchema();
    dataFrame_tAM.show();
}
Example 9
Source File: MLContextFrameTest.java From systemds with Apache License 2.0
@Test
public void testInputFrameAndMatrixOutputMatrixAndFrame() {
    System.out.println("MLContextFrameTest - input frame and matrix, output matrix and frame");

    Row[] rowsA = {
        RowFactory.create("Doc1", "Feat1", 10),
        RowFactory.create("Doc1", "Feat2", 20),
        RowFactory.create("Doc2", "Feat1", 31)};

    JavaRDD<Row> javaRddRowA = sc.parallelize(Arrays.asList(rowsA));

    List<StructField> fieldsA = new ArrayList<>();
    fieldsA.add(DataTypes.createStructField("myID", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("FeatureName", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("FeatureValue", DataTypes.IntegerType, true));
    StructType schemaA = DataTypes.createStructType(fieldsA);
    Dataset<Row> dataFrameA = spark.createDataFrame(javaRddRowA, schemaA);

    String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ myID, FeatureName ]}\");";

    Script script = dml(dmlString)
            .in("A", dataFrameA,
                new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
            .out("tA").out("tAM");
    MLResults results = ml.execute(script);

    double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA");
    Assert.assertEquals(10.0, matrixtA[0][2], 0.0);
    Assert.assertEquals(20.0, matrixtA[1][2], 0.0);
    Assert.assertEquals(31.0, matrixtA[2][2], 0.0);

    Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF();
    System.out.println("Number of matrix tA rows = " + dataFrame_tA.count());
    dataFrame_tA.printSchema();
    dataFrame_tA.show();

    Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF();
    System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count());
    dataFrame_tAM.printSchema();
    dataFrame_tAM.show();
}
Example 10
Source File: FrameRDDConverterUtils.java From systemds with Apache License 2.0
@Override
public Row call(String record) throws Exception {
    String[] fields = IOUtilFunctions.splitCSV(record, _delim);
    Object[] objects = new Object[fields.length];
    for (int i = 0; i < fields.length; i++) {
        objects[i] = UtilFunctions.stringToObject(_schema[i], fields[i]);
    }
    return RowFactory.create(objects);
}
Example 11
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(Tuple2<Double, org.apache.spark.mllib.linalg.Vector> tup) throws Exception {
    Double doub = tup._1();
    org.apache.spark.mllib.linalg.Vector vect = tup._2();
    return RowFactory.create(doub, vect);
}
Example 12
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(Vector vect) throws Exception {
    return RowFactory.create(vect);
}
Example 13
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(org.apache.spark.mllib.linalg.Vector vect) throws Exception {
    return RowFactory.create(vect);
}
Example 14
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(Tuple2<Double, Vector> tup) throws Exception {
    Double doub = tup._1();
    Vector vect = tup._2();
    return RowFactory.create(doub, vect);
}
Example 15
Source File: StructureToAllInteractions.java From mmtf-spark with Apache License 2.0
private List<Row> getDistanceProfile(String structureId, List<Integer> matches, int index,
        List<Integer> groupIndices, List<String> groupNames, StructureDataInterface structure) {

    double cutoffDistanceSq = cutoffDistance * cutoffDistance;

    float[] x = structure.getxCoords();
    float[] y = structure.getyCoords();
    float[] z = structure.getzCoords();

    int first = groupIndices.get(index);
    int last = groupIndices.get(index + 1);
    int groupIndex1 = structure.getGroupTypeIndices()[index];

    List<Row> rows = new ArrayList<>();
    for (int i : matches) {
        // exclude self interactions
        if (i == index) {
            continue;
        }
        for (int j = groupIndices.get(i); j < groupIndices.get(i + 1); j++) {
            for (int k = first; k < last; k++) {
                double dx = (x[j] - x[k]);
                double dy = (y[j] - y[k]);
                double dz = (z[j] - z[k]);
                double dSq = dx * dx + dy * dy + dz * dz;

                if (dSq < cutoffDistanceSq) {
                    int aIndex1 = k - first;
                    String atomName1 = structure.getGroupAtomNames(groupIndex1)[aIndex1];
                    String element1 = structure.getGroupElementNames(groupIndex1)[aIndex1];

                    int groupIndex2 = structure.getGroupTypeIndices()[i];
                    int aIndex2 = j - groupIndices.get(i);
                    String atomName2 = structure.getGroupAtomNames(groupIndex2)[aIndex2];
                    String element2 = structure.getGroupElementNames(groupIndex2)[aIndex2];

                    double d = Math.sqrt(dSq);
                    Row row = RowFactory.create(structureId, groupNames.get(index), atomName1, element1, index,
                            groupNames.get(i), atomName2, element2, i, (float) d);
                    rows.add(row);
                }
            }
        }
    }
    return rows;
}
Example 16
Source File: ExhaustiveAligner.java From mmtf-spark with Apache License 2.0
/**
 * Returns one or more structure alignments and their alignment scores.
 *
 * @param alignmentAlgorithm name of the algorithm
 * @param key unique identifier for protein chain pair
 * @param points1 C-alpha positions of chain 1
 * @param points2 C-alpha positions of chain 2
 * @return list of alignment metrics
 */
public List<Row> getAlignments(String alignmentAlgorithm, String key, Point3d[] points1, Point3d[] points2) {
    List<Row> rows = new ArrayList<>();

    // TODO implement exhaustive alignments here ...
    int length = Math.min(points1.length, points2.length);

    Point3d[] x = null;
    Point3d[] y = null;
    int coverage1 = 0;
    int coverage2 = 0;

    if (points1.length != length) {
        x = Arrays.copyOfRange(points1, 0, length);
        y = points2;
        coverage1 = (int) Math.rint(100.0 * length / x.length);
        coverage2 = 100;
    } else if (points2.length != length) {
        x = points1;
        y = Arrays.copyOfRange(points2, 0, length);
        coverage1 = 100;
        coverage2 = (int) Math.rint(100.0 * length / y.length);
    }

    SuperPositionQCP qcp = new SuperPositionQCP(false);
    double rmsd = qcp.getRmsd(x, y);
    double tm = 0.0;
    // if (rmsd >= maxRmsd) {
    qcp.superposeAndTransform(x, y);
    tm = TMScore(x, y);
    // }

    System.out.println("l: " + length + " c1: " + coverage1 + " c2: " + coverage2
            + " rmsd: " + rmsd + " tm: " + tm);

    // int maxCoverage = Math.max(coverage1, coverage2);

    // store solutions that satisfy minimal criteria
    // if (length >= minLength && maxCoverage >= minCoverage && tm >= minTm) {
    // create a row of alignment metrics
    Row row = RowFactory.create(key, length, coverage1, coverage2, (float) rmsd, (float) tm);
    rows.add(row);
    // }

    return rows;
}
Example 17
Source File: RDDConverterUtilsExtTest.java From systemds with Apache License 2.0
@Override
public Row call(String str) throws Exception {
    return RowFactory.create(str);
}
Example 18
Source File: SecondaryStructureExtractor.java From mmtf-spark with Apache License 2.0
private static Row getSecStructFractions(Tuple2<String, StructureDataInterface> t) throws Exception {
    String key = t._1;
    StructureDataInterface structure = t._2;

    if (t._2.getNumChains() != 1) {
        throw new IllegalArgumentException("This method can only be applied to single polymer chain.");
    }

    StringBuilder dsspQ8 = new StringBuilder(structure.getEntitySequence(0).length());
    StringBuilder dsspQ3 = new StringBuilder(structure.getEntitySequence(0).length());

    float helix = 0;
    float sheet = 0;
    float coil = 0;

    int dsspIndex = 0;
    int structureIndex = 0;
    int seqIndex;

    for (int code : structure.getSecStructList()) {
        seqIndex = structure.getGroupSequenceIndices()[structureIndex++];
        while (dsspIndex < seqIndex) {
            dsspQ8.append("X");
            dsspQ3.append("X");
            dsspIndex++;
        }
        dsspQ8.append(DsspSecondaryStructure.getDsspCode(code).getOneLetterCode());
        dsspIndex++;

        switch (DsspSecondaryStructure.getQ3Code(code)) {
        case ALPHA_HELIX:
            helix++;
            dsspQ3.append("H");
            break;
        case EXTENDED:
            sheet++;
            dsspQ3.append("E");
            break;
        case COIL:
            coil++;
            dsspQ3.append("C");
            break;
        default:
            break;
        }
    }

    while (dsspIndex < structure.getEntitySequence(0).length()) {
        dsspQ8.append("X");
        dsspQ3.append("X");
        dsspIndex++;
    }

    int n = structure.getSecStructList().length;
    helix /= n;
    sheet /= n;
    coil /= n;

    return RowFactory.create(key, structure.getEntitySequence(0), helix, sheet, coil,
            dsspQ8.toString(), dsspQ3.toString());
}
Example 19
Source File: SparkJdbcGenerator.java From Quicksql with MIT License
@Override
public Row call(Object ob) throws Exception {
    return RowFactory.create((Object[]) ((String) ob).split("\u0006"));
}
Example 20
Source File: DefinitionToSparkVisitor.java From bunsen with Apache License 2.0
@Override
protected Object createComposite(Object[] children) {
    return RowFactory.create(children);
}