Java Code Examples for org.apache.spark.sql.RowFactory#create()
The following examples show how to use org.apache.spark.sql.RowFactory#create().
Each example notes the original project and source file it was taken from; see those projects for the surrounding context and related API usage.
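Before the project examples, here is a minimal, self-contained sketch of the basic pattern. It is not taken from any of the projects below; the class name, app name, local master setting, schema, and sample values are all illustrative. RowFactory.create(Object... values) builds an untyped Row whose positional fields are only interpreted against a StructType schema when a Dataset is created.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

public class RowFactoryCreateSketch {          // hypothetical class name
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("RowFactoryCreateSketch")  // illustrative app name
                .master("local[*]")                 // assumption: local test run
                .getOrCreate();

        // RowFactory.create builds untyped rows; field order must match the schema below
        List<Row> rows = Arrays.asList(
                RowFactory.create(1, "alice", 42.0),
                RowFactory.create(2, "bob", 7.5));

        StructType schema = DataTypes.createStructType(new StructField[]{
                DataTypes.createStructField("id", DataTypes.IntegerType, false),
                DataTypes.createStructField("name", DataTypes.StringType, false),
                DataTypes.createStructField("score", DataTypes.DoubleType, false)});

        Dataset<Row> df = spark.createDataFrame(rows, schema);
        df.show();

        spark.stop();
    }
}

Note that RowFactory.create performs no type checking itself; a mismatch between the row values and the schema only surfaces when Spark evaluates the DataFrame.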
Example 1
Source File: RDDConverterUtilsExt.java From systemds with Apache License 2.0
@Override
public Row call(Tuple2<Row, Long> arg0) throws Exception {
    int oldNumCols = arg0._1.length();
    Object[] fields = new Object[oldNumCols + 1];
    for (int i = 0; i < oldNumCols; i++) {
        fields[i] = arg0._1.get(i);
    }
    fields[oldNumCols] = new Double(arg0._2 + 1);
    return RowFactory.create(fields);
}
Example 2
Source File: JavaVectorAssemblerExample.java From SparkDemo with MIT License
public static void main(String[] args) {
    SparkSession spark = SparkSession
        .builder()
        .appName("JavaVectorAssemblerExample")
        .getOrCreate();

    // $example on$
    StructType schema = createStructType(new StructField[]{
        createStructField("id", IntegerType, false),
        createStructField("hour", IntegerType, false),
        createStructField("mobile", DoubleType, false),
        createStructField("userFeatures", new VectorUDT(), false),
        createStructField("clicked", DoubleType, false)
    });
    Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0);
    Dataset<Row> dataset = spark.createDataFrame(Arrays.asList(row), schema);

    VectorAssembler assembler = new VectorAssembler()
        .setInputCols(new String[]{"hour", "mobile", "userFeatures"})
        .setOutputCol("features");

    Dataset<Row> output = assembler.transform(dataset);
    System.out.println("Assembled columns 'hour', 'mobile', 'userFeatures' to vector column " +
        "'features'");
    output.select("features", "clicked").show(false);
    // $example off$

    spark.stop();
}
Example 3
Source File: StructureToInteractingResidues.java From mmtf-spark with Apache License 2.0
private List<Row> getDistanceProfile(String structureId, List<Integer> matches, int index,
        List<Integer> groupIndices, List<String> groupNames, StructureDataInterface structure) {

    double cutoffDistanceSq = cutoffDistance * cutoffDistance;

    float[] x = structure.getxCoords();
    float[] y = structure.getyCoords();
    float[] z = structure.getzCoords();

    int first = groupIndices.get(index);
    int last = groupIndices.get(index + 1);

    List<Row> rows = new ArrayList<>();
    for (int i : matches) {
        if (i == index) {
            continue;
        }

        double minDSq = Double.MAX_VALUE;
        int minIndex = -1;

        for (int j = groupIndices.get(i); j < groupIndices.get(i + 1); j++) {
            for (int k = first; k < last; k++) {
                double dx = (x[j] - x[k]);
                double dy = (y[j] - y[k]);
                double dz = (z[j] - z[k]);
                double dSq = dx * dx + dy * dy + dz * dz;

                if (dSq <= cutoffDistanceSq && dSq < minDSq) {
                    minDSq = Math.min(minDSq, dSq);
                    minIndex = i;
                }
            }
        }

        if (minIndex >= 0) {
            // TODO add unique group (and atom?) for each group?
            Row row = RowFactory.create(structureId, groupNames.get(index), index,
                    groupNames.get(minIndex), minIndex, (float) Math.sqrt(minDSq));
            rows.add(row);
        }
    }

    return rows;
}
Example 4
Source File: BiojavaAligner.java From mmtf-spark with Apache License 2.0
/**
 * Calculates a structural alignment and returns alignment metrics.
 *
 * @param alignmentAlgorithm name of the algorithm
 * @param key unique identifier for protein chain pair
 * @param points1 C-alpha positions of chain 1
 * @param points2 C-alpha positions of chain 2
 * @return list of alignment metrics
 */
public static List<Row> getAlignment(String alignmentAlgorithm, String key,
        Point3d[] points1, Point3d[] points2) {

    // create input for BioJava alignment method
    Atom[] ca1 = getCAAtoms(points1);
    Atom[] ca2 = getCAAtoms(points2);

    // calculate the alignment
    AFPChain afp = null;
    try {
        StructureAlignment algorithm = StructureAlignmentFactory.getAlgorithm(alignmentAlgorithm);
        afp = algorithm.align(ca1, ca2);
        double tmScore = AFPChainScorer.getTMScore(afp, ca1, ca2);
        afp.setTMScore(tmScore);
    } catch (StructureException e) {
        e.printStackTrace();
        return Collections.emptyList();
    }

    // TODO add alignments as arrays to results
    // int[][] alignment = afp.getAfpIndex();
    // for (int i = 0; i < alignment.length; i++) {
    //     System.out.println(alignment[i][0] + " - " + alignment[i][1]);
    // }

    // record the alignment metrics
    Row row = RowFactory.create(key, afp.getOptLength(), afp.getCoverage1(), afp.getCoverage2(),
            (float) afp.getTotalRmsdOpt(), (float) afp.getTMScore());

    return Collections.singletonList(row);
}
Example 5
Source File: AtomInteraction.java From mmtf-spark with Apache License 2.0
/**
 * Returns interactions and geometric information in a single row.
 *
 * @return row of interactions and geometric information
 */
public Row getMultipleInteractionsAsRow(int maxInteractions) {
    // pad interaction centers and distances with nulls, if necessary,
    // since each row must be of fixed length
    while (getNumInteractions() < maxInteractions) {
        neighbors.add(new InteractionCenter());
    }

    int length = InteractionCenter.getLength();

    Object[] data = new Object[getNumColumns(maxInteractions)];
    int index = 0;

    data[index++] = structureId;
    data[index++] = getNumberOfPolymerChains();

    calcCoordinationGeometry(maxInteractions);
    data[index++] = q3;
    data[index++] = q4;
    data[index++] = q5;
    data[index++] = q6;

    // copy data for query atom
    System.arraycopy(center.getAsObject(), 0, data, index, length);
    index += length;

    // copy data for interacting atoms
    for (int i = 0; i < neighbors.size(); i++) {
        System.arraycopy(neighbors.get(i).getAsObject(), 0, data, index, length);
        index += length;
        data[index++] = distances[i];
    }

    // copy angles
    System.arraycopy(angles, 0, data, index, angles.length);
    index += length;

    return RowFactory.create(data);
}
Example 6
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(String str) throws Exception {
    String[] strings = str.split(",");
    Double[] doubles = new Double[strings.length];
    for (int i = 0; i < strings.length; i++) {
        doubles[i] = Double.parseDouble(strings[i]);
    }
    return RowFactory.create((Object[]) doubles);
}
Example 7
Source File: InstanceRelationWriter.java From rdf2x with Apache License 2.0
private static Row getAttributeRow(Instance instance, Predicate predicate, Object value) {
    return RowFactory.create(
            instance.getId(),
            predicate.getPredicateIndex(),
            LiteralType.toString(predicate.getLiteralType()),
            predicate.getLanguage(),
            value.toString()
    );
}
Example 8
Source File: MLContextFrameTest.java From systemds with Apache License 2.0
@Test
public void testTransform() {
    System.out.println("MLContextFrameTest - transform");

    Row[] rowsA = {
        RowFactory.create("\"`@(\"(!&", 2, "20news-bydate-train/comp.os.ms-windows.misc/9979"),
        RowFactory.create("\"`@(\"\"(!&\"", 3, "20news-bydate-train/comp.os.ms-windows.misc/9979")};

    JavaRDD<Row> javaRddRowA = sc.parallelize(Arrays.asList(rowsA));

    List<StructField> fieldsA = new ArrayList<>();
    fieldsA.add(DataTypes.createStructField("featureName", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("featureValue", DataTypes.IntegerType, true));
    fieldsA.add(DataTypes.createStructField("id", DataTypes.StringType, true));
    StructType schemaA = DataTypes.createStructType(fieldsA);
    Dataset<Row> dataFrameA = spark.createDataFrame(javaRddRowA, schemaA);

    String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ featureName, id ]}\");";

    Script script = dml(dmlString)
            .in("A", dataFrameA,
                new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
            .out("tA").out("tAM");

    ml.setExplain(true);
    ml.setExplainLevel(ExplainLevel.RECOMPILE_HOPS);
    MLResults results = ml.execute(script);

    double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA");
    Assert.assertEquals(1.0, matrixtA[0][2], 0.0);

    Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF();
    System.out.println("Number of matrix tA rows = " + dataFrame_tA.count());
    dataFrame_tA.printSchema();
    dataFrame_tA.show();

    Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF();
    System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count());
    dataFrame_tAM.printSchema();
    dataFrame_tAM.show();
}
Example 9
Source File: MLContextFrameTest.java From systemds with Apache License 2.0
@Test
public void testInputFrameAndMatrixOutputMatrixAndFrame() {
    System.out.println("MLContextFrameTest - input frame and matrix, output matrix and frame");

    Row[] rowsA = {
        RowFactory.create("Doc1", "Feat1", 10),
        RowFactory.create("Doc1", "Feat2", 20),
        RowFactory.create("Doc2", "Feat1", 31)};

    JavaRDD<Row> javaRddRowA = sc.parallelize(Arrays.asList(rowsA));

    List<StructField> fieldsA = new ArrayList<>();
    fieldsA.add(DataTypes.createStructField("myID", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("FeatureName", DataTypes.StringType, true));
    fieldsA.add(DataTypes.createStructField("FeatureValue", DataTypes.IntegerType, true));
    StructType schemaA = DataTypes.createStructType(fieldsA);
    Dataset<Row> dataFrameA = spark.createDataFrame(javaRddRowA, schemaA);

    String dmlString = "[tA, tAM] = transformencode (target = A, spec = \"{ids: false ,recode: [ myID, FeatureName ]}\");";

    Script script = dml(dmlString)
            .in("A", dataFrameA,
                new FrameMetadata(FrameFormat.CSV, dataFrameA.count(), (long) dataFrameA.columns().length))
            .out("tA").out("tAM");
    MLResults results = ml.execute(script);

    double[][] matrixtA = results.getMatrixAs2DDoubleArray("tA");
    Assert.assertEquals(10.0, matrixtA[0][2], 0.0);
    Assert.assertEquals(20.0, matrixtA[1][2], 0.0);
    Assert.assertEquals(31.0, matrixtA[2][2], 0.0);

    Dataset<Row> dataFrame_tA = results.getMatrix("tA").toDF();
    System.out.println("Number of matrix tA rows = " + dataFrame_tA.count());
    dataFrame_tA.printSchema();
    dataFrame_tA.show();

    Dataset<Row> dataFrame_tAM = results.getFrame("tAM").toDF();
    System.out.println("Number of frame tAM rows = " + dataFrame_tAM.count());
    dataFrame_tAM.printSchema();
    dataFrame_tAM.show();
}
Example 10
Source File: FrameRDDConverterUtils.java From systemds with Apache License 2.0
@Override
public Row call(String record) throws Exception {
    String[] fields = IOUtilFunctions.splitCSV(record, _delim);
    Object[] objects = new Object[fields.length];
    for (int i = 0; i < fields.length; i++) {
        objects[i] = UtilFunctions.stringToObject(_schema[i], fields[i]);
    }
    return RowFactory.create(objects);
}
Example 11
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(Tuple2<Double, org.apache.spark.mllib.linalg.Vector> tup) throws Exception {
    Double doub = tup._1();
    org.apache.spark.mllib.linalg.Vector vect = tup._2();
    return RowFactory.create(doub, vect);
}
Example 12
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(Vector vect) throws Exception {
    return RowFactory.create(vect);
}
Example 13
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(org.apache.spark.mllib.linalg.Vector vect) throws Exception {
    return RowFactory.create(vect);
}
Example 14
Source File: MLContextTest.java From systemds with Apache License 2.0
@Override
public Row call(Tuple2<Double, Vector> tup) throws Exception {
    Double doub = tup._1();
    Vector vect = tup._2();
    return RowFactory.create(doub, vect);
}
Example 15
Source File: StructureToAllInteractions.java From mmtf-spark with Apache License 2.0
private List<Row> getDistanceProfile(String structureId, List<Integer> matches, int index,
        List<Integer> groupIndices, List<String> groupNames, StructureDataInterface structure) {

    double cutoffDistanceSq = cutoffDistance * cutoffDistance;

    float[] x = structure.getxCoords();
    float[] y = structure.getyCoords();
    float[] z = structure.getzCoords();

    int first = groupIndices.get(index);
    int last = groupIndices.get(index + 1);
    int groupIndex1 = structure.getGroupTypeIndices()[index];

    List<Row> rows = new ArrayList<>();
    for (int i : matches) {
        // exclude self interactions
        if (i == index) {
            continue;
        }
        for (int j = groupIndices.get(i); j < groupIndices.get(i + 1); j++) {
            for (int k = first; k < last; k++) {
                double dx = (x[j] - x[k]);
                double dy = (y[j] - y[k]);
                double dz = (z[j] - z[k]);
                double dSq = dx * dx + dy * dy + dz * dz;

                if (dSq < cutoffDistanceSq) {
                    int aIndex1 = k - first;
                    String atomName1 = structure.getGroupAtomNames(groupIndex1)[aIndex1];
                    String element1 = structure.getGroupElementNames(groupIndex1)[aIndex1];

                    int groupIndex2 = structure.getGroupTypeIndices()[i];
                    int aIndex2 = j - groupIndices.get(i);
                    String atomName2 = structure.getGroupAtomNames(groupIndex2)[aIndex2];
                    String element2 = structure.getGroupElementNames(groupIndex2)[aIndex2];

                    double d = Math.sqrt(dSq);
                    Row row = RowFactory.create(structureId, groupNames.get(index), atomName1, element1, index,
                            groupNames.get(i), atomName2, element2, i, (float) d);
                    rows.add(row);
                }
            }
        }
    }
    return rows;
}
Example 16
Source File: ExhaustiveAligner.java From mmtf-spark with Apache License 2.0
/**
 * Returns one or more structure alignments and their alignment scores.
 *
 * @param alignmentAlgorithm name of the algorithm
 * @param key unique identifier for protein chain pair
 * @param points1 C-alpha positions of chain 1
 * @param points2 C-alpha positions of chain 2
 * @return list of alignment metrics
 */
public List<Row> getAlignments(String alignmentAlgorithm, String key, Point3d[] points1, Point3d[] points2) {
    List<Row> rows = new ArrayList<>();

    // TODO implement exhaustive alignments here ...
    int length = Math.min(points1.length, points2.length);

    Point3d[] x = null;
    Point3d[] y = null;
    int coverage1 = 0;
    int coverage2 = 0;

    if (points1.length != length) {
        x = Arrays.copyOfRange(points1, 0, length);
        y = points2;
        coverage1 = (int) Math.rint(100.0 * length / x.length);
        coverage2 = 100;
    } else if (points2.length != length) {
        x = points1;
        y = Arrays.copyOfRange(points2, 0, length);
        coverage1 = 100;
        coverage2 = (int) Math.rint(100.0 * length / y.length);
    }

    SuperPositionQCP qcp = new SuperPositionQCP(false);
    double rmsd = qcp.getRmsd(x, y);
    double tm = 0.0;
    // if (rmsd >= maxRmsd) {
    qcp.superposeAndTransform(x, y);
    tm = TMScore(x, y);
    // }

    System.out.println("l: " + length + " c1: " + coverage1 + " c2: " + coverage2
            + " rmsd: " + rmsd + " tm: " + tm);

    // int maxCoverage = Math.max(coverage1, coverage2);

    // store solutions that satisfy minimal criteria
    // if (length >= minLength && maxCoverage >= minCoverage && tm >= minTm) {
    // create a row of alignment metrics
    Row row = RowFactory.create(key, length, coverage1, coverage2, (float) rmsd, (float) tm);
    rows.add(row);
    // }

    return rows;
}
Example 17
Source File: RDDConverterUtilsExtTest.java From systemds with Apache License 2.0
@Override
public Row call(String str) throws Exception {
    return RowFactory.create(str);
}
Example 18
Source File: SecondaryStructureExtractor.java From mmtf-spark with Apache License 2.0
private static Row getSecStructFractions(Tuple2<String, StructureDataInterface> t) throws Exception {
    String key = t._1;
    StructureDataInterface structure = t._2;

    if (t._2.getNumChains() != 1) {
        throw new IllegalArgumentException("This method can only be applied to single polymer chain.");
    }

    StringBuilder dsspQ8 = new StringBuilder(structure.getEntitySequence(0).length());
    StringBuilder dsspQ3 = new StringBuilder(structure.getEntitySequence(0).length());

    float helix = 0;
    float sheet = 0;
    float coil = 0;

    int dsspIndex = 0;
    int structureIndex = 0;
    int seqIndex;

    for (int code : structure.getSecStructList()) {
        seqIndex = structure.getGroupSequenceIndices()[structureIndex++];
        while (dsspIndex < seqIndex) {
            dsspQ8.append("X");
            dsspQ3.append("X");
            dsspIndex++;
        }
        dsspQ8.append(DsspSecondaryStructure.getDsspCode(code).getOneLetterCode());
        dsspIndex++;

        switch (DsspSecondaryStructure.getQ3Code(code)) {
        case ALPHA_HELIX:
            helix++;
            dsspQ3.append("H");
            break;
        case EXTENDED:
            sheet++;
            dsspQ3.append("E");
            break;
        case COIL:
            coil++;
            dsspQ3.append("C");
            break;
        default:
            break;
        }
    }

    while (dsspIndex < structure.getEntitySequence(0).length()) {
        dsspQ8.append("X");
        dsspQ3.append("X");
        dsspIndex++;
    }

    int n = structure.getSecStructList().length;
    helix /= n;
    sheet /= n;
    coil /= n;

    return RowFactory.create(key, structure.getEntitySequence(0), helix, sheet, coil,
            dsspQ8.toString(), dsspQ3.toString());
}
Example 19
Source File: SparkJdbcGenerator.java From Quicksql with MIT License
@Override
public Row call(Object ob) throws Exception {
    return RowFactory.create((Object[]) ((String) ob).split("\u0006"));
}
Example 20
Source File: DefinitionToSparkVisitor.java From bunsen with Apache License 2.0
@Override
protected Object createComposite(Object[] children) {
    return RowFactory.create(children);
}