Java Code Examples for org.apache.spark.sql.SparkSession#close()
The following examples show how to use org.apache.spark.sql.SparkSession#close(). Each example is taken from an open-source project; the source file, project, and license are noted above the code.
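As a quick orientation before the project examples, here is a minimal sketch of the usual lifecycle: build a local SparkSession, do some work, and call close() to stop the underlying SparkContext and release its resources. The class name, app name, and toy dataset below are placeholders chosen for illustration.

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class SparkSessionCloseSketch {
    public static void main(String[] args) {
        // build (or reuse) a local session; the app name is arbitrary
        SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("SparkSessionCloseSketch")
                .getOrCreate();

        // do some work with the session
        Dataset<Row> df = spark.range(5).toDF("id");
        df.show();

        // stop the underlying SparkContext and release its resources
        spark.close();
    }
}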
Example 1
Source File: MutationToStructureDemo.java From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]")
            .appName(MutationToStructureDemo.class.getSimpleName())
            .getOrCreate();

    // find missense mutations that map to UniProt ID P15056 (BRAF)
    // that are annotated as pathogenic or likely pathogenic in ClinVar.
    List<String> uniprotIds = Arrays.asList("P15056"); // BRAF: P15056
    String query = "clinvar.rcv.clinical_significance:pathogenic OR clinvar.rcv.clinical_significance:likely pathogenic";
    Dataset<Row> df = MyVariantDataset.getVariations(uniprotIds, query).cache();
    System.out.println("BRAF missense mutations: " + df.count());
    df.show();

    // extract the list of variant Ids
    List<String> variantIds = df.select("variationId").as(Encoders.STRING()).collectAsList();

    // map to PDB structures
    Dataset<Row> ds = G2SDataset.getPositionDataset(variantIds);
    ds = ds.sort("structureId", "chainId", "pdbPosition");
    ds.show();

    spark.close();
}
Example 2
Source File: PdbLigandDemo.java From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]")
            .appName(PdbLigandDemo.class.getSimpleName())
            .getOrCreate();

    // find non-polymeric chemical components that contain carbon
    // and have a formula weight > 150 da
    String sqlQuery = "SELECT pdbid, id, formula, formula_weight, name from chem_comp "
            + " WHERE type = 'non-polymer' AND formula LIKE 'C%' AND formula_weight > 150";
    Dataset<Row> ds = PdbjMineDataset.getDataset(sqlQuery);

    System.out.println("First 10 results from query: " + sqlQuery);
    ds.show(10, false);

    System.out.println("Top 10 ligands in PDB:");
    ds.groupBy("id").count().sort(col("count").desc()).show(10);

    System.out.println("Formula weight (>150) statistics:");
    ds.describe("formula_weight").show();

    spark.close();
}
Example 3
Source File: PdbDrugBankMapping.java From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]")
            .appName(PdbDrugBankMapping.class.getSimpleName())
            .getOrCreate();

    // download open DrugBank dataset
    Dataset<Row> drugBank = DrugBankDataset.getOpenDrugLinks();

    // find some tyrosine kinase inhibitors with generic name stem: "tinib"
    drugBank = drugBank.filter("Commonname LIKE '%tinib'");

    // get PDB ligand annotations
    Dataset<Row> ligands = CustomReportService.getDataset("ligandId", "ligandMolecularWeight",
            "ligandFormula", "ligandSmiles", "InChIKey");

    // join ligand dataset with DrugBank info by InChIKey
    ligands = ligands.join(drugBank, ligands.col("InChIKey").equalTo(drugBank.col("StandardInChIKey")));

    // show one example per drug molecule
    ligands = ligands.dropDuplicates("Commonname");
    ligands.select("structureChainId", "ligandId", "DrugBankID", "Commonname",
            "ligandMolecularWeight", "ligandFormula", "InChIKey", "ligandSmiles")
            .sort("Commonname").show(50);

    spark.close();
}
Example 4
Source File: DrugBankDemo.java From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]")
            .appName(DrugBankDemo.class.getSimpleName())
            .getOrCreate();

    // download open DrugBank dataset
    Dataset<Row> openDrugLinks = DrugBankDataset.getOpenDrugLinks();

    // find all drugs with an InChIKey
    openDrugLinks = openDrugLinks.filter("StandardInChIKey IS NOT NULL");

    // show some sample data
    openDrugLinks.select("DrugBankID", "Commonname", "CAS", "StandardInChIKey").show();

    // The DrugBank password protected datasets contain more information.
    // You need to create a DrugBank account and supply username/password
    // to access these datasets.

    // Download DrugBank dataset for approved drugs
    // String username = args[0];
    // String password = args[1];
    // Dataset<Row> drugLinks =
    //         DrugBankDataset.getDrugLinks(DrugGroup.APPROVED, username, password);
    // drugLinks.show();

    spark.close();
}
Example 5
Source File: DatasetBalancerTest.java From mmtf-spark with Apache License 2.0
@Test
public void test() {
    List<Row> rows = Arrays.asList(
            RowFactory.create("a", 1), RowFactory.create("a", 2),
            RowFactory.create("b", 1), RowFactory.create("b", 2), RowFactory.create("b", 3),
            RowFactory.create("c", 1), RowFactory.create("c", 2), RowFactory.create("c", 3),
            RowFactory.create("c", 4));

    SparkSession spark = SparkSession.builder().master("local[1]").getOrCreate();

    StructType schema = new StructType(new StructField[] {
            DataTypes.createStructField("key", DataTypes.StringType, false),
            DataTypes.createStructField("value", DataTypes.IntegerType, false) });

    Dataset<Row> data = spark.createDataFrame(rows, schema);

    long seed = 19;
    Dataset<Row> balancedData = DatasetBalancer.downsample(data, "key", seed);
    assertTrue(balancedData.count() > 0);

    spark.close();
}
Example 6
Source File: KafkaImportApplicationIntegrationTest.java From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaStreamingImportProcessLevel() throws Exception {
    // run main class
    String args[] = {"-kb", KAFKA_HOST + ":" + KAFKA_PORT,
            "-fd", IMPORT_TEST_OUTPUT_DIRECTORY_PROCESS,
            "-bm", "true", "-sr", "false", "-dl", "process",
            "-wd", "./src/test/resources/config/kafka_import_process/",
            "-sm", "overwrite"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();
    KafkaImportApplication.main(args);

    // start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    // generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read().load(IMPORT_TEST_OUTPUT_DIRECTORY_PROCESS);

    // check that dataset contains 43 lines
    assertEquals(43, importedDataset.count());

    // check hash of dataset
    String hash = BpmnaiUtils.getInstance().md5CecksumOfObject(importedDataset.collect());
    assertEquals("15254E402E5D700FB125E2BD670FE716", hash);

    // close Spark session
    sparkSession.close();
}
Example 7
Source File: KafkaImportApplicationIntegrationTest.java From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaStreamingImportActivityLevel() throws Exception {
    // run main class
    String args[] = {"-kb", KAFKA_HOST + ":" + KAFKA_PORT,
            "-fd", IMPORT_TEST_OUTPUT_DIRECTORY_ACTIVITY,
            "-bm", "true", "-sr", "false", "-dl", "activity",
            "-wd", "./src/test/resources/config/kafka_import_activity/",
            "-sm", "overwrite"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();
    KafkaImportApplication.main(args);

    // start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    // generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read().load(IMPORT_TEST_OUTPUT_DIRECTORY_ACTIVITY);

    // check that dataset contains 55 lines
    assertEquals(55, importedDataset.count());

    // check hash of dataset
    String hash = BpmnaiUtils.getInstance().md5CecksumOfObject(importedDataset.collect());
    assertEquals("9CEE92C16D7803E0ECF57666FDAC60D7", hash);

    // close Spark session
    sparkSession.close();
}
Example 8
Source File: KafkaProcessingApplicationIntegrationTest.java From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaDataProcessingActivityLevel() throws Exception {
    //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

    // run main class
    String args[] = {"-fs", DATA_PROCESSING_TEST_INPUT_DIRECTORY_ACTIVITY,
            "-fd", DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_ACTIVITY,
            "-d", "|", "-sr", "false", "-dl", "activity",
            "-sm", "overwrite", "-of", "parquet",
            "-wd", "./src/test/resources/config/kafka_processing_activity/"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();
    KafkaProcessingApplication.main(args);

    // start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    // generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read()
            .option("inferSchema", "true")
            .load(DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_ACTIVITY + "/result/parquet");

    // check that dataset contains 12 lines
    assertEquals(12, importedDataset.count());

    // check that dataset contains 43 columns
    assertEquals(43, importedDataset.columns().length);

    // check hash of dataset
    String hash = BpmnaiUtils.getInstance().md5CecksumOfObject(importedDataset.collect());
    System.out.println(hash);
    assertEquals("A8BBFC3B17C00C40C9883DA1F396D453", hash);

    // close Spark session
    sparkSession.close();
}
Example 9
Source File: SwissModelDemo.java From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]")
            .appName(SwissModelDemo.class.getSimpleName())
            .getOrCreate();

    List<String> uniProtIds = Arrays.asList("P36575", "P24539", "O00244");
    Dataset<Row> ds = SwissModelDataset.getSwissModels(uniProtIds);
    ds.show();

    spark.close();
}
Example 10
Source File: PdbMetadataDemo.java From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]")
            .appName(PdbMetadataDemo.class.getSimpleName())
            .getOrCreate();

    // query the following fields from the _citation category using PDBj's Mine2 web service:
    // journal_abbrev, pdbx_database_id_PubMed, year.
    // Note, mixed case column names must be quoted and escaped with \".
    String sqlQuery = "SELECT pdbid, journal_abbrev, \"pdbx_database_id_PubMed\", year from citation WHERE id = 'primary'";
    Dataset<Row> ds = PdbjMineDataset.getDataset(sqlQuery);

    System.out.println("First 10 results from query: " + sqlQuery);
    ds.show(10, false);

    // filter out unpublished entries (they contain the word "published" in various upper/lower case combinations)
    ds = ds.filter("UPPER(journal_abbrev) NOT LIKE '%PUBLISHED%'");

    // print the top 10 journals
    System.out.println("Top 10 journals that publish PDB structures:");
    ds.groupBy("journal_abbrev").count().sort(col("count").desc()).show(10, false);

    // filter out entries without a PubMed Id (is -1 if PubMed Id is not available)
    ds = ds.filter("pdbx_database_id_PubMed > 0");
    System.out.println("Entries with PubMed Ids: " + ds.count());

    // show growth of papers in PubMed
    System.out.println("PubMed Ids per year: ");
    ds.groupBy("year").count().sort(col("year").desc()).show(10, false);

    spark.close();
}
Example 11
Source File: DrugBankDatasetTest.java From mmtf-spark with Apache License 2.0
@Test
public void test() throws IOException {
    SparkSession spark = SparkSession
            .builder()
            .master("local[*]")
            .appName(DrugBankDatasetTest.class.getSimpleName())
            .getOrCreate();

    Dataset<Row> ds = DrugBankDataset.getOpenDrugLinks();
    assertTrue(ds.count() > 10000);
    assertEquals("DrugBankID", ds.columns()[0]);

    spark.close();
}
Example 12
Source File: PortfolioCollector.java From ExecDashboard with Apache License 2.0
/**
 * Main collection loop
 */
@SuppressWarnings("PMD.NPathComplexity")
public void collect() {
    HygieiaSparkConnection sparkConnection = new HygieiaSparkConnection(setting.getReadUri(),
            setting.getReadDatabase(), setting.getWriteUri(), setting.getWriteDatabase());
    SparkSession sparkSession = sparkConnection.getInstance();
    JavaSparkContext javaSparkContext = new JavaSparkContext(sparkSession.sparkContext());

    // Build portfolio structure: Portfolio -> Product (ASV) -> Environment -> Component (BAP)
    collectCMDB(sparkSession, javaSparkContext);
    List<Portfolio> portfolioList = createPortfolios();
    ArrayList<Lob> lobList = (ArrayList<Lob>) createLobs();

    if (CollectionUtils.isEmpty(portfolioList)) {
        LOGGER.info("##### Portfolio List is empty, cannot procedd further, returning ... #####");
        return;
    }

    if (setting.isScmCollectorFlag()) {
        LOGGER.info("##### Starting SCM Collector #####");
        scmCollector.collect(sparkSession, javaSparkContext, portfolioList);
        LOGGER.info("##### Completed SCM Collector #####");
    }
    if (setting.isLibraryPolicyCollectorFlag()) {
        LOGGER.info("##### Starting Library Policy Collector #####");
        libraryPolicyCollector.collect(sparkSession, javaSparkContext, portfolioList);
        LOGGER.info("##### Completed Library Policy Collector #####");
    }
    if (setting.isIncidentsCollectorFlag()) {
        LOGGER.info("##### Starting Incident Collector #####");
        incidentCollector.collect(sparkSession, javaSparkContext, portfolioList);
        LOGGER.info("##### Completed Incident Collector #####");
    }
    if (setting.isStaticCodeAnalysisCollectorFlag()) {
        LOGGER.info("##### Starting Static Code Collector #####");
        staticCodeAnalysisCollector.collect(sparkSession, javaSparkContext, portfolioList);
        LOGGER.info("##### Completed Static Code Analysis Collector #####");
    }
    if (setting.isUnitTestCoverageCollectorFlag()) {
        LOGGER.info("##### Starting Unit Test Collector #####");
        unitTestCoverageCollector.collect(sparkSession, javaSparkContext, portfolioList);
        LOGGER.info("##### Completed Unit Test Collector #####");
    }
    if (setting.isPipelineCollectorFlag()) {
        LOGGER.info("##### Starting Pipeline Collector #####");
        pipelineCollector.collect(sparkSession, javaSparkContext, portfolioList);
        LOGGER.info("##### Completed Pipeline Collector #####");
    }
    if (setting.isTraceabilityCollectorFlag()) {
        LOGGER.info("##### Starting Traceability Collector #####");
        traceabilityCollector.collect(sparkSession, javaSparkContext, portfolioList);
        LOGGER.info("##### Completed Traceability Collector #####");
    }
    if (setting.isSecurityCollectorFlag()) {
        LOGGER.info("##### Starting Security Collector #####");
        securityCollector.collect(sparkSession, javaSparkContext, portfolioList);
        LOGGER.info("##### Completed Security Collector #####");
    }
    if (setting.isPerformanceCollectorFlag()) {
        LOGGER.info("##### Starting Performance Collector #####");
        performanceCollector.collect(sparkSession, javaSparkContext, portfolioList);
        LOGGER.info("##### Completed Performance Collector #####");
    }
    if (setting.isEngineeringMaturityFlag()) {
        LOGGER.info("##### Starting Engineering Maturity Collector #####");
        engineeringMaturityCollector.collect(sparkSession, javaSparkContext, lobList);
    }

    sparkSession.close();
    javaSparkContext.close();
}
Example 13
Source File: KafkaProcessingApplicationIntegrationTest.java From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaDataProcessingProcessLevel() throws Exception {
    //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

    // run main class
    String args[] = {"-fs", DATA_PROCESSING_TEST_INPUT_DIRECTORY_PROCESS,
            "-fd", DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_PROCESS,
            "-d", "|", "-sr", "false",
            "-sm", "overwrite", "-of", "parquet",
            "-wd", "./src/test/resources/config/kafka_processing_process/"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();
    KafkaProcessingApplication.main(args);

    // start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    // generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read()
            .option("inferSchema", "true")
            .load(DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_PROCESS + "/result/parquet");

    // check that dataset contains 4 lines
    assertEquals(4, importedDataset.count());

    // check that dataset contains 42 columns
    assertEquals(42, importedDataset.columns().length);

    // convert rows to string
    String[] resultLines = (String[]) importedDataset
            .map(row -> row.mkString(), Encoders.STRING())
            .collectAsList().toArray();

    for (String l : resultLines) {
        System.out.println(l);
    }

    // check if hashes of line values are correct
    // kept in for easier amendment after test case change
    // System.out.println(DigestUtils.md5Hex(resultLines[0]).toUpperCase());
    // System.out.println(DigestUtils.md5Hex(resultLines[1]).toUpperCase());
    // System.out.println(DigestUtils.md5Hex(resultLines[2]).toUpperCase());
    // System.out.println(DigestUtils.md5Hex(resultLines[3]).toUpperCase());

    assertEquals("9088849D6374163C3E9DACB3090D4E56", DigestUtils.md5Hex(resultLines[0]).toUpperCase());
    assertEquals("415A0A505F9A32002C1342171E7649F9", DigestUtils.md5Hex(resultLines[1]).toUpperCase());
    assertEquals("C83F9CC0618D7FA50D63753FBC429188", DigestUtils.md5Hex(resultLines[2]).toUpperCase());
    assertEquals("0559C383855FDE566069B483188E06C0", DigestUtils.md5Hex(resultLines[3]).toUpperCase());

    // close Spark session
    sparkSession.close();
}
Example 14
Source File: AnalyzeWaterInteractions4.java From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession
            .builder()
            .master("local[*]")
            .appName(AnalyzeWaterInteractions4.class.getSimpleName())
            .getOrCreate();

    String inputfile = args[0];

    Dataset<Row> data = spark.read().parquet(inputfile).cache();
    System.out.println("Interactions: " + data.count());

    data.describe("distance1").show();
    data.describe("distance2").show();
    data.describe("distance3").show();
    data.describe("distance4").show();

    // all interaction distance statistics
    Dataset<Row> distance = data.select(col("distance1").as("distance"))
            .union(data.select(col("distance2").as("distance")))
            .union(data.select(col("distance3").as("distance")))
            .union(data.select(col("distance4").as("distance")));
    distance.describe("distance").show();

    // water oxygen - oxygen distance statistics
    Dataset<Row> ooDistance = data.filter("element1 = 'O'").select(col("distance1").as("O-Odistance"))
            .union(data.filter("element2 = 'O'").select(col("distance2").as("O-Odistance")))
            .union(data.filter("element3 = 'O'").select(col("distance3").as("O-Odistance")))
            .union(data.filter("element4 = 'O'").select(col("distance4").as("O-Odistance")));
    ooDistance.describe("O-Odistance").show();

    // water oxygen - nitrogen distance statistics
    Dataset<Row> onDistance = data.filter("element1 = 'N'").select(col("distance1").as("O-Ndistance"))
            .union(data.filter("element2 = 'N'").select(col("distance2").as("O-Ndistance")))
            .union(data.filter("element3 = 'N'").select(col("distance3").as("O-Ndistance")))
            .union(data.filter("element4 = 'N'").select(col("distance4").as("O-Ndistance")));
    onDistance.describe("O-Ndistance").show();

    // orientational order statistics
    data.describe("q3").show();
    data.describe("q4").show();

    // angle statistics
    Dataset<Row> allAngles = data.select(col("angle1-2").as("angle"))
            .union(data.select(col("angle1-3").as("angle")))
            .union(data.select(col("angle1-4").as("angle")))
            .union(data.select(col("angle2-3").as("angle")))
            .union(data.select(col("angle2-4").as("angle")))
            .union(data.select(col("angle3-4").as("angle")));
    allAngles.describe("angle").show();

    // normalized B-factor statistics
    Dataset<Row> nbFactor = data.select(col("nbFactor0").as("nbFactor"))
            .union(data.select(col("nbFactor1").as("nbFactor")))
            .union(data.select(col("nbFactor2").as("nbFactor")))
            .union(data.select(col("nbFactor3").as("nbFactor")))
            .union(data.select(col("nbFactor4").as("nbFactor")));
    nbFactor.describe("nbFactor").show();

    spark.close();
}
Example 15
Source File: SiftsDataDemo.java From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws IOException {
    SparkSession spark = SparkSession.builder().master("local[*]")
            .appName(SiftsDataDemo.class.getSimpleName())
            .getOrCreate();

    // get PDB entry to PubMed Id mappings
    String pubmedQuery = "SELECT * FROM sifts.pdb_pubmed LIMIT 10";
    Dataset<Row> pubmed = PdbjMineDataset.getDataset(pubmedQuery);
    System.out.println("First 10 results for query: " + pubmedQuery);
    pubmed.show(10);

    // get PDB chain to InterPro mappings
    String interproQuery = "SELECT * FROM sifts.pdb_chain_interpro LIMIT 10";
    Dataset<Row> interpro = PdbjMineDataset.getDataset(interproQuery);
    System.out.println("First 10 results for query: " + interproQuery);
    interpro.show();

    // get PDB chain to UniProt mappings
    String uniprotQuery = "SELECT * FROM sifts.pdb_chain_uniprot LIMIT 10";
    Dataset<Row> uniprot = PdbjMineDataset.getDataset(uniprotQuery);
    System.out.println("First 10 results for query: " + uniprotQuery);
    uniprot.show();

    // get PDB chain to taxonomy mappings
    String taxonomyQuery = "SELECT * FROM sifts.pdb_chain_taxonomy LIMIT 10";
    Dataset<Row> taxonomy = PdbjMineDataset.getDataset(taxonomyQuery);
    System.out.println("First 10 results for query: " + taxonomyQuery);
    taxonomy.show();

    // get PDB chain to PFAM mappings
    String pfamQuery = "SELECT * FROM sifts.pdb_chain_pfam LIMIT 10";
    Dataset<Row> pfam = PdbjMineDataset.getDataset(pfamQuery);
    System.out.println("First 10 results for query: " + pfamQuery);
    pfam.show();

    // get PDB chain to CATH mappings
    String cathQuery = "SELECT * FROM sifts.pdb_chain_cath_uniprot LIMIT 10";
    Dataset<Row> cath = PdbjMineDataset.getDataset(cathQuery);
    System.out.println("First 10 results for query: " + cathQuery);
    cath.show();

    // get PDB chain to SCOP mappings
    String scopQuery = "SELECT * FROM sifts.pdb_chain_scop_uniprot LIMIT 10";
    Dataset<Row> scop = PdbjMineDataset.getDataset(scopQuery);
    System.out.println("First 10 results for query: " + scopQuery);
    scop.show();

    // get PDB chain to Enzyme classification (EC) mappings
    String enzymeQuery = "SELECT * FROM sifts.pdb_chain_enzyme LIMIT 10";
    Dataset<Row> enzyme = PdbjMineDataset.getDataset(enzymeQuery);
    System.out.println("First 10 results for query: " + enzymeQuery);
    enzyme.show();

    // get PDB chain to Gene Ontology term mappings
    String goQuery = "SELECT * FROM sifts.pdb_chain_go LIMIT 10";
    Dataset<Row> go = PdbjMineDataset.getDataset(goQuery);
    System.out.println("First 10 results for query: " + goQuery);
    go.show(10);

    spark.close();
}
Example 16
Source File: SparkIngestDriver.java From geowave with Apache License 2.0
public void close(SparkSession session) {
    if (session != null) {
        session.close();
        session = null;
    }
}
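Example 16 wraps close() in a null check so it can be called safely from cleanup code. Because SparkSession implements java.io.Closeable, the same guarantee can also be expressed with try-with-resources when the session's lifetime matches a single block. A minimal sketch, with a placeholder app name and query:

import org.apache.spark.sql.SparkSession;

public class TryWithResourcesSketch {
    public static void main(String[] args) {
        // the session is closed automatically when the block exits,
        // even if an exception is thrown
        try (SparkSession spark = SparkSession.builder()
                .master("local[*]")
                .appName("TryWithResourcesSketch")
                .getOrCreate()) {
            spark.sql("SELECT 1 AS one").show();
        }
    }
}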