org.apache.spark.sql.hive.HiveContext Java Examples
The following examples show how to use
org.apache.spark.sql.hive.HiveContext.
Each example notes the project it comes from and its license.
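Before the examples, here is a minimal sketch of the basic pattern they all share: wrap a SparkContext in a HiveContext and issue HiveQL (Spark 1.x API; the table name my_table is illustrative).

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;

public class HiveContextPrimer {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("HiveContextPrimer");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // HiveContext wraps the SparkContext and adds Hive support
    // (HiveQL, Hive UDFs, and access to the Hive metastore).
    HiveContext hiveContext = new HiveContext(jsc.sc());

    // "my_table" is illustrative; any table visible to the metastore works.
    DataFrame result = hiveContext.sql("SELECT * FROM my_table LIMIT 10");
    result.show();

    jsc.stop();
  }
}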
Example #1
Source File: SparkDataSourceManager.java From DDF with Apache License 2.0
@Override
public DDF loadFromJDBC(JDBCDataSourceDescriptor dataSource) throws DDFException {
  SparkDDFManager sparkDDFManager = (SparkDDFManager) mDDFManager;
  HiveContext sqlContext = sparkDDFManager.getHiveContext();

  JDBCDataSourceCredentials cred = (JDBCDataSourceCredentials) dataSource.getDataSourceCredentials();
  String fullURL = dataSource.getDataSourceUri().getUri().toString();
  if (cred.getUsername() != null && !cred.getUsername().equals("")) {
    fullURL += String.format("?user=%s&password=%s", cred.getUsername(), cred.getPassword());
  }

  Map<String, String> options = new HashMap<String, String>();
  options.put("url", fullURL);
  options.put("dbtable", dataSource.getDbTable());
  DataFrame df = sqlContext.load("jdbc", options);

  DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[] { DataFrame.class },
      null, SparkUtils.schemaFromDataFrame(df));
  // TODO? ddf.getRepresentationHandler().get(RDD.class, Row.class);
  ddf.getMetaDataHandler().setDataSourceDescriptor(dataSource);
  return ddf;
}
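For comparison, the same load can be written against the plain Spark 1.x API, without the DDF wrapper. A minimal sketch; the PostgreSQL URL and table name are illustrative. Note that, as in the example above, credentials embedded in the URL can end up in driver logs, and that load(source, options) was deprecated in Spark 1.4 in favor of read().format("jdbc").

import java.util.HashMap;
import java.util.Map;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;

public class JdbcLoadSketch {
  // Loads a JDBC table into a DataFrame; URL and table name are illustrative.
  public static DataFrame loadOrders(HiveContext sqlContext) {
    Map<String, String> options = new HashMap<String, String>();
    options.put("url", "jdbc:postgresql://localhost:5432/mydb?user=alice&password=secret");
    options.put("dbtable", "public.orders");
    // load(source, options) is the Spark 1.3 API used above; from Spark 1.4 on,
    // sqlContext.read().format("jdbc").options(options).load() is preferred.
    return sqlContext.load("jdbc", options);
  }
}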
Example #2
Source File: SparkDataSourceManager.java From DDF with Apache License 2.0
@Override
public DDF loadSpecialFormat(DataFormat format, URI fileURI, Boolean flatten) throws DDFException {
  SparkDDFManager sparkDDFManager = (SparkDDFManager) mDDFManager;
  HiveContext sqlContext = sparkDDFManager.getHiveContext();

  DataFrame jdf = null;
  switch (format) {
    case JSON:
      jdf = sqlContext.jsonFile(fileURI.toString());
      break;
    case PQT:
      jdf = sqlContext.parquetFile(fileURI.toString());
      break;
    default:
      throw new DDFException(String.format("Unsupported data format: %s", format.toString()));
  }

  DataFrame df = SparkUtils.getDataFrameWithValidColnames(jdf);
  DDF ddf = sparkDDFManager.newDDF(sparkDDFManager, df, new Class<?>[] { DataFrame.class },
      null, SparkUtils.schemaFromDataFrame(df));
  if (flatten) {
    return ddf.getFlattenedDDF();
  }
  return ddf;
}
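jsonFile and parquetFile are the older Spark SQL entry points; from Spark 1.4 onward the DataFrameReader API replaces them. A minimal sketch of the equivalent reads (paths are illustrative):

import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;

public class SpecialFormatSketch {
  // Equivalent reads with the Spark 1.4+ DataFrameReader API.
  public static DataFrame readJson(HiveContext sqlContext) {
    return sqlContext.read().json("hdfs:///data/input.json");
  }

  public static DataFrame readParquet(HiveContext sqlContext) {
    return sqlContext.read().parquet("hdfs:///data/input.parquet");
  }
}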
Example #3
Source File: TestWebServiceGet.java From quetzal with Eclipse Public License 2.0
public static void main(String[] args) {
  // SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("local[2]");
  // SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://Kavithas-MBP.home:7077");
  SparkConf conf = new SparkConf().setAppName("App-mt").setMaster("spark://kavithas-mbp.watson.ibm.com:7077");
  JavaSparkContext sc = new JavaSparkContext(conf);
  HiveContext sqlContext = new HiveContext(sc.sc());

  Dataset<Row> urls = sqlContext.read().json("/tmp/urls.json");
  urls.registerTempTable("urls");
  Dataset<Row> temp = sqlContext.sql("select * from urls");
  temp.show();

  // Register the web-service UDTF shipped in quetzal.jar, then call it from SQL.
  sqlContext.sql("add jar /tmp/quetzal.jar");
  sqlContext.sql("create temporary function webservice as 'com.ibm.research.rdf.store.utilities.WebServiceGetUDTF'");
  Dataset<Row> drugs = sqlContext.sql("select webservice(\"drug,id,action\", \"url\", \"\", \"GET\", \"xs=http://www.w3.org/2001/XMLSchema\", \"//row\",\"drug\",\"./drug\","
      + " \"<string>\", \"id\", \"./id\",\"<string>\", \"action\", \"./action\", \"<string>\", url) as (drug, drug_typ, id, id_typ, action, action_typ) from urls");
  drugs.show();
  System.out.println("Num rows: " + drugs.count());
}
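The add jar and create temporary function statements register the Hive UDTF com.ibm.research.rdf.store.utilities.WebServiceGetUDTF under the name webservice, which is why the final query can call webservice(...) directly in SQL. This is a HiveContext-specific capability: a plain SQLContext does not accept these HiveQL statements.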
Example #4
Source File: UserVisitSessionAnalyzeSpark.java From BigDataPlatform with GNU General Public License v3.0
/**
 * Get the SQLContext.
 * In a local test environment, create a SQLContext object;
 * when running in production, create a HiveContext object.
 * @param sc SparkContext
 * @return SQLContext
 */
private static SQLContext getSQLContext(SparkContext sc) {
  boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
  if (local) {
    return new SQLContext(sc);
  } else {
    return new HiveContext(sc);
  }
}
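This pattern works because HiveContext extends SQLContext, so callers can program against the supertype regardless of environment. A hypothetical call site inside the same class (the table name user_visit_action is illustrative):

// Hypothetical call site inside UserVisitSessionAnalyzeSpark:
SparkContext sc = new SparkContext(new SparkConf().setAppName("UserVisitSessionAnalyzeSpark"));
SQLContext sqlContext = getSQLContext(sc);
DataFrame actions = sqlContext.sql("SELECT * FROM user_visit_action"); // illustrative table
actions.show();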
Example #5
Source File: SparkUtils.java From BigDataPlatform with GNU General Public License v3.0
/**
 * Get the SQLContext.
 * If spark.local is set to true, create a SQLContext; otherwise, create a HiveContext.
 * @param sc
 * @return
 */
public static SQLContext getSQLContext(SparkContext sc) {
  boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
  if (local) {
    return new SQLContext(sc);
  } else {
    return new HiveContext(sc);
  }
}
Example #6
Source File: UserVisitAnalyze.java From UserActionAnalyzePlatform with Apache License 2.0
/**
 * Decides whether we are in a production environment,
 * returning a HiveContext in production and a SQLContext locally.
 * @param sc
 * @return
 */
public static SQLContext getSQLContext(SparkContext sc) {
  boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
  if (local) {
    return new SQLContext(sc);
  }
  return new HiveContext(sc);
}
Example #7
Source File: SparkDDFManager.java From DDF with Apache License 2.0
private void initialize(SparkContext sparkContext, Map<String, String> params) throws DDFException {
  this.setSparkContext(sparkContext == null ? this.createSparkContext(params) : sparkContext);
  this.mHiveContext = new HiveContext(this.mSparkContext);

  String compression = System.getProperty("spark.sql.inMemoryColumnarStorage.compressed", "true");
  String batchSize = System.getProperty("spark.sql.inMemoryColumnarStorage.batchSize", "1000");
  mLog.info(">>>> spark.sql.inMemoryColumnarStorage.compressed= " + compression);
  mLog.info(">>>> spark.sql.inMemoryColumnarStorage.batchSize= " + batchSize);
  this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.compressed", compression);
  this.mHiveContext.setConf("spark.sql.inMemoryColumnarStorage.batchSize", batchSize);

  // register SparkSQL UDFs
  this.registerUDFs();
  this.mDataSourceManager = new SparkDataSourceManager(this);
}
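The same settings can also be supplied once on the SparkConf before the context is created, rather than via setConf afterwards. A minimal sketch (the values mirror the defaults used above):

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.hive.HiveContext;

public class ColumnarConfigSketch {
  public static HiveContext build() {
    SparkConf conf = new SparkConf()
        .setAppName("ColumnarConfigSketch")
        // Compress the in-memory columnar cache and batch 1000 rows per column batch.
        .set("spark.sql.inMemoryColumnarStorage.compressed", "true")
        .set("spark.sql.inMemoryColumnarStorage.batchSize", "1000");
    return new HiveContext(new SparkContext(conf));
  }
}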
Example #8
Source File: SparkDDF.java From DDF with Apache License 2.0
public boolean isTable() {
  HiveContext hiveContext = ((SparkDDFManager) this.getManager()).getHiveContext();
  String[] tableNames = hiveContext.tableNames();
  for (String table : tableNames) {
    if (table.equals(this.getTableName())) {
      return true;
    }
  }
  return false;
}
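Since tableNames() just returns an array of names, the same check can be written as a one-liner. A minimal sketch, assuming the manager cast shown above has already happened:

import java.util.Arrays;
import org.apache.spark.sql.hive.HiveContext;

public class TableCheckSketch {
  // Same membership test as above, written as a one-liner.
  public static boolean isTable(HiveContext hiveContext, String tableName) {
    return Arrays.asList(hiveContext.tableNames()).contains(tableName);
  }
}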
Example #9
Source File: ExplorerSQLContextTest.java From Explorer with Apache License 2.0
@Test
public void whenHiveContextIsSelectedInConfiguration() {
  Properties properties = new Properties();
  properties.put(AttributteNames.CT_HIVE_CONTEXT, "YES");
  sqlContext.loadConfiguration(properties);
  assertThat("When exist HiveContext then create instanceof HiveContext",
      sqlContext.getConnector(), instanceOf(HiveContext.class));
}
Example #10
Source File: SQLQueryBAM.java From ViraPipe with MIT License
public static void main(String[] args) throws IOException {
  SparkConf conf = new SparkConf().setAppName("SQLQueryBAM");
  JavaSparkContext sc = new JavaSparkContext(conf);
  SQLContext sqlContext = new HiveContext(sc.sc());

  Options options = new Options();
  Option opOpt = new Option("out", true, "HDFS path for output files. If not present, the output files are not moved to HDFS.");
  Option queryOpt = new Option("query", true, "SQL query string.");
  Option baminOpt = new Option("in", true, "");

  options.addOption(opOpt);
  options.addOption(queryOpt);
  options.addOption(baminOpt);
  CommandLineParser parser = new BasicParser();
  CommandLine cmd = null;
  try {
    cmd = parser.parse(options, args);
  } catch (ParseException exp) {
    System.err.println("Parsing failed. Reason: " + exp.getMessage());
    return; // cmd is null at this point, so there is nothing more to do
  }

  String bwaOutDir = cmd.hasOption("out") ? cmd.getOptionValue("out") : null;
  String query = cmd.hasOption("query") ? cmd.getOptionValue("query") : null;
  String bamin = cmd.hasOption("in") ? cmd.getOptionValue("in") : null;

  sc.hadoopConfiguration().setBoolean(BAMInputFormat.KEEP_PAIRED_READS_TOGETHER_PROPERTY, true);

  // Read BAM/SAM from HDFS
  JavaPairRDD<LongWritable, SAMRecordWritable> bamPairRDD = sc.newAPIHadoopFile(bamin, AnySAMInputFormat.class,
      LongWritable.class, SAMRecordWritable.class, sc.hadoopConfiguration());
  // Map to SAMRecord RDD
  JavaRDD<SAMRecord> samRDD = bamPairRDD.map(v1 -> v1._2().get());
  JavaRDD<MyAlignment> rdd = samRDD.map(bam -> new MyAlignment(bam.getReadName(), bam.getStart(), bam.getReferenceName(),
      bam.getReadLength(), new String(bam.getReadBases(), StandardCharsets.UTF_8), bam.getCigarString(),
      bam.getReadUnmappedFlag(), bam.getDuplicateReadFlag()));

  Dataset<Row> samDF = sqlContext.createDataFrame(rdd, MyAlignment.class);
  samDF.registerTempTable(tablename); // tablename is a field defined elsewhere in this class
  if (query != null) {
    // Save as parquet file
    Dataset<Row> df2 = sqlContext.sql(query);
    df2.show(100, false);
    if (bwaOutDir != null) {
      df2.write().parquet(bwaOutDir);
    }
  } else {
    if (bwaOutDir != null) {
      samDF.write().parquet(bwaOutDir);
    }
  }
  sc.stop();
}
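A hypothetical way to launch this job; the jar name, the fully qualified class name, the paths, and the temp-table name behind the tablename field are all placeholders:

spark-submit --class <fully.qualified.SQLQueryBAM> virapipe.jar \
  -in hdfs:///data/sample.bam \
  -query "SELECT * FROM <tablename> LIMIT 10" \
  -out hdfs:///results/bam-query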
Example #11
Source File: SparkDDFManager.java From DDF with Apache License 2.0
public HiveContext getHiveContext() {
  return mHiveContext;
}
Example #12
Source File: SqlHandler.java From DDF with Apache License 2.0
private HiveContext getHiveContext() {
  return ((SparkDDFManager) this.getManager()).getHiveContext();
}
Example #13
Source File: SparkHiveContextBuilder.java From Explorer with Apache License 2.0
/**
 * Builds a HiveContext.
 * @param sparkContext the SparkContext used to configure the HiveContext
 * @return HiveContext
 */
@Override
public HiveContext build(SparkContext sparkContext) {
  return new HiveContext(sparkContext);
}
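A hypothetical usage of the builder, assuming it can be instantiated directly:

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.hive.HiveContext;

public class BuilderUsageSketch {
  public static void main(String[] args) {
    SparkContext sparkContext = new SparkContext(new SparkConf().setAppName("explorer"));
    // SparkHiveContextBuilder is the Explorer class shown above.
    HiveContext hiveContext = new SparkHiveContextBuilder().build(sparkContext);
    hiveContext.sql("SHOW TABLES").show();
  }
}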