org.elasticsearch.hadoop.mr.EsInputFormat Java Examples
The following examples show how to use
org.elasticsearch.hadoop.mr.EsInputFormat.
You can vote up the examples you find useful or vote down those you don't, and you can open the original project or source file by following the link above each example. Related API usage is listed on the sidebar.
Example #1
Source File: AbstractMRNewApiSearchTest.java From elasticsearch-hadoop with Apache License 2.0 | 6 votes |
private Configuration createConf() throws IOException { Configuration conf = HdpBootstrap.hadoopConfig(); HadoopCfgUtils.setGenericOptions(conf); Job job = new Job(conf); job.setInputFormatClass(EsInputFormat.class); job.setOutputFormatClass(PrintStreamOutputFormat.class); job.setOutputKeyClass(Text.class); boolean type = random.nextBoolean(); Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class); job.setOutputValueClass(mapType); conf.set(ConfigurationOptions.ES_QUERY, query); conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata)); conf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson)); new QueryTestParams(tempFolder).provisionQueries(conf); job.setNumReduceTasks(0); //PrintStreamOutputFormat.stream(conf, Stream.OUT); Configuration cfg = job.getConfiguration(); HdpBootstrap.addProperties(cfg, TestSettings.TESTING_PROPS, false); return cfg; }
Example #2
Source File: AbstractExtraMRTests.java From elasticsearch-hadoop with Apache License 2.0 | 6 votes |
/**
 * Builds an old-API (mapred) {@link JobConf} for reading the gibberish test
 * data set back out of Elasticsearch as JSON via {@link EsInputFormat}.
 *
 * @return a fully provisioned, map-only job configuration
 * @throws IOException if the Hadoop bootstrap fails
 */
private JobConf createReadJobConf() throws IOException {
    JobConf conf = HdpBootstrap.hadoopConfig();
    conf.setInputFormat(EsInputFormat.class);
    conf.setOutputFormat(PrintStreamOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    boolean type = random.nextBoolean();
    Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
    // FIX: use the randomly chosen value class. Previously this hard-coded
    // MapWritable.class, leaving mapType unused and never exercising
    // LinkedMapWritable — inconsistent with the sibling createConf()/
    // createJobConf() methods, which do use the random choice.
    conf.setOutputValueClass(mapType);
    HadoopCfgUtils.setGenericOptions(conf);
    conf.setNumReduceTasks(0); // map-only job
    conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(random.nextBoolean()));
    conf.set(ConfigurationOptions.ES_READ_METADATA_VERSION, String.valueOf(true));
    conf.set(ConfigurationOptions.ES_OUTPUT_JSON, "true");
    FileInputFormat.setInputPaths(conf, new Path(MRSuite.testData.gibberishDat(conf)));
    return conf;
}
Example #3
Source File: AbstractMROldApiSearchTest.java From elasticsearch-hadoop with Apache License 2.0 | 6 votes |
/**
 * Assembles the old-API (mapred) {@link JobConf} used by the search tests:
 * reads from Elasticsearch with {@link EsInputFormat} and dumps results
 * through {@link PrintStreamOutputFormat}.
 *
 * @return the fully provisioned job configuration
 * @throws IOException if the Hadoop bootstrap fails
 */
private JobConf createJobConf() throws IOException {
    JobConf jobConf = HdpBootstrap.hadoopConfig();
    jobConf.setInputFormat(EsInputFormat.class);
    jobConf.setOutputFormat(PrintStreamOutputFormat.class);
    jobConf.setOutputKeyClass(Text.class);

    // Randomly exercise both supported writable-map value types.
    boolean pickPlainMap = random.nextBoolean();
    Class<?> valueClass = pickPlainMap ? MapWritable.class : LinkedMapWritable.class;
    jobConf.setOutputValueClass(valueClass);

    HadoopCfgUtils.setGenericOptions(jobConf);
    jobConf.set(ConfigurationOptions.ES_QUERY, query);
    jobConf.setNumReduceTasks(0); // map-only job
    jobConf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata));
    jobConf.set(ConfigurationOptions.ES_READ_METADATA_VERSION, String.valueOf(true));
    jobConf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson));
    new QueryTestParams(tempFolder).provisionQueries(jobConf);
    FileInputFormat.setInputPaths(jobConf, new Path(MRSuite.testData.sampleArtistsDatUri()));
    HdpBootstrap.addProperties(jobConf, TestSettings.TESTING_PROPS, false);
    return jobConf;
}
Example #4
Source File: ComputeResponse.java From incubator-retired-pirk with Apache License 2.0 | 5 votes |
/** * Method to read in the data from elasticsearch, filter, and return a RDD of MapWritable data elements */ @SuppressWarnings("unchecked") public JavaRDD<MapWritable> readDataES() throws IOException, PIRException { logger.info("Reading data "); JavaRDD<MapWritable> jsonRDD; Job job = Job.getInstance(); String jobName = "pirSpark_ES_" + esQuery + "_" + System.currentTimeMillis(); job.setJobName(jobName); job.getConfiguration().set("es.nodes", SystemConfiguration.getProperty("es.nodes")); job.getConfiguration().set("es.port", SystemConfiguration.getProperty("es.port")); job.getConfiguration().set("es.resource", esResource); job.getConfiguration().set("es.query", esQuery); jsonRDD = sc.newAPIHadoopRDD(job.getConfiguration(), EsInputFormat.class, Text.class, MapWritable.class).values().coalesce(numDataPartitions); // Filter out by the provided stopListFile entries if (qSchema.getFilter() != null) { return jsonRDD.filter(new FilterData(accum, bVars)); } else { logger.info("qSchema.getFilter() is null"); return jsonRDD; } }
Example #5
Source File: HadoopFormatIOElasticTest.java From beam with Apache License 2.0 | 5 votes |
/**
 * Builds the Hadoop {@link Configuration} pointing at the in-memory
 * Elasticsearch node. Registers the InputFormat class, key class and value
 * class, plus the fields mandatory for ESInputFormat: es.resource, es.nodes,
 * es.port and es.internal.es.version. See <a
 * href="https://www.elastic.co/guide/en/elasticsearch/hadoop/current/configuration.html"
 * >Elasticsearch Configuration</a> for the full option list.
 */
private Configuration getConfiguration() {
    Configuration config = new Configuration();

    // Connection and target-resource settings.
    config.set(ConfigurationOptions.ES_NODES, ELASTIC_IN_MEM_HOSTNAME);
    config.set(ConfigurationOptions.ES_PORT, String.format("%s", port));
    config.set(ConfigurationOptions.ES_RESOURCE, ELASTIC_RESOURCE);
    config.set("es.internal.es.version", ELASTIC_INTERNAL_VERSION);
    config.set(ConfigurationOptions.ES_NODES_DISCOVERY, TRUE);
    config.set(ConfigurationOptions.ES_INDEX_AUTO_CREATE, TRUE);

    // Hadoop input wiring: format plus key/value classes.
    config.setClass("mapreduce.job.inputformat.class", EsInputFormat.class, InputFormat.class);
    config.setClass("key.class", Text.class, Object.class);
    config.setClass("value.class", LinkedMapWritable.class, Object.class);
    return config;
}
Example #6
Source File: ESEntityExtractor.java From deep-spark with Apache License 2.0 | 5 votes |
/**
 * Creates an extractor for the given entity type, wiring up the deep job
 * configuration together with the Elasticsearch input/output formats.
 *
 * @param entityClass the entity class handled by this extractor
 */
public ESEntityExtractor(Class<T> entityClass) {
    super();
    this.deepJobConfig = new ESDeepJobConfig(entityClass);
    this.inputFormat = new EsInputFormat<>();
    this.outputFormat = new EsOutputFormat();
}
Example #7
Source File: ReadFromES.java From elasticsearch-hadoop with Apache License 2.0 | 5 votes |
@Override public int run(String[] args) throws Exception { Job job = Job.getInstance(getConf(), "ReadFromES"); // DO NOT SET JAR BY CLASS HERE // // job.setJarByClass(getClass()); EsMapReduceUtil.initCredentials(job); job.getConfiguration().set("es.output.json", "true"); job.setInputFormatClass(EsInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[0])); job.setMapperClass(MapperImpl.class); // Secure Hadoop CANNOT perform shuffle phases without native libraries job.setNumReduceTasks(0); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); if (!job.waitForCompletion(true)) { return 1; } return 0; }
Example #8
Source File: ComputeStreamingResponse.java From incubator-retired-pirk with Apache License 2.0 | 4 votes |
/**
 * Reads the input data from Elasticsearch into a {@link JavaDStream} of
 * {@link MapWritable} records and applies the stoplist filter when one is
 * configured.
 *
 * <p>Two ingestion modes are supported: when {@code useQueueStream} is set,
 * the whole ES result is pulled once into a queue-backed stream; otherwise
 * the stream is fed from {@code inputData} via {@code fileStream}.
 *
 * @return the (optionally filtered) stream of records
 * @throws IOException if the Hadoop job cannot be created
 */
@SuppressWarnings("unchecked")
public JavaDStream<MapWritable> readDataES() throws IOException
{
  logger.info("Reading data ");

  Job job = Job.getInstance();
  String jobName = "pirSpark_ES_" + esQuery + "_" + System.currentTimeMillis();
  job.setJobName(jobName);
  // ES-Hadoop connector settings: cluster location plus the index/query to pull.
  job.getConfiguration().set("es.nodes", SystemConfiguration.getProperty("es.nodes"));
  job.getConfiguration().set("es.port", SystemConfiguration.getProperty("es.port"));
  job.getConfiguration().set("es.resource", esResource);
  job.getConfiguration().set("es.query", esQuery);

  // Read data from hdfs
  JavaDStream<MapWritable> mwStream;
  if (useQueueStream)
  {
    // One-shot mode: load the full ES result set into a single RDD and wrap
    // it in a queue-backed stream.
    Queue<JavaRDD<MapWritable>> rddQueue = new LinkedList<>();
    JavaRDD<MapWritable> rddIn = jssc.sparkContext().newAPIHadoopRDD(job.getConfiguration(), EsInputFormat.class, Text.class, MapWritable.class).values()
        .coalesce(numDataPartitions);
    rddQueue.add(rddIn);
    mwStream = jssc.queueStream(rddQueue);
  }
  else
  {
    // Streaming mode: watch inputData for new files and keep only the values
    // side of each (Text, MapWritable) pair.
    JavaPairInputDStream<Text,MapWritable> inputRDD = jssc.fileStream(inputData, Text.class, MapWritable.class, EsInputFormat.class);
    mwStream = inputRDD.transform(new Function<JavaPairRDD<Text,MapWritable>,JavaRDD<MapWritable>>()
    {
      private static final long serialVersionUID = 1L;

      @Override
      public JavaRDD<MapWritable> call(JavaPairRDD<Text,MapWritable> pair) throws Exception
      {
        return pair.values();
      }
    }).repartition(numDataPartitions);
  }

  // Filter out by the provided stopListFile entries
  if (qSchema.getFilter() != null)
  {
    return mwStream.filter(new FilterData(accum, bVars));
  }
  else
  {
    return mwStream;
  }
}
Example #9
Source File: ESCellExtractor.java From deep-spark with Apache License 2.0 | 4 votes |
/**
 * Creates a cell-oriented extractor, wiring up the deep job configuration
 * together with the Elasticsearch input/output formats.
 *
 * @param cellsClass the {@code Cells} class handled by this extractor
 */
public ESCellExtractor(Class<Cells> cellsClass) {
    super();
    this.deepJobConfig = new ESDeepJobConfig(cellsClass);
    this.inputFormat = new EsInputFormat<>();
    this.outputFormat = new EsOutputFormat();
}