org.apache.flink.ml.api.misc.param.Params Java Examples
The following examples show how to use
org.apache.flink.ml.api.misc.param.Params.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BaseLinearModelTrainBatchOp.java From Alink with Apache License 2.0 | 6 votes |
/** * Get label info: including label values and label type. * * @param in input train data in BatchOperator format. * @param params train parameters. * @param isRegProc is regression process or not. * @return label info. */ private Tuple2<DataSet<Object>, TypeInformation> getLabelInfo(BatchOperator in, Params params, boolean isRegProc) { String labelName = params.get(LinearTrainParams.LABEL_COL); // Prepare label values DataSet<Object> labelValues; TypeInformation<?> labelType = null; if (isRegProc) { labelType = Types.DOUBLE; labelValues = MLEnvironmentFactory.get(in.getMLEnvironmentId()) .getExecutionEnvironment().fromElements(new Object()); } else { labelType = in.getColTypes()[TableUtil.findColIndexWithAssertAndHint(in.getColNames(), labelName)]; labelValues = in.select(new String[] {labelName}).distinct().getDataSet().map( new MapFunction<Row, Object>() { @Override public Object map(Row row) { return row.getField(0); } }); } return Tuple2.of(labelValues, labelType); }
Example #2
Source File: ParamsTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Checks lookup of a parameter that declares aliases: a JSON holding only the canonical name
 * resolves to its value, while a JSON holding both the canonical name and one alias makes
 * {@code get} throw an {@link IllegalArgumentException} with a "Duplicate parameters" message.
 */
@Test
public void testGetAliasParam() {
    final String[] aliases = new String[] {"predColName", "outputColName"};
    ParamInfo<String> predResultColName = ParamInfoFactory
        .createParamInfo("predResultColName", String.class)
        .setDescription("Column name of predicted result.")
        .setRequired()
        .setAlias(aliases)
        .build();

    // Only the canonical name is present.
    Params canonicalOnly = Params.fromJson("{\"predResultColName\":\"\\\"f0\\\"\"}");
    Assert.assertEquals("f0", canonicalOnly.get(predResultColName));

    // Canonical name and an alias are both present: the lookup must be rejected.
    Params canonicalAndAlias =
        Params.fromJson("{\"predResultColName\":\"\\\"f0\\\"\", \"predColName\":\"\\\"f0\\\"\"}");
    try {
        canonicalAndAlias.get(predResultColName);
        Assert.fail("failure");
    } catch (IllegalArgumentException ex) {
        String message = ex.getMessage();
        Assert.assertTrue(message.startsWith("Duplicate parameters of predResultColName and predColName"));
    }
}
Example #3
Source File: VectorToColumnsMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Maps a row whose vector column is {@code null} and checks that both output fields are
 * {@code null} and that the output schema consists of the two DOUBLE output columns.
 *
 * <p>Assertions pass the expected value first so that a failure message reports expected and
 * actual values the right way round.
 */
@Test
public void testNull() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"vec"}, new TypeInformation<?>[] {Types.STRING});
    Params params = new Params()
        .set(VectorToColumnsParams.SELECTED_COL, "vec")
        .set(VectorToColumnsParams.RESERVED_COLS, new String[] {})
        .set(VectorToColumnsParams.OUTPUT_COLS, new String[] {"f0", "f1"});
    VectorToColumnsMapper mapper = new VectorToColumnsMapper(schema, params);

    Row row = mapper.map(Row.of((Object) null));

    assertEquals(null, row.getField(0));
    assertEquals(null, row.getField(1));
    assertEquals(
        new TableSchema(new String[] {"f0", "f1"}, new TypeInformation<?>[] {Types.DOUBLE, Types.DOUBLE}),
        mapper.getOutputSchema());
}
Example #4
Source File: FeatureHasherMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Hashes four mixed-type columns into a 10-feature sparse vector and checks the produced
 * vectors for two rows as well as the output schema (input columns plus the "output" vector).
 *
 * <p>Assertions pass the expected value first so that a failure message reports expected and
 * actual values the right way round.
 */
@Test
public void test2() throws Exception {
    TableSchema schema = new TableSchema(
        new String[] {"double", "bool", "number", "str"},
        new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING()});
    Params params = new Params()
        .set(FeatureHasherParams.SELECTED_COLS, new String[] {"double", "bool", "number", "str"})
        .set(FeatureHasherParams.OUTPUT_COL, "output")
        .set(FeatureHasherParams.NUM_FEATURES, 10);
    FeatureHasherMapper mapper = new FeatureHasherMapper(schema, params);

    // The hashed vector is appended after the four input columns, i.e. at field 4.
    assertEquals(
        new SparseVector(10, new int[]{5, 8, 9}, new double[]{2.0, 1.1, 1.0}),
        mapper.map(Row.of(1.1, true, "2", "A")).getField(4));
    assertEquals(
        new SparseVector(10, new int[]{1, 5, 6, 8}, new double[]{1.0, 1.0, 1.0, 2.1}),
        mapper.map(Row.of(2.1, true, "1", "B")).getField(4));
    assertEquals(
        new TableSchema(
            new String[] {"double", "bool", "number", "str", "output"},
            new TypeInformation<?>[] {Types.DOUBLE(), Types.BOOLEAN(), Types.STRING(), Types.STRING(),
                VectorTypes.VECTOR}),
        mapper.getOutputSchema());
}
Example #5
Source File: BucketizerMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Bucketizes two selected columns using {@code cutsArray} (defined elsewhere in the test class)
 * and checks the bucket index written to field 1 for several inputs, including a {@code null}
 * value, plus that the output schema equals the input schema.
 *
 * <p>Assertions pass the expected value first so that a failure message reports expected and
 * actual values the right way round.
 */
@Test
public void testMultiFeatures() throws Exception {
    TableSchema schema = new TableSchema(
        new String[] {"featureA", "featureB"},
        new TypeInformation<?>[] {Types.LONG, Types.LONG});
    Params params = new Params()
        .set(BucketizerParams.SELECTED_COLS, new String[] {"featureA", "featureB"})
        .set(BucketizerParams.CUTS_ARRAY, cutsArray);
    BucketizerMapper mapper = new BucketizerMapper(schema, params);

    assertEquals(0L, mapper.map(Row.of(-999.9, -999.9)).getField(1));
    assertEquals(1L, mapper.map(Row.of(-0.5, -0.2)).getField(1));
    assertEquals(0L, mapper.map(Row.of(-0.3, -0.6)).getField(1));
    assertEquals(1L, mapper.map(Row.of(0.0, 0.0)).getField(1));
    assertEquals(3L, mapper.map(Row.of(0.5, 0.4)).getField(1));
    assertEquals(5L, mapper.map(Row.of(0.5, null)).getField(1));
    assertEquals(schema, mapper.getOutputSchema());
}
Example #6
Source File: BinarizerMapper.java From Alink with Apache License 2.0 | 6 votes |
public BinarizerMapper(TableSchema dataSchema, Params params) { super(dataSchema, params); this.threshold = this.params.get(BinarizerParams.THRESHOLD); selectedColType = TableUtil.findColTypeWithAssertAndHint( dataSchema, this.params.get(BinarizerParams.SELECTED_COL) ); if (TableUtil.isNumber(selectedColType)) { try { Constructor constructor = selectedColType.getTypeClass().getConstructor(String.class); objectValue0 = constructor.newInstance("0"); objectValue1 = constructor.newInstance("1"); } catch (Exception e) { throw new RuntimeException(e); } } }
Example #7
Source File: ClassificationEvaluationUtilTest.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Exercises {@code ClassificationEvaluationUtil.judgeEvaluationType} for the four combinations
 * of prediction-detail and prediction columns:
 * detail only -> PRED_DETAIL, both -> PRED_DETAIL, prediction only -> PRED_RESULT,
 * neither -> {@link RuntimeException} with a fixed message.
 *
 * <p>Assertions pass the expected value first so that a failure message reports expected and
 * actual values the right way round.
 */
@Test
public void judgeEvaluationTypeTest() {
    Params params = new Params()
        .set(HasPredictionDetailCol.PREDICTION_DETAIL_COL, "detail");

    // Only the detail column is set.
    ClassificationEvaluationUtil.Type type = ClassificationEvaluationUtil.judgeEvaluationType(params);
    Assert.assertEquals(ClassificationEvaluationUtil.Type.PRED_DETAIL, type);

    // Both columns are set.
    params.set(HasPredictionCol.PREDICTION_COL, "pred");
    type = ClassificationEvaluationUtil.judgeEvaluationType(params);
    Assert.assertEquals(ClassificationEvaluationUtil.Type.PRED_DETAIL, type);

    // Only the prediction column is set.
    params.remove(HasPredictionDetailCol.PREDICTION_DETAIL_COL);
    type = ClassificationEvaluationUtil.judgeEvaluationType(params);
    Assert.assertEquals(ClassificationEvaluationUtil.Type.PRED_RESULT, type);

    // Neither column is set: the call must fail.
    params.remove(HasPredictionCol.PREDICTION_COL);
    thrown.expect(RuntimeException.class);
    thrown.expectMessage("Error Input, must give either predictionCol or predictionDetailCol!");
    ClassificationEvaluationUtil.judgeEvaluationType(params);
}
Example #8
Source File: SelectMapperTest.java From Alink with Apache License 2.0 | 6 votes |
/**
 * Runs a select clause with aliasing, arithmetic, a CASE expression and UPPER() through
 * {@code SelectMapper} and compares the mapped row with the expected row.
 *
 * <p>Everything after {@code open()} runs inside the try block so that the mapper is closed
 * even when {@code map} itself throws, not only when the assertion fails.
 */
@Test
public void testGeneral() throws Exception {
    TableSchema dataSchema = TableSchema.builder().fields(
        new String[] {"id", "name"},
        new DataType[] {DataTypes.INT(), DataTypes.STRING()}).build();
    Params params = new Params();
    params.set(HasClause.CLAUSE,
        "id, name as eman, id + 1 as id2, CASE WHEN id=1 THEN 'q' ELSE 'p' END as col3, UPPER(name) as col4");
    SelectMapper selectMapper = new SelectMapper(dataSchema, params);

    selectMapper.open();
    try {
        Row expected = Row.of(1, "'abc'", 2, "q", "'ABC'");
        Row output = selectMapper.map(Row.of(1, "'abc'"));
        assertEquals(expected, output);
    } finally {
        selectMapper.close();
    }
}
Example #9
Source File: CsvSourceBatchOp.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Convenience constructor that packs its arguments into a new {@code Params} and delegates to
 * the {@code Params}-based constructor.
 *
 * <p>The column names and types are combined into a {@code TableSchema} and converted to a
 * schema string through {@code CsvUtil.schema2SchemaStr} before being stored under
 * {@code SCHEMA_STR}.
 *
 * @param filePath   value stored under {@code FILE_PATH}.
 * @param colNames   column names of the schema.
 * @param colTypes   column types of the schema.
 * @param fieldDelim value stored under {@code FIELD_DELIMITER}.
 * @param rowDelim   value stored under {@code ROW_DELIMITER}.
 */
public CsvSourceBatchOp(String filePath, String[] colNames, TypeInformation<?>[] colTypes, String fieldDelim, String rowDelim) { this(new Params() .set(FILE_PATH, filePath) .set(SCHEMA_STR, CsvUtil.schema2SchemaStr(new TableSchema(colNames, colTypes))) .set(FIELD_DELIMITER, fieldDelim) .set(ROW_DELIMITER, rowDelim) ); }
Example #10
Source File: ParamsTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Sets the enum parameter through the string key "enumType2" with the lower-case value "green"
 * and checks that it resolves to {@code Color.GREEN} when read via
 * {@code HasEnumTypeColor.ENUM_TYPE_COLOR}, both directly from the params and through an
 * operator built from them.
 */
@Test
public void testColorAlias() {
    Params aliasParams = new Params()
        .set("enumType2", "green")
        .set("appendType", "DENSE");

    // Read straight from the params.
    Assert.assertEquals(Color.GREEN, aliasParams.get(HasEnumTypeColor.ENUM_TYPE_COLOR));

    // Read through an operator constructed from the same params.
    TestBatchOpColor colorOp = new TestBatchOpColor(aliasParams);
    Assert.assertEquals(Color.GREEN, colorOp.getEnumTypeColor());
    Assert.assertEquals(Color.GREEN, colorOp.get(HasEnumTypeColor.ENUM_TYPE_COLOR));
}
Example #11
Source File: NaiveBayesTextModelDataConverter.java From Alink with Apache License 2.0 | 5 votes |
/** * Deserialize the model data. * * @param meta The model meta data. * @param data The model data. * @param distinctLabels The labels. * @return The model data used by mapper. */ @Override public NaiveBayesTextPredictModelData deserializeModel(Params meta, Iterable<String> data, Iterable<Object> distinctLabels) { NaiveBayesTextPredictModelData modelData = new NaiveBayesTextPredictModelData(); modelData.meta = meta; String json = data.iterator().next(); NaiveBayesTextProbInfo dataInfo = JsonConverter.fromJson(json, NaiveBayesTextProbInfo.class); modelData.pi = dataInfo.piArray; modelData.theta = dataInfo.theta; modelData.label = Iterables.toArray(distinctLabels, Object.class); modelData.vectorColName = modelData.meta.get(NaiveBayesTextTrainParams.VECTOR_COL); modelData.modelType = modelData.meta.get(NaiveBayesTextTrainParams.MODEL_TYPE); modelData.featLen = modelData.theta.numCols(); int rowSize = modelData.theta.numRows(); modelData.phi = new double[rowSize]; modelData.minMat = new DenseMatrix(rowSize, modelData.featLen); //construct special model data for the bernoulli model. if (ModelType.Bernoulli.equals(modelData.modelType)) { for (int i = 0; i < rowSize; ++i) { for (int j = 0; j < modelData.featLen; ++j) { double tmp = Math.log(1 - Math.exp(modelData.theta.get(i, j))); modelData.phi[i] += tmp; modelData.minMat.set(i, j, modelData.theta.get(i, j) - tmp); } } } return modelData; }
Example #12
Source File: JdbcDB.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Writes a batch table into the given database table through a JDBC append sink.
 *
 * <p>The target table is first dropped and re-created via {@code dropAndCreateTable}; then an
 * {@code INSERT INTO <table> (<cols>) VALUES (?,...)} statement with one placeholder per column
 * is built from the table schema and handed to the sink.
 *
 * @param tableName name of the target table.
 * @param in        table to write.
 * @param parameter parameters forwarded to {@code dropAndCreateTable}.
 * @param sessionId ML environment id used to obtain the data set of {@code in}.
 */
@Override
public void sinkBatch(String tableName, Table in, Params parameter, Long sessionId) {
    dropAndCreateTable(this, tableName, in, parameter);

    TableSchema schema = in.getSchema();
    String[] colNames = schema.getFieldNames();

    // Build the comma-separated column list and the matching "?" placeholders side by side.
    StringBuilder columnList = new StringBuilder().append(colNames[0]);
    StringBuilder placeholders = new StringBuilder().append("?");
    for (int idx = 1; idx < colNames.length; idx++) {
        columnList.append(",").append(colNames[idx]);
        placeholders.append(",").append("?");
    }

    String insertSql = new StringBuilder()
        .append("INSERT INTO ").append(tableName)
        .append(" (").append(columnList)
        .append(") VALUES (").append(placeholders)
        .append(")")
        .toString();

    JDBCAppendTableSink sink = JDBCAppendTableSink.builder()
        .setUsername(getUserName())
        .setPassword(getPassword())
        .setDrivername(getDriverName())
        .setDBUrl(getDbUrl())
        .setQuery(insertSql)
        .setParameterTypes(schema.getFieldTypes())
        .build();

    sink.emitDataSet(BatchOperator.fromTable(in).setMLEnvironmentId(sessionId).getDataSet());
}
Example #13
Source File: MySqlDB.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Creates the instance from a {@code Params} object by reading {@code MySqlDBParams.DB_NAME},
 * {@code IP}, {@code PORT}, {@code USERNAME} and {@code PASSWORD} and delegating to the
 * five-argument constructor.
 *
 * @param params parameters holding the connection settings listed above.
 */
public MySqlDB(Params params) { this(params.get(MySqlDBParams.DB_NAME), params.get(MySqlDBParams.IP), params.get(MySqlDBParams.PORT), params.get(MySqlDBParams.USERNAME), params.get(MySqlDBParams.PASSWORD)); }
Example #14
Source File: IsotonicRegressionModelMapperTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Loads a three-row isotonic regression model (meta JSON plus two numeric arrays) into
 * {@code IsotonicRegressionModelMapper}, maps one row and checks the prediction in field 1
 * (within a 0.01 tolerance) and the output schema.
 *
 * <p>Assertions pass the expected value first so that a failure message reports expected and
 * actual values the right way round.
 */
@Test
public void testRowData() throws Exception {
    Row[] rows = new Row[] {
        Row.of(0L, "{\"vectorColName\":\"\\\"vector\\\"\",\"modelName\":\"\\\"IsotonicRegressionModel\\\"\","
            + "\"featureColName\":null,\"featureIndex\":\"0\",\"modelSchema\":\"\\\"model_id bigint,model_info "
            + "string\\\"\",\"isNewFormat\":\"true\"}\n"),
        Row.of(1048576L, "[0.02,0.1,0.2,0.27,0.3,0.35,0.45,0.5,0.7,0.8,0.9]"),
        Row.of(2097152L,
            "[0.0,0.3333333333333333,0.3333333333333333,0.5,0.5,0.6666666666666666,0.6666666666666666,0.75,0.75,"
                + "1.0,1.0]")
    };
    List<Row> model = Arrays.asList(rows);
    TableSchema modelSchema = new TableSchema(
        new String[] {"model_id", "model_info"},
        new TypeInformation[] {Types.LONG, Types.STRING});
    TableSchema dataSchema = new TableSchema(new String[] {"vector"}, new TypeInformation<?>[] {Types.DOUBLE});
    Params params = new Params()
        .set(IsotonicRegPredictParams.PREDICTION_COL, "pred");

    IsotonicRegressionModelMapper mapper = new IsotonicRegressionModelMapper(modelSchema, dataSchema, params);
    mapper.loadModel(model);

    assertEquals(1.0, Double.parseDouble(mapper.map(Row.of("0.81, 0.35")).getField(1).toString()), 0.01);
    assertEquals(
        new TableSchema(new String[] {"vector", "pred"}, new TypeInformation<?>[] {Types.DOUBLE, Types.DOUBLE}),
        mapper.getOutputSchema());
}
Example #15
Source File: LabeledModelDataConverter.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Rebuilds the model from its row representation.
 *
 * <p>Meta and data are extracted with {@code extractModelMetaAndData}, the labels with
 * {@code extractAuxiliaryData(rows, true)}, and all three are passed to
 * {@code deserializeModel}.
 *
 * @param rows the rows holding the serialized model.
 * @return the deserialized model.
 */
@Override
public M2 load(List<Row> rows) {
    Tuple2<Params, Iterable<String>> extracted = extractModelMetaAndData(rows);
    Params meta = extracted.f0;
    Iterable<String> data = extracted.f1;
    Iterable<Object> distinctLabels = extractAuxiliaryData(rows, true);
    return deserializeModel(meta, data, distinctLabels);
}
Example #16
Source File: HiveDB.java From Alink with Apache License 2.0 | 5 votes |
@Override public Table getBatchTable(String tableName, Params parameter, Long sessionId) throws Exception { ExecutionEnvironment env = MLEnvironmentFactory.get(sessionId).getExecutionEnvironment(); HiveBatchSource hiveTableSource = getHiveBatchSource(tableName, parameter); DataSet<BaseRow> dataSet = hiveTableSource.getDataSet(env); TableSchema schema = hiveTableSource.getTableSchema(); final DataType[] dataTypes = schema.getFieldDataTypes(); DataSet<Row> rows = dataSet.map(new BaseRowToRow(dataTypes)); Table tbl = DataSetConversionUtil.toTable(sessionId, rows, schema); if (getPartitionCols(tableName).size() > 0) { // remove static partition columns String[] fieldNames = getColNames(tableName); tbl = tbl.select(Strings.join(fieldNames, ",")); } return tbl; }
Example #17
Source File: MultiStringIndexerModelDataConverter.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Serializes the model into rows.
 *
 * <p>When meta is present it is emitted first as a row {@code (-1L, metaJson, null)}; every
 * data tuple then becomes a row of {@code (f0 as long, f1, f2)}.
 *
 * @param modelData tuple of (meta, data tuples); the meta may be {@code null}.
 * @param collector receiver of the produced rows.
 */
@Override
public void save(Tuple2<Params, Iterable<Tuple3<Integer, String, Long>>> modelData, Collector<Row> collector) {
    Params meta = modelData.f0;
    if (meta != null) {
        collector.collect(Row.of(-1L, meta.toJson(), null));
    }

    for (Tuple3<Integer, String, Long> entry : modelData.f1) {
        collector.collect(Row.of(entry.f0.longValue(), entry.f1, entry.f2));
    }
}
Example #18
Source File: LdaModelDataConverterTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Round-trips generated LDA model data through {@code serializeModel} / {@code deserializeModel}
 * and checks the restored {@code alpha} array and document list.
 *
 * <p>Assertions pass the expected value first so that a failure message reports expected and
 * actual values the right way round.
 */
@Test
public void ldaModelDataConverterTest() {
    Tuple2<Params, Iterable<String>> res = converter.serializeModel(generateLdaModelData());
    LdaModelData modelData = converter.deserializeModel(res.f0, res.f1);

    assertEquals(new Double[]{0.2, 0.2, 0.2, 0.2, 0.2}, modelData.alpha);
    assertEquals(generateDocData(), modelData.list);
}
Example #19
Source File: BinarizerMapperTest.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Binarizes a DOUBLE column without setting an explicit threshold and checks that 0.6 maps to
 * 1.0 and that the output schema equals the input schema.
 *
 * <p>Assertions pass the expected value first so that a failure message reports expected and
 * actual values the right way round.
 */
@Test
public void test3() throws Exception {
    TableSchema schema = new TableSchema(new String[] {"feature"}, new TypeInformation<?>[] {Types.DOUBLE});
    Params params = new Params()
        .set(BinarizerParams.SELECTED_COL, "feature");
    BinarizerMapper mapper = new BinarizerMapper(schema, params);

    assertEquals(1.0, mapper.map(Row.of(0.6)).getField(0));
    assertEquals(schema, mapper.getOutputSchema());
}
Example #20
Source File: BaseFormatTrans.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Creates the transformer for a given source and target format by delegating to the
 * {@code Params}-based constructor.
 *
 * <p>A {@code null} {@code params} is replaced by a fresh {@code Params}. Otherwise
 * {@code FROM_FORMAT} and {@code TO_FORMAT} are set on the passed-in instance itself.
 * NOTE(review): if {@code Params.set} mutates in place, the caller's object is modified — confirm.
 *
 * @param fromFormat value stored under {@code FormatTransParams.FROM_FORMAT}.
 * @param toFormat   value stored under {@code FormatTransParams.TO_FORMAT}.
 * @param params     base parameters; may be {@code null}.
 */
public BaseFormatTrans(FormatType fromFormat, FormatType toFormat, Params params) { this( (null == params ? new Params() : params) .set(FormatTransParams.FROM_FORMAT, fromFormat) .set(FormatTransParams.TO_FORMAT, toFormat) ); }
Example #21
Source File: FeatureSplitter.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Creates a splitter over the given data and caches the split constraints read from
 * {@code params}.
 *
 * @param params      parameters; min-samples-per-leaf, min-sample-ratio-per-child,
 *                    min-info-gain, max-depth and max-leaves are read from it.
 * @param data        the dense data, stored as given.
 * @param featureMeta the feature meta, stored as given.
 * @param partition   the sequential partition, stored as given.
 */
public FeatureSplitter(Params params, DenseData data, FeatureMeta featureMeta, SequentialPartition partition) {
    // Collaborators handed in by the caller.
    this.params = params;
    this.data = data;
    this.featureMeta = featureMeta;
    this.partition = partition;

    // Constraints looked up once from the (just stored) parameters.
    this.minSamplesPerLeaf = this.params.get(HasMinSamplesPerLeaf.MIN_SAMPLES_PER_LEAF);
    this.minSampleRatioPerChild = this.params.get(HasMinSampleRatioPerChild.MIN_SAMPLE_RATIO_PERCHILD);
    this.minInfoGain = this.params.get(HasMinInfoGain.MIN_INFO_GAIN);
    this.maxDepth = this.params.get(HasMaxDepth.MAX_DEPTH);
    this.maxLeaves = this.params.get(HasMaxLeaves.MAX_LEAVES);
}
Example #22
Source File: LinearModelData.java From Alink with Apache License 2.0 | 5 votes |
/** * Construct function. * @param labelType label Type. * @param meta meta information of model. * @param featureNames the feature column names. * @param coefVector */ public LinearModelData(TypeInformation labelType, Params meta, String[] featureNames, DenseVector coefVector) { this.labelType = labelType; this.coefVector = coefVector; this.featureNames = featureNames; if (meta.contains(ModelParamName.LABEL_VALUES)) { this.labelValues = FeatureLabelUtil.recoverLabelType(meta.get(ModelParamName.LABEL_VALUES), this.labelType); } setMetaInfo(meta); }
Example #23
Source File: PipelineStageBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns the parameters of this stage, creating an empty {@code Params} on first access when
 * none has been set yet.
 *
 * @return the never-{@code null} parameters held by this stage.
 */
@Override
public Params getParams() {
    if (this.params != null) {
        return this.params;
    }
    this.params = new Params();
    return this.params;
}
Example #24
Source File: NumSeqSourceStreamOp.java From Alink with Apache License 2.0 | 5 votes |
/**
 * Creates a stream source from the sequence generated by {@code generateSequence(from, to)},
 * mapped through {@code transform} and exposed as a one-column table.
 *
 * @param from      first argument passed to {@code generateSequence}.
 * @param to        second argument passed to {@code generateSequence}.
 * @param colName   name of the output column.
 * @param timeZones argument handed to the {@code transform} map function.
 * @param params    parameters forwarded to the superclass.
 */
public NumSeqSourceStreamOp(long from, long to, String colName, Double[] timeZones, Params params) {
    super(params);

    DataStream<Long> data = MLEnvironmentFactory.get(getMLEnvironmentId())
        .getStreamExecutionEnvironment()
        .generateSequence(from, to)
        .map(new transform(timeZones));

    this.setOutputTable(
        MLEnvironmentFactory.get(getMLEnvironmentId())
            .getStreamTableEnvironment()
            .fromDataStream(data, colName));
}
Example #25
Source File: LinearSvmPredictStreamOp.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Creates the predict operator by passing the model, a {@code LinearModelMapper} constructor
 * reference and the parameters to the superclass constructor.
 *
 * @param model  the batch operator forwarded as the model.
 * @param params the parameters forwarded to the superclass.
 */
public LinearSvmPredictStreamOp(BatchOperator model, Params params) { super(model, LinearModelMapper::new, params); }
Example #26
Source File: DecisionTreePredictBatchOp.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Creates the predict operator by passing a {@code RandomForestModelMapper} constructor
 * reference and the parameters to the superclass constructor.
 *
 * @param params the parameters forwarded to the superclass.
 */
public DecisionTreePredictBatchOp(Params params) { super(RandomForestModelMapper::new, params); }
Example #27
Source File: KvToColumnsBatchOp.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Creates the operator with a new, empty {@code Params} by delegating to the
 * {@code Params}-based constructor.
 */
public KvToColumnsBatchOp() { this(new Params()); }
Example #28
Source File: JsonValueStreamOp.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Creates the operator by passing a {@code JsonPathMapper} constructor reference and the
 * parameters to the superclass constructor.
 *
 * @param param the parameters forwarded to the superclass.
 */
public JsonValueStreamOp(Params param) { super(JsonPathMapper::new, param); }
Example #29
Source File: AnnotationUtilsTest.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Creates the operator, forwarding the parameters unchanged to the superclass constructor.
 *
 * @param params the parameters forwarded to the superclass.
 */
public FakeOp2(Params params) { super(params); }
Example #30
Source File: KvToCsvStreamOp.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Creates the operator with a new, empty {@code Params} by delegating to the
 * {@code Params}-based constructor.
 */
public KvToCsvStreamOp() { this(new Params()); }