Java Code Examples for org.apache.kylin.metadata.model.PartitionDesc#isPartitioned()
The following examples show how to use org.apache.kylin.metadata.model.PartitionDesc#isPartitioned(). The examples are taken from open source projects; the source file and project for each one are noted above its code.
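All of the examples share the same basic pattern: isPartitioned() is checked, usually together with a null check on the PartitionDesc, before any partition date/time column is read, and the unpartitioned case falls back to a full build or a constant key. Below is a minimal sketch of that guard; the helper name is hypothetical and not taken from the Kylin code base.

    // Sketch only, not Kylin source: guard partition-column access behind
    // a null check and isPartitioned(), as the examples below do.
    static boolean hasUsablePartitionColumn(DataModelDesc model) { // hypothetical helper
        PartitionDesc partitionDesc = model.getPartitionDesc();
        if (partitionDesc == null || !partitionDesc.isPartitioned()) {
            return false; // unpartitioned model: callers treat this as a full build
        }
        return partitionDesc.getPartitionDateColumnRef() != null;
    }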
Example 1
Source File: SegmentPruner.java From kylin-on-parquet-v2 with Apache License 2.0
public static DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
    DataModelDesc model = seg.getModel();
    PartitionDesc part = model.getPartitionDesc();

    if (!part.isPartitioned())
        return null;
    if (!col.equals(part.getPartitionDateColumnRef()))
        return null;

    // deduce the dim range from TSRange
    TSRange tsRange = seg.getTSRange();
    if (tsRange.start.isMin || tsRange.end.isMax)
        return null; // DimensionRangeInfo cannot express infinite

    String min = tsRangeToStr(tsRange.start.v, part);
    String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive
    return new DimensionRangeInfo(min, max);
}
Example 2
Source File: KeyValueBuilder.java From kylin with Apache License 2.0
/**
 * Use the segment start time as the map key, the time unit depends on the partition columns
 * If the partition_time_column is null, the unit is day;
 * otherwise, the unit is second
 */
private String getSegmentStartTime(CubeSegment segment) {
    long startTime = segment.getTSRange().start.v;
    DataModelDesc model = segment.getModel();
    PartitionDesc partitionDesc = model.getPartitionDesc();
    if (partitionDesc == null || !partitionDesc.isPartitioned()) {
        return "0";
    } else if (partitionDesc.partitionColumnIsTimeMillis()) {
        return "" + startTime;
    } else if (partitionDesc.getPartitionTimeColumnRef() != null) {
        return "" + startTime / 1000L;
    } else if (partitionDesc.getPartitionDateColumnRef() != null) {
        return "" + startTime / 86400000L;
    }
    return "0";
}
Example 3
Source File: SegmentPruner.java From kylin with Apache License 2.0
public static DimensionRangeInfo tryDeduceRangeFromPartitionCol(CubeSegment seg, TblColRef col) {
    DataModelDesc model = seg.getModel();
    PartitionDesc part = model.getPartitionDesc();

    if (!part.isPartitioned())
        return null;
    if (!col.equals(part.getPartitionDateColumnRef()))
        return null;

    // deduce the dim range from TSRange
    TSRange tsRange = seg.getTSRange();
    if (tsRange.start.isMin || tsRange.end.isMax)
        return null; // DimensionRangeInfo cannot express infinite

    String min = tsRangeToStr(tsRange.start.v, part);
    String max = tsRangeToStr(tsRange.end.v - 1, part); // note the -1, end side is exclusive
    return new DimensionRangeInfo(min, max);
}
Example 4
Source File: CubeManager.java From kylin-on-parquet-v2 with Apache License 2.0
CubeSegment appendSegment(CubeInstance cube, TSRange tsRange, SegmentRange segRange,
        Map<Integer, Long> sourcePartitionOffsetStart, Map<Integer, Long> sourcePartitionOffsetEnd)
        throws IOException {
    CubeInstance cubeCopy = cube.latestCopyForWrite(); // get a latest copy

    checkInputRanges(tsRange, segRange);

    // fix start/end a bit
    PartitionDesc partitionDesc = cubeCopy.getModel().getPartitionDesc();
    if (partitionDesc != null && partitionDesc.isPartitioned()) {
        // if missing start, set it to where last time ends
        if (tsRange != null && tsRange.start.v == 0) {
            CubeDesc cubeDesc = cubeCopy.getDescriptor();
            CubeSegment last = cubeCopy.getLastSegment();
            if (last == null)
                tsRange = new TSRange(cubeDesc.getPartitionDateStart(), tsRange.end.v);
            else if (!last.isOffsetCube())
                tsRange = new TSRange(last.getTSRange().end.v, tsRange.end.v);
        }
    } else {
        // full build
        tsRange = null;
        segRange = null;
    }

    CubeSegment newSegment = newSegment(cubeCopy, tsRange, segRange);
    newSegment.setSourcePartitionOffsetStart(sourcePartitionOffsetStart);
    newSegment.setSourcePartitionOffsetEnd(sourcePartitionOffsetEnd);
    validateNewSegments(cubeCopy, newSegment);

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToAddSegs(newSegment);
    updateCube(update);

    return newSegment;
}
Example 5
Source File: CubeManager.java From kylin with Apache License 2.0
CubeSegment appendSegment(CubeInstance cube, TSRange tsRange, SegmentRange segRange,
        Map<Integer, Long> sourcePartitionOffsetStart, Map<Integer, Long> sourcePartitionOffsetEnd)
        throws IOException {
    CubeInstance cubeCopy = cube.latestCopyForWrite(); // get a latest copy

    checkInputRanges(tsRange, segRange);

    // fix start/end a bit
    PartitionDesc partitionDesc = cubeCopy.getModel().getPartitionDesc();
    if (partitionDesc != null && partitionDesc.isPartitioned()) {
        // if missing start, set it to where last time ends
        if (tsRange != null && tsRange.start.v == 0) {
            CubeDesc cubeDesc = cubeCopy.getDescriptor();
            CubeSegment last = cubeCopy.getLastSegment();
            if (last == null)
                tsRange = new TSRange(cubeDesc.getPartitionDateStart(), tsRange.end.v);
            else if (!last.isOffsetCube())
                tsRange = new TSRange(last.getTSRange().end.v, tsRange.end.v);
        }
    } else {
        // full build
        tsRange = null;
        segRange = null;
    }

    CubeSegment newSegment = newSegment(cubeCopy, tsRange, segRange);
    newSegment.setSourcePartitionOffsetStart(sourcePartitionOffsetStart);
    newSegment.setSourcePartitionOffsetEnd(sourcePartitionOffsetEnd);
    validateNewSegments(cubeCopy, newSegment);

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToAddSegs(newSegment);
    updateCube(update);

    return newSegment;
}
Example 6
Source File: JdbcHiveInputBase.java From kylin-on-parquet-v2 with Apache License 2.0
protected AbstractExecutable createSqoopToFlatHiveStep(String jobWorkingDir, String cubeName) {
    KylinConfig config = getConfig();
    PartitionDesc partitionDesc = flatDesc.getDataModel().getPartitionDesc();
    String partCol = null;

    if (partitionDesc.isPartitioned()) {
        partCol = partitionDesc.getPartitionDateColumn(); //tablename.colname
    }

    String splitTableAlias;
    String splitColumn;
    String splitDatabase;
    TblColRef splitColRef = determineSplitColumn();
    splitTableAlias = splitColRef.getTableAlias();
    splitColumn = getColumnIdentityQuoted(splitColRef, jdbcMetadataDialect, metaMap, true);
    splitDatabase = splitColRef.getColumnDesc().getTable().getDatabase();

    String selectSql = generateSelectDataStatementRDBMS(flatDesc, true, new String[] { partCol },
            jdbcMetadataDialect, metaMap);
    selectSql = escapeQuotationInSql(selectSql);

    String hiveTable = flatDesc.getTableName();
    String connectionUrl = config.getJdbcSourceConnectionUrl();
    String driverClass = config.getJdbcSourceDriver();
    String jdbcUser = config.getJdbcSourceUser();
    String jdbcPass = config.getJdbcSourcePass();
    String sqoopHome = config.getSqoopHome();
    String sqoopNullString = config.getSqoopNullString();
    String sqoopNullNonString = config.getSqoopNullNonString();
    String filedDelimiter = config.getJdbcSourceFieldDelimiter();
    int mapperNum = config.getSqoopMapperNum();

    String bquery = String.format(Locale.ROOT, "SELECT min(%s), max(%s) FROM %s.%s ", splitColumn, splitColumn,
            getSchemaQuoted(metaMap, splitDatabase, jdbcMetadataDialect, true),
            getTableIdentityQuoted(splitColRef.getTableRef(), metaMap, jdbcMetadataDialect, true));
    if (partitionDesc.isPartitioned()) {
        SegmentRange segRange = flatDesc.getSegRange();
        if (segRange != null && !segRange.isInfinite()) {
            if (partitionDesc.getPartitionDateColumnRef().getTableAlias().equals(splitTableAlias)
                    && (partitionDesc.getPartitionTimeColumnRef() == null || partitionDesc
                            .getPartitionTimeColumnRef().getTableAlias().equals(splitTableAlias))) {
                String quotedPartCond = partitionDesc.getPartitionConditionBuilder().buildDateRangeCondition(
                        partitionDesc, flatDesc.getSegment(), segRange,
                        col -> getTableColumnIdentityQuoted(col, jdbcMetadataDialect, metaMap, true));
                bquery += " WHERE " + quotedPartCond;
            }
        }
    }
    bquery = escapeQuotationInSql(bquery); // escape ` in cmd
    splitColumn = escapeQuotationInSql(splitColumn);

    String cmd = String.format(Locale.ROOT,
            "%s/bin/sqoop import" + generateSqoopConfigArgString()
                    + "--connect \"%s\" --driver %s --username %s --password \"%s\" --query \"%s AND \\$CONDITIONS\" "
                    + "--target-dir %s/%s --split-by %s --boundary-query \"%s\" --null-string '%s' "
                    + "--null-non-string '%s' --fields-terminated-by '%s' --num-mappers %d",
            sqoopHome, connectionUrl, driverClass, jdbcUser, jdbcPass, selectSql, jobWorkingDir, hiveTable,
            splitColumn, bquery, sqoopNullString, sqoopNullNonString, filedDelimiter, mapperNum);
    logger.debug("sqoop cmd : {}", cmd);

    CmdStep step = new CmdStep();
    step.setCmd(cmd);
    step.setName(ExecutableConstants.STEP_NAME_SQOOP_TO_FLAT_HIVE_TABLE);
    return step;
}
Example 7
Source File: JdbcHiveInputBase.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected AbstractExecutable createSqoopToFlatHiveStep(String jobWorkingDir, String cubeName) {
    KylinConfig config = flatDesc.getDataModel().getConfig();
    PartitionDesc partitionDesc = flatDesc.getDataModel().getPartitionDesc();
    String partCol = null;

    boolean enableQuote = dataSource.getSqlConverter().getConfigurer().enableQuote();
    SqlDialect sqlDialect = enableQuote ? dataSource.getSqlConverter().getConfigurer().getSqlDialect()
            : FlatTableSqlQuoteUtils.NON_QUOTE_DIALECT;
    SqlConverter.IConfigurer iconfigurer = dataSource.getSqlConverter().getConfigurer();

    if (partitionDesc.isPartitioned()) {
        partCol = partitionDesc.getPartitionDateColumn(); //tablename.colname
    }

    String splitTable;
    String splitTableAlias;
    String splitColumn;
    String splitDatabase;
    TblColRef splitColRef = determineSplitColumn();
    splitTable = splitColRef.getTableRef().getTableDesc().getName();
    splitTableAlias = splitColRef.getTableAlias();
    splitDatabase = splitColRef.getColumnDesc().getTable().getDatabase();

    if (enableQuote) {
        splitColumn = sqlDialect.quoteIdentifier(splitColRef.getTableAlias()) + "."
                + sqlDialect.quoteIdentifier(splitColRef.getName());
        splitDatabase = sqlDialect.quoteIdentifier(splitDatabase);
        splitTable = sqlDialect.quoteIdentifier(splitTable);
        splitTableAlias = sqlDialect.quoteIdentifier(splitTableAlias);
    } else {
        splitColumn = splitColRef.getTableAlias() + "." + splitColRef.getName();
    }

    String selectSql = JoinedFlatTable.generateSelectDataStatement(flatDesc, true, new String[]{partCol},
            sqlDialect);
    selectSql = escapeQuotationInSql(dataSource.convertSql(selectSql));

    String hiveTable = flatDesc.getTableName();
    String filedDelimiter = config.getJdbcSourceFieldDelimiter();
    int mapperNum = config.getSqoopMapperNum();

    String bquery;
    bquery = String.format(Locale.ROOT, "SELECT min(%s), max(%s) FROM %s.%s as %s", splitColumn, splitColumn,
            splitDatabase, splitTable, splitTableAlias);
    bquery = dataSource.convertSql(bquery);

    if (partitionDesc.isPartitioned()) {
        SegmentRange segRange = flatDesc.getSegRange();
        if (segRange != null && !segRange.isInfinite()) {
            if (partitionDesc.getPartitionDateColumnRef().getTableAlias().equals(splitTableAlias)
                    && (partitionDesc.getPartitionTimeColumnRef() == null || partitionDesc
                            .getPartitionTimeColumnRef().getTableAlias().equals(splitTableAlias))) {
                String quotedPartCond = FlatTableSqlQuoteUtils.quoteIdentifierInSqlExpr(flatDesc,
                        partitionDesc.getPartitionConditionBuilder().buildDateRangeCondition(partitionDesc,
                                flatDesc.getSegment(), segRange, null),
                        sqlDialect);
                bquery += " WHERE " + quotedPartCond;
            }
        }
    }
    bquery = escapeQuotationInSql(bquery);
    splitColumn = escapeQuotationInSql(splitColumn);

    String cmd = StringUtils.format(
            "--connect \"%s\" --driver \"%s\" --username \"%s\" --password \"%s\" --query \"%s AND \\$CONDITIONS\" "
                    + "--target-dir \"%s/%s\" --split-by \"%s\" --boundary-query \"%s\" --null-string '' "
                    + "--fields-terminated-by '%s' --num-mappers %d",
            dataSource.getJdbcUrl(), dataSource.getJdbcDriver(), dataSource.getJdbcUser(),
            dataSource.getJdbcPassword(), selectSql, jobWorkingDir, hiveTable, splitColumn, bquery,
            filedDelimiter, mapperNum);
    if (iconfigurer.getTransactionIsolationLevel() != null) {
        cmd = cmd + " --relaxed-isolation --metadata-transaction-isolation-level "
                + iconfigurer.getTransactionIsolationLevel();
    }
    logger.debug("sqoop cmd: {}", cmd);

    SqoopCmdStep step = new SqoopCmdStep();
    step.setCmd(cmd);
    step.setName(ExecutableConstants.STEP_NAME_SQOOP_TO_FLAT_HIVE_TABLE);
    return step;
}
Example 8
Source File: GTCubeStorageQueryBase.java From kylin-on-parquet-v2 with Apache License 2.0
private boolean isExactAggregation(StorageContext context, Cuboid cuboid, Collection<TblColRef> groups,
        Set<TblColRef> othersD, Set<TblColRef> singleValuesD, Set<TblColRef> derivedPostAggregation,
        Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> aggrSQLCalls, boolean groupByExpression) {
    if (context.isNeedStorageAggregation()) {
        logger.info("exactAggregation is false because need storage aggregation");
        return false;
    }

    if (cuboid.requirePostAggregation()) {
        logger.info("exactAggregation is false because cuboid {}=>{}", cuboid.getInputID(), cuboid.getId());
        return false;
    }

    // derived aggregation is bad, unless expanded columns are already in group by
    if (!groups.containsAll(derivedPostAggregation)) {
        logger.info("exactAggregation is false because derived column require post aggregation: {}",
                derivedPostAggregation);
        return false;
    }

    // other columns (from filter) is bad, unless they are ensured to have single value
    if (!singleValuesD.containsAll(othersD)) {
        logger.info("exactAggregation is false because some column not on group by: {} (single value column: {})",
                othersD, singleValuesD);
        return false;
    }

    //for DimensionAsMetric like max(cal_dt), the dimension column maybe not in real group by
    for (FunctionDesc functionDesc : functionDescs) {
        if (functionDesc.isDimensionAsMetric()) {
            logger.info("exactAggregation is false because has DimensionAsMetric");
            return false;
        }
    }
    for (SQLDigest.SQLCall aggrSQLCall : aggrSQLCalls) {
        if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)
                || aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_VALUE)) {
            logger.info("exactAggregation is false because has INTERSECT_COUNT OR INTERSECT_VALUE");
            return false;
        }
    }

    // for partitioned cube, the partition column must belong to group by or has single value
    PartitionDesc partDesc = cuboid.getCubeDesc().getModel().getPartitionDesc();
    if (partDesc.isPartitioned()) {
        TblColRef col = partDesc.getPartitionDateColumnRef();
        if (!groups.contains(col) && !singleValuesD.contains(col)) {
            logger.info("exactAggregation is false because cube is partitioned and {} is not on group by", col);
            return false;
        }
    }

    // for group by expression like: group by seller_id/100. seller_id_1(200) get 2, seller_id_2(201) also get 2, so can't aggregate exactly
    if (groupByExpression) {
        logger.info("exactAggregation is false because group by expression");
        return false;
    }

    logger.info("exactAggregation is true, cuboid id is {}", cuboid.getId());
    return true;
}
Example 9
Source File: CubeManager.java From kylin-on-parquet-v2 with Apache License 2.0
public CubeSegment refreshSegment(CubeInstance cube, TSRange tsRange, SegmentRange segRange) throws IOException {
    CubeInstance cubeCopy = cube.latestCopyForWrite(); // get a latest copy

    checkInputRanges(tsRange, segRange);

    PartitionDesc partitionDesc = cubeCopy.getModel().getPartitionDesc();
    if (partitionDesc == null || partitionDesc.isPartitioned() == false) {
        // full build
        tsRange = null;
        segRange = null;
    }

    CubeSegment newSegment = newSegment(cubeCopy, tsRange, segRange);

    Pair<Boolean, Boolean> pair = cubeCopy.getSegments().fitInSegments(newSegment);
    if (pair.getFirst() == false || pair.getSecond() == false)
        throw new IllegalArgumentException("The new refreshing segment " + newSegment
                + " does not match any existing segment in cube " + cubeCopy);

    if (segRange != null) {
        CubeSegment toRefreshSeg = null;
        for (CubeSegment cubeSegment : cubeCopy.getSegments()) {
            if (cubeSegment.getSegRange().equals(segRange)) {
                toRefreshSeg = cubeSegment;
                break;
            }
        }
        if (toRefreshSeg == null) {
            throw new IllegalArgumentException(
                    "For streaming cube, only one segment can be refreshed at one time");
        }

        newSegment.setSourcePartitionOffsetStart(toRefreshSeg.getSourcePartitionOffsetStart());
        newSegment.setSourcePartitionOffsetEnd(toRefreshSeg.getSourcePartitionOffsetEnd());
    }

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToAddSegs(newSegment);
    updateCube(update);

    return newSegment;
}
Example 10
Source File: JdbcHiveInputBase.java From kylin with Apache License 2.0
protected AbstractExecutable createSqoopToFlatHiveStep(String jobWorkingDir, String cubeName) {
    KylinConfig config = getConfig();
    PartitionDesc partitionDesc = flatDesc.getDataModel().getPartitionDesc();
    String partCol = null;

    if (partitionDesc.isPartitioned()) {
        partCol = partitionDesc.getPartitionDateColumn(); //tablename.colname
    }

    String splitTableAlias;
    String splitColumn;
    String splitDatabase;
    TblColRef splitColRef = determineSplitColumn();
    splitTableAlias = splitColRef.getTableAlias();
    splitColumn = getColumnIdentityQuoted(splitColRef, jdbcMetadataDialect, metaMap, true);
    splitDatabase = splitColRef.getColumnDesc().getTable().getDatabase();

    String selectSql = generateSelectDataStatementRDBMS(flatDesc, true, new String[] { partCol },
            jdbcMetadataDialect, metaMap);
    selectSql = escapeQuotationInSql(selectSql);

    String hiveTable = flatDesc.getTableName();
    String connectionUrl = config.getJdbcSourceConnectionUrl();
    String driverClass = config.getJdbcSourceDriver();
    String jdbcUser = config.getJdbcSourceUser();
    String jdbcPass = config.getJdbcSourcePass();
    String sqoopHome = config.getSqoopHome();
    String sqoopNullString = config.getSqoopNullString();
    String sqoopNullNonString = config.getSqoopNullNonString();
    String filedDelimiter = config.getJdbcSourceFieldDelimiter();
    int mapperNum = config.getSqoopMapperNum();

    String bquery = String.format(Locale.ROOT, "SELECT min(%s), max(%s) FROM %s.%s ", splitColumn, splitColumn,
            getSchemaQuoted(metaMap, splitDatabase, jdbcMetadataDialect, true),
            getTableIdentityQuoted(splitColRef.getTableRef(), metaMap, jdbcMetadataDialect, true));
    if (partitionDesc.isPartitioned()) {
        SegmentRange segRange = flatDesc.getSegRange();
        if (segRange != null && !segRange.isInfinite()) {
            if (partitionDesc.getPartitionDateColumnRef().getTableAlias().equals(splitTableAlias)
                    && (partitionDesc.getPartitionTimeColumnRef() == null || partitionDesc
                            .getPartitionTimeColumnRef().getTableAlias().equals(splitTableAlias))) {
                String quotedPartCond = partitionDesc.getPartitionConditionBuilder().buildDateRangeCondition(
                        partitionDesc, flatDesc.getSegment(), segRange,
                        col -> getTableColumnIdentityQuoted(col, jdbcMetadataDialect, metaMap, true));
                bquery += " WHERE " + quotedPartCond;
            }
        }
    }
    bquery = escapeQuotationInSql(bquery); // escape ` in cmd
    splitColumn = escapeQuotationInSql(splitColumn);

    String cmd = String.format(Locale.ROOT,
            "%s/bin/sqoop import" + generateSqoopConfigArgString()
                    + "--connect \"%s\" --driver %s --username %s --password \"%s\" --query \"%s AND \\$CONDITIONS\" "
                    + "--target-dir %s/%s --split-by %s --boundary-query \"%s\" --null-string '%s' "
                    + "--null-non-string '%s' --fields-terminated-by '%s' --num-mappers %d",
            sqoopHome, connectionUrl, driverClass, jdbcUser, jdbcPass, selectSql, jobWorkingDir, hiveTable,
            splitColumn, bquery, sqoopNullString, sqoopNullNonString, filedDelimiter, mapperNum);
    logger.debug("sqoop cmd : {}", cmd);

    CmdStep step = new CmdStep();
    step.setCmd(cmd);
    step.setName(ExecutableConstants.STEP_NAME_SQOOP_TO_FLAT_HIVE_TABLE);
    return step;
}
Example 11
Source File: JdbcHiveInputBase.java From kylin with Apache License 2.0
@Override
protected AbstractExecutable createSqoopToFlatHiveStep(String jobWorkingDir, String cubeName) {
    KylinConfig config = flatDesc.getDataModel().getConfig();
    PartitionDesc partitionDesc = flatDesc.getDataModel().getPartitionDesc();
    String partCol = null;

    boolean enableQuote = dataSource.getSqlConverter().getConfigurer().enableQuote();
    enableQuote = enableQuote && config.enableHiveDdlQuote();
    logger.debug("Quote switch is set to {}", enableQuote);
    SqlDialect sqlDialect = enableQuote ? dataSource.getSqlConverter().getConfigurer().getSqlDialect()
            : FlatTableSqlQuoteUtils.NON_QUOTE_DIALECT;
    SqlConverter.IConfigurer iconfigurer = dataSource.getSqlConverter().getConfigurer();

    if (partitionDesc.isPartitioned()) {
        partCol = partitionDesc.getPartitionDateColumn(); //tablename.colname
    }

    String splitTable;
    String splitTableAlias;
    String splitColumn;
    String splitDatabase;
    TblColRef splitColRef = determineSplitColumn();
    splitTable = splitColRef.getTableRef().getTableDesc().getName();
    splitTableAlias = splitColRef.getTableAlias();
    splitDatabase = splitColRef.getColumnDesc().getTable().getDatabase();

    if (enableQuote) {
        splitColumn = sqlDialect.quoteIdentifier(splitColRef.getTableAlias()) + "."
                + sqlDialect.quoteIdentifier(splitColRef.getName());
        splitDatabase = sqlDialect.quoteIdentifier(splitDatabase);
        splitTable = sqlDialect.quoteIdentifier(splitTable);
        splitTableAlias = sqlDialect.quoteIdentifier(splitTableAlias);
    } else {
        splitColumn = splitColRef.getTableAlias() + "." + splitColRef.getName();
    }

    String selectSql = JoinedFlatTable.generateSelectDataStatement(flatDesc, true, new String[]{partCol},
            sqlDialect);
    selectSql = escapeQuotationInSql(dataSource.convertSql(selectSql));

    String hiveTable = flatDesc.getTableName();
    String filedDelimiter = config.getJdbcSourceFieldDelimiter();
    int mapperNum = config.getSqoopMapperNum();

    String bquery;
    bquery = String.format(Locale.ROOT, "SELECT min(%s), max(%s) FROM %s.%s as %s", splitColumn, splitColumn,
            splitDatabase, splitTable, splitTableAlias);
    bquery = dataSource.convertSql(bquery);

    if (partitionDesc.isPartitioned()) {
        SegmentRange segRange = flatDesc.getSegRange();
        if (segRange != null && !segRange.isInfinite()) {
            if (partitionDesc.getPartitionDateColumnRef().getTableAlias().equals(splitTableAlias)
                    && (partitionDesc.getPartitionTimeColumnRef() == null || partitionDesc
                            .getPartitionTimeColumnRef().getTableAlias().equals(splitTableAlias))) {
                String quotedPartCond = FlatTableSqlQuoteUtils.quoteIdentifierInSqlExpr(flatDesc,
                        partitionDesc.getPartitionConditionBuilder().buildDateRangeCondition(partitionDesc,
                                flatDesc.getSegment(), segRange, null),
                        sqlDialect);
                bquery += " WHERE " + quotedPartCond;
            }
        }
    }
    bquery = escapeQuotationInSql(bquery);
    splitColumn = escapeQuotationInSql(splitColumn);

    String cmd = StringUtils.format(
            "--connect \"%s\" --driver \"%s\" --username \"%s\" --password \"%s\" --query \"%s AND \\$CONDITIONS\" "
                    + "--target-dir \"%s/%s\" --split-by \"%s\" --boundary-query \"%s\" --null-string '' "
                    + "--fields-terminated-by '%s' --num-mappers %d",
            dataSource.getJdbcUrl(), dataSource.getJdbcDriver(), dataSource.getJdbcUser(),
            dataSource.getJdbcPassword(), selectSql, jobWorkingDir, hiveTable, splitColumn, bquery,
            filedDelimiter, mapperNum);
    if (iconfigurer.getTransactionIsolationLevel() != null) {
        cmd = cmd + " --relaxed-isolation --metadata-transaction-isolation-level "
                + iconfigurer.getTransactionIsolationLevel();
    }
    logger.debug("sqoop cmd: {}", cmd);

    SqoopCmdStep step = new SqoopCmdStep();
    step.setCmd(cmd);
    step.setName(ExecutableConstants.STEP_NAME_SQOOP_TO_FLAT_HIVE_TABLE);
    return step;
}
Example 12
Source File: GTCubeStorageQueryBase.java From kylin with Apache License 2.0
private boolean isExactAggregation(StorageContext context, Cuboid cuboid, Collection<TblColRef> groups,
        Set<TblColRef> othersD, Set<TblColRef> singleValuesD, Set<TblColRef> derivedPostAggregation,
        Collection<FunctionDesc> functionDescs, List<SQLDigest.SQLCall> aggrSQLCalls, boolean groupByExpression) {
    if (context.isNeedStorageAggregation()) {
        logger.info("exactAggregation is false because need storage aggregation");
        return false;
    }

    if (cuboid.requirePostAggregation()) {
        logger.info("exactAggregation is false because cuboid {}=>{}", cuboid.getInputID(), cuboid.getId());
        return false;
    }

    // derived aggregation is bad, unless expanded columns are already in group by
    if (!groups.containsAll(derivedPostAggregation)) {
        logger.info("exactAggregation is false because derived column require post aggregation: {}",
                derivedPostAggregation);
        return false;
    }

    // other columns (from filter) is bad, unless they are ensured to have single value
    if (!singleValuesD.containsAll(othersD)) {
        logger.info("exactAggregation is false because some column not on group by: {} (single value column: {})",
                othersD, singleValuesD);
        return false;
    }

    //for DimensionAsMetric like max(cal_dt), the dimension column maybe not in real group by
    for (FunctionDesc functionDesc : functionDescs) {
        if (functionDesc.isDimensionAsMetric()) {
            logger.info("exactAggregation is false because has DimensionAsMetric");
            return false;
        }
    }
    for (SQLDigest.SQLCall aggrSQLCall : aggrSQLCalls) {
        if (aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_COUNT_DISTINCT)
                || aggrSQLCall.function.equals(BitmapMeasureType.FUNC_INTERSECT_VALUE)) {
            logger.info("exactAggregation is false because has INTERSECT_COUNT OR INTERSECT_VALUE");
            return false;
        }
    }

    // for partitioned cube, the partition column must belong to group by or has single value
    PartitionDesc partDesc = cuboid.getCubeDesc().getModel().getPartitionDesc();
    if (partDesc.isPartitioned()) {
        TblColRef col = partDesc.getPartitionDateColumnRef();
        if (!groups.contains(col) && !singleValuesD.contains(col)) {
            logger.info("exactAggregation is false because cube is partitioned and {} is not on group by", col);
            return false;
        }
    }

    // for group by expression like: group by seller_id/100. seller_id_1(200) get 2, seller_id_2(201) also get 2, so can't aggregate exactly
    if (groupByExpression) {
        logger.info("exactAggregation is false because group by expression");
        return false;
    }

    logger.info("exactAggregation is true, cuboid id is {}", cuboid.getId());
    return true;
}
Example 13
Source File: CubeManager.java From kylin with Apache License 2.0
public CubeSegment refreshSegment(CubeInstance cube, TSRange tsRange, SegmentRange segRange) throws IOException {
    CubeInstance cubeCopy = cube.latestCopyForWrite(); // get a latest copy

    checkInputRanges(tsRange, segRange);

    PartitionDesc partitionDesc = cubeCopy.getModel().getPartitionDesc();
    if (partitionDesc == null || partitionDesc.isPartitioned() == false) {
        // full build
        tsRange = null;
        segRange = null;
    }

    CubeSegment newSegment = newSegment(cubeCopy, tsRange, segRange);

    Pair<Boolean, Boolean> pair = cubeCopy.getSegments().fitInSegments(newSegment);
    if (pair.getFirst() == false || pair.getSecond() == false)
        throw new IllegalArgumentException("The new refreshing segment " + newSegment
                + " does not match any existing segment in cube " + cubeCopy);

    if (segRange != null) {
        CubeSegment toRefreshSeg = null;
        for (CubeSegment cubeSegment : cubeCopy.getSegments()) {
            if (cubeSegment.getSegRange().equals(segRange)) {
                toRefreshSeg = cubeSegment;
                break;
            }
        }
        if (toRefreshSeg == null) {
            throw new IllegalArgumentException(
                    "For streaming cube, only one segment can be refreshed at one time");
        }

        newSegment.setSourcePartitionOffsetStart(toRefreshSeg.getSourcePartitionOffsetStart());
        newSegment.setSourcePartitionOffsetEnd(toRefreshSeg.getSourcePartitionOffsetEnd());
    }

    CubeUpdate update = new CubeUpdate(cubeCopy);
    update.setToAddSegs(newSegment);
    updateCube(update);

    return newSegment;
}