Posted by Akilis on 26 Jun, 2024
项目自身仅修改附加模板、数据文件,不修改 Parser.jj 文件;Calcite版本升级时只需要修改项目pom依赖的Calcite版本号,而不用去反复拷贝Calcite文件,合并Parser.jj。
```
<project>
<build>
<plugins>
<plugin>
<!-- Extract the parser grammar template (Parser.jj) from calcite-core.jar and put
it under ${project.build.directory}, where all FreeMarker templates are, so the
grammar never has to be copied into this project by hand. -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.8</version>
<executions>
<execution>
<id>unpack-parser-template</id>
<!-- Runs in the "initialize" phase, i.e. before sources are generated. -->
<phase>initialize</phase>
<goals>
<goal>unpack</goal>
</goals>
<configuration>
<artifactItems>
<artifactItem>
<!-- No <version> is given here. NOTE(review): presumably resolved from the
project's own calcite-core dependency / dependencyManagement; confirm. -->
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-core</artifactId>
<type>jar</type>
<overWrite>true</overWrite>
<outputDirectory>${project.build.directory}/</outputDirectory>
<!-- Only Parser.jj is unpacked; every other jar entry is skipped. -->
<includes>**/Parser.jj</includes>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<!-- adding fmpp code gen -->
<plugin>
<!-- Copy the project's own code-generation inputs (src/main/codegen) into
${project.build.directory}/codegen during "initialize". -->
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>copy-fmpp-resources</id>
<phase>initialize</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/codegen</outputDirectory>
<resources>
<resource>
<directory>src/main/codegen</directory>
<!-- Files are copied verbatim: Maven property filtering is switched off. -->
<filtering>false</filtering>
</resource>
</resources>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- javacc + fmpp: compile the Parser.jj produced by FMPP into the Java parser. -->
<groupId>org.codehaus.mojo</groupId>
<artifactId>javacc-maven-plugin</artifactId>
<executions>
<execution>
<id>javacc</id>
<!-- No <phase> is given. NOTE(review): presumably the goal's default
lifecycle binding applies; confirm. -->
<goals>
<goal>javacc</goal>
</goals>
<configuration>
<!-- Input is the FMPP output directory, not the raw template. -->
<sourceDirectory>${project.build.directory}/generated-sources/fmpp</sourceDirectory>
<includes>
<include>**/Parser.jj</include>
</includes>
<lookAhead>2</lookAhead>
<isStatic>false</isStatic>
</configuration>
</execution>
<execution>
<id>javacc-test</id>
<phase>generate-test-sources</phase>
<goals>
<goal>javacc</goal>
</goals>
<configuration>
<!-- NOTE(review): nothing in this POM fragment writes to
generated-test-sources/fmpp; confirm that a test-scoped FMPP execution
exists elsewhere, otherwise this execution has no input. -->
<sourceDirectory>${project.build.directory}/generated-test-sources/fmpp</sourceDirectory>
<outputDirectory>${project.build.directory}/generated-test-sources/javacc</outputDirectory>
<includes>
<include>**/Parser.jj</include>
</includes>
<lookAhead>2</lookAhead>
<isStatic>false</isStatic>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<!-- Run FMPP over the merged template tree in ${project.build.directory}/codegen:
the project's own config.fmpp and include files (copied there by the
copy-fmpp-resources execution) plus the Parser.jj unpacked from calcite-core
(unpack-parser-template execution). Reading from src/main/codegen instead would
miss Parser.jj, because this project deliberately does not keep a copy of it. -->
<groupId>org.apache.drill.tools</groupId>
<artifactId>drill-fmpp-maven-plugin</artifactId>
<executions>
<execution>
<configuration>
<config>${project.build.directory}/codegen/config.fmpp</config>
<output>${project.build.directory}/generated-sources/fmpp</output>
<templates>${project.build.directory}/codegen/templates</templates>
</configuration>
<id>generate-fmpp-sources</id>
<!-- Must not be "validate": that phase runs BEFORE "initialize", i.e. before
Parser.jj has been unpacked and the codegen resources copied. Within one phase
Maven runs executions in POM declaration order, so binding to "initialize" runs
this after both preparation steps and still before javacc in generate-sources. -->
<phase>initialize</phase>
<goals>
<goal>generate</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
```
```
在 src/main/codegen/config.fmpp 中找到
package: "org.apache.calcite.sql.parser.impl",
将下方的class,替换成一个你自己的类名,后面会用到。例如
class: "JackySqlParserImpl",
```
```
import org.apache.calcite.sql.SqlJacky;
...
/**
 * Parses the custom statement {@code JACKY JOB <string literal>} and returns
 * it as a {@code SqlJacky} node.
 */
SqlNode SqlJacky() :
{
SqlNode stringNode;
}
{
<JACKY> <JOB>
stringNode = StringLiteral()
{
// NOTE(review): stringNode is assigned but never read. The node is built from
// token.image instead, presumably the raw text of the last token consumed by
// StringLiteral(), surrounding quotes included; confirm.
return new SqlJacky(getPos(), token.image);
}
}
...
SqlNode SqlStmt():
...
stmt = SqlJacky()
<DEFAULT, DQID, BTID> TOKEN :
| < JACKY: "JACKY">
| < JOB: "JOB">
```
```
package org.apache.calcite.sql;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.util.SqlVisitor;
import org.apache.calcite.sql.validate.SqlValidator;
import org.apache.calcite.sql.validate.SqlValidatorScope;
import org.apache.calcite.util.Litmus;
/**
 * Parse-tree node for the custom {@code JACKY JOB <string>} statement.
 *
 * <p>The node stores the statement's string payload exactly as the parser
 * supplied it. The {@code System.out.println} calls are kept from the original
 * tutorial code so that readers can see which callbacks Calcite invokes.
 */
public class SqlJacky extends SqlNode {
  /** Payload of the statement, as handed over by the parser. */
  private final String jackyString;

  /**
   * Creates the node.
   *
   * @param pos position of the statement in the source text (stored by {@link SqlNode})
   * @param jackyString payload text of the statement
   */
  public SqlJacky(SqlParserPos pos, String jackyString) {
    // The position is kept by SqlNode itself; no second copy is needed here.
    super(pos);
    this.jackyString = jackyString;
  }

  /** Returns the payload text given to the constructor. */
  public String getJackyString() {
    System.out.println("getJackyString");
    return this.jackyString;
  }

  /**
   * Returns a copy of this node located at {@code sqlParserPos}.
   *
   * <p>Previously returned {@code null}, which made any caller that clones the
   * tree fail later with a {@code NullPointerException}.
   */
  @Override
  public SqlNode clone(SqlParserPos sqlParserPos) {
    System.out.println("clone");
    return new SqlJacky(sqlParserPos, jackyString);
  }

  /**
   * Writes the statement back as SQL: the two keywords, a line break, then the payload.
   *
   * @param sqlWriter target writer
   * @param leftPrec unused
   * @param rightPrec unused
   */
  @Override
  public void unparse(SqlWriter sqlWriter, int leftPrec, int rightPrec) {
    sqlWriter.keyword("jacky");
    sqlWriter.keyword("job");
    sqlWriter.print("\n");
    // The payload is data, not a keyword: keyword() case-folds its argument,
    // which would rewrite the text inside the quotes. literal() prints it as is.
    // String.valueOf keeps the old rendering of a null payload ("null").
    sqlWriter.literal(String.valueOf(jackyString));
  }

  /** No validation is performed for this statement; the call is only traced. */
  @Override
  public void validate(SqlValidator sqlValidator, SqlValidatorScope sqlValidatorScope) {
    System.out.println("validate");
  }

  /**
   * Visitor hook. No visitor dispatch is implemented for this node type, so the
   * visitor is not called and {@code null} is returned.
   */
  @Override
  public <R> R accept(SqlVisitor<R> sqlVisitor) {
    System.out.println("accept");
    return null;
  }

  /**
   * Structural equality: two {@code SqlJacky} nodes are equal when their payloads are equal.
   *
   * <p>Previously always returned {@code false}, so a node was not even equal to itself.
   *
   * @param sqlNode node to compare with
   * @param litmus receives the outcome; decides whether a mismatch returns false or throws
   * @return the value produced by {@code litmus}
   */
  @Override
  public boolean equalsDeep(SqlNode sqlNode, Litmus litmus) {
    System.out.println("equalsDeep");
    if (sqlNode instanceof SqlJacky
        && java.util.Objects.equals(jackyString, ((SqlJacky) sqlNode).jackyString)) {
      return litmus.succeed();
    }
    return litmus.fail("{} != {}", this, sqlNode);
  }
}
```
```
package cn.flinkhub;
import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.avatica.util.Quoting;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.sql.parser.impl.JackySqlParserImpl;
public class CustomParser {
public static void main(String[] args) {
SchemaPlus rootSchema = Frameworks.createRootSchema(true);
final FrameworkConfig config = Frameworks.newConfigBuilder()
.parserConfig(SqlParser.configBuilder()
//.setLex(Lex.ORACLE)
.setParserFactory(JackySqlParserImpl.FACTORY)
.setCaseSensitive(false)
.setQuoting(Quoting.BACK_TICK)
.setQuotedCasing(Casing.TO_UPPER)
.setUnquotedCasing(Casing.TO_UPPER)
//.setConformance(SqlConformanceEnum.ORACLE_12)
.build())
.build();
// "jacky 'select ids, name from test where id < 5'";
String sql = "jacky job 'select ids, name from test where id < 5'";
SqlParser parser = SqlParser.create(sql, config.getParserConfig());
try {
SqlNode sqlNode = parser.parseStmt();
System.out.println(sqlNode.toString());
} catch (Exception e) {
e.printStackTrace();
}
}
}
```
mvn clean generate-sources -pl '<group>:<artifact>' -am -DskipTests -Dcicheck.skip=true -Dcheckstyle.skip=true
可借助 Calcite built-in SqlParserTest 测试工具,高效、严谨测试。
```
<dependency>
<!-- Dependency that the surrounding text pairs with Calcite's built-in
SqlParserTest; its version follows the calcite.version property. -->
<groupId>org.apache.calcite</groupId>
<artifactId>calcite-testkit</artifactId>
<version>${calcite.version}</version>
</dependency>
```
c1 as d
不是合法的 expr,作用于relation, 只能用于 Statement解析, 例如 Select c1 as d
```
// Litmus: IGNORE, THROW
SqlNode#equalsDeep(SqlNode, Litmus)
```
unparse unicode
```
// Ansi "select '烫烫烫'"
/** Excerpt of the base dialect class: the single-argument convenience overload. */
public class SqlDialect {
/**
 * Returns {@code val} as a quoted string literal.
 *
 * <p>Builds the result in a fresh {@code StringBuilder} by delegating to the
 * three-argument overload with {@code null} as the second argument.
 * NOTE(review): that argument is presumably the charset name; confirm.
 * The method is {@code final}, so dialects customise quoting through the
 * three-argument overload.
 */
public final String quoteStringLiteral(String val) {
final StringBuilder buf = new StringBuilder();
quoteStringLiteral(buf, null, val);
return buf.toString();
}
}
// CH "select '中文'"
/**
 * ClickHouse dialect variant whose string literals are written out verbatim,
 * apart from escaping the closing quote.
 */
public class CustomClickHouseSqlDialect extends ClickHouseSqlDialect {
  /**
   * Appends {@code val} to {@code buf} as a quoted literal.
   *
   * <p>Every occurrence of the closing-quote string inside {@code val} is
   * replaced by the escaped-quote string; no other character is altered.
   *
   * @param buf destination buffer
   * @param charsetName ignored by this implementation
   * @param val literal text to quote
   */
  @Override
  public void quoteStringLiteral(StringBuilder buf, @Nullable String charsetName,
      String val) {
    buf.append(literalQuoteString)
        .append(val.replace(literalEndQuoteString, literalEscapedQuote))
        .append(literalEndQuoteString);
  }
}
```
unparse Dialect
```
// ANSI rendering: per the original note the statement reads "desc change"
// (identifier not quoted).
// NOTE(review): confirm that a no-argument toSqlString() exists on the type of tableIdentifier.
String sql = String.format("desc %s", tableIdentifier.toSqlString().toString());
// MySQL rendering via CustomMysqlSqlDialect.DEFAULT: per the original note the
// statement reads "desc `change`" (identifier back-tick quoted).
// This line is an alternative to the one above, not a continuation of it.
String sql = String.format("desc %s", tableIdentifier.toSqlString(CustomMysqlSqlDialect.DEFAULT).toString());
```
unparse function name
```
/** Aggregate function whose SQL text is emitted under the name {@code groupBitmap}. */
public class GroupBitmapAggFunction extends SqlAggFunction {
/**
 * Unparses the call by delegating to the operator obtained from
 * {@code RelToSqlConverterUtil.specialOperatorByName("groupBitmap")}, passing
 * the writer, call and precedences through unchanged.
 */
@Override
public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) {
RelToSqlConverterUtil.specialOperatorByName("groupBitmap").unparse(writer, call, leftPrec, rightPrec);
}
}
```
```
/** Definition of the {@code IF} function, declared in the user-defined-function category. */
public class IfFunction extends BiSqlFunction {
/** Registers the function under the name {@code IF} with the settings explained inline. */
public IfFunction() {
super("IF",
SqlKind.OTHER_FUNCTION,
// Return type: ARG1 made nullable; otherwise fall back to ARG2_NULLABLE
ReturnTypes.ARG1_NULLABLE.orElse(ReturnTypes.ARG2_NULLABLE),
// No operand type inference: fallback to operandTypeChecker
(SqlOperandTypeInference) null,
// operandTypeChecker: any operand types, any number of operands
OperandTypes.VARIADIC,
// Original author's note: the category is relevant to type-system compatibility
SqlFunctionCategory.USER_DEFINED_FUNCTION);
}
}
```
默认Parser解析的UDF为 unresolved, 也可以在Parser实例化阶段注册UDF, 语法定义阶段使用解析实例,使用已注册的函数来实例化SqlCall,使得parse之后,已经为resolved UDF.
```
// CustomSqlParser
// init() registers the functions while the singleton is being constructed,
// i.e. before getInstance() can hand the parser to a caller.
/**
 * Process-wide parser singleton.
 *
 * <p>The single instance lives in the nested {@code Inner} class and is created
 * when that class is initialized. {@code init()} is defined outside this excerpt.
 */
public class BiSqlParser {
  /** Returns the shared instance. */
  public static BiSqlParser getInstance() {
    return Inner.INSTANCE;
  }

  /** Holder for the shared instance. */
  private static class Inner {
    private static final BiSqlParser INSTANCE = new BiSqlParser();
  }

  /** Not instantiable from outside; runs {@code init()} once on construction. */
  private BiSqlParser() {
    init();
  }
}
// parser.jj
/**
 * Builds the call node for a parsed function invocation.
 *
 * <p>A simple (single-part) function name is looked up case-insensitively in
 * {@code SqlStdOperatorTable}. When exactly one operator matches, the call is
 * created with it; in every other case a {@code SqlUnresolvedFunction} is used.
 *
 * @param funName name of the function as parsed
 * @param pos position of the call in the source text
 * @param funcType function category used for the lookup and for the unresolved fallback
 * @param functionQualifier qualifier passed through to the created call
 * @param operands operands passed through to the created call
 * @return the call created by the chosen operator
 */
protected SqlCall createCall(SqlIdentifier funName, SqlParserPos pos, SqlFunctionCategory funcType, SqlLiteral functionQualifier, SqlNode[] operands) {
    SqlOperator operator = null;

    // lookup
    if (funName.isSimple()) {
        final List<SqlOperator> candidates = new ArrayList<>();
        SqlStdOperatorTable.instance().lookupOperatorOverloads(
                funName,
                funcType,
                SqlSyntax.FUNCTION,
                candidates,
                SqlNameMatchers.withCaseSensitive(false));
        // Zero matches or an ambiguous result both leave the operator unset.
        if (candidates.size() == 1) {
            operator = candidates.get(0);
        }
    }

    // or Unresolved
    if (operator == null) {
        operator = new SqlUnresolvedFunction(
                funName,
                (SqlReturnTypeInference) null,
                (SqlOperandTypeInference) null,
                (SqlOperandTypeChecker) null,
                (List) null,
                funcType);
    }

    return operator.createCall(functionQualifier, pos, operands);
}
```
Validation 阶段会查找函数;如果找不到(即仍存在 unresolved UDF)就会报错。可以临时定义一个算子表用于 validation,配置如下:
```
// LENIENT_OPERATOR_LOOKUP: whether the number of arguments must match strictly
// (original author's note; TODO confirm against the Calcite documentation).
Frameworks.ConfigBuilder configBuilder = Frameworks.newConfigBuilder();
// Operator table used for validation: the standard operators chained with a
// list table that holds the extra MyAvgAggFunction.
SqlOperatorTable opTab =
ChainedSqlOperatorTable.of( SqlStdOperatorTable.instance(),
new ListSqlOperatorTable(
ImmutableList.of(new MycatAggFunctions.MyAvgAggFunction())));
configBuilder.operatorTable(opTab);
```
设计一套编解码器 Codec,实现解析指定dialect 的 sql,然后翻译成另一种 dialect 的 sql
Codec 转换 sourceSql 为 targetSql 的流程