From e2a82e523cde6f51bb59ae5695c942fc952d7021 Mon Sep 17 00:00:00 2001 From: funcpp Date: Mon, 30 Mar 2026 11:04:42 +0900 Subject: [PATCH 1/2] Support optional AS keyword in CTE definitions for Databricks Databricks allows omitting the AS keyword in CTE definitions: `WITH cte (SELECT ...) SELECT * FROM cte` Add `supports_cte_without_as()` dialect method and enable it for Databricks and Generic dialects. --- src/dialect/databricks.rs | 5 +++ src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 11 +++++ src/parser/mod.rs | 81 ++++++++++++++++++++++++----------- tests/sqlparser_databricks.rs | 27 ++++++++++++ 5 files changed, 104 insertions(+), 24 deletions(-) diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 09cac96fa..c76b464a0 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -99,4 +99,9 @@ impl Dialect for DatabricksDialect { fn supports_bang_not_operator(&self) -> bool { true } + + /// See <https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-cte> + fn supports_cte_without_as(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 1d5461fec..c7f17351b 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -288,4 +288,8 @@ impl Dialect for GenericDialect { fn supports_comma_separated_trim(&self) -> bool { true } + + fn supports_cte_without_as(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index bbf7d5804..ef9cafed5 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1670,6 +1670,17 @@ pub trait Dialect: Debug + Any { fn supports_comma_separated_trim(&self) -> bool { false } + + /// Returns true if the dialect supports the `AS` keyword being + /// optional in a CTE definition. For example: + /// ```sql + /// WITH cte_name (SELECT ...) + /// ``` + /// + /// [Databricks](https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-cte) + fn supports_cte_without_as(&self) -> bool { + false + } } /// Operators for which precedence must be defined. 
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 70e8ce28f..09b051fd9 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14060,7 +14060,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) + /// Parse a CTE (`alias [( col1, col2, ... )] [AS] (subquery)`) pub fn parse_cte(&mut self) -> Result<Cte, ParserError> { let name = self.parse_identifier()?; @@ -14091,32 +14091,65 @@ impl<'a> Parser<'a> { closing_paren_token: closing_paren_token.into(), } } else { - let columns = self.parse_table_alias_column_defs()?; - self.expect_keyword_is(Keyword::AS)?; - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); + let as_optional = self.dialect.supports_cte_without_as(); + let opt_query = if as_optional { + self.maybe_parse(|p| { + p.expect_token(&Token::LParen)?; + let query = p.parse_query()?; + let closing_paren_token = p.expect_token(&Token::RParen)?; + Ok((query, closing_paren_token)) + })? 
+ } else { + None + }; + match opt_query { + Some((query, closing_paren_token)) => { + let alias = TableAlias { + explicit: false, + name, + columns: vec![], + }; + Cte { + alias, + query, + from: None, + materialized: None, + closing_paren_token: closing_paren_token.into(), + } } - } - self.expect_token(&Token::LParen)?; + None => { + let columns = self.parse_table_alias_column_defs()?; + if as_optional { + let _ = self.parse_keyword(Keyword::AS); + } else { + self.expect_keyword_is(Keyword::AS)?; + } + let mut is_materialized = None; + if dialect_of!(self is PostgreSqlDialect) { + if self.parse_keyword(Keyword::MATERIALIZED) { + is_materialized = Some(CteAsMaterialized::Materialized); + } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { + is_materialized = Some(CteAsMaterialized::NotMaterialized); + } + } + self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; + let query = self.parse_query()?; + let closing_paren_token = self.expect_token(&Token::RParen)?; - let alias = TableAlias { - explicit: false, - name, - columns, - }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), + let alias = TableAlias { + explicit: false, + name, + columns, + }; + Cte { + alias, + query, + from: None, + materialized: is_materialized, + closing_paren_token: closing_paren_token.into(), + } + } } }; if self.parse_keyword(Keyword::FROM) { diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index e9b19fcce..dd7d2881c 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -651,3 +651,30 @@ fn parse_numeric_prefix_identifier() { databricks().verified_stmt("SELECT * FROM a.b.1c"); } + +#[test] +fn parse_cte_without_as() { + databricks_and_generic().one_statement_parses_to( + "WITH cte (SELECT 1) SELECT * FROM cte", + "WITH cte AS (SELECT 1) SELECT * FROM cte", + ); + 
+ databricks_and_generic().one_statement_parses_to( + "WITH a AS (SELECT 1), b (SELECT 2) SELECT * FROM a, b", + "WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM a, b", + ); + + databricks_and_generic().one_statement_parses_to( + "WITH cte (col1, col2) (SELECT 1, 2) SELECT * FROM cte", + "WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte", + ); + + databricks_and_generic().verified_query("WITH cte AS (SELECT 1) SELECT * FROM cte"); + + databricks_and_generic() + .verified_query("WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte"); + + assert!(all_dialects_where(|d| !d.supports_cte_without_as()) + .parse_sql_statements("WITH cte (SELECT 1) SELECT * FROM cte") + .is_err()); +} From a37c193eef00f94cfd7b0a0a393d225f11c73724 Mon Sep 17 00:00:00 2001 From: funcpp Date: Thu, 9 Apr 2026 18:29:32 +0900 Subject: [PATCH 2/2] Reduce duplication in parse_cte --- src/parser/mod.rs | 137 +++++++++++++++++++--------------------------- 1 file changed, 57 insertions(+), 80 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 09b051fd9..9056f85d3 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14064,93 +14064,70 @@ impl<'a> Parser<'a> { pub fn parse_cte(&mut self) -> Result<Cte, ParserError> { let name = self.parse_identifier()?; - let mut cte = if self.parse_keyword(Keyword::AS) { - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); - } - } - self.expect_token(&Token::LParen)?; - - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; - - let alias = TableAlias { - explicit: false, - name, - columns: vec![], - }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), - } - } else 
{ - let as_optional = self.dialect.supports_cte_without_as(); - let opt_query = if as_optional { - self.maybe_parse(|p| { - p.expect_token(&Token::LParen)?; - let query = p.parse_query()?; - let closing_paren_token = p.expect_token(&Token::RParen)?; - Ok((query, closing_paren_token)) - })? - } else { - None - }; - match opt_query { - Some((query, closing_paren_token)) => { - let alias = TableAlias { + let as_optional = self.dialect.supports_cte_without_as(); + + // If AS is optional, first try to parse `name (query)` directly + if as_optional && !self.peek_keyword(Keyword::AS) { + if let Some((query, closing_paren_token)) = self.maybe_parse(|p| { + p.expect_token(&Token::LParen)?; + let query = p.parse_query()?; + let closing_paren_token = p.expect_token(&Token::RParen)?; + Ok((query, closing_paren_token)) + })? { + let mut cte = Cte { + alias: TableAlias { explicit: false, name, columns: vec![], - }; - Cte { - alias, - query, - from: None, - materialized: None, - closing_paren_token: closing_paren_token.into(), - } + }, + query, + from: None, + materialized: None, + closing_paren_token: closing_paren_token.into(), + }; + if self.parse_keyword(Keyword::FROM) { + cte.from = Some(self.parse_identifier()?); } - None => { - let columns = self.parse_table_alias_column_defs()?; - if as_optional { - let _ = self.parse_keyword(Keyword::AS); - } else { - self.expect_keyword_is(Keyword::AS)?; - } - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); - } - } - self.expect_token(&Token::LParen)?; + return Ok(cte); + } + } - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; + // Determine column definitions and consume AS + let columns = if 
self.parse_keyword(Keyword::AS) { + vec![] + } else { + let columns = self.parse_table_alias_column_defs()?; + if as_optional { + let _ = self.parse_keyword(Keyword::AS); + } else { + self.expect_keyword_is(Keyword::AS)?; + } + columns + }; - let alias = TableAlias { - explicit: false, - name, - columns, - }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), - } - } + let mut is_materialized = None; + if dialect_of!(self is PostgreSqlDialect) { + if self.parse_keyword(Keyword::MATERIALIZED) { + is_materialized = Some(CteAsMaterialized::Materialized); + } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { + is_materialized = Some(CteAsMaterialized::NotMaterialized); } + } + + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + let closing_paren_token = self.expect_token(&Token::RParen)?; + + let mut cte = Cte { + alias: TableAlias { + explicit: false, + name, + columns, + }, + query, + from: None, + materialized: is_materialized, + closing_paren_token: closing_paren_token.into(), }; if self.parse_keyword(Keyword::FROM) { cte.from = Some(self.parse_identifier()?);