From b7680e0f2e6425db92dc02dcb05b86391d84e1ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89amonn=20McManus?= Date: Fri, 24 Apr 2026 17:49:01 -0700 Subject: [PATCH] Add support for Markdown tables. This is very basic, and the main intent is just to avoid mangling tables. We essentially preserve the formatting of any table we find, without attempting to adjust it in any way. PiperOrigin-RevId: 905301629 --- core/pom.xml | 16 +++++--- .../java/javadoc/JavadocFormatter.java | 2 + .../java/javadoc/JavadocWriter.java | 19 +++++++++ .../java/javadoc/MarkdownPositions.java | 40 +++++++++++++++++-- .../googlejavaformat/java/javadoc/Token.java | 21 +++++++++- .../java/JavadocFormattingTest.java | 29 ++++++++++---- .../java/javadoc/MarkdownPositionsTest.java | 13 ++++-- 7 files changed, 121 insertions(+), 19 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index d3235cb18..b6f070c5c 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -39,11 +39,6 @@ com.google.guava guava - - org.commonmark - commonmark - 0.28.0 - @@ -66,6 +61,17 @@ auto-service-annotations true + + org.commonmark + commonmark + 0.28.0 + + + org.commonmark + commonmark-ext-gfm-tables + 0.28.0 + compile + diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java index dbc2be841..f801a4d75 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocFormatter.java @@ -44,6 +44,7 @@ import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart; import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock; import com.google.googlejavaformat.java.javadoc.Token.MarkdownHardLineBreak; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownTable; import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment; import 
com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment; import com.google.googlejavaformat.java.javadoc.Token.OptionalLineBreak; @@ -137,6 +138,7 @@ private static String render(List input, int blockIndent, boolean classic case MarkdownHardLineBreak unused -> output.writeMarkdownHardLineBreak(); case Literal t -> output.writeLiteral(t); case MarkdownFencedCodeBlock t -> output.writeMarkdownFencedCodeBlock(t); + case MarkdownTable t -> output.writeMarkdownTable(t); case ListItemCloseTag unused -> {} case OptionalLineBreak unused -> {} case ParagraphCloseTag unused -> {} diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java index 492bb0150..ae3226301 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/JavadocWriter.java @@ -35,6 +35,7 @@ import com.google.googlejavaformat.java.javadoc.Token.ListOpenTag; import com.google.googlejavaformat.java.javadoc.Token.Literal; import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownTable; import com.google.googlejavaformat.java.javadoc.Token.MoeBeginStripComment; import com.google.googlejavaformat.java.javadoc.Token.MoeEndStripComment; import com.google.googlejavaformat.java.javadoc.Token.PreCloseTag; @@ -44,6 +45,7 @@ import com.google.googlejavaformat.java.javadoc.Token.StartOfLineToken; import com.google.googlejavaformat.java.javadoc.Token.TableCloseTag; import com.google.googlejavaformat.java.javadoc.Token.TableOpenTag; +import java.util.List; /** * Stateful object that accepts "requests" and "writes," producing formatted Javadoc. 
@@ -335,6 +337,9 @@ void writeLiteral(Literal token) { } void writeMarkdownFencedCodeBlock(MarkdownFencedCodeBlock token) { + if (!token.precededByNonWhitespace() && wroteAnythingSignificant) { + requestBlankLine(); + } flushWhitespace(); output.append(token.start()); token @@ -350,6 +355,20 @@ void writeMarkdownFencedCodeBlock(MarkdownFencedCodeBlock token) { requestBlankLine(); } + void writeMarkdownTable(MarkdownTable token) { + if (!token.precededByNonWhitespace() && wroteAnythingSignificant) { + requestBlankLine(); + } + flushWhitespace(); + List lines = token.value().lines().toList(); + output.append(lines.get(0)); + for (String line : lines.subList(1, lines.size())) { + writeNewline(AutoIndent.NO_AUTO_INDENT); + output.append(line); + } + requestBlankLine(); + } + @Override public String toString() { return output.toString(); diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java index 7f66b32e8..11cddea68 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/MarkdownPositions.java @@ -28,10 +28,14 @@ import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanEnd; import com.google.googlejavaformat.java.javadoc.Token.MarkdownCodeSpanStart; import com.google.googlejavaformat.java.javadoc.Token.MarkdownFencedCodeBlock; +import com.google.googlejavaformat.java.javadoc.Token.MarkdownTable; import com.google.googlejavaformat.java.javadoc.Token.ParagraphCloseTag; import com.google.googlejavaformat.java.javadoc.Token.ParagraphOpenTag; +import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.commonmark.ext.gfm.tables.TableBlock; +import org.commonmark.ext.gfm.tables.TablesExtension; import org.commonmark.node.BulletList; import org.commonmark.node.Code; import 
org.commonmark.node.FencedCodeBlock; @@ -92,6 +96,10 @@ void visit(Node node) { case OrderedList orderedList -> addSpan(orderedList, LIST_OPEN_TOKEN, LIST_CLOSE_TOKEN); case ListItem listItem -> alreadyVisitedChildren = visitListItem(listItem); case FencedCodeBlock fencedCodeBlock -> visitFencedCodeBlock(fencedCodeBlock); + case TableBlock tableBlock -> { + visitTableBlock(tableBlock); + alreadyVisitedChildren = true; + } case Code code -> visitCodeSpan(code); // TODO: others default -> {} @@ -128,16 +136,30 @@ private void visitFencedCodeBlock(FencedCodeBlock fencedCodeBlock) { // indentation gets subtracted from FencedCodeBlock.getLiteral(), which is the actual text // represented by the code block. int start = startPosition(fencedCodeBlock) + fencedCodeBlock.getFenceIndent(); + boolean precededByNonWhitespace = precededByNonWhitespace(start); + int closingLength = + Objects.requireNonNullElse( + fencedCodeBlock.getClosingFenceLength(), fencedCodeBlock.getOpeningFenceLength()); MarkdownFencedCodeBlock token = new MarkdownFencedCodeBlock( input.substring(start, endPosition(fencedCodeBlock)), fencedCodeBlock.getFenceCharacter().repeat(fencedCodeBlock.getOpeningFenceLength()) + fencedCodeBlock.getInfo(), - fencedCodeBlock.getFenceCharacter().repeat(fencedCodeBlock.getClosingFenceLength()), - fencedCodeBlock.getLiteral()); + fencedCodeBlock.getFenceCharacter().repeat(closingLength), + fencedCodeBlock.getLiteral(), + precededByNonWhitespace); positionToToken.get(start).addLast(token); } + private void visitTableBlock(TableBlock tableBlock) { + int start = startPosition(tableBlock); + boolean precededByNonWhitespace = precededByNonWhitespace(start); + int end = endPosition(tableBlock); + positionToToken + .get(start) + .addLast(new MarkdownTable(input.substring(start, end), precededByNonWhitespace)); + } + private void visitCodeSpan(Code code) { int start = startPosition(code); int end = endPosition(code); @@ -164,6 +186,15 @@ private void visitNodeList(Node node) { } 
} + private boolean precededByNonWhitespace(int position) { + for (int i = position - 1; i >= 0 && input.charAt(i) != '\n'; i--) { + if (!Character.isWhitespace(input.charAt(i))) { + return true; + } + } + return false; + } + /** * Adds tokens for the given node, {@code startToken} at the point where the node starts in the * input, and {@code endToken} at the point where it ends. The {@code startToken} goes after any @@ -195,7 +226,10 @@ public String toString() { } private static final Parser PARSER = - Parser.builder().includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES).build(); + Parser.builder() + .includeSourceSpans(IncludeSourceSpans.BLOCKS_AND_INLINES) + .extensions(ImmutableList.of(TablesExtension.create())) + .build(); private static final HeaderOpenTag HEADER_OPEN_TOKEN = new HeaderOpenTag(""); private static final HeaderCloseTag HEADER_CLOSE_TOKEN = new HeaderCloseTag(""); diff --git a/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java b/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java index 134fbc8f6..e66a5c4cc 100644 --- a/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java +++ b/core/src/main/java/com/google/googlejavaformat/java/javadoc/Token.java @@ -135,10 +135,29 @@ record MarkdownCodeSpanEnd(String value) implements Token {} * @param end the end fence. * @param literal the text that the code block represents. This does not include the start and end * fences, nor any indentation that precedes these fences and every intervening line. + * @param precededByNonWhitespace whether the start of the code block is preceded by at least one + * non-whitespace character on the same line, for example {@code - ```}. 
*/ - record MarkdownFencedCodeBlock(String value, String start, String end, String literal) + record MarkdownFencedCodeBlock( + String value, String start, String end, String literal, boolean precededByNonWhitespace) implements Token {} + /** + * A Markdown table, like: + * + * {@snippet : + * | foo | bar | + * | --- | --- | + * | baz | qux | + * } + * + * @param value the full text of the table as it appeared in the input, including the delimiters + * and the literal content. + * @param precededByNonWhitespace whether the start of the table is preceded by at least one + * non-whitespace character on the same line, for example {@code - |foo|bar|}. + */ + record MarkdownTable(String value, boolean precededByNonWhitespace) implements Token {} + /** * Whitespace that is not in a {@code
<pre>} or {@code <table>} section. Whitespace includes
    * leading newlines, asterisks, and tabs and spaces. In the output, it is translated to newlines
diff --git a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
index bf7ac1c2d..983d66387 100644
--- a/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
+++ b/core/src/test/java/com/google/googlejavaformat/java/JavadocFormattingTest.java
@@ -1738,6 +1738,12 @@ public void markdownFencedCodeBlocks() {
 ///    in a list
 ///    ```
 ///
+/// - flibbertigibbet
+///
+///   ```
+///   code block in a list after text
+///   ```
+///
 /// ~~~java
 /// code block
 /// with tildes and an info string ("java")
@@ -1761,6 +1767,12 @@ class Test {}
 ///   in a list
 ///   ```
 ///
+/// - flibbertigibbet
+///
+///   ```
+///   code block in a list after text
+///   ```
+///
 /// ~~~java
 /// code block
 /// with tildes and an info string ("java")
@@ -1993,6 +2005,8 @@ public void markdownTables() {
     assume().that(MARKDOWN_JAVADOC_SUPPORTED).isTrue();
     String input =
 """
+/// Table McTableface
+///
 /// | foo | bar |
 /// | --- | --- |
 /// | baz | qux |
@@ -2000,15 +2014,16 @@ public void markdownTables() {
 /// - |foo|bar|
 ///   |--:|:--|
 ///   |baz|qux|
+///
+/// - Another list.
+///
+///   | which | contains |
+///   | ----- | -------- |
+///   | a | table |
 class Test {}
 """;
-    // TODO: unmangle the tables
-    String expected =
-"""
-/// | foo | bar | | --- | --- | | baz | qux |
-/// - |foo|bar| |--:|:--| |baz|qux|
-class Test {}
-""";
+    // Tables are preserved verbatim: the misaligned column markers in the last table stay as-is.
+    String expected = input;
     doFormatTest(input, expected);
   }
 
diff --git a/core/src/test/java/com/google/googlejavaformat/java/javadoc/MarkdownPositionsTest.java b/core/src/test/java/com/google/googlejavaformat/java/javadoc/MarkdownPositionsTest.java
index e1b5fcd12..f3104e119 100644
--- a/core/src/test/java/com/google/googlejavaformat/java/javadoc/MarkdownPositionsTest.java
+++ b/core/src/test/java/com/google/googlejavaformat/java/javadoc/MarkdownPositionsTest.java
@@ -129,6 +129,7 @@ public void codeBlock() {
     int secondCodeEnd = text.indexOf("~~~", secondCodeStart + 3) + 3;
     int thirdCodeStart = text.indexOf("````", secondCodeEnd);
     int thirdCodeEnd = text.indexOf("````", thirdCodeStart + 4) + 4;
+    boolean precededByNonWhitespace = true;
     ImmutableListMultimap expected =
         ImmutableListMultimap.builder()
             .put(bullet, new ListOpenTag(""))
@@ -136,7 +137,11 @@ public void codeBlock() {
             .put(
                 firstCodeStart,
                 new MarkdownFencedCodeBlock(
-                    text.substring(firstCodeStart, firstCodeEnd), "```", "```", "foo\nbar\n"))
+                    text.substring(firstCodeStart, firstCodeEnd),
+                    "```",
+                    "```",
+                    "foo\nbar\n",
+                    /* precededByNonWhitespace= */ true))
             .put(firstCodeEnd, new ListItemCloseTag(""))
             .put(firstCodeEnd, new ListCloseTag(""))
             .put(
@@ -145,14 +150,16 @@ public void codeBlock() {
                     text.substring(secondCodeStart, secondCodeEnd),
                     "~~~java",
                     "~~~",
-                    "code\nwith tildes\n"))
+                    "code\nwith tildes\n",
+                    /* precededByNonWhitespace= */ false))
             .put(
                 thirdCodeStart,
                 new MarkdownFencedCodeBlock(
                     text.substring(thirdCodeStart, thirdCodeEnd),
                     "````",
                     "````",
-                    "indented code\nwith more than three backticks\n"))
+                    "indented code\nwith more than three backticks\n",
+                    /* precededByNonWhitespace= */ false))
             .build();
     assertThat(map).isEqualTo(expected);
   }