From 508bfc576eb48b8a655fbce7f63a20a1ab8f64c1 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Sun, 12 Apr 2026 17:26:01 +0200 Subject: [PATCH 1/6] Remove `FileMode.getMode()` The octal value associated with `FileMode` is not very useful to users, since it is only used internally when generating the tree id. Therefore we can probably remove the getter for now. --- .../commons/codec/digest/GitIdentifiers.java | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java index bd02afe290..4e2b54f44c 100644 --- a/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java +++ b/src/main/java/org/apache/commons/codec/digest/GitIdentifiers.java @@ -180,29 +180,14 @@ private static FileMode get(final Path path) { return REGULAR; } - /** - * The octal mode as used by Git. - */ - private final String mode; - /** * Serialized {@code mode}: since this is mutable, it must remain private. */ private final byte[] modeBytes; FileMode(final String mode) { - this.mode = mode; this.modeBytes = mode.getBytes(StandardCharsets.US_ASCII); } - - /** - * Gets the octal mode as used by Git. - * - * @return The octal mode. - */ - public String getMode() { - return mode; - } } /** From 250993b873b4367887344d2083cef32f798e9b12 Mon Sep 17 00:00:00 2001 From: "Piotr P.
Karwasz" Date: Sun, 12 Apr 2026 18:40:44 +0200 Subject: [PATCH 2/6] Add filesystem test --- .../codec/digest/GitIdentifiersTest.java | 179 ++++++++++++++---- 1 file changed, 145 insertions(+), 34 deletions(-) diff --git a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java index 8c8b7c45b0..bc8a3c16e6 100644 --- a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java +++ b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java @@ -28,16 +28,20 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.attribute.PosixFilePermissions; import java.security.MessageDigest; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.stream.Stream; +import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.binary.Hex; import org.apache.commons.codec.digest.GitIdentifiers.DirectoryEntry; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledOnOs; +import org.junit.jupiter.api.condition.OS; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; @@ -49,9 +53,93 @@ */ class GitIdentifiersTest { - private static final byte[] ZERO_ID = new byte[20]; + // Virtual tree: + // + // link -> src (symlink) + // link.txt -> src/hello.txt (symlink) + // src/ + // hello.txt (regular file) + // run.sh (executable file) + + /** Decodes a compile-time hex literal; throws {@link AssertionError} on malformed input. */ + private static byte[] hex(final String hex) { + try { + return Hex.decodeHex(hex); + } catch (final DecoderException e) { + throw new AssertionError(e); + } + } + + /** Content of {@code src/hello.txt}. 
*/ + private static final byte[] HELLO_CONTENT = "hello\n".getBytes(StandardCharsets.UTF_8); + /** SHA-1 blob id of {@link #HELLO_CONTENT}: {@code printf 'hello\n' | git hash-object --stdin} */ + private static final byte[] HELLO_BLOB_ID_SHA1 = hex("ce013625030ba8dba906f756967f9e9ca394464a"); + /** SHA-256 blob id of {@link #HELLO_CONTENT}. */ + private static final byte[] HELLO_BLOB_ID_SHA256 = hex("2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4"); + + /** Content of {@code src/run.sh}. */ + private static final byte[] RUN_CONTENT = "#!/bin/sh\n".getBytes(StandardCharsets.UTF_8); + /** SHA-1 blob id of {@link #RUN_CONTENT}: {@code printf '#!/bin/sh\n' | git hash-object --stdin} */ + private static final byte[] RUN_BLOB_ID_SHA1 = hex("1a2485251c33a70432394c93fb89330ef214bfc9"); + /** SHA-256 blob id of {@link #RUN_CONTENT}. */ + private static final byte[] RUN_BLOB_ID_SHA256 = hex("1249034e3cf9007362d695b09b1fbdb4c578903bf10b665749b94743f8177ce1"); + + /** Target of symlink {@code link}. */ + private static final String LINK_CONTENT = "src"; + /** SHA-1 blob id of the symlink target {@link #LINK_CONTENT}: {@code printf 'src' | git hash-object --stdin} */ + private static final byte[] LINK_BLOB_ID_SHA1 = hex("e8310385c56dc4bbe379f43400f3181f6a59f260"); + /** SHA-256 blob id of the symlink target {@link #LINK_CONTENT}. */ + private static final byte[] LINK_BLOB_ID_SHA256 = hex("e1bdca538422554ea204da85e0cec156b12b6808473083610ff95ea390843ab6"); + + /** Target of symlink {@code link.txt}. */ + private static final String LINK_TXT_CONTENT = "src/hello.txt"; + /** SHA-1 blob id of the symlink target {@link #LINK_TXT_CONTENT}: {@code printf 'src/hello.txt' | git hash-object --stdin} */ + private static final byte[] LINK_TXT_BLOB_ID_SHA1 = hex("132a953033e00dcff94f5cccb261f52cd1d71173"); + /** SHA-256 blob id of the symlink target {@link #LINK_TXT_CONTENT}. 
*/ + private static final byte[] LINK_TXT_BLOB_ID_SHA256 = hex("2499925193a48a84a546a2f7cd3ce7789d4e073ef1e7276fe682bfbb2b636cef"); + + // Tree ids can be recomputed in a git repository with: + // git init /tmp/t && cd /tmp/t + // followed by writing the blob objects and calling git mktree. + + /** + * SHA-1 tree id of {@code src/} (hello.txt + run.sh): + * <pre>
+     * printf '100644 blob ce013625030ba8dba906f756967f9e9ca394464a\thello.txt\n
+     *         100755 blob 1a2485251c33a70432394c93fb89330ef214bfc9\trun.sh\n' | git mktree
+     * </pre>
+ */ + private static final byte[] SRC_TREE_ID_SHA1 = hex("5575b4a0141a2287ec2836a620e5d6aa8fb203ba"); + /** + * SHA-256 tree id of {@code src/}: + * <pre>
+     * printf '100644 blob 2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4\thello.txt\n
+     *         100755 blob 1249034e3cf9007362d695b09b1fbdb4c578903bf10b665749b94743f8177ce1\trun.sh\n' | git mktree
+     * </pre>
+ */ + private static final byte[] SRC_TREE_ID_SHA256 = hex("5b4e74befcb98e3050c511d02353d00565b2172be0a2bc5de833f011ad27f694"); + + /** + * SHA-1 tree id of the main directory (link + link.txt + src/): + * <pre>
+     * printf '120000 blob e8310385c56dc4bbe379f43400f3181f6a59f260\tlink\n
+     *         120000 blob 132a953033e00dcff94f5cccb261f52cd1d71173\tlink.txt\n
+     *         040000 tree 5575b4a0141a2287ec2836a620e5d6aa8fb203ba\tsrc\n' | git mktree
+     * </pre>
+ */ + private static final byte[] MAIN_TREE_ID_SHA1 = hex("3217900fd0a6624cd6aa169c2a9f289f7f34432b"); + /** + * SHA-256 tree id of the main directory: + * <pre>
+     * printf '120000 blob e1bdca538422554ea204da85e0cec156b12b6808473083610ff95ea390843ab6\tlink\n
+     *         120000 blob 2499925193a48a84a546a2f7cd3ce7789d4e073ef1e7276fe682bfbb2b636cef\tlink.txt\n
+     *         040000 tree 5b4e74befcb98e3050c511d02353d00565b2172be0a2bc5de833f011ad27f694\tsrc\n' | git mktree
+     * </pre>
+ */ + private static final byte[] MAIN_TREE_ID_SHA256 = hex("58e9a59940e4d2ae7e374b63fedf3b7bba8cfdc60308f64abd066db137300bcd"); + static Stream blobIdProvider() { return Stream.of(Arguments.of("DigestUtilsTest/hello.txt", "5f4a83288e67f1be2d6fcdad84165a86c6a970d7"), Arguments.of("DigestUtilsTest/greetings.txt", "6cf4f797455661e61d1ee6913fc29344f5897243"), @@ -62,18 +150,12 @@ private static Path resourcePath(final String resourceName) throws Exception { return Paths.get(GitIdentifiersTest.class.getClassLoader().getResource(resourceName).toURI()); } - static Stream testTreeIdBuilder() { + static Stream virtualTreeProvider() { return Stream.of( - Arguments.of(MessageDigestAlgorithms.SHA_1, - "ce013625030ba8dba906f756967f9e9ca394464a", // blob id of "hello\n" - "8bbe8a53790056316b23b7c270f10ab6bf6bb1b4", // blob id of "subdir" - "1a2485251c33a70432394c93fb89330ef214bfc9", // blob id of "#!/bin/sh\n" - "4b825dc642cb6eb9a060e54bf8d69288fbee4904"), // tree id of empty directory - Arguments.of(MessageDigestAlgorithms.SHA_256, - "2cf8d83d9ee29543b34a87727421fdecb7e3f3a183d337639025de576db9ebb4", - "33910dae80b0db75dbad7fa521dbbf1885a07edfab1228871c41a2e94ccd7edb", - "1249034e3cf9007362d695b09b1fbdb4c578903bf10b665749b94743f8177ce1", - "6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321")); + Arguments.of(MessageDigestAlgorithms.SHA_1, HELLO_BLOB_ID_SHA1, LINK_BLOB_ID_SHA1, LINK_TXT_BLOB_ID_SHA1, RUN_BLOB_ID_SHA1, + SRC_TREE_ID_SHA1, MAIN_TREE_ID_SHA1), + Arguments.of(MessageDigestAlgorithms.SHA_256, HELLO_BLOB_ID_SHA256, LINK_BLOB_ID_SHA256, LINK_TXT_BLOB_ID_SHA256, RUN_BLOB_ID_SHA256, + SRC_TREE_ID_SHA256, MAIN_TREE_ID_SHA256)); } @ParameterizedTest @@ -159,34 +241,27 @@ void testDirectoryEntrySortOrder() { } @ParameterizedTest - @MethodSource - void testTreeIdBuilder(final String algorithm, final String helloHex, final String linkHex, final String runHex, final String srcHex) throws Exception { - final byte[] helloContent = 
"hello\n".getBytes(StandardCharsets.UTF_8); - final byte[] runContent = "#!/bin/sh\n".getBytes(StandardCharsets.UTF_8); - final String linkTarget = "subdir"; + @MethodSource("virtualTreeProvider") + void testTreeIdBuilder(final String algorithm, final byte[] helloId, final byte[] linkId, final byte[] linkTxtId, final byte[] runId, + final byte[] srcTreeId, final byte[] mainTreeId) throws Exception { final MessageDigest md = DigestUtils.getDigest(algorithm); // Verify individual blob IDs against pre-computed constants. - assertArrayEquals(Hex.decodeHex(helloHex), GitIdentifiers.blobId(md, helloContent)); - assertArrayEquals(Hex.decodeHex(linkHex), GitIdentifiers.blobId(md, linkTarget.getBytes(StandardCharsets.UTF_8))); - assertArrayEquals(Hex.decodeHex(runHex), GitIdentifiers.blobId(md, runContent)); + assertArrayEquals(helloId, GitIdentifiers.blobId(md, HELLO_CONTENT)); + assertArrayEquals(linkId, GitIdentifiers.blobId(md, LINK_CONTENT.getBytes(StandardCharsets.UTF_8))); + assertArrayEquals(linkTxtId, GitIdentifiers.blobId(md, LINK_TXT_CONTENT.getBytes(StandardCharsets.UTF_8))); + assertArrayEquals(runId, GitIdentifiers.blobId(md, RUN_CONTENT)); // Entries are supplied out of order to verify that the builder sorts them correctly. final GitIdentifiers.TreeIdBuilder builder = GitIdentifiers.treeIdBuilder(md); - builder.addDirectory("src"); - builder.addFile(GitIdentifiers.FileMode.EXECUTABLE, "run.sh", runContent); - builder.addFile(GitIdentifiers.FileMode.REGULAR, "hello.txt", helloContent); - builder.addSymbolicLink("link.txt", linkTarget); - - // Expected tree body: entries in Git sort order (hello.txt, link.txt, run.sh, src/). - // Each entry: hex-encoded " \0" followed by the object id. 
- final byte[] treeBody = Hex.decodeHex("3130303634342068656c6c6f2e74787400" + helloHex + // 100644 hello.txt\0 - "313230303030206c696e6b2e74787400" + linkHex + // 120000 link.txt\0 - "3130303735352072756e2e736800" + runHex + // 100755 run.sh\0 - "34303030302073726300" + srcHex); // 40000 src\0 - md.reset(); - DigestUtils.updateDigest(md, ("tree " + treeBody.length + "\0").getBytes(StandardCharsets.UTF_8)); - assertArrayEquals(DigestUtils.updateDigest(md, treeBody).digest(), builder.build()); + builder.addSymbolicLink("link.txt", LINK_TXT_CONTENT); + builder.addFile(GitIdentifiers.FileMode.REGULAR, "src/hello.txt", HELLO_CONTENT); + builder.addSymbolicLink("link", LINK_CONTENT); + builder.addFile(GitIdentifiers.FileMode.EXECUTABLE, "src/run.sh", RUN_CONTENT); + + // Check trees + assertArrayEquals(mainTreeId, builder.build()); + assertArrayEquals(srcTreeId, builder.addDirectory("src").build()); } @Test @@ -266,4 +341,40 @@ void testTreeIdPath() throws Exception { GitIdentifiers.treeId(DigestUtils.getSha1Digest(), resourcePath("DigestUtilsTest"))); } + @DisabledOnOs(OS.WINDOWS) + @ParameterizedTest + @MethodSource("virtualTreeProvider") + void testTreeIdPathUnix(final String algorithm, final byte[] helloId, final byte[] linkId, final byte[] linkTxtId, + final byte[] runId, final byte[] srcTreeId, final byte[] mainTreeId, final @TempDir Path tempDir) throws Exception { + MessageDigest md = DigestUtils.getDigest(algorithm); + + // Files + Path link = tempDir.resolve("link"); + Path linkTxt = tempDir.resolve("link.txt"); + Path src = tempDir.resolve("src"); + Path hello = src.resolve("hello.txt"); + Path run = src.resolve("run.sh"); + + // Create the same structure as the virtual tree. 
+ try { + Files.createSymbolicLink(link, Paths.get(LINK_CONTENT)); + Files.createSymbolicLink(linkTxt, Paths.get(LINK_TXT_CONTENT)); + } catch (final UnsupportedOperationException e) { + Assumptions.abort("Symbolic links not supported on this filesystem"); + } + Files.createDirectory(src); + Files.write(hello, HELLO_CONTENT); + Files.write(run, RUN_CONTENT); + Files.setPosixFilePermissions(run, PosixFilePermissions.fromString("rwxr-xr-x")); + + // Verify individual blob IDs against pre-computed constants. + assertArrayEquals(helloId, GitIdentifiers.blobId(md, hello)); + assertArrayEquals(linkId, GitIdentifiers.blobId(md, link)); + assertArrayEquals(linkTxtId, GitIdentifiers.blobId(md, linkTxt)); + assertArrayEquals(runId, GitIdentifiers.blobId(md, run)); + + // Check trees + assertArrayEquals(mainTreeId, GitIdentifiers.treeId(md, tempDir)); + assertArrayEquals(srcTreeId, GitIdentifiers.treeId(md, src)); + } } From b047d7a8501514d31eca74bdc8b08a8a5b1a3854 Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Sun, 12 Apr 2026 18:45:42 +0200 Subject: [PATCH 3/6] Sort methods --- .../codec/digest/GitIdentifiersTest.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java index bc8a3c16e6..48fe8e4d52 100644 --- a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java +++ b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java @@ -63,15 +63,6 @@ class GitIdentifiersTest { // hello.txt (regular file) // run.sh (executable file) - /** Decodes a compile-time hex literal; throws {@link AssertionError} on malformed input. */ - private static byte[] hex(final String hex) { - try { - return Hex.decodeHex(hex); - } catch (final DecoderException e) { - throw new AssertionError(e); - } - } - /** Content of {@code src/hello.txt}. 
*/ private static final byte[] HELLO_CONTENT = "hello\n".getBytes(StandardCharsets.UTF_8); /** SHA-1 blob id of {@link #HELLO_CONTENT}: {@code printf 'hello\n' | git hash-object --stdin} */ @@ -146,6 +137,15 @@ static Stream blobIdProvider() { Arguments.of("DigestUtilsTest/subdir/nested.txt", "07a392ddb4dbff06a373a7617939f30b2dcfe719")); } + /** Decodes a compile-time hex literal; throws {@link AssertionError} on malformed input. */ + private static byte[] hex(final String hex) { + try { + return Hex.decodeHex(hex); + } catch (final DecoderException e) { + throw new AssertionError(e); + } + } + private static Path resourcePath(final String resourceName) throws Exception { return Paths.get(GitIdentifiersTest.class.getClassLoader().getResource(resourceName).toURI()); } From aa4627d283fd9ae1d9b6448e8f9098c6f40a85ee Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Sun, 12 Apr 2026 18:51:47 +0200 Subject: [PATCH 4/6] Fix formatting --- .../commons/codec/digest/GitIdentifiersTest.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java index 48fe8e4d52..1c504f6cb9 100644 --- a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java +++ b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java @@ -346,14 +346,14 @@ void testTreeIdPath() throws Exception { @MethodSource("virtualTreeProvider") void testTreeIdPathUnix(final String algorithm, final byte[] helloId, final byte[] linkId, final byte[] linkTxtId, final byte[] runId, final byte[] srcTreeId, final byte[] mainTreeId, final @TempDir Path tempDir) throws Exception { - MessageDigest md = DigestUtils.getDigest(algorithm); + final MessageDigest md = DigestUtils.getDigest(algorithm); // Files - Path link = tempDir.resolve("link"); - Path linkTxt = tempDir.resolve("link.txt"); - Path src = tempDir.resolve("src"); - Path 
hello = src.resolve("hello.txt"); - Path run = src.resolve("run.sh"); + final Path link = tempDir.resolve("link"); + final Path linkTxt = tempDir.resolve("link.txt"); + final Path src = tempDir.resolve("src"); + final Path hello = src.resolve("hello.txt"); + final Path run = src.resolve("run.sh"); // Create the same structure as the virtual tree. try { From cdb25198c7583efd795bd00d656be9cd27a7611a Mon Sep 17 00:00:00 2001 From: "Piotr P. Karwasz" Date: Sun, 12 Apr 2026 18:52:19 +0200 Subject: [PATCH 5/6] Enable test on Windows The test will check anyway if symbolic links are supported. --- .../java/org/apache/commons/codec/digest/GitIdentifiersTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java index 1c504f6cb9..97d89c4385 100644 --- a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java +++ b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java @@ -341,7 +341,6 @@ void testTreeIdPath() throws Exception { GitIdentifiers.treeId(DigestUtils.getSha1Digest(), resourcePath("DigestUtilsTest"))); } - @DisabledOnOs(OS.WINDOWS) @ParameterizedTest @MethodSource("virtualTreeProvider") void testTreeIdPathUnix(final String algorithm, final byte[] helloId, final byte[] linkId, final byte[] linkTxtId, From b6a5f74c25e256f4a5fb70bde38e36f6809ff9a1 Mon Sep 17 00:00:00 2001 From: "Piotr P. 
Karwasz" Date: Sun, 12 Apr 2026 18:52:58 +0200 Subject: [PATCH 6/6] Remove unused imports --- .../org/apache/commons/codec/digest/GitIdentifiersTest.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java index 97d89c4385..e311f89f49 100644 --- a/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java +++ b/src/test/java/org/apache/commons/codec/digest/GitIdentifiersTest.java @@ -40,8 +40,6 @@ import org.apache.commons.codec.digest.GitIdentifiers.DirectoryEntry; import org.junit.jupiter.api.Assumptions; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledOnOs; -import org.junit.jupiter.api.condition.OS; import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments;