diff --git a/analytics-sidecar-client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java b/analytics-sidecar-client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java index c084c4fff..55beba1c5 100644 --- a/analytics-sidecar-client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java +++ b/analytics-sidecar-client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java @@ -244,6 +244,19 @@ public CompletableFuture gossipInfo() return executor.executeRequestAsync(requestBuilder().gossipInfoRequest().build()); } + /** + * Executes the gossip info request using the default retry policy against the provided {@code instance} only + * + * @param instance the instance where the request will be executed + * @return a completable future of the gossip info + */ + public CompletableFuture gossipInfo(SidecarInstance instance) + { + return executor.executeRequestAsync(requestBuilder().singleInstanceSelectionPolicy(instance) + .gossipInfoRequest() + .build()); + } + /** * Executes the GET gossip health request using the default retry policy and configured selection policy diff --git a/cassandra-analytics-common/src/main/java/org/apache/cassandra/bridge/BridgeInitializationParameters.java b/cassandra-analytics-common/src/main/java/org/apache/cassandra/bridge/BridgeInitializationParameters.java index d7857a1be..3cc7714d3 100644 --- a/cassandra-analytics-common/src/main/java/org/apache/cassandra/bridge/BridgeInitializationParameters.java +++ b/cassandra-analytics-common/src/main/java/org/apache/cassandra/bridge/BridgeInitializationParameters.java @@ -24,21 +24,20 @@ */ public class BridgeInitializationParameters { - private final String sstableFormat; + private final String configuredSSTableFormat; - public BridgeInitializationParameters(String sstableFormat) + public BridgeInitializationParameters(String configuredSSTableFormat) { - this.sstableFormat = sstableFormat; + this.configuredSSTableFormat = configuredSSTableFormat; } 
public static BridgeInitializationParameters fromEnvironment() { - String sstableFormat = CassandraVersion.sstableFormat(); - return new BridgeInitializationParameters(sstableFormat); + return new BridgeInitializationParameters(CassandraVersion.configuredSSTableFormat()); } - public String getSstableFormat() + public String getConfiguredSSTableFormat() { - return sstableFormat; + return configuredSSTableFormat; } } diff --git a/cassandra-analytics-common/src/main/java/org/apache/cassandra/bridge/CassandraVersion.java b/cassandra-analytics-common/src/main/java/org/apache/cassandra/bridge/CassandraVersion.java index 1e23d1350..904573995 100644 --- a/cassandra-analytics-common/src/main/java/org/apache/cassandra/bridge/CassandraVersion.java +++ b/cassandra-analytics-common/src/main/java/org/apache/cassandra/bridge/CassandraVersion.java @@ -20,7 +20,9 @@ package org.apache.cassandra.bridge; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Optional; import java.util.Set; @@ -45,22 +47,51 @@ */ public enum CassandraVersion { - THREEZERO(30, "3.0", "three-zero", "big"), - FOURZERO(40, "4.0", "four-zero", "big"), - FOURONE(41, "4.1", "four-zero", "big"), - FIVEZERO(50, "5.0", "five-zero", "big", "bti"); + THREEZERO(30, "3.0", "three-zero", new String[]{"big"}, + new String[]{ + // Cassandra 3.x native sstable versions + "big-ma", + "big-mb", + "big-mc", + "big-md", + "big-me", + "big-mf" + }, 30), + FOURZERO(40, "4.0", "four-zero", new String[]{"big"}, + new String[]{ + // Cassandra 4.0 native sstable versions + "big-na", + "big-nb", + }, 30), + FOURONE(41, "4.1", "four-zero", new String[]{"big"}, + new String[]{ + // Cassandra 4.1 did not introduce new native SSTable versions + }, 30), + FIVEZERO(50, "5.0", "five-zero", new String[]{"big", "bti"}, + new String[]{ + // Cassandra 5.0 native sstable versions + "big-oa", + "bti-da", + }, 40); private final int number; private final String name; private 
final String jarBaseName; // Must match shadowJar.archiveFileName from Gradle configuration (without extension) private final Set sstableFormats; + private final List nativeSStableVersions; + // Lowest Cassandra version number whose SSTables this version can read (inclusive). + private final int lowestCompatibleVersionNumber; - CassandraVersion(int number, String name, String jarBaseName, String... sstableFormats) + + CassandraVersion(int number, String name, String jarBaseName, String[] sstableFormats, String[] nativeSStableVersions, + int lowestCompatibleVersionNumber) { this.number = number; this.name = name; this.jarBaseName = jarBaseName; this.sstableFormats = new HashSet<>(Arrays.asList(sstableFormats)); + this.nativeSStableVersions = List.of(nativeSStableVersions); + this.lowestCompatibleVersionNumber = lowestCompatibleVersionNumber; } public int versionNumber() @@ -78,13 +109,69 @@ public String jarBaseName() return jarBaseName; } - private static final String sstableFormat; + /** + * Get the set of SSTable formats supported by this Cassandra version. + * + * @return Set of supported SSTable format strings + */ + public Set sstableFormats() + { + return sstableFormats; + } + + /** + * Get the list of native SSTable version strings for this Cassandra version. + * + * @return List of native SSTable version strings + */ + public List getNativeSStableVersions() + { + return nativeSStableVersions; + } + + /** + * Get the set of SSTable version strings that this Cassandra version can read. + * This includes the native versions of every Cassandra version from {@link #lowestCompatibleVersionNumber} + * (inclusive) up to this version (inclusive). For example, Cassandra 5.0 can read 4.0, 4.1 and 5.0 + * SSTables (lowestCompatible=40) but NOT 3.x. 
+ * + * @return Set of full SSTable version strings that can be read + */ + public Set getSupportedSStableVersionsForRead() + { + Set readableVersions = new HashSet<>(); + for (CassandraVersion version : CassandraVersion.values()) + { + if (version.number >= lowestCompatibleVersionNumber && version.number <= this.number) + { + readableVersions.addAll(version.nativeSStableVersions); + } + } + + return Collections.unmodifiableSet(readableVersions); + } + + /** + * Whether this Cassandra version can read (and therefore import) SSTables natively written by {@code other}. + * A version can read SSTables from every version in the inclusive range + * [{@link #lowestCompatibleVersionNumber}, this version]. For example, Cassandra 5.0 can read 4.0/4.1/5.0 + * SSTables but not 3.x. + * + * @param other the Cassandra version whose SSTables would be read + * @return {@code true} if this version can read {@code other}'s SSTables + */ + public boolean canRead(CassandraVersion other) + { + return other.number >= this.lowestCompatibleVersionNumber && other.number <= this.number; + } + + private static final String configuredSSTableFormat; private static final CassandraVersion[] implementedVersions; private static final String[] supportedVersions; static { - sstableFormat = System.getProperty("cassandra.analytics.bridges.sstable_format", "big"); + configuredSSTableFormat = System.getProperty("cassandra.analytics.bridges.sstable_format", "big"); // NOTE: These default enum names must stay in sync with cassandraVersionEnumMap in build.gradle. 
// FOURONE is intentionally excluded from local-dev defaults to keep iteration fast; @@ -93,7 +180,7 @@ public String jarBaseName() String.join(",", FOURZERO.name(), FIVEZERO.name())); implementedVersions = Arrays.stream(providedVersionsOrDefault.split(",")) .map(CassandraVersion::valueOf) - .filter(v -> v.sstableFormats.contains(sstableFormat)) + .filter(v -> v.sstableFormats().contains(configuredSSTableFormat)) .toArray(CassandraVersion[]::new); // NOTE: These default versions must stay in sync with cassandraFullVersionMap in build.gradle. @@ -101,7 +188,7 @@ public String jarBaseName() "cassandra-4.0.17,cassandra-5.0.5"); supportedVersions = Arrays.stream(providedSupportedVersionsOrDefault.split(",")) .filter(version -> CassandraVersion.fromVersion(version) - .filter(v -> v.sstableFormats.contains(sstableFormat)) + .filter(v -> v.sstableFormats().contains(configuredSSTableFormat)) .isPresent()) .toArray(String[]::new); @@ -109,9 +196,9 @@ public String jarBaseName() "No versions available"); } - public static String sstableFormat() + public static String configuredSSTableFormat() { - return sstableFormat; + return configuredSSTableFormat; } public static Optional fromVersion(String cassandraVersion) @@ -122,6 +209,37 @@ public static Optional fromVersion(String cassandraVersion) .findAny(); } + /** + * Find the Cassandra version that originally writes SSTables with this version string. + * Returns the native Cassandra version that introduced this SSTable version. + * + * @param sstableVersion full version string including format (e.g., "big-na", "bti-da") + * @return Optional containing the CassandraVersion that natively writes this format, + * or Optional.empty() if: + *
 <ul> + *
 <li>The version string is null</li> + *
 <li>The version string is unrecognized (not in any enum's nativeSStableVersions)</li> + *
 <li>The version format is invalid or doesn't match expected pattern</li> + *
 </ul>
+ */ + public static Optional fromSSTableVersion(String sstableVersion) + { + if (sstableVersion == null) + { + return Optional.empty(); + } + + for (CassandraVersion version : CassandraVersion.values()) + { + if (version.nativeSStableVersions.contains(sstableVersion)) + { + return Optional.of(version); + } + } + + return Optional.empty(); + } + public static CassandraVersion[] implementedVersions() { return implementedVersions; diff --git a/cassandra-analytics-common/src/test/java/org/apache/cassandra/bridge/CassandraVersionTest.java b/cassandra-analytics-common/src/test/java/org/apache/cassandra/bridge/CassandraVersionTest.java new file mode 100644 index 000000000..2158cb3fa --- /dev/null +++ b/cassandra-analytics-common/src/test/java/org/apache/cassandra/bridge/CassandraVersionTest.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cassandra.bridge; + +import java.util.List; +import java.util.Optional; +import java.util.Set; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Tests for CassandraVersion SSTable version methods + */ +public class CassandraVersionTest +{ + @ParameterizedTest + @CsvSource({ + "big-ma, THREEZERO", "big-mb, THREEZERO", "big-mc, THREEZERO", + "big-md, THREEZERO", "big-me, THREEZERO", "big-mf, THREEZERO", + "big-na, FOURZERO", "big-nb, FOURZERO", + "big-oa, FIVEZERO", "bti-da, FIVEZERO" + }) + void testFromSSTableVersionNativeVersions(String ssTableVersion, String expectedVersion) + { + Optional result = CassandraVersion.fromSSTableVersion(ssTableVersion); + assertThat(result).isPresent(); + assertThat(result.get()).isEqualTo(CassandraVersion.valueOf(expectedVersion)); + } + + @Test + void testFromSSTableVersionReturnsEmpty() + { + Optional result = CassandraVersion.fromSSTableVersion("unknown-xx"); + assertThat(result).isEmpty(); + } + + @Test + void testGetSupportedSStableVersionsForReadFourZero() + { + Set supported = CassandraVersion.FOURZERO.getSupportedSStableVersionsForRead(); + + // C* 4.0 can read its own versions + assertThat(supported).contains("big-na", "big-nb"); + + // C* 4.0 can read C* 3.0 versions (previous major version family) + assertThat(supported).contains("big-ma", "big-mb", "big-mc", "big-md", "big-me", "big-mf"); + + // C* 4.0 cannot read C* 5.0 versions + assertThat(supported).doesNotContain("big-oa", "bti-da"); + } + + @Test + void testGetSupportedSStableVersionsForReadFiveZero() + { + Set supported = CassandraVersion.FIVEZERO.getSupportedSStableVersionsForRead(); + + // C* 5.0 can read its own versions + assertThat(supported).contains("big-oa", "bti-da"); + + // C* 5.0 can read C* 4.0 and 4.1 versions (previous major version family) + 
assertThat(supported).contains("big-na", "big-nb"); + + // C* 5.0 cannot read C* 3.0 versions (not in previous major version family) + assertThat(supported).doesNotContain("big-ma", "big-mb", "big-mc", "big-md", "big-me", "big-mf"); + } + + @Test + void testGetSupportedSStableVersionsForReadThreeZero() + { + Set supported = CassandraVersion.THREEZERO.getSupportedSStableVersionsForRead(); + + // C* 3.0 can read its own versions + assertThat(supported).contains("big-ma", "big-mb", "big-mc", "big-md", "big-me", "big-mf"); + + // C* 3.0 cannot read C* 4.0+ versions + assertThat(supported).doesNotContain("big-na", "big-nb", "big-oa", "bti-da"); + + // C* 3.0's previous major version (2.x) is not defined, so only its own versions are readable + assertThat(supported).hasSize(6); // Only the 6 native 3.0 versions + } + + @Test + void testGetSupportedSStableVersionsForReadFourOne() + { + Set supported = CassandraVersion.FOURONE.getSupportedSStableVersionsForRead(); + + // C* 4.1 has no native SSTable versions of its own, but can read C* 4.0 and C* 3.0 versions + // C* 4.1 can read C* 4.0 versions (same major version family) + assertThat(supported).contains("big-na", "big-nb"); + + // C* 4.1 can read C* 3.0 versions (previous major version family) + assertThat(supported).contains("big-ma", "big-mb", "big-mc", "big-md", "big-me", "big-mf"); + + // C* 4.1 cannot read C* 5.0 versions + assertThat(supported).doesNotContain("big-oa", "bti-da"); + } + + @Test + void testGetNativeSStableVersionsFourZero() + { + List nativeVersions = CassandraVersion.FOURZERO.getNativeSStableVersions(); + assertThat(nativeVersions).containsExactlyInAnyOrder("big-na", "big-nb"); + } + + @Test + void testGetNativeSStableVersionsFiveZero() + { + List nativeVersions = CassandraVersion.FIVEZERO.getNativeSStableVersions(); + assertThat(nativeVersions).containsExactlyInAnyOrder("big-oa", "bti-da"); + } + + @Test + void testGetNativeSStableVersionsFourOneEmpty() + { + // C* 4.1 did not introduce new native 
SSTable versions + List nativeVersions = CassandraVersion.FOURONE.getNativeSStableVersions(); + assertThat(nativeVersions).isEmpty(); + } + + @Test + void testConfiguredSSTableFormatDefault() + { + // Assuming no system property set, should return "big" + String format = CassandraVersion.configuredSSTableFormat(); + assertThat(format).isNotNull(); + assertThat(format).isIn("big", "bti"); // Could be either depending on env + } + + @Test + void testSStableFormatsFourZero() + { + Set formats = CassandraVersion.FOURZERO.sstableFormats(); + assertThat(formats).containsExactly("big"); + } + + @Test + void testSStableFormatsFiveZero() + { + Set formats = CassandraVersion.FIVEZERO.sstableFormats(); + assertThat(formats).containsExactly("big", "bti"); + } +} diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/bridge/SSTableVersionAnalyzer.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/bridge/SSTableVersionAnalyzer.java new file mode 100644 index 000000000..0a0807a14 --- /dev/null +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/bridge/SSTableVersionAnalyzer.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cassandra.bridge; + +import java.util.Comparator; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.cassandra.spark.bulkwriter.BulkSparkConf; + +/** + * Determines which Cassandra bridge to load from the SSTable versions present on a cluster. + */ +public final class SSTableVersionAnalyzer +{ + private SSTableVersionAnalyzer() + { + // Utility class + } + + /** + * Determines the bridge version for bulk write from the SSTable versions present on the cluster. + * Picks the lowest Cassandra version present so the produced SSTables can be imported by every node. + * + * @param sstableVersionsOnCluster set of SSTable versions found on cluster nodes + * @param requestedFormat user's requested format, e.g. "big" or "bti" + * @return the CassandraVersion bridge to load for write + * @throws IllegalStateException if the versions are empty/null or contain an unknown version + * @throws UnsupportedOperationException if the versions are mutually incompatible, or the chosen + * bridge does not support the requested format + */ + public static CassandraVersion determineBridgeVersionForWrite(Set sstableVersionsOnCluster, String requestedFormat) + { + requireSSTableVersionsPresent(sstableVersionsOnCluster); + + // Every observed version must be mutually compatible (readable by the highest version present). 
+ ensureMutuallyCompatibleVersions(sstableVersionsOnCluster, highestCompatibleVersion(sstableVersionsOnCluster)); + + CassandraVersion bridgeVersion = cassandraVersionsFromSSTableVersions(sstableVersionsOnCluster) + .min(Comparator.comparingInt(CassandraVersion::versionNumber)) + .orElseThrow(() -> new IllegalStateException("Unable to determine the lowest SSTable version")); + + if (!bridgeVersion.sstableFormats().contains(requestedFormat)) + { + throw new UnsupportedOperationException(String.format( + "Cluster does not support requested SSTable format '%s'. Bridge version determined is %s, " + + "which only supports formats: %s", + requestedFormat, bridgeVersion.versionName(), bridgeVersion.sstableFormats())); + } + return bridgeVersion; + } + + /** + * Determines the bridge version for bulk read from the SSTable versions present on the cluster. + * Picks the highest Cassandra version present, which can read all (older) versions present. + * + * @param sstableVersionsOnCluster set of SSTable versions found on cluster nodes + * @return the CassandraVersion bridge to load for read + * @throws IllegalStateException if the versions are empty/null or contain an unknown version + * @throws UnsupportedOperationException if the versions are mutually incompatible + */ + public static CassandraVersion determineBridgeVersionForRead(Set sstableVersionsOnCluster) + { + requireSSTableVersionsPresent(sstableVersionsOnCluster); + + CassandraVersion highest = highestCompatibleVersion(sstableVersionsOnCluster); + ensureMutuallyCompatibleVersions(sstableVersionsOnCluster, highest); + return highest; + } + + /** + * Fails fast with an actionable hint when no SSTable versions could be retrieved while SSTable version-based + * bridge selection is enabled. Shared by the read and write paths. 
+ * + * @param sstableVersionsOnCluster the observed SSTable versions + * @throws IllegalStateException if {@code sstableVersionsOnCluster} is null or empty + */ + private static void requireSSTableVersionsPresent(Set sstableVersionsOnCluster) + { + if (sstableVersionsOnCluster == null || sstableVersionsOnCluster.isEmpty()) + { + throw new IllegalStateException(String.format( + "Unable to retrieve SSTable versions from cluster. This is required for SSTable version-based " + + "bridge selection. To bypass this check and use cassandra.version for bridge selection, set %s=true", + BulkSparkConf.DISABLE_SSTABLE_VERSION_BASED_BRIDGE)); + } + } + + /** + * Determines the bridge version for a coordinated bulk write across multiple clusters, given the per-cluster + * bridge versions. Writes at the lowest version present so the produced SSTables are importable by + * every cluster, but first verifies that lowest version's SSTables can actually be read (imported) by the + * highest version present; otherwise the clusters span incompatible Cassandra majors and the + * coordinated write cannot proceed. 
+ * + * @param bridgeVersions the per-cluster bridge versions + * @return the lowest version, which is the version to write at + * @throws IllegalStateException if {@code bridgeVersions} is null or empty + * @throws UnsupportedOperationException if the lowest version's SSTables cannot be imported by the highest + * version present + */ + public static CassandraVersion lowestCompatibleWriteVersionForCoordinatedWrites(Collection bridgeVersions) + { + if (bridgeVersions == null || bridgeVersions.isEmpty()) + { + throw new IllegalStateException("No cluster bridge versions available"); + } + + CassandraVersion highest = bridgeVersions.stream() + .max(Comparator.comparingInt(CassandraVersion::versionNumber)) + .orElseThrow(() -> new IllegalStateException("No cluster bridge versions available")); + CassandraVersion lowest = bridgeVersions.stream() + .min(Comparator.comparingInt(CassandraVersion::versionNumber)) + .orElseThrow(() -> new IllegalStateException("No cluster bridge versions available")); + + // Writing at the lowest version is only safe if the highest version present can import those SSTables. + if (!highest.canRead(lowest)) + { + throw new UnsupportedOperationException(String.format( + "Cluster bridge versions are not mutually compatible for a coordinated write: SSTables written at the " + + "lowest version present (%s) cannot be imported by the highest version present (%s, which reads %s). " + + "Per-cluster bridge versions: %s", + lowest.versionName(), highest.versionName(), highest.getSupportedSStableVersionsForRead(), bridgeVersions)); + } + + return lowest; + } + + /** + * Verifies that every observed SSTable version is readable by the given highest version present; otherwise + * the cluster spans incompatible Cassandra majors and the job cannot proceed. 
+ * + * @param sstableVersionsOnCluster the observed SSTable versions + * @param highest the highest Cassandra version present (see {@link #highestCompatibleVersion(Set)}) + * @throws UnsupportedOperationException if any observed version cannot be read by {@code highest} + */ + private static void ensureMutuallyCompatibleVersions(Set sstableVersionsOnCluster, CassandraVersion highest) + { + Set readable = highest.getSupportedSStableVersionsForRead(); + List incompatible = sstableVersionsOnCluster.stream() + .filter(version -> !readable.contains(version)) + .collect(Collectors.toList()); + if (!incompatible.isEmpty()) + { + throw new UnsupportedOperationException(String.format( + "SSTable versions on the cluster are not mutually compatible: %s cannot be read by the highest " + + "version present (%s, which reads %s). Observed SSTable versions: %s", + incompatible, highest.versionName(), readable, sstableVersionsOnCluster)); + } + } + + /** + * Returns the highest Cassandra version among the observed SSTable versions. 
+ * + * @throws IllegalStateException if the versions are empty/null or contain an unknown version + */ + private static CassandraVersion highestCompatibleVersion(Set sstableVersionsOnCluster) + { + return cassandraVersionsFromSSTableVersions(sstableVersionsOnCluster) + .max(Comparator.comparingInt(CassandraVersion::versionNumber)) + .orElseThrow(() -> new IllegalStateException("Unable to determine the highest SSTable version")); + } + + private static Stream cassandraVersionsFromSSTableVersions(Set sstableVersionsOnCluster) + { + if (sstableVersionsOnCluster == null || sstableVersionsOnCluster.isEmpty()) + { + throw new IllegalStateException("Unable to determine Cassandra versions: no SSTable versions found on the cluster"); + } + + return sstableVersionsOnCluster.stream().map(SSTableVersionAnalyzer::toCassandraVersion); + } + + private static CassandraVersion toCassandraVersion(String sstableVersion) + { + return CassandraVersion.fromSSTableVersion(sstableVersion) + .orElseThrow(() -> new IllegalStateException("Unknown SSTable version: " + sstableVersion)); + } +} diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/KryoRegister.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/KryoRegister.java index f53112169..a0cdff760 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/KryoRegister.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/KryoRegister.java @@ -22,7 +22,10 @@ import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -31,7 +34,6 @@ import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.Serializer; -import org.apache.cassandra.bridge.BaseCassandraBridgeFactory; import org.apache.cassandra.bridge.BigNumberConfigImpl; import 
org.apache.cassandra.bridge.CassandraBridgeFactory; import org.apache.cassandra.bridge.CassandraVersion; @@ -47,8 +49,6 @@ import org.apache.spark.serializer.KryoRegistrator; import org.jetbrains.annotations.NotNull; -import static org.apache.cassandra.spark.bulkwriter.BulkSparkConf.CASSANDRA_VERSION; - /** * Helper class to register classes for Kryo serialization */ @@ -104,26 +104,33 @@ public static void setup(@NotNull SparkConf configuration) LOGGER.info("Setting up Kryo"); configuration.set(SPARK_SERIALIZER, "org.apache.spark.serializer.KryoSerializer"); - // Add KryoRegister to SparkConf serialization if not already there + // Preserve any pre-existing (e.g. user-supplied) registrators and keep them first. + // LinkedHashSet gives a stable, predictable registration order on the driver; the same + // resulting spark.kryo.registrator string is then propagated to all executors via SparkConf. Set registratorsSet = Arrays.stream(configuration.get(SPARK_REGISTRATORS, "").split(",")) .filter(string -> string != null && !string.isEmpty()) - .collect(Collectors.toSet()); - - // TODO: Find a better way to initialize Kryo serializer, instead of relaying - // on Cassandra version specified as parameter of Spark job. Can we get Cassandra version from Sidecar? - CassandraVersion cassandraVersion = BaseCassandraBridgeFactory.getCassandraVersion(configuration.get(CASSANDRA_VERSION, "4.0.0")); - Class registratorClass = KRYO_REGISTRATORS.get(cassandraVersion); - if (registratorClass == null) + .collect(Collectors.toCollection(LinkedHashSet::new)); + + // SSTable based bridge selection feature selects the bridge version, which may differ + // from cassandra.version; registering every loadable bridge's registrator ensures Spark + // can serialize objects for whichever bridge is chosen. Only implemented (bundled) versions + // are used, so we never attempt to load a bridge JAR that is not available. 
+ List> registratorClasses = Arrays.stream(CassandraVersion.implementedVersions()) + .map(KRYO_REGISTRATORS::get) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + if (registratorClasses.isEmpty()) { - throw new IllegalArgumentException("Kryo registrator not configured for Cassandra version: " + cassandraVersion); + throw new IllegalStateException("No Kryo registrators configured for implemented Cassandra versions: " + + Arrays.toString(CassandraVersion.implementedVersions())); } - registratorsSet.add(registratorClass.getName()); + registratorClasses.forEach(registratorClass -> registratorsSet.add(registratorClass.getName())); String registratorsString = String.join(",", registratorsSet); LOGGER.info("Setting kryo registrators: " + registratorsString); configuration.set(SPARK_REGISTRATORS, registratorsString); - configuration.registerKryoClasses(new Class[]{registratorClass}); + configuration.registerKryoClasses(registratorClasses.toArray(new Class[0])); } public static class V40 extends KryoRegister diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/AbstractBulkWriterContext.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/AbstractBulkWriterContext.java index 447285ff3..7f051e77a 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/AbstractBulkWriterContext.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/AbstractBulkWriterContext.java @@ -76,7 +76,7 @@ public abstract class AbstractBulkWriterContext implements BulkWriterContext, Kr private final JobInfo jobInfo; private final ClusterInfo clusterInfo; private final SchemaInfo schemaInfo; - private final String lowestCassandraVersion; + private final String bridgeVersion; // Note: do not declare transient fields as final; but they need to be volatile as there could be contention when recreating them after deserialization // For the transient field, they 
are assigned null once deserialized, remember to use getOrRebuildAfterDeserialization for their getters private transient volatile CassandraBridge bridge; @@ -101,7 +101,7 @@ protected AbstractBulkWriterContext(@NotNull BulkSparkConf conf, // Build everything fresh on driver this.clusterInfo = buildClusterInfo(); this.clusterInfo.startupValidate(); - this.lowestCassandraVersion = findLowestCassandraVersion(); + this.bridgeVersion = findBridgeVersion(); this.bridge = buildCassandraBridge(); this.jobInfo = buildJobInfo(); this.schemaInfo = buildSchemaInfo(structType); @@ -123,7 +123,7 @@ protected AbstractBulkWriterContext(@NotNull BulkWriterConfig config) // Reconstruct from broadcast data on executor this.clusterInfo = reconstructClusterInfoOnExecutor(config.getBroadcastableClusterInfo()); - this.lowestCassandraVersion = config.getLowestCassandraVersion(); + this.bridgeVersion = config.getBridgeVersion(); this.bridge = buildCassandraBridge(); this.jobInfo = reconstructJobInfoOnExecutor(config.getBroadcastableJobInfo()); this.schemaInfo = reconstructSchemaInfoOnExecutor(config.getBroadcastableSchemaInfo()); @@ -141,9 +141,9 @@ protected final int sparkDefaultParallelism() return sparkDefaultParallelism; } - protected String lowestCassandraVersion() + protected String bridgeVersion() { - return lowestCassandraVersion; + return bridgeVersion; } /*--- Methods to build required fields ---*/ @@ -216,7 +216,7 @@ protected JobInfo buildJobInfo() protected CassandraBridge buildCassandraBridge() { - return CassandraBridgeFactory.get(lowestCassandraVersion()); + return CassandraBridgeFactory.get(bridgeVersion()); } protected TransportContext buildTransportContext(boolean isOnDriver) @@ -229,9 +229,9 @@ protected JobStatsPublisher buildJobStatsPublisher() return new LogStatsPublisher(); } - protected String findLowestCassandraVersion() + protected String findBridgeVersion() { - return cluster().getLowestCassandraVersion(); + return cluster().getBridgeVersion(); } protected 
SchemaInfo buildSchemaInfo(StructType structType) @@ -248,7 +248,7 @@ protected SchemaInfo buildSchemaInfo(StructType structType) CqlTable cqlTable = bridge().buildSchema(createTableSchema, keyspace, replicationFactor, partitioner, udts, null, indexCount, false); TableInfoProvider tableInfoProvider = new CqlTableInfoProvider(createTableSchema, cqlTable); - TableSchema tableSchema = initializeTableSchema(bulkSparkConf(), structType, tableInfoProvider, lowestCassandraVersion()); + TableSchema tableSchema = initializeTableSchema(bulkSparkConf(), structType, tableInfoProvider, bridgeVersion()); return new CassandraSchemaInfo(tableSchema, udts); } @@ -313,14 +313,14 @@ public void shutdown() protected TableSchema initializeTableSchema(@NotNull BulkSparkConf conf, @NotNull StructType dfSchema, TableInfoProvider tableInfoProvider, - String lowestCassandraVersion) + String bridgeVersion) { return new TableSchema(dfSchema, tableInfoProvider, conf.writeMode, conf.getTTLOptions(), conf.getTimestampOptions(), - lowestCassandraVersion, + bridgeVersion, job().qualifiedTableName().quoteIdentifiers(), conf.skipSecondaryIndexCheck); } diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableClusterInfo.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableClusterInfo.java index de06151ef..ec36fcb99 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableClusterInfo.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableClusterInfo.java @@ -45,7 +45,7 @@ public final class BroadcastableClusterInfo implements IBroadcastableClusterInfo // Essential fields broadcast to executors private final Partitioner partitioner; - private final String cassandraVersion; + private final String bridgeVersion; private final String clusterId; private final BulkSparkConf conf; @@ -58,16 +58,16 @@ public final class 
BroadcastableClusterInfo implements IBroadcastableClusterInfo */ public static BroadcastableClusterInfo from(@NotNull ClusterInfo source, @NotNull BulkSparkConf conf) { - return new BroadcastableClusterInfo(source.getPartitioner(), source.getLowestCassandraVersion(), source.clusterId(), conf); + return new BroadcastableClusterInfo(source.getPartitioner(), source.getBridgeVersion(), source.clusterId(), conf); } private BroadcastableClusterInfo(@NotNull Partitioner partitioner, - @NotNull String cassandraVersion, + @NotNull String bridgeVersion, @Nullable String clusterId, @NotNull BulkSparkConf conf) { this.partitioner = partitioner; - this.cassandraVersion = cassandraVersion; + this.bridgeVersion = bridgeVersion; this.clusterId = clusterId; this.conf = conf; } @@ -78,9 +78,9 @@ public BulkSparkConf getConf() } @Override - public String getLowestCassandraVersion() + public String getBridgeVersion() { - return cassandraVersion; + return bridgeVersion; } @Override diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableClusterInfoGroup.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableClusterInfoGroup.java index d5ad1e13d..5084b72b6 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableClusterInfoGroup.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableClusterInfoGroup.java @@ -34,7 +34,7 @@ * Broadcastable wrapper for coordinated writes with ZERO transient fields to optimize Spark broadcasting. *

 <p>
 * This class wraps multiple BroadcastableCluster instances for multi-cluster scenarios. - * Pre-computed values (partitioner, lowestCassandraVersion) are extracted from CassandraClusterInfoGroup on the driver + * Pre-computed values (partitioner, bridgeVersion) are extracted from CassandraClusterInfoGroup on the driver * to avoid duplicating aggregation/validation logic on executors. *
 <p>
* Why ZERO transient fields matters:
@@ -56,11 +56,11 @@ public final class BroadcastableClusterInfoGroup implements IBroadcastableCluste private final String clusterId; private final BulkSparkConf conf; private final Partitioner partitioner; - private final String lowestCassandraVersion; + private final String bridgeVersion; /** * Creates a BroadcastableClusterInfoGroup from a source ClusterInfo group. - * Extracts pre-computed values (partitioner, lowestCassandraVersion) from the source + * Extracts pre-computed values (partitioner, bridgeVersion) from the source * to avoid duplicating aggregation/validation logic on executors. * * @param source the source CassandraClusterInfoGroup @@ -75,22 +75,22 @@ public static BroadcastableClusterInfoGroup from(@NotNull CassandraClusterInfoGr // Extract pre-computed values from CassandraClusterInfoGroup // These have already been validated/computed on the driver Partitioner partitioner = source.getPartitioner(); - String lowestVersion = source.getLowestCassandraVersion(); + String bridgeVersion = source.getBridgeVersion(); - return new BroadcastableClusterInfoGroup(broadcastableInfos, source.clusterId(), conf, partitioner, lowestVersion); + return new BroadcastableClusterInfoGroup(broadcastableInfos, source.clusterId(), conf, partitioner, bridgeVersion); } private BroadcastableClusterInfoGroup(List clusterInfos, String clusterId, BulkSparkConf conf, Partitioner partitioner, - String lowestCassandraVersion) + String bridgeVersion) { this.clusterInfos = Collections.unmodifiableList(clusterInfos); this.conf = conf; this.clusterId = clusterId; this.partitioner = partitioner; - this.lowestCassandraVersion = lowestCassandraVersion; + this.bridgeVersion = bridgeVersion; } @Override @@ -101,11 +101,11 @@ public BulkSparkConf getConf() } @Override - public String getLowestCassandraVersion() + public String getBridgeVersion() { // Return pre-computed value from CassandraClusterInfoGroup // No need to duplicate aggregation/validation logic - return lowestCassandraVersion; 
+ return bridgeVersion; } @Override diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableTableSchema.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableTableSchema.java index 322e815dd..69aebf9b2 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableTableSchema.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BroadcastableTableSchema.java @@ -58,7 +58,7 @@ public final class BroadcastableTableSchema implements Serializable private final WriteMode writeMode; private final TTLOption ttlOption; private final TimestampOption timestampOption; - private final String lowestCassandraVersion; + private final String bridgeVersion; private final boolean quoteIdentifiers; /** @@ -80,7 +80,7 @@ public static BroadcastableTableSchema from(@NotNull TableSchema source) source.writeMode, source.ttlOption, source.timestampOption, - source.lowestCassandraVersion, + source.bridgeVersion, source.quoteIdentifiers ); } @@ -94,7 +94,7 @@ private BroadcastableTableSchema(String createStatement, WriteMode writeMode, TTLOption ttlOption, TimestampOption timestampOption, - String lowestCassandraVersion, + String bridgeVersion, boolean quoteIdentifiers) { this.createStatement = createStatement; @@ -106,7 +106,7 @@ private BroadcastableTableSchema(String createStatement, this.writeMode = writeMode; this.ttlOption = ttlOption; this.timestampOption = timestampOption; - this.lowestCassandraVersion = lowestCassandraVersion; + this.bridgeVersion = bridgeVersion; this.quoteIdentifiers = quoteIdentifiers; } @@ -155,9 +155,9 @@ public TimestampOption getTimestampOption() return timestampOption; } - public String getLowestCassandraVersion() + public String getBridgeVersion() { - return lowestCassandraVersion; + return bridgeVersion; } public boolean isQuoteIdentifiers() diff --git 
a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BulkSparkConf.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BulkSparkConf.java index a01c2d1e3..d6c41ab7d 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BulkSparkConf.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BulkSparkConf.java @@ -117,6 +117,9 @@ public class BulkSparkConf implements Serializable // Cassandra version of target cluster. Configuration parameter is exposed to be able to correctly initialize static // components, before cluster version is discovered via Sidecar. public static final String CASSANDRA_VERSION = SETTING_PREFIX + "cassandra.version"; + // Disable SSTable version-based bridge determination. When true, falls back to using cassandra.version for bridge selection. + // This provides a safety fallback mechanism if SSTable version detection fails or encounters issues. 
+ public static final String DISABLE_SSTABLE_VERSION_BASED_BRIDGE = SETTING_PREFIX + "bridge.disable_sstable_version_based"; public static final String HTTP_MAX_CONNECTIONS = SETTING_PREFIX + "request.max_connections"; public static final String HTTP_RESPONSE_TIMEOUT = SETTING_PREFIX + "request.response_timeout"; public static final String HTTP_CONNECTION_TIMEOUT = SETTING_PREFIX + "request.connection_timeout"; @@ -169,6 +172,7 @@ public class BulkSparkConf implements Serializable protected final String configuredJobId; protected boolean useOpenSsl; protected int ringRetryCount; + protected final boolean disableSSTableVersionBasedBridge; // create sidecarInstances from sidecarContactPointsValue and effectiveSidecarPort private final String sidecarContactPointsValue; // It takes comma separated values private transient Set sidecarContactPoints; // not serialized @@ -221,6 +225,7 @@ public BulkSparkConf(SparkConf conf, Map options, @Nullable Logg // else fall back to props, and then default if neither specified this.useOpenSsl = getBoolean(USE_OPENSSL, true); this.ringRetryCount = getInt(RING_RETRY_COUNT, DEFAULT_RING_RETRY_COUNT); + this.disableSSTableVersionBasedBridge = getBoolean(DISABLE_SSTABLE_VERSION_BASED_BRIDGE, false); this.importCoordinatorTimeoutMultiplier = getDouble(IMPORT_COORDINATOR_TIMEOUT_MULTIPLIER, 0.5); this.ttl = MapUtils.getOrDefault(options, WriterOptions.TTL.name(), null); this.timestamp = MapUtils.getOrDefault(options, WriterOptions.TIMESTAMP.name(), null); @@ -732,6 +737,11 @@ public int getRingRetryCount() return ringRetryCount; } + public boolean isSSTableVersionBasedBridgeDisabled() + { + return disableSSTableVersionBasedBridge; + } + public StorageClientConfig getStorageClientConfig() { return storageClientConfig; diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BulkWriterConfig.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BulkWriterConfig.java index 
0e126fc95..621acaf48 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BulkWriterConfig.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/BulkWriterConfig.java @@ -21,6 +21,7 @@ import java.io.Serializable; +import org.apache.cassandra.bridge.CassandraVersion; import org.apache.cassandra.spark.bulkwriter.cloudstorage.coordinated.CassandraCoordinatedBulkWriterContext; import org.jetbrains.annotations.NotNull; @@ -56,31 +57,31 @@ public class BulkWriterConfig implements Serializable // BroadcastableClusterInfo can be either BroadcastableCluster or BroadcastableClusterInfoGroup private final IBroadcastableClusterInfo clusterInfo; private final BroadcastableSchemaInfo schemaInfo; - private final String lowestCassandraVersion; + private final String bridgeVersion; /** * Creates a new immutable BulkWriterConfig with pre-computed values * - * @param conf Bulk writer Spark configuration - * @param sparkDefaultParallelism Spark default parallelism setting - * @param jobInfo Broadcastable job information - * @param clusterInfo Broadcastable cluster information (BroadcastableCluster or BroadcastableClusterInfoGroup) - * @param schemaInfo Broadcastable schema information - * @param lowestCassandraVersion Lowest Cassandra version in the cluster + * @param conf Bulk writer Spark configuration + * @param sparkDefaultParallelism Spark default parallelism setting + * @param jobInfo Broadcastable job information + * @param clusterInfo Broadcastable cluster information (BroadcastableCluster or BroadcastableClusterInfoGroup) + * @param schemaInfo Broadcastable schema information + * @param bridgeVersion Cassandra bridge version to use */ public BulkWriterConfig(@NotNull BulkSparkConf conf, int sparkDefaultParallelism, @NotNull BroadcastableJobInfo jobInfo, @NotNull IBroadcastableClusterInfo clusterInfo, @NotNull BroadcastableSchemaInfo schemaInfo, - @NotNull String lowestCassandraVersion) + @NotNull String 
bridgeVersion) { this.conf = conf; this.sparkDefaultParallelism = sparkDefaultParallelism; this.jobInfo = jobInfo; this.clusterInfo = clusterInfo; this.schemaInfo = schemaInfo; - this.lowestCassandraVersion = lowestCassandraVersion; + this.bridgeVersion = bridgeVersion; } public BulkSparkConf getConf() @@ -108,9 +109,9 @@ public BroadcastableSchemaInfo getBroadcastableSchemaInfo() return schemaInfo; } - public String getLowestCassandraVersion() + public String getBridgeVersion() { - return lowestCassandraVersion; + return bridgeVersion; } /** diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraBulkWriterContext.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraBulkWriterContext.java index e658b3dc7..92721c5a4 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraBulkWriterContext.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraBulkWriterContext.java @@ -95,6 +95,6 @@ public BulkWriterConfig toBulkWriterConfigForBroadcasting(JavaSparkContext spark broadcastableJobInfo, broadcastableClusterInfo, broadcastableSchemaInfo, - lowestCassandraVersion()); + bridgeVersion()); } } diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraClusterInfo.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraClusterInfo.java index a9c120871..7ef25b099 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraClusterInfo.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraClusterInfo.java @@ -47,7 +47,9 @@ import o.a.c.sidecar.client.shaded.common.response.TokenRangeReplicasResponse; import org.apache.cassandra.bridge.CassandraBridge; import org.apache.cassandra.bridge.CassandraBridgeFactory; +import 
org.apache.cassandra.bridge.CassandraVersion; import org.apache.cassandra.bridge.CassandraVersionFeatures; +import org.apache.cassandra.bridge.SSTableVersionAnalyzer; import org.apache.cassandra.clients.Sidecar; import o.a.c.sidecar.client.shaded.client.SidecarInstance; import o.a.c.sidecar.client.shaded.client.SidecarInstanceImpl; @@ -84,7 +86,7 @@ public class CassandraClusterInfo implements ClusterInfo, Closeable // Changes here must be reflected in BroadcastableClusterInfo. protected final BulkSparkConf conf; protected final String clusterId; - protected String cassandraVersion; + protected String bridgeVersion; protected Partitioner partitioner; // -- Driver-only fields (not broadcast) -- @@ -117,7 +119,7 @@ public CassandraClusterInfo(BulkSparkConf conf, String clusterId) /** * Reconstruct from BroadcastableCluster on executor. - * Reuses cassandraVersion and partitioner from broadcast, + * Reuses bridgeVersion and partitioner from broadcast, * fetches other data (tokenRangeMapping, replicationFactor, keyspaceSchema, writeAvailability) fresh from Sidecar. * * @param broadcastable the broadcastable cluster info from broadcast @@ -126,12 +128,12 @@ public CassandraClusterInfo(BroadcastableClusterInfo broadcastable) { this.conf = broadcastable.getConf(); this.clusterId = broadcastable.clusterId(); - this.cassandraVersion = broadcastable.getLowestCassandraVersion(); this.partitioner = broadcastable.getPartitioner(); + this.bridgeVersion = broadcastable.getBridgeVersion(); this.cassandraContext = buildCassandraContext(); LOGGER.info("Reconstructing CassandraClusterInfo on executor from BroadcastableCluster. 
clusterId={}", clusterId); this.nodeSettings = new AtomicReference<>(null); - // Executors do not need to query all node settings since cassandraVersion is already set from broadcast + // Executors do not need to query all node settings since the bridge version is already set from broadcast this.allNodeSettingFutures = null; } @@ -390,32 +392,67 @@ public TokenRangeMapping getTokenRangeMapping(boolean cached) } } + /** + * Returns the Cassandra bridge version for this cluster, computed lazily on first use and cached. + * Mirrors {@link #getVersionFromSidecar()}'s lazy pattern: it works on the driver (where the value is + * computed from the cluster) and on executors (where the cached value is seeded from the broadcast + * {@link BroadcastableClusterInfo}, so no re-query is needed). The determination itself is in + * {@link #determineBridgeVersion()}. + * + * @return the determined Cassandra bridge version string (e.g. "5.0.0") + */ @Override - public String getLowestCassandraVersion() + public String getBridgeVersion() { - String currentCassandraVersion = cassandraVersion; - if (currentCassandraVersion != null) + String currentBridgeVersion = bridgeVersion; + if (currentBridgeVersion != null) { - return currentCassandraVersion; + return currentBridgeVersion; } synchronized (this) { - if (cassandraVersion == null) + if (bridgeVersion == null) { - String versionFromFeature = getVersionFromFeature(); - if (versionFromFeature != null) - { - // Forcing writer to use a particular version - cassandraVersion = versionFromFeature; - } - else - { - cassandraVersion = getVersionFromSidecar(); - } + bridgeVersion = determineBridgeVersion(); } + return bridgeVersion; } - return cassandraVersion; + } + + /** + * Determines the Cassandra bridge version for this cluster, in priority order: + *

<ol> + *
<li>an explicit version override from {@link #getVersionFromFeature()} (an operator escape hatch) — + * takes precedence even when SSTable-version-based selection is enabled;</li> + *
<li>otherwise, when SSTable version-based selection is enabled, the lowest version derived from the + * SSTable versions present, so the produced SSTables remain importable by every node;</li> + *
<li>otherwise (feature disabled), the lowest Cassandra release version reported by the cluster.</li> + *
</ol>
+ * + * @return a version string for the determined bridge (e.g. "5.0.0") + */ + private String determineBridgeVersion() + { + String versionOverride = getVersionFromFeature(); + if (versionOverride != null) + { + // Forcing writer to use a particular version; validate it is a supported version up front + CassandraVersion.fromVersion(versionOverride) + .orElseThrow(() -> new UnsupportedOperationException( + "Unsupported Cassandra version override: " + versionOverride)); + return versionOverride; + } + + if (!conf.isSSTableVersionBasedBridgeDisabled()) + { + CassandraVersion determined = SSTableVersionAnalyzer.determineBridgeVersionForWrite(getSSTableVersionsOnCluster(), + CassandraVersion.configuredSSTableFormat()); + // Return a full major.minor.patch string so it parses via CassandraVersionFeatures downstream + return determined.versionName() + ".0"; + } + + return getVersionFromSidecar(); } @Override @@ -515,9 +552,27 @@ public String getLowestVersion(List allNodeSettings) return ns.releaseVersion(); } + /** + * Retrieves SSTable versions using the existing cassandraContext. + * Reuses the existing CassandraContext instead of creating a separate one. 
+ * + * @return set of SSTable version strings present on the cluster + */ + public Set getSSTableVersionsOnCluster() + { + CassandraContext context = getCassandraContext(); + + return Sidecar.getSSTableVersionsFromCluster( + context.getSidecarClient(), + context.getCluster(), + conf.getSidecarRequestMaxRetryDelayMillis(), + conf.getSidecarRequestRetries() + ); + } + protected CassandraBridge bridge() { - return CassandraBridgeFactory.get(getLowestCassandraVersion()); + return CassandraBridgeFactory.get(getBridgeVersion()); } // Startup Validation diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraDirectDataTransportContext.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraDirectDataTransportContext.java index bed36ac4b..68eb19b7e 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraDirectDataTransportContext.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/CassandraDirectDataTransportContext.java @@ -71,7 +71,7 @@ public DirectDataTransferApi dataTransferApi() // only invoke in constructor protected DirectDataTransferApi createDirectDataTransferApi() { - CassandraBridge bridge = CassandraBridgeFactory.get(clusterInfo.getLowestCassandraVersion()); + CassandraBridge bridge = CassandraBridgeFactory.get(clusterInfo.getBridgeVersion()); return new SidecarDataTransferApi(clusterInfo.getCassandraContext(), bridge, jobInfo); } } diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/ClusterInfo.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/ClusterInfo.java index 64b654449..93e39bd2c 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/ClusterInfo.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/ClusterInfo.java @@ -51,7 +51,7 @@ public interface 
ClusterInfo extends StartupValidatable TokenRangeMapping getTokenRangeMapping(boolean cached); - String getLowestCassandraVersion(); + String getBridgeVersion(); /** * @return WriteAvailability per RingInstance in the cluster diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/IBroadcastableClusterInfo.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/IBroadcastableClusterInfo.java index fb4eb2e29..8a311a151 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/IBroadcastableClusterInfo.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/IBroadcastableClusterInfo.java @@ -37,7 +37,7 @@ * Methods in this interface: *
<ul> *
<li>{@link #getPartitioner()} - static cluster partitioner configuration</li> - *
<li>{@link #getLowestCassandraVersion()} - pre-computed version string</li> + *
<li>{@link #getBridgeVersion()} - pre-computed version string</li> *
<li>{@link #clusterId()} - cluster identifier (optional)</li> *
<li>{@link #getConf()} - BulkSparkConf needed for reconstruction on executors</li> *
<li>{@link #reconstruct()} - reconstructs full ClusterInfo instance on executors</li>
  • @@ -51,9 +51,9 @@ public interface IBroadcastableClusterInfo extends Serializable Partitioner getPartitioner(); /** - * @return the lowest Cassandra version in the cluster + * @return the pre-computed Cassandra bridge version determined on the driver */ - String getLowestCassandraVersion(); + String getBridgeVersion(); /** * ID string that can uniquely identify a cluster. diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/SSTableWriterFactory.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/SSTableWriterFactory.java index 77b8f5f42..7aee4f602 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/SSTableWriterFactory.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/SSTableWriterFactory.java @@ -33,7 +33,7 @@ private SSTableWriterFactory() throw new IllegalStateException(getClass() + " is static utility class and shall not be instantiated"); } - public static SSTableWriter getSSTableWriter(CassandraVersionFeatures serverVersion, + public static SSTableWriter getSSTableWriter(CassandraVersionFeatures bridgeVersion, String inDirectory, String partitioner, String createStatement, @@ -41,7 +41,7 @@ public static SSTableWriter getSSTableWriter(CassandraVersionFeatures serverVers Set userDefinedTypeStatements, int bufferSizeMB) { - CassandraBridge cassandraBridge = CassandraBridgeFactory.get(serverVersion); + CassandraBridge cassandraBridge = CassandraBridgeFactory.get(bridgeVersion); return cassandraBridge.getSSTableWriter(inDirectory, partitioner, createStatement, diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/SortedSSTableWriter.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/SortedSSTableWriter.java index 877aafa15..804051781 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/SortedSSTableWriter.java 
+++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/SortedSSTableWriter.java @@ -120,8 +120,8 @@ public SortedSSTableWriter(BulkWriterContext writerContext, Path outDir, DigestA this.digestAlgorithm = digestAlgorithm; this.partitionId = partitionId; - String lowestCassandraVersion = writerContext.cluster().getLowestCassandraVersion(); - String packageVersion = getPackageVersion(lowestCassandraVersion); + String bridgeVersion = writerContext.cluster().getBridgeVersion(); + String packageVersion = getPackageVersion(bridgeVersion); LOGGER.info("Running with version {}", packageVersion); SchemaInfo schema = writerContext.schema(); @@ -137,9 +137,9 @@ public SortedSSTableWriter(BulkWriterContext writerContext, Path outDir, DigestA } @NotNull - public String getPackageVersion(String lowestCassandraVersion) + public String getPackageVersion(String bridgeVersion) { - return CASSANDRA_VERSION_PREFIX + lowestCassandraVersion; + return CASSANDRA_VERSION_PREFIX + bridgeVersion; } /** @@ -363,7 +363,7 @@ public void validateSSTables(@NotNull BulkWriterContext writerContext, @NotNull private LocalDataLayer buildLocalDataLayer(@NotNull BulkWriterContext writerContext, @NotNull Path outputDirectory, @Nullable Set dataFilePaths) { - CassandraVersion version = CassandraBridgeFactory.getCassandraVersion(writerContext.cluster().getLowestCassandraVersion()); + CassandraVersion version = CassandraBridgeFactory.getCassandraVersion(writerContext.cluster().getBridgeVersion()); String keyspace = writerContext.job().qualifiedTableName().keyspace(); String schema = writerContext.schema().getTableSchema().createStatement; Partitioner partitioner = writerContext.cluster().getPartitioner(); diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/TableSchema.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/TableSchema.java index b198af984..2efbd81cf 100644 --- 
a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/TableSchema.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/TableSchema.java @@ -60,7 +60,7 @@ public class TableSchema final WriteMode writeMode; final TTLOption ttlOption; final TimestampOption timestampOption; - final String lowestCassandraVersion; + final String bridgeVersion; final boolean quoteIdentifiers; public TableSchema(StructType dfSchema, @@ -68,7 +68,7 @@ public TableSchema(StructType dfSchema, WriteMode writeMode, TTLOption ttlOption, TimestampOption timestampOption, - String lowestCassandraVersion, + String bridgeVersion, boolean quoteIdentifiers, boolean skipSecondaryIndexCheck) { @@ -76,7 +76,7 @@ public TableSchema(StructType dfSchema, this.ttlOption = ttlOption; this.timestampOption = timestampOption; - this.lowestCassandraVersion = lowestCassandraVersion; + this.bridgeVersion = bridgeVersion; this.quoteIdentifiers = quoteIdentifiers; validateDataFrameCompatibility(dfSchema, tableInfo); @@ -95,7 +95,7 @@ else if (tableInfo.hasSecondaryIndex()) + "take place automatically after writing. 
Reads against the index during this time " + "window will produce inconsistent or stale results until index rebuild is complete."); } - validateUserAddedColumns(lowestCassandraVersion, quoteIdentifiers, ttlOption, timestampOption); + validateUserAddedColumns(bridgeVersion, quoteIdentifiers, ttlOption, timestampOption); this.createStatement = getCreateStatement(tableInfo); this.modificationStatement = getModificationStatement(dfSchema, tableInfo); @@ -123,7 +123,7 @@ public TableSchema(BroadcastableTableSchema broadcastable) this.writeMode = broadcastable.getWriteMode(); this.ttlOption = broadcastable.getTtlOption(); this.timestampOption = broadcastable.getTimestampOption(); - this.lowestCassandraVersion = broadcastable.getLowestCassandraVersion(); + this.bridgeVersion = broadcastable.getBridgeVersion(); this.quoteIdentifiers = broadcastable.isQuoteIdentifiers(); } @@ -198,7 +198,7 @@ private String getInsertStatement(StructType dfSchema, TTLOption ttlOption, TimestampOption timestampOption) { - CassandraBridge bridge = CassandraBridgeFactory.get(lowestCassandraVersion); + CassandraBridge bridge = CassandraBridgeFactory.get(bridgeVersion); List columnNames = Arrays.stream(dfSchema.fieldNames()) .filter(fieldName -> !fieldName.equals(ttlOption.columnName())) @@ -242,7 +242,7 @@ else if (ttlOption.withTTl()) private String getDeleteStatement(StructType dfSchema, TableInfoProvider tableInfo) { - CassandraBridge bridge = CassandraBridgeFactory.get(lowestCassandraVersion); + CassandraBridge bridge = CassandraBridgeFactory.get(bridgeVersion); Stream fieldEqualityStatements = Arrays.stream(dfSchema.fieldNames()).map(key -> maybeQuotedIdentifier(bridge, quoteIdentifiers, key) + "=?"); String deleteStatement = String.format("DELETE FROM %s.%s where %s;", maybeQuotedIdentifier(bridge, quoteIdentifiers, tableInfo.getKeyspaceName()), @@ -327,12 +327,12 @@ private static List getKeyFieldPositions(StructType dfSchema, .collect(Collectors.toList()); } - private static void 
validateUserAddedColumns(String lowestCassandraVersion, boolean quoteIdentifiers, + private static void validateUserAddedColumns(String bridgeVersion, boolean quoteIdentifiers, TTLOption ttlOption, TimestampOption timestampOption) { if (!quoteIdentifiers) { - CassandraBridge bridge = CassandraBridgeFactory.get(lowestCassandraVersion); + CassandraBridge bridge = CassandraBridgeFactory.get(bridgeVersion); validateColumnName(bridge, ttlOption.columnName(), WriterOptions.TTL.name()); validateColumnName(bridge, timestampOption.columnName(), WriterOptions.TIMESTAMP.name()); } diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/CloudStorageStreamSession.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/CloudStorageStreamSession.java index d2a401f29..b749a3597 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/CloudStorageStreamSession.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/CloudStorageStreamSession.java @@ -82,7 +82,7 @@ public CloudStorageStreamSession(BulkWriterContext bulkWriterContext, SortedSSTa ExecutorService executorService) { this(bulkWriterContext, sstableWriter, transportContext, sessionID, tokenRange, - CassandraBridgeFactory.get(bulkWriterContext.cluster().getLowestCassandraVersion()), + CassandraBridgeFactory.get(bulkWriterContext.cluster().getBridgeVersion()), failureHandler, executorService); } diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraClusterInfoGroup.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraClusterInfoGroup.java index 4a434a7a5..fc8f7e909 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraClusterInfoGroup.java +++ 
b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraClusterInfoGroup.java @@ -40,7 +40,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.cassandra.bridge.CassandraVersionFeatures; +import org.apache.cassandra.bridge.CassandraVersion; +import org.apache.cassandra.bridge.SSTableVersionAnalyzer; import org.apache.cassandra.spark.bulkwriter.BulkSparkConf; import org.apache.cassandra.spark.bulkwriter.CassandraClusterInfo; import org.apache.cassandra.spark.bulkwriter.CassandraContext; @@ -84,7 +85,7 @@ public class CassandraClusterInfoGroup implements ClusterInfo, MultiClusterSuppo private volatile TokenRangeMapping consolidatedTokenRangeMapping; // Pre-computed values from BroadcastableClusterInfoGroup (only set when reconstructed on executors) private Partitioner cachedPartitioner; - private String cachedLowestCassandraVersion; + private String cachedBridgeVersion; /** * Creates {@link CassandraClusterInfoGroup} with the list of {@link ClusterInfo} from {@link BulkSparkConf} and validation @@ -100,7 +101,7 @@ public static CassandraClusterInfoGroup fromBulkSparkConf(BulkSparkConf conf) /** * Reconstruct from BroadcastableClusterInfoGroup on executor. * Creates CassandraClusterInfo instances for each cluster that will fetch data from Sidecar. - * Leverages pre-computed values (partitioner, lowestCassandraVersion) from the broadcastable + * Leverages pre-computed values (partitioner, bridgeVersion) from the broadcastable * to avoid re-validation and re-computation on executors. 
* * @param broadcastable the broadcastable cluster info group from broadcast @@ -175,7 +176,7 @@ private CassandraClusterInfoGroup(BroadcastableClusterInfoGroup broadcastable) // Extract pre-computed values from driver to avoid re-validation on executors this.cachedPartitioner = broadcastable.getPartitioner(); - this.cachedLowestCassandraVersion = broadcastable.getLowestCassandraVersion(); + this.cachedBridgeVersion = broadcastable.getBridgeVersion(); this.clusterId = broadcastable.clusterId(); clusterInfoById(); } @@ -212,35 +213,41 @@ public TokenRangeMapping getTokenRangeMapping(boolean cached) } /** - * @return the lowest cassandra version among all clusters + * Determines the Cassandra bridge version for a coordinated write across all clusters. + * + *

<p>Each cluster determines its own bridge version (see {@link CassandraClusterInfo#getBridgeVersion()}), + * which is the lowest mutually-compatible SSTable version present on that cluster. Across clusters the + * lowest of those is chosen so the produced SSTables remain importable by every cluster (a node can import + * its own and older SSTable versions, but not newer ones). The selection fails if the lowest version's + * SSTables cannot be imported by the highest version present across clusters. + * + *

<p>Computed on demand; the aggregate across clusters is not cached by this method on the driver. On executors the pre-computed value seeded from the broadcastable is returned. + * + * @return the determined Cassandra bridge version */ @Override - public String getLowestCassandraVersion() + public String getBridgeVersion() { // Return cached value if available (executor-side reconstruction) - if (cachedLowestCassandraVersion != null) + if (cachedBridgeVersion != null) { - return cachedLowestCassandraVersion; + return cachedBridgeVersion; } if (clusterInfos.size() == 1) { - return clusterInfos.get(0).getLowestCassandraVersion(); + return clusterInfos.get(0).getBridgeVersion(); } - Map aggregated = applyOnEach(ClusterInfo::getLowestCassandraVersion); - List versions = aggregated.values() - .stream() - .map(CassandraVersionFeatures::cassandraVersionFeaturesFromCassandraVersion) - .sorted() - .collect(Collectors.toList()); - CassandraVersionFeatures first = versions.get(0); - CassandraVersionFeatures last = versions.get(versions.size() - 1); - Preconditions.checkState(first.getMajorVersion() == last.getMajorVersion(), - "Cluster versions are not compatible. lowest=%s and highest=%s", - first.getRawVersionString(), last.getRawVersionString()); - - return first.getRawVersionString(); + // Write at the lowest so every cluster can import the produced SSTables, but only after verifying the + // lowest version's SSTables are importable by the highest version present. 
+ List bridgeVersions = clusterInfos.stream() + .map(ClusterInfo::getBridgeVersion) + .map(version -> CassandraVersion.fromVersion(version) + .orElseThrow(() -> new UnsupportedOperationException("Unsupported Cassandra version: " + version))) + .collect(Collectors.toList()); + CassandraVersion lowest = SSTableVersionAnalyzer.lowestCompatibleWriteVersionForCoordinatedWrites(bridgeVersions); + return lowest.versionName() + ".0"; } @Override diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraCoordinatedBulkWriterContext.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraCoordinatedBulkWriterContext.java index 6cd199bf2..5b9f2ff15 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraCoordinatedBulkWriterContext.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraCoordinatedBulkWriterContext.java @@ -134,6 +134,6 @@ public BulkWriterConfig toBulkWriterConfigForBroadcasting(JavaSparkContext spark broadcastableJobInfo, broadcastableClusterInfo, broadcastableSchemaInfo, - lowestCassandraVersion()); + bridgeVersion()); } } diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/data/CassandraDataLayer.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/data/CassandraDataLayer.java index 99cbc6827..e2931aa63 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/data/CassandraDataLayer.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/data/CassandraDataLayer.java @@ -68,6 +68,7 @@ import org.apache.cassandra.bridge.CassandraBridge; import org.apache.cassandra.bridge.CassandraBridgeFactory; import org.apache.cassandra.bridge.CassandraVersion; +import org.apache.cassandra.bridge.SSTableVersionAnalyzer; import 
org.apache.cassandra.clients.ExecutorHolder; import org.apache.cassandra.clients.Sidecar; import org.apache.cassandra.secrets.SslConfig; @@ -77,6 +78,7 @@ import o.a.c.sidecar.client.shaded.client.SidecarInstanceImpl; import o.a.c.sidecar.client.shaded.client.SimpleSidecarInstancesProvider; import o.a.c.sidecar.client.shaded.client.exception.RetriesExhaustedException; +import org.apache.cassandra.spark.bulkwriter.BulkSparkConf; import org.apache.cassandra.spark.common.SidecarInstanceFactory; import org.apache.cassandra.spark.common.SizingFactory; import org.apache.cassandra.spark.config.SchemaFeature; @@ -98,6 +100,7 @@ import org.apache.cassandra.spark.validation.SidecarValidation; import org.apache.cassandra.spark.validation.StartupValidatable; import org.apache.cassandra.spark.validation.StartupValidator; +import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.types.DataType; import org.apache.spark.util.ShutdownHookManager; @@ -140,6 +143,7 @@ public class CassandraDataLayer extends PartitionedDataLayer implements StartupV protected Map rfMap; @Nullable protected String lastModifiedTimestampField; + protected Set sstableVersionsOnCluster; // volatile in order to publish the reference for visibility protected volatile CqlTable cqlTable; protected transient TimeProvider timeProvider; @@ -185,7 +189,7 @@ protected CassandraDataLayer(@Nullable String keyspace, @Nullable SslConfig sslConfig, @NotNull CqlTable cqlTable, @NotNull TokenPartitioner tokenPartitioner, - @NotNull CassandraVersion version, + @NotNull CassandraVersion bridgeVersion, @NotNull ConsistencyLevel consistencyLevel, @NotNull String sidecarInstances, @NotNull int sidecarPort, @@ -198,11 +202,12 @@ protected CassandraDataLayer(@Nullable String keyspace, List requestedFeatures, @NotNull Map rfMap, TimeProvider timeProvider, - SSTableTimeRangeFilter sstableTimeRangeFilter) + SSTableTimeRangeFilter sstableTimeRangeFilter, + Set 
sstableVersionsOnCluster) { super(consistencyLevel, datacenter); this.snapshotName = snapshotName; - this.bridge = CassandraBridgeFactory.get(version); + this.bridge = CassandraBridgeFactory.get(bridgeVersion); this.keyspace = keyspace; this.table = table; this.quoteIdentifiers = quoteIdentifiers; @@ -225,6 +230,7 @@ protected CassandraDataLayer(@Nullable String keyspace, this.rfMap = rfMap; this.timeProvider = timeProvider; this.sstableTimeRangeFilter = sstableTimeRangeFilter; + this.sstableVersionsOnCluster = sstableVersionsOnCluster; this.maybeQuoteKeyspaceAndTable(); this.initSidecarClient(); this.initInstanceMap(); @@ -272,7 +278,11 @@ private int initBulkReader(@NotNull ClientConfig options) throws ExecutionExcept NodeSettings nodeSettings = sidecar.nodeSettings().get(); String cassandraVersion = getEffectiveCassandraVersionForRead(clusterConfig, nodeSettings); Partitioner partitioner = Partitioner.from(nodeSettings.partitioner()); - bridge = CassandraBridgeFactory.get(cassandraVersion); + + // Initialize SSTable versions and bridge version + CassandraVersion bridgeVersion = initializeSSTableVersionsAndBridgeVersion(cassandraVersion); + bridge = CassandraBridgeFactory.get(bridgeVersion); + // optionally quote identifiers if the option has been set, we need an instance for the bridge maybeQuoteKeyspaceAndTable(); @@ -324,6 +334,79 @@ private int initBulkReader(@NotNull ClientConfig options) throws ExecutionExcept return effectiveNumberOfCores; } + /** + * Checks if SSTable version-based bridge selection is disabled, reading the flag from the active + * (driver-side) {@link SparkSession}. Protected to allow test overrides without a SparkSession. + * + * @return true if disabled, false if enabled + */ + @VisibleForTesting + protected boolean isSSTableVersionBasedBridgeDisabled() + { + // Read from the active SparkSession on the driver. 
This intentionally does not create a SparkContext: + // SparkSession.active() fails fast if called without an active/default session (e.g. on an executor), + // rather than silently building a master-less context. + return SparkSession.active() + .sparkContext() + .getConf() + .getBoolean(BulkSparkConf.DISABLE_SSTABLE_VERSION_BASED_BRIDGE, false); + } + + /** + * Initializes SSTable versions from cluster gossip and determines bridge version. + * + * @param cassandraVersion the Cassandra version + * @return the determined bridge version + */ + @VisibleForTesting + protected CassandraVersion initializeSSTableVersionsAndBridgeVersion(String cassandraVersion) + { + // Check if user has explicitly disabled SSTable version-based selection via Spark configuration + boolean isSSTableVersionBasedBridgeDisabled = isSSTableVersionBasedBridgeDisabled(); + + CassandraVersion bridgeVersion; + if (isSSTableVersionBasedBridgeDisabled) + { + // Disabled: skip cluster SSTable-version retrieval and fall back to the configured cassandra.version. + // Use an empty set (never null) so downstream code - including executor-side validation and + // serialization - needs no null handling; on executors an empty set signals the feature was disabled + // on the driver. HashSet specifically, because Kryo reads this field back via kryo.readObject(in, HashSet.class). + this.sstableVersionsOnCluster = new HashSet<>(); + bridgeVersion = CassandraVersion.fromVersion(cassandraVersion) + .orElseThrow(() -> new UnsupportedOperationException( + "Unsupported Cassandra version: " + cassandraVersion)); + } + else + { + // Wrap in a HashSet: retrieveSSTableVersionsFromCluster() may return Collections.emptySet() + // or an unspecified Set type from Collectors.toSet(), but Kryo reads this field back via + // kryo.readObject(in, HashSet.class), so the concrete type must be HashSet. 
+ this.sstableVersionsOnCluster = new HashSet<>(retrieveSSTableVersionsFromCluster()); + // Pick the highest (mutually-compatible) version present on the cluster. determineBridgeVersionForRead + // fails fast with an actionable hint when no SSTable versions were retrieved while the feature is enabled. + bridgeVersion = SSTableVersionAnalyzer.determineBridgeVersionForRead(sstableVersionsOnCluster); + } + + return bridgeVersion; + } + + /** + * Retrieves SSTable versions from cluster via Sidecar gossip. + * Protected to allow test overrides. + * + * @return set of SSTable versions from cluster + */ + @VisibleForTesting + protected Set retrieveSSTableVersionsFromCluster() + { + return Sidecar.getSSTableVersionsFromCluster( + sidecar, + clusterConfig, + sidecarClientConfig.maxMillisToSleep(), + sidecarClientConfig.maxRetries() + ); + } + protected void shutdownHook(ClientConfig options) { ClientConfig.ClearSnapshotStrategy clearSnapshotStrategy = options.clearSnapshotStrategy(); @@ -625,7 +708,7 @@ private List collectSSTableList(SidecarInstance sidecarInstance, } // Map to SSTable - return result.values().stream() + List sstables = result.values().stream() .map(components -> new SidecarProvisionedSSTable(sidecar, sidecarClientConfig, sidecarInstance, @@ -636,6 +719,11 @@ private List collectSSTableList(SidecarInstance sidecarInstance, partitionId, stats())) .collect(Collectors.toList()); + + // Validate SSTable versions against expected versions from gossip + validateSStableVersions(sstables); + + return sstables; } @Override @@ -773,6 +861,7 @@ private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundE this.rfMap = (Map) in.readObject(); this.timeProvider = new ReaderTimeProvider(in.readLong()); this.sstableTimeRangeFilter = (SSTableTimeRangeFilter) in.readObject(); + this.sstableVersionsOnCluster = (Set) in.readObject(); this.maybeQuoteKeyspaceAndTable(); this.initSidecarClient(); this.initInstanceMap(); @@ -819,6 +908,7 @@ private void 
writeObject(ObjectOutputStream out) throws IOException, ClassNotFou out.writeObject(this.rfMap); out.writeLong(timeProvider.referenceEpochInSeconds()); out.writeObject(this.sstableTimeRangeFilter); + out.writeObject(this.sstableVersionsOnCluster); } private static void writeNullable(ObjectOutputStream out, @Nullable String string) throws IOException @@ -844,6 +934,67 @@ private static String readNullable(ObjectInputStream in) throws IOException return null; } + /** + * Validates that all SSTables being read have versions that were observed in gossip info. + * This catches cases where SSTables have unexpected versions that weren't seen during driver initialization. + * This validation runs on executors. + * + * @param sstables list of SSTables to validate + * @throws UnsupportedOperationException if any SSTable has a version not in expected sstable versions + */ + @VisibleForTesting + void validateSStableVersions(List sstables) + { + Set expectedVersions = this.sstableVersionsOnCluster; + // An empty (or null) expected set means SSTable version-based bridge selection was disabled + // on the driver. In enabled mode the driver fails fast when no versions are observed, so + // executors never see an empty set in that mode; hence empty here == disabled -> skip. + // This avoids reading Spark configuration on executors (no SparkContext is available there). + if (expectedVersions == null || expectedVersions.isEmpty()) + { + LOGGER.debug("Skipping SSTable version validation on executor - no expected versions; " + + "SSTable version based bridge selection is disabled (set {}=true)", + BulkSparkConf.DISABLE_SSTABLE_VERSION_BASED_BRIDGE); + return; + } + + // Accept any SSTable version the already-selected read bridge can read, not just the exact set observed + // in the driver's gossip snapshot. 
The bridge was determined on the driver (the highest version present) + // and reconstructed here; a flush/compaction producing a still-readable version between the snapshot and + // the read should not spuriously fail. Genuinely unreadable versions (e.g. a newer major than the bridge) + // are still rejected. + Set readableVersions = bridge().getVersion().getSupportedSStableVersionsForRead(); + + for (SSTable ssTable : sstables) + { + String ssTableFileName = ssTable.getDataFileName(); + // Extract full version string (e.g., "big-nb" from the filename) + String ssTableVersion = ssTable.getFormat() + "-" + ssTable.getVersion(); + + if (!readableVersions.contains(ssTableVersion)) + { + String errorMessage = String.format( + "SSTable '%s' has version '%s' which is not readable by the bridge selected from cluster gossip info. " + + "Versions observed in gossip: %s. Versions readable by the selected bridge: %s. " + + "To retry using cassandra.version based bridge selection, " + + "set %s=true", + ssTableFileName, + ssTableVersion, + expectedVersions, + readableVersions, + BulkSparkConf.DISABLE_SSTABLE_VERSION_BASED_BRIDGE); + LOGGER.error(errorMessage); + throw new UnsupportedOperationException(errorMessage); + } + } + + if (!sstables.isEmpty()) + { + LOGGER.debug("Validated {} SSTable(s) against versions readable by the selected bridge: {}", + sstables.size(), readableVersions); + } + } + // Kryo Serialization public static class Serializer extends com.esotericsoftware.kryo.Serializer @@ -895,6 +1046,7 @@ public void write(Kryo kryo, Output out, CassandraDataLayer dataLayer) kryo.writeObject(out, dataLayer.rfMap); out.writeLong(dataLayer.timeProvider.referenceEpochInSeconds()); kryo.writeObject(out, dataLayer.sstableTimeRangeFilter); + kryo.writeObject(out, dataLayer.sstableVersionsOnCluster); } @SuppressWarnings("unchecked") @@ -937,7 +1089,8 @@ public CassandraDataLayer read(Kryo kryo, Input in, Class ty kryo.readObject(in, SchemaFeaturesListWrapper.class).toList(), 
kryo.readObject(in, HashMap.class), new ReaderTimeProvider(in.readLong()), - kryo.readObject(in, SSTableTimeRangeFilter.class)); + kryo.readObject(in, SSTableTimeRangeFilter.class), + kryo.readObject(in, HashSet.class)); } // Wrapper only used internally for Kryo serialization/deserialization diff --git a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/data/LocalDataLayer.java b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/data/LocalDataLayer.java index 0a30cde03..d48001c94 100644 --- a/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/data/LocalDataLayer.java +++ b/cassandra-analytics-core/src/main/java/org/apache/cassandra/spark/data/LocalDataLayer.java @@ -187,12 +187,12 @@ public static LocalDataLayer from(Map options) getOrThrow(options, lowerCaseKey("dirs")).split(",")); } - public LocalDataLayer(@NotNull CassandraVersion version, + public LocalDataLayer(@NotNull CassandraVersion bridgeVersion, @NotNull String keyspace, @NotNull String createStatement, String... paths) { - this(version, + this(bridgeVersion, Partitioner.Murmur3Partitioner, keyspace, createStatement, @@ -204,13 +204,13 @@ public LocalDataLayer(@NotNull CassandraVersion version, paths); } - public LocalDataLayer(@NotNull CassandraVersion version, + public LocalDataLayer(@NotNull CassandraVersion bridgeVersion, @NotNull String keyspace, @NotNull String createStatement, @NotNull Set udtStatements, String... 
paths) { - this(version, + this(bridgeVersion, Partitioner.Murmur3Partitioner, keyspace, createStatement, @@ -223,7 +223,7 @@ public LocalDataLayer(@NotNull CassandraVersion version, } // CHECKSTYLE IGNORE: Constructor with many parameters - public LocalDataLayer(@NotNull CassandraVersion version, + public LocalDataLayer(@NotNull CassandraVersion bridgeVersion, @NotNull Partitioner partitioner, @NotNull String keyspace, @NotNull String createStatement, @@ -234,7 +234,7 @@ public LocalDataLayer(@NotNull CassandraVersion version, @NotNull SSTableTimeRangeFilter sstableTimeRangeFilter, String... paths) { - this.bridge = CassandraBridgeFactory.get(version); + this.bridge = CassandraBridgeFactory.get(bridgeVersion); this.partitioner = partitioner; this.cqlTable = bridge().buildSchema(createStatement, keyspace, @@ -259,7 +259,7 @@ public LocalDataLayer(@NotNull CassandraVersion version, } // For serialization - private LocalDataLayer(@NotNull CassandraVersion version, + private LocalDataLayer(@NotNull CassandraVersion bridgeVersion, @NotNull Partitioner partitioner, @NotNull CqlTable cqlTable, @NotNull String jobId, @@ -269,7 +269,7 @@ private LocalDataLayer(@NotNull CassandraVersion version, @NotNull SSTableTimeRangeFilter sstableTimeRangeFilter, String... paths) { - this.bridge = CassandraBridgeFactory.get(version); + this.bridge = CassandraBridgeFactory.get(bridgeVersion); this.partitioner = partitioner; this.cqlTable = cqlTable; this.jobId = jobId; diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/bridge/SSTableVersionAnalyzerTest.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/bridge/SSTableVersionAnalyzerTest.java new file mode 100644 index 000000000..b34f22fab --- /dev/null +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/bridge/SSTableVersionAnalyzerTest.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.bridge; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Stream; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for SSTableVersionAnalyzer + */ +public class SSTableVersionAnalyzerTest +{ + // --- determineBridgeVersionForWrite: picks the LOWEST mutually-compatible version --- + + static Stream writeLowestVersionCases() + { + return Stream.of( + Arguments.of(Collections.singleton("big-oa"), "big", CassandraVersion.FIVEZERO), + Arguments.of(new HashSet<>(Arrays.asList("big-na", "big-nb")), "big", CassandraVersion.FOURZERO), + // mixed 4.0 + 5.0 sstables: lowest that every node can import is 4.0 + Arguments.of(new HashSet<>(Arrays.asList("big-na", "big-oa")), "big", CassandraVersion.FOURZERO), + Arguments.of(Collections.singleton("bti-da"), "bti", CassandraVersion.FIVEZERO) + ); + } + + @ParameterizedTest + @MethodSource("writeLowestVersionCases") + void 
testDetermineBridgeVersionForWritePicksLowest(Set versions, String format, CassandraVersion expected) + { + assertThat(SSTableVersionAnalyzer.determineBridgeVersionForWrite(versions, format)).isEqualTo(expected); + } + + @Test + void testDetermineBridgeVersionForWriteRejectsUnsupportedFormat() + { + // lowest version is FOURZERO, which does not support bti + assertThatThrownBy(() -> SSTableVersionAnalyzer.determineBridgeVersionForWrite( + new HashSet<>(Arrays.asList("big-na", "big-oa")), "bti")) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining("does not support requested SSTable format 'bti'"); + } + + @Test + void testDetermineBridgeVersionForWriteRejectsIncompatibleVersions() + { + // big-mf (3.0) cannot be read by the highest version present (5.0) + assertThatThrownBy(() -> SSTableVersionAnalyzer.determineBridgeVersionForWrite( + new HashSet<>(Arrays.asList("big-mf", "big-oa")), "big")) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining("not mutually compatible"); + } + + static Stream nullOrEmptyCases() + { + return Stream.of(Arguments.of(Collections.emptySet()), Arguments.of((Set) null)); + } + + @ParameterizedTest + @MethodSource("nullOrEmptyCases") + void testDetermineBridgeVersionForWriteNullOrEmptyThrows(Set versions) + { + assertThatThrownBy(() -> SSTableVersionAnalyzer.determineBridgeVersionForWrite(versions, "big")) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("Unable to retrieve SSTable versions from cluster") + .hasMessageContaining("disable_sstable_version_based"); + } + + @Test + void testDetermineBridgeVersionForWriteUnknownVersionThrows() + { + assertThatThrownBy(() -> SSTableVersionAnalyzer.determineBridgeVersionForWrite(Collections.singleton("unknown-xx"), "big")) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("Unknown SSTable version"); + } + + // --- determineBridgeVersionForRead: picks the HIGHEST mutually-compatible version --- + + static 
Stream readHighestVersionCases() + { + return Stream.of( + Arguments.of(Collections.singleton("big-oa"), CassandraVersion.FIVEZERO), + Arguments.of(new HashSet<>(Arrays.asList("big-na", "big-nb")), CassandraVersion.FOURZERO), + Arguments.of(new HashSet<>(Arrays.asList("big-na", "big-oa")), CassandraVersion.FIVEZERO) + ); + } + + @ParameterizedTest + @MethodSource("readHighestVersionCases") + void testDetermineBridgeVersionForReadPicksHighest(Set versions, CassandraVersion expected) + { + assertThat(SSTableVersionAnalyzer.determineBridgeVersionForRead(versions)).isEqualTo(expected); + } + + @Test + void testDetermineBridgeVersionForReadRejectsIncompatibleVersions() + { + assertThatThrownBy(() -> SSTableVersionAnalyzer.determineBridgeVersionForRead( + new HashSet<>(Arrays.asList("big-mf", "big-oa")))) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining("not mutually compatible"); + } + + @ParameterizedTest + @MethodSource("nullOrEmptyCases") + void testDetermineBridgeVersionForReadNullOrEmptyThrows(Set versions) + { + assertThatThrownBy(() -> SSTableVersionAnalyzer.determineBridgeVersionForRead(versions)) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("Unable to retrieve SSTable versions from cluster") + .hasMessageContaining("disable_sstable_version_based"); + } + + // --- lowestCompatibleWriteVersionForCoordinatedWrites: coordinated multi-cluster write target --- + + @Test + void testLowestCompatibleWriteVersionPicksLowestWhenCompatible() + { + // 4.0 + 5.0: SSTables written at 4.0 are importable by 5.0, so the lowest (4.0) is chosen + assertThat(SSTableVersionAnalyzer.lowestCompatibleWriteVersionForCoordinatedWrites( + Arrays.asList(CassandraVersion.FOURZERO, CassandraVersion.FIVEZERO))) + .isEqualTo(CassandraVersion.FOURZERO); + } + + @Test + void testLowestCompatibleWriteVersionSameVersion() + { + assertThat(SSTableVersionAnalyzer.lowestCompatibleWriteVersionForCoordinatedWrites( + 
Arrays.asList(CassandraVersion.FIVEZERO, CassandraVersion.FIVEZERO))) + .isEqualTo(CassandraVersion.FIVEZERO); + } + + @Test + void testLowestCompatibleWriteVersionRejectsIncompatibleMajors() + { + // 3.0 + 5.0: SSTables written at 3.0 cannot be imported by 5.0 -> reject + assertThatThrownBy(() -> SSTableVersionAnalyzer.lowestCompatibleWriteVersionForCoordinatedWrites( + Arrays.asList(CassandraVersion.THREEZERO, CassandraVersion.FIVEZERO))) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining("not mutually compatible"); + } + + @Test + void testLowestCompatibleWriteVersionNullOrEmptyThrows() + { + assertThatThrownBy(() -> SSTableVersionAnalyzer.lowestCompatibleWriteVersionForCoordinatedWrites(Collections.emptyList())) + .isInstanceOf(IllegalStateException.class); + assertThatThrownBy(() -> SSTableVersionAnalyzer.lowestCompatibleWriteVersionForCoordinatedWrites(null)) + .isInstanceOf(IllegalStateException.class); + } +} diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/KryoRegisterTest.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/KryoRegisterTest.java new file mode 100644 index 000000000..396e28703 --- /dev/null +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/KryoRegisterTest.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import org.junit.jupiter.api.Test; + +import org.apache.cassandra.bridge.CassandraVersion; +import org.apache.spark.SparkConf; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatNoException; + +/** + * Unit tests for KryoRegister + */ +public class KryoRegisterTest +{ + @Test + void testKryoRegistratorClassesAreCorrect() + { + assertThat(KryoRegister.KRYO_REGISTRATORS.get(CassandraVersion.FOURZERO)) + .isEqualTo(KryoRegister.V40.class); + + assertThat(KryoRegister.KRYO_REGISTRATORS.get(CassandraVersion.FOURONE)) + .isEqualTo(KryoRegister.V41.class); + + assertThat(KryoRegister.KRYO_REGISTRATORS.get(CassandraVersion.FIVEZERO)) + .isEqualTo(KryoRegister.V50.class); + } + + @Test + void testSetupRegistersAllImplementedVersions() + { + SparkConf conf = new SparkConf(); + KryoRegister.setup(conf); + + List expected = Arrays.stream(CassandraVersion.implementedVersions()) + .map(KryoRegister.KRYO_REGISTRATORS::get) + .map(Class::getName) + .collect(Collectors.toList()); + + // setup() must register a registrator for every implemented (bundled) bridge version, + // independent of spark.cassandra_analytics.cassandra.version, so serialization works for + // whichever bridge the SSTable-version analyzer selects at runtime. 
+ assertThat(expected).isNotEmpty(); + List registrators = Arrays.asList(conf.get("spark.kryo.registrator").split(",")); + assertThat(registrators).containsAll(expected); + } + + @Test + void testSetupDoesNotDependOnCassandraVersionConfig() + { + // Even with a cassandra.version that differs from the bridge that may be selected, + // setup() registers all implemented versions and never throws based on that config. + SparkConf conf = new SparkConf() + .set("spark.cassandra_analytics.cassandra.version", "5.0.0"); + assertThatNoException().isThrownBy(() -> KryoRegister.setup(conf)); + assertThat(conf.get("spark.serializer")).isEqualTo("org.apache.spark.serializer.KryoSerializer"); + } + + @Test + void testSetupPreservesExistingRegistrators() + { + SparkConf conf = new SparkConf() + .set("spark.kryo.registrator", "com.example.CustomRegistrator"); + KryoRegister.setup(conf); + + List registrators = Arrays.asList(conf.get("spark.kryo.registrator").split(",")); + assertThat(registrators).contains("com.example.CustomRegistrator"); + assertThat(registrators.get(0)) + .describedAs("pre-existing registrators should be kept first (deterministic order)") + .isEqualTo("com.example.CustomRegistrator"); + } +} diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/AbstractBulkWriterContextTest.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/AbstractBulkWriterContextTest.java new file mode 100644 index 000000000..57810c7ac --- /dev/null +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/AbstractBulkWriterContextTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.bulkwriter; + +import java.util.UUID; + +import org.junit.jupiter.api.Test; + +import org.apache.cassandra.spark.bulkwriter.cloudstorage.coordinated.MultiClusterContainer; +import org.apache.spark.sql.types.StructType; +import org.jetbrains.annotations.NotNull; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +class AbstractBulkWriterContextTest +{ + @Test + void testKryoRegistrationWarningMessage() + { + // Test that the KRYO_REGISTRATION_WARNING constant exists and has expected content + assertThat(AbstractBulkWriterContext.KRYO_REGISTRATION_WARNING) + .isNotNull() + .contains("Spark Bulk Writer Kryo Registrator") + .contains("SbwKryoRegistrator") + .contains("was not registered with Spark"); + } + + @Test + void testContextUsesDeterminedBridgeVersion() + { + // The context's bridge version is whatever getBridgeVersion() resolves to. The determination logic + // itself (override / SSTable-version-based / legacy) lives on CassandraClusterInfo and is covered by + // CassandraClusterInfoBridgeVersionTest. 
+ BulkSparkConf conf = mock(BulkSparkConf.class); + StructType schema = mock(StructType.class); + TestBulkWriterContext context = TestBulkWriterContext.create(conf, schema, 1, "5.0.0"); + + assertThat(context.bridgeVersion()).isEqualTo("5.0.0"); + } + + /** + * Concrete test implementation of AbstractBulkWriterContext for testing. The bridge version is supplied + * via the (mock) ClusterInfo returned by {@link #buildClusterInfo()} (the real determination is exercised + * at the ClusterInfo level). + */ + static class TestBulkWriterContext extends AbstractBulkWriterContext + { + private static String staticBridgeVersion; + + private TestBulkWriterContext(@NotNull BulkSparkConf conf, + @NotNull StructType structType, + int sparkDefaultParallelism) + { + super(conf, structType, sparkDefaultParallelism); + } + + static TestBulkWriterContext create(@NotNull BulkSparkConf conf, + @NotNull StructType structType, + int sparkDefaultParallelism, + String bridgeVersion) + { + staticBridgeVersion = bridgeVersion; + return new TestBulkWriterContext(conf, structType, sparkDefaultParallelism); + } + + @Override + protected ClusterInfo buildClusterInfo() + { + ClusterInfo clusterInfo = mock(ClusterInfo.class); + when(clusterInfo.getBridgeVersion()).thenReturn(staticBridgeVersion); + return clusterInfo; + } + + @Override + protected JobInfo buildJobInfo() + { + // Return a mock JobInfo to avoid complex dependencies + return mock(JobInfo.class); + } + + @Override + protected SchemaInfo buildSchemaInfo(StructType structType) + { + // Return a mock SchemaInfo to avoid complex dependencies + return mock(SchemaInfo.class); + } + + @Override + protected TransportContext buildTransportContext(boolean isOnDriver) + { + // Return a mock TransportContext to avoid complex dependencies + return mock(TransportContext.class); + } + + @Override + protected void validateKeyspaceReplication() + { + // No-op for testing + } + + @Override + protected MultiClusterContainer generateRestoreJobIds() + { 
+ return null; + } + + @Override + public BulkWriterConfig toBulkWriterConfigForBroadcasting(org.apache.spark.api.java.JavaSparkContext sparkContext) + { + throw new UnsupportedOperationException("Not needed for test"); + } + } +} diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/BulkSparkConfTest.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/BulkSparkConfTest.java index 944f0a186..5fbca3504 100644 --- a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/BulkSparkConfTest.java +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/BulkSparkConfTest.java @@ -394,6 +394,38 @@ void testStorageCredentialTypeInvalidValue() .hasMessageContaining("Invalid value 'INVALID' for option 'STORAGE_CREDENTIAL_TYPE'"); } + @Test + void testDefaultDisableSSTableVersionBasedBridge() + { + Map options = copyDefaultOptions(); + BulkSparkConf conf = new BulkSparkConf(sparkConf, options); + assertThat(conf.isSSTableVersionBasedBridgeDisabled()) + .describedAs("By default, SSTable version based bridge should be enabled (false)") + .isFalse(); + } + + @Test + void testSSTableVersionBasedBridgeDisabledViaSparkConf() + { + SparkConf conf = new SparkConf() + .set(BulkSparkConf.DISABLE_SSTABLE_VERSION_BASED_BRIDGE, "true"); + BulkSparkConf bulkConf = new BulkSparkConf(conf, defaultOptions); + assertThat(bulkConf.isSSTableVersionBasedBridgeDisabled()) + .describedAs("SSTable version based bridge should be disabled when configured") + .isTrue(); + } + + @Test + void testSSTableVersionBasedBridgeEnabledExplicitly() + { + SparkConf conf = new SparkConf() + .set(BulkSparkConf.DISABLE_SSTABLE_VERSION_BASED_BRIDGE, "false"); + BulkSparkConf bulkConf = new BulkSparkConf(conf, defaultOptions); + assertThat(bulkConf.isSSTableVersionBasedBridgeDisabled()) + .describedAs("SSTable version based bridge should be enabled") + .isFalse(); + } + private Map copyDefaultOptions() { 
TreeMap map = new TreeMap<>(String.CASE_INSENSITIVE_ORDER); diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/BulkWriterConfigExtensibilityTest.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/BulkWriterConfigExtensibilityTest.java index 380c44600..f0da1fb39 100644 --- a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/BulkWriterConfigExtensibilityTest.java +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/BulkWriterConfigExtensibilityTest.java @@ -93,7 +93,8 @@ void testReconstructJobInfoOnExecutorCanBeOverridden() BulkSparkConf mockConf = mock(BulkSparkConf.class); BroadcastableJobInfo mockJobInfo = mock(BroadcastableJobInfo.class); BroadcastableSchemaInfo mockSchemaInfo = mock(BroadcastableSchemaInfo.class); - BulkWriterConfig config = new BulkWriterConfig(mockConf, 4, mockJobInfo, mock(IBroadcastableClusterInfo.class), mockSchemaInfo, "4.0.0"); + BulkWriterConfig config = new BulkWriterConfig(mockConf, 4, mockJobInfo, mock(IBroadcastableClusterInfo.class), + mockSchemaInfo, "4.0.0"); // Subclass that overrides reconstructJobInfoOnExecutor to return custom JobInfo TestBulkWriterContext context = new TestBulkWriterContext(config) diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/CassandraClusterInfoBridgeVersionTest.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/CassandraClusterInfoBridgeVersionTest.java new file mode 100644 index 000000000..608fc9f1f --- /dev/null +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/CassandraClusterInfoBridgeVersionTest.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.spark.bulkwriter; + +import java.util.Collections; +import java.util.Set; + +import org.junit.jupiter.api.Test; + +import o.a.c.sidecar.client.shaded.client.SidecarClient; +import o.a.c.sidecar.client.shaded.client.SidecarInstance; +import org.apache.cassandra.bridge.CassandraVersion; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Unit tests for {@link CassandraClusterInfo#getBridgeVersion()} — the bridge-version priority chain: + * version override (operator escape hatch) > SSTable-version-based selection > legacy cassandra.version. + * + *

    No mocking framework is used for the cluster connectivity (an in-memory {@link CassandraContext} with no + * Sidecar client); only the {@link BulkSparkConf} feature flag is stubbed. The cluster-derived inputs + * (feature override, SSTable versions, lowest version) are supplied via a test subclass that also records + * whether each was consulted. + */ +public class CassandraClusterInfoBridgeVersionTest +{ + @Test + void testOverrideWinsWhenFeatureEnabled() + { + TestClusterInfo info = new TestClusterInfo(conf(false), "4.0.0", Collections.singleton("big-oa"), "5.0.0"); + + assertThat(info.getBridgeVersion()).isEqualTo("4.0.0"); + // override short-circuits: neither SSTable versions nor the lowest version are consulted + assertThat(info.sstableVersionsCalls).isEqualTo(0); + assertThat(info.lowestVersionCalls).isEqualTo(0); + } + + @Test + void testOverrideWinsWhenFeatureDisabled() + { + TestClusterInfo info = new TestClusterInfo(conf(true), "4.0.0", Collections.singleton("big-oa"), "5.0.0"); + + assertThat(info.getBridgeVersion()).isEqualTo("4.0.0"); + assertThat(info.sstableVersionsCalls).isEqualTo(0); + assertThat(info.lowestVersionCalls).isEqualTo(0); + } + + @Test + void testSSTableVersionBasedWhenEnabledAndNoOverride() + { + // SSTable-version-based selection picks the lowest version whose SSTables every node can import. + // The viable versions depend on the configured write format: bti is a 5.0-only format, so a bti job + // resolves to 5.0; a big job with mixed 4.0 + 5.0 SSTables writes at the lowest importable (4.0). + boolean bti = "bti".equals(CassandraVersion.configuredSSTableFormat()); + Set sstableVersions = bti + ? Collections.singleton("bti-da") + : new java.util.HashSet<>(java.util.Arrays.asList("big-na", "big-oa")); + CassandraVersion expected = bti ? 
CassandraVersion.FIVEZERO : CassandraVersion.FOURZERO; + TestClusterInfo info = new TestClusterInfo(conf(false), null, sstableVersions, "5.0.0"); + + assertThat(info.getBridgeVersion()).isEqualTo(expected.versionName() + ".0"); + assertThat(info.sstableVersionsCalls).isEqualTo(1); + // SSTable-based selection does not consult the legacy lowest version + assertThat(info.lowestVersionCalls).isEqualTo(0); + } + + @Test + void testLegacyVersionWhenDisabledAndNoOverride() + { + TestClusterInfo info = new TestClusterInfo(conf(true), null, Collections.singleton("big-oa"), "5.0.0"); + + assertThat(info.getBridgeVersion()).isEqualTo("5.0.0"); + assertThat(info.lowestVersionCalls).isEqualTo(1); + // legacy path does not consult SSTable versions + assertThat(info.sstableVersionsCalls).isEqualTo(0); + } + + private static BulkSparkConf conf(boolean sstableVersionBasedBridgeDisabled) + { + BulkSparkConf conf = mock(BulkSparkConf.class); + when(conf.isSSTableVersionBasedBridgeDisabled()).thenReturn(sstableVersionBasedBridgeDisabled); + return conf; + } + + /** + * {@link CassandraClusterInfo} backed by an in-memory {@link CassandraContext} (no Sidecar), with the + * cluster-derived determination inputs supplied directly and call-counting so tests can assert which + * branch of the priority chain was taken. 
+ */ + private static final class TestClusterInfo extends CassandraClusterInfo + { + private final String versionFromFeature; + private final Set sstableVersions; + private final String lowestVersion; + private int sstableVersionsCalls = 0; + private int lowestVersionCalls = 0; + + TestClusterInfo(BulkSparkConf conf, String versionFromFeature, Set sstableVersions, String lowestVersion) + { + super(conf); + this.versionFromFeature = versionFromFeature; + this.sstableVersions = sstableVersions; + this.lowestVersion = lowestVersion; + } + + @Override + protected CassandraContext buildCassandraContext() + { + return new InMemoryCassandraContext(); + } + + @Override + public String getVersionFromFeature() + { + return versionFromFeature; + } + + @Override + public Set getSSTableVersionsOnCluster() + { + sstableVersionsCalls++; + return sstableVersions; + } + + @Override + public String getVersionFromSidecar() + { + lowestVersionCalls++; + return lowestVersion; + } + } + + /** + * A {@link CassandraContext} with no real Sidecar connectivity: an empty cluster and a null client. + * {@code Sidecar.allNodeSettings} is invoked with an empty instance set during construction, so the + * null client is never dereferenced. 
+ */ + private static final class InMemoryCassandraContext extends CassandraContext + { + InMemoryCassandraContext() + { + super(null, null); + } + + @Override + protected Set createClusterConfig() + { + return Collections.emptySet(); + } + + @Override + protected SidecarClient initializeSidecarClient(BulkSparkConf conf) + { + return null; + } + } +} diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/MockBulkWriterContext.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/MockBulkWriterContext.java index cb5564f0c..932a3e92a 100644 --- a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/MockBulkWriterContext.java +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/MockBulkWriterContext.java @@ -43,6 +43,7 @@ import org.apache.cassandra.bridge.CassandraBridge; import org.apache.cassandra.bridge.CassandraBridgeFactory; +import org.apache.cassandra.bridge.CassandraVersion; import org.apache.cassandra.spark.bulkwriter.cloudstorage.coordinated.CoordinatedWriteConf; import org.apache.cassandra.spark.bulkwriter.token.ConsistencyLevel; import org.apache.cassandra.spark.bulkwriter.token.ReplicaAwareFailureHandler; @@ -378,11 +379,19 @@ public void checkBulkWriterIsEnabledOrThrow() } @Override - public String getLowestCassandraVersion() + public String getBridgeVersion() { return cassandraVersion; } + public Set getSSTableVersionsOnCluster() + { + // Return SSTable versions based on Cassandra version for testing + CassandraVersion version = CassandraVersion.fromVersion(cassandraVersion) + .orElse(CassandraVersion.FIVEZERO); + return new HashSet<>(version.getNativeSStableVersions()); + } + private List buildCompleteBatchIds(List uuids) { return uuids.stream().map(uuid -> uuid + "-" + jobId).collect(Collectors.toList()); diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/SortedSSTableWriterTest.java 
b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/SortedSSTableWriterTest.java index 2bd5b5634..fba73361e 100644 --- a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/SortedSSTableWriterTest.java +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/SortedSSTableWriterTest.java @@ -131,7 +131,7 @@ public void canCreateWriterForVersion(String version) throws IOException break; case 50: // Format is "oa--big" or "da--bti" - if ("big".equals(CassandraVersion.sstableFormat())) + if ("big".equals(CassandraVersion.configuredSSTableFormat())) { assertThat(baseFileName).matches("oa-\\d+-big"); } diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/TableSchemaTestCommon.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/TableSchemaTestCommon.java index 701ca337d..ea3ffb9ba 100644 --- a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/TableSchemaTestCommon.java +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/TableSchemaTestCommon.java @@ -36,6 +36,7 @@ import org.apache.cassandra.bridge.CassandraBridge; import org.apache.cassandra.bridge.CassandraBridgeFactory; +import org.apache.cassandra.bridge.CassandraVersion; import org.apache.cassandra.spark.common.schema.ColumnType; import org.apache.cassandra.spark.data.CqlField; import org.apache.spark.sql.types.DataTypes; @@ -339,6 +340,9 @@ public TableSchema build() dataFrameSchema = dataFrameSchema.add(timestampOption.columnName(), DataTypes.LongType); updatedCqlColumns = addColumnToCqlColumns(updatedCqlColumns, timestampOption.columnName(), SqlToCqlTypeConverter.BIGINT); } + // Validate the configured version is supported; TableSchema itself takes the version string. 
+ CassandraVersion.fromVersion(cassandraVersion) + .orElseThrow(() -> new IllegalArgumentException("Unsupported Cassandra version: " + cassandraVersion)); MockTableInfoProvider tableInfoProvider = new MockTableInfoProvider(bridge, updatedCqlColumns, diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraClusterInfoGroupTest.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraClusterInfoGroupTest.java index 4825c8dea..af10673a8 100644 --- a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraClusterInfoGroupTest.java +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/bulkwriter/cloudstorage/coordinated/CassandraClusterInfoGroupTest.java @@ -90,12 +90,10 @@ void testDelegationOfSingleCluster() () -> Partitioner.Murmur3Partitioner, RingInstance::new); when(clusterInfo.getTokenRangeMapping(anyBoolean())).thenReturn(expectedTokenRangeMapping); - when(clusterInfo.getLowestCassandraVersion()).thenReturn("lowestCassandraVersion"); when(clusterInfo.clusterWriteAvailability()).thenReturn(Collections.emptyMap()); CassandraClusterInfoGroup group = mockClusterGroup(1, index -> clusterInfo); // Since there is a single clusterInfo in the group. 
It behaves as a simple delegation to the sole clusterInfo assertThat(group.clusterWriteAvailability()).isSameAs(clusterInfo.clusterWriteAvailability()); - assertThat(group.getLowestCassandraVersion()).isSameAs(clusterInfo.getLowestCassandraVersion()); assertThat(group.getTokenRangeMapping(true)).isSameAs(clusterInfo.getTokenRangeMapping(true)); } @@ -138,30 +136,6 @@ void testAggregateWriteAvailability() .containsValues(WriteAvailability.AVAILABLE, WriteAvailability.UNAVAILABLE_DOWN); } - @Test - void testAggregateLowestCassandraVersion() - { - CassandraClusterInfoGroup goodGroup = mockClusterGroup(2, index -> { - CassandraClusterInfo clusterInfo = mockClusterInfo("cluster" + index); - when(clusterInfo.getLowestCassandraVersion()).thenReturn("4.0." + index); - return clusterInfo; - }); - assertThat(goodGroup.getLowestCassandraVersion()).isEqualTo("4.0.0"); - } - - @Test - void testAggregateLowestCassandraVersionFailDueToDifference() - { - CassandraClusterInfoGroup badGroup = mockClusterGroup(2, index -> { - CassandraClusterInfo clusterInfo = mockClusterInfo("cluster" + index); - when(clusterInfo.getLowestCassandraVersion()).thenReturn((4 + index) + ".0.0"); - return clusterInfo; - }); - assertThatThrownBy(badGroup::getLowestCassandraVersion) - .isExactlyInstanceOf(IllegalStateException.class) - .hasMessage("Cluster versions are not compatible. 
lowest=4.0.0 and highest=5.0.0"); - } - @Test void testCheckBulkWriterIsEnabledOrThrow() { @@ -277,7 +251,7 @@ void testSerDeser() CassandraClusterInfoGroup originalGroup = mockClusterGroup(2, index -> { CassandraClusterInfo clusterInfo = mockClusterInfo("cluster" + index); when(clusterInfo.getPartitioner()).thenReturn(Partitioner.Murmur3Partitioner); - when(clusterInfo.getLowestCassandraVersion()).thenReturn("4.0.0"); + when(clusterInfo.getBridgeVersion()).thenReturn("5.0.0"); return clusterInfo; }); @@ -300,9 +274,9 @@ void testSerDeser() .describedAs("Partitioner should be preserved after serialization") .isEqualTo(Partitioner.Murmur3Partitioner); - assertThat(deserializedBroadcastable.getLowestCassandraVersion()) - .describedAs("Lowest Cassandra version should be preserved after serialization") - .isEqualTo("4.0.0"); + assertThat(deserializedBroadcastable.getBridgeVersion()) + .describedAs("Bridge version should be preserved after serialization") + .isEqualTo("5.0.0"); assertThat(deserializedBroadcastable.size()) .describedAs("Number of clusters should be preserved after serialization") @@ -318,6 +292,67 @@ void testSerDeser() assertThat(clusterCount[0]).isEqualTo(2); } + @Test + void testGetBridgeVersionSingleClusterDelegates() + { + CassandraClusterInfoGroup group = mockClusterGroup(1, index -> { + CassandraClusterInfo clusterInfo = mockClusterInfo("cluster" + index); + when(clusterInfo.getBridgeVersion()).thenReturn("5.0.0"); + return clusterInfo; + }); + assertThat(group.getBridgeVersion()).isEqualTo("5.0.0"); + } + + @Test + void testGetBridgeVersionMultiClusterSameVersionReturnsIt() + { + // All clusters resolve to the same version -> compatible; the (equal) lowest is returned + CassandraClusterInfoGroup group = mockClusterGroup(2, index -> { + CassandraClusterInfo clusterInfo = mockClusterInfo("cluster" + index); + when(clusterInfo.getBridgeVersion()).thenReturn("4.0.0"); + return clusterInfo; + }); + 
assertThat(group.getBridgeVersion()).isEqualTo("4.0.0"); + } + + @Test + void testGetBridgeVersionMultiClusterAcrossMajorsReturnsLowest() + { + // 4.0 and 5.0 clusters -> write at the lowest (4.0) so the produced SSTables are importable by both + CassandraClusterInfoGroup group = mockClusterGroup(2, index -> { + CassandraClusterInfo clusterInfo = mockClusterInfo("cluster" + index); + when(clusterInfo.getBridgeVersion()).thenReturn(index == 0 ? "4.0.0" : "5.0.0"); + return clusterInfo; + }); + assertThat(group.getBridgeVersion()).isEqualTo("4.0.0"); + } + + @Test + void testGetBridgeVersionFourZeroAndFourOneReturnsLowest() + { + // 4.0 and 4.1 clusters -> write at the lowest (4.0) + CassandraClusterInfoGroup group = mockClusterGroup(2, index -> { + CassandraClusterInfo clusterInfo = mockClusterInfo("cluster" + index); + when(clusterInfo.getBridgeVersion()).thenReturn(index == 0 ? "4.0.0" : "4.1.0"); + return clusterInfo; + }); + assertThat(group.getBridgeVersion()).isEqualTo("4.0.0"); + } + + @Test + void testGetBridgeVersionIncompatibleMajorsThrows() + { + // 3.0 and 5.0 clusters -> SSTables written at 3.0 cannot be imported by 5.0, so the coordinated write fails + CassandraClusterInfoGroup group = mockClusterGroup(2, index -> { + CassandraClusterInfo clusterInfo = mockClusterInfo("cluster" + index); + when(clusterInfo.getBridgeVersion()).thenReturn(index == 0 ? 
"3.0.0" : "5.0.0"); + return clusterInfo; + }); + assertThatThrownBy(group::getBridgeVersion) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining("not mutually compatible"); + } + private CassandraClusterInfoGroup mockClusterGroup(int size, Function clusterInfoCreator) { diff --git a/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/data/CassandraDataLayerValidationTest.java b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/data/CassandraDataLayerValidationTest.java new file mode 100644 index 000000000..c70a96cac --- /dev/null +++ b/cassandra-analytics-core/src/test/java/org/apache/cassandra/spark/data/CassandraDataLayerValidationTest.java @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cassandra.spark.data; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.junit.jupiter.api.Test; + +import org.apache.cassandra.bridge.CassandraVersion; +import org.apache.cassandra.bridge.SSTableVersionAnalyzer; +import org.apache.cassandra.clients.Sidecar; +import org.apache.cassandra.spark.data.partitioner.ConsistencyLevel; +import org.apache.cassandra.spark.data.partitioner.TokenPartitioner; +import org.apache.cassandra.spark.utils.TimeProvider; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatNoException; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Unit tests for CassandraDataLayer validation methods + */ +public class CassandraDataLayerValidationTest +{ + @Test + void testValidateSStableVersionsListWithValidVersions() + { + Set expectedVersions = new HashSet<>(Arrays.asList("big-na", "big-nb")); + CassandraDataLayer dataLayer = createTestDataLayerWithVersions(expectedVersions); + + SSTable ssTable1 = createMockSSTable("big", "na", "test1-big-na-Data.db"); + SSTable ssTable2 = createMockSSTable("big", "nb", "test2-big-nb-Data.db"); + List sstables = Arrays.asList(ssTable1, ssTable2); + + assertThatNoException() + .describedAs("All SSTables have expected versions") + .isThrownBy(() -> dataLayer.validateSStableVersions(sstables)); + } + + @Test + void testValidateSStableVersionsAcceptsReadableVersionNotInGossip() + { + // Only big-na was observed in gossip, but the selected read bridge (4.0) can also read big-nb. A version + // that appears after the gossip snapshot but is still readable by the bridge must not fail validation. 
+ Set expectedVersions = new HashSet<>(List.of("big-na")); + CassandraDataLayer dataLayer = createTestDataLayerWithVersions(expectedVersions); + + SSTable ssTable1 = createMockSSTable("big", "na", "test1-big-na-Data.db"); + SSTable ssTable2 = createMockSSTable("big", "nb", "test2-big-nb-Data.db"); + List sstables = Arrays.asList(ssTable1, ssTable2); + + assertThatNoException() + .describedAs("A version readable by the selected bridge but not in the gossip snapshot should be accepted") + .isThrownBy(() -> dataLayer.validateSStableVersions(sstables)); + } + + @Test + void testValidateSStableVersionsListWithEmptyList() + { + Set expectedVersions = new HashSet<>(List.of("big-na")); + CassandraDataLayer dataLayer = createTestDataLayerWithVersions(expectedVersions); + + assertThatNoException() + .describedAs("Empty SSTable list should not throw exception") + .isThrownBy(() -> dataLayer.validateSStableVersions(Collections.emptyList())); + } + + @Test + void testValidateSStableVersionsListErrorMessageIncludesFileName() + { + Set expectedVersions = new HashSet<>(List.of("big-na")); + CassandraDataLayer dataLayer = createTestDataLayerWithVersions(expectedVersions); + + SSTable ssTable = createMockSSTable("big", "oa", "keyspace-table-big-oa-Data.db"); + List sstables = Collections.singletonList(ssTable); + + assertThatThrownBy(() -> dataLayer.validateSStableVersions(sstables)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining("keyspace-table-big-oa-Data.db") + .hasMessageContaining("Versions observed in gossip:") + .hasMessageContaining("set spark.cassandra_analytics.bridge.disable_sstable_version_based=true"); + } + + @Test + void testValidateSStableVersionsListWithMixedBigAndBtiFormats() + { + Set expectedVersions = new HashSet<>(Arrays.asList("big-oa", "bti-da")); + CassandraDataLayer dataLayer = createTestDataLayerWithVersions(expectedVersions); + + SSTable ssTable1 = createMockSSTable("big", "oa", "test1-big-oa-Data.db"); + SSTable ssTable2 = 
createMockSSTable("bti", "da", "test2-bti-da-Data.db"); + List sstables = Arrays.asList(ssTable1, ssTable2); + + assertThatNoException() + .describedAs("Mixed big and bti format SSTables should be validated successfully") + .isThrownBy(() -> dataLayer.validateSStableVersions(sstables)); + } + + @Test + void testValidateSStableVersionsRejectsUnreadableOlderVersion() + { + // The selected read bridge (5.0, from big-oa) cannot read 3.x SSTables (big-mf), so it must be rejected + // even though both are "big" format. + Set expectedVersions = new HashSet<>(List.of("big-oa")); + CassandraDataLayer dataLayer = createTestDataLayerWithVersions(expectedVersions); + + SSTable ssTable1 = createMockSSTable("big", "oa", "test1-big-oa-Data.db"); + SSTable ssTable2 = createMockSSTable("big", "mf", "keyspace-table-big-mf-Data.db"); + List sstables = Arrays.asList(ssTable1, ssTable2); + + assertThatThrownBy(() -> dataLayer.validateSStableVersions(sstables)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessageContaining("has version 'big-mf' which is not readable by the bridge") + .hasMessageContaining("keyspace-table-big-mf-Data.db") + .hasMessageContaining("Versions observed in gossip:") + .hasMessageContaining("set spark.cassandra_analytics.bridge.disable_sstable_version_based=true"); + } + + @Test + void testValidateSStableVersionsListSkipsValidationWhenFeatureDisabled() + { + // When the feature is disabled the driver leaves sstableVersionsOnCluster empty; on the executor + // an empty expected set is the signal that the feature is disabled, so validation is skipped. 
+ CassandraDataLayer dataLayer = createTestDataLayerWithVersions(Collections.emptySet()); + + // SSTable has a version that would be rejected if validation actually ran + SSTable ssTable = createMockSSTable("big", "oa", "test-big-oa-Data.db"); + List sstables = Collections.singletonList(ssTable); + + assertThatNoException() + .describedAs("Validation should be skipped when feature is disabled (empty expected versions)") + .isThrownBy(() -> dataLayer.validateSStableVersions(sstables)); + } + + // Tests for initializeSSTableVersionsAndBridgeVersion (called from initBulkReader) + + @Test + void testInitializeSSTableVersionsAndBridgeVersionWithFeatureEnabled() + { + Set mockVersions = new HashSet<>(Arrays.asList("big-na", "big-nb")); + CassandraDataLayer dataLayer = new TestCassandraDataLayerForInitTests(mockVersions, false); + + CassandraVersion result = dataLayer.initializeSSTableVersionsAndBridgeVersion("4.0.0"); + + // Should return FOURZERO bridge version + assertThat((Object) result).isEqualTo(CassandraVersion.FOURZERO); + + // Should set sstableVersionsOnCluster to the retrieved versions + assertThat((Object) dataLayer.sstableVersionsOnCluster) + .isNotNull(); + assertThat(dataLayer.sstableVersionsOnCluster) + .containsExactlyInAnyOrder("big-na", "big-nb"); + } + + @Test + void testInitializeSSTableVersionsAndBridgeVersionWithFeatureDisabled() + { + // Versions won't be retrieved when feature is disabled + CassandraDataLayer dataLayer = new TestCassandraDataLayerForInitTests(null, true); + + CassandraVersion result = dataLayer.initializeSSTableVersionsAndBridgeVersion("4.0.0"); + + // Should return FOURZERO bridge version (legacy cassandra.version-based bridge selection) + assertThat((Object) result).isEqualTo(CassandraVersion.FOURZERO); + + // Should set sstableVersionsOnCluster to an empty set (skipped retrieval, never null) + assertThat(dataLayer.sstableVersionsOnCluster).isEmpty(); + } + + @Test + void 
testInitializeSSTableVersionsAndBridgeVersionSelectsFiveZeroBridge() + { + Set mockVersions = new HashSet<>(Arrays.asList("big-oa", "bti-da")); + CassandraDataLayer dataLayer = new TestCassandraDataLayerForInitTests(mockVersions, false); + + CassandraVersion result = dataLayer.initializeSSTableVersionsAndBridgeVersion("5.0.0"); + + // Should return FIVEZERO bridge version based on highest SSTable version + assertThat((Object) result).isEqualTo(CassandraVersion.FIVEZERO); + + // Should set sstableVersionsOnCluster to the retrieved versions + assertThat((Object) dataLayer.sstableVersionsOnCluster) + .isNotNull(); + assertThat(dataLayer.sstableVersionsOnCluster) + .containsExactlyInAnyOrder("big-oa", "bti-da"); + } + + @Test + void testInitializeSSTableVersionsAndBridgeVersionWithMixedVersions() + { + // Cluster has both C* 4.0 and C* 5.0 versions (during rolling upgrade) + Set mockVersions = new HashSet<>(Arrays.asList("big-na", "big-oa")); + CassandraDataLayer dataLayer = new TestCassandraDataLayerForInitTests(mockVersions, false); + + CassandraVersion result = dataLayer.initializeSSTableVersionsAndBridgeVersion("5.0.0"); + + // Should return FIVEZERO bridge (highest version) + assertThat((Object) result).isEqualTo(CassandraVersion.FIVEZERO); + + // Should set sstableVersionsOnCluster to all retrieved versions + assertThat((Object) dataLayer.sstableVersionsOnCluster) + .isNotNull(); + assertThat(dataLayer.sstableVersionsOnCluster) + .containsExactlyInAnyOrder("big-na", "big-oa"); + } + + @Test + void testInitializeSSTableVersionsAndBridgeVersionFailsFastWhenEnabledAndNoVersions() + { + // Feature enabled but the cluster returns no SSTable versions -> fail fast with an actionable hint + CassandraDataLayer dataLayer = new TestCassandraDataLayerForInitTests(Collections.emptySet(), false); + + assertThatThrownBy(() -> dataLayer.initializeSSTableVersionsAndBridgeVersion("5.0.0")) + .isInstanceOf(IllegalStateException.class) + .hasMessageContaining("Unable to retrieve 
SSTable versions from cluster") + .hasMessageContaining("set spark.cassandra_analytics.bridge.disable_sstable_version_based=true"); + } + + private CassandraDataLayer createTestDataLayerWithVersions(Set sstableVersions) + { + return new TestCassandraDataLayer(sstableVersions); + } + + private SSTable createMockSSTable(String format, String version, String fileName) + { + SSTable ssTable = mock(SSTable.class); + when(ssTable.getFormat()).thenReturn(format); + when(ssTable.getVersion()).thenReturn(version); + when(ssTable.getDataFileName()).thenReturn(fileName); + return ssTable; + } + + /** + * Test subclass to allow setting sstableVersionsOnCluster for testing + * This subclass uses the serialization constructor and overrides problematic methods + */ + private static class TestCassandraDataLayer extends CassandraDataLayer + { + TestCassandraDataLayer(Set sstableVersions) + { + // Use the serialization constructor which doesn't call initialize() + super("test_keyspace", // keyspace + "test_table", // table + false, // quoteIdentifiers + "", // snapshotName + null, // datacenter + Sidecar.ClientConfig.create(), // sidecarClientConfig + null, // sslConfig + mock(CqlTable.class), // cqlTable + mock(TokenPartitioner.class), // tokenPartitioner + bridgeVersionFor(sstableVersions), // bridgeVersion (matches what the driver would select) + ConsistencyLevel.LOCAL_QUORUM, // consistencyLevel + "127.0.0.1", // sidecarInstances + 9043, // sidecarPort + Collections.emptyMap(), // availabilityHints + Collections.emptyMap(), // bigNumberConfigMap + false, // enableStats + false, // readIndexOffset + false, // useIncrementalRepair + null, // lastModifiedTimestampField + Collections.emptyList(), // requestedFeatures + Collections.emptyMap(), // rfMap + mock(TimeProvider.class), // timeProvider + null, // sstableTimeRangeFilter + sstableVersions); // sstableVersionsOnCluster + this.sstableVersionsOnCluster = sstableVersions; + } + + /** + * Derives the bridge version the driver 
would select for these SSTable versions, so that + * {@code bridge().getVersion()} (used by validateSStableVersions) matches the scenario. Falls back to + * 4.0 when no versions are supplied (disabled-feature / init-only cases). + */ + private static CassandraVersion bridgeVersionFor(Set sstableVersions) + { + return (sstableVersions == null || sstableVersions.isEmpty()) + ? CassandraVersion.FOURZERO + : SSTableVersionAnalyzer.determineBridgeVersionForRead(sstableVersions); + } + + @Override + public void startupValidate() + { + // Skip startup validation in tests to avoid SparkContext initialization + } + + @Override + protected void initSidecarClient() + { + // Skip sidecar client initialization in tests + } + + @Override + protected void initInstanceMap() + { + // Skip instance map initialization in tests + } + + @Override + protected boolean isSSTableVersionBasedBridgeDisabled() + { + // Override to avoid reading from the active SparkSession, which is not available in unit tests. + // Always return false to test the validation logic + return false; + } + } + + /** + * Test subclass for testing initializeSSTableVersionsAndBridgeVersion method + * Mocks the Sidecar interaction to avoid actual cluster calls + */ + private static class TestCassandraDataLayerForInitTests extends TestCassandraDataLayer + { + private final Set mockSSTableVersions; + private final boolean featureDisabled; + + TestCassandraDataLayerForInitTests(Set mockSSTableVersions, boolean featureDisabled) + { + super(null); + this.mockSSTableVersions = mockSSTableVersions; + this.featureDisabled = featureDisabled; + } + + @Override + protected boolean isSSTableVersionBasedBridgeDisabled() + { + return featureDisabled; + } + + @Override + protected Set retrieveSSTableVersionsFromCluster() + { + // Mock the Sidecar call - return the mock versions instead of calling actual sidecar + return mockSSTableVersions; + } + } +} diff --git 
a/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBigSSTableVersionBridgeDisabledTest.java b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBigSSTableVersionBridgeDisabledTest.java new file mode 100644 index 000000000..c4c0bca5d --- /dev/null +++ b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBigSSTableVersionBridgeDisabledTest.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.analytics; + +/** + * Runs the {@link BulkRoundtripSSTableVersionBridgeTestBase} roundtrip suite with the BIG (big-oa/big-nb) SSTable + * format and SSTable version-based bridge selection DISABLED (legacy {@code cassandra.version}-based bridge selection). 
+ */ +class BulkRoundtripBigSSTableVersionBridgeDisabledTest extends BulkRoundtripSSTableVersionBridgeTestBase +{ + @Override + protected boolean sstableVersionBridgeEnabled() + { + return false; + } +} diff --git a/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBigSSTableVersionBridgeEnabledTest.java b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBigSSTableVersionBridgeEnabledTest.java new file mode 100644 index 000000000..1de367750 --- /dev/null +++ b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBigSSTableVersionBridgeEnabledTest.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.analytics; + +/** + * Runs the {@link BulkRoundtripSSTableVersionBridgeTestBase} roundtrip suite with the BIG (big-oa/big-nb) SSTable + * format and SSTable version-based bridge selection ENABLED. 
+ */ +class BulkRoundtripBigSSTableVersionBridgeEnabledTest extends BulkRoundtripSSTableVersionBridgeTestBase +{ + @Override + protected boolean sstableVersionBridgeEnabled() + { + return true; + } +} diff --git a/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBtiSSTableVersionBridgeDisabledTest.java b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBtiSSTableVersionBridgeDisabledTest.java new file mode 100644 index 000000000..b08213f5e --- /dev/null +++ b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBtiSSTableVersionBridgeDisabledTest.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.analytics; + +/** + * Runs the {@link BulkRoundtripSSTableVersionBridgeTestBase} roundtrip suite with the BTI (bti-da) SSTable format + * and SSTable version-based bridge selection DISABLED (legacy {@code cassandra.version}-based bridge selection). 
+ */ +class BulkRoundtripBtiSSTableVersionBridgeDisabledTest extends BulkRoundtripSSTableVersionBridgeTestBase +{ + @Override + protected boolean sstableVersionBridgeEnabled() + { + return false; + } + + @Override + protected String sstableFormat() + { + return "bti"; + } +} diff --git a/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBtiSSTableVersionBridgeEnabledTest.java b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBtiSSTableVersionBridgeEnabledTest.java new file mode 100644 index 000000000..b4f204ba9 --- /dev/null +++ b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripBtiSSTableVersionBridgeEnabledTest.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cassandra.analytics; + +/** + * Runs the {@link BulkRoundtripSSTableVersionBridgeTestBase} roundtrip suite with the BTI (bti-da) SSTable format + * and SSTable version-based bridge selection ENABLED. 
+ */ +class BulkRoundtripBtiSSTableVersionBridgeEnabledTest extends BulkRoundtripSSTableVersionBridgeTestBase +{ + @Override + protected boolean sstableVersionBridgeEnabled() + { + return true; + } + + @Override + protected String sstableFormat() + { + return "bti"; + } +} diff --git a/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripSSTableVersionBridgeTestBase.java b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripSSTableVersionBridgeTestBase.java new file mode 100644 index 000000000..20a773d79 --- /dev/null +++ b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/BulkRoundtripSSTableVersionBridgeTestBase.java @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.cassandra.analytics; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import com.vdurmont.semver4j.Semver; +import org.junit.jupiter.api.Test; + +import org.apache.cassandra.distributed.api.ConsistencyLevel; +import org.apache.cassandra.distributed.api.IInstance; +import org.apache.cassandra.sidecar.testing.QualifiedName; +import org.apache.cassandra.testing.ClusterBuilderConfiguration; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +import static org.apache.cassandra.testing.TestUtils.DC1_RF3; +import static org.apache.cassandra.testing.TestUtils.TEST_KEYSPACE; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +/** + * Shared roundtrip integration tests for SSTable version-based bridge selection. The same test logic runs + * for both the feature-enabled and feature-disabled cases. + * + *

<p>Each test exercises a full write/read cycle and asserts the on-disk SSTable files use the format and + * version expected for the Cassandra version under test. + * <ul> + *   <li>{@link #testBulkWriteReadRoundtrip()} writes via the bulk writer, then reads back via the bulk reader.</li> + *   <li>{@link #testBulkReadAcrossMultipleSSTables()} writes via CQL across several flushed SSTables, then bulk reads.</li> + *   <li>{@link #testBulkReadFromSnapshot()} writes via CQL, snapshots, then bulk reads from the snapshot.</li> + * </ul>
    + */ +abstract class BulkRoundtripSSTableVersionBridgeTestBase extends SharedClusterSparkIntegrationTestBase +{ + static final List DATASET = Arrays.asList("alpha", "beta", "gamma", "delta", "epsilon"); + static final QualifiedName TABLE_ROUNDTRIP = new QualifiedName(TEST_KEYSPACE, "test_roundtrip"); + static final QualifiedName TABLE_MULTIPLE = new QualifiedName(TEST_KEYSPACE, "test_multiple"); + static final QualifiedName TABLE_SNAPSHOT = new QualifiedName(TEST_KEYSPACE, "test_snapshot"); + + static final StructType SCHEMA = DataTypes.createStructType(new StructField[]{ + DataTypes.createStructField("id", DataTypes.IntegerType, false), + DataTypes.createStructField("value", DataTypes.StringType, false) + }); + + /** + * @return whether SSTable version-based bridge selection should be enabled for this test class + */ + protected abstract boolean sstableVersionBridgeEnabled(); + + /** + * @return the SSTable format the analytics bulk writer should produce ({@code big} by default; + * overridden to {@code bti} by the BTI variant). + */ + protected String sstableFormat() + { + return "big"; + } + + @Override + protected ClusterBuilderConfiguration testClusterConfiguration() + { + ClusterBuilderConfiguration conf = super.testClusterConfiguration() + .nodesPerDc(3); + // Preserve the base instance config (e.g. storage_compatibility_mode) and pin the node SSTable format. + Map instanceConfig = new HashMap<>(); + if (conf.additionalInstanceConfig != null) + { + instanceConfig.putAll(conf.additionalInstanceConfig); + } + instanceConfig.put("sstable.selected_format", sstableFormat()); + return conf.additionalInstanceConfig(instanceConfig); + } + + @Override + protected void beforeClusterProvisioning() + { + // Set before CassandraVersion class initialization (which reads this property once, statically) so the bulk + // writer generates SSTables in the configured format. 
This runs at the start of cluster provisioning, before + // any analytics bridge usage, and relies on forkEvery = 1 (the integration-test default) for a fresh JVM per + // test class. + System.setProperty("cassandra.analytics.bridges.sstable_format", sstableFormat()); + if ("bti".equals(sstableFormat())) + { + // BTI (bti-da) is a Cassandra 5.0+ format; skip on older versions. + Semver version = new Semver(testVersion.version(), Semver.SemverType.LOOSE); + assumeTrue(version.isGreaterThanOrEqualTo(new Semver("5.0", Semver.SemverType.LOOSE)), + "BTI format (bti-da) requires Cassandra 5.0+, but test version is " + testVersion.version()); + } + } + + @Override + protected SparkConf getOrCreateSparkConf() + { + SparkConf conf = super.getOrCreateSparkConf(); + // The feature is toggled via the "disable" flag, so it is the inverse of the enabled state. + conf.set("spark.cassandra_analytics.bridge.disable_sstable_version_based", + String.valueOf(!sstableVersionBridgeEnabled())); + return conf; + } + + @Override + protected void initializeSchemaForTest() + { + createTestKeyspace(TEST_KEYSPACE, DC1_RF3); + createTestTable(TABLE_ROUNDTRIP, "CREATE TABLE %s (id int PRIMARY KEY, value text) WITH compression = {'enabled': false};"); + createTestTable(TABLE_MULTIPLE, "CREATE TABLE %s (id int PRIMARY KEY, value text) WITH compression = {'enabled': false};"); + createTestTable(TABLE_SNAPSHOT, "CREATE TABLE %s (id int PRIMARY KEY, value text) WITH compression = {'enabled': false};"); + } + + @Test + void testBulkWriteReadRoundtrip() + { + SparkSession spark = getOrCreateSparkSession(); + + List data = Arrays.asList( + RowFactory.create(0, "a"), + RowFactory.create(1, "b"), + RowFactory.create(2, "c"), + RowFactory.create(3, "d"), + RowFactory.create(4, "e") + ); + Dataset dfWrite = spark.createDataFrame(data, SCHEMA); + + // Write through the bulk writer + bulkWriterDataFrameWriter(dfWrite, TABLE_ROUNDTRIP).save(); + flushKeyspace(); + + // The bulk writer should have produced 
SSTables in the expected format/version for this Cassandra version + assertExpectedSSTableFormat(TABLE_ROUNDTRIP); + + // Read the data back through the bulk reader and confirm the roundtrip + Dataset dfRead = bulkReaderDataFrame(TABLE_ROUNDTRIP).load(); + checkSmallDataFrameEquality(dfWrite, dfRead); + } + + @Test + void testBulkReadAcrossMultipleSSTables() + { + IInstance firstRunningInstance = cluster.getFirstRunningInstance(); + + // Insert in 3 batches, flushing between each to create multiple SSTables + for (int batch = 0; batch < 3; batch++) + { + for (int i = batch * 10; i < (batch + 1) * 10; i++) + { + String query = String.format("INSERT INTO %s (id, value) VALUES (%d, 'value_%d');", TABLE_MULTIPLE, i, i); + firstRunningInstance.coordinator().execute(query, ConsistencyLevel.ALL); + } + cluster.stream().forEach(instance -> instance.nodetool("flush", TEST_KEYSPACE, TABLE_MULTIPLE.table())); + } + + assertExpectedSSTableFormat(TABLE_MULTIPLE); + + Dataset df = bulkReaderDataFrame(TABLE_MULTIPLE).load(); + List ids = df.collectAsList() + .stream() + .map(row -> row.getInt(0)) + .sorted() + .collect(Collectors.toList()); + assertThat(ids).hasSize(30); // 3 batches * 10 rows each + for (int i = 0; i < 30; i++) + { + assertThat(ids.get(i)).isEqualTo(i); + } + } + + @Test + void testBulkReadFromSnapshot() + { + IInstance firstRunningInstance = cluster.getFirstRunningInstance(); + for (int i = 0; i < DATASET.size(); i++) + { + String query = String.format("INSERT INTO %s (id, value) VALUES (%d, '%s');", TABLE_SNAPSHOT, i, DATASET.get(i)); + firstRunningInstance.coordinator().execute(query, ConsistencyLevel.ALL); + } + cluster.stream().forEach(instance -> instance.nodetool("flush", TEST_KEYSPACE, TABLE_SNAPSHOT.table())); + + assertExpectedSSTableFormat(TABLE_SNAPSHOT); + + String snapshotName = "test_snapshot_" + System.currentTimeMillis(); + cluster.stream().forEach(instance -> instance.nodetool("snapshot", "-t", snapshotName, TEST_KEYSPACE)); + try + { + Dataset 
df = bulkReaderDataFrame(TABLE_SNAPSHOT) + .option("snapshotName", snapshotName) + .load(); + + List rows = df.collectAsList() + .stream() + .sorted(Comparator.comparing(row -> row.getInt(0))) + .collect(Collectors.toList()); + + assertThat(rows).hasSize(DATASET.size()); + for (int i = 0; i < DATASET.size(); i++) + { + assertThat(rows.get(i).getInt(0)).isEqualTo(i); + assertThat(rows.get(i).getString(1)).isEqualTo(DATASET.get(i)); + } + } + finally + { + cluster.stream().forEach(instance -> instance.nodetool("clearsnapshot", "-t", snapshotName, TEST_KEYSPACE)); + } + } + + private void flushKeyspace() + { + cluster.stream().forEach(instance -> instance.nodetool("flush", TEST_KEYSPACE)); + } + + /** + * Asserts the on-disk SSTables for the given table match the format produced by this test class: + * {@code bti-da} for the BTI variant (5.x only), otherwise {@code big} with the version expected for the + * Cassandra version under test ({@code oa} for 5.x, {@code nb} for 4.x). + */ + private void assertExpectedSSTableFormat(QualifiedName table) + { + if ("bti".equals(sstableFormat())) + { + assertSSTableFormatOnDisk(table, "bti", "da"); + } + else + { + String version = testVersion.version(); + String expectedSSTableVersion; + if (version.startsWith("5.")) + { + expectedSSTableVersion = "oa"; + } + else if (version.startsWith("4.")) + { + expectedSSTableVersion = "nb"; + } + else + { + throw new IllegalStateException("Unsupported Cassandra version for SSTable format assertion: " + version); + } + assertSSTableFormatOnDisk(table, "big", expectedSSTableVersion); + } + } +} diff --git a/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/SharedClusterSparkIntegrationTestBase.java b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/SharedClusterSparkIntegrationTestBase.java index c6a98ca73..9c4d9e8fa 100644 --- 
a/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/SharedClusterSparkIntegrationTestBase.java +++ b/cassandra-analytics-integration-tests/src/test/java/org/apache/cassandra/analytics/SharedClusterSparkIntegrationTestBase.java @@ -19,6 +19,10 @@ package org.apache.cassandra.analytics; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -26,6 +30,7 @@ import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.junit.jupiter.api.TestInstance; import org.junit.jupiter.api.TestInstance.Lifecycle; @@ -36,6 +41,7 @@ import io.vertx.junit5.VertxExtension; import org.apache.cassandra.bridge.CassandraBridge; import org.apache.cassandra.bridge.CassandraBridgeFactory; +import org.apache.cassandra.distributed.api.IInstance; import org.apache.cassandra.sidecar.testing.QualifiedName; import org.apache.cassandra.sidecar.testing.SharedClusterIntegrationTestBase; import org.apache.spark.SparkConf; @@ -190,6 +196,74 @@ public void checkSmallDataFrameEquality(Dataset expected, Dataset actu } } + /** + * Asserts that every on-disk SSTable data file for the given table matches the expected SSTable format and + * version across all running nodes. Data file names follow the pattern + * {@code <version>-<generation>-<format>-Data.db} (e.g. {@code oa-1-big-Data.db}). The generation component is + * matched loosely since it may be sequence- or UUID-based depending on cluster configuration. + * + * @param table the table whose on-disk SSTables are inspected + * @param format the expected SSTable format (e.g. {@code big}) + * @param expectedVersion the expected SSTable version (e.g. 
{@code oa} or {@code nb}) + */ + protected void assertSSTableFormatOnDisk(QualifiedName table, String format, String expectedVersion) + { + String dataFileRegex = expectedVersion + "-[^-]+-" + format + "-Data\\.db"; + boolean foundDataFiles = false; + for (int i = 1; i <= cluster.size(); i++) + { + IInstance instance = cluster.get(i); + if (instance.isShutdown()) + { + continue; + } + for (String fileName : findSSTableDataFiles(instance, table)) + { + foundDataFiles = true; + assertThat(fileName) + .as("SSTable data file for %s on node %d should be in %s format with version %s: %s", + table, i, format, expectedVersion, fileName) + .matches(dataFileRegex); + } + } + assertThat(foundDataFiles) + .as("Expected to find at least one SSTable data file for %s on a running node", table) + .isTrue(); + } + + /** + * Finds the names of all SSTable {@code *-Data.db} files belonging to the given table on a single node, + * scanning every configured data directory and scoping to the table's own data subdirectory. 
+ */ + private Set findSSTableDataFiles(IInstance instance, QualifiedName table) + { + String[] dataDirs = (String[]) instance.config().getParams().get("data_file_directories"); + Set dataFileNames = new HashSet<>(); + String tableDirPrefix = table.table() + "-"; + for (String dataDir : dataDirs) + { + Path keyspacePath = Paths.get(dataDir, table.keyspace()); + if (!Files.exists(keyspacePath)) + { + continue; + } + try (Stream walkStream = Files.walk(keyspacePath)) + { + walkStream.filter(Files::isRegularFile) + .filter(path -> path.getFileName().toString().endsWith("-Data.db")) + .filter(path -> path.getParent() != null + && path.getParent().getFileName().toString().startsWith(tableDirPrefix)) + .forEach(path -> dataFileNames.add(path.getFileName().toString())); + } + catch (IOException e) + { + throw new RuntimeException("Failed to list SSTable data files for " + table, e); + } + } + + return dataFileNames; + } + public void validateWritesWithDriverResultSet(List sparkData, ResultSet driverData, Function driverRowFormatter) { diff --git a/cassandra-analytics-sidecar-client/build.gradle b/cassandra-analytics-sidecar-client/build.gradle index cad4d9593..d0d23f53c 100644 --- a/cassandra-analytics-sidecar-client/build.gradle +++ b/cassandra-analytics-sidecar-client/build.gradle @@ -38,4 +38,23 @@ dependencies { implementation "org.slf4j:slf4j-api:${slf4jApiVersion}" api(project(path: ':analytics-sidecar-vertx-client-shaded', configuration: 'shadow')) + + // Test dependencies + testImplementation("org.junit.jupiter:junit-jupiter-api:${project.junitVersion}") + testImplementation("org.junit.jupiter:junit-jupiter-params:${project.junitVersion}") + testImplementation("org.junit.jupiter:junit-jupiter-engine:${project.junitVersion}") + testImplementation("org.assertj:assertj-core:${assertjCoreVersion}") + testImplementation(group: 'org.mockito', name: 'mockito-core', version: "${project.rootProject.mockitoVersion}") + testImplementation(group: 'org.mockito', name: 
'mockito-inline', version: "${project.rootProject.mockitoVersion}") +} + +test { + useJUnitPlatform() + testLogging { + events "passed", "skipped", "failed" + showExceptions true + exceptionFormat "full" + showCauses true + showStackTraces true + } } diff --git a/cassandra-analytics-sidecar-client/src/main/java/org/apache/cassandra/clients/Sidecar.java b/cassandra-analytics-sidecar-client/src/main/java/org/apache/cassandra/clients/Sidecar.java index d5922c2d3..a12c9bb23 100644 --- a/cassandra-analytics-sidecar-client/src/main/java/org/apache/cassandra/clients/Sidecar.java +++ b/cassandra-analytics-sidecar-client/src/main/java/org/apache/cassandra/clients/Sidecar.java @@ -20,15 +20,18 @@ package org.apache.cassandra.clients; import java.io.IOException; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import o.a.c.sidecar.client.shaded.common.response.GossipInfoResponse; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -50,6 +53,7 @@ import org.apache.cassandra.spark.common.model.CassandraInstance; import org.apache.cassandra.spark.data.FileType; import org.apache.cassandra.spark.utils.BuildInfo; +import org.apache.cassandra.spark.utils.FutureUtils; import org.apache.cassandra.spark.utils.MapUtils; import org.apache.cassandra.spark.validation.KeyStoreValidation; import org.apache.cassandra.spark.validation.StartupValidator; @@ -149,6 +153,79 @@ public static List> allNodeSettings(SidecarClien .collect(Collectors.toList()); } + /** + * Retrieve gossip info from all nodes on the cluster + * @param client Sidecar client + * @param instances all Sidecar instances + * @return completable futures with GossipInfoResponse + */ + public static List> gossipInfoFromAllNodes(SidecarClient client, + Set instances) + { + 
return instances.stream() + .map(instance -> client + .gossipInfo(instance) + .exceptionally(throwable -> { + LOGGER.warn(String.format("Failed to retrieve gossipinfo from instance=%s", + instance), throwable); + return null; + })) + .collect(Collectors.toList()); + } + + /** + * Retrieves SSTable versions from all nodes in the cluster via gossip info. + * This method fetches gossip information from all Sidecar instances and extracts + * the SSTable versions running on each node. + * + * @param client Sidecar client + * @param instances all Sidecar instances in the cluster + * @param maxRetryDelayMillis maximum delay in milliseconds between retries + * @param maxRetries maximum number of retry attempts + * @return a set of SSTable versions across all nodes in the cluster; when no node yields gossip info, + * an empty set is returned (rather than an exception thrown) and the caller decides how to proceed + */ + public static Set getSSTableVersionsFromCluster(SidecarClient client, + Set instances, + long maxRetryDelayMillis, + int maxRetries) + { + LOGGER.debug("Retrieving SSTable versions from cluster via gossip..."); + + List> gossipInfoFutures = + gossipInfoFromAllNodes(client, instances); + + // Calculate total timeout + final long totalTimeout = maxRetryDelayMillis * maxRetries * gossipInfoFutures.size(); + + List gossipInfoResponses = + FutureUtils.bestEffortGet(gossipInfoFutures, totalTimeout, TimeUnit.MILLISECONDS); + + if (gossipInfoResponses.isEmpty()) + { + LOGGER.warn("Unable to retrieve gossip info from any nodes. 
0/{} instances available.",
+                        gossipInfoFutures.size());
+            // do not fail here, bridge determination logic checks for feature flag and proceeds accordingly
+            return Collections.emptySet();
+        }
+        else if (gossipInfoResponses.size() < gossipInfoFutures.size())
+        {
+            LOGGER.warn("{}/{} instances were used to retrieve gossip info and determine SSTable versions",
+                        gossipInfoResponses.size(), gossipInfoFutures.size());
+        }
+
+        // Extract and collect SSTable versions from all gossip info responses
+        Set sstableVersions = gossipInfoResponses.stream()
+                                                 .flatMap(response -> response.values().stream())
+                                                 .map(GossipInfoResponse.GossipInfo::sstableVersions)
+                                                 .filter(Objects::nonNull)
+                                                 .flatMap(List::stream)
+                                                 .collect(Collectors.toSet());
+
+        LOGGER.info("Detected SSTable versions on cluster: {}", sstableVersions);
+        return sstableVersions;
+    }
+
     public static SidecarInstance toSidecarInstance(CassandraInstance instance, int sidecarPort)
     {
         return new SidecarInstanceImpl(instance.nodeName(), sidecarPort);
diff --git a/cassandra-analytics-sidecar-client/src/test/java/org/apache/cassandra/clients/SidecarTest.java b/cassandra-analytics-sidecar-client/src/test/java/org/apache/cassandra/clients/SidecarTest.java
new file mode 100644
index 000000000..82b454868
--- /dev/null
+++ b/cassandra-analytics-sidecar-client/src/test/java/org/apache/cassandra/clients/SidecarTest.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.cassandra.clients;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+
+import org.junit.jupiter.api.Test;
+
+import o.a.c.sidecar.client.shaded.client.SidecarClient;
+import o.a.c.sidecar.client.shaded.client.SidecarInstance;
+import o.a.c.sidecar.client.shaded.client.SidecarInstanceImpl;
+import o.a.c.sidecar.client.shaded.common.response.GossipInfoResponse;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for {@code Sidecar.getSSTableVersionsFromCluster}, run against a mocked
+ * {@link SidecarClient} whose per-instance gossip info futures are stubbed
+ */
+public class SidecarTest
+{
+    /**
+     * A single instance whose gossip info lists two SSTable versions yields exactly those two versions
+     */
+    @Test
+    void testGetSSTableVersionsFromClusterWithSingleNode()
+    {
+        SidecarClient client = mock(SidecarClient.class);
+        SidecarInstance instance = new SidecarInstanceImpl("localhost", 9043);
+        Set<SidecarInstance> instances = Collections.singleton(instance);
+
+        // Mock gossip response
+        GossipInfoResponse response = new GossipInfoResponse();
+        GossipInfoResponse.GossipInfo info = createGossipInfo(Arrays.asList("big-na", "big-nb"));
+        response.put("localhost", info);
+
+        when(client.gossipInfo(any(SidecarInstance.class)))
+            .thenReturn(CompletableFuture.completedFuture(response));
+
+        Set<String> versions = Sidecar.getSSTableVersionsFromCluster(client, instances, 1000L, 3);
+
+        assertThat(versions)
+            .describedAs("Should extract SSTable versions from gossip info")
+            .containsExactlyInAnyOrder("big-na", "big-nb");
+    }
+
+    /**
+     * Versions reported by two instances are merged, and the version both report appears only once
+     */
+    @Test
+    void testGetSSTableVersionsFromClusterWithMultipleNodesAndMixedVersions()
+    {
+        SidecarClient client = mock(SidecarClient.class);
+        SidecarInstance instance1 = new SidecarInstanceImpl("node1", 9043);
+        SidecarInstance instance2 = new SidecarInstanceImpl("node2", 9043);
+        Set<SidecarInstance> instances = new HashSet<>(Arrays.asList(instance1, instance2));
+
+        // Node1 has C* 4.0 versions (big-na, big-nb)
+        GossipInfoResponse response1 = new GossipInfoResponse();
+        GossipInfoResponse.GossipInfo info1 = createGossipInfo(Arrays.asList("big-na", "big-nb"));
+        response1.put("node1", info1);
+
+        // Node2 has mixed versions: one C* 4.0 (big-nb) and C* 5.0 versions (big-oa, bti-da)
+        GossipInfoResponse response2 = new GossipInfoResponse();
+        GossipInfoResponse.GossipInfo info2 = createGossipInfo(Arrays.asList("big-nb", "big-oa", "bti-da"));
+        response2.put("node2", info2);
+
+        when(client.gossipInfo(instance1))
+            .thenReturn(CompletableFuture.completedFuture(response1));
+        when(client.gossipInfo(instance2))
+            .thenReturn(CompletableFuture.completedFuture(response2));
+
+        Set<String> versions = Sidecar.getSSTableVersionsFromCluster(client, instances, 1000L, 3);
+
+        assertThat(versions)
+            .describedAs("Should aggregate and deduplicate SSTable versions from multiple nodes with mixed Cassandra versions")
+            .containsExactlyInAnyOrder("big-na", "big-nb", "big-oa", "bti-da");
+    }
+
+    /**
+     * An instance whose gossip info future completed exceptionally is skipped; the other instance's versions are returned
+     */
+    @Test
+    void testGetSSTableVersionsFromClusterWithFailedNode()
+    {
+        SidecarClient client = mock(SidecarClient.class);
+        SidecarInstance instance1 = new SidecarInstanceImpl("node1", 9043);
+        SidecarInstance instance2 = new SidecarInstanceImpl("node2", 9043);
+        Set<SidecarInstance> instances = new HashSet<>(Arrays.asList(instance1, instance2));
+
+        // Node1 succeeds
+        GossipInfoResponse response1 = new GossipInfoResponse();
+        GossipInfoResponse.GossipInfo info1 = createGossipInfo(Arrays.asList("big-na", "big-nb"));
+        response1.put("node1", info1);
+
+        when(client.gossipInfo(instance1))
+            .thenReturn(CompletableFuture.completedFuture(response1));
+
+        // Node2 fails
+        when(client.gossipInfo(instance2))
+            .thenReturn(CompletableFuture.failedFuture(new RuntimeException("Connection failed")));
+
+        Set<String> versions = Sidecar.getSSTableVersionsFromCluster(client, instances, 1000L, 3);
+
+        assertThat(versions)
+            .describedAs("Should return versions from successful nodes even when some fail")
+            .containsExactlyInAnyOrder("big-na", "big-nb");
+    }
+
+    /**
+     * When every gossip info future completes exceptionally the result is an empty set rather than a failure
+     */
+    @Test
+    void testGetSSTableVersionsFromClusterWhenAllNodesFail()
+    {
+        SidecarClient client = mock(SidecarClient.class);
+        SidecarInstance instance1 = new SidecarInstanceImpl("node1", 9043);
+        SidecarInstance instance2 = new SidecarInstanceImpl("node2", 9043);
+        Set<SidecarInstance> instances = new HashSet<>(Arrays.asList(instance1, instance2));
+
+        // Both nodes fail
+        when(client.gossipInfo(any(SidecarInstance.class)))
+            .thenReturn(CompletableFuture.failedFuture(new RuntimeException("Connection failed")));
+
+        Set<String> versions = Sidecar.getSSTableVersionsFromCluster(client, instances, 1000L, 3);
+
+        assertThat(versions)
+            .describedAs("Should return empty set when all nodes fail")
+            .isEmpty();
+    }
+
+    /**
+     * A gossip info entry carrying no {@code sstableVersions} value contributes nothing, so the result is empty
+     */
+    @Test
+    void testGetSSTableVersionsFromClusterWithNullSSTableVersions()
+    {
+        SidecarClient client = mock(SidecarClient.class);
+        SidecarInstance instance = new SidecarInstanceImpl("localhost", 9043);
+        Set<SidecarInstance> instances = Collections.singleton(instance);
+
+        // Mock gossip response with null SSTable versions
+        GossipInfoResponse response = new GossipInfoResponse();
+        GossipInfoResponse.GossipInfo info = createGossipInfo(null);
+        response.put("localhost", info);
+
+        when(client.gossipInfo(any(SidecarInstance.class)))
+            .thenReturn(CompletableFuture.completedFuture(response));
+
+        Set<String> versions = Sidecar.getSSTableVersionsFromCluster(client, instances, 1000L, 3);
+
+        assertThat(versions)
+            .describedAs("Should return empty set when SSTable versions are null")
+            .isEmpty();
+    }
+
+    /**
+     * An empty instance set yields an empty result; the client mock is left without any stubbing here
+     */
+    @Test
+    void testGetSSTableVersionsFromClusterWithEmptyInstancesSet()
+    {
+        SidecarClient client = mock(SidecarClient.class);
+        Set<SidecarInstance> instances = Collections.emptySet();
+
+        Set<String> versions = Sidecar.getSSTableVersionsFromCluster(client, instances, 1000L, 3);
+
+        assertThat(versions)
+            .describedAs("Should return empty set when no instances provided")
+            .isEmpty();
+    }
+
+    /**
+     * Builds a gossip info entry with fixed status, rack, datacenter, release version and RPC address values
+     *
+     * @param sstableVersions versions to store, comma-joined, under the {@code sstableVersions} key;
+     *                        when {@code null} or empty the key is not written at all
+     * @return the populated gossip info entry
+     */
+    private GossipInfoResponse.GossipInfo createGossipInfo(List<String> sstableVersions)
+    {
+        GossipInfoResponse.GossipInfo info = new GossipInfoResponse.GossipInfo();
+        info.put("status", "NORMAL");
+        info.put("rack", "RACK1");
+        info.put("datacenter", "DC1");
+        info.put("releaseVersion", "4.0.0");
+        info.put("rpcAddress", "127.0.0.1");
+        if (sstableVersions != null && !sstableVersions.isEmpty())
+        {
+            info.put("sstableVersions", String.join(",", sstableVersions));
+        }
+        return info;
+    }
+}
diff --git a/cassandra-five-zero-bridge/src/test/java/org/apache/cassandra/io/sstable/format/bti/BtiReaderUtilsTest.java b/cassandra-five-zero-bridge/src/test/java/org/apache/cassandra/io/sstable/format/bti/BtiReaderUtilsTest.java
index 8539b3ff2..11236cda8 100644
--- a/cassandra-five-zero-bridge/src/test/java/org/apache/cassandra/io/sstable/format/bti/BtiReaderUtilsTest.java
+++ b/cassandra-five-zero-bridge/src/test/java/org/apache/cassandra/io/sstable/format/bti/BtiReaderUtilsTest.java
@@ -61,7 +61,7 @@ public class BtiReaderUtilsTest
     public void testPartitionIndexTokenRange(boolean compression)
     {
         // Only test BTI format for Cassandra 5.0+
-        assumeThat(CassandraVersion.sstableFormat()).isEqualTo("bti");
+        assumeThat(CassandraVersion.configuredSSTableFormat()).isEqualTo("bti");
 
         qt().forAll(arbitrary().enumValues(Partitioner.class))
             .checkAssert(partitioner -> {
@@ -103,7 +103,7 @@ public void testPartitionIndexTokenRange(boolean compression)
     public void testPrimaryIndexContainsAnyKey(boolean compression)
     {
         // Only test BTI format for Cassandra 5.0+
-        assumeThat(CassandraVersion.sstableFormat()).isEqualTo("bti");
+
assumeThat(CassandraVersion.configuredSSTableFormat()).isEqualTo("bti"); qt().forAll(arbitrary().enumValues(Partitioner.class)) .checkAssert(partitioner -> { @@ -162,7 +162,7 @@ public void testPrimaryIndexContainsAnyKey(boolean compression) public void testReadPrimaryIndex(boolean compression) { // Only test BTI format for Cassandra 5.0+ - assumeThat(CassandraVersion.sstableFormat()).isEqualTo("bti"); + assumeThat(CassandraVersion.configuredSSTableFormat()).isEqualTo("bti"); qt().forAll(arbitrary().enumValues(Partitioner.class)) .checkAssert(partitioner -> { @@ -221,7 +221,7 @@ public void testReadPrimaryIndex(boolean compression) public void testConsumePrimaryIndex(boolean compression) { // Only test BTI format for Cassandra 5.0+ - assumeThat(CassandraVersion.sstableFormat()).isEqualTo("bti"); + assumeThat(CassandraVersion.configuredSSTableFormat()).isEqualTo("bti"); qt().forAll(arbitrary().enumValues(Partitioner.class)) .checkAssert(partitioner -> { diff --git a/cassandra-five-zero-bridge/src/test/java/org/apache/cassandra/spark/reader/SummaryDbTests.java b/cassandra-five-zero-bridge/src/test/java/org/apache/cassandra/spark/reader/SummaryDbTests.java index b16330e9b..17621e346 100644 --- a/cassandra-five-zero-bridge/src/test/java/org/apache/cassandra/spark/reader/SummaryDbTests.java +++ b/cassandra-five-zero-bridge/src/test/java/org/apache/cassandra/spark/reader/SummaryDbTests.java @@ -86,7 +86,7 @@ public BigInteger tokenAt(int index) public void testSearchSummary() { // Summary.db file is present only in BIG sstables - assumeThat(CassandraVersion.sstableFormat()).isEqualTo("big"); + assumeThat(CassandraVersion.configuredSSTableFormat()).isEqualTo("big"); qt().forAll(arbitrary().enumValues(Partitioner.class)) .checkAssert(partitioner -> { @@ -149,7 +149,7 @@ public void testSearchSummary() public void testSummaryBinarySearch() { // Summary.db file is present only in BIG sstables - assumeThat(CassandraVersion.sstableFormat()).isEqualTo("big"); + 
assumeThat(CassandraVersion.configuredSSTableFormat()).isEqualTo("big"); SummaryDbUtils.TokenList list = new ArrayTokenList(LongStream.range(5, 10000).boxed().toArray(Long[]::new)); assertThat(SummaryDbUtils.binarySearchSummary(list, BigInteger.valueOf(154L))).isEqualTo(148); @@ -169,7 +169,7 @@ public void testSummaryBinarySearch() public void testSummaryBinarySearchSparse() { // Summary.db file is present only in BIG sstables - assumeThat(CassandraVersion.sstableFormat()).isEqualTo("big"); + assumeThat(CassandraVersion.configuredSSTableFormat()).isEqualTo("big"); SummaryDbUtils.TokenList list = new ArrayTokenList(5L, 10L, 15L, 20L, 25L); assertThat(SummaryDbUtils.binarySearchSummary(list, BigInteger.valueOf(-500L))).isEqualTo(0); diff --git a/cassandra-five-zero-types/src/main/java/org/apache/cassandra/bridge/CassandraTypesImplementation.java b/cassandra-five-zero-types/src/main/java/org/apache/cassandra/bridge/CassandraTypesImplementation.java index a13071c11..6d5804cbf 100644 --- a/cassandra-five-zero-types/src/main/java/org/apache/cassandra/bridge/CassandraTypesImplementation.java +++ b/cassandra-five-zero-types/src/main/java/org/apache/cassandra/bridge/CassandraTypesImplementation.java @@ -54,7 +54,7 @@ public static synchronized void setup(BridgeInitializationParameters params) config.diagnostic_events_enabled = false; config.max_mutation_size = new DataStorageSpec.IntKibibytesBound(config.commitlog_segment_size.toKibibytes() / 2); config.concurrent_compactors = 4; - config.sstable.selected_format = params.getSstableFormat(); + config.sstable.selected_format = params.getConfiguredSSTableFormat(); Path tempDirectory; try {