-
Notifications
You must be signed in to change notification settings - Fork 30
CASSANALYTICS-31 SAI index support in analytics #220
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: trunk
Are you sure you want to change the base?
Changes from all commits
32f2c45
41020b0
80cc253
bf094e7
556ebe5
b45b60b
bb27eed
d3d583d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,14 +22,52 @@ | |
| import java.nio.file.Path; | ||
|
|
||
| import org.apache.cassandra.bridge.SSTableDescriptor; | ||
| import org.apache.cassandra.spark.data.FileType; | ||
|
|
||
| public final class SSTables | ||
| { | ||
| /** | ||
| * Suffix identifying the primary SSTable data component, e.g. "-Data.db". | ||
| * The leading '-' is significant: it excludes SAI per-index components such as | ||
| * "...+TermsData.db" that also end with "Data.db". | ||
| */ | ||
| private static final String DATA_COMPONENT_SUFFIX = "-" + FileType.DATA.getFileSuffix(); | ||
|
|
||
| /** | ||
| * Glob matching primary SSTable data components, e.g. "*-Data.db". | ||
| * Suitable for {@link java.nio.file.Files#newDirectoryStream(Path, String)}. | ||
| */ | ||
| public static final String DATA_COMPONENT_GLOB = "*" + DATA_COMPONENT_SUFFIX; | ||
|
|
||
| private SSTables() | ||
| { | ||
| throw new IllegalStateException(getClass() + " is static utility class and shall not be instantiated"); | ||
| } | ||
|
|
||
| /** | ||
| * Determine whether the given file name is a primary SSTable data component ("<descriptor>-Data.db"). | ||
| * The leading '-' check excludes SAI per-index components such as "...+TermsData.db" which also end with "Data.db". | ||
| * | ||
| * @param fileName file name (not a full path) | ||
| * @return true if the name is a primary data component | ||
| */ | ||
| public static boolean isDataComponent(String fileName) | ||
| { | ||
| return fileName.endsWith(DATA_COMPONENT_SUFFIX); | ||
| } | ||
|
|
||
| /** | ||
| * Determine whether the given path is a primary SSTable data component ("<descriptor>-Data.db"). | ||
| * | ||
| * @param path file path | ||
| * @return true if the path's file name is a primary data component | ||
| * @see #isDataComponent(String) | ||
| */ | ||
| public static boolean isDataComponent(Path path) | ||
| { | ||
| return isDataComponent(path.getFileName().toString()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we do a null check? |
||
| } | ||
|
|
||
| /** | ||
| * Get the sstable base name from data file path. | ||
| * For example, the base name of data file '/path/to/table/nb-1-big-Data.db' is 'nb-1-big' | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25,6 +25,7 @@ | |
| import java.util.HashMap; | ||
| import java.util.HashSet; | ||
| import java.util.List; | ||
| import java.util.Locale; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
| import java.util.regex.Matcher; | ||
|
|
@@ -59,6 +60,9 @@ public final class CqlUtils | |
| private static final Pattern ESCAPED_DOUBLE_BACKSLASH = Pattern.compile("\\\\"); | ||
| private static final Pattern COMPACTION_STRATEGY_PATTERN = Pattern.compile("compaction\\s*=\\s*\\{\\s*'class'\\s*:\\s*'([^']+)'"); | ||
|
|
||
| private static final Pattern MULTI_WHITESPACE_PATTERN = Pattern.compile("\\s+"); | ||
| private static final Pattern SAI_USING_PATTERN = Pattern.compile("USING '[^']*STORAGEATTACHEDINDEX'"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This pattern also matches something like "USING 'PrefixStorageAttachedIndex'", which is incorrect. How about using something like `Pattern.compile("USING '([^']*\\.)?STORAGEATTACHEDINDEX'")` instead? |
||
|
|
||
| private CqlUtils() | ||
| { | ||
| throw new IllegalStateException(getClass() + " is static utility class and shall not be instantiated"); | ||
|
|
@@ -259,16 +263,66 @@ public static Set<String> extractUdts(@NotNull String schemaStr, @NotNull String | |
| } | ||
|
|
||
| public static int extractIndexCount(@NotNull String schemaStr, @NotNull String keyspace, @NotNull String table) | ||
| { | ||
| return extractIndexStatements(schemaStr, keyspace, table).size(); | ||
| } | ||
|
|
||
| /** | ||
| * Extracts CREATE INDEX statements for the given table from the schema string. | ||
| * | ||
| * @param schemaStr full cluster schema text | ||
| * @param keyspace the keyspace name | ||
| * @param table the table name | ||
| * @return set of CREATE INDEX statements for the table | ||
| */ | ||
| public static Set<String> extractIndexStatements(@NotNull String schemaStr, | ||
| @NotNull String keyspace, | ||
| @NotNull String table) | ||
| { | ||
| String cleaned = cleanCql(schemaStr); | ||
| Pattern pattern = Pattern.compile(String.format("CREATE (CUSTOM )?INDEX \"?[^ ]* ON ?\"?%s?\"?\\.{1}\"?%s\"?[^;]*;", keyspace, table)); | ||
| Matcher matcher = pattern.matcher(cleaned); | ||
| int indexCount = 0; | ||
| Set<String> statements = new HashSet<>(); | ||
| while (matcher.find()) | ||
| { | ||
| indexCount++; | ||
| statements.add(matcher.group()); | ||
| } | ||
| return indexCount; | ||
| return statements; | ||
| } | ||
|
|
||
| /** | ||
| * Returns true if the given CREATE INDEX statement defines a Storage Attached Index (SAI). | ||
| * <p> | ||
| * SAI class may appear either as the short name ("StorageAttachedIndex") or fully qualified | ||
| * ("org.apache.cassandra.index.sai.StorageAttachedIndex"). | ||
| * | ||
| * @param createIndexStatement a CREATE INDEX CQL statement | ||
| * @return true if the index uses SAI | ||
| */ | ||
| public static boolean isSaiIndex(@NotNull String createIndexStatement) | ||
| { | ||
| // Matches any run of whitespace (spaces, tabs, newlines). Used to collapse a CREATE INDEX statement to | ||
| // single-spaced text so the SAI marker can be matched regardless of how the schema text was formatted | ||
| // (e.g. "USING 'StorageAttachedIndex'", a newline before USING, or a tab all normalize to one space). | ||
| String normalized = MULTI_WHITESPACE_PATTERN.matcher(createIndexStatement).replaceAll(" ").toUpperCase(Locale.ROOT); | ||
|
|
||
| // Matches the SAI marker inside a USING '...' clause (statement already upper-cased and whitespace-collapsed). | ||
| // The leading [^']* tolerates the fully-qualified class form | ||
| // (e.g. 'org.apache.cassandra.index.sai.StorageAttachedIndex') as well as the short name. | ||
| return SAI_USING_PATTERN.matcher(normalized).find(); | ||
| } | ||
|
|
||
| /** | ||
| * Returns true when {@code indexStatements} is non-empty and every statement defines a Storage Attached Index. | ||
| * This is the single "all-SAI table" predicate shared by schema validation and the write/commit paths, so the | ||
| * decision to generate SAI components and the decision to enable SAI import options can never disagree. | ||
| * | ||
| * @param indexStatements the CREATE INDEX statements for a table | ||
| * @return true if all indexes are SAI (and at least one exists) | ||
| */ | ||
| public static boolean hasOnlySaiIndexes(@NotNull Set<String> indexStatements) | ||
| { | ||
| return !indexStatements.isEmpty() && indexStatements.stream().allMatch(CqlUtils::isSaiIndex); | ||
| } | ||
|
|
||
| /** | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please add tests for the new methods being added, especially for SAI file names?