Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 13 additions & 16 deletions packages/cli/src/util/cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ import type { Logger } from './logger';

export const CACHE_DIR = '.cli-cache';

// TODO this is all a bit over complicated tbh
// When called without workflowName/stepId, returns the CACHE_DIR root.
// This is used directly in saveToCache to locate the .gitignore.
export const getCachePath = (
options: Pick<Opts, 'baseDir' | 'cachePath'>,
workflowName?: string,
Expand All @@ -24,7 +25,8 @@ export const getCachePath = (

const basePath = path.resolve(
baseDir ?? process.cwd(),
`${CACHE_DIR}/${workflowName}`
CACHE_DIR,
workflowName ?? ''
);

if (stepId) {
Expand All @@ -33,38 +35,33 @@ export const getCachePath = (
return basePath;
};

const ensureGitIgnore = (options: any, cachePath: string) => {
const ensureGitIgnore = (options: any, cacheRoot: string) => {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The old logic here, the reason we walk up through the folder structure, is to see if there's a parenting .gitignore file which we can edit. This would live at the root of the github repo, which we may or may not find.

This PR changes the approach to force a .gitignore file as a child of the cache folder.

I'm not against this. It might be a bit messy but I suppose it's not too big a deal. And I can see the original issue of writing a .gitignore file to root (although I can't reproduce this locally but that might be something in my local setup)

So Ok, let's do it!

if (!options._hasGitIgnore) {
// Find the root cache folder
let root = cachePath;
while (root.length > 1 && !root.endsWith(CACHE_DIR)) {
root = path.dirname(root);
}
// From the root cache, look for a .gitignore
const ignorePath = path.resolve(root, '.gitignore');
const ignorePath = path.join(cacheRoot, '.gitignore');
try {
fs.accessSync(ignorePath);
} catch (e) {
// doesn't exist!
fs.writeFileSync(ignorePath, '*');
}
options._hasGitIgnore = true;
}
options._hasGitIgnore = true;
};

export const saveToCache = async (
plan: ExecutionPlan,
stepId: string,
output: any,
options: Pick<Opts, 'baseDir' | 'cacheSteps'>,
options: Pick<Opts, 'baseDir' | 'cachePath' | 'cacheSteps'>,
logger: Logger
) => {
if (options.cacheSteps) {
const cachePath = await getCachePath(options, plan.workflow.name, stepId);
const cachePath = getCachePath(options, plan.workflow.name, stepId);
// Note that this is sync because other execution order gets messed up
fs.mkdirSync(path.dirname(cachePath), { recursive: true });

ensureGitIgnore(options, path.dirname(cachePath));
// getCachePath with no workflowName returns the CACHE_DIR root
ensureGitIgnore(options, getCachePath(options));

logger.info(`Writing ${stepId} output to ${cachePath}`);
fs.writeFileSync(cachePath, JSON.stringify(output));
Expand All @@ -73,10 +70,10 @@ export const saveToCache = async (

export const clearCache = async (
plan: ExecutionPlan,
options: Pick<Opts, 'baseDir'>,
options: Pick<Opts, 'baseDir' | 'cachePath'>,
logger: Logger
) => {
const cacheDir = await getCachePath(options, plan.workflow?.name);
const cacheDir = getCachePath(options, plan.workflow?.name);

try {
await rmdir(cacheDir, { recursive: true });
Expand Down
49 changes: 49 additions & 0 deletions packages/cli/test/execute/execute.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,55 @@ test.serial('.cli-cache has a gitignore', async (t) => {
t.is(gitignore, '*');
});

// Regression test for https://github.com/OpenFn/kit/issues/669
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this test making such a distinction between workflow.js and expression.js? I don't think it materially affects the cache behaviour?

test.serial('cache steps when running a .js expression', async (t) => {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test just ensures that the cache is written, not read. Which is fine - I'd just like the test name to be accurate.

mockFs({
'/job.js': `${fn}fn((state) => ({ ...state, x: 1 }));`,
});

const options = {
...defaultOptions,
expressionPath: '/job.js',
baseDir: '/',
cacheSteps: true,
};

const result = await handler(options, logger);
t.is(result.x, 1);

// workflow name is derived from the filename: 'job'
// step id is a generated uuid, so list the directory
const files = await fs.readdir('/.cli-cache/job');
t.is(files.length, 1);
t.true(files[0].endsWith('.json'));

const cached = JSON.parse(
await fs.readFile(`/.cli-cache/job/${files[0]}`, 'utf8')
);
t.is(cached.x, 1);
});

test.serial(
'.cli-cache has a gitignore when caching a .js expression',
async (t) => {
mockFs({
'/job.js': `${fn}fn((state) => ({ ...state, x: 1 }));`,
});

const options = {
...defaultOptions,
expressionPath: '/job.js',
baseDir: '/',
cacheSteps: true,
};

await handler(options, logger);

const gitignore = await fs.readFile('/.cli-cache/.gitignore', 'utf8');
t.is(gitignore, '*');
}
);

test.serial('run a workflow with initial state from stdin', async (t) => {
const workflow = {
workflow: {
Expand Down