Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
ac589b1
feat: create scheduled task-completion snapshots
audreypho Mar 23, 2026
2cee127
feat: task completion stats aggregate by tutorial and job scheduled t…
audreypho Apr 2, 2026
8064aa3
feat: add tests for task completion snapshots retrieval and capture
audreypho Apr 8, 2026
63afeff
feat: add factory and tests for task_completion_snapshot model
audreypho Apr 8, 2026
e267436
feat: add tests for aggregating and capturing task-completion-stats s…
audreypho Apr 8, 2026
169c2a2
Merge branch '10.0.x' into task-completion-snapshots
audreypho Apr 19, 2026
3265a83
refactor: remove foreign key constraint from task_completion_snapshot…
audreypho Apr 19, 2026
7c1fc5d
fix: `aggregate_task_complete_stats` uses existing status_for_task_de…
audreypho Apr 19, 2026
d9228f4
refactor: task completion stats uses async sidekiq job for snapshots
audreypho Apr 19, 2026
98f29d5
feat: add convenor permission for capturing task completion snapshots
audreypho Apr 19, 2026
f876fc5
feat: add rate limit to task completion snapshot (30mins)
audreypho Apr 19, 2026
a589be0
refactor: change snapshots to be stored as JSON files
audreypho Apr 20, 2026
69b2f72
feat: task completion snapshot captures data in CSV and stores indivi…
audreypho Apr 21, 2026
60c1476
fix: task completion stats csv uses task status names instead of id
audreypho Apr 22, 2026
3ce1cf3
fix: task completion csv correctly uses task status names
audreypho Apr 22, 2026
f7c30d7
feat: task completion snapshots include campus information
audreypho Apr 22, 2026
0a67fe3
feat: add campus to task completion snapshot
audreypho Apr 22, 2026
95c0d9b
feat: store task completion snapshots as zip
audreypho Apr 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions app/api/units_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,61 @@ class UnitsApi < Grape::API
present unit.student_task_completion_stats, with: Grape::Presenters::Presenter
end

desc 'Get historical task completion snapshots'
params do
  optional :start_date, type: Date, desc: 'Include snapshots captured on or after this date'
  optional :end_date, type: Date, desc: 'Include snapshots captured on or before this date'
  optional :limit, type: Integer, desc: 'Maximum number of snapshots to return', default: 365
end
get '/units/:id/stats/task_completion_snapshots' do
  unit = Unit.find(params[:id])
  unless authorise? current_user, unit, :download_stats
    error!({ error: "Not authorised to download stats of student tasks in #{unit.code}" }, 403)
  end

  # snapshot_timestamp is stored as a string epoch value, so it must be cast
  # for numeric comparison. NOTE(review): CAST ... AS UNSIGNED is MySQL-specific.
  snapshots = unit.task_completion_snapshots.order(snapshot_timestamp: :desc)
  if params[:start_date].present?
    start_timestamp = params[:start_date].in_time_zone.beginning_of_day.to_i
    snapshots = snapshots.where('CAST(snapshot_timestamp AS UNSIGNED) >= ?', start_timestamp)
  end
  if params[:end_date].present?
    end_timestamp = params[:end_date].in_time_zone.end_of_day.to_i
    snapshots = snapshots.where('CAST(snapshot_timestamp AS UNSIGNED) <= ?', end_timestamp)
  end

  # Clamp the requested limit to 1..365 so a zero or negative value cannot
  # yield an empty result set or an invalid SQL LIMIT clause.
  snapshots = snapshots.limit(params[:limit].to_i.clamp(1, 365))

  present snapshots.map { |snapshot|
    {
      snapshot_date: snapshot.snapshot_date,
      snapshot_timestamp: snapshot.snapshot_timestamp,
      stats: snapshot.load_stats
    }
  }, with: Grape::Presenters::Presenter
end

desc 'Capture task completion snapshot immediately for this unit'
post '/units/:id/stats/task_completion_snapshots/capture' do
  unit = Unit.find(params[:id])
  unless authorise? current_user, unit, :capture_task_completion_snapshot
    error!({ error: "Not authorised to capture stats of student tasks in #{unit.code}" }, 403)
  end

  # Rate limit: refuse the request when a snapshot already exists from the
  # past 30 minutes, telling the caller how long to wait.
  cutoff_epoch = 30.minutes.ago.to_i
  last_snapshot = unit.task_completion_snapshots
                      .where('CAST(snapshot_timestamp AS UNSIGNED) > ?', cutoff_epoch)
                      .order(snapshot_timestamp: :desc)
                      .first

  if last_snapshot.present?
    captured_at = last_snapshot.snapshot_time
    seconds_left = [(captured_at + 30.minutes - Time.zone.now).ceil, 0].max
    minutes_left = [(seconds_left / 60.0).ceil, 1].max
    error!({ error: "A snapshot was captured at #{captured_at.strftime('%H:%M')}. Please wait #{minutes_left} more minute(s) before capturing another snapshot." }, 429)
  end

  # Run the aggregation asynchronously; return the job so the client can poll its status.
  job_id = AggregateTaskCompletionStatsJob.perform_async(unit.id)
  present setup_job(job_id), with: Entities::SidekiqJobEntity
end

desc 'Download stats related to the number of tasks assessed by each tutor'
get '/csv/units/:id/tutor_assessments' do
unit = Unit.find(params[:id])
Expand Down
24 changes: 24 additions & 0 deletions app/helpers/file_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,27 @@ def unit_portfolio_dir(unit, create: true, archived: true)
dst
end

# Path of the analytics directory under a unit's work root.
# Creates the directory on disk when +create+ is true.
def unit_analytics_dir(unit, create: true, archived: true)
  path = "#{unit_work_root(unit, archived: archived)}analytics/"
  FileUtils.mkdir_p(path) if create
  path
end

# Path of the task-status snapshots directory inside a unit's analytics area.
# Creates the directory on disk when +create+ is true.
def unit_task_status_snapshots_dir(unit, create: true, archived: true)
  path = "#{unit_analytics_dir(unit, create: create, archived: archived)}task-statuses/"
  FileUtils.mkdir_p(path) if create
  path
end

# Full path of the zip file holding one timestamped task-status snapshot.
# The timestamp is sanitised so it is always safe as a filename component.
def unit_task_status_snapshot_path(unit, snapshot_timestamp, create: true, archived: true)
  dir = unit_task_status_snapshots_dir(unit, create: create, archived: archived)
  File.join(dir, "#{sanitized_filename(snapshot_timestamp.to_s)}.zip")
end
Comment on lines +295 to +298
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than storing each snapshot in its own zip file, it would be more efficient to store a single task-status-snapshots.zip in {unit}/analytics/task-status-snapshots.zip where we then place all the CSV files inside.

You'll need to then search for the timestamped snapshot within the ZIP


#
# Generates a path for storing student portfolios
#
Expand Down Expand Up @@ -778,6 +799,9 @@ def line_wrap(path, width: 160)
module_function :unit_dir
module_function :root_portfolio_dir
module_function :unit_portfolio_dir
module_function :unit_analytics_dir
module_function :unit_task_status_snapshots_dir
module_function :unit_task_status_snapshot_path
module_function :unit_work_root
module_function :project_work_root
module_function :student_portfolio_dir
Expand Down
110 changes: 110 additions & 0 deletions app/models/task_completion_snapshot.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# frozen_string_literal: true

require 'csv'
require 'zip'

# One point-in-time capture of a unit's task completion statistics.
# Only the unit id and the capture timestamp live in the database; the raw
# data is a CSV stored inside a zip file on disk (paths via FileHelper).
class TaskCompletionSnapshot < ApplicationRecord
  include FileHelper

  belongs_to :unit

  # snapshot_timestamp is a string epoch value; at most one snapshot per
  # unit per instant.
  validates :snapshot_timestamp, presence: true
  validates :snapshot_timestamp, uniqueness: { scope: :unit_id }

  after_destroy :delete_snapshot_file

  # Absolute path of this snapshot's zip file (parent dirs created on demand).
  def snapshot_file_path
    FileHelper.unit_task_status_snapshot_path(unit, snapshot_timestamp, create: true)
  end

  # Raw CSV text read from the zip, or nil when the file is missing or the
  # archive cannot be opened.
  def snapshot_contents
    return nil unless File.exist?(snapshot_file_path)

    read_csv_from_zip(snapshot_file_path)
  rescue Zip::Error
    nil
  end

  # Calendar date of the capture, or nil when no timestamp is recorded.
  def snapshot_date
    return nil if snapshot_timestamp.blank?

    snapshot_time.to_date
  end

  # Capture instant as a Time in the app's zone, or nil when no timestamp.
  def snapshot_time
    return nil if snapshot_timestamp.blank?

    Time.zone.at(snapshot_timestamp.to_i)
  end

  # Parse the stored CSV into nested counts:
  #   campus name -> tutorial name -> task abbreviation -> status key -> count
  # Returns an empty hash when the file is missing, empty, or malformed.
  def load_stats
    contents = snapshot_contents

    return {} if contents.blank?

    parse_csv_stats(contents)
  rescue CSV::MalformedCSVError
    {}
  end

  # Write +payload+ (CSV text) into the snapshot zip. Data is written to a
  # temp file first and then moved into place so readers never observe a
  # partially written archive.
  def store_stats!(payload)
    tmp_path = nil
    FileUtils.mkdir_p(File.dirname(snapshot_file_path))

    tmp_path = "#{snapshot_file_path}.tmp"
    Zip::OutputStream.open(tmp_path) do |zip|
      zip.put_next_entry('snapshot.csv')
      zip.write(payload.to_s)
    end

    FileUtils.mv(tmp_path, snapshot_file_path)
  ensure
    # Guard on the value, not defined?(tmp_path): the local exists (as nil)
    # even when mkdir_p raised before assignment, and FileUtils.rm_f(nil)
    # would raise TypeError and mask the original error.
    FileUtils.rm_f(tmp_path) if tmp_path
  end

  private

  # Aggregate the CSV rows into the nested stats hash described by load_stats.
  def parse_csv_stats(csv_text)
    csv = CSV.parse(csv_text, headers: true)
    return {} if csv.empty?

    stream_headers = unit.tutorial_streams.pluck(:abbreviation)
    stream_headers = ['Tutorial'] if stream_headers.empty?
    task_definitions = unit.task_definitions_by_grade

    # Memoise campus lookups so each abbreviation hits the database once
    # instead of once per row.
    campus_names = Hash.new do |memo, abbreviation|
      memo[abbreviation] = Campus.find_by(abbreviation: abbreviation)&.name || abbreviation
    end

    # Auto-vivifying nesting; innermost level counts occurrences per status key.
    stats = Hash.new { |hash, key| hash[key] = Hash.new { |tutorial_hash, tutorial_key| tutorial_hash[tutorial_key] = Hash.new { |task_hash, task_key| task_hash[task_key] = Hash.new(0) } } }

    csv.each do |row|
      campus_abbreviation = row['Campus'].to_s.strip
      next if campus_abbreviation.blank?

      campus_name = campus_names[campus_abbreviation]

      stream_headers.each do |stream_header|
        tutorial_name = row[stream_header].to_s.strip
        next if tutorial_name.blank?

        task_definitions.each do |task_definition|
          # Status cells hold TaskStatus ids; unknown/blank values fall back
          # to :not_started.
          status_value = row[task_definition.abbreviation].to_s.strip
          status_key = TaskStatus.id_to_key(status_value.to_i) || :not_started
          stats[campus_name][tutorial_name][task_definition.abbreviation][status_key.to_s] += 1
        end
      end
    end

    stats
  end

  # Extract the CSV entry from the zip — 'snapshot.csv' when present,
  # otherwise the first entry; nil for an empty archive.
  def read_csv_from_zip(zip_path)
    Zip::File.open(zip_path) do |zip_file|
      entry = zip_file.find_entry('snapshot.csv') || zip_file.entries.first
      return nil if entry.nil?

      entry.get_input_stream.read
    end
  end

  # after_destroy hook: remove the on-disk zip alongside the DB record.
  def delete_snapshot_file
    FileUtils.rm_f(snapshot_file_path)
  end
end
70 changes: 53 additions & 17 deletions app/models/unit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ def self.permissions
:get_tutor_times_summary,
:get_marking_sessions,
:upload_grades_csv,
:get_staff_notes
:get_staff_notes,
:capture_task_completion_snapshot
]

# What can admin do with units?
Expand Down Expand Up @@ -157,6 +158,7 @@ def role_for(user)
has_many :unit_roles, dependent: :destroy, inverse_of: :unit
has_many :learning_outcomes, as: :context, dependent: :destroy # inverse_of: :unit
has_many :marking_sessions, dependent: :destroy
has_many :task_completion_snapshots, dependent: :destroy, inverse_of: :unit

has_many :comments, through: :projects
has_many :tasks, through: :projects
Expand Down Expand Up @@ -1772,23 +1774,31 @@ def days_awaiting_feedback_by_tutorial_csv
end

def task_completion_csv
task_completion_csv_generator()
end

def task_completion_csv_generator(task_status_uses_id: false, includes_campus: false)
task_def_by_grade = task_definitions_by_grade
streams = tutorial_streams
grp_sets = group_sets
base_headers = [
'Student ID',
'Username',
'Student Name',
]
base_headers << 'Campus' if includes_campus
base_headers.push(
'Target Grade',
'Email',
'Portfolio',
'Grade',
'Rationale',
'Assessor',
)

CSV.generate() do |csv|
# Add header row
csv << ([
'Student ID',
'Username',
'Student Name',
'Target Grade',
'Email',
'Portfolio',
'Grade',
'Rationale',
'Assessor',
] +
csv << (base_headers +
(streams.count > 0 ? streams.map { |t| t.abbreviation } : ['Tutorial']) +
grp_sets.map(&:name) +
task_def_by_grade.map do |task_definition|
Expand All @@ -1803,7 +1813,11 @@ def task_completion_csv
# Get the details to fetch for each task definition...
td_select = task_def_by_grade.map do |td|
result = []
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN (CASE WHEN task_statuses.name IS NULL THEN 'Not Started' ELSE task_statuses.name END) ELSE NULL END) AS status_#{td.id}"
if task_status_uses_id
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN (CASE WHEN tasks.task_status_id IS NULL THEN #{TaskStatus.not_started.id} ELSE tasks.task_status_id END) ELSE NULL END) AS status_#{td.id}"
else
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN (CASE WHEN task_statuses.name IS NULL THEN 'Not Started' ELSE task_statuses.name END) ELSE NULL END) AS status_#{td.id}"
end
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN tasks.grade ELSE NULL END) AS grade_#{td.id}" if td.is_graded?
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN tasks.quality_pts ELSE NULL END) AS stars_#{td.id}" if td.has_stars?
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN tasks.contribution_pts ELSE NULL END) AS people_#{td.id}" if td.is_group_task?
Expand All @@ -1815,6 +1829,7 @@ def task_completion_csv
.joins(
:unit,
'INNER JOIN users ON projects.user_id = users.id',
'LEFT OUTER JOIN campuses ON campuses.id = projects.campus_id',
'INNER JOIN task_definitions ON task_definitions.unit_id = units.id',
'LEFT OUTER JOIN tutorial_streams ON tutorial_streams.unit_id = units.id',
'LEFT OUTER JOIN tutorial_enrolments ON tutorial_enrolments.project_id = projects.id',
Expand All @@ -1825,20 +1840,24 @@ def task_completion_csv
'LEFT OUTER JOIN groups ON groups.id = group_memberships.group_id'
).select(
'projects.id as project_id', 'users.student_id as student_id', 'users.username as username', 'users.first_name as first_name', 'projects.assessor_id as project_assessor',
'users.last_name as last_name', 'projects.target_grade', 'users.email as email', 'compile_portfolio', 'portfolio_production_date', 'grade', 'grade_rationale',
'users.last_name as last_name', 'campuses.abbreviation as campus_abbreviation', 'projects.target_grade', 'users.email as email', 'compile_portfolio', 'portfolio_production_date', 'grade', 'grade_rationale',
*td_select,
# Get tutorial for each stream in unit
*streams.map { |s| "MAX(CASE WHEN tutorials.tutorial_stream_id = #{s.id} OR tutorials.tutorial_stream_id IS NULL THEN tutorials.abbreviation ELSE NULL END) AS tutorial_#{s.id}" },
# Get tutorial for case when no stream
"MAX(CASE WHEN tutorial_streams.id IS NULL THEN tutorials.abbreviation ELSE NULL END) AS tutorial",
*grp_sets.map { |gs| "MAX(CASE WHEN groups.group_set_id = #{gs.id} THEN groups.name ELSE NULL END) AS grp_#{gs.id}" }
).group(
'projects.id', 'student_id', 'username', 'first_name', 'last_name', 'target_grade', 'email', 'compile_portfolio', 'portfolio_production_date', 'grade', 'grade_rationale'
'projects.id', 'student_id', 'username', 'first_name', 'last_name', 'campus_abbreviation', 'target_grade', 'email', 'compile_portfolio', 'portfolio_production_date', 'grade', 'grade_rationale'
).each do |row|
csv << ([
student_details = [
row['student_id'],
row['username'],
"#{row['first_name']} #{row['last_name']}",
]
student_details << row['campus_abbreviation'] if includes_campus
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can simplify this by just always including campus in the CSV, so you can remove the includes_campus argument.

task_status_uses_id should be fine to stay


csv << (student_details + [
GradeHelper.grade_for(row['target_grade']),
row['email'],
row['portfolio_production_date'].present? && !row['compile_portfolio'] && File.exist?(FileHelper.student_portfolio_path(self, row['username'], create: true)),
Expand All @@ -1854,7 +1873,11 @@ def task_completion_csv
end.flatten + grp_sets.map do |gs|
row["grp_#{gs.id}"]
end + task_def_by_grade.map do |td|
result = [row["status_#{td.id}"].nil? ? TaskStatus.not_started.name : row["status_#{td.id}"]]
if task_status_uses_id
result = [row["status_#{td.id}"].nil? ? TaskStatus.not_started.id : row["status_#{td.id}"].to_i]
else
result = [row["status_#{td.id}"].nil? ? TaskStatus.not_started.name : row["status_#{td.id}"]]
end
result << GradeHelper.short_grade_for(row["grade_#{td.id}"]) if td.is_graded?
result << row["stars_#{td.id}"] if td.has_stars?
result << row["people_#{td.id}"] if td.is_group_task?
Expand Down Expand Up @@ -3450,6 +3473,19 @@ def get_tutor_times_csv(start_date: nil, end_date: nil, timezone: nil, ignore_se
end
end

# Generate and persist a task-completion snapshot for this unit at the
# given time (defaults to now). Returns the TaskCompletionSnapshot record.
def capture_task_complete_stats_snapshot!(snapshot_time: Time.zone.now)
  payload = task_completion_csv_generator(task_status_uses_id: true, includes_campus: true)
  timestamp = snapshot_time.to_i.to_s

  snapshot = task_completion_snapshots.find_or_initialize_by(snapshot_timestamp: timestamp)
  snapshot.save!
  snapshot.store_stats!(payload)
  snapshot
end

private

def delete_associated_files
Expand Down
32 changes: 32 additions & 0 deletions app/sidekiq/aggregate_task_completion_stats_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# frozen_string_literal: true

# Sidekiq job that captures task-completion snapshots. With a unit id it
# snapshots that single unit; without one it snapshots every active unit
# (this is the mode used by the daily schedule).
class AggregateTaskCompletionStatsJob
  include Sidekiq::Job
  include Sidekiq::Status::Worker
  include LogHelper
  include ApplicationHelper

  # Lock on the first argument (the unit id) so only one aggregation per
  # unit runs at a time; duplicate enqueues are rejected, not queued, and
  # failures are not retried.
  sidekiq_options lock: :until_executed,
                  lock_args_method: ->(args) { [args.first] },
                  on_conflict: :reject,
                  retry: false

  # @param unit_id [Integer, nil] snapshot one unit, or all active units when nil
  def perform(unit_id = nil)
    logger.info 'Starting task completion stats aggregation...'

    # Progress reporting for Sidekiq::Status: single step 0 -> 1.
    at(0)
    total(1)

    if unit_id.present?
      Unit.find(unit_id).capture_task_complete_stats_snapshot!
    else
      Unit.active_units.find_each(&:capture_task_complete_stats_snapshot!)
    end

    at(1)
    logger.info 'Completed task completion stats aggregation!'
  rescue StandardError => e
    # Log then re-raise so Sidekiq marks the job as failed.
    logger.error e
    raise e
  end
end
4 changes: 4 additions & 0 deletions config/schedule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ refresh_moderation_feedback_timestamps:
cron: "every 60 minutes"
class: "RefreshModerationFeedbackTimestampsJob"

# Nightly capture of task-completion snapshots for all active units,
# scheduled just before midnight so each snapshot reflects the full day.
aggregate_task_completion_stats:
  cron: "every day at 11:55pm"
  class: "AggregateTaskCompletionStatsJob"

# archive_old_units:
# cron: "every 6 months"
# class: "ArchiveOldUnitsJob"
Loading
Loading