Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
ac589b1
feat: create scheduled task-completion snapshots
audreypho Mar 23, 2026
2cee127
feat: task completion stats aggregate by tutorial and job scheduled t…
audreypho Apr 2, 2026
8064aa3
feat: add tests for task completion snapshots retrieval and capture
audreypho Apr 8, 2026
63afeff
feat: add factory and tests for task_completion_snapshot model
audreypho Apr 8, 2026
e267436
feat: add tests for aggregating and capturing task-completion-stats s…
audreypho Apr 8, 2026
169c2a2
Merge branch '10.0.x' into task-completion-snapshots
audreypho Apr 19, 2026
3265a83
refactor: remove foreign key constraint from task_completion_snapshot…
audreypho Apr 19, 2026
7c1fc5d
fix: `aggregate_task_complete_stats` uses existing status_for_task_de…
audreypho Apr 19, 2026
d9228f4
refactor: task completion stats uses async sidekiq job for snapshots
audreypho Apr 19, 2026
98f29d5
feat: add convenor permission for capturing task completion snapshots
audreypho Apr 19, 2026
f876fc5
feat: add rate limit to task completion snapshot (30mins)
audreypho Apr 19, 2026
a589be0
refactor: change snapshots to be stored as JSON files
audreypho Apr 20, 2026
69b2f72
feat: task completion snapshot captures data in CSV and stores indivi…
audreypho Apr 21, 2026
60c1476
fix: task completion stats csv uses task status names instead of id
audreypho Apr 22, 2026
3ce1cf3
fix: task completion csv correctly uses task status names
audreypho Apr 22, 2026
f7c30d7
feat: task completion snapshots include campus information
audreypho Apr 22, 2026
0a67fe3
feat: add campus to task completion snapshot
audreypho Apr 22, 2026
95c0d9b
feat: store task completion snapshots as zip
audreypho Apr 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions app/api/units_api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,61 @@ class UnitsApi < Grape::API
present unit.student_task_completion_stats, with: Grape::Presenters::Presenter
end

desc 'Get historical task completion snapshots'
params do
  optional :start_date, type: Date, desc: 'Include snapshots captured on or after this date'
  optional :end_date, type: Date, desc: 'Include snapshots captured on or before this date'
  optional :limit, type: Integer, desc: 'Maximum number of snapshots to return', default: 365
end
get '/units/:id/stats/task_completion_snapshots' do
  unit = Unit.find(params[:id])
  unless authorise? current_user, unit, :download_stats
    error!({ error: "Not authorised to download stats of student tasks in #{unit.code}" }, 403)
  end

  # snapshot_timestamp is stored as a string epoch value, so it must be cast
  # for numeric comparison. NOTE(review): CAST ... AS UNSIGNED is MySQL-specific.
  snapshots = unit.task_completion_snapshots.order(snapshot_timestamp: :desc)
  if params[:start_date].present?
    start_timestamp = params[:start_date].in_time_zone.beginning_of_day.to_i
    snapshots = snapshots.where('CAST(snapshot_timestamp AS UNSIGNED) >= ?', start_timestamp)
  end
  if params[:end_date].present?
    end_timestamp = params[:end_date].in_time_zone.end_of_day.to_i
    snapshots = snapshots.where('CAST(snapshot_timestamp AS UNSIGNED) <= ?', end_timestamp)
  end

  # Clamp the requested limit to 1..365 so a zero or negative value cannot
  # yield an empty result set or an invalid SQL LIMIT clause.
  snapshots = snapshots.limit(params[:limit].to_i.clamp(1, 365))

  present snapshots.map { |snapshot|
    {
      snapshot_date: snapshot.snapshot_date,
      snapshot_timestamp: snapshot.snapshot_timestamp,
      stats: snapshot.load_stats
    }
  }, with: Grape::Presenters::Presenter
end

desc 'Capture task completion snapshot immediately for this unit'
post '/units/:id/stats/task_completion_snapshots/capture' do
  unit = Unit.find(params[:id])
  unless authorise? current_user, unit, :capture_task_completion_snapshot
    error!({ error: "Not authorised to capture stats of student tasks in #{unit.code}" }, 403)
  end

  # Rate limit: refuse the request when a snapshot already exists from the
  # past 30 minutes, telling the caller how long to wait.
  cutoff_epoch = 30.minutes.ago.to_i
  last_snapshot = unit.task_completion_snapshots
                      .where('CAST(snapshot_timestamp AS UNSIGNED) > ?', cutoff_epoch)
                      .order(snapshot_timestamp: :desc)
                      .first

  if last_snapshot.present?
    captured_at = last_snapshot.snapshot_time
    seconds_left = [(captured_at + 30.minutes - Time.zone.now).ceil, 0].max
    minutes_left = [(seconds_left / 60.0).ceil, 1].max
    error!({ error: "A snapshot was captured at #{captured_at.strftime('%H:%M')}. Please wait #{minutes_left} more minute(s) before capturing another snapshot." }, 429)
  end

  # Run the aggregation asynchronously; return the job so the client can poll its status.
  job_id = AggregateTaskCompletionStatsJob.perform_async(unit.id)
  present setup_job(job_id), with: Entities::SidekiqJobEntity
end

desc 'Download stats related to the number of tasks assessed by each tutor'
get '/csv/units/:id/tutor_assessments' do
unit = Unit.find(params[:id])
Expand Down
24 changes: 24 additions & 0 deletions app/helpers/file_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,27 @@ def unit_portfolio_dir(unit, create: true, archived: true)
dst
end

# Path of the analytics directory under a unit's work root.
# Creates the directory on disk when +create+ is true.
def unit_analytics_dir(unit, create: true, archived: true)
  path = "#{unit_work_root(unit, archived: archived)}analytics/"
  FileUtils.mkdir_p(path) if create
  path
end

# Path of the task-status snapshots directory inside a unit's analytics area.
# Creates the directory on disk when +create+ is true.
def unit_task_status_snapshots_dir(unit, create: true, archived: true)
  path = "#{unit_analytics_dir(unit, create: create, archived: archived)}task-statuses/"
  FileUtils.mkdir_p(path) if create
  path
end

# Full path of the zip file holding one timestamped task-status snapshot.
# The timestamp is sanitised so it is always safe as a filename component.
def unit_task_status_snapshot_path(unit, snapshot_timestamp, create: true, archived: true)
  dir = unit_task_status_snapshots_dir(unit, create: create, archived: archived)
  File.join(dir, "#{sanitized_filename(snapshot_timestamp.to_s)}.zip")
end
Comment on lines +295 to +298
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than storing each snapshot in its own zip file, it would be more efficient to store a single task-status-snapshots.zip in {unit}/analytics/task-status-snapshots.zip where we then place all the CSV files inside.

You'll need to then search for the timestamped snapshot within the ZIP


#
# Generates a path for storing student portfolios
#
Expand Down Expand Up @@ -778,6 +799,9 @@ def line_wrap(path, width: 160)
module_function :unit_dir
module_function :root_portfolio_dir
module_function :unit_portfolio_dir
module_function :unit_analytics_dir
module_function :unit_task_status_snapshots_dir
module_function :unit_task_status_snapshot_path
module_function :unit_work_root
module_function :project_work_root
module_function :student_portfolio_dir
Expand Down
110 changes: 110 additions & 0 deletions app/models/task_completion_snapshot.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# frozen_string_literal: true

require 'csv'
require 'zip'

# One point-in-time capture of a unit's task completion statistics.
# Only the unit id and the capture timestamp live in the database; the raw
# data is a CSV stored inside a zip file on disk (paths via FileHelper).
class TaskCompletionSnapshot < ApplicationRecord
  include FileHelper

  belongs_to :unit

  # snapshot_timestamp is a string epoch value; at most one snapshot per
  # unit per instant.
  validates :snapshot_timestamp, presence: true
  validates :snapshot_timestamp, uniqueness: { scope: :unit_id }

  after_destroy :delete_snapshot_file

  # Absolute path of this snapshot's zip file (parent dirs created on demand).
  def snapshot_file_path
    FileHelper.unit_task_status_snapshot_path(unit, snapshot_timestamp, create: true)
  end

  # Raw CSV text read from the zip, or nil when the file is missing or the
  # archive cannot be opened.
  def snapshot_contents
    return nil unless File.exist?(snapshot_file_path)

    read_csv_from_zip(snapshot_file_path)
  rescue Zip::Error
    nil
  end

  # Calendar date of the capture, or nil when no timestamp is recorded.
  def snapshot_date
    return nil if snapshot_timestamp.blank?

    snapshot_time.to_date
  end

  # Capture instant as a Time in the app's zone, or nil when no timestamp.
  def snapshot_time
    return nil if snapshot_timestamp.blank?

    Time.zone.at(snapshot_timestamp.to_i)
  end

  # Parse the stored CSV into nested counts:
  #   campus name -> tutorial name -> task abbreviation -> status key -> count
  # Returns an empty hash when the file is missing, empty, or malformed.
  def load_stats
    contents = snapshot_contents

    return {} if contents.blank?

    parse_csv_stats(contents)
  rescue CSV::MalformedCSVError
    {}
  end

  # Write +payload+ (CSV text) into the snapshot zip. Data is written to a
  # temp file first and then moved into place so readers never observe a
  # partially written archive.
  def store_stats!(payload)
    tmp_path = nil
    FileUtils.mkdir_p(File.dirname(snapshot_file_path))

    tmp_path = "#{snapshot_file_path}.tmp"
    Zip::OutputStream.open(tmp_path) do |zip|
      zip.put_next_entry('snapshot.csv')
      zip.write(payload.to_s)
    end

    FileUtils.mv(tmp_path, snapshot_file_path)
  ensure
    # Guard on the value, not defined?(tmp_path): the local exists (as nil)
    # even when mkdir_p raised before assignment, and FileUtils.rm_f(nil)
    # would raise TypeError and mask the original error.
    FileUtils.rm_f(tmp_path) if tmp_path
  end

  private

  # Aggregate the CSV rows into the nested stats hash described by load_stats.
  def parse_csv_stats(csv_text)
    csv = CSV.parse(csv_text, headers: true)
    return {} if csv.empty?

    stream_headers = unit.tutorial_streams.pluck(:abbreviation)
    stream_headers = ['Tutorial'] if stream_headers.empty?
    task_definitions = unit.task_definitions_by_grade

    # Memoise campus lookups so each abbreviation hits the database once
    # instead of once per row.
    campus_names = Hash.new do |memo, abbreviation|
      memo[abbreviation] = Campus.find_by(abbreviation: abbreviation)&.name || abbreviation
    end

    # Auto-vivifying nesting; innermost level counts occurrences per status key.
    stats = Hash.new { |hash, key| hash[key] = Hash.new { |tutorial_hash, tutorial_key| tutorial_hash[tutorial_key] = Hash.new { |task_hash, task_key| task_hash[task_key] = Hash.new(0) } } }

    csv.each do |row|
      campus_abbreviation = row['Campus'].to_s.strip
      next if campus_abbreviation.blank?

      campus_name = campus_names[campus_abbreviation]

      stream_headers.each do |stream_header|
        tutorial_name = row[stream_header].to_s.strip
        next if tutorial_name.blank?

        task_definitions.each do |task_definition|
          # Status cells hold TaskStatus ids; unknown/blank values fall back
          # to :not_started.
          status_value = row[task_definition.abbreviation].to_s.strip
          status_key = TaskStatus.id_to_key(status_value.to_i) || :not_started
          stats[campus_name][tutorial_name][task_definition.abbreviation][status_key.to_s] += 1
        end
      end
    end

    stats
  end

  # Extract the CSV entry from the zip — 'snapshot.csv' when present,
  # otherwise the first entry; nil for an empty archive.
  def read_csv_from_zip(zip_path)
    Zip::File.open(zip_path) do |zip_file|
      entry = zip_file.find_entry('snapshot.csv') || zip_file.entries.first
      return nil if entry.nil?

      entry.get_input_stream.read
    end
  end

  # after_destroy hook: remove the on-disk zip alongside the DB record.
  def delete_snapshot_file
    FileUtils.rm_f(snapshot_file_path)
  end
end
70 changes: 53 additions & 17 deletions app/models/unit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ def self.permissions
:get_tutor_times_summary,
:get_marking_sessions,
:upload_grades_csv,
:get_staff_notes
:get_staff_notes,
:capture_task_completion_snapshot
]

# What can admin do with units?
Expand Down Expand Up @@ -157,6 +158,7 @@ def role_for(user)
has_many :unit_roles, dependent: :destroy, inverse_of: :unit
has_many :learning_outcomes, as: :context, dependent: :destroy # inverse_of: :unit
has_many :marking_sessions, dependent: :destroy
has_many :task_completion_snapshots, dependent: :destroy, inverse_of: :unit

has_many :comments, through: :projects
has_many :tasks, through: :projects
Expand Down Expand Up @@ -1772,23 +1774,31 @@ def days_awaiting_feedback_by_tutorial_csv
end

def task_completion_csv
task_completion_csv_generator()
end

def task_completion_csv_generator(task_status_uses_id: false, includes_campus: false)
task_def_by_grade = task_definitions_by_grade
streams = tutorial_streams
grp_sets = group_sets
base_headers = [
'Student ID',
'Username',
'Student Name',
]
base_headers << 'Campus' if includes_campus
base_headers.push(
'Target Grade',
'Email',
'Portfolio',
'Grade',
'Rationale',
'Assessor',
)

CSV.generate() do |csv|
# Add header row
csv << ([
'Student ID',
'Username',
'Student Name',
'Target Grade',
'Email',
'Portfolio',
'Grade',
'Rationale',
'Assessor',
] +
csv << (base_headers +
(streams.count > 0 ? streams.map { |t| t.abbreviation } : ['Tutorial']) +
grp_sets.map(&:name) +
task_def_by_grade.map do |task_definition|
Expand All @@ -1803,7 +1813,11 @@ def task_completion_csv
# Get the details to fetch for each task definition...
td_select = task_def_by_grade.map do |td|
result = []
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN (CASE WHEN task_statuses.name IS NULL THEN 'Not Started' ELSE task_statuses.name END) ELSE NULL END) AS status_#{td.id}"
if task_status_uses_id
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN (CASE WHEN tasks.task_status_id IS NULL THEN #{TaskStatus.not_started.id} ELSE tasks.task_status_id END) ELSE NULL END) AS status_#{td.id}"
else
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN (CASE WHEN task_statuses.name IS NULL THEN 'Not Started' ELSE task_statuses.name END) ELSE NULL END) AS status_#{td.id}"
end
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN tasks.grade ELSE NULL END) AS grade_#{td.id}" if td.is_graded?
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN tasks.quality_pts ELSE NULL END) AS stars_#{td.id}" if td.has_stars?
result << "MAX(CASE WHEN tasks.task_definition_id = #{td.id} THEN tasks.contribution_pts ELSE NULL END) AS people_#{td.id}" if td.is_group_task?
Expand All @@ -1815,6 +1829,7 @@ def task_completion_csv
.joins(
:unit,
'INNER JOIN users ON projects.user_id = users.id',
'LEFT OUTER JOIN campuses ON campuses.id = projects.campus_id',
'INNER JOIN task_definitions ON task_definitions.unit_id = units.id',
'LEFT OUTER JOIN tutorial_streams ON tutorial_streams.unit_id = units.id',
'LEFT OUTER JOIN tutorial_enrolments ON tutorial_enrolments.project_id = projects.id',
Expand All @@ -1825,20 +1840,24 @@ def task_completion_csv
'LEFT OUTER JOIN groups ON groups.id = group_memberships.group_id'
).select(
'projects.id as project_id', 'users.student_id as student_id', 'users.username as username', 'users.first_name as first_name', 'projects.assessor_id as project_assessor',
'users.last_name as last_name', 'projects.target_grade', 'users.email as email', 'compile_portfolio', 'portfolio_production_date', 'grade', 'grade_rationale',
'users.last_name as last_name', 'campuses.abbreviation as campus_abbreviation', 'projects.target_grade', 'users.email as email', 'compile_portfolio', 'portfolio_production_date', 'grade', 'grade_rationale',
*td_select,
# Get tutorial for each stream in unit
*streams.map { |s| "MAX(CASE WHEN tutorials.tutorial_stream_id = #{s.id} OR tutorials.tutorial_stream_id IS NULL THEN tutorials.abbreviation ELSE NULL END) AS tutorial_#{s.id}" },
# Get tutorial for case when no stream
"MAX(CASE WHEN tutorial_streams.id IS NULL THEN tutorials.abbreviation ELSE NULL END) AS tutorial",
*grp_sets.map { |gs| "MAX(CASE WHEN groups.group_set_id = #{gs.id} THEN groups.name ELSE NULL END) AS grp_#{gs.id}" }
).group(
'projects.id', 'student_id', 'username', 'first_name', 'last_name', 'target_grade', 'email', 'compile_portfolio', 'portfolio_production_date', 'grade', 'grade_rationale'
'projects.id', 'student_id', 'username', 'first_name', 'last_name', 'campus_abbreviation', 'target_grade', 'email', 'compile_portfolio', 'portfolio_production_date', 'grade', 'grade_rationale'
).each do |row|
csv << ([
student_details = [
row['student_id'],
row['username'],
"#{row['first_name']} #{row['last_name']}",
]
student_details << row['campus_abbreviation'] if includes_campus
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can simplify this by just always including campus in the CSV, so you can remove the includes_campus argument.

task_status_uses_id should be fine to stay


csv << (student_details + [
GradeHelper.grade_for(row['target_grade']),
row['email'],
row['portfolio_production_date'].present? && !row['compile_portfolio'] && File.exist?(FileHelper.student_portfolio_path(self, row['username'], create: true)),
Expand All @@ -1854,7 +1873,11 @@ def task_completion_csv
end.flatten + grp_sets.map do |gs|
row["grp_#{gs.id}"]
end + task_def_by_grade.map do |td|
result = [row["status_#{td.id}"].nil? ? TaskStatus.not_started.name : row["status_#{td.id}"]]
if task_status_uses_id
result = [row["status_#{td.id}"].nil? ? TaskStatus.not_started.id : row["status_#{td.id}"].to_i]
else
result = [row["status_#{td.id}"].nil? ? TaskStatus.not_started.name : row["status_#{td.id}"]]
end
result << GradeHelper.short_grade_for(row["grade_#{td.id}"]) if td.is_graded?
result << row["stars_#{td.id}"] if td.has_stars?
result << row["people_#{td.id}"] if td.is_group_task?
Expand Down Expand Up @@ -3450,6 +3473,19 @@ def get_tutor_times_csv(start_date: nil, end_date: nil, timezone: nil, ignore_se
end
end

# Generate and persist a task-completion snapshot for this unit at the
# given time (defaults to now). Returns the TaskCompletionSnapshot record.
def capture_task_complete_stats_snapshot!(snapshot_time: Time.zone.now)
  payload = task_completion_csv_generator(task_status_uses_id: true, includes_campus: true)
  timestamp = snapshot_time.to_i.to_s

  snapshot = task_completion_snapshots.find_or_initialize_by(snapshot_timestamp: timestamp)
  snapshot.save!
  snapshot.store_stats!(payload)
  snapshot
end

private

def delete_associated_files
Expand Down
32 changes: 32 additions & 0 deletions app/sidekiq/aggregate_task_completion_stats_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# frozen_string_literal: true

# Sidekiq job that captures task-completion snapshots. With a unit id it
# snapshots that single unit; without one it snapshots every active unit
# (this is the mode used by the daily schedule).
class AggregateTaskCompletionStatsJob
  include Sidekiq::Job
  include Sidekiq::Status::Worker
  include LogHelper
  include ApplicationHelper

  # Lock on the first argument (the unit id) so only one aggregation per
  # unit runs at a time; duplicate enqueues are rejected, not queued, and
  # failures are not retried.
  sidekiq_options lock: :until_executed,
                  lock_args_method: ->(args) { [args.first] },
                  on_conflict: :reject,
                  retry: false

  # @param unit_id [Integer, nil] snapshot one unit, or all active units when nil
  def perform(unit_id = nil)
    logger.info 'Starting task completion stats aggregation...'

    # Progress reporting for Sidekiq::Status: single step 0 -> 1.
    at(0)
    total(1)

    if unit_id.present?
      Unit.find(unit_id).capture_task_complete_stats_snapshot!
    else
      Unit.active_units.find_each(&:capture_task_complete_stats_snapshot!)
    end

    at(1)
    logger.info 'Completed task completion stats aggregation!'
  rescue StandardError => e
    # Log then re-raise so Sidekiq marks the job as failed.
    logger.error e
    raise e
  end
end
4 changes: 4 additions & 0 deletions config/schedule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ refresh_moderation_feedback_timestamps:
cron: "every 60 minutes"
class: "RefreshModerationFeedbackTimestampsJob"

# Nightly capture of task-completion snapshots for all active units,
# scheduled just before midnight so each snapshot reflects the full day.
aggregate_task_completion_stats:
  cron: "every day at 11:55pm"
  class: "AggregateTaskCompletionStatsJob"

# archive_old_units:
# cron: "every 6 months"
# class: "ArchiveOldUnitsJob"
Loading
Loading