5554: Add Rake task to delete old job log records.
authorBrett Smith <brett@curoverse.com>
Thu, 23 Apr 2015 16:40:14 +0000 (12:40 -0400)
committerBrett Smith <brett@curoverse.com>
Mon, 27 Apr 2015 15:23:21 +0000 (11:23 -0400)
services/api/Rakefile
services/api/config/application.default.yml
services/api/lib/tasks/delete_old_job_logs.rake [new file with mode: 0644]
services/api/lib/tasks/test_tasks.rake [new file with mode: 0644]
services/api/test/fixtures/jobs.yml
services/api/test/fixtures/logs.yml
services/api/test/tasks/delete_old_job_logs_test.rb [new file with mode: 0644]

index 223f5ca2168c5ab25d316ef97dd3eb6081fb1463..22b25313a5f9536764a7838ada2cb4222ebe0225 100644 (file)
@@ -11,3 +11,24 @@ rescue
 end
 
 Server::Application.load_tasks
+
+namespace :test do
+  task(:run).clear
+  # Copied from the definition in Rails 3.2.
+  # This may need to be updated if we upgrade Rails.
+  task :run do
+    errors = %w(test:units test:functionals test:integration test:tasks).collect do |task|
+      begin
+        Rake::Task[task].invoke
+        nil
+      rescue => e
+        { :task => task, :exception => e }
+      end
+    end.compact
+
+    if errors.any?
+      puts errors.map { |e| "Errors running #{e[:task]}! #{e[:exception].inspect}" }.join("\n")
+      abort
+    end
+  end
+end
index 0ea685a121534d78a51cde3ab65bc2198a375332..2c69325e2cad668135d6021d738b9adc5cd6f077 100644 (file)
@@ -275,3 +275,11 @@ common:
   # Note you must separately configure the upstream web server or proxy to
   # actually enforce the desired maximum request size on the server side.
   max_request_size: 134217728
+
+  # When you run the db:delete_old_job_logs task, it will find jobs that
+  # have been finished for at least this many seconds, and delete their
+  # stderr logs from the logs table.
+  # If a pipeline runs longer than this, client applications like
+  # Workbench will lose the ability to display logs from jobs in the
+  # pipeline that finished this long ago.
+  clean_job_log_rows_after: <%= 30.days %>
diff --git a/services/api/lib/tasks/delete_old_job_logs.rake b/services/api/lib/tasks/delete_old_job_logs.rake
new file mode 100644 (file)
index 0000000..7f2b31e
--- /dev/null
@@ -0,0 +1,17 @@
+# This task finds jobs that have been finished for at least as long as
+# the duration specified in the `clean_job_log_rows_after`
+# configuration setting, and deletes their stderr logs from the logs table.
+
+namespace :db do
+  desc "Remove old job stderr entries from the logs table"
+  task delete_old_job_logs: :environment do
+    Log.select("logs.id").
+        joins("JOIN jobs ON object_uuid = jobs.uuid").
+        where("event_type = :etype AND jobs.log IS NOT NULL AND jobs.finished_at < :age",
+              etype: "stderr",
+              age: Rails.configuration.clean_job_log_rows_after.ago).
+        find_in_batches do |old_log_ids|
+      Log.where(id: old_log_ids.map(&:id)).delete_all
+    end
+  end
+end
diff --git a/services/api/lib/tasks/test_tasks.rake b/services/api/lib/tasks/test_tasks.rake
new file mode 100644 (file)
index 0000000..27bf232
--- /dev/null
@@ -0,0 +1,6 @@
+namespace :test do
+  new_task = Rake::TestTask.new(tasks: "test:prepare") do |t|
+    t.libs << "test"
+    t.pattern = "test/tasks/**/*_test.rb"
+  end
+end
index 23b32ef9cde2bfbce7d9435982fb9c8ae7bab737..aa1613449104994785fb3a07bb46af5bd43ed182 100644 (file)
@@ -163,6 +163,7 @@ runningbarbaz:
 previous_job_run:
   uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
   created_at: <%= 14.minute.ago.to_s(:db) %>
+  finished_at: <%= 13.minutes.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   repository: active/foo
   script: hash
@@ -171,6 +172,23 @@ previous_job_run:
     input: fa7aeb5140e2848d39b416daeef4ffc5+45
     an_integer: "1"
   success: true
+  log: d41d8cd98f00b204e9800998ecf8427e+0
+  output: ea10d51bcf88862dbcc36eb292017dfd+45
+  state: Complete
+
+previous_ancient_job_run:
+  uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+  created_at: <%= 366.days.ago.to_s(:db) %>
+  finished_at: <%= 365.days.ago.to_s(:db) %>
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  repository: active/foo
+  script: hash
+  script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
+  script_parameters:
+    input: fa7aeb5140e2848d39b416daeef4ffc5+45
+    an_integer: "2"
+  success: true
+  log: d41d8cd98f00b204e9800998ecf8427e+0
   output: ea10d51bcf88862dbcc36eb292017dfd+45
   state: Complete
 
index aea7980b0509fda97c40a662807ae2e671f3f77f..9179e6dff92a4c62a0271dd78786b98dc726fef4 100644 (file)
@@ -101,3 +101,41 @@ log_line_for_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere
   updated_at: 2014-11-07 23:33:42.347455000 Z
   modified_at: 2014-11-07 23:33:42.347455000 Z
   object_owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+
+crunchstat_for_previous_job:
+  id: 10
+  uuid: zzzzz-57u5n-eir3aesha3kaene
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_by_client_uuid: zzzzz-ozdt8-obw7foaks3qjyej
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  object_uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
+  event_at: 2014-11-07 23:33:42.347455000 Z
+  event_type: stderr
+  summary: ~
+  properties:
+    text: '2014-11-07_23:33:41 zzzzz-8i9sb-cjs4pklxxjykqqq 11592 1 stderr crunchstat:
+      cpu 1935.4300 user 59.4100 sys 8 cpus -- interval 10.0002 seconds 12.9900 user
+      0.9900 sys'
+  created_at: 2014-11-07 23:33:42.351913000 Z
+  updated_at: 2014-11-07 23:33:42.347455000 Z
+  modified_at: 2014-11-07 23:33:42.347455000 Z
+  object_owner_uuid: zzzzz-j7d0g-xurymjxw79nv3jz
+
+crunchstat_for_ancient_job:
+  id: 11
+  uuid: zzzzz-57u5n-ixioph7ieb5ung8
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_by_client_uuid: zzzzz-ozdt8-obw7foaks3qjyej
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  object_uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+  event_at: 2013-11-07 23:33:42.347455000 Z
+  event_type: stderr
+  summary: ~
+  properties:
+    text: '2013-11-07_23:33:41 zzzzz-8i9sb-ahd7cie8jah9qui 29610 1 stderr crunchstat:
+      cpu 1935.4300 user 59.4100 sys 8 cpus -- interval 10.0002 seconds 12.9900 user
+      0.9900 sys'
+  created_at: 2013-11-07 23:33:42.351913000 Z
+  updated_at: 2013-11-07 23:33:42.347455000 Z
+  modified_at: 2013-11-07 23:33:42.347455000 Z
+  object_owner_uuid: zzzzz-j7d0g-xurymjxw79nv3jz
diff --git a/services/api/test/tasks/delete_old_job_logs_test.rb b/services/api/test/tasks/delete_old_job_logs_test.rb
new file mode 100644 (file)
index 0000000..b922fb3
--- /dev/null
@@ -0,0 +1,50 @@
+require 'test_helper'
+require 'rake'
+
+Rake.application.rake_require "tasks/delete_old_job_logs"
+Rake::Task.define_task(:environment)
+
+class DeleteOldJobLogsTaskTest < ActiveSupport::TestCase
+  TASK_NAME = "db:delete_old_job_logs"
+
+  def log_uuids(*fixture_names)
+    fixture_names.map { |name| logs(name).uuid }
+  end
+
+  def run_with_expiry(clean_after)
+    Rails.configuration.clean_job_log_rows_after = clean_after
+    Rake::Task[TASK_NAME].reenable
+    Rake.application.invoke_task TASK_NAME
+  end
+
+  def job_stderr_logs
+    Log.where("object_uuid LIKE :pattern AND event_type = :etype",
+              pattern: "_____-8i9sb-_______________",
+              etype: "stderr")
+  end
+
+  def check_existence(test_method, fixture_uuids)
+    uuids_now = job_stderr_logs.map(&:uuid)
+    fixture_uuids.each do |expect_uuid|
+      send(test_method, uuids_now, expect_uuid)
+    end
+  end
+
+  test "delete all logs" do
+    uuids_to_keep = log_uuids(:crunchstat_for_running_job)
+    uuids_to_clean = log_uuids(:crunchstat_for_previous_job,
+                               :crunchstat_for_ancient_job)
+    run_with_expiry(1)
+    check_existence(:assert_includes, uuids_to_keep)
+    check_existence(:refute_includes, uuids_to_clean)
+  end
+
+  test "delete only old logs" do
+    uuids_to_keep = log_uuids(:crunchstat_for_running_job,
+                              :crunchstat_for_previous_job)
+    uuids_to_clean = log_uuids(:crunchstat_for_ancient_job)
+    run_with_expiry(360.days)
+    check_existence(:assert_includes, uuids_to_keep)
+    check_existence(:refute_includes, uuids_to_clean)
+  end
+end