Skip to content

Commit

Permalink
Merge pull request #271 from ecosyste-ms/store-commits
Browse files Browse the repository at this point in the history
Sync individual commits
  • Loading branch information
andrew authored Feb 7, 2024
2 parents c0e13c7 + b3ce865 commit 3be86ec
Show file tree
Hide file tree
Showing 9 changed files with 167 additions and 50 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ gem "groupdate"
gem 'google-protobuf', '3.25.2'
gem 'octokit'
gem 'redis'
gem 'rugged'

group :development, :test do
gem "debug", platforms: %i[ mri mingw x64_mingw ]
Expand Down
4 changes: 3 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ GEM
marcel (1.0.2)
mini_mime (1.1.5)
mini_portile2 (2.8.5)
minitest (5.22.0)
minitest (5.22.1)
mocha (2.1.0)
ruby2_keywords (>= 0.0.5)
msgpack (1.7.2)
Expand Down Expand Up @@ -267,6 +267,7 @@ GEM
actionpack (>= 3.1, < 7.2)
railties (>= 3.1, < 7.2)
ruby2_keywords (0.0.5)
rugged (1.7.2)
sassc (2.4.0)
ffi (~> 1.9)
sassc-rails (2.1.2)
Expand Down Expand Up @@ -364,6 +365,7 @@ DEPENDENCIES
redis
rswag-api
rswag-ui
rugged
sassc-rails
shoulda-context
shoulda-matchers
Expand Down
6 changes: 6 additions & 0 deletions app/models/commit.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# A single commit stored for a repository, identified by its sha.
class Commit < ApplicationRecord
  belongs_to :repository
  has_one :host, through: :repository

  # A sha is required and may appear only once per repository.
  validates :sha, presence: true
  validates :sha, uniqueness: { scope: :repository_id }
end
137 changes: 95 additions & 42 deletions app/models/repository.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
class Repository < ApplicationRecord
belongs_to :host

has_many :commits

validates :full_name, presence: true

scope :active, -> { where(status: nil) }
Expand Down Expand Up @@ -88,6 +90,10 @@ def fetch_head_sha
`git ls-remote #{git_clone_url} #{default_branch}`.split("\t").first
end

# Clones the remote at git_clone_url into +dir+ using Rugged and returns the
# result of Rugged::Repository.clone_at, which callers use as the repository
# handle (it is passed to Rugged::Walker and closed after the walk).
def clone_repository(dir)
  Rugged::Repository.clone_at(git_clone_url, dir)
end

# TODO support hg and svn repos

def count_commits
Expand All @@ -99,56 +105,63 @@ def count_commits
update(last_synced_at: Time.now)
else
begin
Dir.mktmpdir do |dir|
`GIT_TERMINAL_PROMPT=0 git clone -b #{default_branch} --single-branch #{git_clone_url} #{dir}`
last_commit = `git -C #{dir} rev-parse HEAD`.strip
output = `git -C #{dir} shortlog -s -n -e --no-merges HEAD`
Dir.mktmpdir do |dir|
  # One clone feeds both the aggregate counts and the per-commit rows.
  repo = clone_repository(dir)
  counts = count_commits_internal(dir)
  # Collect commit rows before `update`: fetch_commits_internal reads
  # last_synced_commit, which the update below overwrites.
  commit_hashes = fetch_commits_internal(repo)
  Commit.upsert_all(commit_hashes) unless commit_hashes.empty?
  # Persist the hash returned by count_commits_internal. The name `updates`
  # is local to that method and undefined here, so `update(updates)` raised
  # NameError and the counts were never saved.
  update(counts)
end
rescue => e
# TODO record error in clone (likely missing repo but also maybe host downtime)
puts "Error counting commits for #{full_name}: #{e}"
end
end

end

past_year_output = `git -C #{dir} shortlog -s -n -e --no-merges --since="1 year ago" HEAD`
# Computes commit statistics for the git checkout located at +dir+.
#
# Runs `git rev-parse` and two `git shortlog` invocations (all time and the
# past year) against the checkout, parses the shortlog output with
# parse_commit_counts and derives totals, bot totals, mean commits per
# committer and the `dds` figure for both periods.
#
# An empty committer list yields 0 for the mean and dds values of that
# period instead of raising or producing NaN; this applies to the all-time
# figures as well as the past-year ones.
#
# @param dir [String] path of a cloned repository
# @return [Hash] attributes suitable for passing to `update`
def count_commits_internal(dir)
  last_commit = `git -C #{dir} rev-parse HEAD`.strip
  output = `git -C #{dir} shortlog -s -n -e --no-merges HEAD`
  past_year_output = `git -C #{dir} shortlog -s -n -e --no-merges --since="1 year ago" HEAD`

  committers = parse_commit_counts(output)
  past_year_committers = parse_commit_counts(past_year_output)

  # Shared summary for both periods. `list.first` is used as the top
  # committer, which presumes parse_commit_counts keeps the count-descending
  # order requested with `shortlog -n` (verify against that helper).
  summarize = lambda do |list|
    total = list.sum { |entry| entry[:count] }
    bots = list.select { |entry| entry[:name].end_with?('[bot]') }
    top = list.first
    {
      total_commits: total,
      total_committers: list.length,
      total_bot_commits: bots.sum { |entry| entry[:count] },
      total_bot_committers: bots.length,
      mean_commits: top ? (total.to_f / list.length) : 0,
      dds: top ? 1 - (top[:count].to_f / total) : 0
    }
  end

  overall = summarize.call(committers)
  past_year = summarize.call(past_year_committers).transform_keys { |key| :"past_year_#{key}" }

  {
    committers: committers,
    last_synced_commit: last_commit,
    **overall,
    past_year_committers: past_year_committers,
    **past_year,
    last_synced_at: Time.now
  }
end

def parse_commit_counts(output)
Expand Down Expand Up @@ -335,6 +348,46 @@ def check_tokens
end
end

# Clones the repository into a temporary directory and returns the commit
# hashes produced by fetch_commits_internal for that clone. The directory is
# removed when the block exits.
def fetch_commits
  Dir.mktmpdir do |checkout_dir|
    fetch_commits_internal(clone_repository(checkout_dir))
  end
end

# Builds one attribute hash per commit walked from the repository HEAD, in
# the shape later handed to Commit.upsert_all.
#
# When last_synced_commit is set it is looked up and passed to Walker#hide
# (per Rugged, this excludes that commit and its ancestors from the walk).
#
# The walker is reset and +repo+ is closed before this method returns, and
# also when the walk raises, so a failure part-way through does not leave
# the repository handle open.
#
# @param repo [Rugged::Repository] an opened/cloned repository; closed here
# @return [Array<Hash>] one hash per walked commit
def fetch_commits_internal(repo)
  commits = []
  walker = Rugged::Walker.new(repo)
  walker.hide(repo.lookup(last_synced_commit)) if last_synced_commit
  walker.sorting(Rugged::SORT_DATE)
  walker.push(repo.head.target)
  walker.each do |commit|
    commits << {
      repository_id: id,
      sha: commit.oid,
      message: commit.message.strip,
      timestamp: commit.time.iso8601,
      # More than one parent marks a merge commit.
      merge: commit.parents.length > 1,
      author: "#{commit.author[:name]} <#{commit.author[:email]}>",
      committer: "#{commit.committer[:name]} <#{commit.committer[:email]}>",
      stats: commit.diff.stat
    }
  end
  commits
ensure
  # walker is nil if Walker.new itself raised.
  walker&.reset
  repo.close
end

# Fetches commit hashes via fetch_commits and upserts them into Commit.
# Nothing is written when no commits are returned. Any StandardError is
# reported on stdout and swallowed, so the method then returns nil.
def sync_commits
  rows = fetch_commits
  Commit.upsert_all(rows) unless rows.empty?
rescue => error
  puts "Error syncing commits for #{full_name}: #{error}"
end

private

def api_client(token = nil, options = {})
Expand Down
17 changes: 17 additions & 0 deletions db/migrate/20240129170313_create_commits.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Creates the commits table: one row per commit, keyed to a repository by
# repository_id and looked up through a composite (repository_id, sha) index.
class CreateCommits < ActiveRecord::Migration[7.1]
  def change
    create_table :commits do |t|
      t.integer :repository_id
      t.string :sha
      t.string :message
      t.datetime :timestamp
      t.boolean :merge
      t.string :author
      t.string :committer
      # Integer array column, empty by default.
      t.integer :stats, array: true, default: []

      t.timestamps

      # NOTE(review): this index is not unique, so the Commit model's
      # uniqueness validation on sha is not enforced at the database level.
      t.index %i[repository_id sha]
    end
  end
end
16 changes: 15 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions test/models/commit_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
require "test_helper"

# Matcher-based checks for the Commit model's association and validations.
class CommitTest < ActiveSupport::TestCase
  context 'associations' do
    should belong_to(:repository)
  end

  context 'validations' do
    should validate_presence_of(:sha)
    # sha must be unique within a repository, not globally.
    should validate_uniqueness_of(:sha).scoped_to(:repository_id)
  end
end
13 changes: 10 additions & 3 deletions test/models/host_test.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
require "test_helper"

# Matcher-based checks for the Host model's association and validations.
class HostTest < ActiveSupport::TestCase
  context 'associations' do
    should have_many(:repositories)
  end

  context 'validations' do
    should validate_presence_of(:url)
    should validate_presence_of(:name)
    should validate_presence_of(:kind)
    should validate_uniqueness_of(:name)
  end
end
11 changes: 8 additions & 3 deletions test/models/repository_test.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
require "test_helper"

# Matcher-based checks for the Repository model's associations and validations.
class RepositoryTest < ActiveSupport::TestCase
  context 'associations' do
    should belong_to(:host)
    should have_many(:commits)
  end

  context 'validations' do
    should validate_presence_of(:full_name)
  end
end

0 comments on commit 3be86ec

Please sign in to comment.