diff options
author | Max Magorsch <max@magorsch.de> | 2019-09-06 00:04:32 +0200 |
---|---|---|
committer | Max Magorsch <max@magorsch.de> | 2019-09-06 00:04:32 +0200 |
commit | b0f938ad1d88164aabb9943053dec4617e5bd678 (patch) | |
tree | 339d6a854800b52ebe97dc1e8c57fbe3d48df0b4 | |
parent | Run the initialization scripts using docker-compose (diff) | |
download | packages-5-b0f938ad1d88164aabb9943053dec4617e5bd678.tar.gz packages-5-b0f938ad1d88164aabb9943053dec4617e5bd678.tar.bz2 packages-5-b0f938ad1d88164aabb9943053dec4617e5bd678.zip |
Add an ES index containing the git commits
So far, all history-related information has been fetched using git.
That is, whenever a user requested history-related information,
'git log' was run and its output was parsed. That is time-consuming.
Loading the page https://packages.gentoo.org/packages/keyworded takes
around 120 seconds this way.
Instead of doing so, all git commits are now added to an ES index and
retrieved using ES. This way, the page mentioned above loads
in under 3 seconds.
The commits for populating the index are fetched incrementally. This
way, the first run may take some time, but afterwards, updates are fast.
Signed-off-by: Max Magorsch <max@magorsch.de>
-rw-r--r-- | app/controllers/packages_controller.rb | 2 | ||||
-rw-r--r-- | app/helpers/packages_helper.rb | 4 | ||||
-rw-r--r-- | app/jobs/commits_update_job.rb | 8 | ||||
-rw-r--r-- | app/models/commit.rb | 37 | ||||
-rw-r--r-- | app/repositories/base_repository.rb | 20 | ||||
-rw-r--r-- | app/repositories/commit_repository.rb | 28 | ||||
-rw-r--r-- | app/views/packages/_changed_package.html.erb | 4 | ||||
-rw-r--r-- | app/views/packages/_changelog_entry.html.erb | 21 | ||||
-rw-r--r-- | lib/kkuleomi/store.rb | 1 | ||||
-rw-r--r-- | lib/portage/util/history.rb | 65 | ||||
-rw-r--r-- | lib/tasks/kkuleomi.rake | 1 |
11 files changed, 152 insertions, 39 deletions
diff --git a/app/controllers/packages_controller.rb b/app/controllers/packages_controller.rb index 67cc86f..9580752 100644 --- a/app/controllers/packages_controller.rb +++ b/app/controllers/packages_controller.rb @@ -39,7 +39,7 @@ class PackagesController < ApplicationController if stale?(etag: Time.parse(@package.updated_at), last_modified: Time.parse(@package.updated_at), public: true) @changelog = Rails.cache.fetch("changelog/#{@package.atom}") do - Portage::Util::History.for(@package.category, @package.name, 5) + CommitRepository.find_sorted_by('packages', @package.category + '/'+ @package.name, "date", "desc", 5) end respond_to do |wants| diff --git a/app/helpers/packages_helper.rb b/app/helpers/packages_helper.rb index ee83a2e..fe15564 100644 --- a/app/helpers/packages_helper.rb +++ b/app/helpers/packages_helper.rb @@ -79,11 +79,11 @@ module PackagesHelper # Tries to find a matching changelog entry for a change object def matching_changelog_entry(change) changelog = Rails.cache.fetch("changelog/#{cp_to_atom(change.category, change.package)}", expires_in: 10.minutes) do - Portage::Util::History.for(change.category, change.package, 5) + CommitRepository.find_sorted_by('packages', change.category + '/' + change.package, "date", "desc", 5) end changelog.each do |changelog_entry| - if changelog_entry[:files][:added].include?('%s-%s.ebuild' % [change.package, change.version]) + if changelog_entry.files["added"].include?('%s-%s.ebuild' % [change.package, change.version]) return changelog_entry end end diff --git a/app/jobs/commits_update_job.rb b/app/jobs/commits_update_job.rb new file mode 100644 index 0000000..f4c170b --- /dev/null +++ b/app/jobs/commits_update_job.rb @@ -0,0 +1,8 @@ +class CommitsUpdateJob < ApplicationJob + queue_as :default + + def perform(*args) + Portage::Util::History.update() + end + +end diff --git a/app/models/commit.rb b/app/models/commit.rb new file mode 100644 index 0000000..2512ced --- /dev/null +++ b/app/models/commit.rb @@ -0,0 
+1,37 @@ +class Commit + include ActiveModel::Model + include ActiveModel::Validations + + ATTRIBUTES = [:id, + :author, + :email, + :date, + :message, + :files, + :packages, + :created_at, + :updated_at] + attr_accessor(*ATTRIBUTES) + attr_reader :attributes + + def initialize(attr={}) + attr.each do |k,v| + if ATTRIBUTES.include?(k.to_sym) + send("#{k}=", v) + end + end + end + + def attributes + @created_at ||= DateTime.now + @updated_at = DateTime.now + ATTRIBUTES.inject({}) do |hash, attr| + if value = send(attr) + hash[attr] = value + end + hash + end + end + alias :to_hash :attributes + +end diff --git a/app/repositories/base_repository.rb b/app/repositories/base_repository.rb index 7154691..397b275 100644 --- a/app/repositories/base_repository.rb +++ b/app/repositories/base_repository.rb @@ -11,6 +11,7 @@ class BaseRepository class << self extend Forwardable def_delegators :instance, :find_all_by, :filter_all, :find_by, :find_all_by_parent, :all_sorted_by + def_delegators :instance, :find_sorted_by, :n_sorted_by def_delegators :instance, :count, :search, :delete, :save, :refresh_index!, :create_index end @@ -58,6 +59,25 @@ class BaseRepository ) end + # Returns the given number of records of this class sorted by a field. + def find_sorted_by(field, value, sort_field, order, num_return, options = {}) + search({ + size: num_return, + query: { term: { field => value } }, + sort: { sort_field => { order: order } } + }.merge(options)) + end + + + # Returns n records of this class sorted by a field. + def n_sorted_by(n, field, order, options = {}) + search({ + size: n, + query: { match_all: {} }, + sort: { field => { order: order } } + }.merge(options)) + end + # Returns all (by default 10k) records of this class sorted by a field. 
def all_sorted_by(field, order, options = {}) search({ diff --git a/app/repositories/commit_repository.rb b/app/repositories/commit_repository.rb new file mode 100644 index 0000000..146087a --- /dev/null +++ b/app/repositories/commit_repository.rb @@ -0,0 +1,28 @@ +require 'singleton' + +class CommitRepository < BaseRepository + include Singleton + + client ElasticsearchClient.default + + index_name "commit-#{Rails.env}" + + klass Commit + + mapping do + indexes :id, type: 'keyword' + indexes :author, type: 'keyword' + indexes :email, type: 'keyword' + indexes :date, type: 'date' + indexes :message, type: 'text' + indexes :files do + indexes :modified, type: 'keyword' + indexes :deleted, type: 'keyword' + indexes :added, type: 'keyword' + end + indexes :packages, type: 'keyword' + indexes :created_at, type: 'date' + indexes :updated_at, type: 'date' + end + +end diff --git a/app/views/packages/_changed_package.html.erb b/app/views/packages/_changed_package.html.erb index 5b407fb..d6a0d00 100644 --- a/app/views/packages/_changed_package.html.erb +++ b/app/views/packages/_changed_package.html.erb @@ -46,10 +46,10 @@ </small> <% unless (changelog_entry = matching_changelog_entry(change)).nil? 
%> <div class="kk-inline-changelog-entry"> - <a href="<%= gitweb_commit_url(changelog_entry[:id]) %>" title="<%= t :git_commit %>"> + <a href="<%= gitweb_commit_url(changelog_entry.id) %>" title="<%= t :git_commit %>"> <span class="octicon octicon-git-pull-request"></span> <span class="kk-commit-message"> - <%= changelog_entry[:message].lines.first %> + <%= changelog_entry.message.lines.first %> </span> </a> </div> diff --git a/app/views/packages/_changelog_entry.html.erb b/app/views/packages/_changelog_entry.html.erb index e592f89..17a6e66 100644 --- a/app/views/packages/_changelog_entry.html.erb +++ b/app/views/packages/_changelog_entry.html.erb @@ -1,28 +1,29 @@ <li class="list-group-item"> - <strong><%= annotate_bugs changelog[:message].lines.first %></strong> + <strong><%= annotate_bugs changelog.message.lines.first %></strong> <div class="kk-byline"> - <%= mail_to changelog[:email], changelog[:author] %>, - <%= i18n_date(changelog[:date]) %>, - commit <%= link_to_gitweb_commit changelog[:id]%> + <%= mail_to changelog.email, changelog.author %>, + <%= i18n_date(changelog.date) %>, + commit <%= link_to_gitweb_commit changelog.id%> </div> <table class="table table-condensed kk-changelog-diffstat"> - <% unless changelog[:files][:added].empty? %> + + <% unless changelog.files["added"].empty? %> <tr class="success"> <td class="kk-changelog-diffstat-icon"><span class="octicon octicon-diff-added"></span></td> - <td><%= safe_join(changelog[:files][:added].map {|f| link_to_gitweb_ebuild_diff(f, changelog[:id], @package.category, @package.name) }, ', ') %></td> + <td><%= safe_join(changelog.files["added"].select { |file| file.include?(@package.category + '/' + @package.name) }.map {|f| link_to_gitweb_ebuild_diff(f.split('/').last, changelog.id, @package.category, @package.name) }, ', ') %></td> </tr> <% end %> - <% unless changelog[:files][:modified].empty? %> + <% unless changelog.files["modified"].empty? 
%> <tr class="warning"> <td class="kk-changelog-diffstat-icon"><span class="octicon octicon-diff-modified"></span></td> - <td><%= safe_join(changelog[:files][:modified].map {|f| link_to_gitweb_ebuild_diff(f, changelog[:id], @package.category, @package.name) }, ', ') %></td> + <td><%= safe_join(changelog.files["modified"].select { |file| file.include?(@package.category + '/' + @package.name) }.map {|f| link_to_gitweb_ebuild_diff(f.split('/').last, changelog.id, @package.category, @package.name) }, ', ') %></td> </tr> <% end %> - <% unless changelog[:files][:deleted].empty? %> + <% unless changelog.files["deleted"].empty? %> <tr class="danger"> <td class="kk-changelog-diffstat-icon"><span class="octicon octicon-diff-removed"></span></td> - <td><%= safe_join(changelog[:files][:deleted].map {|f| link_to_gitweb_ebuild_diff(f, changelog[:id], @package.category, @package.name) }, ', ') %></td> + <td><%= safe_join(changelog.files["deleted"].select { |file| file.include?(@package.category + '/' + @package.name) }.map {|f| link_to_gitweb_ebuild_diff(f.split('/').last, changelog.id, @package.category, @package.name) }, ', ') %></td> </tr> <% end %> diff --git a/lib/kkuleomi/store.rb b/lib/kkuleomi/store.rb index a36f0ca..ec27d7a 100644 --- a/lib/kkuleomi/store.rb +++ b/lib/kkuleomi/store.rb @@ -7,6 +7,7 @@ module Kkuleomi::Store VersionRepository, ChangeRepository, UseflagRepository, + CommitRepository ] base_settings = { diff --git a/lib/portage/util/history.rb b/lib/portage/util/history.rb index b2348b3..dfa7449 100644 --- a/lib/portage/util/history.rb +++ b/lib/portage/util/history.rb @@ -2,17 +2,23 @@ require 'time' class Portage::Util::History class << self - def for(category, package, limit = 20) + def update() return [] if KKULEOMI_DISABLE_GIT == true - files = "#{category}/#{package}/" + latest_commit_id = KKULEOMI_FIRST_COMMIT + latest_commit = CommitRepository.n_sorted_by(1, "date", "desc").first + + unless latest_commit.nil? 
+ latest_commit_id = latest_commit.id + end + git = Kkuleomi::Util::Exec - .cmd(KKULEOMI_GIT) - .in(KKULEOMI_RUNTIME_PORTDIR) - .args( - 'log', '--name-status', '--no-merges', '--date=iso8601', "-n #{limit.to_i}", - "#{KKULEOMI_FIRST_COMMIT}..HEAD", files) - .run + .cmd(KKULEOMI_GIT) + .in(KKULEOMI_RUNTIME_PORTDIR) + .args( + 'log', '--name-status', '--no-merges', '--date=iso8601', "--reverse", + "#{latest_commit_id}..HEAD") + .run raw_log, stderr, status = git.stdout, git.stderr, git.exit_status fail "Cannot get git log: #{stderr}" unless status == 0 @@ -23,9 +29,11 @@ class Portage::Util::History private def parse(raw_log) - log_items = [] - raw_log.split("\n\ncommit ").each do |raw_commit| + count = raw_log.split("\n\ncommit ").slice(0, 10000).size + + raw_log.split("\n\ncommit ").slice(0, 10000).each do |raw_commit| + commit_lines = raw_commit.lines _id = commit_lines.shift.gsub('commit ', '').strip @@ -38,37 +46,46 @@ class Portage::Util::History commit_lines.shift _raw_message = [] - while (line = commit_lines.shift) != "\n" + while (line = commit_lines.shift) != "\n" && !line.nil? 
_raw_message << line end _raw_files = commit_lines _files = {added: [], modified: [], deleted: []} + _packages = [] _raw_files.each do |file| mode, file = file.split "\t" - filename = file.strip.split('/').last + + if file.strip.split('/').size >= 3 + _packages << (file.strip.split('/')[0] + '/' + file.strip.split('/')[1]) + end case mode when 'M' - _files[:modified] << filename + _files[:modified] << file.strip when 'D' - _files[:deleted] << filename + _files[:deleted] << file.strip when 'A' - _files[:added] << filename + _files[:added] << file.strip end end - log_items << { - id: _id, - author: _author, - email: _email, - date: _date, - message: _raw_message.map { |l| l.strip }.join("\n"), - files: _files - } + + commit = Commit.new + commit.id = _id + commit.author = _author + commit.email = _email + commit.date = _date + commit.message = _raw_message.map { |l| l.strip }.join("\n") + commit.files = _files + commit.packages = _packages.to_set + CommitRepository.save(commit) + end + + if count >= 10000 + CommitsUpdateJob.perform_later end - log_items end end end diff --git a/lib/tasks/kkuleomi.rake b/lib/tasks/kkuleomi.rake index 9b8bca0..9362b7a 100644 --- a/lib/tasks/kkuleomi.rake +++ b/lib/tasks/kkuleomi.rake @@ -50,4 +50,5 @@ end def initialize_caches MasksUpdateJob.perform_later UseflagsUpdateJob.perform_later + CommitsUpdateJob.perform_later end |