260 lines
9.2 KiB
Python
Executable File
260 lines
9.2 KiB
Python
Executable File
#!/usr/bin/python3 -B
|
|
|
|
# Copyright 2021 The Android Open Source Project
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Read the EXPECTED_UPSTREAM and update the files from the upstream."""
|
|
import argparse
|
|
import logging
|
|
# pylint: disable=g-importing-member
|
|
from pathlib import Path
|
|
import sys
|
|
from typing import List
|
|
from typing import Sequence
|
|
|
|
# pylint: disable=g-multiple-import
|
|
from common_util import (
|
|
ExpectedUpstreamEntry,
|
|
ExpectedUpstreamFile,
|
|
has_file_in_tree,
|
|
LIBCORE_DIR,
|
|
)
|
|
|
|
from git import (
|
|
Blob,
|
|
IndexFile,
|
|
Repo,
|
|
)
|
|
|
|
# Enable INFO-level logging so that messages emitted by GitPython are shown.
logging.basicConfig(level=logging.INFO)

# Pick an arbitrary existing commit with an empty tree. It is used as the
# starting tree when building an index that must contain no files initially.
EMPTY_COMMIT_SHA = "d85bc16ba1cdcc20bec6fcbfe46dc90f9fcd2f78"
|
|
|
|
|
|
def validate_and_remove_updated_entries(
    entries: List[ExpectedUpstreamEntry],
    repo: Repo) -> List[ExpectedUpstreamEntry]:
  """Returns a list of entries of which the file content needs to be updated.

  Every entry is validated by resolving its git ref and its source path in
  the git database. An entry is kept in the result when its destination file
  is missing from HEAD, or when the destination content in HEAD differs from
  the source content at the entry's git ref.

  Args:
    entries: the entries to validate
    repo: the repository object

  Returns:
    The entries whose destination file is absent from, or different in, HEAD,
    in the same order as in the input.

  Raises:
    Exception: any error raised while looking up an entry is re-raised
      unchanged after the offending entry has been reported on stderr.
  """
  head_tree = repo.head.commit.tree
  result: List[ExpectedUpstreamEntry] = []

  for e in entries:
    try:
      # The following steps validate each entry by querying the git database.
      commit = repo.commit(e.git_ref)
      source_blob = commit.tree.join(e.src_path)
      if not has_file_in_tree(e.dst_path, head_tree):
        # Add the entry if the file is missing in the HEAD.
        result.append(e)
        continue

      dst_blob = head_tree.join(e.dst_path)
      # Add the entry if the content is different.
      # data_stream will be closed during GC.
      if source_blob.data_stream.read() != dst_blob.data_stream.read():
        result.append(e)
    except Exception:
      # Only real errors are reported as a bad entry. KeyboardInterrupt and
      # SystemExit propagate without a misleading "reading entry" message.
      print(f"ERROR: reading entry: {e}", file=sys.stderr)
      raise

  return result
|
|
|
|
|
|
def partition_entries_by_ref(
    entries: List[ExpectedUpstreamEntry]) -> List[List[ExpectedUpstreamEntry]]:
  """Groups the entries by their git ref.

  Args:
    entries: the entries to partition

  Returns:
    A list of groups, one per distinct git_ref. Groups are ordered by the
    first appearance of their git_ref, and the entries inside a group keep
    their input order. An empty input gives an empty list.
  """
  result_map = {}
  for e in entries:
    # setdefault creates the group on first sight of a ref, then reuses it.
    result_map.setdefault(e.git_ref, []).append(e)

  return list(result_map.values())
|
|
|
|
|
|
# Path of this script relative to LIBCORE_DIR. It is embedded into the commit
# message templates below when this module is loaded.
THIS_TOOL_PATH = Path(__file__).relative_to(LIBCORE_DIR)

# Message template of the first commit. The {summary}, {ref} and {files}
# placeholders are filled in later with str.format(); only the tool path is
# substituted here, through the f-string fragment.
MSG_FIRST_COMMIT = ("Import {summary} from {ref}\n"
                    "\n"
                    "List of files:\n"
                    " {files}\n"
                    "\n"
                    f"Generated by {THIS_TOOL_PATH}"
                    "\n"
                    "Test: N/A")

# Message template of the second (merge) commit. It takes the same
# {summary}, {ref} and {files} placeholders as MSG_FIRST_COMMIT.
MSG_SECOND_COMMIT = ("Merge {summary} from {ref} into the "
                     "expected_upstream branch\n"
                     "\n"
                     "List of files:\n"
                     " {files}\n"
                     "\n"
                     f"Generated by {THIS_TOOL_PATH}"
                     "\n"
                     "Test: N/A")
|
|
|
|
|
|
def merge_files_and_create_commit(entry_set: List[ExpectedUpstreamEntry],
                                  repo: Repo, checkout_only: bool) -> None:
  r"""Create the commits importing the given files into the current branch.

  `--------<ref>--------------- aosp/upstream_openjdkXXX
             \
        <first_commit>
               \
  -------<second_commit>------ expected_upstream

  This function creates the 2 commits, i.e. first_commit and second_commit, in
  the diagram. The goal is to checkout a subset files specified in the
  entry_set, and merged into the expected_upstream branch in order to keep the
  git-blame history of the individual files. first_commit is needed in order
  to move the files specified in the entry_set.

  In the implementation, first_commit isn't really modified from the ref, but
  created from an empty tree, and all files in entry_set will be added into
  the first_commit, second_commit is a merged commit and modified from
  the parent in the expected_upstream branch, and any file contents in the
  first commit will override the file content in the second commit.

  You may reference the following git commands for understanding which should
  create the same commits, but the python implementation is cleaner, because
  it doesn't change the working tree or create a new branch.
  first_commit:
    git checkout -b temp_branch <entry.git_ref>
    rm -r * .jcheck/ .hgignore .hgtags # Remove hidden files
    git checkout <entry.git_ref> <entry.src_path>
    mkdir -p <entry.dst_path>.directory && git mv <entry.src_path>
    <entry.dst_path>
    git commit -a
  second_commit:
    git merge temp_branch
    git checkout HEAD -- ojluni/ # Force checkout to resolve merge conflict
    git checkout temp_branch -- <entry.dst_path>
    git commit

  Every source file is read from the commit of the first entry's git_ref, so
  all entries in entry_set are expected to share the same git_ref.

  Args:
    entry_set: a non-empty list of entries
    repo: the repository object
    checkout_only: True if it creates no commit

  Raises:
    IndexError: if entry_set is empty.
  """
  # The ref of the first entry is used for the whole set.
  ref = entry_set[0].git_ref
  upstream_commit = repo.commit(ref)

  dst_paths = [e.dst_path for e in entry_set]
  str_dst_paths = "\n ".join(dst_paths)

  for entry in entry_set:
    src_blob = upstream_commit.tree[entry.src_path]
    # Write into the file system directly because GitPython provides no API
    # writing into the index in memory. IndexFile.move doesn't help here,
    # because the API requires the file on the working tree too.
    # However, it's fine, because we later reset the HEAD to the second commit.
    # The user expects the file showing in the file system, and the file is
    # not staged/untracked because the file is in the second commit too.
    absolute_dst_path = Path(LIBCORE_DIR, entry.dst_path)
    absolute_dst_path.parent.mkdir(parents=True, exist_ok=True)
    with absolute_dst_path.open("wb") as file:
      file.write(src_blob.data_stream.read())

  if not checkout_only:
    # We need an index empty initially, i.e. no staged files. Note that the
    # empty commit is not the parent. The parents can be set later.
    first_index = IndexFile.from_tree(repo, repo.commit(EMPTY_COMMIT_SHA))
    for entry in entry_set:
      first_index.add(entry.dst_path)

    # Use the file name without suffix as the summary for a single file, and
    # the generic word "files" otherwise.
    summary_msg = "files"
    if len(entry_set) == 1:
      summary_msg = Path(entry_set[0].dst_path).stem
    msg = MSG_FIRST_COMMIT.format(
        summary=summary_msg, ref=ref, files=str_dst_paths)

    # head=False is passed so that this commit does not become the HEAD.
    first_commit = first_index.commit(
        message=msg, parent_commits=[upstream_commit], head=False)

    # The second commit is a merge commit. It doesn't use the current index,
    # i.e. repo.index, to avoid affecting the current staged files.
    prev_head = repo.active_branch.commit
    second_index = IndexFile.from_tree(repo, prev_head)
    # Only blobs (files) of the first commit are added on top of the previous
    # HEAD's tree.
    blob_filter = lambda obj, i: isinstance(obj, Blob)
    blobs = first_commit.tree.traverse(blob_filter)
    second_index.add(blobs)
    msg = MSG_SECOND_COMMIT.format(
        summary=summary_msg, ref=ref, files=str_dst_paths)
    second_commit = second_index.commit(
        message=msg, parent_commits=[prev_head, first_commit], head=True)

    # We updated the HEAD to the second commit. Thus, git-reset updates the
    # current index. Otherwise, the current index, aka, repo.index, shows that
    # the files are deleted.
    repo.index.reset()

  # second_commit only exists when commits were created, hence the branch.
  if checkout_only:
    print(f"Checked out the following files from {ref}:")
  else:
    print(f"New merge commit {second_commit} contains:")
  print(f" {str_dst_paths}")
|
|
|
|
|
|
def create_commits(repo: Repo, checkout_only: bool) -> None:
  """Create the commits importing files according to the EXPECTED_UPSTREAM.

  The function does nothing, apart from printing a message, unless the active
  branch tracks aosp/expected_upstream. Otherwise it reads the
  EXPECTED_UPSTREAM file, keeps the entries whose files are missing or
  outdated in HEAD, groups them by git ref and imports each group.

  Args:
    repo: the repository object
    checkout_only: True if the files are only checked out and no commit is
      created
  """
  current_tracking_branch = repo.active_branch.tracking_branch()
  # tracking_branch() returns None when the active branch tracks no remote
  # branch; treat that the same as tracking the wrong branch instead of
  # failing with an AttributeError on `.name`.
  if (current_tracking_branch is None or
      current_tracking_branch.name != "aosp/expected_upstream"):
    print("This script should only run on aosp/expected_upstream branch. "
          f"Currently, this is on branch {repo.active_branch} "
          f"tracking {current_tracking_branch}")
    return

  print("Reading EXPECTED_UPSTREAM file...")
  expected_upstream_entries = ExpectedUpstreamFile().read_all_entries()

  outdated_entries = validate_and_remove_updated_entries(
      expected_upstream_entries, repo)

  if not outdated_entries:
    print("No need to update. All files are updated.")
    return

  print("The following entries will be updated from upstream")
  for e in outdated_entries:
    print(f" {e.dst_path}")

  # One pair of commits is created per distinct upstream git ref.
  entry_sets_to_be_merged = partition_entries_by_ref(outdated_entries)

  for entry_set in entry_sets_to_be_merged:
    merge_files_and_create_commit(entry_set, repo, checkout_only)
|
|
|
|
|
|
def main(argv: Sequence[str]) -> None:
  """Parses the command line and imports the files from the upstream.

  Args:
    argv: the command-line arguments, without the program name
  """
  description = ("Read the EXPECTED_UPSTREAM and update the files from the "
                 "OpenJDK. By default, it creates commits forking from "
                 "the upstream version in order to preserve the line history.")
  parser = argparse.ArgumentParser(description=description)
  parser.add_argument(
      "--checkout-only",
      action="store_true",
      help="Checkout the files, but creates no commits")
  options = parser.parse_args(argv)

  repo = Repo(LIBCORE_DIR.as_posix())
  try:
    create_commits(repo, options.checkout_only)
  finally:
    # Always release the resources held by the repository object.
    repo.close()
|
|
|
|
|
|
# Run the tool only when this file is executed as a script. The program name
# (sys.argv[0]) is stripped before the arguments are handed to main().
if __name__ == "__main__":
  main(sys.argv[1:])
|