Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement explicit lockfiles integration #23

Merged
merged 23 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions conda_pypi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""
conda-pypi
"""

__version__ = "0.0.0"
3 changes: 3 additions & 0 deletions conda_pypi/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from . import install, list, pip

__all__ = ["install", "list", "pip"]
111 changes: 111 additions & 0 deletions conda_pypi/cli/install.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from __future__ import annotations

import shlex
import sys
from logging import getLogger
from pathlib import Path
from typing import TYPE_CHECKING

from conda.base.context import context
from conda.common.io import Spinner

from ..main import run_pip_install, dry_run_pip_json, compute_record_sum
from ..utils import get_env_site_packages

if TYPE_CHECKING:
from typing import Iterable

log = getLogger(f"conda.{__name__}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, is there a reason to reuse the conda logger instead of an own?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been doing this in all my plugins to reuse conda's configuration for consistency. CLS does this too, for example.



def _prepare_pypi_transaction(pypi_lines) -> dict[str, dict[str, str]]:
pkgs = {}
for args in pypi_lines:
args = shlex.split(args)
record_hash = None
if "--" in args:
double_dash_idx = args.index("--")
if double_dash_idx >= 0:
args, extra_args = args[:double_dash_idx], args[double_dash_idx:]
if (
"--record-checksum" in extra_args
and (hash_idx := extra_args.index("--record-checksum")) > 0
and extra_args[hash_idx + 1].startswith(("md5:", "sha256:"))
):
record_hash = extra_args[hash_idx + 1]
else:
for arg in extra_args:
if arg.startswith("--record-checksum="):
record_hash = arg.split("=", 1)[1]
report = dry_run_pip_json(["--no-deps", *args])
pkg_name = report["install"][0]["metadata"]["name"]
version = report["install"][0]["metadata"]["version"]
pkgs[(pkg_name, version)] = {"url": report["install"][0]["download_info"]["url"]}
if record_hash:
pkgs[(pkg_name, version)]["hash"] = record_hash
return pkgs


def post_command(command: str) -> int:
if command not in ("install", "create"):
return 0

pypi_lines = pypi_lines_from_sys_argv()
if not pypi_lines:
return 0

with Spinner("\nPreparing PyPI transaction", enabled=not context.quiet, json=context.json):
pkgs = _prepare_pypi_transaction(pypi_lines)

with Spinner("Executing PyPI transaction", enabled=not context.quiet, json=context.json):
run_pip_install(
context.target_prefix,
args=[pkg["url"] for pkg in pkgs.values()],
dry_run=context.dry_run,
quiet=context.quiet,
verbosity=context.verbosity,
force_reinstall=context.force_reinstall,
yes=context.always_yes,
check=True,
)

if any(pkg.get("hash") for pkg in pkgs.values()):
with Spinner("Verifying PyPI transaction", enabled=not context.quiet, json=context.json):
site_packages = get_env_site_packages(context.target_prefix)
for dist_info in site_packages.glob("*.dist-info"):
if not dist_info.is_dir():
continue
name, version = dist_info.stem.split("-")
expected_hash = pkgs.get((name, version), {}).get("hash")
if expected_hash:
algo, expected_hash = expected_hash.split(":")
if (dist_info / "RECORD").is_file():
found_hash = compute_record_sum(dist_info / "RECORD", algo)
if expected_hash != found_hash:
log.warning(
"%s checksum for %s==%s didn't match! Expected=%s, found=%s",
algo,
name,
version,
expected_hash,
found_hash,
)

return 0


def pypi_lines_from_sys_argv(argv: Iterable[str] | None = None) -> list[str]:
argv = argv or sys.argv
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs docs

if "--file" not in argv:
return []
pypi_lines = []
pypi_prefix = "# pypi: "
pypi_prefix_len = len(pypi_prefix)
for i, arg in enumerate(argv):
if arg == "--file":
pypi_lines += [
line[pypi_prefix_len:]
for line in Path(argv[i + 1]).read_text().splitlines()
if line.strip().startswith(pypi_prefix)
]
return pypi_lines
22 changes: 22 additions & 0 deletions conda_pypi/cli/list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import sys
from conda.base.context import context

from .. import __version__
from ..main import pypi_lines_for_explicit_lockfile


def post_command(command: str):
if command != "list":
return
cmd_line = context.raw_data.get("cmd_line", {})
if "--explicit" not in sys.argv and not cmd_line.get("explicit"):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pattern here with checking first sys.argv and the using conda internals to check for the cmd_line value will be really tricky in the future when we refactor conda's CLI code. Could we move this into a utility function so it's not spaghetti code, please?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wish we had a good API for the parsed args here. I want to use context.raw_data explicitly but I fear this is a data structure API we are not watching closely. See conda/conda#13924.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed so we only use context.raw_data now.

return
if "--no-pip" in sys.argv or not cmd_line.get("pip"):
return
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if "--explicit" not in sys.argv and not cmd_line.get("explicit"):
return
if "--no-pip" in sys.argv or not cmd_line.get("pip"):
return
if "--explicit" not in sys.argv and not cmd_line.get("explicit"):
return
elif "--no-pip" in sys.argv or not cmd_line.get("pip"):
return

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've seen this kind of suggestion before but never understood why. Isn't the return above enough to guarantee the lack of execution? To me, using if explicitly emphasizes the early return, but I can also see how using elif better reveals the conditional relationship between the statements.

checksum = "md5" if ("--md5" in sys.argv or cmd_line.get("md5")) else None
to_print = pypi_lines_for_explicit_lockfile(context.target_prefix, checksum=checksum)
if to_print:
sys.stdout.flush()
print(f"# The following lines were added by conda-pypi v{__version__}")
print("# This is an experimental feature subject to change. Do not use in production.")
print(*to_print, sep="\n")
14 changes: 7 additions & 7 deletions conda_pypi/cli.py → conda_pypi/cli/pip.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


def configure_parser(parser: argparse.ArgumentParser):
from .dependencies import BACKENDS
from ..dependencies import BACKENDS

add_parser_help(parser)
add_parser_prefix(parser)
Expand Down Expand Up @@ -69,14 +69,14 @@ def configure_parser(parser: argparse.ArgumentParser):
def execute(args: argparse.Namespace) -> int:
from conda.common.io import Spinner
from conda.models.match_spec import MatchSpec
from .dependencies import analyze_dependencies
from .main import (
from ..dependencies import analyze_dependencies
from ..main import (
validate_target_env,
ensure_externally_managed,
run_conda_install,
run_pip_install,
)
from .utils import get_prefix
from ..utils import get_prefix

prefix = get_prefix(args.prefix, args.name)
packages_not_installed = validate_target_env(prefix, args.packages)
Expand Down Expand Up @@ -150,7 +150,7 @@ def execute(args: argparse.Namespace) -> int:
if pypi_specs:
if not args.quiet or not args.json:
print("Running pip install...")
retcode = run_pip_install(
process = run_pip_install(
prefix,
pypi_specs,
dry_run=args.dry_run,
Expand All @@ -159,8 +159,8 @@ def execute(args: argparse.Namespace) -> int:
force_reinstall=args.force_reinstall,
yes=args.yes,
)
if retcode:
return retcode
if process.returncode:
return process.returncode
if os.environ.get("CONDA_BUILD_STATE") != "BUILD":
ensure_externally_managed(prefix)
return 0
46 changes: 2 additions & 44 deletions conda_pypi/dependencies/pip.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
from __future__ import annotations

import json
import os
from logging import getLogger
from collections import defaultdict
from subprocess import run
from tempfile import NamedTemporaryFile

from conda.exceptions import CondaError

from ..utils import get_env_python
from ..main import dry_run_pip_json

logger = getLogger(f"conda.{__name__}")

Expand All @@ -19,43 +13,7 @@ def _analyze_with_pip(
prefix: str | None = None,
force_reinstall: bool = False,
) -> tuple[dict[str, list[str]], dict[str, list[str]]]:
# pip can output to stdout via `--report -` (dash), but this
# creates issues on Windows due to undecodable characters on some
# project descriptions (e.g. charset-normalizer, amusingly), which
# makes pip crash internally. Probably a bug on their end.
# So we use a temporary file instead to work with bytes.
json_output = NamedTemporaryFile(suffix=".json", delete=False)
json_output.close() # Prevent access errors on Windows

cmd = [
str(get_env_python(prefix)),
"-mpip",
"install",
"--dry-run",
"--ignore-installed",
*(("--force-reinstall",) if force_reinstall else ()),
"--report",
json_output.name,
*packages,
]
process = run(cmd, capture_output=True, text=True)
if process.returncode != 0:
raise CondaError(
f"Failed to analyze dependencies with pip:\n"
f" command: {' '.join(cmd)}\n"
f" exit code: {process.returncode}\n"
f" stderr:\n{process.stderr}\n"
f" stdout:\n{process.stdout}\n"
)
logger.debug("pip (%s) provided the following report:\n%s", " ".join(cmd), process.stdout)

with open(json_output.name, "rb") as f:
# We need binary mode because the JSON output might
# contain weird unicode stuff (as part of the project
# description or README).
report = json.loads(f.read())
os.unlink(json_output.name)

report = dry_run_pip_json(("--prefix", prefix, *packages), force_reinstall)
deps_from_pip = defaultdict(list)
conda_deps = defaultdict(list)
for item in report["install"]:
Expand Down
Loading
Loading