diff --git a/conda_build/cran.py b/conda_build/cran.py index 3d97da1229..d2bffc9d9d 100644 --- a/conda_build/cran.py +++ b/conda_build/cran.py @@ -20,7 +20,7 @@ from os.path import join, exists, isfile, basename, isdir from itertools import chain import subprocess -from difflib import get_close_matches +from conda_build import utils from conda_build import source, metadata from .conda_interface import rm_rf @@ -489,13 +489,8 @@ def main(args, parser): # XXX: We should maybe normalize these d['license'] = cran_package.get("License", "None") + d['license_family'] = utils.guess_license_family(d['license'], metadata.allowed_license_families) - # Tend towards the more clear GPL3 and away from the ambiguity of GPL2. - if 'GPL (>= 2)' in d['license'] or d['license'] == 'GPL': - d['license_family'] = 'GPL3' - else: - d['license_family'] = get_close_matches(d['license'], - metadata.allowed_license_families, 1, 0.0)[0] if 'License_is_FOSS' in cran_package: d['license'] += ' (FOSS)' if cran_package.get('License_restricts_use', None) == 'yes': diff --git a/conda_build/pypi.py b/conda_build/pypi.py index e535d75866..2539ef29a6 100644 --- a/conda_build/pypi.py +++ b/conda_build/pypi.py @@ -28,11 +28,12 @@ from .conda_interface import normalized_version from .conda_interface import human_bytes, hashsum_file -from conda_build.utils import tar_xf, unzip +from conda_build.utils import tar_xf, unzip, guess_license_family from conda_build.source import SRC_CACHE, apply_patch from conda_build.build import create_env from conda_build.config import config -from conda_build.metadata import MetaData +from conda_build.metadata import (MetaData, allowed_license_families) + if sys.version_info < (3,): from xmlrpclib import ServerProxy, Transport, ProtocolError @@ -96,6 +97,7 @@ {home_comment}home: {homeurl} license: {license} {summary_comment}summary: {summary} + license_family: {license_family} # See # http://docs.continuum.io/conda/build.html for @@ -674,6 +676,7 @@ def get_package_metadata(args, package, d, data): else: license = ' or '.join(licenses) d['license'] = license + d['license_family'] = guess_license_family(license, allowed_license_families) def valid(name): diff --git a/conda_build/source.py b/conda_build/source.py index ebe5874f3b..8b3c29694d 100644 --- a/conda_build/source.py +++ b/conda_build/source.py @@ -7,8 +7,13 @@ from os.path import join, isdir, isfile, abspath, expanduser, basename from shutil import copytree, copy2 from subprocess import check_call, Popen, PIPE, check_output, CalledProcessError +if sys.version_info[0] == 3: + from urllib.parse import urljoin +else: + from urlparse import urljoin import locale import time +from conda.compat import TemporaryDirectory from .conda_interface import download from .conda_interface import hashsum_file @@ -22,6 +27,7 @@ HG_CACHE = join(config.croot, 'hg_cache') SVN_CACHE = join(config.croot, 'svn_cache') WORK_DIR = join(config.croot, 'work') +git_submod_re = re.compile(r'(?:.+)\.(.+)\.(?:.+)\s(.+)') log = logging.getLogger(__file__) @@ -90,40 +96,39 @@ def unpack(meta, verbose=False): copy2(src_path, WORK_DIR) -def git_source(meta, recipe_dir, verbose=False): - ''' Download a source from Git repo. ''' +def git_mirror_checkout_recursive(git, mirror_dir, checkout_dir, git_url, git_ref=None, + git_depth=-1, is_top_level=True, verbose=True): + """ Mirror (and checkout) a Git repository recursively. + + It's not possible to use `git submodule` on a bare + repository, so the checkout must be done before we + know which submodules there are. + + Worse, submodules can be identified by using either + absolute URLs or relative paths. If relative paths + are used those need to be relocated upon mirroring, + but you could end up with `../../../../blah` and in + that case conda-build could be tricked into writing + to the root of the drive and overwriting the system + folders unless steps are taken to prevent that. + """ + if verbose: stdout = None else: FNULL = open(os.devnull, 'w') stdout = FNULL - - if not isdir(GIT_CACHE): - os.makedirs(GIT_CACHE) - - git = external.find_executable('git') - if not git: - sys.exit("Error: git is not installed") - git_url = meta['git_url'] - git_depth = int(meta.get('git_depth', -1)) - if git_url.startswith('.'): - # It's a relative path from the conda recipe - os.chdir(recipe_dir) - git_dn = abspath(expanduser(git_url)) - git_dn = "_".join(git_dn.split(os.path.sep)[1:]) - else: - git_dn = git_url.split(':')[-1].replace('/', '_') - cache_repo = cache_repo_arg = join(GIT_CACHE, git_dn) - if sys.platform == 'win32': - is_cygwin = 'cygwin' in git.lower() - cache_repo_arg = cache_repo_arg.replace('\\', '/') - if is_cygwin: - cache_repo_arg = '/cygdrive/c/' + cache_repo_arg[3:] - - # update (or create) the cache repo - if isdir(cache_repo): - if meta.get('git_rev', 'HEAD') != 'HEAD': - check_call([git, 'fetch'], cwd=cache_repo, stdout=stdout) + if not mirror_dir.startswith(GIT_CACHE + os.sep): + sys.exit("Error: Attempting to mirror to %s which is outside of GIT_CACHE %s" + % (mirror_dir, GIT_CACHE)) + if not isdir(os.path.dirname(mirror_dir)): + os.makedirs(os.path.dirname(mirror_dir)) + mirror_dir_arg = mirror_dir + if sys.platform == 'win32' and 'cygwin' in git.lower(): + mirror_dir_arg = '/cygdrive/c/' + mirror_dir[3:].replace('\\', '/') + if isdir(mirror_dir): + if git_ref != 'HEAD': + check_call([git, 'fetch'], cwd=mirror_dir, stdout=stdout) else: # Unlike 'git clone', fetch doesn't automatically update the cache's HEAD, # So here we explicitly store the remote HEAD in the cache's local refs/heads, @@ -132,42 +137,94 @@ def git_source(meta, recipe_dir, verbose=False): # but the user is working with a branch other than 'master' without # explicitly providing git_rev. check_call([git, 'fetch', 'origin', '+HEAD:_conda_cache_origin_head'], - cwd=cache_repo, stdout=stdout) + cwd=mirror_dir, stdout=stdout) check_call([git, 'symbolic-ref', 'HEAD', 'refs/heads/_conda_cache_origin_head'], - cwd=cache_repo, stdout=stdout) + cwd=mirror_dir, stdout=stdout) else: args = [git, 'clone', '--mirror'] if git_depth > 0: args += ['--depth', str(git_depth)] + check_call(args + [git_url, mirror_dir_arg], stdout=stdout) + assert isdir(mirror_dir) + + # Now clone from mirror_dir into checkout_dir. + check_call([git, 'clone', mirror_dir_arg, checkout_dir], stdout=stdout) + if is_top_level: + checkout = git_ref + if git_url.startswith('.'): + process = Popen(["git", "rev-parse", checkout], + stdout=PIPE, cwd=git_url) + output = process.communicate()[0].strip() + checkout = output.decode('utf-8') + if verbose: + print('checkout: %r' % checkout) + if checkout: + check_call([git, 'checkout', checkout], + cwd=checkout_dir, stdout=stdout) + + # submodules may have been specified using relative paths. + # Those paths are relative to git_url, and will not exist + # relative to mirror_dir, unless we do some work to make + # it so. + try: + submodules = check_output([git, 'config', '--file', '.gitmodules', '--get-regexp', + 'url'], stderr=stdout, cwd=checkout_dir) + submodules = submodules.decode('utf-8').splitlines() + except: + submodules = [] + for submodule in submodules: + matches = git_submod_re.match(submodule) + if matches and matches.group(2)[0] == '.': + submod_name = matches.group(1) + submod_rel_path = matches.group(2) + submod_url = urljoin(git_url + '/', submod_rel_path) + submod_mirror_dir = os.path.normpath( + os.path.join(mirror_dir, submod_rel_path)) + if verbose: + print('Relative submodule %s found: url is %s, submod_mirror_dir is %s' % ( + submod_name, submod_url, submod_mirror_dir)) + with TemporaryDirectory() as temp_checkout_dir: + git_mirror_checkout_recursive(git, submod_mirror_dir, temp_checkout_dir, submod_url, + git_ref, git_depth, False, verbose) + + if is_top_level: + # Now that all relative-URL-specified submodules are locally mirrored to + # relatively the same place we can go ahead and checkout the submodules. + check_call([git, 'submodule', 'update', '--init', + '--recursive'], cwd=checkout_dir, stdout=stdout) + git_info(verbose=verbose) + if not verbose: + FNULL.close() - check_call(args + [git_url, cache_repo_arg], stdout=stdout) - assert isdir(cache_repo) - - # now clone into the work directory - checkout = meta.get('git_rev') - # if rev is not specified, and the git_url is local, - # assume the user wants the current HEAD - if not checkout and git_url.startswith('.'): - process = Popen(["git", "rev-parse", "HEAD"], - stdout=PIPE, cwd=git_url) - output = process.communicate()[0].strip() - checkout = output.decode('utf-8') - if checkout and verbose: - print('checkout: %r' % checkout) - - check_call([git, 'clone', cache_repo_arg, WORK_DIR], stdout=stdout) - if checkout: - check_call([git, 'checkout', checkout], cwd=WORK_DIR, stdout=stdout) - # Submodules must be updated after checkout. - check_call([git, 'submodule', 'update', '--init', '--recursive'], cwd=WORK_DIR, stdout=stdout) +def git_source(meta, recipe_dir, verbose=False): + ''' Download a source from a Git repo (or submodule, recursively) ''' + if not isdir(GIT_CACHE): + os.makedirs(GIT_CACHE) - git_info(verbose=verbose) + git = external.find_executable('git') + if not git: + sys.exit("Error: git is not installed") - if not verbose: - FNULL.close() + git_url = meta['git_url'] + git_depth = int(meta.get('git_depth', -1)) + git_ref = meta.get('git_rev', 'HEAD') - return WORK_DIR + if git_url.startswith('.'): + # It's a relative path from the conda recipe + os.chdir(recipe_dir) + if sys.platform == 'win32': + git_dn = abspath(expanduser(git_url)).replace(':', '_') + else: + git_dn = abspath(expanduser(git_url))[1:] + else: + git_dn = git_url.split('://')[-1].replace('/', os.sep) + if git_dn.startswith(os.sep): + git_dn = git_dn[1:] + mirror_dir = join(GIT_CACHE, git_dn) + git_mirror_checkout_recursive( + git, mirror_dir, WORK_DIR, git_url, git_ref, git_depth, True, verbose) + return git def git_info(fo=None, verbose=False): @@ -349,37 +406,58 @@ def _guess_patch_strip_level(filesstr, src_dir): return patchlevel -def _source_files_from_patch_file(path): +def _get_patch_file_details(path): re_files = re.compile('^(?:---|\+\+\+) ([^\n\t]+)') files = set() with open(path) as f: - files = {m.group(1) for l in f.readlines() - for m in [re_files.search(l)] - if m and m.group(1) != '/dev/null'} - return files - - -def apply_patch(src_dir, path): - print('Applying patch: %r' % path) + files = [] + first_line = True + is_git_format = True + for l in f.readlines(): + if first_line and not re.match('From [0-9a-f]{40}', l): + is_git_format = False + first_line = False + m = re_files.search(l) + if m and m.group(1) != '/dev/null': + files.append(m.group(1)) + elif is_git_format and l.startswith('git') and not l.startswith('git --diff'): + is_git_format = False + return (files, is_git_format) + + +def apply_patch(src_dir, path, git=None): if not isfile(path): sys.exit('Error: no such patch: %s' % path) - patch = external.find_executable('patch') - if patch is None: - sys.exit("""\ -Error: - Did not find 'patch' in: %s - You can install 'patch' using apt-get, yum (Linux), Xcode (MacOSX), - or conda, m2-patch (Windows), -""" % (os.pathsep.join(external.dir_paths))) - files = _source_files_from_patch_file(path) - patch_strip_level = _guess_patch_strip_level(files, src_dir) - patch_args = ['-p%d' % patch_strip_level, '-i', path] - if sys.platform == 'win32': - patch_args[-1] = _ensure_unix_line_endings(path) - check_call([patch] + patch_args, cwd=src_dir) - if sys.platform == 'win32' and os.path.exists(patch_args[-1]): - os.remove(patch_args[-1]) # clean up .patch_unix file + files, is_git_format = _get_patch_file_details(path) + if git and is_git_format: + # Prevents git from asking interactive questions, + # also necessary to achieve sha1 reproducibility; + # as is --committer-date-is-author-date. By this, + # we mean a round-trip of git am/git format-patch + # gives the same file. + git_env = os.environ + git_env['GIT_COMMITTER_NAME'] = 'conda-build' + git_env['GIT_COMMITTER_EMAIL'] = 'conda@conda-build.org' + check_call([git, 'am', '--committer-date-is-author-date', path], + cwd=src_dir, stdout=None, env=git_env) + else: + print('Applying patch: %r' % path) + patch = external.find_executable('patch') + if patch is None: + sys.exit("""\ + Error: + Cannot use 'git' (not a git repo and/or patch) and did not find 'patch' in: %s + You can install 'patch' using apt-get, yum (Linux), Xcode (MacOSX), + or conda, m2-patch (Windows), + """ % (os.pathsep.join(external.dir_paths))) + patch_strip_level = _guess_patch_strip_level(files, src_dir) + patch_args = ['-p%d' % patch_strip_level, '-i', path] + if sys.platform == 'win32': + patch_args[-1] = _ensure_unix_line_endings(path) + check_call([patch] + patch_args, cwd=src_dir) + if sys.platform == 'win32' and os.path.exists(patch_args[-1]): + os.remove(patch_args[-1]) # clean up .patch_unix file def provide(recipe_dir, meta, verbose=False, patch=True): @@ -390,10 +468,11 @@ def provide(recipe_dir, meta, verbose=False, patch=True): - apply patches (if any) """ + git = None if any(k in meta for k in ('fn', 'url')): unpack(meta, verbose=verbose) elif 'git_url' in meta: - git_source(meta, recipe_dir, verbose=verbose) + git = git_source(meta, recipe_dir, verbose=verbose) # build to make sure we have a work directory with source in it. We want to make sure that # whatever version that is does not interfere with the test we run next. elif 'hg_url' in meta: @@ -411,7 +490,7 @@ def provide(recipe_dir, meta, verbose=False, patch=True): if patch: src_dir = get_dir() for patch in meta.get('patches', []): - apply_patch(src_dir, join(recipe_dir, patch)) + apply_patch(src_dir, join(recipe_dir, patch), git) if __name__ == '__main__': diff --git a/conda_build/utils.py b/conda_build/utils.py index 15596ca841..c12fc4449b 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -19,6 +19,8 @@ from conda_build import external +from difflib import get_close_matches + # Backwards compatibility import. Do not remove. from .conda_interface import rm_rf # NOQA @@ -264,3 +266,12 @@ def get_site_packages(prefix): def move_to_trash(path, placeholder=""): from .conda_interface import move_path_to_trash as trash return trash(path) + + +def guess_license_family(license, allowed_license_families): + # Tend towards the more clear GPL3 and away from the ambiguity of GPL2. + if 'GPL (>= 2)' in license or license == 'GPL': + return 'GPL3' + else: + return get_close_matches(license, + allowed_license_families, 1, 0.0)[0] diff --git a/tests/test_build_recipes.py b/tests/test_build_recipes.py index d7774c78f0..b00748aab0 100644 --- a/tests/test_build_recipes.py +++ b/tests/test_build_recipes.py @@ -1,9 +1,13 @@ +from __future__ import print_function + import os import subprocess import shutil import sys import tarfile import tempfile +import yaml +from collections import OrderedDict import pytest @@ -26,6 +30,21 @@ fail_dir = os.path.join(thisdir, 'test-recipes', 'fail') +def represent_ordereddict(dumper, data): + value = [] + + for item_key, item_value in data.items(): + node_key = dumper.represent_data(item_key) + node_value = dumper.represent_data(item_value) + + value.append((node_key, node_value)) + + return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value) + + +yaml.add_representer(OrderedDict, represent_ordereddict) + + # Used for translating local paths into url (file://) paths # http://stackoverflow.com/a/14298190/1170370 def path2url(path): @@ -143,6 +162,85 @@ def test_relative_git_url_git_versioning(): assert tag in output +@pytest.mark.skipif(sys.platform == "win32", + reason="Windows permission errors w/ git when removing repo files on cleanup.") +def test_relative_git_url_submodule_clone(): + """Test git submodules identified with relative URLs can be mirrored then cloned. Also tests + pushing changes and new tags and making sure those are reflected with GIT_DESCRIBE_TAG""" + basedir = os.getcwd() + try: + with TemporaryDirectory() as tmp: + os.chdir(tmp) + toplevel = os.path.join(tmp, 'toplevel') + os.mkdir(toplevel) + relative_sub = os.path.join(tmp, 'relative_sub') + os.mkdir(relative_sub) + absolute_sub = os.path.join(tmp, 'absolute_sub') + os.mkdir(absolute_sub) + + git_env = os.environ + git_env['GIT_AUTHOR_NAME'] = 'conda-build' + git_env['GIT_COMMITTER_NAME'] = 'conda-build' + git_env['GIT_COMMITTER_EMAIL'] = 'conda@conda-build.org' + + for tag in range(2): + os.chdir(absolute_sub) + if tag == 0: + subprocess.check_call(['git', 'init'], env=git_env) + with open('absolute', 'w') as f: + print(str(tag), file=f) + subprocess.check_call(['git', 'add', 'absolute'], env=git_env) + subprocess.check_call(['git', 'commit', '-m', 'absolute {}'.format(tag)], + env=git_env) + + os.chdir(relative_sub) + if tag == 0: + subprocess.check_call(['git', 'init']) + with open('relative', 'w') as f: + print(str(tag), file=f) + subprocess.check_call(['git', 'add', 'relative'], env=git_env) + subprocess.check_call(['git', 'commit', '-m', 'relative {}'.format(tag)], + env=git_env) + + os.chdir(toplevel) + if tag == 0: + subprocess.check_call(['git', 'init'], env=git_env) + with open('toplevel', 'w') as f: + print(str(tag), file=f) + subprocess.check_call(['git', 'add', 'toplevel'], env=git_env) + subprocess.check_call(['git', 'commit', '-m', 'toplevel {}'.format(tag)], + env=git_env) + if tag == 0: + subprocess.check_call(['git', 'submodule', 'add', absolute_sub, 'absolute'], + env=git_env) + subprocess.check_call(['git', 'submodule', 'add', + os.path.join('..', 'relative_sub'), 'relative'], + env=git_env) + subprocess.check_call(['git', 'tag', '-a', str(tag), '-m', 'tag {}'.format(tag)], + env=git_env) + + filename = os.path.join(tmp, 'meta.yaml') + data = OrderedDict([ + ('package', OrderedDict([ + ('name', 'relative_submodules'), + ('version', '{{ GIT_DESCRIBE_TAG }}')])), + ('source', OrderedDict([ + ('git_url', toplevel), + ('git_tag', str(tag))])) + ]) + + with open(filename, 'w') as outfile: + outfile.write(yaml.dump(data, default_flow_style=False)) + output = subprocess.check_output(['conda', 'build', '--output', tmp]) + if PY3: + output = output.decode("UTF-8") + assert ("relative_submodules-{}-0".format(tag) in output) + except: + raise + finally: + os.chdir(basedir) + + def test_package_test(): """Test calling conda build -t - rather than """ filename = "jinja2-2.8-py{}{}_0.tar.bz2".format(sys.version_info.major, sys.version_info.minor)