#!/usr/bin/env python
""" cythonize

Cythonize pyx files into C files as needed.

Usage: cythonize [root_dir]

Default [root_dir] is 'scipy'.

The number of parallel Cython processes is controlled by the environment
variable SCIPY_NUM_CYTHONIZE_JOBS. If not set, it is determined from the
number of CPUs.

Checks pyx files to see if they have been changed relative to their
corresponding C files. If they have, then runs cython on these files to
recreate the C files. A pyx file is considered changed when its hash differs
from the one stored in a database file.

Simple script to invoke Cython (and Tempita) on all .pyx (.pyx.in) files
while waiting for a proper build system. Uses file hashes to figure out
whether a rebuild is needed.

For now, this script should be run by developers when changing Cython files
only, and the resulting C files checked in, so that end-users (and Python-only
developers) do not get the Cython/Tempita dependencies.

Originally written by Dag Sverre Seljebotn, and copied here from:

https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py

Note: this script does not check any of the dependent C libraries; it only
operates on the Cython .pyx files.
"""

import os
import re
import sys
import hashlib
import subprocess
from multiprocessing import cpu_count
from multiprocessing.dummy import Pool, Lock
from os.path import dirname, join

HASH_FILE = 'cythonize.dat'
DEFAULT_ROOT = 'scipy'


#
# Rules
#
def process_pyx(fromfile, tofile, cwd):
    try:
        from Cython.Compiler.Version import version as cython_version
        from scipy._lib import _pep440

        # Try to find pyproject.toml
        pyproject_toml = join(dirname(__file__), '..', 'pyproject.toml')
        if not os.path.exists(pyproject_toml):
            raise ImportError()

        # Try to find the minimum version from pyproject.toml
        with open(pyproject_toml) as pt:
            for line in pt:
                if "cython" not in line.lower():
                    continue
                line = ''.join(line.split('=')[1:])  # get rid of "Cython>="
                if ',<' in line:
                    # There's an upper bound as well
                    split_on = ',<'
                    if ',<=' in line:
                        split_on = ',<='
                    min_required_version, max_required_version = \
                        line.split(split_on)
                    max_required_version, _ = max_required_version.split('"')
                else:
                    min_required_version, _ = line.split('"')
                break
            else:
                raise ImportError()

        # Note: we only check the lower bound; for the upper bound we rely
        # on pip respecting pyproject.toml. Reason: we want to be able to
        # build/test with a more recent Cython locally or on main; the upper
        # bound is for the sdist in a release.
        if (_pep440.parse(cython_version) <
                _pep440.Version(min_required_version)):
            raise Exception('Building SciPy requires Cython >= {}, found '
                            '{}'.format(min_required_version, cython_version))
    except ImportError:
        pass

    flags = ['--fast-fail', '-3']
    if tofile.endswith('.cxx'):
        flags += ['--cplus']

    try:
        try:
            r = subprocess.call(['cython'] + flags + ["-o", tofile, fromfile],
                                cwd=cwd)
            if r != 0:
                raise Exception('Cython failed')
        except OSError as e:
            # There are ways of installing Cython that don't result in a
            # cython executable on the path, see gh-2397.
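            # Fall back to running the compiler through the current
            # interpreter; Cython.Compiler.Main.setuptools_main parses the
            # same command-line arguments as the `cython` script.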
            r = subprocess.call([sys.executable, '-c',
                                 'import sys; from Cython.Compiler.Main import '
                                 'setuptools_main as main; sys.exit(main())'] +
                                flags + ["-o", tofile, fromfile],
                                cwd=cwd)
            if r != 0:
                raise Exception("Cython either isn't installed or it "
                                "failed.") from e
    except OSError as e:
        raise OSError('Cython needs to be installed') from e


def process_tempita_pyx(fromfile, tofile, cwd):
    try:
        try:
            from Cython import Tempita as tempita
        except ImportError:
            import tempita
    except ImportError as e:
        raise Exception('Building SciPy requires Tempita: '
                        'pip install --user Tempita') from e
    with open(os.path.join(cwd, fromfile), mode='r') as f_in:
        template = f_in.read()
    pyxcontent = tempita.sub(template)
    assert fromfile.endswith('.pyx.in')
    pyxfile = fromfile[:-len('.in')]
    with open(os.path.join(cwd, pyxfile), "w", encoding='utf8') as f_out:
        f_out.write(pyxcontent)
    process_pyx(pyxfile, tofile, cwd)


rules = {
    # fromext : function
    '.pyx': process_pyx,
    '.pyx.in': process_tempita_pyx,
}


#
# Hash db
#
def load_hashes(filename):
    # Return { filename : (sha1 of input, sha1 of output) }
    if os.path.isfile(filename):
        hashes = {}
        with open(filename, 'r') as f:
            for line in f:
                filename, inhash, outhash = line.split()
                if outhash == "None":
                    outhash = None
                hashes[filename] = (inhash, outhash)
    else:
        hashes = {}
    return hashes


def save_hashes(hash_db, filename):
    with open(filename, 'w') as f:
        for key, value in sorted(hash_db.items()):
            f.write("%s %s %s\n" % (key, value[0], value[1]))


def sha1_of_file(filename):
    h = hashlib.sha1()
    with open(filename, "rb") as f:
        h.update(f.read())
    return h.hexdigest()


#
# Main program
#
def normpath(path):
    path = path.replace(os.sep, '/')
    if path.startswith('./'):
        path = path[2:]
    return path


def get_hash(frompath, topath):
    from_hash = sha1_of_file(frompath)
    if topath:
        to_hash = sha1_of_file(topath) if os.path.exists(topath) else None
    else:
        to_hash = None
    return (from_hash, to_hash)


def get_cython_dependencies(fullfrompath):
    fullfromdir = os.path.dirname(fullfrompath)
    deps = set()
    with open(fullfrompath, 'r') as f:
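        # Only simple, same-directory dependency forms are recognized
        # (`foo`/`bar` below are illustrative names):
        #   include "foo.pxi"       ->  foo.pxi
        #   from . cimport foo      ->  foo.pxd
        #   from .foo cimport bar   ->  foo.pxd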
        pxipattern = re.compile(r'include "([a-zA-Z0-9_]+\.pxi)"')
        pxdpattern1 = re.compile(r'from \. cimport ([a-zA-Z0-9_]+)')
        pxdpattern2 = re.compile(r'from \.([a-zA-Z0-9_]+) cimport')
        for line in f:
            m = pxipattern.match(line)
            if m:
                deps.add(os.path.join(fullfromdir, m.group(1)))
            m = pxdpattern1.match(line)
            if m:
                deps.add(os.path.join(fullfromdir, m.group(1) + '.pxd'))
            m = pxdpattern2.match(line)
            if m:
                deps.add(os.path.join(fullfromdir, m.group(1) + '.pxd'))
    return list(deps)


def process(path, fromfile, tofile, processor_function, hash_db,
            dep_hashes, lock):
    with lock:
        fullfrompath = os.path.join(path, fromfile)
        fulltopath = os.path.join(path, tofile)
        current_hash = get_hash(fullfrompath, fulltopath)
        file_changed = (current_hash !=
                        hash_db.get(normpath(fullfrompath), None))

        deps_changed = False
        for dep in get_cython_dependencies(fullfrompath):
            dep_hash = get_hash(dep, None)
            if dep_hash != hash_db.get(normpath(dep), None):
                dep_hashes[normpath(dep)] = dep_hash
                deps_changed = True

        if not file_changed and not deps_changed:
            print('%s has not changed' % fullfrompath)
            sys.stdout.flush()
            return

        print('Processing %s' % fullfrompath)
        sys.stdout.flush()

    # Run the (slow) Cython/Tempita step outside the lock so jobs can
    # proceed in parallel.
    processor_function(fromfile, tofile, cwd=path)

    with lock:
        # changed target file, recompute hash
        current_hash = get_hash(fullfrompath, fulltopath)
        # store hash in db
        hash_db[normpath(fullfrompath)] = current_hash


def process_generate_pyx(path, lock):
    with lock:
        print('Running {}'.format(path))

    ret = subprocess.call([sys.executable, path])

    with lock:
        if ret != 0:
            raise RuntimeError("Running {} failed".format(path))


def find_process_files(root_dir):
    lock = Lock()

    try:
        num_proc = int(os.environ.get('SCIPY_NUM_CYTHONIZE_JOBS',
                                      cpu_count()))
        pool = Pool(processes=num_proc)
    except ImportError:
        # Allow building (single-threaded) on GNU/Hurd, which does not
        # support semaphores, so Pool cannot initialize.
        pool = type('', (), {'imap_unordered':
                             lambda self, func, iterable:
                                 map(func, iterable)})()
    except ValueError:
        # SCIPY_NUM_CYTHONIZE_JOBS was set to something non-integer; fall
        # back to the default pool size.
        pool = Pool()

    hash_db = load_hashes(HASH_FILE)
    # Keep changed pxi/pxd hashes in a separate dict until the end,
    # because if we updated hash_db directly and multiple files included
    # the same .pxi file, the changes would not be detected.
    dep_hashes = {}

    # Run any _generate_pyx.py scripts
    jobs = []
    for cur_dir, dirs, files in os.walk(root_dir):
        generate_pyx = os.path.join(cur_dir, '_generate_pyx.py')
        if os.path.exists(generate_pyx):
            jobs.append(generate_pyx)

    for result in pool.imap_unordered(
            lambda fn: process_generate_pyx(fn, lock), jobs):
        pass

    # Process pyx files
    jobs = []
    for cur_dir, dirs, files in os.walk(root_dir):
        for filename in files:
            # Skip .pyx files generated from a .pyx.in template; the
            # .pyx.in rule regenerates them.
            in_file = os.path.join(cur_dir, filename + ".in")
            if filename.endswith('.pyx') and os.path.isfile(in_file):
                continue
            for fromext, function in rules.items():
                if filename.endswith(fromext):
                    # Emit .cxx if the file declares C++ via a distutils
                    # directive comment.
                    toext = ".c"
                    with open(os.path.join(cur_dir, filename), 'rb') as f:
                        data = f.read()
                        m = re.search(
                            br"^\s*#\s*distutils:\s*language\s*=\s*c\+\+\s*$",
                            data, re.I | re.M)
                        if m:
                            toext = ".cxx"
                    fromfile = filename
                    tofile = filename[:-len(fromext)] + toext
                    jobs.append((cur_dir, fromfile, tofile, function,
                                 hash_db, dep_hashes, lock))

    for result in pool.imap_unordered(lambda args: process(*args), jobs):
        pass

    hash_db.update(dep_hashes)
    save_hashes(hash_db, HASH_FILE)


def main():
    try:
        root_dir = sys.argv[1]
    except IndexError:
        root_dir = DEFAULT_ROOT
    find_process_files(root_dir)


if __name__ == '__main__':
    main()
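
# Example invocations (the tools/ location is an assumption, inferred from
# the pyproject.toml lookup in process_pyx, which expects this script to sit
# one level below the repository root):
#
#   python tools/cythonize.py                 # cythonize everything under scipy/
#   python tools/cythonize.py scipy/linalg    # restrict to one subtree
#   SCIPY_NUM_CYTHONIZE_JOBS=4 python tools/cythonize.py
#
# Each line of the resulting cythonize.dat records
# "<normalized source path> <sha1 of input> <sha1 of output or None>".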