/* * sparsetools.cxx * * Python module wrapping the sparsetools C++ routines. * * Each C++ routine is templated vs. an integer (I) and a data (T) parameter. * The `generate_sparsetools.py` script generates `*_impl.h` headers * that contain thunk functions with a datatype-based switch statement calling * each templated instantiation. * * `generate_sparsetools.py` also generates a PyMethodDef list of Python * routines and the corresponding functions call the thunk functions via * `call_thunk`. * * The `call_thunk` function below determines the templated I and T data types * based on the Python arguments. It then allocates arrays with pointers to * the raw data, with appropriate types, and calls the thunk function after * that. * * The types of arguments are specified by a "spec". This is given in a format * where one character represents one argument. The one-character values are * listed below in the call_spec function. */ #define PY_ARRAY_UNIQUE_SYMBOL _scipy_sparse_sparsetools_ARRAY_API #include #include #include #include #include #include "numpy/ndarrayobject.h" #include "sparsetools.h" #include "util.h" #define MAX_ARGS 16 #if NPY_API_VERSION >= 0x0000000c #define HAVE_WRITEBACKIFCOPY #endif static const int supported_I_typenums[] = {NPY_INT32, NPY_INT64}; static const int n_supported_I_typenums = sizeof(supported_I_typenums) / sizeof(int); static const int supported_T_typenums[] = {NPY_BOOL, NPY_BYTE, NPY_UBYTE, NPY_SHORT, NPY_USHORT, NPY_INT, NPY_UINT, NPY_LONG, NPY_ULONG, NPY_LONGLONG, NPY_ULONGLONG, NPY_FLOAT, NPY_DOUBLE, NPY_LONGDOUBLE, NPY_CFLOAT, NPY_CDOUBLE, NPY_CLONGDOUBLE}; static const int n_supported_T_typenums = sizeof(supported_T_typenums) / sizeof(int); static PyObject *array_from_std_vector_and_free(int typenum, void *p); static void *allocate_std_vector_typenum(int typenum); static void free_std_vector_typenum(int typenum, void *p); static PyObject *c_array_from_object(PyObject *obj, int typenum, int is_output); /* * Call a thunk function, dealing with input and output arrays. * * Resolves the templated and dtypes from the `args` argument * list. * * Parameters * ---------- * ret_spec : {'i', 'v'} * Return value spec. 'i' for integer, 'v' for void. * spec * String whose each character specifies a types of an * argument: * * 'i': scalar * 'I': array * 'T': array * 'V': std::vector * 'W': std::vector * 'B': npy_bool array * '*': indicates that the next argument is an output argument * thunk : PY_LONG_LONG thunk(int I_typenum, int T_typenum, void **) * Thunk function to call. It is passed a void** array of pointers to * arguments, constructed according to `spec`. The types of data pointed * to by each element agree with I_typenum and T_typenum, or are bools. * args * Python tuple containing unprocessed arguments. * * Returns * ------- * return_value * The Python return value * */ NPY_VISIBILITY_HIDDEN PyObject * call_thunk(char ret_spec, const char *spec, thunk_t *thunk, PyObject *args) { void *arg_list[MAX_ARGS]; PyObject *arg_arrays[MAX_ARGS]; int is_output[MAX_ARGS]; PyObject *return_value = NULL; int I_typenum = NPY_INT32; int T_typenum = -1; int VW_count = 0; int I_in_arglist = 0; int T_in_arglist = 0; int next_is_output = 0; int j, k, arg_j; const char *p; PY_LONG_LONG ret; Py_ssize_t max_array_size = 0; NPY_BEGIN_THREADS_DEF; if (!PyTuple_Check(args)) { PyErr_SetString(PyExc_ValueError, "args is not a tuple"); return NULL; } for (j = 0; j < MAX_ARGS; ++j) { arg_list[j] = NULL; arg_arrays[j] = NULL; is_output[j] = 0; } /* * Detect data types in the signature */ arg_j = 0; j = 0; for (p = spec; *p != '\0'; ++p, ++j, ++arg_j) { const int *supported_typenums; int n_supported_typenums; int cur_typenum; PyObject *arg; if (j >= MAX_ARGS) { PyErr_SetString(PyExc_ValueError, "internal error: too many arguments in spec"); goto fail; } is_output[j] = next_is_output; next_is_output = 0; switch (*p) { case '*': next_is_output = 1; --j; --arg_j; continue; case 'i': case 'l': /* Integer scalars */ arg = PyTuple_GetItem(args, arg_j); if (arg == NULL) { goto fail; } Py_INCREF(arg); arg_arrays[j] = arg; continue; case 'I': /* Integer arrays */ supported_typenums = supported_I_typenums; n_supported_typenums = n_supported_I_typenums; cur_typenum = I_typenum; I_in_arglist = 1; break; case 'T': /* Data arrays */ supported_typenums = supported_T_typenums; n_supported_typenums = n_supported_T_typenums; cur_typenum = T_typenum; T_in_arglist = 1; break; case 'B': /* Boolean arrays */ arg = PyTuple_GetItem(args, arg_j); if (arg == NULL) { goto fail; } arg_arrays[j] = c_array_from_object(arg, NPY_BOOL, is_output[j]); if (arg_arrays[j] == NULL) { goto fail; } continue; case 'V': /* std::vector integer output array */ I_in_arglist = 1; --arg_j; VW_count += 1; continue; case 'W': /* std::vector data output array */ T_in_arglist = 1; --arg_j; VW_count += 1; continue; default: PyErr_SetString(PyExc_ValueError, "unknown character in spec"); goto fail; } arg = PyTuple_GetItem(args, arg_j); if (arg == NULL) { goto fail; } arg_arrays[j] = c_array_from_object(arg, -1, is_output[j]); if (arg_arrays[j] == NULL) { goto fail; } /* Find a compatible supported data type */ for (k = 0; k < n_supported_typenums; ++k) { if (PyArray_CanCastSafely(PyArray_TYPE((PyArrayObject *)arg_arrays[j]), supported_typenums[k]) && (cur_typenum == -1 || PyArray_CanCastSafely(cur_typenum, supported_typenums[k]))) { cur_typenum = supported_typenums[k]; break; } } if (k == n_supported_typenums) { PyErr_SetString(PyExc_ValueError, "unsupported data types in input"); goto fail; } if (*p == 'I') { I_typenum = cur_typenum; } else { T_typenum = cur_typenum; } } if (arg_j != PyTuple_Size(args)) { PyErr_SetString(PyExc_ValueError, "too many arguments"); goto fail; } if ((I_in_arglist && I_typenum == -1) || (T_in_arglist && T_typenum == -1)) { PyErr_SetString(PyExc_ValueError, "unsupported data types in input"); goto fail; } /* * Cast and extract argument arrays */ j = 0; for (p = spec; *p != '\0'; ++p, ++j) { PyObject *arg; int cur_typenum; if (*p == '*') { --j; continue; } else if (*p == 'i' || *p == 'l') { /* Integer scalars */ PY_LONG_LONG value; value = PyLong_AsLongLong(arg_arrays[j]); if (PyErr_Occurred()) { goto fail; } if ((*p == 'l' || PyArray_EquivTypenums(I_typenum, NPY_INT64)) && value == (npy_int64)value) { arg_list[j] = std::malloc(sizeof(npy_int64)); *(npy_int64*)arg_list[j] = (npy_int64)value; } else if (*p == 'i' && PyArray_EquivTypenums(I_typenum, NPY_INT32) && value == (npy_int32)value) { arg_list[j] = std::malloc(sizeof(npy_int32)); *(npy_int32*)arg_list[j] = (npy_int32)value; } else { PyErr_SetString(PyExc_ValueError, "could not convert integer scalar"); goto fail; } continue; } else if (*p == 'B') { /* Boolean arrays already cast */ } else if (*p == 'V') { arg_list[j] = allocate_std_vector_typenum(I_typenum); if (arg_list[j] == NULL) { goto fail; } continue; } else if (*p == 'W') { arg_list[j] = allocate_std_vector_typenum(T_typenum); if (arg_list[j] == NULL) { goto fail; } continue; } else { cur_typenum = (*p == 'I' || *p == 'i') ? I_typenum : T_typenum; /* Cast if necessary */ arg = arg_arrays[j]; if (PyArray_EquivTypenums(PyArray_TYPE((PyArrayObject *)arg), cur_typenum)) { /* No cast needed. */ } else if (!is_output[j] || PyArray_CanCastSafely(cur_typenum, PyArray_TYPE((PyArrayObject *)arg))) { /* Cast needed. Output arrays require safe cast back. */ arg_arrays[j] = c_array_from_object(arg, cur_typenum, is_output[j]); Py_DECREF(arg); if (arg_arrays[j] == NULL) { goto fail; } } else { /* Cast back into output array was not safe. */ PyErr_SetString(PyExc_ValueError, "Output dtype not compatible with inputs."); goto fail; } } /* Grab value */ arg_list[j] = PyArray_DATA((PyArrayObject *)arg_arrays[j]); /* Find maximum array size */ if (PyArray_SIZE((PyArrayObject *)arg_arrays[j]) > max_array_size) { max_array_size = PyArray_SIZE((PyArrayObject *)arg_arrays[j]); } } /* * Call thunk */ if (max_array_size > 100) { /* Threshold GIL release: it's not a free operation */ NPY_BEGIN_THREADS; } try { ret = thunk(I_typenum, T_typenum, arg_list); NPY_END_THREADS; } catch (const std::bad_alloc &e) { NPY_END_THREADS; PyErr_SetString(PyExc_MemoryError, e.what()); goto fail; } catch (const std::exception &e) { NPY_END_THREADS; PyErr_SetString(PyExc_RuntimeError, e.what()); goto fail; } /* * Generate return value; */ switch (ret_spec) { case 'i': case 'l': return_value = PyLong_FromLongLong(ret); break; case 'v': Py_INCREF(Py_None); return_value = Py_None; break; default: PyErr_SetString(PyExc_ValueError, "internal error: invalid return value spec"); } /* * Convert any std::vector output arrays to arrays */ if (VW_count > 0) { PyObject *new_ret; PyObject *old_ret = return_value; int pos; return_value = NULL; new_ret = PyTuple_New(VW_count + (old_ret == Py_None ? 0 : 1)); if (new_ret == NULL) { goto fail; } if (old_ret != Py_None) { PyTuple_SET_ITEM(new_ret, 0, old_ret); pos = 1; } else { Py_DECREF(old_ret); pos = 0; } j = 0; for (p = spec; *p != '\0'; ++p, ++j) { if (*p == '*') { --j; continue; } else if (*p == 'V' || *p == 'W') { PyObject *arg; if (*p == 'V') { arg = array_from_std_vector_and_free(I_typenum, arg_list[j]); } else { arg = array_from_std_vector_and_free(T_typenum, arg_list[j]); } arg_list[j] = NULL; if (arg == NULL) { Py_XDECREF(new_ret); goto fail; } PyTuple_SET_ITEM(new_ret, pos, arg); ++pos; } } return_value = new_ret; } fail: /* * Cleanup */ for (j = 0, p = spec; *p != '\0'; ++p, ++j) { if (*p == '*') { --j; continue; } #ifdef HAVE_WRITEBACKIFCOPY if (is_output[j] && arg_arrays[j] != NULL && PyArray_Check(arg_arrays[j])) { PyArray_ResolveWritebackIfCopy((PyArrayObject *)arg_arrays[j]); } #endif Py_XDECREF(arg_arrays[j]); if ((*p == 'i' || *p == 'l') && arg_list[j] != NULL) { std::free(arg_list[j]); } else if (*p == 'V' && arg_list[j] != NULL) { free_std_vector_typenum(I_typenum, arg_list[j]); } else if (*p == 'W' && arg_list[j] != NULL) { free_std_vector_typenum(T_typenum, arg_list[j]); } } return return_value; } /* * Helper functions for dealing with std::vector templated instantiation. */ static void *allocate_std_vector_typenum(int typenum) { #define PROCESS(ntype, ctype) \ if (PyArray_EquivTypenums(typenum, ntype)) { \ return (void*)(new std::vector()); \ } try { SPTOOLS_FOR_EACH_DATA_TYPE_CODE(PROCESS) } catch (std::exception &e) { /* failed */ } #undef PROCESS PyErr_SetString(PyExc_RuntimeError, "failed to allocate std::vector"); return NULL; } static void free_std_vector_typenum(int typenum, void *p) { #define PROCESS(ntype, ctype) \ if (PyArray_EquivTypenums(typenum, ntype)) { \ delete ((std::vector*)p); \ return; \ } SPTOOLS_FOR_EACH_DATA_TYPE_CODE(PROCESS) #undef PROCESS } static PyObject *array_from_std_vector_and_free(int typenum, void *p) { #define PROCESS(ntype, ctype) \ if (PyArray_EquivTypenums(typenum, ntype)) { \ std::vector *v = (std::vector*)p; \ npy_intp length = v->size(); \ PyObject *obj = PyArray_SimpleNew(1, &length, typenum); \ if (length > 0) { \ memcpy(PyArray_DATA((PyArrayObject *)obj), &((*v)[0]), \ sizeof(ctype)*length); \ } \ delete v; \ return obj; \ } SPTOOLS_FOR_EACH_DATA_TYPE_CODE(PROCESS) #undef PROCESS PyErr_SetString(PyExc_RuntimeError, "failed to convert std::vector output array"); return NULL; } static PyObject *c_array_from_object(PyObject *obj, int typenum, int is_output) { if (!is_output) { if (typenum == -1) { return PyArray_FROM_OF(obj, NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_NOTSWAPPED); } else { return PyArray_FROM_OTF(obj, typenum, NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_NOTSWAPPED); } } else { #ifdef HAVE_WRITEBACKIFCOPY int flags = NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_WRITEABLE|NPY_ARRAY_WRITEBACKIFCOPY|NPY_ARRAY_NOTSWAPPED; #else int flags = NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_WRITEABLE|NPY_UPDATEIFCOPY|NPY_ARRAY_NOTSWAPPED; #endif if (typenum == -1) { return PyArray_FROM_OF(obj, flags); } else { return PyArray_FROM_OTF(obj, typenum, flags); } } } /* * Python module initialization */ extern "C" { #include "sparsetools_impl.h" static struct PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_sparsetools", NULL, -1, sparsetools_methods, NULL, NULL, NULL, NULL }; PyMODINIT_FUNC PyInit__sparsetools(void) { import_array(); return PyModule_Create(&moduledef); } } /* extern "C" */