Source code for mmf.utils.myvectorize

r"""New version of numpy.vectorize to support kwargs."""

import warnings
import types
import sys
import numpy.core.numeric as _nx
from numpy.core import linspace
from numpy.core.numeric import ones, zeros, arange, concatenate, array, \
        asarray, asanyarray, empty, empty_like, ndarray, around
from numpy.core.numeric import ScalarType, dot, where, newaxis, intp, \
        integer, isscalar
from numpy.core.umath import pi, multiply, add, arctan2,  \
        frompyfunc, isnan, cos, less_equal, sqrt, sin, mod, exp, log10
from numpy.core.fromnumeric import ravel, nonzero, choose, sort, mean
from numpy.core.numerictypes import typecodes, number
from numpy.core import atleast_1d, atleast_2d
from numpy.lib.twodim_base import diag
#from _compiled_base import _insert, add_docstring
#from _compiled_base import digitize, bincount, interp as compiled_interp
#from arraysetops import setdiff1d
#from utils import deprecate
#from _compiled_base import add_newdoc_ufunc
import numpy as np

from numpy.lib.function_base import *

[docs]class vectorize(object): """ vectorize(pyfunc, otypes='', doc=None, excluded=None, cache=False) Generalized function class. Define a vectorized function which takes a nested sequence of objects or numpy arrays as inputs and returns a numpy array as output. The vectorized function evaluates `pyfunc` over successive tuples of the input arrays like the python map function, except it uses the broadcasting rules of numpy. The data type of the output of `vectorized` is determined by calling the function with the first element of the input. This can be avoided by specifying the `otypes` argument. Parameters ---------- pyfunc : callable A python function or method. otypes : str or list of dtypes, optional The output data type. It must be specified as either a string of typecode characters or a list of data type specifiers. There should be one data type specifier for each output. doc : str, optional The docstring for the function. If `None`, the docstring will be the ``pyfunc.__doc__``. excluded : set, optional Set of strings or integers representing the positional or keyword arguments for which the function will not be vectorized. These will be passed directly to `pyfunc` unmodified. .. versionadded:: 1.7.0 cache : bool, optional If `True`, then cache the first function call that determines the number of outputs if `otypes` is not provided. .. versionadded:: 1.7.0 Returns ------- vectorized : callable Vectorized function. Examples -------- >>> def myfunc(a, b): ... "Return a-b if a>b, otherwise return a+b" ... if a > b: ... return a - b ... else: ... return a + b >>> vfunc = np.vectorize(myfunc) >>> vfunc([1, 2, 3, 4], 2) array([3, 4, 1, 2]) The docstring is taken from the input function to `vectorize` unless it is specified >>> vfunc.__doc__ 'Return a-b if a>b, otherwise return a+b' >>> vfunc = np.vectorize(myfunc, doc='Vectorized `myfunc`') >>> vfunc.__doc__ 'Vectorized `myfunc`' The output type is determined by evaluating the first element of the input, unless it is specified >>> out = vfunc([1, 2, 3, 4], 2) >>> type(out[0]) <type 'numpy.int32'> >>> vfunc = np.vectorize(myfunc, otypes=[np.float]) >>> out = vfunc([1, 2, 3, 4], 2) >>> type(out[0]) <type 'numpy.float64'> The `excluded` argument can be used to prevent vectorizing over certain arguments. This can be useful for array-like arguments of a fixed length such as the coefficients for a polynomial as in `polyval`: >>> def mypolyval(p, x): ... _p = list(p) ... res = _p.pop(0) ... while _p: ... res = res*x + _p.pop(0) ... return res >>> vpolyval = np.vectorize(mypolyval, excluded=['p']) >>> vpolyval(p=[1, 2, 3], x=[0, 1]) array([3, 6]) Positional arguments may also be excluded by specifying their position: >>> vpolyval.excluded.add(0) >>> vpolyval([1, 2, 3], x=[0, 1]) array([3, 6]) Notes ----- The `vectorize` function is provided primarily for convenience, not for performance. The implementation is essentially a for loop. If `otypes` is not specified, then a call to the function with the first argument will be used to determine the number of outputs. The results of this call will be cached if `cache` is `True` to prevent calling the function twice. However, to implement the cache, the original function must be wrapped which will slow down subsequent calls, so only do this if your function is expensive. The new keyword argument interface and `excluded` argument support further degrades performance. """
[docs] def __init__(self, pyfunc, otypes='', doc=None, excluded=None, cache=False): self.pyfunc = pyfunc self.cache = cache if doc is None: self.__doc__ = pyfunc.__doc__ else: self.__doc__ = doc if isinstance(otypes, str): self.otypes = otypes for char in self.otypes: if char not in typecodes['All']: raise ValueError("Invalid otype specified: %s" % (char,)) elif iterable(otypes): self.otypes = ''.join([_nx.dtype(x).char for x in otypes]) else: raise ValueError("Invalid otype specification") # Excluded variable support if excluded is None: excluded = set() self.excluded = set(excluded) if self.otypes and not self.excluded: self._ufunc = None # Caching to improve default performance
def __call__(self, *args, **kwargs): """ Return arrays with the results of `pyfunc` broadcast (vectorized) over `args` and `kwargs` not in `excluded`. """ excluded = self.excluded if not kwargs and not excluded: func = self.pyfunc vargs = args else: # The wrapper accepts only positional arguments: we use `names` and # `inds` to mutate `the_args` and `kwargs` to pass to the original # function. nargs = len(args) names = [_n for _n in kwargs if _n not in excluded] inds = [_i for _i in range(nargs) if _i not in excluded] the_args = list(args) def func(*vargs): for _n, _i in enumerate(inds): the_args[_i] = vargs[_n] kwargs.update(zip(names, vargs[len(inds):])) return self.pyfunc(*the_args, **kwargs) vargs = [args[_i] for _i in inds] vargs.extend([kwargs[_n] for _n in names]) return self._vectorize_call(func=func, args=vargs) def _get_ufunc_and_otypes(self, func, args): """Return (ufunc, otypes).""" # frompyfunc will fail if args is empty assert args if self.otypes: otypes = self.otypes nout = len(otypes) # Note logic here: We only *use* self._ufunc if func is self.pyfunc # even though we set self._ufunc regardless. if func is self.pyfunc and self._ufunc is not None: ufunc = self._ufunc else: ufunc = self._ufunc = frompyfunc(func, len(args), nout) else: # Get number of outputs and output types by calling the function on # the first entries of args. We also cache the result to prevent # the subsequent call when the ufunc is evaluated. # Assumes that ufunc first evaluates the 0th elements in the input # arrays (the input values are not checked to ensure this) inputs = [asarray(_a).flat[0] for _a in args] outputs = func(*inputs) # Performance note: profiling indicates that -- for simple functions # at least -- this wrapping can almost double the execution time. # Hence we make it optional. if self.cache: _cache = [outputs] def _func(*vargs): if _cache: return _cache.pop() else: return func(*vargs) else: _func = func if isinstance(outputs, tuple): nout = len(outputs) else: nout = 1 outputs = (outputs,) otypes = ''.join([asarray(outputs[_k]).dtype.char for _k in range(nout)]) # Performance note: profiling indicates that creating the ufunc is # not a significant cost compared with wrapping so it seems not # worth trying to cache this. ufunc = frompyfunc(_func, len(args), nout) return ufunc, otypes def _vectorize_call(self, func, args): """Vectorized call to `func` over positional `args`.""" if not args: _res = func() else: ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args) # Convert args to object arrays first inputs = [array(_a, copy=False, subok=True, dtype=object) for _a in args] outputs = ufunc(*inputs) if ufunc.nout == 1: _res = array(outputs, copy=False, subok=True, dtype=otypes[0]) else: _res = tuple([array(_x, copy=False, subok=True, dtype=_t) for _x, _t in zip(outputs, otypes)]) return _res

Project Versions