Source code for mmf.utils.myvectorize

r"""New version of numpy.vectorize to support kwargs."""

import warnings
import types
import sys
import numpy.core.numeric as _nx
from numpy.core import linspace
from numpy.core.numeric import ones, zeros, arange, concatenate, array, \
        asarray, asanyarray, empty, empty_like, ndarray, around
from numpy.core.numeric import ScalarType, dot, where, newaxis, intp, \
        integer, isscalar
from numpy.core.umath import pi, multiply, add, arctan2,  \
        frompyfunc, isnan, cos, less_equal, sqrt, sin, mod, exp, log10
from numpy.core.fromnumeric import ravel, nonzero, choose, sort, mean
from numpy.core.numerictypes import typecodes, number
from numpy.core import atleast_1d, atleast_2d
from numpy.lib.twodim_base import diag
#from _compiled_base import _insert, add_docstring
#from _compiled_base import digitize, bincount, interp as compiled_interp
#from arraysetops import setdiff1d
#from utils import deprecate
#from _compiled_base import add_newdoc_ufunc
import numpy as np

from numpy.lib.function_base import *

[docs]class vectorize(object):
    """
    vectorize(pyfunc, otypes='', doc=None, excluded=None, cache=False)

    Generalized function class.

    Define a vectorized function which takes a nested sequence
    of objects or numpy arrays as inputs and returns a
    numpy array as output. The vectorized function evaluates `pyfunc` over
    successive tuples of the input arrays like the python map function,
    except it uses the broadcasting rules of numpy.

    The data type of the output of `vectorized` is determined by calling
    the function with the first element of the input.  This can be avoided
    by specifying the `otypes` argument.

    Parameters
    ----------
    pyfunc : callable
        A python function or method.
    otypes : str or list of dtypes, optional
        The output data type. It must be specified as either a string of
        typecode characters or a list of data type specifiers. There should
        be one data type specifier for each output.
    doc : str, optional
        The docstring for the function. If `None`, the docstring will be the
        ``pyfunc.__doc__``.
    excluded : set, optional
        Set of strings or integers representing the positional or keyword
        arguments for which the function will not be vectorized.  These will be
        passed directly to `pyfunc` unmodified.
        
        .. versionadded:: 1.7.0
    
    cache : bool, optional
       If `True`, then cache the first function call that determines the number
       of outputs if `otypes` is not provided.

        .. versionadded:: 1.7.0

    Returns
    -------
    vectorized : callable
        Vectorized function.

    Examples
    --------
    >>> def myfunc(a, b):
    ...     "Return a-b if a>b, otherwise return a+b"
    ...     if a > b:
    ...         return a - b
    ...     else:
    ...         return a + b

    >>> vfunc = np.vectorize(myfunc)
    >>> vfunc([1, 2, 3, 4], 2)
    array([3, 4, 1, 2])

    The docstring is taken from the input function to `vectorize` unless it
    is specified

    >>> vfunc.__doc__
    'Return a-b if a>b, otherwise return a+b'
    >>> vfunc = np.vectorize(myfunc, doc='Vectorized `myfunc`')
    >>> vfunc.__doc__
    'Vectorized `myfunc`'

    The output type is determined by evaluating the first element of the input,
    unless it is specified

    >>> out = vfunc([1, 2, 3, 4], 2)
    >>> type(out[0])
    <type 'numpy.int32'>
    >>> vfunc = np.vectorize(myfunc, otypes=[np.float])
    >>> out = vfunc([1, 2, 3, 4], 2)
    >>> type(out[0])
    <type 'numpy.float64'>

    The `excluded` argument can be used to prevent vectorizing over certain
    arguments.  This can be useful for array-like arguments of a fixed length
    such as the coefficients for a polynomial as in `polyval`:

    >>> def mypolyval(p, x):
    ...     _p = list(p)
    ...     res = _p.pop(0)
    ...     while _p:
    ...         res = res*x + _p.pop(0)
    ...     return res
    >>> vpolyval = np.vectorize(mypolyval, excluded=['p'])
    >>> vpolyval(p=[1, 2, 3], x=[0, 1])
    array([3, 6])

    Positional arguments may also be excluded by specifying their position:

    >>> vpolyval.excluded.add(0)
    >>> vpolyval([1, 2, 3], x=[0, 1])
    array([3, 6])

    Notes
    -----
    The `vectorize` function is provided primarily for convenience, not for
    performance. The implementation is essentially a for loop.

    If `otypes` is not specified, then a call to the function with the first
    argument will be used to determine the number of outputs.  The results of
    this call will be cached if `cache` is `True` to prevent calling the
    function twice.  However, to implement the cache, the original function must
    be wrapped which will slow down subsequent calls, so only do this if your
    function is expensive.

    The new keyword argument interface and `excluded` argument support further
    degrades performance.
    """
[docs]    def __init__(self, pyfunc, otypes='', doc=None, excluded=None, cache=False):
        self.pyfunc = pyfunc
        self.cache = cache

        if doc is None:
            self.__doc__ = pyfunc.__doc__
        else:
            self.__doc__ = doc

        if isinstance(otypes, str):
            self.otypes = otypes
            for char in self.otypes:
                if char not in typecodes['All']:
                    raise ValueError("Invalid otype specified: %s" % (char,))
        elif iterable(otypes):
            self.otypes = ''.join([_nx.dtype(x).char for x in otypes])
        else:
            raise ValueError("Invalid otype specification")

        # Excluded variable support
        if excluded is None:
            excluded = set()
        self.excluded = set(excluded)

        if self.otypes and not self.excluded:
            self._ufunc = None      # Caching to improve default performance

    def __call__(self, *args, **kwargs):
        """
        Return arrays with the results of `pyfunc` broadcast (vectorized) over
        `args` and `kwargs` not in `excluded`.
        """
        excluded = self.excluded
        if not kwargs and not excluded:
            func = self.pyfunc
            vargs = args
        else:
            # The wrapper accepts only positional arguments: we use `names` and
            # `inds` to mutate `the_args` and `kwargs` to pass to the original
            # function.
            nargs = len(args)

            names = [_n for _n in kwargs if _n not in excluded]
            inds = [_i for _i in range(nargs) if _i not in excluded]
            the_args = list(args)
            def func(*vargs):
                for _n, _i in enumerate(inds):
                    the_args[_i] = vargs[_n] 
                kwargs.update(zip(names, vargs[len(inds):]))
                return self.pyfunc(*the_args, **kwargs)

            vargs = [args[_i] for _i in inds]
            vargs.extend([kwargs[_n] for _n in names])

        return self._vectorize_call(func=func, args=vargs)

    def _get_ufunc_and_otypes(self, func, args):
        """Return (ufunc, otypes)."""
        # frompyfunc will fail if args is empty
        assert args

        if self.otypes:
            otypes = self.otypes
            nout = len(otypes)

            # Note logic here: We only *use* self._ufunc if func is self.pyfunc
            # even though we set self._ufunc regardless.
            if func is self.pyfunc and self._ufunc is not None:
                ufunc = self._ufunc
            else:
                ufunc = self._ufunc = frompyfunc(func, len(args), nout)
        else:
            # Get number of outputs and output types by calling the function on
            # the first entries of args.  We also cache the result to prevent
            # the subsequent call when the ufunc is evaluated.
            # Assumes that ufunc first evaluates the 0th elements in the input
            # arrays (the input values are not checked to ensure this)
            inputs = [asarray(_a).flat[0] for _a in args]
            outputs = func(*inputs)

            # Performance note: profiling indicates that -- for simple functions
            # at least -- this wrapping can almost double the execution time.
            # Hence we make it optional.
            if self.cache:
                _cache = [outputs]
                def _func(*vargs):
                    if _cache:
                        return _cache.pop()
                    else:
                        return func(*vargs)
            else:
                _func = func

            if isinstance(outputs, tuple):
                nout = len(outputs)
            else:
                nout = 1
                outputs = (outputs,)

            otypes = ''.join([asarray(outputs[_k]).dtype.char
                              for _k in range(nout)])

            # Performance note: profiling indicates that creating the ufunc is
            # not a significant cost compared with wrapping so it seems not
            # worth trying to cache this.
            ufunc = frompyfunc(_func, len(args), nout)

        return ufunc, otypes
        
    def _vectorize_call(self, func, args):
        """Vectorized call to `func` over positional `args`."""
        if not args:
            _res = func()
        else:
            ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args)

            # Convert args to object arrays first
            inputs = [array(_a, copy=False, subok=True, dtype=object) 
                      for _a in args]            

            outputs = ufunc(*inputs)

            if ufunc.nout == 1:
                _res = array(outputs,
                             copy=False, subok=True, dtype=otypes[0])
            else:
                _res = tuple([array(_x, copy=False, subok=True, dtype=_t)
                              for _x, _t in zip(outputs, otypes)])
        return _res
Navigation

Source code for mmf.utils.myvectorize

Project Versions

RTD Search

Quick search

Navigation