from __future__ import division
from collections.abc import Iterable, Mapping
from typing import Any, Callable, List, NoReturn, Optional, Tuple, Union
from joblib import effective_n_jobs
# Public API of this module: only the CherryPicker class is exported by
# ``from <module> import *``.
__all__ = ("CherryPicker",)
class CherryPicker(object):
    """
    Reduces nestings of iterable and mappable objects into flat tables.

    The CherryPicker class allows you to apply chained filter and extract
    operations to an object with complex structure. All the cherry picker uses
    to navigate your object is iterable and mapping interfaces. Anything
    without either of those interfaces (or a string) is treated as a leaf node.

    Each chained operation will return a new :class:`CherryPicker` which
    wraps the resulting data from that operation. To get the wrapped data back,
    use the :meth:`CherryPicker.get` method.

    :param obj: The data to operate on.
    :type obj: object.
    :param on_missing: Action to perform when trying to get an attribute that
        doesn't exist from an object with a Mapping interface. ``ignore``
        will do nothing, ``raise`` will raise an :class:`AttributeError`.
    :type on_missing: str, default = ``ignore``.
    :param on_error: Action to perform if an error occurs during filtering.
        ``ignore`` will just mean the filter operation returns False, and
        ``raise`` will mean the error is raised.
    :type on_error: str, default = ``ignore``
    :param on_leaf: Action to perform when calling :meth:`__getitem__` on a
        leaf node. ``raise`` will cause a
        :class:`cherrypicker.exceptions.LeafError` to be raised. ``get``
        will return the result of :meth:`__getitem__` on the wrapped item.
    :type on_leaf: str, default = ``raise``.
    :param leaf_types: By default, anything that doesn't have an Iterable or
        Mapping interface will be treated as a leaf. Any classes specified
        in this parameter will also be treated as leaves regardless of any
        interfaces they conform to. ``leaf_types`` may be a class, a method
        that resolves to True if an object passed to it should be treated
        as a leaf, or a tuple of classes/methods.
    :param default: The item to return when extracting an attribute that does
        not exist from an object.
    :type default: object, default = None
    :param n_jobs: The maximum number of parallel processes to run when
        performing operations on iterable objects. If n_jobs > 1 then the
        iterable will be processed in parallel batches. If n_jobs = -1, all
        the CPUs are used. For n_jobs below -1, (n_cpus + 1 + n_jobs) are
        used. Thus for n_jobs = -2, all CPUs but one are used. See
        :class:`joblib.Parallel` for more details on this parameter.
    :type n_jobs: int, default = None

    :Examples:

    Data extraction may be done with the getitem interface. Let's say we have a
    list of objects and we want to get a flat list of the ``name`` attributes
    for each item in the list:

    >>> data = [ { 'name': 'Alice', 'age': 20}, { 'name': 'Bob', 'age': 30 } ]
    >>> picker = CherryPicker(data)
    >>> picker['name'].get()
    ['Alice', 'Bob']

    We can also request multiple attributes for each item to produce a flat
    table:

    >>> data = [ { 'name': 'Alice', 'age': 20}, { 'name': 'Bob', 'age': 30 } ]
    >>> picker = CherryPicker(data)
    >>> picker['name', 'age'].get()
    [['Alice', 20], ['Bob', 30]]

    Filter operations are applied with parentheses. For example, to get the
    ``age`` attribute of every item in ``data`` whose ``name`` is ``'Alice'``:

    >>> data = [ { 'name': 'Alice', 'age': 20}, { 'name': 'Bob', 'age': 30 } ]
    >>> picker = CherryPicker(data)
    >>> picker(name='Alice')['age'].get()
    [20]

    Multiple filters may be provided:

    >>> data = [ { 'name': 'Alice', 'age': 20}, { 'name': 'Bob', 'age': 30 } ]
    >>> picker = CherryPicker(data)
    >>> picker(name='Alice', age=lambda x: x>10, how='any').get()
    [{'name': 'Alice', 'age': 20}, {'name': 'Bob', 'age': 30}]

    Filters can also be chained:

    >>> data = [ { 'name': 'Alice', 'age': 20}, { 'name': 'Bob', 'age': 30 } ]
    >>> picker = CherryPicker(data)
    >>> picker(age=lambda x: x>10)(name='B*')['name'].get()
    ['Bob']

    See :meth:`CherryPicker.filter` for more filtering options.
    """

    _PRED_RULES = "all", "any"

    # Class-level leaf defaults; used whenever no parent picker is supplied.
    _leaf_types = (str, bytes)
    _leaf_funcs = tuple()

    # Default option values; also the defaults of ``__init__``'s parameters.
    _opts = {
        "on_missing": "ignore",
        "on_error": "ignore",
        "on_leaf": "raise",
        "leaf_types": _leaf_types + _leaf_funcs,
        "default": None,
        "n_jobs": None,
    }

    # Registry mapping "leaf" / "mapping" / "iterable" to the concrete picker
    # class for that kind of node (filled via ``register_cherry_type``).
    _cherry_types = {}

    # Attributes assigned per instance in ``__init__``. ``__getattr__`` must
    # never try to resolve these as data keys, otherwise touching a
    # half-constructed instance would route into ``__getitem__``.
    _INTERNAL_ATTRS = frozenset(
        ("_obj", "_parent", "_repr", "_effective_n_jobs")
    )

    def __new__(cls, obj, *args, **kwargs) -> Any:
        """
        Create an instance of the registered picker class matching ``obj``.

        The concrete class is chosen by :meth:`_get_cherry_class` using the
        class-level leaf defaults; the constructor options are not consulted
        here. ``*args``/``**kwargs`` are accepted (and ignored) so that every
        call signature supported by :meth:`__init__`, including positional
        options, also gets through ``__new__``.
        """
        ccls = cls._get_cherry_class(obj)
        return super(CherryPicker, cls).__new__(ccls)

    def __eq__(self, other) -> Any:
        """
        Compare the wrapped data of two pickers.

        Returns ``NotImplemented`` for anything that is not a
        :class:`CherryPicker`, so Python falls back to its default comparison
        instead of failing on a missing ``_obj`` attribute. Note that defining
        ``__eq__`` without ``__hash__`` leaves instances unhashable.
        """
        if not isinstance(other, CherryPicker):
            return NotImplemented
        return self._obj == other._obj

    def __init__(
        self,
        obj,
        on_missing=_opts["on_missing"],
        on_error=_opts["on_error"],
        on_leaf=_opts["on_leaf"],
        leaf_types=_opts["leaf_types"],
        default=_opts["default"],
        n_jobs=_opts["n_jobs"],
    ) -> None:
        """
        Store the wrapped object and the options; see the class docstring for
        the meaning of each parameter.

        :raises ValueError: if ``leaf_types`` contains something that is
            neither a type nor a callable.
        """
        # Anything that gets shared with children goes in here.
        self._opts = {
            "on_missing": on_missing,
            "on_error": on_error,
            "on_leaf": on_leaf,
            "default": default,
            "leaf_types": leaf_types,
            "n_jobs": n_jobs,
        }
        # Properties that are unique to this instance.
        self._repr = None
        self._leaf_types, self._leaf_funcs = self._parse_leaf_types(leaf_types)
        self._effective_n_jobs = effective_n_jobs(n_jobs)
        self._parent = None
        self._obj = obj

    def __getattr__(self, attr) -> Any:
        """
        Fall back to item access for attributes that are not found normally.

        Only called by Python after regular attribute lookup has failed.

        :raises AttributeError: if :meth:`__getitem__` raises ``KeyError``,
            or if ``attr`` is one of the names that are never treated as data
            keys (see comments below).
        """
        # Python also lands here when the ``parent``/``parents`` property
        # itself raised AttributeError; keep that error instead of looking
        # the name up in the wrapped data.
        if attr in ("parent", "parents"):
            raise AttributeError("Root node has no parent.")
        # Dunder probes (copy/pickle/hasattr checks) and this class's own
        # instance attributes must fail with a plain AttributeError.
        is_dunder = attr.startswith("__") and attr.endswith("__")
        if is_dunder or attr in self._INTERNAL_ATTRS:
            raise AttributeError(
                "'{}' object has no attribute '{}'".format(
                    self.__class__.__name__, attr
                )
            )
        try:
            return self.__getitem__(attr)
        except KeyError:
            raise AttributeError(
                "'{}' object has no attribute '{}'".format(
                    self.__class__.__name__, attr
                )
            ) from None

    def _parse_leaf_types(
        self,
        leaf_types: Union[
            None, type, Callable, List[Union[type, Callable]], Tuple[Any, ...]
        ],
    ) -> Tuple[tuple, tuple]:
        """
        Split ``leaf_types`` into a tuple of classes and a tuple of callables.

        :param leaf_types: ``None``, a single class, a single callable, or an
            iterable of classes and/or callables.
        :return: ``(classes, callables)``; both empty when ``leaf_types`` is
            ``None``.
        :raises ValueError: if any entry is neither a class nor callable.
        """
        if leaf_types is None:
            return tuple(), tuple()
        # A lone class is checked before iteration is attempted so that an
        # iterable class is still treated as one leaf type.
        if isinstance(leaf_types, type):
            return (leaf_types,), tuple()
        try:
            # Materialise once: a one-shot iterator would otherwise be
            # exhausted after the first pass and lose its callables.
            candidates = tuple(leaf_types)
        except TypeError:
            if callable(leaf_types):
                return tuple(), (leaf_types,)
            raise ValueError(
                "leaf_types must only contain types and Callables."
            ) from None
        parsed_types = tuple(
            leaf for leaf in candidates if isinstance(leaf, type)
        )
        parsed_funcs = tuple(
            leaf for leaf in candidates if not isinstance(leaf, type)
        )
        if not all(callable(func) for func in parsed_funcs):
            raise ValueError(
                "leaf_types must only contain types and Callables."
            )
        return parsed_types, parsed_funcs

    @classmethod
    def _get_cherry_class(cls, obj, parent=None) -> Any:
        """
        Pick the registered picker class that should wrap ``obj``.

        :param obj: The object to classify.
        :param parent: Optional object providing ``_leaf_types`` and
            ``_leaf_funcs``; when omitted the class-level defaults are used.
        :return: The class registered under ``"leaf"``, ``"mapping"`` or
            ``"iterable"``.
        :raises KeyError: if the required kind has not been registered.
        """
        source = cls if parent is None else parent
        leaf_types = source._leaf_types
        leaf_funcs = source._leaf_funcs

        is_leaf = isinstance(obj, leaf_types)
        if not is_leaf:
            for func in leaf_funcs:
                try:
                    if func(obj):
                        is_leaf = True
                        break
                except Exception:
                    # A predicate that fails on this object simply does not
                    # match. KeyboardInterrupt/SystemExit still propagate.
                    # TODO: Should we warn, or have a user-defined action?
                    continue

        if is_leaf:
            return cls._cherry_types["leaf"]  # pytype: disable=key-error
        if isinstance(obj, Mapping):
            return cls._cherry_types["mapping"]  # pytype: disable=key-error
        if isinstance(obj, Iterable):
            return cls._cherry_types["iterable"]  # pytype: disable=key-error
        return cls._cherry_types["leaf"]  # pytype: disable=key-error

    @classmethod
    def register_cherry_type(cls, cherry, typ) -> None:
        """
        Register ``typ`` as the picker class for the node kind ``cherry``.

        The kinds looked up by :meth:`_get_cherry_class` are ``"leaf"``,
        ``"mapping"`` and ``"iterable"``.
        """
        cls._cherry_types[cherry] = typ

    @property
    def is_leaf(self):
        """
        Whether this picker wraps a leaf node; always ``False`` here.
        """
        return False

    @property
    def parents(self):
        """
        Alias for :meth:`.parent`.
        """
        return self.parent

    @property
    def parent(self):
        """
        Get the parent or iterable of parents.

        :raises AttributeError: if this picker has no parent.
        """
        if self._parent is not None:
            return self._parent
        raise AttributeError("Root node has no parent.")

    def get(self) -> Any:
        """
        Obtain the original data that this object wraps.
        """
        return self._obj

    def keys(self, peek=5) -> NoReturn:
        """
        Not implemented on the base class; always raises.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError()

    def __getitem__(self, args) -> NoReturn:
        """
        Not implemented on the base class; always raises.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError()