Source code for pyannote.core.feature

#!/usr/bin/env python
# encoding: utf-8

# The MIT License (MIT)

# Copyright (c) 2014-2019 CNRS

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# AUTHORS
# Hervé BREDIN - http://herve.niderb.fr


"""
########
Features
########

See :class:`pyannote.core.SlidingWindowFeature` for the complete reference.
"""
import numbers
import warnings
from typing import Tuple, Optional, Union, Iterator, List, Text

import numpy as np

from pyannote.core.utils.types import Alignment
from .segment import Segment
from .segment import SlidingWindow
from .timeline import Timeline


class SlidingWindowFeature(np.lib.mixins.NDArrayOperatorsMixin):
    """Periodic feature vectors

    Wraps a numpy array of per-frame features together with the sliding
    window that maps frame indices to time. Thanks to
    ``NDArrayOperatorsMixin`` and ``__array_ufunc__``, arithmetic operators
    and numpy ufuncs applied to an instance return a new instance sharing
    the same sliding window and labels.

    Parameters
    ----------
    data : (n_frames, n_features) numpy array
    sliding_window : SlidingWindow
    labels : list, optional
        Textual description of each dimension.
    """

    def __init__(
        self,
        data: np.ndarray,
        sliding_window: "SlidingWindow",
        labels: Optional[List[Text]] = None,
    ):
        self.sliding_window: "SlidingWindow" = sliding_window
        self.data = data
        self.labels = labels
        # cursor used by the `__iter__` / `__next__` protocol below
        self.__i: int = -1

    def __len__(self):
        """Number of feature vectors"""
        return self.data.shape[0]

    @property
    def extent(self):
        """Segment returned by the sliding window for frames 0 to len(self)"""
        return self.sliding_window.range_to_segment(0, len(self))

    @property
    def dimension(self):
        """Dimension of feature vectors"""
        return self.data.shape[1]

    def getNumber(self):
        """Deprecated: use ``len(features)`` instead."""
        # stacklevel=2 makes the warning point at the caller's line
        warnings.warn(
            "This is deprecated in favor of `__len__`",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.data.shape[0]

    def getDimension(self):
        """Deprecated: use the ``dimension`` property instead."""
        warnings.warn(
            "This is deprecated in favor of `dimension` property",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.dimension

    def getExtent(self):
        """Deprecated: use the ``extent`` property instead."""
        warnings.warn(
            "This is deprecated in favor of `extent` property",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.extent

    def __getitem__(self, i: int) -> np.ndarray:
        """Get ith feature vector"""
        return self.data[i]

    def __iter__(self):
        """Restart iteration over (window, feature vector) pairs

        The instance is its own iterator: the cursor lives on the instance,
        so nested or interleaved loops over the same object share it.
        """
        self.__i = -1
        return self

    def __next__(self) -> Tuple["Segment", np.ndarray]:
        """Return the next (window, feature vector) pair"""
        self.__i += 1
        try:
            return self.sliding_window[self.__i], self.data[self.__i]
        except IndexError:
            # ran past the last frame
            raise StopIteration()

    def next(self):
        """Alias of ``__next__``"""
        return self.__next__()

    def iterfeatures(
        self, window: Optional[bool] = False
    ) -> Iterator[Union[Tuple[np.ndarray, "Segment"], np.ndarray]]:
        """Feature vector iterator

        Parameters
        ----------
        window : bool, optional
            When True, yield both feature vector and corresponding window.
            Default is to only yield feature vector

        Yields
        ------
        features : numpy.ndarray or (numpy.ndarray, window) tuple
        """
        n_samples = self.data.shape[0]
        for i in range(n_samples):
            if window:
                yield self.data[i], self.sliding_window[i]
            else:
                yield self.data[i]

    def crop(
        self,
        focus: Union["Segment", "Timeline"],
        mode: "Alignment" = "loose",
        fixed: Optional[float] = None,
        return_data: bool = True,
    ) -> Union[np.ndarray, "SlidingWindowFeature"]:
        """Extract frames

        Parameters
        ----------
        focus : Segment or Timeline
        mode : {'loose', 'strict', 'center'}, optional
            In 'strict' mode, only frames fully included in 'focus' support
            are returned. In 'loose' mode, any intersecting frames are
            returned. In 'center' mode, first and last frames are chosen to
            be the ones whose centers are the closest to 'focus' start and
            end times. Defaults to 'loose'.
        fixed : float, optional
            Overrides `Segment` 'focus' duration and ensures that the number
            of returned frames is fixed (which might otherwise not be the
            case because of rounding errors).
        return_data : bool, optional
            Return a numpy array (default). For `Segment` 'focus', setting
            it to False will return a `SlidingWindowFeature` instance.

        Returns
        -------
        data : `numpy.ndarray` or `SlidingWindowFeature`
            Frame features. When nothing of 'focus' overlaps the available
            frames, the result is empty (zero frames) and keeps the dtype
            of the original data.

        Raises
        ------
        ValueError
            If `return_data` is False and 'focus' is not a `Segment`, or if
            `return_data` is False and `fixed` is set.

        See also
        --------
        SlidingWindow.crop
        """
        if (not return_data) and (not isinstance(focus, Segment)):
            msg = (
                '"focus" must be a "Segment" instance when "return_data" '
                "is set to False."
            )
            raise ValueError(msg)

        if (not return_data) and (fixed is not None):
            msg = '"fixed" cannot be set when "return_data" is set to False.'
            raise ValueError(msg)

        ranges = self.sliding_window.crop(
            focus, mode=mode, fixed=fixed, return_ranges=True
        )

        # total number of samples in features
        n_samples = self.data.shape[0]

        # 1 for vector features (e.g. MFCC in pyannote.audio)
        # 2 for matrix features (e.g. grey-level frames in pyannote.video)
        # 3 for 3rd order tensor (e.g. RGB frames in pyannote.video)
        n_dimensions = len(self.data.shape) - 1

        # clip ranges
        clipped_ranges, repeat_first, repeat_last = [], 0, 0
        first_requested = None
        for start, end in ranges:
            if first_requested is None:
                first_requested = start

            # count number of requested samples before first sample
            repeat_first += min(end, 0) - min(start, 0)
            # count number of requested samples after last sample
            repeat_last += max(end, n_samples) - max(start, n_samples)

            # if all requested samples are out of bounds, skip
            # (a range ending exactly at frame 0 contains no valid frame)
            if end <= 0 or start >= n_samples:
                continue

            # keep track of non-empty clipped ranges
            clipped_ranges.append([max(start, 0), min(end, n_samples)])

        if clipped_ranges:
            data = np.vstack(
                [self.data[start:end, :] for start, end in clipped_ranges]
            )
        else:
            # if all ranges are out of bounds, just return empty data
            # (same trailing shape and dtype as the original features)
            shape = (0,) + self.data.shape[1:]
            data = np.empty(shape, dtype=self.data.dtype)

        # corner case when "fixed" duration cropping is requested:
        # correct number of samples even with out-of-bounds indices
        if fixed is not None:
            data = np.vstack(
                [
                    # repeat first sample as many times as needed
                    np.tile(self.data[0], (repeat_first,) + (1,) * n_dimensions),
                    data,
                    # repeat last sample as many times as needed
                    np.tile(
                        self.data[n_samples - 1],
                        (repeat_last,) + (1,) * n_dimensions,
                    ),
                ]
            )

        # return data
        if return_data:
            return data

        # wrap data in a SlidingWindowFeature and return
        if clipped_ranges:
            start_time = self.sliding_window[clipped_ranges[0][0]].start
        else:
            # nothing overlaps the available frames: anchor the (empty)
            # result at the data edge closest to the requested range
            # instead of failing on `clipped_ranges[0]`
            if first_requested is None:
                first_frame = 0
            else:
                first_frame = min(max(first_requested, 0), n_samples)
            start_time = (
                self.sliding_window.start + first_frame * self.sliding_window.step
            )

        sliding_window = SlidingWindow(
            start=start_time,
            duration=self.sliding_window.duration,
            step=self.sliding_window.step,
        )
        return SlidingWindowFeature(data, sliding_window, labels=self.labels)

    def _repr_png_(self):
        """PNG representation, or None (with a warning) without matplotlib"""
        from .notebook import MATPLOTLIB_IS_AVAILABLE, MATPLOTLIB_WARNING

        if not MATPLOTLIB_IS_AVAILABLE:
            warnings.warn(MATPLOTLIB_WARNING.format(klass=self.__class__.__name__))
            return None

        from .notebook import repr_feature

        return repr_feature(self)

    # operand types (besides SlidingWindowFeature) accepted by __array_ufunc__
    _HANDLED_TYPES = (np.ndarray, numbers.Number)

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """Numpy array protocol

        Parameters
        ----------
        dtype : numpy dtype, optional
            Requested dtype. Defaults to the dtype of the wrapped data.
        copy : bool, optional
            True forces a copy, False forbids one, None (default) copies
            only when the dtype conversion requires it.

        Returns
        -------
        data : numpy.ndarray
            The wrapped data itself when no conversion or copy is needed.

        Raises
        ------
        ValueError
            If `copy` is False but converting to `dtype` needs a copy.
        """
        if dtype is not None and np.dtype(dtype) != self.data.dtype:
            if copy is False:
                raise ValueError(
                    "Unable to avoid copy while converting to requested dtype."
                )
            return self.data.astype(dtype)
        return self.data.copy() if copy else self.data

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        """Apply numpy ufuncs to the wrapped data

        Results are wrapped back into instances of ``type(self)`` sharing
        this instance's sliding window and labels. Returns `NotImplemented`
        when any operand is of an unsupported type.
        """
        out = kwargs.get("out", ())
        for x in inputs + out:
            # Only support operations with instances of _HANDLED_TYPES.
            # Use SlidingWindowFeature instead of type(self) for isinstance to
            # allow subclasses that don't override __array_ufunc__ to
            # handle SlidingWindowFeature objects.
            if not isinstance(x, self._HANDLED_TYPES + (SlidingWindowFeature,)):
                return NotImplemented

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(
            x.data if isinstance(x, SlidingWindowFeature) else x for x in inputs
        )
        if out:
            kwargs["out"] = tuple(
                x.data if isinstance(x, SlidingWindowFeature) else x for x in out
            )
        data = getattr(ufunc, method)(*inputs, **kwargs)

        if type(data) is tuple:
            # multiple return values
            return tuple(
                type(self)(x, self.sliding_window, labels=self.labels) for x in data
            )
        elif method == "at":
            # no return value
            return None
        else:
            # one return value
            return type(self)(data, self.sliding_window, labels=self.labels)

    def align(self, to: "SlidingWindowFeature") -> "SlidingWindowFeature":
        """Align features by linear temporal interpolation

        Each feature dimension is interpolated independently (with
        `numpy.interp`) from the centers of this instance's frames to the
        centers of the frames of `to`.

        Parameters
        ----------
        to : SlidingWindowFeature
            Features to align with.

        Returns
        -------
        aligned : SlidingWindowFeature
            Aligned features
        """
        old_start = self.sliding_window.start
        old_step = self.sliding_window.step
        old_duration = self.sliding_window.duration
        old_samples = len(self)
        # timestamps of the centers of the original frames
        old_t = old_start + 0.5 * old_duration + np.arange(old_samples) * old_step

        new_start = to.sliding_window.start
        new_step = to.sliding_window.step
        new_duration = to.sliding_window.duration
        new_samples = len(to)
        # timestamps of the centers of the target frames
        new_t = new_start + 0.5 * new_duration + np.arange(new_samples) * new_step

        columns = [np.interp(new_t, old_t, old_data) for old_data in self.data.T]
        if columns:
            new_data = np.stack(columns, axis=1)
        else:
            # zero-dimensional features: nothing to interpolate
            new_data = np.empty((new_samples, 0))

        return SlidingWindowFeature(new_data, to.sliding_window, labels=self.labels)
if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in this module.
    import doctest

    doctest.testmod()