Source code for maelzel.snd.features

from __future__ import annotations
import numpy as np

from pitchtools import db2amp, amp2db

from maelzel.snd import _common
from maelzel.snd.numpysnd import numChannels, rmsBpf

from typing import TYPE_CHECKING
if TYPE_CHECKING:
    import csoundengine
    import csoundengine.synth
    import bpf4


# def onsetsAubio(samples: np.ndarray,
#                 sr: int,
#                 method='mkl',
#                 winsize=1024,
#                 hopsize=512,
#                 threshold=0.03,
#                 mingap=0.050,
#                 silencedb=-70
#                 ) -> list[float]:
#     """
#     Detect onsets in samples

#     Args:
#         samples: the samples, as numpy array (1D), between -1 and 1
#         sr: the sample rate of samples
#         winsize: the size of the fft window size, in samples
#         hopsize: the hop size, in samples
#         threshold: depends on the method. The lower this value, the more probable
#             is it that an onset is detected
#         method: the method to detect onsets. One of:
#             - `energy`: local energy,
#             - `hfc`: high frequency content,
#             - `complex`: complex domain,
#             - `phase`: phase-based method,
#             - `wphase`: weighted phase deviation,
#             - `specdiff`: spectral difference,
#             - `kl`: Kullback-Liebler,
#             - `mkl`: modified Kullback-Liebler,
#             - `specflux`: spectral flux.
#         mingap: the min. amount of time (in seconds) between two onsets
#         silencedb: onsets will only be detected if the amplitude exceeds this value (in dB)

#     Returns:
#         a list of floats, representing the times of the onsets
#     """
#     assert isinstance(samples, np.ndarray) and len(samples.shape) == 1
#     try:
#         import aubio
#     except ImportError:
#         raise ImportError("aubio (https://github.com/aubio/aubio) is needed for this "
#                           "functionality")
#     ao = aubio.onset(method, buf_size=winsize, hop_size=hopsize)
#     ao.set_threshold(threshold)
#     ao.set_silence(silencedb)
#     ao.set_minioi_s(mingap)
#     samples = samples.astype('float32')
#     onsets = [ao.get_last()/sr for chunk in chunks(samples, hopsize, padwith=0.0)
#               if ao(chunk)]
#     return onsets


[docs] def playTicks(times: list[float] | np.ndarray, engine: csoundengine.Engine | None = None, chan=1, midinote: int|float|list[float] = 69, amp=0.5, attack=0.01, decay=0.05, sustain=0.5, release=0.100, extraLatency=0. ) -> csoundengine.synth.SynthGroup: """ Given a list of times offsets, play these as ticks Args: times: a list of time offsets chan: which channel to play the ticks to midinote: the pitch of the ticks or a list of pitches, one for each tick amp: the amplitude of the ticks attack: attack duration decay: decay duration sustain: sustain amplitude release: release dur. engine: a csoundengine.Engine used for playback. If not given a new engine is created extraLatency: extra latency added to playback Returns: a csoundengine.SynthGroup, which constrols playback. Examples ~~~~~~~~ >>> import sndfileio >>> samples, info = sndfileio.sndget("/path/to/sound.wav") >>> onsets = onsetsAubio(samples, info.sr) >>> synthgroup = playTicks(onsets) # if needed to stop the playback at any moment: >>> synthgroup.stop() """ if engine is None: engine = _common.getEngine() session = engine.session() instr = session.defInstr("features.tick", body=r""" |iPitch, iAmp, iAtt, iDec, iSust, iRel, iChan| iFreq mtof iPitch a0 vco2 iAmp, iFreq, 12 ; triangular shape aenv adsr iAtt, iDec, iSust, iRel outch iChan, a0*aenv """, priority=1) dur = attack + decay + release if isinstance(midinote, (int, float)): midinotes = [midinote] * len(times) else: midinotes = midinote with engine.lockedClock(): synths = [] for time, pitch in zip(times, midinotes): args = dict(iPitch=pitch, iAmp=amp, iAtt=attack, iDec=decay, iSust=sustain, iRel=release, iChan=chan) synths.append(session.sched(instr.name, delay=time+extraLatency, dur=dur, args=args)) # type: ignore import csoundengine.synth return csoundengine.synth.SynthGroup(synths)
[docs] def onsets(samples: np.ndarray, sr: int, winsize=2048, hopsize=512, threshold=0.07, mingap=0.050, backtrack=False, ) -> tuple[np.ndarray, bpf4.BpfBase]: """ Detect onsets This is based on `rosita`, a minimal version of librosa with some fixes and simplifications (it avoids having to add numba as a dependency) The onset detection algorithm uses the variation in mel spectrum to calculate an onset strength in time. Peaks above the given threshold are detected as onsets. Args: samples: mono samples sr: sr winsize: the size of the fft window hopsize: samples to skip between windows threshold: the delta needed to trigger an onset mingap: min. time gap between onsets backtrack: detected onsets are backtracked to the nearest energy minimum Returns: a tuple (onset array, onset strength bpf) """ if (n := numChannels(samples)) != 1: raise ValueError(f"Only mono samples are accepted, but got {n} channels of audio") from maelzel.snd import rosita env = rosita.onset_strength(y=samples, sr=sr, hop_length=hopsize, n_fft=winsize) envtimes = rosita.times_like(env, sr=sr, hop_length=hopsize, n_fft=winsize) onsets = rosita.onset_detect(samples, sr, onset_envelope=env, hop_length=hopsize, units='time', delta=threshold, mingap=mingap, n_fft=winsize, backtrack=backtrack) import bpf4 onsetbpf = bpf4.Linear(envtimes, env) return onsets, onsetbpf
[docs] def plotOnsets(samples: np.ndarray, sr: int, onsets: np.ndarray, onsetbpf: bpf4.BpfBase | None = None, samplesgain=20, envalpha=0.8, samplesalpha=0.4, onsetsalpha=0.3, figsize: tuple[int, int] | None = None, offsets: np.ndarray | list[float] | None = None ) -> None: """ Plot the results of onsets detection Args: samples: the samples from which onsets were detected sr: the sr of samples onsets: the onsets as returned via onsetsRosita onsetbpf: the onsetbpf as returned via onsetsRosita samplesgain: a gain to apply to the samples for plotting envalpha: alpha channel for onsets strength samplesalpha: alpha channel for samples plot onsetsalpha: alpha channel for onsets offsets: if given, a region is plotted instead of a line. An offset of 0 indicates that the given onset has no offset and in this case also a line will be plotted Example ------- >>> from maelzel.snd.audiosample import Sample >>> from maelzel.snd import features >>> s = Sample("/path/to/sndfile.wav").getChannel(0) >>> onsets, onsetstrength = features.onsets(s.samples, s.sr) >>> features.plotOnsets(samples=s.samples, sr=s.sr, onsets=onsets, ... onsetbpf=onsetstrength) """ import matplotlib.pyplot as plt if figsize: plt.figure(figsize=figsize) if onsetbpf: xs, ys = onsetbpf.points() plt.plot(xs, ys, alpha=envalpha) duration = len(samples) / sr plt.plot(np.arange(0, duration, 1 / sr), samples ** 2 * samplesgain, alpha=samplesalpha, linewidth=1) if offsets: for onset, offset in zip(onsets, offsets): if offset > 0: plt.axvspan(xmin=onset, xmax=offset, alpha=onsetsalpha) else: plt.axvline(x=onset, ymin=0, alpha=onsetsalpha, linewidth=1) else: for onset in onsets: plt.axvline(x=onset, alpha=onsetsalpha, linewidth=1, ymin=0)
[docs] def filterOnsets(onsets: np.ndarray, samples: np.ndarray, sr: int, minampdb = -60, rmscurve: bpf4.BpfInterface | None = None, rmsperiod = 0.05, onsetStrengthBpf: bpf4.BpfInterface | None = None, ) -> np.ndarray: """ Returns a selection array where a value of 1 marks an onset as relevant The returned array can be used to remove superfluous onsets, based on secondary features (rms, ...) Args: onsets: the list of onsets samples: the samples for which these onsets where calculated sr: the sample rate of the samples minampdb: the min. amptliude of audio in order for an onset to be valid (in dB) rmsperiod: the period in seconds to use for calculating the RMS rmscurve: an rms curve can be given if it has been already calculated. onsetStrengthBpf: the onset strength as returned by the :func:`onsets` function Returns: a tuple (onsets selection array, rmscurve), where the array is of the same size of *onsets*. For each onset a value of 1 marks the onset as relevant, a value of 0 indicates that the onset might be superfluous. During the filtering a rms curve is calculated. This curve is returned to the user, who might use it later (for example, to calculate the offsets via :func:`findOffsets` Example ~~~~~~~ TODO!!! """ sel = np.ones_like(onsets, dtype=bool) if onsetStrengthBpf: before = onsets - 0.05 after = onsets + 0.15 sel *= onsetStrengthBpf.map(after) - onsetStrengthBpf.map(before) > -0.1 if rmsperiod > 0: if rmscurve is None: rmscurve = rmsBpf(samples, sr=sr, dt=rmsperiod, overlap=2) rmsdelay = rmsperiod * 2 sel *= rmscurve.map(onsets + rmsdelay) > db2amp(minampdb) return sel
[docs] def findOffsets(onsets: list[float] | np.ndarray, samples: np.ndarray, sr: int, rmscurve: bpf4.BpfInterface | None = None, silenceThreshold=-60, relativeThreshold: int = 90, rmsperiod=0.05, notfoundValue=-1 ) -> list[float]: """For each onset find its corresponding offset If no offset is found before the next onset, the corresponding offset time is set to be *notfoundValue* Args: onsets: the onset times samples: the samples for which the onsets where calculated sr: the samplerate of the samples rmscurve: if already calculated, can be passed here to avoid calculation rmsperiod: rms period used to calculate rms curve, only needed if not rms curve is being passed silenceThreshold: silence threshold in dB relativeThreshold: if the sound falls this amount of dB relative to the onset then it is also considered an offset (possitive dB) notfoundValue: the value used to indicate that no offset was found for a given onset, indicating that a new onset was found before the previous onset was allowed to decay into silence Returns: a list of offsets, one for each onset given. An offset is -1 if a new onset is found before any silence is found """ if rmscurve is None: rmscurve = rmsBpf(samples, sr, dt=rmsperiod, overlap=2) assert rmscurve is not None end = len(samples) / sr offsets = [] lasti = len(onsets) - 1 for i, onset in enumerate(onsets): nextonset = onsets[i + 1] if i < lasti else end rmsfragm = rmscurve[onset:nextonset] threshdb = max(amp2db(rmscurve(onset)) - relativeThreshold, silenceThreshold) thresh = db2amp(threshdb) intersect = rmsfragm - thresh zeros = intersect.zeros(maxzeros=1) if zeros: zero = zeros[0] assert onset < zero < nextonset, f'{onset=}, {zero=}, {nextonset=}' else: zero = notfoundValue offsets.append(zero) for i in range(len(onsets)-1): assert offsets[i] < 0 or (onsets[i] < offsets[i] < onsets[i+1]), f"{i=}, {onsets[i]=}, {offsets[i]=}, {offsets[i+1]=}" return offsets
[docs] def voicedness(samples: np.ndarray, sr: int, fftsize: int = 2048, overlap: int = 4, winsize: int = 0, minfreq: int = 50, maxfreq: int = 16000, magsumbins: int = 10, window='hann') -> dict[str, np.ndarray]: dur = len(samples) / sr winsize = winsize or fftsize hopsize = min(winsize, fftsize) // overlap numdata = (len(samples) - fftsize + hopsize) // hopsize wintype = { 'hamming': 0, 'hann': 1, 'kaiser': 3 }.get(window) if wintype is None: raise ValueError(f"Expected one of 'hamming', 'hann', 'kaiser', got '{window}'") orc = r''' instr analysis |iaudiotab, isr, itimestab, iflatnesstab, icresttab, ipeakynesstab, iminfreq=50, imaxfreq=16000, imagsumbins=10, ifftsize, iwinsize, ihopsize, iwintype=0| inumsamps = ftlen(iaudiotab) kcount init 0 ktime = eventtime() idatasize = ftlen(iflatnesstab) asig poscil3 1, isr/inumsamps, iaudiotab fsig = pvsanal(asig, ifftsize, ihopsize, iwinsize, iwintype) kcrest = pvscrest(fsig, iminfreq, imaxfreq) kflatness = pvsflatness(fsig) kmagsum0 = pvsmagsum(fsig, iminfreq, imaxfreq) kmagsum = pvsmagsumn(fsig, imagsumbins, iminfreq, imaxfreq) kpeakyness = kmagsum0 == 0 ? 0 : kmagsum / kmagsum0 kframe = pvsframecount(fsig) if changed(kframe) then tabw ktime, kcount, itimestab tabw kflatness, kcount, iflatnesstab tabw kcrest, kcount, icresttab tabw kpeakyness, kcount, ipeakynesstab endif if kcount == idatasize then turnoff endif chnset kcount, "count" kcount += 1 endin ''' from csoundengine.offline import OfflineEngine engine = OfflineEngine(sr=sr, numAudioBuses=0, numControlBuses=0, commandlineOptions=['--nosound']) # TODO audiotab = engine.makeTable(samples, sr=sr) timestab = engine.makeEmptyTable(numdata) flatnesstab = engine.makeEmptyTable(numdata) cresttab = engine.makeEmptyTable(numdata) peakynesstab = engine.makeEmptyTable(numdata) engine.compile(orc) engine.sched('analysis', 0, dur=dur, args=[audiotab, sr, flatnesstab, cresttab, peakynesstab, minfreq, maxfreq, magsumbins, fftsize, winsize, hopsize, wintype]) engine.perform() count = engine.getControlChannel("count") assert count > 0 flatnessdata = engine.getTableData(flatnesstab).copy() crestdata = engine.getTableData(cresttab).copy() peakynessdata = engine.getTableData(peakynesstab).copy() times = engine.getTableData(timestab).copy() engine.stop() return {'times': times, 'flatness': flatnessdata, 'crest': crestdata, 'peakyness': peakynessdata}
[docs] def centroidBpf(samples: np.ndarray, sr: int, fftsize: int = 2048, overlap: int = 4, winsize: int = 0, window='hann' ) -> bpf4.Sampled: """ Construct a bpf representing the centroid of the given audio over time Args: samples: a 1D numpy array representing a mono audio fragment sr: the sampling rate fftsize: the fft size overlap: amount of overlap winsize: the size of the window. If not given then winsize is assumed to be the same as fftsize. if given it must be <= fftsize window: kind of window Returns: a bpf representing the centroid over time """ from maelzel.snd import rosita if len(samples.shape) > 1: raise ValueError("Only mono samples are supported") winsize = winsize or fftsize hopsize = winsize // overlap frames = rosita.spectral_centroid(y=samples, sr=sr, n_fft=fftsize, hop_length=hopsize, win_length=winsize, window=window) import bpf4 return bpf4.Sampled(frames[0], x0=0, dx=hopsize/sr)