listeningpy.processing

This module provides functions for processing audio signals, covering basic operations such as normalization and convolution.

  1"""
  2This module contains functions for processing audio signals.
  3It contains functions for basic processing, such as normalization, convolution, etc.
  4"""
  5
  6import numpy.fft as fft
  7from numpy import ndarray, where, zeros_like
  8import scipy.signal as signal
  9import logging
 10import pyloudnorm as pyln
 11from listeningpy.normalization import (
 12    peak_normalize,
 13    rms_normalize,
 14    ir_sum_normalize,
 15    lufs_normalize
 16    )
 17from listeningpy.audiotools import audio_stats
 18
# Prefilter types accepted by `convolution`'s `normalization_prefilter`
# argument: 'hp' = high-pass, 'lp' = low-pass (12th-order Butterworth).
FILTERS = ['hp', 'lp']

# logging.basicConfig(level=logging.DEBUG)

### BASIC PROCESSING ###
 24
 25def straight(
 26        audio: ndarray, 
 27        fs: int,
 28        **kwargs
 29        ) -> tuple[ndarray, int]:
 30    '''Passes the audio without further processing.
 31    
 32    Parameters
 33    ----------
 34    audio : numpy.ndarray
 35        2-D audio array
 36
 37    Returns
 38    -------
 39    audio : numpy.ndarray
 40        2-D audio array
 41    '''
 42    return audio, fs
 43
 44def gain_adjustment(
 45        stimuli: ndarray,
 46        fs_stimuli: int,
 47        gain: float
 48        ) -> tuple[ndarray, int]:
 49    """Adjusts the gain of the stimuli.
 50
 51    This function applies a gain adjustment to the input stimuli based on the specified gain value.
 52    The gain adjustment is applied by multiplying the stimuli by a factor calculated from the gain value.
 53
 54    Parameters
 55    ----------
 56    stimuli : ndarray
 57        The input stimuli to be adjusted.
 58    fs_stimuli : int
 59        The sampling rate of the stimuli.
 60    gain : float
 61        The gain value in decibels (dB) to be applied.
 62
 63    Returns
 64    -------
 65    tuple[ndarray, int]
 66        A tuple containing the adjusted stimuli and the sampling rate.
 67
 68    """
 69    factor = 10**(gain/20)
 70    stimuli *= factor
 71    audio_stats_logging(stimuli, fs_stimuli)
 72    return stimuli, fs_stimuli
 73
 74def convolution(
 75        in1: ndarray,
 76        fs_in1: int,
 77        in2: ndarray,
 78        fs_in2: int,
 79        fade_out: bool=True,
 80        normalization: str='ir_sum',
 81        normalization_target: float=-6,
 82        normalization_prefilter: str='',
 83        prefilter_critical_freq = 200
 84        ) -> tuple[ndarray, int]:
 85    '''Performs convolution between IR and stimuli.
 86
 87    Should accept both mono and stereo signals, 
 88    but both in a form of 2D array.
 89    
 90    Parameters
 91    ----------
 92    in1 : numpy.ndarray
 93        2-D audio array (IR)
 94    fs_in1 : int
 95        IR sampling frequency
 96    in2 : numpy.ndarray
 97        2-D audio array (stimulus)
 98    fs_in2 : int
 99        sampling frequency of stimuli
100    fade_out : bool, optional
101        Flag indicating whether to apply fade-out to the IR signal, by default True
102    normalization : str, optional
103        Type of normalization to apply, by default 'ir_sum'. The alternatives can be peak, rms, lufs, ir_sum.
104    normalization_target : float, optional
105        Target value for normalization, by default -6
106    normalization_prefilter : str, optional
107        Type of prefiltering to apply before normalization, by default ''
108    prefilter_critical_freq : int, optional
109        Critical frequency for the prefilter, by default 200
110
111    Returns
112    -------
113    audio : numpy.ndarray
114        2-D audio array
115    fs_in1 : int
116        IR sampling frequency
117    '''
118    if fs_in1 == fs_in2:
119        logging.debug('IR and Stimuli sample rates are equal, no resampling needed.')
120    else:
121        logging.debug('IR and Stimuli sample rates differs, IR audio was resampled.')
122        in1, fs_in1 = match_fs(in1, fs_in2, fs_in1)
123    
124    logging.debug(f'Stimuli shape before convolution: {in2.shape}')
125    logging.debug(f'IR shape before convolution:      {in1.shape}')
126    logging.debug(f"The peak values are {abs(in2).max()} and {abs(in1).max()}")
127
128    if fade_out:
129        HFT90D = [1, 1.942604, 1.340318, 0.440811, 0.043097]
130        size = int(fs_in2/12.5)
131        fade_out_win = signal.windows.general_cosine(2*size,HFT90D)[-size:]
132        fade_out_win = fade_out_win/fade_out_win.max()
133        for i in in1.T:
134            i[-size:] *= fade_out_win
135        logging.debug(f'HFT90D Fade-out applied to last 0.1 s of IR.')
136    
137    # convolution
138    audio = signal.oaconvolve(in2.T, in1.T)[[0,-1]]
139    audio = audio.T
140    
141    # prefiltering for normalization
142    if normalization_prefilter == '':
143        audio_prefiltered = audio
144    elif normalization_prefilter in FILTERS:
145        sos = signal.butter(
146            12,
147            prefilter_critical_freq,
148            normalization_prefilter,
149            fs=fs_in1,
150            output='sos')
151        audio_prefiltered = signal.sosfilt(sos, audio, axis=0)
152    else:
153        logging.warning('Specified normalization prefilter is not implemented.')
154
155    # normalization
156    if normalization == 'peak':
157        audio,_ = peak_normalize(
158            audio, 
159            fs_in1, 
160            peak=normalization_target,
161            reference=audio_prefiltered
162            )
163    elif normalization == 'ir_sum':
164        audio,_ = ir_sum_normalize(
165            audio, 
166            ir = in1, 
167            fs = fs_in1, 
168            ir_sum=normalization_target
169            )
170    elif normalization == 'rms':
171        audio,_ = rms_normalize(
172            audio, 
173            fs_in1, 
174            rms=normalization_target,
175            reference=audio_prefiltered
176            )
177    elif normalization == 'lufs':
178        audio,_ = lufs_normalize(
179            audio, 
180            fs_in1, 
181            lufs=normalization_target,
182            reference=audio_prefiltered
183            )
184    elif normalization is None:
185        logging.info('Normalization was not applied.')
186    else:
187        logging.info('Specified normalization type not implemented.')
188    
189    logging.debug(f'Stimuli shape after convolution: {audio.shape}')
190    
191    audio_stats_logging(audio, fs_in1)
192    return audio, fs_in1
193
194# def lf_dirac_combination(
195#         lf_ir: ndarray,
196#         fs_lf_ir: int,
197#         crossover: int=200,
198#         span: int=2,
199#         norm_factor=None
200#         ) -> tuple[ndarray, int]:
201#     """NOT RECOMMENDED, USE lf_convolution INSTEAD.
202#     """
203#     dirac = zeros_like(lf_ir)
204#     logging.debug(f'dirac shape {dirac.shape}')
205#     dirac[where(lf_ir.sum(axis=1) == lf_ir.sum(axis=1).max())] = 1
206#     logging.debug(f'dirac shape {dirac.shape}')
207
208#     sos = signal.butter(12, crossover, 'hp', fs=fs_lf_ir, output='sos')
209#     dirac_filtered = signal.sosfilt(sos, dirac, axis=0)
210#     sos2 = signal.butter(12, crossover, 'lp', fs=fs_lf_ir, output='sos')
211#     lf_ir_filtered = signal.sosfilt(sos2, lf_ir, axis=0)
212
213#     tf_low = fft.fft(lf_ir, axis=0)
214#     tf_high = fft.fft(dirac_filtered, axis=0)
215    
216#     freqs = fft.fftfreq(tf_low.shape[0], 1/fs_lf_ir)
217#     crossover_idx = int(crossover/freqs[1])
218#     if norm_factor == None:
219#         norm_factor = (
220#             (abs(tf_low).sum(axis=1)[crossover_idx:int(crossover_idx*span)]).sum(axis=0)/
221#             (abs(tf_high).sum(axis=1)[crossover_idx:int(crossover_idx*span)]).sum(axis=0)
222#         )
223#     logging.info(f'norm factor {norm_factor}')
224
225#     lf_ir_filtered = lf_ir_filtered/norm_factor
226#     dirac_norm_filtered = dirac_filtered
227
228    
229
230#     ir_full = dirac_norm_filtered +lf_ir_filtered
231#     return ir_full
232
def match_fs(
        in1 : ndarray,
        fs_in2 : int,
        fs_in1 : int
        ) -> tuple[ndarray, int]:
    '''Resample `in1` (sampled at `fs_in1`) to the target rate `fs_in2`.

    Returns the resampled signal together with its new sampling
    frequency (equal to `fs_in2`).
    '''
    logging.info(f'old length:{in1.shape[0]}, old fs:{fs_in1}')
    # New length preserves duration: n_new = n_old * fs_new / fs_old.
    target_len = int(in1.shape[0] * fs_in2 / fs_in1)
    resampled = signal.resample(in1, target_len)
    logging.info(f'new length:{resampled.shape[0]}, new fs:{fs_in2}')
    return resampled, fs_in2
245
246### BASIC ADAPTIVE PROCESSING METHODS ###
247
def up_down(
        audio: ndarray, 
        direction: bool, 
        last: float=0, 
        step: float=2
        ) -> ndarray:
    '''Changes the volume of audio based on direction and step in dB.
    
    Parameters
    ----------
    audio : numpy.ndarray
        2-D audio array
    direction : bool
        True value means up, False means down
    last : float
        volume level for previous stimuli
    step : float
        step size in dB, 2 dB by default

    Returns
    -------
    audio : numpy.ndarray
        2-D audio array
    '''
    # Restore the previous presentation level first.
    audio *= 10**(last/20)
    ratio = 10**(step/20)
    # Bug fix: the original computed `audio*ratio` / `audio/ratio` and
    # discarded the results, so the step was never applied.
    if direction:
        audio *= ratio
    else:
        audio /= ratio
    return audio
279
def up_down_noise(
        audio: ndarray,
        noise: ndarray,
        direction: bool,
        last: float=0,
        step: float=2
        ) -> ndarray:
    """Adjust the noise level up or down and add it to the audio signal.

    Parameters
    ----------
    audio : ndarray
        The audio signal to which the noise will be added.
    noise : ndarray
        The noise signal to be added to the audio.
    direction : bool
        The direction of the noise level adjustment. True for up, False for down.
    last : float, optional
        The noise level used in the previous call (dB), by default 0.
    step : float, optional
        The step size for the noise level adjustment (dB), by default 2.

    Returns
    -------
    ndarray
        The audio signal with the added noise.
    """
    # Trim the noise to the stimulus length before mixing.
    noise = noise[:audio.shape[0]]
    # Bug fix: the original passed `audio` to up_down, levelling the
    # stimulus instead of the noise.
    noise = up_down(noise, direction, last, step)
    audio += noise
    return audio
311
def audio_stats_logging(
        audio : ndarray, 
        fs : int
        ) -> None:
    """Log peak, RMS and loudness of processed audio; warn on clipping.

    Parameters
    ----------
    audio : ndarray
        2-D audio array to analyse.
    fs : int
        Sampling frequency of the audio.
    """
    peak, rms, loudness = audio_stats(audio, fs)
    logging.info(f'Processed audio stats: peak: {peak:.2f} dBFS, '+
        f'rms: {rms:.2f} dBFS, loudness: {loudness:.2f} dB LUFS.')
    # |sample| > 1 exceeds digital full scale.
    if abs(audio).max() > 1:
        # Typo fix in the original message: "occured" -> "occurred".
        logging.warning('Clipping occurred on full scale after processing!')
def straight(audio: numpy.ndarray, fs: int, **kwargs) -> tuple[numpy.ndarray, int]:
26def straight(
27        audio: ndarray, 
28        fs: int,
29        **kwargs
30        ) -> tuple[ndarray, int]:
31    '''Passes the audio without further processing.
32    
33    Parameters
34    ----------
35    audio : numpy.ndarray
36        2-D audio array
37
38    Returns
39    -------
40    audio : numpy.ndarray
41        2-D audio array
42    '''
43    return audio, fs

Passes the audio without further processing.

Parameters
  • audio (numpy.ndarray): 2-D audio array
Returns
  • audio (numpy.ndarray): 2-D audio array
def gain_adjustment( stimuli: numpy.ndarray, fs_stimuli: int, gain: float) -> tuple[numpy.ndarray, int]:
45def gain_adjustment(
46        stimuli: ndarray,
47        fs_stimuli: int,
48        gain: float
49        ) -> tuple[ndarray, int]:
50    """Adjusts the gain of the stimuli.
51
52    This function applies a gain adjustment to the input stimuli based on the specified gain value.
53    The gain adjustment is applied by multiplying the stimuli by a factor calculated from the gain value.
54
55    Parameters
56    ----------
57    stimuli : ndarray
58        The input stimuli to be adjusted.
59    fs_stimuli : int
60        The sampling rate of the stimuli.
61    gain : float
62        The gain value in decibels (dB) to be applied.
63
64    Returns
65    -------
66    tuple[ndarray, int]
67        A tuple containing the adjusted stimuli and the sampling rate.
68
69    """
70    factor = 10**(gain/20)
71    stimuli *= factor
72    audio_stats_logging(stimuli, fs_stimuli)
73    return stimuli, fs_stimuli

Adjusts the gain of the stimuli.

This function applies a gain adjustment to the input stimuli based on the specified gain value. The gain adjustment is applied by multiplying the stimuli by a factor calculated from the gain value.

Parameters
  • stimuli (ndarray): The input stimuli to be adjusted.
  • fs_stimuli (int): The sampling rate of the stimuli.
  • gain (float): The gain value in decibels (dB) to be applied.
Returns
  • tuple[ndarray, int]: A tuple containing the adjusted stimuli and the sampling rate.
def convolution( in1: numpy.ndarray, fs_in1: int, in2: numpy.ndarray, fs_in2: int, fade_out: bool = True, normalization: str = 'ir_sum', normalization_target: float = -6, normalization_prefilter: str = '', prefilter_critical_freq=200) -> tuple[numpy.ndarray, int]:
 75def convolution(
 76        in1: ndarray,
 77        fs_in1: int,
 78        in2: ndarray,
 79        fs_in2: int,
 80        fade_out: bool=True,
 81        normalization: str='ir_sum',
 82        normalization_target: float=-6,
 83        normalization_prefilter: str='',
 84        prefilter_critical_freq = 200
 85        ) -> tuple[ndarray, int]:
 86    '''Performs convolution between IR and stimuli.
 87
 88    Should accept both mono and stereo signals, 
 89    but both in a form of 2D array.
 90    
 91    Parameters
 92    ----------
 93    in1 : numpy.ndarray
 94        2-D audio array (IR)
 95    fs_in1 : int
 96        IR sampling frequency
 97    in2 : numpy.ndarray
 98        2-D audio array (stimulus)
 99    fs_in2 : int
100        sampling frequency of stimuli
101    fade_out : bool, optional
102        Flag indicating whether to apply fade-out to the IR signal, by default True
103    normalization : str, optional
104        Type of normalization to apply, by default 'ir_sum'. The alternatives can be peak, rms, lufs, ir_sum.
105    normalization_target : float, optional
106        Target value for normalization, by default -6
107    normalization_prefilter : str, optional
108        Type of prefiltering to apply before normalization, by default ''
109    prefilter_critical_freq : int, optional
110        Critical frequency for the prefilter, by default 200
111
112    Returns
113    -------
114    audio : numpy.ndarray
115        2-D audio array
116    fs_in1 : int
117        IR sampling frequency
118    '''
119    if fs_in1 == fs_in2:
120        logging.debug('IR and Stimuli sample rates are equal, no resampling needed.')
121    else:
122        logging.debug('IR and Stimuli sample rates differs, IR audio was resampled.')
123        in1, fs_in1 = match_fs(in1, fs_in2, fs_in1)
124    
125    logging.debug(f'Stimuli shape before convolution: {in2.shape}')
126    logging.debug(f'IR shape before convolution:      {in1.shape}')
127    logging.debug(f"The peak values are {abs(in2).max()} and {abs(in1).max()}")
128
129    if fade_out:
130        HFT90D = [1, 1.942604, 1.340318, 0.440811, 0.043097]
131        size = int(fs_in2/12.5)
132        fade_out_win = signal.windows.general_cosine(2*size,HFT90D)[-size:]
133        fade_out_win = fade_out_win/fade_out_win.max()
134        for i in in1.T:
135            i[-size:] *= fade_out_win
136        logging.debug(f'HFT90D Fade-out applied to last 0.1 s of IR.')
137    
138    # convolution
139    audio = signal.oaconvolve(in2.T, in1.T)[[0,-1]]
140    audio = audio.T
141    
142    # prefiltering for normalization
143    if normalization_prefilter == '':
144        audio_prefiltered = audio
145    elif normalization_prefilter in FILTERS:
146        sos = signal.butter(
147            12,
148            prefilter_critical_freq,
149            normalization_prefilter,
150            fs=fs_in1,
151            output='sos')
152        audio_prefiltered = signal.sosfilt(sos, audio, axis=0)
153    else:
154        logging.warning('Specified normalization prefilter is not implemented.')
155
156    # normalization
157    if normalization == 'peak':
158        audio,_ = peak_normalize(
159            audio, 
160            fs_in1, 
161            peak=normalization_target,
162            reference=audio_prefiltered
163            )
164    elif normalization == 'ir_sum':
165        audio,_ = ir_sum_normalize(
166            audio, 
167            ir = in1, 
168            fs = fs_in1, 
169            ir_sum=normalization_target
170            )
171    elif normalization == 'rms':
172        audio,_ = rms_normalize(
173            audio, 
174            fs_in1, 
175            rms=normalization_target,
176            reference=audio_prefiltered
177            )
178    elif normalization == 'lufs':
179        audio,_ = lufs_normalize(
180            audio, 
181            fs_in1, 
182            lufs=normalization_target,
183            reference=audio_prefiltered
184            )
185    elif normalization is None:
186        logging.info('Normalization was not applied.')
187    else:
188        logging.info('Specified normalization type not implemented.')
189    
190    logging.debug(f'Stimuli shape after convolution: {audio.shape}')
191    
192    audio_stats_logging(audio, fs_in1)
193    return audio, fs_in1

Performs convolution between IR and stimuli.

Should accept both mono and stereo signals, but both in a form of 2D array.

Parameters
  • in1 (numpy.ndarray): 2-D audio array (IR)
  • fs_in1 (int): IR sampling frequency
  • in2 (numpy.ndarray): 2-D audio array (stimulus)
  • fs_in2 (int): sampling frequency of stimuli
  • fade_out (bool, optional): Flag indicating whether to apply fade-out to the IR signal, by default True
  • normalization (str, optional): Type of normalization to apply, by default 'ir_sum'. The alternatives can be peak, rms, lufs, ir_sum.
  • normalization_target (float, optional): Target value for normalization, by default -6
  • normalization_prefilter (str, optional): Type of prefiltering to apply before normalization, by default ''
  • prefilter_critical_freq (int, optional): Critical frequency for the prefilter, by default 200
Returns
  • audio (numpy.ndarray): 2-D audio array
  • fs_in1 (int): IR sampling frequency
def match_fs( in1: numpy.ndarray, fs_in2: int, fs_in1: int) -> tuple[numpy.ndarray, int]:
234def match_fs(
235        in1 : ndarray,
236        fs_in2 : int,
237        fs_in1 : int
238        ) -> tuple[ndarray, int]:
239    '''Resamples in1 to match fs_in2.'''
240    logging.info(f'old length:{in1.shape[0]}, old fs:{fs_in1}')
241    new_len = int(in1.shape[0]*fs_in2/fs_in1)
242    new_in1 = signal.resample(in1, new_len)
243    fs_in1 = fs_in2
244    logging.info(f'new length:{new_in1.shape[0]}, new fs:{fs_in1}')
245    return new_in1, fs_in1

Resamples in1 to match fs_in2.

def up_down( audio: numpy.ndarray, direction: bool, last: float = 0, step: float = 2) -> numpy.ndarray:
249def up_down(
250        audio: ndarray, 
251        direction: bool, 
252        last: float=0, 
253        step: float=2
254        ) -> ndarray:
255    '''Changes the volume of audio based on direction and step in dB.
256    
257    Parameters
258    ----------
259    audio : numpy.ndarray
260        2-D audio array
261    direction : bool
262        True value means up, False means down
263    last : float
264        volume level for previous stimuli
265    step : float
266        step size in dB, 2 dB by default
267
268    Returns
269    -------
270    audio : numpy.ndarray
271        2-D audio array
272    '''
273    audio *= 10**(last/20)
274    ratio = 10**(step/20)
275    if direction:
276        audio*ratio
277    else:
278        audio/ratio    
279    return audio

Changes the volume of audio based on direction and step in dB.

Parameters
  • audio (numpy.ndarray): 2-D audio array
  • direction (bool): True value means up, False means down
  • last (float): volume level for previous stimuli
  • step (float): step size in dB, 2 dB by default
Returns
  • audio (numpy.ndarray): 2-D audio array
def up_down_noise( audio: numpy.ndarray, noise: numpy.ndarray, direction: bool, last: float = 0, step: float = 2) -> numpy.ndarray:
281def up_down_noise(
282        audio: ndarray,
283        noise: ndarray,
284        direction: bool,
285        last: float=0,
286        step: float=2
287        ) -> ndarray:
288    """Add noise to the audio signal in an up or down direction.
289
290    Parameters
291    ----------
292    audio : ndarray
293        The audio signal to which the noise will be added.
294    noise : ndarray
295        The noise signal to be added to the audio.
296    direction : bool
297        The direction of the noise addition. True for up, False for down.
298    last : float, optional
299        The last value of the noise added in the previous call, by default 0.
300    step : float, optional
301        The step size for the noise addition, by default 2.
302
303    Returns
304    -------
305    ndarray
306        The audio signal with the added noise.
307    """
308    noise = noise[:audio.shape[0]]
309    noise = up_down(audio, direction, last, step)
310    audio += noise
311    return audio

Adjust the noise level up or down by one step and add the noise to the audio signal.

Parameters
  • audio (ndarray): The audio signal to which the noise will be added.
  • noise (ndarray): The noise signal to be added to the audio.
  • direction (bool): The direction of the noise addition. True for up, False for down.
  • last (float, optional): The last value of the noise added in the previous call, by default 0.
  • step (float, optional): The step size for the noise addition, by default 2.
Returns
  • ndarray: The audio signal with the added noise.