listeningpy.processing
This module contains functions for processing audio signals. It contains functions for basic processing, such as normalization, convolution, etc.
1""" 2This module contains functions for processing audio signals. 3It contains functions for basic processing, such as normalization, convolution, etc. 4""" 5 6import numpy.fft as fft 7from numpy import ndarray, where, zeros_like 8import scipy.signal as signal 9import logging 10import pyloudnorm as pyln 11from listeningpy.normalization import ( 12 peak_normalize, 13 rms_normalize, 14 ir_sum_normalize, 15 lufs_normalize 16 ) 17from listeningpy.audiotools import audio_stats 18 19FILTERS = ['hp', 'lp'] 20 21# logging.basicConfig(level=logging.DEBUG) 22 23### BASIC PROCESSING ### 24 25def straight( 26 audio: ndarray, 27 fs: int, 28 **kwargs 29 ) -> tuple[ndarray, int]: 30 '''Passes the audio without further processing. 31 32 Parameters 33 ---------- 34 audio : numpy.ndarray 35 2-D audio array 36 37 Returns 38 ------- 39 audio : numpy.ndarray 40 2-D audio array 41 ''' 42 return audio, fs 43 44def gain_adjustment( 45 stimuli: ndarray, 46 fs_stimuli: int, 47 gain: float 48 ) -> tuple[ndarray, int]: 49 """Adjusts the gain of the stimuli. 50 51 This function applies a gain adjustment to the input stimuli based on the specified gain value. 52 The gain adjustment is applied by multiplying the stimuli by a factor calculated from the gain value. 53 54 Parameters 55 ---------- 56 stimuli : ndarray 57 The input stimuli to be adjusted. 58 fs_stimuli : int 59 The sampling rate of the stimuli. 60 gain : float 61 The gain value in decibels (dB) to be applied. 62 63 Returns 64 ------- 65 tuple[ndarray, int] 66 A tuple containing the adjusted stimuli and the sampling rate. 
67 68 """ 69 factor = 10**(gain/20) 70 stimuli *= factor 71 audio_stats_logging(stimuli, fs_stimuli) 72 return stimuli, fs_stimuli 73 74def convolution( 75 in1: ndarray, 76 fs_in1: int, 77 in2: ndarray, 78 fs_in2: int, 79 fade_out: bool=True, 80 normalization: str='ir_sum', 81 normalization_target: float=-6, 82 normalization_prefilter: str='', 83 prefilter_critical_freq = 200 84 ) -> tuple[ndarray, int]: 85 '''Performs convolution between IR and stimuli. 86 87 Should accept both mono and stereo signals, 88 but both in a form of 2D array. 89 90 Parameters 91 ---------- 92 in1 : numpy.ndarray 93 2-D audio array (IR) 94 fs_in1 : int 95 IR sampling frequency 96 in2 : numpy.ndarray 97 2-D audio array (stimulus) 98 fs_in2 : int 99 sampling frequency of stimuli 100 fade_out : bool, optional 101 Flag indicating whether to apply fade-out to the IR signal, by default True 102 normalization : str, optional 103 Type of normalization to apply, by default 'ir_sum'. The alternatives can be peak, rms, lufs, ir_sum. 
104 normalization_target : float, optional 105 Target value for normalization, by default -6 106 normalization_prefilter : str, optional 107 Type of prefiltering to apply before normalization, by default '' 108 prefilter_critical_freq : int, optional 109 Critical frequency for the prefilter, by default 200 110 111 Returns 112 ------- 113 audio : numpy.ndarray 114 2-D audio array 115 fs_in1 : int 116 IR sampling frequency 117 ''' 118 if fs_in1 == fs_in2: 119 logging.debug('IR and Stimuli sample rates are equal, no resampling needed.') 120 else: 121 logging.debug('IR and Stimuli sample rates differs, IR audio was resampled.') 122 in1, fs_in1 = match_fs(in1, fs_in2, fs_in1) 123 124 logging.debug(f'Stimuli shape before convolution: {in2.shape}') 125 logging.debug(f'IR shape before convolution: {in1.shape}') 126 logging.debug(f"The peak values are {abs(in2).max()} and {abs(in1).max()}") 127 128 if fade_out: 129 HFT90D = [1, 1.942604, 1.340318, 0.440811, 0.043097] 130 size = int(fs_in2/12.5) 131 fade_out_win = signal.windows.general_cosine(2*size,HFT90D)[-size:] 132 fade_out_win = fade_out_win/fade_out_win.max() 133 for i in in1.T: 134 i[-size:] *= fade_out_win 135 logging.debug(f'HFT90D Fade-out applied to last 0.1 s of IR.') 136 137 # convolution 138 audio = signal.oaconvolve(in2.T, in1.T)[[0,-1]] 139 audio = audio.T 140 141 # prefiltering for normalization 142 if normalization_prefilter == '': 143 audio_prefiltered = audio 144 elif normalization_prefilter in FILTERS: 145 sos = signal.butter( 146 12, 147 prefilter_critical_freq, 148 normalization_prefilter, 149 fs=fs_in1, 150 output='sos') 151 audio_prefiltered = signal.sosfilt(sos, audio, axis=0) 152 else: 153 logging.warning('Specified normalization prefilter is not implemented.') 154 155 # normalization 156 if normalization == 'peak': 157 audio,_ = peak_normalize( 158 audio, 159 fs_in1, 160 peak=normalization_target, 161 reference=audio_prefiltered 162 ) 163 elif normalization == 'ir_sum': 164 audio,_ = 
ir_sum_normalize( 165 audio, 166 ir = in1, 167 fs = fs_in1, 168 ir_sum=normalization_target 169 ) 170 elif normalization == 'rms': 171 audio,_ = rms_normalize( 172 audio, 173 fs_in1, 174 rms=normalization_target, 175 reference=audio_prefiltered 176 ) 177 elif normalization == 'lufs': 178 audio,_ = lufs_normalize( 179 audio, 180 fs_in1, 181 lufs=normalization_target, 182 reference=audio_prefiltered 183 ) 184 elif normalization is None: 185 logging.info('Normalization was not applied.') 186 else: 187 logging.info('Specified normalization type not implemented.') 188 189 logging.debug(f'Stimuli shape after convolution: {audio.shape}') 190 191 audio_stats_logging(audio, fs_in1) 192 return audio, fs_in1 193 194# def lf_dirac_combination( 195# lf_ir: ndarray, 196# fs_lf_ir: int, 197# crossover: int=200, 198# span: int=2, 199# norm_factor=None 200# ) -> tuple[ndarray, int]: 201# """NOT RECOMMENDED, USE lf_convolution INSTEAD. 202# """ 203# dirac = zeros_like(lf_ir) 204# logging.debug(f'dirac shape {dirac.shape}') 205# dirac[where(lf_ir.sum(axis=1) == lf_ir.sum(axis=1).max())] = 1 206# logging.debug(f'dirac shape {dirac.shape}') 207 208# sos = signal.butter(12, crossover, 'hp', fs=fs_lf_ir, output='sos') 209# dirac_filtered = signal.sosfilt(sos, dirac, axis=0) 210# sos2 = signal.butter(12, crossover, 'lp', fs=fs_lf_ir, output='sos') 211# lf_ir_filtered = signal.sosfilt(sos2, lf_ir, axis=0) 212 213# tf_low = fft.fft(lf_ir, axis=0) 214# tf_high = fft.fft(dirac_filtered, axis=0) 215 216# freqs = fft.fftfreq(tf_low.shape[0], 1/fs_lf_ir) 217# crossover_idx = int(crossover/freqs[1]) 218# if norm_factor == None: 219# norm_factor = ( 220# (abs(tf_low).sum(axis=1)[crossover_idx:int(crossover_idx*span)]).sum(axis=0)/ 221# (abs(tf_high).sum(axis=1)[crossover_idx:int(crossover_idx*span)]).sum(axis=0) 222# ) 223# logging.info(f'norm factor {norm_factor}') 224 225# lf_ir_filtered = lf_ir_filtered/norm_factor 226# dirac_norm_filtered = dirac_filtered 227 228 229 230# ir_full = 
def match_fs(
    in1 : ndarray,
    fs_in2 : int,
    fs_in1 : int
    ) -> tuple[ndarray, int]:
    '''Resamples in1 to match fs_in2.

    Parameters
    ----------
    in1 : numpy.ndarray
        2-D audio array to be resampled
    fs_in2 : int
        target sampling frequency
    fs_in1 : int
        current sampling frequency of in1

    Returns
    -------
    new_in1 : numpy.ndarray
        resampled 2-D audio array
    fs_in1 : int
        new sampling frequency (equal to fs_in2)
    '''
    logging.info(f'old length:{in1.shape[0]}, old fs:{fs_in1}')
    # New length keeps the duration constant at the new rate.
    new_len = int(in1.shape[0]*fs_in2/fs_in1)
    new_in1 = signal.resample(in1, new_len)
    fs_in1 = fs_in2
    logging.info(f'new length:{new_in1.shape[0]}, new fs:{fs_in1}')
    return new_in1, fs_in1

### BASIC ADAPTIVE PROCESSING METHODS ###

def up_down(
    audio: ndarray,
    direction: bool,
    last: float=0,
    step: float=2
    ) -> ndarray:
    '''Changes the volume of audio based on direction and step in dB.

    Parameters
    ----------
    audio : numpy.ndarray
        2-D audio array. NOTE: modified in place.
    direction : bool
        True value means up, False means down
    last : float
        volume level for previous stimuli
    step : float
        step size in dB, 2 dB by default

    Returns
    -------
    audio : numpy.ndarray
        2-D audio array
    '''
    # Restore the previous level, then step up or down by `step` dB.
    audio *= 10**(last/20)
    ratio = 10**(step/20)
    if direction:
        # FIX: was `audio*ratio` / `audio/ratio` — bare expressions whose
        # results were discarded, so the step was never applied.
        audio *= ratio
    else:
        audio /= ratio
    return audio

def up_down_noise(
    audio: ndarray,
    noise: ndarray,
    direction: bool,
    last: float=0,
    step: float=2
    ) -> ndarray:
    """Add level-adjusted noise to the audio signal.

    The noise is truncated to the audio length, its level is stepped up or
    down with :func:`up_down`, and the result is added to the audio.

    Parameters
    ----------
    audio : ndarray
        The audio signal to which the noise will be added.
        NOTE: modified in place.
    noise : ndarray
        The noise signal to be added to the audio.
    direction : bool
        The direction of the noise level adjustment. True for up, False for down.
    last : float, optional
        The noise level (in dB) applied in the previous call, by default 0.
    step : float, optional
        The step size (in dB) for the noise level adjustment, by default 2.

    Returns
    -------
    ndarray
        The audio signal with the added noise.
    """
    noise = noise[:audio.shape[0]]
    # FIX: was `up_down(audio, ...)`, which level-stepped the *audio* and
    # then added that copy back onto itself; the noise argument was ignored.
    noise = up_down(noise, direction, last, step)
    audio += noise
    return audio

def audio_stats_logging(
    audio : ndarray,
    fs : int
    ) -> None:
    '''Logs peak, RMS and loudness of the processed audio; warns on clipping.

    Parameters
    ----------
    audio : numpy.ndarray
        2-D audio array
    fs : int
        sampling frequency of the audio
    '''
    peak, rms, loudness = audio_stats(audio, fs)
    logging.info(f'Processed audio stats: peak: {peak:.2f} dBFS, '+
                 f'rms: {rms:.2f} dBFS, loudness: {loudness:.2f} dB LUFS.')
    # Full-scale check: any sample beyond |1.0| clipped after processing.
    if abs(audio).max() > 1:
        logging.warning('Clipping occurred on full scale after processing!')
def
straight(audio: numpy.ndarray, fs: int, **kwargs) -> tuple[numpy.ndarray, int]:
26def straight( 27 audio: ndarray, 28 fs: int, 29 **kwargs 30 ) -> tuple[ndarray, int]: 31 '''Passes the audio without further processing. 32 33 Parameters 34 ---------- 35 audio : numpy.ndarray 36 2-D audio array 37 38 Returns 39 ------- 40 audio : numpy.ndarray 41 2-D audio array 42 ''' 43 return audio, fs
Passes the audio without further processing.
Parameters
- audio (numpy.ndarray): 2-D audio array
Returns
- audio (numpy.ndarray): 2-D audio array
def
gain_adjustment( stimuli: numpy.ndarray, fs_stimuli: int, gain: float) -> tuple[numpy.ndarray, int]:
45def gain_adjustment( 46 stimuli: ndarray, 47 fs_stimuli: int, 48 gain: float 49 ) -> tuple[ndarray, int]: 50 """Adjusts the gain of the stimuli. 51 52 This function applies a gain adjustment to the input stimuli based on the specified gain value. 53 The gain adjustment is applied by multiplying the stimuli by a factor calculated from the gain value. 54 55 Parameters 56 ---------- 57 stimuli : ndarray 58 The input stimuli to be adjusted. 59 fs_stimuli : int 60 The sampling rate of the stimuli. 61 gain : float 62 The gain value in decibels (dB) to be applied. 63 64 Returns 65 ------- 66 tuple[ndarray, int] 67 A tuple containing the adjusted stimuli and the sampling rate. 68 69 """ 70 factor = 10**(gain/20) 71 stimuli *= factor 72 audio_stats_logging(stimuli, fs_stimuli) 73 return stimuli, fs_stimuli
Adjusts the gain of the stimuli.
This function applies a gain adjustment to the input stimuli based on the specified gain value. The gain adjustment is applied by multiplying the stimuli by a factor calculated from the gain value.
Parameters
- stimuli (ndarray): The input stimuli to be adjusted.
- fs_stimuli (int): The sampling rate of the stimuli.
- gain (float): The gain value in decibels (dB) to be applied.
Returns
- tuple[ndarray, int]: A tuple containing the adjusted stimuli and the sampling rate.
def
convolution( in1: numpy.ndarray, fs_in1: int, in2: numpy.ndarray, fs_in2: int, fade_out: bool = True, normalization: str = 'ir_sum', normalization_target: float = -6, normalization_prefilter: str = '', prefilter_critical_freq=200) -> tuple[numpy.ndarray, int]:
75def convolution( 76 in1: ndarray, 77 fs_in1: int, 78 in2: ndarray, 79 fs_in2: int, 80 fade_out: bool=True, 81 normalization: str='ir_sum', 82 normalization_target: float=-6, 83 normalization_prefilter: str='', 84 prefilter_critical_freq = 200 85 ) -> tuple[ndarray, int]: 86 '''Performs convolution between IR and stimuli. 87 88 Should accept both mono and stereo signals, 89 but both in a form of 2D array. 90 91 Parameters 92 ---------- 93 in1 : numpy.ndarray 94 2-D audio array (IR) 95 fs_in1 : int 96 IR sampling frequency 97 in2 : numpy.ndarray 98 2-D audio array (stimulus) 99 fs_in2 : int 100 sampling frequency of stimuli 101 fade_out : bool, optional 102 Flag indicating whether to apply fade-out to the IR signal, by default True 103 normalization : str, optional 104 Type of normalization to apply, by default 'ir_sum'. The alternatives can be peak, rms, lufs, ir_sum. 105 normalization_target : float, optional 106 Target value for normalization, by default -6 107 normalization_prefilter : str, optional 108 Type of prefiltering to apply before normalization, by default '' 109 prefilter_critical_freq : int, optional 110 Critical frequency for the prefilter, by default 200 111 112 Returns 113 ------- 114 audio : numpy.ndarray 115 2-D audio array 116 fs_in1 : int 117 IR sampling frequency 118 ''' 119 if fs_in1 == fs_in2: 120 logging.debug('IR and Stimuli sample rates are equal, no resampling needed.') 121 else: 122 logging.debug('IR and Stimuli sample rates differs, IR audio was resampled.') 123 in1, fs_in1 = match_fs(in1, fs_in2, fs_in1) 124 125 logging.debug(f'Stimuli shape before convolution: {in2.shape}') 126 logging.debug(f'IR shape before convolution: {in1.shape}') 127 logging.debug(f"The peak values are {abs(in2).max()} and {abs(in1).max()}") 128 129 if fade_out: 130 HFT90D = [1, 1.942604, 1.340318, 0.440811, 0.043097] 131 size = int(fs_in2/12.5) 132 fade_out_win = signal.windows.general_cosine(2*size,HFT90D)[-size:] 133 fade_out_win = 
fade_out_win/fade_out_win.max() 134 for i in in1.T: 135 i[-size:] *= fade_out_win 136 logging.debug(f'HFT90D Fade-out applied to last 0.1 s of IR.') 137 138 # convolution 139 audio = signal.oaconvolve(in2.T, in1.T)[[0,-1]] 140 audio = audio.T 141 142 # prefiltering for normalization 143 if normalization_prefilter == '': 144 audio_prefiltered = audio 145 elif normalization_prefilter in FILTERS: 146 sos = signal.butter( 147 12, 148 prefilter_critical_freq, 149 normalization_prefilter, 150 fs=fs_in1, 151 output='sos') 152 audio_prefiltered = signal.sosfilt(sos, audio, axis=0) 153 else: 154 logging.warning('Specified normalization prefilter is not implemented.') 155 156 # normalization 157 if normalization == 'peak': 158 audio,_ = peak_normalize( 159 audio, 160 fs_in1, 161 peak=normalization_target, 162 reference=audio_prefiltered 163 ) 164 elif normalization == 'ir_sum': 165 audio,_ = ir_sum_normalize( 166 audio, 167 ir = in1, 168 fs = fs_in1, 169 ir_sum=normalization_target 170 ) 171 elif normalization == 'rms': 172 audio,_ = rms_normalize( 173 audio, 174 fs_in1, 175 rms=normalization_target, 176 reference=audio_prefiltered 177 ) 178 elif normalization == 'lufs': 179 audio,_ = lufs_normalize( 180 audio, 181 fs_in1, 182 lufs=normalization_target, 183 reference=audio_prefiltered 184 ) 185 elif normalization is None: 186 logging.info('Normalization was not applied.') 187 else: 188 logging.info('Specified normalization type not implemented.') 189 190 logging.debug(f'Stimuli shape after convolution: {audio.shape}') 191 192 audio_stats_logging(audio, fs_in1) 193 return audio, fs_in1
Performs convolution between IR and stimuli.
Should accept both mono and stereo signals, but both in a form of 2D array.
Parameters
- in1 (numpy.ndarray): 2-D audio array (IR)
- fs_in1 (int): IR sampling frequency
- in2 (numpy.ndarray): 2-D audio array (stimulus)
- fs_in2 (int): sampling frequency of stimuli
- fade_out (bool, optional): Flag indicating whether to apply fade-out to the IR signal, by default True
- normalization (str, optional): Type of normalization to apply, by default 'ir_sum'. The alternatives can be peak, rms, lufs, ir_sum.
- normalization_target (float, optional): Target value for normalization, by default -6
- normalization_prefilter (str, optional): Type of prefiltering to apply before normalization, by default ''
- prefilter_critical_freq (int, optional): Critical frequency for the prefilter, by default 200
Returns
- audio (numpy.ndarray): 2-D audio array
- fs_in1 (int): IR sampling frequency
def
match_fs( in1: numpy.ndarray, fs_in2: int, fs_in1: int) -> tuple[numpy.ndarray, int]:
234def match_fs( 235 in1 : ndarray, 236 fs_in2 : int, 237 fs_in1 : int 238 ) -> tuple[ndarray, int]: 239 '''Resamples in1 to match fs_in2.''' 240 logging.info(f'old length:{in1.shape[0]}, old fs:{fs_in1}') 241 new_len = int(in1.shape[0]*fs_in2/fs_in1) 242 new_in1 = signal.resample(in1, new_len) 243 fs_in1 = fs_in2 244 logging.info(f'new length:{new_in1.shape[0]}, new fs:{fs_in1}') 245 return new_in1, fs_in1
Resamples in1 to match fs_in2.
def
up_down( audio: numpy.ndarray, direction: bool, last: float = 0, step: float = 2) -> numpy.ndarray:
249def up_down( 250 audio: ndarray, 251 direction: bool, 252 last: float=0, 253 step: float=2 254 ) -> ndarray: 255 '''Changes the volume of audio based on direction and step in dB. 256 257 Parameters 258 ---------- 259 audio : numpy.ndarray 260 2-D audio array 261 direction : bool 262 True value means up, False means down 263 last : float 264 volume level for previous stimuli 265 step : float 266 step size in dB, 2 dB by default 267 268 Returns 269 ------- 270 audio : numpy.ndarray 271 2-D audio array 272 ''' 273 audio *= 10**(last/20) 274 ratio = 10**(step/20) 275 if direction: 276 audio*ratio 277 else: 278 audio/ratio 279 return audio
Changes the volume of audio based on direction and step in dB.
Parameters
- audio (numpy.ndarray): 2-D audio array
- direction (bool): True value means up, False means down
- last (float): volume level for previous stimuli
- step (float): step size in dB, 2 dB by default
Returns
- audio (numpy.ndarray): 2-D audio array
def
up_down_noise( audio: numpy.ndarray, noise: numpy.ndarray, direction: bool, last: float = 0, step: float = 2) -> numpy.ndarray:
281def up_down_noise( 282 audio: ndarray, 283 noise: ndarray, 284 direction: bool, 285 last: float=0, 286 step: float=2 287 ) -> ndarray: 288 """Add noise to the audio signal in an up or down direction. 289 290 Parameters 291 ---------- 292 audio : ndarray 293 The audio signal to which the noise will be added. 294 noise : ndarray 295 The noise signal to be added to the audio. 296 direction : bool 297 The direction of the noise addition. True for up, False for down. 298 last : float, optional 299 The last value of the noise added in the previous call, by default 0. 300 step : float, optional 301 The step size for the noise addition, by default 2. 302 303 Returns 304 ------- 305 ndarray 306 The audio signal with the added noise. 307 """ 308 noise = noise[:audio.shape[0]] 309 noise = up_down(audio, direction, last, step) 310 audio += noise 311 return audio
Add noise to the audio signal in an up or down direction.
Parameters
- audio (ndarray): The audio signal to which the noise will be added.
- noise (ndarray): The noise signal to be added to the audio.
- direction (bool): The direction of the noise addition. True for up, False for down.
- last (float, optional): The noise level (in dB) applied in the previous call, by default 0.
- step (float, optional): The step size for the noise addition, by default 2.
Returns
- ndarray: The audio signal with the added noise.