listeningpy.normalization
This module contains functions for normalizing audio signals.
"""
This module contains functions for normalizing audio signals.
"""
import logging

import mosqito
import numpy as np
import pyloudnorm as pyln
from scipy.signal import resample

# Sampling rate the signal is resampled to before the loudness computation.
_LOUDNESS_FS = 48000


def fs_to_pressure(
        audio: np.ndarray,
        dbfs_db: float,
        p0: float = 2e-5):
    """Converts audio from full-scale (FS) to pressure (Pa).

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    dbfs_db : float
        The sound pressure level associated with 0 dBFS.
    p0 : float, optional
        The reference sound pressure in pascals (Pa), by default 2e-5.

    Returns
    -------
    np.ndarray
        The audio signal converted to pressure (Pa).
    """
    # Linear pressure (Pa) that a full-scale sample value of 1.0 maps to.
    full_scale_pressure = p0 * 10 ** (dbfs_db / 20)
    return audio * full_scale_pressure


def eq_loudness_lvl(
        audio_pressure: np.ndarray,
        fs: int,
        field_type: str = "diffuse"):
    """
    Calculate the loudness level of an audio signal from its mean loudness.

    Parameters
    ----------
    audio_pressure : ndarray
        Array containing the audio pressure values. A 1-D array is used
        as is; an array with more dimensions is averaged over axis 1
        before the loudness is computed.
    fs : int
        Sampling frequency of the audio signal.
    field_type : str, optional
        Type of sound field. Possible values are "diffuse" (default) or "free".

    Returns
    -------
    float
        The loudness level in phon.
    tuple
        A tuple containing the loudness values and corresponding time values.
    """
    audio_pressure = np.asarray(audio_pressure)
    # A 1-D (mono) signal has no axis 1 to average over.
    if audio_pressure.ndim > 1:
        audio_pressure = audio_pressure.mean(axis=1)
    n_resampled = int(audio_pressure.shape[0] * _LOUDNESS_FS / fs)
    audio_pressure = resample(audio_pressure, n_resampled)
    N, _, _, time = mosqito.loudness_zwtv(
        audio_pressure,
        _LOUDNESS_FS,
        field_type=field_type
    )
    # Loudness level is derived from the loudness averaged over time.
    loud_lvl = 40 + 10 * np.log2(N.mean())
    return loud_lvl, (N, time)


def peak_normalize(
        audio: np.ndarray,
        fs: int,
        peak: float = 0,
        reference: np.ndarray = None
        ) -> tuple[np.ndarray, int]:
    """
    Normalize the peak level of an audio signal.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    fs : int
        The sample rate of the audio signal.
    peak : float, optional
        The desired peak level in decibels (dB), by default 0.
    reference : np.ndarray, optional
        The reference audio signal for normalization, by default None.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the normalized audio signal and the sample rate,
        which is passed through unchanged.

    Notes
    -----
    The scaling factor is ``10**(peak/20)`` divided by the maximum absolute
    value of the reference signal. If no reference signal is provided, the
    input audio signal itself is used as the reference.
    """
    if reference is None:
        reference = audio
    factor = 10 ** (peak / 20) / np.abs(reference).max()
    logging.info(f'Stimuli was peak normalized to {peak:.1f} dB')
    return audio * factor, fs


def rms_normalize(
        audio: np.ndarray,
        fs: int,
        rms: float = -9,
        reference: np.ndarray = None
        ) -> tuple[np.ndarray, int]:
    """
    Normalize the audio signal to a target RMS level.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    fs : int
        The sample rate of the audio signal.
    rms : float, optional
        The target RMS level in decibels (dB), by default -9 dB.
    reference : np.ndarray, optional
        The reference audio signal used for normalization, by default None.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the normalized audio signal and the sample rate,
        which is passed through unchanged.

    Notes
    -----
    The scaling factor is ``10**(rms/20)`` divided by the RMS value of the
    reference signal. If no reference signal is provided, the input audio
    signal itself is used as the reference.
    """
    if reference is None:
        reference = audio
    # Square in float64 so integer PCM references (e.g. int16) cannot overflow.
    mean_square = np.mean(np.square(reference, dtype=np.float64))
    # A plain Python float keeps the dtype of floating-point `audio` intact.
    factor = float(10 ** (rms / 20) / np.sqrt(mean_square))
    logging.info(f'Stimuli was normalized to RMS average of {rms:.1f} dB')
    return audio * factor, fs


def lufs_normalize(
        audio: np.ndarray,
        fs: int,
        lufs: float = -16,
        reference: np.ndarray = None
        ) -> tuple[np.ndarray, int]:
    """Normalize the loudness of an audio signal to a target LUFS level.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal as a NumPy array.
    fs : int
        The sample rate of the audio signal.
    lufs : float, optional
        The target loudness level in LUFS (Loudness Units Full Scale). Default is -16 LUFS.
    reference : np.ndarray, optional
        The reference audio signal to calculate the loudness. If not provided, the input audio signal is used as the reference.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the normalized audio signal as a NumPy array and the sample rate as an int.
    """
    if reference is None:
        reference = audio
    meter = pyln.Meter(fs)
    loudness = meter.integrated_loudness(reference)
    # The loudness difference is applied as a plain gain in dB.
    delta = lufs - loudness
    factor = 10 ** (delta / 20)
    logging.info(f'Stimuli was loudness normalized to {lufs:.1f} dB LUFS')
    return audio * factor, fs


def ir_sum_normalize(
        audio: np.ndarray,
        ir: np.ndarray,
        fs: int,
        ir_sum: float = -9):
    """Normalize the audio based on the sum of the impulse response (IR).

    The normalization factor is calculated as 10^(ir_sum/20) divided by the
    sum of the absolute values of the IR. The audio signal is then
    multiplied by this factor.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    ir : np.ndarray
        The impulse response (IR) signal.
    fs : int
        The sampling rate of the audio signal.
    ir_sum : float, optional
        The desired sum of the IR in decibels (dB), by default -9.

    Returns
    -------
    np.ndarray
        The normalized audio signal.
    int
        The sampling rate of the normalized audio signal.
    """
    factor = 10 ** (ir_sum / 20) / abs(ir).sum()
    logging.info(f'Stimuli was normalized based IR sum to {ir_sum:.1f} dB')
    return audio * factor, fs


def zwicker_loudness_normalize(
        audio: np.ndarray,
        fs: int,
        target_phon: float,
        dbfs_db: float,
        return_ratio: bool = False,
        max_iter: int = 100
        ) -> tuple:
    """Scale an audio signal iteratively until it reaches a target loudness level.

    The signal is converted to pressure with ``fs_to_pressure`` and its
    loudness level is computed with ``eq_loudness_lvl``. The remaining
    difference to ``target_phon`` is applied as a gain in dB and the
    loudness level is re-evaluated, until the difference is at most 0.1.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal in full-scale units. It is not modified;
        a scaled copy is returned.
    fs : int
        The sample rate of the audio signal.
    target_phon : float
        The target loudness level in phon.
    dbfs_db : float
        The sound pressure level associated with 0 dBFS.
    return_ratio : bool, optional
        If True, the applied linear gain is returned as a third tuple
        element, by default False.
    max_iter : int, optional
        Maximum number of gain refinement iterations, by default 100.

    Returns
    -------
    tuple
        ``(audio, fs)`` or, when ``return_ratio`` is True,
        ``(audio, fs, ratio)``.

    Raises
    ------
    RuntimeError
        If the loudness level is still outside the tolerance after
        ``max_iter`` iterations.
    ValueError
        If the absolute peak of the scaled signal exceeds 1.
    """
    audio_pressure = fs_to_pressure(audio, dbfs_db)
    loud_lvl, _ = eq_loudness_lvl(audio_pressure, fs)
    loud_diff = target_phon - loud_lvl
    logging.info(f"Before normalization: N={loud_lvl:.1f}")
    ratio_loud = 1
    n = 0
    while abs(loud_diff) > 0.1:
        if n >= max_iter:
            raise RuntimeError(
                f"Loudness normalization did not converge within {max_iter} "
                f"iterations (remaining difference: {loud_diff:.2f}).")
        # Accumulate the remaining level difference as a linear gain.
        ratio_loud *= 10 ** (loud_diff / 20)
        loud_lvl, _ = eq_loudness_lvl(audio_pressure * ratio_loud, fs)
        loud_diff = target_phon - loud_lvl
        logging.debug("Remaining loudness difference: %.3f", loud_diff)
        n += 1
    logging.info(f" After {n}th normalization: N={loud_lvl:.1f}")
    # Out-of-place multiply: leaves the caller's array untouched and also
    # works for integer input, where an in-place float multiply would fail.
    audio = audio * ratio_loud
    peak = np.abs(audio).max()
    # Peak level relative to full scale; positive means the signal clips.
    peak_db = 20 * np.log10(peak)
    if peak > 1:
        logging.warning("Audio signal clipped after normalization.")
        raise ValueError("Audio signal clipped after normalization. " +
                         f"The level overflows for {peak_db:.2f} dB. " +
                         "Adjust headphone level.")
    logging.info(f"Audio signal normalized to {target_phon:.2f} phons.")
    logging.info(f"Normalization factor: {ratio_loud:.2f}")
    logging.info(f"Headroom: {peak_db:.2f} dB")

    if return_ratio:
        return audio, fs, ratio_loud
    return audio, fs
def fs_to_pressure(
        audio: np.ndarray,
        dbfs_db: float,
        p0: float = 2e-5):
    """Scale a full-scale (FS) audio signal to sound pressure in pascals.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal in full-scale units.
    dbfs_db : float
        The sound pressure level associated with 0 dBFS.
    p0 : float, optional
        The reference sound pressure in pascals (Pa), by default 2e-5.

    Returns
    -------
    np.ndarray
        The audio signal multiplied by ``p0 * 10**(dbfs_db/20)``, i.e.
        expressed as pressure (Pa).
    """
    # Linear pressure (Pa) that a full-scale sample value of 1.0 maps to.
    full_scale_pressure = p0 * 10 ** (dbfs_db / 20)
    return audio * full_scale_pressure
Converts audio from full-scale (FS) to pressure (Pa).
Parameters
- audio (np.ndarray): The input audio signal.
- dbfs_db (float): The sound pressure level associated with 0 dBFS.
- p0 (float, optional): The reference sound pressure in pascals (Pa), by default 2e-5.
Returns
- np.ndarray: The audio signal converted to pressure (Pa).
def eq_loudness_lvl(
        audio_pressure: np.ndarray,
        fs: int,
        field_type: str = "diffuse"):
    """
    Calculate the loudness level of an audio signal from its mean loudness.

    Parameters
    ----------
    audio_pressure : ndarray
        Array containing the audio pressure values. A 1-D array is used
        as is; an array with more dimensions is averaged over axis 1
        before the loudness is computed.
    fs : int
        Sampling frequency of the audio signal.
    field_type : str, optional
        Type of sound field. Possible values are "diffuse" (default) or "free".

    Returns
    -------
    float
        The loudness level in phon.
    tuple
        A tuple containing the loudness values and corresponding time values.
    """
    # The signal is always resampled to this rate before calling mosqito.
    target_fs = 48000
    audio_pressure = np.asarray(audio_pressure)
    # A 1-D (mono) signal has no axis 1 to average over.
    if audio_pressure.ndim > 1:
        audio_pressure = audio_pressure.mean(axis=1)
    n_resampled = int(audio_pressure.shape[0] * target_fs / fs)
    audio_pressure = resample(audio_pressure, n_resampled)
    N, _, _, time = mosqito.loudness_zwtv(
        audio_pressure,
        target_fs,
        field_type=field_type
    )
    # Loudness level is derived from the loudness averaged over time.
    loud_lvl = 40 + 10 * np.log2(N.mean())
    return loud_lvl, (N, time)
Calculate the loudness level (in phon) of an audio signal from its time-averaged loudness.
Parameters
- audio_pressure (ndarray): Array containing the audio pressure values.
- fs (int): Sampling frequency of the audio signal.
- field_type (str, optional): Type of sound field. Possible values are "diffuse" (default) or "free".
Returns
- float: The loudness level in phon.
- tuple: A tuple containing the loudness values and corresponding time values.
def peak_normalize(
        audio: np.ndarray,
        fs: int,
        peak: float = 0,
        reference: np.ndarray = None
        ) -> tuple[np.ndarray, int]:
    """
    Normalize the peak level of an audio signal.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    fs : int
        The sample rate of the audio signal.
    peak : float, optional
        The desired peak level in decibels (dB), by default 0.
    reference : np.ndarray, optional
        The reference audio signal for normalization, by default None.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the normalized audio signal and the sample rate,
        which is passed through unchanged.

    Notes
    -----
    The scaling factor is ``10**(peak/20)`` divided by the maximum absolute
    value of the reference signal. If no reference signal is provided, the
    input audio signal itself is used as the reference.
    """
    if reference is None:
        reference = audio
    factor = 10 ** (peak / 20) / np.abs(reference).max()
    logging.info(f'Stimuli was peak normalized to {peak:.1f} dB')
    return audio * factor, fs
Normalize the peak level of an audio signal.
Parameters
- audio (np.ndarray): The input audio signal.
- fs (int): The sample rate of the audio signal.
- peak (float, optional): The desired peak level in decibels (dB), by default 0.
- reference (np.ndarray, optional): The reference audio signal for normalization, by default None.
Returns
- tuple[np.ndarray, float]: A tuple containing the normalized audio signal and the sample rate.
Notes
This function normalizes the peak level of the input audio signal to the specified peak level. If a reference audio signal is provided, the normalization is performed relative to the peak level of the reference signal. If no reference signal is provided, the normalization is performed relative to the peak level of the input audio signal itself. The normalization factor is calculated based on the desired peak level and the maximum absolute value of the reference signal.
def rms_normalize(
        audio: np.ndarray,
        fs: int,
        rms: float = -9,
        reference: np.ndarray = None
        ) -> tuple[np.ndarray, int]:
    """
    Normalize the audio signal to a target RMS level.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    fs : int
        The sample rate of the audio signal.
    rms : float, optional
        The target RMS level in decibels (dB), by default -9 dB.
    reference : np.ndarray, optional
        The reference audio signal used for normalization, by default None.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the normalized audio signal and the sample rate,
        which is passed through unchanged.

    Notes
    -----
    The scaling factor is ``10**(rms/20)`` divided by the RMS value of the
    reference signal. If no reference signal is provided, the input audio
    signal itself is used as the reference.
    """
    if reference is None:
        reference = audio
    # Square in float64 so integer PCM references (e.g. int16) cannot overflow.
    mean_square = np.mean(np.square(reference, dtype=np.float64))
    # A plain Python float keeps the dtype of floating-point `audio` intact.
    factor = float(10 ** (rms / 20) / np.sqrt(mean_square))
    logging.info(f'Stimuli was normalized to RMS average of {rms:.1f} dB')
    return audio * factor, fs
Normalize the audio signal to a target RMS level.
Parameters
- audio (np.ndarray): The input audio signal.
- fs (int): The sample rate of the audio signal.
- rms (float, optional): The target RMS level in decibels (dB), by default -9 dB.
- reference (np.ndarray, optional): The reference audio signal used for normalization, by default None.
Returns
- tuple[np.ndarray, float]: A tuple containing the normalized audio signal and the sample rate.
Notes
This function normalizes the audio signal to a target RMS level specified in decibels (dB). If a reference audio signal is provided, the normalization is performed relative to the RMS level of the reference signal. If no reference signal is provided, the normalization is performed relative to the RMS level of the input audio signal. The resulting normalized audio signal is multiplied by a scaling factor to achieve the target RMS level.
def lufs_normalize(
        audio: np.ndarray,
        fs: int,
        lufs: float = -16,
        reference: np.ndarray = None
        ) -> tuple[np.ndarray, int]:
    """Normalize the loudness of an audio signal to a target LUFS level.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal as a NumPy array.
    fs : int
        The sample rate of the audio signal.
    lufs : float, optional
        The target loudness level in LUFS (Loudness Units Full Scale). Default is -16 LUFS.
    reference : np.ndarray, optional
        The reference audio signal to calculate the loudness. If not provided, the input audio signal is used as the reference.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the normalized audio signal as a NumPy array and the sample rate as an int.
    """
    if reference is None:
        reference = audio
    meter = pyln.Meter(fs)
    loudness = meter.integrated_loudness(reference)
    # The loudness difference is applied as a plain gain in dB.
    delta = lufs - loudness
    factor = 10 ** (delta / 20)
    logging.info(f'Stimuli was loudness normalized to {lufs:.1f} dB LUFS')
    return audio * factor, fs
Normalize the loudness of an audio signal to a target LUFS level.
Parameters
- audio (np.ndarray): The input audio signal as a NumPy array.
- fs (int): The sample rate of the audio signal.
- lufs (float, optional): The target loudness level in LUFS (Loudness Units Full Scale). Default is -16 LUFS.
- reference (np.ndarray, optional): The reference audio signal to calculate the loudness. If not provided, the input audio signal is used as the reference.
Returns
- tuple[np.ndarray, int]: A tuple containing the normalized audio signal as a NumPy array and the sample rate as an int.
def ir_sum_normalize(
        audio: np.ndarray,
        ir: np.ndarray,
        fs: int,
        ir_sum: float = -9):
    """Scale the audio by a gain derived from the impulse response (IR) sum.

    The gain is 10^(ir_sum/20) divided by the sum of the absolute values
    of the IR; the audio signal is multiplied by that gain.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    ir : np.ndarray
        The impulse response (IR) signal.
    fs : int
        The sampling rate of the audio signal.
    ir_sum : float, optional
        The desired sum of the IR in decibels (dB), by default -9.

    Returns
    -------
    np.ndarray
        The normalized audio signal.
    int
        The sampling rate of the normalized audio signal.
    """
    target_linear = 10 ** (ir_sum / 20)
    ir_abs_total = abs(ir).sum()
    gain = target_linear / ir_abs_total
    logging.info(f'Stimuli was normalized based IR sum to {ir_sum:.1f} dB')
    scaled = audio * gain
    return scaled, fs
Normalize the audio based on the sum of the impulse response (IR).
This function normalizes the given audio signal based on the sum of the absolute values of the impulse response (IR). The normalization factor is calculated as 10^(ir_sum/20) divided by the sum of the absolute values of the IR. The audio signal is then multiplied by this factor to achieve the desired normalization.
Parameters
- audio (np.ndarray): The input audio signal.
- ir (np.ndarray): The impulse response (IR) signal.
- fs (int): The sampling rate of the audio signal.
- ir_sum (float, optional): The desired sum of the IR in decibels (dB), by default -9.
Returns
- np.ndarray: The normalized audio signal.
- int: The sampling rate of the normalized audio signal.