listeningpy.normalization

This module contains functions for normalizing audio signals.

View Source

  1"""
  2This module contains functions for normalizing audio signals.
  3"""
  4import numpy as np
  5from scipy.signal import resample
  6import pyloudnorm as pyln
  7import mosqito
  8import logging
  9
 10def fs_to_pressure(
 11        audio : np.ndarray,
 12        dbfs_db : float,
 13        p0 : float=2e-5):
 14    """Converts audio from full-scale (FS) to pressure (Pa).
 15
 16    Parameters
 17    ----------
 18    audio : np.ndarray
 19        The input audio signal.
 20    dbfs_db : float
 21        The sound pressure level associated with 0 dBFS.
 22    p0 : float, optional
 23        The reference sound pressure in pascals (Pa), by default 2e-5.
 24
 25    Returns
 26    -------
 27    np.ndarray
 28        The audio signal converted to pressure (Pa).
 29    """
 30    ratio_db = p0 * 10 ** (dbfs_db/20)
 31    audio_pressure = audio * ratio_db
 32    return audio_pressure
 33
 34def eq_loudness_lvl(
 35        audio_pressure : np.ndarray, 
 36        fs : int, 
 37        field_type : str="diffuse"):
 38    """
 39    Calculate the log average loudness level of an audio signal.
 40
 41    Parameters
 42    ----------
 43    audio_pressure : ndarray
 44        Array containing the audio pressure values.
 45    fs : int
 46        Sampling frequency of the audio signal.
 47    field_type : str, optional
 48        Type of sound field. Possible values are "diffuse" (default) or "free".
 49    
 50    Returns
 51    -------
 52    float
 53        The loudness level in phon.
 54    tuple
 55        A tuple containing the loudness values and corresponding time values.
 56    """
 57    audio_pressure = audio_pressure.mean(axis=1)
 58    audio_pressure = resample(audio_pressure, int(audio_pressure.shape[0]*48000/fs))
 59    fs=48000
 60    N, N_spec, _, time = mosqito.loudness_zwtv(
 61        audio_pressure, 
 62        fs, 
 63        field_type=field_type
 64    )
 65    loud_lvl = 40 + 10*np.log2(N.mean())
 66    return loud_lvl, (N, time)
 67
 68def peak_normalize(
 69        audio : np.ndarray, 
 70        fs : int, 
 71        peak : float=0, 
 72        reference : np.ndarray=None
 73        ) -> tuple[np.ndarray, float]:
 74    """
 75    Normalize the peak level of an audio signal.
 76
 77    Parameters
 78    ----------
 79    audio : np.ndarray
 80        The input audio signal.
 81    fs : int
 82        The sample rate of the audio signal.
 83    peak : float, optional
 84        The desired peak level in decibels (dB), by default 0.
 85    reference : np.ndarray, optional
 86        The reference audio signal for normalization, by default None.
 87
 88    Returns
 89    -------
 90    tuple[np.ndarray, float]
 91        A tuple containing the normalized audio signal and the sample rate.
 92
 93    Notes
 94    -----
 95    This function normalizes the peak level of the input audio signal to the specified peak level.
 96    If a reference audio signal is provided, the normalization is performed relative to the peak level of the reference signal.
 97    If no reference signal is provided, the normalization is performed relative to the peak level of the input audio signal itself.
 98    The normalization factor is calculated based on the desired peak level and the maximum absolute value of the reference signal.
 99    """
100    if type(reference) == type(None):
101        reference = audio
102    factor = 10**(peak/20) / abs(reference).max()
103    logging.info(f'Stimuli was peak normalized to {peak:.1f} dB')
104    return audio * factor, fs
105
def rms_normalize(
        audio : np.ndarray,
        fs : int,
        rms : float=-9,
        reference : np.ndarray=None
        ) -> tuple[np.ndarray, int]:
    """
    Normalize the audio signal to a target RMS level.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    fs : int
        The sample rate of the audio signal.
    rms : float, optional
        The target RMS level in decibels (dB), by default -9 dB.
    reference : np.ndarray, optional
        The reference audio signal used for normalization, by default None.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the scaled audio signal and the unchanged sample rate.

    Notes
    -----
    The scaling factor is ``10**(rms/20)`` divided by the RMS value of the
    reference signal (computed over all of its elements). If no reference is
    given, the input audio itself is used as the reference. Integer
    references are converted to float before squaring, because squaring
    fixed-width integers (e.g. int16 PCM) overflows silently.
    """
    if reference is None:
        reference = audio
    reference = np.asarray(reference)
    if reference.dtype.kind in "iu":
        # Avoid wrap-around in ``reference**2`` for integer sample formats.
        reference = reference.astype(np.float64)
    factor = 10**(rms/20) / np.sqrt(np.mean(reference**2))
    logging.info(f'Stimuli was normalized to RMS average of {rms:.1f} dB')
    return audio * factor, fs
143
def lufs_normalize(
        audio : np.ndarray,
        fs : int,
        lufs : float=-16,
        reference : np.ndarray=None
        ) -> tuple[np.ndarray, int]:
    """Normalize the loudness of an audio signal to a target LUFS level.

    The integrated loudness of the reference is measured with a
    ``pyloudnorm.Meter`` created for ``fs``; the audio is then scaled by the
    linear gain ``10**((lufs - loudness)/20)``.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal as a NumPy array.
    fs : int
        The sample rate of the audio signal.
    lufs : float, optional
        The target loudness level in LUFS (Loudness Units Full Scale). Default is -16 LUFS.
    reference : np.ndarray, optional
        The reference audio signal to calculate the loudness. If not provided, the input audio signal is used as the reference.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the scaled audio signal as a NumPy array and the unchanged sample rate.
    """
    # Identity check instead of comparing types: the idiomatic None test.
    if reference is None:
        reference = audio
    meter = pyln.Meter(fs)
    loudness = meter.integrated_loudness(reference)
    # Gain in dB needed to move the measured loudness onto the target.
    delta = lufs - loudness
    factor = 10**(delta/20)
    logging.info(f'Stimuli was loudness normalized to {lufs:.1f} dB LUFS')
    return audio * factor, fs
176
def ir_sum_normalize(
        audio : np.ndarray,
        ir : np.ndarray,
        fs : int,
        ir_sum : float=-9):
    """Scale the audio by a gain derived from the impulse response (IR).

    The gain is ``10**(ir_sum/20)`` divided by the sum of the absolute values
    of all IR samples; the audio is multiplied by that gain.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    ir : np.ndarray
        The impulse response (IR) signal.
    fs : int
        The sampling rate of the audio signal.
    ir_sum : float, optional
        The desired sum of the IR in decibels (dB), by default -9.

    Returns
    -------
    np.ndarray
        The scaled audio signal.
    int
        The sampling rate, passed through unchanged.
    """
    target_linear = 10**(ir_sum/20)
    ir_abs_total = abs(ir).sum()
    gain = target_linear / ir_abs_total
    logging.info(f'Stimuli was normalized based IR sum to {ir_sum:.1f} dB')
    scaled = audio * gain
    return scaled, fs
209
def zwicker_loudness_normalize(
        audio : np.ndarray,
        fs : int,
        target_phon : float,
        dbfs_db: float,
        return_ratio: bool = False,
        max_iter: int = 100
        ) -> tuple[np.ndarray, int]:
    """Iteratively scale the audio until its loudness level hits a target.

    The audio is converted to pressure with ``fs_to_pressure`` and its
    loudness level is measured with ``eq_loudness_lvl``. The remaining
    difference to ``target_phon`` is applied as a gain in dB and the level is
    measured again, until the difference is at most 0.1.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal relative to full scale. It is not modified;
        a scaled copy is returned.
    fs : int
        The sample rate of the audio signal.
    target_phon : float
        The target loudness level in phon.
    dbfs_db : float
        The sound pressure level associated with 0 dBFS.
    return_ratio : bool, optional
        If True, the linear gain is returned as a third element, by default False.
    max_iter : int, optional
        Maximum number of gain-adjustment iterations, by default 100.

    Returns
    -------
    tuple
        ``(audio, fs)`` or, if ``return_ratio`` is True,
        ``(audio, fs, ratio_loud)``.

    Raises
    ------
    RuntimeError
        If the target is not reached within ``max_iter`` iterations.
    ValueError
        If the scaled audio exceeds an absolute value of 1 (would clip).
    """
    audio_pressure = fs_to_pressure(audio, dbfs_db)
    loud_lvl, _ = eq_loudness_lvl(audio_pressure, fs)
    loud_diff = target_phon - loud_lvl
    logging.info(f"Before normalization: N={loud_lvl:.1f}")
    ratio_loud = 1
    n = 0
    while abs(loud_diff) > 0.1:
        if n >= max_iter:
            # Loudness level is not linear in dB gain, so the fixed-point
            # iteration is not guaranteed to converge; do not loop forever.
            raise RuntimeError(
                f"Loudness normalization did not converge within {max_iter} "
                f"iterations (remaining difference: {loud_diff:.2f})."
            )
        ratio_loud *= 10 ** (loud_diff/20)
        loud_lvl, _ = eq_loudness_lvl(audio_pressure * ratio_loud, fs)
        loud_diff = target_phon - loud_lvl
        logging.debug(f"Remaining loudness difference: {loud_diff}")
        n += 1
    logging.info(f" After {n}th normalization: N={loud_lvl:.1f}")
    # Scale a copy so the caller's array is left untouched, in particular
    # when the clipping check below raises.
    audio = audio * ratio_loud
    peak_value = abs(audio).max()
    # Peak level relative to full scale: positive means overflow, negative
    # means that much headroom is left.
    peak_db = 20*np.log10(peak_value)
    if peak_value > 1:
        logging.warning("Audio signal clipped after normalization.")
        raise ValueError("Audio signal clipped after normalization. "+
                         f"The level overflows for {peak_db:.2f} dB. "+
                         "Adjust headphone level.")
    logging.info(f"Audio signal normalized to {target_phon:.2f} phons.")
    logging.info(f"Normalization factor: {ratio_loud:.2f}")
    logging.info(f"Headroom: {-peak_db:.2f} dB")

    if return_ratio:
        return audio, fs, ratio_loud
    return audio, fs

def fs_to_pressure(audio: numpy.ndarray, dbfs_db: float, p0: float = 2e-05): View Source

def fs_to_pressure(
        audio : np.ndarray,
        dbfs_db : float,
        p0 : float=2e-5):
    """Scale a full-scale (FS) signal so that its samples are in pascals.

    Parameters
    ----------
    audio : np.ndarray
        Signal expressed relative to digital full scale.
    dbfs_db : float
        Sound pressure level, in dB re ``p0``, that 0 dBFS corresponds to.
    p0 : float, optional
        Reference sound pressure in pascals (Pa), by default 2e-5.

    Returns
    -------
    np.ndarray
        ``audio`` multiplied by ``p0 * 10 ** (dbfs_db / 20)``.
    """
    # Linear pressure (Pa) represented by a full-scale amplitude of 1.0.
    pascals_at_full_scale = p0 * 10 ** (dbfs_db / 20)
    return audio * pascals_at_full_scale

Converts audio from full-scale (FS) to pressure (Pa).

Parameters

audio (np.ndarray): The input audio signal.
dbfs_db (float): The sound pressure level, in dB re p0, that corresponds to 0 dBFS.
p0 (float, optional): The reference sound pressure in pascals (Pa), by default 2e-5.

Returns

np.ndarray: The audio signal converted to pressure (Pa).

def eq_loudness_lvl(audio_pressure: numpy.ndarray, fs: int, field_type: str = 'diffuse'): View Source

def eq_loudness_lvl(
        audio_pressure : np.ndarray,
        fs : int,
        field_type : str="diffuse"):
    """
    Calculate the loudness level (phon) from the time-averaged loudness.

    Multi-channel input is down-mixed by averaging over axis 1; 1-D (mono)
    input is used as is. The signal is resampled to 48 kHz, passed to
    ``mosqito.loudness_zwtv`` and the mean of the returned loudness curve
    ``N`` is converted with ``40 + 10 * log2(mean(N))``.

    Parameters
    ----------
    audio_pressure : ndarray
        Array containing the audio pressure values, either 1-D (samples) or
        2-D (samples, channels).
    fs : int
        Sampling frequency of the audio signal.
    field_type : str, optional
        Type of sound field. Possible values are "diffuse" (default) or "free".

    Returns
    -------
    float
        The loudness level in phon.
    tuple
        A tuple containing the loudness values and corresponding time values.
    """
    target_fs = 48000
    audio_pressure = np.asarray(audio_pressure)
    # Only down-mix when there is a channel axis; a mono 1-D signal would
    # otherwise fail on ``mean(axis=1)``.
    if audio_pressure.ndim > 1:
        audio_pressure = audio_pressure.mean(axis=1)
    n_resampled = int(audio_pressure.shape[0]*target_fs/fs)
    audio_pressure = resample(audio_pressure, n_resampled)
    # The second and third return values are not needed here.
    N, _, _, time = mosqito.loudness_zwtv(
        audio_pressure,
        target_fs,
        field_type=field_type
    )
    loud_lvl = 40 + 10*np.log2(N.mean())
    return loud_lvl, (N, time)

Calculate the loudness level (in phon) of an audio signal from its time-averaged loudness; multi-channel input is averaged across channels first.

Parameters

audio_pressure (ndarray): Array containing the audio pressure values.
fs (int): Sampling frequency of the audio signal.
field_type (str, optional): Type of sound field. Possible values are "diffuse" (default) or "free".

Returns

float: The loudness level in phon.
tuple: A tuple containing the loudness values and corresponding time values.

def peak_normalize( audio: numpy.ndarray, fs: int, peak: float = 0, reference: numpy.ndarray = None) -> tuple[numpy.ndarray, float]: View Source

 69def peak_normalize(
 70        audio : np.ndarray, 
 71        fs : int, 
 72        peak : float=0, 
 73        reference : np.ndarray=None
 74        ) -> tuple[np.ndarray, float]:
 75    """
 76    Normalize the peak level of an audio signal.
 77
 78    Parameters
 79    ----------
 80    audio : np.ndarray
 81        The input audio signal.
 82    fs : int
 83        The sample rate of the audio signal.
 84    peak : float, optional
 85        The desired peak level in decibels (dB), by default 0.
 86    reference : np.ndarray, optional
 87        The reference audio signal for normalization, by default None.
 88
 89    Returns
 90    -------
 91    tuple[np.ndarray, float]
 92        A tuple containing the normalized audio signal and the sample rate.
 93
 94    Notes
 95    -----
 96    This function normalizes the peak level of the input audio signal to the specified peak level.
 97    If a reference audio signal is provided, the normalization is performed relative to the peak level of the reference signal.
 98    If no reference signal is provided, the normalization is performed relative to the peak level of the input audio signal itself.
 99    The normalization factor is calculated based on the desired peak level and the maximum absolute value of the reference signal.
100    """
101    if type(reference) == type(None):
102        reference = audio
103    factor = 10**(peak/20) / abs(reference).max()
104    logging.info(f'Stimuli was peak normalized to {peak:.1f} dB')
105    return audio * factor, fs

Normalize the peak level of an audio signal.

Parameters

audio (np.ndarray): The input audio signal.
fs (int): The sample rate of the audio signal.
peak (float, optional): The desired peak level in decibels (dB), by default 0.
reference (np.ndarray, optional): The reference audio signal for normalization, by default None.

Returns

tuple[np.ndarray, float]: A tuple containing the normalized audio signal and the sample rate.

Notes

This function normalizes the peak level of the input audio signal to the specified peak level. If a reference audio signal is provided, the normalization is performed relative to the peak level of the reference signal. If no reference signal is provided, the normalization is performed relative to the peak level of the input audio signal itself. The normalization factor is calculated based on the desired peak level and the maximum absolute value of the reference signal.

def rms_normalize( audio: numpy.ndarray, fs: int, rms: float = -9, reference: numpy.ndarray = None) -> tuple[numpy.ndarray, float]: View Source

def rms_normalize(
        audio : np.ndarray,
        fs : int,
        rms : float=-9,
        reference : np.ndarray=None
        ) -> tuple[np.ndarray, int]:
    """
    Normalize the audio signal to a target RMS level.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    fs : int
        The sample rate of the audio signal.
    rms : float, optional
        The target RMS level in decibels (dB), by default -9 dB.
    reference : np.ndarray, optional
        The reference audio signal used for normalization, by default None.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the scaled audio signal and the unchanged sample rate.

    Notes
    -----
    The scaling factor is ``10**(rms/20)`` divided by the RMS value of the
    reference signal (computed over all of its elements). If no reference is
    given, the input audio itself is used as the reference. Integer
    references are converted to float before squaring, because squaring
    fixed-width integers (e.g. int16 PCM) overflows silently.
    """
    if reference is None:
        reference = audio
    reference = np.asarray(reference)
    if reference.dtype.kind in "iu":
        # Avoid wrap-around in ``reference**2`` for integer sample formats.
        reference = reference.astype(np.float64)
    factor = 10**(rms/20) / np.sqrt(np.mean(reference**2))
    logging.info(f'Stimuli was normalized to RMS average of {rms:.1f} dB')
    return audio * factor, fs

Normalize the audio signal to a target RMS level.

Parameters

audio (np.ndarray): The input audio signal.
fs (int): The sample rate of the audio signal.
rms (float, optional): The target RMS level in decibels (dB), by default -9 dB.
reference (np.ndarray, optional): The reference audio signal used for normalization, by default None.

Returns

tuple[np.ndarray, float]: A tuple containing the normalized audio signal and the sample rate.

Notes

This function normalizes the audio signal to a target RMS level specified in decibels (dB). If a reference audio signal is provided, the normalization is performed relative to the RMS level of the reference signal. If no reference signal is provided, the normalization is performed relative to the RMS level of the input audio signal. The resulting normalized audio signal is multiplied by a scaling factor to achieve the target RMS level.

def lufs_normalize( audio: numpy.ndarray, fs: int, lufs: float = -16, reference: numpy.ndarray = None) -> tuple[numpy.ndarray, int]: View Source

def lufs_normalize(
        audio : np.ndarray,
        fs : int,
        lufs : float=-16,
        reference : np.ndarray=None
        ) -> tuple[np.ndarray, int]:
    """Normalize the loudness of an audio signal to a target LUFS level.

    The integrated loudness of the reference is measured with a
    ``pyloudnorm.Meter`` created for ``fs``; the audio is then scaled by the
    linear gain ``10**((lufs - loudness)/20)``.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal as a NumPy array.
    fs : int
        The sample rate of the audio signal.
    lufs : float, optional
        The target loudness level in LUFS (Loudness Units Full Scale). Default is -16 LUFS.
    reference : np.ndarray, optional
        The reference audio signal to calculate the loudness. If not provided, the input audio signal is used as the reference.

    Returns
    -------
    tuple[np.ndarray, int]
        A tuple containing the scaled audio signal as a NumPy array and the unchanged sample rate.
    """
    # Identity check instead of comparing types: the idiomatic None test.
    if reference is None:
        reference = audio
    meter = pyln.Meter(fs)
    loudness = meter.integrated_loudness(reference)
    # Gain in dB needed to move the measured loudness onto the target.
    delta = lufs - loudness
    factor = 10**(delta/20)
    logging.info(f'Stimuli was loudness normalized to {lufs:.1f} dB LUFS')
    return audio * factor, fs

Normalize the loudness of an audio signal to a target LUFS level.

Parameters

audio (np.ndarray): The input audio signal as a NumPy array.
fs (int): The sample rate of the audio signal.
lufs (float, optional): The target loudness level in LUFS (Loudness Units Full Scale). Default is -16 LUFS.
reference (np.ndarray, optional): The reference audio signal to calculate the loudness. If not provided, the input audio signal is used as the reference.

Returns

tuple[np.ndarray, int]: A tuple containing the normalized audio signal as a NumPy array and the sample rate as an int.

def ir_sum_normalize(audio: numpy.ndarray, ir: numpy.ndarray, fs: int, ir_sum: float = -9): View Source

def ir_sum_normalize(
        audio : np.ndarray,
        ir : np.ndarray,
        fs : int,
        ir_sum : float=-9):
    """Scale the audio by a gain derived from the impulse response (IR).

    The gain is ``10**(ir_sum/20)`` divided by the sum of the absolute values
    of all IR samples; the audio is multiplied by that gain.

    Parameters
    ----------
    audio : np.ndarray
        The input audio signal.
    ir : np.ndarray
        The impulse response (IR) signal.
    fs : int
        The sampling rate of the audio signal.
    ir_sum : float, optional
        The desired sum of the IR in decibels (dB), by default -9.

    Returns
    -------
    np.ndarray
        The scaled audio signal.
    int
        The sampling rate, passed through unchanged.
    """
    target_linear = 10**(ir_sum/20)
    ir_abs_total = abs(ir).sum()
    gain = target_linear / ir_abs_total
    logging.info(f'Stimuli was normalized based IR sum to {ir_sum:.1f} dB')
    scaled = audio * gain
    return scaled, fs

Normalize the audio based on the sum of the impulse response (IR).

This function normalizes the given audio signal based on the sum of the absolute values of the impulse response (IR). The normalization factor is calculated as 10^(ir_sum/20) divided by the sum of the absolute values of the IR. The audio signal is then multiplied by this factor to achieve the desired normalization.

Parameters

audio (np.ndarray): The input audio signal.
ir (np.ndarray): The impulse response (IR) signal.
fs (int): The sampling rate of the audio signal.
ir_sum (float, optional): The desired sum of the IR in decibels (dB), by default -9.

Returns

np.ndarray: The normalized audio signal.
int: The sampling rate of the normalized audio signal.