读取 24 位 FLAC 和 WAV 文件的数据
我成功地使用readData读取16位音频文件并生成用于波形显示的峰值文件。但是,我在解释 24 位 FLAC 和 WAV 文件的 PCM 值时遇到一些问题。
首先,24 位的块大小是多少?
16 位有符号值的范围是 -32768 到 +32768,24 位的范围是 -8388607 到 +8388607。
我对 16 位文件使用 4096 字节块大小 (65536 / 16 = 4096)。它对于检测峰值效果很好。
如果我用 24 位进行相同的计算,则 16777215 / 24 = 699050.625 字节。我错了吗?我想我必须使用 32 位变量来存储 24 位值。但是读取文件时应该使用多大的块大小? 699051?如何调整浮点数组的转换?
下面是我用来生成 16 位 PCM 数据峰值文件的完整 C# 代码。我故意将 24 位代码留空,因为它不起作用。有些代码引用了我自己的 FMOD 包装器,但它应该很容易理解。
// Declare variables
FMOD.RESULT result = FMOD.RESULT.OK;
FileStream fileStream = null;
BinaryWriter binaryWriter = null;
GZipStream gzipStream = null;
bool generatePeakFile = false;
int CHUNKSIZE = 0;
uint length = 0;
uint read = 0;
uint bytesread = 0;
Int16[] left16BitArray = null;
Int16[] right16BitArray = null;
Int32[] left32BitArray = null;
Int32[] right32BitArray = null;
float[] floatLeft = null;
float[] floatRight = null;
byte[] buffer = null;
IntPtr data = new IntPtr(); // initialized properly later
WaveDataMinMax minMax = null;
try
{
// Set current file directory
m_peakFileDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) + "\\Peak Files\\";
// Get file name from argument
string fileName = (string)e.Argument;
// Create sound system with NOSOUND
MPfm.Sound.System soundSystem = new MPfm.Sound.System(FMOD.OUTPUTTYPE.NOSOUND, string.Empty);
// Create sound
MPfm.Sound.Sound sound = soundSystem.CreateSound(fileName, false);
// Get sound format; specifically bits per sample (changes the calculations later)
SoundFormat soundFormat = sound.GetSoundFormat();
// Get the length of the file in PCM bytes
sound.BaseSound.getLength(ref length, FMOD.TIMEUNIT.PCMBYTES);
// Check if the folder for peak files exists
if (!Directory.Exists(PeakFileDirectory))
{
// Create directory
Directory.CreateDirectory(PeakFileDirectory);
}
// Generate the file name for the peak file by using the full path without special characters
string peakFilePath = PeakFileDirectory + fileName.Replace(@"\", "_").Replace(":", "_").Replace(".", "_") + ".mpfmPeak";
// Check if peak file exists
if(!File.Exists(peakFilePath))
{
// Set flag
generatePeakFile = true;
// Create peak file
fileStream = new FileStream(peakFilePath, FileMode.Create, FileAccess.Write);
binaryWriter = new BinaryWriter(fileStream);
gzipStream = new GZipStream(fileStream, CompressionMode.Compress);
}
// Check the bits per sample to determine what chunk size to get
if (soundFormat.BitsPerSample == 16)
{
// 4096 bytes for 16-bit PCM data
CHUNKSIZE = 4096;
}
else if (soundFormat.BitsPerSample == 24)
{
// 699050.625 bytes for 24-bit PCM data (???)
CHUNKSIZE = 699051;
}
// Create buffer
data = Marshal.AllocHGlobal(CHUNKSIZE);
buffer = new byte[CHUNKSIZE];
// Loop through file using chunk size
do
{
// Check for cancel
if (m_workerWaveForm.CancellationPending)
{
return;
}
// Check the bits per sample
if (soundFormat.BitsPerSample == 16)
{
// Read data chunk (4096 bytes for 16-bit PCM data)
result = sound.BaseSound.readData(data, (uint)CHUNKSIZE, ref read);
Marshal.Copy(data, buffer, 0, CHUNKSIZE);
bytesread += read;
// Is freehglobal needed? it crashes after one use.
//Marshal.FreeHGlobal(data);
// Convert the byte (8-bit) arrays into a short (16-bit) arrays (signed values)
left16BitArray = new Int16[buffer.Length / 4];
right16BitArray = new Int16[buffer.Length / 4];
// Loop through byte (8-bit) array buffer; increment by 4 (i.e. 4 times more data in 16-bit than 8-bit)
for (int i = 0; i < buffer.Length; i = i + 4)
{
// Convert values to 16-bit
left16BitArray[i / 4] = BitConverter.ToInt16(buffer, i);
right16BitArray[i / 4] = BitConverter.ToInt16(buffer, i + 2); // alternate between left and right channel
}
// Convert the short arrays to float arrays (signed values)
// This will convert the -32768 to 32768 value range to -1 to 1 (useful for wave display)
floatLeft = new float[left16BitArray.Length];
floatRight = new float[left16BitArray.Length];
for (int i = 0; i < left16BitArray.Length; i++)
{
// 16-bit data for unsigned values range from 0 to 65536.
floatLeft[i] = left16BitArray[i] / 65536.0f;
floatRight[i] = right16BitArray[i] / 65536.0f;
}
}
else if (soundFormat.BitsPerSample == 24)
{
// (non-working code removed)
// (I have no idea if this works) Convert the short arrays to float arrays (signed values)
// This will convert the -8388608 to 8388608value range to -1 to 1 (useful for wave display)
floatLeft = new float[left32BitArray.Length];
floatRight = new float[left32BitArray.Length];
for (int i = 0; i < left32BitArray.Length; i++)
{
// 16-bit data for unsigned values range from 0 to 16777215.
floatLeft[i] = left32BitArray[i] / 16777215.0f;
floatRight[i] = right32BitArray[i] / 16777215.0f;
}
}
// Calculate min/max
minMax = AudioTools.GetMinMaxFromWaveData(floatLeft, floatRight, false);
WaveDataHistory.Add(minMax);
// Report progress
m_bytesRead = bytesread;
m_totalBytes = length;
m_percentageDone = ((float)bytesread / (float)length) * 100;
// Write peak information to hard disk
if (generatePeakFile)
{
// Write peak information
binaryWriter.Write((double)minMax.leftMin);
binaryWriter.Write((double)minMax.leftMax);
binaryWriter.Write((double)minMax.rightMin);
binaryWriter.Write((double)minMax.rightMax);
binaryWriter.Write((double)minMax.mixMin);
binaryWriter.Write((double)minMax.mixMax);
}
}
while (result == FMOD.RESULT.OK && read == CHUNKSIZE);
// Release sound from memory
sound.Release();
// Close sound system and release from memory
soundSystem.Close();
soundSystem.Release();
// Set nulls for garbage collection
sound = null;
soundSystem = null;
left16BitArray = null;
right16BitArray = null;
left32BitArray = null;
right32BitArray = null;
floatLeft = null;
floatRight = null;
buffer = null;
minMax = null;
}
catch (Exception ex)
{
throw ex;
}
finally
{
// Did we have to generate a peak file?
if (generatePeakFile)
{
// Close writer and stream
gzipStream.Close();
binaryWriter.Close();
fileStream.Close();
// Set nulls
gzipStream = null;
binaryWriter = null;
fileStream = null;
}
}
// Call garbage collector
GC.Collect();
这是从浮点数组中提取最小/最大值的方法:
/// <summary>
/// This method takes the left channel and right channel wave raw data and analyses it to get
/// the maximum and minimum values in the float structure. It returns a data structure named
/// WaveDataMinMax (see class description for more information). Negative values can be converted to
/// positive values before min and max comparaison. Set this parameter to true for output meters and
/// false for wave form display controls.
/// </summary>
/// <param name="waveDataLeft">Raw wave data (left channel)</param>
/// <param name="waveDataRight">Raw wave data (right channel)</param>
/// <param name="convertNegativeToPositive">Convert negative values to positive values (ex: true when used for output meters,
/// false when used with wave form display controls (since the negative value is used to draw the bottom end of the waveform).<</param>
/// <returns>WaveDataMinMax data structure</returns>
public static WaveDataMinMax GetMinMaxFromWaveData(float[] waveDataLeft, float[] waveDataRight, bool convertNegativeToPositive)
{
// Create default data
WaveDataMinMax data = new WaveDataMinMax();
// Loop through values to get min/max
for (int i = 0; i < waveDataLeft.Length; i++)
{
// Set values to compare
float left = waveDataLeft[i];
float right = waveDataRight[i];
// Do we have to convert values before comparaison?
if (convertNegativeToPositive)
{
// Compare values, if negative then remove negative sign
if (left < 0)
{
left = -left;
}
if (right < 0)
{
right = -right;
}
}
// Calculate min/max for left channel
if (left < data.leftMin)
{
data.leftMin = left;
}
if (left > data.leftMax)
{
data.leftMax = left;
}
// Calculate min/max for right channel
if (right < data.rightMin)
{
data.rightMin = right;
}
if (right > data.rightMax)
{
data.rightMax = right;
}
// Calculate min/max mixing both channels
if (left < data.mixMin)
{
data.mixMin = left;
}
if (right < data.mixMin)
{
data.mixMin = right;
}
if (left > data.mixMax)
{
data.mixMax = left;
}
if (right > data.mixMax)
{
data.mixMax = right;
}
}
return data;
}
任何人都可以给我一个关于该怎么做的提示吗?我希望我的代码不太糟糕并且可以用作 16 位文件的示例。感谢您的帮助!
编辑:
这是使用 32 位变量转换代码从 3x8 位到 24 位:
left32BitArray = new Int32[buffer.Length / 6];
right32BitArray = new Int32[buffer.Length / 6];
for (int i = 0; i < buffer.Length; i = i + 6)
{
// Create smaller array in order to add the 4th 8-bit value
byte[] byteArrayLeft = new byte[4] {buffer[i], buffer[i + 1], buffer[i + 2], 0 };
byte[] byteArrayRight = new byte[4] { buffer[i + 3], buffer[i + 4], buffer[i + 5], 0 };
// Convert values to 32-bit variables
left32BitArray[i / 6] = BitConverter.ToInt32(byteArrayLeft, 0);
right32BitArray[i / 6] = BitConverter.ToInt32(byteArrayRight, 0);
}
I used readData successfully to read 16-bit audio files and generate peak files for wave form display. However, I'm having some trouble interpreting PCM values for 24-bit FLAC and WAV files.
First, what is the block size for 24-bit?
16-bit signed values ranges from -32768 to +32768 and 24-bit ranges from -8388607 to +8388607.
I used 4096 bytes block size for 16-bit files (65536 / 16 = 4096). It works fine for detecting peaks.
If I do the same calculation with 24-bit, 16777215 / 24 = 699050.625 bytes. Am I mistaken? I guess I have to use 32-bit variables to store the 24-bit values. But what block size should I use when reading the file? 699051? How do I adjust the conversion to float arrays?
Here is the full C# code I'm using to generate peak files for 16-bit PCM data. I've left the 24-bit code empty on purpose since it doesn't work. Some code references my own FMOD wrapper but it should be simple to understand.
// Declare variables
FMOD.RESULT result = FMOD.RESULT.OK;
FileStream fileStream = null;
BinaryWriter binaryWriter = null;
GZipStream gzipStream = null;
bool generatePeakFile = false;
int CHUNKSIZE = 0;
uint length = 0;
uint read = 0;
uint bytesread = 0;
Int16[] left16BitArray = null;
Int16[] right16BitArray = null;
Int32[] left32BitArray = null;
Int32[] right32BitArray = null;
float[] floatLeft = null;
float[] floatRight = null;
byte[] buffer = null;
IntPtr data = new IntPtr(); // initialized properly later
WaveDataMinMax minMax = null;
try
{
// Set current file directory
m_peakFileDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) + "\\Peak Files\\";
// Get file name from argument
string fileName = (string)e.Argument;
// Create sound system with NOSOUND
MPfm.Sound.System soundSystem = new MPfm.Sound.System(FMOD.OUTPUTTYPE.NOSOUND, string.Empty);
// Create sound
MPfm.Sound.Sound sound = soundSystem.CreateSound(fileName, false);
// Get sound format; specifically bits per sample (changes the calculations later)
SoundFormat soundFormat = sound.GetSoundFormat();
// Get the length of the file in PCM bytes
sound.BaseSound.getLength(ref length, FMOD.TIMEUNIT.PCMBYTES);
// Check if the folder for peak files exists
if (!Directory.Exists(PeakFileDirectory))
{
// Create directory
Directory.CreateDirectory(PeakFileDirectory);
}
// Generate the file name for the peak file by using the full path without special characters
string peakFilePath = PeakFileDirectory + fileName.Replace(@"\", "_").Replace(":", "_").Replace(".", "_") + ".mpfmPeak";
// Check if peak file exists
if(!File.Exists(peakFilePath))
{
// Set flag
generatePeakFile = true;
// Create peak file
fileStream = new FileStream(peakFilePath, FileMode.Create, FileAccess.Write);
binaryWriter = new BinaryWriter(fileStream);
gzipStream = new GZipStream(fileStream, CompressionMode.Compress);
}
// Check the bits per sample to determine what chunk size to get
if (soundFormat.BitsPerSample == 16)
{
// 4096 bytes for 16-bit PCM data
CHUNKSIZE = 4096;
}
else if (soundFormat.BitsPerSample == 24)
{
// 699050.625 bytes for 24-bit PCM data (???)
CHUNKSIZE = 699051;
}
// Create buffer
data = Marshal.AllocHGlobal(CHUNKSIZE);
buffer = new byte[CHUNKSIZE];
// Loop through file using chunk size
do
{
// Check for cancel
if (m_workerWaveForm.CancellationPending)
{
return;
}
// Check the bits per sample
if (soundFormat.BitsPerSample == 16)
{
// Read data chunk (4096 bytes for 16-bit PCM data)
result = sound.BaseSound.readData(data, (uint)CHUNKSIZE, ref read);
Marshal.Copy(data, buffer, 0, CHUNKSIZE);
bytesread += read;
// Is freehglobal needed? it crashes after one use.
//Marshal.FreeHGlobal(data);
// Convert the byte (8-bit) arrays into a short (16-bit) arrays (signed values)
left16BitArray = new Int16[buffer.Length / 4];
right16BitArray = new Int16[buffer.Length / 4];
// Loop through byte (8-bit) array buffer; increment by 4 (i.e. 4 times more data in 16-bit than 8-bit)
for (int i = 0; i < buffer.Length; i = i + 4)
{
// Convert values to 16-bit
left16BitArray[i / 4] = BitConverter.ToInt16(buffer, i);
right16BitArray[i / 4] = BitConverter.ToInt16(buffer, i + 2); // alternate between left and right channel
}
// Convert the short arrays to float arrays (signed values)
// This will convert the -32768 to 32768 value range to -1 to 1 (useful for wave display)
floatLeft = new float[left16BitArray.Length];
floatRight = new float[left16BitArray.Length];
for (int i = 0; i < left16BitArray.Length; i++)
{
// 16-bit data for unsigned values range from 0 to 65536.
floatLeft[i] = left16BitArray[i] / 65536.0f;
floatRight[i] = right16BitArray[i] / 65536.0f;
}
}
else if (soundFormat.BitsPerSample == 24)
{
// (non-working code removed)
// (I have no idea if this works) Convert the short arrays to float arrays (signed values)
// This will convert the -8388608 to 8388608value range to -1 to 1 (useful for wave display)
floatLeft = new float[left32BitArray.Length];
floatRight = new float[left32BitArray.Length];
for (int i = 0; i < left32BitArray.Length; i++)
{
// 16-bit data for unsigned values range from 0 to 16777215.
floatLeft[i] = left32BitArray[i] / 16777215.0f;
floatRight[i] = right32BitArray[i] / 16777215.0f;
}
}
// Calculate min/max
minMax = AudioTools.GetMinMaxFromWaveData(floatLeft, floatRight, false);
WaveDataHistory.Add(minMax);
// Report progress
m_bytesRead = bytesread;
m_totalBytes = length;
m_percentageDone = ((float)bytesread / (float)length) * 100;
// Write peak information to hard disk
if (generatePeakFile)
{
// Write peak information
binaryWriter.Write((double)minMax.leftMin);
binaryWriter.Write((double)minMax.leftMax);
binaryWriter.Write((double)minMax.rightMin);
binaryWriter.Write((double)minMax.rightMax);
binaryWriter.Write((double)minMax.mixMin);
binaryWriter.Write((double)minMax.mixMax);
}
}
while (result == FMOD.RESULT.OK && read == CHUNKSIZE);
// Release sound from memory
sound.Release();
// Close sound system and release from memory
soundSystem.Close();
soundSystem.Release();
// Set nulls for garbage collection
sound = null;
soundSystem = null;
left16BitArray = null;
right16BitArray = null;
left32BitArray = null;
right32BitArray = null;
floatLeft = null;
floatRight = null;
buffer = null;
minMax = null;
}
catch (Exception ex)
{
throw ex;
}
finally
{
// Did we have to generate a peak file?
if (generatePeakFile)
{
// Close writer and stream
gzipStream.Close();
binaryWriter.Close();
fileStream.Close();
// Set nulls
gzipStream = null;
binaryWriter = null;
fileStream = null;
}
}
// Call garbage collector
GC.Collect();
Here is the method that extracts the min/max values from float arrays:
/// <summary>
/// This method takes the left channel and right channel wave raw data and analyses it to get
/// the maximum and minimum values in the float structure. It returns a data structure named
/// WaveDataMinMax (see class description for more information). Negative values can be converted to
/// positive values before min and max comparaison. Set this parameter to true for output meters and
/// false for wave form display controls.
/// </summary>
/// <param name="waveDataLeft">Raw wave data (left channel)</param>
/// <param name="waveDataRight">Raw wave data (right channel)</param>
/// <param name="convertNegativeToPositive">Convert negative values to positive values (ex: true when used for output meters,
/// false when used with wave form display controls (since the negative value is used to draw the bottom end of the waveform).<</param>
/// <returns>WaveDataMinMax data structure</returns>
public static WaveDataMinMax GetMinMaxFromWaveData(float[] waveDataLeft, float[] waveDataRight, bool convertNegativeToPositive)
{
// Create default data
WaveDataMinMax data = new WaveDataMinMax();
// Loop through values to get min/max
for (int i = 0; i < waveDataLeft.Length; i++)
{
// Set values to compare
float left = waveDataLeft[i];
float right = waveDataRight[i];
// Do we have to convert values before comparaison?
if (convertNegativeToPositive)
{
// Compare values, if negative then remove negative sign
if (left < 0)
{
left = -left;
}
if (right < 0)
{
right = -right;
}
}
// Calculate min/max for left channel
if (left < data.leftMin)
{
data.leftMin = left;
}
if (left > data.leftMax)
{
data.leftMax = left;
}
// Calculate min/max for right channel
if (right < data.rightMin)
{
data.rightMin = right;
}
if (right > data.rightMax)
{
data.rightMax = right;
}
// Calculate min/max mixing both channels
if (left < data.mixMin)
{
data.mixMin = left;
}
if (right < data.mixMin)
{
data.mixMin = right;
}
if (left > data.mixMax)
{
data.mixMax = left;
}
if (right > data.mixMax)
{
data.mixMax = right;
}
}
return data;
}
Can anybody give me a hint on what to do? I hope my code isn't too bad and be can used as an example for 16-bit files. Thanks for any help!
EDIT:
Here is the 3x8-bit to 24-bit using a 32-bit variable conversion code:
left32BitArray = new Int32[buffer.Length / 6];
right32BitArray = new Int32[buffer.Length / 6];
for (int i = 0; i < buffer.Length; i = i + 6)
{
// Create smaller array in order to add the 4th 8-bit value
byte[] byteArrayLeft = new byte[4] {buffer[i], buffer[i + 1], buffer[i + 2], 0 };
byte[] byteArrayRight = new byte[4] { buffer[i + 3], buffer[i + 4], buffer[i + 5], 0 };
// Convert values to 32-bit variables
left32BitArray[i / 6] = BitConverter.ToInt32(byteArrayLeft, 0);
right32BitArray[i / 6] = BitConverter.ToInt32(byteArrayRight, 0);
}
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
24 位音频文件的块对齐为 3 * 通道数。为什么不选择 100 毫秒的音频:
这对于 24 位 WAV 来说效果很好。您的 FLAC 阅读器是否允许您读出该粒度取决于其内部实现。
24 bit audio files have a block align of 3 * number of channels. Why not go for 100ms of audio:
This will work fine for 24 bit WAV. Whether or not your FLAC reader lets you read out to that granularity depends on its internal implementation.