Merge pull request #91014 from DeeJayLSP/qoa-wav-playback

Add QOA (Quite OK Audio) as a WAV compression mode
This commit is contained in:
Rémi Verschelde 2024-05-02 12:46:32 +02:00
commit 9cb3a16a8e
No known key found for this signature in database
GPG key ID: C3336907360768E1
9 changed files with 1059 additions and 45 deletions

View file

@ -411,6 +411,11 @@ Comment: PolyPartition / Triangulator
Copyright: 2011-2021, Ivan Fratric and contributors
License: Expat
Files: ./thirdparty/misc/qoa.h
Comment: Quite OK Audio Format
Copyright: 2023, Dominic Szablewski
License: Expat
Files: ./thirdparty/misc/r128.c
./thirdparty/misc/r128.h
Comment: r128 library

View file

@ -15,7 +15,7 @@
<return type="int" enum="Error" />
<param index="0" name="path" type="String" />
<description>
Saves the AudioStreamWAV as a WAV file to [param path]. Samples with IMA ADPCM format can't be saved.
Saves the AudioStreamWAV as a WAV file to [param path]. Samples in IMA ADPCM or QOA format can't be saved.
[b]Note:[/b] A [code].wav[/code] extension is automatically appended to [param path] if it is missing.
</description>
</method>
@ -56,6 +56,9 @@
<constant name="FORMAT_IMA_ADPCM" value="2" enum="Format">
Audio is compressed using IMA ADPCM.
</constant>
<constant name="FORMAT_QOA" value="3" enum="Format">
Audio is compressed as QOA ([url=https://qoaformat.org/]Quite OK Audio[/url]).
</constant>
<constant name="LOOP_DISABLED" value="0" enum="LoopMode">
Audio does not loop.
</constant>

View file

@ -14,6 +14,7 @@
The compression mode to use on import.
[b]Disabled:[/b] Imports audio data without any compression. This results in the highest possible quality.
[b]RAM (Ima-ADPCM):[/b] Performs fast lossy compression on import. Low CPU cost, but quality is noticeably decreased compared to Ogg Vorbis or even MP3.
[b]QOA ([url=https://qoaformat.org/]Quite OK Audio[/url]):[/b] Performs lossy compression on import. CPU cost is slightly higher than IMA-ADPCM, but quality is much higher.
</member>
<member name="edit/loop_begin" type="int" setter="" getter="" default="0">
The begin loop point to use when [member edit/loop_mode] is [b]Forward[/b], [b]Ping-Pong[/b] or [b]Backward[/b]. This is set in seconds after the beginning of the audio file.

View file

@ -90,7 +90,7 @@ void ResourceImporterWAV::get_import_options(const String &p_path, List<ImportOp
r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "edit/loop_mode", PROPERTY_HINT_ENUM, "Detect From WAV,Disabled,Forward,Ping-Pong,Backward", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_UPDATE_ALL_IF_MODIFIED), 0));
r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "edit/loop_begin"), 0));
r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "edit/loop_end"), -1));
r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "compress/mode", PROPERTY_HINT_ENUM, "Disabled,RAM (Ima-ADPCM)"), 0));
r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "compress/mode", PROPERTY_HINT_ENUM, "Disabled,RAM (Ima-ADPCM),QOA (Quite OK Audio)"), 0));
}
Error ResourceImporterWAV::import(const String &p_source_file, const String &p_save_path, const HashMap<StringName, Variant> &p_options, List<String> *r_platform_variants, List<String> *r_gen_files, Variant *r_metadata) {
@ -454,13 +454,13 @@ Error ResourceImporterWAV::import(const String &p_source_file, const String &p_s
is16 = false;
}
Vector<uint8_t> dst_data;
Vector<uint8_t> pcm_data;
AudioStreamWAV::Format dst_format;
if (compression == 1) {
dst_format = AudioStreamWAV::FORMAT_IMA_ADPCM;
if (format_channels == 1) {
_compress_ima_adpcm(data, dst_data);
_compress_ima_adpcm(data, pcm_data);
} else {
//byte interleave
Vector<float> left;
@ -482,9 +482,9 @@ Error ResourceImporterWAV::import(const String &p_source_file, const String &p_s
_compress_ima_adpcm(right, bright);
int dl = bleft.size();
dst_data.resize(dl * 2);
pcm_data.resize(dl * 2);
uint8_t *w = dst_data.ptrw();
uint8_t *w = pcm_data.ptrw();
const uint8_t *rl = bleft.ptr();
const uint8_t *rr = bright.ptr();
@ -496,13 +496,14 @@ Error ResourceImporterWAV::import(const String &p_source_file, const String &p_s
} else {
dst_format = is16 ? AudioStreamWAV::FORMAT_16_BITS : AudioStreamWAV::FORMAT_8_BITS;
dst_data.resize(data.size() * (is16 ? 2 : 1));
bool enforce16 = is16 || compression == 2;
pcm_data.resize(data.size() * (enforce16 ? 2 : 1));
{
uint8_t *w = dst_data.ptrw();
uint8_t *w = pcm_data.ptrw();
int ds = data.size();
for (int i = 0; i < ds; i++) {
if (is16) {
if (enforce16) {
int16_t v = CLAMP(data[i] * 32768, -32768, 32767);
encode_uint16(v, &w[i * 2]);
} else {
@ -513,6 +514,23 @@ Error ResourceImporterWAV::import(const String &p_source_file, const String &p_s
}
}
// Final payload handed to the AudioStreamWAV: QOA-encoded bytes when QOA
// compression (mode 2) was requested, otherwise the bytes already built in pcm_data.
Vector<uint8_t> dst_data;
if (compression == 2) {
	dst_format = AudioStreamWAV::FORMAT_QOA;
	qoa_desc desc = { 0, 0, 0, { { { 0 }, { 0 } } } };
	uint32_t qoa_len = 0;

	desc.samplerate = rate;
	desc.samples = frames;
	desc.channels = format_channels;

	// qoa_encode() takes the samples as `const short *`, so the read-only pointer is enough.
	// The returned buffer is allocated with QOA_MALLOC and is owned by us: it can be null
	// if that allocation fails, and it must be released with QOA_FREE once copied.
	void *encoded = qoa_encode((const short *)pcm_data.ptr(), &desc, &qoa_len);
	if (encoded) {
		dst_data.resize(qoa_len);
		memcpy(dst_data.ptrw(), encoded, qoa_len);
		QOA_FREE(encoded);
	}
} else {
	dst_data = pcm_data;
}
Ref<AudioStreamWAV> sample;
sample.instantiate();
sample->set_data(dst_data);

View file

@ -86,15 +86,15 @@ void AudioStreamPlaybackWAV::seek(double p_time) {
offset = uint64_t(p_time * base->mix_rate) << MIX_FRAC_BITS;
}
template <typename Depth, bool is_stereo, bool is_ima_adpcm>
void AudioStreamPlaybackWAV::do_resample(const Depth *p_src, AudioFrame *p_dst, int64_t &p_offset, int32_t &p_increment, uint32_t p_amount, IMA_ADPCM_State *p_ima_adpcm) {
template <typename Depth, bool is_stereo, bool is_ima_adpcm, bool is_qoa>
void AudioStreamPlaybackWAV::do_resample(const Depth *p_src, AudioFrame *p_dst, int64_t &p_offset, int32_t &p_increment, uint32_t p_amount, IMA_ADPCM_State *p_ima_adpcm, QOA_State *p_qoa) {
// this function will be compiled branchless by any decent compiler
int32_t final, final_r, next, next_r;
int32_t final = 0, final_r = 0, next = 0, next_r = 0;
while (p_amount) {
p_amount--;
int64_t pos = p_offset >> MIX_FRAC_BITS;
if (is_stereo && !is_ima_adpcm) {
if (is_stereo && !is_ima_adpcm && !is_qoa) {
pos <<= 1;
}
@ -175,32 +175,77 @@ void AudioStreamPlaybackWAV::do_resample(const Depth *p_src, AudioFrame *p_dst,
}
} else {
final = p_src[pos];
if (is_stereo) {
final_r = p_src[pos + 1];
}
if (is_qoa) {
if (pos != p_qoa->cache_pos) { // Prevents triple decoding on lower mix rates.
for (int i = 0; i < 2; i++) {
// Sign operations prevent triple decoding on backward loops, maxing prevents pop.
uint32_t interp_pos = MIN(pos + (i * sign) + (sign < 0), p_qoa->desc->samples - 1);
uint32_t new_data_ofs = 8 + interp_pos / QOA_FRAME_LEN * p_qoa->frame_len;
if constexpr (sizeof(Depth) == 1) { /* conditions will not exist anymore when compiled! */
final <<= 8;
if (is_stereo) {
final_r <<= 8;
if (p_qoa->data_ofs != new_data_ofs) {
p_qoa->data_ofs = new_data_ofs;
const uint8_t *src_ptr = (const uint8_t *)base->data;
src_ptr += p_qoa->data_ofs + AudioStreamWAV::DATA_PAD;
qoa_decode_frame(src_ptr, p_qoa->frame_len, p_qoa->desc, p_qoa->dec, &p_qoa->dec_len);
}
uint32_t dec_idx = (interp_pos % QOA_FRAME_LEN) * p_qoa->desc->channels;
if ((sign > 0 && i == 0) || (sign < 0 && i == 1)) {
final = p_qoa->dec[dec_idx];
p_qoa->cache[0] = final;
if (is_stereo) {
final_r = p_qoa->dec[dec_idx + 1];
p_qoa->cache_r[0] = final_r;
}
} else {
next = p_qoa->dec[dec_idx];
p_qoa->cache[1] = next;
if (is_stereo) {
next_r = p_qoa->dec[dec_idx + 1];
p_qoa->cache_r[1] = next_r;
}
}
}
p_qoa->cache_pos = pos;
} else {
final = p_qoa->cache[0];
if (is_stereo) {
final_r = p_qoa->cache_r[0];
}
next = p_qoa->cache[1];
if (is_stereo) {
next_r = p_qoa->cache_r[1];
}
}
}
if (is_stereo) {
next = p_src[pos + 2];
next_r = p_src[pos + 3];
} else {
next = p_src[pos + 1];
}
if constexpr (sizeof(Depth) == 1) {
next <<= 8;
final = p_src[pos];
if (is_stereo) {
next_r <<= 8;
final_r = p_src[pos + 1];
}
if constexpr (sizeof(Depth) == 1) { /* conditions will not exist anymore when compiled! */
final <<= 8;
if (is_stereo) {
final_r <<= 8;
}
}
if (is_stereo) {
next = p_src[pos + 2];
next_r = p_src[pos + 3];
} else {
next = p_src[pos + 1];
}
if constexpr (sizeof(Depth) == 1) {
next <<= 8;
if (is_stereo) {
next_r <<= 8;
}
}
}
int32_t frac = int64_t(p_offset & MIX_FRAC_MASK);
final = final + ((next - final) * frac >> MIX_FRAC_BITS);
@ -240,6 +285,9 @@ int AudioStreamPlaybackWAV::mix(AudioFrame *p_buffer, float p_rate_scale, int p_
case AudioStreamWAV::FORMAT_IMA_ADPCM:
len *= 2;
break;
case AudioStreamWAV::FORMAT_QOA:
len = qoa.desc->samples * qoa.desc->channels;
break;
}
if (base->stereo) {
@ -368,27 +416,34 @@ int AudioStreamPlaybackWAV::mix(AudioFrame *p_buffer, float p_rate_scale, int p_
switch (base->format) {
case AudioStreamWAV::FORMAT_8_BITS: {
if (is_stereo) {
do_resample<int8_t, true, false>((int8_t *)data, dst_buff, offset, increment, target, ima_adpcm);
do_resample<int8_t, true, false, false>((int8_t *)data, dst_buff, offset, increment, target, ima_adpcm, &qoa);
} else {
do_resample<int8_t, false, false>((int8_t *)data, dst_buff, offset, increment, target, ima_adpcm);
do_resample<int8_t, false, false, false>((int8_t *)data, dst_buff, offset, increment, target, ima_adpcm, &qoa);
}
} break;
case AudioStreamWAV::FORMAT_16_BITS: {
if (is_stereo) {
do_resample<int16_t, true, false>((int16_t *)data, dst_buff, offset, increment, target, ima_adpcm);
do_resample<int16_t, true, false, false>((int16_t *)data, dst_buff, offset, increment, target, ima_adpcm, &qoa);
} else {
do_resample<int16_t, false, false>((int16_t *)data, dst_buff, offset, increment, target, ima_adpcm);
do_resample<int16_t, false, false, false>((int16_t *)data, dst_buff, offset, increment, target, ima_adpcm, &qoa);
}
} break;
case AudioStreamWAV::FORMAT_IMA_ADPCM: {
if (is_stereo) {
do_resample<int8_t, true, true>((int8_t *)data, dst_buff, offset, increment, target, ima_adpcm);
do_resample<int8_t, true, true, false>((int8_t *)data, dst_buff, offset, increment, target, ima_adpcm, &qoa);
} else {
do_resample<int8_t, false, true>((int8_t *)data, dst_buff, offset, increment, target, ima_adpcm);
do_resample<int8_t, false, true, false>((int8_t *)data, dst_buff, offset, increment, target, ima_adpcm, &qoa);
}
} break;
case AudioStreamWAV::FORMAT_QOA: {
if (is_stereo) {
do_resample<uint8_t, true, false, true>((uint8_t *)data, dst_buff, offset, increment, target, ima_adpcm, &qoa);
} else {
do_resample<uint8_t, false, false, true>((uint8_t *)data, dst_buff, offset, increment, target, ima_adpcm, &qoa);
}
} break;
}
dst_buff += target;
@ -412,6 +467,16 @@ void AudioStreamPlaybackWAV::tag_used_streams() {
AudioStreamPlaybackWAV::AudioStreamPlaybackWAV() {}
AudioStreamPlaybackWAV::~AudioStreamPlaybackWAV() {
	// The QOA buffers default to null and are only allocated for QOA streams,
	// so each one is released only if it was actually set.
	if (qoa.dec != nullptr) {
		memfree(qoa.dec);
	}
	if (qoa.desc != nullptr) {
		memfree(qoa.desc);
	}
}
/////////////////////
void AudioStreamWAV::set_format(Format p_format) {
@ -475,6 +540,10 @@ double AudioStreamWAV::get_length() const {
case AudioStreamWAV::FORMAT_IMA_ADPCM:
len *= 2;
break;
case AudioStreamWAV::FORMAT_QOA:
qoa_desc desc = { 0, 0, 0, { { { 0 }, { 0 } } } };
qoa_decode_header((uint8_t *)data + DATA_PAD, QOA_MIN_FILESIZE, &desc);
len = desc.samples * desc.channels;
}
if (stereo) {
@ -526,8 +595,8 @@ Vector<uint8_t> AudioStreamWAV::get_data() const {
}
Error AudioStreamWAV::save_to_wav(const String &p_path) {
if (format == AudioStreamWAV::FORMAT_IMA_ADPCM) {
WARN_PRINT("Saving IMA_ADPC samples are not supported yet");
if (format == AudioStreamWAV::FORMAT_IMA_ADPCM || format == AudioStreamWAV::FORMAT_QOA) {
WARN_PRINT("Saving IMA_ADPCM and QOA samples is not supported yet");
return ERR_UNAVAILABLE;
}
@ -548,6 +617,7 @@ Error AudioStreamWAV::save_to_wav(const String &p_path) {
byte_pr_sample = 1;
break;
case AudioStreamWAV::FORMAT_16_BITS:
case AudioStreamWAV::FORMAT_QOA:
byte_pr_sample = 2;
break;
case AudioStreamWAV::FORMAT_IMA_ADPCM:
@ -590,6 +660,7 @@ Error AudioStreamWAV::save_to_wav(const String &p_path) {
}
break;
case AudioStreamWAV::FORMAT_16_BITS:
case AudioStreamWAV::FORMAT_QOA:
for (unsigned int i = 0; i < data_bytes / 2; i++) {
uint16_t data_point = decode_uint16(&read_data[i * 2]);
file->store_16(data_point);
@ -607,6 +678,16 @@ Ref<AudioStreamPlayback> AudioStreamWAV::instantiate_playback() {
Ref<AudioStreamPlaybackWAV> sample;
sample.instantiate();
sample->base = Ref<AudioStreamWAV>(this);
if (format == AudioStreamWAV::FORMAT_QOA) {
sample->qoa.desc = (qoa_desc *)memalloc(sizeof(qoa_desc));
qoa_decode_header((uint8_t *)data + DATA_PAD, QOA_MIN_FILESIZE, sample->qoa.desc);
sample->qoa.frame_len = qoa_max_frame_size(sample->qoa.desc);
int samples_len = (sample->qoa.desc->samples > QOA_FRAME_LEN ? QOA_FRAME_LEN : sample->qoa.desc->samples);
int alloc_len = sample->qoa.desc->channels * samples_len * sizeof(int16_t);
sample->qoa.dec = (int16_t *)memalloc(alloc_len);
}
return sample;
}
@ -639,7 +720,7 @@ void AudioStreamWAV::_bind_methods() {
ClassDB::bind_method(D_METHOD("save_to_wav", "path"), &AudioStreamWAV::save_to_wav);
ADD_PROPERTY(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "data", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR), "set_data", "get_data");
ADD_PROPERTY(PropertyInfo(Variant::INT, "format", PROPERTY_HINT_ENUM, "8-Bit,16-Bit,IMA-ADPCM"), "set_format", "get_format");
ADD_PROPERTY(PropertyInfo(Variant::INT, "format", PROPERTY_HINT_ENUM, "8-Bit,16-Bit,IMA-ADPCM,QOA"), "set_format", "get_format");
ADD_PROPERTY(PropertyInfo(Variant::INT, "loop_mode", PROPERTY_HINT_ENUM, "Disabled,Forward,Ping-Pong,Backward"), "set_loop_mode", "get_loop_mode");
ADD_PROPERTY(PropertyInfo(Variant::INT, "loop_begin"), "set_loop_begin", "get_loop_begin");
ADD_PROPERTY(PropertyInfo(Variant::INT, "loop_end"), "set_loop_end", "get_loop_end");
@ -649,6 +730,7 @@ void AudioStreamWAV::_bind_methods() {
BIND_ENUM_CONSTANT(FORMAT_8_BITS);
BIND_ENUM_CONSTANT(FORMAT_16_BITS);
BIND_ENUM_CONSTANT(FORMAT_IMA_ADPCM);
BIND_ENUM_CONSTANT(FORMAT_QOA);
BIND_ENUM_CONSTANT(LOOP_DISABLED);
BIND_ENUM_CONSTANT(LOOP_FORWARD);

View file

@ -31,7 +31,11 @@
#ifndef AUDIO_STREAM_WAV_H
#define AUDIO_STREAM_WAV_H
#define QOA_IMPLEMENTATION
#define QOA_NO_STDIO
#include "servers/audio/audio_stream.h"
#include "thirdparty/misc/qoa.h"
class AudioStreamWAV;
@ -54,14 +58,25 @@ class AudioStreamPlaybackWAV : public AudioStreamPlayback {
int32_t window_ofs = 0;
} ima_adpcm[2];
// Per-playback decoding state used when the stream format is FORMAT_QOA.
struct QOA_State {
	// Stream descriptor filled by qoa_decode_header(); allocated in instantiate_playback().
	qoa_desc *desc = nullptr;
	// Byte offset (file header included) of the frame currently held in `dec`.
	uint32_t data_ofs = 0;
	// Result of qoa_max_frame_size(); used as the stride between frames and as the size passed to qoa_decode_frame().
	uint32_t frame_len = 0;
	// Buffer receiving the samples of the most recently decoded frame.
	int16_t *dec = nullptr;
	// Receives the frame length output of qoa_decode_frame() for the last decoded frame.
	uint32_t dec_len = 0;
	// Sample position the caches below were filled for; stays -1 until the first decode.
	int64_t cache_pos = -1;
	// Cached { current, next } samples used for interpolation (mono / left channel).
	int16_t cache[2] = { 0, 0 };
	// Cached { current, next } samples for the right channel of stereo streams.
	int16_t cache_r[2] = { 0, 0 };
} qoa;
int64_t offset = 0;
int sign = 1;
bool active = false;
friend class AudioStreamWAV;
Ref<AudioStreamWAV> base;
template <typename Depth, bool is_stereo, bool is_ima_adpcm>
void do_resample(const Depth *p_src, AudioFrame *p_dst, int64_t &p_offset, int32_t &p_increment, uint32_t p_amount, IMA_ADPCM_State *p_ima_adpcm);
template <typename Depth, bool is_stereo, bool is_ima_adpcm, bool is_qoa>
void do_resample(const Depth *p_src, AudioFrame *p_dst, int64_t &p_offset, int32_t &p_increment, uint32_t p_amount, IMA_ADPCM_State *p_ima_adpcm, QOA_State *p_qoa);
public:
virtual void start(double p_from_pos = 0.0) override;
@ -78,6 +93,7 @@ public:
virtual void tag_used_streams() override;
AudioStreamPlaybackWAV();
~AudioStreamPlaybackWAV();
};
class AudioStreamWAV : public AudioStream {
@ -88,7 +104,8 @@ public:
enum Format {
FORMAT_8_BITS,
FORMAT_16_BITS,
FORMAT_IMA_ADPCM
FORMAT_IMA_ADPCM,
FORMAT_QOA,
};
// Keep the ResourceImporterWAV `edit/loop_mode` enum hint in sync with these options.

View file

@ -679,6 +679,11 @@ Collection of single-file libraries used in Godot components.
* Version: git (7bdffb428b2b19ad1c43aa44c714dcc104177e84, 2021)
* Modifications: Change from STL to Godot types (see provided patch).
* License: MIT
- `qoa.h`
* Upstream: https://github.com/phoboslab/qoa
* Version: git (e4c751d61af2c395ea828c5888e728c1953bf09f, 2024)
* Modifications: Inlined functions and patched compiler warnings.
* License: MIT
- `r128.{c,h}`
* Upstream: https://github.com/fahickman/r128
* Version: git (6fc177671c47640d5bb69af10cf4ee91050015a1, 2023)

View file

@ -0,0 +1,155 @@
diff --git a/qoa.h b/qoa.h
index aa8fb59434..2dde8df098 100644
--- a/qoa.h
+++ b/qoa.h
@@ -140,14 +140,14 @@ typedef struct {
#endif
} qoa_desc;
-unsigned int qoa_encode_header(qoa_desc *qoa, unsigned char *bytes);
-unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned int frame_len, unsigned char *bytes);
-void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len);
+inline unsigned int qoa_encode_header(qoa_desc *qoa, unsigned char *bytes);
+inline unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned int frame_len, unsigned char *bytes);
+inline void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len);
-unsigned int qoa_max_frame_size(qoa_desc *qoa);
-unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa);
-unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa_desc *qoa, short *sample_data, unsigned int *frame_len);
-short *qoa_decode(const unsigned char *bytes, int size, qoa_desc *file);
+inline unsigned int qoa_max_frame_size(qoa_desc *qoa);
+inline unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa);
+inline unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa_desc *qoa, short *sample_data, unsigned int *frame_len);
+inline short *qoa_decode(const unsigned char *bytes, int size, qoa_desc *file);
#ifndef QOA_NO_STDIO
@@ -366,7 +366,7 @@ unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned
), bytes, &p);
- for (int c = 0; c < channels; c++) {
+ for (unsigned int c = 0; c < channels; c++) {
/* Write the current LMS state */
qoa_uint64_t weights = 0;
qoa_uint64_t history = 0;
@@ -380,9 +380,9 @@ unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned
/* We encode all samples with the channels interleaved on a slice level.
E.g. for stereo: (ch-0, slice 0), (ch 1, slice 0), (ch 0, slice 1), ...*/
- for (int sample_index = 0; sample_index < frame_len; sample_index += QOA_SLICE_LEN) {
+ for (unsigned int sample_index = 0; sample_index < frame_len; sample_index += QOA_SLICE_LEN) {
- for (int c = 0; c < channels; c++) {
+ for (unsigned int c = 0; c < channels; c++) {
int slice_len = qoa_clamp(QOA_SLICE_LEN, 0, frame_len - sample_index);
int slice_start = sample_index * channels + c;
int slice_end = (sample_index + slice_len) * channels + c;
@@ -391,10 +391,9 @@ unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned
16 scalefactors, encode all samples for the current slice and
meassure the total squared error. */
qoa_uint64_t best_rank = -1;
- qoa_uint64_t best_error = -1;
- qoa_uint64_t best_slice;
- qoa_lms_t best_lms;
- int best_scalefactor;
+ qoa_uint64_t best_slice = -1;
+ qoa_lms_t best_lms = {{-1, -1, -1, -1}, {-1, -1, -1, -1}};
+ int best_scalefactor = -1;
for (int sfi = 0; sfi < 16; sfi++) {
/* There is a strong correlation between the scalefactors of
@@ -408,7 +407,6 @@ unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned
qoa_lms_t lms = qoa->lms[c];
qoa_uint64_t slice = scalefactor;
qoa_uint64_t current_rank = 0;
- qoa_uint64_t current_error = 0;
for (int si = slice_start; si < slice_end; si += channels) {
int sample = sample_data[si];
@@ -438,7 +436,6 @@ unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned
qoa_uint64_t error_sq = error * error;
current_rank += error_sq + weights_penalty * weights_penalty;
- current_error += error_sq;
if (current_rank > best_rank) {
break;
}
@@ -449,7 +446,6 @@ unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned
if (current_rank < best_rank) {
best_rank = current_rank;
- best_error = current_error;
best_slice = slice;
best_lms = lms;
best_scalefactor = scalefactor;
@@ -492,9 +488,9 @@ void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len)
num_frames * QOA_LMS_LEN * 4 * qoa->channels + /* 4 * 4 bytes lms state per channel */
num_slices * 8 * qoa->channels; /* 8 byte slices */
- unsigned char *bytes = QOA_MALLOC(encoded_size);
+ unsigned char *bytes = (unsigned char *)QOA_MALLOC(encoded_size);
- for (int c = 0; c < qoa->channels; c++) {
+ for (unsigned int c = 0; c < qoa->channels; c++) {
/* Set the initial LMS weights to {0, 0, -1, 2}. This helps with the
prediction of the first few ms of a file. */
qoa->lms[c].weights[0] = 0;
@@ -517,7 +513,7 @@ void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len)
#endif
int frame_len = QOA_FRAME_LEN;
- for (int sample_index = 0; sample_index < qoa->samples; sample_index += frame_len) {
+ for (unsigned int sample_index = 0; sample_index < qoa->samples; sample_index += frame_len) {
frame_len = qoa_clamp(QOA_FRAME_LEN, 0, qoa->samples - sample_index);
const short *frame_samples = sample_data + sample_index * qoa->channels;
unsigned int frame_size = qoa_encode_frame(frame_samples, qoa, frame_len, bytes + p);
@@ -580,14 +576,14 @@ unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa
/* Read and verify the frame header */
qoa_uint64_t frame_header = qoa_read_u64(bytes, &p);
- int channels = (frame_header >> 56) & 0x0000ff;
- int samplerate = (frame_header >> 32) & 0xffffff;
- int samples = (frame_header >> 16) & 0x00ffff;
- int frame_size = (frame_header ) & 0x00ffff;
+ unsigned int channels = (frame_header >> 56) & 0x0000ff;
+ unsigned int samplerate = (frame_header >> 32) & 0xffffff;
+ unsigned int samples = (frame_header >> 16) & 0x00ffff;
+ unsigned int frame_size = (frame_header ) & 0x00ffff;
int data_size = frame_size - 8 - QOA_LMS_LEN * 4 * channels;
int num_slices = data_size / 8;
- int max_total_samples = num_slices * QOA_SLICE_LEN;
+ unsigned int max_total_samples = num_slices * QOA_SLICE_LEN;
if (
channels != qoa->channels ||
@@ -600,7 +596,7 @@ unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa
/* Read the LMS state: 4 x 2 bytes history, 4 x 2 bytes weights per channel */
- for (int c = 0; c < channels; c++) {
+ for (unsigned int c = 0; c < channels; c++) {
qoa_uint64_t history = qoa_read_u64(bytes, &p);
qoa_uint64_t weights = qoa_read_u64(bytes, &p);
for (int i = 0; i < QOA_LMS_LEN; i++) {
@@ -613,8 +609,8 @@ unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa
/* Decode all slices for all channels in this frame */
- for (int sample_index = 0; sample_index < samples; sample_index += QOA_SLICE_LEN) {
- for (int c = 0; c < channels; c++) {
+ for (unsigned int sample_index = 0; sample_index < samples; sample_index += QOA_SLICE_LEN) {
+ for (unsigned int c = 0; c < channels; c++) {
qoa_uint64_t slice = qoa_read_u64(bytes, &p);
int scalefactor = (slice >> 60) & 0xf;
@@ -647,7 +643,7 @@ short *qoa_decode(const unsigned char *bytes, int size, qoa_desc *qoa) {
/* Calculate the required size of the sample buffer and allocate */
int total_samples = qoa->samples * qoa->channels;
- short *sample_data = QOA_MALLOC(total_samples * sizeof(short));
+ short *sample_data = (short *)QOA_MALLOC(total_samples * sizeof(short));
unsigned int sample_index = 0;
unsigned int frame_len;

728
thirdparty/misc/qoa.h vendored Normal file
View file

@ -0,0 +1,728 @@
/*
Copyright (c) 2023, Dominic Szablewski - https://phoboslab.org
SPDX-License-Identifier: MIT
QOA - The "Quite OK Audio" format for fast, lossy audio compression
-- Data Format
QOA encodes pulse-code modulated (PCM) audio data with up to 255 channels,
sample rates from 1 up to 16777215 hertz and a bit depth of 16 bits.
The compression method employed in QOA is lossy; it discards some information
from the uncompressed PCM data. For many types of audio signals this compression
is "transparent", i.e. the difference from the original file is often not
audible.
QOA encodes 20 samples of 16 bit PCM data into slices of 64 bits. A single
sample therefore requires 3.2 bits of storage space, resulting in a 5x
compression (16 / 3.2).
A QOA file consists of an 8 byte file header, followed by a number of frames.
Each frame contains an 8 byte frame header, the current 16 byte en-/decoder
state per channel and 256 slices per channel. Each slice is 8 bytes wide and
encodes 20 samples of audio data.
All values, including the slices, are big endian. The file layout is as follows:
struct {
struct {
char magic[4]; // magic bytes "qoaf"
uint32_t samples; // samples per channel in this file
} file_header;
struct {
struct {
uint8_t num_channels; // no. of channels
uint24_t samplerate; // samplerate in hz
uint16_t fsamples; // samples per channel in this frame
uint16_t fsize; // frame size (includes this header)
} frame_header;
struct {
int16_t history[4]; // most recent last
int16_t weights[4]; // most recent last
} lms_state[num_channels];
qoa_slice_t slices[256][num_channels];
} frames[ceil(samples / (256 * 20))];
} qoa_file_t;
Each `qoa_slice_t` contains a quantized scalefactor `sf_quant` and 20 quantized
residuals `qrNN`:
.- QOA_SLICE -- 64 bits, 20 samples --------------------------/ /------------.
| Byte[0] | Byte[1] | Byte[2] \ \ Byte[7] |
| 7 6 5 4 3 2 1 0 | 7 6 5 4 3 2 1 0 | 7 6 5 / / 2 1 0 |
|------------+--------+--------+--------+---------+---------+-\ \--+---------|
| sf_quant | qr00 | qr01 | qr02 | qr03 | qr04 | / / | qr19 |
`-------------------------------------------------------------\ \------------`
Each frame except the last must contain exactly 256 slices per channel. The last
frame may contain between 1 .. 256 (inclusive) slices per channel. The last
slice (for each channel) in the last frame may contain less than 20 samples; the
slice still must be 8 bytes wide, with the unused samples zeroed out.
Channels are interleaved per slice. E.g. for 2 channel stereo:
slice[0] = L, slice[1] = R, slice[2] = L, slice[3] = R ...
A valid QOA file or stream must have at least one frame. Each frame must contain
at least one channel and one sample with a samplerate between 1 .. 16777215
(inclusive).
If the total number of samples is not known by the encoder, the samples in the
file header may be set to 0x00000000 to indicate that the encoder is
"streaming". In a streaming context, the samplerate and number of channels may
differ from frame to frame. For static files (those with samples set to a
non-zero value), each frame must have the same number of channels and same
samplerate.
Note that this implementation of QOA only handles files with a known total
number of samples.
A decoder should support at least 8 channels. The channel layout for channel
counts 1 .. 8 is:
1. Mono
2. L, R
3. L, R, C
4. FL, FR, B/SL, B/SR
5. FL, FR, C, B/SL, B/SR
6. FL, FR, C, LFE, B/SL, B/SR
7. FL, FR, C, LFE, B, SL, SR
8. FL, FR, C, LFE, BL, BR, SL, SR
QOA predicts each audio sample based on the previously decoded ones using a
"Sign-Sign Least Mean Squares Filter" (LMS). This prediction plus the
dequantized residual forms the final output sample.
*/
/* -----------------------------------------------------------------------------
Header - Public functions */
#ifndef QOA_H
#define QOA_H
#ifdef __cplusplus
extern "C" {
#endif
#define QOA_MIN_FILESIZE 16
#define QOA_MAX_CHANNELS 8
#define QOA_SLICE_LEN 20
#define QOA_SLICES_PER_FRAME 256
#define QOA_FRAME_LEN (QOA_SLICES_PER_FRAME * QOA_SLICE_LEN)
#define QOA_LMS_LEN 4
#define QOA_MAGIC 0x716f6166 /* 'qoaf' */
#define QOA_FRAME_SIZE(channels, slices) \
(8 + QOA_LMS_LEN * 4 * channels + 8 * slices * channels)
/* State of the LMS predictor for a single channel. */
typedef struct {
	int history[QOA_LMS_LEN]; /* previously reconstructed samples, most recent last */
	int weights[QOA_LMS_LEN]; /* weight multiplied with the matching history entry */
} qoa_lms_t;
/* Description of a QOA stream: filled by the caller before encoding, or by
qoa_decode_header() when decoding. */
typedef struct {
	unsigned int channels; /* number of channels */
	unsigned int samplerate; /* samplerate in hz */
	unsigned int samples; /* samples per channel */
	qoa_lms_t lms[QOA_MAX_CHANNELS]; /* one LMS state per channel */
	#ifdef QOA_RECORD_TOTAL_ERROR
	double error; /* only present when QOA_RECORD_TOTAL_ERROR is defined */
	#endif
} qoa_desc;
inline unsigned int qoa_encode_header(qoa_desc *qoa, unsigned char *bytes);
inline unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned int frame_len, unsigned char *bytes);
inline void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len);
inline unsigned int qoa_max_frame_size(qoa_desc *qoa);
inline unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa);
inline unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa_desc *qoa, short *sample_data, unsigned int *frame_len);
inline short *qoa_decode(const unsigned char *bytes, int size, qoa_desc *file);
#ifndef QOA_NO_STDIO
int qoa_write(const char *filename, const short *sample_data, qoa_desc *qoa);
void *qoa_read(const char *filename, qoa_desc *qoa);
#endif /* QOA_NO_STDIO */
#ifdef __cplusplus
}
#endif
#endif /* QOA_H */
/* -----------------------------------------------------------------------------
Implementation */
#ifdef QOA_IMPLEMENTATION
#include <stdlib.h>
#ifndef QOA_MALLOC
#define QOA_MALLOC(sz) malloc(sz)
#define QOA_FREE(p) free(p)
#endif
typedef unsigned long long qoa_uint64_t;
/* The quant_tab provides an index into the dequant_tab for residuals in the
range of -8 .. 8. It maps this range to just 3bits and becomes less accurate at
the higher end. Note that the residual zero is identical to the lowest positive
value. This is mostly fine, since the qoa_div() function always rounds away
from zero. */
static const int qoa_quant_tab[17] = {
7, 7, 7, 5, 5, 3, 3, 1, /* -8..-1 */
0, /* 0 */
0, 2, 2, 4, 4, 6, 6, 6 /* 1.. 8 */
};
/* We have 16 different scalefactors. Like the quantized residuals these become
less accurate at the higher end. In theory, the highest scalefactor that we
would need to encode the highest 16bit residual is (2**16)/8 = 8192. However we
rely on the LMS filter to predict samples accurately enough that a maximum
residual of one quarter of the 16 bit range is sufficient. I.e. with the
scalefactor 2048 times the quant range of 8 we can encode residuals up to 2**14.
The scalefactor values are computed as:
scalefactor_tab[s] <- round(pow(s + 1, 2.75)) */
static const int qoa_scalefactor_tab[16] = {
1, 7, 21, 45, 84, 138, 211, 304, 421, 562, 731, 928, 1157, 1419, 1715, 2048
};
/* The reciprocal_tab maps each of the 16 scalefactors to their rounded
reciprocals 1/scalefactor. This allows us to calculate the scaled residuals in
the encoder with just one multiplication instead of an expensive division. We
do this in .16 fixed point with integers, instead of floats.
The reciprocal_tab is computed as:
reciprocal_tab[s] <- ((1<<16) + scalefactor_tab[s] - 1) / scalefactor_tab[s] */
static const int qoa_reciprocal_tab[16] = {
65536, 9363, 3121, 1457, 781, 475, 311, 216, 156, 117, 90, 71, 57, 47, 39, 32
};
/* The dequant_tab maps each of the scalefactors and quantized residuals to
their unscaled & dequantized version.
Since qoa_div rounds away from the zero, the smallest entries are mapped to 3/4
instead of 1. The dequant_tab assumes the following dequantized values for each
of the quant_tab indices and is computed as:
float dqt[8] = {0.75, -0.75, 2.5, -2.5, 4.5, -4.5, 7, -7};
dequant_tab[s][q] <- round_ties_away_from_zero(scalefactor_tab[s] * dqt[q])
The rounding employed here is "to nearest, ties away from zero", i.e. positive
and negative values are treated symmetrically.
*/
static const int qoa_dequant_tab[16][8] = {
{ 1, -1, 3, -3, 5, -5, 7, -7},
{ 5, -5, 18, -18, 32, -32, 49, -49},
{ 16, -16, 53, -53, 95, -95, 147, -147},
{ 34, -34, 113, -113, 203, -203, 315, -315},
{ 63, -63, 210, -210, 378, -378, 588, -588},
{ 104, -104, 345, -345, 621, -621, 966, -966},
{ 158, -158, 528, -528, 950, -950, 1477, -1477},
{ 228, -228, 760, -760, 1368, -1368, 2128, -2128},
{ 316, -316, 1053, -1053, 1895, -1895, 2947, -2947},
{ 422, -422, 1405, -1405, 2529, -2529, 3934, -3934},
{ 548, -548, 1828, -1828, 3290, -3290, 5117, -5117},
{ 696, -696, 2320, -2320, 4176, -4176, 6496, -6496},
{ 868, -868, 2893, -2893, 5207, -5207, 8099, -8099},
{1064, -1064, 3548, -3548, 6386, -6386, 9933, -9933},
{1286, -1286, 4288, -4288, 7718, -7718, 12005, -12005},
{1536, -1536, 5120, -5120, 9216, -9216, 14336, -14336},
};
/* The Least Mean Squares Filter is the heart of QOA. It predicts the next
sample based on the previous 4 reconstructed samples. It does so by continuously
adjusting 4 weights based on the residual of the previous prediction.
The next sample is predicted as the sum of (weight[i] * history[i]).
The adjustment of the weights is done with a "Sign-Sign-LMS" that adds or
subtracts the residual to each weight, based on the corresponding sample from
the history. This, surprisingly, is sufficient to get worthwhile predictions.
This is all done with fixed point integers. Hence the right-shifts when updating
the weights and calculating the prediction. */
/* Compute the prediction for the next sample: accumulate weight * history over
all QOA_LMS_LEN filter taps, then drop the 13 fractional bits of the fixed
point product. The LMS state itself is left untouched. */
static int qoa_lms_predict(qoa_lms_t *lms) {
	int acc = 0;
	int tap = 0;
	while (tap < QOA_LMS_LEN) {
		acc += lms->weights[tap] * lms->history[tap];
		tap++;
	}
	return acc >> 13;
}
/* Adapt the filter after a sample has been reconstructed.
Every weight is nudged by (residual >> 4): downwards when the matching history
entry is negative, upwards otherwise. Afterwards the history is shifted by one
position (dropping the oldest entry) and `sample` becomes the newest entry. */
static void qoa_lms_update(qoa_lms_t *lms, int sample, int residual) {
	const int step = residual >> 4;
	int tap;

	for (tap = 0; tap < QOA_LMS_LEN; tap++) {
		if (lms->history[tap] < 0) {
			lms->weights[tap] -= step;
		}
		else {
			lms->weights[tap] += step;
		}
	}

	/* Slide the history window; the slot at the end receives the new sample */
	for (tap = 1; tap < QOA_LMS_LEN; tap++) {
		lms->history[tap - 1] = lms->history[tap];
	}
	lms->history[QOA_LMS_LEN - 1] = sample;
}
/* qoa_div() divides v by a scalefactor with rounding, using a multiplication
with the .16 fixed point reciprocal looked up in qoa_reciprocal_tab[] instead
of an actual division. `scalefactor` is the index into that table.
The result is then corrected by sign(v) - sign(result): a non-zero v whose
quotient rounded to 0 therefore yields +1 or -1 (e.g. 0.1 becomes 1), while
a v of 0 still returns 0, which is handled in the qoa_quant_tab[]. */
static inline int qoa_div(int v, int scalefactor) {
	const int reciprocal = qoa_reciprocal_tab[scalefactor];
	const int quotient = (v * reciprocal + (1 << 15)) >> 16;
	const int sign_of_v = (v > 0) - (v < 0);
	const int sign_of_quotient = (quotient > 0) - (quotient < 0);
	return quotient + sign_of_v - sign_of_quotient; /* round away from 0 */
}
/* Limit v to the range [min, max]. The lower bound is tested first, so min is
returned whenever v is below it, regardless of max. */
static inline int qoa_clamp(int v, int min, int max) {
	return (v < min) ? min : ((v > max) ? max : v);
}
/* Clamp v to the signed 16 bit range [-32768, 32767].
A single unsigned comparison on the biased value decides whether v is already
in range, which is the common case and returns immediately; the per-bound
checks only run for the rarely seen out-of-range values. This keeps the hot
decode path down to one well predicted branch. */
static inline int qoa_clamp_s16(int v) {
	const unsigned int biased = (unsigned int)(v + 32768);
	if (biased <= 65535) {
		return v;
	}
	return (v < -32768) ? -32768 : ((v > 32767) ? 32767 : v);
}
static inline qoa_uint64_t qoa_read_u64(const unsigned char *bytes, unsigned int *p) {
bytes += *p;
*p += 8;
return
((qoa_uint64_t)(bytes[0]) << 56) | ((qoa_uint64_t)(bytes[1]) << 48) |
((qoa_uint64_t)(bytes[2]) << 40) | ((qoa_uint64_t)(bytes[3]) << 32) |
((qoa_uint64_t)(bytes[4]) << 24) | ((qoa_uint64_t)(bytes[5]) << 16) |
((qoa_uint64_t)(bytes[6]) << 8) | ((qoa_uint64_t)(bytes[7]) << 0);
}
/* Store v as a big-endian 64 bit value into bytes at offset *p and advance *p
by the 8 bytes written. No bounds checking is done here; the caller must make
sure that the buffer has room for 8 more bytes. */
static inline void qoa_write_u64(qoa_uint64_t v, unsigned char *bytes, unsigned int *p) {
	unsigned char *dst = bytes + *p;
	*p += 8;
	for (int i = 0, shift = 56; i < 8; i++, shift -= 8) {
		/* Most significant byte goes first */
		dst[i] = (v >> shift) & 0xff;
	}
}
/* -----------------------------------------------------------------------------
Encoder */
/* Write the 8 byte file header to bytes: QOA_MAGIC in the upper 32 bits and
the sample count from the qoa_desc in the lower 32 bits.
Returns the number of bytes written. */
unsigned int qoa_encode_header(qoa_desc *qoa, unsigned char *bytes) {
	unsigned int written = 0;
	const qoa_uint64_t magic_bits = (qoa_uint64_t)QOA_MAGIC << 32;
	const qoa_uint64_t file_header = magic_bits | qoa->samples;
	qoa_write_u64(file_header, bytes, &written);
	return written;
}
/* Encode one frame of frame_len samples per channel into bytes.
sample_data holds the interleaved 16 bit samples of this frame (the sample of
channel c at position i is read from sample_data[i * channels + c]).
The frame starts with an 8 byte frame header, followed by the current LMS state
(history and weights) of every channel and then the slices. For every slice all
16 scalefactors are tried and the one with the lowest rank (squared error plus
weights penalty) is kept; qoa->lms[c] is advanced to the state that belongs to
the chosen encoding.
With QOA_RECORD_TOTAL_ERROR defined, the squared error of every chosen slice
is added to qoa->error.
Returns the number of bytes written. */
unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned int frame_len, unsigned char *bytes) {
	unsigned int channels = qoa->channels;

	unsigned int p = 0;
	unsigned int slices = (frame_len + QOA_SLICE_LEN - 1) / QOA_SLICE_LEN;
	unsigned int frame_size = QOA_FRAME_SIZE(channels, slices);
	int prev_scalefactor[QOA_MAX_CHANNELS] = {0};

	/* Write the frame header */
	qoa_write_u64((
		(qoa_uint64_t)qoa->channels   << 56 |
		(qoa_uint64_t)qoa->samplerate << 32 |
		(qoa_uint64_t)frame_len       << 16 |
		(qoa_uint64_t)frame_size
	), bytes, &p);

	for (unsigned int c = 0; c < channels; c++) {
		/* Write the current LMS state */
		qoa_uint64_t weights = 0;
		qoa_uint64_t history = 0;
		for (int i = 0; i < QOA_LMS_LEN; i++) {
			history = (history << 16) | (qoa->lms[c].history[i] & 0xffff);
			weights = (weights << 16) | (qoa->lms[c].weights[i] & 0xffff);
		}
		qoa_write_u64(history, bytes, &p);
		qoa_write_u64(weights, bytes, &p);
	}

	/* We encode all samples with the channels interleaved on a slice level.
	E.g. for stereo: (ch 0, slice 0), (ch 1, slice 0), (ch 0, slice 1), ... */
	for (unsigned int sample_index = 0; sample_index < frame_len; sample_index += QOA_SLICE_LEN) {

		for (unsigned int c = 0; c < channels; c++) {
			int slice_len = qoa_clamp(QOA_SLICE_LEN, 0, frame_len - sample_index);
			int slice_start = sample_index * channels + c;
			int slice_end = (sample_index + slice_len) * channels + c;

			/* Brute force search for the best scalefactor. Just go through all
			16 scalefactors, encode all samples for the current slice and
			measure the total squared error. */
			qoa_uint64_t best_rank = -1;
#ifdef QOA_RECORD_TOTAL_ERROR
			/* Squared error (without the weights penalty) of the best slice */
			qoa_uint64_t best_error = 0;
#endif
			qoa_uint64_t best_slice = -1;
			qoa_lms_t best_lms = {{-1, -1, -1, -1}, {-1, -1, -1, -1}};
			int best_scalefactor = -1;

			for (int sfi = 0; sfi < 16; sfi++) {
				/* There is a strong correlation between the scalefactors of
				neighboring slices. As an optimization, start testing
				the best scalefactor of the previous slice first. */
				int scalefactor = (sfi + prev_scalefactor[c]) % 16;

				/* We have to reset the LMS state to the last known good one
				before trying each scalefactor, as each pass updates the LMS
				state when encoding. */
				qoa_lms_t lms = qoa->lms[c];
				qoa_uint64_t slice = scalefactor;
				qoa_uint64_t current_rank = 0;
#ifdef QOA_RECORD_TOTAL_ERROR
				qoa_uint64_t current_error = 0;
#endif

				for (int si = slice_start; si < slice_end; si += channels) {
					int sample = sample_data[si];
					int predicted = qoa_lms_predict(&lms);

					int residual = sample - predicted;
					int scaled = qoa_div(residual, scalefactor);
					int clamped = qoa_clamp(scaled, -8, 8);
					int quantized = qoa_quant_tab[clamped + 8];
					int dequantized = qoa_dequant_tab[scalefactor][quantized];
					int reconstructed = qoa_clamp_s16(predicted + dequantized);

					/* If the weights have grown too large, we introduce a penalty
					here. This prevents pops/clicks in certain problem cases */
					int weights_penalty = ((
						lms.weights[0] * lms.weights[0] +
						lms.weights[1] * lms.weights[1] +
						lms.weights[2] * lms.weights[2] +
						lms.weights[3] * lms.weights[3]
					) >> 18) - 0x8ff;
					if (weights_penalty < 0) {
						weights_penalty = 0;
					}

					long long error = (sample - reconstructed);
					qoa_uint64_t error_sq = error * error;

					current_rank += error_sq + weights_penalty * weights_penalty;
#ifdef QOA_RECORD_TOTAL_ERROR
					current_error += error_sq;
#endif
					/* This scalefactor is already worse than the best one
					found so far; no need to finish the slice. */
					if (current_rank > best_rank) {
						break;
					}

					qoa_lms_update(&lms, reconstructed, dequantized);
					slice = (slice << 3) | quantized;
				}

				if (current_rank < best_rank) {
					best_rank = current_rank;
#ifdef QOA_RECORD_TOTAL_ERROR
					best_error = current_error;
#endif
					best_slice = slice;
					best_lms = lms;
					best_scalefactor = scalefactor;
				}
			}

			prev_scalefactor[c] = best_scalefactor;

			qoa->lms[c] = best_lms;
#ifdef QOA_RECORD_TOTAL_ERROR
			qoa->error += best_error;
#endif

			/* If this slice was shorter than QOA_SLICE_LEN, we have to left-
			shift all encoded data, to ensure the rightmost bits are the empty
			ones. This should only happen in the last frame of a file as all
			slices are completely filled otherwise. */
			best_slice <<= (QOA_SLICE_LEN - slice_len) * 3;
			qoa_write_u64(best_slice, bytes, &p);
		}
	}

	return p;
}
/* Encode interleaved 16 bit PCM samples into a complete QOA file in memory.
The qoa_desc must have samples, samplerate and channels filled in; the LMS
state of every channel is (re-)initialized here.
On success the encoded size in bytes is stored in *out_len and a buffer
obtained from QOA_MALLOC is returned; the caller releases it with QOA_FREE.
Returns NULL, leaving *out_len untouched, if the description is invalid
(zero samples, samplerate of 0 or above 0xffffff, channel count of 0 or above
QOA_MAX_CHANNELS) or if the allocation fails. */
void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len) {
	if (
		qoa->samples == 0 ||
		qoa->samplerate == 0 || qoa->samplerate > 0xffffff ||
		qoa->channels == 0 || qoa->channels > QOA_MAX_CHANNELS
	) {
		return NULL;
	}

	/* Calculate the encoded size and allocate */
	unsigned int num_frames = (qoa->samples + QOA_FRAME_LEN-1) / QOA_FRAME_LEN;
	unsigned int num_slices = (qoa->samples + QOA_SLICE_LEN-1) / QOA_SLICE_LEN;
	unsigned int encoded_size = 8 +                    /* 8 byte file header */
		num_frames * 8 +                               /* 8 byte frame headers */
		num_frames * QOA_LMS_LEN * 4 * qoa->channels + /* 4 * 4 bytes lms state per channel */
		num_slices * 8 * qoa->channels;                /* 8 byte slices */

	unsigned char *bytes = (unsigned char *)QOA_MALLOC(encoded_size);
	if (!bytes) {
		/* Out of memory: fail like for invalid parameters instead of
		writing through a null pointer. */
		return NULL;
	}

	for (unsigned int c = 0; c < qoa->channels; c++) {
		/* Set the initial LMS weights to {0, 0, -1, 2}. This helps with the
		prediction of the first few ms of a file. */
		qoa->lms[c].weights[0] = 0;
		qoa->lms[c].weights[1] = 0;
		qoa->lms[c].weights[2] = -(1<<13);
		qoa->lms[c].weights[3] =  (1<<14);

		/* Explicitly set the history samples to 0, as we might have some
		garbage in there. */
		for (int i = 0; i < QOA_LMS_LEN; i++) {
			qoa->lms[c].history[i] = 0;
		}
	}

	/* Encode the header and go through all frames */
	unsigned int p = qoa_encode_header(qoa, bytes);
#ifdef QOA_RECORD_TOTAL_ERROR
	qoa->error = 0;
#endif

	unsigned int frame_len = QOA_FRAME_LEN;
	for (unsigned int sample_index = 0; sample_index < qoa->samples; sample_index += frame_len) {
		/* Every frame holds QOA_FRAME_LEN samples, except for the last one
		which holds whatever is left. The remainder is kept unsigned so that
		very large sample counts cannot turn into a negative frame length. */
		unsigned int remaining = qoa->samples - sample_index;
		frame_len = remaining < (unsigned int)QOA_FRAME_LEN ? remaining : (unsigned int)QOA_FRAME_LEN;

		const short *frame_samples = sample_data + sample_index * qoa->channels;
		unsigned int frame_size = qoa_encode_frame(frame_samples, qoa, frame_len, bytes + p);
		p += frame_size;
	}

	*out_len = p;
	return bytes;
}
/* -----------------------------------------------------------------------------
Decoder */
/* Return the size in bytes of a frame that carries QOA_SLICES_PER_FRAME slices
for each of the channels given in the qoa_desc. */
unsigned int qoa_max_frame_size(qoa_desc *qoa) {
	const unsigned int channel_count = qoa->channels;
	return QOA_FRAME_SIZE(channel_count, QOA_SLICES_PER_FRAME);
}
/* Parse the file header and fill the qoa_desc with the total number of
samples, the channel count and the samplerate. The latter two are taken from
the header of the first frame, which directly follows the file header.
Returns 8 (the size of the file header, i.e. the offset of the first frame)
on success. Returns 0 if size is below QOA_MIN_FILESIZE, the magic number does
not match, or if samples, channels or samplerate are 0 or the channel count
exceeds QOA_MAX_CHANNELS. The qoa_desc may be partially written on failure. */
unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa) {
	unsigned int p = 0;
	if (size < QOA_MIN_FILESIZE) {
		return 0;
	}

	/* Read the file header, verify the magic number ('qoaf') and read the
	total number of samples. */
	qoa_uint64_t file_header = qoa_read_u64(bytes, &p);

	if ((file_header >> 32) != QOA_MAGIC) {
		return 0;
	}

	qoa->samples = file_header & 0xffffffff;
	if (!qoa->samples) {
		return 0;
	}

	/* Peek into the first frame header to get the number of channels and
	the samplerate. */
	qoa_uint64_t frame_header = qoa_read_u64(bytes, &p);
	qoa->channels   = (frame_header >> 56) & 0x0000ff;
	qoa->samplerate = (frame_header >> 32) & 0xffffff;

	/* The channel count comes straight from the file and can be as high as
	255, while the decoder keeps one LMS state per channel in qoa->lms[].
	Apply the same upper limit that qoa_encode() enforces.
	NOTE(review): this assumes qoa->lms has QOA_MAX_CHANNELS entries; confirm
	against the qoa_desc declaration. */
	if (
		qoa->channels == 0 || qoa->channels > QOA_MAX_CHANNELS ||
		qoa->samplerate == 0
	) {
		return 0;
	}

	return 8;
}
/* Decode one frame from bytes (of which `size` bytes are available) into
sample_data, writing the samples interleaved by channel.
The frame header must agree with the channel count and samplerate in the
qoa_desc, and its declared frame size must fit into `size` and be large enough
for the header, the LMS state and all slices that its sample count requires.
The LMS state of every channel in the qoa_desc is overwritten with the state
stored in the frame and then advanced while decoding.
On success the number of samples per channel is stored in *frame_len and the
number of bytes consumed is returned. On any validation failure *frame_len is
set to 0 and 0 is returned. sample_data must have room for the frame's sample
count times the channel count; this is not checked here. */
unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa_desc *qoa, short *sample_data, unsigned int *frame_len) {
	unsigned int p = 0;
	*frame_len = 0;

	if (size < 8 + QOA_LMS_LEN * 4 * qoa->channels) {
		return 0;
	}

	/* Read and verify the frame header */
	qoa_uint64_t frame_header = qoa_read_u64(bytes, &p);
	unsigned int channels   = (frame_header >> 56) & 0x0000ff;
	unsigned int samplerate = (frame_header >> 32) & 0xffffff;
	unsigned int samples    = (frame_header >> 16) & 0x00ffff;
	unsigned int frame_size = (frame_header      ) & 0x00ffff;

	/* Size of the frame header plus the LMS state of all channels */
	unsigned int state_size = 8 + QOA_LMS_LEN * 4 * channels;

	/* frame_size must be checked against state_size before subtracting;
	otherwise a too small value wraps around and the sample count check
	below would accept any frame.
	NOTE(review): the QOA_MAX_CHANNELS limit assumes qoa->lms has that many
	entries; confirm against the qoa_desc declaration. */
	if (
		channels != qoa->channels ||
		channels > QOA_MAX_CHANNELS ||
		samplerate != qoa->samplerate ||
		frame_size > size ||
		frame_size < state_size
	) {
		return 0;
	}

	unsigned int data_size = frame_size - state_size;
	unsigned int num_slices = data_size / 8;
	unsigned int max_total_samples = num_slices * QOA_SLICE_LEN;

	/* Every channel needs a whole slice even for a partially filled one, so
	also make sure that all slices read below are part of this frame. */
	unsigned int slices_needed = ((samples + QOA_SLICE_LEN - 1) / QOA_SLICE_LEN) * channels;
	if (
		samples * channels > max_total_samples ||
		slices_needed > num_slices
	) {
		return 0;
	}

	/* Read the LMS state: 4 x 2 bytes history, 4 x 2 bytes weights per channel */
	for (unsigned int c = 0; c < channels; c++) {
		qoa_uint64_t history = qoa_read_u64(bytes, &p);
		qoa_uint64_t weights = qoa_read_u64(bytes, &p);
		for (int i = 0; i < QOA_LMS_LEN; i++) {
			qoa->lms[c].history[i] = ((signed short)(history >> 48));
			history <<= 16;
			qoa->lms[c].weights[i] = ((signed short)(weights >> 48));
			weights <<= 16;
		}
	}

	/* Decode all slices for all channels in this frame */
	for (unsigned int sample_index = 0; sample_index < samples; sample_index += QOA_SLICE_LEN) {
		for (unsigned int c = 0; c < channels; c++) {
			qoa_uint64_t slice = qoa_read_u64(bytes, &p);

			/* The top 4 bits hold the scalefactor, followed by 3 bits per
			quantized residual. */
			int scalefactor = (slice >> 60) & 0xf;
			int slice_start = sample_index * channels + c;
			int slice_end = qoa_clamp(sample_index + QOA_SLICE_LEN, 0, samples) * channels + c;

			for (int si = slice_start; si < slice_end; si += channels) {
				int predicted = qoa_lms_predict(&qoa->lms[c]);
				int quantized = (slice >> 57) & 0x7;
				int dequantized = qoa_dequant_tab[scalefactor][quantized];
				int reconstructed = qoa_clamp_s16(predicted + dequantized);

				sample_data[si] = reconstructed;
				slice <<= 3;

				qoa_lms_update(&qoa->lms[c], reconstructed, dequantized);
			}
		}
	}

	*frame_len = samples;
	return p;
}
/* Decode a complete QOA file held in memory.
The qoa_desc is filled from the file; on return qoa->samples holds the number
of samples per channel that were actually decoded, which is lower than the
count in the file header if the data is truncated or a frame is rejected.
Returns a buffer of interleaved 16 bit samples obtained from QOA_MALLOC, to be
released by the caller with QOA_FREE, or NULL if the header is invalid or the
allocation fails. */
short *qoa_decode(const unsigned char *bytes, int size, qoa_desc *qoa) {
	unsigned int p = qoa_decode_header(bytes, size, qoa);
	if (!p) {
		return NULL;
	}

	/* Calculate the required size of the sample buffer and allocate. Leading
	with sizeof() makes the multiplication happen in size_t rather than int. */
	short *sample_data = (short *)QOA_MALLOC(sizeof(short) * qoa->samples * qoa->channels);
	if (!sample_data) {
		return NULL;
	}

	/* Not negative here, since qoa_decode_header() accepted the size */
	unsigned int total_size = (unsigned int)size;
	unsigned int sample_index = 0;
	unsigned int frame_len;
	unsigned int frame_size;

	/* Decode all frames */
	do {
		/* Stop if there is no complete frame header left */
		if (p > total_size || total_size - p < 8) {
			break;
		}

		/* The sample buffer is sized from the file header. Peek at the sample
		count of the next frame (bytes 4 and 5 of its header) and stop if the
		frame would write more samples than there is room left for. */
		unsigned int next_frame_len = ((unsigned int)bytes[p + 4] << 8) | bytes[p + 5];
		if (next_frame_len > qoa->samples - sample_index) {
			break;
		}

		short *sample_ptr = sample_data + sample_index * qoa->channels;
		frame_size = qoa_decode_frame(bytes + p, total_size - p, qoa, sample_ptr, &frame_len);

		p += frame_size;
		sample_index += frame_len;
	} while (frame_size && sample_index < qoa->samples);

	qoa->samples = sample_index;
	return sample_data;
}
/* -----------------------------------------------------------------------------
File read/write convenience functions */
#ifndef QOA_NO_STDIO
#include <stdio.h>
/* Encode sample_data as described by the qoa_desc and write the result to the
file at filename, which is created or truncated.
Returns the number of bytes written, or 0 if the file could not be opened,
encoding failed, or the data could not be written out completely. */
int qoa_write(const char *filename, const short *sample_data, qoa_desc *qoa) {
	FILE *f = fopen(filename, "wb");
	unsigned int size;
	void *encoded;
	size_t written;
	int close_failed;

	if (!f) {
		return 0;
	}

	encoded = qoa_encode(sample_data, qoa, &size);
	if (!encoded) {
		fclose(f);
		return 0;
	}

	written = fwrite(encoded, 1, size, f);
	/* Buffered data may only hit the disk on close, so check that as well */
	close_failed = fclose(f);
	QOA_FREE(encoded);

	if (written != size || close_failed) {
		return 0;
	}
	return size;
}
/* Read the file at filename into memory and decode it with qoa_decode().
The qoa_desc is filled by the decoder.
Returns the decoded sample buffer (see qoa_decode(); release it with QOA_FREE)
or NULL if the file cannot be opened or sized, is empty, is too large for the
int sized length qoa_decode() takes, or if allocation or decoding fails. */
void *qoa_read(const char *filename, qoa_desc *qoa) {
	FILE *f = fopen(filename, "rb");
	long file_size;
	int size, bytes_read;
	unsigned char *data;
	short *sample_data;

	if (!f) {
		return NULL;
	}

	if (fseek(f, 0, SEEK_END) != 0) {
		fclose(f);
		return NULL;
	}
	file_size = ftell(f);
	size = (int)file_size;
	/* Reject empty files, ftell() errors and sizes that do not fit an int */
	if (file_size <= 0 || (long)size != file_size || fseek(f, 0, SEEK_SET) != 0) {
		fclose(f);
		return NULL;
	}

	/* Explicit cast, like the other allocations in this file, so that this
	also compiles as C++. */
	data = (unsigned char *)QOA_MALLOC(size);
	if (!data) {
		fclose(f);
		return NULL;
	}

	/* A short read is passed on as is; the decoder only looks at the bytes
	that were actually read. */
	bytes_read = (int)fread(data, 1, size, f);
	fclose(f);

	sample_data = qoa_decode(data, bytes_read, qoa);
	QOA_FREE(data);
	return sample_data;
}
#endif /* QOA_NO_STDIO */
#endif /* QOA_IMPLEMENTATION */