Extend WASAPI interface to support mix formats other than 16 bit integer. (#919)

* Extend WASAPI interface to support mix formats other than 16 bit integer.
* Have WASAPI do sample rate conversion. Shouldn't be necessary but eh.
* Add PR #919 to changelog.
2025-06-08 10:43:00 -07:00 · 2025-06-08 10:43:00 -07:00 · 337094f635
parent 44a854cea3
commit 337094f635
4 changed files with 295 additions and 18 deletions
--- a/USER_MANUAL.md
+++ b/USER_MANUAL.md
@ -799,6 +799,7 @@ LDPC | Low Density Parity Check Codes - a family of powerful FEC codes
    * Fix compiler errors when using wxWidgets 3.0. (PR #914)
    * Unit tests: Increase sleep time before killing recording to resolve macOS test failures. (PR #917)
    * Fix typo causing RX radio device to remain open. (PR #918)
+    * Fix WASAPI errors on some machines by supporting audio mix formats other than 16-bit integer. (PR #919)

 ## V2.0.0 June 2025

--- a/src/audio/CMakeLists.txt
+++ b/src/audio/CMakeLists.txt
@ -41,7 +41,7 @@ if(APPLE AND NATIVE_AUDIO_AVAILABLE)
        "-framework AVFoundation"
        "-framework CoreAudio")
 elseif(WIN32 AND NATIVE_AUDIO_AVAILABLE)
-    target_link_libraries(fdv_audio PRIVATE uuid avrt)
+    target_link_libraries(fdv_audio PRIVATE uuid avrt winmm ksuser)
 elseif(LINUX AND NATIVE_AUDIO_AVAILABLE AND DBUS_FOUND)
    target_include_directories(fdv_audio PRIVATE ${DBUS_INCLUDE_DIRS})
    target_compile_definitions(fdv_audio PRIVATE -DUSE_RTKIT)
--- a/src/audio/WASAPIAudioDevice.cpp
+++ b/src/audio/WASAPIAudioDevice.cpp
@ -49,6 +49,7 @@ WASAPIAudioDevice::WASAPIAudioDevice(IAudioClient* client, IAudioEngine::AudioDi
    , renderCaptureEvent_(nullptr)
    , isRenderCaptureRunning_(false)
    , semaphore_(nullptr)
+    , tmpBuf_(nullptr)
 {
    client_->AddRef();
 }
@ -85,30 +86,116 @@ void WASAPIAudioDevice::start()
    auto prom = std::make_shared<std::promise<void> >(); 
    auto fut = prom->get_future();
    enqueue_([&]() {
+        WAVEFORMATEX* streamFormatPtr = nullptr;
        WAVEFORMATEX streamFormat;
+        bool freeStreamFormat = false;

        // Populate stream format based on requested sample
        // rate/number of channels.
        // NOTE: this should already have been determined valid
        // by the audio engine!
-        streamFormat.wFormatTag = WAVE_FORMAT_PCM;
-        streamFormat.nChannels = numChannels_;
-        streamFormat.nSamplesPerSec = sampleRate_;
-        streamFormat.wBitsPerSample = 16;
-        streamFormat.nBlockAlign = (numChannels_ * streamFormat.wBitsPerSample) / 8;
-        streamFormat.nAvgBytesPerSec = streamFormat.nSamplesPerSec * streamFormat.nBlockAlign;
-        streamFormat.cbSize = 0;
+        HRESULT hr = client_->GetMixFormat(&streamFormatPtr);
+        if (SUCCEEDED(hr))
+        {
+            freeStreamFormat = true;
+            streamFormatPtr->nChannels = numChannels_;
+            streamFormatPtr->nSamplesPerSec = sampleRate_;
+            streamFormatPtr->nBlockAlign = (numChannels_ * streamFormatPtr->wBitsPerSample) / 8;
+            streamFormatPtr->nAvgBytesPerSec = streamFormatPtr->nSamplesPerSec * streamFormatPtr->nBlockAlign;
+        }
+        else
+        {
+            streamFormatPtr = &streamFormat;
+            streamFormat.wFormatTag = WAVE_FORMAT_PCM;
+            streamFormat.wBitsPerSample = 16;
+            streamFormat.nChannels = numChannels_;
+            streamFormat.nSamplesPerSec = sampleRate_;
+            streamFormat.nBlockAlign = (numChannels_ * streamFormat.wBitsPerSample) / 8;
+            streamFormat.nAvgBytesPerSec = streamFormat.nSamplesPerSec * streamFormat.nBlockAlign;
+            streamFormat.cbSize = 0;
+        }
+
+        // Set up for conversion to mix format
+        if (streamFormatPtr->wFormatTag == WAVE_FORMAT_PCM)
+        {
+            containerBits_ = streamFormatPtr->wBitsPerSample;
+            validBits_ = streamFormatPtr->wBitsPerSample;
+            isFloatingPoint_ = false;
+
+            log_info("Mix format is integer (container bits: %d, valid bits: %d)", containerBits_, validBits_);
+        }
+        else if (streamFormatPtr->wFormatTag == WAVE_FORMAT_IEEE_FLOAT)
+        {
+            containerBits_ = streamFormatPtr->wBitsPerSample;
+            validBits_ = streamFormatPtr->wBitsPerSample;
+            isFloatingPoint_ = true;
+            log_info("Mix format is floating point (container bits: %d, valid bits: %d)", containerBits_, validBits_);
+        }
+        else if (streamFormatPtr->wFormatTag == WAVE_FORMAT_EXTENSIBLE)
+        {
+            WAVEFORMATEXTENSIBLE* extFormat = (WAVEFORMATEXTENSIBLE*)streamFormatPtr;
+            containerBits_ = streamFormatPtr->wBitsPerSample;
+            validBits_ = extFormat->Samples.wValidBitsPerSample;
+            if (extFormat->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
+            {
+                isFloatingPoint_ = false;
+                log_info("Mix format is integer (container bits: %d, valid bits: %d)", containerBits_, validBits_);
+            }
+            else if (extFormat->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
+            {
+                isFloatingPoint_ = true;
+                log_info("Mix format is floating point (container bits: %d, valid bits: %d)", containerBits_, validBits_);
+            }
+            else
+            {
+                std::stringstream ss;
+                ss << "Unknown mix format found: " << GuidToString_(&extFormat->SubFormat);
+                log_error(ss.str().c_str());
+                if (onAudioErrorFunction)
+                {
+                    onAudioErrorFunction(*this, ss.str(), onAudioErrorState);
+                }
+                if (freeStreamFormat)
+                {
+                    CoTaskMemFree(streamFormatPtr);
+                }
+                prom->set_value();
+                return;
+            }
+        }
+        else
+        {
+            std::stringstream ss;
+            ss << "Unknown mix format found: " << streamFormatPtr->wFormatTag;
+            log_error(ss.str().c_str());
+            if (onAudioErrorFunction)
+            {
+                onAudioErrorFunction(*this, ss.str(), onAudioErrorState);
+            }
+            if (freeStreamFormat)
+            {
+                CoTaskMemFree(streamFormatPtr);
+            }
+            prom->set_value();
+            return;
+        }

        if (!initialized_)
        {
            // Initialize the audio client with the above format
-            HRESULT hr = client_->Initialize(
+            hr = client_->Initialize(
                AUDCLNT_SHAREMODE_SHARED,
-                AUDCLNT_STREAMFLAGS_EVENTCALLBACK,
+                AUDCLNT_STREAMFLAGS_EVENTCALLBACK | 
+                    AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM |
+                    AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY,
                BLOCK_TIME_NS / NS_PER_REFTIME, // REFERENCE_TIME is in 100ns units
                0,
-                &streamFormat,
+                streamFormatPtr,
                nullptr);
+            if (freeStreamFormat)
+            {
+                CoTaskMemFree(streamFormatPtr);
+            }
            if (FAILED(hr))
            {
                std::stringstream ss;
@ -123,6 +210,10 @@ void WASAPIAudioDevice::start()
            }
            initialized_ = true;
        }
+        else if (freeStreamFormat)
+        {
+            CoTaskMemFree(streamFormatPtr);
+        }

        // Create render/capture event
        renderCaptureEvent_ = CreateEvent(nullptr, false, false, nullptr);
@ -140,7 +231,7 @@ void WASAPIAudioDevice::start()
        }

        // Assign render/capture event
-        HRESULT hr = client_->SetEventHandle(renderCaptureEvent_);
+        hr = client_->SetEventHandle(renderCaptureEvent_);
        if (FAILED(hr))
        {
            std::stringstream ss;
@ -213,6 +304,10 @@ void WASAPIAudioDevice::start()
            return;
        }

+        // Allocate temporary buffer
+        tmpBuf_ = new short[sampleRate_];
+        assert(tmpBuf_ != nullptr);
+
        if (direction_ == IAudioEngine::AUDIO_ENGINE_OUT)
        {
            // Perform initial population of audio buffer
@ -229,16 +324,22 @@ void WASAPIAudioDevice::start()
                }
                renderClient_->Release();
                renderClient_ = nullptr;
+
+                delete[] tmpBuf_;
+                tmpBuf_ = nullptr;
+
                prom->set_value();
                return;
            }

-            memset(data, 0, bufferFrameCount_ * numChannels_ * sizeof(short));
+            memset(tmpBuf_, 0, bufferFrameCount_ * numChannels_ * sizeof(short));
            if (onAudioDataFunction)
            {
-                onAudioDataFunction(*this, (short*)data, bufferFrameCount_, onAudioDataState);
+                onAudioDataFunction(*this, tmpBuf_, bufferFrameCount_, onAudioDataState);
            }

+            copyToWindowsBuffer_(data, bufferFrameCount_);
+
            hr = renderClient_->ReleaseBuffer(bufferFrameCount_, 0);
            if (FAILED(hr))
            {
@ -251,6 +352,10 @@ void WASAPIAudioDevice::start()
                }
                renderClient_->Release();
                renderClient_ = nullptr;
+
+                delete[] tmpBuf_;
+                tmpBuf_ = nullptr;
+
                prom->set_value();
                return;
            }
@ -286,6 +391,12 @@ void WASAPIAudioDevice::start()
                captureClient_->Release();
                captureClient_ = nullptr;
            }
+
+            delete[] tmpBuf_;
+            tmpBuf_ = nullptr;
+
+            prom->set_value();
+            return;
        }

        // Start render/capture thread.
@ -411,6 +522,12 @@ void WASAPIAudioDevice::stop()
            CloseHandle(tmpSem);
        }

+        if (tmpBuf_ != nullptr)
+        {
+            delete[] tmpBuf_;
+            tmpBuf_ = nullptr;
+        }
+
        prom->set_value();
    });
    fut.wait();
@ -509,11 +626,12 @@ void WASAPIAudioDevice::renderAudio_()
    // Grab audio data from higher level code
    if (framesAvailable > 0 && data != nullptr)
    {
-        memset(data, 0, framesAvailable * numChannels_ * sizeof(short));
+        memset(tmpBuf_, 0, framesAvailable * numChannels_ * sizeof(short));
        if (onAudioDataFunction)
        {
-            onAudioDataFunction(*this, (short*)data, framesAvailable, onAudioDataState);
+            onAudioDataFunction(*this, tmpBuf_, framesAvailable, onAudioDataState);
        }
+        copyToWindowsBuffer_(data, framesAvailable);
    }

    // Release render buffer
@ -577,13 +695,17 @@ void WASAPIAudioDevice::captureAudio_()
            // Fill buffer with silence if told to do so.
            if (flags & AUDCLNT_BUFFERFLAGS_SILENT)
            {
-                memset(data, 0, numFramesAvailable * numChannels_ * sizeof(short));
+                memset(tmpBuf_, 0, numFramesAvailable * numChannels_ * sizeof(short));
+            }
+            else
+            {
+                copyFromWindowsBuffer_(data, numFramesAvailable);
            }

            // Pass data to higher level code
            if (onAudioDataFunction)
            {
-                onAudioDataFunction(*this, (short*)data, numFramesAvailable, onAudioDataState);
+                onAudioDataFunction(*this, tmpBuf_, numFramesAvailable, onAudioDataState);
            }
        }

@ -617,3 +739,86 @@ void WASAPIAudioDevice::captureAudio_()
        ReleaseSemaphore(semaphore_, 1, nullptr);
    }
 }
+
+// Converts audio in the device's mix format into 16-bit samples stored in
+// tmpBuf_, choosing the conversion from isFloatingPoint_, containerBits_ and
+// validBits_.
+//
+// buf:       interleaved samples in the mix format (numFrames frames of
+//            numChannels_ samples each).
+// numFrames: number of frames to convert.
+//
+// If the mix format is not one of the combinations handled below, tmpBuf_ is
+// filled with silence; otherwise it would still hold the samples of the
+// previous call and those would be handed to the caller as new audio.
+void WASAPIAudioDevice::copyFromWindowsBuffer_(void* buf, int numFrames)
+{
+    bool converted = true;
+
+    if (isFloatingPoint_)
+    {
+        if (validBits_ == static_cast<int>(sizeof(float) * 8))
+        {
+            copyFloatToShort_<float>(static_cast<float*>(buf), numFrames);
+        }
+        else if (validBits_ == static_cast<int>(sizeof(double) * 8))
+        {
+            copyFloatToShort_<double>(static_cast<double*>(buf), numFrames);
+        }
+        else
+        {
+            converted = false;
+        }
+    }
+    else
+    {
+        if (containerBits_ == 8)
+        {
+            copyIntToShort_<char>(static_cast<char*>(buf), numFrames);
+        }
+        else if (containerBits_ == 16 && validBits_ == 16)
+        {
+            // Shortcut -- already 16-bit, so a plain memcpy into tmpBuf_ suffices.
+            memcpy(tmpBuf_, buf, numFrames * numChannels_ * sizeof(short));
+        }
+        else if (containerBits_ == 16)
+        {
+            copyIntToShort_<short>(static_cast<short*>(buf), numFrames);
+        }
+        else if (containerBits_ == 32)
+        {
+            copyIntToShort_<int32_t>(static_cast<int32_t*>(buf), numFrames);
+        }
+        else
+        {
+            converted = false;
+        }
+    }
+
+    if (!converted)
+    {
+        // Unsupported layout: emit silence rather than stale data.
+        memset(tmpBuf_, 0, numFrames * numChannels_ * sizeof(short));
+    }
+}
+
+// Converts the 16-bit samples held in tmpBuf_ into the device's mix format,
+// choosing the conversion from isFloatingPoint_, containerBits_ and validBits_.
+//
+// buf:       destination for numFrames interleaved frames in the mix format.
+// numFrames: number of frames to convert.
+//
+// If the mix format is not one of the combinations handled below, the
+// destination is zero-filled. Without this the buffer would be released to
+// the device with whatever bytes it previously contained.
+void WASAPIAudioDevice::copyToWindowsBuffer_(void* buf, int numFrames)
+{
+    bool converted = true;
+
+    if (isFloatingPoint_)
+    {
+        if (validBits_ == static_cast<int>(sizeof(float) * 8))
+        {
+            copyShortToFloat_<float>(static_cast<float*>(buf), numFrames);
+        }
+        else if (validBits_ == static_cast<int>(sizeof(double) * 8))
+        {
+            copyShortToFloat_<double>(static_cast<double*>(buf), numFrames);
+        }
+        else
+        {
+            converted = false;
+        }
+    }
+    else
+    {
+        if (containerBits_ == 8)
+        {
+            copyShortToInt_<char>(static_cast<char*>(buf), numFrames);
+        }
+        else if (containerBits_ == 16 && validBits_ == 16)
+        {
+            // Shortcut -- already 16-bit, so a plain memcpy from tmpBuf_ suffices.
+            memcpy(buf, tmpBuf_, numFrames * numChannels_ * sizeof(short));
+        }
+        else if (containerBits_ == 16)
+        {
+            copyShortToInt_<short>(static_cast<short*>(buf), numFrames);
+        }
+        else if (containerBits_ == 32)
+        {
+            copyShortToInt_<int32_t>(static_cast<int32_t*>(buf), numFrames);
+        }
+        else
+        {
+            converted = false;
+        }
+    }
+
+    if (!converted)
+    {
+        // Unsupported layout: write silence. Each sample occupies
+        // containerBits_ / 8 bytes, so this covers exactly numFrames frames.
+        memset(buf, 0, static_cast<size_t>(numFrames) * numChannels_ * (containerBits_ / 8));
+    }
+}
+
+// Renders a GUID as lowercase hexadecimal text in 8-4-4-4-12 grouping.
+//
+// guid: GUID to format; must point to a valid GUID.
+// Returns the formatted text as a std::string.
+std::string WASAPIAudioDevice::GuidToString_(GUID *guid)
+{
+    // 32 hex digits + 4 hyphens + terminating NUL.
+    char text[32 + 4 + 1];
+
+    const unsigned int part1 = static_cast<unsigned int>(guid->Data1);
+    const unsigned int part2 = static_cast<unsigned int>(guid->Data2);
+    const unsigned int part3 = static_cast<unsigned int>(guid->Data3);
+
+    snprintf(
+        text, sizeof(text),
+        "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+        part1, part2, part3,
+        guid->Data4[0], guid->Data4[1],
+        guid->Data4[2], guid->Data4[3], guid->Data4[4],
+        guid->Data4[5], guid->Data4[6], guid->Data4[7]);
+
+    return std::string(text);
+}
--- a/src/audio/WASAPIAudioDevice.h
+++ b/src/audio/WASAPIAudioDevice.h
@ -85,10 +85,81 @@ private:
    bool isRenderCaptureRunning_;
    HANDLE semaphore_;

+    // For mix<->int16 conversions
+    int containerBits_;
+    int validBits_;
+    bool isFloatingPoint_;
+    short* tmpBuf_;
+
    void renderAudio_();
    void captureAudio_();
+    void copyFromWindowsBuffer_(void* buf, int numFrames);
+    void copyToWindowsBuffer_(void* buf, int numFrames);
+
+    template<typename T>
+    void copyFloatToShort_(T* source, int numFrames);
    
+    template<typename T>
+    void copyShortToFloat_(T* dest, int numFrames);
+    
+    template<typename T>
+    void copyIntToShort_(T* source, int numFrames);
+
+    template<typename T>
+    void copyShortToInt_(T* dest, int numFrames);
+
+    static std::string GuidToString_(GUID* guid);
    static thread_local HANDLE HelperTask_;
 };

+// Converts interleaved floating-point samples to 16-bit integers, writing the
+// result to tmpBuf_.
+//
+// T:         floating-point sample type of the source (float or double).
+// source:    numFrames * numChannels_ interleaved samples.
+// numFrames: number of frames to convert.
+//
+// Each sample is scaled by the largest short value. Samples are first clamped
+// to [-1, 1] and NaN is treated as silence, because converting an out-of-range
+// floating-point value to short is undefined behavior and in practice wraps
+// around into loud noise.
+template<typename T>
+void WASAPIAudioDevice::copyFloatToShort_(T* source, int numFrames)
+{
+    const T fullScale = static_cast<T>(std::numeric_limits<short>::max());
+    short* out = tmpBuf_;
+
+    for (int frame = 0; frame < numFrames; frame++)
+    {
+        for (int channel = 0; channel < numChannels_; channel++)
+        {
+            T sample = *source++;
+            if (sample != sample)
+            {
+                // NaN compares unequal to itself.
+                sample = 0;
+            }
+            else if (sample > 1)
+            {
+                sample = 1;
+            }
+            else if (sample < -1)
+            {
+                sample = -1;
+            }
+
+            *out++ = static_cast<short>(sample * fullScale);
+        }
+    }
+}
+
+// Converts the 16-bit samples in tmpBuf_ to interleaved floating-point
+// samples by dividing each one by the largest short value.
+//
+// T:         floating-point sample type of the destination (float or double).
+// dest:      receives numFrames * numChannels_ interleaved samples.
+// numFrames: number of frames to convert.
+template<typename T>
+void WASAPIAudioDevice::copyShortToFloat_(T* dest, int numFrames)
+{
+    const short* in = tmpBuf_;
+    T* out = dest;
+
+    for (int frame = 0; frame < numFrames; ++frame)
+    {
+        for (int channel = 0; channel < numChannels_; ++channel)
+        {
+            const T asFloat = (T)(*in++);
+            *out++ = asFloat / std::numeric_limits<short>::max();
+        }
+    }
+}
+
+// Converts interleaved integer PCM samples to 16-bit integers, writing the
+// result to tmpBuf_.
+//
+// T:         integer type whose size equals the sample container
+//            (the callers pick char, short or int32_t from containerBits_).
+// source:    numFrames * numChannels_ interleaved samples.
+// numFrames: number of frames to convert.
+//
+// The valid bits of a PCM sample occupy the most significant bits of its
+// container, so the 16-bit value is simply the top 16 bits of the container.
+// This keeps full scale for formats with fewer than 16 valid bits and never
+// produces a negative shift count. 8-bit PCM is unsigned with its midpoint at
+// 128, so it is re-centered around zero before being widened.
+template<typename T>
+void WASAPIAudioDevice::copyIntToShort_(T* source, int numFrames)
+{
+    const int sourceBits = static_cast<int>(sizeof(T) * 8);
+    const int downShift = sourceBits > 16 ? sourceBits - 16 : 0;
+    short* out = tmpBuf_;
+
+    for (int frame = 0; frame < numFrames; frame++)
+    {
+        for (int channel = 0; channel < numChannels_; channel++)
+        {
+            const T sample = *source++;
+            if (sourceBits < 16)
+            {
+                // Unsigned 8-bit: [0, 255] -> [-128, 127] -> [-32768, 32512].
+                const int centered = static_cast<int>(static_cast<unsigned char>(sample)) - 128;
+                *out++ = static_cast<short>(centered * 256);
+            }
+            else
+            {
+                *out++ = static_cast<short>(sample >> downShift);
+            }
+        }
+    }
+}
+
+// Converts the 16-bit samples in tmpBuf_ to interleaved integer PCM samples
+// of a different container size.
+//
+// T:         integer type whose size equals the sample container
+//            (the callers pick char, short or int32_t from containerBits_).
+// dest:      receives numFrames * numChannels_ interleaved samples.
+// numFrames: number of frames to convert.
+//
+// The 16-bit value is placed in the most significant bits of the container,
+// which is where the valid bits of a PCM sample live. Scaling is done by
+// multiplication because left-shifting a negative value is undefined before
+// C++20. 8-bit PCM is unsigned with its midpoint at 128, so the top byte of
+// the sample is offset rather than truncated.
+template<typename T>
+void WASAPIAudioDevice::copyShortToInt_(T* dest, int numFrames)
+{
+    const int destBits = static_cast<int>(sizeof(T) * 8);
+    const int upShift = destBits > 16 ? destBits - 16 : 0;
+    const long long scale = 1LL << upShift;
+    const short* in = tmpBuf_;
+
+    for (int frame = 0; frame < numFrames; frame++)
+    {
+        for (int channel = 0; channel < numChannels_; channel++)
+        {
+            const short sample = *in++;
+            if (destBits < 16)
+            {
+                // [-32768, 32767] -> [0, 65535] -> top byte in [0, 255].
+                const int biased = (static_cast<int>(sample) + 32768) >> 8;
+                *dest++ = static_cast<T>(static_cast<unsigned char>(biased));
+            }
+            else
+            {
+                *dest++ = static_cast<T>(sample * scale);
+            }
+        }
+    }
+}
+
 #endif // WASAPI_AUDIO_DEVICE_H