Merge pull request #286 from Detanup01/voicechat

voicechat implementation
2025-08-08 08:35:40 +02:00 · 2025-07-19 12:56:09 +02:00 · 2025-07-19 12:56:09 +02:00 · 5ff7110ead
commit 5ff7110ead
parent 09b9516079 a83335bd3e
4 changed files with 347 additions and 41 deletions
--- a/dll/dll/steam_user.h
+++ b/dll/dll/steam_user.h
@ -20,6 +20,7 @@
 #include "base.h"
 #include "auth.h"
 #include "voicechat.h"
 class Steam_User :
 public ISteamUser004,
@ -49,10 +50,9 @@ public ISteamUser
 	class SteamCallResults *callback_results{};
    Local_Storage *local_storage{};
 	bool recording = false;
 	std::chrono::high_resolution_clock::time_point last_get_voice{};
 	std::string encrypted_app_ticket{};
 	Auth_Manager *auth_manager{};
    VoiceChat* voicechat{};
    std::map<std::string, std::string> registry{};
    std::string registry_nullptr{};
--- a/dll/dll/voicechat.h
+++ b/dll/dll/voicechat.h
@ -0,0 +1,85 @@
 /* Copyright (C) 2019 Mr Goldberg
   This file is part of the Goldberg Emulator
   The Goldberg Emulator is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.
   The Goldberg Emulator is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.
   You should have received a copy of the GNU Lesser General Public
   License along with the Goldberg Emulator; if not, see
   <http://www.gnu.org/licenses/>.  */
 #ifndef VOICECHAT_INCLUDE_H
 #define VOICECHAT_INCLUDE_H
 #include "base.h"
 #include <opus/opus.h>
 #include <portaudio.h>
 // Sample rate, in Hz, passed to the Opus encoder/decoder constructors and to
 // Pa_OpenStream for both the capture and the playback stream.
 #define SAMPLE_RATE 48000
 // Channel count used for the capture stream and for the Opus encoder/decoders.
 #define CHANNELS 1
 // Samples per channel per PortAudio buffer; also the per-call decode capacity.
 #define FRAME_SIZE 960 // 20ms @ 48kHz
 // Size in bytes of the scratch buffer handed to opus_encode for one frame.
 #define MAX_ENCODED_SIZE 4000
 // Byte size of one frame of 16-bit samples with two channels.
 #define MAX_DECODED_SIZE (FRAME_SIZE * 2 * sizeof(int16_t)) // for stereo
 // Value applied to the encoder through OPUS_SET_BITRATE.
 #define DEFAULT_BITRATE 32000
 // One encoded voice packet waiting in the playback queue.
 struct VoicePacket {
    // Identifier of the speaker; used as the key for the per-user decoder map.
    uint64_t userId;
    // Encoded payload bytes, handed to opus_decode during playback.
    std::vector<uint8_t> encoded;
 };
 // Voice capture and playback helper built on PortAudio streams and Opus.
 // Captured audio is encoded in the input callback and queued for GetVoice();
 // packets passed to QueueIncomingVoice() are decoded in the output callback.
 // NOTE(review): no destructor is declared, so streams, the encoder and the
 // decoders are released only through the Stop* methods.
 class VoiceChat
 {
    // True while capture is active; the input callback ignores audio otherwise.
    std::atomic<bool> isRecording{ false };
    // True while the playback stream is considered running.
    std::atomic<bool> isPlaying{ false };
    // Guards encodedQueue.
    std::mutex inputMutex;
    // Signalled by the input callback whenever a packet is pushed to encodedQueue.
    std::condition_variable inputCond;
    // Encoded capture packets, oldest first, consumed by GetVoice().
    std::queue<std::vector<uint8_t>> encodedQueue;
    // Guards playbackQueue.
    std::mutex playbackQueueMutex;
    // Received packets waiting to be decoded by the output callback.
    std::queue<VoicePacket> playbackQueue;
    // Guards decoderMap.
    std::mutex decoderMapMutex;
    // One Opus decoder per speaker id, created on demand by the output callback.
    std::unordered_map<uint64_t, OpusDecoder*> decoderMap;
    // Encoder for captured audio; created by StartVoiceRecording().
    OpusEncoder* encoder = nullptr;
    // PortAudio capture stream; null while not open.
    PaStream* inputStream = nullptr;
    // PortAudio playback stream; null while not open.
    PaStream* outputStream = nullptr;
    // PortAudio capture callback; the last argument is the user-data pointer.
    static int inputCallback(const void* input, void*, unsigned long frameCount,
        const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void*);
    // PortAudio playback callback; the last argument is the user-data pointer.
    static int outputCallback(const void*, void* output, unsigned long frameCount,
        const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void*);
 public:
    // Initializes PortAudio; returns false when initialization fails.
    bool InitVoiceSystem();
    // Counterpart of InitVoiceSystem(); terminates PortAudio.
    void ShutdownVoiceSystem();
    // Creates the encoder and opens/starts the capture stream; true on success.
    bool StartVoiceRecording();
    // Stops and closes the capture stream and destroys the encoder.
    void StopVoiceRecording();
    // Opens/starts the stereo playback stream; true on success.
    bool StartVoicePlayback();
    // Stops and closes the playback stream and destroys all per-user decoders.
    void StopVoicePlayback();
    // Reports the byte size of the oldest queued encoded packet in *pcbCompressed.
    EVoiceResult GetAvailableVoice(uint32_t* pcbCompressed);
    // Removes the oldest queued packet and writes it to pDestBuffer, either as
    // encoded bytes (bWantCompressed) or decoded to 16-bit PCM.
    EVoiceResult GetVoice(bool bWantCompressed, void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten);
    // Decodes one encoded packet to 16-bit PCM with a decoder created for
    // nDesiredSampleRate, reporting the produced byte count in *nBytesWritten.
    EVoiceResult DecompressVoice(const void* pCompressed, uint32_t cbCompressed,
        void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten,
        uint32_t nDesiredSampleRate);
    // Copies a received encoded packet into the playback queue.
    void QueueIncomingVoice(uint64_t userId, const uint8_t* data, size_t len);
 };
 #endif // VOICECHAT_INCLUDE_H
--- a/dll/steam_user.cpp
+++ b/dll/steam_user.cpp
@ -28,8 +28,8 @@ Steam_User::Steam_User(Settings *settings, Local_Storage *local_storage, class N
    this->callbacks = callbacks;
    this->callback_results = callback_results;
    recording = false;
    auth_manager = new Auth_Manager(settings, network, callbacks);
    voicechat = new VoiceChat();
 }
 Steam_User::~Steam_User()
@ -480,10 +480,7 @@ bool Steam_User::GetUserDataFolder( char *pchBuffer, int cubBuffer )
 void Steam_User::StartVoiceRecording( )
 {
    PRINT_DEBUG_ENTRY();
-    last_get_voice = std::chrono::high_resolution_clock::now();
+    voicechat->StartVoiceRecording();
    recording = true;
    //TODO:fix
    recording = false;
 }
 // Stops voice recording. Because people often release push-to-talk keys early, the system will keep recording for
@ -492,7 +489,7 @@ void Steam_User::StartVoiceRecording( )
 void Steam_User::StopVoiceRecording( )
 {
    PRINT_DEBUG_ENTRY();
-    recording = false;
+    voicechat->StopVoiceRecording();
 }
 // Determine the size of captured audio data that is available from GetVoice.
@ -502,14 +499,7 @@ void Steam_User::StopVoiceRecording( )
 EVoiceResult Steam_User::GetAvailableVoice( uint32 *pcbCompressed, uint32 *pcbUncompressed_Deprecated, uint32 nUncompressedVoiceDesiredSampleRate_Deprecated  )
 {
    PRINT_DEBUG_ENTRY();
-    if (pcbCompressed) *pcbCompressed = 0;
+    return voicechat->GetAvailableVoice(pcbCompressed);
    if (pcbUncompressed_Deprecated) *pcbUncompressed_Deprecated = 0;
    if (!recording) return k_EVoiceResultNotRecording;
    double seconds = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - last_get_voice).count();
    if (pcbCompressed) *pcbCompressed = static_cast<uint32>(seconds * 1024.0 * 64.0 / 8.0);
    if (pcbUncompressed_Deprecated) *pcbUncompressed_Deprecated = static_cast<uint32>(seconds * (double)nUncompressedVoiceDesiredSampleRate_Deprecated * 2.0);
    return k_EVoiceResultOK;
 }
 EVoiceResult Steam_User::GetAvailableVoice(uint32 *pcbCompressed, uint32 *pcbUncompressed)
@ -542,22 +532,7 @@ EVoiceResult Steam_User::GetAvailableVoice(uint32 *pcbCompressed, uint32 *pcbUnc
 EVoiceResult Steam_User::GetVoice( bool bWantCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten, bool bWantUncompressed_Deprecated, void *pUncompressedDestBuffer_Deprecated , uint32 cbUncompressedDestBufferSize_Deprecated , uint32 *nUncompressBytesWritten_Deprecated , uint32 nUncompressedVoiceDesiredSampleRate_Deprecated  )
 {
    PRINT_DEBUG_ENTRY();
-    if (!recording) return k_EVoiceResultNotRecording;
+    return voicechat->GetVoice(bWantCompressed, pDestBuffer, cbDestBufferSize, nBytesWritten);
    double seconds = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - last_get_voice).count();
    if (bWantCompressed) {
        uint32 towrite = static_cast<uint32>(seconds * 1024.0 * 64.0 / 8.0);
        if (cbDestBufferSize < towrite) towrite = cbDestBufferSize;
        if (pDestBuffer) memset(pDestBuffer, 0, towrite);
        if (nBytesWritten) *nBytesWritten = towrite;
    }
    if (bWantUncompressed_Deprecated) {
        PRINT_DEBUG("Wanted Uncompressed");
    }
    last_get_voice = std::chrono::high_resolution_clock::now();
    return k_EVoiceResultOK;
 }
 EVoiceResult Steam_User::GetVoice( bool bWantCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten, bool bWantUncompressed, void *pUncompressedDestBuffer, uint32 cbUncompressedDestBufferSize, uint32 *nUncompressBytesWritten )
@ -581,14 +556,7 @@ EVoiceResult Steam_User::GetCompressedVoice( void *pDestBuffer, uint32 cbDestBuf
 EVoiceResult Steam_User::DecompressVoice( const void *pCompressed, uint32 cbCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten, uint32 nDesiredSampleRate )
 {
    PRINT_DEBUG_ENTRY();
-    if (!recording) return k_EVoiceResultNotRecording;
+    return voicechat->DecompressVoice(pCompressed, cbCompressed, pDestBuffer, cbDestBufferSize, nBytesWritten, nDesiredSampleRate);
    uint32 uncompressed = static_cast<uint32>((double)cbCompressed * ((double)nDesiredSampleRate / 8192.0));
    if(nBytesWritten) *nBytesWritten = uncompressed;
    if (uncompressed > cbDestBufferSize) uncompressed = cbDestBufferSize;
    if (pDestBuffer) memset(pDestBuffer, 0, uncompressed);
    return k_EVoiceResultOK;
 }
 EVoiceResult Steam_User::DecompressVoice( const void *pCompressed, uint32 cbCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten )
@ -894,4 +862,4 @@ bool Steam_User::BSetDurationControlOnlineState( EDurationControlOnlineState eNe
 {
    PRINT_DEBUG_ENTRY();
    return false;
-}
+}
--- a/dll/voicechat.cpp
+++ b/dll/voicechat.cpp
@ -0,0 +1,253 @@
 #include "dll/voicechat.h"
 bool VoiceChat::InitVoiceSystem() {
    static std::atomic<int> initCount{ 0 };
    if (initCount++ == 0) {
        if (Pa_Initialize() != paNoError) {
            PRINT_DEBUG("PortAudio initialization failed");
            return false;
        }
    }
    isRecording = false;
    isPlaying = false;
    encoder = nullptr;
    inputStream = nullptr;
    outputStream = nullptr;
    return true;
 }
 void VoiceChat::ShutdownVoiceSystem() {
    static std::atomic<int> initCount{ 1 };
    if (--initCount == 0) {
        Pa_Terminate();
    }
 }
 // PortAudio capture callback.
 // Encodes one buffer of 16-bit samples with the instance's Opus encoder and
 // appends the resulting packet to encodedQueue, waking one GetVoice() waiter.
 // Buffers are ignored when there is no input, when the buffer is not exactly
 // FRAME_SIZE frames long, or when recording is not active.
 // `data` is the VoiceChat instance registered with the stream.
 // Always returns paContinue.
 int VoiceChat::inputCallback(const void* input, void*, unsigned long frameCount,
    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
    auto* self = static_cast<VoiceChat*>(data);
    const bool usable = input && frameCount == FRAME_SIZE && self->isRecording.load();
    if (!usable) return paContinue;
    const auto* samples = static_cast<const int16_t*>(input);
    std::vector<uint8_t> packet(MAX_ENCODED_SIZE);
    const int written = opus_encode(self->encoder, samples, frameCount,
        packet.data(), MAX_ENCODED_SIZE);
    if (written <= 0) {
        PRINT_DEBUG("Opus encoding failed: %d", written);
        return paContinue;
    }
    // Shrink to the bytes actually produced before handing the packet over.
    packet.resize(written);
    {
        std::lock_guard<std::mutex> guard(self->inputMutex);
        self->encodedQueue.push(std::move(packet));
    }
    self->inputCond.notify_one();
    return paContinue;
 }
 int VoiceChat::outputCallback(const void*, void* output, unsigned long frameCount,
    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
    VoiceChat* chat = static_cast<VoiceChat*>(data);
    int16_t* out = static_cast<int16_t*>(output);
    memset(out, 0, frameCount * sizeof(int16_t) * 2); // support stereo output
    std::lock_guard<std::mutex> lock(chat->playbackQueueMutex);
    size_t mixCount = 0;
    while (!chat->playbackQueue.empty()) {
        VoicePacket pkt = chat->playbackQueue.front();
        chat->playbackQueue.pop();
        OpusDecoder* decoder = nullptr;
        {
            std::lock_guard<std::mutex> dlock(chat->decoderMapMutex);
            decoder = chat->decoderMap[pkt.userId];
            if (!decoder) {
                int err = 0;
                decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
                if (err != OPUS_OK || !decoder) continue;
                chat->decoderMap[pkt.userId] = decoder;
            }
        }
        int16_t tempBuffer[FRAME_SIZE] = { 0 };
        int decoded = opus_decode(decoder, pkt.encoded.data(), pkt.encoded.size(), tempBuffer, frameCount, 0);
        if (decoded > 0) {
            for (int i = 0; i < decoded; ++i) {
                out[2 * i] += tempBuffer[i] / 2;     // left
                out[2 * i + 1] += tempBuffer[i] / 2; // right
            }
            ++mixCount;
        }
    }
    return paContinue;
 }
 bool VoiceChat::StartVoiceRecording() {
    if (isRecording.load()) return true;
    if (!InitVoiceSystem()) return false;
    int err = 0;
    encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, OPUS_APPLICATION_VOIP, &err);
    if (!encoder || err != OPUS_OK) {
        PRINT_DEBUG("Opus encoder create failed: %d", err);
        return false;
    }
    opus_encoder_ctl(encoder, OPUS_SET_BITRATE(DEFAULT_BITRATE));
    PaStreamParameters params{};
    params.device = Pa_GetDefaultInputDevice();
    if (params.device == paNoDevice) return false;
    params.channelCount = CHANNELS;
    params.sampleFormat = paInt16;
    params.suggestedLatency = Pa_GetDeviceInfo(params.device)->defaultLowInputLatency;
    params.hostApiSpecificStreamInfo = nullptr;
    PaError paErr = Pa_OpenStream(&inputStream, &params, nullptr, SAMPLE_RATE, FRAME_SIZE,
        paClipOff, inputCallback, this);
    if (paErr != paNoError) {
        PRINT_DEBUG("Failed to open input stream: %s", Pa_GetErrorText(paErr));
        return false;
    }
    isRecording.store(true);
    Pa_StartStream(inputStream);
    return true;
 }
 void VoiceChat::StopVoiceRecording() {
    if (!isRecording.exchange(false)) return;
    if (inputStream) {
        Pa_StopStream(inputStream);
        Pa_CloseStream(inputStream);
        inputStream = nullptr;
    }
    if (encoder) {
        opus_encoder_destroy(encoder);
        encoder = nullptr;
    }
    ShutdownVoiceSystem();
 }
 bool VoiceChat::StartVoicePlayback() {
    if (isPlaying.load()) return true;
    if (!InitVoiceSystem()) return false;
    PaStreamParameters params{};
    params.device = Pa_GetDefaultOutputDevice();
    if (params.device == paNoDevice) return false;
    params.channelCount = 2; // stereo output
    params.sampleFormat = paInt16;
    params.suggestedLatency = Pa_GetDeviceInfo(params.device)->defaultLowOutputLatency;
    params.hostApiSpecificStreamInfo = nullptr;
    PaError paErr = Pa_OpenStream(&outputStream, nullptr, &params, SAMPLE_RATE, FRAME_SIZE,
        paClipOff, outputCallback, nullptr);
    if (paErr != paNoError) {
        PRINT_DEBUG("Failed to open output stream: %s", Pa_GetErrorText(paErr));
        return false;
    }
    isPlaying.store(true);
    Pa_StartStream(outputStream);
    return true;
 }
 void VoiceChat::StopVoicePlayback() {
    if (!isPlaying.exchange(false)) return;
    if (outputStream) {
        Pa_StopStream(outputStream);
        Pa_CloseStream(outputStream);
        outputStream = nullptr;
    }
    std::lock_guard<std::mutex> lock(decoderMapMutex);
    for (auto& [id, decoder] : decoderMap) {
        opus_decoder_destroy(decoder);
    }
    decoderMap.clear();
    ShutdownVoiceSystem();
 }
 // Reports how many encoded bytes the next GetVoice() call would return.
 // pcbCompressed: receives the size of the oldest queued packet, or 0 when
 //                nothing is available.
 // Returns k_EVoiceResultNotInitialized when pcbCompressed is null,
 // k_EVoiceResultOK when a packet is queued, and otherwise
 // k_EVoiceResultNoData while recording or k_EVoiceResultNotRecording once
 // recording has stopped. Packets captured before the stop stay retrievable
 // until the queue has been drained.
 EVoiceResult VoiceChat::GetAvailableVoice(uint32_t* pcbCompressed) {
    if (!pcbCompressed) return k_EVoiceResultNotInitialized;
    *pcbCompressed = 0; // never leave the caller's value undefined on early return
    std::lock_guard<std::mutex> lock(inputMutex);
    if (encodedQueue.empty()) {
        return isRecording.load() ? k_EVoiceResultNoData : k_EVoiceResultNotRecording;
    }
    *pcbCompressed = static_cast<uint32_t>(encodedQueue.front().size());
    return k_EVoiceResultOK;
 }
 // Hands the oldest captured packet to the caller.
 // bWantCompressed:  true to receive the encoded packet as-is, false to receive
 //                   it decoded to 16-bit PCM at SAMPLE_RATE / CHANNELS.
 // pDestBuffer:      destination buffer (required).
 // cbDestBufferSize: capacity of pDestBuffer in bytes.
 // nBytesWritten:    receives the number of bytes stored; 0 on every failure.
 // While recording, waits up to 20 ms for a packet to arrive.
 // Returns k_EVoiceResultNotInitialized for null arguments or decoder errors,
 // k_EVoiceResultNoData / k_EVoiceResultNotRecording when the queue is empty
 // (recording active / stopped), k_EVoiceResultBufferTooSmall when the buffer
 // cannot hold the result (the packet then stays queued), else k_EVoiceResultOK.
 // NOTE(review): the PCM path builds a fresh decoder per call, so no decoder
 // state is carried from one packet to the next.
 EVoiceResult VoiceChat::GetVoice(bool bWantCompressed, void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten) {
    if (!pDestBuffer || !nBytesWritten) return k_EVoiceResultNotInitialized;
    *nBytesWritten = 0;
    // Every queued packet is one FRAME_SIZE frame, so this is the decoded size.
    const size_t pcmFrameBytes = static_cast<size_t>(FRAME_SIZE) * CHANNELS * sizeof(int16_t);
    std::vector<uint8_t> buf;
    {
        std::unique_lock<std::mutex> lock(inputMutex);
        if (isRecording.load()) {
            inputCond.wait_for(lock, std::chrono::milliseconds(20), [this] {
                return !this->encodedQueue.empty();
                });
        }
        if (encodedQueue.empty()) {
            return isRecording.load() ? k_EVoiceResultNoData : k_EVoiceResultNotRecording;
        }
        // Validate the destination BEFORE popping so a too-small buffer neither
        // loses the packet nor gets written past its end.
        const size_t required = bWantCompressed ? encodedQueue.front().size() : pcmFrameBytes;
        if (cbDestBufferSize < required) return k_EVoiceResultBufferTooSmall;
        buf = std::move(encodedQueue.front());
        encodedQueue.pop();
    }
    if (bWantCompressed) {
        memcpy(pDestBuffer, buf.data(), buf.size());
        *nBytesWritten = static_cast<uint32_t>(buf.size());
        return k_EVoiceResultOK;
    }
    int err = 0;
    OpusDecoder* tempDecoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
    if (!tempDecoder || err != OPUS_OK) {
        if (tempDecoder) opus_decoder_destroy(tempDecoder);
        return k_EVoiceResultNotInitialized;
    }
    // Safe: the buffer was verified above to hold a full FRAME_SIZE frame.
    int16_t* pcm = static_cast<int16_t*>(pDestBuffer);
    const int samples = opus_decode(tempDecoder, buf.data(), static_cast<opus_int32>(buf.size()), pcm, FRAME_SIZE, 0);
    opus_decoder_destroy(tempDecoder);
    if (samples < 0) return k_EVoiceResultNotInitialized;
    *nBytesWritten = static_cast<uint32_t>(samples * CHANNELS * sizeof(int16_t));
    return k_EVoiceResultOK;
 }
 // Decodes one encoded voice packet to 16-bit PCM.
 // pCompressed / cbCompressed: the encoded packet and its size in bytes.
 // pDestBuffer:                destination for the decoded samples.
 // cbDestBufferSize:           capacity of pDestBuffer in bytes.
 // nBytesWritten:              receives the bytes stored in pDestBuffer, or the
 //                             required size when the buffer is too small;
 //                             0 on other failures.
 // nDesiredSampleRate:         output rate handed to opus_decoder_create. Opus
 //                             only accepts 8000, 12000, 16000, 24000 or 48000;
 //                             any other value fails decoder creation.
 // Returns k_EVoiceResultNotInitialized for null arguments or decoder errors,
 // k_EVoiceResultBufferTooSmall when pDestBuffer cannot hold the decoded audio
 // (nothing is written to it in that case), else k_EVoiceResultOK.
 EVoiceResult VoiceChat::DecompressVoice(const void* pCompressed, uint32_t cbCompressed,
    void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten,
    uint32_t nDesiredSampleRate) {
    if (!pCompressed || !pDestBuffer || !nBytesWritten) return k_EVoiceResultNotInitialized;
    *nBytesWritten = 0;
    int err = 0;
    OpusDecoder* tempDecoder = opus_decoder_create(static_cast<opus_int32>(nDesiredSampleRate), CHANNELS, &err);
    if (!tempDecoder || err != OPUS_OK) {
        if (tempDecoder) opus_decoder_destroy(tempDecoder);
        return k_EVoiceResultNotInitialized;
    }
    // Decode into a local frame first: the caller's buffer may be smaller than
    // FRAME_SIZE samples and must not be written before its size is checked.
    int16_t pcm[FRAME_SIZE * CHANNELS] = { 0 };
    const int samples = opus_decode(tempDecoder, static_cast<const uint8_t*>(pCompressed),
        static_cast<opus_int32>(cbCompressed), pcm, FRAME_SIZE, 0);
    opus_decoder_destroy(tempDecoder);
    if (samples < 0) return k_EVoiceResultNotInitialized;
    const uint32_t bytesRequired = static_cast<uint32_t>(samples * CHANNELS * sizeof(int16_t));
    if (cbDestBufferSize < bytesRequired) {
        *nBytesWritten = bytesRequired; // tell the caller how much room is needed
        return k_EVoiceResultBufferTooSmall;
    }
    memcpy(pDestBuffer, pcm, bytesRequired);
    *nBytesWritten = bytesRequired;
    return k_EVoiceResultOK;
 }
 // Called externally (e.g., from network thread) to enqueue received voice
 // We usually dont need this since it actually sends the voice data by SteamNetworking (or other) with GetVoice && DecompressVoice
 void VoiceChat::QueueIncomingVoice(uint64_t userId, const uint8_t* data, size_t len) {
    if (!data || len == 0) return;
    std::lock_guard<std::mutex> lock(playbackQueueMutex);
    playbackQueue.push({ userId, std::vector<uint8_t>(data, data + len) });
 }