From a9ee9248472229fd6ce90eb2ae16198ce2c74f77 Mon Sep 17 00:00:00 2001
From: Detanup01 <91248446+Detanup01@users.noreply.github.com>
Date: Wed, 16 Jul 2025 21:54:04 +0200
Subject: [PATCH] voicechat implementation

---
 dll/dll/steam_user.h |   4 +-
 dll/dll/voicechat.h  |  85 +++++++++++++++
 dll/steam_user.cpp   |  43 +-------
 dll/voicechat.cpp    | 253 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 345 insertions(+), 40 deletions(-)
 create mode 100644 dll/dll/voicechat.h
 create mode 100644 dll/voicechat.cpp
diff --git a/dll/dll/steam_user.h b/dll/dll/steam_user.h
index ff77d201..e63fef86 100644
--- a/dll/dll/steam_user.h
+++ b/dll/dll/steam_user.h
@@ -20,6 +20,7 @@
 
 #include "base.h"
 #include "auth.h"
+#include "voicechat.h"
 
 class Steam_User :
 public ISteamUser004,
@@ -49,10 +50,12 @@ public ISteamUser
 	class SteamCallResults *callback_results{};
     Local_Storage *local_storage{};
 
-	bool recording = false;
-	std::chrono::high_resolution_clock::time_point last_get_voice{};
 	std::string encrypted_app_ticket{};
 	Auth_Manager *auth_manager{};
+    // Voice backend owned by this object. `voicechat` always points at it, so the
+    // ISteamUser voice methods that forward through the pointer never see null.
+    VoiceChat voicechat_instance{};
+    VoiceChat* voicechat{ &voicechat_instance };
     std::map<std::string, std::string> registry{};
     std::string registry_nullptr{};
 
diff --git a/dll/dll/voicechat.h b/dll/dll/voicechat.h
new file mode 100644
index 00000000..6efbf7f1
--- /dev/null
+++ b/dll/dll/voicechat.h
@@ -0,0 +1,118 @@
+/* Copyright (C) 2019 Mr Goldberg
+   This file is part of the Goldberg Emulator
+
+   The Goldberg Emulator is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   The Goldberg Emulator is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the Goldberg Emulator; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef VOICECHAT_INCLUDE_H
+#define VOICECHAT_INCLUDE_H
+
+#include "base.h"
+
+#include <atomic>
+#include <condition_variable>
+#include <cstddef>
+#include <cstdint>
+#include <mutex>
+#include <queue>
+#include <unordered_map>
+#include <vector>
+
+#include <opus/opus.h>
+#include <portaudio.h>
+
+// Audio format shared by capture, encoding and playback.
+#define SAMPLE_RATE 48000
+#define CHANNELS 1
+#define FRAME_SIZE 960 // 20ms @ 48kHz
+#define MAX_ENCODED_SIZE 4000
+#define MAX_DECODED_SIZE (FRAME_SIZE * 2 * sizeof(int16_t)) // for stereo
+#define DEFAULT_BITRATE 32000
+
+// One encoded voice packet from a remote user, queued for playback.
+struct VoicePacket {
+    uint64_t userId;
+    std::vector<uint8_t> encoded;
+};
+
+// Voice capture and playback built on PortAudio streams and the Opus codec.
+// Instances own native stream/codec handles and are therefore not copyable.
+class VoiceChat
+{
+    std::atomic<bool> isRecording{ false };
+    std::atomic<bool> isPlaying{ false };
+
+    // Encoded microphone packets produced by inputCallback, consumed by GetVoice.
+    std::mutex inputMutex;
+    std::condition_variable inputCond;
+    std::queue<std::vector<uint8_t>> encodedQueue;
+
+    // Packets queued by QueueIncomingVoice, consumed by outputCallback.
+    std::mutex playbackQueueMutex;
+    std::queue<VoicePacket> playbackQueue;
+
+    // One Opus decoder per remote user id, created on demand by outputCallback.
+    std::mutex decoderMapMutex;
+    std::unordered_map<uint64_t, OpusDecoder*> decoderMap;
+
+    OpusEncoder* encoder = nullptr;
+    PaStream* inputStream = nullptr;
+    PaStream* outputStream = nullptr;
+
+    // PortAudio stream callbacks; the last argument is the user-data pointer given to Pa_OpenStream.
+    static int inputCallback(const void* input, void*, unsigned long frameCount,
+        const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void*);
+    static int outputCallback(const void*, void* output, unsigned long frameCount,
+        const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void*);
+
+public:
+    VoiceChat() = default;
+    VoiceChat(const VoiceChat&) = delete;
+    VoiceChat& operator=(const VoiceChat&) = delete;
+
+    // Stops recording and playback so no stream callback outlives the object.
+    ~VoiceChat() {
+        StopVoiceRecording();
+        StopVoicePlayback();
+    }
+
+    // Initializes / releases the PortAudio library.
+    bool InitVoiceSystem();
+    void ShutdownVoiceSystem();
+
+    // Start / stop microphone capture; Start returns false on failure.
+    bool StartVoiceRecording();
+    void StopVoiceRecording();
+
+    // Start / stop playback of packets passed to QueueIncomingVoice.
+    bool StartVoicePlayback();
+    void StopVoicePlayback();
+
+    // Reports the size in bytes of the next captured packet.
+    EVoiceResult GetAvailableVoice(uint32_t* pcbCompressed);
+
+    // Copies the next captured packet into pDestBuffer, Opus-encoded when bWantCompressed
+    // is true and decoded to PCM otherwise; *nBytesWritten receives the byte count.
+    EVoiceResult GetVoice(bool bWantCompressed, void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten);
+
+    // Decodes one Opus packet into 16-bit PCM in pDestBuffer.
+    EVoiceResult DecompressVoice(const void* pCompressed, uint32_t cbCompressed,
+        void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten,
+        uint32_t nDesiredSampleRate);
+
+    // Queues an encoded packet received from userId for playback.
+    void QueueIncomingVoice(uint64_t userId, const uint8_t* data, size_t len);
+};
+
+#endif // VOICECHAT_INCLUDE_H
diff --git a/dll/steam_user.cpp b/dll/steam_user.cpp
index 8c82a996..67fed2c9 100644
--- a/dll/steam_user.cpp
+++ b/dll/steam_user.cpp
@@ -480,10 +480,12 @@ bool Steam_User::GetUserDataFolder( char *pchBuffer, int cubBuffer )
 void Steam_User::StartVoiceRecording( )
 {
     PRINT_DEBUG_ENTRY();
-    last_get_voice = std::chrono::high_resolution_clock::now();
-    recording = true;
-    //TODO:fix
-    recording = false;
+    // The voice backend may not have been created; this interface has no way to
+    // report that, so the request is simply ignored.
+    if (!voicechat) return;
+    if (!voicechat->StartVoiceRecording()) {
+        PRINT_DEBUG("failed to start voice recording");
+    }
 }
 
 // Stops voice recording. Because people often release push-to-talk keys early, the system will keep recording for
@@ -492,7 +489,8 @@ void Steam_User::StartVoiceRecording( )
 void Steam_User::StopVoiceRecording( )
 {
     PRINT_DEBUG_ENTRY();
-    recording = false;
+    // Nothing to stop when the voice backend was never created.
+    if (voicechat) voicechat->StopVoiceRecording();
 }
 
 // Determine the size of captured audio data that is available from GetVoice.
@@ -502,14 +499,12 @@ void Steam_User::StopVoiceRecording( )
 EVoiceResult Steam_User::GetAvailableVoice( uint32 *pcbCompressed, uint32 *pcbUncompressed_Deprecated, uint32 nUncompressedVoiceDesiredSampleRate_Deprecated  )
 {
     PRINT_DEBUG_ENTRY();
-    if (pcbCompressed) *pcbCompressed = 0;
-    if (pcbUncompressed_Deprecated) *pcbUncompressed_Deprecated = 0;
-    if (!recording) return k_EVoiceResultNotRecording;
-    double seconds = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - last_get_voice).count();
-    if (pcbCompressed) *pcbCompressed = static_cast<uint32>(seconds * 1024.0 * 64.0 / 8.0);
-    if (pcbUncompressed_Deprecated) *pcbUncompressed_Deprecated = static_cast<uint32>(seconds * (double)nUncompressedVoiceDesiredSampleRate_Deprecated * 2.0);
-
-    return k_EVoiceResultOK;
+    // Start from "nothing available" so no out-parameter is left stale on failure.
+    // The deprecated uncompressed size is not provided by the voice backend and stays 0.
+    if (pcbCompressed) *pcbCompressed = 0;
+    if (pcbUncompressed_Deprecated) *pcbUncompressed_Deprecated = 0;
+    if (!voicechat) return k_EVoiceResultNotInitialized;
+    return voicechat->GetAvailableVoice(pcbCompressed);
 }
 
 EVoiceResult Steam_User::GetAvailableVoice(uint32 *pcbCompressed, uint32 *pcbUncompressed)
@@ -542,22 +532,12 @@ EVoiceResult Steam_User::GetAvailableVoice(uint32 *pcbCompressed, uint32 *pcbUnc
 EVoiceResult Steam_User::GetVoice( bool bWantCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten, bool bWantUncompressed_Deprecated, void *pUncompressedDestBuffer_Deprecated , uint32 cbUncompressedDestBufferSize_Deprecated , uint32 *nUncompressBytesWritten_Deprecated , uint32 nUncompressedVoiceDesiredSampleRate_Deprecated  )
 {
     PRINT_DEBUG_ENTRY();
-    if (!recording) return k_EVoiceResultNotRecording;
-
-    double seconds = std::chrono::duration_cast<std::chrono::duration<double>>(std::chrono::high_resolution_clock::now() - last_get_voice).count();
-    if (bWantCompressed) {
-        uint32 towrite = static_cast<uint32>(seconds * 1024.0 * 64.0 / 8.0);
-        if (cbDestBufferSize < towrite) towrite = cbDestBufferSize;
-        if (pDestBuffer) memset(pDestBuffer, 0, towrite);
-        if (nBytesWritten) *nBytesWritten = towrite;
-    }
-
-    if (bWantUncompressed_Deprecated) {
-        PRINT_DEBUG("Wanted Uncompressed");
-    }
-
-    last_get_voice = std::chrono::high_resolution_clock::now();
-    return k_EVoiceResultOK;
+    // Start from "nothing written" so no out-parameter is left stale on failure.
+    // The deprecated uncompressed output is not filled by the voice backend and stays 0.
+    if (nBytesWritten) *nBytesWritten = 0;
+    if (nUncompressBytesWritten_Deprecated) *nUncompressBytesWritten_Deprecated = 0;
+    if (!voicechat) return k_EVoiceResultNotInitialized;
+    return voicechat->GetVoice(bWantCompressed, pDestBuffer, cbDestBufferSize, nBytesWritten);
 }
 
 EVoiceResult Steam_User::GetVoice( bool bWantCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten, bool bWantUncompressed, void *pUncompressedDestBuffer, uint32 cbUncompressedDestBufferSize, uint32 *nUncompressBytesWritten )
@@ -581,15 +556,11 @@ EVoiceResult Steam_User::GetCompressedVoice( void *pDestBuffer, uint32 cbDestBuf
 EVoiceResult Steam_User::DecompressVoice( const void *pCompressed, uint32 cbCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten, uint32 nDesiredSampleRate )
 {
     PRINT_DEBUG_ENTRY();
-    if (!recording) return k_EVoiceResultNotRecording;
-
-    uint32 uncompressed = static_cast<uint32>((double)cbCompressed * ((double)nDesiredSampleRate / 8192.0));
-    if(nBytesWritten) *nBytesWritten = uncompressed;
-    if (uncompressed > cbDestBufferSize) uncompressed = cbDestBufferSize;
-    if (pDestBuffer) memset(pDestBuffer, 0, uncompressed);
-
-    return k_EVoiceResultOK;
+    // Start from "nothing written" so the out-parameter is never left stale on failure.
+    if (nBytesWritten) *nBytesWritten = 0;
+    if (!voicechat) return k_EVoiceResultNotInitialized;
+    return voicechat->DecompressVoice(pCompressed, cbCompressed, pDestBuffer, cbDestBufferSize, nBytesWritten, nDesiredSampleRate);
 }
 
 EVoiceResult Steam_User::DecompressVoice( const void *pCompressed, uint32 cbCompressed, void *pDestBuffer, uint32 cbDestBufferSize, uint32 *nBytesWritten )
 {
diff --git a/dll/voicechat.cpp b/dll/voicechat.cpp
new file mode 100644
index 00000000..f6550902
--- /dev/null
+++ b/dll/voicechat.cpp
@@ -0,0 +1,415 @@
+#include "dll/voicechat.h"
+
+namespace {
+
+// Serializes Pa_Initialize()/Pa_Terminate(). PortAudio itself counts nested
+// Pa_Initialize() calls, so no reference count is kept on this side.
+std::mutex portAudioInitMutex;
+
+// Largest frame one Opus packet can decode to: 120 ms at 48 kHz.
+constexpr int kMaxOpusFrameSamples = 5760;
+
+// Cap for both packet queues (5 s of 20 ms frames); the oldest packet is dropped beyond it.
+constexpr size_t kMaxQueuedPackets = 250;
+
+// True for the sample rates an Opus decoder can be created with.
+bool isOpusNativeRate(uint32_t rate) {
+    return rate == 8000 || rate == 12000 || rate == 16000 || rate == 24000 || rate == 48000;
+}
+
+// Clamps a mixed sample to the int16 range instead of letting it wrap.
+int16_t saturate16(int32_t sample) {
+    if (sample > 32767) return 32767;
+    if (sample < -32768) return -32768;
+    return static_cast<int16_t>(sample);
+}
+
+} // namespace
+
+// Initializes PortAudio for one user (a recording or a playback session).
+// Returns false when PortAudio cannot be initialized; every successful call must be
+// matched by exactly one ShutdownVoiceSystem().
+bool VoiceChat::InitVoiceSystem() {
+    std::lock_guard<std::mutex> lock(portAudioInitMutex);
+    const PaError paErr = Pa_Initialize();
+    if (paErr != paNoError) {
+        PRINT_DEBUG("PortAudio initialization failed: %s", Pa_GetErrorText(paErr));
+        return false;
+    }
+    return true;
+}
+
+// Releases one InitVoiceSystem() reference; PortAudio shuts down with the last one.
+void VoiceChat::ShutdownVoiceSystem() {
+    std::lock_guard<std::mutex> lock(portAudioInitMutex);
+    Pa_Terminate();
+}
+
+// PortAudio capture callback: encodes one FRAME_SIZE block of mono int16 samples with
+// Opus and queues the packet for GetVoice(). `data` is the owning VoiceChat.
+int VoiceChat::inputCallback(const void* input, void*, unsigned long frameCount,
+    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
+    VoiceChat* chat = static_cast<VoiceChat*>(data);
+    if (!chat || !chat->encoder || !input || frameCount != FRAME_SIZE || !chat->isRecording.load()) return paContinue;
+
+    // Encode into a stack buffer so only the bytes actually produced get heap-allocated.
+    uint8_t packet[MAX_ENCODED_SIZE];
+    const int len = opus_encode(chat->encoder, static_cast<const int16_t*>(input), FRAME_SIZE,
+        packet, MAX_ENCODED_SIZE);
+    if (len <= 0) {
+        PRINT_DEBUG("Opus encoding failed: %d", len);
+        return paContinue;
+    }
+
+    {
+        std::lock_guard<std::mutex> lock(chat->inputMutex);
+        // Nobody may be draining the queue; keep memory bounded by dropping the oldest packet.
+        if (chat->encodedQueue.size() >= kMaxQueuedPackets) chat->encodedQueue.pop();
+        chat->encodedQueue.emplace(packet, packet + len);
+    }
+    chat->inputCond.notify_one();
+    return paContinue;
+}
+
+// PortAudio playback callback: decodes every queued packet and mixes the result into
+// the interleaved stereo int16 output buffer. `data` is the owning VoiceChat.
+// NOTE(review): all queued packets are mixed into the same output block, so several
+// packets from one user that arrive between two callbacks overlap instead of playing
+// back to back - confirm whether per-user jitter buffering is needed.
+int VoiceChat::outputCallback(const void*, void* output, unsigned long frameCount,
+    const PaStreamCallbackTimeInfo*, PaStreamCallbackFlags, void* data) {
+    VoiceChat* chat = static_cast<VoiceChat*>(data);
+    int16_t* out = static_cast<int16_t*>(output);
+    if (!out) return paContinue;
+    memset(out, 0, frameCount * sizeof(int16_t) * 2); // stereo output, silence by default
+    if (!chat) return paContinue;
+
+    // Never decode more samples than the scratch buffers or the output block can hold.
+    const int maxSamples = static_cast<int>(frameCount < static_cast<unsigned long>(FRAME_SIZE) ? frameCount : FRAME_SIZE);
+    int32_t mix[FRAME_SIZE] = { 0 }; // 32-bit accumulator so overlapping voices cannot wrap
+
+    {
+        std::lock_guard<std::mutex> lock(chat->playbackQueueMutex);
+        while (!chat->playbackQueue.empty()) {
+            VoicePacket pkt = std::move(chat->playbackQueue.front());
+            chat->playbackQueue.pop();
+
+            OpusDecoder* decoder = nullptr;
+            {
+                std::lock_guard<std::mutex> dlock(chat->decoderMapMutex);
+                auto it = chat->decoderMap.find(pkt.userId);
+                if (it != chat->decoderMap.end()) decoder = it->second;
+                if (!decoder) {
+                    // One decoder per sender: Opus decoders carry state between packets.
+                    int err = 0;
+                    decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
+                    if (err != OPUS_OK || !decoder) continue;
+                    chat->decoderMap[pkt.userId] = decoder;
+                }
+            }
+
+            int16_t pcm[FRAME_SIZE];
+            const int decoded = opus_decode(decoder, pkt.encoded.data(),
+                static_cast<opus_int32>(pkt.encoded.size()), pcm, maxSamples, 0);
+            for (int i = 0; i < decoded; ++i) {
+                mix[i] += pcm[i] / 2;
+            }
+        }
+    }
+
+    for (int i = 0; i < maxSamples; ++i) {
+        const int16_t sample = saturate16(mix[i]);
+        out[2 * i] = sample;     // left
+        out[2 * i + 1] = sample; // right
+    }
+    return paContinue;
+}
+
+// Opens the default input device, creates the Opus encoder and starts capturing.
+// Returns true when recording is (already) running, false on any setup failure.
+bool VoiceChat::StartVoiceRecording() {
+    if (isRecording.load()) return true;
+    if (!InitVoiceSystem()) return false;
+
+    // Releases everything acquired so far, including the InitVoiceSystem() reference above.
+    const auto abortStart = [this]() {
+        isRecording.store(false);
+        if (inputStream) {
+            Pa_CloseStream(inputStream);
+            inputStream = nullptr;
+        }
+        if (encoder) {
+            opus_encoder_destroy(encoder);
+            encoder = nullptr;
+        }
+        ShutdownVoiceSystem();
+        return false;
+    };
+
+    int err = 0;
+    encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, OPUS_APPLICATION_VOIP, &err);
+    if (!encoder || err != OPUS_OK) {
+        PRINT_DEBUG("Opus encoder create failed: %d", err);
+        return abortStart();
+    }
+
+    opus_encoder_ctl(encoder, OPUS_SET_BITRATE(DEFAULT_BITRATE));
+
+    PaStreamParameters params{};
+    params.device = Pa_GetDefaultInputDevice();
+    const PaDeviceInfo* deviceInfo = (params.device == paNoDevice) ? nullptr : Pa_GetDeviceInfo(params.device);
+    if (!deviceInfo) {
+        PRINT_DEBUG("No usable audio input device");
+        return abortStart();
+    }
+    params.channelCount = CHANNELS;
+    params.sampleFormat = paInt16;
+    params.suggestedLatency = deviceInfo->defaultLowInputLatency;
+    params.hostApiSpecificStreamInfo = nullptr;
+
+    PaError paErr = Pa_OpenStream(&inputStream, &params, nullptr, SAMPLE_RATE, FRAME_SIZE,
+        paClipOff, inputCallback, this);
+    if (paErr != paNoError) {
+        inputStream = nullptr; // the handle is not valid when opening fails
+        PRINT_DEBUG("Failed to open input stream: %s", Pa_GetErrorText(paErr));
+        return abortStart();
+    }
+
+    // Raise the flag before starting so the very first callback is not discarded.
+    isRecording.store(true);
+    paErr = Pa_StartStream(inputStream);
+    if (paErr != paNoError) {
+        PRINT_DEBUG("Failed to start input stream: %s", Pa_GetErrorText(paErr));
+        return abortStart();
+    }
+    return true;
+}
+
+// Stops capturing and releases the input stream and the encoder. Packets that were
+// already encoded stay queued so the caller can still drain them with GetVoice().
+void VoiceChat::StopVoiceRecording() {
+    if (!isRecording.exchange(false)) return;
+    // The stream goes first: its callback uses the encoder destroyed below.
+    if (inputStream) {
+        Pa_StopStream(inputStream);
+        Pa_CloseStream(inputStream);
+        inputStream = nullptr;
+    }
+    if (encoder) {
+        opus_encoder_destroy(encoder);
+        encoder = nullptr;
+    }
+    ShutdownVoiceSystem();
+}
+
+// Opens the default output device (stereo int16) and starts the playback callback.
+// Returns true when playback is (already) running, false on any setup failure.
+bool VoiceChat::StartVoicePlayback() {
+    if (isPlaying.load()) return true;
+    if (!InitVoiceSystem()) return false;
+
+    // Releases everything acquired so far, including the InitVoiceSystem() reference above.
+    const auto abortStart = [this]() {
+        isPlaying.store(false);
+        if (outputStream) {
+            Pa_CloseStream(outputStream);
+            outputStream = nullptr;
+        }
+        ShutdownVoiceSystem();
+        return false;
+    };
+
+    PaStreamParameters params{};
+    params.device = Pa_GetDefaultOutputDevice();
+    const PaDeviceInfo* deviceInfo = (params.device == paNoDevice) ? nullptr : Pa_GetDeviceInfo(params.device);
+    if (!deviceInfo) {
+        PRINT_DEBUG("No usable audio output device");
+        return abortStart();
+    }
+    params.channelCount = 2; // stereo output
+    params.sampleFormat = paInt16;
+    params.suggestedLatency = deviceInfo->defaultLowOutputLatency;
+    params.hostApiSpecificStreamInfo = nullptr;
+
+    // The callback receives `this` as user data to reach the playback queue and decoders.
+    PaError paErr = Pa_OpenStream(&outputStream, nullptr, &params, SAMPLE_RATE, FRAME_SIZE,
+        paClipOff, outputCallback, this);
+    if (paErr != paNoError) {
+        outputStream = nullptr; // the handle is not valid when opening fails
+        PRINT_DEBUG("Failed to open output stream: %s", Pa_GetErrorText(paErr));
+        return abortStart();
+    }
+
+    isPlaying.store(true);
+    paErr = Pa_StartStream(outputStream);
+    if (paErr != paNoError) {
+        PRINT_DEBUG("Failed to start output stream: %s", Pa_GetErrorText(paErr));
+        return abortStart();
+    }
+    return true;
+}
+
+// Stops playback, closes the output stream and destroys all per-user decoders.
+void VoiceChat::StopVoicePlayback() {
+    if (!isPlaying.exchange(false)) return;
+    if (outputStream) {
+        Pa_StopStream(outputStream);
+        Pa_CloseStream(outputStream);
+        outputStream = nullptr;
+    }
+
+    {
+        std::lock_guard<std::mutex> lock(decoderMapMutex);
+        for (auto& entry : decoderMap) {
+            if (entry.second) opus_decoder_destroy(entry.second);
+        }
+        decoderMap.clear();
+    }
+
+    ShutdownVoiceSystem();
+}
+
+// Reports the size in bytes of the next encoded packet GetVoice() would return.
+// pcbCompressed may be null; it is set to 0 whenever the result is not k_EVoiceResultOK.
+EVoiceResult VoiceChat::GetAvailableVoice(uint32_t* pcbCompressed) {
+    if (pcbCompressed) *pcbCompressed = 0;
+
+    std::lock_guard<std::mutex> lock(inputMutex);
+    if (encodedQueue.empty()) {
+        // Packets captured before StopVoiceRecording() are still reported; only an empty
+        // queue distinguishes "nothing yet" from "not recording".
+        return isRecording.load() ? k_EVoiceResultNoData : k_EVoiceResultNotRecording;
+    }
+
+    if (pcbCompressed) *pcbCompressed = static_cast<uint32_t>(encodedQueue.front().size());
+    return k_EVoiceResultOK;
+}
+
+// Hands the next captured packet to the caller: the raw Opus packet when bWantCompressed
+// is true, otherwise the packet decoded to 16-bit mono PCM at SAMPLE_RATE.
+// A packet is only dequeued once it is known to fit, so k_EVoiceResultBufferTooSmall
+// does not lose audio. Returns k_EVoiceResultNotRecording once recording has stopped
+// and the queue is drained.
+// NOTE(review): while recording this blocks the caller for up to 20 ms when no packet
+// is ready - confirm that is acceptable for games polling from their main loop.
+EVoiceResult VoiceChat::GetVoice(bool bWantCompressed, void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten) {
+    if (nBytesWritten) *nBytesWritten = 0;
+    if (!pDestBuffer || !nBytesWritten) return k_EVoiceResultNotInitialized;
+
+    std::unique_lock<std::mutex> lock(inputMutex);
+    if (encodedQueue.empty() && isRecording.load()) {
+        inputCond.wait_for(lock, std::chrono::milliseconds(20), [this] {
+            return !this->encodedQueue.empty();
+            });
+    }
+    if (encodedQueue.empty()) {
+        return isRecording.load() ? k_EVoiceResultNoData : k_EVoiceResultNotRecording;
+    }
+
+    const std::vector<uint8_t>& head = encodedQueue.front();
+    if (bWantCompressed) {
+        if (cbDestBufferSize < head.size()) return k_EVoiceResultBufferTooSmall;
+        memcpy(pDestBuffer, head.data(), head.size());
+        *nBytesWritten = static_cast<uint32_t>(head.size());
+        encodedQueue.pop();
+        return k_EVoiceResultOK;
+    }
+
+    // Size the PCM output from the packet header before committing to dequeue it.
+    const int expected = opus_packet_get_nb_samples(head.data(), static_cast<opus_int32>(head.size()), SAMPLE_RATE);
+    if (expected <= 0 || expected > kMaxOpusFrameSamples) {
+        encodedQueue.pop(); // undecodable packet: drop it so it cannot block the queue
+        return k_EVoiceResultDataCorrupted;
+    }
+    const uint32_t requiredSize = static_cast<uint32_t>(expected * CHANNELS * sizeof(int16_t));
+    if (cbDestBufferSize < requiredSize) return k_EVoiceResultBufferTooSmall;
+
+    const std::vector<uint8_t> packet = std::move(encodedQueue.front());
+    encodedQueue.pop();
+    lock.unlock();
+
+    // NOTE(review): a fresh decoder per packet discards Opus' inter-frame state; a
+    // persistent decoder would decode more faithfully.
+    int err = 0;
+    OpusDecoder* tempDecoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &err);
+    if (!tempDecoder || err != OPUS_OK) return k_EVoiceResultNotInitialized;
+
+    // Decode into a local buffer: the caller's void* buffer need not be int16-aligned.
+    int16_t pcm[kMaxOpusFrameSamples];
+    const int samples = opus_decode(tempDecoder, packet.data(), static_cast<opus_int32>(packet.size()), pcm, expected, 0);
+    opus_decoder_destroy(tempDecoder);
+    if (samples < 0) return k_EVoiceResultDataCorrupted;
+
+    const uint32_t written = static_cast<uint32_t>(samples * CHANNELS * sizeof(int16_t));
+    memcpy(pDestBuffer, pcm, written);
+    *nBytesWritten = written;
+    return k_EVoiceResultOK;
+}
+
+// Decodes one Opus packet (as produced by GetVoice with bWantCompressed) to 16-bit mono
+// PCM at nDesiredSampleRate (8000..48000 Hz). Rates Opus cannot decode to directly are
+// produced by decoding at SAMPLE_RATE and resampling with linear interpolation.
+// On k_EVoiceResultBufferTooSmall, *nBytesWritten holds the number of bytes required.
+EVoiceResult VoiceChat::DecompressVoice(const void* pCompressed, uint32_t cbCompressed,
+    void* pDestBuffer, uint32_t cbDestBufferSize, uint32_t* nBytesWritten,
+    uint32_t nDesiredSampleRate) {
+    if (nBytesWritten) *nBytesWritten = 0;
+    if (!pCompressed || !pDestBuffer || !nBytesWritten) return k_EVoiceResultNotInitialized;
+    if (cbCompressed == 0) return k_EVoiceResultNoData;
+    if (nDesiredSampleRate < 8000 || nDesiredSampleRate > static_cast<uint32_t>(SAMPLE_RATE)) return k_EVoiceResultNotInitialized;
+
+    const bool nativeRate = isOpusNativeRate(nDesiredSampleRate);
+    const opus_int32 decodeRate = nativeRate ? static_cast<opus_int32>(nDesiredSampleRate) : SAMPLE_RATE;
+
+    int err = 0;
+    OpusDecoder* tempDecoder = opus_decoder_create(decodeRate, CHANNELS, &err);
+    if (!tempDecoder || err != OPUS_OK) return k_EVoiceResultNotInitialized;
+
+    // Decode into a local buffer big enough for any packet, so a short or unaligned
+    // caller buffer can never be overrun.
+    int16_t decoded[kMaxOpusFrameSamples];
+    const int samples = opus_decode(tempDecoder, static_cast<const uint8_t*>(pCompressed),
+        static_cast<opus_int32>(cbCompressed), decoded, kMaxOpusFrameSamples, 0);
+    opus_decoder_destroy(tempDecoder);
+    if (samples < 0) return k_EVoiceResultDataCorrupted;
+
+    const uint32_t inSamples = static_cast<uint32_t>(samples);
+    const uint32_t outSamples = nativeRate
+        ? inSamples
+        : static_cast<uint32_t>(static_cast<uint64_t>(inSamples) * nDesiredSampleRate / SAMPLE_RATE);
+    const uint32_t bytesRequired = static_cast<uint32_t>(outSamples * CHANNELS * sizeof(int16_t));
+    if (cbDestBufferSize < bytesRequired) {
+        *nBytesWritten = bytesRequired;
+        return k_EVoiceResultBufferTooSmall;
+    }
+
+    if (nativeRate) {
+        memcpy(pDestBuffer, decoded, bytesRequired);
+    }
+    else {
+        // outSamples <= inSamples here because the target rate is below SAMPLE_RATE.
+        int16_t resampled[kMaxOpusFrameSamples];
+        for (uint32_t i = 0; i < outSamples; ++i) {
+            const uint64_t pos = static_cast<uint64_t>(i) * SAMPLE_RATE; // source position scaled by the target rate
+            const uint32_t idx = static_cast<uint32_t>(pos / nDesiredSampleRate);
+            const int64_t frac = static_cast<int64_t>(pos % nDesiredSampleRate);
+            const int64_t a = decoded[idx];
+            const int64_t b = decoded[(idx + 1 < inSamples) ? idx + 1 : idx];
+            resampled[i] = static_cast<int16_t>(a + (b - a) * frac / static_cast<int64_t>(nDesiredSampleRate));
+        }
+        memcpy(pDestBuffer, resampled, bytesRequired);
+    }
+
+    *nBytesWritten = bytesRequired;
+    return k_EVoiceResultOK;
+}
+
+// Called externally (e.g., from network thread) to enqueue received voice
+// We usually don't need this since the game transports the voice data itself (SteamNetworking or other) with GetVoice && DecompressVoice
+void VoiceChat::QueueIncomingVoice(uint64_t userId, const uint8_t* data, size_t len) {
+    if (!data || len == 0) return;
+    std::lock_guard<std::mutex> lock(playbackQueueMutex);
+    // Playback may not be running; keep memory bounded by dropping the oldest packet.
+    if (playbackQueue.size() >= kMaxQueuedPackets) playbackQueue.pop();
+    playbackQueue.push({ userId, std::vector<uint8_t>(data, data + len) });
+}