From 5a0630f01534e1af110422ca5800cdfbdede8b02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=88=98=E7=9A=93?= <whiteaxe@tuta.io>
Date: Wed, 6 Aug 2025 14:34:03 -0400
Subject: [PATCH] Patch wasm2c to evenly distribute the data segments

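This prevents the TOC (table of contents) on PowerPC targets from
overflowing because of the size of the files generated by wasm2c. With
that fixed, LTO is no longer required for PowerPC libretro builds, so the
`ruby_lto` option and the Meson check that enforced it are removed. It
should also help improve compilation times. I'll be submitting this patch
as a pull request to WABT later.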
---
 .github/workflows/autobuild.yml     |   2 +-
 libretro/Makefile                   |  15 ++-
 libretro/wasm2c-data-segments.patch | 191 ++++++++++++++++++++++++++++
 meson.build                         |  13 +-
 meson_options.txt                   |   1 -
 5 files changed, 201 insertions(+), 21 deletions(-)
 create mode 100644 libretro/wasm2c-data-segments.patch

diff --git a/.github/workflows/autobuild.yml b/.github/workflows/autobuild.yml
index c92c5101..223843cb 100644
--- a/.github/workflows/autobuild.yml
+++ b/.github/workflows/autobuild.yml
@@ -1279,7 +1279,7 @@ jobs:
       - name: Configure core
         run: |
           git config --global --add safe.directory "$(pwd)"
-          CLICOLOR_FORCE=1 meson setup build --cross-file libretro/meson-ps3.txt --buildtype release -Db_lto=false -Druby_lto=true -Dlibretro=true
+          CLICOLOR_FORCE=1 meson setup build --cross-file libretro/meson-ps3.txt --buildtype release -Db_lto=false -Dlibretro=true
 
       - name: Build core
         run: |
diff --git a/libretro/Makefile b/libretro/Makefile
index 407aed5f..3a571f82 100644
--- a/libretro/Makefile
+++ b/libretro/Makefile
@@ -29,7 +29,7 @@ WASI_CXXFLAGS ?= -Oz -DNDEBUG
 WASI_LDFLAGS ?=
 WASM_OPT_FLAGS ?= -Oz
 
-BUILD_PREFIX := ${PWD}/build
+BUILD_PREFIX := $(PWD)/build
 OUTDIR := $(BUILD_PREFIX)/libretro-stage1
 LIBDIR := $(BUILD_PREFIX)/lib
 DOWNLOADS := $(BUILD_PREFIX)/downloads
@@ -108,7 +108,7 @@ $(LIBDIR)/Dist/bin/ruby: $(DOWNLOADS)/ruby/Makefile ruby-bindings.h
 
 $(OUTDIR)/sandbox-bindgen/mkxp-sandbox-bindgen.cpp $(OUTDIR)/sandbox-bindgen/mkxp-sandbox-bindgen.h &: sandbox-bindgen.rb $(LIBDIR)/tags
 	mkdir -p $(OUTDIR)/sandbox-bindgen
-	cd $(LIBDIR) && $(RUBY) ${PWD}/sandbox-bindgen.rb
+	cd $(LIBDIR) && $(RUBY) $(PWD)/sandbox-bindgen.rb
 	mv $(LIBDIR)/mkxp-sandbox-bindgen.h $(OUTDIR)/sandbox-bindgen
 	mv $(LIBDIR)/mkxp-sandbox-bindgen.cpp $(OUTDIR)/sandbox-bindgen
 
@@ -148,11 +148,11 @@ $(DOWNLOADS)/ruby/configure: $(DOWNLOADS)/ruby/configure.ac
 $(DOWNLOADS)/ruby/configure.ac:
 	mkdir -p $(DOWNLOADS)
 	$(CLONE) $(GITHUB)/ruby/ruby $(DOWNLOADS)/ruby -b v$(subst .,_,$(RUBY_VERSION))
-	cd $(DOWNLOADS)/ruby && $(GIT) apply ${PWD}/ruby-stack-pointer.patch
-	cd $(DOWNLOADS)/ruby && $(GIT) apply ${PWD}/ruby-jump-buffer.patch
-	cd $(DOWNLOADS)/ruby && $(GIT) apply ${PWD}/ruby-compat.patch
-	cd $(DOWNLOADS)/ruby && $(GIT) apply ${PWD}/ruby-prng-time.patch
-	echo '#include "${PWD}/ruby-bindings.h"' >> $(DOWNLOADS)/ruby/gc.c
+	cd $(DOWNLOADS)/ruby && $(GIT) apply $(PWD)/ruby-stack-pointer.patch
+	cd $(DOWNLOADS)/ruby && $(GIT) apply $(PWD)/ruby-jump-buffer.patch
+	cd $(DOWNLOADS)/ruby && $(GIT) apply $(PWD)/ruby-compat.patch
+	cd $(DOWNLOADS)/ruby && $(GIT) apply $(PWD)/ruby-prng-time.patch
+	echo '#include "$(PWD)/ruby-bindings.h"' >> $(DOWNLOADS)/ruby/gc.c
 
 # WABT (targets the build machine)
 
@@ -239,6 +239,7 @@ $(DOWNLOADS)/wabt/include/wabt/config.h: $(DOWNLOADS)/wabt/src/config.h.in
 $(WASM2C_SRC) $(DOWNLOADS)/wabt/src/config.h.in &:
 	mkdir -p $(DOWNLOADS)
 	$(CLONE) $(GITHUB)/WebAssembly/wabt $(DOWNLOADS)/wabt -b $(WABT_VERSION)
+	cd $(DOWNLOADS)/wabt && $(GIT) apply $(PWD)/wasm2c-data-segments.patch
 
 $(DOWNLOADS)/picosha2/picosha2.h:
 	mkdir -p $(DOWNLOADS)/picosha2
diff --git a/libretro/wasm2c-data-segments.patch b/libretro/wasm2c-data-segments.patch
new file mode 100644
index 00000000..b1f54af4
--- /dev/null
+++ b/libretro/wasm2c-data-segments.patch
@@ -0,0 +1,191 @@
+# Patches wasm2c to distribute the data segments across all the output files when `--num-outputs` is greater than 1 instead of putting them all in a single output file.
+
+--- a/include/wabt/c-writer.h
++++ b/include/wabt/c-writer.h
+@@ -43,6 +43,18 @@ struct WriteCOptions {
+       size_t num_imported_functions,
+       size_t num_outputs)>
+       name_to_output_file_index;
++  /*
++   * data_segment_name_to_output_file_index takes const iterators to the begin
++   * and end of a list of all data segments in the module and the number of .c
++   * outputs as arguments, and returns a vector where vector[i] is the index of
++   * the .c output that data_segments_begin + i goes into. Only called when
++   * --num-outputs is used.
++   */
++  std::function<std::vector<size_t>(
++      std::vector<DataSegment*>::const_iterator data_segments_begin,
++      std::vector<DataSegment*>::const_iterator data_segments_end,
++      size_t num_outputs)>
++      data_segment_name_to_output_file_index;
+ };
+ 
+ Result WriteC(std::vector<Stream*>&& c_streams,
+--- a/src/c-writer.cc
++++ b/src/c-writer.cc
+@@ -187,9 +187,10 @@ int GetShiftMask(Type type) {
+  * their names, and then divides all non-imported functions into equal-sized
+  * buckets (# of non-imported functions / # of .c outputs) based on the sorting.
+  */
+-static std::vector<size_t> default_name_to_output_file_index(
+-    std::vector<Func*>::const_iterator func_begin,
+-    std::vector<Func*>::const_iterator func_end,
++template <typename F>
++std::vector<size_t> default_name_to_output_file_index(
++    typename std::vector<F*>::const_iterator func_begin,
++    typename std::vector<F*>::const_iterator func_end,
+     size_t num_imports,
+     size_t num_streams) {
+   std::vector<size_t> result;
+@@ -218,6 +219,15 @@ static std::vector<size_t> default_name_to_output_file_index(
+   return result;
+ }
+ 
++static std::vector<size_t> default_data_segment_name_to_output_file_index(
++    typename std::vector<DataSegment*>::const_iterator data_segment_begin,
++    typename std::vector<DataSegment*>::const_iterator data_segment_end,
++    size_t num_streams) {  // default assignment: reuses the templated helper
++  return default_name_to_output_file_index<DataSegment>(data_segment_begin,
++                                                        data_segment_end,
++                                                        0, num_streams);  // 0 is passed as num_imports
++}
++
+ class CWriter {
+  public:
+   CWriter(std::vector<Stream*>&& c_streams,
+@@ -236,7 +246,15 @@ class CWriter {
+     if (c_streams_.size() != 1 && options.name_to_output_file_index) {
+       name_to_output_file_index_ = options.name_to_output_file_index;
+     } else {
+-      name_to_output_file_index_ = default_name_to_output_file_index;
++      name_to_output_file_index_ = default_name_to_output_file_index<Func>;
++    }
++    if (c_streams_.size() != 1 &&
++        options.data_segment_name_to_output_file_index) {
++      data_segment_name_to_output_file_index_ =
++          options.data_segment_name_to_output_file_index;
++    } else {
++      data_segment_name_to_output_file_index_ =
++          default_data_segment_name_to_output_file_index;
+     }
+   }
+ 
+@@ -407,7 +425,9 @@ class CWriter {
+   void WriteElemInstances();
+   void WriteGlobalInitializers();
+   void WriteDataInitializerDecls();
++  void WriteDataInitializer(const DataSegment* data_segment);
+   void WriteDataInitializers();
++  void WriteMultiDataInitializers();
+   void WriteElemInitializerDecls();
+   void WriteElemInitializers();
+   void WriteFuncRefWrappers();
+@@ -540,6 +560,11 @@ class CWriter {
+                                     size_t)>
+       name_to_output_file_index_;
+ 
++  std::function<std::vector<size_t>(std::vector<DataSegment*>::const_iterator,
++                                    std::vector<DataSegment*>::const_iterator,
++                                    size_t)>
++      data_segment_name_to_output_file_index_;
++
+   bool simd_used_in_header_;
+ 
+   bool in_tail_callee_;
+@@ -2218,28 +2243,35 @@ void CWriter::WriteDataInitializerDecls() {
+   }
+ }
+ 
++void CWriter::WriteDataInitializer(const DataSegment* data_segment) {  // writes one segment's bytes as a C array
++  Write(Newline(), InternalSymbolScope(),
++        "const u8 data_segment_data_",
++        GlobalName(ModuleFieldType::DataSegment, data_segment->name),
++        "[] = ", OpenBrace());
++  size_t i = 0;  // number of bytes written so far
++  for (uint8_t x : data_segment->data) {
++    Writef("0x%02x, ", x);
++    if ((++i % 12) == 0)  // line break after every 12th byte
++      Write(Newline());
++  }
++  if (i > 0)  // at least one byte was written
++    Write(Newline());
++  Write(CloseBrace(), ";", Newline());
++}
++
+ void CWriter::WriteDataInitializers() {
+   if (module_->memories.empty()) {
+     return;
+   }
+ 
+-  for (const DataSegment* data_segment : module_->data_segments) {
+-    if (data_segment->data.empty()) {
+-      continue;
+-    }
++  if (c_streams_.size() == 1) {
++    for (const DataSegment* data_segment : module_->data_segments) {
++      if (data_segment->data.empty()) {
++        continue;
++      }
+ 
+-    Write(Newline(), InternalSymbolScope(), "const u8 data_segment_data_",
+-          GlobalName(ModuleFieldType::DataSegment, data_segment->name),
+-          "[] = ", OpenBrace());
+-    size_t i = 0;
+-    for (uint8_t x : data_segment->data) {
+-      Writef("0x%02x, ", x);
+-      if ((++i % 12) == 0)
+-        Write(Newline());
++      WriteDataInitializer(data_segment);
+     }
+-    if (i > 0)
+-      Write(Newline());
+-    Write(CloseBrace(), ";", Newline());
+   }
+ 
+   Write(Newline(), "static void init_memories(", ModuleInstanceTypeName(),
+@@ -2299,6 +2331,32 @@ void CWriter::WriteDataInitializers() {
+   }
+ }
+ 
++void CWriter::WriteMultiDataInitializers() {
++  if (c_streams_.size() == 1 || module_->memories.empty()) {
++    return;  // one output (WriteDataInitializers writes the arrays) or no memories
++  }
++  // Ask the configured callback which .c output each data segment goes into.
++  std::vector<size_t> c_stream_assignment =
++      data_segment_name_to_output_file_index_(module_->data_segments.begin(),
++                                              module_->data_segments.end(),
++                                              c_streams_.size());
++
++  Index data_segment_index = 0;  // position in module_->data_segments
++
++  for (const DataSegment* data_segment : module_->data_segments) {
++    if (data_segment->data.empty()) {
++      ++data_segment_index;  // stay aligned with c_stream_assignment
++      continue;
++    }
++    // Select the assigned output; presumably Write()/Writef() emit to stream_ (confirm).
++    stream_ = c_streams_.at(c_stream_assignment.at(data_segment_index));
++
++    WriteDataInitializer(data_segment);
++
++    ++data_segment_index;
++  }
++}
++
+ void CWriter::WriteElemInstances() {
+   for (const ElemSegment* elem_segment : module_->elem_segments) {
+     std::string name =
+@@ -6099,6 +6157,12 @@ void CWriter::WriteCSource() {
+   /* Write function bodies across the different output streams */
+   WriteFuncs();
+ 
++  /*
++   * Write data segments across the different output streams if there's more
++   * than one output stream
++   */
++  WriteMultiDataInitializers();
++
+   /* For any empty .c output, write a dummy typedef to avoid gcc warning */
+   WriteMultiCTopEmpty();
+ }
diff --git a/meson.build b/meson.build
index fd2a14cd..ad40fc1d 100644
--- a/meson.build
+++ b/meson.build
@@ -24,18 +24,12 @@ if not is_libretro and host_system == 'darwin'
     error('This Meson project no longer supports macOS. Please use the Xcode project instead.')
 endif
 
-if is_libretro and (host_cpu_family == 'ppc' or host_cpu_family == 'ppc64') and not get_option('b_lto') and not get_option('ruby_lto')
-    # We get a bunch of "relocation truncated to fit" when linking if LTO isn't enabled in libretro builds due to the sizes of the files generated by wasm2c.
-    error('LTO is required when building for PowerPC architectures. Please pass either `-Db_lto=true` or `-Druby_lto=true` to Meson.')
-endif
-
 global_sources = []
 global_dependencies = [declare_dependency(sources: vcs_tag(command: ['git', 'rev-parse', '--short=7', 'HEAD'], fallback: 'unknown', input: 'src/git-hash.h.in', output: 'git-hash.h'))]
 global_include_dirs = []
 global_args = []
 global_cpp_args = []
 global_link_args = []
-libretro_ruby_args = []
 link_test_args = []
 
 if is_libretro
@@ -85,11 +79,6 @@ if (get_option('libretro_force_fat_lto') or (core_is_static and not is_emscripte
     global_args += '-ffat-lto-objects'
 endif
 
-if is_libretro and get_option('ruby_lto') and not get_option('b_lto')
-    libretro_ruby_args += '-flto'
-    libretro_ruby_args += '-ffat-lto-objects'
-endif
-
 if is_vitasdk
     global_args += '-mword-relocations'
 endif
@@ -682,7 +671,7 @@ if is_libretro
     global_dependencies += declare_dependency(
         link_with: static_library(
             'ruby',
-            c_args: global_args + libretro_ruby_args + [
+            c_args: global_args + [
                 '-frounding-math',
                 '-Wno-unused-function',
                 '-Wno-unused-value',
diff --git a/meson_options.txt b/meson_options.txt
index da8a9bb9..7ccc88ca 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -24,6 +24,5 @@ option('gfx_backend', type: 'combo', value: 'gl', choices: ['gl', 'gles'], descr
 
 option('libretro', type: 'boolean', value: false, description: 'Build a libretro core instead of an executable')
 option('libretro_stage1_path', type: 'string', value: 'libretro/build/libretro-stage1', description: 'Path to libretro-stage1 for libretro builds')
-option('ruby_lto', type: 'boolean', value: false, description: 'Enable link-time optimization for libruby in libretro builds, even if link-time optimization is disabled for everything else')
 option('emscripten_threaded', type: 'boolean', value: true, description: 'Enable multithreading support in libretro Emscripten builds')
 option('libretro_force_fat_lto', type: 'boolean', value: false, description: 'Always use -ffat-lto-objects for libretro builds with link-time optimization')