From 5a0630f01534e1af110422ca5800cdfbdede8b02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=9A=93?= Date: Wed, 6 Aug 2025 14:34:03 -0400 Subject: [PATCH] Patch wasm2c to evenly distribute the data segments This prevents the TOC (table of contents) on PowerPC targets from being overflowed by the sizes of the files generated by wasm2c. It should also help improve compilation times. I'll be submitting this patch as a pull request to WABT later. --- .github/workflows/autobuild.yml | 2 +- libretro/Makefile | 15 ++- libretro/wasm2c-data-segments.patch | 191 ++++++++++++++++++++++++++++ meson.build | 13 +- meson_options.txt | 1 - 5 files changed, 201 insertions(+), 21 deletions(-) create mode 100644 libretro/wasm2c-data-segments.patch diff --git a/.github/workflows/autobuild.yml b/.github/workflows/autobuild.yml index c92c5101..223843cb 100644 --- a/.github/workflows/autobuild.yml +++ b/.github/workflows/autobuild.yml @@ -1279,7 +1279,7 @@ jobs: - name: Configure core run: | git config --global --add safe.directory "$(pwd)" - CLICOLOR_FORCE=1 meson setup build --cross-file libretro/meson-ps3.txt --buildtype release -Db_lto=false -Druby_lto=true -Dlibretro=true + CLICOLOR_FORCE=1 meson setup build --cross-file libretro/meson-ps3.txt --buildtype release -Db_lto=false -Dlibretro=true - name: Build core run: | diff --git a/libretro/Makefile b/libretro/Makefile index 407aed5f..3a571f82 100644 --- a/libretro/Makefile +++ b/libretro/Makefile @@ -29,7 +29,7 @@ WASI_CXXFLAGS ?= -Oz -DNDEBUG WASI_LDFLAGS ?= WASM_OPT_FLAGS ?= -Oz -BUILD_PREFIX := ${PWD}/build +BUILD_PREFIX := $(PWD)/build OUTDIR := $(BUILD_PREFIX)/libretro-stage1 LIBDIR := $(BUILD_PREFIX)/lib DOWNLOADS := $(BUILD_PREFIX)/downloads @@ -108,7 +108,7 @@ $(LIBDIR)/Dist/bin/ruby: $(DOWNLOADS)/ruby/Makefile ruby-bindings.h $(OUTDIR)/sandbox-bindgen/mkxp-sandbox-bindgen.cpp $(OUTDIR)/sandbox-bindgen/mkxp-sandbox-bindgen.h &: sandbox-bindgen.rb $(LIBDIR)/tags mkdir -p $(OUTDIR)/sandbox-bindgen - cd $(LIBDIR) && 
$(RUBY) ${PWD}/sandbox-bindgen.rb + cd $(LIBDIR) && $(RUBY) $(PWD)/sandbox-bindgen.rb mv $(LIBDIR)/mkxp-sandbox-bindgen.h $(OUTDIR)/sandbox-bindgen mv $(LIBDIR)/mkxp-sandbox-bindgen.cpp $(OUTDIR)/sandbox-bindgen @@ -148,11 +148,11 @@ $(DOWNLOADS)/ruby/configure: $(DOWNLOADS)/ruby/configure.ac $(DOWNLOADS)/ruby/configure.ac: mkdir -p $(DOWNLOADS) $(CLONE) $(GITHUB)/ruby/ruby $(DOWNLOADS)/ruby -b v$(subst .,_,$(RUBY_VERSION)) - cd $(DOWNLOADS)/ruby && $(GIT) apply ${PWD}/ruby-stack-pointer.patch - cd $(DOWNLOADS)/ruby && $(GIT) apply ${PWD}/ruby-jump-buffer.patch - cd $(DOWNLOADS)/ruby && $(GIT) apply ${PWD}/ruby-compat.patch - cd $(DOWNLOADS)/ruby && $(GIT) apply ${PWD}/ruby-prng-time.patch - echo '#include "${PWD}/ruby-bindings.h"' >> $(DOWNLOADS)/ruby/gc.c + cd $(DOWNLOADS)/ruby && $(GIT) apply $(PWD)/ruby-stack-pointer.patch + cd $(DOWNLOADS)/ruby && $(GIT) apply $(PWD)/ruby-jump-buffer.patch + cd $(DOWNLOADS)/ruby && $(GIT) apply $(PWD)/ruby-compat.patch + cd $(DOWNLOADS)/ruby && $(GIT) apply $(PWD)/ruby-prng-time.patch + echo '#include "$(PWD)/ruby-bindings.h"' >> $(DOWNLOADS)/ruby/gc.c # WABT (targets the build machine) @@ -239,6 +239,7 @@ $(DOWNLOADS)/wabt/include/wabt/config.h: $(DOWNLOADS)/wabt/src/config.h.in $(WASM2C_SRC) $(DOWNLOADS)/wabt/src/config.h.in &: mkdir -p $(DOWNLOADS) $(CLONE) $(GITHUB)/WebAssembly/wabt $(DOWNLOADS)/wabt -b $(WABT_VERSION) + cd $(DOWNLOADS)/wabt && $(GIT) apply $(PWD)/wasm2c-data-segments.patch $(DOWNLOADS)/picosha2/picosha2.h: mkdir -p $(DOWNLOADS)/picosha2 diff --git a/libretro/wasm2c-data-segments.patch b/libretro/wasm2c-data-segments.patch new file mode 100644 index 00000000..b1f54af4 --- /dev/null +++ b/libretro/wasm2c-data-segments.patch @@ -0,0 +1,191 @@ +# Patches wasm2c to distribute the data segments across all the output files when `--num-outputs` is greater than 1 instead of putting them all in a single output file. 
+ +--- a/include/wabt/c-writer.h ++++ b/include/wabt/c-writer.h +@@ -43,6 +43,18 @@ struct WriteCOptions { + size_t num_imported_functions, + size_t num_outputs)> + name_to_output_file_index; ++ /* ++ * data_segment_name_to_output_file_index takes const iterators to begin and ++ * end of a list of all data_segments in the module, and number of .c outputs ++ * as argument, returns a vector where vector[i] is the index of the .c output ++ * that data_segments_begin + i goes into. Only called when --num-outputs is ++ * used. ++ */ ++ std::function( ++ std::vector::const_iterator data_segments_begin, ++ std::vector::const_iterator data_segments_end, ++ size_t num_outputs)> ++ data_segment_name_to_output_file_index; + }; + + Result WriteC(std::vector&& c_streams, +--- a/src/c-writer.cc ++++ b/src/c-writer.cc +@@ -187,9 +187,10 @@ int GetShiftMask(Type type) { + * their names, and then divides all non-imported functions into equal-sized + * buckets (# of non-imported functions / # of .c outputs) based on the sorting. 
+ */ +-static std::vector default_name_to_output_file_index( +- std::vector::const_iterator func_begin, +- std::vector::const_iterator func_end, ++template ++std::vector default_name_to_output_file_index( ++ typename std::vector::const_iterator func_begin, ++ typename std::vector::const_iterator func_end, + size_t num_imports, + size_t num_streams) { + std::vector result; +@@ -218,6 +219,15 @@ static std::vector default_name_to_output_file_index( + return result; + } + ++static std::vector default_data_segment_name_to_output_file_index( ++ typename std::vector::const_iterator data_segment_begin, ++ typename std::vector::const_iterator data_segment_end, ++ size_t num_streams) { ++ return default_name_to_output_file_index(data_segment_begin, ++ data_segment_end, ++ 0, num_streams); ++} ++ + class CWriter { + public: + CWriter(std::vector&& c_streams, +@@ -236,7 +246,15 @@ class CWriter { + if (c_streams_.size() != 1 && options.name_to_output_file_index) { + name_to_output_file_index_ = options.name_to_output_file_index; + } else { +- name_to_output_file_index_ = default_name_to_output_file_index; ++ name_to_output_file_index_ = default_name_to_output_file_index; ++ } ++ if (c_streams_.size() != 1 && ++ options.data_segment_name_to_output_file_index) { ++ data_segment_name_to_output_file_index_ = ++ options.data_segment_name_to_output_file_index; ++ } else { ++ data_segment_name_to_output_file_index_ = ++ default_data_segment_name_to_output_file_index; + } + } + +@@ -407,7 +425,9 @@ class CWriter { + void WriteElemInstances(); + void WriteGlobalInitializers(); + void WriteDataInitializerDecls(); ++ void WriteDataInitializer(const DataSegment* data_segment); + void WriteDataInitializers(); ++ void WriteMultiDataInitializers(); + void WriteElemInitializerDecls(); + void WriteElemInitializers(); + void WriteFuncRefWrappers(); +@@ -540,6 +560,11 @@ class CWriter { + size_t)> + name_to_output_file_index_; + ++ std::function(std::vector::const_iterator, ++ 
std::vector::const_iterator, ++ size_t)> ++ data_segment_name_to_output_file_index_; ++ + bool simd_used_in_header_; + + bool in_tail_callee_; +@@ -2218,28 +2243,35 @@ void CWriter::WriteDataInitializerDecls() { + } + } + ++void CWriter::WriteDataInitializer(const DataSegment* data_segment) { ++ Write(Newline(), InternalSymbolScope(), ++ "const u8 data_segment_data_", ++ GlobalName(ModuleFieldType::DataSegment, data_segment->name), ++ "[] = ", OpenBrace()); ++ size_t i = 0; ++ for (uint8_t x : data_segment->data) { ++ Writef("0x%02x, ", x); ++ if ((++i % 12) == 0) ++ Write(Newline()); ++ } ++ if (i > 0) ++ Write(Newline()); ++ Write(CloseBrace(), ";", Newline()); ++} ++ + void CWriter::WriteDataInitializers() { + if (module_->memories.empty()) { + return; + } + +- for (const DataSegment* data_segment : module_->data_segments) { +- if (data_segment->data.empty()) { +- continue; +- } ++ if (c_streams_.size() == 1) { ++ for (const DataSegment* data_segment : module_->data_segments) { ++ if (data_segment->data.empty()) { ++ continue; ++ } + +- Write(Newline(), InternalSymbolScope(), "const u8 data_segment_data_", +- GlobalName(ModuleFieldType::DataSegment, data_segment->name), +- "[] = ", OpenBrace()); +- size_t i = 0; +- for (uint8_t x : data_segment->data) { +- Writef("0x%02x, ", x); +- if ((++i % 12) == 0) +- Write(Newline()); ++ WriteDataInitializer(data_segment); + } +- if (i > 0) +- Write(Newline()); +- Write(CloseBrace(), ";", Newline()); + } + + Write(Newline(), "static void init_memories(", ModuleInstanceTypeName(), +@@ -2299,6 +2331,32 @@ void CWriter::WriteDataInitializers() { + } + } + ++void CWriter::WriteMultiDataInitializers() { ++ if (c_streams_.size() == 1 || module_->memories.empty()) { ++ return; ++ } ++ ++ std::vector c_stream_assignment = ++ data_segment_name_to_output_file_index_(module_->data_segments.begin(), ++ module_->data_segments.end(), ++ c_streams_.size()); ++ ++ Index data_segment_index = 0; ++ ++ for (const DataSegment* data_segment : 
module_->data_segments) { ++ if (data_segment->data.empty()) { ++ ++data_segment_index; ++ continue; ++ } ++ ++ stream_ = c_streams_.at(c_stream_assignment.at(data_segment_index)); ++ ++ WriteDataInitializer(data_segment); ++ ++ ++data_segment_index; ++ } ++} ++ + void CWriter::WriteElemInstances() { + for (const ElemSegment* elem_segment : module_->elem_segments) { + std::string name = +@@ -6099,6 +6157,12 @@ void CWriter::WriteCSource() { + /* Write function bodies across the different output streams */ + WriteFuncs(); + ++ /* ++ * Write data segments across the different output streams if there's more ++ * than one output stream ++ */ ++ WriteMultiDataInitializers(); ++ + /* For any empty .c output, write a dummy typedef to avoid gcc warning */ + WriteMultiCTopEmpty(); + } diff --git a/meson.build b/meson.build index fd2a14cd..ad40fc1d 100644 --- a/meson.build +++ b/meson.build @@ -24,18 +24,12 @@ if not is_libretro and host_system == 'darwin' error('This Meson project no longer supports macOS. Please use the Xcode project instead.') endif -if is_libretro and (host_cpu_family == 'ppc' or host_cpu_family == 'ppc64') and not get_option('b_lto') and not get_option('ruby_lto') - # We get a bunch of "relocation truncated to fit" when linking if LTO isn't enabled in libretro builds due to the sizes of the files generated by wasm2c. - error('LTO is required when building for PowerPC architectures. 
Please pass either `-Db_lto=true` or `-Druby_lto=true` to Meson.') -endif - global_sources = [] global_dependencies = [declare_dependency(sources: vcs_tag(command: ['git', 'rev-parse', '--short=7', 'HEAD'], fallback: 'unknown', input: 'src/git-hash.h.in', output: 'git-hash.h'))] global_include_dirs = [] global_args = [] global_cpp_args = [] global_link_args = [] -libretro_ruby_args = [] link_test_args = [] if is_libretro @@ -85,11 +79,6 @@ if (get_option('libretro_force_fat_lto') or (core_is_static and not is_emscripte global_args += '-ffat-lto-objects' endif -if is_libretro and get_option('ruby_lto') and not get_option('b_lto') - libretro_ruby_args += '-flto' - libretro_ruby_args += '-ffat-lto-objects' -endif - if is_vitasdk global_args += '-mword-relocations' endif @@ -682,7 +671,7 @@ if is_libretro global_dependencies += declare_dependency( link_with: static_library( 'ruby', - c_args: global_args + libretro_ruby_args + [ + c_args: global_args + [ '-frounding-math', '-Wno-unused-function', '-Wno-unused-value', diff --git a/meson_options.txt b/meson_options.txt index da8a9bb9..7ccc88ca 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -24,6 +24,5 @@ option('gfx_backend', type: 'combo', value: 'gl', choices: ['gl', 'gles'], descr option('libretro', type: 'boolean', value: false, description: 'Build a libretro core instead of an executable') option('libretro_stage1_path', type: 'string', value: 'libretro/build/libretro-stage1', description: 'Path to libretro-stage1 for libretro builds') -option('ruby_lto', type: 'boolean', value: false, description: 'Enable link-time optimization for libruby in libretro builds, even if link-time optimization is disabled for everything else') option('emscripten_threaded', type: 'boolean', value: true, description: 'Enable multithreading support in libretro Emscripten builds') option('libretro_force_fat_lto', type: 'boolean', value: false, description: 'Always use -ffat-lto-objects for libretro builds with link-time 
optimization')