diff --git a/src/ARMeilleure/CodeGen/CompiledFunction.cs b/src/ARMeilleure/CodeGen/CompiledFunction.cs index 7014f715a..a5629b718 100644 --- a/src/ARMeilleure/CodeGen/CompiledFunction.cs +++ b/src/ARMeilleure/CodeGen/CompiledFunction.cs @@ -8,7 +8,7 @@ namespace ARMeilleure.CodeGen /// /// Represents a compiled function. /// - readonly struct CompiledFunction + public readonly struct CompiledFunction { /// /// Gets the machine code of the . @@ -44,10 +44,11 @@ namespace ARMeilleure.CodeGen /// pointing to the mapped function. /// /// Type of delegate + /// The jit cache to map the function into /// A delegate of type pointing to the mapped function - public T Map() + public T Map(JitCache jitCache) { - return MapWithPointer(out _); + return MapWithPointer(jitCache, out _); } /// @@ -55,11 +56,12 @@ namespace ARMeilleure.CodeGen /// pointing to the mapped function. /// /// Type of delegate + /// The jit cache to map the function into /// Pointer to the function code in memory /// A delegate of type pointing to the mapped function - public T MapWithPointer(out nint codePointer) + public T MapWithPointer(JitCache jitCache, out nint codePointer) { - codePointer = JitCache.Map(this); + codePointer = jitCache.Map(this); return Marshal.GetDelegateForFunctionPointer(codePointer); } diff --git a/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs index d103bc395..b78b9edd3 100644 --- a/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs +++ b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs @@ -3,7 +3,7 @@ namespace ARMeilleure.CodeGen.Linking /// /// Represents a relocation. /// - readonly struct RelocEntry + public readonly struct RelocEntry { public const int Stride = 13; // Bytes. 
diff --git a/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs index fb8b449a7..d0a3e0d08 100644 --- a/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs +++ b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs @@ -5,7 +5,7 @@ namespace ARMeilleure.CodeGen.Linking /// /// Represents relocation information about a . /// - readonly struct RelocInfo + public readonly struct RelocInfo { /// /// Gets an empty . diff --git a/src/ARMeilleure/CodeGen/Linking/Symbol.cs b/src/ARMeilleure/CodeGen/Linking/Symbol.cs index 5559afe09..6da5a2765 100644 --- a/src/ARMeilleure/CodeGen/Linking/Symbol.cs +++ b/src/ARMeilleure/CodeGen/Linking/Symbol.cs @@ -5,7 +5,7 @@ namespace ARMeilleure.CodeGen.Linking /// /// Represents a symbol. /// - readonly struct Symbol + public readonly struct Symbol { private readonly ulong _value; diff --git a/src/ARMeilleure/CodeGen/Linking/SymbolType.cs b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs index 29011a762..3232502ea 100644 --- a/src/ARMeilleure/CodeGen/Linking/SymbolType.cs +++ b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs @@ -3,7 +3,7 @@ namespace ARMeilleure.CodeGen.Linking /// /// Types of . /// - enum SymbolType : byte + public enum SymbolType : byte { /// /// Refers to nothing, i.e no symbol. diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs index 127b84231..6be1f5d6f 100644 --- a/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs +++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs @@ -1,6 +1,6 @@ namespace ARMeilleure.CodeGen.Unwinding { - struct UnwindInfo + public struct UnwindInfo { public const int Stride = 4; // Bytes. 
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs index 2045019a3..6c4021d45 100644 --- a/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs +++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs @@ -1,6 +1,6 @@ namespace ARMeilleure.CodeGen.Unwinding { - enum UnwindPseudoOp + public enum UnwindPseudoOp { PushReg = 0, SetFrame = 1, diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs index 507ace598..5bce9482a 100644 --- a/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs +++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs @@ -1,6 +1,6 @@ namespace ARMeilleure.CodeGen.Unwinding { - struct UnwindPushEntry + public struct UnwindPushEntry { public const int Stride = 16; // Bytes. diff --git a/src/ARMeilleure/Memory/ReservedRegion.cs b/src/ARMeilleure/Memory/ReservedRegion.cs index dfe17c933..5b0a95f67 100644 --- a/src/ARMeilleure/Memory/ReservedRegion.cs +++ b/src/ARMeilleure/Memory/ReservedRegion.cs @@ -2,7 +2,7 @@ using System; namespace ARMeilleure.Memory { - public class ReservedRegion + public class ReservedRegion : IDisposable { public const int DefaultGranularity = 65536; // Mapping granularity in Windows. 
diff --git a/src/ARMeilleure/Signal/TestMethods.cs b/src/ARMeilleure/Signal/TestMethods.cs index 684157860..013705f21 100644 --- a/src/ARMeilleure/Signal/TestMethods.cs +++ b/src/ARMeilleure/Signal/TestMethods.cs @@ -1,5 +1,6 @@ using ARMeilleure.IntermediateRepresentation; using ARMeilleure.Translation; +using ARMeilleure.Translation.Cache; using System.Runtime.InteropServices; using static ARMeilleure.IntermediateRepresentation.Operand.Factory; @@ -17,7 +18,7 @@ namespace ARMeilleure.Signal public delegate int DebugThreadLocalMapGetOrReserve(int threadId, int initialState); public delegate void DebugNativeWriteLoop(nint nativeWriteLoopPtr, nint writePtr); - public static DebugPartialUnmap GenerateDebugPartialUnmap() + public static DebugPartialUnmap GenerateDebugPartialUnmap(JitCache jitCache) { EmitterContext context = new(); @@ -31,10 +32,10 @@ namespace ARMeilleure.Signal OperandType[] argTypes = [OperandType.I64]; - return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(); + return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(jitCache); } - public static DebugThreadLocalMapGetOrReserve GenerateDebugThreadLocalMapGetOrReserve(nint structPtr) + public static DebugThreadLocalMapGetOrReserve GenerateDebugThreadLocalMapGetOrReserve(JitCache jitCache, nint structPtr) { EmitterContext context = new(); @@ -48,10 +49,10 @@ namespace ARMeilleure.Signal OperandType[] argTypes = [OperandType.I64]; - return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(); + return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(jitCache); } - public static DebugNativeWriteLoop GenerateDebugNativeWriteLoop() + public static DebugNativeWriteLoop GenerateDebugNativeWriteLoop(JitCache jitCache) { EmitterContext 
context = new(); @@ -77,7 +78,7 @@ namespace ARMeilleure.Signal OperandType[] argTypes = [OperandType.I64]; - return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(); + return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(jitCache); } } } diff --git a/src/ARMeilleure/Translation/Cache/CacheEntry.cs b/src/ARMeilleure/Translation/Cache/CacheEntry.cs index 25b06f781..dc356cb72 100644 --- a/src/ARMeilleure/Translation/Cache/CacheEntry.cs +++ b/src/ARMeilleure/Translation/Cache/CacheEntry.cs @@ -4,7 +4,7 @@ using System.Diagnostics.CodeAnalysis; namespace ARMeilleure.Translation.Cache { - readonly struct CacheEntry : IComparable + public readonly struct CacheEntry : IComparable { public int Offset { get; } public int Size { get; } diff --git a/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs b/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs index 9c5ca29df..162859179 100644 --- a/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs +++ b/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs @@ -22,12 +22,31 @@ namespace ARMeilleure.Translation.Cache return Offset.CompareTo(other.Offset); } } + + private readonly int _regionSize; + private int _regionCount; private readonly List _blocks = []; - public CacheMemoryAllocator(int capacity) + public CacheMemoryAllocator(int regionSize, int initialRegionCount = 1) { - _blocks.Add(new MemoryBlock(0, capacity)); + _regionCount = 0; + _regionSize = regionSize; + + for (; initialRegionCount > 0; initialRegionCount--) + { + _blocks.Add(new MemoryBlock(_regionSize * _regionCount, _regionSize)); + _regionCount++; + } + } + + public void AddNewBlocks(int count) + { + for (; count > 0; count--) + { + _blocks.Add(new MemoryBlock(_regionSize * _regionCount, _regionSize)); + _regionCount++; + } } public int Allocate(int size) @@ -65,13 +84,14 @@ namespace 
ARMeilleure.Translation.Cache { index = ~index; } + + int endOffs = block.Offset + block.Size; - if (index < _blocks.Count) + // Don't merge blocks from different allocations + if (index < _blocks.Count && endOffs % _regionSize != 0) { MemoryBlock next = _blocks[index]; - int endOffs = block.Offset + block.Size; - if (next.Offset == endOffs) { block = new MemoryBlock(block.Offset, block.Size + next.Size); @@ -79,7 +99,8 @@ namespace ARMeilleure.Translation.Cache } } - if (index > 0) + // Don't merge blocks from different allocations + if (index > 0 && block.Offset % _regionSize != 0) { MemoryBlock prev = _blocks[index - 1]; diff --git a/src/ARMeilleure/Translation/Cache/JitCache.cs b/src/ARMeilleure/Translation/Cache/JitCache.cs index 7931fb360..0fae275c7 100644 --- a/src/ARMeilleure/Translation/Cache/JitCache.cs +++ b/src/ARMeilleure/Translation/Cache/JitCache.cs @@ -14,62 +14,35 @@ using System.Threading; namespace ARMeilleure.Translation.Cache { - static partial class JitCache + public partial class JitCache : IDisposable { private static readonly int _pageSize = (int)MemoryBlock.GetPageSize(); private static readonly int _pageMask = _pageSize - 1; private const int CodeAlignment = 4; // Bytes. 
- private const int CacheSize = 256 * 1024 * 1024; + private const uint CacheSize = 256 * 1024 * 1024; - private static JitCacheInvalidation _jitCacheInvalidator; + private readonly JitCacheInvalidation _jitCacheInvalidator; - private static readonly List _cacheAllocators = []; + private readonly CacheMemoryAllocator _cacheAllocator; - private static readonly List _cacheEntries = []; + private readonly List _cacheEntries = []; - private static readonly Lock _lock = new(); - private static bool _initialized; + private readonly Lock _lock = new(); - private static readonly List _jitRegions = []; - private static int _activeRegionIndex = 0; + private readonly List _jitRegions = []; [SupportedOSPlatform("windows")] [LibraryImport("kernel32.dll", SetLastError = true)] - public static partial nint FlushInstructionCache(nint hProcess, nint lpAddress, nuint dwSize); + private static partial nint FlushInstructionCache(nint hProcess, nint lpAddress, nuint dwSize); - public static void Initialize(IJitMemoryAllocator allocator) + public JitCache(IJitMemoryAllocator allocator) { lock (_lock) { - if (_initialized) - { - if (OperatingSystem.IsWindows()) - { - JitUnwindWindows.RemoveFunctionTableHandler( - _jitRegions[0].Pointer); - } + _jitRegions.Add(new(allocator, CacheSize)); - for (int i = 0; i < _jitRegions.Count; i++) - { - _jitRegions[i].Dispose(); - } - - _jitRegions.Clear(); - _cacheAllocators.Clear(); - } - else - { - _initialized = true; - } - - _activeRegionIndex = 0; - - ReservedRegion firstRegion = new(allocator, CacheSize); - _jitRegions.Add(firstRegion); - - CacheMemoryAllocator firstCacheAllocator = new(CacheSize); - _cacheAllocators.Add(firstCacheAllocator); + _cacheAllocator = new((int)CacheSize); if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS()) { @@ -79,23 +52,20 @@ namespace ARMeilleure.Translation.Cache if (OperatingSystem.IsWindows()) { JitUnwindWindows.InstallFunctionTableHandler( - firstRegion.Pointer, CacheSize, firstRegion.Pointer + 
Allocate(_pageSize) + this, _jitRegions[0].Pointer, CacheSize, _jitRegions[0].Pointer + Allocate(_pageSize) ); } } } - public static nint Map(CompiledFunction func) + public nint Map(CompiledFunction func) { byte[] code = func.Code; lock (_lock) { - Debug.Assert(_initialized); - int funcOffset = Allocate(code.Length); - ReservedRegion targetRegion = _jitRegions[_activeRegionIndex]; - nint funcPtr = targetRegion.Pointer + funcOffset; + nint funcPtr = GetFunctionPtr(funcOffset); if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) { @@ -109,9 +79,9 @@ namespace ARMeilleure.Translation.Cache } else { - ReprotectAsWritable(targetRegion, funcOffset, code.Length); + ReprotectAsWritable(funcOffset, code.Length); Marshal.Copy(code, 0, funcPtr, code.Length); - ReprotectAsExecutable(targetRegion, funcOffset, code.Length); + ReprotectAsExecutable(funcOffset, code.Length); if (OperatingSystem.IsWindows() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) { @@ -129,25 +99,24 @@ namespace ARMeilleure.Translation.Cache } } - public static void Unmap(nint pointer) + public void Unmap(nint pointer) { lock (_lock) { - Debug.Assert(_initialized); - - foreach (ReservedRegion region in _jitRegions) + for (int i = 0; i < _jitRegions.Count; i++) { - if (pointer.ToInt64() < region.Pointer.ToInt64() || - pointer.ToInt64() >= (region.Pointer + CacheSize).ToInt64()) + ReservedRegion jitRegion = _jitRegions[i]; + if (pointer.ToInt64() < jitRegion.Pointer.ToInt64() || + pointer.ToInt64() >= (jitRegion.Pointer + (nint)CacheSize).ToInt64()) { continue; } - int funcOffset = (int)(pointer.ToInt64() - region.Pointer.ToInt64()); + int funcOffset = (int)(pointer.ToInt64() - jitRegion.Pointer.ToInt64() + i * CacheSize); if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) { - _cacheAllocators[_activeRegionIndex].Free(funcOffset, AlignCodeSize(entry.Size)); + _cacheAllocator.Free(funcOffset, 
AlignCodeSize(entry.Size)); _cacheEntries.RemoveAt(entryIndex); } @@ -156,53 +125,63 @@ namespace ARMeilleure.Translation.Cache } } - private static void ReprotectAsWritable(ReservedRegion region, int offset, int size) + private void ReprotectAsWritable(int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; - int regionEnd = (endOffs + _pageMask) & ~_pageMask; - - region.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + int regionStart = (offset % (int)CacheSize) & ~_pageMask; + int regionEnd = ((offset % (int)CacheSize) + size + _pageMask) & ~_pageMask; + + GetRegion(offset).Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } - private static void ReprotectAsExecutable(ReservedRegion region, int offset, int size) + private void ReprotectAsExecutable(int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; - int regionEnd = (endOffs + _pageMask) & ~_pageMask; + int regionStart = (offset % (int)CacheSize) & ~_pageMask; + int regionEnd = ((offset % (int)CacheSize) + size + _pageMask) & ~_pageMask; - region.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + GetRegion(offset).Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } - private static int Allocate(int codeSize) + private int Allocate(int codeSize) { codeSize = AlignCodeSize(codeSize); - int allocOffset = _cacheAllocators[_activeRegionIndex].Allocate(codeSize); + int allocOffset = _cacheAllocator.Allocate(codeSize); if (allocOffset >= 0) { - _jitRegions[_activeRegionIndex].ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + GetRegion(allocOffset).ExpandIfNeeded((ulong)(allocOffset % (int)CacheSize) + (ulong)codeSize); return allocOffset; } - int exhaustedRegion = _activeRegionIndex; + _cacheAllocator.AddNewBlocks(1); ReservedRegion newRegion = new(_jitRegions[0].Allocator, CacheSize); + + Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache of size {(_jitRegions.Count * 
CacheSize).Bytes()} exhausted, creating new Cache Region ({((_jitRegions.Count + 1) * CacheSize).Bytes()} Total Allocation)."); + _jitRegions.Add(newRegion); - _activeRegionIndex = _jitRegions.Count - 1; - - Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {_activeRegionIndex} ({((long)(_activeRegionIndex + 1) * CacheSize).Bytes()} Total Allocation)."); - - _cacheAllocators.Add(new CacheMemoryAllocator(CacheSize)); - - int allocOffsetNew = _cacheAllocators[_activeRegionIndex].Allocate(codeSize); - if (allocOffsetNew < 0) + + allocOffset = _cacheAllocator.Allocate(codeSize); + if (allocOffset < 0) { throw new OutOfMemoryException("Failed to allocate in new Cache Region!"); } - newRegion.ExpandIfNeeded((ulong)allocOffsetNew + (ulong)codeSize); - return allocOffsetNew; + GetRegion(allocOffset).ExpandIfNeeded((ulong)(allocOffset % (int)CacheSize) + (ulong)codeSize); + return allocOffset; + } + + private nint GetFunctionPtr(int offset) + { + return GetRegion(offset).Pointer + (offset % (int)CacheSize); + } + + private ReservedRegion GetRegion(int offset) + { + int index = offset / (int)CacheSize; + + return _jitRegions[index]; } private static int AlignCodeSize(int codeSize) @@ -210,7 +189,7 @@ namespace ARMeilleure.Translation.Cache return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1); } - private static void Add(int offset, int size, UnwindInfo unwindInfo) + private void Add(int offset, int size, UnwindInfo unwindInfo) { CacheEntry entry = new(offset, size, unwindInfo); @@ -224,25 +203,22 @@ namespace ARMeilleure.Translation.Cache _cacheEntries.Insert(index, entry); } - public static bool TryFind(int offset, out CacheEntry entry, out int entryIndex) + public bool TryFind(int offset, out CacheEntry entry, out int entryIndex) { lock (_lock) { - foreach (ReservedRegion _ in _jitRegions) + int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); + + if (index < 0) { - int 
index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); + index = ~index - 1; + } - if (index < 0) - { - index = ~index - 1; - } - - if (index >= 0) - { - entry = _cacheEntries[index]; - entryIndex = index; - return true; - } + if (index >= 0) + { + entry = _cacheEntries[index]; + entryIndex = index; + return true; } } @@ -250,5 +226,18 @@ namespace ARMeilleure.Translation.Cache entryIndex = 0; return false; } + + public void Dispose() + { + if (OperatingSystem.IsWindows()) + { + JitUnwindWindows.RemoveFunctionTableHandler(_jitRegions[0].Pointer); + } + + foreach (ReservedRegion jitRegion in _jitRegions) + { + jitRegion.Dispose(); + } + } } } diff --git a/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs b/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs index 15a1051fa..64129d811 100644 --- a/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs +++ b/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs @@ -2,6 +2,8 @@ using ARMeilleure.CodeGen.Unwinding; using System; +using System.Collections.Concurrent; +using System.Collections.Generic; using System.Diagnostics; using System.Runtime.InteropServices; @@ -26,6 +28,29 @@ namespace ARMeilleure.Translation.Cache public byte FrameRegister; public unsafe fixed ushort UnwindCodes[MaxUnwindCodesArraySize]; } + + private unsafe struct InternalFunctionHandler + { + public InternalFunctionHandler(JitCache jitCache, nint workBufferPtr) + { + _jitCache = jitCache; + + _runtimeFunction = (RuntimeFunction*)workBufferPtr; + + _unwindInfo = (UnwindInfo*)(workBufferPtr + _sizeOfRuntimeFunction); + } + + readonly JitCache _jitCache; + + readonly RuntimeFunction* _runtimeFunction; + + readonly UnwindInfo* _unwindInfo; + + public RuntimeFunction* FunctionTableHandler(ulong controlPc, nint context) + { + return JitUnwindWindows.FunctionTableHandler(_jitCache, _runtimeFunction, _unwindInfo, controlPc, context); + } + } private enum UnwindOp { @@ -59,27 +84,28 @@ namespace ARMeilleure.Translation.Cache 
private static GetRuntimeFunctionCallback _getRuntimeFunctionCallback; - private static int _sizeOfRuntimeFunction; + private static readonly int _sizeOfRuntimeFunction; + + private static readonly ConcurrentDictionary _functionTableHandlers = new(); - private unsafe static RuntimeFunction* _runtimeFunction; + static JitUnwindWindows() + { + _sizeOfRuntimeFunction = Marshal.SizeOf(); + } - private unsafe static UnwindInfo* _unwindInfo; - - public static void InstallFunctionTableHandler(nint codeCachePointer, uint codeCacheLength, nint workBufferPtr) + public static void InstallFunctionTableHandler(JitCache jitCache, nint codeCachePointer, uint codeCacheLength, nint workBufferPtr) { ulong codeCachePtr = (ulong)codeCachePointer.ToInt64(); - _sizeOfRuntimeFunction = Marshal.SizeOf(); - bool result; + InternalFunctionHandler handler; + unsafe { - _runtimeFunction = (RuntimeFunction*)workBufferPtr; + handler = new InternalFunctionHandler(jitCache, workBufferPtr); - _unwindInfo = (UnwindInfo*)(workBufferPtr + _sizeOfRuntimeFunction); - - _getRuntimeFunctionCallback = new GetRuntimeFunctionCallback(FunctionTableHandler); + _getRuntimeFunctionCallback = handler.FunctionTableHandler; result = RtlInstallFunctionTableCallback( codeCachePtr | 3, @@ -94,6 +120,8 @@ namespace ARMeilleure.Translation.Cache { throw new InvalidOperationException("Failure installing function table callback."); } + + _functionTableHandlers.TryAdd(codeCachePtr, handler); } public static void RemoveFunctionTableHandler(nint codeCachePointer) @@ -111,24 +139,26 @@ namespace ARMeilleure.Translation.Cache { throw new InvalidOperationException("Failure removing function table callback."); } + + _functionTableHandlers.Remove(codeCachePtr, out _); } - private static unsafe RuntimeFunction* FunctionTableHandler(ulong controlPc, nint context) + private static unsafe RuntimeFunction* FunctionTableHandler(JitCache jitCache, RuntimeFunction* runtimeFunction, UnwindInfo* unwindInfo, ulong controlPc, nint context) 
{ int offset = (int)((long)controlPc - context.ToInt64()); - if (!JitCache.TryFind(offset, out CacheEntry funcEntry, out _)) + if (!jitCache.TryFind(offset, out CacheEntry funcEntry, out _)) { return null; // Not found. } - CodeGen.Unwinding.UnwindInfo unwindInfo = funcEntry.UnwindInfo; + CodeGen.Unwinding.UnwindInfo funcUnwindInfo = funcEntry.UnwindInfo; int codeIndex = 0; - for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--) + for (int index = funcUnwindInfo.PushEntries.Length - 1; index >= 0; index--) { - UnwindPushEntry entry = unwindInfo.PushEntries[index]; + UnwindPushEntry entry = funcUnwindInfo.PushEntries[index]; switch (entry.PseudoOp) { @@ -140,14 +170,14 @@ namespace ARMeilleure.Translation.Cache if (stackOffset <= 0xFFFF0) { - _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128, entry.PrologOffset, entry.RegIndex); - _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset / 16); + unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128, entry.PrologOffset, entry.RegIndex); + unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset / 16); } else { - _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128Far, entry.PrologOffset, entry.RegIndex); - _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 0); - _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 16); + unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128Far, entry.PrologOffset, entry.RegIndex); + unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 0); + unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 16); } break; @@ -161,18 +191,18 @@ namespace ARMeilleure.Translation.Cache if (allocSize <= 128) { - _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocSmall, entry.PrologOffset, (allocSize / 8) - 1); + unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocSmall, entry.PrologOffset, (allocSize / 8) - 1); } else if 
(allocSize <= 0x7FFF8) { - _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 0); - _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize / 8); + unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 0); + unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize / 8); } else { - _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 1); - _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 0); - _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 16); + unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 1); + unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 0); + unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 16); } break; @@ -180,7 +210,7 @@ namespace ARMeilleure.Translation.Cache case UnwindPseudoOp.PushReg: { - _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.PushNonvol, entry.PrologOffset, entry.RegIndex); + unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.PushNonvol, entry.PrologOffset, entry.RegIndex); break; } @@ -192,16 +222,16 @@ namespace ARMeilleure.Translation.Cache Debug.Assert(codeIndex <= MaxUnwindCodesArraySize); - _unwindInfo->VersionAndFlags = 1; // Flags: The function has no handler. - _unwindInfo->SizeOfProlog = (byte)unwindInfo.PrologSize; - _unwindInfo->CountOfUnwindCodes = (byte)codeIndex; - _unwindInfo->FrameRegister = 0; + unwindInfo->VersionAndFlags = 1; // Flags: The function has no handler. 
+ unwindInfo->SizeOfProlog = (byte)funcUnwindInfo.PrologSize; + unwindInfo->CountOfUnwindCodes = (byte)codeIndex; + unwindInfo->FrameRegister = 0; - _runtimeFunction->BeginAddress = (uint)funcEntry.Offset; - _runtimeFunction->EndAddress = (uint)(funcEntry.Offset + funcEntry.Size); - _runtimeFunction->UnwindData = (uint)_sizeOfRuntimeFunction; + runtimeFunction->BeginAddress = (uint)funcEntry.Offset; + runtimeFunction->EndAddress = (uint)(funcEntry.Offset + funcEntry.Size); + runtimeFunction->UnwindData = (uint)_sizeOfRuntimeFunction; - return _runtimeFunction; + return runtimeFunction; } private static ushort PackUnwindOp(UnwindOp op, int prologOffset, int opInfo) diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs index 5534cde56..9ce3e5773 100644 --- a/src/ARMeilleure/Translation/PTC/Ptc.cs +++ b/src/ARMeilleure/Translation/PTC/Ptc.cs @@ -4,6 +4,7 @@ using ARMeilleure.CodeGen.Unwinding; using ARMeilleure.Common; using ARMeilleure.Memory; using ARMeilleure.State; +using ARMeilleure.Translation.Cache; using Humanizer; using Ryujinx.Common; using Ryujinx.Common.Configuration; @@ -33,7 +34,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 7010; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 7016; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; @@ -50,6 +51,8 @@ namespace ARMeilleure.Translation.PTC private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest; public PtcProfiler Profiler { get; } + + private readonly JitCache _jitCache; // Carriers. 
private MemoryStream _infosStream; @@ -81,7 +84,7 @@ namespace ARMeilleure.Translation.PTC private volatile int _translateTotalCount; public event Action PtcStateChanged; - public Ptc() + public Ptc(JitCache jitCache) { Profiler = new PtcProfiler(this); @@ -92,6 +95,8 @@ namespace ARMeilleure.Translation.PTC _waitEvent = new ManualResetEvent(true); + _jitCache = jitCache; + _disposed = false; TitleIdText = TitleIdTextDefault; @@ -782,7 +787,7 @@ namespace ARMeilleure.Translation.PTC return new UnwindInfo(pushEntries, prologueSize); } - private static TranslatedFunction FastTranslate( + private TranslatedFunction FastTranslate( byte[] code, Counter callCounter, ulong guestSize, @@ -790,7 +795,7 @@ namespace ARMeilleure.Translation.PTC bool highCq) { CompiledFunction cFunc = new(code, unwindInfo, RelocInfo.Empty); - GuestFunction gFunc = cFunc.MapWithPointer(out nint gFuncPointer); + GuestFunction gFunc = cFunc.MapWithPointer(_jitCache, out nint gFuncPointer); return new TranslatedFunction(gFunc, gFuncPointer, callCounter, guestSize, highCq); } diff --git a/src/ARMeilleure/Translation/Translator.cs b/src/ARMeilleure/Translation/Translator.cs index b098ff4cf..e73ecc85e 100644 --- a/src/ARMeilleure/Translation/Translator.cs +++ b/src/ARMeilleure/Translation/Translator.cs @@ -24,6 +24,7 @@ namespace ARMeilleure.Translation private readonly IJitMemoryAllocator _allocator; private readonly ConcurrentQueue> _oldFuncs; + public readonly JitCache JitCache; private readonly Ptc _ptc; internal TranslatorCache Functions { get; } @@ -43,16 +44,17 @@ namespace ARMeilleure.Translation _oldFuncs = new ConcurrentQueue>(); - _ptc = new Ptc(); + JitCache = new JitCache(allocator); + + _ptc = new Ptc(JitCache); Queue = new TranslatorQueue(); - JitCache.Initialize(allocator); CountTable = new EntryTable(); Functions = new TranslatorCache(); FunctionTable = functionTable; - Stubs = new TranslatorStubs(FunctionTable); + Stubs = new TranslatorStubs(JitCache, FunctionTable); 
FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub; } @@ -167,6 +169,7 @@ namespace ARMeilleure.Translation } ClearJitCache(); + JitCache.Dispose(); Stubs.Dispose(); FunctionTable.Dispose(); @@ -305,7 +308,7 @@ namespace ARMeilleure.Translation _ptc.WriteCompiledFunction(address, funcSize, hash, highCq, compiledFunc); } - GuestFunction func = compiledFunc.MapWithPointer(out nint funcPointer); + GuestFunction func = compiledFunc.MapWithPointer(JitCache, out nint funcPointer); Allocators.ResetAll(); diff --git a/src/ARMeilleure/Translation/TranslatorStubs.cs b/src/ARMeilleure/Translation/TranslatorStubs.cs index 2d95ceb99..92fb2d2b7 100644 --- a/src/ARMeilleure/Translation/TranslatorStubs.cs +++ b/src/ARMeilleure/Translation/TranslatorStubs.cs @@ -14,6 +14,8 @@ namespace ARMeilleure.Translation /// class TranslatorStubs : IDisposable { + private readonly JitCache _jitCache; + private readonly Lazy _slowDispatchStub; private bool _disposed; @@ -83,11 +85,14 @@ namespace ARMeilleure.Translation /// Initializes a new instance of the class with the specified /// instance. 
/// + /// Jit cache to map functions in /// Function table used to store pointers to the functions that the guest code will call /// is null - public TranslatorStubs(IAddressTable functionTable) + public TranslatorStubs(JitCache jitCache, IAddressTable functionTable) { ArgumentNullException.ThrowIfNull(functionTable); + + _jitCache = jitCache; _functionTable = functionTable; _slowDispatchStub = new(GenerateSlowDispatchStub, isThreadSafe: true); @@ -115,12 +120,12 @@ namespace ARMeilleure.Translation { if (_dispatchStub.IsValueCreated) { - JitCache.Unmap(_dispatchStub.Value); + _jitCache.Unmap(_dispatchStub.Value); } if (_dispatchLoop.IsValueCreated) { - JitCache.Unmap(Marshal.GetFunctionPointerForDelegate(_dispatchLoop.Value)); + _jitCache.Unmap(Marshal.GetFunctionPointerForDelegate(_dispatchLoop.Value)); } _disposed = true; @@ -188,7 +193,7 @@ namespace ARMeilleure.Translation OperandType retType = OperandType.I64; OperandType[] argTypes = [OperandType.I64]; - GuestFunction func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(); + GuestFunction func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(_jitCache); return Marshal.GetFunctionPointerForDelegate(func); } @@ -213,7 +218,7 @@ namespace ARMeilleure.Translation OperandType retType = OperandType.I64; OperandType[] argTypes = [OperandType.I64]; - GuestFunction func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(); + GuestFunction func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(_jitCache); return Marshal.GetFunctionPointerForDelegate(func); } @@ -290,7 +295,7 @@ namespace ARMeilleure.Translation OperandType retType = OperandType.None; OperandType[] argTypes = [OperandType.I64, OperandType.I64]; - return Compiler.Compile(cfg, argTypes, retType, 
CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(); + return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(_jitCache); } /// @@ -314,7 +319,7 @@ namespace ARMeilleure.Translation OperandType retType = OperandType.I64; OperandType[] argTypes = [OperandType.I64, OperandType.I64]; - return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(); + return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(_jitCache); } } } diff --git a/src/ARMeilleure/Translation/TranslatorTestMethods.cs b/src/ARMeilleure/Translation/TranslatorTestMethods.cs index 5f15cf550..a4ad62dd2 100644 --- a/src/ARMeilleure/Translation/TranslatorTestMethods.cs +++ b/src/ARMeilleure/Translation/TranslatorTestMethods.cs @@ -1,6 +1,7 @@ using ARMeilleure.CodeGen.X86; using ARMeilleure.IntermediateRepresentation; using ARMeilleure.State; +using ARMeilleure.Translation.Cache; using System; using System.Runtime.InteropServices; using static ARMeilleure.IntermediateRepresentation.Operand.Factory; @@ -59,7 +60,7 @@ namespace ARMeilleure.Translation return context.VectorExtract(OperandType.I32, vec, 0); } - public static FpFlagsPInvokeTest GenerateFpFlagsPInvokeTest() + public static FpFlagsPInvokeTest GenerateFpFlagsPInvokeTest(JitCache jitCache) { EmitterContext context = new(); @@ -141,7 +142,7 @@ namespace ARMeilleure.Translation OperandType[] argTypes = [OperandType.I64]; - return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(); + return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map(jitCache); } } } diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs b/src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs index 05c889922..519fa54a0 100644 --- 
a/src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs +++ b/src/Ryujinx.Cpu/LightningJit/Cache/CacheMemoryAllocator.cs @@ -23,12 +23,31 @@ namespace Ryujinx.Cpu.LightningJit.Cache return Offset.CompareTo(other.Offset); } } + + private readonly int _regionSize; + private int _regionCount; private readonly List _blocks = []; - public CacheMemoryAllocator(int capacity) + public CacheMemoryAllocator(int regionSize, int initialRegionCount = 1) { - _blocks.Add(new MemoryBlock(0, capacity)); + _regionCount = 0; + _regionSize = regionSize; + + for (; initialRegionCount > 0; initialRegionCount--) + { + _blocks.Add(new MemoryBlock(_regionSize * _regionCount, _regionSize)); + _regionCount++; + } + } + + public void AddNewBlocks(int count) + { + for (; count > 0; count--) + { + _blocks.Add(new MemoryBlock(_regionSize * _regionCount, _regionSize)); + _regionCount++; + } } public int Allocate(int size) @@ -100,13 +119,14 @@ namespace Ryujinx.Cpu.LightningJit.Cache { index = ~index; } + + int endOffs = block.Offset + block.Size; - if (index < _blocks.Count) + // Don't merge blocks from different allocations + if (index < _blocks.Count && endOffs % _regionSize != 0) { MemoryBlock next = _blocks[index]; - int endOffs = block.Offset + block.Size; - if (next.Offset == endOffs) { block = new MemoryBlock(block.Offset, block.Size + next.Size); @@ -114,7 +134,8 @@ namespace Ryujinx.Cpu.LightningJit.Cache } } - if (index > 0) + // Don't merge blocks from different allocations + if (index > 0 && block.Offset % _regionSize != 0) { MemoryBlock prev = _blocks[index - 1]; diff --git a/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs b/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs index a96e3554a..ee0710de2 100644 --- a/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs +++ b/src/Ryujinx.Cpu/LightningJit/Cache/JitCache.cs @@ -11,76 +11,45 @@ using System.Threading; namespace Ryujinx.Cpu.LightningJit.Cache { - static partial class JitCache + partial class JitCache : IDisposable { private 
static readonly int _pageSize = (int)MemoryBlock.GetPageSize(); private static readonly int _pageMask = _pageSize - 1; private const int CodeAlignment = 4; // Bytes. - // TODO: JIT Cache size should be application dependent, not global. - private const int CacheSize = 1024 * (1024 * 1024); // Megabytes * Size of Megabytes (since its in bytes). + private const uint CacheSize = 256 * 1024 * 1024; // Megabytes * Size of Megabytes (since its in bytes). - private static JitCacheInvalidation _jitCacheInvalidator; + private readonly JitCacheInvalidation _jitCacheInvalidator; - private static CacheMemoryAllocator _cacheAllocator; + private readonly CacheMemoryAllocator _cacheAllocator; - private static readonly List _cacheEntries = []; + private readonly List _cacheEntries = []; - private static readonly Lock _lock = new(); - private static bool _initialized; - private static readonly List _jitRegions = []; - private static int _activeRegionIndex = 0; - - [SupportedOSPlatform("windows")] - [LibraryImport("kernel32.dll", SetLastError = true)] - public static partial nint FlushInstructionCache(nint hProcess, nint lpAddress, nuint dwSize); + private readonly Lock _lock = new(); - [SupportedOSPlatform("macos")] - [LibraryImport("libSystem.dylib", EntryPoint = "sys_icache_invalidate")] - internal static partial void SysICacheInvalidate(nint start, nuint len); + private readonly List _jitRegions = []; - [SupportedOSPlatform("linux")] - [LibraryImport("libgcc_s.so.1", EntryPoint = "__clear_cache")] - internal static partial void ClearCache(nint begin, nint end); - - public static void Initialize(IJitMemoryAllocator allocator) + public JitCache(IJitMemoryAllocator allocator) { - if (_initialized) - { - return; - } - lock (_lock) { - if (_initialized) - { - return; - } - - ReservedRegion firstRegion = new(allocator, CacheSize); - _jitRegions.Add(firstRegion); - _activeRegionIndex = 0; - + _jitRegions.Add(new(allocator, CacheSize)); + + _cacheAllocator = new 
CacheMemoryAllocator((int)CacheSize); + if (!OperatingSystem.IsWindows() && !OperatingSystem.IsMacOS()) { _jitCacheInvalidator = new JitCacheInvalidation(allocator); } - - _cacheAllocator = new CacheMemoryAllocator(CacheSize); - - _initialized = true; } } - public unsafe static nint Map(ReadOnlySpan code) + public nint Map(ReadOnlySpan code) { lock (_lock) { - Debug.Assert(_initialized); - int funcOffset = Allocate(code.Length); - ReservedRegion targetRegion = _jitRegions[_activeRegionIndex]; - nint funcPtr = targetRegion.Pointer + funcOffset; + nint funcPtr = GetFunctionPtr(funcOffset); if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) { @@ -94,9 +63,9 @@ namespace Ryujinx.Cpu.LightningJit.Cache } else { - ReprotectAsWritable(targetRegion, funcOffset, code.Length); + ReprotectAsWritable(funcOffset, code.Length); Marshal.Copy(code.ToArray(), 0, funcPtr, code.Length); - ReprotectAsExecutable(targetRegion, funcOffset, code.Length); + ReprotectAsExecutable(funcOffset, code.Length); _jitCacheInvalidator?.Invalidate(funcPtr, (ulong)code.Length); } @@ -107,21 +76,20 @@ namespace Ryujinx.Cpu.LightningJit.Cache } } - public static void Unmap(nint pointer) + public void Unmap(nint pointer) { lock (_lock) { - Debug.Assert(_initialized); - - foreach (ReservedRegion region in _jitRegions) + for (int i = 0; i < _jitRegions.Count; i++) { + ReservedRegion region = _jitRegions[i]; if (pointer.ToInt64() < region.Pointer.ToInt64() || - pointer.ToInt64() >= (region.Pointer + CacheSize).ToInt64()) + pointer.ToInt64() >= (region.Pointer + (nint)CacheSize).ToInt64()) { continue; } - int funcOffset = (int)(pointer.ToInt64() - region.Pointer.ToInt64()); + int funcOffset = (int)(pointer.ToInt64() - region.Pointer.ToInt64() + i * CacheSize); if (TryFind(funcOffset, out CacheEntry entry, out int entryIndex) && entry.Offset == funcOffset) { @@ -134,59 +102,63 @@ namespace Ryujinx.Cpu.LightningJit.Cache } } - private static void 
ReprotectAsWritable(ReservedRegion region, int offset, int size) + private void ReprotectAsWritable(int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; - int regionEnd = (endOffs + _pageMask) & ~_pageMask; + int regionStart = (offset % (int)CacheSize) & ~_pageMask; + int regionEnd = ((endOffs % (int)CacheSize) + _pageMask) & ~_pageMask; - region.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + GetRegion(offset).Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } - private static void ReprotectAsExecutable(ReservedRegion region, int offset, int size) + private void ReprotectAsExecutable(int offset, int size) { int endOffs = offset + size; - int regionStart = offset & ~_pageMask; - int regionEnd = (endOffs + _pageMask) & ~_pageMask; + int regionStart = (offset % (int)CacheSize) & ~_pageMask; + int regionEnd = ((endOffs % (int)CacheSize) + _pageMask) & ~_pageMask; - region.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + GetRegion(offset).Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); } - private static int Allocate(int codeSize) + private int Allocate(int codeSize) { codeSize = AlignCodeSize(codeSize); + + int allocOffset = _cacheAllocator.Allocate(codeSize); - for (int i = _activeRegionIndex; i < _jitRegions.Count; i++) + if (allocOffset >= 0) { - int allocOffset = _cacheAllocator.Allocate(codeSize); - - if (allocOffset >= 0) - { - _jitRegions[i].ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); - _activeRegionIndex = i; - return allocOffset; - } + GetRegion(allocOffset).ExpandIfNeeded((ulong)(allocOffset % (int)CacheSize) + (ulong)codeSize); + return allocOffset; } - - int exhaustedRegion = _activeRegionIndex; + + _cacheAllocator.AddNewBlocks(1); ReservedRegion newRegion = new(_jitRegions[0].Allocator, CacheSize); + + Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache of size {(_jitRegions.Count * CacheSize).Bytes()} exhausted, creating 
new Cache Region ({((_jitRegions.Count + 1) * CacheSize).Bytes()} Total Allocation)."); + _jitRegions.Add(newRegion); - _activeRegionIndex = _jitRegions.Count - 1; - int newRegionNumber = _activeRegionIndex; - - Logger.Warning?.Print(LogClass.Cpu, $"JIT Cache Region {exhaustedRegion} exhausted, creating new Cache Region {newRegionNumber} ({((long)(newRegionNumber + 1) * CacheSize).Bytes()} Total Allocation)."); - - _cacheAllocator = new CacheMemoryAllocator(CacheSize); - - int allocOffsetNew = _cacheAllocator.Allocate(codeSize); - if (allocOffsetNew < 0) + allocOffset = _cacheAllocator.Allocate(codeSize); + if (allocOffset < 0) { throw new OutOfMemoryException("Failed to allocate in new Cache Region!"); } - newRegion.ExpandIfNeeded((ulong)allocOffsetNew + (ulong)codeSize); - return allocOffsetNew; + GetRegion(allocOffset).ExpandIfNeeded((ulong)(allocOffset % (int)CacheSize) + (ulong)codeSize); + return allocOffset; + } + + private nint GetFunctionPtr(int offset) + { + return GetRegion(offset).Pointer + (offset % (int)CacheSize); + } + + private ReservedRegion GetRegion(int offset) + { + int index = offset / (int)CacheSize; + + return _jitRegions[index]; } private static int AlignCodeSize(int codeSize) @@ -194,7 +166,7 @@ namespace Ryujinx.Cpu.LightningJit.Cache return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1); } - private static void Add(int offset, int size) + private void Add(int offset, int size) { CacheEntry entry = new(offset, size); @@ -208,7 +180,7 @@ namespace Ryujinx.Cpu.LightningJit.Cache _cacheEntries.Insert(index, entry); } - public static bool TryFind(int offset, out CacheEntry entry, out int entryIndex) + public bool TryFind(int offset, out CacheEntry entry, out int entryIndex) { lock (_lock) { @@ -231,5 +203,13 @@ namespace Ryujinx.Cpu.LightningJit.Cache entryIndex = 0; return false; } + + public void Dispose() + { + foreach (ReservedRegion jitRegion in _jitRegions) + { + jitRegion.Dispose(); + } + } } } diff --git 
a/src/Ryujinx.Cpu/LightningJit/Translator.cs b/src/Ryujinx.Cpu/LightningJit/Translator.cs index 1ee6993d2..db77cf058 100644 --- a/src/Ryujinx.Cpu/LightningJit/Translator.cs +++ b/src/Ryujinx.Cpu/LightningJit/Translator.cs @@ -19,6 +19,7 @@ namespace Ryujinx.Cpu.LightningJit private static bool IsNoWxPlatform => false; private readonly ConcurrentQueue> _oldFuncs; + private readonly JitCache _jitCache; private readonly NoWxCache _noWxCache; private bool _disposed; @@ -39,12 +40,12 @@ namespace Ryujinx.Cpu.LightningJit } else { - JitCache.Initialize(new JitMemoryAllocator(forJit: true)); + _jitCache = new(new JitMemoryAllocator(forJit: true)); } Functions = new TranslatorCache(); FunctionTable = functionTable; - Stubs = new TranslatorStubs(FunctionTable, _noWxCache); + Stubs = new TranslatorStubs(_jitCache, FunctionTable, _noWxCache); FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub; @@ -100,7 +101,7 @@ namespace Ryujinx.Cpu.LightningJit if (oldFunc != func) { - JitCache.Unmap(func.FuncPointer); + _jitCache.Unmap(func.FuncPointer); func = oldFunc; } @@ -121,7 +122,7 @@ namespace Ryujinx.Cpu.LightningJit private TranslatedFunction Translate(ulong address, ExecutionMode mode) { CompiledFunction func = Compile(address, mode); - nint funcPointer = JitCache.Map(func.Code); + nint funcPointer = _jitCache.Map(func.Code); return new TranslatedFunction(funcPointer, (ulong)func.GuestCodeLength); } @@ -164,14 +165,14 @@ namespace Ryujinx.Cpu.LightningJit foreach (TranslatedFunction func in functions) { - JitCache.Unmap(func.FuncPointer); + _jitCache.Unmap(func.FuncPointer); } Functions.Clear(); while (_oldFuncs.TryDequeue(out KeyValuePair kv)) { - JitCache.Unmap(kv.Value.FuncPointer); + _jitCache.Unmap(kv.Value.FuncPointer); } } @@ -188,6 +189,7 @@ namespace Ryujinx.Cpu.LightningJit else { ClearJitCache(); + _jitCache.Dispose(); } Stubs.Dispose(); diff --git a/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs b/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs index 
d0eec7eff..fdd6929a7 100644 --- a/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs +++ b/src/Ryujinx.Cpu/LightningJit/TranslatorStubs.cs @@ -19,6 +19,8 @@ namespace Ryujinx.Cpu.LightningJit { private delegate ulong GetFunctionAddressDelegate(nint framePointer, ulong address); + private readonly JitCache _jitCache; + private readonly Lazy _slowDispatchStub; private bool _disposed; @@ -76,13 +78,16 @@ namespace Ryujinx.Cpu.LightningJit /// Initializes a new instance of the class with the specified /// instance. /// + /// Jit cache to map functions in /// Function table used to store pointers to the functions that the guest code will call /// Cache used on platforms that enforce W^X, otherwise should be null /// is null - public TranslatorStubs(IAddressTable functionTable, NoWxCache noWxCache) + public TranslatorStubs(JitCache jitCache, IAddressTable functionTable, NoWxCache noWxCache) { ArgumentNullException.ThrowIfNull(functionTable); + _jitCache = jitCache; + _functionTable = functionTable; _noWxCache = noWxCache; _getFunctionAddressRef = NativeInterface.GetFunctionAddress; @@ -113,12 +118,12 @@ namespace Ryujinx.Cpu.LightningJit { if (_dispatchStub.IsValueCreated) { - JitCache.Unmap(_dispatchStub.Value); + _jitCache.Unmap(_dispatchStub.Value); } if (_dispatchLoop.IsValueCreated) { - JitCache.Unmap(Marshal.GetFunctionPointerForDelegate(_dispatchLoop.Value)); + _jitCache.Unmap(Marshal.GetFunctionPointerForDelegate(_dispatchLoop.Value)); } } @@ -363,7 +368,7 @@ namespace Ryujinx.Cpu.LightningJit } else { - return JitCache.Map(code); + return _jitCache.Map(code); } } diff --git a/src/Ryujinx.Tests/Cpu/CpuTest.cs b/src/Ryujinx.Tests/Cpu/CpuTest.cs index c01a9e4e8..aab85a76f 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTest.cs @@ -10,6 +10,8 @@ using MemoryPermission = Ryujinx.Tests.Unicorn.MemoryPermission; namespace Ryujinx.Tests.Cpu { [TestFixture] + [Parallelizable(ParallelScope.All)] + [FixtureLifeCycle(LifeCycle.InstancePerTestCase)] 
public class CpuTest { protected static readonly ulong Size = MemoryBlock.GetPageSize(); diff --git a/src/Ryujinx.Tests/Cpu/CpuTest32.cs b/src/Ryujinx.Tests/Cpu/CpuTest32.cs index e742c1713..ea78b7844 100644 --- a/src/Ryujinx.Tests/Cpu/CpuTest32.cs +++ b/src/Ryujinx.Tests/Cpu/CpuTest32.cs @@ -10,6 +10,8 @@ using MemoryPermission = Ryujinx.Tests.Unicorn.MemoryPermission; namespace Ryujinx.Tests.Cpu { [TestFixture] + [Parallelizable(ParallelScope.All)] + [FixtureLifeCycle(LifeCycle.InstancePerTestCase)] public class CpuTest32 { protected static readonly uint Size = (uint)MemoryBlock.GetPageSize(); diff --git a/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs b/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs index d67f22160..a6014ed56 100644 --- a/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs +++ b/src/Ryujinx.Tests/Cpu/EnvironmentTests.cs @@ -51,7 +51,7 @@ namespace Ryujinx.Tests.Cpu bool methodCalled = false; bool isFz = false; - TranslatorTestMethods.FpFlagsPInvokeTest method = TranslatorTestMethods.GenerateFpFlagsPInvokeTest(); + TranslatorTestMethods.FpFlagsPInvokeTest method = TranslatorTestMethods.GenerateFpFlagsPInvokeTest(_translator.JitCache); // This method sets flush-to-zero and then calls the managed method. // Before and after setting the flags, it ensures subnormal addition works as expected. diff --git a/src/Ryujinx.Tests/Memory/PartialUnmaps.cs b/src/Ryujinx.Tests/Memory/PartialUnmaps.cs index c2eeded4d..f80a7519f 100644 --- a/src/Ryujinx.Tests/Memory/PartialUnmaps.cs +++ b/src/Ryujinx.Tests/Memory/PartialUnmaps.cs @@ -245,7 +245,7 @@ namespace Ryujinx.Tests.Memory // Create a large mapping. 
mainMemory.MapView(backing, 0, 0, vaSize); - TestMethods.DebugNativeWriteLoop writeFunc = TestMethods.GenerateDebugNativeWriteLoop(); + TestMethods.DebugNativeWriteLoop writeFunc = TestMethods.GenerateDebugNativeWriteLoop(_translator.JitCache); nint writePtr = mainMemory.GetPointer(vaSize - 0x1000, 4); Thread testThread = new(() => @@ -339,7 +339,7 @@ namespace Ryujinx.Tests.Memory fixed (void* localMap = &state.LocalCounts) { - TestMethods.DebugThreadLocalMapGetOrReserve getOrReserve = TestMethods.GenerateDebugThreadLocalMapGetOrReserve((nint)localMap); + TestMethods.DebugThreadLocalMapGetOrReserve getOrReserve = TestMethods.GenerateDebugThreadLocalMapGetOrReserve(_translator.JitCache, (nint)localMap); for (int i = 0; i < ThreadLocalMap.MapSize; i++) {