Skip to content

Optimizations for a specific function is causing runtime misbehavior #122254

@vinnyrom

Description

@vinnyrom

Description

Optimizations for int DecodeFromString<Dialect>(Span<byte>, ReadOnlySpan<char>) is causing test failures.

Reproduction Steps

Add a C# Class Library project with the following:

using System.Runtime.CompilerServices;

namespace Repro.Library;

public static class AdvancedBase85
{
  public interface IDialect
  {
    static abstract char ZeroSentinel { get; }
    static abstract char FillSentinel { get; }
    static abstract char[] CharacterMap { get; }
    static abstract byte[] ByteCodes { get; }
  }

  public sealed class SafeForXmlDialect : IDialect
  {
    private SafeForXmlDialect() { }

    static char IDialect.ZeroSentinel => '!';
    static char IDialect.FillSentinel => '~';
    static char[] IDialect.CharacterMap => "#$%()*+,-.0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}".ToCharArray();
    static byte[] IDialect.ByteCodes =>
    [
      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    //        !     "     #     $     %     &     '     (     )     *     +     ,     -     .     /
      0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0xFF, 0xFF, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF,
    //  0     1     2     3     4     5     6     7     8     9     :     ;     <     =     >     ?
      0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0xFF, 0x16, 0xFF, 0x17,
    //  @     A     B     C     D     E     F     G     H     I     J     K     L     M     N     O
      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    //  P     Q     R     S     T     U     V     W     X     Y     Z     [     \     ]     ^     _
      0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0x34, 0x35, 0x36,
    //  `     a     b     c     d     e     f     g     h     i     j     k     l     m     n     o
      0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46,
    //  p     q     r     s     t     u     v     w     x     y     z     {     |     }     ~
      0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0xFF, 0xFF,
    ];
  }

  public sealed class SafeForMailDialect : IDialect
  {
    private SafeForMailDialect() { }

    static char IDialect.ZeroSentinel => '!';
    static char IDialect.FillSentinel => '%';
    static char[] IDialect.CharacterMap => "#$&'()*+,./0123456789;<=>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{|}~".ToCharArray();
    static byte[] IDialect.ByteCodes =>
    [
      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    //        !     "     #     $     %     &     '     (     )     *     +     ,     -     .     /
      0xFF, 0xFF, 0xFF, 0x00, 0x01, 0xFF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xFF, 0x09, 0x0A,
    //  0     1     2     3     4     5     6     7     8     9     :     ;     <     =     >     ?
      0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0xFF, 0x15, 0x16, 0x17, 0x18, 0xFF,
    //  @     A     B     C     D     E     F     G     H     I     J     K     L     M     N     O
      0xFF, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    // P     Q     R     S     T     U     V     W     X     Y     Z     [     \     ]     ^     _
      0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0x34, 0x35, 0x36,
    //  `     a     b     c     d     e     f     g     h     i     j     k     l     m     n     o
      0xFF, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45,
    //  p     q     r     s     t     u     v     w     x     y     z     {     |     }     ~
      0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0xFF,
    ];
  }

  public static MemoryStream? DecodeFromString(string? text) => DecodeFromString<SafeForXmlDialect>(text);
  public static MemoryStream? DecodeFromString<Dialect>(string? text)
    where Dialect : IDialect
  {
    if (text is null)
    {
      return null;
    }

    var textLength = text.Length;
    if (textLength == 0)
    {
      return new MemoryStream(0);
    }

    var guard = Guid.NewGuid().ToByteArray();
    var guardLength = guard.Length;
    var guardOffset = (textLength << 2) + 4;
    var data = new byte[guardOffset + guardLength];

    var guardSpan = new Span<byte>(data, guardOffset, guardLength);
    guard.CopyTo(guardSpan);

    var dataLength = DecodeFromString<Dialect>(data, text);
    if (dataLength > guardOffset)
    {
      throw new InvalidOperationException("decoder reported length overflowed end of buffer");
    }

    if (!guardSpan.SequenceEqual(guard))
    {
      throw new InvalidOperationException("end of buffer guard was overwritten");
    }

    return new MemoryStream(data, 0, dataLength, false, true);
  }

  // The issue does not reproduce when optimizations are disabled:
  // [MethodImpl(MethodImplOptions.NoOptimization)]
  private unsafe static int DecodeFromString<Dialect>(Span<byte> data, ReadOnlySpan<char> text)
    where Dialect : IDialect
  {
    fixed (byte* byteCodes = Dialect.ByteCodes)
    {
      var zeroSentinel = Dialect.ZeroSentinel;
      var fillSentinel = Dialect.FillSentinel;

      fixed (byte* pData = data)
      {
        var pCurrentValue = (uint*)pData;

        fixed (char* pText = text)
        {
          int block = 0;
          uint value = 0;

          for (char* pCurrentChar = pText, pEndChar = pText + text.Length; pCurrentChar < pEndChar; ++pCurrentChar)
          {
            var currentChar = *pCurrentChar;
            if (currentChar == zeroSentinel)
            {
              if (block > 3)
              {
                throw new InvalidOperationException("zero sentinel used in invalid block level; only supported in blocks 0-3");
              }

              if (block != 0)
              {
                *pCurrentValue++ = value;
                block = 0;
              }
              else
              {
                *pCurrentValue++ = 0;
              }
            }
            else if (currentChar == fillSentinel)
            {
              if (block != 0)
              {
                throw new InvalidOperationException("fill sentinel used in invalid block level; only support at block zero");
              }
              *pCurrentValue++ = uint.MaxValue;
            }
            else if (currentChar > 0x20 && currentChar < 0x7F)
            {
              var byteCode = byteCodes[currentChar];
              if (byteCode >= 0x55)
              {
                throw new InvalidOperationException("encoundered invalid decoded byte value");
              }

              switch (block)
              {
                case 0:
                {
                  value = byteCode;
                  block = 1;
                  break;
                }
                case 1:
                {
                  value += byteCode * 85u;
                  block = 2;
                  break;
                }
                case 2:
                {
                  value += byteCode * 7225u;
                  block = 3;
                  break;
                }
                case 3:
                {
                  value += byteCode * 614125u;
                  block = 4;
                  break;
                }
                case 4:
                {
                  value += byteCode * 52200625u;
                  *pCurrentValue++ = value;
                  block = 0;
                  break;
                }
                default:
                {
                  throw new InvalidOperationException("encountered invalid block level; block level must be 0-4");
                }
              }
            }
            else
            {
              if (currentChar != ' ' && currentChar != '\r' && currentChar != '\n' && currentChar != '\t')
              {
                throw new InvalidOperationException("encountered illegal character");
              }
            }
          }

          var bytesWritten = (int)((byte*)pCurrentValue - pData);
          switch (block)
          {
            case 2:
              if (value > 0xFF)
              {
                throw new InvalidOperationException("invalid 2-block leftover value encountered");
              }
              *(byte*)pCurrentValue = (byte)value;
              ++bytesWritten;
              break;
            case 3:
              if (value > 0xFFFF)
              {
                throw new InvalidOperationException("invalid 3-block leftover value encountered");
              }
              *(short*)pCurrentValue = (short)value;
              bytesWritten += 2;
              break;
            case 4:
              if (value > 0xFFFFFF)
              {
                throw new InvalidOperationException("invalid 4-block leftover value encountered");
              }
              *(byte*)pCurrentValue = (byte)value;
              *((byte*)pCurrentValue + 1) = (byte)(value >> 8);
              *((byte*)pCurrentValue + 2) = (byte)(value >> 16);
              bytesWritten += 3;
              break;
            default:
              if (block != 0)
              {
                throw new InvalidOperationException("invalid leftover block-level; leftovers can only be 2-4");
              }
              break;
          }

          return bytesWritten;
        }
      }
    }
  }

  public static string? EncodeToString(byte[] data) => EncodeToString<SafeForXmlDialect>(data);
  public static string? EncodeToString<Dialect>(byte[] data)
    where Dialect : IDialect
  {
    if (data is null)
    {
      return null;
    }

    int dataLength = data.Length;
    if (dataLength == 0)
    {
      return string.Empty;
    }

    var guard = Guid.NewGuid().ToByteArray().Select(b => (char)b).ToArray();
    var guardLength = guard.Length;
    var guardOffset = ((dataLength * 5) >> 2) + 4;
    var text = new char[guardOffset + guardLength];

    var guardSpan = new Span<char>(text, guardOffset, guardLength);
    guard.CopyTo(guardSpan);

    var textLength = EncodeToString<Dialect>(text, data);
    if (textLength > guardOffset)
    {
      throw new InvalidOperationException("encoder reported length overflowed end of buffer");
    }

    if (!guardSpan.SequenceEqual(guard))
    {
      throw new InvalidOperationException("end of buffer guard was overwritten");
    }

    return new string(text, 0, textLength);
  }

  private unsafe static int EncodeToString<Dialect>(Span<char> text, ReadOnlySpan<byte> data)
    where Dialect : IDialect
  {
    fixed (char* characterMap = Dialect.CharacterMap)
    {
      var zeroSentinel = Dialect.ZeroSentinel;
      var fillSentinel = Dialect.FillSentinel;

      fixed (char* pText = text)
      {
        var pCurrentChar = pText;

        fixed (byte* pData = data)
        {
          uint value = 0;
          uint temp = 0;

          var pCurrentValue = (uint*)pData;
          for (var pEndValue = pCurrentValue + (data.Length >> 2); pCurrentValue < pEndValue; ++pCurrentValue)
          {
            value = *pCurrentValue;

            if (value == 0)
            {
              *pCurrentChar++ = zeroSentinel;
              continue;
            }

            if (value == 0xFFFFFFFFUL)
            {
              *pCurrentChar++ = fillSentinel;
              continue;
            }

            temp = value / 85;
            pCurrentChar[0] = characterMap[value - temp * 85];
            if (temp == 0)
            {
              pCurrentChar[1] = zeroSentinel;
              pCurrentChar += 2;
              continue;
            }

            value = temp / 85;
            pCurrentChar[1] = characterMap[temp - value * 85];
            if (value == 0)
            {
              pCurrentChar[2] = zeroSentinel;
              pCurrentChar += 3;
              continue;
            }

            temp = value / 85;
            pCurrentChar[2] = characterMap[value - temp * 85];
            if (temp == 0)
            {
              pCurrentChar[3] = zeroSentinel;
              pCurrentChar += 4;
              continue;
            }

            value = temp / 85;
            pCurrentChar[3] = characterMap[temp - value * 85];
            temp = value / 85;
            pCurrentChar[4] = characterMap[value - temp * 85];
            pCurrentChar += 5;
          }

          pCurrentChar = EncodeLeftover(characterMap, pCurrentChar, (byte*)pCurrentValue, leftoverCount: data.Length & 3);

          return (int)(pCurrentChar - pText);
        }
      }
    }
  }

  private unsafe static char* EncodeLeftover(char* characterMap, char* pCurrentChar, byte* pLeftoverData, int leftoverCount)
  {
    uint value = 0;
    uint temp = 0;

    switch (leftoverCount)
    {
      case 3:
      {
        value = pLeftoverData[2];
        value <<= 8;
        value |= pLeftoverData[1];
        value <<= 8;
        value |= pLeftoverData[0];

        temp = value / 85;
        pCurrentChar[0] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar[1] = characterMap[temp - value * 85];
        temp = value / 85;
        pCurrentChar[2] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar[3] = characterMap[temp - value * 85];
        pCurrentChar += 4;
        break;
      }
      case 2:
      {
        value = pLeftoverData[1];
        value <<= 8;
        value |= pLeftoverData[0];

        temp = value / 85;
        pCurrentChar[0] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar[1] = characterMap[temp - value * 85];
        temp = value / 85;
        pCurrentChar[2] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar += 3;
        break;
      }
      case 1:
      {
        value = pLeftoverData[0];

        temp = value / 85;
        pCurrentChar[0] = characterMap[value - temp * 85];
        value = temp / 85;
        pCurrentChar[1] = characterMap[temp - value * 85];
        pCurrentChar += 2;
        break;
      }
      case 0:
      {
        break;
      }
      default:
      {
        throw new InvalidOperationException("invalid leftover bytes; only 1-3 are valid");
      }
    }

    return pCurrentChar;
  }
}

Add a C# NUnit Test Project with the following code and add a reference to the library project:

using System.ComponentModel;

namespace Repro.Library.Tests;

[TestFixture]
[EditorBrowsable(EditorBrowsableState.Never)]
internal sealed class AdvancedBase85GeneralTests
{
  private static void TestFixedSizeAndValues(int size, byte value)
  {
    Console.WriteLine("  Testing size {0} with value 0x{1:X2}", size, value);

    var originalBytes = new byte[size];
    originalBytes.AsSpan().Fill(value);

    var encodedBytes = AdvancedBase85.EncodeToString(originalBytes);
    var decodedBytes = AdvancedBase85.DecodeFromString(encodedBytes)!.ToArray();
    CompareBytes(originalBytes, decodedBytes);
  }

  private static void CompareBytes(byte[] bytes, byte[] newBytes)
  {
    if (!bytes.SequenceEqual(newBytes))
    {
      Assert.Fail("byte arrays differ");
    }
  }

  [Test]
  public static void SantityChecks()
  {
    const int nMaxBytes = 4903;

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0x00);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0x01);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0x7F);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0x80);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0xFE);
    }

    for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
    {
      TestFixedSizeAndValues(nBytes, 0xFF);
    }
  }

  [Test]
  public static void RandomChecks()
  {
    var runCount = 4096;
    Console.WriteLine("Running {0} random checks", runCount);

    var random = new Random(12345);
    for (int i = 0; i < 4096; i++)
    {
      var count = random.Next(1, 65536);
      var originalBytes = new byte[count];
      random.NextBytes(originalBytes);

      var encodedBytes = AdvancedBase85.EncodeToString(originalBytes);
      var decodedBytes = AdvancedBase85.DecodeFromString(encodedBytes)!.ToArray();
      CompareBytes(originalBytes, decodedBytes);

      if ((i + 1) % 256 == 0)
      {
        Console.WriteLine($"  {i + 1} checks completed");
      }
    }
  }

  [Test]
  public static void SparseChecks()
  {
    Console.WriteLine("Running sparse checks...");

    var random = new Random(12345);
    for (var offset = -4; offset <= 4; offset++)
    {
      var length = offset + 65536;
      var swapCount = int.Max(length >> 11, 8);

      Console.WriteLine("  Array length of {0} from offset {1}", length, offset);

      var originalBytes = new byte[length];
      for (var mode = -1; mode <= 1; mode++)
      {
        Console.WriteLine($"    Spare mode is {(mode == 0 ? "Mixed" : mode < 0 ? "Zeros" : "Ones")}");
        random.NextBytes(originalBytes);

        if (mode != 0)
        {
          for (var index = 0; index < length; index++)
          {
            byte value = originalBytes[index];
            if (value == 0 || value == byte.MaxValue)
            {
              originalBytes[index] = (byte)random.Next(1, 255);
            }
          }
        }

        var firstLength = 0;
        var lastLength = int.MaxValue;

        var loopCount = 0;
        var runCount = 0;
        for (; ; )
        {
          var text = AdvancedBase85.EncodeToString(originalBytes)!;
          if (mode != 0)
          {
            var newLength = text.Length;
            if (firstLength == 0)
            {
              firstLength = newLength;
            }
            else if (newLength > lastLength)
            {
              Assert.Fail("unexpected encoding growth");
            }
            lastLength = newLength;
          }

          var decodedBytes = AdvancedBase85.DecodeFromString(text)!.ToArray();
          CompareBytes(originalBytes, decodedBytes);

          loopCount++;
          if (loopCount % 512 == 0)
          {
            Console.WriteLine($"      {loopCount} checks completed");
          }

          if (runCount >= length)
          {
            break;
          }

          var swap = 0;
          while (swap < swapCount)
          {
            var index = random.Next(0, length);

            if (mode == 0)
            {
              originalBytes[index] = (byte)(random.NextDouble() < 0.5 ? 0x00 : 0xFF);
            }
            else if (mode < 0)
            {
              originalBytes[index] = 0x00;
            }
            else
            {
              originalBytes[index] = 0xFF;
            }

            swap++;
            runCount++;
          }
        }

        if (mode != 0)
        {
          Assert.That(lastLength, Is.LessThan(firstLength));
          Console.WriteLine($"      Encoding length reduced from {firstLength} to {lastLength}");
        }
      }
    }
  }
}

Expected behavior

All tests to pass with the Release configuration.

Actual behavior

Under the Release configuration, the tests non-deterministically fail.

Regression?

Worked in 8.0.

Known Workarounds

The issue does not reproduce if [MethodImpl(MethodImplOptions.NoOptimization)] is added to int DecodeFromString<Dialect>(Span<byte>, ReadOnlySpan<char>).

Configuration

Processor 12th Gen Intel(R) Core(TM) i7-12800H 2.40 GHz
Installed RAM 64.0 GB (63.7 GB usable)
System type 64-bit operating system, x64-based processor
Edition Windows 11 Enterprise
Version 23H2
OS build 22631.4460

Other information

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    area-CodeGen-coreclrCLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMIuntriagedNew issue has not been triaged by the area owner

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions