-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Description
Description
Optimizations for int DecodeFromString<Dialect>(Span<byte>, ReadOnlySpan<char>) is causing test failures.
Reproduction Steps
Add a C# Class Library project with the following:
using System.Runtime.CompilerServices;
namespace Repro.Library;
public static class AdvancedBase85
{
public interface IDialect
{
static abstract char ZeroSentinel { get; }
static abstract char FillSentinel { get; }
static abstract char[] CharacterMap { get; }
static abstract byte[] ByteCodes { get; }
}
public sealed class SafeForXmlDialect : IDialect
{
private SafeForXmlDialect() { }
static char IDialect.ZeroSentinel => '!';
static char IDialect.FillSentinel => '~';
static char[] IDialect.CharacterMap => "#$%()*+,-.0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}".ToCharArray();
static byte[] IDialect.ByteCodes =>
[
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
// ! " # $ % & ' ( ) * + , - . /
0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0xFF, 0xFF, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF,
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0xFF, 0x16, 0xFF, 0x17,
// @ A B C D E F G H I J K L M N O
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
// P Q R S T U V W X Y Z [ \ ] ^ _
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0x34, 0x35, 0x36,
// ` a b c d e f g h i j k l m n o
0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46,
// p q r s t u v w x y z { | } ~
0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0xFF, 0xFF,
];
}
public sealed class SafeForMailDialect : IDialect
{
private SafeForMailDialect() { }
static char IDialect.ZeroSentinel => '!';
static char IDialect.FillSentinel => '%';
static char[] IDialect.CharacterMap => "#$&'()*+,./0123456789;<=>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{|}~".ToCharArray();
static byte[] IDialect.ByteCodes =>
[
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
// ! " # $ % & ' ( ) * + , - . /
0xFF, 0xFF, 0xFF, 0x00, 0x01, 0xFF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0xFF, 0x09, 0x0A,
// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0xFF, 0x15, 0x16, 0x17, 0x18, 0xFF,
// @ A B C D E F G H I J K L M N O
0xFF, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
// P Q R S T U V W X Y Z [ \ ] ^ _
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0x34, 0x35, 0x36,
// ` a b c d e f g h i j k l m n o
0xFF, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45,
// p q r s t u v w x y z { | } ~
0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0xFF,
];
}
public static MemoryStream? DecodeFromString(string? text) => DecodeFromString<SafeForXmlDialect>(text);
public static MemoryStream? DecodeFromString<Dialect>(string? text)
where Dialect : IDialect
{
if (text is null)
{
return null;
}
var textLength = text.Length;
if (textLength == 0)
{
return new MemoryStream(0);
}
var guard = Guid.NewGuid().ToByteArray();
var guardLength = guard.Length;
var guardOffset = (textLength << 2) + 4;
var data = new byte[guardOffset + guardLength];
var guardSpan = new Span<byte>(data, guardOffset, guardLength);
guard.CopyTo(guardSpan);
var dataLength = DecodeFromString<Dialect>(data, text);
if (dataLength > guardOffset)
{
throw new InvalidOperationException("decoder reported length overflowed end of buffer");
}
if (!guardSpan.SequenceEqual(guard))
{
throw new InvalidOperationException("end of buffer guard was overwritten");
}
return new MemoryStream(data, 0, dataLength, false, true);
}
// The issue does not reproduce when optimizations are disabled:
// [MethodImpl(MethodImplOptions.NoOptimization)]
private unsafe static int DecodeFromString<Dialect>(Span<byte> data, ReadOnlySpan<char> text)
where Dialect : IDialect
{
fixed (byte* byteCodes = Dialect.ByteCodes)
{
var zeroSentinel = Dialect.ZeroSentinel;
var fillSentinel = Dialect.FillSentinel;
fixed (byte* pData = data)
{
var pCurrentValue = (uint*)pData;
fixed (char* pText = text)
{
int block = 0;
uint value = 0;
for (char* pCurrentChar = pText, pEndChar = pText + text.Length; pCurrentChar < pEndChar; ++pCurrentChar)
{
var currentChar = *pCurrentChar;
if (currentChar == zeroSentinel)
{
if (block > 3)
{
throw new InvalidOperationException("zero sentinel used in invalid block level; only supported in blocks 0-3");
}
if (block != 0)
{
*pCurrentValue++ = value;
block = 0;
}
else
{
*pCurrentValue++ = 0;
}
}
else if (currentChar == fillSentinel)
{
if (block != 0)
{
throw new InvalidOperationException("fill sentinel used in invalid block level; only support at block zero");
}
*pCurrentValue++ = uint.MaxValue;
}
else if (currentChar > 0x20 && currentChar < 0x7F)
{
var byteCode = byteCodes[currentChar];
if (byteCode >= 0x55)
{
throw new InvalidOperationException("encoundered invalid decoded byte value");
}
switch (block)
{
case 0:
{
value = byteCode;
block = 1;
break;
}
case 1:
{
value += byteCode * 85u;
block = 2;
break;
}
case 2:
{
value += byteCode * 7225u;
block = 3;
break;
}
case 3:
{
value += byteCode * 614125u;
block = 4;
break;
}
case 4:
{
value += byteCode * 52200625u;
*pCurrentValue++ = value;
block = 0;
break;
}
default:
{
throw new InvalidOperationException("encountered invalid block level; block level must be 0-4");
}
}
}
else
{
if (currentChar != ' ' && currentChar != '\r' && currentChar != '\n' && currentChar != '\t')
{
throw new InvalidOperationException("encountered illegal character");
}
}
}
var bytesWritten = (int)((byte*)pCurrentValue - pData);
switch (block)
{
case 2:
if (value > 0xFF)
{
throw new InvalidOperationException("invalid 2-block leftover value encountered");
}
*(byte*)pCurrentValue = (byte)value;
++bytesWritten;
break;
case 3:
if (value > 0xFFFF)
{
throw new InvalidOperationException("invalid 3-block leftover value encountered");
}
*(short*)pCurrentValue = (short)value;
bytesWritten += 2;
break;
case 4:
if (value > 0xFFFFFF)
{
throw new InvalidOperationException("invalid 4-block leftover value encountered");
}
*(byte*)pCurrentValue = (byte)value;
*((byte*)pCurrentValue + 1) = (byte)(value >> 8);
*((byte*)pCurrentValue + 2) = (byte)(value >> 16);
bytesWritten += 3;
break;
default:
if (block != 0)
{
throw new InvalidOperationException("invalid leftover block-level; leftovers can only be 2-4");
}
break;
}
return bytesWritten;
}
}
}
}
public static string? EncodeToString(byte[] data) => EncodeToString<SafeForXmlDialect>(data);
public static string? EncodeToString<Dialect>(byte[] data)
where Dialect : IDialect
{
if (data is null)
{
return null;
}
int dataLength = data.Length;
if (dataLength == 0)
{
return string.Empty;
}
var guard = Guid.NewGuid().ToByteArray().Select(b => (char)b).ToArray();
var guardLength = guard.Length;
var guardOffset = ((dataLength * 5) >> 2) + 4;
var text = new char[guardOffset + guardLength];
var guardSpan = new Span<char>(text, guardOffset, guardLength);
guard.CopyTo(guardSpan);
var textLength = EncodeToString<Dialect>(text, data);
if (textLength > guardOffset)
{
throw new InvalidOperationException("encoder reported length overflowed end of buffer");
}
if (!guardSpan.SequenceEqual(guard))
{
throw new InvalidOperationException("end of buffer guard was overwritten");
}
return new string(text, 0, textLength);
}
private unsafe static int EncodeToString<Dialect>(Span<char> text, ReadOnlySpan<byte> data)
where Dialect : IDialect
{
fixed (char* characterMap = Dialect.CharacterMap)
{
var zeroSentinel = Dialect.ZeroSentinel;
var fillSentinel = Dialect.FillSentinel;
fixed (char* pText = text)
{
var pCurrentChar = pText;
fixed (byte* pData = data)
{
uint value = 0;
uint temp = 0;
var pCurrentValue = (uint*)pData;
for (var pEndValue = pCurrentValue + (data.Length >> 2); pCurrentValue < pEndValue; ++pCurrentValue)
{
value = *pCurrentValue;
if (value == 0)
{
*pCurrentChar++ = zeroSentinel;
continue;
}
if (value == 0xFFFFFFFFUL)
{
*pCurrentChar++ = fillSentinel;
continue;
}
temp = value / 85;
pCurrentChar[0] = characterMap[value - temp * 85];
if (temp == 0)
{
pCurrentChar[1] = zeroSentinel;
pCurrentChar += 2;
continue;
}
value = temp / 85;
pCurrentChar[1] = characterMap[temp - value * 85];
if (value == 0)
{
pCurrentChar[2] = zeroSentinel;
pCurrentChar += 3;
continue;
}
temp = value / 85;
pCurrentChar[2] = characterMap[value - temp * 85];
if (temp == 0)
{
pCurrentChar[3] = zeroSentinel;
pCurrentChar += 4;
continue;
}
value = temp / 85;
pCurrentChar[3] = characterMap[temp - value * 85];
temp = value / 85;
pCurrentChar[4] = characterMap[value - temp * 85];
pCurrentChar += 5;
}
pCurrentChar = EncodeLeftover(characterMap, pCurrentChar, (byte*)pCurrentValue, leftoverCount: data.Length & 3);
return (int)(pCurrentChar - pText);
}
}
}
}
private unsafe static char* EncodeLeftover(char* characterMap, char* pCurrentChar, byte* pLeftoverData, int leftoverCount)
{
uint value = 0;
uint temp = 0;
switch (leftoverCount)
{
case 3:
{
value = pLeftoverData[2];
value <<= 8;
value |= pLeftoverData[1];
value <<= 8;
value |= pLeftoverData[0];
temp = value / 85;
pCurrentChar[0] = characterMap[value - temp * 85];
value = temp / 85;
pCurrentChar[1] = characterMap[temp - value * 85];
temp = value / 85;
pCurrentChar[2] = characterMap[value - temp * 85];
value = temp / 85;
pCurrentChar[3] = characterMap[temp - value * 85];
pCurrentChar += 4;
break;
}
case 2:
{
value = pLeftoverData[1];
value <<= 8;
value |= pLeftoverData[0];
temp = value / 85;
pCurrentChar[0] = characterMap[value - temp * 85];
value = temp / 85;
pCurrentChar[1] = characterMap[temp - value * 85];
temp = value / 85;
pCurrentChar[2] = characterMap[value - temp * 85];
value = temp / 85;
pCurrentChar += 3;
break;
}
case 1:
{
value = pLeftoverData[0];
temp = value / 85;
pCurrentChar[0] = characterMap[value - temp * 85];
value = temp / 85;
pCurrentChar[1] = characterMap[temp - value * 85];
pCurrentChar += 2;
break;
}
case 0:
{
break;
}
default:
{
throw new InvalidOperationException("invalid leftover bytes; only 1-3 are valid");
}
}
return pCurrentChar;
}
}Add a C# NUnit Test Project with the following code and add a reference to the library project:
using System.ComponentModel;
namespace Repro.Library.Tests;
[TestFixture]
[EditorBrowsable(EditorBrowsableState.Never)]
internal sealed class AdvancedBase85GeneralTests
{
private static void TestFixedSizeAndValues(int size, byte value)
{
Console.WriteLine(" Testing size {0} with value 0x{1:X2}", size, value);
var originalBytes = new byte[size];
originalBytes.AsSpan().Fill(value);
var encodedBytes = AdvancedBase85.EncodeToString(originalBytes);
var decodedBytes = AdvancedBase85.DecodeFromString(encodedBytes)!.ToArray();
CompareBytes(originalBytes, decodedBytes);
}
private static void CompareBytes(byte[] bytes, byte[] newBytes)
{
if (!bytes.SequenceEqual(newBytes))
{
Assert.Fail("byte arrays differ");
}
}
[Test]
public static void SantityChecks()
{
const int nMaxBytes = 4903;
for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
{
TestFixedSizeAndValues(nBytes, 0x00);
}
for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
{
TestFixedSizeAndValues(nBytes, 0x01);
}
for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
{
TestFixedSizeAndValues(nBytes, 0x7F);
}
for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
{
TestFixedSizeAndValues(nBytes, 0x80);
}
for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
{
TestFixedSizeAndValues(nBytes, 0xFE);
}
for (int nBytes = 1; nBytes <= nMaxBytes; ++nBytes)
{
TestFixedSizeAndValues(nBytes, 0xFF);
}
}
[Test]
public static void RandomChecks()
{
var runCount = 4096;
Console.WriteLine("Running {0} random checks", runCount);
var random = new Random(12345);
for (int i = 0; i < 4096; i++)
{
var count = random.Next(1, 65536);
var originalBytes = new byte[count];
random.NextBytes(originalBytes);
var encodedBytes = AdvancedBase85.EncodeToString(originalBytes);
var decodedBytes = AdvancedBase85.DecodeFromString(encodedBytes)!.ToArray();
CompareBytes(originalBytes, decodedBytes);
if ((i + 1) % 256 == 0)
{
Console.WriteLine($" {i + 1} checks completed");
}
}
}
[Test]
public static void SparseChecks()
{
Console.WriteLine("Running sparse checks...");
var random = new Random(12345);
for (var offset = -4; offset <= 4; offset++)
{
var length = offset + 65536;
var swapCount = int.Max(length >> 11, 8);
Console.WriteLine(" Array length of {0} from offset {1}", length, offset);
var originalBytes = new byte[length];
for (var mode = -1; mode <= 1; mode++)
{
Console.WriteLine($" Spare mode is {(mode == 0 ? "Mixed" : mode < 0 ? "Zeros" : "Ones")}");
random.NextBytes(originalBytes);
if (mode != 0)
{
for (var index = 0; index < length; index++)
{
byte value = originalBytes[index];
if (value == 0 || value == byte.MaxValue)
{
originalBytes[index] = (byte)random.Next(1, 255);
}
}
}
var firstLength = 0;
var lastLength = int.MaxValue;
var loopCount = 0;
var runCount = 0;
for (; ; )
{
var text = AdvancedBase85.EncodeToString(originalBytes)!;
if (mode != 0)
{
var newLength = text.Length;
if (firstLength == 0)
{
firstLength = newLength;
}
else if (newLength > lastLength)
{
Assert.Fail("unexpected encoding growth");
}
lastLength = newLength;
}
var decodedBytes = AdvancedBase85.DecodeFromString(text)!.ToArray();
CompareBytes(originalBytes, decodedBytes);
loopCount++;
if (loopCount % 512 == 0)
{
Console.WriteLine($" {loopCount} checks completed");
}
if (runCount >= length)
{
break;
}
var swap = 0;
while (swap < swapCount)
{
var index = random.Next(0, length);
if (mode == 0)
{
originalBytes[index] = (byte)(random.NextDouble() < 0.5 ? 0x00 : 0xFF);
}
else if (mode < 0)
{
originalBytes[index] = 0x00;
}
else
{
originalBytes[index] = 0xFF;
}
swap++;
runCount++;
}
}
if (mode != 0)
{
Assert.That(lastLength, Is.LessThan(firstLength));
Console.WriteLine($" Encoding length reduced from {firstLength} to {lastLength}");
}
}
}
}
}Expected behavior
All tests to pass with the Release configuration.
Actual behavior
Under the Release configuration, the tests non-deterministically fail.
Regression?
Worked in 8.0.
Known Workarounds
The issue does not reproduce if [MethodImpl(MethodImplOptions.NoOptimization)] is added to int DecodeFromString<Dialect>(Span<byte>, ReadOnlySpan<char>).
Configuration
Processor 12th Gen Intel(R) Core(TM) i7-12800H 2.40 GHz
Installed RAM 64.0 GB (63.7 GB usable)
System type 64-bit operating system, x64-based processor
Edition Windows 11 Enterprise
Version 23H2
OS build 22631.4460
Other information
No response