| | | 1 | | // Licensed to the .NET Foundation under one or more agreements. |
| | | 2 | | // The .NET Foundation licenses this file to you under the MIT license. |
| | | 3 | | |
| | | 4 | | using System.Diagnostics; |
| | | 5 | | using System.Runtime.CompilerServices; |
| | | 6 | | using System.Runtime.InteropServices; |
| | | 7 | | using System.Text; |
| | | 8 | | #if NET |
| | | 9 | | using System.Runtime.Intrinsics.Arm; |
| | | 10 | | using System.Runtime.Intrinsics.X86; |
| | | 11 | | using System.Runtime.Intrinsics; |
| | | 12 | | #endif |
| | | 13 | | |
| | | 14 | | namespace System.Buffers.Text |
| | | 15 | | { |
| | | 16 | | // AVX2 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/avx2 |
| | | 17 | | // Vector128 version based on https://github.com/aklomp/base64/tree/e516d769a2a432c08404f1981e73b431566057be/lib/arch/ssse3 |
| | | 18 | | internal static partial class Base64Helper |
| | | 19 | | { |
| | | 20 | | internal static unsafe OperationStatus DecodeFrom<TBase64Decoder, T>(TBase64Decoder decoder, ReadOnlySpan<T> sou |
| | | 21 | | out int bytesConsumed, out int bytesWritten, bool isFinalBlock, bool ignoreWhiteSpace) |
| | | 22 | | where TBase64Decoder : IBase64Decoder<T> |
| | | 23 | | where T : unmanaged |
| | | 24 | | { |
| | | 25 | | if (source.IsEmpty) |
| | | 26 | | { |
| | 92 | 27 | | bytesConsumed = 0; |
| | 92 | 28 | | bytesWritten = 0; |
| | 92 | 29 | | return OperationStatus.Done; |
| | | 30 | | } |
| | | 31 | | |
| | 1210050 | 32 | | fixed (T* srcBytes = &MemoryMarshal.GetReference(source)) |
| | 1210050 | 33 | | fixed (byte* destBytes = &MemoryMarshal.GetReference(bytes)) |
| | | 34 | | { |
| | 1210050 | 35 | | int srcLength = decoder.SrcLength(isFinalBlock, source.Length); |
| | 1210050 | 36 | | int destLength = bytes.Length; |
| | 1210050 | 37 | | int maxSrcLength = srcLength; |
| | 1210050 | 38 | | int decodedLength = decoder.GetMaxDecodedLength(srcLength); |
| | | 39 | | |
| | | 40 | | // max. 2 padding chars |
| | 1210050 | 41 | | if (destLength < decodedLength - 2) |
| | | 42 | | { |
| | | 43 | | // For overflow see comment below |
| | 0 | 44 | | maxSrcLength = destLength / 3 * 4; |
| | | 45 | | } |
| | | 46 | | |
| | 1210050 | 47 | | T* src = srcBytes; |
| | 1210050 | 48 | | byte* dest = destBytes; |
| | 1210050 | 49 | | T* srcEnd = srcBytes + (uint)srcLength; |
| | 1210050 | 50 | | T* srcMax = srcBytes + (uint)maxSrcLength; |
| | | 51 | | |
| | | 52 | | #if NET |
| | 1210050 | 53 | | if (maxSrcLength >= 24) |
| | | 54 | | { |
| | 628732 | 55 | | T* end = srcMax - 88; |
| | 628732 | 56 | | if (Vector512.IsHardwareAccelerated && Avx512Vbmi.IsSupported && (end >= src)) |
| | | 57 | | { |
| | 0 | 58 | | Avx512Decode(decoder, ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); |
| | | 59 | | |
| | 0 | 60 | | if (src == srcEnd) |
| | | 61 | | { |
| | | 62 | | goto DoneExit; |
| | | 63 | | } |
| | | 64 | | } |
| | | 65 | | |
| | 628732 | 66 | | end = srcMax - 45; |
| | 628732 | 67 | | if (Avx2.IsSupported && (end >= src)) |
| | | 68 | | { |
| | 604658 | 69 | | Avx2Decode(decoder, ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); |
| | | 70 | | |
| | 604658 | 71 | | if (src == srcEnd) |
| | | 72 | | { |
| | | 73 | | goto DoneExit; |
| | | 74 | | } |
| | | 75 | | } |
| | | 76 | | |
| | 628732 | 77 | | end = srcMax - 66; |
| | | 78 | | if (AdvSimd.Arm64.IsSupported && (end >= src)) |
| | | 79 | | { |
| | | 80 | | AdvSimdDecode(decoder, ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); |
| | | 81 | | |
| | | 82 | | if (src == srcEnd) |
| | | 83 | | { |
| | | 84 | | goto DoneExit; |
| | | 85 | | } |
| | | 86 | | } |
| | | 87 | | |
| | 628732 | 88 | | end = srcMax - 24; |
| | 628732 | 89 | | if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian && (end >= src)) |
| | | 90 | | { |
| | 592936 | 91 | | Vector128Decode(decoder, ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes); |
| | | 92 | | |
| | 592936 | 93 | | if (src == srcEnd) |
| | | 94 | | { |
| | | 95 | | goto DoneExit; |
| | | 96 | | } |
| | | 97 | | } |
| | | 98 | | } |
| | | 99 | | #endif |
| | | 100 | | |
| | | 101 | | // Last bytes could have padding characters, so process them separately and treat them as valid only if isFinalBlock is true. |
| | | 102 | | // If isFinalBlock is false, padding characters are considered invalid. |
| | 1210050 | 103 | | int skipLastChunk = isFinalBlock ? 4 : 0; |
| | | 104 | | |
| | 1210050 | 105 | | if (destLength >= decodedLength) |
| | | 106 | | { |
| | 1210050 | 107 | | maxSrcLength = srcLength - skipLastChunk; |
| | | 108 | | } |
| | | 109 | | else |
| | | 110 | | { |
| | | 111 | | // This should never overflow since destLength here is less than int.MaxValue / 4 * 3 (i.e. 1610612733). |
| | | 112 | | // Therefore, (destLength / 3) * 4 will always be less than 2147483641 |
| | 0 | 113 | | Debug.Assert(destLength < (int.MaxValue / 4 * 3)); |
| | | 114 | | #if NET |
| | 0 | 115 | | (maxSrcLength, int remainder) = int.DivRem(destLength, 3); |
| | 0 | 116 | | maxSrcLength *= 4; |
| | | 117 | | #else |
| | | 118 | | maxSrcLength = (destLength / 3) * 4; |
| | | 119 | | int remainder = (int)((uint)destLength % 3); |
| | | 120 | | #endif |
| | 0 | 121 | | if (isFinalBlock && remainder > 0) |
| | | 122 | | { |
| | 0 | 123 | | srcLength &= ~0x3; // In case of Base64UrlDecoder source can be not a multiple of 4, round down |
| | | 124 | | } |
| | | 125 | | } |
| | | 126 | | |
| | 1210050 | 127 | | ref sbyte decodingMap = ref MemoryMarshal.GetReference(decoder.DecodingMap); |
| | 1210050 | 128 | | srcMax = srcBytes + maxSrcLength; |
| | | 129 | | |
| | 3134404 | 130 | | while (src < srcMax) |
| | | 131 | | { |
| | 2423510 | 132 | | int result = decoder.DecodeFourElements(src, ref decodingMap); |
| | | 133 | | |
| | 2423510 | 134 | | if (result < 0) |
| | | 135 | | { |
| | | 136 | | goto InvalidDataExit; |
| | | 137 | | } |
| | | 138 | | |
| | 1924354 | 139 | | WriteThreeLowOrderBytes(dest, result); |
| | 1924354 | 140 | | src += 4; |
| | 1924354 | 141 | | dest += 3; |
| | | 142 | | } |
| | | 143 | | |
| | 710894 | 144 | | if (maxSrcLength != srcLength - skipLastChunk) |
| | | 145 | | { |
| | | 146 | | goto DestinationTooSmallExit; |
| | | 147 | | } |
| | | 148 | | |
| | 710894 | 149 | | if (src == srcEnd) |
| | | 150 | | { |
| | 558576 | 151 | | if (isFinalBlock) |
| | | 152 | | { |
| | | 153 | | goto InvalidDataExit; |
| | | 154 | | } |
| | | 155 | | |
| | 554582 | 156 | | if (src == srcBytes + source.Length) |
| | | 157 | | { |
| | 554582 | 158 | | goto DoneExit; |
| | | 159 | | } |
| | | 160 | | |
| | | 161 | | goto NeedMoreDataExit; |
| | | 162 | | } |
| | | 163 | | |
| | | 164 | | // if isFinalBlock is false, we will never reach this point |
| | | 165 | | // Handle remaining bytes, for Base64 it's always 4 bytes, for Base64Url up to 8 bytes left. |
| | | 166 | | // If more than 4 bytes remained it will end up in DestinationTooSmallExit or InvalidDataExit (might succeed after whitespace is removed). |
| | 152318 | 167 | | long remaining = srcEnd - src; |
| | 152318 | 168 | | Debug.Assert(typeof(TBase64Decoder) == typeof(Base64DecoderByte) ? remaining == 4 : remaining < 8); |
| | 152318 | 169 | | int i0 = decoder.DecodeRemaining(srcEnd, ref decodingMap, remaining, out uint t2, out uint t3); |
| | | 170 | | |
| | 152318 | 171 | | byte* destMax = destBytes + (uint)destLength; |
| | | 172 | | |
| | 152318 | 173 | | if (!decoder.IsValidPadding(t3)) |
| | | 174 | | { |
| | 58156 | 175 | | int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); |
| | 58156 | 176 | | int i3 = Unsafe.Add(ref decodingMap, (IntPtr)t3); |
| | | 177 | | |
| | 58156 | 178 | | i2 <<= 6; |
| | | 179 | | |
| | 58156 | 180 | | i0 |= i3; |
| | 58156 | 181 | | i0 |= i2; |
| | | 182 | | |
| | 58156 | 183 | | if (i0 < 0) |
| | | 184 | | { |
| | | 185 | | goto InvalidDataExit; |
| | | 186 | | } |
| | 54164 | 187 | | if (dest + 3 > destMax) |
| | | 188 | | { |
| | | 189 | | goto DestinationTooSmallExit; |
| | | 190 | | } |
| | | 191 | | |
| | 54164 | 192 | | WriteThreeLowOrderBytes(dest, i0); |
| | 54164 | 193 | | dest += 3; |
| | 54164 | 194 | | src += 4; |
| | | 195 | | } |
| | 94162 | 196 | | else if (!decoder.IsValidPadding(t2)) |
| | | 197 | | { |
| | 45094 | 198 | | int i2 = Unsafe.Add(ref decodingMap, (IntPtr)t2); |
| | | 199 | | |
| | 45094 | 200 | | i2 <<= 6; |
| | | 201 | | |
| | 45094 | 202 | | i0 |= i2; |
| | | 203 | | |
| | 45094 | 204 | | if ((i0 & 0x800000c0) != 0) // if negative or 2 unused bits are not 0. |
| | | 205 | | { |
| | | 206 | | goto InvalidDataExit; |
| | | 207 | | } |
| | 44688 | 208 | | if (dest + 2 > destMax) |
| | | 209 | | { |
| | | 210 | | goto DestinationTooSmallExit; |
| | | 211 | | } |
| | | 212 | | |
| | 44688 | 213 | | dest[0] = (byte)(i0 >> 16); |
| | 44688 | 214 | | dest[1] = (byte)(i0 >> 8); |
| | 44688 | 215 | | dest += 2; |
| | 44688 | 216 | | src += remaining; |
| | | 217 | | } |
| | | 218 | | else |
| | | 219 | | { |
| | 49068 | 220 | | if ((i0 & 0x8000F000) != 0) // if negative or 4 unused bits are not 0. |
| | | 221 | | { |
| | | 222 | | goto InvalidDataExit; |
| | | 223 | | } |
| | 48754 | 224 | | if (dest + 1 > destMax) |
| | | 225 | | { |
| | | 226 | | goto DestinationTooSmallExit; |
| | | 227 | | } |
| | | 228 | | |
| | 48754 | 229 | | dest[0] = (byte)(i0 >> 16); |
| | 48754 | 230 | | dest += 1; |
| | 48754 | 231 | | src += remaining; |
| | | 232 | | } |
| | | 233 | | |
| | 147606 | 234 | | if (srcLength != source.Length) |
| | | 235 | | { |
| | | 236 | | goto InvalidDataExit; |
| | | 237 | | } |
| | | 238 | | |
| | | 239 | | DoneExit: |
| | 701636 | 240 | | bytesConsumed = (int)(src - srcBytes); |
| | 701636 | 241 | | bytesWritten = (int)(dest - destBytes); |
| | 701636 | 242 | | return OperationStatus.Done; |
| | | 243 | | |
| | | 244 | | DestinationTooSmallExit: |
| | 0 | 245 | | if (srcLength != source.Length && isFinalBlock) |
| | | 246 | | { |
| | | 247 | | goto InvalidDataExit; // if input is not a multiple of 4, and there is no more data, return invalid |
| | | 248 | | } |
| | | 249 | | |
| | 0 | 250 | | if (ignoreWhiteSpace) |
| | | 251 | | { |
| | | 252 | | // Fall through to InvalidDataFallback which strips whitespace and re-evaluates destination size requirements. |
| | | 253 | | goto InvalidDataExit; |
| | | 254 | | } |
| | | 255 | | |
| | 0 | 256 | | bytesConsumed = (int)(src - srcBytes); |
| | 0 | 257 | | bytesWritten = (int)(dest - destBytes); |
| | 0 | 258 | | return OperationStatus.DestinationTooSmall; |
| | | 259 | | |
| | | 260 | | NeedMoreDataExit: |
| | 0 | 261 | | bytesConsumed = (int)(src - srcBytes); |
| | 0 | 262 | | bytesWritten = (int)(dest - destBytes); |
| | 0 | 263 | | return OperationStatus.NeedMoreData; |
| | | 264 | | |
| | | 265 | | InvalidDataExit: |
| | 508414 | 266 | | bytesConsumed = (int)(src - srcBytes); |
| | 508414 | 267 | | bytesWritten = (int)(dest - destBytes); |
| | 508414 | 268 | | return ignoreWhiteSpace ? |
| | 508414 | 269 | | InvalidDataFallback(decoder, source, bytes, ref bytesConsumed, ref bytesWritten, isFinalBlock) : |
| | 508414 | 270 | | OperationStatus.InvalidData; |
| | | 271 | | } |
| | | 272 | | |
| | | 273 | | static OperationStatus InvalidDataFallback(TBase64Decoder decoder, ReadOnlySpan<T> source, Span<byte> bytes, |
| | | 274 | | { |
| | 42810 | 275 | | source = source.Slice(bytesConsumed); |
| | 42810 | 276 | | bytes = bytes.Slice(bytesWritten); |
| | | 277 | | |
| | | 278 | | OperationStatus status; |
| | | 279 | | do |
| | | 280 | | { |
| | 488932 | 281 | | int localConsumed = decoder.IndexOfAnyExceptWhiteSpace(source); |
| | 488932 | 282 | | if (localConsumed < 0) |
| | | 283 | | { |
| | | 284 | | // The remainder of the input is all whitespace. Mark it all as having been consumed, |
| | | 285 | | // and mark the operation as being done. |
| | 162 | 286 | | bytesConsumed += source.Length; |
| | 162 | 287 | | status = OperationStatus.Done; |
| | 162 | 288 | | break; |
| | | 289 | | } |
| | | 290 | | |
| | 488770 | 291 | | if (localConsumed == 0) |
| | | 292 | | { |
| | | 293 | | // Non-whitespace was found at the beginning of the input. Since it wasn't consumed |
| | | 294 | | // by the previous call to DecodeFromUtf8, it must be part of a Base64 sequence |
| | | 295 | | // that was interrupted by whitespace or something else considered invalid. |
| | | 296 | | // Fall back to block-wise decoding. This is very slow, but it's also very non-standard |
| | | 297 | | // formatting of the input; whitespace is typically only found between blocks, such as |
| | | 298 | | // when Convert.ToBase64String inserts a line break every 76 output characters. |
| | 23482 | 299 | | return decoder.DecodeWithWhiteSpaceBlockwiseWrapper(decoder, source, bytes, ref bytesConsumed, r |
| | | 300 | | } |
| | | 301 | | |
| | | 302 | | // Skip over the starting whitespace and continue. |
| | 465288 | 303 | | bytesConsumed += localConsumed; |
| | 465288 | 304 | | source = source.Slice(localConsumed); |
| | | 305 | | |
| | | 306 | | // Try again after consumed whitespace |
| | 465288 | 307 | | status = DecodeFrom(decoder, source, bytes, out localConsumed, out int localWritten, isFinalBlock, i |
| | 465288 | 308 | | bytesConsumed += localConsumed; |
| | 465288 | 309 | | bytesWritten += localWritten; |
| | | 310 | | |
| | 465288 | 311 | | if (status is OperationStatus.Done or OperationStatus.NeedMoreData) |
| | | 312 | | { |
| | | 313 | | break; |
| | | 314 | | } |
| | | 315 | | |
| | | 316 | | // The DecodeFrom helper will return DestinationTooSmall if the destination is too small, |
| | | 317 | | // regardless of whether it's actually too small once you skip whitespace characters. |
| | | 318 | | // In that case we loop again and fall back to block-wise decoding if we can't make progress. |
| | | 319 | | |
| | 446122 | 320 | | source = source.Slice(localConsumed); |
| | 446122 | 321 | | bytes = bytes.Slice(localWritten); |
| | | 322 | | } |
| | 446122 | 323 | | while (!source.IsEmpty); |
| | | 324 | | |
| | 19328 | 325 | | return status; |
| | | 326 | | } |
| | | 327 | | } |
| | | 328 | | |
| | | 329 | | internal static unsafe OperationStatus DecodeFromUtf8InPlace<TBase64Decoder>(TBase64Decoder decoder, Span<byte> |
| | | 330 | | where TBase64Decoder : IBase64Decoder<byte> |
| | | 331 | | { |
| | 1053086 | 332 | | if (buffer.IsEmpty) |
| | | 333 | | { |
| | 0 | 334 | | bytesWritten = 0; |
| | 0 | 335 | | return OperationStatus.Done; |
| | | 336 | | } |
| | | 337 | | |
| | 1053086 | 338 | | fixed (byte* bufferBytes = &MemoryMarshal.GetReference(buffer)) |
| | | 339 | | { |
| | 1053086 | 340 | | uint bufferLength = (uint)buffer.Length; |
| | 1053086 | 341 | | uint sourceIndex = 0; |
| | 1053086 | 342 | | uint destIndex = 0; |
| | | 343 | | |
| | 1053086 | 344 | | if (decoder.IsInvalidLength(buffer.Length)) |
| | | 345 | | { |
| | | 346 | | goto InvalidExit; |
| | | 347 | | } |
| | | 348 | | |
| | 1044534 | 349 | | ref sbyte decodingMap = ref MemoryMarshal.GetReference(decoder.DecodingMap); |
| | | 350 | | |
| | 1044534 | 351 | | if (bufferLength > 4) |
| | | 352 | | { |
| | 2156630 | 353 | | while (sourceIndex < bufferLength - 4) |
| | | 354 | | { |
| | 2144686 | 355 | | int result = decoder.DecodeFourElements(bufferBytes + sourceIndex, ref decodingMap); |
| | 2144686 | 356 | | if (result < 0) |
| | | 357 | | { |
| | | 358 | | goto InvalidExit; |
| | | 359 | | } |
| | | 360 | | |
| | 2141542 | 361 | | WriteThreeLowOrderBytes(bufferBytes + destIndex, result); |
| | 2141542 | 362 | | destIndex += 3; |
| | 2141542 | 363 | | sourceIndex += 4; |
| | | 364 | | } |
| | | 365 | | } |
| | | 366 | | |
| | | 367 | | uint t0; |
| | | 368 | | uint t1; |
| | | 369 | | uint t2; |
| | | 370 | | uint t3; |
| | | 371 | | |
| | 1041390 | 372 | | switch (bufferLength - sourceIndex) |
| | | 373 | | { |
| | | 374 | | case 2: |
| | 0 | 375 | | t0 = bufferBytes[bufferLength - 2]; |
| | 0 | 376 | | t1 = bufferBytes[bufferLength - 1]; |
| | 0 | 377 | | t2 = EncodingPad; |
| | 0 | 378 | | t3 = EncodingPad; |
| | 0 | 379 | | break; |
| | | 380 | | case 3: |
| | 0 | 381 | | t0 = bufferBytes[bufferLength - 3]; |
| | 0 | 382 | | t1 = bufferBytes[bufferLength - 2]; |
| | 0 | 383 | | t2 = bufferBytes[bufferLength - 1]; |
| | 0 | 384 | | t3 = EncodingPad; |
| | 0 | 385 | | break; |
| | | 386 | | case 4: |
| | 1041390 | 387 | | t0 = bufferBytes[bufferLength - 4]; |
| | 1041390 | 388 | | t1 = bufferBytes[bufferLength - 3]; |
| | 1041390 | 389 | | t2 = bufferBytes[bufferLength - 2]; |
| | 1041390 | 390 | | t3 = bufferBytes[bufferLength - 1]; |
| | | 391 | | break; |
| | | 392 | | default: |
| | | 393 | | goto InvalidExit; |
| | | 394 | | } |
| | | 395 | | |
| | 1041390 | 396 | | int i0 = Unsafe.Add(ref decodingMap, (int)t0); |
| | 1041390 | 397 | | int i1 = Unsafe.Add(ref decodingMap, (int)t1); |
| | | 398 | | |
| | 1041390 | 399 | | i0 <<= 18; |
| | 1041390 | 400 | | i1 <<= 12; |
| | | 401 | | |
| | 1041390 | 402 | | i0 |= i1; |
| | | 403 | | |
| | 1041390 | 404 | | if (!decoder.IsValidPadding(t3)) |
| | | 405 | | { |
| | 1032722 | 406 | | int i2 = Unsafe.Add(ref decodingMap, (int)t2); |
| | 1032722 | 407 | | int i3 = Unsafe.Add(ref decodingMap, (int)t3); |
| | | 408 | | |
| | 1032722 | 409 | | i2 <<= 6; |
| | | 410 | | |
| | 1032722 | 411 | | i0 |= i3; |
| | 1032722 | 412 | | i0 |= i2; |
| | | 413 | | |
| | 1032722 | 414 | | if (i0 < 0) |
| | | 415 | | { |
| | | 416 | | goto InvalidExit; |
| | | 417 | | } |
| | | 418 | | |
| | 1027716 | 419 | | WriteThreeLowOrderBytes(bufferBytes + destIndex, i0); |
| | 1027716 | 420 | | destIndex += 3; |
| | | 421 | | } |
| | 8668 | 422 | | else if (!decoder.IsValidPadding(t2)) |
| | | 423 | | { |
| | 4310 | 424 | | int i2 = Unsafe.Add(ref decodingMap, (int)t2); |
| | | 425 | | |
| | 4310 | 426 | | i2 <<= 6; |
| | | 427 | | |
| | 4310 | 428 | | i0 |= i2; |
| | | 429 | | |
| | 4310 | 430 | | if ((i0 & 0x800000c0) != 0) // if negative or 2 unused bits are not 0. |
| | | 431 | | { |
| | | 432 | | goto InvalidExit; |
| | | 433 | | } |
| | | 434 | | |
| | 4054 | 435 | | bufferBytes[destIndex] = (byte)(i0 >> 16); |
| | 4054 | 436 | | bufferBytes[destIndex + 1] = (byte)(i0 >> 8); |
| | 4054 | 437 | | destIndex += 2; |
| | | 438 | | } |
| | | 439 | | else |
| | | 440 | | { |
| | 4358 | 441 | | if ((i0 & 0x8000F000) != 0) // if negative or 4 unused bits are not 0. |
| | | 442 | | { |
| | | 443 | | goto InvalidExit; |
| | | 444 | | } |
| | | 445 | | |
| | 4154 | 446 | | bufferBytes[destIndex] = (byte)(i0 >> 16); |
| | 4154 | 447 | | destIndex += 1; |
| | | 448 | | } |
| | | 449 | | |
| | 1035924 | 450 | | bytesWritten = (int)destIndex; |
| | 1035924 | 451 | | return OperationStatus.Done; |
| | | 452 | | |
| | | 453 | | InvalidExit: |
| | 17162 | 454 | | bytesWritten = (int)destIndex; |
| | 17162 | 455 | | return ignoreWhiteSpace ? |
| | 17162 | 456 | | DecodeWithWhiteSpaceFromUtf8InPlace<TBase64Decoder>(decoder, buffer, ref bytesWritten, sourceIndex) |
| | 17162 | 457 | | OperationStatus.InvalidData; |
| | | 458 | | } |
| | | 459 | | } |
| | | 460 | | |
| | | 461 | | internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TBase64Decoder decoder, ReadOnlySp |
| | | 462 | | where TBase64Decoder : IBase64Decoder<byte> |
| | | 463 | | { |
| | | 464 | | const int BlockSize = 4; |
| | 11588 | 465 | | Span<byte> buffer = stackalloc byte[BlockSize]; |
| | 11588 | 466 | | OperationStatus status = OperationStatus.Done; |
| | | 467 | | |
| | 529554 | 468 | | while (!source.IsEmpty) |
| | | 469 | | { |
| | 529444 | 470 | | int encodedIdx = 0; |
| | 529444 | 471 | | int bufferIdx = 0; |
| | 529444 | 472 | | int skipped = 0; |
| | | 473 | | |
| | 5356280 | 474 | | for (; encodedIdx < source.Length && (uint)bufferIdx < (uint)buffer.Length; ++encodedIdx) |
| | | 475 | | { |
| | 2413418 | 476 | | if (IsWhiteSpace(source[encodedIdx])) |
| | | 477 | | { |
| | 300192 | 478 | | skipped++; |
| | | 479 | | } |
| | | 480 | | else |
| | | 481 | | { |
| | 2113226 | 482 | | buffer[bufferIdx] = source[encodedIdx]; |
| | 2113226 | 483 | | bufferIdx++; |
| | | 484 | | } |
| | | 485 | | } |
| | | 486 | | |
| | 529444 | 487 | | source = source.Slice(encodedIdx); |
| | 529444 | 488 | | bytesConsumed += skipped; |
| | | 489 | | |
| | 529444 | 490 | | if (bufferIdx == 0) |
| | | 491 | | { |
| | | 492 | | continue; |
| | | 493 | | } |
| | | 494 | | |
| | | 495 | | bool hasAnotherBlock; |
| | | 496 | | |
| | 529334 | 497 | | if (typeof(TBase64Decoder) == typeof(Base64DecoderByte)) |
| | | 498 | | { |
| | 529334 | 499 | | hasAnotherBlock = source.Length >= BlockSize; |
| | | 500 | | } |
| | | 501 | | else |
| | | 502 | | { |
| | 0 | 503 | | hasAnotherBlock = source.Length > 1; |
| | | 504 | | } |
| | | 505 | | |
| | 529334 | 506 | | bool localIsFinalBlock = !hasAnotherBlock; |
| | | 507 | | |
| | | 508 | | // If this block contains padding and there's another block, then only whitespace may follow for the input to be valid. |
| | 529334 | 509 | | if (hasAnotherBlock) |
| | | 510 | | { |
| | 522826 | 511 | | int paddingCount = GetPaddingCount(decoder, ref buffer[BlockSize - 1]); |
| | 522826 | 512 | | if (paddingCount > 0) |
| | | 513 | | { |
| | 690 | 514 | | hasAnotherBlock = false; |
| | 690 | 515 | | localIsFinalBlock = true; |
| | | 516 | | } |
| | | 517 | | } |
| | | 518 | | |
| | 529334 | 519 | | if (localIsFinalBlock && !isFinalBlock) |
| | | 520 | | { |
| | 0 | 521 | | localIsFinalBlock = false; |
| | | 522 | | } |
| | | 523 | | |
| | 529334 | 524 | | status = DecodeFrom<TBase64Decoder, byte>(decoder, buffer.Slice(0, bufferIdx), bytes, out int localConsu |
| | 529334 | 525 | | bytesConsumed += localConsumed; |
| | 529334 | 526 | | bytesWritten += localWritten; |
| | | 527 | | |
| | 529334 | 528 | | if (status != OperationStatus.Done) |
| | | 529 | | { |
| | 7656 | 530 | | return status; |
| | | 531 | | } |
| | | 532 | | |
| | | 533 | | // The remaining data must all be whitespace in order to be valid. |
| | 521678 | 534 | | if (!hasAnotherBlock) |
| | | 535 | | { |
| | 43144 | 536 | | for (int i = 0; i < source.Length; ++i) |
| | | 537 | | { |
| | 20042 | 538 | | if (!IsWhiteSpace(source[i])) |
| | | 539 | | { |
| | | 540 | | // Revert previous dest increment, since an invalid state followed. |
| | 2292 | 541 | | bytesConsumed -= localConsumed; |
| | 2292 | 542 | | bytesWritten -= localWritten; |
| | | 543 | | |
| | 2292 | 544 | | return OperationStatus.InvalidData; |
| | | 545 | | } |
| | | 546 | | |
| | 17750 | 547 | | bytesConsumed++; |
| | | 548 | | } |
| | | 549 | | |
| | 1530 | 550 | | break; |
| | | 551 | | } |
| | | 552 | | |
| | 517856 | 553 | | bytes = bytes.Slice(localWritten); |
| | | 554 | | } |
| | | 555 | | |
| | 1640 | 556 | | return status; |
| | | 557 | | } |
| | | 558 | | |
| | | 559 | | internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TBase64Decoder decoder, ReadOnlySp |
| | | 560 | | where TBase64Decoder : IBase64Decoder<ushort> |
| | | 561 | | { |
| | | 562 | | const int BlockSize = 4; |
| | 11894 | 563 | | Span<ushort> buffer = stackalloc ushort[BlockSize]; |
| | 11894 | 564 | | OperationStatus status = OperationStatus.Done; |
| | | 565 | | |
| | 24910 | 566 | | while (!source.IsEmpty) |
| | | 567 | | { |
| | 24876 | 568 | | int encodedIdx = 0; |
| | 24876 | 569 | | int bufferIdx = 0; |
| | 24876 | 570 | | int skipped = 0; |
| | | 571 | | |
| | 383280 | 572 | | for (; encodedIdx < source.Length && (uint)bufferIdx < (uint)buffer.Length; ++encodedIdx) |
| | | 573 | | { |
| | 179202 | 574 | | if (IsWhiteSpace(source[encodedIdx])) |
| | | 575 | | { |
| | 81396 | 576 | | skipped++; |
| | | 577 | | } |
| | | 578 | | else |
| | | 579 | | { |
| | 97806 | 580 | | buffer[bufferIdx] = source[encodedIdx]; |
| | 97806 | 581 | | bufferIdx++; |
| | | 582 | | } |
| | | 583 | | } |
| | | 584 | | |
| | 24876 | 585 | | source = source.Slice(encodedIdx); |
| | 24876 | 586 | | bytesConsumed += skipped; |
| | | 587 | | |
| | 24876 | 588 | | if (bufferIdx == 0) |
| | | 589 | | { |
| | | 590 | | continue; |
| | | 591 | | } |
| | | 592 | | |
| | | 593 | | bool hasAnotherBlock; |
| | | 594 | | |
| | 24842 | 595 | | if (decoder is Base64DecoderByte) |
| | | 596 | | { |
| | 0 | 597 | | hasAnotherBlock = source.Length >= BlockSize; |
| | | 598 | | } |
| | | 599 | | else |
| | | 600 | | { |
| | 24842 | 601 | | hasAnotherBlock = source.Length > 1; |
| | | 602 | | } |
| | | 603 | | |
| | 24842 | 604 | | bool localIsFinalBlock = !hasAnotherBlock; |
| | | 605 | | |
| | | 606 | | // If this block contains padding and there's another block, then only whitespace may follow for the input to be valid. |
| | 24842 | 607 | | if (hasAnotherBlock) |
| | | 608 | | { |
| | 23566 | 609 | | int paddingCount = GetPaddingCount(decoder, ref buffer[BlockSize - 1]); |
| | 23566 | 610 | | if (paddingCount > 0) |
| | | 611 | | { |
| | 84 | 612 | | hasAnotherBlock = false; |
| | 84 | 613 | | localIsFinalBlock = true; |
| | | 614 | | } |
| | | 615 | | } |
| | | 616 | | |
| | 24842 | 617 | | if (localIsFinalBlock && !isFinalBlock) |
| | | 618 | | { |
| | 0 | 619 | | localIsFinalBlock = false; |
| | | 620 | | } |
| | | 621 | | |
| | 24842 | 622 | | status = DecodeFrom(decoder, buffer.Slice(0, bufferIdx), bytes, out int localConsumed, out int localWrit |
| | 24842 | 623 | | bytesConsumed += localConsumed; |
| | 24842 | 624 | | bytesWritten += localWritten; |
| | | 625 | | |
| | 24842 | 626 | | if (status != OperationStatus.Done) |
| | | 627 | | { |
| | 11826 | 628 | | return status; |
| | | 629 | | } |
| | | 630 | | |
| | | 631 | | // The remaining data must all be whitespace in order to be valid. |
| | 13016 | 632 | | if (!hasAnotherBlock) |
| | | 633 | | { |
| | 192 | 634 | | for (int i = 0; i < source.Length; ++i) |
| | | 635 | | { |
| | 82 | 636 | | if (!IsWhiteSpace(source[i])) |
| | | 637 | | { |
| | | 638 | | // Revert previous dest increment, since an invalid state followed. |
| | 20 | 639 | | bytesConsumed -= localConsumed; |
| | 20 | 640 | | bytesWritten -= localWritten; |
| | | 641 | | |
| | 20 | 642 | | return OperationStatus.InvalidData; |
| | | 643 | | } |
| | | 644 | | } |
| | | 645 | | |
| | 14 | 646 | | bytesConsumed += source.Length; |
| | 14 | 647 | | break; |
| | | 648 | | } |
| | | 649 | | |
| | 12982 | 650 | | bytes = bytes.Slice(localWritten); |
| | | 651 | | } |
| | | 652 | | |
| | 48 | 653 | | return status; |
| | | 654 | | } |
| | | 655 | | |
/// <summary>
/// Counts how many of the last two elements of a block the decoder recognizes as padding.
/// </summary>
/// <param name="decoder">Decoder whose <c>IsValidPadding</c> decides what counts as a padding element.</param>
/// <param name="ptrToLastElement">
/// Reference to the last element of the block. The element immediately before it is also read,
/// so the caller must pass a reference that has at least one valid element in front of it.
/// </param>
/// <returns>0, 1 or 2: the number of those two elements that are padding.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetPaddingCount<TBase64Decoder>(TBase64Decoder decoder, ref byte ptrToLastElement)
    where TBase64Decoder : IBase64Decoder<byte>
{
    // Both positions are always probed, last element first, then its predecessor.
    bool lastIsPadding = decoder.IsValidPadding(ptrToLastElement);
    bool secondToLastIsPadding = decoder.IsValidPadding(Unsafe.Subtract(ref ptrToLastElement, 1));

    return (lastIsPadding ? 1 : 0) + (secondToLastIsPadding ? 1 : 0);
}
| | | 674 | | |
/// <summary>
/// Counts how many of the last two UTF-16 elements of a block the decoder recognizes as padding.
/// </summary>
/// <param name="decoder">Decoder whose <c>IsValidPadding</c> decides what counts as a padding element.</param>
/// <param name="ptrToLastElement">
/// Reference to the last element of the block. The element immediately before it is also read,
/// so the caller must pass a reference that has at least one valid element in front of it.
/// </param>
/// <returns>0, 1 or 2: the number of those two elements that are padding.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int GetPaddingCount<TBase64Decoder>(TBase64Decoder decoder, ref ushort ptrToLastElement)
    where TBase64Decoder : IBase64Decoder<ushort>
{
    // Both positions are always probed, last element first, then its predecessor.
    bool lastIsPadding = decoder.IsValidPadding(ptrToLastElement);
    bool secondToLastIsPadding = decoder.IsValidPadding(Unsafe.Subtract(ref ptrToLastElement, 1));

    return (lastIsPadding ? 1 : 0) + (secondToLastIsPadding ? 1 : 0);
}
| | | 693 | | |
| | | 694 | | private static OperationStatus DecodeWithWhiteSpaceFromUtf8InPlace<TBase64Decoder>(TBase64Decoder decoder, Span< |
| | | 695 | | where TBase64Decoder : IBase64Decoder<byte> |
| | | 696 | | { |
| | 11884 | 697 | | int BlockSize = Math.Min(source.Length - (int)sourceIndex, 4); |
| | 11884 | 698 | | Span<byte> buffer = stackalloc byte[BlockSize]; |
| | | 699 | | |
| | 11884 | 700 | | OperationStatus status = OperationStatus.Done; |
| | 11884 | 701 | | int localDestIndex = destIndex; |
| | 11884 | 702 | | bool hasPaddingBeenProcessed = false; |
| | 11884 | 703 | | int localBytesWritten = 0; |
| | | 704 | | |
| | 1036582 | 705 | | while (sourceIndex < (uint)source.Length) |
| | | 706 | | { |
| | 1034646 | 707 | | int bufferIdx = 0; |
| | | 708 | | |
| | 5808976 | 709 | | while (bufferIdx < BlockSize && sourceIndex < (uint)source.Length) |
| | | 710 | | { |
| | 4774330 | 711 | | if (!IsWhiteSpace(source[(int)sourceIndex])) |
| | | 712 | | { |
| | 4126794 | 713 | | buffer[bufferIdx] = source[(int)sourceIndex]; |
| | 4126794 | 714 | | bufferIdx++; |
| | | 715 | | } |
| | | 716 | | |
| | 4774330 | 717 | | sourceIndex++; |
| | | 718 | | } |
| | | 719 | | |
| | 1034646 | 720 | | if (bufferIdx == 0) |
| | | 721 | | { |
| | | 722 | | continue; |
| | | 723 | | } |
| | | 724 | | |
| | 1033920 | 725 | | if (bufferIdx != 4) |
| | | 726 | | { |
| | | 727 | | // Base64 requires 4 bytes; for Base64Url it can be less than 4 bytes, but not 1 byte. |
| | 4364 | 728 | | if (decoder is Base64DecoderByte || bufferIdx == 1) |
| | | 729 | | { |
| | 4364 | 730 | | status = OperationStatus.InvalidData; |
| | 4364 | 731 | | break; |
| | | 732 | | } |
| | | 733 | | else // For Base64Url fill empty slots in last block with padding |
| | | 734 | | { |
| | 0 | 735 | | while (bufferIdx < BlockSize) // Can happen only for last block |
| | | 736 | | { |
| | 0 | 737 | | Debug.Assert(source.Length == sourceIndex); |
| | 0 | 738 | | buffer[bufferIdx++] = (byte)EncodingPad; |
| | | 739 | | } |
| | | 740 | | } |
| | | 741 | | } |
| | | 742 | | |
| | 1029556 | 743 | | if (hasPaddingBeenProcessed) |
| | | 744 | | { |
| | | 745 | | // Padding has already been processed, a new valid block cannot be processed. |
| | | 746 | | // Revert previous dest increment, since an invalid state followed. |
| | 306 | 747 | | localDestIndex -= localBytesWritten; |
| | 306 | 748 | | status = OperationStatus.InvalidData; |
| | 306 | 749 | | break; |
| | | 750 | | } |
| | | 751 | | |
| | 1029250 | 752 | | status = DecodeFromUtf8InPlace<TBase64Decoder>(decoder, buffer, out localBytesWritten, ignoreWhiteSpace: |
| | 1029250 | 753 | | localDestIndex += localBytesWritten; |
| | 1029250 | 754 | | hasPaddingBeenProcessed = localBytesWritten < 3; |
| | | 755 | | |
| | 1029250 | 756 | | if (status != OperationStatus.Done) |
| | | 757 | | { |
| | | 758 | | break; |
| | | 759 | | } |
| | | 760 | | |
| | | 761 | | // Write result to source span in place. |
| | 8190696 | 762 | | for (int i = 0; i < localBytesWritten; i++) |
| | | 763 | | { |
| | 3071376 | 764 | | source[localDestIndex - localBytesWritten + i] = buffer[i]; |
| | | 765 | | } |
| | | 766 | | } |
| | | 767 | | |
| | 11884 | 768 | | destIndex = localDestIndex; |
| | 11884 | 769 | | return status; |
| | | 770 | | } |
| | | 771 | | |
| | | 772 | | #if NET |
| | | 773 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 774 | | [CompExactlyDependsOn(typeof(Avx512BW))] |
| | | 775 | | [CompExactlyDependsOn(typeof(Avx512Vbmi))] |
| | | 776 | | private static unsafe void Avx512Decode<TBase64Decoder, T>(TBase64Decoder decoder, ref T* srcBytes, ref byte* de |
| | | 777 | | where TBase64Decoder : IBase64Decoder<T> |
| | | 778 | | where T : unmanaged |
| | | 779 | | { |
| | | 780 | | // Reference for the VBMI implementation: https://github.com/WojciechMula/base64simd/tree/master/decode |
| | | 781 | | // If we have AVX512 support, pick off 64 bytes at a time for as long as we can, |
| | | 782 | | // but make sure that we quit before seeing any == markers at the end of the |
| | | 783 | | // string. Also, because each 64-byte store writes 16 bytes past the 48 decoded ones, ensure |
| | | 784 | | // that there are at least 22 valid bytes of input data remaining to close the |
| | | 785 | | // gap. 64 + 2 + 22 = 88 bytes. |
| | 0 | 786 | | T* src = srcBytes; |
| | 0 | 787 | | byte* dest = destBytes; |
| | | 788 | | |
| | | 789 | | // The JIT won't hoist these "constants", so help it |
| | 0 | 790 | | Vector512<sbyte> vbmiLookup0 = Vector512.Create(decoder.VbmiLookup0).AsSByte(); |
| | 0 | 791 | | Vector512<sbyte> vbmiLookup1 = Vector512.Create(decoder.VbmiLookup1).AsSByte(); |
| | 0 | 792 | | Vector512<byte> vbmiPackedLanesControl = Vector512.Create( |
| | 0 | 793 | | 0x06000102, 0x090a0405, 0x0c0d0e08, 0x16101112, |
| | 0 | 794 | | 0x191a1415, 0x1c1d1e18, 0x26202122, 0x292a2425, |
| | 0 | 795 | | 0x2c2d2e28, 0x36303132, 0x393a3435, 0x3c3d3e38, |
| | 0 | 796 | | 0x00000000, 0x00000000, 0x00000000, 0x00000000).AsByte(); |
| | | 797 | | |
| | 0 | 798 | | Vector512<sbyte> mergeConstant0 = Vector512.Create(0x01400140).AsSByte(); |
| | 0 | 799 | | Vector512<short> mergeConstant1 = Vector512.Create(0x00011000).AsInt16(); |
| | | 800 | | |
| | | 801 | | // This algorithm requires AVX512VBMI support. |
| | | 802 | | // Vbmi was first introduced in CannonLake and is available from IceLake on. |
| | | 803 | | do |
| | | 804 | | { |
| | 0 | 805 | | if (!decoder.TryLoadVector512(src, srcStart, sourceLength, out Vector512<sbyte> str)) |
| | | 806 | | { |
| | | 807 | | break; |
| | | 808 | | } |
| | | 809 | | |
| | | 810 | | // Step 1: Translate encoded Base64 input to their original indices |
| | | 811 | | // This step also checks for invalid inputs (top bit set in the lookup result or in the input byte itself) and exits the loop. |
| | | 812 | | // After this, we have indices which are verified to have upper 2 bits set to 0 in each byte. |
| | | 813 | | // origIndex = [...|00dddddd|00cccccc|00bbbbbb|00aaaaaa] |
| | 0 | 814 | | Vector512<sbyte> origIndex = Avx512Vbmi.PermuteVar64x8x2(vbmiLookup0, str, vbmiLookup1); |
| | 0 | 815 | | Vector512<sbyte> errorVec = (origIndex.AsInt32() | str.AsInt32()).AsSByte(); |
| | 0 | 816 | | if (errorVec.ExtractMostSignificantBits() != 0) |
| | | 817 | | { |
| | | 818 | | break; |
| | | 819 | | } |
| | | 820 | | |
| | | 821 | | // Step 2: Now we need to reshuffle bits to remove the 0 bits. |
| | | 822 | | // multiAdd1: [...|0000cccc|ccdddddd|0000aaaa|aabbbbbb] |
| | 0 | 823 | | Vector512<short> multiAdd1 = Avx512BW.MultiplyAddAdjacent(origIndex.AsByte(), mergeConstant0); |
| | | 824 | | // multiAdd2: [...|00000000|aaaaaabb|bbbbcccc|ccdddddd] |
| | 0 | 825 | | Vector512<int> multiAdd2 = Avx512BW.MultiplyAddAdjacent(multiAdd1, mergeConstant1); |
| | | 826 | | |
| | | 827 | | // Step 3: Pack the 48 decoded bytes to the front of the vector |
| | 0 | 828 | | str = Avx512Vbmi.PermuteVar64x8(multiAdd2.AsByte(), vbmiPackedLanesControl).AsSByte(); |
| | | 829 | | |
| | 0 | 830 | | AssertWrite<Vector512<sbyte>>(dest, destStart, destLength); |
| | 0 | 831 | | str.Store((sbyte*)dest); |
| | 0 | 832 | | src += 64; |
| | 0 | 833 | | dest += 48; |
| | | 834 | | } |
| | 0 | 835 | | while (src <= srcEnd); |
| | | 836 | | |
| | 0 | 837 | | srcBytes = src; |
| | 0 | 838 | | destBytes = dest; |
| | 0 | 839 | | } |
| | | 840 | | |
| | | 841 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 842 | | [CompExactlyDependsOn(typeof(Avx2))] |
| | | 843 | | private static unsafe void Avx2Decode<TBase64Decoder, T>(TBase64Decoder decoder, ref T* srcBytes, ref byte* dest |
| | | 844 | | where TBase64Decoder : IBase64Decoder<T> |
| | | 845 | | where T : unmanaged |
| | | 846 | | { |
| | | 847 | | // If we have AVX2 support, pick off 32 bytes at a time for as long as we can, |
| | | 848 | | // but make sure that we quit before seeing any == markers at the end of the |
| | | 849 | | // string. Also, because we write 8 zeroes at the end of the output, ensure |
| | | 850 | | // that there are at least 11 valid bytes of input data remaining to close the |
| | | 851 | | // gap. 32 + 2 + 11 = 45 bytes. |
| | | 852 | | |
| | | 853 | | // See Vector128Decode below for an explanation of how the lookup tables and validation work. |
| | | 854 | | |
| | | 855 | | // The JIT won't hoist these "constants", so help it |
| | 604658 | 856 | | Vector256<sbyte> lutHi = Vector256.Create(decoder.Avx2LutHigh); |
| | | 857 | | |
| | 604658 | 858 | | Vector256<sbyte> lutLo = Vector256.Create(decoder.Avx2LutLow); |
| | | 859 | | |
| | 604658 | 860 | | Vector256<sbyte> lutShift = Vector256.Create(decoder.Avx2LutShift); |
| | | 861 | | |
| | 604658 | 862 | | Vector256<sbyte> packBytesInLaneMask = Vector256.Create( |
| | 604658 | 863 | | 2, 1, 0, 6, |
| | 604658 | 864 | | 5, 4, 10, 9, |
| | 604658 | 865 | | 8, 14, 13, 12, |
| | 604658 | 866 | | -1, -1, -1, -1, |
| | 604658 | 867 | | 2, 1, 0, 6, |
| | 604658 | 868 | | 5, 4, 10, 9, |
| | 604658 | 869 | | 8, 14, 13, 12, |
| | 604658 | 870 | | -1, -1, -1, -1); |
| | | 871 | | |
| | 604658 | 872 | | Vector256<int> packLanesControl = Vector256.Create( |
| | 604658 | 873 | | 0, 0, 0, 0, |
| | 604658 | 874 | | 1, 0, 0, 0, |
| | 604658 | 875 | | 2, 0, 0, 0, |
| | 604658 | 876 | | 4, 0, 0, 0, |
| | 604658 | 877 | | 5, 0, 0, 0, |
| | 604658 | 878 | | 6, 0, 0, 0, |
| | 604658 | 879 | | -1, -1, -1, -1, |
| | 604658 | 880 | | -1, -1, -1, -1).AsInt32(); |
| | | 881 | | |
| | 604658 | 882 | | Vector256<sbyte> maskSlashOrUnderscore = Vector256.Create((sbyte)decoder.MaskSlashOrUnderscore); |
| | 604658 | 883 | | Vector256<sbyte> shiftForUnderscore = Vector256.Create((sbyte)33); |
| | 604658 | 884 | | Vector256<sbyte> mergeConstant0 = Vector256.Create(0x01400140).AsSByte(); |
| | 604658 | 885 | | Vector256<short> mergeConstant1 = Vector256.Create(0x00011000).AsInt16(); |
| | | 886 | | |
| | 604658 | 887 | | T* src = srcBytes; |
| | 604658 | 888 | | byte* dest = destBytes; |
| | | 889 | | |
| | | 890 | | // Conceptually: while (remaining >= 45); the loop itself is bounded by the src <= srcEnd test at the bottom. |
| | | 891 | | do |
| | | 892 | | { |
| | 4006736 | 893 | | if (!decoder.TryLoadAvxVector256(src, srcStart, sourceLength, out Vector256<sbyte> str)) |
| | | 894 | | { |
| | | 895 | | break; |
| | | 896 | | } |
| | | 897 | | |
| | 3998464 | 898 | | Vector256<sbyte> hiNibbles = ((str.AsInt32()) >>> 4).AsSByte() & maskSlashOrUnderscore; |
| | | 899 | | |
| | 3998464 | 900 | | if (!decoder.TryDecode256Core(str, hiNibbles, maskSlashOrUnderscore, lutLo, lutHi, lutShift, shiftForUnd |
| | | 901 | | { |
| | | 902 | | break; |
| | | 903 | | } |
| | | 904 | | |
| | | 905 | | // in, lower lane, bits, upper case are most significant bits, lower case are least significant bits: |
| | | 906 | | // 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ |
| | | 907 | | // 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG |
| | | 908 | | // 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD |
| | | 909 | | // 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA |
| | | 910 | | |
| | 3541356 | 911 | | Vector256<short> merge_ab_and_bc = Avx2.MultiplyAddAdjacent(str.AsByte(), mergeConstant0); |
| | | 912 | | // 0000kkkk LLllllll 0000JJJJ JJjjKKKK |
| | | 913 | | // 0000hhhh IIiiiiii 0000GGGG GGggHHHH |
| | | 914 | | // 0000eeee FFffffff 0000DDDD DDddEEEE |
| | | 915 | | // 0000bbbb CCcccccc 0000AAAA AAaaBBBB |
| | | 916 | | |
| | 3541356 | 917 | | Vector256<int> output = Avx2.MultiplyAddAdjacent(merge_ab_and_bc, mergeConstant1); |
| | | 918 | | // 00000000 JJJJJJjj KKKKkkkk LLllllll |
| | | 919 | | // 00000000 GGGGGGgg HHHHhhhh IIiiiiii |
| | | 920 | | // 00000000 DDDDDDdd EEEEeeee FFffffff |
| | | 921 | | // 00000000 AAAAAAaa BBBBbbbb CCcccccc |
| | | 922 | | |
| | | 923 | | // Pack bytes together in each lane: |
| | 3541356 | 924 | | output = Avx2.Shuffle(output.AsSByte(), packBytesInLaneMask).AsInt32(); |
| | | 925 | | // 00000000 00000000 00000000 00000000 |
| | | 926 | | // LLllllll KKKKkkkk JJJJJJjj IIiiiiii |
| | | 927 | | // HHHHhhhh GGGGGGgg FFffffff EEEEeeee |
| | | 928 | | // DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa |
| | | 929 | | |
| | | 930 | | // Pack lanes: move the six data-carrying 32-bit elements (0, 1, 2, 4, 5, 6) to the front |
| | 3541356 | 931 | | str = Avx2.PermuteVar8x32(output, packLanesControl).AsSByte(); |
| | | 932 | | |
| | 3541356 | 933 | | AssertWrite<Vector256<sbyte>>(dest, destStart, destLength); |
| | 3541356 | 934 | | Avx.Store(dest, str.AsByte()); |
| | | 935 | | |
| | 3541356 | 936 | | src += 32; |
| | 3541356 | 937 | | dest += 24; |
| | | 938 | | } |
| | 3541356 | 939 | | while (src <= srcEnd); |
| | | 940 | | |
| | 604658 | 941 | | srcBytes = src; |
| | 604658 | 942 | | destBytes = dest; |
| | 604658 | 943 | | } |
| | | 944 | | |
| | | 945 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 946 | | [CompExactlyDependsOn(typeof(Ssse3))] |
| | | 947 | | [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] |
| | | 948 | | internal static Vector128<byte> SimdShuffle(Vector128<byte> left, Vector128<byte> right, Vector128<byte> mask8F) |
| | | 949 | | { |
| | 1781084 | 950 | | Debug.Assert((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian); |
| | | 951 | | /* Byte shuffle of 'left' by the indices in 'right'. Ssse3.Shuffle gets the indices as given; the Arm64 table lookup gets them ANDed with mask8F first (presumably so both paths treat index bytes alike - TODO confirm). */ |
| | 1781084 | 952 | | if (Ssse3.IsSupported) |
| | | 953 | | { |
| | 1781084 | 954 | | return Ssse3.Shuffle(left, right); |
| | | 955 | | } |
| | | 956 | | else |
| | | 957 | | { |
| | 0 | 958 | | return AdvSimd.Arm64.VectorTableLookup(left, right & mask8F); |
| | | 959 | | } |
| | | 960 | | } |
| | | 961 | | |
| | | 962 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 963 | | [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] |
| | | 964 | | private static unsafe void AdvSimdDecode<TBase64Decoder, T>(TBase64Decoder decoder, ref T* srcBytes, ref byte* d |
| | | 965 | | where TBase64Decoder : IBase64Decoder<T> |
| | | 966 | | where T : unmanaged |
| | | 967 | | { |
| | | 968 | | // C# implementation of https://github.com/aklomp/base64/blob/3a5add8652076612a8407627a42c768736a4263f/lib/a |
| | | 969 | | // If we have AdvSimd support, pick off 64 bytes at a time for as long as we can, |
| | | 970 | | // but make sure that we quit before seeing any == markers at the end of the |
| | | 971 | | // string. 64 + 2 = 66 bytes. |
| | | 972 | | |
| | | 973 | | // In the decoding process, we want to map each byte, representing a Base64 value, to its 6-bit (0-63) repre |
| | | 974 | | // It uses the following mapping. Values outside the following groups are invalid and, we abort decoding whe |
| | | 975 | | // |
| | | 976 | | // # From To Char |
| | | 977 | | // 1 [43] [62] + |
| | | 978 | | // 2 [47] [63] / |
| | | 979 | | // 3 [48..57] [52..61] 0..9 |
| | | 980 | | // 4 [65..90] [0..25] A..Z |
| | | 981 | | // 5 [97..122] [26..51] a..z |
| | | 982 | | // |
| | | 983 | | // To map an input value to its Base64 representation, we use look-up tables 'decLutOne' and 'decLutTwo'. |
| | | 984 | | // 'decLutOne' helps to map groups 1, 2 and 3 while 'decLutTwo' maps groups 4 and 5 in the above list. |
| | | 985 | | // After mapping, each value falls between 0-63. Consequently, the last six bits of each byte now hold a val |
| | | 986 | | // We then compress four such bytes (with valid 4 * 6 = 24 bits) to three UTF8 bytes (3 * 8 = 24 bits). |
| | | 987 | | // For faster decoding, we use SIMD operations that allow the processing of multiple bytes together. |
| | | 988 | | // However, the compress operation on adjacent values of a vector could be slower. Thus, we de-interleave wh |
| | | 989 | | // the input bytes that store adjacent bytes in separate vectors. This later simplifies the compress step wi |
| | | 990 | | // of logical operations. This requires interleaving while storing the decoded result. |
| | | 991 | | |
| | | 992 | | // Values in 'decLutOne' map input values from 0 to 63. |
| | | 993 | | // 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 |
| | | 994 | | // 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 |
| | | 995 | | // 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63 |
| | | 996 | | // 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255 |
| | | 997 | | var decLutOne = (Vector128<byte>.AllBitsSet, |
| | | 998 | | Vector128<byte>.AllBitsSet, |
| | | 999 | | Vector128.Create(decoder.AdvSimdLutOne3).AsByte(), |
| | | 1000 | | Vector128.Create(0x37363534, 0x3B3A3938, 0xFFFF3D3C, 0xFFFFFFFF).AsByte()); |
| | | 1001 | | |
| | | 1002 | | // Values in 'decLutTwo' map input values from 63 to 127. |
| | | 1003 | | // 0, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 |
| | | 1004 | | // 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255 |
| | | 1005 | | // 255, 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 |
| | | 1006 | | // 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255 |
| | | 1007 | | var decLutTwo = (Vector128.Create(0x0100FF00, 0x05040302, 0x09080706, 0x0D0C0B0A).AsByte(), |
| | | 1008 | | Vector128.Create(0x11100F0E, 0x15141312, 0x19181716, 0xFFFFFFFF).AsByte(), |
| | | 1009 | | Vector128.Create(decoder.AdvSimdLutTwo3Uint1, 0x1F1E1D1C, 0x23222120, 0x27262524).AsByte(), |
| | | 1010 | | Vector128.Create(0x2B2A2928, 0x2F2E2D2C, 0x33323130, 0xFFFFFFFF).AsByte()); |
| | | 1011 | | |
| | | 1012 | | T* src = srcBytes; |
| | | 1013 | | byte* dest = destBytes; |
| | | 1014 | | Vector128<byte> offset = Vector128.Create<byte>(63); |
| | | 1015 | | |
| | | 1016 | | do |
| | | 1017 | | { |
| | | 1018 | | // Step 1: Load 64 bytes and de-interleave. |
| | | 1019 | | if (!decoder.TryLoadArmVector128x4(src, srcStart, sourceLength, |
| | | 1020 | | out Vector128<byte> str1, out Vector128<byte> str2, out Vector128<byte> str3, out Vector128<byte> st |
| | | 1021 | | { |
| | | 1022 | | break; |
| | | 1023 | | } |
| | | 1024 | | |
| | | 1025 | | // Step 2: Map each valid input to its Base64 value. |
| | | 1026 | | // We use two look-ups to compute partial results and combine them later. |
| | | 1027 | | |
| | | 1028 | | // Step 2.1: Detect valid Base64 values from the first three groups. Maps input as, |
| | | 1029 | | // 0 to 63 (Invalid) => 255 |
| | | 1030 | | // 0 to 63 (Valid) => Their Base64 equivalent |
| | | 1031 | | // 64 to 255 => 0 |
| | | 1032 | | |
| | | 1033 | | // Each input value acts as an index in the look-up table 'decLutOne'. |
| | | 1034 | | // e.g., for group 1: index 43 maps to 62 (Base64 '+'). |
| | | 1035 | | // Group 4 and 5 values are out-of-range (>= 64), so they are mapped to zero. |
| | | 1036 | | // Other valid indices but invalid values are mapped to 255. |
| | | 1037 | | Vector128<byte> decOne1 = AdvSimd.Arm64.VectorTableLookup(decLutOne, str1); |
| | | 1038 | | Vector128<byte> decOne2 = AdvSimd.Arm64.VectorTableLookup(decLutOne, str2); |
| | | 1039 | | Vector128<byte> decOne3 = AdvSimd.Arm64.VectorTableLookup(decLutOne, str3); |
| | | 1040 | | Vector128<byte> decOne4 = AdvSimd.Arm64.VectorTableLookup(decLutOne, str4); |
| | | 1041 | | |
| | | 1042 | | // Step 2.2: Detect valid Base64 values from groups 4 and 5. Maps input as, |
| | | 1043 | | // 0 to 63 => 0 |
| | | 1044 | | // 64 to 122 (Valid) => Their Base64 equivalent |
| | | 1045 | | // 64 to 122 (Invalid) => 255 |
| | | 1046 | | // 123 to 255 => Remains unchanged |
| | | 1047 | | |
| | | 1048 | | // Subtract/offset each input value by 63 so that it can be used as a valid offset. |
| | | 1049 | | // Subtract saturate makes values from the first three groups set to zero that are |
| | | 1050 | | // then mapped to zero in the subsequent look-up. |
| | | 1051 | | Vector128<byte> decTwo1 = AdvSimd.SubtractSaturate(str1, offset); |
| | | 1052 | | Vector128<byte> decTwo2 = AdvSimd.SubtractSaturate(str2, offset); |
| | | 1053 | | Vector128<byte> decTwo3 = AdvSimd.SubtractSaturate(str3, offset); |
| | | 1054 | | Vector128<byte> decTwo4 = AdvSimd.SubtractSaturate(str4, offset); |
| | | 1055 | | |
| | | 1056 | | // We use VTBX to map values where out-of-range indices are unchanged. |
| | | 1057 | | decTwo1 = AdvSimd.Arm64.VectorTableLookupExtension(decTwo1, decLutTwo, decTwo1); |
| | | 1058 | | decTwo2 = AdvSimd.Arm64.VectorTableLookupExtension(decTwo2, decLutTwo, decTwo2); |
| | | 1059 | | decTwo3 = AdvSimd.Arm64.VectorTableLookupExtension(decTwo3, decLutTwo, decTwo3); |
| | | 1060 | | decTwo4 = AdvSimd.Arm64.VectorTableLookupExtension(decTwo4, decLutTwo, decTwo4); |
| | | 1061 | | |
| | | 1062 | | // Step 3: Combine the partial result. |
| | | 1063 | | // Each look-up above maps valid values to their Base64 equivalent or zero. |
| | | 1064 | | // Thus the intermediate results 'decOne' and 'decTwo' could be OR-ed to get final values. |
| | | 1065 | | str1 = (decOne1 | decTwo1); |
| | | 1066 | | str2 = (decOne2 | decTwo2); |
| | | 1067 | | str3 = (decOne3 | decTwo3); |
| | | 1068 | | str4 = (decOne4 | decTwo4); |
| | | 1069 | | |
| | | 1070 | | // Step 4: Detect an invalid input value. |
| | | 1071 | | // Invalid values < 122 are set to 255 while the ones above 122 are unchanged. |
| | | 1072 | | // Check for invalid input, any value larger than 63. |
| | | 1073 | | Vector128<byte> classified = (Vector128.GreaterThan(str1, offset) |
| | | 1074 | | | Vector128.GreaterThan(str2, offset) |
| | | 1075 | | | Vector128.GreaterThan(str3, offset) |
| | | 1076 | | | Vector128.GreaterThan(str4, offset)); |
| | | 1077 | | |
| | | 1078 | | // Check that all bits are zero. |
| | | 1079 | | if (classified != Vector128<byte>.Zero) |
| | | 1080 | | { |
| | | 1081 | | break; |
| | | 1082 | | } |
| | | 1083 | | |
| | | 1084 | | // Step 5: Compress four bytes into three. |
| | | 1085 | | Vector128<byte> res1 = ((str1 << 2) | (str2 >> 4)); |
| | | 1086 | | Vector128<byte> res2 = ((str2 << 4) | (str3 >> 2)); |
| | | 1087 | | Vector128<byte> res3 = ((str3 << 6) | str4); |
| | | 1088 | | |
| | | 1089 | | // Step 6: Interleave and store decoded results. |
| | | 1090 | | AssertWrite<Vector128<byte>>(dest, destStart, destLength); |
| | | 1091 | | AdvSimd.Arm64.StoreVectorAndZip(dest, (res1, res2, res3)); |
| | | 1092 | | |
| | | 1093 | | src += 64; |
| | | 1094 | | dest += 48; |
| | | 1095 | | } |
| | | 1096 | | while (src <= srcEnd); |
| | | 1097 | | |
| | | 1098 | | srcBytes = src; |
| | | 1099 | | destBytes = dest; |
| | | 1100 | | } |
| | | 1101 | | |
| | | 1102 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1103 | | [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] |
| | | 1104 | | [CompExactlyDependsOn(typeof(Ssse3))] |
| | | 1105 | | private static unsafe void Vector128Decode<TBase64Decoder, T>(TBase64Decoder decoder, ref T* srcBytes, ref byte* |
| | | 1106 | | where TBase64Decoder : IBase64Decoder<T> |
| | | 1107 | | where T : unmanaged |
| | | 1108 | | { |
| | | 1109 | | Debug.Assert((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian); |
| | | 1110 | | |
| | | 1111 | | // If we have Vector128 support, pick off 16 bytes at a time for as long as we can, |
| | | 1112 | | // but make sure that we quit before seeing any == markers at the end of the |
| | | 1113 | | // string. Also, because we write four zeroes at the end of the output, ensure |
| | | 1114 | | // that there are at least 6 valid bytes of input data remaining to close the |
| | | 1115 | | // gap. 16 + 2 + 6 = 24 bytes. |
| | | 1116 | | |
| | | 1117 | | // The input consists of six character sets in the Base64 alphabet, |
| | | 1118 | | // which we need to map back to the 6-bit values they represent. |
| | | 1119 | | // There are three ranges, two singles, and then there's the rest. |
| | | 1120 | | // |
| | | 1121 | | // # From To Add Characters |
| | | 1122 | | // 1 [43] [62] +19 + |
| | | 1123 | | // 2 [47] [63] +16 / |
| | | 1124 | | // 3 [48..57] [52..61] +4 0..9 |
| | | 1125 | | // 4 [65..90] [0..25] -65 A..Z |
| | | 1126 | | // 5 [97..122] [26..51] -71 a..z |
| | | 1127 | | // (6) Everything else => invalid input |
| | | 1128 | | |
| | | 1129 | | // We will use LUTS for character validation & offset computation |
| | | 1130 | | // Remember that 0x2X and 0x0X are the same index for _mm_shuffle_epi8, |
| | | 1131 | | // this allows to mask with 0x2F instead of 0x0F and thus save one constant declaration (register and/or mem |
| | | 1132 | | |
| | | 1133 | | // For offsets: |
| | | 1134 | | // Perfect hash for lut = ((src>>4)&0x2F)+((src==0x2F)?0xFF:0x00) |
| | | 1135 | | // 0000 = garbage |
| | | 1136 | | // 0001 = / |
| | | 1137 | | // 0010 = + |
| | | 1138 | | // 0011 = 0-9 |
| | | 1139 | | // 0100 = A-Z |
| | | 1140 | | // 0101 = A-Z |
| | | 1141 | | // 0110 = a-z |
| | | 1142 | | // 0111 = a-z |
| | | 1143 | | // 1000 >= garbage |
| | | 1144 | | |
| | | 1145 | | // For validation, here's the table. |
| | | 1146 | | // A character is valid if and only if the AND of the 2 lookups equals 0: |
| | | 1147 | | |
| | | 1148 | | // hi \ lo 0000 0001 0010 0011 0100 0101 0110 0111 1000 1001 1010 1011 1100 1101 1110 1111 |
| | | 1149 | | // LUT 0x15 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x11 0x13 0x1A 0x1B 0x1B 0x1B 0x1A |
| | | 1150 | | |
| | | 1151 | | // 0000 0x10 char NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI |
| | | 1152 | | // andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1153 | | |
| | | 1154 | | // 0001 0x10 char DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US |
| | | 1155 | | // andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1156 | | |
| | | 1157 | | // 0010 0x01 char ! " # $ % & ' ( ) * + , - . / |
| | | 1158 | | // andlut 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x01 0x00 0x01 0x01 0x01 0x00 |
| | | 1159 | | |
| | | 1160 | | // 0011 0x02 char 0 1 2 3 4 5 6 7 8 9 : ; < = > ? |
| | | 1161 | | // andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x02 0x02 0x02 0x02 0x02 0x02 |
| | | 1162 | | |
| | | 1163 | | // 0100 0x04 char @ A B C D E F G H I J K L M N O |
| | | 1164 | | // andlut 0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
| | | 1165 | | |
| | | 1166 | | // 0101 0x08 char P Q R S T U V W X Y Z [ \ ] ^ _ |
| | | 1167 | | // andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08 |
| | | 1168 | | |
| | | 1169 | | // 0110 0x04 char ` a b c d e f g h i j k l m n o |
| | | 1170 | | // andlut 0x04 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 |
| | | 1171 | | // 0111 0x08 char p q r s t u v w x y z { | } ~ |
| | | 1172 | | // andlut 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x08 0x08 0x08 0x08 0x08 |
| | | 1173 | | |
| | | 1174 | | // 1000 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1175 | | // 1001 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1176 | | // 1010 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1177 | | // 1011 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1178 | | // 1100 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1179 | | // 1101 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1180 | | // 1110 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1181 | | // 1111 0x10 andlut 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 0x10 |
| | | 1182 | | |
| | | 1183 | | // The JIT won't hoist these "constants", so help it |
| | 592936 | 1184 | | Vector128<byte> lutHi = Vector128.Create(decoder.Vector128LutHigh).AsByte(); |
| | 592936 | 1185 | | Vector128<byte> lutLo = Vector128.Create(decoder.Vector128LutLow).AsByte(); |
| | 592936 | 1186 | | Vector128<sbyte> lutShift = Vector128.Create(decoder.Vector128LutShift).AsSByte(); |
| | 592936 | 1187 | | Vector128<sbyte> packBytesMask = Vector128.Create(0x06000102, 0x090A0405, 0x0C0D0E08, 0xffffffff).AsSByte(); |
| | 592936 | 1188 | | Vector128<byte> mergeConstant0 = Vector128.Create(0x01400140).AsByte(); |
| | 592936 | 1189 | | Vector128<short> mergeConstant1 = Vector128.Create(0x00011000).AsInt16(); |
| | 592936 | 1190 | | Vector128<byte> one = Vector128<byte>.One; |
| | 592936 | 1191 | | Vector128<byte> mask2F = Vector128.Create(decoder.MaskSlashOrUnderscore); |
| | 592936 | 1192 | | Vector128<byte> mask8F = Vector128.Create((byte)0x8F); |
| | 592936 | 1193 | | Vector128<byte> shiftForUnderscore = Vector128.Create((byte)33); |
| | 592936 | 1194 | | T* src = srcBytes; |
| | 592936 | 1195 | | byte* dest = destBytes; |
| | | 1196 | | |
| | | 1197 | | // Conceptually: while (remaining >= 24); the loop itself is bounded by the src <= srcEnd test at the bottom. |
| | | 1198 | | do |
| | | 1199 | | { |
| | 648714 | 1200 | | if (!decoder.TryLoadVector128(src, srcStart, sourceLength, out Vector128<byte> str)) |
| | | 1201 | | { |
| | | 1202 | | break; |
| | | 1203 | | } |
| | | 1204 | | |
| | | 1205 | | // Lookup index: each input byte shifted right by 4 and masked with mask2F (see the 0x2F note above) |
| | 639426 | 1206 | | Vector128<byte> hiNibbles = Vector128.ShiftRightLogical(str.AsInt32(), 4).AsByte() & mask2F; |
| | | 1207 | | |
| | 639426 | 1208 | | if (!decoder.TryDecode128Core(str, hiNibbles, mask2F, mask8F, lutLo, lutHi, lutShift, shiftForUnderscore |
| | | 1209 | | { |
| | | 1210 | | break; |
| | | 1211 | | } |
| | | 1212 | | |
| | | 1213 | | // in, bits, upper case are most significant bits, lower case are least significant bits |
| | | 1214 | | // 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ |
| | | 1215 | | // 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG |
| | | 1216 | | // 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD |
| | | 1217 | | // 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA |
| | | 1218 | | |
| | | 1219 | | Vector128<short> merge_ab_and_bc; |
| | 169468 | 1220 | | if (Ssse3.IsSupported) |
| | | 1221 | | { |
| | 169468 | 1222 | | merge_ab_and_bc = Ssse3.MultiplyAddAdjacent(str.AsByte(), mergeConstant0.AsSByte()); |
| | | 1223 | | } |
| | | 1224 | | else if (AdvSimd.Arm64.IsSupported) |
| | | 1225 | | { |
| | | 1226 | | Vector128<ushort> evens = AdvSimd.ShiftLeftLogicalWideningLower(AdvSimd.Arm64.UnzipEven(str, one).Ge |
| | | 1227 | | Vector128<ushort> odds = AdvSimd.Arm64.TransposeOdd(str, Vector128<byte>.Zero).AsUInt16(); |
| | | 1228 | | merge_ab_and_bc = Vector128.Add(evens, odds).AsInt16(); |
| | | 1229 | | } |
| | | 1230 | | else |
| | | 1231 | | { |
| | | 1232 | | // We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are l |
| | 0 | 1233 | | ThrowUnreachableException(); |
| | | 1234 | | merge_ab_and_bc = default; |
| | | 1235 | | } |
| | | 1236 | | // 0000kkkk LLllllll 0000JJJJ JJjjKKKK |
| | | 1237 | | // 0000hhhh IIiiiiii 0000GGGG GGggHHHH |
| | | 1238 | | // 0000eeee FFffffff 0000DDDD DDddEEEE |
| | | 1239 | | // 0000bbbb CCcccccc 0000AAAA AAaaBBBB |
| | | 1240 | | |
| | | 1241 | | Vector128<int> output; |
| | 169468 | 1242 | | if (Ssse3.IsSupported) |
| | | 1243 | | { |
| | 169468 | 1244 | | output = Sse2.MultiplyAddAdjacent(merge_ab_and_bc, mergeConstant1); |
| | | 1245 | | } |
| | | 1246 | | else if (AdvSimd.Arm64.IsSupported) |
| | | 1247 | | { |
| | | 1248 | | Vector128<int> ievens = AdvSimd.ShiftLeftLogicalWideningLower(AdvSimd.Arm64.UnzipEven(merge_ab_and_b |
| | | 1249 | | Vector128<int> iodds = AdvSimd.Arm64.TransposeOdd(merge_ab_and_bc, Vector128<short>.Zero).AsInt32(); |
| | | 1250 | | output = Vector128.Add(ievens, iodds).AsInt32(); |
| | | 1251 | | } |
| | | 1252 | | else |
| | | 1253 | | { |
| | | 1254 | | // We explicitly recheck each IsSupported query to ensure that the trimmer can see which paths are l |
| | 0 | 1255 | | ThrowUnreachableException(); |
| | | 1256 | | output = default; |
| | | 1257 | | } |
| | | 1258 | | // 00000000 JJJJJJjj KKKKkkkk LLllllll |
| | | 1259 | | // 00000000 GGGGGGgg HHHHhhhh IIiiiiii |
| | | 1260 | | // 00000000 DDDDDDdd EEEEeeee FFffffff |
| | | 1261 | | // 00000000 AAAAAAaa BBBBbbbb CCcccccc |
| | | 1262 | | |
| | | 1263 | | // Pack bytes together: |
| | 169468 | 1264 | | str = SimdShuffle(output.AsByte(), packBytesMask.AsByte(), mask8F); |
| | | 1265 | | // 00000000 00000000 00000000 00000000 |
| | | 1266 | | // LLllllll KKKKkkkk JJJJJJjj IIiiiiii |
| | | 1267 | | // HHHHhhhh GGGGGGgg FFffffff EEEEeeee |
| | | 1268 | | // DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa |
| | | 1269 | | |
| | 169468 | 1270 | | AssertWrite<Vector128<sbyte>>(dest, destStart, destLength); |
| | 169468 | 1271 | | str.Store(dest); |
| | | 1272 | | |
| | 169468 | 1273 | | src += 16; |
| | 169468 | 1274 | | dest += 12; |
| | | 1275 | | } |
| | 169468 | 1276 | | while (src <= srcEnd); |
| | | 1277 | | |
| | 592936 | 1278 | | srcBytes = src; |
| | 592936 | 1279 | | destBytes = dest; |
| | 592936 | 1280 | | } |
| | | 1281 | | #endif |
| | | 1282 | | |
| | | 1283 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1284 | | private static unsafe void WriteThreeLowOrderBytes(byte* destination, int value) |
| | | 1285 | | { /* Stores the low 24 bits of value, most significant byte first: [0] = bits 23..16, [1] = bits 15..8, [2] = bits 7..0. */ |
| | 5147776 | 1286 | | destination[0] = (byte)(value >> 16); |
| | 5147776 | 1287 | | destination[1] = (byte)(value >> 8); |
| | 5147776 | 1288 | | destination[2] = (byte)value; |
| | 5147776 | 1289 | | } |
| | | 1290 | | |
| | | 1291 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1292 | | internal static bool IsWhiteSpace(int value) |
| | | 1293 | | { /* Bit trick on c = value - '\t': the set bits of 0xC8000100 (31, 30, 27 and 8) reach the top bit when shifted left by c = 0, 1, 4 or 23, i.e. for '\t', '\n', '\r' and ' '. The tail of the return expression is cut off in this listing - TODO confirm how the result is tested. */ |
| | 9760304 | 1294 | | Debug.Assert(value >= 0 && value <= ushort.MaxValue); |
| | | 1295 | | uint charMinusLowUInt32; |
| | 9760304 | 1296 | | return (int)((0xC8000100U << (short)(charMinusLowUInt32 = (ushort)(value - '\t'))) & (charMinusLowUInt32 - 3 |
| | | 1297 | | } |
| | | 1298 | | |
| | | 1299 | | internal readonly struct Base64DecoderByte : IBase64Decoder<byte> |
| | | 1300 | | { |
| | | 1301 | | // Pre-computing this table using a custom string(s_characters) and GenerateDecodingMapAndVerify (found in t |
| | | 1302 | | public ReadOnlySpan<sbyte> DecodingMap => |
| | 2256764 | 1303 | | [ |
| | 2256764 | 1304 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| | 2256764 | 1305 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| | 2256764 | 1306 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, //62 is placed at index 43 ( |
| | 2256764 | 1307 | | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, //52-61 are placed at index |
| | 2256764 | 1308 | | -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, |
| | 2256764 | 1309 | | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, //0-25 are placed at index 6 |
| | 2256764 | 1310 | | -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, |
| | 2256764 | 1311 | | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, //26-51 are placed at index |
| | 2256764 | 1312 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bytes over 122 ('z') are |
| | 2256764 | 1313 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Hence, padding the map wi |
| | 2256764 | 1314 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| | 2256764 | 1315 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| | 2256764 | 1316 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| | 2256764 | 1317 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| | 2256764 | 1318 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| | 2256764 | 1319 | | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| | 2256764 | 1320 | | ]; |
| | | 1321 | | |
| | | 1322 | | public ReadOnlySpan<uint> VbmiLookup0 => |
| | 0 | 1323 | | [ |
| | 0 | 1324 | | 0x80808080, 0x80808080, 0x80808080, 0x80808080, |
| | 0 | 1325 | | 0x80808080, 0x80808080, 0x80808080, 0x80808080, |
| | 0 | 1326 | | 0x80808080, 0x80808080, 0x3e808080, 0x3f808080, |
| | 0 | 1327 | | 0x37363534, 0x3b3a3938, 0x80803d3c, 0x80808080 |
| | 0 | 1328 | | ]; |
| | | 1329 | | |
| | | 1330 | | public ReadOnlySpan<uint> VbmiLookup1 => |
| | 0 | 1331 | | [ |
| | 0 | 1332 | | 0x02010080, 0x06050403, 0x0a090807, 0x0e0d0c0b, |
| | 0 | 1333 | | 0x1211100f, 0x16151413, 0x80191817, 0x80808080, |
| | 0 | 1334 | | 0x1c1b1a80, 0x201f1e1d, 0x24232221, 0x28272625, |
| | 0 | 1335 | | 0x2c2b2a29, 0x302f2e2d, 0x80333231, 0x80808080 |
| | 0 | 1336 | | ]; |
| | | 1337 | | |
| | | 1338 | | public ReadOnlySpan<sbyte> Avx2LutHigh => |
| | 604658 | 1339 | | [ |
| | 604658 | 1340 | | 0x10, 0x10, 0x01, 0x02, |
| | 604658 | 1341 | | 0x04, 0x08, 0x04, 0x08, |
| | 604658 | 1342 | | 0x10, 0x10, 0x10, 0x10, |
| | 604658 | 1343 | | 0x10, 0x10, 0x10, 0x10, |
| | 604658 | 1344 | | 0x10, 0x10, 0x01, 0x02, |
| | 604658 | 1345 | | 0x04, 0x08, 0x04, 0x08, |
| | 604658 | 1346 | | 0x10, 0x10, 0x10, 0x10, |
| | 604658 | 1347 | | 0x10, 0x10, 0x10, 0x10 |
| | 604658 | 1348 | | ]; |
| | | 1349 | | |
| | | 1350 | | public ReadOnlySpan<sbyte> Avx2LutLow => |
| | 604658 | 1351 | | [ |
| | 604658 | 1352 | | 0x15, 0x11, 0x11, 0x11, |
| | 604658 | 1353 | | 0x11, 0x11, 0x11, 0x11, |
| | 604658 | 1354 | | 0x11, 0x11, 0x13, 0x1A, |
| | 604658 | 1355 | | 0x1B, 0x1B, 0x1B, 0x1A, |
| | 604658 | 1356 | | 0x15, 0x11, 0x11, 0x11, |
| | 604658 | 1357 | | 0x11, 0x11, 0x11, 0x11, |
| | 604658 | 1358 | | 0x11, 0x11, 0x13, 0x1A, |
| | 604658 | 1359 | | 0x1B, 0x1B, 0x1B, 0x1A |
| | 604658 | 1360 | | ]; |
| | | 1361 | | |
| | | 1362 | | public ReadOnlySpan<sbyte> Avx2LutShift => |
| | 604658 | 1363 | | [ |
| | 604658 | 1364 | | 0, 16, 19, 4, |
| | 604658 | 1365 | | -65, -65, -71, -71, |
| | 604658 | 1366 | | 0, 0, 0, 0, |
| | 604658 | 1367 | | 0, 0, 0, 0, |
| | 604658 | 1368 | | 0, 16, 19, 4, |
| | 604658 | 1369 | | -65, -65, -71, -71, |
| | 604658 | 1370 | | 0, 0, 0, 0, |
| | 604658 | 1371 | | 0, 0, 0, 0 |
| | 604658 | 1372 | | ]; |
| | | 1373 | | |
| | 1197594 | 1374 | | public byte MaskSlashOrUnderscore => (byte)'/'; |
| | | 1375 | | |
| | 592936 | 1376 | | public ReadOnlySpan<int> Vector128LutHigh => [0x02011010, 0x08040804, 0x10101010, 0x10101010]; |
| | | 1377 | | |
| | 592936 | 1378 | | public ReadOnlySpan<int> Vector128LutLow => [0x11111115, 0x11111111, 0x1A131111, 0x1A1B1B1B]; |
| | | 1379 | | |
| | 592936 | 1380 | | public ReadOnlySpan<uint> Vector128LutShift => [0x04131000, 0xb9b9bfbf, 0x00000000, 0x00000000]; |
| | | 1381 | | |
| | 0 | 1382 | | public ReadOnlySpan<uint> AdvSimdLutOne3 => [0xFFFFFFFF, 0xFFFFFFFF, 0x3EFFFFFF, 0x3FFFFFFF]; |
| | | 1383 | | |
| | 0 | 1384 | | public uint AdvSimdLutTwo3Uint1 => 0x1B1AFFFF; |
| | | 1385 | | |
| | 851288 | 1386 | | public int GetMaxDecodedLength(int utf8Length) => Base64.GetMaxDecodedFromUtf8Length(utf8Length); |
| | | 1387 | | |
| | 1053086 | 1388 | | public bool IsInvalidLength(int bufferLength) => bufferLength % 4 != 0; // only decode input if it is a mult |
| | | 1389 | | |
| | 2182214 | 1390 | | public bool IsValidPadding(uint padChar) => padChar == EncodingPad; |
| | | 1391 | | |
| | 851288 | 1392 | | public int SrcLength(bool _, int utf8Length) => utf8Length & ~0x3; // only decode input up to the closest m |
| | | 1393 | | |
| | | 1394 | | #if NET |
| | | 1395 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1396 | | [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] |
| | | 1397 | | [CompExactlyDependsOn(typeof(Ssse3))] |
| | | 1398 | | public bool TryDecode128Core( |
| | | 1399 | | Vector128<byte> str, |
| | | 1400 | | Vector128<byte> hiNibbles, |
| | | 1401 | | Vector128<byte> maskSlashOrUnderscore, |
| | | 1402 | | Vector128<byte> mask8F, |
| | | 1403 | | Vector128<byte> lutLow, |
| | | 1404 | | Vector128<byte> lutHigh, |
| | | 1405 | | Vector128<sbyte> lutShift, |
| | | 1406 | | Vector128<byte> _, |
| | | 1407 | | out Vector128<byte> result) |
| | | 1408 | | { |
| | 639426 | 1409 | | Vector128<byte> loNibbles = str & maskSlashOrUnderscore; |
| | 639426 | 1410 | | Vector128<byte> hi = SimdShuffle(lutHigh, hiNibbles, mask8F); |
| | 639426 | 1411 | | Vector128<byte> lo = SimdShuffle(lutLow, loNibbles, mask8F); |
| | | 1412 | | |
| | | 1413 | | // Check for invalid input: if any "and" values from lo and hi are not zero, |
| | | 1414 | | // fall back on bytewise code to do error checking and reporting: |
| | 639426 | 1415 | | if ((lo & hi) != Vector128<byte>.Zero) |
| | | 1416 | | { |
| | 469958 | 1417 | | result = default; |
| | 469958 | 1418 | | return false; |
| | | 1419 | | } |
| | | 1420 | | |
| | 169468 | 1421 | | Vector128<byte> eq2F = Vector128.Equals(str, maskSlashOrUnderscore); |
| | 169468 | 1422 | | Vector128<byte> shift = SimdShuffle(lutShift.AsByte(), (eq2F + hiNibbles), mask8F); |
| | | 1423 | | |
| | | 1424 | | // Now simply add the delta values to the input: |
| | 169468 | 1425 | | result = str + shift; |
| | | 1426 | | |
| | 169468 | 1427 | | return true; |
| | | 1428 | | } |
| | | 1429 | | |
| | | 1430 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1431 | | [CompExactlyDependsOn(typeof(Avx2))] |
| | | 1432 | | public bool TryDecode256Core( |
| | | 1433 | | Vector256<sbyte> str, |
| | | 1434 | | Vector256<sbyte> hiNibbles, |
| | | 1435 | | Vector256<sbyte> maskSlashOrUnderscore, |
| | | 1436 | | Vector256<sbyte> lutLow, |
| | | 1437 | | Vector256<sbyte> lutHigh, |
| | | 1438 | | Vector256<sbyte> lutShift, |
| | | 1439 | | Vector256<sbyte> _, |
| | | 1440 | | out Vector256<sbyte> result) |
| | | 1441 | | { |
| | 3998464 | 1442 | | Vector256<sbyte> loNibbles = str & maskSlashOrUnderscore; |
| | 3998464 | 1443 | | Vector256<sbyte> hi = Avx2.Shuffle(lutHigh, hiNibbles); |
| | 3998464 | 1444 | | Vector256<sbyte> lo = Avx2.Shuffle(lutLow, loNibbles); |
| | | 1445 | | |
| | 3998464 | 1446 | | if ((lo & hi) != Vector256<sbyte>.Zero) |
| | | 1447 | | { |
| | 457108 | 1448 | | result = default; |
| | 457108 | 1449 | | return false; |
| | | 1450 | | } |
| | | 1451 | | |
| | 3541356 | 1452 | | Vector256<sbyte> eq2F = Avx2.CompareEqual(str, maskSlashOrUnderscore); |
| | 3541356 | 1453 | | Vector256<sbyte> shift = Avx2.Shuffle(lutShift, eq2F + hiNibbles); |
| | | 1454 | | |
| | 3541356 | 1455 | | result = str + shift; |
| | | 1456 | | |
| | 3541356 | 1457 | | return true; |
| | | 1458 | | } |
| | | 1459 | | |
| | | 1460 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1461 | | public unsafe bool TryLoadVector512(byte* src, byte* srcStart, int sourceLength, out Vector512<sbyte> str) |
| | | 1462 | | { |
| | 0 | 1463 | | AssertRead<Vector512<sbyte>>(src, srcStart, sourceLength); |
| | 0 | 1464 | | str = Vector512.Load(src).AsSByte(); |
| | 0 | 1465 | | return true; |
| | | 1466 | | } |
| | | 1467 | | |
| | | 1468 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1469 | | [CompExactlyDependsOn(typeof(Avx2))] |
| | | 1470 | | public unsafe bool TryLoadAvxVector256(byte* src, byte* srcStart, int sourceLength, out Vector256<sbyte> str |
| | | 1471 | | { |
| | 1552602 | 1472 | | AssertRead<Vector256<sbyte>>(src, srcStart, sourceLength); |
| | 1552602 | 1473 | | str = Avx.LoadVector256(src).AsSByte(); |
| | 1552602 | 1474 | | return true; |
| | | 1475 | | } |
| | | 1476 | | |
| | | 1477 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1478 | | public unsafe bool TryLoadVector128(byte* src, byte* srcStart, int sourceLength, out Vector128<byte> str) |
| | | 1479 | | { |
| | 327872 | 1480 | | AssertRead<Vector128<sbyte>>(src, srcStart, sourceLength); |
| | 327872 | 1481 | | str = Vector128.LoadUnsafe(ref *src); |
| | 327872 | 1482 | | return true; |
| | | 1483 | | } |
| | | 1484 | | |
| | | 1485 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1486 | | [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] |
| | | 1487 | | public unsafe bool TryLoadArmVector128x4(byte* src, byte* srcStart, int sourceLength, |
| | | 1488 | | out Vector128<byte> str1, out Vector128<byte> str2, out Vector128<byte> str3, out Vector128<byte> str4) |
| | | 1489 | | { |
| | 0 | 1490 | | AssertRead<Vector128<byte>>(src, srcStart, sourceLength); |
| | 0 | 1491 | | (str1, str2, str3, str4) = AdvSimd.Arm64.Load4xVector128AndUnzip(src); |
| | | 1492 | | |
| | 0 | 1493 | | return true; |
| | | 1494 | | } |
| | | 1495 | | #endif // NET |
| | | 1496 | | |
| | | 1497 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1498 | | public unsafe int DecodeFourElements(byte* source, ref sbyte decodingMap) |
| | | 1499 | | { |
| | | 1500 | | // The 'source' pointer is expected to reference at least 4 elements, and the 'decodingMap' consists of 256 sbytes |
| | 3415928 | 1501 | | uint t0 = source[0]; |
| | 3415928 | 1502 | | uint t1 = source[1]; |
| | 3415928 | 1503 | | uint t2 = source[2]; |
| | 3415928 | 1504 | | uint t3 = source[3]; |
| | | 1505 | | |
| | 3415928 | 1506 | | int i0 = Unsafe.Add(ref decodingMap, (int)t0); |
| | 3415928 | 1507 | | int i1 = Unsafe.Add(ref decodingMap, (int)t1); |
| | 3415928 | 1508 | | int i2 = Unsafe.Add(ref decodingMap, (int)t2); |
| | 3415928 | 1509 | | int i3 = Unsafe.Add(ref decodingMap, (int)t3); |
| | | 1510 | | |
| | 3415928 | 1511 | | i0 <<= 18; |
| | 3415928 | 1512 | | i1 <<= 12; |
| | 3415928 | 1513 | | i2 <<= 6; |
| | | 1514 | | |
| | 3415928 | 1515 | | i0 |= i3; |
| | 3415928 | 1516 | | i1 |= i2; |
| | | 1517 | | |
| | 3415928 | 1518 | | i0 |= i1; |
| | 3415928 | 1519 | | return i0; |
| | | 1520 | | } |
| | | 1521 | | |
| | | 1522 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1523 | | public unsafe int DecodeRemaining(byte* srcEnd, ref sbyte decodingMap, long remaining, out uint t2, out uint |
| | | 1524 | | { |
| | | 1525 | | uint t0; |
| | | 1526 | | uint t1; |
| | 54444 | 1527 | | t2 = EncodingPad; |
| | 54444 | 1528 | | t3 = EncodingPad; |
| | | 1529 | | switch (remaining) |
| | | 1530 | | { |
| | | 1531 | | case 2: |
| | 0 | 1532 | | t0 = srcEnd[-2]; |
| | 0 | 1533 | | t1 = srcEnd[-1]; |
| | 0 | 1534 | | break; |
| | | 1535 | | case 3: |
| | 0 | 1536 | | t0 = srcEnd[-3]; |
| | 0 | 1537 | | t1 = srcEnd[-2]; |
| | 0 | 1538 | | t2 = srcEnd[-1]; |
| | 0 | 1539 | | break; |
| | | 1540 | | case 4: |
| | 54444 | 1541 | | t0 = srcEnd[-4]; |
| | 54444 | 1542 | | t1 = srcEnd[-3]; |
| | 54444 | 1543 | | t2 = srcEnd[-2]; |
| | 54444 | 1544 | | t3 = srcEnd[-1]; |
| | 54444 | 1545 | | break; |
| | | 1546 | | default: |
| | 0 | 1547 | | return -1; |
| | | 1548 | | } |
| | | 1549 | | |
| | 54444 | 1550 | | int i0 = Unsafe.Add(ref decodingMap, (IntPtr)t0); |
| | 54444 | 1551 | | int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); |
| | | 1552 | | |
| | 54444 | 1553 | | i0 <<= 18; |
| | 54444 | 1554 | | i1 <<= 12; |
| | | 1555 | | |
| | 54444 | 1556 | | i0 |= i1; |
| | 54444 | 1557 | | return i0; |
| | | 1558 | | } |
| | | 1559 | | |
| | | 1560 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1561 | | public int IndexOfAnyExceptWhiteSpace(ReadOnlySpan<byte> span) |
| | | 1562 | | { |
| | 1182488 | 1563 | | for (int i = 0; i < span.Length; i++) |
| | | 1564 | | { |
| | 591094 | 1565 | | if (!IsWhiteSpace(span[i])) |
| | | 1566 | | { |
| | 262080 | 1567 | | return i; |
| | | 1568 | | } |
| | | 1569 | | } |
| | | 1570 | | |
| | 150 | 1571 | | return -1; |
| | | 1572 | | } |
| | | 1573 | | |
| | | 1574 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1575 | | public OperationStatus DecodeWithWhiteSpaceBlockwiseWrapper<TBase64Decoder>(TBase64Decoder decoder, ReadOnly |
| | | 1576 | | Span<byte> bytes, ref int bytesConsumed, ref int bytesWritten, bool isFinalBlock = true) |
| | | 1577 | | where TBase64Decoder : IBase64Decoder<byte> => |
| | 11588 | 1578 | | DecodeWithWhiteSpaceBlockwise(decoder, utf8, bytes, ref bytesConsumed, ref bytesWritten, isFinalBlock); |
| | | 1579 | | } |
| | | 1580 | | |
| | | 1581 | | internal readonly struct Base64DecoderChar : IBase64Decoder<ushort> |
| | | 1582 | | { |
| | 358762 | 1583 | | public ReadOnlySpan<sbyte> DecodingMap => default(Base64DecoderByte).DecodingMap; |
| | | 1584 | | |
| | 0 | 1585 | | public ReadOnlySpan<uint> VbmiLookup0 => default(Base64DecoderByte).VbmiLookup0; |
| | | 1586 | | |
| | 0 | 1587 | | public ReadOnlySpan<uint> VbmiLookup1 => default(Base64DecoderByte).VbmiLookup1; |
| | | 1588 | | |
| | 302812 | 1589 | | public ReadOnlySpan<sbyte> Avx2LutHigh => default(Base64DecoderByte).Avx2LutHigh; |
| | | 1590 | | |
| | 302812 | 1591 | | public ReadOnlySpan<sbyte> Avx2LutLow => default(Base64DecoderByte).Avx2LutLow; |
| | | 1592 | | |
| | 302812 | 1593 | | public ReadOnlySpan<sbyte> Avx2LutShift => default(Base64DecoderByte).Avx2LutShift; |
| | | 1594 | | |
| | 596538 | 1595 | | public byte MaskSlashOrUnderscore => default(Base64DecoderByte).MaskSlashOrUnderscore; |
| | | 1596 | | |
| | 293726 | 1597 | | public ReadOnlySpan<int> Vector128LutHigh => default(Base64DecoderByte).Vector128LutHigh; |
| | | 1598 | | |
| | 293726 | 1599 | | public ReadOnlySpan<int> Vector128LutLow => default(Base64DecoderByte).Vector128LutLow; |
| | | 1600 | | |
| | 293726 | 1601 | | public ReadOnlySpan<uint> Vector128LutShift => default(Base64DecoderByte).Vector128LutShift; |
| | | 1602 | | |
| | 0 | 1603 | | public ReadOnlySpan<uint> AdvSimdLutOne3 => default(Base64DecoderByte).AdvSimdLutOne3; |
| | | 1604 | | |
| | 0 | 1605 | | public uint AdvSimdLutTwo3Uint1 => default(Base64DecoderByte).AdvSimdLutTwo3Uint1; |
| | | 1606 | | |
| | 358762 | 1607 | | public int GetMaxDecodedLength(int sourceLength) => Base64.GetMaxDecodedFromUtf8Length(sourceLength); |
| | | 1608 | | |
| | 0 | 1609 | | public bool IsInvalidLength(int bufferLength) => bufferLength % 4 != 0; |
| | | 1610 | | |
| | 207108 | 1611 | | public bool IsValidPadding(uint padChar) => padChar == EncodingPad; |
| | | 1612 | | |
| | 358762 | 1613 | | public int SrcLength(bool _, int sourceLength) => sourceLength & ~0x3; |
| | | 1614 | | |
| | | 1615 | | #if NET |
| | | 1616 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1617 | | [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] |
| | | 1618 | | [CompExactlyDependsOn(typeof(Ssse3))] |
| | | 1619 | | public bool TryDecode128Core(Vector128<byte> str, Vector128<byte> hiNibbles, Vector128<byte> maskSlashOrUnde |
| | | 1620 | | Vector128<byte> lutLow, Vector128<byte> lutHigh, Vector128<sbyte> lutShift, Vector128<byte> shiftForUnde |
| | 311554 | 1621 | | default(Base64DecoderByte).TryDecode128Core(str, hiNibbles, maskSlashOrUnderscore, mask8F, lutLow, lutHi |
| | | 1622 | | |
| | | 1623 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1624 | | [CompExactlyDependsOn(typeof(Avx2))] |
| | | 1625 | | public bool TryDecode256Core(Vector256<sbyte> str, Vector256<sbyte> hiNibbles, Vector256<sbyte> maskSlashOrU |
| | | 1626 | | Vector256<sbyte> lutHigh, Vector256<sbyte> lutShift, Vector256<sbyte> shiftForUnderscore, out Vector256< |
| | 2445862 | 1627 | | default(Base64DecoderByte).TryDecode256Core(str, hiNibbles, maskSlashOrUnderscore, lutLow, lutHigh, lutS |
| | | 1628 | | |
| | | 1629 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1630 | | public unsafe bool TryLoadVector512(ushort* src, ushort* srcStart, int sourceLength, out Vector512<sbyte> st |
| | | 1631 | | { |
| | 0 | 1632 | | AssertRead<Vector512<ushort>>(src, srcStart, sourceLength); |
| | 0 | 1633 | | Vector512<ushort> utf16VectorLower = Vector512.Load(src); |
| | 0 | 1634 | | Vector512<ushort> utf16VectorUpper = Vector512.Load(src + 32); |
| | 0 | 1635 | | if (Ascii.VectorContainsNonAsciiChar(utf16VectorLower | utf16VectorUpper)) |
| | | 1636 | | { |
| | 0 | 1637 | | str = default; |
| | 0 | 1638 | | return false; |
| | | 1639 | | } |
| | | 1640 | | |
| | 0 | 1641 | | str = Ascii.ExtractAsciiVector(utf16VectorLower, utf16VectorUpper).AsSByte(); |
| | 0 | 1642 | | return true; |
| | | 1643 | | } |
| | | 1644 | | |
| | | 1645 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1646 | | [CompExactlyDependsOn(typeof(Avx2))] |
| | | 1647 | | public unsafe bool TryLoadAvxVector256(ushort* src, ushort* srcStart, int sourceLength, out Vector256<sbyte> |
| | | 1648 | | { |
| | 2454134 | 1649 | | AssertRead<Vector256<sbyte>>(src, srcStart, sourceLength); |
| | 2454134 | 1650 | | Vector256<ushort> utf16VectorLower = Avx.LoadVector256(src); |
| | 2454134 | 1651 | | Vector256<ushort> utf16VectorUpper = Avx.LoadVector256(src + 16); |
| | | 1652 | | |
| | 2454134 | 1653 | | if (Ascii.VectorContainsNonAsciiChar(utf16VectorLower | utf16VectorUpper)) |
| | | 1654 | | { |
| | 8272 | 1655 | | str = default; |
| | 8272 | 1656 | | return false; |
| | | 1657 | | } |
| | | 1658 | | |
| | 2445862 | 1659 | | str = Ascii.ExtractAsciiVector(utf16VectorLower, utf16VectorUpper).AsSByte(); |
| | 2445862 | 1660 | | return true; |
| | | 1661 | | } |
| | | 1662 | | |
| | | 1663 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1664 | | public unsafe bool TryLoadVector128(ushort* src, ushort* srcStart, int sourceLength, out Vector128<byte> str |
| | | 1665 | | { |
| | 320842 | 1666 | | AssertRead<Vector128<sbyte>>(src, srcStart, sourceLength); |
| | 320842 | 1667 | | Vector128<ushort> utf16VectorLower = Vector128.LoadUnsafe(ref *src); |
| | 320842 | 1668 | | Vector128<ushort> utf16VectorUpper = Vector128.LoadUnsafe(ref *src, 8); |
| | 320842 | 1669 | | if (Ascii.VectorContainsNonAsciiChar(utf16VectorLower | utf16VectorUpper)) |
| | | 1670 | | { |
| | 9288 | 1671 | | str = default; |
| | 9288 | 1672 | | return false; |
| | | 1673 | | } |
| | | 1674 | | |
| | 311554 | 1675 | | str = Ascii.ExtractAsciiVector(utf16VectorLower, utf16VectorUpper); |
| | 311554 | 1676 | | return true; |
| | | 1677 | | } |
| | | 1678 | | |
| | | 1679 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1680 | | [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] |
| | | 1681 | | public unsafe bool TryLoadArmVector128x4(ushort* src, ushort* srcStart, int sourceLength, |
| | | 1682 | | out Vector128<byte> str1, out Vector128<byte> str2, out Vector128<byte> str3, out Vector128<byte> str4) |
| | | 1683 | | { |
| | 0 | 1684 | | AssertRead<Vector128<sbyte>>(src, srcStart, sourceLength); |
| | 0 | 1685 | | var (s11, s12, s21, s22) = AdvSimd.Arm64.Load4xVector128AndUnzip(src); |
| | 0 | 1686 | | var (s31, s32, s41, s42) = AdvSimd.Arm64.Load4xVector128AndUnzip(src + 32); |
| | | 1687 | | |
| | 0 | 1688 | | if (Ascii.VectorContainsNonAsciiChar(s11 | s12 | s21 | s22 | s31 | s32 | s41 | s42)) |
| | | 1689 | | { |
| | 0 | 1690 | | str1 = str2 = str3 = str4 = default; |
| | 0 | 1691 | | return false; |
| | | 1692 | | } |
| | | 1693 | | |
| | 0 | 1694 | | str1 = Ascii.ExtractAsciiVector(s11, s31); |
| | 0 | 1695 | | str2 = Ascii.ExtractAsciiVector(s12, s32); |
| | 0 | 1696 | | str3 = Ascii.ExtractAsciiVector(s21, s41); |
| | 0 | 1697 | | str4 = Ascii.ExtractAsciiVector(s22, s42); |
| | | 1698 | | |
| | 0 | 1699 | | return true; |
| | | 1700 | | } |
| | | 1701 | | #endif // NET |
| | | 1702 | | |
| | | 1703 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1704 | | public unsafe int DecodeFourElements(ushort* source, ref sbyte decodingMap) |
| | | 1705 | | { |
| | | 1706 | | // The 'source' pointer is expected to reference at least 4 elements, and the 'decodingMap' consists of 256 sbytes |
| | 1152268 | 1707 | | uint t0 = source[0]; |
| | 1152268 | 1708 | | uint t1 = source[1]; |
| | 1152268 | 1709 | | uint t2 = source[2]; |
| | 1152268 | 1710 | | uint t3 = source[3]; |
| | | 1711 | | |
| | 1152268 | 1712 | | if (((t0 | t1 | t2 | t3) & 0xffffff00) != 0) |
| | | 1713 | | { |
| | 20460 | 1714 | | return -1; // One or more chars fall outside the 00..ff range, i.e. an invalid Base64 character. |
| | | 1715 | | } |
| | | 1716 | | |
| | 1131808 | 1717 | | int i0 = Unsafe.Add(ref decodingMap, (int)t0); |
| | 1131808 | 1718 | | int i1 = Unsafe.Add(ref decodingMap, (int)t1); |
| | 1131808 | 1719 | | int i2 = Unsafe.Add(ref decodingMap, (int)t2); |
| | 1131808 | 1720 | | int i3 = Unsafe.Add(ref decodingMap, (int)t3); |
| | | 1721 | | |
| | 1131808 | 1722 | | i0 <<= 18; |
| | 1131808 | 1723 | | i1 <<= 12; |
| | 1131808 | 1724 | | i2 <<= 6; |
| | | 1725 | | |
| | 1131808 | 1726 | | i0 |= i3; |
| | 1131808 | 1727 | | i1 |= i2; |
| | | 1728 | | |
| | 1131808 | 1729 | | i0 |= i1; |
| | 1131808 | 1730 | | return i0; |
| | | 1731 | | } |
| | | 1732 | | |
| | | 1733 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1734 | | public unsafe int DecodeRemaining(ushort* srcEnd, ref sbyte decodingMap, long remaining, out uint t2, out ui |
| | | 1735 | | { |
| | | 1736 | | uint t0; |
| | | 1737 | | uint t1; |
| | 97874 | 1738 | | t2 = EncodingPad; |
| | 97874 | 1739 | | t3 = EncodingPad; |
| | | 1740 | | switch (remaining) |
| | | 1741 | | { |
| | | 1742 | | case 2: |
| | 0 | 1743 | | t0 = srcEnd[-2]; |
| | 0 | 1744 | | t1 = srcEnd[-1]; |
| | 0 | 1745 | | break; |
| | | 1746 | | case 3: |
| | 0 | 1747 | | t0 = srcEnd[-3]; |
| | 0 | 1748 | | t1 = srcEnd[-2]; |
| | 0 | 1749 | | t2 = srcEnd[-1]; |
| | 0 | 1750 | | break; |
| | | 1751 | | case 4: |
| | 97874 | 1752 | | t0 = srcEnd[-4]; |
| | 97874 | 1753 | | t1 = srcEnd[-3]; |
| | 97874 | 1754 | | t2 = srcEnd[-2]; |
| | 97874 | 1755 | | t3 = srcEnd[-1]; |
| | 97874 | 1756 | | break; |
| | | 1757 | | default: |
| | 0 | 1758 | | return -1; |
| | | 1759 | | } |
| | | 1760 | | |
| | 97874 | 1761 | | if (((t0 | t1 | t2 | t3) & 0xffffff00) != 0) |
| | | 1762 | | { |
| | 1106 | 1763 | | return -1; |
| | | 1764 | | } |
| | | 1765 | | |
| | 96768 | 1766 | | int i0 = Unsafe.Add(ref decodingMap, (IntPtr)t0); |
| | 96768 | 1767 | | int i1 = Unsafe.Add(ref decodingMap, (IntPtr)t1); |
| | | 1768 | | |
| | 96768 | 1769 | | i0 <<= 18; |
| | 96768 | 1770 | | i1 <<= 12; |
| | | 1771 | | |
| | 96768 | 1772 | | i0 |= i1; |
| | 96768 | 1773 | | return i0; |
| | | 1774 | | } |
| | | 1775 | | |
| | | 1776 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1777 | | public int IndexOfAnyExceptWhiteSpace(ReadOnlySpan<ushort> span) |
| | | 1778 | | { |
| | 1312284 | 1779 | | for (int i = 0; i < span.Length; i++) |
| | | 1780 | | { |
| | 656130 | 1781 | | if (!IsWhiteSpace(span[i])) |
| | | 1782 | | { |
| | 226690 | 1783 | | return i; |
| | | 1784 | | } |
| | | 1785 | | } |
| | | 1786 | | |
| | 12 | 1787 | | return -1; |
| | | 1788 | | } |
| | | 1789 | | |
| | | 1790 | | [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| | | 1791 | | public OperationStatus DecodeWithWhiteSpaceBlockwiseWrapper<TBase64Decoder>(TBase64Decoder decoder, ReadOnly |
| | | 1792 | | Span<byte> bytes, ref int bytesConsumed, ref int bytesWritten, bool isFinalBlock = true) where TBase64De |
| | 11894 | 1793 | | DecodeWithWhiteSpaceBlockwise(default(Base64DecoderChar), source, bytes, ref bytesConsumed, ref bytesWri |
| | | 1794 | | } |
| | | 1795 | | } |
| | | 1796 | | } |