| | | 1 | | // Licensed to the .NET Foundation under one or more agreements. |
| | | 2 | | // The .NET Foundation licenses this file to you under the MIT license. |
| | | 3 | | |
| | | 4 | | using System.Buffers; |
| | | 5 | | using System.Buffers.Text; |
| | | 6 | | using System.Diagnostics; |
| | | 7 | | using System.Text.Encodings.Web; |
| | | 8 | | |
| | | 9 | | #if !NET |
| | | 10 | | using System.Runtime.CompilerServices; |
| | | 11 | | #endif |
| | | 12 | | |
| | | 13 | | namespace System.Text.Json |
| | | 14 | | { |
| | | 15 | | internal static partial class JsonWriterHelper |
| | | 16 | | { |
// Highest code point in the ASCII range (U+007F); the IsAsciiValue helpers compare against it.
public const int LastAsciiCharacter = 0x7F;

// Lookup table indexed by a byte value (256 entries): non-zero = allowed as-is, 0 = must be escaped.
//
// The only allowed entries are ASCII characters from ' ' (0x20) through '~' (0x7E), minus:
//   - characters that are escaped as hex: '"', '\'', '&', '+', '<', '>', '`'
//   - characters that are escaped with a backslash: '\n', '\r', '\t', '\\', '\b', '\f'
private static ReadOnlySpan<byte> AllowList =>
[
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0000..U+000F (control characters)
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0010..U+001F (control characters)
    1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, // U+0020..U+002F ('"', '&', '\'', '+' disallowed)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, // U+0030..U+003F ('<', '>' disallowed)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // U+0040..U+004F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, // U+0050..U+005F ('\\' disallowed)
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // U+0060..U+006F ('`' disallowed)
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // U+0070..U+007F (U+007F disallowed)

    // U+0080..U+00FF are present (all disallowed) purely for performance, so the UTF-8 code
    // can index the table with any byte without checking the boundary first.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0080..U+008F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+0090..U+009F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00A0..U+00AF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00B0..U+00BF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00C0..U+00CF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00D0..U+00DF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00E0..U+00EF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // U+00F0..U+00FF
];
| | | 44 | | |
| | | 45 | | #if NET |
| | | 46 | | private const string HexFormatString = "X4"; |
| | | 47 | | #endif |
| | | 48 | | |
| | 0 | 49 | | private static readonly StandardFormat s_hexStandardFormat = new StandardFormat('X', 4); |
| | | 50 | | |
// True when the allow list marks this byte as disallowed (its entry is 0).
private static bool NeedsEscaping(byte value)
{
    return AllowList[value] == 0;
}
| | | 52 | | |
// True when the allow list marks this char as disallowed (its entry is 0).
// The char is used as a table index without any range check here; the table has
// 256 entries and the caller in this file tests IsAsciiValue before calling.
private static bool NeedsEscapingNoBoundsCheck(char value)
{
    return AllowList[value] == 0;
}
| | | 54 | | |
// Asks the encoder for the index of the first byte of the UTF-8 text that it would encode
// (the result of FindFirstCharacterToEncodeUtf8). A null encoder means JavaScriptEncoder.Default.
public static int NeedsEscaping(ReadOnlySpan<byte> value, JavaScriptEncoder? encoder)
{
    JavaScriptEncoder effectiveEncoder = encoder ?? JavaScriptEncoder.Default;
    return effectiveEncoder.FindFirstCharacterToEncodeUtf8(value);
}
| | | 59 | | |
// Asks the encoder for the index of the first char of the UTF-16 text that it would encode
// (the result of FindFirstCharacterToEncode). A null encoder means JavaScriptEncoder.Default.
// Empty input returns -1 without calling the encoder.
public static int NeedsEscaping(ReadOnlySpan<char> value, JavaScriptEncoder? encoder)
{
    // Some JavaScriptEncoder.FindFirstCharacterToEncode implementations guard against
    // null pointers, so answer for the empty case up-front instead of calling them.
    if (value.Length == 0)
    {
        return -1;
    }

    JavaScriptEncoder effectiveEncoder = encoder ?? JavaScriptEncoder.Default;

    // There is no public FindFirstCharacterToEncode(Span<char>) API yet, so the text is
    // pinned and handed to the unsafe FindFirstCharacterToEncode(char*, int) overload.
    unsafe
    {
        fixed (char* start = value)
        {
            return effectiveEncoder.FindFirstCharacterToEncode(start, value.Length);
        }
    }
}
| | | 79 | | |
| | | 80 | | public static int GetMaxEscapedLength(int textLength, int firstIndexToEscape) |
| | 0 | 81 | | { |
| | 0 | 82 | | Debug.Assert(textLength > 0); |
| | 0 | 83 | | Debug.Assert(firstIndexToEscape >= 0 && firstIndexToEscape < textLength); |
| | 0 | 84 | | return firstIndexToEscape + JsonConstants.MaxExpansionFactorWhileEscaping * (textLength - firstIndexToEscape |
| | 0 | 85 | | } |
| | | 86 | | |
| | | 87 | | private static void EscapeString(ReadOnlySpan<byte> value, Span<byte> destination, JavaScriptEncoder encoder, re |
| | 0 | 88 | | { |
| | 0 | 89 | | Debug.Assert(encoder != null); |
| | | 90 | | |
| | 0 | 91 | | OperationStatus result = encoder.EncodeUtf8(value, destination, out int encoderBytesConsumed, out int encode |
| | | 92 | | |
| | 0 | 93 | | Debug.Assert(result != OperationStatus.DestinationTooSmall); |
| | 0 | 94 | | Debug.Assert(result != OperationStatus.NeedMoreData || !isFinalBlock); |
| | | 95 | | |
| | 0 | 96 | | if (!(result == OperationStatus.Done || (result == OperationStatus.NeedMoreData && !isFinalBlock))) |
| | 0 | 97 | | { |
| | 0 | 98 | | ThrowHelper.ThrowArgumentException_InvalidUTF8(value.Slice(encoderBytesWritten)); |
| | | 99 | | } |
| | | 100 | | |
| | 0 | 101 | | Debug.Assert(encoderBytesConsumed == value.Length || (result == OperationStatus.NeedMoreData && !isFinalBloc |
| | | 102 | | |
| | 0 | 103 | | written += encoderBytesWritten; |
| | 0 | 104 | | consumed += encoderBytesConsumed; |
| | 0 | 105 | | } |
| | | 106 | | |
| | | 107 | | public static void EscapeString(ReadOnlySpan<byte> value, Span<byte> destination, int indexOfFirstByteToEscape, |
| | 0 | 108 | | => EscapeString(value, destination, indexOfFirstByteToEscape, encoder, out _, out written, isFinalBlock: tru |
| | | 109 | | |
| | | 110 | | public static void EscapeString(ReadOnlySpan<byte> value, Span<byte> destination, int indexOfFirstByteToEscape, |
| | 0 | 111 | | { |
| | 0 | 112 | | Debug.Assert(indexOfFirstByteToEscape >= 0 && indexOfFirstByteToEscape < value.Length); |
| | | 113 | | |
| | 0 | 114 | | value.Slice(0, indexOfFirstByteToEscape).CopyTo(destination); |
| | 0 | 115 | | written = indexOfFirstByteToEscape; |
| | 0 | 116 | | consumed = indexOfFirstByteToEscape; |
| | | 117 | | |
| | 0 | 118 | | if (encoder != null) |
| | 0 | 119 | | { |
| | 0 | 120 | | destination = destination.Slice(indexOfFirstByteToEscape); |
| | 0 | 121 | | value = value.Slice(indexOfFirstByteToEscape); |
| | 0 | 122 | | EscapeString(value, destination, encoder, ref consumed, ref written, isFinalBlock); |
| | 0 | 123 | | } |
| | | 124 | | else |
| | 0 | 125 | | { |
| | | 126 | | // For performance when no encoder is specified, perform escaping here for Ascii and on the |
| | | 127 | | // first occurrence of a non-Ascii character, then call into the default encoder. |
| | 0 | 128 | | while (indexOfFirstByteToEscape < value.Length) |
| | 0 | 129 | | { |
| | 0 | 130 | | byte val = value[indexOfFirstByteToEscape]; |
| | 0 | 131 | | if (IsAsciiValue(val)) |
| | 0 | 132 | | { |
| | 0 | 133 | | if (NeedsEscaping(val)) |
| | 0 | 134 | | { |
| | 0 | 135 | | EscapeNextBytes(val, destination, ref written); |
| | 0 | 136 | | indexOfFirstByteToEscape++; |
| | 0 | 137 | | consumed++; |
| | 0 | 138 | | } |
| | | 139 | | else |
| | 0 | 140 | | { |
| | 0 | 141 | | destination[written] = val; |
| | 0 | 142 | | written++; |
| | 0 | 143 | | indexOfFirstByteToEscape++; |
| | 0 | 144 | | consumed++; |
| | 0 | 145 | | } |
| | 0 | 146 | | } |
| | | 147 | | else |
| | 0 | 148 | | { |
| | | 149 | | // Fall back to default encoder. |
| | 0 | 150 | | destination = destination.Slice(written); |
| | 0 | 151 | | value = value.Slice(indexOfFirstByteToEscape); |
| | 0 | 152 | | EscapeString(value, destination, JavaScriptEncoder.Default, ref consumed, ref written, isFinalBl |
| | 0 | 153 | | break; |
| | | 154 | | } |
| | 0 | 155 | | } |
| | 0 | 156 | | } |
| | 0 | 157 | | } |
| | | 158 | | |
| | | 159 | | private static void EscapeNextBytes(byte value, Span<byte> destination, ref int written) |
| | 0 | 160 | | { |
| | 0 | 161 | | destination[written++] = (byte)'\\'; |
| | 0 | 162 | | switch (value) |
| | | 163 | | { |
| | | 164 | | case JsonConstants.Quote: |
| | | 165 | | // Optimize for the common quote case. |
| | 0 | 166 | | destination[written++] = (byte)'u'; |
| | 0 | 167 | | destination[written++] = (byte)'0'; |
| | 0 | 168 | | destination[written++] = (byte)'0'; |
| | 0 | 169 | | destination[written++] = (byte)'2'; |
| | 0 | 170 | | destination[written++] = (byte)'2'; |
| | 0 | 171 | | break; |
| | | 172 | | case JsonConstants.LineFeed: |
| | 0 | 173 | | destination[written++] = (byte)'n'; |
| | 0 | 174 | | break; |
| | | 175 | | case JsonConstants.CarriageReturn: |
| | 0 | 176 | | destination[written++] = (byte)'r'; |
| | 0 | 177 | | break; |
| | | 178 | | case JsonConstants.Tab: |
| | 0 | 179 | | destination[written++] = (byte)'t'; |
| | 0 | 180 | | break; |
| | | 181 | | case JsonConstants.BackSlash: |
| | 0 | 182 | | destination[written++] = (byte)'\\'; |
| | 0 | 183 | | break; |
| | | 184 | | case JsonConstants.BackSpace: |
| | 0 | 185 | | destination[written++] = (byte)'b'; |
| | 0 | 186 | | break; |
| | | 187 | | case JsonConstants.FormFeed: |
| | 0 | 188 | | destination[written++] = (byte)'f'; |
| | 0 | 189 | | break; |
| | | 190 | | default: |
| | 0 | 191 | | destination[written++] = (byte)'u'; |
| | | 192 | | |
| | 0 | 193 | | bool result = Utf8Formatter.TryFormat(value, destination.Slice(written), out int bytesWritten, forma |
| | 0 | 194 | | Debug.Assert(result); |
| | 0 | 195 | | Debug.Assert(bytesWritten == 4); |
| | 0 | 196 | | written += bytesWritten; |
| | 0 | 197 | | break; |
| | | 198 | | } |
| | 0 | 199 | | } |
| | | 200 | | |
// True when the byte is no greater than LastAsciiCharacter.
private static bool IsAsciiValue(byte value)
{
    return value <= LastAsciiCharacter;
}
| | | 202 | | |
// True when the char is no greater than LastAsciiCharacter.
private static bool IsAsciiValue(char value)
{
    return value <= LastAsciiCharacter;
}
| | | 204 | | |
| | | 205 | | private static void EscapeString(ReadOnlySpan<char> value, Span<char> destination, JavaScriptEncoder encoder, re |
| | 0 | 206 | | { |
| | 0 | 207 | | Debug.Assert(encoder != null); |
| | | 208 | | |
| | 0 | 209 | | OperationStatus result = encoder.Encode(value, destination, out int encoderBytesConsumed, out int encoderCha |
| | | 210 | | |
| | 0 | 211 | | Debug.Assert(result != OperationStatus.DestinationTooSmall); |
| | 0 | 212 | | Debug.Assert(result != OperationStatus.NeedMoreData || !isFinalBlock); |
| | | 213 | | |
| | 0 | 214 | | if (!(result == OperationStatus.Done || (result == OperationStatus.NeedMoreData && !isFinalBlock))) |
| | 0 | 215 | | { |
| | 0 | 216 | | ThrowHelper.ThrowArgumentException_InvalidUTF16(value[encoderCharsWritten]); |
| | | 217 | | } |
| | | 218 | | |
| | 0 | 219 | | Debug.Assert(encoderBytesConsumed == value.Length || (result == OperationStatus.NeedMoreData && !isFinalBloc |
| | | 220 | | |
| | 0 | 221 | | written += encoderCharsWritten; |
| | 0 | 222 | | consumed += encoderBytesConsumed; |
| | 0 | 223 | | } |
| | | 224 | | |
| | | 225 | | public static void EscapeString(ReadOnlySpan<char> value, Span<char> destination, int indexOfFirstByteToEscape, |
| | 0 | 226 | | => EscapeString(value, destination, indexOfFirstByteToEscape, encoder, out _, out written, isFinalBlock: tru |
| | | 227 | | |
| | | 228 | | public static void EscapeString(ReadOnlySpan<char> value, Span<char> destination, int indexOfFirstByteToEscape, |
| | 0 | 229 | | { |
| | 0 | 230 | | Debug.Assert(indexOfFirstByteToEscape >= 0 && indexOfFirstByteToEscape < value.Length); |
| | | 231 | | |
| | 0 | 232 | | value.Slice(0, indexOfFirstByteToEscape).CopyTo(destination); |
| | 0 | 233 | | written = indexOfFirstByteToEscape; |
| | 0 | 234 | | consumed = indexOfFirstByteToEscape; |
| | | 235 | | |
| | 0 | 236 | | if (encoder != null) |
| | 0 | 237 | | { |
| | 0 | 238 | | destination = destination.Slice(indexOfFirstByteToEscape); |
| | 0 | 239 | | value = value.Slice(indexOfFirstByteToEscape); |
| | 0 | 240 | | EscapeString(value, destination, encoder, ref consumed, ref written, isFinalBlock); |
| | 0 | 241 | | } |
| | | 242 | | else |
| | 0 | 243 | | { |
| | | 244 | | // For performance when no encoder is specified, perform escaping here for Ascii and on the |
| | | 245 | | // first occurrence of a non-Ascii character, then call into the default encoder. |
| | 0 | 246 | | while (indexOfFirstByteToEscape < value.Length) |
| | 0 | 247 | | { |
| | 0 | 248 | | char val = value[indexOfFirstByteToEscape]; |
| | 0 | 249 | | if (IsAsciiValue(val)) |
| | 0 | 250 | | { |
| | 0 | 251 | | if (NeedsEscapingNoBoundsCheck(val)) |
| | 0 | 252 | | { |
| | 0 | 253 | | EscapeNextChars(val, destination, ref written); |
| | 0 | 254 | | indexOfFirstByteToEscape++; |
| | 0 | 255 | | consumed++; |
| | 0 | 256 | | } |
| | | 257 | | else |
| | 0 | 258 | | { |
| | 0 | 259 | | destination[written] = val; |
| | 0 | 260 | | written++; |
| | 0 | 261 | | indexOfFirstByteToEscape++; |
| | 0 | 262 | | consumed++; |
| | 0 | 263 | | } |
| | 0 | 264 | | } |
| | | 265 | | else |
| | 0 | 266 | | { |
| | | 267 | | // Fall back to default encoder. |
| | 0 | 268 | | destination = destination.Slice(written); |
| | 0 | 269 | | value = value.Slice(indexOfFirstByteToEscape); |
| | 0 | 270 | | EscapeString(value, destination, JavaScriptEncoder.Default, ref consumed, ref written, isFinalBl |
| | 0 | 271 | | break; |
| | | 272 | | } |
| | 0 | 273 | | } |
| | 0 | 274 | | } |
| | 0 | 275 | | } |
| | | 276 | | |
// Writes the escaped form of one ASCII char into destination, starting at index 'written',
// and advances 'written' past every char it emits. Output is a backslash followed by either
// a single-letter escape (n, r, t, \, b, f) or 'u' plus four hex digits.
private static void EscapeNextChars(char value, Span<char> destination, ref int written)
{
    Debug.Assert(IsAsciiValue(value));

    byte ascii = (byte)value;

    // Characters that have a dedicated two-character escape; '\0' means "no short form".
    char shortForm = ascii switch
    {
        JsonConstants.LineFeed => 'n',
        JsonConstants.CarriageReturn => 'r',
        JsonConstants.Tab => 't',
        JsonConstants.BackSlash => '\\',
        JsonConstants.BackSpace => 'b',
        JsonConstants.FormFeed => 'f',
        _ => '\0',
    };

    // Every escape starts with a backslash.
    destination[written++] = '\\';

    if (shortForm != '\0')
    {
        destination[written++] = shortForm;
        return;
    }

    // Everything else is emitted as \uXXXX.
    destination[written++] = 'u';

    if (ascii == JsonConstants.Quote)
    {
        // Optimize for the common quote case by writing the digits of \u0022 directly.
        destination[written++] = '0';
        destination[written++] = '0';
        destination[written++] = '2';
        destination[written++] = '2';
        return;
    }

#if NET
    ((int)value).TryFormat(destination.Slice(written), out int hexDigits, HexFormatString);
    Debug.Assert(hexDigits == 4);
    written += hexDigits;
#else
    written = WriteHex(value, destination, written);
#endif
}
| | | 323 | | |
| | | 324 | | #if !NET |
// Writes four upper-case hex chars into destination starting at index 'written', one per
// 4-bit shift of value (12, 8, 4, 0) in that order, and returns the index just past them.
private static int WriteHex(int value, Span<char> destination, int written)
{
    for (int shift = 12; shift >= 0; shift -= 4)
    {
        destination[written++] = HexConverter.ToCharUpper(value >> shift);
    }

    return written;
}
| | | 333 | | #endif |
| | | 334 | | } |
| | | 335 | | } |