| | | 1 | | using System.Text; |
| | | 2 | | |
namespace ValidateLib.TabularData.Parsing
{
    /// <summary>
    /// Reads the quoted value of a single field in a row.
    /// Implements the algorithm: https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#dfn-read-a-quoted-value
    /// </summary>
    public class QuotedCharReader : Reader
    {
        /// <summary>
        /// Creates the reader and passes <paramref name="flags"/> on to the base <see cref="Reader"/>.
        /// </summary>
        /// <param name="flags">Parsing flags; this class reads its quote string and escape character.</param>
        public QuotedCharReader(Flags flags) : base(flags)
        {
        }

        /// <summary>
        /// Checks whether the next characters of <paramref name="reader"/> spell out the quote string.
        /// On a match the characters stay consumed; otherwise the reader is moved back by the
        /// count that the read reported, so the look-ahead is undone.
        /// </summary>
        /// <param name="reader">Reader to look ahead in.</param>
        /// <returns>True when the quote string was read; otherwise false.</returns>
        public bool VerifyNextCharsAreQuoteChar(CustomStreamReader reader)
        {
            char[] buffer = new char[quoteCharacterLength];
            int charsRead = reader.ReadChars(quoteCharacterLength, buffer);

            // A short read can never match; the string comparison is skipped in that case.
            bool match = charsRead == quoteCharacterLength && new string(buffer) == flags.quoteCharacter;

            if (!match)
            {
                // Undo the look-ahead so the caller sees the same characters again.
                reader.MoveBack(charsRead);
            }

            return match;
        }

        /// <summary>
        /// Reads a quoted value up to and including its closing quote string.
        /// Implements the method from the algorithm: https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#dfn-read-a-quoted-value
        /// </summary>
        /// <remarks>
        /// The result always starts with the quote string, which is added without being read from
        /// <paramref name="reader"/>; presumably the caller has already consumed the opening quote.
        /// If the input ends before a closing quote is found, the text collected so far is returned.
        /// </remarks>
        /// <param name="reader">Reader to read the value from.</param>
        /// <returns>The quoted value, including quote strings and escape characters.</returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// The input ends directly after an escape character that differs from the quote string.
        /// </exception>
        public string ReadQuotedValue(CustomStreamReader reader)
        {
            StringBuilder quotedValue = new StringBuilder();
            quotedValue.Append(flags.quoteCharacter);

            while (true)
            {
                int nextChar = reader.ReadChar();

                if (IsEndOfFile(nextChar))
                {
                    // Unterminated value: hand back what has been collected so far.
                    return quotedValue.ToString();
                }

                // ProcessChar reports true once the closing quote has been consumed.
                if (ProcessChar(reader, quotedValue, (char)nextChar))
                {
                    return quotedValue.ToString();
                }
            }
        }

        /// <summary>
        /// Processes one character read inside a quoted value.
        /// </summary>
        /// <param name="reader">Reader the character came from; used for look-ahead.</param>
        /// <param name="quotedValue">Accumulator for the quoted value.</param>
        /// <param name="currentChar">The character that was just read.</param>
        /// <returns>True if the closing quote string has been found.</returns>
        private bool ProcessChar(CustomStreamReader reader, StringBuilder quotedValue, char currentChar)
        {
            if (currentChar == flags.escapeCharacter)
            {
                // The escape check comes first: when the escape character equals the quote string,
                // HandleEscapeChar decides between an escaped quote and the closing quote.
                return HandleEscapeChar(reader, quotedValue);
            }

            if (currentChar == flags.quoteCharacter[0])
            {
                return HandlePotentialQuoteChar(reader, quotedValue, currentChar);
            }

            // Ordinary content character.
            quotedValue.Append(currentChar);
            return false;
        }

        /// <summary>
        /// Handles the case when the first character of the quote string has been read.
        /// </summary>
        /// <param name="reader">Reader positioned right after <paramref name="currentChar"/>.</param>
        /// <param name="quotedValue">Accumulator for the quoted value.</param>
        /// <param name="currentChar">The character that matched the start of the quote string.</param>
        /// <returns>True if the complete quote string was read.</returns>
        private bool HandlePotentialQuoteChar(CustomStreamReader reader, StringBuilder quotedValue, char currentChar)
        {
            // Step back so the whole quote string, including its first character, can be re-read.
            reader.MoveBack(1);

            if (VerifyNextCharsAreQuoteChar(reader))
            {
                quotedValue.Append(flags.quoteCharacter);
                return true;
            }

            // Only the first character matched: consume it again and keep it as ordinary content.
            reader.ReadChar();
            quotedValue.Append(currentChar);
            return false;
        }

        /// <summary>
        /// Handles the case when the escape character has been read inside a quoted value.
        /// </summary>
        /// <param name="reader">Reader positioned right after the escape character.</param>
        /// <param name="quotedValue">Accumulator for the quoted value.</param>
        /// <returns>
        /// True when the closing quote was found, which happens when the quote string equals
        /// the escape character and no further quote string follows it.
        /// </returns>
        /// <exception cref="System.IO.InvalidDataException">
        /// The input ends directly after an escape character that differs from the quote string.
        /// </exception>
        private bool HandleEscapeChar(CustomStreamReader reader, StringBuilder quotedValue)
        {
            if (VerifyNextCharsAreQuoteChar(reader))
            {
                // Escaped quote: keep both the escape character and the quote string.
                quotedValue.Append(flags.escapeCharacter);
                quotedValue.Append(flags.quoteCharacter);
                return false;
            }

            if (flags.quoteCharacter == flags.escapeCharacter.ToString())
            {
                // The escape character doubles as the quote and is not followed by another quote,
                // so it closes the value.
                quotedValue.Append(flags.quoteCharacter);
                return true;
            }

            // A distinct escape character escapes whatever single character follows it.
            quotedValue.Append(flags.escapeCharacter);
            int nextSymbol = reader.ReadChar();

            if (IsEndOfFile(nextSymbol))
            {
                throw new System.IO.InvalidDataException("Unexpected end of file after escape char!");
            }

            quotedValue.Append((char)nextSymbol);
            return false;
        }
    }
}