| | 1 | | using System.Text; |
| | 2 | |
|
namespace ValidateLib.TabularData.Parsing
{
    /// <summary>
    /// Reads the quoted value of one field in a row.
    /// Implements the algorithm: https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#dfn-read-a-quoted-value
    /// </summary>
    public class QuotedCharReader : Reader
    {
        /// <summary>
        /// Creates a reader that uses the quote and escape settings carried by <paramref name="flags"/>.
        /// </summary>
        /// <param name="flags">Parsing flags forwarded to the base reader.</param>
        public QuotedCharReader(Flags flags) : base(flags)
        {
        }

        /// <summary>
        /// Checks whether the next characters of <paramref name="reader"/> are exactly the quote string.
        /// On a match the characters stay consumed; otherwise the reader is moved back by the
        /// number of characters that were read, so the position is unchanged.
        /// </summary>
        /// <param name="reader">Reader positioned where the quote string is expected.</param>
        /// <returns>True if the quote string was read; otherwise false.</returns>
        public bool VerifyNextCharsAreQuoteChar(CustomStreamReader reader)
        {
            // Read as many characters as the quote string is long.
            char[] buffer = new char[quoteCharacterLength];
            int charsRead = reader.ReadChars(quoteCharacterLength, buffer);

            // A short read can never match; only compare content when the full length was read.
            bool match = charsRead == quoteCharacterLength && new string(buffer) == flags.quoteCharacter;

            // Roll back the position if the characters don't match.
            if (!match)
            {
                reader.MoveBack(charsRead);
            }

            return match;
        }

        /// <summary>
        /// Implements the method from algorithm: https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#dfn-read-a-quoted-value
        /// The returned text starts with the quote string and, when a closing quote is found,
        /// ends with it as well. If the end of file is reached first, the text collected so far is returned.
        /// </summary>
        /// <param name="reader">Reader to consume characters from.</param>
        /// <returns>Quoted value</returns>
        /// <exception cref="System.IO.EndOfStreamException">
        /// The input ends directly after an escape character that differs from the quote string.
        /// </exception>
        public string ReadQuotedValue(CustomStreamReader reader)
        {
            StringBuilder quotedValue = new StringBuilder();
            quotedValue.Append(flags.quoteCharacter);

            while (true)
            {
                int nextChar = reader.ReadChar();

                // Unterminated quoted value: hand back what has been collected.
                if (IsEndOfFile(nextChar))
                {
                    return quotedValue.ToString();
                }

                char currentChar = (char)nextChar;

                // Return if we found the end of the quoted value.
                if (ProcessChar(reader, quotedValue, currentChar))
                {
                    return quotedValue.ToString();
                }
            }
        }

        /// <summary>
        /// Processes one char in a read quoted value.
        /// </summary>
        /// <param name="reader">Reader the character was taken from; may be advanced or moved back.</param>
        /// <param name="quotedValue">Accumulator for the quoted value.</param>
        /// <param name="currentChar">The character that has just been read.</param>
        /// <returns>
        /// True if we have found the ending quoteChar.
        /// </returns>
        private bool ProcessChar(CustomStreamReader reader, StringBuilder quotedValue, char currentChar)
        {
            // The escape check comes first so that an escape char equal to the quote char
            // is handled by the escape logic.
            if (currentChar == flags.escapeCharacter)
            {
                if (HandleEscapeChar(reader, quotedValue))
                {
                    return true; // we found the end of quoted value, quoted char same as escape char
                }
            }
            else if (currentChar == flags.quoteCharacter[0])
            {
                return HandlePotentialQuoteChar(reader, quotedValue, currentChar);
            }
            else
            {
                // normal case
                quotedValue.Append(currentChar);
            }

            return false;
        }

        /// <summary>
        /// Handles the case when the first character of the quote string has been read,
        /// so the complete quote string may follow.
        /// </summary>
        /// <param name="reader">Reader positioned just after <paramref name="currentChar"/>.</param>
        /// <param name="quotedValue">Accumulator for the quoted value.</param>
        /// <param name="currentChar">The character that equals the first char of the quote string.</param>
        /// <returns>True if we have read the quote string, i.e. the quoted value has ended.</returns>
        private bool HandlePotentialQuoteChar(CustomStreamReader reader, StringBuilder quotedValue, char currentChar)
        {
            // Step back so the whole quote string, including its first char, can be verified.
            reader.MoveBack(1);
            if (VerifyNextCharsAreQuoteChar(reader))
            {
                quotedValue.Append(flags.quoteCharacter);
                return true; // we found the end of quoted value
            }

            // Normal case: the text merely starts with the same char as the quote string.
            // The failed verification rolled the reader back, so consume that char again.
            reader.ReadChar();
            quotedValue.Append(currentChar);
            return false;
        }

        /// <summary>
        /// Handles the case when the escape char has been read inside quoted value.
        /// </summary>
        /// <param name="reader">Reader positioned just after the escape char.</param>
        /// <param name="quotedValue">Accumulator for the quoted value.</param>
        /// <returns>
        /// True when we have found the ending quote char = case when the quote char is equal to the escape char.
        /// </returns>
        /// <exception cref="System.IO.EndOfStreamException">
        /// The input ends directly after an escape char that differs from the quote string.
        /// </exception>
        private bool HandleEscapeChar(CustomStreamReader reader, StringBuilder quotedValue)
        {
            if (VerifyNextCharsAreQuoteChar(reader))
            {
                // Escaped quote: keep both the escape char and the quote string.
                quotedValue.Append(flags.escapeCharacter);
                quotedValue.Append(flags.quoteCharacter);
                return false;
            }

            if (flags.quoteCharacter != flags.escapeCharacter.ToString())
            {
                // Escape char followed by some other character: keep both.
                quotedValue.Append(flags.escapeCharacter);
                int nextSymbol = reader.ReadChar();

                if (IsEndOfFile(nextSymbol))
                {
                    throw new System.IO.EndOfStreamException("Unexpected end of file after escape char!");
                }

                quotedValue.Append((char)nextSymbol);
                return false;
            }

            // The escape char is the quote string itself and no second quote follows,
            // so the char that was read is the closing quote.
            quotedValue.Append(flags.quoteCharacter);
            return true;
        }
    }
}