using System.IO;
using System.Text;

namespace ValidateLib.TabularData.Parsing
{
    /// <summary>
    /// Reads one row from the CSV file with specific flags.
    /// Implements algorithm: https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#dfn-read-a-row
    /// </summary>
    public class RowReader : Reader
    {
        // Helper used to detect quote sequences and to read whole quoted values.
        private readonly QuotedCharReader quotedCharReader;

        /// <summary>
        /// Creates a row reader that uses the given flags.
        /// </summary>
        /// <param name="flags">Flags passed to the base reader and to the quoted-value reader.</param>
        public RowReader(Flags flags) : base(flags)
        {
            quotedCharReader = new QuotedCharReader(flags);
        }

        /// <summary>
        /// Reads characters from <paramref name="reader"/> until a row is complete.
        /// </summary>
        /// <param name="reader">Source of characters.</param>
        /// <returns>
        /// The accumulated row content, or <c>null</c> when the end of file is reached
        /// before any content has been accumulated.
        /// </returns>
        /// <exception cref="EndOfStreamException">
        /// The input ends directly after an escape character that must be followed by another character.
        /// </exception>
        public string? ReadRow(CustomStreamReader reader)
        {
            StringBuilder rowContent = new StringBuilder();

            while (true)
            {
                int nextChar = reader.ReadChar();
                if (IsEndOfFile(nextChar))
                {
                    // Nothing accumulated means there is no further row; otherwise the
                    // last row simply has no trailing line terminator.
                    return rowContent.Length == 0 ? null : rowContent.ToString();
                }

                if (ProcessChar(reader, rowContent, (char)nextChar))
                {
                    return rowContent.ToString();
                }
            }
        }

        /// <summary>
        /// Processes one char for the read row method.
        /// </summary>
        /// <param name="reader">Source of characters; may be advanced or moved back by the handlers.</param>
        /// <param name="rowContent">Buffer that accumulates the content of the current row.</param>
        /// <param name="currentChar">The character that has just been read from <paramref name="reader"/>.</param>
        /// <returns>True if the line has already been read.</returns>
        private bool ProcessChar(CustomStreamReader reader, StringBuilder rowContent, char currentChar)
        {
            // The escape character is checked first, so when escape and quote are the
            // same character it is the escape handler that deals with it.
            if (currentChar == flags.escapeCharacter)
            {
                return HandleEscapeChar(reader, rowContent);
            }

            // Guard against a null or empty quote string so that indexing [0] cannot throw.
            if (!string.IsNullOrEmpty(flags.quoteCharacter) && currentChar == flags.quoteCharacter[0])
            {
                return HandlePotentialQuoteChar(reader, rowContent, currentChar);
            }

            return HandleNewlineOrDefault(reader, rowContent, currentChar);
        }

        /// <summary>
        /// Handles the case when we read a char that is the same as the first char of the quoteChar string.
        /// </summary>
        /// <param name="reader">Source of characters, positioned just after <paramref name="currentChar"/>.</param>
        /// <param name="rowContent">Buffer that accumulates the content of the current row.</param>
        /// <param name="currentChar">The character that matched the first char of the quote string.</param>
        /// <returns>Always false, as we cannot read an unescaped newline character here.</returns>
        private bool HandlePotentialQuoteChar(CustomStreamReader reader, StringBuilder rowContent, char currentChar)
        {
            // Case when the quoteChar is of length one, so it has already been matched.
            if (quoteCharacterLength == 1)
            {
                rowContent.Append(quotedCharReader.ReadQuotedValue(reader));
                return false;
            }

            // Otherwise proceed to the more complex check to determine whether a quote char
            // of length > 1 has been detected. Step back so that the check starts at the
            // character that was just read.
            reader.MoveBack(1);
            if (quotedCharReader.VerifyNextCharsAreQuoteChar(reader))
            {
                rowContent.Append(quotedCharReader.ReadQuotedValue(reader));
            }
            else
            {
                // Not a full quote sequence: consume the character again and keep it as plain content.
                // NOTE(review): relies on VerifyNextCharsAreQuoteChar leaving the reader position
                // unchanged when it returns false - confirm against QuotedCharReader.
                reader.ReadChar();
                rowContent.Append(currentChar);
            }

            return false;
        }

        /// <summary>
        /// Handles the cases of a newline or a default, non-special character.
        /// </summary>
        /// <param name="reader">Source of characters, positioned just after <paramref name="currentChar"/>.</param>
        /// <param name="rowContent">Buffer that accumulates the content of the current row.</param>
        /// <param name="currentChar">The character that has just been read.</param>
        /// <returns>True if an unescaped newline character has been read.</returns>
        private bool HandleNewlineOrDefault(CustomStreamReader reader, StringBuilder rowContent, char currentChar)
        {
            // Step back so that the line-terminator check starts at the character just read.
            reader.MoveBack(1);
            if (IsNextCharsLineTerminator(reader))
            {
                // The terminator itself is not appended to the row content.
                return true;
            }

            // Ordinary character: consume it again and append it to the row.
            reader.ReadChar();
            rowContent.Append(currentChar);
            return false;
        }

        /// <summary>
        /// Escape char was already detected. Need to process more chars for special cases 2.1 and 2.2 from algorithm
        /// https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#dfn-read-a-row
        /// </summary>
        /// <param name="reader">Source of characters, positioned just after the escape character.</param>
        /// <param name="rowContent">Buffer that accumulates the content of the current row.</param>
        /// <returns>Always false, as we cannot read an unescaped newline character here.</returns>
        /// <exception cref="EndOfStreamException">
        /// The end of file is reached where the character following the escape character is expected.
        /// </exception>
        private bool HandleEscapeChar(CustomStreamReader reader, StringBuilder rowContent)
        {
            if (quotedCharReader.VerifyNextCharsAreQuoteChar(reader))
            {
                // Escape character followed by the quote string: both are copied to the row verbatim.
                rowContent.Append(flags.escapeCharacter);
                rowContent.Append(flags.quoteCharacter);
            }
            else if (flags.quoteCharacter != flags.escapeCharacter.ToString())
            {
                // Escape character distinct from the quote string: copy it together with
                // the single character that follows it.
                rowContent.Append(flags.escapeCharacter);
                int nextSymbol = reader.ReadChar();

                if (IsEndOfFile(nextSymbol))
                {
                    throw new EndOfStreamException("Unexpected end of file after escape char!");
                }

                rowContent.Append((char)nextSymbol);
            }
            else
            {
                // Escape and quote are the same single character, so the character that
                // was read opens a quoted value.
                rowContent.Append(quotedCharReader.ReadQuotedValue(reader));
            }

            return false;
        }
    }
}