| | 1 | | namespace ValidateLib.TabularData.Parsing |
| | 2 | | { |
| | 3 | | /// <summary> |
| | 4 | | /// Defines methods for more advanced readers for working with the CSV |
| | 5 | | /// files. |
| | 6 | | /// </summary> |
| | 7 | | public abstract class Reader |
| | 8 | | { |
/// <summary>Parsing flags handed to the constructor.</summary>
protected Flags flags;

/// <summary>
/// Length of the longest entry in <c>flags.lineTerminators</c>; the constructor
/// overwrites the initial value of 1.
/// </summary>
protected int maximalLineTerminatorLength = 1;

/// <summary>
/// Length of <c>flags.quoteCharacter</c>; the constructor overwrites the initial value of 1.
/// </summary>
protected int quoteCharacterLength = 1;

/// <summary>Buffer size computed by the constructor.</summary>
protected int bufferSize;

/// <summary>Set by the constructor when "\r\n" is among the configured line terminators.</summary>
protected bool CRLF_ending;

/// <summary>Set by the constructor when "\n" is among the configured line terminators.</summary>
protected bool LF_ending;

/// <summary>Number of characters in the "\r\n" terminator.</summary>
protected byte CR_LF_length = 2;

/// <summary>Number of characters in the "\n" terminator.</summary>
protected byte LF_length = 1;

/// <summary>
/// Set by the constructor when at least one configured line terminator is neither
/// "\r\n" nor "\n".
/// </summary>
protected bool unusualLineTerminator;
| | 18 | |
|
| 1 | 19 | | public Reader(Flags flags) |
| | 20 | | { |
| 1 | 21 | | this.flags = flags; |
| 1 | 22 | | maximalLineTerminatorLength = flags.lineTerminators.OrderByDescending(s => s.Length).First().Length; |
| 1 | 23 | | this.flags.lineTerminators = flags.lineTerminators.OrderBy(s => s.Length).ToList(); |
| 1 | 24 | | quoteCharacterLength = flags.quoteCharacter.Length; |
| 1 | 25 | | bufferSize = Math.Max(ReaderConstants.BUFFER_MINIMAL_SIZE, Math.Max(maximalLineTerminatorLength, quoteCharac |
| 1 | 26 | | if (flags.lineTerminators.Contains("\r\n")) |
| | 27 | | { |
| 1 | 28 | | CRLF_ending = true; |
| | 29 | | } |
| 1 | 30 | | if (flags.lineTerminators.Contains("\n")) |
| | 31 | | { |
| 1 | 32 | | LF_ending = true; |
| | 33 | | } |
| | 34 | |
|
| 1 | 35 | | foreach (var lineTerminator in flags.lineTerminators) |
| | 36 | | { |
| 1 | 37 | | if (lineTerminator != "\r\n" && lineTerminator != "\n") |
| | 38 | | { |
| 1 | 39 | | unusualLineTerminator = true; |
| | 40 | | } |
| | 41 | | } |
| | 42 | |
|
| 1 | 43 | | } |
/// <summary>
/// Gets the value of <c>maximalLineTerminatorLength</c>, which the constructor sets to the
/// length of the longest configured line terminator.
/// </summary>
public int MaximalLineTerminatorLength => maximalLineTerminatorLength;
| | 45 | |
|
/// <summary>
/// Checks whether the next characters of <paramref name="reader"/> form one of the
/// configured line terminators (a terminator may be several characters long).
/// </summary>
/// <remarks>
/// Requests up to <c>maximalLineTerminatorLength</c> characters from the reader. When a
/// terminator is recognised, <c>reader.MoveBack</c> is called with the number of characters
/// read beyond that terminator; when nothing matches, it is called with everything that was
/// read. NOTE(review): this presumably rewinds the reader so that only a matched terminator
/// stays consumed - confirm against <c>CustomStreamReader</c>.
/// "\r\n" is tested first, then "\n", then every entry of <c>flags.lineTerminators</c> in
/// list order. Terminators are compared ordinally (code unit by code unit), so the result
/// does not depend on the current culture.
/// </remarks>
/// <param name="reader">Reader to look ahead in.</param>
/// <returns>
/// True when the next characters start with a configured line terminator; false otherwise,
/// including when no characters could be read.
/// </returns>
protected bool IsNextCharsLineTerminator(CustomStreamReader reader)
{
    char[] lookahead = new char[maximalLineTerminatorLength];
    int charsRead = reader.ReadChars(maximalLineTerminatorLength, lookahead);

    if (charsRead == 0)
    {
        // Nothing could be read, so there is nothing to hand back to the reader.
        return false;
    }

    if (CRLF_ending && charsRead >= 2 && lookahead[0] == '\r' && lookahead[1] == '\n')
    {
        reader.MoveBack(charsRead - CR_LF_length);
        return true;
    }

    if (LF_ending && lookahead[0] == '\n')
    {
        reader.MoveBack(charsRead - LF_length);
        return true;
    }

    if (unusualLineTerminator)
    {
        string nextChars = new string(lookahead, 0, charsRead);

        foreach (string terminator in flags.lineTerminators)
        {
            // Ordinal comparison: line terminators are raw character sequences. The
            // culture-sensitive default can treat some characters as ignorable and
            // report false matches (or miss real ones) depending on the platform.
            if (nextChars.StartsWith(terminator, StringComparison.Ordinal))
            {
                reader.MoveBack(charsRead - terminator.Length);
                return true;
            }
        }
    }

    // No terminator matched: hand back every character that was read.
    reader.MoveBack(charsRead);
    return false;
}
| | 102 | |
|
/// <summary>
/// Tells whether <paramref name="currentChar"/> equals
/// <c>ReaderConstants.END_OF_STREAM_CONSTANT</c>.
/// </summary>
/// <param name="currentChar">Value to compare against the end-of-stream constant.</param>
/// <returns>True when the two values are equal.</returns>
protected bool IsEndOfFile(int currentChar)
{
    return currentChar == ReaderConstants.END_OF_STREAM_CONSTANT;
}
| | 104 | | } |
| | 105 | | } |