< Summary

Information
Class: ValidateLib.TabularData.Parsing.Params
Assembly: validatelib.dll
File(s): C:\skola_karlovka\RP\code\csv-validator\CSV_Validator\ValidateLib\TabularData\Parsing\RowParser.cs
Line coverage
100%
Covered lines: 2
Uncovered lines: 0
Coverable lines: 2
Total lines: 213
Line coverage: 100%
Branch coverage
N/A
Covered branches: 0
Total branches: 0
Branch coverage: N/A
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

File(s)

C:\skola_karlovka\RP\code\csv-validator\CSV_Validator\ValidateLib\TabularData\Parsing\RowParser.cs

#LineLine coverage
 1using System.Text;
 2using ValidateLib.ErrorsAndWarnings.Errors;
 3using ValidateLib.TabularData.AnnotatedTabularDataModel;
 4
 5namespace ValidateLib.TabularData.Parsing
 6{
 7    record class Params
 8    {
 19        public string rowContent { get; set; }
 110        public int position { get; set; }
 11    }
 12    /// <summary>
 13    /// Parses one row from the CSV file and returns fields contained in this row.
 14    /// Implements this algorithm: https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#dfn-parse-a-row
 15    /// </summary>
 16    public class RowParser : Reader
 17    {
 18        List<string> cellValues = new List<string>();
 19        StringBuilder currentCellValue = new StringBuilder();
 20        string trimmedCellValue = "";
 21        protected bool defaultQuoteCase = true;
 22        bool quoted = false;
 23        Row? _row;
 24        bool charOutsideQuotesAdded = false;
 25
 26        public RowParser(Flags flags) : base(flags)
 27        {
 28            if (flags.quoteCharacter != "\"")
 29            {
 30                defaultQuoteCase = false;
 31            }
 32        }
 33        public List<string> ParseRow(string row, Row? annotatedRow = null)
 34        {
 35            _row = annotatedRow;
 36            cellValues = new List<string>();
 37            currentCellValue = new StringBuilder();
 38            quoted = false;
 39
 40            Params parameters = new Params()
 41            {
 42                rowContent = row,
 43                position = 0
 44            };
 45
 46            while (isPositionInBound(parameters))
 47            {
 48                ProcessChar(parameters);
 49            }
 50            AddCurrentCell();
 51            return cellValues;
 52
 53
 54        }
 55        bool isPositionInBound(string row, int position) => position < row.Length;
 56        bool isPositionInBound(Params parameters) => parameters.position < parameters.rowContent.Length;
        // Returns a substring of the row content obtained through Substring(...), based on the cursor position.
        // NOTE(review): the end of this line is cut off in this listing; presumably the call is
        // Substring(parameters.position) - confirm against RowParser.cs.
 57        string getSubStringStartingFromPosition(Params parameters) => parameters.rowContent.Substring(parameters.positio
 58        private char GetCharAtPosition(Params parameters) => parameters.rowContent[parameters.position];
 59        private bool IsQuote(Params parameters)
 60        {
 61            if (defaultQuoteCase)
 62            {
 63                return parameters.rowContent[parameters.position] == '\"';
 64            }
 65            return getSubStringStartingFromPosition(parameters).StartsWith(flags.quoteCharacter);
 66        }
 67        private bool IsDelimiter(Params parameters)
 68        {
 69            int positionInDelimiter = 0;
 70            int positionInParams = parameters.position;
 71            while (positionInParams < parameters.rowContent.Length && positionInDelimiter < flags.delimiter.Length)
 72            {
 73                if (flags.delimiter[positionInDelimiter] != parameters.rowContent[positionInParams])
 74                    return false;
 75                positionInDelimiter++;
 76                positionInParams++;
 77            }
 78            return true;
 79        }
 80
 81        bool ProcessChar(Params parameters)
 82        {
 83            char currentChar = parameters.rowContent[parameters.position];
 84            if (currentChar == flags.escapeCharacter)
 85            {
 86                HandleEscapeChar(parameters);
 87            }
 88            else if (IsQuote(parameters))
 89            {
 90                HandleQuoteChar(parameters);
 91            }
 92            else if (IsDelimiter(parameters))
 93            {
 94                HandleDelimiter(parameters);
 95            }
 96            else
 97            {
 98                currentCellValue.Append(currentChar);
 99                parameters.position++;
 100            }
 101            return false;
 102        }
 103
 104        private void HandleDelimiter(Params parameters)
 105        {
 106            if (quoted)
 107            {
 108                currentCellValue.Append(GetCharAtPosition(parameters));
 109                parameters.position++;
 110            }
 111            else
 112            {
 113                AddCurrentCell();
 114                currentCellValue = new StringBuilder();
 115                parameters.position++;
 116            }
 117        }
 118
 119        private void HandleEscapeChar(Params parameters)
 120        {
 121            if (IsEscapeFollowedByQuote(parameters.rowContent, parameters.position))
 122            {
 123                currentCellValue.Append(flags.quoteCharacter);
 124                parameters.position += 2;
 125            }
 126            else if (flags.quoteCharacter != flags.escapeCharacter.ToString())
 127            {
 128                if (isPositionInBound(parameters.rowContent, parameters.position + 1))
 129                {
 130                    char charToAppend = parameters.rowContent[parameters.position + 1];
 131                    currentCellValue.Append(charToAppend);
 132                }
 133                parameters.position += 2;
 134
 135            }
 136            else
 137            {
 138                HandleQuoteChar(parameters);
 139            }
 140        }
 141
        // Toggles the quoted state for a quote found at the cursor and moves the cursor one character forward.
        // When an annotated row with an error list is present, a "character outside quotes" error is added
        // (at most once per cell, guarded by charOutsideQuotesAdded) if text precedes an opening quote or
        // follows a closing quote.
        // NOTE(review): the cursor advances by a single character although flags.quoteCharacter is compared
        // as a string - confirm that multi-character quote sequences are not expected here.
        // NOTE(review): both ErrorFactory.GetCharOutsideQuotesVE(...) statements are cut off in this listing.
 142        private void HandleQuoteChar(Params parameters)
 143        {
 144            if (!quoted)
 145            {
                // Opening quote.
 146                quoted = true;
 147
 148
                // The cell already holds text other than exactly the quote string,
                // i.e. characters appeared before the opening quote.
 149                if (currentCellValue.Length > 0 && currentCellValue.ToString() != flags.quoteCharacter)
 150                {
 151                    if (!charOutsideQuotesAdded)
 152                    {
 153                        _row?.errors?.Add(ErrorFactory.GetCharOutsideQuotesVE(_row.table.url, _row.sourceNumber, cellVal
 154                        charOutsideQuotesAdded = true;
 155                    }
 156                }
 157
 158
 159                parameters.position++;
 160            }
 161            else
 162            {
                // Closing quote.
 163                quoted = false;
 164                parameters.position++;
 165
                // After a closing quote only the end of the row, a delimiter or another quote
                // passes without an error being recorded.
 166                if (isPositionInBound(parameters)
 167                    && !IsDelimiter(parameters)
 168                    && !IsQuote(parameters))
 169                {
 170                    if (!charOutsideQuotesAdded)
 171                    {
 172                        _row?.errors?.Add(ErrorFactory.GetCharOutsideQuotesVE(_row.table.url, _row.sourceNumber, cellVal
 173                        charOutsideQuotesAdded = true;
 174                    }
 175                }
 176
 177
 178            }
 179
 180        }
 181        private bool IsEscapeFollowedByQuote(string rowContent, int position)
 182        {
 183            string escapeFollowedByQuote = flags.escapeCharacter.ToString() + flags.quoteCharacter;
 184            if (position >= 0 && position + escapeFollowedByQuote.Length <= rowContent.Length)
 185            {
 186                string extractedSubstring = rowContent.Substring(position, escapeFollowedByQuote.Length);
 187                return extractedSubstring.Equals(escapeFollowedByQuote);
 188            }
 189
 190            return false;
 191        }
 192
 193        private void ConditionallyTrimCellValue()
 194        {
 195            trimmedCellValue = currentCellValue.ToString();
 196            if (flags.trim == TrimOptions.START || flags.trim == TrimOptions.TRUE)
 197            {
 198                trimmedCellValue = trimmedCellValue.TrimStart();
 199            }
 200            if (flags.trim == TrimOptions.END || flags.trim == TrimOptions.TRUE)
 201            {
 202                trimmedCellValue = trimmedCellValue.TrimEnd();
 203            }
 204        }
 205
 206        private void AddCurrentCell()
 207        {
 208            charOutsideQuotesAdded = false;
 209            ConditionallyTrimCellValue();
 210            cellValues.Add(trimmedCellValue);
 211        }
 212    }
 213}