< Summary

Information
Class: ValidateLib.TabularData.Parsing.RowParser
Assembly: validatelib.dll
File(s): C:\skola_karlovka\RP\code\csv-validator\CSV_Validator\ValidateLib\TabularData\Parsing\RowParser.cs
Line coverage
89%
Covered lines: 84
Uncovered lines: 10
Coverable lines: 94
Total lines: 213
Line coverage: 89.3%
Branch coverage
N/A
Covered branches: 0
Total branches: 0
Branch coverage: N/A
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

File(s)

C:\skola_karlovka\RP\code\csv-validator\CSV_Validator\ValidateLib\TabularData\Parsing\RowParser.cs

# | Line | Line coverage
 1using System.Text;
 2using ValidateLib.ErrorsAndWarnings.Errors;
 3using ValidateLib.TabularData.AnnotatedTabularDataModel;
 4
 5namespace ValidateLib.TabularData.Parsing
 6{
 7    record class Params // Mutable parse cursor: the row text plus the index of the next character to examine.
 8    {
 9        public string rowContent { get; set; } // Full text of the row being parsed; assigned in ParseRow.
 10        public int position { get; set; } // Index into rowContent; advanced by ProcessChar and the Handle* methods.
 11    }
 12    /// <summary>
 13    /// Parses one row from the CSV file and returns fields contained in this row.
 14    /// Implements this algorithm: https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#dfn-parse-a-row
 15    /// </summary>
 16    public class RowParser : Reader
 17    {
 118        List<string> cellValues = new List<string>(); // Cells completed so far for the current row; replaced at the start of each ParseRow call.
 119        StringBuilder currentCellValue = new StringBuilder(); // Accumulates the characters of the cell currently being read.
 120        string trimmedCellValue = ""; // Scratch result of ConditionallyTrimCellValue; this is the value AddCurrentCell stores.
 121        protected bool defaultQuoteCase = true; // True while the quote string is the default double quote; the constructor clears it otherwise.
 22        bool quoted = false; // True while the parser is between an opening and a closing quote.
 23        Row? _row; // Optional annotated row that receives validation errors; when null no error is recorded.
 24        bool charOutsideQuotesAdded = false; // Set once a char-outside-quotes error was handled for the current cell; reset in AddCurrentCell.
 25
 126        public RowParser(Flags flags) : base(flags) // Passes the flags to Reader and decides which quote check IsQuote will use.
 27        {
 128            if (flags.quoteCharacter != "\"") // quoteCharacter is compared as a string here.
 29            {
 030                defaultQuoteCase = false; // IsQuote then falls back to the general StartsWith comparison.
 31            }
 132        }
 33        public List<string> ParseRow(string row, Row? annotatedRow = null) // Splits one row into its cell values; annotatedRow, when given, receives char-outside-quotes errors.
 34        {
 135            _row = annotatedRow;
 136            cellValues = new List<string>(); // Fresh list per call, so a previously returned list is not mutated.
 137            currentCellValue = new StringBuilder();
 138            quoted = false;
 39
 140            Params parameters = new Params()
 141            {
 142                rowContent = row,
 143                position = 0
 144            };
 45
 146            while (isPositionInBound(parameters)) // Every ProcessChar path advances position by at least one, so the loop terminates.
 47            {
 148                ProcessChar(parameters); // The returned bool is ignored here.
 49            }
 150            AddCurrentCell(); // Flush the last cell; an empty row therefore yields a single empty cell.
 151            return cellValues;
 52
 53
 54        }
 155        bool isPositionInBound(string row, int position) => position < row.Length; // True when position is before the end of row; there is no lower-bound check.
 156        bool isPositionInBound(Params parameters) => parameters.position < parameters.rowContent.Length; // Same test, applied to the parse cursor.
 057        string getSubStringStartingFromPosition(Params parameters) => parameters.rowContent.Substring(parameters.positio
 158        private char GetCharAtPosition(Params parameters) => parameters.rowContent[parameters.position]; // Character under the cursor; no bounds check is done here.
 59        private bool IsQuote(Params parameters) // Tests whether the configured quote string starts at the cursor position.
 60        {
 161            if (defaultQuoteCase)
 62            {
 163                return parameters.rowContent[parameters.position] == '\"'; // Default quote: a single character comparison is enough.
 64            }
 065            return getSubStringStartingFromPosition(parameters).StartsWith(flags.quoteCharacter); // NOTE(review): StartsWith(string) without a StringComparison is culture-sensitive — confirm whether an ordinal match is wanted.
 66        }
 67        private bool IsDelimiter(Params parameters) // Compares flags.delimiter with the row text starting at the cursor, character by character.
 68        {
 169            int positionInDelimiter = 0;
 170            int positionInParams = parameters.position;
 171            while (positionInParams < parameters.rowContent.Length && positionInDelimiter < flags.delimiter.Length)
 72            {
 173                if (flags.delimiter[positionInDelimiter] != parameters.rowContent[positionInParams])
 174                    return false;
 175                positionInDelimiter++;
 176                positionInParams++;
 77            }
 178            return true; // NOTE(review): also reached when the row ends before the whole delimiter was compared (partial match at end of row) or when the delimiter is empty — confirm that is intended.
 79        }
 80
 81        bool ProcessChar(Params parameters) // Dispatches on the character at the cursor: escape, quote, delimiter, or ordinary cell content.
 82        {
 183            char currentChar = parameters.rowContent[parameters.position];
 184            if (currentChar == flags.escapeCharacter) // Tested first, so the escape branch takes precedence over the quote check.
 85            {
 186                HandleEscapeChar(parameters);
 87            }
 188            else if (IsQuote(parameters))
 89            {
 090                HandleQuoteChar(parameters);
 91            }
 192            else if (IsDelimiter(parameters))
 93            {
 194                HandleDelimiter(parameters);
 95            }
 96            else
 97            {
 198                currentCellValue.Append(currentChar); // Ordinary character: copy it into the current cell.
 199                parameters.position++;
 100            }
 1101            return false; // Every path returns false, so the result carries no information.
 102        }
 103
 104        private void HandleDelimiter(Params parameters) // Inside quotes the delimiter is literal text; outside quotes it closes the current cell.
 105        {
 1106            if (quoted)
 107            {
 1108                currentCellValue.Append(GetCharAtPosition(parameters)); // Only the character under the cursor is copied.
 1109                parameters.position++;
 110            }
 111            else
 112            {
 1113                AddCurrentCell();
 1114                currentCellValue = new StringBuilder(); // Start collecting the next cell.
 1115                parameters.position++; // NOTE(review): advances one character although IsDelimiter matches the whole flags.delimiter string — a delimiter longer than one character would not be skipped completely; confirm.
 116            }
 1117        }
 118
 119        private void HandleEscapeChar(Params parameters) // Handles the cursor being on flags.escapeCharacter (ProcessChar calls it in that case).
 120        {
 1121            if (IsEscapeFollowedByQuote(parameters.rowContent, parameters.position)) // Escaped quote: emit the quote string itself.
 122            {
 1123                currentCellValue.Append(flags.quoteCharacter);
 1124                parameters.position += 2; // NOTE(review): skips two characters, which equals escape + quote only when quoteCharacter is one character long — confirm.
 125            }
 1126            else if (flags.quoteCharacter != flags.escapeCharacter.ToString()) // Escape differs from the quote string and is not followed by it.
 127            {
 1128                if (isPositionInBound(parameters.rowContent, parameters.position + 1))
 129                {
 1130                    char charToAppend = parameters.rowContent[parameters.position + 1]; // Keep the character after the escape; the escape itself is dropped.
 1131                    currentCellValue.Append(charToAppend);
 132                }
 1133                parameters.position += 2; // An escape at the very end of the row appends nothing; position then moves past the end.
 134
 135            }
 136            else
 137            {
 1138                HandleQuoteChar(parameters); // Escape equals the quote string and no escaped-quote pair starts here, so treat it as a plain quote.
 139            }
 1140        }
 141
 142        private void HandleQuoteChar(Params parameters) // Toggles the quoted state and reports text found outside the quotes of a quoted cell.
 143        {
 1144            if (!quoted)
 145            {
 1146                quoted = true; // Opening quote.
 147
 148
 1149                if (currentCellValue.Length > 0 && currentCellValue.ToString() != flags.quoteCharacter) // Something other than a lone quote string was collected before this opening quote.
 150                {
 0151                    if (!charOutsideQuotesAdded) // Guard so the error is added at most once until AddCurrentCell resets the flag.
 152                    {
 0153                        _row?.errors?.Add(ErrorFactory.GetCharOutsideQuotesVE(_row.table.url, _row.sourceNumber, cellVal
 0154                        charOutsideQuotesAdded = true;
 155                    }
 156                }
 157
 158
 1159                parameters.position++; // NOTE(review): advances one character even when flags.quoteCharacter is longer than one character — confirm.
 160            }
 161            else
 162            {
 1163                quoted = false; // Closing quote.
 1164                parameters.position++;
 165
 1166                if (isPositionInBound(parameters)
 1167                    && !IsDelimiter(parameters)
 1168                    && !IsQuote(parameters)) // After a closing quote only a delimiter, another quote, or the end of the row passes without an error.
 169                {
 0170                    if (!charOutsideQuotesAdded)
 171                    {
 0172                        _row?.errors?.Add(ErrorFactory.GetCharOutsideQuotesVE(_row.table.url, _row.sourceNumber, cellVal
 0173                        charOutsideQuotesAdded = true;
 174                    }
 175                }
 176
 177
 178            }
 179
 1180        }
 181        private bool IsEscapeFollowedByQuote(string rowContent, int position) // True when rowContent at position holds escapeCharacter immediately followed by quoteCharacter.
 182        {
 1183            string escapeFollowedByQuote = flags.escapeCharacter.ToString() + flags.quoteCharacter;
 1184            if (position >= 0 && position + escapeFollowedByQuote.Length <= rowContent.Length) // Guard so Substring cannot run past the end of the row.
 185            {
 1186                string extractedSubstring = rowContent.Substring(position, escapeFollowedByQuote.Length);
 1187                return extractedSubstring.Equals(escapeFollowedByQuote); // String.Equals(string) compares ordinally.
 188            }
 189
 1190            return false; // Not enough characters left for the pair (or a negative position).
 191        }
 192
 193        private void ConditionallyTrimCellValue() // Copies currentCellValue into trimmedCellValue, trimming according to flags.trim.
 194        {
 1195            trimmedCellValue = currentCellValue.ToString();
 1196            if (flags.trim == TrimOptions.START || flags.trim == TrimOptions.TRUE) // START and TRUE both remove leading whitespace.
 197            {
 1198                trimmedCellValue = trimmedCellValue.TrimStart();
 199            }
 1200            if (flags.trim == TrimOptions.END || flags.trim == TrimOptions.TRUE) // END and TRUE both remove trailing whitespace, so TRUE trims both ends.
 201            {
 1202                trimmedCellValue = trimmedCellValue.TrimEnd();
 203            }
 1204        }
 205
 206        private void AddCurrentCell() // Finishes the current cell: applies trimming and appends the result to cellValues.
 207        {
 1208            charOutsideQuotesAdded = false; // Allow a new char-outside-quotes error for the next cell.
 1209            ConditionallyTrimCellValue();
 1210            cellValues.Add(trimmedCellValue); // currentCellValue is not cleared here; resetting it is left to the caller.
 1211        }
 212    }
 213}