< Summary

Information
Class: ValidateLib.TabularData.Parsing.CellParser
Assembly: validatelib.dll
File(s): C:\skola_karlovka\RP\code\csv-validator\CSV_Validator\ValidateLib\TabularData\Parsing\CellParser.cs
Line coverage
92%
Covered lines: 73
Uncovered lines: 6
Coverable lines: 79
Total lines: 192
Line coverage: 92.4%
Branch coverage
N/A
Covered branches: 0
Total branches: 0
Branch coverage: N/A
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

File(s)

C:\skola_karlovka\RP\code\csv-validator\CSV_Validator\ValidateLib\TabularData\Parsing\CellParser.cs

# | Line | Line coverage
 1using System.Text.RegularExpressions;
 2using ValidateLib.ErrorsAndWarnings.Errors;
 3using ValidateLib.ErrorsAndWarnings.Errors.Specific;
 4using ValidateLib.ErrorsAndWarnings.Errors.ValidationErrors;
 5using ValidateLib.Metadata.Descriptors;
 6using ValidateLib.TabularData.AnnotatedTabularDataModel;
 7using ValidateLib.TabularData.Datatypes;
 8
 9namespace ValidateLib.TabularData.Parsing
 10{
 11    /// <summary>
 12    /// This class should serve the purpose of parsing string value of cell to a correct data-type. Adheres to the
 13    /// algorithm https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#parsing-cells.
 14    /// </summary>
 15    public class CellParser
 16    {
 117        static string[] CRLFTIrreplacable = new string[]
 118        {
 119            "string",
 120            "json",
 121            "xml",
 122            "html",
 123            "anyAtomicType"
 124        };
 25
 126        static string[] StringUnstripable = new string[]
 127        {
 128            "string",
 129            "json",
 130            "xml",
 131            "html",
 132            "anyAtomicType",
 133            "normalizedString"
 134        };
 35        /// <summary>
 36        /// Implements whole algorithm.
 37        /// </summary>
 38        /// <param name="cell"></param>
 39        /// <returns></returns>
 40        public static List<Error> ParseCell(Cell cell)
 41        {
 142            Column? cellsColumn = cell.column;
 143            if (cellsColumn is null)
 044                return cell.errors;
 45
 46            // steps 1 and 2 from the algorithm
 147            string normalizedStringValue = NormalizeString(cell.column!.datatype!, cell.stringValue);
 48
 49            // step 3
 150            normalizedStringValue = normalizedStringValue == "" ? cellsColumn._default : normalizedStringValue;
 51
 52
 153            if (cellsColumn.separator != null)
 54            {
 55                // step 4 and 5
 156                HandleSeparatorCase(cell, normalizedStringValue);
 57            }
 58            else
 59            {
 60                // step 6 7 8 9
 161                HandleNormalCase(cell, normalizedStringValue);
 62            }
 63
 164            return cell.errors;
 65        }
 66
 67        /// <summary>
 68        /// Step 1 and step 2 from the algorithm
 69        /// </summary>
 70        /// <param name="datatype"></param>
 71        /// <param name="stringValue"></param>
 72        /// <returns></returns>
 73        static string NormalizeString(DatatypeDescriptor datatype, string stringValue)
 74        {
 75
 176            if (!CRLFTIrreplacable.Contains(datatype._base!._value!))
 177                stringValue = ReplaceCRLFT(stringValue);
 178            if (!StringUnstripable.Contains(datatype._base!._value))
 179                stringValue = NormalizeWhitespace(stringValue);
 80
 181            return stringValue;
 82        }
 83
 84        /// <summary>
 85        /// Step 1 from the algorithm
 86        /// </summary>
 87        /// <param name="stringValue"></param>
 88        /// <returns></returns>
 89        static string ReplaceCRLFT(string stringValue)
 90        {
 191            stringValue = stringValue.Replace('\n', ' ');
 192            stringValue = stringValue.Replace('\r', ' ');
 193            stringValue = stringValue.Replace('\t', ' ');
 194            return stringValue;
 95        }
 96
 97        /// <summary>
 98        /// Step 2 from the algorithm
 99        /// </summary>
 100        /// <param name="input"></param>
 101        /// <returns></returns>
 102        static string NormalizeWhitespace(string input)
 103        {
 104            // Replace multiple consecutive whitespace characters with a single space
 1105            string normalizedString = Regex.Replace(input, @"\s+", " ");
 106
 107            // Trim leading and trailing spaces
 1108            normalizedString = normalizedString.Trim();
 109
 1110            return normalizedString;
 111        }
 112
 113        static void HandleSeparatorCase(Cell cell, string normalizedStringValue)
 114        {
 1115            Column cellsColumn = cell.column!;
 1116            cell.cellType = CellType.LIST;
 117
 1118            if (normalizedStringValue == "")
 119            {
 1120                if (cellsColumn.required)
 0121                    cell.errors.Add(ErrorFactory.GetEmptyCellInRequiredColumnVE(cell));
 122            }
 123            else
 124            {
 1125                if (cellsColumn._null.Contains(normalizedStringValue))
 126                {
 0127                    cell.cellType = CellType.NULL;
 128                }
 129
 1130                string[] stringValues = normalizedStringValue.Split(cellsColumn.separator);
 131
 1132                if (cellsColumn.datatype!._base!._value != "string" &&
 1133                    cellsColumn.datatype._base._value != "anyAtomicType")
 1134                    foreach (var stringVal in stringValues)
 135                    {
 1136                        stringVal.Trim();
 1137                        HandleNormalCase(cell, stringVal, true);
 138                    }
 139            }
 1140        }
 141        /// <summary>
 142        /// Handles steps 6-9 from algorithm
 143        /// </summary>
 144        /// <param name="cell"></param>
 145        /// <param name="normalizedString"></param>
 146        /// <param name="addToList"> Used to parametrize the method for reuse in the step 5.4 </param>
 147        static void HandleNormalCase(Cell cell, string normalizedString, bool addToList = false)
 148        {
 1149            Column cellsColumn = cell.column!;
 1150            if (normalizedString == "")
 1151                normalizedString = cellsColumn._default;
 1152            if (cellsColumn._null.Contains(normalizedString))
 153            {
 1154                if (!addToList)
 1155                    cell.cellType = CellType.NULL;
 1156                if (cellsColumn.separator is null && cellsColumn.required)
 1157                    cell.errors.Add(ErrorFactory.GetEmptyCellInRequiredColumnVE(cell));
 158            }
 159            else
 160            {
 161                try
 162                {
 1163                    var datatypeDescriptor = cellsColumn.datatype;
 1164                    var parsedCellValue = DatatypeFactory.GetDatatype(
 1165                            normalizedString,
 1166                            datatypeDescriptor._base!._value!,
 1167                            datatypeDescriptor.format
 1168                            );
 1169                    if (addToList)
 1170                        cell.cellValues.Add(parsedCellValue);
 171                    else
 1172                        cell.value = parsedCellValue;
 173
 174
 1175                }
 1176                catch (DatatypeValidationError error)
 177                {
 1178                    error.Cell = cell;
 1179                    cell.errors.Add(error);
 1180                    cell.value = DatatypeFactory.GetDatatype(normalizedString, "string", null);
 1181                }
 0182                catch (Error error)
 183                {
 0184                    cell.errors.Add(error);
 0185                }
 186            }
 187
 1188        }
 189    }
 190
 191
 192}