| | 1 | | using ValidateLib.Encoding.Bom; |
| | 2 | | using ValidateLib.ErrorsAndWarnings.Errors; |
| | 3 | | using ValidateLib.ErrorsAndWarnings.Warnings; |
| | 4 | | using ValidateLib.Metadata.Descriptors; |
| | 5 | | using ValidateLib.Results; |
| | 6 | | using ValidateLib.TabularData.AnnotatedTabularDataModel; |
| | 7 | | using ValidateLib.TabularData.Parsing; |
| | 8 | | using ValidateLib.TabularData.Validation.ValidationRules; |
| | 9 | |
|
| | 10 | | namespace ValidateLib.TabularData.Validation |
| | 11 | | { |
| | 12 | | /// <summary> |
| | 13 | | /// Validates tabular data files based on the metadata extracted either from |
| | 14 | | /// a metadata file or from embedded metadata. |
| | 15 | | /// Uses the classes that parse the CSV file. |
| | 16 | | /// </summary> |
| | 17 | | public class TabularDataTableValidator |
| | 18 | | { |
// Passed to CustomStreamReader as its second constructor argument.
| | 19 | | const int BUFFER_SIZE = 10000; |
// Lower bound of the value produced by GetMinimalCharsCount.
| | 20 | | const int MINIMAL_CHARS_COUNT = 50; |
// Descriptor of the table being validated; assigned in the constructor.
| | 21 | | TableDescriptor _tableDescriptor; |
// Count of foreign keys taken from the table schema in the constructor; stays 0 when the schema declares none.
// NOTE(review): never read in the visible part of this class - confirm it is still needed.
| | 22 | | private int _numberOfForeingKeys = 0; |
// Rules run against every cell; the constructor fills it with the defaults plus any caller-supplied rules.
| 1 | 23 | | protected List<ICellValidationRule> cellValidationRules { get; set; } = new List<ICellValidationRule>(); |
// Rules run against every row; the constructor fills it with the defaults plus any caller-supplied rules.
| 1 | 24 | | protected List<IRowValidationRule> rowValidationRules { get; set; } = new List<IRowValidationRule>(); |
/// <summary>
/// Creates a validator for the table described by <paramref name="tableDescriptor"/>.
/// The default cell and row rules are registered first; caller-supplied rules are appended after them.
/// </summary>
/// <param name="tableDescriptor">Descriptor of the table to validate; its schema also provides the foreign-key count.</param>
/// <param name="cellRules">Optional extra cell rules; skipped when null.</param>
// NOTE(review): the parameter list is cut off in this view. The body also reads `rowRules`,
// which is appended to the row rules when it is not null.
| 1 | 25 | | public TabularDataTableValidator(TableDescriptor tableDescriptor, List<ICellValidationRule>? cellRules = null, L |
| | 26 | | { |
| | 27 | | // default rules are always registered first |
| 1 | 28 | | cellValidationRules.AddRange(GetDefaultCellValidationRules()); |
| 1 | 29 | | rowValidationRules.AddRange(GetDefaultRowValidationRules()); |
| | 30 | |
|
| | 31 | | // rules supplied by the caller are appended after the defaults; TODO: reconsider whether this is the cleanest design |
| 1 | 32 | | if (cellRules != null) |
| 0 | 33 | | cellValidationRules.AddRange(cellRules); |
| 1 | 34 | | if (rowRules != null) |
| 1 | 35 | | rowValidationRules.AddRange(rowRules); |
| 1 | 36 | | this._tableDescriptor = tableDescriptor; |
| | 37 | |
|
// Remember how many foreign keys the schema declares; the field keeps its initial 0 otherwise.
| 1 | 38 | | if (tableDescriptor.tableSchema is not null && |
| 1 | 39 | | tableDescriptor.tableSchema._value is not null && |
| 1 | 40 | | tableDescriptor.tableSchema._value.foreignKeys is not null) |
| | 41 | | { |
// NOTE(review): foreignKeys is null-checked above, but foreignKeys._value is dereferenced
// with '!' and no check - confirm it can never be null at this point.
| 1 | 42 | | _numberOfForeingKeys = tableDescriptor.tableSchema._value.foreignKeys._value!.Count; |
| | 43 | | } |
| | 44 | |
|
| | 45 | |
|
| 1 | 46 | | } |
/// <summary>
/// Builds the built-in cell rules: datatype, length and value-constraint checks, in that order.
/// </summary>
/// <returns>A new list holding one fresh instance of each default cell rule.</returns>
private List<ICellValidationRule> GetDefaultCellValidationRules()
{
    var defaults = new List<ICellValidationRule>
    {
        new CellDatatypeValidationRule(),
        new CellLengthValidationRule(),
        new CellValueConstraintValidationRule(),
    };

    return defaults;
}
| | 56 | |
|
/// <summary>
/// Builds the built-in row rules: the primary-key check followed by the number-of-columns check.
/// </summary>
/// <returns>A new list holding one fresh instance of each default row rule.</returns>
private List<IRowValidationRule> GetDefaultRowValidationRules()
{
    var defaults = new List<IRowValidationRule>
    {
        new PrimaryKeyRowValidationRule(),
        new NumberOfColumnsRowValidationRule(),
    };

    return defaults;
}
// Computes the minimal character count that ValidateTableUnsafe forwards to the stream reader.
// The outer Math.Max guarantees the result is never below MINIMAL_CHARS_COUNT.
// NOTE(review): the remainder of the expression is cut off in this view.
| 1 | 66 | | private int GetMinimalCharsCount(Reader reader, Flags flags) => Math.Max(MINIMAL_CHARS_COUNT, Math.Max(reader.Ma |
| | 67 | |
|
/// <summary>
/// Validates one table and reports every failure through <paramref name="validationDetail"/>
/// instead of letting an exception escape.
/// A separate entry point exists for the table-group case because columns referenced from other
/// schemas have to be flagged on the table beforehand, so that their values are remembered
/// while the file is parsed.
/// </summary>
/// <param name="validationDetail">Collects the errors produced by the validation.</param>
/// <param name="table">Annotated table with the referenced-column flags already set correctly.</param>
public void ValidateTable(ITableValidationDetail validationDetail, Table table)
{
    try
    {
        ValidateTableUnsafe(validationDetail, table);
    }
    catch (Exception failure)
    {
        if (failure is Error knownError)
        {
            // A thrown Error already describes the problem, so it is recorded as-is.
            validationDetail.Errors.Add(knownError);
        }
        else
        {
            // Anything else is reported as the generic unknown error.
            validationDetail.Errors.Add(ErrorFactory.GetUnknownError());
        }
    }
}
/// <summary>
/// Builds the row reader, data creator and annotator from the descriptor's flags, opens a new
/// file stream through the descriptor's file wrapper and runs the validation on that stream.
/// Nothing is caught here; ValidateTable wraps this call in its try/catch.
/// </summary>
/// <param name="validationDetail">Validation detail handed in by ValidateTable.</param>
/// <param name="table">Table passed to the data creator and to the validation routine.</param>
| | 92 | | void ValidateTableUnsafe(ITableValidationDetail validationDetail, Table table) |
| | 93 | | { |
// NOTE(review): _flags (and _fileWrapper below) are dereferenced with '!' -
// presumably populated before validation starts; confirm against the callers.
| 1 | 94 | | RowReader rowReader = new RowReader(_tableDescriptor._flags!); |
| 1 | 95 | | TabularDataCreator tabularDataCreator = new TabularDataCreator(_tableDescriptor._flags!, table); |
| 1 | 96 | | TabularDataAnnotator tabularDataAnottator = new TabularDataAnnotator(_tableDescriptor._flags!); |
| | 97 | |
|
| 1 | 98 | | int minimalCharsCount = GetMinimalCharsCount(rowReader, _tableDescriptor._flags!); |
// The using block disposes the stream once validation finishes or throws.
| 1 | 99 | | using (var fs = _tableDescriptor._fileWrapper!.OpenNewFileStream()) |
| | 100 | | { |
// NOTE(review): the argument list of this call is cut off in this view.
| 1 | 101 | | ValidateTableWithWrapper(fs, minimalCharsCount, table, tabularDataAnottator, tabularDataCreator, rowRead |
| 1 | 102 | | } |
| 1 | 103 | | } |
| | 104 | |
|
/// <summary>
/// Validates the table read from <paramref name="fs"/> one row at a time and stores the
/// resulting errors, the BOM warning (if any) and the processing counters in
/// <paramref name="validationDetail"/>.
/// </summary>
/// <param name="fs">Open stream over the tabular data file.</param>
/// <param name="minimalCharsCount">Value forwarded to the stream reader as its third constructor argument.</param>
/// <param name="table">Table that is annotated and whose column count is reported.</param>
/// <param name="tabularDataAnottator">Annotator applied to the table and to every row.</param>
/// <param name="tabularDataCreator">Handles the header and skipped rows, creates rows and tracks the row/cell counters.</param>
/// <param name="rowReader">Reader handed to the creator for each row.</param>
/// <param name="validationDetail">Receives warnings, errors and the processed counts.</param>
/// <param name="tableGroup">When false, the annotated columns are created here as well.</param>
void ValidateTableWithWrapper(
    FileStream fs,
    int minimalCharsCount,
    Table table,
    TabularDataAnnotator tabularDataAnottator,
    TabularDataCreator tabularDataCreator,
    RowReader rowReader,
    ITableValidationDetail validationDetail,
    bool tableGroup = false)
{
    // A UTF-8 byte order mark only produces a warning; validation continues.
    if (BomDetector.ContainsUtf8Bom(fs))
    {
        validationDetail.Warnings.Add(WarningFactory.GetBomPresentWarning(validationDetail.TableIRI));
    }

    var streamReader = new CustomStreamReader(fs, BUFFER_SIZE, minimalCharsCount);

    tabularDataCreator.HandleHeaderAndSkipRows(streamReader);
    tabularDataAnottator.AnnotateTable(table, _tableDescriptor);
    if (!tableGroup)
    {
        tabularDataAnottator.CreateAnnotatedColumns(table, _tableDescriptor);
    }

    // CreateRow returns null once there are no more rows to read.
    for (Row? current = tabularDataCreator.CreateRow(streamReader, rowReader);
         current is not null;
         current = tabularDataCreator.CreateRow(streamReader, rowReader))
    {
        tabularDataAnottator.AnnotateRow(current, _tableDescriptor);
        validationDetail.Errors.AddRange(current.errors);
        validationDetail.Errors.AddRange(ValidateRow(current));
    }

    validationDetail.RowsProcessed = tabularDataCreator.rowsProcessed;
    validationDetail.CellsProcessed = tabularDataCreator.cellsProcessed;
    validationDetail.ColumnsProcessed = table.columns.Count;
}
/// <summary>
/// Runs every registered row rule on the row and then validates each of its cells.
/// </summary>
/// <param name="annotatedRow">Row to check, together with its cells.</param>
/// <returns>Row-rule errors first, followed by the errors of each cell in cell order.</returns>
public List<Error> ValidateRow(Row annotatedRow)
{
    var collected = new List<Error>();

    rowValidationRules.ForEach(rule => collected.AddRange(rule.ValidateRow(annotatedRow)));

    foreach (var currentCell in annotatedRow.cells)
    {
        collected.AddRange(ValidateCell(currentCell));
    }

    return collected;
}
| | 160 | |
|
/// <summary>
/// Applies every registered cell rule to the cell.
/// </summary>
/// <param name="annotatedCell">Cell to check.</param>
/// <returns>
/// The cell's own <c>errors</c> list; presumably the rules record their findings there.
/// </returns>
private List<Error> ValidateCell(Cell annotatedCell)
{
    cellValidationRules.ForEach(cellRule => cellRule.ValidateCell(annotatedCell));

    return annotatedCell.errors;
}
| | 169 | | } |
| | 170 | | } |