| | 1 | | using ValidateLib.ErrorsAndWarnings.Errors; |
| | 2 | | using ValidateLib.ErrorsAndWarnings.Errors.ValidationErrors; |
| | 3 | | using ValidateLib.Metadata.Descriptors; |
| | 4 | | using ValidateLib.Metadata.Embedded; |
| | 5 | | using ValidateLib.Results; |
| | 6 | |
|
| | 7 | | namespace ValidateLib.TableCompatibility |
| | 8 | | { |
| | 9 | | /// <summary> |
| | 10 | | /// Provides utilities for checking compatibilities of table descriptors and |
| | 11 | | /// table schemes. Needed for tabular data validation. |
| | 12 | | /// </summary> |
| | 13 | | public class CompatibilityChecker : ICompatibilityChecker |
| | 14 | | { |
| | 15 | | public int _conflictingColumnNumber; |
| | 16 | | /// <summary> |
| | 17 | | /// Checks each table of the group against the metadata embedded in its own file and adds an error to |
| | 18 | | /// that table's validation details when the two are not compatible. See https://www.w3.org/TR/2015/REC-tabular-metadata-20151217/#schema-compatibility |
| | 19 | | /// </summary> |
| | 20 | | /// <param name="tableGroup"> table group whose tables are checked</param> |
| | 21 | | /// <param name="validationDetails"> validation details that receive the compatibility errors, indexed by table position</param> |
| | 22 | |
|
| | 23 | | public void CheckSchemesCompatible(TableGroupDescriptor tableGroup, ITableGroupValidationDetail validationDetail |
| | 24 | | { |
| 1 | 25 | | for (int i = 0; i < tableGroup.tables!._value!.Count; i++) |
| | 26 | | { |
| 1 | 27 | | var table = tableGroup.tables!._value![i]; |
| | 28 | | try |
| | 29 | | { |
| 1 | 30 | | using (var fs = table._fileWrapper!.OpenNewFileStream()) |
| | 31 | | { |
| 1 | 32 | | var emMetadataExtractor = new EmbeddedMetadataExtractor(table._flags!); |
| 1 | 33 | | var embeddedMetadata = emMetadataExtractor.extractEmbeddedMetadataTableDescriptor(fs!, table.dia |
| | 34 | |
|
| 1 | 35 | | if (!AreTableDescriptionsCompatible(table, embeddedMetadata)) |
| | 36 | | { |
| 1 | 37 | | validationDetails.TableValidationDetails[i].Errors.Add |
| 1 | 38 | | ( |
| 1 | 39 | | ErrorFactory.GetTableSchemesNotCompatibleError |
| 1 | 40 | | ( |
| 1 | 41 | | table.url!._value!, |
| 1 | 42 | | _conflictingColumnNumber |
| 1 | 43 | | ) |
| 1 | 44 | | ); |
| | 45 | | } |
| 1 | 46 | | } |
| 1 | 47 | | } |
| 1 | 48 | | catch (Exception ex) |
| | 49 | | { |
| 1 | 50 | | ErrorFactory.ThrowRemoteFileResoltuionError(table.url!._value!, ex.Message); |
| 0 | 51 | | } |
| | 52 | |
|
| | 53 | | } |
| 1 | 54 | | } |
/// <summary>
/// Compares two schemas column by column over their non-virtual columns.
/// Whenever a conflict is found, <c>_conflictingColumnNumber</c> holds the 1-based number of the
/// offending column (or the larger of the two column counts when the counts differ). After a
/// successful comparison it is left at the number of the last column that was checked.
/// </summary>
/// <param name="schema1"> first schema to be checked</param>
/// <param name="schema2"> second schema to be checked</param>
/// <returns> true if the schemas are compatible, false otherwise</returns>
public bool AreSchemaDescriptorsCompatible(SchemaDescriptor schema1, SchemaDescriptor schema2)
{
    int firstCount = GetNumberOfNonVirutalColumns(schema1);
    int secondCount = GetNumberOfNonVirutalColumns(schema2);

    if (firstCount != secondCount)
    {
        _conflictingColumnNumber = Math.Max(firstCount, secondCount);
        return false;
    }

    // The loop simply does not run when there are no non-virtual columns.
    for (int index = 0; index < firstCount; index++)
    {
        ColumnDescriptor left = schema1.columns!._value![index];
        ColumnDescriptor right = schema2.columns!._value![index];

        // Record the column being examined before any check can bail out.
        _conflictingColumnNumber = index + 1;

        bool leftIsVirtual = left!._virtual!._value;
        bool rightIsVirtual = right!._virtual!._value;

        // A virtual column can never line up with a non-virtual one.
        if (leftIsVirtual != rightIsVirtual)
        {
            return false;
        }

        if (!AreTwoNonVirtualColumnsCompatible(left, right))
        {
            return false;
        }
    }

    return true;
}
| | 84 | |
|
/// <summary>
/// Decides whether two non-virtual columns are compatible.
/// </summary>
/// <param name="column1"> first column</param>
/// <param name="column2"> second column</param>
/// <returns>
/// true when either column has neither a name nor titles, when both names are present and equal,
/// or when the titles of the two columns intersect; false otherwise
/// </returns>
private static bool AreTwoNonVirtualColumnsCompatible(ColumnDescriptor column1, ColumnDescriptor column2)
{
    // Case 1: a column without name and titles is compatible with anything.
    if ((column1.name is null && column1.titles is null)
        || (column2.name is null && column2.titles is null))
    {
        return true;
    }

    // Case 2: both columns are named and the names are equal.
    bool namesMatch = column1.name is not null
        && column2.name is not null
        && column1.name._value == column2.name._value;

    // Case 3: fall back to comparing titles.
    // The fourth case is not considered because we are validating.
    return namesMatch || AreTheTitlesIntersecting(column1, column2);
}
| | 107 | |
|
/// <summary>
/// Determines whether the titles of two columns share at least one title, compared
/// case-sensitively. Titles stored under the "und" language key are matched against the titles
/// of every language of the other column; titles of any other language are matched against the
/// other column's titles for the same language key and for its "und" key.
/// </summary>
/// <param name="column1"> first column</param>
/// <param name="column2"> second column</param>
/// <returns>
/// true if a shared title is found; false otherwise, including when either column has no titles
/// </returns>
private static bool AreTheTitlesIntersecting(ColumnDescriptor column1, ColumnDescriptor column2)
{
    // Both columns must carry titles: a second column without titles has to yield false
    // instead of being dereferenced.
    Dictionary<string, string[]>? titles1 = column1.titles?._value;
    Dictionary<string, string[]>? titles2 = column2.titles?._value;
    if (titles1 is null || titles2 is null)
    {
        return false;
    }

    foreach (var entry in titles1)
    {
        if (entry.Key == "und")
        {
            // "und" titles may match a title in any language of the other column.
            foreach (var otherTitles in titles2.Values)
            {
                if (HasNonEmptyCaseSensitiveIntersection(entry.Value, otherTitles))
                {
                    return true;
                }
            }

            continue;
        }

        // TODO maybe some BCP truncation?
        if (titles2.TryGetValue(entry.Key, out var sameLanguageTitles)
            && HasNonEmptyCaseSensitiveIntersection(entry.Value, sameLanguageTitles))
        {
            return true;
        }

        if (titles2.TryGetValue("und", out var undeterminedTitles)
            && HasNonEmptyCaseSensitiveIntersection(entry.Value, undeterminedTitles))
        {
            return true;
        }
    }

    return false;
}
| | 145 | |
|
/// <summary>
/// Tells whether two string arrays have at least one element in common, using an ordinal
/// (case-sensitive) comparison.
/// </summary>
/// <param name="array1"> first array of strings</param>
/// <param name="array2"> second array of strings</param>
/// <returns> true if some string occurs in both arrays, false otherwise</returns>
static bool HasNonEmptyCaseSensitiveIntersection(string[] array1, string[] array2)
{
    // Index the first array once, then probe it with the elements of the second.
    var lookup = new HashSet<string>(array1, StringComparer.Ordinal);
    return lookup.Overlaps(array2);
}
| | 155 | |
|
/// <summary>
/// Checks whether two table descriptors are compatible.
/// Implements <see href="https://www.w3.org/TR/2015/REC-tabular-metadata-20151217/#table-description-compatibility"/>.
/// </summary>
/// <param name="table1"> first table descriptor</param>
/// <param name="table2"> second table descriptor</param>
/// <returns> true if they are compatible, false otherwise.</returns>
/// <exception cref="ArgumentException"> thrown when either descriptor has no url</exception>
public bool AreTableDescriptionsCompatible(TableDescriptor table1, TableDescriptor table2)
{
    if (table1.url is null || table2.url is null)
    {
        throw new ArgumentException("Required property missing!");
    }

    // Descriptors that point at different resources are never compatible.
    if (table1.url._value != table2.url._value)
    {
        return false;
    }

    // When a schema is missing on either side (or on both) there is nothing to conflict with.
    if (table1.tableSchema is null || table2.tableSchema is null)
    {
        return true;
    }

    return AreSchemaDescriptorsCompatible(table1.tableSchema._value!, table2.tableSchema._value!);
}
| | 180 | |
|
/// <summary>
/// Counts the columns of a schema that are not flagged as virtual.
/// </summary>
/// <param name="schema"> schema whose columns are counted</param>
/// <returns> number of non-virtual columns; 0 when the schema has no column list</returns>
public static int GetNumberOfNonVirutalColumns(SchemaDescriptor schema)
{
    var columns = schema.columns?._value;
    if (columns is null)
    {
        return 0;
    }

    return columns.Count(column => !column._virtual!._value);
}
| | 193 | | } |
| | 194 | | } |