| | | 1 | | using Newtonsoft.Json.Linq; |
| | | 2 | | using ValidateLib.ErrorsAndWarnings.Warnings; |
| | | 3 | | using ValidateLib.IRINormalization; |
| | | 4 | | using ValidateLib.Metadata.Descriptors; |
| | | 5 | | using ValidateLib.UtilityClasses; |
| | | 6 | | |
| | | 7 | | namespace ValidateLib.Metadata.MetdataLocation |
| | | 8 | | { |
| | | 9 | | /// <summary> |
| | | 10 | | /// Locates metadata according to the algorithm specified at: |
| | | 11 | | /// https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#locating-metadata |
| | | 12 | | /// </summary> |
| | | 13 | | public class MetadataLocator : ILinkLocationTest |
| | | 14 | | { |
// File name that LocateMetadataForLocalFile looks for in the directory of the tabular data file.
const string DEFAULT_NAME = "csv-metadata.json";

// Suffix that LocateMetadataForLocalFile appends to the full path of the tabular data file.
const string DEFAULT_SUFFIX = "-metadata.json";

// Warnings collected while candidate metadata files are checked against the tabular data file.
public List<Warning> warnings = new List<Warning>();
| | | 18 | | |
/// <summary>
/// Locates the metadata file for a remote tabular data file.
/// Any exception raised during the lookup is caught and reported as "nothing found".
/// </summary>
/// <param name="fileURL">URL of the remote file whose metadata should be located.</param>
/// <returns>Location of the metadata file, or null when none was found or the lookup failed.</returns>
public async Task<string?> LocateMetadataForRemoteFileAsync(string fileURL)
{
    string? located;
    try
    {
        located = await LocateMetadataForRemoteFileUnsafeAsync(fileURL);
    }
    catch (Exception)
    {
        // Best-effort lookup: a failure is treated the same as "no metadata".
        located = null;
    }

    return located;
}
| | | 36 | | |
/// <summary>
/// Performs the remote metadata lookup without catching exceptions: the Link header
/// lookup is tried first and the site-wide configuration lookup is the fallback.
/// </summary>
/// <param name="fileURL">URL of the remote tabular data file.</param>
/// <returns>Location of the metadata file, or null when neither lookup produced one.</returns>
async Task<string?> LocateMetadataForRemoteFileUnsafeAsync(string fileURL)
{
    string normalizedUrl = IRINormalizator.NormalizeIri(fileURL);
    string baseUrl = IRINormalizator.GetBaseUrl(normalizedUrl);

    string? fromLinkHeader = await GetMetadataLocationFromLinkHeaderAsync(normalizedUrl, baseUrl);

    // The site-wide configuration is only consulted when the Link header gave no result.
    return fromLinkHeader ?? GetMetadataLocationFromSiteWideConfig(normalizedUrl, baseUrl);
}
| | | 50 | | |
| | | 51 | | string? GetMetadataLocationFromSiteWideConfig(string normalizedFileURL, string baseURL) |
| | | 52 | | { |
| | 0 | 53 | | List<string> possibleLocations = SiteWideLocator.GetMetadataLocationFromSiteWideConfigAsync(normalizedFileUR |
| | | 54 | | |
| | 0 | 55 | | foreach (var possibleLocation in possibleLocations) |
| | | 56 | | { |
| | 0 | 57 | | if (DoesMetadataFileReferenceTheTabularDataFile(normalizedFileURL, possibleLocation, warnings)) |
| | 0 | 58 | | return possibleLocation; |
| | | 59 | | |
| | | 60 | | } |
| | 0 | 61 | | return null; |
| | 0 | 62 | | } |
| | | 63 | | |
| | | 64 | | async Task<string?> GetMetadataLocationFromLinkHeaderAsync(string normalizedFileURL, string baseURL) |
| | | 65 | | { |
| | 0 | 66 | | string? metadataURL = null; |
| | 0 | 67 | | using (HttpClient httpClient = new HttpClient()) |
| | | 68 | | { |
| | | 69 | | // Create a HttpRequestMessage with HttpMethod.Head to not fully download the file just get the link hea |
| | 0 | 70 | | HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Head, normalizedFileURL); |
| | | 71 | | |
| | | 72 | | // Send the request |
| | 0 | 73 | | HttpResponseMessage response = await httpClient.SendAsync(request); |
| | 0 | 74 | | var possibleLocations = LinkHeaderProcessor.GetMetadataLocationsFromLinkHeader(response); |
| | 0 | 75 | | foreach (var possibleLocation in possibleLocations) |
| | | 76 | | { |
| | 0 | 77 | | string absoluteNormalizedMetadataUrl = IRINormalizator.TurnUrlIntoAbsoluteWithBase(baseURL, possible |
| | 0 | 78 | | if (DoesMetadataFileReferenceTheTabularDataFile(normalizedFileURL, absoluteNormalizedMetadataUrl, wa |
| | 0 | 79 | | metadataURL = absoluteNormalizedMetadataUrl; |
| | | 80 | | } |
| | 0 | 81 | | return metadataURL; |
| | | 82 | | |
| | | 83 | | } |
| | | 84 | | |
| | 0 | 85 | | } |
/// <summary>
/// Tries to locate the metadata file for a local tabular data file.
/// Two candidates are probed in order: the data file path with DEFAULT_SUFFIX appended,
/// then DEFAULT_NAME placed in the data file's directory.
/// </summary>
/// <param name="filePath">Path to the local tabular data file.</param>
/// <returns>Local path of the metadata file, or null if no candidate qualified.</returns>
public string? LocateMetadataForLocalFile(string filePath)
{
    // The second candidate is only built and probed when the first one did not qualify.
    return TryLocalMetadataCandidate(filePath, filePath + DEFAULT_SUFFIX)
        ?? TryLocalMetadataCandidate(filePath, ReplaceFileName(filePath, DEFAULT_NAME));
}

/// <summary>
/// Checks a single candidate: it qualifies when it resolves to a location, exists on disk
/// and describes the given tabular data file.
/// </summary>
/// <param name="filePath">Path to the local tabular data file.</param>
/// <param name="candidatePath">Candidate path of the metadata file.</param>
/// <returns>Local path of the candidate when it qualifies; otherwise null.</returns>
string? TryLocalMetadataCandidate(string filePath, string candidatePath)
{
    string? location = IriUtilityClass.GetMetadataLocation(candidatePath);

    // Guard before constructing the Uri: a null location would make new Uri(...) throw.
    if (location is null)
    {
        return null;
    }

    string localPath = new Uri(location).LocalPath;
    if (File.Exists(localPath)
        && DoesMetadataFileReferenceTheTabularDataFile(filePath, localPath, warnings))
    {
        return localPath;
    }

    return null;
}
| | | 118 | | |
| | | 119 | | static bool DoesMetadataFileReferenceTheTabularDataFile(string tabularDataIRI, string metadataIRI, List<Warning> |
| | | 120 | | { |
| | 1 | 121 | | var metadataLocation = IriUtilityClass.GetMetadataLocation(metadataIRI); |
| | 1 | 122 | | if (metadataLocation is null) return false; |
| | 1 | 123 | | JObject? jObject = ObjectPropertyUtilityClass.GetDescriptor(metadataIRI); |
| | 1 | 124 | | if (jObject is null) return false; |
| | 1 | 125 | | var context = Context.GetContextFromJToken(jObject, new List<Warning>(), metadataLocation); |
| | | 126 | | |
| | 1 | 127 | | if (JsonUtilityClass.IsObjectTableGroupDescriptor(jObject)) |
| | | 128 | | { |
| | 1 | 129 | | if (DoesTheTableGroupReferenceTabularFile(tabularDataIRI, jObject, context)) |
| | | 130 | | { |
| | 1 | 131 | | return true; |
| | | 132 | | } |
| | | 133 | | else |
| | | 134 | | { |
| | 1 | 135 | | if (warnings is not null) |
| | | 136 | | { |
| | 1 | 137 | | warnings.Add(WarningFactory.GetFoundMetadataFileNotDescribingTabularFileWarning(metadataIRI)); |
| | | 138 | | } |
| | 1 | 139 | | return false; |
| | | 140 | | } |
| | | 141 | | |
| | | 142 | | } |
| | 1 | 143 | | else if (JsonUtilityClass.IsObjectTableDescriptor(jObject)) |
| | | 144 | | { |
| | 1 | 145 | | TableDescriptor.Normalize(jObject, context); |
| | 1 | 146 | | if (DoesTableReferenceTheTabularDataFIle(tabularDataIRI, jObject)) |
| | | 147 | | { |
| | 1 | 148 | | return true; |
| | | 149 | | } |
| | | 150 | | else |
| | | 151 | | { |
| | 0 | 152 | | if (warnings is not null) |
| | | 153 | | { |
| | 0 | 154 | | warnings.Add(WarningFactory.GetFoundMetadataFileNotDescribingTabularFileWarning(metadataIRI)); |
| | | 155 | | } |
| | 0 | 156 | | return false; |
| | | 157 | | } |
| | | 158 | | } |
| | | 159 | | |
| | 0 | 160 | | return false; |
| | | 161 | | } |
| | | 162 | | |
/// <summary>
/// Runs TableGroupDescriptor.Normalize on the descriptor and then checks whether any object
/// in its "tables" array has a "url" that equals the given tabular data IRI.
/// </summary>
/// <param name="tabularDataIRI">IRI of the tabular data file to look for.</param>
/// <param name="tableGroupObject">Table group descriptor; it is normalized in place.</param>
/// <param name="context">Context passed to the normalization.</param>
/// <returns>True when one of the tables references the tabular data file; otherwise false.</returns>
static bool DoesTheTableGroupReferenceTabularFile(string tabularDataIRI, JObject tableGroupObject, Context context)
{
    TableGroupDescriptor.Normalize(tableGroupObject, context);

    // A missing or non-array "tables" property cannot reference any file.
    if (tableGroupObject["tables"] is not JArray tablesArray)
    {
        return false;
    }

    foreach (JToken table in tablesArray)
    {
        // Type-test before use: an unconditional cast would throw on non-object entries.
        if (table is JObject tableObj
            && tableObj.ContainsKey("url")
            && DoesTableReferenceTheTabularDataFIle(tabularDataIRI, tableObj))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Checks whether the "url" property of the table descriptor is a string that equals
/// the given tabular data IRI.
/// </summary>
/// <param name="tabularDataIRI">IRI of the tabular data file to compare with.</param>
/// <param name="tableObject">Table descriptor whose "url" property is inspected.</param>
/// <returns>True on an exact match; false when "url" is absent, not a string, or different.</returns>
static bool DoesTableReferenceTheTabularDataFIle(string tabularDataIRI, JObject tableObject)
{
    JToken? urlToken = tableObject["url"];
    if (urlToken is null || urlToken.Type != JTokenType.String)
    {
        return false;
    }

    return urlToken.ToString() == tabularDataIRI;
}
/// <summary>
/// Builds a path that places <paramref name="newFileName"/> in the directory of
/// <paramref name="originalPath"/>.
/// </summary>
/// <param name="originalPath">Path whose directory part is kept.</param>
/// <param name="newFileName">File name that replaces the last segment.</param>
/// <returns>The combined path, or just the new file name when no directory is reported.</returns>
static string ReplaceFileName(string originalPath, string newFileName)
{
    // When GetDirectoryName returns null the new file name is returned on its own.
    return Path.GetDirectoryName(originalPath) is string directory
        ? Path.Combine(directory, newFileName)
        : newFileName;
}
| | | 197 | | |
/// <summary>
/// This is only used for integration tests. Should never be used otherwise. Following methods also.
/// Forwards to LocateMetadataForRemoteFileUnsafe and turns any exception into a null result.
/// </summary>
/// <param name="fileURL">URL of the remote tabular data file.</param>
/// <param name="mockupMessage">Response message handed on to the unsafe lookup.</param>
/// <returns>Location of the metadata file, or null when none was found or the lookup failed.</returns>
string? ILinkLocationTest.LocateMetadataForRemoteFile(string fileURL, HttpResponseMessage mockupMessage)
{
    ILinkLocationTest self = this;
    string? located;
    try
    {
        located = self.LocateMetadataForRemoteFileUnsafe(fileURL, mockupMessage);
    }
    catch (Exception)
    {
        // Best-effort lookup: a failure is treated the same as "no metadata".
        located = null;
    }

    return located;
}
| | | 216 | | |
/// <summary>
/// Test-only lookup that does not catch exceptions: the Link header of the supplied
/// response message is tried first and the site-wide configuration lookup is the fallback.
/// </summary>
/// <param name="fileURL">URL of the remote tabular data file.</param>
/// <param name="mockupMessage">Response message passed to the Link header lookup.</param>
/// <returns>Location of the metadata file, or null when neither lookup produced one.</returns>
string? ILinkLocationTest.LocateMetadataForRemoteFileUnsafe(string fileURL, HttpResponseMessage mockupMessage)
{
    string normalizedUrl = IRINormalizator.NormalizeIri(fileURL);
    string baseUrl = IRINormalizator.GetBaseUrl(normalizedUrl);

    ILinkLocationTest self = this;

    // The site-wide configuration is only consulted when the Link header gave no result.
    return self.GetMetadataLocationFromLinkHeader(normalizedUrl, baseUrl, mockupMessage)
        ?? GetMetadataLocationFromSiteWideConfig(normalizedUrl, baseUrl);
}
| | | 229 | | |
| | | 230 | | string? ILinkLocationTest.GetMetadataLocationFromLinkHeader(string normalizedFileURL, string baseURL, HttpRespon |
| | | 231 | | { |
| | 0 | 232 | | string? metadataURL = null; |
| | 0 | 233 | | var possibleLocations = LinkHeaderProcessor.GetMetadataLocationsFromLinkHeader(mockupMessage); |
| | 0 | 234 | | foreach (var possibleLocation in possibleLocations) |
| | | 235 | | { |
| | 0 | 236 | | string absoluteNormalizedMetadataUrl = IRINormalizator.TurnUrlIntoAbsoluteWithBase(baseURL, possibleLoca |
| | 0 | 237 | | if (DoesMetadataFileReferenceTheTabularDataFile(normalizedFileURL, absoluteNormalizedMetadataUrl, warnin |
| | 0 | 238 | | metadataURL = absoluteNormalizedMetadataUrl; |
| | | 239 | | } |
| | 0 | 240 | | return metadataURL; |
| | | 241 | | |
| | | 242 | | |
| | | 243 | | } |
| | | 244 | | } |
| | | 245 | | } |