| | | 1 | | using Newtonsoft.Json.Linq; |
| | | 2 | | using ValidateLib.Metadata.Embedded; |
| | | 3 | | using ValidateLib.TabularData.AnnotatedTabularDataModel; |
| | | 4 | | using ValidateLib.TabularData.Parsing; |
| | | 5 | | |
| | | 6 | | namespace ValidateLib.TabularData.Validation |
| | | 7 | | { |
| | | 8 | | public class TabularDataCreator : EmbeddedMetadataExtractor |
| | | 9 | | { |
| | 1 | 10 | | Table table { get; set; } |
| | | 11 | | string? firstRecordRowContent = null; |
| | 1 | 12 | | int rowNumber = 1; |
| | 1 | 13 | | int sourceColumnNumber = 1; |
| | 1 | 14 | | public int rowsProcessed { get; set; } |
| | 1 | 15 | | public int cellsProcessed { get; set; } |
| | 1 | 16 | | public TabularDataCreator(Flags flags, Table table) : base(flags) |
| | | 17 | | { |
| | 1 | 18 | | _rowParser = new RowParser(flags); |
| | 1 | 19 | | _rowReader = new RowReader(flags); |
| | 1 | 20 | | this.table = table; |
| | | 21 | | |
| | 1 | 22 | | } |
| | | 23 | | public void HandleHeaderAndSkipRows(CustomStreamReader csr) |
| | | 24 | | { |
| | 1 | 25 | | _csr = csr; |
| | | 26 | | |
| | 1 | 27 | | string content = "{\r\n \"@context\": \"http://www.w3.org/ns/csvw\",\r\n \"rdfs:comment\": [],\r\n \"tabl |
| | 1 | 28 | | JObject embeddedMetadata = JObject.Parse(content); |
| | | 29 | | |
| | 1 | 30 | | _sourceRowNumber = 1; // point 4 |
| | 1 | 31 | | HandleEncoding(); // point 5 |
| | 1 | 32 | | HandleSkipRows(embeddedMetadata); |
| | 1 | 33 | | HandleHeader(embeddedMetadata); |
| | | 34 | | |
| | 1 | 35 | | firstRecordRowContent = _rowReader!.ReadRow(csr); |
| | 1 | 36 | | if (firstRecordRowContent is not null) |
| | 1 | 37 | | HandleZeroHeaderRowCount(embeddedMetadata, firstRecordRowContent); |
| | | 38 | | |
| | 1 | 39 | | } |
| | | 40 | | |
| | | 41 | | public Row? CreateRow(CustomStreamReader csr, RowReader rowReader) |
| | | 42 | | { |
| | 1 | 43 | | Row? row = null; |
| | | 44 | | |
| | | 45 | | // 10.1 |
| | 1 | 46 | | sourceColumnNumber = 1; |
| | | 47 | | |
| | | 48 | | // 10.2 |
| | | 49 | | // not a first round we needed to read to process the header |
| | | 50 | | string? rowContent; |
| | 1 | 51 | | if (firstRecordRowContent is null) |
| | 1 | 52 | | rowContent = rowReader.ReadRow(csr!); |
| | | 53 | | else |
| | | 54 | | { |
| | 1 | 55 | | rowContent = firstRecordRowContent; |
| | 1 | 56 | | firstRecordRowContent = null; |
| | | 57 | | } |
| | | 58 | | |
| | | 59 | | // there is nothing left to read |
| | 1 | 60 | | if (rowContent is null) |
| | 1 | 61 | | return null; |
| | | 62 | | |
| | | 63 | | // 10.3 - we do not do anything, just skip 10.4 |
| | 1 | 64 | | if (!IsComment(rowContent)) |
| | | 65 | | { |
| | | 66 | | |
| | | 67 | | |
| | 1 | 68 | | row = new Row(table, rowNumber, _sourceRowNumber); |
| | | 69 | | |
| | | 70 | | // 10.4 |
| | 1 | 71 | | List<string> cellValues = _rowParser!.ParseRow(rowContent, row); |
| | | 72 | | |
| | 1 | 73 | | cellsProcessed += cellValues.Count; |
| | | 74 | | |
| | | 75 | | // if 10.4.1 skip the row and do nothing |
| | 1 | 76 | | if (!ShouldSkipTheRow(cellValues)) |
| | | 77 | | { |
| | | 78 | | // 10.4.2 |
| | | 79 | | // row = new Row(table, rowNumber, sourceRowNumber); |
| | | 80 | | // 10.4.3 - skipped |
| | | 81 | | |
| | | 82 | | // 10.4.4 |
| | 1 | 83 | | HandleSkipColumns(cellValues); |
| | | 84 | | |
| | 1 | 85 | | Column? column = null; |
| | | 86 | | // 10.4.5.1 |
| | | 87 | | // watch out that at the algorithm the i starts at 1 |
| | 1 | 88 | | for (int i = 0; i < cellValues.Count; i++) |
| | | 89 | | { |
| | | 90 | | |
| | 1 | 91 | | if (table.columns.Count >= i + 1) |
| | | 92 | | { |
| | | 93 | | // 10.4.5.1 |
| | 1 | 94 | | column = table.columns[i]; |
| | | 95 | | } |
| | | 96 | | |
| | | 97 | | // 10.4.5.2 |
| | 1 | 98 | | Cell cell = new Cell(table, column, cellValues[i], row); |
| | | 99 | | |
| | | 100 | | // 10.4.5.4 |
| | 1 | 101 | | row.cells.Add(cell); |
| | | 102 | | |
| | | 103 | | // 10.4.5.5 |
| | 1 | 104 | | sourceColumnNumber++; |
| | | 105 | | |
| | | 106 | | } |
| | | 107 | | } |
| | | 108 | | } |
| | | 109 | | |
| | | 110 | | // 10.5 |
| | 1 | 111 | | _sourceRowNumber++; |
| | | 112 | | |
| | 1 | 113 | | if (row is not null) |
| | 1 | 114 | | rowsProcessed++; |
| | 1 | 115 | | return row; |
| | | 116 | | } |
| | | 117 | | |
| | | 118 | | protected void HandleSkipColumns(List<string> cellValues) |
| | | 119 | | { |
| | 1 | 120 | | RemoveSkipColumns(cellValues); |
| | 1 | 121 | | sourceColumnNumber += flags.skipColumns; |
| | 1 | 122 | | } |
| | | 123 | | static protected bool AreAllStringsEmpty(List<string> stringList) |
| | 0 | 124 | | => stringList.All(string.IsNullOrEmpty); |
| | | 125 | | |
| | | 126 | | protected bool ShouldSkipTheRow(List<string> cellValues) |
| | | 127 | | { |
| | 1 | 128 | | return flags.skipBlankRows && AreAllStringsEmpty(cellValues); |
| | | 129 | | } |
| | | 130 | | } |
| | | 131 | | } |