| | 1 | | using Newtonsoft.Json.Linq; |
| | 2 | | using ValidateLib.Metadata.Embedded; |
| | 3 | | using ValidateLib.TabularData.AnnotatedTabularDataModel; |
| | 4 | | using ValidateLib.TabularData.Parsing; |
| | 5 | |
|
| | 6 | | namespace ValidateLib.TabularData.Validation |
| | 7 | | { |
| | 8 | | public class TabularDataCreator : EmbeddedMetadataExtractor |
| | 9 | | { |
// Table that the rows and cells built by this creator are attached to (assigned in the constructor).
| 1 | 10 | | Table table { get; set; } |
// First record row read ahead by HandleHeaderAndSkipRows; CreateRow consumes it before reading further rows.
| | 11 | | string? firstRecordRowContent = null; |
// Row number passed to each new Row; starts at 1.
| 1 | 12 | | int rowNumber = 1; |
// Source column position within the current row; CreateRow resets it to 1 for every row it reads.
| 1 | 13 | | int sourceColumnNumber = 1; |
// Number of non-null rows returned by CreateRow so far.
| 1 | 14 | | public int rowsProcessed { get; set; } |
// Total number of cell values returned by the row parser so far (counted before skipped columns are removed).
| 1 | 15 | | public int cellsProcessed { get; set; } |
/// <summary>
/// Creates a row builder for <paramref name="table"/>, wiring up the row parser
/// and row reader with the same <paramref name="flags"/> that are handed to the base class.
/// </summary>
/// <param name="flags">Dialect flags forwarded to the base class, the row parser and the row reader.</param>
/// <param name="table">Table that created rows and cells will reference.</param>
public TabularDataCreator(Flags flags, Table table)
    : base(flags)
{
    this.table = table;

    _rowParser = new RowParser(flags);
    _rowReader = new RowReader(flags);
}
/// <summary>
/// Prepares the reader for row-by-row processing: remembers <paramref name="csr"/> in <c>_csr</c>,
/// parses a built-in JSON metadata skeleton into a <c>JObject</c>, resets <c>_sourceRowNumber</c> to 1,
/// and then runs <c>HandleEncoding</c>, <c>HandleSkipRows</c> and <c>HandleHeader</c> in that order.
/// Afterwards one row is read ahead into <c>firstRecordRowContent</c>; when that row exists it is
/// also passed to <c>HandleZeroHeaderRowCount</c>. <c>CreateRow</c> later consumes the buffered row
/// before reading anything else from the stream.
/// </summary>
/// <param name="csr">Stream reader positioned at the start of the tabular data.</param>
/// <remarks>
/// NOTE(review): the "point 4" / "point 5" markers presumably refer to steps of the CSVW parsing
/// algorithm - confirm against the base class. The JSON literal below appears truncated in this
/// rendering of the file; verify it against the real source before editing it.
/// </remarks>
| | 23 | | public void HandleHeaderAndSkipRows(CustomStreamReader csr) |
| | 24 | | { |
| 1 | 25 | | _csr = csr; |
| | 26 | |
|
| 1 | 27 | | string content = "{\r\n \"@context\": \"http://www.w3.org/ns/csvw\",\r\n \"rdfs:comment\": [],\r\n \"tabl |
| 1 | 28 | | JObject embeddedMetadata = JObject.Parse(content); |
| | 29 | |
|
| 1 | 30 | | _sourceRowNumber = 1; // point 4 |
| 1 | 31 | | HandleEncoding(); // point 5 |
| 1 | 32 | | HandleSkipRows(embeddedMetadata); |
| 1 | 33 | | HandleHeader(embeddedMetadata); |
| | 34 | |
|
| 1 | 35 | | firstRecordRowContent = _rowReader!.ReadRow(csr); |
| 1 | 36 | | if (firstRecordRowContent is not null) |
| 1 | 37 | | HandleZeroHeaderRowCount(embeddedMetadata, firstRecordRowContent); |
| | 38 | |
|
| 1 | 39 | | } |
| | 40 | |
|
/// <summary>
/// Reads the next physical row and converts it into a <see cref="Row"/> with one
/// <see cref="Cell"/> per remaining cell value (the inline numbers follow the step
/// numbering used throughout this class).
/// </summary>
/// <param name="csr">Reader the row content is pulled from when no row is buffered.</param>
/// <param name="rowReader">Row reader used to fetch the next row content from <paramref name="csr"/>.</param>
/// <returns>
/// The populated row; a row without cells when blank rows are being skipped and every
/// parsed value is empty; <c>null</c> when nothing is left to read or when the row is a comment.
/// </returns>
public Row? CreateRow(CustomStreamReader csr, RowReader rowReader)
{
    // 10.1
    sourceColumnNumber = 1;

    // 10.2 - the first record row may already have been read while the header was handled;
    // use that buffered content once, otherwise read a fresh row.
    string? rowContent = firstRecordRowContent ?? rowReader.ReadRow(csr);
    firstRecordRowContent = null;

    // there is nothing left to read
    if (rowContent is null)
    {
        return null;
    }

    Row? row = null;

    // 10.3 - comment rows produce no row; only the source row number advances
    if (!IsComment(rowContent))
    {
        // The row is created before parsing because the parser receives it as an argument.
        row = new Row(table, rowNumber, _sourceRowNumber);

        // 10.4
        List<string> cellValues = _rowParser!.ParseRow(rowContent, row);
        cellsProcessed += cellValues.Count;

        // 10.4.1 - a skipped blank row keeps its (empty) Row object but gets no cells
        if (!ShouldSkipTheRow(cellValues))
        {
            // 10.4.4
            HandleSkipColumns(cellValues);

            // 10.4.5 - note the algorithm counts i from 1, the loop counts from 0
            Column? column = null;
            for (int i = 0; i < cellValues.Count; i++)
            {
                // 10.4.5.1
                // NOTE(review): when the row has more values than the table has columns,
                // `column` keeps the last column that was found (or null if there was none),
                // so surplus cells are attached to that column - confirm this is intended.
                if (i < table.columns.Count)
                {
                    column = table.columns[i];
                }

                // 10.4.5.2 and 10.4.5.4
                row.cells.Add(new Cell(table, column, cellValues[i], row));

                // 10.4.5.5
                sourceColumnNumber++;
            }

            // Advance the row number so the next populated row gets the following position.
            // Previously the counter was never incremented and every row was numbered 1.
            // Skipped blank rows and comment rows do not consume a row number.
            rowNumber++;
        }
    }

    // 10.5
    _sourceRowNumber++;

    if (row is not null)
    {
        rowsProcessed++;
    }

    return row;
}
| | 117 | |
|
/// <summary>
/// Applies the skip-columns setting to a parsed row: delegates to <c>RemoveSkipColumns</c>
/// for <paramref name="cellValues"/> and then moves the source column number forward by
/// <c>flags.skipColumns</c>.
/// </summary>
/// <param name="cellValues">Parsed cell values of the current row; handed to <c>RemoveSkipColumns</c>.</param>
protected void HandleSkipColumns(List<string> cellValues)
{
    RemoveSkipColumns(cellValues);

    var skippedColumnCount = flags.skipColumns;
    sourceColumnNumber += skippedColumnCount;
}
/// <summary>
/// Tells whether every entry of <paramref name="stringList"/> is null or the empty string.
/// An empty list yields <c>true</c>.
/// </summary>
/// <param name="stringList">Values to inspect.</param>
/// <returns><c>true</c> when no entry contains at least one character; otherwise <c>false</c>.</returns>
protected static bool AreAllStringsEmpty(List<string> stringList)
{
    // "all empty" is the same as "there is no entry that is non-empty"
    bool hasNonEmptyEntry = stringList.Any(value => !string.IsNullOrEmpty(value));
    return !hasNonEmptyEntry;
}
| | 125 | |
|
/// <summary>
/// Decides whether a parsed row is to be skipped as blank: blank-row skipping must be
/// enabled in <c>flags</c> and every value in <paramref name="cellValues"/> must be empty.
/// </summary>
/// <param name="cellValues">Parsed cell values of the current row.</param>
/// <returns><c>true</c> when the row should be skipped; otherwise <c>false</c>.</returns>
protected bool ShouldSkipTheRow(List<string> cellValues)
{
    // Without the skip-blank-rows flag no row is ever skipped, so the values are not inspected.
    if (!flags.skipBlankRows)
    {
        return false;
    }

    return AreAllStringsEmpty(cellValues);
}
| | 130 | | } |
| | 131 | | } |