< Summary

Information
Class: ValidateLib.Metadata.Embedded.EmbeddedMetadataExtractor
Assembly: validatelib.dll
File(s): C:\skola_karlovka\RP\code\csv-validator\CSV_Validator\ValidateLib\Metadata\Embedded\EmbeddedMetadataExtractor.cs
Line coverage
71%
Covered lines: 46
Uncovered lines: 18
Coverable lines: 64
Total lines: 190
Line coverage: 71.8%
Branch coverage
N/A
Covered branches: 0
Total branches: 0
Branch coverage: N/A
Method coverage

Feature is only available for sponsors

Upgrade to PRO version

Metrics

File(s)

C:\skola_karlovka\RP\code\csv-validator\CSV_Validator\ValidateLib\Metadata\Embedded\EmbeddedMetadataExtractor.cs

#LineLine coverage
 1using Newtonsoft.Json.Linq;
 2using ValidateLib.ErrorsAndWarnings.Warnings;
 3using ValidateLib.Metadata.Descriptors;
 4using ValidateLib.Metadata.ParsingAndValidation;
 5using ValidateLib.TabularData.Parsing;
 6
 7namespace ValidateLib.Metadata.Embedded
 8{
 9    /// <summary>
 10    /// Extracts embedded metadata based on the
 11    /// <see href="https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#parsing">algorithm</see>
 12    /// </summary>
 13    public class EmbeddedMetadataExtractor : Reader
 14    {
 115        protected int _sourceRowNumber = 1;
 16        protected CustomStreamReader? _csr;
 17        protected RowReader? _rowReader;
 18        protected RowParser? _rowParser;
 119        public EmbeddedMetadataExtractor(Flags flags) : base(flags)
 20        {
 21
 122        }
 23        /// <summary>
 24        /// extracts embedded metadata from file stream.
 25        /// </summary>
 26        /// <param name="fileIRI"> normalized file path, used as URL for embedded metadata</param>
 27        /// <param name="dialect"> dialect to be used for extraction </param>
 28        /// <param name="fs"> file stream to extract the metadata from</param>
 29        /// <returns></returns>
 30        public JObject extractEmbeddedMetadata(FileStream fs, DialectDescriptor dialect, string fileIRI)
 31        {
 32            // point 2 in https://www.w3.org/TR/2015/REC-tabular-data-model-20151217/#parsing
 33            // creating default embedded metadata object
 134            string content = "{\r\n  \"@context\": \"http://www.w3.org/ns/csvw\",\r\n  \"rdfs:comment\": [],\r\n  \"tabl
 135            JObject embeddedMetadata = JObject.Parse(content);
 136            embeddedMetadata["url"] = fileIRI; // required property, needed for table description compatibility check
 37
 38            try
 39            {
 140                if (fs is not null)
 41                {
 42
 143                    _csr = new CustomStreamReader(fs, bufferSize, maximalLineTerminatorLength);
 144                    _rowReader = new RowReader(flags);
 145                    _rowParser = new RowParser(flags);
 146                    _sourceRowNumber = 1;    // point 4
 147                    HandleEncoding();   // point 5
 148                    HandleSkipRows(embeddedMetadata);
 149                    HandleHeader(embeddedMetadata);
 50
 51                }
 152            }
 53            finally
 54            {
 155                if (fs is not null)
 156                    fs.Dispose();
 157            }
 58
 59
 160            return embeddedMetadata;
 61        }
 62
 63        /// <summary>
 64        /// Extracts embedded table descriptor from the file.
 65        /// </summary>
 66        /// <param name="fs"> filestream to the file from which we extract embedded metadata </param>
 67        /// <param name="dialect"> dialect based on what we parse tabular data file </param>
 68        /// <param name="fileIRI"> file iri</param>
 69        /// <returns></returns>
 70        public TableDescriptor extractEmbeddedMetadataTableDescriptor(FileStream fs, DialectDescriptor dialect, string f
 71        {
 72
 173            JObject embeddedMetadata = extractEmbeddedMetadata(fs, dialect, fileIRI);
 174            List<Warning> warnings = new List<Warning>();
 75
 176            var tableDescriptor = MetadataParserValidator.ProcessTableFromJObject(warnings, embeddedMetadata, fileIRI);
 177            return tableDescriptor;
 78
 79        }
 80
 81        protected void HandleEncoding()
 82        {
 83            // TODO
 184        }
 85
 86        protected void HandleZeroHeaderRowCount(JObject embeddedMetadata, string firstRecordRowContent)
 87        {
 188            if (flags.headerRowCount == 0)
 89            {
 090                JArray columnsArray = (JArray)embeddedMetadata["tableSchema"]!["columns"]!;
 091                List<string> firstRecordRowColumns = _rowParser!.ParseRow(firstRecordRowContent);
 092                firstRecordRowColumns.RemoveRange(0, flags.skipColumns);
 093                for (int i = 0; i < firstRecordRowColumns.Count - flags.skipColumns; i++)
 94                {
 095                    AddNewColumn(columnsArray);
 96                }
 97
 98            }
 199        }
 100        // Point 6 in algorithm
 101        protected void HandleSkipRows(JObject embeddedMetadata)
 102        {
 1103            for (int i = 0; i < flags.skipRows; i++)
 104            {
 0105                string rowContent = _rowReader!.ReadRow(_csr!);
 0106                JArray comments = (JArray)embeddedMetadata["rdfs:comment"]!;
 0107                if (flags.commentPrefix != null && rowContent.StartsWith(flags.commentPrefix))
 108                {
 0109                    string rowContentWithoutCommentPrefix = rowContent.Substring(flags.commentPrefix.Length);
 0110                    comments.Add(rowContentWithoutCommentPrefix);
 111                }
 112                else
 113                {
 0114                    if (rowContent != "")
 115                    {
 0116                        comments.Add(rowContent);
 117                    }
 118                }
 0119                _sourceRowNumber++;
 120            }
 1121        }
 122
 123        // Point 7 in algorithm
 124        protected void HandleHeader(JObject embeddedMetadata)
 125        {
 126
 1127            for (int i = 0; i < flags.headerRowCount; i++)
 128            {
 1129                string rowContent = _rowReader!.ReadRow(_csr!)!;
 1130                JArray comments = (JArray)embeddedMetadata["rdfs:comment"]!;
 131                // point 7.2
 1132                if (IsComment(rowContent))
 133                {
 0134                    string rowContentWithoutCommentPrefix = rowContent.Substring(flags.commentPrefix.Length);
 0135                    comments.Add(rowContentWithoutCommentPrefix);
 136                }
 137                // point 7.3
 138                else
 139                {
 1140                    RowParser rowParser = new RowParser(flags);
 1141                    List<string> cellValues = rowParser.ParseRow(rowContent);
 1142                    HandleColumnsFromOneRowOfHeader(cellValues, embeddedMetadata);
 143                }
 144            }
 145
 1146            _sourceRowNumber++;
 1147        }
 148        /// <summary>
 149        /// Check whether the string is comment string.
 150        /// </summary>
 151        /// <param name="rowContent"> content of the row.</param>
 152        /// <returns> true if it is comment </returns>
 153        protected bool IsComment(string rowContent)
 154        {
 1155            return flags.commentPrefix != null && rowContent.StartsWith(flags.commentPrefix);
 156        }
 157
 158        protected void HandleColumnsFromOneRowOfHeader(List<string> cellValues, JObject embeddedMetadata)
 159        {
 1160            RemoveSkipColumns(cellValues);
 1161            JArray columnsArray = (JArray)embeddedMetadata["tableSchema"]!["columns"]!;
 162
 1163            for (int i = 0; i < cellValues.Count; i++)
 164            {
 165                // we do not want to use this anymore as we allow empty header names or all whitespace names
 166                /*
 167                if (cellValues[i] == "" || cellValues[i].All(char.IsWhiteSpace))
 168                    continue;
 169                */
 1170                if (columnsArray.Count <= i)
 171                {
 1172                    AddNewColumnWithTitle(columnsArray, cellValues[i]);
 173                }
 174                else
 175                {
 0176                    AppendNewTitleToColumnDescriptor(columnsArray, cellValues[i], i);
 177                }
 178            }
 1179        }
 180        /// <summary>
 181        /// Removes number of skip columns cell values from the row cells as they
 182        /// do no concern us.
 183        /// </summary>
 184        /// <param name="cellValues"> cells of a row </param>
 1185        protected void RemoveSkipColumns(List<string> cellValues) => cellValues.RemoveRange(0, flags.skipColumns);
 0186        protected void AddNewColumn(JArray columnsArray) => columnsArray.Add(new JObject());
 1187        protected void AddNewColumnWithTitle(JArray columnsArray, string title) => columnsArray.Add(new JObject(new JPro
 0188        protected void AppendNewTitleToColumnDescriptor(JArray columnsArray, string title, int index) => ((JArray)column
 189    }
 190}