6
6
using System.Collections.Concurrent;
7
7
using System.Collections;
8
8
using System.IO.Pipes;
9
+
using System;
10
+
using System.ComponentModel.Design.Serialization;
9
11
10
12
namespace Signum.Utilities;
11
13
@@ -181,8 +183,7 @@ public static IEnumerable<T> ReadStream<T>(Stream stream, Encoding? encoding = n
181
183
182
184
var members = CsvMemberCache<T>.Members;
183
185
var parsers = members.Select(m => GetParser(defCulture, m, defOptions.ParserFactory)).ToList();
184
-
185
-
Regex regex = GetRegex(defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);
186
+
Regex valueRegex = GetRegex(isLine: false, defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);
186
187
187
188
if (defOptions.AsumeSingleLine)
188
189
{
@@ -199,66 +200,68 @@ public static IEnumerable<T> ReadStream<T>(Stream stream, Encoding? encoding = n
199
200
if (csvLine == null)
200
201
yield break;
201
202
202
-
Match? m = null;
203
-
T? t = null;
204
-
try
203
+
if (csvLine.Length > 0)
205
204
{
206
-
m = regex.Match(csvLine);
207
-
if (m.Length > 0)
205
+
T? t = null;
206
+
try
208
207
{
209
-
t = ReadObject<T>(m, members, parsers);
208
+
var m = valueRegex.EnumerateMatches(csvLine);
209
+
210
+
t = ReadObject<T>(m, csvLine.AsSpan(), members, parsers);
210
211
}
211
-
}
212
-
catch (Exception e)
213
-
{
214
-
e.Data["row"] = line;
212
+
catch (Exception e)
213
+
{
214
+
e.Data["row"] = line;
215
215
216
-
if (defOptions.SkipError == null || !defOptions.SkipError(e, m))
217
-
throw new ParseCsvException(e);
218
-
}
216
+
if (defOptions.SkipError == null || !defOptions.SkipError(e, csvLine))
217
+
throw new ParseCsvException(e);
218
+
}
219
219
220
-
if (t != null)
221
-
yield return t;
220
+
if (t != null)
221
+
yield return t;
222
222
223
+
}
223
224
line++;
224
225
}
225
226
}
226
227
}
227
228
else
228
229
{
230
+
Regex lineRegex = GetRegex(isLine: true, defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);
231
+
229
232
using (StreamReader sr = new StreamReader(stream, encoding))
230
233
{
231
234
string str = sr.ReadToEnd();
232
235
233
-
var matches = regex.Matches(str).Cast<Match>();
234
-
235
-
if (skipLines > 0)
236
-
matches = matches.Skip(skipLines);
237
-
238
-
int line = skipLines;
239
-
foreach (var m in matches)
236
+
int i = 0;
237
+
foreach (Match m in lineRegex.Matches(str))
240
238
{
239
+
if (i < skipLines)
240
+
continue;
241
+
241
242
if (m.Length > 0)
242
243
{
243
244
T? t = null;
244
245
try
245
246
{
247
+
var line = m.Value;
248
+
246
249
if (options?.Constructor != null)
247
-
t = options.Constructor(m);
250
+
t = options.Constructor(line);
248
251
else
249
-
t = ReadObject<T>(m, members, parsers);
252
+
t = ReadObject<T>(valueRegex.EnumerateMatches(line), line, members, parsers);
250
253
}
251
254
catch (Exception e)
252
255
{
253
-
e.Data["row"] = line;
256
+
e.Data["row"] = i;
254
257
255
-
if (defOptions.SkipError == null || !defOptions.SkipError(e, m))
258
+
if (defOptions.SkipError == null || !defOptions.SkipError(e, str.Substring(m.Index, m.Length)))
256
259
throw new ParseCsvException(e);
257
260
}
258
261
if (t != null)
259
262
yield return t;
260
263
}
261
-
line++;
264
+
i++;
262
265
}
263
266
}
264
267
}
@@ -271,18 +274,20 @@ public static T ReadLine<T>(string csvLine, CultureInfo? culture = null, CsvRead
271
274
272
275
var defCulture = GetDefaultCulture(culture);
273
276
274
-
Regex regex = GetRegex(defCulture, defOptions.RegexTimeout);
277
+
Regex regex = GetRegex(isLine: false, defCulture, defOptions.RegexTimeout);
275
278
276
-
Match m = regex.Match(csvLine);
279
+
var vme = regex.EnumerateMatches(csvLine);
277
280
278
281
var members = CsvMemberCache<T>.Members;
279
282
280
-
return ReadObject<T>(m,
283
+
return ReadObject<T>(vme,
284
+
csvLine.AsSpan(),
281
285
members,
282
286
members.Select(c => GetParser(defCulture, c, defOptions.ParserFactory)).ToList());
283
287
}
284
288
285
-
private static Func<string, object?> GetParser<T>(CultureInfo culture, CsvMemberInfo<T> column, Func<CsvMemberInfo<T>, CultureInfo, Func<string, object?>?>? parserFactory)
289
+
290
+
private static ValueParser GetParser<T>(CultureInfo culture, CsvMemberInfo<T> column, Func<CsvMemberInfo<T>, CultureInfo, ValueParser?>? parserFactory)
286
291
{
287
292
if (parserFactory != null)
288
293
{
@@ -294,43 +299,51 @@ public static T ReadLine<T>(string csvLine, CultureInfo? culture = null, CsvRead
294
299
295
300
var type = column.IsCollection ? column.MemberInfo.ReturningType().ElementType()! : column.MemberInfo.ReturningType();
296
301
297
-
return str => ConvertTo(str, type, culture, column.Format);
302
+
return GetBasicParser(type.UnNullify(), culture, column.Format);
298
303
}
299
304
300
-
static T ReadObject<T>(Match m, List<CsvMemberInfo<T>> members, List<Func<string, object?>> parsers)
301
-
{
302
-
var vals = m.Groups["val"].Captures;
303
-
304
-
if (vals.Count < members.Count)
305
-
throw new FormatException("Only {0} columns found (instead of {1}) in line: {2}".FormatWith(vals.Count, members.Count, m.Value));
305
+
public delegate object? ValueParser(ReadOnlySpan<char> str);
306
306
307
+
static T ReadObject<T>(Regex.ValueMatchEnumerator vme, ReadOnlySpan<char> line, List<CsvMemberInfo<T>> members, List<ValueParser> parsers)
308
+
{
307
309
T t = Activator.CreateInstance<T>();
308
310
309
-
for (int i = 0; i < members.Count; i++)
311
+
bool endsInCollection = false;
312
+
int i = 0;
313
+
foreach (var v in vme)
310
314
{
315
+
if (members.Count <= i)
316
+
continue;
317
+
318
+
var value = line.Slice(v.Index, v.Length);
311
319
var member = members[i];
312
320
var parser = parsers[i];
313
-
string? str = null;
314
321
try
315
322
{
316
323
if (!member.IsCollection)
317
324
{
318
-
str = DecodeCsv(vals[i].Value);
325
+
value = DecodeCsv(value);
319
326
320
-
object? val = parser(str);
327
+
object? val = parser(value);
321
328
322
329
member.MemberEntry.Setter!(t, val);
323
330
}
324
331
else
325
332
{
333
+
if (i != members.Count - 1)
334
+
throw new InvalidOperationException($"Collection {member.MemberInfo} should be the last member");
335
+
endsInCollection = true;
326
336
var list = (IList)Activator.CreateInstance(member.MemberInfo.ReturningType())!;
327
337
328
-
for (int j = i; j < vals.Count; j++)
329
-
{
330
-
str = DecodeCsv(vals[j].Value);
331
-
332
-
object? val = parser(str);
338
+
value = DecodeCsv(value);
339
+
object? val = parser(value);
340
+
list.Add(val);
333
341
342
+
foreach (var v2 in vme)
343
+
{
344
+
value = line.Slice(v2.Index, v2.Length);
345
+
value = DecodeCsv(value);
346
+
val = parser(value);
334
347
list.Add(val);
335
348
}
336
349
@@ -339,11 +352,17 @@ static T ReadObject<T>(Match m, List<CsvMemberInfo<T>> members, List<Func<string
339
352
}
340
353
catch (Exception e)
341
354
{
342
-
e.Data["value"] = str;
355
+
e.Data["value"] = new String(value);
343
356
e.Data["member"] = members[i].MemberInfo.Name;
344
357
throw;
345
358
}
359
+
360
+
i++;
346
361
}
362
+
363
+
if (!endsInCollection && i != members.Count)
364
+
throw new FormatException("Only {0} columns found (instead of {1}) in line: {2}".FormatWith(i, members.Count, new string(line)));
365
+
347
366
return t;
348
367
}
349
368
@@ -369,7 +388,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
369
388
var defCulture = GetDefaultCulture(culture);
370
389
var defOptions = options ?? new CsvReadOptions();
371
390
372
-
Regex regex = GetRegex(defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);
391
+
Regex valueRegex = GetRegex(false, defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);
373
392
if (defOptions.AsumeSingleLine)
374
393
{
375
394
using (StreamReader sr = new StreamReader(stream, encoding))
@@ -386,7 +405,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
386
405
string[]? t = null;
387
406
try
388
407
{
389
-
m = regex.Match(csvLine);
408
+
m = valueRegex.Match(csvLine);
390
409
if (m.Length > 0)
391
410
{
392
411
t = m.Groups["val"].Captures.Select(c => c.Value).ToArray();
@@ -396,7 +415,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
396
415
{
397
416
e.Data["row"] = line;
398
417
399
-
if (defOptions.SkipError == null || !defOptions.SkipError(e, m))
418
+
if (defOptions.SkipError == null || !defOptions.SkipError(e, csvLine))
400
419
throw new ParseCsvException(e);
401
420
}
402
421
@@ -413,7 +432,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
413
432
{
414
433
string str = sr.ReadToEnd();
415
434
416
-
var matches = regex.Matches(str).Cast<Match>();
435
+
var matches = valueRegex.Matches(str).Cast<Match>();
417
436
418
437
int line = 0;
419
438
foreach (var m in matches)
@@ -429,7 +448,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e
429
448
{
430
449
e.Data["row"] = line;
431
450
432
-
if (defOptions.SkipError == null || !defOptions.SkipError(e, m))
451
+
if (defOptions.SkipError == null || !defOptions.SkipError(e, m.Value))
433
452
throw new ParseCsvException(e);
434
453
}
435
454
if (t != null)
@@ -530,16 +549,17 @@ public class MyFileCSV
530
549
""";
531
550
}
532
551
533
-
534
-
static ConcurrentDictionary<char, Regex> regexCache = new ConcurrentDictionary<char, Regex>();
535
-
const string BaseRegex = @"^((?<val>'(?:[^']+|'')*'|[^;\r\n]*))?((?!($|\r\n));(?<val>'(?:[^']+|'')*'|[^;\r\n]*))*($|\r\n)";
536
-
static Regex GetRegex(CultureInfo culture, TimeSpan timeout, char? listSeparator = null)
552
+
static ConcurrentDictionary<(bool multiLine, char separator, TimeSpan timeout), Regex> regexCache = new();
553
+
readonly static string ValueRegex = "'(?:[^']+|'')*'|[^;\r\n]*".Replace('\'', '"');
554
+
readonly static string LineRegex = $@"^({ValueRegex})?((?!($|\r\n));({ValueRegex}))*($|\r\n)";
555
+
static Regex GetRegex(bool isLine, CultureInfo culture, TimeSpan timeout, char? listSeparator = null)
537
556
{
538
557
char separator = listSeparator ?? GetListSeparator(culture);
539
558
540
-
return regexCache.GetOrAdd(separator, s =>
541
-
new Regex(BaseRegex.Replace('\'', '"').Replace(';', s), RegexOptions.Multiline | RegexOptions.ExplicitCapture, timeout));
559
+
return regexCache.GetOrAdd((isLine, separator, timeout), a =>
560
+
new Regex((isLine ? LineRegex : ValueRegex).Replace(';', a.separator), RegexOptions.Multiline | RegexOptions.ExplicitCapture, a.timeout));
542
561
}
562
+
543
563
544
564
private static char GetListSeparator(CultureInfo culture)
545
565
{
@@ -570,62 +590,60 @@ static CsvMemberCache()
570
590
public static List<CsvMemberInfo<T>> Members;
571
591
}
572
592
573
-
static string DecodeCsv(string s)
593
+
594
+
595
+
static ReadOnlySpan<char> DecodeCsv(ReadOnlySpan<char> s)
574
596
{
575
597
if (s.StartsWith("\"") && s.EndsWith("\""))
576
598
{
577
-
string str = s[1..^1].Replace("\"\"", "\"");
599
+
string str = new string(s[1..^1]).Replace("\"\"", "\"");
578
600
579
601
return Regex.Replace(str, "(?<!\r)\n", "\r\n");
580
602
}
581
603
582
604
return s;
583
605
}
584
606
585
-
static object? ConvertTo(string s, Type type, CultureInfo culture, string? format)
607
+
static ValueParser GetBasicParser(Type type, CultureInfo culture, string? format)
586
608
{
587
-
Type? baseType = Nullable.GetUnderlyingType(type);
588
-
if (baseType != null)
609
+
return type switch
589
610
{
590
-
if (!s.HasText())
591
-
return null;
592
-
593
-
type = baseType;
594
-
}
595
-
596
-
if (type.IsEnum)
597
-
return Enum.Parse(type, s);
598
-
599
-
if (type == typeof(DateTime))
600
-
if (format == null)
601
-
return DateTime.Parse(s, culture);
602
-
else
603
-
return DateTime.ParseExact(s, format, culture);
604
-
605
-
if (type == typeof(DateOnly))
606
-
if (format == null)
607
-
return DateOnly.Parse(s, culture);
608
-
else
609
-
return DateOnly.ParseExact(s, format, culture);
610
-
611
-
if (type == typeof(Guid))
612
-
return Guid.Parse(s);
613
-
614
-
return Convert.ChangeType(s, type, culture);
611
+
_ when type == typeof(string) => str => str.Length == 0 ? null : str.ToString(),
612
+
_ when type == typeof(byte) => str => str.Length == 0 ? null : byte.Parse(str, NumberStyles.Integer, culture),
613
+
_ when type == typeof(sbyte) => str => str.Length == 0 ? null : sbyte.Parse(str, NumberStyles.Integer, culture),
614
+
_ when type == typeof(short) => str => str.Length == 0 ? null : short.Parse(str, NumberStyles.Integer, culture),
615
+
_ when type == typeof(ushort) => str => str.Length == 0 ? null : ushort.Parse(str, NumberStyles.Integer, culture),
616
+
_ when type == typeof(int) => str => str.Length == 0 ? null : int.Parse(str, NumberStyles.Integer, culture),
617
+
_ when type == typeof(uint) => str => str.Length == 0 ? null : uint.Parse(str, NumberStyles.Integer, culture),
618
+
_ when type == typeof(long) => str => str.Length == 0 ? null : long.Parse(str, NumberStyles.Integer, culture),
619
+
_ when type == typeof(ulong) => str => str.Length == 0 ? null : ulong.Parse(str, NumberStyles.Integer, culture),
620
+
_ when type == typeof(float) => str => str.Length == 0 ? null : float.Parse(str, NumberStyles.Float, culture),
621
+
_ when type == typeof(double) => str => str.Length == 0 ? null : double.Parse(str, NumberStyles.Float, culture),
622
+
_ when type == typeof(decimal) => str => str.Length == 0 ? null : decimal.Parse(str, NumberStyles.Number, culture),
623
+
_ when type == typeof(DateTime) => str => str.Length == 0 ? null : DateTime.ParseExact(str, format, culture),
624
+
_ when type == typeof(DateTimeOffset) => str => str.Length == 0 ? null : DateTimeOffset.ParseExact(str, format, culture),
625
+
_ when type == typeof(DateOnly) => str => str.Length == 0 ? null : DateOnly.ParseExact(str, format, culture),
626
+
_ when type == typeof(TimeOnly) => str => str.Length == 0 ? null : TimeOnly.ParseExact(str, format, culture),
627
+
_ when type == typeof(Guid) => str => str.Length == 0 ? null : Guid.Parse(str.ToString()),
628
+
_ when type.IsEnum => str => str.Length == 0 ? null : Enum.Parse(type, str),
629
+
_ => str => Convert.ChangeType(new string(str), type, culture)
630
+
};
615
631
}
616
632
}
617
633
618
634
public class CsvReadOptions<T> : CsvReadOptions
619
635
where T : class
620
636
{
621
-
public Func<CsvMemberInfo<T>, CultureInfo, Func<string, object?>?>? ParserFactory;
622
-
public Func<Match, T>? Constructor;
637
+
public Func<CsvMemberInfo<T>, CultureInfo, Csv.ValueParser?>? ParserFactory;
638
+
public CsvConstructor<T>? Constructor;
623
639
}
624
640
641
+
public delegate T CsvConstructor<T>(ReadOnlySpan<char> line);
642
+
625
643
public class CsvReadOptions
626
644
{
627
-
public bool AsumeSingleLine = false;
628
-
public Func<Exception, Match?, bool>? SkipError;
645
+
public bool AsumeSingleLine = true; //Breaking change!
646
+
public Func<Exception, string, bool>? SkipError;
629
647
public TimeSpan RegexTimeout = Regex.InfiniteMatchTimeout;
630
648
public char? ListSeparator;
631
649
}
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4