A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://github.com/signumsoftware/framework/commit/e17aeb8df452dfd8289586ccb0135019ad92de38 below:

fast CSV · signumsoftware/framework@e17aeb8 · GitHub

6 6

using System.Collections.Concurrent;

7 7

using System.Collections;

8 8

using System.IO.Pipes;

9 +

using System;

10 +

using System.ComponentModel.Design.Serialization;

9 11 10 12

namespace Signum.Utilities;

11 13

@@ -181,8 +183,7 @@ public static IEnumerable<T> ReadStream<T>(Stream stream, Encoding? encoding = n

181 183 182 184

var members = CsvMemberCache<T>.Members;

183 185

var parsers = members.Select(m => GetParser(defCulture, m, defOptions.ParserFactory)).ToList();

184 - 185 -

Regex regex = GetRegex(defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);

186 +

Regex valueRegex = GetRegex(isLine: false, defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);

186 187 187 188

if (defOptions.AsumeSingleLine)

188 189

{

@@ -199,66 +200,68 @@ public static IEnumerable<T> ReadStream<T>(Stream stream, Encoding? encoding = n

199 200

if (csvLine == null)

200 201

yield break;

201 202 202 -

Match? m = null;

203 -

T? t = null;

204 -

try

203 +

if (csvLine.Length > 0)

205 204

{

206 -

m = regex.Match(csvLine);

207 -

if (m.Length > 0)

205 +

T? t = null;

206 +

try

208 207

{

209 -

t = ReadObject<T>(m, members, parsers);

208 +

var m = valueRegex.EnumerateMatches(csvLine);

209 + 210 +

t = ReadObject<T>(m, csvLine.AsSpan(), members, parsers);

210 211

}

211 -

}

212 -

catch (Exception e)

213 -

{

214 -

e.Data["row"] = line;

212 +

catch (Exception e)

213 +

{

214 +

e.Data["row"] = line;

215 215 216 -

if (defOptions.SkipError == null || !defOptions.SkipError(e, m))

217 -

throw new ParseCsvException(e);

218 -

}

216 +

if (defOptions.SkipError == null || !defOptions.SkipError(e, csvLine))

217 +

throw new ParseCsvException(e);

218 +

}

219 219 220 -

if (t != null)

221 -

yield return t;

220 +

if (t != null)

221 +

yield return t;

222 222 223 +

}

223 224

line++;

224 225

}

225 226

}

226 227

}

227 228

else

228 229

{

230 +

Regex lineRegex = GetRegex(isLine: true, defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);

231 + 229 232

using (StreamReader sr = new StreamReader(stream, encoding))

230 233

{

231 234

string str = sr.ReadToEnd();

232 235 233 -

var matches = regex.Matches(str).Cast<Match>();

234 - 235 -

if (skipLines > 0)

236 -

matches = matches.Skip(skipLines);

237 - 238 -

int line = skipLines;

239 -

foreach (var m in matches)

236 +

int i = 0;

237 +

foreach (Match m in lineRegex.Matches(str))

240 238

{

239 +

if (i < skipLines)

240 +

continue;

241 + 241 242

if (m.Length > 0)

242 243

{

243 244

T? t = null;

244 245

try

245 246

{

247 +

var line = m.Value;

248 + 246 249

if (options?.Constructor != null)

247 -

t = options.Constructor(m);

250 +

t = options.Constructor(line);

248 251

else

249 -

t = ReadObject<T>(m, members, parsers);

252 +

t = ReadObject<T>(valueRegex.EnumerateMatches(line), line, members, parsers);

250 253

}

251 254

catch (Exception e)

252 255

{

253 -

e.Data["row"] = line;

256 +

e.Data["row"] = i;

254 257 255 -

if (defOptions.SkipError == null || !defOptions.SkipError(e, m))

258 +

if (defOptions.SkipError == null || !defOptions.SkipError(e, str.Substring(m.Index, m.Length)))

256 259

throw new ParseCsvException(e);

257 260

}

258 261

if (t != null)

259 262

yield return t;

260 263

}

261 -

line++;

264 +

i++;

262 265

}

263 266

}

264 267

}

@@ -271,18 +274,20 @@ public static T ReadLine<T>(string csvLine, CultureInfo? culture = null, CsvRead

271 274 272 275

var defCulture = GetDefaultCulture(culture);

273 276 274 -

Regex regex = GetRegex(defCulture, defOptions.RegexTimeout);

277 +

Regex regex = GetRegex(isLine: false, defCulture, defOptions.RegexTimeout);

275 278 276 -

Match m = regex.Match(csvLine);

279 +

var vme = regex.EnumerateMatches(csvLine);

277 280 278 281

var members = CsvMemberCache<T>.Members;

279 282 280 -

return ReadObject<T>(m,

283 +

return ReadObject<T>(vme,

284 +

csvLine.AsSpan(),

281 285

members,

282 286

members.Select(c => GetParser(defCulture, c, defOptions.ParserFactory)).ToList());

283 287

}

284 288 285 -

private static Func<string, object?> GetParser<T>(CultureInfo culture, CsvMemberInfo<T> column, Func<CsvMemberInfo<T>, CultureInfo, Func<string, object?>?>? parserFactory)

289 + 290 +

private static ValueParser GetParser<T>(CultureInfo culture, CsvMemberInfo<T> column, Func<CsvMemberInfo<T>, CultureInfo, ValueParser?>? parserFactory)

286 291

{

287 292

if (parserFactory != null)

288 293

{

@@ -294,43 +299,51 @@ public static T ReadLine<T>(string csvLine, CultureInfo? culture = null, CsvRead

294 299 295 300

var type = column.IsCollection ? column.MemberInfo.ReturningType().ElementType()! : column.MemberInfo.ReturningType();

296 301 297 -

return str => ConvertTo(str, type, culture, column.Format);

302 +

return GetBasicParser(type.UnNullify(), culture, column.Format);

298 303

}

299 304 300 -

static T ReadObject<T>(Match m, List<CsvMemberInfo<T>> members, List<Func<string, object?>> parsers)

301 -

{

302 -

var vals = m.Groups["val"].Captures;

303 - 304 -

if (vals.Count < members.Count)

305 -

throw new FormatException("Only {0} columns found (instead of {1}) in line: {2}".FormatWith(vals.Count, members.Count, m.Value));

305 +

public delegate object? ValueParser(ReadOnlySpan<char> str);

306 306 307 +

static T ReadObject<T>(Regex.ValueMatchEnumerator vme, ReadOnlySpan<char> line, List<CsvMemberInfo<T>> members, List<ValueParser> parsers)

308 +

{

307 309

T t = Activator.CreateInstance<T>();

308 310 309 -

for (int i = 0; i < members.Count; i++)

311 +

bool endsInCollection = false;

312 +

int i = 0;

313 +

foreach (var v in vme)

310 314

{

315 +

if (members.Count <= i)

316 +

continue;

317 + 318 +

var value = line.Slice(v.Index, v.Length);

311 319

var member = members[i];

312 320

var parser = parsers[i];

313 -

string? str = null;

314 321

try

315 322

{

316 323

if (!member.IsCollection)

317 324

{

318 -

str = DecodeCsv(vals[i].Value);

325 +

value = DecodeCsv(value);

319 326 320 -

object? val = parser(str);

327 +

object? val = parser(value);

321 328 322 329

member.MemberEntry.Setter!(t, val);

323 330

}

324 331

else

325 332

{

333 +

if (i != members.Count - 1)

334 +

throw new InvalidOperationException($"Collection {member.MemberInfo} should be the last member");

335 +

endsInCollection = true;

326 336

var list = (IList)Activator.CreateInstance(member.MemberInfo.ReturningType())!;

327 337 328 -

for (int j = i; j < vals.Count; j++)

329 -

{

330 -

str = DecodeCsv(vals[j].Value);

331 - 332 -

object? val = parser(str);

338 +

value = DecodeCsv(value);

339 +

object? val = parser(value);

340 +

list.Add(val);

333 341 342 +

foreach (var v2 in vme)

343 +

{

344 +

value = line.Slice(v2.Index, v2.Length);

345 +

value = DecodeCsv(value);

346 +

val = parser(value);

334 347

list.Add(val);

335 348

}

336 349

@@ -339,11 +352,17 @@ static T ReadObject<T>(Match m, List<CsvMemberInfo<T>> members, List<Func<string

339 352

}

340 353

catch (Exception e)

341 354

{

342 -

e.Data["value"] = str;

355 +

e.Data["value"] = new String(value);

343 356

e.Data["member"] = members[i].MemberInfo.Name;

344 357

throw;

345 358

}

359 + 360 +

i++;

346 361

}

362 + 363 +

if (!endsInCollection && i != members.Count)

364 +

throw new FormatException("Only {0} columns found (instead of {1}) in line: {2}".FormatWith(i, members.Count, new string(line)));

365 + 347 366

return t;

348 367

}

349 368

@@ -369,7 +388,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e

369 388

var defCulture = GetDefaultCulture(culture);

370 389

var defOptions = options ?? new CsvReadOptions();

371 390 372 -

Regex regex = GetRegex(defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);

391 +

Regex valueRegex = GetRegex(false, defCulture, defOptions.RegexTimeout, defOptions.ListSeparator);

373 392

if (defOptions.AsumeSingleLine)

374 393

{

375 394

using (StreamReader sr = new StreamReader(stream, encoding))

@@ -386,7 +405,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e

386 405

string[]? t = null;

387 406

try

388 407

{

389 -

m = regex.Match(csvLine);

408 +

m = valueRegex.Match(csvLine);

390 409

if (m.Length > 0)

391 410

{

392 411

t = m.Groups["val"].Captures.Select(c => c.Value).ToArray();

@@ -396,7 +415,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e

396 415

{

397 416

e.Data["row"] = line;

398 417 399 -

if (defOptions.SkipError == null || !defOptions.SkipError(e, m))

418 +

if (defOptions.SkipError == null || !defOptions.SkipError(e, csvLine))

400 419

throw new ParseCsvException(e);

401 420

}

402 421

@@ -413,7 +432,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e

413 432

{

414 433

string str = sr.ReadToEnd();

415 434 416 -

var matches = regex.Matches(str).Cast<Match>();

435 +

var matches = valueRegex.Matches(str).Cast<Match>();

417 436 418 437

int line = 0;

419 438

foreach (var m in matches)

@@ -429,7 +448,7 @@ public static IEnumerable<string[]> ReadUntypedStream(Stream stream, Encoding? e

429 448

{

430 449

e.Data["row"] = line;

431 450 432 -

if (defOptions.SkipError == null || !defOptions.SkipError(e, m))

451 +

if (defOptions.SkipError == null || !defOptions.SkipError(e, m.Value))

433 452

throw new ParseCsvException(e);

434 453

}

435 454

if (t != null)

@@ -530,16 +549,17 @@ public class MyFileCSV

530 549

""";

531 550

}

532 551 533 - 534 -

static ConcurrentDictionary<char, Regex> regexCache = new ConcurrentDictionary<char, Regex>();

535 -

const string BaseRegex = @"^((?<val>'(?:[^']+|'')*'|[^;\r\n]*))?((?!($|\r\n));(?<val>'(?:[^']+|'')*'|[^;\r\n]*))*($|\r\n)";

536 -

static Regex GetRegex(CultureInfo culture, TimeSpan timeout, char? listSeparator = null)

552 +

static ConcurrentDictionary<(bool multiLine, char separator, TimeSpan timeout), Regex> regexCache = new();

553 +

readonly static string ValueRegex = "'(?:[^']+|'')*'|[^;\r\n]*".Replace('\'', '"');

554 +

readonly static string LineRegex = $@"^({ValueRegex})?((?!($|\r\n));({ValueRegex}))*($|\r\n)";

555 +

static Regex GetRegex(bool isLine, CultureInfo culture, TimeSpan timeout, char? listSeparator = null)

537 556

{

538 557

char separator = listSeparator ?? GetListSeparator(culture);

539 558 540 -

return regexCache.GetOrAdd(separator, s =>

541 -

new Regex(BaseRegex.Replace('\'', '"').Replace(';', s), RegexOptions.Multiline | RegexOptions.ExplicitCapture, timeout));

559 +

return regexCache.GetOrAdd((isLine, separator, timeout), a =>

560 +

new Regex((isLine ? LineRegex : ValueRegex).Replace(';', a.separator), RegexOptions.Multiline | RegexOptions.ExplicitCapture, a.timeout));

542 561

}

562 + 543 563 544 564

private static char GetListSeparator(CultureInfo culture)

545 565

{

@@ -570,62 +590,60 @@ static CsvMemberCache()

570 590

public static List<CsvMemberInfo<T>> Members;

571 591

}

572 592 573 -

static string DecodeCsv(string s)

593 + 594 + 595 +

static ReadOnlySpan<char> DecodeCsv(ReadOnlySpan<char> s)

574 596

{

575 597

if (s.StartsWith("\"") && s.EndsWith("\""))

576 598

{

577 -

string str = s[1..^1].Replace("\"\"", "\"");

599 +

string str = new string(s[1..^1]).Replace("\"\"", "\"");

578 600 579 601

return Regex.Replace(str, "(?<!\r)\n", "\r\n");

580 602

}

581 603 582 604

return s;

583 605

}

584 606 585 -

static object? ConvertTo(string s, Type type, CultureInfo culture, string? format)

607 +

static ValueParser GetBasicParser(Type type, CultureInfo culture, string? format)

586 608

{

587 -

Type? baseType = Nullable.GetUnderlyingType(type);

588 -

if (baseType != null)

609 +

return type switch

589 610

{

590 -

if (!s.HasText())

591 -

return null;

592 - 593 -

type = baseType;

594 -

}

595 - 596 -

if (type.IsEnum)

597 -

return Enum.Parse(type, s);

598 - 599 -

if (type == typeof(DateTime))

600 -

if (format == null)

601 -

return DateTime.Parse(s, culture);

602 -

else

603 -

return DateTime.ParseExact(s, format, culture);

604 - 605 -

if (type == typeof(DateOnly))

606 -

if (format == null)

607 -

return DateOnly.Parse(s, culture);

608 -

else

609 -

return DateOnly.ParseExact(s, format, culture);

610 - 611 -

if (type == typeof(Guid))

612 -

return Guid.Parse(s);

613 - 614 -

return Convert.ChangeType(s, type, culture);

611 +

_ when type == typeof(string) => str => str.Length == 0 ? null : str.ToString(),

612 +

_ when type == typeof(byte) => str => str.Length == 0 ? null : byte.Parse(str, NumberStyles.Integer, culture),

613 +

_ when type == typeof(sbyte) => str => str.Length == 0 ? null : sbyte.Parse(str, NumberStyles.Integer, culture),

614 +

_ when type == typeof(short) => str => str.Length == 0 ? null : short.Parse(str, NumberStyles.Integer, culture),

615 +

_ when type == typeof(ushort) => str => str.Length == 0 ? null : ushort.Parse(str, NumberStyles.Integer, culture),

616 +

_ when type == typeof(int) => str => str.Length == 0 ? null : int.Parse(str, NumberStyles.Integer, culture),

617 +

_ when type == typeof(uint) => str => str.Length == 0 ? null : uint.Parse(str, NumberStyles.Integer, culture),

618 +

_ when type == typeof(long) => str => str.Length == 0 ? null : long.Parse(str, NumberStyles.Integer, culture),

619 +

_ when type == typeof(ulong) => str => str.Length == 0 ? null : ulong.Parse(str, NumberStyles.Integer, culture),

620 +

_ when type == typeof(float) => str => str.Length == 0 ? null : float.Parse(str, NumberStyles.Float, culture),

621 +

_ when type == typeof(double) => str => str.Length == 0 ? null : double.Parse(str, NumberStyles.Float, culture),

622 +

_ when type == typeof(decimal) => str => str.Length == 0 ? null : decimal.Parse(str, NumberStyles.Number, culture),

623 +

_ when type == typeof(DateTime) => str => str.Length == 0 ? null : DateTime.ParseExact(str, format, culture),

624 +

_ when type == typeof(DateTimeOffset) => str => str.Length == 0 ? null : DateTimeOffset.ParseExact(str, format, culture),

625 +

_ when type == typeof(DateOnly) => str => str.Length == 0 ? null : DateOnly.ParseExact(str, format, culture),

626 +

_ when type == typeof(TimeOnly) => str => str.Length == 0 ? null : TimeOnly.ParseExact(str, format, culture),

627 +

_ when type == typeof(Guid) => str => str.Length == 0 ? null : Guid.Parse(str.ToString()),

628 +

_ when type.IsEnum => str => str.Length == 0 ? null : Enum.Parse(type, str),

629 +

_ => str => Convert.ChangeType(new string(str), type, culture)

630 +

};

615 631

}

616 632

}

617 633 618 634

public class CsvReadOptions<T> : CsvReadOptions

619 635

where T : class

620 636

{

621 -

public Func<CsvMemberInfo<T>, CultureInfo, Func<string, object?>?>? ParserFactory;

622 -

public Func<Match, T>? Constructor;

637 +

public Func<CsvMemberInfo<T>, CultureInfo, Csv.ValueParser?>? ParserFactory;

638 +

public CsvConstructor<T>? Constructor;

623 639

}

624 640 641 +

public delegate T CsvConstructor<T>(ReadOnlySpan<char> line);

642 + 625 643

public class CsvReadOptions

626 644

{

627 -

public bool AsumeSingleLine = false;

628 -

public Func<Exception, Match?, bool>? SkipError;

645 +

public bool AsumeSingleLine = true; //Breaking change!

646 +

public Func<Exception, string, bool>? SkipError;

629 647

public TimeSpan RegexTimeout = Regex.InfiniteMatchTimeout;

630 648

public char? ListSeparator;

631 649

}


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4