1 module xlsxreader; 2 3 import std.algorithm.iteration : filter, map, joiner; 4 import std.algorithm.mutation : reverse; 5 import std.algorithm.searching : all, canFind, startsWith; 6 import std.algorithm.sorting : sort; 7 import std.array : array, empty, front, replace, popFront; 8 import std.ascii : isDigit; 9 import std.conv : to; 10 import std.datetime : DateTime, Date, TimeOfDay; 11 import std.exception : enforce; 12 import std.file : read, exists, readText; 13 import std.format : format; 14 import std.range : tee; 15 import std.regex; 16 import std.stdio; 17 import std.traits : isIntegral, isFloatingPoint, isSomeString; 18 import std.typecons : tuple, Nullable, nullable; 19 import std.utf : byChar; 20 import std.variant : Algebraic, visit; 21 import std.zip; 22 23 import dxml.dom; 24 25 /// 26 struct Pos { 27 // zero based 28 size_t row; 29 // zero based 30 size_t col; 31 } 32 33 /// 34 alias Data = Algebraic!(bool,long,double,string,DateTime,Date,TimeOfDay); 35 36 /// 37 struct Cell { 38 string loc; 39 size_t row; // row[r] 40 string t; // s or n, s for pointer, n for value, stored in v 41 string r; // c[r] 42 string v; // c.v the value or ptr 43 string f; // c.f the formula 44 string xmlValue; 45 Pos position; 46 } 47 48 // 49 enum CellType { 50 datetime, 51 timeofday, 52 date, 53 bool_, 54 double_, 55 long_, 56 string_ 57 } 58 59 import std.ascii : toUpper; 60 /// 61 struct Sheet { 62 import std.ascii : toUpper; 63 64 Cell[] cells; 65 Cell[][] table; 66 Pos maxPos; 67 68 string toString() const @safe { 69 import std.format : formattedWrite; 70 import std.array : appender; 71 long[] maxCol = new long[](maxPos.col + 1); 72 foreach(const row; this.table) { 73 foreach(const idx, Cell col; row) { 74 string s = col.xmlValue; 75 76 maxCol[idx] = maxCol[idx] < s.length ? s.length : maxCol[idx]; 77 } 78 } 79 maxCol[] += 1; 80 81 auto app = appender!string(); 82 foreach(const row; this.table) { 83 foreach(const idx, Cell col; row) { 84 string s = col.xmlValue; 85 formattedWrite(app, "%*s, ", maxCol[idx], s); 86 } 87 formattedWrite(app, "\n"); 88 } 89 return app.data; 90 } 91 92 void printTable() const @safe { 93 writeln(this.toString()); 94 } 95 96 // Column 97 98 Iterator!T getColumn(T)(size_t col, size_t startRow, size_t endRow) { 99 auto c = this.iterateColumn!T(col, startRow, endRow); 100 return typeof(return)(c.array); 101 } 102 103 private enum t = q{ 104 Iterator!(%1$s) getColumn%2$s(size_t col, size_t startRow, size_t endRow) @safe { 105 return getColumn!(%1$s)(col, startRow, endRow); 106 } 107 }; 108 static foreach(T; ["long", "double", "string", "Date", "TimeOfDay", 109 "DateTime"]) 110 { 111 mixin(format(t, T, T[0].toUpper ~ T[1 .. $])); 112 } 113 114 ColumnUntyped iterateColumnUntyped(size_t col, size_t startRow, size_t endRow) @safe { 115 return typeof(return)(&this, col, startRow, endRow); 116 } 117 118 Column!(T) iterateColumn(T)(size_t col, size_t startRow, size_t endRow) { 119 return typeof(return)(&this, col, startRow, endRow); 120 } 121 122 Column!(long) iterateColumnLong(size_t col, size_t startRow, size_t endRow) @safe { 123 return typeof(return)(&this, col, startRow, endRow); 124 } 125 126 Column!(double) iterateColumnDouble(size_t col, size_t startRow, size_t endRow) @safe { 127 return typeof(return)(&this, col, startRow, endRow); 128 } 129 130 Column!(string) iterateColumnString(size_t col, size_t startRow, size_t endRow) @safe { 131 return typeof(return)(&this, col, startRow, endRow); 132 } 133 134 Column!(DateTime) iterateColumnDateTime(size_t col, size_t startRow, size_t endRow) @safe { 135 return typeof(return)(&this, col, startRow, endRow); 136 } 137 138 Column!(Date) iterateColumnDate(size_t col, size_t startRow, size_t endRow) @safe { 139 return typeof(return)(&this, col, startRow, endRow); 140 } 141 142 Column!(TimeOfDay) iterateColumnTimeOfDay(size_t col, size_t startRow, size_t endRow) @safe { 143 return typeof(return)(&this, col, startRow, endRow); 144 } 145 146 // Row 147 148 Iterator!T getRow(T)(size_t row, size_t startColumn, size_t endColumn) @safe { 149 return typeof(return)(this.iterateRow!T(row, startColumn, endColumn).array); // TODO: why .array? 150 } 151 152 private enum t2 = q{ 153 Iterator!(%1$s) getRow%2$s(size_t row, size_t startColumn, size_t endColumn) @safe { 154 return getRow!(%1$s)(row, startColumn, endColumn); 155 } 156 }; 157 static foreach(T; ["long", "double", "string", "Date", "TimeOfDay", 158 "DateTime"]) 159 { 160 mixin(format(t2, T, T[0].toUpper ~ T[1 .. $])); 161 } 162 163 RowUntyped iterateRowUntyped(size_t row, size_t startColumn, size_t endColumn) @safe { 164 return typeof(return)(&this, row, startColumn, endColumn); 165 } 166 167 Row!(T) iterateRow(T)(size_t row, size_t startColumn, size_t endColumn) @trusted /* TODO: remove @trusted when `&this` is stored in a @safe manner in `Row` */ { 168 return typeof(return)(&this, row, startColumn, endColumn); 169 } 170 171 Row!(long) iterateRowLong(size_t row, size_t startColumn, size_t endColumn) @safe { 172 return typeof(return)(&this, row, startColumn, endColumn); 173 } 174 175 Row!(double) iterateRowDouble(size_t row, size_t startColumn, size_t endColumn) @safe { 176 return typeof(return)(&this, row, startColumn, endColumn); 177 } 178 179 Row!(string) iterateRowString(size_t row, size_t startColumn, size_t endColumn) @safe { 180 return typeof(return)(&this, row, startColumn, endColumn); 181 } 182 183 Row!(DateTime) iterateRowDateTime(size_t row, size_t startColumn, size_t endColumn) @safe { 184 return typeof(return)(&this, row, startColumn, endColumn); 185 } 186 187 Row!(Date) iterateRowDate(size_t row, size_t startColumn, size_t endColumn) @safe { 188 return typeof(return)(&this, row, startColumn, endColumn); 189 } 190 191 Row!(TimeOfDay) iterateRowTimeOfDay(size_t row, size_t startColumn, size_t endColumn) @safe { 192 return typeof(return)(&this, row, startColumn, endColumn); 193 } 194 } 195 196 struct Iterator(T) { 197 T[] data; 198 199 this(T[] data) { 200 this.data = data; 201 } 202 203 @property bool empty() const pure nothrow @nogc { 204 return this.data.empty; 205 } 206 207 void popFront() { 208 this.data.popFront(); 209 } 210 211 @property T front() { 212 return this.data.front; 213 } 214 215 inout(typeof(this)) save() inout pure nothrow @nogc { 216 return this; 217 } 218 219 // Request random access. 220 inout(T)[] array() inout @safe pure nothrow @nogc { 221 return data; 222 } 223 } 224 225 /// 226 struct RowUntyped { 227 Sheet* sheet; 228 const size_t row; 229 const size_t startColumn; 230 const size_t endColumn; 231 size_t cur; 232 233 this(Sheet* sheet, size_t row, size_t startColumn, size_t endColumn) pure nothrow @nogc @safe { 234 assert(sheet.table.length == sheet.maxPos.row + 1); 235 this.sheet = sheet; 236 this.row = row; 237 this.startColumn = startColumn; 238 this.endColumn = endColumn; 239 this.cur = this.startColumn; 240 } 241 242 @property bool empty() const pure nothrow @nogc @safe { 243 return this.cur >= this.endColumn; 244 } 245 246 void popFront() pure nothrow @nogc @safe { 247 ++this.cur; 248 } 249 250 inout(typeof(this)) save() inout pure nothrow @nogc @safe { 251 return this; 252 } 253 254 @property inout(Cell) front() inout pure nothrow @nogc @safe { 255 return this.sheet.table[this.row][this.cur]; 256 } 257 } 258 259 /// 260 struct Row(T) { 261 RowUntyped ru; 262 T front; 263 264 this(Sheet* sheet, size_t row, size_t startColumn, size_t endColumn) { 265 this.ru = RowUntyped(sheet, row, startColumn, endColumn); 266 this.read(); 267 } 268 269 @property bool empty() const pure nothrow @nogc { 270 return this.ru.empty; 271 } 272 273 void popFront() { 274 this.ru.popFront(); 275 if(!this.empty) { 276 this.read(); 277 } 278 } 279 280 inout(typeof(this)) save() inout pure nothrow @nogc { 281 return this; 282 } 283 284 private void read() { 285 this.front = convertTo!T(this.ru.front.xmlValue); 286 } 287 } 288 289 /// 290 struct ColumnUntyped { 291 Sheet* sheet; 292 const size_t col; 293 const size_t startRow; 294 const size_t endRow; 295 size_t cur; 296 297 this(Sheet* sheet, size_t col, size_t startRow, size_t endRow) @safe { 298 assert(sheet.table.length == sheet.maxPos.row + 1); 299 this.sheet = sheet; 300 this.col = col; 301 this.startRow = startRow; 302 this.endRow = endRow; 303 this.cur = this.startRow; 304 } 305 306 @property bool empty() const pure nothrow @nogc @safe { 307 return this.cur >= this.endRow; 308 } 309 310 void popFront() @safe { 311 ++this.cur; 312 } 313 314 inout(typeof(this)) save() inout pure nothrow @nogc @safe { 315 return this; 316 } 317 318 @property Cell front() @safe { 319 return this.sheet.table[this.cur][this.col]; 320 } 321 } 322 323 /// 324 struct Column(T) { 325 ColumnUntyped cu; 326 327 T front; 328 329 this(Sheet* sheet, size_t col, size_t startRow, size_t endRow) { 330 this.cu = ColumnUntyped(sheet, col, startRow, endRow); 331 this.read(); 332 } 333 334 @property bool empty() const pure nothrow @nogc { 335 return this.cu.empty; 336 } 337 338 void popFront() { 339 this.cu.popFront(); 340 if(!this.empty) { 341 this.read(); 342 } 343 } 344 345 inout(typeof(this)) save() inout pure nothrow @nogc { 346 return this; 347 } 348 349 private void read() { 350 this.front = convertTo!T(this.cu.front.xmlValue); 351 } 352 } 353 354 @safe unittest { 355 import std.range : isForwardRange; 356 import std.meta : AliasSeq; 357 static foreach(T; AliasSeq!(long,double,DateTime,TimeOfDay,Date,string)) {{ 358 alias C = Column!T; 359 alias R = Row!T; 360 alias I = Iterator!T; 361 static assert(isForwardRange!C, C.stringof); 362 static assert(isForwardRange!R, R.stringof); 363 static assert(isForwardRange!I, I.stringof); 364 }} 365 } 366 367 Date longToDate(long d) @safe { 368 // modifed from https://www.codeproject.com/Articles/2750/ 369 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 370 371 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 372 // leap year, but Excel/Lotus 123 think it is... 373 if(d == 60) { 374 return Date(1900, 2, 29); 375 } else if(d < 60) { 376 // Because of the 29-02-1900 bug, any serial date 377 // under 60 is one off... Compensate. 378 ++d; 379 } 380 381 // Modified Julian to DMY calculation with an addition of 2415019 382 int l = cast(int)d + 68569 + 2415019; 383 int n = int(( 4 * l ) / 146097); 384 l = l - int(( 146097 * n + 3 ) / 4); 385 int i = int(( 4000 * ( l + 1 ) ) / 1461001); 386 l = l - int(( 1461 * i ) / 4) + 31; 387 int j = int(( 80 * l ) / 2447); 388 int nDay = l - int(( 2447 * j ) / 80); 389 l = int(j / 11); 390 int nMonth = j + 2 - ( 12 * l ); 391 int nYear = 100 * ( n - 49 ) + i + l; 392 return Date(nYear, nMonth, nDay); 393 } 394 395 long dateToLong(Date d) @safe { 396 // modifed from https://www.codeproject.com/Articles/2750/ 397 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 398 399 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 400 // leap year, but Excel/Lotus 123 think it is... 401 if(d.day == 29 && d.month == 2 && d.year == 1900) { 402 return 60; 403 } 404 405 // DMY to Modified Julian calculated with an extra subtraction of 2415019. 406 long nSerialDate = 407 int(( 1461 * ( d.year + 4800 + int(( d.month - 14 ) / 12) ) ) / 4) + 408 int(( 367 * ( d.month - 2 - 12 * 409 ( ( d.month - 14 ) / 12 ) ) ) / 12) - 410 int(( 3 * ( int(( d.year + 4900 411 + int(( d.month - 14 ) / 12) ) / 100) ) ) / 4) + 412 d.day - 2415019 - 32075; 413 414 if(nSerialDate < 60) { 415 // Because of the 29-02-1900 bug, any serial date 416 // under 60 is one off... Compensate. 417 nSerialDate--; 418 } 419 420 return nSerialDate; 421 } 422 423 @safe unittest { 424 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 425 foreach(const d; ds) { 426 long l = dateToLong(d); 427 Date r = longToDate(l); 428 assert(r == d, format("%s %s", r, d)); 429 } 430 } 431 432 TimeOfDay doubleToTimeOfDay(double s) @safe { 433 import core.stdc.math : lround; 434 double secs = (24.0 * 60.0 * 60.0) * s; 435 436 // TODO not one-hundred my lround is needed 437 int secI = to!int(lround(secs)); 438 439 return TimeOfDay(secI / 3600, (secI / 60) % 60, secI % 60); 440 } 441 442 double timeOfDayToDouble(TimeOfDay tod) @safe { 443 long h = tod.hour * 60 * 60; 444 long m = tod.minute * 60; 445 long s = tod.second; 446 return (h + m + s) / (24.0 * 60.0 * 60.0); 447 } 448 449 @safe unittest { 450 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 451 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 452 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 453 foreach(const tod; tods) { 454 double d = timeOfDayToDouble(tod); 455 assert(d <= 1.0, format("%s", d)); 456 TimeOfDay r = doubleToTimeOfDay(d); 457 assert(r == tod, format("%s %s", r, tod)); 458 } 459 } 460 461 double datetimeToDouble(DateTime dt) @safe { 462 double d = dateToLong(dt.date); 463 double t = timeOfDayToDouble(dt.timeOfDay); 464 return d + t; 465 } 466 467 DateTime doubleToDateTime(double d) @safe { 468 long l = cast(long)d; 469 Date dt = longToDate(l); 470 TimeOfDay t = doubleToTimeOfDay(d - l); 471 return DateTime(dt, t); 472 } 473 474 @safe unittest { 475 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 476 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 477 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 478 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 479 foreach(const d; ds) { 480 foreach(const tod; tods) { 481 DateTime dt = DateTime(d, tod); 482 double dou = datetimeToDouble(dt); 483 484 Date rd = longToDate(cast(long)dou); 485 assert(rd == d, format("%s %s", rd, d)); 486 487 double rest = dou - cast(long)dou; 488 TimeOfDay rt = doubleToTimeOfDay(dou - cast(long)dou); 489 assert(rt == tod, format("%s %s", rt, tod)); 490 491 DateTime r = doubleToDateTime(dou); 492 assert(r == dt, format("%s %s", r, dt)); 493 } 494 } 495 } 496 497 Date stringToDate(string s) @safe { 498 import std.array : split; 499 import std.string : indexOf; 500 501 if(s.indexOf('/') != -1) { 502 auto sp = s.split('/'); 503 enforce(sp.length == 3, format("[%s]", sp)); 504 return Date(to!int(sp[2]), to!int(sp[1]), to!int(sp[0])); 505 } else { 506 return longToDate(to!long(s)); 507 } 508 } 509 510 bool tryConvertTo(T,S)(S var) { 511 return !(tryConvertToImpl!T(Data(var)).isNull()); 512 } 513 514 Nullable!(T) tryConvertToImpl(T)(Data var) { 515 try { 516 return nullable(convertTo!T(var)); 517 } catch(Exception e) { 518 return Nullable!T(); 519 } 520 } 521 522 T convertTo(T)(string var) @safe { 523 import std.math : lround; 524 static if(isSomeString!T) { 525 return to!T(var); 526 } else static if(is(T == bool)) { 527 return var == "1"; 528 } else static if(isIntegral!T) { 529 return to!T(var); 530 } else static if(isFloatingPoint!T) { 531 return to!T(var); 532 } else static if(is(T == DateTime)) { 533 if(var.canConvertToLong()) { 534 return doubleToDateTime(to!long(var)); 535 } else if(var.canConvertToDouble()) { 536 return doubleToDateTime(to!double(var)); 537 } 538 enforce(false, "Can not convert '" ~ var ~ "' to a DateTime"); 539 assert(false, "Unreachable"); 540 } else static if(is(T == Date)) { 541 if(var.canConvertToLong()) { 542 return longToDate(to!long(var)); 543 } else if(var.canConvertToDouble()) { 544 return longToDate(lround(to!double(var))); 545 } 546 return stringToDate(var); 547 } else static if(is(T == TimeOfDay)) { 548 double l = to!double(var); 549 return doubleToTimeOfDay(l - cast(long)l); 550 } else { 551 static assert(false, T.stringof ~ " not supported"); 552 } 553 } 554 555 private ZipArchive readFile(in string filename) @trusted { 556 enforce(exists(filename), "File with name " ~ filename ~ " does not exist"); 557 return new typeof(return)(read(filename)); 558 } 559 560 struct SheetNameId { 561 string name; 562 int id; 563 string rid; 564 } 565 566 string convertToString(in ubyte[] d) @trusted { 567 import std.encoding; 568 auto b = getBOM(d); 569 switch(b.schema) { 570 case BOM.none: 571 return cast(string)d; 572 case BOM.utf8: 573 return cast(string)(d[3 .. $]); 574 case BOM.utf16be: goto default; 575 case BOM.utf16le: goto default; 576 case BOM.utf32be: goto default; 577 case BOM.utf32le: goto default; 578 default: 579 string ret; 580 transcode(d, ret); 581 return ret; 582 } 583 } 584 585 SheetNameId[] sheetNames(in string filename) @trusted { 586 auto file = readFile(filename); 587 auto ams = file.directory; 588 immutable wbStr = "xl/workbook.xml"; 589 if(wbStr !in ams) { 590 return SheetNameId[].init; 591 } 592 ubyte[] wb = file.expand(ams[wbStr]); 593 string wbData = convertToString(wb); 594 595 auto dom = parseDOM(wbData); 596 if(dom.children.length != 1) { 597 return []; 598 } 599 auto workbook = dom.children[0]; 600 string sheetName = workbook.name == "workbook" 601 ? "sheets" : "s:sheets"; 602 if(workbook.name != "workbook" && workbook.name != "s:workbook") { 603 return []; 604 } 605 auto sheetsRng = workbook.children.filter!(c => c.name == sheetName); 606 if(sheetsRng.empty) { 607 return []; 608 } 609 610 return sheetsRng.front.children 611 .map!(s => SheetNameId( 612 s.attributes.filter!(a => a.name == "name").front.value 613 .specialCharacterReplacementReverse(), 614 s.attributes.filter!(a => a.name == "sheetId").front 615 .value.to!int(), 616 s.attributes.filter!(a => a.name == "r:id").front.value, 617 ) 618 ) 619 .array 620 .sort!((a, b) => a.id < b.id) 621 .release; 622 } 623 624 @safe unittest { 625 auto r = sheetNames("multitable.xlsx"); 626 assert(r[0].name == "wb1"); 627 assert(r[0].id == 1); 628 } 629 630 @safe unittest { 631 auto r = sheetNames("sheetnames.xlsx"); 632 assert(r[0].name == "A & B ;", r[0].name); 633 assert(r[0].id == 1); 634 } 635 636 struct Relationships { 637 string id; 638 string file; 639 } 640 641 Relationships[string] parseRelationships(ZipArchive za, ArchiveMember am) @trusted { 642 ubyte[] d = za.expand(am); 643 string relData = convertToString(d); 644 auto dom = parseDOM(relData); 645 assert(dom.children.length == 1); 646 auto rel = dom.children[0]; 647 assert(rel.name == "Relationships"); 648 auto relRng = rel.children.filter!(c => c.name == "Relationship"); 649 assert(!relRng.empty); 650 651 Relationships[string] ret; 652 foreach(r; relRng) { 653 Relationships tmp; 654 tmp.id = r.attributes.filter!(a => a.name == "Id") 655 .front.value; 656 tmp.file = r.attributes.filter!(a => a.name == "Target") 657 .front.value; 658 ret[tmp.id] = tmp; 659 } 660 return ret; 661 } 662 663 Sheet readSheet(in string filename, in string sheetName) @safe { 664 SheetNameId[] sheets = sheetNames(filename); 665 auto sRng = sheets.filter!(s => s.name == sheetName); 666 enforce(!sRng.empty, "No sheet with name " ~ sheetName 667 ~ " found in file " ~ filename); 668 return readSheetImpl(filename, sRng.front.rid); 669 } 670 671 string eatXlPrefix(string fn) @safe { 672 foreach(const p; ["xl//", "/xl/"]) { 673 if(fn.startsWith(p)) { 674 return fn[p.length .. $]; 675 } 676 } 677 return fn; 678 } 679 680 Sheet readSheetImpl(in string filename, in string rid) @trusted { 681 scope(failure) { 682 writefln("Failed at file '%s' and sheet '%s'", filename, rid); 683 } 684 auto file = readFile(filename); 685 auto ams = file.directory; 686 immutable ss = "xl/sharedStrings.xml"; 687 string[] sharedStrings = (ss in ams) 688 ? readSharedEntries(file, ams[ss]) 689 : []; 690 //logf("%s", sharedStrings); 691 692 Relationships[string] rels = parseRelationships(file, 693 ams["xl/_rels/workbook.xml.rels"]); 694 695 Relationships* sheetRel = rid in rels; 696 enforce(sheetRel !is null, format("Could not find '%s' in '%s'", rid, 697 filename)); 698 string shrFn = eatXlPrefix(sheetRel.file); 699 string fn = "xl/" ~ shrFn; 700 ArchiveMember* sheet = fn in ams; 701 enforce(sheet !is null, format("sheetRel.file orig '%s', fn %s not in [%s]", 702 sheetRel.file, fn, ams.keys())); 703 704 Sheet ret; 705 ret.cells = insertValueIntoCell(readCells(file, *sheet), sharedStrings); 706 Pos maxPos; 707 foreach(ref c; ret.cells) { 708 c.position = toPos(c.r); 709 maxPos = elementMax(maxPos, c.position); 710 } 711 ret.maxPos = maxPos; 712 ret.table = new Cell[][](ret.maxPos.row + 1, ret.maxPos.col + 1); 713 foreach(const c; ret.cells) { 714 ret.table[c.position.row][c.position.col] = c; 715 } 716 return ret; 717 } 718 719 string[] readSharedEntries(ZipArchive za, ArchiveMember am) @trusted { 720 ubyte[] ss = za.expand(am); 721 string ssData = convertToString(ss); 722 auto dom = parseDOM(ssData); 723 string[] ret; 724 if(dom.type != EntityType.elementStart) { 725 return ret; 726 } 727 assert(dom.children.length == 1); 728 auto sst = dom.children[0]; 729 assert(sst.name == "sst"); 730 if(sst.type != EntityType.elementStart || sst.children.empty) { 731 return ret; 732 } 733 auto siRng = sst.children.filter!(c => c.name == "si"); 734 foreach(si; siRng) { 735 if(si.type != EntityType.elementStart) { 736 continue; 737 } 738 //ret ~= extractData(si); 739 string tmp; 740 foreach(tORr; si.children) { 741 if(tORr.name == "t" && tORr.type == EntityType.elementStart 742 && !tORr.children.empty) 743 { 744 //ret ~= Data(convert(tORr.children[0].text)); 745 ret ~= tORr.children[0].text.removeSpecialCharacter(); 746 } else if(tORr.name == "r") { 747 foreach(r; tORr.children.filter!(r => r.name == "t")) { 748 if(r.type == EntityType.elementStart && !r.children.empty) { 749 tmp ~= r.children[0].text.removeSpecialCharacter(); 750 } 751 } 752 } else { 753 //ret ~= Data.init; 754 ret ~= ""; 755 } 756 } 757 if(!tmp.empty) { 758 //ret ~= Data(convert(tmp)); 759 ret ~= tmp.removeSpecialCharacter(); 760 } 761 } 762 return ret; 763 } 764 765 string extractData(DOMEntity!string si) { 766 string tmp; 767 foreach(tORr; si.children) { 768 if(tORr.name == "t") { 769 if(!tORr.attributes.filter!(a => a.name == "xml:space").empty) { 770 return ""; 771 } else if(tORr.type == EntityType.elementStart 772 && !tORr.children.empty) 773 { 774 return tORr.children[0].text; 775 } else { 776 return ""; 777 } 778 } else if(tORr.name == "r") { 779 foreach(r; tORr.children.filter!(r => r.name == "t")) { 780 tmp ~= r.children[0].text; 781 } 782 } 783 } 784 if(!tmp.empty) { 785 return tmp; 786 } 787 assert(false); 788 } 789 790 private bool canConvertToLong(in string s) @safe { 791 if(s.empty) { 792 return false; 793 } 794 return s.byChar.all!isDigit(); 795 } 796 797 private immutable rs = r"[\+-]{0,1}[0-9][0-9]*\.[0-9]*"; 798 private auto rgx = ctRegex!rs; 799 800 private bool canConvertToDoubleOld(in string s) @safe { 801 auto cap = matchAll(s, rgx); 802 return cap.empty || cap.front.hit != s ? false : true; 803 } 804 805 private bool canConvertToDouble(string s) pure @safe @nogc { 806 if(s.startsWith('+') || s.startsWith('-')) { 807 s = s[1 .. $]; 808 } 809 if(s.empty) { 810 return false; 811 } 812 813 if(s[0] < '0' || s[0] > '9') { // at least one in [0-9] 814 return false; 815 } 816 817 s = s[1 .. $]; 818 819 if(s.empty) { 820 return true; 821 } 822 823 while(!s.empty && s[0] >= '0' && s[0] <= '9') { 824 s = s[1 .. $]; 825 } 826 if(s.empty) { 827 return true; 828 } 829 if(s[0] != '.') { 830 return false; 831 } 832 s = s[1 .. $]; 833 if(s.empty) { 834 return true; 835 } 836 837 while(!s.empty && s[0] >= '0' && s[0] <= '9') { 838 s = s[1 .. $]; 839 } 840 841 return s.empty; 842 } 843 844 @safe unittest { 845 static struct Test { 846 string tt; 847 bool rslt; 848 } 849 auto tests = 850 [ Test("-", false) 851 , Test("0.0", true) 852 , Test("-0.", true) 853 , Test("-0.0", true) 854 , Test("-0.a", false) 855 , Test("-0.0", true) 856 , Test("-1100.0", true) 857 ]; 858 foreach(const t; tests) { 859 assert(canConvertToDouble(t.tt) == canConvertToDoubleOld(t.tt) 860 && canConvertToDouble(t.tt) == t.rslt 861 , format("%s %s %s %s", t.tt 862 , canConvertToDouble(t.tt), canConvertToDoubleOld(t.tt) 863 , t.rslt)); 864 } 865 } 866 867 string removeSpecialCharacter(string s) { 868 struct ToRe { 869 string from; 870 string to; 871 } 872 873 immutable ToRe[] toRe = [ 874 ToRe( "&", "&"), 875 ToRe( ">", "<"), 876 ToRe( "<", ">"), 877 ToRe( """, "\""), 878 ToRe( "'", "'") 879 ]; 880 881 string replaceStrings(string s) { 882 import std.algorithm.searching : canFind; 883 import std.array : replace; 884 foreach(const tr; toRe) { 885 while(canFind(s, tr.from)) { 886 s = s.replace(tr.from, tr.to); 887 } 888 } 889 return s; 890 } 891 892 return replaceStrings(s); 893 } 894 895 Cell[] readCells(ZipArchive za, ArchiveMember am) @trusted { 896 Cell[] ret; 897 ubyte[] ss = za.expand(am); 898 string ssData = convertToString(ss); 899 auto dom = parseDOM(ssData); 900 assert(dom.children.length == 1); 901 auto ws = dom.children[0]; 902 if(ws.name != "worksheet") { 903 return ret; 904 } 905 auto sdRng = ws.children.filter!(c => c.name == "sheetData"); 906 assert(!sdRng.empty); 907 if(sdRng.front.type != EntityType.elementStart) { 908 return ret; 909 } 910 auto rows = sdRng.front.children 911 .filter!(r => r.name == "row"); 912 913 foreach(ref row; rows) { 914 if(row.type != EntityType.elementStart || row.children.empty) { 915 continue; 916 } 917 foreach(ref c; row.children.filter!(r => r.name == "c")) { 918 Cell tmp; 919 tmp.row = row.attributes.filter!(a => a.name == "r") 920 .front.value.to!size_t(); 921 tmp.r = c.attributes.filter!(a => a.name == "r") 922 .front.value; 923 auto t = c.attributes.filter!(a => a.name == "t"); 924 if(t.empty) { 925 // we assume that no t attribute means direct number 926 //writefln("Found a strange empty cell \n%s", c); 927 } else { 928 tmp.t = t.front.value; 929 } 930 if(tmp.t == "s" || tmp.t == "n") { 931 if(c.type == EntityType.elementStart) { 932 auto v = c.children.filter!(c => c.name == "v"); 933 //enforce(!v.empty, format("r %s", tmp.row)); 934 if(!v.empty && v.front.type == EntityType.elementStart 935 && !v.front.children.empty) 936 { 937 tmp.v = v.front.children[0].text; 938 } else { 939 tmp.v = ""; 940 } 941 } 942 } else if(tmp.t == "inlineStr") { 943 auto is_ = c.children.filter!(c => c.name == "is"); 944 tmp.v = extractData(is_.front); 945 } else if(c.type == EntityType.elementStart) { 946 auto v = c.children.filter!(c => c.name == "v"); 947 if(!v.empty && v.front.type == EntityType.elementStart 948 && !v.front.children.empty) 949 { 950 tmp.v = v.front.children[0].text; 951 } 952 } 953 if(c.type == EntityType.elementStart) { 954 auto f = c.children.filter!(c => c.name == "f"); 955 if(!f.empty && f.front.type == EntityType.elementStart) { 956 tmp.f = f.front.children[0].text; 957 } 958 } 959 ret ~= tmp; 960 } 961 } 962 return ret; 963 } 964 965 Cell[] insertValueIntoCell(Cell[] cells, string[] ss) @trusted { 966 immutable excepted = ["n", "s", "b", "e", "str", "inlineStr"]; 967 immutable same = ["n", "e", "str", "inlineStr"]; 968 foreach(ref Cell c; cells) { 969 assert(canFind(excepted, c.t) || c.t.empty, 970 format("'%s' not in [%s]", c.t, excepted)); 971 if(c.t.empty) { 972 //c.xmlValue = convert(c.v); 973 c.xmlValue = c.v.removeSpecialCharacter(); 974 } else if(canFind(same, c.t)) { 975 //c.xmlValue = convert(c.v); 976 c.xmlValue = c.v.removeSpecialCharacter(); 977 } else if(c.t == "b") { 978 //logf("'%s' %s", c.v, c); 979 //c.xmlValue = c.v == "1"; 980 c.xmlValue = c.v.removeSpecialCharacter(); 981 } else { 982 if(!c.v.empty) { 983 size_t idx = to!size_t(c.v); 984 //logf("'%s' %s", c.v, idx); 985 //c.xmlValue = ss[idx]; 986 c.xmlValue = ss[idx]; 987 } 988 } 989 } 990 return cells; 991 } 992 993 Pos toPos(in string s) @safe { 994 import std.string : indexOfAny; 995 import std.math : pow; 996 ptrdiff_t fn = s.indexOfAny("0123456789"); 997 enforce(fn != -1, s); 998 size_t row = to!size_t(to!long(s[fn .. $]) - 1); 999 size_t col = 0; 1000 string colS = s[0 .. fn]; 1001 foreach(const idx, char c; colS) { 1002 col = col * 26 + (c - 'A' + 1); 1003 } 1004 return Pos(row, col - 1); 1005 } 1006 1007 @safe unittest { 1008 assert(toPos("A1").col == 0); 1009 assert(toPos("Z1").col == 25); 1010 assert(toPos("AA1").col == 26); 1011 } 1012 1013 Pos elementMax(Pos a, Pos b) { 1014 return Pos(a.row < b.row ? b.row : a.row, 1015 a.col < b.col ? b.col : a.col); 1016 } 1017 1018 string specialCharacterReplacement(string s) { 1019 return s.replace("\"", """) 1020 .replace("'", "'") 1021 .replace("<", "<") 1022 .replace(">", ">") 1023 .replace("&", "&"); 1024 } 1025 1026 string specialCharacterReplacementReverse(string s) { 1027 return s.replace(""", "\"") 1028 .replace("'", "'") 1029 .replace("<", "<") 1030 .replace(">", ">") 1031 .replace("&", "&"); 1032 } 1033 1034 @safe unittest { 1035 import std.math : isClose; 1036 auto r = readSheet("multitable.xlsx", "wb1"); 1037 assert(isClose(r.table[12][5].xmlValue.to!double(), 26.74), 1038 format("%s", r.table[12][5]) 1039 ); 1040 1041 assert(isClose(r.table[13][5].xmlValue.to!double(), -26.74), 1042 format("%s", r.table[13][5]) 1043 ); 1044 } 1045 1046 @safe unittest { 1047 import std.algorithm.comparison : equal; 1048 auto s = readSheet("multitable.xlsx", "wb1"); 1049 auto r = s.iterateRow!long(15, 1, 6); 1050 1051 auto expected = [1, 2, 3, 4, 5]; 1052 assert(equal(r, expected), format("%s", r)); 1053 1054 auto r2 = s.getRow!long(15, 1, 6); 1055 assert(equal(r, expected)); 1056 1057 auto it = s.iterateRowLong(15, 1, 6); 1058 assert(equal(r2, it)); 1059 1060 auto it2 = s.iterateRowUntyped(15, 1, 6) 1061 .map!(it => format("%s", it)) 1062 .array; 1063 } 1064 1065 @safe unittest { 1066 import std.algorithm.comparison : equal; 1067 auto s = readSheet("multitable.xlsx", "wb2"); 1068 //writefln("%s\n%(%s\n%)", s.maxPos, s.cells); 1069 auto rslt = s.iterateColumn!Date(1, 1, 6); 1070 auto rsltUt = s.iterateColumnUntyped(1, 1, 6) 1071 .map!(it => format("%s", it)) 1072 .array; 1073 assert(!rsltUt.empty); 1074 1075 auto target = [Date(2019,5,01), Date(2016,12,27), Date(1976,7,23), 1076 Date(1986,7,2), Date(2038,1,19) 1077 ]; 1078 assert(equal(rslt, target), format("\n%s\n%s", rslt, target)); 1079 1080 auto it = s.getColumn!Date(1, 1, 6); 1081 assert(equal(rslt, it)); 1082 1083 auto it2 = s.getColumnDate(1, 1, 6); 1084 assert(equal(rslt, it2)); 1085 } 1086 1087 @safe unittest { 1088 import std.algorithm.comparison : equal; 1089 auto s = readSheet("multitable.xlsx", "Sheet3"); 1090 writeln(s.table[0][0].xmlValue); 1091 assert(s.table[0][0].xmlValue.to!long(), 1092 format("%s", s.table[0][0].xmlValue)); 1093 //assert(s.table[0][0].canConvertTo(CellType.bool_)); 1094 } 1095 1096 unittest { 1097 import std.file : dirEntries, SpanMode; 1098 import std.traits : EnumMembers; 1099 foreach(const de; dirEntries("xlsx_files/", "*.xlsx", SpanMode.depth) 1100 .filter!(a => a.name != "xlsx_files/data03.xlsx")) 1101 { 1102 //writeln(de.name); 1103 auto sn = sheetNames(de.name); 1104 foreach(const s; sn) { 1105 auto sheet = readSheet(de.name, s.name); 1106 foreach(const cell; sheet.cells) { 1107 } 1108 } 1109 } 1110 } 1111 1112 @safe unittest { 1113 import std.algorithm.comparison : equal; 1114 auto sheet = readSheet("testworkbook.xlsx", "ws1"); 1115 //writefln("%(%s\n%)", sheet.cells); 1116 //writeln(sheet.toString()); 1117 assert(sheet.table[2][3].xmlValue.to!long() == 1337); 1118 1119 auto c = sheet.getColumnLong(3, 2, 5); 1120 auto r = [1337, 2, 3]; 1121 assert(equal(c, r), format("%s %s", c, sheet.toString())); 1122 1123 auto c2 = sheet.getColumnString(4, 2, 5); 1124 string f2 = sheet.table[2][4].xmlValue; 1125 assert(f2 == "hello", f2); 1126 f2 = sheet.table[3][4].xmlValue; 1127 assert(f2 == "sil", f2); 1128 f2 = sheet.table[4][4].xmlValue; 1129 assert(f2 == "foo", f2); 1130 auto r2 = ["hello", "sil", "foo"]; 1131 assert(equal(c2, r2), format("%s", c2)); 1132 } 1133 1134 @safe unittest { 1135 import std.math : isClose; 1136 auto sheet = readSheet("toto.xlsx", "Trades"); 1137 writefln("%(%s\n%)", sheet.cells); 1138 1139 auto r = sheet.getRowString(1, 0, 2).array; 1140 1141 double d = to!double(r[1]); 1142 assert(isClose(d, 38204642.510000)); 1143 } 1144 1145 @safe unittest { 1146 auto sheet = readSheet("leading_zeros.xlsx", "Sheet1"); 1147 auto a2 = sheet.cells.filter!(c => c.r == "A2"); 1148 assert(!a2.empty); 1149 assert(a2.front.xmlValue == "0012", format("%s", a2.front)); 1150 } 1151 1152 @safe unittest { 1153 import std.algorithm.comparison : equal; 1154 auto s = readSheet("datetimes.xlsx", "Sheet1"); 1155 //writefln("%s\n%(%s\n%)", s.maxPos, s.cells); 1156 auto rslt = s.iterateColumn!DateTime(0, 0, 2); 1157 assert(!rslt.empty); 1158 1159 auto target = 1160 [ DateTime(Date(1986,1,11), TimeOfDay.init) 1161 , DateTime(Date(1986,7,2), TimeOfDay.init) 1162 ]; 1163 assert(equal(rslt, target), format("\ngot: %s\nexp: %s\ntable %s", rslt 1164 , target, s.toString())); 1165 }