1 module xlsxreader; 2 3 import std.algorithm.iteration : filter, map, joiner; 4 import std.algorithm.mutation : reverse; 5 import std.algorithm.searching : all, canFind, startsWith; 6 import std.algorithm.sorting : sort; 7 import std.array : array, empty, front, popFront; 8 import std.ascii : isDigit; 9 import std.experimental.logger; 10 import std.conv : to; 11 import std.datetime : DateTime, Date, TimeOfDay; 12 import std.exception : enforce; 13 import std.file : read, exists, readText; 14 import std.format : format; 15 import std.range : tee; 16 import std.regex; 17 import std.stdio; 18 import std.traits : isIntegral, isFloatingPoint, isSomeString; 19 import std.typecons : tuple, Nullable, nullable; 20 import std.utf : byChar; 21 import std.variant : Algebraic, visit; 22 import std.zip; 23 24 import dxml.dom; 25 26 /// 27 struct Pos { 28 // zero based 29 size_t row; 30 // zero based 31 size_t col; 32 } 33 34 /// 35 alias Data = Algebraic!(bool,long,double,string,DateTime,Date,TimeOfDay); 36 37 /// 38 struct Cell { 39 string loc; 40 size_t row; // row[r] 41 string t; // s or n, s for pointer, n for value, stored in v 42 string r; // c[r] 43 string v; // c.v the value or ptr 44 string f; // c.f the formula 45 Data value; 46 Pos position; 47 48 bool canConvertTo(CellType ct) const { 49 auto b = (bool l) { 50 switch(ct) { 51 case CellType.datetime: return false; 52 case CellType.timeofday: return false; 53 case CellType.date: return false; 54 default: return true; 55 } 56 }; 57 58 auto dt = (DateTime l) { 59 switch(ct) { 60 case CellType.datetime: return true; 61 case CellType.timeofday: return true; 62 case CellType.date: return true; 63 case CellType.double_: return true; 64 default: return false; 65 } 66 }; 67 68 auto de = (Date l) { 69 switch(ct) { 70 case CellType.datetime: return false; 71 case CellType.timeofday: return false; 72 case CellType.date: return true; 73 case CellType.double_: return true; 74 case CellType.long_: return true; 75 default: return false; 76 } 77 }; 78 79 auto tod = (TimeOfDay l) { 80 switch(ct) { 81 case CellType.datetime: return false; 82 case CellType.timeofday: return true; 83 case CellType.double_: return true; 84 default: return false; 85 } 86 }; 87 88 auto l = (long l) { 89 switch(ct) { 90 case CellType.date: return !tryConvertTo!Date(l); 91 case CellType.long_: return true; 92 case CellType.string_: return true; 93 case CellType.double_: return true; 94 case CellType.bool_: return l == 0 || l == 1; 95 default: return false; 96 } 97 }; 98 99 auto d = (double l) { 100 switch(ct) { 101 case CellType.string_: return true; 102 case CellType.double_: return true; 103 case CellType.datetime: return !tryConvertTo!DateTime(l); 104 case CellType.date: return !tryConvertTo!Date(l); 105 case CellType.timeofday: return !tryConvertTo!TimeOfDay(l); 106 default: return false; 107 } 108 }; 109 110 auto s = (string l) { 111 switch(ct) { 112 case CellType.string_: return true; 113 case CellType.bool_: return !tryConvertTo!bool(l); 114 case CellType.long_: return !tryConvertTo!long(l); 115 case CellType.double_: return !tryConvertTo!double(l); 116 case CellType.datetime: return !tryConvertTo!DateTime(l); 117 case CellType.date: return !tryConvertTo!Date(l); 118 case CellType.timeofday: return !tryConvertTo!TimeOfDay(l); 119 default: return false; 120 } 121 }; 122 123 return this.value.visit!( 124 (bool l) => b(l), 125 (long lo) => l(lo), 126 (double l) => d(l), 127 (string l) => s(l), 128 (DateTime l) => dt(l), 129 (Date l) => de(l), 130 (TimeOfDay l) => tod(l), 131 () => false) 132 (); 133 } 134 135 bool convertToBool() { 136 return convertTo!bool(this.value); 137 } 138 139 long convertToLong() { 140 return convertTo!long(this.value); 141 } 142 143 double convertToDouble() { 144 return convertTo!double(this.value); 145 } 146 147 string convertToString() { 148 return convertTo!string(this.value); 149 } 150 151 Date convertToDate() { 152 return convertTo!Date(this.value); 153 } 154 155 TimeOfDay convertToTimeOfDay() { 156 return convertTo!TimeOfDay(this.value); 157 } 158 159 DateTime convertToDateTime() { 160 return convertTo!DateTime(this.value); 161 } 162 } 163 164 // 165 enum CellType { 166 datetime, 167 timeofday, 168 date, 169 bool_, 170 double_, 171 long_, 172 string_ 173 } 174 175 import std.ascii : toUpper; 176 /// 177 struct Sheet { 178 Cell[] cells; 179 Cell[][] table; 180 Pos maxPos; 181 182 string toString() { 183 import std.format : formattedWrite; 184 import std.array : appender; 185 long[] maxCol = new long[](maxPos.col + 1); 186 foreach(row; this.table) { 187 foreach(idx, Cell col; row) { 188 string s = col.value.visit!( 189 (bool l) => to!string(l), 190 (long l) => to!string(l), 191 (double l) => format("%.4f", l), 192 (string l) => l, 193 (DateTime l) => l.toISOExtString(), 194 (Date l) => l.toISOExtString(), 195 (TimeOfDay l) => l.toISOExtString(), 196 () => "") 197 (); 198 199 maxCol[idx] = maxCol[idx] < s.length ? s.length : maxCol[idx]; 200 } 201 } 202 maxCol[] += 1; 203 204 auto app = appender!string(); 205 foreach(row; this.table) { 206 foreach(idx, Cell col; row) { 207 string s = col.value.visit!( 208 (bool l) => to!string(l), 209 (long l) => to!string(l), 210 (double l) => format("%.4f", l), 211 (string l) => l, 212 (DateTime l) => l.toISOExtString(), 213 (Date l) => l.toISOExtString(), 214 (TimeOfDay l) => l.toISOExtString(), 215 () => "") 216 (); 217 formattedWrite(app, "%*s, ", maxCol[idx], s); 218 } 219 formattedWrite(app, "\n"); 220 } 221 return app.data; 222 } 223 224 void printTable() { 225 writeln(this.toString()); 226 } 227 228 // Column 229 230 Iterator!T getColumn(T)(size_t col, size_t start, size_t end) { 231 auto c = this.iterateColumn!T(col, start, end); 232 return Iterator!T(c.array); 233 } 234 235 private enum t = q{ 236 Iterator!(%1$s) getColumn%2$s(size_t col, size_t start, size_t end) { 237 return getColumn!(%1$s)(col, start, end); 238 } 239 }; 240 static foreach(T; ["long", "double", "string", "Date", "TimeOfDay", 241 "DateTime"]) 242 { 243 mixin(format(t, T, T[0].toUpper ~ T[1 .. $])); 244 } 245 246 ColumnUntyped iterateColumnUntyped(size_t col, size_t start, size_t end) { 247 return ColumnUntyped(&this, col, start, end); 248 } 249 250 Column!(T) iterateColumn(T)(size_t col, size_t start, size_t end) { 251 return Column!(T)(&this, col, start, end); 252 } 253 254 Column!(long) iterateColumnLong(size_t col, size_t start, size_t end) { 255 return Column!(long)(&this, col, start, end); 256 } 257 258 Column!(double) iterateColumnDouble(size_t col, size_t start, size_t end) { 259 return Column!(double)(&this, col, start, end); 260 } 261 262 Column!(string) iterateColumnString(size_t col, size_t start, size_t end) { 263 return Column!(string)(&this, col, start, end); 264 } 265 266 Column!(DateTime) iterateColumnDateTime(size_t col, size_t start, 267 size_t end) 268 { 269 return Column!(DateTime)(&this, col, start, end); 270 } 271 272 Column!(Date) iterateColumnDate(size_t col, size_t start, size_t end) { 273 return Column!(Date)(&this, col, start, end); 274 } 275 276 Column!(TimeOfDay) iterateColumnTimeOfDay(size_t col, size_t start, 277 size_t end) 278 { 279 return Column!(TimeOfDay)(&this, col, start, end); 280 } 281 282 // Row 283 284 Iterator!T getRow(T)(size_t row, size_t start, size_t end) { 285 auto c = this.iterateRow!T(row, start, end); 286 return Iterator!T(c.array); 287 } 288 289 private enum t2 = q{ 290 Iterator!(%1$s) getRow%2$s(size_t row, size_t start, size_t end) { 291 return getRow!(%1$s)(row, start, end); 292 } 293 }; 294 static foreach(T; ["long", "double", "string", "Date", "TimeOfDay", 295 "DateTime"]) 296 { 297 mixin(format(t2, T, T[0].toUpper ~ T[1 .. $])); 298 } 299 300 RowUntyped iterateRowUntyped(size_t row, size_t start, size_t end) { 301 return RowUntyped(&this, row, start, end); 302 } 303 304 Row!(T) iterateRow(T)(size_t row, size_t start, size_t end) { 305 return Row!(T)(&this, row, start, end); 306 } 307 308 Row!(long) iterateRowLong(size_t row, size_t start, size_t end) { 309 return Row!(long)(&this, row, start, end); 310 } 311 312 Row!(double) iterateRowDouble(size_t row, size_t start, size_t end) { 313 return Row!(double)(&this, row, start, end); 314 } 315 316 Row!(string) iterateRowString(size_t row, size_t start, size_t end) { 317 return Row!(string)(&this, row, start, end); 318 } 319 320 Row!(DateTime) iterateRowDateTime(size_t row, size_t start, 321 size_t end) 322 { 323 return Row!(DateTime)(&this, row, start, end); 324 } 325 326 Row!(Date) iterateRowDate(size_t row, size_t start, size_t end) { 327 return Row!(Date)(&this, row, start, end); 328 } 329 330 Row!(TimeOfDay) iterateRowTimeOfDay(size_t row, size_t start, 331 size_t end) 332 { 333 return Row!(TimeOfDay)(&this, row, start, end); 334 } 335 } 336 337 struct Iterator(T) { 338 T[] data; 339 340 this(T[] data) { 341 this.data = data; 342 } 343 344 @property bool empty() const { 345 return this.data.empty; 346 } 347 348 void popFront() { 349 this.data.popFront(); 350 } 351 352 @property T front() { 353 return this.data.front; 354 } 355 356 typeof(this) save() { 357 return this; 358 } 359 } 360 361 /// 362 struct RowUntyped { 363 Sheet* sheet; 364 /*const*/ size_t row; 365 size_t start; 366 size_t end; 367 size_t cur; 368 369 this(Sheet* sheet, size_t row, size_t start, size_t end) { 370 this.sheet = sheet; 371 this.row = row; 372 this.start = start; 373 this.end = end; 374 this.cur = this.start; 375 } 376 377 @property bool empty() const { 378 return this.cur >= this.end; 379 } 380 381 void popFront() { 382 ++this.cur; 383 } 384 385 typeof(this) save() { 386 return this; 387 } 388 389 @property Cell front() { 390 return this.sheet.table[this.row][this.cur]; 391 } 392 } 393 394 /// 395 struct Row(T) { 396 RowUntyped ru; 397 398 T front; 399 400 this(Sheet* sheet, size_t row, size_t start, size_t end) { 401 this.ru = RowUntyped(sheet, row, start, end); 402 this.read(); 403 } 404 405 @property bool empty() const { 406 return this.ru.empty; 407 } 408 409 void popFront() { 410 this.ru.popFront(); 411 if(!this.empty) { 412 this.read(); 413 } 414 } 415 416 typeof(this) save() { 417 return this; 418 } 419 420 private void read() { 421 this.front = convertTo!T(this.ru.front.value); 422 } 423 424 bool canConvertTo(CellType ct) const { 425 for(size_t it = this.ru.start; it < this.ru.end; ++it) { 426 if(!this.ru.sheet.table[this.ru.row][it].canConvertTo(ct)) { 427 return false; 428 } 429 } 430 return true; 431 } 432 } 433 434 /// 435 struct ColumnUntyped { 436 Sheet* sheet; 437 /*const*/ size_t col; 438 size_t start; 439 size_t end; 440 size_t cur; 441 442 this(Sheet* sheet, size_t col, size_t start, size_t end) { 443 this.sheet = sheet; 444 this.col = col; 445 this.start = start; 446 this.end = end; 447 this.cur = this.start; 448 } 449 450 @property bool empty() const { 451 return this.cur >= this.end; 452 } 453 454 void popFront() { 455 ++this.cur; 456 } 457 458 typeof(this) save() { 459 return this; 460 } 461 462 @property Cell front() { 463 return this.sheet.table[this.cur][this.col]; 464 } 465 } 466 467 /// 468 struct Column(T) { 469 ColumnUntyped cu; 470 471 T front; 472 473 this(Sheet* sheet, size_t col, size_t start, size_t end) { 474 this.cu = ColumnUntyped(sheet, col, start, end); 475 this.read(); 476 } 477 478 @property bool empty() const { 479 return this.cu.empty; 480 } 481 482 void popFront() { 483 this.cu.popFront(); 484 if(!this.empty) { 485 this.read(); 486 } 487 } 488 489 typeof(this) save() { 490 return this; 491 } 492 493 private void read() { 494 this.front = convertTo!T(this.cu.front.value); 495 } 496 497 bool canConvertTo(CellType ct) const { 498 for(size_t it = this.cu.start; it < this.cu.end; ++it) { 499 if(!this.cu.sheet.table[it][this.cu.col].canConvertTo(ct)) { 500 return false; 501 } 502 } 503 return true; 504 } 505 } 506 507 unittest { 508 import std.range : isForwardRange; 509 import std.meta : AliasSeq; 510 static foreach(T; AliasSeq!(long,double,DateTime,TimeOfDay,Date,string)) {{ 511 alias C = Column!T; 512 alias R = Row!T; 513 alias I = Iterator!T; 514 static assert(isForwardRange!C, C.stringof); 515 static assert(isForwardRange!R, R.stringof); 516 static assert(isForwardRange!I, I.stringof); 517 }} 518 } 519 520 Date longToDate(long d) { 521 // modifed from https://www.codeproject.com/Articles/2750/ 522 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 523 524 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 525 // leap year, but Excel/Lotus 123 think it is... 526 if(d == 60) { 527 return Date(1900, 2, 29); 528 } else if(d < 60) { 529 // Because of the 29-02-1900 bug, any serial date 530 // under 60 is one off... Compensate. 531 ++d; 532 } 533 534 // Modified Julian to DMY calculation with an addition of 2415019 535 int l = cast(int)d + 68569 + 2415019; 536 int n = int(( 4 * l ) / 146097); 537 l = l - int(( 146097 * n + 3 ) / 4); 538 int i = int(( 4000 * ( l + 1 ) ) / 1461001); 539 l = l - int(( 1461 * i ) / 4) + 31; 540 int j = int(( 80 * l ) / 2447); 541 int nDay = l - int(( 2447 * j ) / 80); 542 l = int(j / 11); 543 int nMonth = j + 2 - ( 12 * l ); 544 int nYear = 100 * ( n - 49 ) + i + l; 545 return Date(nYear, nMonth, nDay); 546 } 547 548 long dateToLong(Date d) { 549 // modifed from https://www.codeproject.com/Articles/2750/ 550 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 551 552 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 553 // leap year, but Excel/Lotus 123 think it is... 554 if(d.day == 29 && d.month == 2 && d.year == 1900) { 555 return 60; 556 } 557 558 // DMY to Modified Julian calculated with an extra subtraction of 2415019. 559 long nSerialDate = 560 int(( 1461 * ( d.year + 4800 + int(( d.month - 14 ) / 12) ) ) / 4) + 561 int(( 367 * ( d.month - 2 - 12 * 562 ( ( d.month - 14 ) / 12 ) ) ) / 12) - 563 int(( 3 * ( int(( d.year + 4900 564 + int(( d.month - 14 ) / 12) ) / 100) ) ) / 4) + 565 d.day - 2415019 - 32075; 566 567 if(nSerialDate < 60) { 568 // Because of the 29-02-1900 bug, any serial date 569 // under 60 is one off... Compensate. 570 nSerialDate--; 571 } 572 573 return nSerialDate; 574 } 575 576 unittest { 577 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 578 foreach(d; ds) { 579 long l = dateToLong(d); 580 Date r = longToDate(l); 581 assert(r == d, format("%s %s", r, d)); 582 } 583 } 584 585 TimeOfDay doubleToTimeOfDay(double s) { 586 import core.stdc.math : lround; 587 double secs = (24.0 * 60.0 * 60.0) * s; 588 589 // TODO not one-hundred my lround is needed 590 int secI = to!int(lround(secs)); 591 592 return TimeOfDay(secI / 3600, (secI / 60) % 60, secI % 60); 593 } 594 595 double timeOfDayToDouble(TimeOfDay tod) { 596 long h = tod.hour * 60 * 60; 597 long m = tod.minute * 60; 598 long s = tod.second; 599 return (h + m + s) / (24.0 * 60.0 * 60.0); 600 } 601 602 unittest { 603 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 604 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 605 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 606 foreach(tod; tods) { 607 double d = timeOfDayToDouble(tod); 608 assert(d <= 1.0, format("%s", d)); 609 TimeOfDay r = doubleToTimeOfDay(d); 610 assert(r == tod, format("%s %s", r, tod)); 611 } 612 } 613 614 double datetimeToDouble(DateTime dt) { 615 double d = dateToLong(dt.date); 616 double t = timeOfDayToDouble(dt.timeOfDay); 617 return d + t; 618 } 619 620 DateTime doubleToDateTime(double d) { 621 long l = cast(long)d; 622 Date dt = longToDate(l); 623 TimeOfDay t = doubleToTimeOfDay(d - l); 624 return DateTime(dt, t); 625 } 626 627 unittest { 628 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 629 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 630 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 631 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 632 foreach(d; ds) { 633 foreach(tod; tods) { 634 DateTime dt = DateTime(d, tod); 635 double dou = datetimeToDouble(dt); 636 637 Date rd = longToDate(cast(long)dou); 638 assert(rd == d, format("%s %s", rd, d)); 639 640 double rest = dou - cast(long)dou; 641 TimeOfDay rt = doubleToTimeOfDay(dou - cast(long)dou); 642 assert(rt == tod, format("%s %s", rt, tod)); 643 644 DateTime r = doubleToDateTime(dou); 645 assert(r == dt, format("%s %s", r, dt)); 646 } 647 } 648 } 649 650 Date stringToDate(string s) { 651 import std.array : split; 652 import std.string : indexOf; 653 654 if(s.indexOf('/') != -1) { 655 auto sp = s.split('/'); 656 enforce(sp.length == 3, format("[%s]", sp)); 657 return Date(to!int(sp[2]), to!int(sp[1]), to!int(sp[0])); 658 } else { 659 return longToDate(to!long(s)); 660 } 661 } 662 663 bool tryConvertTo(T,S)(S var) { 664 return !(tryConvertToImpl!T(Data(var)).isNull()); 665 } 666 667 Nullable!(T) tryConvertToImpl(T)(Data var) { 668 try { 669 return nullable(convertTo!T(var)); 670 } catch(Exception e) { 671 return Nullable!T(); 672 } 673 } 674 675 T convertTo(T)(Data var) { 676 static if(is(T == Data)) { 677 return var; 678 } else static if(isSomeString!T) { 679 return var.visit!( 680 (bool l) => to!string(l), 681 (long l) => to!string(l), 682 (double l) => format("%f", l), 683 (string l) => l, 684 (DateTime l) => l.toISOExtString(), 685 (Date l) => l.toISOExtString(), 686 (TimeOfDay l) => l.toISOExtString(), 687 () => "") 688 (); 689 } else static if(is(T == bool)) { 690 if(var.type != typeid(bool) && var.type != typeid(long) 691 && var.type == typeid(string)) 692 { 693 throw new Exception("Can not convert " ~ var.type.toString() ~ 694 " to bool"); 695 } 696 return var.visit!( 697 (bool l) => l, 698 (long l) => l == 0 ? false : true, 699 (string l) => to!bool(l), 700 (double l) => false, 701 (DateTime l) => false, 702 (Date l) => false, 703 (TimeOfDay l) => false, 704 () => false) 705 (); 706 } else static if(isIntegral!T) { 707 return var.visit!( 708 (bool l) => to!T(l), 709 (long l) => to!T(l), 710 (double l) => to!T(l), 711 (string l) => to!T(l), 712 (DateTime l) => to!T(dateToLong(l.date)), 713 (Date l) => to!T(dateToLong(l)), 714 (TimeOfDay l) => to!T(0), 715 () => to!T(0)) 716 (); 717 } else static if(isFloatingPoint!T) { 718 return var.visit!( 719 (bool l) => to!T(l), 720 (long l) => to!T(l), 721 (double l) => to!T(l), 722 (string l) => to!T(l), 723 (DateTime l) => to!T(dateToLong(l.date)), 724 (Date l) => to!T(dateToLong(l)), 725 (TimeOfDay l) => to!T(0), 726 () => T.init) 727 (); 728 } else static if(is(T == DateTime)) { 729 return var.visit!( 730 (bool l) => DateTime.init, 731 (long l) => doubleToDateTime(to!long(l)), 732 (double l) => doubleToDateTime(l), 733 (string l) => doubleToDateTime(to!double(l)), 734 (DateTime l) => l, 735 (Date l) => DateTime(l, TimeOfDay.init), 736 (TimeOfDay l) => DateTime(Date.init, l), 737 () => DateTime.init) 738 (); 739 } else static if(is(T == Date)) { 740 import std.math : lround; 741 742 return var.visit!( 743 (bool l) => Date.init, 744 (long l) => longToDate(l), 745 (double l) => longToDate(lround(l)), 746 (string l) => stringToDate(l), 747 (DateTime l) => l.date, 748 (Date l) => l, 749 (TimeOfDay l) => Date.init, 750 () => Date.init) 751 (); 752 } else static if(is(T == TimeOfDay)) { 753 import std.math : lround; 754 755 return var.visit!( 756 (bool l) => TimeOfDay.init, 757 (long l) => TimeOfDay.init, 758 (double l) => doubleToTimeOfDay(l - cast(long)l), 759 (string l) => doubleToTimeOfDay( 760 to!double(l) - cast(long)to!double(l) 761 ), 762 (DateTime l) => l.timeOfDay, 763 (Date l) => TimeOfDay.init, 764 (TimeOfDay l) => l, 765 () => TimeOfDay.init) 766 (); 767 } 768 assert(false, T.stringof); 769 } 770 771 772 private ZipArchive readFile(string filename) { 773 enforce(exists(filename), "File with name " ~ filename ~ " does not 774 exist"); 775 776 auto file = new ZipArchive(read(filename)); 777 return file; 778 } 779 780 struct SheetNameId { 781 string name; 782 int id; 783 string rid; 784 } 785 786 string convertToString(ubyte[] d) { 787 import std.encoding; 788 auto b = getBOM(d); 789 switch(b.schema) { 790 case BOM.none: 791 return cast(string)d; 792 case BOM.utf8: 793 return cast(string)(d[3 .. $]); 794 case BOM.utf16be: goto default; 795 case BOM.utf16le: goto default; 796 case BOM.utf32be: goto default; 797 case BOM.utf32le: goto default; 798 default: 799 string ret; 800 transcode(d, ret); 801 return ret; 802 } 803 } 804 805 SheetNameId[] sheetNames(string filename) { 806 auto file = readFile(filename); 807 auto ams = file.directory; 808 immutable wbStr = "xl/workbook.xml"; 809 if(wbStr !in ams) { 810 return SheetNameId[].init; 811 } 812 ubyte[] wb = file.expand(ams[wbStr]); 813 string wbData = convertToString(wb); 814 815 auto dom = parseDOM(wbData); 816 assert(dom.children.length == 1); 817 auto workbook = dom.children[0]; 818 string sheetName = workbook.name == "workbook" 819 ? "sheets" : "s:sheets"; 820 assert(workbook.name == "workbook" || workbook.name == "s:workbook"); 821 auto sheetsRng = workbook.children.filter!(c => c.name == sheetName); 822 assert(!sheetsRng.empty); 823 return sheetsRng.front.children 824 .map!(s => SheetNameId( 825 s.attributes.filter!(a => a.name == "name").front.value, 826 s.attributes.filter!(a => a.name == "sheetId").front 827 .value.to!int(), 828 s.attributes.filter!(a => a.name == "r:id").front.value, 829 ) 830 ) 831 .array 832 .sort!((a, b) => a.id < b.id) 833 .release; 834 } 835 836 unittest { 837 auto r = sheetNames("multitable.xlsx"); 838 assert(r[0].name == "wb1"); 839 assert(r[0].id == 1); 840 } 841 842 struct Relationships { 843 string id; 844 string file; 845 } 846 847 Relationships[string] parseRelationships(ZipArchive za, ArchiveMember am) { 848 ubyte[] d = za.expand(am); 849 string relData = convertToString(d); 850 auto dom = parseDOM(relData); 851 assert(dom.children.length == 1); 852 auto rel = dom.children[0]; 853 assert(rel.name == "Relationships"); 854 auto relRng = rel.children.filter!(c => c.name == "Relationship"); 855 assert(!relRng.empty); 856 857 Relationships[string] ret; 858 foreach(r; relRng) { 859 Relationships tmp; 860 tmp.id = r.attributes.filter!(a => a.name == "Id") 861 .front.value; 862 tmp.file = r.attributes.filter!(a => a.name == "Target") 863 .front.value; 864 ret[tmp.id] = tmp; 865 } 866 return ret; 867 } 868 869 Sheet readSheet(string filename, string sheetName) { 870 SheetNameId[] sheets = sheetNames(filename); 871 auto sRng = sheets.filter!(s => s.name == sheetName); 872 enforce(!sRng.empty, "No sheet with name " ~ sheetName 873 ~ " found in file " ~ filename); 874 return readSheetImpl(filename, sRng.front.rid); 875 } 876 877 string eatXlPrefix(string fn) { 878 foreach(p; ["xl//", "/xl/"]) { 879 if(fn.startsWith(p)) { 880 return fn[p.length .. $]; 881 } 882 } 883 return fn; 884 } 885 886 Sheet readSheetImpl(string filename, string rid) { 887 scope(failure) { 888 writefln("Failed at file '%s' and sheet '%s'", filename, rid); 889 } 890 auto file = readFile(filename); 891 auto ams = file.directory; 892 immutable ss = "xl/sharedStrings.xml"; 893 Data[] sharedStrings = (ss in ams) 894 ? readSharedEntries(file, ams[ss]) 895 : []; 896 //logf("%s", sharedStrings); 897 898 Relationships[string] rels = parseRelationships(file, 899 ams["xl/_rels/workbook.xml.rels"]); 900 901 Relationships* sheetRel = rid in rels; 902 enforce(sheetRel !is null, format("Could not find '%s' in '%s'", rid, 903 filename)); 904 string shrFn = eatXlPrefix(sheetRel.file); 905 string fn = "xl/" ~ shrFn; 906 ArchiveMember* sheet = fn in ams; 907 enforce(sheet !is null, format("sheetRel.file orig '%s', fn %s not in [%s]", 908 sheetRel.file, fn, ams.keys())); 909 910 Sheet ret; 911 ret.cells = insertValueIntoCell(readCells(file, *sheet), sharedStrings); 912 Pos maxPos; 913 foreach(ref c; ret.cells) { 914 c.position = toPos(c.r); 915 maxPos = elementMax(maxPos, c.position); 916 } 917 ret.maxPos = maxPos; 918 ret.table = new Cell[][](ret.maxPos.row + 1, ret.maxPos.col + 1); 919 foreach(c; ret.cells) { 920 ret.table[c.position.row][c.position.col] = c; 921 } 922 return ret; 923 } 924 925 Data[] readSharedEntries(ZipArchive za, ArchiveMember am) { 926 ubyte[] ss = za.expand(am); 927 string ssData = convertToString(ss); 928 auto dom = parseDOM(ssData); 929 Data[] ret; 930 if(dom.type != EntityType.elementStart) { 931 return ret; 932 } 933 assert(dom.children.length == 1); 934 auto sst = dom.children[0]; 935 assert(sst.name == "sst"); 936 if(sst.type != EntityType.elementStart || sst.children.empty) { 937 return ret; 938 } 939 auto siRng = sst.children.filter!(c => c.name == "si"); 940 foreach(si; siRng) { 941 if(si.type != EntityType.elementStart) { 942 continue; 943 } 944 //ret ~= extractData(si); 945 string tmp; 946 foreach(tORr; si.children) { 947 if(tORr.name == "t" && tORr.type == EntityType.elementStart 948 && !tORr.children.empty) 949 { 950 ret ~= Data(convert(tORr.children[0].text)); 951 } else if(tORr.name == "r") { 952 foreach(r; tORr.children.filter!(r => r.name == "t")) { 953 if(r.type == EntityType.elementStart && !r.children.empty) { 954 tmp ~= r.children[0].text; 955 } 956 } 957 } else { 958 ret ~= Data.init; 959 } 960 } 961 if(!tmp.empty) { 962 ret ~= Data(convert(tmp)); 963 } 964 } 965 return ret; 966 } 967 968 string extractData(DOMEntity!string si) { 969 string tmp; 970 foreach(tORr; si.children) { 971 if(tORr.name == "t") { 972 if(!tORr.attributes.filter!(a => a.name == "xml:space").empty) { 973 return ""; 974 } else if(tORr.type == EntityType.elementStart 975 && !tORr.children.empty) 976 { 977 return tORr.children[0].text; 978 } else { 979 return ""; 980 } 981 } else if(tORr.name == "r") { 982 foreach(r; tORr.children.filter!(r => r.name == "t")) { 983 tmp ~= r.children[0].text; 984 } 985 } 986 } 987 if(!tmp.empty) { 988 return tmp; 989 } 990 assert(false); 991 } 992 993 private bool canConvertToLong(string s) { 994 if(s.empty) { 995 return false; 996 } 997 return s.byChar.all!isDigit(); 998 } 999 1000 private immutable rs = r"[\+-]{0,1}[0-9][0-9]*\.[0-9]*"; 1001 private auto rgx = ctRegex!rs; 1002 1003 private bool canConvertToDouble(string s) { 1004 auto cap = matchAll(s, rgx); 1005 return cap.empty || cap.front.hit != s ? false : true; 1006 } 1007 1008 Data convert(string s) { 1009 struct ToRe { 1010 string from; 1011 string to; 1012 } 1013 1014 immutable ToRe[] toRe = [ 1015 ToRe( "&", "&"), 1016 ToRe( ">", "<"), 1017 ToRe( "<", ">"), 1018 ToRe( """, "\""), 1019 ToRe( "'", "'") 1020 ]; 1021 1022 string replaceStrings(string s) { 1023 import std.algorithm.searching : canFind; 1024 import std.array : replace; 1025 foreach(tr; toRe) { 1026 while(canFind(s, tr.from)) { 1027 s = s.replace(tr.from, tr.to); 1028 } 1029 } 1030 return s; 1031 } 1032 1033 if(canConvertToLong(s)) { 1034 return Data(to!long(s)); 1035 } else if(canConvertToDouble(s)) { 1036 return Data(to!double(s)); 1037 } else { 1038 return Data(replaceStrings(s)); 1039 } 1040 } 1041 1042 Cell[] readCells(ZipArchive za, ArchiveMember am) { 1043 Cell[] ret; 1044 ubyte[] ss = za.expand(am); 1045 string ssData = convertToString(ss); 1046 auto dom = parseDOM(ssData); 1047 assert(dom.children.length == 1); 1048 auto ws = dom.children[0]; 1049 if(ws.name != "worksheet") { 1050 return ret; 1051 } 1052 auto sdRng = ws.children.filter!(c => c.name == "sheetData"); 1053 assert(!sdRng.empty); 1054 if(sdRng.front.type != EntityType.elementStart) { 1055 return ret; 1056 } 1057 auto rows = sdRng.front.children 1058 .filter!(r => r.name == "row"); 1059 1060 foreach(row; rows) { 1061 if(row.type != EntityType.elementStart || row.children.empty) { 1062 continue; 1063 } 1064 foreach(c; row.children.filter!(r => r.name == "c")) { 1065 Cell tmp; 1066 tmp.row = row.attributes.filter!(a => a.name == "r") 1067 .front.value.to!size_t(); 1068 tmp.r = c.attributes.filter!(a => a.name == "r") 1069 .front.value; 1070 auto t = c.attributes.filter!(a => a.name == "t"); 1071 if(t.empty) { 1072 // we assume that no t attribute means direct number 1073 //writefln("Found a strange empty cell \n%s", c); 1074 } else { 1075 tmp.t = t.front.value; 1076 } 1077 if(tmp.t == "s" || tmp.t == "n") { 1078 if(c.type == EntityType.elementStart) { 1079 auto v = c.children.filter!(c => c.name == "v"); 1080 //enforce(!v.empty, format("r %s", tmp.row)); 1081 if(!v.empty && v.front.type == EntityType.elementStart 1082 && !v.front.children.empty) 1083 { 1084 tmp.v = v.front.children[0].text; 1085 } else { 1086 tmp.v = ""; 1087 } 1088 } 1089 } else if(tmp.t == "inlineStr") { 1090 auto is_ = c.children.filter!(c => c.name == "is"); 1091 tmp.v = extractData(is_.front); 1092 } else if(c.type == EntityType.elementStart) { 1093 auto v = c.children.filter!(c => c.name == "v"); 1094 if(!v.empty && v.front.type == EntityType.elementStart 1095 && !v.front.children.empty) 1096 { 1097 tmp.v = v.front.children[0].text; 1098 } 1099 } 1100 if(c.type == EntityType.elementStart) { 1101 auto f = c.children.filter!(c => c.name == "f"); 1102 if(!f.empty && f.front.type == EntityType.elementStart) { 1103 tmp.f = f.front.children[0].text; 1104 } 1105 } 1106 ret ~= tmp; 1107 } 1108 } 1109 return ret; 1110 } 1111 1112 Cell[] insertValueIntoCell(Cell[] cells, Data[] ss) { 1113 immutable excepted = ["n", "s", "b", "e", "str", "inlineStr"]; 1114 immutable same = ["n", "e", "str", "inlineStr"]; 1115 foreach(ref Cell c; cells) { 1116 assert(canFind(excepted, c.t) || c.t.empty, 1117 format("'%s' not in [%s]", c.t, excepted)); 1118 if(c.t.empty) { 1119 c.value = convert(c.v); 1120 } else if(canFind(same, c.t)) { 1121 c.value = convert(c.v); 1122 } else if(c.t == "b") { 1123 //logf("'%s' %s", c.v, c); 1124 c.value = c.v == "1"; 1125 } else { 1126 if(!c.v.empty) { 1127 size_t idx = to!size_t(c.v); 1128 //logf("'%s' %s", c.v, idx); 1129 c.value = ss[idx]; 1130 } 1131 } 1132 } 1133 return cells; 1134 } 1135 1136 Pos toPos(string s) { 1137 import std.string : indexOfAny; 1138 import std.math : pow; 1139 ptrdiff_t fn = s.indexOfAny("0123456789"); 1140 enforce(fn != -1, s); 1141 size_t row = to!size_t(to!long(s[fn .. $]) - 1); 1142 size_t col = 0; 1143 string colS = s[0 .. fn]; 1144 foreach(idx, char c; colS) { 1145 col = col * 26 + (c - 'A' + 1); 1146 } 1147 return Pos(row, col - 1); 1148 } 1149 1150 unittest { 1151 assert(toPos("A1").col == 0); 1152 assert(toPos("Z1").col == 25); 1153 assert(toPos("AA1").col == 26); 1154 } 1155 1156 Pos elementMax(Pos a, Pos b) { 1157 return Pos(a.row < b.row ? b.row : a.row, 1158 a.col < b.col ? b.col : a.col); 1159 } 1160 1161 unittest { 1162 import std.math : approxEqual; 1163 auto r = readSheet("multitable.xlsx", "wb1"); 1164 assert(approxEqual(r.table[12][5].value.get!double(), 26.74), 1165 format("%s", r.table[12][5]) 1166 ); 1167 1168 assert(approxEqual(r.table[13][5].value.get!double(), -26.74), 1169 format("%s", r.table[13][5]) 1170 ); 1171 } 1172 1173 unittest { 1174 import std.algorithm.comparison : equal; 1175 auto s = readSheet("multitable.xlsx", "wb1"); 1176 auto r = s.iterateRow!long(15, 1, 6); 1177 1178 assert(r.canConvertTo(CellType.long_)); 1179 assert(r.canConvertTo(CellType.double_)); 1180 assert(r.canConvertTo(CellType.string_)); 1181 1182 auto expected = [1, 2, 3, 4, 5]; 1183 assert(equal(r, expected), format("%s", r)); 1184 1185 auto r2 = s.getRow!long(15, 1, 6); 1186 assert(equal(r, expected)); 1187 1188 auto it = s.iterateRowLong(15, 1, 6); 1189 assert(equal(r2, it)); 1190 1191 auto it2 = s.iterateRowUntyped(15, 1, 6) 1192 .map!(it => format("%s", it)) 1193 .array; 1194 } 1195 1196 unittest { 1197 import std.algorithm.comparison : equal; 1198 auto s = readSheet("multitable.xlsx", "wb2"); 1199 //writefln("%s\n%(%s\n%)", s.maxPos, s.cells); 1200 auto rslt = s.iterateColumn!Date(1, 1, 6); 1201 auto rsltUt = s.iterateColumnUntyped(1, 1, 6) 1202 .map!(it => format("%s", it)) 1203 .array; 1204 assert(!rsltUt.empty); 1205 1206 auto target = [Date(2019,5,01), Date(2016,12,27), Date(1976,7,23), 1207 Date(1986,7,2), Date(2038,1,19) 1208 ]; 1209 assert(equal(rslt, target), format("\n%s\n%s", rslt, target)); 1210 1211 auto it = s.getColumn!Date(1, 1, 6); 1212 assert(equal(rslt, it)); 1213 1214 auto it2 = s.getColumnDate(1, 1, 6); 1215 assert(equal(rslt, it2)); 1216 } 1217 1218 unittest { 1219 import std.algorithm.comparison : equal; 1220 auto s = readSheet("multitable.xlsx", "Sheet3"); 1221 writeln(s.table[0][0].value.type()); 1222 assert(s.table[0][0].value.peek!long(), 1223 format("%s", s.table[0][0].value)); 1224 assert(s.table[0][0].canConvertTo(CellType.bool_)); 1225 } 1226 1227 unittest { 1228 import std.file : dirEntries, SpanMode; 1229 import std.traits : EnumMembers; 1230 foreach(de; dirEntries("xlsx_files/", "*.xlsx", SpanMode.depth) 1231 .filter!(a => a.name != "xlsx_files/data03.xlsx")) 1232 { 1233 //writeln(de.name); 1234 auto sn = sheetNames(de.name); 1235 foreach(s; sn) { 1236 auto sheet = readSheet(de.name, s.name); 1237 foreach(cell; sheet.cells) { 1238 foreach(T; [EnumMembers!CellType]) { 1239 auto cc = cell.canConvertTo(T); 1240 } 1241 } 1242 } 1243 } 1244 } 1245 1246 unittest { 1247 import std.algorithm.comparison : equal; 1248 auto sheet = readSheet("testworkbook.xlsx", "ws1"); 1249 //writefln("%(%s\n%)", sheet.cells); 1250 //writeln(sheet.toString()); 1251 //assert(sheet.table[2][3].value.get!long() == 1337); 1252 1253 auto c = sheet.getColumnLong(3, 2, 5); 1254 auto r = [1337, 2, 3]; 1255 assert(equal(c, r), format("%s", c)); 1256 1257 auto c2 = sheet.getColumnString(4, 2, 5); 1258 string f2 = sheet.table[2][4].convertToString(); 1259 assert(f2 == "hello", f2); 1260 f2 = sheet.table[3][4].convertToString(); 1261 assert(f2 == "sil", f2); 1262 f2 = sheet.table[4][4].convertToString(); 1263 assert(f2 == "foo", f2); 1264 auto r2 = ["hello", "sil", "foo"]; 1265 assert(equal(c2, r2), format("%s", c2)); 1266 } 1267 1268 unittest { 1269 import std.math : approxEqual; 1270 auto sheet = readSheet("toto.xlsx", "Trades"); 1271 writefln("%(%s\n%)", sheet.cells); 1272 1273 auto r = sheet.getRowString(1, 0, 2).array; 1274 1275 double d = to!double(r[1]); 1276 assert(approxEqual(d, 38204642.510000)); 1277 }