1 module xlsxreader; 2 3 import std.algorithm.iteration : filter, map, joiner; 4 import std.algorithm.mutation : reverse; 5 import std.algorithm.searching : all, canFind, startsWith; 6 import std.algorithm.sorting : sort; 7 import std.array : array, empty, front, popFront; 8 import std.ascii : isDigit; 9 import std.experimental.logger; 10 import std.conv : to; 11 import std.datetime : DateTime, Date, TimeOfDay; 12 import std.exception : enforce; 13 import std.file : read, exists, readText; 14 import std.format : format; 15 import std.range : tee; 16 import std.regex; 17 import std.stdio; 18 import std.traits : isIntegral, isFloatingPoint, isSomeString; 19 import std.typecons : tuple, Nullable, nullable; 20 import std.utf : byChar; 21 import std.variant; 22 import std.zip; 23 24 import dxml.dom; 25 26 /// 27 struct Pos { 28 // zero based 29 size_t row; 30 // zero based 31 size_t col; 32 } 33 34 /// 35 alias Data = Algebraic!(bool,long,double,string,DateTime,Date,TimeOfDay); 36 37 /// 38 struct Cell { 39 string loc; 40 size_t row; // row[r] 41 string t; // s or n, s for pointer, n for value, stored in v 42 string r; // c[r] 43 string v; // c.v the value or ptr 44 string f; // c.f the formula 45 Data value; 46 Pos position; 47 48 bool canConvertTo(CellType ct) const { 49 auto b = (bool l) { 50 switch(ct) { 51 case CellType.datetime: return false; 52 case CellType.timeofday: return false; 53 case CellType.date: return false; 54 default: return true; 55 } 56 }; 57 58 auto dt = (DateTime l) { 59 switch(ct) { 60 case CellType.datetime: return true; 61 case CellType.timeofday: return true; 62 case CellType.date: return true; 63 case CellType.double_: return true; 64 default: return false; 65 } 66 }; 67 68 auto de = (Date l) { 69 switch(ct) { 70 case CellType.datetime: return false; 71 case CellType.timeofday: return false; 72 case CellType.date: return true; 73 case CellType.double_: return true; 74 case CellType.long_: return true; 75 default: return false; 76 } 77 }; 78 79 auto tod = (TimeOfDay l) { 80 switch(ct) { 81 case CellType.datetime: return false; 82 case CellType.timeofday: return true; 83 case CellType.double_: return true; 84 default: return false; 85 } 86 }; 87 88 auto l = (long l) { 89 switch(ct) { 90 case CellType.date: return !tryConvertTo!Date(l); 91 case CellType.long_: return true; 92 case CellType.string_: return true; 93 case CellType.double_: return true; 94 default: return false; 95 } 96 }; 97 98 auto d = (double l) { 99 switch(ct) { 100 case CellType.string_: return true; 101 case CellType.double_: return true; 102 case CellType.datetime: return !tryConvertTo!DateTime(l); 103 case CellType.date: return !tryConvertTo!Date(l); 104 case CellType.timeofday: return !tryConvertTo!TimeOfDay(l); 105 default: return false; 106 } 107 }; 108 109 auto s = (string l) { 110 switch(ct) { 111 case CellType.string_: return true; 112 case CellType.bool_: return !tryConvertTo!bool(l); 113 case CellType.long_: return !tryConvertTo!long(l); 114 case CellType.double_: return !tryConvertTo!double(l); 115 case CellType.datetime: return !tryConvertTo!DateTime(l); 116 case CellType.date: return !tryConvertTo!Date(l); 117 case CellType.timeofday: return !tryConvertTo!TimeOfDay(l); 118 default: return false; 119 } 120 }; 121 122 return this.value.visit!( 123 (bool l) => b(l), 124 (long lo) => l(lo), 125 (double l) => d(l), 126 (string l) => s(l), 127 (DateTime l) => dt(l), 128 (Date l) => de(l), 129 (TimeOfDay l) => tod(l), 130 () => false) 131 (); 132 } 133 } 134 135 // 136 enum CellType { 137 datetime, 138 timeofday, 139 date, 140 bool_, 141 double_, 142 long_, 143 string_ 144 } 145 146 /// 147 struct Sheet { 148 Cell[] cells; 149 Cell[][] table; 150 Pos maxPos; 151 152 string toString() { 153 import std.format : formattedWrite; 154 import std.array : appender; 155 long[] maxCol = new long[](maxPos.col + 1); 156 foreach(row; this.table) { 157 foreach(idx, Cell col; row) { 158 string s = col.value.visit!( 159 (bool l) => to!string(l), 160 (long l) => to!string(l), 161 (double l) => format("%.4f", l), 162 (string l) => l, 163 (DateTime l) => l.toISOExtString(), 164 (Date l) => l.toISOExtString(), 165 (TimeOfDay l) => l.toISOExtString(), 166 () => "") 167 (); 168 169 maxCol[idx] = maxCol[idx] < s.length ? s.length : maxCol[idx]; 170 } 171 } 172 maxCol[] += 1; 173 174 auto app = appender!string(); 175 foreach(row; this.table) { 176 foreach(idx, Cell col; row) { 177 string s = col.value.visit!( 178 (bool l) => to!string(l), 179 (long l) => to!string(l), 180 (double l) => format("%.4f", l), 181 (string l) => l, 182 (DateTime l) => l.toISOExtString(), 183 (Date l) => l.toISOExtString(), 184 (TimeOfDay l) => l.toISOExtString(), 185 () => "") 186 (); 187 formattedWrite(app, "%*s, ", maxCol[idx], s); 188 } 189 formattedWrite(app, "\n"); 190 } 191 return app.data; 192 } 193 194 void printTable() { 195 writeln(this.toString()); 196 } 197 198 // Column 199 200 Iterator!T getColumn(T)(size_t col, size_t start, size_t end) { 201 auto c = this.iterateColumn!T(col, start, end); 202 return Iterator!T(c.array); 203 } 204 205 private enum t = q{ 206 Iterator!(%1$s) getColumn%2$s(size_t col, size_t start, size_t end) { 207 return getColumn!(%1$s)(col, start, end); 208 } 209 }; 210 static foreach(T; ["long", "double", "string", "Date", "TimeOfDay", 211 "DateTime"]) 212 { 213 import std.ascii : toUpper; 214 mixin(format(t, T, T[0].toUpper ~ T[1 .. $])); 215 } 216 217 Column!(T) iterateColumn(T)(size_t col, size_t start, size_t end) { 218 return Column!(T)(&this, col, start, end); 219 } 220 221 Column!(long) iterateColumnLong(size_t col, size_t start, size_t end) { 222 return Column!(long)(&this, col, start, end); 223 } 224 225 Column!(double) iterateColumnDouble(size_t col, size_t start, size_t end) { 226 return Column!(double)(&this, col, start, end); 227 } 228 229 Column!(string) iterateColumnString(size_t col, size_t start, size_t end) { 230 return Column!(string)(&this, col, start, end); 231 } 232 233 Column!(DateTime) iterateColumnDateTime(size_t col, size_t start, 234 size_t end) 235 { 236 return Column!(DateTime)(&this, col, start, end); 237 } 238 239 Column!(Date) iterateColumnDate(size_t col, size_t start, size_t end) { 240 return Column!(Date)(&this, col, start, end); 241 } 242 243 Column!(TimeOfDay) iterateColumnTimeOfDay(size_t col, size_t start, 244 size_t end) 245 { 246 return Column!(TimeOfDay)(&this, col, start, end); 247 } 248 249 // Row 250 251 Iterator!T getRow(T)(size_t row, size_t start, size_t end) { 252 auto c = this.iterateRow!T(row, start, end); 253 return Iterator!T(c.array); 254 } 255 256 private enum t2 = q{ 257 Iterator!(%1$s) getRow%2$s(size_t row, size_t start, size_t end) { 258 return getRow!(%1$s)(row, start, end); 259 } 260 }; 261 static foreach(T; ["long", "double", "string", "Date", "TimeOfDay", 262 "DateTime"]) 263 { 264 import std.ascii : toUpper; 265 mixin(format(t2, T, T[0].toUpper ~ T[1 .. $])); 266 } 267 268 Row!(T) iterateRow(T)(size_t row, size_t start, size_t end) { 269 return Row!(T)(&this, row, start, end); 270 } 271 272 Row!(long) iterateRowLong(size_t row, size_t start, size_t end) { 273 return Row!(long)(&this, row, start, end); 274 } 275 276 Row!(double) iterateRowDouble(size_t row, size_t start, size_t end) { 277 return Row!(double)(&this, row, start, end); 278 } 279 280 Row!(string) iterateRowString(size_t row, size_t start, size_t end) { 281 return Row!(string)(&this, row, start, end); 282 } 283 284 Row!(DateTime) iterateRowDateTime(size_t row, size_t start, 285 size_t end) 286 { 287 return Row!(DateTime)(&this, row, start, end); 288 } 289 290 Row!(Date) iterateRowDate(size_t row, size_t start, size_t end) { 291 return Row!(Date)(&this, row, start, end); 292 } 293 294 Row!(TimeOfDay) iterateRowTimeOfDay(size_t row, size_t start, 295 size_t end) 296 { 297 return Row!(TimeOfDay)(&this, row, start, end); 298 } 299 } 300 301 struct Iterator(T) { 302 T[] data; 303 304 this(T[] data) { 305 this.data = data; 306 } 307 308 @property bool empty() const { 309 return this.data.empty; 310 } 311 312 void popFront() { 313 this.data.popFront(); 314 } 315 316 @property T front() { 317 return this.data.front; 318 } 319 320 typeof(this) save() { 321 return this; 322 } 323 } 324 325 /// 326 struct Row(T) { 327 Sheet* sheet; 328 /*const*/ size_t row; 329 size_t start; 330 size_t end; 331 size_t cur; 332 333 T front; 334 335 this(Sheet* sheet, size_t row, size_t start, size_t end) { 336 this.sheet = sheet; 337 this.row = row; 338 this.start = start; 339 this.end = end; 340 this.cur = this.start; 341 this.read(); 342 } 343 344 @property bool empty() const { 345 return this.cur >= this.end; 346 } 347 348 void popFront() { 349 ++this.cur; 350 if(!this.empty) { 351 this.read(); 352 } 353 } 354 355 typeof(this) save() { 356 return this; 357 } 358 359 private void read() { 360 this.front = convertTo!T(this.sheet.table[this.row][this.cur].value); 361 } 362 363 bool canConvertTo(CellType ct) const { 364 for(size_t it = this.start; it < this.end; ++it) { 365 if(!this.sheet.table[this.row][it].canConvertTo(ct)) { 366 return false; 367 } 368 } 369 return true; 370 } 371 } 372 373 /// 374 struct Column(T) { 375 Sheet* sheet; 376 /*const*/ size_t col; 377 size_t start; 378 size_t end; 379 size_t cur; 380 381 T front; 382 383 this(Sheet* sheet, size_t col, size_t start, size_t end) { 384 this.sheet = sheet; 385 this.col = col; 386 this.start = start; 387 this.end = end; 388 this.cur = this.start; 389 this.read(); 390 } 391 392 @property bool empty() const { 393 return this.cur >= this.end; 394 } 395 396 void popFront() { 397 ++this.cur; 398 if(!this.empty) { 399 this.read(); 400 } 401 } 402 403 typeof(this) save() { 404 return this; 405 } 406 407 private void read() { 408 this.front = convertTo!T(this.sheet.table[this.cur][this.col].value); 409 } 410 411 bool canConvertTo(CellType ct) const { 412 for(size_t it = this.start; it < this.end; ++it) { 413 if(!this.sheet.table[it][this.col].canConvertTo(ct)) { 414 return false; 415 } 416 } 417 return true; 418 } 419 } 420 421 unittest { 422 import std.range : isForwardRange; 423 import std.meta : AliasSeq; 424 static foreach(T; AliasSeq!(long,double,DateTime,TimeOfDay,Date,string)) {{ 425 alias C = Column!T; 426 alias R = Row!T; 427 alias I = Iterator!T; 428 static assert(isForwardRange!C, C.stringof); 429 static assert(isForwardRange!R, R.stringof); 430 static assert(isForwardRange!I, I.stringof); 431 }} 432 } 433 434 Date longToDate(long d) { 435 // modifed from https://www.codeproject.com/Articles/2750/ 436 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 437 438 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 439 // leap year, but Excel/Lotus 123 think it is... 440 if(d == 60) { 441 return Date(1900, 2, 29); 442 } else if(d < 60) { 443 // Because of the 29-02-1900 bug, any serial date 444 // under 60 is one off... Compensate. 445 ++d; 446 } 447 448 // Modified Julian to DMY calculation with an addition of 2415019 449 int l = cast(int)d + 68569 + 2415019; 450 int n = int(( 4 * l ) / 146097); 451 l = l - int(( 146097 * n + 3 ) / 4); 452 int i = int(( 4000 * ( l + 1 ) ) / 1461001); 453 l = l - int(( 1461 * i ) / 4) + 31; 454 int j = int(( 80 * l ) / 2447); 455 int nDay = l - int(( 2447 * j ) / 80); 456 l = int(j / 11); 457 int nMonth = j + 2 - ( 12 * l ); 458 int nYear = 100 * ( n - 49 ) + i + l; 459 return Date(nYear, nMonth, nDay); 460 } 461 462 long dateToLong(Date d) { 463 // modifed from https://www.codeproject.com/Articles/2750/ 464 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 465 466 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 467 // leap year, but Excel/Lotus 123 think it is... 468 if(d.day == 29 && d.month == 2 && d.year == 1900) { 469 return 60; 470 } 471 472 // DMY to Modified Julian calculated with an extra subtraction of 2415019. 473 long nSerialDate = 474 int(( 1461 * ( d.year + 4800 + int(( d.month - 14 ) / 12) ) ) / 4) + 475 int(( 367 * ( d.month - 2 - 12 * 476 ( ( d.month - 14 ) / 12 ) ) ) / 12) - 477 int(( 3 * ( int(( d.year + 4900 478 + int(( d.month - 14 ) / 12) ) / 100) ) ) / 4) + 479 d.day - 2415019 - 32075; 480 481 if(nSerialDate < 60) { 482 // Because of the 29-02-1900 bug, any serial date 483 // under 60 is one off... Compensate. 484 nSerialDate--; 485 } 486 487 return nSerialDate; 488 } 489 490 unittest { 491 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 492 foreach(d; ds) { 493 long l = dateToLong(d); 494 Date r = longToDate(l); 495 assert(r == d, format("%s %s", r, d)); 496 } 497 } 498 499 TimeOfDay doubleToTimeOfDay(double s) { 500 import core.stdc.math : lround; 501 double secs = (24.0 * 60.0 * 60.0) * s; 502 503 // TODO not one-hundred my lround is needed 504 int secI = to!int(lround(secs)); 505 506 return TimeOfDay(secI / 3600, (secI / 60) % 60, secI % 60); 507 } 508 509 double timeOfDayToDouble(TimeOfDay tod) { 510 long h = tod.hour * 60 * 60; 511 long m = tod.minute * 60; 512 long s = tod.second; 513 return (h + m + s) / (24.0 * 60.0 * 60.0); 514 } 515 516 unittest { 517 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 518 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 519 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 520 foreach(tod; tods) { 521 double d = timeOfDayToDouble(tod); 522 assert(d <= 1.0, format("%s", d)); 523 TimeOfDay r = doubleToTimeOfDay(d); 524 assert(r == tod, format("%s %s", r, tod)); 525 } 526 } 527 528 double datetimeToDouble(DateTime dt) { 529 double d = dateToLong(dt.date); 530 double t = timeOfDayToDouble(dt.timeOfDay); 531 return d + t; 532 } 533 534 DateTime doubleToDateTime(double d) { 535 long l = cast(long)d; 536 Date dt = longToDate(l); 537 TimeOfDay t = doubleToTimeOfDay(d - l); 538 return DateTime(dt, t); 539 } 540 541 unittest { 542 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 543 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 544 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 545 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 546 foreach(d; ds) { 547 foreach(tod; tods) { 548 DateTime dt = DateTime(d, tod); 549 double dou = datetimeToDouble(dt); 550 551 Date rd = longToDate(cast(long)dou); 552 assert(rd == d, format("%s %s", rd, d)); 553 554 double rest = dou - cast(long)dou; 555 TimeOfDay rt = doubleToTimeOfDay(dou - cast(long)dou); 556 assert(rt == tod, format("%s %s", rt, tod)); 557 558 DateTime r = doubleToDateTime(dou); 559 assert(r == dt, format("%s %s", r, dt)); 560 } 561 } 562 } 563 564 Date stringToDate(string s) { 565 import std.array : split; 566 import std.string : indexOf; 567 568 if(s.indexOf('/') != -1) { 569 auto sp = s.split('/'); 570 enforce(sp.length == 3, format("[%s]", sp)); 571 return Date(to!int(sp[2]), to!int(sp[1]), to!int(sp[0])); 572 } else { 573 return longToDate(to!long(s)); 574 } 575 } 576 577 bool tryConvertTo(T,S)(S var) { 578 return !(tryConvertToImpl!T(Data(var)).isNull()); 579 } 580 581 Nullable!(T) tryConvertToImpl(T)(Data var) { 582 try { 583 return nullable(convertTo!T(var)); 584 } catch(Exception e) { 585 return Nullable!T(); 586 } 587 } 588 589 T convertTo(T)(Data var) { 590 static if(is(T == Data)) { 591 return var; 592 } else static if(isSomeString!T) { 593 return var.visit!( 594 (bool l) => to!string(l), 595 (long l) => to!string(l), 596 (double l) => to!string(l), 597 (string l) => l, 598 (DateTime l) => l.toISOExtString(), 599 (Date l) => l.toISOExtString(), 600 (TimeOfDay l) => l.toISOExtString(), 601 () => "") 602 (); 603 } else static if(is(T == bool)) { 604 if(var.type != typeid(bool) && var.type != typeid(long) 605 && var.type == typeid(string)) 606 { 607 throw new Exception("Can not convert " ~ var.type.toString() ~ 608 " to bool"); 609 } 610 return var.visit!( 611 (bool l) => l, 612 (long l) => l == 0 ? false : true, 613 (string l) => to!bool(l), 614 (double l) => false, 615 (DateTime l) => false, 616 (Date l) => false, 617 (TimeOfDay l) => false, 618 () => false) 619 (); 620 } else static if(isIntegral!T) { 621 return var.visit!( 622 (bool l) => to!T(l), 623 (long l) => to!T(l), 624 (double l) => to!T(l), 625 (string l) => to!T(l), 626 (DateTime l) => to!T(dateToLong(l.date)), 627 (Date l) => to!T(dateToLong(l)), 628 (TimeOfDay l) => to!T(0), 629 () => to!T(0)) 630 (); 631 } else static if(isFloatingPoint!T) { 632 return var.visit!( 633 (bool l) => to!T(l), 634 (long l) => to!T(l), 635 (double l) => to!T(l), 636 (string l) => to!T(l), 637 (DateTime l) => to!T(dateToLong(l.date)), 638 (Date l) => to!T(dateToLong(l)), 639 (TimeOfDay l) => to!T(0), 640 () => T.init) 641 (); 642 } else static if(is(T == DateTime)) { 643 return var.visit!( 644 (bool l) => DateTime.init, 645 (long l) => doubleToDateTime(to!long(l)), 646 (double l) => doubleToDateTime(l), 647 (string l) => doubleToDateTime(to!double(l)), 648 (DateTime l) => l, 649 (Date l) => DateTime(l, TimeOfDay.init), 650 (TimeOfDay l) => DateTime(Date.init, l), 651 () => DateTime.init) 652 (); 653 } else static if(is(T == Date)) { 654 import std.math : lround; 655 656 return var.visit!( 657 (bool l) => Date.init, 658 (long l) => longToDate(l), 659 (double l) => longToDate(lround(l)), 660 (string l) => stringToDate(l), 661 (DateTime l) => l.date, 662 (Date l) => l, 663 (TimeOfDay l) => Date.init, 664 () => Date.init) 665 (); 666 } else static if(is(T == TimeOfDay)) { 667 import std.math : lround; 668 669 return var.visit!( 670 (bool l) => TimeOfDay.init, 671 (long l) => TimeOfDay.init, 672 (double l) => doubleToTimeOfDay(l - cast(long)l), 673 (string l) => doubleToTimeOfDay( 674 to!double(l) - cast(long)to!double(l) 675 ), 676 (DateTime l) => l.timeOfDay, 677 (Date l) => TimeOfDay.init, 678 (TimeOfDay l) => l, 679 () => TimeOfDay.init) 680 (); 681 } 682 assert(false, T.stringof); 683 } 684 685 686 private ZipArchive readFile(string filename) { 687 enforce(exists(filename), "File with name " ~ filename ~ " does not 688 exist"); 689 690 auto file = new ZipArchive(read(filename)); 691 return file; 692 } 693 694 struct SheetNameId { 695 string name; 696 int id; 697 string rid; 698 } 699 700 string convertToString(ubyte[] d) { 701 import std.encoding; 702 auto b = getBOM(d); 703 switch(b.schema) { 704 case BOM.none: 705 return cast(string)d; 706 case BOM.utf8: 707 return cast(string)(d[3 .. $]); 708 case BOM.utf16be: goto default; 709 case BOM.utf16le: goto default; 710 case BOM.utf32be: goto default; 711 case BOM.utf32le: goto default; 712 default: 713 string ret; 714 transcode(d, ret); 715 return ret; 716 } 717 } 718 719 SheetNameId[] sheetNames(string filename) { 720 auto file = readFile(filename); 721 auto ams = file.directory; 722 immutable wbStr = "xl/workbook.xml"; 723 if(wbStr !in ams) { 724 return SheetNameId[].init; 725 } 726 ubyte[] wb = file.expand(ams[wbStr]); 727 string wbData = convertToString(wb); 728 729 auto dom = parseDOM(wbData); 730 assert(dom.children.length == 1); 731 auto workbook = dom.children[0]; 732 string sheetName = workbook.name == "workbook" 733 ? "sheets" : "s:sheets"; 734 assert(workbook.name == "workbook" || workbook.name == "s:workbook"); 735 auto sheetsRng = workbook.children.filter!(c => c.name == sheetName); 736 assert(!sheetsRng.empty); 737 return sheetsRng.front.children 738 .map!(s => SheetNameId( 739 s.attributes.filter!(a => a.name == "name").front.value, 740 s.attributes.filter!(a => a.name == "sheetId").front 741 .value.to!int(), 742 s.attributes.filter!(a => a.name == "r:id").front.value, 743 ) 744 ) 745 .array 746 .sort!((a, b) => a.id < b.id) 747 .release; 748 } 749 750 unittest { 751 auto r = sheetNames("multitable.xlsx"); 752 assert(r[0].name == "wb1"); 753 assert(r[0].id == 1); 754 } 755 756 struct Relationships { 757 string id; 758 string file; 759 } 760 761 Relationships[string] parseRelationships(ZipArchive za, ArchiveMember am) { 762 ubyte[] d = za.expand(am); 763 string relData = convertToString(d); 764 auto dom = parseDOM(relData); 765 assert(dom.children.length == 1); 766 auto rel = dom.children[0]; 767 assert(rel.name == "Relationships"); 768 auto relRng = rel.children.filter!(c => c.name == "Relationship"); 769 assert(!relRng.empty); 770 771 Relationships[string] ret; 772 foreach(r; relRng) { 773 Relationships tmp; 774 tmp.id = r.attributes.filter!(a => a.name == "Id") 775 .front.value; 776 tmp.file = r.attributes.filter!(a => a.name == "Target") 777 .front.value; 778 ret[tmp.id] = tmp; 779 } 780 return ret; 781 } 782 783 Sheet readSheet(string filename, string sheetName) { 784 SheetNameId[] sheets = sheetNames(filename); 785 auto sRng = sheets.filter!(s => s.name == sheetName); 786 enforce(!sRng.empty, "No sheet with name " ~ sheetName 787 ~ " found in file " ~ filename); 788 return readSheetImpl(filename, sRng.front.rid); 789 } 790 791 string eatXlPrefix(string fn) { 792 foreach(p; ["xl//", "/xl/"]) { 793 if(fn.startsWith(p)) { 794 return fn[p.length .. $]; 795 } 796 } 797 return fn; 798 } 799 800 Sheet readSheetImpl(string filename, string rid) { 801 scope(failure) { 802 writefln("Failed at file '%s' and sheet '%s'", filename, rid); 803 } 804 auto file = readFile(filename); 805 auto ams = file.directory; 806 immutable ss = "xl/sharedStrings.xml"; 807 Data[] sharedStrings = (ss in ams) 808 ? readSharedEntries(file, ams[ss]) 809 : []; 810 //logf("%s", sharedStrings); 811 812 Relationships[string] rels = parseRelationships(file, 813 ams["xl/_rels/workbook.xml.rels"]); 814 815 Relationships* sheetRel = rid in rels; 816 enforce(sheetRel !is null, format("Could not find '%s' in '%s'", rid, 817 filename)); 818 string shrFn = eatXlPrefix(sheetRel.file); 819 string fn = "xl/" ~ shrFn; 820 ArchiveMember* sheet = fn in ams; 821 enforce(sheet !is null, format("sheetRel.file orig '%s', fn %s not in [%s]", 822 sheetRel.file, fn, ams.keys())); 823 824 Sheet ret; 825 ret.cells = insertValueIntoCell(readCells(file, *sheet), sharedStrings); 826 Pos maxPos; 827 foreach(ref c; ret.cells) { 828 c.position = toPos(c.r); 829 maxPos = elementMax(maxPos, c.position); 830 } 831 ret.maxPos = maxPos; 832 ret.table = new Cell[][](ret.maxPos.row + 1, ret.maxPos.col + 1); 833 foreach(c; ret.cells) { 834 ret.table[c.position.row][c.position.col] = c; 835 } 836 return ret; 837 } 838 839 Data[] readSharedEntries(ZipArchive za, ArchiveMember am) { 840 ubyte[] ss = za.expand(am); 841 string ssData = convertToString(ss); 842 auto dom = parseDOM(ssData); 843 Data[] ret; 844 if(dom.type != EntityType.elementStart) { 845 return ret; 846 } 847 assert(dom.children.length == 1); 848 auto sst = dom.children[0]; 849 assert(sst.name == "sst"); 850 if(sst.type != EntityType.elementStart || sst.children.empty) { 851 return ret; 852 } 853 auto siRng = sst.children.filter!(c => c.name == "si"); 854 foreach(si; siRng) { 855 if(si.type != EntityType.elementStart) { 856 continue; 857 } 858 //ret ~= extractData(si); 859 string tmp; 860 foreach(tORr; si.children) { 861 if(tORr.name == "t" && tORr.type == EntityType.elementStart 862 && !tORr.children.empty) 863 { 864 ret ~= Data(convert(tORr.children[0].text)); 865 } else if(tORr.name == "r") { 866 foreach(r; tORr.children.filter!(r => r.name == "t")) { 867 if(r.type == EntityType.elementStart && !r.children.empty) { 868 tmp ~= r.children[0].text; 869 } 870 } 871 } else { 872 ret ~= Data.init; 873 } 874 } 875 if(!tmp.empty) { 876 ret ~= Data(convert(tmp)); 877 } 878 } 879 return ret; 880 } 881 882 string extractData(DOMEntity!string si) { 883 string tmp; 884 foreach(tORr; si.children) { 885 if(tORr.name == "t") { 886 if(!tORr.attributes.filter!(a => a.name == "xml:space").empty) { 887 return ""; 888 } else if(tORr.type == EntityType.elementStart 889 && !tORr.children.empty) 890 { 891 return tORr.children[0].text; 892 } else { 893 return ""; 894 } 895 } else if(tORr.name == "r") { 896 foreach(r; tORr.children.filter!(r => r.name == "t")) { 897 tmp ~= r.children[0].text; 898 } 899 } 900 } 901 if(!tmp.empty) { 902 return tmp; 903 } 904 assert(false); 905 } 906 907 private bool canConvertToLong(string s) { 908 if(s.empty) { 909 return false; 910 } 911 return s.byChar.all!isDigit(); 912 } 913 914 private immutable rs = r"[0-9][0-9]*\.[0-9]*"; 915 private auto rgx = ctRegex!rs; 916 917 private bool canConvertToDouble(string s) { 918 auto cap = matchAll(s, rgx); 919 return cap.empty || cap.front.hit != s ? false : true; 920 } 921 922 Data convert(string s) { 923 struct ToRe { 924 string from; 925 string to; 926 } 927 928 immutable ToRe[] toRe = [ 929 ToRe( "&", "&"), 930 ToRe( ">", "<"), 931 ToRe( "<", ">"), 932 ToRe( """, "\""), 933 ToRe( "'", "'") 934 ]; 935 936 string replaceStrings(string s) { 937 import std.algorithm.searching : canFind; 938 import std.array : replace; 939 foreach(tr; toRe) { 940 while(canFind(s, tr.from)) { 941 s = s.replace(tr.from, tr.to); 942 } 943 } 944 return s; 945 } 946 947 if(canConvertToLong(s)) { 948 return Data(to!long(s)); 949 } else if(canConvertToDouble(s)) { 950 return Data(to!double(s)); 951 } else { 952 return Data(replaceStrings(s)); 953 } 954 } 955 956 Cell[] readCells(ZipArchive za, ArchiveMember am) { 957 Cell[] ret; 958 ubyte[] ss = za.expand(am); 959 string ssData = convertToString(ss); 960 auto dom = parseDOM(ssData); 961 assert(dom.children.length == 1); 962 auto ws = dom.children[0]; 963 if(ws.name != "worksheet") { 964 return ret; 965 } 966 auto sdRng = ws.children.filter!(c => c.name == "sheetData"); 967 assert(!sdRng.empty); 968 if(sdRng.front.type != EntityType.elementStart) { 969 return ret; 970 } 971 auto rows = sdRng.front.children 972 .filter!(r => r.name == "row"); 973 974 foreach(row; rows) { 975 if(row.type != EntityType.elementStart || row.children.empty) { 976 continue; 977 } 978 foreach(c; row.children.filter!(r => r.name == "c")) { 979 Cell tmp; 980 tmp.row = row.attributes.filter!(a => a.name == "r") 981 .front.value.to!size_t(); 982 tmp.r = c.attributes.filter!(a => a.name == "r") 983 .front.value; 984 auto t = c.attributes.filter!(a => a.name == "t"); 985 if(t.empty) { 986 // we assume that no t attribute means direct number 987 //writefln("Found a strange empty cell \n%s", c); 988 } else { 989 tmp.t = t.front.value; 990 } 991 if(tmp.t == "s" || tmp.t == "n") { 992 if(c.type == EntityType.elementStart) { 993 auto v = c.children.filter!(c => c.name == "v"); 994 //enforce(!v.empty, format("r %s", tmp.row)); 995 if(!v.empty && v.front.type == EntityType.elementStart 996 && !v.front.children.empty) 997 { 998 tmp.v = v.front.children[0].text; 999 } else { 1000 tmp.v = ""; 1001 } 1002 } 1003 } else if(tmp.t == "inlineStr") { 1004 auto is_ = c.children.filter!(c => c.name == "is"); 1005 tmp.v = extractData(is_.front); 1006 } else if(c.type == EntityType.elementStart) { 1007 auto v = c.children.filter!(c => c.name == "v"); 1008 if(!v.empty && v.front.type == EntityType.elementStart 1009 && !v.front.children.empty) 1010 { 1011 tmp.v = v.front.children[0].text; 1012 } 1013 } 1014 if(c.type == EntityType.elementStart) { 1015 auto f = c.children.filter!(c => c.name == "f"); 1016 if(!f.empty && f.front.type == EntityType.elementStart) { 1017 tmp.f = f.front.children[0].text; 1018 } 1019 } 1020 ret ~= tmp; 1021 } 1022 } 1023 return ret; 1024 } 1025 1026 Cell[] insertValueIntoCell(Cell[] cells, Data[] ss) { 1027 immutable excepted = ["n", "s", "b", "e", "str", "inlineStr"]; 1028 immutable same = ["n", "e", "str", "inlineStr"]; 1029 foreach(ref Cell c; cells) { 1030 assert(canFind(excepted, c.t) || c.t.empty, 1031 format("'%s' not in [%s]", c.t, excepted)); 1032 if(c.t.empty) { 1033 c.value = convert(c.v); 1034 } else if(canFind(same, c.t)) { 1035 c.value = convert(c.v); 1036 } else if(c.t == "b") { 1037 //logf("'%s' %s", c.v, c); 1038 c.value = c.v == "1"; 1039 } else { 1040 if(!c.v.empty) { 1041 size_t idx = to!size_t(c.v); 1042 //logf("'%s' %s", c.v, idx); 1043 c.value = ss[idx]; 1044 } 1045 } 1046 } 1047 return cells; 1048 } 1049 1050 Pos toPos(string s) { 1051 import std.string : indexOfAny; 1052 import std.math : pow; 1053 ptrdiff_t fn = s.indexOfAny("0123456789"); 1054 enforce(fn != -1, s); 1055 size_t row = to!size_t(to!long(s[fn .. $]) - 1); 1056 size_t col = 0; 1057 string colS = s[0 .. fn]; 1058 foreach(idx, char c; colS) { 1059 col = col * 26 + (c - 'A' + 1); 1060 } 1061 return Pos(row, col - 1); 1062 } 1063 1064 unittest { 1065 assert(toPos("A1").col == 0); 1066 assert(toPos("Z1").col == 25); 1067 assert(toPos("AA1").col == 26); 1068 } 1069 1070 Pos elementMax(Pos a, Pos b) { 1071 return Pos(a.row < b.row ? b.row : a.row, 1072 a.col < b.col ? b.col : a.col); 1073 } 1074 1075 unittest { 1076 import std.math : approxEqual; 1077 auto r = readSheet("multitable.xlsx", "wb1"); 1078 assert(approxEqual(r.table[12][5].value.get!double(), 26.74), 1079 format("%s", r.table[12][5]) 1080 ); 1081 } 1082 1083 unittest { 1084 import std.algorithm.comparison : equal; 1085 auto s = readSheet("multitable.xlsx", "wb1"); 1086 auto r = s.iterateRow!long(15, 1, 6); 1087 1088 assert(r.canConvertTo(CellType.long_)); 1089 assert(r.canConvertTo(CellType.double_)); 1090 assert(r.canConvertTo(CellType.string_)); 1091 1092 auto expected = [1, 2, 3, 4, 5]; 1093 assert(equal(r, expected), format("%s", r)); 1094 1095 auto r2 = s.getRow!long(15, 1, 6); 1096 assert(equal(r, expected)); 1097 1098 auto it = s.iterateRowLong(15, 1, 6); 1099 assert(equal(r2, it)); 1100 } 1101 1102 unittest { 1103 import std.algorithm.comparison : equal; 1104 auto s = readSheet("multitable.xlsx", "wb2"); 1105 writefln("%s\n%(%s\n%)", s.maxPos, s.cells); 1106 auto rslt = s.iterateColumn!Date(1, 1, 6); 1107 1108 auto target = [Date(2019,5,01), Date(2016,12,27), Date(1976,7,23), 1109 Date(1986,7,2), Date(2038,1,19) 1110 ]; 1111 assert(equal(rslt, target), format("\n%s\n%s", rslt, target)); 1112 1113 auto it = s.getColumn!Date(1, 1, 6); 1114 assert(equal(rslt, it)); 1115 1116 auto it2 = s.getColumnDate(1, 1, 6); 1117 assert(equal(rslt, it2)); 1118 } 1119 1120 unittest { 1121 import std.algorithm.comparison : equal; 1122 auto s = readSheet("multitable.xlsx", "Sheet3"); 1123 assert(s.table[0][0].value.peek!bool()); 1124 } 1125 1126 unittest { 1127 import std.file : dirEntries, SpanMode; 1128 import std.traits : EnumMembers; 1129 foreach(de; dirEntries("xlsx_files/", "*.xlsx", SpanMode.depth) 1130 .filter!(a => a.name != "xlsx_files/data03.xlsx")) 1131 { 1132 //writeln(de.name); 1133 auto sn = sheetNames(de.name); 1134 foreach(s; sn) { 1135 auto sheet = readSheet(de.name, s.name); 1136 foreach(cell; sheet.cells) { 1137 foreach(T; [EnumMembers!CellType]) { 1138 auto cc = cell.canConvertTo(T); 1139 } 1140 } 1141 } 1142 } 1143 } 1144 1145 unittest { 1146 import std.algorithm.comparison : equal; 1147 auto sheet = readSheet("testworkbook.xlsx", "ws1"); 1148 writefln("%(%s\n%)", sheet.cells); 1149 //writeln(sheet.toString()); 1150 //assert(sheet.table[2][3].value.get!long() == 1337); 1151 1152 auto c = sheet.getColumnLong(3, 2, 5); 1153 auto r = [1337, 2, 3]; 1154 assert(equal(c, r), format("%s", c)); 1155 1156 auto c2 = sheet.getColumnString(4, 2, 5); 1157 auto r2 = ["hello", "sil", "foo"]; 1158 assert(equal(c2, r2), format("%s", c2)); 1159 }