1 module xlsxreader;
2 
3 import std.algorithm.iteration : filter, map, joiner;
4 import std.algorithm.mutation : reverse;
5 import std.algorithm.searching : all, canFind, startsWith;
6 import std.algorithm.sorting : sort;
7 import std.array : array, empty, front, replace, popFront;
8 import std.ascii : isDigit;
9 import std.conv : to;
10 import std.datetime : DateTime, Date, TimeOfDay;
11 import std.exception : enforce;
12 import std.file : read, exists, readText;
13 import std.format : format;
14 import std.range : tee;
15 import std.regex;
16 import std.stdio;
17 import std.traits : isIntegral, isFloatingPoint, isSomeString;
18 import std.typecons : tuple, Nullable, nullable;
19 import std.utf : byChar;
20 import std.variant : Algebraic, visit;
21 import std.zip;
22 
23 import dxml.dom;
24 
25 ///
26 struct Pos {
27 	// zero based
28 	size_t row;
29 	// zero based
30 	size_t col;
31 }
32 
33 ///
34 alias Data = Algebraic!(bool,long,double,string,DateTime,Date,TimeOfDay);
35 
36 ///
37 struct Cell {
38 	string loc;
39 	size_t row; // row[r]
40 	string t; // s or n, s for pointer, n for value, stored in v
41 	string r; // c[r]
42 	string v; // c.v the value or ptr
43 	string f; // c.f the formula
44 	string xmlValue;
45 	Pos position;
46 }
47 
48 //
49 enum CellType {
50 	datetime,
51 	timeofday,
52 	date,
53 	bool_,
54 	double_,
55 	long_,
56 	string_
57 }
58 
59 import std.ascii : toUpper;
60 ///
61 struct Sheet {
62 	import std.ascii : toUpper;
63 
64 	Cell[] cells;
65 	Cell[][] table;
66 	Pos maxPos;
67 
68 	string toString() const @safe {
69 		import std.format : formattedWrite;
70 		import std.array : appender;
71 		long[] maxCol = new long[](maxPos.col + 1);
72 		foreach(const row; this.table) {
73 			foreach(const idx, Cell col; row) {
74 				string s = col.xmlValue;
75 
76 				maxCol[idx] = maxCol[idx] < s.length ? s.length : maxCol[idx];
77 			}
78 		}
79 		maxCol[] += 1;
80 
81 		auto app = appender!string();
82 		foreach(const row; this.table) {
83 			foreach(const idx, Cell col; row) {
84 				string s = col.xmlValue;
85 				formattedWrite(app, "%*s, ", maxCol[idx], s);
86 			}
87 			formattedWrite(app, "\n");
88 		}
89 		return app.data;
90 	}
91 
92 	void printTable() const @safe {
93 		writeln(this.toString());
94 	}
95 
96 	// Column
97 
98 	Iterator!T getColumn(T)(size_t col, size_t startRow, size_t endRow) {
99 		auto c = this.iterateColumn!T(col, startRow, endRow);
100 		return typeof(return)(c.array);
101 	}
102 
103 	private enum t = q{
104 	Iterator!(%1$s) getColumn%2$s(size_t col, size_t startRow, size_t endRow) @safe {
105 		return getColumn!(%1$s)(col, startRow, endRow);
106 	}
107 	};
108 	static foreach(T; ["long", "double", "string", "Date", "TimeOfDay",
109 			"DateTime"])
110 	{
111 		mixin(format(t, T, T[0].toUpper ~ T[1 .. $]));
112 	}
113 
114 	ColumnUntyped iterateColumnUntyped(size_t col, size_t startRow, size_t endRow) @safe {
115 		return typeof(return)(&this, col, startRow, endRow);
116 	}
117 
118 	Column!(T) iterateColumn(T)(size_t col, size_t startRow, size_t endRow) {
119 		return typeof(return)(&this, col, startRow, endRow);
120 	}
121 
122 	Column!(long) iterateColumnLong(size_t col, size_t startRow, size_t endRow) @safe {
123 		return typeof(return)(&this, col, startRow, endRow);
124 	}
125 
126 	Column!(double) iterateColumnDouble(size_t col, size_t startRow, size_t endRow) @safe {
127 		return typeof(return)(&this, col, startRow, endRow);
128 	}
129 
130 	Column!(string) iterateColumnString(size_t col, size_t startRow, size_t endRow) @safe {
131 		return typeof(return)(&this, col, startRow, endRow);
132 	}
133 
134 	Column!(DateTime) iterateColumnDateTime(size_t col, size_t startRow, size_t endRow) @safe {
135 		return typeof(return)(&this, col, startRow, endRow);
136 	}
137 
138 	Column!(Date) iterateColumnDate(size_t col, size_t startRow, size_t endRow) @safe {
139 		return typeof(return)(&this, col, startRow, endRow);
140 	}
141 
142 	Column!(TimeOfDay) iterateColumnTimeOfDay(size_t col, size_t startRow, size_t endRow) @safe {
143 		return typeof(return)(&this, col, startRow, endRow);
144 	}
145 
146 	// Row
147 
148 	Iterator!T getRow(T)(size_t row, size_t startColumn, size_t endColumn) @safe {
149 		return typeof(return)(this.iterateRow!T(row, startColumn, endColumn).array); // TODO: why .array?
150 	}
151 
152 	private enum t2 = q{
153 	Iterator!(%1$s) getRow%2$s(size_t row, size_t startColumn, size_t endColumn) @safe {
154 		return getRow!(%1$s)(row, startColumn, endColumn);
155 	}
156 	};
157 	static foreach(T; ["long", "double", "string", "Date", "TimeOfDay",
158 			"DateTime"])
159 	{
160 		mixin(format(t2, T, T[0].toUpper ~ T[1 .. $]));
161 	}
162 
163 	RowUntyped iterateRowUntyped(size_t row, size_t startColumn, size_t endColumn) @safe {
164 		return typeof(return)(&this, row, startColumn, endColumn);
165 	}
166 
167 	Row!(T) iterateRow(T)(size_t row, size_t startColumn, size_t endColumn) @trusted /* TODO: remove @trusted when `&this` is stored in a @safe manner in `Row` */ {
168 		return typeof(return)(&this, row, startColumn, endColumn);
169 	}
170 
171 	Row!(long) iterateRowLong(size_t row, size_t startColumn, size_t endColumn) @safe {
172 		return typeof(return)(&this, row, startColumn, endColumn);
173 	}
174 
175 	Row!(double) iterateRowDouble(size_t row, size_t startColumn, size_t endColumn) @safe {
176 		return typeof(return)(&this, row, startColumn, endColumn);
177 	}
178 
179 	Row!(string) iterateRowString(size_t row, size_t startColumn, size_t endColumn) @safe {
180 		return typeof(return)(&this, row, startColumn, endColumn);
181 	}
182 
183 	Row!(DateTime) iterateRowDateTime(size_t row, size_t startColumn, size_t endColumn) @safe {
184 		return typeof(return)(&this, row, startColumn, endColumn);
185 	}
186 
187 	Row!(Date) iterateRowDate(size_t row, size_t startColumn, size_t endColumn) @safe {
188 		return typeof(return)(&this, row, startColumn, endColumn);
189 	}
190 
191 	Row!(TimeOfDay) iterateRowTimeOfDay(size_t row, size_t startColumn, size_t endColumn) @safe {
192 		return typeof(return)(&this, row, startColumn, endColumn);
193 	}
194 }
195 
196 struct Iterator(T) {
197 	T[] data;
198 
199 	this(T[] data) {
200 		this.data = data;
201 	}
202 
203 	@property bool empty() const pure nothrow @nogc {
204 		return this.data.empty;
205 	}
206 
207 	void popFront() {
208 		this.data.popFront();
209 	}
210 
211 	@property T front() {
212 		return this.data.front;
213 	}
214 
215 	inout(typeof(this)) save() inout pure nothrow @nogc {
216 		return this;
217 	}
218 
219 	// Request random access.
220 	inout(T)[] array() inout @safe pure nothrow @nogc {
221 		return data;
222 	}
223 }
224 
225 ///
226 struct RowUntyped {
227 	Sheet* sheet;
228 	const size_t row;
229 	const size_t startColumn;
230 	const size_t endColumn;
231 	size_t cur;
232 
233 	this(Sheet* sheet, size_t row, size_t startColumn, size_t endColumn) pure nothrow @nogc @safe {
234 		assert(sheet.table.length == sheet.maxPos.row + 1);
235 		this.sheet = sheet;
236 		this.row = row;
237 		this.startColumn = startColumn;
238 		this.endColumn = endColumn;
239 		this.cur = this.startColumn;
240 	}
241 
242 	@property bool empty() const pure nothrow @nogc @safe {
243 		return this.cur >= this.endColumn;
244 	}
245 
246 	void popFront() pure nothrow @nogc @safe {
247 		++this.cur;
248 	}
249 
250 	inout(typeof(this)) save() inout pure nothrow @nogc @safe {
251 		return this;
252 	}
253 
254 	@property inout(Cell) front() inout pure nothrow @nogc @safe {
255 		return this.sheet.table[this.row][this.cur];
256 	}
257 }
258 
259 ///
260 struct Row(T) {
261 	RowUntyped ru;
262 	T front;
263 
264 	this(Sheet* sheet, size_t row, size_t startColumn, size_t endColumn) {
265 		this.ru = RowUntyped(sheet, row, startColumn, endColumn);
266 		this.read();
267 	}
268 
269 	@property bool empty() const pure nothrow @nogc {
270 		return this.ru.empty;
271 	}
272 
273 	void popFront() {
274 		this.ru.popFront();
275 		if(!this.empty) {
276 			this.read();
277 		}
278 	}
279 
280 	inout(typeof(this)) save() inout pure nothrow @nogc {
281 		return this;
282 	}
283 
284 	private void read() {
285 		this.front = convertTo!T(this.ru.front.xmlValue);
286 	}
287 }
288 
289 ///
290 struct ColumnUntyped {
291 	Sheet* sheet;
292 	const size_t col;
293 	const size_t startRow;
294 	const size_t endRow;
295 	size_t cur;
296 
297 	this(Sheet* sheet, size_t col, size_t startRow, size_t endRow) @safe {
298 		assert(sheet.table.length == sheet.maxPos.row + 1);
299 		this.sheet = sheet;
300 		this.col = col;
301 		this.startRow = startRow;
302 		this.endRow = endRow;
303 		this.cur = this.startRow;
304 	}
305 
306 	@property bool empty() const pure nothrow @nogc @safe {
307 		return this.cur >= this.endRow;
308 	}
309 
310 	void popFront() @safe {
311 		++this.cur;
312 	}
313 
314 	inout(typeof(this)) save() inout pure nothrow @nogc @safe {
315 		return this;
316 	}
317 
318 	@property Cell front() @safe {
319 		return this.sheet.table[this.cur][this.col];
320 	}
321 }
322 
323 ///
324 struct Column(T) {
325 	ColumnUntyped cu;
326 
327 	T front;
328 
329 	this(Sheet* sheet, size_t col, size_t startRow, size_t endRow) {
330 		this.cu = ColumnUntyped(sheet, col, startRow, endRow);
331 		this.read();
332 	}
333 
334 	@property bool empty() const pure nothrow @nogc {
335 		return this.cu.empty;
336 	}
337 
338 	void popFront() {
339 		this.cu.popFront();
340 		if(!this.empty) {
341 			this.read();
342 		}
343 	}
344 
345 	inout(typeof(this)) save() inout pure nothrow @nogc {
346 		return this;
347 	}
348 
349 	private void read() {
350 		this.front = convertTo!T(this.cu.front.xmlValue);
351 	}
352 }
353 
354 @safe unittest {
355 	import std.range : isForwardRange;
356 	import std.meta : AliasSeq;
357 	static foreach(T; AliasSeq!(long,double,DateTime,TimeOfDay,Date,string)) {{
358 		alias C = Column!T;
359 		alias R = Row!T;
360 		alias I = Iterator!T;
361 		static assert(isForwardRange!C, C.stringof);
362 		static assert(isForwardRange!R, R.stringof);
363 		static assert(isForwardRange!I, I.stringof);
364 	}}
365 }
366 
367 Date longToDate(long d) @safe {
368 	// modifed from https://www.codeproject.com/Articles/2750/
369 	// Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa
370 
371 	// Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a
372 	// leap year, but Excel/Lotus 123 think it is...
373 	if(d == 60) {
374 		return Date(1900, 2,  29);
375 	} else if(d < 60) {
376 		// Because of the 29-02-1900 bug, any serial date
377 		// under 60 is one off... Compensate.
378 		++d;
379 	}
380 
381 	// Modified Julian to DMY calculation with an addition of 2415019
382 	int l = cast(int)d + 68569 + 2415019;
383 	int n = int(( 4 * l ) / 146097);
384 	l = l - int(( 146097 * n + 3 ) / 4);
385 	int i = int(( 4000 * ( l + 1 ) ) / 1461001);
386 	l = l - int(( 1461 * i ) / 4) + 31;
387 	int j = int(( 80 * l ) / 2447);
388 	int nDay = l - int(( 2447 * j ) / 80);
389 	l = int(j / 11);
390 	int nMonth = j + 2 - ( 12 * l );
391 	int nYear = 100 * ( n - 49 ) + i + l;
392 	return Date(nYear, nMonth, nDay);
393 }
394 
395 long dateToLong(Date d) @safe {
396 	// modifed from https://www.codeproject.com/Articles/2750/
397 	// Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa
398 
399 	// Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a
400 	// leap year, but Excel/Lotus 123 think it is...
401 	if(d.day == 29 && d.month == 2 && d.year == 1900) {
402 		return 60;
403 	}
404 
405 	// DMY to Modified Julian calculated with an extra subtraction of 2415019.
406 	long nSerialDate =
407 			int(( 1461 * ( d.year + 4800 + int(( d.month - 14 ) / 12) ) ) / 4) +
408 			int(( 367 * ( d.month - 2 - 12 *
409 				( ( d.month - 14 ) / 12 ) ) ) / 12) -
410 				int(( 3 * ( int(( d.year + 4900
411 				+ int(( d.month - 14 ) / 12) ) / 100) ) ) / 4) +
412 				d.day - 2415019 - 32075;
413 
414 	if(nSerialDate < 60) {
415 		// Because of the 29-02-1900 bug, any serial date
416 		// under 60 is one off... Compensate.
417 		nSerialDate--;
418 	}
419 
420 	return nSerialDate;
421 }
422 
423 @safe unittest {
424 	auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ];
425 	foreach(const d; ds) {
426 		long l = dateToLong(d);
427 		Date r = longToDate(l);
428 		assert(r == d, format("%s %s", r, d));
429 	}
430 }
431 
432 TimeOfDay doubleToTimeOfDay(double s) @safe {
433 	import core.stdc.math : lround;
434 	double secs = (24.0 * 60.0 * 60.0) * s;
435 
436 	// TODO not one-hundred my lround is needed
437 	int secI = to!int(lround(secs));
438 
439 	return TimeOfDay(secI / 3600, (secI / 60) % 60, secI % 60);
440 }
441 
442 double timeOfDayToDouble(TimeOfDay tod) @safe {
443 	long h = tod.hour * 60 * 60;
444 	long m = tod.minute * 60;
445 	long s = tod.second;
446 	return (h + m + s) / (24.0 * 60.0 * 60.0);
447 }
448 
449 @safe unittest {
450 	auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11),
451 		 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0),
452 		 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)];
453 	foreach(const tod; tods) {
454 		double d = timeOfDayToDouble(tod);
455 		assert(d <= 1.0, format("%s", d));
456 		TimeOfDay r = doubleToTimeOfDay(d);
457 		assert(r == tod, format("%s %s", r, tod));
458 	}
459 }
460 
461 double datetimeToDouble(DateTime dt) @safe {
462 	double d = dateToLong(dt.date);
463 	double t = timeOfDayToDouble(dt.timeOfDay);
464 	return d + t;
465 }
466 
467 DateTime doubleToDateTime(double d) @safe {
468 	long l = cast(long)d;
469 	Date dt = longToDate(l);
470 	TimeOfDay t = doubleToTimeOfDay(d - l);
471 	return DateTime(dt, t);
472 }
473 
474 @safe unittest {
475 	auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ];
476 	auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11),
477 		 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0),
478 		 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)];
479 	foreach(const d; ds) {
480 		foreach(const tod; tods) {
481 			DateTime dt = DateTime(d, tod);
482 			double dou = datetimeToDouble(dt);
483 
484 			Date rd = longToDate(cast(long)dou);
485 			assert(rd == d, format("%s %s", rd, d));
486 
487 			double rest = dou - cast(long)dou;
488 			TimeOfDay rt = doubleToTimeOfDay(dou - cast(long)dou);
489 			assert(rt == tod, format("%s %s", rt, tod));
490 
491 			DateTime r = doubleToDateTime(dou);
492 			assert(r == dt, format("%s %s", r, dt));
493 		}
494 	}
495 }
496 
497 Date stringToDate(string s) @safe {
498 	import std.array : split;
499 	import std.string : indexOf;
500 
501 	if(s.indexOf('/') != -1) {
502 		auto sp = s.split('/');
503 		enforce(sp.length == 3, format("[%s]", sp));
504 		return Date(to!int(sp[2]), to!int(sp[1]), to!int(sp[0]));
505 	} else {
506 		return longToDate(to!long(s));
507 	}
508 }
509 
510 bool tryConvertTo(T,S)(S var) {
511 	return !(tryConvertToImpl!T(Data(var)).isNull());
512 }
513 
514 Nullable!(T) tryConvertToImpl(T)(Data var) {
515 	try {
516 		return nullable(convertTo!T(var));
517 	} catch(Exception e) {
518 		return Nullable!T();
519 	}
520 }
521 
522 T convertTo(T)(string var) @safe {
523 	import std.math : lround;
524 	static if(isSomeString!T) {
525 		return to!T(var);
526 	} else static if(is(T == bool)) {
527 		return var == "1";
528 	} else static if(isIntegral!T) {
529 		return to!T(var);
530 	} else static if(isFloatingPoint!T) {
531 		return to!T(var);
532 	} else static if(is(T == DateTime)) {
533 		if(var.canConvertToLong()) {
534 			return doubleToDateTime(to!long(var));
535 		} else if(var.canConvertToDouble()) {
536 			return doubleToDateTime(to!double(var));
537 		}
538 		enforce(false, "Can not convert '" ~ var ~ "' to a DateTime");
539 		assert(false, "Unreachable");
540 	} else static if(is(T == Date)) {
541 		if(var.canConvertToLong()) {
542 			return longToDate(to!long(var));
543 		} else if(var.canConvertToDouble()) {
544 			return longToDate(lround(to!double(var)));
545 		}
546 		return stringToDate(var);
547 	} else static if(is(T == TimeOfDay)) {
548 		double l = to!double(var);
549 		return doubleToTimeOfDay(l - cast(long)l);
550 	} else {
551 		static assert(false, T.stringof ~ " not supported");
552 	}
553 }
554 
555 private ZipArchive readFile(in string filename) @trusted {
556 	enforce(exists(filename), "File with name " ~ filename ~ " does not exist");
557 	return new typeof(return)(read(filename));
558 }
559 
560 struct SheetNameId {
561 	string name;
562 	int id;
563 	string rid;
564 }
565 
566 string convertToString(in ubyte[] d) @trusted {
567 	import std.encoding;
568 	auto b = getBOM(d);
569 	switch(b.schema) {
570 		case BOM.none:
571 			return cast(string)d;
572 		case BOM.utf8:
573 			return cast(string)(d[3 .. $]);
574 		case BOM.utf16be: goto default;
575 		case BOM.utf16le: goto default;
576 		case BOM.utf32be: goto default;
577 		case BOM.utf32le: goto default;
578 		default:
579 			string ret;
580 			transcode(d, ret);
581 			return ret;
582 	}
583 }
584 
585 SheetNameId[] sheetNames(in string filename) @trusted {
586 	auto file = readFile(filename);
587 	auto ams = file.directory;
588 	immutable wbStr = "xl/workbook.xml";
589 	if(wbStr !in ams) {
590 		return SheetNameId[].init;
591 	}
592 	ubyte[] wb = file.expand(ams[wbStr]);
593 	string wbData = convertToString(wb);
594 
595 	auto dom = parseDOM(wbData);
596 	if(dom.children.length != 1) {
597 		return [];
598 	}
599 	auto workbook = dom.children[0];
600 	string sheetName = workbook.name == "workbook"
601 		? "sheets" : "s:sheets";
602 	if(workbook.name != "workbook" && workbook.name != "s:workbook") {
603 		return [];
604 	}
605 	auto sheetsRng = workbook.children.filter!(c => c.name == sheetName);
606 	if(sheetsRng.empty) {
607 		return [];
608 	}
609 
610 	return sheetsRng.front.children
611 		.map!(s => SheetNameId(
612 					s.attributes.filter!(a => a.name == "name").front.value
613 						.specialCharacterReplacementReverse(),
614 					s.attributes.filter!(a => a.name == "sheetId").front
615 						.value.to!int(),
616 					s.attributes.filter!(a => a.name == "r:id").front.value,
617 				)
618 		)
619 		.array
620 		.sort!((a, b) => a.id < b.id)
621 		.release;
622 }
623 
624 @safe unittest {
625 	auto r = sheetNames("multitable.xlsx");
626 	assert(r[0].name == "wb1");
627 	assert(r[0].id == 1);
628 }
629 
630 @safe unittest {
631 	auto r = sheetNames("sheetnames.xlsx");
632 	assert(r[0].name == "A & B ;", r[0].name);
633 	assert(r[0].id == 1);
634 }
635 
636 struct Relationships {
637 	string id;
638 	string file;
639 }
640 
641 Relationships[string] parseRelationships(ZipArchive za, ArchiveMember am) @trusted {
642 	ubyte[] d = za.expand(am);
643 	string relData = convertToString(d);
644 	auto dom = parseDOM(relData);
645 	assert(dom.children.length == 1);
646 	auto rel = dom.children[0];
647 	assert(rel.name == "Relationships");
648 	auto relRng = rel.children.filter!(c => c.name == "Relationship");
649 	assert(!relRng.empty);
650 
651 	Relationships[string] ret;
652 	foreach(r; relRng) {
653 		Relationships tmp;
654 		tmp.id = r.attributes.filter!(a => a.name == "Id")
655 			.front.value;
656 		tmp.file = r.attributes.filter!(a => a.name == "Target")
657 			.front.value;
658 		ret[tmp.id] = tmp;
659 	}
660 	return ret;
661 }
662 
663 Sheet readSheet(in string filename, in string sheetName) @safe {
664 	SheetNameId[] sheets = sheetNames(filename);
665 	auto sRng = sheets.filter!(s => s.name == sheetName);
666 	enforce(!sRng.empty, "No sheet with name " ~ sheetName
667 			~ " found in file " ~ filename);
668 	return readSheetImpl(filename, sRng.front.rid);
669 }
670 
671 string eatXlPrefix(string fn) @safe {
672 	foreach(const p; ["xl//", "/xl/"]) {
673 		if(fn.startsWith(p)) {
674 			return fn[p.length .. $];
675 		}
676 	}
677 	return fn;
678 }
679 
680 Sheet readSheetImpl(in string filename, in string rid) @trusted {
681 	scope(failure) {
682 		writefln("Failed at file '%s' and sheet '%s'", filename, rid);
683 	}
684 	auto file = readFile(filename);
685 	auto ams = file.directory;
686 	immutable ss = "xl/sharedStrings.xml";
687 	string[] sharedStrings = (ss in ams)
688 		? readSharedEntries(file, ams[ss])
689 		: [];
690 	//logf("%s", sharedStrings);
691 
692 	Relationships[string] rels = parseRelationships(file,
693 			ams["xl/_rels/workbook.xml.rels"]);
694 
695 	Relationships* sheetRel = rid in rels;
696 	enforce(sheetRel !is null, format("Could not find '%s' in '%s'", rid,
697 				filename));
698 	string shrFn = eatXlPrefix(sheetRel.file);
699 	string fn = "xl/" ~ shrFn;
700 	ArchiveMember* sheet = fn in ams;
701 	enforce(sheet !is null, format("sheetRel.file orig '%s', fn %s not in [%s]",
702 				sheetRel.file, fn, ams.keys()));
703 
704 	Sheet ret;
705 	ret.cells = insertValueIntoCell(readCells(file, *sheet), sharedStrings);
706 	Pos maxPos;
707 	foreach(ref c; ret.cells) {
708 		c.position = toPos(c.r);
709 		maxPos = elementMax(maxPos, c.position);
710 	}
711 	ret.maxPos = maxPos;
712 	ret.table = new Cell[][](ret.maxPos.row + 1, ret.maxPos.col + 1);
713 	foreach(const c; ret.cells) {
714 		ret.table[c.position.row][c.position.col] = c;
715 	}
716 	return ret;
717 }
718 
719 string[] readSharedEntries(ZipArchive za, ArchiveMember am) @trusted {
720 	ubyte[] ss = za.expand(am);
721 	string ssData = convertToString(ss);
722 	auto dom = parseDOM(ssData);
723 	string[] ret;
724 	if(dom.type != EntityType.elementStart) {
725 		return ret;
726 	}
727 	assert(dom.children.length == 1);
728 	auto sst = dom.children[0];
729 	assert(sst.name == "sst");
730 	if(sst.type != EntityType.elementStart || sst.children.empty) {
731 		return ret;
732 	}
733 	auto siRng = sst.children.filter!(c => c.name == "si");
734 	foreach(si; siRng) {
735 		if(si.type != EntityType.elementStart) {
736 			continue;
737 		}
738 		//ret ~= extractData(si);
739 		string tmp;
740 		foreach(tORr; si.children) {
741 			if(tORr.name == "t" && tORr.type == EntityType.elementStart
742 					&& !tORr.children.empty)
743 			{
744 				//ret ~= Data(convert(tORr.children[0].text));
745 				ret ~= tORr.children[0].text.removeSpecialCharacter();
746 			} else if(tORr.name == "r") {
747 				foreach(r; tORr.children.filter!(r => r.name == "t")) {
748 					if(r.type == EntityType.elementStart && !r.children.empty) {
749 						tmp ~= r.children[0].text.removeSpecialCharacter();
750 					}
751 				}
752 			} else {
753 				//ret ~= Data.init;
754 				ret ~= "";
755 			}
756 		}
757 		if(!tmp.empty) {
758 			//ret ~= Data(convert(tmp));
759 			ret ~= tmp.removeSpecialCharacter();
760 		}
761 	}
762 	return ret;
763 }
764 
765 string extractData(DOMEntity!string si) {
766 	string tmp;
767 	foreach(tORr; si.children) {
768 		if(tORr.name == "t") {
769 			if(!tORr.attributes.filter!(a => a.name == "xml:space").empty) {
770 				return "";
771 			} else if(tORr.type == EntityType.elementStart
772 					&& !tORr.children.empty)
773 			{
774 				return tORr.children[0].text;
775 			} else {
776 				return "";
777 			}
778 		} else if(tORr.name == "r") {
779 			foreach(r; tORr.children.filter!(r => r.name == "t")) {
780 				tmp ~= r.children[0].text;
781 			}
782 		}
783 	}
784 	if(!tmp.empty) {
785 		return tmp;
786 	}
787 	assert(false);
788 }
789 
790 private bool canConvertToLong(in string s) @safe {
791 	if(s.empty) {
792 		return false;
793 	}
794 	return s.byChar.all!isDigit();
795 }
796 
797 private immutable rs = r"[\+-]{0,1}[0-9][0-9]*\.[0-9]*";
798 private auto rgx = ctRegex!rs;
799 
800 private bool canConvertToDoubleOld(in string s) @safe {
801 	auto cap = matchAll(s, rgx);
802 	return cap.empty || cap.front.hit != s ? false : true;
803 }
804 
805 private bool canConvertToDouble(string s) pure @safe @nogc {
806 	if(s.startsWith('+') || s.startsWith('-')) {
807 		s = s[1 .. $];
808 	}
809 	if(s.empty) {
810 		return false;
811 	}
812 
813 	if(s[0] < '0' || s[0] > '9') { // at least one in [0-9]
814 		return false;
815 	}
816 
817 	s = s[1 .. $];
818 
819 	if(s.empty) {
820 		return true;
821 	}
822 
823 	while(!s.empty && s[0] >= '0' && s[0] <= '9') {
824 		s = s[1 .. $];
825 	}
826 	if(s.empty) {
827 		return true;
828 	}
829 	if(s[0] != '.') {
830 		return false;
831 	}
832 	s = s[1 .. $];
833 	if(s.empty) {
834 		return true;
835 	}
836 
837 	while(!s.empty && s[0] >= '0' && s[0] <= '9') {
838 		s = s[1 .. $];
839 	}
840 
841 	return s.empty;
842 }
843 
844 @safe unittest {
845 	static struct Test {
846 		string tt;
847 		bool rslt;
848 	}
849 	auto tests =
850 		[ Test("-", false)
851 		, Test("0.0", true)
852 		, Test("-0.", true)
853 		, Test("-0.0", true)
854 		, Test("-0.a", false)
855 		, Test("-0.0", true)
856 		, Test("-1100.0", true)
857 		];
858 	foreach(const t; tests) {
859 		assert(canConvertToDouble(t.tt) == canConvertToDoubleOld(t.tt)
860 				&& canConvertToDouble(t.tt) == t.rslt
861 			, format("%s %s %s %s", t.tt
862 				, canConvertToDouble(t.tt), canConvertToDoubleOld(t.tt)
863 				, t.rslt));
864 	}
865 }
866 
867 string removeSpecialCharacter(string s) {
868 	struct ToRe {
869 		string from;
870 		string to;
871 	}
872 
873 	immutable ToRe[] toRe = [
874 		ToRe( "&amp;", "&"),
875 		ToRe( "&gt;", "<"),
876 		ToRe( "&lt;", ">"),
877 		ToRe( "&quot;", "\""),
878 		ToRe( "&apos;", "'")
879 	];
880 
881 	string replaceStrings(string s) {
882 		import std.algorithm.searching : canFind;
883 		import std.array : replace;
884 		foreach(const tr; toRe) {
885 			while(canFind(s, tr.from)) {
886 				s = s.replace(tr.from, tr.to);
887 			}
888 		}
889 		return s;
890 	}
891 
892 	return replaceStrings(s);
893 }
894 
895 Cell[] readCells(ZipArchive za, ArchiveMember am) @trusted {
896 	Cell[] ret;
897 	ubyte[] ss = za.expand(am);
898 	string ssData = convertToString(ss);
899 	auto dom = parseDOM(ssData);
900 	assert(dom.children.length == 1);
901 	auto ws = dom.children[0];
902 	if(ws.name != "worksheet") {
903 		return ret;
904 	}
905 	auto sdRng = ws.children.filter!(c => c.name == "sheetData");
906 	assert(!sdRng.empty);
907 	if(sdRng.front.type != EntityType.elementStart) {
908 		return ret;
909 	}
910 	auto rows = sdRng.front.children
911 		.filter!(r => r.name == "row");
912 
913 	foreach(ref row; rows) {
914 		if(row.type != EntityType.elementStart || row.children.empty) {
915 			continue;
916 		}
917 		foreach(ref c; row.children.filter!(r => r.name == "c")) {
918 			Cell tmp;
919 			tmp.row = row.attributes.filter!(a => a.name == "r")
920 				.front.value.to!size_t();
921 			tmp.r = c.attributes.filter!(a => a.name == "r")
922 				.front.value;
923 			auto t = c.attributes.filter!(a => a.name == "t");
924 			if(t.empty) {
925 				// we assume that no t attribute means direct number
926 				//writefln("Found a strange empty cell \n%s", c);
927 			} else {
928 				tmp.t = t.front.value;
929 			}
930 			if(tmp.t == "s" || tmp.t == "n") {
931 				if(c.type == EntityType.elementStart) {
932 					auto v = c.children.filter!(c => c.name == "v");
933 					//enforce(!v.empty, format("r %s", tmp.row));
934 					if(!v.empty && v.front.type == EntityType.elementStart
935 							&& !v.front.children.empty)
936 					{
937 						tmp.v = v.front.children[0].text;
938 					} else {
939 						tmp.v = "";
940 					}
941 				}
942 			} else if(tmp.t == "inlineStr") {
943 				auto is_ = c.children.filter!(c => c.name == "is");
944 				tmp.v = extractData(is_.front);
945 			} else if(c.type == EntityType.elementStart) {
946 				auto v = c.children.filter!(c => c.name == "v");
947 				if(!v.empty && v.front.type == EntityType.elementStart
948 						&& !v.front.children.empty)
949 				{
950 					tmp.v = v.front.children[0].text;
951 				}
952 			}
953 			if(c.type == EntityType.elementStart) {
954 				auto f = c.children.filter!(c => c.name == "f");
955 				if(!f.empty && f.front.type == EntityType.elementStart) {
956 					tmp.f = f.front.children[0].text;
957 				}
958 			}
959 			ret ~= tmp;
960 		}
961 	}
962 	return ret;
963 }
964 
965 Cell[] insertValueIntoCell(Cell[] cells, string[] ss) @trusted {
966 	immutable excepted = ["n", "s", "b", "e", "str", "inlineStr"];
967 	immutable same = ["n", "e", "str", "inlineStr"];
968 	foreach(ref Cell c; cells) {
969 		assert(canFind(excepted, c.t) || c.t.empty,
970 				format("'%s' not in [%s]", c.t, excepted));
971 		if(c.t.empty) {
972 			//c.xmlValue = convert(c.v);
973 			c.xmlValue = c.v.removeSpecialCharacter();
974 		} else if(canFind(same, c.t)) {
975 			//c.xmlValue = convert(c.v);
976 			c.xmlValue = c.v.removeSpecialCharacter();
977 		} else if(c.t == "b") {
978 			//logf("'%s' %s", c.v, c);
979 			//c.xmlValue = c.v == "1";
980 			c.xmlValue = c.v.removeSpecialCharacter();
981 		} else {
982 			if(!c.v.empty) {
983 				size_t idx = to!size_t(c.v);
984 				//logf("'%s' %s", c.v, idx);
985 				//c.xmlValue = ss[idx];
986 				c.xmlValue = ss[idx];
987 			}
988 		}
989 	}
990 	return cells;
991 }
992 
993 Pos toPos(in string s) @safe {
994 	import std.string : indexOfAny;
995 	import std.math : pow;
996 	ptrdiff_t fn = s.indexOfAny("0123456789");
997 	enforce(fn != -1, s);
998 	size_t row = to!size_t(to!long(s[fn .. $]) - 1);
999 	size_t col = 0;
1000 	string colS = s[0 .. fn];
1001 	foreach(const idx, char c; colS) {
1002 		col = col * 26 + (c - 'A' + 1);
1003 	}
1004 	return Pos(row, col - 1);
1005 }
1006 
1007 @safe unittest {
1008 	assert(toPos("A1").col == 0);
1009 	assert(toPos("Z1").col == 25);
1010 	assert(toPos("AA1").col == 26);
1011 }
1012 
1013 Pos elementMax(Pos a, Pos b) {
1014 	return Pos(a.row < b.row ? b.row : a.row,
1015 			a.col < b.col ? b.col : a.col);
1016 }
1017 
1018 string specialCharacterReplacement(string s) {
1019 	return s.replace("\"", "&quot;")
1020 		.replace("'", "&apos;")
1021 		.replace("<", "&lt;")
1022 		.replace(">", "&gt;")
1023 		.replace("&", "&amp;");
1024 }
1025 
1026 string specialCharacterReplacementReverse(string s) {
1027 	return s.replace("&quot;", "\"")
1028 		.replace("&apos;", "'")
1029 		.replace("&lt;", "<")
1030 		.replace("&gt;", ">")
1031 		.replace("&amp;", "&");
1032 }
1033 
1034 @safe unittest {
1035 	import std.math : isClose;
1036 	auto r = readSheet("multitable.xlsx", "wb1");
1037 	assert(isClose(r.table[12][5].xmlValue.to!double(), 26.74),
1038 			format("%s", r.table[12][5])
1039 		);
1040 
1041 	assert(isClose(r.table[13][5].xmlValue.to!double(), -26.74),
1042 			format("%s", r.table[13][5])
1043 		);
1044 }
1045 
1046 @safe unittest {
1047 	import std.algorithm.comparison : equal;
1048 	auto s = readSheet("multitable.xlsx", "wb1");
1049 	auto r = s.iterateRow!long(15, 1, 6);
1050 
1051 	auto expected = [1, 2, 3, 4, 5];
1052 	assert(equal(r, expected), format("%s", r));
1053 
1054 	auto r2 = s.getRow!long(15, 1, 6);
1055 	assert(equal(r, expected));
1056 
1057 	auto it = s.iterateRowLong(15, 1, 6);
1058 	assert(equal(r2, it));
1059 
1060 	auto it2 = s.iterateRowUntyped(15, 1, 6)
1061 		.map!(it => format("%s", it))
1062 		.array;
1063 }
1064 
1065 @safe unittest {
1066 	import std.algorithm.comparison : equal;
1067 	auto s = readSheet("multitable.xlsx", "wb2");
1068 	//writefln("%s\n%(%s\n%)", s.maxPos, s.cells);
1069 	auto rslt = s.iterateColumn!Date(1, 1, 6);
1070 	auto rsltUt = s.iterateColumnUntyped(1, 1, 6)
1071 		.map!(it => format("%s", it))
1072 		.array;
1073 	assert(!rsltUt.empty);
1074 
1075 	auto target = [Date(2019,5,01), Date(2016,12,27), Date(1976,7,23),
1076 		 Date(1986,7,2), Date(2038,1,19)
1077 	];
1078 	assert(equal(rslt, target), format("\n%s\n%s", rslt, target));
1079 
1080 	auto it = s.getColumn!Date(1, 1, 6);
1081 	assert(equal(rslt, it));
1082 
1083 	auto it2 = s.getColumnDate(1, 1, 6);
1084 	assert(equal(rslt, it2));
1085 }
1086 
1087 @safe unittest {
1088 	import std.algorithm.comparison : equal;
1089 	auto s = readSheet("multitable.xlsx", "Sheet3");
1090 	writeln(s.table[0][0].xmlValue);
1091 	assert(s.table[0][0].xmlValue.to!long(),
1092 			format("%s", s.table[0][0].xmlValue));
1093 	//assert(s.table[0][0].canConvertTo(CellType.bool_));
1094 }
1095 
1096 unittest {
1097 	import std.file : dirEntries, SpanMode;
1098 	import std.traits : EnumMembers;
1099 	foreach(const de; dirEntries("xlsx_files/", "*.xlsx", SpanMode.depth)
1100 			.filter!(a => a.name != "xlsx_files/data03.xlsx"))
1101 	{
1102 		//writeln(de.name);
1103 		auto sn = sheetNames(de.name);
1104 		foreach(const s; sn) {
1105 			auto sheet = readSheet(de.name, s.name);
1106 			foreach(const cell; sheet.cells) {
1107 			}
1108 		}
1109 	}
1110 }
1111 
1112 @safe unittest {
1113 	import std.algorithm.comparison : equal;
1114 	auto sheet = readSheet("testworkbook.xlsx", "ws1");
1115 	//writefln("%(%s\n%)", sheet.cells);
1116 	//writeln(sheet.toString());
1117 	assert(sheet.table[2][3].xmlValue.to!long() == 1337);
1118 
1119 	auto c = sheet.getColumnLong(3, 2, 5);
1120 	auto r = [1337, 2, 3];
1121 	assert(equal(c, r), format("%s %s", c, sheet.toString()));
1122 
1123 	auto c2 = sheet.getColumnString(4, 2, 5);
1124 	string f2 = sheet.table[2][4].xmlValue;
1125 	assert(f2 == "hello", f2);
1126 	f2 = sheet.table[3][4].xmlValue;
1127 	assert(f2 == "sil", f2);
1128 	f2 = sheet.table[4][4].xmlValue;
1129 	assert(f2 == "foo", f2);
1130 	auto r2 = ["hello", "sil", "foo"];
1131 	assert(equal(c2, r2), format("%s", c2));
1132 }
1133 
1134 @safe unittest {
1135 	import std.math : isClose;
1136 	auto sheet = readSheet("toto.xlsx", "Trades");
1137 	writefln("%(%s\n%)", sheet.cells);
1138 
1139 	auto r = sheet.getRowString(1, 0, 2).array;
1140 
1141 	double d = to!double(r[1]);
1142 	assert(isClose(d, 38204642.510000));
1143 }
1144 
1145 @safe unittest {
1146 	auto sheet = readSheet("leading_zeros.xlsx", "Sheet1");
1147 	auto a2 = sheet.cells.filter!(c => c.r == "A2");
1148 	assert(!a2.empty);
1149 	assert(a2.front.xmlValue == "0012", format("%s", a2.front));
1150 }
1151 
1152 @safe unittest {
1153 	import std.algorithm.comparison : equal;
1154 	auto s = readSheet("datetimes.xlsx", "Sheet1");
1155 	//writefln("%s\n%(%s\n%)", s.maxPos, s.cells);
1156 	auto rslt = s.iterateColumn!DateTime(0, 0, 2);
1157 	assert(!rslt.empty);
1158 
1159 	auto target =
1160 		[ DateTime(Date(1986,1,11), TimeOfDay.init)
1161 		, DateTime(Date(1986,7,2), TimeOfDay.init)
1162 		];
1163 	assert(equal(rslt, target), format("\ngot: %s\nexp: %s\ntable %s", rslt
1164 				, target, s.toString()));
1165 }