RE: Reading Native Excel files->Solution
- To: mathgroup at smc.vnet.net
- Subject: [mg15007] RE: [mg14976] Reading Native Excel files->Solution
- From: "Barthelet, Luc" <lucb at ea.com>
- Date: Sat, 5 Dec 1998 01:30:12 -0500
- Sender: owner-wri-mathgroup at wolfram.com
Well, since I did not get any answer, I wrote some code: It is based on the dev doc that Microsoft published (search for Excel File Format on www.microsoft.com). I had to hack a few things in there since the format description by Microsoft is incomplete. This does not work with all spreadsheets, but worked with all the one I cared about. If anyone improves on this, please post the improvements. usage: XlsRead[ filename] will return a 2x2 Matrix. Thanks. Luc Barthelet. ------------------------------------------------------------------------- HexToNumber[s_] := FromDigits[ ToCharacterCode[ s ] /. { x_ /; x >= 65 + 32 :> x - 55 - 32, x_ /; x >= 65 :> x - 55, x_ /; x < 65 :> x - 48}, 16]; HexToDec[s_] := FromDigits[ ToCharacterCode[ s ] /. { x_ /; x >= 65 + 32 :> x - 55 - 32, x_ /; x >= 65 :> x - 55, x_ /; x < 65 :> x - 48}, 16]; DecToHex[num_] := "0x"<>StringJoin[ FromCharacterCode[ IntegerDigits[num, 16, 8] /. {x_ /; x > 9 :> x + 55, x_ /; x < 10 :> x + 48}]]; myBitAnd[n1_Integer, n2_Integer] := Module[ {d1, d2, l1, l2}, d1 = IntegerDigits[n1, 2]; d2 = IntegerDigits[n2, 2]; l1 = Length[d1]; l2 = Length[d2]; If[ l1 < l2, d1 = Join[Table[0, {l2 - l1}], d1];]; If[ l2 < l1, d2 = Join[Table[0, {l1 - l2}], d2];]; FromDigits[ d1 d2, 2] ]; XlsRead[filename_, defaultValue_:"NA", debug_:False] := Module[ {RecordID, RecordLength, whichPass, offset, firstSheetOffset, StringList, TotalStringNum, UniqStringNum, StringLeft, k, numOfChars, thisRow, thisCol, stringIndex, RK, thisNumber, nData, theFile, data, RowMin, RowMax, ColMin, ColMax, toSkip, ss}, (* some local functions *) ReadIEEENumber[val_] := Module[ {sign, exponent, mantissa}, If[val == 0, 0, valInBits = IntegerDigits[val, 2, 32]; sign = First[valInBits]; exponent = FromDigits[Take[ valInBits, {2, 12}], 2] - 1024 ; mantissa = FromDigits[Join[{1}, Take[ valInBits, {13, 32}] ], 2]; (* Print["sign=", sign]; Print["exponent=", exponent]; Print["mantissa=", Join[{1}, Take[ valInBits, {13, 32}] ], mantissa]; *) If[sign == 1, -mantissa 2^(exponent - 19) , mantissa 2^(exponent - 19) ] ] ]; HexToDec[s_] := FromDigits[ ToCharacterCode[ s ] /. { x_ /; x >= 65 + 32 :> x - 55 - 32, x_ /; x >= 65 :> x - 55, x_ /; x < 65 :> x - 48}, 16]; ScanAFile[] := Module[{}, While[RecordID = Plus @@ (nData[[ {1, 2}]] { 1, 256}); RecordID != HexToDec["0a"] && RecordID != HexToDec["EB"], RecordID = Plus @@ (nData[[ {1, 2}]] { 1, 256}); RecordLength = Plus @@ (nData[[ {3, 4}]] { 1, 256}); If[debug && whichPass == 1 , Print["RecordID =", DecToHex[RecordID], " ;RecordLength=", RecordLength, " ;offset=", DecToHex[offset] ];]; thisRecord = Take[nData, {5, 4 + RecordLength}]; If[ MemberQ[ { HexToDec["0809"], HexToDec["0a"], HexToDec["FC"], HexToDec["85"], HexToDec["BD"], HexToDec["FD"], HexToDec["27E"]}, RecordID], Switch[RecordID, (* -Sheet info---- - ---------------------------------- *) HexToDec["85"], If[firstSheetOffset == 0, firstSheetOffset = Plus @@ (thisRecord [[{1, 2, 3, 4}]] {1, 256, 65536, 16777216}); If[debug, Print["Found the top sheet, offset @ ", DecToHex[toSkip + firstSheetOffset], "this numbers:", thisRecord [[{1, 2, 3, 4}]] ];]; ]; , (* -BOF---- - ---------------------------------- *) HexToDec["0809"], If[debug, Print["BOF, Offset =", DecToHex[offset]];], (* -EOF---- - ---------------------------------- *) HexToDec["0a"], If[debug, Print["EOF, Offset =", DecToHex[offset]];], (* -String Table---- - ---------------------------------- *) HexToDec["FC"], If [ whichPass == 1, StringList = {}; TotalStringNum = Plus @@ (thisRecord [[{1, 2, 3, 4}]] {1, 256, 65536, 16777216}); UniqStringNum = Plus @@ (thisRecord [[{5, 6, 7, 8}]] {1, 256, 65536, 16777216}); If[debug, Print[" Found String Table, TotalString=", TotalStringNum, " ;Unique Strings=", UniqStringNum ]; ]; StringLeft = Drop[thisRecord, 8]; For[k = 1, k <= UniqStringNum, k++, numOfChars = Plus @@ (StringLeft [[{1, 2}]] {1, 256}); (* ignore the flag at this time *) StringList = Append[StringList, FromCharacterCode[ Take[StringLeft, {4, 3 + numOfChars} ]]]; StringLeft = Drop[StringLeft, 3 + numOfChars]; ]; If[debug, Print["String Table: Total Strings=", TotalStringNum, " ;Unique Strings=", UniqStringNum, " ;The Strings=", StringList];] ];, (* -Text Cells---- - ---------------------------------- *) HexToDec["FD"], thisRow = 1 + thisRecord [[1]] + 256thisRecord [[2]]; thisCol = 1 + thisRecord [[3]] + 256thisRecord [[4]]; stringIndex = Plus @@ (thisRecord [[{7, 8, 9, 10}]] {1, 256, 65536, 16777216}); Switch[whichPass, 1, If[ RowMin > thisRow, RowMin = thisRow]; If[ RowMax < thisRow, RowMax = thisRow]; If[ ColMin > thisCol, ColMin = thisCol]; If[ ColMax < thisCol, ColMax = thisCol];, 2, If[ stringIndex < Length[StringList], ss[[thisRow, thisCol]] = StringList\[LeftDoubleBracket] 1 + stringIndex \[RightDoubleBracket] ]; ]; If[debug && False, Print[ "Text @ Row=", thisRow , " ;Col=", thisCol, " -> ", If[ stringIndex < Length[StringList], StringList\[LeftDoubleBracket] 1 + stringIndex \[RightDoubleBracket] , "too big"] ]; ];, (* -Number Cell---- - ---------------------------------- *) HexToDec["27E"], thisRow = 1 + thisRecord [[1]] + 256thisRecord [[2]]; thisCol = 1 + thisRecord [[3]] + 256thisRecord [[4]]; RK = Plus @@ (thisRecord [[{7, 8, 9, 10}]] {1, 256, 65536, 16777216}); thisNumber = ReadIEEENumber[myBitAnd[RK, HexToDec["FFFFFFFC"]]]; Switch[whichPass, 1, If[ RowMin > thisRow, RowMin = thisRow]; If[ RowMax < thisRow, RowMax = thisRow]; If[ ColMin > thisCol, ColMin = thisCol]; If[ ColMax < thisCol, ColMax = thisCol];, 2, ss[[thisRow, thisCol]] = thisNumber ; ]; If[debug && False, Print[ "Number @ Row=", thisRow , " ;Col=", thisCol, "; RKnumber= ", thisNumber, " ;RKtype=" , Mod[RK, 4]];], (* -Multiple Number Cell---- - ---------------------------------- *) HexToDec["BD"], thisRow = 1 + thisRecord [[1]] + 256thisRecord [[2]]; firstCol = 1 + thisRecord [[3]] + 256thisRecord [[4]]; lastCol = 1 + thisRecord [[-2]] + 256thisRecord [[-1]]; Switch[whichPass, 1, If[ RowMin > thisRow, RowMin = thisRow]; If[ RowMax < thisRow, RowMax = thisRow]; If[ ColMin > firstCol, ColMin = firstCol]; If[ ColMax < firstCol, ColMax = firstCol]; If[ ColMin > lastCol, ColMin = lastCol]; If[ ColMax < lastCol, ColMax = lastCol];, 2, RKList = Drop[#, 2] & /@ Partition[Drop[Drop[thisRecord, 4], -2], 6]; For[ thisCol = firstCol; k = 1;, thisCol <= lastCol, thisCol++; k++;, RK = Plus @@ (RKList\[LeftDoubleBracket] k\[RightDoubleBracket] {1, 256, 65536, 16777216}); thisNumber = ReadIEEENumber[myBitAnd[RK, HexToDec["FFFFFFFC"]]]; ss[[thisRow, thisCol]] = thisNumber ; ]; (* For *) ]; (* Switch *) If[debug && False, Print[ "Number @ Row=", thisRow , " ;Col=", thisCol, "; RKnumber= ", thisNumber, " ;RKtype=" , Mod[RK, 4]];], (* ---------------------------------------- *) _, If[debug, Print[ DecToHex[RecordID], thisRecord, FromCharacterCode[thisRecord ] ]]; ]; (* switch *) ]; (* If[ MemberQ ... *) nData = Drop[nData, RecordLength + 4]; offset += RecordLength + 4; ]; (* While for each record *) ]; (* begining of the code *) theFile = OpenRead[filename, DOSTextFormat -> False]; data = ReadList[theFile, Byte]; Close[theFile]; firstSheetOffset = 0; If[debug, Print["Length of file:", Length[data]];]; whichPass = 0; While[whichPass < 2, whichPass++; If[debug, Print["Pass#", whichPass];]; (* we do 2 passes, first one to find the RowMin, RowMax, ColMin, ColMax, then we put the data in the table *) Switch[whichPass, 1, {RowMin, RowMax, ColMin, ColMax} = {17000, 0, 17000, 0};, 2, ss = Table[defaultValue, {RowMax - RowMin + 1}, {ColMax - ColMin + 1}]; ]; (* where to start ? *) (* did not figure out yet how many bytes to skip at the beginning except I bet it is a multiple of 512 *) toSkip = 512; While[ (toSkip + 512) < Length[data] && data\[LeftDoubleBracket] toSkip + {1, 2} \[RightDoubleBracket] != {9, 8}, toSkip += 512;]; If[debug, Print["Position of first record=", DecToHex[toSkip]];]; nData = Drop[data, toSkip]; offset = toSkip; RecordID = Plus @@ (nData[[ {1, 2}]] { 1, 256}); If[RecordID == HexToDec["0809"], (* read the top segment *) ScanAFile[]; (* read the first sheet *) If[ firstSheetOffset > 0, nData = Drop[data, toSkip + firstSheetOffset]; RecordID = Plus @@ (nData[[ {1, 2}]] { 1, 256}); If[RecordID ! HexToDec["0809"], Print[ "Oops error in position!"];]; ScanAFile[]; ]; , Print[ "Oops error in position!"];]; ]; (* While for each Pass *) ss ]; (* Module *) --------------------------------------------------------------------------- -----Original Message----- From: Barthelet, Luc [mailto:lucb at ea.com] To: mathgroup at smc.vnet.net Subject: [mg15007] [mg14976] Reading Native Excel files Did anyone write a notebook to read native Excel files in Mathematica? (*.xls under Windows) So far I have always forced the users to save the excel files as text files. It works fine, except it requires an extra step and therefore an extra opportunity for a human error. I have used Mathlink for Excel in the past and I do not want to use it for solving this since the whole point of it is to not require mathematica on those user machines. thanks. Luc Barthelet GM the day, Mayor at night http://www.simcity.com