From 788fcabde4ade599df09231717ac8af4deb87f3a Mon Sep 17 00:00:00 2001 From: TimG1964 Date: Thu, 13 Nov 2025 18:09:13 +0000 Subject: [PATCH 1/7] Switch to XLSX.jl for read as well as write --- Project.toml | 6 +-- README.md | 90 ++++++++++++++++++++++++--------- data/TestData.xlsx | Bin 0 -> 12907 bytes docs/src/index.md | 121 +++++++++++++++++++++++++++++++++++++++++++++ src/ExcelFiles.jl | 106 ++++++++++----------------------------- test/runtests.jl | 88 +++++++++++++++++---------------- 6 files changed, 259 insertions(+), 152 deletions(-) create mode 100644 data/TestData.xlsx diff --git a/Project.toml b/Project.toml index 68ecaaa..ee12dfb 100644 --- a/Project.toml +++ b/Project.toml @@ -5,12 +5,10 @@ version = "1.0.1-DEV" [deps] DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -ExcelReaders = "c04bee98-12a5-510c-87df-2a230cb6e075" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" IterableTables = "1c8ee90f-4401-5389-894e-7a04a3dc0f4d" IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" TableShowUtils = "5e66a065-1f0a-5976-b372-e0b8c017ca10" TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" TableTraitsUtils = "382cd787-c1b6-5bf2-a167-d5b971a19bda" @@ -18,15 +16,13 @@ XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" [compat] DataValues = "0.4.11" -ExcelReaders = "0.11" FileIO = "1" IterableTables = "0.8.3, 0.9, 0.10, 0.11, 1" IteratorInterfaceExtensions = "0.1.1, 1" -PyCall = "1.90" TableShowUtils = "0.2" TableTraits = "0.3.1, 0.4, 1" TableTraitsUtils = "0.3, 0.4, 1" -XLSX = "0.4.1, 0.5, 0.6, 0.7, 0.8, 0.9" +XLSX = "0.10, 0.11" julia = "1" [extras] diff --git a/README.md b/README.md index f175400..bf8add9 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,18 @@ ## Overview -This package provides load support for Excel files under the +This package provides support for Excel files under the 
[FileIO.jl](https://github.com/JuliaIO/FileIO.jl) package. +It provides functionality to read simple tabular data from +an Excel (.xlsx) file and to save simple tabular data to an +Excel file. + +For more extensive functionality when reading and writing Excel files, +consider using [XLSX.jl](https://felipenoris.github.io/XLSX.jl/stable/). +Under the hood, `ExcelFiles.jl` uses the `XLSX.jl` functions `readtable` +and `writetable`. + ## Installation Use ``Pkg.add("ExcelFiles")`` in Julia to install ExcelFiles and its dependencies. @@ -18,17 +27,17 @@ Use ``Pkg.add("ExcelFiles")`` in Julia to install ExcelFiles and its dependencie ### Load an Excel file -To read a Excel file into a ``DataFrame``, use the following julia code: +To read an Excel file into a `DataFrame`, use the following julia code: -````julia +```julia using ExcelFiles, DataFrames df = DataFrame(load("data.xlsx", "Sheet1")) -```` +``` -The call to ``load`` returns a ``struct`` that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing an Excel file into data structures that are not a ``DataFrame``: +The call to `load` returns an object that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). 
Here are some examples of materializing an Excel file into data structures that are not a `DataFrame`: -````julia +```julia using ExcelFiles, DataTables, IndexedTables, TimeSeries, Temporal, Gadfly # Load into a DataTable @@ -45,46 +54,81 @@ ts = TS(load("data.xlsx", "Sheet1")) # Plot directly with Gadfly plot(load("data.xlsx", "Sheet1"), x=:a, y=:b, Geom.line) -```` +``` + +The `load` function takes a number of arguments and keywords: + +```julia + FileIO.load( + source::String, + [sheet::String, + [columns::String]]; + [first_row::Int], + [column_labels::Vector{String}], + [header::Bool], + [normalizenames::Bool] + ) +``` -The ``load`` function also takes a number of parameters: - -````julia -function load(f::FileIO.File{FileIO.format"Excel"}, range; keywords...) -```` #### Arguments: -* ``range``: either the name of the sheet in the Excel file to read, or a full Excel range specification (i.e. "Sheetname!A1:B2"). -* The ``keywords`` arguments are the same as in [ExcelReaders.jl](https://github.com/queryverse/ExcelReaders.jl) (which is used under the hood to read Excel files). When ``range`` is a sheet name, the keyword arguments for the ``readxlsheet`` function from ExcelReaders.jl apply, if ``range`` is a range specification, the keyword arguments for the ``readxl`` function apply. +* `source`: The name of the file to be loaded. +* `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used. +* `columns`: Determines which columns to read. For example, "B:D" will select columns B, C and D. If columns is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns. + +#### Keywords: + +* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet. 
+* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. +* `column_labels`: Specifies column names for the header of the table. If `column_labels` are given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored. +* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false` ### Save an Excel file The following code saves any iterable table as an excel file: -````julia + +```julia using ExcelFiles save("output.xlsx", it) -```` -This will work as long as it is any of the types supported as sources in IterableTables.jl. +``` +This will work as long as `it` is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`). + +The `save` function takes a number of arguments and keywords: + +``` + FileIO.save( + source::String; + [overwrite::Bool] + ) +``` + +#### Arguments: + +* `sheetname`: Specify the sheetname to be used in the created file. Default = `Sheet1` + +#### Keywords: + +* `overwrite`: Set `overwrite=true` to overwrite any existing file of the same name. Default = `false` ### Using the pipe syntax -``load`` also support the pipe syntax. For example, to load an Excel file into a ``DataFrame``, one can use the following code: +The `load` and `save` functions also support the pipe syntax.
For example, to load an Excel file into a `DataFrame`, one can use the following code: -````julia +```julia using ExcelFiles, DataFrame df = load("data.xlsx", "Sheet1") |> DataFrame -```` +``` To save an iterable table, one can use the following form: -````julia +```julia using ExcelFiles, DataFrame df = # Aquire a DataFrame somehow df |> save("output.xlsx") -```` +``` -The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries, for example one can easily load an Excel file, pipe it into a query, then pipe it to the ``save`` function to store the results in a new file. +The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries, for example one can easily load an Excel file, pipe it into a query, then pipe it to the `save` function to store the results in a new file. diff --git a/data/TestData.xlsx b/data/TestData.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c5c19f448809978bf49a481c80318574cb96f3e8 GIT binary patch literal 12907 zcmeHtWl$X3*7o2QB)Gc_uEE`cySuvt4HgJaa0_n1-7UBW2_Zo6zyt{ng9e9hl5=k5 z9PYV4-|x@8UDe&yGrgXw-g`ZJJ!`L}A`b(L4S)wA0ssICKsnL3i5MsVKnMWq|I0xm!B98*6yKuyiwI@p5z^&xeHu<^iA~@Bi=kFCKx9Z^xCO*|9?}XuJ9g_4~0u z{uZ8#k%G7i)JSpN ztGtm}W?MBHE=vP|o0nP)_=$Rlv?`zJbYB3>**V#Lv!=l`vVdSFA{M$?BRH%TB4#OS zwR_7M*3{Wiijq+-1o?yGNH%=Xj!N9EV{N%_Q5BM&*biE9zo$5+NAFibED9ponVUCK zo+@p}I`S$?pBNSciC+QU)xt)+9Z}EJ7GHXHLHfW&;BCi`8vJEXzVErK`$Wk4wF0lx z{TrBVN;t-k4bAEsOP$5aqs>18`!VT>UzdP)uw^%2Qw)VdIb6rmPD*?#Ld;t+&%<{l zHqpNUgkwvi$&V(5@t790PvnXpGV{h^UqHno=xli#E}u}tQrOK?Usqdv<(*P@If>j$ za>OYmSrHWSzaxV<;KKt9K;>^NZqj6~iYcHbTJEMJ#?T-PhIt;)>3=)A zv@962J52FojjJ*W6I+0)`DImD>a~jp0t2mUinL4R>cCUane&++>Cff7fu5al3>B@V z@8m~7RMOMOl64ppEZT%fSVhEPxI*cHdP9o3D`uC~P&4A{2UTIUEj)Rl-|Y`4n3!mncY}-e#}Z)!)laV zn3o(A&p*)Qod@VOavhIldk^p+^;ZlZjRZxJm99Vn+P{+|t9r8K4GaK)js!U&Bje@3 z`qJ6e-ptwA{%4pg)N*uQgq)7}B7;7pXs?!9crP7lTmkIN63lQ$hCON9xkoH;(eQ zYN;Vn8L~0}K8@wPaMhG2WsX_m 
z8mOXzY@g2L5$yCe6AT1r#S;^Yr2tQW2?(`C_*t&ga9^;?AOBq z`;^h>;FP+^+*tEn+M(DUI0mIQBZ9eQ~? zmsI8`6;IPOXw|OI7~(T-Jpv2md+!V7 zK`?OUMtshY49q+)GYulHhM8 zVp(nd30s$ZpC~S==47WozYNk9enKgjX(i?~Z*Al^vPjM%JU?Of#a2CE-v+iA!F_Ka z`pWZV%lml;2P|Keq7gqUg9W>X+;6BI^L#?8yxjt-FRrh4f9y&wjP3tedgWh!w84uXXGb}2pOKp=K8CpjWpnpN+~44H~FY7 zTck{xUhp@9mY%ul4-z2Nt-Y0EAOlj3w0}$9awsnG+uHa5e)%~G4%}wg)cQBXnC$DR zrdn=&w}koy&@ZTlWyQsdvfrX_n$B7nfIi!NkO^Cy6V~O88bH8eeS&pk!tK__>m$eZ z$r5-~>qBt3W%0TIrb6n}XvasAPdg!3+o=A$uR!lc`kad{8au{MHGj%nnr??INcnzz z9})xpJxKV`0%J;G0f0I*001B2M}GzhHycY!cQ@8wH}+rAB3VB!pAGv1h-U#Tdd;G; zK`EVuLDFzq-@dTt#;i$5Z@J_%X4Q3*lRr$|_054Y2j=Ydeyg~lB&jqDMm=MUUt<0| zq2G=H#mJfS?DT^jmsl)!A&U%+eabt|>22HlwW7!XpFGusl&v*lAtiGcor ziBrpaK|LlnLFCji_hjLcD`4yi&`3dzZ2cBz+!`(%X{nrekefg+y2KcBW8C`S{&sD@ z6m%I&u?Ma+`@;6QXSiSwx$V$(s;De-jifO@iy_gUAP4sY-Or6~*-` z&`#Cj+xzmYAjjb@4$#Ii*80-a>ld)0i;C&ETQt+yTG)ll^c4Iru=Y#VVS)C=aL^!~=^hN^fnUZJlK$mdaMpT?SDkjMolIWm|C zAXDD~X}Z*Ue!zu_B`X}kyr=F=(3 z9qE77!*+>+Fxu_0c3k$&Nvd*s4u|IcGNEpMysL1URW`m2hTW+B<^kec4}V;e-;SvT zi(*Ix(5y-G^aw%^Q0mZAu%48W{Vsm)1T|ged@u@dOBagxdjW4 z%`$wP?j#Sbze;SbvYseBLSuN-FBqX1GbGU53p>C5&Re2%$bhIwe^`0dBD&`} zk_j=v)iR{j;Tsqne%0ZHu^Q}t&LXC6*+M`E?@dRrF%xbjH&iTb9%n*I4M4wf3yCqM z9|ZS>FbxSC*m`7SKa-Oi_vD8`V|R1xrW3rabr?6LuU$71#CDdtZ(`xuTPt#cwT5xy zBxh+Ei4TWU{*+TEVfb>2>thdu7HufVy zKk-)z5Skc%H0Ktiu)U$eZ?=q;<;{0e7pherIA@ZI%Rv& zVg8~>U&OcSSFF!&T4H#L2KbG(I;NgkoFS)lF$?>runWA;47w#%HHdT%w9hC>5% zdbKPXV~y;H{qk~VKi%=kQHOpZ11?-|8a<1>50;fx#JebQWF31K4V88xv%EF5y_O;e zhZ=hJ5cRLbu`zUwcJcEga&7kEYWV{W?KMrS?Z`J>Ayh2GgWnkYUVbgv@edd&EM!~U z?Yjat=#SSNlrUKu2o}!WBe1VV5b%!tfEO;?enk^H(jS?lbF1WV+p~DU(NOS_^V+vf zA@Kzc^$M+?VuxxLOQ-U&%?^zVMhP!gCQ$d`U6eP=F>Qc5WlMH%3kdRW2=d|MtSy{5V70>PDWJ5i z+s$S8{JfN=sr8E2rY|Q<({r7`5!Uv+TIhZdof>XRn1YrpdITb1);?75=Hcr?aTU14z_z z601Y0K`Fhpc@Jx`xie?&qb`uv2GCP$h@X-;DFQ@(*hm!9uo zo3I2AjV28nxM#wFgK<${&?bUob2Gw=VU*F?)|XDDh;@|+vImmrdAABBF@>K%8K`x^ zP>ZpwvSL%b{^W_Sp%dbkhwSciF7S-wo|wr3RPeAK{&&q=!JRgV%l)!;&FvIsdT2R1 zZoSL4lI)3`j)b2;k_v4`Cj%f(2%wB@I=o>1%b)~+a(8rxr(&yKw 
zeaf$7N|l=47~^8OUmfiD?jJ6pJuIrdzLoEjZdRWbcXidjY9bP&8nLdo=a}YoZfVA< zgV7_-hH;)MBp8fAiu}f3B(njKSaIp?H<`)aBZ6x8u0UXQD z2tDRU{%TcL&+A;|x0fWxi=p+AV@TCZy#UmykaNQj9h1Y#8fdD^RJ6PdDQ?J!(d2e^v<8b3BPqv_c1UjY`bJMadZ-fiwi__$jq?zs{}&rI!!DjmbB$FFV~q z{OC{F@qh2Uhvl))cOj7|84_~-gY$Cza^5L@`{1A9=A!prVz@mfby2sBxs!Zp34dv2 z+Utu-l1$S7?b!N?N?7&5AO1snq{g8S=_ov>8q zWlS~&)E1YB@Ds%o%1ND5MCvd)8?d$<2Kr0IA^2Lsc~*IP@lHh=dMbNH9#RSwfw*@R zlkuM~o=d8*9Lvg_3xzSre=;eW*LvMZCn+IOH3s_9@yuGW)|Xf8+-k{3;mzz*`;`~I z`>N5f@*svXE<>K`LkYIRCnSyS;xC_XvE5x{$^>y%MKc`qjBF&II$oJ`&?XxeE?tAo zf+!|y*9~E6G%j`I#Qf^|7;%LnVpw!8+TGi}ON9ljS){{WF&uA>T}AO2Fr^x;-PGZI z7ZdzM70`jd+3m$o-@5lg8)r(z%8S&5Bm1oXbllYWUZ}tccTUc+EtChZAKaXn+3773 z1t!E%nQ}B2`ErlrrK!(zSvOW|@1jSnZIYJB{NYL8yzJj}$0r#@W*1o|*)J`Snfm|d zA6C@ugX<87^dk6Kd-|1Yx!YJeTC)DS{z|j~q0F+b?KNsMll$PvSLrQQ6n& zZ4o#rgU1BG-7k@xE7GW8179(;xClhN9c|{}-ayQ#>*(Q3>}zMoL()0|=Etaf!#c{W zjTx6Ks!K4*fCmmDwRQ@VPCpfcIpCPr>DihEV)q?&(7<4zB=V;$s`I%Qk*V{2$L#l((9a^ zXTBu@s8_TaHOy^4Lk@s{{hSKl_S4`GU6B2O$DUgy+-SFt-^{g$zkHmyoFzo7y@@Ms;4k2UeSQ> z2u0x53=VDCZSr?>JM54Ydxkwz)Q*3uQB~0d3;Ldq4nVcgnZz#Bl^*hf)%@BUjRq%$ zz!mxKh$G7tY5eWDTz5U(!+D8SS9-)Z#~^0=ag?;-x3uF3BMEuwb_isFb3H*FUf2Cm zC2^CGQwc1>*ZFX|6O2%D5it6SFYJal#-V=;^y+?&CpS-oEa2vLHPyeP@wof$a;K)- zZxx}>WDF>HvpWe^CKbB5ZV>4T*bW3E3pTF8WMI6hPQY?cqGRL6f=lHih}3StA4ucJ z<%I{4&I#cN^w@KL^lKGbg9kSQ0*|3;6u|rjac1nAU#3rIcwJtHXEg}1bU?M1-_%nI zrWGf2yRkR9@URR2IA889OIlpvg0UfU?WttBxn6$7~q$eY-SG%t+IU4ah5p1zO6bVz-ZiYQxRfWcIF2^Nz@#C0y^w`;# zexxR!94%6n`Pr2?SgkXLxPM6S1!4|v!)ps)QoOW|@DCIG=I+?$tiD!w<3Q00*Jh{A zfFjwQj;zd5gK_xj0|Ak_nFhQCNcA+N*jnx;v2+gf#;1#cIqJGPZ>6lMoBI3Siw7|% z@S3epa8H)5Vb99Wg}@!!{l#^s6H9KlU8*=Lq0!;&_==Ejy9b&2Q0$lJ1Q=USFlI_g zEqctRj_IeAK~-UMXn$VXr@FzSMNXh}rn+L9IGOvwy4Dz=xSc z1KPZ0Q$+Jl%hjpSe8oRLC}F1Ati3BGkj1SL>%=vtuCuz5Wl!a%XN2*&xkkKi_ajZ+ zcZx{^E^*xZHz5QI?`v(mbqu~7;+5%H)FHyDFxErg(80D>d#coY=Ye`r+7}(duY6V1 zL)p&Hx0SKu(QDxe*JEX-3T0WWpRu>Nx-t~u|W z-qL$)@2D_YNyrv>A1iL?MGYr%*M8ALPRUR5{5)UYe=v_|ePHs0F5oi5LPbCg?8_eY 
z?b@MGQh3$)xh@BVCf*n?dh63{160iN2+?TT15xzuQuq`?RQ9jsndFGh%g%mj^99oQ#Ur6p1LH_IXd&$-ArVcnmF^vkf; zKKRTC`IgNlG)OiWaFS?FRumg6fWA^X*D>I!hHJ7cZ_bm;SJ^W+q9M<;%VpK=b`1oX zs4@z;mD&zfa8y3?UD;I=_<$OS^JXZc6~{h=2eYRABzW0%W*5$^u7)er+5fsWAp2W? zJ2|OgD?$zjjUvNp3qqjjbSj7%uijMQEL`UhFq&$i48;X*)?}EDjZ`20)WAM(zB|X>hEyIjstaX zo4wBUt(L`VZ%EA1phD{TR`}*vkm_b)W$kL*$FQ@w7G-u#?3)Lg!zLH6<<;VOpIA$( zRUf^sO`dTRrG937;izJ}p!R{s;Kj@BI<~}_lRGV22_9&*gZvP!(kLrh>1;muohr~^ zLI=mm>{#V@n99$L z`0`FI#QR5X2&0|NqT#ii+)-a$pYQ{d*lyigl3i^N$hMbBuH3_A6B3@*3=>ur*M8RA zJ~*kE3MiW*L#^x3PFQf>PC%}eBrTW{C)yCj!YFS6GAw*^k$?l?A1)IzyytzgIZxz6 z5aXW+Q}#++0(PndCbbo##LOHxOBDIUeKk#$#^H0TKW`(3QD6u{+6ov6T=gy-s6>Y+ zLsmP?z4+zGqS%pCL5fyqo!qy-Mc@JKj?ztZKs;PNY^T(sr@HFv+riGl)EJ560?wV; z+JtVmr#o1YP=E(1)%#_98FEpG7~gHO0d z#OvxgCt0w^VQ^>};K9b(EB2wwuv53vd5kwLyuv%)pn*%Lu)-srmZ##p%^<|Z>vY1x z;9;Qlu$v|C45+IIMYS9~7&`*Wb>z!&2s^t8N4HgWGahI-G9(NY^CpoN<0v>a5pGus z=7?E4XVP6Rw&WiXdbO&8E{lzti;(Nr=5D~zJCjgBC85q354-e0A4Hr^zNP%~nDv)L z`?n5&QBCY;5Uq^{;STnnysT0&fx?=vgVdfb$N)|{~`US6)+9Ol~lE(AHqFvmc0a>_r3+0x71($&e-LBZC^ z-i`IIPrnpwR^o&mr0zZh4(<#-?)1PM(ve8&+lCLV8-wK2SVipTx#aI@Ufs~!&Uu`v zc{Vs)l_=+O8J~TM6e3UmhLWy?Mc*g!T&d zY_W@=8|TTf#xVB~i*{r=ZdS4FcF(Ci*5{pJ3MXt2#I|Bwa{h{XYHtlCBG$N&ubM5Q zSwjcanHs$w{25Jkw4~B0*wv%jL}V&8%uxr^2$L7w@&T8AE}A{*$S|R_N(80i(`m@n z9-qGK$4x!G5z$Iv;GU>=PNu1>67;dp!;r;!QtEKic-bjFS2JwaXAtR&f8?>smI!iPeBdJ9 zK+pBG@z53M^HnRiM1{esYt*RoKh8whXOfC4$Uo?i*}(W?Cd^G8%q470ZJmC`3WK;Y z^yZsrL5;glOsBpQ1s5E z!o2COPlVzmL>HuD(z3p1)_-+Ng;UXvgoBzBVoMSLG=N5)cigNZIaucjeeAU%R9<{v zaOZZk^Y$9T)_>ooe|{UJr{{l_PRN}5 zC8{d-lVgX16mQW{!OTS-LkZPUZ6EOz6)Gn$hrT&c;o1P^4nXotj!u)8h+ALd+?;^L z`kK#)YLy6u#HeLeVKay*&h3JXD!l!IDYw#bE4c}2Z4l5D$Ht<9c#?@wCT6)zI$`nP z5km-#Z z-W;115`FRH8SwLpX(Gn$`nk#58@D`dr7XE?LoEJ-^_+S*vVj8My^YEFX2+?`SJCmg z-!4i%q;}acd9*O1l`_GWe1$$8O5toyplqA*cT5?B&IK;x+Hc;f>s+b@wJyDVt1$_~ zgsajH{g49FQp4UOCP{%$Vn>|Za_g@(u9{wCfnDOQP#+m>D_p!V^Zep^!4J>9RpsW% z17?G&f{emRCAGDGYB>s>J!js9tJXa(xxJTMUxw8a>rVql48K_dcML;O2MY 
zzcrqo+8MyhMen3Dq3`kr;aA1CC5yzMv9uvKgeQebA9<@{ER&wuv(1kg96@A2lbwnD zP`#QWY%d6F?WqRgqEb3W(e{h!Kxw>soBOGGEvB0SO6sg24+eb8gxW+0!6z>3RPa|z zp{Y7BCfHf9=B|iai{1{iu0n`FUOd|1ryG9O=16Ec6+DLrxS=-^Y{Ao?iNHoOmXHHqNZ9_=j`gBQf zL5G8|m9p2#&ktXiT`vXWN<|ilCJAc7z_^Kai(PkDZjkAdfnGh6v%Y)Np#$ERrLKV4 z10I+mfnt6I_(jvj*ZhVRTo0xt-}K&bxNXTzS>^=a#Ffvr8WZR@9}8`qhFh{uP4K}} z>e72H;ab4x!dD0CNS;Laxy4#0UK7Ec@ou*tum9#f6f`qLJ^piV_`m%8U-y6L6jzb| zJHX%DQvYT6^F9PwO#afO`q=PstKM(ZJxJF0XHUkz8uuO>|Gi`Awp;Iza&i6<u7)(DRS+9_!HGc<*U{;r&gOKDPe5)cb7w@P literal 0 HcmV?d00001 diff --git a/docs/src/index.md b/docs/src/index.md index e10b99d..a1c42d6 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1 +1,122 @@ # Introduction + +This package provides support for Excel files under the +[FileIO.jl](https://github.com/JuliaIO/FileIO.jl) package. + +It provides functionality to read simple tabular data from +an Excel (.xlsx) file and to save simple tabular data to an +Excel file. + +For more extensive functionality when reading and writing Excel files, +consider using [XLSX.jl](https://felipenoris.github.io/XLSX.jl/stable/). +Under the hood, `ExcelFiles.jl` uses the `XLSX.jl` functions `readtable` +and `writetable`. + +# Usage + +## Load an Excel file + +To read an Excel file into a `DataFrame`, use the following julia code: + +```julia +using ExcelFiles, DataFrames + +df = DataFrame(load("data.xlsx", "Sheet1")) +``` + +The call to `load` returns an object that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). 
Here are some examples of materializing an Excel file into data structures that are not a `DataFrame`: + +```julia +using ExcelFiles, DataTables, IndexedTables, TimeSeries, Temporal, Gadfly + +# Load into a DataTable +dt = DataTable(load("data.xlsx", "Sheet1")) + +# Load into an IndexedTable +it = IndexedTable(load("data.xlsx", "Sheet1")) + +# Load into a TimeArray +ta = TimeArray(load("data.xlsx", "Sheet1")) + +# Load into a TS +ts = TS(load("data.xlsx", "Sheet1")) + +# Plot directly with Gadfly +plot(load("data.xlsx", "Sheet1"), x=:a, y=:b, Geom.line) +``` + +The `load` function takes a number of arguments and keywords: + +```julia + FileIO.load( + source::String, + [sheet::String, + [columns::String]]; + [first_row::Int], + [column_labels::Vector{String}], + [header::Bool], + [normalizenames::Bool] + ) +``` + +### Arguments: + +* `source`: The name of the file to be loaded. +* `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used. +* `columns`: Determines which columns to read. For example, "B:D" will select columns B, C and D. If columns is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns. + +### Keywords: + +* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet. +* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. +* `column_labels`: Specifies column names for the header of the table. 
If `column_labels` are given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored. +* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false`. + +## Save an Excel file + +The following code saves any iterable table as an excel file: +```julia +using ExcelFiles + +save("output.xlsx", it) +``` +This will work as long as `it` is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`). + +The `save` function takes a number of arguments and keywords: + +``` + FileIO.save( + source::String; + [overwrite::Bool] + ) +``` + +### Arguments: + +* `sheetname`: Specify the sheetname to be used in the created file. Default = `Sheet1` + +### Keywords: + +* `overwrite`: Set `overwrite=true` to overwrite any existing file of the same name. Default = `false` + +## Using the pipe syntax + +The `load` and `save` functions also support the pipe syntax. For example, to load an Excel file into a `DataFrame`, one can use the following code: + +```julia +using ExcelFiles, DataFrames + +df = load("data.xlsx", "Sheet1") |> DataFrame +``` + +To save an iterable table, one can use the following form: + +```julia +using ExcelFiles, DataFrames + +df = # Acquire a DataFrame somehow + +df |> save("output.xlsx") +``` + +The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries, for example one can easily load an Excel file, pipe it into a query, then pipe it to the `save` function to store the results in a new file.
diff --git a/src/ExcelFiles.jl b/src/ExcelFiles.jl index 9b7eb6a..bce2a06 100644 --- a/src/ExcelFiles.jl +++ b/src/ExcelFiles.jl @@ -1,7 +1,7 @@ module ExcelFiles -using ExcelReaders, XLSX, IteratorInterfaceExtensions, TableTraits, DataValues +using XLSX, IteratorInterfaceExtensions, TableTraits, DataValues using TableTraitsUtils, FileIO, TableShowUtils, Dates, Printf import IterableTables @@ -9,7 +9,8 @@ export load, save, File, @format_str struct ExcelFile filename::String - range::String + sheet::Union{Nothing,String} + columns::Union{Nothing,String} keywords end @@ -29,100 +30,43 @@ end Base.Multimedia.showable(::MIME"application/vnd.dataresource+json", source::ExcelFile) = true -function fileio_load(f::FileIO.File{FileIO.format"Excel"}, range; keywords...) - return ExcelFile(f.filename, range, keywords) +function fileio_load(f::FileIO.File{FileIO.format"Excel", String}, sheet, columns; kw...) + return ExcelFile(f.filename, sheet, columns, kw) +end +function fileio_load(f::FileIO.File{FileIO.format"Excel", String}, sheet; kw...) + return ExcelFile(f.filename, sheet, nothing, kw) +end +function fileio_load(f::FileIO.File{FileIO.format"Excel", String}; kw...) + return ExcelFile(f.filename, nothing, nothing, kw) end -function fileio_save(f::FileIO.File{FileIO.format"Excel"}, data; sheetname::AbstractString="") +function fileio_save(f::FileIO.File{FileIO.format"Excel"}, data; kw...) cols, colnames = TableTraitsUtils.create_columns_from_iterabletable(data, na_representation=:missing) - return XLSX.writetable(f.filename, cols, colnames; sheetname=sheetname) + return XLSX.writetable(f.filename, cols, colnames; kw...) 
end IteratorInterfaceExtensions.isiterable(x::ExcelFile) = true TableTraits.isiterabletable(x::ExcelFile) = true -function gennames(n::Integer) - res = Vector{Symbol}(undef, n) - for i in 1:n - res[i] = Symbol(@sprintf "x%d" i) - end - return res -end - -function _readxl(file::ExcelReaders.ExcelFile, sheetname::AbstractString, startrow::Integer, startcol::Integer, endrow::Integer, endcol::Integer; header::Bool=true, colnames::Vector{Symbol}=Symbol[]) - data = ExcelReaders.readxl_internal(file, sheetname, startrow, startcol, endrow, endcol) - - nrow, ncol = size(data) - - if length(colnames) == 0 - if header - headervec = data[1, :] - NAcol = map(i -> isa(i, DataValues.DataValue) && DataValues.isna(i), headervec) - headervec[NAcol] = gennames(count(!iszero, NAcol)) - - # This somewhat complicated conditional makes sure that column names - # that are integer numbers end up without an extra ".0" as their name - colnames = [isa(i, AbstractFloat) ? ( modf(i)[1] == 0.0 ? Symbol(Int(i)) : Symbol(string(i)) ) : Symbol(i) for i in vec(headervec)] +function _readxl(file::ExcelFile) + if isnothing(file.columns) + if isnothing(file.sheet) + table=XLSX.readtable(file.filename, "Sheet1"; file.keywords...) else - colnames = gennames(ncol) + table=XLSX.readtable(file.filename, file.sheet; file.keywords...) end - elseif length(colnames) != ncol - error("Length of colnames must equal number of columns in selected range") + else + table=XLSX.readtable(file.filename, file.sheet, file.columns; file.keywords...) end - - columns = Array{Any}(undef, ncol) - - for i = 1:ncol - if header - vals = data[2:end,i] - else - vals = data[:,i] - end - - # Check whether all non-NA values in this column - # are of the same type - type_of_el = length(vals) > 0 ? 
typeof(vals[1]) : Any - for val = vals - type_of_el = promote_type(type_of_el, typeof(val)) - end - - if type_of_el <: DataValue - columns[i] = convert(DataValueArray{eltype(type_of_el)}, vals) - - # TODO Check wether this hack is correct - for (j, v) in enumerate(columns[i]) - if v isa DataValue && !DataValues.isna(v) && v[] isa DataValue - columns[i][j] = v[] - end - end - else - columns[i] = convert(Array{type_of_el}, vals) - end + colnames=Vector{Symbol}(undef, length(table.data)) + for (k, v) in table.column_label_index + colnames[v] = Symbol(k) end - - return columns, colnames + return table.data, colnames end function IteratorInterfaceExtensions.getiterator(file::ExcelFile) - column_data, col_names = if occursin("!", file.range) - excelfile = openxl(file.filename) - - sheetname, startrow, startcol, endrow, endcol = ExcelReaders.convert_ref_to_sheet_row_col(file.range) - - _readxl(excelfile, sheetname, startrow, startcol, endrow, endcol; file.keywords...) - else - excelfile = openxl(file.filename) - sheet = excelfile.workbook.sheet_by_name(file.range) - - keywords = filter(i -> !(i[1] in (:header, :colnames)), file.keywords) - startrow, startcol, endrow, endcol = ExcelReaders.convert_args_to_row_col(sheet; keywords...) - - keywords2 = copy(file.keywords) - keywords2 = filter(i -> !(i[1] in (:skipstartrows, :skipstartcols, :nrows, :ncols)), file.keywords) - - _readxl(excelfile, file.range, startrow, startcol, endrow, endcol; keywords2...) 
- end - + column_data, col_names = _readxl(file) return create_tableiterator(column_data, col_names) end diff --git a/test/runtests.jl b/test/runtests.jl index d1d0372..0415b98 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,24 +1,36 @@ using ExcelFiles -using ExcelReaders using IteratorInterfaceExtensions using TableTraits using TableTraitsUtils using Dates +using XLSX using DataValues using DataFrames using Test +data_directory = joinpath(dirname(pathof(ExcelFiles)), "..", "data") +@assert isdir(data_directory) + @testset "ExcelFiles" begin - filename = normpath(dirname(pathof(ExcelReaders)), "..", "test", "TestData.xlsx") + filename = joinpath(data_directory, "TestData.xlsx") efile = load(filename, "Sheet1") - @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1.0"A"true2.09.03.0"FF"#NA2015-03-03T00:00:001965-04-03T00:00:00#DIV/0!#DIV/0!#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#N/A#N/A3.4
2.0"CCC"falsefalse#NA3.5"GGG"#NA1988-04-09T00:00:0019:00:00#REF!#NAME?"HKEJW"
2.5"DDDD"true1.5true4.0"HHHH"false15:02:00#NA#NAME?#NA#NA
" + # XLSX.jl v0.10.4 + @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1"A"true293"FF"#NADate("2015-03-03")Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2"CCC"falsefalse#NA3.5"GGG"#NADate("1988-04-09")19:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4"HHHH"false15:02:00#NA#NA#NA#NA
" + + # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly) +# @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1.0"A"true293.0"FF"#NA2015-03-03Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2.0"CCC"falsefalse#NA3.5"GGG"#NA1988-04-0919:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4.0"HHHH"false15:02:00#NA#NA#NA#NA
" + + # XLSX.jl v0.10.4 + @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"string\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"string\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"string\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"string\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4,\"String with 
NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" - @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"number\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"boolean\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"number\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"boolean\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1.0,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2.0,\"Mixed with NA\":9.0,\"Float64 with NA\":3.0,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03T00:00:00\",\"Dates with NA\":\"1965-04-03T00:00:00\",\"Some errors\":{\"errorcode\":7},\"Errors with NA\":{\"errorcode\":7},\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":{\"errorcode\":42},\"Errors with NA\":{\"errorcode\":42},\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2.0,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09T00:00:00\",\"Dates with NA\":\"19:00:00\",\"Some 
errors\":{\"errorcode\":23},\"Errors with NA\":{\"errorcode\":29},\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4.0,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":{\"errorcode\":29},\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" + # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly) +# @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"number\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"boolean\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"number\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"boolean\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1.0,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3.0,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some 
errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2.0,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4.0,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" - @test sprint(show, efile) == "4x13 Excel file\nSome Float64s │ Some Strings │ Some Bools │ Mixed column │ Mixed with NA\n──────────────┼──────────────┼────────────┼──────────────┼──────────────\n1.0 │ A │ true │ 2.0 │ 9.0 \n1.5 │ BB │ false │ \"EEEEE\" │ \"III\" \n2.0 │ CCC │ false │ false │ #NA \n2.5 │ DDDD │ true │ 1.5 │ true \n... with 8 more columns: Float64 with NA, String with NA, Bool with NA, Some dates, Dates with NA, Some errors, Errors with NA, Column with NULL and then mixed" +# This test is truncated (... with 8 more columns:) so probably isn't robust - although it passes locally. +# @test sprint(show, efile) == "4x13 Excel file\nSome Float64s │ Some Strings │ Some Bools │ Mixed column │ Mixed with NA\n──────────────┼──────────────┼────────────┼──────────────┼──────────────\n1.0 │ A │ true │ 2 │ 9 \n1.5 │ BB │ false │ \"EEEEE\" │ \"III\" \n2.0 │ CCC │ false │ false │ #NA \n2.5 │ DDDD │ true │ 1.5 │ true \n... 
with 8 more columns: Float64 with NA, String with NA, Bool with NA, Some dates, Dates with NA, Some errors, Errors with NA, Column with NULL and then mixed" @test TableTraits.isiterabletable(efile) == true @test IteratorInterfaceExtensions.isiterable(efile) == true @@ -27,7 +39,7 @@ using Test @test isiterable(efile) == true - full_dfs = [create_columns_from_iterabletable(load(filename, "Sheet1!C3:O7")), create_columns_from_iterabletable(load(filename, "Sheet1"))] + full_dfs = [create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=3)), create_columns_from_iterabletable(load(filename, "Sheet1"))] for (df, names) in full_dfs @test length(df) == 13 @test length(df[1]) == 4 @@ -42,16 +54,13 @@ using Test @test df[8] == [NA, true, NA, false] @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), Date(1988, 4, 9), Dates.Time(15, 2, 0)] @test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test eltype(df[11]) == ExcelReaders.ExcelErrorCell - @test df[12][1][] isa ExcelReaders.ExcelErrorCell - @test df[12][2][] isa ExcelReaders.ExcelErrorCell - @test df[12][3][] isa ExcelReaders.ExcelErrorCell - @test df[12][4] == NA + @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] + @test df[12] == [DataValue(), DataValue(), DataValue(), NA] @test df[13] == [NA, 3.4, "HKEJW", NA] end - df, names = create_columns_from_iterabletable(load(filename, "Sheet1!C4:O7", header=false)) - @test names == [:x1,:x2,:x3,:x4,:x5,:x6,:x7,:x8,:x9,:x10,:x11,:x12,:x13] + df, names = create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=4, header=false)) + @test names == [:C, :D, :E, :F, :G, :H, :I, :J, :K, :L, :M, :N, :O] @test length(df[1]) == 4 @test length(df) == 13 @test df[1] == [1., 1.5, 2., 2.5] @@ -64,19 +73,14 @@ using Test @test df[8] == [NA, true, NA, false] @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] @test df[10] == 
[Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test isa(df[11][1], ExcelReaders.ExcelErrorCell) - @test isa(df[11][2], ExcelReaders.ExcelErrorCell) - @test isa(df[11][3], ExcelReaders.ExcelErrorCell) - @test isa(df[11][4], ExcelReaders.ExcelErrorCell) - @test isa(df[12][1][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][2][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][3][], ExcelReaders.ExcelErrorCell) - @test DataValues.isna(df[12][4]) + @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] + @test df[12] == [DataValue(), DataValue(), DataValue(), NA] @test df[13] == [NA, 3.4, "HKEJW", NA] + @test DataValues.isna(df[12][4]) good_colnames = [:c1, :c2, :c3, :c4, :c5, :c6, :c7, :c8, :c9, :c10, :c11, :c12, :c13] - df, names = create_columns_from_iterabletable(load(filename, "Sheet1!C4:O7", header=false, colnames=good_colnames)) + df, names = create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=4, header=false, column_labels=good_colnames)) @test names == good_colnames @test length(df[1]) == 4 @test length(df) == 13 @@ -90,15 +94,10 @@ using Test @test df[8] == [NA, true, NA, false] @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] @test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test isa(df[11][1], ExcelReaders.ExcelErrorCell) - @test isa(df[11][2], ExcelReaders.ExcelErrorCell) - @test isa(df[11][3], ExcelReaders.ExcelErrorCell) - @test isa(df[11][4], ExcelReaders.ExcelErrorCell) - @test isa(df[12][1][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][2][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][3][], ExcelReaders.ExcelErrorCell) - @test DataValues.isna(df[12][4]) + @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] + @test df[12] == [DataValue(), DataValue(), DataValue(), NA] @test df[13] == [NA, 3.4, "HKEJW", NA] + @test DataValues.isna(df[12][4]) # Test for 
saving DataFrame to XLSX input = (Day = ["Nov. 27","Nov. 28","Nov. 29"], Highest = [78,79,75]) |> DataFrame @@ -114,7 +113,7 @@ using Test @test input == output rm("file.xlsx") - df, names = create_columns_from_iterabletable(load(filename, "Sheet1", colnames=good_colnames)) + df, names = create_columns_from_iterabletable(load(filename, "Sheet1"; column_labels=good_colnames)) @test names == good_colnames @test length(df[1]) == 4 @test length(df) == 13 @@ -128,22 +127,25 @@ using Test @test df[8] == [NA, true, NA, false] @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] @test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test isa(df[11][1], ExcelReaders.ExcelErrorCell) - @test isa(df[11][2], ExcelReaders.ExcelErrorCell) - @test isa(df[11][3], ExcelReaders.ExcelErrorCell) - @test isa(df[11][4], ExcelReaders.ExcelErrorCell) - @test isa(df[12][1][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][2][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][3][], ExcelReaders.ExcelErrorCell) - @test DataValues.isna(df[12][4]) + @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] + @test df[12] == [DataValue(), DataValue(), DataValue(), NA] @test df[13] == [NA, 3.4, "HKEJW", NA] + @test DataValues.isna(df[12][4]) -# Too few colnames - @test_throws ErrorException create_columns_from_iterabletable(load(filename, "Sheet1!C4:O7", header=true, colnames=[:c1, :c2, :c3, :c4])) +# Too few column labels + # XLSX.jl v0.10.4 + @test_throws AssertionError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) + + # XLSX.jl v0.11.0 +# @test_throws XLSX.XLSXError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) # Test for constructing DataFrame with empty header cell - data, names = create_columns_from_iterabletable(load(filename, "Sheet2!C5:E7")) - 
@test names == [:Col1, :x1, :Col3] + data, names = create_columns_from_iterabletable(load(filename, "Sheet2", "C:E")) + @test names == [:Col1, Symbol("#Empty"), :Col3] + + # XLSX.jl v0.11.0. The `normalizenames` keyword not available in 0.10.4 +# data, names = create_columns_from_iterabletable(load(filename, "Sheet2", "C:E"; normalizenames=true)) +# @test names == [:Col1, :_Empty, :Col3] end From 6e038cb20066c06e32dc1555b4eba4a4f3e8cec1 Mon Sep 17 00:00:00 2001 From: TimG1964 Date: Fri, 14 Nov 2025 11:08:15 +0000 Subject: [PATCH 2/7] Correct minor error in docs --- README.md | 8 +++++--- data/TestData.xlsx | Bin 12907 -> 12924 bytes docs/src/index.md | 12 +++++++----- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index bf8add9..6f037d6 100644 --- a/README.md +++ b/README.md @@ -96,20 +96,22 @@ This will work as long as it is any of the types supported as sources in Iterabl The `save` function takes a number of arguments and keywords: -``` +```julia FileIO.save( source::String; + [sheetname::String], [overwrite::Bool] ) ``` #### Arguments: -* `sheetname`: Specify the sheetname to be used in the created file. Default = `Sheet1` +* `source`: The name of the file to be created on save. #### Keywords: -* `overwrite`: Set `overwrite=true` to overwite any existing file of the same name. Default = `false` +* `sheetname`: Specify the sheetname to be used in the created file. By default, the sheetname will be `Sheet1`. +* `overwrite`: Set `overwrite=true` to overwite any existing file of the same name. Default = `false`. 
### Using the pipe syntax diff --git a/data/TestData.xlsx b/data/TestData.xlsx index c5c19f448809978bf49a481c80318574cb96f3e8..d188f4e53acb532a45e43481bfc5b8a4b602dc28 100644 GIT binary patch delta 2934 zcmZ8jc{CJ`7M?LgX1>uC07pca`^IfnwX7v3n*_&?gS-6>TY7RCO zr?xfu4sRzmXm8fds7)}X5r$|Bf`L-+zGgDsCR!RP&@YvIMM^wD+hy z-97^XIAb-1%)>d(*0~Omd-zBjSGcr7QKChjijt|{!LpBCtt+!du{4PS^7AoQX_?P( zZoT~avUc3QnSASIDwok0%#X0y2Aquk(>$lIF!8f`9i;|RC?EcF)KM#4wq%9q}W zmweRXkskEBpy3Fo!GPv)@9_l7uvf*~jd!3Q^{NB~kv3s(U0CfI&CId}(dTElU^1G1 zJFoLNINLz*xa)K0`$+LRJ%L3h&Y@Ss(`3T%8kIzOQ;q1-;tU02!a{VOU8*YTyxG}? zHTDF(T@7hDkH-xtgnr0I_)c)LLJ|8^F$%j*IW}~+U7bGJGp=2_`*ZNc-67*T+2uxl z$V(4PbfKBnf`~ap8uC_5`;aAoKPEaieqQEZr(*@9iQbVFYNXmzJpLzcf~3lv$ZEKs zFq;PmdF;xlw-G2AtEpT5V|Y~1>1y!v+#PCE{jq54hM&X)u|K%kn}M0&2&D zGn8B$j*zI3Bt|5m7R9`F#vEtrX`lOR30`bhm?6z>u*`j`E!O~s0ZQGS{FJVXYWMkW zddMqk6s@UA^0=$K?fR*os@_z)=%wFKH_e4)bl*t#XC=a6l7+gN!{{+xy_xGeTvYWe$_- zx^8;8!UJ^Dwe1=gf#pmN6YEf6Jfp98EjVjiitLO)0dE-o01foqyE`+yeop;i2Z`ATD(amD?oOu>4Pcd)S)7vgAjx%b-u_pgIj;#QA}p%oUkz1agpS%e+c z;{&g#^nkLaBoQ|D#=OER6|1k2+1IiVQ;(sfXY-kj_Hn|^c(IFGiK%tEG79LweoY9( zkC#jP2Cy4dkLT2#rMG!Dt{@Weod(RPezBPmutc1I@!HWy$MG1!4b$y{O~Xik zkvV5G`BSQ?a}crD0w+AYXl@FO?GQ3EJ|Ta}gsJTe$4cHaNll@Tt4np+NOnCuzv`27 zY_d)0^?FNFFs15Du|XeRFZaD$P}nH<~jn zz!NujZ}bF9tv+|I2ocjJPCk(O;X`~#!%lQvJ$-Q>*fEELrIXe=x9w_S8<+@5j%m5g zcY{e?+N~Yami#3~D(>HYk?QaB2=S2)`5H{QK>z@N9RPs-cElibXjo*><2Zs1D(-9JyL@ll?J*${O`U0vyxVqW@6H-tYtC~=(!ddWY_ zyki#d>N}%=UiUO5xOr}U$j3|O+2YSCI{6y7WXgo2%35f^GD$m>o9;q-ART$RVKljw z#kJ@O+2Q-UFd<*VE2QvqEDPf&P2@SgJe|5Z2D@tf$<1*-_Hd^T5G3`I4REvck^IA0{g` zl{=<~D96`5$RNu0KRqeYHxB9qKfu)EkjvirH>`~`!6 z{2|`hot1W3RAXJ1z zbg~Ff*z8g{#xOmzf31Bu#?8FlIEQ1aQ>e=xsk&r;Fe@3H#Kl$ReOBkdf7diLc1A~39cGvY%*|!I@D$Ml8A~~xr zpQl(IW$@)d_9;_lxtw{78x^YEx>c!5Fc>lB5;Di!NIa76+iVNYyA!Lv6{i`E5_dQ* zU}>ke)>ofJ2QIN)u4%S|%a!;+XSnu>+V3|bW8MY4B4xI}U*1jCCyW~{9K2-CElLgC z;vRd!bE1^xsJLw&%e>usc(wKw@{6Zr^;g4?0lH^u^_XGyGlybEwkwCpQrNfZP4Yo`@Si2ritXTZBK@arW4>b 
z{=KW3NcoZT>nn=)qs)q~9OxV@j6D64_WNmYl@Q5xj+L2UTJSCOfU%^Ohp&N@c93BG zxO6{=)s)tkkFPH~a~eqUxOi6?-u21q2v)LT&G{h+?D+BWtL9GQr0%BbwxHyON3`|9 zrlxkXlBg6TbiHP2T0K|FcX7=?%;p()D#BP&?ow!>rbrYrg9(r9w(TO|6?=yGxU-kW z9pnS~F9j0E!a-A<%6ZcAl;@|7TmqkfeL84}3XU8jY%R61WQ-)S3(-oCp+o27xPxoK z7T893lfi3|2g~sbK?a&0EvtYWuX2O9Itb;?oKy`fav&d+s~TeN^{?b_VTm}u(6NVi ze|mO$PH*hL(OdU!J_5h4$#ZY1d}X~>EnJ|psN^MDUKReT0QaU1ceyFtsgzjK;6g30 zRc!jp7VujC>%EJQW~Hg))>-i542h;^!yU2FC`bA@MlpD^qsXyHhqJen$0y&IHA{9= zDdC#l)HwPpqu|#)oy!NA!dAD=-r|qwRL-(A?Nmc-gm~OeITHIlV|cx?LKyfzk7@-+Lp#O;3oUG=o0Gcml&t>U zop-Th^Yuih57%U&-y2exYca!Z+)`B{{Xa9OzdXVids}73CS~I(0+k!)E4b6U%{)76 zs03+tfX~I|Oh;8#-AenB#By8|8V!x8ImdNSLP;(f;da5HIJBu!jj>jIrvj6!dkv(ciFm(f%T?>G2Qal>KZHOmx>DFuGaoyfV+Ua{)S&+@@ z^m*nQw)^(;V8MGhFbMb@Su4ri!BxblF|Bz__3c)BIc+g6Zn~Rw4Ql0Qzgcf=HeJMFo{smnfsqEp+JTFoM(I!X`?dfas><~b|B8i3;-BfNW##_V}; z&&;$E@D7Iz@Wz8M7XvpPvjzXUQCPO;whGozI}dr0w0mL`m9HPYzn$msJ?z$`n=OC zNXGW&bocq--GEl-p!i?apNxCd#+eC2vu+t$3f(s#y`H>mJ@%KH-FjgA@lp9m;=4Ea z)R=)Q%9rVJ(?4MEM>Lbjvw}TNgQ@8OoTk>%GZ`_wKj6nDF)G4YMeH8}8{m z=|ju^3hJ0p*RoW5rD1vJ= z2a$Rc`-C1JS$0={<-Og09~Bh-Qi+0*)a@sQ%A>2^r`tUnuragD>*SGj>LKI?0SidIdiq8<9VR4r6+ib8CK(Cf4u5Xo0&&9#2YSv1k6 zSLd{CDPJygvW>L`o4lN?L1t{)bk2)rWf74i zNi`d*!IZmE8!iEz@XPyFsUN{>ou?>4NNA#Pt1!+zm2EbY2k%#15T7!~n&gj|R=7B` zE3Sv@>HV;K>r~TGK2Vb_a}b-MO*(|(cVMn2Rw9d3{vj{Mc-HAk$eT;&9Fs|IJWE4f zlvC=$M)}+UUTl4<`o<^3$F6~f^%qY9ytq2MBt#P0wA>%X3PrlJ+uEEvN~R>zybY?k zcyP0(i#2m=HB*mID|4eU1D#59OWVAQ(<2Y=FvLxnW}RJ;8s)R)E13~(ye2EB)72uO zM*UnM9i8hKwiG8LhP=bOL0V*h-7XtSbdn5Mbp^{rx^cN!3K#{)J)nqAfS_maH@Jnb zMiDWx?iy^8Cori_tFE7nai|QFAHWxqc30UZ@bu4wCODsZII&D92qKjcYA=~$PEx6g z)R<;W(%OKEuIC=4by1Kh)JKt zYR2n#x`pjri5H}cju7ce>)1(Jr(H8q4Lg}-cARcxXSWiWTfDx3k!B3_=nJR`fVjj7 zD%z}NS7Got94toN@Ux|jbs_&CZad0>+8n&^iz}hf|zv5Ge z9puYZnKJtBe!IE1`Qh-?k+D^p8i$v@Rqc-l^%-M#5RPQHo-1%ac5zhR@WQ)@tV75IU5mzW2G%DGZzT$o-6HV{a5~!XYK-b_Uam zGGez4D9=K~taLfTmh!ZnJ$&HCYx0$QuIdH59=bhmkA;%`ZzMC>`e8v4M>|Lu5kU=X z$q4YKh^)>CA7bOL5C~`L283$L>Ob17Qg*6Fa1|pWpoR`R!leHyfvsNueDR1bcu9ws 
zyRro)JNfml9^;(g=8OW+p9QBvL|-O zIlAOX$l0JFR@%b73{B~((IEY4TZs1Nw})Ac`CP>+3Ob9fB=g5dF$Kj(I7J_-yltM@ zb7>Gd8$GYfDs4u<>8qcv=19AXrgRhx<+1mV;mQGs_08(4kAqW(s7bj^K1Q|*2fbgf zwQ@NF!(HbZvT<8d2<6a-h198iZguUt%fW|#DT~AeZN|h^0BC6cU_7nU<)uTl|Df8_ z_^OaJXR?ZYNmWo9RJCfq%{lgN@a_i>XIgLl<|~DQJ(z3UTXKCIe$`?0hO63A= zT__FeBh)6fy%R7ssUAM)`|2lg77ZAu%o$H({Du!$R=%VL`oYSaZ2kh*n+a*5B(GR9 zWP`=_eWG2fLc(LjSF+AlUlM|%nK?}d2a}>zGr;U0KC8IDWDsCXzn7uP+w`-uP*9_j z>5VfF1esublYe&}hqi0IP9@d=yu3qG_=BNOy;^~NLxhB%M%}3c%Y&b_bnb9mgngX7 zp3Y5O+;@L`99UpymV5Qcg%?R#%xh#6=`9XhUmBih#*8dSB&X$n{YC1}?DUZjYLVkC zmuDb-q2KL$qTEanZySrmJQ<|Vhs~V5y-c;XKd_2!oqqh-^4c&^{;WkiJ?#mwrB1O6 zr7tEZ>;uVYp+?#cS!R`b@R34H8WWSfwaX^Q{`z$^87_crwK(RW@sKS|j7)YCmsF%w zKA0si>rAd_(;(ke|J!AhESBMaPidQi63jC(gqS=$!@sb!Z{rbm2q{0t)_r;Qc9+Myy33=>~{v2oybBis&jONwiRu xrF%gPQogl;y)`8UY7s> diff --git a/docs/src/index.md b/docs/src/index.md index a1c42d6..1a79d63 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -84,20 +84,22 @@ This will work as long as it is any of the types supported as sources in Iterabl The `save` function takes a number of arguments and keywords: -``` +```julia FileIO.save( source::String; + [sheetname::String], [overwrite::Bool] ) ``` -### Arguments: +#### Arguments: -* `sheetname`: Specify the sheetname to be used in the created file. Default = `Sheet1` +* `source`: The name of the file to be created on save. -### Keywords: +#### Keywords: -* `overwrite`: Set `overwrite=true` to overwite any existing file of the same name. Default = `false` +* `sheetname`: Specify the sheetname to be used in the created file. By default, the sheetname will be `Sheet1`. +* `overwrite`: Set `overwrite=true` to overwite any existing file of the same name. Default = `false`. 
## Using the pipe syntax From 884334d851a7bf2cb8bb17a49e315089b89c4750 Mon Sep 17 00:00:00 2001 From: TimG1964 Date: Mon, 23 Feb 2026 15:11:22 +0000 Subject: [PATCH 3/7] Add transposed tables capability --- README.md | 20 ++++++++++++-------- src/ExcelFiles.jl | 36 +++++++++++++++++++++++++++++++++--- test/runtests.jl | 2 +- 3 files changed, 46 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 6f037d6..28503d4 100644 --- a/README.md +++ b/README.md @@ -62,11 +62,13 @@ The `load` function takes a number of arguments and keywords: FileIO.load( source::String, [sheet::String, - [columns::String]]; + [range::String]]; [first_row::Int], + [first_column::Int], [column_labels::Vector{String}], [header::Bool], - [normalizenames::Bool] + [normalizenames::Bool], + [transpose::Bool] ) ``` @@ -74,14 +76,16 @@ The `load` function takes a number of arguments and keywords: * `source`: The name of the file to be loaded. * `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used. -* `columns`: Determines which columns to read. For example, "B:D" will select columns B, C and D. If columns is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns. +* `range`: Determines which rows/columns to read. Given as a column range like `"A:F"` when `transpose=false` or as a row range like `"2:7"` when `transpose=true`. For example, `"B:D"` will select columns B, C and D. If `range` is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid `sheet` **must** be specified when specifying `range`. #### Keywords: -* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet. 
-* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. -* `column_labels`: Specifies column names for the header of the table. If `column_labels` are given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored. -* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false` +* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet. This keyword will be ignored if `transpose=true`. +* `first_column`: Indicates the first column of the data table to be read. For example, `first_column=5` or `first_column="E"` will look for a table starting at sheet column 5 ("E"). If `first_column` is not given, the algorithm will look for the first non-empty column in the sheet. This keyword will be ignored if `transpose=false`. +* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row (or column if `transpose=true`) of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. +* `column_labels`: Specifies column names for the header of the table. If `column_labels` is given and `header=true`, the headers given by `column_labels` will be used, and the first row (or column if `transpose=true`) of the table (containing headers) will be ignored. +* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. 
Default=`false`. +* `transpose`: Set to `true` to read a transposed table organised in rows rather than columns. Default=`false`. ### Save an Excel file @@ -92,7 +96,7 @@ using ExcelFiles save("output.xlsx", it) ``` -This will work as long as it is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`). +This will work as long as `it` is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`). The `save` function takes a number of arguments and keywords: diff --git a/src/ExcelFiles.jl b/src/ExcelFiles.jl index bce2a06..4b256fa 100644 --- a/src/ExcelFiles.jl +++ b/src/ExcelFiles.jl @@ -48,16 +48,46 @@ end IteratorInterfaceExtensions.isiterable(x::ExcelFile) = true TableTraits.isiterabletable(x::ExcelFile) = true +function dropkey(p::Base.Pairs, key::Symbol) + nt = NamedTuple(p) # convert to NamedTuple + NamedTuple{filter(!=(key), keys(nt))}(nt) +end + function _readxl(file::ExcelFile) + kw=NamedTuple(file.keywords) + if haskey(file.keywords, :transpose) + if file.keywords[:transpose]==true + haskey(kw, :first_row) && (kw=NamedTuple{filter(!=(:first_row), keys(kw))}(kw)) + f=XLSX.readtransposedtable + else + haskey(kw, :first_column) && (kw=NamedTuple{filter(!=(:first_column), keys(kw))}(kw)) + f=XLSX.readtable + end + kw=NamedTuple{filter(!=(:transpose), keys(kw))}(kw) + else + haskey(kw, :first_column) && (kw=NamedTuple{filter(!=(:first_column), keys(kw))}(kw)) + f=XLSX.readtable + end if isnothing(file.columns) if isnothing(file.sheet) - table=XLSX.readtable(file.filename, "Sheet1"; file.keywords...) + table=f(file.filename; kw...) else - table=XLSX.readtable(file.filename, file.sheet; file.keywords...) + table=f(file.filename, file.sheet; kw...) end else - table=XLSX.readtable(file.filename, file.sheet, file.columns; file.keywords...) + table=f(file.filename, file.sheet, file.columns; kw...) 
end +# else +# if isnothing(file.columns) +# if isnothing(file.sheet) +# table=XLSX.readtable(file.filename; dropkey(file.keywords, :transpose)...) +# else +# table=XLSX.readtable(file.filename, file.sheet; dropkey(file.keywords, :transpose)...) +# end +# else +# table=XLSX.readtable(file.filename, file.sheet, file.columns; dropkey(file.keywords, :transpose)...) +# end +# end colnames=Vector{Symbol}(undef, length(table.data)) for (k, v) in table.column_label_index colnames[v] = Symbol(k) diff --git a/test/runtests.jl b/test/runtests.jl index 0415b98..79b3ab0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -60,7 +60,7 @@ data_directory = joinpath(dirname(pathof(ExcelFiles)), "..", "data") end df, names = create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=4, header=false)) - @test names == [:C, :D, :E, :F, :G, :H, :I, :J, :K, :L, :M, :N, :O] + @test names == [:C, :D, :E, :F, :G, :H, :I, :J, :K, :L, :M, :N, :O] @test length(df[1]) == 4 @test length(df) == 13 @test df[1] == [1., 1.5, 2., 2.5] From 82cd396dc69bf0f85a3289e1af35481858a99596 Mon Sep 17 00:00:00 2001 From: TimG1964 Date: Mon, 23 Feb 2026 15:23:50 +0000 Subject: [PATCH 4/7] Update for XLSX.jl v0.11.0 --- test/runtests.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 79b3ab0..62da573 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,16 +18,16 @@ data_directory = joinpath(dirname(pathof(ExcelFiles)), "..", "data") efile = load(filename, "Sheet1") # XLSX.jl v0.10.4 - @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1"A"true293"FF"#NADate("2015-03-03")Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2"CCC"falsefalse#NA3.5"GGG"#NADate("1988-04-09")19:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4"HHHH"false15:02:00#NA#NA#NA#NA
" +# @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1"A"true293"FF"#NADate("2015-03-03")Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2"CCC"falsefalse#NA3.5"GGG"#NADate("1988-04-09")19:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4"HHHH"false15:02:00#NA#NA#NA#NA
" # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly) -# @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1.0"A"true293.0"FF"#NA2015-03-03Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2.0"CCC"falsefalse#NA3.5"GGG"#NA1988-04-0919:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4.0"HHHH"false15:02:00#NA#NA#NA#NA
" + @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1.0"A"true293.0"FF"#NA2015-03-03Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2.0"CCC"falsefalse#NA3.5"GGG"#NA1988-04-0919:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4.0"HHHH"false15:02:00#NA#NA#NA#NA
" # XLSX.jl v0.10.4 - @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"string\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"string\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"string\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"string\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4,\"String with NA\":\"HHHH\",\"Bool 
with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" +# @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"string\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"string\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"string\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"string\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some 
Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly) -# @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"number\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"boolean\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"number\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"boolean\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1.0,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3.0,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2.0,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed 
column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4.0,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" + @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"number\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"boolean\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"number\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"boolean\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1.0,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3.0,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some 
errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2.0,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4.0,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" # This test is truncated (... with 8 more columns:) so probably isn't robust - although it passes locally. # @test sprint(show, efile) == "4x13 Excel file\nSome Float64s │ Some Strings │ Some Bools │ Mixed column │ Mixed with NA\n──────────────┼──────────────┼────────────┼──────────────┼──────────────\n1.0 │ A │ true │ 2 │ 9 \n1.5 │ BB │ false │ \"EEEEE\" │ \"III\" \n2.0 │ CCC │ false │ false │ #NA \n2.5 │ DDDD │ true │ 1.5 │ true \n... 
with 8 more columns: Float64 with NA, String with NA, Bool with NA, Some dates, Dates with NA, Some errors, Errors with NA, Column with NULL and then mixed" @@ -134,10 +134,10 @@ data_directory = joinpath(dirname(pathof(ExcelFiles)), "..", "data") # Too few column labels # XLSX.jl v0.10.4 - @test_throws AssertionError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) +# @test_throws AssertionError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) # XLSX.jl v0.11.0 -# @test_throws XLSX.XLSXError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) + @test_throws XLSX.XLSXError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) # Test for constructing DataFrame with empty header cell data, names = create_columns_from_iterabletable(load(filename, "Sheet2", "C:E")) From fa7e83954c81f144584ea4d1e642f75e66e2bf91 Mon Sep 17 00:00:00 2001 From: Tim Gebbels Date: Mon, 20 Apr 2026 20:31:54 +0100 Subject: [PATCH 5/7] Switch to Tables.jl and update XLSX.jl compat. 
--- Project.toml | 16 +-- data/TestData.xlsx | Bin 12924 -> 0 bytes docs/src/index.md | 114 +++++++++++++---- src/ExcelFiles.jl | 107 +++++++--------- test/data/TestData.xlsx | Bin 0 -> 13598 bytes test/runtests.jl | 264 ++++++++++++++++++++-------------------- 6 files changed, 266 insertions(+), 235 deletions(-) delete mode 100644 data/TestData.xlsx create mode 100644 test/data/TestData.xlsx diff --git a/Project.toml b/Project.toml index ee12dfb..3e8272a 100644 --- a/Project.toml +++ b/Project.toml @@ -3,26 +3,16 @@ uuid = "89b67f3b-d1aa-5f6f-9ca4-282e8d98620d" version = "1.0.1-DEV" [deps] -DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" -IterableTables = "1c8ee90f-4401-5389-894e-7a04a3dc0f4d" -IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -TableShowUtils = "5e66a065-1f0a-5976-b372-e0b8c017ca10" -TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -TableTraitsUtils = "382cd787-c1b6-5bf2-a167-d5b971a19bda" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" [compat] -DataValues = "0.4.11" FileIO = "1" -IterableTables = "0.8.3, 0.9, 0.10, 0.11, 1" -IteratorInterfaceExtensions = "0.1.1, 1" -TableShowUtils = "0.2" -TableTraits = "0.3.1, 0.4, 1" -TableTraitsUtils = "0.3, 0.4, 1" -XLSX = "0.10, 0.11" +Tables = "1" +XLSX = "0.11" julia = "1" [extras] diff --git a/data/TestData.xlsx b/data/TestData.xlsx deleted file mode 100644 index d188f4e53acb532a45e43481bfc5b8a4b602dc28..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12924 zcmeHtWmH_-(rx4JjeBr+cXxujyA#|YXz(OJAUMI@Ef8FSgkV8}LvU-{<#lq;`*IHF z+#m19_x{|w$LPJgdw0#zd#zb(R;?-(1t@4B02Tlb0077VrGzV^aS#B25F7x21%QXt zmvDA+w{&th*6?{@>1N36?dU+33k^w|1Aqko{=dh6@CbZ)H=+#51`NNXZtuz0?*WeC znd9l?RK})7qN&UyO^(^7FXbLxEYXRO!;*)|S#T&7wQc!3&P^_v7^UtbLE2d(O%Kj9 
zLNR1cc&b&u9#`Lsx5NwA?B+#+(FfFuJ)Y$9=y`X>mNMa#$Br+%#n*<(n;Z)25a73} zM9D6)t{M%Lya#|9=bH@p3A_5W%DHqpE&=9j9Bh6WlOQTNKqw<2GtIOS4Av4MlN6=e zgXI)U%JdL<(GVw`!ohJgE8g;s%IiDF>QcXg3Isi|3uF0up0stSXp(Ec7s;6s<&vRXpJaXdu*zu!;e%n*%mT`3-4f}DU$m{eF z1-1PQhT%(Hqx$-MTjBGe#*5${Ogf^MMW7v^+~!O2{%{C~n|SJRiLV9lIZNg_c#cFS z`nP~rKxtHkp~MIt)4bNvZ1F=T-q*lOhF33Zo~=)FWo z>|)|2L7~8VQZNA@AE5v$fAQi5O;(CC@Et|4lOln=*x1$5!HtFa=lQ?h`ycGGe|q%t z1f}OatjG}{*}GQ*ms5*x(8U$J#AI8^H3DDCEub~TD&$pp0p-VMz! z3dZdYkYB8Emd9WM1t=Q5Dk4&DTs+|Dsa=z$UCLK`F+8Wvr!G=uQt1Z3ds40LRJq&zkClqZ+dP^EFX2jkaTr>)7_BpEpzHyQ38)=qI%D zH<^gjSv1l}M;~uqG_J4h1fSAC0rdE|apgS_#A^-4A_g!(Hb^{f%mle63?y&&p{d>( zsFbD=@HGVjFT!;ds(upZPG1PcivE_C?&gmmPqI8Wc2ZdxXk(MXqw7!O2Gg2u?kL0H8=C zMn4>#4)N9sCnXxf%@=nRFQcfHOd}Y9@)Z>1`JmW{U_B^pOxY!Y@w-gtc%MLSYpynZ z^%*k5%T&%5wFcX7lBJ)_qP5I9!kCOFLZQaJbVu!)Jycz!$jF?39_mTJ2J6ZVs_BFhI^o^^bASPIx~LjD^_JInyT%&=9;*VMt-e%@w2 zB^))+(5!Jhp-78#BRB+ZQB@>mCVOCpmj~ zYXwI=7)RrfkQ9|G$WYYL1jI--#=VG3NW8sbnKuYc&!%-2>l)XFYiv#7Y#v;j4N+4r z%hixZc;}(so_}9X5@PN&-

TCy?;t<~9yxJvCS!F6V5j*)SEUuU$Z5n(uc>vWD>n zqLO?NN{JXptVAgrM?H9v_n9dIzfwEX&>J_Zk(`;n+?@YSA(bozr+Uc7I80_~Zgd@#yrQ(LCIX{$l;GBB;&sygKbx6hn zcJP-l000319s=yf|B|~uJM;gQJ_vB&3jXbX_VFoc)Gmw_y&n|b9=dLncj$ThYD%ac zh~sP5goeImBdJ80*tj?Eh2BzGZNaie#cDjj)8Q{syGT4weFYKoZk#TrClV&7V35JE zaDVT}^Kt;vU3@E-jX-T0s@BlcukQY(G%tTx=`Jf7ZXBKZSC7$*JdH6#WF@#XPegP5 z)#7?8P*}MXf?!lGO4AkzW4br&t)Qi6w)&&Qvg+2}QXwrpLX~u2Q_f;I4$-^nH$nUg zGZO4LjnFBzQAC(*KT=G!+`8`w^z$HJPz=b4ix*_RL)$c+wlG*8v-vC=F*hTu%Nx@R zhsA=7b!)=y*3IiH&-&Gp_PW{^|8UFVWgb+S)Tz;quOy%L+iY#4+Vk!_y<6#XPMTO? zoWE-Bgt;`$4r$2qhmn1-DEwVD@}Q@3OQ8XP22=n5@3(5W*;rb-yRrPbu>F#cr1{ER zR$v5(Ce@3q4VTsxJ@*j(yVb>%;-D?~}pa&$k76*f~$VPOX^Q8|-QD?ZU? z(y&7ot=TL3;5$sXI0;Q0f+cxyF%l_14lwIuaQeywNaEzLQ^LPhXUMihoH z_+eBS@Q_2vtEo2J;1ay=!6%ZO?OP(V{q9WXkqjACs1J1k^(X>yDFrjWvU~-WumcOn z*tF4*e*V-o;vp<5drRtpUk}Jf5-#;Neqx4`v`>|!FGgyk8InYuJ}Gw*nXKnf3L9RFwy(EsRsEPOhSkN2XtTL-*-5S!{eXNefXDTS9rJ zP*4jA?Jp40^N4cuY@j4g#wd*`19buVc83>YDB8Ae9Vd#YWkx#MI7JCp11|VXAHJ6) zWYGk3l@@mV190}c8rcO<_C{CU|AXi!Y#^*Ed)M>z1Or-hKSXys6M>cRDICZe5#&sK zZ{9Ydh%3@-bMKf&zS%xU>gSgbJu3pJNkIp_r|LDRAYLyLgaz8;&!e$@{H;9;ikSHl zY({(cL1214KK?o-y&F~w6-Ac{qFR&Y=@f+QeWpW4&T>-hy4vp=jKM`%Plw_^h=xDPPPBYsZxexE$yB6?Y{pRlcmSdWjXCg6~lM~zp=P=Sy>T?mqpvB zW=Fr0sZ|SDS=z;8Rqvyj_BcO)lIaNKFEhq5r>0e4O-w>zQ%{OxJ*6WwFQiL1nJ#dM zPL|JsR@fiauMu>}naWF9z9by9%TUnMeE`H>(zNM(52BN(X^*Dui61wH%YgO5aU zzX4&FeqlG>5<4!9w`G@!9&v?pl&>w5;cH#?9)nU*Hi0b_L7vlWid8(9?6P4WlIMp+ zm$BDJ9W|0ZSW#Ffs#KPQ2jQO%G4=}^*m|U8a>>h&c=AJ`vbi~S&I6>;nEtE%;yQZSL{>W-1 zZD%HLhY@LAp8qB}+Zgi4_Wbwllr3CKT3SfurJov%aza zT;DRj8IGp00s$+xRJUkKpb!3$(HP5$aj9OzwXgT|P$y*R?Dw>j?=Oi}+@pB*mA-|F zXS*r$@dmXPM`v{ElO zvl9bomp(r6(W?JFHWjT?z@liJHKHTX>uPF0)e-sVlYTxu4oug3I%a!cEGw(X4>971 zI`%FaDy@WOIcunUO$81PRdj4&>I+2iaWwUIZ)OMOTkKz{<@P$XRyC})BHp%#Q7{km ztuS55-cvjZeIQF>4`vpSz{9MPMR-+^(r%+b(@Tw4hzt~Ax?^_eIH+qn2(f%Bkslv-YojzyIU9X4k9xGQM$wG7=DN$iH+ z6mM!TUa(oqhMoG4<&HF)h_@aZ4H`BuT*AS9uVX;Vn{bYejc_jpkcOt4y_|~SYs%r| z4kXcX?i7pS^1m*pq11#z%*C_FiB0eZk|nr?kBVC!vboQ=z%q<@VkQYtz(RWj-ZyTA 
zw%H^s_Q=^awvwCap=Rm0buHRTvL%%JF48(~>s__KmQ%*(CkV=a_`+=2zs2*8(pp-7 z@nclVpwCFtK}tSll3~I81IR2P18?@UI+#@Brh$s8Z%SUIdOv2i2OBASSVLfG`U=EJ zS6A+%E46V#_xhxqF1Pmm*W7Z(6sgJeVNT|U)xJ*~fv+UA2Sl}hY~}i$S&+)zYxuqG44tl3JEBbkgkYFe}31U>BNO~P0q3p`Xe>|P7Qv}8CL!Q9u z(%CB9krL=`S?#J(oVoO(9&*^9EMQem&-+~DH%pSbGju=ZDjNFc=Jn>-rrObWfPY0Zb320jiK#HAt7n&0$bi+Ej-at=7PBm29e>Q zNGaYZVjF1TKiqt2gQhSqVYJDkw77zY9W5Mvp4c`)s1BvG4sFYBpg&(2hNl&pW0j-# z#;HI z`|*mMTh03_MonYbFTL>FSB-^MSf(%GG~}r~lwi$ACa!N4_mbISy}wMC4dJMWr9bE# zTu(Z6yf$a2PBP4&zX6$rkdIgYFodeoxYCgq^RMYNQJ(p zKi(X^j^Qz2Ofg!!t-;+86Z}dM^a*dX!<(P3dGCcb_JoR+H;D;*=2_3_h^g~~P@WUc zjJ#t@I1g?Qs4*eE%||2#M1ZX_;b<=M?E%|cQ=jLmX1LnkMUP0^i-`^?` zTKF?swMWR$Sr+}uM2$@PB{@#{n*FDlKSsBRg4)+7HvAAIK1<3DO?s-dsUV=s$q44# zenWLaxgj+`ext#Q!uEsC7LMat=&%5&!wbQ=>^&uPZ~%Rii$LVNp++9=b@;TJPn{eI z-K}i62wF$9xp68hSV!sAaU=2tHE&JQU}^ghn%jg)CNadI4%laPc|mO6itoOmP*BI$ z+pp>1Iq;U)g=Gw)Vr}Eqc>?ak?kq$PO8{()*q?0RuupLv_6_Db=iMj@r)Tv%15SCG z(h0*7l_rZKBczHN^xCFpneGSy>SfJFbu-&sh(WL~Whn4$zxG||F54e??75Z040ZVW zd!;W{l!i9lIaj<`s@QuS%9FDY6yCJk5a{N1_(@WX3piEKig&6}QPu#xypf9rK(Wvn z2bSnc_j^NYE_B7B!brh!Mt?YB&u~Q;c{d{8Q48~UUS!pt8oA;a!el>!^nTzS^$6VH z+niK8IMU#m&X7;uH$5>$ugAeJWsnG8+vDj@C|uEH&`@ZCTEFBR{5bC$-Nok-SrU-GvT)^3KK z-WB=AFfPYM_isipap{1WSNUmYz{p7XOQv$I&I^>bfH@C9S`ZNq8{Uy*xRM+Qa+ zuDCn4IIFMa-#U;t!?f6`(<4cCq#{0Nu0lWj`Wc_l+)M-3Vp;Vxtk7EiHlcWCIm)-4 zo+;+0GH0ozp@VW`@5Q4S1Zd6HH?%WD*RXSG=ThJv_2KfS&51d?!!AV}g}~@=dSppR zx7CAGy+8h2>{}>XPY`BuQ8ilJrjF^?<1_6qi{=gr|jaQm( z=yB@{9+ay6RQq>Ib$pnKRLh%ptV*ap@3VEvGz0iY`XtOW8?|?(1Twfa;+;5$)pb^v zGVCeb^o-EQ8mq*+cfU~8Y>~sg9qY8ad&JO|MTmD&S$Osd|PQd9$glmFr8Lrst}fi`e}P}t2^Tx1s9m9p0YX{ zn+erJto`Jv+C>2#u2~tBhz)YN}>-Zhu-oP1Hn#XBUz zf!@iCw#K)v6berb0atWNFpWH%a727y=tf`f{bsGbFvD^2Fb%~>*q z3VWt{RK%%P`HY&~_TCT^RR#gKV%z>Q_Hr)2rCl|F&nUszQT=Jn*!E#Om{qMOp^L6l zyD(-oRh;3@fj8AbnJYc5WF&^oa9QkBO7yEuaKWaNDa(|&wWf+*53v7~qTXQC%OL+$ zA#lqA%D*(h&E3br((PxUP_Aj{JkJgMS+)^H_tT`MORy$Ph~|%f38|z)hR%#?2c<8X5%3ZxjsWKMThVsW=`N#K0gfa zqE{%OdElE-@`SF&PMfRf)?ipf;w2ZJHG(u015ZnY@cp&D3bpB#Pg9bs?E%f%VZ8sR 
zp8SDHlE!NnqW9Y$WJDpU_y(P1F(dQA*pbUAfG!O_mD@oCq!nDa& zAqp@3`u*&;$)hI+VJ6@hYvZ`N1XFF(;nd$EO!E#b9UXjq6jx;ImzO zO6QmD^S2_lGZO1ppB`2=NJspUr$3_M6hhwRwXO0!1h(Ed*0eMIFd?bj6muP!FnL=! zEta-WKE3CP(Z4!koak~-8sYKi0Z||`IDv?_W5wvH_^|3Qd~CsyzbxV}>z8)>EGrx| zL5n=J$BLMMp9y#R-Gd;>7FcCHxXaBnDR;QT@XA*<*XBy_YJIZcM*_HhXF!afrWZ^( z`=EfaJ^;-Z)DqJH4ygaWv(qr73FHFbK?S=G@DI1KF?F@HPD* zZzrMp2FQzol*rG>3tF2tMu@3UKQcFS5~$rk#VgnCmG$yb3Tw9}B}abt>tASx?lT-E z67xGTEnnMyebKov4#{GG@KV0kv_r%+8cAtzNrOP0pFmyB_E=Xe5GTSsjY`=9!Ad{x zC1b%V??(@W9!68HE4U_`d$;-TU-0nX;<|)%`wh$QRIl|#`6Dq7?nzk8dV7q|Rm5Z} z2@FXK*XeF&Z28cR1cc4y#ut}Z7ME+4sra5@*zKLSc&x+7*pTw-*gJ%6p5IpvGNvUl zr|~Sie`pYMcA6rg}(n%jPE9++Kc3?9V44|DZHJDfatnB%}H z`Lllwv!%DYrK^*vgQBgIy&KD)Km7_|GZIGaz@_+pP-t7|ahnHbzm7y=_cm;J%`iBf z#wuVt&nEjw6>v*uJL7St=2_=(UGzMg)A;Obv=CY93R0>v7G0VepVm-AO(LBPtQ^uNBvq+miaD5s8^7dM2)goj z(dj{io5v^o;?$KK3B&zZXLBDIF?!jRqXSOw3PyEtM zDAL!S&vdw{yzP`^)J&Xq#n(znc4ON+?BU#9aQqyRc6?geJ`A{#*5%W0?a~L8AJn;mpmQb>=1p})7J4lq`dupSea1$5?SfkhjEZ(N48)8OYoY+4Zh27V z)9osPgLRJ3mo6KE=W`$P?%j@d-raz`_3tE;X&lPd1V5H7m}D5>qNjzkxr(dv3pW;X zXIIOg&jxPu`ERWgJm>xi#?Qg~27Sv)cc}Rw6~wO&&^q5N+nakOWBZ#b+Z^KR-!vYM zHZ^rdFcOf?yB$_-EpTR%T5KGM5~`LVza^6-q1AHxz>V+Al>7kn=~wHs)nphL!$3~k zn$f1S$JDa-p7wy6LuAdOpi8W#me99Phc@qNBr8G4kl?E+Z^)k}K8NK=3Yb7yp+$DC zfmYya05*ws;74wUtviTv8@KENGE9p_Luz3YU#^f=!7z6jyk(=LmGQ30O9H=oWhC*Xx%T z*Dy34sR24fHpAa6QIyZ9qyit4iJQvCdxHI#btYzb8Jv%Ma&>YA5eIwG%(Eq7QmKv^ zqnw22W>_}obMC_hEw57Wl>Ej0F}-?OQx#=;Imke+#)0wRWbz-lcs{b*oeAcmBbce^ zf99g;ix>Z;BAA1J92tpSPV+xIuApvEBM##-IB6q9wb=5BeP^oya;7vD1s2jn+nELf7uIj92ldB}pU>mAM76?p0!h^pTG$`Xb4h zJ?reS!4WtJXs|O;=&w~%g6>+zTEoyFm{U$gFW7!D87z%kYx6KMtHpSm_lzaCV&3!my-W#GjVu50+w_1u%GP} zvwlo8uhgP9GSuKXRpf5~;zs(o`pwlKMYdOB~e5uw~*T8iE%5zum_^P}& zOcggm_Rn0(G7`m@7yPcnPSE_U%gNs#Pnbl1sD6P$enr?h)47-YhGm?OrbR1yAK2Zth%LT_J}&NLh2 z>o*O0i_FD8LT6D66x<2Z<%mI2y@1}-Fp0k2RsD-8YBov zCUETd&&}e0(ENWc|Ij+FqVQLMzxJvAk0tm`KX@_uQ`hQK!>7G_zfJeRDdUq)zNf~2 z?HKxP3INn1{xbeI{XF`Hv{S%CApRo)&?A zqts#lBg(Im&{LGByYatKUVt~}|KW*#?ae<$dAeu&8|4kbFO;V{xK9C|CiuSr^2vSy 
z{7(0unm$c(ew)ry{LA!d%JUTAY5e{hL7eIr!k@kVAENoEcu%9y-*{iBf8qTlCVguC z*Ff*LEdWqN0|5Lj?0ahdS8e{YIS2ip%>Pj8r`AuG!Qb|<;8pSeX;D;BfB`$^&j0}( OAOLQkVqpIH>Hh%uYA`PV diff --git a/docs/src/index.md b/docs/src/index.md index 1a79d63..afaf4af 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -8,7 +8,7 @@ an Excel (.xlsx) file and to save simple tabular data to an Excel file. For more extensive functionality when reading and writing Excel files, -consider using [XLSX.jl](https://felipenoris.github.io/XLSX.jl/stable/). +consider using [XLSX.jl](https://juliadata.github.io/XLSX.jl/stable/). Under the hood, `ExcelFiles.jl` uses the `XLSX.jl` functions `readtable` and `writetable`. @@ -24,25 +24,66 @@ using ExcelFiles, DataFrames df = DataFrame(load("data.xlsx", "Sheet1")) ``` -The call to `load` returns an object that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing an Excel file into data structures that are not a `DataFrame`: +The call to `load` returns an object that is a [Tables.jl](https://github.com/JuliaData/Tables.jl) table, so it can be passed to any function that can handle Tables.jl tables. 
Here are some examples of materializing an Excel file into such data structures: ```julia -using ExcelFiles, DataTables, IndexedTables, TimeSeries, Temporal, Gadfly +using ExcelFiles, DataFrames, PrettyTables + +# Load into a DataFrame +julia> DataFrame(load("HTable.xlsx")) +5×10 DataFrame + Row │ Year 1940 1950 1960 1970 1980 1990 2000 2010 2020 + │ String Any Any Float64 Float64 Any Any Float64 Float64 Float64 +─────┼─────────────────────────────────────────────────────────────────────────────────────────── + 1 │ Col A 1 2 3.0 4.0 5 6 7.0 8.0 9.0 + 2 │ Col B 10 20 30.0 40.0 50 60 70.0 80.0 90.0 + 3 │ Col C 100 200 300.0 400.0 500 600 700.0 800.0 900.0 + 4 │ Col D 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 + 5 │ Col E Hello 2025-12-19 3.0 3.33 Hello 2025-12-19 3.0 3.33 1.0 + +julia> DataFrame(load("HTable.xlsx"; transpose=true)) +9×6 DataFrame + Row │ Year Col A Col B Col C Col D Col E + │ Int64 Int64 Int64 Int64 Float64 Any +─────┼───────────────────────────────────────────────── + 1 │ 1940 1 10 100 0.1 Hello + 2 │ 1950 2 20 200 0.2 2025-12-19 + 3 │ 1960 3 30 300 0.3 3 + 4 │ 1970 4 40 400 0.4 3.33 + 5 │ 1980 5 50 500 0.5 Hello + 6 │ 1990 6 60 600 0.6 2025-12-19 + 7 │ 2000 7 70 700 0.7 3 + 8 │ 2010 8 80 800 0.8 3.33 + 9 │ 2020 9 90 900 0.9 true + + +# Load into a PrettyTable +julia> PrettyTable(load("HTable.xlsx")) +┌───────┬───────┬────────────┬───────┬───────┬───────┬────────────┬───────┬───────┬───────┐ +│ Year │ 1940 │ 1950 │ 1960 │ 1970 │ 1980 │ 1990 │ 2000 │ 2010 │ 2020 │ +├───────┼───────┼────────────┼───────┼───────┼───────┼────────────┼───────┼───────┼───────┤ +│ Col A │ 1 │ 2 │ 3.0 │ 4.0 │ 5 │ 6 │ 7.0 │ 8.0 │ 9.0 │ +│ Col B │ 10 │ 20 │ 30.0 │ 40.0 │ 50 │ 60 │ 70.0 │ 80.0 │ 90.0 │ +│ Col C │ 100 │ 200 │ 300.0 │ 400.0 │ 500 │ 600 │ 700.0 │ 800.0 │ 900.0 │ +│ Col D │ 0.1 │ 0.2 │ 0.3 │ 0.4 │ 0.5 │ 0.6 │ 0.7 │ 0.8 │ 0.9 │ +│ Col E │ Hello │ 2025-12-19 │ 3.0 │ 3.33 │ Hello │ 2025-12-19 │ 3.0 │ 3.33 │ 1.0 │ 
+└───────┴───────┴────────────┴───────┴───────┴───────┴────────────┴───────┴───────┴───────┘ + +julia> PrettyTable(load("HTable.xlsx"; transpose=true)) +┌──────┬───────┬───────┬───────┬───────┬────────────┐ +│ Year │ Col A │ Col B │ Col C │ Col D │ Col E │ +├──────┼───────┼───────┼───────┼───────┼────────────┤ +│ 1940 │ 1 │ 10 │ 100 │ 0.1 │ Hello │ +│ 1950 │ 2 │ 20 │ 200 │ 0.2 │ 2025-12-19 │ +│ 1960 │ 3 │ 30 │ 300 │ 0.3 │ 3 │ +│ 1970 │ 4 │ 40 │ 400 │ 0.4 │ 3.33 │ +│ 1980 │ 5 │ 50 │ 500 │ 0.5 │ Hello │ +│ 1990 │ 6 │ 60 │ 600 │ 0.6 │ 2025-12-19 │ +│ 2000 │ 7 │ 70 │ 700 │ 0.7 │ 3 │ +│ 2010 │ 8 │ 80 │ 800 │ 0.8 │ 3.33 │ +│ 2020 │ 9 │ 90 │ 900 │ 0.9 │ true │ +└──────┴───────┴───────┴───────┴───────┴────────────┘ -# Load into a DataTable -dt = DataTable(load("data.xlsx", "Sheet1")) - -# Load into an IndexedTable -it = IndexedTable(load("data.xlsx", "Sheet1")) - -# Load into a TimeArray -ta = TimeArray(load("data.xlsx", "Sheet1")) - -# Load into a TS -ts = TS(load("data.xlsx", "Sheet1")) - -# Plot directly with Gadfly -plot(load("data.xlsx", "Sheet1"), x=:a, y=:b, Geom.line) ``` The `load` function takes a number of arguments and keywords: @@ -53,9 +94,11 @@ The `load` function takes a number of arguments and keywords: [sheet::String, [columns::String]]; [first_row::Int], + [first_column::String], [column_labels::Vector{String}], [header::Bool], - [normalizenames::Bool] + [normalizenames::Bool], + [transpose::Bool] ) ``` @@ -63,24 +106,35 @@ The `load` function takes a number of arguments and keywords: * `source`: The name of the file to be loaded. * `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used. -* `columns`: Determines which columns to read. For example, "B:D" will select columns B, C and D. If columns is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns. +* `columns`: Determines which columns to read.
For example, `"B:D"` will select columns B, C and D. If columns is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns. If `transpose = true`, `columns` should be used to specify rows instead. For example, specifying `"2:4"` with `transpose = true` will read only from these rows. ### Keywords: -* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet. -* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. +* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet (ignored if `transpose = true`). +* `first_column`: Indicates the first column of the data table to be read. For example, `first_column="B"` will look for a table starting at sheet column B. If `first_column` is not given, the algorithm will look for the first non-empty column in the sheet (ignored if `transpose = false` or is omitted). * `column_labels`: Specifies column names for the header of the table. If `column_labels` are given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored. +* `header`: Indicates if the first row (column if `transpose = true`) is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row (column) of the table.
If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. * `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false`. +* `transpose`: Set to `true` to transpose the table, reading data from rows rather than columns. Default=`false`. + +### Examples + +```julia +julia> PrettyTable(load("HTable.xlsx", "Offset"; first_row=2)) + +julia> df = DataFrame(load("HTable.xlsx", "Offset", "2:7"; transpose=true, first_column="B")) +julia> df = DataFrame(load("HTable.xlsx"; normalizenames=true, transpose=true, column_labels=["Date", "Name1", "Name2", "Name3", "Name4", "Name5"])) + +``` ## Save an Excel file -The following code saves any iterable table as an excel file: +The following code saves any Tables.jl table (such as a `DataFrame`) as an Excel file: ```julia using ExcelFiles -save("output.xlsx", it) +save("output.xlsx", tbl) ``` -This will work as long as it is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`). The `save` function takes a number of arguments and keywords: @@ -92,15 +146,21 @@ The `save` function takes a number of arguments and keywords: ) ``` -#### Arguments: +### Arguments: * `source`: The name of the file to be created on save. -#### Keywords: +### Keywords: * `sheetname`: Specify the sheetname to be used in the created file. By default, the sheetname will be `Sheet1`. * `overwrite`: Set `overwrite=true` to overwite any existing file of the same name. Default = `false`. +### Examples + +```julia +julia> save("myfile.xlsx", df; sheetname="myname", overwrite=true) +``` + ## Using the pipe syntax The `load` and `save` functions also support the pipe syntax.
For example, to load an Excel file into a `DataFrame`, one can use the following code: @@ -111,7 +171,7 @@ using ExcelFiles, DataFrame df = load("data.xlsx", "Sheet1") |> DataFrame ``` -To save an iterable table, one can use the following form: +To save any Tables.jl compatible table (such as a DataFrame), one can use the following form: ```julia using ExcelFiles, DataFrame diff --git a/src/ExcelFiles.jl b/src/ExcelFiles.jl index 4b256fa..9259cec 100644 --- a/src/ExcelFiles.jl +++ b/src/ExcelFiles.jl @@ -1,11 +1,8 @@ module ExcelFiles +using XLSX, FileIO, Tables, Dates -using XLSX, IteratorInterfaceExtensions, TableTraits, DataValues -using TableTraitsUtils, FileIO, TableShowUtils, Dates, Printf -import IterableTables - -export load, save, File, @format_str +export load, save, File struct ExcelFile filename::String @@ -14,94 +11,72 @@ struct ExcelFile keywords end -function Base.show(io::IO, source::ExcelFile) - TableShowUtils.printtable(io, getiterator(source), "Excel file") -end - -function Base.show(io::IO, ::MIME"text/html", source::ExcelFile) - TableShowUtils.printHTMLtable(io, getiterator(source)) -end - -Base.Multimedia.showable(::MIME"text/html", source::ExcelFile) = true +# --- Display --- -function Base.show(io::IO, ::MIME"application/vnd.dataresource+json", source::ExcelFile) - TableShowUtils.printdataresource(io, getiterator(source)) +# Radically simplified - now relies on universal adoption of Tables.jl among consumers. +# Retain only basic show method. +function Base.show(io::IO, f::ExcelFile) + print(io, "ExcelFile(\"$(f.filename)\")") end -Base.Multimedia.showable(::MIME"application/vnd.dataresource+json", source::ExcelFile) = true +# --- FileIO integration --- -function fileio_load(f::FileIO.File{FileIO.format"Excel", String}, sheet, columns; kw...) +function fileio_load(f::FileIO.File{FileIO.format"Excel"}, sheet, columns; kw...) 
return ExcelFile(f.filename, sheet, columns, kw) end -function fileio_load(f::FileIO.File{FileIO.format"Excel", String}, sheet; kw...) +function fileio_load(f::FileIO.File{FileIO.format"Excel"}, sheet; kw...) return ExcelFile(f.filename, sheet, nothing, kw) end -function fileio_load(f::FileIO.File{FileIO.format"Excel", String}; kw...) +function fileio_load(f::FileIO.File{FileIO.format"Excel"}; kw...) return ExcelFile(f.filename, nothing, nothing, kw) end function fileio_save(f::FileIO.File{FileIO.format"Excel"}, data; kw...) - cols, colnames = TableTraitsUtils.create_columns_from_iterabletable(data, na_representation=:missing) - return XLSX.writetable(f.filename, cols, colnames; kw...) + XLSX.writetable(f.filename, data; kw...) end -IteratorInterfaceExtensions.isiterable(x::ExcelFile) = true -TableTraits.isiterabletable(x::ExcelFile) = true +# --- Tables.jl interface --- + +Tables.istable(::ExcelFile) = true +Tables.columnaccess(::ExcelFile) = true + +function Tables.schema(file::ExcelFile) + tbl = _readxl(file) + return Tables.schema(tbl) +end + +function Tables.columns(file::ExcelFile) + return Tables.columns(_readxl(file)) +end -function dropkey(p::Base.Pairs, key::Symbol) - nt = NamedTuple(p) # convert to NamedTuple - NamedTuple{filter(!=(key), keys(nt))}(nt) +function Tables.rows(file::ExcelFile) + return Tables.rows(_readxl(file)) end +# --- Internal reader --- + function _readxl(file::ExcelFile) - kw=NamedTuple(file.keywords) - if haskey(file.keywords, :transpose) - if file.keywords[:transpose]==true - haskey(kw, :first_row) && (kw=NamedTuple{filter(!=(:first_row), keys(kw))}(kw)) - f=XLSX.readtransposedtable - else - haskey(kw, :first_column) && (kw=NamedTuple{filter(!=(:first_column), keys(kw))}(kw)) - f=XLSX.readtable - end - kw=NamedTuple{filter(!=(:transpose), keys(kw))}(kw) + kw = NamedTuple(file.keywords) + + if get(kw, :transpose, false) + f = XLSX.readtransposedtable + kw = NamedTuple{filter(k -> k ∉ (:transpose, :first_row), keys(kw))}(kw) else - 
haskey(kw, :first_column) && (kw=NamedTuple{filter(!=(:first_column), keys(kw))}(kw)) - f=XLSX.readtable + f = XLSX.readtable + kw = NamedTuple{filter(k -> k ∉ (:transpose, :first_column), keys(kw))}(kw) end + if isnothing(file.columns) if isnothing(file.sheet) - table=f(file.filename; kw...) + table = f(file.filename; kw...) else - table=f(file.filename, file.sheet; kw...) + table = f(file.filename, file.sheet; kw...) end else - table=f(file.filename, file.sheet, file.columns; kw...) + table = f(file.filename, file.sheet, file.columns; kw...) end -# else -# if isnothing(file.columns) -# if isnothing(file.sheet) -# table=XLSX.readtable(file.filename; dropkey(file.keywords, :transpose)...) -# else -# table=XLSX.readtable(file.filename, file.sheet; dropkey(file.keywords, :transpose)...) -# end -# else -# table=XLSX.readtable(file.filename, file.sheet, file.columns; dropkey(file.keywords, :transpose)...) -# end -# end - colnames=Vector{Symbol}(undef, length(table.data)) - for (k, v) in table.column_label_index - colnames[v] = Symbol(k) - end - return table.data, colnames -end - -function IteratorInterfaceExtensions.getiterator(file::ExcelFile) - column_data, col_names = _readxl(file) - return create_tableiterator(column_data, col_names) -end -function Base.collect(file::ExcelFile) - return collect(getiterator(file)) + return table # XLSX v0.11 returns a Tables.jl-compatible object directly end end # module diff --git a/test/data/TestData.xlsx b/test/data/TestData.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..420a8cb485ba0421407119a0e585a8a48fa428d6 GIT binary patch literal 13598 zcmeHu1$SFZvb7mwW{8=YnVr}%#>~vj%pAuMGc!A8W=_n^%*@R6>E_P&a>tYT1@E1; zq$8Yl7c5uY5T+*y0k3_wZUnf-kX^Q31h^QlKY_1= zA*qL2>zKDvs>`&G@|^~A!WD9T3Qk{8#qzrmH@kqGG^wLaQtU}V3U@IJQ=Q=}zAtwX z21ad$9H{k*sT`hM6zDyoriz+z1HZsm98gO+oQOdQO;w!GEK*%9M5)cJuv=->DQ4EW zTuudi(FQo->nkXb%)c?YR*`}D3NRuGASx_?$=Y^?7WVXXe?9*ngZ~%P_8*sC5-s)l 
z2gAFd(@#%d`fjI}BN2rpoCQBM6DoN5h%F)1hUbvrEVq;6A}V0}gNk~$dOZy+F7t#R z^by`{u#|)%qjD41IhO_{JlHxyQjyuki`bT||3q?{zMj5G5*2r)aB7X9DsCvulIUM0 z7MVQ%P>DE7r;H7al7}0F&YR?`)+435qIXvYJS`-DTpCnS&z^Y@Gn(Q(8=t=q#~;io zc{GuP)@QG0JYVL~XF+&$ZT^?kfz@c%=1>jSRevUiOzlzcD6vddn0~tD zB_os~&ylSoCcSeS*|onP@eTdF*u<3nJu-|P1tr2_GE=HkVunY0s2#l#EjdUbmr|@7 zhl^3WkcD6KSgP? z(}b!kVB|pP|LDiZoc6o3asZNT%mR5<%$5ZDi z)=!IGWVIs-hS+FPRMbh?+dtpw^s`?r2kkA?=%b}_$?kEGHSvtyD|LX_WhjJ%1W|Bp zhl3OyOT8+pUC&LjLL%eXKUnvI1ug)?_;R0-e3Xp8h%c-i|7wOo0g+ApU-^92GJ+xFSEzJ5p3I!!lA4G|BkL=TvDXZ=XO$z9&JGV*pz>8Yu1ov6 zKdpM-Wa1Fak_4hS10LvPnI3jIgjp%w2h0kRaTmF;^5Oa?QW%ZVTBGMgULJE+TjNG_Sfzk)9kqJVK!* zJ;wjiVNuzhwSbyx?OBA7fZW}B=DzhJA{Istt-fna7{4BBQ93tI@J_ABD`3JfvUCbo z-W``SK4lZP3fw_lezmqmmG1_GC&fssY`$1K<6McjjBgH2c*h5z#T zMAyD{agKRiGpYA7$;q(%nYokuj;C-hsq6kZ$!~I$Jv4>cu)Ml^_REvT`W9Il>Fyx; zrR&8l(IbjB0pI=KeYC_5n*}hS2Hpm?`5S2GjC}S^PJn^g%2q!$c9AaPoMBkPw9o z8WLFoJAqrdbfx2Ley)rgXUC+jA)*LJLG{&ZLM) zPb#=0+v7n`LY_azeZYN(Mym9canwiH*b5b9wdt?CyYJmX;wX}DPYkvpgik>#3a1piSv zo>%Zc$xEFe7^@K&5Ks<05D?~Xd9gP!G<2}1|8-*gOJ`D*H6wD^P(911UOOfdZU}wu z#e<{6#v&!Fl7>o@j&HIK{Dmd`AZ%c^^cAk#Pg&u?-1DiKOvoUYTW8ugi1QX#*u}ERa~Acb4tel%EC4NlsAVYN7WID zrthd6|Cm*VvHmT$VQeqmz~=+YXItoqdnNiC&dwAwndH5E-3GUiP08(dKU!cR!?QU^ zfZ-4*`^BZBW55!0yemU*KMuzSY3|9Tw&UKkM&`R`s`QF!v?=8lMF;)pX3OE!B44}3 zn&(g$;e1Luv!0$*I9(lz?a`Hn#g4;fI9%GPKXGp=*sRcXJM<>UHaO(u?2+uCj=N`! 
ziJY`aQf4~zH?R`!>54-@I5#SJF~5adMR+nj9Jh9>fBSe5W{pN%f8>YQugyeM73z|2%(&U9*?f*H9|N4xL#}5gOs6*d#xF z8l6Ha7;fRG%u=yPqo#EIEF2Y`wh-`zNZEF64jIOhOWvy>Gm6Z_2o(KX z{4BN~u;?|6Bzlu-0;i^P8V?7T?SH*Rc!^@yV63fmC(TuD^i<>O?Kyl=WjBSIjx z72bM}Cn&|l1QbtpBR7oHT9$azh3LEp++9N>6HkQ3V~L_tw2Y9!bBqUW z|Ib188nI`LvX)>O9)0h$=(G!%8&wwkZRM3eznHm^o82$n^tGtb^nn?F#W6UrPgHK7xY;8(qhrX$te^zkci%?x1IeO5MC?e3O7>1yRA-|~9 z?`x=_nytgB;v{6G@S}8Tu_!XkYVf0~8A>noao7c}a=>@oF2v^dpXY=~dc(Gx{659Y z!97g-Vie^Y_9JSDY+qJlk`hq^RjyOcmXo69&?J@JHby$c_^r~HeMY$=T9;beoL-D= zUp!zQ^;$}STYoijo7=$PansEC-TDx_0FSoX1t;%tdsrVAFl zCwZeK4P-cL3PfMZcMjfxq=_b|QVUlFh5afGc)g-{EZ$XJMrxOtas=vI$%H z+~wji^z+&hDin?L7K>=)Tj{pD6Q`_PqPAnkQWVOaa>_p2ph7;@I1O7=gQq^xK_u_v z2+5lsO;5+bMv^gDjoxo}yRkEw>(c}#MY;#v(KZ)Pvh;L*fm|qDk6J_p$yZ$9=gxoh zyY>Qq8CAJH##CP9Qku&z7uPGND;CjW{o;2rk3n_IqfBAgT=g-6FVF{|^Dn~1 zSz4zsAoi|)UT_xh!cQ0e6q~CiJeF@dQPVs7jq$5u$B+|Vk?E?ZaVlcwa%`QMd;_=F zZTqH2(rVg37ncknRylfKwWh&%$XS@;BKX4#?fdS&Xc%QII+u?adr{&J^&XE#URxW2 zClre4h*XUN+<6B{o1dz3NaNYcO~KPsLy@T)(9}g|{5DEl3u$my9f=EmmIwtgpwu(C zq%#YlNZ*i_#`;33>3~0Mf1AD#qVA_{Ko0%l$xGBy0$iyJE*}Ahs!E#VIj5wtCI#%6 z(SnF_NU}EjrF|_rzmmUSfr+SLtFg-W2y^)JUt_76wr(f2t}N{2F>*EUAo`y3k<_yeue9ik1MB<1^JW*+@)#4;GUfHTQ1U|(m1n_0 z*Pb=F!y+2NOYkNHqkS;WL04veeoWqhk=neo9co$%SyVfhzKPAo^(R3{FQ6FM5A$Fq zL}VfF_d?LSK|HSrG+xZ2_sl=7*Kjt?CU}@KPh;A5Z&y990(GRV}kpLJ}=Qd zn6aBFlC8@sgi>;5M|U^whXhhsQpp$jLi3P-j+mu?N{EU4X>V9}e>8?;+<)giN~7ngY+#XJv zMsn*5WLHFw_mNqAQWK(LpWXJF#|0$yo?g1~C6C7<{N|qC_2%Q?-g$B0x^J@oI9(V= zGqAD5mn=0}Ocd7GqfzR@cv_xc7C9Ezycr$8z7ecF9c!vMUdE4Yk(s4u96>nS3JQ7JrR(= zd{?1V!zWXmtaIMf=pg#ayeLp?`uaE`293({RY@H6apd0P)Wf*Ic~nvCCNo^&I_%pQ zV)0CLs)TDd5$_ij`21MR!Nb&qq980!VByoM(ToZI3H55=3NYx}r19`tsjfsMoWcgI zA_%i9RB`2{4TbU~QHHJK5M%n&Q1^J?g6W241MA`aC|+o%{%cWpbsU1#oG?jA&wX1wwg#IoXK z1K$%RIWlS&;8!Qwj>nW;wnH<+CkC}Tfb(RvLrr5zdY<#=b? 
z9z70z=_W|`hKBHMe+OxcDFx|;JNjdrDbYRWjDC1RLoZNM>4Pw1&aLhRqHKJgL1W_{`Rk=H*j&!gHbaPGunaADf0 zkDVRwVC-5eUDW(^gTFS8e7Z;g$MmKBIw8z|1gdE>SglYD+Ig{IHfiIm^0d|NZS>v7 z{L~h>>)lS}C=b7(dzf?!7yb%||4P41p>j%#sO7}L9k~L|&y8tZtv=_S>cSN$(Nm;v zYU}5DHpVyEuc(dB3kN>t5Q_S!k0n&PrfkwoV%#&5PghQ~KSdQ!r|U@_khd#8FUjT%o!Iaf$y zYmRY*Q2^6Qk~+$dY4Z>22!6fPP;OzyhH(QH&6w40FOlxS(|}!rMy{bYIrPk6-uVC< z(mZyRX}#2RY-7kzxF9JdcghUCwN0dmd+x@;`C7?$xM)7N-<)BA)n<+z{F2%1Q+j)LrrhKJK8>8r@n zC+)0QEe-5D3>GbV;U=F9og26Jy7A)`WG?l0SBLg-`tq+$o>+!tHW{{BnvOc49Wea5 zNKyt)8g~1RT|rkHEpwf6{oRI=<@L%lDSpuWt3=={%D<0P)gy3NQN2Vr_*E{Y+2@Dz zR9V%JcP0W)>J-^&4{~B`jh0+`h!^KAS7aW80|F?5F-TTi1O)PwHu9d51Boc)UO{@( z4SJ+;>uA-#XIkYd_FSfIj_%@mw{1;qyF&~2e)u#n=_J!C1C1yq%9Cq41#boSfUof` zQh@`Gag)*%l9|YVko&aV8QQuykp#@wld9g9J2-Bjj-71_D!H5L*f zTxJdBETtlRSUj&XN+%hDq8qxQl@D(cNf7jyX-<{nl+i^pZVHZ=>|2fbh6<(y$9J=U z)P8uBJyQSr9zAW)f;rIp6w z-1Lj9F)e|y?CVYg>kn$Z?CnolJ_xd+PUkgaUMiFp*MhBXXCnZ?8K{h-epeOgaRF0Y z>I{Pi{|Lzvl6A(EY6m?WH!R*>1^#+nVAPfryk_Y~Yd#E{*cV4O4A~!*nPdiu?>pP! z*W&WzdDz1;VQEV%U^@Z=170w-38 zay919L@sm=h*i8fsEbKr8xQ!b4NJx0Z;$;)XDTc0nf)x2~q zzzs!@RYW|A`BClmjJ3AxjC?oO%bnk27FJk54fFUC(h@mXtQk_NaHC==a#zox3Dd`y zv!=ct626C~yI0BLMm^R&fa~{xPZOdFzcB@IO}d8?DoUHfT@i6ly%$*~?WIE|12e+A z0PRX;(CU;AM9CNgmQH+s23n$r-=C9&Ip2`35V;@B@Xd`dYQorFv%{q{R~y{+yx=)< z7#V{SHSNv=PiMr^AmK9?qcjt}adHpajWDM#a`zsDGT$AcvvIIj5YL)}Ws|l1M((2p zVFP%RnLHKjhxR0x&vfO8CnLRBIQn`D5C*HVmjU_4;*ZgVv#Vd-+NfwlAIdUUzSp*s zY#-XZ3Id;Qn7a9Qq^fFmtnA-%Kf}M=KD1iVWwe_m2*F`%oy-ic@TxXD;>-7Zn+l5p zHFY{gjxVS{2;WiB9f|Ljr3+m@e9Bx~Bk~Q0L)P$=RP;dId{=j;Xpb1awd6=5+e3Qv zBvs9c97DRg^TZ$p@0ysQQmp97HQX(%r&yi<`bw%fvAvoW;#gg| zP}ji_Y2`NIxCV<5`peeS8oWMJb^kjhz^VO3N7uNU3xASh0lbk-OZ0|%@3^4Q%CMg$q zI@+Z-TE$U1?(NIa7zvBzxSmUGsfG4MvsFwf!Nh0BIE~E}{WzY3GCtOMNA7)>Y#_re zck0daP!VI01EB#aO#=?OC|Dqj>{tMyy$~WE8;)_QD1`vZnzj+W zZ(vabKKg_#DXiBZ_ozmlmTTgkn8KqBZKb~F>AG_v`ZM9OsI7}%kOh^M9z~^FbrC40 z95jZ&G*2>NAkMH*-@t>qTH;QXxi)1o_P`?JpdOF5tOeufy2uAuuAT3}zSk`H%wP_z z0Y5&`SG>AS^SXbZiK_lkt-<_WalANRTXJ=Y$hwjWL-vaz-SW;Hfkde}Z4ErkbhCJB 
z&ho?@mFR`-<(Or>(GTIm?I)v++4~Ucu0TzSKF(|Dpo2(ZUN!kGI z&8jnHe_a^7MDlSr89n{zGshR@`9*1%tDj-pYTF!5FpPRd6RgUMxUVlN5cXjuIYs$G z)bX33)pwTXgDFEz{yGTUJ-8rHhW0J?Wtzx(#qVRLwD#jEIZSJ!raEUVzFNZN)Ltj# z64+vn1m|k31M5;2C-{6aYdc!ZoT$qCX5FxMGN!LfQoQ|GIuT?Amb>r~x>N>ZSTc;u z{@sMScQCQ5Va;@SGl)iZN|5qVuLFjGpoyah$kr_3M@s0;&;{;G@!SB+o1eU%4SiEu z@e`^F`>l2YyZr${F}JLk#?T_5)a*?`e9!mhGGw}UuJy5YrpM%0C*M3;e&qJ`1uGkb zE6JI{imEDkvQ$aoZUxUe3>LOj zPl)Hv_*&H7cbOICVa@m9fH%eqIxuVy*m}<8iE_b**HUqlnj3VDbYjhB$|9$P@9vd8>80 zk1xxGADcTJuJUUpNJmL?b(s*o`7Wekd4oJEMXnLtGVHAj*U-7EXRldXLdVeFbriqp ztnk)d%gfp}{;4B#c6z>KO8DOHZ z(jYJA9kF9gQOk|{57=lY#ZLCAzqozMHo4=u+nUVVjD{}zrtuytSF4AF{ov6_proItQJMtf~t4*W2AGE#avnN-i5X5c0l?cwX#wHFKT<|!_4Dn)oj znAcptJ^Y>&zLBnh1zYX`^qX|`VewB+5-I9&Hh?|b3 zaS(b9Xdm$^-FAN65Ll`H6$Na0E^K)@({oipFZ3Y&WKwAZXe0F;ADX;%4y?ui-S`UZ zCx7*tm&RiYqp+~PP!7TMR>M38mGg}cZQ$`+*Mv+KElnoJGu{CYOmdvxi0*OYY=ctSlNUwAG~G_(51vNhEuGY#+HqQpO}zo z1W^>d^{2WoH}-jx)1v&*k8eKfdu3Grxpk-Fr43RXFwNnBWSHphX*P6mFtoGMwU9Km zGPkGy=exf*kfKM;0G%8?r~a+}=dF&&Ju1R6U3(CLm4kJVHYj16L1w*k+>>I7F{w}Kl0;k$tG zv*Te>yXSs&?7o3bxL9M|E_A>eFwg&~6G3C|i%S6`2!OkR`1hUA*R{|WHqkY;`b#P_ zA_if)7!W_+`n@6=S<820Ow_=3YHIF71~^!N^yiWYUY&;PggP+Wj52)TR=l)}=Vkn` zDb~?M;Y&!ligf_KWSuaLq#_y3Wy+i7!!BZSu$?V8QB)ltq{1eHnkkPhohq6)-mh{8 zOlghB$B|_BjyFPBVE$uxV(NBE)slSzxQucLIPfelLku@i^=iLp%i}tfeDP^Ss@ z=Y__cXZy4LxQ9RHz^F@ErK*6-G6gsp5@1W*z*=9%&f3PFUfac-h0&kxjVYKt^H2UTwOmVx16L;HtB>_Jvbr;j7zS(jILz zZQ&A2L2*hH&trgfHi6V-ruDV^p98s5dH&(gh8|Y{qGG%?Kh_Jxyhz*A3z8 z7hmo?pRXpAt^>`Sx`FZ-jRQ!$XjPrz?aVwdAw@iu)YEGjd5%9gwAfuCLyjfC7H zD&_@r^`6w7mg+2OB(8P;3G%}HcffFu+LrnBh~oYDOaih{+I*sR>Xh$qppq3|G6Vz z%Kmkv#zpQ=8eF}ybvWHE zC1SQb1IuSQO)brF&A%$2;80PTiD5!%02lT`0}yS9SNU`)2&sdD?-NFHzucoNx*vAr z_(gt%MS+*H@=<3*JX9#52t1vci~MMXvoyDZgZDKnx4v^GkvE;hL?l59{W}7| zYD*WND~-HUq|gt#m&jQmNF4{d3UWl#B=;*fvfNQFjHY;I-!tpc7yYT;MR>>G4SXS zS82XN21Pjo;)k~DXltVI#p?V4i0+Sq`+?9S`Om|f!x`ke!`(CbhZV0T9&DTZ^2Zj3 z0sEV0@>iNkvu8)*j|7M-2p27V@Zo1N-PIZJZd4ToEf!~!z5EF9by~m~(2Dd=Ik*2;W70oA`z7(ex|1v- zh5YW9infI|Gm-dwXkitB(H` 
zj%F9b^!r9;9`DP6W*G>&7ppq;*tGr`Zl9_i|{r*_>B-q z{tMxsYsVjwg|~QbgWTVE2$a9@{w?HvYyIzW show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1"A"true293"FF"#NADate("2015-03-03")Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2"CCC"falsefalse#NA3.5"GGG"#NADate("1988-04-09")19:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4"HHHH"false15:02:00#NA#NA#NA#NA
" - - # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly) - @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1.0"A"true293.0"FF"#NA2015-03-03Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2.0"CCC"falsefalse#NA3.5"GGG"#NA1988-04-0919:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4.0"HHHH"false15:02:00#NA#NA#NA#NA
" - - # XLSX.jl v0.10.4 -# @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"string\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"string\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"string\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"string\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4,\"String with 
NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" - - # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly) - @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"number\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"boolean\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"number\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"boolean\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1.0,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3.0,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2.0,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some 
dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4.0,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" + @test Tables.istable(efile) == true -# This test is truncated (... with 8 more columns:) so probably isn't robust - although it passes locally. -# @test sprint(show, efile) == "4x13 Excel file\nSome Float64s │ Some Strings │ Some Bools │ Mixed column │ Mixed with NA\n──────────────┼──────────────┼────────────┼──────────────┼──────────────\n1.0 │ A │ true │ 2 │ 9 \n1.5 │ BB │ false │ \"EEEEE\" │ \"III\" \n2.0 │ CCC │ false │ false │ #NA \n2.5 │ DDDD │ true │ 1.5 │ true \n... with 8 more columns: Float64 with NA, String with NA, Bool with NA, Some dates, Dates with NA, Some errors, Errors with NA, Column with NULL and then mixed" - - @test TableTraits.isiterabletable(efile) == true - @test IteratorInterfaceExtensions.isiterable(efile) == true - @test showable("text/html", efile) == true - @test showable("application/vnd.dataresource+json", efile) == true - - @test isiterable(efile) == true + # Test show renders expected number of rows and columns, without depending on exact truncation/wrapping + @testset "show plain text" begin + s = sprint(show, efile) + @test s == "ExcelFile(\"$filename\")" + end - full_dfs = [create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=3)), create_columns_from_iterabletable(load(filename, "Sheet1"))] - for (df, names) in full_dfs + @testset "ReadTable" begin + for source in [load(filename, "Sheet1", "C:O"; first_row=3), load(filename, "Sheet1")] + df, names = get_cols(source) + @test length(df) == 13 + @test length(df[1]) == 4 + + @test df[1] == 
[1., 1.5, 2., 2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == [true, false, false, true] + @test isequal(df[4], [2, "EEEEE", false, 1.5]) + @test isequal(df[5], [9., "III", missing, true]) + @test isequal(df[6], [3., missing, 3.5, 4.]) + @test isequal(df[7], ["FF", missing, "GGG", "HHHH"]) + @test isequal(df[8], [missing, true, missing, false]) + @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), Date(1988, 4, 9), Dates.Time(15, 2, 0)] + @test isequal(df[10], [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), missing]) + @test all(ismissing, df[11]) + @test isequal(df[12], [missing, missing, missing, missing]) + @test isequal(df[13], [missing, 3.4, "HKEJW", missing]) + end + + df, names = get_cols(load(filename, "Sheet1", "C:O"; first_row=4, header=false)) + @test names == [:C, :D, :E, :F, :G, :H, :I, :J, :K, :L, :M, :N, :O] + @test length(df[1]) == 4 @test length(df) == 13 + @test df[1] == [1., 1.5, 2., 2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == [true, false, false, true] + @test isequal(df[4], [2, "EEEEE", false, 1.5]) + @test isequal(df[5], [9., "III", missing, true]) + @test isequal(df[6], [3., missing, 3.5, 4.]) + @test isequal(df[7], ["FF", missing, "GGG", "HHHH"]) + @test isequal(df[8], [missing, true, missing, false]) + @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] + @test isequal(df[10], [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), missing]) + @test all(ismissing, df[11]) + @test all(ismissing, df[12]) + @test isequal(df[13], [missing, 3.4, "HKEJW", missing]) + @test ismissing(df[12][4]) + + good_colnames = [:c1, :c2, :c3, :c4, :c5, :c6, :c7, :c8, :c9, :c10, :c11, :c12, :c13] + + df, names = get_cols(load(filename, "Sheet1", "C:O"; first_row=4, header=false, column_labels=good_colnames)) + @test names == good_colnames @test length(df[1]) == 4 + @test length(df) == 13 + @test df[1] == [1., 1.5, 2., 
2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == [true, false, false, true] + @test isequal(df[4], [2, "EEEEE", false, 1.5]) + @test isequal(df[5], [9., "III", missing, true]) + @test isequal(df[6], [3., missing, 3.5, 4.]) + @test isequal(df[7], ["FF", missing, "GGG", "HHHH"]) + @test isequal(df[8], [missing, true, missing, false]) + @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] + @test isequal(df[10], [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), missing]) + @test all(ismissing, df[11]) + @test all(ismissing, df[12]) + @test isequal(df[13], [missing, 3.4, "HKEJW", missing]) + @test ismissing(df[12][4]) + + # Test for saving DataFrame to XLSX + input = (Day = ["Nov. 27", "Nov. 28", "Nov. 29"], Highest = [78, 79, 75]) |> DataFrame + save("file.xlsx", input) + output = load("file.xlsx", "Sheet1") |> DataFrame + @test input == output + rm("file.xlsx") + + # Test for saving DataFrame to XLSX with sheetname keyword + input = (Day = ["Nov. 27", "Nov. 28", "Nov. 
29"], Highest = [78, 79, 75]) |> DataFrame + save("file.xlsx", input, sheetname="SheetName") + output = load("file.xlsx", "SheetName") |> DataFrame + @test input == output + rm("file.xlsx") + + df, names = get_cols(load(filename, "Sheet1"; column_labels=good_colnames)) + @test names == good_colnames + @test length(df[1]) == 4 + @test length(df) == 13 + @test df[1] == [1., 1.5, 2., 2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == [true, false, false, true] + @test isequal(df[4], [2, "EEEEE", false, 1.5]) + @test isequal(df[5], [9., "III", missing, true]) + @test isequal(df[6], [3., missing, 3.5, 4.]) + @test isequal(df[7], ["FF", missing, "GGG", "HHHH"]) + @test isequal(df[8], [missing, true, missing, false]) + @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] + @test isequal(df[10], [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), missing]) + @test all(ismissing, df[11]) + @test all(ismissing, df[12]) + @test isequal(df[13], [missing, 3.4, "HKEJW", missing]) + @test ismissing(df[12][4]) + + # Too few column labels + @test_throws XLSX.XLSXError get_cols(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) + + # Test for constructing DataFrame with empty header cell + data, names = get_cols(load(filename, "Sheet2", "C:E")) + @test names == [:Col1, Symbol("#Empty"), :Col3] + + # normalizenames keyword (XLSX.jl v0.11 only) + data, names = get_cols(load(filename, "Sheet2", "C:E"; normalizenames=true)) + @test names == [:Col1, :_Empty, :Col3] - @test df[1] == [1., 1.5, 2., 2.5] - @test df[2] == ["A", "BB", "CCC", "DDDD"] - @test df[3] == [true, false, false, true] - @test df[4] == [2, "EEEEE", false, 1.5] - @test df[5] == [9., "III", NA, true] - @test df[6] == [3., NA, 3.5, 4] - @test df[7] == ["FF", NA, "GGG", "HHHH"] - @test df[8] == [NA, true, NA, false] - @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), Date(1988, 4, 9), 
Dates.Time(15, 2, 0)] - @test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] - @test df[12] == [DataValue(), DataValue(), DataValue(), NA] - @test df[13] == [NA, 3.4, "HKEJW", NA] end - df, names = create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=4, header=false)) - @test names == [:C, :D, :E, :F, :G, :H, :I, :J, :K, :L, :M, :N, :O] - @test length(df[1]) == 4 - @test length(df) == 13 - @test df[1] == [1., 1.5, 2., 2.5] - @test df[2] == ["A", "BB", "CCC", "DDDD"] - @test df[3] == [true, false, false, true] - @test df[4] == [2, "EEEEE", false, 1.5] - @test df[5] == [9., "III", NA, true] - @test df[6] == [3, NA, 3.5, 4] - @test df[7] == ["FF", NA, "GGG", "HHHH"] - @test df[8] == [NA, true, NA, false] - @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] - @test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] - @test df[12] == [DataValue(), DataValue(), DataValue(), NA] - @test df[13] == [NA, 3.4, "HKEJW", NA] - @test DataValues.isna(df[12][4]) - - good_colnames = [:c1, :c2, :c3, :c4, :c5, :c6, :c7, :c8, :c9, :c10, :c11, :c12, :c13] - - df, names = create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=4, header=false, column_labels=good_colnames)) - @test names == good_colnames - @test length(df[1]) == 4 - @test length(df) == 13 - @test df[1] == [1., 1.5, 2., 2.5] - @test df[2] == ["A", "BB", "CCC", "DDDD"] - @test df[3] == [true, false, false, true] - @test df[4] == [2, "EEEEE", false, 1.5] - @test df[5] == [9., "III", NA, true] - @test df[6] == [3, NA, 3.5, 4] - @test df[7] == ["FF", NA, "GGG", "HHHH"] - @test df[8] == [NA, true, NA, false] - @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] - 
@test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] - @test df[12] == [DataValue(), DataValue(), DataValue(), NA] - @test df[13] == [NA, 3.4, "HKEJW", NA] - @test DataValues.isna(df[12][4]) - -# Test for saving DataFrame to XLSX - input = (Day = ["Nov. 27","Nov. 28","Nov. 29"], Highest = [78,79,75]) |> DataFrame - file = save("file.xlsx", input) - output = load("file.xlsx", "Sheet1") |> DataFrame - @test input == output - rm("file.xlsx") - -# Test for saving DataFrame to XLSX with sheetname keyword - input = (Day = ["Nov. 27","Nov. 28","Nov. 29"], Highest = [78,79,75]) |> DataFrame - file = save("file.xlsx", input, sheetname="SheetName") - output = load("file.xlsx", "SheetName") |> DataFrame - @test input == output - rm("file.xlsx") - - df, names = create_columns_from_iterabletable(load(filename, "Sheet1"; column_labels=good_colnames)) - @test names == good_colnames - @test length(df[1]) == 4 - @test length(df) == 13 - @test df[1] == [1., 1.5, 2., 2.5] - @test df[2] == ["A", "BB", "CCC", "DDDD"] - @test df[3] == [true, false, false, true] - @test df[4] == [2, "EEEEE", false, 1.5] - @test df[5] == [9., "III", NA, true] - @test df[6] == [3, NA, 3.5, 4] - @test df[7] == ["FF", NA, "GGG", "HHHH"] - @test df[8] == [NA, true, NA, false] - @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] - @test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] - @test df[12] == [DataValue(), DataValue(), DataValue(), NA] - @test df[13] == [NA, 3.4, "HKEJW", NA] - @test DataValues.isna(df[12][4]) - -# Too few column labels - # XLSX.jl v0.10.4 -# @test_throws AssertionError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) - - # XLSX.jl v0.11.0 - 
@test_throws XLSX.XLSXError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) - -# Test for constructing DataFrame with empty header cell - data, names = create_columns_from_iterabletable(load(filename, "Sheet2", "C:E")) - @test names == [:Col1, Symbol("#Empty"), :Col3] - - # XLSX.jl v0.11.0. The `normalizenames` keyword not available in 0.10.4 -# data, names = create_columns_from_iterabletable(load(filename, "Sheet2", "C:E"; normalizenames=true)) -# @test names == [:Col1, :_Empty, :Col3] + @testset "Transposed tables" begin + # Note: readtransposedtable cannot handle entirely empty rows/columns, + # so the Transpose sheet omits those from the original Sheet1 data. + # Note: eltype of mixed date columns is Dates.TimeType (not Any) when + # there are no missing values, since a common supertype can be inferred. + df, names = get_cols(load(filename, "Transpose"; transpose=true, first_column=2)) + @test length(df) == 5 + @test length(df[1]) == 4 + @test names == [Symbol("Some Float64s"), Symbol("Some Strings"), Symbol("Some Bools"), Symbol("Mixed with NA"), Symbol("Some dates")] -end + @test df[1] == [1.0, 1.5, 2.0, 2.5] + @test df[2] == ["A", "BB", "CCC", "DDDD"] + @test df[3] == Bool[true, false, false, true] + @test isequal(df[4], Any[9, "III", missing, true]) + @test df[5] == Dates.TimeType[Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), Date(1988, 4, 9), Dates.Time(15, 2, 0)] + end +end \ No newline at end of file From ac4a0ee850495ae73b5071a3e9b4789b781654f0 Mon Sep 17 00:00:00 2001 From: Tim Gebbels Date: Mon, 20 Apr 2026 20:44:55 +0100 Subject: [PATCH 6/7] Align README and docs. 
--- README.md | 133 ++++++++++++++++++++++++++++++++-------------- docs/src/index.md | 2 - 2 files changed, 93 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 28503d4..05281ff 100644 --- a/README.md +++ b/README.md @@ -15,17 +15,17 @@ an Excel (.xlsx) file and to save simple tabular data to an Excel file. For more extensive functionality when reading and writing Excel files, -consider using [XLSX.jl](https://felipenoris.github.io/XLSX.jl/stable/). +consider using [XLSX.jl](https://juliadata.github.io/XLSX.jl/stable/). Under the hood, `ExcelFiles.jl` uses the `XLSX.jl` functions `readtable` -and `writetable`. +and `writetable`. ## Installation Use ``Pkg.add("ExcelFiles")`` in Julia to install ExcelFiles and its dependencies. -## Usage +# Usage -### Load an Excel file +## Load an Excel file To read an Excel file into a `DataFrame`, use the following julia code: @@ -35,25 +35,66 @@ using ExcelFiles, DataFrames df = DataFrame(load("data.xlsx", "Sheet1")) ``` -The call to `load` returns an object that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing an Excel file into data structures that are not a `DataFrame`: +The call to `load` returns an object that is a [Tables.jl](https://github.com/JuliaData/Tables.jl) table, so it can be passed to any function that can handle Tables.jl tables. 
Here are some examples of materializing an Excel file into such data structures: ```julia -using ExcelFiles, DataTables, IndexedTables, TimeSeries, Temporal, Gadfly +using ExcelFiles, DataFrames, PrettyTables + +# Load into a DataFrame +julia> DataFrame(load("HTable.xlsx")) +5×10 DataFrame + Row │ Year 1940 1950 1960 1970 1980 1990 2000 2010 2020 + │ String Any Any Float64 Float64 Any Any Float64 Float64 Float64 +─────┼─────────────────────────────────────────────────────────────────────────────────────────── + 1 │ Col A 1 2 3.0 4.0 5 6 7.0 8.0 9.0 + 2 │ Col B 10 20 30.0 40.0 50 60 70.0 80.0 90.0 + 3 │ Col C 100 200 300.0 400.0 500 600 700.0 800.0 900.0 + 4 │ Col D 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 + 5 │ Col E Hello 2025-12-19 3.0 3.33 Hello 2025-12-19 3.0 3.33 1.0 + +julia> DataFrame(load("HTable.xlsx"; transpose=true)) +9×6 DataFrame + Row │ Year Col A Col B Col C Col D Col E + │ Int64 Int64 Int64 Int64 Float64 Any +─────┼───────────────────────────────────────────────── + 1 │ 1940 1 10 100 0.1 Hello + 2 │ 1950 2 20 200 0.2 2025-12-19 + 3 │ 1960 3 30 300 0.3 3 + 4 │ 1970 4 40 400 0.4 3.33 + 5 │ 1980 5 50 500 0.5 Hello + 6 │ 1990 6 60 600 0.6 2025-12-19 + 7 │ 2000 7 70 700 0.7 3 + 8 │ 2010 8 80 800 0.8 3.33 + 9 │ 2020 9 90 900 0.9 true + + +# Load into a PrettyTable +julia> PrettyTable(load("HTable.xlsx")) +┌───────┬───────┬────────────┬───────┬───────┬───────┬────────────┬───────┬───────┬───────┐ +│ Year │ 1940 │ 1950 │ 1960 │ 1970 │ 1980 │ 1990 │ 2000 │ 2010 │ 2020 │ +├───────┼───────┼────────────┼───────┼───────┼───────┼────────────┼───────┼───────┼───────┤ +│ Col A │ 1 │ 2 │ 3.0 │ 4.0 │ 5 │ 6 │ 7.0 │ 8.0 │ 9.0 │ +│ Col B │ 10 │ 20 │ 30.0 │ 40.0 │ 50 │ 60 │ 70.0 │ 80.0 │ 90.0 │ +│ Col C │ 100 │ 200 │ 300.0 │ 400.0 │ 500 │ 600 │ 700.0 │ 800.0 │ 900.0 │ +│ Col D │ 0.1 │ 0.2 │ 0.3 │ 0.4 │ 0.5 │ 0.6 │ 0.7 │ 0.8 │ 0.9 │ +│ Col E │ Hello │ 2025-12-19 │ 3.0 │ 3.33 │ Hello │ 2025-12-19 │ 3.0 │ 3.33 │ 1.0 │ 
+└───────┴───────┴────────────┴───────┴───────┴───────┴────────────┴───────┴───────┴───────┘ + +julia> PrettyTable(load("HTable.xlsx"; transpose=true)) +┌──────┬───────┬───────┬───────┬───────┬────────────┐ +│ Year │ Col A │ Col B │ Col C │ Col D │ Col E │ +├──────┼───────┼───────┼───────┼───────┼────────────┤ +│ 1940 │ 1 │ 10 │ 100 │ 0.1 │ Hello │ +│ 1950 │ 2 │ 20 │ 200 │ 0.2 │ 2025-12-19 │ +│ 1960 │ 3 │ 30 │ 300 │ 0.3 │ 3 │ +│ 1970 │ 4 │ 40 │ 400 │ 0.4 │ 3.33 │ +│ 1980 │ 5 │ 50 │ 500 │ 0.5 │ Hello │ +│ 1990 │ 6 │ 60 │ 600 │ 0.6 │ 2025-12-19 │ +│ 2000 │ 7 │ 70 │ 700 │ 0.7 │ 3 │ +│ 2010 │ 8 │ 80 │ 800 │ 0.8 │ 3.33 │ +│ 2020 │ 9 │ 90 │ 900 │ 0.9 │ true │ +└──────┴───────┴───────┴───────┴───────┴────────────┘ -# Load into a DataTable -dt = DataTable(load("data.xlsx", "Sheet1")) - -# Load into an IndexedTable -it = IndexedTable(load("data.xlsx", "Sheet1")) - -# Load into a TimeArray -ta = TimeArray(load("data.xlsx", "Sheet1")) - -# Load into a TS -ts = TS(load("data.xlsx", "Sheet1")) - -# Plot directly with Gadfly -plot(load("data.xlsx", "Sheet1"), x=:a, y=:b, Geom.line) ``` The `load` function takes a number of arguments and keywords: @@ -62,9 +103,9 @@ The `load` function takes a number of arguments and keywords: FileIO.load( source::String, [sheet::String, - [range::String]]; + [columns::String]]; [first_row::Int], - [first_column::Int], + [first_column::String] [column_labels::Vector{String}], [header::Bool], [normalizenames::Bool], @@ -72,31 +113,39 @@ The `load` function takes a number of arguments and keywords: ) ``` -#### Arguments: +### Arguments: * `source`: The name of the file to be loaded. * `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used. -* `range`: Determines which rows/columns to read. Given as a column range like `"A:F"` when `transpose=false` or as a row range like `"2:7"` when `transpose=true`. For example, `"B:D"` will select columns B, C and D. 
If `range` is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid `sheet` **must** be specified when specifying `range`. +* `columns`: Determines which columns to read. For example, `"B:D"` will select columns B, C and D. If `columns` is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid `sheet` **must** be specified when specifying `columns`. If `transpose = true`, `columns` should instead be used to specify rows. For example, specifying `"2:4"` with `transpose = true` will read only from these rows. -#### Keywords: +### Keywords: -* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet. This keyword will be ignored if `transpose=true`. -* `first_column`: Indicates the first column of the data table to be read. For example, `first_column=5` or `first_column="E"` will look for a table starting at sheet column 5 ("E"). If first_row is not given, the algorithm will look for the first non-empty row in the sheet. This keyword will be ignored if `transpose=false`. -* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row (or column if `transpose=true`) of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. -* `column_labels`: Specifies column names for the header of the table. If `column_labels` is given and `header=true`, the headers given by `column_labels` will be used, and the first row (or column if `transpose=true`) of the table (containing headers) will be ignored. +* `first_row`: Indicates the first row of the data table to be read. 
For example, `first_row=5` will look for a table starting at sheet row 5. If `first_row` is not given, the algorithm will look for the first non-empty row in the sheet (ignored if `transpose = true`). +* `first_column`: Indicates the first column of the data table to be read. For example, `first_column="B"` will look for a table starting at sheet column B. If `first_column` is not given, the algorithm will look for the first non-empty column in the sheet (ignored if `transpose = false` or is omitted). +* `column_labels`: Specifies column names for the header of the table. If `column_labels` is given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored. +* `header`: Indicates if the first row (column if `transpose = true`) is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row (column) of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. +* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false`. +* `transpose`: Set to `true` to read a transposed table organised in rows rather than columns. Default=`false`. 
+ +### Examples + +```julia +julia> PrettyTable(load("HTable.xlsx", "Offset"; first_row=2)) -### Save an Excel file +julia> df = DataFrame(load("HTable.xlsx", "Offset", "2:7"; transpose=true, first_column="B")) -The following code saves any iterable table as an excel file: +julia> df = DataFrame(load("HTable.xlsx"; normalizenames=true, transpose=true, column_labels=["Date", "Name1", "Name2", "Name3", "Name4", "Name5"])) +``` +## Save an Excel file + +The following code saves any Tables.jl table (such as a `DataFrame`) as an Excel file: ```julia using ExcelFiles -save("output.xlsx", it) +save("output.xlsx", tbl) ``` -This will work as long as `it` is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`). The `save` function takes a number of arguments and keywords: @@ -108,16 +157,22 @@ The `save` function takes a number of arguments and keywords: ) ``` -#### Arguments: +### Arguments: * `source`: The name of the file to be created on save. -#### Keywords: +### Keywords: * `sheetname`: Specify the sheetname to be used in the created file. By default, the sheetname will be `Sheet1`. * `overwrite`: Set `overwrite=true` to overwite any existing file of the same name. Default = `false`. -### Using the pipe syntax +### Examples + +```julia +julia> save("myfile.xlsx", df; sheetname="myname", overwrite=true) +``` + +## Using the pipe syntax The `load` and `save` functions also support the pipe syntax. 
For example, to load an Excel file into a `DataFrame`, one can use the following code: @@ -127,7 +182,7 @@ using ExcelFiles, DataFrame df = load("data.xlsx", "Sheet1") |> DataFrame ``` -To save an iterable table, one can use the following form: +To save any Tables.jl compatible table (such as a DataFrame), one can use the following form: ```julia using ExcelFiles, DataFrame @@ -136,5 +191,3 @@ df = # Aquire a DataFrame somehow df |> save("output.xlsx") ``` - -The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries, for example one can easily load an Excel file, pipe it into a query, then pipe it to the `save` function to store the results in a new file. diff --git a/docs/src/index.md b/docs/src/index.md index afaf4af..539669f 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -180,5 +180,3 @@ df = # Aquire a DataFrame somehow df |> save("output.xlsx") ``` - -The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries, for example one can easily load an Excel file, pipe it into a query, then pipe it to the `save` function to store the results in a new file. From 6dae7a8a4814d66599a5289f21a5fdfb4245a2fa Mon Sep 17 00:00:00 2001 From: Tim Gebbels Date: Mon, 20 Apr 2026 20:49:04 +0100 Subject: [PATCH 7/7] Update XLSX.jl compat. --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 3e8272a..ca3b3e8 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,7 @@ XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" [compat] FileIO = "1" Tables = "1" -XLSX = "0.11" +XLSX = "0.11.3" julia = "1" [extras]