Categories
powershell

How to convert Excel file to csv

One-liner to convert Excel to CSV (or to and from any other format).
There is a bug 320369 if you have excel in English and your locale is not America. Just change your settings to us_en before conversion.

Categories
powershell xml

xml to csv in powershell

Powershell is very strong with XML, to convert an XML document to something flat like a CSV file, it is incredibly powerfull.

Let’s take a file called emp.xml




10

CLARK
MILLER
KING



20

SMITH
FORD
ADAMS
SCOTT
JONES



30

ALLEN
WARD
MARTIN
BLAKE
TURNER
JAMES



To get all employees, it is awfully easy
([xml](gc emp.xml)).EMPTABLE.DEPT.EMPLIST.ENAME

CLARK
MILLER
KING
SMITH
FORD
ADAMS
SCOTT
JONES
ALLEN
WARD
MARTIN
BLAKE
TURNER
JAMES

Now I actually want to have each employee together with his department. I create an object for each department add the ename and the deptno

$d=([xml](gc emp.xml)).EMPTABLE.DEPT | % {
foreach ($i in $_.EMPLIST.ENAME) {
$o = New-Object Object
Add-Member -InputObject $o -MemberType NoteProperty -Name DEPTNO -Value $_.DEPTNO
Add-Member -InputObject $o -MemberType NoteProperty -Name ENAME -Value $i
$o
}
}
$d


DEPTNO ENAME
------ -----
10 CLARK
10 MILLER
10 KING
20 SMITH
20 FORD
20 ADAMS
20 SCOTT
20 JONES
30 ALLEN
30 WARD
30 MARTIN
30 BLAKE
30 TURNER
30 JAMES

This could be convert to multiple format.

HTML
$d|ConvertTo-HTML

DEPTNO ENAME
10 CLARK
10 MILLER
10 KING
20 SMITH
20 FORD
20 ADAMS
20 SCOTT
20 JONES
30 ALLEN
30 WARD
30 MARTIN
30 BLAKE
30 TURNER
30 JAMES

CSV
$d|ConvertTo-CSV

"DEPTNO","ENAME"
"10","CLARK"
"10","MILLER"
"10","KING"
"20","SMITH"
"20","FORD"
"20","ADAMS"
"20","SCOTT"
"20","JONES"
"30","ALLEN"
"30","WARD"
"30","MARTIN"
"30","BLAKE"
"30","TURNER"
"30","JAMES"

JSON
$d|ConvertTo-JSon

[
{
"DEPTNO": "10",
"ENAME": "CLARK"
},
{
"DEPTNO": "10",
"ENAME": "MILLER"
},
{
"DEPTNO": "10",
"ENAME": "KING"
},
{
"DEPTNO": "20",
"ENAME": "SMITH"
},
{
"DEPTNO": "20",
"ENAME": "FORD"
},
{
"DEPTNO": "20",
"ENAME": "ADAMS"
},
{
"DEPTNO": "20",
"ENAME": "SCOTT"
},
{
"DEPTNO": "20",
"ENAME": "JONES"
},
{
"DEPTNO": "30",
"ENAME": "ALLEN"
},
{
"DEPTNO": "30",
"ENAME": "WARD"
},
{
"DEPTNO": "30",
"ENAME": "MARTIN"
},
{
"DEPTNO": "30",
"ENAME": "BLAKE"
},
{
"DEPTNO": "30",
"ENAME": "TURNER"
},
{
"DEPTNO": "30",
"ENAME": "JAMES"
}
]

Or even to xml with ($d|ConvertTo-XML).OuterXml

It is so lightening fast that you could process pretty large files (millions of lines) in just a few seconds

Categories
sql sqlplus

CSV part 4, fast !!

I got some comments that my other csv solutions were slow to export gigabytes of data.

One more try.

thanks to the feedbacks, I provided a new version

This could generate very large files in just a few minutes (instead of hours).

I use bulk collect and utl_file to boost performance

CREATE TYPE collist IS TABLE OF VARCHAR2 (4000)
/

CREATE OR REPLACE PROCEDURE bulk_csv (directory_name VARCHAR2,
file_name VARCHAR2,
query VARCHAR2)
AUTHID CURRENT_USER
IS
-- $Id$
fh UTL_FILE.file_type;
stmt VARCHAR2 (32767) := NULL;
header VARCHAR2 (32767) := NULL;
curid NUMBER;
desctab DBMS_SQL.DESC_TAB;
colcnt NUMBER;
namevar VARCHAR2 (32767);

TYPE cola IS TABLE OF collist
INDEX BY BINARY_INTEGER;

res cola;
rcur SYS_REFCURSOR;
current_line VARCHAR2 (32767);
next_line VARCHAR2 (32767);

BEGIN
curid := DBMS_SQL.open_cursor;
DBMS_SQL.parse (curid, query, DBMS_SQL.NATIVE);
DBMS_SQL.DESCRIBE_COLUMNS (curid, colcnt, desctab);

FOR i IN 1 .. colcnt
LOOP
DBMS_SQL.DEFINE_COLUMN (curid,
i,
namevar,
32767);
END LOOP;

IF DBMS_SQL.execute (curid) = 0
THEN
FOR i IN 1 .. colcnt
LOOP
IF (i > 1)
THEN
header := header || ';';
stmt := stmt || ',';
END IF;

header := header || desctab (i).col_name;
stmt :=
stmt
|| CASE
WHEN desctab (i).col_type IN
(DBMS_SQL.Varchar2_Type,
DBMS_SQL.Char_Type)
THEN
'"'||desctab (i).col_name || '"'
WHEN desctab (i).col_type IN
(DBMS_SQL.Number_Type,
DBMS_SQL.Date_Type,
DBMS_SQL.Binary_Float_Type,
DBMS_SQL.Binary_Bouble_Type,
DBMS_SQL.Timestamp_Type,
DBMS_SQL.Timestamp_With_TZ_Type,
DBMS_SQL.Interval_Year_to_Month_Type,
DBMS_SQL.Interval_Day_To_Second_Type,
DBMS_SQL.Timestamp_With_Local_TZ_type)
THEN
'to_char("' || desctab (i).col_name || '")'
WHEN desctab (i).col_type = DBMS_SQL.Raw_Type
THEN
'rawtohex("' || desctab (i).col_name || '")'
WHEN desctab (i).col_type = DBMS_SQL.Rowid_Type
THEN
'''unsupport datatype : ROWID'''
WHEN desctab (i).col_type = DBMS_SQL.Long_Type
THEN
'''unsupport datatype : LONG'''
WHEN desctab (i).col_type = DBMS_SQL.Long_Raw_Type
THEN
'''unsupport datatype : LONG RAW'''
WHEN desctab (i).col_type = DBMS_SQL.User_Defined_Type
THEN
'''unsupport datatype : User Defined Type'''
WHEN desctab (i).col_type = DBMS_SQL.MLSLabel_Type
THEN
'''unsupport datatype : MLSLABEL'''
WHEN desctab (i).col_type = DBMS_SQL.Ref_Type
THEN
'''unsupport datatype : REF'''
WHEN desctab (i).col_type = DBMS_SQL.Clob_Type
THEN
'''unsupport datatype : CLOB'''
WHEN desctab (i).col_type = DBMS_SQL.Blob_Type
THEN
'''unsupport datatype : BLOB'''
WHEN desctab (i).col_type = DBMS_SQL.Rowid_Type
THEN
'''unsupport datatype : ROWID'''
WHEN desctab (i).col_type = DBMS_SQL.Bfile_Type
THEN
'''unsupport datatype : BFILE'''
WHEN desctab (i).col_type = DBMS_SQL.Urowid_Type
THEN
'''unsupport datatype : UROWID'''
ELSE
'''unsupport datatype : '||desctab (i).col_type||''''
END;
END LOOP;

stmt := 'select collist(' || stmt || ') from (' || query || ')';

fh :=
UTL_FILE.fopen (directory_name,
file_name,
'W',
32767);

begin
OPEN rcur FOR stmt;
exception
when others then
dbms_output.put_line(stmt);
raise;
end;
LOOP
FETCH rcur
BULK COLLECT INTO res
LIMIT 10000;

current_line := header;
next_line := NULL;

FOR f IN 1 .. res.COUNT
LOOP
FOR g IN 1 .. res (f).COUNT
LOOP
IF (g > 1)
THEN
next_line := next_line || ';';
END IF;

IF ( NVL(LENGTH (current_line),0)
+ NVL(LENGTH (next_line),0)
+ NVL(LENGTH (res (f) (g)),0)
+ 5 > 32767)
THEN
UTL_FILE.put_line (fh, current_line);
current_line := NULL;
END IF;

IF (NVL(LENGTH (next_line),0) + NVL(LENGTH (res (f) (g)),0) + 5 > 32767)
THEN
UTL_FILE.put_line (fh, next_line);
next_line := NULL;
END IF;

next_line := next_line || res (f) (g);
END LOOP;

current_line :=
CASE
WHEN current_line IS NOT NULL
THEN
current_line || CHR (10)
END
|| next_line;
next_line := NULL;
END LOOP;

UTL_FILE.put_line (fh, current_line);
EXIT WHEN rcur%NOTFOUND;
END LOOP;

CLOSE rcur;

UTL_FILE.fclose (fh);
END IF;

DBMS_SQL.CLOSE_CURSOR (curid);
END;
/

CREATE OR REPLACE DIRECTORY tmp AS '/tmp';

EXEC bulk_csv('TMP','emp.csv','SELECT * FROM EMP ORDER BY ENAME')


EMPNO;ENAME;JOB;MGR;HIREDATE;SAL;COMM;DEPTNO
7876;ADAMS;CLERK;7788;1987-05-23 00:00:00;1100;;20
7499;ALLEN;SALESMAN;7698;1981-02-20 00:00:00;1600;30;30
7698;BLAKE;MANAGER;7839;1981-05-01 00:00:00;2850;;30
7782;CLARK;MANAGER;7839;1981-06-09 00:00:00;2450;;10
7902;FORD;ANALYST;7566;1981-12-03 00:00:00;3000;;20
7900;JAMES;CLERK;7698;1981-12-03 00:00:00;950;;30
7566;JONES;MANAGER;7839;1981-04-02 00:00:00;2975;;20
7839;KING;PRESIDENT;;1981-11-17 00:00:00;5000;;10
7654;MARTIN;SALESMAN;7698;1981-09-28 00:00:00;1250;140;30
7934;MILLER;CLERK;7782;1982-01-23 00:00:00;1300;;10
7788;SCOTT;ANALYST;7566;1987-04-19 00:00:00;3000;;20
7369;SMITH;CLERK;7902;1980-12-17 00:00:00;800;;20
7844;TURNER;SALESMAN;7698;1981-09-08 00:00:00;1500;0;30
7521;WARD;SALESMAN;7698;1981-02-22 00:00:00;1250;50;30