Categories
sql sqlplus

CSV part 4, fast !!

I got some comments that my other csv solutions were slow to export gigabytes of data.

One more try.

thanks to the feedbacks, I provided a new version

This could generate very large files in just a few minutes (instead of hours).

I use bulk collect and utl_file to boost performance

CREATE TYPE collist IS TABLE OF VARCHAR2 (4000)
/

CREATE OR REPLACE PROCEDURE bulk_csv (directory_name VARCHAR2,
file_name VARCHAR2,
query VARCHAR2)
AUTHID CURRENT_USER
IS
-- $Id$
fh UTL_FILE.file_type;
stmt VARCHAR2 (32767) := NULL;
header VARCHAR2 (32767) := NULL;
curid NUMBER;
desctab DBMS_SQL.DESC_TAB;
colcnt NUMBER;
namevar VARCHAR2 (32767);

TYPE cola IS TABLE OF collist
INDEX BY BINARY_INTEGER;

res cola;
rcur SYS_REFCURSOR;
current_line VARCHAR2 (32767);
next_line VARCHAR2 (32767);

BEGIN
curid := DBMS_SQL.open_cursor;
DBMS_SQL.parse (curid, query, DBMS_SQL.NATIVE);
DBMS_SQL.DESCRIBE_COLUMNS (curid, colcnt, desctab);

FOR i IN 1 .. colcnt
LOOP
DBMS_SQL.DEFINE_COLUMN (curid,
i,
namevar,
32767);
END LOOP;

IF DBMS_SQL.execute (curid) = 0
THEN
FOR i IN 1 .. colcnt
LOOP
IF (i > 1)
THEN
header := header || ';';
stmt := stmt || ',';
END IF;

header := header || desctab (i).col_name;
stmt :=
stmt
|| CASE
WHEN desctab (i).col_type IN
(DBMS_SQL.Varchar2_Type,
DBMS_SQL.Char_Type)
THEN
'"'||desctab (i).col_name || '"'
WHEN desctab (i).col_type IN
(DBMS_SQL.Number_Type,
DBMS_SQL.Date_Type,
DBMS_SQL.Binary_Float_Type,
DBMS_SQL.Binary_Bouble_Type,
DBMS_SQL.Timestamp_Type,
DBMS_SQL.Timestamp_With_TZ_Type,
DBMS_SQL.Interval_Year_to_Month_Type,
DBMS_SQL.Interval_Day_To_Second_Type,
DBMS_SQL.Timestamp_With_Local_TZ_type)
THEN
'to_char("' || desctab (i).col_name || '")'
WHEN desctab (i).col_type = DBMS_SQL.Raw_Type
THEN
'rawtohex("' || desctab (i).col_name || '")'
WHEN desctab (i).col_type = DBMS_SQL.Rowid_Type
THEN
'''unsupport datatype : ROWID'''
WHEN desctab (i).col_type = DBMS_SQL.Long_Type
THEN
'''unsupport datatype : LONG'''
WHEN desctab (i).col_type = DBMS_SQL.Long_Raw_Type
THEN
'''unsupport datatype : LONG RAW'''
WHEN desctab (i).col_type = DBMS_SQL.User_Defined_Type
THEN
'''unsupport datatype : User Defined Type'''
WHEN desctab (i).col_type = DBMS_SQL.MLSLabel_Type
THEN
'''unsupport datatype : MLSLABEL'''
WHEN desctab (i).col_type = DBMS_SQL.Ref_Type
THEN
'''unsupport datatype : REF'''
WHEN desctab (i).col_type = DBMS_SQL.Clob_Type
THEN
'''unsupport datatype : CLOB'''
WHEN desctab (i).col_type = DBMS_SQL.Blob_Type
THEN
'''unsupport datatype : BLOB'''
WHEN desctab (i).col_type = DBMS_SQL.Rowid_Type
THEN
'''unsupport datatype : ROWID'''
WHEN desctab (i).col_type = DBMS_SQL.Bfile_Type
THEN
'''unsupport datatype : BFILE'''
WHEN desctab (i).col_type = DBMS_SQL.Urowid_Type
THEN
'''unsupport datatype : UROWID'''
ELSE
'''unsupport datatype : '||desctab (i).col_type||''''
END;
END LOOP;

stmt := 'select collist(' || stmt || ') from (' || query || ')';

fh :=
UTL_FILE.fopen (directory_name,
file_name,
'W',
32767);

begin
OPEN rcur FOR stmt;
exception
when others then
dbms_output.put_line(stmt);
raise;
end;
LOOP
FETCH rcur
BULK COLLECT INTO res
LIMIT 10000;

current_line := header;
next_line := NULL;

FOR f IN 1 .. res.COUNT
LOOP
FOR g IN 1 .. res (f).COUNT
LOOP
IF (g > 1)
THEN
next_line := next_line || ';';
END IF;

IF ( NVL(LENGTH (current_line),0)
+ NVL(LENGTH (next_line),0)
+ NVL(LENGTH (res (f) (g)),0)
+ 5 > 32767)
THEN
UTL_FILE.put_line (fh, current_line);
current_line := NULL;
END IF;

IF (NVL(LENGTH (next_line),0) + NVL(LENGTH (res (f) (g)),0) + 5 > 32767)
THEN
UTL_FILE.put_line (fh, next_line);
next_line := NULL;
END IF;

next_line := next_line || res (f) (g);
END LOOP;

current_line :=
CASE
WHEN current_line IS NOT NULL
THEN
current_line || CHR (10)
END
|| next_line;
next_line := NULL;
END LOOP;

UTL_FILE.put_line (fh, current_line);
EXIT WHEN rcur%NOTFOUND;
END LOOP;

CLOSE rcur;

UTL_FILE.fclose (fh);
END IF;

DBMS_SQL.CLOSE_CURSOR (curid);
END;
/

CREATE OR REPLACE DIRECTORY tmp AS '/tmp';

EXEC bulk_csv('TMP','emp.csv','SELECT * FROM EMP ORDER BY ENAME')


EMPNO;ENAME;JOB;MGR;HIREDATE;SAL;COMM;DEPTNO
7876;ADAMS;CLERK;7788;1987-05-23 00:00:00;1100;;20
7499;ALLEN;SALESMAN;7698;1981-02-20 00:00:00;1600;30;30
7698;BLAKE;MANAGER;7839;1981-05-01 00:00:00;2850;;30
7782;CLARK;MANAGER;7839;1981-06-09 00:00:00;2450;;10
7902;FORD;ANALYST;7566;1981-12-03 00:00:00;3000;;20
7900;JAMES;CLERK;7698;1981-12-03 00:00:00;950;;30
7566;JONES;MANAGER;7839;1981-04-02 00:00:00;2975;;20
7839;KING;PRESIDENT;;1981-11-17 00:00:00;5000;;10
7654;MARTIN;SALESMAN;7698;1981-09-28 00:00:00;1250;140;30
7934;MILLER;CLERK;7782;1982-01-23 00:00:00;1300;;10
7788;SCOTT;ANALYST;7566;1987-04-19 00:00:00;3000;;20
7369;SMITH;CLERK;7902;1980-12-17 00:00:00;800;;20
7844;TURNER;SALESMAN;7698;1981-09-08 00:00:00;1500;0;30
7521;WARD;SALESMAN;7698;1981-02-22 00:00:00;1250;50;30

By Laurent Schneider

Oracle Certified Master

15 replies on “CSV part 4, fast !!”

Recent versions may have improved upon this, but I recall utl_file.put_line was not particularly efficient, hence when outputting lots of rows, we would opt for:

str vc2(32767)

while
str := str || '' || chr(10);
if length(str) < 30000 then utl_file.put_line(str); end if; str := null; end loop;

or thereabouts, thus limiting the number of calls to utl_file.put_line until you reached the limits of how many rows you could spit out to the file at once.

Cheers
Connor

whoops… I used html tags there is meant to the term &lt. data &gt. between the two concatenation operators

some hints to make your nice code even better
– use dbms_types packge, that defines type constants instead of literals
– consider using sys_refcursor type/dbms_sql cursor parameter instead of table_name
– it would be nice if you will be able to enclose strings in “

thanks all, using put_line to put multiple lines, dbms_types types and query/cursor instead of table

@Saurabh : nope, no support for object type. Every object like xml may have its own “to_char” method, and I doubt you want to export XML in a csv file 😉

Great attempt.
I tried using this with a very long query(may be 200 lines) but it fails.
Is it that we can pass the query as a input sql file for large queries.

@Robin

You can also use the array interface of dbms_sql.parse. You would declare two local variable, thus


l_stmtarr dbms_sql.varchar2s;
l_cntr integer := floor( length( query ) / 256 );

Then before this line, DBMS_SQL.parse (curid, query, DBMS_SQL.NATIVE); do something like


for i in 0 .. l_cntr
loop
l_stmtarr(i) := substr( query, i*256 + 1, 256 );
end loop;

Now your parse line becomes

dbms_sql.parse ( curid, l_stmtarr, 0, l_cntr, false, dbms_sql.native );

Hi laurent,
your code rocks but I’ve found a little bug:
it writes the header every 10000 ( the bulk size)
I’ve tried your example lowering the bulk threshold (less pga use 😉 ) this snippet resolve the problem
kIndest Regards

current_line := header;
LOOP
FETCH rcur
BULK COLLECT INTO res
LIMIT 1000;

if current_line != header then
current_line := null;
end if;
next_line := NULL;

Leave a Reply

Your email address will not be published.