CSV part 4, fast !!

I got some comments that my other CSV solutions were slow when exporting gigabytes of data.

One more try.

Thanks to the feedback, here is a new version.

It can generate very large files in just a few minutes (instead of hours).

I use BULK COLLECT and UTL_FILE to boost performance.
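The core pattern, in miniature (a sketch only; the cursor, table, and LIMIT are illustrative, not part of the procedure): fetch rows in batches with BULK COLLECT ... LIMIT instead of one at a time.

DECLARE
   CURSOR c IS SELECT ename FROM emp;           -- any query

   TYPE t_tab IS TABLE OF emp.ename%TYPE;

   l_rows   t_tab;
BEGIN
   OPEN c;

   LOOP
      -- one fetch call brings back up to 10000 rows
      FETCH c BULK COLLECT INTO l_rows LIMIT 10000;

      FOR i IN 1 .. l_rows.COUNT
      LOOP
         NULL;   -- format l_rows (i) and buffer it for UTL_FILE here
      END LOOP;

      EXIT WHEN c%NOTFOUND;
   END LOOP;

   CLOSE c;
END;
/

The full procedure below applies the same pattern generically, to any query.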

CREATE TYPE collist IS TABLE OF VARCHAR2 (4000)
/

CREATE OR REPLACE PROCEDURE bulk_csv (directory_name VARCHAR2,
                                      file_name      VARCHAR2,
                                      query          VARCHAR2)
   AUTHID CURRENT_USER
IS
   -- $Id$
   fh             UTL_FILE.file_type;
   stmt           VARCHAR2 (32767) := NULL;
   header         VARCHAR2 (32767) := NULL;
   curid          NUMBER;
   desctab        DBMS_SQL.DESC_TAB;
   colcnt         NUMBER;
   namevar        VARCHAR2 (32767);

   TYPE cola IS TABLE OF collist
      INDEX BY BINARY_INTEGER;

   res            cola;
   rcur           SYS_REFCURSOR;
   current_line   VARCHAR2 (32767);
   next_line      VARCHAR2 (32767);
BEGIN
   -- describe the columns of the arbitrary query
   curid := DBMS_SQL.open_cursor;
   DBMS_SQL.parse (curid, query, DBMS_SQL.NATIVE);
   DBMS_SQL.DESCRIBE_COLUMNS (curid, colcnt, desctab);

   FOR i IN 1 .. colcnt
   LOOP
      DBMS_SQL.DEFINE_COLUMN (curid,
                              i,
                              namevar,
                              32767);
   END LOOP;

   -- execute the statement; proceed only if it returns 0
   IF DBMS_SQL.execute (curid) = 0
   THEN
      -- build the CSV header and a select list that converts
      -- every column to a string
      FOR i IN 1 .. colcnt
      LOOP
         IF (i > 1)
         THEN
            header := header || ';';
            stmt := stmt || ',';
         END IF;

         header := header || desctab (i).col_name;
         stmt :=
               stmt
            || CASE
                  WHEN desctab (i).col_type IN
                          (DBMS_SQL.Varchar2_Type,
                           DBMS_SQL.Char_Type)
                  THEN
                     '"' || desctab (i).col_name || '"'
                  WHEN desctab (i).col_type IN
                          (DBMS_SQL.Number_Type,
                           DBMS_SQL.Date_Type,
                           DBMS_SQL.Binary_Float_Type,
                           DBMS_SQL.Binary_Double_Type,
                           DBMS_SQL.Timestamp_Type,
                           DBMS_SQL.Timestamp_With_TZ_Type,
                           DBMS_SQL.Interval_Year_to_Month_Type,
                           DBMS_SQL.Interval_Day_To_Second_Type,
                           DBMS_SQL.Timestamp_With_Local_TZ_type)
                  THEN
                     'to_char("' || desctab (i).col_name || '")'
                  WHEN desctab (i).col_type = DBMS_SQL.Raw_Type
                  THEN
                     'rawtohex("' || desctab (i).col_name || '")'
                  WHEN desctab (i).col_type = DBMS_SQL.Rowid_Type
                  THEN
                     '''unsupported datatype : ROWID'''
                  WHEN desctab (i).col_type = DBMS_SQL.Long_Type
                  THEN
                     '''unsupported datatype : LONG'''
                  WHEN desctab (i).col_type = DBMS_SQL.Long_Raw_Type
                  THEN
                     '''unsupported datatype : LONG RAW'''
                  WHEN desctab (i).col_type = DBMS_SQL.User_Defined_Type
                  THEN
                     '''unsupported datatype : User Defined Type'''
                  WHEN desctab (i).col_type = DBMS_SQL.MLSLabel_Type
                  THEN
                     '''unsupported datatype : MLSLABEL'''
                  WHEN desctab (i).col_type = DBMS_SQL.Ref_Type
                  THEN
                     '''unsupported datatype : REF'''
                  WHEN desctab (i).col_type = DBMS_SQL.Clob_Type
                  THEN
                     '''unsupported datatype : CLOB'''
                  WHEN desctab (i).col_type = DBMS_SQL.Blob_Type
                  THEN
                     '''unsupported datatype : BLOB'''
                  WHEN desctab (i).col_type = DBMS_SQL.Bfile_Type
                  THEN
                     '''unsupported datatype : BFILE'''
                  WHEN desctab (i).col_type = DBMS_SQL.Urowid_Type
                  THEN
                     '''unsupported datatype : UROWID'''
                  ELSE
                     '''unsupported datatype : ' || desctab (i).col_type || ''''
               END;
      END LOOP;

      -- wrap the query so each row is fetched as a single collist collection
      stmt := 'select collist(' || stmt || ') from (' || query || ')';

      fh :=
         UTL_FILE.fopen (directory_name,
                         file_name,
                         'W',
                         32767);

      BEGIN
         OPEN rcur FOR stmt;
      EXCEPTION
         WHEN OTHERS
         THEN
            dbms_output.put_line (stmt);
            RAISE;
      END;

      LOOP
         -- fetch 10000 rows per round trip
         FETCH rcur
            BULK COLLECT INTO res
            LIMIT 10000;

         current_line := header;
         next_line := NULL;

         FOR f IN 1 .. res.COUNT
         LOOP
            FOR g IN 1 .. res (f).COUNT
            LOOP
               IF (g > 1)
               THEN
                  next_line := next_line || ';';
               END IF;

               -- flush the buffer before it exceeds the 32767-byte limit
               IF (  NVL (LENGTH (current_line), 0)
                   + NVL (LENGTH (next_line), 0)
                   + NVL (LENGTH (res (f) (g)), 0)
                   + 5 > 32767)
               THEN
                  UTL_FILE.put_line (fh, current_line);
                  current_line := NULL;
               END IF;

               IF (  NVL (LENGTH (next_line), 0)
                   + NVL (LENGTH (res (f) (g)), 0)
                   + 5 > 32767)
               THEN
                  UTL_FILE.put_line (fh, next_line);
                  next_line := NULL;
               END IF;

               next_line := next_line || res (f) (g);
            END LOOP;

            -- append the finished row to the multi-line buffer
            current_line :=
               CASE
                  WHEN current_line IS NOT NULL
                  THEN
                     current_line || CHR (10)
               END
               || next_line;
            next_line := NULL;
         END LOOP;

         UTL_FILE.put_line (fh, current_line);
         EXIT WHEN rcur%NOTFOUND;
      END LOOP;

      CLOSE rcur;

      UTL_FILE.fclose (fh);
   END IF;

   DBMS_SQL.CLOSE_CURSOR (curid);
END;
/

CREATE OR REPLACE DIRECTORY tmp AS '/tmp';

EXEC bulk_csv('TMP','emp.csv','SELECT * FROM EMP ORDER BY ENAME')


EMPNO;ENAME;JOB;MGR;HIREDATE;SAL;COMM;DEPTNO
7876;ADAMS;CLERK;7788;1987-05-23 00:00:00;1100;;20
7499;ALLEN;SALESMAN;7698;1981-02-20 00:00:00;1600;30;30
7698;BLAKE;MANAGER;7839;1981-05-01 00:00:00;2850;;30
7782;CLARK;MANAGER;7839;1981-06-09 00:00:00;2450;;10
7902;FORD;ANALYST;7566;1981-12-03 00:00:00;3000;;20
7900;JAMES;CLERK;7698;1981-12-03 00:00:00;950;;30
7566;JONES;MANAGER;7839;1981-04-02 00:00:00;2975;;20
7839;KING;PRESIDENT;;1981-11-17 00:00:00;5000;;10
7654;MARTIN;SALESMAN;7698;1981-09-28 00:00:00;1250;140;30
7934;MILLER;CLERK;7782;1982-01-23 00:00:00;1300;;10
7788;SCOTT;ANALYST;7566;1987-04-19 00:00:00;3000;;20
7369;SMITH;CLERK;7902;1980-12-17 00:00:00;800;;20
7844;TURNER;SALESMAN;7698;1981-09-08 00:00:00;1500;0;30
7521;WARD;SALESMAN;7698;1981-02-22 00:00:00;1250;50;30

15 thoughts on “CSV part 4, fast !!”

  1. Connor McDonald

    Recent versions may have improved upon this, but I recall utl_file.put_line was not particularly efficient, hence when outputting lots of rows, we would opt for:

    str vc2(32767);

    while <more data> loop
       str := str || <data> || chr(10);

       if length(str) > 30000 then
          utl_file.put_line(str);
          str := null;
       end if;
    end loop;

    or thereabouts, thus limiting the number of calls to utl_file.put_line until you reached the limits of how many rows you could spit out to the file at once.

    Cheers
    Connor
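    A runnable rendition of that idea (a sketch, not Connor’s exact code: the rows, file name, and 30000-byte threshold are illustrative, and utl_file.put is used rather than put_line because the buffer already carries its own newlines):

    DECLARE
       TYPE t_lines IS TABLE OF VARCHAR2 (4000);

       l_rows   t_lines := t_lines ('row 1', 'row 2', 'row 3');   -- hypothetical data
       fh       UTL_FILE.file_type;
       str      VARCHAR2 (32767);
    BEGIN
       fh := UTL_FILE.fopen ('TMP', 'buffered.csv', 'W', 32767);

       FOR i IN 1 .. l_rows.COUNT
       LOOP
          str := str || l_rows (i) || CHR (10);

          -- flush only when the buffer approaches the 32767-byte limit
          IF LENGTH (str) > 30000
          THEN
             UTL_FILE.put (fh, str);
             str := NULL;
          END IF;
       END LOOP;

       -- write whatever is left in the buffer
       IF str IS NOT NULL
       THEN
          UTL_FILE.put (fh, str);
       END IF;

       UTL_FILE.fclose (fh);
    END;
    /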

  2. Connor McDonald

    whoops… I used HTML tags; there is meant to be the term <data> between the two concatenation operators

  3. Laurent Schneider Post author

    Thanks for the hint, but as my problem is rather that lines are too long (a few KB) than too short (a few bytes), I will stick to put_line for each row

  4. Stefan Oravec

    Some hints to make your nice code even better:
    – use the dbms_types package, which defines type constants, instead of literals
    – consider using a sys_refcursor type / dbms_sql cursor parameter instead of a table name
    – it would be nice if you were able to enclose strings in double quotes (see the sketch below)
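    One way to implement that last hint (a hedged sketch, not part of the original procedure: it would replace the Varchar2/Char branch of the CASE expression above) is to generate a select-list expression that wraps the value in double quotes and doubles any embedded quotes:

    -- hypothetical replacement for the Varchar2_Type/Char_Type branch;
    -- generates an expression like  '"'||replace("ENAME",'"','""')||'"'
    WHEN desctab (i).col_type IN (DBMS_SQL.Varchar2_Type, DBMS_SQL.Char_Type)
    THEN
       '''"''||replace("' || desctab (i).col_name || '",''"'',''""'')||''"'''

    With ; as the separator, a value such as A;B would then be exported as "A;B" and survive a round trip through most CSV parsers.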

  5. Laurent Schneider Post author

    Thanks all; now using put_line to write multiple lines, dbms_types types, and a query/cursor instead of a table.

    @Saurabh : nope, no support for object types. Every object like XML may have its own “to_char” method, and I doubt you want to export XML in a CSV file 😉

  6. Robin

    Great attempt.
    I tried using this with a very long query (maybe 200 lines) but it fails.
    Is there a way to pass the query as an input SQL file for large queries?

  7. Sanjeev Chauhan

    @Robin

    You can also use the array interface of dbms_sql.parse. You would declare two local variables, thus:


    l_stmtarr dbms_sql.varchar2s;
    l_cntr integer := floor( length( query ) / 256 );

    Then, before the line DBMS_SQL.parse (curid, query, DBMS_SQL.NATIVE); do something like:


    for i in 0 .. l_cntr
    loop
       l_stmtarr(i) := substr( query, i*256 + 1, 256 );
    end loop;

    Now your parse line becomes:

    dbms_sql.parse ( curid, l_stmtarr, 0, l_cntr, false, dbms_sql.native );
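    Assembled, the relevant fragment of bulk_csv would then read (a sketch combining the pieces above; the 256-byte chunk size is from this comment, not a requirement):

    -- extra declarations in bulk_csv's declaration section
    l_stmtarr   dbms_sql.varchar2s;
    l_cntr      integer := floor (length (query) / 256);
    ...
    -- split the (possibly very long) query into 256-byte pieces
    for i in 0 .. l_cntr
    loop
       l_stmtarr (i) := substr (query, i * 256 + 1, 256);
    end loop;

    -- parse the pieces; lb = 0 and ub = l_cntr bound the used slots
    dbms_sql.parse (curid,
                    l_stmtarr,
                    0,
                    l_cntr,
                    false,
                    dbms_sql.native);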

  8. Dario

    Hi Laurent,
    your code rocks, but I’ve found a little bug: it writes the header every 10000 rows (the bulk size).
    I tried your example lowering the bulk threshold (less PGA use 😉 ); this snippet resolves the problem.
    Kindest regards

    current_line := header;

    LOOP
       FETCH rcur
          BULK COLLECT INTO res
          LIMIT 1000;

       IF current_line != header
       THEN
          current_line := NULL;
       END IF;

       next_line := NULL;
