Tag Archives: csv

CSV part 4, fast !!

I got some comments that my other csv solutions were slow to export gigabytes of data.

One more try.

thanks to the feedbacks, I provided a new version

This could generate very large files in just a few minutes (instead of hours).

I use bulk collect and utl_file to boost performance

CREATE TYPE collist IS TABLE OF VARCHAR2 (4000)
/

CREATE OR REPLACE PROCEDURE bulk_csv (directory_name    VARCHAR2,
                                      file_name         VARCHAR2,
                                      query        VARCHAR2)
   AUTHID CURRENT_USER
IS
   -- $Id$
   fh             UTL_FILE.file_type;
   stmt           VARCHAR2 (32767) := NULL;
   header         VARCHAR2 (32767) := NULL;
   curid          NUMBER;
   desctab        DBMS_SQL.DESC_TAB;
   colcnt         NUMBER;
   namevar        VARCHAR2 (32767);

   TYPE cola IS TABLE OF collist
                   INDEX BY BINARY_INTEGER;

   res            cola;
   rcur           SYS_REFCURSOR;
   current_line   VARCHAR2 (32767);
   next_line      VARCHAR2 (32767);

BEGIN
   curid := DBMS_SQL.open_cursor;
   DBMS_SQL.parse (curid, query, DBMS_SQL.NATIVE);
   DBMS_SQL.DESCRIBE_COLUMNS (curid, colcnt, desctab);

   FOR i IN 1 .. colcnt
   LOOP
      DBMS_SQL.DEFINE_COLUMN (curid,
                              i,
                              namevar,
                              32767);
   END LOOP;

   IF DBMS_SQL.execute (curid) = 0
   THEN
      FOR i IN 1 .. colcnt
      LOOP
         IF (i > 1)
         THEN
            header := header || ';';
            stmt := stmt || ',';
         END IF;

         header := header || desctab (i).col_name;
         stmt :=
               stmt
            || CASE
                  WHEN desctab (i).col_type IN
                          (DBMS_SQL.Varchar2_Type,
                           DBMS_SQL.Char_Type)
                  THEN
                     '"'||desctab (i).col_name || '"'
                  WHEN desctab (i).col_type IN
                          (DBMS_SQL.Number_Type,
                           DBMS_SQL.Date_Type,
                           DBMS_SQL.Binary_Float_Type,
                           DBMS_SQL.Binary_Bouble_Type,
                           DBMS_SQL.Timestamp_Type,
                           DBMS_SQL.Timestamp_With_TZ_Type,
                           DBMS_SQL.Interval_Year_to_Month_Type,
                           DBMS_SQL.Interval_Day_To_Second_Type,
                           DBMS_SQL.Timestamp_With_Local_TZ_type)
                  THEN
                     'to_char("' || desctab (i).col_name || '")'
                  WHEN desctab (i).col_type = DBMS_SQL.Raw_Type
                  THEN
                     'rawtohex("' || desctab (i).col_name || '")'
                  WHEN desctab (i).col_type = DBMS_SQL.Rowid_Type
                  THEN
                     '''unsupport datatype : ROWID'''
                  WHEN desctab (i).col_type = DBMS_SQL.Long_Type
                  THEN
                     '''unsupport datatype : LONG'''
                  WHEN desctab (i).col_type = DBMS_SQL.Long_Raw_Type
                  THEN
                     '''unsupport datatype : LONG RAW'''
                  WHEN desctab (i).col_type = DBMS_SQL.User_Defined_Type
                  THEN
                     '''unsupport datatype : User Defined Type'''
                  WHEN desctab (i).col_type = DBMS_SQL.MLSLabel_Type
                  THEN
                     '''unsupport datatype : MLSLABEL'''
                  WHEN desctab (i).col_type = DBMS_SQL.Ref_Type
                  THEN
                     '''unsupport datatype : REF'''
                  WHEN desctab (i).col_type = DBMS_SQL.Clob_Type
                  THEN
                     '''unsupport datatype : CLOB'''
                  WHEN desctab (i).col_type = DBMS_SQL.Blob_Type
                  THEN
                     '''unsupport datatype : BLOB'''
                  WHEN desctab (i).col_type = DBMS_SQL.Rowid_Type
                  THEN
                     '''unsupport datatype : ROWID'''
                  WHEN desctab (i).col_type = DBMS_SQL.Bfile_Type
                  THEN
                     '''unsupport datatype : BFILE'''
                  WHEN desctab (i).col_type = DBMS_SQL.Urowid_Type
                  THEN
                     '''unsupport datatype : UROWID'''
                  ELSE
                     '''unsupport datatype : '||desctab (i).col_type||''''
               END;
      END LOOP;

      stmt := 'select collist(' || stmt || ') from (' || query || ')';

      fh :=
         UTL_FILE.fopen (directory_name,
                         file_name,
                         'W',
                         32767);

      begin
            OPEN rcur FOR stmt;
      exception 
        when others then 
          dbms_output.put_line(stmt);
          raise;
      end;
      LOOP
         FETCH rcur
         BULK COLLECT INTO res
         LIMIT 10000;

         current_line := header;
         next_line := NULL;

         FOR f IN 1 .. res.COUNT
         LOOP
            FOR g IN 1 .. res (f).COUNT
            LOOP
               IF (g > 1)
               THEN
                  next_line := next_line || ';';
               END IF;

               IF (  NVL(LENGTH (current_line),0)
                   + NVL(LENGTH (next_line),0)
                   + NVL(LENGTH (res (f) (g)),0)
                   + 5 > 32767)
               THEN
                  UTL_FILE.put_line (fh, current_line);
                  current_line := NULL;
               END IF;

               IF (NVL(LENGTH (next_line),0) + NVL(LENGTH (res (f) (g)),0) + 5 > 32767)
               THEN
                  UTL_FILE.put_line (fh, next_line);
                  next_line := NULL;
               END IF;

               next_line := next_line || res (f) (g);
            END LOOP;

            current_line :=
                  CASE
                     WHEN current_line IS NOT NULL
                     THEN
                        current_line || CHR (10)
                  END
               || next_line;
            next_line := NULL;
         END LOOP;

         UTL_FILE.put_line (fh, current_line);
         EXIT WHEN rcur%NOTFOUND;
      END LOOP;

      CLOSE rcur;

      UTL_FILE.fclose (fh);
   END IF;

   DBMS_SQL.CLOSE_CURSOR (curid);
END;
/

CREATE OR REPLACE DIRECTORY tmp AS '/tmp';

EXEC bulk_csv('TMP','emp.csv','SELECT * FROM EMP ORDER BY ENAME')


EMPNO;ENAME;JOB;MGR;HIREDATE;SAL;COMM;DEPTNO
7876;ADAMS;CLERK;7788;1987-05-23 00:00:00;1100;;20
7499;ALLEN;SALESMAN;7698;1981-02-20 00:00:00;1600;30;30
7698;BLAKE;MANAGER;7839;1981-05-01 00:00:00;2850;;30
7782;CLARK;MANAGER;7839;1981-06-09 00:00:00;2450;;10
7902;FORD;ANALYST;7566;1981-12-03 00:00:00;3000;;20
7900;JAMES;CLERK;7698;1981-12-03 00:00:00;950;;30
7566;JONES;MANAGER;7839;1981-04-02 00:00:00;2975;;20
7839;KING;PRESIDENT;;1981-11-17 00:00:00;5000;;10
7654;MARTIN;SALESMAN;7698;1981-09-28 00:00:00;1250;140;30
7934;MILLER;CLERK;7782;1982-01-23 00:00:00;1300;;10
7788;SCOTT;ANALYST;7566;1987-04-19 00:00:00;3000;;20
7369;SMITH;CLERK;7902;1980-12-17 00:00:00;800;;20
7844;TURNER;SALESMAN;7698;1981-09-08 00:00:00;1500;0;30
7521;WARD;SALESMAN;7698;1981-02-22 00:00:00;1250;50;30