Thursday, November 28, 2013

INSERT ALL

During a performance inspection of some E-Business Suite batch programs running on 11.2.0.3, I noticed that almost 99% of the consistent reads were missing from the tkprof file for a particular insert statement. It was a giant INSERT ALL statement whose accompanying SELECT contained several subqueries in a "case when exists (query)" construct. And those subqueries in the select list were exactly what was missing from the tkprof file. I had seen this behaviour before in version 9 databases with regular SELECT statements, but since version 10 that anomaly has been resolved.
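
To give an idea of the shape of such a statement, here is an illustrative sketch; the table and column names are made up, not the actual E-Business Suite code:

insert all
  when rec_type = 'A' then
    into target_a (id, has_detail) values (id, has_detail)
  when rec_type = 'B' then
    into target_b (id, has_detail) values (id, has_detail)
select s.id
     , s.rec_type
     , case
         when exists ( select null
                         from detail_table d
                        where d.master_id = s.id
                     )
         then 'Y'
         else 'N'
       end has_detail
  from source_table s
/

It is the work done by subqueries like the one inside the EXISTS that did not show up in the trace.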

This is my reproduction of the scenario on 11.2.0.3: a small table containing 100 rows, a larger table containing 100,000 rows, and two new tables into which the rows are copied.

SQL> create table small_table (id,name)
  2  as
  3   select level
  4        , 'Name ' || to_char(level)
  5     from dual
  6  connect by level <= 100
  7  /
 
Table created.
 
SQL> create table large_table (id,name)
  2  as
  3   select level
  4        , 'Name ' || to_char(level)
  5     from dual
  6  connect by level <= 100000
  7  /
 
Table created.
 
SQL> create table new_table_1
  2  ( id         number
  3  , name       varchar2(11)
  4  , average_id number
  5  )
  6  /
 
Table created.
 
SQL> create table new_table_2
  2  ( id         number
  3  , name       varchar2(11)
  4  , average_id number
  5  )
  6  /
 
Table created.
 
SQL> begin
  2    dbms_stats.gather_table_stats(user,'small_table');
  3    dbms_stats.gather_table_stats(user,'large_table');
  4    dbms_stats.gather_table_stats(user,'new_table_1');
  5    dbms_stats.gather_table_stats(user,'new_table_2');
  6  end;
  7  /
 
PL/SQL procedure successfully completed.

For each of the 100 rows of small_table, I'm selecting the average ID of 1,000 rows in large_table, as a scalar subquery.
The execution plan shows 100 starts of the scalar subquery on large_table, and we can see that it took 3.24 seconds.

SQL> set serveroutput off
SQL> alter session set statistics_level = all
  2  /
 
Session altered.
 
SQL> select s.id
  2       , s.name
  3       , ( select avg(l.id)
  4             from large_table l
  5            where ceil(l.id / 1000) = s.id
  6         ) avg_id
  7    from small_table s
  8  /
 
        ID NAME                                              AVG_ID
---------- --------------------------------------------- ----------
         1 Name 1                                             500,5
         2 Name 2                                            1500,5
         3 Name 3                                            2500,5
...[95 lines removed]...
        99 Name 99                                          98500,5
       100 Name 100                                         99500,5
 
100 rows selected.
 
SQL> select * from table(dbms_xplan.display_cursor(null,null,'allstats last'))
  2  /
 
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------
SQL_ID  7167wkgunj23z, child number 0
-------------------------------------
select s.id      , s.name      , ( select avg(l.id)            from
large_table l           where ceil(l.id / 1000) = s.id        ) avg_id
from small_table s
 
Plan hash value: 1885542926
 
--------------------------------------------------------------------------------------------
| Id  | Operation          | Name        | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
--------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT   |             |      1 |        |    100 |00:00:00.01 |       9 |
|   1 |  SORT AGGREGATE    |             |    100 |      1 |    100 |00:00:03.28 |   58900 |
|*  2 |   TABLE ACCESS FULL| LARGE_TABLE |    100 |   1000 |    100K|00:00:03.24 |   58900 |
|   3 |  TABLE ACCESS FULL | SMALL_TABLE |      1 |    100 |    100 |00:00:00.01 |       9 |
--------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - filter(CEIL("L"."ID"/1000)=:B1)
 
 
22 rows selected.

A full table scan against small_table, and for each of its 100 rows a full table scan against large_table. Now let's see what the plan looks like when the same query is wrapped in an INSERT SELECT statement.

SQL> insert into new_table_1
  2  select s.id
  3       , s.name
  4       , ( select avg(l.id)
  5             from large_table l
  6            where ceil(l.id / 1000) = s.id
  7         ) avg_id
  8    from small_table s
  9  /
 
100 rows created.
 
SQL> select * from table(dbms_xplan.display_cursor(null,null,'allstats last'))
  2  /
 
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------
SQL_ID  dfg734y8ym7vk, child number 0
-------------------------------------
insert into new_table_1 select s.id      , s.name      , ( select
avg(l.id)            from large_table l           where ceil(l.id /
1000) = s.id        ) avg_id   from small_table s
 
Plan hash value: 1885542926
 
--------------------------------------------------------------------------------------------------
| Id  | Operation                | Name        | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
--------------------------------------------------------------------------------------------------
|   0 | INSERT STATEMENT         |             |      1 |        |      0 |00:00:03.25 |   58942 |
|   1 |  LOAD TABLE CONVENTIONAL |             |      1 |        |      0 |00:00:03.25 |   58942 |
|   2 |   SORT AGGREGATE         |             |    100 |      1 |    100 |00:00:03.25 |   58900 |
|*  3 |    TABLE ACCESS FULL     | LARGE_TABLE |    100 |   1000 |    100K|00:00:03.21 |   58900 |
|   4 |   TABLE ACCESS FULL      | SMALL_TABLE |      1 |    100 |    100 |00:00:00.01 |       2 |
--------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   3 - filter(CEIL("L"."ID"/1000)=:B1)
 
 
23 rows selected.

An additional LOAD TABLE CONVENTIONAL step, and the selection against large_table is still visible. Next, I'm going to use the same query in an INSERT ALL statement.

SQL> rollback
  2  /
 
Rollback complete.
 
SQL> insert all
  2    when mod(id,2) = 0 then into new_table_1
  3    when mod(id,2) = 1 then into new_table_2
  4  select s.id
  5       , s.name
  6       , ( select avg(l.id)
  7             from large_table l
  8            where ceil(l.id / 1000) = s.id
  9         ) avg_id
10    from small_table s
11  /
 
100 rows created.
 
SQL> select * from table(dbms_xplan.display_cursor(null,null,'allstats last'))
  2  /
 
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------
SQL_ID  2m5kj5cvf1jx1, child number 0
-------------------------------------
insert all   when mod(id,2) = 0 then into new_table_1   when mod(id,2)
= 1 then into new_table_2 select s.id      , s.name      , ( select
avg(l.id)            from large_table l           where ceil(l.id /
1000) = s.id        ) avg_id   from small_table s
 
Plan hash value: 1492625026
 
---------------------------------------------------------------------------------------------
| Id  | Operation           | Name        | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------
|   0 | INSERT STATEMENT    |             |      1 |        |      0 |00:00:03.27 |   58947 |
|   1 |  MULTI-TABLE INSERT |             |      1 |        |      0 |00:00:03.27 |   58947 |
|   2 |   TABLE ACCESS FULL | SMALL_TABLE |      1 |    100 |    100 |00:00:00.01 |       2 |
|   3 |   INTO              | NEW_TABLE_1 |      0 |        |      0 |00:00:00.01 |       0 |
|   4 |   INTO              | NEW_TABLE_2 |      0 |        |      0 |00:00:00.01 |       0 |
---------------------------------------------------------------------------------------------
 
 
19 rows selected.

And look: the selections against large_table have disappeared. It looks like the insertion itself is what takes 3.27 seconds, but the MULTI-TABLE INSERT step is just accumulating the time and buffers from the steps below it, visible and invisible.

I repeated this test on 12.1.0.1. The optimizer in this version has improved, as can be seen by this plan for the exact same statement:

--------------------------------------------------------------------------------------------------------------------------
| Id  | Operation             | Name        | Starts | E-Rows | A-Rows |   A-Time   | Buffers |  OMem |  1Mem | Used-Mem |
--------------------------------------------------------------------------------------------------------------------------
|   0 | INSERT STATEMENT      |             |      1 |        |      0 |00:00:00.09 |     304 |       |       |          |
|   1 |  MULTI-TABLE INSERT   |             |      1 |        |      0 |00:00:00.09 |     304 |       |       |          |
|*  2 |   HASH JOIN OUTER     |             |      1 |    100K|    100 |00:00:00.09 |     294 |  1696K|  1696K| 1491K (0)|
|   3 |    TABLE ACCESS FULL  | SMALL_TABLE |      1 |    100 |    100 |00:00:00.01 |       3 |       |       |          |
|   4 |    VIEW               | VW_SSQ_1    |      1 |    100K|    100 |00:00:00.09 |     291 |       |       |          |
|   5 |     SORT GROUP BY     |             |      1 |    100K|    100 |00:00:00.09 |     291 | 73728 | 73728 |          |
|   6 |      TABLE ACCESS FULL| LARGE_TABLE |      1 |    100K|    100K|00:00:00.03 |     291 |       |       |          |
|   7 |   INTO                | NEW_TABLE_1 |      0 |        |      0 |00:00:00.01 |       0 |       |       |          |
|   8 |   INTO                | NEW_TABLE_2 |      0 |        |      0 |00:00:00.01 |       0 |       |       |          |
--------------------------------------------------------------------------------------------------------------------------

A much better plan: no more 100 starts of the query against large_table, but one nice HASH JOIN OUTER. However, the goal here is not to get the most efficient execution plan, but a plan equal to the 11.2.0.3 one, which is why I added the NO_QUERY_TRANSFORMATION hint. This is what the INSERT ALL looks like in 12.1.0.1 with that hint:

SQL> insert all
  2    when mod(id,2) = 0 then into new_table_1
  3    when mod(id,2) = 1 then into new_table_2
  4  select /*+ no_query_transformation */ s.id
  5       , s.name
  6       , ( select avg(l.id)
  7             from large_table l
  8            where ceil(l.id / 1000) = s.id
  9         ) avg_id
 10    from small_table s
 11  /

100 rows created.

SQL> select * from table(dbms_xplan.display_cursor(null,null,'allstats last'))
  2  /

PLAN_TABLE_OUTPUT
---------------------------------------------------------------------------------------------------------------------------------------
SQL_ID  atb16ym82sygz, child number 0
-------------------------------------
insert all   when mod(id,2) = 0 then into new_table_1   when mod(id,2)
= 1 then into new_table_2 select /*+ no_query_transformation */ s.id
  , s.name      , ( select avg(l.id)            from large_table l
     where ceil(l.id / 1000) = s.id        ) avg_id   from small_table s

Plan hash value: 220198846

---------------------------------------------------------------------------------------------
| Id  | Operation           | Name        | Starts | E-Rows | A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------
|   0 | INSERT STATEMENT    |             |      1 |        |      0 |00:00:02.03 |   29113 |
|   1 |  MULTI-TABLE INSERT |             |      1 |        |      0 |00:00:02.03 |   29113 |
|   2 |   SORT AGGREGATE    |             |    100 |      1 |    100 |00:00:02.03 |   29100 |
|*  3 |    TABLE ACCESS FULL| LARGE_TABLE |    100 |   1000 |    100K|00:00:02.00 |   29100 |
|   4 |   VIEW              |             |      1 |    100 |    100 |00:00:02.03 |   29103 |
|   5 |    TABLE ACCESS FULL| SMALL_TABLE |      1 |    100 |    100 |00:00:00.01 |       3 |
|   6 |   INTO              | NEW_TABLE_1 |      0 |        |      0 |00:00:00.01 |       0 |
|   7 |   INTO              | NEW_TABLE_2 |      0 |        |      0 |00:00:00.01 |       0 |
---------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------
   3 - filter(CEIL("L"."ID"/1000)=:B1)

27 rows selected.

So in 12.1.0.1 the scalar subqueries against large_table are visible again. Problem solved, unless you're not on 12c yet.
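
If you are stuck on 11.2.0.3 and want the work against large_table to be visible (and get rid of the 100 starts while you're at it), you could rewrite the scalar subquery as a join yourself, mimicking what the 12c optimizer does automatically. A sketch, using the same test tables:

insert all
  when mod(id,2) = 0 then into new_table_1
  when mod(id,2) = 1 then into new_table_2
select s.id
     , s.name
     , l.avg_id
  from small_table s
       left outer join
       ( select ceil(id / 1000) grp_id
              , avg(id) avg_id
           from large_table
          group by ceil(id / 1000)
       ) l
       on (l.grp_id = s.id)
/

The aggregation and the full scan of large_table should then show up as ordinary row sources under the MULTI-TABLE INSERT step.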

Sunday, October 6, 2013

Distributing tables evenly into groups using the SQL Model Clause

My colleague Ronald Rood recently had a nice SQL challenge for me. He had to perform an export of all the tables in a schema the old-fashioned way and wanted to manually parallelize the operation. For that to work, all tables needed to be assigned to a group.

For the parallelization to work, the groups need to be balanced. If, say, all large tables end up in one group, parallelizing won't help much, because the total time wouldn't be reduced much. So the question is how to distribute the tables evenly, such that all groups have approximately the same total size in bytes. Another requirement, which came later, was to also take the total number of tables into account and distribute the number of tables evenly as well. The schema contained 90,000 tables from a standard software application, most of them empty.

Ronald supplied a create table script for me to test with, simulating the content of his dba_tables dictionary view, which is the source for his query. The test table contains 14,999 records. Here are the table and a select statement that give you an idea of what's in it.

SQL> desc mytables
 Name                                                   Null?    Type         
 ------------------------------------------------------ -------- --------------
 TABLE_NAME                                                      VARCHAR2(30)
 NUM_ROWS                                                        NUMBER
 AVG_ROW_LEN                                                     NUMBER

SQL> select rownum
  2       , nvl(num_rows,0) * nvl(avg_row_len,0) bytes
  3    from mytables
  4   order by bytes desc
  5  /

         ROWNUM           BYTES
--------------- ---------------
              1     29387074200
              2     28471135400
              3     23974896400
              4     18589737600
              5     17953177900
              6     17014479300
              7     10880526800
              8      8832810600
              9      8372641700
             10      7888944000
             11      7527559700
             12      6314082900
             13      5814484500
             14      5452809600
             15      5194260000
             16      5160845400
             17      4323377800
             18      4245004800
             19      4226310600
             20      4196381000
...

           2256               4
           2257               4
           2258               3
           2259               3
           2260               3
           2261               3
           2262               3
           2263               2
           2264               2
           2265               2
           2266               2
           2267               2
           2268               2
           2269               2
           2270               2
           2271               2
           2272               2
           2273               0
           2274               0
           2275               0
           2276               0
...

          14995               0
          14996               0
          14997               0
          14998               0
          14999               0

14999 rows selected.


So there are a few large tables, the largest one being approximately 29GB. And most of the tables, 12727, are empty.

The algorithm I chose to distribute the tables in N groups, is the simplest one I could think of, and goes schematically like this:
  • order all tables by size in descending order
  • place the N largest tables in groups 1 .. N
  • iterate over the tables with a non-empty size in descending order and add the table to the first group encountered whose size is below the running average size of the groups
  • iterate over the empty tables and add the table to the first group encountered whose total number of tables is below the average number of tables per group
This algorithm isn't perfect, but for a large number of tables it's pretty good, and certainly good enough for this one-off task.
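
Before showing the model clause solution, here is the same greedy idea as a procedural sketch in PL/SQL, just to illustrate the algorithm (it assumes the mytables table above and serveroutput switched on):

set serveroutput on
declare
  c_groups      constant pls_integer := 10;
  type num_tab is table of number index by pls_integer;
  l_grp_bytes   num_tab;       -- running total size per group
  l_grp_count   num_tab;       -- running number of tables per group
  l_total_bytes number := 0;
  l_total_count pls_integer := 0;
begin
  for g in 1 .. c_groups
  loop
    l_grp_bytes(g) := 0;
    l_grp_count(g) := 0;
  end loop;
  -- visit the tables in descending order of size
  for r in ( select table_name
                  , nvl(num_rows,0) * nvl(avg_row_len,0) bytes
               from mytables
              order by 2 desc
           )
  loop
    l_total_bytes := l_total_bytes + r.bytes;
    l_total_count := l_total_count + 1;
    for g in 1 .. c_groups
    loop
      -- non-empty table: first group whose size is below the running average size
      -- empty table    : first group whose count is below the running average count
      if ( r.bytes > 0 and l_grp_bytes(g) <= l_total_bytes / c_groups )
      or ( r.bytes = 0 and l_grp_count(g) <= l_total_count / c_groups )
      then
        -- here you would record the (r.table_name, g) assignment somewhere
        l_grp_bytes(g) := l_grp_bytes(g) + r.bytes;
        l_grp_count(g) := l_grp_count(g) + 1;
        exit;
      end if;
    end loop;
  end loop;
  for g in 1 .. c_groups
  loop
    dbms_output.put_line
    ( 'group ' || g || ': ' || l_grp_count(g) || ' tables, '
      || l_grp_bytes(g) || ' bytes' );
  end loop;
end;
/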

Below is the SQL statement I used. The bind variable :AANTAL_GROEPEN (Dutch for "number of groups") determines the number of groups. The final SELECT differs from the original, just to show the distribution better; the original statement simply had a "select * from t" there.

SQL> var AANTAL_GROEPEN number

SQL> exec :AANTAL_GROEPEN := 10

PL/SQL procedure successfully completed.

 
SQL> with t as
  2  ( select table_name
  3         , orig_bytes
  4         , grp
  5      from ( select table_name
  6                  , nvl(num_rows,0) * nvl(avg_row_len,0) bytes
  7                  , count(*) over () cnt
  8               from mytables
  9           )
 10     model
 11           dimension by (row_number() over (order by bytes desc) i)
 12           measures
 13           ( table_name
 14           , bytes
 15           , bytes orig_bytes
 16           , row_number() over (order by bytes desc) grp
 17           , cnt
 18           , sum(bytes) over (order by bytes desc) running_sum
 19           , 1 aantal
 20           )
 21           rules iterate (100000) until (iteration_number+1 >= cnt[1] - :AANTAL_GROEPEN)
 22           ( grp[:AANTAL_GROEPEN+1+iteration_number]
 23             = case
 24               when bytes[:AANTAL_GROEPEN+1+iteration_number] > 0 then
 25                 case
 26                 when bytes[1] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 1
 27                 when bytes[2] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 2
 28                 when bytes[3] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 3
 29                 when bytes[4] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 4
 30                 when bytes[5] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 5
 31                 when bytes[6] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 6
 32                 when bytes[7] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 7
 33                 when bytes[8] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 8
 34                 when bytes[9] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 9
 35                 when bytes[10] <= running_sum[cv()-1] / :AANTAL_GROEPEN then 10
 36                 end
 37               else  -- empty tables
 38                 case
 39                 when aantal[1] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 1
 40                 when aantal[2] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 2
 41                 when aantal[3] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 3
 42                 when aantal[4] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 4
 43                 when aantal[5] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 5
 44                 when aantal[6] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 6
 45                 when aantal[7] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 7
 46                 when aantal[8] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 8
 47                 when aantal[9] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 9
 48                 when aantal[10] <= (:AANTAL_GROEPEN+1+iteration_number ) / :AANTAL_GROEPEN then 10
 49                 end
 50               end
 51           , bytes[grp[:AANTAL_GROEPEN+1+iteration_number]]
 52             = bytes[cv()] + bytes[:AANTAL_GROEPEN+1+iteration_number]
 53           , aantal[grp[:AANTAL_GROEPEN+1+iteration_number]]
 54             = aantal[cv()] + 1
 55           )
 56  )
 57  select grp
 58       , sum(orig_bytes)
 59       , count(*)
 60    from t
 61   group by grp
 62   order by grp
 63  /


           GRP SUM(ORIG_BYTES)        COUNT(*)
--------------- --------------- ---------------
              1     29387074200            1500
              2     28471135400            1500
              3     27978114708            1500
              4     27978114707            1500
              5     27978114707            1500
              6     27978114708            1500
              7     27978114706            1500
              8     27978114706            1500
              9     27978114707            1500
             10     26076134516            1499

10 rows selected.

A few explanatory remarks. I needed some extra measures to help with the calculation of the average size of the groups:
  • the running sum of bytes
The analytic function "sum(bytes) over (order by bytes desc)" calculates this running sum. Divided by :AANTAL_GROEPEN, it gives us the running average group size needed in the algorithm.
  • the current size of all groups
The bytes "array" contains 14,999 elements, but the first :AANTAL_GROEPEN (10) elements are reused to hold the intermediate group sizes. The aantal measure ("aantal" is Dutch for "count") is used in the same way to hold the intermediate number of tables per group.
  • the total number of elements
This one is calculated by a simple "count(*) over ()" and is needed in the UNTIL clause to stop the iteration when there are no more elements.

The whole statement is a tad ugly because the case expression has 10 repeating WHEN .. THEN branches, but it does the job and it allows you to play with the :AANTAL_GROEPEN variable, which needs to be between 1 and 10. Still, it is another nice use case for the SQL model clause, and it might come in handy for other DBAs or developers who need a single query to distribute their data evenly.
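
If you want the outcome to actually drive the export sessions, you could keep the same "with t as (...)" block and replace the final aggregate SELECT with something like this sketch, which lists the tables assigned to one particular group:

select table_name
     , orig_bytes
  from t
 where grp = 1
 order by orig_bytes desc
/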

UPDATE:
Of course, it turns out the problem isn't new.
Here is a blogpost by Brendan Furey about the same subject.
And here is the AskTom thread he refers to, with several model clause queries and one using recursive subquery factoring.

Wednesday, April 17, 2013

OGh APEX World 2013

Last Tuesday, the fourth OGh APEX World day took place, again at Figi Zeist. Several people have already written about this day.

Here is a short writeup about my experiences that day.

For me, this year was a little different. In the first three editions of the OGh APEX day, Learco Brizzi, Marti Koppelmans and I "just" invited the speakers and put together the program. This year, I was also a speaker, and Ciber had a demo booth which was my base location for the day.

No changes, fortunately, at the keynote. Each year, we are very grateful that the APEX product development team sends a member to our event to deliver a presentation, mostly about upcoming features. This year, David Peake, the product manager of APEX himself, delivered the keynote. His talk consisted of two parts: the first was about how APEX will fit into the new 12c pluggable database architecture, and the second covered several 5.0 features that may or may not make it into the final product. For more details about this keynote I won't repeat Christian Rokitta, but simply refer you to his blogpost.

After the keynote, we had programmed a total of 15 sessions in 3 tracks. The first track was customer cases, designed to show how other companies use APEX in their organizations. This year we had lots of interesting cases to choose from, of which unfortunately only 5 could be selected. The second track was the international track, where all sessions were in English, especially for our foreign guests, so they too could have a day full of interesting sessions. The third track was an all-round technical track, in Dutch, with sessions for beginners as well as expert developers. It was my impression that during each of the five slots, the 300 participants were spread out pretty evenly across the three tracks.

The first session I chose was Carsten Czarski's "APEX Hosting In The Own Company: apex.mycompany.com". He had a very lively story about what you need to set up when hosting an APEX development environment in your own company. This is a great way to spread the adoption of APEX: if the development environment is in place, the applications will soon follow. Especially the importance and usage of the resource manager was well explained.

After the break, I presented my paper about professional software development using APEX, on the main stage, due to a pretty high number of preregistrations. This meant a fantastic theater-style room, but also two spotlights shining in my face for 50 minutes. The last time I did this talk, at UKOUG, I was not too happy with how it went. This time I had made several improvements and focused much more on why each step is so important, which made the story much better, in my opinion. I had lots of in-depth conversations afterwards, which showed me the topic is indeed an important one that people are interested in. At the beginning of the session I also did a little poll to see whether the people in the audience put just the large APEX export file under version control or the individual components, and whether they have self-contained development environments. The result was only a few raised hands in each case, and no hands at all for one-step builds and continuous integration. So there's still a lot to win almost everywhere :-)!

The third session was the customer case "CB Goes Responsive". CB Logistics is the client I'm currently working for, and I participated in the project that was presented here, so obviously I wanted to see this one. Chris Amelink and Mark Rooijakkers from CB and Stijn van Raes from iAdvise explained how we created a responsive web application with APEX and Twitter Bootstrap. The business case was the e-book publishers' wish to see their sales numbers one day after the sales were made. It's very interesting for them to see whether promotional actions have a measurable effect, and this application does exactly that.

My fourth session was Roel Hartman's "My First Mobi", where he showed how to get started with mobile development. Not every demo went as smoothly as Roel would have wanted, but he still managed to get his points across. And in case you missed it and want to know more about building mobile web applications with APEX, you can find his (and Christian's and David's) book here.

The last session of the day for me was Christian Rokitta's session about PhoneGap. This session was at the top of my list, as I had only heard of PhoneGap and was very curious to see how it works. With PhoneGap you can build hybrid apps, using HTML, CSS and JavaScript, while still using the capabilities of a native app. A nice demonstration was when he took a photo of the audience with his tablet and uploaded it into the database.

Again I enjoyed a day full of APEX content and speaking with everyone. I'm already looking forward to a fifth edition.

Wednesday, March 20, 2013

Paper "Professional Software Development using APEX"

As announced on Twitter yesterday, my paper titled "Professional Software Development Using Oracle Application Express" has been put online. I'm copying the first two paragraphs of the summary here, so you can decide if you want to read the rest as well:

Software development involves much more than just producing lines of code. It is also about version control, deployment to other environments, integrating code and unit testing. It is appropriate to the profession of software development to have a framework in place that handles all of this, so that developers can focus on the creative and fun part of their job: producing excellent and well-designed code. Without such a framework, you need to be cautious, and deployments become more difficult and more error-prone, which means more time and money needlessly spent and a development process that is less fun than it should be. Fortunately, these problems are well known and solutions are already widely adopted. However, in database application development in general, and in APEX development specifically, these practices are unfortunately not so common. It is our belief that database and APEX development should not be treated differently and deserve a similar framework. So that's what we set out to do.

This paper describes how we develop new APEX applications in such a way that most of the effort gets spent on actually developing the code. If you can take advantage of version control, build and deploy your entire application in one step, make a daily build to continuously integrate all developed code, and make sure your developers have their own self-contained development environment, then the benefits are many. You'll experience fewer errors, higher productivity and seamless software delivery from your team. Refactoring code becomes painless, since you can easily be assured the changes won't break the application. Overall quality goes up, ensuring a much longer lifetime of the application.


You can download the PDF here.

Tuesday, January 29, 2013

Dummy output parameters

Yesterday I encountered a code snippet which taught me something I did not think was possible. But it is, as I'll show in this blogpost. It's not spectacular in any way, just convenient at most.

When you need a function or procedure to retrieve some value, you'll start examining the existing code base to see if a function already exists that does the job. Probably you won't find an exact match, but there is this procedure that retrieves a lot of values, including the value you're interested in. Sound familiar? You now have two choices, depending on whether you want to minimize code or execution time.

If you want to minimize execution time, you don't want to reuse this procedure as it does more work than you need, and you'll end up writing your own specific function.

If you want to minimize code, you'll end up reusing the procedure and ignoring all output parameters you're not interested in. It's this last variant I'll be talking about in the rest of this post. Here is an example of the api procedure I want to reuse:

SQL> create or replace procedure api
  2  ( p1 out number
  3  , p2 out number
  4  , p3 out number
  5  )
  6  as
  7  begin
  8    p1 := 1;
  9    p2 := 2;
 10    p3 := 3;
 11  end api;
 12  /

Procedure created.

And this is how I would reuse this code:

SQL> declare
  2    l_number_i_am_interested_in number;
  3    l_p2_dummy                  number;
  4    l_p3_dummy                  number;
  5  begin
  6    api
  7    ( p1 => l_number_i_am_interested_in
  8    , p2 => l_p2_dummy
  9    , p3 => l_p3_dummy
 10    );
 11  end;
 12  /

PL/SQL procedure successfully completed.

For each output parameter I would declare a variable with the suffix _dummy, to make my intent clear that I'm not really going to use these variables. The code snippet I saw was of this form:

SQL> declare
  2    l_number_i_am_interested_in number;
  3    l_dummy                     number;
  4  begin
  5    api
  6    ( p1 => l_number_i_am_interested_in
  7    , p2 => l_dummy
  8    , p3 => l_dummy
  9    );
 10  end;
 11  /

PL/SQL procedure successfully completed.

Just one variable for the two dummy output parameters. I thought this could not work, but clearly it does. What value is assigned to l_dummy? When the procedure ends, the actual parameter l_dummy, which is used twice, gets the final values of both formal parameters p2 and p3 copied back to it: once the value 2 and once the value 3. The order in which these copy-backs happen determines the real value of l_dummy. Fortunately, in this situation it doesn't matter which value ends up in l_dummy, since we're not interested in it. But of course I'm a bit curious, so:

SQL> declare
  2    l_number_i_am_interested_in number;
  3    l_dummy                     number;
  4  begin
  5    api
  6    ( p1 => l_number_i_am_interested_in
  7    , p2 => l_dummy
  8    , p3 => l_dummy
  9    );
 10    dbms_output.put_line(l_dummy);
 11  end;
 12  /
3

PL/SQL procedure successfully completed.

It appears to be the last value in this case. So after the invocation of api ends, l_number_i_am_interested_in gets the value of p1 (1), then l_dummy gets the value of p2 (2), and then, overwriting the previous step, l_dummy gets the value of p3 (3). This is confirmed when extending the example with a lot more dummy output parameters:

SQL> create or replace procedure api
  2  ( p1 out number
  3  , p2 out number
  4  , p3 out number
  5  , p4 out number
  6  , p5 out number
  7  , p6 out number
  8  , p7 out number
  9  , p8 out number
 10  , p9 out number
 11  )
 12  as
 13  begin
 14    p1 := 1;
 15    p2 := 2;
 16    p3 := 3;
 17    p4 := 4;
 18    p5 := 5;
 19    p6 := 6;
 20    p7 := 7;
 21    p8 := 8;
 22    p9 := 9;
 23  end api;
 24  /

Procedure created.

SQL> declare
  2    l_number_i_am_interested_in number;
  3    l_dummy                     number;
  4  begin
  5    api
  6    ( p1 => l_number_i_am_interested_in
  7    , p2 => l_dummy
  8    , p3 => l_dummy
  9    , p4 => l_dummy
 10    , p5 => l_dummy
 11    , p6 => l_dummy
 12    , p7 => l_dummy
 13    , p8 => l_dummy
 14    , p9 => l_dummy
 15    );
 16    dbms_output.put_line(l_dummy);
 17  end;
 18  /
9

PL/SQL procedure successfully completed.

Fewer variables declared makes the code cleaner in my opinion, so I like this newly learned construct.


UPDATE

And in case you mix up the order of the actual parameters, it's still the value of the last formal parameter that l_dummy ends up with:

SQL> declare
  2    l_number_i_am_interested_in number;
  3    l_dummy                     number;
  4  begin
  5    api
  6    ( p1 => l_number_i_am_interested_in
  7    , p2 => l_dummy
  8    , p3 => l_dummy
  9    , p7 => l_dummy
 10    , p8 => l_dummy
 11    , p9 => l_dummy
 12    , p4 => l_dummy
 13    , p5 => l_dummy
 14    , p6 => l_dummy
 15    );
 16    dbms_output.put_line(l_dummy);
 17  end;
 18  /
9

PL/SQL procedure successfully completed.