472,805 Members | 3,586 Online

# querying the number of connected components

I have graph
x y
- -
1 2
2 3
3 1
4 5
and would like to query how many connected components it has (two in
the example -- {1,2,3} and {4,5}). Is it doable with "recursive with"?

Nov 12 '05 #1
9 1920
I don't know graph theory. So, there must be more sophiscated
algorithm.
Anyway, here is a trial code. It will be better to test with more data.

CREATE TABLE connected
(x SMALLINT NOT NULL
,y SMALLINT NOT NULL
)
;

INSERT INTO connected
VALUES
(1, 2)
,(2, 3)
,(3, 1)
,(4, 5)
;

------------------------- Commands Entered -------------------------
WITH
OrderedNumber (n) AS (
SELECT n1*10 + n2
FROM (VALUES 0,1,2,3,4,5,6,7,8,9) p1(n1)
, (VALUES 0,1,2,3,4,5,6,7,8,9) p2(n2)
WHERE n1*10 + n2 < 100
)
,
Recurse (seq, y, con) AS (
SELECT 0, y
, CAST(CASE
WHEN x < y THEN
'00,' || SUBSTR(DIGITS(x),4,2) || ',' ||
SUBSTR(DIGITS(y),4,2)
ELSE '00,' || SUBSTR(DIGITS(y),4,2) || ',' ||
SUBSTR(DIGITS(x),4,2)
END
AS VARCHAR(20)
)
FROM connected
UNION ALL
SELECT seq + 1
, new.y
, SUBSTR(con,1,ni-1) || ',' || SUBSTR(DIGITS(new.y),4,2) ||
SUBSTR(con,ni)
FROM Recurse old
, connected new
, TABLE
(SELECT MAX((n+1)*3)
FROM OrderedNumber odn
WHERE
LOCATE(SUBSTR(DIGITS(new.y),4,2),con) = 0
AND (CASE
WHEN n < (LENGTH(con)+1)/3 THEN
SMALLINT(SUBSTR(con,n*3+1,2))
ELSE 99
END) < new.y
) p(ni)
WHERE seq < 100
AND old.y = new.x
AND p.ni IS NOT NULL
)
,ContainedNode AS (
SELECT s
, con
, SUBSTR(con,n*3+1,2) node
, (LENGTH(c.con)+1)/3 max_n
FROM (SELECT con
, INT(ROWNUMBER() OVER()) s
FROM (SELECT DISTINCT SUBSTR(a.con,4)
FROM Recurse a
) b(con)
) c(con, s)
, TABLE
(SELECT n
FROM OrderedNumber d
WHERE d.n <= LENGTH(c.con)/3
) e
)
SELECT DISTINCT con AS Connected
FROM ContainedNode a
WHERE
NOT EXISTS
(SELECT *
FROM ContainedNode c
WHERE c.s <> a.s
AND c.node = a.node
AND c.max_n >= a.max_n
)
ORDER BY
con
;
--------------------------------------------------------------------

CONNECTED
--------------------
01,02,03
04,05

2 record(s) selected.

Nov 12 '05 #2
If I added following data, previous sample code produced incorrect
rsults.
INSERT INTO connected
VALUES
(6, 8)
,(7, 8)
,(8, 9)
,(8,10)
,(9,10)
;

This is a revised sample. I hope that no more error exist.
------------------------- Commands Entered -------------------------
WITH
OrderedNumber (n) AS (
SELECT n1*10 + n2
FROM (VALUES 0,1,2,3,4,5,6,7,8,9) p1(n1)
, (VALUES 0,1,2,3,4,5,6,7,8,9) p2(n2)
WHERE n1*10 + n2 < 100
)
,
Biconnected (x, y) AS (
SELECT *
FROM connected
UNION
SELECT y, x
FROM connected
)
,
Recurse (seq, y, nodes, link) AS (
SELECT 0, y
, CAST(CASE
WHEN x < y THEN
'00,' || SUBSTR(DIGITS(x),4,2) || ',' ||
SUBSTR(DIGITS(y),4,2)
ELSE '00,' || SUBSTR(DIGITS(y),4,2) || ',' ||
SUBSTR(DIGITS(x),4,2)
END
AS VARCHAR(20)
)
, CAST('0000,'||SUBSTR(DIGITS(x),4,2)||SUBSTR(DIGITS (y),4,2) AS
VARCHAR(100))
FROM Biconnected
UNION ALL
SELECT seq + 1
, new.y
, SUBSTR(nodes,1,(n+1)*3-1)
|| CASE
WHEN SMALLINT(SUBSTR(nodes,n*3+1,2)) = new.y THEN
''
ELSE ',' || SUBSTR(DIGITS(new.y),4,2)
END
|| SUBSTR(nodes,(n+1)*3)
SUBSTR(DIGITS(new.x),4,2)||SUBSTR(DIGITS(new.y),4, 2)
FROM Recurse old
, Biconnected new
, TABLE
(SELECT MAX(n)
FROM OrderedNumber odn
WHERE

AND
(CASE
WHEN n < (LENGTH(nodes)+1)/3 THEN
SMALLINT(SUBSTR(nodes,n*3+1,2))
ELSE 99
END) <= new.y
) p(n)
WHERE seq < 100
AND old.y = new.x
AND p.n IS NOT NULL
)
,
ContainedNode AS (
SELECT s
, nodes
, SUBSTR(nodes,n*3+1,2) node
, (LENGTH(c.nodes)+1)/3 max_n
FROM (SELECT nodes
, INT(ROWNUMBER() OVER()) s
FROM (SELECT DISTINCT SUBSTR(a.nodes,4)
FROM Recurse a
) b(nodes)
) c(nodes, s)
, TABLE
(SELECT n
FROM OrderedNumber d
WHERE d.n <= LENGTH(c.nodes)/3
) e
)
SELECT DISTINCT nodes AS Connected_Nodes
FROM ContainedNode a
WHERE
NOT EXISTS
(SELECT *
FROM ContainedNode c
WHERE c.s <> a.s
AND c.node = a.node
AND c.max_n >= a.max_n
)
ORDER BY
Connected_Nodes
;
-------------------------------------------------------------------------

CONNECTED_NODES
--------------------
01,02,03
04,05
06,07,08,09,10

3 record(s) selected.

Nov 12 '05 #3
Tonkuma wrote:
If I added following data, previous sample code produced incorrect
rsults.
INSERT INTO connected
VALUES
(6, 8)
,(7, 8)
,(8, 9)
,(8,10)
,(9,10)
;

This is a revised sample. I hope that no more error exist.
------------------------- Commands Entered -------------------------
WITH
OrderedNumber (n) AS (
SELECT n1*10 + n2
FROM (VALUES 0,1,2,3,4,5,6,7,8,9) p1(n1)
, (VALUES 0,1,2,3,4,5,6,7,8,9) p2(n2)
WHERE n1*10 + n2 < 100
)
,
Biconnected (x, y) AS (
SELECT *
FROM connected
UNION
SELECT y, x
FROM connected
)
,
Recurse (seq, y, nodes, link) AS (
SELECT 0, y
, CAST(CASE
WHEN x < y THEN
'00,' || SUBSTR(DIGITS(x),4,2) || ',' ||
SUBSTR(DIGITS(y),4,2)
ELSE '00,' || SUBSTR(DIGITS(y),4,2) || ',' ||
SUBSTR(DIGITS(x),4,2)
END
AS VARCHAR(20)
)
, CAST('0000,'||SUBSTR(DIGITS(x),4,2)||SUBSTR(DIGITS (y),4,2) AS
VARCHAR(100))
FROM Biconnected
UNION ALL
SELECT seq + 1
, new.y
, SUBSTR(nodes,1,(n+1)*3-1)
|| CASE
WHEN SMALLINT(SUBSTR(nodes,n*3+1,2)) = new.y THEN
''
ELSE ',' || SUBSTR(DIGITS(new.y),4,2)
END
|| SUBSTR(nodes,(n+1)*3)
SUBSTR(DIGITS(new.x),4,2)||SUBSTR(DIGITS(new.y),4, 2)
FROM Recurse old
, Biconnected new
, TABLE
(SELECT MAX(n)
FROM OrderedNumber odn
WHERE

AND
(CASE
WHEN n < (LENGTH(nodes)+1)/3 THEN
SMALLINT(SUBSTR(nodes,n*3+1,2))
ELSE 99
END) <= new.y
) p(n)
WHERE seq < 100
AND old.y = new.x
AND p.n IS NOT NULL
)
,
ContainedNode AS (
SELECT s
, nodes
, SUBSTR(nodes,n*3+1,2) node
, (LENGTH(c.nodes)+1)/3 max_n
FROM (SELECT nodes
, INT(ROWNUMBER() OVER()) s
FROM (SELECT DISTINCT SUBSTR(a.nodes,4)
FROM Recurse a
) b(nodes)
) c(nodes, s)
, TABLE
(SELECT n
FROM OrderedNumber d
WHERE d.n <= LENGTH(c.nodes)/3
) e
)
SELECT DISTINCT nodes AS Connected_Nodes
FROM ContainedNode a
WHERE
NOT EXISTS
(SELECT *
FROM ContainedNode c
WHERE c.s <> a.s
AND c.node = a.node
AND c.max_n >= a.max_n
)
ORDER BY
Connected_Nodes
;
-------------------------------------------------------------------------

CONNECTED_NODES
--------------------
01,02,03
04,05
06,07,08,09,10

3 record(s) selected.

Hmm. Too much formatting and string manipulations obfuscate your
solution. Here is some attemt (I have to translate from my RDBMS of
choice to db2):

with SymmetricInput as (
select x,y from input
union
select y,x from input
), TransitiveClosure as (
select x,y, '['||x||','||y||')' AS path from input
union all
select tc.x,i.y, path||'-'||'['||i.x||','||i.y||')'
from input i,TransitiveClosure tc
where tc.y=i.x
-- TO DO: the recursion has to cut off the cycles
-- (don't have Graemes DB2 cookbook printout at hand)
), MonotonicPaths as (
select x,y, '['||x||','||y||')' AS path from TransitiveClosure
where x<y
union all
select tc.x,i.y, path||'-'||'['||i.x||','||i.y||')'
from TransitiveClosure i,MonotonicPaths tc
where tc.y=i.x and i.x<i.y
-- Monotonic paths are acyclic
) select distinct path from MonotonicPaths
where x not in (
select x from MonotonicPaths oo where
length(oo.path)>length(o.path)
) and y not in (
select y from MonotonicPaths oo where
length(oo.path)>length(o.path)
);

Nov 12 '05 #4
You are right.
One of my poor design may be to define data type as number(SMALLINT).
So, I need many conversion from number to char.
Another issue would be that I used two near same information(nodes
column information is contained in link column).

Nov 12 '05 #5
rows that are not part of EXISTSing rows?

Assuming Y will always be more than X:

DECLARE GLOBAL TEMPORARY TABLE Connected (X SMALLINT, Y SMALLINT)
INSERT INTO SESSION.Connected VALUES (1, 2), (2, 3), (3, 1), (4, 5),
(6, 8), (7, 8), (8, 9), (8, 10), (9, 10)

SELECT \
COUNT(*) \
FROM \
SESSION.Connected Outer \
WHERE \
NOT EXISTS \
( \
SELECT \
* \
FROM \
SESSION.Connected Inner \
WHERE \
Inner.Y = Outer.X \
AND Inner.Y > Inner.X \
)
Or

SELECT \
COUNT(*) \
FROM \
( \
SELECT \
X \
FROM \
SESSION.Connected \
EXCEPT \
SELECT \
Y \
FROM \
SESSION.Connected \
WHERE \
Y > X
) A

B.

Nov 12 '05 #6
Try

(1,2)
(1,3)

Your first query returns 2. In general, it is very suspicious that you
can do graph queries with plain (non recursive) SQL.
Brian Tkatch wrote:
rows that are not part of EXISTSing rows?

Assuming Y will always be more than X:

DECLARE GLOBAL TEMPORARY TABLE Connected (X SMALLINT, Y SMALLINT)
INSERT INTO SESSION.Connected VALUES (1, 2), (2, 3), (3, 1), (4, 5),
(6, 8), (7, 8), (8, 9), (8, 10), (9, 10)

SELECT \
COUNT(*) \
FROM \
SESSION.Connected Outer \
WHERE \
NOT EXISTS \
( \
SELECT \
* \
FROM \
SESSION.Connected Inner \
WHERE \
Inner.Y = Outer.X \
AND Inner.Y > Inner.X \
)
Or

SELECT \
COUNT(*) \
FROM \
( \
SELECT \
X \
FROM \
SESSION.Connected \
EXCEPT \
SELECT \
Y \
FROM \
SESSION.Connected \
WHERE \
Y > X
) A

B.

Nov 12 '05 #7
Actually, i have no idea what a graph query is. I was just looking at
the lists for chains as you provided in the original message.

I guess i misunderstood.

B.

Nov 12 '05 #8
Brian Tkatch wrote:
Actually, i have no idea what a graph query is.

"Graph query" is a sloppy name for a "query on a graph structure". A
typical graph query is finding transitive closure. It is will known
that graph queries such as transitive closure can't be answered with
standard SQL means -- you have to use recursive SQL. I can't prove the
same for the number of connected components, though; this is why I was
"suspicious" rather than "sure".

Nov 12 '05 #9
Ah, ok, thanx.

I was just trying a different approach, that is, looking for breaks
rather than chains.

B.

Nov 12 '05 #10

This thread has been closed and replies have been disabled. Please start a new discussion.