should be it

This commit is contained in:
2025-10-24 19:21:19 -05:00
parent a4b23fc57c
commit f09560c7b1
14047 changed files with 3161551 additions and 1 deletions

View File

@@ -0,0 +1 @@
sf0.1

View File

@@ -0,0 +1,9 @@
DuckDB implementation of the queries from the [LDBC Social Network Benchmark](https://arxiv.org/abs/2001.02299).
Download the data, initialize the schema, and load the data.
```bash
python download-benchmark-data.py
cat schema.sql | duckdb ldbc.duckdb
sed "s|PATHVAR|`pwd`/sf0.1|" snb-load.sql | duckdb ldbc.duckdb
```

View File

@@ -0,0 +1,12 @@
import urllib.request
import tarfile
import tempfile
import os
# Directory that contains this script; the archive is unpacked next to it.
dirname = os.path.dirname(os.path.realpath(__file__))

# Download the archive into a temporary file and unpack it.
# Both the temporary file and the tar handle are managed by `with`, so they are
# closed (and the temporary file removed) even if the download or extraction fails.
with tempfile.NamedTemporaryFile() as tf:
    urllib.request.urlretrieve(
        'https://github.com/duckdb/duckdb-data/releases/download/v1.0/ldbc-snb-sf0.1.tar.gz', tf.name
    )
    # NOTE(review): extractall() trusts the member paths in the archive; acceptable
    # only because the URL above is a fixed release asset - confirm before reusing
    # this script with other sources.
    with tarfile.open(tf.name) as archive:
        archive.extractall(dirname)

View File

@@ -0,0 +1,36 @@
/* Q1. Posting summary
\set date '\'2011-07-21T22:00:00\''::timestamp

Buckets the messages created before the cutoff by year, post/comment flag and
length category, and reports each bucket's share of all messages before the cutoff.
*/
WITH message_count AS (
    -- Denominator: all messages before the cutoff. "0.0 +" keeps cnt non-integer
    -- so that count(*) / cnt below yields a fraction.
    SELECT 0.0 + count(*) AS cnt
    FROM message
    WHERE m_creationdate < '2011-07-21T22:00:00'
),
message_prep AS (
    SELECT
        extract(year FROM m_creationdate) AS messageYear,
        m_c_replyof IS NOT NULL AS isComment,
        CASE
            WHEN m_length < 40 THEN 0   -- short
            WHEN m_length < 80 THEN 1   -- one liner
            WHEN m_length < 160 THEN 2  -- tweet
            ELSE 3                      -- long
        END AS lengthCategory,
        m_length
    FROM message
    WHERE m_creationdate < '2011-07-21T22:00:00'
      -- FIXME CHECKME (carried over): posts with m_ps_imagefile IS NOT NULL should have
      -- m_content IS NULL, so image posts are filtered here instead of testing
      -- "m_content IS NOT NULL".
      AND m_ps_imagefile IS NULL
)
SELECT
    messageYear,
    isComment,
    lengthCategory,
    count(*) AS messageCount,
    avg(m_length) AS averageMessageLength,
    sum(m_length) AS sumMessageLength,
    count(*) / mc.cnt AS percentageOfMessages
FROM message_prep
CROSS JOIN message_count mc  -- single-row CTE, attached to every bucket
GROUP BY
    messageYear,
    isComment,
    lengthCategory,
    mc.cnt
ORDER BY
    messageYear DESC,
    isComment ASC,
    lengthCategory ASC
;

View File

@@ -0,0 +1,77 @@
/* Q10. Experts in social circle using shortest path semantics between startPerson and friends
\set personId 19791209310731
\set country '\'Pakistan\''
\set tagClass '\'MusicalArtist\''
\set minPathDistance 3
\set maxPathDistance 5
*/
WITH RECURSIVE friends(startPerson, hopCount, friend) AS (
    -- Seed row: the start person reaches themselves in zero hops.
    SELECT p_personid, 0, p_personid
    FROM person
    WHERE p_personid = 19791209310731
    UNION
    -- Expansion: follow one more knows edge. The knows table stores both
    -- (p1, p2) and (p2, p1), so joining on k_person1id alone is sufficient;
    -- given that join condition the CASE below always yields k_person2id.
    SELECT
        f.startPerson,
        f.hopCount + 1,
        CASE WHEN f.friend = k.k_person1id THEN k.k_person2id ELSE k.k_person1id END
    FROM friends f
    INNER JOIN knows k
        ON f.friend = k.k_person1id
    -- stop condition (maxPathDistance)
    WHERE f.hopCount < 5
),
friends_shortest AS (
    -- A friend reachable with hopCount 2, 3 and 4 has distance 2 from startPerson.
    SELECT
        startPerson,
        min(hopCount) AS hopCount,
        friend
    FROM friends
    GROUP BY startPerson, friend
),
friend_list AS (
    -- Friends at distance [minPathDistance, maxPathDistance] located in the given country.
    SELECT DISTINCT f.friend AS friendid
    FROM friends_shortest f
    INNER JOIN person tf  -- the friend's person record
        ON f.friend = tf.p_personid
    INNER JOIN place ci   -- city
        ON tf.p_placeid = ci.pl_placeid
    INNER JOIN place co   -- country
        ON ci.pl_containerplaceid = co.pl_placeid
    WHERE f.hopCount BETWEEN 3 AND 5
      AND co.pl_name = 'Pakistan'
),
messages_of_tagclass_by_friends AS (
    -- Messages by those friends carrying at least one tag of the given tag class.
    SELECT DISTINCT
        f.friendid,
        m.m_messageid AS messageid
    FROM friend_list f
    INNER JOIN message m
        ON f.friendid = m.m_creatorid
    INNER JOIN message_tag pt
        ON m.m_messageid = pt.mt_messageid
    INNER JOIN tag t
        ON pt.mt_tagid = t.t_tagid
    INNER JOIN tagclass tc
        ON t.t_tagclassid = tc.tc_tagclassid
    WHERE tc.tc_name = 'MusicalArtist'
)
-- Count, per friend, every tag attached to the qualifying messages.
SELECT
    m.friendid AS "person.id",
    t.t_name AS "tag.name",
    count(*) AS messageCount
FROM messages_of_tagclass_by_friends m
INNER JOIN message_tag pt
    ON m.messageid = pt.mt_messageid
INNER JOIN tag t
    ON pt.mt_tagid = t.t_tagid
GROUP BY m.friendid, t.t_name
ORDER BY messageCount DESC, t.t_name, m.friendid
LIMIT 100
;

View File

@@ -0,0 +1,34 @@
/* Q11. Friend triangles
\set country '\'Belarus\''

Counts the triangles of mutually connected persons who all live in the given country.
(A stray session-wide "pragma enable_profiling;" that used to precede the query
was removed: it is a debugging side effect unrelated to the query result.)
*/
WITH persons_of_country_w_friends AS (
    -- One row per knows edge whose source person lives in the given country.
    SELECT
        p.p_personid AS personid,
        k.k_person2id AS friendid
    FROM person p
    INNER JOIN place ci  -- city
        ON p.p_placeid = ci.pl_placeid
    INNER JOIN place co  -- country
        ON ci.pl_containerplaceid = co.pl_placeid
    INNER JOIN knows k
        ON p.p_personid = k.k_person1id
    WHERE co.pl_name = 'Belarus'
)
SELECT count(*)
FROM persons_of_country_w_friends p1
INNER JOIN persons_of_country_w_friends p2
    ON p1.friendid = p2.personid
INNER JOIN persons_of_country_w_friends p3
    ON p2.friendid = p3.personid
   AND p3.friendid = p1.personid
-- filter: unique triangles only (each triangle is counted once, with ids ascending)
WHERE p1.personid < p2.personid
  AND p2.personid < p3.personid
;

View File

@@ -0,0 +1,34 @@
/* Q13. Zombies in a country
\set country '\'Belarus\''
\set endDate '\'2013-01-01T00:00:00.000+00:00\''::timestamp
*/
WITH zombies AS (
    -- Persons of the country, created before endDate, with fewer messages than
    -- months elapsed between their creation date and endDate.
    SELECT p.p_personid AS zombieid
    FROM place co  -- country
    INNER JOIN place ci  -- city
        ON co.pl_placeid = ci.pl_containerplaceid
    INNER JOIN person p
        ON ci.pl_placeid = p.p_placeid
    LEFT JOIN message m
        ON p.p_personid = m.m_creatorid
       AND m.m_creationdate BETWEEN p.p_creationdate AND '2013-01-01T00:00:00'
    WHERE co.pl_name = 'Belarus'
      AND p.p_creationdate < '2013-01-01T00:00:00'
    GROUP BY p.p_personid, p.p_creationdate
    -- average of [0, 1) messages per month is equivalent with having less messages than the month span between person creationDate and parameter :endDate
    HAVING count(m_messageid) < 12*extract(YEAR FROM '2013-01-01T00:00:00'::date)+extract(MONTH FROM '2013-01-01T00:00:00'::date) - (12*extract(YEAR FROM p.p_creationdate) + extract(MONTH FROM p.p_creationdate)) + 1
)
SELECT
    z.zombieid AS "zombie.id",
    count(zl.zombieid) AS zombieLikeCount,
    count(l.l_personid) AS totalLikeCount,
    CASE
        WHEN count(l.l_personid) = 0 THEN 0
        ELSE count(zl.zombieid)::float/count(l.l_personid)
    END AS zombieScore
FROM message m
INNER JOIN likes l
    ON (m.m_messageid = l.l_messageid)
INNER JOIN person p  -- the liker; only likers created before endDate count
    ON (l.l_personid = p.p_personid AND p.p_creationdate < '2013-01-01T00:00:00')
LEFT JOIN zombies zl  -- see if the like was given by a zombie
    ON (p.p_personid = zl.zombieid)
RIGHT JOIN zombies z  -- keep every zombie, even one whose messages got no likes
    ON (z.zombieid = m.m_creatorid)
GROUP BY z.zombieid
ORDER BY zombieScore DESC, z.zombieid
LIMIT 100
;

View File

@@ -0,0 +1,137 @@
/* Q14. International dialog
\set country1 '\'Indonesia\''
\set country2 '\'Brazil\''

For every city of country1, reports the best-scoring (person1, person2) pair where
person1 lives in that city and person2 lives in country2. The score of a pair is
the sum of the fixed weights of the interaction kinds (case1..case5) found between them.
*/
WITH person1_list AS (
    -- Persons living in country1, together with their city.
    SELECT p.p_personid AS personid
         , ci.pl_placeid AS cityid
      FROM place co -- country
         , place ci -- city
         , person p
     WHERE 1=1
        -- join
       AND co.pl_placeid = ci.pl_containerplaceid
       AND ci.pl_placeid = p.p_placeid
        -- filter
       AND co.pl_name = 'Indonesia'
)
, person2_list AS (
    -- Persons living in country2.
    SELECT p.p_personid AS personid
      FROM place co -- country
         , place ci -- city
         , person p
     WHERE 1=1
        -- join
       AND co.pl_placeid = ci.pl_containerplaceid
       AND ci.pl_placeid = p.p_placeid
        -- filter
       AND co.pl_name = 'Brazil'
)
, case1 AS (
    -- person1 replied to a message of person2: weight 4
    SELECT DISTINCT
           p1.personid AS person1id
         , p2.personid AS person2id
         , 4 AS score
      FROM person1_list p1
         , person2_list p2
         , message m -- message by p2
         , message r -- reply by p1
     WHERE 1=1
        -- join
       AND m.m_messageid = r.m_c_replyof
       AND p1.personid = r.m_creatorid
       AND p2.personid = m.m_creatorid
)
, case2 AS (
    -- person2 replied to a message of person1: weight 1
    SELECT DISTINCT
           p1.personid AS person1id
         , p2.personid AS person2id
         , 1 AS score
      FROM person1_list p1
         , person2_list p2
         , message m -- message by p1
         , message r -- reply by p2
     WHERE 1=1
        -- join
       AND m.m_messageid = r.m_c_replyof
       AND p2.personid = r.m_creatorid
       AND p1.personid = m.m_creatorid
)
, case3 AS (
    -- person1 knows person2: weight 15
    SELECT -- no need for distinct
           p1.personid AS person1id
         , p2.personid AS person2id
         , 15 AS score
      FROM person1_list p1
         , person2_list p2
         , knows k
     WHERE 1=1
        -- join
       AND p1.personid = k.k_person1id
       AND p2.personid = k.k_person2id
)
, case4 AS (
    -- person1 liked a message of person2: weight 10
    SELECT DISTINCT
           p1.personid AS person1id
         , p2.personid AS person2id
         , 10 AS score
      FROM person1_list p1
         , person2_list p2
         , message m -- message by p2
         , likes l
     WHERE 1=1
        -- join
       AND p2.personid = m.m_creatorid
       AND m.m_messageid = l.l_messageid
       AND l.l_personid = p1.personid
)
, case5 AS (
    -- person2 liked a message of person1: weight 1
    SELECT DISTINCT
           p1.personid AS person1id
         , p2.personid AS person2id
         , 1 AS score
      FROM person1_list p1
         , person2_list p2
         , message m -- message by p1
         , likes l
     WHERE 1=1
        -- join
       AND p1.personid = m.m_creatorid
       AND m.m_messageid = l.l_messageid
       AND l.l_personid = p2.personid
)
, pair_scores AS (
    -- Each case contributes at most once per pair; add up the weights.
    SELECT person1id, person2id, sum(score) AS score
      FROM (SELECT * FROM case1
            UNION ALL SELECT * FROM case2
            UNION ALL SELECT * FROM case3
            UNION ALL SELECT * FROM case4
            UNION ALL SELECT * FROM case5
           ) t
     GROUP BY person1id, person2id
)
, score_ranks AS (
    -- Rank the pairs within each city of country1. The LEFT JOINs keep cities
    -- without any scored pair; for those rows the pair columns and score are NULL.
    SELECT s.person1id
         , s.person2id
         , ci.pl_name AS cityName
         , s.score
         , row_number() OVER (PARTITION BY ci.pl_placeid ORDER BY s.score DESC NULLS LAST, s.person1id, s.person2id) AS rn
      FROM place co -- country
           INNER JOIN place ci ON (co.pl_placeid = ci.pl_containerplaceid) -- city
           LEFT JOIN person1_list p1l ON (ci.pl_placeid = p1l.cityid)
           LEFT JOIN pair_scores s ON (p1l.personid = s.person1id)
     WHERE 1=1
        -- filter
       AND co.pl_name = 'Indonesia'
)
SELECT s.person1id AS "person1.id"
     , s.person2id AS "person2.id"
     , s.cityName AS "city1.name"
     , s.score
  FROM score_ranks s
 WHERE 1=1
    -- filter
   AND s.rn = 1
    -- score is NULL for cities without a scored pair; pin NULLS LAST (as the window
    -- above already does) so the top-100 cut does not depend on the session's
    -- default NULL ordering.
 ORDER BY s.score DESC NULLS LAST, s.person1id, s.person2id
 LIMIT 100
;

View File

@@ -0,0 +1,28 @@
/* Q2. Tag evolution
\set year 2010
\set month 11

Per tag, compares the number of distinct messages in the given month with the
number in the month after it, over a two-month window starting on the 1st.
*/
WITH detail AS (
    SELECT
        t.t_name,
        -- messages of the given month
        count(DISTINCT CASE WHEN extract(MONTH FROM m.m_creationdate) = 11 THEN m.m_messageid END) AS countMonth1,
        -- messages of the other month inside the two-month window
        count(DISTINCT CASE WHEN extract(MONTH FROM m.m_creationdate) != 11 THEN m.m_messageid END) AS countMonth2
    FROM message m
    INNER JOIN message_tag mt
        ON m.m_messageid = mt.mt_messageid
    INNER JOIN tag t
        ON mt.mt_tagid = t.t_tagid
    WHERE m.m_creationdate >= '2010-11-1'::date
      AND m.m_creationdate < '2010-11-1'::date + interval '2' month
    GROUP BY t.t_name
)
SELECT
    t_name AS "tag.name",
    countMonth1,
    countMonth2,
    abs(countMonth1-countMonth2) AS diff
FROM detail d
ORDER BY diff DESC, t_name
LIMIT 100
;

View File

@@ -0,0 +1,33 @@
/* Q3. Popular topics in a country
\set tagClass '\'MusicalArtist\''
\set country '\'Burma\''

Forums moderated by a person from the given country, ranked by the number of
distinct messages in the forum that carry a tag of the given tag class.
*/
SELECT
    f.f_forumid AS "forum.id",
    f.f_title AS "forum.title",
    f.f_creationdate AS "forum.creationDate",
    f.f_moderatorid AS "person.id",
    count(DISTINCT p.m_messageid) AS postCount
FROM tagclass tc
INNER JOIN tag t
    ON tc.tc_tagclassid = t.t_tagclassid
INNER JOIN message_tag pt
    ON t.t_tagid = pt.mt_tagid
INNER JOIN message p
    ON pt.mt_messageid = p.m_messageid
INNER JOIN forum f
    ON p.m_ps_forumid = f.f_forumid
INNER JOIN person m  -- moderator
    ON f.f_moderatorid = m.p_personid
INNER JOIN place ci  -- city
    ON m.p_placeid = ci.pl_placeid
INNER JOIN place co  -- country
    ON ci.pl_containerplaceid = co.pl_placeid
WHERE tc.tc_name = 'MusicalArtist'
  AND co.pl_name = 'Burma'
GROUP BY
    f.f_forumid,
    f.f_title,
    f.f_creationdate,
    f.f_moderatorid
ORDER BY postCount DESC, f.f_forumid
LIMIT 20
;

View File

@@ -0,0 +1,39 @@
/* Q4. Top posters in a country
\set country '\'Belarus\''
*/
WITH top100_popular_forums AS (
    -- The 100 forums with the most members living in the given country.
    SELECT fp_forumid AS forumid
    FROM forum_person fp
    INNER JOIN person p
        ON fp.fp_personid = p.p_personid
    INNER JOIN place ci  -- city
        ON p.p_placeid = ci.pl_placeid
    INNER JOIN place co  -- country
        ON ci.pl_containerplaceid = co.pl_placeid
    WHERE co.pl_name = 'Belarus'
    GROUP BY fp_forumid
    ORDER BY count(*) DESC, fp_forumid
    LIMIT 100
)
SELECT
    au.p_personid AS "person.id",
    au.p_firstname AS "person.firstName",
    au.p_lastname AS "person.lastName",
    au.p_creationdate,
    -- a single person might be member of more than 1 of the top100 forums, so their posts should be DISTINCT counted
    count(DISTINCT p.m_messageid) AS postCount
FROM top100_popular_forums t
INNER JOIN forum_person fp
    ON t.forumid = fp.fp_forumid
INNER JOIN person au  -- author of the post
    ON fp.fp_personid = au.p_personid
-- LEFT JOIN keeps members who wrote no post in any of the top forums (postCount = 0).
LEFT JOIN message p
    ON au.p_personid = p.m_creatorid
   AND p.m_ps_forumid IN (SELECT forumid FROM top100_popular_forums)
   AND p.m_c_replyof IS NULL  -- posts only, no comments
GROUP BY
    au.p_personid,
    au.p_firstname,
    au.p_lastname,
    au.p_creationdate
ORDER BY postCount DESC, au.p_personid
LIMIT 100
;

View File

@@ -0,0 +1,32 @@
/* Q5. Most active Posters of a given Topic
\set tag '\'Abbas_I_of_Persia\''
*/
WITH detail AS (
    -- Per creator of messages with the given tag: distinct replies, likes and messages.
    SELECT
        cr.p_personid AS person_id,
        count(DISTINCT r.m_messageid) AS replyCount,
        -- a like is identified by the (message, person) pair
        count(DISTINCT l.l_messageid||' '||l.l_personid) AS likeCount,
        count(DISTINCT m.m_messageid) AS messageCount,
        null AS score
    FROM tag t
    INNER JOIN message_tag pt
        ON t.t_tagid = pt.mt_tagid
    INNER JOIN message m
        ON pt.mt_messageid = m.m_messageid
    INNER JOIN person cr  -- creator
        ON m.m_creatorid = cr.p_personid
    LEFT JOIN message r   -- r: replies to m
        ON m.m_messageid = r.m_c_replyof
    LEFT JOIN likes l     -- l: likes to m
        ON m.m_messageid = l.l_messageid
    WHERE t.t_name = 'Abbas_I_of_Persia'
    GROUP BY cr.p_personid
)
SELECT
    person_id AS "person.id",
    replyCount,
    likeCount,
    messageCount,
    1*messageCount + 2*replyCount + 10*likeCount AS score
FROM detail
ORDER BY score DESC, person_id
LIMIT 100
;

View File

@@ -0,0 +1,35 @@
/* Q6. Most authoritative users on a given topic
\set tag '\'Arnold_Schwarzenegger\''
*/
WITH poster_w_liker AS (
    -- Distinct (poster, liker) pairs over messages with the given tag.
    -- The LEFT JOIN keeps posters whose messages received no like (liker is NULL).
    SELECT DISTINCT
        m1.m_creatorid AS posterPersonid,
        l2.l_personid AS likerPersonid
    FROM tag t
    INNER JOIN message_tag pt
        ON t.t_tagid = pt.mt_tagid
    -- as an optimization, we use that the set of message1 is the same as message2
    INNER JOIN message m1
        ON pt.mt_messageid = m1.m_messageid
    -- the liker's person record is not needed: its ID is already in the like l2
    LEFT JOIN likes l2
        ON m1.m_messageid = l2.l_messageid
    WHERE t.t_name = 'Arnold_Schwarzenegger'
),
popularity_score AS (
    -- Number of likes received by each person across all of their messages.
    SELECT
        m3.m_creatorid AS personid,
        count(*) AS popularityScore
    FROM message m3
    INNER JOIN likes l3
        ON m3.m_messageid = l3.l_messageid
    GROUP BY m3.m_creatorid
)
SELECT
    pl.posterPersonid AS "person1.id",
    sum(coalesce(ps.popularityScore, 0)) AS authorityScore
FROM poster_w_liker pl
LEFT JOIN popularity_score ps
    ON pl.likerPersonid = ps.personid
GROUP BY pl.posterPersonid
ORDER BY authorityScore DESC, pl.posterPersonid ASC
LIMIT 100
;

View File

@@ -0,0 +1,23 @@
/* Q7. Related Topics
\set tag '\'Enrique_Iglesias\''

Tags found on direct replies to messages carrying the given tag, counting only
replies that do not carry the given tag themselves.
*/
SELECT
    t2.t_name AS "relatedTag.name",
    count(*) AS count
FROM tag t
INNER JOIN message_tag pt
    ON t.t_tagid = pt.mt_tagid
-- as an optimization, the tagged message itself is not joined: its ID is in message_tag pt,
-- so proceed to the comment directly
INNER JOIN message c
    ON pt.mt_messageid = c.m_c_replyof
-- comment's tag
INNER JOIN message_tag ct
    ON c.m_messageid = ct.mt_messageid
INNER JOIN tag t2
    ON ct.mt_tagid = t2.t_tagid
-- antijoin, completed by the IS NULL test in WHERE
LEFT JOIN message_tag nt
    ON c.m_messageid = nt.mt_messageid
   AND nt.mt_tagid = pt.mt_tagid
WHERE nt.mt_messageid IS NULL  -- comment (c) does not have the given tag
  AND t.t_name = 'Enrique_Iglesias'
GROUP BY t2.t_name
ORDER BY count DESC, t2.t_name
LIMIT 100
;

View File

@@ -0,0 +1,54 @@
/* Q8. Central Person for a Tag
\set tag '\'Che_Guevara\''
\set date '\'2011-07-22T00:00:00.000+00:00\''::timestamp
*/
WITH person_tag_interest AS (
    -- Persons who list the given tag as an interest.
    SELECT p.p_personid AS personid
    FROM person p
    INNER JOIN person_tag pt
        ON p.p_personid = pt.pt_personid
    INNER JOIN tag t
        ON pt.pt_tagid = t.t_tagid
    WHERE t.t_name = 'Che_Guevara'
),
person_message_score AS (
    -- Per person: number of messages with the given tag created after the date.
    SELECT
        p.p_personid AS personid,
        count(*) AS message_score
    FROM message m
    INNER JOIN person p
        ON m.m_creatorid = p.p_personid
    INNER JOIN message_tag pt
        ON m.m_messageid = pt.mt_messageid
    INNER JOIN tag t
        ON pt.mt_tagid = t.t_tagid
    WHERE m.m_creationdate > '2011-07-22T00:00:00'
      AND t.t_name = 'Che_Guevara'
    GROUP BY p.p_personid
),
person_score AS (
    -- FULL JOIN: a person may appear on only one of the two sides.
    SELECT
        coalesce(pti.personid, pms.personid) AS personid,
        CASE WHEN pti.personid IS NULL THEN 0 ELSE 100 END -- scored from interest in the given tag
            + coalesce(pms.message_score, 0) AS score
    FROM person_tag_interest pti
    FULL JOIN person_message_score pms
        ON pti.personid = pms.personid
)
SELECT
    p.personid AS "person.id",
    p.score AS score,
    sum(f.score) AS friendsScore
FROM person_score p
INNER JOIN knows k
    ON p.personid = k.k_person1id
INNER JOIN person_score f  -- the friend
    ON k.k_person2id = f.personid
GROUP BY p.personid, p.score
ORDER BY p.score + sum(f.score) DESC, p.personid
LIMIT 100
;

View File

@@ -0,0 +1,48 @@
/* Q9. Top thread initiators
\set startDate '\'2012-06-01T00:00:00.000+00:00\''::timestamp
\set endDate '\'2012-07-01T00:00:00.000+00:00\''::timestamp
*/
WITH RECURSIVE post_all(
    psa_threadid,
    psa_thread_creatorid,
    psa_messageid,
    psa_creationdate,
    psa_messagetype
) AS (
    -- Seed: every post (not comment) created in the window starts a thread.
    SELECT
        m_messageid AS psa_threadid,
        m_creatorid AS psa_thread_creatorid,
        m_messageid AS psa_messageid,
        m_creationdate,
        'Post'
    FROM message
    WHERE m_c_replyof IS NULL
      AND m_creationdate BETWEEN '2012-06-01T00:00:00' AND '2012-07-01T00:00:00'
    UNION ALL
    -- Step: replies to a message already in the thread inherit its thread id and creator.
    SELECT
        psa.psa_threadid AS psa_threadid,
        psa.psa_thread_creatorid AS psa_thread_creatorid,
        p.m_messageid,
        p.m_creationdate,
        'Comment'
    FROM message p
    INNER JOIN post_all psa
        ON p.m_c_replyof = psa.psa_messageid
    -- this is a performance optimisation only
    WHERE p.m_creationdate BETWEEN '2012-06-01T00:00:00' AND '2012-07-01T00:00:00'
)
SELECT
    p.p_personid AS "person.id",
    p.p_firstname AS "person.firstName",
    p.p_lastname AS "person.lastName",
    count(DISTINCT psa.psa_threadid) AS threadCount,
    -- if the thread initiator message does not count as a reply
    --, count(DISTINCT CASE WHEN psa.psa_messagetype = 'Comment' then psa.psa_messageid ELSE null END) AS messageCount
    count(DISTINCT psa.psa_messageid) AS messageCount
FROM person p
-- LEFT JOIN keeps persons who started no thread in the window (both counts are 0).
LEFT JOIN post_all psa
    ON p.p_personid = psa.psa_thread_creatorid
   AND psa_creationdate BETWEEN '2012-06-01T00:00:00' AND '2012-07-01T00:00:00'
GROUP BY
    p.p_personid,
    p.p_firstname,
    p.p_lastname
ORDER BY messageCount DESC, p.p_personid
LIMIT 100
;

View File

@@ -0,0 +1,34 @@
-- Friends-of-friends of person 21990232556256 (excluding the person and direct
-- friends) whose birthday falls in [21 Oct, 22 Nov). Score = posts sharing a tag
-- with the person's interests minus posts sharing none.
SELECT
    p_personid,
    p_firstname,
    p_lastname,
    (
        -- posts of the candidate with at least one tag the start person is interested in
        SELECT count(DISTINCT m_messageid)
        FROM message
        INNER JOIN message_tag pt1
            ON m_messageid = mt_messageid
        WHERE m_creatorid = p_personid
          AND m_c_replyof IS NULL  -- post, not comment
          AND EXISTS (
              SELECT *
              FROM person_tag
              WHERE pt_personid = 21990232556256
                AND pt_tagid = pt1.mt_tagid
          )
    ) -
    (
        -- posts of the candidate with no tag the start person is interested in
        SELECT count(*)
        FROM message
        WHERE m_creatorid = p_personid
          AND m_c_replyof IS NULL  -- post, not comment
          AND NOT EXISTS (
              SELECT *
              FROM person_tag
              INNER JOIN message_tag
                  ON pt_tagid = mt_tagid
              WHERE pt_personid = 21990232556256
                AND mt_messageid = m_messageid
          )
    ) AS score,
    p_gender,
    pl_name
FROM person
INNER JOIN place
    ON p_placeid = pl_placeid
INNER JOIN (
    -- friends of friends who are neither the start person nor a direct friend
    SELECT DISTINCT k2.k_person2id
    FROM knows k1
    INNER JOIN knows k2
        ON k1.k_person2id = k2.k_person1id
    WHERE k1.k_person1id = 21990232556256
      AND k2.k_person2id <> 21990232556256
      AND NOT EXISTS (
          SELECT *
          FROM knows
          WHERE k_person1id = 21990232556256
            AND k_person2id = k2.k_person2id
      )
) f
    ON p_personid = f.k_person2id
WHERE (
        (extract(month from p_birthday) = 10 and (case when extract(day from p_birthday) >= 21 then true else false end)) -- :month
        OR
        (extract(month from p_birthday) = 11 and (case when extract(day from p_birthday) < 22 then true else false end)) -- :nextMonth
      )
ORDER BY score DESC, p_personid
LIMIT 10

View File

@@ -0,0 +1,21 @@
-- Friends and friends-of-friends of person 21990232556256 who started working
-- before 2012 at an organisation located in the given country.
SELECT
    p_personid,
    p_firstname,
    p_lastname,
    o_name,
    pc_workfrom
FROM (
    -- direct friends ...
    SELECT k_person2id
    FROM knows
    WHERE k_person1id = 21990232556256
    UNION
    -- ... plus friends of friends, excluding the start person
    SELECT k2.k_person2id
    FROM knows k1
    INNER JOIN knows k2
        ON k1.k_person2id = k2.k_person1id
    WHERE k1.k_person1id = 21990232556256
      AND k2.k_person2id <> 21990232556256
) f
INNER JOIN person
    ON p_personid = f.k_person2id
INNER JOIN person_company
    ON p_personid = pc_personid
INNER JOIN organisation
    ON pc_organisationid = o_organisationid
INNER JOIN place
    ON o_placeid = pl_placeid
WHERE pc_workfrom < 2012           -- :workFromYear
  AND pl_name = 'United_States'    -- :countryName
ORDER BY pc_workfrom, p_personid, o_name DESC
LIMIT 10

View File

@@ -0,0 +1,23 @@
-- Friends of person 21990232556256 ranked by the number of their comments that
-- directly reply to a post tagged with a tag of the given tag class or any of
-- its descendant classes.
WITH RECURSIVE extended_tags(s_subtagclassid, s_supertagclassid) AS (
    -- every tag class is its own (reflexive) ancestor ...
    SELECT tc_tagclassid, tc_tagclassid
    FROM tagclass
    UNION
    -- ... and inherits the ancestors of the class it is a subclass of
    SELECT tc.tc_tagclassid, t.s_supertagclassid
    FROM tagclass tc
    INNER JOIN extended_tags t
        ON tc.tc_subclassoftagclassid = t.s_subtagclassid
)
SELECT
    p_personid,
    p_firstname,
    p_lastname,
    array_agg(DISTINCT t_name),
    count(*)
FROM knows
INNER JOIN person
    ON k_person2id = p_personid
INNER JOIN message p1  -- the friend's comment
    ON p_personid = p1.m_creatorid
INNER JOIN message p2  -- the post it replies to
    ON p1.m_c_replyof = p2.m_messageid
INNER JOIN message_tag
    ON p2.m_messageid = mt_messageid
INNER JOIN (
    -- tags whose class is the given class or one of its descendants
    SELECT DISTINCT t_tagid, t_name
    FROM tag
    WHERE t_tagclassid IN (
        SELECT DISTINCT s_subtagclassid
        FROM extended_tags k
        INNER JOIN tagclass
            ON tc_tagclassid = k.s_supertagclassid
        WHERE tc_name = 'OfficeHolder'  -- :tagClassName
    )
) selected_tags
    ON mt_tagid = t_tagid
WHERE k_person1id = 21990232556256
  AND p2.m_c_replyof IS NULL
GROUP BY p_personid, p_firstname, p_lastname
ORDER BY count(*) DESC, p_personid
LIMIT 20

View File

@@ -0,0 +1,9 @@
-- The 20 most recent messages, created before the cutoff, written by direct
-- friends of person 21990232556256.
SELECT
    p_personid,
    p_firstname,
    p_lastname,
    m_messageid,
    COALESCE(m_ps_imagefile, m_content, '') AS content,
    m_creationdate
FROM knows
INNER JOIN person
    ON k_person2id = p_personid
INNER JOIN message
    ON p_personid = m_creatorid
WHERE k_person1id = 21990232556256
  AND m_creationdate < '2011-07-21T22:00:00'
ORDER BY m_creationdate DESC, m_messageid ASC
LIMIT 20

View File

@@ -0,0 +1,37 @@
-- Friends and friends-of-friends of person 6597069767251 who live outside both
-- countries but created messages located in each of them; ct1 / ct2 are the
-- per-country message counts and total is their sum.
select p_personid, p_firstname, p_lastname, ct1, ct2, total
from
    ( select k_person2id
      from knows
      where
          k_person1id = 6597069767251
      union
      select k2.k_person2id
      from knows k1, knows k2
      where
          -- Fix: exclude the start person itself. The literal here was a different
          -- id (15393162789164), which let the start person show up as their own
          -- friend-of-friend.
          k1.k_person1id = 6597069767251 and k1.k_person2id = k2.k_person1id and k2.k_person2id <> 6597069767251
    ) f, person, place p1, place p2,
    (
      select chn.m_c_creatorid, ct1, ct2, ct1 + ct2 as total
      from
          (
            -- messages per creator located in the first country
            select m_creatorid as m_c_creatorid, count(*) as ct1 from message, place
            where
                m_locationid = pl_placeid and pl_name = 'United_States' and
                m_creationdate >= '2010-07-21T22:00:00' and m_creationdate < '2012-07-26T22:00:00' --('2011-07-21T22:00:00' + INTERVAL '1 days' * 5)
            group by m_creatorid
          ) chn,
          (
            -- messages per creator located in the second country
            -- NOTE(review): the upper bound here ('2012-01-26') differs from the one used
            -- for the first country ('2012-07-26'), and neither matches the template in the
            -- trailing comment; left unchanged - confirm the intended window.
            select m_creatorid as m_c_creatorid, count(*) as ct2 from message, place
            where
                m_locationid = pl_placeid and pl_name = 'Canada' and
                m_creationdate >= '2010-07-21T22:00:00' and m_creationdate < '2012-01-26T22:00:00' --('2011-07-21T22:00:00' + INTERVAL '1 days' * 5)
            group by m_creatorid
          ) ind
      -- only creators with messages in both countries
      where chn.m_c_creatorid = ind.m_c_creatorid
    ) cpc
where
    f.k_person2id = p_personid and p_placeid = p1.pl_placeid and
    -- p1 is the person's city, p2 its containing country
    p1.pl_containerplaceid = p2.pl_placeid and p2.pl_name <> 'United_States' and p2.pl_name <> 'Canada' and
    f.k_person2id = cpc.m_c_creatorid
order by 6 desc, 1
limit 20

View File

@@ -0,0 +1,22 @@
-- Tags on posts written by friends of person 21990232556256 inside the date
-- window, restricted to tags that no friend's post used before the window started.
SELECT
    t_name,
    count(*)
FROM tag
INNER JOIN message_tag recent
    ON recent.mt_tagid = t_tagid
INNER JOIN message
    ON m_messageid = recent.mt_messageid
INNER JOIN knows
    ON m_creatorid = k_person2id
WHERE m_c_replyof IS NULL  -- post, not comment
  AND k_person1id = 21990232556256
  AND m_creationdate >= '2011-07-21T22:00:00'
  AND m_creationdate < '2012-07-26T22:00:00'  --('2011-07-21T22:00:00' + INTERVAL '1 days' * 5)
  AND NOT EXISTS (
      SELECT *
      FROM (
          -- tags already used on friends' posts before the window
          SELECT DISTINCT mt_tagid
          FROM message
          INNER JOIN message_tag
              ON mt_messageid = m_messageid
          INNER JOIN knows
              ON k_person2id = m_creatorid
          WHERE k_person1id = 21990232556256
            AND m_c_replyof IS NULL  -- post, not comment
            AND m_creationdate < '2011-07-21T22:00:00'
      ) tags
      WHERE tags.mt_tagid = recent.mt_tagid
  )
GROUP BY t_name
ORDER BY count(*) DESC, t_name
LIMIT 10

View File

@@ -0,0 +1,21 @@
-- Forums joined on or after the given date by friends / friends-of-friends of
-- person 21990232556256, with the number of messages those members wrote in the forum.
SELECT
    f_title,
    count(m_messageid)
FROM (
    SELECT
        f_title,
        f_forumid,
        f.k_person2id
    FROM forum
    INNER JOIN forum_person
        ON f_forumid = fp_forumid
    INNER JOIN (
        -- direct friends ...
        SELECT k_person2id
        FROM knows
        WHERE k_person1id = 21990232556256
        UNION
        -- ... plus friends of friends, excluding the start person
        SELECT k2.k_person2id
        FROM knows k1
        INNER JOIN knows k2
            ON k1.k_person2id = k2.k_person1id
        WHERE k1.k_person1id = 21990232556256
          AND k2.k_person2id <> 21990232556256
    ) f
        ON fp_personid = f.k_person2id
    WHERE fp_creationdate >= '2011-07-21T22:00:00'
) tmp
-- LEFT JOIN keeps forums in which those members wrote nothing (count is 0).
LEFT JOIN message
    ON tmp.f_forumid = m_ps_forumid
   AND m_creatorid = tmp.k_person2id
GROUP BY f_forumid, f_title
ORDER BY count(m_messageid) DESC, f_forumid
LIMIT 20

View File

@@ -0,0 +1,22 @@
-- Tags that occur together with 'Hamid_Karzai' on posts written by friends /
-- friends-of-friends of person 21990232556256, with their post counts.
SELECT
    t_name,
    count(*)
FROM tag
INNER JOIN message_tag
    ON mt_tagid = t_tagid
INNER JOIN message
    ON m_messageid = mt_messageid
INNER JOIN (
    -- direct friends ...
    SELECT k_person2id
    FROM knows
    WHERE k_person1id = 21990232556256
    UNION
    -- ... plus friends of friends, excluding the start person
    SELECT k2.k_person2id
    FROM knows k1
    INNER JOIN knows k2
        ON k1.k_person2id = k2.k_person1id
    WHERE k1.k_person1id = 21990232556256
      AND k2.k_person2id <> 21990232556256
) f
    ON m_creatorid = f.k_person2id
WHERE m_c_replyof IS NULL  -- post, not comment
  AND t_name <> 'Hamid_Karzai'
  -- the post itself must also carry the given tag
  AND EXISTS (
      SELECT *
      FROM tag
      INNER JOIN message_tag
          ON mt_tagid = t_tagid
      WHERE mt_messageid = m_messageid
        AND t_name = 'Hamid_Karzai'
  )
GROUP BY t_name
ORDER BY count(*) DESC, t_name
LIMIT 10

View File

@@ -0,0 +1,21 @@
-- The 20 most recent likers of messages written by person 21990232556256, each
-- with their latest like, the liked message, and whether the liker is not yet a friend.
SELECT
    p_personid,
    p_firstname,
    p_lastname,
    l.l_creationdate,
    m_messageid,
    COALESCE(m_ps_imagefile,'')||COALESCE(m_content,''),
    0 AS lag, -- TODO
    --EXTRACT(EPOCH FROM (l.l_creationdate - m_creationdate)) / 60 as lag,
    (CASE
        WHEN EXISTS (
            SELECT 1
            FROM knows
            WHERE k_person1id = 21990232556256
              AND k_person2id = p_personid
        ) THEN 0
        ELSE 1
     END) AS isnew
FROM (
    -- latest like timestamp per liker, limited to the 20 most recent likers
    SELECT
        l_personid,
        max(l_creationdate) AS l_creationdate
    FROM likes
    INNER JOIN message
        ON m_messageid = l_messageid
    WHERE m_creatorid = 21990232556256
    GROUP BY l_personid
    ORDER BY max(l_creationdate) DESC
    LIMIT 20
) tmp
INNER JOIN person
    ON p_personid = tmp.l_personid
-- find the like row behind that timestamp to recover the liked message
INNER JOIN likes AS l
    ON tmp.l_personid = l.l_personid
   AND tmp.l_creationdate = l.l_creationdate
INNER JOIN message
    ON l.l_messageid = m_messageid
ORDER BY l.l_creationdate DESC, p_personid

View File

@@ -0,0 +1,8 @@
-- The 20 most recent direct replies to messages written by person 21990232556256,
-- together with the author of each reply.
SELECT
    p1.m_creatorid,
    p_firstname,
    p_lastname,
    p1.m_creationdate,
    p1.m_messageid,
    p1.m_content
FROM message p2            -- message written by the person
INNER JOIN message p1      -- reply to it
    ON p1.m_c_replyof = p2.m_messageid
INNER JOIN person          -- author of the reply
    ON p_personid = p1.m_creatorid
WHERE p2.m_creatorid = 21990232556256
ORDER BY p1.m_creationdate DESC, p1.m_messageid
LIMIT 20

View File

@@ -0,0 +1,18 @@
-- The 20 most recent messages, created before the cutoff, written by friends or
-- friends-of-friends of person 21990232556256.
SELECT
    p_personid,
    p_firstname,
    p_lastname,
    m_messageid,
    COALESCE(m_ps_imagefile,'')||COALESCE(m_content,'') AS content,
    m_creationdate
FROM (
    -- direct friends ...
    SELECT k_person2id
    FROM knows
    WHERE k_person1id = 21990232556256
    UNION
    -- ... plus friends of friends, excluding the start person
    SELECT k2.k_person2id
    FROM knows k1
    INNER JOIN knows k2
        ON k1.k_person2id = k2.k_person1id
    WHERE k1.k_person1id = 21990232556256
      AND k2.k_person2id <> 21990232556256
) f
INNER JOIN person
    ON p_personid = f.k_person2id
INNER JOIN message
    ON p_personid = m_creatorid
WHERE m_creationdate < '2012-07-26T22:00:00'
ORDER BY m_creationdate DESC, m_messageid ASC
LIMIT 20

View File

@@ -0,0 +1,3 @@
-- Profile attributes of a single person, looked up by id.
SELECT
    p_firstname,
    p_lastname,
    p_birthday,
    p_locationip,
    p_browserused,
    p_placeid,
    p_gender,
    p_creationdate
FROM person
WHERE p_personid = 21990232556256;

View File

@@ -0,0 +1,25 @@
-- The 10 most recent messages of person 21990232556256, each paired with the
-- root message of its reply chain and that root message's author.
WITH RECURSIVE cposts(m_messageid, m_content, m_ps_imagefile, m_creationdate, m_c_replyof, m_creatorid) AS (
    -- the person's 10 latest messages
    SELECT
        m_messageid,
        m_content,
        m_ps_imagefile,
        m_creationdate,
        m_c_replyof,
        m_creatorid
    FROM message
    WHERE m_creatorid = 21990232556256
    ORDER BY m_creationdate DESC
    LIMIT 10
),
parent(postid, replyof, orig_postid, creator) AS (
    -- start from each selected message ...
    SELECT m_messageid, m_c_replyof, m_messageid, m_creatorid
    FROM cposts
    UNION ALL
    -- ... and climb to the message it replies to, remembering where we started
    SELECT m_messageid, m_c_replyof, orig_postid, m_creatorid
    FROM message
    INNER JOIN parent
        ON m_messageid = replyof
)
SELECT
    p1.m_messageid,
    COALESCE(m_ps_imagefile,'')||COALESCE(m_content,'') AS content,
    p1.m_creationdate,
    p2.m_messageid,
    p2.p_personid,
    p2.p_firstname,
    p2.p_lastname
FROM (
    SELECT m_messageid, m_content, m_ps_imagefile, m_creationdate, m_c_replyof
    FROM cposts
) p1
LEFT JOIN (
    -- chain ends (replyof IS NULL) are the root messages; attach their author
    SELECT
        orig_postid,
        postid AS m_messageid,
        p_personid,
        p_firstname,
        p_lastname
    FROM parent
    INNER JOIN person
        ON creator = p_personid
    WHERE replyof IS NULL
) p2
    ON p2.orig_postid = p1.m_messageid
ORDER BY m_creationdate DESC, p2.m_messageid DESC;

View File

@@ -0,0 +1,4 @@
-- All direct friends of a person with the date the friendship was created,
-- newest friendship first.
SELECT
    p_personid,
    p_firstname,
    p_lastname,
    k_creationdate
FROM knows
INNER JOIN person
    ON k_person2id = p_personid
WHERE k_person1id = 21990232556256
ORDER BY k_creationdate DESC, p_personid ASC;

View File

@@ -0,0 +1,3 @@
-- Content (image file name concatenated with text, either may be absent) and
-- creation date of a single message.
SELECT
    COALESCE(m_ps_imagefile,'')||COALESCE(m_content,'') AS content,
    m_creationdate
FROM message
WHERE m_messageid = 687194767741;

View File

@@ -0,0 +1,3 @@
-- The author of a single message.
SELECT
    p_personid,
    p_firstname,
    p_lastname
FROM message
INNER JOIN person
    ON m_creatorid = p_personid
WHERE m_messageid = 687194767741;

View File

@@ -0,0 +1,9 @@
-- Forum (and its moderator) of the thread a message belongs to.
WITH RECURSIVE chain(parent, child) AS (
    -- start at the given message ...
    SELECT m_c_replyof, m_messageid
    FROM message
    WHERE m_messageid = 687194767741
    UNION ALL
    -- ... and follow the reply-of links upwards
    SELECT p.m_c_replyof, p.m_messageid
    FROM message p
    INNER JOIN chain c
        ON p.m_messageid = c.parent
)
SELECT
    f_forumid,
    f_title,
    p_personid,
    p_firstname,
    p_lastname
FROM message
INNER JOIN forum
    ON m_ps_forumid = f_forumid
INNER JOIN person
    ON f_moderatorid = p_personid
-- Smallest ancestor id in the chain; falls back to the message itself when it has
-- no parent. NOTE(review): presumably relies on the root post having the smallest
-- id among the ancestors - confirm.
WHERE m_messageid = (SELECT coalesce(min(parent), 687194767741) FROM chain);

View File

@@ -0,0 +1,11 @@
-- Direct replies to a message, with each reply's author and a flag telling
-- whether the original author knows the reply's author.
SELECT
    p2.m_messageid,
    p2.m_content,
    p2.m_creationdate,
    p_personid,
    p_firstname,
    p_lastname,
    (CASE
        WHEN EXISTS (
            SELECT 1
            FROM knows
            WHERE p1.m_creatorid = k_person1id
              AND p2.m_creatorid = k_person2id
        ) THEN TRUE
        ELSE FALSE
     END) AS knows
FROM message p1            -- the original message
INNER JOIN message p2      -- a reply to it
    ON p2.m_c_replyof = p1.m_messageid
INNER JOIN person          -- author of the reply
    ON p2.m_creatorid = p_personid
WHERE p1.m_messageid = 687194767741
ORDER BY p2.m_creationdate DESC, p2.m_creatorid ASC;

View File

@@ -0,0 +1,164 @@
/*
 * Column prefixes:
 *   m_ps_  field specific to posts
 *   m_c_   field specific to comments
 *   m_     field common to posts and comments
 *
 * In the "message" view, posts and comments are told apart by m_c_replyof:
 *   - m_c_replyof IS NULL      for all "post" records
 *   - m_c_replyof IS NOT NULL  for all "comment" records
 */
CREATE TABLE post (
    m_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    m_messageid    BIGINT NOT NULL,
    m_ps_imagefile VARCHAR,
    m_locationip   VARCHAR NOT NULL,
    m_browserused  VARCHAR NOT NULL,
    m_ps_language  VARCHAR,
    m_content      TEXT,
    m_length       INT NOT NULL,
    m_creatorid    BIGINT,
    m_ps_forumid   BIGINT,
    m_locationid   BIGINT
);

CREATE TABLE comment (
    m_creationdate      TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    m_messageid         BIGINT NOT NULL,
    m_locationip        VARCHAR NOT NULL,
    m_browserused       VARCHAR NOT NULL,
    m_content           TEXT NOT NULL,
    m_length            INT NOT NULL,
    m_creatorid         BIGINT,
    m_locationid        BIGINT,
    m_c_parentpostid    BIGINT,
    m_c_parentcommentid BIGINT
);

-- Unified view over posts and comments. Post-only columns are NULL for comments;
-- m_c_replyof is NULL for posts and, for comments, the parent post id or,
-- failing that, the parent comment id.
CREATE VIEW message AS
SELECT
    m_creationdate,
    m_messageid,
    m_ps_imagefile,
    m_locationip,
    m_browserused,
    m_content,
    m_length,
    m_creatorid,
    m_ps_forumid,
    m_locationid,
    NULL AS m_c_replyof
FROM post
UNION ALL
SELECT
    m_creationdate,
    m_messageid,
    NULL AS m_ps_imagefile,
    m_locationip,
    m_browserused,
    m_content,
    m_length,
    m_creatorid,
    NULL AS m_ps_forumid,
    m_locationid,
    coalesce(m_c_parentpostid, m_c_parentcommentid) AS m_c_replyof
FROM comment;
-- Forums and their relations. The *_deletiondate columns stay commented out,
-- as in the original schema.
CREATE TABLE forum (
    f_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- f_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    f_forumid      BIGINT NOT NULL,
    f_title        VARCHAR NOT NULL,
    f_moderatorid  BIGINT
);

-- Forum membership.
CREATE TABLE forum_person (
    fp_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- fp_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    fp_forumid      BIGINT NOT NULL,
    fp_personid     BIGINT NOT NULL
);

-- Tags attached to a forum.
CREATE TABLE forum_tag (
    ft_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- ft_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    ft_forumid      BIGINT NOT NULL,
    ft_tagid        BIGINT NOT NULL
);
-- Organisations, persons and the person-centred relation tables.
-- The *_deletiondate columns stay commented out, as in the original schema.
CREATE TABLE organisation (
    o_organisationid BIGINT NOT NULL,
    o_type           VARCHAR NOT NULL,
    o_name           VARCHAR NOT NULL,
    o_url            VARCHAR NOT NULL,
    o_placeid        BIGINT
);

CREATE TABLE person (
    p_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- p_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    p_personid     BIGINT NOT NULL,
    p_firstname    VARCHAR NOT NULL,
    p_lastname     VARCHAR NOT NULL,
    p_gender       VARCHAR NOT NULL,
    p_birthday     DATE NOT NULL,
    p_locationip   VARCHAR NOT NULL,
    p_browserused  VARCHAR NOT NULL,
    p_placeid      BIGINT
);

CREATE TABLE person_email (
    pe_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- pe_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    pe_personid     BIGINT NOT NULL,
    pe_email        VARCHAR NOT NULL
);

-- Tags a person is interested in.
CREATE TABLE person_tag (
    pt_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- pt_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    pt_personid     BIGINT NOT NULL,
    pt_tagid        BIGINT NOT NULL
);

-- Friendship edges between persons.
CREATE TABLE knows (
    k_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- k_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    k_person1id    BIGINT NOT NULL,
    k_person2id    BIGINT NOT NULL
);

-- Likes given by a person to a message (post or comment).
CREATE TABLE likes (
    l_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- l_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    l_personid     BIGINT NOT NULL,
    l_messageid    BIGINT NOT NULL
);

CREATE TABLE person_language (
    plang_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- plang_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    plang_personid     BIGINT NOT NULL,
    plang_language     VARCHAR NOT NULL
);

CREATE TABLE person_university (
    pu_creationdate   TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- pu_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    pu_personid       BIGINT NOT NULL,
    pu_organisationid BIGINT NOT NULL,
    pu_classyear      INT NOT NULL
);

CREATE TABLE person_company (
    pc_creationdate   TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- pc_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    pc_personid       BIGINT NOT NULL,
    pc_organisationid BIGINT NOT NULL,
    pc_workfrom       INT NOT NULL
);
-- Places form a hierarchy through pl_containerplaceid (the queries join a city
-- to its country this way).
CREATE TABLE place (
    pl_placeid          BIGINT NOT NULL,
    pl_name             VARCHAR NOT NULL,
    pl_url              VARCHAR NOT NULL,
    pl_type             VARCHAR NOT NULL,
    pl_containerplaceid BIGINT
);

-- Tags attached to a message (post or comment).
CREATE TABLE message_tag (
    mt_creationdate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    -- mt_deletiondate TIMESTAMP WITHOUT TIME ZONE NOT NULL,
    mt_messageid    BIGINT NOT NULL,
    mt_tagid        BIGINT NOT NULL
);

-- Tag classes form a hierarchy through tc_subclassoftagclassid.
CREATE TABLE tagclass (
    tc_tagclassid           BIGINT NOT NULL,
    tc_name                 VARCHAR NOT NULL,
    tc_url                  VARCHAR NOT NULL,
    tc_subclassoftagclassid BIGINT
);

CREATE TABLE tag (
    t_tagid      BIGINT NOT NULL,
    t_name       VARCHAR NOT NULL,
    t_url        VARCHAR NOT NULL,
    t_tagclassid BIGINT NOT NULL
);

View File

@@ -0,0 +1,59 @@
-- Bulk-load the benchmark CSV files into the schema tables.
-- Every path starts with a placeholder token that has to be substituted with
-- the real data directory before this script is executed.
-- All files are pipe-delimited, carry a header row, and share one timestamp
-- format. Unless a column list is given, columns are matched by position.

-- forum
COPY forum
FROM 'PATHVAR/dynamic/forum_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- forum_person
COPY forum_person
FROM 'PATHVAR/dynamic/forum_hasMember_person_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- forum_tag
COPY forum_tag
FROM 'PATHVAR/dynamic/forum_hasTag_tag_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- organisation
COPY organisation
FROM 'PATHVAR/static/organisation_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- person
COPY person
FROM 'PATHVAR/dynamic/person_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- person_email
COPY person_email
FROM 'PATHVAR/dynamic/person_email_emailaddress_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- person_tag
COPY person_tag
FROM 'PATHVAR/dynamic/person_hasInterest_tag_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- knows: the same file is loaded twice; the second pass swaps the two person
-- columns so every input row is also stored in the reverse direction.
COPY knows ( k_creationdate, k_person1id, k_person2id)
FROM 'PATHVAR/dynamic/person_knows_person_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');
COPY knows ( k_creationdate, k_person2id, k_person1id)
FROM 'PATHVAR/dynamic/person_knows_person_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- likes: post likes and comment likes are appended to the same table.
COPY likes
FROM 'PATHVAR/dynamic/person_likes_post_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');
COPY likes
FROM 'PATHVAR/dynamic/person_likes_comment_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- person_language
COPY person_language
FROM 'PATHVAR/dynamic/person_speaks_language_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- person_university
COPY person_university
FROM 'PATHVAR/dynamic/person_studyAt_organisation_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- person_company
COPY person_company
FROM 'PATHVAR/dynamic/person_workAt_organisation_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- place
COPY place
FROM 'PATHVAR/static/place_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- message_tag: post tags and comment tags are appended to the same table.
COPY message_tag
FROM 'PATHVAR/dynamic/post_hasTag_tag_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');
COPY message_tag
FROM 'PATHVAR/dynamic/comment_hasTag_tag_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- tagclass
COPY tagclass
FROM 'PATHVAR/static/tagclass_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- tag
COPY tag
FROM 'PATHVAR/static/tag_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');

-- post and comment: the original author labelled this step "message table"
-- and flagged it PROBLEMATIC without recording the reason.
COPY post
FROM 'PATHVAR/dynamic/post_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');
COPY comment
FROM 'PATHVAR/dynamic/comment_0_0.csv.gz'
(DELIMITER '|', HEADER, TIMESTAMPFORMAT '%Y-%m-%dT%H:%M:%S.%g+00:00');
-- country: for every place whose container is a place of type 'country',
-- expose the contained place's id (ctry_city) and the container's name
-- (ctry_name). Places with no container, or a non-country container, are omitted.
CREATE VIEW country AS
SELECT
    city.pl_placeid AS ctry_city,
    ctry.pl_name    AS ctry_name
FROM place AS city
INNER JOIN place AS ctry
    ON city.pl_containerplaceid = ctry.pl_placeid
WHERE ctry.pl_type = 'country';