Hi, I am trying to do something in SQL that I have done in PHP, but I am trying to have the operation performed as a stored procedure because of the sheer number of inserts I have to perform in PHP. I could really use some help!
I'm not how to assign variables based on subqueries, or how to even structure the nested queries...
I have a table with the following fields
scores
userId INT
testId INT
score INT
and I would like to correlate all of the users in another table 'correlations',
correlations
user1 (FK scores)
user2 (FK scores)
total_delta (the total of the difference in test scores between user1 and user2)
num_common_tests (number of tests both users have taken in common)
avg_delta (total_delta/num_common_tests)
in PHP i'm doing the following:
[PHP]
/**
* Loads all test scores into an array from the database, ordered by userId
* Array has the form [userId,[index,testId,score]]
* ...this takes very little time
*/
function get_user_scores(){
global $user_scores,$users,$num_users,$con,$table,$query_ limit;
$sql = "select userId, testId, score from ".$table." order by userId limit ".$query_limit;
$rs = mysql_query($sql) or die(mysql_error()."<br>".$sql); //pointer to resultset from database
while ($row = mysql_fetch_assoc($rs)) {
$testId= intval($row['testId']);
$score= intval($row['score']);
$userId = intval($row['userId']);
if(array_key_exists($userId,$user_scores)){
array_push($user_scores[$userId],array($testId,$score));
} else {
$user_ratings[$userId] = array();
array_push($user_scores[$userId],array($testId,$score));
}
}
$users = array_keys($user_scores);
$num_users = count($users);
}
/**
* Builds the correlation array. For each user, compares other users who have at least one shared test.
* This builds an N choose 2 array with the form [index,user1,user2,cummulative delta,num common tests,avg delta]
* ...very slow when users > 100
*/
function build_correlations(){
global $num_users,$users,$correlation_array,$correlation_ array_index;
for ($i=0; $i < $num_users; $i++){
$user1 = $users[$i];
for ($k = $i+1; $k < $num_users; $k++){
$user2 = $users[$k];
$user1_correlation = correlate_users($user1,$user2);
if($user1_correlation!=null){
$correlation_array[$correlation_array_index] = $user1_correlation;
$correlation_array_index++;
}
}
}
}
/**
* Calculates the correlation between 2 users
*/
function correlate_users($user1,$user2){
global $correlation_array,$user_scores;
$user1_scores= $user_scores[$user1];
$user2_scores= $user_scores[$user2];
$total_delta = 0;
$num_common_tests = 0;
foreach ($user1_scores as &$value1){
$test1 = $value1[0];
$user1_score = $value1[1];
foreach ($user2_scores as &$value2){
$test2 = $value2[0];
if ($test1 == $test2){ //shared test, we can calculate a correlation
$user2_score = $value2[1];
$delta = abs($user2_score - $user1_score);
$total_delta += $delta;
$num_common_tests++;
}
}
unset($value2);
}
unset($value1);
if($num_common_tests > 0){ //add to correlations array
$avg_delta = $total_delta/$num_common_tests;
return array($user1,$user2,$total_delta,$num_common_tests ,$avg_delta);
}
return null;
}
/**
* Saves contents of correlation array to the database
*/
function save_correlations(){
global $con,$correlation_array;
for($i=0; $i < count($correlation_array); $i++){
$sql = "insert into correlations (user1, user2, total_delta, num_common_tests, avg_delta) values (". $correlation_array[$i][0] .",". $correlation_array[$i][1] .",". $correlation_array[$i][2] .",". $correlation_array[$i][3] .",". $correlation_array[$i][4] .")";
$rs = mysql_query($sql) or die(mysql_error()."<br>".$sql);
}
print("Correlations saved<br>\n");
}
[/PHP]