Commit 94b8aa8d authored by Gaurav Kukreja's avatar Gaurav Kukreja

Parallel implementation of Hash Join

- using my implementation of Hash Map
- Changed input query for query compiler
Signed-off-by: 's avatarGaurav Kukreja <mailme.gaurav@gmail.com>
parent 02eb152a
...@@ -3,8 +3,9 @@ CCFLAGS=-g -O3 -fno-stack-protector -std=c++11 -fpermissive ...@@ -3,8 +3,9 @@ CCFLAGS=-g -O3 -fno-stack-protector -std=c++11 -fpermissive
CCFLAGS+=-fopenmp CCFLAGS+=-fopenmp
LIBS+= -lrt -L/home/gaurav/Downloads/tbb/tbb41_20121003oss/lib/ia32/cc4.1.0_libc2.4_kernel2.6.16.21 LIBS+= -lrt -L/home/gaurav/Downloads/tbb/tbb41_20121003oss/lib/ia32/cc4.1.0_libc2.4_kernel2.6.16.21
SOURCES= data_migrate.cpp schema_methods.cpp transactions.cpp oltp.cpp schema_constructors.cpp storage.cpp query.cpp SOURCES= data_migrate.cpp schema_methods.cpp transactions.cpp oltp.cpp schema_constructors.cpp storage.cpp query.cpp
INCLUDE=include/data_migrate.h include/main.h include/transactions.h include/oltp.h include/schema.h include/query.h INCLUDE=include/data_migrate.h include/main.h include/transactions.h include/oltp.h include/schema.h include/query.h include/hash_map.h
fakedb_query: main_query.cpp $(SOURCES) $(INCLUDE) gen_query_code.cpp fakedb_query: main_query.cpp $(SOURCES) $(INCLUDE) gen_query_code.cpp
$(CC) $(CCFLAGS) $^ -o $@ -I/home/gaurav/Downloads/tbb/tbb41_20121003oss/include/tbb/ $(LIBS) -ltbb $(CC) $(CCFLAGS) $^ -o $@ -I/home/gaurav/Downloads/tbb/tbb41_20121003oss/include/tbb/ $(LIBS) -ltbb
......
...@@ -7,11 +7,7 @@ ...@@ -7,11 +7,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <omp.h> #include <omp.h>
#include <task_scheduler_init.h> #include "include/hash_map.h"
#include <blocked_range.h>
#include <parallel_for.h>
#include <concurrent_hash_map.h>
#include <utility> #include <utility>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
...@@ -19,21 +15,8 @@ ...@@ -19,21 +15,8 @@
#include "include/schema.h" #include "include/schema.h"
#include "include/storage.h" #include "include/storage.h"
using namespace tbb; typedef MyHashMap HashTable;
// Structure that defines hashing and comparison operations for user's type.
struct MyHashCompare {
static uint64_t hash( const uint64_t x ) {
uint64_t h = 5892;
h = ((h<<5) + h) * x;
return h;
}
//! True if strings are equal
static bool equal( const uint64_t x, const uint64_t y ) {
return x==y;
}
};
typedef concurrent_hash_map<uint64_t,uint64_t,MyHashCompare> HashTable;
class mystring { class mystring {
public: public:
char str[50]; char str[50];
...@@ -70,42 +53,39 @@ int mystring::operator==(mystring _mystr) { ...@@ -70,42 +53,39 @@ int mystring::operator==(mystring _mystr) {
} }
void query2() { void query2() {
task_scheduler_init init;
timeval start_time, end_time, time_taken; timeval start_time, end_time, time_taken;
gettimeofday(&start_time,NULL); gettimeofday(&start_time,NULL);
HashTable hash_map_0; HashTable hash_map_0;
HashTable::accessor hash_map_0_acc; uint64_t max_count_item = item_vect[0].count;
uint64_t max_count_district = district_vect[0].count;
#pragma omp parallel for #pragma omp parallel for
for(uint64_t i = 0; i < max_count_district; i++) for(uint64_t i = 0; i < max_count_item; i++)
{ {
//hash_map_0 should be declared just above //hash_map_0 should be declared just above
//generate hash table here //generate hash table here
hash_map_0.insert(make_pair(district_vect[i].d_id, district_vect[i].tuple_id)); hash_map_0.insert(item_vect[i].i_id, item_vect[i].tuple_id);
} }
uint64_t max_count_customer = customer_vect[0].count; uint64_t max_count_orderline = orderline_vect[0].count;
#pragma omp parallel for #pragma omp parallel for
for(uint64_t i = 0; i < max_count_customer; i++) for(uint64_t i = 0; i < max_count_orderline; i++)
{ {
//Match with Hash Table here //Match with Hash Table here
if(customer_vect[i].c_w_id==1) if(orderline_vect[i].ol_w_id==1)
{ {
hash_map_0.find(hash_map_0_acc, customer_vect[i].c_d_id); item* item_iter=&item_vect.at(hash_map_0.find(orderline_vect[i].ol_i_id));
district* district_iter=&district_vect.at(hash_map_0_acc->second); mystring i_name;
mystring d_name; i_name = item_iter->i_name;
d_name = district_iter->d_name; mystring ol_o_id;
mystring c_first; ol_o_id = orderline_vect[i].ol_o_id;
c_first = customer_vect[i].c_first; mystring ol_quantity;
mystring c_last; ol_quantity = orderline_vect[i].ol_quantity;
c_last = customer_vect[i].c_last; mystring ol_amount;
mystring c_id; ol_amount = orderline_vect[i].ol_amount;
c_id = customer_vect[i].c_id; printf("%s\t", i_name.str);
printf("%s\t", d_name.str); printf("%s\t", ol_o_id.str);
printf("%s\t", c_first.str); printf("%s\t", ol_quantity.str);
printf("%s\t", c_last.str); printf("%s\t", ol_amount.str);
printf("%s\t", c_id.str);
printf("\n"); printf("\n");
} }
} }
......
/**
* author Gaurav Kukreja
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <atomic>
#include <functional>
using namespace std;
#define DEFAULT_BUCKET_INIT_LOG_SIZE 6
#define DEFAULT_BUCKET_INIT_SIZE (1 << DEFAULT_BUCKET_INIT_LOG_SIZE)
#define ENOTEXIST -2
class Pair
{
public:
uint64_t hashedKey;
uint64_t key;
uint64_t value;
Pair *next;
Pair(uint64_t _hashedKey, uint64_t _key, uint64_t _value)
{
hashedKey= _hashedKey;
key = _key;
value = _value;
next = NULL;
}
~Pair()
{
next = NULL;
}
};
class Bucket
{
public:
Pair* pairList;
Bucket()
{
pairList = NULL;
}
uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value);
uint64_t find(uint64_t key);
};
uint64_t Bucket::insert(uint64_t hashedKey, uint64_t key, uint64_t value)
{
Pair *temp = new Pair(hashedKey, key, value);
temp->next = pairList;
pairList = temp; }
uint64_t Bucket::find(uint64_t hashedKey)
{
for(Pair *temp=pairList; temp != NULL; temp=temp->next)
{
if(temp->hashedKey == hashedKey)
{
return temp->value;
}
}
}
class MyHashMap
{
private:
Bucket *bucketArray;
int bucketLogSize;
atomic<int> rehashing_lock;
int rehash();
uint64_t generate_hash(uint64_t key);
public:
atomic<uint64_t> num_pairs;
MyHashMap()
{
int i;
bucketArray = (Bucket*)malloc(sizeof(Bucket)*DEFAULT_BUCKET_INIT_SIZE);
for(i=0;i<DEFAULT_BUCKET_INIT_SIZE;i++)
bucketArray[i].pairList = NULL;
bucketLogSize = DEFAULT_BUCKET_INIT_LOG_SIZE;
num_pairs = 0;
rehashing_lock=0;
}
uint64_t insert(uint64_t key, uint64_t value);
uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value);
uint64_t find(uint64_t key);
};
uint64_t MyHashMap::generate_hash(uint64_t key)
{
uint64_t hashedKey = 5381;
hashedKey = ((hashedKey << 5) + hashedKey) + key; /* hashedKey * 33 + c */ return hashedKey;
}
int MyHashMap::rehash()
{
Bucket *oldBucketArray;
int oldBucketSize = (1 << bucketLogSize);
oldBucketArray = bucketArray;
bucketLogSize++;
bucketArray = (Bucket*)malloc(sizeof(Bucket)*(1 << bucketLogSize));
for(int i=0; i < (1 << bucketLogSize); i++)
bucketArray[i].pairList = NULL;
for(int i=0; i<oldBucketSize; i++)
{
for(Pair *iter=oldBucketArray[i].pairList; iter != NULL; iter=iter->next)
{ insert(iter->hashedKey, iter->key, iter->value);
}
}
free(oldBucketArray);
}
uint64_t MyHashMap::insert(uint64_t key, uint64_t value)
{
/*while(rehashing_lock);
rehashing_lock++;
if(num_pairs > (1 << bucketLogSize) + (1 << (bucketLogSize -2)))
{ rehash();
}
rehashing_lock--;*/
uint64_t hashedKey = generate_hash(key);
num_pairs++;
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
bucketArray[bucketNum].insert(hashedKey, key, value);
}
uint64_t MyHashMap::insert(uint64_t hashedKey, uint64_t key, uint64_t value)
{
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
bucketArray[bucketNum].insert(hashedKey, key, value);
}
uint64_t MyHashMap::find(uint64_t key)
{
uint64_t hashedKey = generate_hash(key);
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
return bucketArray[bucketNum].find(hashedKey);
}
\ No newline at end of file
...@@ -7,17 +7,17 @@ ...@@ -7,17 +7,17 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
#include <math.h> #include <math.h>
#include <atomic>
#include <functional> #include <functional>
using namespace std; using namespace std;
#define DEFAULT_BUCKET_INIT_SIZE pow(2, DEFAULT_BUCKET_INIT_LOG_SIZE) #define DEFAULT_BUCKET_INIT_LOG_SIZE 6
#define DEFAULT_BUCKET_INIT_LOG_SIZE 10 #define DEFAULT_BUCKET_INIT_SIZE (1 << DEFAULT_BUCKET_INIT_LOG_SIZE)
#define ENOTEXIST -2 #define ENOTEXIST -2
class Pair class Pair
{ {
public: public:
...@@ -29,11 +29,10 @@ ...@@ -29,11 +29,10 @@
Pair(uint64_t _hashedKey, uint64_t _key, uint64_t _value) Pair(uint64_t _hashedKey, uint64_t _key, uint64_t _value)
{ {
hashedKey= _hashedKey; hashedKey= _hashedKey;
key = _key; key = _key;
value = _value; value = _value;
next = NULL; next = NULL;
} }
~Pair() ~Pair()
{ {
next = NULL; next = NULL;
...@@ -43,17 +42,15 @@ ...@@ -43,17 +42,15 @@
class Bucket class Bucket
{ {
public: public:
Pair *pairList; Pair* pairList;
int num_pairs;
Bucket() Bucket()
{ {
pairList = NULL; pairList = NULL;
num_pairs = 0;
} }
uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value); uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value);
uint64_t at(uint64_t key); uint64_t find(uint64_t key);
}; };
...@@ -61,21 +58,19 @@ ...@@ -61,21 +58,19 @@
{ {
Pair *temp = new Pair(hashedKey, key, value); Pair *temp = new Pair(hashedKey, key, value);
num_pairs++;
temp->next = pairList; temp->next = pairList;
pairList = temp; pairList = temp; }
}
uint64_t Bucket::at(uint64_t hashedKey) uint64_t Bucket::find(uint64_t hashedKey)
{
for(Pair *temp=pairList; temp != NULL; temp=temp->next)
{ {
if(temp->hashedKey == hashedKey) for(Pair *temp=pairList; temp != NULL; temp=temp->next)
{ {
return temp->value; if(temp->hashedKey == hashedKey)
{
return temp->value;
}
} }
} }
}
class MyHashMap class MyHashMap
{ {
...@@ -83,98 +78,80 @@ ...@@ -83,98 +78,80 @@
Bucket *bucketArray; Bucket *bucketArray;
int bucketLogSize; int bucketLogSize;
atomic<int> rehashing_lock;
int rehash(); int rehash();
uint64_t generate_hash(uint64_t key); uint64_t generate_hash(uint64_t key);
public: public:
uint64_t num_pairs; atomic<uint64_t> num_pairs;
MyHashMap() MyHashMap()
{ {
int i;
bucketArray = (Bucket*)malloc(sizeof(Bucket)*DEFAULT_BUCKET_INIT_SIZE); bucketArray = (Bucket*)malloc(sizeof(Bucket)*DEFAULT_BUCKET_INIT_SIZE);
for(i=0;i<DEFAULT_BUCKET_INIT_SIZE;i++)
bucketArray[i].pairList = NULL;
bucketLogSize = DEFAULT_BUCKET_INIT_LOG_SIZE; bucketLogSize = DEFAULT_BUCKET_INIT_LOG_SIZE;
num_pairs = 0; num_pairs = 0;
rehashing_lock=0;
} }
uint64_t insert(uint64_t key, uint64_t value); uint64_t insert(uint64_t key, uint64_t value);
uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value); uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value);
uint64_t at(uint64_t key); uint64_t find(uint64_t key);
}; };
uint64_t MyHashMap::generate_hash(uint64_t key) uint64_t MyHashMap::generate_hash(uint64_t key)
{ {
/* uint64_t hashedKey = 5381;
hash<uint64_t> hash_func; hashedKey = ((hashedKey << 5) + hashedKey) + key; /* hashedKey * 33 + c */ return hashedKey;
uint64_t hashedKey = hash_func(key);
//printf("Hash Value = %llu\n", hashedKey);
*/
//uint64_t hashedKey = 5381;
//hashedKey = ((hashedKey << 5) + hashedKey) + key; /* hashedKey * 33 + c */
//printf("Hash Value = %llu\n", hashedKey);
return key;
} }
int MyHashMap::rehash() int MyHashMap::rehash()
{ {
Bucket *oldBucketArray; Bucket *oldBucketArray;
int oldBucketSize = (1 << bucketLogSize);
oldBucketArray = bucketArray; oldBucketArray = bucketArray;
bucketLogSize++; bucketLogSize++;
bucketArray = (Bucket*)malloc(sizeof(Bucket)*(1 << bucketLogSize));
for(int i=0; i < (1 << bucketLogSize); i++)
bucketArray[i].pairList = NULL;
//printf("rehashing : bucketLogSize = %d\n", bucketLogSize); for(int i=0; i<oldBucketSize; i++)
//printf("rehashing : num_pairs = %llu\n", num_pairs); {
for(Pair *iter=oldBucketArray[i].pairList; iter != NULL; iter=iter->next)
bucketArray = (Bucket*)malloc(sizeof(Bucket)*pow(2, bucketLogSize)); { insert(iter->hashedKey, iter->key, iter->value);
/*for(int i=0; i<pow(2, bucketLogSize); i++) }
{ }
bucketArray[i] = new Bucket(); free(oldBucketArray);
}*/ }
for(int i=0; i<pow(2, bucketLogSize-1); i++)
{
if(oldBucketArray[i].num_pairs!=0)
{
for(Pair *iter=oldBucketArray[i].pairList; iter != NULL; iter=iter->next)
{
insert(iter->hashedKey, iter->key, iter->value);
}
//free((void*)&oldBucketArray[i]);
}
}
}
uint64_t MyHashMap::insert(uint64_t key, uint64_t value)
{
if(num_pairs > pow(2, bucketLogSize)+pow(2, bucketLogSize-6))
{
rehash();
}
uint64_t hashedKey = generate_hash(key);
insert(hashedKey, key, value);
}
uint64_t MyHashMap::insert(uint64_t hashedKey, uint64_t key, uint64_t value)
{
int bucketNum = (int)((uint64_t)hashedKey % (uint64_t)pow(2, bucketLogSize));
//printf("Bucket Number = %d\n", bucketNum);
num_pairs++;
bucketArray[bucketNum].insert(hashedKey, key, value);
}
uint64_t MyHashMap::at(uint64_t key)
{
uint64_t hashedKey = generate_hash(key);
int bucketNum = (int)((uint64_t)hashedKey % (uint64_t)pow(2, bucketLogSize));
return bucketArray[bucketNum].at(hashedKey);
}
uint64_t MyHashMap::insert(uint64_t key, uint64_t value)
{
while(rehashing_lock);
rehashing_lock++;
if(num_pairs > (1 << bucketLogSize) + (1 << (bucketLogSize -2)))
{
rehash();
}
rehashing_lock--;
uint64_t hashedKey = generate_hash(key);
num_pairs++;
insert(hashedKey, key, value);
}
uint64_t MyHashMap::insert(uint64_t hashedKey, uint64_t key, uint64_t value)
{
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
bucketArray[bucketNum].insert(hashedKey, key, value);
}
uint64_t MyHashMap::find(uint64_t key)
{
uint64_t hashedKey = generate_hash(key);
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
return bucketArray[bucketNum].find(hashedKey);
}
\ No newline at end of file
No preview for this file type
...@@ -16,7 +16,7 @@ int main() ...@@ -16,7 +16,7 @@ int main()
gettimeofday(&start_time,NULL); gettimeofday(&start_time,NULL);
for(long int i=0;i<100000;i++) for(long int i=0;i<1000000;i++)
{ {
map.insert(rand(), rand()); map.insert(rand(), rand());
} }
...@@ -25,14 +25,19 @@ int main() ...@@ -25,14 +25,19 @@ int main()
time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec; time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec;
time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec; time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec;
if(time_taken.tv_usec < 0)
{
time_taken.tv_sec--;
time_taken.tv_usec = 1000000+time_taken.tv_usec;
}
printf("MyHashMap::insert()\n");
cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n"; cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n";
unordered_map<uint64_t, uint64_t> hash_map; unordered_map<uint64_t, uint64_t> hash_map;
gettimeofday(&start_time,NULL); gettimeofday(&start_time,NULL);
for(long int i=0;i<100000;i++) for(long int i=0;i<1000000;i++)
{ {
hash_map.insert(make_pair(rand(), rand())); hash_map.insert(make_pair(rand(), rand()));
} }
...@@ -41,8 +46,54 @@ int main() ...@@ -41,8 +46,54 @@ int main()
time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec; time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec;
time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec; time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec;
if(time_taken.tv_usec < 0)
{
time_taken.tv_sec--;
time_taken.tv_usec = 1000000+time_taken.tv_usec;
}
printf("unordered_map::insert()\n");
cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n";
gettimeofday(&start_time,NULL);
for(long int i=0;i<1000000;i++)
{
map.find(rand());
}
gettimeofday(&end_time,NULL);
time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec;
time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec;
if(time_taken.tv_usec < 0)
{
time_taken.tv_sec--;
time_taken.tv_usec = 1000000+time_taken.tv_usec;
}
printf("MyHashMap::find()\n");
cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n";
//unordered_map<uint64_t, uint64_t> hash_map;
gettimeofday(&start_time,NULL);
for(long int i=0;i<1000000;i++)
{
hash_map.find(rand());
}
gettimeofday(&end_time,NULL);
time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec;
time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec;
if(time_taken.tv_usec < 0)
{
time_taken.tv_sec--;
time_taken.tv_usec = 1000000+time_taken.tv_usec;
}
printf("unordered_map::find()\n");
cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n"; cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n";
return 0; return 0;
} }
\ No newline at end of file
...@@ -7,11 +7,7 @@ ...@@ -7,11 +7,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <omp.h> #include <omp.h>
#include <task_scheduler_init.h> #include "include/hash_map.h"
#include <blocked_range.h>
#include <parallel_for.h>
#include <concurrent_hash_map.h>
#include <utility> #include <utility>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
...@@ -19,21 +15,8 @@ ...@@ -19,21 +15,8 @@
#include "include/schema.h" #include "include/schema.h"
#include "include/storage.h" #include "include/storage.h"
using namespace tbb; typedef MyHashMap HashTable;
// Structure that defines hashing and comparison operations for user's type.
struct MyHashCompare {
static uint64_t hash( const uint64_t x ) {
uint64_t h = 5892;
h = ((h<<5) + h) * x;
return h;
}
//! True if strings are equal
static bool equal( const uint64_t x, const uint64_t y ) {
return x==y;
}
};
typedef concurrent_hash_map<uint64_t,uint64_t,MyHashCompare> HashTable;
class mystring { class mystring {
public: public:
char str[50]; char str[50];
...@@ -70,42 +53,39 @@ int mystring::operator==(mystring _mystr) { ...@@ -70,42 +53,39 @@ int mystring::operator==(mystring _mystr) {
} }
void query2() { void query2() {
task_scheduler_init init;
timeval start_time, end_time, time_taken; timeval start_time, end_time, time_taken;
gettimeofday(&start_time,NULL); gettimeofday(&start_time,NULL);
HashTable hash_map_0; HashTable hash_map_0;
HashTable::accessor hash_map_0_acc; uint64_t max_count_item = item_vect[0].count;
uint64_t max_count_district = district_vect[0].count;
#pragma omp parallel for #pragma omp parallel for
for(uint64_t i = 0; i < max_count_district; i++) for(uint64_t i = 0; i < max_count_item; i++)
{ {
//hash_map_0 should be declared just above //hash_map_0 should be declared just above
//generate hash table here //generate hash table here
hash_map_0.insert(make_pair(district_vect[i].d_id, district_vect[i].tuple_id)); hash_map_0.insert(item_vect[i].i_id, item_vect[i].tuple_id);
} }
uint64_t max_count_customer = customer_vect[0].count; uint64_t max_count_orderline = orderline_vect[0].count;
#pragma omp parallel for #pragma omp parallel for
for(uint64_t i = 0; i < max_count_customer; i++) for(uint64_t i = 0; i < max_count_orderline; i++)
{ {
//Match with Hash Table here //Match with Hash Table here
if(customer_vect[i].c_w_id==1) if(orderline_vect[i].ol_w_id==1)
{ {
hash_map_0.find(hash_map_0_acc, customer_vect[i].c_d_id); item* item_iter=&item_vect.at(hash_map_0.find(orderline_vect[i].ol_i_id));
district* district_iter=&district_vect.at(hash_map_0_acc->second); mystring i_name;
mystring d_name; i_name = item_iter->i_name;
d_name = district_iter->d_name; mystring ol_o_id;
mystring c_first; ol_o_id = orderline_vect[i].ol_o_id;
c_first = customer_vect[i].c_first; mystring ol_quantity;
mystring c_last; ol_quantity = orderline_vect[i].ol_quantity;
c_last = customer_vect[i].c_last; mystring ol_amount;
mystring c_id; ol_amount = orderline_vect[i].ol_amount;
c_id = customer_vect[i].c_id; printf("%s\t", i_name.str);
printf("%s\t", d_name.str); printf("%s\t", ol_o_id.str);
printf("%s\t", c_first.str); printf("%s\t", ol_quantity.str);
printf("%s\t", c_last.str); printf("%s\t", ol_amount.str);
printf("%s\t", c_id.str);
printf("\n"); printf("\n");
} }
} }
......
...@@ -22,15 +22,15 @@ void generate_parse_tree() { ...@@ -22,15 +22,15 @@ void generate_parse_tree() {
expr_root = print1; expr_root = print1;
table_field_vect_t *table=new table_field_vect_t; table_field_vect_t *table=new table_field_vect_t;
//tables and fields //tables and fields
table->table_name = "district"; table->table_name = "item";
table->field_vect.push_back((field_t)"d_name"); table->field_vect.push_back((field_t)"i_name");
print1->table_vect.push_back(*table); print1->table_vect.push_back(*table);
delete table; delete table;
table = new table_field_vect_t; table = new table_field_vect_t;
table->table_name = "customer"; table->table_name = "orderline";
table->field_vect.push_back((field_t)"c_first"); table->field_vect.push_back((field_t)"ol_o_id");
table->field_vect.push_back((field_t)"c_last"); table->field_vect.push_back((field_t)"ol_quantity");
table->field_vect.push_back((field_t)"c_id"); table->field_vect.push_back((field_t)"ol_amount");
print1->table_vect.push_back(*table); print1->table_vect.push_back(*table);
delete table; delete table;
print1->l_input = new select_Operator; print1->l_input = new select_Operator;
...@@ -42,15 +42,15 @@ void generate_parse_tree() { ...@@ -42,15 +42,15 @@ void generate_parse_tree() {
select1->consumer = print1; select1->consumer = print1;
//tables and fields //tables and fields
table = new table_field_vect_t; table = new table_field_vect_t;
table->table_name = "district"; table->table_name = "item";
table->field_vect.push_back((field_t)"d_name"); table->field_vect.push_back((field_t)"i_name");
select1->table_vect.push_back(*table); select1->table_vect.push_back(*table);
delete table; delete table;
table = new table_field_vect_t; table = new table_field_vect_t;
table->table_name = "customer"; table->table_name = "orderline";
table->field_vect.push_back((field_t)"c_first"); table->field_vect.push_back((field_t)"ol_o_id");
table->field_vect.push_back((field_t)"c_last"); table->field_vect.push_back((field_t)"ol_quantity");
table->field_vect.push_back((field_t)"c_id"); table->field_vect.push_back((field_t)"ol_amount");
select1->table_vect.push_back(*table); select1->table_vect.push_back(*table);
delete table; delete table;
select1->l_input = new join_Operator; select1->l_input = new join_Operator;
...@@ -63,48 +63,29 @@ void generate_parse_tree() { ...@@ -63,48 +63,29 @@ void generate_parse_tree() {
join1->l_input = new tableScan_Operator; join1->l_input = new tableScan_Operator;
join1->r_input = new tableScan_Operator; join1->r_input = new tableScan_Operator;
predicate_t pred; predicate_t pred;
pred.pred_l_operand = "c_w_id"; pred.pred_l_operand = "ol_w_id";
pred.pred_operator = "=="; pred.pred_operator = "==";
pred.pred_r_operand = "1"; pred.pred_r_operand = "1";
join1->primary_pred_vect.push_back(pred); join1->primary_pred_vect.push_back(pred);
join1->hashed_pred.pred_l_operand = "c_d_id"; join1->hashed_pred.pred_l_operand = "ol_i_id";
join1->hashed_pred.pred_operator = "=="; join1->hashed_pred.pred_operator = "==";
join1->hashed_pred.pred_r_operand = "d_id"; join1->hashed_pred.pred_r_operand = "i_id";
join_Operator* _join1 = dynamic_cast<join_Operator*>(join1); join_Operator* _join1 = dynamic_cast<join_Operator*>(join1);
_join1->hash_key_type="uint64_t"; _join1->hash_key_type="uint64_t";
_join1->hash_key_field="d_id"; _join1->hash_key_field="i_id";
//left TABLESCAN Operator //left TABLESCAN Operator
Operator *tableScan1; Operator *tableScan1;
tableScan1 = join1->l_input; tableScan1 = join1->l_input;
tableScan1->oper_type=TABLESCAN; tableScan1->oper_type=TABLESCAN;
tableScan1->consumer = join1; tableScan1->consumer = join1;
tableScan1->table_name = "district"; tableScan1->table_name = "item";
//OVERKILL : table_name is enough for tableScan_Operator
/*
table = new table_field_vect_t;
table->table_name = "warehouse";
table->field_vect.push_back((field_t)"w_id");
table->field_vect.push_back((field_t)"w_name");
tableScan1->table_vect.push_back(*table);
delete table;
*/
//tableScan1->l_input = new table_Operator;
//right TABLESCAN Operator //right TABLESCAN Operator
Operator *tableScan2; Operator *tableScan2;
tableScan2 = join1->r_input; tableScan2 = join1->r_input;
tableScan2->consumer = join1; tableScan2->consumer = join1;
tableScan2->table_name = "customer"; tableScan2->table_name = "orderline";
//OVERKILL : table_name is enough for tableScan_Operator
/*
table = new table_field_vect_t;
table->table_name = "district";
table->field_vect.push_back((field_t)"d_w_id");
table->field_vect.push_back((field_t)"d_name");
table->field_vect.push_back((field_t)"d_tax");
tableScan2->table_vect.push_back(*table);
delete table;
*/
//tableScan2->l_input = new table_Operator;
} }
/*
* Hardcoded Parse Tree
*
*/
#include <iostream>
#include <list>
#include <string>
#include "include/parse_tree.h"
using namespace std;
Operator *expr_root;
void generate_parse_tree() {
//PRINT Operation
print_Operator *print1;
print1 = new print_Operator;
print1->oper_type=PRINT;
expr_root = print1;
table_field_vect_t *table=new table_field_vect_t;
//tables and fields
table->table_name = "warehouse";
table->field_vect.push_back((field_t)"w_name");
print1->table_vect.push_back(*table);
delete table;
table = new table_field_vect_t;
table->table_name = "district";
table->field_vect.push_back((field_t)"d_name");
table->field_vect.push_back((field_t)"d_tax");
print1->table_vect.push_back(*table);
delete table;
print1->l_input = new select_Operator;
//SELECT Operator
Operator *select1;
select1 = print1->l_input;
select->oper_type=SELECT;
select1->consumer = print1;
//tables and fields
table = new table_field_vect_t;
table->table_name = "warehouse";
table->field_vect.push_back((field_t)"w_name");
select1->table_vect.push_back(*table);
delete table;
table = new table_field_vect_t;
table->table_name = "district";
table->field_vect.push_back((field_t)"d_name");
table->field_vect.push_back((field_t)"d_tax");
select1->table_vect.push_back(*table);
delete table;
select1->l_input = new join_Operator;
//JOIN Operator
Operator *join1;
join1 = select1->l_input;
join1->oper_type=JOIN;
join1->consumer = select1;
join1->l_input = new tableScan_Operator;
join1->r_input = new tableScan_Operator;
predicate_t pred;
pred.pred_l_operand = "d_w_id";
pred.pred_operator = "==";
pred.pred_r_operand = "1";
join1->primary_pred_vect.push_back(pred);
join1->hashed_pred.pred_l_operand = "d_w_id";
join1->hashed_pred.pred_operator = "==";
join1->hashed_pred.pred_r_operand = "w_id";
join_Operator* _join1 = dynamic_cast<join_Operator*>(join1);
_join1->hash_key_type="uint64_t";
_join1->hash_key_field="w_id";
//left TABLESCAN Operator
Operator *tableScan1;
tableScan1 = join1->l_input;
tableScan1->oper_type=TABLESCAN;
tableScan1->consumer = join1;
tableScan1->table_name = "warehouse";
//OVERKILL : table_name is enough for tableScan_Operator
/*
table = new table_field_vect_t;
table->table_name = "warehouse";
table->field_vect.push_back((field_t)"w_id");
table->field_vect.push_back((field_t)"w_name");
tableScan1->table_vect.push_back(*table);
delete table;
*/
//tableScan1->l_input = new table_Operator;
//right TABLESCAN Operator
Operator *tableScan2;
tableScan2 = join1->r_input;
tableScan2->consumer = join1;
tableScan2->table_name = "district";
//OVERKILL : table_name is enough for tableScan_Operator
/*
table = new table_field_vect_t;
table->table_name = "district";
table->field_vect.push_back((field_t)"d_w_id");
table->field_vect.push_back((field_t)"d_name");
table->field_vect.push_back((field_t)"d_tax");
tableScan2->table_vect.push_back(*table);
delete table;
*/
//tableScan2->l_input = new table_Operator;
}
...@@ -30,17 +30,18 @@ void generate_query_code() { ...@@ -30,17 +30,18 @@ void generate_query_code() {
fprintf(qcode_fp, "#include <stdlib.h>\n\n"); fprintf(qcode_fp, "#include <stdlib.h>\n\n");
fprintf(qcode_fp, "#include <omp.h>\n"); fprintf(qcode_fp, "#include <omp.h>\n");
fprintf(qcode_fp, "#include <task_scheduler_init.h>\n"); //fprintf(qcode_fp, "#include <task_scheduler_init.h>\n");
fprintf(qcode_fp, "#include <blocked_range.h>\n"); //fprintf(qcode_fp, "#include <blocked_range.h>\n");
fprintf(qcode_fp, "#include <parallel_for.h>\n"); //fprintf(qcode_fp, "#include <parallel_for.h>\n");
fprintf(qcode_fp, "#include <concurrent_hash_map.h>\n\n"); //fprintf(qcode_fp, "#include <concurrent_hash_map.h>\n\n");
fprintf(qcode_fp, "#include \"include/hash_map.h\"\n");
fprintf(qcode_fp, "#include <utility>\n"); fprintf(qcode_fp, "#include <utility>\n");
fprintf(qcode_fp, "#include <unordered_map>\n"); fprintf(qcode_fp, "#include <unordered_map>\n");
fprintf(qcode_fp, "#include <vector>\n\n"); fprintf(qcode_fp, "#include <vector>\n\n");
fprintf(qcode_fp, "#include \"include/schema.h\"\n"); fprintf(qcode_fp, "#include \"include/schema.h\"\n");
fprintf(qcode_fp, "#include \"include/storage.h\"\n\n"); fprintf(qcode_fp, "#include \"include/storage.h\"\n\n");
/*
fprintf(qcode_fp, "using namespace tbb;\n\n"); fprintf(qcode_fp, "using namespace tbb;\n\n");
fprintf(qcode_fp, ""); fprintf(qcode_fp, "");
fprintf(qcode_fp, "// Structure that defines hashing and comparison operations for user's type.\n"); fprintf(qcode_fp, "// Structure that defines hashing and comparison operations for user's type.\n");
...@@ -59,6 +60,10 @@ void generate_query_code() { ...@@ -59,6 +60,10 @@ void generate_query_code() {
// A concurrent hash table that maps strings to ints. // A concurrent hash table that maps strings to ints.
fprintf(qcode_fp, "typedef concurrent_hash_map<uint64_t,uint64_t,MyHashCompare> HashTable;\n"); fprintf(qcode_fp, "typedef concurrent_hash_map<uint64_t,uint64_t,MyHashCompare> HashTable;\n");
fprintf(qcode_fp, ""); fprintf(qcode_fp, "");
*/
fprintf(qcode_fp, "typedef MyHashMap HashTable;\n");
fprintf(qcode_fp, "\n");
fprintf(qcode_fp, "class mystring {\n"); fprintf(qcode_fp, "class mystring {\n");
...@@ -99,8 +104,8 @@ void generate_query_code() { ...@@ -99,8 +104,8 @@ void generate_query_code() {
fprintf(qcode_fp, "void query2() {\n"); fprintf(qcode_fp, "void query2() {\n");
open_braces++; open_braces++;
indent(); /* indent();
fprintf(qcode_fp, "task_scheduler_init init;\n"); fprintf(qcode_fp, "task_scheduler_init init;\n");*/
indent(); indent();
fprintf(qcode_fp, "timeval start_time, end_time, time_taken;\n\n"); fprintf(qcode_fp, "timeval start_time, end_time, time_taken;\n\n");
indent(); indent();
......
...@@ -83,8 +83,6 @@ void join_Operator::produce() { ...@@ -83,8 +83,6 @@ void join_Operator::produce() {
_join1->joinOp_num = num_hashOp++; _join1->joinOp_num = num_hashOp++;
indent(); indent();
fprintf(qcode_fp,"HashTable hash_map_%d;\n", _join1->joinOp_num); fprintf(qcode_fp,"HashTable hash_map_%d;\n", _join1->joinOp_num);
indent();
fprintf(qcode_fp,"HashTable::accessor hash_map_%d_acc;\n", _join1->joinOp_num);
l_input->produce(); l_input->produce();
r_input->produce(); r_input->produce();
} }
...@@ -103,7 +101,7 @@ void join_Operator::consume() { ...@@ -103,7 +101,7 @@ void join_Operator::consume() {
indent(); indent();
fprintf(qcode_fp, "//generate hash table here\n"); fprintf(qcode_fp, "//generate hash table here\n");
indent(); indent();
fprintf(qcode_fp, "hash_map_%d.insert(make_pair(%s_vect[i].%s, %s_vect[i].tuple_id));\n", _join1->joinOp_num, l_input->table_name.c_str(), _join1->hash_key_field.c_str(), l_input->table_name.c_str()); fprintf(qcode_fp, "hash_map_%d.insert(%s_vect[i].%s, %s_vect[i].tuple_id);\n", _join1->joinOp_num, l_input->table_name.c_str(), _join1->hash_key_field.c_str(), l_input->table_name.c_str());
_join1->l_input_consumed = true; _join1->l_input_consumed = true;
} }
else { else {
...@@ -133,20 +131,7 @@ void join_Operator::consume() { ...@@ -133,20 +131,7 @@ void join_Operator::consume() {
fprintf(qcode_fp, "{\n"); fprintf(qcode_fp, "{\n");
open_braces++; open_braces++;
indent(); indent();
fprintf(qcode_fp, "hash_map_%d.find(hash_map_%d_acc, %s_vect[i].%s);\n", _join1->joinOp_num, _join1->joinOp_num, r_input->table_name.c_str(), hashed_pred.pred_l_operand.c_str()); fprintf(qcode_fp, "%s* %s_iter=&%s_vect.at(hash_map_%d.find(%s_vect[i].%s));\n", l_input->table_name.c_str(), l_input->table_name.c_str(), l_input->table_name.c_str(), _join1->joinOp_num, r_input->table_name.c_str(), hashed_pred.pred_l_operand.c_str());
indent();
fprintf(qcode_fp, "%s* %s_iter=&%s_vect.at(hash_map_%d_acc->second);\n", l_input->table_name.c_str(), l_input->table_name.c_str(), l_input->table_name.c_str(), _join1->joinOp_num);
//fprintf(qcode_fp, "%s* %s_iter=&%s_vect.at(hash_map_%d.at(%s_iter->%s));\n", l_input->table_name.c_str(), l_input->table_name.c_str(), l_input->table_name.c_str(), _join1->joinOp_num, r_input->table_name.c_str(), hashed_pred.pred_l_operand.c_str());
/*
indent();
fprintf(qcode_fp, "if(%s_iter == %s_vect.end()) {\n", l_input->table_name.c_str(), l_input->table_name.c_str());
open_braces++;
indent();
fprintf(qcode_fp, "continue;\n");
open_braces--;
indent();
fprintf(qcode_fp, "}\n");
*/
consumer->consume(); consumer->consume();
open_braces--; open_braces--;
indent(); indent();
...@@ -155,16 +140,7 @@ void join_Operator::consume() { ...@@ -155,16 +140,7 @@ void join_Operator::consume() {
else { else {
indent(); indent();
fprintf(qcode_fp, "vector<%s>::iterator %s_iter=%s_vect.at(hash_map_%d.at(%s));\n", l_input->table_name.c_str(), l_input->table_name.c_str(), l_input->table_name.c_str(), _join1->joinOp_num, hashed_pred.pred_l_operand.c_str()); fprintf(qcode_fp, "vector<%s>::iterator %s_iter=%s_vect.at(hash_map_%d.at(%s));\n", l_input->table_name.c_str(), l_input->table_name.c_str(), l_input->table_name.c_str(), _join1->joinOp_num, hashed_pred.pred_l_operand.c_str());
/*
indent();
fprintf(qcode_fp, "if(%s_iter == %s_vect.end()) {\n", l_input->table_name.c_str(), l_input->table_name.c_str());
open_braces++;
indent();
fprintf(qcode_fp, "continue;\n");
open_braces--;
indent();
fprintf(qcode_fp, "}\n");
*/
consumer->consume(); consumer->consume();
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment