Commit 94b8aa8d authored by Gaurav Kukreja's avatar Gaurav Kukreja

Parallel implementation of Hash Join

- using my implementation of Hash Map
- Changed input query for query compiler
Signed-off-by: 's avatarGaurav Kukreja <mailme.gaurav@gmail.com>
parent 02eb152a
......@@ -3,8 +3,9 @@ CCFLAGS=-g -O3 -fno-stack-protector -std=c++11 -fpermissive
CCFLAGS+=-fopenmp
LIBS+= -lrt -L/home/gaurav/Downloads/tbb/tbb41_20121003oss/lib/ia32/cc4.1.0_libc2.4_kernel2.6.16.21
SOURCES= data_migrate.cpp schema_methods.cpp transactions.cpp oltp.cpp schema_constructors.cpp storage.cpp query.cpp
INCLUDE=include/data_migrate.h include/main.h include/transactions.h include/oltp.h include/schema.h include/query.h
INCLUDE=include/data_migrate.h include/main.h include/transactions.h include/oltp.h include/schema.h include/query.h include/hash_map.h
fakedb_query: main_query.cpp $(SOURCES) $(INCLUDE) gen_query_code.cpp
$(CC) $(CCFLAGS) $^ -o $@ -I/home/gaurav/Downloads/tbb/tbb41_20121003oss/include/tbb/ $(LIBS) -ltbb
......
......@@ -7,11 +7,7 @@
#include <stdlib.h>
#include <omp.h>
#include <task_scheduler_init.h>
#include <blocked_range.h>
#include <parallel_for.h>
#include <concurrent_hash_map.h>
#include "include/hash_map.h"
#include <utility>
#include <unordered_map>
#include <vector>
......@@ -19,21 +15,8 @@
#include "include/schema.h"
#include "include/storage.h"
using namespace tbb;
typedef MyHashMap HashTable;
// Structure that defines hashing and comparison operations for user's type.
struct MyHashCompare {
static uint64_t hash( const uint64_t x ) {
uint64_t h = 5892;
h = ((h<<5) + h) * x;
return h;
}
//! True if strings are equal
static bool equal( const uint64_t x, const uint64_t y ) {
return x==y;
}
};
typedef concurrent_hash_map<uint64_t,uint64_t,MyHashCompare> HashTable;
class mystring {
public:
char str[50];
......@@ -70,42 +53,39 @@ int mystring::operator==(mystring _mystr) {
}
void query2() {
task_scheduler_init init;
timeval start_time, end_time, time_taken;
gettimeofday(&start_time,NULL);
HashTable hash_map_0;
HashTable::accessor hash_map_0_acc;
uint64_t max_count_district = district_vect[0].count;
uint64_t max_count_item = item_vect[0].count;
#pragma omp parallel for
for(uint64_t i = 0; i < max_count_district; i++)
for(uint64_t i = 0; i < max_count_item; i++)
{
//hash_map_0 should be declared just above
//generate hash table here
hash_map_0.insert(make_pair(district_vect[i].d_id, district_vect[i].tuple_id));
hash_map_0.insert(item_vect[i].i_id, item_vect[i].tuple_id);
}
uint64_t max_count_customer = customer_vect[0].count;
uint64_t max_count_orderline = orderline_vect[0].count;
#pragma omp parallel for
for(uint64_t i = 0; i < max_count_customer; i++)
for(uint64_t i = 0; i < max_count_orderline; i++)
{
//Match with Hash Table here
if(customer_vect[i].c_w_id==1)
if(orderline_vect[i].ol_w_id==1)
{
hash_map_0.find(hash_map_0_acc, customer_vect[i].c_d_id);
district* district_iter=&district_vect.at(hash_map_0_acc->second);
mystring d_name;
d_name = district_iter->d_name;
mystring c_first;
c_first = customer_vect[i].c_first;
mystring c_last;
c_last = customer_vect[i].c_last;
mystring c_id;
c_id = customer_vect[i].c_id;
printf("%s\t", d_name.str);
printf("%s\t", c_first.str);
printf("%s\t", c_last.str);
printf("%s\t", c_id.str);
item* item_iter=&item_vect.at(hash_map_0.find(orderline_vect[i].ol_i_id));
mystring i_name;
i_name = item_iter->i_name;
mystring ol_o_id;
ol_o_id = orderline_vect[i].ol_o_id;
mystring ol_quantity;
ol_quantity = orderline_vect[i].ol_quantity;
mystring ol_amount;
ol_amount = orderline_vect[i].ol_amount;
printf("%s\t", i_name.str);
printf("%s\t", ol_o_id.str);
printf("%s\t", ol_quantity.str);
printf("%s\t", ol_amount.str);
printf("\n");
}
}
......
/**
* author Gaurav Kukreja
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <atomic>
#include <functional>
using namespace std;
#define DEFAULT_BUCKET_INIT_LOG_SIZE 6
#define DEFAULT_BUCKET_INIT_SIZE (1 << DEFAULT_BUCKET_INIT_LOG_SIZE)
#define ENOTEXIST -2
class Pair
{
public:
uint64_t hashedKey;
uint64_t key;
uint64_t value;
Pair *next;
Pair(uint64_t _hashedKey, uint64_t _key, uint64_t _value)
{
hashedKey= _hashedKey;
key = _key;
value = _value;
next = NULL;
}
~Pair()
{
next = NULL;
}
};
class Bucket
{
public:
Pair* pairList;
Bucket()
{
pairList = NULL;
}
uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value);
uint64_t find(uint64_t key);
};
uint64_t Bucket::insert(uint64_t hashedKey, uint64_t key, uint64_t value)
{
Pair *temp = new Pair(hashedKey, key, value);
temp->next = pairList;
pairList = temp; }
uint64_t Bucket::find(uint64_t hashedKey)
{
for(Pair *temp=pairList; temp != NULL; temp=temp->next)
{
if(temp->hashedKey == hashedKey)
{
return temp->value;
}
}
}
class MyHashMap
{
private:
Bucket *bucketArray;
int bucketLogSize;
atomic<int> rehashing_lock;
int rehash();
uint64_t generate_hash(uint64_t key);
public:
atomic<uint64_t> num_pairs;
MyHashMap()
{
int i;
bucketArray = (Bucket*)malloc(sizeof(Bucket)*DEFAULT_BUCKET_INIT_SIZE);
for(i=0;i<DEFAULT_BUCKET_INIT_SIZE;i++)
bucketArray[i].pairList = NULL;
bucketLogSize = DEFAULT_BUCKET_INIT_LOG_SIZE;
num_pairs = 0;
rehashing_lock=0;
}
uint64_t insert(uint64_t key, uint64_t value);
uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value);
uint64_t find(uint64_t key);
};
uint64_t MyHashMap::generate_hash(uint64_t key)
{
uint64_t hashedKey = 5381;
hashedKey = ((hashedKey << 5) + hashedKey) + key; /* hashedKey * 33 + c */ return hashedKey;
}
int MyHashMap::rehash()
{
Bucket *oldBucketArray;
int oldBucketSize = (1 << bucketLogSize);
oldBucketArray = bucketArray;
bucketLogSize++;
bucketArray = (Bucket*)malloc(sizeof(Bucket)*(1 << bucketLogSize));
for(int i=0; i < (1 << bucketLogSize); i++)
bucketArray[i].pairList = NULL;
for(int i=0; i<oldBucketSize; i++)
{
for(Pair *iter=oldBucketArray[i].pairList; iter != NULL; iter=iter->next)
{ insert(iter->hashedKey, iter->key, iter->value);
}
}
free(oldBucketArray);
}
uint64_t MyHashMap::insert(uint64_t key, uint64_t value)
{
/*while(rehashing_lock);
rehashing_lock++;
if(num_pairs > (1 << bucketLogSize) + (1 << (bucketLogSize -2)))
{ rehash();
}
rehashing_lock--;*/
uint64_t hashedKey = generate_hash(key);
num_pairs++;
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
bucketArray[bucketNum].insert(hashedKey, key, value);
}
uint64_t MyHashMap::insert(uint64_t hashedKey, uint64_t key, uint64_t value)
{
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
bucketArray[bucketNum].insert(hashedKey, key, value);
}
uint64_t MyHashMap::find(uint64_t key)
{
uint64_t hashedKey = generate_hash(key);
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
return bucketArray[bucketNum].find(hashedKey);
}
\ No newline at end of file
......@@ -7,17 +7,17 @@
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <atomic>
#include <functional>
using namespace std;
using namespace std;
#define DEFAULT_BUCKET_INIT_SIZE pow(2, DEFAULT_BUCKET_INIT_LOG_SIZE)
#define DEFAULT_BUCKET_INIT_LOG_SIZE 10
#define DEFAULT_BUCKET_INIT_LOG_SIZE 6
#define DEFAULT_BUCKET_INIT_SIZE (1 << DEFAULT_BUCKET_INIT_LOG_SIZE)
#define ENOTEXIST -2
class Pair
{
public:
......@@ -29,11 +29,10 @@
Pair(uint64_t _hashedKey, uint64_t _key, uint64_t _value)
{
hashedKey= _hashedKey;
key = _key;
key = _key;
value = _value;
next = NULL;
}
~Pair()
{
next = NULL;
......@@ -43,17 +42,15 @@
class Bucket
{
public:
Pair *pairList;
int num_pairs;
Pair* pairList;
Bucket()
{
pairList = NULL;
num_pairs = 0;
}
uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value);
uint64_t at(uint64_t key);
uint64_t find(uint64_t key);
};
......@@ -61,21 +58,19 @@
{
Pair *temp = new Pair(hashedKey, key, value);
num_pairs++;
temp->next = pairList;
pairList = temp;
}
pairList = temp; }
uint64_t Bucket::at(uint64_t hashedKey)
{
for(Pair *temp=pairList; temp != NULL; temp=temp->next)
uint64_t Bucket::find(uint64_t hashedKey)
{
if(temp->hashedKey == hashedKey)
for(Pair *temp=pairList; temp != NULL; temp=temp->next)
{
return temp->value;
if(temp->hashedKey == hashedKey)
{
return temp->value;
}
}
}
}
class MyHashMap
{
......@@ -83,98 +78,80 @@
Bucket *bucketArray;
int bucketLogSize;
atomic<int> rehashing_lock;
int rehash();
uint64_t generate_hash(uint64_t key);
public:
uint64_t num_pairs;
atomic<uint64_t> num_pairs;
MyHashMap()
{
int i;
bucketArray = (Bucket*)malloc(sizeof(Bucket)*DEFAULT_BUCKET_INIT_SIZE);
for(i=0;i<DEFAULT_BUCKET_INIT_SIZE;i++)
bucketArray[i].pairList = NULL;
bucketLogSize = DEFAULT_BUCKET_INIT_LOG_SIZE;
num_pairs = 0;
rehashing_lock=0;
}
uint64_t insert(uint64_t key, uint64_t value);
uint64_t insert(uint64_t hashedKey, uint64_t key, uint64_t value);
uint64_t at(uint64_t key);
uint64_t find(uint64_t key);
};
uint64_t MyHashMap::generate_hash(uint64_t key)
{
/*
hash<uint64_t> hash_func;
uint64_t hashedKey = hash_func(key);
//printf("Hash Value = %llu\n", hashedKey);
*/
//uint64_t hashedKey = 5381;
//hashedKey = ((hashedKey << 5) + hashedKey) + key; /* hashedKey * 33 + c */
//printf("Hash Value = %llu\n", hashedKey);
return key;
uint64_t hashedKey = 5381;
hashedKey = ((hashedKey << 5) + hashedKey) + key; /* hashedKey * 33 + c */ return hashedKey;
}
int MyHashMap::rehash()
{
Bucket *oldBucketArray;
int oldBucketSize = (1 << bucketLogSize);
oldBucketArray = bucketArray;
bucketLogSize++;
bucketArray = (Bucket*)malloc(sizeof(Bucket)*(1 << bucketLogSize));
for(int i=0; i < (1 << bucketLogSize); i++)
bucketArray[i].pairList = NULL;
//printf("rehashing : bucketLogSize = %d\n", bucketLogSize);
//printf("rehashing : num_pairs = %llu\n", num_pairs);
bucketArray = (Bucket*)malloc(sizeof(Bucket)*pow(2, bucketLogSize));
/*for(int i=0; i<pow(2, bucketLogSize); i++)
{
bucketArray[i] = new Bucket();
}*/
for(int i=0; i<pow(2, bucketLogSize-1); i++)
{
if(oldBucketArray[i].num_pairs!=0)
{
for(Pair *iter=oldBucketArray[i].pairList; iter != NULL; iter=iter->next)
{
insert(iter->hashedKey, iter->key, iter->value);
}
//free((void*)&oldBucketArray[i]);
}
}
}
uint64_t MyHashMap::insert(uint64_t key, uint64_t value)
{
if(num_pairs > pow(2, bucketLogSize)+pow(2, bucketLogSize-6))
{
rehash();
}
uint64_t hashedKey = generate_hash(key);
insert(hashedKey, key, value);
}
uint64_t MyHashMap::insert(uint64_t hashedKey, uint64_t key, uint64_t value)
{
int bucketNum = (int)((uint64_t)hashedKey % (uint64_t)pow(2, bucketLogSize));
//printf("Bucket Number = %d\n", bucketNum);
num_pairs++;
bucketArray[bucketNum].insert(hashedKey, key, value);
}
uint64_t MyHashMap::at(uint64_t key)
{
uint64_t hashedKey = generate_hash(key);
int bucketNum = (int)((uint64_t)hashedKey % (uint64_t)pow(2, bucketLogSize));
return bucketArray[bucketNum].at(hashedKey);
}
for(int i=0; i<oldBucketSize; i++)
{
for(Pair *iter=oldBucketArray[i].pairList; iter != NULL; iter=iter->next)
{ insert(iter->hashedKey, iter->key, iter->value);
}
}
free(oldBucketArray);
}
uint64_t MyHashMap::insert(uint64_t key, uint64_t value)
{
while(rehashing_lock);
rehashing_lock++;
if(num_pairs > (1 << bucketLogSize) + (1 << (bucketLogSize -2)))
{
rehash();
}
rehashing_lock--;
uint64_t hashedKey = generate_hash(key);
num_pairs++;
insert(hashedKey, key, value);
}
uint64_t MyHashMap::insert(uint64_t hashedKey, uint64_t key, uint64_t value)
{
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
bucketArray[bucketNum].insert(hashedKey, key, value);
}
uint64_t MyHashMap::find(uint64_t key)
{
uint64_t hashedKey = generate_hash(key);
uint64_t bucketNum = (hashedKey & ((1 << bucketLogSize)-1));
return bucketArray[bucketNum].find(hashedKey);
}
\ No newline at end of file
No preview for this file type
......@@ -16,7 +16,7 @@ int main()
gettimeofday(&start_time,NULL);
for(long int i=0;i<100000;i++)
for(long int i=0;i<1000000;i++)
{
map.insert(rand(), rand());
}
......@@ -25,14 +25,19 @@ int main()
time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec;
time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec;
if(time_taken.tv_usec < 0)
{
time_taken.tv_sec--;
time_taken.tv_usec = 1000000+time_taken.tv_usec;
}
printf("MyHashMap::insert()\n");
cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n";
unordered_map<uint64_t, uint64_t> hash_map;
gettimeofday(&start_time,NULL);
for(long int i=0;i<100000;i++)
for(long int i=0;i<1000000;i++)
{
hash_map.insert(make_pair(rand(), rand()));
}
......@@ -41,8 +46,54 @@ int main()
time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec;
time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec;
if(time_taken.tv_usec < 0)
{
time_taken.tv_sec--;
time_taken.tv_usec = 1000000+time_taken.tv_usec;
}
printf("unordered_map::insert()\n");
cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n";
gettimeofday(&start_time,NULL);
for(long int i=0;i<1000000;i++)
{
map.find(rand());
}
gettimeofday(&end_time,NULL);
time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec;
time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec;
if(time_taken.tv_usec < 0)
{
time_taken.tv_sec--;
time_taken.tv_usec = 1000000+time_taken.tv_usec;
}
printf("MyHashMap::find()\n");
cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n";
//unordered_map<uint64_t, uint64_t> hash_map;
gettimeofday(&start_time,NULL);
for(long int i=0;i<1000000;i++)
{
hash_map.find(rand());
}
gettimeofday(&end_time,NULL);
time_taken.tv_sec = end_time.tv_sec - start_time.tv_sec;
time_taken.tv_usec = end_time.tv_usec - start_time.tv_usec;
if(time_taken.tv_usec < 0)
{
time_taken.tv_sec--;
time_taken.tv_usec = 1000000+time_taken.tv_usec;
}
printf("unordered_map::find()\n");
cout << "Time Taken " << time_taken.tv_sec << " s " << time_taken.tv_usec << " us\n";
return 0;
}
\ No newline at end of file
......@@ -7,11 +7,7 @@
#include <stdlib.h>
#include <omp.h>
#include <task_scheduler_init.h>
#include <blocked_range.h>
#include <parallel_for.h>
#include <concurrent_hash_map.h>
#include "include/hash_map.h"
#include <utility>
#include <unordered_map>
#include <vector>
......@@ -19,21 +15,8 @@
#include "include/schema.h"
#include "include/storage.h"
using namespace tbb;
typedef MyHashMap HashTable;
// Structure that defines hashing and comparison operations for user's type.
struct MyHashCompare {
static uint64_t hash( const uint64_t x ) {
uint64_t h = 5892;
h = ((h<<5) + h) * x;
return h;
}
//! True if strings are equal
static bool equal( const uint64_t x, const uint64_t y ) {
return x==y;
}
};
typedef concurrent_hash_map<uint64_t,uint64_t,MyHashCompare> HashTable;
class mystring {
public:
char str[50];
......@@ -70,42 +53,39 @@ int mystring::operator==(mystring _mystr) {
}
void query2() {
task_scheduler_init init;
timeval start_time, end_time, time_taken;
gettimeofday(&start_time,NULL);
HashTable hash_map_0;
HashTable::accessor hash_map_0_acc;
uint64_t max_count_district = district_vect[0].count;
uint64_t max_count_item = item_vect[0].count;
#pragma omp parallel for
for(uint64_t i = 0; i < max_count_district; i++)
for(uint64_t i = 0; i < max_count_item; i++)
{
//hash_map_0 should be declared just above
//generate hash table here
hash_map_0.insert(make_pair(district_vect[i].d_id, district_vect[i].tuple_id));
hash_map_0.insert(item_vect[i].i_id, item_vect[i].tuple_id);
}
uint64_t max_count_customer = customer_vect[0].count;
uint64_t max_count_orderline = orderline_vect[0].count;
#pragma omp parallel for
for(uint64_t i = 0; i < max_count_customer; i++)
for(uint64_t i = 0; i < max_count_orderline; i++)
{
//Match with Hash Table here
if(customer_vect[i].c_w_id==1)
if(orderline_vect[i].ol_w_id==1)
{
hash_map_0.find(hash_map_0_acc, customer_vect[i].c_d_id);
district* district_iter=&district_vect.at(hash_map_0_acc->second);
mystring d_name;
d_name = district_iter->d_name;
mystring c_first;
c_first = customer_vect[i].c_first;
mystring c_last;
c_last = customer_vect[i].c_last;
mystring c_id;
c_id = customer_vect[i].c_id;
printf("%s\t", d_name.str);
printf("%s\t", c_first.str);
printf("%s\t", c_last.str);
printf("%s\t", c_id.str);
item* item_iter=&item_vect.at(hash_map_0.find(orderline_vect[i].ol_i_id));
mystring i_name;
i_name = item_iter->i_name;
mystring ol_o_id;
ol_o_id = orderline_vect[i].ol_o_id;
mystring ol_quantity;
ol_quantity = orderline_vect[i].ol_quantity;
mystring ol_amount;
ol_amount = orderline_vect[i].ol_amount;
printf("%s\t", i_name.str);
printf("%s\t", ol_o_id.str);
printf("%s\t", ol_quantity.str);
printf("%s\t", ol_amount.str);
printf("\n");
}
}
......
......@@ -22,15 +22,15 @@ void generate_parse_tree() {
expr_root = print1;
table_field_vect_t *table=new table_field_vect_t;
//tables and fields
table->table_name = "district";
table->field_vect.push_back((field_t)"d_name");
table->table_name = "item";
table->field_vect.push_back((field_t)"i_name");
print1->table_vect.push_back(*table);
delete table;
table = new table_field_vect_t;
table->table_name = "customer";
table->field_vect.push_back((field_t)"c_first");
table->field_vect.push_back((field_t)"c_last");
table->field_vect.push_back((field_t)"c_id");
table->table_name = "orderline";
table->field_vect.push_back((field_t)"ol_o_id");
table->field_vect.push_back((field_t)"ol_quantity");
table->field_vect.push_back((field_t)"ol_amount");
print1->table_vect.push_back(*table);
delete table;
print1->l_input = new select_Operator;
......@@ -42,15 +42,15 @@ void generate_parse_tree() {
select1->consumer = print1;
//tables and fields
table = new table_field_vect_t;
table->table_name = "district";
table->field_vect.push_back((field_t)"d_name");
table->table_name = "item";
table->field_vect.push_back((field_t)"i_name");
select1->table_vect.push_back(*table);
delete table;
table = new table_field_vect_t;
table->table_name = "customer";
table->field_vect.push_back((field_t)"c_first");
table->field_vect.push_back((field_t)"c_last");
table->field_vect.push_back((field_t)"c_id");
table->table_name = "orderline";
table->field_vect.push_back((field_t)"ol_o_id");
table->field_vect.push_back((field_t)"ol_quantity");
table->field_vect.push_back((field_t)"ol_amount");
select1->table_vect.push_back(*table);
delete table;
select1->l_input = new join_Operator;
......@@ -63,48 +63,29 @@ void generate_parse_tree() {
join1->l_input = new tableScan_Operator;
join1->r_input = new tableScan_Operator;
predicate_t pred;
pred.pred_l_operand = "c_w_id";
pred.pred_l_operand = "ol_w_id";
pred.pred_operator = "==";
pred.pred_r_operand = "1";
join1->primary_pred_vect.push_back(pred);
join1->hashed_pred.pred_l_operand = "c_d_id";
join1->hashed_pred.pred_l_operand = "ol_i_id";
join1->hashed_pred.pred_operator = "==";
join1->hashed_pred.pred_r_operand = "d_id";
join1->hashed_pred.pred_r_operand = "i_id";
join_Operator* _join1 = dynamic_cast<join_Operator*>(join1);
_join1->hash_key_type="uint64_t";
_join1->hash_key_field="d_id";
_join1->hash_key_field="i_id";
//left TABLESCAN Operator
Operator *tableScan1;
tableScan1 = join1->l_input;
tableScan1->oper_type=TABLESCAN;
tableScan1->consumer = join1;
tableScan1->table_name = "district";
//OVERKILL : table_name is enough for tableScan_Operator
/*
table = new table_field_vect_t;
table->table_name = "warehouse";
table->field_vect.push_back((field_t)"w_id");
table->field_vect.push_back((field_t)"w_name");
tableScan1->table_vect.push_back(*table);
delete table;
*/
//tableScan1->l_input = new table_Operator;
tableScan1->table_name = "item";
//right TABLESCAN Operator
Operator *tableScan2;
tableScan2 = join1->r_input;
tableScan2->consumer = join1;
tableScan2->table_name = "customer";
//OVERKILL : table_name is enough for tableScan_Operator
/*
table = new table_field_vect_t;
table->table_name = "district";
table->field_vect.push_back((field_t)"d_w_id");
table->field_vect.push_back((field_t)"d_name");
table->field_vect.push_back((field_t)"d_tax");
tableScan2->table_vect.push_back(*table);
delete table;
*/
//tableScan2->l_input = new table_Operator;
tableScan2->table_name = "orderline";
}
/*
* Hardcoded Parse Tree
*
*/
#include <iostream>
#include <list>
#include <string>
#include "include/parse_tree.h"
using namespace std;
Operator *expr_root;
void generate_parse_tree() {
//PRINT Operation
print_Operator *print1;
print1 = new print_Operator;
print1->oper_type=PRINT;
expr_root = print1;
table_field_vect_t *table=new table_field_vect_t;
//tables and fields
table->table_name = "warehouse";
table->field_vect.push_back((field_t)"w_name");
print1->table_vect.push_back(*table);
delete table;
table = new table_field_vect_t;
table->table_name = "district";
table->field_vect.push_back((field_t)"d_name");
table->field_vect.push_back((field_t)"d_tax");
print1->table_vect.push_back(*table);
delete table;
print1->l_input = new select_Operator;
//SELECT Operator
Operator *select1;
select1 = print1->l_input;
select->oper_type=SELECT;
select1->consumer = print1;
//tables and fields
table = new table_field_vect_t;
table->table_name = "warehouse";
table->field_vect.push_back((field_t)"w_name");
select1->table_vect.push_back(*table);
delete table;
table = new table_field_vect_t;
table->table_name = "district";
table->field_vect.push_back((field_t)"d_name");
table->field_vect.push_back((field_t)"d_tax");
select1->table_vect.push_back(*table);
delete table;
select1->l_input = new join_Operator;
//JOIN Operator
Operator *join1;
join1 = select1->l_input;
join1->oper_type=JOIN;
join1->consumer = select1;
join1->l_input = new tableScan_Operator;
join1->r_input = new tableScan_Operator;
predicate_t pred;
pred.pred_l_operand = "d_w_id";
pred.pred_operator = "==";
pred.pred_r_operand = "1";
join1->primary_pred_vect.push_back(pred);
join1->hashed_pred.pred_l_operand = "d_w_id";
join1->hashed_pred.pred_operator = "==";
join1->hashed_pred.pred_r_operand = "w_id";
join_Operator* _join1 = dynamic_cast<join_Operator*>(join1);
_join1->hash_key_type="uint64_t";
_join1->hash_key_field="w_id";
//left TABLESCAN Operator
Operator *tableScan1;
tableScan1 = join1->l_input;
tableScan1->oper_type=TABLESCAN;
tableScan1->consumer = join1;
tableScan1->table_name = "warehouse";
//OVERKILL : table_name is enough for tableScan_Operator
/*
table = new table_field_vect_t;
table->table_name = "warehouse";
table->field_vect.push_back((field_t)"w_id");
table->field_vect.push_back((field_t)"w_name");
tableScan1->table_vect.push_back(*table);
delete table;
*/
//tableScan1->l_input = new table_Operator;
//right TABLESCAN Operator
Operator *tableScan2;
tableScan2 = join1->r_input;
tableScan2->consumer = join1;
tableScan2->table_name = "district";
//OVERKILL : table_name is enough for tableScan_Operator
/*
table = new table_field_vect_t;
table->table_name = "district";
table->field_vect.push_back((field_t)"d_w_id");
table->field_vect.push_back((field_t)"d_name");
table->field_vect.push_back((field_t)"d_tax");
tableScan2->table_vect.push_back(*table);
delete table;
*/
//tableScan2->l_input = new table_Operator;
}
......@@ -30,17 +30,18 @@ void generate_query_code() {
fprintf(qcode_fp, "#include <stdlib.h>\n\n");
fprintf(qcode_fp, "#include <omp.h>\n");
fprintf(qcode_fp, "#include <task_scheduler_init.h>\n");
fprintf(qcode_fp, "#include <blocked_range.h>\n");
fprintf(qcode_fp, "#include <parallel_for.h>\n");
fprintf(qcode_fp, "#include <concurrent_hash_map.h>\n\n");
//fprintf(qcode_fp, "#include <task_scheduler_init.h>\n");
//fprintf(qcode_fp, "#include <blocked_range.h>\n");
//fprintf(qcode_fp, "#include <parallel_for.h>\n");
//fprintf(qcode_fp, "#include <concurrent_hash_map.h>\n\n");
fprintf(qcode_fp, "#include \"include/hash_map.h\"\n");
fprintf(qcode_fp, "#include <utility>\n");
fprintf(qcode_fp, "#include <unordered_map>\n");
fprintf(qcode_fp, "#include <vector>\n\n");
fprintf(qcode_fp, "#include \"include/schema.h\"\n");
fprintf(qcode_fp, "#include \"include/storage.h\"\n\n");
/*
fprintf(qcode_fp, "using namespace tbb;\n\n");
fprintf(qcode_fp, "");
fprintf(qcode_fp, "// Structure that defines hashing and comparison operations for user's type.\n");
......@@ -59,6 +60,10 @@ void generate_query_code() {
// A concurrent hash table that maps strings to ints.
fprintf(qcode_fp, "typedef concurrent_hash_map<uint64_t,uint64_t,MyHashCompare> HashTable;\n");
fprintf(qcode_fp, "");
*/
fprintf(qcode_fp, "typedef MyHashMap HashTable;\n");
fprintf(qcode_fp, "\n");
fprintf(qcode_fp, "class mystring {\n");
......@@ -99,8 +104,8 @@ void generate_query_code() {
fprintf(qcode_fp, "void query2() {\n");
open_braces++;
indent();
fprintf(qcode_fp, "task_scheduler_init init;\n");
/* indent();
fprintf(qcode_fp, "task_scheduler_init init;\n");*/
indent();
fprintf(qcode_fp, "timeval start_time, end_time, time_taken;\n\n");
indent();
......
......@@ -83,8 +83,6 @@ void join_Operator::produce() {
_join1->joinOp_num = num_hashOp++;
indent();
fprintf(qcode_fp,"HashTable hash_map_%d;\n", _join1->joinOp_num);
indent();
fprintf(qcode_fp,"HashTable::accessor hash_map_%d_acc;\n", _join1->joinOp_num);
l_input->produce();
r_input->produce();
}
......@@ -103,7 +101,7 @@ void join_Operator::consume() {
indent();
fprintf(qcode_fp, "//generate hash table here\n");
indent();
fprintf(qcode_fp, "hash_map_%d.insert(make_pair(%s_vect[i].%s, %s_vect[i].tuple_id));\n", _join1->joinOp_num, l_input->table_name.c_str(), _join1->hash_key_field.c_str(), l_input->table_name.c_str());
fprintf(qcode_fp, "hash_map_%d.insert(%s_vect[i].%s, %s_vect[i].tuple_id);\n", _join1->joinOp_num, l_input->table_name.c_str(), _join1->hash_key_field.c_str(), l_input->table_name.c_str());
_join1->l_input_consumed = true;
}
else {
......@@ -133,20 +131,7 @@ void join_Operator::consume() {
fprintf(qcode_fp, "{\n");
open_braces++;
indent();
fprintf(qcode_fp, "hash_map_%d.find(hash_map_%d_acc, %s_vect[i].%s);\n", _join1->joinOp_num, _join1->joinOp_num, r_input->table_name.c_str(), hashed_pred.pred_l_operand.c_str());
indent();
fprintf(qcode_fp, "%s* %s_iter=&%s_vect.at(hash_map_%d_acc->second);\n", l_input->table_name.c_str(), l_input->table_name.c_str(), l_input->table_name.c_str(), _join1->joinOp_num);
//fprintf(qcode_fp, "%s* %s_iter=&%s_vect.at(hash_map_%d.at(%s_iter->%s));\n", l_input->table_name.c_str(), l_input->table_name.c_str(), l_input->table_name.c_str(), _join1->joinOp_num, r_input->table_name.c_str(), hashed_pred.pred_l_operand.c_str());
/*
indent();
fprintf(qcode_fp, "if(%s_iter == %s_vect.end()) {\n", l_input->table_name.c_str(), l_input->table_name.c_str());
open_braces++;
indent();
fprintf(qcode_fp, "continue;\n");
open_braces--;
indent();
fprintf(qcode_fp, "}\n");
*/
fprintf(qcode_fp, "%s* %s_iter=&%s_vect.at(hash_map_%d.find(%s_vect[i].%s));\n", l_input->table_name.c_str(), l_input->table_name.c_str(), l_input->table_name.c_str(), _join1->joinOp_num, r_input->table_name.c_str(), hashed_pred.pred_l_operand.c_str());
consumer->consume();
open_braces--;
indent();
......@@ -155,16 +140,7 @@ void join_Operator::consume() {
else {
indent();
fprintf(qcode_fp, "vector<%s>::iterator %s_iter=%s_vect.at(hash_map_%d.at(%s));\n", l_input->table_name.c_str(), l_input->table_name.c_str(), l_input->table_name.c_str(), _join1->joinOp_num, hashed_pred.pred_l_operand.c_str());
/*
indent();
fprintf(qcode_fp, "if(%s_iter == %s_vect.end()) {\n", l_input->table_name.c_str(), l_input->table_name.c_str());
open_braces++;
indent();
fprintf(qcode_fp, "continue;\n");
open_braces--;
indent();
fprintf(qcode_fp, "}\n");
*/
consumer->consume();
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment