#include <setjoin.h>
Classes | |
struct | invertedList |
struct | invIndexStruct |
Public Member Functions | |
SetJoin ()=default | |
SetJoin (const std::vector< std::vector< ui > > &sorted_records, const std::vector< double > &recwt, const std::vector< double > &_wordwt, std::string _sim_pairs_filepath, double _det, ui _maxHeapSize=0, bool _isWeightedComp=false) | |
SetJoin (const std::vector< std::vector< ui > > &work_records, const std::vector< std::vector< ui > > &query_records, const std::vector< double > &workwt, const std::vector< double > &querywt, const std::vector< double > &_wordwt, const std::string &_sim_pairs_filepath, double _det, ui _maxHeapSize=0, bool _isWeightedComp=false) | |
~SetJoin () | |
void | loadDataset (const std::vector< std::vector< ui > > &records, std::string file) |
void | prepare (const std::vector< std::vector< ui > > &offsets, ui column) |
void | resizeData (std::vector< std::vector< ui > > &dataset) |
double | weightedOverlap (ui x, ui y) |
double | weightedJaccard (ui x, ui y) |
double | jaccard (ui x, ui y) |
double | weightedCosine (ui x, ui y) |
double | cosine (ui x, ui y) |
double | weightedDice (ui x, ui y) |
double | dice (ui x, ui y) |
bool | overlap (int x, int y, int posx=0, int posy=0, int current_overlap=0) |
bool | overlapRS (int x, int y, int posx=0, int posy=0, int current_overlap=0) |
void | setSelfJoin (double threshold, std::vector< std::pair< int, int > > &sim_pairs) |
void | setRSJoin (double threshold, std::vector< std::pair< int, int > > &sim_pairs) |
Public Attributes | |
double | overlap_cost = 0 |
double | allocation_cost = 0 |
double | index_cost = 0 |
bool | ifRS = false |
SimFuncType | simFType {SimFuncType::JACCARD} |
double(SetJoin::* | weightedFunc )(ui, ui) = nullptr |
double(SetJoin::* | normalFunc )(ui, ui) = nullptr |
bool(SetJoin::* | overlapFunc )(int, int, int, int, int) = nullptr |
std::vector< std::pair< int, int > > | cacheVec |
std::vector< std::vector< std::pair< int, int > > > | indexVecs |
double | det |
uint64_t | resultNum = 0 |
uint64_t | candidateNum = 0 |
uint64_t | lengthSum = 0 |
uint64_t | listlens = 0 |
int | prime_exp [MAX_LINE_LENGTH] |
std::vector< std::vector< ui > > | dataset_all |
std::vector< std::vector< ui > > | work_dataset |
std::vector< std::vector< ui > > | query_dataset |
std::vector< double > | work_weights |
std::vector< double > | query_weights |
std::vector< double > | wordwt |
std::vector< ui > | workEmpty |
std::vector< ui > | queryEmpty |
std::vector< std::pair< int, int > > | result_pairs |
std::string | simP_file_path |
ui | maxHeapSize {0} |
bool | isWeightedComp {false} |
std::vector< WeightPair > | result_pairs_ |
int | isHeap = 0 |
std::vector< invertedList > | indexLists |
|
default |
|
inline |
|
inline |
|
inline |
|
inline |
bool SetJoin::overlap (int x, int y, int posx = 0, int posy = 0, int current_overlap = 0)
bool SetJoin::overlapRS (int x, int y, int posx = 0, int posy = 0, int current_overlap = 0)
|
inline |
void SetJoin::setRSJoin (double threshold, std::vector< std::pair< int, int > > & sim_pairs)
void SetJoin::setSelfJoin (double threshold, std::vector< std::pair< int, int > > & sim_pairs)
double SetJoin::allocation_cost = 0 |
std::vector<std::pair<int, int> > SetJoin::cacheVec |
uint64_t SetJoin::candidateNum = 0 |
std::vector<std::vector<ui> > SetJoin::dataset_all |
double SetJoin::det |
bool SetJoin::ifRS = false |
double SetJoin::index_cost = 0 |
std::vector<invertedList> SetJoin::indexLists |
std::vector<std::vector<std::pair<int, int> > > SetJoin::indexVecs |
int SetJoin::isHeap = 0 |
bool SetJoin::isWeightedComp {false} |
uint64_t SetJoin::lengthSum = 0 |
uint64_t SetJoin::listlens = 0 |
ui SetJoin::maxHeapSize {0} |
double SetJoin::overlap_cost = 0 |
bool(SetJoin::* SetJoin::overlapFunc) (int, int, int, int, int) = nullptr |
int SetJoin::prime_exp[MAX_LINE_LENGTH] |
std::vector<std::vector<ui> > SetJoin::query_dataset |
std::vector<double> SetJoin::query_weights |
std::vector<ui> SetJoin::queryEmpty |
std::vector<std::pair<int, int> > SetJoin::result_pairs |
std::vector<WeightPair> SetJoin::result_pairs_ |
uint64_t SetJoin::resultNum = 0 |
SimFuncType SetJoin::simFType {SimFuncType::JACCARD} |
std::string SetJoin::simP_file_path |
std::vector<double> SetJoin::wordwt |
std::vector<std::vector<ui> > SetJoin::work_dataset |
std::vector<double> SetJoin::work_weights |
std::vector<ui> SetJoin::workEmpty |