|
| SetJoinParallel (const std::vector< std::vector< ui > > &sorted_records, const std::vector< double > &recwt, const std::vector< double > &_wordwt, double _det, ui _maxHeapSize=0, bool _isWeightedComp=false) |
|
| SetJoinParallel (const std::vector< std::vector< ui > > &work_records, const std::vector< std::vector< ui > > &query_records, const std::vector< double > &workwt, const std::vector< double > &querywt, const std::vector< double > &_wordwt, double _det, ui _maxHeapSize=0, bool _isWeightedComp=false) |
|
| ~SetJoinParallel ()=default |
|
void | showPara () const |
|
void | resizeData (std::vector< std::vector< ui > > &dataset) |
|
void | reportTimeCost () |
|
void | reportLargestGroup () |
|
unsigned long long | getResultPairsAmount () |
|
void | mergeResults (std::vector< std::pair< int, int > > &finalPairs) |
|
bool | overlapSelf (ui x, ui y, int posx=0, int posy=0, int current_overlap=0) |
|
bool | overlapSelfIC (ui x, ui y, int posx=0, int posy=0, int current_overlap=0) |
|
bool | overlapRS (ui x, ui y, int posx=0, int posy=0, int current_overlap=0) |
|
bool | overlapRSIC (ui x, ui y, int posx=0, int posy=0, int current_overlap=0) |
|
double | weightedOverlap (ui x, ui y) |
|
double | weightedJaccard (ui x, ui y) |
|
double | jaccard (ui x, ui y) |
|
double | weightedCosine (ui x, ui y) |
|
double | cosine (ui x, ui y) |
|
double | weightedDice (ui x, ui y) |
|
double | dice (ui x, ui y) |
|
void | index (double threshold) |
|
void | GreedyFindCandidateAndSimPairs (const int &tid, const int indexLenGrp, const ui rid, ui record_length, const std::vector< ui > &p_keys, const std::vector< ui > &od_keys, const std::vector< ui > &odk_st) |
|
void | findSimPairsSelf () |
|
void | findSimPairsRS () |
|
|
bool | ifRS = false |
|
int | earlyTerminated [MAXTHREADNUM] = { 0 } |
|
int | earlyTerminatedEmpty [MAXTHREADNUM] = { 0 } |
|
SimFuncType | simFType {SimFuncType::JACCARD} |
|
std::string | typeMap [3] = {"Jaccard", "Cosine", "Dice"} |
|
double(SetJoinParallel::*)(ui, ui) | weightedFunc = nullptr |
|
double(SetJoinParallel::*)(ui, ui) | normalFunc = nullptr |
|
bool(SetJoinParallel::*)(ui, ui, int, int, int) | overlapFunc = nullptr |
|
double | det |
|
uint64_t | resultNum = 0 |
|
uint64_t | candidateNum = 0 |
|
uint64_t | listlens = 0 |
|
ui | maxIndexPartNum {0} |
|
std::vector< std::vector< ui > > | work_dataset |
|
std::vector< std::vector< ui > > | query_dataset |
|
std::vector< double > | work_weights |
|
std::vector< double > | query_weights |
|
std::vector< double > | wordwt |
|
std::vector< ui > | workEmpty |
|
std::vector< ui > | queryEmpty |
|
std::vector< ui > | workLength |
|
double | coe {0.0} |
|
double | coePart {0.0} |
|
double | ALPHA {0.0} |
|
ui | work_n {0} |
|
ui | query_n {0} |
|
ui | work_maxSize {0} |
|
ui | work_minSize {0} |
|
ui | query_maxSize {0} |
|
ui | query_minSize {0} |
|
ui | maxHeapSize {0} |
|
std::vector< std::pair< int, int > > | result_pairs [MAXTHREADNUM] |
|
std::vector< std::pair< int, int > > | emptyPairs [MAXTHREADNUM] |
|
bool | isWeightedComp {false} |
|
std::vector< WeightPair > | result_pairs_ [MAXTHREADNUM] |
|
int | isHeap [MAXTHREADNUM] = { 0 } |
|
double | index_cost |
|
double | search_cost |
|
double | hashInFind_cost [MAXTHREADNUM] |
|
double | mem_cost [MAXTHREADNUM] |
|
double | find_cost [MAXTHREADNUM] |
|
double | alloc_cost [MAXTHREADNUM] |
|
double | verif_cost [MAXTHREADNUM] |
|
bool | flagIC {false} |
|
std::vector< int > | grpIdA |
|
std::vector< int > | grpIdB |
|
std::vector< std::vector< int > > | groupA |
|
std::vector< std::vector< int > > | groupB |
|
std::vector< ui > | revIdMapA |
|
std::vector< ui > | revIdMapB |
|
std::vector< ui > | idMapA |
|
std::vector< ui > | idMapB |
|
double ** | featureValueCache {nullptr} |
|
int * | discreteCacheIdx {nullptr} |
|