Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
OvlpSelfJoinParallel Class Reference

#include <ovlpjoin_parallel.h>

Public Member Functions

void overlapjoin (int overlap_threshold, std::vector< std::pair< int, int > > &finalPairs)
 
void small_case (int L, int R, std::vector< std::pair< int, int > > &finalPairs)
 
int64_t small_estimate (int L, int R)
 
int64_t large_estimate (int L, int R)
 
int divide (int nL)
 
int estimate ()
 
void large_case (int L, int R, std::vector< std::pair< int, int > > &finalPairs)
 
 OvlpSelfJoinParallel (const std::vector< std::vector< ui > > &sorted_records, const std::vector< double > &recwt, const std::vector< double > &_wordwt, ui _maxHeapSize=0, bool _isWeightedComp=false)
 
double weightedOverlapCoeff (int id1, int id2)
 
double overlapCoeff (int id1, int id2)
 
void set_external_store (const std::string &_resPair_path)
 
bool comp_comb1 (const int a, const int b, int tid)
 
bool build_heap (const std::vector< std::pair< int, int > > &vec, const std::vector< std::vector< ui > > &dataset, int L, std::vector< int > &heap, std::vector< combination_p1 > &combs, int &heap_size, int tid)
 

Public Attributes

int n1 {0}
 
int c {0}
 
ui total_eles {0}
 
int earlyTerminated [MAXTHREADNUM] = { 0 }
 
std::vector< std::vector< ui > > records
 
std::vector< std::vector< ui > > datasets
 
std::vector< double > weights
 
std::vector< double > wordwt
 
std::vector< std::pair< int, int > > idmap_records
 
std::vector< std::vector< std::pair< int, int > > > ele_lists
 
std::vector< std::pair< int, int > > result_pairs [MAXTHREADNUM]
 
std::vector< int > heap [MAXTHREADNUM]
 
std::vector< combination_p1 > combs [MAXTHREADNUM]
 
std::unordered_set< int > random_ids
 
std::vector< std::pair< int, int > > buck
 
ui maxHeapSize {0}
 
bool isWeightedComp {false}
 
std::vector< WeightPair > result_pairs_ [MAXTHREADNUM]
 
int isHeap [MAXTHREADNUM] = { 0 }
 
int64_t candidate_num {0}
 
int64_t result_num {0}
 
int64_t list_cost {0}
 
double heap_cost {0.0}
 
double binary_cost {0.0}
 
uint64_t heap_op {0}
 
int64_t large_cost {0}
 
int64_t large_est_cost {0}
 
int alive_id {0}
 
bool if_external_IO = false
 
std::string resultPair_storePath
 

Constructor & Destructor Documentation

◆ OvlpSelfJoinParallel()

OvlpSelfJoinParallel::OvlpSelfJoinParallel ( const std::vector< std::vector< ui > > & sorted_records,
const std::vector< double > & recwt,
const std::vector< double > & _wordwt,
ui _maxHeapSize = 0,
bool _isWeightedComp = false )
inline

Member Function Documentation

◆ build_heap()

bool OvlpSelfJoinParallel::build_heap ( const std::vector< std::pair< int, int > > & vec,
const std::vector< std::vector< ui > > & dataset,
int L,
std::vector< int > & heap,
std::vector< combination_p1 > & combs,
int & heap_size,
int tid )

◆ comp_comb1()

bool OvlpSelfJoinParallel::comp_comb1 ( const int a,
const int b,
int tid )
inline

◆ divide()

int OvlpSelfJoinParallel::divide ( int nL)

◆ estimate()

int OvlpSelfJoinParallel::estimate ( )

◆ large_case()

void OvlpSelfJoinParallel::large_case ( int L,
int R,
std::vector< std::pair< int, int > > & finalPairs )

◆ large_estimate()

int64_t OvlpSelfJoinParallel::large_estimate ( int L,
int R )

◆ overlapCoeff()

double OvlpSelfJoinParallel::overlapCoeff ( int id1,
int id2 )
inline

◆ overlapjoin()

void OvlpSelfJoinParallel::overlapjoin ( int overlap_threshold,
std::vector< std::pair< int, int > > & finalPairs )

◆ set_external_store()

void OvlpSelfJoinParallel::set_external_store ( const std::string & _resPair_path)
inline

◆ small_case()

void OvlpSelfJoinParallel::small_case ( int L,
int R,
std::vector< std::pair< int, int > > & finalPairs )

◆ small_estimate()

int64_t OvlpSelfJoinParallel::small_estimate ( int L,
int R )

◆ weightedOverlapCoeff()

double OvlpSelfJoinParallel::weightedOverlapCoeff ( int id1,
int id2 )
inline

Member Data Documentation

◆ alive_id

int OvlpSelfJoinParallel::alive_id {0}

◆ binary_cost

double OvlpSelfJoinParallel::binary_cost {0.0}

◆ buck

std::vector<std::pair<int, int> > OvlpSelfJoinParallel::buck

◆ c

int OvlpSelfJoinParallel::c {0}

◆ candidate_num

int64_t OvlpSelfJoinParallel::candidate_num {0}

◆ combs

std::vector<combination_p1> OvlpSelfJoinParallel::combs[MAXTHREADNUM]

◆ datasets

std::vector<std::vector<ui> > OvlpSelfJoinParallel::datasets

◆ earlyTerminated

int OvlpSelfJoinParallel::earlyTerminated[MAXTHREADNUM] = { 0 }

◆ ele_lists

std::vector<std::vector<std::pair<int, int> > > OvlpSelfJoinParallel::ele_lists

◆ heap

std::vector<int> OvlpSelfJoinParallel::heap[MAXTHREADNUM]

◆ heap_cost

double OvlpSelfJoinParallel::heap_cost {0.0}

◆ heap_op

uint64_t OvlpSelfJoinParallel::heap_op {0}

◆ idmap_records

std::vector<std::pair<int, int> > OvlpSelfJoinParallel::idmap_records

◆ if_external_IO

bool OvlpSelfJoinParallel::if_external_IO = false

◆ isHeap

int OvlpSelfJoinParallel::isHeap[MAXTHREADNUM] = { 0 }

◆ isWeightedComp

bool OvlpSelfJoinParallel::isWeightedComp {false}

◆ large_cost

int64_t OvlpSelfJoinParallel::large_cost {0}

◆ large_est_cost

int64_t OvlpSelfJoinParallel::large_est_cost {0}

◆ list_cost

int64_t OvlpSelfJoinParallel::list_cost {0}

◆ maxHeapSize

ui OvlpSelfJoinParallel::maxHeapSize {0}

◆ n1

int OvlpSelfJoinParallel::n1 {0}

◆ random_ids

std::unordered_set<int> OvlpSelfJoinParallel::random_ids

◆ records

std::vector<std::vector<ui> > OvlpSelfJoinParallel::records

◆ result_num

int64_t OvlpSelfJoinParallel::result_num {0}

◆ result_pairs

std::vector<std::pair<int, int> > OvlpSelfJoinParallel::result_pairs[MAXTHREADNUM]

◆ result_pairs_

std::vector<WeightPair> OvlpSelfJoinParallel::result_pairs_[MAXTHREADNUM]

◆ resultPair_storePath

std::string OvlpSelfJoinParallel::resultPair_storePath

◆ total_eles

ui OvlpSelfJoinParallel::total_eles {0}

◆ weights

std::vector<double> OvlpSelfJoinParallel::weights

◆ wordwt

std::vector<double> OvlpSelfJoinParallel::wordwt

The documentation for this class was generated from the following files: