Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
OvlpRSJoinParallel Class Reference

#include <ovlpjoin_parallel.h>

Public Member Functions

void overlapjoin (int overlap_threshold, std::vector< std::pair< int, int > > &finalPairs)
 
void small_case (int L1, int R1, int L2, int R2, std::vector< std::pair< int, int > > &finalPairs)
 
 OvlpRSJoinParallel (const std::vector< std::vector< ui > > &sorted_records_1, const std::vector< std::vector< ui > > &sorted_records_2, const std::vector< double > &rec1wt, const std::vector< double > &rec2wt, const std::vector< double > &_wordwt, ui _maxHeapSize=0, bool _isWeightedComp=false)
 
double weightedOverlapCoeff (int id1, int id2)
 
double overlapCoeff (int id1, int id2)
 
void set_external_store (const std::string &_resPair_path)
 
bool comp_comb1 (const int a, const int b, int tid)
 
bool comp_comb2 (const int a, const int b, int tid)
 
bool build_heap (const std::vector< std::pair< int, int > > &vec, const std::vector< std::vector< ui > > &dataset, int L, std::vector< int > &heap, std::vector< combination_p1 > &combs, int &heap_size, int tid)
 
bool build_heap (const std::vector< std::pair< int, int > > &vec, const std::vector< std::vector< ui > > &dataset, int L, std::vector< int > &heap, std::vector< combination_p2 > &combs, int &heap_size, int tid)
 

Public Attributes

int n1 {0}
 
int n2 {0}
 
int c {0}
 
ui total_eles {0}
 
std::vector< std::vector< ui > > records1
 
std::vector< std::vector< ui > > records2
 
std::vector< std::vector< ui > > datasets1
 
std::vector< std::vector< ui > > datasets2
 
std::vector< double > recWeights1
 
std::vector< double > recWeights2
 
std::vector< double > wordwt
 
std::vector< std::pair< int, int > > idmap_records1
 
std::vector< std::pair< int, int > > idmap_records2
 
std::vector< std::vector< std::pair< int, int > > > ele_lists1
 
std::vector< std::vector< std::pair< int, int > > > ele_lists2
 
std::vector< std::pair< int, int > > result_pairs [MAXTHREADNUM]
 
std::vector< int > heap1 [MAXTHREADNUM]
 
std::vector< int > heap2 [MAXTHREADNUM]
 
std::vector< combination_p1 > combs1 [MAXTHREADNUM]
 
std::vector< combination_p2 > combs2 [MAXTHREADNUM]
 
ui maxHeapSize {0}
 
bool isWeightedComp {false}
 
std::vector< WeightPair > result_pairs_ [MAXTHREADNUM]
 
int isHeap [MAXTHREADNUM] = { 0 }
 
int64_t candidate_num
 
int64_t result_num
 
bool if_external_IO = false
 
std::string resultPair_storePath
 

Constructor & Destructor Documentation

◆ OvlpRSJoinParallel()

OvlpRSJoinParallel::OvlpRSJoinParallel ( const std::vector< std::vector< ui > > & sorted_records_1,
const std::vector< std::vector< ui > > & sorted_records_2,
const std::vector< double > & rec1wt,
const std::vector< double > & rec2wt,
const std::vector< double > & _wordwt,
ui _maxHeapSize = 0,
bool _isWeightedComp = false )
inline

Member Function Documentation

◆ build_heap() [1/2]

bool OvlpRSJoinParallel::build_heap ( const std::vector< std::pair< int, int > > & vec,
const std::vector< std::vector< ui > > & dataset,
int L,
std::vector< int > & heap,
std::vector< combination_p1 > & combs,
int & heap_size,
int tid )

◆ build_heap() [2/2]

bool OvlpRSJoinParallel::build_heap ( const std::vector< std::pair< int, int > > & vec,
const std::vector< std::vector< ui > > & dataset,
int L,
std::vector< int > & heap,
std::vector< combination_p2 > & combs,
int & heap_size,
int tid )

◆ comp_comb1()

bool OvlpRSJoinParallel::comp_comb1 ( const int a,
const int b,
int tid )
inline

◆ comp_comb2()

bool OvlpRSJoinParallel::comp_comb2 ( const int a,
const int b,
int tid )
inline

◆ overlapCoeff()

double OvlpRSJoinParallel::overlapCoeff ( int id1,
int id2 )
inline

◆ overlapjoin()

void OvlpRSJoinParallel::overlapjoin ( int overlap_threshold,
std::vector< std::pair< int, int > > & finalPairs )

◆ set_external_store()

void OvlpRSJoinParallel::set_external_store ( const std::string & _resPair_path)
inline

◆ small_case()

void OvlpRSJoinParallel::small_case ( int L1,
int R1,
int L2,
int R2,
std::vector< std::pair< int, int > > & finalPairs )

◆ weightedOverlapCoeff()

double OvlpRSJoinParallel::weightedOverlapCoeff ( int id1,
int id2 )
inline

Member Data Documentation

◆ c

int OvlpRSJoinParallel::c {0}

◆ candidate_num

int64_t OvlpRSJoinParallel::candidate_num

◆ combs1

std::vector<combination_p1> OvlpRSJoinParallel::combs1[MAXTHREADNUM]

◆ combs2

std::vector<combination_p2> OvlpRSJoinParallel::combs2[MAXTHREADNUM]

◆ datasets1

std::vector<std::vector<ui> > OvlpRSJoinParallel::datasets1

◆ datasets2

std::vector<std::vector<ui> > OvlpRSJoinParallel::datasets2

◆ ele_lists1

std::vector<std::vector<std::pair<int, int> > > OvlpRSJoinParallel::ele_lists1

◆ ele_lists2

std::vector<std::vector<std::pair<int, int> > > OvlpRSJoinParallel::ele_lists2

◆ heap1

std::vector<int> OvlpRSJoinParallel::heap1[MAXTHREADNUM]

◆ heap2

std::vector<int> OvlpRSJoinParallel::heap2[MAXTHREADNUM]

◆ idmap_records1

std::vector<std::pair<int, int> > OvlpRSJoinParallel::idmap_records1

◆ idmap_records2

std::vector<std::pair<int, int> > OvlpRSJoinParallel::idmap_records2

◆ if_external_IO

bool OvlpRSJoinParallel::if_external_IO = false

◆ isHeap

int OvlpRSJoinParallel::isHeap[MAXTHREADNUM] = { 0 }

◆ isWeightedComp

bool OvlpRSJoinParallel::isWeightedComp {false}

◆ maxHeapSize

ui OvlpRSJoinParallel::maxHeapSize {0}

◆ n1

int OvlpRSJoinParallel::n1 {0}

◆ n2

int OvlpRSJoinParallel::n2 {0}

◆ records1

std::vector<std::vector<ui> > OvlpRSJoinParallel::records1

◆ records2

std::vector<std::vector<ui> > OvlpRSJoinParallel::records2

◆ recWeights1

std::vector<double> OvlpRSJoinParallel::recWeights1

◆ recWeights2

std::vector<double> OvlpRSJoinParallel::recWeights2

◆ result_num

int64_t OvlpRSJoinParallel::result_num

◆ result_pairs

std::vector<std::pair<int, int> > OvlpRSJoinParallel::result_pairs[MAXTHREADNUM]

◆ result_pairs_

std::vector<WeightPair> OvlpRSJoinParallel::result_pairs_[MAXTHREADNUM]

◆ resultPair_storePath

std::string OvlpRSJoinParallel::resultPair_storePath

◆ total_eles

ui OvlpRSJoinParallel::total_eles {0}

◆ wordwt

std::vector<double> OvlpRSJoinParallel::wordwt

The documentation for this class was generated from the following files: