Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
OvlpRSJoin Class Reference

#include <ovlpjoin.h>

Public Member Functions

void overlapjoin (int overlap_threshold, std::vector< std::pair< int, int > > &finalPairs)
 
void small_case (int L1, int R1, int L2, int R2, std::vector< std::pair< int, int > > &finalPairs)
 
 OvlpRSJoin (const std::vector< std::vector< ui > > &sorted_records_1, const std::vector< std::vector< ui > > &sorted_records_2, const std::vector< double > &_recWeights1, const std::vector< double > &_recWeights2, const std::vector< double > &_wordwt, ui _maxHeapSize=0, bool _isWeightedComp=false)
 
void set_external_store (const std::string &_resPair_path)
 
bool comp_comb1 (const int a, const int b)
 
bool comp_comb2 (const int a, const int b)
 
double weightedOverlapCoeff (int id1, int id2)
 
double overlapCoeff (int id1, int id2)
 
bool build_heap (const std::vector< std::pair< int, int > > &vec, const std::vector< std::vector< ui > > &dataset, int L, std::vector< int > &heap, std::vector< combination1 > &combs, int &heap_size)
 
bool build_heap (const std::vector< std::pair< int, int > > &vec, const std::vector< std::vector< ui > > &dataset, int L, std::vector< int > &heap, std::vector< combination2 > &combs, int &heap_size)
 

Public Attributes

int n1 {0}
 
int n2 {0}
 
int c {0}
 
ui total_eles {0}
 
std::vector< std::vector< ui > > records1
 
std::vector< std::vector< ui > > records2
 
std::vector< std::vector< ui > > datasets1
 
std::vector< std::vector< ui > > datasets2
 
std::vector< double > recWeights1
 
std::vector< double > recWeights2
 
std::vector< double > wordwt
 
std::vector< std::pair< int, int > > idmap_records1
 
std::vector< std::pair< int, int > > idmap_records2
 
std::vector< std::vector< std::pair< int, int > > > ele_lists1
 
std::vector< std::vector< std::pair< int, int > > > ele_lists2
 
std::vector< std::pair< int, int > > result_pairs
 
std::vector< int > heap1
 
std::vector< int > heap2
 
std::vector< combination1 > combs1
 
std::vector< combination2 > combs2
 
ui maxHeapSize {0}
 
bool isWeightedComp {false}
 
std::vector< WeightPair > result_pairs_
 
int isHeap {0}
 
int64_t candidate_num
 
int64_t result_num
 
bool if_external_IO = false
 
std::string resultPair_storePath
 

Constructor & Destructor Documentation

◆ OvlpRSJoin()

OvlpRSJoin::OvlpRSJoin ( const std::vector< std::vector< ui > > & sorted_records_1,
const std::vector< std::vector< ui > > & sorted_records_2,
const std::vector< double > & _recWeights1,
const std::vector< double > & _recWeights2,
const std::vector< double > & _wordwt,
ui _maxHeapSize = 0,
bool _isWeightedComp = false )
inline

Member Function Documentation

◆ build_heap() [1/2]

bool OvlpRSJoin::build_heap ( const std::vector< std::pair< int, int > > & vec,
const std::vector< std::vector< ui > > & dataset,
int L,
std::vector< int > & heap,
std::vector< combination1 > & combs,
int & heap_size )

◆ build_heap() [2/2]

bool OvlpRSJoin::build_heap ( const std::vector< std::pair< int, int > > & vec,
const std::vector< std::vector< ui > > & dataset,
int L,
std::vector< int > & heap,
std::vector< combination2 > & combs,
int & heap_size )

◆ comp_comb1()

bool OvlpRSJoin::comp_comb1 ( const int a,
const int b )
inline

◆ comp_comb2()

bool OvlpRSJoin::comp_comb2 ( const int a,
const int b )
inline

◆ overlapCoeff()

double OvlpRSJoin::overlapCoeff ( int id1,
int id2 )
inline

◆ overlapjoin()

void OvlpRSJoin::overlapjoin ( int overlap_threshold,
std::vector< std::pair< int, int > > & finalPairs )

◆ set_external_store()

void OvlpRSJoin::set_external_store ( const std::string & _resPair_path)
inline

◆ small_case()

void OvlpRSJoin::small_case ( int L1,
int R1,
int L2,
int R2,
std::vector< std::pair< int, int > > & finalPairs )

◆ weightedOverlapCoeff()

double OvlpRSJoin::weightedOverlapCoeff ( int id1,
int id2 )
inline

Member Data Documentation

◆ c

int OvlpRSJoin::c {0}

◆ candidate_num

int64_t OvlpRSJoin::candidate_num

◆ combs1

std::vector<combination1> OvlpRSJoin::combs1

◆ combs2

std::vector<combination2> OvlpRSJoin::combs2

◆ datasets1

std::vector<std::vector<ui> > OvlpRSJoin::datasets1

◆ datasets2

std::vector<std::vector<ui> > OvlpRSJoin::datasets2

◆ ele_lists1

std::vector<std::vector<std::pair<int, int> > > OvlpRSJoin::ele_lists1

◆ ele_lists2

std::vector<std::vector<std::pair<int, int> > > OvlpRSJoin::ele_lists2

◆ heap1

std::vector<int> OvlpRSJoin::heap1

◆ heap2

std::vector<int> OvlpRSJoin::heap2

◆ idmap_records1

std::vector<std::pair<int, int> > OvlpRSJoin::idmap_records1

◆ idmap_records2

std::vector<std::pair<int, int> > OvlpRSJoin::idmap_records2

◆ if_external_IO

bool OvlpRSJoin::if_external_IO = false

◆ isHeap

int OvlpRSJoin::isHeap {0}

◆ isWeightedComp

bool OvlpRSJoin::isWeightedComp {false}

◆ maxHeapSize

ui OvlpRSJoin::maxHeapSize {0}

◆ n1

int OvlpRSJoin::n1 {0}

◆ n2

int OvlpRSJoin::n2 {0}

◆ records1

std::vector<std::vector<ui> > OvlpRSJoin::records1

◆ records2

std::vector<std::vector<ui> > OvlpRSJoin::records2

◆ recWeights1

std::vector<double> OvlpRSJoin::recWeights1

◆ recWeights2

std::vector<double> OvlpRSJoin::recWeights2

◆ result_num

int64_t OvlpRSJoin::result_num

◆ result_pairs

std::vector<std::pair<int, int> > OvlpRSJoin::result_pairs

◆ result_pairs_

std::vector<WeightPair> OvlpRSJoin::result_pairs_

◆ resultPair_storePath

std::string OvlpRSJoin::resultPair_storePath

◆ total_eles

ui OvlpRSJoin::total_eles {0}

◆ wordwt

std::vector<double> OvlpRSJoin::wordwt

The documentation for this class was generated from the following files: