Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
StringJoin Class Reference

#include <stringjoin.h>

Public Types

using InvLists = std::unordered_map<uint64_t, std::vector<int>>
 

Public Member Functions

 StringJoin ()=default
 
 StringJoin (const std::vector< std::string > &data, int threshold, ui _maxHeapSize=0)
 
 StringJoin (const std::vector< std::string > &work, const std::vector< std::string > &query, int threshold, ui _maxHeapSize=0)
 
 ~StringJoin ()
 
 StringJoin (const StringJoin &other)=delete
 
 StringJoin (StringJoin &&other)=delete
 
void init ()
 
void prepareSelf ()
 
void prepareRS ()
 
bool verifyLeftPartSelf (int xid, int yid, int xlen, int ylen, int Tau)
 
bool verifyRightPartSelf (int xid, int yid, int xlen, int ylen, int xpos, int ypos, int Tau)
 
bool verifyLeftPartRS (int xid, int yid, int xlen, int ylen, int Tau)
 
bool verifyRightPartRS (int xid, int yid, int xlen, int ylen, int xpos, int ypos, int Tau)
 
void selfJoin (std::vector< std::pair< int, int > > &finalPairs)
 
void RSJoin (std::vector< std::pair< int, int > > &finalPairs)
 
void checkSelfResults () const
 
void printDebugInfo (int currLen) const
 

Public Attributes

int workMinDictLen {19260817}
 
int workMaxDictLen {0}
 
int queryMinDictLen {19260817}
 
int queryMaxDictLen {0}
 
int maxDictLen {0}
 
int minDictLen {0}
 
int workN {0}
 
int queryN {0}
 
int N {0}
 
int D {0}
 
int PN {0}
 
int hashNumber {31}
 
int modNumber {1000000007}
 
std::vector< std::string > work_dataset
 
std::vector< std::string > query_dataset
 
std::vector< std::pair< int, int > > pairs
 
uint64_t candNum {0}
 
uint64_t veriNum {0}
 
uint64_t listNum {0}
 
uint64_t realNum {0}
 
std::vector< int > results
 
bool valid
 
int left
 
int right
 
int _left
 
int _right
 
int ** matrix {nullptr}
 
int ** _matrix {nullptr}
 
bool * quickRef {nullptr}
 
int ** partLen {nullptr}
 
int ** partPos {nullptr}
 
int * dist {nullptr}
 
uint64_t * power {nullptr}
 
InvLists ** invLists {nullptr}
 
std::vector< PIndex > ** partIndex {nullptr}
 
ui maxHeapSize {0}
 

Member Typedef Documentation

◆ InvLists

using StringJoin::InvLists = std::unordered_map<uint64_t, std::vector<int>>

Constructor & Destructor Documentation

◆ StringJoin() [1/5]

StringJoin::StringJoin ( )
default

◆ StringJoin() [2/5]

StringJoin::StringJoin ( const std::vector< std::string > & data,
int threshold,
ui _maxHeapSize = 0 )
inline

◆ StringJoin() [3/5]

StringJoin::StringJoin ( const std::vector< std::string > & work,
const std::vector< std::string > & query,
int threshold,
ui _maxHeapSize = 0 )
inline

◆ ~StringJoin()

StringJoin::~StringJoin ( )
inline

◆ StringJoin() [4/5]

StringJoin::StringJoin ( const StringJoin & other)
delete

◆ StringJoin() [5/5]

StringJoin::StringJoin ( StringJoin && other)
delete

Member Function Documentation

◆ checkSelfResults()

void StringJoin::checkSelfResults ( ) const

◆ init()

void StringJoin::init ( )

◆ prepareRS()

void StringJoin::prepareRS ( )

◆ prepareSelf()

void StringJoin::prepareSelf ( )

◆ printDebugInfo()

void StringJoin::printDebugInfo ( int currLen) const

◆ RSJoin()

void StringJoin::RSJoin ( std::vector< std::pair< int, int > > & finalPairs)

◆ selfJoin()

void StringJoin::selfJoin ( std::vector< std::pair< int, int > > & finalPairs)

◆ verifyLeftPartRS()

bool StringJoin::verifyLeftPartRS ( int xid,
int yid,
int xlen,
int ylen,
int Tau )

◆ verifyLeftPartSelf()

bool StringJoin::verifyLeftPartSelf ( int xid,
int yid,
int xlen,
int ylen,
int Tau )

◆ verifyRightPartRS()

bool StringJoin::verifyRightPartRS ( int xid,
int yid,
int xlen,
int ylen,
int xpos,
int ypos,
int Tau )

◆ verifyRightPartSelf()

bool StringJoin::verifyRightPartSelf ( int xid,
int yid,
int xlen,
int ylen,
int xpos,
int ypos,
int Tau )

Member Data Documentation

◆ _left

int StringJoin::_left

◆ _matrix

int** StringJoin::_matrix {nullptr}

◆ _right

int StringJoin::_right

◆ candNum

uint64_t StringJoin::candNum {0}

◆ D

int StringJoin::D {0}

◆ dist

int* StringJoin::dist {nullptr}

◆ hashNumber

int StringJoin::hashNumber {31}

◆ invLists

InvLists** StringJoin::invLists {nullptr}

◆ left

int StringJoin::left

◆ listNum

uint64_t StringJoin::listNum {0}

◆ matrix

int** StringJoin::matrix {nullptr}

◆ maxDictLen

int StringJoin::maxDictLen {0}

◆ maxHeapSize

ui StringJoin::maxHeapSize {0}

◆ minDictLen

int StringJoin::minDictLen {0}

◆ modNumber

int StringJoin::modNumber {1000000007}

◆ N

int StringJoin::N {0}

◆ pairs

std::vector<std::pair<int, int> > StringJoin::pairs

◆ partIndex

std::vector<PIndex>** StringJoin::partIndex {nullptr}

◆ partLen

int** StringJoin::partLen {nullptr}

◆ partPos

int** StringJoin::partPos {nullptr}

◆ PN

int StringJoin::PN {0}

◆ power

uint64_t* StringJoin::power {nullptr}

◆ query_dataset

std::vector<std::string> StringJoin::query_dataset

◆ queryMaxDictLen

int StringJoin::queryMaxDictLen {0}

◆ queryMinDictLen

int StringJoin::queryMinDictLen {19260817}

◆ queryN

int StringJoin::queryN {0}

◆ quickRef

bool* StringJoin::quickRef {nullptr}

◆ realNum

uint64_t StringJoin::realNum {0}

◆ results

std::vector<int> StringJoin::results

◆ right

int StringJoin::right

◆ valid

bool StringJoin::valid

◆ veriNum

uint64_t StringJoin::veriNum {0}

◆ work_dataset

std::vector<std::string> StringJoin::work_dataset

◆ workMaxDictLen

int StringJoin::workMaxDictLen {0}

◆ workMinDictLen

int StringJoin::workMinDictLen {19260817}

◆ workN

int StringJoin::workN {0}

The documentation for this class was generated from the following files: