Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
simjoin_blocker.h
Go to the documentation of this file.
1/*
2 * author: Yunqi Li
3 * contact: liyunqixa@gmail.com
4 */
5#ifndef _SIMJOIN_BLOCKER_H_
6#define _SIMJOIN_BLOCKER_H_
7
8#include "common/config.h"
9#include "common/dataframe.h"
10#include "common/io.h"
11#include "common/setjoin.h"
13#include "common/ovlpjoin.h"
15#include "common/stringjoin.h"
18#include "topk/topk.h"
19#include "group/group.h"
21#include <bitset>
22#include <fstream>
23#include <parallel/algorithm>
24#define NDEBUG
25#include <assert.h>
26
27
29{
 // Body of SimJoinBlocker. Every operation declared below is a static member
 // function returning void. Copy and move construction are deleted; copy/move
 // assignment operators are not declared in this listing.
30public:
31 SimJoinBlocker() = default;
32 ~SimJoinBlocker() = default;
33 SimJoinBlocker(const SimJoinBlocker &other) = delete;
34 SimJoinBlocker(SimJoinBlocker &&other) = delete;
35
36public:
 // Similarity-join entry points. The self*/RS* variants share the leading
 // parameters (K, topKattr, attrType, ifWeighted).
 // NOTE(review): the meaning of these parameters is not documented in this
 // header -- confirm against the definitions in simjoin_blocker.cc.
37 static void selfSimilarityJoinParallel(uint64_t K, const std::string &topKattr,
38 const std::string &attrType, bool ifWeighted);
39
 // Takes one extra flag, isJoinTopK, that the other join variants do not have.
40 static void RSSimilarityJoinSerial(uint64_t K, const std::string &topKattr,
41 const std::string &attrType, bool ifWeighted,
42 bool isJoinTopK);
43 // TODO: no definition is linked for selfSimilarityJoinSerial in the generated docs; presumably not implemented yet -- confirm
44 static void selfSimilarityJoinSerial(uint64_t K, const std::string &topKattr,
45 const std::string &attrType, bool ifWeighted);
46
 // NOTE(review): no definition is linked for RSSimilarityJoinParallel in the
 // generated docs either -- verify that it is implemented before calling it.
47 static void RSSimilarityJoinParallel(uint64_t K, const std::string &topKattr,
48 const std::string &attrType, bool ifWeighted);
49
50public:
 // densities and attrAverage are taken by non-const reference (presumably
 // output parameters -- confirm); defaultSampleResDir defaults to "".
51 static void estimateDensity(bool isWeighted, std::vector<double> &densities,
52 std::unordered_map<std::string, double> &attrAverage,
53 const std::string &defaultSampleResDir = "");
54 // Self-join variant that takes interchangeable values into account
55 static void selfInterchangeableJoin(uint64_t K, const std::string &topKattr,
56 const std::string &attrType, bool ifWeighted);
57};
58
59
60#endif // _SIMJOIN_BLOCKER_H_
Definition simjoin_blocker.h:29
SimJoinBlocker(const SimJoinBlocker &other)=delete
SimJoinBlocker(SimJoinBlocker &&other)=delete
static void RSSimilarityJoinParallel(uint64_t K, const std::string &topKattr, const std::string &attrType, bool ifWeighted)
static void selfSimilarityJoinParallel(uint64_t K, const std::string &topKattr, const std::string &attrType, bool ifWeighted)
Definition simjoin_blocker.cc:8
SimJoinBlocker()=default
~SimJoinBlocker()=default
static void estimateDensity(bool isWeighted, std::vector< double > &densities, std::unordered_map< std::string, double > &attrAverage, const std::string &defaultSampleResDir="")
Definition simjoin_blocker.cc:383
static void selfInterchangeableJoin(uint64_t K, const std::string &topKattr, const std::string &attrType, bool ifWeighted)
Definition simjoin_blocker.cc:653
static void selfSimilarityJoinSerial(uint64_t K, const std::string &topKattr, const std::string &attrType, bool ifWeighted)
static void RSSimilarityJoinSerial(uint64_t K, const std::string &topKattr, const std::string &attrType, bool ifWeighted, bool isJoinTopK)
Definition simjoin_blocker.cc:180