Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
jaccard_sampler.h
Go to the documentation of this file.
1/*
2 * author: Yunqi Li
3 * contact: liyunqixa@gmail.com
4 */
5#ifndef _JACCARD_SAMPLER_H_
6#define _JACCARD_SAMPLER_H_
7
10
11
12/*
13 * To minimize the sampling time, we employ the parallel join
14 * on small datasets (i.e., size less than 10 k), the parallel algorithm may be slower (~0.02s)
15 * on large datasets (i.e., size larger than 1m), the parallel algorithm is significantly faster
16 */
17class JacSampler : public Sampler
18{
19public:
20 using Sampler::Sampler;
21 double det{0.0};
22
23public:
24 JacSampler() = default;
25 JacSampler(double _det, std::string _blkAttr, bool _isRS) : Sampler(_blkAttr, _isRS), det(_det) {
26 std::cout << "spawn jaccard sampler: " << blkAttr << " " << det << " is RS Join: " << isRS << std::endl;
27 }
28 ~JacSampler() = default;
29 JacSampler(const JacSampler &other) = delete;
30 JacSampler(JacSampler &&other) = delete;
31
32public:
33 void sample(const std::string &pathTableA, const std::string &pathTableB);
34};
35
36
37#endif // _JACCARD_SAMPLER_H_
Definition jaccard_sampler.h:18
~JacSampler()=default
double det
Definition jaccard_sampler.h:21
JacSampler()=default
JacSampler(JacSampler &&other)=delete
void sample(const std::string &pathTableA, const std::string &pathTableB)
Definition sampler_impl.cc:222
JacSampler(double _det, std::string _blkAttr, bool _isRS)
Definition jaccard_sampler.h:25
JacSampler(const JacSampler &other)=delete
Definition base_sampler.h:15
std::string blkAttr
Definition base_sampler.h:17
std::string pathTableA
Definition base_sampler.h:20
std::string pathTableB
Definition base_sampler.h:21
bool isRS
Definition base_sampler.h:18
Sampler()=default