Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
base_sampler.h
Go to the documentation of this file.
1/*
2 * author: Yunqi Li
3 * contact: liyunqixa@gmail.com
4 */
5#ifndef _BASE_SAMPLER_H_
6#define _BASE_SAMPLER_H_
7
8#include "common/tokenizer.h"
9#include "common/dataframe.h"
11#include <vector>
12
13
14class Sampler
15{
16public:
17 std::string blkAttr;
18 bool isRS{false};
19 ui numWord;
20 std::string pathTableA;
21 std::string pathTableB;
22 Table tableA;
23 Table tableB;
24 std::vector<std::vector<ui>> recordsA;
25 std::vector<std::vector<ui>> recordsB;
26 std::vector<double> weightsA;
27 std::vector<double> weightsB;
28 std::vector<double> wordwt;
29 std::vector<ui> idMapA;
30 std::vector<ui> idMapB;
31 std::vector<std::pair<int, int>> pairs;
32
33public:
34 Sampler() = default;
35 Sampler(std::string _blkAttr, bool _isRS) : blkAttr(_blkAttr), isRS(_isRS) { }
36 ~Sampler() = default;
37 Sampler(const Sampler &other) = delete;
38 Sampler(Sampler &&other) = delete;
39
40public:
41 void readTable(const std::string &_pathTableA, const std::string &_pathTableB);
42 void prepareRecords(ui columnA, ui columnB, TokenizerType tt, ui q);
43 // virtual api
44 virtual void sample(const std::string &pathTableA, const std::string &pathTableB) = 0;
45};
46
47
48#endif // _BASE_SAMPLER_H_
std::vector< ui > q
Definition block.cc:9
Sampler
Definition base_sampler.h:15
Sampler(std::string _blkAttr, bool _isRS)
Definition base_sampler.h:35
std::vector< double > weightsB
Definition base_sampler.h:27
Sampler(const Sampler &other)=delete
virtual void sample(const std::string &pathTableA, const std::string &pathTableB)=0
~Sampler()=default
std::vector< std::pair< int, int > > pairs
Definition base_sampler.h:31
ui numWord
Definition base_sampler.h:19
Table tableA
Definition base_sampler.h:22
std::vector< std::vector< ui > > recordsA
Definition base_sampler.h:24
std::vector< double > wordwt
Definition base_sampler.h:28
std::string blkAttr
Definition base_sampler.h:17
Sampler(Sampler &&other)=delete
void prepareRecords(ui columnA, ui columnB, TokenizerType tt, ui q)
Definition sampler_impl.cc:29
Table tableB
Definition base_sampler.h:23
std::string pathTableA
Definition base_sampler.h:20
void readTable(const std::string &_pathTableA, const std::string &_pathTableB)
Definition sampler_impl.cc:11
std::string pathTableB
Definition base_sampler.h:21
std::vector< ui > idMapA
Definition base_sampler.h:29
std::vector< double > weightsA
Definition base_sampler.h:26
std::vector< ui > idMapB
Definition base_sampler.h:30
bool isRS
Definition base_sampler.h:18
std::vector< std::vector< ui > > recordsB
Definition base_sampler.h:25
Sampler()=default
Table
Definition dataframe.h:19
TokenizerType
Definition type.h:39
unsigned int ui
Definition type.h:8