Public Member Functions | |
__init__ (self, inmemory_) | |
train_and_save (self, blk_attr, rawtable, rawtable2=None) | |
train_all_and_save (self, attrs, rawtable, rawtable2=None) | |
apply_sample (self, blk_attr, tau) | |
group_interchangeable (self, blk_attr, tau) | |
load_blk_res (self, usage) | |
load_match_res (self) | |
load_model (self) | |
Public Attributes | |
str | model = "" |
str | blk_res = "" |
str | match_res = "" |
list | setences = [] |
int | inmemory = inmemory_ |
Protected Member Functions | |
_preprocess (self, blk_attr, rawtable, rawtable2=None) | |
Word2Vec for attributes of type str_eq_1w or numeric. But do numeric attributes really need normalization?
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.__init__ | ( | self, | |
inmemory_ ) |
|
protected |
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.apply_sample | ( | self, | |
blk_attr, | |||
tau ) |
Apply Word2Vec for sampling and labeling candidates.
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.group_interchangeable | ( | self, | |
blk_attr, | |||
tau ) |
Apply Word2Vec for grouping interchangeable values in the blocking result.
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.load_blk_res | ( | self, | |
usage ) |
Avoid multiple I/O operations. usage: 0 for labeler & training, 1 for value matcher. Only use this method when training using a sample.
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.load_match_res | ( | self | ) |
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.load_model | ( | self | ) |
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.train_all_and_save | ( | self, | |
attrs, | |||
rawtable, | |||
rawtable2 = None ) |
Train a model for all attributes except id. attrs: attributes that could use word2vec.
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.train_and_save | ( | self, | |
blk_attr, | |||
rawtable, | |||
rawtable2 = None ) |
str simjoin_entitymatching.value_matcher.word2vec.Word2Vec.blk_res = "" |
int simjoin_entitymatching.value_matcher.word2vec.Word2Vec.inmemory = inmemory_ |
str simjoin_entitymatching.value_matcher.word2vec.Word2Vec.match_res = "" |
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.model = "" |
simjoin_entitymatching.value_matcher.word2vec.Word2Vec.setences = [] |