Entity Matching by Similarity Join
 
Loading...
Searching...
No Matches
test_sample Namespace Reference

Variables

 file = Path(__file__).resolve()
 
 parent
 
 root
 
str dir_path = "../datasets/tables/megallen/amazon-google-structured"
 
str path_tableA = "/".join([dir_path, "table_a.csv"])
 
str path_tableB = "/".join([dir_path, "table_b.csv"])
 
str path_gold = "/".join([dir_path, "gold.csv"])
 
 gold_graph = nx.Graph()
 
 tableA = read_csv_table(path_tableA)
 
 tableB = read_csv_table(path_tableB)
 
 gold = read_csv_golds(path_gold, gold_graph)
 
 sample_strategy
 
 blocking_attr
 
 cluster_tau
 
 sample_tau
 
 step2_tau
 
 num_data
 
 ob = OverlapBlocker()
 
 C
 
int cur_golds = 0
 
 row_index = list(C.index)
 
str id1 = str(C.loc[index, 'ltable_id']) + 'A'
 
str id2 = str(C.loc[index, 'rtable_id']) + 'B'
 
float recall = cur_golds / len(gold) * 1.0
 
float density = cur_golds / len(C) * 1.0
 

Variable Documentation

◆ blocking_attr

test_sample.blocking_attr

◆ C

test_sample.C
Initial value:
= ob.block_tables(tableA, tableB, "title", "title",
                  word_level=True, overlap_size=4,
                  l_output_attrs=["id"],
                  r_output_attrs=["id"],
                  allow_missing=False,
                  show_progress=False)

◆ cluster_tau

test_sample.cluster_tau

◆ cur_golds

int test_sample.cur_golds = 0

◆ density

float test_sample.density = cur_golds / len(C) * 1.0

◆ dir_path

str test_sample.dir_path = "../datasets/tables/megallen/amazon-google-structured"

◆ file

test_sample.file = Path(__file__).resolve()

◆ gold

test_sample.gold = read_csv_golds(path_gold, gold_graph)

◆ gold_graph

test_sample.gold_graph = nx.Graph()

◆ id1

str test_sample.id1 = str(C.loc[index, 'ltable_id']) + 'A'

◆ id2

str test_sample.id2 = str(C.loc[index, 'rtable_id']) + 'B'

◆ num_data

test_sample.num_data

◆ ob

test_sample.ob = OverlapBlocker()

◆ parent

test_sample.parent

◆ path_gold

str test_sample.path_gold = "/".join([dir_path, "gold.csv"])

◆ path_tableA

str test_sample.path_tableA = "/".join([dir_path, "table_a.csv"])

◆ path_tableB

str test_sample.path_tableB = "/".join([dir_path, "table_b.csv"])

◆ recall

float test_sample.recall = cur_golds / len(gold) * 1.0

◆ root

test_sample.root

◆ row_index

test_sample.row_index = list(C.index)

◆ sample_strategy

test_sample.sample_strategy

◆ sample_tau

test_sample.sample_tau

◆ step2_tau

test_sample.step2_tau

◆ tableA

test_sample.tableA = read_csv_table(path_tableA)

◆ tableB

test_sample.tableB = read_csv_table(path_tableB)