Variables
	file = Path(__file__).resolve()

	parent

	root

str	dir_path = "../datasets/tables/megallen/amazon-google-structured"

str	path_tableA = "/".join([dir_path, "table_a.csv"])

str	path_tableB = "/".join([dir_path, "table_b.csv"])

str	path_gold = "/".join([dir_path, "gold.csv"])

	gold_graph = nx.Graph()

	tableA = read_csv_table(path_tableA)

	tableB = read_csv_table(path_tableB)

	gold = read_csv_golds(path_gold, gold_graph)

	sample_strategy

	blocking_attr

	cluster_tau

	sample_tau

	step2_tau

	num_data

	ob = OverlapBlocker()

	C

int	cur_golds = 0

	row_index = list(C.index)

str	id1 = str(C.loc[index, 'ltable_id']) + 'A'

str	id2 = str(C.loc[index, 'rtable_id']) + 'B'

float	recall = cur_golds / len(gold) * 1.0

float	density = cur_golds / len(C) * 1.0

Variable Documentation

◆ blocking_attr

test_sample.blocking_attr

◆ C

test_sample.C

Initial value:

=  ob.block_tables(tableA, tableB, "title", "title", 
                    word_level=True, overlap_size=4, 
                    l_output_attrs=["id"], 
                    r_output_attrs=["id"], 
                    allow_missing=False,
                    show_progress=False)

◆ cluster_tau

test_sample.cluster_tau

◆ cur_golds

int test_sample.cur_golds = 0

◆ density

float test_sample.density = cur_golds / len(C) * 1.0

◆ dir_path

str test_sample.dir_path = "../datasets/tables/megallen/amazon-google-structured"

◆ file

test_sample.file = Path(__file__).resolve()

◆ gold

test_sample.gold = read_csv_golds(path_gold, gold_graph)

◆ gold_graph

test_sample.gold_graph = nx.Graph()

◆ id1

str test_sample.id1 = str(C.loc[index, 'ltable_id']) + 'A'

◆ id2

str test_sample.id2 = str(C.loc[index, 'rtable_id']) + 'B'

◆ num_data

test_sample.num_data

◆ ob

test_sample.ob = OverlapBlocker()

◆ parent

test_sample.parent

◆ path_gold

str test_sample.path_gold = "/".join([dir_path, "gold.csv"])

◆ path_tableA

str test_sample.path_tableA = "/".join([dir_path, "table_a.csv"])

◆ path_tableB

str test_sample.path_tableB = "/".join([dir_path, "table_b.csv"])

◆ recall

float test_sample.recall = cur_golds / len(gold) * 1.0

◆ root

test_sample.root

◆ row_index

test_sample.row_index = list(C.index)

◆ sample_strategy

test_sample.sample_strategy

◆ sample_tau

test_sample.sample_tau

◆ step2_tau

test_sample.step2_tau

◆ tableA

test_sample.tableA = read_csv_table(path_tableA)

◆ tableB

test_sample.tableB = read_csv_table(path_tableB)

Variables