Variables | |
file = Path(__file__).resolve() | |
parent | |
root | |
str | dir_path = "../datasets/tables/megallen/amazon-google-structured" |
str | path_tableA = "/".join([dir_path, "table_a.csv"]) |
str | path_tableB = "/".join([dir_path, "table_b.csv"]) |
str | path_gold = "/".join([dir_path, "gold.csv"]) |
gold_graph = nx.Graph() | |
tableA = read_csv_table(path_tableA) | |
tableB = read_csv_table(path_tableB) | |
gold = read_csv_golds(path_gold, gold_graph) | |
sample_strategy | |
blocking_attr | |
cluster_tau | |
sample_tau | |
step2_tau | |
num_data | |
ob = OverlapBlocker() | |
C | |
int | cur_golds = 0 |
row_index = list(C.index) | |
str | id1 = str(C.loc[index, 'ltable_id']) + 'A' |
str | id2 = str(C.loc[index, 'rtable_id']) + 'B' |
float | recall = cur_golds / len(gold) * 1.0 |
float | density = cur_golds / len(C) * 1.0 |
test_sample.blocking_attr |
test_sample.C |
test_sample.cluster_tau |
int test_sample.cur_golds = 0 |
str test_sample.dir_path = "../datasets/tables/megallen/amazon-google-structured" |
test_sample.file = Path(__file__).resolve() |
test_sample.gold = read_csv_golds(path_gold, gold_graph) |
test_sample.gold_graph = nx.Graph() |
str test_sample.id1 = str(C.loc[index, 'ltable_id']) + 'A' |
str test_sample.id2 = str(C.loc[index, 'rtable_id']) + 'B' |
test_sample.num_data |
test_sample.ob = OverlapBlocker() |
test_sample.parent |
str test_sample.path_gold = "/".join([dir_path, "gold.csv"]) |
str test_sample.path_tableA = "/".join([dir_path, "table_a.csv"]) |
str test_sample.path_tableB = "/".join([dir_path, "table_b.csv"]) |
test_sample.root |
test_sample.row_index = list(C.index) |
test_sample.sample_strategy |
test_sample.sample_tau |
test_sample.step2_tau |
test_sample.tableA = read_csv_table(path_tableA) |
test_sample.tableB = read_csv_table(path_tableB) |