Variables | |
file = Path(__file__).resolve() | |
parent | |
root | |
str | dir_path = "../datasets/tables/megallen/amazon-google-structured" |
str | path_tableA = "/".join([dir_path, "table_a.csv"]) |
str | path_tableB = "/".join([dir_path, "table_b.csv"]) |
str | path_gold = "/".join([dir_path, "gold.csv"]) |
str | path_rule = "simjoin_entitymatching/blocker/rules/rules_amazon_google_structured_1.txt" |
str | path_range = "simjoin_entitymatching/matcher/model/ranges/ranges_amazon_google_structured_1.txt" |
str | path_tree = "simjoin_entitymatching/matcher/model/trees/trees_amazon_google_structured_1.txt" |
str | path_rf = "simjoin_entitymatching/matcher/model/rf_amazon_google_structured_1.joblib" |
gold_graph = nx.Graph() | |
tableA = read_csv_table(path_tableA) | |
tableB = read_csv_table(path_tableB) | |
gold = read_csv_golds(path_gold, gold_graph) | |
dict | map_A = {tableA.loc[ridx, "id"] : ridx for ridx in list(tableA.index)} |
dict | map_B = {tableB.loc[ridx, "id"] : ridx for ridx in list(tableB.index)} |
attr_types_ltable = au.get_attr_types(tableA) | |
attr_types_rtable = au.get_attr_types(tableB) | |
rf = randf.RandomForest() | |
graph | |
at_ltable | |
at_rtable | |
wrtie_fea_names | |
blk_res_cand | |
H | |
false_neg = pd.read_csv("test/debug/false_neg.csv") | |
lid = int(row["ltable_id"]) | |
rid = int(row["rtable_id"]) | |
exclude_attrs | |
vis_rf_path.at_ltable |
vis_rf_path.at_rtable |
vis_rf_path.attr_types_ltable = au.get_attr_types(tableA) |
vis_rf_path.attr_types_rtable = au.get_attr_types(tableB) |
vis_rf_path.blk_res_cand |
str vis_rf_path.dir_path = "../datasets/tables/megallen/amazon-google-structured" |
vis_rf_path.exclude_attrs |
vis_rf_path.false_neg = pd.read_csv("test/debug/false_neg.csv") |
vis_rf_path.file = Path(__file__).resolve() |
vis_rf_path.gold = read_csv_golds(path_gold, gold_graph) |
vis_rf_path.gold_graph = nx.Graph() |
vis_rf_path.graph |
vis_rf_path.H |
vis_rf_path.lid = int(row["ltable_id"]) |
dict vis_rf_path.map_A = {tableA.loc[ridx, "id"] : ridx for ridx in list(tableA.index)} |
dict vis_rf_path.map_B = {tableB.loc[ridx, "id"] : ridx for ridx in list(tableB.index)} |
vis_rf_path.parent |
str vis_rf_path.path_gold = "/".join([dir_path, "gold.csv"]) |
str vis_rf_path.path_range = "simjoin_entitymatching/matcher/model/ranges/ranges_amazon_google_structured_1.txt" |
str vis_rf_path.path_rf = "simjoin_entitymatching/matcher/model/rf_amazon_google_structured_1.joblib" |
str vis_rf_path.path_rule = "simjoin_entitymatching/blocker/rules/rules_amazon_google_structured_1.txt" |
str vis_rf_path.path_tableA = "/".join([dir_path, "table_a.csv"]) |
str vis_rf_path.path_tableB = "/".join([dir_path, "table_b.csv"]) |
str vis_rf_path.path_tree = "simjoin_entitymatching/matcher/model/trees/trees_amazon_google_structured_1.txt" |
vis_rf_path.rf = randf.RandomForest() |
vis_rf_path.rid = int(row["rtable_id"]) |
vis_rf_path.root |
vis_rf_path.tableA = read_csv_table(path_tableA) |
vis_rf_path.tableB = read_csv_table(path_tableB) |
vis_rf_path.wrtie_fea_names |