Skip to content

Commit f3ec46b

Browse files
authored
Merge pull request #269 from microsoft/encode-nominals
Encode categorical nominal columns as integers in the discover backend for DECI/NOTEARS/PC
2 parents da0379b + 714cd71 commit f3ec46b

File tree

4 files changed

+15
-0
lines changed

4 files changed

+15
-0
lines changed

python/backend/backend/discover/algorithms/commons/base_runner.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,16 @@ def _remove_rows_with_missing_values(self):
8383
self._number_of_rows - self._prepared_data.shape[0]
8484
)
8585

86+
def _transform_categorical_nominal_to_continuous(self):
    """Integer-encode every categorical-nominal column of the prepared data.

    Walks the columns of ``self._prepared_data`` and, for each one whose
    entry in ``self._nature_by_variable`` equals
    ``CausalVariableNature.CategoricalNominal``, overwrites the column in
    place with the integer codes returned by ``pd.factorize``. Columns of
    any other nature are left untouched. Returns nothing.

    Per the pandas documentation, ``pd.factorize`` numbers distinct values
    in order of first appearance by default.
    """
    # TODO: remove this once categorical values are properly handled by each algorithm
    frame = self._prepared_data
    for column in frame.columns:
        is_nominal = (
            self._nature_by_variable[column]
            == CausalVariableNature.CategoricalNominal
        )
        if not is_nominal:
            continue
        logging.info(f"encoding categorical nominal column {column} to integers")
        # factorize returns (codes, uniques); only the codes are kept.
        codes, _ = pd.factorize(frame[column])
        frame[column] = codes
95+
8696
def _prepare_data(self):
8797
self._prepared_data = pd.DataFrame.from_dict(self._dataset_data)
8898
self._remove_rows_with_missing_values()

python/backend/backend/discover/algorithms/deci.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def __init__(self, p: DeciPayload, progress_callback: ProgressCallback = None):
8989
self._is_dag = None
9090

9191
def _build_causica_dataset(self) -> Dataset:
92+
self._transform_categorical_nominal_to_continuous()
9293
numpy_data = self._prepared_data.to_numpy()
9394
data_mask = np.ones(numpy_data.shape)
9495

python/backend/backend/discover/algorithms/notears.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ def __init__(self, p: NotearsPayload, progress_callback: ProgressCallback = None
2020
super().__init__(p, progress_callback)
2121

2222
def do_causal_discovery(self) -> CausalGraph:
23+
self._transform_categorical_nominal_to_continuous()
24+
2325
notears_graph = from_pandas(
2426
self._prepared_data,
2527
tabu_child_nodes=self._constraints.causes,

python/backend/backend/discover/algorithms/pc.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ def __init__(self, p: PCPayload, progress_callback: ProgressCallback = None):
2323
super().__init__(p, progress_callback)
2424

2525
def do_causal_discovery(self) -> CausalGraph:
26+
self._transform_categorical_nominal_to_continuous()
27+
2628
n = PC(alpha=0.2)
2729
n.learn(self._prepared_data.to_numpy())
2830
graph_gc = networkx.DiGraph(n.causal_matrix)

0 commit comments

Comments
 (0)