# resources/titanic_pipeline.yml

# Same example as https://scikit-learn.org/stable/auto_examples/compose/plot_column_transformer_mixed_types.html
# Top-level pipeline settings: data source, database name, sampling, and
# the id/target columns.
source: titanic
dbname: ml_tips
# NOTE(review): presumably the fraction of rows used for training -- confirm.
train_sample_rate: 0.8
oversample_n_times: 3 # Multiply the number of all samples by this factor. Must be an integer.
#oversample_pos_n_times: 2 # Multiply the number of positive samples by this factor. Must be an integer.

# NOTE(review): presumably the directory that holds the pipeline's query files -- confirm.
query_dir: queries

id_column: "rowid"
target_column: "survived"

#stratify: True

# Numerical features: the columns listed in each entry share one transformer
# configuration (here, an imputer and a normalizer).
numerical_columns:
  - columns:
      - "age"
      - "fare"
    transformer:
      imputer:
        strategy: "median" # One of: median, mean, constant
        phase: "train" # train, test, or null (null = use the whole dataset to get the stats)
      normalizer:
        strategy: "minmax"  # One of: standardize, log1p, minmax
        phase: "train" # NOTE(review): presumably the same choices as the imputer's phase -- confirm

# Categorical features: the columns listed in each entry share one transformer
# configuration (here, an imputer only).
categorical_columns:
  - columns:
      - "embarked"
      - "sex"
      - "pclass"
    transformer:
      imputer:
        strategy: "constant"
        phase: "train"
        fill_value: "missing" # NOTE(review): presumably the value filled in by the "constant" strategy -- confirm

# Vectorizer settings: table names for the train and test data, plus the
# dense-vector options.
vectorizer:
  train_table: "train"
  test_table: "test"
  # whole_table: "whole" # Vectorize all data
  # Options for creating the dense vectors required by train_randomforest*
  dense:
    mode: "auto" # auto or force. Default: auto
    hashing: true # true or false. Default: true
    feature_cardinality: "auto" # "auto" or an integer giving the maximum cardinality of the categorical columns

# Tuner settings. Not implemented yet.
tuner:
  strategy: "gridsearch" # One of: gridsearch, randomsearch, python

# Trainer entries. Each one gives a trainer name and a model table; an entry
# may also carry an `option` string and, if uncommented, a `source_table`.
trainer:
  - name: "train_classifier"
    model_table: "model_lr"
    # source_table: "train" # Set this to use a specific source table name
  - name: "train_randomforest_classifier"
    model_table: "model_rf"
    option: "-trees 15 -seed 31"

# Predictor entries. Each one gives a predictor name, a model table, and an
# output table. The model_table values here match those in the trainer section.
predictor:
  - name: "predict_classifier"
    model_table: "model_lr"
    output_table: "prediction_lr"
    # target_table: "test" # Set this to use a specific target table name
  - name: "predict_randomforest_classifier"
    model_table: "model_rf"
    output_table: "prediction_rf"

# Evaluator settings: the list of metrics to evaluate.
evaluator:
  metrics:
    - auc
    - logloss