# !pip install scikit-learn
# !pip install h2o
# !pip install pandas
import h2o
from h2o.automl import H2OAutoML
import numpy as np
# Load the iris dataset from scikit-learn.
from sklearn import datasets
iris = datasets.load_iris()
# Split into training data (80%) and validation data (20%).
from sklearn.model_selection import train_test_split as split
x_train, x_test, y_train, y_test = split(
    iris.data, iris.target, train_size=0.8, test_size=0.2
)
# Append the target as the last column of each matrix.
# reshape(-1, 1) lets NumPy infer the row count, so the script no longer
# breaks when the split ratio or the dataset size changes (the original
# hard-coded 120 and 30 rows).
train = np.concatenate([x_train, y_train.reshape(-1, 1)], axis=1)
test = np.concatenate([x_test, y_test.reshape(-1, 1)], axis=1)
# Start (or connect to) the local H2O cluster and upload both matrices.
h2o.init()
h2o_train = h2o.H2OFrame(train)
h2o_test = h2o.H2OFrame(test)
aml = H2OAutoML(
    max_runtime_secs=300,
    max_models=None,
    stopping_metric='AUTO',
    sort_metric='AUTO',
    seed=42,
)
# For multiclass classification the target column must be converted to a
# factor; without this, AutoML trains regression models instead.
target_col = h2o_train.columns[-1]
h2o_train[target_col] = h2o_train[target_col].asfactor()
# Apply the same conversion to the held-out frame so its target column has
# the same type as the training target.
test_target_col = h2o_test.columns[-1]
h2o_test[test_target_col] = h2o_test[test_target_col].asfactor()
aml.train(
    y=target_col,
    training_frame=h2o_train,
)
Checking whether there is an H2O instance running at http://localhost:54321 . connected.
H2O_cluster_uptime: |
18 hours 6 mins |
H2O_cluster_timezone: |
Asia/Tokyo |
H2O_data_parsing_timezone: |
UTC |
H2O_cluster_version: |
3.36.1.2 |
H2O_cluster_version_age: |
9 days |
H2O_cluster_name: |
H2O_from_python_mnbi_zkstet |
H2O_cluster_total_nodes: |
1 |
H2O_cluster_free_memory: |
6.044 Gb |
H2O_cluster_total_cores: |
8 |
H2O_cluster_allowed_cores: |
8 |
H2O_cluster_status: |
locked, healthy |
H2O_connection_url: |
http://localhost:54321 |
H2O_connection_proxy: |
{"http": null, "https": null} |
H2O_internal_security: |
False |
Python_version: |
3.10.4 final |
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |█
10:46:56.488: AutoML: XGBoost is not available; skipping it.
10:46:57.115: _min_rows param, The dataset size is too small to split for min_rows=100.0: must have at least 200.0 (weighted) rows, but have only 120.0.
██████████████████████████████████████████████████████████████| (done) 100%
Model Details
=============
H2ODeepLearningEstimator : Deep Learning
Model Key: DeepLearning_grid_3_AutoML_8_20220605_104656_model_3
Status of Neuron Layers: predicting C5, 3-class classification, multinomial distribution, CrossEntropy loss, 21,003 weights/biases, 253.9 KB, 963 training samples, mini-batch size 1
|
|
layer |
units |
type |
dropout |
l1 |
l2 |
mean_rate |
rate_rms |
momentum |
mean_weight |
weight_rms |
mean_bias |
bias_rms |
0 |
|
1 |
4 |
Input |
10.0 |
|
|
|
|
|
|
|
|
|
1 |
|
2 |
100 |
RectifierDropout |
0.0 |
0.0 |
0.0 |
0.00077 |
0.00069 |
0.0 |
0.008151 |
0.130095 |
0.499209 |
0.002897 |
2 |
|
3 |
100 |
RectifierDropout |
0.0 |
0.0 |
0.0 |
0.001252 |
0.001224 |
0.0 |
0.00101 |
0.100545 |
1.00014 |
0.003832 |
3 |
|
4 |
100 |
RectifierDropout |
0.0 |
0.0 |
0.0 |
0.099782 |
0.290947 |
0.0 |
0.000925 |
0.100185 |
0.999985 |
0.001261 |
4 |
|
5 |
3 |
Softmax |
|
0.0 |
0.0 |
0.10793 |
0.300379 |
0.0 |
-0.066706 |
0.539547 |
-0.000791 |
0.000989 |
ModelMetricsMultinomial: deeplearning
** Reported on train data. **
MSE: 0.05029182049761996
RMSE: 0.22425837887940767
LogLoss: 0.18070025670812911
Mean Per-Class Error: 0.05940170940170939
AUC: NaN
AUCPR: NaN
Multinomial auc values: Table is not computed because it is disabled (model parameter 'auc_type' is set to AUTO or NONE) or due to domain size (maximum is 50 domains).
Multinomial auc_pr values: Table is not computed because it is disabled (model parameter 'auc_type' is set to AUTO or NONE) or due to domain size (maximum is 50 domains).
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
|
0 |
1 |
2 |
Error |
Rate |
0 |
41.0 |
0.0 |
0.0 |
0.000000 |
0 / 41 |
1 |
0.0 |
38.0 |
2.0 |
0.050000 |
2 / 40 |
2 |
0.0 |
5.0 |
34.0 |
0.128205 |
5 / 39 |
3 |
41.0 |
43.0 |
36.0 |
0.058333 |
7 / 120 |
Top-3 Hit Ratios:
|
k |
hit_ratio |
0 |
1 |
0.941667 |
1 |
2 |
1.000000 |
2 |
3 |
1.000000 |
ModelMetricsMultinomial: deeplearning
** Reported on cross-validation data. **
MSE: 0.013888362795128895
RMSE: 0.11784889814982953
LogLoss: 0.05575414737917049
Mean Per-Class Error: 0.01688034188034188
AUC: NaN
AUCPR: NaN
Multinomial auc values: Table is not computed because it is disabled (model parameter 'auc_type' is set to AUTO or NONE) or due to domain size (maximum is 50 domains).
Multinomial auc_pr values: Table is not computed because it is disabled (model parameter 'auc_type' is set to AUTO or NONE) or due to domain size (maximum is 50 domains).
Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
|
0 |
1 |
2 |
Error |
Rate |
0 |
41.0 |
0.0 |
0.0 |
0.000000 |
0 / 41 |
1 |
0.0 |
39.0 |
1.0 |
0.025000 |
1 / 40 |
2 |
0.0 |
1.0 |
38.0 |
0.025641 |
1 / 39 |
3 |
41.0 |
40.0 |
39.0 |
0.016667 |
2 / 120 |
Top-3 Hit Ratios:
|
k |
hit_ratio |
0 |
1 |
0.983333 |
1 |
2 |
1.000000 |
2 |
3 |
1.000000 |
Cross-Validation Metrics Summary:
|
|
mean |
sd |
cv_1_valid |
cv_2_valid |
cv_3_valid |
cv_4_valid |
cv_5_valid |
0 |
accuracy |
0.891667 |
0.069722 |
0.875000 |
0.833333 |
0.916667 |
0.833333 |
1.000000 |
1 |
auc |
NaN |
0.000000 |
NaN |
NaN |
NaN |
NaN |
NaN |
2 |
err |
0.108333 |
0.069722 |
0.125000 |
0.166667 |
0.083333 |
0.166667 |
0.000000 |
3 |
err_count |
2.600000 |
1.673320 |
3.000000 |
4.000000 |
2.000000 |
4.000000 |
0.000000 |
4 |
logloss |
0.255812 |
0.109040 |
0.345721 |
0.388784 |
0.222287 |
0.198158 |
0.124109 |
5 |
max_per_class_error |
0.367619 |
0.288447 |
0.428571 |
0.666667 |
0.142857 |
0.600000 |
0.000000 |
6 |
mean_per_class_accuracy |
0.862460 |
0.100201 |
0.857143 |
0.777778 |
0.919048 |
0.758333 |
1.000000 |
7 |
mean_per_class_error |
0.137540 |
0.100201 |
0.142857 |
0.222222 |
0.080952 |
0.241667 |
0.000000 |
8 |
mse |
0.077454 |
0.038816 |
0.094789 |
0.131434 |
0.062697 |
0.071481 |
0.026868 |
9 |
pr_auc |
NaN |
0.000000 |
NaN |
NaN |
NaN |
NaN |
NaN |
10 |
r2 |
0.872881 |
0.085958 |
0.847915 |
0.737132 |
0.909490 |
0.907891 |
0.961976 |
11 |
rmse |
0.270417 |
0.073558 |
0.307878 |
0.362538 |
0.250394 |
0.267359 |
0.163914 |
Scoring History:
|
|
timestamp |
duration |
training_speed |
epochs |
iterations |
samples |
training_rmse |
training_logloss |
training_r2 |
training_classification_error |
training_auc |
training_pr_auc |
0 |
|
2022-06-05 10:51:34 |
0.000 sec |
None |
0.000000 |
0 |
0.0 |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
1 |
|
2022-06-05 10:51:34 |
33.034 sec |
7538 obs/sec |
0.816667 |
1 |
98.0 |
0.524912 |
0.772247 |
0.586529 |
0.316667 |
NaN |
NaN |
2 |
|
2022-06-05 10:51:34 |
33.137 sec |
8598 obs/sec |
8.025000 |
10 |
963.0 |
0.224258 |
0.180700 |
0.924531 |
0.058333 |
NaN |
NaN |
Variable Importances:
|
variable |
relative_importance |
scaled_importance |
percentage |
0 |
C3 |
1.000000 |
1.000000 |
0.270757 |
1 |
C1 |
0.983227 |
0.983227 |
0.266216 |
2 |
C2 |
0.872008 |
0.872008 |
0.236102 |
3 |
C4 |
0.838109 |
0.838109 |
0.226924 |