{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "4e37649f", "metadata": {}, "outputs": [], "source": [ "import h2o\n", "from h2o.automl import H2OAutoML\n", "\n" ] }, { "cell_type":...

Use the H2O AutoML (Automatic Machine Learning) toolkit to classify glioblastomas using the dataset Data_Glioblastoma5Patients_SC.csv, evaluate the performance, and discuss the results.



The code and CSV are provided, and I have done most of the work. Just revise the code so that it works with Data_Glioblastoma5Patients_SC.csv in a Jupyter notebook.


{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "4e37649f", "metadata": {}, "outputs": [], "source": [ "import h2o\n", "from h2o.automl import H2OAutoML\n", "\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "ac821c50", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Checking whether there is an H2O instance running at http://localhost:54321 . connected.\n" ] }, { "data": { "text/html": [ "

\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "


































































H2O_cluster_uptime:1 hour 11 mins
H2O_cluster_timezone:America/Chicago
H2O_data_parsing_timezone:UTC
H2O_cluster_version:3.32.1.3
H2O_cluster_version_age:3 days
H2O_cluster_name:H2O_from_python_jennifernwogu_ilnoqf
H2O_cluster_total_nodes:1
H2O_cluster_free_memory:458.5 Mb
H2O_cluster_total_cores:4
H2O_cluster_allowed_cores:4
H2O_cluster_status:locked, healthy
H2O_connection_url:http://localhost:54321
H2O_connection_proxy:{\"http\": null, \"https\": null}
H2O_internal_security:False
H2O_API_Extensions:Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4
Python_version:3.8.8 final

" ], "text/plain": [ "-------------------------- ------------------------------------------------------------------\n", "H2O_cluster_uptime: 1 hour 11 mins\n", "H2O_cluster_timezone: America/Chicago\n", "H2O_data_parsing_timezone: UTC\n", "H2O_cluster_version: 3.32.1.3\n", "H2O_cluster_version_age: 3 days\n", "H2O_cluster_name: H2O_from_python_jennifernwogu_ilnoqf\n", "H2O_cluster_total_nodes: 1\n", "H2O_cluster_free_memory: 458.5 Mb\n", "H2O_cluster_total_cores: 4\n", "H2O_cluster_allowed_cores: 4\n", "H2O_cluster_status: locked, healthy\n", "H2O_connection_url: http://localhost:54321\n", "H2O_connection_proxy: {\"http\": null, \"https\": null}\n", "H2O_internal_security: False\n", "H2O_API_Extensions: Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4\n", "Python_version: 3.8.8 final\n", "-------------------------- ------------------------------------------------------------------" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Parse progress: |█████████████████████████████████████████████████████████| 100%\n", "Parse progress: |█████████████████████████████████████████████████████████| 100%\n" ] } ], "source": [ "h2o.init()\n", "\n", "# Import a sample binary outcome train/test set into H2O\n", "train = h2o.import_file(\"https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv\")\n", "test = h2o.import_file(\"https://s3.amazonaws.com/erin-data/higgs/higgs_test_5k.csv\")\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "9de5a992", "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Identify predictors and response\n", "x = train.columns\n", "y = \"response\"\n", "x.remove(y)\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "47ee65d6", "metadata": {}, "outputs": [], "source": [ "\n", "# For binary classification, response should be a factor\n", "train[y] = train[y].asfactor()\n", "test[y] = test[y].asfactor()" ] }, { "cell_type": "code", 
"execution_count": 5, "id": "101f1881", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AutoML progress: |████████████████████████████████████████████████████████| 100%\n" ] } ], "source": [ "# Run AutoML for 20 base models (limited to 1 hour max runtime by default)\n", "aml = H2OAutoML(max_models=20, seed=1)\n", "aml.train(x=x, y=y, training_frame=train)" ] }, { "cell_type": "code", "execution_count": 6, "id": "c06878d6", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "




























































































model_id

auc

logloss

aucpr

mean_per_class_error

rmse

mse
StackedEnsemble_AllModels_AutoML_20210522_195449
0.788938
0.550169
0.807374
0.324269
0.432087
0.186699
StackedEnsemble_BestOfFamily_AutoML_20210522_195449
0.787794
0.551334
0.806123
0.325839
0.432641
0.187178
GBM_5_AutoML_20210522_195449
0.78219

0.558353
0.801738
0.319658
0.435512
0.18967
GBM_2_AutoML_20210522_195449
0.777673
0.562514
0.796364
0.334056
0.437583
0.191479
GBM_1_AutoML_20210522_195449
0.777294
0.562744
0.799184
0.356261
0.437727
0.191605
GBM_3_AutoML_20210522_195449
0.775488
0.564794
0.794892
0.327971
0.438722
0.192477
GBM_grid__1_AutoML_20210522_195449_model_1
0.772926
0.568181
0.791195
0.322808
0.439997
0.193598
GBM_4_AutoML_20210522_195449
0.77248

0.569483
0.792582
0.336913
0.440873
0.194369
GBM_grid__1_AutoML_20210522_195449_model_2
0.77049

0.569351
0.788633
0.369523
May 23, 2021
SOLUTION.PDF

Get Answer To This Question

Submit New Assignment

Copy and Paste Your Assignment Here