For an introduction to XGBoost’s scikit-learn estimator interface, see Using the Scikit-Learn Estimator Interface.
Created on 1 Apr 2015
@author: Jamie Hall
import pickle

import numpy as np
from sklearn.datasets import fetch_california_housing, load_digits, load_iris
from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.model_selection import GridSearchCV, KFold, train_test_split

import xgboost as xgb

rng = np.random.RandomState(31337)

print("Zeros and Ones from the Digits dataset: binary classification")
digits = load_digits(n_class=2)
y = digits["target"]
X = digits["data"]
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print(confusion_matrix(actuals, predictions))

print("Iris: multiclass classification")
iris = load_iris()
y = iris["target"]
X = iris["data"]
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBClassifier(n_jobs=1).fit(X[train_index], y[train_index])
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print(confusion_matrix(actuals, predictions))

print("California Housing: regression")
X, y = fetch_california_housing(return_X_y=True)
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBRegressor(n_jobs=1).fit(X[train_index], y[train_index])
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print(mean_squared_error(actuals, predictions))

print("Parameter optimization")
xgb_model = xgb.XGBRegressor(n_jobs=1)
clf = GridSearchCV(
    xgb_model,
    {"max_depth": [2, 4], "n_estimators": [50, 100]},
    verbose=1,
    n_jobs=1,
    cv=3,
)
clf.fit(X, y)
print(clf.best_score_)
print(clf.best_params_)

# The sklearn API models are picklable
print("Pickling sklearn API models")
# must open in binary format to pickle
pickle.dump(clf, open("best_calif.pkl", "wb"))
clf2 = pickle.load(open("best_calif.pkl", "rb"))
print(np.allclose(clf.predict(X), clf2.predict(X)))

# Early-stopping
X = digits["data"]
y = digits["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = xgb.XGBClassifier(n_jobs=1, early_stopping_rounds=10, eval_metric="auc")
clf.fit(X_train, y_train, eval_set=[(X_test, y_test)])
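As a hedged follow-up to the early-stopping step above (not part of the original demo): once fitting stops early against the eval_set, the sklearn wrapper exposes the best round via the best_score and best_iteration properties, and the model can be saved in XGBoost's native format, which is more portable across XGBoost versions than pickling. The sketch below reuses clf, X_test, np and xgb from the demo; the file name "clf.json" is an illustrative choice, not something the demo prescribes.

# A minimal sketch, assuming `clf` was fitted with early stopping as above.
print("Best AUC:", clf.best_score)
print("Best iteration:", clf.best_iteration)

# Restrict prediction to the trees built up to the best iteration.
preds = clf.predict(X_test, iteration_range=(0, clf.best_iteration + 1))

# XGBoost's native serialization round-trips the trained booster;
# "clf.json" is a hypothetical file name chosen for this example.
clf.save_model("clf.json")
clf_loaded = xgb.XGBClassifier()
clf_loaded.load_model("clf.json")
print(np.allclose(clf.predict_proba(X_test), clf_loaded.predict_proba(X_test)))

Pickling (as in the GridSearchCV step) captures the whole Python object, but XGBoost's documentation recommends save_model for long-term storage because the native JSON format remains readable across XGBoost releases.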
Gallery generated by Sphinx-Gallery