# -*- coding: utf-8 -*-
"""
Created on Thu Nov 16 09:05:49 2017

@author: chenk
"""
def lightGBM(df):
    """Cross-validate a LightGBM binary classifier; return mean AUC and AP.

    The rows are split into 5 shuffled folds. For every fold a new GBDT
    model is trained on the training rows and scored on the held-out rows
    with ROC AUC and average precision. The per-fold predictions are
    printed as a side effect.

    Args:
        df: 2-D array-like whose column 0 holds the binary label and whose
            remaining columns hold the features. It is converted with
            ``np.asarray`` so that positional ``[rows, col]`` indexing works
            for both NumPy arrays and pandas DataFrames.

    Returns:
        A tuple ``(aucave, APave)``: the ROC AUC and the average precision,
        each averaged over the folds.

    Note:
        The fold shuffle is not seeded, so results differ between calls.
    """
    import lightgbm as lgb
    import numpy as np
    from sklearn.metrics import average_precision_score
    from sklearn.metrics import roc_auc_score
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # sklearn.model_selection is its replacement.
    from sklearn.model_selection import KFold

    print("load data")

    # No-op for ndarray input; lets a DataFrame be indexed positionally below.
    data = np.asarray(df)

    n_folds = 5
    kf = KFold(n_splits=n_folds, shuffle=True)
    auc = np.zeros(n_folds)
    AP = np.zeros(n_folds)

    for itr, (train_index, val_index) in enumerate(kf.split(data)):
        # Column 0 is the label, everything after it is a feature.
        y_train = data[train_index, 0]
        y_test = data[val_index, 0]
        X_train = data[train_index, 1:]
        X_test = data[val_index, 1:]

        # create dataset for lightgbm
        lgb_train = lgb.Dataset(X_train, y_train)

        # Built inside the loop so every lgb.train call gets a fresh dict.
        # NOTE(review): 'num_trees' (100) here and num_boost_round=50 below
        # both set the number of boosting rounds -- check LightGBM's
        # documented precedence and keep only one of them.
        params = {
            'task': 'train',
            'boosting_type': 'gbdt',
            'objective': 'binary',
            'metric': {'binary_logloss'},
            'metric_freq': 1,
            'is_training_metric': 'true',
            'max_bin': 255,
            'num_trees': 100,
            'learning_rate': 0.1,
            'num_leaves': 20,
            'tree_learner': 'serial',
            'feature_fraction': 0.9,
            'bagging_freq': 5,
            'bagging_fraction': 0.8,
            'verbose': 0,
        }

        print('Start training...')
        gbm = lgb.train(params, lgb_train, num_boost_round=50)

        # NOTE(review): nothing is written to disk here; the message is kept
        # only because it is part of the function's existing output.
        print('Save model...')

        print('Start predicting...')
        # No validation set / early stopping is configured above, so
        # best_iteration presumably does not truncate the model -- confirm.
        y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
        print(y_pred)

        # Score the held-out fold.
        auc[itr] = roc_auc_score(y_test, y_pred)
        AP[itr] = average_precision_score(y_test, y_pred)

    aucave = np.average(auc)
    APave = np.average(AP)
    return aucave, APave