Python classifier.Classifier() Examples

The following are 4 code examples of classifier.Classifier(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module `classifier`, or try the search function.
Example #1
Source File: paperranker.py — from the prediction_api project (MIT License)
def __init__(self, use_clf=False):
    """Initialize the ranker with a classifier and its score weights.

    Args:
        use_clf: when True, include a ``field_score`` term supplied by the
            classifier and reduce the co-author weight accordingly.
    """
    self.clf = Classifier()
    self.use_clf = use_clf
    # Co-author similarity dominates; it yields some weight to the
    # classifier's field score when that score participates.
    coauthor_weight = 0.7 if use_clf else 0.9
    self.weight = {
        'coauthor_score': coauthor_weight,
        'pubyear_score': 0.1,
    }
    if use_clf:
        self.weight['field_score'] = 0.2
    print(self.weight)
Example #2
Source File: predict.py — from the Agriculture_KnowledgeGraph project (GNU General Public License v3.0)
def create_predict(HudongItem_csv):
    """Predict a KNN label for every title in *HudongItem_csv* and append
    ``"<title> <label>"`` lines to ``predict_labels2.txt``.

    Args:
        HudongItem_csv: path to a CSV file with a ``title`` column.

    Side effects: queries the Neo4j database, prints progress to stdout,
    and appends to ``predict_labels2.txt``.
    """
    # Read the labeled training items from neo4j.
    db = Neo4j()
    db.connectDB()
    data_set = db.getLabeledHudongItem('labels.txt')

    classifier = Classifier('wiki.zh.bin')
    classifier.load_trainSet(data_set)
    classifier.set_parameter(weight=[1.0, 3.0, 0.2, 4.0, 0], k=10)

    predict_List = readCSVbyColumn(HudongItem_csv, 'title')
    total = len(predict_List)  # hoisted: was recomputed on every iteration

    count = 0
    vis = set()
    # FIX: the original opened the file without closing it on an exception
    # path; `with` guarantees the handle is closed either way.
    with open('predict_labels2.txt', 'a') as file_object:
        for p in predict_List:
            cur = HudongItem(db.matchHudongItembyTitle(p))
            count += 1
            title = cur.title
            if title in vis:  # skip duplicate titles
                continue
            vis.add(title)
            label = classifier.KNN_predict(cur)
            print(str(title) + " " + str(label) + ": " + str(count) + "/" + str(total))
            file_object.write(str(title) + " " + str(label) + "\n")

# Example usage:
# create_predict('hudong_pedia2.csv')
Example #3
Source File: predict.py — from the Agriculture_KnowledgeGraph project (GNU General Public License v3.0)
def create_predict(HudongItem_csv):
    """Predict KNN labels for titles 140-200 of *HudongItem_csv* and append
    ``"<title> <label>"`` lines to ``predict_labels2.txt``.

    This variant only processes the window of items where
    ``140 <= count <= 200`` (a resumable slice of a larger run).

    Args:
        HudongItem_csv: path to a CSV file with a ``title`` column.

    Side effects: queries the Neo4j database, prints progress to stdout,
    and appends to ``predict_labels2.txt``.
    """
    # Read the labeled training items from neo4j.
    db = Neo4j()
    db.connectDB()
    data_set = db.getLabeledHudongItem('labels.txt')

    classifier = Classifier('wiki.zh.bin')
    classifier.load_trainSet(data_set)
    classifier.set_parameter(weight=[1.0, 3.0, 0.2, 4.0, 0], k=10)

    predict_List = readCSVbyColumn(HudongItem_csv, 'title')
    total = len(predict_List)  # hoisted: was recomputed on every iteration

    count = 0
    vis = set()
    # FIX: the original opened the file without closing it on an exception
    # path; `with` guarantees the handle is closed either way.
    with open('predict_labels2.txt', 'a') as file_object:
        for p in predict_List:
            if count > 200:
                break  # upper end of the processing window reached
            # Check the break condition BEFORE the DB lookup: the original
            # fetched one extra item from neo4j only to discard it.
            cur = HudongItem(db.matchHudongItembyTitle(p))
            count += 1
            if count < 140:  # fast-forward to the start of the window
                continue
            title = cur.title
            if title in vis:  # skip duplicate titles
                continue
            vis.add(title)
            label = classifier.KNN_predict(cur)
            print(str(title) + " " + str(label) + ": " + str(count) + "/" + str(total))
            file_object.write(str(title) + " " + str(label) + "\n")
Example #4
Source File: watershed_main.py — from the Video-Highlight-Detection project (MIT License)
def train():
    """Train a highlight classifier on the configured dataset and report
    validation metrics; optionally build per-video prediction curves.

    Reads configuration from the module-level ``args`` (dataset, modality,
    model_name, grid-search flags, thread count) — presumably parsed CLI
    arguments; verify against the caller. Returns a dict
    ``{'val': ..., 'test': ...}`` of per-video prediction curves when
    ``args.create_curves`` is set, otherwise ``None``.
    """
    # Select the dataset; only baidu_VH is implemented so far.
    if args.dataset=='baidu_VH':
        dataset=baidu_VH(PROJECT_METAROOT)
    elif args.dataset=='summe':
        pass
        # TODO: summe dataset support not implemented yet
        #dataset=
    else:
        raise ValueError('No such dataset')
    log.l.info(dataset.print_info())
    # Load training features/labels asynchronously; training uses a coarse
    # sample_rate (100) while validation keeps every frame (1).
    train_data=AsyncReader(dataset,root_path=BAIDU_VH_ROOT,mode='train',modality=args.modality)
    train_data.set_params({'limitedfiles':None,
                           'sample_rate':100,
                           'save_path':'tmp_results/train_{}_sampled.pkl'.format(args.modality)})
    X_train,Y_train=train_data.read_data(k=args.thread)

    val_data=AsyncReader(dataset,root_path=BAIDU_VH_ROOT,mode='val',modality=args.modality)
    val_data.set_params({'limitedfiles':None,
                           'sample_rate':1,
                           'save_path':'tmp_results/val_{}_sampled.pkl'.format(args.modality)})
    X_val,Y_val=val_data.read_data(k=args.thread)


    model=Classifier(model_name=args.model_name,if_grid_search=args.if_grid_search,model_kernel=args.model_kernel)
    # Optional hyper-parameter search on a subsample of the training set
    # before the final fit on the full data.
    if args.if_grid_search:
        model.set_grid_search_params(grid_search_params[args.model_name])
        X_train_grid_search,Y_train_grid_search=Sample_data(X_train,Y_train,args.grid_search_sample_rate)
        model.grid_search(X_train_grid_search,Y_train_grid_search)
    model.fit(X_train,Y_train)

    # Evaluate on a 10% subsample of the validation set.
    X_val_metric,Y_val_metric=Sample_data(X_val,Y_val,0.1)
    predict_val=model.predict(X_val_metric)
    metrics=get_metrics(predict_val,Y_val_metric,metrics=METRICS)
    # Log the validation metrics.
    log.l.info('the metrics of {} is :{}'.format(METRICS,metrics))
    # Free the training arrays before loading the test split to cap memory.
    del X_train,Y_train#,X_train_grid_search,Y_train_grid_search,X_val_metric,Y_val_metric
    if args.create_curves:
    # Build per-video prediction curves for the val and test splits.
        val_curves_dic=dict()
        for k,v in val_data.data_dic.items():
            val_curves_dic[k]=model.predict(v)

        test_data=AsyncReader(dataset,root_path=BAIDU_VH_ROOT,mode='test',modality=args.modality)
        test_data.set_params({'limitedfiles':None,
                               'sample_rate':1,
                               'save_path':'tmp_results/test_{}_sampled.pkl'.format(args.modality)})
        # Labels are unused for the test split; read_data is called for its
        # side effect of populating test_data.data_dic.
        _,_=test_data.read_data(k=args.thread)

        test_curves_dic=dict()
        for k,v in test_data.data_dic.items():
            test_curves_dic[k]=model.predict(v)
        return_info={'val':val_curves_dic,
                     'test':test_curves_dic}
        if args.save_curves:
            joblib.dump(return_info,'tmp_results/val_test_{}_curves.pkl'.format(args.modality))
        return return_info
    return None