1、准备数据集
数据集下载地址:https://github.com/wangle1218/QASystemOnMedicalKG/blob/master/data/medical.json
2、导入相关包
from py2neo import Graph, Node, Relationship
import pandas as pd
3、连接Neo4j
# Open a py2neo connection to the Neo4j server on localhost as user "neo4j".
# NOTE(review): 'xxxx' looks like a placeholder password to be replaced — confirm.
neo_graph = Graph( host= "127.0.0.1" , user= 'neo4j' , password= 'xxxx' )
# Bare expression: presumably shown as cell output in a notebook/REPL; it has no effect in a script.
neo_graph
4、构建知识图谱
from py2neo import Graph
import os
from tqdm import tqdm
import json
import datetime
import logging

logger = logging.getLogger(__name__)


class CreateKG:
    """Build a medical knowledge graph in Neo4j from a JSON-lines data set.

    Each line of the data file is one JSON object describing a disease.
    ``init`` extracts disease nodes, related entity nodes (department,
    symptom, cure way, check, drug, crowd, food) and the relations between
    them, then writes everything to Neo4j with ``MERGE`` so re-running the
    import does not duplicate nodes or relationships.
    """

    # Optional disease attributes copied onto the disease node; a missing
    # attribute is stored as the empty string.
    _DISEASE_PROPS = (
        "desc",
        "prevent",
        "cause",
        "get_prob",
        "get_way",
        "cure_lasttime",
        "cured_prob",
        "cost_money",
    )

    # (json field, target label, relationship type, value is a list,
    #  collect target as an entity).
    # "easy_get" holds a single value rather than a list.  "acompany" points
    # at other diseases, so it only yields relations, never new entities.
    # NOTE: "diseaseDepartmentRelations" is plural unlike the other types;
    # it is kept as-is because existing graphs and queries use that name.
    _FIELD_SPECS = (
        ("cure_department", "department", "diseaseDepartmentRelations", True, True),
        ("symptom", "symptom", "diseaseSymptomRelation", True, True),
        ("cure_way", "cureWay", "diseaseCureWayRelation", True, True),
        ("check", "check", "diseaseCheckRelation", True, True),
        ("common_drug", "drug", "diseaseDrugRelation", True, True),
        ("easy_get", "crowd", "diseaseCrowdRelation", False, True),
        ("recommand_eat", "food", "diseaseSuitableFoodRelation", True, True),
        ("not_eat", "food", "diseaseTabooFoodRelation", True, True),
        ("acompany", "disease", "diseaseDiseaseRelation", True, False),
    )

    def __init__(self, kg_host, kg_port, kg_user, kg_password, data_path):
        """Validate the data path and connect to Neo4j.

        Args:
            kg_host: Neo4j host name or address.
            kg_port: Accepted for interface compatibility but not forwarded
                to py2neo; the connection uses py2neo's default port.
                NOTE(review): the driver script passes 7474 (the HTTP port),
                so forwarding it could break the default connection.
            kg_user: Neo4j user name.
            kg_password: Neo4j password.
            data_path: Path to the JSON-lines data set.

        Raises:
            ValueError: If ``data_path`` is empty.
            FileNotFoundError: If ``data_path`` does not exist.
        """
        # Check the cheap local precondition before opening a connection.
        if not data_path:
            raise ValueError("数据集地址为空")
        if not os.path.exists(data_path):
            raise FileNotFoundError("数据集不存在")
        self.graph = Graph(host=kg_host, user=kg_user, password=kg_password)
        self.data_path = data_path

    @staticmethod
    def _entity_statement(label, item):
        """Return ``(cypher, parameters)`` that MERGEs one node.

        Property values travel as query parameters, so quotes or backslashes
        in the data cannot break or alter the query.  Labels and property
        keys cannot be parameterized in Cypher and are interpolated; they
        must be trusted identifiers.
        """
        pattern = ", ".join(f"{key}: $prop_{key}" for key in item)
        params = {f"prop_{key}": value for key, value in item.items()}
        return f"MERGE (n:{label} {{{pattern}}})", params

    @staticmethod
    def _relation_statement(s_label, e_label, label, item):
        """Return ``(cypher, parameters)`` that MERGEs one relationship."""
        cql = (
            f"MATCH (p:{s_label}), (q:{e_label}) "
            "WHERE p.name = $s_name AND q.name = $e_name "
            f"MERGE (p)-[r:{label}]->(q)"
        )
        return cql, {"s_name": item["s_name"], "e_name": item["e_name"]}

    @staticmethod
    def _report_failures(kind, label, failures, total):
        """Log one summary line when some writes of a batch failed."""
        if failures:
            logger.warning(
                "%d of %d %s writes failed for %r", failures, total, kind, label
            )

    def saveEntity(self, label, data):
        """MERGE one node labelled ``label`` per property dict in ``data``.

        The load is best-effort: a failing item is skipped, counted, and
        reported in a single warning after the batch.  Values are stored
        unmodified (apostrophes are no longer stripped, since parameters
        make that workaround unnecessary).
        """
        print("\n写入实体:", label)
        failures = 0
        for item in tqdm(data, ncols=80):
            try:
                if not item:
                    continue
                cql, params = self._entity_statement(label, item)
                self.graph.run(cql, params)
            except Exception:  # keep loading the remaining items
                failures += 1
                logger.debug("entity write failed: %r", item, exc_info=True)
        self._report_failures("entity", label, failures, len(data))

    def saveRelation(self, s_label, e_label, label, data):
        """MERGE a ``label`` relationship for every row in ``data``.

        Each row is a dict with ``s_name`` and ``e_name``; the relationship
        goes from the ``s_label`` node to the ``e_label`` node with those
        names.  Failures are skipped, counted, and reported once.
        """
        print("\n写入关系:", label)
        failures = 0
        for item in tqdm(data, ncols=80):
            try:
                cql, params = self._relation_statement(s_label, e_label, label, item)
                self.graph.run(cql, params)
            except Exception:  # keep loading the remaining items
                failures += 1
                logger.debug("relation write failed: %r", item, exc_info=True)
        self._report_failures("relation", label, failures, len(data))

    def getValue(self, key, data):
        """Return ``data[key]``, or ``""`` when the key is absent."""
        return data.get(key, "")

    def _extract(self, lines):
        """Parse JSON lines into entities and relations.

        Args:
            lines: Iterable of JSON strings, one disease record per line.
                Blank lines are ignored.

        Returns:
            ``(entities, relations)`` where ``entities`` maps a node label
            to its list of property dicts (in write order) and ``relations``
            is a list of ``(s_label, e_label, rel_type, rows)`` tuples.
        """
        diseases = []
        # Insertion-ordered dicts act as ordered sets: O(1) de-duplication
        # while preserving first-seen order.
        seen = {}
        rows_by_type = {}
        for _, e_label, rel_type, _, collect in self._FIELD_SPECS:
            rows_by_type[rel_type] = []
            if collect:
                seen.setdefault(e_label, {})

        for line in lines:
            if not line.strip():
                continue  # tolerate blank/trailing lines in the data file
            data = json.loads(line)
            disease_name = data["name"]
            disease = {"name": disease_name}
            for prop in self._DISEASE_PROPS:
                disease[prop] = self.getValue(prop, data)
            diseases.append(disease)

            for field, e_label, rel_type, is_list, collect in self._FIELD_SPECS:
                if field not in data:
                    continue
                targets = data[field] if is_list else [data[field]]
                for target in targets:
                    rows_by_type[rel_type].append(
                        {"s_name": disease_name, "e_name": target}
                    )
                    if collect:
                        seen[e_label].setdefault(target)

        entities = {"disease": diseases}
        for e_label, names in seen.items():
            entities[e_label] = [{"name": name} for name in names]
        relations = [
            ("disease", e_label, rel_type, rows_by_type[rel_type])
            for _, e_label, rel_type, _, _ in self._FIELD_SPECS
        ]
        return entities, relations

    def init(self):
        """Read the data set and write all entities, then all relations."""
        print("====数据抽取======")
        with open(self.data_path, "r", encoding="utf8") as f:
            # readlines() lets tqdm know the total for its progress bar.
            entities, relations = self._extract(tqdm(f.readlines(), ncols=80))

        # Nodes must exist before relationships can MATCH them.
        for label, rows in entities.items():
            self.saveEntity(label, rows)
        for s_label, e_label, rel_type, rows in relations:
            self.saveRelation(s_label, e_label, rel_type, rows)


if __name__ == '__main__':
    start = datetime.datetime.now()
    kg_host = "127.0.0.1"
    kg_port = 7474
    kg_user = "neo4j"
    # NOTE(review): a real-looking credential is committed here. Prefer
    # setting NEO4J_PASSWORD, then rotate the password and drop the fallback.
    kg_password = os.environ.get("NEO4J_PASSWORD", "960418.hmx")
    data_path = "dataset/知识图谱/medical.json"
    kg = CreateKG(kg_host, kg_port, kg_user, kg_password, data_path)
    kg.init()
    end = datetime.datetime.now()
    print("共耗时:{}".format(end - start))
5、实验结果
构建的知识图谱
鼻炎的病症描述:
match (n:disease) where n.name = '鼻炎' return n.desc
鼻炎所属的科室:
match (n:disease)-[e:diseaseDepartmentRelations]->(n1:department) where n.name = '鼻炎' return n,n1
鼻炎的症状:
match (n:disease)-[e:diseaseSymptomRelation]->(n1:symptom) where n.name = '鼻炎' return n,n1
鼻炎的治疗方式:
match (n:disease)-[e:diseaseCureWayRelation]->(n1:cureWay) where n.name = '鼻炎' return n,n1
鼻炎应该用什么药:
match (n:disease)-[e:diseaseDrugRelation]->(n1:drug) where n.name = '鼻炎' return n,n1