
基于SpringBoot+Neo4j+Spark实现的论文智能分析问答系统(采用朴素贝叶斯分类器)
毕业设计——基于SpringBoot+Neo4j+Spark实现的论文智能分析问答系统(采用朴素贝叶斯分类器)
·
完整项目地址:https://download.csdn.net/download/lijunhcn/88430302
写在前面
分析了下这个电影知识问答系统,底层功能实现是操作cypher语句,前台的业务:
1.汉语分词器HanLP将原始语句分词
2.语句抽象化(提高匹配问题模板标签准确率)
3.获取模板标签,使用模板将句子转化成系统可以识别的结果
4.cypher语句获取结果返回前台
既然涉及问答系统,中途也看了微软小冰和其他的语料库资料,感觉自己做出一个偏向应用的石油相关智能问答系统的可能性不大:首先,自己不做
爬虫,语料库这个问题就解决不了;要是真有现成的语料库,那也就没有我做的必要了。
区别
对比自己想做的石油论文智能分析系统,我的数据来源都是国外网站,用户的原始语句是英文就用不到分词,但词汇库就复杂了,需要自己去找英
文人名词汇表,提取论文信息生成全文搜索词汇表。所以对这个项目我抱的期望不是很大,先罗列几个比较困难的点,做出来更新:
1.项目中通过稠密向量来生成训练集,而每个局部向量是由词汇表来确定的,电影知识问答系统中用的是一个190词的电影相关汉语词汇表,但石油相关
词汇都来自英语网站的数据,所以词汇表内容都是英语词汇,解决办法是在有了一些数据之后再生成这个表,但是搞爬虫的同学还在准备中期考试。
2.问题归类,英语比较吃力了,同样的一个问题怎么来问,同一个问题预设问法越多,模型在学习后识别同类问题的准确率才会更高。
3.。。。
/* Import author.csv into Neo4j, creating one Author node per CSV row.
   The node is merged on its id only and the remaining columns are written
   with SET. Merging on every property would fail whenever a CSV field is
   empty (LOAD CSV yields null and MERGE rejects null property values) and
   would create a second node with the same id if any non-key value changed
   between imports. A row whose id is missing or not numeric still fails,
   because toInteger() then returns null. */
load csv with headers from "file:///author.csv" as line
merge (p:Author {id: toInteger(line.id)})
set p.name  = line.name,
    p.email = line.email,
    p.birth = line.birth;
/* Import paper.csv into Neo4j, creating one Paper node per CSV row.
   The node is merged on its id only; all descriptive columns are written
   with SET so that empty CSV fields (null) do not abort the import and a
   re-import with changed text does not create a duplicate node for the
   same id. */
load csv with headers from "file:///paper.csv" as line
merge (p:Paper {id: toInteger(line.id)})
set p.name             = line.name,
    p.doi              = line.doi,
    p.document_id      = line.document_id,
    p.publisher        = line.publisher,
    p.publication_date = line.publication_date,
    p.summary          = line.summary,
    p.introduction     = line.introduction;
/* Import keyword.csv into Neo4j, creating one Keyword node per CSV row.
   The node is merged on its id only and the name is written with SET, so
   an empty name (null) does not abort the import and a renamed keyword
   updates the existing node instead of adding a duplicate. */
load csv with headers from "file:///keyword.csv" as line
merge (p:Keyword {id: toInteger(line.id)})
set p.name = line.name;
/* Import author_paper.csv into Neo4j: for every row, look up the Author and
   the Paper by id and merge a `create` relationship from the author to the
   paper. The relationship carries both ids as properties. Rows whose author
   or paper is not found produce no relationship. */
LOAD CSV WITH HEADERS FROM "file:///author_paper.csv" AS row
MATCH (author:Author {id: toInteger(row.author_id)})
MATCH (paper:Paper {id: toInteger(row.paper_id)})
MERGE (author)-[:create {
    author_id: toInteger(row.author_id),
    paper_id: toInteger(row.paper_id)
}]->(paper);
/* Import paper_keyword.csv into Neo4j: for every row, look up the Paper and
   the Keyword by id and merge an `attribute` relationship from the paper to
   the keyword. The relationship carries both ids as properties. Rows whose
   paper or keyword is not found produce no relationship. */
LOAD CSV WITH HEADERS FROM "file:///paper_keyword.csv" AS row
MATCH (paper:Paper {id: toInteger(row.paper_id)})
MATCH (keyword:Keyword {id: toInteger(row.keyword_id)})
MERGE (paper)-[:attribute {
    paper_id: toInteger(row.paper_id),
    keyword_id: toInteger(row.keyword_id)
}]->(keyword);
/* MySQL schema */
-- Entity tables
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
--
-- author: one row per paper author.
-- The table uses utf8mb4 so that names outside the Latin-1 repertoire
-- (for example Chinese names) can be stored without loss.
-- birth: presumably the year of birth -- TODO confirm against the data.
-- AUTO_INCREMENT=4 is kept from the original definition.
CREATE TABLE `author` (
    `id` INT NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(100) NULL DEFAULT NULL,
    `email` VARCHAR(50) NULL DEFAULT NULL,
    `birth` INT NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文作者'
DEFAULT CHARSET=utf8mb4
COLLATE='utf8mb4_unicode_ci'
ENGINE=InnoDB
AUTO_INCREMENT=4
;
-- paper: detailed information about one paper.
-- abstract is TEXT because a 255-character VARCHAR truncates (or rejects,
-- in strict mode) abstracts of normal length.
-- The table uses utf8mb4 so that non-Latin-1 text can be stored.
-- NOTE(review): publication_date is stored as a string; a DATE column would
-- be preferable once the incoming format is confirmed.
-- NOTE(review): the Neo4j import of paper.csv in this file reads name,
-- summary and introduction columns that this table does not define --
-- confirm how that CSV is produced.
CREATE TABLE `paper` (
    `id` INT NOT NULL AUTO_INCREMENT,
    `doi` VARCHAR(50) NULL DEFAULT NULL,
    `document_id` VARCHAR(50) NULL DEFAULT NULL,
    `publisher` VARCHAR(50) NULL DEFAULT NULL,
    `publication_date` VARCHAR(50) NULL DEFAULT NULL,
    `abstract` TEXT NULL DEFAULT NULL,
    `keywords` VARCHAR(100) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文详细信息'
DEFAULT CHARSET=utf8mb4
COLLATE='utf8mb4_unicode_ci'
ENGINE=InnoDB
AUTO_INCREMENT=2
;
-- genre: paper categories, one row per category.
-- The table uses utf8mb4 so that category names outside the Latin-1
-- repertoire can be stored without loss.
CREATE TABLE `genre` (
    `id` INT NOT NULL AUTO_INCREMENT,
    `type` VARCHAR(255) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文类别'
DEFAULT CHARSET=utf8mb4
COLLATE='utf8mb4_unicode_ci'
ENGINE=InnoDB
AUTO_INCREMENT=2
;
-- meeting: conferences/meetings that papers were part of.
-- The table uses utf8mb4 so that meeting names and locations outside the
-- Latin-1 repertoire can be stored without loss.
-- NOTE(review): `date` is stored as a string; a DATE column would be
-- preferable once the incoming format is confirmed.
CREATE TABLE `meeting` (
    `id` INT NOT NULL AUTO_INCREMENT,
    `location` VARCHAR(100) NULL DEFAULT NULL,
    `date` VARCHAR(100) NULL DEFAULT NULL,
    `name` VARCHAR(100) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='论文参与的会议'
DEFAULT CHARSET=utf8mb4
COLLATE='utf8mb4_unicode_ci'
ENGINE=InnoDB
AUTO_INCREMENT=2
;
-- origination: organizations that authors belong to.
-- The table uses utf8mb4 so that organization names and locations outside
-- the Latin-1 repertoire can be stored without loss.
CREATE TABLE `origination` (
    `id` INT NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(255) NULL DEFAULT NULL,
    `location` VARCHAR(255) NULL DEFAULT NULL,
    PRIMARY KEY (`id`)
)
COMMENT='作者属于的组织'
DEFAULT CHARSET=utf8mb4
COLLATE='utf8mb4_unicode_ci'
ENGINE=InnoDB
AUTO_INCREMENT=2
;
-- Association tables; each one becomes a relationship when the data is
-- transferred into Neo4j.
-- (MySQL only treats "--" as a comment when it is followed by whitespace.)
--
-- author_paper: links authors to the papers they wrote.
-- The unique key covers (author_id, paper_id): a unique key on author_id
-- alone would allow each author to appear in at most one row, i.e. to have
-- written at most one paper. The composite key still prevents duplicate
-- links and, with author_id as its leftmost column, still backs the
-- author_id foreign key.
-- NOTE(review): paper_id is nullable as in the original definition; a link
-- row without a paper is probably meaningless -- consider NOT NULL.
CREATE TABLE `author_paper` (
    `author_id` INT NOT NULL,
    `paper_id` INT NULL DEFAULT NULL,
    UNIQUE INDEX `author_id` (`author_id`, `paper_id`),
    INDEX `paper_id` (`paper_id`),
    CONSTRAINT `FK__author_paper_author` FOREIGN KEY (`author_id`) REFERENCES `author` (`id`),
    CONSTRAINT `FK__author_paper_paper` FOREIGN KEY (`paper_id`) REFERENCES `paper` (`id`)
)
COLLATE='latin1_swedish_ci'
ENGINE=InnoDB
;
-- paper_genre: links a paper to its genre (category).
-- The unique key on paper_id allows at most one row, and therefore one
-- genre, per paper.
-- NOTE(review): confirm that one-genre-per-paper is the intended rule.
CREATE TABLE `paper_genre` (
    `paper_id` INT(11) NOT NULL,
    `genre_id` INT(11) DEFAULT NULL,
    UNIQUE KEY `paper_id` (`paper_id`),
    KEY `genre_id` (`genre_id`),
    CONSTRAINT `FK__paper_genre_genre`
        FOREIGN KEY (`genre_id`) REFERENCES `genre` (`id`),
    CONSTRAINT `FK__paper_genre_paper`
        FOREIGN KEY (`paper_id`) REFERENCES `paper` (`id`)
)
    COLLATE = 'latin1_swedish_ci'
    ENGINE = InnoDB
;
-- paper_meeting: links a paper to the meeting it was part of.
-- The unique key on paper_id allows at most one row, and therefore one
-- meeting, per paper.
-- NOTE(review): confirm that one-meeting-per-paper is the intended rule.
CREATE TABLE `paper_meeting` (
    `paper_id` INT(11) NOT NULL,
    `meeting_id` INT(11) NOT NULL,
    UNIQUE KEY `paper_id` (`paper_id`),
    KEY `meeting_id` (`meeting_id`),
    CONSTRAINT `FK__paper_meeting_meeting`
        FOREIGN KEY (`meeting_id`) REFERENCES `meeting` (`id`),
    CONSTRAINT `FK__paper_meeting_paper`
        FOREIGN KEY (`paper_id`) REFERENCES `paper` (`id`)
)
    COLLATE = 'latin1_swedish_ci'
    ENGINE = InnoDB
;
-- author_origination: links an author to the organization they belong to.
-- The unique key on author_id allows at most one row, and therefore one
-- organization, per author.
-- NOTE(review): confirm that one-organization-per-author is the intended rule.
CREATE TABLE `author_origination` (
    `author_id` INT(11) NOT NULL,
    `origination_id` INT(11) NOT NULL,
    UNIQUE KEY `author_id` (`author_id`),
    KEY `origination_id` (`origination_id`),
    CONSTRAINT `FK__author_origination_author`
        FOREIGN KEY (`author_id`) REFERENCES `author` (`id`),
    CONSTRAINT `FK__author_origination_origination`
        FOREIGN KEY (`origination_id`) REFERENCES `origination` (`id`)
)
    COLLATE = 'latin1_swedish_ci'
    ENGINE = InnoDB
;
更多推荐
所有评论(0)