包引入
- pom引入lucene和hanlp
- lucene版本为7.4
- hanlp分词器版本为1.1.6
<!-- Lucene core: the indexing and search engine itself -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>7.4.0</version>
</dependency>
<!-- Lucene query parser: turns query strings into Query objects -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>7.4.0</version>
</dependency>
<!-- HanLP analyzer plugin: Chinese word segmentation for Lucene -->
<dependency>
    <groupId>com.hankcs.nlp</groupId>
    <artifactId>hanlp-lucene-plugin</artifactId>
    <version>1.1.6</version>
</dependency>
对数据文件进行索引
54public class DocIndex {
// 建立索引文件的目录
public static String indexPath = "/index";
public static void index(String path, Map<String, SugRecord> map, boolean reIndex) {
indexPath = path + indexPath;
if (!reIndex) {
return;
}
IndexWriter writer = null;
try {
// 存储索引数据的目录
Directory dir = FSDirectory.open(Paths.get(indexPath));
// 创建分析器
Analyzer analyzer = new HanLPAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
writer = new IndexWriter(dir, iwc);
indexDoc(writer, map);
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
private static void indexDoc(IndexWriter writer, Map<String, SugRecord> map) {
map.forEach((k, v) -> {
// 创建一个新的空文档
Document doc = new Document();
//创建索引字段
doc.add(new TextField("id", v.getId(), Field.Store.YES));
doc.add(new TextField("question", v.getQueryNorm(), Field.Store.NO));
doc.add(new TextField("query", v.getQuery(), Field.Store.NO));
// 写文档
try {
writer.addDocument(doc);
} catch (IOException e) {
e.printStackTrace();
}
});
try {
writer.flush();
writer.commit();
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
对索引进行检索