create table t_word
(
id int unsigned auto_increment comment '自增id' primary key,
uid char(32) not null comment '32位唯一id',
word varchar(256) null comment '英文单词',
translate varchar(256) null
);
create fulltext index full_idx_translate
on t_word (translate);
create fulltext index full_idx_word
on t_word (word);
INSERT INTO t_word (id, uid, word, translate) VALUES (1, '9d592499c65648b0a9519206688ef3f9', 'lion', '狮子');
INSERT INTO t_word (id, uid, word, translate) VALUES (2, 'ce26ac4239514bc6af481bcb1d9b67df', 'panda', '熊猫');
INSERT INTO t_word (id, uid, word, translate) VALUES (3, 'a7d6042853c44904b68275daafb44702', 'tiger', '老虎');
INSERT INTO t_word (id, uid, word, translate) VALUES (4, 'f13bd0a8ecea44fc9ade1625eeb4cc3c', 'goat', '山羊');
INSERT INTO t_word (id, uid, word, translate) VALUES (5, '27d5cbfc93a046388d712085e567474f', 'sheep', '绵羊');
INSERT INTO t_word (id, uid, word, translate) VALUES (6, 'ed35df138cf348aa937781be8ee21cbf', 'lamb', '羊羔');
INSERT INTO t_word (id, uid, word, translate) VALUES (7, 'fba5861d9527440990276e999f47ef8f', 'buffalo', '水牛');
INSERT INTO t_word (id, uid, word, translate) VALUES (8, '3a72e76f210841b1939fff0d3d721375', 'bull', '公牛');
INSERT INTO t_word (id, uid, word, translate) VALUES (9, '272e0b28ea7a48248a86f17533bf9943', 'cow', '母牛');
INSERT INTO t_word (id, uid, word, translate) VALUES (10, '47127adface54e418e4c1b9980af6d16', 'calf', '小牛');
INSERT INTO t_word (id, uid, word, translate) VALUES (11, '10592499c65648b0a9519206688ef3f9', 'little lion', '小狮子');
INSERT INTO t_word (id, uid, word, translate) VALUES (12, '1bf095110b634a01bee5b31c5ee7ee0c', 'little cow', '母牛');
INSERT INTO t_word (id, uid, word, translate) VALUES (13, '4813e588cde54c30bd65bfdbb243ad1f', 'little calf', '小小牛');
INSERT INTO t_word (id, uid, word, translate) VALUES (14, '5e377e281ad344048b6938a638b78ccb', 'little bull', '小公牛');
INSERT INTO t_word (id, uid, word, translate) VALUES (15, '2855ad0da2964c7682c178eb8271f13d', 'little buffalo', '小水牛');
INSERT INTO t_word (id, uid, word, translate) VALUES (16, '72f24c9a77644d57a36f3bdf2b8116b0', 'little lamb', '小羊羔');
INSERT INTO t_word (id, uid, word, translate) VALUES (17, '2d592499c65648b0a9519206688ef3f9', 'I''m a big lion', '我是一只大狮子');
3、删除全文索引
alter table 表名 drop index 索引名;
4、全文索引使用
语法
MATCH(col1,col2,...) AGAINST(expr[search_modifier])
search_modifier:
{
IN NATURAL LANGUAGE MODE
| IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION
| IN BOOLEAN MODE
| WITH QUERY EXPANSION
}
// 默认是使用 in natural language mode
select * from t_word where match(word) against ('lion');
// 或者 显示写
select * from t_word where match(word) against ('lion' in natural language mode);
// 默认是使用 in natural language mode
select * from t_word where match(word) against ('lion');
// 或者 显示写
select * from t_word where match(word) against ('lion' in natural language mode);
// 排除包含lion记录、查询出包含cow或者little的记录,提升包含calf单词的排名,降低包含cow记录的排名,查询出以go开头的记录
select * from t_word where match(word) against ('-lion cow little >calf <cow go*' in boolean mode) ;
好像问题都解决了, 但是问题才刚开始
回到最开始的需求,我想模糊搜索
select * from t_word where match(word) against('lio' in boolean mode);
预期值:把包含lion的都查询出来
实际结果:啥都没有。
全匹配查询的时候能查询出来
select * from t_word where match(translate) against('小水牛' in boolean mode);
只查询部分查询不出来。如:下面只查询 "小水" 或者"水牛" 都没有数据
select * from t_word where match(translate) against('小水' in boolean mode);
# test: 库名 t_chinese_phrase: 表名字
SET GLOBAL innodb_ft_aux_table="test/t_chinese_phrase";
# 查询分词情况
SELECT * FROM INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE;
# 查询分词情况
select * from information_schema.innodb_ft_index_table;
查询结果如下:
因为我们上面设置了分词数是1,所以,可以看到都是按照一个词进行分词的。
字段解析: FIRST_DOC_ID :word第一次出现的文档ID LAST_DOC_ID : word最后一次出现的文档ID DOC_COUNT :含有word的文档个数 DOC_ID :当前文档ID POSITION : word 当在前文档ID的位置
查询
1、使用自然语言模式 NATURAL LANGUAGE MODE 查询
在自然语言模式(NATURAL LANGUAGE MODE)下,文本的查询被转换为n-gram分词查询的并集。
SELECT * FROM t_chinese_phrase WHERE MATCH (phrase) AGAINST ('繁荣昌盛' in boolean mode) ;
实际使用
回到我们最开始的查询需求,看看实际的效果
查询包含了“昌”的数据
SELECT * FROM t_chinese_phrase WHERE MATCH (phrase) AGAINST ('昌' IN boolean MODE) ;
SELECT * FROM t_chinese_phrase WHERE MATCH (phrase) AGAINST ('昌' ) order by id asc;
发表评论
还没有评论,快来抢沙发吧!