21CTO社区导读:
本文为大家介绍Sphinx的理论与实践。你可以利用它建立自己网站的全网搜索,包括中文分词等强大特性,Sphinx与PHP匹配特别合适。
$ wget http://sphinxsearch.com/files/sphinx-2.0.5-release.tar.gz
解压源码包:
$ tar zxvf sphinx-2.0.5-release.tar.gz
执行configure配置程序:
$ cd sphinx-2.0.5-release
$ ./configure [options]
有一些参数可以在配置的时候指定,主要如下:
$ ./configure --prefix=/usr/local/sphinx --with-mysql=/usr/local/mysql
编译:
$ make
安装:
$ make install
如果编译中没有产生错误,这个步骤应该不会遇到问题。如果完成后未正确安装,就要回去找 make 过程中遇到的错误了。
$ cd /usr/local/sphinx/etc
这里,sphinx 提供了一个简单的例子,基本步骤是先将 /usr/local/sphinx/etc 目录下面的 sphinx.conf.dist 复制为 sphinx.conf,然后修改 sphinx.conf 中的配置,主要是修改你服务器上面的 mysql 的用户名、密码、数据库名等。修改的位置是 sphinx.conf 的 source src1 下面几行。
$ cp sphinx.conf.dist sphinx.conf
$ emacs sphinx.conf
$ mysql -u test < /usr/local/sphinx/etc/example.sql
这里是导入 sphinx 准备的测试数据,我们把数据导入到 mysql 的 test 数据库中。
$ /usr/local/mysql/bin/mysql -uroot -pxxxxxx test < /usr/local/sphinx/etc/example.sql
接下来的 indexer 命令用于建立索引,当然数据基础是刚刚导入的 example.sql 的数据。如果这里出错,最大的可能是你的 sphinx.conf 中的数据库配置错了,你需要回去检查并修正。但是,还有可能出现 sphinx 必须的库文件无法找到,例如出现以下错误:
$ cd /usr/local/sphinx/etc
$ /usr/local/sphinx/bin/indexer --all
/usr/local/sphinx/bin/indexer: error while loading shared libraries: libmysqlclient.so.16: cannot open shared object file: No such file or directory
这主要是因为你安装了一些库后,没有能够配置相应的环境变量。
$ ln -s /usr/local/mysql/lib/libmysqlclient.so.16 /usr/lib/libmysqlclient.so.16
这里我假设你相应的软件包安装在 /usr/local/xxx 目录下,如果你不是安装在相应目录下,你就需要使用你自己的路径。
$ cd /usr/local/sphinx/etc
$ /usr/local/sphinx/bin/search test
$ cd /usr/local/sphinx/etc
$ /usr/local/sphinx/bin/searchd
# Main ("base") MySQL data source for the main+delta indexing scheme.
# Each sql_query_pre statement runs before the main sql_query, in the order listed.
source base
{
type = mysql
sql_host = 127.0.0.1
sql_user = root
sql_pass =
sql_db = test
sql_port = 8686
# Make MySQL return UTF-8 so it matches the index charset.
sql_query_pre = SET NAMES utf8
# Remember the highest document id at the time the main index is built, so the
# delta source can select only newer rows. A directive value must stay on one
# line (or use a trailing backslash); a bare line break splits the statement.
# NOTE(review): this reads invoice_main while sql_query uses the placeholder
# "table" -- both should refer to the same real table; confirm.
sql_query_pre = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM invoice_main;
# "..." is a placeholder for the remaining columns to index.
sql_query = SELECT id, uid, ... FROM table;
# Columns stored as timestamp attributes; they must be returned by sql_query.
sql_attr_timestamp = addtime
sql_attr_timestamp = modtime
}
# Main index built from source "base".
index base
{
source = base
path = /path/to/var/data/base
docinfo = extern
# Allow "*" wildcards in queries; needs prefix/infix indexing (see min_infix_len).
enable_star = 1
min_infix_len = 3
charset_type = utf-8
# Chinese unigram (single-character) tokenization.
# charset_table is kept on a single line: a bare line break would end the
# directive and leave the second half as an unparseable line.
charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
# Index every character listed in ngram_chars as its own 1-gram token.
ngram_len = 1
ngram_chars = U+3000..U+2FA1F
}
# Delta data source: inherits the connection settings from source "base" and
# selects only the rows added after the last main-index build.
source delta : base
{
# Redefining sql_query_pre replaces the inherited list, so the
# REPLACE INTO sph_counter statement of "base" is NOT run for the delta.
sql_query_pre = SET NAMES utf8
# Multi-line values need a trailing backslash on every line but the last.
sql_query = \
SELECT id, uid, ... FROM table \
WHERE id > ( SELECT max_doc_id FROM sph_counter WHERE counter_id = 1)
}
# Delta index: inherits the settings of index "base" and overrides only the
# data source and the on-disk path.
index delta : base
{
source = delta
path = /path/to/var/data/delta
}
# MySQL data source for the plain (disk) index "orig", which is later
# converted into the real-time index "rtindex".
source orig
{
type = mysql
sql_host = localhost
sql_user = root
sql_pass =
sql_db = test
sql_port = 3306 # optional, default is 3306
# date_added is converted to a UNIX timestamp so it can be stored as a
# timestamp attribute below.
sql_query = \
SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
FROM documents
# Columns stored as attributes; the remaining text columns (title, content)
# become full-text fields.
sql_attr_uint = group_id
sql_attr_timestamp = date_added
# Per-document lookup query; $id is replaced with the document id.
sql_query_info = SELECT * FROM documents WHERE id=$id
}
# Plain disk index built from source "orig"; path is relative to the
# directory the Sphinx tools are run from.
index orig
{
source = orig
path = idx/orig
docinfo = extern
# Single-byte character set.
charset_type = sbcs
}
# Real-time index that receives the data of "orig" through ATTACH INDEX.
# Its fields and attributes mirror the columns selected by source "orig".
index rtindex
{
type = rt
# RAM chunk size limit.
rt_mem_limit = 32M
path = idx/rtindex
charset_type = utf-8
# Full-text fields.
rt_field = title
rt_field = content
# Attributes.
rt_attr_uint = group_id
rt_attr_timestamp = date_added
}
# Helper source used only to trigger the disk-to-RT conversion.
# It connects to searchd itself over the MySQL protocol (SphinxQL), not to a
# real MySQL server, so user/password/database are left empty.
# NOTE(review): assumes searchd listens on 9306 with the mysql41 protocol --
# the searchd section is not shown here; confirm.
source attach
{
type = mysql
sql_host = 127.0.0.1
sql_user =
sql_pass =
sql_db =
sql_port = 9306 # optional, default is 3306
# Dummy query against the RT index declared in this config. The original
# snippet selected from "testrt", which is not defined in the visible config.
sql_query = select 1 from rtindex
# Executed after indexing: moves the disk index "orig" into the RT index.
sql_query_post = ATTACH INDEX orig TO RTINDEX rtindex
}
# Index built from source "attach"; running indexer on it executes that
# source's sql_query_post statement.
index attach
{
source = attach
path = idx/attach
docinfo = extern
charset_type = sbcs
}
$ ./bin/searchd -c ./etc/sphinx.conf
$ ./bin/indexer -c ./etc/sphinx.conf orig --rotate
转换实时索引:
$ ./bin/indexer -c ./etc/sphinx.conf attach
现在来收获成果:
$ mysql -P9306 -h127.0.0.1
mysql> select * from rtindex;
# Skeleton of the local main+delta pair used by the distributed index.
# The parent source must be named "base" because "source delta : base"
# inherits from it by that name.
source base
{
# ...
}
index base
{
# ...
}
# Delta source; inherits from source "base".
source delta : base
{
# ...
}
# Delta index; inherits from index "base".
index delta : base
{
# ...
}
# Distributed index: a query against "main" searches the local indexes and
# the listed remote agents.
index main
{
type = distributed
# Local indexes searched on this searchd instance.
local = base
local = delta
# Remote agents, written as host:port:comma-separated index list.
agent = 127.0.0.1:9313:main,delta
agent = 127.0.0.1:9314:main,delta
agent = 127.0.0.1:9315:main,delta
# remote agent connection timeout, milliseconds
# optional, default is 1000 ms, ie. 1 sec
agent_connect_timeout = 1000
# remote agent query timeout, milliseconds
# optional, default is 3000 ms, ie. 3 sec
agent_query_timeout = 3000
}
# xmlpipe2 data source: indexer runs xmlpipe_command and reads the XML
# document stream from its standard output.
source products
{
type = xmlpipe2
xmlpipe_command = php53 /home/guweigang/work/scripts/exportProducts2Sphinx.php
}
# Index built from the xmlpipe2 source "products".
index products
{
source = products
path = /home/guweigang/local/sphinx/var/data/products
docinfo = extern
charset_type = utf-8
charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
# Index every character listed in ngram_chars as its own 1-gram token.
ngram_len = 1
ngram_chars = U+3000..U+2FA1F
}
# xmlpipe2 data source fed by a PHP exporter script that prints the XML
# stream to standard output.
source products
{
type = xmlpipe2
xmlpipe_command = php /home/work/tmp/xmlpipe2.php
}
<?php
/**
 * xmlpipe2 exporter for Sphinx.
 *
 * Reads product rows from MySQL, extracts "name" and "cities" from the JSON
 * stored in the productInfo column, and prints a <sphinx:docset> XML document
 * to standard output for use as an xmlpipe_command.
 *
 * Requires the Phalcon extension (Db adapter) and the DOM extension.
 * NOTE(review): connection credentials are hard-coded below; consider
 * moving them to configuration.
 */
$connection = new \Phalcon\Db\Adapter\Pdo\Mysql(array(
    "host" => "10.48.31.126",
    "port" => "8006",
    "username" => "root",
    "password" => "root",
    "dbname" => "Vs_Finance_Biz",
    "options" => array(
        \PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES utf8'
    )
));

$query = $connection->query("SELECT productId AS id,
nestId as nestid,
productInfo as productinfo,
UNIX_TIMESTAMP(addTime) AS addtime,
UNIX_TIMESTAMP(modTime) AS modtime
FROM product
WHERE userStatus = 0
AND isDelete = 0
AND auditStatus = 0
AND nestId = 21");

// Full-text fields declared in the schema; only the keys are used.
$arrField = array("id" => "", "nestid" => "", "name" => "");
// Attributes declared in the schema: name => Sphinx attribute type.
$arrAttr = array(
    "cities" => "multi",
);

$dom = new DOMDocument("1.0", "utf-8");
$docset = $dom->createElement('sphinx:docset');
$schemaroot = $dom->createElement('sphinx:schema');
$docset->appendChild($schemaroot);

// <sphinx:field name="..."/> for every full-text field.
foreach ($arrField as $node => $type) {
    $entry = $dom->createElement('sphinx:field');
    $entry->setAttribute('name', $node);
    $schemaroot->appendChild($entry);
}

// <sphinx:attr name="..." type="..."/> for every attribute.
foreach ($arrAttr as $node => $type) {
    $entry = $dom->createElement('sphinx:attr');
    $entry->setAttribute('name', $node);
    $entry->setAttribute('type', $type);
    $schemaroot->appendChild($entry);
}
$dom->appendChild($docset);

// Node list is the same for every row, so build it once (attributes first,
// then fields, as before).
$arrNodes = array_merge($arrAttr, $arrField);

while ($document = $query->fetch()) {
    $docroot = $dom->createElement('sphinx:document');
    $docroot->setAttribute('id', $document['id']);

    // Rows without product info are still emitted, but with no child
    // elements, which matches the previous output.
    if (!empty($document['productinfo'])) {
        // Decode once per row instead of once per node.
        $productInfo = json_decode($document['productinfo'], true);
        if (!is_array($productInfo)) {
            // Invalid JSON or a non-object payload: treat as "no data".
            $productInfo = array();
        }
        $document['name'] = (isset($productInfo['name']) && is_scalar($productInfo['name']))
            ? $productInfo['name']
            : '';
        // Always hand join() an array, even when "cities" is missing or scalar.
        $document['cities'] = isset($productInfo['cities'])
            ? (array) $productInfo['cities']
            : array();

        foreach ($arrNodes as $node => $type) {
            if ($type == 'multi') {
                // Multi-value attribute: "(v1,v2,...)".
                $value = "(" . join(",", $document[$node]) . ")";
            } else {
                $value = $document[$node];
            }
            $dataChild = $dom->createElement($node);
            // createTextNode escapes XML special characters in the value.
            $dataChild->appendChild($dom->createTextNode((string) $value));
            $docroot->appendChild($dataChild);
        }
    }

    $docset->appendChild($docroot);
}

echo $dom->saveXML();
本文为 @ 21CTO 创作并授权 21CTO 发布,未经许可,请勿转载。
内容授权事宜请您联系 webmaster@21cto.com或关注 21CTO 公众号。
该文观点仅代表作者本人,21CTO 平台仅提供信息存储空间服务。