Topics

Install Sphinx 2.0.4 on CentOS 6.2

– Make sure to check CentOS, PHP-FPM, Nginx, memcached and MySQL posts.

– Get fresh RPM from sphinxsearch.com

– Install Sphinx

rpm -Uhv http://sphinxsearch.com/files/sphinx-2.0.4-1.rhel6.x86_64.rpm

– Create sample MySQL database

-- Create the sample database with utf8 defaults (this must match the charset
-- the Sphinx indexer connection uses) and switch to it for the statements below.
CREATE DATABASE `Sample_DB` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
USE `Sample_DB`;

-- Documents to be indexed by Sphinx. `id` is the Sphinx document ID and must
-- be the first column selected by sql_query in sphinx.conf.
-- NOTE(review): `Title`/`Description` are CamelCase (against snake_case
-- convention) but are referenced by name in sphinx.conf and the PHP client,
-- so renaming would be a breaking change.
CREATE TABLE IF NOT EXISTS `sites` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `url` varchar(255) NOT NULL,
  `Title` varchar(1024) NOT NULL,
  `Description` varchar(4000) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=MyISAM  DEFAULT CHARSET=utf8;

-- Sample data (directory-style site listings) used to demonstrate indexing
-- and searching. Inserted as a single multi-row statement; `id` values are
-- assigned by AUTO_INCREMENT.
INSERT INTO `sites` (`url`, `Title`, `Description`) VALUES
( 'www.petercolephoto.com', 'Cole, Peter', 'A gallery of 1100 plus adeventure sports images organized and presented in thumbnail/popup album form using PHP/mySQL on an Apache/Linux server.'),
( 'www.customwebapps.com', 'Custom Web Apps', 'PHP/MYSQL Web-based software development company'),
( 'www.dynamicpros.com', 'DynamicPros', 'Professional web programming services. Specializing in MySQL and e-commerce solutions.'),
( 'www.perl-resume.com', 'Sagayam, Christopher', 'Perl and CGI consultant with experience in Perl, Linux, NT, MySQL, MS-SQL, CGI, internet programming.'),
( 'cfortune.kics.bc.ca', 'Fortune, Chris', 'Freelance computer programmer in PHP, MySQL, HTML, CSS, Javascript, DHTML, Perl, CGI, ecommerce.'),
( 'php4hire.com', 'Klimov, Victor', 'Main expertise is designing customized web based applications using PHP and MySQL for Linux, Unix and Windows NT platforms.'),
( 'www.erasmos.com', 'Rasmussen, Sean', 'Internet systems developer, using languages and tools like Java, Perl, PHP, and SQL (Oracle, Mysql. Sun Java Certified. Seeks either contract or permanent employment.'),
( 'www.dalinowen.com', 'Owen, Dalin S.', 'Network security professional: Linux, UNIX, MS, C/C++, Perl, Apache, Qmail, MySQL, Tripwire, IPF, IPFW, PF, IPCHAINS, IPBUCKETS, Snort.'),
( 'www.kickfire.com', 'Kickfire', 'Database appliance based on MySQL.'),
( 'www.evoknow.com', 'Evoknow, Inc.', 'Full service company developing scalable Web applications in PHP and MySQL. Sacramento, California.'),
( 'www.primeharbor.com', 'PrimeHarbor Technologies, Inc.', 'Consulting firm specializing in OpenSource tools and languages. Expertise in MySQL, Apache, qmail, perl, python, java, C/C++, and kylix. Atlanta, Georgia.'),
( 'vidgen.sourceforge.net', 'VI Document Generator', 'VidGen bridges the gap between Databases and Dynamically Printed Documents with use of SVG, PHP, and mySQL. Input SVG and export various script formats like: Xerox VIPP, SOLscript, PDFlib-PDI or PreS. Or merge to PDF, PS, PPML, VPS, PCL or W3C SVGprint.'),
( 'www.collegefundsoftware.com', 'CollegeFund Software', 'Develops QuarkXTensions and AppleScripts developed for QuarkXPress. Also offers software development, training and consulting for the publishing industry. As well as web-based development using Perl, PHP and MySQL.'),
( 'www.usefulcomputerinfo.com', 'Useful Computer Info', 'Tutorials for windows, active server pages, mysql, fonts and search engine optimization.'),
( 'mysqltutorials.bravehost.com', 'Mysqlhelpster', 'Provides help with Mysql, Php, Html and Javascript.'),
( 'www.rapidweb.info', 'RapidWeb', 'A web hosting service offers an open-source MySQL-based content management system.'),
( 'weblogs.us', 'Weblogs.us', 'Offering free WordPress blogs. Full FTP and MySQL access. No ads required on hosted pages.'),
( 'cafelog.com', 'b2', 'A news/ weblog tool (aka logware). Requires a server that can run PHP4, and a MySQL database. Freeware.'),
( 'monauraljerk.org', 'Monaural Jerk', 'Free, open-source PHP/MySQL weblog system. Includes calendar navigation, "edit this page", searching, channels, RSS, XML, tell-a-friend, and spider-friendly URLs.'),
( 'scoop.kuro5hin.org', 'Scoop', 'Free collaborative media software for running community web logs . Requires Perl, mod_perl, and MySQL.'),
( 'b2evolution.net', 'b2evolution', 'Full featured PHP/ mySQL weblog tool. Supports multiple categories, sub-catgeories, multiple weblogs, skins, statistics, comments, anti-spam filters.'),
( 'www.geeklog.net', 'Geeklog', 'Free open-source blog tool. Runs on many different operating systems and uses PHP4 and MySQL.'),
( 'thingamablog.sourceforge.net', 'Thingamablog', 'A cross-platform, standalone application for authoring and publishing weblogs. It will work on any platform on which Java can run. Thingamablog does not require a third-party blogging host, a CGI/PHP enabled web server, or a MySQL database. The only requirement to setup and manage a blog is FTP access to a web server.'),
( 'boastology.com', 'boastMachine', 'PHP/MySQL powered blogging platform.'),
( 'blogcms.com', 'BLOG:CMS', 'Blogware requires PHP and MySQL. Features include weblog, forum, wiki, news aggregator, and photo gallery.'),
( 'presstopia.com', 'Presstopia', 'Open source ASP.NET weblog application. Supports MySQL, MS Access, MS SQL Serve, Atom 0.3, RSS 2.0 feeds, multiple authors, comment, trackbacks, update pings.'),
( 'wheatblog.sourceforge.net', 'Wheatblog', 'A lightweight, open source, customizable blogging and content management solution powered by PHP and MySQL or SQLite.'),
( 'www.gidforums.com', 'GIDForums', 'Webmaster community that discusses issues on hosting, website design, and PHP/MySQL coding.'),
( 'www.catamatic.com', 'Click-IT', 'Low cost readymade dynamic websites with database and full content management facilities, plus bespoke PHP and mySQL programming services.'),
( 'www.dhishna.com', 'Dhishna Technologies', 'Offers design using ASP, SML, Flash, Photoshop, PHP, Mysql and Java. Based in Kerala, India.');

-- Checkpoint table for the main+delta indexing scheme: max_doc_id records the
-- highest `sites`.`id` covered by the main index. The sphinx.conf pre-query
-- advances it with REPLACE INTO; the delta source indexes rows above it.
CREATE TABLE IF NOT EXISTS `sph_counter` (
  `counter_id` int(11) NOT NULL,
  `max_doc_id` int(11) NOT NULL,
  PRIMARY KEY (`counter_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

-- Seed the indexing checkpoint row. REPLACE (rather than plain INSERT) keeps
-- this script re-runnable: the CREATE TABLE statements above use IF NOT EXISTS,
-- but a plain INSERT would fail with a duplicate-key error on a second run.
-- It also matches the REPLACE INTO used by the sphinx.conf pre-query that
-- maintains this counter.
REPLACE INTO `sph_counter` (`counter_id`, `max_doc_id`) VALUES
(1, 1);

– Config sphinx

mkdir -p /server/sphinx/data
chown -R sphinx /server/sphinx
mv  /etc/sphinx/sphinx.conf /etc/sphinx/sphinx.conf.old
vi /etc/sphinx/sphinx.conf
#
# Sphinx configuration file sample
#
# WARNING! While this sample file mentions all available options,
# it contains (very) short helper descriptions only. Please refer to
# doc/sphinx.html for details.
# OR http://www.molecularsciences.org/book/export/html/405


#############################################################################
## data source definition for sites
#############################################################################

source sites
{
	# data source type. mandatory, no default value
	# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
	type					= mysql

	#####################################################################
	## SQL settings (for 'mysql' and 'pgsql' types)
	#####################################################################

	# some straightforward parameters for SQL source types
	sql_host				= localhost
	sql_user				= root
	sql_pass				= rootpass
	sql_db					= Sample_DB
	#sql_port				= 3306	# optional, default is 3306

	# force the connection charset to match the utf8 database/tables; without
	# this MySQL may hand the indexer latin1-recoded text (per the Sphinx
	# docs, sql_query_pre is the place for SET NAMES)
	sql_query_pre			= SET NAMES utf8

	sql_query_pre   		= SET SESSION query_cache_type=OFF

	# advance the indexing checkpoint: record the highest document id the
	# main index will cover (the delta source indexes everything above it)
	sql_query_pre 			= REPLACE INTO sph_counter SELECT 1, MAX(id) FROM sites

	# main document fetch query
	# mandatory, integer document ID field MUST be the first selected column 
	sql_query				= 	SELECT  id,url ,Title,Description FROM sites \
                                WHERE   id<=( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )

    sql_ranged_throttle		= 0

}


#############################################################################
## sitesDelta source definition
#############################################################################



source sitesDelta : sites
{
	# NOTE: declaring sql_query_pre here clears ALL pre-queries inherited
	# from 'sites' (multi-value options are reset, not appended, in a derived
	# source). That is intentional — the delta pass must NOT advance
	# sph_counter — but the charset/cache session setup must be repeated.
	sql_query_pre   	= 	SET NAMES utf8
	sql_query_pre   	= 	SET SESSION query_cache_type=OFF

	# index only documents added after the last main-index run
	sql_query			= 	SELECT  id,url ,Title,Description FROM sites \
							WHERE   id > ( SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
}



#############################################################################
## sites index definition
#############################################################################

# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
index sites
{
	# document source(s) to index
	# multi-value, mandatory
	# document IDs must be globally unique across all sources
	source			= sites

	# index files path and file name, without extension
	# mandatory, path must be writable, extensions will be auto-appended
	path			= /server/sphinx/data/sites

	# the documents are stored as utf8; Sphinx 2.0's default charset_type is
	# sbcs (single-byte), which would mangle multi-byte text
	charset_type		= utf-8

	# minimum indexed word length
	# default is 1 (index everything)
	min_word_len		= 3

	# minimum word infix length to index
	# optional, default is 0 (do not index infixes)
	#
	min_infix_len		= 3

	# enable star-syntax (wildcards) when searching prefix/infix indexes
	# known values are 0 and 1
	# optional, default is 0 (do not use wildcard syntax)
	#
	enable_star		= 1

	# enable chinese (CJK) search: ngram_len alone has no effect —
	# ngram_chars must list which characters are indexed as N-grams
	ngram_chars		= U+3000..U+2FA1F
	ngram_len		= 1
}






#############################################################################
## sitesDelta index definition
#############################################################################


index sitesDelta : sites
{
	# document source(s) to index
	# multi-value, mandatory
	# document IDs must be globally unique across all sources
	source			= sitesDelta

	# index files path and file name, without extension
	# mandatory, path must be writable, extensions will be auto-appended
	path			= /server/sphinx/data/sitesDelta

	# the documents are stored as utf8; Sphinx 2.0's default charset_type is
	# sbcs (single-byte), which would mangle multi-byte text
	charset_type		= utf-8

	# minimum indexed word length
	# default is 1 (index everything)
	min_word_len		= 3

	# minimum word infix length to index
	# optional, default is 0 (do not index infixes)
	#
	min_infix_len		= 3

	# enable star-syntax (wildcards) when searching prefix/infix indexes
	# known values are 0 and 1
	# optional, default is 0 (do not use wildcard syntax)
	#
	enable_star		= 1

	# enable chinese (CJK) search: ngram_len alone has no effect —
	# ngram_chars must list which characters are indexed as N-grams
	ngram_chars		= U+3000..U+2FA1F
	ngram_len		= 1
}





#######################################################################################


#############################################################################
## indexer settings
#############################################################################

indexer
{
	# memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
	# optional, default is 32M, max is 2047M, recommended is 256M to 1024M
	mem_limit			= 32M     

}




#############################################################################
## searchd settings
#############################################################################

searchd
{
	# hostname, port, or hostname:port, or /unix/socket/path to listen on
	# multi-value, multiple listen points are allowed
	# optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)
	#
	# listen				= 127.0.0.1
	# listen				= 192.168.0.1:9312
	# listen				= 9312

	# disable legacy Sphinx-1.x "magic token" compatibility in SphinxQL
	# (0 = modern behaviour)
	compat_sphinxql_magics = 0

	# log file, searchd run info is logged here
	# optional, default is 'searchd.log'
	log				= /var/log/searchd.log

	# query log file, all search queries are logged here
	# optional, default is empty (do not log queries)
	query_log			= /var/log/query.log

	# client read timeout, seconds
	# optional, default is 5
	read_timeout		= 5

	# request timeout, seconds
	# optional, default is 5 minutes
	client_timeout		= 300

	# maximum amount of children to fork (concurrent searches to run)
	# optional, default is 0 (unlimited)
	max_children		= 30

	# PID file, searchd process ID file name
	# mandatory
	# NOTE(review): /var/log is an unusual location for a pid file (/var/run
	# is conventional) — confirm the init script expects this path before
	# changing it
	pid_file			= /var/log/searchd.pid

	# max amount of matches the daemon ever keeps in RAM, per-index
	# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
	# default is 1000 (just like Google)
	max_matches			= 1000

	# seamless rotate, prevents rotate stalls if precaching huge datasets 
	# optional, default is 1
	seamless_rotate		= 1

	# whether to forcibly preopen all indexes on startup
	# optional, default is 0 (do not preopen)
	preopen_indexes		= 0

	# whether to unlink .old index copies on successful rotation.
	# optional, default is 1 (do unlink)
	unlink_old			= 1

	# crash log path
	# searchd will (try to) log crashed query to 'crash_log_path.PID' file
	# optional, default is empty (do not create crash logs)
	#
	#crash_log_path		= /server/log/sphinx/crash


}

# --eof--

– Create indexes

/usr/bin/indexer --all
chown -R sphinx /server/sphinx

– Start Sphinx

/etc/init.d/searchd start

– Run a test search for word ‘mysql’

/usr/bin/search mysql

– Search using PHP
– Get sphinxapi.php : Sphinx comes with a PHP implementation of searchd client API. You can find it at api/sphinxapi.php in the root of the sphinx source tarball (tar.gz).

<?php 
error_reporting(E_ALL);
ini_set("display_errors",1);

// Read the search keyword; guard against a missing 'q' parameter so we do
// not raise an undefined-index notice under this script's own
// error_reporting(E_ALL) + display_errors=1 settings.
$q = isset($_GET['q'])
   ? trim( html_entity_decode( $_GET['q'], ENT_QUOTES, 'UTF-8') )
   : '';

$sphx = sphinx_search($q, 0, 20);
//print_r( $sphx );

if( empty($sphx['ids']) )
    die('No Results');

$ids = $sphx['ids'];

// $ids is a comma-separated list of integer document ids produced by
// sphinx_search() (never raw user input), so interpolation is safe here.
// ORDER BY FIELD() keeps the rows in Sphinx's relevance order.
$sql =  "SELECT `id`, `url`, `Title`, `Description`
            FROM `sites`
            WHERE  `id` in ( {$ids}  )
            ORDER BY FIELD(`id`, {$ids} )";
db();

// NOTE(review): the mysql_* extension is deprecated (removed in PHP 7);
// kept here to match the tutorial's era — use mysqli/PDO in new code.
if(  !($r = mysql_query($sql)))
    die("[MYSQL]".mysql_error() . mysql_errno() );

$max      = $sphx['total']; // total matches Sphinx found
$num_rows = $sphx['docs'];  // matches returned for this page

echo "<b>Displaying {$num_rows} results of {$max}</b>

";

while($row = mysql_fetch_assoc($r) ) {
    echo "<a href=\"http://{$row['url']}\">{$row['Title']}</a>
{$row['Description']}
<hr />";    
    }
mysql_free_result($r);
    
/*
 * SPHINX search helper
 */

/*
 * Search the 'sites' and 'sitesDelta' indexes by keyword using Sphinx, with
 * an option to match against the 'url' field only.
 *
 * @param string  $q     the search keyword(s); escaped via EscapeString()
 * @param int     $i     offset of the first result to return
 * @param int     $limit maximum number of results to return
 * @param boolean $url   set to true to restrict matching to the 'url' field
 *
 * @return array|false   array with keys 'ids' (comma-separated list of
 *                       matching document ids), 'total' (total matches
 *                       found) and 'docs' (number of matches returned in
 *                       this page), or false when the query fails or finds
 *                       nothing
 */
function sphinx_search($q, $i, $limit, $url=false){
        require_once 'sphinxapi.php';
        
        $ids = '';
        
        $cl = new SphinxClient();
        $cl->SetServer( "localhost" , 9312);
        $cl->SetMatchMode( SPH_MATCH_EXTENDED  );
        $cl->SetSortMode ( SPH_SORT_RELEVANCE );
        // weight url matches highest, then Title, then Description
        $cl->SetFieldWeights(array('url' => 3000, 'Title' => 300, 'Description' => 30 ));
        $cl->SetLimits( $i , $limit);
        $q = $cl->EscapeString( $q);
        
        //search url only (extended-syntax field operator)
        $q = $url ? "@url {$q}" : $q;
        
        $result = $cl->Query( $q, 'sites sitesDelta' );

        // errors/warnings are logged but not fatal; the empty-matches check
        // below handles the failure path
        if ( $result === false )
                error_log( '[SPHINX]Query failed: ' . $cl->GetLastError() );
        elseif ( $cl->GetLastWarning() ) 
                error_log( '[SPHINX]WARNING: ' .  $cl->GetLastWarning() );
       
        if ( !empty($result["matches"]) ){
            // matches are keyed by document id; collect ids comma-separated
            foreach ( $result["matches"] as $doc => $docinfo ) 
                 $ids .= "$doc,";
            $ids = substr( $ids, 0, -1 ); // drop trailing comma
       }else
           return false;
       
       return  array( 'ids' => $ids, 'total' => $result['total'], 'docs' => count($result["matches"])  );

}

/*
 * Lazily open the MySQL connection (once per request), storing the link in
 * $GLOBALS['db'] and selecting the sample database. Dies with the MySQL
 * error text on any failure.
 */
function db(){

    // Already connected on an earlier call — nothing to do.
    if( !empty($GLOBALS['db']) ) return true;

    $link = mysql_connect('localhost', 'root', 'rootpass' );
    $GLOBALS['db'] = $link;

    if( !$link ) {
        die("[MYSQL]".mysql_error() . mysql_errno() );
    }

    if( !mysql_select_db('Sample_DB') ) {
        die("[MYSQL]".mysql_error() . mysql_errno() );
    }
}
?>

– Run sphinx.php http://www.example.com/sphinx.php?q=*mysql

– Create script to rotate delta indexes

vi /server/sphinx-delta.sh
#!/bin/sh
# Rebuild and hot-swap (--rotate) the delta index, but only when no indexer
# run is already in progress (pidof prints nothing when no 'indexer' process
# exists, so the -z test is true).
if [ -z "$(/sbin/pidof indexer)" ]
	then
    		/usr/bin/indexer  sitesDelta  --rotate;
fi

– add cron job

chmod +x /server/sphinx-delta.sh
crontab -e
##########################sphinx
#rotate delta index every 30 min
*/30  *  *  *  * /server/sphinx-delta.sh > /dev/null 2>&1

– To merge indexes run

/usr/bin/indexer --merge sites sitesDelta --rotate;

By continuing to use the site, you agree to the use of cookies. more information

The cookie settings on this website are set to "allow cookies" to give you the best browsing experience possible. If you continue to use this website without changing your cookie settings or you click "Accept" below then you are consenting to this.

Close