antnutch.sh
Nutch 执行前,需要先从数据库中读取数据并写入 Nutch 的配置文件,然后重新编译 Nutch。
# antnutch.sh -- pre-crawl step: read data from MySQL into Nutch's
# configuration files, then rebuild Nutch with ant.
#
# NOTE(review): this copy of the script is collapsed onto a single line, so as
# written everything after "#!" is one comment and nothing executes. The line
# breaks have to be restored before the script can be used.
#
# NOTE(review): the bodies of the four here-documents (the SQL statements) and
# the loop headers that belong to each "done" are missing from this copy; only
# the residue "<>" / "< >" is left. They cannot be recovered from what is
# shown here -- restore them from the original script rather than guessing.
#
# What the visible fragments show, in order:
#   1. Connection settings and table/column names are stored in variables
#      (USER, PASSWORD, DATABASE, B02_WEBSITE, B05_FILTERRULE, SITEADDR,
#      SITEATTR, RULETYPE, CONTENT, DBPATH).
#   2. /home/apache-nutch-1.8/conf/blackwhite-urlfilter.txt is truncated and
#      given a one-line header comment.
#   3. mysql is invoked four times; the results are assigned to regsites,
#      blacksites, whitesites and then blacksites again. The first three
#      sections end with a redirect into blackwhite-urlfilter.txt, the fourth
#      with a redirect into /home/urls.
#   4. The script changes into /home/apache-nutch-1.8/ and runs "ant runtime".
#
# NOTE(review): the fourth query reuses the variable name "blacksites" although
# its output goes to /home/urls -- presumably a copy-paste leftover; confirm.
# NOTE(review): "@file uodateD02.sh" in the header does not match the name
# antnutch.sh used elsewhere -- confirm which one is current.
# NOTE(review): the database password is hard-coded and passed with -p on the
# command line; consider an option file (e.g. ~/.my.cnf) instead.
#!/bin/bash## @file uodateD02.sh# @brief read data from mysql and ant nutch# @author Dajun Miao# @version 0.1# @date 2014-05-19#USER="root"PASSWORD="123456"DATABASE="admin_platform"B02_WEBSITE="b02_website"B05_FILTERRULE="b05_filter_rule"SITEADDR="site_addr"SITEATTR="site_attr"RULETYPE="rule_type"CONTENT="content"DBPATH="172.37.0.203"echo "# config file for urlfilter-blackwhite plugin" > /home/apache-nutch-1.8/conf/blackwhite-urlfilter.txtregsites=`mysql -u$USER -p$PASSWORD -h$DBPATH $DATABASE <> /home/apache-nutch-1.8/conf/blackwhite-urlfilter.txtdoneblacksites=`mysql -u$USER -p$PASSWORD -h$DBPATH $DATABASE < > /home/apache-nutch-1.8/conf/blackwhite-urlfilter.txtdonewhitesites=`mysql -u$USER -p$PASSWORD -h$DBPATH $DATABASE < > /home/apache-nutch-1.8/conf/blackwhite-urlfilter.txtdoneblacksites=`mysql -u$USER -p$PASSWORD -h$DBPATH $DATABASE < > /home/urlsdonecd /home/apache-nutch-1.8/ant runtime
runnutch.sh
#!/bin/bash
#
# @file runnutch.sh
# @brief Upload the seed URL list to HDFS and start a Nutch crawl from the
#        runtime/deploy directory.
# @author Dajun Miao
# @version 0.1
# @date 2014-05-05
#
# Every command sits on its own line: if the script is joined onto the shebang
# line, everything after "#!" is a comment and nothing runs.

# Invocation from runtime/local, kept for reference:
#cd /home/apache-nutch-1.8/runtime/local/bin/
#./crawl /home/urls data http://172.37.0.201:8080/solr/ 1

# Load root's shell environment.
# NOTE(review): presumably this is what puts hadoop on PATH -- confirm.
source /root/.bashrc

# Replace the seed directory in HDFS with the current /home/urls.
# The removal is allowed to fail (e.g. /urls does not exist yet); the upload
# is not, because the crawl below reads its seeds from /urls.
hadoop fs -rm -r /urls
hadoop fs -put /home/urls /urls || exit 1

# ./crawl is invoked by relative path, so stop if the directory is missing.
cd /home/apache-nutch-1.8/runtime/deploy/bin/ || exit 1

# Arguments: seed dir, crawl dir, Solr URL, then "1".
# NOTE(review): the last argument is presumably the number of rounds -- confirm
# against the crawl script's usage text.
./crawl /urls /nutch http://172.16.10.15:8080/solr/Nutch 1
定时器:cronnutch.sh
# Once a day at 20:00: rebuild the Nutch configuration, then start the crawl
# only if the rebuild succeeded.
# The minute field is 0 on purpose: with "*" the job would be started every
# minute from 20:00 to 20:59.
# Each command carries its own redirect, because a single trailing redirect
# would apply only to the last command of the && list.
# NOTE(review): the script names and mylog.log are relative and depend on
# cron's PATH and working directory -- confirm, or switch to absolute paths.
0 20 * * * antnutch.sh >> mylog.log 2>&1 && runnutch.sh >> mylog.log 2>&1