现在乱码问题是
res.write(爬到的内容),在页面上显示是正常的
但是直接在node端打印console.log(爬到的内容)是乱码
调整乱码问题的关键属性是cheerio.load中的{decodeEntities:false}属性
代码一
// Snippet 1: fetch a page, take the inner HTML of the first `.r_search_item`
// element and write it to the browser. cheerio is loaded WITH
// {decodeEntities: false}.
var cheerio = require('cheerio');
var superagent = require('superagent');
var express = require('express');

var url = 'http://acm.hdu.edu.cn/statistic.php?pid=1000';
var app = express();

app.get('/', function (req, res, next) {
  superagent.get(url).end(function (err, pres) {
    // A failed request must not fall through to `pres.text`;
    // hand the error to Express' error handling instead.
    if (err) {
      return next(err);
    }

    var html = pres.text;
    var $ = cheerio.load(html, {decodeEntities: false});
    var ans = $('.r_search_item').eq(0).html();

    // .html() returns null when the selection is empty, and
    // res.write(null) would throw, so report that case explicitly.
    if (ans === null) {
      return next(new Error('no .r_search_item element found in fetched page'));
    }

    // res.write() is kept (no headers are set here) so the snippet still
    // shows the behaviour under discussion; end() finishes the response so
    // the browser request does not hang.
    res.write(ans);
    res.end();
  });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
代码二
// Snippet 2: fetch a page, take the inner HTML of the first `.r_search_item`
// element and write it to the browser. cheerio is loaded with its DEFAULT
// options (no {decodeEntities: false}).
var cheerio = require('cheerio');
var superagent = require('superagent');
var express = require('express');

var url = 'http://acm.hdu.edu.cn/statistic.php?pid=1000';
var app = express();

app.get('/', function (req, res, next) {
  superagent.get(url).end(function (err, pres) {
    // A failed request must not fall through to `pres.text`;
    // hand the error to Express' error handling instead.
    if (err) {
      return next(err);
    }

    var html = pres.text;
    var $ = cheerio.load(html);
    var ans = $('.r_search_item').eq(0).html();

    // .html() returns null when the selection is empty, and
    // res.write(null) would throw, so report that case explicitly.
    if (ans === null) {
      return next(new Error('no .r_search_item element found in fetched page'));
    }

    // res.write() is kept (no headers are set here) so the snippet still
    // shows the behaviour under discussion; end() finishes the response so
    // the browser request does not hang.
    res.write(ans);
    res.end();
  });
});

app.listen(3000, function () {
  console.log('app is listening at port 3000');
});
总结
当为'代码一'时,添加{decodeEntities: false}属性,node端正常,页面乱码;当为'代码二'时,没有{decodeEntities: false}属性,node端乱码,页面正常。