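<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.fu</groupId>
    <artifactId>es-jd</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>es-jd</name>
    <description>es-jd</description>

    <properties>
        <java.version>1.8</java.version>
        <elasticsearch.version>7.6.2</elasticsearch.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <spring-boot.version>2.3.7.RELEASE</spring-boot.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-configuration-processor</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <optional>true</optional>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-dependencies</artifactId>
                <version>${spring-boot.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>2.3.7.RELEASE</version>
                <configuration>
                    <mainClass>com.fu.EsJdApplication</mainClass>
                </configuration>
                <executions>
                    <execution>
                        <id>repackage</id>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>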
3、编写代码
package com.fu.config;
import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that exposes the Elasticsearch high-level REST client as a bean.
 */
@Configuration
public class ElasticSearchClientConfig {

    /**
     * Builds a {@link RestHighLevelClient} that talks to a single node at
     * {@code http://localhost:9200}.
     *
     * @return the client instance registered under the bean name {@code restHighLevelClient}
     */
    @Bean
    public RestHighLevelClient restHighLevelClient() {
        HttpHost node = new HttpHost("localhost", 9200, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}
package com.fu.controller;
import com.fu.service.ContentService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RestController;
import java.io.IOException;
import java.util.List;
import java.util.Map;
@RestController
public class ContentController {@Autowired
private ContentService contentService;
/**
 * Handles {@code GET /parse/{keyword}} by handing the keyword to
 * {@code contentService.parseContent} and returning its result unchanged.
 *
 * @param keyword the keyword taken from the request path
 * @return whatever {@code parseContent} reports for this keyword
 * @throws Exception propagated from the service call
 */
@GetMapping("/parse/{keyword}")
public Boolean parse(@PathVariable("keyword") String keyword) throws Exception {
    Boolean outcome = contentService.parseContent(keyword);
    return outcome;
}
@GetMapping("/search/{keyword}/{pageNo}/{pageSize}")
public List
package com.fu.controller;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
/**
 * MVC controller for the application's entry page.
 */
@Controller
public class IndexController {

    /** View name returned for the entry page. */
    private static final String INDEX_VIEW = "index";

    /**
     * Handles requests to {@code /} and {@code /index}.
     *
     * @return the view name {@code "index"}
     */
    @GetMapping({"/", "/index"})
    public String index() {
        return INDEX_VIEW;
    }
}
package com.fu.service;
import com.alibaba.fastjson.JSON;
import com.fu.entity.Content;
import com.fu.utils.HtmlParseUtil;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.TermQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@Service
public class ContentService {@Autowired
@Qualifier("restHighLevelClient")
private RestHighLevelClient Client;
/**
 * Step 1: scrape the products matching a keyword and bulk-index them into the
 * {@code jd_goods} Elasticsearch index.
 *
 * @param keywords search keyword handed to {@link HtmlParseUtil#parseJD(String)}
 * @return {@code true} when the bulk response reports no failures; {@code false} when
 *         nothing was scraped (so nothing was indexed) or at least one item failed
 * @throws Exception if scraping the page or executing the bulk request fails
 */
public Boolean parseContent(String keywords) throws Exception {
    List<Content> contents = new HtmlParseUtil().parseJD(keywords);

    // A BulkRequest with no actions is rejected by client-side validation
    // ("no requests added"), so report "nothing indexed" instead of throwing.
    if (contents == null || contents.isEmpty()) {
        return false;
    }

    // Put the scraped data into the ES index with one bulk call.
    BulkRequest bulkRequest = new BulkRequest();
    bulkRequest.timeout("5m");
    for (Content content : contents) {
        // No explicit document id is set on the IndexRequest.
        bulkRequest.add(
                new IndexRequest("jd_goods")
                        .source(JSON.toJSONString(content), XContentType.JSON));
    }

    BulkResponse bulk = Client.bulk(bulkRequest, RequestOptions.DEFAULT);
    return !bulk.hasFailures();
}
// 2.获取这些数据实现搜索高亮功能
public List
package com.fu.utils;
import com.fu.entity.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
public class HtmlParseUtil {public ListparseJD(String keywords) throws Exception {//获取请求 https://search.jd.com/Search?keyword=java
String url = "https://search.jd.com/Search?keyword="+keywords+"&enc=utf-8";
//解析网页,Jsoup返回的document就是浏览器Document对象
Document document = Jsoup.parse(new URL(url), 30000);
// System.out.println(document.html());
//获取商品列表
Element j_goodsList = document.getElementById("J_goodsList");
//获取所有的li元素
Elements elements = document.getElementsByTag("li");
//创建一个集合存储商品内容
Listcontents = new ArrayList<>();
//获取元素中的内容
for (Element el : elements ){//由于图片是延迟加载
//date-lazy-img
String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");
String price = el.getElementsByClass("p-price").eq(0).text();
String title = el.getElementsByClass("p-name").eq(0).text();
// String commit = el.getElementsByClass("p-commit").eq(0).text();
String commit = "10万+";
String shopnum = el.getElementsByClass("p-shop").eq(0).text();
contents.add(new Content(img,price,title,commit,shopnum));
}
return contents;
}
}
package com.fu.entity;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Data holder for one scraped product entry. Accessors, {@code equals}/{@code hashCode},
 * {@code toString} and both constructors are generated by Lombok; the all-args constructor
 * takes the fields in declaration order.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content {

    /** Image. */
    private String img;

    /** Price. */
    private String price;

    /** Title. */
    private String title;

    /** Number of reviews. */
    private String commit;

    /**
     * Product number according to the original note.
     * NOTE(review): HtmlParseUtil fills this from the {@code p-shop} element text — confirm
     * which meaning is intended.
     */
    private String shopnum;
}
你是否还在寻找稳定的海外服务器提供商?创新互联www.cdcxhl.cn海外机房具备T级流量清洗系统配攻击溯源,准确流量调度确保服务器高可用性,企业级服务器适合批量采购,新人活动首月15元起,快前往官网查看详情吧