3.12 从bing爬虫,批量传图

This commit is contained in:
zhangsan 2025-03-12 15:07:12 +08:00
parent 2513d314b9
commit a6f7d83ecb
6 changed files with 148 additions and 9 deletions

View File

@ -57,6 +57,12 @@
<artifactId>cos_api</artifactId>
<version>5.6.227</version>
</dependency>
<!-- HTML parsing: https://jsoup.org/ -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.3</version>
</dependency>
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>

View File

@ -223,4 +223,17 @@ public class PictureController {
pictureService.doPictureReview(pictureReviewRequest, loginUser);
return ResultUtils.success(true);
}
/**
 * Fetches pictures in bulk and creates picture records from them.
 * Access is restricted through {@code @AuthCheck} to the admin role.
 *
 * @param pictureUploadByBatchRequest batch upload parameters taken from the request body
 * @param request                     current HTTP request, used to resolve the logged-in user
 * @return response wrapping the upload count reported by the picture service
 */
@PostMapping("/upload/batch")
@AuthCheck(mustRole = UserConstant.ADMIN_ROLE)
public BaseResponse<Integer> uploadPictureByBatch(
        @RequestBody PictureUploadByBatchRequest pictureUploadByBatchRequest,
        HttpServletRequest request) {
    // Reject a missing body before touching the session.
    final boolean bodyMissing = (pictureUploadByBatchRequest == null);
    ThrowUtils.throwIf(bodyMissing, ErrorCode.PARAMS_ERROR);

    final User operator = userService.getLoginUser(request);
    final int createdCount = pictureService.uploadPictureByBatch(pictureUploadByBatchRequest, operator);
    return ResultUtils.success(createdCount);
}
}

View File

@ -0,0 +1,29 @@
package edu.whut.smilepicturebackend.model.dto.picture;
import lombok.Data;
import java.io.Serializable;
/**
 * Request payload for importing pictures in bulk.
 */
@Data
public class PictureUploadByBatchRequest implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Search keyword. */
    private String searchText;

    /** Number of pictures to fetch; initialised to 20 when not set otherwise. */
    private Integer count = 20;

    /** Prefix for the picture names. */
    private String namePrefix;
}

View File

@ -22,5 +22,10 @@ public class PictureUploadRequest implements Serializable {
*/
private String fileUrl;
/**
 * Picture name supplied by the caller (may be left unset).
 */
private String picName;
private static final long serialVersionUID = 1L;
}

View File

@ -4,10 +4,7 @@ import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.extension.service.IService;
import edu.whut.smilepicturebackend.model.dto.picture.PictureEditRequest;
import edu.whut.smilepicturebackend.model.dto.picture.PictureQueryRequest;
import edu.whut.smilepicturebackend.model.dto.picture.PictureReviewRequest;
import edu.whut.smilepicturebackend.model.dto.picture.PictureUploadRequest;
import edu.whut.smilepicturebackend.model.dto.picture.*;
import edu.whut.smilepicturebackend.model.entity.Picture;
import edu.whut.smilepicturebackend.model.entity.User;
import edu.whut.smilepicturebackend.model.vo.PictureVO;
@ -80,5 +77,21 @@ public interface PictureService extends IService<Picture> {
*/
void doPictureReview(PictureReviewRequest pictureReviewRequest, User loginUser);
/**
 * Fills in the review (moderation) parameters of a picture.
 *
 * @param picture   picture whose review-related fields are populated
 * @param loginUser user performing the operation; presumably determines which
 *                  review status is applied - confirm against the implementation
 */
void fillReviewParams(Picture picture, User loginUser);
/**
 * Fetches pictures in bulk and creates picture records from them.
 *
 * @param pictureUploadByBatchRequest batch upload parameters (search text, count, name prefix)
 * @param loginUser                   user on whose behalf the pictures are created
 * @return number of pictures that were created successfully
 */
Integer uploadPictureByBatch(PictureUploadByBatchRequest pictureUploadByBatchRequest,
User loginUser);
}

View File

@ -17,10 +17,7 @@ import edu.whut.smilepicturebackend.manager.upload.FilePictureUpload;
import edu.whut.smilepicturebackend.manager.upload.PictureUploadTemplate;
import edu.whut.smilepicturebackend.manager.upload.UrlPictureUpload;
import edu.whut.smilepicturebackend.mapper.PictureMapper;
import edu.whut.smilepicturebackend.model.dto.picture.PictureEditRequest;
import edu.whut.smilepicturebackend.model.dto.picture.PictureQueryRequest;
import edu.whut.smilepicturebackend.model.dto.picture.PictureReviewRequest;
import edu.whut.smilepicturebackend.model.dto.picture.PictureUploadRequest;
import edu.whut.smilepicturebackend.model.dto.picture.*;
import edu.whut.smilepicturebackend.model.entity.Picture;
import edu.whut.smilepicturebackend.model.entity.User;
import edu.whut.smilepicturebackend.model.enums.PictureReviewStatusEnum;
@ -30,11 +27,17 @@ import edu.whut.smilepicturebackend.model.vo.UserVO;
import edu.whut.smilepicturebackend.service.PictureService;
import edu.whut.smilepicturebackend.service.UserService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
import java.util.Date;
import java.util.List;
import java.util.Map;
@ -46,6 +49,7 @@ import java.util.stream.Collectors;
* @description 针对表picture(图片)的数据库操作Service实现
* @createDate 2025-06-11 11:23:11
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class PictureServiceImpl extends ServiceImpl<PictureMapper, Picture>
@ -105,6 +109,13 @@ public class PictureServiceImpl extends ServiceImpl<PictureMapper, Picture>
Picture picture = new Picture();
// 复制同名属性urlnamepicSizepicWidthpicHeightpicScalepicFormat
BeanUtils.copyProperties(uploadPictureResult, picture);
// 支持外层pictureUploadRequest传递图片名称
picture.setName(
StrUtil.blankToDefault(
pictureUploadRequest == null ? null : pictureUploadRequest.getPicName(),
uploadPictureResult.getName()
)
);
picture.setUserId(loginUser.getId());
// 补充审核参数
this.fillReviewParams(picture, loginUser);
@ -292,6 +303,68 @@ public class PictureServiceImpl extends ServiceImpl<PictureMapper, Picture>
picture.setReviewStatus(PictureReviewStatusEnum.REVIEWING.getValue());
}
}
/**
 * Crawls Bing image search results for a keyword and uploads the pictures found, one by one.
 *
 * <p>A failure to upload a single picture is logged and skipped, so the returned number
 * may be smaller than the requested count. (Idea for later: the crawled pictures could be
 * tagged with the help of AI.)
 *
 * @param pictureUploadByBatchRequest search keyword, number of pictures wanted (1 to 30)
 *                                    and an optional name prefix (defaults to the keyword)
 * @param loginUser                   user passed on to {@code uploadPicture} for every picture
 * @return number of pictures uploaded successfully
 * @throws BusinessException if the parameters are invalid, the result page cannot be
 *                           fetched, or the expected result container is missing
 */
@Override
public Integer uploadPictureByBatch(PictureUploadByBatchRequest pictureUploadByBatchRequest, User loginUser) {
    // Validate parameters up front so bad input yields PARAMS_ERROR instead of an NPE.
    ThrowUtils.throwIf(pictureUploadByBatchRequest == null, ErrorCode.PARAMS_ERROR);
    String searchText = pictureUploadByBatchRequest.getSearchText();
    ThrowUtils.throwIf(StrUtil.isBlank(searchText), ErrorCode.PARAMS_ERROR, "搜索词不能为空");
    Integer count = pictureUploadByBatchRequest.getCount();
    // count may be null when the client sends an explicit JSON null; non-positive counts make no sense.
    ThrowUtils.throwIf(count == null || count < 1, ErrorCode.PARAMS_ERROR, "抓取数量必须大于 0");
    ThrowUtils.throwIf(count > 30, ErrorCode.PARAMS_ERROR, "最多 30 条");
    // The name prefix defaults to the search keyword.
    String namePrefix = pictureUploadByBatchRequest.getNamePrefix();
    if (StrUtil.isBlank(namePrefix)) {
        namePrefix = searchText;
    }
    // Fetch the result page. Passing the keyword through data(...) lets jsoup URL-encode it,
    // so spaces, '&', '#' or non-ASCII keywords cannot corrupt the query string.
    Document document;
    try {
        document = Jsoup.connect("https://cn.bing.com/images/async")
                .data("q", searchText)
                .data("mmasync", "1")
                .get();
    } catch (IOException e) {
        log.error("获取页面失败", e);
        throw new BusinessException(ErrorCode.OPERATION_ERROR, "获取页面失败");
    }
    // Locate the container that holds the result thumbnails.
    Element div = document.getElementsByClass("dgControl").first();
    if (div == null) {
        throw new BusinessException(ErrorCode.OPERATION_ERROR, "获取元素失败");
    }
    Elements imgElementList = div.select("img.mimg");
    // Upload the pictures one by one until the requested count is reached.
    int uploadCount = 0;
    for (Element imgElement : imgElementList) {
        if (uploadCount >= count) {
            break;
        }
        String fileUrl = imgElement.attr("src");
        if (StrUtil.isBlank(fileUrl)) {
            // Not every image element carries a usable link.
            log.info("当前链接为空,已跳过:{}", fileUrl);
            continue;
        }
        // Drop the query string to avoid escaping problems and clashes with object storage,
        // e.g. "codefather.cn?yupi=dog" becomes "codefather.cn".
        int questionMarkIndex = fileUrl.indexOf("?");
        if (questionMarkIndex > -1) {
            fileUrl = fileUrl.substring(0, questionMarkIndex);
        }
        PictureUploadRequest pictureUploadRequest = new PictureUploadRequest();
        pictureUploadRequest.setFileUrl(fileUrl);
        pictureUploadRequest.setPicName(namePrefix + (uploadCount + 1));
        try {
            PictureVO pictureVO = this.uploadPicture(fileUrl, pictureUploadRequest, loginUser);
            log.info("图片上传成功id = {}", pictureVO.getId());
            uploadCount++;
        } catch (Exception e) {
            // Best effort: one broken picture must not abort the whole batch.
            log.error("图片上传失败", e);
        }
    }
    return uploadCount;
}
}