[原创] #AI挑战营终点站#RV1106手写数字识别部署

和和123 2024-6-9 15:15 楼主

#AI挑战营终点站#RV1106手写数字识别部署

下载和解压烧写工具
参考：
https://wiki.luckfox.com/zh/Luckfox-Pico/Luckfox-Pico-quick-start
安装驱动：
下载：
https://files.luckfox.com/wiki/Luckfox-Pico/Software/DriverAssitant_v5.12.zip
安装：

安装烧写工具：
下载：
https://files.luckfox.com/wiki/Luckfox-Pico/Software/SocToolKit.zip
安装：
解压后执行

选择RV1106
下载固件库方法
① 将 SD 卡装在读卡器并连接到电脑，选择 SD 卡工具。
② 在 USB 磁盘下会显示 SD 卡大小，如果未显示重新插拔读卡器。
③ 选择 SD卡启动。
④ 导入启动文件。（注意：启动文件不包括update.img）
⑤点击创建 SD 卡。
将烧写好的sd卡插入板子，上电后通过adb登入

成功识别到摄像头会生成rkipc.ini文件
通过VLC拉流，查看摄像头内容（rtsp://172.32.0.93/live/0）
设置电脑ip：
查看板子ip信息并将电脑ip设置为同网段。

设置VLC：
配置开发环境
参考
https://wiki.luckfox.com/zh/Luckfox-Pico/Luckfox-Pico-SDK
https://wiki.luckfox.com/zh/Luckfox-Pico/Luckfox-Pico-RKNN-Test
https://github.com/luckfox-eng29/luckfox_pico_rtsp_opencv
下载源码：
git clone git@github.com:luckfox-eng29/luckfox_pico_rtsp_opencv.git
编译：
```
export LUCKFOX_SDK_PATH=<Your Luckfox-pico Sdk Path>
mkdir build
cd build
cmake ..
make && make install
```
将编译生成的文件上传板子，内容如图：

运行前请关闭系统默认的rkipc程序，路径如下：
在上面基础上增加图像获取和预处理

图像获取

使用RK提供的函数，从指定通道获取一帧图片

// Illustrative fragment: grab one frame from the VPSS and obtain a CPU-accessible pointer to it.
// NOTE(review): the arguments are presumably (group 0, channel 0, output frame, timeout -1 = block)
// — confirm against the Rockchip MPI documentation.
s32Ret = RK_MPI_VPSS_GetChnFrame(0, 0, &stVpssFrame, -1);
if (s32Ret == RK_SUCCESS)
{
    // Convert the frame's media-buffer handle into a pointer that can be read by the CPU.
    void *data = RK_MPI_MB_Handle2VirAddr(stVpssFrame.stVFrame.pMbBlk);
    // The elided lines below are where the frame is processed (contour search, preprocessing, inference).
    ...
    ...
    ...
 }

预处理部分

/// @brief Find the bounding box of the largest contour in a frame, with jitter suppression.
///
/// Pipeline: grayscale -> 5x5 Gaussian blur -> Canny(30, 150) -> dilate + erode with a
/// 5x5 rectangular kernel -> external contours -> largest contour by area -> area filter ->
/// aspect-ratio filter -> temporal stability filter -> pad by 15 px per side, clipped to
/// the image.
///
/// @param image Input frame. It is converted with cv::COLOR_BGR2GRAY, so it must be a
///              colour image accepted by that conversion.
/// @return The padded bounding box, or an empty cv::Rect() when no contour is found or the
///         candidate is rejected by one of the filters.
///
/// @note The function keeps the most recent bounding boxes in a function-local static, so
///       results depend on previous calls and the function is not safe to call concurrently.
cv::Rect find_digit_contour(const cv::Mat &image) {
    constexpr double kMinContourArea = 10.0;   // contours smaller than this are treated as noise
    constexpr double kMinAspectRatio = 0.2;    // width / height lower bound
    constexpr double kMaxAspectRatio = 3.0;    // width / height upper bound
    constexpr double kMaxSizeDeviation = 0.1;  // allowed relative deviation from the recent average
    constexpr unsigned kHistorySize = 5;       // number of recent boxes used for the stability check
    constexpr int kPadding = 15;               // pixels added on every side of the box

    // Pre-process: grayscale, smooth, detect edges.
    cv::Mat gray, blurred, edged;
    cv::cvtColor(image, gray, cv::COLOR_BGR2GRAY);
    cv::GaussianBlur(gray, blurred, cv::Size(5, 5), 0);
    cv::Canny(blurred, edged, 30, 150);

    // Dilate then erode so that broken edge segments are joined into closed shapes.
    cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(5, 5));
    cv::dilate(edged, edged, kernel);
    cv::erode(edged, edged, kernel);

    // Only outer contours are of interest.
    std::vector<std::vector<cv::Point>> contours;
    cv::findContours(edged, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);

    if (contours.empty()) {
        return cv::Rect();
    }

    // Pick the contour with the largest area (first one wins on ties), computing each
    // area only once.
    const std::vector<cv::Point> *largest_contour = nullptr;
    double largest_area = -1.0;
    for (const auto &contour : contours) {
        const double area = cv::contourArea(contour);
        if (area > largest_area) {
            largest_area = area;
            largest_contour = &contour;
        }
    }

    // Area filter: reject tiny contours.
    if (largest_area < kMinContourArea) {
        return cv::Rect();
    }

    // Shape filter: reject boxes that are extremely flat or extremely tall.
    cv::Rect bounding_box = cv::boundingRect(*largest_contour);
    float aspect_ratio = static_cast<float>(bounding_box.width) / bounding_box.height;
    if (aspect_ratio < kMinAspectRatio || aspect_ratio > kMaxAspectRatio) {
        return cv::Rect();
    }

    // Stability filter: compare the current box size with the average over the last
    // kHistorySize boxes (current one included). The history is trimmed AFTER the push so
    // that it stays at exactly kHistorySize entries and the check runs on every frame once
    // the window is full (previously the window settled at 6 entries and the check ran
    // only once).
    static std::vector<cv::Rect> prev_bounding_boxes;
    prev_bounding_boxes.push_back(bounding_box);
    if (prev_bounding_boxes.size() > kHistorySize) {
        prev_bounding_boxes.erase(prev_bounding_boxes.begin());
    }
    if (prev_bounding_boxes.size() == kHistorySize) {
        float avg_width = 0.0;
        float avg_height = 0.0;
        for (const auto& box : prev_bounding_boxes) {
            avg_width += box.width;
            avg_height += box.height;
        }
        avg_width /= prev_bounding_boxes.size();
        avg_height /= prev_bounding_boxes.size();
        float width_diff = std::abs(bounding_box.width - avg_width) / avg_width;
        float height_diff = std::abs(bounding_box.height - avg_height) / avg_height;
        if (width_diff > kMaxSizeDeviation || height_diff > kMaxSizeDeviation) {
            return cv::Rect();
        }
    }

    // Grow the box by kPadding pixels on every side, then clip it to the image. Using a
    // rectangle intersection keeps each side's growth at most kPadding even when the
    // opposite side is clamped at the image border.
    const cv::Rect image_bounds(0, 0, image.cols, image.rows);
    const cv::Rect padded(bounding_box.x - kPadding,
                          bounding_box.y - kPadding,
                          bounding_box.width + 2 * kPadding,
                          bounding_box.height + 2 * kPadding);

    return padded & image_bounds;
}

/// @brief Turn a cropped digit region into a 28x28 single-channel binary-style image.
///
/// Steps: convert to grayscale, binarise with an Otsu-selected threshold while inverting
/// (pixels above the threshold become 0, the rest become 255), then resize to 28x28 with
/// cv::INTER_AREA.
///
/// @param region Cropped colour image; it is converted with cv::COLOR_BGR2GRAY.
/// @return A 28x28 8-bit single-channel image. Values are in the 0..255 range — no
///         floating-point normalisation is performed here.
///
/// @note The resize stretches the region to 28x28 directly; the aspect ratio is NOT
///       preserved and no padding is added.
cv::Mat preprocess_digit_region(const cv::Mat &region)
{
    cv::Mat gray;
    cv::cvtColor(region, gray, cv::COLOR_BGR2GRAY);

    // One inverted Otsu threshold is equivalent to the former sequence of
    // "Otsu binary -> fixed inverted threshold at 127 -> bitwise_not -> manual 255 - x",
    // because the last two steps cancelled each other out.
    cv::Mat binary_inverted;
    cv::threshold(gray, binary_inverted, 0, 255, cv::THRESH_BINARY_INV | cv::THRESH_OTSU);

    cv::Mat resized;
    cv::resize(binary_inverted, resized, cv::Size(28, 28), 0, 0, cv::INTER_AREA);

    return resized;
}

后处理部分

/// @brief De-quantise the 10 model outputs, L2-normalise them and record the arg-max.
///
/// @param output_attrs Output tensor attributes; only the zero point (`zp`) and `scale`
///                     fields are read.
/// @param output       Quantised model output; the first 10 elements are read.
/// @param out_fp32     Destination for the 10 de-quantised, L2-normalised values.
///
/// Side effects: prints the normalised values and the prediction to stdout, and appends
/// {predicted_digit, max_prob} to `predictions_queue`.
///
/// @note The values are scaled to unit L2 norm, so each lies in [-1, 1]; they are scores,
///       not a probability distribution (they do not sum to 1).
/// @note If every de-quantised value is zero the normalisation is skipped (avoiding a
///       division by zero) and {-1, -1.0} is recorded, as before.
/// NOTE(review): `output` is declared uint8_t while the data is described as INT8; the
/// conversion is left to deqnt_affine_to_f32 — confirm its parameter type.
static void output_normalization(rknn_tensor_attr* output_attrs, uint8_t *output, float *out_fp32)
{
    constexpr int kNumClasses = 10;

    const int32_t zp = output_attrs->zp;
    const float scale = output_attrs->scale;

    // De-quantise each output and accumulate the squared sum for the L2 norm.
    float sum = 0;
    for (int i = 0; i < kNumClasses; ++i)
    {
        out_fp32[i] = deqnt_affine_to_f32(output[i], zp, scale);
        sum += out_fp32[i] * out_fp32[i];
    }

    // Scale to unit L2 norm; skip when the vector is all zeros to avoid producing NaNs.
    const float norm = sqrt(sum);
    const bool has_signal = norm > 0;
    if (has_signal)
    {
        for (int i = 0; i < kNumClasses; ++i)
            out_fp32[i] /= norm;
    }

    // Print the normalised output values.
    printf("\n===================Output data values:===================\n");
    for (int i = 0; i < kNumClasses; ++i)
    {
        printf("%f ", out_fp32[i]);
    }
    printf("\n");

    // Find the index of the largest score (first one wins on ties). The sentinel
    // {-1, -1.0} is kept when there is no usable signal.
    float max_prob = -1.0;
    int predicted_digit = -1;
    if (has_signal)
    {
        for (int i = 0; i < kNumClasses; ++i)
        {
            if (out_fp32[i] > max_prob)
            {
                max_prob = out_fp32[i];
                predicted_digit = i;
            }
        }
    }

    // Record the prediction and its score for downstream consumers.
    predictions_queue.push_back({predicted_digit, max_prob});

    // Print the predicted digit and its score.
    printf("========Predicted digit: %d, Probability: %.2f========\n\n", predicted_digit, max_prob);
}

结果：

本帖最后由和和123 于 2024-6-9 15:20 编辑

mnist数字识别落地.zip (2024-6-9 15:19 上传)

1.01 MB, 下载次数: 1

回复评论（1）

沙发 hjh0512

有用这个的开发板和配套教程没有，如果要是有的话，就好学习了。

点赞 2024-6-9 21:25

[原创] #AI挑战营终点站#RV1106手写数字识别部署

回复评论 （1）

沙发 hjh0512

回复评论（1）