Commit 6b6d3fe5 authored by Nicolas Peslerbe's avatar Nicolas Peslerbe

Update of all interfaces and sw drivers, more uniform, tests to be done soon

parent 486f0336
......@@ -7,8 +7,7 @@
* Inspirations: HLS libs / OpenCV code
************/
#include "inc/blur_interface.hpp"
#include "inc/blur.hpp"
#include "inc/blurInterface.hpp"
#include <iostream>
#include "inc/mtwist.h"
#include <stdio.h>
......@@ -17,6 +16,7 @@ using namespace hls;
#include <hls_video.h>
#include <string.h>
#include "inc/CustomMat.hpp"
#define INPUT_FORMAT HLS_8UC3
//int getShift(const int baseAdress, const unsigned short imageW, const short row, const short column){
//#pragma HLS INLINE
......@@ -35,6 +35,9 @@ using namespace hls;
//#endif
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3> IMAGE_MAT_T;
// Initialization entry point declared in the blur interface header.
// The body is empty here, so calling it has no effect.
// NOTE(review): presumably a placeholder so host code can call the same
// init/release pair regardless of backend — confirm.
void init_blur(){}
// Release counterpart of init_blur(); the body is likewise empty.
void release_blur(){}
template<typename T>
Scalar<3, ap_uint<16> > sumColumn(const Scalar<3, T> windowBuffer[MAX_BLUR_AREA_LENGTH], unsigned short blurAreaLength){
#pragma HLS inline
......@@ -141,10 +144,10 @@ void exractFlow( IMAGE_FORMAT* image, IMAGE_FORMAT* image2, unsigned short imag
void blur_ip(
IMAGE_FORMAT* image,
IMAGE_FORMAT* image2,
ap_uint<32> x,
ap_uint<32> y,
ap_uint<32> wl,
ap_uint<32> param
unsigned int x,
unsigned int y,
unsigned int wl,
unsigned int param
){
#pragma HLS INTERFACE m_axi depth=1920*1080 port=image offset=slave bundle=master
......
/*****************************************
* HLS blur function using matrix stream
*
* Developer: Nicolas Peslerbe (development@peslerbe.com)
* Version: 0.1
*
* Inspirations: HLS libs / OpenCV code
************/
#ifndef __BLUR__HPP_
#define __BLUR__HPP_
#include <hls_video.h>
// Face-count limit used to size NB_MEM_ELMT below.
#define MAX_NB_OF_FACES 256
// Number of groups of 32 needed to cover MAX_NB_OF_FACES, rounded up:
// ceil(MAX_NB_OF_FACES / 32). The whole body is parenthesized so the macro
// behaves as a single operand inside larger expressions
// (e.g. `NB_MEM_ELMT * 2`, `x / NB_MEM_ELMT`).
#define NB_MEM_ELMT ((MAX_NB_OF_FACES) / 32 + ((MAX_NB_OF_FACES) % 32 != 0))
/**
 * Combines a source pixel stream with a mask pixel stream, one pixel per
 * pipeline iteration, and writes the result to the destination stream.
 *
 * For each channel of each pixel, the least-significant bit of the mask
 * value selects the operation:
 *   - bit set:   dst = src - mask, saturating at 0
 *   - bit clear: dst = src + mask, saturating at the literal 255
 *
 * The channel count comes from HLS_MAT_CN(T) instead of being hard-coded to
 * three, so the function does not index past the end of Scalar::val when it
 * is instantiated with a type that has fewer channels. For 3-channel types
 * the result is unchanged.
 *
 * @tparam ROWS  Maximum number of rows of the hls::Mat streams.
 * @tparam COLS  Maximum number of columns of the hls::Mat streams.
 * @tparam T     HLS pixel type code (e.g. HLS_8UC3).
 * @param _src   Source stream; exactly rows*cols pixels are consumed.
 * @param _mask  Mask stream; must have the same dimensions as _dst.
 * @param _dst   Destination stream; exactly rows*cols pixels are produced.
 */
template<int ROWS, int COLS, int T>
void blur (
        hls::Mat<ROWS, COLS, T>& _src,
        hls::Mat<ROWS, COLS, T>& _mask,
        hls::Mat<ROWS, COLS, T>& _dst)
{
    // All three streams must describe images of the same size.
    assert(_src.rows == _dst.rows);
    assert(_src.cols == _dst.cols);
    assert(_mask.rows == _dst.rows);
    assert(_mask.cols == _dst.cols);

    hls::Scalar<HLS_MAT_CN(T), HLS_TNAME(T)> pin, pin2, pout;

    for(unsigned short i = 0; i < _src.rows; i++) {
        for(unsigned short j = 0; j < _src.cols; j++) {
#pragma HLS pipeline rewind
            _src >> pin;
            _mask >> pin2;

            // Per-channel processing; fully unrolled so every channel is
            // handled in the same pipeline iteration.
            for(int c = 0; c < HLS_MAT_CN(T); c++) {
#pragma HLS unroll
                if((unsigned long int)pin2.val[c] & 0x01){
                    // Odd mask value: subtract, clamping at 0.
                    pout.val[c] = (pin.val[c] > pin2.val[c]) ? pin.val[c] - pin2.val[c] : 0;
                } else {
                    // Even mask value: add, clamping at 255.
                    pout.val[c] = (pin.val[c] < 255 - pin2.val[c]) ? pin.val[c] + pin2.val[c] : 255;
                }
            }

            _dst << pout;
        }
    }
}
#endif
......@@ -11,28 +11,25 @@
#ifndef __BLUR_INTERFACE_HPP__
#define __BLUR_INTERFACE_HPP__
#include <hls_video.h>
/** RESIZING PARAMETERS **/
#define MAX_WIDTH 1920
#define MAX_HEIGHT 1080
#define INPUT_FORMAT HLS_8UC3
#define DEFAULT_SEED 0x123
#define BLUR_MARGIN 2
#define AXI_SIZE 32
void init_blur();
void release_blur();
typedef ap_uint<AXI_SIZE> IMAGE_FORMAT;
typedef unsigned int IMAGE_FORMAT;
void blur_ip(
IMAGE_FORMAT* image,
IMAGE_FORMAT* image2,
ap_uint<32> x,
ap_uint<32> y,
ap_uint<32> wl,
ap_uint<32> param
unsigned int x,
unsigned int y,
unsigned int wl,
unsigned int param
);
#endif
#endif // __BLUR_INTERFACE_HPP__
......@@ -9,7 +9,7 @@
* Inspirations: HLS libs / OpenCV code
************/
#include "../../common/c/inc/blurDrivers.hpp"
#include "../../common/c/inc/blur.hpp"
#include <hls_opencv.h>
#include <iostream>
#include "tb_compareFiles.hpp"
......
......@@ -3,11 +3,7 @@
#include "AlignedImage.hpp"
#ifdef PETALINUX
#else
#include "../../../blur/c/inc/blur_interface.hpp"
#endif
#include "../../../blur/c/inc/blurInterface.hpp"
// ORs `a` with `b` shifted left by 16 bits, yielding one packed word
// (callers in this file pass a height/width pair).
// Arguments and body are parenthesized and the macro carries no trailing
// semicolon, so it can be used as an ordinary operand inside expressions;
// existing `x = PACK(a, b);` statements keep working unchanged.
#define PACK(a, b) ((a) | ((unsigned int) (b) << 16))
......@@ -19,6 +15,7 @@ class BlurExt{
public:
// Stores the blur configuration in the corresponding members and then calls
// init_blur().
//   blurMargin - copied to _blurMargin (default 4)
//   random     - copied to _random (default false)
//   seed       - copied to _seed (default 0x1458)
// NOTE(review): the initializer list order is _random, _seed, _blurMargin;
// the member declaration order is not visible here — confirm they match.
BlurExt(unsigned short blurMargin=4, bool random=false, unsigned short seed=0x1458) :
_random(random), _seed(seed), _blurMargin(blurMargin){
init_blur();
}
......@@ -41,18 +38,14 @@ public:
unsigned int wl = PACK(src._height, src._width);
unsigned int param = (_blurMargin & 0xF) << 2 | 0 << 1 | _random | (unsigned int) _seed << 16;
#ifdef PETALINUX
#else
blur_ip(
(IMAGE_FORMAT*) src.imageData,
(IMAGE_FORMAT*) outImg->imageData,
(ap_uint<32>) x,
(ap_uint<32>) y,
(ap_uint<32>) wl,
(ap_uint<32>) param
(unsigned int) x,
(unsigned int) y,
(unsigned int) wl,
(unsigned int) param
);
#endif
return *outImg;
}
......@@ -73,18 +66,14 @@ public:
unsigned int wl = PACK(src._height, src._width);
unsigned int param = (0 & 0xF) << 2 | 1 << 1 | 0 | (unsigned int) _seed << 16;
#ifdef PETALINUX
#else
blur_ip(
(IMAGE_FORMAT*) src.imageData,
(IMAGE_FORMAT*) outImg->imageData,
(ap_uint<32>) x,
(ap_uint<32>) y,
(ap_uint<32>) wl,
(ap_uint<32>) param
(unsigned int) x,
(unsigned int) y,
(unsigned int) wl,
(unsigned int) param
);
#endif
return *outImg;
}
......@@ -101,10 +90,10 @@ public:
// void blur_ip(
// (IMAGE_FORMAT*) src.imageData,
// (IMAGE_FORMAT*) outImg.imageData,
// (ap_uint<32>) x,
// (ap_uint<32>) y,
// (ap_uint<32>) wl,
// (ap_uint<32>) param
// (unsigned int) x,
// (unsigned int) y,
// (unsigned int) wl,
// (unsigned int) param
// )
//#endif
// return outImage;
......
......@@ -39,6 +39,10 @@ typedef array<int, 3> shape_t;
class Numpy{
#ifdef PETALINUX
vector<unsigned char> allAxis{0, 1, 2};
unsigned int computeReduceSize(unsigned int inSize, unsigned char axis, bool keepdims){
......
......@@ -3,11 +3,7 @@
#include "AlignedImage.hpp"
#ifdef PETALINUX
#else
#include "../../../resize/c/inc/resize_interface.hpp"
#endif
#include "../../../resize/c/inc/resizeInterface.hpp"
class Resize{
public:
......@@ -17,11 +13,7 @@ public:
AlignedImage apply(AlignedImage& src, unsigned short newH, unsigned short newW){
AlignedImage outImage(newH, newW);
#ifdef PETALINUX
#else
resize_ip((IMAGE_FORMAT*) src.imageData,(IMAGE_FORMAT*) outImage.imageData, src._height, src._width, newH, newW);
#endif
return outImage;
}
......
......@@ -43,7 +43,7 @@ for line in rawLines:
#print(template)
destF = open("c/inc/numpyDrivers.hpp", "w")
destF = open("c/inc/numpy.hpp", "w")
destTb = open("../numpy/testbench/tb_numpy.cpp", "w")
def fromTemplate(f, template, title):
......
......@@ -7,11 +7,8 @@
#include <array>
#include <iostream>
using namespace std;
#ifdef PETALINUX
#else
#include "../../../numpy/c/inc/numpy.hpp"
#endif
#include "../../../numpy/c/inc/numpyInterface.hpp"
// Constant 0x80008000 (bit 15 and bit 31 set) as an int; parenthesized so it
// always acts as a single operand.
#define ALL_EL ((int) 0x80008000)
// ORs `s` with 0x8001 placed in the upper 16 bits (0x80010000).
// The shift is done on an unsigned value to avoid shifting into the sign bit
// of a signed int, and the argument and body are parenthesized so that
// expressions such as `SING(x) & mask` apply to the whole result.
#define SING(s) ((s) | (int) (0x8001u << 16))
......@@ -37,8 +34,9 @@ typedef array<int, 3> shape_t;
#SEC "class_header"
class Numpy{
vector<unsigned char> allAxis{0, 1, 2};
XResize_ip numpyObj;
unsigned int computeReduceSize(unsigned int inSize, unsigned char axis, bool keepdims){
if(keepdims) return NEW_SHAPE(R_SHAPE(0, inSize), R_SHAPE(1, inSize), R_SHAPE(2, inSize));
......@@ -62,13 +60,14 @@ class Numpy{
public:
Numpy(){
// init numpy drivers here
}
#SEC "class_bottom"
~Numpy(){}
~Numpy(){
}
};
......
#ifndef __NUMPY_HPP__
#define __NUMPY_HPP__
#include <hls_video.h>
// Extracts a bit field: shifts `s` right by SHIFT, then masks with MASK.
// Arguments and body are parenthesized so the macro is safe inside larger
// expressions (e.g. `GET(reg, m, sh) == value`).
#define GET(s, MASK, SHIFT) (((s) >> (SHIFT)) & (MASK))
// Builds a bit field: converts `s` to ap_uint<NB_BITS>, masks it with MASK,
// then shifts the result left by SHIFT. Same evaluation order as before, but
// the cast now applies to the whole `s` argument and the result is a single
// parenthesized operand.
#define SET(s, NB_BITS, MASK, SHIFT) ((((ap_uint<NB_BITS>) (s)) & (MASK)) << (SHIFT))
......@@ -86,15 +85,17 @@
typedef int UNIT_INT_T;
typedef float UNIT_T;
// Bool array should be unsigned int
#ifndef PETALINUX
UNIT_INT_T numpyTop( ap_uint<32> instructionReg,
UNIT_T *dst_mem, UNIT_INT_T *dst_int_mem, ap_uint<32> dst_sz,
UNIT_T *src1_mem, UNIT_INT_T *src1_int_mem, ap_uint<32> src1_sz,
UNIT_T *src2_mem, UNIT_INT_T *src2_int_mem, ap_uint<32> src2_sz,
UNIT_INT_T *param_int_mem, ap_uint<32> src_bool_sz,
void init_numpy();
void release_numpy();
UNIT_INT_T numpyTop( unsigned int instructionReg,
UNIT_T *dst_mem, UNIT_INT_T *dst_int_mem, unsigned int dst_sz,
UNIT_T *src1_mem, UNIT_INT_T *src1_int_mem, unsigned int src1_sz,
UNIT_T *src2_mem, UNIT_INT_T *src2_int_mem, unsigned int src2_sz,
UNIT_INT_T *param_int_mem, unsigned int src_bool_sz,
UNIT_T immVal1, UNIT_T immVal2,
UNIT_INT_T immVal_int1, UNIT_INT_T immVal_int2,
ap_uint<32> param0, ap_uint<32> param1);
#endif
unsigned int param0, unsigned int param1);
#endif
#include "inc/Hwtensor.hpp"
#include "inc/numpy.hpp"
#include "inc/numpyInterface.hpp"
#include <iostream>
#include "inc/operations.hpp"
#include <hls_video.h>
typedef Hwtensor<UNIT_T> TENSOR_T;
#define BUFFER_T1_S 16384
#define BUFFER_SORT_S 4096
using namespace std;
// Initialization entry point declared alongside numpyTop().
// The body is empty here, so calling it has no effect.
// NOTE(review): presumably a placeholder mirroring a driver-side
// init/release pair — confirm.
void init_numpy(){}
// Release counterpart of init_numpy(); the body is likewise empty.
void release_numpy(){}
template <typename T>
void type0_op_flow( ap_uint<6> instructionReg,
......@@ -83,8 +89,8 @@ void type1_op_flow( ap_uint<6> instructionReg,
bool axis1 = (param & 0b010);
bool axis0 = (param & 0b001);
T buffer[32768];
UNIT_INT_T buffer_int[32768];
T buffer[BUFFER_T1_S];
UNIT_INT_T buffer_int[BUFFER_T1_S];
int currentPos = 0;
......@@ -180,8 +186,8 @@ UNIT_INT_T type2_op_flow( ap_uint<6> instructionReg,
{
bool descending = param0 & 0b1;
int totalL = SIZE(src1_sz);
T buffer[8192];
int buffer_int[8192];
T buffer[BUFFER_SORT_S];
int buffer_int[BUFFER_SORT_S];
for (int c = 0 ; c < totalL; c++){
#pragma HLS pipeline
buffer[c] = src1_mem[c];
......@@ -189,9 +195,9 @@ UNIT_INT_T type2_op_flow( ap_uint<6> instructionReg,
}
for (int c = 0 ; c < totalL - 1; c++){
#pragma HLS pipeline
#pragma HLS pipeline
for (int d = 0 ; d < totalL - c - 1; d++){
#pragma HLS pipeline
#pragma HLS pipeline
if ((!descending && buffer[d] > buffer[d+1]) || (descending && buffer[d] < buffer[d+1])){
T swap = buffer[d];
buffer[d] = buffer[d+1];
......@@ -216,8 +222,9 @@ UNIT_INT_T type2_op_flow( ap_uint<6> instructionReg,
for(int i = SHAPE0(min); i < SHAPE0(max); i++){
#pragma HLS pipeline
for(int j = SHAPE1(min); j < SHAPE1(max); j++){
#pragma HLS pipeline
#pragma HLS pipeline
for(int k = SHAPE2(min); k < SHAPE2(max); k++){
#pragma HLS pipeline
dst_mem[wPtr++] = src1_mem[k + SHAPE2(src1_sz) * j + SHAPE1(src1_sz) * SHAPE2(src1_sz) * i];
}
}
......@@ -383,14 +390,14 @@ UNIT_INT_T type2_op_flow( ap_uint<6> instructionReg,
return retVal;
}
UNIT_INT_T numpyTop( ap_uint<32> instructionReg,
UNIT_T *dst_mem, UNIT_INT_T *dst_int_mem, ap_uint<32> dst_sz,
UNIT_T *src1_mem, UNIT_INT_T *src1_int_mem, ap_uint<32> src1_sz,
UNIT_T *src2_mem, UNIT_INT_T *src2_int_mem, ap_uint<32> src2_sz,
UNIT_INT_T *param_int_mem, ap_uint<32> src_bool_sz,
UNIT_INT_T numpyTop( unsigned int instructionReg,
UNIT_T *dst_mem, UNIT_INT_T *dst_int_mem, unsigned int dst_sz,
UNIT_T *src1_mem, UNIT_INT_T *src1_int_mem, unsigned int src1_sz,
UNIT_T *src2_mem, UNIT_INT_T *src2_int_mem, unsigned int src2_sz,
UNIT_INT_T *param_int_mem, unsigned int src_bool_sz,
UNIT_T immVal1, UNIT_T immVal2,
UNIT_INT_T immVal_int1, UNIT_INT_T immVal_int2,
ap_uint<32> param0, ap_uint<32> param1)
unsigned int param0, unsigned int param1)
{
UNIT_INT_T retVal = 0;
#pragma HLS INTERFACE s_axilite port=dst_sz
......@@ -406,13 +413,13 @@ UNIT_INT_T numpyTop( ap_uint<32> instructionReg,
#pragma HLS INTERFACE s_axilite port=instructionReg
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE m_axi depth=3000 port=param_int_mem
#pragma HLS INTERFACE m_axi depth=3000 port=dst_mem
#pragma HLS INTERFACE m_axi depth=3000 port=src1_mem
#pragma HLS INTERFACE m_axi depth=3000 port=src2_mem
#pragma HLS INTERFACE m_axi depth=3000 port=dst_int_mem
#pragma HLS INTERFACE m_axi depth=3000 port=src1_int_mem
#pragma HLS INTERFACE m_axi depth=3000 port=src2_int_mem
#pragma HLS INTERFACE m_axi depth=5000 port=param_int_mem bundle=master
#pragma HLS INTERFACE m_axi depth=5000 port=dst_mem bundle=master
#pragma HLS INTERFACE m_axi depth=5000 port=src1_mem bundle=master
#pragma HLS INTERFACE m_axi depth=5000 port=src2_mem bundle=master
#pragma HLS INTERFACE m_axi depth=5000 port=dst_int_mem bundle=master
#pragma HLS INTERFACE m_axi depth=5000 port=src1_int_mem bundle=master
#pragma HLS INTERFACE m_axi depth=5000 port=src2_int_mem bundle=master
switch (GET_MTYPE(instructionReg)){
case FLOAT32:
switch(GET_OPTYPE(instructionReg))
......
/*****************************************
* Test file for the hardware resizing module
* The test is compatible with the stream or DMA like version of the module
* OpenCV and hls_video are required to make the module work
*
* Developer: Nicolas Peslerbe (development@peslerbe.com)
* Version: 0.1
*
* Inspirations: HLS libs / OpenCV code
************/
#include "csvreader.hpp"
#include "../../common/c/inc/numpyDrivers.hpp"
#include <iostream>
using namespace std;
/**
 * Writes a three-dimensional tensor to standard output.
 *
 * A header line with the three dimension sizes is printed first. Then, for
 * each index along dimension 0, one text line is emitted that contains one
 * bracketed group per index along dimension 1; each group lists the elements
 * along dimension 2 separated by spaces.
 *
 * @tparam T      Element type of the tensor.
 * @param tensor  Tensor to print; only getDim() and get() are used.
 */
template <typename T>
void printm(const Tensor <T>& tensor){
    const auto dim0 = tensor.getDim(0);
    const auto dim1 = tensor.getDim(1);
    const auto dim2 = tensor.getDim(2);

    cout << "Print matrix of sizes: " << dim0 << "," << dim1 << "," << dim2 << endl;

    for(int plane = 0; plane < dim0; ++plane){
        for(int row = 0; row < dim1; ++row){
            // One bracketed group per (plane, row) pair.
            cout << " [";
            for(int col = 0; col < dim2; ++col){
                cout << tensor.get(plane, row, col);
                cout << " ";
            }
            cout << "] ";
        }
        // End of the line for this plane.
        cout << endl;
    }
}
typedef float NUMPY_TYPE;
// Testbench entry point.
// Loads a 2-D CSV fixture into a tensor, prints it, applies Numpy::reduce_max
// with second argument 0, loads a reference CSV, and prints `matrix - matrix2`.
// The difference is only printed, never checked: the function always
// returns 0. argc/argv are unused.
// NOTE(review): both fixture files live under "float32_reduce_min/" while the
// operation invoked is reduce_max() — confirm which reduction is intended.
int main (int argc, char** argv) {
// Input fixture.
Tensor<NUMPY_TYPE> matrix = parse2DCsvFile<NUMPY_TYPE>("float32_reduce_min/original_0.txt");
printm(matrix);
Numpy np;
// Result overwrites the input tensor.
matrix = np.reduce_max(matrix, 0);
// Reference output to compare against.
Tensor<NUMPY_TYPE> matrix2 = parse2DCsvFile<NUMPY_TYPE>("float32_reduce_min/out_{'axis': 0}.txt");
printm(matrix-matrix2);
return 0;
}
/*****************************************
* HLS resize function using matrix stream
*
* Developer: Nicolas Peslerbe (development@peslerbe.com)
* Version: 0.1
*
* Inspirations: HLS libs / OpenCV code
************/
#ifndef __RESIZE__HPP_
#define __RESIZE__HPP_
#include <hls_video.h>
// Thin wrapper around hls::Resize: forwards `src` and `dst` unchanged.
// The source and destination may have different maximum dimensions
// (ROWS/COLS vs. DROWS/DCOLS) but share the same pixel type SRC_T.
// Marked with the HLS inline pragma.
template <int ROWS, int COLS, int SRC_T, int DROWS, int DCOLS>
void resize(hls::Mat<ROWS, COLS, SRC_T>& src, hls::Mat<DROWS, DCOLS, SRC_T>& dst){
#pragma HLS inline
hls::Resize( src, dst );
}
#endif
......@@ -11,54 +11,36 @@
#ifndef __RESIZE_INTERFACE_HPP__
#define __RESIZE_INTERFACE_HPP__
#include <hls_video.h>
/** RESIZING PARAMETERS **/
#define MAX_W 1920
#define MAX_H 1080
#define INPUT_FORMAT HLS_8UC3
void init_resize();
void release_resize();
typedef hls::Mat<MAX_H, MAX_W, INPUT_FORMAT> IMAGE_MAT_T;
#ifdef PETALINUX
/** INTERFACES WITH THE OUTSIDE **/
typedef u32 IMAGE_FORMAT;
#define INPUT_DDR
#define OUTPUT_DDR
#define AXI_SIZE 32
#else
#define AXI_SIZE 32
#define INPUT_FORMAT HLS_8UC3
#include <hls_video.h>
typedef hls::Mat<MAX_H, MAX_W, INPUT_FORMAT> IMAGE_MAT_T;
typedef ap_uint<AXI_SIZE> IMAGE_FORMAT;
// Either you use a DDR memory base address as input and the module reads from it, or you use an AXI stream
#ifndef INPUT_DDR
#define IN_STREAM_W 32
typedef hls::stream<ap_axiu<IN_STREAM_W,1,1,1> > axiins;
#endif
// Either you use a DDR memory base address as output and the module writes to it, or you use an AXI stream
#ifndef OUTPUT_DDR
#define OUT_STREAM_W 32
typedef hls::stream<ap_axiu<OUT_STREAM_W,1,1,1> > axiouts;
#endif
void resize_ip(
#ifdef INPUT_DDR
IMAGE_FORMAT* image,
#else
axiins& INPUT_STREAM,
#endif
#ifdef OUTPUT_DDR
IMAGE_FORMAT* new_image,
#else
axiouts& OUTPUT_STREAM,
#endif
unsigned short srcH,
unsigned short srcW,
unsigned short destH,
unsigned short destW
);
#endif
#endif // __RESIZE_INTERFACE_HPP__
......@@ -7,23 +7,25 @@
* Inspirations: HLS libs / OpenCV code
************/
#include "inc/resize_interface.hpp"
#include "inc/resize.hpp"
#include "inc/resizeInterface.hpp"
#include <hls_video.h>
#include <iostream>
using namespace std;
// Initialization entry point declared in the resize interface header.
// The body is empty here, so calling it has no effect.
// NOTE(review): presumably a placeholder mirroring a driver-side
// init/release pair — confirm.
void init_resize(){}
// Release counterpart of init_resize(); the body is likewise empty.
void release_resize(){}
// Thin wrapper around hls::Resize: forwards `src` and `dst` unchanged.
// Source and destination share the pixel type SRC_T but may have different
// maximum dimensions. Marked with the HLS inline pragma.
template <int ROWS, int COLS, int SRC_T, int DROWS, int DCOLS>
void resize(hls::Mat<ROWS, COLS, SRC_T>& src, hls::Mat<DROWS, DCOLS, SRC_T>& dst){
#pragma HLS inline
hls::Resize( src, dst );
}
void resize_ip(
#ifdef INPUT_DDR
IMAGE_FORMAT* image,
#else
axiins& INPUT_STREAM,
#endif
#ifdef OUTPUT_DDR
IMAGE_FORMAT* new_image,
#else
axiouts& OUTPUT_STREAM,
#endif
unsigned short srcH,
unsigned short srcW,
unsigned short destH,
......@@ -39,21 +41,11 @@ void resize_ip(
IMAGE_MAT_T img_in(srcH, srcW);
IMAGE_MAT_T img_out(destH, destW);
#ifdef INPUT_DDR
#pragma HLS INTERFACE m_axi depth=1920*1080 port=image offset=slave
hls::AXIM2Mat<MAX_W>(image, srcW, img_in);
#else
#pragma HLS INTERFACE axis register both port=INPUT_STREAM bundle=VIDEO_IN
hls::AXIvideo2Mat(INPUT_STREAM, img_in);
#endif
resize(img_in, img_out);
#ifdef OUTPUT_DDR
#pragma HLS INTERFACE m_axi depth=1920*1080 port=new_image offset=slave
hls::Mat2AXIM<MAX_W>(img_out, new_image, destW);
#else
#pragma HLS INTERFACE axis register both port=OUTPUT_STREAM bundle=VIDEO_OUT
hls::Mat2AXIvideo(img_out, OUTPUT_STREAM);
#endif