Commit f7cbc131 authored by Nicolas Peslerbe

New version of numpy with more functions

parent 6511821a
@@ -153,6 +153,8 @@ void blur_ip(
#pragma HLS INTERFACE s_axilite port=return bundle=control
#pragma HLS INTERFACE s_axilite port=x bundle=control
#pragma HLS INTERFACE s_axilite port=y bundle=control
#pragma HLS INTERFACE s_axilite port=wl bundle=control
#pragma HLS INTERFACE s_axilite port=param bundle=control
bool random = (unsigned short) param & 0b1;
bool extractFace = (unsigned short) (param >> 1) & 0b1;
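// Host-side sketch (assumption, not from this commit): the `param` word decoded
// by the two lines above would be packed as
//   unsigned short param = 0;
//   param |= (unsigned short) random;             // bit 0: random blur on/off
//   param |= (unsigned short) (extractFace << 1); // bit 1: face extraction on/off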
......
#ifndef __BLUR_DRIVERS_HPP__
#define __BLUR_DRIVERS_HPP__
#endif
\ No newline at end of file
@@ -13,120 +13,190 @@
void operator=(const TypeName&) = delete
using namespace std;
#include <iostream>
#include "size.hpp"
#include "shape_util.hpp"
#include <typeinfo>
#include <cassert>
template <typename T>
class TensorObj{
private:
-DISALLOW_COPY_AND_ASSIGN(TensorObj<T>);
-public:
-T* tensorData;
-unsigned int size;
+DISALLOW_COPY_AND_ASSIGN(TensorObj<T>);
+T* _tensorData;
+unsigned int _shape;
#ifdef PETALINUX
-void* tensorDataPhys;
+void* _tensorDataPhys;
#endif
-unsigned int getPackedSize(){
-return size;
-}
-unsigned int getSize(){
-return GET_TOTAL_ELEMENTS(size);
-}
+void _reshape(unsigned int newShape, bool force=false){
+if(!force) assert(SIZE(newShape) == size());
+_shape = newShape;
+}
+public:
+unsigned int packedShape() const{
+return _shape;
+}
+unsigned int size(){
+return SIZE(_shape);
+}
+unsigned short shape(const int axis) const{
+return getShape(_shape, axis);
+}
+array<int, 3> shape(){
+array<int, 3> shapeTemp{SHAPE0(_shape), SHAPE1(_shape), SHAPE2(_shape)};
+return shapeTemp;
+}
+int nbDims(){
+for(int i=2; i >= 0; i--){
+if(shape(i) != 1){
+return (i+1);
+}
+}
+return 0;
+}
+/**
+ * In place reshape functions
+ **/
+void reshape(unsigned short shape0, unsigned short shape1=1, unsigned short shape2=1){
+unsigned int newShape = NEW_SHAPE(shape0, shape1, shape2);
+_reshape(newShape);
+}
+template <size_t NB_EL>
+void reshape( array<int, NB_EL>& arr, bool force=false){
+unsigned short shape[3];
+for(unsigned short i = 0; i < 3; i++){
+if (i < arr.size())
+shape[i] = arr[i];
+else
+shape[i] = 1;
+}
+_reshape(NEW_SHAPE(shape[0], shape[1], shape[2]), force);
+}
+/**
+ * In place dim expansion functions
+ **/
+unsigned int _expand_dim(unsigned short axis){
+assert(shape(2) == 1);
+unsigned short dims[3];
+int pointer = 0;
+for(int i=0; i < 3; i++){
+// bounds guard: without it the copy below would write past dims[2]
+if(axis == i && pointer < 3){
+dims[pointer] = 1;
+pointer++;
+}
+if(pointer < 3){
+dims[pointer] = shape(i);
+pointer++;
+}
+}
+return NEW_SHAPE(dims[0], dims[1], dims[2]);
+}
+void expand_dim(unsigned short axis){
+_reshape(_expand_dim(axis));
+}
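/**
 * Usage sketch (editorial, assuming the newTensorDim helper defined at the
 * end of this file):
 *   Tensor<float> t = newTensorDim<float>(4, 3);  // packed shape (4, 3, 1)
 *   t->nbDims();                                  // 2: trailing 1-dims do not count
 *   t->reshape(2, 6);                             // ok: still 12 elements, assert passes
 *   t->expand_dim(0);                             // shape becomes (1, 2, 6)
 **/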
/**
* Getter and setter to access tensor elements, with full index or as a flattened tensor
**/
T get(unsigned short i, unsigned short j=0, unsigned short k=0) const{
-return getDataPointer()[i * getDim(1) * getDim(2) + j * getDim(2) + k];
+return getDataPointer()[i * shape(1) * shape(2) + j * shape(2) + k];
}
void set(T value, unsigned short i, unsigned short j=0, unsigned short k=0){
-getDataPointer()[i * getDim(1) * getDim(2) + j * getDim(2) + k] = value;
+getDataPointer()[i * shape(1) * shape(2) + j * shape(2) + k] = value;
}
-void setLin(T value, unsigned int i){
+void setFlat(T value, unsigned int i){
getDataPointer()[i] = value;
}
-T getLin(unsigned int i) const{
+T getFlat(unsigned int i) const{
return getDataPointer()[i];
}
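/**
 * Worked example (sketch): both access paths hit the same row-major buffer.
 * For a tensor of shape (2, 3, 4), get(1, 2, 3) reads flat index
 * 1*shape(1)*shape(2) + 2*shape(2) + 3 = 1*3*4 + 2*4 + 3 = 23,
 * so get(i, j, k) == getFlat(i*shape(1)*shape(2) + j*shape(2) + k).
 **/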
/**
* Tensor initializers
**/
-unsigned short getDim(const int dim) const{
-switch(dim){
-case 0:
-return GET_DIM0(size);
-break;
-case 1:
-return GET_DIM1(size);
-break;
-case 2:
-return GET_DIM2(size);
-break;
-default:
-return -1;
-}
-}
-unsigned int totalSize() const{
-return GET_TOTAL_ELEMENTS(size);
-}
-static std::unique_ptr<TensorObj<T> > initDim(const unsigned short dim0=1,
-const unsigned short dim1=1,
-const unsigned short dim2=1){
-assert(sizeof(T) == 4);
-unsigned int tempSize = NEW_SIZE(dim0, dim1, dim2);
-return make_unique<TensorObj>(tempSize);
+static std::unique_ptr<TensorObj<T> > initDim(const unsigned short shape0=1,
+const unsigned short shape1=1,
+const unsigned short shape2=1){
+assert(sizeof(T) == 4 && "Only 32 bits types supported atm");
+unsigned int tempShape = NEW_SHAPE(shape0, shape1, shape2);
+return make_unique<TensorObj>(tempShape);
}
-static std::unique_ptr<TensorObj<T> > init(const unsigned int newSize){
-return make_unique<TensorObj>(newSize);
+static std::unique_ptr<TensorObj<T> > init(const unsigned int newShape){
+return make_unique<TensorObj>(newShape);
}
-TensorObj(const unsigned int newSize){
+TensorObj(const unsigned int newShape){
#ifdef PETALINUX
-tensorData = mm.newShared(GET_TOTAL_ELEMENTS(newSize)*sizeof(T), &tensorDataPhys);
+_tensorData = mm.newShared(SIZE(newShape)*sizeof(T), &_tensorDataPhys);
#else
-tensorData = new T[GET_TOTAL_ELEMENTS(newSize)];
+_tensorData = new T[SIZE(newShape)];
#endif
-size = newSize;
-//cout << "Tensor of size: " << getDim(0) << "," << getDim(1) << "," << getDim(2) << " initialized. Alloc: " << GET_TOTAL_ELEMENTS(newSize) << endl;
+_shape = newShape;
+//cout << "Tensor of shape: " << shape(0) << "," << shape(1) << "," << shape(2) << " initialized. Alloc: " << SIZE(newShape) << endl;
}
public:
/**
* Data direct access
**/
T* getDataPointer() const{
-return tensorData;
+return _tensorData;
}
#ifdef PETALINUX
u32* getHwTensor(){ return (u32*) _tensorDataPhys;}
#else
-T* getHwTensor(){ return tensorData;}
+T* getHwTensor(){ return _tensorData;}
#endif
~TensorObj(){
#ifdef PETALINUX
-mm.dealloc(tensorData);
+mm.dealloc(_tensorData);
#else
-delete[] tensorData;
+delete[] _tensorData;
#endif
}
std::unique_ptr<TensorObj<T> > operator-(std::unique_ptr<TensorObj<T> >& second) {
-assert(this->getSize() == second->getSize());
-std::unique_ptr<TensorObj<T> > out = TensorObj<T>::init(this->getPackedSize());
-for (int i=0; i < this->getSize(); i++){
+assert(this->size() == second->size());
+std::unique_ptr<TensorObj<T> > out = TensorObj<T>::init(this->packedShape());
+for (int i=0; i < this->size(); i++){
out->setFlat(this->getFlat(i) - second->getFlat(i), i);
}
return out;
}
bool assertWith(std::unique_ptr<TensorObj<T> >& second, bool large=false) {
-assert(this->getSize() == second->getSize());
+assert(this->size() == second->size());
int counter=0;
-for (int i=0; i < this->getSize(); i++){
+for (int i=0; i < this->size(); i++){
if((this->getFlat(i) - second->getFlat(i)) > 0.0025){
if(counter > 20 || ((this->getFlat(i) - second->getFlat(i) != 0x7FFFFFFF) && !large))
@@ -146,15 +216,15 @@ template <typename T>
using Tensor = std::unique_ptr<TensorObj<T> >;
template <typename T>
-Tensor<T> newTensor(const unsigned int newSize){
-return TensorObj<T>::init(newSize);
+Tensor<T> newTensor(const unsigned int newShape){
+return TensorObj<T>::init(newShape);
}
template <typename T>
-Tensor<T> newTensorDim(const unsigned short dim0=1,
-const unsigned short dim1=1,
-const unsigned short dim2=1){
-return TensorObj<T>::initDim(dim0, dim1, dim2);
+Tensor<T> newTensorDim(const unsigned short shape0=1,
+const unsigned short shape1=1,
+const unsigned short shape2=1){
+return TensorObj<T>::initDim(shape0, shape1, shape2);
}
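// Usage sketch (editorial, assumes the helpers above; driver-side code, not part of the commit):
void tensor_usage_example(){
    Tensor<float> a = newTensorDim<float>(2, 3);   // packed shape (2, 3, 1)
    Tensor<float> b = newTensorDim<float>(2, 3);
    for (unsigned int i = 0; i < a->size(); i++){  // fill through the flat accessors
        a->setFlat((float) i, i);
        b->setFlat((float) i, i);
    }
    Tensor<float> diff = (*a) - b;                 // element-wise difference, same shape
    a->assertWith(b);                              // equal tensors stay within the 0.0025 tolerance
}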
......
@@ -17,10 +17,10 @@ using namespace std;
template <size_t NB_EL> \
Tensor<float> reduce_##s (Tensor<float>& a, array<unsigned char, NB_EL> axis, bool keepdims=false){ \
unsigned char axisBool = axisarrayToAxisBool(axis); \
-unsigned int newSize = computeReduceSize(a.getPackedSize(), axisBool, keepdims); \
+unsigned int newSize = computeReduceSize(a->packedShape(), axisBool, keepdims); \
Tensor<float> out = newTensor<float>(newSize); \
-/*numpyTop(SET_INSTRUCTION(op, OP_WITH_BUFFER, FLOAT32), out.getHwTensor(), NULL, out.getPackedSize(), \
-a.getHwTensor(), NULL, a.getPackedSize(), \
+/*numpyTop(SET_INSTRUCTION(op, OP_WITH_BUFFER, FLOAT32), out->getHwTensor(), NULL, out->packedShape(), \
+a->getHwTensor(), NULL, a->packedShape(), \
NULL, NULL, 0, \
NULL, 0, \
0., 0., 0, 0, axisBool, 0); */ \
@@ -29,10 +29,10 @@ using namespace std;
template <size_t NB_EL> \
Tensor<int> reduce_##s (Tensor<int>& a, array<unsigned char, NB_EL> axis, bool keepdims=false){ \
unsigned char axisBool = axisarrayToAxisBool(axis); \
-unsigned int newSize = computeReduceSize(a.getPackedSize(), axisBool, keepdims); \
+unsigned int newSize = computeReduceSize(a->packedShape(), axisBool, keepdims); \
Tensor<int> out = newTensor<int>(newSize); \
-/*numpyTop(SET_INSTRUCTION(op, OP_WITH_BUFFER, INT32), out.getHwTensor(), NULL, out.getPackedSize(), \
-a.getHwTensor(), NULL, a.getPackedSize(), \
+/*numpyTop(SET_INSTRUCTION(op, OP_WITH_BUFFER, INT32), out->getHwTensor(), NULL, out->packedShape(), \
+a->getHwTensor(), NULL, a->packedShape(), \
NULL, NULL, 0, \
NULL, 0, \
0., 0., 0, 0, axisBool, 0); */ \
@@ -56,13 +56,13 @@ public:
unsigned int computeReduceSize(unsigned int inSize, unsigned char axis, bool keepdims){
cout << "New size: " <<(unsigned int)axis << endl;
if(keepdims) return NEW_SIZE(R_DIM(0, inSize), R_DIM(1, inSize), R_DIM(2, inSize));
if(keepdims) return NEW_SHAPE(R_DIM(0, inSize), R_DIM(1, inSize), R_DIM(2, inSize));
unsigned char i = 0;
unsigned short newDims[3] = {1, 1, 1};
if(R_DIM(0, inSize) != 1){newDims[i] = R_DIM(0, inSize);i++;}
if(R_DIM(1, inSize) != 1){newDims[i] = R_DIM(1, inSize);i++;}
if(R_DIM(2, inSize) != 1){newDims[i] = R_DIM(2, inSize);}
-return NEW_SIZE(newDims[0], newDims[1], newDims[2]);
+return NEW_SHAPE(newDims[0], newDims[1], newDims[2]);
}
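// Worked example (editorial sketch; R_DIM(d, s) is assumed to return dim d of s
// with reduced axes already collapsed to 1). Input shape (4, 3, 2), reducing axis 1:
//   keepdims = true  -> NEW_SHAPE(4, 1, 2)  (8 elements, rank kept)
//   keepdims = false -> the 1-dims are packed left -> NEW_SHAPE(4, 2, 1)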
template <size_t NB_EL>
......
+#ifndef __SHAPE_UTIL_HPP__
+#define __SHAPE_UTIL_HPP__
+// Dimensions are packed into one 32-bit word as follows:
+// dim 0: 12 bits (bits 0-11), dim 1: 12 bits (bits 12-23), dim 2: 8 bits (bits 24-31)
+#define SHAPE0(s) ((unsigned short) ((s) & 0xFFF))
+#define SHAPE1(s) ((unsigned short) (((s) >> 12) & 0xFFF))
+#define SHAPE2(s) ((unsigned short) (((s) >> 24) & 0xFF))
+#define SHAPE(i, s) SHAPE##i (s)
+inline unsigned short getShape(const unsigned int fullShape, const unsigned short axis){
+switch(axis){
+case 0:
+return SHAPE0(fullShape);
+case 1:
+return SHAPE1(fullShape);
+case 2:
+return SHAPE2(fullShape);
+default:
+return -1;
+}
+}
+#define S_SHAPE0(s) ((unsigned int) (s) & 0xFFF)
+#define S_SHAPE1(s) (((unsigned int) (s) & 0xFFF) << 12)
+#define S_SHAPE2(s) (((unsigned int) (s) & 0xFF ) << 24)
+#define SIZE(s) ((unsigned int) SHAPE0(s) * SHAPE1(s) * SHAPE2(s))
+#define NEW_SHAPE(a, b, c) (S_SHAPE0(a) | S_SHAPE1(b) | S_SHAPE2(c))
+#endif
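// Worked example (editorial sketch, using only the macros above):
#include <cassert>
inline void shape_word_example(){
    unsigned int s = NEW_SHAPE(4, 3, 2);   // 4 | 3<<12 | 2<<24 == 0x02003004
    assert(SHAPE0(s) == 4 && SHAPE1(s) == 3 && SHAPE2(s) == 2);
    assert(SIZE(s) == 24);                 // 4 * 3 * 2 elements
}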
-#ifndef __SIZE_HPP__
-#define __SIZE_HPP__
-// Dimensions are defined as follow:
-// dim 0 : 13 bits, dim 1: 12 bits, dim 2 : 7 bits
-#define GET_DIM0(s) ((unsigned short) (s & 0xFFF))
-#define GET_DIM1(s) ((unsigned short) ((s >> 12) & 0xFFF))
-#define GET_DIM2(s) ((unsigned short) ((s >> 24) & 0xFF))
-#define GET_DIM(i, s) GET_DIM##i (s)
-#define SET_DIM0(s) ((unsigned int) s & 0xFFF)
-#define SET_DIM1(s) ((unsigned int) (s & 0xFFF) << 12)
-#define SET_DIM2(s) ((unsigned int) (s & 0xFF ) << 24)
-#define GET_TOTAL_ELEMENTS(s) ((unsigned int) GET_DIM0(s) * GET_DIM1(s) * GET_DIM2(s))
-#define NEW_SIZE(a, b, c) (SET_DIM0(a) | SET_DIM1(b) | SET_DIM2(c))
-#endif
@@ -325,7 +325,7 @@ functions = [{
{
"name": "reduce_prod",
"template": ["Mt_Mt_red"],
"tb" : ["no"]
"tb": ["no"]
},
{
"name": "reduce_any",
@@ -336,6 +336,44 @@ functions = [{
"name": "reduce_all",
"template": ["Mt_Mt_red"],
"only": ["int"]
+},
+{
+"name": "concatenate",
+"template": ["concatenate"],
+"op_name": "CONCAT"
+},
+{
+"name": "sort",
+"template": ["sort"]
+},
+{
+"name": "extract",
+"template": ["extract"]
+},
+{
+"name": "transpose",
+"template": ["transpose"]
+},
+{
+"name": "matmul",
+"template": ["matmul"]
+},
+{
+"name": "range",
+"template": ["range"]
+},
+{
+"name": "boolean_mask",
+"template": ["boolean_mask"]
+},
+{
+"name": "broadcast_to",
+"op_name": "BROADCAST",
+"template": ["broadcast_to"]
+},
+{
+"name": "gather",
+"template": ["gather"]
+}]
destF.write("// Auto generated interface driver for hardware numpy\n")
......
#ifndef __TENSOR_HPP__
#define __TENSOR_HPP__
#include "../../../common/c/inc/size.hpp"
#include "../../../common/c/inc/shape_util.hpp"
#include <hls_video.h>
/* Template class of Hwtensor */
@@ -72,7 +72,7 @@ bool Hwtensor<T>::empty() {
template <typename FB_T, typename T>
void Array2Hwtensor(FB_T fb[], Hwtensor<T>& tensor)
{
-int nbElement = GET_TOTAL_ELEMENTS(tensor.size);
+int nbElement = SIZE(tensor.size);
for (int id = 1; id <= nbElement; id++) {
#pragma HLS loop_flatten off
#pragma HLS pipeline II=1
@@ -84,7 +84,7 @@ void Array2Hwtensor(FB_T fb[], Hwtensor<T>& tensor)
template <typename FB_T, typename T>
void Hwtensor2Array(Hwtensor<T>& tensor, FB_T fb[])
{
-int nbElement = GET_TOTAL_ELEMENTS(tensor.size);
+int nbElement = SIZE(tensor.size);
for (int id = 1; id <= nbElement; id++) {
#pragma HLS loop_flatten off
#pragma HLS pipeline II=1
......
@@ -72,9 +72,8 @@
#define REDUCE_ARGMIN 0x8
// Type 2 operations
-#define CONCATENATE 0x0
+#define CONCAT 0x0
#define RANGE 0x1
-#define STACK 0x2
#define GATHER 0x3
#define SORT 0x4
#define ARGSORT 0x5
@@ -82,18 +81,17 @@
#define EXTRACT 0x7
#define TRANSPOSE 0x8
#define MATMUL 0x9
+#define GATHER_ND 0xA
+#define BROADCAST 0xB
typedef int UNIT_INT_T;
typedef float UNIT_T;
// Bool array should be unsigned int
#ifndef PETALINUX
-void numpyTop( ap_uint<32> instructionReg,
+UNIT_INT_T numpyTop( ap_uint<32> instructionReg,
UNIT_T *dst_mem, UNIT_INT_T *dst_int_mem, ap_uint<32> dst_sz,
UNIT_T *src1_mem, UNIT_INT_T *src1_int_mem, ap_uint<32> src1_sz,
UNIT_T *src2_mem, UNIT_INT_T *src2_int_mem, ap_uint<32> src2_sz,
-UNIT_INT_T *src_bool_mem, ap_uint<32> src_bool_sz,
+UNIT_INT_T *param_int_mem, ap_uint<32> param_int_sz,
UNIT_T immVal1, UNIT_T immVal2,
UNIT_INT_T immVal_int1, UNIT_INT_T immVal_int2,
ap_uint<32> param0, ap_uint<32> param1);
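// Call sketch (editorial assumption, modeled on the commented-out reduce drivers above;
// SET_INSTRUCTION, OP_WITH_BUFFER and FLOAT32 are taken from those comments):
// numpyTop(SET_INSTRUCTION(CONCAT, OP_WITH_BUFFER, FLOAT32),
//          dst->getHwTensor(), NULL, dst->packedShape(),   // destination buffer + packed shape
//          a->getHwTensor(),   NULL, a->packedShape(),     // first source
//          b->getHwTensor(),   NULL, b->packedShape(),     // second source
//          NULL, 0,                                        // no int parameter buffer
//          0.f, 0.f, 0, 0,                                 // unused immediates
//          0, 0);                                          // param0 / param1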
......
1.140460968017578125e+01
1.160778999328613281e+01
-1.949523925781250000e+00
3.871240615844726562e+00
1.309165573120117188e+01
-3.331951141357421875e+00
1.731937789916992188e+01
-4.537496566772460938e+00
1.036657714843750000e+01
-8.565225601196289062e+00
1.405712127685546875e+00
-1.222253322601318359e+01
-1.033185958862304688e+01
-1.014629364013671875e+01
8.344659805297851562e+00
1.369486236572265625e+01
-1.562559604644775391e+01
-1.467187881469726562e+01
-1.914886474609375000e+00
5.652374267578125000e+00
-6.374821662902832031e+00
-1.081877231597900391e+01
6.216459274291992188e+00
1.442430496215820312e+01
1.661743164062500000e+01
1.793783187866210938e+01
-1.035881042480468750e-01
-4.999876022338867188e+00
-1.192665100097656250e+00