diff --git a/.gitignore b/.gitignore index d46f692..a0a3277 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,12 @@ data/game_* data/windows* # Hardware Directories vivado_hls.log -./hls/ \ No newline at end of file +vitis_hls.log +hls_prj/ +vivado/ +vitis_include/ +./token + +# PYNQ +**/.ipynb_checkpoints +**/sds_trace_data.dat \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index c2d922e..43b6978 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,36 +1,53 @@ cmake_minimum_required(VERSION 3.10) # Set the project name project(Svd VERSION 1.0) -# Specify the C++ standard -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED True) -# To locate "custom"/manually added libraries +# To locate "custom", i.e. manually added, libraries list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) -# Locate libraries and headers (see Files in ./cmake/Modules/) -# find_package(Vitis REQUIRED) -find_package(Vivado REQUIRED) -find_package(OpenCv REQUIRED) +# Locate external libraries and headers (see Files in ./cmake/Modules/) +# Search for HLS: if Vitis is found, use C++14, else fall back to C++11. 
+find_package(Vitis QUIET) +if (Vitis_FOUND) + # Vitis HLS located: build against its headers with the C++14 standard + message("[INFO] Vitis HLS FOUND.") + set(CMAKE_CXX_STANDARD 14) + set(CMAKE_CXX_STANDARD_REQUIRED True) + set(HLS_INCLUDE_DIRS ${VITIS_INCLUDE_DIRS}) + add_compile_definitions(__VITIS_HLS__) +else() + find_package(Vivado REQUIRED) + message("[INFO] Vivado HLS FOUND.") + # The Vitis lookup is QUIET (not REQUIRED) so that this Vivado HLS fallback stays reachable; use the C++11 standard + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED True) + set(HLS_INCLUDE_DIRS ${VIVADO_INCLUDE_DIRS}) +endif() +# find_package(OpenCv REQUIRED) -# set(HLS_INCLUDE_DIRS ${VITIS_INCLUDE_DIRS}) -# set(HLS_INCLUDE_DIRS ${VIVADO_INCLUDE_DIRS}) -# message(${HLS_INCLUDE_DIRS}) +message("${HLS_INCLUDE_DIRS}") # Add all definitions -if (WIN32) - add_compile_definitions(IMAGE_OUTPUT_PATH="C:/Users/ste/phd/hls_projects/hls_svd/data") -else() - add_compile_definitions(IMAGE_OUTPUT_PATH="/mnt/c/Users/ste/phd/hls_projects/hls_svd/data") -endif() # The following definitions is required for compiling half-precision numbers. add_compile_definitions(HLS_NO_XIL_FPO_LIB) # add_compile_definitions(USE_FLOAT) add_compile_definitions(DEBUG_LEVEL=2) +add_compile_definitions(INPUT_SIZE=1024) +add_compile_definitions(HIDDEN_SIZE=512) +add_compile_definitions(NUM_GATES=4) +add_compile_definitions(NUM_SAMPLES=2) +add_compile_definitions(NUM_TILES_U=4) +add_compile_definitions(NUM_ZERO_TILES_U=1) +add_compile_definitions(NUM_TILES_V=4) +add_compile_definitions(NUM_ZERO_TILES_V=1) +add_compile_definitions(NUM_TIMESTEPS=28) +add_compile_definitions(FIX_WIDTH=16) +add_compile_definitions(FIX_FRACT_WIDTH=5) + # Move executable in bin/, along side the DLLs (copied) set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) -file(COPY ${OpenCv_LIBS} DESTINATION ${EXECUTABLE_OUTPUT_PATH}) +# file(COPY ${OpenCv_LIBS} DESTINATION ${EXECUTABLE_OUTPUT_PATH}) # NOTE: an object file becomes a library. All libraries/objects must be LINKED later! # Tell the application where to find the other CMake config files. 
diff --git a/README.md b/README.md index a287172..058ff64 100644 --- a/README.md +++ b/README.md @@ -47,17 +47,108 @@ cmake .. make all ``` +## Notes on Using Vitis + +### AXIS Interface and DMA + +Vitis will include the TLAST side channel if and only if TKEEP and TSTRB are also included. + +In order to attach the port to a Xilinx DMA, the TLAST signal must be properly set HIGH at the end of the data transmission. + +The TKEEP and TSTRB signals must *always* be set to HIGH, as indicated in the [AXIS documentation](https://developer.arm.com/documentation/ihi0051/a/Interface-Signals/Byte-qualifiers/TKEEP-and-TSTRB-combinations). + + +### Partitioning hls::vector Arrays + +A standard way of partitioning an array is: +```c++ + hls::stream<hls::vector<int, 4> > x_streams[M][N]; +#pragma HLS ARRAY_PARTITION variable=x_streams complete dim=0 +``` +However, since we are dealing with a `hls::vector` type, setting `dim=0` (all dimensions) will partition the array on the vector dimension too. + +In the example above, Vitis will create `M * N * 4` different streams (instead of just `M * N`). To fix it, manually specify the partitioning on the dimensions, like so: +```c++ + hls::stream<hls::vector<int, 4> > x_streams[M][N]; +#pragma HLS ARRAY_PARTITION variable=x_streams complete dim=1 +#pragma HLS ARRAY_PARTITION variable=x_streams complete dim=2 +``` + +### Implementing AXIS Interfaces + +In order to implement AXIS interfaces, avoid using `depth` in the pragma, as follows: +```c++ +const int kAxiBitwidth = 128; + +void HlsVectorKernelU(hls::stream<ap_axiu<kAxiBitwidth, 0, 0, 0> >& x_port, + hls::stream<ap_axiu<kAxiBitwidth, 0, 0, 0> >& y_port) { +#pragma HLS INTERFACE axis port=x_port // depth=... <- DON'T SPECIFY THE DEPTH! +#pragma HLS INTERFACE axis port=y_port // depth=... <- DON'T SPECIFY THE DEPTH! + // ... +} +``` +The type `ap_axiu` must now be used to generate AXIS with side channels. Note: for using external DMAs, we need the TLAST, TKEEP and TSTRB signals. In particular, TKEEP and TSTRB must be all set (i.e. all ones) in order to signal data packets. 
+ +#### AxiStreamInterface Class + +This repository contains a wrapper class for kernel arguments of type `hls::stream` named `AxiStreamInterface`. The class is implemented following a _Policy-based_ C++ paradigm, meaning that it accepts either an `AxiStreamPort` or an `AxiStreamFifo` as its policy (in practice, a template argument). + +The idea is to have a kernel argument, i.e. an HLS port, which can be either an AXIS interface with side-channels, or a bare FIFO interface connected to another kernel. In fact, Vitis HLS doesn't allow stream interfaces with side-channels within an IP. To overcome the issue, the `AxiStreamInterface` can be customized to be an IP port or a FIFO port, depending on the use of the kernel. + +An example of this can be seen in `HlsKernelU` and in `svd::SvdKernel`, which specialize the `svd::KernelU` function template. In the former case, `svd::KernelU` has its output stream port `xu_port` connected to one of the IP's ports (with side-channels). In the latter case, instead, `svd::KernelU` is connected to `svd::KernelS`, and so its `xu_port` argument is an internal FIFO (without side-channels). + +The `AxiStreamInterface` class in `axis_lib.h` can also be used with `hls::vector` types. + +### HLS Vector Patch + +If the project is compiled against the Vitis HLS libraries, the `hls::vector` class needs a small patch. 
+ +Simply add the following line in the `vector` class after the `public:` statement: +```c++ +public: + static const int width = N; +``` + +In this way, one can access the number of elements in a `hls::vector` at compile/synthesis time by doing: + +```c++ +hls::vector<int, 5> a; +std::cout << "Number of elements in a: " << decltype(a)::width << std::endl; + +// > Number of elements in a: 5 +``` + +## Notes on PYNQ Design + +### Vivado Project + +#### Xilinx DMA + +The DMA should be configured in the following way: + +* Max burst length to maximum +* Register buffer width to maximum + +#### HP Ports + +All HP ports should be set to a 64-bit width (to avoid receiving data interleaved with zeros). + + ## TODOs List of TODOs: - * Import u, s, v new kernels - * Import (and clean up?) u, s, v old kernels - * Import DMA functions - * Import and clean up HLS SVD-model-Bouganis - * Import and clean up HLS SVD-model-2LSTM - * Import some testbenches to try compile something + + * ~Import u, s, v new kernels~ + * ~Import (and clean up?) u, s, v old kernels~ + * ~Import DMA functions~ + * ~Import and clean up HLS SVD-model-Bouganis~ + * ~Import and clean up HLS SVD-model-2LSTM~ + * ~Import some testbenches to try compile something~ ## Bugs List of possible bugs: -* Having not squared images in games generates distorted images. \ No newline at end of file + +* Constructing data handler storage might lead to segmentation faults. +* Having `R == 1` might trigger some asserts. +* Having `output_size == H` in HlsKernelV might break hardware runs. 
\ No newline at end of file diff --git a/cmake/Modules/FindVitis.cmake b/cmake/Modules/FindVitis.cmake index 8695d84..903adee 100644 --- a/cmake/Modules/FindVitis.cmake +++ b/cmake/Modules/FindVitis.cmake @@ -1,11 +1,13 @@ if (WIN32) - set(VITIS_INCLUDE_DIRS D:/Programs/Xilinx/Vitis_HLS/2020.2/include/) + # set(VITIS_INCLUDE_DIRS D:/Programs/Xilinx/Vitis_HLS/2021.1/include/) + set(VITIS_INCLUDE_DIRS C:/Users/ste/phd/hls_projects/hls_svd/vitis_include/2020.2/include/) + # set(VITIS_INCLUDE_DIRS C:/Users/ste/phd/hls_projects/hls_svd/vitis_include/2021.1/include/) else() - set(VITIS_INCLUDE_DIRS /mnt/d/Programs/Xilinx/Vitis_HLS/2020.2/include/) + set(VITIS_INCLUDE_DIRS /mnt/d/Programs/Xilinx/Vitis_HLS/2021.1/include/) endif() # NOTE: It handles the REQUIRED, QUIET and version-related arguments of find_package. # It also sets the _FOUND variable. The package is considered found # if all variables listed contain valid results, e.g. valid filepaths. include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Vitis DEFAULT_MSG VITIS_INCLUDE_DIRS) +find_package_handle_standard_args(Vitis DEFAULT_MSG VITIS_INCLUDE_DIRS) \ No newline at end of file diff --git a/include/dma/axis_lib.h b/include/dma/axis_lib.h new file mode 100644 index 0000000..c2721bb --- /dev/null +++ b/include/dma/axis_lib.h @@ -0,0 +1,930 @@ +#ifndef DMA_AXIS_LIB_H_ +#define DMA_AXIS_LIB_H_ + +#include "hls_utils/hls_metaprogramming.h" + +#include "ap_axi_sdata.h" +#include "ap_int.h" +#include "hls_stream.h" + +#include +#include +#include +#include + +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif + +namespace svd { + +template +struct AxiuPacketTlastOnlyType { + ap_uint data; + ap_uint<1> last = 0; + // ap_uint<4> keep = 0xF; +}; + +template +class AxiStreamFifo { +public: + typedef ap_int PacketType; + + AxiStreamFifo(hls::stream& port) : _port(port) { +#pragma HLS INLINE + } + + ~AxiStreamFifo() {}; + + template + inline void Push(const T &x, bool is_last = false) { +#pragma HLS 
INLINE + PacketType packet = *((PacketType*)&x); + this->_port.write(packet); + } + + /** + * @brief Pushes the last value. + * + * @param[in] x The value to push on the FIFO + * + * @tparam T The type of the value + */ + template + inline void PushLast(const T &x) { +#pragma HLS INLINE + PacketType packet = *((PacketType*)&x); + this->_port.write(packet); + } + + /** + * @brief Pushes a series of values from a buffer to the FIFO. + * + * @param[in] size The buffer size + * @param[in] x The buffer to read from + * + * @tparam T The type of the buffer + */ + template + inline void PushFromBuffer(const int size, const T *x, bool send_last = false) { +#pragma HLS INLINE + PacketType packet; + for (int i = 0; i < size; ++i) { +#pragma HLS PIPELINE II=1 + packet = *((PacketType*)&x[i]); + this->_port.write(packet); + } + } + + template + inline void PushBuffer(const int size, const T *x, bool send_last = false) { +#pragma HLS INLINE + assert(hlsutils::Bitwidth::value * size == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth::value; + PacketType packet; + for (int i = 0; i < size; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + auto tmp = x[i]; + packet.range(kHi, kLo) = *((ap_uint*)&tmp); + } + this->_port.write(packet); + } + + /** + * @brief Pushes a series of values from a stream to the FIFO. + * + * @param[in] size The size + * @param[in] x The stream to read from + * + * @tparam T The type of the stream + */ + template + inline void PushFromStream(const int size, const hls::stream &x) { +#pragma HLS INLINE + PacketType packet; + for (int i = 0; i < size; ++i) { +#pragma HLS PIPELINE II=1 + T x_val = x.read(); + packet = *((PacketType*)&x_val); + this->_port.write(packet); + } + } + + /** + * @brief Pops a value from the FIFO and converts it. 
+ * + * @tparam T The type of the returned value + * + * @return The value from the FIFO + */ + template + inline T Pop() { +#pragma HLS INLINE + PacketType packet = this->_port.read(); + return *((T*)&packet); + } + + /** + * @brief Read value and returns false (used for compatibility). + * + * @param y The value read from the FIFO + * + * @tparam T The type of the read value + * + * @return True if the specified y is the last value to pop, False + * otherwise. + */ + template + inline bool isLastPop(T &y) { +#pragma HLS INLINE + PacketType packet = this->_port.read(); + y = *((T*)&packet); + return false; + } + + /** + * @brief Pops a series of values from the FIFO and writes them into a + * buffer. It also converts from ap_uint<> to T. + * + * @param[in] size The size + * @param y The output buffer + * + * @tparam T The type of the output buffer + */ + template + inline void PopToBuffer(const int size, T *y) { +#pragma HLS INLINE + PacketType packet; + for (int i = 0; i < size; ++i) { +#pragma HLS PIPELINE II=1 + packet = this->_port.read(); + y[i] = *((T*)&packet); + } + } + + /** + * @brief Pops a series of values from the FIFO and writes them into a + * stream. It also converts from ap_uint<> to T. + * + * @param[in] size The stream size + * @param y The output stream + * + * @tparam T The type of the output stream + */ + template + inline void PopToStream(const int size, hls::stream &y) { +#pragma HLS INLINE + PacketType packet; + for (int i = 0; i < size; ++i) { +#pragma HLS PIPELINE II=1 + packet = this->_port.read(); + y.write(*((T*)&packet)); + } + } + +#ifdef __VITIS_HLS__ + /** + * @brief Push a vector into the FIFO with default TLAST set to low. + * From the AXIS specification: The following options are + * available: + * * Set TLAST LOW. This indicates that all transfers are within + * the same packet. 
This option provides maximum opportunity + * for merging and upsizing but means that transfers could be + * delayed in a stream with intermittent bursts. A permanently + * LOW TLAST signal might also affect the interleaving of + * streams across a shared channel because the interconnect + * can use the TLAST signal to influence the arbitration + * process. + * * Set TLAST HIGH. This indicates that all transfers are + * individual packets. This option ensures that transfers do + * not get delayed in the infrastructure. It also ensures that + * the stream does not unnecessarily block the use of a shared + * channel by influencing the arbitration scheme. This option + * prevents merging on streams from masters that have this + * default setting and prevents efficient upsizing. + * + * @param[in] x The vector to push + * @param[in] is_last Indicates if last packet to push. Default false. + * + * @tparam T The type of the vector, its type must be of the same + * size of the FIFO. + * @tparam N Number of elements in the vector + */ + template + inline void PushVector(const hls::vector& x, bool is_last = false) { +#pragma HLS INLINE + static_assert(hlsutils::Bitwidth::value * N == Bitwidth, "AxiStreamPort must have same bitwidth as hls::vector"); + assert(hlsutils::Bitwidth::value * N == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth::value; + PacketType packet; + for (int i = 0; i < N; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + auto tmp = x[i]; + packet.range(kHi, kLo) = *((ap_uint*)&tmp); + } + this->_port.write(packet); + } + + /** + * @brief Pushes the last vector, i.e. a packet with TLAST set to high. 
+ * + * @param[in] x The vector to push on the FIFO + * + * @tparam T The type of the vector + * @tparam N Number of elements in the vector + */ + template + inline void PushLastVector(const hls::vector& x) { +#pragma HLS INLINE + static_assert(hlsutils::Bitwidth::value * N == Bitwidth, "AxiStreamPort must have same bitwidth as hls::vector"); + assert(hlsutils::Bitwidth::value * N == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth::value; + PacketType packet; + for (int i = 0; i < N; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + auto tmp = x[i]; + packet.range(kHi, kLo) = *((ap_uint*)&tmp); + } + this->_port.write(packet); + } + + /** + * @brief Pops a vector from the FIFO and converts it. + * + * @tparam T The type of the returned vector + * @tparam N The number of elements in the vector + * + * @return The vector from the FIFO + */ + template + inline hls::vector PopVector() { +#pragma HLS INLINE + static_assert(hlsutils::Bitwidth::value * N == Bitwidth, "AxiStreamPort must have same bitwidth as hls::vector"); + assert(hlsutils::Bitwidth::value * N == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth::value; + const PacketType packet = this->_port.read(); + hls::vector y; + for (int i = 0; i < N; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + ap_uint tmp = packet.range(kHi, kLo); + y[i] = *((T*)&tmp); + } + return y; + } + + /** + * @brief Read vector and returns false (used for compatibility reasons). + * + * @param y The vector read from the FIFO + * + * @tparam T The type of the read vector + * @tparam N The number of elements in the vector. 
+ * + * @return False + */ + template + inline bool isLastPopVector(hls::vector& y) { +#pragma HLS INLINE + static_assert(hlsutils::Bitwidth::value * N == Bitwidth, "AxiStreamPort must have same bitwidth as hls::vector"); + assert(hlsutils::Bitwidth::value * N == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth::value; + const PacketType packet = this->_port.read(); + for (int i = 0; i < N; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + ap_uint tmp = packet.range(kHi, kLo); + y[i] = *((T*)&tmp); + } + return false; + } +#endif // __VITIS_HLS__ + + hls::stream& get_port() { + return this->_port; + } + +private: + hls::stream& _port; +}; + +/** + * @brief Wrapper class for an AXI stream interface port. + * + * The function instantiating this class must apply the respective + * HLS directive in order to synthesize a proper AXI stream + * interface. + * + * For documentation on TKEEP and TSTRB, please visit: + * https://developer.arm.com/documentation/ihi0051/a/Interface-Signals/Byte-qualifiers/TKEEP-and-TSTRB-combinations + * + * @tparam Bitwidth The bitwidth of the interface. + */ +template +class AxiStreamPort { +public: + static_assert(Bitwidth % 8 == 0, "ERROR. Bitwidth not byte aligned."); + typedef ap_axiu PacketType; + typedef ap_uint SideChannelsType; + // typedef ap_uint > > SideChannelsType; + + AxiStreamPort(hls::stream& port) : _port(port), + _all_ones(~(SideChannelsType(0))), _has_side_channels(true) { +#pragma HLS INLINE + }; + + ~AxiStreamPort() {}; + + inline void set_name(const std::string name) { +#ifndef __SYNTHESIS__ + _name = name; +#endif + } + + std::string name() { +#ifndef __SYNTHESIS__ + return this->_name; +#else + return ""; +#endif + } + + /** + * @brief Push a value into the FIFO with default TLAST set to low. From + * the AXIS specification: The following options are available: + * * Set TLAST LOW. This indicates that all transfers are within + * the same packet. 
This option provides maximum opportunity for + * merging and upsizing but means that transfers could be + * delayed in a stream with intermittent bursts. A permanently + * LOW TLAST signal might also affect the interleaving of + * streams across a shared channel because the interconnect can + * use the TLAST signal to influence the arbitration process. + * * Set TLAST HIGH. This indicates that all transfers are + * individual packets. This option ensures that transfers do not + * get delayed in the infrastructure. It also ensures that the + * stream does not unnecessarily block the use of a shared + * channel by influencing the arbitration scheme. This option + * prevents merging on streams from masters that have this + * default setting and prevents efficient upsizing. + * + * @param[in] x The value to push + * @param[in] is_last Indicates if last packet to push. Default false. + * + * @tparam T The type of the value, its type must be of the same + * size of the FIFO. + */ + template + inline void Push(const T &x, bool is_last = false) { +#pragma HLS INLINE + PacketType packet; + packet.data = *((ap_uint*)&x); + packet.last = is_last? 1 : 0; + // NOTE: If TKEEP and TSTRB both high, the packet is a data type. + packet.keep = this->_all_ones; // Set TKEEP to all ones. + packet.strb = this->_all_ones; // Set TSTRB to all ones. + this->_port.write(packet); + } + + /** + * @brief Pushes the last value, i.e. a packet with TLAST set to high. + * + * @param[in] x The value to push on the FIFO + * + * @tparam T The type of the value + */ + template + inline void PushLast(const T &x) { +#pragma HLS INLINE + PacketType packet; + packet.data = *((ap_uint*)&x); + packet.last = 1; + // NOTE: If TKEEP and TSTRB both high, the packet is a data type. + packet.keep = this->_all_ones; // Set TKEEP to all ones. + packet.strb = this->_all_ones; // Set TSTRB to all ones. + this->_port.write(packet); + } + + /** + * @brief Pushes a series of values from a buffer to the FIFO. 
+ * + * TLAST is driven on every packet: it is LOW for all of them, except for the final one when send_last is true. + * + * @param[in] size The buffer size + * @param[in] x The buffer to read from + * @param[in] send_last Whether to set TLAST HIGH on the final packet + * + * @tparam T The type of the buffer + */ + template <typename T> + inline void PushFromBuffer(const int size, const T *x, bool send_last = false) { +#pragma HLS INLINE + PacketType packet; + for (int i = 0; i < size; ++i) { +#pragma HLS PIPELINE II=1 + packet.data = *((ap_uint<Bitwidth>*)&x[i]); + // TLAST is assigned on every iteration: packet is declared without an + // initializer, so skipping the non-final packets would leave it unset. + packet.last = (send_last && i == size - 1) ? 1 : 0; + // NOTE: If TKEEP and TSTRB both high, the packet is a data type. + packet.keep = this->_all_ones; // Set TKEEP to all ones. + packet.strb = this->_all_ones; // Set TSTRB to all ones. + this->_port.write(packet); + } + } + + /** + * @brief Aggregates a buffer into a single packet and pushes it to the FIFO. + * + * @param[in] size The number of buffer elements: the element bitwidth times size must equal the port bitwidth (asserted) + * @param[in] x The buffer to read from + * @param[in] send_last Whether to set TLAST HIGH on the packet + * + * @tparam T The type of the buffer + */ + template <typename T> + inline void PushBuffer(const int size, const T *x, bool send_last = false) { +#pragma HLS INLINE + assert(hlsutils::Bitwidth<T>::value * size == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth<T>::value; + PacketType packet; + for (int i = 0; i < size; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + auto tmp = x[i]; + packet.data.range(kHi, kLo) = *((ap_uint<kElemBitwidth>*)&tmp); + } + packet.last = send_last? 1 : 0; + // NOTE: If TKEEP and TSTRB are both high, then the packet is a data type. + packet.keep = this->_all_ones; // Set TKEEP to all ones. + packet.strb = this->_all_ones; // Set TSTRB to all ones. + this->_port.write(packet); + } + + /** + * @brief Pushes a series of values from a stream to the FIFO. TLAST is set HIGH on the final packet and LOW on all the others. + * + * @param[in] size The size + * @param[in] x The stream to read from + * + * @tparam T The type of the stream + */ + template <typename T> + inline void PushFromStream(const int size, const hls::stream<T> &x) { +#pragma HLS INLINE + PacketType packet; + for (int i = 0; i < size; ++i) { +#pragma HLS PIPELINE II=1 + T x_val = x.read(); // NOTE(review): x is const-qualified; confirm read() can be called on it. + packet.data = *((ap_uint<Bitwidth>*)&x_val); + // TLAST is assigned on every iteration (see PushFromBuffer): HIGH only on + // the final packet, LOW otherwise. + packet.last = (i == size - 1) ? 1 : 0; + // NOTE: If TKEEP and TSTRB both high, the packet is a data type. 
+ packet.keep = this->_all_ones; // Set TKEEP to all ones. + packet.strb = this->_all_ones; // Set TSTRB to all ones. + this->_port.write(packet); + } + } + + /** + * @brief Pops a value from the FIFO and converts it. + * + * @tparam T The type of the returned value + * + * @return The value from the FIFO + */ + template + inline T Pop() { +#pragma HLS INLINE + PacketType packet; + packet = this->_port.read(); + return *((T*)&packet.data); + } + + /** + * @brief Read value and return true if the specified y is the last value + * to pop, i.e. with TLAST set high. It also converts the read + * value to the specified type. + * + * @param y The value read from the FIFO + * + * @tparam T The type of the read value + * + * @return True if the specified y is the last value to pop, False + * otherwise. + */ + template + inline bool isLastPop(T &y) { +#pragma HLS INLINE + PacketType packet; + packet = this->_port.read(); + y = *((T*)&packet.data); + return packet.last == 1 ? true : false; + } + + /** + * @brief Pops a series of values from the FIFO and writes them into a + * buffer. It also converts from ap_uint<> to T. + * + * @param[in] size The size + * @param y The output buffer + * + * @tparam T The type of the output buffer + */ + template + inline void PopToBuffer(const int size, T *y) { +#pragma HLS INLINE + PacketType packet; + for (int i = 0; i < size; ++i) { +#pragma HLS PIPELINE II=1 + packet = this->_port.read(); + y[i] = *((T*)&packet.data); + } + } + + /** + * @brief Pops a series of values from the FIFO and writes them into a + * stream. It also converts from ap_uint<> to T. 
+ * + * @param[in] size The stream size + * @param y The output stream + * + * @tparam T The type of the output stream + */ + template + inline void PopToStream(const int size, hls::stream &y) { +#pragma HLS INLINE + PacketType packet; + for (int i = 0; i < size; ++i) { +#pragma HLS PIPELINE II=1 + packet = this->_port.read(); + y.write(*((T*)&packet.data)); + } + } + +#ifdef __VITIS_HLS__ + /** + * @brief Push a vector into the FIFO with default TLAST set to low. + * From the AXIS specification: The following options are + * available: + * * Set TLAST LOW. This indicates that all transfers are within + * the same packet. This option provides maximum opportunity + * for merging and upsizing but means that transfers could be + * delayed in a stream with intermittent bursts. A permanently + * LOW TLAST signal might also affect the interleaving of + * streams across a shared channel because the interconnect + * can use the TLAST signal to influence the arbitration + * process. + * * Set TLAST HIGH. This indicates that all transfers are + * individual packets. This option ensures that transfers do + * not get delayed in the infrastructure. It also ensures that + * the stream does not unnecessarily block the use of a shared + * channel by influencing the arbitration scheme. This option + * prevents merging on streams from masters that have this + * default setting and prevents efficient upsizing. + * + * @param[in] x The vector to push + * @param[in] is_last Indicates if last packet to push. Default false. + * + * @tparam T The type of the vector, its type must be of the same + * size of the FIFO. 
+ * @tparam N Number of elements in the vector + */ + template + inline void PushVector(const hls::vector& x, bool is_last = false) { +#pragma HLS INLINE + static_assert(hlsutils::Bitwidth::value * N == Bitwidth, "AxiStreamPort must have same bitwidth as hls::vector"); + assert(hlsutils::Bitwidth::value * N == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth::value; + PacketType packet; + for (int i = 0; i < N; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + auto tmp = x[i]; + packet.data.range(kHi, kLo) = *((ap_uint*)&tmp); + } + packet.last = is_last? 1 : 0; + // NOTE: If TKEEP and TSTRB are both high, then the packet is a data type. + packet.keep = this->_all_ones; // Set TKEEP to all ones. + packet.strb = this->_all_ones; // Set TSTRB to all ones. + this->_port.write(packet); + } + + /** + * @brief Pushes the last vector, i.e. a packet with TLAST set to high. + * + * @param[in] x The vector to push on the FIFO + * + * @tparam T The type of the vector + * @tparam N Number of elements in the vector + */ + template + inline void PushLastVector(const hls::vector& x) { +#pragma HLS INLINE + static_assert(hlsutils::Bitwidth::value * N == Bitwidth, "AxiStreamPort must have same bitwidth as hls::vector"); + assert(hlsutils::Bitwidth::value * N == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth::value; + PacketType packet; + for (int i = 0; i < N; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + auto tmp = x[i]; + packet.data.range(kHi, kLo) = *((ap_uint*)&tmp); + } + packet.last = 1; + // NOTE: If TKEEP and TSTRB both high, the packet is a data type. + packet.keep = this->_all_ones; // Set TKEEP to all ones. + packet.strb = this->_all_ones; // Set TSTRB to all ones. + this->_port.write(packet); + } + + /** + * @brief Pops a vector from the FIFO and converts it. 
+ * + * @tparam T The type of the returned vector + * @tparam N The number of elements in the vector + * + * @return The vector from the FIFO + */ + template + inline hls::vector PopVector() { +#pragma HLS INLINE + static_assert(hlsutils::Bitwidth::value * N == Bitwidth, "AxiStreamPort must have same bitwidth as hls::vector"); + assert(hlsutils::Bitwidth::value * N == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth::value; + const PacketType packet = this->_port.read(); + hls::vector y; + for (int i = 0; i < N; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + ap_uint tmp = packet.data.range(kHi, kLo); + y[i] = *((T*)&tmp); + } + return y; + } + + /** + * @brief Read vector and return true if the specified y is the last + * vector to pop, i.e. with TLAST set high. It also converts the + * read vector to the specified type. + * + * @param y The vector read from the FIFO + * + * @tparam T The type of the read vector + * @tparam N The number of elements in the vector. + * + * @return True if the specified y is the last vector to pop, False + * otherwise. + */ + template + inline bool isLastPopVector(hls::vector& y) { +#pragma HLS INLINE + static_assert(hlsutils::Bitwidth::value * N == Bitwidth, "AxiStreamPort must have same bitwidth as hls::vector"); + assert(hlsutils::Bitwidth::value * N == Bitwidth); + const int kElemBitwidth = hlsutils::Bitwidth::value; + PacketType packet; + packet = this->_port.read(); + for (int i = 0; i < N; ++i) { + const int kHi = (i + 1) * kElemBitwidth - 1; + const int kLo = i * kElemBitwidth; + ap_uint tmp = packet.data.range(kHi, kLo); + y[i] = *((T*)&tmp); + } + return packet.last == 1 ? 
true : false; + } +#endif // __VITIS_HLS__ + + hls::stream& get_port() { + return this->_port; + } + +private: + hls::stream& _port; + SideChannelsType _all_ones; + bool _has_side_channels; +#ifndef __SYNTHESIS__ + std::string _name; +#endif +}; + +/** + * @brief This class describes an AXI stream interface (Policy-based + * design). + * + * It has to be used as a "generic" interface whithin a kernel. The + * port of the kernel attached to this class can then be either a + * FIFO or a AXIS port. + * + * @tparam AxiClass The policy class. + */ +template +class AxiStreamInterface : private AxiClass { +public: + AxiStreamInterface(hls::stream& port): AxiClass(port) { +#pragma HLS INLINE + } + + ~AxiStreamInterface() {}; + + template + inline void Push(const T &x, bool is_last = false) { +#pragma HLS INLINE + AxiClass::template Push(x, is_last); + } + + /** + * @brief Pushes the last value, i.e. a packet with TLAST set to high. + * + * @param[in] x The value to push on the FIFO + * + * @tparam T The type of the value + */ + template + inline void PushLast(const T &x) { +#pragma HLS INLINE + AxiClass::template PushLast(x); + } + + /** + * @brief Pushes a series of values from a buffer to the FIFO. + * + * @param[in] size The buffer size + * @param[in] x The buffer to read from + * + * @tparam T The type of the buffer + */ + template + inline void PushFromBuffer(const int size, const T *x, bool send_last = false) { +#pragma HLS INLINE + AxiClass::template PushFromBuffer(size, x, send_last); + } + + /** + * @brief Aggregates a buffer into a single packet and pushes it to the + * FIFO. 
+ * + * @param[in] size The size of the buffer: assert(bitwidthElem * size + * == bitwidthStream) + * @param[in] x The buffer to read from + * @param[in] send_last Whether to send TLAST + * + * @tparam T The stream type + */ + template + inline void PushBuffer(const int size, const T *x, bool send_last = false) { +#pragma HLS INLINE + AxiClass::template PushBuffer(size, x, send_last); + } + + /** + * @brief Pushes a series of values from a stream to the FIFO. + * + * @param[in] size The size + * @param[in] x The stream to read from + * + * @tparam T The type of the stream + */ + template + inline void PushFromStream(const int size, const hls::stream &x) { +#pragma HLS INLINE + AxiClass::template PushFromStream(size, x); + } + + /** + * @brief Pops a value from the FIFO and converts it. + * + * @tparam T The type of the returned value + * + * @return The value from the FIFO + */ + template + inline T Pop() { +#pragma HLS INLINE + return AxiClass::template Pop(); + } + + /** + * @brief Read value and return true if the specified y is the last value + * to pop, i.e. with TLAST set high. It also converts the read + * value to the specified type. + * + * @param y The value read from the FIFO + * + * @tparam T The type of the read value + * + * @return True if the specified y is the last value to pop, False + * otherwise. + */ + template + inline bool isLastPop(T &y) { +#pragma HLS INLINE + return AxiClass::template isLastPop(y); + } + + /** + * @brief Pops a series of values from the FIFO and writes them into a + * buffer. It also converts from ap_uint<> to T. + * + * @param[in] size The size + * @param y The output buffer + * + * @tparam T The type of the output buffer + */ + template + inline void PopToBuffer(const int size, T *y) { +#pragma HLS INLINE + AxiClass::template PopToBuffer(size, y); + } + + /** + * @brief Pops a series of values from the FIFO and writes them into a + * stream. It also converts from ap_uint<> to T. 
+ * + * @param[in] size The stream size + * @param y The output stream + * + * @tparam T The type of the output stream + */ + template + inline void PopToStream(const int size, hls::stream &y) { +#pragma HLS INLINE + AxiClass::template PopToStream(size, y); + } + +#ifdef __VITIS_HLS__ + /** + * @brief Push a vector into the FIFO with default TLAST set to low. + * From the AXIS specification: The following options are + * available: + * * Set TLAST LOW. This indicates that all transfers are within + * the same packet. This option provides maximum opportunity + * for merging and upsizing but means that transfers could be + * delayed in a stream with intermittent bursts. A permanently + * LOW TLAST signal might also affect the interleaving of + * streams across a shared channel because the interconnect + * can use the TLAST signal to influence the arbitration + * process. + * * Set TLAST HIGH. This indicates that all transfers are + * individual packets. This option ensures that transfers do + * not get delayed in the infrastructure. It also ensures that + * the stream does not unnecessarily block the use of a shared + * channel by influencing the arbitration scheme. This option + * prevents merging on streams from masters that have this + * default setting and prevents efficient upsizing. + * + * @param[in] x The vector to push + * @param[in] is_last Indicates if last packet to push. Default false. + * + * @tparam T The type of the vector, its type must be of the same + * size of the FIFO. + * @tparam N Number of elements in the vector + */ + template + inline void PushVector(const hls::vector& x, bool is_last = false) { +#pragma HLS INLINE + AxiClass::template PushVector(x, is_last); + } + + /** + * @brief Pushes the last vector, i.e. a packet with TLAST set to high. 
+ * + * @param[in] x The vector to push on the FIFO + * + * @tparam T The type of the vector + * @tparam N Number of elements in the vector + */ + template + inline void PushLastVector(const hls::vector& x) { +#pragma HLS INLINE + AxiClass::template PushLastVector(x); + } + + /** + * @brief Pops a vector from the FIFO and converts it. + * + * @tparam T The type of the returned vector + * @tparam N The number of elements in the vector + * + * @return The vector from the FIFO + */ + template + inline hls::vector PopVector() { +#pragma HLS INLINE + return AxiClass::template PopVector(); + } + + /** + * @brief Read vector and return true if the specified y is the last + * vector to pop, i.e. with TLAST set high. It also converts the + * read vector to the specified type. + * + * @param y The vector read from the FIFO + * + * @tparam T The type of the read vector + * @tparam N The number of elements in the vector. + * + * @return True if the specified y is the last vector to pop, False + * otherwise. 
+ */ + template + inline bool isLastPopVector(hls::vector& y) { +#pragma HLS INLINE + return AxiClass::template isLastPopVector(y); + } +#endif // __VITIS_HLS__ +}; + +} // svd + +#endif // end DMA_AXIS_LIB_H_ \ No newline at end of file diff --git a/include/dma/svd_dma.h b/include/dma/svd_dma.h index de326bc..aa18d43 100644 --- a/include/dma/svd_dma.h +++ b/include/dma/svd_dma.h @@ -5,27 +5,33 @@ #include "hls_utils/hls_metaprogramming.h" #include "hls_utils/priority_encoder.h" #include "dma/width_converter.h" +#include "dma/axis_lib.h" #include "hls_stream.h" +#include "assert.h" + +#include + +namespace svd { template void StreamSplitter(const int output_size, const Din *x, - hls::stream (&y)[hls_utils::Bitwidth::value / hls_utils::Bitwidth::value]) { + hls::stream (&y)[hlsutils::Bitwidth::value / hlsutils::Bitwidth::value]) { #pragma HLS ARRAY_PARTITION variable=y complete dim=1 - const int kDivider = hls_utils::Bitwidth::value / hls_utils::Bitwidth::value; + const int kDivider = hlsutils::Bitwidth::value / hlsutils::Bitwidth::value; const int kInputSize = output_size / kDivider; - assert(hls_utils::Bitwidth::value % hls_utils::Bitwidth::value == 0); - assert(hls_utils::Bitwidth::value >= hls_utils::Bitwidth::value); + assert(hlsutils::Bitwidth::value % hlsutils::Bitwidth::value == 0); + assert(hlsutils::Bitwidth::value >= hlsutils::Bitwidth::value); assert(output_size % kDivider == 0); DMA_Loop: for (int i = 0; i < kInputSize; ++i) { #pragma HLS PIPELINE II=1 Parallel_Write_Loop: for (int j = 0; j < kDivider; ++j) { - const int kHi = (j + 1) * hls_utils::Bitwidth::value - 1; - const int kLo = j * hls_utils::Bitwidth::value; - ap_uint::value> x_val = x[i].range(kHi, kLo); + const int kHi = (j + 1) * hlsutils::Bitwidth::value - 1; + const int kLo = j * hlsutils::Bitwidth::value; + ap_uint::value> x_val = x[i].range(kHi, kLo); y[j].write(*((Dout*)&x_val)); } } @@ -57,17 +63,17 @@ void S_DMA(const typename params::SPortD s_port[params::N][params::R], } template 
-void U_Dispatcher(const typename params::UPortD u_port[params::PrunedSizeU], +void U_Dispatcher(const typename params::UPortD u_port[params::R * params::PrunedSizeU], svd::SvdStreams &streams) { U_Dispatcher: for (int i = 0; i < params::R; ++i) { for (int j = 0; j < params::PeU; ++j) { - for (int k = 0; k < params::PrunedSizeU / params::R / params::PeU; ++k) { + for (int k = 0; k < params::PrunedSizeU / params::PeU; ++k) { #pragma HLS PIPELINE II=1 #pragma HLS LOOP_FLATTEN for (int g = 0; g < params::G; ++g) { streams.u[g][j].write(streams.u_dma[g].read()); - } + } } } } @@ -102,11 +108,11 @@ void NzIdxConverter(svd::SvdStreams &streams) { } template -void InputDMA( +void InputDMA(const int num_refinements, const typename params::ActivationD x_port[params::N][params::I], svd::SvdStreams &streams, svd::SvdBuffers &buffers) { -// #pragma HLS INLINE +#pragma HLS INLINE typename params::UnzIdxD tile_idx[params::N][params::G][params::PeU]; #pragma HLS ARRAY_PARTITION variable=tile_idx complete dim=0 Store_X_Buffer: @@ -124,7 +130,8 @@ void InputDMA( Stream_X_Tiles: for (int ii = 0; ii < params::N; ++ii) { #pragma HLS UNROLL - for (int i = 0; i < params::R; ++i) { + Stream_X_Tiles_inner: + for (int i = 0; i < num_refinements; ++i) { for (int k = 0; k < params::I / params::Tu; ++k) { #pragma HLS PIPELINE II=1 for (int j = 0; j < params::PeU; ++j) { @@ -145,17 +152,17 @@ void InputDMA( } template -void V_Dispatcher(const typename params::VPortD v_port[params::PrunedSizeV], +void V_Dispatcher(const typename params::VPortD v_port[params::R * params::PrunedSizeV], svd::SvdStreams &streams) { V_Dispatcher: for (int i = 0; i < params::R; ++i) { for (int j = 0; j < params::PeV; ++j) { - for (int k = 0; k < params::PrunedSizeV / params::R / params::PeV; ++k) { + for (int k = 0; k < params::PrunedSizeV / params::PeV; ++k) { #pragma HLS PIPELINE II=1 #pragma HLS LOOP_FLATTEN for (int g = 0; g < params::G; ++g) { streams.v[g][j].write(streams.v_dma[g].read()); - } + } } } } @@ 
-164,9 +171,9 @@ void V_Dispatcher(const typename params::VPortD v_port[params::PrunedSizeV], template void SvdInDMA( const typename params::ActivationD x_port[params::N][params::I], - const typename params::UPortD u_port[params::PrunedSizeU], + const typename params::UPortD u_port[params::R * params::PrunedSizeU], const typename params::SPortD s_port[params::N][params::R], - const typename params::VPortD v_port[params::PrunedSizeV], + const typename params::VPortD v_port[params::R * params::PrunedSizeV], const typename params::UnzD nz_u_port[params::R * params::G], const typename params::VnzD nz_v_port[params::R * params::G], svd::SvdStreams &streams, @@ -176,15 +183,15 @@ void SvdInDMA( #pragma HLS DATAFLOW #endif S_DMA(s_port, streams); - U_DMA: StreamSplitter(params::G * params::PrunedSizeU, u_port, streams.u_dma); - V_DMA: StreamSplitter(params::G * params::PrunedSizeV, v_port, streams.v_dma); + U_DMA: StreamSplitter(params::G * params::R * params::PrunedSizeU, u_port, streams.u_dma); + V_DMA: StreamSplitter(params::G * params::R * params::PrunedSizeV, v_port, streams.v_dma); if (params::ZTu > 0) { NzDMA(nz_u_port, nz_v_port, streams); NzIdxConverter(streams); } U_Dispatcher(u_port, streams); V_Dispatcher(v_port, streams); - InputDMA(x_port, streams, buffers); + InputDMA(params::R, x_port, streams, buffers); } template @@ -203,54 +210,52 @@ void SvdOutDMA( } } -namespace svd { - template -void ZeroTileCombination2LstmDMA(const ap_uint *comb_port, - hls::stream > (&comb_stream1_current)[NumGates / 2], - hls::stream > (&comb_stream1_recurrent)[NumGates / 2], - hls::stream > (&comb_stream2_current)[NumGates / 2], - hls::stream > (&comb_stream2_recurrent)[NumGates / 2]) { +void NZIndex2LstmDMA(const ap_uint *nz_port, + hls::stream > (&nz_stream1_cur)[NumGates / 2], + hls::stream > (&nz_stream1_rec)[NumGates / 2], + hls::stream > (&nz_stream2_cur)[NumGates / 2], + hls::stream > (&nz_stream2_rec)[NumGates / 2]) { +#pragma HLS INLINE assert(NumGates % 2 == 0); 
assert(NumTiles % 2 == 0); - assert(NumTiles >= 8); - - ZeroTileCombination_Dma_Iter_Loop: + // assert(NumTiles >= 8); // Minimum port size requirement. + NZIndex_Dma_Iter_Loop: for (int i = 0; i < NumIter; ++i) { #pragma HLS PIPELINE II=1 - ZeroTileCombination_Dma_Current_Loop: + NZIndex_Dma_Current_Loop: for (int g = 0; g < NumGates / 2; ++g) { - ap_uint comb = comb_port[i * NumGates + g]; - comb_stream1_current[g].write(comb); - comb_stream2_current[g].write(comb); + ap_uint nz_idx = nz_port[i * NumGates + g]; + nz_stream1_cur[g].write(nz_idx); + nz_stream2_cur[g].write(nz_idx); } - ZeroTileCombination_Dma_Recurrent_Loop: + NZIndex_Dma_Recur_Loop: for (int g = 0; g < NumGates / 2; ++g) { - ap_uint comb = comb_port[i * NumGates + NumGates / 2 + g]; - comb_stream1_recurrent[g].write(comb); - comb_stream2_recurrent[g].write(comb); + ap_uint nz_idx = nz_port[i * NumGates + NumGates / 2 + g]; + nz_stream1_rec[g].write(nz_idx); + nz_stream2_rec[g].write(nz_idx); } } } template -void ZeroTileCombinationDMA(const ap_uint *comb_port, - hls::stream > (¤t_comb_stream)[NumGates / 2], - hls::stream > (&recurrent_comb_stream)[NumGates / 2]) { +void NZIndexDMA(const ap_uint *nz_port, + hls::stream > (&cur_nz_stream)[NumGates / 2], + hls::stream > (&rec_nz_stream)[NumGates / 2]) { +#pragma HLS INLINE assert(NumGates % 2 == 0); assert(NumTiles % 2 == 0); - assert(NumTiles >= 8); - - ZeroTileCombination_Dma_Iter_Loop: + // assert(NumTiles >= 8); // Minimum port size requirement. 
+ NZIndex_Dma_Iter_Loop: for (int i = 0; i < NumIter; ++i) { #pragma HLS PIPELINE II=1 - ZeroTileCombination_Dma_Current_Loop: + NZIndex_Dma_Current_Loop: for (int g = 0; g < NumGates / 2; ++g) { - current_comb_stream[g].write(comb_port[i * NumGates + g]); + cur_nz_stream[g].write(nz_port[i * NumGates + g]); } - ZeroTileCombination_Dma_Recurrent_Loop: + NZIndex_Dma_Recur_Loop: for (int g = 0; g < NumGates / 2; ++g) { - recurrent_comb_stream[g].write(comb_port[i * NumGates + NumGates / 2 + g]); + rec_nz_stream[g].write(nz_port[i * NumGates + NumGates / 2 + g]); } } } @@ -278,19 +283,22 @@ void InputDMA(const svd::ActivationD *x_dmem, // Store the input onto an on-chip buffer for data reuse. The buffer is shared // by the LSTM gates and their U-units (which contain T - ZT MAC units each). // =========================================================================== +#ifdef __VITIS_HLS__ +#pragma HLS INLINE +#endif #pragma HLS DATAFLOW - const int kNumElemsTile = VectLength / NumTiles; + const int kTileSize = VectLength / NumTiles; const int kNumPEs = NumTiles - NumZeroTiles; - svd::ActivationD x_buffer[NumTiles][kNumElemsTile]; + svd::ActivationD x_buffer[NumTiles][kTileSize]; #pragma HLS ARRAY_PARTITION variable=x_buffer complete dim=1 Write_Buffer: for (int i = 0; i < NumTiles; ++i) { - for (int j = 0; j < kNumElemsTile; ++j) { + for (int j = 0; j < kTileSize; ++j) { #pragma HLS PIPELINE II=1 - x_buffer[i][j] = x_dmem[i * kNumElemsTile + j]; + x_buffer[i][j] = x_dmem[i * kTileSize + j]; } } - hls::stream::value> > tile_idx_stream[NumGates][kNumPEs]; + hls::stream::value> > tile_idx_stream[NumGates][kNumPEs]; #pragma HLS ARRAY_PARTITION variable=tile_idx_stream complete dim=0 NZ_to_Idx: for (int i = 0; i < NumIter; ++i) { @@ -302,7 +310,7 @@ void InputDMA(const svd::ActivationD *x_dmem, if (j == 0) { nz_idx[k] = comb_stream[k].read(); } - int set_idx = PriorityEncoderLSB(nz_idx[k]); + int set_idx = hlsutils::PriorityEncoderLSB(nz_idx[k]); assert(set_idx < 
NumTiles); tile_idx_stream[k][j].write(set_idx); nz_idx[k][set_idx] = 0; @@ -311,9 +319,9 @@ void InputDMA(const svd::ActivationD *x_dmem, } Stream_Tiles: for (int i = 0; i < NumIter; ++i) { - for (int k = 0; k < kNumElemsTile; ++k) { + for (int k = 0; k < kTileSize; ++k) { #pragma HLS PIPELINE II=1 - ap_uint::value> tile_idx[NumGates][kNumPEs]; + ap_uint::value> tile_idx[NumGates][kNumPEs]; #pragma HLS ARRAY_PARTITION variable=tile_idx complete dim=0 for (int j = 0; j < kNumPEs; ++j) { for (int g = 0; g < NumGates; ++g) { @@ -331,7 +339,9 @@ template (&y)[InWidth / OutWidth]) { #pragma HLS INLINE +#ifndef __VITIS_HLS__ #pragma HLS ARRAY_PARTITION variable=y complete dim=1 +#endif const int kDivider = InWidth / OutWidth; const int kInputSize = output_size / kDivider; assert(InWidth % OutWidth == 0); @@ -380,26 +392,25 @@ void StreamSplitter(const int output_size, * * @param[in] use_nz_dim If true, there are #num_non_zero_tiles different * PEs (for the U-unit), else there are - * #num_elems_per_tile different PEs (for the V-unit). + * #tile_size different PEs (for the V-unit). * @param[in] gate_port The gate port * @param gate_stream The gate PEs stream * * @tparam NumIter Number of refinement steps. * @tparam num_non_zero_tiles Number of non pruned tiles. - * @tparam num_elems_per_tile Number of elements per tile. + * @tparam tile_size Number of elements per tile. 
*/ template -void GateDMA(const bool use_nz_dim, const int num_iter, - const int num_non_zero_tiles, const int num_elems_per_tile, - const T *gate_port, hls::stream *gate_streams) { +void DispatchGateFromArray(const bool use_nz_dim, const int num_iter, + const int num_non_zero_tiles, const int tile_size, + const T* gate_port, hls::stream* gate_streams) { #pragma HLS INLINE -#pragma HLS FUNCTION_INSTANTIATE vriable=num_iter -#pragma HLS FUNCTION_INSTANTIATE vriable=num_non_zero_tiles -#pragma HLS FUNCTION_INSTANTIATE vriable=num_elems_per_tile +#pragma HLS FUNCTION_INSTANTIATE variable=num_iter +#pragma HLS FUNCTION_INSTANTIATE variable=num_non_zero_tiles +#pragma HLS FUNCTION_INSTANTIATE variable=tile_size const int kI = num_iter; const int kNZ = num_non_zero_tiles; - const int kE = num_elems_per_tile; - + const int kE = tile_size; I : for (int i = 0; i < kI; ++i) { Z : for (int z = 0; z < kNZ; ++z) { E : for (int e = 0; e < kE; ++e) { @@ -415,6 +426,29 @@ void GateDMA(const bool use_nz_dim, const int num_iter, } } +template +void DispatchGateFromStream(const bool use_nz_dim, const int num_refinements, + const int num_non_zero_tiles, const int tile_size, + hls::stream* gate_port, hls::stream gate_streams[NumGates][NumStreams]) { +#pragma HLS INLINE +#pragma HLS FUNCTION_INSTANTIATE variable=num_refinements +#pragma HLS FUNCTION_INSTANTIATE variable=num_non_zero_tiles + I : for (int i = 0; i < num_refinements; ++i) { + Z : for (int z = 0; z < num_non_zero_tiles; ++z) { + E : for (int e = 0; e < tile_size; ++e) { +#pragma HLS PIPELINE II=1 + for (int g = 0; g < NumGates; ++g) { + if (use_nz_dim) { + gate_streams[g][z].write(gate_port[g].read()); // for U weights + } else { + gate_streams[g][e].write(gate_port[g].read()); // for V weights + } + } + } + } + } +} + /** * @brief Dispatch input elements to several PEs. All PEs receive the same * amount of elements and in the same clock cycle. 
@@ -533,6 +567,40 @@ void PipelinedDispatcher(const int input_size, } } + +#ifdef __VITIS_HLS__ +template +void VectorizedInputDMA(const int R, + hls::stream& x_port, + hls::stream x_streams[params::N]) { + + typedef typename params::ActivationD ActivationType; + const int kNumTilesU = params::I / params::Tu; + svd::AxiStreamPort x_axis = svd::AxiStreamPort(x_port); + typename params::VectTuType x_buffer[params::N][kNumTilesU]; +#pragma HLS ARRAY_PARTITION variable=x_buffer complete dim=1 + + Store_X_Buffer: + for (int i = 0; i < params::N; ++i) { + for (int j = 0; j < kNumTilesU; ++j) { +#pragma HLS LOOP_FLATTEN +#pragma HLS PIPELINE II=1 + // x_buffer[i][j] = x_axis.PopVector(); + } + } + Stream_X_Tiles: + for (int i = 0; i < R; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=params::R max=params::R + for (int j = 0; j < kNumTilesU; ++j) { +#pragma HLS PIPELINE II=1 + for (int k = 0; k < params::N; ++k) { + x_streams[k] << x_buffer[k][j]; + } + } + } +} +#endif + } // end namespace svd #endif // end DMA_SVD_DMA_H_ \ No newline at end of file diff --git a/include/dma/width_converter.h b/include/dma/width_converter.h index 58ba6eb..2a65b73 100644 --- a/include/dma/width_converter.h +++ b/include/dma/width_converter.h @@ -2,6 +2,7 @@ #define DMA_WIDTH_CONVERTER_H_ #include "ap_int.h" +#include "assert.h" #include #include @@ -145,8 +146,6 @@ void Mem2MemDataWidthConverter(const int num_in_words, #pragma HLS PIPELINE II=1 const int kHi = ((i + 1) * InWidth) % OutWidth - 1; const int kLo = (i * InWidth) % OutWidth; - std::cout << "(" << kHi << ", " << kLo << ")\n"; - // if constexpr (std::is_same::value || std::is_same::value) { // elem_out(kHi, kLo) = in[i]; // } else { diff --git a/include/hls_utils/adder_tree.h b/include/hls_utils/adder_tree.h index 7b3c360..d6caf34 100644 --- a/include/hls_utils/adder_tree.h +++ b/include/hls_utils/adder_tree.h @@ -4,6 +4,12 @@ #include "hls_stream.h" #include "hls_utils/hls_metaprogramming.h" +#ifdef __VITIS_HLS__ +#include "hls_vector.h" 
+#endif + +namespace hlsutils { + /** * @brief Given a static array, sum-reduce all its elements. * @@ -23,8 +29,8 @@ DataType adder_tree(DataType x[NumPE]) { // Determine the number of ranks for the adder tree and declare array: // - The adder_tree is larger than required as each rank only needs to be // half the size of the previous rank. - const unsigned kNumPEsLog2 = hls_utils::log2::value; - const unsigned kNumPEsSub1Log2 = hls_utils::log2::value; + const unsigned kNumPEsLog2 = hlsutils::log2::value; + const unsigned kNumPEsSub1Log2 = hlsutils::log2::value; const unsigned kNumRanks = kNumPEsLog2 != kNumPEsSub1Log2 ? kNumPEsLog2 : kNumPEsLog2 + 1; DataType adder_tree[kNumRanks][NumPE]; #pragma HLS ARRAY_PARTITION variable=adder_tree complete dim=0 @@ -96,8 +102,8 @@ DataType adder_tree(hls::stream x[NumPE]) { // Determine the number of ranks for the adder tree and declare array: // - The adder_tree is larger than required as each rank only needs to be // half the size of the previous rank. - const unsigned kNumPEsLog2 = hls_utils::log2::value; - const unsigned kNumPEsSub1Log2 = hls_utils::log2::value; + const unsigned kNumPEsLog2 = hlsutils::log2::value; + const unsigned kNumPEsSub1Log2 = hlsutils::log2::value; const unsigned kNumRanks = kNumPEsLog2 != kNumPEsSub1Log2 ? kNumPEsLog2 : kNumPEsLog2 + 1; DataType adder_tree[kNumRanks][NumPE]; #pragma HLS ARRAY_PARTITION variable=adder_tree complete dim=0 @@ -149,4 +155,68 @@ DataType adder_tree(hls::stream x[NumPE]) { return ret_val; } +#ifdef __VITIS_HLS__ +template +DataType adder_tree(hls::vector x) { +#pragma HLS ARRAY_PARTITION variable=x complete // to force II=1 +#pragma HLS PIPELINE II=1 + // Determine the number of ranks for the adder tree and declare array: + // - The adder_tree is larger than required as each rank only needs to be + // half the size of the previous rank. 
+ const unsigned kNumPEsLog2 = hlsutils::log2::value; + const unsigned kNumPEsSub1Log2 = hlsutils::log2::value; + const unsigned kNumRanks = kNumPEsLog2 != kNumPEsSub1Log2 ? kNumPEsLog2 : kNumPEsLog2 + 1; + DataType adder_tree[kNumRanks][NumPE]; +#pragma HLS ARRAY_PARTITION variable=adder_tree complete dim=0 + + unsigned rank_size = NumPE; + DataType ret_val = 0; + + add_level_loop: + for(int adder_tree_rank = kNumRanks - 1; adder_tree_rank >= 0; --adder_tree_rank) { + const bool kLoopInit = adder_tree_rank == kNumRanks - 1 ? true : false; + const bool kLoopEpilog = adder_tree_rank == 0 ? true : false; + + if (kLoopInit) { + rank_size = NumPE; + } + + const bool prev_rank_is_odd = rank_size % 2 == 0 ? false : true; + rank_size = (rank_size + 1) / 2; + + add_col_loop: + for(int jj = 0; jj < (NumPE + 1) / 2; ++jj) { + if (jj < rank_size) { + if (prev_rank_is_odd && jj == rank_size - 1) { + // Bypass, no adder required. + if (kLoopInit) { + adder_tree[adder_tree_rank][jj] = x[jj * 2]; + // adder_tree[adder_tree_rank][jj] = x[jj * 2]; + } else { + adder_tree[adder_tree_rank][jj] = adder_tree[adder_tree_rank + 1][jj * 2]; + } + } else { + if (kLoopInit) { + auto y_acc = x[jj * 2] + x[jj * 2 + 1]; + // auto y_acc = x[jj * 2] + x[jj * 2 + 1]; +#pragma HLS RESOURCE variable=y_acc core=AddSub_DSP + adder_tree[adder_tree_rank][jj] = y_acc; + } else{ + auto y_acc = adder_tree[adder_tree_rank + 1][jj * 2] + adder_tree[adder_tree_rank + 1][jj * 2 + 1]; +#pragma HLS RESOURCE variable=y_acc core=AddSub_DSP + adder_tree[adder_tree_rank][jj] = y_acc; + } + } + } + } + if (kLoopEpilog) { + ret_val = adder_tree[0][0]; + } + } + return ret_val; +} +#endif + +} // hlsutils + #endif // end HLS_UTILS_ADDER_TREE_H_ \ No newline at end of file diff --git a/include/hls_utils/dot_prod_dsp.h b/include/hls_utils/dot_prod_dsp.h index 322c43b..79319e0 100644 --- a/include/hls_utils/dot_prod_dsp.h +++ b/include/hls_utils/dot_prod_dsp.h @@ -42,6 +42,8 @@ #include "ap_int.h" #include "assert.h" 
+namespace hlsutils { + /** * @brief Implements p0 += y_dsp * w_dsp + y_lut * w_lut; p1 += x_dsp * * w_dsp + x_lut * w_lut; @@ -149,4 +151,6 @@ void dot_prod_dsp_lut_generic(const T x_dsp, const T y_dsp, const T w_dsp, p1 += p1_tmp; } +} // hlsutils + #endif // end HLS_UTILS_DOT_PROD_DSP_H_ \ No newline at end of file diff --git a/include/hls_utils/hls_debugging.h b/include/hls_utils/hls_debugging.h index af9eaef..213f82b 100644 --- a/include/hls_utils/hls_debugging.h +++ b/include/hls_utils/hls_debugging.h @@ -2,6 +2,9 @@ #define HLS_UTILS_HLS_DEBUGGING #include "hls_utils/hw_timer.h" +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif #include #include @@ -10,12 +13,12 @@ #define HLS_DEBUG_LEVEL 0 #endif -namespace hls_utils { +namespace hlsutils { static int hls_debug_level = HLS_DEBUG_LEVEL; -void Log(const int verbose_level, const std::string &str) { - std::cout << str << std::endl; +template +void Log(const int verbose_level, const T* str) { #ifndef __SYNTHESIS__ if (verbose_level < hls_debug_level) { std::cout << str << std::endl; @@ -23,7 +26,16 @@ void Log(const int verbose_level, const std::string &str) { #endif } +#ifdef __VITIS_HLS__ +template +void PrintVector(hls::vector &x) { + for (int i = 0; i < N; ++i) { + std::cout << x[i] << " "; + } + std::cout << std::endl; +} +#endif -} // hls_utils +} // hlsutils #endif // HLS_UTILS_HLS_DEBUGGING \ No newline at end of file diff --git a/include/hls_utils/hls_metaprogramming.h b/include/hls_utils/hls_metaprogramming.h index 252e1d9..156299d 100644 --- a/include/hls_utils/hls_metaprogramming.h +++ b/include/hls_utils/hls_metaprogramming.h @@ -1,11 +1,11 @@ #ifndef HLS_UTILS_HLS_METAPROGRAMMING_H_ #define HLS_UTILS_HLS_METAPROGRAMMING_H_ -namespace hls_utils { +namespace hlsutils { template struct log2 { - enum {value = 1 + hls_utils::log2::value}; + enum {value = 1 + hlsutils::log2::value}; }; template <> @@ -26,13 +26,13 @@ class GCDbase; template class GCD { public: - static const int value = 
hls_utils::GCDbase::value; + static const int value = hlsutils::GCDbase::value; }; template class GCDbase { public: - static const int value = hls_utils::GCDbase::value; + static const int value = hlsutils::GCDbase::value; }; template @@ -46,6 +46,46 @@ struct Bitwidth { static const int value = T::width; }; +template<> +struct Bitwidth { + static const int value = 8; +}; + +template<> +struct Bitwidth { + static const int value = 32; +}; + +template<> +struct Bitwidth { + static const int value = 16; +}; + +template<> +struct Bitwidth { + static const int value = 64; +}; + +template<> +struct Bitwidth { + static const int value = 8; +}; + +template<> +struct Bitwidth { + static const int value = 32; +}; + +template<> +struct Bitwidth { + static const int value = 16; +}; + +template<> +struct Bitwidth { + static const int value = 64; +}; + template<> struct Bitwidth { static const int value = 32; @@ -56,9 +96,16 @@ struct Bitwidth { static const int value = 64; }; -template +#ifdef __VITIS_HLS__ +template +struct VectBitwidth { + static const int value = N * Bitwidth::value; +}; +#endif + +template struct PrunedSize { - static const int value = N * X / T * (T - ZT); + static const int value = R * X / T * (T - ZT); }; @@ -71,6 +118,11 @@ struct is_pow2 { static const bool value = (N & (N - 1)) == 0; }; +template +struct round_up_div { + static const unsigned int value = (X + Y - 1) / Y; +}; + } // end namespace hls #endif // end HLS_UTILS_HLS_METAPROGRAMMING_H_ \ No newline at end of file diff --git a/include/hls_utils/hw_timer.h b/include/hls_utils/hw_timer.h index 94f11b9..58b1196 100644 --- a/include/hls_utils/hw_timer.h +++ b/include/hls_utils/hw_timer.h @@ -3,6 +3,8 @@ #include "hls_stream.h" +namespace hlsutils { + /** * @brief Synthesizeable HLS clock counter. 
Note: this module must be * placed in a DATAFLOW region and at the bottom of it (to avoid @@ -354,4 +356,9 @@ const int NUM_HW_TIMERS = 1; */ static unsigned long long hw_timers[NUM_HW_TIMERS]; +typedef long long CounterD; +typedef hls::stream ProbeStream; + +} // hlsutils + #endif // end HLS_UTILS_HW_TIMER_H_ \ No newline at end of file diff --git a/include/hls_utils/priority_encoder.h b/include/hls_utils/priority_encoder.h index 7805f7c..5a07fd1 100644 --- a/include/hls_utils/priority_encoder.h +++ b/include/hls_utils/priority_encoder.h @@ -3,7 +3,9 @@ #include "hls_utils/hls_metaprogramming.h" -namespace svd { +#include "assert.h" + +namespace hlsutils { /** * @brief Priority Encoder: returns the MSB set bit. @@ -64,7 +66,7 @@ int PriorityEncoderLSB(const T a) { } template -void PriorityEncoder(const int num_zero_tiles, const ap_uint a, hls::stream::value> > &idx_stream) { +void PriorityEncoder(const int num_zero_tiles, const ap_uint a, hls::stream::value> > &idx_stream) { ap_uint tmp = a; for (int i = 0; i < NumTiles - num_zero_tiles; ++i) { #pragma HLS PIPELINE II=1 @@ -76,7 +78,7 @@ void PriorityEncoder(const int num_zero_tiles, const ap_uint a, hls::s } template -void PriorityEncoder(const int num_zero_tiles, const T a, hls::stream::value> > &idx_stream) { +void PriorityEncoder(const int num_zero_tiles, const T a, hls::stream::value> > &idx_stream) { T tmp = a; for (int i = 0; i < T::width - num_zero_tiles; ++i) { #pragma HLS PIPELINE II=1 @@ -87,6 +89,6 @@ void PriorityEncoder(const int num_zero_tiles, const T a, hls::stream +void GemvKernel(const int num_rows, const int num_cols, + hls::stream > x_streams[N], + hls::stream > w_streams[N], + hls::stream y_streams[N]) { + assert(num_rows % T == 0); + const int kNumTiles = num_rows / T; + for (int i = 0; i < num_cols; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=testgemv::R max=testgemv::R + hls::vector tmp[N] = {hls::vector(0)}; +#pragma HLS ARRAY_PARTITION variable=tmp complete + for (int j = 0; j < kNumTiles; 
++j) { +#pragma HLS LOOP_TRIPCOUNT min=testgemv::I/T max=testgemv::I/T +#pragma HLS PIPELINE II=1 + for (int k = 0; k < N; ++k) { + tmp[k] += x_streams[k].read() * w_streams[k].read(); + if (j == kNumTiles - 1) { + y_streams[k] << tmp[k].reduce_add(); + } + } + } + } +} +#endif + +} // svd + +void HlsGemvKernel(const int num_rows, const int num_cols, + hls::stream >& x1_port, + hls::stream >& x2_port, + hls::stream >& w1_port, + hls::stream >& w2_port, + hls::stream& y1_port, + hls::stream& y2_port); + +#endif // end KERNEL_GEMV_KERNEL_H_ \ No newline at end of file diff --git a/include/kernel/s_kernel.h b/include/kernel/s_kernel.h index 9b7e7b8..dc105fc 100644 --- a/include/kernel/s_kernel.h +++ b/include/kernel/s_kernel.h @@ -3,15 +3,21 @@ #include "svd_params.h" #include "hls_utils/adder_tree.h" +#include "dma/axis_lib.h" +#include "hls_utils/hls_metaprogramming.h" + +#include "hls_stream.h" + +namespace svd { template -void KernelS(svd::SvdStreams &streams) { +void KernelS(const int num_refinements, svd::SvdStreams &streams) { typedef typename params::AccumulationD accum_t; - for (int i = 0; i < params::R; ++i) { + for (int i = 0; i < num_refinements; ++i) { #pragma HLS PIPELINE II=1 for (int j = 0; j < params::N; ++j) { for (int k = 0; k < params::G; ++k) { - auto sum = adder_tree(streams.xu[j][k]); + auto sum = hlsutils::adder_tree(streams.xu[j][k]); auto xs = sum * streams.s[j][k].read(); for (int ii = 0; ii < params::PeV; ++ii) { streams.xus[j][k][ii].write(xs); @@ -21,4 +27,98 @@ void KernelS(svd::SvdStreams &streams) { } } +template > +struct KernelS_Params { + static const int N = Ni; + static const int G = Gi; + static const int ActivationWidth = hlsutils::Bitwidth::value; + static const int VectG_AxiWidth = ActivationWidth * G; + typedef ActivationD_tp ActivationD; + typedef typename svd::AxiStreamPort::AxiuPacketType VectG_AxiPacketType; +#ifdef __VITIS_HLS__ + typedef hls::vector VectG_Type; +#endif +}; + +#ifndef __VITIS_HLS__ +#else +template < + 
typename params, + typename PortWrapper = svd::AxiStreamPort +> +void KernelS(const int num_active_inputs, + const int num_refinements[params::N], + hls::stream& xu_port, + hls::stream& s_port, + hls::stream& xus_port) { +#pragma HLS TOP name=KernelS +#pragma HLS DATAFLOW +#pragma HLS INLINE +#ifndef __VITIS_HLS__ +#pragma HLS STABLE variable=xu_port +#pragma HLS STABLE variable=s_port +#pragma HLS STABLE variable=xus_port +#endif + assert(num_active_inputs <= params::N); + assert(num_active_inputs > 0); + int R_max = num_refinements[0]; + int R_total = num_refinements[0] * num_active_inputs; // Total elements. + Get_Total_R: + for (int i = 1; i < num_active_inputs; ++i) { +#pragma HLS PIPELINE II=1 + if (num_refinements[i] > R_max) { + R_max = num_refinements[i]; + } + assert(num_refinements[i] >= num_refinements[i - 1]); + R_total += (num_refinements[i] - num_refinements[i - 1]) * (num_active_inputs - i); + } + auto xu_axis = svd::AxiStreamInterface(xu_port); + auto s_axis = svd::AxiStreamPort(s_port); + auto xus_axis = svd::AxiStreamInterface(xus_port); + S_Kernel: + for (int i = 0; i < R_total; ++i) { +#pragma HLS PIPELINE II=1 style=frp + typedef typename params::ActivationD ActivationType; + auto xu_val = xu_axis.template PopVector(); + auto s_val = s_axis.template PopVector(); + auto xus_val = xu_val * s_val; + const bool kIsLast = i == R_total - 1; + xus_axis.template PushVector(xus_val, kIsLast); + } +} +#endif // __VITIS_HLS__ + +} // svd + +namespace tests { + +static const int kNumInputs = 2; +static const int kInputSize = 512; +static const int Tu = 4; +// NOTE: The rest of the parameters are unused for now. 
+static const int kDummySize = 1; +static const int R = 8; +static const int Tv = 1; +static const int ZTu = 0; +static const int ZTv = 0; +static const int G = 4; + +typedef svd::SvdParameters params; + short, short, short> params; + +} // tests + +#ifndef __VITIS_HLS__ +#else +void HlsKernelS( + const int num_refinements[tests::params::N], + // const hls::vector num_refinements, + hls::stream& xu_port, + hls::stream& s_port, + hls::stream& xus_port); +#endif + #endif // end KERNEL_S_KERNEL_H_ \ No newline at end of file diff --git a/include/kernel/svd_kernel.h b/include/kernel/svd_kernel.h index 7d5e24d..f782fae 100644 --- a/include/kernel/svd_kernel.h +++ b/include/kernel/svd_kernel.h @@ -3,19 +3,216 @@ #include "svd_params.h" #include "dma/svd_dma.h" +#include "dma/axis_lib.h" #include "kernel/u_kernel.h" #include "kernel/s_kernel.h" #include "kernel/v_kernel.h" +namespace svd { + template inline void SvdKernel(svd::SvdStreams &streams) { #pragma HLS INLINE #ifndef __VITIS_HLS__ #pragma HLS DATAFLOW #endif - KernelU(streams); - KernelS(streams); - KernelV(streams); + svd::KernelU(params::R, streams); + svd::KernelS(params::R, streams); + svd::KernelV(params::R, streams); +} + +template < + typename params, + typename WrapperAxisGTv = svd::AxiStreamPort +> +void SvdKernel(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[params::N], + hls::stream& x_port, + hls::stream& u_port, + hls::stream& s_port, + hls::stream& v_port, + hls::stream& y_port) { +#pragma HLS TOP name=SvdKernel +#pragma HLS INLINE +#pragma HLS DATAFLOW +#ifndef __VITIS_HLS__ +#pragma HLS STABLE variable=x_port +#pragma HLS STABLE variable=u_port +#pragma HLS STABLE variable=s_port +#pragma HLS STABLE variable=v_port +#pragma HLS STABLE variable=y_port +#endif +#pragma HLS ARRAY_PARTITION variable=num_refinements complete + const bool pad_output = false; + typedef svd::AxiStreamFifo WrapperFifoG; + hls::stream xu_port("xu_port"); + 
hls::stream xus_port("xus_port"); +#pragma HLS STREAM variable=xu_port depth=2 +#pragma HLS STREAM variable=xus_port depth=2 + int num_refinements_u[params::N]; + int num_refinements_s[params::N]; + int num_refinements_v[params::N]; +#pragma HLS ARRAY_PARTITION variable=num_refinements_u complete +#pragma HLS ARRAY_PARTITION variable=num_refinements_s complete +#pragma HLS ARRAY_PARTITION variable=num_refinements_v complete + Duplicate_R_Stream: + for (int i = 0; i < params::N; ++i) { +#pragma HLS UNROLL + num_refinements_u[i] = num_refinements[i]; + num_refinements_s[i] = num_refinements[i]; + num_refinements_v[i] = num_refinements[i]; + } + svd::KernelU(num_active_inputs, input_size, + num_refinements_u, pad_output, x_port, u_port, xu_port); + svd::KernelS(num_active_inputs, num_refinements_s, + xu_port, s_port, xus_port); + svd::KernelV(num_active_inputs, + output_size, num_refinements_v, xus_port, v_port, y_port); } +/** + * @brief Sets the SVD kernel inputs, i.e. streams from arrays into + * hls::streams. + * + * @param[in] num_active_inputs The number of active inputs + * @param[in] input_size The input size + * @param[in] output_size The output size + * @param[in] num_refinements The number of refinements + * @param[in] x The input array. Shape: (N, I) + * @param[in] u The u array. Shape: (R, I, G) + * @param[in] s The s array. Shape: (R, N, G) + * @param[in] v The v array. Shape: (R, H, G) + * @param x_port The x port to be used as argument to SvdKernel + * @param u_port The u port to be used as argument to SvdKernel + * @param s_port The s port to be used as argument to SvdKernel + * @param v_port The v port to be used as argument to SvdKernel + * + * @tparam params Collection of SVD configuration params. 
+ */ +#ifdef __VITIS_HLS__ +template +void SetSvdKernelInputs(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[params::N], + const typename params::ActivationD* x, + const typename params::ActivationD* u, + const typename params::ActivationD* s, + const typename params::ActivationD* v, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& s_port, + hls::stream& v_port) { + typedef typename params::ActivationD ActivationType; + const int kG = params::G; + const int kTu = params::Tu; + const int kTv = params::Tv; + const int kGTv = kG * kTv; + const int kNumTilesU = input_size / kTu; + const int kNumTilesV = output_size / kTv; + auto x_axis = svd::AxiStreamPort(x_port); + auto u_axis = svd::AxiStreamPort(u_port); + auto s_axis = svd::AxiStreamPort(s_port); + auto v_axis = svd::AxiStreamPort(v_port); + int max_R = num_refinements[0]; + typename params::VectTuType x_val; + typename params::VectTuType u_val; + typename params::VectG_Type s_val; + typename params::VectTvType v_val; + for (int i = 1; i < params::N; ++i) { // Start at 1: max_R is already seeded with num_refinements[0]. + if (num_refinements[i] > max_R) { + max_R = num_refinements[i]; + } + } + for (int j = 0; j < kNumTilesU; ++j) { + for (int i = 0; i < num_active_inputs; ++i) { + for (int k = 0; k < kTu; ++k) { + x_val[k] = x[i * input_size + j * kTu + k]; + } + x_axis.template PushVector(x_val); + } + } + for (int i = 0; i < max_R; ++i) { + for (int j = 0; j < kNumTilesU; ++j) { + for (int k = 0; k < kG; ++k) { + for (int ii = 0; ii < kTu; ++ii) { + u_val[ii] = u[i * kNumTilesU * kTu * kG + (j * kTu + ii) * kG + k]; + } + u_axis.template PushVector(u_val); + } + } + } + for (int i = 0; i < max_R; ++i) { + for (int j = 0; j < num_active_inputs; ++j) { + if (i < num_refinements[j]) { + for (int k = 0; k < kG; ++k) { + s_val[k] = s[i * num_active_inputs * kG + j * kG + k]; + } + s_axis.template PushVector(s_val); + } + } + } + for (int i = 0; i < max_R; ++i) { + for (int j = 0; j < kNumTilesV; ++j) { + for
(int k = 0; k < kG; ++k) { + for (int ii = 0; ii < kTv; ++ii) { + v_val[ii] = v[i * kNumTilesV * kTv * kG + (j * kTv + ii) * kG + k]; + } + v_axis.template PushVector(v_val); + } + } + } +} +#endif // __VITIS_HLS__ + +/** + * @brief Gets the svd kernel outputs, i.e. fills in an array from + * hls::streams. + * + * @param[in] num_active_inputs The number active inputs + * @param[in] output_size The output size (H) + * @param y_port The y port to be used as argument to SvdKernel + * @param y The output array. Shape: (N, G, H) + * + * @tparam params Collection of SVD configuration params. + */ +#ifdef __VITIS_HLS__ +template +void GetSvdKernelOutputs(const int num_active_inputs, const int output_size, + hls::stream& y_port, + typename params::ActivationD* y) { + typedef typename params::ActivationD ActivationType; + const int kG = params::G; + const int kTv = params::Tv; + const int kGTv = kG * kTv; + const int kNumTilesV = output_size / kTv; + auto y_axis = svd::AxiStreamPort(y_port); + for (int j = 0; j < kNumTilesV; ++j) { + for (int i = 0; i < num_active_inputs; ++i) { + auto y_val = y_axis.template PopVector(); + for (int k = 0; k < kTv; ++k) { + for (int ii = 0; ii < kG; ++ii) { + int y_idx = i * output_size * kG + ii * output_size + j * kTv + k; + y[y_idx] = y_val[k * kG + ii]; + } + } + } + } +} +#endif // __VITIS_HLS__ + +} // svd + +void HlsSvdKernel(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[svd::svd_params::N], + hls::stream& x_port, + hls::stream& u_port, + hls::stream& s_port, + hls::stream& v_port, + hls::stream& y_port); + #endif // end KERNEL_SVD_KERNEL_H_ \ No newline at end of file diff --git a/include/kernel/u_kernel.h b/include/kernel/u_kernel.h index 71b566d..10f971b 100644 --- a/include/kernel/u_kernel.h +++ b/include/kernel/u_kernel.h @@ -3,24 +3,35 @@ #include "svd_params.h" #include "hls_utils/hls_metaprogramming.h" +#include "hls_utils/hls_debugging.h" +#include 
"hls_utils/adder_tree.h" +#include "dma/axis_lib.h" +#include "ap_axi_sdata.h" #include "hls_stream.h" +#include "assert.h" +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif + +namespace svd { /** * @brief Kernel performing x @ U. * - * @param[in] size The size, ideally: params::R * params::PrunedSizeU / params::R / params::PeU * @param streams The streams group + * @param[in] size The size, ideally: params::R * params::PrunedSizeU / params::R + * / params::PeU * * @tparam params The algorithm characteristics */ template -void KernelU(svd::SvdStreams &streams) { +void KernelU(const int num_refinements, svd::SvdStreams &streams) { typename params::AccumulationD xu[params::N][params::G][params::PeU]; #pragma HLS ARRAY_PARTITION variable=xu complete dim=0 - for (int i = 0; i < params::R; ++i) { - for (int j = 0; j < params::PrunedSizeU / params::R / params::PeU; ++j) { -#pragma HLS PIPELINE II=1 + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < params::PrunedSizeU / params::PeU; ++j) { +#pragma HLS PIPELINE II=1 style=frp for (int k = 0; k < params::PeU; ++k) { for (int g = 0; g < params::G; ++g) { auto u = streams.u[g][k].read(); @@ -30,7 +41,7 @@ void KernelU(svd::SvdStreams &streams) { } xu[ii][g][k] += u * streams.x[ii][g][k].read(); #pragma HLS RESOURCE variable=xu[ii][g][k] core=DSP48 latency=3 - if (j == params::PrunedSizeU / params::R / params::PeU - 1) { + if (j == params::PrunedSizeU / params::PeU - 1) { streams.xu[ii][g][k].write(xu[ii][g][k]); } } @@ -40,8 +51,6 @@ void KernelU(svd::SvdStreams &streams) { } } -namespace svd { - /** * @brief Performs MAC operation. The weight values are read directly from * a port (which can be modeled as a FIFO). 
@@ -57,170 +66,47 @@ namespace svd { * @tparam NumIter The number of refinement steps * @tparam NumTimesteps The number of LSTM timesteps */ -template +template void UDotUnit2LstmPe(const int vect_length, const int num_tiles, - const int num_iter, const int num_timesteps, - hls::stream &x1_stream, - hls::stream &x2_stream, - hls::stream &gate_u_stream, - hls::stream &acc1_stream, - hls::stream &acc2_stream) { + const int num_iter, + hls::stream x1_stream[NumGates][NumPEs], + hls::stream x2_stream[NumGates][NumPEs], + hls::stream gate_u_stream[NumGates][NumPEs], + hls::stream acc1_stream[NumGates][NumPEs], + hls::stream acc2_stream[NumGates][NumPEs]) { #pragma HLS INLINE off #pragma HLS FUNCTION_INSTANTIATE variable=vect_length #pragma HLS FUNCTION_INSTANTIATE variable=num_iter #pragma HLS FUNCTION_INSTANTIATE variable=num_tiles -#pragma HLS FUNCTION_INSTANTIATE variable=num_timesteps -// #pragma HLS INTERFACE ap_ctrl_none port=return assert(vect_length % num_tiles == 0); - - const int kNumElemsPerTile = vect_length / num_tiles; - AccumType y1_mac = 0; - AccumType y2_mac = 0; - - ReduceProd_PE_IterTimesteps_Loop: - for (int i = 0; i < num_iter * num_timesteps; ++i) { - ReduceProd_PE_Loop: - for (int j = 0; j < kNumElemsPerTile; ++j) { -#pragma HLS PIPELINE II=1 - if (j == 0) { - y1_mac = 0; - y2_mac = 0; - } - auto u_val = gate_u_stream.read(); - auto mac1 = u_val * x1_stream.read(); - auto mac2 = u_val * x2_stream.read(); - mac1 += y1_mac; - mac2 += y2_mac; -#pragma HLS RESOURCE variable=mac1 core=DSP48 -#pragma HLS RESOURCE variable=mac2 core=DSP48 - y1_mac = mac1; - y2_mac = mac2; - if (j == kNumElemsPerTile - 1) { - acc1_stream.write(y1_mac); - acc2_stream.write(y2_mac); - } - } - } -} - -/** - * @brief Accumulate partial results from ReduceProd PEs. 
- * - * @param acc1_streams The acc 1 streams, each from a PE - * @param acc2_streams The acc 2 streams, each from a PE - * @param y1_stream The single y_1 stream - * @param y2_stream The single y_2 stream - * - * @tparam VectLength The input vector dimension - * @tparam NumTiles The number of used tiles (to determine the - * number of PEs) - * @tparam NumZeroTiles The number of pruned tiles (to determine the - * number of PEs) - * @tparam NumIter The number of refinement steps (to make the - * pipeline longer) - * @tparam NumTimesteps The number of LSTM timesteps (to make the - * pipeline longer) - * @tparam AdderTreeDesign Enable or disable AdderTree design. Default is - * active, i.e. true. - */ -template -void UDotUnit2LstmAccumulator(svd::AccumStream (&acc1_streams)[NumTiles-NumZeroTiles], - svd::AccumStream (&acc2_streams)[NumTiles-NumZeroTiles], - svd::ActivationStream &y1_stream, - svd::ActivationStream &y2_stream) { -#pragma HLS INLINE off -// #pragma HLS INTERFACE ap_ctrl_none port=return - const int kNumPEs = NumTiles - NumZeroTiles; - - if (AdderTreeDesign) { - // Determine the number of ranks for the adder tree and declare array - // - The adder_tree is larger than required as each rank only needs to be half the size of the previous rank - const unsigned kNumPEsLog2 = hls_utils::log2::value; - const unsigned kNumPEsSub1Log2 = hls_utils::log2::value; - const unsigned kNumRanks = kNumPEsLog2 != kNumPEsSub1Log2 ? kNumPEsLog2 : kNumPEsLog2 + 1; - svd::AccumD adder_tree1[kNumRanks][kNumPEs]; - svd::AccumD adder_tree2[kNumRanks][kNumPEs]; - - unsigned rank_size = kNumPEs; - - for (int i = 0; i < NumIter * NumTimesteps; ++i) { -#pragma HLS PIPELINE II=1 - add_level_loop: - for(int adder_tree_rank = kNumRanks - 1; adder_tree_rank >= 0; --adder_tree_rank) { - const bool kLoopInit = adder_tree_rank == kNumRanks - 1 ? true : false; - const bool kLoopEpilog = adder_tree_rank == 0 ? 
true : false; - - if (kLoopInit) { - rank_size = kNumPEs; - } - - const bool prev_rank_is_odd = rank_size % 2 == 0 ? false : true; - rank_size = (rank_size + 1) / 2; - // std::cout << "[" << adder_tree_rank << "] rank_size: " << rank_size << "\n"; - - add_col_loop: - for(int jj = 0; jj < (kNumPEs + 1) / 2; ++jj) { - if (jj < rank_size) { - if (prev_rank_is_odd && jj == rank_size - 1) { - // Bypass, no adder required. - if (kLoopInit) { - adder_tree1[adder_tree_rank][jj] = acc1_streams[jj * 2].read(); - adder_tree2[adder_tree_rank][jj] = acc2_streams[jj * 2].read(); - // std::cout << "\t\tstream[" << adder_tree_rank << "][" << jj * 2 << "] = [" << jj << "]\n"; - } else { - adder_tree1[adder_tree_rank][jj] = adder_tree1[adder_tree_rank + 1][jj * 2]; - adder_tree2[adder_tree_rank][jj] = adder_tree2[adder_tree_rank + 1][jj * 2]; - // std::cout << "\t\tbuffer[" << adder_tree_rank << "][" << jj * 2 << "] = [" << adder_tree_rank + 1 << "][" << jj << "]\n"; - } - } else { - if (kLoopInit) { - auto y1_acc = acc1_streams[jj * 2].read() + acc1_streams[jj * 2 + 1].read(); - auto y2_acc = acc2_streams[jj * 2].read() + acc2_streams[jj * 2 + 1].read(); -#pragma HLS RESOURCE variable=y1_acc core=AddSub_DSP -#pragma HLS RESOURCE variable=y2_acc core=AddSub_DSP - adder_tree1[adder_tree_rank][jj] = y1_acc; - adder_tree2[adder_tree_rank][jj] = y2_acc; - // std::cout << "\tstreams[" << adder_tree_rank << "][" << jj << "] = [" << jj * 2 << "] + [" << jj * 2 + 1 << "]\n"; - } else{ - auto y1_acc = adder_tree1[adder_tree_rank + 1][jj * 2] + adder_tree1[adder_tree_rank + 1][jj * 2 + 1]; - auto y2_acc = adder_tree2[adder_tree_rank + 1][jj * 2] + adder_tree2[adder_tree_rank + 1][jj * 2 + 1]; -#pragma HLS RESOURCE variable=y1_acc core=AddSub_DSP -#pragma HLS RESOURCE variable=y2_acc core=AddSub_DSP - adder_tree1[adder_tree_rank][jj] = y1_acc; - adder_tree2[adder_tree_rank][jj] = y2_acc; - // std::cout << "\tbuffer[" << adder_tree_rank << "][" << jj << "] = [" << adder_tree_rank + 1 << "][" << 
jj * 2 << "] + [" << adder_tree_rank + 1 << "][" << jj * 2 + 1 << "]\n"; - } + const int kNumInputs = 2; + const int kTileSize = vect_length / num_tiles; + AccumType y_mac[NumGates][NumPEs][kNumInputs] = {0}; + AccumType x_val[NumGates][NumPEs][kNumInputs] = {0}; +#pragma HLS ARRAY_PARTITION variable=y_mac complete dim=0 +#pragma HLS ARRAY_PARTITION variable=x_val complete dim=0 + U_Unit_PE: + for (int i = 0; i < num_iter; ++i) { + for (int j = 0; j < kTileSize; ++j) { +#pragma HLS PIPELINE II=1 style=frp + for (int k = 0; k < NumGates; ++k) { + for (int ii = 0; ii < NumPEs; ++ii) { + auto u_val = gate_u_stream[k][ii].read(); + x_val[k][ii][0] = x1_stream[k][ii].read(); + x_val[k][ii][1] = x2_stream[k][ii].read(); + for (int jj = 0; jj < kNumInputs; ++jj) { + if (j == 0) { + y_mac[k][ii][jj] = 0; } + auto mac = u_val * x_val[k][ii][jj]; + mac += y_mac[k][ii][jj]; +#pragma HLS RESOURCE variable=mac core=DSP48 + y_mac[k][ii][jj] = mac; + } + if (j == kTileSize - 1) { + acc1_stream[k][ii].write(y_mac[k][ii][0]); + acc2_stream[k][ii].write(y_mac[k][ii][1]); } - } - if (kLoopEpilog) { - y1_stream.write(adder_tree1[0][0]); - y2_stream.write(adder_tree2[0][0]); - // std::cout << "\n"; - } - } - } - } else { - svd::AccumD y1_acc = 0; - svd::AccumD y2_acc = 0; - for (int i = 0; i < NumIter * NumTimesteps; ++i) { - AdderTree_PE_Loop: - for (int j = 0; j < kNumPEs; ++j) { -#pragma HLS PIPELINE II=1 - if (j == 0) { - y1_acc = 0; - y2_acc = 0; - } - auto acc1 = y1_acc + acc1_streams[j].read(); - auto acc2 = y2_acc + acc2_streams[j].read(); -#pragma HLS RESOURCE variable=acc1 core=AddSub_DSP -#pragma HLS RESOURCE variable=acc2 core=AddSub_DSP - y1_acc = acc1; - y2_acc = acc2; - if (j == kNumPEs - 1) { - y1_stream.write(y1_acc); - y2_stream.write(y2_acc); } } } @@ -246,12 +132,12 @@ void UDotUnit2LstmAccumulator(svd::AccumStream (&acc1_streams)[NumTiles-NumZeroT * @tparam NumZeroTiles The number of zeroed, i.e. 
pruned, tiles */ template -void UDotUnit2Lstm(svd::ActivationStream (&x1_streams)[NumTiles-NumZeroTiles], - svd::ActivationStream (&x2_streams)[NumTiles-NumZeroTiles], - WeightStream (&gate_u_streams)[NumTiles-NumZeroTiles], - svd::ActivationStream &y1, - svd::ActivationStream &y2) { + int NumGates> +void UDotUnit2Lstm(svd::ActivationStream x1_streams[NumGates][NumTiles-NumZeroTiles], + svd::ActivationStream x2_streams[NumGates][NumTiles-NumZeroTiles], + WeightStream gate_u_streams[NumGates][NumTiles-NumZeroTiles], + svd::ActivationStream y1[NumGates], + svd::ActivationStream y2[NumGates]) { assert(VectLength % NumTiles == 0); assert(NumZeroTiles < NumTiles); assert(NumTiles >= 8); @@ -268,8 +154,8 @@ void UDotUnit2Lstm(svd::ActivationStream (&x1_streams)[NumTiles-NumZeroTiles], // =========================================================================== const int kNumNonZeroTiles = NumTiles - NumZeroTiles; const int kNumPEs = kNumNonZeroTiles; - const int kNumElemsPerTile = VectLength / NumTiles; - const int kStreamDepth = NumIter * kNumElemsPerTile; + const int kTileSize = VectLength / NumTiles; + const int kStreamDepth = NumIter * kTileSize; svd::AccumD y1_mul[kNumPEs]; svd::AccumD y2_mul[kNumPEs]; #pragma HLS ARRAY_PARTITION variable=y1_mul complete dim=1 @@ -294,8 +180,8 @@ void UDotUnit2Lstm(svd::ActivationStream (&x1_streams)[NumTiles-NumZeroTiles], y1_mul[i] = 0; y2_mul[i] = 0; ReduceProd_Tile_Loop: - for (int j = 0; j < kNumElemsPerTile / 2; ++j) { -#pragma HLS PIPELINE II=1 + for (int j = 0; j < kTileSize / 2; ++j) { +#pragma HLS PIPELINE II=1 style=frp // auto p0_tmp = y_dsp * w_dsp + y_lut * w_lut; // auto p1_tmp = x_dsp * w_dsp + x_lut * w_lut; // p0 += p0_tmp; @@ -332,7 +218,7 @@ void UDotUnit2Lstm(svd::ActivationStream (&x1_streams)[NumTiles-NumZeroTiles], ReduceProd_Accumulation_Loop: for (int i = 0; i < NumIter * NumTimesteps; ++i) { for (int j = 0; j < kNumPEs; ++j) { -#pragma HLS PIPELINE II=1 +#pragma HLS PIPELINE II=1 style=frp y1_acc += 
y1_mul[j]; y2_acc += y2_mul[j]; } @@ -343,40 +229,286 @@ void UDotUnit2Lstm(svd::ActivationStream (&x1_streams)[NumTiles-NumZeroTiles], // ============================================================================= // Implements #mac_PEs = NumTiles - NumZeroTiles & #Adder_Tree = 1 // ============================================================================= -// #pragma HLS INTERFACE ap_ctrl_none port=return #pragma HLS DATAFLOW -// #pragma HLS INLINE - - const unsigned kNumNonZeroTiles = NumTiles - NumZeroTiles; - const unsigned kNumPEs = kNumNonZeroTiles; +#pragma HLS INLINE + const int kNumNonZeroTiles = NumTiles - NumZeroTiles; + const int kNumInputs = 2; // NOTE: both PE and adder-tree have II=1, but the adder-tree reads in round // robin fashion from the PE queues. Hence, before the adder-tree reads again // from the same PE queue, kNumPEs cycles pass. This contrains the depth of // the queues to kNumPEs. (THIS WON'T WORK, TOO LOW CONSUMER RATE) // FIXED: Using an adder tree allows to use a stream of depth 1. 
- const unsigned kStreamDepth = 1; // VectLength / NumTiles; + const int kStreamDepth = 2; // VectLength / NumTiles; + hls::stream acc_streams[kNumInputs][NumGates][kNumNonZeroTiles]; +#pragma HLS ARRAY_PARTITION variable=acc_streams complete dim=0 +#pragma HLS STREAM variable=acc_streams depth=kStreamDepth + svd::UDotUnit2LstmPe(VectLength, + NumTiles, NumIter, x1_streams, x2_streams, gate_u_streams, + acc_streams[0], acc_streams[1]); + UAccumUnit: + for (int i = 0; i < NumIter; ++i) { +#pragma HLS PIPELINE II=1 style=frp + for (int j = 0; j < NumGates; ++j) { + auto y1_val = svd::ActivationD(hlsutils::adder_tree(acc_streams[0][j])); + auto y2_val = svd::ActivationD(hlsutils::adder_tree(acc_streams[1][j])); + y1[j].write(y1_val); + y2[j].write(y2_val); + } + } +#endif // end REDUCE_PROD_2LSTM_DATAFLOW_DESIGN +} - hls::stream acc1_streams[kNumNonZeroTiles]; - hls::stream acc2_streams[kNumNonZeroTiles]; -#pragma HLS ARRAY_PARTITION variable=acc1_streams complete dim=1 -#pragma HLS ARRAY_PARTITION variable=acc1_streams complete dim=1 -#pragma HLS STREAM variable=acc1_streams depth=kStreamDepth -#pragma HLS STREAM variable=acc2_streams depth=kStreamDepth +#ifdef __VITIS_HLS__ +/** + * @brief Flexible Kernel-U. + * + * @param[in] num_active_inputs The number of active inputs + * @param[in] input_size The input size + * @param[in] num_refinements The number of refinements steps (R) per input: + * the Rs must be positive, greater than zero and + * in ASCENDING ORDER. Their amount must be less + * or equal to num_active_inputs. There should be + * #num_active_inputs defined Rs (with no gaps), + * as only the first #num_active_inputs Rs will + * be considered. + * @param[in] pad_output Wether to pad output with zeroes + * @param x_port The input x port + * @param u_port The input u port + * @param xu_port The output xu port + * + * @tparam params The collection of fixed parameters and + * configurations. 
+ */ +template < + typename params, + typename WrapperAxisG = svd::AxiStreamPort +> +void KernelU(const int num_active_inputs, + const int input_size, + const int num_refinements[params::N], + const bool pad_output, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& xu_port) { +#pragma HLS TOP name=KernelU +#pragma HLS DATAFLOW +#pragma HLS INLINE +#ifndef __VITIS_HLS__ +#pragma HLS STABLE variable=x_port +#pragma HLS STABLE variable=u_port +#pragma HLS STABLE variable=xu_port +#endif + assert(num_active_inputs <= params::N); + assert(num_active_inputs > 0); + assert(params::I % params::Tu == 0); + assert(input_size % params::Tu == 0); + assert(input_size <= params::I); + const int kNumTilesU = input_size / params::Tu; + const int kMaxNumTilesU = params::I / params::Tu; + const int kStreamDepth_X = 2 + kMaxNumTilesU * params::N; + const int kStreamDepth_U = 8 + kMaxNumTilesU * params::N; + const int kStreamDepth_XU = 2 + params::G; + assert(kNumTilesU <= kMaxNumTilesU); + typedef typename params::ActivationD ActivationType; + auto x_axis = svd::AxiStreamPort(x_port); + auto u_axis = svd::AxiStreamPort(u_port); + auto xu_axis = svd::AxiStreamInterface(xu_port); + hls::stream x_stream("x_stream"); + hls::stream u_streams[params::G]; + hls::stream xu_streams[params::G]; + ActivationType x_buffer[params::N][params::Tu][kMaxNumTilesU]; +#pragma HLS STREAM variable=x_stream depth=kStreamDepth_X +#pragma HLS STREAM variable=u_streams depth=kStreamDepth_U +#pragma HLS STREAM variable=xu_streams depth=kStreamDepth_XU +#pragma HLS ARRAY_PARTITION variable=u_streams complete dim=1 +#pragma HLS ARRAY_PARTITION variable=x_buffer complete dim=1 +#pragma HLS ARRAY_PARTITION variable=x_buffer complete dim=2 +#pragma HLS BIND_STORAGE variable=x_buffer type=ram_t2p impl=bram latency=1 + /* + * Ideally, if the Rs are ordered, it would be: R0 * N + (R1-R0) * (N-1) + + * (R2-R1) * (N-2) + * + * Imagine we have: R0 = 2, R1 = 3, R2 = 6 + * + * This means: + * - till refinement 
2 we have input 0 to process + * - till refinement 3 we have input 1 to process + * - till refinement 6 we have input 2 to process + * + * So it would become: + * + * R_total = 2 * 3 + (3-2) * (3-1) + (6-3) * (3-2) + */ + int R_max = num_refinements[0]; + int R_total = num_refinements[0] * num_active_inputs; // Total elements. + Get_Total_R: + for (int i = 1; i < num_active_inputs; ++i) { +#pragma HLS PIPELINE II=1 style=frp + if (num_refinements[i] > R_max) { + R_max = num_refinements[i]; + } + assert(num_refinements[i] >= num_refinements[i - 1]); + R_total += (num_refinements[i] - num_refinements[i - 1]) * (num_active_inputs - i); + } - PE_Loop: - for (int pe = 0; pe < kNumPEs; ++pe) { -#pragma HLS UNROLL - UDotUnit2LstmPe(VectLength, - NumTiles, NumIter, NumTimesteps, - x1_streams[pe], x2_streams[pe], gate_u_streams[pe], acc1_streams[pe], - acc2_streams[pe]); + int R_prev = 0; + X_DMA: + for (int ii = 0; ii < num_active_inputs; ++ii) { + Stream_X_Tiles: + for (int i = 0; i < num_refinements[ii] - R_prev; ++i) { + assert(num_refinements[ii] - R_prev >= 1); + for (int j = 0; j < kNumTilesU; ++j) { + for (int k = 0; k < num_active_inputs - ii; ++k) { +#pragma HLS PIPELINE II=1 style=frp + assert(num_active_inputs - ii >= 1); + if (ii == 0 && i == 0) { + auto x_val = x_axis.template PopVector(); + x_stream << x_val; + for (int jj = 0; jj < params::Tu; ++jj) { + x_buffer[k][jj][j] = x_val[jj]; + } + } else { + assert(k + ii < params::N); + typename params::VectTuType x_val; + for (int jj = 0; jj < params::Tu; ++jj) { + x_val[jj] = x_buffer[k + ii][jj][j]; + } + x_stream << x_val; + } + } + } + } + R_prev = num_refinements[ii]; + } + U_DMA: + for (int i = 0; i < R_max; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=params::R max=params::R + for (int j = 0; j < kNumTilesU; ++j) { + for (int k = 0; k < params::G; ++k) { + auto u_val = u_axis.template PopVector(); + for (int ii = 0; ii < num_active_inputs; ++ii) { +#pragma HLS PIPELINE II=1 style=frp + if (i < 
num_refinements[ii]) { + u_streams[k] << u_val; + } + } + } + } + } + U_Kernel: + for (int i = 0; i < R_total; ++i) { + for (int j = 0; j < kNumTilesU; ++j) { +#pragma HLS PIPELINE II=1 style=frp + auto x_val = x_stream.read(); + for (int k = 0; k < params::G; ++k) { + xu_streams[k] << hlsutils::adder_tree(x_val * u_streams[k].read()); + // xu_streams[k] << (x_val * u_streams[k].read()).reduce_add(); + } + } + } + int iter_cnt = 0; + XU_DMA: + for (int i = 0; i < R_max; ++i) { + typename params::VectG_Type xu_out[params::N] = {typename params::VectG_Type(0)}; +#pragma HLS ARRAY_PARTITION variable=xu_out complete dim=1 + for (int j = 0; j < kNumTilesU; ++j) { + for (int k = 0; k < num_active_inputs; ++k) { +#pragma HLS PIPELINE II=1 style=frp + for (int ii = 0; ii < params::G; ++ii) { + if (i < num_refinements[k]) { + xu_out[k][ii] += xu_streams[ii].read(); +#pragma HLS BIND_OP variable=xu_out[k][ii] op=add impl=dsp + } + } + if (i < num_refinements[k] && j == kNumTilesU - 1) { + const bool kIsLast = (iter_cnt == R_total - 1 && !pad_output); + xu_axis.template PushVector(xu_out[k], kIsLast); + ++iter_cnt; + } else if (pad_output) { + const bool kIsLast = i == R_max - 1 && j == kNumTilesU - 1 && k == num_active_inputs - 1; + xu_axis.template PushVector(xu_out[k], kIsLast); + ++iter_cnt; + } + } + } } - UDotUnit2LstmAccumulator( - acc1_streams, acc2_streams, y1, y2); -#endif // end REDUCE_PROD_2LSTM_DATAFLOW_DESIGN } +#endif // end __VITIS_HLS__ } // svd +namespace testu { + +static const int kNumInputs = 4; +static const int kInputSize = 1024; +static const int Tu = 4; +// NOTE: The rest of the parameters are unused for now. 
+static const int kDummySize = 1; +static const int R = 8; +static const int Tv = 1; +static const int ZTu = 0; +static const int ZTv = 0; +static const int G = 4; + +typedef svd::SvdParameters params; + short, short, short> params; + +static const int VectTuAxiBitwidth = hlsutils::Bitwidth::value * params::Tu; +static const int VectN_AxiBitwidth = hlsutils::Bitwidth::value * params::N; +static const int VectGN_AxiBitwidth = hlsutils::Bitwidth::value * params::G * params::N; +#ifdef __VITIS_HLS__ +typedef hls::vector VectTuType; +typedef hls::vector VectN_Type; +typedef hls::vector VectGN_Type; +#endif +typedef svd::AxiStreamPort::PacketType VectTuAxiPacketType; +typedef svd::AxiStreamPort::PacketType VectN_AxiPacketType; +typedef svd::AxiStreamPort::PacketType VectGN_AxiPacketType; + +} // testu + +#ifndef __VITIS_HLS__ +void HlsKernelU(const int num_refinements, + const typename testu::params::ActivationD x_port[testu::params::N][testu::params::I], + const typename testu::params::UPortD u_port[testu::params::R * testu::params::PrunedSizeU], + typename testu::params::ActivationD xu_port[testu::params::N][testu::params::G * testu::params::R]); +#else +void HlsVectorKernelU(const int num_refinements, + hls::stream >& x_port, + hls::stream >& u_port, + hls::stream >& xu_port); + +void HlsAxisKernelU(const int num_refinements, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& xu_port); + +/** + * @brief Synthesizeable flexible Kernel-U. + * + * @param[in] num_active_inputs The number of active inputs + * @param[in] input_size The input size + * @param[in] num_refinements The number of refinements steps (R) per input: + * the Rs must be positive, greater than zero and + * in ASCENDING ORDER. Their amount must be less + * or equal to num_active_inputs. 
+ * @param[in] pad_output Whether to pad output with zeroes + * @param x_port The input x port + * @param u_port The input u port + * @param xu_port The output xu port + */ +void HlsKernelU(const int num_active_inputs, + const int input_size, + const int num_refinements[testu::params::N], + // const hls::vector num_refinements, + const bool pad_output, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& xu_port); + +#endif // end __VITIS_HLS__ #endif // end KERNEL_U_KERNEL_H_ \ No newline at end of file diff --git a/include/kernel/v_kernel.h b/include/kernel/v_kernel.h index d07483d..b57abb1 100644 --- a/include/kernel/v_kernel.h +++ b/include/kernel/v_kernel.h @@ -3,8 +3,10 @@ #include "svd_params.h" +namespace svd { + template -void KernelV(svd::SvdStreams &streams) { +void KernelV(const int num_refinements, svd::SvdStreams &streams) { #pragma HLS INLINE #ifndef __VITIS_HLS__ #pragma HLS DATAFLOW @@ -34,8 +36,8 @@ void KernelV(svd::SvdStreams &streams) { #endif if (params::ZTv > 0) { // constexpr V_Nz_Converter: - for (int i = 0; i < params::R; ++i) { - for (int j = 0; j < params::PrunedSizeV / params::R / params::PeV; ++j) { + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < params::PrunedSizeV / params::PeV; ++j) { #pragma HLS PIPELINE II=1 #pragma HLS LOOP_FLATTEN for (int k = 0; k < params::G; ++k) { @@ -61,8 +63,8 @@ void KernelV(svd::SvdStreams &streams) { } } V_Unit: - for (int i = 0; i < params::R; ++i) { - for (int j = 0; j < params::PrunedSizeV / params::R / params::PeV; ++j) { + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < params::PrunedSizeV / params::PeV; ++j) { #pragma HLS PIPELINE II=1 #pragma HLS LOOP_FLATTEN for (int g = 0; g < params::G; ++g) { @@ -97,433 +99,388 @@ void KernelV(svd::SvdStreams &streams) { } } - -namespace svd { - -template +template void VDotUnit2LstmV2(const bool has_bias, svd::WeightStream *bias1, svd::WeightStream *bias2, - svd::ActivationStream &gate_dot1_streams, -
svd::ActivationStream &gate_dot2_streams, - svd::WeightStream &gate_s1_streams, - svd::WeightStream &gate_s2_streams, - svd::WeightStream (&gate_v_streams)[VectLength / NumTiles], - hls::stream > &comb_stream_port, - svd::ActivationStream (&gate_out1_streams)[VectLength / NumTiles], - svd::ActivationStream (&gate_out2_streams)[VectLength / NumTiles], - bool debug_module = false, - ProbeStream *probe_ctrl = nullptr) { + svd::ActivationStream gate_dot1_streams[NumGates], + svd::ActivationStream gate_dot2_streams[NumGates], + svd::WeightStream gate_s1_streams[NumGates], + svd::WeightStream gate_s2_streams[NumGates], + svd::WeightStream gate_v_streams[NumGates][VectLength / NumTiles], + hls::stream > nz_port[NumGates], + svd::ActivationStream gate_out1_streams[NumGates][VectLength / NumTiles], + svd::ActivationStream gate_out2_streams[NumGates][VectLength / NumTiles]) { #pragma HLS INLINE #pragma HLS DATAFLOW - assert(VectLength % NumTiles == 0); assert(NumTiles > NumZeroTiles); assert(NumTiles % 2 == 0); assert(NumIter % 2 == 0); - + const int kNumInputs = 2; const int kFifoResizeFactor = 4; const int kNonZeroTiles = NumTiles - NumZeroTiles; - const int kNumTileElems = VectLength / NumTiles; + const int kTileSize = VectLength / NumTiles; // NOTE: By the time the dot products are available at the ports, the weight // values s1, s2 and v should be already at the FIFO ports. 
const int kStreamDepth = NumIter / kFifoResizeFactor; - hls::stream xs1_streams[kNumTileElems]; - hls::stream xs2_streams[kNumTileElems]; -#pragma HLS STREAM variable=xs1_streams depth=kStreamDepth dim=1 -#pragma HLS STREAM variable=xs2_streams depth=kStreamDepth dim=1 - - // svd::MultD xs1_val = 0; - // svd::MultD xs2_val = 0; - - ScalarMul: + hls::stream xus_streams[NumGates][kNumInputs][kTileSize]; +#pragma HLS STREAM variable=xus_streams depth=kStreamDepth dim=0 + S_Kernel: for (int i = 0; i < NumIter; ++i) { #pragma HLS INLINE off #pragma HLS PIPELINE II=1 - auto xs1_val = gate_s1_streams.read() * gate_dot1_streams.read(); - auto xs2_val = gate_s2_streams.read() * gate_dot2_streams.read(); -#pragma HLS RESOURCE variable=xs1_val core=DSP48 latency=3 -#pragma HLS RESOURCE variable=xs2_val core=DSP48 latency=3 - ScalarMulDispatcher: - for (int j = 0; j < kNumTileElems; ++j) { - xs1_streams[j].write(xs1_val); - xs2_streams[j].write(xs2_val); + for (int g = 0; g < NumGates; ++g) { + svd::MultD xus_val[kNumInputs]; + #pragma HLS ARRAY_PARTITION variable=xus_val complete dim=0 + xus_val[0] = gate_s1_streams[g].read() * gate_dot1_streams[g].read(); + xus_val[1] = gate_s2_streams[g].read() * gate_dot2_streams[g].read(); + #pragma HLS RESOURCE variable=xus_val[0] core=DSP48 latency=3 + #pragma HLS RESOURCE variable=xus_val[1] core=DSP48 latency=3 + for (int j = 0; j < kTileSize; ++j) { + for (int k = 0; k < kNumInputs; ++k) { + xus_streams[g][k][j].write(xus_val[k]); + } + } } } - - svd::WeightStream bias1_streams[kNumTileElems]; - svd::WeightStream bias2_streams[kNumTileElems]; - if (has_bias) { -#pragma HLS STREAM variable=bias1_streams depth=NumTiles dim=1 -#pragma HLS STREAM variable=bias2_streams depth=NumTiles dim=1 -#pragma HLS ARRAY_PARTITION variable=bias1_streams complete dim=1 -#pragma HLS ARRAY_PARTITION variable=bias2_streams complete dim=1 - } - + svd::WeightStream bias_streams[NumGates][kNumInputs][kTileSize]; if (has_bias) { - BiasDispatcher_tiles: 
+#pragma HLS ARRAY_PARTITION variable=bias_streams complete dim=0 +#pragma HLS STREAM variable=bias_streams depth=NumTiles + Bias_DMA: for (int i = 0; i < NumTiles; ++i) { -#pragma HLS INLINE off - BiasDispatcher_elems: - for (int j = 0; j < kNumTileElems; ++j) { + for (int j = 0; j < kTileSize; ++j) { #pragma HLS PIPELINE II=1 - bias1_streams[j].write(bias1->read()); - bias2_streams[j].write(bias2->read()); + for (int k = 0; k < NumGates; ++k) { + bias_streams[k][0][j].write(bias1[k].read()); + bias_streams[k][1][j].write(bias2[k].read()); + } } } } - const int kCombStreamDepth = NumIter * NumZeroTiles / (kFifoResizeFactor * 2); - const int kNzBitLength = hls_utils::log2::value; - hls::stream > nz_idx_streams[kNumTileElems]; -#pragma HLS STREAM variable=nz_idx_streams depth=kCombStreamDepth dim=1 + const int kNzBitLength = hlsutils::log2::value; + hls::stream > nz_idx_streams[NumGates][kTileSize]; +#pragma HLS STREAM variable=nz_idx_streams depth=kCombStreamDepth dim=0 #pragma HLS RESOURCE variable=nz_idx_streams core=FIFO_SRL - -#if 0 // USE_PRIORITY_ENCODER +// #define V_UNIT_USE_PRIORITY_ENCODER +#ifdef V_UNIT_USE_PRIORITY_ENCODER // =========================================================================== // NOTE: The critical path is HUGE here. So we go for the other solution. 
// =========================================================================== - ap_uint zero_comb = 0; - - Convert_Iter: + ap_uint z_idx = 0; + ZIndex_Converter: for (int i = 0; i < NumIter; ++i) { for (int j = 0; j < NumTiles - NumZeroTiles; ++j) { #pragma HLS PIPELINE II=1 if (j == 0) { - zero_comb = comb_stream_port.read(); - int set_idx = PriorityEncoderLSB(zero_comb); + z_idx = nz_port.read(); + int set_idx = PriorityEncoderLSB(z_idx); assert(set_idx < NumTiles); - for (int k = 0; k < kNumTileElems; ++k) { + for (int k = 0; k < kTileSize; ++k) { nz_idx_streams[k].write(set_idx); } - zero_comb[set_idx] = 0; + z_idx[set_idx] = 0; } else { - int set_idx = PriorityEncoderLSB(zero_comb); + int set_idx = PriorityEncoderLSB(z_idx); assert(set_idx < NumTiles); - for (int k = 0; k < kNumTileElems; ++k) { + for (int k = 0; k < kTileSize; ++k) { nz_idx_streams[k].write(set_idx); } - zero_comb[set_idx] = 0; + z_idx[set_idx] = 0; } } } #else - ap_uint c; - int nz_cnt = 0; - assert(nz_cnt < kNonZeroTiles); - - CombConverter_iter: + ap_uint z_idx[NumGates]; + int nz_cnt[NumGates] = {0}; +#pragma HLS ARRAY_PARTITION variable=z_idx complete dim=0 +#pragma HLS ARRAY_PARTITION variable=nz_cnt complete dim=0 + ZIndex_Converter: for (int i = 0; i < NumIter; ++i) { #pragma HLS INLINE off - CombConverter_tiles: for (int j = 0; j < NumTiles; ++j) { #pragma HLS LOOP_FLATTEN #pragma HLS PIPELINE II=1 #pragma HLS LOOP_TRIPCOUNT min=kNonZeroTiles max=kNonZeroTiles - if (j == 0) { - c = comb_stream_port.read(); - // std::cout << "nz[" << i << "] = " << c.to_string(2, false) << "\n"; - if (c[0] == 1) { - for (int k = 0; k < kNumTileElems; ++k) { - nz_idx_streams[k].write(0); - } - nz_cnt++; - } - } else { - if (c[j] == 1) { - CombConverter_elem: - for (int k = 0; k < kNumTileElems; ++k) { - nz_idx_streams[k].write(j); + for (int g = 0; g < NumGates; ++g) { + assert(nz_cnt[g] < kNonZeroTiles); + if (j == 0) { + z_idx[g] = nz_port[g].read(); + if (z_idx[g][0] == 1) { + for (int k = 0; k < 
kTileSize; ++k) { + nz_idx_streams[g][k].write(0); + } + ++nz_cnt[g]; } - if (nz_cnt == kNonZeroTiles - 1) { - nz_cnt = 0; - break; - } else { - nz_cnt++; + } else { + if (z_idx[g][j] == 1) { + for (int k = 0; k < kTileSize; ++k) { + nz_idx_streams[g][k].write(j); + } + if (nz_cnt[g] == kNonZeroTiles - 1) { + nz_cnt[g] = 0; + break; + } else { + ++nz_cnt[g]; + } } } } } } #endif - -#if 1 V_Kernel: { #pragma HLS INLINE off - if (debug_module) { - probe_ctrl->write(1); - } - - svd::AccumD acc_buffer1[kNumTileElems][NumTiles]; - svd::AccumD acc_buffer2[kNumTileElems][NumTiles]; -#pragma HLS ARRAY_PARTITION variable=acc_buffer1 complete dim=1 -#pragma HLS ARRAY_PARTITION variable=acc_buffer2 complete dim=1 -// #pragma HLS RESOURCE variable=acc_buffer1 core=RAM_T2P_BRAM latency=1 -// #pragma HLS RESOURCE variable=acc_buffer2 core=RAM_T2P_BRAM latency=1 -// #pragma HLS RESOURCE variable=acc_buffer1 core=RAM_T2P_URAM -// #pragma HLS RESOURCE variable=acc_buffer2 core=RAM_T2P_URAM - -// #pragma HLS RESOURCE variable=acc_buffer1 core=XPM_MEMORY uram -// #pragma HLS RESOURCE variable=acc_buffer2 core=XPM_MEMORY uram - - + svd::AccumD acc_buffer[NumGates][kNumInputs][kTileSize][NumTiles]; +#pragma HLS ARRAY_PARTITION variable=acc_buffer complete dim=1 +#pragma HLS ARRAY_PARTITION variable=acc_buffer complete dim=2 +#pragma HLS ARRAY_PARTITION variable=acc_buffer complete dim=3 Init_buffer: for (int i = 0; i < NumTiles; ++i) { #pragma HLS PIPELINE II=1 - for (int j = 0; j < kNumTileElems; ++j) { - acc_buffer1[j][i] = 0; - acc_buffer2[j][i] = 0; + for (int j = 0; j < kTileSize; ++j) { + for (int k = 0; k < kNumInputs; ++k) { + for (int g = 0; g < NumGates; ++g) { + acc_buffer[g][k][j][i] = 0; + } + } } } - - ap_uint nz_idx[kNumTileElems]; - svd::AccumD xs1[kNumTileElems]; - svd::AccumD xs2[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=nz_idx complete -#pragma HLS ARRAY_PARTITION variable=xs1 complete -#pragma HLS ARRAY_PARTITION variable=xs2 complete - svd::AccumD 
mac_1[kNumTileElems]; - svd::AccumD mac_2[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=mac_1 complete -#pragma HLS ARRAY_PARTITION variable=mac_2 complete - svd::AccumD acc_1[kNumTileElems]; - svd::AccumD acc_2[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=acc_1 complete -#pragma HLS ARRAY_PARTITION variable=acc_2 complete - - svd::WeightD v[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=v complete - -#ifndef __SYNTHESIS__ - int nz_idx_buf[kNumTileElems][NumIter * kNonZeroTiles] = {-1}; - const int kPipelineDepth = 8; - const bool printout = false; -#endif - + ap_uint nz_idx[NumGates][kTileSize]; + svd::AccumD xus[NumGates][kNumInputs][kTileSize]; + svd::AccumD mac[NumGates][kNumInputs][kTileSize]; + svd::AccumD acc[NumGates][kNumInputs][kTileSize]; + svd::WeightD v[NumGates][kTileSize]; +#pragma HLS ARRAY_PARTITION variable=nz_idx complete dim=0 +#pragma HLS ARRAY_PARTITION variable=xus complete dim=0 +#pragma HLS ARRAY_PARTITION variable=mac complete dim=0 +#pragma HLS ARRAY_PARTITION variable=acc complete dim=0 +#pragma HLS ARRAY_PARTITION variable=v complete dim=0 for (int i = 0; i < NumIter; ++i) { for (int k = 0; k < kNonZeroTiles; ++k) { #pragma HLS LOOP_FLATTEN #pragma HLS PIPELINE II=1 - for (int j = 0; j < kNumTileElems; ++j) { - if (k == 0) { - xs1[j] = xs1_streams[j].read(); - xs2[j] = xs2_streams[j].read(); - } - nz_idx[j] = nz_idx_streams[j].read(); - -#ifndef __SYNTHESIS__ - // =================================================================== - // Naive dependency detection mechanism: - // =================================================================== - // std::cout << "\tPE n." 
<< j << " accessing acc_buffer[" << j << "][" << nz_idx[j] << "]\n"; - const int idx = i * kNonZeroTiles + k; - nz_idx_buf[j][idx] = nz_idx[j]; - - if (idx > 0) { - int dependency_idx = -1; - if (j == 0 && printout) { - std::cout << "curr_idx: " << nz_idx[j] << "\nprev_idx: "; - } - for (int q = idx - 1; q > 0; --q) { - if (nz_idx_buf[j][idx] == nz_idx_buf[j][q]) { - dependency_idx = q; - break; - } - } - if (j == 0 && printout) { - for (int q = idx; q > 0; --q) { - if (q - 1 == dependency_idx && idx - dependency_idx < kPipelineDepth) { - std::cout << "<(("; - } - std::cout << nz_idx_buf[j][q - 1]; - if (q - 1 == dependency_idx && idx - dependency_idx < kPipelineDepth) { - std::cout << "))>"; - } - std::cout << " "; - if ((q - 1) % kNonZeroTiles == 0) { - std::cout << "| "; - } - } - std::cout << "\ndistance: " << idx - dependency_idx << "\n"; - } - // NOTE: The dependency will be the same for all PEs, i.e. j indexes. - if (j == 0 && dependency_idx != -1 && idx - dependency_idx < kPipelineDepth) { - //num_raw_hazards++; - if (printout) { - std::cout << "[WARNING] Possible dependecy detected: nz[" << idx << "] = " << nz_idx_buf[j][idx] - << " -> nz[" << dependency_idx << "] = " << nz_idx_buf[j][dependency_idx] - << ", distance: " << idx - dependency_idx << "\n"; + for (int g = 0; g < NumGates; ++g) { + for (int j = 0; j < kTileSize; ++j) { + if (k == 0) { + for (int ii = 0; ii < kNumInputs; ++ii) { + xus[g][ii][j] = xus_streams[g][ii][j].read(); } } - if (j == 0 && printout) { - std::cout << "\n"; + nz_idx[g][j] = nz_idx_streams[g][j].read(); + v[g][j] = gate_v_streams[g][j].read(); + for (int ii = 0; ii < kNumInputs; ++ii) { + mac[g][ii][j] = (xus[g][ii][j] * v[g][j]) + acc_buffer[g][ii][j][nz_idx[g][j]]; +#pragma HLS RESOURCE variable=mac[g][ii][j] core=DSP48 latency=3 +#pragma HLS DEPENDENCE variable=acc_buffer RAW false inter distance=kNonZeroTiles + acc_buffer[g][ii][j][nz_idx[g][j]] = mac[g][ii][j]; } } -#endif - v[j] = gate_v_streams[j].read(); - - mac_1[j] = 
(xs1[j] * v[j]) + acc_buffer1[j][nz_idx[j]]; - mac_2[j] = (xs2[j] * v[j]) + acc_buffer2[j][nz_idx[j]]; -#pragma HLS RESOURCE variable=mac_1[j] core=DSP48 latency=3 -#pragma HLS RESOURCE variable=mac_2[j] core=DSP48 latency=3 -#pragma HLS DEPENDENCE variable=acc_buffer1 RAW false inter -#pragma HLS DEPENDENCE variable=acc_buffer2 RAW false inter - - acc_buffer1[j][nz_idx[j]] = mac_1[j]; - acc_buffer2[j][nz_idx[j]] = mac_2[j]; - } // end kNumTileElems - } // end kNonZeroTiles - } // end NumIter - - WriteBack_tiles: + } + } + } + V_DMA: for (int i = 0; i < NumTiles; ++i) { #pragma HLS PIPELINE II=1 - WriteBack_elems: - for (int j = 0; j < kNumTileElems; ++j) { - if (has_bias) { - auto acc_1 = acc_buffer1[j][i] + bias1_streams[j].read(); - auto acc_2 = acc_buffer2[j][i] + bias2_streams[j].read(); + for (int j = 0; j < kTileSize; ++j) { + for (int k = 0; k < NumGates; ++k) { + if (has_bias) { + auto acc_1 = acc_buffer[k][0][j][i] + bias_streams[k][0][j].read(); + auto acc_2 = acc_buffer[k][1][j][i] + bias_streams[k][1][j].read(); #pragma HLS RESOURCE variable=acc_1 core=AddSub_DSP #pragma HLS RESOURCE variable=acc_2 core=AddSub_DSP - gate_out1_streams[j].write(acc_1); - gate_out2_streams[j].write(acc_2); - } else { - gate_out1_streams[j].write(acc_buffer1[j][i]); - gate_out2_streams[j].write(acc_buffer2[j][i]); + gate_out1_streams[k][j].write(acc_1); + gate_out2_streams[k][j].write(acc_2); + } else { + gate_out1_streams[k][j].write(acc_buffer[k][0][j][i]); + gate_out2_streams[k][j].write(acc_buffer[k][1][j][i]); + } } } } + } // end V_Function +} - if (debug_module) { - probe_ctrl->write(0); - } -#else - - svd::AccumD acc_buffer1[kNumTileElems][NumTiles]; - svd::AccumD acc_buffer2[kNumTileElems][NumTiles]; -#pragma HLS ARRAY_PARTITION variable=acc_buffer1 complete dim=1 -#pragma HLS ARRAY_PARTITION variable=acc_buffer2 complete dim=1 -#pragma HLS RESOURCE variable=acc_buffer1 core=RAM_T2P_BRAM latency=1 -#pragma HLS RESOURCE variable=acc_buffer2 core=RAM_T2P_BRAM 
latency=1 -// #pragma HLS STREAM variable=acc_buffer1 depth=1 -// #pragma HLS STREAM variable=acc_buffer2 depth=1 - - ap_uint nz_idx[kNumTileElems]; - svd::AccumD xs1[kNumTileElems]; - svd::AccumD xs2[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=nz_idx complete -#pragma HLS ARRAY_PARTITION variable=xs1 complete -#pragma HLS ARRAY_PARTITION variable=xs2 complete - svd::AccumD mac_1[kNumTileElems]; - svd::AccumD mac_2[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=mac_1 complete -#pragma HLS ARRAY_PARTITION variable=mac_2 complete - svd::AccumD acc_1[kNumTileElems]; - svd::AccumD acc_2[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=acc_1 complete -#pragma HLS ARRAY_PARTITION variable=acc_2 complete - - svd::WeightD v[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=v complete - - hls::stream xsv1_streams[kNumTileElems]; - hls::stream xsv2_streams[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=xsv1_streams complete -#pragma HLS ARRAY_PARTITION variable=xsv2_streams complete -#pragma HLS STREAM variable=xsv1_streams depth=kNonZeroTiles -#pragma HLS STREAM variable=xsv2_streams depth=kNonZeroTiles - hls::stream acc1_streams[kNumTileElems]; - hls::stream acc2_streams[kNumTileElems]; -#pragma HLS ARRAY_PARTITION variable=acc1_streams complete -#pragma HLS ARRAY_PARTITION variable=acc2_streams complete - const int kStreamDepthAcc = NumTiles; -#pragma HLS STREAM variable=acc1_streams depth=kStreamDepthAcc -#pragma HLS STREAM variable=acc2_streams depth=kStreamDepthAcc - for (int n = 0; n < NumIter; ++n) { - for (int nz = 0; nz < kNonZeroTiles; ++nz) { -#pragma HLS LOOP_FLATTEN +#ifndef __VITIS_HLS__ +#else +template < + typename params, + typename WrapperAxisG = svd::AxiStreamPort, + typename WrapperAxisGTv = svd::AxiStreamPort +> +void KernelV(const int num_active_inputs, + const int output_size, + const int num_refinements[params::N], + hls::stream& xus_port, + hls::stream& v_port, + hls::stream& y_port) { +#pragma HLS TOP name=KernelV 
+#pragma HLS DATAFLOW +#pragma HLS INLINE +#ifndef __VITIS_HLS__ +#pragma HLS STABLE variable=xus_port +#pragma HLS STABLE variable=v_port +#pragma HLS STABLE variable=y_port +#endif + assert(num_active_inputs <= params::N); + assert(num_active_inputs > 0); + assert(params::H % params::Tv == 0); + assert(output_size % params::Tv == 0); + assert(output_size <= params::H); + typedef typename params::ActivationD ActivationType; + const int kMaxNumTilesV = params::H / params::Tv; + const int kNumTilesV = output_size / params::Tv; + const int kStreamDepth_V = 32 + kMaxNumTilesV * params::G; + assert(kNumTilesV <= kMaxNumTilesV); + auto xus_axis = svd::AxiStreamInterface(xus_port); + auto v_axis = svd::AxiStreamPort(v_port); + auto y_axis = svd::AxiStreamInterface(y_port); + hls::stream v_streams[params::G]; + // NOTE: Having y_buffer as static made cosim work in one-process configuration. + static ActivationType y_buffer[params::G][params::N][params::Tv][kMaxNumTilesV] = {0}; + typename params::VectTvType v_val; + typename params::VectG_Type xus_val[params::N]; + typename params::VectGTvType y_out; +// NOTE: I'm not accessing dimension N of y_buffer in parallel. 
+#pragma HLS ARRAY_PARTITION variable=v_streams complete +#pragma HLS ARRAY_PARTITION variable=y_buffer complete dim=1 +#pragma HLS ARRAY_PARTITION variable=y_buffer complete dim=3 +#pragma HLS BIND_STORAGE variable=y_buffer type=ram_t2p impl=bram latency=1 + int R_max = num_refinements[0]; + Get_Max_R: + for (int i = 1; i < num_active_inputs; ++i) { #pragma HLS PIPELINE II=1 - for (int i = 0; i < kNumTileElems; ++i) { - if (nz == 0) { - xs1[i] = xs1_streams[i].read(); - xs2[i] = xs2_streams[i].read(); - } - svd::WeightD v_val = gate_v_streams[i].read(); - svd::AccumD xvs1_val = xs1[i] * v_val; - svd::AccumD xvs2_val = xs2[i] * v_val; -#pragma HLS RESOURCE variable=xvs1_val core=DSP48 latency=3 -#pragma HLS RESOURCE variable=xvs2_val core=DSP48 latency=3 - xsv1_streams[i].write(xvs1_val); - xsv2_streams[i].write(xvs2_val); - } + if (num_refinements[i] > R_max) { + R_max = num_refinements[i]; } } - V_Kernel: { -#pragma HLS INLINE off - - Init_buffer: - for (int i = 0; i < NumTiles; ++i) { - #pragma HLS PIPELINE II=1 - for (int j = 0; j < kNumTileElems; ++j) { - acc_buffer1[j][i] = 0; - acc_buffer2[j][i] = 0; - } - } - for (int n = 0; n < NumIter; ++n) { - for (int t = 0; t < kNonZeroTiles; ++t) { -#pragma HLS LOOP_FLATTEN + V_DMA: + for (int i = 0; i < R_max; ++i) { + for (int j = 0; j < kNumTilesV; ++j) { + for (int k = 0; k < params::G; ++k) { + for (int ii = 0; ii < num_active_inputs; ++ii) { #pragma HLS PIPELINE II=1 - for (int i = 0; i < kNumTileElems; ++i) { - ActivationD operand_prev1 = 0; - ActivationD operand_curr1 = 0; - ActivationD operand_prev2 = 0; - ActivationD operand_curr2 = 0; - // =================================================================== - // Setup the internal stream - // =================================================================== - nz_idx[i] = nz_idx_streams[i].read(); - operand_prev1 = acc_buffer1[i][nz_idx[i]]; - operand_prev2 = acc_buffer2[i][nz_idx[i]]; - operand_curr1 = xsv1_streams[i].read(); - operand_curr2 = 
xsv2_streams[i].read(); - // =================================================================== - // Accumulate the incoming streams - // =================================================================== - svd::AccumD sum1 = operand_prev1 + operand_curr1; - svd::AccumD sum2 = operand_prev2 + operand_curr2; -#pragma HLS RESOURCE variable=sum1 core=AddSub_DSP -#pragma HLS RESOURCE variable=sum2 core=AddSub_DSP - acc_buffer1[i][nz_idx[i]] = sum1; - acc_buffer2[i][nz_idx[i]] = sum2; + if (ii == 0) { + v_val = v_axis.template PopVector(); + } + if (i < num_refinements[ii]) { + v_streams[k] << v_val; + } } } } - // =================================================================== - // Write the results to the output streams - // =================================================================== - WriteBack_tiles: - for (int i = 0; i < NumTiles; ++i) { + } + V_Kernel: + for (int i = 0; i < R_max; ++i) { + for (int j = 0; j < kNumTilesV; ++j) { + for (int k = 0; k < num_active_inputs; ++k) { #pragma HLS PIPELINE II=1 - WriteBack_elems: - for (int j = 0; j < kNumTileElems; ++j) { - if (has_bias) { - auto acc_1 = acc_buffer1[j][i] + bias1_streams[j].read(); - auto acc_2 = acc_buffer2[j][i] + bias2_streams[j].read(); -#pragma HLS RESOURCE variable=acc_1 core=AddSub_DSP -#pragma HLS RESOURCE variable=acc_2 core=AddSub_DSP - gate_out1_streams[j].write(acc_1); - gate_out2_streams[j].write(acc_2); - } else { - gate_out1_streams[j].write(acc_buffer1[j][i]); - gate_out2_streams[j].write(acc_buffer2[j][i]); + for (int ii = 0; ii < params::G; ++ii) { + assert(j < kMaxNumTilesV); + assert(k < params::N); + if (i < num_refinements[k]) { + assert(i < 512); + if (j == 0 && ii == 0) { + xus_val[k] = xus_axis.template PopVector(); + } + auto v_val = v_streams[ii].read(); + for (int jj = 0; jj < params::Tv; ++jj) { + ActivationType y_val; + if (i == 0) { + y_val = v_val[jj] * xus_val[k][ii]; + } else { + y_val = y_buffer[ii][k][jj][j] + v_val[jj] * xus_val[k][ii]; + } + 
y_buffer[ii][k][jj][j] = y_val; +// #pragma HLS DEPENDENCE inter variable=y_buffer false + } + } + } + if (i == R_max - 1) { + for (int jj = 0; jj < params::G; ++jj) { + for (int ii = 0; ii < params::Tv; ++ii) { + y_out[ii * params::G + jj] = y_buffer[jj][k][ii][j]; + } + } + const bool kIsLast = j == kNumTilesV-1 && k == num_active_inputs-1; + const int kGTv = params::G * params::Tv; + y_axis.template PushVector(y_out, kIsLast); } } } -#endif - } // end V_Function +// if (i == R_max - 1) { +// for (int j = 0; j < kNumTilesV; ++j) { +// for (int k = 0; k < num_active_inputs; ++k) { +// for (int jj = 0; jj < params::G; ++jj) { +// for (int ii = 0; ii < params::Tv; ++ii) { +// #pragma HLS PIPELINE II=1 +// y_out[ii * params::G + jj] = y_buffer[jj][k][ii][j]; +// } +// } +// const bool kIsLast = j == kNumTilesV-1 && k == num_active_inputs-1; +// const int kGTv = params::G * params::Tv; +// y_axis.template PushVector(y_out, kIsLast); +// } +// } +// } + } +// DMA_Out: +// for (int j = 0; j < kNumTilesV; ++j) { +// for (int k = 0; k < num_active_inputs; ++k) { +// #pragma HLS PIPELINE II=1 +// for (int jj = 0; jj < params::G; ++jj) { +// for (int ii = 0; ii < params::Tv; ++ii) { +// assert(ii * params::G + jj < params::G * params::Tv); +// y_out[ii * params::G + jj] = y_buffer[jj][k][ii][j]; +// // y_out[ii * params::G + jj] = y_val[jj][ii]; +// } +// } +// const bool kIsLast = j == kNumTilesV-1 && k == num_active_inputs-1; +// const int kGTv = params::G * params::Tv; +// y_axis.template PushVector(y_out, kIsLast); +// } +// } + } +#endif // end __VITIS_HLS__ } // svd -#endif // end KERNEL_V_KERNEL_H_ \ No newline at end of file +namespace testv { + +static const int kNumInputs = 2; +static const int kInputSize = 512; +static const int Tu = 4; +// NOTE: The rest of the parameters are unused for now. 
+static const int kOutputSize = 512; +static const int R = 64; +static const int Tv = 4; +static const int ZTu = 0; +static const int ZTv = 0; +static const int G = 4; + +typedef svd::SvdParameters params; + short, short, short> params; + // ap_fixed, ap_fixed, ap_fixed > params; + // float, float, float > params; + +} // testv + +#ifndef __VITIS_HLS__ +#else +void HlsKernelV(const int num_active_inputs, + const int output_size, + const int num_refinements[testv::params::N], + // const hls::vector num_refinements, + hls::stream& xus_port, + hls::stream& v_port, + hls::stream& y_port); +#endif // end __VITIS_HLS__ + +#endif // end KERNEL_V_KERNEL_H_ diff --git a/include/layers/dense/hls/dense_svd.h b/include/layers/dense/hls/dense_svd.h new file mode 100644 index 0000000..14d7183 --- /dev/null +++ b/include/layers/dense/hls/dense_svd.h @@ -0,0 +1,172 @@ +#ifndef LAYERS_DENSE_HLS_DENSE_SVD_H_ +#define LAYERS_DENSE_HLS_DENSE_SVD_H_ + +#include "svd_params.h" +#include "kernel/svd_kernel.h" + +#include "ap_int.h" + + +namespace svd { + +static const int kDenseNumGates = 1; + +typedef svd::SvdParameters, + ap_fixed, + ap_fixed > dense_params; + +#ifndef __VITIS_HLS__ +#else +template +void DenseSvdKernel(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[params::N], + // const hls::vector num_refinements, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& s_port, + hls::stream& v_port, + hls::stream& bias_port, + hls::stream& y_port) { +#pragma HLS TOP name=DenseSvdKernel +// #pragma HLS INLINE +#pragma HLS DATAFLOW +#ifndef __VITIS_HLS__ +#pragma HLS STABLE variable=x_port +#pragma HLS STABLE variable=u_port +#pragma HLS STABLE variable=s_port +#pragma HLS STABLE variable=v_port +#pragma HLS STABLE variable=bias_port +#pragma HLS STABLE variable=y_port +#endif + static_assert(params::G == 1, "DenseSvdKernel must have params::G equal to one."); + assert(params::G == 1); + typedef typename 
params::ActivationD ActivationType; + typedef svd::AxiStreamFifo WrapperFifoGTv; + hls::stream y_fifo; +#pragma HLS STREAM variable=y_fifo depth=2 + auto y_axis = svd::AxiStreamFifo(y_fifo); + auto y_out_axis = svd::AxiStreamPort(y_port); + auto bias_axis = svd::AxiStreamPort(bias_port); + svd::SvdKernel(num_active_inputs, input_size, + output_size, num_refinements, x_port, u_port, s_port, v_port, y_fifo); + Apply_Bias: + for (int i = 0; i < output_size / params::Tv * num_active_inputs; ++i) { +#pragma HLS PIPELINE II=1 + const int kGTv = params::G * params::Tv; // NOTE: G is actually equal to 1. + const auto y_val = y_axis.template PopVector(); + const auto bias_val = bias_axis.template PopVector(); + const auto y_out = y_val + bias_val; +// #pragma HLS BIND_OP variable=y_out op=add impl=dsp latency=3 + const bool kIsLast = i == output_size / params::Tv * num_active_inputs - 1; + y_out_axis.template PushVector(y_out, kIsLast); + } +} +#endif // end __VITIS_HLS__ + +/** + * @brief Sets the DenseSvd kernel inputs, i.e. streams from arrays into + * hls::streams. + * + * @param[in] num_active_inputs The number of active inputs + * @param[in] input_size The input size + * @param[in] output_size The output size + * @param[in] num_refinements The number of refinements + * @param[in] x The input array. Shape: (N, I) + * @param[in] u The u array. Shape: (R, I, G) + * @param[in] s The s array. Shape: (R, N, G) + * @param[in] v The v array. Shape: (R, H, G) + * @param[in] bias The bias array. Shape: (N, G, H) + * @param x_port The x port to be used as argument to SvdKernel + * @param u_port The u port to be used as argument to SvdKernel + * @param s_port The s port to be used as argument to SvdKernel + * @param v_port The v port to be used as argument to SvdKernel + * @param bias_port The bias port to be used as argument to + * SvdKernel + * + * @tparam params Collection of SVD configuration params. 
+ */ +#ifdef __VITIS_HLS__ +template +void SetDenseSvdInputs(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[params::N], + const typename params::ActivationD* x, + const typename params::ActivationD* u, + const typename params::ActivationD* s, + const typename params::ActivationD* v, + const typename params::ActivationD* bias, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& s_port, + hls::stream& v_port, + hls::stream& bias_port) { + typedef typename params::ActivationD ActivationType; + const int kG = params::G; // NOTE: G is actually equal to 1. + const int kTv = params::Tv; + const int kGTv = kG * kTv; + const int kNumTilesV = output_size / kTv; + auto bias_axis = svd::AxiStreamPort(bias_port); + typename params::VectGTvType bias_val; + for (int i = 0; i < kNumTilesV; ++i) { + for (int j = 0; j < num_active_inputs; ++j) { + for (int k = 0; k < kTv; ++k) { + for (int ii = 0; ii < kG; ++ii) { + int bias_idx = j * output_size * kG + ii * output_size + i * kTv + k; + bias_val[k * kG + ii] = bias[bias_idx]; + } + } + bias_axis.template PushVector(bias_val); + } + } + svd::SetSvdKernelInputs(num_active_inputs, input_size, + output_size, num_refinements, x, u, s, v, x_port, u_port, s_port, v_port); +} +#endif // __VITIS_HLS__ + +} // svd + +void HlsDenseSvd(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[svd::dense_params::N], + hls::stream& x_port, + hls::stream& u_port, + hls::stream& s_port, + hls::stream& v_port, + hls::stream& bias_port, + hls::stream& y_port); + + +/** + * @brief HLS Wrapper that calls a DenseSvd accelerator. + * + * Useful in Cosimulation. + * + * @param[in] num_active_inputs The number of active inputs + * @param[in] input_size The input size + * @param[in] output_size The output size + * @param[in] num_refinements The number of refinements + * @param[in] x The input array. Shape: (N, I) + * @param[in] u The u array. 
Shape: (R, I, G) + * @param[in] s The s array. Shape: (R, N, G) + * @param[in] v The v array. Shape: (R, H, G) + * @param[in] bias The bias array. Shape: (N, G, H) + * @param y The y array. Shape: (N, G, H) + */ +void HlsWrapperDenseSvd(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[svd::dense_params::N], + const typename svd::dense_params::ActivationD* x, + const typename svd::dense_params::ActivationD* u, + const typename svd::dense_params::ActivationD* s, + const typename svd::dense_params::ActivationD* v, + const typename svd::dense_params::ActivationD* bias, + typename svd::dense_params::ActivationD* y); + +#endif // end DENSE_HLS_DENSE_SVD_H_); diff --git a/include/lstm/hls/lstm_hardware.h b/include/layers/lstm/hls/lstm_hardware.h similarity index 97% rename from include/lstm/hls/lstm_hardware.h rename to include/layers/lstm/hls/lstm_hardware.h index 1206d40..6757ad6 100644 --- a/include/lstm/hls/lstm_hardware.h +++ b/include/layers/lstm/hls/lstm_hardware.h @@ -36,15 +36,21 @@ * streams * *****************************************************************************/ -#ifndef LSTM_HARDWARE_H_ -#define LSTM_HARDWARE_H_ +#ifndef LSTM_HLS_LSTM_HARDWARE_H_ +#define LSTM_HLS_LSTM_HARDWARE_H_ #include "math_utils/activation_functions.h" +#include "svd_params.h" +#ifdef AP_INT_MAX_W +#undef AP_INT_MAX_W #define AP_INT_MAX_W 4096 +#endif #include "ap_int.h" -#include "hls_linear_algebra.h" #include "ap_axi_sdata.h" +#ifndef __VITIS_HLS__ +#include "hls_linear_algebra.h" +#endif #ifndef __SYNTHESIS__ #include @@ -71,6 +77,7 @@ namespace svd { +#ifndef __VITIS_HLS__ // struct MY_CONFIG: hls::matrix_multiply_traits { // static const int ARCH = 4; @@ -137,11 +144,7 @@ struct MatrixConfigFixRecurrent: hls::matrix_multiply_traits < static const int N = 1; static const int K = HIDDEN_TILE_SIZE; }; - -void svd_fpga_cur_gemm_axi(const AxiD *a, const AxiD *b, AxiD *c); - -template -void cur_gemm(const ActivationD *a, 
const ActivationD *b, ActivationD *c); +#endif typedef struct { ap_uint data; @@ -149,6 +152,11 @@ typedef struct { } AxisPacketD; typedef hls::stream DmaInterfaceD; +void svd_fpga_cur_gemm_axi(const AxiD *a, const AxiD *b, AxiD *c); + +template +void cur_gemm(const ActivationD *a, const ActivationD *b, ActivationD *c); + } // end namespace svd void svd_fpga_cur_gemm_gate(const svd::ActivationD *a, const svd::ActivationD *b, svd::ActivationD *c); @@ -224,4 +232,4 @@ void dummy_gemm_v0(const svd::ActivationD a[16][16], const svd::ActivationD b[16 void test_dispatcher(); -#endif // end LSTM_HARDWARE_H_ \ No newline at end of file +#endif // end LSTM_HLS_LSTM_HARDWARE_H_ \ No newline at end of file diff --git a/include/layers/lstm/hls/lstm_svd.h b/include/layers/lstm/hls/lstm_svd.h new file mode 100644 index 0000000..308ce42 --- /dev/null +++ b/include/layers/lstm/hls/lstm_svd.h @@ -0,0 +1,410 @@ +#ifndef LSTM_HLS_LSTM_SVD_H_ +#define LSTM_HLS_LSTM_SVD_H_ + +#include "svd_params.h" +#include "kernel/svd_kernel.h" +#include "math_utils/activation_functions.h" +#include "layers/dense/hls/dense_svd.h" +#include "dma/axis_lib.h" + +#include "ap_int.h" +#include "hls_stream.h" + +namespace svd { + +#ifdef SDS_DESIGN +// ============================================================================= +// Ports using DMAs +// ============================================================================= +#pragma SDS data copy(x1_port[0:INPUT_SIZE]) +#pragma SDS data copy(x2_port[0:INPUT_SIZE]) +#pragma SDS data copy(h_t1_prev_port[0:HIDDEN_SIZE]) +#pragma SDS data copy(h_t2_prev_port[0:HIDDEN_SIZE]) +#pragma SDS data copy(c_t1_prev_port[0:HIDDEN_SIZE]) +#pragma SDS data copy(c_t2_prev_port[0:HIDDEN_SIZE]) +#pragma SDS data copy(bias1_port[0:4*HIDDEN_SIZE]) +#pragma SDS data copy(bias2_port[0:4*HIDDEN_SIZE]) +#pragma SDS data copy(nz_v_port[0:NUM_ITERATIONS * 8]) +#pragma SDS data copy(nz_u_port[0:NUM_ITERATIONS * 8]) +#pragma SDS data copy(h_t1_curr_port[0:HIDDEN_SIZE]) +#pragma 
SDS data copy(h_t2_curr_port[0:HIDDEN_SIZE]) +#pragma SDS data copy(c_t1_curr_port[0:HIDDEN_SIZE]) +#pragma SDS data copy(c_t2_curr_port[0:HIDDEN_SIZE]) +// Data Movers +#pragma SDS data data_mover(x1_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(x2_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(h_t1_prev_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(h_t2_prev_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(c_t1_prev_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(c_t2_prev_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(bias1_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(bias2_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(nz_v_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(nz_u_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(h_t1_curr_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(h_t2_curr_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(c_t1_curr_port:AXIDMA_SIMPLE) +#pragma SDS data data_mover(c_t2_curr_port:AXIDMA_SIMPLE) +// Port mapping +// #pragma SDS data sys_port(x1_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(x2_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(h_t1_prev_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(h_t2_prev_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(c_t1_prev_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(c_t2_prev_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(bias1_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(bias2_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(nz_v_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(nz_u_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port +// #pragma SDS data sys_port(h_t1_curr_port:ps_e_S_AXI_HPC1_FPD) // Coherent HP port +// #pragma SDS data sys_port(h_t2_curr_port:ps_e_S_AXI_HPC1_FPD) // Coherent HP port +// #pragma SDS 
data sys_port(c_t1_curr_port:ps_e_S_AXI_HPC1_FPD) // Coherent HP port +// #pragma SDS data sys_port(c_t2_curr_port:ps_e_S_AXI_HPC1_FPD) // Coherent HP port +// ============================================================================= +// Weight ports not using DMAs +// ============================================================================= +// #pragma SDS data zero_copy(u_cur_port[0:NUM_ITERATIONS*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)]) +// #pragma SDS data zero_copy(u_rec_port[0:NUM_ITERATIONS*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)]) +// #pragma SDS data zero_copy(v_port[0:NUM_ITERATIONS*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V)]) +// #pragma SDS data zero_copy(s1_port[0:NUM_ITERATIONS]) +// #pragma SDS data zero_copy(s2_port[0:NUM_ITERATIONS]) +// ============================================================================= +// Weight ports using DMAs +// ============================================================================= +#pragma SDS data copy(u_cur_port[0:NUM_ITERATIONS*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)]) +#pragma SDS data copy(u_rec_port[0:NUM_ITERATIONS*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)]) +#pragma SDS data copy(v_port[0:NUM_ITERATIONS*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V)]) +#pragma SDS data copy(s1_port[0:NUM_ITERATIONS]) +#pragma SDS data copy(s2_port[0:NUM_ITERATIONS]) +// Platform Port Mapping, available options: +// - ACP Coherent ports: ps_e_S_AXI_HPC[0-1]_FPD +// - HP ports: ps_e_S_AXI_HP[0-3]_FPD +// #pragma SDS data sys_port(u_cur_port:ps_e_S_AXI_HP0_FPD) // HP2 +// #pragma SDS data sys_port(u_rec_port:ps_e_S_AXI_HP1_FPD) // HP3 +// #pragma SDS data sys_port(v_port:ps_e_S_AXI_HP2_FPD) // HP3 +// #pragma SDS data sys_port(s1_port:ps_e_S_AXI_HP3_FPD) // HP3 +// #pragma SDS data sys_port(s2_port:ps_e_S_AXI_HP3_FPD) // HP3 +// 
============================================================================= +// Other Configurations +// ============================================================================= +// Compiler hint on allocation +#pragma SDS data mem_attribute(x1_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(x2_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(h_t1_prev_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(h_t2_prev_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(c_t1_prev_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(c_t2_prev_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(u_cur_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(u_rec_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(v_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(s1_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(s2_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(bias1_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(bias2_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(nz_v_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(nz_u_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(h_t1_curr_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(h_t2_curr_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(c_t1_curr_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +#pragma SDS data mem_attribute(c_t2_curr_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) +// NOTE: All ports are accessed sequentially. 
+#pragma SDS data access_pattern(x1_port:SEQUENTIAL) +#pragma SDS data access_pattern(x2_port:SEQUENTIAL) +#pragma SDS data access_pattern(h_t1_prev_port:SEQUENTIAL) +#pragma SDS data access_pattern(h_t2_prev_port:SEQUENTIAL) +#pragma SDS data access_pattern(c_t1_prev_port:SEQUENTIAL) +#pragma SDS data access_pattern(c_t2_prev_port:SEQUENTIAL) +#pragma SDS data access_pattern(u_cur_port:SEQUENTIAL) +#pragma SDS data access_pattern(u_rec_port:SEQUENTIAL) +#pragma SDS data access_pattern(v_port:SEQUENTIAL) +#pragma SDS data access_pattern(s1_port:SEQUENTIAL) +#pragma SDS data access_pattern(s2_port:SEQUENTIAL) +#pragma SDS data access_pattern(bias1_port:SEQUENTIAL) +#pragma SDS data access_pattern(bias2_port:SEQUENTIAL) +#pragma SDS data access_pattern(nz_v_port:SEQUENTIAL) +#pragma SDS data access_pattern(nz_u_port:SEQUENTIAL) +#pragma SDS data access_pattern(h_t1_curr_port:SEQUENTIAL) +#pragma SDS data access_pattern(h_t2_curr_port:SEQUENTIAL) +#pragma SDS data access_pattern(c_t1_curr_port:SEQUENTIAL) +#pragma SDS data access_pattern(c_t2_curr_port:SEQUENTIAL) +#endif // end SDS_DESIGN +void SvdModel2LstmSDSoCV2( + const svd::ActivationD x1_port[INPUT_SIZE], + const svd::ActivationD x2_port[INPUT_SIZE], + const svd::ActivationD h_t1_prev_port[HIDDEN_SIZE], + const svd::ActivationD h_t2_prev_port[HIDDEN_SIZE], + const svd::ActivationD c_t1_prev_port[HIDDEN_SIZE], + const svd::ActivationD c_t2_prev_port[HIDDEN_SIZE], + const ap_uint *u_cur_port, // [NUM_ITERATIONS*4*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)], + const ap_uint *u_rec_port, // [NUM_ITERATIONS*4*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)], + const ap_uint *v_port, // [NUM_ITERATIONS*4*2*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V)], + const ap_uint *s1_port, // [NUM_ITERATIONS*8], + const ap_uint *s2_port, // [NUM_ITERATIONS*8], + const svd::WeightD bias1_port[4 * HIDDEN_SIZE], + const svd::WeightD bias2_port[4 * HIDDEN_SIZE], + const ap_uint 
nz_v_port[NUM_ITERATIONS * 8], + const ap_uint nz_u_port[NUM_ITERATIONS * 8], + svd::ActivationD h_t1_curr_port[HIDDEN_SIZE], + svd::ActivationD h_t2_curr_port[HIDDEN_SIZE], + svd::ActivationD c_t1_curr_port[HIDDEN_SIZE], + svd::ActivationD c_t2_curr_port[HIDDEN_SIZE]); + +typedef svd::SvdParameters, + ap_fixed, + ap_fixed > lstm_params; + +#ifndef __VITIS_HLS__ +#else +template +void LstmSvdKernel(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[params::N], + // Current Gates + hls::stream& x_port, + hls::stream& u_cur_port, + hls::stream& s_cur_port, + hls::stream& v_cur_port, + // Recurrent Gates + hls::stream& h_prev_port, + hls::stream& u_rec_port, + hls::stream& s_rec_port, + hls::stream& v_rec_port, + // Non-Linearities + hls::stream& bias_port, + hls::stream& c_prev_port, + hls::stream& h_curr_port, + hls::stream& c_curr_port) { +#pragma HLS TOP name=LstmSvdKernel +// #pragma HLS INLINE +#pragma HLS DATAFLOW +// Current Gates +#pragma HLS ARRAY_PARTITION variable=num_refinements complete +#ifndef __VITIS_HLS__ +#pragma HLS STABLE variable=x_port +#pragma HLS STABLE variable=u_cur_port +#pragma HLS STABLE variable=s_cur_port +#pragma HLS STABLE variable=v_cur_port +// Recurrent Gates +#pragma HLS STABLE variable=h_prev_port +#pragma HLS STABLE variable=u_rec_port +#pragma HLS STABLE variable=s_rec_port +#pragma HLS STABLE variable=v_rec_port +// Non-Linearities +#pragma HLS STABLE variable=bias_port +#pragma HLS STABLE variable=c_prev_port +#pragma HLS STABLE variable=h_curr_port +#pragma HLS STABLE variable=c_curr_port +#endif + int refinements[2][params::N]; +#pragma HLS ARRAY_PARTITION variable=refinements complete dim=0 + for (int i = 0; i < 2; ++i) { +#pragma HLS UNROLL region + for (int j = 0; j < params::N; ++j) { + refinements[i][j] = num_refinements[j]; + } + } + typedef typename params::ActivationD ActivationType; + typedef svd::AxiStreamFifo WrapperFifoGTv; + hls::stream y_cur_fifo; + 
hls::stream y_rec_fifo; +#pragma HLS STREAM variable=y_cur_fifo depth=2 +#pragma HLS STREAM variable=y_rec_fifo depth=2 + auto y_cur_axis = svd::AxiStreamFifo(y_cur_fifo); + auto y_rec_axis = svd::AxiStreamFifo(y_rec_fifo); + auto bias_axis = svd::AxiStreamPort(bias_port); + auto c_prev_axis = svd::AxiStreamPort(c_prev_port); + auto c_curr_axis = svd::AxiStreamPort(c_curr_port); + auto h_curr_axis = svd::AxiStreamPort(h_curr_port); + // Current Gates + svd::SvdKernel(num_active_inputs, input_size, + output_size, refinements[0], x_port, u_cur_port, s_cur_port, + v_cur_port, y_cur_fifo); + // Recurrent Gates + svd::SvdKernel(num_active_inputs, output_size, + output_size, refinements[1], h_prev_port, u_rec_port, s_rec_port, + v_rec_port, y_rec_fifo); + // Non-Linearities + const int kTypeBitwidth = hlsutils::Bitwidth::value; + const int kLutSize = (kTypeBitwidth > 16) ? 256 : 512; + const int kGTv = params::G * params::Tv; + const bool kApplyBias = true; + NonLinearities: + for (int i = 0; i < output_size / params::Tv * num_active_inputs; ++i) { +#pragma HLS PIPELINE II=1 + auto y_cur = y_cur_axis.template PopVector(); + auto y_rec = y_rec_axis.template PopVector(); + auto bias = bias_axis.template PopVector(); + auto c_prev = c_prev_axis.template PopVector(); + ActivationType c_curr[params::Tv]; + ActivationType h_curr[params::Tv]; +#pragma HLS ARRAY_PARTITION variable=c_curr complete dim=0 +#pragma HLS ARRAY_PARTITION variable=h_curr complete dim=0 + for (int j = 0; j < params::Tv; ++j) { + svd::LstmNonLinearFunctions( + kApplyBias, + y_cur[j * params::G + 0], y_cur[j * params::G + 1], + y_cur[j * params::G + 2], y_cur[j * params::G + 3], + y_rec[j * params::G + 0], y_rec[j * params::G + 1], + y_rec[j * params::G + 2], y_rec[j * params::G + 3], + bias[j * params::G + 0], bias[j * params::G + 1], + bias[j * params::G + 2], bias[j * params::G + 3], + c_prev[j], c_curr[j], h_curr[j]); + } + const bool kIsLast = i == output_size / params::Tv * num_active_inputs - 1; + 
c_curr_axis.template PushBuffer(params::Tv, c_curr, kIsLast); + h_curr_axis.template PushBuffer(params::Tv, h_curr, kIsLast); + } +} +#endif // end __VITIS_HLS__ + +/** + * @brief Sets the LstmSvd kernel inputs, i.e. streams from arrays into + * hls::streams. + * + * @param[in] num_active_inputs The number of active inputs + * @param[in] input_size The input size + * @param[in] output_size The output size + * @param[in] num_refinements The number of refinements + * @param[in] x The input array. Shape: (N, I) + * @param[in] u The u array. Shape: (R, I, G) + * @param[in] s The s array. Shape: (R, N, G) + * @param[in] v The v array. Shape: (R, H, G) + * @param[in] bias The bias array. Shape: (N, G, H) + * @param x_port The x port to be used as argument to SvdKernel + * @param u_port The u port to be used as argument to SvdKernel + * @param s_port The s port to be used as argument to SvdKernel + * @param v_port The v port to be used as argument to SvdKernel + * @param bias_port The bias port to be used as argument to + * SvdKernel + * + * @tparam params Collection of SVD configuration params. 
+ */ +#ifdef __VITIS_HLS__ +template +void SetLstmSvdInputs(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[params::N], + // Current Gates + const typename params::ActivationD* x, + const typename params::ActivationD* u_cur, + const typename params::ActivationD* s_cur, + const typename params::ActivationD* v_cur, + // Recurrent Gates + const typename params::ActivationD* h, + const typename params::ActivationD* u_rec, + const typename params::ActivationD* s_rec, + const typename params::ActivationD* v_rec, + // Non-Linearities + const typename params::ActivationD* bias, + const typename params::ActivationD* c_prev, + // Current Gates + hls::stream& x_port, + hls::stream& u_cur_port, + hls::stream& s_cur_port, + hls::stream& v_cur_port, + // Recurrent Gates + hls::stream& h_prev_port, + hls::stream& u_rec_port, + hls::stream& s_rec_port, + hls::stream& v_rec_port, + // Non-Linearities + hls::stream& bias_port, + hls::stream& c_prev_port) { + svd::SetDenseSvdInputs(num_active_inputs, input_size, output_size, + num_refinements, x, u_cur, s_cur, v_cur, bias, x_port, u_cur_port, + s_cur_port, v_cur_port, bias_port); + svd::SetSvdKernelInputs(num_active_inputs, output_size, output_size, + num_refinements, h, u_rec, s_rec, v_rec, h_prev_port, u_rec_port, + s_rec_port, v_rec_port); + auto c_prev_axis = svd::AxiStreamPort(c_prev_port); + typedef typename params::ActivationD ActivationType; + const int kTv = params::Tv; + const int kNumTilesV = output_size / kTv; + typename params::VectTvType c_prev_val; + for (int i = 0; i < kNumTilesV; ++i) { + for (int j = 0; j < num_active_inputs; ++j) { + for (int k = 0; k < kTv; ++k) { + c_prev_val[k] = c_prev[j * output_size + i * kTv + k]; + } + c_prev_axis.template PushVector(c_prev_val); + } + } +} +#endif // end __VITIS_HLS__ + +#ifdef __VITIS_HLS__ +template +void GetLstmSvdOutputs(const int num_active_inputs, const int output_size, + typename params::ActivationD* h_curr, 
+ typename params::ActivationD* c_curr, + hls::stream& h_curr_port, + hls::stream& c_curr_port) { + typedef typename params::ActivationD ActivationType; + const int kTv = params::Tv; + const int kNumTilesV = output_size / kTv; + auto h_axis = svd::AxiStreamPort(h_curr_port); + auto c_axis = svd::AxiStreamPort(c_curr_port); + typename params::VectTvType h_val; + typename params::VectTvType c_val; + for (int i = 0; i < kNumTilesV; ++i) { + for (int j = 0; j < num_active_inputs; ++j) { + h_val = h_axis.template PopVector(); + c_val = c_axis.template PopVector(); + for (int k = 0; k < kTv; ++k) { + c_curr[j * output_size + i * kTv + k] = c_val[k]; + h_curr[j * output_size + i * kTv + k] = h_val[k]; + } + } + } +} +#endif // end __VITIS_HLS__ + +} // svd + +void HlsLstmSvd(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[svd::svd_params::N], + // const hls::vector num_refinements, + // Current Gates + hls::stream& x_port, + hls::stream& u_cur_port, + hls::stream& s_cur_port, + hls::stream& v_cur_port, + // Recurrent Gates + hls::stream& h_prev_port, + hls::stream& u_rec_port, + hls::stream& s_rec_port, + hls::stream& v_rec_port, + // Non-Linearities + hls::stream& bias_port, + hls::stream& c_prev_port, + hls::stream& h_curr_port, + hls::stream& c_curr_port); + +void HlsWrapperLstmSvd( + const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[svd::lstm_params::N], + // Current Gates + const typename svd::lstm_params::ActivationD* x, + const typename svd::lstm_params::ActivationD* u_cur, + const typename svd::lstm_params::ActivationD* s_cur, + const typename svd::lstm_params::ActivationD* v_cur, + // Recurrent Gates + const typename svd::lstm_params::ActivationD* h, + const typename svd::lstm_params::ActivationD* u_rec, + const typename svd::lstm_params::ActivationD* s_rec, + const typename svd::lstm_params::ActivationD* v_rec, + // Non-Linearities + const typename 
svd::lstm_params::ActivationD* bias, + const typename svd::lstm_params::ActivationD* c_prev, + typename svd::lstm_params::ActivationD* h_curr, + typename svd::lstm_params::ActivationD* c_curr); + +#endif // end LSTM_HLS_LSTM_SVD_H_ \ No newline at end of file diff --git a/include/layers/lstm/hls/lstm_svd_emulator.h b/include/layers/lstm/hls/lstm_svd_emulator.h new file mode 100644 index 0000000..02cd1a5 --- /dev/null +++ b/include/layers/lstm/hls/lstm_svd_emulator.h @@ -0,0 +1,264 @@ +#ifndef LSTM_HLS_LSTM_SVD_EMULATOR_H_ +#define LSTM_HLS_LSTM_SVD_EMULATOR_H_ + +#include "math_utils/activation_functions.h" + +#include "hls_stream.h" + +#include +#include + +namespace svd { + +/** + * @brief Emulator used to test the accuracy of the HLS accelerator. It + * allows for testing different design points without recompiling. + * + * @param[in] InputSize The input size + * @param[in] HiddenSize The hidden size + * @param[in] NumIter The number of refinement steps + * @param[in] Tu The number of tiles of u + * @param[in] ZTu The number of pruned tiles of u + * @param[in] Tv The number of tiles of v + * @param[in] ZTv The number of pruned tiles of v + * @param[in] NumTimesteps The number timesteps (deprecated) + * @param[in] x The input data + * @param[in] cur_i_u The current i u + * @param[in] cur_i_s The current i s + * @param[in] cur_i_v The current i v + * @param[in] cur_i_unz The current i unz + * @param[in] cur_i_vnz The current i vnz + * @param[in] cur_f_u The current f u + * @param[in] cur_f_s The current f s + * @param[in] cur_f_v The current f v + * @param[in] cur_f_unz The current f unz + * @param[in] cur_f_vnz The current f vnz + * @param[in] cur_c_u The current c u + * @param[in] cur_c_s The current c s + * @param[in] cur_c_v The current c v + * @param[in] cur_c_unz The current c unz + * @param[in] cur_c_vnz The current c vnz + * @param[in] cur_o_u The current o u + * @param[in] cur_o_s The current o s + * @param[in] cur_o_v The current o v + * @param[in] 
cur_o_unz The current o unz + * @param[in] cur_o_vnz The current o vnz + * @param[in] rec_i_u The recurrent i u + * @param[in] rec_i_s The recurrent i s + * @param[in] rec_i_v The recurrent i v + * @param[in] rec_i_unz The recurrent i unz + * @param[in] rec_i_vnz The recurrent i vnz + * @param[in] rec_f_u The recurrent f u + * @param[in] rec_f_s The recurrent f s + * @param[in] rec_f_v The recurrent f v + * @param[in] rec_f_unz The recurrent f unz + * @param[in] rec_f_vnz The recurrent f vnz + * @param[in] rec_c_u The recurrent c u + * @param[in] rec_c_s The recurrent c s + * @param[in] rec_c_v The recurrent c v + * @param[in] rec_c_unz The recurrent c unz + * @param[in] rec_c_vnz The recurrent c vnz + * @param[in] rec_o_u The recurrent o u + * @param[in] rec_o_s The recurrent o s + * @param[in] rec_o_v The recurrent o v + * @param[in] rec_o_unz The recurrent o unz + * @param[in] rec_o_vnz The recurrent o vnz + * @param[in] bias The bias + * @param[in] c_prev The c previous + * @param[in] h_prev The h previous + * @param c_curr The c current + * @param h_curr The h current + * + * @tparam DataA Activation type + * @tparam DataW Weight type + * @tparam DataAcc Accumulation type + * @tparam DataMul Multiplication type + * @tparam TanhLutSize Size of the hard sigmoid LUT + */ +template +void LstmSvdSoftEmulator(const int InputSize, + const int HiddenSize, + const int NumIter, + const int Tu, + const int ZTu, + const int Tv, + const int ZTv, + const int NumTimesteps, + const DataA *x, + const DataW *cur_i_u, + const DataW *cur_i_s, + const DataW *cur_i_v, + const int *cur_i_unz, + const int *cur_i_vnz, + const DataW *cur_f_u, + const DataW *cur_f_s, + const DataW *cur_f_v, + const int *cur_f_unz, + const int *cur_f_vnz, + const DataW *cur_c_u, + const DataW *cur_c_s, + const DataW *cur_c_v, + const int *cur_c_unz, + const int *cur_c_vnz, + const DataW *cur_o_u, + const DataW *cur_o_s, + const DataW *cur_o_v, + const int *cur_o_unz, + const int *cur_o_vnz, + const DataW 
*rec_i_u, + const DataW *rec_i_s, + const DataW *rec_i_v, + const int *rec_i_unz, + const int *rec_i_vnz, + const DataW *rec_f_u, + const DataW *rec_f_s, + const DataW *rec_f_v, + const int *rec_f_unz, + const int *rec_f_vnz, + const DataW *rec_c_u, + const DataW *rec_c_s, + const DataW *rec_c_v, + const int *rec_c_unz, + const int *rec_c_vnz, + const DataW *rec_o_u, + const DataW *rec_o_s, + const DataW *rec_o_v, + const int *rec_o_unz, + const int *rec_o_vnz, + const DataW *bias, + DataA *c_prev, + DataA *h_prev, + DataA *c_curr, + DataA *h_curr) { + assert(Tu % 2 == 0); + assert(Tv % 2 == 0); + assert(Tu >= 8); + assert(Tv >= 8); + assert(Tu > ZTu); + assert(Tv > ZTv); + assert(NumIter % 2 == 0); + const DataW *u[8]; + const DataW *s[8]; + const DataW *v[8]; + const int *unz[8]; + const int *vnz[8]; + u[0] = cur_i_u; u[1] = cur_f_u; u[2] = cur_c_u; u[3] = cur_o_u; + u[4] = rec_i_u; u[5] = rec_f_u; u[6] = rec_c_u; u[7] = rec_o_u; + s[0] = cur_i_s; s[1] = cur_f_s; s[2] = cur_c_s; s[3] = cur_o_s; + s[4] = rec_i_s; s[5] = rec_f_s; s[6] = rec_c_s; s[7] = rec_o_s; + v[0] = cur_i_v; v[1] = cur_f_v; v[2] = cur_c_v; v[3] = cur_o_v; + v[4] = rec_i_v; v[5] = rec_f_v; v[6] = rec_c_v; v[7] = rec_o_v; + unz[0] = cur_i_unz; unz[1] = cur_f_unz; unz[2] = cur_c_unz; unz[3] = cur_o_unz; + unz[4] = rec_i_unz; unz[5] = rec_f_unz; unz[6] = rec_c_unz; unz[7] = rec_o_unz; + vnz[0] = cur_i_vnz; vnz[1] = cur_f_vnz; vnz[2] = cur_c_vnz; vnz[3] = cur_o_vnz; + vnz[4] = rec_i_vnz; vnz[5] = rec_f_vnz; vnz[6] = rec_c_vnz; vnz[7] = rec_o_vnz; + hls::stream **cur_out_fifo = new hls::stream*[4]; + hls::stream **rec_out_fifo = new hls::stream*[4]; + for (int i = 0; i < 4; ++i) { + cur_out_fifo[i] = new hls::stream[Tv]; + rec_out_fifo[i] = new hls::stream[Tv]; + } + DataAcc *u_acc[8]; + DataAcc **acc_buffer[8]; + DataMul xs_val[8] = {0}; + for (int i = 0; i < 8; ++i) { + u_acc[i] = new DataAcc[NumIter]; + } + DataA *h[2]; + DataA *c[2]; + if (NumTimesteps > 1) { + for (int i = 0; i < 2; ++i) { + 
h[i] = new DataA[HiddenSize]; + c[i] = new DataA[HiddenSize]; + std::memset(h[i], 0, HiddenSize * sizeof(DataA)); + std::memset(c[i], 0, HiddenSize * sizeof(DataA)); + } + } else { + c[0] = c_prev; + c[1] = c_curr; + h[0] = h_prev; + h[1] = h_curr; + } + for (int i = 0; i < 8; ++i) { + acc_buffer[i] = new DataAcc*[Tv]; + for (int j = 0; j < Tv; ++j) { + acc_buffer[i][j] = new DataAcc[HiddenSize / Tv]; + } + } + for (int t = 0; t < NumTimesteps; ++t) { + const int in_ptr = (t % 2) == 0 ? 0 : 1; + const int out_ptr = (t % 2) == 0 ? 1 : 0; + for (int i = 0; i < 8; ++i) { + std::memset(u_acc[i], 0, NumIter * sizeof(DataAcc)); + for (int j = 0; j < Tv; ++j) { + std::memset(acc_buffer[i][j], 0, HiddenSize / Tv * sizeof(DataAcc)); + } + } + for (int i = 0; i < NumIter; ++i) { + for (int q = 0; q < 4; ++q) { + for (int j = 0; j < Tu - ZTu; ++j) { + const int nz_idx = i * (Tu - ZTu) + j; + for (int k = 0; k < InputSize / Tu; ++k) { + int u_idx = i * InputSize / Tu * (Tu - ZTu) + j * InputSize / Tu + k; + u_acc[q][i] += x[t * InputSize + unz[q][nz_idx] * InputSize / Tu + k] * u[q][u_idx]; + } + for (int k = 0; k < HiddenSize / Tu; ++k) { + int u_idx = i * HiddenSize / Tu * (Tu - ZTu) + j * HiddenSize / Tu + k; + u_acc[q + 4][i] += h[in_ptr][unz[q + 4][nz_idx] * HiddenSize / Tu + k] * u[q + 4][u_idx]; + } + } + } + for (int q = 0; q < 8; ++q) { + xs_val[q] = s[q][i] * DataA(u_acc[q][i]); + for (int j = 0; j < Tv - ZTv; ++j) { + for (int k = 0; k < HiddenSize / Tv; ++k) { + const int v_idx = i * HiddenSize / Tv * (Tv - ZTv) + j * HiddenSize / Tv + k; + const int nz_idx = i * (Tv - ZTv) + j; + acc_buffer[q][vnz[q][nz_idx]][k] += xs_val[q] * v[q][v_idx]; + } + } + } + } + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < Tv; ++j) { + for (int k = 0; k < HiddenSize / Tv; ++k) { + cur_out_fifo[i][j].write(acc_buffer[i][j][k]); + rec_out_fifo[i][j].write(acc_buffer[i + 4][j][k]); + } + } + } + NonLinearityUnitSoftware(HiddenSize, + Tv, 4, c[in_ptr], cur_out_fifo, rec_out_fifo, 
h[out_ptr], c[out_ptr], + true, bias); + } + if (NumTimesteps > 1) { + std::memcpy(h_curr, h[(NumTimesteps - 1) % 2 == 0 ? 1 : 0], HiddenSize * sizeof(DataA)); + } + for (int i = 0; i < 4; ++i) { + delete[] cur_out_fifo[i]; + delete[] rec_out_fifo[i]; + } + delete[] cur_out_fifo; + delete[] rec_out_fifo; + for (int i = 0; i < 8; ++i) { + delete[] u_acc[i]; + for (int j = 0; j < Tv; ++j) { + delete[] acc_buffer[i][j]; + } + delete[] acc_buffer[i]; + } + if (NumTimesteps > 1) { + for (int i = 0; i < 2; ++i) { + delete[] h[i]; + delete[] c[i]; + } + } +} + +} // svd + +#endif // LSTM_HLS_LSTM_SVD_EMULATOR_H_ \ No newline at end of file diff --git a/include/lstm/lstm_data_handler.h b/include/layers/lstm/lstm_data_handler.h similarity index 69% rename from include/lstm/lstm_data_handler.h rename to include/layers/lstm/lstm_data_handler.h index b927b60..811bf2b 100644 --- a/include/lstm/lstm_data_handler.h +++ b/include/layers/lstm/lstm_data_handler.h @@ -1,5 +1,5 @@ -#ifndef LSTM_DATA_HANDLER_H_ -#define LSTM_DATA_HANDLER_H_ +#ifndef LAYERS_LSTM_DATA_HANDLER_H_ +#define LAYERS_LSTM_DATA_HANDLER_H_ #include "math_utils/data_handler.h" #include "hls_utils/hls_metaprogramming.h" @@ -10,8 +10,10 @@ #include #include #include +#include +#include -namespace lstm { +namespace svd { template void ArrangeWeights(const int arrange_type, @@ -281,6 +283,7 @@ class AcceleratorBlob { int u_cur_size_; int u_rec_size_; int v_size_; + int s_size_; std::unordered_map cur_gates_; std::unordered_map rec_gates_; FixType* fix_u_cur_; @@ -289,38 +292,96 @@ class AcceleratorBlob { std::vector > x_; std::vector > h_; std::vector > c_; + std::vector > h_prev_; + std::vector > c_prev_; + std::vector > h_curr_; + std::vector > c_curr_; std::vector > bias_; std::vector fix_x_; std::vector fix_h_; std::vector fix_c_; + std::vector fix_h_prev_; + std::vector fix_c_prev_; + std::vector fix_h_curr_; + std::vector fix_c_curr_; std::vector fix_bias_; std::vector fix_s_; - ap_uint* fix_z_u_; - ap_uint* 
fix_z_v_; + ap_uint* fix_nz_u_; + ap_uint* fix_nz_v_; void InitVector(const bool init_random, const int num_inputs, const int size, std::vector& fix_y, std::vector >& y) { for (int i = 0; i < num_inputs; ++i) { fix_y[i] = svd::AllocateContiguously(size); for (int j = 0; j < size; ++j) { - FloatType tmp = init_random ? rand() : 0; + FloatType tmp = init_random ? 0.00001 * rand() : 0; y[i][j] = tmp; fix_y[i][j] = FixType(tmp); } } } + void ArrangeWeights(const int arrange_type, const int n_steps, + std::unordered_map& gates, + FixType* y) { + int idx = 0; + switch (arrange_type) { + case 0: + // NOTE: the following arrangement is: (N, G, E) + for (int i = 0; i < n_steps; ++i) { + for (auto g : gates) { + for (int j = 0; j < gates.get_u_pruned_size(); ++j) { + y[idx] = g.second->get_u()->fix_pruned_data()[j]; + ++idx; + } + } + } + break; + case 1: + // NOTE: the following arrangement is: (G, N, E) + for (auto g : gates) { + for (int i = 0; i < n_steps; ++i) { + for (int j = 0; j < gates.get_u_pruned_size(); ++j) { + y[idx] = g.second->get_u()->fix_pruned_data()[j]; + ++idx; + } + } + } + break; + case 2: + // NOTE: the following arrangement is: (N, E, G) + for (int i = 0; i < n_steps; ++i) { + for (int j = 0; j < gates.get_u_pruned_size(); ++j) { + for (auto g : gates) { + y[idx] = g.second->get_u()->fix_pruned_data()[j]; + ++idx; + } + } + } + break; + default: + // NOTE: the following arrangement is: (N, G, E) + for (int i = 0; i < n_steps; ++i) { + for (auto g : gates) { + for (int j = 0; j < gates.get_u_pruned_size(); ++j) { + y[idx] = g.second->get_u()->fix_pruned_data()[j]; + ++idx; + } + } + } + break; + } + } + public: AcceleratorBlob(const int num_inputs, const int refinement_steps, const int u_cur_size, const int u_rec_size, const int v_size, const int num_tiles_u, const int num_zero_tiles_u, const int num_tiles_v, const int num_zero_tiles_v) { + srand(time(NULL)); this->lstm_num_inputs_ = num_inputs; this->lstm_input_size_ = u_cur_size; 
this->lstm_output_size_ = v_size; - std::cout << this->lstm_num_inputs_ << std::endl; - std::cout << this->lstm_input_size_ << std::endl; - std::cout << this->lstm_output_size_ << std::endl; // NOTE: The following instantiation order is important and must be that. this->cur_gates_["o"] = new SvdVecType(num_inputs, refinement_steps, u_cur_size, v_size, num_tiles_u, num_zero_tiles_u, num_tiles_v, num_zero_tiles_v); this->cur_gates_["c"] = new SvdVecType(num_inputs, refinement_steps, u_cur_size, v_size, num_tiles_u, num_zero_tiles_u, num_tiles_v, num_zero_tiles_v); @@ -335,37 +396,34 @@ class AcceleratorBlob { const int kU_RecTotalSize = kNumGates / 2 * this->rec_gates_["i"]->get_u()->get_pruned_total_size(); const int kV_TotalSize = kNumGates * this->cur_gates_["i"]->get_v()->get_pruned_total_size(); const int kS_TotalSize = kNumGates * refinement_steps; - std::cout << "allocate stuff" << std::endl; this->fix_u_cur_ = svd::AllocateContiguously(kU_CurTotalSize); this->fix_u_rec_ = svd::AllocateContiguously(kU_RecTotalSize); this->fix_v_ = svd::AllocateContiguously(kV_TotalSize); this->u_cur_size_ = kU_CurTotalSize; this->u_rec_size_ = kU_RecTotalSize; this->v_size_ = kV_TotalSize; - this->fix_z_u_ = svd::AllocateContiguously>(kS_TotalSize); - this->fix_z_v_ = svd::AllocateContiguously>(kS_TotalSize); + this->s_size_ = kS_TotalSize; + this->fix_nz_u_ = svd::AllocateContiguously >(kS_TotalSize); + this->fix_nz_v_ = svd::AllocateContiguously >(kS_TotalSize); // NOTE: the following arrangement is: (R, E, G) const int kArrangementTypeREG = 2; const int kArrangementTypeRGE = 0; const int kU_CurLengthPruned = this->cur_gates_["i"]->get_u()->get_pruned_size(); const int kU_RecLengthPruned = this->rec_gates_["i"]->get_u()->get_pruned_size(); const int kV_LengthPruned = this->cur_gates_["i"]->get_v()->get_pruned_size(); - std::cout << "ArrangeWeights U cur" << std::endl; - lstm::ArrangeWeights(kArrangementTypeREG, refinement_steps, kU_CurLengthPruned, + 
svd::ArrangeWeights(kArrangementTypeREG, refinement_steps, kU_CurLengthPruned, this->cur_gates_["i"]->get_u()->fix_pruned_data(), this->cur_gates_["f"]->get_u()->fix_pruned_data(), this->cur_gates_["c"]->get_u()->fix_pruned_data(), this->cur_gates_["o"]->get_u()->fix_pruned_data(), this->fix_u_cur_); - std::cout << "ArrangeWeights U rec" << std::endl; - lstm::ArrangeWeights(kArrangementTypeREG, refinement_steps, kU_RecLengthPruned, + svd::ArrangeWeights(kArrangementTypeREG, refinement_steps, kU_RecLengthPruned, this->rec_gates_["i"]->get_u()->fix_pruned_data(), this->rec_gates_["f"]->get_u()->fix_pruned_data(), this->rec_gates_["c"]->get_u()->fix_pruned_data(), this->rec_gates_["o"]->get_u()->fix_pruned_data(), this->fix_u_rec_); - std::cout << "ArrangeWeights V" << std::endl; - lstm::ArrangeWeights(kArrangementTypeREG, refinement_steps, kV_LengthPruned, + svd::ArrangeWeights(kArrangementTypeREG, refinement_steps, kV_LengthPruned, kV_LengthPruned, this->cur_gates_["i"]->get_v()->fix_pruned_data(), this->cur_gates_["f"]->get_v()->fix_pruned_data(), @@ -376,45 +434,51 @@ class AcceleratorBlob { this->rec_gates_["c"]->get_v()->fix_pruned_data(), this->rec_gates_["o"]->get_v()->fix_pruned_data(), this->fix_v_); - std::cout << "arrange NZ" << std::endl; - lstm::ArrangeWeights(kArrangementTypeRGE, refinement_steps, 1, 1, - this->cur_gates_["i"]->get_u()->get_fix_z_idx(), - this->cur_gates_["f"]->get_u()->get_fix_z_idx(), - this->cur_gates_["c"]->get_u()->get_fix_z_idx(), - this->cur_gates_["o"]->get_u()->get_fix_z_idx(), - this->rec_gates_["i"]->get_u()->get_fix_z_idx(), - this->rec_gates_["f"]->get_u()->get_fix_z_idx(), - this->rec_gates_["c"]->get_u()->get_fix_z_idx(), - this->rec_gates_["o"]->get_u()->get_fix_z_idx(), - this->fix_z_u_); - lstm::ArrangeWeights(kArrangementTypeRGE, refinement_steps, 1, 1, - this->cur_gates_["i"]->get_v()->get_fix_z_idx(), - this->cur_gates_["f"]->get_v()->get_fix_z_idx(), - this->cur_gates_["c"]->get_v()->get_fix_z_idx(), - 
this->cur_gates_["o"]->get_v()->get_fix_z_idx(), - this->rec_gates_["i"]->get_v()->get_fix_z_idx(), - this->rec_gates_["f"]->get_v()->get_fix_z_idx(), - this->rec_gates_["c"]->get_v()->get_fix_z_idx(), - this->rec_gates_["o"]->get_v()->get_fix_z_idx(), - this->fix_z_v_); + svd::ArrangeWeights(kArrangementTypeRGE, refinement_steps, 1, 1, + this->cur_gates_["i"]->get_u()->get_fix_nz_idx(), + this->cur_gates_["f"]->get_u()->get_fix_nz_idx(), + this->cur_gates_["c"]->get_u()->get_fix_nz_idx(), + this->cur_gates_["o"]->get_u()->get_fix_nz_idx(), + this->rec_gates_["i"]->get_u()->get_fix_nz_idx(), + this->rec_gates_["f"]->get_u()->get_fix_nz_idx(), + this->rec_gates_["c"]->get_u()->get_fix_nz_idx(), + this->rec_gates_["o"]->get_u()->get_fix_nz_idx(), + this->fix_nz_u_); + svd::ArrangeWeights(kArrangementTypeRGE, refinement_steps, 1, 1, + this->cur_gates_["i"]->get_v()->get_fix_nz_idx(), + this->cur_gates_["f"]->get_v()->get_fix_nz_idx(), + this->cur_gates_["c"]->get_v()->get_fix_nz_idx(), + this->cur_gates_["o"]->get_v()->get_fix_nz_idx(), + this->rec_gates_["i"]->get_v()->get_fix_nz_idx(), + this->rec_gates_["f"]->get_v()->get_fix_nz_idx(), + this->rec_gates_["c"]->get_v()->get_fix_nz_idx(), + this->rec_gates_["o"]->get_v()->get_fix_nz_idx(), + this->fix_nz_v_); this->fix_x_.resize(num_inputs); this->fix_h_.resize(num_inputs); this->fix_c_.resize(num_inputs); + this->fix_h_curr_.resize(num_inputs); + this->fix_c_curr_.resize(num_inputs); + this->fix_h_prev_.resize(num_inputs); + this->fix_c_prev_.resize(num_inputs); this->fix_bias_.resize(num_inputs); - this->x_.resize(num_inputs, std::vector(this->lstm_input_size_)); this->h_.resize(num_inputs, std::vector(this->lstm_output_size_)); this->c_.resize(num_inputs, std::vector(this->lstm_output_size_)); + this->h_curr_.resize(num_inputs, std::vector(this->lstm_output_size_)); + this->c_curr_.resize(num_inputs, std::vector(this->lstm_output_size_)); + this->h_prev_.resize(num_inputs, std::vector(this->lstm_output_size_)); + 
this->c_prev_.resize(num_inputs, std::vector(this->lstm_output_size_)); this->bias_.resize(num_inputs, std::vector(kNumGates / 2 * this->lstm_output_size_)); - const bool init_random = true; this->InitVector(init_random, num_inputs, this->lstm_input_size_, this->fix_x_, this->x_); this->InitVector(!init_random, num_inputs, this->lstm_output_size_, this->fix_h_, this->h_); this->InitVector(!init_random, num_inputs, this->lstm_output_size_, this->fix_c_, this->c_); + this->InitVector(!init_random, num_inputs, this->lstm_output_size_, this->fix_h_curr_, this->h_curr_); + this->InitVector(!init_random, num_inputs, this->lstm_output_size_, this->fix_c_curr_, this->c_curr_); + this->InitVector(!init_random, num_inputs, this->lstm_output_size_, this->fix_h_prev_, this->h_prev_); + this->InitVector(!init_random, num_inputs, this->lstm_output_size_, this->fix_c_prev_, this->c_prev_); this->InitVector(init_random, num_inputs, kNumGates / 2 * this->lstm_output_size_, this->fix_bias_, this->bias_); - - std::cout << "Arrange S" << std::endl; - for (int i = 0; i < num_inputs; ++i) { this->fix_s_.push_back(svd::AllocateContiguously(kS_TotalSize)); } @@ -442,10 +506,14 @@ class AcceleratorBlob { svd::FreeContiguously(this->fix_x_[i]); svd::FreeContiguously(this->fix_h_[i]); svd::FreeContiguously(this->fix_c_[i]); + svd::FreeContiguously(this->fix_h_curr_[i]); + svd::FreeContiguously(this->fix_c_curr_[i]); + svd::FreeContiguously(this->fix_h_prev_[i]); + svd::FreeContiguously(this->fix_c_prev_[i]); svd::FreeContiguously(this->fix_bias_[i]); } - svd::FreeContiguously(this->fix_z_u_); - svd::FreeContiguously(this->fix_z_v_); + svd::FreeContiguously(this->fix_nz_u_); + svd::FreeContiguously(this->fix_nz_v_); for (auto g : this->cur_gates_) { delete g.second; } @@ -454,17 +522,34 @@ class AcceleratorBlob { } } - void reset_lstm_outputs() { + void ResetLstmOutputs() { for (int i = 0; i < this->lstm_num_inputs_; ++i) { for (int j = 0; j < this->lstm_output_size_; ++j) { - h_[i][j] = 0; - 
c_[i][j] = 0; - fix_h_[i][j] = 0; - fix_c_[i][j] = 0; + this->h_[i][j] = 0; + this->c_[i][j] = 0; + this->h_curr_[i][j] = 0; + this->c_curr_[i][j] = 0; + this->h_prev_[i][j] = 0; + this->c_prev_[i][j] = 0; + this->fix_h_[i][j] = 0; + this->fix_c_[i][j] = 0; + this->fix_h_curr_[i][j] = 0; + this->fix_c_curr_[i][j] = 0; + this->fix_h_prev_[i][j] = 0; + this->fix_c_prev_[i][j] = 0; } } } + int get_lstm_input_size() { + return this->lstm_input_size_; + } + + int get_lstm_output_size() { + return this->lstm_output_size_; + } + + FixType* get_fix_u_cur() { return this->fix_u_cur_; } @@ -505,20 +590,44 @@ class AcceleratorBlob { return this->fix_h_[i]; } + FloatType* get_h(const int i) { + return this->h_[i].data(); + } + + FixType* get_fix_h_curr(const int i) { + return this->fix_h_curr_[i]; + } + + FixType* get_fix_h_prev(const int i) { + return this->fix_h_prev_[i]; + } + FixType* get_fix_c(const int i) { return this->fix_c_[i]; } + FixType* get_fix_c_curr(const int i) { + return this->fix_c_curr_[i]; + } + + FixType* get_fix_c_prev(const int i) { + return this->fix_c_prev_[i]; + } + FixType* get_fix_bias(const int i) { return this->fix_bias_[i]; } - ap_uint* get_fix_z_u() { - return this->fix_z_u_; + FloatType* get_bias(const int i) { + return this->bias_[i].data(); + } + + ap_uint* get_fix_nz_u() { + return this->fix_nz_u_; } - ap_uint* get_fix_z_v() { - return this->fix_z_v_; + ap_uint* get_fix_nz_v() { + return this->fix_nz_v_; } int get_u_cur_size() { @@ -533,8 +642,38 @@ class AcceleratorBlob { return this->v_size_; } + int get_s_size() { + return this->s_size_; + } + + FloatType* get_x(const int i) { + return this->x_[i].data(); + } + + int CountMismatches(FixType** x, const int verbose = 0) { + int num_errors = 0; + for (int i = 0; i < this->lstm_num_inputs_; ++i) { + for (int j = 0; j < this->lstm_output_size_; ++j) { + if (verbose > 0) { + std:: cout << j << ") hls/emulator: " << this->fix_h_curr_[i][j] << " / " << x[i][j]; + } + if (this->fix_h_curr_[i][j] 
!= x[i][j]) { + ++num_errors; + if (verbose > 0) { + std:: cout << " <-- ERROR" << std::endl; + } + } else { + if (verbose > 0) { + std:: cout << std::endl; + } + } + } + } + return num_errors; + } + }; -} // lstm +} // svd -#endif // end LSTM_DATA_HANDLER_H_ \ No newline at end of file +#endif // end LAYERS_LSTM_DATA_HANDLER_H_ \ No newline at end of file diff --git a/include/lstm/sw/soft_lstm.h b/include/layers/lstm/sw/soft_lstm.h similarity index 96% rename from include/lstm/sw/soft_lstm.h rename to include/layers/lstm/sw/soft_lstm.h index 9104695..073a9ee 100644 --- a/include/lstm/sw/soft_lstm.h +++ b/include/layers/lstm/sw/soft_lstm.h @@ -36,8 +36,8 @@ * streams * *****************************************************************************/ -#ifndef LSTM_SW_LSTM_SOFTWARE_H_ -#define LSTM_SW_LSTM_SOFTWARE_H_ +#ifndef LAYERS_LSTM_SW_LSTM_SOFTWARE_H_ +#define LAYERS_LSTM_SW_LSTM_SOFTWARE_H_ #ifdef __cplusplus extern "C" @@ -85,4 +85,4 @@ void LstmUnbatched(const bool use_blas, const float *bias_o, float *out); -#endif // end LSTM_SW_LSTM_SOFTWARE_H_ \ No newline at end of file +#endif // end LAYERS_LSTM_SW_LSTM_SOFTWARE_H_ \ No newline at end of file diff --git a/include/lstm/sw/soft_lstm_svd.h b/include/layers/lstm/sw/soft_lstm_svd.h similarity index 68% rename from include/lstm/sw/soft_lstm_svd.h rename to include/layers/lstm/sw/soft_lstm_svd.h index e6927a2..46097e9 100644 --- a/include/lstm/sw/soft_lstm_svd.h +++ b/include/layers/lstm/sw/soft_lstm_svd.h @@ -1,5 +1,5 @@ -#ifndef LSTM_SW_SOFT_LSTM_SVD_H_ -#define LSTM_SW_SOFT_LSTM_SVD_H_ +#ifndef LAYERS_LSTM_SW_SOFT_LSTM_SVD_H_ +#define LAYERS_LSTM_SW_SOFT_LSTM_SVD_H_ #include "math_utils/blas_utils.h" #include "math_utils/activation_functions.h" @@ -17,7 +17,6 @@ #include #include #include -// using namespace Eigen; #endif #include @@ -29,29 +28,25 @@ #include #endif -#ifdef HLS_DESIGN #include "hls_math.h" +#ifdef AP_INT_MAX_W +#undef AP_INT_MAX_W #define AP_INT_MAX_W 4096 +#endif #include "ap_int.h" #define 
FIX8_INT_BIT 3 #define FIX16_INT_BIT 7 +namespace svd { + typedef half HalfD; typedef ap_fixed<8, FIX8_INT_BIT, AP_RND_ZERO, AP_SAT_SYM> Fix8D; typedef ap_fixed<16, FIX16_INT_BIT, AP_RND_ZERO, AP_SAT_SYM> Fix16D; typedef ap_fixed Accum8D; typedef ap_fixed Accum16D; typedef half AccumHalfD; -#else -typedef float HalfD; -typedef float Fix8D; -typedef float Fix16D; -typedef float Accum8D; -typedef float Accum16D; -typedef float AccumHalfD; -#endif // end HLS_DESIGN /* * @todo Using Eigen library is an attempt to using sparse matrixes @@ -64,22 +59,6 @@ typedef Eigen::Matrix Ma typedef Eigen::Triplet TripletD; #endif -#ifndef ALLOC - #ifdef SDS_DESIGN - #define ALLOC(x) sds_alloc(x) - #else - #define ALLOC(x) malloc(x) - #endif -#endif - -#ifndef FREE - #ifdef SDS_DESIGN - #define FREE(x) sds_free(x) - #else - #define FREE(x) free(x) - #endif -#endif - /** * @brief Used for performance comparisons against hardware designs. * @@ -370,7 +349,7 @@ void hls_copy_cast(const int n, const DtypeIn *a, DtypeOut *y) { } template -void SvdModel2LstmTemplatedLatencyCC(const int verbose, +void SvdModelLstmTemplatedLatencyCC(const int verbose, const T *x, const int num_samples, const int num_timesteps, @@ -512,7 +491,7 @@ void SvdModel2LstmTemplatedLatencyCC(const int verbose, // TanH lookup table // =========================================================================== T tanh_table[TableSize]; - hls_init_tanh_table(tanh_table); + svd::hls_init_tanh_table(tanh_table); // =========================================================================== // NOTE: We need to 'transpose' u in order to generate the us matrix. 
This is @@ -521,52 +500,52 @@ void SvdModel2LstmTemplatedLatencyCC(const int verbose, // BEFORE TRANSPOSE: s.shape = (n_steps) // BEFORE TRANSPOSE: u.shape = (n_steps, input_size) // BEFORE TRANSPOSE: us.shape = (n_steps, input_size) - hls_transpose(n_steps, input_size, cur_i_u, cur_i_u_T); - hls_transpose(n_steps, input_size, cur_f_u, cur_f_u_T); - hls_transpose(n_steps, input_size, cur_c_u, cur_c_u_T); - hls_transpose(n_steps, input_size, cur_o_u, cur_o_u_T); - hls_transpose(n_steps, hidden_size, rec_i_u, rec_i_u_T); - hls_transpose(n_steps, hidden_size, rec_f_u, rec_f_u_T); - hls_transpose(n_steps, hidden_size, rec_c_u, rec_c_u_T); - hls_transpose(n_steps, hidden_size, rec_o_u, rec_o_u_T); + svd::hls_transpose(n_steps, input_size, cur_i_u, cur_i_u_T); + svd::hls_transpose(n_steps, input_size, cur_f_u, cur_f_u_T); + svd::hls_transpose(n_steps, input_size, cur_c_u, cur_c_u_T); + svd::hls_transpose(n_steps, input_size, cur_o_u, cur_o_u_T); + svd::hls_transpose(n_steps, hidden_size, rec_i_u, rec_i_u_T); + svd::hls_transpose(n_steps, hidden_size, rec_f_u, rec_f_u_T); + svd::hls_transpose(n_steps, hidden_size, rec_c_u, rec_c_u_T); + svd::hls_transpose(n_steps, hidden_size, rec_o_u, rec_o_u_T); for (int i = 0; i < input_size; ++i) { - hls_mul(n_steps, &cur_i_u_T[i * n_steps], cur_i_s, &cur_i_us[i * n_steps]); - hls_mul(n_steps, &cur_f_u_T[i * n_steps], cur_f_s, &cur_f_us[i * n_steps]); - hls_mul(n_steps, &cur_c_u_T[i * n_steps], cur_c_s, &cur_c_us[i * n_steps]); - hls_mul(n_steps, &cur_o_u_T[i * n_steps], cur_o_s, &cur_o_us[i * n_steps]); + svd::hls_mul(n_steps, &cur_i_u_T[i * n_steps], cur_i_s, &cur_i_us[i * n_steps]); + svd::hls_mul(n_steps, &cur_f_u_T[i * n_steps], cur_f_s, &cur_f_us[i * n_steps]); + svd::hls_mul(n_steps, &cur_c_u_T[i * n_steps], cur_c_s, &cur_c_us[i * n_steps]); + svd::hls_mul(n_steps, &cur_o_u_T[i * n_steps], cur_o_s, &cur_o_us[i * n_steps]); } for (int i = 0; i < hidden_size; ++i) { - hls_mul(n_steps, &rec_i_u_T[i * n_steps], rec_i_s, &rec_i_us[i 
* n_steps]); - hls_mul(n_steps, &rec_f_u_T[i * n_steps], rec_f_s, &rec_f_us[i * n_steps]); - hls_mul(n_steps, &rec_c_u_T[i * n_steps], rec_c_s, &rec_c_us[i * n_steps]); - hls_mul(n_steps, &rec_o_u_T[i * n_steps], rec_o_s, &rec_o_us[i * n_steps]); + svd::hls_mul(n_steps, &rec_i_u_T[i * n_steps], rec_i_s, &rec_i_us[i * n_steps]); + svd::hls_mul(n_steps, &rec_f_u_T[i * n_steps], rec_f_s, &rec_f_us[i * n_steps]); + svd::hls_mul(n_steps, &rec_c_u_T[i * n_steps], rec_c_s, &rec_c_us[i * n_steps]); + svd::hls_mul(n_steps, &rec_o_u_T[i * n_steps], rec_o_s, &rec_o_us[i * n_steps]); } // =========================================================================== // Transpose back current v and current u vectors. // =========================================================================== // From (input_size, n_steps) to (n_steps, input_size) - hls_transpose(input_size, n_steps, cur_i_us, cur_i_u_T); - hls_transpose(input_size, n_steps, cur_f_us, cur_f_u_T); - hls_transpose(input_size, n_steps, cur_c_us, cur_c_u_T); - hls_transpose(input_size, n_steps, cur_o_us, cur_o_u_T); + svd::hls_transpose(input_size, n_steps, cur_i_us, cur_i_u_T); + svd::hls_transpose(input_size, n_steps, cur_f_us, cur_f_u_T); + svd::hls_transpose(input_size, n_steps, cur_c_us, cur_c_u_T); + svd::hls_transpose(input_size, n_steps, cur_o_us, cur_o_u_T); // From (n_steps, hidden_size) to (hidden_size, n_steps) - hls_transpose(n_steps, hidden_size, cur_i_v, cur_i_v_T); - hls_transpose(n_steps, hidden_size, cur_f_v, cur_f_v_T); - hls_transpose(n_steps, hidden_size, cur_c_v, cur_c_v_T); - hls_transpose(n_steps, hidden_size, cur_o_v, cur_o_v_T); + svd::hls_transpose(n_steps, hidden_size, cur_i_v, cur_i_v_T); + svd::hls_transpose(n_steps, hidden_size, cur_f_v, cur_f_v_T); + svd::hls_transpose(n_steps, hidden_size, cur_c_v, cur_c_v_T); + svd::hls_transpose(n_steps, hidden_size, cur_o_v, cur_o_v_T); // =========================================================================== // Transpose back recurrent v and 
recurrent u vectors. // =========================================================================== // From (hidden_size, n_steps) to (n_steps, hidden_size) - hls_transpose(hidden_size, n_steps, rec_i_us, rec_i_u_T); - hls_transpose(hidden_size, n_steps, rec_f_us, rec_f_u_T); - hls_transpose(hidden_size, n_steps, rec_c_us, rec_c_u_T); - hls_transpose(hidden_size, n_steps, rec_o_us, rec_o_u_T); + svd::hls_transpose(hidden_size, n_steps, rec_i_us, rec_i_u_T); + svd::hls_transpose(hidden_size, n_steps, rec_f_us, rec_f_u_T); + svd::hls_transpose(hidden_size, n_steps, rec_c_us, rec_c_u_T); + svd::hls_transpose(hidden_size, n_steps, rec_o_us, rec_o_u_T); // From (n_steps, hidden_size) to (hidden_size, n_steps) - hls_transpose(n_steps, hidden_size, rec_i_v, rec_i_v_T); - hls_transpose(n_steps, hidden_size, rec_f_v, rec_f_v_T); - hls_transpose(n_steps, hidden_size, rec_c_v, rec_c_v_T); - hls_transpose(n_steps, hidden_size, rec_o_v, rec_o_v_T); + svd::hls_transpose(n_steps, hidden_size, rec_i_v, rec_i_v_T); + svd::hls_transpose(n_steps, hidden_size, rec_f_v, rec_f_v_T); + svd::hls_transpose(n_steps, hidden_size, rec_c_v, rec_c_v_T); + svd::hls_transpose(n_steps, hidden_size, rec_o_v, rec_o_v_T); const int kSampleSize = num_timesteps * input_size; @@ -597,15 +576,15 @@ void SvdModel2LstmTemplatedLatencyCC(const int verbose, for (int j = 0; j < num_timesteps; ++j) { #if defined(MULTITHREAD_DESIGN) && !defined(SDS_DESIGN) && !defined(__SYNTHESIS__) - std::thread cur_i_ux_thread(hls_gemv, n_steps, input_size, cur_i_u_T, &x[i * kSampleSize + j * input_size], cur_i_ux); - std::thread cur_f_ux_thread(hls_gemv, n_steps, input_size, cur_f_u_T, &x[i * kSampleSize + j * input_size], cur_f_ux); - std::thread cur_c_ux_thread(hls_gemv, n_steps, input_size, cur_c_u_T, &x[i * kSampleSize + j * input_size], cur_c_ux); - std::thread cur_o_ux_thread(hls_gemv, n_steps, input_size, cur_o_u_T, &x[i * kSampleSize + j * input_size], cur_o_ux); + std::thread cur_i_ux_thread(svd::hls_gemv, n_steps, 
input_size, cur_i_u_T, &x[i * kSampleSize + j * input_size], cur_i_ux); + std::thread cur_f_ux_thread(svd::hls_gemv, n_steps, input_size, cur_f_u_T, &x[i * kSampleSize + j * input_size], cur_f_ux); + std::thread cur_c_ux_thread(svd::hls_gemv, n_steps, input_size, cur_c_u_T, &x[i * kSampleSize + j * input_size], cur_c_ux); + std::thread cur_o_ux_thread(svd::hls_gemv, n_steps, input_size, cur_o_u_T, &x[i * kSampleSize + j * input_size], cur_o_ux); - std::thread rec_i_uh_thread(hls_gemv, n_steps, hidden_size, rec_i_u_T, &out[i * hidden_size], rec_i_uh); - std::thread rec_f_uh_thread(hls_gemv, n_steps, hidden_size, rec_f_u_T, &out[i * hidden_size], rec_f_uh); - std::thread rec_c_uh_thread(hls_gemv, n_steps, hidden_size, rec_c_u_T, &out[i * hidden_size], rec_c_uh); - std::thread rec_o_uh_thread(hls_gemv, n_steps, hidden_size, rec_o_u_T, &out[i * hidden_size], rec_o_uh); + std::thread rec_i_uh_thread(svd::hls_gemv, n_steps, hidden_size, rec_i_u_T, &out[i * hidden_size], rec_i_uh); + std::thread rec_f_uh_thread(svd::hls_gemv, n_steps, hidden_size, rec_f_u_T, &out[i * hidden_size], rec_f_uh); + std::thread rec_c_uh_thread(svd::hls_gemv, n_steps, hidden_size, rec_c_u_T, &out[i * hidden_size], rec_c_uh); + std::thread rec_o_uh_thread(svd::hls_gemv, n_steps, hidden_size, rec_o_u_T, &out[i * hidden_size], rec_o_uh); cur_i_ux_thread.join(); cur_f_ux_thread.join(); @@ -617,15 +596,15 @@ void SvdModel2LstmTemplatedLatencyCC(const int verbose, rec_c_uh_thread.join(); rec_o_uh_thread.join(); - std::thread cur_i_y_thread(hls_gemv, hidden_size, n_steps, cur_i_v_T, cur_i_ux, cur_i_y); - std::thread cur_f_y_thread(hls_gemv, hidden_size, n_steps, cur_f_v_T, cur_f_ux, cur_f_y); - std::thread cur_c_y_thread(hls_gemv, hidden_size, n_steps, cur_c_v_T, cur_c_ux, cur_c_y); - std::thread cur_o_y_thread(hls_gemv, hidden_size, n_steps, cur_o_v_T, cur_o_ux, cur_o_y); + std::thread cur_i_y_thread(svd::hls_gemv, hidden_size, n_steps, cur_i_v_T, cur_i_ux, cur_i_y); + std::thread 
cur_f_y_thread(svd::hls_gemv, hidden_size, n_steps, cur_f_v_T, cur_f_ux, cur_f_y); + std::thread cur_c_y_thread(svd::hls_gemv, hidden_size, n_steps, cur_c_v_T, cur_c_ux, cur_c_y); + std::thread cur_o_y_thread(svd::hls_gemv, hidden_size, n_steps, cur_o_v_T, cur_o_ux, cur_o_y); - std::thread rec_i_y_thread(hls_gemv, hidden_size, n_steps, rec_i_v_T, rec_i_uh, rec_i_y); - std::thread rec_f_y_thread(hls_gemv, hidden_size, n_steps, rec_f_v_T, rec_f_uh, rec_f_y); - std::thread rec_c_y_thread(hls_gemv, hidden_size, n_steps, rec_c_v_T, rec_c_uh, rec_c_y); - std::thread rec_o_y_thread(hls_gemv, hidden_size, n_steps, rec_o_v_T, rec_o_uh, rec_o_y); + std::thread rec_i_y_thread(svd::hls_gemv, hidden_size, n_steps, rec_i_v_T, rec_i_uh, rec_i_y); + std::thread rec_f_y_thread(svd::hls_gemv, hidden_size, n_steps, rec_f_v_T, rec_f_uh, rec_f_y); + std::thread rec_c_y_thread(svd::hls_gemv, hidden_size, n_steps, rec_c_v_T, rec_c_uh, rec_c_y); + std::thread rec_o_y_thread(svd::hls_gemv, hidden_size, n_steps, rec_o_v_T, rec_o_uh, rec_o_y); cur_i_y_thread.join(); cur_f_y_thread.join(); @@ -644,54 +623,54 @@ void SvdModel2LstmTemplatedLatencyCC(const int verbose, // is simmetrical, i.e. same transposed matrices logic. 
// ======================================================================= // us.T @ x - hls_gemv(n_steps, input_size, cur_i_u_T, &x[i * kSampleSize + j * input_size], cur_i_ux); - hls_gemv(n_steps, input_size, cur_f_u_T, &x[i * kSampleSize + j * input_size], cur_f_ux); - hls_gemv(n_steps, input_size, cur_c_u_T, &x[i * kSampleSize + j * input_size], cur_c_ux); - hls_gemv(n_steps, input_size, cur_o_u_T, &x[i * kSampleSize + j * input_size], cur_o_ux); + svd::hls_gemv(n_steps, input_size, cur_i_u_T, &x[i * kSampleSize + j * input_size], cur_i_ux); + svd::hls_gemv(n_steps, input_size, cur_f_u_T, &x[i * kSampleSize + j * input_size], cur_f_ux); + svd::hls_gemv(n_steps, input_size, cur_c_u_T, &x[i * kSampleSize + j * input_size], cur_c_ux); + svd::hls_gemv(n_steps, input_size, cur_o_u_T, &x[i * kSampleSize + j * input_size], cur_o_ux); // v.T @ xus - hls_gemv(hidden_size, n_steps, cur_i_v_T, cur_i_ux, cur_i_y); - hls_gemv(hidden_size, n_steps, cur_f_v_T, cur_f_ux, cur_f_y); - hls_gemv(hidden_size, n_steps, cur_c_v_T, cur_c_ux, cur_c_y); - hls_gemv(hidden_size, n_steps, cur_o_v_T, cur_o_ux, cur_o_y); + svd::hls_gemv(hidden_size, n_steps, cur_i_v_T, cur_i_ux, cur_i_y); + svd::hls_gemv(hidden_size, n_steps, cur_f_v_T, cur_f_ux, cur_f_y); + svd::hls_gemv(hidden_size, n_steps, cur_c_v_T, cur_c_ux, cur_c_y); + svd::hls_gemv(hidden_size, n_steps, cur_o_v_T, cur_o_ux, cur_o_y); // ======================================================================= // Recurrent LSTM gates // ======================================================================= // us.T @ h - hls_gemv(n_steps, hidden_size, rec_i_u_T, &out[i * hidden_size], rec_i_uh); - hls_gemv(n_steps, hidden_size, rec_f_u_T, &out[i * hidden_size], rec_f_uh); - hls_gemv(n_steps, hidden_size, rec_c_u_T, &out[i * hidden_size], rec_c_uh); - hls_gemv(n_steps, hidden_size, rec_o_u_T, &out[i * hidden_size], rec_o_uh); + svd::hls_gemv(n_steps, hidden_size, rec_i_u_T, &out[i * hidden_size], rec_i_uh); + svd::hls_gemv(n_steps, 
hidden_size, rec_f_u_T, &out[i * hidden_size], rec_f_uh); + svd::hls_gemv(n_steps, hidden_size, rec_c_u_T, &out[i * hidden_size], rec_c_uh); + svd::hls_gemv(n_steps, hidden_size, rec_o_u_T, &out[i * hidden_size], rec_o_uh); // v.T @ hus - hls_gemv(hidden_size, n_steps, rec_i_v_T, rec_i_uh, rec_i_y); - hls_gemv(hidden_size, n_steps, rec_f_v_T, rec_f_uh, rec_f_y); - hls_gemv(hidden_size, n_steps, rec_c_v_T, rec_c_uh, rec_c_y); - hls_gemv(hidden_size, n_steps, rec_o_v_T, rec_o_uh, rec_o_y); + svd::hls_gemv(hidden_size, n_steps, rec_i_v_T, rec_i_uh, rec_i_y); + svd::hls_gemv(hidden_size, n_steps, rec_f_v_T, rec_f_uh, rec_f_y); + svd::hls_gemv(hidden_size, n_steps, rec_c_v_T, rec_c_uh, rec_c_y); + svd::hls_gemv(hidden_size, n_steps, rec_o_v_T, rec_o_uh, rec_o_y); #endif // ======================================================================= // Non linearities // ======================================================================= - hls_add(hidden_size, cur_i_y, bias_i, i_cur_bias); - hls_add(hidden_size, cur_f_y, bias_f, f_cur_bias); - hls_add(hidden_size, cur_c_y, bias_c, c_cur_bias); - hls_add(hidden_size, cur_o_y, bias_o, o_cur_bias); - - hls_add(hidden_size, i_cur_bias, rec_i_y, i_sum); - hls_add(hidden_size, f_cur_bias, rec_f_y, f_sum); - hls_add(hidden_size, c_cur_bias, rec_c_y, c_sum); - hls_add(hidden_size, o_cur_bias, rec_o_y, o_sum); - - hls_hard_sigmoid(hidden_size, i_sum, i_gate); - hls_hard_sigmoid(hidden_size, f_sum, f_gate); - hls_hard_sigmoid(hidden_size, o_sum, o_gate); - hls_tanh(hidden_size, c_sum, tanh_table, c_sum_tanh); - hls_mul(hidden_size, c_sum_tanh, i_gate, c_lhs); - hls_mul(hidden_size, c, f_gate, c_rhs); - - hls_add(hidden_size, c_lhs, c_rhs, c); - hls_tanh(hidden_size, c, tanh_table, c_tanh); - hls_mul(hidden_size, c_tanh, o_gate, &out[i * hidden_size]); + svd::hls_add(hidden_size, cur_i_y, bias_i, i_cur_bias); + svd::hls_add(hidden_size, cur_f_y, bias_f, f_cur_bias); + svd::hls_add(hidden_size, cur_c_y, bias_c, c_cur_bias); + 
svd::hls_add(hidden_size, cur_o_y, bias_o, o_cur_bias); + + svd::hls_add(hidden_size, i_cur_bias, rec_i_y, i_sum); + svd::hls_add(hidden_size, f_cur_bias, rec_f_y, f_sum); + svd::hls_add(hidden_size, c_cur_bias, rec_c_y, c_sum); + svd::hls_add(hidden_size, o_cur_bias, rec_o_y, o_sum); + + svd::hls_hard_sigmoid(hidden_size, i_sum, i_gate); + svd::hls_hard_sigmoid(hidden_size, f_sum, f_gate); + svd::hls_hard_sigmoid(hidden_size, o_sum, o_gate); + svd::hls_tanh(hidden_size, c_sum, tanh_table, c_sum_tanh); + svd::hls_mul(hidden_size, c_sum_tanh, i_gate, c_lhs); + svd::hls_mul(hidden_size, c, f_gate, c_rhs); + + svd::hls_add(hidden_size, c_lhs, c_rhs, c); + svd::hls_tanh(hidden_size, c, tanh_table, c_tanh); + svd::hls_mul(hidden_size, c_tanh, o_gate, &out[i * hidden_size]); } } #ifdef SDS_DESIGN @@ -789,7 +768,7 @@ void SvdModel2LstmTemplatedLatencyCC(const int verbose, #ifdef __cplusplus extern "C" #endif -void SvdModel2LstmFix8(const int verbose, +void SvdModelLstmFix8(const int verbose, const Fix8D *x, const int num_samples, const int num_timesteps, @@ -829,7 +808,7 @@ void SvdModel2LstmFix8(const int verbose, #ifdef __cplusplus extern "C" #endif -void SvdModel2LstmFix16(const int verbose, +void SvdModelLstmFix16(const int verbose, const Fix16D *x, const int num_samples, const int num_timesteps, @@ -869,7 +848,7 @@ void SvdModel2LstmFix16(const int verbose, #ifdef __cplusplus extern "C" #endif -void SvdModel2LstmSoftware(const int verbose, +void SvdModelLstmSoftware(const int verbose, const bool use_blas, const int type, // 0:float, 1:fix8, 2:fix16 const float *x, @@ -998,7 +977,7 @@ void SvdModelEigenUnbatched(const int verbose, #ifdef __cplusplus extern "C" #endif -void SvdModel2LstmSoftwareBatched(const int verbose, +void SvdModelLstmSoftwareBatched(const int verbose, const bool use_blas, const float *x, // (num_samples, num_inputs, num_timesteps, input_size) const int num_inputs, @@ -1037,253 +1016,7 @@ void SvdModel2LstmSoftwareBatched(const int verbose, const 
float *bias_o, float *out); -/** - * @brief Emulator used to test the accuracy of the HLS accelerator. It - * allows for testing different design points without recompiling. - * - * @param[in] InputSize The input size - * @param[in] HiddenSize The hidden size - * @param[in] NumIter The number of refinement steps - * @param[in] Tu The number of tiles of u - * @param[in] ZTu The number of pruned tiles of u - * @param[in] Tv The number of tiles of v - * @param[in] ZTv The number of pruned tiles of v - * @param[in] NumTimesteps The number timesteps (deprecated) - * @param[in] x The input data - * @param[in] cur_i_u The current i u - * @param[in] cur_i_s The current i s - * @param[in] cur_i_v The current i v - * @param[in] cur_i_unz The current i unz - * @param[in] cur_i_vnz The current i vnz - * @param[in] cur_f_u The current f u - * @param[in] cur_f_s The current f s - * @param[in] cur_f_v The current f v - * @param[in] cur_f_unz The current f unz - * @param[in] cur_f_vnz The current f vnz - * @param[in] cur_c_u The current c u - * @param[in] cur_c_s The current c s - * @param[in] cur_c_v The current c v - * @param[in] cur_c_unz The current c unz - * @param[in] cur_c_vnz The current c vnz - * @param[in] cur_o_u The current o u - * @param[in] cur_o_s The current o s - * @param[in] cur_o_v The current o v - * @param[in] cur_o_unz The current o unz - * @param[in] cur_o_vnz The current o vnz - * @param[in] rec_i_u The recurrent i u - * @param[in] rec_i_s The recurrent i s - * @param[in] rec_i_v The recurrent i v - * @param[in] rec_i_unz The recurrent i unz - * @param[in] rec_i_vnz The recurrent i vnz - * @param[in] rec_f_u The recurrent f u - * @param[in] rec_f_s The recurrent f s - * @param[in] rec_f_v The recurrent f v - * @param[in] rec_f_unz The recurrent f unz - * @param[in] rec_f_vnz The recurrent f vnz - * @param[in] rec_c_u The recurrent c u - * @param[in] rec_c_s The recurrent c s - * @param[in] rec_c_v The recurrent c v - * @param[in] rec_c_unz The recurrent c 
unz - * @param[in] rec_c_vnz The recurrent c vnz - * @param[in] rec_o_u The recurrent o u - * @param[in] rec_o_s The recurrent o s - * @param[in] rec_o_v The recurrent o v - * @param[in] rec_o_unz The recurrent o unz - * @param[in] rec_o_vnz The recurrent o vnz - * @param[in] bias The bias - * @param[in] c_prev The c previous - * @param[in] h_prev The h previous - * @param c_curr The c current - * @param h_curr The h current - * - * @tparam DataA Activation type - * @tparam DataW Weight type - * @tparam DataAcc Accumulation type - * @tparam DataMul Multiplication type - * @tparam TanhLutSize Size of the hard sigmoid LUT - */ -template -void SoftSvdModel(const int InputSize, - const int HiddenSize, - const int NumIter, - const int Tu, - const int ZTu, - const int Tv, - const int ZTv, - const int NumTimesteps, - const DataA *x, - const DataW *cur_i_u, - const DataW *cur_i_s, - const DataW *cur_i_v, - const int *cur_i_unz, - const int *cur_i_vnz, - const DataW *cur_f_u, - const DataW *cur_f_s, - const DataW *cur_f_v, - const int *cur_f_unz, - const int *cur_f_vnz, - const DataW *cur_c_u, - const DataW *cur_c_s, - const DataW *cur_c_v, - const int *cur_c_unz, - const int *cur_c_vnz, - const DataW *cur_o_u, - const DataW *cur_o_s, - const DataW *cur_o_v, - const int *cur_o_unz, - const int *cur_o_vnz, - const DataW *rec_i_u, - const DataW *rec_i_s, - const DataW *rec_i_v, - const int *rec_i_unz, - const int *rec_i_vnz, - const DataW *rec_f_u, - const DataW *rec_f_s, - const DataW *rec_f_v, - const int *rec_f_unz, - const int *rec_f_vnz, - const DataW *rec_c_u, - const DataW *rec_c_s, - const DataW *rec_c_v, - const int *rec_c_unz, - const int *rec_c_vnz, - const DataW *rec_o_u, - const DataW *rec_o_s, - const DataW *rec_o_v, - const int *rec_o_unz, - const int *rec_o_vnz, - const DataW *bias, - DataA *c_prev, - DataA *h_prev, - DataA *c_curr, - DataA *h_curr) { - assert(Tu % 2 == 0); - assert(Tv % 2 == 0); - assert(Tu >= 8); - assert(Tv >= 8); - assert(Tu > ZTu); - 
assert(Tv > ZTv); - assert(NumIter % 2 == 0); - const DataW *u[8]; - const DataW *s[8]; - const DataW *v[8]; - const int *unz[8]; - const int *vnz[8]; - u[0] = cur_i_u; u[1] = cur_f_u; u[2] = cur_c_u; u[3] = cur_o_u; - u[4] = rec_i_u; u[5] = rec_f_u; u[6] = rec_c_u; u[7] = rec_o_u; - s[0] = cur_i_s; s[1] = cur_f_s; s[2] = cur_c_s; s[3] = cur_o_s; - s[4] = rec_i_s; s[5] = rec_f_s; s[6] = rec_c_s; s[7] = rec_o_s; - v[0] = cur_i_v; v[1] = cur_f_v; v[2] = cur_c_v; v[3] = cur_o_v; - v[4] = rec_i_v; v[5] = rec_f_v; v[6] = rec_c_v; v[7] = rec_o_v; - unz[0] = cur_i_unz; unz[1] = cur_f_unz; unz[2] = cur_c_unz; unz[3] = cur_o_unz; - unz[4] = rec_i_unz; unz[5] = rec_f_unz; unz[6] = rec_c_unz; unz[7] = rec_o_unz; - vnz[0] = cur_i_vnz; vnz[1] = cur_f_vnz; vnz[2] = cur_c_vnz; vnz[3] = cur_o_vnz; - vnz[4] = rec_i_vnz; vnz[5] = rec_f_vnz; vnz[6] = rec_c_vnz; vnz[7] = rec_o_vnz; - hls::stream **cur_out_fifo = new hls::stream*[4]; - hls::stream **rec_out_fifo = new hls::stream*[4]; - for (int i = 0; i < 4; ++i) { - cur_out_fifo[i] = new hls::stream[Tv]; - rec_out_fifo[i] = new hls::stream[Tv]; - } - DataAcc *u_acc[8]; - DataAcc **acc_buffer[8]; - DataMul xs_val[8] = {0}; - for (int i = 0; i < 8; ++i) { - u_acc[i] = new DataAcc[NumIter]; - } - DataA *h[2]; - DataA *c[2]; - if (NumTimesteps > 1) { - for (int i = 0; i < 2; ++i) { - h[i] = new DataA[HiddenSize]; - c[i] = new DataA[HiddenSize]; - std::memset(h[i], 0, HiddenSize * sizeof(DataA)); - std::memset(c[i], 0, HiddenSize * sizeof(DataA)); - } - } else { - c[0] = c_prev; - c[1] = c_curr; - h[0] = h_prev; - h[1] = h_curr; - } - for (int i = 0; i < 8; ++i) { - acc_buffer[i] = new DataAcc*[Tv]; - for (int j = 0; j < Tv; ++j) { - acc_buffer[i][j] = new DataAcc[HiddenSize / Tv]; - } - } - for (int t = 0; t < NumTimesteps; ++t) { - const int in_ptr = (t % 2) == 0 ? 0 : 1; - const int out_ptr = (t % 2) == 0 ? 
1 : 0; - for (int i = 0; i < 8; ++i) { - std::memset(u_acc[i], 0, NumIter * sizeof(DataAcc)); - for (int j = 0; j < Tv; ++j) { - std::memset(acc_buffer[i][j], 0, HiddenSize / Tv * sizeof(DataAcc)); - } - } - for (int i = 0; i < NumIter; ++i) { - for (int q = 0; q < 4; ++q) { - for (int j = 0; j < Tu - ZTu; ++j) { - const int nz_idx = i * (Tu - ZTu) + j; - for (int k = 0; k < InputSize / Tu; ++k) { - int u_idx = i * InputSize / Tu * (Tu - ZTu) + j * InputSize / Tu + k; - u_acc[q][i] += x[t * InputSize + unz[q][nz_idx] * InputSize / Tu + k] * u[q][u_idx]; - } - for (int k = 0; k < HiddenSize / Tu; ++k) { - int u_idx = i * HiddenSize / Tu * (Tu - ZTu) + j * HiddenSize / Tu + k; - u_acc[q + 4][i] += h[in_ptr][unz[q + 4][nz_idx] * HiddenSize / Tu + k] * u[q + 4][u_idx]; - } - } - } - for (int q = 0; q < 8; ++q) { - xs_val[q] = s[q][i] * DataA(u_acc[q][i]); - for (int j = 0; j < Tv - ZTv; ++j) { - for (int k = 0; k < HiddenSize / Tv; ++k) { - const int v_idx = i * HiddenSize / Tv * (Tv - ZTv) + j * HiddenSize / Tv + k; - const int nz_idx = i * (Tv - ZTv) + j; - acc_buffer[q][vnz[q][nz_idx]][k] += xs_val[q] * v[q][v_idx]; - } - } - } - } - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < Tv; ++j) { - for (int k = 0; k < HiddenSize / Tv; ++k) { - cur_out_fifo[i][j].write(acc_buffer[i][j][k]); - rec_out_fifo[i][j].write(acc_buffer[i + 4][j][k]); - } - } - } - NonLinearityUnitSoftware(HiddenSize, - Tv, 4, c[in_ptr], cur_out_fifo, rec_out_fifo, h[out_ptr], c[out_ptr], - true, bias); - } - if (NumTimesteps > 1) { - std::memcpy(h_curr, h[(NumTimesteps - 1) % 2 == 0 ? 
1 : 0], HiddenSize * sizeof(DataA)); - } - for (int i = 0; i < 4; ++i) { - delete[] cur_out_fifo[i]; - delete[] rec_out_fifo[i]; - } - delete[] cur_out_fifo; - delete[] rec_out_fifo; - for (int i = 0; i < 8; ++i) { - delete[] u_acc[i]; - for (int j = 0; j < Tv; ++j) { - delete[] acc_buffer[i][j]; - } - delete[] acc_buffer[i]; - } - if (NumTimesteps > 1) { - for (int i = 0; i < 2; ++i) { - delete[] h[i]; - delete[] c[i]; - } - } -} -#endif // end LSTM_SW_SOFT_LSTM_SVD_H_ \ No newline at end of file +} // svd + +#endif // end LAYERS_LSTM_SW_SOFT_LSTM_SVD_H_ \ No newline at end of file diff --git a/include/lstm/hls/lstm_svd.h b/include/lstm/hls/lstm_svd.h deleted file mode 100644 index 0e8db4c..0000000 --- a/include/lstm/hls/lstm_svd.h +++ /dev/null @@ -1,143 +0,0 @@ -#ifndef LSTM_HLS_LSTM_SVD_H_ -#define LSTM_HLS_LSTM_SVD_H_ - -#include "svd_params.h" - -#ifdef SDS_DESIGN -// ============================================================================= -// Ports using DMAs -// ============================================================================= -#pragma SDS data copy(x1_port[0:INPUT_SIZE]) -#pragma SDS data copy(x2_port[0:INPUT_SIZE]) -#pragma SDS data copy(h_t1_prev_port[0:HIDDEN_SIZE]) -#pragma SDS data copy(h_t2_prev_port[0:HIDDEN_SIZE]) -#pragma SDS data copy(c_t1_prev_port[0:HIDDEN_SIZE]) -#pragma SDS data copy(c_t2_prev_port[0:HIDDEN_SIZE]) -#pragma SDS data copy(bias1_port[0:4*HIDDEN_SIZE]) -#pragma SDS data copy(bias2_port[0:4*HIDDEN_SIZE]) -#pragma SDS data copy(comb_v_port[0:NUM_ITERATIONS * 8]) -#pragma SDS data copy(comb_u_port[0:NUM_ITERATIONS * 8]) -#pragma SDS data copy(h_t1_curr_port[0:HIDDEN_SIZE]) -#pragma SDS data copy(h_t2_curr_port[0:HIDDEN_SIZE]) -#pragma SDS data copy(c_t1_curr_port[0:HIDDEN_SIZE]) -#pragma SDS data copy(c_t2_curr_port[0:HIDDEN_SIZE]) -// Data Movers -#pragma SDS data data_mover(x1_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(x2_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(h_t1_prev_port:AXIDMA_SIMPLE) -#pragma 
SDS data data_mover(h_t2_prev_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(c_t1_prev_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(c_t2_prev_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(bias1_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(bias2_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(comb_v_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(comb_u_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(h_t1_curr_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(h_t2_curr_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(c_t1_curr_port:AXIDMA_SIMPLE) -#pragma SDS data data_mover(c_t2_curr_port:AXIDMA_SIMPLE) -// Port mapping -// #pragma SDS data sys_port(x1_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(x2_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(h_t1_prev_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(h_t2_prev_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(c_t1_prev_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(c_t2_prev_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(bias1_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(bias2_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(comb_v_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(comb_u_port:ps_e_S_AXI_HPC0_FPD) // Coherent HP port -// #pragma SDS data sys_port(h_t1_curr_port:ps_e_S_AXI_HPC1_FPD) // Coherent HP port -// #pragma SDS data sys_port(h_t2_curr_port:ps_e_S_AXI_HPC1_FPD) // Coherent HP port -// #pragma SDS data sys_port(c_t1_curr_port:ps_e_S_AXI_HPC1_FPD) // Coherent HP port -// #pragma SDS data sys_port(c_t2_curr_port:ps_e_S_AXI_HPC1_FPD) // Coherent HP port -// ============================================================================= -// Weight ports not using DMAs -// 
============================================================================= -// #pragma SDS data zero_copy(u_cur_port[0:NUM_ITERATIONS*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)]) -// #pragma SDS data zero_copy(u_rec_port[0:NUM_ITERATIONS*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)]) -// #pragma SDS data zero_copy(v_port[0:NUM_ITERATIONS*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V)]) -// #pragma SDS data zero_copy(s1_port[0:NUM_ITERATIONS]) -// #pragma SDS data zero_copy(s2_port[0:NUM_ITERATIONS]) -// ============================================================================= -// Weight ports using DMAs -// ============================================================================= -#pragma SDS data copy(u_cur_port[0:NUM_ITERATIONS*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)]) -#pragma SDS data copy(u_rec_port[0:NUM_ITERATIONS*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)]) -#pragma SDS data copy(v_port[0:NUM_ITERATIONS*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V)]) -#pragma SDS data copy(s1_port[0:NUM_ITERATIONS]) -#pragma SDS data copy(s2_port[0:NUM_ITERATIONS]) -// Platform Port Mapping, available options: -// - ACP Coherent ports: ps_e_S_AXI_HPC[0-1]_FPD -// - HP ports: ps_e_S_AXI_HP[0-3]_FPD -// #pragma SDS data sys_port(u_cur_port:ps_e_S_AXI_HP0_FPD) // HP2 -// #pragma SDS data sys_port(u_rec_port:ps_e_S_AXI_HP1_FPD) // HP3 -// #pragma SDS data sys_port(v_port:ps_e_S_AXI_HP2_FPD) // HP3 -// #pragma SDS data sys_port(s1_port:ps_e_S_AXI_HP3_FPD) // HP3 -// #pragma SDS data sys_port(s2_port:ps_e_S_AXI_HP3_FPD) // HP3 -// ============================================================================= -// Other Configurations -// ============================================================================= -// Compiler hint on allocation -#pragma SDS data mem_attribute(x1_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data 
mem_attribute(x2_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(h_t1_prev_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(h_t2_prev_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(c_t1_prev_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(c_t2_prev_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(u_cur_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(u_rec_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(v_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(s1_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(s2_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(bias1_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(bias2_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(comb_v_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(comb_u_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(h_t1_curr_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(h_t2_curr_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(c_t1_curr_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -#pragma SDS data mem_attribute(c_t2_curr_port:PHYSICAL_CONTIGUOUS|NON_CACHEABLE) -// NOTE: All ports are accessed sequentially. 
-#pragma SDS data access_pattern(x1_port:SEQUENTIAL) -#pragma SDS data access_pattern(x2_port:SEQUENTIAL) -#pragma SDS data access_pattern(h_t1_prev_port:SEQUENTIAL) -#pragma SDS data access_pattern(h_t2_prev_port:SEQUENTIAL) -#pragma SDS data access_pattern(c_t1_prev_port:SEQUENTIAL) -#pragma SDS data access_pattern(c_t2_prev_port:SEQUENTIAL) -#pragma SDS data access_pattern(u_cur_port:SEQUENTIAL) -#pragma SDS data access_pattern(u_rec_port:SEQUENTIAL) -#pragma SDS data access_pattern(v_port:SEQUENTIAL) -#pragma SDS data access_pattern(s1_port:SEQUENTIAL) -#pragma SDS data access_pattern(s2_port:SEQUENTIAL) -#pragma SDS data access_pattern(bias1_port:SEQUENTIAL) -#pragma SDS data access_pattern(bias2_port:SEQUENTIAL) -#pragma SDS data access_pattern(comb_v_port:SEQUENTIAL) -#pragma SDS data access_pattern(comb_u_port:SEQUENTIAL) -#pragma SDS data access_pattern(h_t1_curr_port:SEQUENTIAL) -#pragma SDS data access_pattern(h_t2_curr_port:SEQUENTIAL) -#pragma SDS data access_pattern(c_t1_curr_port:SEQUENTIAL) -#pragma SDS data access_pattern(c_t2_curr_port:SEQUENTIAL) -#endif // end SDS_DESIGN -void SvdModel2LstmSDSoCV2( - const svd::ActivationD x1_port[INPUT_SIZE], - const svd::ActivationD x2_port[INPUT_SIZE], - const svd::ActivationD h_t1_prev_port[HIDDEN_SIZE], - const svd::ActivationD h_t2_prev_port[HIDDEN_SIZE], - const svd::ActivationD c_t1_prev_port[HIDDEN_SIZE], - const svd::ActivationD c_t2_prev_port[HIDDEN_SIZE], - const ap_uint *u_cur_port, // [NUM_ITERATIONS*4*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)], - const ap_uint *u_rec_port, // [NUM_ITERATIONS*4*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)], - const ap_uint *v_port, // [NUM_ITERATIONS*4*2*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V)], - const ap_uint *s1_port, // [NUM_ITERATIONS*8], - const ap_uint *s2_port, // [NUM_ITERATIONS*8], - const svd::WeightD bias1_port[4 * HIDDEN_SIZE], - const svd::WeightD bias2_port[4 * HIDDEN_SIZE], - const ap_uint 
comb_v_port[NUM_ITERATIONS * 8], - const ap_uint comb_u_port[NUM_ITERATIONS * 8], - svd::ActivationD h_t1_curr_port[HIDDEN_SIZE], - svd::ActivationD h_t2_curr_port[HIDDEN_SIZE], - svd::ActivationD c_t1_curr_port[HIDDEN_SIZE], - svd::ActivationD c_t2_curr_port[HIDDEN_SIZE]); - -#endif // end LSTM_HLS_LSTM_SVD_H_ \ No newline at end of file diff --git a/include/math_utils/activation_functions.h b/include/math_utils/activation_functions.h index 08c72bf..afa1dff 100644 --- a/include/math_utils/activation_functions.h +++ b/include/math_utils/activation_functions.h @@ -5,6 +5,9 @@ #include "hls_stream.h" #include "assert.h" +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif #include @@ -144,9 +147,9 @@ void NonLinearityUnitSoftware(const int VectLength, assert(NumGates >= 4); DataW tanh_table[TableSize]; InitTanhTable(tanh_table); - const int kNumElemsTile = VectLength / NumTiles; + const int kTileSize = VectLength / NumTiles; for(int i = 0; i < NumTiles; ++i) { - for (int j = 0; j < kNumElemsTile; ++j) { + for (int j = 0; j < kTileSize; ++j) { // ======================================================================= // Python (Keras) Implementation: // i = self.hard_sigm(x_i + K.dot(h_tm1_i, self.recurrent_kernel_i)) @@ -160,10 +163,10 @@ void NonLinearityUnitSoftware(const int VectLength, DataA c_gate = 0; DataA o_gate = 0; if (has_bias) { - i_gate = cur_gate_stream[0][i].read() + rec_gate_stream[0][i].read() + bias[0 * VectLength + i * kNumElemsTile + j]; - f_gate = cur_gate_stream[1][i].read() + rec_gate_stream[1][i].read() + bias[1 * VectLength + i * kNumElemsTile + j]; - c_gate = cur_gate_stream[2][i].read() + rec_gate_stream[2][i].read() + bias[2 * VectLength + i * kNumElemsTile + j]; - o_gate = cur_gate_stream[3][i].read() + rec_gate_stream[3][i].read() + bias[3 * VectLength + i * kNumElemsTile + j]; + i_gate = cur_gate_stream[0][i].read() + rec_gate_stream[0][i].read() + bias[0 * VectLength + i * kTileSize + j]; + f_gate = cur_gate_stream[1][i].read() + 
rec_gate_stream[1][i].read() + bias[1 * VectLength + i * kTileSize + j]; + c_gate = cur_gate_stream[2][i].read() + rec_gate_stream[2][i].read() + bias[2 * VectLength + i * kTileSize + j]; + o_gate = cur_gate_stream[3][i].read() + rec_gate_stream[3][i].read() + bias[3 * VectLength + i * kTileSize + j]; } else { i_gate = cur_gate_stream[0][i].read() + rec_gate_stream[0][i].read(); f_gate = cur_gate_stream[1][i].read() + rec_gate_stream[1][i].read(); @@ -174,12 +177,12 @@ void NonLinearityUnitSoftware(const int VectLength, const auto sigma_f = HardSigmoid(f_gate); const auto sigma_o = HardSigmoid(o_gate); const auto tanh_cell = TanH(c_gate, tanh_table); - const auto c_lhs = sigma_f * c_t_prev[i * kNumElemsTile + j]; + const auto c_lhs = sigma_f * c_t_prev[i * kTileSize + j]; const auto c_t_tile = c_lhs + sigma_i * tanh_cell; - c_t[i * kNumElemsTile + j] = c_t_tile; + c_t[i * kTileSize + j] = c_t_tile; const auto c_tanh = TanH(c_t_tile, tanh_table); const auto h_t_tile = sigma_o * c_tanh; - h[i * kNumElemsTile + j] = h_t_tile; + h[i * kTileSize + j] = h_t_tile; } } } @@ -239,6 +242,7 @@ void LstmNonLinearFunctions(const bool has_bias, const ActivationType c_prev, ActivationType &c_curr, ActivationType &h_curr) { +#pragma HLS FUNCTION_INSTANTIATE variable=has_bias #pragma HLS PIPELINE II=1 ActivationType i_gate = 0; ActivationType f_gate = 0; @@ -249,19 +253,19 @@ void LstmNonLinearFunctions(const bool has_bias, f_gate = cur_gate_f + rec_gate_f + bias_f; c_gate = cur_gate_c + rec_gate_c + bias_c; o_gate = cur_gate_o + rec_gate_o + bias_o; -#pragma HLS RESOURCE variable=i_gate core=AddSub_DSP latency=3 -#pragma HLS RESOURCE variable=f_gate core=AddSub_DSP latency=3 -#pragma HLS RESOURCE variable=c_gate core=AddSub_DSP latency=3 -#pragma HLS RESOURCE variable=o_gate core=AddSub_DSP latency=3 +#pragma HLS RESOURCE variable=i_gate core=AddSub_DSP // latency=3 +#pragma HLS RESOURCE variable=f_gate core=AddSub_DSP // latency=3 +#pragma HLS RESOURCE variable=c_gate 
core=AddSub_DSP // latency=3 +#pragma HLS RESOURCE variable=o_gate core=AddSub_DSP // latency=3 } else { i_gate = cur_gate_i + rec_gate_i; f_gate = cur_gate_f + rec_gate_f; o_gate = cur_gate_c + rec_gate_c; c_gate = cur_gate_o + rec_gate_o; -#pragma HLS RESOURCE variable=i_gate core=AddSub_DSP latency=3 -#pragma HLS RESOURCE variable=f_gate core=AddSub_DSP latency=3 -#pragma HLS RESOURCE variable=c_gate core=AddSub_DSP latency=3 -#pragma HLS RESOURCE variable=o_gate core=AddSub_DSP latency=3 +#pragma HLS RESOURCE variable=i_gate core=AddSub_DSP // latency=3 +#pragma HLS RESOURCE variable=f_gate core=AddSub_DSP // latency=3 +#pragma HLS RESOURCE variable=c_gate core=AddSub_DSP // latency=3 +#pragma HLS RESOURCE variable=o_gate core=AddSub_DSP // latency=3 } const auto sigma_i = HardSigmoid(i_gate); @@ -281,6 +285,135 @@ void LstmNonLinearFunctions(const bool has_bias, h_curr = h_reg; } + +#ifdef __VITIS_HLS__ +/** + * @brief LSTM non-linearity function to be applied to each output element. + * It implements the following Python (Keras) implementation: + * + * i = self.recurrent_activation(x_i + K.dot(h_tm1_i, + * self.recurrent_kernel_i)) + * f = self.recurrent_activation(x_f + K.dot(h_tm1_f, + * self.recurrent_kernel_f)) + * c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1_c, + * self.recurrent_kernel_c)) + * o = self.recurrent_activation(x_o + K.dot(h_tm1_o, + * self.recurrent_kernel_o)) + * h = o * self.activation(c) + * + * @param[in] has_bias Indicates if bias is present. 
+ * @param[in] cur_gate_i The current gate i element + * @param[in] cur_gate_f The current gate f element + * @param[in] cur_gate_c The current gate c element + * @param[in] cur_gate_o The current gate o element + * @param[in] rec_gate_i The recurrent gate i element + * @param[in] rec_gate_f The recurrent gate f element + * @param[in] rec_gate_c The recurrent gate c element + * @param[in] rec_gate_o The recurrent gate o element + * @param[in] bias_i The bias i element + * @param[in] bias_f The bias f element + * @param[in] bias_c The bias c element + * @param[in] bias_o The bias o element + * @param[in] c_prev The previous c (cell) state + * @param c_curr The current c (cell) state + * @param h_curr The h current + * + * @tparam ActivationType The activation type + * @tparam WeightType The weight type + * @tparam LutSize The tanh LUT size: having it templated helps + * inferring a ROM + */ +template +void LstmVectNonLinearFunctions(const bool has_bias, + const hls::vector cur_gate_i, + const hls::vector cur_gate_f, + const hls::vector cur_gate_c, + const hls::vector cur_gate_o, + const hls::vector rec_gate_i, + const hls::vector rec_gate_f, + const hls::vector rec_gate_c, + const hls::vector rec_gate_o, + const hls::vector bias_i, + const hls::vector bias_f, + const hls::vector bias_c, + const hls::vector bias_o, + const hls::vector c_prev, + const hls::vector &c_curr, + const hls::vector &h_curr) { +#pragma HLS FUNCTION_INSTANTIATE variable=has_bias +#pragma HLS PIPELINE II=1 + const hls::vector i_gate; + const hls::vector f_gate; + const hls::vector c_gate; + const hls::vector o_gate; + if (has_bias) { + i_gate = cur_gate_i + rec_gate_i + bias_i; + f_gate = cur_gate_f + rec_gate_f + bias_f; + c_gate = cur_gate_c + rec_gate_c + bias_c; + o_gate = cur_gate_o + rec_gate_o + bias_o; + } else { + i_gate = cur_gate_i + rec_gate_i; + f_gate = cur_gate_f + rec_gate_f; + o_gate = cur_gate_c + rec_gate_c; + c_gate = cur_gate_o + rec_gate_o; + } +#pragma HLS BIND_OP 
variable=i_gate op=add impl=dsp +#pragma HLS BIND_OP variable=f_gate op=add impl=dsp +#pragma HLS BIND_OP variable=c_gate op=add impl=dsp +#pragma HLS BIND_OP variable=o_gate op=add impl=dsp + hls::vector sigma_i; + hls::vector sigma_f; + hls::vector sigma_o; + hls::vector tanh_cell; + hls::vector c_tanh; + for (int i = 0; i < N; ++i) { + sigma_i[i] = HardSigmoid(i_gate[i]); + sigma_f[i] = HardSigmoid(f_gate[i]); + sigma_o[i] = HardSigmoid(o_gate[i]); + tanh_cell[i] = TanH(c_gate[i]); + } + const auto c_lhs = sigma_f * c_prev; + const auto c_reg = c_lhs + sigma_i * tanh_cell; +#pragma HLS BIND_OP variable=c_lhs op=add impl=dsp +#pragma HLS BIND_OP variable=c_reg op=add impl=dsp + c_curr = c_reg; + for (int i = 0; i < N; ++i) { + c_tanh[i] = TanH(c_reg[i]); + } + const auto h_reg = sigma_o * c_tanh; +#pragma HLS BIND_OP variable=h_reg op=mul impl=dsp // latency=3 + h_curr = h_reg; +} +#endif // end __VITIS_HLS__ + + + +/** + * @brief Processing element used in SvdLstm. Deprecated. + * + * @deprecated Old inplementation, not flexible enough. + * + * @param[in] size The size + * @param[in] c_t_prev The c t previous + * @param[in] cur_gate_i The current gate i + * @param[in] cur_gate_f The current gate f + * @param[in] cur_gate_c The current gate c + * @param[in] cur_gate_o The current gate o + * @param[in] rec_gate_i The record gate i + * @param[in] rec_gate_f The record gate f + * @param[in] rec_gate_c The record gate c + * @param[in] rec_gate_o The record gate o + * @param h { parameter_description } + * @param c_t { parameter_description } + * @param[in] has_bias Indicates if bias + * @param[in] i_bias I bias + * @param[in] f_bias The f bias + * @param[in] c_bias The c bias + * @param[in] o_bias The o bias + * + * @tparam A { description } + * @tparam W { description } + */ template void NonLinearityUnitPE(const int size, const A *c_t_prev, @@ -328,8 +461,10 @@ void NonLinearityUnitPE(const int size, /** * @brief Sub module to apply non linearities in parallel. 
+ * @deprecated This function has been included in NonLinearityUnit. * - * @param[in] c_t_prev The previous LSTM cell state (internal internal) + * @param[in] c_t_prev_stream The previous LSTM cell state (internal + * internal) * @param current_gate_i_stream The current gate i stream * @param current_gate_f_stream The current gate f stream * @param current_gate_c_stream The current gate c stream @@ -338,30 +473,42 @@ void NonLinearityUnitPE(const int size, * @param recurrent_gate_f_stream The recurrent gate f stream * @param recurrent_gate_c_stream The recurrent gate c stream * @param recurrent_gate_o_stream The recurrent gate o stream - * @param h The LSTM output - * @param c_t The current LSTM cell state t + * @param h_stream The h stream + * @param c_t_stream The c t stream + * @param[in] has_bias Indicates if bias + * @param i_bias_stream I bias stream + * @param f_bias_stream The f bias stream + * @param c_bias_stream The c bias stream + * @param o_bias_stream The o bias stream + * @param h The LSTM output + * @param c_t The current LSTM cell state t * - * @tparam VectLength The output dimension - * @tparam NumTiles The number of tiles the output is divided into. + * @tparam NumElemsTile The number of tiles the output is + * divided into. 
+ * @tparam VectLength The output dimension */ template -void NonLinearityUnitTile(const svd::ActivationD *c_t_prev, - svd::ActivationStream ¤t_gate_i_stream, - svd::ActivationStream ¤t_gate_f_stream, - svd::ActivationStream ¤t_gate_c_stream, - svd::ActivationStream ¤t_gate_o_stream, - svd::ActivationStream &recurrent_gate_i_stream, - svd::ActivationStream &recurrent_gate_f_stream, - svd::ActivationStream &recurrent_gate_c_stream, - svd::ActivationStream &recurrent_gate_o_stream, - svd::ActivationD *h, - svd::ActivationD *c_t, +void NonLinearityUnitTile(svd::ActivationStream& c_t_prev_stream, + svd::ActivationStream& current_gate_i_stream, + svd::ActivationStream& current_gate_f_stream, + svd::ActivationStream& current_gate_c_stream, + svd::ActivationStream& current_gate_o_stream, + svd::ActivationStream& recurrent_gate_i_stream, + svd::ActivationStream& recurrent_gate_f_stream, + svd::ActivationStream& recurrent_gate_c_stream, + svd::ActivationStream& recurrent_gate_o_stream, + svd::ActivationStream& h_stream, + svd::ActivationStream& c_t_stream, const bool has_bias = false, svd::WeightStream *i_bias_stream = nullptr, svd::WeightStream *f_bias_stream = nullptr, svd::WeightStream *c_bias_stream = nullptr, svd::WeightStream *o_bias_stream = nullptr) { +#ifndef __VITIS_HLS__ #pragma HLS INLINE off +#else +#pragma HLS INLINE +#endif // =========================================================================== // Initialize the lookup table // =========================================================================== @@ -384,137 +531,127 @@ void NonLinearityUnitTile(const svd::ActivationD *c_t_prev, svd::ActivationD rec_f = recurrent_gate_f_stream.read(); svd::ActivationD rec_c = recurrent_gate_c_stream.read(); svd::ActivationD rec_o = recurrent_gate_o_stream.read(); - WeightD i_bias_reg = 0; - WeightD f_bias_reg = 0; - WeightD c_bias_reg = 0; - WeightD o_bias_reg = 0; + WeightD i_bias = 0; + WeightD f_bias = 0; + WeightD c_bias = 0; + WeightD o_bias = 0; if 
(has_bias) { - i_bias_reg = i_bias_stream->read(); - f_bias_reg = f_bias_stream->read(); - c_bias_reg = c_bias_stream->read(); - o_bias_reg = o_bias_stream->read(); + i_bias = i_bias_stream->read(); + f_bias = f_bias_stream->read(); + c_bias = c_bias_stream->read(); + o_bias = o_bias_stream->read(); } - LstmNonLinearFunctions(has_bias, + auto c_t_prev = c_t_prev_stream.read(); + svd::ActivationD c_t; + svd::ActivationD h; + svd::LstmNonLinearFunctions(has_bias, cur_i, cur_f, cur_c, cur_o, rec_i, rec_f, rec_c, rec_o, - i_bias_reg, f_bias_reg, c_bias_reg, o_bias_reg, - c_t_prev[i], c_t[i], h[i]); + i_bias, f_bias, c_bias, o_bias, + c_t_prev, c_t, h); + c_t_stream.write(c_t); + h_stream.write(h); } } template void NonLinearityUnit(const svd::ActivationD *c_t_prev, - svd::ActivationStream (¤t_gate_stream)[NumGates][VectLength / NumTiles], - svd::ActivationStream (&recurrent_gate_stream)[NumGates][VectLength / NumTiles], + svd::ActivationStream (&cur_gate_stream)[NumGates][VectLength / NumTiles], + svd::ActivationStream (&rec_gate_stream)[NumGates][VectLength / NumTiles], svd::ActivationD *h, svd::ActivationD *c_t, const bool has_bias = false, const WeightD *bias_port = nullptr) { +#pragma HLS FUNCTION_INSTANTIATE variable=has_bias #pragma HLS INLINE -// #pragma HLS INTERFACE ap_ctrl_none port=return #pragma HLS DATAFLOW assert(VectLength % NumTiles == 0); - assert(NumGates >= 4); - const int kNumElemsTile = VectLength / NumTiles; - // NOTE: There are kNumElemsTile different streams, which are read in round + assert(NumGates == 4); + const int kTileSize = VectLength / NumTiles; + // NOTE: There are kTileSize different streams, which are read in round // robin fashion. Their depth is then set as their number plus 50%. 
- const int kOutputStreamDepth = kNumElemsTile + kNumElemsTile / 2; - - svd::ActivationD h_t_curr_internal[kNumElemsTile][NumTiles]; - svd::ActivationD c_t_curr_internal[kNumElemsTile][NumTiles]; - svd::ActivationD c_t_prev_internal[kNumElemsTile][NumTiles]; + const int kOutputStreamDepth = kTileSize + kTileSize / 2; + svd::ActivationStream h_t_curr_internal[kTileSize]; + svd::ActivationStream c_t_curr_internal[kTileSize]; + svd::ActivationStream c_t_prev_internal[kTileSize]; + svd::WeightStream bias_streams[NumGates][kTileSize]; #pragma HLS ARRAY_PARTITION variable=h_t_curr_internal complete dim=1 #pragma HLS ARRAY_PARTITION variable=c_t_curr_internal complete dim=1 #pragma HLS ARRAY_PARTITION variable=c_t_prev_internal complete dim=1 #pragma HLS STREAM variable=h_t_curr_internal depth=NumTiles #pragma HLS STREAM variable=c_t_curr_internal depth=kOutputStreamDepth #pragma HLS STREAM variable=c_t_prev_internal depth=kOutputStreamDepth - - NonLinearityUnit_Read2_c_prev: + C_prev_DMA: for (int i = 0; i < NumTiles; ++i) { - NonLinearityUnit_Read_c_prev: - for (int j = 0; j < kNumElemsTile; ++j) { + for (int j = 0; j < kTileSize; ++j) { #pragma HLS PIPELINE II=1 - c_t_prev_internal[j][i] = c_t_prev[i * kNumElemsTile + j]; + c_t_prev_internal[j].write(c_t_prev[i * kTileSize + j]); } } - - svd::WeightStream i_bias_streams[kNumElemsTile]; - svd::WeightStream f_bias_streams[kNumElemsTile]; - svd::WeightStream c_bias_streams[kNumElemsTile]; - svd::WeightStream o_bias_streams[kNumElemsTile]; if (has_bias) { -#pragma HLS ARRAY_PARTITION variable=i_bias_streams complete dim=1 -#pragma HLS ARRAY_PARTITION variable=f_bias_streams complete dim=1 -#pragma HLS ARRAY_PARTITION variable=c_bias_streams complete dim=1 -#pragma HLS ARRAY_PARTITION variable=o_bias_streams complete dim=1 -#pragma HLS STREAM variable=i_bias_streams depth=NumTiles -#pragma HLS STREAM variable=f_bias_streams depth=NumTiles -#pragma HLS STREAM variable=c_bias_streams depth=NumTiles -#pragma HLS STREAM 
variable=o_bias_streams depth=NumTiles - for (int i = 0; i < NumTiles; ++i) { - for (int j = 0; j < kNumElemsTile; ++j) { -#pragma HLS PIPELINE II=1 - i_bias_streams[j].write(bias_port[i * kNumElemsTile + j]); - } - } - for (int i = 0; i < NumTiles; ++i) { - for (int j = 0; j < kNumElemsTile; ++j) { -#pragma HLS PIPELINE II=1 - f_bias_streams[j].write(bias_port[VectLength + i * kNumElemsTile + j]); - } - } - for (int i = 0; i < NumTiles; ++i) { - for (int j = 0; j < kNumElemsTile; ++j) { -#pragma HLS PIPELINE II=1 - c_bias_streams[j].write(bias_port[2 * VectLength + i * kNumElemsTile + j]); - } - } - for (int i = 0; i < NumTiles; ++i) { - for (int j = 0; j < kNumElemsTile; ++j) { +#pragma HLS ARRAY_PARTITION variable=bias_streams complete dim=0 +#pragma HLS STREAM variable=bias_streams depth=NumTiles + Bias_DMA: + for (int k = 0; k < NumGates; ++k) { // Expected in this order: i->f->c->o + for (int i = 0; i < NumTiles; ++i) { + for (int j = 0; j < kTileSize; ++j) { #pragma HLS PIPELINE II=1 - o_bias_streams[j].write(bias_port[3 * VectLength + i * kNumElemsTile + j]); - } + bias_streams[k][j].write(bias_port[k * VectLength + i * kTileSize + j]); + } + } } } - - NonLinearityUnit_Tile_Loop: - for(int i = 0; i < kNumElemsTile; ++i) { -#pragma HLS UNROLL - NonLinearityUnitTile(c_t_prev_internal[i], - current_gate_stream[0][i], - current_gate_stream[1][i], - current_gate_stream[2][i], - current_gate_stream[3][i], - recurrent_gate_stream[0][i], - recurrent_gate_stream[1][i], - recurrent_gate_stream[2][i], - recurrent_gate_stream[3][i], - h_t_curr_internal[i], - c_t_curr_internal[i], - has_bias, - &i_bias_streams[i], - &f_bias_streams[i], - &c_bias_streams[i], - &o_bias_streams[i]); - } - - NonLinearityUnit_Writeback2_h: - for (int i = 0; i < NumTiles; ++i) { - NonLinearityUnit_Writeback_h: - for (int j = 0; j < kNumElemsTile; ++j) { +// NonLinearityUnit_Tile_Loop: +// for(int i = 0; i < kTileSize; ++i) { +// #pragma HLS UNROLL +// 
svd::NonLinearityUnitTile(c_t_prev_internal[i], +// cur_gate_stream[0][i], cur_gate_stream[1][i], +// cur_gate_stream[2][i], cur_gate_stream[3][i], +// rec_gate_stream[0][i], rec_gate_stream[1][i], +// rec_gate_stream[2][i], rec_gate_stream[3][i], +// h_t_curr_internal[i], c_t_curr_internal[i], +// has_bias, &bias_streams[0][i], &bias_streams[1][i], +// &bias_streams[2][i], &bias_streams[3][i]); +// } + const int kTableSize = (FIX_WIDTH <= 16) ? 512 : 256; + // =========================================================================== + // Apply non-linearities to each vector element + // =========================================================================== + NonLinearityUnit_Elem_Loop: + for(int i = 0; i < NumTiles; ++i) { #pragma HLS PIPELINE II=1 - h[i * kNumElemsTile + j] = h_t_curr_internal[j][i]; + for(int j = 0; j < kTileSize; ++j) { + auto cur_i = cur_gate_stream[0][j].read(); + auto cur_f = cur_gate_stream[1][j].read(); + auto cur_c = cur_gate_stream[2][j].read(); + auto cur_o = cur_gate_stream[3][j].read(); + auto rec_i = rec_gate_stream[0][j].read(); + auto rec_f = rec_gate_stream[1][j].read(); + auto rec_c = rec_gate_stream[2][j].read(); + auto rec_o = rec_gate_stream[3][j].read(); + svd::WeightD i_bias, f_bias, c_bias, o_bias; + if (has_bias) { + i_bias = bias_streams[0][j].read(); + f_bias = bias_streams[1][j].read(); + c_bias = bias_streams[2][j].read(); + o_bias = bias_streams[3][j].read(); + } + auto c_t_prev = c_t_prev_internal[j].read(); + svd::ActivationD c_t, h; + svd::LstmNonLinearFunctions( + has_bias, cur_i, cur_f, cur_c, cur_o, rec_i, rec_f, rec_c, rec_o, + i_bias, f_bias, c_bias, o_bias, c_t_prev, c_t, h); + c_t_curr_internal[j].write(c_t); + h_t_curr_internal[j].write(h); } } - - NonLinearityUnit_Writeback2_c: + H_t_curr_DMA: for (int i = 0; i < NumTiles; ++i) { - NonLinearityUnit_Writeback_c: - for (int j = 0; j < kNumElemsTile; ++j) { + for (int j = 0; j < kTileSize; ++j) { #pragma HLS PIPELINE II=1 - c_t[i * kNumElemsTile + j] = 
c_t_curr_internal[j][i]; + h[i * kTileSize + j] = h_t_curr_internal[j].read(); + c_t[i * kTileSize + j] = c_t_curr_internal[j].read(); } } } diff --git a/include/math_utils/data_handler.h b/include/math_utils/data_handler.h index 30468c5..d3480d6 100644 --- a/include/math_utils/data_handler.h +++ b/include/math_utils/data_handler.h @@ -8,6 +8,9 @@ #include #include #include +#include +#include +#include #ifdef SDS_DESIGN #include @@ -36,6 +39,7 @@ namespace svd { template T* AllocateContiguously(const int size) { +#ifndef __SYNTHESIS__ T* tmp; try { tmp = (T*)ALLOC(size * sizeof(T)); @@ -49,6 +53,14 @@ T* AllocateContiguously(const int size) { throw except_alloc; } return tmp; +#else + T* tmp = (T*)ALLOC(size * sizeof(T)); + if (!tmp) { + std::cout << "[ERROR] Contiguous allocation failed." << std::endl; + exit(1); + } + return tmp; +#endif } template @@ -81,6 +93,9 @@ class VectorBlob { VectorBlob(const int refinement_steps, const int vector_size, const int num_tiles, const int num_zero_tiles) { assert(num_tiles >= 1); + assert(refinement_steps > 0); + assert(vector_size > 0); + assert(num_tiles > 0); this->num_tile_elems_ = vector_size / num_tiles; this->size_ = vector_size; this->pruned_size_ = this->num_tile_elems_ * (num_tiles - num_zero_tiles); @@ -121,7 +136,7 @@ class VectorBlob { this->z_idx_.push_back(j); } else { for (int k = 0; k < this->num_tile_elems_; ++k) { - FloatType tmp = rand(); + FloatType tmp = 0.00001 * rand(); this->data_.push_back(tmp); this->pruned_data_.push_back(tmp); this->fix_data_.push_back(FixType(tmp)); @@ -133,7 +148,7 @@ class VectorBlob { } } else { for (int i = 0; i < this->total_size_; ++i) { - FloatType tmp = rand(); + FloatType tmp = 0.00001 * rand(); this->data_.push_back(tmp); this->pruned_data_.push_back(tmp); this->fix_data_.push_back(FixType(tmp)); @@ -207,6 +222,10 @@ class VectorBlob { IdxType get_fix_nz_idx(const int refinement_step) { return this->fix_nz_idx_[refinement_step]; } + + int get_refinement_steps() { + 
return this->refinement_steps_; + } }; @@ -222,6 +241,7 @@ class SvdComponents { const int u_size, const int v_size, const int num_tiles_u, const int num_zero_tiles_u, const int num_tiles_v, const int num_zero_tiles_v) { + assert(num_inputs > 0); this->num_inputs_ = num_inputs; this->u_ = new VectorBlob(refinement_steps, u_size, num_tiles_u, num_zero_tiles_u); this->v_ = new VectorBlob(refinement_steps, v_size, num_tiles_v, num_zero_tiles_v); @@ -243,6 +263,22 @@ class SvdComponents { return this->v_; } + int get_u_size() { + return this->u_->get_size(); + } + + int get_v_size() { + return this->v_->get_size(); + } + + int get_u_pruned_size() { + return this->u_->get_pruned_size(); + } + + int get_v_pruned_size() { + return this->v_->get_pruned_size(); + } + std::vector > get_s() { return this->s_; } @@ -254,6 +290,10 @@ class SvdComponents { int get_num_inputs() { return this->num_inputs_; } + + int get_refinement_steps() { + return this->s_[0].get_refinement_steps(); + } }; } // svd diff --git a/include/svd_ip.h b/include/svd_ip.h index 32b9c90..4396134 100644 --- a/include/svd_ip.h +++ b/include/svd_ip.h @@ -4,14 +4,16 @@ #include "svd_params.h" #include "kernel/svd_kernel.h" +namespace svd { + template inline void SvdIP( const typename params::ActivationD x_port[params::N][params::I], - const typename params::UPortD u_port[params::PrunedSizeU], + const typename params::UPortD u_port[params::R * params::PrunedSizeU], const typename params::SPortD s_port[params::N][params::R], - const typename params::VPortD v_port[params::PrunedSizeV], - const typename params::UnzD nz_u_port[params::R * params::G], - const typename params::VnzD nz_v_port[params::R * params::G], + const typename params::VPortD v_port[params::R * params::PrunedSizeV], + const typename params::UnzD nz_u_port[params::G * params::R], + const typename params::VnzD nz_v_port[params::G * params::R], typename params::ActivationD y_port[params::N][params::G][params::H]) { #pragma HLS INLINE #pragma HLS 
DATAFLOW @@ -22,28 +24,19 @@ inline void SvdIP( svd::SvdStreams streams; svd::SvdBuffers buffers; SvdInDMA(x_port, u_port, s_port, v_port, nz_u_port, nz_v_port, streams, buffers); - SvdKernel(streams); + svd::SvdKernel(streams); SvdOutDMA(streams, y_port); } -const int N = 2; -const int I = 256; -const int H = 128; -const int R = 16; -const int Tu = 16; -const int Tv = 32; -const int ZTu = 8; -const int ZTv = 8; -const int G = 4; -typedef svd::SvdParameters svd_params; - void SvdIp2Inputs( const typename svd_params::ActivationD x_port[svd_params::N][svd_params::I], - const typename svd_params::UPortD u_port[svd_params::PrunedSizeU], + const typename svd_params::UPortD u_port[svd_params::R * svd_params::PrunedSizeU], const typename svd_params::SPortD s_port[svd_params::N][svd_params::R], - const typename svd_params::VPortD v_port[svd_params::PrunedSizeV], - const ap_uint nz_u_port[svd_params::N], - const ap_uint nz_v_port[svd_params::N], + const typename svd_params::VPortD v_port[svd_params::R * svd_params::PrunedSizeV], + const ap_uint nz_u_port[svd_params::G * svd_params::R], + const ap_uint nz_v_port[svd_params::G * svd_params::R], typename svd_params::ActivationD y_port[svd_params::N][svd_params::G][svd_params::H]); +} // svd + #endif // end SVD_IP_H_ \ No newline at end of file diff --git a/include/svd_params.h b/include/svd_params.h index ae77116..db408c2 100644 --- a/include/svd_params.h +++ b/include/svd_params.h @@ -2,12 +2,63 @@ #define SVD_PARAMS_H_ #include "hls_utils/hls_metaprogramming.h" +#include "dma/axis_lib.h" #include "ap_int.h" +#include "ap_axi_sdata.h" #include "hls_stream.h" +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif + +#include namespace svd { +template +struct ParamsU { + static const int N = Ni; + static const int I = Ii; + static const int Tu = Tui; + static const int ZTu = ZTui; + static const int G = Gi; + static const int TuElems = I / Tu; + static const int TuBits = hlsutils::log2::value > 0 ? 
hlsutils::log2::value : 1; + typedef ap_uint UnzD; + typedef ap_uint UnzIdxD; + typedef Type ActivationD; + typedef Type WeightD; + typedef Type AccumulationD; + typedef hls::stream UnzS; + typedef hls::stream > UnzIdxS; + typedef hls::stream ActivationS; + typedef hls::stream WeightS; + typedef hls::stream AccumulationS; + typedef ap_uint::value * G> UPortD; + static const int PrunedSizeU = I / Tu * (Tu - ZTu); + static const int ActivationWidth = hlsutils::Bitwidth::value; + static const int WeightWidth = hlsutils::Bitwidth::value; + static const int AccumulationWidth = hlsutils::Bitwidth::value; + static const int VectTuAxiWidth = ActivationWidth * Tu; + static const int VectN_AxiWidth = ActivationWidth * N; + static const int VectG_AxiWidth = ActivationWidth * G; + static const int VectGN_AxiWidth = ActivationWidth * G * N; + typedef typename svd::AxiStreamPort::AxiuPacketType VectTuAxiPacketType; + typedef typename svd::AxiStreamPort::AxiuPacketType VectN_AxiPacketType; + typedef typename svd::AxiStreamPort::AxiuPacketType VectG_AxiPacketType; + typedef typename svd::AxiStreamPort::AxiuPacketType VectGN_AxiPacketType; + typedef typename svd::AxiStreamFifo::AxiuType VectTuAxiuType; + typedef typename svd::AxiStreamFifo::AxiuType VectN_AxiuType; + typedef typename svd::AxiStreamFifo::AxiuType VectG_AxiuType; + typedef typename svd::AxiStreamFifo::AxiuType VectGN_AxiuType; +#ifdef __VITIS_HLS__ + typedef hls::vector VectTuType; + typedef hls::vector VectN_Type; + typedef hls::vector VectG_Type; + typedef hls::vector VectGN_Type; +#endif +}; + template , @@ -27,8 +78,8 @@ struct SvdParameters { static const int PeV = H / Tv; static const int TuElems = I / Tu; static const int TvElems = H / Tv; - static const int TuBits = hls_utils::log2::value > 0 ? hls_utils::log2::value : 1; - static const int TvBits = hls_utils::log2::value > 0 ? hls_utils::log2::value : 1; + static const int TuBits = hlsutils::log2::value > 0 ? 
hlsutils::log2::value : 1; + static const int TvBits = hlsutils::log2::value > 0 ? hlsutils::log2::value : 1; typedef ap_uint UnzD; typedef ap_uint VnzD; typedef ap_uint UnzIdxD; @@ -43,15 +94,35 @@ struct SvdParameters { typedef hls::stream ActivationS; typedef hls::stream WeightS; typedef hls::stream AccumulationS; - typedef ap_uint::value * G> SPortD; - typedef ap_uint::value * G> UPortD; - typedef ap_uint::value * G> VPortD; - static const int PrunedSizeU = R * I / Tu * (Tu - ZTu); - static const int PrunedSizeV = R * H / Tv * (Tv - ZTv); + typedef ap_uint::value * G> SPortD; + typedef ap_uint::value * G> UPortD; + typedef ap_uint::value * G> VPortD; + static const int PrunedSizeU = I / Tu * (Tu - ZTu); + static const int PrunedSizeV = H / Tv * (Tv - ZTv); static const int SizeS = R * G; - static const int ActivationWidth = hls_utils::Bitwidth::value; - static const int WeightWidth = hls_utils::Bitwidth::value; - static const int AccumulationWidth = hls_utils::Bitwidth::value; + static const int ActivationWidth = hlsutils::Bitwidth::value; + static const int WeightWidth = hlsutils::Bitwidth::value; + static const int AccumulationWidth = hlsutils::Bitwidth::value; + static const int VectTuAxiWidth = ActivationWidth * Tu; + static const int VectTvAxiWidth = ActivationWidth * Tv; + static const int VectN_AxiWidth = ActivationWidth * N; + static const int VectG_AxiWidth = ActivationWidth * G; + static const int VectGN_AxiWidth = ActivationWidth * G * N; + static const int VectGTvAxiWidth = ActivationWidth * G * Tv; + typedef typename svd::AxiStreamPort::PacketType VectTuAxiPacketType; + typedef typename svd::AxiStreamPort::PacketType VectTvAxiPacketType; + typedef typename svd::AxiStreamPort::PacketType VectN_AxiPacketType; + typedef typename svd::AxiStreamPort::PacketType VectG_AxiPacketType; + typedef typename svd::AxiStreamPort::PacketType VectGN_AxiPacketType; + typedef typename svd::AxiStreamPort::PacketType VectGTvAxiPacketType; +#ifdef __VITIS_HLS__ + 
typedef hls::vector VectTuType; + typedef hls::vector VectTvType; + typedef hls::vector VectN_Type; + typedef hls::vector VectG_Type; + typedef hls::vector VectGN_Type; + typedef hls::vector VectGTvType; +#endif }; template @@ -74,34 +145,34 @@ class SvdStreams { typename params::UnzIdxS tile_idx_stream[params::N][params::G][params::PeU]; SvdStreams() { -#pragma HLS STREAM depth=2 variable=x -#pragma HLS STREAM depth=2 variable=nz_u -#pragma HLS STREAM depth=2 variable=nz_v -#pragma HLS STREAM depth=2 variable=u -#pragma HLS STREAM depth=2 variable=s -#pragma HLS STREAM depth=2 variable=v -#pragma HLS STREAM depth=2 variable=xu -#pragma HLS STREAM depth=2 variable=xus -#pragma HLS STREAM depth=2 variable=xusv -#pragma HLS STREAM depth=2 variable=nz_u_idx -#pragma HLS STREAM depth=2 variable=nz_v_idx -#pragma HLS STREAM depth=2 variable=u_dma -#pragma HLS STREAM depth=2 variable=v_dma -#pragma HLS STREAM depth=2 variable=tile_idx_stream -#pragma HLS ARRAY_PARTITION complete dim=0 variable=x -#pragma HLS ARRAY_PARTITION complete dim=0 variable=nz_u -#pragma HLS ARRAY_PARTITION complete dim=0 variable=nz_v -#pragma HLS ARRAY_PARTITION complete dim=0 variable=u -#pragma HLS ARRAY_PARTITION complete dim=0 variable=s -#pragma HLS ARRAY_PARTITION complete dim=0 variable=v -#pragma HLS ARRAY_PARTITION complete dim=0 variable=u_dma -#pragma HLS ARRAY_PARTITION complete dim=0 variable=v_dma -#pragma HLS ARRAY_PARTITION complete dim=0 variable=xu -#pragma HLS ARRAY_PARTITION complete dim=0 variable=xus -#pragma HLS ARRAY_PARTITION complete dim=0 variable=xusv -#pragma HLS ARRAY_PARTITION complete dim=0 variable=nz_u_idx -#pragma HLS ARRAY_PARTITION complete dim=0 variable=nz_v_idx -#pragma HLS ARRAY_PARTITION variable=tile_idx_stream complete dim=0 +#pragma HLS STREAM depth=2 variable=this->x +#pragma HLS STREAM depth=2 variable=this->nz_u +#pragma HLS STREAM depth=2 variable=this->nz_v +#pragma HLS STREAM depth=2 variable=this->u +#pragma HLS STREAM depth=2 variable=this->s 
+#pragma HLS STREAM depth=2 variable=this->v +#pragma HLS STREAM depth=2 variable=this->xu +#pragma HLS STREAM depth=2 variable=this->xus +#pragma HLS STREAM depth=2 variable=this->xusv +#pragma HLS STREAM depth=2 variable=this->nz_u_idx +#pragma HLS STREAM depth=2 variable=this->nz_v_idx +#pragma HLS STREAM depth=2 variable=this->u_dma +#pragma HLS STREAM depth=2 variable=this->v_dma +#pragma HLS STREAM depth=2 variable=this->tile_idx_stream +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->x +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->nz_u +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->nz_v +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->u +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->s +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->v +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->u_dma +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->v_dma +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->xu +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->xus +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->xusv +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->nz_u_idx +#pragma HLS ARRAY_PARTITION complete dim=0 variable=this->nz_v_idx +#pragma HLS ARRAY_PARTITION variable=this->tile_idx_stream complete dim=0 }; ~SvdStreams() {}; @@ -162,7 +233,7 @@ class SvdBuffers { #define NUM_TILES_V 64 #endif #ifndef NUM_ZERO_TILES_V -#define NUM_ZERO_TILES_V 21 +#define NUM_ZERO_TILES_V 8 #endif #ifndef TILE_SIZE_CUR_U #define TILE_SIZE_CUR_U (INPUT_SIZE / NUM_TILES_U) @@ -184,19 +255,6 @@ class SvdBuffers { #define NUM_TIMESTEPS 28 #endif -#ifndef PRUNED_SIZE_CUR_U -#define PRUNED_SIZE_CUR_U (NUM_ITERATIONS * (INPUT_SIZE - NUM_ZERO_TILES_U * INPUT_SIZE / NUM_TILES_U)) -#endif -#ifndef PRUNED_SIZE_REC_U -#define PRUNED_SIZE_REC_U (NUM_ITERATIONS * (HIDDEN_SIZE - NUM_ZERO_TILES_U * HIDDEN_SIZE / NUM_TILES_U)) -#endif -#ifndef PRUNED_SIZE_CUR_V -#define 
PRUNED_SIZE_CUR_V (NUM_ITERATIONS * (HIDDEN_SIZE - NUM_ZERO_TILES_V * HIDDEN_SIZE / NUM_TILES_V)) -#endif -#ifndef PRUNED_SIZE_REC_V -#define PRUNED_SIZE_REC_V (NUM_ITERATIONS * (HIDDEN_SIZE - NUM_ZERO_TILES_V * HIDDEN_SIZE / NUM_TILES_V)) -#endif - #if defined(USE_FIX) #define USE_FIX 1 #define USE_FLOAT 0 @@ -226,8 +284,6 @@ class SvdBuffers { #define AXI_PORT_WIDTH 128 #endif - - #if USE_FLOAT typedef float WeightD; typedef float ActivationD; @@ -239,9 +295,14 @@ typedef double ActivationD; typedef double AccumD; typedef double MultD; #else // USE_FIX + +// typedef short WeightD; +// typedef short ActivationD; +// typedef short AccumD; +// typedef short MultD; + typedef ap_fixed WeightD; typedef ap_fixed ActivationD; - #if FIX_WIDTH == 8 typedef ap_fixed AccumD; typedef ap_fixed MultD; @@ -264,36 +325,27 @@ typedef uint64_t AccelD; typedef ap_uint AxiD; // 64bit for ZedBoard HP and ACP ports (128bit for ZCU104) -// Used by software versions: -typedef ap_fixed<16, 2 * FIX_FRACT_WIDTH> Fix16D; -typedef ap_fixed<8, 2 * FIX_FRACT_WIDTH> Fix8D; - typedef hls::stream ActivationStream; typedef hls::stream WeightStream; typedef hls::stream AxiStream; typedef hls::stream AccumStream; -typedef long long CounterD; -typedef hls::stream ProbeStream; +// TODO: Remove CounterD and ProbeStream types from here. 
+// typedef long long CounterD; +// typedef hls::stream ProbeStream; -#ifndef X_PORT_WIDTH -#define X_PORT_WIDTH (FIX_WIDTH * NUM_TILES_U) -#endif -#ifndef U_PORT_WIDTH -#define U_PORT_WIDTH (FIX_WIDTH * (NUM_TILES_U - NUM_ZERO_TILES_U)) -#endif -#ifndef V_PORT_WIDTH -#define V_PORT_WIDTH (FIX_WIDTH * (HIDDEN_SIZE / NUM_TILES_V)) -#endif -#ifndef S_PORT_WIDTH -#define S_PORT_WIDTH (FIX_WIDTH * 8) -#endif - -typedef ap_uint XPortD; -typedef ap_uint UPortD; -typedef ap_uint VPortD; -typedef ap_uint SPortD; +typedef svd::SvdParameters, + ap_fixed, + ap_fixed > svd_params; + // ActivationD, WeightD, AccumD> svd_params; } // namespace svd + +namespace testsvd { + +} // end testsvd + #endif // end SVD_PARAMS_H_ \ No newline at end of file diff --git a/include/testbenches/test_dense_svd.h b/include/testbenches/test_dense_svd.h new file mode 100644 index 0000000..694c585 --- /dev/null +++ b/include/testbenches/test_dense_svd.h @@ -0,0 +1,6 @@ +#ifndef TESTBENCHES_TEST_DENSE_SVD_H_ +#define TESTBENCHES_TEST_DENSE_SVD_H_ + +#include "layers/dense/hls/dense_svd.h" + +#endif // end TESTBENCHES_TEST_DENSE_SVD_H_ \ No newline at end of file diff --git a/include/testbenches/test_lstm_svd.h b/include/testbenches/test_lstm_svd.h new file mode 100644 index 0000000..390899a --- /dev/null +++ b/include/testbenches/test_lstm_svd.h @@ -0,0 +1,6 @@ +#ifndef TESTBENCHES_TEST_LSTM_SVD_H_ +#define TESTBENCHES_TEST_LSTM_SVD_H_ + +#include "layers/lstm/hls/lstm_svd.h" + +#endif // end TESTBENCHES_TEST_LSTM_SVD_H_ \ No newline at end of file diff --git a/include/testbenches/test_svd_kernel.h b/include/testbenches/test_svd_kernel.h new file mode 100644 index 0000000..7fa3533 --- /dev/null +++ b/include/testbenches/test_svd_kernel.h @@ -0,0 +1,7 @@ +#ifndef TESTBENCHES_TEST_SVD_KERNEL_H_ +#define TESTBENCHES_TEST_SVD_KERNEL_H_ + +#include "svd_params.h" +#include "kernel/svd_kernel.h" + +#endif // end TESTBENCHES_TEST_SVD_KERNEL_H_ \ No newline at end of file diff --git 
a/include/testbenches/test_u_kernel.h b/include/testbenches/test_u_kernel.h new file mode 100644 index 0000000..cf28208 --- /dev/null +++ b/include/testbenches/test_u_kernel.h @@ -0,0 +1,7 @@ +#ifndef TESTBENCHES_TEST_U_KERNEL_H_ +#define TESTBENCHES_TEST_U_KERNEL_H_ + +#include "kernel/u_kernel.h" +#include "hls_utils/hls_debugging.h" + +#endif // end TESTBENCHES_TEST_U_KERNEL_H_ \ No newline at end of file diff --git a/include/testbenches/test_v_kernel.h b/include/testbenches/test_v_kernel.h new file mode 100644 index 0000000..7de11c0 --- /dev/null +++ b/include/testbenches/test_v_kernel.h @@ -0,0 +1,6 @@ +#ifndef TESTBENCHES_TEST_V_KERNEL_H_ +#define TESTBENCHES_TEST_V_KERNEL_H_ + +#include "kernel/v_kernel.h" + +#endif // end TESTBENCHES_TEST_V_KERNEL_H_ \ No newline at end of file diff --git a/pynq/dense_svd/dense_svd.ipynb b/pynq/dense_svd/dense_svd.ipynb new file mode 100644 index 0000000..b2d7fb7 --- /dev/null +++ b/pynq/dense_svd/dense_svd.ipynb @@ -0,0 +1,628 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing DenseSvd\n", + "\n", + "This notebook will test an IP written in Vivado HLS." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%microblaze/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%pybind11/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pynq import Overlay\n", + "import pynq.lib.dma\n", + "from pynq import allocate\n", + "import numpy as np\n", + "from pynq import DefaultIP\n", + "import timeit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Program FPGA and inspect Overlay." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "overlay = Overlay(\"overlay/dense_svd.bit\")\n", + "overlay?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get the kernel register map." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RegisterMap {\n", + " CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, RESERVED_3=0, RESERVED_4=0),\n", + " GIER = Register(Enable=0, RESERVED=0),\n", + " IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),\n", + " IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),\n", + " num_active_inputs = Register(num_active_inputs=0),\n", + " input_size = Register(input_size=0),\n", + " output_size = Register(output_size=0),\n", + " num_refinements_0 = Register(num_refinements_0=0),\n", + " num_refinements_1 = Register(num_refinements_1=0)\n", + "}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel = overlay.HlsDenseSvd_0\n", + "kernel.register_map\n", + "# print(\"stream size: \", adder.stream_size)\n", + "# accel_state = adder.get_state()\n", + "# print(\"accelerator state: \", accel_state)\n", + "# dma = overlay.axi_dma_0\n", + "# dma.register_map.MM2S_DMASR\n", + "# dma.register_map.S2MM_DMACR" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kernel IP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The kernel IP can be automatically bound by first creating our Kernel class. Then, the overlay can be instantiated again." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 191, + "metadata": {}, + "outputs": [], + "source": [ + "class KernelDriver(DefaultIP):\n", + " def __init__(self, description):\n", + " super().__init__(description=description)\n", + " self.max_G = 1\n", + " self.max_I = 1\n", + " self.max_H = 1\n", + " \n", + " bindto = ['xilinx.com:hls:HlsDenseSvd:1.0']\n", + "\n", + " def start_accel(self):\n", + " self.register_map.CTRL.AP_START = 1\n", + " self.write(0x0, 1)\n", + " self.write(0x0, 1)\n", + " while(self.read(0x0) % 2 == 0):\n", + " self.write(0x0, 1)\n", + " pass # Wait until start, i.e. bit 0, is set.\n", + "\n", + " def set_state(self, state):\n", + " # self.register_map.CTRL = state\n", + " # return self.register_map.CTRL\n", + " self.write(0x0, state)\n", + " return self.read(0x0)\n", + "\n", + " def get_state(self):\n", + " return self.register_map.CTRL\n", + " # return self.read(0x0)\n", + "\n", + " @property\n", + " def num_active_inputs(self):\n", + " return self.register_map.num_active_inputs\n", + "\n", + " @num_active_inputs.setter\n", + " def num_active_inputs(self, N):\n", + " self.register_map.num_active_inputs = N\n", + "\n", + " @property\n", + " def input_size(self):\n", + " return self.register_map.input_size\n", + "\n", + " @input_size.setter\n", + " def input_size(self, I):\n", + " self.register_map.input_size = I\n", + "\n", + " @property\n", + " def output_size(self):\n", + " return self.register_map.output_size\n", + "\n", + " @output_size.setter\n", + " def output_size(self, H):\n", + " self.register_map.output_size = H\n", + "\n", + " @property\n", + " def num_refinements(self):\n", + " return (self.register_map.num_refinements_0, self.register_map.num_refinements_1)\n", + "\n", + " @num_refinements.setter\n", + " def num_refinements(self, R):\n", + " self.register_map.num_refinements_0 = R[0]\n", + " self.register_map.num_refinements_1 = R[1]\n", + "\n", + "overlay = Overlay(\"overlay/dense_svd.bit\")" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check again the kernel:" + ] + }, + { + "cell_type": "code", + "execution_count": 192, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, RESERVED_3=0, RESERVED_4=0)" + ] + }, + "execution_count": 192, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dense_svd = overlay.HlsDenseSvd_0\n", + "dense_svd.get_state()" + ] + }, + { + "cell_type": "code", + "execution_count": 193, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 193, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dense_svd.read(0x10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To show the class is working, we setup the `num_refinements` using the setter method. We then read its corresponding register." + ] + }, + { + "cell_type": "code", + "execution_count": 194, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(Register(num_refinements_0=1), Register(num_refinements_1=1))" + ] + }, + "execution_count": 194, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dense_svd.num_refinements = (1, 1)\n", + "dense_svd.num_refinements" + ] + }, + { + "cell_type": "code", + "execution_count": 195, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0x4\n" + ] + }, + { + "data": { + "text/plain": [ + "Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, RESERVED_3=0, RESERVED_4=0)" + ] + }, + "execution_count": 195, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(dense_svd.get_state())\n", + "dense_svd.get_state()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Allocation and Run\n", 
+ "\n", + "The data structures must be contiguosly allocated." + ] + }, + { + "cell_type": "code", + "execution_count": 196, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Buffers setup completed.\n", + "x_buffer.shape: (2, 256) - Bytes: 1024\n" + ] + } + ], + "source": [ + "data_t = np.int16\n", + "G = dense_svd.max_G\n", + "N = 2\n", + "I = 256\n", + "H = 256\n", + "R = 64\n", + "x_buffer = pynq.allocate(shape=(N, I), dtype=data_t)\n", + "u_buffer = pynq.allocate(shape=(R, I, G), dtype=data_t)\n", + "s_buffer = pynq.allocate(shape=(R, N, G), dtype=data_t)\n", + "v_buffer = pynq.allocate(shape=(R, H, G), dtype=data_t)\n", + "bias_buffer = pynq.allocate(shape=(N, G, H), dtype=data_t)\n", + "y_buffer = pynq.allocate(shape=(N, G, H), dtype=data_t)\n", + "\n", + "# Generate random arrays\n", + "x_np = np.random.rand(N, I).astype(dtype=data_t)\n", + "u_np = np.random.rand(R, I, G).astype(dtype=data_t)\n", + "s_np = np.random.rand(R, N, G).astype(dtype=data_t)\n", + "v_np = np.random.rand(R, H, G).astype(dtype=data_t)\n", + "bias_np = np.random.rand(N, G, H).astype(dtype=data_t)\n", + "y_np = np.zeros((N, G, H)).astype(dtype=data_t)\n", + "\n", + "np.copyto(x_buffer, x_np, casting='no')\n", + "np.copyto(u_buffer, u_np, casting='no')\n", + "np.copyto(s_buffer, s_np, casting='no')\n", + "np.copyto(v_buffer, v_np, casting='no')\n", + "np.copyto(bias_buffer, bias_np, casting='no')\n", + "np.copyto(y_buffer, y_np, casting='no')\n", + "\n", + "print('Buffers setup completed.')\n", + "print(f'x_buffer.shape: {x_buffer.shape} - Bytes: {x_buffer.nbytes}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Setup the kernel and then send the data through the DMAs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 197, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0x4\n", + "0x1\n" + ] + }, + { + "data": { + "text/plain": [ + "RegisterMap {\n", + " CTRL = Register(AP_START=1, AP_DONE=0, AP_IDLE=0, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, RESERVED_3=0, RESERVED_4=0),\n", + " GIER = Register(Enable=0, RESERVED=0),\n", + " IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),\n", + " IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),\n", + " num_active_inputs = Register(num_active_inputs=2),\n", + " input_size = Register(input_size=256),\n", + " output_size = Register(output_size=256),\n", + " num_refinements_0 = Register(num_refinements_0=64),\n", + " num_refinements_1 = Register(num_refinements_1=64)\n", + "}" + ] + }, + "execution_count": 197, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dense_svd.num_active_inputs = N\n", + "dense_svd.input_size = I\n", + "dense_svd.output_size = H\n", + "dense_svd.num_refinements = (R, R)\n", + "print(dense_svd.get_state())\n", + "dense_svd.start_accel()\n", + "print(dense_svd.get_state())\n", + "dense_svd.register_map" + ] + }, + { + "cell_type": "code", + "execution_count": 198, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting transfer:\n", + "Waiting transfer completion.\n", + "x_DMA done.\n", + "u_DMA done.\n", + "s_DMA done.\n", + "v_DMA done.\n", + "bias_DMA done.\n", + "y_DMA done.\n", + "Done.\n", + "\n", + "y_buffer.shape: (2, 1, 256)\n" + ] + } + ], + "source": [ + "# Transfer\n", + "print('Starting transfer:')\n", + "overlay.x_dma.sendchannel.transfer(x_buffer)\n", + "overlay.u_dma.sendchannel.transfer(u_buffer)\n", + "overlay.s_dma.sendchannel.transfer(s_buffer)\n", + "overlay.v_dma.sendchannel.transfer(v_buffer)\n", + "overlay.bias_dma.sendchannel.transfer(bias_buffer)\n", + 
"overlay.y_dma.recvchannel.transfer(y_buffer)\n", + "# Then wait\n", + "print('Waiting transfer completion.')\n", + "overlay.x_dma.sendchannel.wait()\n", + "print(f'x_DMA done.')\n", + "overlay.u_dma.sendchannel.wait()\n", + "print(f'u_DMA done.')\n", + "overlay.s_dma.sendchannel.wait()\n", + "print(f's_DMA done.')\n", + "overlay.v_dma.sendchannel.wait()\n", + "print(f'v_DMA done.')\n", + "overlay.bias_dma.sendchannel.wait()\n", + "print(f'bias_DMA done.')\n", + "overlay.y_dma.recvchannel.wait()\n", + "print(f'y_DMA done.')\n", + "print('Done.\\n')\n", + "\n", + "print(f'y_buffer.shape: {y_buffer.shape}')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def run_kernel(R, x_buffer, u_buffer, xu_buffer):\n", + " kernel_u.num_refinements = R\n", + " kernel_u.start_accel()\n", + " # Transfer\n", + " overlay.x_dma.sendchannel.transfer(x_buffer)\n", + " overlay.u_dma.sendchannel.transfer(u_buffer)\n", + " overlay.xu_dma.recvchannel.transfer(xu_buffer)\n", + " # Then wait\n", + " overlay.x_dma.sendchannel.wait()\n", + " overlay.u_dma.sendchannel.wait()\n", + " overlay.xu_dma.recvchannel.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 loops, best of 3: 148 ms per loop\n" + ] + } + ], + "source": [ + "%timeit run_kernel(R, x_buffer, u_buffer, xu_buffer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checking Correctness\n", + "\n", + "We first find the proper reshape mechanisms:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-1.25823639 1.03248304 -0.3389279 -0.26103506] [-1.25823639 1.03248304 -0.3389279 -0.26103506]\n", + "0.0\n", + "[ 0.38526848 -0.34712276 -0.39317614 0.77762274] [ 0.38526848 -0.34712276 -0.39317614 0.77762274]\n", + 
"0.0\n", + "(128, 4, 2)\n" + ] + } + ], + "source": [ + "# =============================================================================\n", + "# Reshape: (R, I, G) => (R, I // Tu, G, Tu)\n", + "# =============================================================================\n", + "u = np.random.randn(R, I, G)\n", + "u_tmp = u.copy()\n", + "u_tmp = np.transpose(u_tmp.reshape(R, I // Tu, Tu, G), (0, 1, 3, 2))\n", + "print(u[0, 0:4, 0], u_tmp[0, 0, 0, 0:4])\n", + "print(u[0, 3, 0] - u_tmp[0, 0, 0, 3])\n", + "\n", + "# =============================================================================\n", + "# Reshape: (R, I // Tu, G, Tu) => (I, G, R)\n", + "# =============================================================================\n", + "u = np.random.randn(R, I // Tu, G, Tu)\n", + "u_tmp = u.copy()\n", + "u_tmp = np.transpose(u_tmp, (1, 3, 2, 0)).reshape(I, G, R)\n", + "print(u[0, 0, 0, 0:4], u_tmp[0:4, 0, 0])\n", + "print(u[0, 0, 0, 3] - u_tmp[3, 0, 0])\n", + "\n", + "x = np.random.randn(N, I)\n", + "u = np.random.randn(I, G, R)\n", + "x = (x * 2).astype(np.int16)\n", + "u = (u * 2).astype(np.int16)\n", + "\n", + "xu = np.transpose(np.tensordot(x, u, axes=1), (2, 1, 0))\n", + "print(xu.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now check the Numpy computation against the FPGA result." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "All equal: True\n", + "gold[0]: [[ -3634 -22667]\n", + " [ 31065 15347]\n", + " [ 22140 -9595]\n", + " [ 9106 26136]]\n", + "fpga[0]: [[ -3634 -22667]\n", + " [ 31065 15347]\n", + " [ 22140 -9595]\n", + " [ 9106 26136]]\n" + ] + } + ], + "source": [ + "u_tmp = np.transpose(u_buffer, (1, 3, 2, 0)).reshape(I, G, R)\n", + "xu_gold = np.transpose(np.tensordot(x_buffer, u_tmp, axes=1), (2, 1, 0))\n", + "print('\\nAll equal:', np.allclose(xu_buffer, xu_gold))\n", + "print('gold[0]: ', xu_gold[0])\n", + "print('fpga[0]: ', xu_buffer[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pynq/dense_svd/overlay/dense_svd.bit b/pynq/dense_svd/overlay/dense_svd.bit new file mode 100644 index 0000000..8643112 Binary files /dev/null and b/pynq/dense_svd/overlay/dense_svd.bit differ diff --git a/pynq/dense_svd/overlay/dense_svd.hwh b/pynq/dense_svd/overlay/dense_svd.hwh new file mode 100644 index 0000000..9d7bb3f --- /dev/null +++ b/pynq/dense_svd/overlay/dense_svd.hwhdiff --git a/pynq/dense_svd/overlay/dense_svd.tcl b/pynq/dense_svd/overlay/dense_svd.tcl new file mode 100644 index 0000000..7657326 --- /dev/null +++ b/pynq/dense_svd/overlay/dense_svd.tcl @@ -0,0 +1,780 @@ + +################################################################ +# This is a generated script based on design: design_1 +# +# Though there are 
limitations about the generated script, +# the main purpose of this utility is to make learning +# IP Integrator Tcl commands easier. +################################################################ + +namespace eval _tcl { +proc get_script_folder {} { + set script_path [file normalize [info script]] + set script_folder [file dirname $script_path] + return $script_folder +} +} +variable script_folder +set script_folder [_tcl::get_script_folder] + +################################################################ +# Check if script is running in correct Vivado version. +################################################################ +set scripts_vivado_version 2020.2 +set current_vivado_version [version -short] + +if { [string first $scripts_vivado_version $current_vivado_version] == -1 } { + puts "" + catch {common::send_gid_msg -ssname BD::TCL -id 2041 -severity "ERROR" "This script was generated using Vivado <$scripts_vivado_version> and is being run in <$current_vivado_version> of Vivado. Please run the script in Vivado <$scripts_vivado_version> then open the design in Vivado <$current_vivado_version>. Upgrade the design by running \"Tools => Report => Report IP Status...\", then run write_bd_tcl to create an updated script."} + + return 1 +} + +################################################################ +# START +################################################################ + +# To test this script, run the following commands from Vivado Tcl console: +# source design_1_script.tcl + +# If there is no project opened, this script will create a +# project, but make sure you do not have an existing project +# <./myproj/project_1.xpr> in the current working folder. 
+ +set list_projs [get_projects -quiet] +if { $list_projs eq "" } { + create_project project_1 myproj -part xc7z020clg484-1 + set_property BOARD_PART em.avnet.com:zed:part0:1.4 [current_project] +} + + +# CHANGE DESIGN NAME HERE +variable design_name +set design_name design_1 + +# If you do not already have an existing IP Integrator design open, +# you can create a design using the following command: +# create_bd_design $design_name + +# Creating design if needed +set errMsg "" +set nRet 0 + +set cur_design [current_bd_design -quiet] +set list_cells [get_bd_cells -quiet] + +if { ${design_name} eq "" } { + # USE CASES: + # 1) Design_name not set + + set errMsg "Please set the variable <design_name> to a non-empty value." + set nRet 1 + +} elseif { ${cur_design} ne "" && ${list_cells} eq "" } { + # USE CASES: + # 2): Current design opened AND is empty AND names same. + # 3): Current design opened AND is empty AND names diff; design_name NOT in project. + # 4): Current design opened AND is empty AND names diff; design_name exists in project. + + if { $cur_design ne $design_name } { + common::send_gid_msg -ssname BD::TCL -id 2001 -severity "INFO" "Changing value of <design_name> from <$design_name> to <$cur_design> since current design is empty." + set design_name [get_property NAME $cur_design] + } + common::send_gid_msg -ssname BD::TCL -id 2002 -severity "INFO" "Constructing design in IPI design <$cur_design>..." + +} elseif { ${cur_design} ne "" && $list_cells ne "" && $cur_design eq $design_name } { + # USE CASES: + # 5) Current design opened AND has components AND same names. + + set errMsg "Design <$design_name> already exists in your project, please set the variable <design_name> to another value." + set nRet 1 +} elseif { [get_files -quiet ${design_name}.bd] ne "" } { + # USE CASES: + # 6) Current opened design, has components, but diff names, design_name exists in project. + # 7) No opened design, design_name exists in project.
+ + set errMsg "Design <$design_name> already exists in your project, please set the variable <design_name> to another value." + set nRet 2 + +} else { + # USE CASES: + # 8) No opened design, design_name not in project. + # 9) Current opened design, has components, but diff names, design_name not in project. + + common::send_gid_msg -ssname BD::TCL -id 2003 -severity "INFO" "Currently there is no design <$design_name> in project, so creating one..." + + create_bd_design $design_name + + common::send_gid_msg -ssname BD::TCL -id 2004 -severity "INFO" "Making design <$design_name> as current_bd_design." + current_bd_design $design_name + +} + +common::send_gid_msg -ssname BD::TCL -id 2005 -severity "INFO" "Currently the variable <design_name> is equal to \"$design_name\"." + +if { $nRet != 0 } { + catch {common::send_gid_msg -ssname BD::TCL -id 2006 -severity "ERROR" $errMsg} + return $nRet +} + +set bCheckIPsPassed 1 +################################################################## +# CHECK IPs +################################################################## +set bCheckIPs 1 +if { $bCheckIPs == 1 } { + set list_check_ips "\ +xilinx.com:hls:HlsDenseSvd:1.0\ +xilinx.com:ip:axi_dma:7.1\ +xilinx.com:ip:processing_system7:5.5\ +xilinx.com:ip:proc_sys_reset:5.0\ +" + + set list_ips_missing "" + common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." + + foreach ip_vlnv $list_check_ips { + set ip_obj [get_ipdefs -all $ip_vlnv] + if { $ip_obj eq "" } { + lappend list_ips_missing $ip_vlnv + } + } + + if { $list_ips_missing ne "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project."
} + set bCheckIPsPassed 0 + } + +} + +if { $bCheckIPsPassed != 1 } { + common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above." + return 3 +} + +################################################################## +# DESIGN PROCs +################################################################## + + + +# Procedure to create entire design; Provide argument to make +# procedure reusable. If parentCell is "", will use root. +proc create_root_design { parentCell } { + + variable script_folder + variable design_name + + if { $parentCell eq "" } { + set parentCell [get_bd_cells /] + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] 
+ + # Set parent object as current + current_bd_instance $parentObj + + + # Create interface ports + set DDR [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddrx_rtl:1.0 DDR ] + + set FIXED_IO [ create_bd_intf_port -mode Master -vlnv xilinx.com:display_processing_system7:fixedio_rtl:1.0 FIXED_IO ] + + + # Create ports + + # Create instance: HlsDenseSvd_0, and set properties + set HlsDenseSvd_0 [ create_bd_cell -type ip -vlnv xilinx.com:hls:HlsDenseSvd:1.0 HlsDenseSvd_0 ] + + # Create instance: axi_mem_intercon, and set properties + set axi_mem_intercon [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + CONFIG.NUM_SI {2} \ + ] $axi_mem_intercon + + # Create instance: axi_mem_intercon_1, and set properties + set axi_mem_intercon_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_1 ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + CONFIG.NUM_SI {2} \ + ] $axi_mem_intercon_1 + + # Create instance: axi_mem_intercon_2, and set properties + set axi_mem_intercon_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_2 ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon_2 + + # Create instance: axi_mem_intercon_3, and set properties + set axi_mem_intercon_3 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_3 ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon_3 + + # Create instance: bias_dma, and set properties + set bias_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 bias_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $bias_dma + + 
# Create instance: processing_system7_0, and set properties + set processing_system7_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 processing_system7_0 ] + set_property -dict [ list \ + CONFIG.PCW_ACT_APU_PERIPHERAL_FREQMHZ {666.666687} \ + CONFIG.PCW_ACT_CAN_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_DCI_PERIPHERAL_FREQMHZ {10.158730} \ + CONFIG.PCW_ACT_ENET0_PERIPHERAL_FREQMHZ {125.000000} \ + CONFIG.PCW_ACT_ENET1_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA0_PERIPHERAL_FREQMHZ {100.000000} \ + CONFIG.PCW_ACT_FPGA1_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA2_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA3_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_PCAP_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_QSPI_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_SDIO_PERIPHERAL_FREQMHZ {50.000000} \ + CONFIG.PCW_ACT_SMC_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_SPI_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_TPIU_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_TTC0_CLK0_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC0_CLK1_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC0_CLK2_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC1_CLK0_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC1_CLK1_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC1_CLK2_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_UART_PERIPHERAL_FREQMHZ {50.000000} \ + CONFIG.PCW_ACT_WDT_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_APU_PERIPHERAL_FREQMHZ {666.666667} \ + CONFIG.PCW_ARMPLL_CTRL_FBDIV {40} \ + CONFIG.PCW_CAN_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_CAN_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_CAN_PERIPHERAL_FREQMHZ {100} \ + CONFIG.PCW_CLK0_FREQ {100000000} \ + CONFIG.PCW_CLK1_FREQ {10000000} \ + CONFIG.PCW_CLK2_FREQ {10000000} \ + CONFIG.PCW_CLK3_FREQ {10000000} \ + CONFIG.PCW_CPU_CPU_PLL_FREQMHZ {1333.333} \ + CONFIG.PCW_CPU_PERIPHERAL_DIVISOR0 {2} \ + 
CONFIG.PCW_DCI_PERIPHERAL_DIVISOR0 {15} \ + CONFIG.PCW_DCI_PERIPHERAL_DIVISOR1 {7} \ + CONFIG.PCW_DDRPLL_CTRL_FBDIV {32} \ + CONFIG.PCW_DDR_DDR_PLL_FREQMHZ {1066.667} \ + CONFIG.PCW_DDR_PERIPHERAL_DIVISOR0 {2} \ + CONFIG.PCW_DDR_RAM_HIGHADDR {0x1FFFFFFF} \ + CONFIG.PCW_ENET0_ENET0_IO {MIO 16 .. 27} \ + CONFIG.PCW_ENET0_GRP_MDIO_ENABLE {1} \ + CONFIG.PCW_ENET0_GRP_MDIO_IO {MIO 52 .. 53} \ + CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR0 {8} \ + CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_ENET0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_ENET0_PERIPHERAL_FREQMHZ {1000 Mbps} \ + CONFIG.PCW_ENET0_RESET_ENABLE {0} \ + CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_ENET1_RESET_ENABLE {0} \ + CONFIG.PCW_ENET_RESET_ENABLE {1} \ + CONFIG.PCW_ENET_RESET_SELECT {Share reset pin} \ + CONFIG.PCW_EN_EMIO_TTC0 {1} \ + CONFIG.PCW_EN_ENET0 {1} \ + CONFIG.PCW_EN_GPIO {1} \ + CONFIG.PCW_EN_QSPI {1} \ + CONFIG.PCW_EN_SDIO0 {1} \ + CONFIG.PCW_EN_TTC0 {1} \ + CONFIG.PCW_EN_UART1 {1} \ + CONFIG.PCW_EN_USB0 {1} \ + CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR1 {2} \ + CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FPGA0_PERIPHERAL_FREQMHZ {100.000000} \ + CONFIG.PCW_FPGA1_PERIPHERAL_FREQMHZ {150.000000} \ + CONFIG.PCW_FPGA2_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_FPGA_FCLK0_ENABLE {1} \ + CONFIG.PCW_FPGA_FCLK1_ENABLE {0} \ + CONFIG.PCW_FPGA_FCLK2_ENABLE {0} \ + CONFIG.PCW_FPGA_FCLK3_ENABLE {0} \ + CONFIG.PCW_GPIO_MIO_GPIO_ENABLE {1} \ + CONFIG.PCW_GPIO_MIO_GPIO_IO {MIO} \ + CONFIG.PCW_I2C0_GRP_INT_ENABLE {0} \ + CONFIG.PCW_I2C0_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_I2C0_RESET_ENABLE {0} \ + CONFIG.PCW_I2C1_RESET_ENABLE {0} \ + CONFIG.PCW_I2C_PERIPHERAL_FREQMHZ {25} \ + 
CONFIG.PCW_I2C_RESET_ENABLE {1} \ + CONFIG.PCW_IOPLL_CTRL_FBDIV {30} \ + CONFIG.PCW_IO_IO_PLL_FREQMHZ {1000.000} \ + CONFIG.PCW_MIO_0_DIRECTION {inout} \ + CONFIG.PCW_MIO_0_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_0_PULLUP {disabled} \ + CONFIG.PCW_MIO_0_SLEW {slow} \ + CONFIG.PCW_MIO_10_DIRECTION {inout} \ + CONFIG.PCW_MIO_10_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_10_PULLUP {disabled} \ + CONFIG.PCW_MIO_10_SLEW {slow} \ + CONFIG.PCW_MIO_11_DIRECTION {inout} \ + CONFIG.PCW_MIO_11_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_11_PULLUP {disabled} \ + CONFIG.PCW_MIO_11_SLEW {slow} \ + CONFIG.PCW_MIO_12_DIRECTION {inout} \ + CONFIG.PCW_MIO_12_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_12_PULLUP {disabled} \ + CONFIG.PCW_MIO_12_SLEW {slow} \ + CONFIG.PCW_MIO_13_DIRECTION {inout} \ + CONFIG.PCW_MIO_13_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_13_PULLUP {disabled} \ + CONFIG.PCW_MIO_13_SLEW {slow} \ + CONFIG.PCW_MIO_14_DIRECTION {inout} \ + CONFIG.PCW_MIO_14_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_14_PULLUP {disabled} \ + CONFIG.PCW_MIO_14_SLEW {slow} \ + CONFIG.PCW_MIO_15_DIRECTION {inout} \ + CONFIG.PCW_MIO_15_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_15_PULLUP {disabled} \ + CONFIG.PCW_MIO_15_SLEW {slow} \ + CONFIG.PCW_MIO_16_DIRECTION {out} \ + CONFIG.PCW_MIO_16_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_16_PULLUP {disabled} \ + CONFIG.PCW_MIO_16_SLEW {fast} \ + CONFIG.PCW_MIO_17_DIRECTION {out} \ + CONFIG.PCW_MIO_17_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_17_PULLUP {disabled} \ + CONFIG.PCW_MIO_17_SLEW {fast} \ + CONFIG.PCW_MIO_18_DIRECTION {out} \ + CONFIG.PCW_MIO_18_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_18_PULLUP {disabled} \ + CONFIG.PCW_MIO_18_SLEW {fast} \ + CONFIG.PCW_MIO_19_DIRECTION {out} \ + CONFIG.PCW_MIO_19_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_19_PULLUP {disabled} \ + CONFIG.PCW_MIO_19_SLEW {fast} \ + CONFIG.PCW_MIO_1_DIRECTION {out} \ + CONFIG.PCW_MIO_1_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_1_PULLUP {disabled} \ + CONFIG.PCW_MIO_1_SLEW {fast} \ + 
CONFIG.PCW_MIO_20_DIRECTION {out} \ + CONFIG.PCW_MIO_20_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_20_PULLUP {disabled} \ + CONFIG.PCW_MIO_20_SLEW {fast} \ + CONFIG.PCW_MIO_21_DIRECTION {out} \ + CONFIG.PCW_MIO_21_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_21_PULLUP {disabled} \ + CONFIG.PCW_MIO_21_SLEW {fast} \ + CONFIG.PCW_MIO_22_DIRECTION {in} \ + CONFIG.PCW_MIO_22_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_22_PULLUP {disabled} \ + CONFIG.PCW_MIO_22_SLEW {fast} \ + CONFIG.PCW_MIO_23_DIRECTION {in} \ + CONFIG.PCW_MIO_23_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_23_PULLUP {disabled} \ + CONFIG.PCW_MIO_23_SLEW {fast} \ + CONFIG.PCW_MIO_24_DIRECTION {in} \ + CONFIG.PCW_MIO_24_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_24_PULLUP {disabled} \ + CONFIG.PCW_MIO_24_SLEW {fast} \ + CONFIG.PCW_MIO_25_DIRECTION {in} \ + CONFIG.PCW_MIO_25_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_25_PULLUP {disabled} \ + CONFIG.PCW_MIO_25_SLEW {fast} \ + CONFIG.PCW_MIO_26_DIRECTION {in} \ + CONFIG.PCW_MIO_26_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_26_PULLUP {disabled} \ + CONFIG.PCW_MIO_26_SLEW {fast} \ + CONFIG.PCW_MIO_27_DIRECTION {in} \ + CONFIG.PCW_MIO_27_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_27_PULLUP {disabled} \ + CONFIG.PCW_MIO_27_SLEW {fast} \ + CONFIG.PCW_MIO_28_DIRECTION {inout} \ + CONFIG.PCW_MIO_28_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_28_PULLUP {disabled} \ + CONFIG.PCW_MIO_28_SLEW {fast} \ + CONFIG.PCW_MIO_29_DIRECTION {in} \ + CONFIG.PCW_MIO_29_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_29_PULLUP {disabled} \ + CONFIG.PCW_MIO_29_SLEW {fast} \ + CONFIG.PCW_MIO_2_DIRECTION {inout} \ + CONFIG.PCW_MIO_2_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_2_PULLUP {disabled} \ + CONFIG.PCW_MIO_2_SLEW {fast} \ + CONFIG.PCW_MIO_30_DIRECTION {out} \ + CONFIG.PCW_MIO_30_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_30_PULLUP {disabled} \ + CONFIG.PCW_MIO_30_SLEW {fast} \ + CONFIG.PCW_MIO_31_DIRECTION {in} \ + CONFIG.PCW_MIO_31_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_31_PULLUP {disabled} \ + CONFIG.PCW_MIO_31_SLEW 
{fast} \ + CONFIG.PCW_MIO_32_DIRECTION {inout} \ + CONFIG.PCW_MIO_32_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_32_PULLUP {disabled} \ + CONFIG.PCW_MIO_32_SLEW {fast} \ + CONFIG.PCW_MIO_33_DIRECTION {inout} \ + CONFIG.PCW_MIO_33_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_33_PULLUP {disabled} \ + CONFIG.PCW_MIO_33_SLEW {fast} \ + CONFIG.PCW_MIO_34_DIRECTION {inout} \ + CONFIG.PCW_MIO_34_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_34_PULLUP {disabled} \ + CONFIG.PCW_MIO_34_SLEW {fast} \ + CONFIG.PCW_MIO_35_DIRECTION {inout} \ + CONFIG.PCW_MIO_35_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_35_PULLUP {disabled} \ + CONFIG.PCW_MIO_35_SLEW {fast} \ + CONFIG.PCW_MIO_36_DIRECTION {in} \ + CONFIG.PCW_MIO_36_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_36_PULLUP {disabled} \ + CONFIG.PCW_MIO_36_SLEW {fast} \ + CONFIG.PCW_MIO_37_DIRECTION {inout} \ + CONFIG.PCW_MIO_37_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_37_PULLUP {disabled} \ + CONFIG.PCW_MIO_37_SLEW {fast} \ + CONFIG.PCW_MIO_38_DIRECTION {inout} \ + CONFIG.PCW_MIO_38_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_38_PULLUP {disabled} \ + CONFIG.PCW_MIO_38_SLEW {fast} \ + CONFIG.PCW_MIO_39_DIRECTION {inout} \ + CONFIG.PCW_MIO_39_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_39_PULLUP {disabled} \ + CONFIG.PCW_MIO_39_SLEW {fast} \ + CONFIG.PCW_MIO_3_DIRECTION {inout} \ + CONFIG.PCW_MIO_3_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_3_PULLUP {disabled} \ + CONFIG.PCW_MIO_3_SLEW {fast} \ + CONFIG.PCW_MIO_40_DIRECTION {inout} \ + CONFIG.PCW_MIO_40_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_40_PULLUP {disabled} \ + CONFIG.PCW_MIO_40_SLEW {fast} \ + CONFIG.PCW_MIO_41_DIRECTION {inout} \ + CONFIG.PCW_MIO_41_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_41_PULLUP {disabled} \ + CONFIG.PCW_MIO_41_SLEW {fast} \ + CONFIG.PCW_MIO_42_DIRECTION {inout} \ + CONFIG.PCW_MIO_42_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_42_PULLUP {disabled} \ + CONFIG.PCW_MIO_42_SLEW {fast} \ + CONFIG.PCW_MIO_43_DIRECTION {inout} \ + CONFIG.PCW_MIO_43_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_43_PULLUP 
{disabled} \ + CONFIG.PCW_MIO_43_SLEW {fast} \ + CONFIG.PCW_MIO_44_DIRECTION {inout} \ + CONFIG.PCW_MIO_44_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_44_PULLUP {disabled} \ + CONFIG.PCW_MIO_44_SLEW {fast} \ + CONFIG.PCW_MIO_45_DIRECTION {inout} \ + CONFIG.PCW_MIO_45_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_45_PULLUP {disabled} \ + CONFIG.PCW_MIO_45_SLEW {fast} \ + CONFIG.PCW_MIO_46_DIRECTION {in} \ + CONFIG.PCW_MIO_46_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_46_PULLUP {disabled} \ + CONFIG.PCW_MIO_46_SLEW {slow} \ + CONFIG.PCW_MIO_47_DIRECTION {in} \ + CONFIG.PCW_MIO_47_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_47_PULLUP {disabled} \ + CONFIG.PCW_MIO_47_SLEW {slow} \ + CONFIG.PCW_MIO_48_DIRECTION {out} \ + CONFIG.PCW_MIO_48_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_48_PULLUP {disabled} \ + CONFIG.PCW_MIO_48_SLEW {slow} \ + CONFIG.PCW_MIO_49_DIRECTION {in} \ + CONFIG.PCW_MIO_49_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_49_PULLUP {disabled} \ + CONFIG.PCW_MIO_49_SLEW {slow} \ + CONFIG.PCW_MIO_4_DIRECTION {inout} \ + CONFIG.PCW_MIO_4_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_4_PULLUP {disabled} \ + CONFIG.PCW_MIO_4_SLEW {fast} \ + CONFIG.PCW_MIO_50_DIRECTION {inout} \ + CONFIG.PCW_MIO_50_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_50_PULLUP {disabled} \ + CONFIG.PCW_MIO_50_SLEW {slow} \ + CONFIG.PCW_MIO_51_DIRECTION {inout} \ + CONFIG.PCW_MIO_51_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_51_PULLUP {disabled} \ + CONFIG.PCW_MIO_51_SLEW {slow} \ + CONFIG.PCW_MIO_52_DIRECTION {out} \ + CONFIG.PCW_MIO_52_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_52_PULLUP {disabled} \ + CONFIG.PCW_MIO_52_SLEW {slow} \ + CONFIG.PCW_MIO_53_DIRECTION {inout} \ + CONFIG.PCW_MIO_53_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_53_PULLUP {disabled} \ + CONFIG.PCW_MIO_53_SLEW {slow} \ + CONFIG.PCW_MIO_5_DIRECTION {inout} \ + CONFIG.PCW_MIO_5_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_5_PULLUP {disabled} \ + CONFIG.PCW_MIO_5_SLEW {fast} \ + CONFIG.PCW_MIO_6_DIRECTION {out} \ + CONFIG.PCW_MIO_6_IOTYPE {LVCMOS 3.3V} \ + 
CONFIG.PCW_MIO_6_PULLUP {disabled} \ + CONFIG.PCW_MIO_6_SLEW {fast} \ + CONFIG.PCW_MIO_7_DIRECTION {out} \ + CONFIG.PCW_MIO_7_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_7_PULLUP {disabled} \ + CONFIG.PCW_MIO_7_SLEW {slow} \ + CONFIG.PCW_MIO_8_DIRECTION {out} \ + CONFIG.PCW_MIO_8_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_8_PULLUP {disabled} \ + CONFIG.PCW_MIO_8_SLEW {fast} \ + CONFIG.PCW_MIO_9_DIRECTION {inout} \ + CONFIG.PCW_MIO_9_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_9_PULLUP {disabled} \ + CONFIG.PCW_MIO_9_SLEW {slow} \ + CONFIG.PCW_MIO_TREE_PERIPHERALS {GPIO#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#UART 1#UART 1#GPIO#GPIO#Enet 0#Enet 0} \ + CONFIG.PCW_MIO_TREE_SIGNALS {gpio[0]#qspi0_ss_b#qspi0_io[0]#qspi0_io[1]#qspi0_io[2]#qspi0_io[3]/HOLD_B#qspi0_sclk#gpio[7]#gpio[8]#gpio[9]#gpio[10]#gpio[11]#gpio[12]#gpio[13]#gpio[14]#gpio[15]#tx_clk#txd[0]#txd[1]#txd[2]#txd[3]#tx_ctl#rx_clk#rxd[0]#rxd[1]#rxd[2]#rxd[3]#rx_ctl#data[4]#dir#stp#nxt#data[0]#data[1]#data[2]#data[3]#clk#data[5]#data[6]#data[7]#clk#cmd#data[0]#data[1]#data[2]#data[3]#wp#cd#tx#rx#gpio[50]#gpio[51]#mdc#mdio} \ + CONFIG.PCW_NAND_GRP_D8_ENABLE {0} \ + CONFIG.PCW_NAND_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_A25_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_CS0_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_CS1_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_CS0_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_CS1_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_INT_ENABLE {0} \ + CONFIG.PCW_NOR_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_PCAP_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_PJTAG_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_PRESET_BANK0_VOLTAGE {LVCMOS 3.3V} \ + CONFIG.PCW_PRESET_BANK1_VOLTAGE {LVCMOS 1.8V} \ + CONFIG.PCW_QSPI_GRP_FBCLK_ENABLE {0} \ + 
CONFIG.PCW_QSPI_GRP_IO1_ENABLE {0} \ + CONFIG.PCW_QSPI_GRP_SINGLE_SS_ENABLE {1} \ + CONFIG.PCW_QSPI_GRP_SINGLE_SS_IO {MIO 1 .. 6} \ + CONFIG.PCW_QSPI_GRP_SS1_ENABLE {0} \ + CONFIG.PCW_QSPI_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_QSPI_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_QSPI_PERIPHERAL_FREQMHZ {200} \ + CONFIG.PCW_QSPI_QSPI_IO {MIO 1 .. 6} \ + CONFIG.PCW_SD0_GRP_CD_ENABLE {1} \ + CONFIG.PCW_SD0_GRP_CD_IO {MIO 47} \ + CONFIG.PCW_SD0_GRP_POW_ENABLE {0} \ + CONFIG.PCW_SD0_GRP_WP_ENABLE {1} \ + CONFIG.PCW_SD0_GRP_WP_IO {MIO 46} \ + CONFIG.PCW_SD0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_SD0_SD0_IO {MIO 40 .. 45} \ + CONFIG.PCW_SDIO_PERIPHERAL_DIVISOR0 {20} \ + CONFIG.PCW_SDIO_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_SDIO_PERIPHERAL_VALID {1} \ + CONFIG.PCW_SINGLE_QSPI_DATA_MODE {x4} \ + CONFIG.PCW_SMC_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_SPI_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_S_AXI_HP2_DATA_WIDTH {64} \ + CONFIG.PCW_TPIU_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TTC0_CLK0_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_CLK1_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_CLK2_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_TTC0_TTC0_IO {EMIO} \ + CONFIG.PCW_TTC_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_UART1_GRP_FULL_ENABLE {0} \ + CONFIG.PCW_UART1_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_UART1_UART1_IO {MIO 48 .. 
49} \ + CONFIG.PCW_UART_PERIPHERAL_DIVISOR0 {20} \ + CONFIG.PCW_UART_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_UART_PERIPHERAL_VALID {1} \ + CONFIG.PCW_UIPARAM_ACT_DDR_FREQ_MHZ {533.333374} \ + CONFIG.PCW_UIPARAM_DDR_BANK_ADDR_COUNT {3} \ + CONFIG.PCW_UIPARAM_DDR_BL {8} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY0 {0.41} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY1 {0.411} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY2 {0.341} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY3 {0.358} \ + CONFIG.PCW_UIPARAM_DDR_CL {7} \ + CONFIG.PCW_UIPARAM_DDR_COL_ADDR_COUNT {10} \ + CONFIG.PCW_UIPARAM_DDR_CWL {6} \ + CONFIG.PCW_UIPARAM_DDR_DEVICE_CAPACITY {2048 MBits} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_0 {0.025} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_1 {0.028} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_2 {0.001} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_3 {0.001} \ + CONFIG.PCW_UIPARAM_DDR_DRAM_WIDTH {16 Bits} \ + CONFIG.PCW_UIPARAM_DDR_FREQ_MHZ {533.333313} \ + CONFIG.PCW_UIPARAM_DDR_MEMORY_TYPE {DDR 3} \ + CONFIG.PCW_UIPARAM_DDR_PARTNO {MT41J128M16 HA-15E} \ + CONFIG.PCW_UIPARAM_DDR_ROW_ADDR_COUNT {14} \ + CONFIG.PCW_UIPARAM_DDR_SPEED_BIN {DDR3_1066F} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_DATA_EYE {1} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_READ_GATE {1} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_WRITE_LEVEL {1} \ + CONFIG.PCW_UIPARAM_DDR_T_FAW {45.0} \ + CONFIG.PCW_UIPARAM_DDR_T_RAS_MIN {36.0} \ + CONFIG.PCW_UIPARAM_DDR_T_RC {49.5} \ + CONFIG.PCW_UIPARAM_DDR_T_RCD {7} \ + CONFIG.PCW_UIPARAM_DDR_T_RP {7} \ + CONFIG.PCW_UIPARAM_DDR_USE_INTERNAL_VREF {1} \ + CONFIG.PCW_USB0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_USB0_PERIPHERAL_FREQMHZ {60} \ + CONFIG.PCW_USB0_RESET_ENABLE {0} \ + CONFIG.PCW_USB0_USB0_IO {MIO 28 .. 
39} \ + CONFIG.PCW_USB1_RESET_ENABLE {0} \ + CONFIG.PCW_USB_RESET_ENABLE {1} \ + CONFIG.PCW_USB_RESET_SELECT {Share reset pin} \ + CONFIG.PCW_USE_S_AXI_HP0 {1} \ + CONFIG.PCW_USE_S_AXI_HP1 {1} \ + CONFIG.PCW_USE_S_AXI_HP2 {1} \ + CONFIG.PCW_USE_S_AXI_HP3 {1} \ + CONFIG.preset {ZedBoard} \ + ] $processing_system7_0 + + # Create instance: ps7_0_axi_periph, and set properties + set ps7_0_axi_periph [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 ps7_0_axi_periph ] + set_property -dict [ list \ + CONFIG.NUM_MI {7} \ + ] $ps7_0_axi_periph + + # Create instance: rst_ps7_0_100M, and set properties + set rst_ps7_0_100M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ps7_0_100M ] + + # Create instance: s_dma, and set properties + set s_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 s_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $s_dma + + # Create instance: u_dma, and set properties + set u_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 u_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $u_dma + + # Create instance: v_dma, and set properties + set v_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 v_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + 
CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $v_dma + + # Create instance: x_dma, and set properties + set x_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 x_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $x_dma + + # Create instance: y_dma, and set properties + set y_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 y_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s {0} \ + CONFIG.c_include_s2mm_dre {1} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_micro_dma {0} \ + CONFIG.c_s2mm_burst_size {128} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $y_dma + + # Create interface connections + connect_bd_intf_net -intf_net HlsDenseSvd_0_y_port [get_bd_intf_pins HlsDenseSvd_0/y_port] [get_bd_intf_pins y_dma/S_AXIS_S2MM] + connect_bd_intf_net -intf_net axi_mem_intercon_1_M00_AXI [get_bd_intf_pins axi_mem_intercon_1/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP1] + connect_bd_intf_net -intf_net axi_mem_intercon_2_M00_AXI [get_bd_intf_pins axi_mem_intercon_2/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP2] + connect_bd_intf_net -intf_net axi_mem_intercon_3_M00_AXI [get_bd_intf_pins axi_mem_intercon_3/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP3] + connect_bd_intf_net -intf_net axi_mem_intercon_M00_AXI [get_bd_intf_pins axi_mem_intercon/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP0] + connect_bd_intf_net -intf_net bias_dma_M_AXIS_MM2S [get_bd_intf_pins HlsDenseSvd_0/bias_port] [get_bd_intf_pins bias_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net bias_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon/S01_AXI] [get_bd_intf_pins bias_dma/M_AXI_MM2S] + 
connect_bd_intf_net -intf_net processing_system7_0_DDR [get_bd_intf_ports DDR] [get_bd_intf_pins processing_system7_0/DDR] + connect_bd_intf_net -intf_net processing_system7_0_FIXED_IO [get_bd_intf_ports FIXED_IO] [get_bd_intf_pins processing_system7_0/FIXED_IO] + connect_bd_intf_net -intf_net processing_system7_0_M_AXI_GP0 [get_bd_intf_pins processing_system7_0/M_AXI_GP0] [get_bd_intf_pins ps7_0_axi_periph/S00_AXI] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M00_AXI [get_bd_intf_pins bias_dma/S_AXI_LITE] [get_bd_intf_pins ps7_0_axi_periph/M00_AXI] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M01_AXI [get_bd_intf_pins ps7_0_axi_periph/M01_AXI] [get_bd_intf_pins v_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M02_AXI [get_bd_intf_pins ps7_0_axi_periph/M02_AXI] [get_bd_intf_pins x_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M03_AXI [get_bd_intf_pins ps7_0_axi_periph/M03_AXI] [get_bd_intf_pins y_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M04_AXI [get_bd_intf_pins HlsDenseSvd_0/s_axi_ctrl] [get_bd_intf_pins ps7_0_axi_periph/M04_AXI] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M05_AXI [get_bd_intf_pins ps7_0_axi_periph/M05_AXI] [get_bd_intf_pins s_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M06_AXI [get_bd_intf_pins ps7_0_axi_periph/M06_AXI] [get_bd_intf_pins u_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net s_dma_M_AXIS_MM2S [get_bd_intf_pins HlsDenseSvd_0/s_port] [get_bd_intf_pins s_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net s_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon_1/S01_AXI] [get_bd_intf_pins s_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net u_dma_M_AXIS_MM2S [get_bd_intf_pins HlsDenseSvd_0/u_port] [get_bd_intf_pins u_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net u_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon_1/S00_AXI] [get_bd_intf_pins v_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net u_dma_M_AXI_MM2S1 [get_bd_intf_pins 
axi_mem_intercon_3/S00_AXI] [get_bd_intf_pins u_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net v_dma_M_AXIS_MM2S [get_bd_intf_pins HlsDenseSvd_0/v_port] [get_bd_intf_pins v_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net x_dma_M_AXIS_MM2S [get_bd_intf_pins HlsDenseSvd_0/x_port] [get_bd_intf_pins x_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net x_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon/S00_AXI] [get_bd_intf_pins x_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net xu_dma_M_AXI_S2MM [get_bd_intf_pins axi_mem_intercon_2/S00_AXI] [get_bd_intf_pins y_dma/M_AXI_S2MM] + + # Create port connections + connect_bd_net -net processing_system7_0_FCLK_CLK0 [get_bd_pins HlsDenseSvd_0/ap_clk] [get_bd_pins axi_mem_intercon/ACLK] [get_bd_pins axi_mem_intercon/M00_ACLK] [get_bd_pins axi_mem_intercon/S00_ACLK] [get_bd_pins axi_mem_intercon/S01_ACLK] [get_bd_pins axi_mem_intercon_1/ACLK] [get_bd_pins axi_mem_intercon_1/M00_ACLK] [get_bd_pins axi_mem_intercon_1/S00_ACLK] [get_bd_pins axi_mem_intercon_1/S01_ACLK] [get_bd_pins axi_mem_intercon_2/ACLK] [get_bd_pins axi_mem_intercon_2/M00_ACLK] [get_bd_pins axi_mem_intercon_2/S00_ACLK] [get_bd_pins axi_mem_intercon_3/ACLK] [get_bd_pins axi_mem_intercon_3/M00_ACLK] [get_bd_pins axi_mem_intercon_3/S00_ACLK] [get_bd_pins bias_dma/m_axi_mm2s_aclk] [get_bd_pins bias_dma/s_axi_lite_aclk] [get_bd_pins processing_system7_0/FCLK_CLK0] [get_bd_pins processing_system7_0/M_AXI_GP0_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP0_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP1_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP2_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP3_ACLK] [get_bd_pins ps7_0_axi_periph/ACLK] [get_bd_pins ps7_0_axi_periph/M00_ACLK] [get_bd_pins ps7_0_axi_periph/M01_ACLK] [get_bd_pins ps7_0_axi_periph/M02_ACLK] [get_bd_pins ps7_0_axi_periph/M03_ACLK] [get_bd_pins ps7_0_axi_periph/M04_ACLK] [get_bd_pins ps7_0_axi_periph/M05_ACLK] [get_bd_pins ps7_0_axi_periph/M06_ACLK] [get_bd_pins ps7_0_axi_periph/S00_ACLK] 
[get_bd_pins rst_ps7_0_100M/slowest_sync_clk] [get_bd_pins s_dma/m_axi_mm2s_aclk] [get_bd_pins s_dma/s_axi_lite_aclk] [get_bd_pins u_dma/m_axi_mm2s_aclk] [get_bd_pins u_dma/s_axi_lite_aclk] [get_bd_pins v_dma/m_axi_mm2s_aclk] [get_bd_pins v_dma/s_axi_lite_aclk] [get_bd_pins x_dma/m_axi_mm2s_aclk] [get_bd_pins x_dma/s_axi_lite_aclk] [get_bd_pins y_dma/m_axi_s2mm_aclk] [get_bd_pins y_dma/s_axi_lite_aclk] + connect_bd_net -net processing_system7_0_FCLK_RESET0_N [get_bd_pins processing_system7_0/FCLK_RESET0_N] [get_bd_pins rst_ps7_0_100M/ext_reset_in] + connect_bd_net -net rst_ps7_0_100M_peripheral_aresetn [get_bd_pins HlsDenseSvd_0/ap_rst_n] [get_bd_pins axi_mem_intercon/ARESETN] [get_bd_pins axi_mem_intercon/M00_ARESETN] [get_bd_pins axi_mem_intercon/S00_ARESETN] [get_bd_pins axi_mem_intercon/S01_ARESETN] [get_bd_pins axi_mem_intercon_1/ARESETN] [get_bd_pins axi_mem_intercon_1/M00_ARESETN] [get_bd_pins axi_mem_intercon_1/S00_ARESETN] [get_bd_pins axi_mem_intercon_1/S01_ARESETN] [get_bd_pins axi_mem_intercon_2/ARESETN] [get_bd_pins axi_mem_intercon_2/M00_ARESETN] [get_bd_pins axi_mem_intercon_2/S00_ARESETN] [get_bd_pins axi_mem_intercon_3/ARESETN] [get_bd_pins axi_mem_intercon_3/M00_ARESETN] [get_bd_pins axi_mem_intercon_3/S00_ARESETN] [get_bd_pins bias_dma/axi_resetn] [get_bd_pins ps7_0_axi_periph/ARESETN] [get_bd_pins ps7_0_axi_periph/M00_ARESETN] [get_bd_pins ps7_0_axi_periph/M01_ARESETN] [get_bd_pins ps7_0_axi_periph/M02_ARESETN] [get_bd_pins ps7_0_axi_periph/M03_ARESETN] [get_bd_pins ps7_0_axi_periph/M04_ARESETN] [get_bd_pins ps7_0_axi_periph/M05_ARESETN] [get_bd_pins ps7_0_axi_periph/M06_ARESETN] [get_bd_pins ps7_0_axi_periph/S00_ARESETN] [get_bd_pins rst_ps7_0_100M/peripheral_aresetn] [get_bd_pins s_dma/axi_resetn] [get_bd_pins u_dma/axi_resetn] [get_bd_pins v_dma/axi_resetn] [get_bd_pins x_dma/axi_resetn] [get_bd_pins y_dma/axi_resetn] + + # Create address segments + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space 
[get_bd_addr_spaces bias_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP0/HP0_DDR_LOWOCM] -force + assign_bd_address -offset 0x40000000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs HlsDenseSvd_0/s_axi_ctrl/Reg] -force + assign_bd_address -offset 0x41E30000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs bias_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E40000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs s_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E00000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs v_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E50000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs u_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E10000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs x_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E20000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs y_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces s_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP1/HP1_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces u_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP3/HP3_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces v_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP1/HP1_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces 
x_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP0/HP0_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces y_dma/Data_S2MM] [get_bd_addr_segs processing_system7_0/S_AXI_HP2/HP2_DDR_LOWOCM] -force + + + # Restore current instance + current_bd_instance $oldCurInst + + validate_bd_design + save_bd_design +} +# End of create_root_design() + + +################################################################## +# MAIN FLOW +################################################################## + +create_root_design "" + + diff --git a/pynq/kernel_svd/kernel_svd.ipynb b/pynq/kernel_svd/kernel_svd.ipynb new file mode 100644 index 0000000..5d5cc3d --- /dev/null +++ b/pynq/kernel_svd/kernel_svd.ipynb @@ -0,0 +1,557 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing DenseSvd\n", + "\n", + "This notebook will test an IP written in Vivado HLS." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%microblaze/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%pybind11/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if 
(cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pynq import Overlay\n", + "import pynq.lib.dma\n", + "from pynq import allocate\n", + "import numpy as np\n", + "from pynq import DefaultIP\n", + "import timeit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Program FPGA and inspect Overlay." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "overlay = Overlay(\"overlay/kernel_svd.bit\")\n", + "overlay?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get the kernel register map." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RegisterMap {\n", + " CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, RESERVED_3=0, RESERVED_4=0),\n", + " GIER = Register(Enable=0, RESERVED=0),\n", + " IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),\n", + " IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),\n", + " num_active_inputs = Register(num_active_inputs=0),\n", + " input_size = Register(input_size=0),\n", + " output_size = Register(output_size=0),\n", + " num_refinements_0 = Register(num_refinements_0=0),\n", + " num_refinements_1 = Register(num_refinements_1=0)\n", + "}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel = overlay.HlsSvdKernel_0\n", + "kernel.register_map\n", + "# print(\"stream size: \", adder.stream_size)\n", + "# accel_state = adder.get_state()\n", + "# print(\"accelerator state: \", accel_state)\n", + "# dma = overlay.axi_dma_0\n", + "# dma.register_map.MM2S_DMASR\n", + "# dma.register_map.S2MM_DMACR" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "## Kernel IP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The kernel IP can be automatically bound by first creating our Kernel class. Then, the overlay can be instantiated again." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "class KernelDriver(DefaultIP):\n", + " def __init__(self, description):\n", + " super().__init__(description=description)\n", + " self.max_G = 4\n", + " self.max_I = 1\n", + " self.max_H = 1\n", + " \n", + " bindto = ['xilinx.com:hls:HlsSvdKernel:1.0']\n", + "\n", + " def start_accel(self):\n", + " self.register_map.CTRL.AP_START = 1\n", + " self.write(0x0, 1)\n", + " self.write(0x0, 1)\n", + " while(self.read(0x0) % 2 == 0):\n", + " self.write(0x0, 1)\n", + " pass # Wait until start, i.e. bit 0, is set.\n", + "\n", + " def set_state(self, state):\n", + " # self.register_map.CTRL = state\n", + " # return self.register_map.CTRL\n", + " self.write(0x0, state)\n", + " return self.read(0x0)\n", + "\n", + " def get_state(self):\n", + " return self.register_map.CTRL\n", + " # return self.read(0x0)\n", + "\n", + " @property\n", + " def num_active_inputs(self):\n", + " return self.register_map.num_active_inputs\n", + "\n", + " @num_active_inputs.setter\n", + " def num_active_inputs(self, N):\n", + " self.register_map.num_active_inputs = N\n", + "\n", + " @property\n", + " def input_size(self):\n", + " return self.register_map.input_size\n", + "\n", + " @input_size.setter\n", + " def input_size(self, I):\n", + " self.register_map.input_size = I\n", + "\n", + " @property\n", + " def output_size(self):\n", + " return self.register_map.output_size\n", + "\n", + " @output_size.setter\n", + " def output_size(self, H):\n", + " self.register_map.output_size = H\n", + "\n", + " @property\n", + " def num_refinements(self):\n", + " return (self.register_map.num_refinements_0, self.register_map.num_refinements_1)\n", + "\n", + " @num_refinements.setter\n", + " 
def num_refinements(self, R):\n", + " self.register_map.num_refinements_0 = R[0]\n", + " self.register_map.num_refinements_1 = R[1]\n", + "\n", + "overlay = Overlay(\"overlay/kernel_svd.bit\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To show the class is working, we set up the `num_refinements` using the setter method. We then read its corresponding register." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(Register(num_refinements_0=1), Register(num_refinements_1=1))" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel_svd.num_refinements = (1, 1)\n", + "kernel_svd.num_refinements" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Allocation and Run\n", + "\n", + "The data structures must be contiguously allocated." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Buffers setup completed.\n", + "x_buffer.shape: (2, 8) - Bytes: 32\n", + "u_buffer.shape: (4, 8, 4) - Bytes: 256\n" + ] + } + ], + "source": [ + "data_t = np.int16\n", + "G = kernel_svd.max_G\n", + "N = 2\n", + "I = 8\n", + "H = 8\n", + "R = 4\n", + "x_buffer = pynq.allocate(shape=(N, I), dtype=data_t)\n", + "u_buffer = pynq.allocate(shape=(R, I, G), dtype=data_t)\n", + "s_buffer = pynq.allocate(shape=(R, N, G), dtype=data_t)\n", + "v_buffer = pynq.allocate(shape=(R, H, G), dtype=data_t)\n", + "y_buffer = pynq.allocate(shape=(N, G, H), dtype=data_t)\n", + "\n", + "# Generate random arrays\n", + "x_np = np.random.rand(N, I).astype(dtype=data_t)\n", + "u_np = np.random.rand(R, I, G).astype(dtype=data_t)\n", + "s_np = np.random.rand(R, N, G).astype(dtype=data_t)\n", + "v_np = np.random.rand(R, H, G).astype(dtype=data_t)\n", + "y_np = np.zeros((N, G, H)).astype(dtype=data_t)\n", + "\n", +
+ "np.copyto(x_buffer, x_np, casting='no')\n", + "np.copyto(u_buffer, u_np, casting='no')\n", + "np.copyto(s_buffer, s_np, casting='no')\n", + "np.copyto(v_buffer, v_np, casting='no')\n", + "np.copyto(y_buffer, y_np, casting='no')\n", + "\n", + "print('Buffers setup completed.')\n", + "print(f'x_buffer.shape: {x_buffer.shape} - Bytes: {x_buffer.nbytes}')\n", + "print(f'u_buffer.shape: {u_buffer.shape} - Bytes: {u_buffer.nbytes}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Setup the kernel and then send the data through the DMAs." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0x4\n", + "0x1\n" + ] + }, + { + "data": { + "text/plain": [ + "RegisterMap {\n", + " CTRL = Register(AP_START=1, AP_DONE=0, AP_IDLE=0, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, RESERVED_3=0, RESERVED_4=0),\n", + " GIER = Register(Enable=0, RESERVED=0),\n", + " IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),\n", + " IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),\n", + " num_active_inputs = Register(num_active_inputs=2),\n", + " input_size = Register(input_size=8),\n", + " output_size = Register(output_size=8),\n", + " num_refinements_0 = Register(num_refinements_0=4),\n", + " num_refinements_1 = Register(num_refinements_1=4)\n", + "}" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel_svd.num_active_inputs = N\n", + "kernel_svd.input_size = I\n", + "kernel_svd.output_size = H\n", + "kernel_svd.num_refinements = (R, R)\n", + "print(kernel_svd.get_state())\n", + "kernel_svd.start_accel()\n", + "print(kernel_svd.get_state())\n", + "kernel_svd.register_map" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting transfer:\n", + 
"Waiting transfer completion.\n", + "x_DMA done.\n", + "u_DMA done.\n", + "s_DMA done.\n", + "v_DMA done.\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0moverlay\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mv_dma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msendchannel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'v_DMA done.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0moverlay\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my_dma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecvchannel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 19\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'y_DMA done.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Done.\\n'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pynq/lib/dma.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[0merror\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mmio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_offset\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 208\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 209\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merror\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0;36m0x10\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 210\u001b[0m raise RuntimeError(\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pynq/lib/dma.py\u001b[0m in \u001b[0;36merror\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 115\u001b[0m \"\"\"True if DMA engine is in an error state\n\u001b[1;32m 116\u001b[0m \"\"\"\n\u001b[0;32m--> 117\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mmio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_offset\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m&\u001b[0m \u001b[0;36m0x70\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;36m0x0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 118\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pynq/mmio.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, offset, length, word_order)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Device does not have capabilities for 
MMIO\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 110\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moffset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlength\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m4\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mword_order\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'little'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 111\u001b[0m \"\"\"The method to read data from MMIO.\n\u001b[1;32m 112\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# Transfer\n", + "print('Starting transfer:')\n", + "overlay.x_dma.sendchannel.transfer(x_buffer)\n", + "overlay.u_dma.sendchannel.transfer(u_buffer)\n", + "overlay.s_dma.sendchannel.transfer(s_buffer)\n", + "overlay.v_dma.sendchannel.transfer(v_buffer)\n", + "overlay.y_dma.recvchannel.transfer(y_buffer)\n", + "# Then wait\n", + "print('Waiting transfer completion.')\n", + "overlay.x_dma.sendchannel.wait()\n", + "print(f'x_DMA done.')\n", + "overlay.u_dma.sendchannel.wait()\n", + "print(f'u_DMA done.')\n", + "overlay.s_dma.sendchannel.wait()\n", + "print(f's_DMA done.')\n", + "overlay.v_dma.sendchannel.wait()\n", + "print(f'v_DMA done.')\n", + "overlay.y_dma.recvchannel.wait()\n", + "print(f'y_DMA done.')\n", + "print('Done.\\n')\n", + "\n", + "print(f'y_buffer.shape: {y_buffer.shape}')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def run_kernel(R, x_buffer, u_buffer, xu_buffer):\n", + " kernel_u.num_refinements = R\n", + " kernel_u.start_accel()\n", + " # Transfer\n", + " overlay.x_dma.sendchannel.transfer(x_buffer)\n", + " overlay.u_dma.sendchannel.transfer(u_buffer)\n", + " 
overlay.xu_dma.recvchannel.transfer(xu_buffer)\n", + " # Then wait\n", + " overlay.x_dma.sendchannel.wait()\n", + " overlay.u_dma.sendchannel.wait()\n", + " overlay.xu_dma.recvchannel.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 loops, best of 3: 148 ms per loop\n" + ] + } + ], + "source": [ + "%timeit run_kernel(R, x_buffer, u_buffer, xu_buffer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checking Correctness\n", + "\n", + "We first find the proper reshape mechanisms:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-1.25823639 1.03248304 -0.3389279 -0.26103506] [-1.25823639 1.03248304 -0.3389279 -0.26103506]\n", + "0.0\n", + "[ 0.38526848 -0.34712276 -0.39317614 0.77762274] [ 0.38526848 -0.34712276 -0.39317614 0.77762274]\n", + "0.0\n", + "(128, 4, 2)\n" + ] + } + ], + "source": [ + "# =============================================================================\n", + "# Reshape: (R, I, G) => (R, I // Tu, G, Tu)\n", + "# =============================================================================\n", + "u = np.random.randn(R, I, G)\n", + "u_tmp = u.copy()\n", + "u_tmp = np.transpose(u_tmp.reshape(R, I // Tu, Tu, G), (0, 1, 3, 2))\n", + "print(u[0, 0:4, 0], u_tmp[0, 0, 0, 0:4])\n", + "print(u[0, 3, 0] - u_tmp[0, 0, 0, 3])\n", + "\n", + "# =============================================================================\n", + "# Reshape: (R, I // Tu, G, Tu) => (I, G, R)\n", + "# =============================================================================\n", + "u = np.random.randn(R, I // Tu, G, Tu)\n", + "u_tmp = u.copy()\n", + "u_tmp = np.transpose(u_tmp, (1, 3, 2, 0)).reshape(I, G, R)\n", + "print(u[0, 0, 0, 0:4], u_tmp[0:4, 0, 0])\n", + "print(u[0, 0, 0, 3] - u_tmp[3, 0, 0])\n", + "\n", + "x = 
np.random.randn(N, I)\n", + "u = np.random.randn(I, G, R)\n", + "x = (x * 2).astype(np.int16)\n", + "u = (u * 2).astype(np.int16)\n", + "\n", + "xu = np.transpose(np.tensordot(x, u, axes=1), (2, 1, 0))\n", + "print(xu.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now check the Numpy computation against the FPGA result." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "All equal: True\n", + "gold[0]: [[ -3634 -22667]\n", + " [ 31065 15347]\n", + " [ 22140 -9595]\n", + " [ 9106 26136]]\n", + "fpga[0]: [[ -3634 -22667]\n", + " [ 31065 15347]\n", + " [ 22140 -9595]\n", + " [ 9106 26136]]\n" + ] + } + ], + "source": [ + "u_tmp = np.transpose(u_buffer, (1, 3, 2, 0)).reshape(I, G, R)\n", + "xu_gold = np.transpose(np.tensordot(x_buffer, u_tmp, axes=1), (2, 1, 0))\n", + "print('\\nAll equal:', np.allclose(xu_buffer, xu_gold))\n", + "print('gold[0]: ', xu_gold[0])\n", + "print('fpga[0]: ', xu_buffer[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pynq/kernel_svd/overlay/kernel_svd.bit b/pynq/kernel_svd/overlay/kernel_svd.bit new file mode 100644 index 0000000..7860370 Binary files /dev/null and b/pynq/kernel_svd/overlay/kernel_svd.bit differ diff --git a/pynq/kernel_svd/overlay/kernel_svd.hwh b/pynq/kernel_svd/overlay/kernel_svd.hwh new file mode 100644 index 0000000..6397772 --- /dev/null +++ 
b/pynq/kernel_svd/overlay/kernel_svd.hwh @@ -0,0 +1,13430 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pynq/kernel_svd/overlay/kernel_svd.tcl b/pynq/kernel_svd/overlay/kernel_svd.tcl new file mode 100644 index 0000000..fbb7ef4 --- /dev/null +++ b/pynq/kernel_svd/overlay/kernel_svd.tcl @@ -0,0 +1,762 @@ + +################################################################ +# This is a generated script based on design: design_1 +# +# Though there are limitations about the generated script, +# the main purpose of this utility is to make learning +# IP Integrator Tcl commands easier. +################################################################ + +namespace eval _tcl { +proc get_script_folder {} { + set script_path [file normalize [info script]] + set script_folder [file dirname $script_path] + return $script_folder +} +} +variable script_folder +set script_folder [_tcl::get_script_folder] + +################################################################ +# Check if script is running in correct Vivado version. 
+################################################################ +set scripts_vivado_version 2020.2 +set current_vivado_version [version -short] + +if { [string first $scripts_vivado_version $current_vivado_version] == -1 } { + puts "" + catch {common::send_gid_msg -ssname BD::TCL -id 2041 -severity "ERROR" "This script was generated using Vivado <$scripts_vivado_version> and is being run in <$current_vivado_version> of Vivado. Please run the script in Vivado <$scripts_vivado_version> then open the design in Vivado <$current_vivado_version>. Upgrade the design by running \"Tools => Report => Report IP Status...\", then run write_bd_tcl to create an updated script."} + + return 1 +} + +################################################################ +# START +################################################################ + +# To test this script, run the following commands from Vivado Tcl console: +# source design_1_script.tcl + +# If there is no project opened, this script will create a +# project, but make sure you do not have an existing project +# <./myproj/project_1.xpr> in the current working folder. + +set list_projs [get_projects -quiet] +if { $list_projs eq "" } { + create_project project_1 myproj -part xc7z020clg484-1 + set_property BOARD_PART em.avnet.com:zed:part0:1.4 [current_project] +} + + +# CHANGE DESIGN NAME HERE +variable design_name +set design_name design_1 + +# If you do not already have an existing IP Integrator design open, +# you can create a design using the following command: +# create_bd_design $design_name + +# Creating design if needed +set errMsg "" +set nRet 0 + +set cur_design [current_bd_design -quiet] +set list_cells [get_bd_cells -quiet] + +if { ${design_name} eq "" } { + # USE CASES: + # 1) Design_name not set + + set errMsg "Please set the variable to a non-empty value." + set nRet 1 + +} elseif { ${cur_design} ne "" && ${list_cells} eq "" } { + # USE CASES: + # 2): Current design opened AND is empty AND names same. 
+ # 3): Current design opened AND is empty AND names diff; design_name NOT in project. + # 4): Current design opened AND is empty AND names diff; design_name exists in project. + + if { $cur_design ne $design_name } { + common::send_gid_msg -ssname BD::TCL -id 2001 -severity "INFO" "Changing value of from <$design_name> to <$cur_design> since current design is empty." + set design_name [get_property NAME $cur_design] + } + common::send_gid_msg -ssname BD::TCL -id 2002 -severity "INFO" "Constructing design in IPI design <$cur_design>..." + +} elseif { ${cur_design} ne "" && $list_cells ne "" && $cur_design eq $design_name } { + # USE CASES: + # 5) Current design opened AND has components AND same names. + + set errMsg "Design <$design_name> already exists in your project, please set the variable to another value." + set nRet 1 +} elseif { [get_files -quiet ${design_name}.bd] ne "" } { + # USE CASES: + # 6) Current opened design, has components, but diff names, design_name exists in project. + # 7) No opened design, design_name exists in project. + + set errMsg "Design <$design_name> already exists in your project, please set the variable to another value." + set nRet 2 + +} else { + # USE CASES: + # 8) No opened design, design_name not in project. + # 9) Current opened design, has components, but diff names, design_name not in project. + + common::send_gid_msg -ssname BD::TCL -id 2003 -severity "INFO" "Currently there is no design <$design_name> in project, so creating one..." + + create_bd_design $design_name + + common::send_gid_msg -ssname BD::TCL -id 2004 -severity "INFO" "Making design <$design_name> as current_bd_design." + current_bd_design $design_name + +} + +common::send_gid_msg -ssname BD::TCL -id 2005 -severity "INFO" "Currently the variable is equal to \"$design_name\"." 
+ +if { $nRet != 0 } { + catch {common::send_gid_msg -ssname BD::TCL -id 2006 -severity "ERROR" $errMsg} + return $nRet +} + +set bCheckIPsPassed 1 +################################################################## +# CHECK IPs +################################################################## +set bCheckIPs 1 +if { $bCheckIPs == 1 } { + set list_check_ips "\ +xilinx.com:hls:HlsSvdKernel:1.0\ +xilinx.com:ip:processing_system7:5.5\ +xilinx.com:ip:proc_sys_reset:5.0\ +xilinx.com:ip:axi_dma:7.1\ +" + + set list_ips_missing "" + common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." + + foreach ip_vlnv $list_check_ips { + set ip_obj [get_ipdefs -all $ip_vlnv] + if { $ip_obj eq "" } { + lappend list_ips_missing $ip_vlnv + } + } + + if { $list_ips_missing ne "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." } + set bCheckIPsPassed 0 + } + +} + +if { $bCheckIPsPassed != 1 } { + common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above." + return 3 +} + +################################################################## +# DESIGN PROCs +################################################################## + + + +# Procedure to create entire design; Provide argument to make +# procedure reusable. If parentCell is "", will use root. 
+proc create_root_design { parentCell } { + + variable script_folder + variable design_name + + if { $parentCell eq "" } { + set parentCell [get_bd_cells /] + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] + + # Set parent object as current + current_bd_instance $parentObj + + + # Create interface ports + set DDR [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddrx_rtl:1.0 DDR ] + + set FIXED_IO [ create_bd_intf_port -mode Master -vlnv xilinx.com:display_processing_system7:fixedio_rtl:1.0 FIXED_IO ] + + + # Create ports + + # Create instance: HlsSvdKernel_0, and set properties + set HlsSvdKernel_0 [ create_bd_cell -type ip -vlnv xilinx.com:hls:HlsSvdKernel:1.0 HlsSvdKernel_0 ] + + # Create instance: axi_mem_intercon, and set properties + set axi_mem_intercon [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + CONFIG.NUM_SI {1} \ + ] $axi_mem_intercon + + # Create instance: axi_mem_intercon_1, and set properties + set axi_mem_intercon_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_1 ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + CONFIG.NUM_SI {2} \ + ] $axi_mem_intercon_1 + + # Create instance: axi_mem_intercon_2, and set properties + set axi_mem_intercon_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_2 ] + set_property -dict [ list \ + 
CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon_2 + + # Create instance: axi_mem_intercon_3, and set properties + set axi_mem_intercon_3 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_3 ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon_3 + + # Create instance: processing_system7_0, and set properties + set processing_system7_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 processing_system7_0 ] + set_property -dict [ list \ + CONFIG.PCW_ACT_APU_PERIPHERAL_FREQMHZ {666.666687} \ + CONFIG.PCW_ACT_CAN_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_DCI_PERIPHERAL_FREQMHZ {10.158730} \ + CONFIG.PCW_ACT_ENET0_PERIPHERAL_FREQMHZ {125.000000} \ + CONFIG.PCW_ACT_ENET1_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA0_PERIPHERAL_FREQMHZ {100.000000} \ + CONFIG.PCW_ACT_FPGA1_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA2_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA3_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_PCAP_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_QSPI_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_SDIO_PERIPHERAL_FREQMHZ {50.000000} \ + CONFIG.PCW_ACT_SMC_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_SPI_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_TPIU_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_TTC0_CLK0_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC0_CLK1_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC0_CLK2_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC1_CLK0_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC1_CLK1_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC1_CLK2_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_UART_PERIPHERAL_FREQMHZ {50.000000} \ + CONFIG.PCW_ACT_WDT_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_APU_PERIPHERAL_FREQMHZ {666.666667} \ + CONFIG.PCW_ARMPLL_CTRL_FBDIV {40} \ + CONFIG.PCW_CAN_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_CAN_PERIPHERAL_DIVISOR1 {1} \ + 
CONFIG.PCW_CAN_PERIPHERAL_FREQMHZ {100} \ + CONFIG.PCW_CLK0_FREQ {100000000} \ + CONFIG.PCW_CLK1_FREQ {10000000} \ + CONFIG.PCW_CLK2_FREQ {10000000} \ + CONFIG.PCW_CLK3_FREQ {10000000} \ + CONFIG.PCW_CPU_CPU_PLL_FREQMHZ {1333.333} \ + CONFIG.PCW_CPU_PERIPHERAL_DIVISOR0 {2} \ + CONFIG.PCW_DCI_PERIPHERAL_DIVISOR0 {15} \ + CONFIG.PCW_DCI_PERIPHERAL_DIVISOR1 {7} \ + CONFIG.PCW_DDRPLL_CTRL_FBDIV {32} \ + CONFIG.PCW_DDR_DDR_PLL_FREQMHZ {1066.667} \ + CONFIG.PCW_DDR_PERIPHERAL_DIVISOR0 {2} \ + CONFIG.PCW_DDR_RAM_HIGHADDR {0x1FFFFFFF} \ + CONFIG.PCW_ENET0_ENET0_IO {MIO 16 .. 27} \ + CONFIG.PCW_ENET0_GRP_MDIO_ENABLE {1} \ + CONFIG.PCW_ENET0_GRP_MDIO_IO {MIO 52 .. 53} \ + CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR0 {8} \ + CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_ENET0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_ENET0_PERIPHERAL_FREQMHZ {1000 Mbps} \ + CONFIG.PCW_ENET0_RESET_ENABLE {0} \ + CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_ENET1_RESET_ENABLE {0} \ + CONFIG.PCW_ENET_RESET_ENABLE {1} \ + CONFIG.PCW_ENET_RESET_SELECT {Share reset pin} \ + CONFIG.PCW_EN_EMIO_TTC0 {1} \ + CONFIG.PCW_EN_ENET0 {1} \ + CONFIG.PCW_EN_GPIO {1} \ + CONFIG.PCW_EN_QSPI {1} \ + CONFIG.PCW_EN_SDIO0 {1} \ + CONFIG.PCW_EN_TTC0 {1} \ + CONFIG.PCW_EN_UART1 {1} \ + CONFIG.PCW_EN_USB0 {1} \ + CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR1 {2} \ + CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FPGA0_PERIPHERAL_FREQMHZ {100.000000} \ + CONFIG.PCW_FPGA1_PERIPHERAL_FREQMHZ {150.000000} \ + CONFIG.PCW_FPGA2_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_FPGA_FCLK0_ENABLE {1} \ + CONFIG.PCW_FPGA_FCLK1_ENABLE {0} \ + CONFIG.PCW_FPGA_FCLK2_ENABLE {0} \ + CONFIG.PCW_FPGA_FCLK3_ENABLE {0} \ + 
CONFIG.PCW_GPIO_MIO_GPIO_ENABLE {1} \ + CONFIG.PCW_GPIO_MIO_GPIO_IO {MIO} \ + CONFIG.PCW_I2C0_GRP_INT_ENABLE {0} \ + CONFIG.PCW_I2C0_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_I2C0_RESET_ENABLE {0} \ + CONFIG.PCW_I2C1_RESET_ENABLE {0} \ + CONFIG.PCW_I2C_PERIPHERAL_FREQMHZ {25} \ + CONFIG.PCW_I2C_RESET_ENABLE {1} \ + CONFIG.PCW_IOPLL_CTRL_FBDIV {30} \ + CONFIG.PCW_IO_IO_PLL_FREQMHZ {1000.000} \ + CONFIG.PCW_MIO_0_DIRECTION {inout} \ + CONFIG.PCW_MIO_0_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_0_PULLUP {disabled} \ + CONFIG.PCW_MIO_0_SLEW {slow} \ + CONFIG.PCW_MIO_10_DIRECTION {inout} \ + CONFIG.PCW_MIO_10_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_10_PULLUP {disabled} \ + CONFIG.PCW_MIO_10_SLEW {slow} \ + CONFIG.PCW_MIO_11_DIRECTION {inout} \ + CONFIG.PCW_MIO_11_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_11_PULLUP {disabled} \ + CONFIG.PCW_MIO_11_SLEW {slow} \ + CONFIG.PCW_MIO_12_DIRECTION {inout} \ + CONFIG.PCW_MIO_12_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_12_PULLUP {disabled} \ + CONFIG.PCW_MIO_12_SLEW {slow} \ + CONFIG.PCW_MIO_13_DIRECTION {inout} \ + CONFIG.PCW_MIO_13_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_13_PULLUP {disabled} \ + CONFIG.PCW_MIO_13_SLEW {slow} \ + CONFIG.PCW_MIO_14_DIRECTION {inout} \ + CONFIG.PCW_MIO_14_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_14_PULLUP {disabled} \ + CONFIG.PCW_MIO_14_SLEW {slow} \ + CONFIG.PCW_MIO_15_DIRECTION {inout} \ + CONFIG.PCW_MIO_15_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_15_PULLUP {disabled} \ + CONFIG.PCW_MIO_15_SLEW {slow} \ + CONFIG.PCW_MIO_16_DIRECTION {out} \ + CONFIG.PCW_MIO_16_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_16_PULLUP {disabled} \ + CONFIG.PCW_MIO_16_SLEW {fast} \ + CONFIG.PCW_MIO_17_DIRECTION {out} \ + CONFIG.PCW_MIO_17_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_17_PULLUP {disabled} \ + CONFIG.PCW_MIO_17_SLEW {fast} \ + CONFIG.PCW_MIO_18_DIRECTION {out} \ + CONFIG.PCW_MIO_18_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_18_PULLUP {disabled} \ + CONFIG.PCW_MIO_18_SLEW {fast} \ + CONFIG.PCW_MIO_19_DIRECTION {out} \ + 
CONFIG.PCW_MIO_19_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_19_PULLUP {disabled} \ + CONFIG.PCW_MIO_19_SLEW {fast} \ + CONFIG.PCW_MIO_1_DIRECTION {out} \ + CONFIG.PCW_MIO_1_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_1_PULLUP {disabled} \ + CONFIG.PCW_MIO_1_SLEW {fast} \ + CONFIG.PCW_MIO_20_DIRECTION {out} \ + CONFIG.PCW_MIO_20_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_20_PULLUP {disabled} \ + CONFIG.PCW_MIO_20_SLEW {fast} \ + CONFIG.PCW_MIO_21_DIRECTION {out} \ + CONFIG.PCW_MIO_21_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_21_PULLUP {disabled} \ + CONFIG.PCW_MIO_21_SLEW {fast} \ + CONFIG.PCW_MIO_22_DIRECTION {in} \ + CONFIG.PCW_MIO_22_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_22_PULLUP {disabled} \ + CONFIG.PCW_MIO_22_SLEW {fast} \ + CONFIG.PCW_MIO_23_DIRECTION {in} \ + CONFIG.PCW_MIO_23_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_23_PULLUP {disabled} \ + CONFIG.PCW_MIO_23_SLEW {fast} \ + CONFIG.PCW_MIO_24_DIRECTION {in} \ + CONFIG.PCW_MIO_24_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_24_PULLUP {disabled} \ + CONFIG.PCW_MIO_24_SLEW {fast} \ + CONFIG.PCW_MIO_25_DIRECTION {in} \ + CONFIG.PCW_MIO_25_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_25_PULLUP {disabled} \ + CONFIG.PCW_MIO_25_SLEW {fast} \ + CONFIG.PCW_MIO_26_DIRECTION {in} \ + CONFIG.PCW_MIO_26_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_26_PULLUP {disabled} \ + CONFIG.PCW_MIO_26_SLEW {fast} \ + CONFIG.PCW_MIO_27_DIRECTION {in} \ + CONFIG.PCW_MIO_27_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_27_PULLUP {disabled} \ + CONFIG.PCW_MIO_27_SLEW {fast} \ + CONFIG.PCW_MIO_28_DIRECTION {inout} \ + CONFIG.PCW_MIO_28_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_28_PULLUP {disabled} \ + CONFIG.PCW_MIO_28_SLEW {fast} \ + CONFIG.PCW_MIO_29_DIRECTION {in} \ + CONFIG.PCW_MIO_29_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_29_PULLUP {disabled} \ + CONFIG.PCW_MIO_29_SLEW {fast} \ + CONFIG.PCW_MIO_2_DIRECTION {inout} \ + CONFIG.PCW_MIO_2_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_2_PULLUP {disabled} \ + CONFIG.PCW_MIO_2_SLEW {fast} \ + CONFIG.PCW_MIO_30_DIRECTION {out} 
\ + CONFIG.PCW_MIO_30_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_30_PULLUP {disabled} \ + CONFIG.PCW_MIO_30_SLEW {fast} \ + CONFIG.PCW_MIO_31_DIRECTION {in} \ + CONFIG.PCW_MIO_31_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_31_PULLUP {disabled} \ + CONFIG.PCW_MIO_31_SLEW {fast} \ + CONFIG.PCW_MIO_32_DIRECTION {inout} \ + CONFIG.PCW_MIO_32_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_32_PULLUP {disabled} \ + CONFIG.PCW_MIO_32_SLEW {fast} \ + CONFIG.PCW_MIO_33_DIRECTION {inout} \ + CONFIG.PCW_MIO_33_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_33_PULLUP {disabled} \ + CONFIG.PCW_MIO_33_SLEW {fast} \ + CONFIG.PCW_MIO_34_DIRECTION {inout} \ + CONFIG.PCW_MIO_34_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_34_PULLUP {disabled} \ + CONFIG.PCW_MIO_34_SLEW {fast} \ + CONFIG.PCW_MIO_35_DIRECTION {inout} \ + CONFIG.PCW_MIO_35_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_35_PULLUP {disabled} \ + CONFIG.PCW_MIO_35_SLEW {fast} \ + CONFIG.PCW_MIO_36_DIRECTION {in} \ + CONFIG.PCW_MIO_36_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_36_PULLUP {disabled} \ + CONFIG.PCW_MIO_36_SLEW {fast} \ + CONFIG.PCW_MIO_37_DIRECTION {inout} \ + CONFIG.PCW_MIO_37_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_37_PULLUP {disabled} \ + CONFIG.PCW_MIO_37_SLEW {fast} \ + CONFIG.PCW_MIO_38_DIRECTION {inout} \ + CONFIG.PCW_MIO_38_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_38_PULLUP {disabled} \ + CONFIG.PCW_MIO_38_SLEW {fast} \ + CONFIG.PCW_MIO_39_DIRECTION {inout} \ + CONFIG.PCW_MIO_39_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_39_PULLUP {disabled} \ + CONFIG.PCW_MIO_39_SLEW {fast} \ + CONFIG.PCW_MIO_3_DIRECTION {inout} \ + CONFIG.PCW_MIO_3_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_3_PULLUP {disabled} \ + CONFIG.PCW_MIO_3_SLEW {fast} \ + CONFIG.PCW_MIO_40_DIRECTION {inout} \ + CONFIG.PCW_MIO_40_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_40_PULLUP {disabled} \ + CONFIG.PCW_MIO_40_SLEW {fast} \ + CONFIG.PCW_MIO_41_DIRECTION {inout} \ + CONFIG.PCW_MIO_41_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_41_PULLUP {disabled} \ + CONFIG.PCW_MIO_41_SLEW {fast} \ + 
CONFIG.PCW_MIO_42_DIRECTION {inout} \ + CONFIG.PCW_MIO_42_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_42_PULLUP {disabled} \ + CONFIG.PCW_MIO_42_SLEW {fast} \ + CONFIG.PCW_MIO_43_DIRECTION {inout} \ + CONFIG.PCW_MIO_43_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_43_PULLUP {disabled} \ + CONFIG.PCW_MIO_43_SLEW {fast} \ + CONFIG.PCW_MIO_44_DIRECTION {inout} \ + CONFIG.PCW_MIO_44_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_44_PULLUP {disabled} \ + CONFIG.PCW_MIO_44_SLEW {fast} \ + CONFIG.PCW_MIO_45_DIRECTION {inout} \ + CONFIG.PCW_MIO_45_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_45_PULLUP {disabled} \ + CONFIG.PCW_MIO_45_SLEW {fast} \ + CONFIG.PCW_MIO_46_DIRECTION {in} \ + CONFIG.PCW_MIO_46_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_46_PULLUP {disabled} \ + CONFIG.PCW_MIO_46_SLEW {slow} \ + CONFIG.PCW_MIO_47_DIRECTION {in} \ + CONFIG.PCW_MIO_47_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_47_PULLUP {disabled} \ + CONFIG.PCW_MIO_47_SLEW {slow} \ + CONFIG.PCW_MIO_48_DIRECTION {out} \ + CONFIG.PCW_MIO_48_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_48_PULLUP {disabled} \ + CONFIG.PCW_MIO_48_SLEW {slow} \ + CONFIG.PCW_MIO_49_DIRECTION {in} \ + CONFIG.PCW_MIO_49_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_49_PULLUP {disabled} \ + CONFIG.PCW_MIO_49_SLEW {slow} \ + CONFIG.PCW_MIO_4_DIRECTION {inout} \ + CONFIG.PCW_MIO_4_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_4_PULLUP {disabled} \ + CONFIG.PCW_MIO_4_SLEW {fast} \ + CONFIG.PCW_MIO_50_DIRECTION {inout} \ + CONFIG.PCW_MIO_50_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_50_PULLUP {disabled} \ + CONFIG.PCW_MIO_50_SLEW {slow} \ + CONFIG.PCW_MIO_51_DIRECTION {inout} \ + CONFIG.PCW_MIO_51_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_51_PULLUP {disabled} \ + CONFIG.PCW_MIO_51_SLEW {slow} \ + CONFIG.PCW_MIO_52_DIRECTION {out} \ + CONFIG.PCW_MIO_52_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_52_PULLUP {disabled} \ + CONFIG.PCW_MIO_52_SLEW {slow} \ + CONFIG.PCW_MIO_53_DIRECTION {inout} \ + CONFIG.PCW_MIO_53_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_53_PULLUP {disabled} \ + 
CONFIG.PCW_MIO_53_SLEW {slow} \ + CONFIG.PCW_MIO_5_DIRECTION {inout} \ + CONFIG.PCW_MIO_5_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_5_PULLUP {disabled} \ + CONFIG.PCW_MIO_5_SLEW {fast} \ + CONFIG.PCW_MIO_6_DIRECTION {out} \ + CONFIG.PCW_MIO_6_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_6_PULLUP {disabled} \ + CONFIG.PCW_MIO_6_SLEW {fast} \ + CONFIG.PCW_MIO_7_DIRECTION {out} \ + CONFIG.PCW_MIO_7_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_7_PULLUP {disabled} \ + CONFIG.PCW_MIO_7_SLEW {slow} \ + CONFIG.PCW_MIO_8_DIRECTION {out} \ + CONFIG.PCW_MIO_8_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_8_PULLUP {disabled} \ + CONFIG.PCW_MIO_8_SLEW {fast} \ + CONFIG.PCW_MIO_9_DIRECTION {inout} \ + CONFIG.PCW_MIO_9_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_9_PULLUP {disabled} \ + CONFIG.PCW_MIO_9_SLEW {slow} \ + CONFIG.PCW_MIO_TREE_PERIPHERALS {GPIO#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#UART 1#UART 1#GPIO#GPIO#Enet 0#Enet 0} \ + CONFIG.PCW_MIO_TREE_SIGNALS {gpio[0]#qspi0_ss_b#qspi0_io[0]#qspi0_io[1]#qspi0_io[2]#qspi0_io[3]/HOLD_B#qspi0_sclk#gpio[7]#gpio[8]#gpio[9]#gpio[10]#gpio[11]#gpio[12]#gpio[13]#gpio[14]#gpio[15]#tx_clk#txd[0]#txd[1]#txd[2]#txd[3]#tx_ctl#rx_clk#rxd[0]#rxd[1]#rxd[2]#rxd[3]#rx_ctl#data[4]#dir#stp#nxt#data[0]#data[1]#data[2]#data[3]#clk#data[5]#data[6]#data[7]#clk#cmd#data[0]#data[1]#data[2]#data[3]#wp#cd#tx#rx#gpio[50]#gpio[51]#mdc#mdio} \ + CONFIG.PCW_NAND_GRP_D8_ENABLE {0} \ + CONFIG.PCW_NAND_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_A25_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_CS0_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_CS1_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_CS0_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_CS1_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_INT_ENABLE {0} \ + 
CONFIG.PCW_NOR_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_PCAP_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_PJTAG_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_PRESET_BANK0_VOLTAGE {LVCMOS 3.3V} \ + CONFIG.PCW_PRESET_BANK1_VOLTAGE {LVCMOS 1.8V} \ + CONFIG.PCW_QSPI_GRP_FBCLK_ENABLE {0} \ + CONFIG.PCW_QSPI_GRP_IO1_ENABLE {0} \ + CONFIG.PCW_QSPI_GRP_SINGLE_SS_ENABLE {1} \ + CONFIG.PCW_QSPI_GRP_SINGLE_SS_IO {MIO 1 .. 6} \ + CONFIG.PCW_QSPI_GRP_SS1_ENABLE {0} \ + CONFIG.PCW_QSPI_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_QSPI_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_QSPI_PERIPHERAL_FREQMHZ {200} \ + CONFIG.PCW_QSPI_QSPI_IO {MIO 1 .. 6} \ + CONFIG.PCW_SD0_GRP_CD_ENABLE {1} \ + CONFIG.PCW_SD0_GRP_CD_IO {MIO 47} \ + CONFIG.PCW_SD0_GRP_POW_ENABLE {0} \ + CONFIG.PCW_SD0_GRP_WP_ENABLE {1} \ + CONFIG.PCW_SD0_GRP_WP_IO {MIO 46} \ + CONFIG.PCW_SD0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_SD0_SD0_IO {MIO 40 .. 45} \ + CONFIG.PCW_SDIO_PERIPHERAL_DIVISOR0 {20} \ + CONFIG.PCW_SDIO_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_SDIO_PERIPHERAL_VALID {1} \ + CONFIG.PCW_SINGLE_QSPI_DATA_MODE {x4} \ + CONFIG.PCW_SMC_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_SPI_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_S_AXI_HP2_DATA_WIDTH {64} \ + CONFIG.PCW_TPIU_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TTC0_CLK0_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_CLK1_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_CLK2_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_TTC0_TTC0_IO {EMIO} \ + CONFIG.PCW_TTC_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_UART1_GRP_FULL_ENABLE {0} \ + CONFIG.PCW_UART1_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_UART1_UART1_IO {MIO 48 .. 
49} \ + CONFIG.PCW_UART_PERIPHERAL_DIVISOR0 {20} \ + CONFIG.PCW_UART_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_UART_PERIPHERAL_VALID {1} \ + CONFIG.PCW_UIPARAM_ACT_DDR_FREQ_MHZ {533.333374} \ + CONFIG.PCW_UIPARAM_DDR_BANK_ADDR_COUNT {3} \ + CONFIG.PCW_UIPARAM_DDR_BL {8} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY0 {0.41} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY1 {0.411} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY2 {0.341} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY3 {0.358} \ + CONFIG.PCW_UIPARAM_DDR_CL {7} \ + CONFIG.PCW_UIPARAM_DDR_COL_ADDR_COUNT {10} \ + CONFIG.PCW_UIPARAM_DDR_CWL {6} \ + CONFIG.PCW_UIPARAM_DDR_DEVICE_CAPACITY {2048 MBits} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_0 {0.025} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_1 {0.028} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_2 {0.001} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_3 {0.001} \ + CONFIG.PCW_UIPARAM_DDR_DRAM_WIDTH {16 Bits} \ + CONFIG.PCW_UIPARAM_DDR_FREQ_MHZ {533.333313} \ + CONFIG.PCW_UIPARAM_DDR_MEMORY_TYPE {DDR 3} \ + CONFIG.PCW_UIPARAM_DDR_PARTNO {MT41J128M16 HA-15E} \ + CONFIG.PCW_UIPARAM_DDR_ROW_ADDR_COUNT {14} \ + CONFIG.PCW_UIPARAM_DDR_SPEED_BIN {DDR3_1066F} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_DATA_EYE {1} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_READ_GATE {1} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_WRITE_LEVEL {1} \ + CONFIG.PCW_UIPARAM_DDR_T_FAW {45.0} \ + CONFIG.PCW_UIPARAM_DDR_T_RAS_MIN {36.0} \ + CONFIG.PCW_UIPARAM_DDR_T_RC {49.5} \ + CONFIG.PCW_UIPARAM_DDR_T_RCD {7} \ + CONFIG.PCW_UIPARAM_DDR_T_RP {7} \ + CONFIG.PCW_UIPARAM_DDR_USE_INTERNAL_VREF {1} \ + CONFIG.PCW_USB0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_USB0_PERIPHERAL_FREQMHZ {60} \ + CONFIG.PCW_USB0_RESET_ENABLE {0} \ + CONFIG.PCW_USB0_USB0_IO {MIO 28 .. 
39} \ + CONFIG.PCW_USB1_RESET_ENABLE {0} \ + CONFIG.PCW_USB_RESET_ENABLE {1} \ + CONFIG.PCW_USB_RESET_SELECT {Share reset pin} \ + CONFIG.PCW_USE_S_AXI_HP0 {1} \ + CONFIG.PCW_USE_S_AXI_HP1 {1} \ + CONFIG.PCW_USE_S_AXI_HP2 {1} \ + CONFIG.PCW_USE_S_AXI_HP3 {1} \ + CONFIG.preset {ZedBoard} \ + ] $processing_system7_0 + + # Create instance: ps7_0_axi_periph, and set properties + set ps7_0_axi_periph [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 ps7_0_axi_periph ] + set_property -dict [ list \ + CONFIG.NUM_MI {7} \ + ] $ps7_0_axi_periph + + # Create instance: rst_ps7_0_100M, and set properties + set rst_ps7_0_100M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ps7_0_100M ] + + # Create instance: s_dma, and set properties + set s_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 s_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $s_dma + + # Create instance: u_dma, and set properties + set u_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 u_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $u_dma + + # Create instance: v_dma, and set properties + set v_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 v_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + 
CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $v_dma + + # Create instance: x_dma, and set properties + set x_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 x_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $x_dma + + # Create instance: y_dma, and set properties + set y_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 y_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s {0} \ + CONFIG.c_include_s2mm_dre {1} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_micro_dma {0} \ + CONFIG.c_s2mm_burst_size {128} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $y_dma + + # Create interface connections + connect_bd_intf_net -intf_net HlsSvdKernel_0_y_port [get_bd_intf_pins HlsSvdKernel_0/y_port] [get_bd_intf_pins y_dma/S_AXIS_S2MM] + connect_bd_intf_net -intf_net axi_mem_intercon_1_M00_AXI [get_bd_intf_pins axi_mem_intercon_1/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP1] + connect_bd_intf_net -intf_net axi_mem_intercon_2_M00_AXI [get_bd_intf_pins axi_mem_intercon_2/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP2] + connect_bd_intf_net -intf_net axi_mem_intercon_3_M00_AXI [get_bd_intf_pins axi_mem_intercon_3/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP3] + connect_bd_intf_net -intf_net axi_mem_intercon_M00_AXI [get_bd_intf_pins axi_mem_intercon/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP0] + connect_bd_intf_net -intf_net processing_system7_0_DDR [get_bd_intf_ports DDR] [get_bd_intf_pins processing_system7_0/DDR] + connect_bd_intf_net -intf_net processing_system7_0_FIXED_IO [get_bd_intf_ports FIXED_IO] [get_bd_intf_pins processing_system7_0/FIXED_IO] + 
connect_bd_intf_net -intf_net processing_system7_0_M_AXI_GP0 [get_bd_intf_pins processing_system7_0/M_AXI_GP0] [get_bd_intf_pins ps7_0_axi_periph/S00_AXI] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M00_AXI [get_bd_intf_pins HlsSvdKernel_0/s_axi_control] [get_bd_intf_pins ps7_0_axi_periph/M00_AXI] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M01_AXI [get_bd_intf_pins ps7_0_axi_periph/M01_AXI] [get_bd_intf_pins v_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M02_AXI [get_bd_intf_pins ps7_0_axi_periph/M02_AXI] [get_bd_intf_pins x_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M03_AXI [get_bd_intf_pins ps7_0_axi_periph/M03_AXI] [get_bd_intf_pins y_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M05_AXI [get_bd_intf_pins ps7_0_axi_periph/M05_AXI] [get_bd_intf_pins s_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M06_AXI [get_bd_intf_pins ps7_0_axi_periph/M06_AXI] [get_bd_intf_pins u_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net s_dma_M_AXIS_MM2S [get_bd_intf_pins HlsSvdKernel_0/s_port] [get_bd_intf_pins s_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net s_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon_1/S01_AXI] [get_bd_intf_pins s_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net u_dma_M_AXIS_MM2S [get_bd_intf_pins HlsSvdKernel_0/u_port] [get_bd_intf_pins u_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net u_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon_1/S00_AXI] [get_bd_intf_pins v_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net u_dma_M_AXI_MM2S1 [get_bd_intf_pins axi_mem_intercon_3/S00_AXI] [get_bd_intf_pins u_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net v_dma_M_AXIS_MM2S [get_bd_intf_pins HlsSvdKernel_0/v_port] [get_bd_intf_pins v_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net x_dma_M_AXIS_MM2S [get_bd_intf_pins HlsSvdKernel_0/x_port] [get_bd_intf_pins x_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net x_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon/S00_AXI] 
[get_bd_intf_pins x_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net xu_dma_M_AXI_S2MM [get_bd_intf_pins axi_mem_intercon_2/S00_AXI] [get_bd_intf_pins y_dma/M_AXI_S2MM] + + # Create port connections + connect_bd_net -net processing_system7_0_FCLK_CLK0 [get_bd_pins HlsSvdKernel_0/ap_clk] [get_bd_pins axi_mem_intercon/ACLK] [get_bd_pins axi_mem_intercon/M00_ACLK] [get_bd_pins axi_mem_intercon/S00_ACLK] [get_bd_pins axi_mem_intercon_1/ACLK] [get_bd_pins axi_mem_intercon_1/M00_ACLK] [get_bd_pins axi_mem_intercon_1/S00_ACLK] [get_bd_pins axi_mem_intercon_1/S01_ACLK] [get_bd_pins axi_mem_intercon_2/ACLK] [get_bd_pins axi_mem_intercon_2/M00_ACLK] [get_bd_pins axi_mem_intercon_2/S00_ACLK] [get_bd_pins axi_mem_intercon_3/ACLK] [get_bd_pins axi_mem_intercon_3/M00_ACLK] [get_bd_pins axi_mem_intercon_3/S00_ACLK] [get_bd_pins processing_system7_0/FCLK_CLK0] [get_bd_pins processing_system7_0/M_AXI_GP0_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP0_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP1_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP2_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP3_ACLK] [get_bd_pins ps7_0_axi_periph/ACLK] [get_bd_pins ps7_0_axi_periph/M00_ACLK] [get_bd_pins ps7_0_axi_periph/M01_ACLK] [get_bd_pins ps7_0_axi_periph/M02_ACLK] [get_bd_pins ps7_0_axi_periph/M03_ACLK] [get_bd_pins ps7_0_axi_periph/M04_ACLK] [get_bd_pins ps7_0_axi_periph/M05_ACLK] [get_bd_pins ps7_0_axi_periph/M06_ACLK] [get_bd_pins ps7_0_axi_periph/S00_ACLK] [get_bd_pins rst_ps7_0_100M/slowest_sync_clk] [get_bd_pins s_dma/m_axi_mm2s_aclk] [get_bd_pins s_dma/s_axi_lite_aclk] [get_bd_pins u_dma/m_axi_mm2s_aclk] [get_bd_pins u_dma/s_axi_lite_aclk] [get_bd_pins v_dma/m_axi_mm2s_aclk] [get_bd_pins v_dma/s_axi_lite_aclk] [get_bd_pins x_dma/m_axi_mm2s_aclk] [get_bd_pins x_dma/s_axi_lite_aclk] [get_bd_pins y_dma/m_axi_s2mm_aclk] [get_bd_pins y_dma/s_axi_lite_aclk] + connect_bd_net -net processing_system7_0_FCLK_RESET0_N [get_bd_pins processing_system7_0/FCLK_RESET0_N] [get_bd_pins 
rst_ps7_0_100M/ext_reset_in] + connect_bd_net -net rst_ps7_0_100M_peripheral_aresetn [get_bd_pins HlsSvdKernel_0/ap_rst_n] [get_bd_pins axi_mem_intercon/ARESETN] [get_bd_pins axi_mem_intercon/M00_ARESETN] [get_bd_pins axi_mem_intercon/S00_ARESETN] [get_bd_pins axi_mem_intercon_1/ARESETN] [get_bd_pins axi_mem_intercon_1/M00_ARESETN] [get_bd_pins axi_mem_intercon_1/S00_ARESETN] [get_bd_pins axi_mem_intercon_1/S01_ARESETN] [get_bd_pins axi_mem_intercon_2/ARESETN] [get_bd_pins axi_mem_intercon_2/M00_ARESETN] [get_bd_pins axi_mem_intercon_2/S00_ARESETN] [get_bd_pins axi_mem_intercon_3/ARESETN] [get_bd_pins axi_mem_intercon_3/M00_ARESETN] [get_bd_pins axi_mem_intercon_3/S00_ARESETN] [get_bd_pins ps7_0_axi_periph/ARESETN] [get_bd_pins ps7_0_axi_periph/M00_ARESETN] [get_bd_pins ps7_0_axi_periph/M01_ARESETN] [get_bd_pins ps7_0_axi_periph/M02_ARESETN] [get_bd_pins ps7_0_axi_periph/M03_ARESETN] [get_bd_pins ps7_0_axi_periph/M04_ARESETN] [get_bd_pins ps7_0_axi_periph/M05_ARESETN] [get_bd_pins ps7_0_axi_periph/M06_ARESETN] [get_bd_pins ps7_0_axi_periph/S00_ARESETN] [get_bd_pins rst_ps7_0_100M/peripheral_aresetn] [get_bd_pins s_dma/axi_resetn] [get_bd_pins u_dma/axi_resetn] [get_bd_pins v_dma/axi_resetn] [get_bd_pins x_dma/axi_resetn] [get_bd_pins y_dma/axi_resetn] + + # Create address segments + assign_bd_address -offset 0x40000000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs HlsSvdKernel_0/s_axi_control/Reg] -force + assign_bd_address -offset 0x41E40000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs s_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E00000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs v_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E50000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs u_dma/S_AXI_LITE/Reg] 
-force + assign_bd_address -offset 0x41E10000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs x_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E20000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs y_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces s_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP1/HP1_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces u_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP3/HP3_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces v_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP1/HP1_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces x_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP0/HP0_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces y_dma/Data_S2MM] [get_bd_addr_segs processing_system7_0/S_AXI_HP2/HP2_DDR_LOWOCM] -force + + + # Restore current instance + current_bd_instance $oldCurInst + + validate_bd_design + save_bd_design +} +# End of create_root_design() + + +################################################################## +# MAIN FLOW +################################################################## + +create_root_design "" + + diff --git a/pynq/kernel_u/kernel_u.ipynb b/pynq/kernel_u/kernel_u.ipynb new file mode 100644 index 0000000..51e133c --- /dev/null +++ b/pynq/kernel_u/kernel_u.ipynb @@ -0,0 +1,558 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing Kernel-U\n", + "\n", + "This notebook will test an IP written in Vivado HLS." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%microblaze/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%pybind11/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pynq import Overlay\n", + "import pynq.lib.dma\n", + "from pynq import allocate\n", + "import numpy as np\n", + "from pynq import DefaultIP\n", + "import timeit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Program FPGA and inspect Overlay." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "overlay = Overlay(\"overlay/kernel_u.bit\")\n", + "overlay?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get the kernel register map." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RegisterMap {\n", + " CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0),\n", + " GIER = Register(Enable=0, RESERVED=0),\n", + " IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),\n", + " IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),\n", + " num_refinements = Register(num_refinements=0)\n", + "}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel = overlay.HlsAxisKernelU_0\n", + "kernel.register_map\n", + "# print(\"stream size: \", adder.stream_size)\n", + "# accel_state = adder.get_state()\n", + "# print(\"accelerator state: \", accel_state)\n", + "# dma = overlay.axi_dma_0\n", + "# dma.register_map.MM2S_DMASR\n", + "# dma.register_map.S2MM_DMACR" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kernel IP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The kernel IP can be automatically bound by first creating our Kernel class. Then, the overlay can be instantiated again." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class KernelDriver(DefaultIP):\n", + " def __init__(self, description):\n", + " super().__init__(description=description)\n", + " \n", + " bindto = ['xilinx.com:hls:HlsAxisKernelU:1.0']\n", + "\n", + " def start_accel(self):\n", + " self.register_map.CTRL.AP_START = 1\n", + " self.write(0x0, 1)\n", + " self.write(0x0, 1)\n", + " while(self.read(0x0) % 2 == 0):\n", + " self.write(0x0, 1)\n", + " pass # Wait until start, i.e. 
bit 0, is set.\n", + "\n", + " def set_state(self, state):\n", + " # self.register_map.CTRL = state\n", + " # return self.register_map.CTRL\n", + " self.write(0x0, state)\n", + " return self.read(0x0)\n", + "\n", + " def get_state(self):\n", + " return self.register_map.CTRL\n", + " # return self.read(0x0)\n", + "\n", + " @property\n", + " def num_refinements(self):\n", + " return self.register_map.num_refinements\n", + " # return self.read(0x10)\n", + "\n", + " @num_refinements.setter\n", + " def num_refinements(self, R):\n", + " # self.register_map.num_refinements = R\n", + " self.write(0x10, R)\n", + "\n", + "overlay = Overlay(\"overlay/kernel_u.bit\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check the kernel again:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel_u = overlay.HlsAxisKernelU_0\n", + "kernel_u.get_state()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel_u.read(0x10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To show the class is working, we set `num_refinements` using the setter method. We then read its corresponding register."
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel_u.num_refinements = 1\n", + "kernel_u.read(0x10)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0x4\n", + "0x4\n" + ] + }, + { + "data": { + "text/plain": [ + "Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(kernel_u.get_state())\n", + "# kernel_u.start_accel()\n", + "print(kernel_u.get_state())\n", + "kernel_u.get_state()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Allocation and Run\n", + "\n", + "The data structures must be contiguously allocated." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Buffers setup completed.\n", + "x_buffer.shape: (2, 512) - Bytes: 2048\n", + "u_buffer.shape: (128, 128, 4, 4) - Bytes: 524288\n", + "xu_buffer.shape: (128, 4, 2) - Bytes: 2048\n" + ] + } + ], + "source": [ + "# The following parameters are fixed in hardware and cannot be changed:\n", + "# - The number of inputs N\n", + "# - The input size I\n", + "# - The number of gates G\n", + "# - The tile size Tu\n", + "I = 512\n", + "G = 4\n", + "N = 2\n", + "Tu = 4\n", + "data_t = np.int16\n", + "# The number of refinements R can instead be adjusted.\n", + "R = 128\n", + "\n", + "x_buffer = pynq.allocate(shape=(N, I,), dtype=data_t)\n", + "u_buffer = pynq.allocate(shape=(R, I // Tu, G, Tu), dtype=data_t)\n", + "xu_buffer = pynq.allocate(shape=(R, G, N,), dtype=data_t)\n", + "\n", + "for i in range(N):\n", + " for j in range(I):\n", + " # for ii in range(R):\n", + " x_buffer[i, j] = data_t(np.random.uniform(low=-2**15, high=2**15))\n", + "\n", + "for i in range(R):\n", + " for j in range(I // Tu):\n", + " for k in range(G):\n", + " for ii in range(Tu):\n", + " u_buffer[i, j, k, ii] = data_t(np.random.uniform(low=-2**15, high=2**15))\n", + "\n", + "for i in range(R):\n", + " for j in range(G):\n", + " for k in range(N):\n", + " xu_buffer[i, j, k] = 0\n", + "\n", + "print('Buffers setup completed.')\n", + "print(f'x_buffer.shape: {x_buffer.shape} - Bytes: {x_buffer.nbytes}')\n", + "print(f'u_buffer.shape: {u_buffer.shape} - Bytes: {u_buffer.nbytes}')\n", + "print(f'xu_buffer.shape: {xu_buffer.shape} - Bytes: {xu_buffer.nbytes}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Setup the kernel and then send the data through the DMAs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0x4\n", + "0x1\n", + "Starting transfer:\n", + "Wait x...DONE.\n", + "Wait u...DONE.\n", + "Wait xu...DONE.\n", + "\n", + "xu_buffer.shape: (128, 4, 2)\n" + ] + } + ], + "source": [ + "kernel_u.num_refinements = R\n", + "print(kernel_u.get_state())\n", + "kernel_u.start_accel()\n", + "print(kernel_u.get_state())\n", + "\n", + "# Transfer\n", + "print('Starting transfer:')\n", + "overlay.x_dma.sendchannel.transfer(x_buffer)\n", + "overlay.u_dma.sendchannel.transfer(u_buffer)\n", + "overlay.xu_dma.recvchannel.transfer(xu_buffer)\n", + "# Then wait\n", + "print('Wait x...', end='')\n", + "overlay.x_dma.sendchannel.wait()\n", + "print('DONE.\\nWait u...', end='')\n", + "overlay.u_dma.sendchannel.wait()\n", + "print('DONE.\\nWait xu...', end='')\n", + "overlay.xu_dma.recvchannel.wait()\n", + "print('DONE.\\n')\n", + "\n", + "print(f'xu_buffer.shape: {xu_buffer.shape}')\n", + "# print(f'xu_buffer: {xu_buffer}')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def run_kernel(R, x_buffer, u_buffer, xu_buffer):\n", + " kernel_u.num_refinements = R\n", + " kernel_u.start_accel()\n", + " # Transfer\n", + " overlay.x_dma.sendchannel.transfer(x_buffer)\n", + " overlay.u_dma.sendchannel.transfer(u_buffer)\n", + " overlay.xu_dma.recvchannel.transfer(xu_buffer)\n", + " # Then wait\n", + " overlay.x_dma.sendchannel.wait()\n", + " overlay.u_dma.sendchannel.wait()\n", + " overlay.xu_dma.recvchannel.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 loops, best of 3: 148 ms per loop\n" + ] + } + ], + "source": [ + "%timeit run_kernel(R, x_buffer, u_buffer, xu_buffer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 
Checking Correctness\n", + "\n", + "We first find the proper reshape mechanisms:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-1.25823639 1.03248304 -0.3389279 -0.26103506] [-1.25823639 1.03248304 -0.3389279 -0.26103506]\n", + "0.0\n", + "[ 0.38526848 -0.34712276 -0.39317614 0.77762274] [ 0.38526848 -0.34712276 -0.39317614 0.77762274]\n", + "0.0\n", + "(128, 4, 2)\n" + ] + } + ], + "source": [ + "# =============================================================================\n", + "# Reshape: (R, I, G) => (R, I // Tu, G, Tu)\n", + "# =============================================================================\n", + "u = np.random.randn(R, I, G)\n", + "u_tmp = u.copy()\n", + "u_tmp = np.transpose(u_tmp.reshape(R, I // Tu, Tu, G), (0, 1, 3, 2))\n", + "print(u[0, 0:4, 0], u_tmp[0, 0, 0, 0:4])\n", + "print(u[0, 3, 0] - u_tmp[0, 0, 0, 3])\n", + "\n", + "# =============================================================================\n", + "# Reshape: (R, I // Tu, G, Tu) => (I, G, R)\n", + "# =============================================================================\n", + "u = np.random.randn(R, I // Tu, G, Tu)\n", + "u_tmp = u.copy()\n", + "u_tmp = np.transpose(u_tmp, (1, 3, 2, 0)).reshape(I, G, R)\n", + "print(u[0, 0, 0, 0:4], u_tmp[0:4, 0, 0])\n", + "print(u[0, 0, 0, 3] - u_tmp[3, 0, 0])\n", + "\n", + "x = np.random.randn(N, I)\n", + "u = np.random.randn(I, G, R)\n", + "x = (x * 2).astype(np.int16)\n", + "u = (u * 2).astype(np.int16)\n", + "\n", + "xu = np.transpose(np.tensordot(x, u, axes=1), (2, 1, 0))\n", + "print(xu.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now check the Numpy computation against the FPGA result." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "All equal: True\n", + "gold[0]: [[ -3634 -22667]\n", + " [ 31065 15347]\n", + " [ 22140 -9595]\n", + " [ 9106 26136]]\n", + "fpga[0]: [[ -3634 -22667]\n", + " [ 31065 15347]\n", + " [ 22140 -9595]\n", + " [ 9106 26136]]\n" + ] + } + ], + "source": [ + "u_tmp = np.transpose(u_buffer, (1, 3, 2, 0)).reshape(I, G, R)\n", + "xu_gold = np.transpose(np.tensordot(x_buffer, u_tmp, axes=1), (2, 1, 0))\n", + "print('\\nAll equal:', np.allclose(xu_buffer, xu_gold))\n", + "print('gold[0]: ', xu_gold[0])\n", + "print('fpga[0]: ', xu_buffer[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pynq/kernel_u/kernel_u_hier.ipynb b/pynq/kernel_u/kernel_u_hier.ipynb new file mode 100644 index 0000000..78cd282 --- /dev/null +++ b/pynq/kernel_u/kernel_u_hier.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing an IP that adds 1 to a stream\n", + "\n", + "This notebook will test an IP written in Vivado HLS. The IP adds +1 to a buffer. The HP ports **must** be configured at 64bit, not 32bit." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from pynq import Overlay\n", + "import pynq.lib.dma\n", + "from pynq import allocate\n", + "import numpy as np\n", + "from pynq import DefaultIP\n", + "from pynq import DefaultHierarchy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We need to define our own class **before** instantiating the overlay. In this way it will be automatically bound. We can use an accelerator driver as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "class AdderDriver(DefaultIP):\n", + " def __init__(self, description):\n", + " super().__init__(description=description)\n", + " bindto = [\"xilinx.com:hls:hls_adder:1.0\"]\n", + "\n", + " def start_accel(self):\n", + " self.write(0x0, 1)\n", + "\n", + " def set_state(self, state):\n", + " self.write(0x0, state)\n", + " return self.read(0x0)\n", + "\n", + " def get_state(self):\n", + " return self.read(0x0)\n", + "\n", + " @property\n", + " def stream_size(self):\n", + " return self.read(0x10)\n", + "\n", + " @stream_size.setter\n", + " def stream_size(self, size):\n", + " self.write(0x10, size)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, it is handier to use a Hierarchy class, as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "class StreamAdderDriver(DefaultHierarchy):\n", + " def __init__(self, description):\n", + " super().__init__(description)\n", + "\n", + " def stream_add(self, stream):\n", + " in_buffer = allocate(shape=(len(stream),), dtype=np.float32)\n", + " out_buffer = allocate(shape=(len(stream),), dtype=np.float32)\n", + " for i, elem in enumerate(stream):\n", + " in_buffer[i] = elem\n", + " # NOTE: for managing the HLS accelerator, we exploit\n", + " # the driver that we defined above.\n", + " 
self.hls_adder.stream_size = len(stream)\n", + " self.hls_adder.start_accel() # NOTE: The start must be sent before setting the other arguments \n", + " self.dma.sendchannel.transfer(in_buffer)\n", + " self.dma.recvchannel.transfer(out_buffer)\n", + " self.dma.sendchannel.wait()\n", + " self.dma.recvchannel.wait()\n", + " result = out_buffer.view(dtype=np.float32).copy()\n", + " del in_buffer, out_buffer\n", + " return result\n", + "\n", + " @staticmethod\n", + " def checkhierarchy(description):\n", + " \"\"\"\n", + " An Hierarchy that meets these requirements will be\n", + " automatically registered to this driver.\n", + " \"\"\"\n", + " if \"dma\" in description[\"ip\"] and \"hls_adder\" in description[\"ip\"]:\n", + " return True\n", + " return False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can instantiate the overlay, so that the drivers above will be automatically registered." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "overlay = Overlay(\"overlay/streamed_add_hier.bit\", download=False)\n", + "# overlay.download()\n", + "# overlay?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Width of Buffer Length Register\n", + "This integer value specifies the number of valid bits used for the Control field buffer length and Status field bytes transferred in the Scatter/Gather descriptors. It also specifies the number of valid bits in the RX Length of the Status Stream App4 field when Use Rxlength is enabled. For Direct Register Mode, it specifies the number of valid bits in the MM2S_LENGTH and S2MM_LENGTH registers. The length width directly correlates to the number of bytes being specified in a Scatter/Gather descriptor or number of bytes being specified in App4.RxLength, MM2S_LENGTH, or S2MM_LENGTH. The maximum number of bytes is equal to 2^(Length Width) - 1, so a Length Width of 26 gives a maximum byte count of 67,108,863 bytes. 
This value should be set to 23 for Multichannel mode." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1.00000000e+00 4.14159298e+00 7.28318548e+00 ..., 3.20856616e+03\n", + " 3.21170776e+03 3.21484937e+03]\n", + "[ True True True ..., True True True]\n", + "3.469756501941687e-05\n" + ] + } + ], + "source": [ + "stream = [i * np.pi for i in range(1024)]\n", + "# print(stream)\n", + "out_stream = overlay.adder.stream_add(stream)\n", + "print(out_stream)\n", + "print(np.isclose(np.array(stream) + 1, out_stream))\n", + "print(np.abs((np.array(stream) - (out_stream - 1))).mean())\n", + "\n", + "# # NOTE: The following is a neat way of printing the np.floats in HEX format. \n", + "# for orig, f32, u32 in zip(np.array(stream, dtype=np.float32).view(dtype=np.uint32), out_stream, out_stream.view(dtype=np.uint32)):\n", + "# print(\"{:x}\\t{:03.3}\\t{:x}\".format(orig, f32, u32))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pynq/kernel_u/overlay/kernel_u.bit b/pynq/kernel_u/overlay/kernel_u.bit new file mode 100644 index 0000000..06eadae Binary files /dev/null and b/pynq/kernel_u/overlay/kernel_u.bit differ diff --git a/pynq/kernel_u/overlay/kernel_u.hwh b/pynq/kernel_u/overlay/kernel_u.hwh new file mode 100644 index 0000000..4715498 --- /dev/null +++ b/pynq/kernel_u/overlay/kernel_u.hwhdiff --git 
a/pynq/kernel_u/overlay/kernel_u.tcl b/pynq/kernel_u/overlay/kernel_u.tcl new file mode 100644 index 0000000..bc90a73 --- /dev/null +++ b/pynq/kernel_u/overlay/kernel_u.tcl @@ -0,0 +1,1088 @@ + +################################################################ +# This is a generated script based on design: design_1 +# +# Though there are limitations about the generated script, +# the main purpose of this utility is to make learning +# IP Integrator Tcl commands easier. +################################################################ + +namespace eval _tcl { +proc get_script_folder {} { + set script_path [file normalize [info script]] + set script_folder [file dirname $script_path] + return $script_folder +} +} +variable script_folder +set script_folder [_tcl::get_script_folder] + +################################################################ +# Check if script is running in correct Vivado version. +################################################################ +set scripts_vivado_version 2018.3 +set current_vivado_version [version -short] + +if { [string first $scripts_vivado_version $current_vivado_version] == -1 } { + puts "" + catch {common::send_msg_id "BD_TCL-109" "ERROR" "This script was generated using Vivado <$scripts_vivado_version> and is being run in <$current_vivado_version> of Vivado. Please run the script in Vivado <$scripts_vivado_version> then open the design in Vivado <$current_vivado_version>. 
Upgrade the design by running \"Tools => Report => Report IP Status...\", then run write_bd_tcl to create an updated script."} + + return 1 +} + +################################################################ +# START +################################################################ + +# To test this script, run the following commands from Vivado Tcl console: +# source design_1_script.tcl + +# If there is no project opened, this script will create a +# project, but make sure you do not have an existing project +# <./myproj/project_1.xpr> in the current working folder. + +set list_projs [get_projects -quiet] +if { $list_projs eq "" } { + create_project project_1 myproj -part xc7z020clg400-1 + set_property BOARD_PART www.digilentinc.com:pynq-z1:part0:1.0 [current_project] +} + + +# CHANGE DESIGN NAME HERE +variable design_name +set design_name design_1 + +# If you do not already have an existing IP Integrator design open, +# you can create a design using the following command: +# create_bd_design $design_name + +# Creating design if needed +set errMsg "" +set nRet 0 + +set cur_design [current_bd_design -quiet] +set list_cells [get_bd_cells -quiet] + +if { ${design_name} eq "" } { + # USE CASES: + # 1) Design_name not set + + set errMsg "Please set the variable to a non-empty value." + set nRet 1 + +} elseif { ${cur_design} ne "" && ${list_cells} eq "" } { + # USE CASES: + # 2): Current design opened AND is empty AND names same. + # 3): Current design opened AND is empty AND names diff; design_name NOT in project. + # 4): Current design opened AND is empty AND names diff; design_name exists in project. + + if { $cur_design ne $design_name } { + common::send_msg_id "BD_TCL-001" "INFO" "Changing value of from <$design_name> to <$cur_design> since current design is empty." + set design_name [get_property NAME $cur_design] + } + common::send_msg_id "BD_TCL-002" "INFO" "Constructing design in IPI design <$cur_design>..." 
+ +} elseif { ${cur_design} ne "" && $list_cells ne "" && $cur_design eq $design_name } { + # USE CASES: + # 5) Current design opened AND has components AND same names. + + set errMsg "Design <$design_name> already exists in your project, please set the variable to another value." + set nRet 1 +} elseif { [get_files -quiet ${design_name}.bd] ne "" } { + # USE CASES: + # 6) Current opened design, has components, but diff names, design_name exists in project. + # 7) No opened design, design_name exists in project. + + set errMsg "Design <$design_name> already exists in your project, please set the variable to another value." + set nRet 2 + +} else { + # USE CASES: + # 8) No opened design, design_name not in project. + # 9) Current opened design, has components, but diff names, design_name not in project. + + common::send_msg_id "BD_TCL-003" "INFO" "Currently there is no design <$design_name> in project, so creating one..." + + create_bd_design $design_name + + common::send_msg_id "BD_TCL-004" "INFO" "Making design <$design_name> as current_bd_design." + current_bd_design $design_name + +} + +common::send_msg_id "BD_TCL-005" "INFO" "Currently the variable is equal to \"$design_name\"." + +if { $nRet != 0 } { + catch {common::send_msg_id "BD_TCL-114" "ERROR" $errMsg} + return $nRet +} + +set bCheckIPsPassed 1 +################################################################## +# CHECK IPs +################################################################## +set bCheckIPs 1 +if { $bCheckIPs == 1 } { + set list_check_ips "\ +xilinx.com:hls:HlsAxisKernelU:1.0\ +xilinx.com:ip:processing_system7:5.5\ +xilinx.com:ip:proc_sys_reset:5.0\ +xilinx.com:ip:axi_dma:7.1\ +" + + set list_ips_missing "" + common::send_msg_id "BD_TCL-006" "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." 
+ + foreach ip_vlnv $list_check_ips { + set ip_obj [get_ipdefs -all $ip_vlnv] + if { $ip_obj eq "" } { + lappend list_ips_missing $ip_vlnv + } + } + + if { $list_ips_missing ne "" } { + catch {common::send_msg_id "BD_TCL-115" "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." } + set bCheckIPsPassed 0 + } + +} + +if { $bCheckIPsPassed != 1 } { + common::send_msg_id "BD_TCL-1003" "WARNING" "Will not continue with creation of design due to the error(s) above." + return 3 +} + +################################################################## +# DESIGN PROCs +################################################################## + + + +# Procedure to create entire design; Provide argument to make +# procedure reusable. If parentCell is "", will use root. +proc create_root_design { parentCell } { + + variable script_folder + variable design_name + + if { $parentCell eq "" } { + set parentCell [get_bd_cells /] + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_msg_id "BD_TCL-100" "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_msg_id "BD_TCL-101" "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] 
+ + # Set parent object as current + current_bd_instance $parentObj + + + # Create interface ports + set DDR [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddrx_rtl:1.0 DDR ] + set FIXED_IO [ create_bd_intf_port -mode Master -vlnv xilinx.com:display_processing_system7:fixedio_rtl:1.0 FIXED_IO ] + + # Create ports + + # Create instance: HlsAxisKernelU_0, and set properties + set HlsAxisKernelU_0 [ create_bd_cell -type ip -vlnv xilinx.com:hls:HlsAxisKernelU:1.0 HlsAxisKernelU_0 ] + + # Create instance: axi_mem_intercon, and set properties + set axi_mem_intercon [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon + + # Create instance: axi_mem_intercon_1, and set properties + set axi_mem_intercon_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_1 ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon_1 + + # Create instance: axi_mem_intercon_2, and set properties + set axi_mem_intercon_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_2 ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon_2 + + # Create instance: processing_system7_0, and set properties + set processing_system7_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 processing_system7_0 ] + set_property -dict [ list \ + CONFIG.PCW_ACT_APU_PERIPHERAL_FREQMHZ {650.000000} \ + CONFIG.PCW_ACT_CAN0_PERIPHERAL_FREQMHZ {23.8095} \ + CONFIG.PCW_ACT_CAN1_PERIPHERAL_FREQMHZ {23.8095} \ + CONFIG.PCW_ACT_CAN_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_DCI_PERIPHERAL_FREQMHZ {10.096154} \ + CONFIG.PCW_ACT_ENET0_PERIPHERAL_FREQMHZ {125.000000} \ + CONFIG.PCW_ACT_ENET1_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA0_PERIPHERAL_FREQMHZ {100.000000} \ + CONFIG.PCW_ACT_FPGA1_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA2_PERIPHERAL_FREQMHZ 
{10.000000} \ + CONFIG.PCW_ACT_FPGA3_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_I2C_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_ACT_PCAP_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_QSPI_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_SDIO_PERIPHERAL_FREQMHZ {50.000000} \ + CONFIG.PCW_ACT_SMC_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_SPI_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_TPIU_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_TTC0_CLK0_PERIPHERAL_FREQMHZ {108.333336} \ + CONFIG.PCW_ACT_TTC0_CLK1_PERIPHERAL_FREQMHZ {108.333336} \ + CONFIG.PCW_ACT_TTC0_CLK2_PERIPHERAL_FREQMHZ {108.333336} \ + CONFIG.PCW_ACT_TTC1_CLK0_PERIPHERAL_FREQMHZ {108.333336} \ + CONFIG.PCW_ACT_TTC1_CLK1_PERIPHERAL_FREQMHZ {108.333336} \ + CONFIG.PCW_ACT_TTC1_CLK2_PERIPHERAL_FREQMHZ {108.333336} \ + CONFIG.PCW_ACT_TTC_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_ACT_UART_PERIPHERAL_FREQMHZ {100.000000} \ + CONFIG.PCW_ACT_USB0_PERIPHERAL_FREQMHZ {60} \ + CONFIG.PCW_ACT_USB1_PERIPHERAL_FREQMHZ {60} \ + CONFIG.PCW_ACT_WDT_PERIPHERAL_FREQMHZ {108.333336} \ + CONFIG.PCW_APU_CLK_RATIO_ENABLE {6:2:1} \ + CONFIG.PCW_APU_PERIPHERAL_FREQMHZ {650} \ + CONFIG.PCW_ARMPLL_CTRL_FBDIV {26} \ + CONFIG.PCW_CAN0_BASEADDR {0xE0008000} \ + CONFIG.PCW_CAN0_HIGHADDR {0xE0008FFF} \ + CONFIG.PCW_CAN0_PERIPHERAL_CLKSRC {External} \ + CONFIG.PCW_CAN0_PERIPHERAL_FREQMHZ {-1} \ + CONFIG.PCW_CAN1_BASEADDR {0xE0009000} \ + CONFIG.PCW_CAN1_HIGHADDR {0xE0009FFF} \ + CONFIG.PCW_CAN1_PERIPHERAL_CLKSRC {External} \ + CONFIG.PCW_CAN1_PERIPHERAL_FREQMHZ {-1} \ + CONFIG.PCW_CAN_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_CAN_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_CAN_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_CAN_PERIPHERAL_FREQMHZ {100} \ + CONFIG.PCW_CAN_PERIPHERAL_VALID {0} \ + CONFIG.PCW_CLK0_FREQ {100000000} \ + CONFIG.PCW_CLK1_FREQ {10000000} \ + CONFIG.PCW_CLK2_FREQ {10000000} \ + CONFIG.PCW_CLK3_FREQ {10000000} \ + CONFIG.PCW_CORE0_FIQ_INTR {0} \ + CONFIG.PCW_CORE0_IRQ_INTR {0} \ + 
CONFIG.PCW_CORE1_FIQ_INTR {0} \ + CONFIG.PCW_CORE1_IRQ_INTR {0} \ + CONFIG.PCW_CPU_CPU_6X4X_MAX_RANGE {667} \ + CONFIG.PCW_CPU_CPU_PLL_FREQMHZ {1300.000} \ + CONFIG.PCW_CPU_PERIPHERAL_CLKSRC {ARM PLL} \ + CONFIG.PCW_CPU_PERIPHERAL_DIVISOR0 {2} \ + CONFIG.PCW_CRYSTAL_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_DCI_PERIPHERAL_CLKSRC {DDR PLL} \ + CONFIG.PCW_DCI_PERIPHERAL_DIVISOR0 {52} \ + CONFIG.PCW_DCI_PERIPHERAL_DIVISOR1 {2} \ + CONFIG.PCW_DCI_PERIPHERAL_FREQMHZ {10.159} \ + CONFIG.PCW_DDRPLL_CTRL_FBDIV {21} \ + CONFIG.PCW_DDR_DDR_PLL_FREQMHZ {1050.000} \ + CONFIG.PCW_DDR_HPRLPR_QUEUE_PARTITION {HPR(0)/LPR(32)} \ + CONFIG.PCW_DDR_HPR_TO_CRITICAL_PRIORITY_LEVEL {15} \ + CONFIG.PCW_DDR_LPR_TO_CRITICAL_PRIORITY_LEVEL {2} \ + CONFIG.PCW_DDR_PERIPHERAL_CLKSRC {DDR PLL} \ + CONFIG.PCW_DDR_PERIPHERAL_DIVISOR0 {2} \ + CONFIG.PCW_DDR_PORT0_HPR_ENABLE {0} \ + CONFIG.PCW_DDR_PORT1_HPR_ENABLE {0} \ + CONFIG.PCW_DDR_PORT2_HPR_ENABLE {0} \ + CONFIG.PCW_DDR_PORT3_HPR_ENABLE {0} \ + CONFIG.PCW_DDR_RAM_BASEADDR {0x00100000} \ + CONFIG.PCW_DDR_RAM_HIGHADDR {0x1FFFFFFF} \ + CONFIG.PCW_DDR_WRITE_TO_CRITICAL_PRIORITY_LEVEL {2} \ + CONFIG.PCW_DM_WIDTH {4} \ + CONFIG.PCW_DQS_WIDTH {4} \ + CONFIG.PCW_DQ_WIDTH {32} \ + CONFIG.PCW_ENET0_BASEADDR {0xE000B000} \ + CONFIG.PCW_ENET0_ENET0_IO {MIO 16 .. 27} \ + CONFIG.PCW_ENET0_GRP_MDIO_ENABLE {1} \ + CONFIG.PCW_ENET0_GRP_MDIO_IO {MIO 52 .. 
53} \ + CONFIG.PCW_ENET0_HIGHADDR {0xE000BFFF} \ + CONFIG.PCW_ENET0_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR0 {8} \ + CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_ENET0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_ENET0_PERIPHERAL_FREQMHZ {1000 Mbps} \ + CONFIG.PCW_ENET0_RESET_ENABLE {1} \ + CONFIG.PCW_ENET0_RESET_IO {MIO 9} \ + CONFIG.PCW_ENET1_BASEADDR {0xE000C000} \ + CONFIG.PCW_ENET1_GRP_MDIO_ENABLE {0} \ + CONFIG.PCW_ENET1_HIGHADDR {0xE000CFFF} \ + CONFIG.PCW_ENET1_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_ENET1_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_ENET1_PERIPHERAL_FREQMHZ {1000 Mbps} \ + CONFIG.PCW_ENET1_RESET_ENABLE {0} \ + CONFIG.PCW_ENET_RESET_ENABLE {1} \ + CONFIG.PCW_ENET_RESET_POLARITY {Active Low} \ + CONFIG.PCW_ENET_RESET_SELECT {Share reset pin} \ + CONFIG.PCW_EN_4K_TIMER {0} \ + CONFIG.PCW_EN_CAN0 {0} \ + CONFIG.PCW_EN_CAN1 {0} \ + CONFIG.PCW_EN_CLK0_PORT {1} \ + CONFIG.PCW_EN_CLK1_PORT {0} \ + CONFIG.PCW_EN_CLK2_PORT {0} \ + CONFIG.PCW_EN_CLK3_PORT {0} \ + CONFIG.PCW_EN_CLKTRIG0_PORT {0} \ + CONFIG.PCW_EN_CLKTRIG1_PORT {0} \ + CONFIG.PCW_EN_CLKTRIG2_PORT {0} \ + CONFIG.PCW_EN_CLKTRIG3_PORT {0} \ + CONFIG.PCW_EN_DDR {1} \ + CONFIG.PCW_EN_EMIO_CAN0 {0} \ + CONFIG.PCW_EN_EMIO_CAN1 {0} \ + CONFIG.PCW_EN_EMIO_CD_SDIO0 {0} \ + CONFIG.PCW_EN_EMIO_CD_SDIO1 {0} \ + CONFIG.PCW_EN_EMIO_ENET0 {0} \ + CONFIG.PCW_EN_EMIO_ENET1 {0} \ + CONFIG.PCW_EN_EMIO_GPIO {0} \ + CONFIG.PCW_EN_EMIO_I2C0 {0} \ + CONFIG.PCW_EN_EMIO_I2C1 {0} \ + CONFIG.PCW_EN_EMIO_MODEM_UART0 {0} \ + CONFIG.PCW_EN_EMIO_MODEM_UART1 {0} \ + CONFIG.PCW_EN_EMIO_PJTAG {0} \ + CONFIG.PCW_EN_EMIO_SDIO0 {0} \ + CONFIG.PCW_EN_EMIO_SDIO1 {0} \ + CONFIG.PCW_EN_EMIO_SPI0 {0} \ + CONFIG.PCW_EN_EMIO_SPI1 {0} \ + CONFIG.PCW_EN_EMIO_SRAM_INT {0} \ + CONFIG.PCW_EN_EMIO_TRACE {0} \ + CONFIG.PCW_EN_EMIO_TTC0 {0} \ + CONFIG.PCW_EN_EMIO_TTC1 {0} \ + CONFIG.PCW_EN_EMIO_UART0 {0} \ + 
CONFIG.PCW_EN_EMIO_UART1 {0} \ + CONFIG.PCW_EN_EMIO_WDT {0} \ + CONFIG.PCW_EN_EMIO_WP_SDIO0 {0} \ + CONFIG.PCW_EN_EMIO_WP_SDIO1 {0} \ + CONFIG.PCW_EN_ENET0 {1} \ + CONFIG.PCW_EN_ENET1 {0} \ + CONFIG.PCW_EN_GPIO {1} \ + CONFIG.PCW_EN_I2C0 {0} \ + CONFIG.PCW_EN_I2C1 {0} \ + CONFIG.PCW_EN_MODEM_UART0 {0} \ + CONFIG.PCW_EN_MODEM_UART1 {0} \ + CONFIG.PCW_EN_PJTAG {0} \ + CONFIG.PCW_EN_PTP_ENET0 {0} \ + CONFIG.PCW_EN_PTP_ENET1 {0} \ + CONFIG.PCW_EN_QSPI {1} \ + CONFIG.PCW_EN_RST0_PORT {1} \ + CONFIG.PCW_EN_RST1_PORT {0} \ + CONFIG.PCW_EN_RST2_PORT {0} \ + CONFIG.PCW_EN_RST3_PORT {0} \ + CONFIG.PCW_EN_SDIO0 {1} \ + CONFIG.PCW_EN_SDIO1 {0} \ + CONFIG.PCW_EN_SMC {0} \ + CONFIG.PCW_EN_SPI0 {0} \ + CONFIG.PCW_EN_SPI1 {0} \ + CONFIG.PCW_EN_TRACE {0} \ + CONFIG.PCW_EN_TTC0 {0} \ + CONFIG.PCW_EN_TTC1 {0} \ + CONFIG.PCW_EN_UART0 {1} \ + CONFIG.PCW_EN_UART1 {0} \ + CONFIG.PCW_EN_USB0 {1} \ + CONFIG.PCW_EN_USB1 {0} \ + CONFIG.PCW_EN_WDT {0} \ + CONFIG.PCW_FCLK0_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR1 {2} \ + CONFIG.PCW_FCLK1_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FCLK2_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FCLK3_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FCLK_CLK0_BUF {TRUE} \ + CONFIG.PCW_FCLK_CLK1_BUF {FALSE} \ + CONFIG.PCW_FCLK_CLK2_BUF {FALSE} \ + CONFIG.PCW_FCLK_CLK3_BUF {FALSE} \ + CONFIG.PCW_FPGA0_PERIPHERAL_FREQMHZ {100} \ + CONFIG.PCW_FPGA1_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_FPGA2_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_FPGA3_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_FPGA_FCLK0_ENABLE {1} \ + CONFIG.PCW_FPGA_FCLK1_ENABLE {0} \ + CONFIG.PCW_FPGA_FCLK2_ENABLE {0} \ + CONFIG.PCW_FPGA_FCLK3_ENABLE {0} \ + CONFIG.PCW_GPIO_BASEADDR 
{0xE000A000} \ + CONFIG.PCW_GPIO_EMIO_GPIO_ENABLE {0} \ + CONFIG.PCW_GPIO_EMIO_GPIO_WIDTH {64} \ + CONFIG.PCW_GPIO_HIGHADDR {0xE000AFFF} \ + CONFIG.PCW_GPIO_MIO_GPIO_ENABLE {1} \ + CONFIG.PCW_GPIO_MIO_GPIO_IO {MIO} \ + CONFIG.PCW_GPIO_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_I2C0_BASEADDR {0xE0004000} \ + CONFIG.PCW_I2C0_HIGHADDR {0xE0004FFF} \ + CONFIG.PCW_I2C0_RESET_ENABLE {0} \ + CONFIG.PCW_I2C1_BASEADDR {0xE0005000} \ + CONFIG.PCW_I2C1_HIGHADDR {0xE0005FFF} \ + CONFIG.PCW_I2C1_RESET_ENABLE {0} \ + CONFIG.PCW_I2C_PERIPHERAL_FREQMHZ {25} \ + CONFIG.PCW_I2C_RESET_ENABLE {1} \ + CONFIG.PCW_I2C_RESET_POLARITY {Active Low} \ + CONFIG.PCW_IMPORT_BOARD_PRESET {None} \ + CONFIG.PCW_INCLUDE_ACP_TRANS_CHECK {0} \ + CONFIG.PCW_INCLUDE_TRACE_BUFFER {0} \ + CONFIG.PCW_IOPLL_CTRL_FBDIV {20} \ + CONFIG.PCW_IO_IO_PLL_FREQMHZ {1000.000} \ + CONFIG.PCW_IRQ_F2P_INTR {0} \ + CONFIG.PCW_IRQ_F2P_MODE {DIRECT} \ + CONFIG.PCW_MIO_0_DIRECTION {inout} \ + CONFIG.PCW_MIO_0_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_0_PULLUP {enabled} \ + CONFIG.PCW_MIO_0_SLEW {slow} \ + CONFIG.PCW_MIO_10_DIRECTION {inout} \ + CONFIG.PCW_MIO_10_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_10_PULLUP {enabled} \ + CONFIG.PCW_MIO_10_SLEW {slow} \ + CONFIG.PCW_MIO_11_DIRECTION {inout} \ + CONFIG.PCW_MIO_11_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_11_PULLUP {enabled} \ + CONFIG.PCW_MIO_11_SLEW {slow} \ + CONFIG.PCW_MIO_12_DIRECTION {inout} \ + CONFIG.PCW_MIO_12_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_12_PULLUP {enabled} \ + CONFIG.PCW_MIO_12_SLEW {slow} \ + CONFIG.PCW_MIO_13_DIRECTION {inout} \ + CONFIG.PCW_MIO_13_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_13_PULLUP {enabled} \ + CONFIG.PCW_MIO_13_SLEW {slow} \ + CONFIG.PCW_MIO_14_DIRECTION {in} \ + CONFIG.PCW_MIO_14_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_14_PULLUP {enabled} \ + CONFIG.PCW_MIO_14_SLEW {slow} \ + CONFIG.PCW_MIO_15_DIRECTION {out} \ + CONFIG.PCW_MIO_15_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_15_PULLUP {enabled} \ + CONFIG.PCW_MIO_15_SLEW {slow} \ + 
CONFIG.PCW_MIO_16_DIRECTION {out} \ + CONFIG.PCW_MIO_16_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_16_PULLUP {enabled} \ + CONFIG.PCW_MIO_16_SLEW {slow} \ + CONFIG.PCW_MIO_17_DIRECTION {out} \ + CONFIG.PCW_MIO_17_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_17_PULLUP {enabled} \ + CONFIG.PCW_MIO_17_SLEW {slow} \ + CONFIG.PCW_MIO_18_DIRECTION {out} \ + CONFIG.PCW_MIO_18_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_18_PULLUP {enabled} \ + CONFIG.PCW_MIO_18_SLEW {slow} \ + CONFIG.PCW_MIO_19_DIRECTION {out} \ + CONFIG.PCW_MIO_19_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_19_PULLUP {enabled} \ + CONFIG.PCW_MIO_19_SLEW {slow} \ + CONFIG.PCW_MIO_1_DIRECTION {out} \ + CONFIG.PCW_MIO_1_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_1_PULLUP {enabled} \ + CONFIG.PCW_MIO_1_SLEW {slow} \ + CONFIG.PCW_MIO_20_DIRECTION {out} \ + CONFIG.PCW_MIO_20_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_20_PULLUP {enabled} \ + CONFIG.PCW_MIO_20_SLEW {slow} \ + CONFIG.PCW_MIO_21_DIRECTION {out} \ + CONFIG.PCW_MIO_21_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_21_PULLUP {enabled} \ + CONFIG.PCW_MIO_21_SLEW {slow} \ + CONFIG.PCW_MIO_22_DIRECTION {in} \ + CONFIG.PCW_MIO_22_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_22_PULLUP {enabled} \ + CONFIG.PCW_MIO_22_SLEW {slow} \ + CONFIG.PCW_MIO_23_DIRECTION {in} \ + CONFIG.PCW_MIO_23_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_23_PULLUP {enabled} \ + CONFIG.PCW_MIO_23_SLEW {slow} \ + CONFIG.PCW_MIO_24_DIRECTION {in} \ + CONFIG.PCW_MIO_24_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_24_PULLUP {enabled} \ + CONFIG.PCW_MIO_24_SLEW {slow} \ + CONFIG.PCW_MIO_25_DIRECTION {in} \ + CONFIG.PCW_MIO_25_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_25_PULLUP {enabled} \ + CONFIG.PCW_MIO_25_SLEW {slow} \ + CONFIG.PCW_MIO_26_DIRECTION {in} \ + CONFIG.PCW_MIO_26_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_26_PULLUP {enabled} \ + CONFIG.PCW_MIO_26_SLEW {slow} \ + CONFIG.PCW_MIO_27_DIRECTION {in} \ + CONFIG.PCW_MIO_27_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_27_PULLUP {enabled} \ + CONFIG.PCW_MIO_27_SLEW {slow} \ + 
CONFIG.PCW_MIO_28_DIRECTION {inout} \ + CONFIG.PCW_MIO_28_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_28_PULLUP {enabled} \ + CONFIG.PCW_MIO_28_SLEW {slow} \ + CONFIG.PCW_MIO_29_DIRECTION {in} \ + CONFIG.PCW_MIO_29_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_29_PULLUP {enabled} \ + CONFIG.PCW_MIO_29_SLEW {slow} \ + CONFIG.PCW_MIO_2_DIRECTION {inout} \ + CONFIG.PCW_MIO_2_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_2_PULLUP {disabled} \ + CONFIG.PCW_MIO_2_SLEW {slow} \ + CONFIG.PCW_MIO_30_DIRECTION {out} \ + CONFIG.PCW_MIO_30_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_30_PULLUP {enabled} \ + CONFIG.PCW_MIO_30_SLEW {slow} \ + CONFIG.PCW_MIO_31_DIRECTION {in} \ + CONFIG.PCW_MIO_31_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_31_PULLUP {enabled} \ + CONFIG.PCW_MIO_31_SLEW {slow} \ + CONFIG.PCW_MIO_32_DIRECTION {inout} \ + CONFIG.PCW_MIO_32_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_32_PULLUP {enabled} \ + CONFIG.PCW_MIO_32_SLEW {slow} \ + CONFIG.PCW_MIO_33_DIRECTION {inout} \ + CONFIG.PCW_MIO_33_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_33_PULLUP {enabled} \ + CONFIG.PCW_MIO_33_SLEW {slow} \ + CONFIG.PCW_MIO_34_DIRECTION {inout} \ + CONFIG.PCW_MIO_34_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_34_PULLUP {enabled} \ + CONFIG.PCW_MIO_34_SLEW {slow} \ + CONFIG.PCW_MIO_35_DIRECTION {inout} \ + CONFIG.PCW_MIO_35_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_35_PULLUP {enabled} \ + CONFIG.PCW_MIO_35_SLEW {slow} \ + CONFIG.PCW_MIO_36_DIRECTION {in} \ + CONFIG.PCW_MIO_36_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_36_PULLUP {enabled} \ + CONFIG.PCW_MIO_36_SLEW {slow} \ + CONFIG.PCW_MIO_37_DIRECTION {inout} \ + CONFIG.PCW_MIO_37_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_37_PULLUP {enabled} \ + CONFIG.PCW_MIO_37_SLEW {slow} \ + CONFIG.PCW_MIO_38_DIRECTION {inout} \ + CONFIG.PCW_MIO_38_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_38_PULLUP {enabled} \ + CONFIG.PCW_MIO_38_SLEW {slow} \ + CONFIG.PCW_MIO_39_DIRECTION {inout} \ + CONFIG.PCW_MIO_39_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_39_PULLUP {enabled} \ + 
CONFIG.PCW_MIO_39_SLEW {slow} \ + CONFIG.PCW_MIO_3_DIRECTION {inout} \ + CONFIG.PCW_MIO_3_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_3_PULLUP {disabled} \ + CONFIG.PCW_MIO_3_SLEW {slow} \ + CONFIG.PCW_MIO_40_DIRECTION {inout} \ + CONFIG.PCW_MIO_40_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_40_PULLUP {enabled} \ + CONFIG.PCW_MIO_40_SLEW {slow} \ + CONFIG.PCW_MIO_41_DIRECTION {inout} \ + CONFIG.PCW_MIO_41_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_41_PULLUP {enabled} \ + CONFIG.PCW_MIO_41_SLEW {slow} \ + CONFIG.PCW_MIO_42_DIRECTION {inout} \ + CONFIG.PCW_MIO_42_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_42_PULLUP {enabled} \ + CONFIG.PCW_MIO_42_SLEW {slow} \ + CONFIG.PCW_MIO_43_DIRECTION {inout} \ + CONFIG.PCW_MIO_43_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_43_PULLUP {enabled} \ + CONFIG.PCW_MIO_43_SLEW {slow} \ + CONFIG.PCW_MIO_44_DIRECTION {inout} \ + CONFIG.PCW_MIO_44_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_44_PULLUP {enabled} \ + CONFIG.PCW_MIO_44_SLEW {slow} \ + CONFIG.PCW_MIO_45_DIRECTION {inout} \ + CONFIG.PCW_MIO_45_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_45_PULLUP {enabled} \ + CONFIG.PCW_MIO_45_SLEW {slow} \ + CONFIG.PCW_MIO_46_DIRECTION {out} \ + CONFIG.PCW_MIO_46_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_46_PULLUP {enabled} \ + CONFIG.PCW_MIO_46_SLEW {slow} \ + CONFIG.PCW_MIO_47_DIRECTION {in} \ + CONFIG.PCW_MIO_47_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_47_PULLUP {enabled} \ + CONFIG.PCW_MIO_47_SLEW {slow} \ + CONFIG.PCW_MIO_48_DIRECTION {inout} \ + CONFIG.PCW_MIO_48_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_48_PULLUP {enabled} \ + CONFIG.PCW_MIO_48_SLEW {slow} \ + CONFIG.PCW_MIO_49_DIRECTION {inout} \ + CONFIG.PCW_MIO_49_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_49_PULLUP {enabled} \ + CONFIG.PCW_MIO_49_SLEW {slow} \ + CONFIG.PCW_MIO_4_DIRECTION {inout} \ + CONFIG.PCW_MIO_4_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_4_PULLUP {disabled} \ + CONFIG.PCW_MIO_4_SLEW {slow} \ + CONFIG.PCW_MIO_50_DIRECTION {inout} \ + CONFIG.PCW_MIO_50_IOTYPE {LVCMOS 1.8V} \ + 
CONFIG.PCW_MIO_50_PULLUP {enabled} \ + CONFIG.PCW_MIO_50_SLEW {slow} \ + CONFIG.PCW_MIO_51_DIRECTION {inout} \ + CONFIG.PCW_MIO_51_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_51_PULLUP {enabled} \ + CONFIG.PCW_MIO_51_SLEW {slow} \ + CONFIG.PCW_MIO_52_DIRECTION {out} \ + CONFIG.PCW_MIO_52_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_52_PULLUP {enabled} \ + CONFIG.PCW_MIO_52_SLEW {slow} \ + CONFIG.PCW_MIO_53_DIRECTION {inout} \ + CONFIG.PCW_MIO_53_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_53_PULLUP {enabled} \ + CONFIG.PCW_MIO_53_SLEW {slow} \ + CONFIG.PCW_MIO_5_DIRECTION {inout} \ + CONFIG.PCW_MIO_5_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_5_PULLUP {disabled} \ + CONFIG.PCW_MIO_5_SLEW {slow} \ + CONFIG.PCW_MIO_6_DIRECTION {out} \ + CONFIG.PCW_MIO_6_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_6_PULLUP {disabled} \ + CONFIG.PCW_MIO_6_SLEW {slow} \ + CONFIG.PCW_MIO_7_DIRECTION {out} \ + CONFIG.PCW_MIO_7_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_7_PULLUP {disabled} \ + CONFIG.PCW_MIO_7_SLEW {slow} \ + CONFIG.PCW_MIO_8_DIRECTION {out} \ + CONFIG.PCW_MIO_8_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_8_PULLUP {disabled} \ + CONFIG.PCW_MIO_8_SLEW {slow} \ + CONFIG.PCW_MIO_9_DIRECTION {out} \ + CONFIG.PCW_MIO_9_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_9_PULLUP {enabled} \ + CONFIG.PCW_MIO_9_SLEW {slow} \ + CONFIG.PCW_MIO_PRIMITIVE {54} \ + CONFIG.PCW_MIO_TREE_PERIPHERALS {GPIO#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#GPIO#Quad SPI Flash#ENET Reset#GPIO#GPIO#GPIO#GPIO#UART 0#UART 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#USB Reset#SD 0#GPIO#GPIO#GPIO#GPIO#Enet 0#Enet 0} \ + CONFIG.PCW_MIO_TREE_SIGNALS 
{gpio[0]#qspi0_ss_b#qspi0_io[0]#qspi0_io[1]#qspi0_io[2]#qspi0_io[3]/HOLD_B#qspi0_sclk#gpio[7]#qspi_fbclk#reset#gpio[10]#gpio[11]#gpio[12]#gpio[13]#rx#tx#tx_clk#txd[0]#txd[1]#txd[2]#txd[3]#tx_ctl#rx_clk#rxd[0]#rxd[1]#rxd[2]#rxd[3]#rx_ctl#data[4]#dir#stp#nxt#data[0]#data[1]#data[2]#data[3]#clk#data[5]#data[6]#data[7]#clk#cmd#data[0]#data[1]#data[2]#data[3]#reset#cd#gpio[48]#gpio[49]#gpio[50]#gpio[51]#mdc#mdio} \ + CONFIG.PCW_M_AXI_GP0_ENABLE_STATIC_REMAP {0} \ + CONFIG.PCW_M_AXI_GP0_ID_WIDTH {12} \ + CONFIG.PCW_M_AXI_GP0_SUPPORT_NARROW_BURST {0} \ + CONFIG.PCW_M_AXI_GP0_THREAD_ID_WIDTH {12} \ + CONFIG.PCW_M_AXI_GP1_ENABLE_STATIC_REMAP {0} \ + CONFIG.PCW_M_AXI_GP1_ID_WIDTH {12} \ + CONFIG.PCW_M_AXI_GP1_SUPPORT_NARROW_BURST {0} \ + CONFIG.PCW_M_AXI_GP1_THREAD_ID_WIDTH {12} \ + CONFIG.PCW_NAND_CYCLES_T_AR {1} \ + CONFIG.PCW_NAND_CYCLES_T_CLR {1} \ + CONFIG.PCW_NAND_CYCLES_T_RC {11} \ + CONFIG.PCW_NAND_CYCLES_T_REA {1} \ + CONFIG.PCW_NAND_CYCLES_T_RR {1} \ + CONFIG.PCW_NAND_CYCLES_T_WC {11} \ + CONFIG.PCW_NAND_CYCLES_T_WP {1} \ + CONFIG.PCW_NAND_GRP_D8_ENABLE {0} \ + CONFIG.PCW_NAND_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_NOR_CS0_T_CEOE {1} \ + CONFIG.PCW_NOR_CS0_T_PC {1} \ + CONFIG.PCW_NOR_CS0_T_RC {11} \ + CONFIG.PCW_NOR_CS0_T_TR {1} \ + CONFIG.PCW_NOR_CS0_T_WC {11} \ + CONFIG.PCW_NOR_CS0_T_WP {1} \ + CONFIG.PCW_NOR_CS0_WE_TIME {0} \ + CONFIG.PCW_NOR_CS1_T_CEOE {1} \ + CONFIG.PCW_NOR_CS1_T_PC {1} \ + CONFIG.PCW_NOR_CS1_T_RC {11} \ + CONFIG.PCW_NOR_CS1_T_TR {1} \ + CONFIG.PCW_NOR_CS1_T_WC {11} \ + CONFIG.PCW_NOR_CS1_T_WP {1} \ + CONFIG.PCW_NOR_CS1_WE_TIME {0} \ + CONFIG.PCW_NOR_GRP_A25_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_CS0_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_CS1_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_CS0_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_CS1_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_INT_ENABLE {0} \ + CONFIG.PCW_NOR_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_NOR_SRAM_CS0_T_CEOE {1} \ + CONFIG.PCW_NOR_SRAM_CS0_T_PC {1} \ + CONFIG.PCW_NOR_SRAM_CS0_T_RC {11} \ + 
CONFIG.PCW_NOR_SRAM_CS0_T_TR {1} \ + CONFIG.PCW_NOR_SRAM_CS0_T_WC {11} \ + CONFIG.PCW_NOR_SRAM_CS0_T_WP {1} \ + CONFIG.PCW_NOR_SRAM_CS0_WE_TIME {0} \ + CONFIG.PCW_NOR_SRAM_CS1_T_CEOE {1} \ + CONFIG.PCW_NOR_SRAM_CS1_T_PC {1} \ + CONFIG.PCW_NOR_SRAM_CS1_T_RC {11} \ + CONFIG.PCW_NOR_SRAM_CS1_T_TR {1} \ + CONFIG.PCW_NOR_SRAM_CS1_T_WC {11} \ + CONFIG.PCW_NOR_SRAM_CS1_T_WP {1} \ + CONFIG.PCW_NOR_SRAM_CS1_WE_TIME {0} \ + CONFIG.PCW_OVERRIDE_BASIC_CLOCK {0} \ + CONFIG.PCW_P2F_CAN0_INTR {0} \ + CONFIG.PCW_P2F_CAN1_INTR {0} \ + CONFIG.PCW_P2F_CTI_INTR {0} \ + CONFIG.PCW_P2F_DMAC0_INTR {0} \ + CONFIG.PCW_P2F_DMAC1_INTR {0} \ + CONFIG.PCW_P2F_DMAC2_INTR {0} \ + CONFIG.PCW_P2F_DMAC3_INTR {0} \ + CONFIG.PCW_P2F_DMAC4_INTR {0} \ + CONFIG.PCW_P2F_DMAC5_INTR {0} \ + CONFIG.PCW_P2F_DMAC6_INTR {0} \ + CONFIG.PCW_P2F_DMAC7_INTR {0} \ + CONFIG.PCW_P2F_DMAC_ABORT_INTR {0} \ + CONFIG.PCW_P2F_ENET0_INTR {0} \ + CONFIG.PCW_P2F_ENET1_INTR {0} \ + CONFIG.PCW_P2F_GPIO_INTR {0} \ + CONFIG.PCW_P2F_I2C0_INTR {0} \ + CONFIG.PCW_P2F_I2C1_INTR {0} \ + CONFIG.PCW_P2F_QSPI_INTR {0} \ + CONFIG.PCW_P2F_SDIO0_INTR {0} \ + CONFIG.PCW_P2F_SDIO1_INTR {0} \ + CONFIG.PCW_P2F_SMC_INTR {0} \ + CONFIG.PCW_P2F_SPI0_INTR {0} \ + CONFIG.PCW_P2F_SPI1_INTR {0} \ + CONFIG.PCW_P2F_UART0_INTR {0} \ + CONFIG.PCW_P2F_UART1_INTR {0} \ + CONFIG.PCW_P2F_USB0_INTR {0} \ + CONFIG.PCW_P2F_USB1_INTR {0} \ + CONFIG.PCW_PACKAGE_DDR_BOARD_DELAY0 {0.223} \ + CONFIG.PCW_PACKAGE_DDR_BOARD_DELAY1 {0.212} \ + CONFIG.PCW_PACKAGE_DDR_BOARD_DELAY2 {0.085} \ + CONFIG.PCW_PACKAGE_DDR_BOARD_DELAY3 {0.092} \ + CONFIG.PCW_PACKAGE_DDR_DQS_TO_CLK_DELAY_0 {0.040} \ + CONFIG.PCW_PACKAGE_DDR_DQS_TO_CLK_DELAY_1 {0.058} \ + CONFIG.PCW_PACKAGE_DDR_DQS_TO_CLK_DELAY_2 {-0.009} \ + CONFIG.PCW_PACKAGE_DDR_DQS_TO_CLK_DELAY_3 {-0.033} \ + CONFIG.PCW_PACKAGE_NAME {clg400} \ + CONFIG.PCW_PCAP_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_PCAP_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_PCAP_PERIPHERAL_FREQMHZ {200} \ + CONFIG.PCW_PERIPHERAL_BOARD_PRESET {None} \ + 
CONFIG.PCW_PLL_BYPASSMODE_ENABLE {0} \ + CONFIG.PCW_PRESET_BANK0_VOLTAGE {LVCMOS 3.3V} \ + CONFIG.PCW_PRESET_BANK1_VOLTAGE {LVCMOS 1.8V} \ + CONFIG.PCW_PS7_SI_REV {PRODUCTION} \ + CONFIG.PCW_QSPI_GRP_FBCLK_ENABLE {1} \ + CONFIG.PCW_QSPI_GRP_FBCLK_IO {MIO 8} \ + CONFIG.PCW_QSPI_GRP_IO1_ENABLE {0} \ + CONFIG.PCW_QSPI_GRP_SINGLE_SS_ENABLE {1} \ + CONFIG.PCW_QSPI_GRP_SINGLE_SS_IO {MIO 1 .. 6} \ + CONFIG.PCW_QSPI_GRP_SS1_ENABLE {0} \ + CONFIG.PCW_QSPI_INTERNAL_HIGHADDRESS {0xFCFFFFFF} \ + CONFIG.PCW_QSPI_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_QSPI_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_QSPI_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_QSPI_PERIPHERAL_FREQMHZ {200} \ + CONFIG.PCW_QSPI_QSPI_IO {MIO 1 .. 6} \ + CONFIG.PCW_SD0_GRP_CD_ENABLE {1} \ + CONFIG.PCW_SD0_GRP_CD_IO {MIO 47} \ + CONFIG.PCW_SD0_GRP_POW_ENABLE {0} \ + CONFIG.PCW_SD0_GRP_WP_ENABLE {0} \ + CONFIG.PCW_SD0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_SD0_SD0_IO {MIO 40 .. 45} \ + CONFIG.PCW_SD1_GRP_CD_ENABLE {0} \ + CONFIG.PCW_SD1_GRP_POW_ENABLE {0} \ + CONFIG.PCW_SD1_GRP_WP_ENABLE {0} \ + CONFIG.PCW_SD1_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_SDIO0_BASEADDR {0xE0100000} \ + CONFIG.PCW_SDIO0_HIGHADDR {0xE0100FFF} \ + CONFIG.PCW_SDIO1_BASEADDR {0xE0101000} \ + CONFIG.PCW_SDIO1_HIGHADDR {0xE0101FFF} \ + CONFIG.PCW_SDIO_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_SDIO_PERIPHERAL_DIVISOR0 {20} \ + CONFIG.PCW_SDIO_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_SDIO_PERIPHERAL_VALID {1} \ + CONFIG.PCW_SINGLE_QSPI_DATA_MODE {x4} \ + CONFIG.PCW_SMC_CYCLE_T0 {NA} \ + CONFIG.PCW_SMC_CYCLE_T1 {NA} \ + CONFIG.PCW_SMC_CYCLE_T2 {NA} \ + CONFIG.PCW_SMC_CYCLE_T3 {NA} \ + CONFIG.PCW_SMC_CYCLE_T4 {NA} \ + CONFIG.PCW_SMC_CYCLE_T5 {NA} \ + CONFIG.PCW_SMC_CYCLE_T6 {NA} \ + CONFIG.PCW_SMC_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_SMC_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_SMC_PERIPHERAL_FREQMHZ {100} \ + CONFIG.PCW_SMC_PERIPHERAL_VALID {0} \ + CONFIG.PCW_SPI0_BASEADDR {0xE0006000} \ + CONFIG.PCW_SPI0_GRP_SS0_ENABLE {0} \ + CONFIG.PCW_SPI0_GRP_SS1_ENABLE 
{0} \ + CONFIG.PCW_SPI0_GRP_SS2_ENABLE {0} \ + CONFIG.PCW_SPI0_HIGHADDR {0xE0006FFF} \ + CONFIG.PCW_SPI0_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_SPI1_BASEADDR {0xE0007000} \ + CONFIG.PCW_SPI1_GRP_SS0_ENABLE {0} \ + CONFIG.PCW_SPI1_GRP_SS1_ENABLE {0} \ + CONFIG.PCW_SPI1_GRP_SS2_ENABLE {0} \ + CONFIG.PCW_SPI1_HIGHADDR {0xE0007FFF} \ + CONFIG.PCW_SPI1_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_SPI_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_SPI_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_SPI_PERIPHERAL_FREQMHZ {166.666666} \ + CONFIG.PCW_SPI_PERIPHERAL_VALID {0} \ + CONFIG.PCW_S_AXI_ACP_ARUSER_VAL {31} \ + CONFIG.PCW_S_AXI_ACP_AWUSER_VAL {31} \ + CONFIG.PCW_S_AXI_ACP_ID_WIDTH {3} \ + CONFIG.PCW_S_AXI_GP0_ID_WIDTH {6} \ + CONFIG.PCW_S_AXI_GP1_ID_WIDTH {6} \ + CONFIG.PCW_S_AXI_HP0_DATA_WIDTH {64} \ + CONFIG.PCW_S_AXI_HP0_ID_WIDTH {6} \ + CONFIG.PCW_S_AXI_HP1_DATA_WIDTH {64} \ + CONFIG.PCW_S_AXI_HP1_ID_WIDTH {6} \ + CONFIG.PCW_S_AXI_HP2_DATA_WIDTH {64} \ + CONFIG.PCW_S_AXI_HP2_ID_WIDTH {6} \ + CONFIG.PCW_S_AXI_HP3_DATA_WIDTH {64} \ + CONFIG.PCW_S_AXI_HP3_ID_WIDTH {6} \ + CONFIG.PCW_TPIU_PERIPHERAL_CLKSRC {External} \ + CONFIG.PCW_TPIU_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TPIU_PERIPHERAL_FREQMHZ {200} \ + CONFIG.PCW_TRACE_BUFFER_CLOCK_DELAY {12} \ + CONFIG.PCW_TRACE_BUFFER_FIFO_SIZE {128} \ + CONFIG.PCW_TRACE_PIPELINE_WIDTH {8} \ + CONFIG.PCW_TTC0_BASEADDR {0xE0104000} \ + CONFIG.PCW_TTC0_CLK0_PERIPHERAL_CLKSRC {CPU_1X} \ + CONFIG.PCW_TTC0_CLK0_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TTC0_CLK0_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_CLK1_PERIPHERAL_CLKSRC {CPU_1X} \ + CONFIG.PCW_TTC0_CLK1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TTC0_CLK1_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_CLK2_PERIPHERAL_CLKSRC {CPU_1X} \ + CONFIG.PCW_TTC0_CLK2_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TTC0_CLK2_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_HIGHADDR {0xE0104fff} \ + CONFIG.PCW_TTC1_BASEADDR {0xE0105000} \ + CONFIG.PCW_TTC1_CLK0_PERIPHERAL_CLKSRC {CPU_1X} \ + 
CONFIG.PCW_TTC1_CLK0_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TTC1_CLK0_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC1_CLK1_PERIPHERAL_CLKSRC {CPU_1X} \ + CONFIG.PCW_TTC1_CLK1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TTC1_CLK1_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC1_CLK2_PERIPHERAL_CLKSRC {CPU_1X} \ + CONFIG.PCW_TTC1_CLK2_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TTC1_CLK2_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC1_HIGHADDR {0xE0105fff} \ + CONFIG.PCW_TTC_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_UART0_BASEADDR {0xE0000000} \ + CONFIG.PCW_UART0_BAUD_RATE {115200} \ + CONFIG.PCW_UART0_GRP_FULL_ENABLE {0} \ + CONFIG.PCW_UART0_HIGHADDR {0xE0000FFF} \ + CONFIG.PCW_UART0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_UART0_UART0_IO {MIO 14 .. 15} \ + CONFIG.PCW_UART1_BASEADDR {0xE0001000} \ + CONFIG.PCW_UART1_BAUD_RATE {115200} \ + CONFIG.PCW_UART1_GRP_FULL_ENABLE {0} \ + CONFIG.PCW_UART1_HIGHADDR {0xE0001FFF} \ + CONFIG.PCW_UART1_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_UART_PERIPHERAL_CLKSRC {IO PLL} \ + CONFIG.PCW_UART_PERIPHERAL_DIVISOR0 {10} \ + CONFIG.PCW_UART_PERIPHERAL_FREQMHZ {100} \ + CONFIG.PCW_UART_PERIPHERAL_VALID {1} \ + CONFIG.PCW_UIPARAM_ACT_DDR_FREQ_MHZ {525.000000} \ + CONFIG.PCW_UIPARAM_DDR_ADV_ENABLE {0} \ + CONFIG.PCW_UIPARAM_DDR_AL {0} \ + CONFIG.PCW_UIPARAM_DDR_BANK_ADDR_COUNT {3} \ + CONFIG.PCW_UIPARAM_DDR_BL {8} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY0 {0.223} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY1 {0.212} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY2 {0.085} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY3 {0.092} \ + CONFIG.PCW_UIPARAM_DDR_BUS_WIDTH {16 Bit} \ + CONFIG.PCW_UIPARAM_DDR_CL {7} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_0_LENGTH_MM {25.8} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_0_PACKAGE_LENGTH {80.4535} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_0_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_1_LENGTH_MM {25.8} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_1_PACKAGE_LENGTH {80.4535} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_1_PROPOGATION_DELAY {160} \ + 
CONFIG.PCW_UIPARAM_DDR_CLOCK_2_LENGTH_MM {0} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_2_PACKAGE_LENGTH {80.4535} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_2_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_3_LENGTH_MM {0} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_3_PACKAGE_LENGTH {80.4535} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_3_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_CLOCK_STOP_EN {0} \ + CONFIG.PCW_UIPARAM_DDR_COL_ADDR_COUNT {10} \ + CONFIG.PCW_UIPARAM_DDR_CWL {6} \ + CONFIG.PCW_UIPARAM_DDR_DEVICE_CAPACITY {4096 MBits} \ + CONFIG.PCW_UIPARAM_DDR_DQS_0_LENGTH_MM {15.6} \ + CONFIG.PCW_UIPARAM_DDR_DQS_0_PACKAGE_LENGTH {105.056} \ + CONFIG.PCW_UIPARAM_DDR_DQS_0_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_DQS_1_LENGTH_MM {18.8} \ + CONFIG.PCW_UIPARAM_DDR_DQS_1_PACKAGE_LENGTH {66.904} \ + CONFIG.PCW_UIPARAM_DDR_DQS_1_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_DQS_2_LENGTH_MM {0} \ + CONFIG.PCW_UIPARAM_DDR_DQS_2_PACKAGE_LENGTH {89.1715} \ + CONFIG.PCW_UIPARAM_DDR_DQS_2_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_DQS_3_LENGTH_MM {0} \ + CONFIG.PCW_UIPARAM_DDR_DQS_3_PACKAGE_LENGTH {113.63} \ + CONFIG.PCW_UIPARAM_DDR_DQS_3_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_0 {0.040} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_1 {0.058} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_2 {-0.009} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_3 {-0.033} \ + CONFIG.PCW_UIPARAM_DDR_DQ_0_LENGTH_MM {16.5} \ + CONFIG.PCW_UIPARAM_DDR_DQ_0_PACKAGE_LENGTH {98.503} \ + CONFIG.PCW_UIPARAM_DDR_DQ_0_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_DQ_1_LENGTH_MM {18} \ + CONFIG.PCW_UIPARAM_DDR_DQ_1_PACKAGE_LENGTH {68.5855} \ + CONFIG.PCW_UIPARAM_DDR_DQ_1_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_DQ_2_LENGTH_MM {0} \ + CONFIG.PCW_UIPARAM_DDR_DQ_2_PACKAGE_LENGTH {90.295} \ + CONFIG.PCW_UIPARAM_DDR_DQ_2_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_DQ_3_LENGTH_MM {0} \ + CONFIG.PCW_UIPARAM_DDR_DQ_3_PACKAGE_LENGTH {103.977} \ + 
CONFIG.PCW_UIPARAM_DDR_DQ_3_PROPOGATION_DELAY {160} \ + CONFIG.PCW_UIPARAM_DDR_DRAM_WIDTH {16 Bits} \ + CONFIG.PCW_UIPARAM_DDR_ECC {Disabled} \ + CONFIG.PCW_UIPARAM_DDR_ENABLE {1} \ + CONFIG.PCW_UIPARAM_DDR_FREQ_MHZ {525} \ + CONFIG.PCW_UIPARAM_DDR_HIGH_TEMP {Normal (0-85)} \ + CONFIG.PCW_UIPARAM_DDR_MEMORY_TYPE {DDR 3} \ + CONFIG.PCW_UIPARAM_DDR_PARTNO {MT41J256M16 RE-125} \ + CONFIG.PCW_UIPARAM_DDR_ROW_ADDR_COUNT {15} \ + CONFIG.PCW_UIPARAM_DDR_SPEED_BIN {DDR3_1066F} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_DATA_EYE {1} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_READ_GATE {1} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_WRITE_LEVEL {1} \ + CONFIG.PCW_UIPARAM_DDR_T_FAW {40.0} \ + CONFIG.PCW_UIPARAM_DDR_T_RAS_MIN {35.0} \ + CONFIG.PCW_UIPARAM_DDR_T_RC {48.91} \ + CONFIG.PCW_UIPARAM_DDR_T_RCD {7} \ + CONFIG.PCW_UIPARAM_DDR_T_RP {7} \ + CONFIG.PCW_UIPARAM_DDR_USE_INTERNAL_VREF {0} \ + CONFIG.PCW_UIPARAM_GENERATE_SUMMARY {NA} \ + CONFIG.PCW_USB0_BASEADDR {0xE0102000} \ + CONFIG.PCW_USB0_HIGHADDR {0xE0102fff} \ + CONFIG.PCW_USB0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_USB0_PERIPHERAL_FREQMHZ {60} \ + CONFIG.PCW_USB0_RESET_ENABLE {1} \ + CONFIG.PCW_USB0_RESET_IO {MIO 46} \ + CONFIG.PCW_USB0_USB0_IO {MIO 28 .. 
39} \ + CONFIG.PCW_USB1_BASEADDR {0xE0103000} \ + CONFIG.PCW_USB1_HIGHADDR {0xE0103fff} \ + CONFIG.PCW_USB1_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_USB1_PERIPHERAL_FREQMHZ {60} \ + CONFIG.PCW_USB1_RESET_ENABLE {0} \ + CONFIG.PCW_USB_RESET_ENABLE {1} \ + CONFIG.PCW_USB_RESET_POLARITY {Active Low} \ + CONFIG.PCW_USB_RESET_SELECT {Share reset pin} \ + CONFIG.PCW_USE_AXI_FABRIC_IDLE {0} \ + CONFIG.PCW_USE_AXI_NONSECURE {0} \ + CONFIG.PCW_USE_CORESIGHT {0} \ + CONFIG.PCW_USE_CROSS_TRIGGER {0} \ + CONFIG.PCW_USE_CR_FABRIC {1} \ + CONFIG.PCW_USE_DDR_BYPASS {0} \ + CONFIG.PCW_USE_DEBUG {0} \ + CONFIG.PCW_USE_DEFAULT_ACP_USER_VAL {0} \ + CONFIG.PCW_USE_DMA0 {0} \ + CONFIG.PCW_USE_DMA1 {0} \ + CONFIG.PCW_USE_DMA2 {0} \ + CONFIG.PCW_USE_DMA3 {0} \ + CONFIG.PCW_USE_EXPANDED_IOP {0} \ + CONFIG.PCW_USE_EXPANDED_PS_SLCR_REGISTERS {0} \ + CONFIG.PCW_USE_FABRIC_INTERRUPT {0} \ + CONFIG.PCW_USE_HIGH_OCM {0} \ + CONFIG.PCW_USE_M_AXI_GP0 {1} \ + CONFIG.PCW_USE_M_AXI_GP1 {0} \ + CONFIG.PCW_USE_PROC_EVENT_BUS {0} \ + CONFIG.PCW_USE_PS_SLCR_REGISTERS {0} \ + CONFIG.PCW_USE_S_AXI_ACP {0} \ + CONFIG.PCW_USE_S_AXI_GP0 {0} \ + CONFIG.PCW_USE_S_AXI_GP1 {0} \ + CONFIG.PCW_USE_S_AXI_HP0 {1} \ + CONFIG.PCW_USE_S_AXI_HP1 {1} \ + CONFIG.PCW_USE_S_AXI_HP2 {1} \ + CONFIG.PCW_USE_S_AXI_HP3 {0} \ + CONFIG.PCW_USE_TRACE {0} \ + CONFIG.PCW_USE_TRACE_DATA_EDGE_DETECTOR {0} \ + CONFIG.PCW_VALUE_SILVERSION {3} \ + CONFIG.PCW_WDT_PERIPHERAL_CLKSRC {CPU_1X} \ + CONFIG.PCW_WDT_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_WDT_PERIPHERAL_FREQMHZ {133.333333} \ + ] $processing_system7_0 + + # Create instance: ps7_0_axi_periph, and set properties + set ps7_0_axi_periph [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 ps7_0_axi_periph ] + set_property -dict [ list \ + CONFIG.NUM_MI {4} \ + ] $ps7_0_axi_periph + + # Create instance: rst_ps7_0_100M, and set properties + set rst_ps7_0_100M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ps7_0_100M ] + + # Create instance: u_dma, and set 
properties + set u_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 u_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {26} \ + ] $u_dma + + # Create instance: x_dma, and set properties + set x_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 x_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {26} \ + ] $x_dma + + # Create instance: xu_dma, and set properties + set xu_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 xu_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s {0} \ + CONFIG.c_include_s2mm {1} \ + CONFIG.c_include_s2mm_dre {1} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_s2mm_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {26} \ + ] $xu_dma + + # Create interface connections + connect_bd_intf_net -intf_net HlsAxisKernelU_0_xu_port [get_bd_intf_pins HlsAxisKernelU_0/xu_port] [get_bd_intf_pins xu_dma/S_AXIS_S2MM] + connect_bd_intf_net -intf_net axi_mem_intercon_1_M00_AXI [get_bd_intf_pins axi_mem_intercon_1/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP1] + connect_bd_intf_net -intf_net axi_mem_intercon_2_M00_AXI [get_bd_intf_pins axi_mem_intercon_2/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP2] + connect_bd_intf_net -intf_net axi_mem_intercon_M00_AXI [get_bd_intf_pins axi_mem_intercon/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP0] + connect_bd_intf_net -intf_net processing_system7_0_DDR [get_bd_intf_ports DDR] 
[get_bd_intf_pins processing_system7_0/DDR] + connect_bd_intf_net -intf_net processing_system7_0_FIXED_IO [get_bd_intf_ports FIXED_IO] [get_bd_intf_pins processing_system7_0/FIXED_IO] + connect_bd_intf_net -intf_net processing_system7_0_M_AXI_GP0 [get_bd_intf_pins processing_system7_0/M_AXI_GP0] [get_bd_intf_pins ps7_0_axi_periph/S00_AXI] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M00_AXI [get_bd_intf_pins HlsAxisKernelU_0/s_axi_control] [get_bd_intf_pins ps7_0_axi_periph/M00_AXI] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M01_AXI [get_bd_intf_pins ps7_0_axi_periph/M01_AXI] [get_bd_intf_pins x_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M02_AXI [get_bd_intf_pins ps7_0_axi_periph/M02_AXI] [get_bd_intf_pins u_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M03_AXI [get_bd_intf_pins ps7_0_axi_periph/M03_AXI] [get_bd_intf_pins xu_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net u_dma_M_AXIS_MM2S [get_bd_intf_pins HlsAxisKernelU_0/u_port] [get_bd_intf_pins u_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net u_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon_1/S00_AXI] [get_bd_intf_pins u_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net x_dma_M_AXIS_MM2S [get_bd_intf_pins HlsAxisKernelU_0/x_port] [get_bd_intf_pins x_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net x_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon/S00_AXI] [get_bd_intf_pins x_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net xu_dma_M_AXI_S2MM [get_bd_intf_pins axi_mem_intercon_2/S00_AXI] [get_bd_intf_pins xu_dma/M_AXI_S2MM] + + # Create port connections + connect_bd_net -net processing_system7_0_FCLK_CLK0 [get_bd_pins HlsAxisKernelU_0/ap_clk] [get_bd_pins axi_mem_intercon/ACLK] [get_bd_pins axi_mem_intercon/M00_ACLK] [get_bd_pins axi_mem_intercon/S00_ACLK] [get_bd_pins axi_mem_intercon_1/ACLK] [get_bd_pins axi_mem_intercon_1/M00_ACLK] [get_bd_pins axi_mem_intercon_1/S00_ACLK] [get_bd_pins axi_mem_intercon_2/ACLK] [get_bd_pins axi_mem_intercon_2/M00_ACLK] 
[get_bd_pins axi_mem_intercon_2/S00_ACLK] [get_bd_pins processing_system7_0/FCLK_CLK0] [get_bd_pins processing_system7_0/M_AXI_GP0_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP0_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP1_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP2_ACLK] [get_bd_pins ps7_0_axi_periph/ACLK] [get_bd_pins ps7_0_axi_periph/M00_ACLK] [get_bd_pins ps7_0_axi_periph/M01_ACLK] [get_bd_pins ps7_0_axi_periph/M02_ACLK] [get_bd_pins ps7_0_axi_periph/M03_ACLK] [get_bd_pins ps7_0_axi_periph/S00_ACLK] [get_bd_pins rst_ps7_0_100M/slowest_sync_clk] [get_bd_pins u_dma/m_axi_mm2s_aclk] [get_bd_pins u_dma/s_axi_lite_aclk] [get_bd_pins x_dma/m_axi_mm2s_aclk] [get_bd_pins x_dma/s_axi_lite_aclk] [get_bd_pins xu_dma/m_axi_s2mm_aclk] [get_bd_pins xu_dma/s_axi_lite_aclk] + connect_bd_net -net processing_system7_0_FCLK_RESET0_N [get_bd_pins processing_system7_0/FCLK_RESET0_N] [get_bd_pins rst_ps7_0_100M/ext_reset_in] + connect_bd_net -net rst_ps7_0_100M_peripheral_aresetn [get_bd_pins HlsAxisKernelU_0/ap_rst_n] [get_bd_pins axi_mem_intercon/ARESETN] [get_bd_pins axi_mem_intercon/M00_ARESETN] [get_bd_pins axi_mem_intercon/S00_ARESETN] [get_bd_pins axi_mem_intercon_1/ARESETN] [get_bd_pins axi_mem_intercon_1/M00_ARESETN] [get_bd_pins axi_mem_intercon_1/S00_ARESETN] [get_bd_pins axi_mem_intercon_2/ARESETN] [get_bd_pins axi_mem_intercon_2/M00_ARESETN] [get_bd_pins axi_mem_intercon_2/S00_ARESETN] [get_bd_pins ps7_0_axi_periph/ARESETN] [get_bd_pins ps7_0_axi_periph/M00_ARESETN] [get_bd_pins ps7_0_axi_periph/M01_ARESETN] [get_bd_pins ps7_0_axi_periph/M02_ARESETN] [get_bd_pins ps7_0_axi_periph/M03_ARESETN] [get_bd_pins ps7_0_axi_periph/S00_ARESETN] [get_bd_pins rst_ps7_0_100M/peripheral_aresetn] [get_bd_pins u_dma/axi_resetn] [get_bd_pins x_dma/axi_resetn] [get_bd_pins xu_dma/axi_resetn] + + # Create address segments + create_bd_addr_seg -range 0x00010000 -offset 0x40000000 [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs 
HlsAxisKernelU_0/s_axi_control/Reg] SEG_HlsAxisKernelU_0_Reg + create_bd_addr_seg -range 0x00010000 -offset 0x40410000 [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs u_dma/S_AXI_LITE/Reg] SEG_u_dma_Reg + create_bd_addr_seg -range 0x00010000 -offset 0x40400000 [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs x_dma/S_AXI_LITE/Reg] SEG_x_dma_Reg + create_bd_addr_seg -range 0x00010000 -offset 0x40420000 [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs xu_dma/S_AXI_LITE/Reg] SEG_xu_dma_Reg + create_bd_addr_seg -range 0x20000000 -offset 0x00000000 [get_bd_addr_spaces u_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP1/HP1_DDR_LOWOCM] SEG_processing_system7_0_HP1_DDR_LOWOCM + create_bd_addr_seg -range 0x20000000 -offset 0x00000000 [get_bd_addr_spaces x_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP0/HP0_DDR_LOWOCM] SEG_processing_system7_0_HP0_DDR_LOWOCM + create_bd_addr_seg -range 0x20000000 -offset 0x00000000 [get_bd_addr_spaces xu_dma/Data_S2MM] [get_bd_addr_segs processing_system7_0/S_AXI_HP2/HP2_DDR_LOWOCM] SEG_processing_system7_0_HP2_DDR_LOWOCM + + + # Restore current instance + current_bd_instance $oldCurInst + + validate_bd_design + save_bd_design +} +# End of create_root_design() + + +################################################################## +# MAIN FLOW +################################################################## + +create_root_design "" + + diff --git a/pynq/kernel_v/binfile_example.bin b/pynq/kernel_v/binfile_example.bin new file mode 100644 index 0000000..8c01583 Binary files /dev/null and b/pynq/kernel_v/binfile_example.bin differ diff --git a/pynq/kernel_v/kernel_v.ipynb b/pynq/kernel_v/kernel_v.ipynb new file mode 100644 index 0000000..1c0e1f3 --- /dev/null +++ b/pynq/kernel_v/kernel_v.ipynb @@ -0,0 +1,794 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Testing Kernel-V\n", + "\n", + "This notebook will test an IP written 
in Vivado HLS." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%microblaze/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%pybind11/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from pynq import Overlay\n", + "import pynq.lib.dma\n", + "from pynq import allocate\n", + "import numpy as np\n", + "from pynq import DefaultIP\n", + "import timeit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Program FPGA and inspect Overlay." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "overlay = Overlay('overlay/kernel_v.bit')\n", + "print(overlay.device)\n", + "overlay?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get the kernel register map." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RegisterMap {\n", + " CTRL = Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, RESERVED_3=0, RESERVED_4=0),\n", + " GIER = Register(Enable=0, RESERVED=0),\n", + " IP_IER = Register(CHAN0_INT_EN=0, CHAN1_INT_EN=0, RESERVED=0),\n", + " IP_ISR = Register(CHAN0_INT_ST=0, CHAN1_INT_ST=0, RESERVED=0),\n", + " num_active_inputs = Register(num_active_inputs=0),\n", + " output_size = Register(output_size=0),\n", + " num_refinements_0 = Register(num_refinements_0=0),\n", + " num_refinements_1 = Register(num_refinements_1=0)\n", + "}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel = overlay.HlsKernelV_0\n", + "kernel.register_map\n", + "# print(\"stream size: \", adder.stream_size)\n", + "# accel_state = adder.get_state()\n", + "# print(\"accelerator state: \", accel_state)\n", + "# dma = overlay.axi_dma_0\n", + "# dma.register_map.MM2S_DMASR\n", + "# dma.register_map.S2MM_DMACR" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Kernel IP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The kernel IP can be automatically bound by first creating our Kernel class. Then, the overlay can be instantiated again." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "class KernelDriver(DefaultIP):\n", + " def __init__(self, description):\n", + " super().__init__(description=description)\n", + " \n", + " bindto = ['xilinx.com:hls:HlsKernelV:1.0']\n", + "\n", + " def start_accel(self):\n", + " self.register_map.CTRL.AP_START = 1\n", + " self.write(0x0, 1)\n", + " self.write(0x0, 1)\n", + " while(self.read(0x0) % 2 == 0):\n", + " self.write(0x0, 1)\n", + " pass # Wait until start, i.e. 
bit 0, is set.\n", + "\n", + " def set_state(self, state):\n", + " # self.register_map.CTRL = state\n", + " # return self.register_map.CTRL\n", + " self.write(0x0, state)\n", + " return self.read(0x0)\n", + "\n", + " def get_state(self):\n", + " return self.register_map.CTRL\n", + " # return self.read(0x0)\n", + "\n", + " @property\n", + " def num_refinements(self):\n", + " return (self.register_map.num_refinements_0, self.register_map.num_refinements_1)\n", + " # return self.read(0x10)\n", + "\n", + " @num_refinements.setter\n", + " def num_refinements(self, R):\n", + " self.register_map.num_refinements_0 = R[0]\n", + " self.register_map.num_refinements_1 = R[1]\n", + " # self.write(0x10, R)\n", + "\n", + " @property\n", + " def num_active_inputs(self):\n", + " return self.register_map.num_active_inputs\n", + " # return self.read(0x10)\n", + "\n", + " @num_active_inputs.setter\n", + " def num_active_inputs(self, N):\n", + " self.register_map.num_active_inputs = N\n", + " # self.write(0x10, R)\n", + "\n", + " @property\n", + " def output_size(self):\n", + " return self.register_map.output_size\n", + " # return self.read(0x10)\n", + "\n", + " @output_size.setter\n", + " def output_size(self, H):\n", + " self.register_map.output_size = H\n", + " # self.write(0x10, R)\n", + "\n", + "overlay = Overlay(\"overlay/kernel_v.bit\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check the kernel again:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, RESERVED_3=0, RESERVED_4=0)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel_v = overlay.HlsKernelV_0\n", + "kernel_v.get_state()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { +
"text/plain": [ + "0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernel_v.read(0x10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To show the class is working, we set up the `num_refinements` using the setter method. We then read its corresponding register." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(Register(num_refinements_0=0), Register(num_refinements_1=0))\n", + "(Register(num_refinements_0=1), Register(num_refinements_1=1))\n" + ] + } + ], + "source": [ + "print(kernel_v.num_refinements)\n", + "kernel_v.num_refinements = (1, 1)\n", + "print(kernel_v.num_refinements)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0x4\n", + "0x4\n" + ] + }, + { + "data": { + "text/plain": [ + "Register(AP_START=0, AP_DONE=0, AP_IDLE=1, AP_READY=0, RESERVED_1=0, AUTO_RESTART=0, RESERVED_2=0, RESERVED_3=0, RESERVED_4=0)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(kernel_v.get_state())\n", + "# kernel_u.start_accel()\n", + "print(kernel_v.get_state())\n", + "kernel_v.get_state()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0x0\n", + "0x2\n" + ] + } + ], + "source": [ + "print(kernel_v.num_active_inputs)\n", + "kernel_v.num_active_inputs = 2\n", + "print(kernel_v.num_active_inputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Allocation and Run\n", + "\n", + "The data structures must be contiguously allocated."
+ ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Buffers setup completed.\n", + "xus_buffer.shape: (16, 1, 4) - Bytes: 128\n", + "v_buffer.shape: (16, 64, 4, 4) - Bytes: 32768\n", + "y_buffer.shape: (64, 1, 4, 4) - Bytes: 2048\n" + ] + } + ], + "source": [ + "# The following parameters are fixed in hardware and cannot be changed:\n", + "# - The maximum output size H\n", + "# - The number of gates G\n", + "# - The tile size Tv\n", + "H = 512\n", + "G = 4\n", + "Tv = 4\n", + "data_t = np.int16\n", + "# The following parameters are customizable in hardware and can be changed:\n", + "# - The number of refinements R\n", + "# - The output_size <= H\n", + "# - The number of active_inputs <= N\n", + "R = 16\n", + "N = 1\n", + "output_size = 256 # % H\n", + "# NOTE: Working with (R, N, out) == (16, 2, 8) \n", + "\n", + "xus = np.random.randn(R, N, G).astype(dtype=data_t)\n", + "v = np.random.randn(R, output_size // Tv, G, Tv).astype(dtype=data_t)\n", + "y = np.zeros((output_size // Tv, N, Tv, G)).astype(dtype=data_t)\n", + "\n", + "xus_buffer = pynq.allocate(shape=(R, N, G), dtype=data_t)\n", + "v_buffer = pynq.allocate(shape=(R, output_size // Tv, G, Tv), dtype=data_t)\n", + "y_buffer = pynq.allocate(shape=(output_size // Tv, N, Tv, G), dtype=data_t)\n", + "\n", + "np.copyto(xus_buffer, xus, casting='no')\n", + "np.copyto(v_buffer, v, casting='no')\n", + "np.copyto(y_buffer, y, casting='no')\n", + "\n", + "print('Buffers setup completed.')\n", + "print(f'xus_buffer.shape: {xus_buffer.shape} - Bytes: {xus_buffer.nbytes}')\n", + "print(f'v_buffer.shape: {v_buffer.shape} - Bytes: {v_buffer.nbytes}')\n", + "print(f'y_buffer.shape: {y_buffer.shape} - Bytes: {y_buffer.nbytes}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up the kernel and then send the data through the DMAs."
+ ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0xa\n", + "0x1\n", + "Starting transfer:\n", + "Wait xus...DONE.\n", + "Wait v...DONE.\n", + "Wait y...DONE.\n", + "\n", + "y_buffer.shape: (64, 1, 4, 4)\n" + ] + } + ], + "source": [ + "kernel_v.num_refinements = (R, R // 4)\n", + "kernel_v.output_size = output_size\n", + "kernel_v.num_active_inputs = N\n", + "print(kernel_v.get_state())\n", + "kernel_v.start_accel()\n", + "print(kernel_v.get_state())\n", + "\n", + "# Transfer\n", + "print('Starting transfer:')\n", + "overlay.xus_dma.sendchannel.transfer(xus_buffer)\n", + "overlay.v_dma.sendchannel.transfer(v_buffer)\n", + "overlay.y_dma.recvchannel.transfer(y_buffer)\n", + "# Then wait\n", + "print('Wait xus...', end='')\n", + "overlay.xus_dma.sendchannel.wait()\n", + "print('DONE.\\nWait v...', end='')\n", + "overlay.v_dma.sendchannel.wait()\n", + "print('DONE.\\nWait y...', end='')\n", + "overlay.y_dma.recvchannel.wait()\n", + "print('DONE.\\n')\n", + "\n", + "print(f'y_buffer.shape: {y_buffer.shape}')\n", + "# print(f'xu_buffer: {xu_buffer}')" + ] + }, + { + "cell_type": "code", + "execution_count": 186, + "metadata": {}, + "outputs": [], + "source": [ + "def run_kernel(R, x_buffer, u_buffer, xu_buffer):\n", + " kernel_u.num_refinements = R\n", + " kernel_u.start_accel()\n", + " # Transfer\n", + " overlay.x_dma.sendchannel.transfer(x_buffer)\n", + " overlay.u_dma.sendchannel.transfer(u_buffer)\n", + " overlay.xu_dma.recvchannel.transfer(xu_buffer)\n", + " # Then wait\n", + " overlay.x_dma.sendchannel.wait()\n", + " overlay.u_dma.sendchannel.wait()\n", + " overlay.xu_dma.recvchannel.wait()" + ] + }, + { + "cell_type": "code", + "execution_count": 192, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 loops, best of 3: 80.5 ms per loop\n" + ] + } + ], + "source": [ + "%timeit run_kernel(R, x_buffer, 
u_buffer, xu_buffer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checking Correctness\n", + "\n", + "We first find the proper reshape mechanisms:" + ] + }, + { + "cell_type": "code", + "execution_count": 193, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0.36593539 -1.03844877 0.82985754 -0.82067175] [ 0.36593539 -1.03844877 0.82985754 -0.82067175]\n", + "0.0\n", + "[-0.07974188 0.01109454 -0.18120697 0.73842526] [-0.07974188 0.01109454 -0.18120697 0.73842526]\n", + "0.0\n", + "10 loops, best of 3: 24.1 ms per loop\n", + "(128, 4, 2)\n" + ] + } + ], + "source": [ + "# =============================================================================\n", + "# Reshape: (R, I, G) => (R, I // Tu, G, Tu)\n", + "# =============================================================================\n", + "u = np.random.randn(R, I, G)\n", + "u_tmp = u.copy()\n", + "u_tmp = np.transpose(u_tmp.reshape(R, I // Tu, Tu, G), (0, 1, 3, 2))\n", + "print(u[0, 0:4, 0], u_tmp[0, 0, 0, 0:4])\n", + "print(u[0, 3, 0] - u_tmp[0, 0, 0, 3])\n", + "\n", + "# =============================================================================\n", + "# Reshape: (R, I // Tu, G, Tu) => (I, G, R)\n", + "# =============================================================================\n", + "u = np.random.randn(R, I // Tu, G, Tu)\n", + "u_tmp = u.copy()\n", + "u_tmp = np.transpose(u_tmp, (1, 3, 2, 0)).reshape(I, G, R)\n", + "print(u[0, 0, 0, 0:4], u_tmp[0:4, 0, 0])\n", + "print(u[0, 0, 0, 3] - u_tmp[3, 0, 0])\n", + "\n", + "x = np.random.randn(N, I)\n", + "u = np.random.randn(I, G, R)\n", + "x = (x * 2).astype(np.int16)\n", + "u = (u * 2).astype(np.int16)\n", + "\n", + "%timeit xu = np.transpose(np.tensordot(x, u, axes=1), (2, 1, 0))\n", + "print(xu.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now check the Numpy computation against the FPGA result." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 194, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10 loops, best of 3: 105 ms per loop\n", + "\n", + "All equal: True\n", + "gold[0]: [[ 8822 -32153]\n", + " [-17540 6635]\n", + " [ 6489 5700]\n", + " [ 11839 25184]]\n", + "fpga[0]: [[ 8822 -32153]\n", + " [-17540 6635]\n", + " [ 6489 5700]\n", + " [ 11839 25184]]\n" + ] + } + ], + "source": [ + "u_tmp = np.transpose(u_buffer, (1, 3, 2, 0)).reshape(I, G, R)\n", + "%timeit xu_gold = np.transpose(np.tensordot(x_buffer, u_tmp, axes=1), (2, 1, 0))\n", + "print('\\nAll equal:', np.allclose(xu_buffer, xu_gold))\n", + "print('gold[0]: ', xu_gold[0])\n", + "print('fpga[0]: ', xu_buffer[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading File into PYNQ Buffer" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "tmp = np.random.randn(R, N, G).astype(dtype=data_t)\n", + "tmp.tofile('binfile_example.bin')" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PynqBuffer([[[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]],\n", + "\n", + " [[0, 0, 0, 0]]], dtype=int16)" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xus_buffer = pynq.allocate(shape=(R, N, G), dtype=data_t)\n", + "xus_buffer" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, +
"outputs": [ + { + "data": { + "text/plain": [ + "PynqBuffer([[[-1, 0, -1, 0]],\n", + "\n", + " [[ 0, -2, 0, -1]],\n", + "\n", + " [[ 0, 0, 2, 0]],\n", + "\n", + " [[ 0, -1, 0, 0]],\n", + "\n", + " [[ 0, 0, 0, 0]],\n", + "\n", + " [[ 0, 1, 0, 0]],\n", + "\n", + " [[ 0, 0, 0, 0]],\n", + "\n", + " [[-1, 0, 0, 0]],\n", + "\n", + " [[ 0, 0, -1, 0]],\n", + "\n", + " [[ 0, -1, 0, 0]],\n", + "\n", + " [[ 1, 0, 0, 0]],\n", + "\n", + " [[ 0, 0, -1, 0]],\n", + "\n", + " [[-1, 0, -1, 0]],\n", + "\n", + " [[ 0, 0, 0, 0]],\n", + "\n", + " [[ 0, -1, -1, -1]],\n", + "\n", + " [[-1, 0, 0, 0]]], dtype=int16)" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tmp = np.fromfile('binfile_example.bin', dtype=data_t).reshape(xus_buffer.shape)\n", + "np.copyto(xus_buffer, tmp, casting='no')\n", + "xus_buffer" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "def load_from_bin(binfile, shape, dtype): # load a raw binary file into a freshly allocated PYNQ buffer\n", + " tmp_buffer = pynq.allocate(shape=shape, dtype=dtype)\n", + " tmp = np.fromfile(binfile, dtype=dtype).reshape(tmp_buffer.shape) # read with the caller-supplied dtype, not the global data_t\n", + " np.copyto(tmp_buffer, tmp, casting='no')\n", + " return tmp_buffer" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PynqBuffer([[[-1, 0, -1, 0]],\n", + "\n", + " [[ 0, -2, 0, -1]],\n", + "\n", + " [[ 0, 0, 2, 0]],\n", + "\n", + " [[ 0, -1, 0, 0]],\n", + "\n", + " [[ 0, 0, 0, 0]],\n", + "\n", + " [[ 0, 1, 0, 0]],\n", + "\n", + " [[ 0, 0, 0, 0]],\n", + "\n", + " [[-1, 0, 0, 0]],\n", + "\n", + " [[ 0, 0, -1, 0]],\n", + "\n", + " [[ 0, -1, 0, 0]],\n", + "\n", + " [[ 1, 0, 0, 0]],\n", + "\n", + " [[ 0, 0, -1, 0]],\n", + "\n", + " [[-1, 0, -1, 0]],\n", + "\n", + " [[ 0, 0, 0, 0]],\n", + "\n", + " [[ 0, -1, -1, -1]],\n", + "\n", + " [[-1, 0, 0, 0]]], 
dtype=int16)" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } +
], + "source": [ + "xus_buffer = load_from_bin('binfile_example.bin', shape=(R, N, G), dtype=data_t)\n", + "xus_buffer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pynq/kernel_v/overlay/kernel_v.bit b/pynq/kernel_v/overlay/kernel_v.bit new file mode 100644 index 0000000..c391107 Binary files /dev/null and b/pynq/kernel_v/overlay/kernel_v.bit differ diff --git a/pynq/kernel_v/overlay/kernel_v.hwh b/pynq/kernel_v/overlay/kernel_v.hwh new file mode 100644 index 0000000..541a112 --- /dev/null +++ b/pynq/kernel_v/overlay/kernel_v.hwh @@ -0,0 +1,9180 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/pynq/kernel_v/overlay/kernel_v.tcl b/pynq/kernel_v/overlay/kernel_v.tcl new file mode 100644 index 0000000..541d234 --- /dev/null +++ b/pynq/kernel_v/overlay/kernel_v.tcl @@ -0,0 +1,717 @@ + +################################################################ +# This is a generated script based on design: design_1 +# +# Though there are limitations about the generated script, +# the main purpose of this utility is to make learning +# IP Integrator Tcl commands easier. 
+################################################################ + +namespace eval _tcl { +proc get_script_folder {} { + set script_path [file normalize [info script]] + set script_folder [file dirname $script_path] + return $script_folder +} +} +variable script_folder +set script_folder [_tcl::get_script_folder] + +################################################################ +# Check if script is running in correct Vivado version. +################################################################ +set scripts_vivado_version 2020.2 +set current_vivado_version [version -short] + +if { [string first $scripts_vivado_version $current_vivado_version] == -1 } { + puts "" + catch {common::send_gid_msg -ssname BD::TCL -id 2041 -severity "ERROR" "This script was generated using Vivado <$scripts_vivado_version> and is being run in <$current_vivado_version> of Vivado. Please run the script in Vivado <$scripts_vivado_version> then open the design in Vivado <$current_vivado_version>. Upgrade the design by running \"Tools => Report => Report IP Status...\", then run write_bd_tcl to create an updated script."} + + return 1 +} + +################################################################ +# START +################################################################ + +# To test this script, run the following commands from Vivado Tcl console: +# source design_1_script.tcl + +# If there is no project opened, this script will create a +# project, but make sure you do not have an existing project +# <./myproj/project_1.xpr> in the current working folder. 
+ +set list_projs [get_projects -quiet] +if { $list_projs eq "" } { + create_project project_1 myproj -part xc7z020clg484-1 + set_property BOARD_PART em.avnet.com:zed:part0:1.4 [current_project] +} + + +# CHANGE DESIGN NAME HERE +variable design_name +set design_name design_1 + +# If you do not already have an existing IP Integrator design open, +# you can create a design using the following command: +# create_bd_design $design_name + +# Creating design if needed +set errMsg "" +set nRet 0 + +set cur_design [current_bd_design -quiet] +set list_cells [get_bd_cells -quiet] + +if { ${design_name} eq "" } { + # USE CASES: + # 1) Design_name not set + + set errMsg "Please set the variable to a non-empty value." + set nRet 1 + +} elseif { ${cur_design} ne "" && ${list_cells} eq "" } { + # USE CASES: + # 2): Current design opened AND is empty AND names same. + # 3): Current design opened AND is empty AND names diff; design_name NOT in project. + # 4): Current design opened AND is empty AND names diff; design_name exists in project. + + if { $cur_design ne $design_name } { + common::send_gid_msg -ssname BD::TCL -id 2001 -severity "INFO" "Changing value of from <$design_name> to <$cur_design> since current design is empty." + set design_name [get_property NAME $cur_design] + } + common::send_gid_msg -ssname BD::TCL -id 2002 -severity "INFO" "Constructing design in IPI design <$cur_design>..." + +} elseif { ${cur_design} ne "" && $list_cells ne "" && $cur_design eq $design_name } { + # USE CASES: + # 5) Current design opened AND has components AND same names. + + set errMsg "Design <$design_name> already exists in your project, please set the variable to another value." + set nRet 1 +} elseif { [get_files -quiet ${design_name}.bd] ne "" } { + # USE CASES: + # 6) Current opened design, has components, but diff names, design_name exists in project. + # 7) No opened design, design_name exists in project. 
+ + set errMsg "Design <$design_name> already exists in your project, please set the variable to another value." + set nRet 2 + +} else { + # USE CASES: + # 8) No opened design, design_name not in project. + # 9) Current opened design, has components, but diff names, design_name not in project. + + common::send_gid_msg -ssname BD::TCL -id 2003 -severity "INFO" "Currently there is no design <$design_name> in project, so creating one..." + + create_bd_design $design_name + + common::send_gid_msg -ssname BD::TCL -id 2004 -severity "INFO" "Making design <$design_name> as current_bd_design." + current_bd_design $design_name + +} + +common::send_gid_msg -ssname BD::TCL -id 2005 -severity "INFO" "Currently the variable is equal to \"$design_name\"." + +if { $nRet != 0 } { + catch {common::send_gid_msg -ssname BD::TCL -id 2006 -severity "ERROR" $errMsg} + return $nRet +} + +set bCheckIPsPassed 1 +################################################################## +# CHECK IPs +################################################################## +set bCheckIPs 1 +if { $bCheckIPs == 1 } { + set list_check_ips "\ +xilinx.com:hls:HlsKernelV:1.0\ +xilinx.com:ip:processing_system7:5.5\ +xilinx.com:ip:proc_sys_reset:5.0\ +xilinx.com:ip:axi_dma:7.1\ +" + + set list_ips_missing "" + common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." + + foreach ip_vlnv $list_check_ips { + set ip_obj [get_ipdefs -all $ip_vlnv] + if { $ip_obj eq "" } { + lappend list_ips_missing $ip_vlnv + } + } + + if { $list_ips_missing ne "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." 
} + set bCheckIPsPassed 0 + } + +} + +if { $bCheckIPsPassed != 1 } { + common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above." + return 3 +} + +################################################################## +# DESIGN PROCs +################################################################## + + + +# Procedure to create entire design; Provide argument to make +# procedure reusable. If parentCell is "", will use root. +proc create_root_design { parentCell } { + + variable script_folder + variable design_name + + if { $parentCell eq "" } { + set parentCell [get_bd_cells /] + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] 
+ + # Set parent object as current + current_bd_instance $parentObj + + + # Create interface ports + set DDR [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:ddrx_rtl:1.0 DDR ] + + set FIXED_IO [ create_bd_intf_port -mode Master -vlnv xilinx.com:display_processing_system7:fixedio_rtl:1.0 FIXED_IO ] + + + # Create ports + + # Create instance: HlsKernelV_0, and set properties + set HlsKernelV_0 [ create_bd_cell -type ip -vlnv xilinx.com:hls:HlsKernelV:1.0 HlsKernelV_0 ] + + # Create instance: axi_mem_intercon, and set properties + set axi_mem_intercon [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon + + # Create instance: axi_mem_intercon_1, and set properties + set axi_mem_intercon_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_1 ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon_1 + + # Create instance: axi_mem_intercon_2, and set properties + set axi_mem_intercon_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 axi_mem_intercon_2 ] + set_property -dict [ list \ + CONFIG.NUM_MI {1} \ + ] $axi_mem_intercon_2 + + # Create instance: processing_system7_0, and set properties + set processing_system7_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 processing_system7_0 ] + set_property -dict [ list \ + CONFIG.PCW_ACT_APU_PERIPHERAL_FREQMHZ {666.666687} \ + CONFIG.PCW_ACT_CAN_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_DCI_PERIPHERAL_FREQMHZ {10.158730} \ + CONFIG.PCW_ACT_ENET0_PERIPHERAL_FREQMHZ {125.000000} \ + CONFIG.PCW_ACT_ENET1_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA0_PERIPHERAL_FREQMHZ {100.000000} \ + CONFIG.PCW_ACT_FPGA1_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA2_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_FPGA3_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_PCAP_PERIPHERAL_FREQMHZ {200.000000} 
\ + CONFIG.PCW_ACT_QSPI_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_SDIO_PERIPHERAL_FREQMHZ {50.000000} \ + CONFIG.PCW_ACT_SMC_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_SPI_PERIPHERAL_FREQMHZ {10.000000} \ + CONFIG.PCW_ACT_TPIU_PERIPHERAL_FREQMHZ {200.000000} \ + CONFIG.PCW_ACT_TTC0_CLK0_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC0_CLK1_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC0_CLK2_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC1_CLK0_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC1_CLK1_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_TTC1_CLK2_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_ACT_UART_PERIPHERAL_FREQMHZ {50.000000} \ + CONFIG.PCW_ACT_WDT_PERIPHERAL_FREQMHZ {111.111115} \ + CONFIG.PCW_APU_PERIPHERAL_FREQMHZ {666.666667} \ + CONFIG.PCW_ARMPLL_CTRL_FBDIV {40} \ + CONFIG.PCW_CAN_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_CAN_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_CAN_PERIPHERAL_FREQMHZ {100} \ + CONFIG.PCW_CLK0_FREQ {100000000} \ + CONFIG.PCW_CLK1_FREQ {10000000} \ + CONFIG.PCW_CLK2_FREQ {10000000} \ + CONFIG.PCW_CLK3_FREQ {10000000} \ + CONFIG.PCW_CPU_CPU_PLL_FREQMHZ {1333.333} \ + CONFIG.PCW_CPU_PERIPHERAL_DIVISOR0 {2} \ + CONFIG.PCW_DCI_PERIPHERAL_DIVISOR0 {15} \ + CONFIG.PCW_DCI_PERIPHERAL_DIVISOR1 {7} \ + CONFIG.PCW_DDRPLL_CTRL_FBDIV {32} \ + CONFIG.PCW_DDR_DDR_PLL_FREQMHZ {1066.667} \ + CONFIG.PCW_DDR_PERIPHERAL_DIVISOR0 {2} \ + CONFIG.PCW_DDR_RAM_HIGHADDR {0x1FFFFFFF} \ + CONFIG.PCW_ENET0_ENET0_IO {MIO 16 .. 27} \ + CONFIG.PCW_ENET0_GRP_MDIO_ENABLE {1} \ + CONFIG.PCW_ENET0_GRP_MDIO_IO {MIO 52 .. 
53} \ + CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR0 {8} \ + CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_ENET0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_ENET0_PERIPHERAL_FREQMHZ {1000 Mbps} \ + CONFIG.PCW_ENET0_RESET_ENABLE {0} \ + CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_ENET1_RESET_ENABLE {0} \ + CONFIG.PCW_ENET_RESET_ENABLE {1} \ + CONFIG.PCW_ENET_RESET_SELECT {Share reset pin} \ + CONFIG.PCW_EN_EMIO_TTC0 {1} \ + CONFIG.PCW_EN_ENET0 {1} \ + CONFIG.PCW_EN_GPIO {1} \ + CONFIG.PCW_EN_QSPI {1} \ + CONFIG.PCW_EN_SDIO0 {1} \ + CONFIG.PCW_EN_TTC0 {1} \ + CONFIG.PCW_EN_UART1 {1} \ + CONFIG.PCW_EN_USB0 {1} \ + CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR1 {2} \ + CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR1 {1} \ + CONFIG.PCW_FPGA0_PERIPHERAL_FREQMHZ {100.000000} \ + CONFIG.PCW_FPGA1_PERIPHERAL_FREQMHZ {150.000000} \ + CONFIG.PCW_FPGA2_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_FPGA_FCLK0_ENABLE {1} \ + CONFIG.PCW_FPGA_FCLK1_ENABLE {0} \ + CONFIG.PCW_FPGA_FCLK2_ENABLE {0} \ + CONFIG.PCW_FPGA_FCLK3_ENABLE {0} \ + CONFIG.PCW_GPIO_MIO_GPIO_ENABLE {1} \ + CONFIG.PCW_GPIO_MIO_GPIO_IO {MIO} \ + CONFIG.PCW_I2C0_GRP_INT_ENABLE {0} \ + CONFIG.PCW_I2C0_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_I2C0_RESET_ENABLE {0} \ + CONFIG.PCW_I2C1_RESET_ENABLE {0} \ + CONFIG.PCW_I2C_PERIPHERAL_FREQMHZ {25} \ + CONFIG.PCW_I2C_RESET_ENABLE {1} \ + CONFIG.PCW_IOPLL_CTRL_FBDIV {30} \ + CONFIG.PCW_IO_IO_PLL_FREQMHZ {1000.000} \ + CONFIG.PCW_MIO_0_DIRECTION {inout} \ + CONFIG.PCW_MIO_0_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_0_PULLUP {disabled} \ + CONFIG.PCW_MIO_0_SLEW {slow} \ + CONFIG.PCW_MIO_10_DIRECTION {inout} \ + CONFIG.PCW_MIO_10_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_10_PULLUP {disabled} \ + 
CONFIG.PCW_MIO_10_SLEW {slow} \ + CONFIG.PCW_MIO_11_DIRECTION {inout} \ + CONFIG.PCW_MIO_11_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_11_PULLUP {disabled} \ + CONFIG.PCW_MIO_11_SLEW {slow} \ + CONFIG.PCW_MIO_12_DIRECTION {inout} \ + CONFIG.PCW_MIO_12_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_12_PULLUP {disabled} \ + CONFIG.PCW_MIO_12_SLEW {slow} \ + CONFIG.PCW_MIO_13_DIRECTION {inout} \ + CONFIG.PCW_MIO_13_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_13_PULLUP {disabled} \ + CONFIG.PCW_MIO_13_SLEW {slow} \ + CONFIG.PCW_MIO_14_DIRECTION {inout} \ + CONFIG.PCW_MIO_14_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_14_PULLUP {disabled} \ + CONFIG.PCW_MIO_14_SLEW {slow} \ + CONFIG.PCW_MIO_15_DIRECTION {inout} \ + CONFIG.PCW_MIO_15_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_15_PULLUP {disabled} \ + CONFIG.PCW_MIO_15_SLEW {slow} \ + CONFIG.PCW_MIO_16_DIRECTION {out} \ + CONFIG.PCW_MIO_16_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_16_PULLUP {disabled} \ + CONFIG.PCW_MIO_16_SLEW {fast} \ + CONFIG.PCW_MIO_17_DIRECTION {out} \ + CONFIG.PCW_MIO_17_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_17_PULLUP {disabled} \ + CONFIG.PCW_MIO_17_SLEW {fast} \ + CONFIG.PCW_MIO_18_DIRECTION {out} \ + CONFIG.PCW_MIO_18_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_18_PULLUP {disabled} \ + CONFIG.PCW_MIO_18_SLEW {fast} \ + CONFIG.PCW_MIO_19_DIRECTION {out} \ + CONFIG.PCW_MIO_19_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_19_PULLUP {disabled} \ + CONFIG.PCW_MIO_19_SLEW {fast} \ + CONFIG.PCW_MIO_1_DIRECTION {out} \ + CONFIG.PCW_MIO_1_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_1_PULLUP {disabled} \ + CONFIG.PCW_MIO_1_SLEW {fast} \ + CONFIG.PCW_MIO_20_DIRECTION {out} \ + CONFIG.PCW_MIO_20_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_20_PULLUP {disabled} \ + CONFIG.PCW_MIO_20_SLEW {fast} \ + CONFIG.PCW_MIO_21_DIRECTION {out} \ + CONFIG.PCW_MIO_21_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_21_PULLUP {disabled} \ + CONFIG.PCW_MIO_21_SLEW {fast} \ + CONFIG.PCW_MIO_22_DIRECTION {in} \ + CONFIG.PCW_MIO_22_IOTYPE {LVCMOS 1.8V} \ + 
CONFIG.PCW_MIO_22_PULLUP {disabled} \ + CONFIG.PCW_MIO_22_SLEW {fast} \ + CONFIG.PCW_MIO_23_DIRECTION {in} \ + CONFIG.PCW_MIO_23_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_23_PULLUP {disabled} \ + CONFIG.PCW_MIO_23_SLEW {fast} \ + CONFIG.PCW_MIO_24_DIRECTION {in} \ + CONFIG.PCW_MIO_24_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_24_PULLUP {disabled} \ + CONFIG.PCW_MIO_24_SLEW {fast} \ + CONFIG.PCW_MIO_25_DIRECTION {in} \ + CONFIG.PCW_MIO_25_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_25_PULLUP {disabled} \ + CONFIG.PCW_MIO_25_SLEW {fast} \ + CONFIG.PCW_MIO_26_DIRECTION {in} \ + CONFIG.PCW_MIO_26_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_26_PULLUP {disabled} \ + CONFIG.PCW_MIO_26_SLEW {fast} \ + CONFIG.PCW_MIO_27_DIRECTION {in} \ + CONFIG.PCW_MIO_27_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_27_PULLUP {disabled} \ + CONFIG.PCW_MIO_27_SLEW {fast} \ + CONFIG.PCW_MIO_28_DIRECTION {inout} \ + CONFIG.PCW_MIO_28_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_28_PULLUP {disabled} \ + CONFIG.PCW_MIO_28_SLEW {fast} \ + CONFIG.PCW_MIO_29_DIRECTION {in} \ + CONFIG.PCW_MIO_29_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_29_PULLUP {disabled} \ + CONFIG.PCW_MIO_29_SLEW {fast} \ + CONFIG.PCW_MIO_2_DIRECTION {inout} \ + CONFIG.PCW_MIO_2_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_2_PULLUP {disabled} \ + CONFIG.PCW_MIO_2_SLEW {fast} \ + CONFIG.PCW_MIO_30_DIRECTION {out} \ + CONFIG.PCW_MIO_30_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_30_PULLUP {disabled} \ + CONFIG.PCW_MIO_30_SLEW {fast} \ + CONFIG.PCW_MIO_31_DIRECTION {in} \ + CONFIG.PCW_MIO_31_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_31_PULLUP {disabled} \ + CONFIG.PCW_MIO_31_SLEW {fast} \ + CONFIG.PCW_MIO_32_DIRECTION {inout} \ + CONFIG.PCW_MIO_32_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_32_PULLUP {disabled} \ + CONFIG.PCW_MIO_32_SLEW {fast} \ + CONFIG.PCW_MIO_33_DIRECTION {inout} \ + CONFIG.PCW_MIO_33_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_33_PULLUP {disabled} \ + CONFIG.PCW_MIO_33_SLEW {fast} \ + CONFIG.PCW_MIO_34_DIRECTION {inout} \ + CONFIG.PCW_MIO_34_IOTYPE 
{LVCMOS 1.8V} \ + CONFIG.PCW_MIO_34_PULLUP {disabled} \ + CONFIG.PCW_MIO_34_SLEW {fast} \ + CONFIG.PCW_MIO_35_DIRECTION {inout} \ + CONFIG.PCW_MIO_35_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_35_PULLUP {disabled} \ + CONFIG.PCW_MIO_35_SLEW {fast} \ + CONFIG.PCW_MIO_36_DIRECTION {in} \ + CONFIG.PCW_MIO_36_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_36_PULLUP {disabled} \ + CONFIG.PCW_MIO_36_SLEW {fast} \ + CONFIG.PCW_MIO_37_DIRECTION {inout} \ + CONFIG.PCW_MIO_37_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_37_PULLUP {disabled} \ + CONFIG.PCW_MIO_37_SLEW {fast} \ + CONFIG.PCW_MIO_38_DIRECTION {inout} \ + CONFIG.PCW_MIO_38_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_38_PULLUP {disabled} \ + CONFIG.PCW_MIO_38_SLEW {fast} \ + CONFIG.PCW_MIO_39_DIRECTION {inout} \ + CONFIG.PCW_MIO_39_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_39_PULLUP {disabled} \ + CONFIG.PCW_MIO_39_SLEW {fast} \ + CONFIG.PCW_MIO_3_DIRECTION {inout} \ + CONFIG.PCW_MIO_3_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_3_PULLUP {disabled} \ + CONFIG.PCW_MIO_3_SLEW {fast} \ + CONFIG.PCW_MIO_40_DIRECTION {inout} \ + CONFIG.PCW_MIO_40_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_40_PULLUP {disabled} \ + CONFIG.PCW_MIO_40_SLEW {fast} \ + CONFIG.PCW_MIO_41_DIRECTION {inout} \ + CONFIG.PCW_MIO_41_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_41_PULLUP {disabled} \ + CONFIG.PCW_MIO_41_SLEW {fast} \ + CONFIG.PCW_MIO_42_DIRECTION {inout} \ + CONFIG.PCW_MIO_42_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_42_PULLUP {disabled} \ + CONFIG.PCW_MIO_42_SLEW {fast} \ + CONFIG.PCW_MIO_43_DIRECTION {inout} \ + CONFIG.PCW_MIO_43_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_43_PULLUP {disabled} \ + CONFIG.PCW_MIO_43_SLEW {fast} \ + CONFIG.PCW_MIO_44_DIRECTION {inout} \ + CONFIG.PCW_MIO_44_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_44_PULLUP {disabled} \ + CONFIG.PCW_MIO_44_SLEW {fast} \ + CONFIG.PCW_MIO_45_DIRECTION {inout} \ + CONFIG.PCW_MIO_45_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_45_PULLUP {disabled} \ + CONFIG.PCW_MIO_45_SLEW {fast} \ + CONFIG.PCW_MIO_46_DIRECTION 
{in} \ + CONFIG.PCW_MIO_46_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_46_PULLUP {disabled} \ + CONFIG.PCW_MIO_46_SLEW {slow} \ + CONFIG.PCW_MIO_47_DIRECTION {in} \ + CONFIG.PCW_MIO_47_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_47_PULLUP {disabled} \ + CONFIG.PCW_MIO_47_SLEW {slow} \ + CONFIG.PCW_MIO_48_DIRECTION {out} \ + CONFIG.PCW_MIO_48_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_48_PULLUP {disabled} \ + CONFIG.PCW_MIO_48_SLEW {slow} \ + CONFIG.PCW_MIO_49_DIRECTION {in} \ + CONFIG.PCW_MIO_49_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_49_PULLUP {disabled} \ + CONFIG.PCW_MIO_49_SLEW {slow} \ + CONFIG.PCW_MIO_4_DIRECTION {inout} \ + CONFIG.PCW_MIO_4_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_4_PULLUP {disabled} \ + CONFIG.PCW_MIO_4_SLEW {fast} \ + CONFIG.PCW_MIO_50_DIRECTION {inout} \ + CONFIG.PCW_MIO_50_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_50_PULLUP {disabled} \ + CONFIG.PCW_MIO_50_SLEW {slow} \ + CONFIG.PCW_MIO_51_DIRECTION {inout} \ + CONFIG.PCW_MIO_51_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_51_PULLUP {disabled} \ + CONFIG.PCW_MIO_51_SLEW {slow} \ + CONFIG.PCW_MIO_52_DIRECTION {out} \ + CONFIG.PCW_MIO_52_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_52_PULLUP {disabled} \ + CONFIG.PCW_MIO_52_SLEW {slow} \ + CONFIG.PCW_MIO_53_DIRECTION {inout} \ + CONFIG.PCW_MIO_53_IOTYPE {LVCMOS 1.8V} \ + CONFIG.PCW_MIO_53_PULLUP {disabled} \ + CONFIG.PCW_MIO_53_SLEW {slow} \ + CONFIG.PCW_MIO_5_DIRECTION {inout} \ + CONFIG.PCW_MIO_5_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_5_PULLUP {disabled} \ + CONFIG.PCW_MIO_5_SLEW {fast} \ + CONFIG.PCW_MIO_6_DIRECTION {out} \ + CONFIG.PCW_MIO_6_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_6_PULLUP {disabled} \ + CONFIG.PCW_MIO_6_SLEW {fast} \ + CONFIG.PCW_MIO_7_DIRECTION {out} \ + CONFIG.PCW_MIO_7_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_7_PULLUP {disabled} \ + CONFIG.PCW_MIO_7_SLEW {slow} \ + CONFIG.PCW_MIO_8_DIRECTION {out} \ + CONFIG.PCW_MIO_8_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_8_PULLUP {disabled} \ + CONFIG.PCW_MIO_8_SLEW {fast} \ + 
CONFIG.PCW_MIO_9_DIRECTION {inout} \ + CONFIG.PCW_MIO_9_IOTYPE {LVCMOS 3.3V} \ + CONFIG.PCW_MIO_9_PULLUP {disabled} \ + CONFIG.PCW_MIO_9_SLEW {slow} \ + CONFIG.PCW_MIO_TREE_PERIPHERALS {GPIO#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#Quad SPI Flash#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#GPIO#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#Enet 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#USB 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#SD 0#UART 1#UART 1#GPIO#GPIO#Enet 0#Enet 0} \ + CONFIG.PCW_MIO_TREE_SIGNALS {gpio[0]#qspi0_ss_b#qspi0_io[0]#qspi0_io[1]#qspi0_io[2]#qspi0_io[3]/HOLD_B#qspi0_sclk#gpio[7]#gpio[8]#gpio[9]#gpio[10]#gpio[11]#gpio[12]#gpio[13]#gpio[14]#gpio[15]#tx_clk#txd[0]#txd[1]#txd[2]#txd[3]#tx_ctl#rx_clk#rxd[0]#rxd[1]#rxd[2]#rxd[3]#rx_ctl#data[4]#dir#stp#nxt#data[0]#data[1]#data[2]#data[3]#clk#data[5]#data[6]#data[7]#clk#cmd#data[0]#data[1]#data[2]#data[3]#wp#cd#tx#rx#gpio[50]#gpio[51]#mdc#mdio} \ + CONFIG.PCW_NAND_GRP_D8_ENABLE {0} \ + CONFIG.PCW_NAND_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_A25_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_CS0_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_CS1_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_CS0_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_CS1_ENABLE {0} \ + CONFIG.PCW_NOR_GRP_SRAM_INT_ENABLE {0} \ + CONFIG.PCW_NOR_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_PCAP_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_PJTAG_PERIPHERAL_ENABLE {0} \ + CONFIG.PCW_PRESET_BANK0_VOLTAGE {LVCMOS 3.3V} \ + CONFIG.PCW_PRESET_BANK1_VOLTAGE {LVCMOS 1.8V} \ + CONFIG.PCW_QSPI_GRP_FBCLK_ENABLE {0} \ + CONFIG.PCW_QSPI_GRP_IO1_ENABLE {0} \ + CONFIG.PCW_QSPI_GRP_SINGLE_SS_ENABLE {1} \ + CONFIG.PCW_QSPI_GRP_SINGLE_SS_IO {MIO 1 .. 6} \ + CONFIG.PCW_QSPI_GRP_SS1_ENABLE {0} \ + CONFIG.PCW_QSPI_PERIPHERAL_DIVISOR0 {5} \ + CONFIG.PCW_QSPI_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_QSPI_PERIPHERAL_FREQMHZ {200} \ + CONFIG.PCW_QSPI_QSPI_IO {MIO 1 .. 
6} \ + CONFIG.PCW_SD0_GRP_CD_ENABLE {1} \ + CONFIG.PCW_SD0_GRP_CD_IO {MIO 47} \ + CONFIG.PCW_SD0_GRP_POW_ENABLE {0} \ + CONFIG.PCW_SD0_GRP_WP_ENABLE {1} \ + CONFIG.PCW_SD0_GRP_WP_IO {MIO 46} \ + CONFIG.PCW_SD0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_SD0_SD0_IO {MIO 40 .. 45} \ + CONFIG.PCW_SDIO_PERIPHERAL_DIVISOR0 {20} \ + CONFIG.PCW_SDIO_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_SDIO_PERIPHERAL_VALID {1} \ + CONFIG.PCW_SINGLE_QSPI_DATA_MODE {x4} \ + CONFIG.PCW_SMC_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_SPI_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_S_AXI_HP2_DATA_WIDTH {64} \ + CONFIG.PCW_TPIU_PERIPHERAL_DIVISOR0 {1} \ + CONFIG.PCW_TTC0_CLK0_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_CLK1_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_CLK2_PERIPHERAL_FREQMHZ {133.333333} \ + CONFIG.PCW_TTC0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_TTC0_TTC0_IO {EMIO} \ + CONFIG.PCW_TTC_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_UART1_GRP_FULL_ENABLE {0} \ + CONFIG.PCW_UART1_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_UART1_UART1_IO {MIO 48 .. 
49} \ + CONFIG.PCW_UART_PERIPHERAL_DIVISOR0 {20} \ + CONFIG.PCW_UART_PERIPHERAL_FREQMHZ {50} \ + CONFIG.PCW_UART_PERIPHERAL_VALID {1} \ + CONFIG.PCW_UIPARAM_ACT_DDR_FREQ_MHZ {533.333374} \ + CONFIG.PCW_UIPARAM_DDR_BANK_ADDR_COUNT {3} \ + CONFIG.PCW_UIPARAM_DDR_BL {8} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY0 {0.41} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY1 {0.411} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY2 {0.341} \ + CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY3 {0.358} \ + CONFIG.PCW_UIPARAM_DDR_CL {7} \ + CONFIG.PCW_UIPARAM_DDR_COL_ADDR_COUNT {10} \ + CONFIG.PCW_UIPARAM_DDR_CWL {6} \ + CONFIG.PCW_UIPARAM_DDR_DEVICE_CAPACITY {2048 MBits} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_0 {0.025} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_1 {0.028} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_2 {0.001} \ + CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_3 {0.001} \ + CONFIG.PCW_UIPARAM_DDR_DRAM_WIDTH {16 Bits} \ + CONFIG.PCW_UIPARAM_DDR_FREQ_MHZ {533.333313} \ + CONFIG.PCW_UIPARAM_DDR_MEMORY_TYPE {DDR 3} \ + CONFIG.PCW_UIPARAM_DDR_PARTNO {MT41J128M16 HA-15E} \ + CONFIG.PCW_UIPARAM_DDR_ROW_ADDR_COUNT {14} \ + CONFIG.PCW_UIPARAM_DDR_SPEED_BIN {DDR3_1066F} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_DATA_EYE {1} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_READ_GATE {1} \ + CONFIG.PCW_UIPARAM_DDR_TRAIN_WRITE_LEVEL {1} \ + CONFIG.PCW_UIPARAM_DDR_T_FAW {45.0} \ + CONFIG.PCW_UIPARAM_DDR_T_RAS_MIN {36.0} \ + CONFIG.PCW_UIPARAM_DDR_T_RC {49.5} \ + CONFIG.PCW_UIPARAM_DDR_T_RCD {7} \ + CONFIG.PCW_UIPARAM_DDR_T_RP {7} \ + CONFIG.PCW_UIPARAM_DDR_USE_INTERNAL_VREF {1} \ + CONFIG.PCW_USB0_PERIPHERAL_ENABLE {1} \ + CONFIG.PCW_USB0_PERIPHERAL_FREQMHZ {60} \ + CONFIG.PCW_USB0_RESET_ENABLE {0} \ + CONFIG.PCW_USB0_USB0_IO {MIO 28 .. 
39} \ + CONFIG.PCW_USB1_RESET_ENABLE {0} \ + CONFIG.PCW_USB_RESET_ENABLE {1} \ + CONFIG.PCW_USB_RESET_SELECT {Share reset pin} \ + CONFIG.PCW_USE_S_AXI_HP0 {1} \ + CONFIG.PCW_USE_S_AXI_HP1 {1} \ + CONFIG.PCW_USE_S_AXI_HP2 {1} \ + CONFIG.preset {ZedBoard} \ + ] $processing_system7_0 + + # Create instance: ps7_0_axi_periph, and set properties + set ps7_0_axi_periph [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_interconnect:2.1 ps7_0_axi_periph ] + set_property -dict [ list \ + CONFIG.NUM_MI {4} \ + ] $ps7_0_axi_periph + + # Create instance: rst_ps7_0_100M, and set properties + set rst_ps7_0_100M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ps7_0_100M ] + + # Create instance: v_dma, and set properties + set v_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 v_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $v_dma + + # Create instance: xus_dma, and set properties + set xus_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 xus_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s_dre {1} \ + CONFIG.c_include_s2mm {0} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_m_axi_mm2s_data_width {64} \ + CONFIG.c_m_axis_mm2s_tdata_width {64} \ + CONFIG.c_mm2s_burst_size {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + CONFIG.c_sg_length_width {16} \ + ] $xus_dma + + # Create instance: y_dma, and set properties + set y_dma [ create_bd_cell -type ip -vlnv xilinx.com:ip:axi_dma:7.1 y_dma ] + set_property -dict [ list \ + CONFIG.c_include_mm2s {0} \ + CONFIG.c_include_s2mm_dre {1} \ + CONFIG.c_include_sg {0} \ + CONFIG.c_micro_dma {0} \ + CONFIG.c_s2mm_burst_size {128} \ + CONFIG.c_s_axis_s2mm_tdata_width {256} \ + CONFIG.c_sg_include_stscntrl_strm {0} \ + 
CONFIG.c_sg_length_width {16} \ + ] $y_dma + + # Create interface connections + connect_bd_intf_net -intf_net HlsKernelV_0_y_port [get_bd_intf_pins HlsKernelV_0/y_port] [get_bd_intf_pins y_dma/S_AXIS_S2MM] + connect_bd_intf_net -intf_net axi_mem_intercon_1_M00_AXI [get_bd_intf_pins axi_mem_intercon_1/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP1] + connect_bd_intf_net -intf_net axi_mem_intercon_2_M00_AXI [get_bd_intf_pins axi_mem_intercon_2/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP2] + connect_bd_intf_net -intf_net axi_mem_intercon_M00_AXI [get_bd_intf_pins axi_mem_intercon/M00_AXI] [get_bd_intf_pins processing_system7_0/S_AXI_HP0] + connect_bd_intf_net -intf_net processing_system7_0_DDR [get_bd_intf_ports DDR] [get_bd_intf_pins processing_system7_0/DDR] + connect_bd_intf_net -intf_net processing_system7_0_FIXED_IO [get_bd_intf_ports FIXED_IO] [get_bd_intf_pins processing_system7_0/FIXED_IO] + connect_bd_intf_net -intf_net processing_system7_0_M_AXI_GP0 [get_bd_intf_pins processing_system7_0/M_AXI_GP0] [get_bd_intf_pins ps7_0_axi_periph/S00_AXI] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M00_AXI [get_bd_intf_pins HlsKernelV_0/s_axi_control] [get_bd_intf_pins ps7_0_axi_periph/M00_AXI] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M01_AXI [get_bd_intf_pins ps7_0_axi_periph/M01_AXI] [get_bd_intf_pins v_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M02_AXI [get_bd_intf_pins ps7_0_axi_periph/M02_AXI] [get_bd_intf_pins xus_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net ps7_0_axi_periph_M03_AXI [get_bd_intf_pins ps7_0_axi_periph/M03_AXI] [get_bd_intf_pins y_dma/S_AXI_LITE] + connect_bd_intf_net -intf_net u_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon_1/S00_AXI] [get_bd_intf_pins v_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net v_dma_M_AXIS_MM2S [get_bd_intf_pins HlsKernelV_0/v_port] [get_bd_intf_pins v_dma/M_AXIS_MM2S] + connect_bd_intf_net -intf_net x_dma_M_AXI_MM2S [get_bd_intf_pins axi_mem_intercon/S00_AXI] 
[get_bd_intf_pins xus_dma/M_AXI_MM2S] + connect_bd_intf_net -intf_net xu_dma_M_AXI_S2MM [get_bd_intf_pins axi_mem_intercon_2/S00_AXI] [get_bd_intf_pins y_dma/M_AXI_S2MM] + connect_bd_intf_net -intf_net xus_dma_M_AXIS_MM2S [get_bd_intf_pins HlsKernelV_0/xus_port] [get_bd_intf_pins xus_dma/M_AXIS_MM2S] + + # Create port connections + connect_bd_net -net processing_system7_0_FCLK_CLK0 [get_bd_pins HlsKernelV_0/ap_clk] [get_bd_pins axi_mem_intercon/ACLK] [get_bd_pins axi_mem_intercon/M00_ACLK] [get_bd_pins axi_mem_intercon/S00_ACLK] [get_bd_pins axi_mem_intercon_1/ACLK] [get_bd_pins axi_mem_intercon_1/M00_ACLK] [get_bd_pins axi_mem_intercon_1/S00_ACLK] [get_bd_pins axi_mem_intercon_2/ACLK] [get_bd_pins axi_mem_intercon_2/M00_ACLK] [get_bd_pins axi_mem_intercon_2/S00_ACLK] [get_bd_pins processing_system7_0/FCLK_CLK0] [get_bd_pins processing_system7_0/M_AXI_GP0_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP0_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP1_ACLK] [get_bd_pins processing_system7_0/S_AXI_HP2_ACLK] [get_bd_pins ps7_0_axi_periph/ACLK] [get_bd_pins ps7_0_axi_periph/M00_ACLK] [get_bd_pins ps7_0_axi_periph/M01_ACLK] [get_bd_pins ps7_0_axi_periph/M02_ACLK] [get_bd_pins ps7_0_axi_periph/M03_ACLK] [get_bd_pins ps7_0_axi_periph/S00_ACLK] [get_bd_pins rst_ps7_0_100M/slowest_sync_clk] [get_bd_pins v_dma/m_axi_mm2s_aclk] [get_bd_pins v_dma/s_axi_lite_aclk] [get_bd_pins xus_dma/m_axi_mm2s_aclk] [get_bd_pins xus_dma/s_axi_lite_aclk] [get_bd_pins y_dma/m_axi_s2mm_aclk] [get_bd_pins y_dma/s_axi_lite_aclk] + connect_bd_net -net processing_system7_0_FCLK_RESET0_N [get_bd_pins processing_system7_0/FCLK_RESET0_N] [get_bd_pins rst_ps7_0_100M/ext_reset_in] + connect_bd_net -net rst_ps7_0_100M_peripheral_aresetn [get_bd_pins HlsKernelV_0/ap_rst_n] [get_bd_pins axi_mem_intercon/ARESETN] [get_bd_pins axi_mem_intercon/M00_ARESETN] [get_bd_pins axi_mem_intercon/S00_ARESETN] [get_bd_pins axi_mem_intercon_1/ARESETN] [get_bd_pins axi_mem_intercon_1/M00_ARESETN] [get_bd_pins 
axi_mem_intercon_1/S00_ARESETN] [get_bd_pins axi_mem_intercon_2/ARESETN] [get_bd_pins axi_mem_intercon_2/M00_ARESETN] [get_bd_pins axi_mem_intercon_2/S00_ARESETN] [get_bd_pins ps7_0_axi_periph/ARESETN] [get_bd_pins ps7_0_axi_periph/M00_ARESETN] [get_bd_pins ps7_0_axi_periph/M01_ARESETN] [get_bd_pins ps7_0_axi_periph/M02_ARESETN] [get_bd_pins ps7_0_axi_periph/M03_ARESETN] [get_bd_pins ps7_0_axi_periph/S00_ARESETN] [get_bd_pins rst_ps7_0_100M/peripheral_aresetn] [get_bd_pins v_dma/axi_resetn] [get_bd_pins xus_dma/axi_resetn] [get_bd_pins y_dma/axi_resetn] + + # Create address segments + assign_bd_address -offset 0x40000000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs HlsKernelV_0/s_axi_control/Reg] -force + assign_bd_address -offset 0x41E00000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs v_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E10000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs xus_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x41E20000 -range 0x00010000 -target_address_space [get_bd_addr_spaces processing_system7_0/Data] [get_bd_addr_segs y_dma/S_AXI_LITE/Reg] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces v_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP1/HP1_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces xus_dma/Data_MM2S] [get_bd_addr_segs processing_system7_0/S_AXI_HP0/HP0_DDR_LOWOCM] -force + assign_bd_address -offset 0x00000000 -range 0x20000000 -target_address_space [get_bd_addr_spaces y_dma/Data_S2MM] [get_bd_addr_segs processing_system7_0/S_AXI_HP2/HP2_DDR_LOWOCM] -force + + + # Restore current instance + current_bd_instance $oldCurInst + + validate_bd_design + save_bd_design +} +# End of 
create_root_design() + + +################################################################## +# MAIN FLOW +################################################################## + +create_root_design "" + + diff --git a/python/SVD_Approximation.ipynb b/python/SVD_Approximation.ipynb new file mode 100644 index 0000000..3cb4bcd --- /dev/null +++ b/python/SVD_Approximation.ipynb @@ -0,0 +1,4235 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "SVD Approximation", + "provenance": [], + "collapsed_sections": [ + "_trp6sx3J3RR", + "9EWRUuxXkzxh", + "9qAJGTrJxCUl", + "Fxd8syuH4WeS", + "L3OA-HB7gExG", + "pjtqPfHpFOpE", + "WpYSOigtTVxh", + "Ya5FeHCMTips", + "xKKBt8IfsoZx", + "rhuPyqNv5Pxk", + "zY1BfLH75W2l", + "ukjOI4iC-uZq", + "euTcBc3P_aoN", + "ykQbfnSia2rf", + "IvprTTb8vXiM", + "95vRr_e-z4k3" + ], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "oHvAGHE5Ya6_" + }, + "source": [ + "# SVD Approximation" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MGO1yyCnXtbR", + "outputId": "09da0a50-9663-4638-a515-84f956046399" + }, + "source": [ + "import tensorflow as tf\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from PIL import Image, ImageOps\n", + "from google.colab import drive\n", + "import re\n", + "from datetime import datetime\n", + "import os\n", + "\n", + "# Load the TensorBoard notebook extension\n", + "%load_ext tensorboard\n", + "%tensorflow_version 2.x\n", + "from tensorboard.plugins.hparams import api as hp\n", + "# Clear any logs from previous tensorboard runs\n", + "!rm -rf /tmp/mylogs\n", + "writer = tf.summary.create_file_writer('/tmp/mylogs')\n", + "\n", + "drive.mount('/gdrive', force_remount=True)" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": 
# Algorithm 1 - Standard SVD Decomposition
def svd1(x_in, Verbose=False, correction_factor=1e-12):
    """
    @brief      Short-hand for applying SVD and dealing with non-converging SVD.

    The SVD is retried on a slightly perturbed copy of the input every time
    `np.linalg.svd` raises `LinAlgError`. If no usable decomposition is found,
    near-zero (1e-15) components are returned instead of raising.

    @param      x_in               2-D input matrix of shape (m, n); it is not modified
    @param      Verbose            print a warning for every failed attempt
    @param      correction_factor  value subtracted from every entry before a retry

    @return     the first U column (m,), the first singular value and the
                first V row (n,)
    """
    # Work on a private copy: the retry path perturbs the matrix in place.
    x = np.array(x_in, copy=True)
    if not np.issubdtype(x.dtype, np.inexact):
        # In-place subtraction of a float would fail on integer input.
        x = x.astype(float)
    m, n = x.shape
    max_tries = 1000
    for _ in range(max_tries):
        try:
            u, s, v = np.linalg.svd(x, full_matrices=False)
        except np.linalg.LinAlgError as e:
            # ==================================================================
            # If SVD didn't converge, add a small error to the matrix and
            # repeat, otherwise the heuristic will always generate zero
            # components.
            # ==================================================================
            if Verbose:
                print("[WARNING] Exception caught: {}".format(e))
                if np.isnan(x).any():
                    print("[WARNING] The provided matrix contains NaNs.")
                if np.isinf(x).any():
                    print("[WARNING] The provided matrix contains Infs.")
            x -= correction_factor
            continue
        if np.any(s):  # not all zeros
            return u[:, 0], s[0], v[0]
        # The SVD succeeded but every singular value is zero. The input is not
        # perturbed on success, so retrying would give the same result.
        break
    # No usable decomposition: return svd components that are close to zero.
    tiny = 1e-15
    return np.full(m, tiny), np.float64(tiny), np.full(n, tiny)
# Algorithm 2 - Joshua Paper
#
# Decompose `k` matrices into `r` sub-matrices.


def mse(a, b):
    """Mean of the squared element-wise difference between `a` and `b`."""
    return ((a - b) ** 2).mean()


def avg_abs_diff(a, b):
    """Mean of the absolute element-wise difference between `a` and `b`."""
    return np.absolute(a - b).mean()


def frobenius_norm(a, b):
    """Square root of `np.linalg.norm(a - b)` (i.e. the root of the norm)."""
    return np.sqrt(np.linalg.norm(a - b))


def frobenius(x):
    """Sum of the squared entries of `x`."""
    return (x ** 2).sum()


def check_threshold(U, U_step, V, V_step, stop_threshold=0.00005,
                    config='mse'):
    """
    @brief      Determines whether the vector refinement can stop.

    @param      U               current u vector
    @param      U_step          u vector of the previous step
    @param      V               current v vector
    @param      V_step          v vector of the previous step
    @param      stop_threshold  both distances must be strictly below this value
    @param      config          distance to use: 'mse', 'avg', 'frobenius-norm'
                                or 'norm'

    @return     (stop, diff_u, diff_v): whether to stop the refinement process
                and the two measured distances

    @raises     ValueError if `config` is not one of the supported names
    """
    metrics = {
        'mse': mse,
        'avg': avg_abs_diff,
        'frobenius-norm': frobenius_norm,
        'norm': lambda a, b: np.linalg.norm(a - b),
    }
    if config not in metrics:
        raise ValueError(
            "Unknown metric '{}'; expected one of {}.".format(config, sorted(metrics)))
    distance = metrics[config]
    diff_u = distance(U, U_step)
    diff_v = distance(V, V_step)
    stop = bool(diff_v < stop_threshold and diff_u < stop_threshold)
    return stop, diff_u, diff_v


def get_vec_from_largest_eig(x):
    """Eigenvector of the symmetric matrix `x` with the largest eigenvalue."""
    w, vr = np.linalg.eigh(x)
    return vr[:, np.argmax(w)]


def update_F(F, U, V):
    """
    @brief      Removes the rank-1 component outer(U, V) from every matrix.

    @param      F     array of shape (k, m, n)
    @param      U     vector of shape (m,)
    @param      V     vector of shape (n,)

    @return     (F_tmp, S): the deflated matrices F[i] - S[i] * outer(U, V)
                and the k coefficients S[i] = U.T @ F[i] @ V
    """
    S = (F @ V) @ U  # (k, m, n) @ (n,) -> (k, m); then @ (m,) -> (k,)
    F_tmp = F - S[:, np.newaxis, np.newaxis] * np.outer(U, V)
    return F_tmp, S


def _leading_vector(sym, decomposition, right=False):
    """Dominant vector of `sym` via eigh ('eigen') or via svd1 (anything else)."""
    if decomposition == 'eigen':
        return get_vec_from_largest_eig(sym)
    u, _, v = svd1(sym)
    return v if right else u


def _alg2_report(report_writer, F_in, F, U, V, report_step, final=False):
    """Writes the per-matrix Algorithm 2 scalars to the TensorBoard writer."""
    label = 'Final MSE' if final else 'MSE'
    with report_writer.as_default():
        F_tmp, _ = update_F(F, U, V)
        for p, (f_in, f_tmp) in enumerate(zip(F_in, F_tmp)):
            tf.summary.scalar(f'Alg2 - {label} {p}-th sub-matrix', mse(f_in, f_tmp), report_step, description=f'Algorithm2 MSE(x, x_approx) of the {p}-th sub-matrix.')
            if not final:
                tf.summary.scalar(f'Alg2 - Mean {p}-th sub-matrix', f_tmp.mean(), report_step, description=f'Algorithm2 Mean(x_approx) of the {p}-th sub-matrix.')
        report_writer.flush()


def algorithm2(F_in, r=1, stop_threshold=0.0001, hard_stop=100,
               decomposition='eigen', truncate=False, metric='mse',
               report_writer=None, report_step=0, scaler=None, Verbose=False):
    """
    @brief      Algorithm from "Synthesis and Optimization
                of 2D Filter Designs for Heterogeneous FPGAs"

    @param      F_in            List or array of matrixes to approximate
                                (they must have same shape); it is not modified
    @param      r               The number of sub-matrices
    @param      stop_threshold  refinement stops when u and v change less than this
    @param      hard_stop       maximum number of refinement iterations per component
    @param      decomposition   'eigen' uses eigh, any other value uses svd1
    @param      truncate        reduce r when the factors would not be smaller
                                than the input
    @param      metric          distance name forwarded to check_threshold
    @param      report_writer   optional TensorBoard summary writer
    @param      report_step     first step index used for the summaries
    @param      scaler          optional per-matrix weights (k values), default ones
    @param      Verbose         Verbose (currently unused)

    @return     the vectors u (r, m), the coefficients lambda_ij (r, k) and the
                vectors v (r, n)
    """
    # Always work on a float copy so deflation neither touches the caller's
    # data nor truncates when the input has an integer dtype.
    if isinstance(F_in, (list, tuple)):
        F = np.stack([np.asarray(f, dtype=float) for f in F_in])
    else:
        F = np.array(F_in, dtype=float, copy=True)
    k, m, n = F.shape
    if truncate:
        # NOTE(review): the element count k * (m + n + r) is kept as written;
        # confirm it is the intended storage model.
        tot_elem = k * (m * n)
        appr_elem = k * (m + n + r)
        if appr_elem >= tot_elem:
            print('[WARNING] The r value ({}) is too high and will be truncated.'.format(r))
            # Largest r with k * (m + n + r) < k * m * n, never below zero.
            r = max(min(r, m * n - m - n - 1), 0)
            print('[WARNING] r set to {}.'.format(r))
    scaler = np.ones(k) if scaler is None else np.asarray(scaler, dtype=float)
    weights = scaler[:, np.newaxis]
    u_array = np.zeros((r, m))
    s_array = np.zeros((r, k))
    v_array = np.zeros((r, n))
    # NOTE: Given a matrix A, A @ A.T is symmetric, hence (spectral theorem)
    # it has real eigenvalues.
    for j in range(r):
        # (m x m) matrix Fn: sum over i of scaler[i] * F[i] @ F[i].T
        Fn = np.tensordot(scaler[:, np.newaxis, np.newaxis] * F, F, axes=([0, 2], [0, 2]))
        u = _leading_vector(Fn, decomposition)
        # Rows are scaler[i] * F[i].T @ u; proj.T @ proj is the (n x n) matrix.
        proj = weights * (u @ F)
        v = _leading_vector(proj.T @ proj, decomposition, right=True)
        U, V = u, v
        for t in range(hard_stop):
            # Rows are scaler[i] * F[i] @ v; the product is (m x m).
            proj = weights * (F @ v)
            u = _leading_vector(proj.T @ proj, decomposition)
            # Rows are scaler[i] * F[i].T @ u; the product is (n x n).
            proj = weights * (u @ F)
            v = _leading_vector(proj.T @ proj, decomposition, right=True)
            # Until u and v vectors change less than a pre-specified
            # value that is set by the user
            stop_refinement, diff_u, diff_v = check_threshold(u, U, v, V, stop_threshold, metric)
            if report_writer is not None:
                _alg2_report(report_writer, F_in, F, U, V, report_step)
                report_step += 1
            U, V = u, v
            if stop_refinement:
                break
        if report_writer is not None and r > 1:
            _alg2_report(report_writer, F_in, F, U, V, report_step, final=True)
            report_step += 1
        # Deflate: remove the component just found before extracting the next.
        F, S = update_F(F, U, V)
        u_array[j] = U
        s_array[j] = S
        v_array[j] = V
    return u_array, s_array, v_array
def algorithm2_inverse(U, S, V, F=None):
    """
    @brief      Rebuilds the k approximated matrices from Algorithm 2 factors.

    @param      U     array of shape (r, m)
    @param      S     array of shape (r, k)
    @param      V     array of shape (r, n)
    @param      F     optional list; when given, the k rebuilt matrices are
                      appended to it

    @return     array of shape (k, m, n) with sum over r of S[r, k] * outer(U[r], V[r])
    """
    F_tmp = np.einsum('rk,rm,rn->kmn', S, U, V, optimize=True)
    if isinstance(F, list):
        F.extend(F_tmp)
    return F_tmp


# Algorithm 3 - SVD and Refinement Steps


def _alg3_report(report_writer, x, x_approx, step):
    """Writes the Algorithm 3 scalars for one refinement step."""
    with report_writer.as_default():
        tf.summary.scalar('Alg3 - MSE', mse(x, x_approx), step, description='Algorithm3 MSE(x, x_approx).')
        for p, (f_in, f_tmp) in enumerate(zip(x, x_approx)):
            tf.summary.scalar(f'Alg3 - MSE {p}-th sub-matrix', mse(f_in, f_tmp), step, description=f'Algorithm3 MSE(x, x_approx) of the {p}-th sub-matrix.')
            tf.summary.scalar(f'Alg3 - Mean {p}-th sub-matrix', f_tmp.mean(), step, description=f'Algorithm3 Mean(x_approx) of the {p}-th sub-matrix.')
            tf.summary.scalar(f'Alg3 - Mean Original {p}-th sub-matrix', f_in.mean(), step, description=f'Algorithm3 Mean(x) of the {p}-th sub-matrix.')
        report_writer.flush()


def _alg3_record_plot(plotdata, x, x_approx):
    """Appends the per-matrix MSE of this step to the `plotdata` dictionary."""
    for p, (f_in, f_tmp) in enumerate(zip(x, x_approx)):
        plotdata.setdefault(f'MSE(matrix[{p}])', []).append(mse(f_in, f_tmp))


def algorithm3(x_in, num_refinements=1, num_sub_matrix=1, truncate=False,
               stop_threshold=0.0001, hard_stop=100, decomposition='eigen',
               metric='mse', scaler=None, report_writer=None,
               apply_scaling_at_alg2=True, plotdata=None):
    """
    @brief      Approximates k matrices by repeatedly running Algorithm 2 on
                the residual left by the previous refinement steps.

    @param      x_in                   list of k (m, n) matrices or (k, m, n) array
    @param      num_refinements        number of refinement steps
    @param      num_sub_matrix         `r` forwarded to algorithm2
    @param      truncate               cap num_refinements by the size check below
    @param      stop_threshold         forwarded to algorithm2
    @param      hard_stop              forwarded to algorithm2
    @param      decomposition          forwarded to algorithm2
    @param      metric                 forwarded to algorithm2
    @param      scaler                 optional per-matrix weights (k values)
    @param      report_writer          optional TensorBoard summary writer
    @param      apply_scaling_at_alg2  True: the weights are applied inside
                                       algorithm2; False: the input is scaled
                                       here and s is un-scaled afterwards
    @param      plotdata               optional dict collecting per-step MSE lists

    @return     u (R, r, m), s (R, r, k) and v (R, r, n), with R the (possibly
                truncated) number of refinements
    """
    x = np.stack(x_in) if isinstance(x_in, list) else x_in
    k, m, n = x.shape
    if truncate:
        # NOTE(review): the factor 2 is kept from the original; it presumably
        # assumes k == 2 merged matrices. Confirm before generalising to k.
        tot_elem = 2 * (m * n)
        elem_per_step = m + n + k
        if num_refinements * elem_per_step >= tot_elem:
            print(f'[WARNING] The num_refinements value ({num_refinements}) is too high and will be truncated.')
            # Largest step count whose factors stay strictly below tot_elem.
            num_refinements = (tot_elem - 1) // elem_per_step
            print(f'[WARNING] num_refinements set to {num_refinements}.')
    u = np.zeros((num_refinements, num_sub_matrix, m))
    s = np.zeros((num_refinements, num_sub_matrix, k))
    v = np.zeros((num_refinements, num_sub_matrix, n))
    report_steps = 0
    x_approx = np.zeros(x.shape)
    if apply_scaling_at_alg2:
        # ======================================================================
        # Apply scaling at Algorithm 2 (Default)
        # ======================================================================
        scale = None
    else:
        # ======================================================================
        # Apply scaling at Algorithm 3 (To be checked)
        # ======================================================================
        scale = np.ones(k) if scaler is None else np.asarray(scaler, dtype=float)
        scale_3d = scale[:, np.newaxis, np.newaxis]
        x_scaled = x * scale_3d
        error = x_scaled.copy()
    for i in range(num_refinements):
        if scale is None:
            u[i], s[i], v[i] = algorithm2(x - x_approx, num_sub_matrix,
                                          stop_threshold, hard_stop, decomposition,
                                          truncate, metric, report_writer,
                                          report_steps, scaler)
            x_approx += algorithm2_inverse(u[i], s[i], v[i])
        else:
            u[i], s_scaled, v[i] = algorithm2(error, num_sub_matrix,
                                              stop_threshold, hard_stop, decomposition,
                                              truncate, metric, report_writer,
                                              report_steps, scaler=None)
            # Undo the per-matrix weight on the coefficients of THIS step only;
            # the matrix index is the last axis of s, not the first.
            s[i] = s_scaled / scale
            x_approx += algorithm2_inverse(u[i], s[i], v[i])
            # Keep the residual in the scaled domain that algorithm2 works in.
            error = x_scaled - x_approx * scale_3d
        if report_writer is not None:
            _alg3_report(report_writer, x, x_approx, i)
        if plotdata is not None:
            _alg3_record_plot(plotdata, x, x_approx)
    return u, s, v


def algorithm3_inverse(U, S, V):
    """
    @brief      Given a list of u, s, v vectors, reconstruct
                the approximated matrixes.

    @param      U     list of u vectors, shape (R, r, m)
    @param      S     list of s vectors, shape (R, r, k)
    @param      V     list of v vectors, shape (R, r, n)

    @return     the reconstructed approximated matrixes, shape (k, m, n)
    """
    # NOTE: The shape variables are:
    #   R := number of refinement steps
    #   r := number of sub-matrices per step
    #   k := number of merged matrixes
    #   m := "input" dimension of the matrixes
    #   n := "output" dimension of the matrixes
    u, s, v = np.array(U), np.array(S), np.array(V)
    # Sum over both R and r in a single contraction, so no (R, k, m, n)
    # intermediate is materialised.
    return np.einsum('irk,irm,irn->kmn', s, u, v, optimize=True)
# Algorithm 3 - Extra Refinements
def algorithm3_extra_refinements(x_in, u_in, s_in, v_in, num_refinements=1, num_sub_matrix=1, truncate=False,
                                 stop_threshold=0.0001, hard_stop=100, decomposition='eigen',
                                 metric='mse', scaler=None,
                                 report_writer=None):
    """
    @brief      Continues an Algorithm 3 decomposition: keeps the refinement
                steps already computed and adds new ones on the residual.

    @param      x_in             list of k (m, n) matrices or (k, m, n) array
    @param      u_in             previous u vectors, shape (P, r, m)
    @param      s_in             previous s vectors, shape (P, r, k)
    @param      v_in             previous v vectors, shape (P, r, n)
    @param      num_refinements  total number of steps wanted (must be >= P)
    @param      num_sub_matrix   `r` forwarded to algorithm2
    @param      truncate         cap num_refinements by the size check below
    @param      stop_threshold   forwarded to algorithm2
    @param      hard_stop        forwarded to algorithm2
    @param      decomposition    forwarded to algorithm2
    @param      metric           forwarded to algorithm2
    @param      scaler           optional per-matrix weights (k values)
    @param      report_writer    optional TensorBoard summary writer

    @return     u, s, v holding the P previous steps followed by the new ones
    """
    u_in, s_in, v_in = np.asarray(u_in), np.asarray(s_in), np.asarray(v_in)
    previous_num_refinements = u_in.shape[0]
    assert previous_num_refinements <= num_refinements, f'Previous #Refinements ({previous_num_refinements}) must be less then num_refinements ({num_refinements}).'
    x = np.stack(x_in) if isinstance(x_in, list) else x_in
    k, m, n = x.shape
    if truncate:
        # NOTE(review): the factor 2 is kept from the original; it presumably
        # assumes k == 2 merged matrices. Confirm before generalising to k.
        tot_elem = 2 * (m * n)
        elem_per_step = m + n + k
        if num_refinements * elem_per_step >= tot_elem:
            print(f'[WARNING] The num_refinements value ({num_refinements}) is too high and will be truncated.')
            num_refinements = (tot_elem - 1) // elem_per_step
            # Never drop below the steps that already exist, otherwise the
            # copy of the previous factors below cannot fit.
            num_refinements = max(num_refinements, previous_num_refinements)
            print(f'[WARNING] num_refinements set to {num_refinements}.')
    u = np.zeros((num_refinements, num_sub_matrix, m))
    s = np.zeros((num_refinements, num_sub_matrix, k))
    v = np.zeros((num_refinements, num_sub_matrix, n))

    u[:previous_num_refinements] = u_in
    s[:previous_num_refinements] = s_in
    v[:previous_num_refinements] = v_in

    report_steps = 0
    # ==========================================================================
    # Scaler version
    # ==========================================================================
    scale = np.ones(k) if scaler is None else np.asarray(scaler, dtype=float)
    scale_3d = scale[:, np.newaxis, np.newaxis]
    x_approx = algorithm3_inverse(u_in, s_in, v_in)
    x_scaled = x * scale_3d
    # Start from what the previous refinements left unexplained, expressed in
    # the scaled domain that algorithm2 works in.
    error = x_scaled - x_approx * scale_3d
    for i in range(previous_num_refinements, num_refinements):
        u[i], s_scaled, v[i] = algorithm2(error, num_sub_matrix,
                                          stop_threshold, hard_stop, decomposition,
                                          truncate, metric, report_writer,
                                          report_steps, scaler=None)
        # Undo the per-matrix weight on the coefficients of THIS step only;
        # the matrix index is the last axis of s, not the first.
        s[i] = s_scaled / scale
        x_approx += algorithm2_inverse(u[i], s[i], v[i])
        error = x_scaled - x_approx * scale_3d
        if report_writer is not None:
            with report_writer.as_default():
                tf.summary.scalar('Alg3 - MSE', mse(x, x_approx), i, description='Algorithm3 MSE(x, x_approx).')
                for p, (f_in, f_tmp) in enumerate(zip(x, x_approx)):
                    tf.summary.scalar(f'Alg3 - MSE {p}-th sub-matrix', mse(f_in, f_tmp), i, description=f'Algorithm3 MSE(x, x_approx) of the {p}-th sub-matrix.')
                    tf.summary.scalar(f'Alg3 - Mean {p}-th sub-matrix', f_tmp.mean(), i, description=f'Algorithm3 Mean(x_approx) of the {p}-th sub-matrix.')
                    tf.summary.scalar(f'Alg3 - Mean Original {p}-th sub-matrix', f_in.mean(), i, description=f'Algorithm3 Mean(x) of the {p}-th sub-matrix.')
                report_writer.flush()
    return u, s, v
"data": "", + "ok": true, + "headers": [ + [ + "content-type", + "image/svg+xml; charset=utf-8" + ] + ], + "status": 200, + "status_text": "OK" + }, + "https://localhost:6006/data/runs": { + "data": "WyJtbmlzdF9SNDVfc2NhbGVyMF8xLjhfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzEzLTEyMzIzOSIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzEzLTEyNTYwOCIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzEzLTEzMDA1MSIsICJtbmlzdF9SNDVfc2NhbGVyMF8zLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzEzLTEzMDQyNyIsICJtbmlzdF9SNDVfc2NhbGVyMF8xNi4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxMy0xMzA2NDYiLCAibW5pc3RfUjhfc2NhbGVyMF8xNi4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxMy0xMzA4NDciLCAibW5pc3RfUjE2X3NjYWxlcjBfMTYuMF9zY2FsZXIxXzEuMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTMwOTI5IiwgIm1uaXN0X1IxNl9zY2FsZXIwXzguMF9zY2FsZXIxXzguMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTMxMDMwIiwgIm1uaXN0X1IxNl9zY2FsZXIwXzQuMF9zY2FsZXIxXzguMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTMxMTEwIiwgIm1uaXN0X1IxNl9zY2FsZXIwXzguMF9zY2FsZXIxXzIuMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTMxMTQ3IiwgImJhc2VfUjhfZnJvYmVuaXVzLW5vcm1fVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTYxNTIwIiwgIm1uaXN0X1IzMl9zY2FsZXIwXzguMF9zY2FsZXIxXzEuMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTYyNjI0IiwgIm1uaXN0X1IzMl9zY2FsZXIwXzguMF9zY2FsZXIxXzEuMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTYyNzQyIiwgIm1uaXN0X1IzMl9zY2FsZXIwXzguMF9zY2FsZXIxXzEuMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTYyOTEwIiwgIm1uaXN0X1IzMl9zY2FsZXIwXzguMF9zY2FsZXIxXzEuMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTY0NDEzIiwgIm1uaXN0XzRibG9ja3NfUjE2X3NjYWxlcjBfMS4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxMy0xNzM5MTYiLCAibW5pc3RfNGJsb2Nrc19SMTZfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzEzLTE3NDA0NyIsICJtbmlzdF80YmxvY2tzX1IzMl9zY2FsZXIwXzEuMF9zY2FsZXIxXzEuMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MTMtMTc0MTExIiwgIm1uaXN0XzRibG9ja3NfUjQ1X3NjYWxlcjBfMS4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxMy0xNzQxNDUiLCAiYmFzZV9SOF9mcm9iZW5pdXMt
bm9ybV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0wNzU3MjMiLCAibW5pc3RfUjMyX3NjYWxlcjBfOC4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0wNzU5MjQiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4wX3NjYWxlcjFfNi4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0wODEwMzQiLCAibW5pc3RfUjQ1X3NjYWxlcjBfNi4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0xMTE3NDMiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4yX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0xMTI5MDYiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0xMTI5NTUiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0xMTM0NDIiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4yX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0xMTM4MTgiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4yX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0xMTQ2MDkiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4wX3NjYWxlcjFfMS44X21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0xMTUyMDIiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMi4wX3NjYWxlcjFfMi4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcxNC0xMTU1MjQiLCAiYmFzZV9SOF9mcm9iZW5pdXMtbm9ybV9UaDFlLTA1X2RhdGUyMDIxMDcyMC0wOTMzMzYiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4wX3NjYWxlcjFfMi4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcyMC0wOTM1MzQiLCAiYmFzZV9SOF9mcm9iZW5pdXMtbm9ybV9UaDFlLTA1X2RhdGUyMDIxMDcyMC0wOTM4NTQiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4wX3NjYWxlcjFfMi4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcyMC0xMDE2MDIiLCAibW5pc3RfUjQ1X3NjYWxlcjBfMS4wX3NjYWxlcjFfMi4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcyMC0xMDE3MzMiLCAiY25uLWxzdG1fUjQ1X3NjYWxlcjBfMS4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcyMC0xMjA2MjMiLCAiY25uLWxzdG1fUjhfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTEyMzU1NCIsICJjbm4tbHN0bV9SOF9zY2FsZXIwXzEuMF9zY2FsZXIxXzEuMF9tc2VfVGgxZS0wNV9kYXRlMjAyMTA3MjAtMTIzNjAyIiwgImNubi1sc3RtX1I4X3NjYWxlcjBfMS4wX3NjYWxlcjFfMS4wX21zZV9UaDFlLTA1X2RhdGUyMDIxMDcyMC0xMjM4NDEiLCAiY25uLWxzdG1fUjhfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTEyMzkwNiIsICJjbm4tbHN0bV9SMjVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTEyNTkyNiIsICJiYXNlX1I4X2Zyb2Jlbml1cy1ub3JtX1RoMWUtMDVf
ZGF0ZTIwMjEwNzIwLTE2MjAzOCIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8yLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTE2MjI1MSIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8yLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTIxMDkyOCIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjFfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTIxMTExOSIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTIxMTE1MiIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8wLjlfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTIxMTMwMSIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjRfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTIxMTQ1MyIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjFfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTIxMTgwMCIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTIxMjE1MSIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjRfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTIxMjMyMSIsICJtbmlzdF9SNDVfc2NhbGVyMF8xLjBfc2NhbGVyMV8xLjBfbXNlX1RoMWUtMDVfZGF0ZTIwMjEwNzIwLTIxMzkyOCJd", + "ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "OK" + }, + "https://localhost:6006/data/plugins_listing": { + "data": 
"eyJzY2FsYXJzIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IHRydWUsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJzY2FsYXJzIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1zY2FsYXItZGFzaGJvYXJkIn19LCAiY3VzdG9tX3NjYWxhcnMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJDdXN0b20gU2NhbGFycyIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJDVVNUT01fRUxFTUVOVCIsICJlbGVtZW50X25hbWUiOiAidGYtY3VzdG9tLXNjYWxhci1kYXNoYm9hcmQifX0sICJpbWFnZXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJpbWFnZXMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWltYWdlLWRhc2hib2FyZCJ9fSwgImF1ZGlvIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAiYXVkaW8iLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWF1ZGlvLWRhc2hib2FyZCJ9fSwgImRlYnVnZ2VyLXYyIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAiRGVidWdnZXIgVjIiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiTkdfQ09NUE9ORU5UIn19LCAiZ3JhcGhzIjogeyJkaXNhYmxlX3JlbG9hZCI6IHRydWUsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJncmFwaHMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWdyYXBoLWRhc2hib2FyZCJ9fSwgImRpc3RyaWJ1dGlvbnMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJkaXN0cmlidXRpb25zIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1kaXN0cmlidXRpb24tZGFzaGJvYXJkIn19LCAiaGlzdG9ncmFtcyI6IHsiZGlzYWJsZV9yZWxvYWQiOiBmYWxzZSwgImVuYWJsZWQiOiBmYWxzZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogImhpc3RvZ3JhbXMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLWhpc3RvZ3JhbS1kYXNoYm9
hcmQifX0sICJ0ZXh0IjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAidGV4dCIsICJsb2FkaW5nX21lY2hhbmlzbSI6IHsidHlwZSI6ICJDVVNUT01fRUxFTUVOVCIsICJlbGVtZW50X25hbWUiOiAidGYtdGV4dC1kYXNoYm9hcmQifX0sICJwcl9jdXJ2ZXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJQUiBDdXJ2ZXMiLCAibG9hZGluZ19tZWNoYW5pc20iOiB7InR5cGUiOiAiQ1VTVE9NX0VMRU1FTlQiLCAiZWxlbWVudF9uYW1lIjogInRmLXByLWN1cnZlLWRhc2hib2FyZCJ9fSwgInByb2ZpbGVfcmVkaXJlY3QiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJQcm9maWxlIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1wcm9maWxlLXJlZGlyZWN0LWRhc2hib2FyZCJ9fSwgImhwYXJhbXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJocGFyYW1zIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJ0Zi1ocGFyYW1zLWRhc2hib2FyZCJ9fSwgIm1lc2giOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogZmFsc2UsICJyZW1vdmVfZG9tIjogZmFsc2UsICJ0YWJfbmFtZSI6ICJtZXNoIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIkNVU1RPTV9FTEVNRU5UIiwgImVsZW1lbnRfbmFtZSI6ICJtZXNoLWRhc2hib2FyZCJ9fSwgInRpbWVzZXJpZXMiOiB7ImRpc2FibGVfcmVsb2FkIjogZmFsc2UsICJlbmFibGVkIjogdHJ1ZSwgInJlbW92ZV9kb20iOiBmYWxzZSwgInRhYl9uYW1lIjogIlRpbWUgU2VyaWVzIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIk5HX0NPTVBPTkVOVCJ9fSwgInByb2plY3RvciI6IHsiZGlzYWJsZV9yZWxvYWQiOiB0cnVlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAicHJvamVjdG9yIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIklGUkFNRSIsICJtb2R1bGVfcGF0aCI6ICIvZGF0YS9wbHVnaW4vcHJvamVjdG9yL2luZGV4LmpzIn19LCAid2hhdGlmIjogeyJkaXNhYmxlX3JlbG9hZCI6IGZhbHNlLCAiZW5hYmxlZCI6IGZhbHNlLCAicmVtb3ZlX2RvbSI6IGZhbHNlLCAidGFiX25hbWUiOiAiV2hhdC1JZiBUb29sIiwgImxvYWRpbmdfbWVjaGFuaXNtIjogeyJ0eXBlIjogIklGUkFNRSIsICJtb2R1bGVfcGF0aCI6ICIvZGF0YS9wbHVnaW4vd2hhdGlmL2luZGV4LmpzIn19fQ==", + 
"ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "OK" + }, + "https://localhost:6006/data/environment": { + "data": "eyJ2ZXJzaW9uIjogIjIuNS4wIiwgImRhdGFfbG9jYXRpb24iOiAiL2dkcml2ZS9NeSBEcml2ZS9Db2xhYiBOb3RlYm9va3Mvc3ZkLy90ZW5zb3Jib2FyZCIsICJ3aW5kb3dfdGl0bGUiOiAiIiwgImV4cGVyaW1lbnRfbmFtZSI6ICIiLCAiZXhwZXJpbWVudF9kZXNjcmlwdGlvbiI6ICIiLCAiY3JlYXRpb25fdGltZSI6IDAuMCwgImRlYnVnIjogeyJkYXRhX3Byb3ZpZGVyIjogIkdycGNEYXRhUHJvdmlkZXIoYWRkcj0nbG9jYWxob3N0OjM4NDE5JykiLCAiZmxhZ3MiOiB7ImxvZ2RpciI6ICIvZ2RyaXZlL015IERyaXZlL0NvbGFiIE5vdGVib29rcy9zdmQvL3RlbnNvcmJvYXJkIiwgImxvZ2Rpcl9zcGVjIjogIiIsICJob3N0IjogbnVsbCwgImJpbmRfYWxsIjogZmFsc2UsICJwb3J0IjogbnVsbCwgInJldXNlX3BvcnQiOiBmYWxzZSwgImxvYWRfZmFzdCI6ICJhdXRvIiwgImV4dHJhX2RhdGFfc2VydmVyX2ZsYWdzIjogIiIsICJncnBjX2NyZWRzX3R5cGUiOiAibG9jYWwiLCAiZ3JwY19kYXRhX3Byb3ZpZGVyIjogIiIsICJwdXJnZV9vcnBoYW5lZF9kYXRhIjogdHJ1ZSwgImRiIjogIiIsICJkYl9pbXBvcnQiOiBmYWxzZSwgImluc3BlY3QiOiBmYWxzZSwgInZlcnNpb25fdGIiOiBmYWxzZSwgInRhZyI6ICIiLCAiZXZlbnRfZmlsZSI6ICIiLCAicGF0aF9wcmVmaXgiOiAiIiwgIndpbmRvd190aXRsZSI6ICIiLCAibWF4X3JlbG9hZF90aHJlYWRzIjogMSwgInJlbG9hZF9pbnRlcnZhbCI6IDUuMCwgInJlbG9hZF90YXNrIjogImF1dG8iLCAicmVsb2FkX211bHRpZmlsZSI6IG51bGwsICJyZWxvYWRfbXVsdGlmaWxlX2luYWN0aXZlX3NlY3MiOiA4NjQwMCwgImdlbmVyaWNfZGF0YSI6ICJhdXRvIiwgInNhbXBsZXNfcGVyX3BsdWdpbiI6IHt9LCAiY3VzdG9tX3ByZWRpY3RfZm4iOiAiIiwgIndpdF9kYXRhX2RpciI6ICIiLCAiX190ZW5zb3Jib2FyZF9zdWJjb21tYW5kIjogInNlcnZlIn19fQ==", + "ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "OK" + }, + "https://localhost:6006/data/plugin/scalars/tags": { + "data": "", + "ok": true, + "headers": [ + [ + "content-type", + "application/json" + ] + ], + "status": 200, + "status_text": "OK" + } + }, + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "w5HA_RoKJ1W6", + "outputId": "06f47ac5-c2bf-4ab2-ff73-e1e652859e1c" + }, + "source": [ + "tensorboard_dir = '/gdrive/My Drive/Colab 
Notebooks/svd/'\n", + "tensorboard_dir = re.escape(tensorboard_dir) # to include spaces\n", + "# %reload_ext tensorboard\n", + "# %rm -rf $tensorboard_dir/tensorboard/*\n", + "%tensorboard --logdir $tensorboard_dir/tensorboard" + ], + "execution_count": 19, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "\n", + " (async () => {\n", + " const url = new URL(await google.colab.kernel.proxyPort(6006, {'cache': true}));\n", + " url.searchParams.set('tensorboardColab', 'true');\n", + " const iframe = document.createElement('iframe');\n", + " iframe.src = url;\n", + " iframe.setAttribute('width', '100%');\n", + " iframe.setAttribute('height', '800');\n", + " iframe.setAttribute('frameborder', 0);\n", + " document.body.appendChild(iframe);\n", + " })();\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9EWRUuxXkzxh" + }, + "source": [ + "# Models\n", + "\n", + "Currently, we have the following designs in place:\n", + "\n", + "| Model Name | ID | #LSTMs | Input Size(s) | Hidden Size(s)| Test Accuracy | HW Requirements |\n", + "|---|---|---|---|---|---|---|\n", + "| Dense MNIST | mnist | 0 | 784 | 128 | 98% | |\n", + "| Fashion MNIST | fashion | 0 | 784 | 128 | 88% | |\n", + "| Fashion MNIST - LSTM | fashion-lstm | 2 | 128 | 256 | 86% | |\n", + "| CNN-RNN-UCF101 | cnn-lstm | 2 | 2048 | 256 | 65% |\n", + "| TrafficPredict | traffic-predict | N | ? | ? | ? 
| |\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "LfTVudzjYwdI" + }, + "source": [ + "models = {}" + ], + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1ypgdVm6Oz1M" + }, + "source": [ + "checkpoint_dir = '/gdrive/My Drive/checkpoints/svd/'\n", + "\n", + "def save_model(model_name):\n", + " models[model_name].save(checkpoint_dir + model_name)\n", + " models[model_name].save_weights(checkpoint_dir + model_name + '.h5')\n", + " print(f'Model saved at: {checkpoint_dir + model_name}')\n", + "\n", + "def load_model(model_name):\n", + " if os.path.isdir(checkpoint_dir + model_name):\n", + " models[model_name] = tf.keras.models.load_model(checkpoint_dir + model_name)\n", + " if os.path.isfile(checkpoint_dir + model_name + '.h5'):\n", + " print(f'Model \"{model_name}\" loaded with weights.')\n", + " return True\n", + " else:\n", + " print(f'Model \"{model_name}\" loaded without weights.')\n", + " return False\n", + " else:\n", + " print(f'Model \"{model_name}\" not found in: {checkpoint_dir}')\n", + " return False" + ], + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9qAJGTrJxCUl" + }, + "source": [ + "### MNIST - Dense" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Dp2_3w2ZxGLh", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "eb81bda2-455e-4856-8876-96b297824eaa" + }, + "source": [ + "mnist = tf.keras.datasets.mnist\n", + "\n", + "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", + "x_train, x_test = x_train / 255.0, x_test / 255.0" + ], + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz\n", + "11493376/11490434 [==============================] - 0s 0us/step\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "9gku0jMKxNlR", + "outputId": "0ce442da-227c-4747-df9d-54a7cedfc2f1" + }, + "source": [ + "models['mnist'] = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", + " tf.keras.layers.Dense(128, activation='relu', name='dense_1'),\n", + " tf.keras.layers.Dropout(0.2),\n", + " tf.keras.layers.Dense(10, name='dense_2')\n", + "], name='mnist')\n", + "\n", + "models['mnist'].summary()" + ], + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model: \"mnist\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "flatten (Flatten) (None, 784) 0 \n", + "_________________________________________________________________\n", + "dense_1 (Dense) (None, 128) 100480 \n", + "_________________________________________________________________\n", + "dropout (Dropout) (None, 128) 0 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 10) 1290 \n", + "=================================================================\n", + "Total params: 101,770\n", + "Trainable params: 101,770\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "c2yrclDrxi0a", + "outputId": "ecc6f68b-8fd1-4c82-ff52-8913a188c493" + }, + "source": [ + "models['mnist'].compile(optimizer='adam',\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " metrics=['accuracy'])\n", + "\n", + "if not load_model('mnist'):\n", + " models['mnist'].fit(x_train, y_train, epochs=5)\n", + " save_model('mnist')" + ], + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model \"mnist\" 
loaded with weights.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TBEYbgI-x6Py", + "outputId": "e2f6a24c-7359-4333-abc6-3ed12f376791" + }, + "source": [ + "models['mnist'].evaluate(x_test, y_test, verbose=2)" + ], + "execution_count": 25, + "outputs": [ + { + "output_type": "stream", + "text": [ + "313/313 - 1s - loss: 0.0735 - accuracy: 0.9795\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.07346832007169724, 0.9794999957084656]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 25 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Fxd8syuH4WeS" + }, + "source": [ + "\n", + "## Fashion MNIST - Dense\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "3wYmzI6-1prs", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d138db33-9403-450e-99bc-237552e27d82" + }, + "source": [ + "fashion_mnist = tf.keras.datasets.fashion_mnist\n", + "\n", + "(fashion_train_images, fashion_train_labels), (fashion_test_images, fashion_test_labels) = fashion_mnist.load_data()" + ], + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz\n", + "32768/29515 [=================================] - 0s 0us/step\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz\n", + "26427392/26421880 [==============================] - 0s 0us/step\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz\n", + "8192/5148 [===============================================] - 0s 0us/step\n", + "Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz\n", + "4423680/4422102 
[==============================] - 0s 0us/step\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qOJnAUJKOBrm" + }, + "source": [ + "class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',\n", + " 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']" + ], + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "WMGwjBDxORVD", + "outputId": "50cf76eb-a8fc-4c4a-bc12-adec4856bade" + }, + "source": [ + "plt.figure()\n", + "plt.imshow(fashion_train_images[0])\n", + "plt.colorbar()\n", + "plt.grid(False)\n", + "plt.show()" + ], + "execution_count": 28, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0sbj1XatOqIn" + }, + "source": [ + "Scale these values to a range of 0 to 1 before feeding them to the neural network model. To do so, divide the values by 255. It's important that the training set and the testing set be preprocessed in the same way:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "D5ZodZNROeTO" + }, + "source": [ + "fashion_train_images = fashion_train_images / 255.0\n", + "fashion_test_images = fashion_test_images / 255.0" + ], + "execution_count": 29, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "ms2krOx9Os6d", + "outputId": "02605b2f-7420-4ac9-ef04-fabf8fa0c2f8" + }, + "source": [ + "plt.figure(figsize=(10,10))\n", + "for i in range(25):\n", + " plt.subplot(5,5,i+1)\n", + " plt.xticks([])\n", + " plt.yticks([])\n", + " plt.grid(False)\n", + " plt.imshow(fashion_train_images[i], cmap=plt.cm.binary)\n", + " plt.xlabel(class_names[fashion_train_labels[i]])\n", + "plt.show()" + ], + "execution_count": 30, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j33Mb5iAOvP4", + "outputId": "dcde8279-b8b5-4fb6-c86b-361415c312eb" + }, + "source": [ + "models['fashion'] = tf.keras.Sequential([\n", + " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", + " tf.keras.layers.Dense(128, activation='relu', name='dense_1'),\n", + " tf.keras.layers.Dense(10, name='dense_2')\n", + "], name='fashion_mnist')\n", + "models['fashion'].summary()" + ], + "execution_count": 31, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model: \"fashion_mnist\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "flatten_1 (Flatten) (None, 784) 0 \n", + "_________________________________________________________________\n", + "dense_1 (Dense) (None, 128) 100480 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 10) 1290 \n", + "=================================================================\n", + "Total params: 101,770\n", + "Trainable params: 101,770\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Rx6Em41pO4ha" + }, + "source": [ + "models['fashion'].compile(optimizer='adam',\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " metrics=['accuracy'])" + ], + "execution_count": 32, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K6F2FqdUO_qg" + }, + "source": [ + "Train the model." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xqWdCHIjO9Z4", + "outputId": "993ffe53-d235-44b9-c45f-6fa4506e1b16" + }, + "source": [ + "train_model = False\n", + "if not load_model('fashion') or train_model:\n", + " models['fashion'].fit(fashion_train_images, fashion_train_labels, epochs=10)\n", + " save_model('fashion')" + ], + "execution_count": 33, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model \"fashion\" loaded with weights.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WMV3QbbVPEt4", + "outputId": "6773da17-783b-4d58-c399-97b3450cac1f" + }, + "source": [ + "test_loss, test_acc = models['fashion'].evaluate(fashion_test_images, fashion_test_labels, verbose=2)\n", + "print('\\nTest accuracy:', test_acc)" + ], + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "text": [ + "313/313 - 1s - loss: 0.3876 - accuracy: 0.8919\n", + "\n", + "Test accuracy: 0.8919000029563904\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L3OA-HB7gExG" + }, + "source": [ + "## Fashion MNIST - LSTM" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0AyNrEeIPGq_", + "outputId": "eb6468dd-ddfe-4c3e-de0a-89c273fc3d1c" + }, + "source": [ + "batch_size = 32\n", + "num_classes = 10\n", + "epochs = 2\n", + "\n", + "row_hidden = 64\n", + "col_hidden = 64\n", + "\n", + "row, col = fashion_train_images.shape[1:]\n", + "\n", + "input = tf.keras.layers.Input(shape=(row, col))\n", + "\n", + "def lstm_pipe(in_layer, lstm_name=''):\n", + " x = tf.keras.layers.Conv1D(row_hidden, kernel_size=3, padding = 'same')(in_layer)\n", + " x = tf.keras.layers.Conv1D(row_hidden, kernel_size=3, padding = 'same')(x)\n", + " encoded_rows = 
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(row_hidden, return_sequences = True))(x)\n", + " return tf.keras.layers.LSTM(col_hidden, name='LSTM_' + lstm_name)(encoded_rows)\n", + "# Read it by rows\n", + "row_read = lstm_pipe(input, 'left')\n", + "# Read it by columns\n", + "transpose_read = lstm_pipe(tf.keras.layers.Permute(dims=(1,2))(input), 'right')\n", + "x = tf.concat([row_read, transpose_read], axis=1)\n", + "x = tf.keras.layers.Dropout(0.2)(x)\n", + "# prediction = tf.keras.layers.Dense(num_classes, activation='softmax')(x)\n", + "prediction = tf.keras.layers.Dense(num_classes)(x)\n", + "models['fashion-lstm'] = tf.keras.Model(input, prediction)\n", + "models['fashion-lstm'].compile(optimizer='adam',\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " metrics=['accuracy'])\n", + "models['fashion-lstm'].summary()\n", + "\n", + "models['fashion-lstm'].get_layer('LSTM_left').get_weights()[0].shape" + ], + "execution_count": 35, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model: \"model\"\n", + "__________________________________________________________________________________________________\n", + "Layer (type) Output Shape Param # Connected to \n", + "==================================================================================================\n", + "input_1 (InputLayer) [(None, 28, 28)] 0 \n", + "__________________________________________________________________________________________________\n", + "permute (Permute) (None, 28, 28) 0 input_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "conv1d (Conv1D) (None, 28, 64) 5440 input_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "conv1d_2 (Conv1D) (None, 28, 64) 5440 permute[0][0] \n", + "__________________________________________________________________________________________________\n", + "conv1d_1 
(Conv1D) (None, 28, 64) 12352 conv1d[0][0] \n", + "__________________________________________________________________________________________________\n", + "conv1d_3 (Conv1D) (None, 28, 64) 12352 conv1d_2[0][0] \n", + "__________________________________________________________________________________________________\n", + "bidirectional (Bidirectional) (None, 28, 128) 66048 conv1d_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "bidirectional_1 (Bidirectional) (None, 28, 128) 66048 conv1d_3[0][0] \n", + "__________________________________________________________________________________________________\n", + "LSTM_left (LSTM) (None, 64) 49408 bidirectional[0][0] \n", + "__________________________________________________________________________________________________\n", + "LSTM_right (LSTM) (None, 64) 49408 bidirectional_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "tf.concat (TFOpLambda) (None, 128) 0 LSTM_left[0][0] \n", + " LSTM_right[0][0] \n", + "__________________________________________________________________________________________________\n", + "dropout_1 (Dropout) (None, 128) 0 tf.concat[0][0] \n", + "__________________________________________________________________________________________________\n", + "dense (Dense) (None, 10) 1290 dropout_1[0][0] \n", + "==================================================================================================\n", + "Total params: 267,786\n", + "Trainable params: 267,786\n", + "Non-trainable params: 0\n", + "__________________________________________________________________________________________________\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(128, 256)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "id": "26j0DrC2gWWE", + "outputId": "d77668ff-69c6-41e9-d8f7-70e6ebeeedf7" + }, + "source": [ + "if not load_model('fashion-lstm'):\n", + " models['fashion-lstm'].fit(fashion_train_images, fashion_train_labels, epochs=2, batch_size=batch_size)\n", + " save_model('fashion-lstm')" + ], + "execution_count": 36, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model \"fashion-lstm\" loaded with weights.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aQeDXW1ggx5p", + "outputId": "c7a172e3-57e5-41f8-b38f-efb45646c31d" + }, + "source": [ + "test_loss, test_acc = models['fashion-lstm'].evaluate(fashion_test_images, fashion_test_labels, verbose=2)\n", + "print('\\nTest accuracy:', test_acc)" + ], + "execution_count": 37, + "outputs": [ + { + "output_type": "stream", + "text": [ + "313/313 - 9s - loss: 0.3886 - accuracy: 0.8587\n", + "\n", + "Test accuracy: 0.8586999773979187\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pjtqPfHpFOpE" + }, + "source": [ + "## CNN-RNN-UCF101" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "axEkmpz9QoGY", + "outputId": "e15f701c-c0a3-47ff-ab4b-686cc59dd574" + }, + "source": [ + "!pip install -q git+https://github.com/tensorflow/docs" + ], + "execution_count": 38, + "outputs": [ + { + "output_type": "stream", + "text": [ + " Building wheel for tensorflow-docs (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[33m WARNING: Built wheel for tensorflow-docs is invalid: Metadata 1.2 mandates PEP 440 version, but '0.0.0f74e5ccdc126eb2c32cb344a7c134a28e633e9dc-' is not\u001b[0m\n", + " Running setup.py install for tensorflow-docs ... 
\u001b[?25l\u001b[?25hdone\n", + "\u001b[33m DEPRECATION: tensorflow-docs was installed using the legacy 'setup.py install' method, because a wheel could not be built for it. A possible replacement is to fix the wheel build issue reported above. You can find discussion regarding this at https://github.com/pypa/pip/issues/8368.\u001b[0m\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pQQsKjzmh8Uz" + }, + "source": [ + "!wget -q https://git.io/JGc31 -O /tmp/ucf101_top5.tar.gz\n", + "!tar xf /tmp/ucf101_top5.tar.gz" + ], + "execution_count": 39, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "vf8KPD3XFbsu" + }, + "source": [ + "from tensorflow_docs.vis import embed\n", + "from imutils import paths\n", + "\n", + "import pandas as pd\n", + "import imageio\n", + "import cv2" + ], + "execution_count": 40, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "9gd5eo4fQk6R" + }, + "source": [ + "IMG_SIZE = 224\n", + "BATCH_SIZE = 64\n", + "EPOCHS = 10\n", + "\n", + "MAX_SEQ_LENGTH = 20\n", + "NUM_FEATURES = 2048" + ], + "execution_count": 41, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "id": "DM97Kl5AReMg", + "outputId": "037167b6-e627-46d0-f377-f3e0754e43a7" + }, + "source": [ + "train_df = pd.read_csv(\"train.csv\")\n", + "test_df = pd.read_csv(\"test.csv\")\n", + "\n", + "print(f\"Total videos for training: {len(train_df)}\")\n", + "print(f\"Total videos for testing: {len(test_df)}\")\n", + "\n", + "train_df.sample(10)" + ], + "execution_count": 42, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Total videos for training: 594\n", + "Total videos for testing: 224\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
video_nametag
421v_ShavingBeard_g17_c07.aviShavingBeard
391v_ShavingBeard_g13_c02.aviShavingBeard
464v_ShavingBeard_g24_c01.aviShavingBeard
339v_Punch_g23_c02.aviPunch
474v_ShavingBeard_g25_c04.aviShavingBeard
138v_PlayingCello_g11_c01.aviPlayingCello
363v_ShavingBeard_g08_c05.aviShavingBeard
256v_Punch_g10_c05.aviPunch
402v_ShavingBeard_g15_c01.aviShavingBeard
510v_TennisSwing_g12_c07.aviTennisSwing
\n", + "
" + ], + "text/plain": [ + " video_name tag\n", + "421 v_ShavingBeard_g17_c07.avi ShavingBeard\n", + "391 v_ShavingBeard_g13_c02.avi ShavingBeard\n", + "464 v_ShavingBeard_g24_c01.avi ShavingBeard\n", + "339 v_Punch_g23_c02.avi Punch\n", + "474 v_ShavingBeard_g25_c04.avi ShavingBeard\n", + "138 v_PlayingCello_g11_c01.avi PlayingCello\n", + "363 v_ShavingBeard_g08_c05.avi ShavingBeard\n", + "256 v_Punch_g10_c05.avi Punch\n", + "402 v_ShavingBeard_g15_c01.avi ShavingBeard\n", + "510 v_TennisSwing_g12_c07.avi TennisSwing" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8kXcyOGURj88" + }, + "source": [ + "# The following two methods are taken from this tutorial:\n", + "# https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub\n", + "\n", + "\n", + "def crop_center_square(frame):\n", + " y, x = frame.shape[0:2]\n", + " min_dim = min(y, x)\n", + " start_x = (x // 2) - (min_dim // 2)\n", + " start_y = (y // 2) - (min_dim // 2)\n", + " return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]\n", + "\n", + "\n", + "def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):\n", + " cap = cv2.VideoCapture(path)\n", + " frames = []\n", + " try:\n", + " while True:\n", + " ret, frame = cap.read()\n", + " if not ret:\n", + " break\n", + " frame = crop_center_square(frame)\n", + " frame = cv2.resize(frame, resize)\n", + " frame = frame[:, :, [2, 1, 0]]\n", + " frames.append(frame)\n", + "\n", + " if len(frames) == max_frames:\n", + " break\n", + " finally:\n", + " cap.release()\n", + " return np.array(frames)" + ], + "execution_count": 43, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "_ExqycyVR0x9", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "81954a73-7752-414e-8592-a24b072cd064" + }, + "source": [ + "def build_feature_extractor():\n", + " feature_extractor = tf.keras.applications.InceptionV3(\n", + " 
weights=\"imagenet\",\n", + " include_top=False,\n", + " pooling=\"avg\",\n", + " input_shape=(IMG_SIZE, IMG_SIZE, 3),\n", + " )\n", + " preprocess_input = tf.keras.applications.inception_v3.preprocess_input\n", + "\n", + " inputs = tf.keras.Input((IMG_SIZE, IMG_SIZE, 3))\n", + " preprocessed = preprocess_input(inputs)\n", + "\n", + " outputs = feature_extractor(preprocessed)\n", + " return tf.keras.Model(inputs, outputs, name=\"feature_extractor\")\n", + "\n", + "feature_extractor = build_feature_extractor()" + ], + "execution_count": 44, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5\n", + "87916544/87910968 [==============================] - 1s 0us/step\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3zEsFu5HSyQ2", + "outputId": "3a809197-81dd-4713-eaac-2ec0ba6cc701" + }, + "source": [ + "label_processor = tf.keras.layers.experimental.preprocessing.StringLookup(\n", + " num_oov_indices=0, vocabulary=np.unique(train_df[\"tag\"])\n", + ")\n", + "print(label_processor.get_vocabulary())" + ], + "execution_count": 45, + "outputs": [ + { + "output_type": "stream", + "text": [ + "['', 'CricketShot', 'PlayingCello', 'Punch', 'ShavingBeard', 'TennisSwing']\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YucRb0fBTBHM", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5dfa0e46-fb4a-4587-f648-b35f5e2cc102" + }, + "source": [ + "def prepare_all_videos(df, root_dir):\n", + " num_samples = len(df)\n", + " video_paths = df[\"video_name\"].values.tolist()\n", + " labels = df[\"tag\"].values\n", + " labels = label_processor(labels[..., None]).numpy()\n", + "\n", + " # `frame_masks` and `frame_features` are what we will feed to our sequence 
model.\n", + " # `frame_masks` will contain a bunch of booleans denoting if a timestep is\n", + " # masked with padding or not.\n", + " frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype=\"bool\")\n", + " frame_features = np.zeros(\n", + " shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype=\"float32\"\n", + " )\n", + "\n", + " # For each video.\n", + " for idx, path in enumerate(video_paths):\n", + " # Gather all its frames and add a batch dimension.\n", + " frames = load_video(os.path.join(root_dir, path))\n", + " frames = frames[None, ...]\n", + "\n", + " # Initialize placeholders to store the masks and features of the current video.\n", + " temp_frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype=\"bool\")\n", + " temp_frame_featutes = np.zeros(\n", + " shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype=\"float32\"\n", + " )\n", + "\n", + " # Extract features from the frames of the current video.\n", + " for i, batch in enumerate(frames):\n", + " video_length = batch.shape[1]\n", + " length = min(MAX_SEQ_LENGTH, video_length)\n", + " for j in range(length):\n", + " temp_frame_featutes[i, j, :] = feature_extractor.predict(\n", + " batch[None, j, :]\n", + " )\n", + " temp_frame_mask[i, :length] = 1 # 1 = not masked, 0 = masked\n", + "\n", + " frame_features[idx,] = temp_frame_featutes.squeeze()\n", + " frame_masks[idx,] = temp_frame_mask.squeeze()\n", + "\n", + " return (frame_features, frame_masks), labels\n", + "\n", + "if os.path.isfile(checkpoint_dir + 'cnn-lstm/dataset.npz'):\n", + " dataset = np.load(checkpoint_dir + 'cnn-lstm/dataset.npz')\n", + " train_data = (dataset['train_data_0'], dataset['train_data_1'])\n", + " test_data = (dataset['test_data_0'], dataset['test_data_1'])\n", + " train_labels = dataset['train_labels']\n", + " test_labels = dataset['test_labels']\n", + "else:\n", + " train_data, train_labels = prepare_all_videos(train_df, \"train\")\n", + " test_data, test_labels = prepare_all_videos(test_df, \"test\")\n", + "\n", 
+ "print(f\"Frame features in train set: {train_data[0].shape}\")\n", + "print(f\"Frame masks in train set: {train_data[1].shape}\")" + ], + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Frame features in train set: (594, 20, 2048)\n", + "Frame masks in train set: (594, 20)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WpYSOigtTVxh" + }, + "source": [ + "### The Sequence Model" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "-iV2mumoTKJl", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d848afba-a7ab-4783-a573-af924f52c4dc" + }, + "source": [ + "# Utility for our sequence model.\n", + "def get_sequence_model():\n", + " class_vocab = label_processor.get_vocabulary()\n", + "\n", + " frame_features_input = tf.keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))\n", + " mask_input = tf.keras.Input((MAX_SEQ_LENGTH,), dtype=\"bool\")\n", + "\n", + " # Refer to the following tutorial to understand the significance of using `mask`:\n", + " # https://tf.keras.io/api/layers/recurrent_layers/gru/\n", + " x = tf.keras.layers.LSTM(64, return_sequences=True, name='LSTM_1')(frame_features_input, mask=mask_input)\n", + " x = tf.keras.layers.LSTM(32, name='LSTM_2')(x)\n", + " # x = tf.keras.layers.GRU(16, return_sequences=True)(frame_features_input, mask=mask_input)\n", + " # x = tf.keras.layers.GRU(8)(x)\n", + " x = tf.keras.layers.Dropout(0.4)(x)\n", + " x = tf.keras.layers.Dense(8, activation=\"relu\")(x)\n", + " output = tf.keras.layers.Dense(len(class_vocab), activation=\"softmax\")(x)\n", + "\n", + " rnn_model = tf.keras.Model([frame_features_input, mask_input], output)\n", + "\n", + " rnn_model.compile(\n", + " loss=\"sparse_categorical_crossentropy\", optimizer=\"adam\", metrics=[\"accuracy\"]\n", + " )\n", + " return rnn_model\n", + "\n", + "# Utility for running experiments.\n", + "def run_experiment():\n", + " filepath = \"/tmp/video_classifier\"\n", + " 
checkpoint = tf.keras.callbacks.ModelCheckpoint(\n", + " filepath, save_weights_only=True, save_best_only=True, verbose=1\n", + " )\n", + " seq_model = get_sequence_model()\n", + " history = seq_model.fit(\n", + " [train_data[0], train_data[1]],\n", + " train_labels,\n", + " validation_split=0.3,\n", + " epochs=EPOCHS,\n", + " callbacks=[checkpoint],\n", + " )\n", + " seq_model.load_weights(filepath)\n", + " _, accuracy = seq_model.evaluate([test_data[0], test_data[1]], test_labels)\n", + " print(f\"Test accuracy: {round(accuracy * 100, 2)}%\")\n", + " return history, seq_model\n", + "\n", + "train_model = False\n", + "if not load_model('cnn-lstm') or train_model:\n", + " _, models['cnn-lstm'] = run_experiment()\n", + " save_model('cnn-lstm')" + ], + "execution_count": 47, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model \"cnn-lstm\" loaded with weights.\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ya5FeHCMTips" + }, + "source": [ + "### Inference" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ExJXj3TyTkPc", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 0 + }, + "outputId": "31f7cf7a-4e6d-4531-a877-b3b9980c30bc" + }, + "source": [ + "def prepare_single_video(frames):\n", + " frames = frames[None, ...]\n", + " frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype=\"bool\")\n", + " frame_featutes = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype=\"float32\")\n", + " for i, batch in enumerate(frames):\n", + " video_length = batch.shape[1]\n", + " length = min(MAX_SEQ_LENGTH, video_length)\n", + " for j in range(length):\n", + " frame_featutes[i, j, :] = feature_extractor.predict(batch[None, j, :])\n", + " frame_mask[i, :length] = 1 # 1 = not masked, 0 = masked\n", + " return frame_featutes, frame_mask\n", + "\n", + "def sequence_prediction(path):\n", + " class_vocab = label_processor.get_vocabulary()\n", + "\n", + " frames = 
load_video(os.path.join(\"test\", path))\n", + " frame_features, frame_mask = prepare_single_video(frames)\n", + " probabilities = models['cnn-lstm'].predict([frame_features, frame_mask])[0]\n", + "\n", + " for i in np.argsort(probabilities)[::-1]:\n", + " print(f\" {class_vocab[i]}: {probabilities[i] * 100:5.2f}%\")\n", + " return frames\n", + "\n", + "# This utility is for visualization.\n", + "# Referenced from:\n", + "# https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub\n", + "def to_gif(images):\n", + " converted_images = images.astype(np.uint8)\n", + " imageio.mimsave(\"animation.gif\", converted_images, fps=10)\n", + " return embed.embed_file(\"animation.gif\")\n", + "\n", + "test_video = np.random.choice(test_df[\"video_name\"].values.tolist())\n", + "print(f\"Test video path: {test_video}\")\n", + "test_frames = sequence_prediction(test_video)\n", + "to_gif(test_frames[:MAX_SEQ_LENGTH])" + ], + "execution_count": 48, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Test video path: v_TennisSwing_g05_c03.avi\n", + " Punch: 36.57%\n", + " ShavingBeard: 26.30%\n", + " CricketShot: 20.38%\n", + " PlayingCello: 10.73%\n", + " : 3.56%\n", + " TennisSwing: 2.46%\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "48QBTe1JkqDc", + "outputId": "a6046971-942c-498c-85ed-3582c8c76940" + }, + "source": [ + "_, accuracy = models['cnn-lstm'].evaluate([test_data[0], test_data[1]], test_labels)\n", + "print(f\"Test accuracy: {round(accuracy * 100, 2)}%\")" + ], + "execution_count": 49, + "outputs": [ + { + "output_type": "stream", + "text": [ + "7/7 [==============================] - 2s 31ms/step - loss: 1.5262 - accuracy: 0.6562\n", + "Test accuracy: 65.62%\n" + ], + 
"name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "PZlmJpC8j2PT" + }, + "source": [ + "if not os.path.isfile(checkpoint_dir + 'cnn-lstm/dataset.npz'):\n", + " np.savez_compressed(checkpoint_dir + 'cnn-lstm/dataset',\n", + " train_data_0=train_data[0],\n", + " train_data_1=train_data[1],\n", + " train_labels=train_labels,\n", + " test_data_0=test_data[0],\n", + " test_data_1=test_data[1],\n", + " test_labels=test_labels)" + ], + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xKKBt8IfsoZx" + }, + "source": [ + "### Get Weights" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "I3R4weJlstC5", + "outputId": "12e6d252-09c7-4445-9977-e5ce2e7fd2d9" + }, + "source": [ + "models['cnn-lstm'].summary()" + ], + "execution_count": 51, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model: \"model_4\"\n", + "__________________________________________________________________________________________________\n", + "Layer (type) Output Shape Param # Connected to \n", + "==================================================================================================\n", + "input_9 (InputLayer) [(None, 20, 2048)] 0 \n", + "__________________________________________________________________________________________________\n", + "input_10 (InputLayer) [(None, 20)] 0 \n", + "__________________________________________________________________________________________________\n", + "LSTM_1 (LSTM) (None, 20, 64) 540928 input_9[0][0] \n", + " input_10[0][0] \n", + "__________________________________________________________________________________________________\n", + "LSTM_2 (LSTM) (None, 32) 12416 LSTM_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "dropout_6 (Dropout) (None, 32) 0 LSTM_2[0][0] \n", + 
"__________________________________________________________________________________________________\n", + "dense_6 (Dense) (None, 8) 264 dropout_6[0][0] \n", + "__________________________________________________________________________________________________\n", + "dense_7 (Dense) (None, 6) 54 dense_6[0][0] \n", + "==================================================================================================\n", + "Total params: 553,662\n", + "Trainable params: 553,662\n", + "Non-trainable params: 0\n", + "__________________________________________________________________________________________________\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i9HpitpOtDzO", + "outputId": "85c0a179-fd9e-4f28-8a41-05194adab3e6" + }, + "source": [ + "print(models['cnn-lstm'].get_layer('LSTM_1').get_weights()[0].shape)\n", + "print(models['cnn-lstm'].get_layer('LSTM_2').get_weights()[0].shape)" + ], + "execution_count": 52, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(2048, 256)\n", + "(64, 128)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_E2yRW5gySUT" + }, + "source": [ + "# Scaled SVD Approximation" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "GbPIVFJ_AeId" + }, + "source": [ + "def get_approx_size(R, k, m, n):\n", + " return R * (m + n + k)" + ], + "execution_count": 53, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "sMPgE_j203Z6" + }, + "source": [ + "def plot_accuracies(model_labels, original_accuracies, approx_accuracies):\n", + " x = np.arange(len(model_labels)) # the label locations\n", + " width = 0.35 # the width of the bars\n", + " fig, ax = plt.subplots()\n", + " rects1 = ax.bar(x - width/2, original_accuracies, width, label='Original')\n", + " rects2 = ax.bar(x + width/2, approx_accuracies, width, label='Approximated')\n", + " # Add some text for 
labels, title and custom x-axis tick labels, etc.\n", + " ax.set_ylabel('Accuracy')\n", + " ax.set_title('Original Accuracy vs. Approximated.')\n", + " ax.set_xticks(x)\n", + " ax.set_xticklabels(model_labels)\n", + " ax.legend()\n", + " fig.tight_layout()\n", + " plt.show()" + ], + "execution_count": 54, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZHZO0PlTyaLh", + "outputId": "1e81b796-d8a1-48d4-f1e0-4d1b12f67db5" + }, + "source": [ + "models['fashion'].summary()\n", + "mnist_dense = [w1, b1] = models['mnist'].get_layer('dense_1').get_weights()\n", + "fashion_mnist_dense = [w2, b2] = models['fashion'].get_layer('dense_1').get_weights()\n", + "print(w1.shape, w2.shape)" + ], + "execution_count": 55, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model: \"fashion_mnist\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "flatten_1 (Flatten) (None, 784) 0 \n", + "_________________________________________________________________\n", + "dense_1 (Dense) (None, 128) 100480 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 10) 1290 \n", + "=================================================================\n", + "Total params: 101,770\n", + "Trainable params: 101,770\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "(784, 128) (784, 128)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4BMrOjXEy51y" + }, + "source": [ + "R = 45\n", + "metric = 'mse'\n", + "threshold = 1e-5\n", + "scalers = [1., 1.]\n", + "\n", + "def run_alg3_for_mnist(scaler, apply_scaling_at_alg2, log_to_tensorboard=False):\n", + " if log_to_tensorboard:\n", + " logname = 'mnist'\n", + " logname += 
f'_R{R}_scaler0_{scaler[0]}_scaler1_{scaler[1]}_{metric}_Th{threshold}'\n", + " logname += '_date' + datetime.now().strftime('%Y%m%d-%H%M%S')\n", + " writer = tf.summary.create_file_writer(os.path.join('/gdrive/My Drive/Colab Notebooks/svd/tensorboard', logname))\n", + " %reload_ext tensorboard\n", + " else:\n", + " writer = None\n", + "\n", + " [w1, b1] = models['mnist'].get_layer('dense_1').get_weights()\n", + " [w2, b2] = models['fashion'].get_layer('dense_1').get_weights()\n", + " plotdata = {}\n", + " w_approx = algorithm3_inverse(*algorithm3([w1, w2], \\\n", + " num_refinements=R, \\\n", + " num_sub_matrix=1, \\\n", + " decomposition='eigen', \\\n", + " stop_threshold=threshold, \\\n", + " metric=metric, \\\n", + " scaler=scaler, \\\n", + " report_writer=writer, \\\n", + " apply_scaling_at_alg2=apply_scaling_at_alg2, \\\n", + " plotdata=plotdata))\n", + " # Obtain original accuracies.\n", + " _, mnist_acc = models['mnist'].evaluate(x_test, y_test, verbose=0)\n", + " _, fashion_mnist_acc = models['fashion'].evaluate(fashion_test_images, fashion_test_labels, verbose=0)\n", + " print('')\n", + " print('[ORIGINAL] MNIST - Test accuracy: {:.1f}%'.format(100. * mnist_acc))\n", + " print('[ORIGINAL] Fashion MNIST - Test accuracy: {:.1f}%'.format(100. * fashion_mnist_acc))\n", + " \n", + " # Set approximated weights and evaluate.\n", + " models['mnist'].get_layer('dense_1').set_weights([w_approx[0], b1])\n", + " models['fashion'].get_layer('dense_1').set_weights([w_approx[1], b2])\n", + " print('')\n", + "\n", + " _, mnist_acc_approx = models['mnist'].evaluate(x_test, y_test, verbose=0)\n", + " _, fashion_mnist_acc_approx = models['fashion'].evaluate(fashion_test_images, fashion_test_labels, verbose=0)\n", + " print('[APPROX] MNIST - Test accuracy: {:.1f}%'.format(100. * mnist_acc_approx))\n", + " print('[APPROX] Fashion MNIST - Test accuracy: {:.1f}%'.format(100. * fashion_mnist_acc_approx))\n", + "\n", + " print('\\nMNIST - Accuracy drop: {:.1f}%'.format(100. 
* (mnist_acc - mnist_acc_approx)))\n", + " print('Fashion MNIST - Accuracy drop: {:.1f}%'.format(100. * (fashion_mnist_acc - fashion_mnist_acc_approx)))\n", + " print('')\n", + "\n", + " # Restore original weights and plot.\n", + " models['mnist'].get_layer('dense_1').set_weights([w1, b1])\n", + " models['fashion'].get_layer('dense_1').set_weights([w2, b2])\n", + "\n", + " plot_accuracies(['mnist', 'fashion'], [mnist_acc, fashion_mnist_acc],\n", + " [mnist_acc_approx, fashion_mnist_acc_approx])\n", + " return w_approx[0], w_approx[1], plotdata" + ], + "execution_count": 58, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hBB7um5b28BU" + }, + "source": [ + "## Baseline" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "5Jn7ZRWr22W7" + }, + "source": [ + "_, _, plotdata = run_alg3_for_mnist(None, apply_scaling_at_alg2=True)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 319 + }, + "id": "uDtgDz6x3CUw", + "outputId": "f2e19dfb-87b6-4ff0-ff4b-d1ae9a6f3d2a" + }, + "source": [ + "for matrix in plotdata:\n", + " plt.plot(plotdata[matrix], label=matrix)\n", + " print(plotdata[matrix][40])\n", + "plt.legend()" + ], + "execution_count": 65, + "outputs": [ + { + "output_type": "stream", + "text": [ + "0.02243629066508845\n", + "0.03342261809473339\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 65 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mEmpz_9g3DJo" + }, + "source": [ + "## Applying Scaling at Algorithm 3" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ncWsG3Eb9GF-" + }, + "source": [ + "_, _, plotdata_alg3 = run_alg3_for_mnist([1., 1000.], apply_scaling_at_alg2=False)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 319 + }, + "id": "YAr0LOLV9Kan", + "outputId": "068132a5-2021-4409-b6da-49946471640e" + }, + "source": [ + "for matrix in plotdata_alg3:\n", + " plt.plot(plotdata_alg3[matrix], label=matrix)\n", + " print(plotdata_alg3[matrix][40])\n", + "plt.legend()" + ], + "execution_count": 66, + "outputs": [ + { + "output_type": "stream", + "text": [ + "0.025791287281387982\n", + "65961.02744391997\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 66 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H89VoqI63Its" + }, + "source": [ + "## Applying Scaling at Algorithm 2" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 169 + }, + "id": "fSBD29VE9Q_M", + "outputId": "56f656f6-eae0-4afc-9811-d28e7cbc3103" + }, + "source": [ + "_, _, plotdata_alg2 = run_alg3_for_mnist([1., 20.], apply_scaling_at_alg2=True)" + ], + "execution_count": 56, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplotdata2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrun_alg3_for_mnist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1.\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m20.\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mapply_scaling_at_alg2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'run_alg3_for_mnist' is not defined" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 283 + }, + "id": "0H6d7l1l9Mhf", + "outputId": "18146a7d-cf3d-4cc9-d657-a81b256a60e1" + }, + "source": [ + "for matrix in plotdata:\n", + " plt.plot(plotdata[matrix], label=matrix)\n", + "plt.legend()" + ], + "execution_count": 126, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + 
}, + "execution_count": 126 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "9pw_-N3ZMBpU" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OW4PnHDb9NeQ" + }, + "source": [ + "## Additional Plotting" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 264 + }, + "id": "YkjBk2uk3vh5", + "outputId": "76424b10-61dc-4028-e067-160d48e38d0c" + }, + "source": [ + "plt.subplot(1, 3, 1)\n", + "plt.imshow(w1[:16,:16]) #, cmap=plt.cm.BuPu_r)\n", + "plt.subplot(1, 3, 2)\n", + "plt.imshow(w1_approx[:16,:16]) #, cmap=plt.cm.BuPu_r)\n", + "plt.subplot(1, 3, 3)\n", + "plt.imshow(w1[:16,:16] - w1_approx[:16,:16]) #, cmap=plt.cm.BuPu_r)\n", + "\n", + "plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)\n", + "cax = plt.axes([0.85, 0.1, 0.075, 0.8])\n", + "plt.colorbar(cax=cax)\n", + "plt.show()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 265 + }, + "id": "ROem_bjX6Omt", + "outputId": "37efc5b4-9351-468b-dcc8-552050b6db35" + }, + "source": [ + "n, bins, patches = plt.hist(w1_approx)\n", + "plt.show()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAR0ElEQVR4nO3df4xlZ13H8feXDi2g0O2PodTd1dmW4rZa+bVCDZHUVvrLwjah0G0IXbBkRSGB1EQG0TgaE6kJFhoaSEPRbaJQLGrXFiXL7larSQu7ZbtQV+zsKumuhQ6lLUihpvL1j/sMuTvMzL0z99wf88z7lUzmnOc859zvPXfuZ8597rnnRmYiSarLs4ZdgCSpeYa7JFXIcJekChnuklQhw12SKjQ27AIATj311JyYmBh2GZK0ouzbt+9bmTk+37KRCPeJiQn27t077DIkaUWJiK8vtMxhGUmqkOEuSRUy3CWpQoa7JFXIcJekChnuklQhw12SKmS4S1KFDHdJqpDhLkkVMtwlqUKGuyRVyHCXGvShqy4fdgkSYLhLUpUMd0mqkOEuSRUy3CWpQoa7JFXIcJekChnuklQhw12SKmS4S/0ydeKwK9AqZrhLUoUMd0mqkOEuSRUy3CWpQoa7JFXIcJekChnuklQhw12SKmS4S1KFDHdJqlDX4R4Rx0XElyPizjK/ISLui4jpiLgtIo4v7SeU+emyfKI/pUuSFrKUI/f3AAfb5q8HbsjMFwOPA9eW9muBx0v7DaWfJGmAugr3iFgH/BrwiTIfwAXA7aXLduCKMr25zFOWX1j6S5IGpNsj9w8DvwP8sMyfAjyRmc+U+SPA2jK9FngYoCx/svQ/RkRsi4i9EbF3ZmZmmeVL8KGrLh92CT3btfvMYZegynQM94i4HHg0M/c1ecOZeXNmbsrMTePj401uWprX1NRUY9uq4R+K6jbWRZ/XAG+IiMuA5wAvAD4CrImIsXJ0vg44WvofBdYDRyJiDDgReKzxyiVJC+p45J6Z78/MdZk5AWwBdmfmW4A9wJWl21bgjjK9o8xTlu/OzGy0amkhA/yCjInJu7rr6Jd2aAh6Oc/9fcB1ETFNa0z9ltJ+C3BKab8OmOytRKk+TQ4RSfPpZljmRzLzbuDuMn0YeNU8fX4AvKmB2iRJy+QnVFW9I5P3LLp81+4zObjx7AFVIw2G4S5JFTLcpSV60Z79wy5B6shw16py7vZzh12CNBCGuyRVyHCXpAoZ7tIQeb67+sVw16qw0Fj7oMfgDXMNiuEuSRUy3LXijNIVGUepFqmd4a4VreuLd1VegzSX4S41oJcjeM+9Vz8Y7lJDPILXKDHctWKthPHuThctk/rFcFc1ug37qakph0JUPcNdq5bnnKtmhrvq1vBX3N30zt2Nbk/qF8NdK5JvXkqLM9ylAfCNVQ2a4S5JFTLcpWLX7jOHXYLUGMNdkipkuEtShQx3SaqQ4S5JFTLcpSHxEgjqJ8NdkipkuEtShQx3aUBmh2Ecj
tEgGO6SVCHDXZIqZLhLUoUMd0mqkOEuSRUy3FWdXq6dfnDj2UOvQWqC4S5JFeoY7hHxnIj4YkQ8EBEPRsQflvYNEXFfRExHxG0RcXxpP6HMT5flE/29C5Kkubo5cn8auCAzXwq8DLgkIs4DrgduyMwXA48D15b+1wKPl/YbSj9J0gB1DPds+Z8y++zyk8AFwO2lfTtwRZneXOYpyy+MiGisYklSR12NuUfEcRGxH3gU2AkcAp7IzGdKlyPA2jK9FngYoCx/Ejhlnm1ui4i9EbF3Zmamt3shSTpGV+Gemf+XmS8D1gGvAjb2esOZeXNmbsrMTePj471uTpLUZklny2TmE8Ae4JeANRExVhatA46W6aPAeoCy/ETgsUaqlbo1deKwK5CGqpuzZcYjYk2Zfi7wOuAgrZC/snTbCtxRpneUecry3ZmZTRYtSVpcN0fupwN7IuIA8CVgZ2beCbwPuC4ipmmNqd9S+t8CnFLarwMmmy9bOtbE5F3DLkEaKWOdOmTmAeDl87QfpjX+Prf9B8CbGqlOkrQsfkJVkipkuEtShQx3SaqQ4S5JFTLcJalChrskVchwl6QKGe6SVCHDXWqa17XRCDDcJalChrskVchwl6QKGe7SiDi48exhl6CKGO6SVCHDXZIqZLhLUoUMd0mqkOGuah2ZvGfYJUhDY7hLUoUMd0mqkOEuSRUy3CWpQoa7JFXIcJekChnuklQhw12SKmS4S1KFDHdJqpDhLkkVMtwlqUKGuyRVyHCXpAoZ7pJUIcNdkipkuEtShQx3SapQx3CPiPURsSci/i0iHoyI95T2kyNiZ0Q8VH6fVNojIm6MiOmIOBARr+j3nZAkHaubI/dngN/OzHOA84B3RcQ5wCSwKzPPAnaVeYBLgbPKzzbgY41XLUlaVMdwz8xHMvP+Mv1d4CCwFtgMbC/dtgNXlOnNwK3Zci+wJiJOb7xySdKCljTmHhETwMuB+4DTMvORsugbwGllei3wcNtqR0qbJGlAug73iPhJ4LPAezPzO+3LMjOBXMoNR8S2iNgbEXtnZmaWsqokqYOuwj0ink0r2P8yM/+mNH9zdril/H60tB8F1retvq60HSMzb87MTZm5aXx8fLn1S5Lm0c3ZMgHcAhzMzD9rW7QD2FqmtwJ3tLVfU86aOQ94sm34RpI0AGNd9HkN8FbgKxGxv7T9LvBB4DMRcS3wdeDNZdnngMuAaeAp4O2NVixJ6qhjuGfmvwCxwOIL5+mfwLt6rEuS1AM/oSpJFTLcJalChrskVchwl6QKGe6SVCHDXZIqZLhLUoUMd0mqkOEuSRUy3CWpQoa7JFXIcJekChnuklQhw12SKmS4S1KFDHdJqpDhLkkVMtwlqUKGuyRVyHCXpAoZ7pJUIcNdkipkuEtShQx3SaqQ4S5JFTLcJalChrskVchwl6QKGe6SVCHDXZIqZLhLUoUMd0mqkOEuSRUy3CWpQoa7JFXIcJekChnuGqpdu88cdglSlTqGe0R8MiIejYivtrWdHBE7I+Kh8vuk0h4RcWNETEfEgYh4RT+L14ibOnHYFUirVjdH7n8BXDKnbRLYlZlnAbvKPMClwFnlZxvwsWbK1GpycOPZwy5BWvE6hntm/jPw7TnNm4HtZXo7cEVb+63Zci+wJiJOb6pYSVJ3ljvmflpmPlKmvwGcVqbXAg+39TtS2n5MRGyLiL0RsXdmZmaZZUiS5tPzG6qZmUAuY72bM3NTZm4aHx/vtQxJUpvlhvs3Z4dbyu9HS/tRYH1bv3WlTZI0QMsN9x3A1jK9Fbijrf2actbMecCTbcM3kqQBGevUISI+BZwPnBoRR4A/AD4IfCYirgW+Dry5dP8ccBkwDTwFvL0PNUuSOugY7pl59QKLLpynbwLv6rUoSVJv/ISq+urI5D3DLkFalQx3SaqQ4S5JFTLcJalChrskVchwl6QKGe7SCnPTO3cPuwStAIa7JFXIcJfEudvPHXYJapjhrr7ww0srx9TU1LBLU
B8Y7pJUIcNdkipkuKs5fiG2NDIMd0mqkOEurWATk3ctOj+XZ8WsHoa7JFXIcJekChnuklQhw12q3HwfKHPsvX6Gu7RK+Knh1cVw19B49Cj1j+GuJet0up2k4TPcJalChrskVchwl6QKGe6SVCHDXSNh1+4zh13CivWhqy4fdgkaQYa7Bs5TIJdnqV+M7Xntq5vhroFY7KvcDm48e3CFSKuE4S5JFTLc1bPlfKjJL2Venhft2b+8Ff2WrFXHcJdvZq5Ac0N+uZ8a9v2Pehnu6olnaqw8Q3vV5KuHgTLcJQ3N1NSUrx76xHDXSHGIqHu+atJiDPeKHZm8p28vhQ2WFcKhkFWrL+EeEZdExNciYjoiJvtxGxq+9jfxjgn7RQLF891Hh2cs1a3xcI+I44CbgEuBc4CrI+Kcpm9HP85PJGqY5jtjZ/Zvsv1v0zH2wejHkfurgOnMPJyZ/wt8Gtjch9tZ1X7siTR7tNzAy/Bexr39BzN8ozBktlANo/T3Ufsrl8jMZjcYcSVwSWa+o8y/FXh1Zr57Tr9twLYy+7PA1xq4+VOBbzWwnSaNYk0wmnWNYk0wmnVZU/dGsa6mavqZzByfb8FYAxtflsy8Gbi5yW1GxN7M3NTkNns1ijXBaNY1ijXBaNZlTd0bxboGUVM/hmWOAuvb5teVNknSgPQj3L8EnBURGyLieGALsKMPtyNJWkDjwzKZ+UxEvBv4PHAc8MnMfLDp21lAo8M8DRnFmmA06xrFmmA067Km7o1iXX2vqfE3VCVJw+cnVCWpQoa7JFVoRYR7RJwcETsj4qHy+6QF+v1jRDwREXfOad8QEfeVyyHcVt7oJSJOKPPTZflEH2raWvo8FBFbS9vzI2J/28+3IuLDZdnbImKmbdk7uq2p17pK+93l0hGzt//C0j6sffW8iLgrIv49Ih6MiA+29V/yvup0aYzF7mdEvL+0fy0iLu52m/2qKSJeFxH7IuIr5fcFbevM+zgOqK6JiPh+221/vG2dV5Z6pyPixoiIAdX0ljnPuR9GxMvKskHsq9dGxP0R8Uy0PgvUvmyh52JP+4rMHPkf4E+ByTI9CVy/QL8LgdcDd85p/wywpUx/HPjNMv1bwMfL9BbgtiZrAk4GDpffJ5Xpk+bptw94bZl+G/DRfu6rxeoC7gY2zbPOUPYV8DzgV0qf44F7gEuXs69ovcF/CDijbOsB4Jxu7ietS2k8AJwAbCjbOa6bbfaxppcDP1Wmfx442rbOvI/jgOqaAL66wHa/CJwHBPAPs49lv2ua0+dc4NCA99UE8AvArcCVXT4Xl72vMnNlHLnTunzB9jK9Hbhivk6ZuQv4bntb+W93AXD7POu3b/d24MIl/HfspqaLgZ2Z+e3MfBzYCVwyp76XAC+kFVpNaKSuDtsd2L7KzKcycw9Ati5ncT+tz04sRzeXxljofm4GPp2ZT2fmfwLTZXu9Xm5j2TVl5pcz879L+4PAcyPihCXcdl/qWmiDEXE68ILMvDdb6XUrCzyX+1zT1WXdpnSsKzP/KzMPAD+cs+68f/cN7KsVE+6nZeYjZfobwGlLWPcU4InMfKbMHwHWlum1wMPQOoUTeLL0b6qmH21/ntueNXt00X7a0hsj4kBE3B4R61maJur68/Ly9PfbnhhD31cRsYbWK7Ndbc1L2VfdPB4L3c+F1u1mm/2qqd0bgfsz8+m2tvkex0HVtSEivhwR/xQRv9zW/0iHbfazpllXAZ+a09bvfbXUdXvdV8O7/MBcEfEF4EXzLPpA+0xmZkQM5PzNAdW0BXhr2/zfA5/KzKcj4jdoHYVc0L5Cn+t6S2YejYjnA58ttd3aaaV+76uIGKP1hLwxMw+X5o77ajWIiJ8Drgcuamte1uPYkEeAn87MxyLilcDflRqHLiJeDTyVmV9tax7mvuqbkQn3zPzVhZZFxDcj4vTMfKS8XHl0CZt+DFgTEWPlP3n75RBmL5VwpITHiaV/UzUdBc5vm19Ha3xvdhsvBcYyc
1/bbT7W1v8TtMarj9HPujLzaPn93Yj4K1ovOW9lyPuK1oc+HsrMD7fdZsd9Nc9tdLo0xkL3c7F1e7ncRi81ERHrgL8FrsnMQ7MrLPI49r2u8ir06XL7+yLiEPCS0r99SG2g+6rYwpyj9gHtq8XWPX/OunfT+75aMcMyO4DZd5G3And0u2L5Q9sDzL5D3b5++3avBHbPGR7ptabPAxdFxEnROkPkotI262rm/KGV8Jv1BuBgl/X0XFdEjEXEqaWOZwOXA7NHOEPbVxHxx7SepO9tX2EZ+6qbS2MsdD93AFuidTbGBuAsWm949Xq5jWXXVIap7qL1ZvW/znbu8DgOoq7xaH2vAxFxBq19dbgMzX0nIs4rQx/XsITnci81lVqeBbyZtvH2Ae6rhcz7d9/AvloxZ8ucQmuc9SHgC8DJpX0T8Im2fvcAM8D3aY1RXVzaz6D1RJwG/ho4obQ/p8xPl+Vn9KGmXy/bnwbePmcbh4GNc9r+hNabYw/Q+qe0sduaeq0L+AlaZ+4cKDV8BDhumPuK1hFL0gru/eXnHcvdV8BlwH/QOrvhA6Xtj4A3dLqftIaYDtG6PPWli21ziY/ZsmoCfg/4Xtt+2U/rzfkFH8cB1fXGcrv7ab0B/vq2bW6iFZ6HgI9SPiXf75rKsvOBe+dsb1D76hdpZdL3aL2SeLBTRvS6r7z8gCRVaKUMy0iSlsBwl6QKGe6SVCHDXZIqZLhLUoUMd0mqkOEuSRX6f82R+txTTebiAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "11hBiLThyDAn", + "outputId": "0d0732a9-bb50-4b99-90cb-d325bd8b77ef" + }, + "source": [ + "print(f'Mean - orig/approx: {w1.mean()} / {w1_approx.mean()}')\n", + "print(f'Standard dev - orig/approx: {w1.std()} / {w1_approx.std()}')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Mean - orig/approx: -0.0028598906937986612 / 4.747437826569501e-05\n", + "Standard dev - orig/approx: 0.16066423058509827 / 0.012329364365807786\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 301 + }, + "id": "CDhyTTdv5wt3", + "outputId": "1a07f185-9ef5-4119-bc71-c892f0b45f25" + }, + "source": [ + "cr = []\n", + "\n", + "m, n = w1.shape\n", + "for r in range(R, (min(m, n))):\n", + " tmp = 2 * m * n / get_approx_size(r, 2, m, n)\n", + " if tmp < 2:\n", + " break\n", + " cr.append(tmp)\n", + "\n", + "print('Current Compression Ratio (CR):', 2 * m * n / get_approx_size(R, 2, m, n))\n", + "plt.plot(cr)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Current Compression Ratio (CR): 6.862144420131291\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 68 + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rhuPyqNv5Pxk" + }, + "source": [ + "# More than 2 Matrix Approximation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zY1BfLH75W2l" + }, + "source": [ + "## Dividing weight matrix into 4" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "do28lklc5gLG", + "outputId": "e73d1c51-b0ed-4401-f58c-834a3ee87a27" + }, + "source": [ + "models['fashion'].summary()\n", + "mnist_dense = [w, b] = models['mnist'].get_layer('dense_1').get_weights()\n", + "print(w.shape)\n", + "r, h = w.shape[0] // 2, w.shape[1] // 2\n", + "blocks = np.reshape(w, (4, r, h))\n", + "tmp = np.reshape(blocks, w.shape)\n", + "print(blocks.shape)\n", + "print(np.allclose(w, tmp))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model: \"fashion_mnist\"\n", + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "flatten_1 (Flatten) (None, 784) 0 \n", + "_________________________________________________________________\n", + "dense_1 (Dense) (None, 128) 100480 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 10) 1290 \n", + "=================================================================\n", + "Total params: 101,770\n", + "Trainable params: 101,770\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "(784, 128)\n", + "(4, 392, 64)\n", + "True\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "I7EF65FC7LR0", + "outputId": "c7b9cc8e-65ba-4140-bba1-8647764bb1e3" + }, + "source": [ + "R = 64\n", 
+ "metric = 'mse'\n", + "threshold = 1e-5\n", + "scaler = [1.] * blocks.shape[0]\n", + "\n", + "logname = 'mnist_4blocks'\n", + "logname += f'_R{R}_scaler0_{scaler[0]}_scaler1_{scaler[1]}_{metric}_Th{threshold}'\n", + "logname += '_date' + datetime.now().strftime('%Y%m%d-%H%M%S')\n", + "writer = tf.summary.create_file_writer(os.path.join('/gdrive/My Drive/Colab Notebooks/svd/tensorboard', logname))\n", + "%reload_ext tensorboard\n", + "\n", + "%time w_approx = algorithm3_inverse(*algorithm3(blocks, \\\n", + " num_refinements=R, \\\n", + " num_sub_matrix=1, \\\n", + " decomposition='eigen', \\\n", + " stop_threshold=threshold, \\\n", + " metric=metric, \\\n", + " scaler=scaler, \\\n", + " report_writer=writer))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "CPU times: user 36 s, sys: 19.5 s, total: 55.5 s\n", + "Wall time: 28.6 s\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 404 + }, + "id": "QHpnUNBG8aVe", + "outputId": "05a8f3c4-21ee-43c4-8feb-e46266a7d92c" + }, + "source": [ + "k, m, n = blocks.shape\n", + "print(f'CR: {k * m * n / get_approx_size(R, k, m, n)}')\n", + "\n", + "models['mnist'].get_layer('dense_1').set_weights([w, b])\n", + "_, mnist_acc = models['mnist'].evaluate(x_test, y_test, verbose=0)\n", + "print('MNIST - Test accuracy:', mnist_acc)\n", + "\n", + "models['mnist'].get_layer('dense_1').set_weights([np.reshape(w_approx, w.shape), b])\n", + "print('')\n", + "\n", + "_, mnist_acc_approx = models['mnist'].evaluate(x_test, y_test, verbose=0)\n", + "print('MNIST - Test accuracy:', mnist_acc_approx)\n", + "\n", + "print('\\nMNIST - Accuracy drop:', mnist_acc - mnist_acc_approx)\n", + "\n", + "plot_accuracies(['mnist'], [mnist_acc], [mnist_acc_approx])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "CR: 3.408695652173913\n", + "MNIST - Test accuracy: 
0.9664999842643738\n", + "\n", + "MNIST - Test accuracy: 0.9455999732017517\n", + "\n", + "MNIST - Accuracy drop: 0.02090001106262207\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ukjOI4iC-uZq" + }, + "source": [ + "# Different Sizes Approximation" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9UVwhZqY_c5-", + "outputId": "9b636135-0888-4a9d-b749-6c99818afea6" + }, + "source": [ + "w1 = np.random.randint(9, size=(4, 8))\n", + "w2 = np.random.randint(9, size=(5, 4))\n", + "\n", + "max_shape = max_r, max_c = max(w1.shape[0], w2.shape[0]), max(w1.shape[1], w2.shape[1])\n", + "print(max_shape)\n", + "\n", + "padded_w1 = np.zeros(max_shape)\n", + "padded_w2 = np.zeros(max_shape)\n", + "padded_w1[:w1.shape[0], :w1.shape[1]] = w1\n", + "padded_w2[:w2.shape[0], :w2.shape[1]] = w2\n", + "\n", + "print(w1)\n", + "print(w2)\n", + "print(padded_w1)\n", + "print(padded_w2)\n", + "\n", + "def pad_matrices(matrices):\n", + " max_r = max([m.shape for m in matrices],key=lambda item:item[0])[0]\n", + " max_c = max([m.shape for m in matrices],key=lambda item:item[1])[1]\n", + " padded_matrices = []\n", + " for i in range(len(matrices)):\n", + " z = np.zeros((max_r, max_c))\n", + " z[:matrices[i].shape[0], :matrices[i].shape[1]] = matrices[i]\n", + " padded_matrices.append(z)\n", + " return padded_matrices\n", + "\n", + "pad_matrices([w1, w2])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(5, 8)\n", + "[[7 2 7 6 4 2 4 0]\n", + " [1 2 6 0 7 4 2 8]\n", + " [3 1 0 6 7 1 3 6]\n", + " [1 8 5 8 0 1 1 6]]\n", + "[[4 8 6 1]\n", + " [0 8 8 2]\n", + " [0 7 3 7]\n", + " [6 1 2 1]\n", + " [5 0 8 3]]\n", + "[[7. 2. 7. 6. 4. 2. 4. 0.]\n", + " [1. 2. 6. 0. 7. 4. 2. 8.]\n", + " [3. 1. 0. 6. 7. 1. 3. 6.]\n", + " [1. 8. 5. 8. 0. 1. 1. 6.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0.]]\n", + "[[4. 8. 6. 1. 0. 0. 0. 0.]\n", + " [0. 8. 8. 2. 0. 0. 0. 0.]\n", + " [0. 7. 3. 7. 0. 0. 0. 0.]\n", + " [6. 1. 2. 1. 0. 0. 0. 0.]\n", + " [5. 
0. 8. 3. 0. 0. 0. 0.]]\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[array([[7., 2., 7., 6., 4., 2., 4., 0.],\n", + " [1., 2., 6., 0., 7., 4., 2., 8.],\n", + " [3., 1., 0., 6., 7., 1., 3., 6.],\n", + " [1., 8., 5., 8., 0., 1., 1., 6.],\n", + " [0., 0., 0., 0., 0., 0., 0., 0.]]),\n", + " array([[4., 8., 6., 1., 0., 0., 0., 0.],\n", + " [0., 8., 8., 2., 0., 0., 0., 0.],\n", + " [0., 7., 3., 7., 0., 0., 0., 0.],\n", + " [6., 1., 2., 1., 0., 0., 0., 0.],\n", + " [5., 0., 8., 3., 0., 0., 0., 0.]])]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 87 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DPDdcWtdn9YB", + "outputId": "9effbc7a-29b7-46c8-cd7b-2948d0c09f2e" + }, + "source": [ + "import operator\n", + "\n", + "def get_cr_multi_size_matrix(R, matrices):\n", + " max_m = max([m.shape for m in matrices],key=lambda item:item[0])[0]\n", + " max_n = max([m.shape for m in matrices],key=lambda item:item[1])[1]\n", + " approx_size = get_approx_size(R, len(matrices), max_m, max_n)\n", + " orig_size = sum(map(lambda x: operator.mul(*x.shape), matrices))\n", + " return orig_size / approx_size\n", + "\n", + "w1 = np.random.randint(9, size=(4, 8))\n", + "w2 = np.random.randint(9, size=(5, 4))\n", + "\n", + "orig_size = operator.mul(*w1.shape) + operator.mul(*w2.shape)\n", + "print(orig_size)\n", + "get_cr_multi_size_matrix(1, [w1, w2])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "52\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3.466666666666667" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 97 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 283 + }, + "id": "2gm0zUYTsHtz", + "outputId": "f4ef0364-7818-40cb-e4e0-7223d7d1b84a" + }, + 
"source": [ + "cr = []\n", + "cr_small = []\n", + "cr_big = []\n", + "\n", + "small_size = 128\n", + "medium_size = 512\n", + "large_size = 1024\n", + "w1 = np.random.randint(9, size=(medium_size, small_size))\n", + "w2 = np.random.randint(9, size=(large_size, medium_size))\n", + "\n", + "for r in range(small_size // 4, small_size):\n", + " m, n = w1.shape\n", + " tmp = 2 * m * n / get_approx_size(r, 2, m, n)\n", + " cr_small.append(tmp)\n", + " m, n = w2.shape\n", + " tmp = 2 * m * n / get_approx_size(r, 2, m, n)\n", + " cr_big.append(tmp)\n", + " tmp = get_cr_multi_size_matrix(r, [w1, w2])\n", + " if tmp < 2:\n", + " break\n", + " cr.append(tmp)\n", + "\n", + "plt.plot(cr, label='CR')\n", + "plt.plot(cr_small, label='CR_small')\n", + "plt.plot(cr_big, label='CR_big')\n", + "plt.legend()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 98 + }, + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXoAAAD4CAYAAADiry33AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deXxU1fn48c/Jvu8rWUgg7AECxCCbAoogoqKidVfQWr/Vit1s6a/WttZqtba1alWsitqCVUTEDUFBAdl3AoQkZCEJ2fd9spzfH3eAgAlblpvMPO/X677uOneeYcJzz5x77jlKa40QQgjb5WB2AEIIIbqXJHohhLBxkuiFEMLGSaIXQggbJ4leCCFsnJPZAbQnKChIx8TEmB2GEEL0Gbt27SrRWge3t69XJvqYmBh27txpdhhCCNFnKKWyO9onVTdCCGHjJNELIYSNk0QvhBA2rlfW0QshbFdTUxO5ubk0NDSYHUqf5ObmRmRkJM7Ozuf9Gkn0QogelZubi7e3NzExMSilzA6nT9FaU1paSm5uLrGxsef9Oqm6EUL0qIaGBgIDAyXJXwSlFIGBgRf8a0gSvRCix0mSv3gX829nM4m+saWRJclL2Fkg7e+FEKItm0n0CsW7h9/lpb0vmR2KEKKXKygo4NZbb2XgwIGMGzeO2bNnk5qairu7OwkJCQwfPpy7776bpqYms0PtEjaT6F0cXVgQv4BdhbvYUbDD7HCEEL2U1pobbriBqVOncvToUXbt2sXTTz9NYWEhAwcOZO/evRw4cIDc3Fzef/99s8PtEjaT6AHmDZ5HsHswr+571exQhBC91Pr163F2dubBBx88uW306NFERUWdXHd0dCQpKYm8vDwzQuxyNtW80tXRlfnx83l2x7PsKtzFuNBxZockhDiLP3xykEPHq7r0nMP7+fDEtSM63J+cnMy4cWfPDQ0NDWzbto0XXnihS2Mzi02V6AFuHnwzgW6BvLLvFbNDEUL0MUePHiUhIYHQ0FDCw8MZNWqU2SF1CZsq0QO4ObkxP34+f935V/YU7WFMyBizQxJCdOBsJe/uMmLECJYvX97uvhN19CUlJUyaNIlVq1Zx3XXX9XCEXc/mSvRglOoD3AL4195/mR2KEKKXmT59Oo2NjSxevPjktv3795OTk3NyPSgoiGeeeYann37ajBC7nE0meg9nDxbEL2Br/la25283OxwhRC+ilOKjjz7iq6++YuDAgYwYMYJFixYRFhZ22nFz586lrq6OjRs3mhRp11Faa7Nj+J7ExETd2YFHGlsauWbFNYR5hvHu1e/Kk3hC9BKHDx9m2LBhZofRp7X3b6iU2qW1TmzveJss0YPRAufB0Q+yr3gf3+Z+a3Y4QghhmnMmeqVUlFJqvVLqkFLqoFJqoXV7gFJqrVIqzTr37+D191iPSVNK3dPVH+Bs5sbNpb9Pf/6555+06taefGshhOg1zqdE3wz8XGs9HLgUeEgpNRz4NfC11noQ8LV1/TRKqQDgCWA8kAQ80dEFoTs4OTjxcMLDpJWn8UXmFz31tkII0aucM9FrrfO11ruty9XAYSACuB5423rY28Dcdl4+E1irtS7TWpcDa4FZXRH4+boq5iqGBgzlpT0v0dRiG/1WCCHEhbigOnqlVAwwBtgGhGqt8627CoDQdl4SAeS0Wc+1bmvv3A8opXYqpXYWFxdfSFhn5aAceHTso+TW5PLekfe67LxCCNFXnHeiV0p5AR8Cj2qtT3tmWRtNdzrVfEdrvVhrnai1TgwODu7Mqb5nUsQkJvabyKv7XqWysbJLzy2EEL3deSV6pZQzRpL/r9Z6hXVzoVIq3Lo/HChq56V5QFSb9Ujrth7388SfU9NUw+L9i899sBBC2JDzaXWjgDeAw1rrv7XZtQo40YrmHuDjdl7+JXCVUsrfehP2Kuu2HjfYfzBz4+ayNGUpOVU5536BEMJm9db+6L/55hvmzJkDwJIlS3j44Ye
75LznU6KfBNwFTFdK7bVOs4FngBlKqTTgSus6SqlEpdS/AbTWZcCTwA7r9EfrNlM8nPAwzg7O/H33380KQQhhMnvsj/6cnZpprTcBHT1WekU7x+8E7m+z/ibw5sUG2JWCPYKZP2I+/9r3L+nGWIje4ItfQ8GBrj1n2Ei4+pkOd3fUH31WVtbJ9fPpj/7gwYPMnz8fi8VCa2srH374Ic7OzsyaNYtLL72UzZs3c8kllzB//nyeeOIJioqK+O9//0tSUhLbt29n4cKFNDQ04O7uzltvvcWQIUO65OO3x2afjO3IvfH3EuYZxtPbnqaltcXscIQQPexC+qOfNavj1uCvvvoqCxcuZO/evezcuZPIyEgA0tPT+fnPf05KSgopKSksXbqUTZs28de//pU///nPAAwdOpSNGzeyZ88e/vjHP/Kb3/ym6z5gO2yum+JzcXdy5xeJv+AX3/6CD1I/4Naht5odkhD26ywlbzOc6I8+MzOTa6655qz90U+YMIGnnnqK3NxcbrzxRgYNGgRAbGwsI0eOBIwuka+44gqUUowcOfLkr4bKykruuece0tLSUEp1+70AuyvRA1zV/yqSwpJ4cc+LlDeUmx2OEKIHjRgxgl27drW770Qd/Ym6+1WrVnV4nttvv51Vq1bh7u7O7NmzWbduHQCurq4nj3FwcDi57uDgQHNzMwCPP/4406ZNIzk5mU8++YSGhoau+njtsstEr5RiUdIiaptqeXHPi2aHI4ToQV3VH31GRgYDBgzgkUce4frrr2f//v3nHUNlZSUREcazo0uWLLnwD3GB7DLRA8T5x3Hb0NtYnrqcg6UHzQ5HCNFDuqo/+vfff5/4+HgSEhJITk7m7rvvPu8YHnvsMRYtWsSYMWNOlvK7k832R38+qi3VXLfyOkI8Qlg6eymODo7d/p5C2Dvpj77zpD/6C+Dt4s1jlzzGodJD0g+OEMJm2XWiB5gVM4tJ/Sbx4p4XKagtMDscIUQv8+WXX5KQkHDadMMNN5gd1gWxu+aVZ1JK8f8u/X/c8PEN/GX7X/j7NHlqVghxysyZM5k5c6bZYXSK3ZfoAaK8o3hw9IN8dewr1h9bb3Y4QgjRpSTRW90z/B7i/OL409Y/UWWpOvcLhBCij5BEb+Xs6MyTk56kpKGE53c+b3Y4QgjRZSTRtxEfFM+9I+5lRdoKNh/fbHY4QgjRJSTRn+HHCT8mxieGP2z+A7VNtWaHI4ToBl3RH/3Z+oufOHFid4V+USTRn8HV0ZUnJz1Jfm0+f98lLXCEsDU90R/95s29q0bA7ptXtichJIG7ht/FO4feYWrUVCZHTDY7JCFs0l+2/4WUspQuPefQgKH8KulXHe7vqv7oAXJycpg6dSp5eXnceeedPPHEEwB4eXlRU1NDa2srDz/8MOvWrSMqKgpnZ2cWLFjAvHnzOvchL9D5DCX4plKqSCmV3Gbb/9qMNpWllNrbwWuzlFIHrMd1f58GXeiRsY8w0Hcgv/vud1Q0VJgdjhCii3RVf/QA27dv58MPP2T//v188MEHnNl1y4oVK8jKyuLQoUO8++67bNmypdPxX4zzKdEvAV4C3jmxQWv9gxPLSqnngcqzvH6a1rrkYgM0i6ujK09PeZrbP7+dJ7c+yV8v/yvG8LlCiK5ytpK3GS6kP3qAGTNmEBgYCMCNN97Ipk2bSEw81d3Mpk2buPnmm3FwcCAsLIxp06Z1a/wdOWeJXmu9AWh3nFfrwOG3AMu6OK5eYVjgMB5KeIg12Wv4LPMzs8MRQnSBruqPHvhe4a+3FgY7ezN2ClCotU7rYL8G1iildimlHjjbiZRSDyildiqldhYXF3cyrK4zf8R8EoITeGrrU+TVnL2+TgjR+3VVf/QAa9eupaysjPr6elauXMmkSZNO2z9p0iQ+/PBDWltbKSws5JtvvunSz3K+Opvob+PspfnJWuuxwNXAQ0qpyzo6UGu9WGudqLVODA4O7mRYXcfRwZGnpxhf9mMbHqOptXu
H/BJCdK+u6o8eICkpiZtuuolRo0Zx0003nVZtA3DTTTcRGRnJ8OHDufPOOxk7diy+vr7d8rnO5rz6o1dKxQCfaq3j22xzAvKAcVrr3PM4x++BGq31X891bE/1R38hVmeu5pcbfsn9I+9n4diFZocjRJ9lb/3R19TU4OXlRWlpKUlJSXz33Xffu6hcqAvtj74zzSuvBFI6SvJKKU/AQWtdbV2+CvhjJ97PVLNiZ7E1fytvHHiD8eHjuTT8UrNDEkL0AXPmzKGiogKLxcLjjz/e6SR/Mc6Z6JVSy4CpQJBSKhd4Qmv9BnArZ1TbKKX6Af/WWs8GQoGPrDcnnIClWuvVXRt+z/pV0q/YU7SHRRsX8cG1HxDkHmR2SEKIbvbll1/yq1+d3jooNjaWjz766Lxeb1a9fFt2PZTgxUgtT+X2z24nITiB12a8JsMPCnGBDh8+zNChQ3ttC5XeTmtNSkqKDCXYnQb7D+a3l/6WbQXbeHnvy2aHI0Sf4+bmRmlpKb2xkNnbaa0pLS3Fzc3tgl4nXSBchLlxc9lbtJfXD7zO6ODRXB51udkhCdFnREZGkpubS29qRt2XuLm5ERkZeUGvkaqbi9TQ3MBdX9xFXk0e7895n0jvC/uHF0KIriRVN93AzcmNv039GwCPrn+UuqY6kyMSQoj2SaLvhCjvKJ697FlSy1N5YvMTUucohOiVJNF30uSIySwcu5DVWat5M/lNs8MRQojvkUTfBRbEL2BWzCxe2P0CG3M7flxaCCHMIIm+Cyil+MPEPzAkYAi/3PBL0so76uNNCCF6niT6LuLh7MGL01/E3cmdn6z7CaX1pWaHJIQQgCT6LhXmGcaL01+ktL6UhesX0tjSaHZIQgghib6rxQfF89Tkp9hXvI/HNz1Oq241OyQhhJ2TRN8Nroq5ioVjF/JF1hf8Y/c/zA5HCGHnpAuEbnJf/H0U1BbwVvJbhHmEcfuw280OSQhhpyTRdxOlFIuSFlFUV8Qz258h1COUK/pfYXZYQgg7JFU33cjRwZG/XPYXRgaP5LENj7GjYIfZIQkh7JAk+m7m7uTOy9NfJtI7kp+s+wmHSg+ZHZIQws6cM9Erpd5UShUppZLbbPu9UipPKbXXOs3u4LWzlFJHlFLpSqlfd2XgfYmfmx+vzXgNHxcfHlz7IBmVGWaHJISwI+dTol8CzGpn+9+11gnW6fMzdyqlHIGXgauB4cBtSqnhnQm2LwvzDGPxjMUopfjR2h9xvOa42SEJIezEORO91noDUHYR504C0rXWGVprC/AecP1FnMdmxPjG8OqVr1JrqeW+L++jsLbQ7JCEEHagM3X0Dyul9lurdvzb2R8B5LRZz7Vus2vDAofx6oxXKWso4/4191NSX2J2SEIIG3exif4VYCCQAOQDz3c2EKXUA0qpnUqpnbY+xNio4FH868p/UVhXyA/X/JCyhov5wSSEEOfnohK91rpQa92itW4FXseopjlTHhDVZj3Suq2jcy7WWidqrRODg4MvJqw+ZVzoOF6c/iI51Tncv+Z+SfZCiG5zUYleKRXeZvUGILmdw3YAg5RSsUopF+BWYNXFvJ+tGh8+3kj2VTnc9+V9Uo0jhOgW59O8chmwBRiilMpVSt0HPKuUOqCU2g9MA35qPbafUupzAK11M/Aw8CVwGHhfa32wmz5HnzWh3wReuuIlcqtzJdkLIbqF6o3jnCYmJuqdO3eaHUaP2lGwg4e+fohQj1AWz1hMuFf4uV8khBBWSqldWuvE9vbJk7G9xCVhl7B4xmJK60u5Z/U9HKs6ZnZIQggbIYm+F0kISeDfM/9NQ3MD96y+h9TyVLNDEkLYAEn0vczwwOEsmbUEBxy4d/W97CnaY3ZIQog+ThJ9LzTAbwBvX/02AW4B/HDND/km5xuzQxJC9GGS6HupSO9I3rn6HeL84nh0/aN8lPaR2SEJIfooSfS9WIBbAG/MfIOksCR+t/l3vLLvFXpjKykhRO8mib6X83T25OUrXua
6gdfxr73/4vHvHqeptcnssIQQfYgMJdgHODs686dJfyLCK4JX9r1CYV0hz099Hh8XH7NDE0L0AVKi7yOUUvw44cc8OelJdhbs5K7P7yKnKufcLxRC2D1J9H3M3Li5LL5qMaUNpdz2+W0yDq0Q4pwk0fdBl4RdwtLZSwlwC+CBNQ+wPHW52SEJIXoxSfR9VLRPNP+Z/R/G9xvPH7b8gSe3PElTi9ykFUJ8nyT6PszHxYeXp7/MgvgFvJ/6Pvetkd4vhRDfJ4m+j3N0cOSn437Kc5c9x+HSw9zyyS3SbYIQ4jSS6G3ErNhZ/Gf2f3B3cmfB6gW8e+hdebhKCAFIorcpQwKGsGzOMqZETuHZHc/yyw2/pMZSY3ZYQgiTnc8IU28qpYqUUslttj2nlEpRSu1XSn2klPLr4LVZ1pGo9iql7GskEZP4uPjwwrQX+Om4n/JV9lfc8uktHCo9ZHZYQggTnU+Jfgkw64xta4F4rfUoIBVYdJbXT9NaJ3Q08onoekopFsQv4K1Zb2FpsXDn53ey9PBSqcoRwk6dM9FrrTcAZWdsW2MdExZgKxDZDbGJThoTMobl1y5nYr+JPL39aR5Z/wjlDeVmhyWE6GFdUUe/APiig30aWKOU2qWUeqAL3ktcID83P16c/iK/Tvo13+V9x02rbmLL8S1mhyWE6EGdSvRKqf8HNAP/7eCQyVrrscDVwENKqcvOcq4HlFI7lVI7i4uLOxOWOINSijuG3cGya5bh7eLNA2sf4Nkdz9LY0mh2aEKIHnDRiV4pdS8wB7hDd1D5q7XOs86LgI+ApI7Op7VerLVO1FonBgcHX2xY4iyGBAzhvTnv8YMhP+DdQ+/yg09+wOHSw2aHJYToZheV6JVSs4DHgOu01nUdHOOplPI+sQxcBSS3d6zoOe5O7vz20t/yypWvUGWp4vbPb+e1fa9JH/dC2LDzaV65DNgCDFFK5Sql7gNeAryBtdamk69aj+2nlPrc+tJQYJNSah+wHfhMa726Wz6FVUWdhdZWaVlyPiZHTGbFdSu4MvpKXtr7End8dgdp5WlmhyWE6AaqNza5S0xM1Dt3Xliz+/JaC9e+tIlrRoazaPawborMNq3NXsuftv6JKksVD456kAUjF+Ds4Gx2WEKIC6CU2tVRM3abeTLWz8OZ6UNDeG1DBm9vzjI7nD5lRv8ZfHT9RydL97d+eisHSw6aHZYQoovYTKJXSvHEtSOYMTyU339ykC8PFpgdUp8S4BbAc5c/xz+n/ZOKhgpu//x2ntvxHHVN7d6CEUL0ITaT6AEcHRT/vHUMoyP9eGTZHnZly8NBF2pa9DRWzl3JjYNu5J1D7zD347l8m/Ot2WEJITrBphI9gLuLI2/ck0iYrxv3vb2D1MJqs0Pqc7xdvHliwhO8c/U7eDh58PC6h/nZNz+joFZ+JQnRF9lcogcI9HLl3QXjcXF04M5/b+NYqVQ/XIwxIWP44NoPeGTMI2zI3cB1K6/jreS3ZCQrIfoYm0z0ANGBHrx733gam1u5841tFFU1mB1Sn+Ts6MwPR/2Qj+d+zPjw8fxt19+Y98k8Nh/fbHZoQojzZLOJHmBImDdL5l9CSU0jd76xjdIaeeT/YkV4RfDi9Bd5+YqXsbRY+NHaH7Fw3UJyqnPMDk0IcQ42negBxkT78++7E8kureOOf2+jrNZidkh92mWRl7Fy7koWjl3IlvwtzF05l3/s+ocMcCJEL2bziR5gYlwQb9xzCZkltdzx722US7LvFFdHV+4feT+fzP2EmTEzeSP5Da756BqWpy6npbXF7PCEEGewi0QPMHlQEK/fncjR4hop2XeRUM9Q/jzlzyy7ZhkxPjH8YcsfmPfJPDbkbpBBToToRewm0QNcNjiYxXeNI724hlsXb6GoWm7QdoX4oHiWzFrC85c/j6XFwkNfP8T9a+6Xp2uF6CXsKtEDTB0SwpJ7LyG3vJ5bXt1CXkW92SHZBKUUV8VcxcrrV7IoaRFp5Wnc+tmt/Oy
bn5FZmWl2eELYNZvp1OxC7cou5963tuPj5sx/7h9PbJBnt76fvamx1PD2obd5++DbWFosXB93PT8a9SP6efUzOzQhbNLZOjWz20QPkJxXyd1vbgfgrXsvYXSUX7e/p70prS/l9QOv8/6R99Fo5g2axw9H/ZAQjxCzQxPCpkiiP4vMklrufnMbpTUWXrlzHJcPltGtukN+TT6LDyxmZdpKHB0cuXnwzSyIX0Cwh/x7C9EVJNGfQ1F1A/e+afSL88xNo5g3LrLH3tve5FTn8Nq+1/g041MclSM3D7mZ+SPmE+oZanZoQvRpkujPQ3VDEw/+ZxffpZfyk+lx/GzGYJRSPRqDPcmpyuH1A6+z6ugqHJQDc+PmsiB+AZHecpEV4mJ0euARpdSbSqkipVRym20BSqm1Sqk069y/g9feYz0mTSl1z8V9hO7n7ebMkvlJ/CAxihfXpfPIe3tpaJKHf7pLlE8Uf5z0Rz694VPmxs1lZfpK5nw0h99s/A3p5elmhyeETTmvEr1S6jKgBnhHax1v3fYsUKa1fkYp9WvAX2v9qzNeFwDsBBIBDewCxmmtz9pRvBkl+hO01rz6bQZ/WZ3CmGg/XrtzHCE+bqbEYk8KawtZcnAJH6Z9SH1zPVOjpnJf/H0khCSYHZoQfUKXVN0opWKAT9sk+iPAVK11vlIqHPhGaz3kjNfcZj3mR9b116zHLTvbe5mZ6E9YnZzPz97fh7ebE6/dlUiCtMjpERUNFSxNWcrSlKVUNlaSEJzAvfH3MjVyKo4OjmaHJ0Sv1V1jxoZqrfOtywVAe3fTIoC23RvmWre1F+QDSqmdSqmdxcXFnQira8yKD+fD/5uIs6MDt7y2heW7cs0OyS74ufnx44Qfs+amNfw66dcU1xfz6PpHuf7j61mWskyGNhTiInTJk7Ha+FnQqbu6WuvFWutErXVicHDvaHI3LNyHTx6eTGJ/f37xwT5+89EBqbfvIR7OHtwx7A4+veFTnrv8Obydvfnztj8zY/kM/r7r7zLalRAXoDOJvtBaZYN1XtTOMXlAVJv1SOu2PsPf04V3FiTx4OUDWbrtGDe/uoWcMilV9hQnBydmxcxi6TVLeefqdxgfPp4lB5cw68NZ/Oybn7G7cLd0oCbEOXSmjv45oLTNzdgArfVjZ7wmAOMG7Fjrpt0YN2PLzvZevaGOvj1rDhbw8w/24aAUz80bxVUjwswOyS4drznOe0fe48PUD6myVDHEfwi3Dr2V2bGz8XD2MDs8IUzR6ZuxSqllwFQgCCgEngBWAu8D0UA2cIvWukwplQg8qLW+3/raBcBvrKd6Smv91rner7cmeoCskloeXrab5Lwq7pnQn0Wzh+HmLDcJzVDfXM+nGZ/yXsp7pJan4u3szfVx13PzkJsZ4DvA7PCE6FHywFQXa2xu4S9fHOHN7zIZFu7Di7clEBfibXZYdktrzZ6iPbyX8h5rj62lubWZxNBEbh58M1f2vxIXRxezQxSi20mi7ybrUgr5xQf7qW1sZtHVQ7l7QgwODvI0rZlK60tZmb6SD1I/IK8mDz9XP64deC3zBs1jgJ+U8oXtkkTfjYqqG/jV8v2sP1LMlEFBPDdvNGG+8oCV2Vp1K1uPb2V52nLWH1tPs24mITiBGwbdwMyYmXg6S7fUwrZIou9mWmv+u+0YT312GCdHxe/mDGfeuEjpK6eXKK0v5ZOjn7AifQWZlZm4O7kzo/8M5sbNZVzoOByU3Y2/I2yQJPoeklVSy2PL97M9q4xpQ4L5840jCfd1NzssYaW1Zl/xPlamr2R11mpqm2qJ8IpgzoA5XDvwWvr79Dc7RCEumiT6HtTaqlmyOYtnv0zB2cGBx64eyh1J0VJ338vUN9fz9bGv+Tj9Y7blb0OjGRU8imsHXMvMmJn4u7XbR58QvZYkehNkl9bym48O8F16KeP6+/P0jSMZHCotc3qjwtpCPs/8nFVHV5FekY6TcmJixERmx85mWtQ0aZsv+gRJ9CbRWrN
idx5/+uwQ1Q3N3D9lAI9cEYeHi5PZoYkOHCk7wmeZn/F5xucU1hXi7uTO5ZGXc3Xs1UyOmCxNNUWvJYneZKU1jTzzRQof7Molws+dx+cMZ+aIULlZ24u16lZ2F+7mi8wvWJO9horGCrycvZgePZ2ZMTOZED4BZ0dns8MU4iRJ9L3EzqwyfrsymZSCaqYMCuJ3c4YzSKpzer2m1ia2529nddZqvj72NdWWarydvZkWPY0Z/Wcwsd9EKekL00mi70WaW1p5d2s2f1+bSq2lhbsu7c+jVw7Cz0MSRV/Q1NLElvwtrMlaw7qcdVRbqvF09uSyiMu4ov8VTImYInX6whSS6HuhsloLz685wrLtx/B2c+Yn0+O4a0J/XJ2k35y+oqmliW0F2/gq+yvW56ynrKEMFwcXJvSbwPTo6UyNmkqAW4DZYQo7IYm+F0spqOLpz1P4NrWY6AAPfjlzCNeMDJfmmH1MS2sLu4t2s+7YOtYdW8fx2uMoFAkhCUyNmsq0qGnE+saaHaawYZLo+4ANqcX8+fPDpBRUEx/hw2MzhzJlUJDcsO2DtNaklKWwPmc93+R8w+GywwD09+nPZZGXMTVyKmNCx+DsIDdzRdeRRN9HtLRqVu3L4/k1qeSW13PpgAB+ftUQLomRn/99WUFtAd/kfMO3ud+yPX87llYLXs5eTOg3gSkRU5gSOYUg9yCzwxR9nCT6PqaxuYVl247x0vqjlNQ0MmVQED+dMZix0fK0Zl9X11THlvwtbMzdyMbcjRTVGwOzDQsYxuSIyUyOmMyo4FE4OcizFuLCSKLvo+otLby7NYtXv82grNbClEFBLLxiEIlSwrcJWmuOlB9hU94mNuZuZF/xPlp0C17OXlwafikTIyYysd9EIrwizA5V9AHdkuiVUkOA/7XZNAD4ndb6H22OmQp8DGRaN63QWv/xXOeWRH+6msZm/rM1m9c3ZFBaa2HCgEAemhbHpLhAqcO3IVWWKrblb+O7vO/47vh3JwdAj/aOZkK/CUzoN4FLwi7Bx8XH5EhFb9TtJXqllCPGoN/jtdbZbbZPBX6htZ5zIeeTRN++OkszS7cdY/GGDIqqGxkV6cv/XT6Qq0aE4SitdGyK1prMygqaPJQAABtZSURBVEy25G9h8/HN7CjYQX1zPQ7KgfjAeMaHj2d8+HgSQhJwdXQ1O1zRC/REor8KeEJrPemM7VORRN/lGppaWLE7j9c2HCW7tI7YIE/umxzLvHGRMn6tjWpqaWJ/yX625m9ly/EtJJck06JbcHFwISEkgaSwJJLCk4gPjJeuGexUTyT6N4HdWuuXztg+FfgQyAWOYyT9gx2c4wHgAYDo6Ohx2dnZ7R0m2mhp1XyRnM/rGzLYl1tJgKcLd17anzsvjSbEW0a5smU1lhp2F+1ma/5Wtudv50j5EQDcndwZEzKGS8IuITE0kRGBIyTx24luTfRKKReMJD5Ca114xj4foFVrXaOUmg28oLUedK5zSon+wmit2Z5ZxusbM/k6pRAnB8W1o/px76QYRkX6mR2e6AHlDeXsLNzJjoId7CjYQXpFOgBujm6MDh7NuLBxjAsZx8jgkbg7yWA4tqi7E/31wENa66vO49gsIFFrXXK24yTRX7ysklqWbM7ig5051FpaSIjy4+4J/blmVLh0r2BHyhrK2FW4i92Fu9lZuJMjZUfQaJwcnBgeOJxxIeMYEzKGMSFj8HOTwoAt6O5E/x7wpdb6rXb2hQGFWmutlEoClgP99TneVBJ951U1NLFiVy7vbM0mo7iWAE8Xbh4XyW1J0cQEycDY9qbKUsXeor0nk39yaTLNrc0AxPrGMiZkDAnBCSSEJBDjEyOtufqgbkv0SilP4BgwQGtdad32IIDW+lWl1MPA/wHNQD3wM6315nOdVxJ919Fa8116Kf/Zms3aw4W0tGomxwVxW1I0Vw4PkVK+nWpobuBg6UH2FO1hT9Ee9hbtpcpSBYCfqx+jgkcxOng0o4NHEx8Uj6ezFA5
6O3lgSgBQWNXA/3bk8N72YxyvbCDA04UbxkRwS2IUQ8KkX3x71qpbyarMYm/xXvYV72Nv0V4yKjMAcFAODPQbyKigUYwKHsXIoJEM8B2Ao4MUEnoTSfTiNC2tmk3pJfxvxzHWHiqkqUUzKtKXeeMiuXZUP/w9pW98AZWNlSSXJLO/eD/7ivdxoOTAyVK/h5MHI4JGEB8UT3xgPPFB8YR7hkuVj4kk0YsOldY08vHe4yzflcuh/CqcHRXThoRw49gIpg2Vqh1xitaa7KpsDpQc4EDJAZJLkkkpS6GptQmAALcARgSOYETQCGMeOIJgj2CTo7YfkujFeTl4vJKPdufx8b7jFFc34uPmxNXx4Vyf0I/xAwLl6VvxPZYWC6nlqSSXJJNckszB0oNkVGbQqlsBCHYPZnjg8JPTsIBhhHiESMm/G9hPotca5A+o05pbWtmUXsKqvcf58mABtZYWQrxdmT0ynGtHhzMmyl8GRhEdqmuq40j5EZJLkjlUeohDpYfIrMxEY+SaALcAhgUMY2jAUIYGDmVYwDCivKNwUA4mR9632Ueib22FpbdA3JVwyf3gKN28doV6SwvrUopYtS+P9UeKsTS30s/XjatHhjN7ZJgkfXFe6prqSC1PPZn4U8pSOFpxlGZtNPH0cPJgSMAQBvsPZmjAUIb4DyHOP04e7roA9pHo6yvgg3shYz2EjYRr/gZRSd0Sn72qbmjiq8OFfLY/nw2pJVhaWgn1cWXmiDBmjQgjKTYAJ0cplYnzY2mxkF6RzpGyI6SUpZBSlsKR8iPUNtUCRmufaO9oBvsPZkjAEAb5DWJwwGD6efaTqp922EeiB6Pq5tDHsHoRVB+HhDvgit+Bd1jXB2nnqhuaWJdSxOcH8vk2tZiGplb8PJyZPjSEq4aHMmVQMJ6u8qtKXJhW3UpeTR6pZamklKeQWpZKankquTW5J4/xcvYizi+OOP84BvkNYpD/IOL84vB3s++Beewn0Z/QWAPf/gW2vgJOrjDlZ3DpQ+AsHX11h3pLC9+mFrPmYAFfpxRRWd+Ei5MDEwcGcsWwUKYPDSHCT36Ci4tX21RLWnkaaRVppJalklaRRlp52snmngBB7kEM9BtInF/caXN76b/f/hL9CaVHYe3vIOVT8I2C6b+FkTeDPOjRbZpbWtmRVc7aQ4V8dbiQY2V1AAwN82b60BCmDQ1hTJSfVPGITtNaU1xfTHp5+snEf7TiKEcrj1LfXH/yuGD3YAb6DWSg30AG+A4wJr8BBLjZ1kht9pvoT8j41kj4+XshZARc+XsYNENa6HQzrTVHi2tZl1LIV4eL2JVdTkurxsfNiSmDgrl8cDCXDQ4mzFd+aYmu06pbya/NJ708naOVR43kX3GUjMqM0y4Afq5+DPAdQKxv7Ml5rG8s4Z7hffKpX0n0YLTKOfQRfP0klGdCZBJMWwQDpknC7yFVDU1sSithfUoRG9KKKaxqBGBQiBdTBgUzZVAQ4wcE4OEidfui67XqVgprCzlaeZSMigwyKjPIrMwkszKT8sbyk8e5OroS7RNNjE8MMT4xxPrGEuMTQ3/f/r26GkgSfVstTbDnXdjwPFTlQvQEuPwxSfg9TGtNamEN36YWsTGthO2ZZTQ2t+LsqBgT5c+kuCAmxQUyKtIPFyep5hHdq7yhnKyqLDIqMsiqyiKzMpOsqixyq3Np0S0njwtwCzCSvk//kxeDaJ9oor2jcXMy95epJPr2NDfC7ndg49+MFjr9xsJlv4DBV4ODJJae1tDUwo6sMjall7A5vZTk45VoDR4ujiTGBDBxYCDjYwMYGeEr9fuixzS1NJFTk0NWZRbZVdlkVWWdXC5tKD3t2FCP0JMXgP7e/YnyiSLaO5pI78geeR5AEv3ZNDfC3qXw3T+gPAuCh8KEh2HULUaLHWGK8loL2zJL2Xy0lC1HS0krqgHA08WRcTEBjI81ppGRvtIfjzBFjaWG7OpsjlUdI7vKOq/OJqcq57SqIIAQ9xAivSO
J8o4i2ieaKO8oIr2MdV9X3y55LkAS/floaYaDK+C7f0LhAfAKhaQfwrj54BnUs7GI7ymubmR7ZhlbM0rZmnEq8bs6OTA6yo+kmAASY/wZ298fHzcZI1WYq7KxkpzqHHKqczhWdYxj1cfIrc4lpzqH4vri0471dvYm0juSSO9I+vv0Z+HYhRf1npLoL4TWxtO13/3TmDu6wqibIelHED7KnJjE95TVWtiRVca2jDJ2Zpdx8HgVLa0apWBIqDfj+vszNtpI/DGBHvIkpeg16prqyKvJO3khyK3OJbcml9zqXJRSrJq76qLO291DCWYB1UAL0HzmGynjf9gLwGygDrhXa737bOfsNb1XFqXA9tdg33vQVGe01LnkPhg+Vx6+6mVqG5vZc6yCXdnl7MwuY8+xCmoajX5UAjxdGBPlx5hoP8ZE+zMq0hdvKfWLXkhrfdGFkp5I9B0O+K2Umg38BCPRjwde0FqPP9s5e02iP6G+3KjH3/kmlKaDuz+Mvg3G3g0hw8yOTrSjpVWTVlTN7uwKdh8rZ8+xco4WG32oKAVxwV6MivQjIcqXUZF+DA33lrp+0aeZnehfA77RWi+zrh8Bpmqt8zs6Z69L9CdoDZkbjISf8hm0NkHkJUafOiNuAHc/syMUZ1FZ18S+3Ar25lSwL6eCfbkVlNRYAHB2VAwN82FkpC8jI4xpcKi3NO0UfUZ3J/pMoBzQwGta68Vn7P8UeEZrvcm6/jXwK631zjOOewB4ACA6OnpcdnZ2p+LqdrUlsP9/RhPN4hSjLn/oNTD6Vhg4HRylaqC301qTV1HP/txK61TBgbxKqhuMKh8XRweGhHkTH+HD8H6+jOjnw7AwH9xdpOQvep/uTvQRWus8pVQIsBb4idZ6Q5v955Xo2+q1Jfr2aA3H98C+ZXBgOdSXgXuAUcIfeTNEjZd2+X1Ia6vmWFkdB/IqSc6r5ODxKpKPV1JRZwyX56BgQLAXw8J9GBbuzfBwH4aH+xDs7So3fIWpeqzVjVLq90CN1vqvbbbZTtXNuTRb4OjXcOADSPkcmuvBux+MmGsk/ohESfp90ImS/8HjVRw6XsXB41Uczq8ir+JUvykBni4MC/dmSKgPQ8O8GRLmzeBQbyn9ix7TbYleKeUJOGitq63La4E/aq1XtznmGuBhTt2M/afW+qwjgvTZRN9WYzWkfgnJKyB9LbRYjKQ/bA4MuxaiJ8ooWH1cZV0ThwuqSMmv4nB+NYcLqkgtrKahyRgvVSnoH+DB4FAj6Q8K9WJImDexQZ5y41d0ue5M9AOAj6yrTsBSrfVTSqkHAbTWr1qbV74EzMJoXjn/bNU2YCOJvq2GSiPpH/oY0r82Svru/jBoJgydbdTpu3qbHaXoAi2tmuzSWo4UVJNSUE1aUTVHCqrJKq2jpdX4v+booOgf6MHgECP5x4V4MTDYmOQXgLhY8sBUb2KpNZL9kc8hdbXRdNPRBfpPgsGzYPBVEDDA7ChFF2tsbiGjuJbUwmrSCmtIK6omraiG7DYXAKUg0t/9ZNIfEOx5ch7sJfcAxNlJou+tWpohZ6uR8FPXQMkRY3vAAGOQ87grjQuAq5e5cYpu09jcQlZJHelFNaQX1XC02JhnlNScrAIC8HZzYkCQJ7FBngwI9iImyJMBQZ7EBHniJUM2CiTR9x1lGUZpP22t0V6/uR4cnI2WOwOnQuxU6DdG6vbtQGur5nhlPRnFtWQU15BRUktmSS0ZxbWn3QQGCPJyJTbIg5hAI/HHBHrSP9CD/oEe8gSwHZFE3xc1NcCxLUZ/O0fXQ8F+Y7urj1HKj50CMZMhNF6GRrQz9ZYWsstqySqpJaPEmGeV1JFZWktxdeNpxwZ4uhAdYCT9/gEeRAV4EB3gQXSgB6Hebjg4SHWQrZBEbwtqS4xSfua3xrwsw9ju5mu04Ok/wbgAhI+Wh7XsWG1jM8fK6sgurSWrtI7sUmM5u7SO/Mp6Wtv8d3dxciDS350ofw+iAk7MPYjy9yDS3x0
/D2e5L9CHSKK3RZV5kP2dkfSPbTH64AFwcofIRKO6J/pSY9nd39xYRa9gaW7leEU92WV15FinY2V15JTXkVNWT2V902nHe7o4EunvQYS/O5H+7kT4uRPRZh7k6Sq/CHoRSfT2oLoQjm2GY1uNqeAAnBgCLWiw0fNmZKIxBQ+Ten7xPZX1TeSW15FbXk9OmTHPq6gnt7ye3PK6k11DnODi5EA/Xzf6+bmfmnzdCLfO+/m54yk3inuMJHp71FgDebsgdzvk7IDcHUb3DADOHkYVT7+xEDHWuMHrHytP7YqzqmpoIq+8nrzyeo5XGvPcinryK+o5XtFAYXUDZ6YTbzcn+vm6E+7nRrivG+G+7oT5GsthPm6E+brJDeMuIoleGH3ylGdC7i4j6R/fDfn7ocV6887V1xhYJXw0hCcY88CBcqNXnLemllYKqxo4XtFAfmX9yXl+pTEvqGw42VtoW54ujoSeSPw+boT6uhHq7UqYrxsh1m3B3q44y1jBZyWJXrSvpQmKDsHxvZC/1+icrfDQqeTv7AGhIyBspNG6J2yk0f++PMUrLlJjcwuFlY0UVJ1K/gVVDRRWNVBQ2UBhVSOFVQ00t34/LwV6uhDi40aItyuhPq6EeLsR4uNKiLcrwd5u1rkrbs72WTiRRC/OX0sTFB+B/H1GPf+JqbHy1DH+MRAyAkKHG4k/ZDgExklrH9ElWls1ZXUWCiobKK42LgoFlQ0UVTdSXG1cDIqqjV8HLe1cEHzcnAi2Jv1gbzeCvVwJ8naxzl0J9jL2BXi62NSvhLMlerlTIk7n6Axh8cZ0gtZQccwo/RcmQ0EyFB02nug9ccPXwdlI9iFDIXgoBA+BoCFG9Y+TqzmfRfRJDg6KIC9XgrzO/nfT0qoprW2kuLrRuAi0uQAUVTdQVNXIAevgMieGlTyTv4fzyfcK8nYl0NOFYOs80MuVQC8XgjxdCfBywdPFsc82N5USvbh4TQ1QkmoMvFJ02JiKU6A8C2McGkA5GL8AgoZAUJzRAihwEAQNAo9Ao4MXIbpZnaWZkmoLxTXGr4SSGgslNY3GVG2htNa6rbqR6g4uCq5ODicvAAGeLtZlFwI8XQnwdLbOje3+ni74uDn16IVBSvSiezi7WW/gjjp9e1O9cQEoSTOqgUpSjenoulP1/2A87BUYBwEDjZJ/wABjOSAWPAJ69rMIm+bh4kR0oBPRgR7nPLahqYWyWuNCUFprodR6UTi5rcZCWa2F9KIaSmsbT+uTqC0nB4W/pwsBHi74ezoT4OmCv4d18nQhwNMZPw/rfg8X/Dyd8XbtnouDlOhFz2ltgcoc4wJQmm5MJWnGU76VuZz8FQDg5mckfP/YU3P/GGPy6SetgUSvUWdppqzWSP6ltRbK2yxX1FlOXhjK6yxU1DVRXmehnVsLgNFv0c7fXnlRcUiJXvQODo6nkvWgGafva2owqnzKMqzTUWN+fLfRj/+JewFg3A/wiwK//uDf35j7RZ+aPEPkmQDRYzxcnPBwcSLS/9y/FsC42VzV0ER5XRNltY2U1zadvAhouqfgLYle9A7ObsaN3JCh39/X0mz8EijPOjVVZEN5Nhz+BOpKTz/e0QV8I8E3yrgg+EYZ6z4R1uUIcHbvgQ8lxPc5OCj8PFzw83AhNsizR97zohO9UioKeAcIxfjNvVhr/cIZx0wFPgYyrZtWaK3/eLHvKeyUo5NRfRMQ2/7+xhrjQlBxzJgqc6DCup7+NVQXwJklJfcAI+H7nJj6Wefhxtw7XMYBEDajMyX6ZuDnWuvdSilvYJdSaq3W+tAZx23UWs/pxPsIcXauXtb2/MPa399sgao8Y6rMNS4EVceNjuEqcyFnmzHS1/fO62MkfO8w40LgHWase4Vat4eCV5jxa0SIXuyiE73WOh/Ity5XK6UOAxHAmYleCHM5uZz9FwGApc5I/tXHoSq/zdw6ZW6EmkJobfr+a918jYTvFWJcDLxCjWWvUPAMPrXuESg3kYUpuqSOXik
VA4wBtrWze4JSah9wHPiF1vpgB+d4AHgAIDo6uivCEuL8uXhY2/nHdXxMa6vRMVx1vtFbaE2BsVxTZFQP1RQa/QhVFxqjg51JORjJ3jMEvIKNi4BnMHgGtVkOth4TDC6e8pyB6BKdbl6plPICvgWe0lqvOGOfD9Cqta5RSs0GXtBaDzrXOaV5pejTtAZLjXEBqCkyLgA1RVBbBLXFUFN8+nJTbfvncXIDjyDwDDTmHoHWi0DgqeUTk3uA8eyBdENht7qteaVSyhn4EPjvmUkeQGtd1Wb5c6XUv5RSQVrrks68rxC9mlJGx2+u3saDYOdiqYO6EiPp15UYo4nVnlguPbWtNB3qysBS3fG5XH2MgWY8Ak4lf/eANtv8T627+xlzN1+pUrJxnWl1o4A3gMNa6791cEwYUKi11kqpJMABKG3vWCHslosHuFifATgfzY1Gwq+zXgROLpcZVUtt52VHjRvNDZVnP6ebrzXp+xkXgA7nvsbyieNdfWQQmz6gM9/QJOAu4IBSaq9122+AaACt9avAPOD/lFLNQD1wq+6Nj+IK0Zc4uVqbgYaf/2tamo1kf+IC0FBhXADaLteXQ32FsV6ZZ91e0f4N6LZcvKwXAF8j8Z9YdvNps95m2dXHuu5tLLt4yQNu3awzrW42AWe9U6S1fgl46WLfQwjRRRydjLp9z8ALe53W0FRnvQBUWqc2yye2N7ZZrykw+jZqqITGKmhtv5OwU9pUdbn6tFlub5vX6ReIE+su1n3SU2q75DeXEKJjShmtf1w8jQfMLtSJC0VDlZH0G6qsF4UqaKw2tjVWt1mvtK5XGA+8WWqMdUvN+b2fg7OR8F28Tl0I2s5dvIzP4tpm2cWzzXLbbZ7g7GkTVVN9/xMIIXqvthcKLqCq6UytraeS/onE31hlPBVtqTHmjVWnlk9eIGqN5eqCU8uWGmj5/pCGHXJ0PSP5e5yx7GFcEFw8rcsep45xdm+zfMY2Zw+jlVQPNKGVRC+E6P0cHIx6fTefrjlfs+VU0rfUtbkI1Bq/QE5cME4sW2qN45pqTy1XHT91fFOdse1c9zO+97mcrEnf3Zi8w2HB6q75jG1IohdC2B8nF3AK6PpxD1qa2lwsTlwYrPOmemOynFi27mtuOLWtm7rTkEQvhBBdxdHZ+nyCn9mRnEbaNAkhhI2TRC+EEDZOEr0QQtg4SfRCCGHjJNELIYSNk0QvhBA2ThK9EELYOEn0Qghh4zo9wlR3UEoVA9kX+fIgwJ4HNpHPL59fPr996q+1Dm5vR69M9J2hlNrZ0XBa9kA+v3x++fz2+/k7IlU3Qghh4yTRCyGEjbPFRL/Y7ABMJp/fvsnnF99jc3X0QgghTmeLJXohhBBtSKIXQggbZzOJXik1Syl1RCmVrpT6tdnxdDelVJRSar1S6pBS6qBSaqF1e4BSaq1SKs069zc71u6klHJUSu1RSn1qXY9VSm2z/h38TynlYnaM3UUp5aeUWq6USlFKHVZKTbCn718p9VPr336yUmqZUsrNnr7/C2ETiV4p5Qi8DFwNDAduU0oNNzeqbtcM/FxrPRy4FHjI+pl/DXyttR4EfG1dt2ULgcNt1v8C/F1rHQeUA/eZElXPeAFYrbUeCozG+Hewi+9fKRUBPAIkaq3jAUfgVuzr+z9vNpHogSQgXWudobW2AO8B15scU7fSWudrrXdbl6sx/pNHYHzut62HvQ3MNSfC7qeUigSuAf5tXVfAdGC59RCb/fxKKV/gMuANAK21RWtdgR19/xhDoborpZwADyAfO/n+L5StJPoIIKfNeq51m11QSsUAY4BtQKjWOt+6qwAINSmsnvAP4DGg1boeCFRorZut67b8dxALFANvWauu/q2U8sROvn+tdR7wV+AYRoKvBHZhP9//BbGVRG+3lFJewIfAo1rrqrb7tNF21ibbzyql5gBFWutdZsdiEidgLPCK1noMUMsZ1TQ2/v37Y/x6iQX6AZ7ALFOD6sVsJdHnAVFt1iO
t22yaUsoZI8n/V2u9wrq5UCkVbt0fDhSZFV83mwRcp5TKwqiqm45RZ+1n/SkPtv13kAvkaq23WdeXYyR+e/n+rwQytdbFWusmYAXG34S9fP8XxFYS/Q5gkPWOuwvGTZlVJsfUraz10W8Ah7XWf2uzaxVwj3X5HuDjno6tJ2itF2mtI7XWMRjf9zqt9R3AemCe9TBb/vwFQI5Saoh10xXAIezk+8eosrlUKeVh/b9w4vPbxfd/oWzmyVil1GyMOltH4E2t9VMmh9StlFKTgY3AAU7VUf8Go57+fSAao6vnW7TWZaYE2UOUUlOBX2it5yilBmCU8AOAPcCdWutGM+PrLkqpBIwb0S5ABjAfo/BmF9+/UuoPwA8wWqDtAe7HqJO3i+//QthMohdCCNE+W6m6EUII0QFJ9EIIYeMk0QshhI2TRC+EEDZOEr0QQtg4SfRCCGHjJNELIYSN+/94YKjqjof5egAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "euTcBc3P_aoN" + }, + "source": [ + "## CNN-LSTM Model" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QzUlHRh0v5Lm", + "outputId": "f0c8f966-4aa7-43c2-8806-f08c6f5ce9e0" + }, + "source": [ + "print(models['cnn-lstm'].get_layer('LSTM_1').get_weights()[0].shape)\n", + "print(models['cnn-lstm'].get_layer('LSTM_2').get_weights()[0].shape)\n", + "print(len(models['cnn-lstm'].get_layer('LSTM_1').get_weights()))\n", + "print(len(models['cnn-lstm'].get_layer('LSTM_2').get_weights()))\n", + "\n", + "print(models['cnn-lstm'].get_layer('LSTM_2').get_weights()[1].shape)\n", + "\n", + "[w1, r1, b1] = models['cnn-lstm'].get_layer('LSTM_1').get_weights()\n", + "[w2, r2, b2] = models['cnn-lstm'].get_layer('LSTM_2').get_weights()\n", + "\n", + "[w1_pad, w2_pad] = pad_matrices([w1, w2])\n", + "print(w1_pad, w2_pad)\n", + "print(w1_pad.shape)\n", + "print(w2_pad.shape)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(2048, 256)\n", + "(64, 128)\n", + "3\n", + "3\n", + "(32, 128)\n", + "[[ 0.03283332 0.03514677 -0.00596567 ... 0.03670166 -0.00393717\n", + " -0.0284326 ]\n", + " [-0.01096857 -0.0448408 -0.02966657 ... 0.01016361 -0.05727449\n", + " 0.02986955]\n", + " [-0.00811409 -0.0135733 -0.04766004 ... 0.04387834 -0.00304517\n", + " 0.02890401]\n", + " ...\n", + " [-0.00517103 0.01756404 0.01726159 ... -0.04113563 -0.02533144\n", + " 0.04300131]\n", + " [-0.01353051 0.02406969 0.00367547 ... 0.01924247 -0.04716729\n", + " 0.02338111]\n", + " [-0.03106386 -0.03033421 0.03282609 ... 0.0066899 0.00302576\n", + " -0.03358177]] [[-0.05971472 -0.14265421 0.13733262 ... 0. 0.\n", + " 0. ]\n", + " [ 0.02199466 0.14343984 0.07385601 ... 0. 0.\n", + " 0. ]\n", + " [ 0.06103988 0.01135218 0.11987313 ... 0. 0.\n", + " 0. 
]\n", + " ...\n", + " [ 0. 0. 0. ... 0. 0.\n", + " 0. ]\n", + " [ 0. 0. 0. ... 0. 0.\n", + " 0. ]\n", + " [ 0. 0. 0. ... 0. 0.\n", + " 0. ]]\n", + "(2048, 256)\n", + "(2048, 256)\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yG6MC6NUwvQm", + "outputId": "be2a4d36-d03e-414f-a3e0-cc98fea441a8" + }, + "source": [ + "R = 25\n", + "metric = 'mse'\n", + "threshold = 1e-5\n", + "scaler = [1., 1.]\n", + "\n", + "logname = 'cnn-lstm'\n", + "logname += f'_R{R}_scaler0_{scaler[0]}_scaler1_{scaler[1]}_{metric}_Th{threshold}'\n", + "logname += '_date' + datetime.now().strftime('%Y%m%d-%H%M%S')\n", + "writer = tf.summary.create_file_writer(os.path.join('/gdrive/My Drive/Colab Notebooks/svd/tensorboard', logname))\n", + "%reload_ext tensorboard\n", + "\n", + "# %time u, s, v = algorithm3([w1_pad, w2_pad], \\\n", + "# num_refinements=R, \\\n", + "# num_sub_matrix=1, \\\n", + "# decomposition='eigen', \\\n", + "# stop_threshold=threshold, \\\n", + "# metric=metric, \\\n", + "# scaler=scaler, \\\n", + "# report_writer=writer)\n", + "\n", + "%time u, s, v = algorithm3_extra_refinements([w1_pad, w2_pad], u, s, v, \\\n", + " num_refinements=R, \\\n", + " num_sub_matrix=1, \\\n", + " decomposition='eigen', \\\n", + " stop_threshold=threshold, \\\n", + " metric=metric, \\\n", + " scaler=scaler, \\\n", + " report_writer=writer)\n", + "w_approx = algorithm3_inverse(u, s, v)\n", + "\n", + "\n", + "w1_approx, w2_approx = w_approx[0], w_approx[1]" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "CPU times: user 3min 4s, sys: 26.1 s, total: 3min 30s\n", + "Wall time: 1min 49s\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 389 + }, + "id": "Ly_vcfqcw7Be", + "outputId": "bdb3aadf-c61f-42ed-822c-8bca62d0b48a" + }, + "source": [ + 
"models['cnn-lstm'].get_layer('LSTM_1').set_weights([w1, r1, b1])\n", + "models['cnn-lstm'].get_layer('LSTM_2').set_weights([w2, r2, b2])\n", + "\n", + "_, cnn_lstm_acc = models['cnn-lstm'].evaluate([test_data[0], test_data[1]], test_labels, verbose=0)\n", + "print(f'CNN-LSTM - Test accuracy: {cnn_lstm_acc}')\n", + "\n", + "models['cnn-lstm'].get_layer('LSTM_1').set_weights([w1_approx[:w1.shape[0], :w1.shape[1]], r1, b1])\n", + "models['cnn-lstm'].get_layer('LSTM_2').set_weights([w2_approx[:w2.shape[0], :w2.shape[1]], r2, b2])\n", + "\n", + "_, cnn_lstm_approx_acc = models['cnn-lstm'].evaluate([test_data[0], test_data[1]], test_labels, verbose=0)\n", + "print(f'\\nCNN-LSTM - Test accuracy: {cnn_lstm_approx_acc} (approx)')\n", + "print(f'\\nAccuracy drop: {cnn_lstm_acc - cnn_lstm_approx_acc}')\n", + "\n", + "plot_accuracies(['cnn_lstm'], [cnn_lstm_acc], [cnn_lstm_approx_acc])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "CNN-LSTM - Test accuracy: 0.65625\n", + "\n", + "CNN-LSTM - Test accuracy: 0.34375 (approx)\n", + "\n", + "Accuracy drop: 0.3125\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ykQbfnSia2rf" + }, + "source": [ + "# Minimization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IvprTTb8vXiM" + }, + "source": [ + "# Playing with Images" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 335 + }, + "id": "gTMiHnm-wHfV", + "outputId": "329ed68a-6e85-4956-d3c6-3687d63c0265" + }, + "source": [ + "img = Image.open('/gdrive/My Drive/UNIVERSITY - PhD CHALMERS/mugs.jpg') #.convert('L')\n", + "img = img.resize([int(0.8 * s) for s in img.size])\n", + "img_array = np.array(img).transpose([2, 0, 1]).astype('float32')\n", + "num_inputs, input_size, output_size = img_array.shape\n", + "print(img_array.shape)\n", + "img" + ], + "execution_count": 110, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(3, 300, 364)\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 110 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 353 + }, + "id": "oZpmPT3Xvzjb", + "outputId": "86a8e721-10e3-4652-fe5c-906f249ca1aa" + }, + "source": [ + "%time img_approx = algorithm3_inverse(*algorithm3(img_array, num_refinements=64, num_sub_matrix=1, decomposition='eigen')).transpose([1, 2, 0])\n", + "image = img_approx.copy()\n", + "image *= (255.0 / image.max()) # .clip(min=0, max=255.0)\n", + "Image.fromarray(image.astype('uint8'))" + ], + "execution_count": 111, + "outputs": [ + { + "output_type": "stream", + "text": [ + "CPU times: user 12.3 s, sys: 6.01 s, total: 18.3 s\n", + "Wall time: 9.42 s\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + 
"metadata": { + "tags": [] + }, + "execution_count": 111 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 354 + }, + "id": "2UqfaUomwfNm", + "outputId": "819c8b75-3c03-4882-fd1b-ac91f0d93b61" + }, + "source": [ + "%time img_approx = algorithm3_inverse(*algorithm3(img_array, num_refinements=64, num_sub_matrix=1, decomposition='svd')).transpose([1, 2, 0])\n", + "image = img_approx.copy()\n", + "image *= (255.0 / image.max()) # .clip(min=0, max=255.0)\n", + "Image.fromarray(image.astype('uint8'))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "CPU times: user 20.2 s, sys: 12.9 s, total: 33.1 s\n", + "Wall time: 17.7 s\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 40 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "95vRr_e-z4k3" + }, + "source": [ + "### Combine three different images" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ca4XKLO6xNTc", + "outputId": "70d1cedc-7bbf-4cbd-c594-050d0bfe045c" + }, + "source": [ + "cat = Image.open('/gdrive/My Drive/UNIVERSITY - PhD CHALMERS/cat.jpg').convert('LA').resize([512, 256])\n", + "mugs = Image.open('/gdrive/My Drive/UNIVERSITY - PhD CHALMERS/mugs.jpg').convert('LA').resize([512, 256])\n", + "flower = Image.open('/gdrive/My Drive/UNIVERSITY - PhD CHALMERS/sun_flower.png').convert('LA').resize([512, 256])\n", + "\n", + "cat_img = cat = ImageOps.grayscale(cat)\n", + "mugs_img = mugs = ImageOps.grayscale(mugs)\n", + "flower_img = flower = ImageOps.grayscale(flower)\n", + "\n", + "cat = np.array(cat).astype('float32')\n", + "mugs = np.array(mugs).astype('float32')\n", + "flower = np.array(flower).astype('float32')\n", + "\n", + "img_array = np.array([cat, mugs, flower])\n", + 
"print(img_array.shape)\n", + "\n", + "cat_img, mugs_img, flower_img" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "(3, 256, 512)\n" + ], + "name": "stdout" + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(,\n", + " ,\n", + " )" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "DtsG0HAx0ZF2", + "outputId": "baa91174-1101-4da8-9966-5a8fcbbb80a7" + }, + "source": [ + "R = 8\n", + "metric = 'frobenius-norm'\n", + "threshold = 1e-5\n", + "logname = 'base'\n", + "logname += '_R{}_{}_Th{}'.format(R, metric, threshold)\n", + "logname += '_date' + datetime.now().strftime('%Y%m%d-%H%M%S')\n", + "writer = tf.summary.create_file_writer(os.path.join('/gdrive/My Drive/Colab Notebooks/svd/tensorboard', logname))\n", + "%reload_ext tensorboard\n", + "\n", + "%time img_approx = algorithm3_inverse(*algorithm3(img_array, \\\n", + " num_refinements=R, \\\n", + " num_sub_matrix=1, \\\n", + " decomposition='eigen', \\\n", + " stop_threshold=threshold, \\\n", + " metric=metric, \\\n", + " report_writer=writer))\n", + "img_approx *= (255.0 / img_approx.max()) # .clip(min=0, max=255.0)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "CPU times: user 37.2 s, sys: 19.3 s, total: 56.5 s\n", + "Wall time: 29.1 s\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 273 + }, + "id": "bAq7iFZr1VNz", + "outputId": "b709c064-c873-4de9-afcb-8a83408f0ed2" + }, + "source": [ + "cat_approx = Image.fromarray(img_approx[0].astype('uint8'))\n", + "cat_approx" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAgAAAAEACAAAAADg+5G7AACkgUlEQVR4nKS965okO44kZgZ41OmRVnpsva92d2a6Mpww/TCA9Mis07PfKrpPVl4i3OkkCBgMF/L/kVTS/f56r5JEgmDV/fX7P/793//9TfhFEhRIQgBBEP3/zHz9+pUAIq9/+7d//Nuv16/MhACJhSBIotb7n//xP//7//s//uc/lwBI9IVBAagSAIIMxhVASVVE/vX69Y+/rvzr9Xpd13UlCalW1aqvf//Pr1W1MqoWg7xeGYwL/IvX//GLpbgyIsmkdGnVvfT+vaqwbkX+g6W6f1f89dcv8h8BKJJVS0FUIVhVxSvyun6xBEAokAQEAQJJCQVKAqSSn4v0bxCABIiUAECKAECUqkgVJEH+qwj5VVVVgjzh6p8QgBgAtGoVJAgEJAFemaDkuZUU0Koig1CpqqQSCIERkZlxAV7HWWgR/WEPAj8EgN8EgMUQvHr9TpJzQdHX7GFWrbVuC8Bc2SNX1REAEVCpBBYR76grtuhtAVj3qqqq8rSBoX33CAYCGRmZZALKEoWKAABlZUQQhVr5ui7yCqCYJKEgSCUIiCQhqQR9FwAFKa90YRaPlN8ieU7k/wsUpOqp69/5DYK8ER4T7qUAKQTAgkjKayWNCFrwtgDgIQAoiyRBeDmlBYACAUqqS99e8OY8P3+Mqgfbv+P5q0a85f/2wHrp2c/4pxf32LG/sOeDz1//6ZMg0ULJP7zJT4O9wx7zhhmx/GVU0sxC78t++//PV4vCuU7LBL/9YouE1+DxDDPyvsYsDyT+aXhif2mNdG6zfxQEXAWpUKMggGJfvizN2lc/oiAAtBhQPdSP23xbbI/mf2UaCbKXvEWCe3p6fj4lkj++wZnXb/NrGbBsav/6LIvOWLlFdqZgPqWtz4DHgnyIiVqZ+3uezUF9vOnbGB9DOU8ibsmQZ2VPO1q49/6DwN6ExENk2L8/Dzq7gFevX+0dHwKhqtoS8H1av4/5IZ3n4v/i9fMhAbDnlWfXfyof4fEI0rfr7BGpbRha7czu5mz1PTyNvtoagNsIf+z9mejn4P/0hJrH30+lGUQPmD2UHu/n3nxc6CkmFGeUz5k9Gvu7lHN//5zhkcbvr2MCShoNIJ71fw4FQojfbQJbRRHi36z+1uv/4kX9zW6efTaPMCrvbzWzZhk+9udYpDOR+8/PxW3haP2G3gL7kn7bGPCZlYdengHOINte9xjmAR4j/8Ok8GNEz53Vm3l+2AL6QwO0BIOS9ng+h+zXdda/amuAEYg6+n+PwdPh6Zmn9Ab6FwvMbw9rBfu37z+PzL31dcwezqT+4ZPb+vUlPj6ih/7DHnvv+e/K+HmPXuWjKI6snJX9eC+f8EFn5v/42M9ffu67+Y0esz9vwt9qgFHcY4U2aDwj8Bv+DAKxp3qklS3TvrAMAdjY3p/gYw7mTvtHnj384/VzQhoEfC6J/uZ7oz9+Xoff3rO3zOfcnm9nx/NPf/wvhrv/8v0T+vjmLADOvGGstQQ+b//Uv+fjo204dsVr0viiUd9+kNY/fwMCAQDR+vshvnujaY/4qR2/Pdt2QrYa2jpJj39/zsmfLQJ//uFzTr5ttnZg+eePfjz3z2f48aaHMv/D5+fJvsnykfnvIrcv+VAfs4jaiu3b/b9ftP/iHbxnU3gs0R7y2b7nkfcw9POyAOLnNDwtMfF3Ej+/JT++/C++eP7lH1eN/P7W/3+vnw/Cn98/HM7vd+X+I/m/PaaPQfzvXebvd8fP1965/+oVLUk4JvYYxx9g+w/fHqn9r2715xF/evnHC3xM+b+8HB9Lxj+t6r8cBrcB+cPN2BzDc+H+98Xxm2CxR/+vL/hf3u7HpD+4g/3vv1qZa3nZVy0ThfZRtJZ5o8f6khpW7DGuNu7SuM4NIA+ZouhPzy/JbdxbA4hoawQ2lwdRrd3bqWuwGtrfj4Ijf+zLceP8GZKU+oFqABWOzHuwtc2xBKh
QpoKLKhbKG6W8bq3bA0eff8NOG3gZqhF1/BfUNtzbWDa6fyLeB+LzT0UbcdYZO9skiXb/of2ljUcF9oM3lJxrDhHkuVFpz16PYrsMx71G8yQb83zY5j38efQHX4RzsY8X1bCGw+wNypzd+RDEh+6Rd2lv1pYCf9eX+LbFvhldnNvOog3M/bH3xsSOv/sAC+eHY9U/vID5lb+fadSs+8My89zt+3d7rUdQtmfb0sft/0Hq4IT9WLQb2J7kQz/oelxpr9Be/7m/Ef4QwP7Fd7fpc573wNA355bJfZHHv+NZbUXs63+7w7mANqzp1dZWHRag1t3zZSAFnybD4Y3nh1qWmm7voAdHyjYMPBz4v3J9H6+ne/DDVfib952Z4o9Zm0kYiXqK3UaJzzfysSTjjXp3XtVacqsUf2S/njwHtntFtZf2bQa+yTz28h990dO9n/gDO3qyIx7KfxZGjxn45ibjWAGCz1XGyMF3nHGkQJwPbV1iE0WC1HNNepJ5fP/HQjyN5dxJM2PEQ6a22zoj4f6MJ0MzjD3G/pMn8jH0fe+96mfjPUzAKInjBY+NqQt/eI1PfJ7ON+PMwWNeKFRIKhvDUpWqqsPGEjS7sZ5Cpr6NN9Rzq2tzDh/r21LTc6YGaCDDuISxNcBj13PP7Xelfb7fTylL80CfbctmUQ4nyO87/zFkYsS99cTMf8wOjNbDjTSo2qBkL8wAlp702aIsP4+ju/q0q36Sv9fLP3YNAFx/SSoVVOKS54CIBa3Xdb36cw7/TlSjba8IYAmx1lLdAuN1RYC1KuJVoIosJYtB1v31+/fX1+/f//ynJEoFSCARYISqHOllxPUKQYtaoiKW4pXXdUUEHdtmGahcFbVWJVU3g6QKGz92EFQh4EqtXHet0rq1iioB7yhpvStQK6IYjKveN5YCqEKmB0VVKKJAkWIEFARYixkL4UhxASoBCuM77/xaVUsqUgsQFRljnwsBf1RUGV6XnAnhDABMKHqt/lGIIKiqVQIRr844sJ5nGOE5HYFQOUYOlKoEVW+IiMzIuF5ONoCqwBoBEFCZmfnQ/QSdlADr/xb7UoWULBUv6L7jIrMiFqjidYtYjOC673ut9/t+f0ECahnXBAlegVViSEKkmIKKWhWoKsSVVybDL0GkBUBVZAUL0WH6IiuGmWjJlphV1FrluKe3Ti0soeoOAhkVwStwv7FACEVQIiOCQWYsUGAgA0oCXMGLodSCxBJUABSr2mCRzoBYWIhaQpG8EiWJUQvJWwBKGAFQoaRatVQCIqiC1t0CACCvAFW6F8B4ZQKtvQQweoXQEL54BKCnwwkhGVcmr0AFgCDDWooAWKGIscV7/cGnLX+4sU+F2epsIO7BHtsK2Xdpt2YsjSdvroKn4m0L4GUggwAYRSIiQAmBQIjxAftsBcLaIBi1/xoFsjZaeJqXMfwEEIjwTAWDEfbaEIiAwjETBFu5ixCL1gBjTsb2jwrewdU9Tdtcj2W0GNmYbkg/JgBeyQOvpc6G8L0OpOJjkQ7XsY3onqMrxQIRmUsoae+dsOCPAPDjYTjw+Injzz3HQD9/fEzCPNPDgkmoCWKPk1vjjUB4jNqX1WCkI07t43zIa3+mGIpBC2wAOzPyARv4HO4RpiGEBh7uycRDYtjg2OaRba19Q4zL0Z7OvzDVP7wBu7Pt6UBkqO/7dHG4seLfCoCB3MeSXGF6oiLMsPgDhbDB/aEBzs89fw20/bwPH/yB1PYVLAH4JgCKcTu2BvBOGpzU6OoB7v7O/Zol37CeQQYRZBRJhkFpDAjfAmB0YSXy8WBPIYBtz4jgc1dpxGr+MG4S96228zES3EK4UeUzBWpLCRvTtJPiR9pyerbEmfGHBBBwftqszmZlRuFdAQiFXJECa6OnXBaBfykAW7NsoX9snTOEAySe3sXmjthJM21CBk7vt211ZasUBKL5uNmd1oN7JdlvJIIBJioTmQoUFKEoIkQbTMV5MQIWEkJihrMnAxERKlBiIAi
MCO0Hx5kfhjerk/8YKgPdohhgEKHCc+m0r4bRGr35SCJERbFzA2G0O+4Vvy15bzOrpbkEieO/P5S2vYCqqiCYS3L6IbRu6n5dVz32L1FrdqV/9tZFGCfVcyjcOV17PRsBdKIJGoq1YEJrWRFQWIVxkKoNXESnsWZQkKJi33CbAB5hIS0qSUQiMoFErTDDTagYiACgyIiMyIxghCIUZniZV5GMCERmVooShQjImCK2VaIQQhQIRYMp748oBiw48PRu23M0F6Cx5WeLaZteBZtQJ5AZoKiKnhcDLVLQBoHoNEtJk9UKeNtgq0mS1z9QVar7ius2NgXBWvcXdb9vEIBm99fajvxYFrW5iKgnRmgx06agOrHJAQe7PQtbALiktQykKcRqken1Z2Re1ytedgbsH7Ocj3zs2SiJfiWDlVFxUa9X3skKLQmFhUqkMgPAQmZeEVcEMpAlIkiI11WFyCsYeV3lGS5GANGbKgKIwqDdEKBobttMTfZ+ybJnoQgIoW8rvqWhxbhpBIYXNhuB0l5A0/Vk5JUzj/BaG3UTislAYbRyP3k8oyuvfzsCsJZgKA2t98V6v98bTJAg1tJDAggSxQKZmW2hPowYxuSjEV09bH3zGGNAtG4gLABZBFsDGIjndV1XXq/MTLtwRFbEVqMYZWfE3i8ws+LFer2KF7UQ5VWEkMoUiBtxXWkBiFSkiAgCcWUVmFcg83oV1Ts50djbSqRj6vZiLAAFAGXGMD0p0WpbEQrt7SI97OQfkrAZKBARmNXJTMApfGRkXlaubB7ACpHjBloAiCgCZT1Je98kef3l/M8guEpQtABk1P371+tQlCQQ4ZXDBhEoEEaM2IlSw3QZxZdsA81dNb8lmAjabiBqlZ2esBfA8SUBkBGZmVdmZkCqQMWG53u32Gff628NwLgQeS2kFkVnOwJieK11IbPNAOwtgBGk4spaYCaZmYFoTRYPJd5UJIjGspAdajQ2iGJUQExEw72NIcczfOyYB/O4VSkJWbUjAshMUTThGpHhq4wGwCGCLADF2L899qfB+vXLajkDXEuNMKAVuL9e17UOvPG42jf1bwNAorkSYMQqvBJi+DcHl/eTHW+/vQCRVWWWtMBybvpw3Hv9r+vKCJRikSyuTCGrMVtEMiMi8oor83VFUleAv6jXX2sl7VxT5KrAVa9LZCgy8xXxYoqXPGURRLyiFhgXmfm6ahx5b5IlUEjrctB7uQQgyJpgpgQuFMBk45oIw5ZAKGCirjetER9hJBqeWZth4/ckkVfKj0IyMlNNlmwNYAFAgXA1SdD6aZNERIPl62UNAK2uxLAAQHfm8QM5/uRkpc4vN2Jl2HIyXIsTKVFgBEKJCKIMy8w9cywBwQJrU8NNYo7paAl4qIBAqbjIYkYKSgWkQCM5xnXFdcX1isi6EvwV9fpVK6DCKrHAWORVr0sBVkTmK+IXU0zXAUQGlVfWAjLJK16Om9nuc8dHM+TqJQ/ZGIAs074ECvYHmAVoRdErFpWrFDC/16n7JKDg0HEADXp3wC9JXJmAqFxsiKpvAsBy8rZTMSyx/q4asbZ3Q15OCkREMHqux7mbDXjAiT5jhmOwtL3uDda3AOS1LoEMsjK3CGwN0OZi04StYJog2kGS8XgiIhonIWCFzyBVffdGANkeQ1ypuBjXtRi1ClwpV/kxmQlCibjyirgsAAUVIwOIKxeFvMjM61rVtOYRgELGxuqb3gyMAHfAxGQ0Um2mrOkTDwHwdBpWDwEKgBHZur9o5QJGiIIRsF11DoM3TIi1yoPqOO5R+4Gtqy9XBJmwLqnZKq21aqwlho1AyWwy2sPTwJ7NAD/cMGv9YHipiObUe/GsUmoEjtWj1bjjNo7N/54HO1onFBEJpciKeDjWAJ7f+ctwrg5lmRf6Edbbk3VUG0wDMJTtpXZ1oYCE+TN8CkAzu+0I7rUY5HxWwGzEA2gN2uIDGPKEDlkkYvaOU5i+jf488/6O55uzu9tlum5nA1VVbQE
Aa83LPkPL+FoSdtoQemTGuKFQ77y8LsaLFQpeql9aiIgVWPV6Xa/XSyUqwmYqEYJNnnmOzF+vS3YwKq/ruq5Mm4C8MlOhYi2IdbVBY0GIyLgaA+bEjfJSXNSVQqyAwCxJWkLwSpGVyCsvxhUBJJkqRiTQJiAuxBWvy4WUEOyZZUGBDCOjNgmStatD5ApAudYSVuS6pIqqSMegYlEJ53gJCFAmqEoorqiSeGWAaN+RTWGSlMyJPcgEtc4xYnhIhQbIbfpyOOTgtVAqaY0GQIKE7vv9fr+/3l/PfYG1Zs+PTh5xIEOA3bXXr9eviF9ZxGKyfqkQEfUO6dfrdV2XShTDRisRkn0duxqZLjYvjABkMBoEXpmmEyK46iopm9yS4R/oOGdeFzPxupAv4nWxEqpaFllRuPDKCmAxX9eL8Y8glLmWCpEpxCuqpEjGla+cCD1ISqqCwc2EfgQrSXHdlS7LJVi1FlDMuoEVuiOxAC3eL6UumwtqkVVEFUtY96paMusjYDFKMoedV4KiVooIYgwoOzo0jpUckrY64o4+PXL5AF5lTFZr3XenHBCs+/319fvr65+/2RSiBaBGjR4W8i6SqhaTyOv1ev36FfnXqlAhQ79UjIi6Yq1frysjKsQ9aYmQCCSAAOGtbi+qxqVr+39d1xVoAUDoXZWQggsFM4UtAJnXi5F4vRAv4pWoS1Xlh6AoJV+5Agrldb0YvyJQedWqQkZK8YpaUiTzZQFAOaKLFoCFDAxOAWBflxGuxLc5rVXiiqwbqtCKxBK1eL+RusyRUMvT2PkA912rCsgmAGRVYRlNUbK/OhpgnMcfFu145xjlvzVARFz/aQ3w/v3P33fVpoLv9z//49///d//43d/tp3+ZXaxUUaQwKoIZC1KHMru9cr4tSqwGKEXipGsxN0C0JiHDalNBTNsaCLj9WqkFzLXo1qrVtSKekjfYJsgQ4GI/ZsnDtlzMrpR1TFU+8fW2n0d5FWstTd2+9RzYYVg4oe90zI2YhMAdNTZJbYm5nolAgEwxEiGiAWSqXD0F1iGslUoaTFWrhKZREGKQGmc4wJMx1CoJaqbDhhtloYHgFhr9Y9OManNWnoJrv9uTun9+z9/30tVprV0v3//5//87//9f3xtGG5tRiPWni6SKEUi74ulcN+JK/OKTFYgEMQFBZPFer9fryszze8Wm6UixA53gIjI15WCFqByE4d1M0Paxs0P0/4BGERFs7Pb2wWgYhVAMHSX7lX3zfuu5aQdSQWtqlVVZDFml8j9OzpbOoKFmDD8akfFiSbtEk1c2xrAsfshxOZ/5c8zgmOWGejsLs9w0elAIsjFcoZPZ6KZoZBwB6hVawmEvhKWCSOJZEHOQhcCWnchogVAUmmiDCFJ1/8QSloWgKpOGNL9/vrP//gf//Pff494EyRrovAdOiXJQorXfbPNY2SrYJPKAVwAI2Lpvq7MdGj2oQEQUFgADALjuhw5GK+xbihYa1QwSqvWqlWd5G+KwljZ+rmJMC7Co15V77veN9/v8g4qai3Uugsri1jbIVUBWIV1lZGeEJ0uLizE9lRdSRDOp5f3pQXA+5IUtZwKpFomiroAIsRgAqwllucjwCqRNX6hWaGRLoF24UG3uwGh97J/brjO9NR9CAAjdkrYavRu6k3XVwvA19fXfWu1ANQyBvh6H4+U1mneJVHBstJu7DuMzdMJHLoUsX8dYa/o2CN+eKqOznKgh6WkVtzrJjIZGaQ1QL9OciQ2+7lJRqgQRS5WueidVVWICUpgs1LQRKz8p1re0YoAl8hOCr0tAGwBiJJQpU7eUanpZnodaPrbyt2JX2OeyQhMZFJtNxw5lP2IBkqTvN7mbDxvAGItPQQAgR0blCTWWoVwDmbZh7OKY1RJelQHVyeNbRVb7hG0mX/bvaJAVPj74S1msRpaMNLIDnT823HWRnOduwKcHJlhDTwtHFZsv/GBs9qZbvW/4c3mCeZa5H7vlj5xoyYOHOpvnQG3rX1
fdpuZ/tCMws/26YOjR9OuFrflHDbAwCYi2JKB7hdFO3CNhQosKNHJXxQ68zo0Ae/zgDPKNjczI5uh6dFuq3+8BL+uhi+bW5grWpnNNrKK1YDMcTe2KM9/PJGYkHXNEMRUnhwj73BuioKbIhkt4sTqg+EGSo1u6QnocRNdnj7+isVAs23mwjuZ5kgSzj3tV4fBSI+NR/43qdJ1A9qjGcZvS8RJEiF3fggJVsdNzBkwEjsZxrrBWST2iZpOMvlooOeYx0zXT/n7w+uxUJ6kj49d1gqlrVCtQl0rWKt6FCC4OBqWzx3Jj9XZoaCQCEOrFgCOApjPzdqw5/QhAE96zAK5qwoeXs2HoPs/u0v7p3H6h1MWxhve09Nyf9bON/RfqlBhoMFJvD0Ys5nrMTjbfBgOAZ+zzSOJAKIQFUSwOtBnDdBZukXGbNtmLfmQwZm/oWUsKc1A7t3eSaXjCc60PLfU1Y3EakSgBaCqTcZogK2I9Xww7T28ZWAL22NZZuJ6mubzx2vVN23awHdfqG3UtDGhnq/mYKl+M4f8aM1WYAgao7q/fP7Q4ZbzB6OxQkEVxYLxJQpgeegyIAVsK9UgsHP9IYEVDraVnTzRRQZtxI8cP3fq0cRnDp+To1F9Aw1njfQxxQ/ouOdj/tka/NqzfGyAgNMz6nFfwMYIndfQylaM+1rrBsRrLbPI7NYTdvYqKrhuk8v3fZdoBIXxjzNQYti6OAnCy23AdybmY0gkA6oIhsL7a/yrMZTert1K0EtRVRPFmvWuKmBBAdV96y5K99oyAtkKT1PIBwawNh8ZnuEJZTQnVK21CoWoBRaZtlghLFmw1qpCA35UaVXVvcrpN3YT7/daLgSRmCS06nZMq6NBvRbsHGVytlkrDg/0GMhJCHnM7NFeGI370ADjjNhhUeslAayVVSsI5AGPjriVs+SocHzBf1sWypaQYA22LQ+ahh8uW9yeNr6p6kacoFMujCA35tt+x9Fbs55beI4yN+hpV3KZjl3rAXUOKzSK9QDQGd5ssLOpW5nWKlkA0JgIhQKqbEyrlrYrUDXWd62eo1KtsgAAUthbXLX0X+AAfuLUnbEO7Hm6pIkGdatAY5Ip5HtoFwwesk1RXxN8CMnWFm18/Rce/TKqBj1XnV0yZn2v9ubhHru0B1jEUVcHRrRkfxCBoFHcgctnMhrua8PM+Tqsy4x3GhuUyq6giFoBUFVhhFA1lUGSHBBAOS+sSSvXXqHIqApJBEoVG4Roi9A2TJ685wZF/zQ6e0OSnuWNiPpCs/7OTFGrqafQXGMvbL9m1zeV/TCmD4evv8yi6TmvZ/7HDSQ5PMBB7trcRouQtHUpjtrFzgz70FAPFNjgetDVg1CI9imJT5nYgHx0RISCH6/tJjy2yp42/cEq49jdTw2A0R3eVaNF/by+uJ38A6EeHggeFm3DqzE3+jmGz9efnFQcZ6ZnwiCwtt5ZRwNUSfo6iBrP9e9/NyDdy7GXQPj8288BO9D8dAW3j/ih2BqOV8EmhGJNqdTno316OKP29462VNfMPg5Q2ormCFu5qFDNtIOwBkCB3dfYEHGrzppU99EAko26NQCr0Dqes4uP8wCpCy6ls5FbRc6et/Q428wPxsmfaSs32BdbMUitvMeSzhv907WEqmqAVo73Am2vRy6Fjy3Ra328tpGNntRSVdfvThIkn/x9r7C2LPYO5mxjwAzC+BqqQtXSWnfeKbJUVWuASutOilKxoqIFhm0tQOQqrbvWUq0yRJ/WKHaAiAWIC+vGWqjVGIDECkmKMht4Q7EqwvQy6wa1ZuVLMGlzI9uouTt2odPE6AKBcpoeqV2dK8fhPihOwWGJcYLHRnpPAOIk3DYTaE3fTCCK3iYVDwHnlghJ1w3JHbw/BKB6/TlvPxb2uUcPlfNQkVL9UQBW07ZDqoyjvxWvkwrM1vi+/a6+JtYday2QpdKqXSN/vB+pQqqoqPL80gFc1Yq11lpaS4WiCrRLoGLVApYUsbCW7sK
qe0lGbctpvc60xJJYFoBawVhi1arBAJBUHTICgE612AIQsbgLIbEIg0A5AWoLgHar1gZkVd2uXGX6uLYouM1PKzQnA9rqyzsBjCK6AdDs/X4ZBGrwlaMoBwQeMPvj9TSoQ+3P93/72p6/xUm7PI7szCNf+sfnWidvHuD0SDpKbXAFRha4VWFMoen51DblB33NvEAN50pSQFVRQKhIASVG5xYfE6CnAFi/OF3L5RAldVI+HBj4UFw9yKNlH5j6WPInpNcjF0xHpWuMWofEsBdwQMNAqI0Ur6fZ20/f8/AB8PRcunYnD8Bye/5gZ2QNlJo0n6nc3kjsWP7x139igB0IQNtvp1kU2b/4MejHfx58ywQPZJ7JPYwVtiLDfOj5kwa01gDsHd1prNBtUQYDYEentkh9l+d5ZIu/Cws2+mv3Vk7SRU+3c8Q9vu+xgMdCgx8/GgVN9TI63MnHlp4eQX8zlx7lg09gt++JjeudrBcRwSmv6KocdJFUtFbfRQNhudB+OpAxxP2T2dCAX1mlN8VAg8CnbVR1a5C2c938nvOsXqeBok+53lJ9XKy9tWbBpd0kTjBBKJFcJcaCuY3WG7CRrQGB33cSvWfZ1azYhNXsKXKKAqJFj4EKyfVG5bypMZhj+fdzaQuAxi80vNy6bWMASLj2o27h3N8R5HvDgO1g9aJ1XCX43Yd6bCUbI0ep/rARMATW31iNrQEwIFajmvZ3PPP6eIQHKXiuxq0Gjqs1SqiVwKakx3foPTUIYxLYa9hc/EkD+KOTntcaY64yprVXgA9dwf3TQ0Ifz4SpNtnq82cK2Jk54PsM/JgpQNMkyrPaF9Z8t7FeJ4E9BWD85T+ZfY2JU4vhZMl+14fqbDfiu3TosWNGbT1cp2MiG0TsNAec+NIRxj1rj3n48GKIEY+RrO8j3XbFJrwIoGrC9rO7jmnvHgdshGDiD2oeaPM2Y0mg4j6r47tGPhIxpg2fEvJnn/8PL377liMAZ0vAInCwee/Dcda+CQD3BB4C5agj7akrbFxrrfR0HBut8fmbHwOeZdjfw8BolnFL7UYVYy5BfrTPeMgyxyxx//VvZtUU4DGPUox1eKwUPpSH9nqO+XHG51EqH7q5H/GouceTfpuHf7niHzP7jFE/vpnpuoyfG6r1HEzSJzmKdGsAjAR4IvcKPbCjph4UQJnoY3VtaM0kPRD7GfaB7TPS/voNP84RQSKjQq4qYGe6GZJOQ6FPV3Pv+3k+GzPs5X+oPHx4NjwgnH+3NoM3eHTomZsNzwzDtors8LU0CQ1/owE0YPSZ8q9eYnQ9R3sS1MZXW6tPsTqeypDAtfHeuerjh9kU3aXmc8Wsl9u/qkLzM7WqFrIvZSo+irhNNZSqqq1p3yJ2D70NQQ+S3QP5g37AVPg40Y54NOHZ4x127bG3NPP2uOae7ppeh+XQJ/dgLLRDPeAMdq+U/+usvFbYTbKCqwC5G9POrIDg8E9nLpKqzsWojsd32n6tiTm7FqBZte+m7Ydm+JDFXvXHr65l5mHte/oZago+ZpR7XnvbEx9CLklOMK9Va90WdrPf6WSHdd/3IQOfO0lzzYfe1m61s2EgZI+6WPUYIQca4RMSaddFGGTUTnQT6iDKFl/J1SrFtbRKa60FcQp26Hg+IVSBawUorGKwFKpaYom17d2aB6y11rqdNACJuhOiKvfUaq1VUJEm9fb6HyZQq1atmsQHglqdF07uxgTP3YNjiw4saTTBmU2ZCTQVfD+YQKmZQI4I4Pgo+IEBZr69QrXWHe6LYQEQEKTWfTuieXapZqu2UxEBCU44hsmXfQN1PudaBCWUE+h7bC0twmA1yTOKbsWQKjTZWUJUVMXO+5a6riG4dN9YHcQ39MQyS9Wd/O4S1wqRWBWMW9SqKtonIJxsPna61lr3PSCwGHkrqDopNS0AXaxwBGD4BB1y0GbAj6Xu9lAMu59TseHNx94BzlIlxyFsgNamuq6lTQX3olsDtBvdAZudWvBdAA5
UOKqgVi4nNW4TUAFZxEroNmfDTriBEt3wBqrOHEaTEL6jUEWuFWsEQOszGjSbf4t6TQlPz0qhAx410ZqYNul+twi1ACyttUzOtwBY+4aAdxXXSiGwiog3ngIAuE/gUpeN/hSAhFjuimAB6NQPuEpAq9YQs9AWAM1IoREA2L2JpoK9CN1yQyMApeqC36e1N2WuwnU7Z2XmZo0ArDYxbaqOG/BNAPCkpPrStWIrIQsAZU68ai52iE1LUbQAuMBjI7W+ut0k5Io7L7gNTi089tGWv52aYw2gktBZ4JOTJNsQ7j75VeQKBYpZa+G2BlCASBP2buYaAqwBqhitARD1FABKBd3oDGsnQdkECEJmKrpNhN1XTX0D1RhgL/jeqI9fWBsPt0BnVtWe79a5g1x04OXDld1mW9cbqlrr/b7ve60htKTqWlFOlH6g1RjrDwVAHN1bVa1QfwhAr3/js4GqaCbZPd1aA4QQjerY6VzEioh1gyxA1afu6mP9HQ60qSAJI05moVT3cmiOTKjofDMtwSUArGDE+41VdTufnlTiBrAA6QjAnbIAIO5PDUAoCtgaYAuAOphz3S0A3AJQfWyzyJ1BdATg5Oy2ER8BwJnM7jJwEpUmndl3bQ2ArQE6RFG4flvnvL++vpyGZuxgXDCtgdtY9Sx/wsjh0AZne/0RnS9JoAIsV5yPBfAzQONphduRREgBZtD9wuAOAc7FLItbJhzwcqFNYPC0727zXotcJOEtjlipLoFe9wK5Omuv42yQFEWRwfvGWhYAXmRVAXClD0PAeyXXWpcFQPEGa2mVy/La89attOla617rdvUIJNYViqhK+WBpsjfbwB1rgMlc6PWfE12s4iwAnn128QnGwRwB6LXuxDH2r1GYnqPFKjQIvO/7XuuufSJ1jV56WNlRBDu0ADSDvf3KNtgaUOpPb4ZrO35e92d2WneQsMU2MbdxJ+TYrRgRa5kHMLHWV+20iO2SWRB7P/W+aYBa5SZEMjlVHUYgsYjgirW4/M55nnDtl3xu91rgvQJM95tZCq1aFW56xcEAGA1QBp5DglQR1ttsmDpuZG/Nmb/xPTdLNO/Tye8A0DGDqSDcGqA/X0YN6r27zUC/rhLayfRIh3nYMHTbjO02b7YEbQ+iG63GadG5wfvTTDzcBo4jOJACj4/wqV4A2ZwuoWrFKkdl1bXUzaa2uRu33PE6gyV1tqmJSId4m5M5OnFkvGe+16AYktFGje5by+V8BFaJXFjqMsXuVl8wZvDQd2T9cB2exBMMGhv68TPCBzhp6KqpaOUUKIy6t+Q17HscTDQmgDhpdPO4WwD2pjk07TkH/bHzLT7j/Q8G2IM+S/h4jbiMzcdx6Ydgevy31/wH4bRH24gtPvzbA21a7+yNc4I3M1XPKfjj6+yrrVn26vle7MmyD+E2Jk4pkjVVT7bKffz3tM4m/9jwn7YU2NS2/QPuzcuTATxDbOe0V/6kuaElbCYCB8z/fPRr1ONBic9JnYHtApDtn83/Z/m2hyEn5Ex9quW8pp5zHr2JSXM1n6Egdfjyx9r05Pkdz3mgwD98aKj7PflHqp/bYN+1N2g1o6WaIBbwEADI/kNodM5TNW+Q3SN1zrXd6ym0KZr2MUNa3W2khh+vSUXR06Ltl5sJbGH/MUM/ds+/fl08qHxb0Zm3LbPQ5K/ZtWKMC8KJhm2BnuLAwxyPm9DuvUqPk9dan7iVdz9dnxzgQALKevhp9vpDppEtkxrSoFuFZkT6qegONXRltlSh7iwWw6u7bFU+FiILnScMZy+Eb/OM2DeZZUWnBx/meMTRlJho5UO34FyDUw87zXs4BnEWiMLRrQJ2ugqfX2wVOmwvTKXi6P4ZuAf0cOS5o4EPWUNXz3fKxREsru2hA10w38ej7JUOdo82P5T6qYIgauq+a2vn3v8x+2WCJAcHQeGqnKrplecRTE0A9ojOTAWj68jRJfmRQAWinGsTkcDECB4C4LbgCMkdudkiAOOJ7Qt/hMU
+mzyfIH1/4gBmbSdsS0CEW5WYQejSeTJqX21k4oiFMMZ3tOeJ/ujIzGR+8FMpcF+XZERjgDrOZmu1/rYewHL/Y37NFuoTKHwEz7adJ/AYg3omz47YhAD+9K5tN0tTsbxpdMy2+WZGZi89rjcoaB6BEz4Y4/qpO0fdPzNE9limVqT/9/i7xuRuE6Ce3uaBsFM9P+3eT2X+ORcbwJzvsGdxX2secHuDaCTzKQNbWw8GmEFWbXZlqAhtGAGxHma2kzAfE9+r380a2/QDpnqh3R9goweNXnq4Cvb2Ne/6TOQ4uGkrZJPHRXQrZz56RdO2BUTHfEeTdol2FEJF1DQHiC7ejuqNveHsaHo8oTrOH9sETNh8m9XjKx0553NXMygEu3u0OyPomwYwQ4iNdT4h9FMDPJZiJ3R/xH9n3vYArjp5yI8IlKahyEaSB7l/f/E8Fnl6d4Kxo3w4OaFuHdA6CmM0u7eze54xjBAtCB0N2OKvJ9AhiQAVnv3ajWt6bhsifvM0xtOalepum/tv37QBJ7d0LNaZ7j9Nydzv8WpdsPH1Y/jcqPrZTBKPJRsVxcflvmVQfWoTYLIA95+eAfQzcwB5PQCfPtBnSdLaCsDX3c8+9Bs/7/2c+2AH6f2UjyL3xzj/4JjMnfrOg7Ora5g08Srp6LKHXjrzOtiwJ2BTBVZoP403PtorHtw5oHur9TMjj/s+/NF92Q1mpCEccYrUP4Hhj0V8/vxYyoeHdkDe949+aoDHr/31kUV1keKzJcbc+GlDelq9GI0zxgQ9791bPCMzAjFvMghEbhC4XbHx0GeaKuxJU7vzmRQFPrP68XevpzN8sMh8ndAyW6lww8eDsudcQn68YSuLc+kP5qNR4UMtbPjzAQIfS0qdj7tyckNajMHRJEU/jYwfsL3g0RUc1T5rf3yP4VPnj9sszQNfD2vUk/NE1OtYr3njB9Db5uWnzpsFHj2vsxdkyXxoQw3tsoGVJG2U+zmBT31G0m3VW9CwDbl5SVWoV7bOQ3qh1YrJDmr0Inxq/zMr38Zw8ORzYX9K59OnluSSYBWaJ9o1W5uIOQ54T8Peg5/auC9PPkPrY68sg576MR9nb+DIDq7HsUlutyg4K6PIPqjoXGbEfe+IFt2P9f+DXezc6kM6etNorGXQDQZ0NEDJhThU7JbI2lh774QDtj5GsJd6W56YjTt/jkIoVLFVAwYa7LcekQH/kMWIg8e/K/Cj49XqX00EhcvMK2pbg3G3mgR5WOINHbbm1+cUj4bawIVnA++o3ZHrB1nRD3ilhH1wlhjt4UkwObN1ziz3vuqHMf9Y/lGPm52YEXsy1k41GgFo4FfdcC8tAIuECLMPe0Z8IW5a9rQ/0UOPDBbYTz06lgM+R3Vpz9Sg45nwz+nWtncPKzRu4N6sBy2N0O7lLNNcVcWox3bAfMPBNltdfHqaTxF/TCrJnRE1v9UhDUbkZ3nGC2jJvo7M9FQRU8ox0/GwG2PuvtvEvcBPqAXtSG0Ftlj31GyUCswatvfMPZ0czHUw2bnAQK69PO0i/OETj+ne2nWH3Xbg/KDNjw24tZYFg/W8rEP85bybDwHQlkqHdqc8pMvKqgKnktlzQ1bM1cbj/3ht9IuHU/KHl7ad/HvE1JN3bWFqTdKhpmlxXw0AnugKo1MBYDp/xTRpbyrWKsZbvH3w46DNhlT/N2ql99+M8AHua0/A7AxICri9tsoNdR8REc+SIfLTDeXGS002kAAnDSng4wEUDKEPvHkK2Qbs3YsUwoQO8BSAJ1bwldVhhWZTHwcoNA/QUyRGH/SxnUbNnO/YFMDqY4SIHVXxnLGf/OyTieMJDzvWM3kRbrITPUq3rN5Aea6/9efxk8cWfHuN5hCfeurMy1Zpj/i1xb23w2jzh90/gz440FcZYz26/FAcDegjTNU/TpcfNBtbAM6hw9t4PtHuVncPGn5LMVrD/nErPp77DP+bztqSDUw8aBuWfrO+W4GzNXoYGwNyvIDetI9YwLY
Z+2IEriGO/+XIHz/sAgPM9b9rGe4J2RN3AFVP7J9U158m8UGrf7uHHVweAdhqjHv9LdGBOhi/BzhyAPfd3UgP+5ArPi9nNDwyvX+Pz39mCGiZOwiczyX79hyPh/tbKaKx/RNqPVFXi/9m/0ckjxfgN/V7ZpMaBOoPr215/jAW6xhubTtw6djePw2/v/LMHJ6OMx89JHvuysdyTqStl2A/vkEKu710I+jCwapnfj3MgQHnlvvb2ZyafIeem5aL8Sm3uVDQJyAgEImAi5NaVdm9PJlSz1XV7ijcSlvPTQ8DoOPrbGg7PK0egYRRpaNvZ16IDQ14dhC2KHIUmBfiKgw+kXYsQNvo9uz10m8J004PGUWloYi2AuDzZjFnAe1CvAdH9cgp6vNQAbi2nFTsoOx5gQAC4QxBOuEHUIPh87ZDj8wovfpWCRxxepi0s6VDZDAz+rh4uiA7kYQy43Kv10JiaA0KfaBU+GAZVkT5jNvRU1vZPHaH5/fo6YZ/DxD4EACLjoQafsYBgnZo2vkiNpr/MNmzJk16XTvu8pDSEUDJqc0Hi6m3/hYOPmmMPfNz0pPzWRgKwhLA+FiQjzyW2QzDuO+ZeIS28HwK389n7+3fP9AKg6FwU/M53tG6eushHYPVghG91CROxwt02Y6tRFCRkT6kAvtM4KMBRPcCovmU1nYbE41fukVhO6psJu6HwdhQYCZe+8vBAMd367b+O9qGsw/87v26RgP+yQYcz2oPoyXgYR9m/TeRfrbQ0ah4oKwNlZ/oToMv5pdzUYzuPslZW+f0/XZd2FM+Hoaeoyj36wFTMOQnH3N03r1laWT1Ix1jP9J/9Tre/nzFUf7P930z9TPtnpBtKp7Bhh7KN5A4U/PxO/FI4WyDa8/Htzs+jGYPky7yBe0EdV/Nna3Tz/mhgc98zrRJE9HHNLwiMFnkRKfoookgH2AEVoyp6oNShizomz7nclCDoYOZwOBuBri3xNYWG5+0YjwTaLpAs4A13MHAnnnys7Q1bM4A+a3Cx8zGzGyP9mPi+TOW2AivAYgmvvBt8/8ZfW056bcMZH9Ad1PBjwXbkiNAWg8TICqqu/k+8N4T+s0cRtjtqseG2rpetfoZZuVI1iIRhKpExTCBEoGQ+8NQbOIF21GqxyI0wbSNQJCMEwFAVCgU+3kNTrSPLCTmMOrBtzwO6V5WVpHlk8vlpFCvL92FJqAQVROBPAr1JAc/FncEdgM4fk4XwKhwG+XhEnTE9nOhHyb1X7w+PLcrrdGzW/zHZscOuMP47QdVNgg8YohRiIZ0Tgj5boIMZtZaXT4x5qyjOaxwf+gIoaS7nzAosrSi51gwbepmjVxDBHWxxxbG6We0DVJAoTVSu81yD9KHBcyqVdWSHFyKcGb/Qgq4S3EvBKh7ZXBhrbvuxSpUIY0VamjMXenrjtA4Lag3CjxWhVsk+jedYdu/3XaQQMEHUHDr9YGEDwHbSvG59u00eQtd8ZH28vFBtgQ8lcdR7Ba8R9bjtrs936PQ/L4DMOopAMYUgWWgFlUlpMs33wwrvSgnFIrMa63Vd192bJsJnNTaB0bCSN2TobNWrydLhpOiJnRTylq3yypBMNZ939LCgvheV9wLFHSvIm+4tBpVWMUVSITWGIAuRyq5GSCwFuHmk9jJrofcxEz6rASbrUC5VZQ6XuaKoPb+9yc+eZnZu08mcAc0fOcL31/cahkPkdiggR/f7O+5kUXfubv87nvP64nmt4Y54zyL9HjbVsXVcbXWJ9gu8xbV5+fOLdk6o8ENDhCFNqHVV2usvW3LKO9SN2OsbldGN0Tl9B1AFXysmmsFog936+S6afRY5RhwFY1qHqzLfNFzeBvszk7/s73vKdmTqHG0Pt7Aj1kGcG2FNKvZMMOKoZ4L5votK/mI8edOX/ZWZpJcek/tg5G5Ty0TfDR6ozs0ofPI5Wp3TFsG9+V7cz8I/37z2Kh5SH28apqCPiouj+xZVU9/uV7HsyRj/R8/f/eYcL6
Zy/Lzc+e9Q+y4QD1UMXLQxUvELp7QjyfZ6BMb/37HjG2aTyFI++0PFu0pMDIR9L0K0CRrREQdAWD3hANIZh9r73M6H+14JFVxecHmYA2QbvQsICLTMM14TBCjj5NzH1VO/NcmIHMTC2fnYzbxw9VoF7if2kfwuNg6wFLV0nIJzyxYdekcJBYWlg8LwHAu+xy/Zsi6xOwPArA13uzEvaW/SQ5G+1ZRVfQ/Vd3HIiyF6gNIdhlSbTU0HSsl6bgAR42OKmzlcczJ4y3Yo9Dl3x3q5OHmKLUxleVnd25guqEviDxnu3IUAFlPAWBrAEkgI7JhQ41wRWZepJMC5DJxYwNQV5KZ7KT9p+3BgDsFEYpCN6Z/2KK2rbPHz/N/4IKzaDqrh2+b61++ni6l4doGdEM7ReGRoDM27FNLP5iQH3fAqNOR/G10qT8bh5PscC7yfI+k6xbKzVuqxsSOi90hXwE+JJvRHbWiBQAAu0voOXPNatc82+eISEZer19qP0YDSSJfL2uAqgJ8rLjSADcvMLNwfbQOse3I62JM4+fVOLolQIVdM2z/ovaxG71eGBXqnTbdQG2T+pirvK4MRYQohsBkMuGjyZGJRCgJsCQKab6wZskMXqORhzbTMDj6MFwn33ikFt1XcpK22wfq4s1NBe/17GlBm/FP6TqYDW04AeD6D6Hqvt/vr7fbeLnfDACBmY8rY3OhZGR0p1BkXlfmKzMZ1+v1umLbc1Du+1ARqDsjWe/f/9f67SXSGVhcl/OIUW37SyhggUBSEcKFxWQQCoKUyPz1j18s8b7F0l0FYCm4Iu5ixBWr7q8q8FpVX7XeS47yX4kStWoZhtRS/EZF4JUp6apLIF+Zr78yYkVQ6VVbImrFdSVuRerma1Wt1VjcxfqKGyAc2ofbFNxVUqRPfc8kOKcQtQkI94I9NlmAtFZN2yBvUMOhDh0ELvp0om4JMc6gMaNGxnu1YpIh+jD2uG5Ia913d0/pbjXHLPydAGT0RdMSkJncB7xnWlf4hOtkRXAFhfr166+//npqaW/WbhDRyTmjR6ClsBk66YnTGWE8/Y0P1TXcEXmzuw3wVr2rKipWvbXetwiGlkq3Yq21WBBZN+JNRETUZJ/vLdQWYQCQqGDklbKywq6DbPe2D6duYifoAsBKhc+i7/+5ICHMS+2gtBoCP1C8Ggl4YGLjuyFm2HjatXjt02wtuY2wB3MUU0ZE5FWYWtjPbETb/G9EZZ/M5U/3KuaVV3rRryvzSmcG+VgERfh4sQSUmZlXXleN/UCjQTAziMBGJK3jgCan2KSExsA0yK4VBVQtt4UplxIuBhARwAiAWGth3bfvtxa03DKGZAVrMW8iIhNV6657uQFh3gnEWqt8V0OJ6kPo4CPiNtCzuKw63uhGEw/jvQm/xk3DP24b6rJkp4YPlRk+X2io7N1au3/RfpD3fXOJOmnbIwUOfWq2IHGt1gCru4TtMPB4dk8BmP0W30zAdV0ZV1zWBdOmswUAERVBZEVk5uv1GtKQWwMwIzF73FRESaiAz2iqMYvdZ5Ls31fZc5oCJ6B88BahSuBW3VoV4KrFVQskhFWspahaC8ESVYXiktbiFEoRDCoWEOtey7rDB4uouIJd6I1VqlqYOkktYTrY9emvvdohnQw665sICD4ZnV1U7dlv9d2oECRNS+3d6cAqp07ygHyN03fQ3FnCTZr06/oNqO77fd/v2/SXupNThMBuZdXrP+Y9WmUD5HVdr+u68rr4+rf/8//8P/7667rsGQaEzjSLQK371++LWu/1fmRogQDcjCVo+9ZulaRVCC3grVLzCutGmNVVAKoVAmoti7Gf+wJYCEaqoKW1krW0staNZOvsbulHSdoEct1/veImzeng8mH3AshihjdZ+fgwiBE+Vbr1mOvP3epuDcRJRl2FWqsKiOv1KzPylaq4rutKaBFLhWZX6qpyOy3PQWyALtXtzFJD58kJ/AjObAaBhyxoa9Gq4gM6Xg9O4SkmBBl
5XbkJqBGAXv8cvZXX9bquV+bF1z/+8Y9//OPXFgAKEURSkah1vy5qvd/32+qD/Q/Uh+lQK5D0rimx7oXQLRArWsDZy2HjF5mh0IoFwPmE5Zb6hUAWFnHjXuK6S1jv23WqLluyNw4IoSq4MqEP9FmBBK5XvF4sIJh4/RVCSKxFFf7KQvK9glVU3goUEktCIaTbaI1oUsEeSF5XZMb1yqrI67qChUwU0NC6QHKdXkFlK4BmvSalCwxCbT9cXY6xOQ0MfMDVqeloR5sQIo0C82rFuhvEbAz4sFdbXrgvtLfv84exaT9e22d17rA6Hyf7GmaDSAqBpPqkBVJAGFJFDaBpcW0HOCLkc8l9qHJMci33rjzesYbPaHj78N1beRuB9Duw/aVtnPyXjz/sL23Yo9eLe+563DaoD/TzY5b+nnU4VM94dn8I+33Ghf/UZOW5kP26NJwQNm/VSkPbrRxodvLAvtEX2HDkQJuZ4/7dt/8TIyqcWNYhk+ehh5bZ54AUdzywO8RtmT1wa0+GzjzMBZqye84rtY9m+a4J9y/mLgUUurP19tP4uPYY174jP+jB/Ty9nt8X6NvPk377UNzgI5Hu8bnJCx8h+UYRnhTcb4ECAVcAQlZXy0WgizI5lBqP1OyVxaz1seT7Fd0uLGATYCRFniT7bbAmiUcaDeBq/jYBUDms8pxd//9HGlM1B/0o6TjNK0ZEv3/3/cX9mB9v/fzATr7iH66lTwn6HMJ+z/ft8/j0H8dkEZhNNo5AD5NtaFrXoPsBcPzA+dMTcz3U4LXHhDOjAyt6Kh9pJMYfM/+cMN0Z7sjAcLZHLKTaZRZbAEbz9ndVIdCufolVUri5RhWEWj6oS6pAKdwxWg7QuVNXVCdnuIcTztmso9oknCIl44AP2/cQq/6uipuEQ0gxGxltvM47OYI4nXPO3x6X/MDsf/+azX7cvMcv/9df2nIwSf0P+8FrYfrFrz6guEdeCQDVYmUpyggneJrrJ9A9noOMDPNBr+vKyEAEgJ2gD27N0MIzNmUrU460VS2ginUXS3fx/eU25jeTimAyUCrExQpk3Wu5I6OxZWe8gkT6dPJMpYTI5R0TYIQiq4QMRYauuGjOF1rpZliQat0srrW+bopCCLEW4fazAwKhdYtYSJT0BQbWbfzcAZ1tGtBYllERGUwS6YyYZldEktX6zpimqwtR2efmQd1bXWZLS1vPewNbcxcmIOPi6Ao/QpPkoK6NEIGnnLo/fq1DBRKIvDKy05Jc6s9AgpFm+a/MyNd1RVxRGRJRjOI+4aDWut9fdzHEcO5Mb0wn2y6CUK23sAqrENBd+P3P+xapxdd6p+4MopDg/esKBO61UnXfd1yREQArkgwiSyFmMkIVPqeBQr2lWASzFCkGkPHCRSXU47wh5Qt5UbHe9z+/Xi+KLLBEgP+4ShfWIlWqtdbuAX9HBle3N1BAqIW6CwUu/VVgogIOrmUUV1IrwCsDLJTTSSBA98rSmi709ndv+67XhdJaX33sUHMHDzbVGq9GNR8mUHCMY1VcgUkJGsg/9qbRwIaWRGeCtibDbFh2NpVpbmYkIwNJic7vanpLUklr3YsBpIvEZYlGRELLnvT7S1ilVZHSXfj6et8isYik3vC5qwtEZSC7P/l9r8DVBffNIbF7dksu+3N8u8rLNmANO2cczj2tWvdSVQpxE3m/76/f943e0sb5VylpASite1GrqeC8IrpTLRSEaqFWQSDW5BtEQ+SAGKgY96WTAK3pC1D41FqUVKsKN1aopLy4Cn72dgPHtH4IgAaMGyySEMr+fWibgJrklWOhnpFJI9pdotl2wsEM2QwLnfHHxWK+FaWohbhDgN7rft/ve0hVDUSYJOER3UJJS1jyKRyWmbpBqrjWYq3JguIqAh9RRzo0qekKhe28WogbTKunyCuw/VcCqlVdsFvFZQi91v1+l/v7inJUuxaC5bOkte6COtu1hKxZMxSBtVB378+Vilqkls9sZZ8P4SNK1Xi
jz4gcsMFmcRvGbbApdYv/fyEA2MiWmKfnzNYVngoGGdZX7sS6Qf8j/DyA7iRncFMFbX4sTERi+cC0O/KmiHqvu3vSL0fOekFaAIwrfeCw7tsC0Fc0h0JimdB1GhwFrkURPHk+cACDevibaocfaAJJFt2B1YEjAXg0yKs+ARlkrXXfVbMicqhpKVkFn4t9L2p1FLzxf6AKYkAWAEohrDuYJHPhTmqhVi1VQawlRC3UetA6LdzzPNGMCacASHMSLZ7OIBwdfPghnStkDIBiVFThuuDYCmQzXFLhAnB5qfHAADTV716QHbnJyMhusHjyNY7zAszJAeMHtxLZzHnnUg8lSDmBtgNyUlUL/DASo4Vi6yg59i9pbs2m1INREc4HDp8xdkoESCIUwdkiwbGXJP2+I95tK7YzMfE5nMStmkcaCfWDPtmNDbXaij79je3pPt49eipCQQEsKEWwEEGhomMCGOLrIS8Y33AoFh4t2O7o9RekWtc7I3PVeAHaVZlPb4TXawSgj4YV83VdeV3qINA8JJ7+1E57mmzcoUS84ySU/YoqAOi9p5IPXwRIGqmxw8b74Tw7e7Z2j8Cyu5JMRoEXmHgzUSID15WRVBYgXQbCiUvJggsBIy8qKhPMITB5ylMfqMk0on8s46ya1Dk77QEqurUqu2Euh4nZOThbxAaGP9YxTPy7DSKwQDEq0yx757zikE5j4Fpkh7bB0dd7Eq9fQFXdkfHOVXUfZ2WLzZGp65VGeu7DKonxer1e16XstL5+jEIVS1hrYTGg9jOdFejp6jmKDxr75LKLcPzMiSliIOBoo6tHY57TDyQQu+HgUJIREBGXcCmRSIGpjO2dpjLZsuPovv2zvKhS5Di6vivdjn1isR93LmrXhQfDtcqhiam30dLkUtF7sI7+9p6orfT3vANhAfDNlApV8UoAmSnMiSHjZKoXT3PQ4CDABw3UO/V6QaqKCEauqktDcfjhHm+nTUA3Aj0CcF2ZNgE0KOEiQgssRd1RNwHVvdZ9z5EhDSnUzNYfstk85M5CzrSvUrCeIZ/vGqzHCVdujDLppRFCIBDKEgN5IHcgfYBbN4vSdtOhqEingfVFPxn4VnObJ9mDeSbJbts9GmCDOPNR9NnTUoWcVe5WQpsra87EujhgF46GoRh2TXNsondQoxvsBT8gCAO+rGvqSnXgsARWxZRWaGa4TS9BMJ8CYAkO5wROOarKUImxOhP3zhGAWj5VwbOBGC55BjRrb/cwqp9bwQil35knLZTbuI0sEKNaDnT5uPjoan+/yTWehRzTbhbPT797LADfJGCuhIbp80APn6MDHS0fw8g1cqtFVp9q4gVoAag9FYeePBp+0NXI1xa1h2z2uI8l2emhe0kBCVdCIheAWEu1HvtfHwifAJ3vMxlN1gDWCm2DauH9jgUU3xdKse6KL16oumvd76/3PT3iHov3TWr3DWk2sRjZ/JjiJCk97GRvs4AxgA3cOC+zADsph2A39pdIhJvfD5u7nDw+eFID8Uw69/lxJHmrWKymrH0o3BGjgb2GOzVbWpIKaxGKO5JghOq+O8MTBOtGVzgRdEHZXpPuXsOd1PPn9f/+mlXs0iC72yxg8YrG12RENg2wxbqlZd/humL6ZYy1j9frdV0xhzTexWRA7/hKlGK9f8UXL6neVff659f7rlmPJ2LcqzSC4U0TCbC1NGMsqKaHfbMRPSnR+eURMb1PW1X0G0RXi8IcatuNSAJlp0lVN4cXwXFcai2T0W5G1cJYJB2ULFd8LYAMKsrloU2chMvXfMaEM1dRFe+gRIbqvWytqyjqpoppAfCZT5bl3tA8GWTWkMJ2gXUIAkKAhYenC4naFYSxRPEKsaICRGQNLfbgVfYmAmABGAcKQonMzCAkLkJF3IWouuL3haVYX7/iNy9V3VVLv+/1+7202t6KwFpdARJNQVkNm9YMp2BEdi6wgiRqtuxDD0b0qYyvi3FdyoTIIBUmST1II85AVdIpyBEZ7Qi
rVPVeQmndy2solsD7fr+dUdzntTH4zhrTsI+F1KkwlkCUSkyiFnUzJK5AKJXIuKPu4srCe0GKqkwLAIqZlty1RELRIL/lkdo1Th0PE5sc02NDSY1QuMVj7zlWFkmDwKlf+3RB2/DvfYkDwbg1HV+v60oY26tun5hUya/cAoBLtW6p9F5aCiWaI4acQBF6yJnI6IqhzB6200KqMtu9EZzxOA5ZY+9uQmJUsyNN2KzaaIQOONqxChHdns8MPrTMFM2+UifNACMAdcpeKMkurHaAG6iiy7wsb+zt6covFlbc1F2sgFZLWiRE3IS69w2xCohRY+cJXDUDoFMDe6HmUdUYe6cPjQGYUi3BHDGvf4y3Xupp3cakldDDFTACbBsuodSeAUJxuWe7lrthvhdKXDfiiz78GNJaUB/WwTTj6DMmW1Rb14EI+T1uAOS55Sq7dU6ABsFIp8Y1qugIlSF2jcXv2WPPVzLzOPSRkYruMqRaSAIdvZQgUkuVmZc9qsqyymq5QTiJK9akqdH1FJ0j3rCTlhSCUGGJqsW7CghhqQVgQcQixOp4q+oYfu4hQ5QyIEVeDRS5qRcvdjsH29buzdATbS1+/d8fxBQ/3l865hgAEMkzkL6XMeCtuILVxRN8AC04nbNPZ4Sr2h39abtUrd1K1Y3Bu3ynYuMTxz+5IrpsrmspawGCD+lbEhMHhzWLHkAmlKYw5PY+1c5CROYVyuq+Dl2OjaAyolskzXnGRoEMs0XG4h2Ee/qiVkmuDRLBSHammhNeNFoIhdId9uJl7O9Qk0Zv92kbjKtJFhBX3JCkuBRB/Zq84Vnj6pxRo9xZ0XY7jQ0GAgHXf3uQkK1QRr/X6hYMW78MheO/G/xkRlAQk2CtUhRqCe/QAtdb/FJqrSVKt5t81DyfoFqd+F3UdIg3+9vqOjDEWq7V2pQogKUrA0EUrQfyyuu6yOuqKxV4XcGV4BWlV/7jnZQifsWFFZlE8pX565Xmh38tBArhUBwzQwvVeBm/cFcJTk6OQNEMHaLVAbrEg8ErwCURilJmUjXeRmDaFsL2pAC03enjoCtGS48uJzOTnQXMJYcyI0ViqBvLZx/JpdYADrNsg81xbjSVa7r+2qU2Fsxxs6Wq5Qz8kQDsH2aLSVAGu5SXEFRchVUO51oA3qpatUToVoyNer6EKgXNEkFa5hKsYoZaAdPWDzsR54F9+9AYYPKhzSNr2qzOFwUj4kECmMVAbyFnbtDeQXRCVdTktfROw+kd3la3Q9ujINlBmRnh+GwAJRWXLQG43F8MhLh8OLUjw5Mla3QMKxmvpqGCKsxx5T6RG2pp7SRbC0BjUpQqUDu21z7t9Y9dwiBsxouCqtbdAjB8gH6sv6AIgjWlWyp3ShEXtVyOccun4xJassskCn1+Zy3JZ3C6QxQhrRsJ98+ddPAA6GPnH6GMHT9p9yyF0cTtJUVUIDKoK3KZ50/zVu4DGBGZigjJHYOHA2JECqAmJMQ2XduONgpqYfLfuI2uurx/hED7Q+U27FoAVw2hJa5imxyjnM7hRvMDHMIjus9U97OLKVywy+e9rQ8B4BjbEc7jDlz/pla8LQCt5aVa93utRyt9TNqEH3fSLw26Awzu47y7yZAc4e4YsQZ7E1vnCFp2jN16ydOhtbxXmOVKJvdWzq3MovkMFQSqRAIncsRxDyICwbhIXHGtDJaYzLUVnkltRkgpRAw66ICn2mDv/oHNjsUgDG9Uog+k2CCAnTIFRLrPYAUlLJaG2fUUuNZwSAsrjFHchikxAVjjoQXaFwwBqrSz33bDayRhSh96NM0BYCDlCOX1miCmBEz/akpC8aSGN6U5M3ACfVLM+3o+WiNAdHOrRCmKsQBMG/QlyFbb7pXdXeyiL7fRkWKhSyvRsx5KoNSHFmoQy1AcahNg7ODlTaTz7q6VpMOo4ejKZK/aPqfcADTk8HZmT6nBrhBRQIVKmXJtUhVfkBiX4QDJCGSAVJl+zYxOOYk+4RdwuZESWdm
MlVwFIUARcDyrr+cv2BJBBwHQopotzI0BQHStlQbtk5AmzZACqxtnOvI/CkGAj1+HJlH03kkonJDERhSDHhkdcgKAUETTYawFoJIln5Bq7TO9aNFftKWnhzIafqoBcEy7xcPvs66c1AgrzQhGZCt2BDIDlcyMwhV5RSAKyUQwAsrubqBICBeAWsq0MUhHYN0WOCJXVAchxXBsJxSwLYlQnFNpItCl1CIy0kvsJrIFkBkhUQnW5Y64ELn6RNYg6HJ5lxD7sSJJENGHCoRTj53M8MBU7JAT+DPE9lAxg+6vsVG9DvaGYAhw3/ejymVMa69P9X9OZF1iUveN1UX+zghiFfvY704ZQ63SbQ0gSFhVIKpEnhpQuSsgzzHEFnY0hmy/fcyz5/scVdA975gZ1dsPtHKrLeyHItlQDYwou6WCinMSwKh1W+FuouULuZ4CW/l7nE0l+JT4Vq+aVPUnpYOj9z+/35xt7xdV4yDRSdNa1G689DCpajqYU1Y6QLk37BjnjqRcOpU2Aji5DHLDvmtbVHDP2jxSb2MMzQFeQWWYi/kLJaCKuMUuj7vfa617VWoinD2uWm5JucAgKhJJZCEjLzADeblJN6hFgn220a9/UMn6KgG1ABAr4lrXqq+qKFG4mTeBm//5BoAV7jahJa1CXWDo/npL/1xcX1/pwEZEXhfN0H19Zb502TRV8q4r16IqWEt/8beYqRWsCIpUALwXACYdwFT75WC8F1CRZjDaeWfTLtVLD7iOuTG5EzAyRgfW4N+qhVeuvTCyC9VuoNmSZqIGiaG3mJxVcXUgsvNByfECSmvdS4hGjZxNcvbNkAdsig0QgnFZHJzYIGsAU9f19ft9v7/uZc8ntgSsukE5thDN8o26NAd8v6L4+roj6s1EoBZy1a83VbG+1iLrRv76dSev18qVVEUtsygZFPnPxXJzBzerKCwYd+B9v4V3cb2//rritqssG612iFVW0LwoXheVWhml6kglhmsW7iBS7mdYE+cwJJ/u5fHSfSNoHS45hb9Xamcpoolt0l0XWvXUaC1NzY695TbizU/FbP4RgEaDo2ONfq9WMR0KQCfC2QS813rwANx818gaJHTFVyEcbWEuEgoui/SK1gCrVPfv9/r6fS+bAKtCq7B1F1Br8Q5Kq5hiFPKKZVMgIrCKxgSyean3bypd+RsuqEZGklEZlcxsmjbEZIIB3e2qRjhhVxJQIitAReorlm33Sggl1Pt+r/r9dhi3fvPWlXW70OBWxe9ipO5AuQyuMpCrymSfygyOS5elVSLrbZa/vqfDDDp/Zk257MbIoolHb1dIqvfqHgSN4upogDYNJjFLXXXuWYdzOq677dwIgFj+aK21WgA2P3wooZEANHla2Iy+FQiXOT8LgEqr0IXuHXU4ZZt2VwAyEAFW+FZzkk+j4DB2z6CieZdIKLpggfa4JimoW5NYgLdbdZrjk/ys75vf4vtrj0Fhj61pIz4oB3/VGSyGkXi6dx9AuvXp4/vna5J7eqob5QIgtbq9HacPrYBnD5VDTRgx1zbqmrLK/eVaIwAjR1YUcnHMJxE0IHADG6mzbeeWdCs3b2+FUD6SRVWUAqusYGYOAUSdyerrh4Z73tPhjKnu172X7PAZna5bZsK7paq0YRTEqdKCqnlyz1dVEzZnuVvJPW7lEdLS6Tws0wKhziYcSdDhb70n9+4BJnUBnVMbdP8rCii4JbTJSzitMUZw93Puh5UGlfXv2x7Y0pjCwvhPj8D5mTqS4LWMAkvLkYJuBDcCsDOj9/7Y42n/oUJ0Pgv7yupBzvCE84dmhSC3WsDkTOAxRh01s9d/rrqn1rm2EVSS1gBiTk6o65yb5GlHzfCiUbqTjQZjoUvyQmz/YaJCJKGIbO2nUSZnzw9jo6MBHmqkscHMSYP/2fQkaIQ4ofdPmu78JFVU5x+PBmC7AZ2rZPO+vQDsbFNnuzjbtJHDlnaMAKg1ySRqoqpT+KIJiD3iRv2DAfsnhbNe+0z
UGEJQ2tbiOF+PNhfPyegzPK1ITDAYtvxUy99fnJQ3P2XvfrlZhwkYOgG3WM438GnUPqesac3dJ+NDlx4/69xt/vGEzMb+YUB+eOJTmDIrjO3tfX9p+gP4J580CoGnHwE3HTt7HQ8BOO5ti9CmpccJFDVu4Dz34+17VE87+RzW3HYcwqKAoislQKfIuY8+z7yidSIno6FRsQKUz2CZYGZUtwYNuvtHl3vFSGHUsfI8tvU4xTNOaXvhGmsxbqykmsatoceE7uff6XedIo7p3FXRVdLB6FDvoJcm0PQAUBqL8JDn3h4bUu/UrXEGz0cPAaE+2ImdXQJ8F6GPnx5m7PH7ue6TCWwz7NXZqnODAGwQ+OEGNnFwINE2AZv9MGp/9h8Te2r0xACj3mv0gNw3y8raVLCz/YgoGOwp6MKiUJcFNIFamD5W1i6MMgwMWv3P/YKsCLeO2rZ7jhWMbl3M0YOnW4wOhbm/waR24seLs330RH7qz3yyeb5Ia9z5ltzfH9H0L6Z250+qZAZDNYzc6JKcbuHfVdw3kelR74fUxgDWTWSzGVCYl1XAyc21Asv5jqW677WOF+Dr12Me+ykDrQFiQL0yA5Gg8z3R6WC6LioRKj9cXteVybwuZlCRl93ADCEi0sM8AhCMzEyDMhlVeD7tPNrCKSKEyMnV3uvH/R9dSAYH8PorCSrG8z9Lu431Ni5HaWOU757q1tUFOlLgoCkKRTwVVnN8AwIneuOV3+ul82uzB9cs/lMCbNUCNsQ8xlpPyz1moFWcS88jpn9cy4faXVBJ9X697+u6l4AhgmAS4r7d75GRRBWYYlYx4xJeULyoSC6GCgG6c5dev4iIhbW6sRbqXoG7RXOOIlGwyPtGiYrKOekIRbBWQLWgu7juO6+8QZD5uoxI9fudS3oHq4C6cFfGvUIVobd+xT8VGVrJikSBNwG+azlAl8EhgkoC3m4WhzfQJPLg3wfOtKXi0DlzUKEdwmFySyXw6pburZerqk3FBoHtjzTZB4GSOsB0tQqoKB43ENAUInCHoXBM2Ay0pYjbzX60kGxJdvC3PTSVVCu6ltk9wc3sk2OYD0IaseUkp8IKmI9yy8FoPvJ2PN3BG9LU2VtGNbPxXc/p7G0GQ2x7IHaXWoJT+hgUMoEQM6rqYioypKARjEMBmR3Mjy7flJxatAAEU3Migkl7qpWFH26cgtH7k5DSYf7tUvhMPGEjZY19/i4AkFBRs3CEa92uNAiKYtRWwpRUKNWaHJC9ABvEbMXTRJNT/Mi8SCDQfHWbAK/9/XXf7/e9lmiuEicYYM7VFytnXQqOgdkx7jqxjnM75+i3mbe7rPZIxBVx5bUypLguM5UZiOCbEBxKWDcYhROYZ6CyiCx9hftilBacvXbf96rfdzn1O7CUoUUpoIXFt5Ch5U4PICsIrp276ECs9yBcD6b6MicyVPBulIJ2oGdXjfnPMUkEVs89QdLHGWFSRzoZ9qcAHE/Ps24q2E2immJ+HB8kCcqRiA86qw3fxqqYzT/bSYCTMx6m/pj4iEg8kFALwOrJshxRTLcJjkgwiTAG2DTSaIIP38sy5NhjSapYQBtmiWv1rokZXWGhWEKtBS33n9hIlaTbkTbST+sSx3DLuUIFJZeMMuikZdJVI71f/DMAmTOp1sLtkrhT5Th2QBNBznwcIA633m5dpj23aq39pE4GOD7JCO75P2DTe/kiu7aeJczh5h3fr9Zn8z99CMD2WNosbIDJrZpHJDbY9QM1CN0ndNkRO8W1KjKALEVkwInWobCare1EkDFeQHmnHOrnPHyX0oyj4Y791Iy6cy2ohydiByPT8TStlDUcYZw7idlBSGH+CIguRqbJIw8iMLcbOMTRoWe7jLf6AGg4VnY4tv3iVvR2OedUsc714PYg/iAAj2sRBK5fXbjWlftdBetgELiOALQZ2SSI2OmXI5cPR4ZWoe3M+h8K1IoKx/cPbIFzZ7jf7qVEZ9kGGMnrFYrrLrIC4TRirro
SCCiGYz3+FeEuHkFEBikyJQnuIYZSg/3Y2yUA54zb4OfrahMgMoV0xFnJVVeum6pk3fhFKjKwWgO4gpyhjgbmVDPbcLmaIl81UkF0ettm/jlQEAA7NHhWzd5zKw+fe/gwAaiqfRFsZQaHWifhXH0ui1PCNkoXeARgQYu7MPZvgVPjgCcGaathqteVD5ICYmUFY5sma5IhUMefsJ0n4Do+kpmRmYgriizHZKoAu++OvwMCMq8MZmYqCUVeCueEhsCXVS2v6JxYkHFlRgNQZ1O9LhNQcV0XO1QOull8BVAXQ5kZLAUV+EX9CwGwF+DJqTE7QAtAZ4SbI9onFPCB1LqrrhPcHppWsvEaRMgmJLvv9UkTn52vBrNntawBLmdqapjg6JL6BT2TAnuxD485rqqwM5D6N5Na3sYO6I7cJtCAXYPGzlozPFkucwtsTeB2bCucikXQTGA076E2o4HlQ5aKPXsDW92X0KFUkU6gFyOmPc1AHpAhZYCqSILsuKI5CpKhVEdpSDdsQShYAV88+sBwoMmASADg1gCEQnIPADGToot+muvuafwQAJuifpIujuA2nCzTrW26Zg+SA+qOQuY3o3BeP88N/NzmzUQ8+cTHH1vJtMdoXBXqEKJ9znLTVw+5HBB2MgzmUdCFdyda4avXcQwPM7LvbuEuqDAN34ZY6evQwDIqppCoP7kNnoXPB9WOJnSwAHBTcPUhgZ3RIp9HoEItlsRaWFyNzdkn65px9JFYzvF2A1Q3DfMQa+63n2w4od5Xba3H0joLwOpiHqTf2mCcZ460H3WDNEyUoFH53PeaJx8N0IJS8+L2888UPu7uqk2QrHYuFuZQn5ql0LBb47U/nnrrKb/TFcPjnWl0Ht172GE8p7sBgNt3PITdasbmgWIVQsUuSB96gIDbMUBELdzEWkuqwnKrju4Gq/E4QCBSqOXyuMpO/SKBZILZscVu4d9QEBjofaL7xmoPtHdmYlOAe37I6DKVsglAX6wdHtVYge+vjw3bEBRAO2Dt2RA+MeSBAbaSMSJITjEYCZdAf7tLC05On4ErIyPCPJ5X3dWzgur3lRHXffdD2g0g1N1jqhZdaHYxgatWRb4Qv15x/boU1ypESSkqlqLiH39JgTvKVZQgdOti1+5FLF1RUWJq8V5Vok/5ifsWBVW5YFxL8OEpr1+xwMzMX5dVh35fC3iviFpAveJdr7jfLGXUl/7BCxmJlaxMLOGWxHfdQLB4EU4V9IE/Pub2+qvsd8We0gdnzFFwIKLGwEfz7BGGcrUkFTNLhtkQMTIxGKBtrbmTrtCkGTIGwWkWPevZUiuMj/ThBUwgxPb+kEJbhq0mtjIjHIIATetEJ263ACTH3TH1QboNWPGKIK6imBf5uuJ6XYorCmwBKCqK1yWRaBazs5yGP7EO7YjJtopt87dD9DCV1tbRUaXMdI50XWWfIlsAWFcSFgDpxRuZgdwCQJXoZm7snoAPAUhAykQCf7N7t+4/M7v9anEUyCh1goPCCLuBTwzQzjfHMqPf1d5xawBj4F0A0iZWk4YwKEDbr/P/R1WYMhjeS4wA16434ZR61vteNclLO0Qx0JFummoQp/Fh5kFrwRnktVygWapab0LEWrUAH+Du97sErlbInAZRFWstCXLzjXU3n8wo0D14pVrr/TsKvGqVVh8M9fUu8V6MWoDuuOvK+2YpWbeCb6y0BohECUslLifZighbLXZnANOBt8kqdzvh+NNb92to1u0xjWw85x8AtKa4z7kGPjrNCqHtyTcN4BntWMzFgXcuvJvaQJ+6FeKHBvjI0vKwGtAHHazSll1tjuvgBdiv1TEB7f46MaoghzwYzQOYtXGaj9w24oyhCWqytuxEdHnI7HMTQWgMzW36SCJkUB9gE2/7uux6DGD21HZytK32ty3LB/Lue29v68nqfzfPvdLnN7Mg41N3VgH25+cN3kgzJ5/q2Ks10Y8PlubcSMA1wZL2GKf/U5FSFk4+gB2z4xHOVfz
BMOjDUWsH1LamGnQXk8s8icuN1ym6pANCMm0h6INju4KzIyWMvZS0UYQgVudzuaej/UCFW4GOgMA1QXsiu+Mh3B6nOgFl+p6GweKQMT2deEjRiFmQCmp6Hjh4R5DlMjQPu5pTknZftaYk+9st2PNDnNnfIJCmpxUq1vQJOWLTXzfw3EL5UwIAXC27k22yawNrcVLVx7P+AIFqDTDdCAKayp4KH6JeI1ZOGTYSZbqpnTVASwkJ0EWzERwN4ASQXR83joiqTUCXk5Crs0VnYxBjLF3f0vazquxOQXL9ad9V3SJctarWQhExzuKSe/ly3YQWUEjXMVITVXbZrbRrih5rMYICwqdYNPsxTmtNpf5W+88lm9WxkMUAMzuHCigetf1nXfz8ev5+UMQ2AY4rHh7At5p8GhSgzMpvGmBQ2wEv0bbaQdhaFS21nRs2AgCCWm4T8ICSz9eQMltr+jeNHPe288fH0+UYoyEUH7pQ80J7mBhiYc8EbC+9azcjRYWDQqV9XkmpfLzZ0hKxFiRoLS0ugN4uIGoXNQrohMxqoOvfO866VtMLAKbpnmVgJNhbY6rL0FoS21M8D6CnAOj86Wmt9lwIMP1PyjmBB9gN2Nte6NazxwB56c6FDTpJOJyIPXILQI0AAO6V3yquL4/tp7AJv2l97Pt0bsEefrnCmCqJQ12w7SBd0u1jbUUxUskpromu2eqGYtkQsN8dQKim49I21DOZhbUkLUCIG9C6IaHqxk3zWl55SHRXkarGJ81qNA/RhFL1AcFu7jA2U481Gyg/tsYagOyOEmoibXtdM9qWdX6u/zcBUOd76CLb/bfotQpwzvEK7YRIu5nxuFxfPVq/twyZmmSHAZpHb0IMtSQMCOTRSecRzryfuBCes4AtNdiOwoDgtiBpt3s0iPP2AZ/aPPq47a4xppwT2OfzbT5mz1r5BCvUAou1NH0tWDcXF5TcAtDasIYgnY1r/mUU/KbxzPPh+F97/Z8oY6yji4697i5VO72mLRcP7bO5hC0nrWXYw0Ph+kf7kh7gnPoh1R3dsOEIAD4PWGPLq40aQRf5joDKx2zbXLqZcbVrtMS5adO4PrW6Rv+R6XypjBWBukDP66atHPa/35ArxiwsEpMRERUEcF0gIzLBiPwqg5YMqhkhZmZmqBK1utnnEsnFqgWuuqW67yW9bx/9qWDpSi13uS8Wv8C8uGIsPgLFtWz+KAczhl5zsj3gvmlVWzBmmbYww+oq2gpOyJkzB1FyOubj9ScXRS0DbgVoAXA3HhWuRLXTKGCaLYM1UaFZ/2/eC7ATxJ+uT6ga8UK7mze5E9wloegGEcMiSIJqlaqc7weIXERAWaGctkKBWsC9SsUqQauYVDlDxoxAh7J0epSQNeiq3Q2N1KH20YN2yW2EawpyxFWrVPdapfu+axWl5AKkZfRXJG+zDp26ZJTmk6+AQtooeI6ldd8lWBDcFAdwIGtHNGKrvzOzWxeEi0OBTec+YQ/21sKWK+u1j6SRuTh1+bj4gVKVkx5Yy+QstgBsXbtVY4vV1lujYc4PzTsOwvWNNwZ4yGdv4E9fVhucVRUgroLWEYBa8dR1jUOjowGy1xKr+9HLGUEOoVWxgH3UXHUdzKpCRR9A3V+7+matVbVoYqszcndyDqrPU2Izj2LVFEqMKadVV9fylZN7fe47SKgb4IlblMyZdXJRewPaGmDqeT6aRKnTO8Yf6XC7yTQ3JRu7QahcGbTpRXLSvKvWWvda3WXLq/awFkcANujcqa0eaU3sisIkezgZ62iMCXXhY92x2SO5Y4k70AtrFbDW4D8vn8KpJ5CRVxSi6MNeJLd0mRVpuey6CkCocguKR1HQDKEFWD13u5fq9iQHT3EDEQ2e64lpd2UnavZWeGzusysf8/JN2faSc+/uY+a3OmttbAno7KM20c24Tv1PZybNY1z1IQCePRtzP+IZy3gmH25YD3AvGx/vPpwEm+Z5PJg10jYgexa2z3NQrKQKlauOYEa
9ptUrVD7GnfY8mr+wZg2fQha+3VrekitQ3ayQrCpalGyFym1yWJ16OiBwd+IYlNUr0DjIdVGacbPoolMIrPB5mMQccCrgEcNuvxonsDd9RXqxKyCh3DHNcS/IHtFWupr797407aIjWtr/GwhNYMLBnmbOe3kUzm4Ny161/jrPDR0/1cqh9wO3isD43xYetnw+QeCGAmHbawbaPFBMJsQo+9obQKUyye5Kf3LlAoFgBSHXNoS5+OK9WiUV7+UKYXbR8H2vqru01psRqJBjlXUwQD3KsOb+Z09v1+F4dBoQuHXEzNhMuFUc9596Xo4ynV+6O/YxwFvBHJX1fOmo2R/M7cf7IJ2EEJOV9pc54wlt9NFK4lj7xlFqtQN0g4stfyVAy8mB5UTAcnaNZWGgqbziUmX53CahvYCIuMgrI69QJJeg7OX2oAdo9IT1rDqZCHy7JCxArFjVMxJY9xYAd7G5l+quutfb50hlZa3kqirU29kCRksqlha1vJcWF0tB2nLHCPagE+ei8HgBM4FHjj2oFqaBs9+X6uOnjagGHMLmm3CW8WAGneDHvPlkJVgtTkLIuJr7RBZVZL6qT10hHgIwyMwwWIK0qle50IekjgZwRlBJMqN+ZV5s1MKtS/dhfT1PiCDSOZpERkRSwXWXqnNFG/ZxdFoD9847czPa+w2RcVGBjN/LOCSJdZdMB12vSOpepfW16q5bF8nMK19XYlWJPpnWyoCAuIDpr2pNjHIfYDevapvbPI23RoOoPuK4oFpLclfk7gGLzuYEdyNiNrXAKDiX0jTtGB7AXfbVdta/LXaNj6ZhBwH1cYBjqJrV13X8yM02RAsT1K23tgAcMR2PUk2kHxxQ/fMo/ofFlHsvvSbDfTSJbNlrCwCsAdzyg4iIDB/BSWeVsisL6pbnuM/hVGXgytIUfYhAXlAw43YfToZ3fABiXFcQWCqtr1o3bucRX5lXC0DUqgVSq7rQyHlo2Kq694Q3fEfmnCaIgc5+1dny3Zalg/BjIojH7nd93xgS1Pb7OubS3LI3A/mAZp0eQRUeCgBPDdDz7FiA5oZOGxht1Z/abDDG/H9TTw1Fu/prXs9gdjXd3lBiw5Jt1nyH6H3TGQU7dGceRBMLAcLB66KWrGRHALQClXZtSKgIXam6IllUgcWA7lVFSsEr2Ra1fq+6uSqAiCvjulKlQqhqIdNK2+uk2eiNPQ8HaAs8lSAw7jOi8lTVqiqFlt1QaRcK1Fax4nykm2RG98Nun75PrzHhyGaaG1Nh1qmpGK+egCi6tGjWUBQ0beLalBTNiLZ24XTXG4HplT4mwE46mtMGw+1zB86IsZyofAjYINP3P3AGsSg3ZXPvlAgize4LGZGBilauGEjSnTyxNC3W7ei6MyFJmojMqpVBRN0kGOyW0G5cQ8iM3O9ay1ib4Uac7sPRvQXdE51ILFzZy6ZSMMBsZ4Pe6NCgo6qu8m99B71XST4JQqtPx7TD4BRxdDOopta6PywnHBxDVtOSG3H6lLTFQY9U7q3VNICbGvRW7CNxStfvpsWs+bP6SIgWouhqX3a0oMbdkaUQqlqES2WESHGJCJWyyGIk61Kf8ReKvK6/Lufl9SMAUKlue2Le64hM8EIpriglmYlFIO7SBQaFUmAlhYp71R0sQBkqMVShBUSslazl7mLB634zIisQQScmKmTParFWrSVlUowr8/p1oQi+8HUvvBLpkpbkrVeuFUJQt178qrxS5eLQEm4JubQIyp1BXXTu80OWWBXJG1jsVmWjDQcS2sIToNPfSAIZwNRQiBNT2ckRVkft8vrTNTtOkMJMRqBgJsgCsDq8Zu06GmBHYjocz3HX7bR28A1byY8psFG3jRwl2DwAiLbmOVmNTg2hfFiSH70TQnxkJ4uxtdhUEXSrFH3cWubRiFqFWBXrTUXU+4pbun2Ive43MsUU9hnstrCoN+tda6GUFHejZEuJrCloIWZXqQFBVbl5WQhBRNo4PyzEJFg25bOBr7p
tXK2uOduwghsV2MKoEKR8aqCXQ3t9hxfGXqDvLz7+OLzxFE0D12C2JoDojgpDUBnQt9PeJMKBAT8AATEr9fFqiMMBkedjOlfZ1+yCD795crlJLGqt1SVLfsdqjrM9Ekw7kv7FzLQTQ2KfWvJwqMeFnLcf/7qaLo7qQ75Bg7EVS+HkF5grXo5wGXtVoY/AKIioGo/MKBJmlbpGdEiS2T94zvJj6UYnbIEfazjXOH+R6vm+x5tmljTGCFL3Ct4X6be01P5wR73IO6LZ/ueOU2yvdKDjJLV9C0NoX/jA6KFOGvnalRkizpFGqmq5RRbVjWijsWeBXRz06Hr4wKQV3Q8qphS7CZ3zNLPbt5jav/KaoejDQ4Do8ICB1ariEmo1pdNOL1FrAXSScMyx6Eb+ZylGIBsjC+7t8VzS0aLOqWpoOL/EPMSgvudeolGphlDSfq59O7uB3PUAw+PjAf+5MQB3hG2L0BlpTtBp1H+A7GRPhGJf1+/Zjybix4S4XzMLhSr04VxY1LqX1tYAbDKFOIu5BbKxUxy5nOcg6RxvSsDkPwRk9Lul+LkCggrLhC+5FFx3ClKtWlwubkhqsoLVyS8mhBvS9QlvtVDddnYv43OpZ/H8OW+jCALTKrZ3FGmsN5uIP6/ybes+3sgxN1cLwhnF3pxzfzxnZI56npOJ3LbZTb0B5zri2+z5nxFeh7gFGFfDW2NMGZkE5d5QCp/kEA+b9e2ZSCiUyAQFZZJxJYtLUgaVNJ7oMmNERKQY6cNrkJfRvc9vk7odlV8AMOmbau9LxY4LPQiOLcPayntjgKHRvQZdJ+H95G+f8sazwpubRaCLQ7cANK02OmEIMW4V7l2upgsezndH8DXq7iK64ZZZd59NNCCA+c0L2AZrV7T0I/gO3V1ySQsUWYpYVbhlZoRIdN6Wl8/CVykko2qxWw6Hj+12aXcFEClReUkrMPezg2Y6TcNUtDfcPliRjurGw76XUOsWsAAusajVHJ1q3T4HQFHLvWx0Lx8TZVVbLPdC7EkfZLUXvVcxAcCnBkzrKRbcGrS7R/TWOCt5BOXhCbagtyPtnX8+/2Gmn5bDkzsr0wveC6iOGpeu/9YEnKvAMnMKxF3QtaMxBOw5u81nA34jyFrrfi/Vet9f73stnxdVgaqItRJvhYrMuH79+mvdcZRe7xlO0H9S4LNj4oLcyyvwYuX7riugmw5rLy2hplue/eHkFQKiOweoiqGqSMYVZEjyMYY0VwDKobuqtRb09SVGvDJeX4kF8MW7FuoVVQADt4or4RtKEJeVwtkIAGXeQFSBy/LRyL87ptllUVUoG/g06n+ogJpjI9EbnbNrtofF6ZbR4vRUB2bN2YliBgXb1Aq1dP03rVoawid8OnSTSushAT2AWl1k0uOYWhyX1qz7/V6d7woF1oq8V+DteYjrVQWtzmt/AJMknMHdSaOuqe6ybhLpVg9ZYr6wMLTaXfLRbp6KIJm8yILSPf5qMRjFywIQXCDdVL8TiQGghqQM3O9ixspYd6KkvKNwQ7jWDTKoUiuxwI1gAUt1mXZVb1azaEYJXcrKj4ak9zuCa5W0stfOze7ap38o+E0HdYHLAC8vU/RJWd1vhRsHkoYs4RQ/CdvPs2agSF2XYoXUuWctACSAUhSCkc4SAwSqoiYRxSzgcnS6EijVCp9kQqL7KHOcysYSwYidNagB/k1njwIcZbjxWwRNAIf4dE5kmGilGaP6Gk9uF7F14wyF6N4F0ni1xy98OJBj2+tozs2Fc2A5Pl4bZh0rtefcsSLX6PVDngf1h2eq+vP96IC/zmE8HYlwLjkyjgAMim4BKDfQ7AYYfRwhQbi5uijwCpNh3ZIx8whAiJrfuxGYqMpVknzCegkVUhGV2Yi6TU3nPCz31ltgFbvHeUSOczqTzTSgHbfMZZ4REZmkO7S5jVpjkY8pb3DztKHqqega99WZX9rHTreTWNxuX4fpULcILKIWsAQkCwtRrOXFKcgHAPVx4UDnM44
GYI0QSl2q1wbPt3Xo1I1TTEfvgwu2D/rNA9/+9KzcRl7cIrOFqfVGI9DtNrSvZ998pu86rnLL995GXvHjnPdEn2l/bKLBFDW5NaZcVglrCUvhTgMWG30XgM59mSvvVhmPJZ2BnTedv6vn4bh5Mx8DnHyVjjUeyO2Z/qQ79Gw238sxsElR4YwhK1AMyyM5teMwHFsAJPl9IvZcO2gRTjJRxWDy5iAfQi7nbolHQDCu+AjVWZjHuEfg8HBThnjanrd0HaVXgWLnH3JjaE9UqDVAJ8YcE1B4XvvnkunxjVCSs2UfJkBqllfGAB0TDRSpCihQW1CeQt6SfBLoW2WGcZMYLCArA+4jn90ckVwhppCRGS0bgRQU6mpU1xn2ASp8nJvwsBP7mISt8waZbYw2czKNVnp2uEX27FGQ4hMBfnBsewPT1A7bieLQHzMQbG6xQ5TS/mYcoYfluloi+JRZ/6lq1U8Q2HvYPX0aAziB2Imkta+wB+2Hcbu3LXvP9e+0VafyTtigT1kyyK2dNj2i1EE8V3UTEyUJEYurM/smjQfPm8FamJBTeaEyrdj286Ejx6xi5I6gjyvpWkUf6SDCbvksWutnG2pyGuB5TZyZEYdf86UmAf9DoVsDeOrx3Okbjqh7bim2aI4COU9LZwQPC3nMUOmqKnfpVHeEkyEcpPL5Uyq364IUPljEn+/+33vLe/c5xxYuTmJ4SULdV/3JJzyVRANlDTMEdWeXdq6MAcoeRkfRvW7DjvaqeKNnSCFGJRfiysB1XYGAmwZDVwVThbgygeISyIskdVVFpF+AFEFXfZbmXFNEN7RWHQzQx972SZeaZpSlHwlevcgu9SjOuRYtAo2cp4jJTlk4LwbthkzfGLSmbuEUhFqTp+9wsIXSNsewDQXWRPN13c4rp3xGOrqPIuzAYNcL24RismNHEziS7R0eishEkSzT7q6MwwDcHHzpfd6TsXXXMRjzfMA8pihE3fda9y0XZsKHVD+mVcT2eigxkASQVzEiQ+3REKhLWwDIclfsVwRZl8SMzHilC7kTPqpWJTEmG0gAis4XUB962Eh5cldsRUepWK2w+0EwRVIJgs57a62pPnvV4B9DZru4adx+68KY0P6yBrEA1BImJax88m4ArglwCUEXRFg+r1VadpkYDjNP4XDVXafbAq1mHiqkQYBJBwYI1rqKyrXKJ88uASofvVVgRl5XRnTmHMdCdc2ZvPIWXYrho/PQyW2KfepUNchft+EmiLXnSeByKZoL+Nhn+oDdr6NB1vj+AyF1kRASYGRmXG4bt1U5RjXHTlxDx8fbkLXi9X3aztIG1FyN+uwuAdcCqLWEiiuDAKqzlo8AEJ2WHe5pbw1QrnBp8kS1mkCyODjdRJMQAmpalVvZSyM1QlVdpqcxMzEmR9Ba96oTTzHp24bFfo8DyQ6c093yGEBFOg2mpcxnp/lSGcTkED9cuF1+aX68LJIo9VEh1aliDfsmmlV1t6x1j1v5nMHl28ncXfhgpzA5G+NlaEhsWyvqCrLuK4m2AQ41hohgXrPyopAJ2zQB6VzHboEwUCFCgg86fRwdK7hVHtmHpq+CIjMC8lmRArYAWOIiiMhkt6kIVp+5GqGuDQDEGtWKZlcdLANM2vlVmt81rnZ5+CBWL3Tns1bd71U68TTocdhWOwOWF9V63yLWfa81VARxeJKh3OQMos4K5oadVhYWDIJQGVvRLbn71F3LWF1uEIUQ7Uz37LYLEJFQtwMpshCZUOSVyI7uCuVm5AivMsUALucw1QiAy8hBBYrXC90HQCVkWhoKYAKBzJ3wCDIZ6eADQURyVzy4F01gIpggfNaVt59NwIGFGCce2zV8oPntgR04NQhbA6u++YftrB2X6joGVHRylOO4qnXf95q0cGuAaOWADQE7LFJT5mKKLIos7hoOaHL0uql8DV+tbgfJPUACYtkGILgmRbrCaZBst6D5r/5YP6wHs9OZTDFyHyAWgjP3d9LO0GeGKFDScey
YQ6Xhu0bkOaObwuWmNFEikqwtAJ6B8NnU6KzcpwZwjMypfS5UlHuiHRPAPnqkvQ4vQHDM2KCjs7968f8cMP3AoHuz96IT11INliXdGqViL4ipj7l8y9KYgNYqgOsCqOXzxhsasstt5WxPuZEOdpOgTk/12bDP1B/QZ5AHmDlNooJMCqgTcd7kX/sXtrbECPgWerlYLGr69at8VPHoIFDdG9Nmkwg9dg0eYtVTN46nbx3O2dqcVLeM7I+ERZHRlaUa9NAbmp8XxzhF7Z4LbgfUmc/iYCBiqE7rWKv+0hzBjW0cOBhA5eYR4x2XvgmAkPKxEqpaay1oFwcBO/dCOgJwtuFzbjq3rfeaq0C5F0rNexjcS5Y8TJXrSUsb3/hBUqhV2MBmM9vdRjS6FaFUPjvUmAz7CTAu69ldYM0JjSSiT6PatA96/OOoTAfbj231/cWxgBv9D7Ew1vExjhkcQ8+x4Qyg/aH5XhsvA1NjOERTTa4ZGq/1u56TN1sEkq4UyzmelpmuC5Dqvt/3/Tx+FaPkhG0B5KTvapKGkW1OuM80MiSZvB6tKqxPDAAcDGABsgYQHZrothUZ01G4t8eD14AVDLbEDFcjxJVQxBUIrE0JLYQ4bcWICETkWj7op00AEd1YT7yucalDNgFdktH1H4Ok2/87O2XD1w8BIYBpGsBk9v56KJZQ8wUc4dyQIEwMszmrAz0bz08sAEf/N6swpPgMgbpucxVjCScdQFX3/fW+p3OjmYoJ5O9sPbkvMMw4MJI+EARTx77DLVMR619+FwBo9e/tmbkPvJDJ7EFZAID1Rs3pQO+3vYDooyZo3zjOJrUj5fMbZN3SkorTi12nBVu59zwdEwPYKru3k8JePQMV/i1jqv+w1fb2pCCvYIfse5v7gFIVoKj2FDH1ABNQtva0mho6wKtYrf6aCJqa//YC9DABBnzt6kgbkzk/gAB53V3w4u132sRprffXe9VAUhLYLZs2HSBhnxsYoFARF+njU+z+k8t8orc/XIDN3jrqqgv40KrHczha6Ttbx2dUEXVDxeUGEe480+9ye78MN8/NSJRC9qHJPkGumXFUC0DLdIaqqkLLJ98lT7eqciZPrTJrYdIMo+MxOvbTGLTh8uLhaPUKFBgoLPhkmVGwE3ox2RD2TmVVFjEnuPbpcaqKENy0TG11Gp+hDcQ4Gi0AuzdFk1oigOufswUhsYuwTB3e96o1JrR3ZnQ60oFXVrYKmf98HJPX6Gmm0dZQRBF3b0JTVwLRXTMa9dTqP145tKuCGSA6SsTZt1GAwowa+Ov1uiJjqZCRIsI1BiAihNBkjgcBH+jW0hta7aj6zK/YvY56rz22OOZxQgIDKUQCmMSPuJI5HZwQmfbkGBioGexuOPfqFmYQ3cAc5hFjsnqmQCvQegT2fQMRmPZlR+C8t8cD1MPgPATAwIGApkVMv5lqHhDSWmutpUEkfuBWRDhOaK9vR/GwL8ZGsuUGemVwsTWXrcRIik3eZEefAO12gnvTjQM8Og5wb7pMuD3hdb2ujAgsZgQWo5SZBPRycVkb53KNT1PtQoTiJguxzONYB/eztFvcmUi2hsfrsx+BCWU9scBREA0kqWMMOojUaNeR4N6xYmuAwTaR3HjS+Zij+7TxV8+LPjTACMCHBhgBkHBN9M6SMy91ftd9slQeAjArIQnMyAgshMLFziSlZPcDdcsBH5gFoKgF3c41UZPKXXaHAYyo24VPrHSQgVKLiM8lGNkJC3sLAOJ6Xa9MXlhxudHokk+ewy9yBav63mAoaXwBKUIrq4prdR4dApLbJUVUSeFz5kbNc3D8RlWj8KzSR61aPqrz8riKZUW9Md5ECrevhfMVMMLfj+yQvC0/Mb1LvgkA20tsE+CtOfUIhwwggOveg2wcZ7FS1f1+v9/VS98yEO0fHTrKgRP6YI51s5Akpj+AUyR0ngSqJdxiZ/ULq7pGbPO7mARs8zBps0u4J8BYpKawe//ShzZmZGaY44uQMoh
kBqUrlpJEFsVyivKHALTC9u6OqceX0+WqKvbWRQw0F2wpyi2wBSDIzOAcjS230d0kjvvXBWBlGwrDfc5Rif2IOPqRo6Db25iyviKqatVqTDgjeJiAUZMmArR0uoU3F2cNgBYrYOoMS+u+3+93taS3AmpsPDDNm1RKnxOIVdDuX/xNji1BAx6Ol6TGe3u+eCitIzmDJeid2e97MkHgBASCfXqHihEKOpjsXz1abh0T48+b9suyTW41TcHlku0VPO5ml2zPT3hOeXoqtHEIc0KKAJHlTH8opERgPBifn9pYjB1L4GC4bUssDGxTKXVOzlMA2hAP1dpkRMcDx4MeFXGljeo0fd4+f01pxCgAYspoPgUgOpwdQYSgTFexOAkCyiikmvZrnOB+ARxD4hzNozfQ6VWKqp0I3QLxtKy9U5qjNCfpxL8hRhpTPHAbNiR9OsVPgeXjd2zJ8D/t+PmZnfLfItKLpAFL+2qamepJh1Pi5nlat05K3JO/9748l+K5KNv1bxOS0kZgs20eGmBCfc5M7Ztvh/vKnnKyOgo/+ypG3w/62OJhPnwAcu+X9o0VrpSNsMLuZIdZRDlx0OZyMEAz+2jS2BjAFWIwvG8iyRkJFhQ4pOWCzPaRNe3MW2c9yJP2x2NSrvrbDiKai2TMEVx78tSg0xiqZ5ASllfcZspFo20X91Ux8c16GMLyqVqPK59ksVZ9Dx6HhxeKh8yb47RmsUrWkbsReLPxpob8FtusvSFAiNMlrDXJvsiO+NONSqyDXT9FoaW6cT5JCZ0r4cSufkKqfGaXYwaoyUtxEsBwCU1YDAicCXm+tvVFo2bMyj62b4PZuexYG52cVr+N/T40uHo48ePGeH3ni/3I6eDJH3ceqdvb7QH/0RfpFf2D2jlK6vHjaAA/QvecmrWxKT3RQO1VmVvOIzcMHGl+erIAgOuXebk+wgi9m6XqmOzn/1oBoFU4ZjpGNElFRghNvKJNgDWlkwdqNRJ3rmKVD/wYGyaJdZNQgXWp84gq/RDhFmDuGLhuP9SVJIJIoWHVyhCwoCV6H/hAGbnS975d+OSKJQjp/EJTjATC27vqrlKWSlhr9UqRfUAcfOQt1DnvkowX0ORF1zoZmZuKdONKx8e2N9vIDI+fjuQfLWZbowhn9ne7rOiWMqPenyDwaIBOrvv0sakrrUVpAeDAl1IG5YaLmwnAo2pom+4rMzq+Z+3OoHDHnSph3ckbKWd8+CCdmMNaqqnSOQqhM6DRAiDllWBGIFmRV/u9vIVVwK0by4laSR+HfkVGdnomKBSWnJVBZ7zZpWctpKRQuboCp33XfVd0xF5yR2elgMh05v/SbAHaHo9BAaEFl76wT75w+x1AdlMARopIbT2gs4UOSNFovEEvD9VAjM60XYs4DGpveTaA3FTgFqqK4wUQFK+XSiV0k50tAFFQXas2CrAANB0xAD4KcOaEFBEKOR4gJTMAQR1cGKTvfOIloBNk0O3G/LQtAMP6dJZBF0Jsy+72HIqKPjCjsxIaYukgpsG8aB+mLer4khg2B+OKt++KYlPGJ6kltvxMwxRBOHcem1N0OpL1TWvlyfZnyY1Nx9Y8QMeu6nu+jrmbURq1DL6JSGmwa1uDgdtj2KyD2HAHgttTBA0Cm+zQcBczms0jHcjcPUw1fzd/uyKwxItaRXGFgMXuDtjdAmz6UaveX3fdgsg+S7dkwQLkIzyPAGT2iUGv1J3ZdiOWUAXcWstpms15EIuRyBFRq2G7yVF8i6p6FwtlW728NQW9i3oviFrvbrXojNsFott+vN+yI1vpM+Fmqhyt73imgm40H4CqVgmhWh2xx9LFYuBWAkT58HN0eoiD+oSZwPG9W3sYyoJ1L0hVEaiqG52NZuCAMQNbHkY2qsPjI7qmKq73hPa75/RkAFt3xfF5gT7TuSmbUU1+uzdN7fZ82TmBKsLBoGVMKJ+8vgVAVUJcLj4Uy3zUIoHq0v6IyOxCXltx+cgeb1eOKzLEfi9/LRZ
wr2AZx76LQt3FgjGAW3xGo3h9LVTq/bW6twVkAaC0StT73fY8FD3NC24cUoZNc06SPRNvE/dLrRG1ztfQLQGhMutkKVNPJQ1ARgBEouJggBYASXTVm3qbay/5FoA2FBr3qw7UNcF4BMC+OOeIadWak//a6xt9xZlho4fMDOIOXlQUKi6yJoSDkg9ZtffLIpZ8+hvCHt3WAO0GlsCK1gBXdnH94Hkf0SBOrZBGlY4O3zC1HZRRrK0nmwgZNdryLWfod0JG9LXGKzfoRpmUMNWQ7m6xYE23hqD16UBEgEVNW3ZzGYa6ChYSQgqBpRwNwCrTigqIOSW2vQ1zBABcshmJC0QTC08BELgZFxsxjl837o7UrhAuw2DOBG0N4JKoNfk0IChkPHZYeyCZEVQUk6LzyKfFcGuJNCiRFAQiyhhoDmuoQuQsWsvV7b0QV/rYIIO8fQR0owYbmLIsw/HnuFsdFQtLfBcjKd7FG5TqXRDum7UqFMv+SWWwfpcqZPqb3kGlhUDUKkZNd1XoI/mj99tMbU/Opxv48cNG/ge38fn++ZYP1/E4j8PLuI7HDcv698fT+9AAHFngPsJTXdBAXduTHIOPkRlGJCYfzKvZAtAiZMCVmaTUh8A5+UvbAX9InKEYWzWIPzVAbQ3QdiXzMlYx19iMT/N/g4zmRQw+ZYsAqrD6UAkFF6JUbimxdr5S75xkraVK3Xexz78aAUi4nYTkzrxe4efSNsLFE8dxojAfwG5/q/H8dBbXUz9+gQb57efbnsP25Oadaq6NLY2dddc5ZnPp/lZnnXGtYwIMDNpZVNV936MBBgPkFvmRAFxVQd19ygA0eSzj1TQ+abgt1BLaC2gQWIhJRXZJkQ8cM1IBREqxENxJg70lYAMOdBoLIiMju9yGl9NOXA+QLzq9vPpw+AxNGZakDNZyJzFn/I8AEIG4WB1vJlEHkntJBpFvInU2jbcfH8u712QmSQcDNEq37jmL/UN0NoPegKjdqA0CRw390ADVAZ8xAQKEq0vb22HhzuDvhIBPAdgE17EBziTvVgFlOPUxkMfYvz3RY1b2ZHL4cW4Hdl/h00Fqr5JbsQ4YbTbBX8e2YfBCT1yFCejehpoMxgZhD701Gluf92+AhhntiMLzOTl7VCMxQzD2ZuQGV33Nj92+n5xbQUNEtzVQMTR+1rz3CQInsILNLJ6nse4ToOu3xnfWgCPPZNVyY8OuWgbg/FvNrWTgSwBaRYXWwrrLCOJOVqEqsEihFoTs7nE5RNB2A4OTzVdSNBGEvNK9xAILkRoXUT7+WnU7UBzJYET3OWlDUKEl3MuOYgUXXEgl4b4jahKeZUTmbrW4f3feCF39AJ8AhWvohT6+BCw2XZAjkaNwOd+EeAS145fRe3x2KkalD3SbXzQr0iC2lTuIdd+GP32yz9pCc2z5wQAD2yUNXgRa30O6dJBDswtWWoo+4nVDALpoaYtay9fEDAcXcBn7L40AFIHxMmfbDLvpzXHiTnRT6zhuoKJdsojRaLaSTpnzNu/OHyiuRTj5N4C7cN/RNbisUJVuqbhWReyWI4bvdS9V8X0vZ4PYDrozYPXBCJYAOxjWseGZxQPuQMXQR2WBZteNq14NE6tGUcgtYdVul9e8qzxb37QAVE2vKe6d3CAQO0h3NMgHUMLHnyVc2dV9xrbbdm+lMjCG/XBbH45GKdq/V2tt5/MGux5iuGO2ATGt7scqCc0DDMNgHsB9liefM4CIhVj+W8Vy5uPCWoOf1V2Zfdxf7z8EFQl2rmP1hpvDocWyzxNQh7kZiAYkaB+MwYw+BsNJW8T2N7dJ3IGMVr3FBUK16i76gDAxJC4UC6HbxeZLArLdQBdGerqr2U/zKn1ymCPBU3LpoxnGTno7Nf4cELiTHyxeFYXOxOs6R12vcQ97P28BcE+XbyCQPyVs9sRTLejxajBgF3Qbo7FmQpUHh2OcDt5pfcoMImIEAO5JbkSHJaf12F2+rkwWFMFkn3yaqLhe5uL
mjHtmiCs696/CjJCS6wiAQ0+JvFY5dhkkqiIirkQC4EUii4zwvkQGriu6IpA7Dsj+/0Dxrak1u9YcrWYeHv7C+AbY/uZzR3fq3R9NADVgVeoNwIMBCOmaCo/GABotoQGYDxOgx9Gh24uZDlKI0IKm0yNXsNgYAFAsyIzHEhYAuPOiSsXIrpdsN7A1gJuGHzfQWQhkSjE1EJBcvo+zMZtQpefFMSC6LK2NhbcT5nBMRSRwVSl6LO0GulyVYCCw0ixn7MIhNwuxnjCLozZugUzrbwRCJohCQOliIZRKkVidf2YN4GOBsalgz4kHcDTA6qHltVMNR1W3gRmn9LiZx22zjqregl0atvX5kL+A5G4c7Yd5jTtpaQTzbFQZ3PiUF7DGBGxazRkRMY6qusoTQlUg0u4s7DyxE3E7/tIOSC/HCLkekth4H40R3bcBxCp3J3OYYJG1ES8qHAsef5OqPg0BEwdSA25Gt0HAuHv7wa0kp1oRA4ZOhH9vwfEChshg+BjliQU0+mkBwCTgz122Omgj8c0n6lHPumO0LWamNIpjcolaE10LnyZAHBVV+yP7/tvsDTSwiTrey8AV/GmAGPx3CAxRjBoQKPTeLbdWRXSbBKPhGhNQmkPaBxw/M1lmEtz7dy263GB1dGJJwrLPuts2AKB7m3GtFXCApdUTwSpI62YFuEqK3guFGAbYFfcFBVmdeSSVqhBaRTeGQzlDyAXSYw33+j1cOHaukEYmewrF4W1iI7EjGd8nXk+X0owjz7ICvFwCfS4zNIRcHvqInwLQPrVlm3lCKR++AqnzoUJB2g0U7ShWqNtqqfz0PG6gnDB5QGC7gaXqqCU7T/jTQ5bnHANhgcazRxq7eq/LswZNc0v0oGeSHreLTDpVKDQHFDHU3gkjkpHBpIgkldZ0JkWie2pEUBxHo0/N8RZwontb1iaCALJcPt1uQQOHwzLsdWt7zPEejxfAMSD9l6cofEdvfd3rmuafrE0ENAioutfWWXZadpffI3yCkge0ODlDadgt1f/X1tWuzXWkOAmqndn7v9aZid0HtD8E1f1mN5ln4sSv+6MOBUKAIM0DtNTRUj3d9Q8DiMOfBlD2X9lpgcxkWWCdfqbjpt0Arp30AyMiM0mAJw5CfFcwKOYvPqC6oyVU0WlgjKfMpNhQMLLomR80zAOc05YRmhDgCTQm4JH3MHXY0/bBiIzBQsAlgmapoJLuUUkx0J3LO4++yuC05GCAaWKcJSsEp6LFjlE8mTt5Aee828cv+gc4NZO1B9+Gc9QdOzb/HbO7MqL6JgHEhwf44tSQGVO7hgXX6MEQi4R2k36avR6A3B6m6BmU2YlUZ0Mbcxp5AnS/fzHSrJTt3NTddzT+yLx/0tgLj8bm5wlipH+mSXyWPAEtqp8aYzTscC/ZgBduwicM7FiXS2A6uy1zijCBBU08oQavW8Rwagp18+NxcDcNGDD3ubTUF4Pi7PDSNh+gMR9mnoc/8M1VvzzAshbnqHuiiQ16JMvU/byf+pRMsQbgT3fHgzODbDaS6mXxJwtwP4BX20wPSoC5pZzrATLXAKQWZk6vmSe9KymjkSm5aMqZL0aUz2lkJODFl7EQCNKU6u9KzwVxc/6DkCeodTfwPGVPPMJVUzYlGQ4BvWmcZRRM+EyWF7qJ9E/sRng6cboCTCHNMP94CXjXyzpx3Ji8L7Ps+ARlK2Iq8kY8LJYfX7DJ3iQIA+Pnu8KJ1rFcmRYKr1i8p0+zflYDnfBNkjgWkEnCQkpePC6YptGXAfg5Ywfc9nyuOQ7sQLTbhy1mPCe7dOZAwV0jAqnh7pJIhse9LLgEB30kkYoINjMnh/TYqexQPNNgHkD3MvkbB2CEOnJ+WCJIy54SXpTqcWfN0NrEw3aPQJVLnoTp14LQoJ6WSJW2xnnLVDalq4EzH2bhzwfDj1PgloPnAt1s/vr/BfXR5M3jw92ebgnrC67czggTQfU8vezwYEaOJc3zb8kQxo2lXVUPAmoRTro
cSuUerhkji6+kZmnSyWuMKzeoj0ppIM5pE0H2JoZuJA6IEjJsAME4eYINMRBgSmIkLNvjlheEqGKmUAwPDj6+D5LQVXmndSgyyfTE6NJiX5fCbGfHDs4TGTgZ9/K65l3CRK0eDPxbDwiVCKYtr7fSQoDeljYO/7sljJaJm3SnpY8BbGD+Px5gigqYPiVjIZ//ydVsI7BVFXvxqojWTsFt5jgG8FEIISmxc/JH6wELHwPQNYCQNwtdcfgZ8MAdOApMy2WQUOSx3FKcTES4OGMJB//RAyLkSxpkMM4rAwExdhqJkWy+TozbWESWECOPMavAOuwKRTAjIniC6m7r1ElEbyF4K07b/bUJrvFoMP8xQRCONdMw4b4gp/NuxuQtsTjafLzicg7j9bHZMyQPhsCCPhxvMWY3c5O4tPW6gq3q3funE9deCE5Zzl5x0/RPCNAagMS7nys4hzoZ1jI20rcBLPUEErELgj7Jp3/T4GqiNB2QAl5lAwbTY0QzWjtdEHYYE7siLLk2TAKAVkQimCesitAKUAMpbGIDsI43uqQsLX0ibACwjaO1M55LT3GZEU4bFDxxum9+ezviGgA0zdP7aH8Gw++/+I9/G4/jSLUFZYJ33PF6/gv9uM0K1wAWT0wU4ewO1gdfzgNXKFabdF7DYLU5bAwQzTvl1KK6qh5h2uPwmUieyaBwH/nuT1jmcj6BcIdTMLIYmZGMRJxDRkDdXdXs2b2iwsqmBnlHOjcbmrIatwI/cY9k7OrXPZzJMJcUIiMiA8V0tgdw5F3xPAggrDHiT4uI7XJDcNZ7bRBYV3B5gMF9E/JcOwRwuzV4YdJGAFJNhLG/FxxNaB2Hfz0Al7LUZTzGAPRPAyCA6AGBm+pF5qBAl937WyRqDECQFB0thQX01V4PdyuPJiN/jAJroGP3mM1Sl7wW4Pziw5csO2MtJ8v99RVpWCC/Z01Dxg9yXG6M+2KeGZ2RwEER9yaDjB6UQNCEDgTM1BJndTA/ThqfN8aeKtYR/ry984rSvIZnghfK/Z8Lf73Hx0F8MMf8QpjA4Xo7F5/D0X0esMZhCXFHg+YPAQDjXDd87fS6JKO8FalaA/CV/gIBcO9IU1XPU02rD48mdX9CAEOREeVpaU6zzvS0Xv/1MQqM/5nqbQQwU1VcqRsAbZ2lGcmO88opXvjRxxMZFM8rn3ZYIgOlDHTzvAw5iqTYLFUpMyPOyQBbERg9kNVp2oekC9nDHZaY/NxNksQyBrPefaJ3kLO+B5gWMAYBxXB186Dx/cS/Qg4XPmEJO20HAdy7vE7Qt25SmNWL54DAttO56+PFyRJtAVJX17NpIHm/tC/1R34+qBn4XUBEDDCZ45oINGlMs9iaNxkD0JCz4xNC21zpnrkbJvltrfOqdwjZIDAy2YyOCX8REUkxX2f6qUWGSpnoivMKBvAkQUQx1a2TEZknA9FWE+rvS3jx1udrfQMCcpQGcFtsNrrq66fCpFJQKz2gkC3npwF8MgF+1Wbn7bVXcuxRmx7MDfYvP7+1/M/8lnAAXrGe+9fErhmh/jaARaCuZHAMYNvvR8hf5XPqVntm3Jnw7OHgHpSxn6/k52uDIY1Itn9+rGOhzLg3DV/ib9fRpAJXpdaBglXwxezaY/g6Hum29szArWyGfj140kXTKzN9AoHt7LxuW1gUC4zWgj7XhcviYWPbfRHJzDIBSANpHSPM5s3xTy7iNHCeRhAdHf3RvNBggwkfGqlamkIyD8CJGA4UfZ6de7/5Igfsj2bqteF9k3U7/p+S8Go0r1Cyl8UAAqGt+GwnmMxzzonaoOqXmq/2MfduZ1oWl/9sigTWZczfa8tD6V08MTO/BGsGvZ5gYRjM2Nxj0o7l8QDM3of5d92rw/U+t/6uVrCA2emN4Z7MFXOsogdUSEF3nJS/qAeE4IJR0PsC3K7ib5uxJasgyCmYA7RmHaRZVlK3veyGALMy81E5BYb9a34OQyOc/1hkaJ6z1W3od+jqNeTtFBm
zJbYapWHW1s1Lxeoq4DEPUETBXBgTETyvVxZpquMaADbf9Fcse4BzMhjRgLfNfXGC2NKmCb7waAHjnETPkgFOf0ATUsUMU0271drAvcfajVe+d6GGSohZEwY8BZKot51nKz2pspDFNR7euuwC/TXVaUnhJxm/tT93FWxT8TrDb6w5n5cC1F6YgdleXBug/l8DiB0MUqtVWCrYc519fltnY/LoyqsEN9mcvZeup1smWPOajgnd3YHq6io8/fzDAChVM1ppUTenYqawmkJyErmwyfdjQtKsjuWkGd5AQu4J2gzd4JAzWzJYcb6BXbyvdrhw0CW6TAV56yBhjsfy6Ko35UXEyn40TWSUl2aGKS1JLMyaCidHEzr2nHpu0WbBoH+g91mUzVPuS/EWj9GAjC0KYdMiyjYNcGYDASXQ3tO0oF/wprabCbcBLDS+r12QW6jQ3ee37loFBqPjQ2NgSpHj++dF8UlyvqqLpjhbXaWnnwd4bzHIu767h5tbd7ai4SrRaxFXByXYltvm62SA9JrPiP66E/feOKd0HN0bpykaQP087YZzoiBBjxgoY9aSZ0osN/B+nufR+03Nutnut5jXw7UU8gZEEx2EiVj3gu3/cbkyTCLlnGv8XQFqqKqp6e9c7NOj8Up3Ck9HmNFDapwayHrGA2i9lmkh4R9ZAEy+NL9CADb5nPtx/vNlAPfZXKAVm/zYIGYIbKkftbw8Q8/zdGBiBi7O+iJYMOzKzWpWNFySC3lO9bFoa6u7n9xbja7qT3+2X5kKICPjn+2SNoaGr+IT5aMXtz9S0scDwIO2WP/mtnu/+5rW3AwtjTYIen+JbePSsjF7zncgB43UByfwqhZ9O/1F9Mb4k9L10FREv506K6HW85Rw2QddLcjrAb74W7TQXx4AwPl7Oq1i21j2+c+iWn4SD83k/frWGQ0rSM/7LUJeGzcVvcnTHZ1uKsH1IJ9UxmcXETSfZj+0G5MG3kJU+fvpG3SEibShr9rSpK7cNS0qLEhs0pTEM3PJ8EBvNYPoAp96nmkga1BKRD/KEa397rFZQLxNFbwPe+zupqt77I31TrWDzQsV9898HKvfb43HtLmlWfwfJwTQBMtT3Z+D3RAwZtQAFNt98JUXzquD5zEemEf/LUrNEbT7NmV8nN3tJN3wwhGM2GzzQ3V9vtpF89+MyriEWNnpkXAkIsNFMQMcflnuPIM9SjRpfYKe8XFIDRXefxhoMaowKqUIqRFoPJqeNxXi/bueqni/MRKiGV3KI4qilt7daPj1uKZLBNu3p+ZMFLmzeroSTZr647S74vbJtMvB23W9WGIb3Ka2cQ2gNxzf38fXc/q2qu0O0/2BNWQCFPr88XDoxwNsnuq/tOEIX07txzP1v6k7roZ8WxEc61H3bQGYZIdBpzPkmQ6ODEZCIO/AxsnInpLh0rzznecu9twy93tXix1031cL/eD9B6EW47x4qqAHyoKL+U9bGhhoxfPfeqrj/Vsn6bpSF/K8nPsky/mjm9m9WnkDvh9ZbyMRd6lst1PkVlJuvIpAgdVNN8aPLIQTlhnYgjg+aq+TB0hbED8GIJvYpDsbp74D4WWIbub3uYLjAX5P4hAZJJlLObu5bv0b13rWF/zD0rSbKLur3/U8UDHd8NSIhht5wjpLMZ2TgrlQRboK4e6WWAPIzMiyY3H54A6yLhUqLMBxcVWKYDf9gfoP/vxRdinivPL0IzxQHkNyPc3Hj7cRz3/r3Z3v/+q4peuwC+flNIKZ7uFnPUSVxS/Lf7TpKaNNSxEoKoDW81RThaIiJCmIAurdk9+swro9wJi27gRhy3SOy/ctgFWFSU8nVxtODLhHAQcikw3rKP751ADAWUB7NwfWA4DMuXpLP9sNcGz9Z7hqddco19bT73o/UvGMEjw5BpCMM8tCtvHILz2UX0zbH2fJVmR6PkJ+6I/6eVrOKY3+Oz5sKNRdimAX1d3F/tPv3x16FPF65696oALiRWfJ7+LLvrCVz3/
qKZ33f/sVJJgnqnhKVGj2rDeD7z82AFCqUBQsFRyIvnQJygK1XX+e9rJR0QbQaPH53cFgi9FMU3XdvChyJWgHak3hxx1k9gARStojaS/qkjf3ivcw0ltwWUd6ka7O35PEezPdjvIx45zW3Xe05MsAv711LUmFrvef30Woq36/n/fzfloVqfJMRTyIbvAA+X6ejFAoxr7Q0Ag7jQvjrsPlRNNu583v7ne1oGom3YqwlmjP1tXhjfUuG7/rz+9iP53nOfnUAxQRZSXh/tMsgmJ3Z/27nubr9797CkQvdsV5OhUSM9V5GPH370A9PAj0k2AhGoEOr7VyfooHJwD18/td0rNKOVI+bPH9d6d3krJysLUlYmDHFtyr23cAcOo+3ngV8SsJbLP0NYBnnzXoIj00BmDepD+uAi0bQLUYJ4mbmzHydaqF/shGQairTr9/vlGhrvffv59Aq+vP+3m/n7e64qg6ojviIVVERz71VBUgKoZhm7lJI4SbQl5+ra0hUUU96nrsaciYLzeU0KQZY9jmsqO76/1Evzu6O/E84LN9guh+lwfe2aXz/K6nk+/fTyUh5Duq4yUms6VzulOMeP9JPo9VAAsZpVeb4SGgHfN5/Mi66nm6HwVEem4kW3j/rRPJRrC7N4VbDzAGYIv5rCWe22d58Mg+88QGttmIrh4VKdUwBPbFU8oykpxY6hBQYjxJTqgBGPl+VbUebg1EEM704w6ssAEkLCaywqXQj3s5SX/fJMOoabLr2/46aYAAkezLgw7QQRVV4+C0TgjfsY0X34y7muCwfSnuI2FTu/lZmjbPyWo/IEkc1TPFiv6p1ewwqd19+znm/b/B4A2w1zNpUZjWRfjZwduqzLZ3rAHsGjdwUZ2AFZpGd0PUzN7NiS17UKU1AEDleqqbN0FSsTBuELtVwroZPfvC1gCqJcCbIzd7f0JXj0KSetZkdHWPBiVDqZaCceS4536pAPNkziQpFrX8QJT7BJ0Gm6XpRlXrqehHKsu2zeqXXkZs/9BPtMOxis/Lbo5tUv6rrnoNNSIyA1w6GZMD3ST4G0v51fcnp0vlZ9vd/hjXtsx3Ryhny4FzHxtsbBXcPWN02uj8i3euQ9rjcyHzRgAsx78GsGWpawCrCn+zhfNGq7oZlZMduskuT7WEmnKiL+oJ7f63e9wuDPckppmiRz8qzgr1RjG6wHidzNk+2oMBlqBQh7RSgyoK0WIw3q33u/pd1G/10yCloqYPO7yTClSwqrqHNVuO4j74vUC9PT2fEu3Ndej2w1lZpr3l8/0s4KCyB7hNkb0T7g7eooRAswmoNjX20pB7aPB7QZGuEgJUe7ux/YTduiAyrFGL+3nXqRg6bDOFg0RdCLAgUFZR5634QsswCOctdHcjomwA05ad3shVK9AtCKiYbXe4J4MPQwUSOPAUvZ44Cv/JSFIF5Dknp4n0c/XtqFf4xk7uoW0jgu/W+8/Tf4r9t/Q0ItrH2wkgWCyIDbGq58msZxiAMDGs1z8YNlmH5B7tUJ4jNbq575r6tNMw+JRYhRwhTLVKzSnKNoJoCqN4YJ2dzea1KSs2u3bX6+QOhtz+7JsGzlwiCOpW/nmJEEiljyeTeqQKjQHae0nsAS7J6wTaIem0YcZ1mevi6DpTu+o6BmAoSl1z+/Lf7m32yF0ElMPrUpjlXgyX9yMZs8YDhCjngG7G9qJwC1UvsuO36713QJiS9E93OwdrtzmBj9f8J7hOZCDXPXtQUAryGoAVkG/tYrA276e6v+HxcG5YudMDPo8Q2Ap7AHRmksTBobdax+gK08HSlroGcLscOXy2ZOkeF1bGt327hcUhwydrLthG3O1kG4PUcbTjJ4zM0Rl/1j2siSy+Vp/+I+x0X7bFsBtARlBPeuSAUIrsIuLX62Vxcf+9sQ25NSJ4ukp7a+8D3/zafUICg9JC1q/HH/KMj6I9n3debi3gOXiJjQDinBPRpKjcDCvjryrFi1Uvwirj1XF+/SJf7anuPAy
+XKk4zNOdSSFD7AgxPTfmDqdMcyBsKWUlUrHzXyHwgAkbgNx6ABuA6IIGR6WoItTDFo7U6+M1kzyHcpe2w/YPqufHr3+YyBLWPrBzsMUAfp31tIZg83GOIfW9b9z35NL2Mw5Dem41M1sQYirSBcavk9YD2yA12Igc4bYBOFOrGI3W+S9EeILDbSjLA+jbLYwBAJIlWyAJ/YA4h0fHlNzrZLA7PD5k+456qsRXql8A2gbAfP1FpsfcPcP4EsmIw8zoTAI5dxwJ+7QIBDKB6FY0dBST63b+KzwxnXKz4pQEvjHAh/lRVak1JWcPWpw/lnT8dSioHuL7QDlXZcMf979smvQDdOv8NdnluqxNyiLzvHLasfZ83bu25S+qr3eMkOf4bzSNYPS2W2066CDSABhLaE/XSVFK8/qcTpSm+wxBC7dICFdFxwACw7Dtuig3WKFolsINnewD4hykqtVQvDJjBhDTvk+I51dX80T3y5nYGMCLTBcZEV8GkIyjjsPQ8fUF0+GVDBbSPXuioFCiIyR2nGgEDtK62fExgAlX7WEniGKHNU9x99XDNS3Q445S3jzHXvpCDGAGezysOUax2GH++TGAmCg3yCki85yTPxKpDOLzEt58ZqngoWE2HcZFOz39DeZuonsxWntDha87CSBSs5N5uv9J0+WwcgN6QoxkwuJjbZvEGTcaQA5XLK8XwTkKb5BHvCKm4oS0WTbIXxWNg6PjYwhWuxiUMzweh2RmAGRisjjIxHLYALiSYxFAtHM8q9SFgA7zd3esf7L8bSsTLpO/d3W0l4VNTFx0YiblW/BlADf1GeTkxpsYo/hWCIExwP/4SQ05f582GRmZmcsyfXmALwgg8SQhpi5vpep6iFJY+8NxccIa3Ldh1+FvOQVvdfZMdtsAjAKs/xSwEsK85m4BpqnLhWNeqRrAdldzNpEUofNqKuVsJAiqGeTLBtTiU1WlROlFwxpWIc/xdiOhFUHyGIoHPkAQE+hyagE2IAKIDgEohItB6PACXJMZGNoOFGCVlTncLSzukzM1MfbhouFJCJ35RTxM+9nHAAacf1DAJXUm/T7/0lxB14EmPl9OZEMAAdmor/EMl+AnGtmzJKdk8conpr16EuA24ZF5KadZFeaiU3f2toRBj39gQxZpZQEoqqWOwX+uYSJHZQj2nVREC+2KHaf9/LzKyBY4BwAlVvCX69KWA3+qA4fH7WmHXco8cKuDyjnBcXyK67i0KcaXprtmt8bwPhgJLSkQWmZij3C/6ZA0IlaPTdt2uSna4GZ7gIAUyk8mNC1W+6w1wRKXYN/buwcrnQ2706+/1cBboLvpEzDSVev/zX8jwy/gH5mewAIe79JcpVCJ0UBUnY+53s8tiNMSOdZ5444zA2GUBI17negR4dFvrAEIu4UUUyPrKc7rHM8o0ch3XownItyVqVM7Y5TByDjRDyKPGkAX0DHDobexfA/a0WqnfA1T9rQJeD+kPQAuibzbIxa4deMzLaex/l5qc15Q9mtL7c8J3bfS95DHV/sHbpaw1jL890kH4Zi54OTNdkfr5mI34L7lZTScuqqdXvWIwlYXUA723SwHB+YUwCPdJR/3EG/JcY/vi5iVrNFoiFEcFjskafSedes/m/aGgxGJbpvPkDlYctSViF4ORm6nrGI122tAya3NYIbeXZTRtkORmxUFLA1275c0swSrvbDIeXN1yw3LM1Ie597hUHzx0EMgXVh/0TxHdLgvjeTGy77PCDMcO85yo8snXAhSn7+cYpmHodVtxgN8xnj4MYDP9xt6Kyj1I/Z05JEbYQycoJm6mZrKxJcbbeRwqebMk8GyBwNiPh0JBDyUrHU+6Hm2d9p4b+U6ewzLYxWOQbARsxrEcD7SKGnQYE/8y8xsKDJbwmhE7SFw+RSo4ysX3ccwHmYZd8cI9zZg52vVtEaQXYobExcRK9ovswTIjWXgNA4xPEKX9931+ceatefOLiWkax/GncL5y88krN0RSXJXok7
1Lj4G4I/E7+cPF+1Tywl8xxofkpaKus//491xrwU82kUwBg7d83MWAHD4lvFFGy9MsfdOZN4Iq/keZpw+Ih7MuPyGPZ2B/Fc+EZGZxyIyyWms9hdEpkRXFkMT4+Pelw8v0UtnkQSSV+3a3ppbBIGHUNatYmeqrnvUJjnXk/gFwv0F7W6pix+ojylovrcmPdDMtY9DJfW/j4eItAjZlGAAAAAASUVORK5CYII=\n", + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 43 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 273 + }, + "id": "HyjrWdv_1dse", + "outputId": "c67a3738-2195-4264-9bed-ed364055f047" + }, + "source": [ + "mugs_approx = Image.fromarray(img_approx[1].astype('uint8'))\n", + "mugs_approx" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 273 + }, + "id": "4O3kBCij1fbQ", + "outputId": "a8f55cbe-48e9-4a89-c3ae-3220539d9747" + }, + "source": [ + "flower_approx = Image.fromarray(img_approx[2].astype('uint8'))\n", + "flower_approx" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NBaVEtJI6kZe", + "outputId": "4ac5ccd3-760a-4086-aba8-d4bfdf735b6a" + }, + "source": [ + "def frobenius(x, x_approx):\n", + " return np.sqrt(np.absolute(np.linalg.norm(x - x_approx)))\n", + "\n", + "def mse(x, x_approx):\n", + " return ((x - x_approx)**2).mean()\n", + "\n", + "print(mse(cat, cat_approx))\n", + "print(mse(mugs, mugs_approx))\n", + "print(mse(flower, flower_approx))\n", + "\n", + "# 402.30466\n", + "# 335.3521\n", + "# 365.27515" + ], + "execution_count": null, + "outputs": [ + { + 
"output_type": "stream", + "text": [ + "1273.5289\n", + "1101.8867\n", + "995.1124\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BdNuTMPCBezl", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "afae78ce-4c5a-47c7-dbe8-0bf813904f49" + }, + "source": [ + "from tensorflow.python.training import gradient_descent\n", + "\n", + "x = tf.Variable(10.0, trainable=True)\n", + "\n", + "@tf.function\n", + "def f_x():\n", + " return 2 * x * x - 5 * x + 4\n", + "\n", + "for _ in range(20):\n", + " print([x.numpy(), f_x().numpy()])\n", + " opt = gradient_descent.GradientDescentOptimizer(0.1).minimize(f_x)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[10.0, 154.0]\n", + "[6.5, 56.0]\n", + "[4.4, 20.720001]\n", + "[3.14, 8.019201]\n", + "[2.384, 3.4469128]\n", + "[1.9304, 1.8008881]\n", + "[1.65824, 1.2083197]\n", + "[1.494944, 0.9949951]\n", + "[1.3969663, 0.9181981]\n", + "[1.3381798, 0.89055157]\n", + "[1.302908, 0.88059855]\n", + "[1.2817447, 0.8770151]\n", + "[1.2690468, 0.8757255]\n", + "[1.2614281, 0.87526155]\n", + "[1.2568569, 0.87509394]\n", + "[1.2541142, 0.87503386]\n", + "[1.2524685, 0.87501216]\n", + "[1.251481, 0.8750043]\n", + "[1.2508886, 0.87500143]\n", + "[1.2505331, 0.8750005]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 302 + }, + "id": "2Dd9tNm2oliL", + "outputId": "b0507c7d-a943-413d-ab20-2bab014b8cf6" + }, + "source": [ + "m, n = w1.shape\n", + "p = 2\n", + "u = tf.Variable([[np.random.rand()] * n], trainable=True)\n", + "v = tf.Variable([[np.random.rand()] * m], trainable=True)\n", + "s = tf.Variable([np.random.rand()] * p, trainable=True)\n", + "w = np.array([w1, w2])\n", + "\n", + "@tf.function\n", + "def f_x():\n", + " # # A = tf.Variable([[tf.constant_initializer(0)] * m] * n, trainable=False)\n", + " # for i in range(p):\n", + " # 
v_tmp = tf.transpose(v) * w[i].T\n", + " # # print('v_tmp.shape:', v_tmp.shape)\n", + " # A_tmp = v_tmp * u\n", + " # # print('A_tmp.shape:', A_tmp.shape)\n", + " # if i == 0:\n", + " # A = A_tmp * A_tmp\n", + " # else:\n", + " # A = A + (A_tmp * A_tmp)\n", + " # for i in range(p):\n", + " # s = tf.transpose(v) * w[i].T * u\n", + " # w_approx = s * u * tf.transpose(v)\n", + " # print('s.shape:', s.shape)\n", + " # print('w_approx.shape:', w_approx.shape)\n", + " # if i == 0:\n", + " # mse = (w[i].T - w_approx) * (w[i].T - w_approx)\n", + " # else:\n", + " # mse = mse + (w[i].T - w_approx) * (w[i].T - w_approx)\n", + "\n", + " uv = u * tf.transpose(v)\n", + " s_tmp = tf.expand_dims(tf.expand_dims(s, axis=1), axis=2)\n", + " approx = tf.reduce_sum(w - s_tmp * tf.stack([uv] * p), axis=0)\n", + " return -tf.norm(approx, ord='fro', axis=[-2,-1])\n", + "\n", + "norms = []\n", + "for _ in range(100):\n", + " # print([x.numpy(), f_x().numpy()])\n", + " # print(f_x().numpy())\n", + " norms.append(f_x().numpy())\n", + " opt = gradient_descent.GradientDescentOptimizer(0.0001).minimize(f_x)\n", + "\n", + "plt.plot(norms)\n", + "print(u.numpy().mean())\n", + "print(v.numpy().mean())" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "0.27518278\n", + "0.6178978\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "weED2euaqB1Z", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 483 + }, + "outputId": "0ebfcde9-61c7-43b7-bed3-5d5438cfedeb" + }, + "source": [ + "models['mnist'].get_layer('dense_1').set_weights([w1, b1])\n", + "models['fashion'].get_layer('dense_1').set_weights([w2, b2])\n", + "_, mnist_acc = models['mnist'].evaluate(x_test, y_test, verbose=0)\n", + "_, fashion_mnist_acc = models['fashion'].evaluate(test_images, test_labels, verbose=2)\n", + "print('MNIST - Test accuracy:', mnist_acc)\n", + "print('Fashion MNIST - Test accuracy:', fashion_mnist_acc)\n", + "\n", + "A = (u * tf.transpose(v)).numpy()\n", + "w1_approx = s[0].numpy() * A\n", + "w2_approx = s[1].numpy() * A\n", + "\n", + "models['mnist'].get_layer('dense_1').set_weights([w1_approx, b1])\n", + "models['fashion'].get_layer('dense_1').set_weights([w2_approx, b2])\n", + "print('')\n", + "\n", + "_, mnist_acc_approx = models['mnist'].evaluate(x_test, y_test, verbose=0)\n", + "_, fashion_mnist_acc_approx = models['fashion'].evaluate(test_images, test_labels, verbose=2)\n", + "print('MNIST - Test accuracy:', mnist_acc_approx)\n", + "print('Fashion MNIST - Test accuracy:', fashion_mnist_acc_approx)\n", + "\n", + "print('\\nMNIST - Accuracy drop:', mnist_acc - mnist_acc_approx)\n", + "print('Fashion MNIST - Accuracy drop:', fashion_mnist_acc - fashion_mnist_acc_approx)\n", + "\n", + "plot_accuracies(list(models.keys()), [mnist_acc, fashion_mnist_acc],\n", + " [mnist_acc_approx, fashion_mnist_acc_approx])" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "313/313 - 0s - loss: 0.3365 - accuracy: 0.8820\n", + "MNIST - Test accuracy: 0.9768999814987183\n", + "Fashion MNIST - Test accuracy: 0.8820000290870667\n", + "\n", + "313/313 - 0s - loss: 181.9923 - accuracy: 0.1000\n", + "MNIST - Test accuracy: 
0.10279999673366547\n", + "Fashion MNIST - Test accuracy: 0.10000000149011612\n", + "\n", + "MNIST - Accuracy drop: 0.8740999847650528\n", + "Fashion MNIST - Accuracy drop: 0.7820000275969505\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3deZgU5bn+8e/N5oAgJEoSBJVxAQQRDCMuccGg5+AGLjEBg4IxoqIm5pjtRI9BMYvRE2NcYuCYoAYRJYr8lMSToxKjaFgUZRMFRcElQVxgJMgAz++PqiHN0DP0kGmmZub+XBcXXWs/VdPdd79vVVcpIjAzM8uaZvVdgJmZWT4OKDMzyyQHlJmZZZIDyszMMskBZWZmmeSAMjOzTHJAWY0k/UDS/9T1vAWsKyTtXxfrsoZJ0h8kjajvOvKRNEPS1+u7jsbOAdWESBopab6kdZLelfQrSR1qWiYifhwRBb0RazNvXZA0QdJGSZ121nM2Fkq8JmlRfddSnYg4MSLuquv1SuqafgFqUdfrtrrlgGoiJF0BXA98B2gPHA7sA/xJUqtqlsnsG1jSrsCZwEfA8J383JndL7VwDPAZYF9JhxbrSRrJvrJ64oBqAiTtBlwDXBYRf4yIiohYDnwZ6Er6AS9pjKQpkn4naQ0wMh33u5x1nSvpDUmrJf2XpOWSjs9Z/nfp48pvqSMkvSnpPUlX5qynv6RnJX0o6R1Jt1YXlNU4E/gQuBbYqhtI0qcl/VbS25I+kDQ1Z9oQSfMkrZG0TNKgdPyW7ahhW86X9CbwRDr+gbQl+pGkpyT1ylm+taT/TvfVR5KeTsc9KumyKvW+JOn0qhuYdnFdWmXci5LOSFtAN0n6e7ot8yUdVIv9NwJ4GJieZ//NkPQTSbPSdT8s6dNV9sWodP++I+nbVfZb1dfQnpKmSXpf0lJJF6Tz7peO+3w6vKekVZIG5NTx9fTxSEnPpNv8Ydr6OzIdvyLdDyNy6jhZ0gtp/SskjcnZxKfS/z+UVC7piHSZr0lanL5mHpO0T876TpD0cvq3vBVQLfa17aiI8L9G/g8YBGwEWuSZdhcwKX08BqgATiP58tI6Hfe7dHpPoBw4CmgF3JjOf3zO8pXzdgUCGJ+upw/wCXBgOr0fSSuuRTrvYuDynLoC2L+GbXoc+Bnw2XTb+uVMexSYDHwKaAkcm47vT9LiOiHdvs5Aj3Ta8srtqGFb7gZ2BVqn478GtAN2AX4BzMtZ/jZgRvoczYEj0/m+DPw1Z74+wGqgVZ5tPBd4Jme4J0ko7wL8OzAX6EDyYXkg0KnA10MbYA1wEknQv5f7/GndbwEHpdv7+zz7YlI6rTewqsproOpr6CngdqAE6JvO/8V0/guARWlNjwE3Vqnj6+njkenf+bx0f14HvJnu512AfwPWAm3T+QektTUDDgb+BpxWZRta5DzXEGBpuh9bAFcBM9Npe6Tr/hLJ6+lbaS1fr+/3dmP/V+8F+N9O+CMnLaR3q5n2U+BP6eMxwFNVpo/J+XC6mjTM0uE2wAZqDqguOfPPAoZWU8flwEM5w9UGFLA3sBnomw4/BtycPu6UTvtUnuV+DdxUzTqXs/2A2reGfdwhnad9+qH4D6BPnvlKgA+AA9LhG4Hbq1lnO+BjYJ90+EfAb9LHXwReIQn5ZjvweliVfhCXkIT26TnTZwA/zRnumf6
dm+fsix45038G3JnvNQTsBWwC2uWM+wkwIWd4GjAfeAnYpUoduQH1as603mkdn80Zt7ryNZFnm39R+bcnf0D9ATg/Z7gZsI6kG/xc4LmcaQJW4oAq+j938TUN7wF7KP/xgE7p9EoraljPnrnTI2IdyYdCTd7NebwOaAsgqZukR9IusjXAj0m+qRbiHGBxRMxLhycCZ0tqSfKB+H5EfJBnub2AZQU+Rz5btl1Sc0k/TbsJ15AEHCTbsAfJB/82zxUR60lad8MlNQOGAffke7KIWEvSGhyajhpGsq1ExBPArSQtiL9LGqekK7cQI4D7I2JjWs/vqdLNx9avgzdIWg571DB9z2qm7Uny91hbZf7OOcPjSVprt0TEJzXU/becx/8AiIiq4ypfX4dJejLtMvwIuIiaX1/7ADen3YcfAu+TBFFntn3dBzW/T6yOOKCahmdJutfOyB0pqS1wIkl3WaWaLm//DtAlZ/nWwO47WNOvgJdJWhK7AT+g8H79c0kO7r8r6V3g5yQfPieRfHB8WvnPTlwB7FfNOj8maRFW+lyeeXL3zdkk3ULHk7SauqbjRRL462t4rruArwIDgXUR8Ww180HSlTYsPU5SAjy5pZiIX0ZEP5IWTjeSE2BqJKkLSetreM7++xJwkqTcD/C9ch7vTdJt914N09/OGc7dT2+T/D3aVZn/rbSetiStmzuBMZXHuurAvSQts70ioj1wB/98feV7ja8ALoyIDjn/WkfETJLX/ZbtlSS23n4rEgdUExARH5GcJHGLpEGSWkrqCtxP0lWR9xt8HlOAU9OD061IunN29GBxO5LjIOWSegAXF7JQ+kG9H8nxpL7pv4NIPpDOjYh3SLprbpf0qXRbj0kXvxM4T9JASc0kdU6fG2AeMDSdv4zkQ3t79X9C0oJsQ9ICBCAiNgO/AX6eHvhvLukISbuk058l6Yb8b7a/76eTfLu/FpicrhtJh6athJYk4bo+Xef2nEPSNdidf+6/biSvg2E58w2X1FNSm/S5p0TEppzp/yWpjZITQ84jaRVuIyJWADOBn0gqkXQwcD5QeeLNzcCcSH6e8ChJkNSFdiQtt/WS+pN8oai0imRf7Zsz7g7gP9PtQVJ7SWel0x4Feik5OaUF8A3yf4GxOuaAaiIi4mckrZQbSYLhryTfGgdup1sldx0LgcuA+0i+VZYDfyf5oK6tb5N8aKwl6eLJ+wGXxwjg4YiYHxHvVv4j+aA7Jf0Gfg7JN/6X0/ouT+ufRfJhehPJcZc/k3z4A/wXSfB9QBLm926njrtJuqreIjnI/1ye7ZsPzCbpLrqerd9vd5McR/kdNUj/Ng+StNRya9qNZL99kNaxGrgBtvxg+g/VrHIEyTGvd6vsvzvYupvvHmACSRdtCcmHcq4/k5xU8DjJiQ3/W8NmDCNpYb4NPAT8MCL+T9IQkhN4Kr+c/AfweUlfrWFdhRoNXCtpLcmx0/srJ6Rd0z8Cnkm79A6PiIdI/kb3pV22C0h6F4iI94CzSI7XrgYOAJ6pXJ+koyWV10HNVoWS7lSz2ku7Zz4k6aZ7vb7raUgknQuMioij6ruWqiTNIDlBZJurgqQt79eBlhGxcedWZk2NW1BWK5JOTbt2diVpjc3nnycIWAHSbrPRwLj6rsUsyxxQVltDSLpq3ibp6hgaboYXTNK/kxwD+Rvb70Y0a9LcxWdmZpnkFpSZmWVSg7uQ4x577BFdu3at7zLMzKyOzJ07972I6Fh1fIMLqK5duzJnzpz6LsPMzOqIpDfyjXcXn5mZZVLRAkrSb9JL4C+oZrok/VLJ5fdfUnrJfTMzMyhuC2oCya/Eq3MiyWnKBwCjSK7NZmZmBhTxGFREPJX+6rw6Q4C709/QPCepg6RO6bXUzMxqpaKigpUrV7J+/fr6LsWqUVJSQpcuXWjZsmVB89fnSRKd2fqS9SvTcdsElKRRJK0s9t57751SnJk1LCtXrqRdu3Z07dqV5ILjliURwerVq1m5ciWlpaU
FLdMgTpKIiHERURYRZR07bnMmopkZ69evZ/fdd3c4ZZQkdt9991q1cOszoN5i63uqdEnHmZntEIdTttX271OfATUNODc9m+9w4CMffzIzs0pFOwYlaRIwgORW4yuBH5LcNpqIuIPkRmwnkdxTZh3JfXrMzOpE1+8/WqfrW/7Tk7c7z8qVK7nkkktYtGgRmzdv5pRTTuGGG26gVatWW8339ttv841vfIMpU6bUuL6TTjqJe++9lw4d8t0gumZjxoyhbdu2fPvb3671sllRtBZURAyLiE4R0TIiukTEnRFxRxpOROKSiNgvInpHhC8PYWYNVkRwxhlncNppp/Hqq6/yyiuvUF5ezpVXXrnVfBs3bmTPPffcbjgBTJ8+fYfCqbFocJc6qgt1/c2qsSjkG6KZ5ffEE09QUlLCeeclnUHNmzfnpptuorS0lNLSUv74xz9SXl7Opk2buOuuuzjllFNYsGAB69atY+TIkSxYsIDu3bvz9ttvc9ttt1FWVrbl0m7l5eWceOKJHHXUUcycOZPOnTvz8MMP07p1a8aPH8+4cePYsGED+++/P/fccw9t2rSp571RNxrEWXxmZlm3cOFC+vXrt9W43Xbbjb333puNGzfy/PPPM2XKFP785z9vNc/tt9/Opz71KRYtWsTYsWOZO3du3vW/+uqrXHLJJSxcuJAOHTrw+9//HoAzzjiD2bNn8+KLL3LggQdy5513FmcD64EDysxsJzjhhBP49Kc/vc34p59+mqFDhwJw0EEHcfDBB+ddvrS0lL59+wLQr18/li9fDsCCBQs4+uij6d27NxMnTmThwoXF2YB64IAyM6sDPXv23Kb1s2bNGt58801atGjBrrvu+i+tf5dddtnyuHnz5mzcuBGAkSNHcuuttzJ//nx++MMfNqoraTigzMzqwMCBA1m3bh133303AJs2beKKK65g5MiRNR4T+sIXvsD9998PwKJFi5g/f36tnnft2rV06tSJiooKJk6cuOMbkEFN8iQJM2v8dvZJP5J46KGHGD16NGPHjmXz5s2cdNJJ/PjHP2bSpEnVLjd69GhGjBhBz5496dGjB7169aJ9+/YFP+/YsWM57LDD6NixI4cddhhr166ti83JBCXXam04ysrK4l+9YaHP4svPZ/FZQ7Z48WIOPPDA+i6j1jZt2kRFRQUlJSUsW7aM448/niVLlmzz26nGIt/fSdLciCirOq9bUGZm9WjdunUcd9xxVFRUEBHcfvvtjTacassBZWZWj9q1a8e/2ivUWPkkCTMzyyQHlJmZZZIDyszMMskBZWZmmeSTJMyscRpT+G+JClvfRwXNNnXqVE4//XQWL15Mjx496raGAhR6K49CTJ06lW7dutGzZ89aLde2bVvKy8v/5ed3C8rMrA5NmjSJo446qsYf59ZG5SWNClXorTwKMXXqVBYtWlQn69oRDigzszpSXl7O008/zZ133sl9990HwIwZMzjmmGM4+eST6d69OxdddBGbN28GkpbGt771LXr16sXAgQNZtWoVAAMGDODyyy+nrKyMm2++mccff5xDDjmE3r1787WvfY1PPvmE2bNnc/DBB7N+/Xo+/vhjevXqxYIFC1i+fDkHHXQQABMmTOC0007jhBNOoGvXrtx66638/Oc/55BDDuHwww/n/fffB2D8+PEceuih9OnThzPPPJN169Yxc+ZMpk2bxne+8x369u3LsmXLWLZsGYMGDaJfv34cffTRvPzyywC8/vrrHHHEEfTu3ZurrrqqzvanA8rMrI48/PDDDBo0iG7durH77rtvuXjsrFmzuOWWW1i0aBHLli3jwQcfBODjjz+mrKyMhQsXcuyxx3LNNddsWdeGDRuYM2cOl1xyCSNHjmTy5MnMnz+fjRs38qtf/YpDDz2UwYMHc9VVV/Hd736X4cOHbwmmXAsWLODBBx9k9uzZXHnllbRp04YXXniBI444Yst1A/PdsuPII49k8ODB3HDDDcybN4/99tuPUaNGccs
ttzB37lxuvPFGRo8eDcA3v/lNLr74YubPn0+nTp3qbH86oMzM6sikSZO23Dpj6NChW7r5+vfvz7777kvz5s0ZNmwYTz/9NADNmjXjK1/5CgDDhw/fMh7YMn7JkiWUlpbSrVs3AEaMGMFTTz0FwNVXX82f/vQn5syZw3e/+928NR133HG0a9eOjh070r59e0499VQAevfuXatbdpSXlzNz5kzOOuss+vbty4UXXsg777wDwDPPPMOwYcMAOOecc3Zw723LJ0mYmdWB999/nyeeeIL58+cjiU2bNiGJk08+GUlbzVt1ON/4Qm7PsXr1asrLy6moqGD9+vV5l8m9TUezZs22DDdr1myrW3ZMnTqVPn36MGHCBGbMmLHNejZv3kyHDh2YN2/edmuvK25BmZnVgSlTpnDOOefwxhtvsHz5clasWEFpaSl/+ctfmDVrFq+//jqbN29m8uTJHHXUUUDyoV95QsO99967ZXyu7t27s3z5cpYuXQrAPffcw7HHHgvAhRdeyNixY/nqV7/K9773vR2uvbpbdrRr127L1dF32203SktLeeCBBwCICF588UUguWVI5TG3urzlh1tQZtY4FXhaeF2ZNGnSNiFx5plnbjledOmll7J06VKOO+44Tj/9dCBpJc2aNYvrrruOz3zmM0yePHmb9ZaUlPDb3/6Ws846i40bN3LooYdy0UUXcffdd9OyZUvOPvtsNm3axJFHHskTTzzBvvvuW+vaq7tlx9ChQ7ngggv45S9/yZQpU5g4cSIXX3wx1113HRUVFQwdOpQ+ffpw8803c/bZZ3P99dczZMiQHdh7+fl2G7aFb7dh0HDfH+MHd+Kze9f+w7lQB3fpsEPLzZgxgxtvvJFHHnlkm2l19XuhhqQ2t9twF5+ZmWWSu/jMzIpowIABDBgwIO+0ptZ6qi23oMysUQiChnbIoqmp7d/HAWVmjcIbH1awcd0ah1RGRQSrV6+mpKSk4GXcxWdmjcItf/2Ay4B9OryHqPvf5Cxe27rO19nUlJSU0KVLl4Lnd0CZWaOw5pPN/Oip1UVbv89y3fncxWdmZpnkgDIzs0xyQJmZWSY5oMzMLJMcUGZmlkkOKDMzyyQHlJmZZZIDyszMMskBZWZmmVTUgJI0SNISSUslfT/P9L0lPSnpBUkvSTqpmPWYmVnDUbSAktQcuA04EegJDJPUs8psVwH3R8QhwFDg9mLVY2ZmDUsxW1D9gaUR8VpEbADuA6reCziA3dLH7YG3i1iPmZk1IMUMqM7Aipzhlem4XGOA4ZJWAtOBy/KtSNIoSXMkzVm1alUxajUzs4yp75MkhgETIqILcBJwj6RtaoqIcRFRFhFlHTt23OlFmpnZzlfMgHoL2CtnuEs6Ltf5wP0AEfEsUALsUcSazMysgShmQM0GDpBUKqkVyUkQ06rM8yYwEEDSgSQB5T48MzMrXkBFxEbgUuAxYDHJ2XoLJV0raXA62xXABZJeBCYBI8P3azYzM4p8R92ImE5y8kPuuKtzHi8CvlDMGszMrGGq75MkzMzM8nJAmZlZJjmgzMwskxxQZmaWSQ4oMzPLJAeUmZllkgPKzMwyyQFlZmaZ5IAyM7NMckCZmVkmOaDMzCyTHFBmZpZJDigzM8skB5SZmWWSA8rMzDLJAWVmZpnkgDIzs0xyQJmZWSY5oMzMLJMcUGZmlkkOKDMzyyQHlJmZZZIDyszMMskBZWZmmeSAMjOzTHJAmZlZJjmgzMwskxxQZmaWSQ4oMzPLJAeUmZllkgPKzMwyyQFlZmaZ5IAyM7NMckCZmVkmOaDMzCyTHFBmZpZJRQ0oSYMkLZG0VNL3q5nny5IWSVoo6d5i1mNmZg1Hi2KtWFJz4DbgBGAlMFvStIhYlDPPAcB/Al+IiA8kfaZY9ZiZWcNSzBZUf2BpRLwWERuA+4AhVea5ALgtIj4AiIi/F7EeMzNrQLYbUJJOlbQjQdYZWJEzvDIdl6sb0E3SM5KekzSomhpGSZojac6
qVat2oBQzM2toCgmerwCvSvqZpB51/PwtgAOAAcAwYLykDlVniohxEVEWEWUdO3as4xLMzCyLthtQETEcOARYBkyQ9Gzaomm3nUXfAvbKGe6Sjsu1EpgWERUR8TrwCklgmZlZE1dQ111ErAGmkBxH6gScDjwv6bIaFpsNHCCpVFIrYCgwrco8U0laT0jag6TL77XabICZmTVOhRyDGizpIWAG0BLoHxEnAn2AK6pbLiI2ApcCjwGLgfsjYqGkayUNTmd7DFgtaRHwJPCdiFj9r2yQmZk1DoWcZn4mcFNEPJU7MiLWSTq/pgUjYjowvcq4q3MeB/Af6T8zM7MtCgmoMcA7lQOSWgOfjYjlEfF4sQozM7OmrZBjUA8Am3OGN6XjzMzMiqaQgGqR/tAWgPRxq+KVZGZmVlhArco5qQFJQ4D3ileSmZlZYcegLgImSroVEMnVIc4talVmZtbkbTegImIZcLiktulwedGrMjOzJq+gq5lLOhnoBZRIAiAiri1iXWZm1sQV8kPdO0iux3cZSRffWcA+Ra7LzMyauEJOkjgyIs4FPoiIa4AjSC5JZGZmVjSFBNT69P91kvYEKkiux2dmZlY0hRyD+n/pLTBuAJ4HAhhf1KrMzKzJqzGg0hsVPh4RHwK/l/QIUBIRH+2U6szMrMmqsYsvIjYDt+UMf+JwMjOznaGQY1CPSzpTleeXm5mZ7QSFBNSFJBeH/UTSGklrJa0pcl1mZtbEFXIlie3d2t3MzKzObTegJB2Tb3zVGxiamZnVpUJOM/9OzuMSoD8wF/hiUSoyMzOjsC6+U3OHJe0F/KJoFZmZmVHYSRJVrQQOrOtCzMzMchVyDOoWkqtHQBJofUmuKGFmZlY0hRyDmpPzeCMwKSKeKVI9ZmZmQGEBNQVYHxGbACQ1l9QmItYVtzQzM2vKCrqSBNA6Z7g18H/FKcfMzCxRSECV5N7mPX3cpnglmZmZFRZQH0v6fOWApH7AP4pXkpmZWWHHoC4HHpD0Nskt3z9Hcgt4MzOzoinkh7qzJfUAuqejlkRERXHLMjOzpm67XXySLgF2jYgFEbEAaCtpdPFLMzOzpqyQY1AXpHfUBSAiPgAuKF5JZmZmhQVU89ybFUpqDrQqXklmZmaFnSTxR2CypF+nwxcCfyheSWZmZoUF1PeAUcBF6fBLJGfymZmZFc12u/giYjPwV2A5yb2gvggsLm5ZZmbW1FXbgpLUDRiW/nsPmAwQEcftnNLMzKwpq6mL72XgL8ApEbEUQNK3dkpVZmbW5NXUxXcG8A7wpKTxkgaSXEnCzMys6KoNqIiYGhFDgR7AkySXPPqMpF9J+rdCVi5pkKQlkpZK+n4N850pKSSV1XYDzMyscSrkJImPI+LeiDgV6AK8QHJmX43S30vdBpwI9ASGSeqZZ752wDdJTsQwMzMDCvuh7hYR8UFEjIuIgQXM3h9YGhGvRcQG4D5gSJ75xgLXA+trU4uZmTVutQqoWuoMrMgZXpmO2yK9jcdeEfFoTSuSNErSHElzVq1aVfeVmplZ5hQzoGokqRnwc+CK7c2bttrKIqKsY8eOxS/OzMzqXTED6i1gr5zhLum4Su2Ag4AZkpYDhwPTfKKEmZlBcQNqNnCApFJJrYChwLTKiRHxUUTsERFdI6Ir8BwwOCLmFLEmMzNrIIoWUBGxEbgUeIzk0kj3R8RCSddKGlys5zUzs8ahkIvF7rCImA5MrzLu6mrmHVDMWszMrGGpt5MkzMzMauKAMjOzTHJAmZlZJjmgzMwskxxQZmaWSQ4oMzPLJAeUmZllkgPKzMwyyQFlZmaZ5IAyM7NMckCZmVkmOaDMzCyTHFBmZpZJDigzM8skB5SZmWWSA8rMzDLJAWVmZpnkgDIzs0xyQJmZWSY5oMzMLJMcUGZmlkkOKDMzyyQHlJmZZZIDyszMMskBZWZmmeSAMjOzTHJAmZlZJjmgzMwskxxQZmaWSQ4oMzPLJAeUmZllkgPKzMwyyQFlZma
Z5IAyM7NMckCZmVkmOaDMzCyTihpQkgZJWiJpqaTv55n+H5IWSXpJ0uOS9ilmPWZm1nAULaAkNQduA04EegLDJPWsMtsLQFlEHAxMAX5WrHrMzKxhKWYLqj+wNCJei4gNwH3AkNwZIuLJiFiXDj4HdCliPWZm1oAUM6A6Aytyhlem46pzPvCHfBMkjZI0R9KcVatW1WGJZmaWVZk4SULScKAMuCHf9IgYFxFlEVHWsWPHnVucmZnVixZFXPdbwF45w13ScVuRdDxwJXBsRHxSxHrMzKwBKWYLajZwgKRSSa2AocC03BkkHQL8GhgcEX8vYi1mZtbAFC2gImIjcCnwGLAYuD8iFkq6VtLgdLYbgLbAA5LmSZpWzerMzKyJKWYXHxExHZheZdzVOY+PL+bzm5lZw5WJkyTMzMyqckCZmVkmOaDMzCyTHFBmZpZJDigzM8skB5SZmWWSA8rMzDLJAWVmZpnkgDIzs0xyQJmZWSY5oMzMLJMcUGZmlkkOKDMzyyQHlJmZZZIDyszMMskBZWZmmVTUGxZaAzOmfX1XkE1jPqrvCsyaJAeUmVkh/AUuvyJ+gXMXn5mZZZIDyszMMskBZWZmmeSAMjOzTHJAmZlZJjmgzMwskxxQZmaWSQ4oMzPLJAeUmZllkgPKzMwyyQFlZmaZ5IAyM7NMckCZmVkmOaDMzCyTHFBmZpZJDigzM8skB5SZmWWSA8rMzDLJAWVmZplU1ICSNEjSEklLJX0/z/RdJE1Op/9VUtdi1mNmZg1H0QJKUnPgNuBEoCcwTFLPKrOdD3wQEfsDNwHXF6seMzNrWIrZguoPLI2I1yJiA3AfMKTKPEOAu9LHU4CBklTEmszMrIFoUcR1dwZW5AyvBA6rbp6I2CjpI2B34L3cmSSNAkalg+WSlhSl4iZOsAdV9r0B1/g7k/n9Ua26eX/sk29kMQOqzkTEOGBcfdfR2EmaExFl9V2HWRb5/bHzFbOL7y1gr5zhLum4vPNIagG0B1YXsSYzM2sgihlQs4EDJJVKagUMBaZVmWcaMCJ9/CXgiYiIItZkZmYNRNG6+NJjSpcCjwHNgd9ExEJJ1wJzImIacCdwj6SlwPskIWb1x92oZtXz+2MnkxssZmaWRb6ShJmZZZIDyszMMskBZXlJmrmd6T/YWbWY/askfUPSYkkTa7FMV0kLqpl2raTj665Cy8fHoGyHSCqPiLb1XYdZISS9DBwfEStrsUxX4JGIOKhYdVnN3IJqpNJvfy9LmiDpFUkTJR0v6RlJr0rqL2mMpN9ImiHpNUnfyFm+PP2/k6SnJM2TtEDS0ZJ+CrROxxX8jdSsPki6A9gX+IOk70l6VtILkmZK6p7O00vSrPQ1/ZKkA9LFm0saL2mhpP+V1Dqdf4KkL6WPB6brm5++nztHyE0AAAIASURBVHZJxy+XdI2k59NpPeph8xs0t6AaqfTb31LgEGAhye/SXiS5QO9g4DxgHvBvwHFAO2AJ8LmIqKhsIUm6AiiJiB+lFwBuExFr3YKyhkTScqAM2ACsS38GczxwcUScKekW4LmImJj+brM58FmS91BZRMyTdD8wLSJ+J2kC8Ej671VgYES8Iulu4PmI+EX6nP8dEbdIGg18PiK+vnO3vGFzC6pxez0i5kfEZpKQejz9IfR8oGs6z6MR8UlEvAf8neRNmWs2cJ6kMUDviFi7c0o3K4r2wAPpsaWbgF7p+GeBH0j6HrBPRPwjHf96RMxLH8/ln++bSt3TeV5Jh+8CjsmZ/mANy9p2OKAat09yHm/OGd7MP3+knTvPJqr8eDsiniJ5w70FTJB0bnFKNdspxgJPpseVTgVKACLiXpKehX8A0yV9MZ2/xvdHASqX35FlmzwHlNVI0j7A3yJiPPA/wOfTSRWSWtZfZWY7pD3/vCboyMqRkvYFXouIXwIPAwcXuL4lQFdJ+6fD5wB/rptSzQFl2zMAeFHSC8BXgJvT8eOAl3yShDUwPwN+kr6ec1s0XwYWSJo
HHATcXcjKImI9yfHcByTNJ+mduKNuS266fJKEmZllkltQZmaWSQ4oMzPLJAeUmZllkgPKzMwyyQFlZmaZ5IAyM7NMckCZmVkm/X/bvruXdSNHFQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [], + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4YXbwTychb0Q", + "outputId": "4fe386b3-d9e0-4702-c77a-fa7ffb8e6ebf" + }, + "source": [ + "# Use the GitHub version of TFCO\n", + "!pip install git+https://github.com/google-research/tensorflow_constrained_optimization\n", + "import tensorflow_constrained_optimization as tfco" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Collecting git+https://github.com/google-research/tensorflow_constrained_optimization\n", + " Cloning https://github.com/google-research/tensorflow_constrained_optimization to /tmp/pip-req-build-na3snryk\n", + " Running command git clone -q https://github.com/google-research/tensorflow_constrained_optimization /tmp/pip-req-build-na3snryk\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from tfco-nightly==0.3.dev20210712) (1.19.5)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from tfco-nightly==0.3.dev20210712) (1.4.1)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from tfco-nightly==0.3.dev20210712) (1.15.0)\n", + "Requirement already satisfied: tensorflow>=1.14 in /usr/local/lib/python3.7/dist-packages (from tfco-nightly==0.3.dev20210712) (2.5.0)\n", + "Requirement already satisfied: termcolor~=1.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.1.0)\n", + "Requirement already satisfied: astunparse~=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.6.3)\n", + "Requirement already satisfied: h5py~=3.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (3.1.0)\n", + "Requirement already satisfied: keras-nightly~=2.5.0.dev in 
/usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (2.5.0.dev2021032900)\n", + "Requirement already satisfied: tensorboard~=2.5 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (2.5.0)\n", + "Requirement already satisfied: absl-py~=0.10 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (0.12.0)\n", + "Requirement already satisfied: typing-extensions~=3.7.4 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (3.7.4.3)\n", + "Requirement already satisfied: gast==0.4.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (0.4.0)\n", + "Requirement already satisfied: grpcio~=1.34.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.34.1)\n", + "Requirement already satisfied: tensorflow-estimator<2.6.0,>=2.5.0rc0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (2.5.0)\n", + "Requirement already satisfied: wrapt~=1.12.1 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.12.1)\n", + "Requirement already satisfied: opt-einsum~=3.3.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (3.3.0)\n", + "Requirement already satisfied: protobuf>=3.9.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (3.17.3)\n", + "Requirement already satisfied: google-pasta~=0.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (0.2.0)\n", + "Requirement already satisfied: keras-preprocessing~=1.1.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.1.2)\n", + "Requirement already satisfied: flatbuffers~=1.12.0 in 
/usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.12)\n", + "Requirement already satisfied: wheel~=0.35 in /usr/local/lib/python3.7/dist-packages (from tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (0.36.2)\n", + "Requirement already satisfied: cached-property; python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from h5py~=3.1.0->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.5.2)\n", + "Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (57.0.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (3.3.4)\n", + "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (0.4.4)\n", + "Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.32.1)\n", + "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.0.1)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (2.23.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (0.6.1)\n", + "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.8.0)\n", + "Requirement already satisfied: importlib-metadata; 
python_version < \"3.8\" in /usr/local/lib/python3.7/dist-packages (from markdown>=2.6.8->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (4.6.0)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.3.0)\n", + "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (4.2.2)\n", + "Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3.6\" in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (4.7.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (0.2.8)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (2021.5.30)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (2.10)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (3.0.4)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata; python_version < 
\"3.8\"->markdown>=2.6.8->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (3.4.1)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (3.1.1)\n", + "Requirement already satisfied: pyasn1>=0.1.3 in /usr/local/lib/python3.7/dist-packages (from rsa<5,>=3.1.4; python_version >= \"3.6\"->google-auth<2,>=1.6.3->tensorboard~=2.5->tensorflow>=1.14->tfco-nightly==0.3.dev20210712) (0.4.8)\n", + "Building wheels for collected packages: tfco-nightly\n", + " Building wheel for tfco-nightly (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for tfco-nightly: filename=tfco_nightly-0.3.dev20210712-cp37-none-any.whl size=199274 sha256=0cf5a984216fd6779870302892eaf9d9d8eab8ee0dc4a74eda6e2fbf9af47c41\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-8lu3v5rh/wheels/c9/b3/c3/78e0691949466af462380554286105216cd95a9ae7cf08ee78\n", + "Successfully built tfco-nightly\n", + "Installing collected packages: tfco-nightly\n", + "Successfully installed tfco-nightly-0.3.dev20210712\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b9E3y1d01Hle", + "outputId": "3fb9f79c-60d2-44a1-ea4c-9bc7d2497548" + }, + "source": [ + "m, n = w1.shape\n", + "p = 2\n", + "u = tf.Variable([[np.random.rand()] * n], trainable=True)\n", + "v = tf.Variable([[np.random.rand()] * m], trainable=True)\n", + "s = tf.Variable([np.random.rand()] * p, trainable=True)\n", + "w = np.array([w1, w2])\n", + "\n", + "def loss_fn():\n", + " uv = u * tf.transpose(v)\n", + " s_tmp = tf.expand_dims(tf.expand_dims(s, axis=1), axis=2)\n", + " approx = tf.reduce_sum(w - s_tmp * tf.stack([uv] * p), axis=0)\n", + " for i in range(p):\n", + " A_tmp = tf.transpose(v) * w[i].T * u\n", + " if i == 0:\n", + " A = A_tmp * A_tmp\n", + " 
else:\n", + " A = A + (A_tmp * A_tmp)\n", + " return A - tf.norm(approx, ord='fro', axis=[-2, -1])\n", + "\n", + "class SampleProblem(tfco.ConstrainedMinimizationProblem):\n", + " def __init__(self, loss_fn, weights):\n", + " self._loss_fn = loss_fn\n", + " self._weights = weights\n", + " \n", + " @property\n", + " def num_constraints(self):\n", + " return 2\n", + " \n", + " def objective(self):\n", + " return loss_fn()\n", + " \n", + " def constraints(self):\n", + " u, s, v = self._weights\n", + " u_norm = tf.linalg.norm(u) # tf.math.reduce_euclidean_norm(u)\n", + " v_norm = tf.linalg.norm(v) # tf.math.reduce_euclidean_norm(v)\n", + " u_norm_eq_one = 1 - u_norm\n", + " v_norm_eq_one = 1 - v_norm\n", + " constraints = tf.stack([u_norm_eq_one, v_norm_eq_one])\n", + " # A = (u * tf.transpose(v)).numpy()\n", + " # for i in range(p):\n", + " # rank_eq_one = 1. - tf.Variable(tf.cast(tf.rank(s[i] * A), tf.float32))\n", + " # constraints = tf.stack([constraints, rank_eq_one])\n", + " return constraints\n", + "\n", + "problem = SampleProblem(loss_fn, [u, s, v])\n", + "optimizer = tfco.LagrangianOptimizer(\n", + " optimizer=tf.optimizers.Adagrad(learning_rate=0.1),\n", + " num_constraints=problem.num_constraints)\n", + "\n", + "var_list = [u, s, v] + list(problem.trainable_variables) + optimizer.trainable_variables()\n", + "\n", + "for i in range(1000):\n", + " optimizer.minimize(problem, var_list=var_list)\n", + " if i % 100 == 0:\n", + " print(f'step = {i}')\n", + " print(f'loss = {loss_fn()}')\n", + " # print(f'constraint = {(x + y).numpy()}')\n", + " # print(f'u = {u.numpy()}, v = {v.numpy()}')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "step = 0\n", + "loss = -662.9971313476562\n", + "step = 100\n", + "loss = -79729.0703125\n", + "step = 200\n", + "loss = -217770.546875\n", + "step = 300\n", + "loss = -395476.71875\n", + "step = 400\n", + "loss = -605416.375\n", + "step = 500\n", + "loss = -843243.875\n", + "step = 
600\n", + "loss = -1106010.625\n", + "step = 700\n", + "loss = -1391545.0\n", + "step = 800\n", + "loss = -1698159.125\n", + "step = 900\n", + "loss = -2024494.0\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 483 + }, + "id": "gBBmYLTn2iCD", + "outputId": "7a717cba-5775-4e5e-a276-9d1144d1b5ad" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "313/313 - 0s - loss: 0.3365 - accuracy: 0.8820\n", + "MNIST - Test accuracy: 0.9768999814987183\n", + "Fashion MNIST - Test accuracy: 0.8820000290870667\n", + "\n", + "313/313 - 0s - loss: 5028111.0000 - accuracy: 0.1000\n", + "MNIST - Test accuracy: 0.10279999673366547\n", + "Fashion MNIST - Test accuracy: 0.10000000149011612\n", + "\n", + "MNIST - Accuracy drop: 0.8740999847650528\n", + "Fashion MNIST - Accuracy drop: 0.7820000275969505\n" + ], + "name": "stdout" + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
def svd1(x_in, Verbose=False, correction_factor=1e-12):
    """
    @brief      Rank-1 SVD shorthand that tolerates a non-converging SVD.

    When `np.linalg.svd` raises, the matrix is shifted by `-correction_factor`
    and the decomposition is retried, for at most 1001 attempts. If no
    attempt yields a non-zero singular value, near-zero components (1e-15)
    are returned instead of looping forever.

    @param      x_in               2-D input matrix (left untouched)
    @param      Verbose            print a warning for every failed attempt
    @param      correction_factor  value subtracted from every entry before
                                   a retry

    @return     the first left singular vector (length m), the largest
                singular value and the first right singular vector (length n)
    """
    x = np.array(x_in, copy=True)
    m, n = x.shape
    max_tries = 1000
    for _ in range(max_tries + 1):
        try:
            u, s, v = np.linalg.svd(x, full_matrices=False)
        except np.linalg.LinAlgError as e:
            # ==================================================================
            # If SVD didn't converge, add a small error to the matrix and
            # repeat, otherwise the heuristic will always generate zero
            # components.
            # ==================================================================
            if Verbose:
                print("[WARNING] Exception caught: {}". format(e))
                if np.isnan(x).any():
                    print("[WARNING] The provided matrix contains NaNs.")
                if np.isinf(x).any():
                    print("[WARNING] The provided matrix contains Infs.")
            if not np.isfinite(x).all():
                # Shifting NaN/Inf entries cannot make the matrix valid.
                break
            # Out-of-place so that integer inputs are promoted to float.
            x = x - correction_factor
            continue
        if np.any(s):  # not all zeros
            return u.T[0], s[0], v[0]
        # Converged with an all-zero spectrum: retrying the very same matrix
        # would give the very same result.
        break
    # Fallback: components that are close to zero.
    return np.full((m,), 1e-15), 1e-15, np.full((n,), 1e-15)


def mse(a, b):
    """Mean of the element-wise squared difference between `a` and `b`."""
    return ((a - b)**2).mean()


def avg_abs_diff(a, b):
    """Mean of the element-wise absolute difference between `a` and `b`."""
    return np.absolute(a - b).mean()


def frobenius_norm(a, b):
    """Square root of `np.linalg.norm(a - b)` (note: root of the norm)."""
    return np.sqrt(np.absolute(np.linalg.norm(a - b)))


def frobenius(x):
    """Sum of the squared entries of `x`."""
    return (x**2).sum()


def check_threshold(U, U_step, V, V_step, stop_threshold=0.00005,
                    config='mse'):
    """
    @brief      Determines whether the vector refinement can stop.

    @param      U               current u vector
    @param      U_step          u vector of the previous step
    @param      V               current v vector
    @param      V_step          v vector of the previous step
    @param      stop_threshold  both distances must be strictly below it
    @param      config          distance: 'mse', 'avg', 'frobenius-norm'
                                or 'norm'

    @return     (stop, diff_u, diff_v)

    @throws     ValueError if `config` is not one of the supported names
    """
    distances = {
        'mse': mse,
        'avg': avg_abs_diff,
        'frobenius-norm': frobenius_norm,
        'norm': lambda a, b: np.linalg.norm(a - b),
    }
    if config not in distances:
        raise ValueError(
            'Unknown config {!r}; expected one of {}.'.format(
                config, sorted(distances)))
    distance = distances[config]
    diff_u = distance(U, U_step)
    diff_v = distance(V, V_step)
    if diff_v < stop_threshold and diff_u < stop_threshold:
        return True, diff_u, diff_v
    else:
        return False, diff_u, diff_v


def get_vec_from_largest_eig(x):
    """Eigenvector of the symmetric matrix `x` with the largest eigenvalue."""
    w, vr = np.linalg.eigh(x)
    return vr[:, np.argmax(w)]
def update_F(F, U, V):
    """
    @brief      Removes the rank-1 component spanned by U and V from F.

    @param      F   (k, m, n) stack of matrices (left untouched)
    @param      U   (m,) left vector
    @param      V   (n,) right vector

    @return     (F_residual, S) where S[i] = V.T @ F[i].T @ U and
                F_residual[i] = F[i] - S[i] * outer(U, V)
    """
    FT = np.transpose(F, axes=(0, 2, 1))
    S = V.T @ FT @ U
    A = np.outer(U, V)
    return F - S[:, None, None] * A, S


def _principal_vector(sym, decomposition, right=False):
    """Dominant vector of `sym` via eigh ('eigen') or via svd1 (otherwise)."""
    if decomposition == 'eigen':
        return get_vec_from_largest_eig(sym)
    u, _, v = svd1(sym)
    return v if right else u


def _left_vector(F, weights, v, decomposition):
    """Dominant vector of Fb.T @ Fb, with Fb[i] = weights[i] * F[i] @ v."""
    proj = weights[:, None] * (F @ v)  # (k, m)
    return _principal_vector(proj.T @ proj, decomposition)


def _right_vector(F, weights, u, decomposition):
    """Dominant vector of Fb.T @ Fb, with Fb[i] = weights[i] * F[i].T @ u."""
    proj = weights[:, None] * (np.transpose(F, axes=(0, 2, 1)) @ u)  # (k, n)
    return _principal_vector(proj.T @ proj, decomposition, right=True)


def algorithm2(F_in, r=1, stop_threshold=0.0001, hard_stop=100,
               decomposition='eigen', truncate=False, metric='mse',
               report_writer=None, report_step=0, scaler=None, Verbose=False):
    """
    @brief      Algorithm from "Synthesis and Optimization
                of 2D Filter Designs for Heterogeneous FPGAs"

    Each of the `r` rounds extracts one shared (u, v) pair plus one scaling
    factor per matrix, then subtracts that rank-1 component from the working
    copy of the matrices.

    @param      F_in            List or array of matrixes to approximate
                                (they must have same shape); not modified
    @param      r               The number of sub-matrices (rounds)
    @param      stop_threshold  threshold handed to check_threshold
    @param      hard_stop       maximum number of refinement steps per round
    @param      decomposition   'eigen' uses eigh, anything else uses svd1
    @param      truncate        lower r until k * (m + n + r) < k * m * n
                                (never below zero)
    @param      metric          distance name handed to check_threshold
    @param      report_writer   optional tf.summary writer
    @param      report_step     first step index used for the summaries
    @param      scaler          optional per-matrix weights (default: ones)
    @param      Verbose         currently unused

    @return     the vectors u (r, m), the factors s (r, k) and the
                vectors v (r, n)
    """
    # Always work on a float copy: integer inputs would otherwise truncate
    # the residual updates.
    F = np.array(F_in, dtype=float)
    k, m, n = F.shape
    if truncate:
        tot_elem = k * (m * n)
        appr_elem = k * (m + n + r)
        if appr_elem >= tot_elem:
            print('[WARNING] The r value ({}) is too high and will be truncated.'.format(r))
            # Stop at zero: a negative r cannot be used as an array size.
            while appr_elem >= tot_elem and r > 0:
                r -= 1
                appr_elem = k * (m + n + r)
            print('[WARNING] r set to {}.'. format(r))
    weights = np.ones(k) if scaler is None else np.asarray(scaler, dtype=float)
    u_array = np.zeros((r, m))
    s_array = np.zeros((r, k))
    v_array = np.zeros((r, n))
    # NOTE: Given a matrix A, we have that: A @ A.T is symmetrical! Meaning
    # that, for the spectral theorem, it has real eigenvalues!
    for j in range(r):
        # Form the (m x m) matrix Fn: sum[0,k-1](F[i] @ F[i].T)
        Fn = np.zeros((m, m))
        for w, F_i in zip(weights, F):
            Fn += (w * F_i) @ F_i.T
        U = _principal_vector(Fn, decomposition)
        V = _right_vector(F, weights, U, decomposition)
        for _ in range(hard_stop):
            u = _left_vector(F, weights, V, decomposition)
            v = _right_vector(F, weights, u, decomposition)
            # Until u and v vectors change less than a pre-specified
            # value that is set by the user
            stop_refinement, _, _ = check_threshold(u, U, v, V, stop_threshold, metric)
            if report_writer is not None:
                with report_writer.as_default():
                    F_tmp, _ = update_F(F, U, V)
                    for p, (f_in, f_tmp) in enumerate(zip(F_in, F_tmp)):
                        tf.summary.scalar(f'Alg2 - MSE {p}-th sub-matrix', mse(f_in, f_tmp), report_step, description=f'Algorithm2 MSE(x, x_approx) of the {p}-th sub-matrix.')
                        tf.summary.scalar(f'Alg2 - Mean {p}-th sub-matrix',f_tmp.mean(), report_step, description=f'Algorithm2 Mean(x_approx) of the {p}-th sub-matrix.')
                    report_writer.flush()
                report_step += 1
            U, V = u, v
            if stop_refinement:
                break
        if report_writer is not None and r > 1:
            with report_writer.as_default():
                F_tmp, _ = update_F(F, U, V)
                for p, (f_in, f_tmp) in enumerate(zip(F_in, F_tmp)):
                    tf.summary.scalar(f'Alg2 - Final MSE {p}-th sub-matrix', mse(f_in, f_tmp), report_step, description=f'Algorithm2 MSE(x, x_approx) of the {p}-th sub-matrix.')
                report_writer.flush()
            report_step += 1
        # Subtract the extracted component before the next round.
        F, S = update_F(F, U, V)
        u_array[j] = U
        s_array[j] = S
        v_array[j] = V
    return u_array, s_array, v_array
def algorithm2_inverse(U, S, V, F=None):
    """
    @brief      Rebuilds the matrices encoded by one algorithm2 result.

    @param      U   (r, m) left vectors
    @param      S   (r, k) scaling factors
    @param      V   (r, n) right vectors
    @param      F   optional list: the k rebuilt (m, n) matrices are
                    appended to it

    @return     (k, m, n) array holding sum_r S[r, k] * outer(U[r], V[r])
    """
    F_tmp = np.einsum('rk, rm, rn->kmn', S, U, V)
    if isinstance(F, list):
        F.extend(F_tmp)
    return F_tmp


def _stack_inputs(x_in):
    """Turns a list of equally shaped matrices (or an array) into a float (k, m, n) array."""
    if isinstance(x_in, list):
        x_in = np.stack(x_in)
    return np.asarray(x_in, dtype=float)


def _truncate_refinements(num_refinements, k, m, n, floor=0):
    """Lowers num_refinements (not below `floor`) until the approximation is smaller than the k original matrices."""
    tot_elem = k * (m * n)
    appr_elem = num_refinements * (m + n + k)
    if appr_elem >= tot_elem:
        print(f'[WARNING] The num_refinements value ({num_refinements}) is too high and will be truncated.')
        while appr_elem >= tot_elem and num_refinements > floor:
            num_refinements -= 1
            appr_elem = num_refinements * (m + n + k)
        print(f'[WARNING] num_refinements set to {num_refinements}.')
    return num_refinements


def _report_alg3(report_writer, x, x_approx, step):
    """Writes the Algorithm 3 MSE/mean summaries for one refinement step."""
    if report_writer is None:
        return
    with report_writer.as_default():
        tf.summary.scalar('Alg3 - MSE', mse(x, x_approx), step, description='Algorithm3 MSE(x, x_approx).')
        for p, (f_in, f_tmp) in enumerate(zip(x, x_approx)):
            tf.summary.scalar(f'Alg3 - MSE {p}-th sub-matrix', mse(f_in, f_tmp), step, description=f'Algorithm3 MSE(x, x_approx) of the {p}-th sub-matrix.')
            tf.summary.scalar(f'Alg3 - Mean {p}-th sub-matrix', f_tmp.mean(), step, description=f'Algorithm3 Mean(x_approx) of the {p}-th sub-matrix.')
            tf.summary.scalar(f'Alg3 - Mean Original {p}-th sub-matrix', f_in.mean(), step, description=f'Algorithm3 Mean(x) of the {p}-th sub-matrix.')
        report_writer.flush()


def _record_plotdata(plotdata, x, x_approx):
    """Appends the per-matrix MSE of the current approximation to `plotdata`."""
    if plotdata is None:
        return
    for p, (f_in, f_tmp) in enumerate(zip(x, x_approx)):
        plotdata.setdefault(f'MSE(matrix[{p}])', []).append(mse(f_in, f_tmp))


def _refine_with_scaling(x, x_approx, weights, u, s, v, first, alg2_kwargs,
                         report_writer=None, plotdata=None):
    """
    Fills u/s/v[first:] by decomposing the residual of the *scaled* matrices.

    The residual is tracked entirely in the scaled domain; the stored factors
    s[i] are divided by the per-matrix weights so that (u, s, v) rebuild the
    unscaled matrices. `x_approx` is updated in place.
    """
    scale = weights[:, None, None]
    x_scaled = scale * x
    approx_scaled = scale * x_approx
    for i in range(first, u.shape[0]):
        u[i], s_scaled, v[i] = algorithm2(x_scaled - approx_scaled,
                                          scaler=None, **alg2_kwargs)
        approx_scaled += algorithm2_inverse(u[i], s_scaled, v[i])
        # The last axis of s is the matrix index: undo the scaling there.
        s[i] = s_scaled / weights
        x_approx += algorithm2_inverse(u[i], s[i], v[i])
        _report_alg3(report_writer, x, x_approx, i)
        _record_plotdata(plotdata, x, x_approx)


def algorithm3(x_in, num_refinements=1, num_sub_matrix=1, truncate=False,
               stop_threshold=0.0001, hard_stop=100, decomposition='eigen',
               metric='mse', scaler=None, report_writer=None,
               apply_scaling_at_alg2=True, plotdata=None):
    """
    @brief      Approximates k matrices by repeatedly running algorithm2 on
                the residual of the current approximation.

    @param      x_in                   list of (m, n) matrices or a (k, m, n)
                                       array; not modified
    @param      num_refinements        number of algorithm2 runs
    @param      num_sub_matrix         `r` handed to algorithm2
    @param      truncate               lower num_refinements until the
                                       approximation is smaller than the
                                       originals (also handed to algorithm2)
    @param      scaler                 optional per-matrix weights
    @param      report_writer          optional tf.summary writer
    @param      apply_scaling_at_alg2  True: weights are handed to algorithm2;
                                       False: the matrices are scaled here and
                                       the factors are scaled back
    @param      plotdata               optional dict collecting the per-matrix
                                       MSE after each refinement

    @return     u (R, r, m), s (R, r, k), v (R, r, n)
    """
    x = _stack_inputs(x_in)
    k, m, n = x.shape
    if truncate:
        num_refinements = _truncate_refinements(num_refinements, k, m, n)
    u = np.zeros((num_refinements, num_sub_matrix, m))
    s = np.zeros((num_refinements, num_sub_matrix, k))
    v = np.zeros((num_refinements, num_sub_matrix, n))
    report_steps = 0
    alg2_kwargs = dict(r=num_sub_matrix, stop_threshold=stop_threshold,
                       hard_stop=hard_stop, decomposition=decomposition,
                       truncate=truncate, metric=metric,
                       report_writer=report_writer, report_step=report_steps)
    x_approx = np.zeros(x.shape)
    if apply_scaling_at_alg2:
        # ======================================================================
        # Apply scaling at Algorithm 2 (Default)
        # ======================================================================
        for i in range(num_refinements):
            u[i], s[i], v[i] = algorithm2(x - x_approx, scaler=scaler,
                                          **alg2_kwargs)
            x_approx += algorithm2_inverse(u[i], s[i], v[i])
            _report_alg3(report_writer, x, x_approx, i)
            _record_plotdata(plotdata, x, x_approx)
    else:
        # ======================================================================
        # Apply scaling at Algorithm 3
        # ======================================================================
        weights = np.ones(k) if scaler is None else np.asarray(scaler, dtype=float)
        _refine_with_scaling(x, x_approx, weights, u, s, v, 0, alg2_kwargs,
                             report_writer, plotdata)
    return u, s, v


def algorithm3_inverse(U, S, V):
    """
    @brief      Given the u, s, v vectors of all refinement steps,
                reconstruct the approximated matrixes.

    @param      U   (R, r, m) u vectors
    @param      S   (R, r, k) scaling factors
    @param      V   (R, r, n) v vectors

    @return     the reconstructed (k, m, n) approximation, summed over the
                refinement steps and the sub-matrices
    """
    u, s, v = np.array(U), np.array(S), np.array(V)
    return np.einsum('irk, irm, irn->kmn', s, u, v)


def algorithm3_extra_refinements(x_in, u_in, s_in, v_in, num_refinements=1, num_sub_matrix=1, truncate=False,
                                 stop_threshold=0.0001, hard_stop=100, decomposition='eigen',
                                 metric='mse', scaler=None,
                                 report_writer=None):
    """
    @brief      Extends an existing algorithm3 result with more refinements.

    The first u_in.shape[0] components are copied as they are; the remaining
    ones are computed on the (scaled) residual left by those components.

    @param      x_in             list of (m, n) matrices or (k, m, n) array
    @param      u_in             previous u, shape (R_prev, r, m)
    @param      s_in             previous s, shape (R_prev, r, k)
    @param      v_in             previous v, shape (R_prev, r, n)
    @param      num_refinements  total number of refinements (>= R_prev)
    @param      scaler           optional per-matrix weights

    @return     u, s, v with num_refinements entries on the first axis
    """
    previous_num_refinements = u_in.shape[0]
    assert previous_num_refinements <= num_refinements, f'Previous #Refinements ({previous_num_refinements}) must be less then num_refinements ({num_refinements}).'
    x = _stack_inputs(x_in)
    k, m, n = x.shape
    if truncate:
        # Never drop below the components that are already there.
        num_refinements = _truncate_refinements(num_refinements, k, m, n,
                                                floor=previous_num_refinements)
    u = np.zeros((num_refinements, num_sub_matrix, m))
    s = np.zeros((num_refinements, num_sub_matrix, k))
    v = np.zeros((num_refinements, num_sub_matrix, n))

    u[:previous_num_refinements] = u_in
    s[:previous_num_refinements] = s_in
    v[:previous_num_refinements] = v_in

    report_steps = 0
    alg2_kwargs = dict(r=num_sub_matrix, stop_threshold=stop_threshold,
                       hard_stop=hard_stop, decomposition=decomposition,
                       truncate=truncate, metric=metric,
                       report_writer=report_writer, report_step=report_steps)
    weights = np.ones(k) if scaler is None else np.asarray(scaler, dtype=float)
    x_approx = algorithm3_inverse(u_in, s_in, v_in)
    _refine_with_scaling(x, x_approx, weights, u, s, v,
                         previous_num_refinements, alg2_kwargs, report_writer)
    return u, s, v
# Registry of the Keras models used in this notebook, keyed by model ID.
models = {}

checkpoint_dir = '/gdrive/My Drive/checkpoints/svd/'


def save_model(model_name):
    """Saves models[model_name] under checkpoint_dir, plus its weights as '<name>.h5'."""
    destination = checkpoint_dir + model_name
    network = models[model_name]
    network.save(destination)
    network.save_weights(destination + '.h5')
    print(f'Model saved at: {destination}')


def load_model(model_name):
    """
    Loads a saved model from checkpoint_dir into models[model_name].

    Returns True only when both the saved-model directory and the matching
    '<name>.h5' weights file exist; returns False otherwise (the model is
    still registered when only the directory exists).
    """
    location = checkpoint_dir + model_name
    if not os.path.isdir(location):
        print(f'Model "{model_name}" not found in: {checkpoint_dir}')
        return False
    models[model_name] = tf.keras.models.load_model(location)
    if not os.path.isfile(location + '.h5'):
        print(f'Model "{model_name}" loaded without weights.')
        return False
    print(f'Model "{model_name}" loaded with weights.')
    return True


def lstm_pipe(in_layer, lstm_name=''):
    """Conv1D -> Conv1D -> bidirectional LSTM -> LSTM named 'LSTM_<lstm_name>'."""
    layers = tf.keras.layers
    conv = layers.Conv1D(row_hidden, kernel_size=3, padding = 'same')(in_layer)
    conv = layers.Conv1D(row_hidden, kernel_size=3, padding = 'same')(conv)
    row_encoder = layers.Bidirectional(layers.LSTM(row_hidden, return_sequences = True))
    head = layers.LSTM(col_hidden, name='LSTM_' + lstm_name)
    return head(row_encoder(conv))
IMG_SIZE = 224
BATCH_SIZE = 64
EPOCHS = 10

MAX_SEQ_LENGTH = 20
NUM_FEATURES = 2048

# The following two methods are taken from this tutorial:
# https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub


def crop_center_square(frame):
    """Crops the largest centered square out of a (height, width, ...) frame."""
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]


def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    """
    Reads a video with OpenCV; each frame is center-cropped, resized to
    `resize` and has its last axis reordered with [2, 1, 0].

    @param      path        video file path
    @param      max_frames  stop after this many frames (0: read them all)
    @param      resize      target size handed to cv2.resize

    @return     array of the processed frames (empty when nothing is read)
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)

            if len(frames) == max_frames:
                break
    finally:
        # Always give the capture handle back, even when a frame fails.
        cap.release()
    return np.array(frames)


def build_feature_extractor():
    """Builds an ImageNet InceptionV3 (no top, average pooling) wrapped with its own input preprocessing."""
    feature_extractor = tf.keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    preprocess_input = tf.keras.applications.inception_v3.preprocess_input

    inputs = tf.keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)

    outputs = feature_extractor(preprocessed)
    return tf.keras.Model(inputs, outputs, name="feature_extractor")


def encode_video_frames(frames, predict_fn, max_len=MAX_SEQ_LENGTH,
                        num_features=NUM_FEATURES):
    """
    Encodes up to `max_len` frames of one video.

    @param      frames        array whose first axis indexes the frames
    @param      predict_fn    callable mapping a batch of one frame to its
                              feature vector
    @param      max_len       number of time steps of the output
    @param      num_features  length of one feature vector

    @return     (features, mask): float32 (max_len, num_features) and bool
                (max_len,); mask is True for the steps that hold a frame
    """
    features = np.zeros(shape=(max_len, num_features), dtype="float32")
    mask = np.zeros(shape=(max_len,), dtype="bool")
    # The number of frames is the length of the FIRST axis.
    length = min(max_len, len(frames))
    for j in range(length):
        features[j, :] = np.asarray(predict_fn(frames[None, j, :])).reshape(-1)
    mask[:length] = 1  # 1 = not masked, 0 = masked
    return features, mask


def prepare_all_videos(df, root_dir):
    """
    Extracts the frame features and masks of every video listed in `df`.

    @param      df        table with the columns "video_name" and "tag"
    @param      root_dir  directory the video names are relative to

    @return     ((frame_features, frame_masks), labels)
    """
    num_samples = len(df)
    video_paths = df["video_name"].values.tolist()
    labels = df["tag"].values
    labels = label_processor(labels[..., None]).numpy()

    # `frame_masks` and `frame_features` are what we will feed to our sequence model.
    # `frame_masks` will contain a bunch of booleans denoting if a timestep is
    # masked with padding or not.
    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    # For each video.
    for idx, path in enumerate(video_paths):
        frames = load_video(os.path.join(root_dir, path))
        frame_features[idx,], frame_masks[idx,] = encode_video_frames(
            frames, feature_extractor.predict
        )

    return (frame_features, frame_masks), labels
def get_sequence_model():
    """Builds and compiles the two-layer LSTM classifier fed with frame features and a frame mask."""
    class_vocab = label_processor.get_vocabulary()

    frame_features_input = tf.keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_input = tf.keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    # Refer to the following tutorial to understand the significance of using `mask`:
    # https://keras.io/api/layers/recurrent_layers/gru/
    x = tf.keras.layers.LSTM(64, return_sequences=True, name='LSTM_1')(frame_features_input, mask=mask_input)
    x = tf.keras.layers.LSTM(32, name='LSTM_2')(x)
    x = tf.keras.layers.Dropout(0.4)(x)
    x = tf.keras.layers.Dense(8, activation="relu")(x)
    output = tf.keras.layers.Dense(len(class_vocab), activation="softmax")(x)

    rnn_model = tf.keras.Model([frame_features_input, mask_input], output)

    rnn_model.compile(
        loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
    )
    return rnn_model


# Utility for running experiments.
def run_experiment():
    """Trains a fresh sequence model, reloads its best checkpoint and returns (history, model)."""
    filepath = "/tmp/video_classifier"
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath, save_weights_only=True, save_best_only=True, verbose=1
    )
    seq_model = get_sequence_model()
    history = seq_model.fit(
        [train_data[0], train_data[1]],
        train_labels,
        validation_split=0.3,
        epochs=EPOCHS,
        callbacks=[checkpoint],
    )
    # Evaluate the best weights seen during training, not the last ones.
    seq_model.load_weights(filepath)
    _, accuracy = seq_model.evaluate([test_data[0], test_data[1]], test_labels)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")
    return history, seq_model


def prepare_single_video(frames, predict_fn=None, max_seq_length=None,
                         num_features=None):
    """
    Encodes the frames of one video for the sequence model.

    @param      frames          array whose first axis indexes the frames
    @param      predict_fn      callable mapping a batch of one frame to its
                                features (default: feature_extractor.predict)
    @param      max_seq_length  time steps of the output
                                (default: MAX_SEQ_LENGTH)
    @param      num_features    feature vector length (default: NUM_FEATURES)

    @return     (features, mask) with shapes (1, max_seq_length, num_features)
                and (1, max_seq_length); mask is True where a frame exists
    """
    if predict_fn is None:
        predict_fn = feature_extractor.predict
    if max_seq_length is None:
        max_seq_length = MAX_SEQ_LENGTH
    if num_features is None:
        num_features = NUM_FEATURES
    frame_mask = np.zeros(shape=(1, max_seq_length,), dtype="bool")
    frame_features = np.zeros(shape=(1, max_seq_length, num_features), dtype="float32")
    # The number of frames is the length of the FIRST axis.
    length = min(max_seq_length, len(frames))
    for j in range(length):
        frame_features[0, j, :] = np.asarray(predict_fn(frames[None, j, :])).reshape(-1)
    frame_mask[0, :length] = 1  # 1 = not masked, 0 = masked
    return frame_features, frame_mask


def sequence_prediction(path):
    """Prints the class probabilities (highest first) of the test video `path` and returns its frames."""
    class_vocab = label_processor.get_vocabulary()

    frames = load_video(os.path.join("test", path))
    frame_features, frame_mask = prepare_single_video(frames)
    probabilities = models['cnn-lstm'].predict([frame_features, frame_mask])[0]

    for i in np.argsort(probabilities)[::-1]:
        print(f"  {class_vocab[i]}: {probabilities[i] * 100:5.2f}%")
    return frames


# This utility is for visualization.
# Referenced from:
# https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub
def to_gif(images):
    """Writes `images` to "animation.gif" (10 fps) and returns its notebook embedding."""
    converted_images = images.astype(np.uint8)
    imageio.mimsave("animation.gif", converted_images, fps=10)
    return embed.embed_file("animation.gif")
+"""# Scaled SVD Approximation"""
+
+def get_approx_size(R, k, m, n):
+    """Return `R * (m + n + k)`.
+
+    Presumably the number of values stored when `k` matrices of shape (m, n)
+    share `R` refinement steps (one m-vector, one n-vector and k scalars per
+    step) -- confirm against `algorithm3`.
+    """
+    return R * (m + n + k)
+
+def plot_accuracies(model_labels, original_accuracies, approx_accuracies):
+    """Draw a grouped bar chart of original vs. approximated accuracy.
+
+    All three arguments must have the same length: one label and one pair of
+    accuracies per model.
+    """
+    x = np.arange(len(model_labels))  # the label locations
+    width = 0.35  # the width of the bars
+    fig, ax = plt.subplots()
+    ax.bar(x - width/2, original_accuracies, width, label='Original')
+    ax.bar(x + width/2, approx_accuracies, width, label='Approximated')
+    # Add some text for labels, title and custom x-axis tick labels, etc.
+    ax.set_ylabel('Accuracy')
+    ax.set_title('Original Accuracy vs. Approximated.')
+    ax.set_xticks(x)
+    ax.set_xticklabels(model_labels)
+    ax.legend()
+    fig.tight_layout()
+    plt.show()
+
+models['fashion'].summary()
+mnist_dense = [w1, b1] = models['mnist'].get_layer('dense_1').get_weights()
+fashion_mnist_dense = [w2, b2] = models['fashion'].get_layer('dense_1').get_weights()
+print(w1.shape, w2.shape)
+
+# Commented out IPython magic to ensure Python compatibility.
+R = 45
+metric = 'mse'
+threshold = 1e-5
+scalers = [1., 1.]
+
+def run_alg3_for_mnist(scaler, apply_scaling_at_alg2, log_to_tensorboard=False):
+    """Jointly approximate the `dense_1` weights of the MNIST and Fashion-MNIST models.
+
+    Runs `algorithm3` / `algorithm3_inverse` on both weight matrices using the
+    module-level `R`, `metric` and `threshold`, prints the test accuracy before
+    and after swapping in the approximated weights, then restores the original
+    weights and plots the comparison.
+
+    Args:
+        scaler: forwarded to `algorithm3` as `scaler`; may be None.
+        apply_scaling_at_alg2: forwarded to `algorithm3` unchanged.
+        log_to_tensorboard: when True, create a TensorBoard summary writer and
+            pass it to `algorithm3` as `report_writer`.
+
+    Returns:
+        (w_approx[0], w_approx[1], plotdata): the two approximated matrices and
+        the `plotdata` dict that was handed to `algorithm3`.
+    """
+    if log_to_tensorboard:
+        # The baseline run passes scaler=None; do not index into it blindly.
+        scaler_tags = (None, None) if scaler is None else (scaler[0], scaler[1])
+        logname = 'mnist'
+        logname += f'_R{R}_scaler0_{scaler_tags[0]}_scaler1_{scaler_tags[1]}_{metric}_Th{threshold}'
+        logname += '_date' + datetime.now().strftime('%Y%m%d-%H%M%S')
+        writer = tf.summary.create_file_writer(os.path.join('/gdrive/My Drive/Colab Notebooks/svd/tensorboard', logname))
+        # %reload_ext tensorboard
+    else:
+        writer = None
+
+    [w1, b1] = models['mnist'].get_layer('dense_1').get_weights()
+    [w2, b2] = models['fashion'].get_layer('dense_1').get_weights()
+    plotdata = {}
+    w_approx = algorithm3_inverse(*algorithm3([w1, w2],
+                                              num_refinements=R,
+                                              num_sub_matrix=1,
+                                              decomposition='eigen',
+                                              stop_threshold=threshold,
+                                              metric=metric,
+                                              scaler=scaler,
+                                              report_writer=writer,
+                                              apply_scaling_at_alg2=apply_scaling_at_alg2,
+                                              plotdata=plotdata))
+    # Obtain original accuracies.
+    _, mnist_acc = models['mnist'].evaluate(x_test, y_test, verbose=0)
+    _, fashion_mnist_acc = models['fashion'].evaluate(fashion_test_images, fashion_test_labels, verbose=0)
+    print('')
+    print('[ORIGINAL] MNIST - Test accuracy: {:.1f}%'.format(100. * mnist_acc))
+    print('[ORIGINAL] Fashion MNIST - Test accuracy: {:.1f}%'.format(100. * fashion_mnist_acc))
+
+    # Set approximated weights and evaluate.
+    models['mnist'].get_layer('dense_1').set_weights([w_approx[0], b1])
+    models['fashion'].get_layer('dense_1').set_weights([w_approx[1], b2])
+    print('')
+
+    _, mnist_acc_approx = models['mnist'].evaluate(x_test, y_test, verbose=0)
+    _, fashion_mnist_acc_approx = models['fashion'].evaluate(fashion_test_images, fashion_test_labels, verbose=0)
+    print('[APPROX] MNIST - Test accuracy: {:.1f}%'.format(100. * mnist_acc_approx))
+    print('[APPROX] Fashion MNIST - Test accuracy: {:.1f}%'.format(100. * fashion_mnist_acc_approx))
+
+    print('\nMNIST - Accuracy drop: {:.1f}%'.format(100. * (mnist_acc - mnist_acc_approx)))
+    print('Fashion MNIST - Accuracy drop: {:.1f}%'.format(100. * (fashion_mnist_acc - fashion_mnist_acc_approx)))
+    print('')
+
+    # Restore original weights and plot.
+    models['mnist'].get_layer('dense_1').set_weights([w1, b1])
+    models['fashion'].get_layer('dense_1').set_weights([w2, b2])
+
+    plot_accuracies(['mnist', 'fashion'], [mnist_acc, fashion_mnist_acc],
+                    [mnist_acc_approx, fashion_mnist_acc_approx])
+    return w_approx[0], w_approx[1], plotdata
+
+"""## Baseline"""
+
+_, _, plotdata = run_alg3_for_mnist(None, apply_scaling_at_alg2=True)
+
+for matrix in plotdata:
+    plt.plot(plotdata[matrix], label=matrix)
+    print(plotdata[matrix][40])
+plt.legend()
+
+"""## Applying Scaling at Algorithm 3"""
+
+_, _, plotdata_alg3 = run_alg3_for_mnist([1., 1000.], apply_scaling_at_alg2=False)
+
+for matrix in plotdata_alg3:
+    plt.plot(plotdata_alg3[matrix], label=matrix)
+    print(plotdata_alg3[matrix][40])
+plt.legend()
+
+"""## Applying Scaling at Algorithm 2"""
+
+_, _, plotdata_alg2 = run_alg3_for_mnist([1., 20.], apply_scaling_at_alg2=True)
+
+# Plot the curves of THIS run (the original re-plotted the baseline `plotdata`).
+for matrix in plotdata_alg2:
+    plt.plot(plotdata_alg2[matrix], label=matrix)
+plt.legend()
+
+
+
+"""## Additional Plotting"""
+
+# NOTE(review): `w1_approx` is not assigned anywhere in the cells above (the
+# runs discard their matrices into `_`); it presumably comes from an earlier
+# cell -- confirm before running top to bottom.
+plt.subplot(1, 3, 1)
+plt.imshow(w1[:16,:16]) #, cmap=plt.cm.BuPu_r)
+plt.subplot(1, 3, 2)
+plt.imshow(w1_approx[:16,:16]) #, cmap=plt.cm.BuPu_r)
+plt.subplot(1, 3, 3)
+plt.imshow(w1[:16,:16] - w1_approx[:16,:16]) #, cmap=plt.cm.BuPu_r)
+
+plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
+cax = plt.axes([0.85, 0.1, 0.075, 0.8])
+plt.colorbar(cax=cax)
+plt.show()
+
+n, bins, patches = plt.hist(w1_approx)
+plt.show()
+
+print(f'Mean - orig/approx: {w1.mean()} / {w1_approx.mean()}')
+print(f'Standard dev - orig/approx: {w1.std()} / {w1_approx.std()}')
+
+cr = []
+
+# Compression ratio of two (m, n) matrices for a growing number of refinements;
+# stop as soon as the ratio drops below 2.
+m, n = w1.shape
+for r in range(R, (min(m, n))):
+    tmp = 2 * m * n / get_approx_size(r, 2, m, n)
+    if tmp < 2:
+        break
+    cr.append(tmp)
+
+print('Current Compression Ratio (CR):', 2 * m * n / get_approx_size(R, 2, m, n))
+plt.plot(cr)
+
+"""# More than 2 Matrix Approximation
+
+## Dividing weight matrix into 4
+"""
+
+models['fashion'].summary()
+mnist_dense = [w, b] = models['mnist'].get_layer('dense_1').get_weights()
+print(w.shape)
+r, h = w.shape[0] // 2, w.shape[1] // 2
+blocks = np.reshape(w, (4, r, h))
+tmp = np.reshape(blocks, w.shape)
+print(blocks.shape)
+print(np.allclose(w, tmp))
+
+# Commented out IPython magic to ensure Python compatibility.
+R = 64
+metric = 'mse'
+threshold = 1e-5
+scaler = [1.] * blocks.shape[0]
+
+logname = 'mnist_4blocks'
+logname += f'_R{R}_scaler0_{scaler[0]}_scaler1_{scaler[1]}_{metric}_Th{threshold}'
+logname += '_date' + datetime.now().strftime('%Y%m%d-%H%M%S')
+writer = tf.summary.create_file_writer(os.path.join('/gdrive/My Drive/Colab Notebooks/svd/tensorboard', logname))
+# %reload_ext tensorboard
+
+# The `%time` magic was dropped (not just commented out) so that the call is
+# valid Python and `w_approx` is actually assigned.
+w_approx = algorithm3_inverse(*algorithm3(blocks,
+                                          num_refinements=R,
+                                          num_sub_matrix=1,
+                                          decomposition='eigen',
+                                          stop_threshold=threshold,
+                                          metric=metric,
+                                          scaler=scaler,
+                                          report_writer=writer))
+
+k, m, n = blocks.shape
+print(f'CR: {k * m * n / get_approx_size(R, k, m, n)}')
+
+models['mnist'].get_layer('dense_1').set_weights([w, b])
+_, mnist_acc = models['mnist'].evaluate(x_test, y_test, verbose=0)
+print('MNIST - Test accuracy:', mnist_acc)
+
+models['mnist'].get_layer('dense_1').set_weights([np.reshape(w_approx, w.shape), b])
+print('')
+
+_, mnist_acc_approx = models['mnist'].evaluate(x_test, y_test, verbose=0)
+print('MNIST - Test accuracy:', mnist_acc_approx)
+
+print('\nMNIST - Accuracy drop:', mnist_acc - mnist_acc_approx)
+
+plot_accuracies(['mnist'], [mnist_acc], [mnist_acc_approx])
+
+"""# Different Sizes Approximation"""
+
+w1 = np.random.randint(9, size=(4, 8))
+w2 = np.random.randint(9, size=(5, 4))
+
+max_shape = max_r, max_c = max(w1.shape[0], w2.shape[0]), max(w1.shape[1], w2.shape[1])
+print(max_shape)
+
+padded_w1 = np.zeros(max_shape)
+padded_w2 = np.zeros(max_shape)
+padded_w1[:w1.shape[0], :w1.shape[1]] = w1
+padded_w2[:w2.shape[0], :w2.shape[1]] = w2
+
+print(w1)
+print(w2)
+print(padded_w1)
+print(padded_w2)
+
+def pad_matrices(matrices):
+    """Zero-pad 2-D matrices to a common shape.
+
+    Every matrix is copied into the top-left corner of a new
+    `np.zeros((max_rows, max_cols))` array, where the maxima are taken over all
+    inputs. Returns the padded arrays as a list in input order.
+    """
+    max_r = max(mat.shape[0] for mat in matrices)
+    max_c = max(mat.shape[1] for mat in matrices)
+    padded_matrices = []
+    for mat in matrices:
+        z = np.zeros((max_r, max_c))
+        z[:mat.shape[0], :mat.shape[1]] = mat
+        padded_matrices.append(z)
+    return padded_matrices
+
+pad_matrices([w1, w2])
+
+import operator
+
+def get_cr_multi_size_matrix(R, matrices):
+    """Compression ratio for 2-D matrices of different sizes.
+
+    The approximation size is `get_approx_size` evaluated at the largest row
+    and column counts (i.e. the padded shape); the original size is the sum of
+    the element counts of the unpadded matrices.
+    """
+    max_m = max(mat.shape[0] for mat in matrices)
+    max_n = max(mat.shape[1] for mat in matrices)
+    approx_size = get_approx_size(R, len(matrices), max_m, max_n)
+    orig_size = sum(operator.mul(*mat.shape) for mat in matrices)
+    return orig_size / approx_size
+
+w1 = np.random.randint(9, size=(4, 8))
+w2 = np.random.randint(9, size=(5, 4))
+
+orig_size = operator.mul(*w1.shape) + operator.mul(*w2.shape)
+print(orig_size)
+get_cr_multi_size_matrix(1, [w1, w2])
+
+cr = []
+cr_small = []
+cr_big = []
+
+small_size = 128
+medium_size = 512
+large_size = 1024
+w1 = np.random.randint(9, size=(medium_size, small_size))
+w2 = np.random.randint(9, size=(large_size, medium_size))
+
+for r in range(small_size // 4, small_size):
+    m, n = w1.shape
+    tmp = 2 * m * n / get_approx_size(r, 2, m, n)
+    cr_small.append(tmp)
+    m, n = w2.shape
+    tmp = 2 * m * n / get_approx_size(r, 2, m, n)
+    cr_big.append(tmp)
+    tmp = get_cr_multi_size_matrix(r, [w1, w2])
+    if tmp < 2:
+        break
+    cr.append(tmp)
+
+plt.plot(cr, label='CR')
+plt.plot(cr_small, label='CR_small')
+plt.plot(cr_big, label='CR_big')
+plt.legend()
+
+"""## CNN-LSTM Model"""
+
+print(models['cnn-lstm'].get_layer('LSTM_1').get_weights()[0].shape)
+print(models['cnn-lstm'].get_layer('LSTM_2').get_weights()[0].shape)
+print(len(models['cnn-lstm'].get_layer('LSTM_1').get_weights()))
+print(len(models['cnn-lstm'].get_layer('LSTM_2').get_weights()))
+
+print(models['cnn-lstm'].get_layer('LSTM_2').get_weights()[1].shape)
+
+[w1, r1, b1] = models['cnn-lstm'].get_layer('LSTM_1').get_weights()
+[w2, r2, b2] = models['cnn-lstm'].get_layer('LSTM_2').get_weights()
+
+[w1_pad, w2_pad] = pad_matrices([w1, w2])
+print(w1_pad, w2_pad)
+print(w1_pad.shape)
+print(w2_pad.shape)
+
+# Commented out IPython magic to ensure Python compatibility.
+R = 25
+metric = 'mse'
+threshold = 1e-5
+scaler = [1., 1.]
+
+logname = 'cnn-lstm'
+logname += f'_R{R}_scaler0_{scaler[0]}_scaler1_{scaler[1]}_{metric}_Th{threshold}'
+logname += '_date' + datetime.now().strftime('%Y%m%d-%H%M%S')
+writer = tf.summary.create_file_writer(os.path.join('/gdrive/My Drive/Colab Notebooks/svd/tensorboard', logname))
+# %reload_ext tensorboard
+
+# %time u, s, v = algorithm3([w1_pad, w2_pad], \
+#                            num_refinements=R, \
+#                            num_sub_matrix=1, \
+#                            decomposition='eigen', \
+#                            stop_threshold=threshold, \
+#                            metric=metric, \
+#                            scaler=scaler, \
+#                            report_writer=writer)
+
+# The `%time` magic was dropped so this call is valid Python.
+# NOTE(review): `u, s, v` must already exist here, presumably from a previous
+# run of the commented-out `algorithm3` call above -- confirm.
+u, s, v = algorithm3_extra_refinements([w1_pad, w2_pad], u, s, v,
+                                       num_refinements=R,
+                                       num_sub_matrix=1,
+                                       decomposition='eigen',
+                                       stop_threshold=threshold,
+                                       metric=metric,
+                                       scaler=scaler,
+                                       report_writer=writer)
+w_approx = algorithm3_inverse(u, s, v)
+
+
+w1_approx, w2_approx = w_approx[0], w_approx[1]
+
+models['cnn-lstm'].get_layer('LSTM_1').set_weights([w1, r1, b1])
+models['cnn-lstm'].get_layer('LSTM_2').set_weights([w2, r2, b2])
+
+_, cnn_lstm_acc = models['cnn-lstm'].evaluate([test_data[0], test_data[1]], test_labels, verbose=0)
+print(f'CNN-LSTM - Test accuracy: {cnn_lstm_acc}')
+
+# Crop the padded approximations back to the original kernel shapes.
+models['cnn-lstm'].get_layer('LSTM_1').set_weights([w1_approx[:w1.shape[0], :w1.shape[1]], r1, b1])
+models['cnn-lstm'].get_layer('LSTM_2').set_weights([w2_approx[:w2.shape[0], :w2.shape[1]], r2, b2])
+
+_, cnn_lstm_approx_acc = models['cnn-lstm'].evaluate([test_data[0], test_data[1]], test_labels, verbose=0)
+print(f'\nCNN-LSTM - Test accuracy: {cnn_lstm_approx_acc} (approx)')
+print(f'\nAccuracy drop: {cnn_lstm_acc - cnn_lstm_approx_acc}')
+
+plot_accuracies(['cnn_lstm'], [cnn_lstm_acc], [cnn_lstm_approx_acc])
+
+"""# Minimization
+
+# Playing with Images
+"""
+
+# Load the picture, shrink it to 80% and move the channel axis first so that
+# each colour channel is one matrix of the stack handed to `algorithm3`.
+img = Image.open('/gdrive/My Drive/UNIVERSITY - PhD CHALMERS/mugs.jpg') #.convert('L')
+img = img.resize([int(0.8 * s) for s in img.size])
+img_array = np.array(img).transpose([2, 0, 1]).astype('float32')
+num_inputs, input_size, output_size = img_array.shape
+print(img_array.shape)
+img
+
+# The `%time` magic was dropped (not just commented out) so that `img_approx`
+# is actually computed when this file runs as plain Python.
+img_approx = algorithm3_inverse(*algorithm3(img_array, num_refinements=64, num_sub_matrix=1, decomposition='eigen')).transpose([1, 2, 0])
+image = img_approx.copy()
+image *= (255.0 / image.max()) # .clip(min=0, max=255.0)
+Image.fromarray(image.astype('uint8'))
+
+# Same experiment with the 'svd' decomposition (again without `%time`).
+img_approx = algorithm3_inverse(*algorithm3(img_array, num_refinements=64, num_sub_matrix=1, decomposition='svd')).transpose([1, 2, 0])
+image = img_approx.copy()
+image *= (255.0 / image.max()) # .clip(min=0, max=255.0)
+Image.fromarray(image.astype('uint8'))
+
+"""### Combine three different images"""
+
+cat = Image.open('/gdrive/My Drive/UNIVERSITY - PhD CHALMERS/cat.jpg').convert('LA').resize([512, 256])
+mugs = Image.open('/gdrive/My Drive/UNIVERSITY - PhD CHALMERS/mugs.jpg').convert('LA').resize([512, 256])
+flower = Image.open('/gdrive/My Drive/UNIVERSITY - PhD CHALMERS/sun_flower.png').convert('LA').resize([512, 256])
+
+# Keep the PIL images (`*_img`) for display and turn the grayscale versions
+# into float32 arrays for the approximation.
+cat_img = cat = ImageOps.grayscale(cat)
+mugs_img = mugs = ImageOps.grayscale(mugs)
+flower_img = flower = ImageOps.grayscale(flower)
+
+cat = np.array(cat).astype('float32')
+mugs = np.array(mugs).astype('float32')
+flower = np.array(flower).astype('float32')
+
+img_array = np.array([cat, mugs, flower])
+print(img_array.shape)
+
+cat_img, mugs_img, flower_img
+
+# Commented out IPython magic to ensure Python compatibility.
+R = 8
+metric = 'frobenius-norm'
+threshold = 1e-5
+logname = 'base'
+logname += '_R{}_{}_Th{}'.format(R, metric, threshold)
+logname += '_date' + datetime.now().strftime('%Y%m%d-%H%M%S')
+writer = tf.summary.create_file_writer(os.path.join('/gdrive/My Drive/Colab Notebooks/svd/tensorboard', logname))
+# %reload_ext tensorboard
+
+# The `%time` magic was dropped (not just commented out): the original left the
+# continuation lines dangling, which is a syntax error in plain Python.
+img_approx = algorithm3_inverse(*algorithm3(img_array,
+                                            num_refinements=R,
+                                            num_sub_matrix=1,
+                                            decomposition='eigen',
+                                            stop_threshold=threshold,
+                                            metric=metric,
+                                            report_writer=writer))
+img_approx *= (255.0 / img_approx.max()) # .clip(min=0, max=255.0)
+
+cat_approx = Image.fromarray(img_approx[0].astype('uint8'))
+cat_approx
+
+mugs_approx = Image.fromarray(img_approx[1].astype('uint8'))
+mugs_approx
+
+flower_approx = Image.fromarray(img_approx[2].astype('uint8'))
+flower_approx
+
+def frobenius(x, x_approx):
+    """Return the Frobenius norm of the approximation error `x - x_approx`.
+
+    `np.linalg.norm` with its default `ord` already is the Frobenius norm (and
+    is non-negative), so no extra `sqrt` / `absolute` is applied.
+    """
+    return np.linalg.norm(x - x_approx)
+
+def mse(x, x_approx):
+    """Return the mean squared element-wise error between `x` and `x_approx`."""
+    return ((x - x_approx)**2).mean()
+
+print(mse(cat, cat_approx))
+print(mse(mugs, mugs_approx))
+print(mse(flower, flower_approx))
+
+# 402.30466
+# 335.3521
+# 365.27515
+
+from tensorflow.python.training import gradient_descent
+
+# Toy example: minimise a 1-D quadratic with plain gradient descent.
+x = tf.Variable(10.0, trainable=True)
+
+@tf.function
+def f_x():
+    """Quadratic 2x^2 - 5x + 4 evaluated at the module-level variable `x`."""
+    return 2 * x * x - 5 * x + 4
+
+for _ in range(20):
+    print([x.numpy(), f_x().numpy()])
+    opt = gradient_descent.GradientDescentOptimizer(0.1).minimize(f_x)
+
+# Shared rank-1 factors: `u * transpose(v)` broadcasts to an (m, n) matrix and
+# `s` holds one scalar per approximated matrix.
+m, n = w1.shape
+p = 2
+u = tf.Variable([[np.random.rand()] * n], trainable=True)
+v = tf.Variable([[np.random.rand()] * m], trainable=True)
+s = tf.Variable([np.random.rand()] * p, trainable=True)
+w = np.array([w1, w2])
+
+@tf.function
+def f_x():
+    """Negated Frobenius norm of the summed residual `sum_i (w[i] - s[i] * u v^T)`.
+
+    NOTE(review): minimising the *negated* norm drives the residual up, not
+    down -- confirm the sign is intended.
+    """
+    # # A = tf.Variable([[tf.constant_initializer(0)] * m] * n, trainable=False)
+    # for i in range(p):
+    #     v_tmp = tf.transpose(v) * w[i].T
+    #     # print('v_tmp.shape:', v_tmp.shape)
+    #     A_tmp = v_tmp * u
+    #     # print('A_tmp.shape:', A_tmp.shape)
+    #     if i == 0:
+    #         A = A_tmp * A_tmp
+    #     else:
+    #         A = A + (A_tmp * A_tmp)
+    # for i in range(p):
+    #     s = tf.transpose(v) * w[i].T * u
+    #     w_approx = s * u * tf.transpose(v)
+    #     print('s.shape:', s.shape)
+    #     print('w_approx.shape:', w_approx.shape)
+    #     if i == 0:
+    #         mse = (w[i].T - w_approx) * (w[i].T - w_approx)
+    #     else:
+    #         mse = mse + (w[i].T - w_approx) * (w[i].T - w_approx)
+
+    uv = u * tf.transpose(v)
+    s_tmp = tf.expand_dims(tf.expand_dims(s, axis=1), axis=2)
+    approx = tf.reduce_sum(w - s_tmp * tf.stack([uv] * p), axis=0)
+    return -tf.norm(approx, ord='fro', axis=[-2,-1])
+
+norms = []
+for _ in range(100):
+    # print([x.numpy(), f_x().numpy()])
+    # print(f_x().numpy())
+    norms.append(f_x().numpy())
+    opt = gradient_descent.GradientDescentOptimizer(0.0001).minimize(f_x)
+
+plt.plot(norms)
+print(u.numpy().mean())
+print(v.numpy().mean())
+
+# Evaluate with the Fashion-MNIST test set used by `run_alg3_for_mnist`;
+# `test_labels` is the CNN-LSTM label set elsewhere in this file.
+models['mnist'].get_layer('dense_1').set_weights([w1, b1])
+models['fashion'].get_layer('dense_1').set_weights([w2, b2])
+_, mnist_acc = models['mnist'].evaluate(x_test, y_test, verbose=0)
+_, fashion_mnist_acc = models['fashion'].evaluate(fashion_test_images, fashion_test_labels, verbose=2)
+print('MNIST - Test accuracy:', mnist_acc)
+print('Fashion MNIST - Test accuracy:', fashion_mnist_acc)
+
+A = (u * tf.transpose(v)).numpy()
+w1_approx = s[0].numpy() * A
+w2_approx = s[1].numpy() * A
+
+models['mnist'].get_layer('dense_1').set_weights([w1_approx, b1])
+models['fashion'].get_layer('dense_1').set_weights([w2_approx, b2])
+print('')
+
+_, mnist_acc_approx = models['mnist'].evaluate(x_test, y_test, verbose=0)
+_, fashion_mnist_acc_approx = models['fashion'].evaluate(fashion_test_images, fashion_test_labels, verbose=2)
+print('MNIST - Test accuracy:', mnist_acc_approx)
+print('Fashion MNIST - Test accuracy:', fashion_mnist_acc_approx)
+
+print('\nMNIST - Accuracy drop:', mnist_acc - mnist_acc_approx)
+print('Fashion MNIST - Accuracy drop:', fashion_mnist_acc - fashion_mnist_acc_approx)
+
+# Exactly two bars are plotted, so pass exactly two labels; `models` also
+# holds the 'cnn-lstm' entry, which made `list(models.keys())` too long.
+plot_accuracies(['mnist', 'fashion'], [mnist_acc, fashion_mnist_acc],
+                [mnist_acc_approx, fashion_mnist_acc_approx])
+
+# Use the GitHub version of TFCO
+# (shell escape commented out: `!cmd` is notebook-only syntax, not valid Python)
+# !pip install git+https://github.com/google-research/tensorflow_constrained_optimization
+import tensorflow_constrained_optimization as tfco
+
+m, n = w1.shape
+p = 2
+u = tf.Variable([[np.random.rand()] * n], trainable=True)
+v = tf.Variable([[np.random.rand()] * m], trainable=True)
+s = tf.Variable([np.random.rand()] * p, trainable=True)
+w = np.array([w1, w2])
+
+def loss_fn():
+    """Objective for the constrained problem, built from the module-level u, s, v, w.
+
+    Returns `A - ||sum_i (w[i] - s[i] * u v^T)||_F`, where `A` accumulates the
+    element-wise squares of `transpose(v) * w[i].T * u` over the `p` matrices.
+    """
+    uv = u * tf.transpose(v)
+    s_tmp = tf.expand_dims(tf.expand_dims(s, axis=1), axis=2)
+    approx = tf.reduce_sum(w - s_tmp * tf.stack([uv] * p), axis=0)
+    for i in range(p):
+        A_tmp = tf.transpose(v) * w[i].T * u
+        if i == 0:
+            A = A_tmp * A_tmp
+        else:
+            A = A + (A_tmp * A_tmp)
+    return A - tf.norm(approx, ord='fro', axis=[-2, -1])
+
+class SampleProblem(tfco.ConstrainedMinimizationProblem):
+    """TFCO problem: minimise `loss_fn` subject to two constraints on u and v."""
+
+    def __init__(self, loss_fn, weights):
+        # loss_fn: zero-argument callable returning the objective tensor.
+        # weights: the [u, s, v] variables, unpacked in `constraints`.
+        self._loss_fn = loss_fn
+        self._weights = weights
+
+    @property
+    def num_constraints(self):
+        return 2
+
+    def objective(self):
+        # Use the callable given to the constructor rather than the global
+        # `loss_fn`, so the instance honours its own argument.
+        return self._loss_fn()
+
+    def constraints(self):
+        # NOTE(review): TFCO reads each entry as `value <= 0`, so `1 - norm`
+        # would enforce norm >= 1 rather than norm == 1 -- confirm intent.
+        u, s, v = self._weights
+        u_norm = tf.linalg.norm(u) # tf.math.reduce_euclidean_norm(u)
+        v_norm = tf.linalg.norm(v) # tf.math.reduce_euclidean_norm(v)
+        u_norm_eq_one = 1 - u_norm
+        v_norm_eq_one = 1 - v_norm
+        constraints = tf.stack([u_norm_eq_one, v_norm_eq_one])
+        # A = (u * tf.transpose(v)).numpy()
+        # for i in range(p):
+        #     rank_eq_one = 1. - tf.Variable(tf.cast(tf.rank(s[i] * A), tf.float32))
+        #     constraints = tf.stack([constraints, rank_eq_one])
+        return constraints
+
+problem = SampleProblem(loss_fn, [u, s, v])
+optimizer = tfco.LagrangianOptimizer(
+    optimizer=tf.optimizers.Adagrad(learning_rate=0.1),
+    num_constraints=problem.num_constraints)
+
+var_list = [u, s, v] + list(problem.trainable_variables) + optimizer.trainable_variables()
+
+for i in range(1000):
+    optimizer.minimize(problem, var_list=var_list)
+    if i % 100 == 0:
+        print(f'step = {i}')
+        print(f'loss = {loss_fn()}')
+        # print(f'constraint = {(x + y).numpy()}')
+        # print(f'u = {u.numpy()}, v = {v.numpy()}')
+
+
+
diff --git a/run_hls.tcl b/run_hls.tcl
index e8d476a..d3235a4 100644
--- a/run_hls.tcl
+++ b/run_hls.tcl
@@ -1,80 +1,13 @@
-#
-# @brief Find all files in a directory and return them in a list.
-#
-# @param basedir The directory to start looking in pattern.
-# @param pattern A pattern, as defined by the glob command, that
-# the files must match.
-# @param exclude_dirs_list Ignore searching in specified directories
-#
-# @return The list of found files.
-# -proc findFiles { basedir pattern exclude_dirs_list } { - # Fix the directory name, this ensures the directory name is in the - # native format for the platform and contains a final directory seperator - set basedir [string trimright [file join [file normalize $basedir] { }]] - set fileList {} - # Look in the current directory for matching files, -type {f r} - # means ony readable normal files are looked at, -nocomplain stops - # an error being thrown if the returned list is empty - foreach fileName [glob -nocomplain -type {f r} -path $basedir $pattern] { - lappend fileList $fileName - } - # Now look for any sub direcories in the current directory - foreach dirName [glob -nocomplain -type {d r} -path $basedir *] { - # Recusively call the routine on the sub directory and append any - # new files to the results - if {[lsearch -exact ${exclude_dirs_list} $dirName] == -1} { - set subDirList [findFiles $dirName $pattern $exclude_dirs_list] - if { [llength $subDirList] > 0 } { - foreach subDirFile $subDirList { - lappend fileList $subDirFile - } - } - } - } - return $fileList -} - -# -# @brief Greps a file content and writes matches to a file. 
-# -# @param re Regular expression -# @param lines Number of lines to report/include after the found match -# @param fin The fin pointer -# @param fout The fout pointer -# -proc grep {re lines fin fout} { - set cnt 0 - set match false - seek $fin 0 - while {[gets $fin line] >= 0} { - if [regexp -- $re $line] { - set cnt 0 - set match true - } - if {$match && ($cnt < $lines)} { - puts $line - puts $fout $line - set cnt [expr {$cnt +1}] - } else { - set match false - } - } -} +source tcl/utils.tcl +source tcl/lstm_params.tcl set PRJ_PATH [pwd] -exec mkdir -p -- ./hls -exec mkdir -p -- ./hls/reports -cd hls +exec mkdir -p -- ./hls_prj +exec mkdir -p -- ./hls_prj/reports +cd hls_prj -# ============================================================================== -# Top function name, testbench file -# ============================================================================== -set TOP "hls_pong" -set TB "test_game" -set SRC_DIR "" ;# Or just leave it empty for including all sub-dirs too. 
-set SRC_LIST [list ""] ;# If empty, it will include all files in SRC_DIR subdirs +set USE_VITIS 1 # ============================================================================== # Setups # ============================================================================== @@ -86,10 +19,11 @@ set cosim 0 set export 0 set place_and_route 0 set report_info 1 +set set_max_fifo_depth 0 # ============================================================================== # HLS Synthesis Options + Platform Selection # ============================================================================== -set scheduler_effort "medium" +set scheduler_effort "high" ;# medium set relax_ii 0 set use_hlslib 0 set use_zedboard 1 @@ -104,17 +38,30 @@ if {${use_zedboard}} { set board_name "ZCU102" } # ============================================================================== -# Hardware parameters +# Top function name, testbench file # ============================================================================== - +# NOTE: The namespace must also be included. +set TB "test_lstm_svd" +set ARGV "2 4 64 32 2" +set TOP "SvdModel2LstmSDSoCV2" ;# "HlsLstmSvd" ;# "HlsSvdKernel" ;# "HlsDenseSvd" ; #"HlsKernelS" ;# "HlsGemvKernel" ;#"HlsAxisKernelU" ;#"svd::SvdModel2LstmSDSoCV2" +set SRC_DIR "" ;# Or just leave it empty for including all sub-dirs too. 
+set SRC_LIST [list ""] ;# If empty, it will include all files in SRC_DIR subdirs # ============================================================================== # Project name # ============================================================================== -set PROJECT_NAME "${board_name}_${TOP}" +set prefix ":" +set TOP_NO_NAMESPACE "SvdModel2LstmSDSoCV2" ;# "HlsLstmSvd" ;# "HlsSvdKernel" ;# "HlsDenseSvd" ; #"HlsKernelS" ;# "HlsGemvKernel" ; #"HlsAxisKernelU" ;# [ regsub ***=${prefix} ${TOP} "" string ] +puts ${TOP_NO_NAMESPACE} + +if {${USE_VITIS}} { + set PROJECT_NAME "vitis_${board_name}_${TOP_NO_NAMESPACE}" +} else { + set PROJECT_NAME "hls_${board_name}_${TOP_NO_NAMESPACE}" +} # ============================================================================== # Defines # ============================================================================== -# The HLS_NO_XIL_FPO_LIB flag is used to compile hlaf precision numbers. +# The HLS_NO_XIL_FPO_LIB flag is used to compile half precision numbers. 
set DEFINES "-DHLS_NO_XIL_FPO_LIB" append DEFINES "" @@ -124,88 +71,109 @@ if {${use_zcu104_pynq}} { } else { append DEFINES " -DAXI_PORT_WIDTH=64" } + +append_lstm_params DEFINES # append DEFINES " -DDEBUG_INTERNAL_STREAMS" # append DEFINES " -DUSE_BLAS" # ============================================================================== # Linker Flags # ============================================================================== -set LDFLAGS "-lpthread -fopenmp" +set LDFLAGS "" +# set LDFLAGS "-lpthread -fopenmp" # append LDFLAGS " /usr/local/lib/libblas.a" # ============================================================================== -# TB arguments -# ============================================================================== -set ARGV "" -# ============================================================================== # CFlags # ============================================================================== # NOTE(21/02/2019): the '-fno-builtin' is suggested by Xilinx when using # the set_directive_resource option. 
+if {${USE_VITIS}} { + set CXXSTD "-std=c++14" ;#"-std=c++14 -fno-builtin" ; #"-std=c++1y" +} else { + set CXXSTD "-std=c++0x -fno-builtin" +} if {${cosim}} { - set CFLAGS "-O3 -g -std=c++0x -fno-builtin -I${PRJ_PATH}/include/${SRC_DIR}/ -DCOSIM_DESIGN ${DEFINES} -I/usr/local/include" + set CFLAGS "-O3 ${CXXSTD} -I${PRJ_PATH}/include/${SRC_DIR}/ -DCOSIM_DESIGN ${DEFINES} -I/usr/local/include" } else { - set CFLAGS "-O3 -g -std=c++0x -fno-builtin -I${PRJ_PATH}/include/${SRC_DIR}/ ${DEFINES} -I/usr/local/include" + set CFLAGS "-O3 ${CXXSTD} -I${PRJ_PATH}/include/${SRC_DIR}/ ${DEFINES} -I/usr/local/include" } # ============================================================================== # Open Project and Add Files # ============================================================================== if {${reset_project}} { - open_project -reset hls_${PROJECT_NAME} + open_project -reset ${PROJECT_NAME} } else { - open_project hls_${PROJECT_NAME} + open_project ${PROJECT_NAME} } set_top ${TOP} -set HLS_REPORT_PATH "hls_${PROJECT_NAME}/solution_${TOP}/syn/report/" -set REPORT_DIR "${PRJ_PATH}/hls/reports" -set REPORT_FILE_PATH "${PRJ_PATH}/hls/reports/" -set VIVADO_LIB "C:/Xilinx/Vivado/2018.3/include/" +set HLS_REPORT_PATH "${PROJECT_NAME}/solution_${TOP}/syn/report/" +set REPORT_DIR "${PRJ_PATH}/hls_prj/reports" +set REPORT_FILE_PATH "${PRJ_PATH}/hls_prj/reports/" +# set VIVADO_LIB "C:/Xilinx/Vivado/2018.3/include/" set BLAS_LIB "C:/Users/ste/.caffe/dependencies/libraries_v140_x64_py27_1.1.0/libraries/lib/libopenblas.a" set BLAS_LIB_DIR "C:/Users/ste/.caffe/dependencies/libraries_v140_x64_py27_1.1.0/libraries/lib" # Get Source Files (1st argument: regex, 2nd argument: excluded directory) -set src_files [findFiles "${PRJ_PATH}/src/${SRC_DIR}/" "*.cpp" "${PRJ_PATH}/src/tb"] -set include_files [findFiles "${PRJ_PATH}/include/${SRC_DIR}/" "*.h" "${PRJ_PATH}/include/tb"] +set src_files [findFiles "${PRJ_PATH}/src/${SRC_DIR}/" "*.cpp" "${PRJ_PATH}/src/testbenches"] +set 
include_files [findFiles "${PRJ_PATH}/include/${SRC_DIR}/" "*.h" "${PRJ_PATH}/include/testbenches"] if {${reset_project}} { - foreach f ${src_files} { - add_files ${f} -cflags ${CFLAGS} - } - foreach f ${include_files} { - add_files ${f} -cflags ${CFLAGS} - } - # if {llength $SRC_LIST -eq 0} { - # foreach f ${src_files} { - # add_files ${f} -cflags ${CFLAGS} - # } - # foreach f ${include_files} { + add_files ${PRJ_PATH}/src/kernel/u_kernel.cpp -cflags ${CFLAGS} + add_files ${PRJ_PATH}/src/kernel/s_kernel.cpp -cflags ${CFLAGS} + add_files ${PRJ_PATH}/src/kernel/v_kernel.cpp -cflags ${CFLAGS} + add_files ${PRJ_PATH}/src/kernel/svd_kernel.cpp -cflags ${CFLAGS} + add_files ${PRJ_PATH}/src/layers/dense/hls/dense_svd.cpp -cflags ${CFLAGS} + add_files ${PRJ_PATH}/src/layers/lstm/hls/lstm_svd.cpp -cflags ${CFLAGS} + add_files ${PRJ_PATH}/src/hls_utils/adder_tree.cpp -cflags ${CFLAGS} + add_files ${PRJ_PATH}/src/hls_utils/hls_metaprogramming.cpp -cflags ${CFLAGS} + + add_files ${PRJ_PATH}/include/kernel/u_kernel.h -cflags ${CFLAGS} + add_files ${PRJ_PATH}/include/kernel/s_kernel.h -cflags ${CFLAGS} + add_files ${PRJ_PATH}/include/kernel/v_kernel.h -cflags ${CFLAGS} + add_files ${PRJ_PATH}/include/kernel/svd_kernel.h -cflags ${CFLAGS} + add_files ${PRJ_PATH}/include/layers/dense/hls/dense_svd.h -cflags ${CFLAGS} + add_files ${PRJ_PATH}/include/layers/lstm/hls/lstm_svd.h -cflags ${CFLAGS} + add_files ${PRJ_PATH}/include/math_utils/activation_functions.h -cflags ${CFLAGS} + add_files ${PRJ_PATH}/include/hls_utils/adder_tree.h -cflags ${CFLAGS} + add_files ${PRJ_PATH}/include/hls_utils/hls_metaprogramming.h -cflags ${CFLAGS} + add_files ${PRJ_PATH}/include/dma/svd_parameters.h -cflags ${CFLAGS} + + # foreach f ${include_files} { + # # File svd.h contains main() + # if {${f} eq "${PRJ_PATH}/include/svd.h"} { + # } else { # add_files ${f} -cflags ${CFLAGS} # } - # } else { - # foreach f ${SRC_LIST} { + # } + # foreach f ${src_files} { + # # File svd.cpp contains main() + # if 
{${f} eq "${PRJ_PATH}/src/svd.cpp"} { + # } else { # add_files ${f} -cflags ${CFLAGS} # } # } - # add_files ${PRJ_PATH}/src/axis_lib.cpp -cflags ${CFLAGS} - # add_files ${PRJ_PATH}/include/axis_lib.h -cflags ${CFLAGS} - # Add Testbench Files if {${csim} || ${cosim}} { # TB Files (to avoid including multiple files with main() in them) - add_files -tb ${PRJ_PATH}/src/tb/${TB}.cpp -cflags ${CFLAGS} - add_files -tb ${PRJ_PATH}/include/tb/${TB}.h -cflags ${CFLAGS} + add_files -tb ${PRJ_PATH}/src/testbenches/${TB}.cpp -cflags ${CFLAGS} + add_files -tb ${PRJ_PATH}/include/testbenches/${TB}.h -cflags ${CFLAGS} } } -open_solution "solution_${TOP}" +if {${USE_VITIS}} { + open_solution -flow_target vivado -reset "solution_${TOP_NO_NAMESPACE}" +} else { + open_solution "solution_${TOP_NO_NAMESPACE}" +} # ============================================================================== # Set Part # ============================================================================== if {${reset_project}} { if {${use_zedboard}} { # ZedBoard - set_part {xc7z020clg484-1} -tool vivado + set_part {xc7z020clg484-1} ;#-tool vivado } else { if {${use_zcu104_pynq}} { # Pynq ZCU104 Board @@ -216,11 +184,11 @@ if {${reset_project}} { create_clock -period 5 -name default } elseif {${use_zcu102_vassilis}} { # Ultrascale+ ZCU102 - set_part {xczu9eg-ffvb1156-2-i} -tool vivado + set_part {xczu9eg-ffvb1156-2-i} ;#-tool vivado create_clock -period 10 -name default } else { # ZedBoard (default) - set_part {xc7z020clg484-1} -tool vivado + set_part {xc7z020clg484-1} ;#-tool vivado create_clock -period 10 -name default } } @@ -234,29 +202,44 @@ if {${reset_project}} { # ============================================================================== # Configure HLS # ============================================================================== +if {${USE_VITIS}} { + config_compile -name_max_length=12 -pipeline_style=frp -enable_auto_rewind=1 +} else { + config_compile -name_max_length=12 +} + if {${relax_ii}} 
{ - config_schedule -effort ${scheduler_effort} -relax_ii_for_timing=0 + config_schedule -effort ${scheduler_effort} -relax_ii_for_timing=1 } else { - config_schedule -effort ${scheduler_effort} + config_schedule -effort ${scheduler_effort} -relax_ii_for_timing=0 } -# config_sdx -target sds ;# -optimization_level 3 +# config_sdx -target sdx ;# -optimization_level 3 -if {${use_zcu104_pynq}} { - config_interface -m_axi_addr64 +if {${use_zedboard}} { + config_interface -m_axi_addr64=0 +} +if {${USE_VITIS}} { + config_interface -m_axi_auto_max_ports=1 -m_axi_offset=slave } config_core DSP48 -latency 3 -config_dataflow -default_channel pingpong +# config_dataflow -default_channel fifo ;#pingpong +if {${set_max_fifo_depth}} { + set MAX_DEPTH 65536 + config_dataflow -fifo_depth=${MAX_DEPTH} -start_fifo_depth=${MAX_DEPTH} \ + -scalar_fifo_depth=${MAX_DEPTH} -task_level_fifo_depth=${MAX_DEPTH} \ + -override_user_fifo_depth=${MAX_DEPTH} +} # ============================================================================== # Start C-Simulation # ============================================================================== if {${csim}} { if {${build_only}} { - csim_design -clean -O -compiler gcc -ldflags ${LDFLAGS} -argv ${ARGV} -setup + csim_design -clean -O -ldflags ${LDFLAGS} -argv ${ARGV} -setup } else { - csim_design -clean -O -compiler gcc -ldflags ${LDFLAGS} -argv ${ARGV} + csim_design -clean -O -ldflags ${LDFLAGS} -argv ${ARGV} } } # ============================================================================== @@ -270,7 +253,7 @@ if {${synth}} { puts "\[INFO\] Reporting information" puts "================================================================" - set FILENAME "${REPORT_FILE_PATH}/${board_name}_${TOP}.rpt" + set FILENAME "${REPORT_FILE_PATH}/${board_name}_${TOP_NO_NAMESPACE}.rpt" set fin [open ${HLS_REPORT_PATH}/${TOP}_csynth.rpt r] set fout [open ${FILENAME} a] @@ -286,15 +269,24 @@ if {${synth}} { # Start Cosimulation # 
============================================================================== if {${cosim}} { - cosim_design -trace_level port -ldflags ${LDFLAGS} -argv ${ARGV} ;#-tool auto -wave_debug + + if {${USE_VITIS}} { + cosim_design -trace_level port -ldflags ${LDFLAGS} -argv ${ARGV} \ + -enable_dataflow_profiling=0 -enable_fifo_sizing=0 + # -disable_deadlock_detection + # -disable_dependency_check + } else { + cosim_design -trace_level port -ldflags ${LDFLAGS} -argv ${ARGV} ;#-tool auto -wave_debug + } + if {${report_info}} { puts "================================================================" puts "\[INFO\] Reporting information" puts "================================================================" - set REPORT_FILENAME "${REPORT_FILE_PATH}/${board_name}_${TOP}.rpt" - set HLS_REPORT_PATH "hls_${PROJECT_NAME}/solution_${TOP}/sim/report/" + set REPORT_FILENAME "${REPORT_FILE_PATH}/${board_name}_${TOP_NO_NAMESPACE}.rpt" + set HLS_REPORT_PATH "${PROJECT_NAME}/solution_${TOP_NO_NAMESPACE}/sim/report/" set fin [open ${HLS_REPORT_PATH}/${TOP}_cosim.rpt r] set fout [open ${REPORT_FILENAME} a] @@ -316,7 +308,7 @@ if {${export}} { } puts "================================================================" -puts "\[INFO\] Closing project: ./hls/hls_${PROJECT_NAME}" +puts "\[INFO\] Closing project: ./hls_prj/${PROJECT_NAME}" puts "================================================================" exit diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0820838..ce85d5b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,34 +1,38 @@ cmake_minimum_required(VERSION 3.10) add_subdirectory(dma) add_subdirectory(kernel) -add_subdirectory(lstm) add_subdirectory(math_utils) add_subdirectory(hls_utils) add_subdirectory(testbenches) +# add_subdirectory(lstm) +# add_subdirectory(dense) +add_subdirectory(layers) # NOTE: Each library/object will have an identifier and that identifier will then # be used to link the final executable, i.e. 
target_link_libraries(ProjectName LibraryName) add_library(SVD_PARAMS STATIC ${CMAKE_SOURCE_DIR}/src/svd_ip.cpp) target_include_directories(SVD_PARAMS PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(SVD_PARAMS PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(SVD_PARAMS PUBLIC ${HLS_INCLUDE_DIRS}) target_include_directories(SVD_PARAMS PUBLIC ${OpenCv_INCLUDE_DIRS}) target_link_libraries(SVD_PARAMS ${OpenCv_LIBS}) target_compile_options(SVD_PARAMS PRIVATE -fno-builtin) add_library(SVD_IP STATIC ${CMAKE_SOURCE_DIR}/src/svd_ip.cpp) target_include_directories(SVD_IP PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(SVD_IP PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(SVD_IP PUBLIC ${HLS_INCLUDE_DIRS}) target_include_directories(SVD_IP PUBLIC ${OpenCv_INCLUDE_DIRS}) target_link_libraries(SVD_IP ${OpenCv_LIBS}) -target_link_libraries(SVD_IP SVD_KERNEL) target_compile_options(SVD_IP PRIVATE -fno-builtin) +target_link_libraries(SVD_IP SVD_KERNEL) add_library(SVD STATIC ${CMAKE_SOURCE_DIR}/src/svd.cpp) target_include_directories(SVD PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(SVD PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(SVD PUBLIC ${HLS_INCLUDE_DIRS}) target_include_directories(SVD PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_compile_options(SVD PRIVATE -fno-builtin) target_link_libraries(SVD ${OpenCv_LIBS}) target_link_libraries(SVD SVD_IP) target_link_libraries(SVD LSTM_SVD) -target_compile_options(SVD PRIVATE -fno-builtin) +target_link_libraries(SVD LSTM_SVD_EMULATOR) +target_link_libraries(SVD SOFT_LSTM_SVD) diff --git a/src/dma/CMakeLists.txt b/src/dma/CMakeLists.txt index 7b3291d..30d20b1 100644 --- a/src/dma/CMakeLists.txt +++ b/src/dma/CMakeLists.txt @@ -2,12 +2,17 @@ cmake_minimum_required(VERSION 3.10) add_library(SVD_DMA STATIC ${CMAKE_SOURCE_DIR}/src/dma/svd_dma.cpp) target_include_directories(SVD_DMA PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(SVD_DMA PUBLIC 
${VIVADO_INCLUDE_DIRS}) +target_include_directories(SVD_DMA PUBLIC ${HLS_INCLUDE_DIRS}) target_compile_options(SVD_DMA PRIVATE -fno-builtin) target_link_libraries(SVD_DMA SVD_PARAMS) add_library(WIDTH_CONVERTER STATIC ${CMAKE_SOURCE_DIR}/src/dma/width_converter.cpp) target_include_directories(WIDTH_CONVERTER PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(WIDTH_CONVERTER PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(WIDTH_CONVERTER PUBLIC ${HLS_INCLUDE_DIRS}) target_compile_options(WIDTH_CONVERTER PRIVATE -fno-builtin) -target_link_libraries(WIDTH_CONVERTER SVD_PARAMS) \ No newline at end of file +target_link_libraries(WIDTH_CONVERTER SVD_PARAMS) + +add_library(AXIS_LIB STATIC ${CMAKE_SOURCE_DIR}/src/dma/axis_lib.cpp) +target_include_directories(AXIS_LIB PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(AXIS_LIB PUBLIC ${HLS_INCLUDE_DIRS}) +target_compile_options(AXIS_LIB PRIVATE -fno-builtin) \ No newline at end of file diff --git a/src/dma/axis_lib.cpp b/src/dma/axis_lib.cpp new file mode 100644 index 0000000..40a14bf --- /dev/null +++ b/src/dma/axis_lib.cpp @@ -0,0 +1 @@ +#include "dma/axis_lib.h" \ No newline at end of file diff --git a/src/hls_utils/CMakeLists.txt b/src/hls_utils/CMakeLists.txt index e488058..e69fc07 100644 --- a/src/hls_utils/CMakeLists.txt +++ b/src/hls_utils/CMakeLists.txt @@ -2,19 +2,19 @@ cmake_minimum_required(VERSION 3.10) add_library(DOT_PROD_DSP STATIC ${CMAKE_SOURCE_DIR}/src/hls_utils/dot_prod_dsp.cpp) target_include_directories(DOT_PROD_DSP PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(DOT_PROD_DSP PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(DOT_PROD_DSP PUBLIC ${HLS_INCLUDE_DIRS}) target_include_directories(DOT_PROD_DSP PUBLIC ${OpenCv_INCLUDE_DIRS}) target_compile_options(DOT_PROD_DSP PRIVATE -fno-builtin) # target_link_libraries(DOT_PROD_DSP ${OpenCv_LIBS}) add_library(HW_TIMER STATIC ${CMAKE_SOURCE_DIR}/src/hls_utils/hw_timer.cpp) 
target_include_directories(HW_TIMER PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(HW_TIMER PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(HW_TIMER PUBLIC ${HLS_INCLUDE_DIRS}) target_include_directories(HW_TIMER PUBLIC ${OpenCv_INCLUDE_DIRS}) target_compile_options(HW_TIMER PRIVATE -fno-builtin) add_library(ADDER_TREE STATIC ${CMAKE_SOURCE_DIR}/src/hls_utils/adder_tree.cpp) target_include_directories(ADDER_TREE PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(ADDER_TREE PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(ADDER_TREE PUBLIC ${HLS_INCLUDE_DIRS}) target_include_directories(ADDER_TREE PUBLIC ${OpenCv_INCLUDE_DIRS}) target_compile_options(ADDER_TREE PRIVATE -fno-builtin) \ No newline at end of file diff --git a/src/kernel/CMakeLists.txt b/src/kernel/CMakeLists.txt index 39798b8..8c6616e 100644 --- a/src/kernel/CMakeLists.txt +++ b/src/kernel/CMakeLists.txt @@ -3,18 +3,19 @@ cmake_minimum_required(VERSION 3.10) add_library(U_KERNEL STATIC ${CMAKE_SOURCE_DIR}/src/kernel/u_kernel.cpp) target_include_directories(U_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include) target_include_directories(U_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include/kernel) -target_include_directories(U_KERNEL PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(U_KERNEL PUBLIC ${HLS_INCLUDE_DIRS}) target_compile_options(U_KERNEL PRIVATE -fno-builtin) add_library(S_KERNEL STATIC ${CMAKE_SOURCE_DIR}/src/kernel/s_kernel.cpp) target_include_directories(S_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include) target_include_directories(S_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include/kernel) -target_include_directories(S_KERNEL PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(S_KERNEL PUBLIC ${HLS_INCLUDE_DIRS}) target_compile_options(S_KERNEL PRIVATE -fno-builtin) add_library(V_KERNEL STATIC ${CMAKE_SOURCE_DIR}/src/kernel/v_kernel.cpp) target_include_directories(V_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include) target_include_directories(V_KERNEL PUBLIC 
${CMAKE_SOURCE_DIR}/include/kernel) +target_include_directories(V_KERNEL PUBLIC ${HLS_INCLUDE_DIRS}) target_compile_options(V_KERNEL PRIVATE -fno-builtin) add_library(SVD_KERNEL STATIC ${CMAKE_SOURCE_DIR}/src/kernel/svd_kernel.cpp) @@ -25,6 +26,15 @@ target_link_libraries(SVD_KERNEL U_KERNEL) target_link_libraries(SVD_KERNEL S_KERNEL) target_link_libraries(SVD_KERNEL V_KERNEL) + +add_library(GEMV_KERNEL STATIC ${CMAKE_SOURCE_DIR}/src/kernel/gemv_kernel.cpp) +target_include_directories(GEMV_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(GEMV_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include/kernel) +target_compile_options(GEMV_KERNEL PRIVATE -fno-builtin) +target_link_libraries(GEMV_KERNEL U_KERNEL) +target_link_libraries(GEMV_KERNEL S_KERNEL) +target_link_libraries(GEMV_KERNEL V_KERNEL) + # add_library(DQNET STATIC ${CMAKE_SOURCE_DIR}/src/kernel/kernel.cpp) # target_include_directories(DQNET PUBLIC ${CMAKE_SOURCE_DIR}/include) # target_include_directories(DQNET PUBLIC ${CMAKE_SOURCE_DIR}/include/kernel) diff --git a/src/kernel/README.md b/src/kernel/README.md new file mode 100644 index 0000000..9791960 --- /dev/null +++ b/src/kernel/README.md @@ -0,0 +1,38 @@ +# Kernels + +## U-Kernel + +### HlsAxisKernelU + +To be used with external DMAs. +```c++ +void HlsAxisKernelU(const int num_refinements, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& xu_port); +``` + +### HlsManySamplingsKernelU + +Compared to the previous implementation, this kernel has a different number of refinements per input. The refinements and inputs must be **ordered**, meaning that the input at index zero has the lowest number of refinements to process. + +```c++ +void HlsManySamplingsKernelU(const hls::vector num_refinements, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& xu_port); +``` + +### HlsKernelU + +Flexible Kernel-U. 
+ +```c++ +void HlsKernelU(const int num_active_inputs, + const int input_size, + const hls::vector num_refinements, + const bool pad_output, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& xu_port); +``` \ No newline at end of file diff --git a/src/kernel/gemv_kernel.cpp b/src/kernel/gemv_kernel.cpp new file mode 100644 index 0000000..1f7c1d6 --- /dev/null +++ b/src/kernel/gemv_kernel.cpp @@ -0,0 +1,49 @@ +#include "kernel/gemv_kernel.h" + +#ifdef __VITIS_HLS__ + +void HlsGemvKernel(const int num_rows, const int num_cols, + hls::stream >& x1_port, + hls::stream >& x2_port, + hls::stream >& w1_port, + hls::stream >& w2_port, + hls::stream& y1_port, + hls::stream& y2_port) { +#pragma HLS INTERFACE s_axilite port=return bundle=ctrl +#pragma HLS INTERFACE s_axilite port=num_cols bundle=ctrl +#pragma HLS INTERFACE s_axilite port=num_rows bundle=ctrl +#pragma HLS DATAFLOW + + hls::stream > x_streams[testgemv::N]; + hls::stream > w_streams[testgemv::N]; + hls::stream y_streams[testgemv::N]; +#pragma HLS ARRAY_PARTITION variable=x_streams complete +#pragma HLS ARRAY_PARTITION variable=w_streams complete +#pragma HLS ARRAY_PARTITION variable=y_streams complete + + + const int kNumTiles = num_rows / testgemv::T; + + DMA_in: + for (int i = 0; i < kNumTiles; ++i) { + for (int j = 0; j < num_cols; ++j) { +#pragma HLS PIPELINE II=1 + x_streams[0] << x1_port.read(); + x_streams[1] << x2_port.read(); + w_streams[0] << w1_port.read(); + w_streams[1] << w2_port.read(); + } + } + + svd::GemvKernel(num_rows, num_cols, + x_streams, w_streams, y_streams); + + DMA_out: + for (int i = 0; i < num_cols; ++i) { +#pragma HLS PIPELINE II=1 + y1_port.write(y_streams[0].read()); + y2_port.write(y_streams[1].read()); + } +} + +#endif diff --git a/src/kernel/s_kernel.cpp b/src/kernel/s_kernel.cpp index e69de29..12e415d 100644 --- a/src/kernel/s_kernel.cpp +++ b/src/kernel/s_kernel.cpp @@ -0,0 +1,29 @@ +#include "kernel/s_kernel.h" +#include "dma/axis_lib.h" + +#include "hls_stream.h" 
+#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif + +#ifdef __VITIS_HLS__ +void HlsKernelS(const int num_active_inputs, + const int num_refinements[tests::params::N], + // const hls::vector num_refinements, + hls::stream& xu_port, + hls::stream& s_port, + hls::stream& xus_port) { +#pragma HLS INTERFACE axis port=xu_port +#pragma HLS INTERFACE axis port=s_port +#pragma HLS INTERFACE axis port=xus_port +#pragma HLS INTERFACE s_axilite port=return +#pragma HLS INTERFACE s_axilite port=num_active_inputs +#pragma HLS INTERFACE s_axilite port=num_refinements + svd::KernelS(num_active_inputs, num_refinements, xu_port, + s_port, xus_port); +} +#endif + +namespace svd { + +} // svd \ No newline at end of file diff --git a/src/kernel/svd_kernel.cpp b/src/kernel/svd_kernel.cpp index 7aabb45..74c7bca 100644 --- a/src/kernel/svd_kernel.cpp +++ b/src/kernel/svd_kernel.cpp @@ -1 +1,46 @@ -#include "kernel/svd_kernel.h" \ No newline at end of file +#include "kernel/svd_kernel.h" + +void HlsSvdKernel(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[svd::svd_params::N], + hls::stream& x_port, + hls::stream& u_port, + hls::stream& s_port, + hls::stream& v_port, + hls::stream& y_port) { +#pragma HLS INTERFACE axis port=x_port +#pragma HLS INTERFACE axis port=u_port +#pragma HLS INTERFACE axis port=s_port +#pragma HLS INTERFACE axis port=v_port +#pragma HLS INTERFACE axis port=y_port +#pragma HLS INTERFACE s_axilite port=return +#pragma HLS INTERFACE s_axilite port=num_active_inputs +#pragma HLS INTERFACE s_axilite port=input_size +#pragma HLS INTERFACE s_axilite port=output_size +#pragma HLS INTERFACE s_axilite port=num_refinements +#pragma HLS DATAFLOW + svd::SvdKernel(num_active_inputs, input_size, output_size, + num_refinements, x_port, u_port, s_port, v_port, y_port); +} + +void HlsSvdKernelFixed( + hls::stream& x_port, + hls::stream& u_port, + hls::stream& s_port, + hls::stream& v_port, + hls::stream& y_port) { 
+#pragma HLS INTERFACE axis port=x_port +#pragma HLS INTERFACE axis port=u_port +#pragma HLS INTERFACE axis port=s_port +#pragma HLS INTERFACE axis port=v_port +#pragma HLS INTERFACE axis port=y_port +#pragma HLS INTERFACE s_axilite port=return +#pragma HLS DATAFLOW + const int kNumActiveInputs = svd::svd_params::N; + const int kInputSize = svd::svd_params::I; + const int kOutputSize = svd::svd_params::H; + const int kNumRefinements[svd::svd_params::N] = {svd::svd_params::R}; + svd::SvdKernel(kNumActiveInputs, kInputSize, kOutputSize, + kNumRefinements, x_port, u_port, s_port, v_port, y_port); +} \ No newline at end of file diff --git a/src/kernel/u_kernel.cpp b/src/kernel/u_kernel.cpp index e69de29..4f28bd3 100644 --- a/src/kernel/u_kernel.cpp +++ b/src/kernel/u_kernel.cpp @@ -0,0 +1,292 @@ +#include "kernel/u_kernel.h" +#include "kernel/gemv_kernel.h" +#include "hls_utils/adder_tree.h" +#include "dma/svd_dma.h" +#include "dma/axis_lib.h" + +#include "assert.h" +#include "ap_axi_sdata.h" +#include "hls_stream.h" +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif + +#ifndef __VITIS_HLS__ +void HlsKernelU(const int num_refinements, + const typename testu::params::ActivationD x_port[testu::params::N][testu::params::I], + const typename testu::params::UPortD u_port[testu::params::R * testu::params::PrunedSizeU], + typename testu::params::ActivationD xu_port[testu::params::N][testu::params::G * testu::params::R]) { +#pragma HLS INTERFACE s_axilite port=return bundle=ctrl +#pragma HLS INTERFACE s_axilite port=num_refinements bundle=ctrl +#pragma HLS INTERFACE m_axi port=x_port offset=slave depth=testu::params::I +#pragma HLS INTERFACE m_axi port=u_port offset=slave depth=testu::params::R*testu::params::PrunedSizeU +#pragma HLS INTERFACE m_axi port=xu_port offset=slave depth=testu::params::R +#pragma HLS DATAFLOW + svd::SvdStreams streams; + svd::SvdBuffers buffers; + svd::InputDMA(num_refinements, x_port, streams, buffers); + svd::StreamSplitter(num_refinements * 
testu::params::G * testu::params::PrunedSizeU, u_port, streams.u_dma); + U_Dispatcher: + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < testu::params::PeU; ++j) { + for (int k = 0; k < testu::params::PrunedSizeU / testu::params::PeU; ++k) { +#pragma HLS PIPELINE II=1 +#pragma HLS LOOP_FLATTEN + for (int g = 0; g < testu::params::G; ++g) { + streams.u[g][j].write(streams.u_dma[g].read()); + } + } + } + } + svd::KernelU(num_refinements, streams); + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < testu::params::N; ++j) { +#pragma HLS PIPELINE II=1 + for (int k = 0; k < testu::params::G; ++k) { + auto tmp = hlsutils::adder_tree(streams.xu[j][k]); + xu_port[j][k * num_refinements + i] = tmp; + } + } + } +} +#else +void HlsVectorKernelU(const int num_refinements, + hls::stream > &x_port, + hls::stream > &u_port, + hls::stream > &xu_port) { + const int R_test = num_refinements; + const int kNumTilesU = testu::params::I / testu::params::Tu; + const int kDepth_X = testu::params::N * kNumTilesU; + const int kDepth_U = num_refinements * kNumTilesU * testu::params::G; + const int kDepth_XU = num_refinements * testu::params::G; + +// #pragma HLS INTERFACE m_axi port=x_port bundle=x offset=slave +// #pragma HLS INTERFACE m_axi port=u_port bundle=u offset=slave +// #pragma HLS INTERFACE m_axi port=xu_port bundle=xu offset=slave +// #pragma HLS INTERFACE s_axilite port=x_port +// #pragma HLS INTERFACE s_axilite port=u_port +// #pragma HLS INTERFACE s_axilite port=xu_port + +#pragma HLS INTERFACE axis port=x_port bundle=x_dmem +#pragma HLS INTERFACE axis port=u_port bundle=u_dmem +#pragma HLS INTERFACE axis port=xu_port bundle=xu_dmem + +#pragma HLS INTERFACE s_axilite port=return +#pragma HLS INTERFACE s_axilite port=num_refinements +#pragma HLS DATAFLOW + typedef typename testu::params::ActivationD ActivationType; + typedef hls::vector VectN_Type; + + hls::stream x_streams[testu::params::N]; + hls::stream u_streams[testu::params::G]; + 
hls::stream xu_streams[testu::params::N][testu::params::G]; + testu::params::VectTuType x_buffer[testu::params::N][kNumTilesU]; + testu::params::VectTuType xu[testu::params::N][testu::params::G]; +#pragma HLS STREAM variable=x_streams depth=2 +#pragma HLS STREAM variable=u_streams depth=2 +#pragma HLS STREAM variable=xu_streams depth=2 +#pragma HLS ARRAY_PARTITION variable=x_streams complete dim=0 +#pragma HLS ARRAY_PARTITION variable=u_streams complete dim=0 +#pragma HLS ARRAY_PARTITION variable=xu_streams complete dim=0 +#pragma HLS ARRAY_PARTITION variable=x_buffer complete dim=1 +#pragma HLS ARRAY_PARTITION variable=xu complete dim=0 + + Store_X_Buffer: + for (int i = 0; i < testu::params::N; ++i) { + for (int j = 0; j < kNumTilesU; ++j) { +#pragma HLS PIPELINE II=1 +#pragma HLS LOOP_FLATTEN + x_buffer[i][j] = x_port.read(); // [i * kNumTilesU + j]; + } + } + Stream_X_Tiles: + for (int i = 0; i < R_test; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=testu::params::R max=testu::params::R + for (int j = 0; j < kNumTilesU; ++j) { +#pragma HLS PIPELINE II=1 + for (int k = 0; k < testu::params::N; ++k) { + x_streams[k] << x_buffer[k][j]; + } + } + } + U_DMA: + for (int i = 0; i < R_test; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=testu::params::R max=testu::params::R + for (int j = 0; j < kNumTilesU; ++j) { + for (int k = 0; k < testu::params::G; ++k) { +#pragma HLS PIPELINE II=1 + int u_idx = i * kNumTilesU * testu::params::G + j * testu::params::G + k; + u_streams[k] << u_port.read(); // [u_idx]; + } + } + } + U_Kernel: + for (int i = 0; i < R_test; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=testu::params::R max=testu::params::R + for (int j = 0; j < kNumTilesU; ++j) { +#pragma HLS PIPELINE II=1 + testu::params::VectTuType x[testu::params::N]; +#pragma HLS ARRAY_PARTITION variable=x complete dim=0 + for (int ii = 0; ii < testu::params::N; ++ii) { + x[ii] = x_streams[ii].read(); + } + for (int k = 0; k < testu::params::G; ++k) { + testu::params::VectTuType u = 
u_streams[k].read(); + for (int ii = 0; ii < testu::params::N; ++ii) { + if (j == 0) { + xu[ii][k] = testu::params::VectTuType(0); + } + xu[ii][k] += u * x[ii]; + if (j == kNumTilesU - 1) { + xu_streams[ii][k] << xu[ii][k]; + } + } + } + } + } + XU_DMA: + for (int i = 0; i < R_test; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=testu::params::R max=testu::params::R +#pragma HLS PIPELINE II=1 + for (int j = 0; j < testu::params::G; ++j) { + VectN_Type xu_out; + for (int k = 0; k < testu::params::N; ++k) { + xu_out[k] = xu_streams[k][j].read().reduce_add(); + } + // xu_port[i * testu::params::G + j] = xu_out; + xu_port << xu_out; + } + } +} + +void HlsAxisKernelU(const int num_refinements, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& xu_port) { + const int R_test = num_refinements; + const int kNumTilesU = testu::params::I / testu::params::Tu; + const int kStreamDepth_X = 2 + kNumTilesU * testu::params::N; + const int kStreamDepth_U = 8 + kNumTilesU * testu::params::N; + const int kStreamDepth_XU = 2 + testu::params::G; +#pragma HLS INTERFACE axis port=x_port +#pragma HLS INTERFACE axis port=u_port +#pragma HLS INTERFACE axis port=xu_port +#pragma HLS INTERFACE s_axilite port=return +#pragma HLS INTERFACE s_axilite port=num_refinements +#pragma HLS DATAFLOW + typedef typename testu::params::ActivationD ActivationType; + + auto x_axis = svd::AxiStreamPort(x_port); + auto u_axis = svd::AxiStreamPort(u_port); + auto xu_axis = svd::AxiStreamPort(xu_port); + + hls::stream x_streams[testu::params::N]; + hls::stream u_streams[testu::params::G]; + hls::stream xu_streams[testu::params::N][testu::params::G]; + testu::params::VectTuType x_buffer[testu::params::N][kNumTilesU]; +#pragma HLS STREAM variable=x_streams depth=kStreamDepth_X +#pragma HLS STREAM variable=u_streams depth=kStreamDepth_U +#pragma HLS STREAM variable=xu_streams depth=kStreamDepth_XU +#pragma HLS ARRAY_PARTITION variable=x_streams complete dim=1 +#pragma HLS ARRAY_PARTITION variable=u_streams 
complete dim=1 +#pragma HLS ARRAY_PARTITION variable=xu_streams complete dim=1 +#pragma HLS ARRAY_PARTITION variable=xu_streams complete dim=2 +#pragma HLS ARRAY_PARTITION variable=x_buffer complete dim=1 + + Store_X_Buffer: + for (int i = 0; i < testu::params::N; ++i) { + for (int j = 0; j < kNumTilesU; ++j) { +#pragma HLS PIPELINE II=1 +#pragma HLS LOOP_FLATTEN + x_buffer[i][j] = x_axis.PopVector(); + } + } + Stream_X_Tiles: + for (int i = 0; i < R_test; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=testu::params::R max=testu::params::R + for (int j = 0; j < kNumTilesU; ++j) { +#pragma HLS PIPELINE II=1 + for (int k = 0; k < testu::params::N; ++k) { + x_streams[k] << x_buffer[k][j]; + } + } + } + + U_DMA: + for (int i = 0; i < R_test; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=testu::params::R max=testu::params::R + for (int j = 0; j < kNumTilesU; ++j) { + for (int k = 0; k < testu::params::G; ++k) { +#pragma HLS PIPELINE II=1 + u_streams[k] << u_axis.PopVector(); + } + } + } + + U_Kernel: + for (int i = 0; i < R_test; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=testu::params::R max=testu::params::R + for (int j = 0; j < kNumTilesU; ++j) { +#pragma HLS PIPELINE II=1 + testu::params::VectTuType x[testu::params::N]; + testu::params::VectTuType xu[testu::params::N][testu::params::G]; +#pragma HLS ARRAY_PARTITION variable=x complete dim=0 +#pragma HLS ARRAY_PARTITION variable=xu complete dim=0 + for (int ii = 0; ii < testu::params::N; ++ii) { + x[ii] = x_streams[ii].read(); + } + for (int k = 0; k < testu::params::G; ++k) { + testu::params::VectTuType u = u_streams[k].read(); + for (int ii = 0; ii < testu::params::N; ++ii) { + if (j == 0) { + xu[ii][k] = testu::params::VectTuType(0); + } + xu[ii][k] += u * x[ii]; + if (j == kNumTilesU - 1) { + xu_streams[ii][k] << xu[ii][k]; + } + } + } + } + } + XU_DMA: + for (int i = 0; i < R_test; ++i) { +#pragma HLS LOOP_TRIPCOUNT min=testu::params::R max=testu::params::R +#pragma HLS PIPELINE II=1 + testu::params::VectGN_Type xu_out; + for (int 
j = 0; j < testu::params::G; ++j) { + for (int k = 0; k < testu::params::N; ++k) { + xu_out[j * testu::params::N + k] = xu_streams[k][j].read().reduce_add(); + } + } + const bool kIsLast = (i == R_test - 1) ? true : false; + xu_axis.PushVector(xu_out, kIsLast); + } +} + + +void HlsKernelU(const int num_active_inputs, + const int input_size, + const int num_refinements[testu::params::N], + // const hls::vector num_refinements, + const bool pad_output, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& xu_port) { +#pragma HLS INTERFACE s_axilite port=return +#pragma HLS INTERFACE s_axilite port=num_active_inputs +#pragma HLS INTERFACE s_axilite port=input_size +#pragma HLS INTERFACE s_axilite port=num_refinements +#pragma HLS INTERFACE s_axilite port=pad_output +#pragma HLS INTERFACE axis port=x_port +#pragma HLS INTERFACE axis port=u_port +#pragma HLS INTERFACE axis port=xu_port + svd::KernelU(num_active_inputs, input_size, num_refinements, + pad_output, x_port, u_port, xu_port); +} + +#endif + +namespace svd { + +} // svd \ No newline at end of file diff --git a/src/kernel/v_kernel.cpp b/src/kernel/v_kernel.cpp index e69de29..8d92f5c 100644 --- a/src/kernel/v_kernel.cpp +++ b/src/kernel/v_kernel.cpp @@ -0,0 +1,37 @@ +#include "kernel/v_kernel.h" + +#include "hls_stream.h" +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif + +#include "assert.h" + +#ifndef __VITIS_HLS__ +#else +void HlsKernelV(const int num_active_inputs, + const int output_size, + const int num_refinements[testv::params::N], + // hls::vector& num_refinements, + hls::stream& xus_port, + hls::stream& v_port, + hls::stream& y_port) { +#pragma HLS INTERFACE axis port=xus_port +#pragma HLS INTERFACE axis port=v_port +#pragma HLS INTERFACE axis port=y_port +#pragma HLS INTERFACE s_axilite port=return +#pragma HLS INTERFACE s_axilite port=num_active_inputs +#pragma HLS INTERFACE s_axilite port=output_size +#pragma HLS INTERFACE s_axilite port=num_refinements +#pragma HLS DATAFLOW +#pragma 
HLS ARRAY_PARTITION variable=num_refinements complete dim=1 + + + svd::KernelV(num_active_inputs, output_size, + num_refinements, xus_port, v_port, y_port); + + + // svd::KernelV(num_active_inputs, output_size, + // num_refinements, xus_port, v_port, y_port); +} +#endif // end __VITIS_HLS__ diff --git a/src/layers/CMakeLists.txt b/src/layers/CMakeLists.txt new file mode 100644 index 0000000..58f13b7 --- /dev/null +++ b/src/layers/CMakeLists.txt @@ -0,0 +1,4 @@ +cmake_minimum_required(VERSION 3.10) + +add_subdirectory(dense) +add_subdirectory(lstm) \ No newline at end of file diff --git a/src/layers/dense/CMakeLists.txt b/src/layers/dense/CMakeLists.txt new file mode 100644 index 0000000..9e7059b --- /dev/null +++ b/src/layers/dense/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.10) +add_subdirectory(hls) +add_subdirectory(sw) + +# add_library(LSTM_DATA_HANDLER STATIC ${CMAKE_SOURCE_DIR}/src/lstm/lstm_data_handler.cpp) +# target_include_directories(LSTM_DATA_HANDLER PUBLIC ${CMAKE_SOURCE_DIR}/include) +# target_include_directories(LSTM_DATA_HANDLER PUBLIC ${HLS_INCLUDE_DIRS}) +# target_include_directories(LSTM_DATA_HANDLER PUBLIC ${OpenCv_INCLUDE_DIRS}) +# target_link_libraries(LSTM_DATA_HANDLER ${OpenCv_LIBS}) \ No newline at end of file diff --git a/src/layers/dense/hls/CMakeLists.txt b/src/layers/dense/hls/CMakeLists.txt new file mode 100644 index 0000000..4e919f3 --- /dev/null +++ b/src/layers/dense/hls/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.10) + +add_library(DENSE_SVD STATIC ${CMAKE_SOURCE_DIR}/src/layers/dense/hls/dense_svd.cpp) +target_include_directories(DENSE_SVD PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(DENSE_SVD PUBLIC ${HLS_INCLUDE_DIRS}) +target_include_directories(DENSE_SVD PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(DENSE_SVD ${OpenCv_LIBS}) + +# add_library(SOFT_LSTM_SVD STATIC ${CMAKE_SOURCE_DIR}/src/lstm/sw/soft_lstm_svd.cpp) +# target_include_directories(SOFT_LSTM_SVD 
PUBLIC ${CMAKE_SOURCE_DIR}/include) +# target_include_directories(SOFT_LSTM_SVD PUBLIC ${HLS_INCLUDE_DIRS}) +# target_include_directories(SOFT_LSTM_SVD PUBLIC ${OpenCv_INCLUDE_DIRS}) +# target_link_libraries(SOFT_LSTM_SVD ${OpenCv_LIBS}) +# target_link_libraries(SOFT_LSTM_SVD BLAS_UTILS) +# target_link_libraries(SOFT_LSTM_SVD ACTIVATION_FUNCTIONS) diff --git a/src/layers/dense/hls/dense_svd.cpp b/src/layers/dense/hls/dense_svd.cpp new file mode 100644 index 0000000..6ce1ef4 --- /dev/null +++ b/src/layers/dense/hls/dense_svd.cpp @@ -0,0 +1,75 @@ +#include "layers/dense/hls/dense_svd.h" + +#ifndef __VITIS_HLS__ +#else +void HlsDenseSvd(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[svd::dense_params::N], + // const hls::vector num_refinements, + hls::stream& x_port, + hls::stream& u_port, + hls::stream& s_port, + hls::stream& v_port, + hls::stream& bias_port, + hls::stream& y_port) { +#pragma HLS INTERFACE s_axilite port=return bundle=ctrl +#pragma HLS INTERFACE s_axilite port=num_active_inputs bundle=ctrl +#pragma HLS INTERFACE s_axilite port=input_size bundle=ctrl +#pragma HLS INTERFACE s_axilite port=output_size bundle=ctrl +#pragma HLS INTERFACE s_axilite port=num_refinements bundle=ctrl +#pragma HLS INTERFACE axis port=x_port +#pragma HLS INTERFACE axis port=u_port +#pragma HLS INTERFACE axis port=s_port +#pragma HLS INTERFACE axis port=v_port +#pragma HLS INTERFACE axis port=bias_port +#pragma HLS INTERFACE axis port=y_port + svd::DenseSvdKernel(num_active_inputs, input_size, + output_size, num_refinements, x_port, u_port, s_port, v_port, bias_port, + y_port); +} + +/** + * @brief HLS Wrapper that calls a DenseSvd accelerator. + * + * Useful in Cosimulation. + * + * @param[in] num_active_inputs The number of active inputs + * @param[in] input_size The input size + * @param[in] output_size The output size + * @param[in] num_refinements The number of refinements + * @param[in] x The input array. 
Shape: (N, I) + * @param[in] u The u array. Shape: (R, I, G) + * @param[in] s The s array. Shape: (R, N, G) + * @param[in] v The v array. Shape: (R, H, G) + * @param[in] bias The bias array. Shape: (N, G, H) + * @param y The y array. Shape: (N, G, H) + */ +void HlsWrapperDenseSvd(const int num_active_inputs, + const int input_size, + const int output_size, + const int num_refinements[svd::dense_params::N], + const typename svd::dense_params::ActivationD* x, + const typename svd::dense_params::ActivationD* u, + const typename svd::dense_params::ActivationD* s, + const typename svd::dense_params::ActivationD* v, + const typename svd::dense_params::ActivationD* bias, + typename svd::dense_params::ActivationD* y) { +#ifdef __VITIS_HLS__ + hls::stream x_port("x_port"); + hls::stream u_port("u_port"); + hls::stream s_port("s_port"); + hls::stream v_port("v_port"); + hls::stream bias_port("bias_port"); + hls::stream y_port("y_port"); + svd::SetDenseSvdInputs(num_active_inputs, input_size, + output_size, num_refinements, x, u, s, v, bias, x_port, u_port, s_port, + v_port, bias_port); + HlsDenseSvd(num_active_inputs, input_size, output_size, num_refinements, + x_port, u_port, s_port, v_port, bias_port, y_port); + svd::GetSvdKernelOutputs(num_active_inputs, output_size, + y_port, y); +#endif // __VITIS_HLS__ +} + +#endif \ No newline at end of file diff --git a/src/layers/dense/sw/CMakeLists.txt b/src/layers/dense/sw/CMakeLists.txt new file mode 100644 index 0000000..0bb5b26 --- /dev/null +++ b/src/layers/dense/sw/CMakeLists.txt @@ -0,0 +1,15 @@ +cmake_minimum_required(VERSION 3.10) + +# add_library(SOFT_LSTM STATIC ${CMAKE_SOURCE_DIR}/src/lstm/sw/soft_lstm.cpp) +# target_include_directories(SOFT_LSTM PUBLIC ${CMAKE_SOURCE_DIR}/include) +# target_include_directories(SOFT_LSTM PUBLIC ${HLS_INCLUDE_DIRS}) +# target_include_directories(SOFT_LSTM PUBLIC ${OpenCv_INCLUDE_DIRS}) +# target_link_libraries(SOFT_LSTM ${OpenCv_LIBS}) + +# add_library(SOFT_LSTM_SVD STATIC 
${CMAKE_SOURCE_DIR}/src/lstm/sw/soft_lstm_svd.cpp) +# target_include_directories(SOFT_LSTM_SVD PUBLIC ${CMAKE_SOURCE_DIR}/include) +# target_include_directories(SOFT_LSTM_SVD PUBLIC ${HLS_INCLUDE_DIRS}) +# target_include_directories(SOFT_LSTM_SVD PUBLIC ${OpenCv_INCLUDE_DIRS}) +# target_link_libraries(SOFT_LSTM_SVD ${OpenCv_LIBS}) +# target_link_libraries(SOFT_LSTM_SVD BLAS_UTILS) +# target_link_libraries(SOFT_LSTM_SVD ACTIVATION_FUNCTIONS) diff --git a/src/lstm/CMakeLists.txt b/src/layers/lstm/CMakeLists.txt similarity index 63% rename from src/lstm/CMakeLists.txt rename to src/layers/lstm/CMakeLists.txt index 63b7cc5..eea7dd9 100644 --- a/src/lstm/CMakeLists.txt +++ b/src/layers/lstm/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.10) add_subdirectory(hls) add_subdirectory(sw) -add_library(LSTM_DATA_HANDLER STATIC ${CMAKE_SOURCE_DIR}/src/lstm/lstm_data_handler.cpp) +add_library(LSTM_DATA_HANDLER STATIC ${CMAKE_SOURCE_DIR}/src/layers/lstm/lstm_data_handler.cpp) target_include_directories(LSTM_DATA_HANDLER PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(LSTM_DATA_HANDLER PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(LSTM_DATA_HANDLER PUBLIC ${HLS_INCLUDE_DIRS}) target_include_directories(LSTM_DATA_HANDLER PUBLIC ${OpenCv_INCLUDE_DIRS}) target_link_libraries(LSTM_DATA_HANDLER ${OpenCv_LIBS}) \ No newline at end of file diff --git a/src/lstm/README.md b/src/layers/lstm/README.md similarity index 100% rename from src/lstm/README.md rename to src/layers/lstm/README.md diff --git a/src/layers/lstm/hls/CMakeLists.txt b/src/layers/lstm/hls/CMakeLists.txt new file mode 100644 index 0000000..fcaa0ca --- /dev/null +++ b/src/layers/lstm/hls/CMakeLists.txt @@ -0,0 +1,30 @@ +cmake_minimum_required(VERSION 3.10) + +add_library(LSTM_HARDWARE STATIC ${CMAKE_SOURCE_DIR}/src/layers/lstm/hls/lstm_hardware.cpp) +target_include_directories(LSTM_HARDWARE PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(LSTM_HARDWARE PUBLIC 
${HLS_INCLUDE_DIRS}) +target_include_directories(LSTM_HARDWARE PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(LSTM_HARDWARE ${OpenCv_LIBS}) + +add_library(LSTM_SVD STATIC ${CMAKE_SOURCE_DIR}/src/layers/lstm/hls/lstm_svd.cpp) +target_include_directories(LSTM_SVD PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(LSTM_SVD PUBLIC ${HLS_INCLUDE_DIRS}) +target_include_directories(LSTM_SVD PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(LSTM_SVD ${OpenCv_LIBS}) +target_link_libraries(LSTM_SVD ${OpenCv_LIBS}) +# target_link_libraries(LSTM_SVD SVD_PARAMS) +# target_link_libraries(LSTM_SVD SVD_DMA) +# target_link_libraries(LSTM_SVD U_KERNEL) +# target_link_libraries(LSTM_SVD S_KERNEL) +# target_link_libraries(LSTM_SVD V_KERNEL) +# target_link_libraries(LSTM_SVD ACTIVATION_FUNCTIONS) +# target_link_libraries(LSTM_SVD HLS_DEBUGGING) + + +set(LSTM_SVD_EMULATOR_H ${CMAKE_SOURCE_DIR}/include/math_utils/activation_functions.h) +add_library(LSTM_SVD_EMULATOR STATIC ${CMAKE_SOURCE_DIR}/src/layers/lstm/hls/lstm_svd_emulator.cpp ${LSTM_SVD_EMULATOR_H}) +target_include_directories(LSTM_SVD_EMULATOR PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(LSTM_SVD_EMULATOR PUBLIC ${HLS_INCLUDE_DIRS}) +target_include_directories(LSTM_SVD_EMULATOR PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(LSTM_SVD_EMULATOR ${OpenCv_LIBS}) +# target_link_libraries(LSTM_SVD_EMULATOR ACTIVATION_FUNCTIONS) \ No newline at end of file diff --git a/src/lstm/hls/lstm_hardware.cpp b/src/layers/lstm/hls/lstm_hardware.cpp similarity index 99% rename from src/lstm/hls/lstm_hardware.cpp rename to src/layers/lstm/hls/lstm_hardware.cpp index 33939fd..8693aa1 100644 --- a/src/lstm/hls/lstm_hardware.cpp +++ b/src/layers/lstm/hls/lstm_hardware.cpp @@ -36,12 +36,15 @@ * streams * *****************************************************************************/ -#include "lstm/hls/lstm_hardware.h" -#include "dma/svd_dma.h" #include "svd_params.h" +#include 
"layers/lstm/hls/lstm_hardware.h" +#include "dma/svd_dma.h" #include "hls_math.h" #include "hls_half.h" +#include "assert.h" + +#include template void print_matrix(const svd::ActivationD matrix[BramRows][BramCols]) { @@ -287,11 +290,10 @@ void gemm_kernel(const bool execute, } break; case 1: { +#ifndef __VITIS_HLS__ if (accumulate) { #pragma HLS DATAFLOW ActivationD c_tmp[M][N]; -// #pragma HLS RESOURCE variable=c_tmp core=RAM_2P - hls::matrix_multiply_top(a, b, c); } +#endif } break; case 2: { @@ -1844,11 +1847,11 @@ void svd_fpga_lstm(const svd::ActivationD *x, const svd::ActivationD *c_rec, svd::ActivationD *c_cur, svd::ActivationD *out) { -#ifndef SDS_DESIGN const int kInputDepth = INPUT_SIZE; const int kHiddenDepth = HIDDEN_SIZE; const int kCurGateDepth = 4 * INPUT_SIZE * HIDDEN_SIZE; const int kRecGateDepth = 4 * HIDDEN_SIZE * HIDDEN_SIZE; +#ifndef SDS_DESIGN #pragma HLS INTERFACE s_axilite port=return bundle=ctrl #pragma HLS INTERFACE m_axi port=x depth=kInputDepth #pragma HLS INTERFACE m_axi port=h depth=kHiddenDepth @@ -1869,8 +1872,8 @@ void svd_fpga_lstm(const svd::ActivationD *x, svd::ActivationD rec_y[HIDDEN_SIZE * kNumGates]; #pragma HLS ARRAY_PARTITION variable=cur_y block factor=kNumGates #pragma HLS ARRAY_PARTITION variable=rec_y block factor=kNumGates -#pragma HLS STREAM variable=cur_y depth=HIDDEN_SIZE -#pragma HLS STREAM variable=rec_y depth=HIDDEN_SIZE +#pragma HLS STREAM variable=cur_y depth=kHiddenDepth +#pragma HLS STREAM variable=rec_y depth=kHiddenDepth const bool kWritebackOnce = true; const int kCurM = HIDDEN_SIZE * kNumGates; @@ -2287,7 +2290,7 @@ void dummy_gemm(svd::DmaInterfaceD a[2], svd::DmaInterfaceD b[2], svd::DmaInterf } } -#if 1 +#if 0 void dummy_dispatcher(hls::stream > &x, hls::stream > y[4]) { const int kNumInputElem = 32; const int kNumPE = 4; @@ -2315,7 +2318,9 @@ void test_dispatcher() { x_stream.write(x[i]); } +#ifndef __VITIS_HLS__ std::cout << "[Dispatcher] Running IP. 
x_stream.size() = " << x_stream.size() << "\n";
+#endif
   hls::stream > y_stream[kNumPE];
   dummy_dispatcher(x_stream, y_stream);
diff --git a/src/layers/lstm/hls/lstm_svd.cpp b/src/layers/lstm/hls/lstm_svd.cpp
new file mode 100644
index 0000000..8e5d65a
--- /dev/null
+++ b/src/layers/lstm/hls/lstm_svd.cpp
@@ -0,0 +1,511 @@
+#include "layers/lstm/hls/lstm_svd.h"
+#include "layers/dense/hls/dense_svd.h"
+#include "svd_params.h"
+#include "dma/svd_dma.h"
+#include "kernel/u_kernel.h"
+#include "kernel/s_kernel.h"
+#include "kernel/v_kernel.h"
+#include "math_utils/activation_functions.h"
+#include "hls_utils/hls_debugging.h"
+
+#include "hls_stream.h"
+#include "ap_int.h"
+#include "assert.h"
+
+// NOTE(review): in this copy of the file every `<...>` span is absent: the
+// operand of the #include below and the template argument lists on
+// hls::stream, ap_uint and the svd:: helper calls. Restore them from the
+// original source before building.
+#include
+
+namespace svd {
+
+/**
+ * @brief Top-level HLS dataflow region of the SVD LSTM with two parallel sets
+ *        of ports (suffixes 1 and 2).
+ *
+ * The body declares the stream buffers, sizes them with STREAM pragmas and
+ * chains the svd:: stages in this order: non-zero index DMA, input DMA, weight
+ * splitting and dispatch, U dot unit, V dot unit, non-linearity unit. The
+ * chain is instantiated once for the "current" path (x ports) and once for the
+ * "recurrent" path (h_prev ports).
+ *
+ * @param[in]  x1_port, x2_port                Inputs, INPUT_SIZE elements each.
+ * @param[in]  h_t1_prev_port, h_t2_prev_port  Previous hidden states,
+ *                                             HIDDEN_SIZE elements each.
+ * @param[in]  c_t1_prev_port, c_t2_prev_port  Previous cell states,
+ *                                             HIDDEN_SIZE elements each.
+ * @param[in]  u_cur_port, u_rec_port, v_port  Packed U/V weights (sizes in the
+ *                                             inline comments).
+ * @param[in]  s1_port, s2_port                Packed scalar weights.
+ * @param[in]  bias1_port, bias2_port          Biases, 4 * HIDDEN_SIZE each.
+ * @param[in]  nz_v_port, nz_u_port            Non-zero tile indexes,
+ *                                             NUM_ITERATIONS * 8 each.
+ * @param      h_t1_curr_port, h_t2_curr_port  Passed to NonLinearityUnit;
+ *                                             presumably the new hidden states.
+ * @param      c_t1_curr_port, c_t2_curr_port  Passed to NonLinearityUnit;
+ *                                             presumably the new cell states.
+ * @param      counters_port, clk_count_port   Only present with DEBUG_FIFOS;
+ *                                             not referenced in this body.
+ */
+void SvdModel2LstmSDSoCV2(
+    const svd::ActivationD x1_port[INPUT_SIZE],
+    const svd::ActivationD x2_port[INPUT_SIZE],
+    const svd::ActivationD h_t1_prev_port[HIDDEN_SIZE],
+    const svd::ActivationD h_t2_prev_port[HIDDEN_SIZE],
+    const svd::ActivationD c_t1_prev_port[HIDDEN_SIZE],
+    const svd::ActivationD c_t2_prev_port[HIDDEN_SIZE],
+    const ap_uint *u_cur_port, // [NUM_ITERATIONS*4*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)],
+    const ap_uint *u_rec_port, // [NUM_ITERATIONS*4*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)],
+    const ap_uint *v_port, // [NUM_ITERATIONS*4*2*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V)],
+    const ap_uint *s1_port, // [NUM_ITERATIONS*8],
+    const ap_uint *s2_port, // [NUM_ITERATIONS*8],
+    const svd::WeightD bias1_port[4 * HIDDEN_SIZE],
+    const svd::WeightD bias2_port[4 * HIDDEN_SIZE],
+    const ap_uint nz_v_port[NUM_ITERATIONS * 8],
+    const ap_uint nz_u_port[NUM_ITERATIONS * 8],
+    svd::ActivationD h_t1_curr_port[HIDDEN_SIZE],
+    svd::ActivationD h_t2_curr_port[HIDDEN_SIZE],
+    svd::ActivationD c_t1_curr_port[HIDDEN_SIZE],
+    svd::ActivationD c_t2_curr_port[HIDDEN_SIZE]
+#ifdef DEBUG_FIFOS
+    ,
+    svd::CounterD *counters_port,
+    svd::CounterD *clk_count_port
+#endif
+    ) {
+  hlsutils::Log(0, "[INFO] Running SvdModel2LstmSDSoCV2.");
+  // Compile-time configuration, taken from the build-time macros.
+  const int kNumGates = 8;
+  const int kNumCurGates = 4;
+  const int kNumRecGates = 4;
+  const int kInputLength = INPUT_SIZE;
+  const int kOutputLength = HIDDEN_SIZE;
+  const int kNumTilesU = NUM_TILES_U;
+  const int kNumTilesV = NUM_TILES_V;
+  const int kNumZeroTilesU = NUM_ZERO_TILES_U;
+  const int kNumZeroTilesV = NUM_ZERO_TILES_V;
+  const int kNumIter = NUM_ITERATIONS;
+  const int kNumTimesteps = NUM_TIMESTEPS;
+  const int kNumNonZeroTilesU = kNumTilesU - kNumZeroTilesU;
+  const int kNumNonZeroTilesV = kNumTilesV - kNumZeroTilesV;
+  const int kTileSizeUCurrent = kInputLength / kNumTilesU;
+  const int kTileSizeURecur = kOutputLength / kNumTilesU;
+  const int kTileSizeV = kOutputLength / kNumTilesV;
+  // Only an even number of tiles is accepted for both U and V.
+  assert(kNumTilesU % 2 == 0);
+  assert(kNumTilesV % 2 == 0);
+  // assert(kNumZeroTilesU % 2 == 0);
+  // assert(kNumZeroTilesV % 2 == 0);
+  // assert(kNumIter % 2 == 0);
+  hlsutils::Log(0, "[INFO] asserts passed.");
+
+  // Lengths left after removing the zero tiles. kTileSizeU equals
+  // kTileSizeUCurrent, and kInputLengthPruned equals kPrunedLengthU.
+  const int kTileSizeU = kInputLength / kNumTilesU;
+  const int kPrunedLengthU = kInputLength - kNumZeroTilesU * kTileSizeU;
+  const int kPrunedLengthV = kOutputLength - kNumZeroTilesV * kTileSizeV;
+  const int kInputLengthPruned = kInputLength - kTileSizeU * kNumZeroTilesU;
+  const int kOutputLengthPrunedU = kOutputLength - kOutputLength / kNumTilesU * kNumZeroTilesU;
+  const int kOutputLengthPrunedV = kOutputLength - kOutputLength / kNumTilesV * kNumZeroTilesV;
+  // The kAxi*/kNumReads* constants below are not referenced again in this
+  // function body.
+  const int kNumSamples = NUM_SAMPLES; // Used for cosimulation only
+  const int kNumReadsR = 8 * kNumIter;
+  const int kNumReadsC = 8 * kNumIter;
+  const int kAxiDepthR = kInputLength;
+  const int kAxiPortDepthX = (kInputLength * kNumTimesteps) * kNumSamples;
+  const int kAxiDepthU = (kNumIter * 8 * kPrunedLengthU) * kNumSamples;
+  const int kAxiDepthV = (kNumIter * 8 * kPrunedLengthV) * kNumSamples;
+  const int kAxiDepthS = (kNumIter * 8 * 2) * kNumSamples;
+  const int kAxiDepthCombinationsR = kNumReadsR * kNumSamples;
+  const int kAxiDepthCombinationsC = kNumReadsC * kNumSamples;
+#ifndef SDS_DESIGN
+#pragma HLS INTERFACE s_axilite port=return bundle=ctrl
+
+  // Port depths: only referenced by the commented-out m_axi pragmas below.
+  const int kUSize = kNumIter*(kNumCurGates * kInputLengthPruned + kNumRecGates * kOutputLengthPrunedU);
+  const int kVSize = kNumIter*(kNumCurGates * kOutputLengthPrunedV + kNumRecGates * kOutputLengthPrunedV);
+  const int kSSize = kNumIter * 2 * (kNumCurGates + kNumRecGates);
+
+  const int kUCurSize = kNumIter * kNumCurGates * kInputLengthPruned;
+  const int kURecSize = kNumIter * kNumRecGates * kOutputLengthPrunedU;
+
+  const int kUcurPortDepth = kUCurSize;
+  const int kUrecPortDepth = kURecSize;
+  const int kVportDepth = kVSize;
+  const int kS1portDepth = kSSize / 2;
+  const int kS2portDepth = kSSize / 2;
+// #pragma HLS INTERFACE m_axi port=u_cur_port offset=slave depth=kUcurPortDepth bundle=u_cur_dmem
+// #pragma HLS INTERFACE m_axi port=u_rec_port offset=slave depth=kUrecPortDepth bundle=u_rec_dmem
+// #pragma HLS INTERFACE m_axi port=v_port offset=slave depth=kVportDepth bundle=v_dmem
+// #pragma HLS INTERFACE m_axi port=s1_port offset=slave depth=kS1portDepth bundle=s1_dmem
+// #pragma HLS INTERFACE m_axi port=s2_port offset=slave depth=kS2portDepth bundle=s2_dmem
+  // Every data port is declared as an AXI-Stream (axis) interface.
+#pragma HLS INTERFACE axis port=u_cur_port
+#pragma HLS INTERFACE axis port=u_rec_port
+#pragma HLS INTERFACE axis port=v_port
+#pragma HLS INTERFACE axis port=s1_port
+#pragma HLS INTERFACE axis port=s2_port
+
+#pragma HLS INTERFACE axis port=x1_port
+#pragma HLS INTERFACE axis port=x2_port
+#pragma HLS INTERFACE axis port=bias1_port
+#pragma HLS INTERFACE axis port=bias2_port
+#pragma HLS INTERFACE axis port=nz_v_port
+#pragma HLS INTERFACE axis port=nz_u_port
+#pragma HLS INTERFACE axis port=h_t1_prev_port
+#pragma HLS INTERFACE axis port=h_t2_prev_port
+#pragma HLS INTERFACE axis port=h_t1_curr_port
+#pragma HLS INTERFACE axis port=h_t2_curr_port
+#pragma HLS INTERFACE axis port=c_t1_prev_port
+#pragma HLS INTERFACE axis port=c_t2_prev_port
+#pragma HLS INTERFACE axis port=c_t1_curr_port
+#pragma HLS INTERFACE axis port=c_t2_curr_port
+#endif // SDS_DESIGN
+
+#pragma HLS DATAFLOW
+  // ===========================================================================
+  // Streams Depth Sizing
+  // ===========================================================================
+  // NOTE: We divide the FIFO depths by a certain factor to save BRAMs. Be aware
+  // that a wrong factor could lead to deadlocks!
+  const int kFIFOdepthDivider = 8;
+  // NOTE(review): unlike the three depths that follow, kStreamDepthIter has no
+  // zero guard and evaluates to 0 when NUM_ITERATIONS < 8. Confirm that
+  // NUM_ITERATIONS >= 8 is guaranteed, or guard it the same way.
+  const int kStreamDepthIter = kNumIter / kFIFOdepthDivider;
+  const int kFIFOdepthFactor = kNumIter * 2;
+  // Each of these falls back to a depth of 2 when the division yields 0.
+  const int kStreamDepthUCurrent = kNumIter * kTileSizeUCurrent / kFIFOdepthFactor == 0 ? 2 : kNumIter * kTileSizeUCurrent / kFIFOdepthFactor;
+  const int kStreamDepthURecurrent = kNumIter * kTileSizeURecur / kFIFOdepthFactor == 0 ? 2 : kNumIter * kTileSizeURecur / kFIFOdepthFactor;
+  const int kStreamDepthV = kNumIter * kNumTilesV / kFIFOdepthFactor == 0 ? 2 : kNumIter * kNumTilesV / kFIFOdepthFactor;
+  const int kTileAccStreamDepth = 2;
+  const int kOutStreamDepth = 2; // kNumIter * kTileSizeV;
+  // ===========================================================================
+  // Current streams
+  // ===========================================================================
+  // NOTE(review): cur_out*/rec_out* are declared and sized below but are not
+  // passed to any stage in this function.
+  svd::WeightStream cur_u_streams[kNumCurGates][kNumNonZeroTilesU];
+  svd::WeightStream cur_v_streams[kNumCurGates][kTileSizeV]; // [kNumNonZeroTilesV];
+  svd::ActivationStream cur_dot1_streams[kNumCurGates];
+  svd::ActivationStream cur_dot2_streams[kNumCurGates];
+  svd::ActivationStream cur_out1_streams[kNumCurGates][kNumNonZeroTilesV];
+  svd::ActivationStream cur_out2_streams[kNumCurGates][kNumNonZeroTilesV];
+  svd::ActivationStream cur_acc1_streams[kNumCurGates][kTileSizeV]; // [kNumTilesV];
+  svd::ActivationStream cur_acc2_streams[kNumCurGates][kTileSizeV]; // [kNumTilesV];
+  // ===========================================================================
+  // Recur streams
+  // ===========================================================================
+  svd::WeightStream rec_u_streams[kNumRecGates][kNumNonZeroTilesU];
+  svd::WeightStream rec_v_streams[kNumRecGates][kTileSizeV]; // [kNumNonZeroTilesV];
+  svd::ActivationStream rec_dot1_streams[kNumRecGates];
+  svd::ActivationStream rec_dot2_streams[kNumRecGates];
+  svd::ActivationStream rec_out1_streams[kNumRecGates][kNumNonZeroTilesV];
+  svd::ActivationStream rec_out2_streams[kNumRecGates][kNumNonZeroTilesV];
+  svd::ActivationStream rec_acc1_streams[kNumRecGates][kTileSizeV]; // [kNumTilesV];
+  svd::ActivationStream rec_acc2_streams[kNumRecGates][kTileSizeV]; // [kNumTilesV];
+  // ===========================================================================
+  // Scalar streams
+  // ===========================================================================
+  svd::WeightStream gates_s1_streams[kNumGates]; // used for both curr and recur
+  svd::WeightStream gates_s2_streams[kNumGates]; // used for both curr and recur
+  // ===========================================================================
+  // Current input streams
+  // ===========================================================================
+  svd::ActivationStream x1_streams[kNumCurGates][kNumNonZeroTilesU];
+  svd::ActivationStream x2_streams[kNumCurGates][kNumNonZeroTilesU];
+  // ===========================================================================
+  // Recurrent input streams
+  // ===========================================================================
+  svd::ActivationStream h1_streams[kNumRecGates][kNumNonZeroTilesU];
+  svd::ActivationStream h2_streams[kNumRecGates][kNumNonZeroTilesU];
+  // ===========================================================================
+  // Zero Combinations DMA
+  // ===========================================================================
+  // NOTE: We divide the FIFO depths by a certain factor to save BRAMs. Be aware
+  // that a wrong factor could lead to deadlocks!
+  hls::stream > nz_v_stream1_cur[kNumCurGates];
+  hls::stream > nz_v_stream1_rec[kNumRecGates];
+  hls::stream > nz_v_stream2_cur[kNumCurGates];
+  hls::stream > nz_v_stream2_rec[kNumRecGates];
+  hls::stream > nz_u_stream1_cur[kNumCurGates];
+  hls::stream > nz_u_stream1_rec[kNumRecGates];
+  hls::stream > nz_u_stream2_cur[kNumCurGates];
+  hls::stream > nz_u_stream2_rec[kNumRecGates];
+#pragma HLS STREAM depth=kStreamDepthIter variable=nz_v_stream1_cur
+#pragma HLS STREAM depth=kStreamDepthIter variable=nz_v_stream1_rec
+#pragma HLS STREAM depth=kStreamDepthIter variable=nz_v_stream2_cur
+#pragma HLS STREAM depth=kStreamDepthIter variable=nz_v_stream2_rec
+#pragma HLS STREAM depth=kStreamDepthIter variable=nz_u_stream1_cur
+#pragma HLS STREAM depth=kStreamDepthIter variable=nz_u_stream1_rec
+#pragma HLS STREAM depth=kStreamDepthIter variable=nz_u_stream2_cur
+#pragma HLS STREAM depth=kStreamDepthIter variable=nz_u_stream2_rec
+
+#pragma HLS STREAM variable=x1_streams depth=kStreamDepthUCurrent // dim=2
+#pragma HLS STREAM variable=x2_streams depth=kStreamDepthUCurrent // dim=2
+#pragma HLS STREAM variable=h1_streams depth=kStreamDepthURecurrent // dim=2
+#pragma HLS STREAM variable=h2_streams depth=kStreamDepthURecurrent // dim=2
+
+#pragma HLS STREAM variable=cur_u_streams depth=kStreamDepthUCurrent // dim=2
+#pragma HLS STREAM variable=rec_u_streams depth=kStreamDepthURecurrent // dim=2
+#pragma HLS STREAM variable=cur_v_streams depth=kStreamDepthV // dim=2
+#pragma HLS STREAM variable=rec_v_streams depth=kStreamDepthV // dim=2
+
+#pragma HLS STREAM variable=gates_s1_streams depth=kStreamDepthIter
+#pragma HLS STREAM variable=gates_s2_streams depth=kStreamDepthIter
+
+#pragma HLS STREAM variable=cur_dot1_streams depth=kStreamDepthIter
+#pragma HLS STREAM variable=cur_dot2_streams depth=kStreamDepthIter
+#pragma HLS STREAM variable=rec_dot1_streams depth=kStreamDepthIter
+#pragma HLS STREAM variable=rec_dot2_streams depth=kStreamDepthIter
+#pragma HLS STREAM variable=cur_acc1_streams depth=kTileAccStreamDepth // dim=2
+#pragma HLS STREAM variable=cur_acc2_streams depth=kTileAccStreamDepth // dim=2
+#pragma HLS STREAM variable=rec_acc1_streams depth=kTileAccStreamDepth // dim=2
+#pragma HLS STREAM variable=rec_acc2_streams depth=kTileAccStreamDepth // dim=2
+
+#pragma HLS STREAM variable=cur_out1_streams depth=kOutStreamDepth // dim=2
+#pragma HLS STREAM variable=cur_out2_streams depth=kOutStreamDepth // dim=2
+#pragma HLS STREAM variable=rec_out1_streams depth=kOutStreamDepth // dim=2
+#pragma HLS STREAM variable=rec_out2_streams depth=kOutStreamDepth // dim=2
+  // ===========================================================================
+  // Partitioning
+  // ===========================================================================
+  // The complete partitioning below is only requested when __VITIS_HLS__ is
+  // not defined.
+#ifndef __VITIS_HLS__
+#pragma HLS ARRAY_PARTITION variable=cur_u_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=cur_v_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=cur_dot1_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=cur_dot2_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=cur_out1_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=cur_out2_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=cur_acc1_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=cur_acc2_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=rec_u_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=rec_v_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=rec_dot1_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=rec_dot2_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=rec_out1_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=rec_out2_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=rec_acc1_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=rec_acc2_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=gates_s1_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=gates_s2_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=x1_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=x2_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=h1_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=h2_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=nz_v_stream1_cur complete dim=0
+#pragma HLS ARRAY_PARTITION variable=nz_v_stream1_rec complete dim=0
+#pragma HLS ARRAY_PARTITION variable=nz_v_stream2_cur complete dim=0
+#pragma HLS ARRAY_PARTITION variable=nz_v_stream2_rec complete dim=0
+#pragma HLS ARRAY_PARTITION variable=nz_u_stream1_cur complete dim=0
+#pragma HLS ARRAY_PARTITION variable=nz_u_stream1_rec complete dim=0
+#pragma HLS ARRAY_PARTITION variable=nz_u_stream2_cur complete dim=0
+#pragma HLS ARRAY_PARTITION variable=nz_u_stream2_rec complete dim=0
+#endif
+  hlsutils::Log(0, "Starting ZeroTileCombinationDMA");
+  svd::NZIndex2LstmDMA(nz_u_port,
+      nz_u_stream1_cur, nz_u_stream1_rec, nz_u_stream2_cur,
+      nz_u_stream2_rec);
+  // NOTE(review): only nz_v_stream1_cur/rec are passed to NZIndexDMA and to the
+  // V units below; nz_v_stream2_cur/rec are declared and sized but never
+  // passed to a stage in this function. Confirm this is intended.
+  svd::NZIndexDMA(nz_v_port,
+      nz_v_stream1_cur, nz_v_stream1_rec);
+  // ===========================================================================
+  // Current Input DMA
+  // ===========================================================================
+  hlsutils::Log(0, "Starting InputDMA");
+  svd::InputDMA(
+      x1_port, nz_u_stream1_cur, x1_streams);
+  svd::InputDMA(
+      x2_port, nz_u_stream2_cur, x2_streams);
+  // ===========================================================================
+  // Recurrent Input DMA
+  // ===========================================================================
+  svd::InputDMA(
+      h_t1_prev_port, nz_u_stream1_rec, h1_streams);
+  svd::InputDMA(
+      h_t2_prev_port, nz_u_stream2_rec, h2_streams);
+  // ===========================================================================
+  // Gates DMA
+  // ===========================================================================
+  // Element counts and packed word widths handed to the splitters. These
+  // k*size names differ only in letter case from kUCurSize/kURecSize/kVSize/
+  // kSSize declared under #ifndef SDS_DESIGN above.
+  const int kUcurSize = kNumGates / 2 * kNumIter * kPrunedLengthU;
+  const int kUrecSize = kNumGates / 2 * kNumIter * kOutputLengthPrunedU;
+  const int kSsize = kNumGates * kNumIter;
+  const int kVsize = kNumGates * kNumIter * kPrunedLengthV;
+  const int kBitWidthU = FIX_WIDTH * 4;
+  const int kBitWidthV = FIX_WIDTH * 8;
+  const int kBitWidthS = FIX_WIDTH * 8;
+  // Without __VITIS_HLS__ the per-gate buffers are arrays fed by
+  // ArraySplitter; with it they are hls::stream objects fed by StreamSplitter.
+#ifndef __VITIS_HLS__
+  svd::WeightD u_cur_gate_streams[kNumGates / 2][kNumIter * kPrunedLengthU];
+  svd::WeightD u_rec_gate_streams[kNumGates / 2][kNumIter * kOutputLengthPrunedU];
+  svd::WeightD v_gate_streams[kNumGates][kNumIter * kPrunedLengthV];
+#pragma HLS ARRAY_PARTITION variable=u_cur_gate_streams complete dim=1
+#pragma HLS ARRAY_PARTITION variable=u_rec_gate_streams complete dim=1
+#pragma HLS ARRAY_PARTITION variable=v_gate_streams complete dim=1
+#pragma HLS STREAM variable=u_cur_gate_streams depth=1 dim=1
+#pragma HLS STREAM variable=u_rec_gate_streams depth=1 dim=1
+#pragma HLS STREAM variable=v_gate_streams depth=1 dim=1
+  hlsutils::Log(0, "Starting ArraySplitter");
+  svd::ArraySplitter, svd::WeightD, kBitWidthU, FIX_WIDTH, kUcurSize>(
+      u_cur_port, u_cur_gate_streams);
+  svd::ArraySplitter, svd::WeightD, kBitWidthU, FIX_WIDTH, kUrecSize>(
+      u_rec_port, u_rec_gate_streams);
+  svd::ArraySplitter, svd::WeightD, kBitWidthV, FIX_WIDTH, kVsize>(
+      v_port, v_gate_streams);
+#else
+  hls::stream u_cur_gate_streams[kNumGates / 2];
+  hls::stream u_rec_gate_streams[kNumGates / 2];
+  hls::stream v_gate_streams[kNumGates];
+#pragma HLS ARRAY_PARTITION variable=u_cur_gate_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=u_rec_gate_streams complete dim=0
+#pragma HLS ARRAY_PARTITION variable=v_gate_streams complete dim=0
+#pragma HLS STREAM variable=u_cur_gate_streams depth=2
+#pragma HLS STREAM variable=u_rec_gate_streams depth=2
+#pragma HLS STREAM variable=v_gate_streams depth=2
+  // NOTE(review): this branch calls StreamSplitter, yet the log message still
+  // says "ArraySplitter".
+  hlsutils::Log(0, "Starting ArraySplitter");
+  svd::StreamSplitter, svd::WeightD, kBitWidthU, FIX_WIDTH>(
+      kUcurSize, u_cur_port, u_cur_gate_streams);
+  svd::StreamSplitter, svd::WeightD, kBitWidthU, FIX_WIDTH>(
+      kUrecSize, u_rec_port, u_rec_gate_streams);
+  svd::StreamSplitter, svd::WeightD, kBitWidthV, FIX_WIDTH>(
+      kVsize, v_port, v_gate_streams);
+#endif
+  // The scalar weights go through StreamSplitter in both configurations.
+  svd::StreamSplitter, svd::WeightD, kBitWidthS, FIX_WIDTH>(
+      kSsize, s1_port, gates_s1_streams);
+  svd::StreamSplitter, svd::WeightD, kBitWidthS, FIX_WIDTH>(
+      kSsize, s2_port, gates_s2_streams);
+  // Flag passed as the first dispatcher argument: true for U, false for V.
+  const bool kUweights = true;
+  // ===========================================================================
+  // Current Dot Product Unit
+  // ===========================================================================
+#ifndef __VITIS_HLS__
+  Current_SVD_Kernels:
+  for (int g = 0; g < kNumCurGates; ++g) {
+#pragma HLS UNROLL
+    svd::DispatchGateFromArray(kUweights, kNumIter, kNumNonZeroTilesU,
+      kTileSizeUCurrent, u_cur_gate_streams[g], cur_u_streams[g]);
+    svd::DispatchGateFromArray(!kUweights, kNumIter, kNumNonZeroTilesV, kTileSizeV,
+      v_gate_streams[g], cur_v_streams[g]);
+  }
+#else
+  svd::DispatchGateFromStream(
+    kUweights, kNumIter, kNumNonZeroTilesU, kTileSizeUCurrent,
+    u_cur_gate_streams, cur_u_streams);
+  svd::DispatchGateFromStream(
+    !kUweights, kNumIter, kNumNonZeroTilesV, kTileSizeV,
+    v_gate_streams, cur_v_streams);
+#endif
+  svd::UDotUnit2Lstm(
+    x1_streams, x2_streams, cur_u_streams, cur_dot1_streams,
+    cur_dot2_streams);
+  svd::VDotUnit2LstmV2(
+    false, nullptr, nullptr,
+    cur_dot1_streams, cur_dot2_streams,
+    gates_s1_streams, gates_s2_streams,
+    cur_v_streams, nz_v_stream1_cur,
+    cur_acc1_streams, cur_acc2_streams);
+  // ===========================================================================
+  // Recur Dot Product Unit
+  // ===========================================================================
+#ifndef __VITIS_HLS__
+  Recurrent_SVD_Kernels:
+  for (int g = 0; g < kNumRecGates; ++g) {
+#pragma HLS UNROLL
+    svd::DispatchGateFromArray(kUweights, kNumIter, kNumNonZeroTilesU,
+      kTileSizeURecur, u_rec_gate_streams[g], rec_u_streams[g]);
+    // The recurrent V weights sit after the kNumCurGates current ones.
+    svd::DispatchGateFromArray(!kUweights, kNumIter, kNumNonZeroTilesV, kTileSizeV,
+      v_gate_streams[kNumCurGates + g], rec_v_streams[g]);
+  }
+#else
+  svd::DispatchGateFromStream(
+    kUweights, kNumIter, kNumNonZeroTilesU, kTileSizeURecur,
+    u_rec_gate_streams, rec_u_streams);
+  // NOTE(review): the array branch above offsets by kNumCurGates; this one
+  // (and the gates_s*_streams offsets below) uses kNumRecGates. Both constants
+  // are 4 today, so the result is the same, but the offset is logically the
+  // number of current gates.
+  svd::DispatchGateFromStream(
+    !kUweights, kNumIter, kNumNonZeroTilesV, kTileSizeV,
+    &v_gate_streams[kNumRecGates], rec_v_streams);
+#endif
+  svd::UDotUnit2Lstm(
+    h1_streams, h2_streams, rec_u_streams, rec_dot1_streams,
+    rec_dot2_streams);
+  svd::VDotUnit2LstmV2(
+    false, nullptr, nullptr, rec_dot1_streams, rec_dot2_streams,
+    &gates_s1_streams[kNumRecGates], &gates_s2_streams[kNumRecGates],
+    rec_v_streams, nz_v_stream1_rec, rec_acc1_streams, rec_acc2_streams);
+  // ===========================================================================
+  // Output Non-Linearities
+  // ===========================================================================
+  // One unit per port set; each receives the current and recurrent
+  // accumulation streams, the previous cell state and the bias.
+  svd::NonLinearityUnit(c_t1_prev_port,
+    cur_acc1_streams, rec_acc1_streams, h_t1_curr_port, c_t1_curr_port, true,
+    bias1_port);
+  svd::NonLinearityUnit(c_t2_prev_port,
+    cur_acc2_streams, rec_acc2_streams, h_t2_curr_port, c_t2_curr_port, true,
+    bias2_port);
+}
+
+} // svd
+
+// The #ifndef branch is empty: HlsLstmSvd is compiled only when __VITIS_HLS__
+// is defined. It declares the scalar arguments as s_axilite registers and the
+// streams as axis ports, then forwards everything to svd::LstmSvdKernel.
+#ifndef __VITIS_HLS__
+#else
+void HlsLstmSvd(const int num_active_inputs,
+    const int input_size,
+    const int output_size,
+    const int num_refinements[svd::lstm_params::N],
+    // const hls::vector num_refinements,
+    // Current Gates
+    hls::stream& x_port,
+    hls::stream& u_cur_port,
+    hls::stream& s_cur_port,
+    hls::stream& v_cur_port,
+    // Recurrent Gates
+    hls::stream& h_prev_port,
+    hls::stream& u_rec_port,
+    hls::stream& s_rec_port,
+    hls::stream& v_rec_port,
+    // Non-Linearities
+    hls::stream& bias_port,
+    hls::stream& c_prev_port,
+    hls::stream& h_curr_port,
+    hls::stream& c_curr_port) {
+#pragma HLS INTERFACE s_axilite port=return bundle=ctrl
+#pragma HLS INTERFACE s_axilite port=num_active_inputs bundle=ctrl
+#pragma HLS INTERFACE s_axilite
port=input_size bundle=ctrl
+#pragma HLS INTERFACE s_axilite port=num_refinements bundle=ctrl
+#pragma HLS INTERFACE s_axilite port=output_size bundle=ctrl
+#pragma HLS INTERFACE axis port=x_port
+#pragma HLS INTERFACE axis port=u_cur_port
+#pragma HLS INTERFACE axis port=s_cur_port
+#pragma HLS INTERFACE axis port=v_cur_port
+#pragma HLS INTERFACE axis port=h_prev_port
+#pragma HLS INTERFACE axis port=u_rec_port
+#pragma HLS INTERFACE axis port=s_rec_port
+#pragma HLS INTERFACE axis port=v_rec_port
+#pragma HLS INTERFACE axis port=bias_port
+#pragma HLS INTERFACE axis port=c_prev_port
+#pragma HLS INTERFACE axis port=h_curr_port
+#pragma HLS INTERFACE axis port=c_curr_port
+  // All the work is delegated to the kernel; this function only fixes the
+  // interface pragmas.
+  svd::LstmSvdKernel(num_active_inputs, input_size,
+    output_size, num_refinements, x_port, u_cur_port, s_cur_port, v_cur_port,
+    h_prev_port, u_rec_port, s_rec_port, v_rec_port, bias_port, c_prev_port,
+    h_curr_port, c_curr_port);
+}
+#endif // __VITIS_HLS__
+
+/**
+ * @brief HLS wrapper that calls the LstmSvd accelerator (HlsLstmSvd).
+ *
+ * Useful in Cosimulation. The pointer arguments are copied into local
+ * streams by svd::SetLstmSvdInputs, HlsLstmSvd is run on those streams, and
+ * svd::GetLstmSvdOutputs copies the result streams back into h_curr / c_curr.
+ *
+ * When __VITIS_HLS__ is not defined the whole body is compiled out, so the
+ * function returns without touching h_curr or c_curr.
+ *
+ * NOTE(review): array shapes are not established by this file. By analogy
+ * with the dense wrapper, x is presumably (N, I), u_* (R, I, G), s_* (R, N, G),
+ * v_* (R, H, G) and bias (N, G, H) - TODO confirm against SetLstmSvdInputs.
+ *
+ * @param[in] num_active_inputs The number of active inputs
+ * @param[in] input_size The input size
+ * @param[in] output_size The output size
+ * @param[in] num_refinements The number of refinements, one entry per input
+ *                            (svd::lstm_params::N entries)
+ * @param[in] x The input array of the current gates
+ * @param[in] u_cur The u array of the current gates
+ * @param[in] s_cur The s array of the current gates
+ * @param[in] v_cur The v array of the current gates
+ * @param[in] h The previous hidden state, input of the recurrent gates
+ * @param[in] u_rec The u array of the recurrent gates
+ * @param[in] s_rec The s array of the recurrent gates
+ * @param[in] v_rec The v array of the recurrent gates
+ * @param[in] bias The bias array
+ * @param[in] c_prev The previous cell state
+ * @param[out] h_curr The current hidden state
+ * @param[out] c_curr The current cell state
+ */
+void HlsWrapperLstmSvd(
+    const int num_active_inputs,
+    const int input_size,
+    const int output_size,
+    const int num_refinements[svd::lstm_params::N],
+    // Current Gates
+    const typename svd::lstm_params::ActivationD* x,
+    const typename svd::lstm_params::ActivationD* u_cur,
+    const typename svd::lstm_params::ActivationD* s_cur,
+    const typename svd::lstm_params::ActivationD* v_cur,
+    // Recurrent Gates
+    const typename svd::lstm_params::ActivationD* h,
+    const typename svd::lstm_params::ActivationD* u_rec,
+    const typename svd::lstm_params::ActivationD* s_rec,
+    const typename svd::lstm_params::ActivationD* v_rec,
+    // Non-Linearities
+    const typename svd::lstm_params::ActivationD* bias,
+    const typename svd::lstm_params::ActivationD* c_prev,
+    typename svd::lstm_params::ActivationD* h_curr,
+    typename svd::lstm_params::ActivationD* c_curr) {
+#ifdef __VITIS_HLS__
+  // Current Gates
+  hls::stream x_port;
+  hls::stream u_cur_port;
+  hls::stream s_cur_port;
+  hls::stream v_cur_port;
+  // Recurrent Gates
+  hls::stream h_prev_port;
+  hls::stream u_rec_port;
+  hls::stream s_rec_port;
+  hls::stream v_rec_port;
+  // Non-Linearities
+  hls::stream bias_port;
+  hls::stream c_prev_port;
+  hls::stream h_curr_port;
+  hls::stream c_curr_port;
+  // Step 1: load the input arrays into the input streams.
+  svd::SetLstmSvdInputs(
+    num_active_inputs, input_size, output_size, num_refinements,
+    x, u_cur, s_cur, v_cur, h, u_rec, s_rec, v_rec, bias, c_prev,
+    x_port, u_cur_port, s_cur_port, v_cur_port,
+    h_prev_port, u_rec_port, s_rec_port, v_rec_port, bias_port, c_prev_port);
+  // Step 2: run the accelerator on the streams.
+  HlsLstmSvd(num_active_inputs, input_size, output_size, num_refinements,
+    x_port, u_cur_port, s_cur_port, v_cur_port,
+    h_prev_port, u_rec_port, s_rec_port, v_rec_port,
+    bias_port, c_prev_port, h_curr_port, c_curr_port);
+  // Step 3: drain the output streams into the caller's arrays.
+  svd::GetLstmSvdOutputs(num_active_inputs,
+    output_size, h_curr, c_curr, h_curr_port, c_curr_port);
+#endif // __VITIS_HLS__
+}
\ No newline at end of file
diff --git a/src/layers/lstm/hls/lstm_svd_emulator.cpp
b/src/layers/lstm/hls/lstm_svd_emulator.cpp
new file mode 100644
index 0000000..57ae4b4
--- /dev/null
+++ b/src/layers/lstm/hls/lstm_svd_emulator.cpp
@@ -0,0 +1 @@
+#include "layers/lstm/hls/lstm_svd_emulator.h"
\ No newline at end of file
diff --git a/src/layers/lstm/lstm_data_handler.cpp b/src/layers/lstm/lstm_data_handler.cpp
new file mode 100644
index 0000000..a0e81bf
--- /dev/null
+++ b/src/layers/lstm/lstm_data_handler.cpp
@@ -0,0 +1 @@
+#include "layers/lstm/lstm_data_handler.h"
\ No newline at end of file
diff --git a/src/layers/lstm/sw/CMakeLists.txt b/src/layers/lstm/sw/CMakeLists.txt
new file mode 100644
index 0000000..0f85f74
--- /dev/null
+++ b/src/layers/lstm/sw/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.10)
+
+# Library built from soft_lstm.cpp. It is given the project headers, the HLS
+# tool headers (HLS_INCLUDE_DIRS) and the OpenCv headers/libraries.
+add_library(SOFT_LSTM STATIC ${CMAKE_SOURCE_DIR}/src/layers/lstm/sw/soft_lstm.cpp)
+target_include_directories(SOFT_LSTM PUBLIC
+  ${CMAKE_SOURCE_DIR}/include
+  ${HLS_INCLUDE_DIRS}
+  ${OpenCv_INCLUDE_DIRS})
+target_link_libraries(SOFT_LSTM ${OpenCv_LIBS})
+
+# Library built from soft_lstm_svd.cpp. Besides OpenCv it links BLAS_UTILS and
+# ACTIVATION_FUNCTIONS, presumably library targets defined elsewhere in the
+# project.
+add_library(SOFT_LSTM_SVD STATIC ${CMAKE_SOURCE_DIR}/src/layers/lstm/sw/soft_lstm_svd.cpp)
+target_include_directories(SOFT_LSTM_SVD PUBLIC
+  ${CMAKE_SOURCE_DIR}/include
+  ${HLS_INCLUDE_DIRS}
+  ${OpenCv_INCLUDE_DIRS})
+target_link_libraries(SOFT_LSTM_SVD
+  ${OpenCv_LIBS}
+  BLAS_UTILS
+  ACTIVATION_FUNCTIONS)
diff --git a/src/lstm/sw/soft_lstm.cpp b/src/layers/lstm/sw/soft_lstm.cpp
similarity index 99%
rename from src/lstm/sw/soft_lstm.cpp
rename to src/layers/lstm/sw/soft_lstm.cpp
index c535a6d..253838a 100644
--- a/src/lstm/sw/soft_lstm.cpp
+++ b/src/layers/lstm/sw/soft_lstm.cpp
@@ -36,7 +36,7 @@
  * streams *
  *****************************************************************************/
-#include "lstm/sw/soft_lstm.h"
+#include "layers/lstm/sw/soft_lstm.h" #include "math_utils/blas_utils.h" #include "math_utils/activation_functions.h" diff --git a/src/lstm/sw/soft_lstm_svd.cpp b/src/layers/lstm/sw/soft_lstm_svd.cpp similarity index 96% rename from src/lstm/sw/soft_lstm_svd.cpp rename to src/layers/lstm/sw/soft_lstm_svd.cpp index 0000166..e1ff807 100644 --- a/src/lstm/sw/soft_lstm_svd.cpp +++ b/src/layers/lstm/sw/soft_lstm_svd.cpp @@ -1,4 +1,6 @@ -#include "lstm/sw/soft_lstm_svd.h" +#include "layers/lstm/sw/soft_lstm_svd.h" + +namespace svd { #ifdef __cplusplus extern "C" @@ -959,7 +961,7 @@ void SvdModelSoftwareUnbatched(const int verbose, #ifdef __cplusplus extern "C" #endif -void SvdModel2LstmFix8(const int verbose, +void SvdModelLstmFix8(const int verbose, const Fix8D *x, const int num_samples, const int num_timesteps, @@ -995,47 +997,19 @@ void SvdModel2LstmFix8(const int verbose, const Fix8D *bias_c, const Fix8D *bias_o, Fix8D *out) { - SvdModel2LstmTemplatedLatencyCC(verbose, x, - num_samples, - num_timesteps, - n_steps, - input_size, - hidden_size, - cur_i_u, - cur_i_s, - cur_i_v, - cur_f_u, - cur_f_s, - cur_f_v, - cur_c_u, - cur_c_s, - cur_c_v, - cur_o_u, - cur_o_s, - cur_o_v, - rec_i_u, - rec_i_s, - rec_i_v, - rec_f_u, - rec_f_s, - rec_f_v, - rec_c_u, - rec_c_s, - rec_c_v, - rec_o_u, - rec_o_s, - rec_o_v, - bias_i, - bias_f, - bias_c, - bias_o, - out); + SvdModelLstmTemplatedLatencyCC(verbose, x, + num_samples, num_timesteps, n_steps, input_size, hidden_size, + cur_i_u, cur_i_s, cur_i_v, cur_f_u, cur_f_s, cur_f_v, + cur_c_u, cur_c_s, cur_c_v, cur_o_u, cur_o_s, cur_o_v, + rec_i_u, rec_i_s, rec_i_v, rec_f_u, rec_f_s, rec_f_v, + rec_c_u, rec_c_s, rec_c_v, rec_o_u, rec_o_s, rec_o_v, + bias_i, bias_f, bias_c, bias_o, out); } #ifdef __cplusplus extern "C" #endif -void SvdModel2LstmFix16(const int verbose, +void SvdModelLstmFix16(const int verbose, const Fix16D *x, const int num_samples, const int num_timesteps, @@ -1071,47 +1045,19 @@ void SvdModel2LstmFix16(const int 
verbose, const Fix16D *bias_c, const Fix16D *bias_o, Fix16D *out) { - SvdModel2LstmTemplatedLatencyCC(verbose, x, - num_samples, - num_timesteps, - n_steps, - input_size, - hidden_size, - cur_i_u, - cur_i_s, - cur_i_v, - cur_f_u, - cur_f_s, - cur_f_v, - cur_c_u, - cur_c_s, - cur_c_v, - cur_o_u, - cur_o_s, - cur_o_v, - rec_i_u, - rec_i_s, - rec_i_v, - rec_f_u, - rec_f_s, - rec_f_v, - rec_c_u, - rec_c_s, - rec_c_v, - rec_o_u, - rec_o_s, - rec_o_v, - bias_i, - bias_f, - bias_c, - bias_o, - out); + SvdModelLstmTemplatedLatencyCC(verbose, x, + num_samples, num_timesteps, n_steps, input_size, hidden_size, + cur_i_u, cur_i_s, cur_i_v, cur_f_u, cur_f_s, cur_f_v, + cur_c_u, cur_c_s, cur_c_v, cur_o_u, cur_o_s, cur_o_v, + rec_i_u, rec_i_s, rec_i_v, rec_f_u, rec_f_s, rec_f_v, + rec_c_u, rec_c_s, rec_c_v, rec_o_u, rec_o_s, rec_o_v, + bias_i, bias_f, bias_c, bias_o, out); } #ifdef __cplusplus extern "C" #endif -void SvdModel2LstmHalf(const int verbose, +void SvdModelLstmHalf(const int verbose, const HalfD *x, const int num_samples, const int num_timesteps, @@ -1147,47 +1093,19 @@ void SvdModel2LstmHalf(const int verbose, const HalfD *bias_c, const HalfD *bias_o, HalfD *out) { - SvdModel2LstmTemplatedLatencyCC(verbose, x, - num_samples, - num_timesteps, - n_steps, - input_size, - hidden_size, - cur_i_u, - cur_i_s, - cur_i_v, - cur_f_u, - cur_f_s, - cur_f_v, - cur_c_u, - cur_c_s, - cur_c_v, - cur_o_u, - cur_o_s, - cur_o_v, - rec_i_u, - rec_i_s, - rec_i_v, - rec_f_u, - rec_f_s, - rec_f_v, - rec_c_u, - rec_c_s, - rec_c_v, - rec_o_u, - rec_o_s, - rec_o_v, - bias_i, - bias_f, - bias_c, - bias_o, - out); + SvdModelLstmTemplatedLatencyCC(verbose, x, + num_samples, num_timesteps, n_steps, input_size, hidden_size, + cur_i_u, cur_i_s, cur_i_v, cur_f_u, cur_f_s, cur_f_v, + cur_c_u, cur_c_s, cur_c_v, cur_o_u, cur_o_s, cur_o_v, + rec_i_u, rec_i_s, rec_i_v, rec_f_u, rec_f_s, rec_f_v, + rec_c_u, rec_c_s, rec_c_v, rec_o_u, rec_o_s, rec_o_v, + bias_i, bias_f, bias_c, bias_o, out); } #ifdef 
__cplusplus extern "C" #endif -void SvdModel2LstmSoftware(const int verbose, +void SvdModelLstmSoftware(const int verbose, const bool use_blas, const int type, // 0:float, 1:fix8, 2:fix16, 3:half16 const float *x, @@ -1329,7 +1247,7 @@ void SvdModel2LstmSoftware(const int verbose, // ===================================================================== // Call function // ===================================================================== - SvdModel2LstmFix8(verbose, x_fix, num_samples, num_timesteps, n_steps, input_size, + SvdModelLstmFix8(verbose, x_fix, num_samples, num_timesteps, n_steps, input_size, hidden_size, cur_i_u_fix, cur_i_s_fix, cur_i_v_fix, cur_f_u_fix, cur_f_s_fix, cur_f_v_fix, cur_c_u_fix, cur_c_s_fix, cur_c_v_fix, cur_o_u_fix, cur_o_s_fix, @@ -1446,7 +1364,7 @@ void SvdModel2LstmSoftware(const int verbose, // ===================================================================== // Call function // ===================================================================== - SvdModel2LstmFix16(verbose, x_fix, num_samples, num_timesteps, n_steps, + SvdModelLstmFix16(verbose, x_fix, num_samples, num_timesteps, n_steps, input_size, hidden_size, cur_i_u_fix, cur_i_s_fix, cur_i_v_fix, cur_f_u_fix, cur_f_s_fix, cur_f_v_fix, cur_c_u_fix, cur_c_s_fix, cur_c_v_fix, cur_o_u_fix, @@ -1564,8 +1482,8 @@ void SvdModel2LstmSoftware(const int verbose, // ===================================================================== // Call function // ===================================================================== - // std::cout << "Starting SvdModel2LstmHalf\n"; - SvdModel2LstmHalf(verbose, x_fix, num_samples, num_timesteps, n_steps, + // std::cout << "Starting SvdModelLstmHalf\n"; + SvdModelLstmHalf(verbose, x_fix, num_samples, num_timesteps, n_steps, input_size, hidden_size, cur_i_u_fix, cur_i_s_fix, cur_i_v_fix, cur_f_u_fix, cur_f_s_fix, cur_f_v_fix, cur_c_u_fix, cur_c_s_fix, cur_c_v_fix, cur_o_u_fix, @@ -2741,7 +2659,7 @@ void SvdModelEigenUnbatched(const int 
verbose, #ifdef __cplusplus extern "C" #endif -void SvdModel2LstmHardware(const int verbose, +void SvdModelLstmHardware(const int verbose, const bool use_blas, const int type, // 0:float, 1:fix8, 2:fix16 const float *x1, @@ -2803,7 +2721,7 @@ void SvdModel2LstmHardware(const int verbose, int Tv, int ZTv, int NumTimesteps - SvdModel2LstmTemplated(x1, + SvdModelLstmTemplated(x1, x2, cur_i_u, cur_i_s, @@ -2942,7 +2860,7 @@ void SvdModel2LstmHardware(const int verbose, // ===================================================================== // Call function // ===================================================================== - SvdModel2LstmFix8(verbose, x_fix, num_samples, num_timesteps, n_steps, input_size, + SvdModelLstmFix8(verbose, x_fix, num_samples, num_timesteps, n_steps, input_size, hidden_size, cur_i_u_fix, cur_i_s_fix, cur_i_v_fix, cur_f_u_fix, cur_f_s_fix, cur_f_v_fix, cur_c_u_fix, cur_c_s_fix, cur_c_v_fix, cur_o_u_fix, cur_o_s_fix, @@ -3059,7 +2977,7 @@ void SvdModel2LstmHardware(const int verbose, // ===================================================================== // Call function // ===================================================================== - SvdModel2LstmFix16(verbose, x_fix, num_samples, num_timesteps, n_steps, + SvdModelLstmFix16(verbose, x_fix, num_samples, num_timesteps, n_steps, input_size, hidden_size, cur_i_u_fix, cur_i_s_fix, cur_i_v_fix, cur_f_u_fix, cur_f_s_fix, cur_f_v_fix, cur_c_u_fix, cur_c_s_fix, cur_c_v_fix, cur_o_u_fix, @@ -3177,8 +3095,8 @@ void SvdModel2LstmHardware(const int verbose, // ===================================================================== // Call function // ===================================================================== - // std::cout << "Starting SvdModel2LstmHalf\n"; - SvdModel2LstmHalf(verbose, x_fix, num_samples, num_timesteps, n_steps, + // std::cout << "Starting SvdModelLstmHalf\n"; + SvdModelLstmHalf(verbose, x_fix, num_samples, num_timesteps, n_steps, input_size, hidden_size, 
cur_i_u_fix, cur_i_s_fix, cur_i_v_fix, cur_f_u_fix, cur_f_s_fix, cur_f_v_fix, cur_c_u_fix, cur_c_s_fix, cur_c_v_fix, cur_o_u_fix, @@ -3256,7 +3174,7 @@ void print_vect(const int size, const int num_elem_to_print, T *v) { #ifdef __cplusplus extern "C" #endif -void SvdModel2LstmSoftwareBatched(const int verbose, +void SvdModelLstmSoftwareBatched(const int verbose, const bool use_blas, const float *x, // (num_samples, num_inputs, num_timesteps, input_size) const int num_inputs, @@ -3741,3 +3659,5 @@ void SvdModel2LstmSoftwareBatched(const int verbose, delete[] rec_o_us; } #endif + +} // svd diff --git a/src/lstm/hls/CMakeLists.txt b/src/lstm/hls/CMakeLists.txt deleted file mode 100644 index 0859f22..0000000 --- a/src/lstm/hls/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -cmake_minimum_required(VERSION 3.10) - -add_library(LSTM_HARDWARE STATIC ${CMAKE_SOURCE_DIR}/src/lstm/hls/lstm_hardware.cpp) -target_include_directories(LSTM_HARDWARE PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(LSTM_HARDWARE PUBLIC ${VIVADO_INCLUDE_DIRS}) -target_include_directories(LSTM_HARDWARE PUBLIC ${OpenCv_INCLUDE_DIRS}) -target_link_libraries(LSTM_HARDWARE ${OpenCv_LIBS}) - -add_library(LSTM_SVD STATIC ${CMAKE_SOURCE_DIR}/src/lstm/hls/lstm_svd.cpp) -target_include_directories(LSTM_SVD PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(LSTM_SVD PUBLIC ${VIVADO_INCLUDE_DIRS}) -target_include_directories(LSTM_SVD PUBLIC ${OpenCv_INCLUDE_DIRS}) -target_link_libraries(LSTM_SVD ${OpenCv_LIBS}) \ No newline at end of file diff --git a/src/lstm/hls/lstm_svd.cpp b/src/lstm/hls/lstm_svd.cpp deleted file mode 100644 index 6b1adc5..0000000 --- a/src/lstm/hls/lstm_svd.cpp +++ /dev/null @@ -1,372 +0,0 @@ -#include "lstm/hls/lstm_svd.h" -#include "svd_params.h" -#include "dma/svd_dma.h" -#include "kernel/u_kernel.h" -#include "kernel/s_kernel.h" -#include "kernel/v_kernel.h" -#include "math_utils/activation_functions.h" -#include "hls_utils/hls_debugging.h" - -#include 
"hls_stream.h" -#include "ap_int.h" - -void SvdModel2LstmSDSoCV2( - const svd::ActivationD x1_port[INPUT_SIZE], - const svd::ActivationD x2_port[INPUT_SIZE], - const svd::ActivationD h_t1_prev_port[HIDDEN_SIZE], - const svd::ActivationD h_t2_prev_port[HIDDEN_SIZE], - const svd::ActivationD c_t1_prev_port[HIDDEN_SIZE], - const svd::ActivationD c_t2_prev_port[HIDDEN_SIZE], - const ap_uint *u_cur_port, // [NUM_ITERATIONS*4*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)], - const ap_uint *u_rec_port, // [NUM_ITERATIONS*4*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)], - const ap_uint *v_port, // [NUM_ITERATIONS*4*2*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V)], - const ap_uint *s1_port, // [NUM_ITERATIONS*8], - const ap_uint *s2_port, // [NUM_ITERATIONS*8], - const svd::WeightD bias1_port[4 * HIDDEN_SIZE], - const svd::WeightD bias2_port[4 * HIDDEN_SIZE], - const ap_uint comb_v_port[NUM_ITERATIONS * 8], - const ap_uint comb_u_port[NUM_ITERATIONS * 8], - svd::ActivationD h_t1_curr_port[HIDDEN_SIZE], - svd::ActivationD h_t2_curr_port[HIDDEN_SIZE], - svd::ActivationD c_t1_curr_port[HIDDEN_SIZE], - svd::ActivationD c_t2_curr_port[HIDDEN_SIZE] -#ifdef DEBUG_FIFOS - , - svd::CounterD *counters_port, - svd::CounterD *clk_count_port -#endif - ) { - std::cout << "[INFO] Running SvdModel2LstmSDSoCV2." 
<< std::endl; - const int kNumGates = 8; - const int kNumCurGates = 4; - const int kNumRecGates = 4; - const int kInputLength = INPUT_SIZE; - const int kOutputLength = HIDDEN_SIZE; - const int kNumTilesU = NUM_TILES_U; - const int kNumTilesV = NUM_TILES_V; - const int kNumZeroTilesU = NUM_ZERO_TILES_U; - const int kNumZeroTilesV = NUM_ZERO_TILES_V; - const int kNumIter = NUM_ITERATIONS; - const int kNumTimesteps = NUM_TIMESTEPS; - const int kNumNonZeroTilesU = kNumTilesU - kNumZeroTilesU; - const int kNumNonZeroTilesV = kNumTilesV - kNumZeroTilesV; - const int kNumElemsTileUCurrent = kInputLength / kNumTilesU; - const int kNumElemsTileURecur = kOutputLength / kNumTilesU; - const int kNumElemsTileV = kOutputLength / kNumTilesV; - assert(kNumTilesU % 2 == 0); - assert(kNumTilesV % 2 == 0); - // assert(kNumZeroTilesU % 2 == 0); - // assert(kNumZeroTilesV % 2 == 0); - assert(kNumIter % 2 == 0); - std::cout << "[INFO] assert passed." << std::endl; - - const int kNumElemsTileU = kInputLength / kNumTilesU; - const int kPrunedLengthU = kInputLength - kNumZeroTilesU * kNumElemsTileU; - const int kPrunedLengthV = kOutputLength - kNumZeroTilesV * kNumElemsTileV; - const int kNumSamples = NUM_SAMPLES; // Used for cosimulation only - const int kNumReadsR = 8 * kNumIter; - const int kNumReadsC = 8 * kNumIter; - const int kAxiDepthR = kInputLength; - const int kAxiPortDepthX = (kInputLength * kNumTimesteps) * kNumSamples; - const int kAxiDepthU = (kNumIter * 8 * kPrunedLengthU) * kNumSamples; - const int kAxiDepthV = (kNumIter * 8 * kPrunedLengthV) * kNumSamples; - const int kAxiDepthS = (kNumIter * 8 * 2) * kNumSamples; - const int kAxiDepthCombinationsR = kNumReadsR * kNumSamples; - const int kAxiDepthCombinationsC = kNumReadsC * kNumSamples; -#ifndef SDS_DESIGN -#pragma HLS INTERFACE s_axilite port=return bundle=ctrl - - const int kInputLengthPruned = kInputLength - kInputLength / kNumTilesU * kNumZeroTilesU; - const int kOutputLengthPrunedU = kOutputLength - kOutputLength / 
kNumTilesU * kNumZeroTilesU; - const int kOutputLengthPrunedV = kOutputLength - kOutputLength / kNumTilesV * kNumZeroTilesV; - const int kUSize = kNumIter*(kNumCurGates * kInputLengthPruned + kNumRecGates * kOutputLengthPrunedU); - const int kVSize = kNumIter*(kNumCurGates * kOutputLengthPrunedV + kNumRecGates * kOutputLengthPrunedV); - const int kSSize = kNumIter * 2 * (kNumCurGates + kNumRecGates); - - const int kUCurSize = kNumIter * kNumCurGates * kInputLengthPruned; - const int kURecSize = kNumIter * kNumRecGates * kOutputLengthPrunedU; - - const int kUcurPortDepth = kUCurSize; - const int kUrecPortDepth = kURecSize; - const int kVportDepth = kVSize; - const int kS1portDepth = kSSize / 2; - const int kS2portDepth = kSSize / 2; -#pragma HLS INTERFACE m_axi port=u_cur_port offset=slave depth=kUcurPortDepth bundle=u_cur_dmem -#pragma HLS INTERFACE m_axi port=u_rec_port offset=slave depth=kUrecPortDepth bundle=u_rec_dmem -#pragma HLS INTERFACE m_axi port=v_port offset=slave depth=kVportDepth bundle=v_dmem -#pragma HLS INTERFACE m_axi port=s1_port offset=slave depth=kS1portDepth bundle=s1_dmem -#pragma HLS INTERFACE m_axi port=s2_port offset=slave depth=kS2portDepth bundle=s2_dmem - -#pragma HLS INTERFACE ap_fifo port=x1_port -#pragma HLS INTERFACE ap_fifo port=x2_port -#pragma HLS INTERFACE ap_fifo port=bias1_port -#pragma HLS INTERFACE ap_fifo port=bias2_port -#pragma HLS INTERFACE ap_fifo port=comb_v_port -#pragma HLS INTERFACE ap_fifo port=comb_u_port -#pragma HLS INTERFACE ap_fifo port=h_t1_prev_port -#pragma HLS INTERFACE ap_fifo port=h_t2_prev_port -#pragma HLS INTERFACE ap_fifo port=h_t1_curr_port -#pragma HLS INTERFACE ap_fifo port=h_t2_curr_port -#pragma HLS INTERFACE ap_fifo port=c_t1_prev_port -#pragma HLS INTERFACE ap_fifo port=c_t2_prev_port -#pragma HLS INTERFACE ap_fifo port=c_t1_curr_port -#pragma HLS INTERFACE ap_fifo port=c_t2_curr_port -#endif // SDS_DESIGN - -#pragma HLS DATAFLOW - std::cout << "[INFO] DATAFLOW passed." 
<< std::endl; - - // =========================================================================== - // Current streams - // =========================================================================== - svd::WeightStream cur_u_streams[kNumCurGates][kNumNonZeroTilesU]; - svd::WeightStream cur_v_streams[kNumCurGates][kNumElemsTileV]; // [kNumNonZeroTilesV]; - svd::ActivationStream cur_dot1_streams[kNumCurGates]; - svd::ActivationStream cur_dot2_streams[kNumCurGates]; - svd::ActivationStream cur_out1_streams[kNumCurGates][kNumNonZeroTilesV]; - svd::ActivationStream cur_out2_streams[kNumCurGates][kNumNonZeroTilesV]; - svd::ActivationStream cur_acc1_streams[kNumCurGates][kNumElemsTileV]; // [kNumTilesV]; - svd::ActivationStream cur_acc2_streams[kNumCurGates][kNumElemsTileV]; // [kNumTilesV]; -#pragma HLS ARRAY_PARTITION variable=cur_u_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=cur_v_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=cur_dot1_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=cur_dot2_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=cur_out1_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=cur_out2_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=cur_acc1_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=cur_acc2_streams complete dim=0 - // =========================================================================== - // Recur streams - // =========================================================================== - svd::WeightStream rec_u_streams[kNumRecGates][kNumNonZeroTilesU]; - svd::WeightStream rec_v_streams[kNumRecGates][kNumElemsTileV]; // [kNumNonZeroTilesV]; - svd::ActivationStream rec_dot1_streams[kNumRecGates]; - svd::ActivationStream rec_dot2_streams[kNumRecGates]; - svd::ActivationStream rec_out1_streams[kNumRecGates][kNumNonZeroTilesV]; - svd::ActivationStream rec_out2_streams[kNumRecGates][kNumNonZeroTilesV]; - svd::ActivationStream 
rec_acc1_streams[kNumRecGates][kNumElemsTileV]; // [kNumTilesV]; - svd::ActivationStream rec_acc2_streams[kNumRecGates][kNumElemsTileV]; // [kNumTilesV]; -#pragma HLS ARRAY_PARTITION variable=rec_u_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=rec_v_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=rec_dot1_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=rec_dot2_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=rec_out1_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=rec_out2_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=rec_acc1_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=rec_acc2_streams complete dim=0 - // =========================================================================== - // Scalar streams - // =========================================================================== - svd::WeightStream gates_s1_streams[kNumGates]; // used for both curr and recur - svd::WeightStream gates_s2_streams[kNumGates]; // used for both curr and recur -#pragma HLS ARRAY_PARTITION variable=gates_s1_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=gates_s2_streams complete dim=0 - // =========================================================================== - // Current input streams - // =========================================================================== - svd::ActivationStream x1_streams[kNumCurGates][kNumNonZeroTilesU]; - svd::ActivationStream x2_streams[kNumCurGates][kNumNonZeroTilesU]; -#pragma HLS ARRAY_PARTITION variable=x1_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=x2_streams complete dim=0 - // =========================================================================== - // Recurrent input streams - // =========================================================================== - svd::ActivationStream h1_streams[kNumRecGates][kNumNonZeroTilesU]; - svd::ActivationStream h2_streams[kNumRecGates][kNumNonZeroTilesU]; -#pragma HLS 
ARRAY_PARTITION variable=h1_streams complete dim=0 -#pragma HLS ARRAY_PARTITION variable=h2_streams complete dim=0 - // =========================================================================== - // Streams Depth Sizing - // =========================================================================== - // NOTE: We divide the FIFO depths by a certain factor to save BRAMs. Be aware - // that a wrong factor could lead to deadlocks! - const int kFIFOdepthFactor = kNumIter * 2; - const int kStreamDepthUCurrent = kNumIter * kNumElemsTileUCurrent / kFIFOdepthFactor == 0 ? 2 : kNumIter * kNumElemsTileUCurrent / kFIFOdepthFactor; - const int kStreamDepthURecurrent = kNumIter * kNumElemsTileURecur / kFIFOdepthFactor == 0 ? 2 : kNumIter * kNumElemsTileURecur / kFIFOdepthFactor; - const int kStreamDepthV = kNumIter * kNumTilesV / kFIFOdepthFactor == 0 ? 2 : kNumIter * kNumTilesV / kFIFOdepthFactor; - const int kTileAccStreamDepth = 2; -#pragma HLS STREAM variable=x1_streams depth=kStreamDepthUCurrent dim=2 -#pragma HLS STREAM variable=x2_streams depth=kStreamDepthUCurrent dim=2 -#pragma HLS STREAM variable=h1_streams depth=kStreamDepthURecurrent dim=2 -#pragma HLS STREAM variable=h2_streams depth=kStreamDepthURecurrent dim=2 - -#pragma HLS STREAM variable=cur_u_streams depth=kStreamDepthUCurrent dim=2 -#pragma HLS STREAM variable=rec_u_streams depth=kStreamDepthURecurrent dim=2 -#pragma HLS STREAM variable=cur_v_streams depth=kStreamDepthV dim=2 -#pragma HLS STREAM variable=rec_v_streams depth=kStreamDepthV dim=2 - -#pragma HLS STREAM variable=gates_s1_streams depth=kStreamDepthIter -#pragma HLS STREAM variable=gates_s2_streams depth=kStreamDepthIter - -#pragma HLS STREAM variable=cur_dot1_streams depth=kStreamDepthIter -#pragma HLS STREAM variable=cur_dot2_streams depth=kStreamDepthIter -#pragma HLS STREAM variable=rec_dot1_streams depth=kStreamDepthIter -#pragma HLS STREAM variable=rec_dot2_streams depth=kStreamDepthIter -#pragma HLS STREAM variable=cur_acc1_streams 
depth=kTileAccStreamDepth dim=2 -#pragma HLS STREAM variable=cur_acc2_streams depth=kTileAccStreamDepth dim=2 -#pragma HLS STREAM variable=rec_acc1_streams depth=kTileAccStreamDepth dim=2 -#pragma HLS STREAM variable=rec_acc2_streams depth=kTileAccStreamDepth dim=2 - -#pragma HLS STREAM variable=cur_out1_streams depth=kOutStreamDepth dim=2 -#pragma HLS STREAM variable=cur_out2_streams depth=kOutStreamDepth dim=2 -#pragma HLS STREAM variable=rec_out1_streams depth=kOutStreamDepth dim=2 -#pragma HLS STREAM variable=rec_out2_streams depth=kOutStreamDepth dim=2 - std::cout << "[INFO] Depth sizing passed." << std::endl; - - // =========================================================================== - // Zero Combinations DMA - // =========================================================================== - // NOTE: We divide the FIFO depths by a certain factor to save BRAMs. Be aware - // that a wrong factor could lead to deadlocks! - const int kFIFOdepthDivider = 8; - const int kStreamDepthIter = kNumIter / kFIFOdepthDivider; - std::cout << "[INFO] DATAFLOW passed." 
<< std::endl; - hls::stream > comb_v_stream1_cur[kNumCurGates]; - hls::stream > comb_v_stream1_rec[kNumRecGates]; - hls::stream > comb_v_stream2_cur[kNumCurGates]; - hls::stream > comb_v_stream2_rec[kNumRecGates]; - hls::stream > comb_u_stream1_cur[kNumCurGates]; - hls::stream > comb_u_stream1_rec[kNumRecGates]; - hls::stream > comb_u_stream2_cur[kNumCurGates]; - hls::stream > comb_u_stream2_rec[kNumRecGates]; -#pragma HLS STREAM depth=kStreamDepthIter variable=comb_v_stream1_cur -#pragma HLS STREAM depth=kStreamDepthIter variable=comb_v_stream1_rec -#pragma HLS STREAM depth=kStreamDepthIter variable=comb_v_stream2_cur -#pragma HLS STREAM depth=kStreamDepthIter variable=comb_v_stream2_rec -#pragma HLS STREAM depth=kStreamDepthIter variable=comb_u_stream1_cur -#pragma HLS STREAM depth=kStreamDepthIter variable=comb_u_stream1_rec -#pragma HLS STREAM depth=kStreamDepthIter variable=comb_u_stream2_cur -#pragma HLS STREAM depth=kStreamDepthIter variable=comb_u_stream2_rec -#pragma HLS ARRAY_PARTITION variable=comb_v_stream1_cur complete -#pragma HLS ARRAY_PARTITION variable=comb_v_stream1_rec complete -#pragma HLS ARRAY_PARTITION variable=comb_v_stream2_cur complete -#pragma HLS ARRAY_PARTITION variable=comb_v_stream2_rec complete -#pragma HLS ARRAY_PARTITION variable=comb_u_stream1_cur complete -#pragma HLS ARRAY_PARTITION variable=comb_u_stream1_rec complete -#pragma HLS ARRAY_PARTITION variable=comb_u_stream2_cur complete -#pragma HLS ARRAY_PARTITION variable=comb_u_stream2_rec complete - - std::cout << "Starting ZeroTileCombinationDMA" << std::endl; - hls_utils::Log(0, "Starting ZeroTileCombinationDMA"); - svd::ZeroTileCombination2LstmDMA(comb_u_port, - comb_u_stream1_cur, comb_u_stream1_rec, comb_u_stream2_cur, - comb_u_stream2_rec); - svd::ZeroTileCombinationDMA(comb_v_port, - comb_v_stream1_cur, comb_v_stream1_rec); - // =========================================================================== - // Current Input DMA - // 
=========================================================================== - hls_utils::Log(0, "Starting InputDMA"); - svd::InputDMA( - x1_port, comb_u_stream1_cur, x1_streams); - svd::InputDMA( - x2_port, comb_u_stream2_cur, x2_streams); - // =========================================================================== - // Recurrent Input DMA - // =========================================================================== - svd::InputDMA( - h_t1_prev_port, comb_u_stream1_rec, h1_streams); - svd::InputDMA( - h_t2_prev_port, comb_u_stream2_rec, h2_streams); - // =========================================================================== - // Gates DMA - // =========================================================================== - svd::WeightD u_cur_gate_streams[kNumGates / 2][kNumIter * kInputLength / kNumTilesU * (kNumTilesU - kNumZeroTilesU)]; - svd::WeightD u_rec_gate_streams[kNumGates / 2][kNumIter * kOutputLength / kNumTilesU * (kNumTilesU - kNumZeroTilesU)]; - svd::WeightD v_gate_streams[kNumGates][kNumIter * kOutputLength / kNumTilesV * (kNumTilesV - kNumZeroTilesV)]; -#pragma HLS STREAM variable=u_cur_gate_streams depth=1 dim=1 -#pragma HLS STREAM variable=u_rec_gate_streams depth=1 dim=1 -#pragma HLS STREAM variable=v_gate_streams depth=1 dim=1 -#pragma HLS ARRAY_PARTITION variable=u_cur_gate_streams complete dim=1 -#pragma HLS ARRAY_PARTITION variable=u_rec_gate_streams complete dim=1 -#pragma HLS ARRAY_PARTITION variable=v_gate_streams complete dim=1 - - const int kUcurSize = kNumGates / 2 * kNumIter * kInputLength / kNumTilesU * (kNumTilesU - kNumZeroTilesU); - const int kUrecSize = kNumGates / 2 * kNumIter * kOutputLength / kNumTilesU * (kNumTilesU - kNumZeroTilesU); - const int kSsize = kNumGates * kNumIter; - const int kVsize = kNumGates * kNumIter * kOutputLength / kNumTilesV * (kNumTilesV - kNumZeroTilesV); - const int kBitWidthU = FIX_WIDTH * 4; - const int kBitWidthV = FIX_WIDTH * 8; - const int kBitWidthS = FIX_WIDTH * 8; - hls_utils::Log(0, 
"Starting ArraySplitter"); - svd::ArraySplitter, svd::WeightD, kBitWidthU, FIX_WIDTH, kUcurSize>(u_cur_port, u_cur_gate_streams); - svd::ArraySplitter, svd::WeightD, kBitWidthU, FIX_WIDTH, kUrecSize>(u_rec_port, u_rec_gate_streams); - svd::ArraySplitter, svd::WeightD, kBitWidthV, FIX_WIDTH, kVsize>(v_port, v_gate_streams); - svd::StreamSplitter, svd::WeightD, kBitWidthS, FIX_WIDTH>(kSsize, s1_port, gates_s1_streams); - svd::StreamSplitter, svd::WeightD, kBitWidthS, FIX_WIDTH>(kSsize, s2_port, gates_s2_streams); - const bool kUweights = true; - // =========================================================================== - // Current Dot Product Unit - // =========================================================================== - Current_Gates_Dot_Product_Loop: - for (int g = 0; g < kNumCurGates; ++g) { -#pragma HLS UNROLL - hls_utils::Log(0, std::string("Starting Cur Gate n." + g)); - svd::GateDMA(kUweights, kNumIter, kNumNonZeroTilesU, kNumElemsTileUCurrent, u_cur_gate_streams[g], cur_u_streams[g]); - svd::GateDMA(!kUweights, kNumIter, kNumNonZeroTilesV, kNumElemsTileV, v_gate_streams[g], cur_v_streams[g]); - svd::UDotUnit2Lstm(x1_streams[g], - x2_streams[g], cur_u_streams[g], - cur_dot1_streams[g], cur_dot2_streams[g]); - svd::VDotUnit2LstmV2( - false, - nullptr, - nullptr, - cur_dot1_streams[g], - cur_dot2_streams[g], - gates_s1_streams[g], - gates_s2_streams[g], - cur_v_streams[g], - comb_v_stream1_cur[g], - cur_acc1_streams[g], - cur_acc2_streams[g]); - } - // =========================================================================== - // Recur Dot Product Unit - // =========================================================================== - Recur_Gates_Dot_Product_Loop: - for (int g = 0; g < kNumRecGates; ++g) { -#pragma HLS UNROLL - hls_utils::Log(0, std::string("Starting Rec Gate n." 
+ g)); - svd::GateDMA(kUweights, kNumIter, kNumNonZeroTilesU, kNumElemsTileURecur, u_rec_gate_streams[g], rec_u_streams[g]); - svd::GateDMA(!kUweights, kNumIter, kNumNonZeroTilesV, kNumElemsTileV, v_gate_streams[kNumCurGates + g], rec_v_streams[g]); - svd::UDotUnit2Lstm(h1_streams[g], - h2_streams[g], rec_u_streams[g], - rec_dot1_streams[g], rec_dot2_streams[g]); - svd::VDotUnit2LstmV2( - false, - nullptr, - nullptr, - rec_dot1_streams[g], - rec_dot2_streams[g], - gates_s1_streams[kNumCurGates + g], - gates_s2_streams[kNumCurGates + g], - rec_v_streams[g], - comb_v_stream1_rec[g], - rec_acc1_streams[g], - rec_acc2_streams[g]); - } - // =========================================================================== - // Output Non-Linearities - // =========================================================================== - // NOTE: The output FIFOs in NonLinearityUnit have been resized! Check for deadlocks! - svd::NonLinearityUnit(c_t1_prev_port, - cur_acc1_streams, rec_acc1_streams, h_t1_curr_port, c_t1_curr_port, true, - bias1_port); - svd::NonLinearityUnit(c_t2_prev_port, - cur_acc2_streams, rec_acc2_streams, h_t2_curr_port, c_t2_curr_port, true, - bias2_port); - -#ifdef DEBUG_FIFOS - - const int kNumPEsU = NUM_TILES_U - NUM_ZERO_TILES_U; - const int kNumPEsVCur = INPUT_SIZE / NUM_TILES_V; - const int kNumPEsVRec = HIDDEN_SIZE / NUM_TILES_V; - const int kNumUprobes = kNumGates * kNumPEsU * 3; // one for each: x1, x2, u streams - const int kNumVprobes = kNumGates / 2 * (kNumPEsVCur + kNumPEsVRec); // one for v streams - const int kNumProbes = kNumUprobes + kNumVprobes; - svd::ProbeStream stop_ctrl; - svd::ProbeStream probe_ctrl[kNumUprobes]; - - svd::ClockCounter(probe_ctrl, stop_ctrl, counters_port, clk_count_port); -#endif -} \ No newline at end of file diff --git a/src/lstm/lstm_data_handler.cpp b/src/lstm/lstm_data_handler.cpp deleted file mode 100644 index 5833b8e..0000000 --- a/src/lstm/lstm_data_handler.cpp +++ /dev/null @@ -1 +0,0 @@ -#include 
"lstm/lstm_data_handler.h" \ No newline at end of file diff --git a/src/lstm/sw/CMakeLists.txt b/src/lstm/sw/CMakeLists.txt deleted file mode 100644 index 24c7190..0000000 --- a/src/lstm/sw/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -cmake_minimum_required(VERSION 3.10) - -add_library(SOFT_LSTM STATIC ${CMAKE_SOURCE_DIR}/src/lstm/sw/soft_lstm.cpp) -target_include_directories(SOFT_LSTM PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(SOFT_LSTM PUBLIC ${VIVADO_INCLUDE_DIRS}) -target_include_directories(SOFT_LSTM PUBLIC ${OpenCv_INCLUDE_DIRS}) -target_link_libraries(SOFT_LSTM ${OpenCv_LIBS}) - -add_library(SOFT_LSTM_SVD STATIC ${CMAKE_SOURCE_DIR}/src/lstm/sw/soft_lstm_svd.cpp) -target_include_directories(SOFT_LSTM_SVD PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(SOFT_LSTM_SVD PUBLIC ${VIVADO_INCLUDE_DIRS}) -target_include_directories(SOFT_LSTM_SVD PUBLIC ${OpenCv_INCLUDE_DIRS}) -target_link_libraries(SOFT_LSTM_SVD ${OpenCv_LIBS}) \ No newline at end of file diff --git a/src/math_utils/CMakeLists.txt b/src/math_utils/CMakeLists.txt index 489c8e7..042ef8d 100644 --- a/src/math_utils/CMakeLists.txt +++ b/src/math_utils/CMakeLists.txt @@ -2,19 +2,19 @@ cmake_minimum_required(VERSION 3.10) add_library(BLAS_UTILS STATIC ${CMAKE_SOURCE_DIR}/src/math_utils/blas_utils.cpp) target_include_directories(BLAS_UTILS PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(BLAS_UTILS PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(BLAS_UTILS PUBLIC ${HLS_INCLUDE_DIRS}) target_include_directories(BLAS_UTILS PUBLIC ${OpenCv_INCLUDE_DIRS}) target_link_libraries(BLAS_UTILS ${OpenCv_LIBS}) add_library(ACTIVATION_FUNCTIONS STATIC ${CMAKE_SOURCE_DIR}/src/math_utils/activation_functions.cpp) target_include_directories(ACTIVATION_FUNCTIONS PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(ACTIVATION_FUNCTIONS PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(ACTIVATION_FUNCTIONS PUBLIC ${HLS_INCLUDE_DIRS}) 
target_include_directories(ACTIVATION_FUNCTIONS PUBLIC ${OpenCv_INCLUDE_DIRS}) target_link_libraries(ACTIVATION_FUNCTIONS ${OpenCv_LIBS}) add_library(DATA_HANDLER STATIC ${CMAKE_SOURCE_DIR}/src/math_utils/data_handler.cpp) target_include_directories(DATA_HANDLER PUBLIC ${CMAKE_SOURCE_DIR}/include) -target_include_directories(DATA_HANDLER PUBLIC ${VIVADO_INCLUDE_DIRS}) +target_include_directories(DATA_HANDLER PUBLIC ${HLS_INCLUDE_DIRS}) target_include_directories(DATA_HANDLER PUBLIC ${OpenCv_INCLUDE_DIRS}) target_link_libraries(DATA_HANDLER ${OpenCv_LIBS}) diff --git a/src/svd.cpp b/src/svd.cpp index a392d80..9926a52 100644 --- a/src/svd.cpp +++ b/src/svd.cpp @@ -1,7 +1,9 @@ #include "svd_params.h" #include "svd_ip.h" -#include "lstm/hls/lstm_svd.h" -#include "lstm/lstm_data_handler.h" +#include "layers/lstm/lstm_data_handler.h" +#include "layers/lstm/sw/soft_lstm_svd.h" +#include "layers/lstm/hls/lstm_svd.h" +#include "layers/lstm/hls/lstm_svd_emulator.h" #include "ap_fixed.h" @@ -11,17 +13,7 @@ int main(int argc, char const *argv[]) { std::cout << "Hello SVD!" << std::endl; - typename svd_params::ActivationD x_port[svd_params::N][svd_params::I] = {rand()}; - typename svd_params::UPortD u_port[svd_params::PrunedSizeU] = {rand()}; - typename svd_params::SPortD s_port[svd_params::N][svd_params::R] = {rand()}; - typename svd_params::VPortD v_port[svd_params::PrunedSizeV] = {rand()}; - ap_uint nz_u_port[svd_params::N] = {rand()}; - ap_uint nz_v_port[svd_params::N] = {rand()}; - typename svd_params::ActivationD y_port[svd_params::N][svd_params::G][svd_params::H] = {rand()}; - - std::cout << "Running SvdIp2Inputs." 
<< std::endl; - SvdIp2Inputs(x_port, u_port, s_port, v_port, nz_u_port, nz_v_port, y_port); - + const bool kTestSoftwareAccelerator = false; const int kNumInputs = 2; const int kRefinementSteps = NUM_ITERATIONS; const int kLstmInputSize = INPUT_SIZE; @@ -33,15 +25,22 @@ int main(int argc, char const *argv[]) { const int kNumZeroTilesU = NUM_ZERO_TILES_U; const int kNumTilesV = NUM_TILES_V; const int kNumZeroTilesV = NUM_ZERO_TILES_V; + const int kLutSize = (FIX_WIDTH == 16) ? 512 : 256; std::cout << "Setting AcceleratorBlob." << std::endl; - typedef lstm::AcceleratorBlob AccelDataType; - AccelDataType storage = AccelDataType(kNumInputs, kRefinementSteps, kUCurSize, + typedef svd::AcceleratorBlob AcceleratorStorage; + AcceleratorStorage storage = AcceleratorStorage(kNumInputs, kRefinementSteps, kUCurSize, kURecSize, kVSize, kNumTilesU, kNumZeroTilesU, kNumTilesV, kNumZeroTilesV); - std::cout << "printing stuff..." << std::endl; - std::cout << storage.get_fix_x(0) << std::endl; - std::cout << storage.get_fix_x(0)[234] << std::endl; + std::cout << "Running SvdIp2Inputs." << std::endl; + typename svd::svd_params::ActivationD x_port[svd::svd_params::N][svd::svd_params::I] = {rand()}; + typename svd::svd_params::UPortD u_port[svd::svd_params::R * svd::svd_params::PrunedSizeU] = {rand()}; + typename svd::svd_params::SPortD s_port[svd::svd_params::N][svd::svd_params::R] = {rand()}; + typename svd::svd_params::VPortD v_port[svd::svd_params::R * svd::svd_params::PrunedSizeV] = {rand()}; + ap_uint nz_u_port[svd::svd_params::G * svd::svd_params::R] = {rand()}; + ap_uint nz_v_port[svd::svd_params::G * svd::svd_params::R] = {rand()}; + typename svd::svd_params::ActivationD y_port[svd::svd_params::N][svd::svd_params::G][svd::svd_params::H] = {rand()}; + // SvdIp2Inputs(x_port, u_port, s_port, v_port, nz_u_port, nz_v_port, y_port); std::cout << "reinterpret_cast." 
<< std::endl; @@ -51,38 +50,162 @@ int main(int argc, char const *argv[]) { ap_uint<128>* s1_uint = reinterpret_cast*>(storage.get_fix_s(0)); ap_uint<128>* s2_uint = reinterpret_cast*>(storage.get_fix_s(1)); - std::cout << "Starting accelerator." << std::endl; - assert(storage.get_u_cur_size() == NUM_ITERATIONS*4*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)); assert(storage.get_u_rec_size() == NUM_ITERATIONS*4*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U)); assert(storage.get_v_size() == NUM_ITERATIONS*4*2*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V)); - - std::cout << (storage.get_u_cur_size() == NUM_ITERATIONS*4*INPUT_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U) ? "true" : "false") << std::endl; - std::cout << (storage.get_u_rec_size() == NUM_ITERATIONS*4*HIDDEN_SIZE / NUM_TILES_U * (NUM_TILES_U - NUM_ZERO_TILES_U) ? "true" : "false") << std::endl; - std::cout << (storage.get_v_size() == NUM_ITERATIONS*4*2*HIDDEN_SIZE / NUM_TILES_V * (NUM_TILES_V - NUM_ZERO_TILES_V) ? 
"true" : "false") << std::endl; - - SvdModel2LstmSDSoCV2( - storage.get_fix_x(0), - storage.get_fix_x(1), - storage.get_fix_h(0), - storage.get_fix_h(1), - storage.get_fix_c(0), - storage.get_fix_c(1), - u_cur_uint, - u_rec_uint, - v_uint, - s1_uint, - s2_uint, - storage.get_fix_bias(0), - storage.get_fix_bias(1), - storage.get_fix_z_v(), - storage.get_fix_z_u(), - storage.get_fix_h(0), - storage.get_fix_h(1), - storage.get_fix_c(0), - storage.get_fix_c(1)); - + assert(storage.get_s_size() == NUM_ITERATIONS*8); + + svd::ActivationD** h_prev_hls = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** h_curr_hls = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** c_prev_hls = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** c_curr_hls = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** h_prev_emulator = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** h_curr_emulator = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** c_prev_emulator = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** c_curr_emulator = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** h_prev_sw = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** h_curr_sw = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** c_prev_sw = new svd::ActivationD*[kNumInputs]; + svd::ActivationD** c_curr_sw = new svd::ActivationD*[kNumInputs]; + for (int i = 0; i < kNumInputs; ++i) { + h_prev_emulator[i] = new svd::ActivationD[kLstmOutputSize]; + h_curr_emulator[i] = new svd::ActivationD[kLstmOutputSize]; + c_prev_emulator[i] = new svd::ActivationD[kLstmOutputSize]; + c_curr_emulator[i] = new svd::ActivationD[kLstmOutputSize]; + h_prev_hls[i] = reinterpret_cast(storage.get_fix_h_prev(i)); + h_curr_hls[i] = reinterpret_cast(storage.get_fix_h_curr(i)); + c_prev_hls[i] = reinterpret_cast(storage.get_fix_c_prev(i)); + c_curr_hls[i] = reinterpret_cast(storage.get_fix_c_curr(i)); + } + for (int i = 0; i < NUM_TIMESTEPS; ++i) { + for (int j = 0; j < kNumInputs; ++j) { + 
std::swap(h_prev_hls[j], h_curr_hls[j]); + std::swap(c_prev_hls[j], c_curr_hls[j]); + } + std::cout << "Starting accelerator." << std::endl; + svd::SvdModel2LstmSDSoCV2(storage.get_fix_x(0), storage.get_fix_x(1), // [s * NUM_TIMESTEPS + t] samples? + h_prev_hls[0], h_prev_hls[1], c_prev_hls[0], c_prev_hls[1], + u_cur_uint, u_rec_uint, v_uint, s1_uint, s2_uint, + storage.get_fix_bias(0), storage.get_fix_bias(1), + storage.get_fix_nz_v(), storage.get_fix_nz_u(), + h_curr_hls[0], h_curr_hls[1], c_curr_hls[0], c_curr_hls[1]); + for (int j = 0; j < kNumInputs; ++j) { + std::cout << "Starting Emulator: " << j << std::endl; + svd::LstmSvdSoftEmulator( + kLstmInputSize, kLstmOutputSize, kRefinementSteps, + kNumTilesU, kNumZeroTilesU, + kNumTilesV, kNumZeroTilesV, 1, storage.get_fix_x(j), + storage.get_cur_gates("i")->get_u()->fix_pruned_data(), + storage.get_cur_gates("i")->get_s(j).fix_pruned_data(), + storage.get_cur_gates("i")->get_v()->fix_pruned_data(), + storage.get_cur_gates("i")->get_u()->get_nz_idx(), + storage.get_cur_gates("i")->get_v()->get_nz_idx(), + storage.get_cur_gates("f")->get_u()->fix_pruned_data(), + storage.get_cur_gates("f")->get_s(j).fix_pruned_data(), + storage.get_cur_gates("f")->get_v()->fix_pruned_data(), + storage.get_cur_gates("f")->get_u()->get_nz_idx(), + storage.get_cur_gates("f")->get_v()->get_nz_idx(), + storage.get_cur_gates("c")->get_u()->fix_pruned_data(), + storage.get_cur_gates("c")->get_s(j).fix_pruned_data(), + storage.get_cur_gates("c")->get_v()->fix_pruned_data(), + storage.get_cur_gates("c")->get_u()->get_nz_idx(), + storage.get_cur_gates("c")->get_v()->get_nz_idx(), + storage.get_cur_gates("o")->get_u()->fix_pruned_data(), + storage.get_cur_gates("o")->get_s(j).fix_pruned_data(), + storage.get_cur_gates("o")->get_v()->fix_pruned_data(), + storage.get_cur_gates("o")->get_u()->get_nz_idx(), + storage.get_cur_gates("o")->get_v()->get_nz_idx(), + storage.get_rec_gates("i")->get_u()->fix_pruned_data(), + 
storage.get_rec_gates("i")->get_s(j).fix_pruned_data(), + storage.get_rec_gates("i")->get_v()->fix_pruned_data(), + storage.get_rec_gates("i")->get_u()->get_nz_idx(), + storage.get_rec_gates("i")->get_v()->get_nz_idx(), + storage.get_rec_gates("f")->get_u()->fix_pruned_data(), + storage.get_rec_gates("f")->get_s(j).fix_pruned_data(), + storage.get_rec_gates("f")->get_v()->fix_pruned_data(), + storage.get_rec_gates("f")->get_u()->get_nz_idx(), + storage.get_rec_gates("f")->get_v()->get_nz_idx(), + storage.get_rec_gates("c")->get_u()->fix_pruned_data(), + storage.get_rec_gates("c")->get_s(j).fix_pruned_data(), + storage.get_rec_gates("c")->get_v()->fix_pruned_data(), + storage.get_rec_gates("c")->get_u()->get_nz_idx(), + storage.get_rec_gates("c")->get_v()->get_nz_idx(), + storage.get_rec_gates("o")->get_u()->fix_pruned_data(), + storage.get_rec_gates("o")->get_s(j).fix_pruned_data(), + storage.get_rec_gates("o")->get_v()->fix_pruned_data(), + storage.get_rec_gates("o")->get_u()->get_nz_idx(), + storage.get_rec_gates("o")->get_v()->get_nz_idx(), + storage.get_fix_bias(j), + c_prev_emulator[j], h_prev_emulator[j], + c_curr_emulator[j], h_curr_emulator[j]); + std::cout << "Swapping LSTM outputs." << std::endl; + std::swap(h_prev_emulator[j], h_curr_emulator[j]); + std::swap(c_prev_emulator[j], c_curr_emulator[j]); + } + } + const int num_errors = storage.CountMismatches(h_prev_emulator); + std::cout << "Number of mismatches: " << num_errors << std::endl; + if (kTestSoftwareAccelerator) { + for (int j = 0; j < kNumInputs; ++j) { + const bool kVerbose = true; + const bool kUseBlas = false; + const int kUsaFloat = 0; + const int kNumSamples = 1; + std::cout << "Starting BLAS." 
<< std::endl; + svd::SvdModelLstmSoftware(kVerbose, kUseBlas, kUsaFloat, + storage.get_x(j), kNumSamples, NUM_TIMESTEPS, NUM_ITERATIONS, + INPUT_SIZE, HIDDEN_SIZE, + storage.get_cur_gates("i")->get_u()->data(), + storage.get_cur_gates("i")->get_s(j).data(), + storage.get_cur_gates("i")->get_v()->data(), + storage.get_cur_gates("f")->get_u()->data(), + storage.get_cur_gates("f")->get_s(j).data(), + storage.get_cur_gates("f")->get_v()->data(), + storage.get_cur_gates("c")->get_u()->data(), + storage.get_cur_gates("c")->get_s(j).data(), + storage.get_cur_gates("c")->get_v()->data(), + storage.get_cur_gates("o")->get_u()->data(), + storage.get_cur_gates("o")->get_s(j).data(), + storage.get_cur_gates("o")->get_v()->data(), + storage.get_rec_gates("i")->get_u()->data(), + storage.get_rec_gates("i")->get_s(j).data(), + storage.get_rec_gates("i")->get_v()->data(), + storage.get_rec_gates("f")->get_u()->data(), + storage.get_rec_gates("f")->get_s(j).data(), + storage.get_rec_gates("f")->get_v()->data(), + storage.get_rec_gates("c")->get_u()->data(), + storage.get_rec_gates("c")->get_s(j).data(), + storage.get_rec_gates("c")->get_v()->data(), + storage.get_rec_gates("o")->get_u()->data(), + storage.get_rec_gates("o")->get_s(j).data(), + storage.get_rec_gates("o")->get_v()->data(), + &storage.get_bias(j)[0 * storage.get_lstm_output_size()], + &storage.get_bias(j)[1 * storage.get_lstm_output_size()], + &storage.get_bias(j)[2 * storage.get_lstm_output_size()], + &storage.get_bias(j)[3 * storage.get_lstm_output_size()], + storage.get_h(j)); + } + } + storage.ResetLstmOutputs(); std::cout << "Cleaning up." 
<< std::endl; + delete[] h_prev_hls; + delete[] h_curr_hls; + delete[] c_prev_hls; + delete[] c_curr_hls; + delete[] h_prev_sw; + delete[] h_curr_sw; + delete[] c_prev_sw; + delete[] c_curr_sw; + for (int i = 0; i < kNumInputs; ++i) { + delete[] h_prev_emulator[i]; + delete[] h_curr_emulator[i]; + delete[] c_prev_emulator[i]; + delete[] c_curr_emulator[i]; + } + delete[] h_prev_emulator; + delete[] h_curr_emulator; + delete[] c_prev_emulator; + delete[] c_curr_emulator; return 0; } \ No newline at end of file diff --git a/src/svd_ip.cpp b/src/svd_ip.cpp index 7b9fec5..1150bbb 100644 --- a/src/svd_ip.cpp +++ b/src/svd_ip.cpp @@ -1,12 +1,16 @@ #include "svd_ip.h" +namespace svd { + void SvdIp2Inputs( const typename svd_params::ActivationD x_port[svd_params::N][svd_params::I], - const typename svd_params::UPortD u_port[svd_params::PrunedSizeU], + const typename svd_params::UPortD u_port[svd_params::R * svd_params::PrunedSizeU], const typename svd_params::SPortD s_port[svd_params::N][svd_params::R], - const typename svd_params::VPortD v_port[svd_params::PrunedSizeV], - const ap_uint nz_u_port[svd_params::N], - const ap_uint nz_v_port[svd_params::N], + const typename svd_params::VPortD v_port[svd_params::R * svd_params::PrunedSizeV], + const ap_uint nz_u_port[svd_params::G * svd_params::R], + const ap_uint nz_v_port[svd_params::G * svd_params::R], typename svd_params::ActivationD y_port[svd_params::N][svd_params::G][svd_params::H]) { - SvdIP(x_port, u_port, s_port, v_port, nz_u_port, nz_v_port, y_port); -} \ No newline at end of file + svd::SvdIP(x_port, u_port, s_port, v_port, nz_u_port, nz_v_port, y_port); +} + +} // svd \ No newline at end of file diff --git a/src/testbenches/CMakeLists.txt b/src/testbenches/CMakeLists.txt index 0ee8d0f..74ef6ab 100644 --- a/src/testbenches/CMakeLists.txt +++ b/src/testbenches/CMakeLists.txt @@ -1,51 +1,50 @@ cmake_minimum_required(VERSION 3.10) -# add_executable(OPENCV_TEST ${CMAKE_SOURCE_DIR}/src/tb/test_hdmi.cpp) -# 
target_include_directories(OPENCV_TEST PUBLIC ${CMAKE_SOURCE_DIR}/include) -# target_include_directories(OPENCV_TEST PUBLIC ${VIVADO_INCLUDE_DIRS}) -# target_include_directories(OPENCV_TEST PUBLIC ${OpenCv_INCLUDE_DIRS}) -# target_link_libraries(OPENCV_TEST ${OpenCv_LIBS}) - -# add_executable(TEST_AXIS_LIB ${CMAKE_SOURCE_DIR}/src/tb/test_axis_lib.cpp) -# target_include_directories(TEST_AXIS_LIB PUBLIC ${CMAKE_SOURCE_DIR}/include) -# target_include_directories(TEST_AXIS_LIB PUBLIC ${VIVADO_INCLUDE_DIRS}) -# target_include_directories(TEST_AXIS_LIB PUBLIC ${OpenCv_INCLUDE_DIRS}) -# target_link_libraries(TEST_AXIS_LIB ${OpenCv_LIBS}) -# target_link_libraries(TEST_AXIS_LIB AXIS_LIB) - -# add_executable(TEST_CONV_LAYER ${CMAKE_SOURCE_DIR}/src/tb/test_conv_layer.cpp) -# target_include_directories(TEST_CONV_LAYER PUBLIC ${CMAKE_SOURCE_DIR}/include) -# target_include_directories(TEST_CONV_LAYER PUBLIC ${VIVADO_INCLUDE_DIRS}) -# target_link_libraries(TEST_CONV_LAYER CONV_LAYER) - -# add_executable(TEST_DENSE_LAYER ${CMAKE_SOURCE_DIR}/src/tb/test_dense_layer.cpp) -# target_include_directories(TEST_DENSE_LAYER PUBLIC ${CMAKE_SOURCE_DIR}/include) -# target_include_directories(TEST_DENSE_LAYER PUBLIC ${VIVADO_INCLUDE_DIRS}) -# target_link_libraries(TEST_DENSE_LAYER CONV_LAYER) - -# add_executable(TEST_DQNET ${CMAKE_SOURCE_DIR}/src/tb/test_dqnet.cpp) -# target_include_directories(TEST_DQNET PUBLIC ${CMAKE_SOURCE_DIR}/include) -# target_include_directories(TEST_DQNET PUBLIC ${VIVADO_INCLUDE_DIRS}) -# target_link_libraries(TEST_DQNET DQNET) - -# add_executable(TEST_GAME ${CMAKE_SOURCE_DIR}/src/tb/test_game.cpp) -# target_include_directories(TEST_GAME PUBLIC ${CMAKE_SOURCE_DIR}/include) -# target_include_directories(TEST_GAME PUBLIC ${VIVADO_INCLUDE_DIRS}) -# target_include_directories(TEST_GAME PUBLIC ${OpenCv_INCLUDE_DIRS}) -# target_link_libraries(TEST_GAME ${OpenCv_LIBS}) -# target_link_libraries(TEST_GAME GAME) - -# add_executable(TEST_PONG 
${CMAKE_SOURCE_DIR}/src/tb/test_pong.cpp) -# target_include_directories(TEST_PONG PUBLIC ${CMAKE_SOURCE_DIR}/include) -# target_include_directories(TEST_PONG PUBLIC ${VIVADO_INCLUDE_DIRS}) -# target_include_directories(TEST_PONG PUBLIC ${OpenCv_INCLUDE_DIRS}) -# target_link_libraries(TEST_PONG ${OpenCv_LIBS}) -# target_link_libraries(TEST_PONG PONG) - -# # add_test(NAME TestOpenCv COMMAND OPENCV_TEST) -# # add_test(NAME TestAxisLib COMMAND TEST_AXIS_LIB) -# # add_test(NAME TestConvLayer COMMAND TEST_CONV_LAYER) -# add_test(NAME TestDenseLayer COMMAND TEST_DENSE_LAYER) -# # add_test(NAME TestDQNet COMMAND TEST_DQNET) -# # add_test(NAME TestGame COMMAND TEST_GAME) -# # add_test(NAME TestPong COMMAND TEST_PONG) \ No newline at end of file +add_executable(TEST_U_KERNEL ${CMAKE_SOURCE_DIR}/src/testbenches/test_u_kernel.cpp) +target_include_directories(TEST_U_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(TEST_U_KERNEL PUBLIC ${HLS_INCLUDE_DIRS}) +target_include_directories(TEST_U_KERNEL PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(TEST_U_KERNEL ${OpenCv_LIBS}) +target_link_libraries(TEST_U_KERNEL U_KERNEL) + +add_executable(TEST_V_KERNEL ${CMAKE_SOURCE_DIR}/src/testbenches/test_v_kernel.cpp) +target_include_directories(TEST_V_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(TEST_V_KERNEL PUBLIC ${HLS_INCLUDE_DIRS}) +target_include_directories(TEST_V_KERNEL PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(TEST_V_KERNEL ${OpenCv_LIBS}) +target_link_libraries(TEST_V_KERNEL V_KERNEL) + +add_executable(TEST_GEMV_KERNEL ${CMAKE_SOURCE_DIR}/src/testbenches/test_gemv_kernel.cpp) +target_include_directories(TEST_GEMV_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(TEST_GEMV_KERNEL PUBLIC ${HLS_INCLUDE_DIRS}) +target_include_directories(TEST_GEMV_KERNEL PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(TEST_GEMV_KERNEL ${OpenCv_LIBS}) +target_link_libraries(TEST_GEMV_KERNEL GEMV_KERNEL) + 
+add_executable(TEST_DENSE_SVD ${CMAKE_SOURCE_DIR}/src/testbenches/test_dense_svd.cpp) +target_include_directories(TEST_DENSE_SVD PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(TEST_DENSE_SVD PUBLIC ${HLS_INCLUDE_DIRS}) +target_include_directories(TEST_DENSE_SVD PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(TEST_DENSE_SVD ${OpenCv_LIBS}) +target_link_libraries(TEST_DENSE_SVD DENSE_SVD) + +add_executable(TEST_LSTM_SVD ${CMAKE_SOURCE_DIR}/src/testbenches/test_lstm_svd.cpp) +target_include_directories(TEST_LSTM_SVD PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(TEST_LSTM_SVD PUBLIC ${HLS_INCLUDE_DIRS}) +target_include_directories(TEST_LSTM_SVD PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(TEST_LSTM_SVD ${OpenCv_LIBS}) +target_link_libraries(TEST_LSTM_SVD LSTM_SVD) + +add_executable(TEST_SVD_KERNEL ${CMAKE_SOURCE_DIR}/src/testbenches/test_svd_kernel.cpp) +target_include_directories(TEST_SVD_KERNEL PUBLIC ${CMAKE_SOURCE_DIR}/include) +target_include_directories(TEST_SVD_KERNEL PUBLIC ${HLS_INCLUDE_DIRS}) +target_include_directories(TEST_SVD_KERNEL PUBLIC ${OpenCv_INCLUDE_DIRS}) +target_link_libraries(TEST_SVD_KERNEL ${OpenCv_LIBS}) +target_link_libraries(TEST_SVD_KERNEL SVD_KERNEL) + +add_test(NAME TestU_Kernel COMMAND TEST_U_KERNEL) +add_test(NAME TestV_Kernel COMMAND TEST_V_KERNEL) +add_test(NAME TestGemvKernel COMMAND TEST_GEMV_KERNEL) +add_test(NAME TestDenseSvd COMMAND TEST_DENSE_SVD) +add_test(NAME TestLstmSvd COMMAND TEST_LSTM_SVD) +add_test(NAME TestSvdKernel COMMAND TEST_SVD_KERNEL) \ No newline at end of file diff --git a/src/testbenches/test_dense_svd.cpp b/src/testbenches/test_dense_svd.cpp new file mode 100644 index 0000000..851850c --- /dev/null +++ b/src/testbenches/test_dense_svd.cpp @@ -0,0 +1,73 @@ +#include "testbenches/test_dense_svd.h" +#include "dma/axis_lib.h" + +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif +#include "ap_int.h" +#include "hls_stream.h" +#include +#include + +int main(int 
argc, char const *argv[]) { +#ifndef __VITIS_HLS__ + return 0; +#else + std::cout << "[INFO] Starting HlsDenseSvd test." << std::endl; + typedef typename svd::dense_params::ActivationD ActivationType; + const int kG = svd::dense_params::G; + int num_active_inputs = svd::dense_params::N; + int input_size = 16; + int output_size = 16; + int max_R = 1; + int num_tests = 2; + auto get_arg = [&](const int i, const int max_val, int& arg) { + if (argc >= i) { + arg = atoi(argv[i -1]); + arg = (arg > max_val) ? max_val : arg; + } + }; + get_arg(2, svd::dense_params::N, num_active_inputs); + get_arg(3, 512, max_R); + get_arg(4, svd::dense_params::I, input_size); + get_arg(5, svd::dense_params::H, output_size); + get_arg(6, 32, num_tests); + int num_refinements[svd::dense_params::N]; + ActivationType* x = new ActivationType[num_active_inputs * input_size]; + ActivationType* u = new ActivationType[max_R * input_size * kG]; + ActivationType* s = new ActivationType[max_R * num_active_inputs * kG]; + ActivationType* v = new ActivationType[max_R * output_size * kG]; + ActivationType* bias = new ActivationType[num_active_inputs * kG * output_size]; + ActivationType* y = new ActivationType[num_active_inputs * kG * output_size]; + auto init_random = [&](const int size, ActivationType* x) { + for (int i = 0; i < size; ++i) { + if (std::is_same::value) { + x[i] = ActivationType(rand()); + } else { + x[i] = ActivationType(rand() * 0.00001); + } + } + }; + for (int i = 0; i < svd::dense_params::N; ++i) { + num_refinements[i] = max_R; + } + init_random(num_active_inputs * input_size, x); + init_random(max_R * input_size * kG, u); + init_random(max_R * num_active_inputs * kG, s); + init_random(max_R * output_size * kG, v); + init_random(num_active_inputs * kG * output_size, bias); + std::cout << "[INFO] Calling accelerator." 
<< std::endl; + for (int i = 0; i < num_tests; ++i) { + HlsWrapperDenseSvd(num_active_inputs, input_size, output_size, + num_refinements, x, u, s, v, bias, y); + } + delete[] x; + delete[] u; + delete[] s; + delete[] v; + delete[] bias; + delete[] y; + std::cout << "[INFO] Exiting." << std::endl; + return 0; +#endif // end __VITIS_HLS__ +} \ No newline at end of file diff --git a/src/testbenches/test_gemv_kernel.cpp b/src/testbenches/test_gemv_kernel.cpp new file mode 100644 index 0000000..5a8787e --- /dev/null +++ b/src/testbenches/test_gemv_kernel.cpp @@ -0,0 +1,76 @@ +#include "kernel/gemv_kernel.h" + +#include "hls_stream.h" +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif + +int main(int argc, char const *argv[]) { +#ifndef __VITIS_HLS__ + return 0; +#else + typedef hls::vector VectType; + testgemv::DataType x[testgemv::I]; + testgemv::DataType w[testgemv::I][testgemv::R]; + + testgemv::DataType y[testgemv::R] = {0}; + + hls::stream x_port[testgemv::N]; + hls::stream w_port[testgemv::N]; + hls::stream y_port[testgemv::N]; + for (int i = 0; i < testgemv::I; ++i) { + + x[i] = testgemv::DataType(rand() * 0.0001); + for (int j = 0; j < testgemv::R; ++j) { + w[i][j] = testgemv::DataType(rand() * 0.0001); + } + } + + for (int i = 0; i < testgemv::R; ++i) { + for (int j = 0; j < testgemv::I / testgemv::T; ++j) { + VectType tmp; + for (int k = 0; k < testgemv::T; ++k) { + tmp[k] = w[j * testgemv::T + k][i]; + } + for (int ii = 0; ii < testgemv::N; ++ii) { + w_port[ii] << tmp; + } + } + } + + for (int i = 0; i < testgemv::R; ++i) { + for (int j = 0; j < testgemv::I / testgemv::T; ++j) { + VectType tmp; + for (int k = 0; k < testgemv::T; ++k) { + tmp[k] = x[j * testgemv::T + k]; + } + for (int ii = 0; ii < testgemv::N; ++ii) { + x_port[ii] << tmp; + } + } + } + + HlsGemvKernel(testgemv::I, testgemv::R, x_port[0], x_port[1], w_port[0], w_port[1], y_port[0], y_port[1]); + for (int i = 0; i < testgemv::R; ++i) { + y[i] = 0; + for (int j = 0; j < testgemv::I; ++j) { + 
y[i] += x[j] * w[j][i]; + } + } + + std::cout << "Checking results." << std::endl; + int num_errors = 0; + for (int i = 0; i < testgemv::R; ++i) { + for (int j = 0; j < testgemv::N; ++j) { + auto y_test = y_port[j].read(); + if (y[i] - y_test > testgemv::DataType(0.001)) { + std::cout << i << ") test/gold: " << y_test << " / " + << y[i] << std::endl; + ++num_errors; + } + } + } + std::cout << "[INFO] Number of mismatches: " << num_errors << std::endl; + return 0; // num_errors; +#endif +} \ No newline at end of file diff --git a/src/testbenches/test_lstm_svd.cpp b/src/testbenches/test_lstm_svd.cpp new file mode 100644 index 0000000..7b1bdd0 --- /dev/null +++ b/src/testbenches/test_lstm_svd.cpp @@ -0,0 +1,90 @@ +#include "testbenches/test_lstm_svd.h" + +#include "dma/axis_lib.h" + +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif +#include "ap_int.h" +#include "hls_stream.h" +#include +#include + +int main(int argc, char const *argv[]) { +#ifndef __VITIS_HLS__ + return 0; +#else + std::cout << "[INFO] Starting HlsDenseSvd test." << std::endl; + typedef typename svd::lstm_params::ActivationD ActivationType; + const int kG = svd::lstm_params::G; + int num_active_inputs = svd::lstm_params::N; + int input_size = 16; + int output_size = 16; + int max_R = 1; + int num_tests = 2; + auto get_arg = [&](const int i, const int max_val, int& arg) { + if (argc >= i) { + arg = atoi(argv[i -1]); + arg = (arg > max_val) ? 
max_val : arg; + } + }; + get_arg(2, svd::lstm_params::N, num_active_inputs); + get_arg(3, 512, max_R); + get_arg(4, svd::lstm_params::I, input_size); + get_arg(5, svd::lstm_params::H, output_size); + get_arg(6, 32, num_tests); + int num_refinements[svd::lstm_params::N]; + ActivationType* x = new ActivationType[num_active_inputs * input_size]; + ActivationType* h_prev = new ActivationType[num_active_inputs * output_size]; + ActivationType* c_prev = new ActivationType[num_active_inputs * output_size]; + ActivationType* h_curr = new ActivationType[num_active_inputs * output_size]; + ActivationType* c_curr = new ActivationType[num_active_inputs * output_size]; + ActivationType* u_cur = new ActivationType[max_R * input_size * kG]; + ActivationType* s_cur = new ActivationType[max_R * num_active_inputs * kG]; + ActivationType* v_cur = new ActivationType[max_R * output_size * kG]; + ActivationType* u_rec = new ActivationType[max_R * output_size * kG]; + ActivationType* s_rec = new ActivationType[max_R * num_active_inputs * kG]; + ActivationType* v_rec = new ActivationType[max_R * output_size * kG]; + ActivationType* bias = new ActivationType[num_active_inputs * kG * output_size]; + auto init_random = [&](const int size, ActivationType* x) { + for (int i = 0; i < size; ++i) { + if (std::is_same::value) { + x[i] = ActivationType(rand()); + } else { + x[i] = ActivationType(rand() * 0.00001); + } + } + }; + for (int i = 0; i < svd::lstm_params::N; ++i) { + num_refinements[i] = max_R; + } + init_random(num_active_inputs * input_size, x); + init_random(max_R * input_size * kG, u_cur); + init_random(max_R * num_active_inputs * kG, s_cur); + init_random(max_R * output_size * kG, v_cur); + init_random(max_R * output_size * kG, u_rec); + init_random(max_R * num_active_inputs * kG, s_rec); + init_random(max_R * output_size * kG, v_rec); + init_random(num_active_inputs * kG * output_size, bias); + std::cout << "[INFO] Calling accelerator." 
<< std::endl; + for (int i = 0; i < num_tests; ++i) { + HlsWrapperLstmSvd(num_active_inputs, input_size, output_size, + num_refinements, x, u_cur, s_cur, v_cur, h_prev, u_rec, s_rec, v_rec, + bias, c_prev, h_curr, c_curr); + } + delete[] x; + delete[] h_prev; + delete[] c_prev; + delete[] h_curr; + delete[] c_curr; + delete[] u_cur; + delete[] s_cur; + delete[] v_cur; + delete[] u_rec; + delete[] s_rec; + delete[] v_rec; + delete[] bias; + std::cout << "[INFO] Exiting." << std::endl; + return 0; +#endif // end __VITIS_HLS__ +} \ No newline at end of file diff --git a/src/testbenches/test_svd_kernel.cpp b/src/testbenches/test_svd_kernel.cpp new file mode 100644 index 0000000..20e4f09 --- /dev/null +++ b/src/testbenches/test_svd_kernel.cpp @@ -0,0 +1,85 @@ +#include "testbenches/test_svd_kernel.h" +#include "dma/axis_lib.h" + +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif +#include "ap_int.h" +#include "hls_stream.h" +#include +#include + +int main(int argc, char const *argv[]) { +#ifndef __VITIS_HLS__ + return 0; +#else + std::cout << "[INFO] Starting HlsSvdKernel test." << std::endl; + typedef typename svd::svd_params::ActivationD ActivationType; + const int kG = svd::svd_params::G; + int num_active_inputs = svd::svd_params::N; + int input_size = 16; + int output_size = 16; + int max_R = 1; + int num_tests = 2; + auto get_arg = [&](const int i, const int max_val, int& arg) { + if (argc >= i) { + arg = atoi(argv[i -1]); + arg = (arg > max_val) ? 
max_val : arg; + } + }; + get_arg(2, svd::svd_params::N, num_active_inputs); + get_arg(3, 512, max_R); + get_arg(4, svd::svd_params::I, input_size); + get_arg(5, svd::svd_params::H, output_size); + get_arg(6, 32, num_tests); + int num_refinements[svd::svd_params::N]; + ActivationType* x = new ActivationType[num_active_inputs * input_size]; + ActivationType* u = new ActivationType[max_R * input_size * kG]; + ActivationType* s = new ActivationType[max_R * num_active_inputs * kG]; + ActivationType* v = new ActivationType[max_R * output_size * kG]; + ActivationType* y = new ActivationType[num_active_inputs * kG * output_size]; + hls::stream x_port("x_port"); + hls::stream u_port("u_port"); + hls::stream s_port("s_port"); + hls::stream v_port("v_port"); + hls::stream y_port("y_port"); + auto init_random = [&](const int size, ActivationType* x) { + for (int i = 0; i < size; ++i) { + if (std::is_same::value) { + x[i] = ActivationType(rand()); + } else { + x[i] = ActivationType(rand() * 0.00001); + } + } + }; + auto init_zero = [&](const int size, ActivationType* x) { + for (int i = 0; i < size; ++i) { + x[i] = ActivationType(0); + } + }; + for (int i = 0; i < svd::svd_params::N; ++i) { + num_refinements[i] = max_R; + } + init_random(num_active_inputs * input_size, x); + init_random(num_active_inputs * kG * output_size, y); + init_random(max_R * input_size * kG, u); + init_random(max_R * num_active_inputs * kG, s); + init_random(max_R * output_size * kG, v); + std::cout << "[INFO] Calling accelerator." 
<< std::endl; + for (int i = 0; i < num_tests; ++i) { + svd::SetSvdKernelInputs(num_active_inputs, input_size, + output_size, num_refinements, x, u, s, v, x_port, u_port, s_port, v_port); + HlsSvdKernel(num_active_inputs, input_size, output_size, num_refinements, + x_port, u_port, s_port, v_port, y_port); + svd::GetSvdKernelOutputs(num_active_inputs, output_size, + y_port, y); + } + delete[] x; + delete[] u; + delete[] s; + delete[] v; + delete[] y; + std::cout << "[INFO] Exiting." << std::endl; + return 0; +#endif // end __VITIS_HLS__ +} \ No newline at end of file diff --git a/src/testbenches/test_u_kernel.cpp b/src/testbenches/test_u_kernel.cpp new file mode 100644 index 0000000..0c89d23 --- /dev/null +++ b/src/testbenches/test_u_kernel.cpp @@ -0,0 +1,213 @@ +#include "testbenches/test_u_kernel.h" +#include "dma/axis_lib.h" + +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif +#include "ap_int.h" +#include "hls_stream.h" +#include +#include + +int main(int argc, char const *argv[]) { +#ifdef COSIM_DESIGN + srand(1); +#else + srand(time(NULL)); +#endif + std::cout << "[INFO] Starting HlsKernelU test." << std::endl; +#ifndef __VITIS_HLS__ + return 0; +#else + const int num_refinements = testu::params::R; + hls::vector num_refinements_vect = hls::vector(num_refinements); + for (int i = testu::params::N; i >= 0; --i) { + int R_tmp = testu::params::R - 2 * (testu::params::N - i - 1); + num_refinements_vect[i] = R_tmp > 0 ? R_tmp : 1; + } + const int kNumActiveInputs = testu::params::N - 2; + const int kInputSize_tmp = testu::params::I / 1; + const int kInputSize = (kInputSize_tmp > testu::params::I) ? 
testu::params::I : kInputSize_tmp; + const int kNumTilesU = kInputSize / testu::params::Tu; + typedef typename testu::params::ActivationD ActivationType; + typedef hls::vector VectN_Type; + typedef hls::vector VectG_Type; + typedef hls::vector VectTuAct_Type; + assert(testu::params::I == testu::params::PrunedSizeU); + + ActivationType x[testu::params::N][testu::params::I]; + ActivationType u[num_refinements][testu::params::PrunedSizeU][testu::params::G]; + ActivationType xu[num_refinements][testu::params::N][testu::params::G]; + + hls::stream x_port; //[testu::params::N * kNumTilesU]; + hls::stream u_port; //[num_refinements * kNumTilesU * testu::params::G]; + hls::stream xu_port; //[num_refinements * testu::params::G]; + hls::stream x_axis("x_axis"); + hls::stream u_axis("u_axis"); + hls::stream xu_gn_axis("xu_gn_axis"); + hls::stream xu_n_axis("xu_n_axis"); + hls::stream xu_g_axis("xu_g_axis"); + VectN_Type xu_gold[num_refinements * testu::params::G]; + + auto x_axis_interface = svd::AxiStreamPort(x_axis); + auto u_axis_interface = svd::AxiStreamPort(u_axis); + auto xu_gn_axis_interface = svd::AxiStreamPort(xu_gn_axis); + auto xu_n_axis_interface = svd::AxiStreamPort(xu_n_axis); + auto xu_g_axis_interface = svd::AxiStreamPort(xu_g_axis); + + for (int i = 0; i < testu::params::N; ++i) { + for (int j = 0; j < testu::params::I; ++j) { + x[i][j] = rand(); // * 0.00001; + } + } + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < testu::params::PrunedSizeU; ++j) { + for (int k = 0; k < testu::params::G; ++k) { + u[i][j][k] = rand(); // * 0.00001; + } + } + } + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < testu::params::N; ++j) { + for (int k = 0; k < testu::params::G; ++k) { + xu[i][j][k] = 0; + } + } + } + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < kInputSize; ++j) { + for (int k = 0; k < testu::params::G; ++k) { + for (int ii = 0; ii < testu::params::N; ++ii) { + xu[i][ii][k] += u[i][j][k] * x[ii][j]; + } + 
} + } + } + + + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < testu::params::N; ++j) { + for (int k = 0; k < testu::params::G; ++k) { + xu_gold[i * testu::params::G + k][j] = xu[i][j][k]; + } + } + } + + const int num_tests = 2; + int num_errors = 0; + + for (int t = 0; t < num_tests; ++t) { + +// #define TEST_OLD_KERNEL_U +#ifdef TEST_OLD_KERNEL_U + for (int i = 0; i < testu::params::N; ++i) { + for (int j = 0; j < kNumTilesU; ++j) { + VectTuAct_Type x_val; + for (int k = 0; k < testu::params::Tu; ++k) { + x_val[k] = x[i][j * testu::params::Tu + k]; + } + x_port << x_val; + x_axis_interface.PushVector(x_val); + } + } + for (int i = 0; i < num_refinements; ++i) { + for (int j = 0; j < kNumTilesU; ++j) { + for (int k = 0; k < testu::params::G; ++k) { + VectTuAct_Type u_val; + for (int ii = 0; ii < testu::params::Tu; ++ii) { + u_val[ii] = u[i][j * testu::params::Tu + ii][k]; + } + u_port << u_val; + u_axis_interface.PushVector(u_val); + } + } + } + + std::cout << "[INFO] Starting HlsVectorKernelU." << std::endl; + HlsVectorKernelU(num_refinements, x_port, u_port, xu_port); + std::cout << "[INFO] Starting HlsAxisKernelU." << std::endl; + HlsAxisKernelU(num_refinements, x_axis, u_axis, xu_gn_axis); + + for (int i = 0; i < num_refinements; ++i) { + auto xu_gn_val = xu_gn_axis_interface.PopVector(); + for (int j = 0; j < testu::params::G; ++j) { + auto tmp = xu_port.read(); + for (int k = 0; k < testu::params::N; ++k) { + std::cout << i << ") test/gold: " << xu_gn_val[j * testu::params::N + k] << " / " + << xu_gold[i * testu::params::G + j][k] << std::endl; + if (xu_gn_val[j * testu::params::N + k] != xu_gold[i * testu::params::G + j][k]) { + ++num_errors; + } + } + } + } + std::cout << "[INFO] Number of mismatches: " << num_errors << std::endl; +#endif + // NOTE: The streaming order differs from before! kNumTilesU is swapped with + // testu::params::N. 
+ for (int j = 0; j < kNumTilesU; ++j) { + for (int i = 0; i < kNumActiveInputs; ++i) { + VectTuAct_Type x_val; + for (int k = 0; k < testu::params::Tu; ++k) { + x_val[k] = x[i][j * testu::params::Tu + k]; + } + x_axis_interface.PushVector(x_val); + } + } + // NOTE: The streaming order differs from before! kNumTilesU is swapped with + // testu::params::G. + for (int i = 0; i < num_refinements_vect[kNumActiveInputs - 1]; ++i) { + for (int j = 0; j < kNumTilesU; ++j) { + for (int k = 0; k < testu::params::G; ++k) { + VectTuAct_Type u_val; + for (int ii = 0; ii < testu::params::Tu; ++ii) { + u_val[ii] = u[i][j * testu::params::Tu + ii][k]; + } + u_axis_interface.PushVector(u_val); + } + } + } + std::cout << "[INFO] Starting HlsKernelU." << std::endl; + + int refinements_tmp[testu::params::N]; + for (int i = 0; i < testu::params::N; ++i) { + refinements_tmp[i] = num_refinements_vect[i]; + } + HlsKernelU(kNumActiveInputs, kInputSize, refinements_tmp, false, x_axis, u_axis, xu_g_axis); + + testu::params::VectG_Type xu_g_val; + int total_cnt = 0; + int last_at = -1; + for (int i = 0; i < num_refinements_vect[kNumActiveInputs - 1]; ++i) { // R_max + for (int j = 0; j < kNumActiveInputs; ++j) { + if (i < num_refinements_vect[j]) { + bool is_last = xu_g_axis_interface.isLastPopVector(xu_g_val); + if (is_last) { + last_at = total_cnt; + std::cout << "[INFO] Last index arrived at iteration: " << last_at << std::endl; + } + ++total_cnt; + // std::cout << "\t[INFO] Reading xu[R." << i << "][N." 
<< j << "]" << std::endl; + for (int k = 0; k < testu::params::G; ++k) { + // VectN_Type xu_gold[num_refinements * testu::params::G]; + std::cout << i << ") test/gold: " << xu_g_val[k] << " / " + << xu[i][j][k] << std::endl; + if (xu_g_val[k] != xu[i][j][k]) { + ++num_errors; + } + } + } + } + } + std::cout << "[INFO] Last index arrived at iteration: " << last_at << std::endl; + std::cout << "[INFO] Total iterations: " << total_cnt << std::endl; + std::cout << "[INFO] Number of mismatches: " << num_errors << std::endl; + + while(!xu_n_axis.empty()) { + auto xu_n_val = xu_n_axis_interface.PopVector(); + } + } + std::cout << "[INFO] Number of mismatches: " << num_errors << std::endl; + return 0; // num_errors; +#endif +} \ No newline at end of file diff --git a/src/testbenches/test_v_kernel.cpp b/src/testbenches/test_v_kernel.cpp new file mode 100644 index 0000000..35981ea --- /dev/null +++ b/src/testbenches/test_v_kernel.cpp @@ -0,0 +1,167 @@ +#include "testbenches/test_v_kernel.h" +#include "dma/axis_lib.h" + +#ifdef __VITIS_HLS__ +#include "hls_vector.h" +#endif +#include "ap_int.h" +#include "hls_stream.h" +#include +#include + +int main(int argc, char const *argv[]) { +#ifdef COSIM_DESIGN + srand(1); +#else + srand(1); + // srand(time(NULL)); +#endif + std::cout << "[INFO] Starting HlsKernelV test." << std::endl; +#ifndef __VITIS_HLS__ + return 0; +#else + int num_active_inputs = testv::params::N; + int output_size = testv::params::H; + int num_refinements = testv::params::R; + if (argc >= 2) { + num_active_inputs = atoi(argv[1]); + } + if (argc >= 3) { + output_size = atoi(argv[2]); + } + if (argc >= 4) { + num_refinements = atoi(argv[3]); + } + const int kMaxRefinements = num_refinements; + typedef hls::vector VectN; + VectN num_refinements_vect = VectN(kMaxRefinements); + const int kNumTests = 2; + const int kNumActiveInputs = (num_active_inputs > testv::params::N) ? 
testv::params::N : num_active_inputs; + const int kOutputSize = (output_size > testv::params::H) ? testv::params::H : output_size; + const int kNumTilesV = kOutputSize / testv::params::Tv; + for (int i = kNumActiveInputs-1; i >= 0; --i) { + // num_refinements_vect[i] = kMaxRefinements; + int R_tmp = kMaxRefinements - 2 * (kNumActiveInputs - i - 1); + num_refinements_vect[i] = R_tmp > 0 ? R_tmp : 1; + } + typedef typename testv::params::ActivationD ActivationType; + assert(testv::params::H == testv::params::PrunedSizeV); // No pruning. + + ActivationType xus[kMaxRefinements][testv::params::N][testv::params::G] = {ActivationType(0.001)}; + ActivationType v[kMaxRefinements][testv::params::PrunedSizeV][testv::params::G] = {ActivationType(0.001)}; + ActivationType y_gold[testv::params::N][testv::params::G][testv::params::H] = {0}; + + for (int i = 0; i < kMaxRefinements; ++i) { + for (int j = 0; j < testv::params::G; ++j) { + for (int k = 0; k < testv::params::N; ++k) { + if (std::is_same::value) { + xus[i][k][j] = ActivationType(rand()); + } else { + xus[i][k][j] = ActivationType(rand() * 0.00001); + } + } + for (int k = 0; k < testv::params::PrunedSizeV; ++k) { + if (std::is_same::value) { + v[i][k][j] = ActivationType(rand()); + } else { + v[i][k][j] = ActivationType(rand() * 0.00001); + } + } + } + } + + for (int i = 0; i < kMaxRefinements; ++i) { + for (int j = 0; j < kNumActiveInputs; ++j) { + if (i < num_refinements_vect[j]) { + for (int k = 0; k < kOutputSize; ++k) { + for (int ii = 0; ii < testv::params::G; ++ii) { + y_gold[j][ii][k] += v[i][k][ii] * xus[i][j][ii]; + } + } + } + } + } + + hls::stream xus_port("xus_port"); + hls::stream v_port("v_port"); + hls::stream y_port("y_port"); + + auto xus_axis = svd::AxiStreamPort(xus_port); + auto v_axis = svd::AxiStreamPort(v_port); + auto y_axis = svd::AxiStreamPort(y_port); + + int num_errors = 0; + std::cout << "[INFO] Pushing into FIFOs." 
<< std::endl; + for (int t = 0; t < kNumTests; ++t) { + std::cout << "[INFO] Pushing into XUS." << std::endl; + typename testv::params::VectG_Type xus_val; + for (int i = 0; i < kMaxRefinements; ++i) { + for (int j = 0; j < kNumActiveInputs; ++j) { + if (i < num_refinements_vect[j]) { + for (int k = 0; k < testv::params::G; ++k) { + xus_val[k] = xus[i][j][k]; + } + xus_axis.PushVector(xus_val); + } + } + } + std::cout << "[INFO] Pushing into V." << std::endl; + typename testv::params::VectTvType v_val; + for (int i = 0; i < kMaxRefinements; ++i) { + for (int k = 0; k < kNumTilesV; ++k) { + for (int j = 0; j < testv::params::G; ++j) { + for (int ii = 0; ii < testv::params::Tv; ++ii) { + v_val[ii] = v[i][k * testv::params::Tv + ii][j]; + } + v_axis.PushVector(v_val); + } + } + } + } + std::cout << "[INFO] Starting HlsKernelV." << std::endl; + std::cout << "[INFO] v_port.size(): " << v_port.size() << std::endl; + for (int t = 0; t < kNumTests; ++t) { + int tmp[testv::params::N]; + for (int i = 0; i < testv::params::N; ++i) { + tmp[i] = num_refinements_vect[i]; + } + HlsKernelV(kNumActiveInputs, kOutputSize, tmp, xus_port, v_port, y_port); + std::cout << "[INFO] v_port.size(): " << v_port.size() << std::endl; + } + int num_elems = 0; + for (int t = 0; t < kNumTests; ++t) { + std::cout << "[INFO] Checking results test n." 
<< t << std::endl; + int test_errors = 0; + num_elems = 0; + for (int j = 0; j < kNumTilesV; ++j) { + for (int i = 0; i < kNumActiveInputs; ++i) { + const int kGTv = testv::params::G * testv::params::Tv; + auto y_val = y_axis.PopVector(); + for (int k = 0; k < testv::params::Tv; ++k) { + for (int ii = 0; ii < testv::params::G; ++ii) { + if (y_val[k * testv::params::G + ii] != y_gold[i][ii][j * testv::params::Tv + k]) { + std::cout << "N:" << i << "][NTv:" << j << "][Tv:" << k << "][G:" + << ii << "] test/gold: " + << y_val[k * testv::params::G + ii] << " / " + << y_gold[i][ii][j * testv::params::Tv + k] << std::endl; + ++test_errors; + } else { + // std::cout << "\tN:" << i << "][NTv:" << j << "][Tv:" << k << "][G:" + // << ii << "] test/gold: " + // << y_val[k * testv::params::G + ii] << " / " + // << y_gold[i][ii][j * testv::params::Tv + k] << std::endl; + } + ++num_elems; + } + } + } + } + std::cout << "[INFO] Number of mismatches per test / total: " << test_errors + << " / " << num_elems << std::endl; + num_errors += test_errors; + } + std::cout << "[INFO] Total number of mismatches / total: " << num_errors + << " / " << num_elems * kNumTests << std::endl; + return 0; // num_errors; +#endif // end __VITIS_HLS__ +} diff --git a/tcl/lstm_params.tcl b/tcl/lstm_params.tcl new file mode 100644 index 0000000..c56e150 --- /dev/null +++ b/tcl/lstm_params.tcl @@ -0,0 +1,28 @@ +proc append_lstm_params {&defines} { + dict set params NUM_GATES 4 + dict set params NUM_INPUTS 2 + dict set params NUM_SAMPLES 2 + dict set params INPUT_SIZE 256 + dict set params HIDDEN_SIZE 128 + dict set params NUM_ITERATIONS 32 + dict set params NUM_TILES_U 4 + dict set params NUM_ZERO_TILES_U 2 + dict set params NUM_TILES_V 16 + dict set params NUM_ZERO_TILES_V 4 + dict set params NUM_TIMESTEPS 28 + dict set params FIX_WIDTH 16 + dict set params FIX_FRACT_WIDTH 6 + + set tmp {} + append tmp " " + foreach key [dict keys $params] { + set value [dict get $params $key] + append tmp 
"-D${key}=${value} " + } + puts "================================================================" + puts "\[INFO\] LSTM parameters:" + puts $tmp + puts "================================================================" + upvar 1 ${&defines} defines ;# To have a "pass by reference" argument. + append defines $tmp +} \ No newline at end of file diff --git a/tcl/utils.tcl b/tcl/utils.tcl new file mode 100644 index 0000000..3793db7 --- /dev/null +++ b/tcl/utils.tcl @@ -0,0 +1,63 @@ +# +# @brief Find all files in a directory and return them in a list. +# +# @param basedir The directory to start looking in pattern. +# @param pattern A pattern, as defined by the glob command, that +# the files must match. +# @param exclude_dirs_list Ignore searching in specified directories +# +# @return The list of found files. +# +proc findFiles { basedir pattern exclude_dirs_list } { + # Fix the directory name, this ensures the directory name is in the + # native format for the platform and contains a final directory seperator + set basedir [string trimright [file join [file normalize $basedir] { }]] + set fileList {} + # Look in the current directory for matching files, -type {f r} + # means ony readable normal files are looked at, -nocomplain stops + # an error being thrown if the returned list is empty + foreach fileName [glob -nocomplain -type {f r} -path $basedir $pattern] { + lappend fileList $fileName + } + # Now look for any sub direcories in the current directory + foreach dirName [glob -nocomplain -type {d r} -path $basedir *] { + # Recusively call the routine on the sub directory and append any + # new files to the results + if {[lsearch -exact ${exclude_dirs_list} $dirName] == -1} { + set subDirList [findFiles $dirName $pattern $exclude_dirs_list] + if { [llength $subDirList] > 0 } { + foreach subDirFile $subDirList { + lappend fileList $subDirFile + } + } + } + } + return $fileList +} + +# +# @brief Greps a file content and writes matches to a file. 
+# +# @param re Regular expression +# @param lines Number of lines to report/include after the found match +# @param fin The fin pointer +# @param fout The fout pointer +# +proc grep {re lines fin fout} { + set cnt 0 + set match false + seek $fin 0 + while {[gets $fin line] >= 0} { + if [regexp -- $re $line] { + set cnt 0 + set match true + } + if {$match && ($cnt < $lines)} { + puts $line + puts $fout $line + set cnt [expr {$cnt +1}] + } else { + set match false + } + } +} \ No newline at end of file diff --git a/token b/token new file mode 100644 index 0000000..5cf751e --- /dev/null +++ b/token @@ -0,0 +1,5 @@ +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 + +git remote remove origin +git remote add origin https://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@github.com/ribesstefano/hls_svd.git +git pull https://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@github.com/ribesstefano/hls_svd.git \ No newline at end of file