From 57c34e78f90483bd4ee80bf0fc9f0b6974b2ad74 Mon Sep 17 00:00:00 2001 From: Sebastian Bach Date: Fri, 29 Mar 2024 15:05:23 +0100 Subject: [PATCH] Update 5 (#5) - split README.md - extended script converter to transcompiler with optimization --- README.md | 152 +------ scripts/test_transcompiler.sh | 59 +++ src/CMakeLists.txt | 4 +- src/asm/build_and_run.sh | 8 +- src/asm/linux_x86_64/tool.s | 1 - src/lib_resources/BUILD.txt | 4 +- src/lib_resources/CMakeLists.txt | 7 + src/script_converter/CMakeLists.txt | 57 --- src/script_converter/converter.cpp | 231 ----------- src/script_converter/converter.h | 15 - src/script_converter/main.cpp | 13 - src/script_lib/engine.cpp | 8 +- src/script_resources/script_test.txt | 45 ++ src/script_resources/work.txt | 16 + src/script_transcompiler/CMakeLists.txt | 44 ++ src/script_transcompiler/backend.cpp | 21 + src/script_transcompiler/backend.h | 9 + src/script_transcompiler/backend_cpp.cpp | 152 +++++++ .../backend_intermediate.cpp | 57 +++ .../backend_linux_x86_64.cpp | 390 ++++++++++++++++++ src/script_transcompiler/backend_python.cpp | 80 ++++ src/script_transcompiler/data.h | 103 +++++ src/script_transcompiler/frontend.cpp | 149 +++++++ src/script_transcompiler/frontend.h | 12 + src/script_transcompiler/main.cpp | 42 ++ src/script_transcompiler/optimization.cpp | 308 ++++++++++++++ src/script_transcompiler/optimization.h | 14 + src/script_transcompiler/transcompiler.cpp | 82 ++++ src/script_transcompiler/transcompiler.h | 8 + user_guide.md | 161 ++++++++ 30 files changed, 1776 insertions(+), 476 deletions(-) create mode 100755 scripts/test_transcompiler.sh delete mode 100644 src/script_converter/CMakeLists.txt delete mode 100644 src/script_converter/converter.cpp delete mode 100644 src/script_converter/converter.h delete mode 100644 src/script_converter/main.cpp create mode 100644 src/script_resources/script_test.txt create mode 100644 src/script_resources/work.txt create mode 100644 src/script_transcompiler/CMakeLists.txt 
create mode 100644 src/script_transcompiler/backend.cpp create mode 100644 src/script_transcompiler/backend.h create mode 100644 src/script_transcompiler/backend_cpp.cpp create mode 100644 src/script_transcompiler/backend_intermediate.cpp create mode 100644 src/script_transcompiler/backend_linux_x86_64.cpp create mode 100644 src/script_transcompiler/backend_python.cpp create mode 100644 src/script_transcompiler/data.h create mode 100644 src/script_transcompiler/frontend.cpp create mode 100644 src/script_transcompiler/frontend.h create mode 100644 src/script_transcompiler/main.cpp create mode 100644 src/script_transcompiler/optimization.cpp create mode 100644 src/script_transcompiler/optimization.h create mode 100644 src/script_transcompiler/transcompiler.cpp create mode 100644 src/script_transcompiler/transcompiler.h create mode 100644 user_guide.md diff --git a/README.md b/README.md index 025b8f2..7795929 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Coverage: https://sebastianbach.github.io/full-stack/coverage.html # About -You have a simple, nice, useful C++ function. How do you make it available to users? +You have a simple, nice, useful C++ function. How do you make it available to consumers? # Content @@ -178,152 +178,4 @@ See also ```.github/workflows/build.yml```. # Usage -## Command Line Tool *title_case* - -This command line tool takes the given command line argument, converts the data, and prints the result to ```std::cout```. - -``` -title_case "this is some text" - -# prints -This is Some Text -``` - -## Command Line Tool *title_case_console* - -Interactive command line tool. Enter the text to convert or "exit" to end the program. - -## Command Line Tool *title_case_files* - -The first command line argument is the file to read the data from, the second is the file to save the result to. 
- -```sh -title_case_file source_file.txt target_file.txt -``` - -## Web App - -Start the ```web.py``` script by providing the location of the resource files and the folder containing the ```title_case``` tool. - -```sh -python web.py C:\web\resources C:\build\product -``` - -Open ```localhost:5000``` for a synchronous web app. Open ```localhost:5000/interactive``` for an asynchronous web app. - -## Container - -Build the *docker* image with: - -```sh -docker build --tag title-case-web . -``` - -The multi-stage build process will build the ```title_case``` tool and copy all necessary files. - - -To start the container, run: - -```sh -docker run --rm -it -p 5000:5000 title-case-web -``` - -Open ```localhost:5000``` for a synchronous web app. Open ```localhost:5000/interactive``` for an asynchronous web app. - - -## WebAssembly - -WebAssembly requires to access the HTML document via a web server. A simple server can be started with Python: - -```sh -python -m http.server -``` - -Open ```http://localhost:8000/``` to start the WebAssembly app. - - -## Scripting Language - -The domain-specific scripting language is a simple language designed to perform basic tasks. The language consists of five commands: - -| Command | Operand (optional) | Description | -| --- | --- | --- | -| ```text``` | *text to load and store in memory* | Stores the given text in the program's memory. | -| ```process``` | - | Processes the text in memory. | -| ```print``` | - | Prints the text in memory to the screen. | -| ```load``` | *path to text file* | Reads the specified text file and stores the text in memory. | -| ```save``` | *path to text file* | Saves the text in memory to the specified text file. | - -An example program is: - -``` -text this is a headline -process -print -``` - -This will print ```This Is a Headline```. - -### Command Line Tool *console* - -The scripting **console** allows to enter and execute code. 
The console application can be closed by entering ```exit``` or pressing ```CTRL+C```. - -### Command Line Tool *interpreter* - -The **interpreter** loads and executes a script stored in the specified source file. - -```sh -interpreter script.txt -``` - -### Command Line Tool *compiler* & *runtime* - -The **compiler** loads a source file and generates byte-code, that can be executed by the **runtime**. - -```sh -compiler script.txt bytecode.code - -runtime bytecode.code -``` - -### Command Line Tool *converter* - -The **converter** loads a source file and generates equivalent C++ or Python source code. -In Python the generated code uses the ```text_conversion``` module, in C++ the generated code uses the static library. - -```sh -converter script.txt python_script.py py -``` - -The arguments are: - -* Path to the script source file. -* Path to the target file to create. -* The target language, either ```py``` for Python or ```cpp``` for C++. - - -## Java Command Line Tool - -Execute the command line tool (JAR) like this: - -```sh -java -jar text_conversion.jar "this is a headline" -``` - -Make sure the ```libjava_text_conversion``` shared library can be found by Java. Set the command line argument ```java.library.path``` if needed. - -## Jupyter Notebook - -Start the Jupyter Notebook by simply running the ```start_notebook.sh``` script. It will start the notebook server with the notebook selected. - -## Assembly Command Line Tool - -The command line tool written in assembly is used like this: - -```shell -title_case "this is a headline" - -# will print -# Input: this is a headline -# Output: This Is a Headline -``` +See the [user guide](user_guide.md) on how to use the included software. diff --git a/scripts/test_transcompiler.sh b/scripts/test_transcompiler.sh new file mode 100755 index 0000000..8c9c0ac --- /dev/null +++ b/scripts/test_transcompiler.sh @@ -0,0 +1,59 @@ +echo test transcompiler + +cd .. 
+ +cd build +cmake -DCMAKE_BUILD_TYPE=Release -DADD_SCRIPT_TOOLS=ON -DADD_PYTHON_MODULE=ON .. + +cmake --build . -j --config Release +ctest -C Release +cmake --install . + +cd temp +mkdir test_transcompiler +cd test_transcompiler + + +mkdir cpp +cd cpp + +./../../../src/script_transcompiler/transcompiler "../../../../src/script_resources/script_test.txt" "result.cpp" "cpp" + +g++ result.cpp -o transcompiler_result -ltext_conversion -L../../../product/lib/lib -I../../../product/lib/header + +echo run binary: + +./transcompiler_result + + +cd .. +mkdir intermediate +cd intermediate + +./../../../src/script_transcompiler/transcompiler "../../../../src/script_resources/script_test.txt" "result.txt" "i" + + +cd .. +mkdir py +cd py + +./../../../src/script_transcompiler/transcompiler "../../../../src/script_resources/script_test.txt" "result.py" "py" + +echo run python script: + +export PYTHONPATH=$PYTHONPATH:../../../product/python +python3 result.py + +cd .. +mkdir linux_x86_64 +cd linux_x86_64 + +echo build linux_x86_64 + +./../../../src/script_transcompiler/transcompiler "../../../../src/script_resources/script_test.txt" "result.s" "linux_x86_64" + +gcc -o result result.s ../../../product/rust/libtext_conversion_c.a -m64 -lc -nostartfiles + +echo run binary from assembly: + +./result diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3980c3c..489a30e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(lib) add_subdirectory(lib_resources) add_subdirectory(test_lib) add_subdirectory(lib_c) +add_subdirectory(test_lib_c) if(ADD_ASSEMBLY_PROGRAM) add_subdirectory(asm) @@ -41,12 +42,11 @@ if(ADD_SCRIPT_TOOLS) add_subdirectory(script_interpreter) add_subdirectory(script_compiler) add_subdirectory(script_runtime) - add_subdirectory(script_converter) + add_subdirectory(script_transcompiler) add_subdirectory(script_ide) endif() if(ADD_RUST_APP) - add_subdirectory(test_lib_c) add_subdirectory(rust_cmdl) endif() diff --git 
a/src/asm/build_and_run.sh b/src/asm/build_and_run.sh index 44cc655..95e45bc 100755 --- a/src/asm/build_and_run.sh +++ b/src/asm/build_and_run.sh @@ -65,12 +65,16 @@ if [ "$TARGET" = "Release" ]; then elif [ "$TARGET" = "Debug" ]; then - echo "-> build debug and run gdb" + echo "-> build debug" + echo "" gcc -o $BUILD_FOLDER/$BINARY $SOURCE_FILE $LIB_TITLE_CASE -nostartfiles -no-pie -g -lc -lstdc++ - if [ "$RUN" = "r-> un" ]; then + if [ "$RUN" = "run" ]; then + echo "-> debug $BINARY"; + echo "---------------------------------" gdb --args $BUILD_FOLDER/$BINARY "$TEST_ARG" + echo "---------------------------------" fi else diff --git a/src/asm/linux_x86_64/tool.s b/src/asm/linux_x86_64/tool.s index f6f0ac2..8bd05e2 100644 --- a/src/asm/linux_x86_64/tool.s +++ b/src/asm/linux_x86_64/tool.s @@ -91,7 +91,6 @@ _start: # rax: length of the string string_length: xor %rcx, %rcx # clear rcx, which will hold the length - xor %rax, %rax # clear rax, which will be used for the null terminator comparison .next_char: cmpb $0, (%rsi, %rcx) # compare to null terminator diff --git a/src/lib_resources/BUILD.txt b/src/lib_resources/BUILD.txt index ec53ac8..8277c50 100644 --- a/src/lib_resources/BUILD.txt +++ b/src/lib_resources/BUILD.txt @@ -1,4 +1,6 @@ +Commit: @GIT_COMMIT_ID@ + @CMAKE_SYSTEM_NAME@ - @CMAKE_SYSTEM_PROCESSOR@ @CURRENT_DATE@ - @CURRENT_TIME@ -@CMAKE_SYSTEM@ \ No newline at end of file +@CMAKE_SYSTEM@ diff --git a/src/lib_resources/CMakeLists.txt b/src/lib_resources/CMakeLists.txt index c3dc8b0..13b80be 100644 --- a/src/lib_resources/CMakeLists.txt +++ b/src/lib_resources/CMakeLists.txt @@ -2,6 +2,13 @@ string(TIMESTAMP CURRENT_DATE "%Y-%m-%d") string(TIMESTAMP CURRENT_TIME "%H:%M:%S") +execute_process( + COMMAND git rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_ID + OUTPUT_STRIP_TRAILING_WHITESPACE +) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/BUILD.txt ${CMAKE_INSTALL_PREFIX}/product/lib/BUILD.txt @ONLY) diff --git 
a/src/script_converter/CMakeLists.txt b/src/script_converter/CMakeLists.txt deleted file mode 100644 index c5371a9..0000000 --- a/src/script_converter/CMakeLists.txt +++ /dev/null @@ -1,57 +0,0 @@ -add_executable(script_converter) - -target_sources(script_converter PRIVATE main.cpp - converter.cpp) - -target_include_directories(script_converter PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(script_converter PRIVATE script_lib) -set_target_properties(script_converter PROPERTIES OUTPUT_NAME "converter") - -install(TARGETS script_converter RUNTIME DESTINATION product/script) - -set_target_properties(script_converter PROPERTIES FOLDER "script") - - -# tests - -add_test(NAME script_converter_no_args COMMAND script_converter) -set_tests_properties(script_converter_no_args PROPERTIES WILL_FAIL TRUE) - -add_test(NAME script_converter_one_arg COMMAND script_converter something) -set_tests_properties(script_converter_one_arg PROPERTIES WILL_FAIL TRUE) - -add_test(NAME script_converter_two_args COMMAND script_converter something something) -set_tests_properties(script_converter_two_args PROPERTIES WILL_FAIL TRUE) - -add_test(NAME script_converter_invalid_target COMMAND script_converter something something abc) -set_tests_properties(script_converter_invalid_target PROPERTIES WILL_FAIL TRUE) - -add_test(NAME script_converter_invalid_source COMMAND script_converter no_source_file.txt result.py py) -set_tests_properties(script_converter_invalid_source PROPERTIES WILL_FAIL TRUE) - - -set(OUTPUT_FILE ${PROJECT_SOURCE_DIR}/build/result.cpp) - -add_test(NAME script_converter_cpp COMMAND script_converter ${PROJECT_SOURCE_DIR}/src/script_resources/script_full.txt ${OUTPUT_FILE} cpp) - -set(REFERENCE_FILE ${PROJECT_SOURCE_DIR}/src/script_resources/result.cpp) - -add_test(NAME script_converter_cpp_compare COMMAND ${CMAKE_COMMAND} - -DOUTPUT_FILE=${OUTPUT_FILE} - -DREFERENCE_FILE=${REFERENCE_FILE} - -P ${PROJECT_SOURCE_DIR}/src/cmake/compare.cmake -) - - -set(OUTPUT_FILE 
${PROJECT_SOURCE_DIR}/build/result.py) - -add_test(NAME script_converter_py COMMAND script_converter ${PROJECT_SOURCE_DIR}/src/script_resources/script_full.txt ${OUTPUT_FILE} py) - -set(REFERENCE_FILE ${PROJECT_SOURCE_DIR}/src/script_resources/result.py) - -add_test(NAME script_converter_py_compare COMMAND ${CMAKE_COMMAND} - -DOUTPUT_FILE=${OUTPUT_FILE} - -DREFERENCE_FILE=${REFERENCE_FILE} - -P ${PROJECT_SOURCE_DIR}/src/cmake/compare.cmake -) - diff --git a/src/script_converter/converter.cpp b/src/script_converter/converter.cpp deleted file mode 100644 index f0ad069..0000000 --- a/src/script_converter/converter.cpp +++ /dev/null @@ -1,231 +0,0 @@ -#include "converter.h" -#include "script.h" -#include -#include -#include -#include - -enum class TARGET -{ - UKNOWN, - CPP, - PY -}; - -inline TARGET to_target(const char* arg) -{ - if (std::strcmp(arg, "cpp") == 0) - return TARGET::CPP; - - if (std::strcmp(arg, "py") == 0) - return TARGET::PY; - - return TARGET::UKNOWN; -} - -bool load_file(std::vector& lines, const std::string& src) -{ - std::ifstream source{src}; - - if (!source.is_open()) - { - print_error("Could not open source file."); - return false; - } - - std::string line; - while (std::getline(source, line)) - { - if (line.empty()) - continue; - - if ((line.back() == '\r' || line.back() == '\n')) - line.erase(line.size() - 1); - - lines.push_back(line); - } - - source.close(); - return true; -} - -bool make_cpp_file(std::vector& lines, const char* dst) -{ - std::ofstream file_stream(dst); - - if (const auto is_open = file_stream.is_open(); !is_open) - return is_open; - - const char* tab = " "; - const char* nl = "\n"; - - file_stream << "#include " << nl; - file_stream << "#include " << nl; - file_stream << "#include " << nl; - file_stream << "#include \"text_conversion.h\"" << nl; - - file_stream << nl; - - file_stream << "int main()" << nl; - - file_stream << "{" << nl; - - auto first_text = true; - - for (const auto& line : lines) - { - script::command cmd; 
- std::string operand; - script::parse(line, cmd, operand); - - if (cmd == script::command::INVALID) - continue; - - if (cmd == script::command::COMMENT) - { - file_stream << tab << "// " << line.substr(1) << nl; - } - else if (cmd == script::command::TEXT) - { - if (first_text) - { - first_text = false; - file_stream << tab << "std::string text = \"" << operand - << "\";" << nl << nl; - } - else - { - file_stream << tab << "text = \"" << operand << "\";" << nl - << nl; - } - } - else if (cmd == script::command::PROCESS) - { - file_stream << tab - << "text_conversion::convert_to_title_case(text);" << nl - << nl; - } - else if (cmd == script::command::PRINT) - { - file_stream << tab << "std::cout << text << std::endl;" << nl << nl; - } - else if (cmd == script::command::SAVE) - { - file_stream << tab << "std::ofstream(\"" << operand - << "\") << text;" << nl << nl; - } - else if (cmd == script::command::LOAD) - { - file_stream << tab << "{" << nl; - file_stream - << tab << tab - << "const std::string " - "temp((std::istreambuf_iterator(std::ifstream(\"" - << operand - << "\").rdbuf())), std::istreambuf_iterator());" << nl; - file_stream << tab << tab << "text = temp;" << nl; - file_stream << tab << "}" << nl << nl; - } - } - - file_stream << tab << "return 0;" << nl; - file_stream << "}" << nl; - file_stream << nl; - - file_stream.close(); - - return true; -} - -bool make_py_file(std::vector& lines, const char* dst) -{ - std::ofstream file_stream(dst); - - if (!file_stream.is_open()) - return false; - - const char* tab = " "; - const char* nl = "\n"; - - file_stream << "import text_conversion" << nl << nl; - - file_stream << "if __name__ == '__main__':" << nl; - - for (const auto& line : lines) - { - script::command cmd; - std::string operand; - script::parse(line, cmd, operand); - - if (cmd == script::command::INVALID) - continue; - - if (cmd == script::command::COMMENT) - { - file_stream << tab << line << nl; - } - else if (cmd == script::command::TEXT) - { - 
file_stream << tab << "text = \"" << operand << "\"" << nl; - } - else if (cmd == script::command::PROCESS) - { - file_stream << tab << "text = text_conversion.title_case(text)" - << nl; - } - else if (cmd == script::command::PRINT) - { - file_stream << tab << "print(text)" << nl; - } - else if (cmd == script::command::SAVE) - { - file_stream << tab << "with open(\"" << operand - << "\", \"w\") as file:" << nl; - file_stream << tab << tab << "file.write(text)" << nl; - } - else if (cmd == script::command::LOAD) - { - file_stream << tab << "with open(\"" << operand - << "\", 'r') as file:" << nl; - file_stream << tab << tab << "text = file.read()" << nl; - } - } - - file_stream << nl; - - file_stream.close(); - - return true; -} - -bool converter(int argc, char* argv[]) -{ - if (argc != 4) - { - print_error("Invalid command line arguments."); - return false; - } - - std::cout << argv[1] << "\n"; - std::cout << argv[2] << "\n"; - std::cout << argv[3] << "\n"; - - const auto target = to_target(argv[3]); - - if (target == TARGET::UKNOWN) - { - print_error("Invalid target language argument."); - return false; - } - - std::vector lines; - if (!load_file(lines, argv[1])) - return false; - - if (target == TARGET::CPP) - return make_cpp_file(lines, argv[2]); - else if (target == TARGET::PY) - return make_py_file(lines, argv[2]); - - return true; -} diff --git a/src/script_converter/converter.h b/src/script_converter/converter.h deleted file mode 100644 index ab267d8..0000000 --- a/src/script_converter/converter.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef CONVERTER_H__ -#define CONVERTER_H__ - -#include - -inline void print_error(const char* msg) -{ - std::cout << "\033[31m"; - std::cout << "Error: " << msg << std::endl; - std::cout << "\033[0m"; -} - -bool converter(int argc, char* argv[]); - -#endif diff --git a/src/script_converter/main.cpp b/src/script_converter/main.cpp deleted file mode 100644 index 95fe8ee..0000000 --- a/src/script_converter/main.cpp +++ /dev/null @@ 
-1,13 +0,0 @@ -#include "converter.h" -#include - -int main(int argc, char* argv[]) -{ - if (!converter(argc, argv)) - { - print_error("failure."); - return EXIT_FAILURE; - } - - return EXIT_SUCCESS; -} diff --git a/src/script_lib/engine.cpp b/src/script_lib/engine.cpp index f1fdb50..6bc30c2 100644 --- a/src/script_lib/engine.cpp +++ b/src/script_lib/engine.cpp @@ -70,9 +70,9 @@ void set_commands(std::vector& cmds) args.memory.clear(); - std::string line; - while (std::getline(file, line)) - args.memory += line; + std::stringstream buffer; + buffer << file.rdbuf(); + args.memory = buffer.str(); file.close(); }); @@ -89,7 +89,7 @@ void set_commands(std::vector& cmds) return; } - output_file << args.memory << std::endl; + output_file << args.memory; output_file.close(); }); diff --git a/src/script_resources/script_test.txt b/src/script_resources/script_test.txt new file mode 100644 index 0000000..cfd8b18 --- /dev/null +++ b/src/script_resources/script_test.txt @@ -0,0 +1,45 @@ + + + + + + +save new_file.txt + +text program start +process +print + +text process text and store to file +print + + +text this is a headline +process +process +process +print + +save new_file.txt + +text now loading file again +load new_file.txt +print + +text now save another text +print +text some text +save next_file.txt + +text now load again +print +load next_file.txt +process +print + +text this is useless text + +text this is even more useless +process + + diff --git a/src/script_resources/work.txt b/src/script_resources/work.txt new file mode 100644 index 0000000..f408813 --- /dev/null +++ b/src/script_resources/work.txt @@ -0,0 +1,16 @@ + +text load and process input.txt: +print + +load input.txt +print +process +print +save output.txt + +text stored result to output.txt, check file: +print + +# check +load output.txt +print diff --git a/src/script_transcompiler/CMakeLists.txt b/src/script_transcompiler/CMakeLists.txt new file mode 100644 index 0000000..b8b1f80 --- /dev/null +++ 
b/src/script_transcompiler/CMakeLists.txt @@ -0,0 +1,44 @@ +add_executable(script_transcompiler) + +target_sources(script_transcompiler PRIVATE main.cpp + transcompiler.cpp + frontend.cpp + optimization.cpp + backend.cpp + backend_linux_x86_64.cpp + backend_python.cpp + backend_cpp.cpp + backend_intermediate.cpp) + +target_include_directories(script_transcompiler PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(script_transcompiler PRIVATE script_lib) +set_target_properties(script_transcompiler PROPERTIES OUTPUT_NAME "transcompiler") + +install(TARGETS script_transcompiler RUNTIME DESTINATION product/script) + +set_target_properties(script_transcompiler PROPERTIES FOLDER "script") + + +# basic invalid args tests + +add_test(NAME script_transcompiler_no_args COMMAND script_transcompiler) +set_tests_properties(script_transcompiler_no_args PROPERTIES WILL_FAIL TRUE) + +add_test(NAME script_transcompiler_one_arg COMMAND script_transcompiler something) +set_tests_properties(script_transcompiler_one_arg PROPERTIES WILL_FAIL TRUE) + +add_test(NAME script_transcompiler_two_args COMMAND script_transcompiler something something) +set_tests_properties(script_transcompiler_two_args PROPERTIES WILL_FAIL TRUE) + +add_test(NAME script_transcompiler_invalid_target COMMAND script_transcompiler something something abc) +set_tests_properties(script_transcompiler_invalid_target PROPERTIES WILL_FAIL TRUE) + + +# basic tests + +set(COMPLEX_SOURCE_FILE ${PROJECT_SOURCE_DIR}/src/script_resources/script_test.txt) + +add_test(NAME script_transcompiler_test_cpp COMMAND script_transcompiler ${COMPLEX_SOURCE_FILE} basic.cpp cpp) +add_test(NAME script_transcompiler_test_py COMMAND script_transcompiler ${COMPLEX_SOURCE_FILE} basic.py py) +add_test(NAME script_transcompiler_test_i COMMAND script_transcompiler ${COMPLEX_SOURCE_FILE} basic.txt i) +add_test(NAME script_transcompiler_test_linux_x86_64 COMMAND script_transcompiler ${COMPLEX_SOURCE_FILE} basic.s linux_x86_64) diff --git 
a/src/script_transcompiler/backend.cpp b/src/script_transcompiler/backend.cpp new file mode 100644 index 0000000..eb47104 --- /dev/null +++ b/src/script_transcompiler/backend.cpp @@ -0,0 +1,21 @@ +#include "backend.h" + +void generate_py(data& data); +void generate_cpp(data& data); +void generate_intermediate(data& data); +void generate_linux_x86_64(data& data); + +void generate_code(data& data, TARGET target) +{ + if (target == TARGET::PY) + return generate_py(data); + + if (target == TARGET::CPP) + return generate_cpp(data); + + if (target == TARGET::INTERMEDIATE) + return generate_intermediate(data); + + if(target == TARGET::LINUX_X86_64) + return generate_linux_x86_64(data); +} diff --git a/src/script_transcompiler/backend.h b/src/script_transcompiler/backend.h new file mode 100644 index 0000000..f0a4aea --- /dev/null +++ b/src/script_transcompiler/backend.h @@ -0,0 +1,9 @@ +#ifndef BACKEND_H__ +#define BACKEND_H__ + +#include "data.h" + +void generate_code(data& data, TARGET target); + + +#endif diff --git a/src/script_transcompiler/backend_cpp.cpp b/src/script_transcompiler/backend_cpp.cpp new file mode 100644 index 0000000..f621d2f --- /dev/null +++ b/src/script_transcompiler/backend_cpp.cpp @@ -0,0 +1,152 @@ +#include "data.h" + +namespace +{ + +struct cpp_file +{ + bool use_iostream = false; + bool use_string = false; + bool use_text_conversion = false; + bool use_fstream = false; + + std::vector lines; +}; + +inline bool generate_cpp_code(const intermediate& i, cpp_file& file, data&) +{ + file.lines.push_back("// " + i.line); + + switch (i.command) + { + case (intermediate::cmd::comment): + { + file.lines.push_back("//" + i.operand); + break; + } + case (intermediate::cmd::text_init_memory): + { + file.lines.push_back("std::string text = \"" + i.operand + "\";"); + file.use_string = true; + break; + } + case (intermediate::cmd::text_memory): + { + file.lines.push_back("text = \"" + i.operand + "\";"); + break; + } + case (intermediate::cmd::print_memory): 
+ { + file.lines.push_back("std::cout << text << std::endl;"); + file.use_iostream = true; + break; + } + case (intermediate::cmd::load_init_memory): + { + file.lines.push_back("std::string text;"); + } + [[fallthrough]]; + case (intermediate::cmd::load_memory): + { + file.lines.push_back("{"); + file.lines.push_back(" std::ifstream file{\"" + i.operand + "\"};"); + file.lines.push_back(" if (!file.is_open())"); + file.lines.push_back(" return EXIT_FAILURE;"); + file.lines.push_back(" std::stringstream buffer;"); + file.lines.push_back(" buffer << file.rdbuf();"); + file.lines.push_back(" text = buffer.str();"); + file.lines.push_back(" file.close();"); + file.lines.push_back("}"); + + file.use_fstream = true; + break; + } + case (intermediate::cmd::create_file): + { + file.lines.push_back("{"); + file.lines.push_back(" std::ofstream file{\"" + i.operand + "\"};"); + file.lines.push_back(" file.close();"); + file.lines.push_back("}"); + + file.use_fstream = true; + break; + } + case (intermediate::cmd::save_memory): + { + file.lines.push_back("{"); + file.lines.push_back(" std::ofstream output_file{\"" + i.operand + + "\"};"); + file.lines.push_back(" if (!output_file.is_open())"); + file.lines.push_back(" return EXIT_FAILURE;"); + file.lines.push_back(" output_file << text << std::endl;"); + file.lines.push_back(" output_file.close();"); + file.lines.push_back("}"); + file.use_fstream = true; + break; + } + case (intermediate::cmd::process_memory): + { + file.lines.push_back("text_conversion::convert_to_title_case(text);"); + file.use_text_conversion = true; + break; + } + case (intermediate::cmd::print_text): + { + file.lines.push_back("std::cout << \"" + i.operand + + "\" << std::endl;"); + file.use_iostream = true; + break; + } + case (intermediate::cmd::save_text): + { + file.lines.push_back("{"); + file.lines.push_back(" std::ofstream output_file{\"" + i.operandB + + "\"};"); + file.lines.push_back(" if (!output_file.is_open())"); + file.lines.push_back(" 
return EXIT_FAILURE;"); + file.lines.push_back(" output_file << \"" + i.operand + + "\" << std::endl;"); + file.lines.push_back(" output_file.close();"); + file.lines.push_back("}"); + file.use_fstream = true; + break; + break; + } + } + + return true; +} +} // namespace + +void generate_cpp(data& data) +{ + cpp_file file; + + for (const auto& ic : data.c) + { + if (!generate_cpp_code(ic, file, data)) + return; + } + + if (file.use_text_conversion) + data.result.push_back("#include \"text_conversion.h\""); + + data.result.push_back("#include "); + if (file.use_iostream) + data.result.push_back("#include "); + if (file.use_string) + data.result.push_back("#include "); + if (file.use_fstream) + { + data.result.push_back("#include "); + data.result.push_back("#include "); + } + + data.result.push_back("\nint main() {"); + + for (const auto& line : file.lines) + data.result.push_back(" " + line); + + data.result.push_back(" return EXIT_SUCCESS;"); + data.result.push_back("}\n"); +} diff --git a/src/script_transcompiler/backend_intermediate.cpp b/src/script_transcompiler/backend_intermediate.cpp new file mode 100644 index 0000000..e1eefe8 --- /dev/null +++ b/src/script_transcompiler/backend_intermediate.cpp @@ -0,0 +1,57 @@ +#include "backend.h" + +namespace +{ +bool generate_intermediate(const intermediate& i, data& data) +{ + data.result.push_back("from: " + i.line); + + switch (i.command) + { + case (intermediate::cmd::comment): + data.result.push_back("COMMENT: " + i.operand); + break; + case (intermediate::cmd::create_file): + data.result.push_back("CREATE FILE: " + i.operand); + break; + case (intermediate::cmd::load_init_memory): + data.result.push_back("LOAD INIT MEMORY: " + i.operand); + break; + case (intermediate::cmd::load_memory): + data.result.push_back("LOAD INTO MEMORY: " + i.operand); + break; + case (intermediate::cmd::print_memory): + data.result.push_back("PRINT MEMORY"); + break; + case (intermediate::cmd::process_memory): + 
data.result.push_back("PROCESS MEMORY"); + break; + case (intermediate::cmd::save_memory): + data.result.push_back("SAVE MEMORY: " + i.operand); + break; + case (intermediate::cmd::text_init_memory): + data.result.push_back("TEXT INIT MEMORY: " + i.operand); + break; + case (intermediate::cmd::text_memory): + data.result.push_back("TEXT TO MEMORY: " + i.operand); + break; + case (intermediate::cmd::print_text): + data.result.push_back("PRINT TEXT: " + i.operand); + break; + case (intermediate::cmd::save_text): + data.result.push_back("SAVE TEXT: '" + i.operand + "' to " + i.operandB); + break; + } + + return true; +} +} // namespace + +void generate_intermediate(data& data) +{ + for (const auto& ic : data.c) + { + if (!generate_intermediate(ic, data)) + return; + } +} diff --git a/src/script_transcompiler/backend_linux_x86_64.cpp b/src/script_transcompiler/backend_linux_x86_64.cpp new file mode 100644 index 0000000..fb4ba80 --- /dev/null +++ b/src/script_transcompiler/backend_linux_x86_64.cpp @@ -0,0 +1,390 @@ +#include "data.h" + +#include + +namespace +{ +struct asm_file +{ + std::vector data; + + std::vector lines; + + bool need_memory = false; + bool exit_failure = false; + unsigned int data_cnt = 0; + bool need_new_line = false; +}; + +// free memory stored at allocated_memory +// after call, allocated_memory_size is 0 +inline void _free_memory(std::vector& lines) +{ + lines.push_back("# free memory"); + lines.push_back("movq allocated_memory(%rip), %rdi"); + lines.push_back("movq allocated_memory_size(%rip), %rsi"); + lines.push_back("movq $11, %rax"); + lines.push_back("syscall"); + lines.push_back("movq $0, allocated_memory_size(%rip)"); +} + +// alloc memory of size stored in allocated_memory_size +// after success, memory address is stored in allocated_memory +inline void _alloc_memory(std::vector& lines) +{ + lines.push_back("movq $0, %rdi "); + lines.push_back("movq allocated_memory_size(%rip), %rsi"); + lines.push_back("movq $7, %rdx"); + 
lines.push_back("movq $34, %r10 "); + lines.push_back("movq $-1, %r8"); + lines.push_back("movq $0, %r9"); + lines.push_back("movq $9, %rax"); + lines.push_back("syscall"); + + lines.push_back("cmpq $-1, %rax "); + lines.push_back("je exit_failure"); + + lines.push_back("movq %rax, allocated_memory(%rip)"); +} + +inline std::tuple get_new_text(asm_file& file) +{ + const auto text_id = "text_" + std::to_string(file.data_cnt); + const auto text_length = text_id + "_len"; + + file.data_cnt += 1; + + return {text_id, text_length}; +} + +inline void _print_stdout(asm_file& file) +{ + file.lines.push_back("mov $1, %rax"); + file.lines.push_back("mov $1, %rdi"); + file.lines.push_back("syscall"); +} + +inline void _print_new_line(asm_file& file) +{ + file.lines.push_back("lea new_line(%rip), %rsi"); + file.lines.push_back("mov $new_line_length, %rdx"); + _print_stdout(file); + + file.need_new_line = true; +} + +inline void add_new_line(asm_file& file) +{ + file.data.push_back("new_line: .string \"\\n\""); + file.data.push_back("new_line_length = . - new_line"); +} + +inline void generate_linux_x86_64_asm(const intermediate& i, asm_file& file) +{ + file.lines.push_back("# " + i.line); + + switch (i.command) + { + case (intermediate::cmd::comment): + { + file.lines.push_back("#" + i.operand); + break; + } + + case (intermediate::cmd::process_memory): + { + file.lines.push_back("mov allocated_memory(%rip), %rdi"); + file.lines.push_back("mov allocated_memory_size(%rip), %rsi"); + file.lines.push_back("call text_conversion_c"); + + file.need_memory = true; + + break; + } + case (intermediate::cmd::print_text): + { + const auto [text_id, text_length] = get_new_text(file); + + file.data.push_back(text_id + ": .string \"" + i.operand + "\\n\""); + file.data.push_back(text_length + " = . 
- " + text_id); + + file.lines.push_back("lea " + text_id + "(%rip), %rsi"); + file.lines.push_back("mov $" + text_length + ", %rdx"); + + _print_stdout(file); + + break; + } + + case (intermediate::cmd::print_memory): + { + file.lines.push_back("movq allocated_memory(%rip), %rsi"); + file.lines.push_back("movq allocated_memory_size(%rip), %rdx"); + + _print_stdout(file); + + _print_new_line(file); + + file.need_memory = true; + + break; + } + + case (intermediate::cmd::text_memory): + { + _free_memory(file.lines); + } + [[fallthrough]]; + case (intermediate::cmd::text_init_memory): + { + const auto [text_id, text_length] = get_new_text(file); + + file.data.push_back(text_id + ": .string \"" + i.operand + "\""); + file.data.push_back(text_length + " = . - " + text_id); + + file.lines.push_back("mov $" + text_length + ", %rax"); + file.lines.push_back("movq %rax, allocated_memory_size(%rip)"); + + _alloc_memory(file.lines); + + file.lines.push_back("# copy"); + file.lines.push_back("movq allocated_memory(%rip), %rdi"); + file.lines.push_back("lea " + text_id + "(%rip), %rsi"); + file.lines.push_back("mov $" + text_length + ", %rcx"); + file.lines.push_back("rep movsb"); + + file.exit_failure = true; + file.need_memory = true; + + break; + } + + case (intermediate::cmd::create_file): + { + const auto [text_id, text_length] = get_new_text(file); + + file.data.push_back(text_id + ": .string \"" + i.operand + "\""); + // file.data.push_back(text_length + " = . 
- " + text_id); + + file.lines.push_back("# create file"); + + file.lines.push_back("mov $2, %rax"); + file.lines.push_back("lea " + text_id + "(%rip), %rdi"); + file.lines.push_back("mov $577, %rsi"); + file.lines.push_back("mov $0644, %rdx"); + file.lines.push_back("syscall"); + + file.lines.push_back("cmpq $-1, %rax"); + file.lines.push_back("je exit_failure"); + + file.lines.push_back("mov %rax, %rdi"); + file.lines.push_back("mov $3, %rax"); + file.lines.push_back("syscall"); + + break; + } + + case (intermediate::cmd::save_memory): + { + const auto [file_name_id, text_length] = get_new_text(file); + + // file name + file.data.push_back(file_name_id + ": .string \"" + i.operand + "\""); + //file.data.push_back(text_length + " = . - " + file_name_id); + + // create file + file.lines.push_back("mov $2, %rax "); + file.lines.push_back("lea " + file_name_id + "(%rip), %rdi"); + file.lines.push_back("mov $0101, %rsi"); + file.lines.push_back("mov $0644, %rdx"); + file.lines.push_back("syscall"); + + file.lines.push_back("cmpq $-1, %rax"); + file.lines.push_back("je exit_failure"); + + // write to file + file.lines.push_back("mov %rax, %rdi"); + file.lines.push_back("mov $1, %rax "); + file.lines.push_back("movq allocated_memory(%rip), %rsi"); + file.lines.push_back("movq allocated_memory_size(%rip), %rdx"); + file.lines.push_back("syscall"); + // close file + file.lines.push_back("mov $3, %rax"); + file.lines.push_back("syscall"); + + file.need_memory = true; + file.exit_failure = true; + + break; + } + + case (intermediate::cmd::save_text): + { + const auto [text_id, text_length] = get_new_text(file); + const auto [file_name_id, file_name_length] = get_new_text(file); + + // file name + file.data.push_back(file_name_id + ": .string \"" + i.operandB + "\""); + // file.data.push_back(file_name_length + " = . - " + file_name_id); + // text + file.data.push_back(text_id + ": .string \"" + i.operand + "\""); + file.data.push_back(text_length + " = . 
- " + text_id); + // create file + file.lines.push_back("mov $2, %rax "); + file.lines.push_back("lea " + file_name_id + "(%rip), %rdi"); + file.lines.push_back("mov $0101, %rsi"); + file.lines.push_back("mov $0644, %rdx"); + file.lines.push_back("syscall"); + + file.lines.push_back("cmpq $-1, %rax"); + file.lines.push_back("je exit_failure"); + + // write to file + file.lines.push_back("mov %rax, %rdi"); + file.lines.push_back("mov $1, %rax "); + file.lines.push_back("lea " + text_id + "(%rip), %rsi"); + file.lines.push_back("mov $" + text_length + ", %rdx"); + file.lines.push_back("syscall"); + // close file + file.lines.push_back("mov $3, %rax"); + file.lines.push_back("syscall"); + + file.need_memory = true; + file.exit_failure = true; + break; + } + case (intermediate::cmd::load_memory): + { + _free_memory(file.lines); + } + [[fallthrough]]; + case (intermediate::cmd::load_init_memory): + { + const auto [file_name_id, file_name_length] = get_new_text(file); + + // file name + file.data.push_back(file_name_id + ": .string \"" + i.operand + "\""); + //file.data.push_back(file_name_length + " = . 
- " + file_name_id); + + file.lines.push_back("movq $2, %rax"); + file.lines.push_back("lea " + file_name_id + "(%rip), %rdi"); + file.lines.push_back("movq $0, %rsi"); + file.lines.push_back("syscall"); + + file.lines.push_back("cmpq $-1, %rax"); + file.lines.push_back("je exit_failure"); + + file.lines.push_back("movq %rax, %rdi"); + file.lines.push_back("movq %rax, %rbx"); + + // file size + + file.lines.push_back("# file size"); + file.lines.push_back("movq $5, %rax"); + file.lines.push_back("subq $128, %rsp"); + file.lines.push_back("movq %rsp, %rsi "); + file.lines.push_back("syscall"); + + file.lines.push_back("movq 48(%rsp), %rdx"); + file.lines.push_back("movq %rdx, allocated_memory_size(%rip)"); + + _alloc_memory(file.lines); + + // read data into memory + file.lines.push_back("# read data into memory"); + file.lines.push_back("movq $0, %rax"); + file.lines.push_back("movq %rbx, %rdi"); + file.lines.push_back("movq allocated_memory(%rip), %rsi"); + file.lines.push_back("movq allocated_memory_size(%rip), %rdx"); + file.lines.push_back("syscall"); + + // close file + file.lines.push_back("# close file"); + file.lines.push_back("mov $3, %rax"); + file.lines.push_back("syscall"); + + file.need_memory = true; + file.exit_failure = true; + + break; + } + } + + file.lines.push_back(""); +} + +} // namespace + +void generate_linux_x86_64(data& data) +{ + asm_file file; + + for (const auto& i : data.c) + generate_linux_x86_64_asm(i, file); + + if (file.need_new_line) + add_new_line(file); + + if (file.need_memory) + _free_memory(file.lines); + + if (file.exit_failure) + { + file.data.push_back("text_failure: .string \"an error occured\""); + file.data.push_back("text_failure_len = . 
- text_failure"); + } + + // exit success + file.lines.push_back("mov $60, %rax"); + file.lines.push_back("xor %rdi, %rdi"); + file.lines.push_back("syscall"); + + // --- + + data.result.push_back("# Linux x86_64 AMD64"); + data.result.push_back("# AT&T syntax"); + + data.result.push_back(".section .data"); + + if (file.need_memory) + { + data.result.push_back("allocated_memory: .quad 0"); + data.result.push_back("allocated_memory_size: .quad 0"); + } + + for (const auto& d : file.data) + { + data.result.push_back(d); + } + + data.result.push_back(".section .text"); + + data.result.push_back(".global _start"); + data.result.push_back("_start:"); + + for (const auto& l : file.lines) + { + data.result.push_back(l); + } + + if (file.exit_failure) + { + data.result.push_back(""); + data.result.push_back("exit_failure:"); + + if (file.need_memory) + _free_memory(data.result); + + data.result.push_back("lea text_failure(%rip), %rsi"); + data.result.push_back("mov $text_failure_len, %rdx"); + data.result.push_back("mov $1, %rax"); + data.result.push_back("mov $1, %rdi"); + data.result.push_back("syscall"); + + data.result.push_back("mov $60, %rax"); + data.result.push_back("mov $1, %edi"); + data.result.push_back("syscall"); + } + + data.result.push_back("\n"); +} diff --git a/src/script_transcompiler/backend_python.cpp b/src/script_transcompiler/backend_python.cpp new file mode 100644 index 0000000..32da7f9 --- /dev/null +++ b/src/script_transcompiler/backend_python.cpp @@ -0,0 +1,80 @@ +#include "data.h" + +inline void add_cmd(data& data, const std::string& cmd) +{ + data.result.push_back(" " + cmd); +} + +inline bool generate_code(data& data, const intermediate& i) +{ + add_cmd(data, "# " + i.line); + + switch (i.command) + { + case (intermediate::cmd::comment): + add_cmd(data, "#" + i.operand); + break; + + case (intermediate::cmd::load_init_memory): + [[fallthrough]]; + case (intermediate::cmd::load_memory): + add_cmd(data, "with open(\"" + i.operand + "\", \"r\") as 
file:"); + add_cmd(data, " text = file.read()"); + break; + + case (intermediate::cmd::print_memory): + + add_cmd(data, "print(text)"); + + break; + + case (intermediate::cmd::process_memory): + + add_cmd(data, "text = text_conversion.title_case(text)"); + + break; + + case (intermediate::cmd::save_memory): + + add_cmd(data, "with open(\"" + i.operand + "\", \"w\") as file:"); + add_cmd(data, " file.write(text)"); + + break; + + case (intermediate::cmd::create_file): + + add_cmd(data, "open('" + i.operand + "', 'a')"); + + break; + + case (intermediate::cmd::text_init_memory): + [[fallthrough]]; + case (intermediate::cmd::text_memory): + add_cmd(data, "text = \"" + i.operand + "\""); + + break; + + case (intermediate::cmd::print_text): + add_cmd(data, "print(\"" + i.operand + "\")"); + break; + + case (intermediate::cmd::save_text): + add_cmd(data, "with open(\"" + i.operandB + "\", \"w\") as file:"); + add_cmd(data, " file.write(\"" + i.operand + "\")"); + break; + } + + return true; +} + +void generate_py(data& data) +{ + data.result.push_back("import text_conversion"); + data.result.push_back("if __name__ == '__main__':"); + + for (const auto& ic : data.c) + { + if (!generate_code(data, ic)) + return; + } +} diff --git a/src/script_transcompiler/data.h b/src/script_transcompiler/data.h new file mode 100644 index 0000000..234ecd2 --- /dev/null +++ b/src/script_transcompiler/data.h @@ -0,0 +1,103 @@ +#ifndef DATA_H__ +#define DATA_H__ + +#include "script.h" +#include +#include + +enum class TARGET +{ + INTERMEDIATE, + CPP, + PY, + LINUX_X86_64, + INVALID +}; + +struct issue +{ + enum class type + { + INFO, + WARNING, + ERROR + }; + + enum class phase + { + PARSING, + OPTIMIZAZION, + CODE_GENERATION, + STORE + }; + + type t; + phase p; + std::string msg; + std::string line; + unsigned int number; +}; + +using issues = std::vector; + +struct intermediate +{ + enum class cmd + { + comment, + text_init_memory, + text_memory, + load_init_memory, + load_memory, + 
process_memory, + print_memory, + save_memory, + create_file, + + // code blocks + + print_text, // directly print text + save_text // directy save text to file + }; + + cmd command; + std::string operand; + std::string line; + unsigned int number; + std::string operandB; +}; + +using code = std::vector; + +using generated = std::vector; + +struct data +{ + issues i; + code c; + generated result; + bool success = true; + + void add_issue(issue::type type, issue::phase phase, const std::string& msg, + const std::string& line, unsigned int number) + { + i.push_back(issue{type, phase, msg, line, number}); + if (type == issue::type::ERROR) + success = false; + } + + void add_cmd(intermediate::cmd cmd, const std::string& operand, + const std::string& line, unsigned int number) + { + c.push_back(intermediate{cmd, operand, line, number, ""}); + } + + void add_cmd(intermediate::cmd cmd, const std::string& operand, + const std::string& line, unsigned int number, + const std::string& operandB) + { + c.push_back(intermediate{cmd, operand, line, number, operandB}); + } +}; + +#endif diff --git a/src/script_transcompiler/frontend.cpp b/src/script_transcompiler/frontend.cpp new file mode 100644 index 0000000..f32e41f --- /dev/null +++ b/src/script_transcompiler/frontend.cpp @@ -0,0 +1,149 @@ +#include "frontend.h" +#include "script.h" +#include +#include + +namespace +{ + +bool parse_line(const std::string& line, unsigned int number, data& data, + bool& memory_set) +{ + script::command cmd; + std::string operand; + + script::parse(line, cmd, operand); + + switch (cmd) + { + case (script::command::INVALID): + + data.add_issue(issue::type::ERROR, issue::phase::PARSING, + "Invalid line.", line, number); + + return false; + break; + + case (script::command::COMMENT): + { + auto arg = line; + arg.erase(0, 1); + + data.add_cmd(intermediate::cmd::comment, arg, line, number); + break; + } + case (script::command::TEXT): + { + data.add_cmd(intermediate::cmd::text_memory, operand, line, 
number); + memory_set = true; + break; + } + case (script::command::PRINT): + { + if (!memory_set) + { + data.add_issue( + issue::type::WARNING, issue::phase::PARSING, + "calling 'print' before setting memory. Line removed", line, + number); + } + else + { + data.add_cmd(intermediate::cmd::print_memory, operand, line, + number); + } + + break; + } + case (script::command::PROCESS): + { + + if (memory_set) + data.add_cmd(intermediate::cmd::process_memory, "", line, number); + else + data.add_issue( + issue::type::WARNING, issue::phase::PARSING, + "calling 'process' before setting memory. Line removed", line, + number); + + break; + } + case (script::command::LOAD): + { + if (operand.empty()) + { + data.add_issue(issue::type::ERROR, issue::phase::PARSING, + "No operand given for 'load'.", line, number); + return false; + } + + data.add_cmd(intermediate::cmd::load_memory, operand, line, number); + + memory_set = true; + + break; + } + case (script::command::SAVE): + { + if (operand.empty()) + { + data.add_issue(issue::type::ERROR, issue::phase::PARSING, + "No operand given for 'save'.", line, number); + return false; + } + + if (memory_set) + data.add_cmd(intermediate::cmd::save_memory, operand, line, number); + else + data.add_cmd(intermediate::cmd::create_file, operand, line, number); + + break; + } + default: + { + data.add_issue(issue::type::ERROR, issue::phase::PARSING, + "Unknown command.", line, number); + + return false; + break; + } + } + + return true; +} + +} // namespace + +void parse_source(const std::string& src, data& data) +{ + + std::ifstream source{src}; + + if (!source.is_open()) + { + data.add_issue(issue::type::ERROR, issue::phase::PARSING, + "Could not open file.", src, 0); + data.success = false; + return; + } + + auto lineNumber = 0; + auto memory_set = false; + + std::string line; + while (std::getline(source, line)) + { + lineNumber++; + + if (line.empty()) + continue; + + if ((line.back() == '\r' || line.back() == '\n')) + 
line.erase(line.size() - 1); + + if (!parse_line(line, lineNumber, data, memory_set)) + return; + } + + source.close(); +} diff --git a/src/script_transcompiler/frontend.h b/src/script_transcompiler/frontend.h new file mode 100644 index 0000000..22f309f --- /dev/null +++ b/src/script_transcompiler/frontend.h @@ -0,0 +1,12 @@ +#ifndef FRONTEND_H__ +#define FRONTEND_H__ + + + +#include "data.h" + +void parse_source(const std::string& src, data&data); + +#endif + + diff --git a/src/script_transcompiler/main.cpp b/src/script_transcompiler/main.cpp new file mode 100644 index 0000000..2ec1f2f --- /dev/null +++ b/src/script_transcompiler/main.cpp @@ -0,0 +1,42 @@ + +#include + +#include "transcompiler.h" +#include "data.h" +#include + +int main(int argc, char* argv[]) +{ + data data; + + transcompiler(data, argc, argv); + + for (const auto& i : data.i) + { + if (i.p == issue::phase::PARSING) + std::cout << "Parsing "; + else if (i.p == issue::phase::OPTIMIZAZION) + std::cout << "Optimization "; + else if (i.p == issue::phase::CODE_GENERATION) + std::cout << "Code Generation "; + else + std::cout << "Storing Result "; + + if (i.t == issue::type::WARNING) + std::cout << "Warning: "; + else if (i.t == issue::type::INFO) + std::cout << "Info: "; + else + std::cout << "Error: "; + + std::cout << i.msg << std::endl; + + //if (i.number > 0) + std::cout << "Line " << i.number << ": " << i.line << std::endl; + } + + if (!data.success) + return EXIT_FAILURE; + + return EXIT_SUCCESS; +} diff --git a/src/script_transcompiler/optimization.cpp b/src/script_transcompiler/optimization.cpp new file mode 100644 index 0000000..c82e184 --- /dev/null +++ b/src/script_transcompiler/optimization.cpp @@ -0,0 +1,308 @@ +#include "optimization.h" +#include "script.h" +#include + +namespace +{ + +inline auto set_memory(intermediate::cmd cmd) +{ + return cmd == intermediate::cmd::text_memory || + cmd == intermediate::cmd::load_memory; +} + +void _remove_set_memory(data& data) +{ + code temp; + 
temp.reserve(data.c.size()); + + std::optional prev; + + for (const auto& line : data.c) + { + if (line.command == intermediate::cmd::comment) + { + temp.push_back(line); + continue; + } + + if (set_memory(line.command)) + { + if (prev.has_value()) + { + const auto& prev_line = prev.value(); + data.add_issue(issue::type::INFO, issue::phase::OPTIMIZAZION, + "Remove line.", prev_line.line, + prev_line.number); + } + + prev = line; + } + else + { + if (prev.has_value()) + { + temp.push_back(prev.value()); + prev = std::optional(); + } + + temp.push_back(line); + } + } + + if (prev.has_value()) + temp.push_back(prev.value()); + + data.c = temp; +} + +void _remove_redundant_process(data& data) +{ + code temp; + temp.reserve(data.c.size()); + + std::optional prev; + + for (const auto& line : data.c) + { + if (line.command == intermediate::cmd::comment) + { + temp.push_back(line); + continue; + } + + if (line.command == intermediate::cmd::process_memory) + { + if (prev.has_value()) + { + const auto& prev_line = prev.value(); + data.add_issue(issue::type::INFO, issue::phase::OPTIMIZAZION, + "Remove line.", prev_line.line, + prev_line.number); + } + + prev = line; + } + else + { + if (prev.has_value()) + { + temp.push_back(prev.value()); + prev = std::optional(); + } + + temp.push_back(line); + } + } + + if (prev.has_value()) + temp.push_back(prev.value()); + + data.c = temp; +} + +inline auto block_start(intermediate::cmd cmd) +{ + return cmd == intermediate::cmd::load_memory || + cmd == intermediate::cmd::text_memory; +} + +void _remove_dangling_process(code& block, data& data) +{ + + if (block.size() < 2) + return; + + const auto last_line = block.at(block.size() - 1); + + if (last_line.command == intermediate::cmd::process_memory) + { + + data.add_issue(issue::type::INFO, issue::phase::OPTIMIZAZION, + "Remove 'process' without effect.", last_line.line, + last_line.number); + + block.pop_back(); + } +} + +void _check_print_text(code& code_block) +{ + auto copy = 
code_block; + + if (copy.size() != 2) + return; + + const auto& first_line = copy.at(0); + const auto& second_line = copy.at(1); + + if (first_line.command != intermediate::cmd::text_memory) + return; + + if (second_line.command != intermediate::cmd::print_memory) + return; + + code_block.clear(); + code_block.push_back(intermediate{intermediate::cmd::print_text, + first_line.operand, first_line.line, + first_line.number, ""}); +} + +void _check_save_text(code& code_block) +{ + auto copy = code_block; + + if (copy.size() != 2) + return; + + const auto& first_line = copy.at(0); + const auto& second_line = copy.at(1); + + if (first_line.command != intermediate::cmd::text_memory) + return; + + if (second_line.command != intermediate::cmd::save_memory) + return; + + code_block.clear(); + code_block.push_back(intermediate{intermediate::cmd::save_text, + first_line.operand, first_line.line, + first_line.number, second_line.operand}); +} + +void _pre_process(code& block) +{ + + auto copy = block; + + if (copy.size() < 3) + return; + + const auto first_line = copy.at(0); + const auto second_line = copy.at(1); + + if (first_line.command != intermediate::cmd::text_memory) + return; + + if (second_line.command != intermediate::cmd::process_memory) + return; + + // preprocess + + script::engine engine(nullptr); + + engine.run(script::command::TEXT, first_line.operand); + engine.run(script::command::PROCESS, ""); + + const auto res = engine.get_memory(); + + block.clear(); + block.push_back(intermediate{intermediate::cmd::text_memory, res, + first_line.line, first_line.number, ""}); + + for (auto i = 2u; i < copy.size(); ++i) + block.push_back(copy.at(i)); +} + +void _check_no_output(code& block, data& data) +{ + if (block.size() < 2) + return; + + auto has_output = false; + + for (const auto& line : block) + { + if (line.command == intermediate::cmd::print_memory || + line.command == intermediate::cmd::save_memory) + return; + } + + if (!has_output) + { + 
data.add_issue(issue::type::INFO, issue::phase::OPTIMIZAZION, + "Delete code block without effect.", block.at(0).line, + block.at(0).number); + + block.clear(); + } +} + +} // namespace + +// dead code removal; pre-processing +void optimizationA(data& data) +{ + // dead code removal: setting memory + + _remove_set_memory(data); + _remove_redundant_process(data); +} + +void optimizationB(data& data) +{ + std::vector blocks; + + for (const auto& line : data.c) + { + if (line.command == intermediate::cmd::comment) + continue; + + if (line.command == intermediate::cmd::create_file) + { + code block; + block.push_back(line); + blocks.push_back(block); + continue; + } + + if (block_start(line.command)) + { + code block; + blocks.push_back(block); + } + + blocks[blocks.size() - 1].push_back(line); + } + + // optimize blocks + + for (auto& block : blocks) + { + _check_no_output(block, data); + + if (block.empty()) + continue; + + _remove_dangling_process(block, data); + _pre_process(block); + _check_print_text(block); + _check_save_text(block); + } + + // write blocks back + + data.c.clear(); + + for (const auto& block : blocks) + for (const auto& line : block) + data.c.push_back(line); +} + +void memory_init(data& data) +{ + for (auto& line : data.c) + { + if (line.command == intermediate::cmd::text_memory) + { + line.command = intermediate::cmd::text_init_memory; + return; + } + + if (line.command == intermediate::cmd::load_memory) + { + line.command = intermediate::cmd::load_init_memory; + return; + } + } +} diff --git a/src/script_transcompiler/optimization.h b/src/script_transcompiler/optimization.h new file mode 100644 index 0000000..2552eeb --- /dev/null +++ b/src/script_transcompiler/optimization.h @@ -0,0 +1,14 @@ +#ifndef OPTIMIZATION_H__ +#define OPTIMIZATION_H__ + +#include "data.h" + +// dead code removal; pre-processing +void optimizationA(data& data); + +// conversion to intermediate format +void optimizationB(data& data); + +void memory_init(data&data); + 
+#endif diff --git a/src/script_transcompiler/transcompiler.cpp b/src/script_transcompiler/transcompiler.cpp new file mode 100644 index 0000000..ff2682a --- /dev/null +++ b/src/script_transcompiler/transcompiler.cpp @@ -0,0 +1,82 @@ +#include "transcompiler.h" +#include "backend.h" +#include "frontend.h" +#include "optimization.h" +#include +#include + +TARGET get_target(char* arg) +{ + if (std::strcmp(arg, "py") == 0) + return TARGET::PY; + if (std::strcmp(arg, "cpp") == 0) + return TARGET::CPP; + if (std::strcmp(arg, "linux_x86_64") == 0) + return TARGET::LINUX_X86_64; + if (std::strcmp(arg, "i") == 0) + return TARGET::INTERMEDIATE; + + return TARGET::INVALID; +} + +void write_to_file(data& data, const std::string& dst) +{ + if (data.c.empty()) + { + data.add_issue(issue::type::ERROR, issue::phase::STORE, + "Generated code is empty.", "", 0); + return; + } + + std::ofstream file_stream(dst); + + if (!file_stream.is_open()) + { + data.add_issue(issue::type::ERROR, issue::phase::STORE, + "Could not write to output file.", "", 0); + return; + } + + for (const auto& r : data.result) + { + file_stream << r << "\n"; + } + + file_stream.close(); +} + +void transcompiler(data& data, int argc, char* argv[]) +{ + if (argc != 4) + { + data.add_issue(issue::type::ERROR, issue::phase::PARSING, + "Invalid command line arguments.", "", 0); + return; + } + + const auto target = get_target(argv[3]); + if (target == TARGET::INVALID) + { + data.add_issue(issue::type::ERROR, issue::phase::PARSING, + "Invalid 'target' argument.", "", 0); + return; + } + + parse_source(argv[1], data); + + if (!data.success) + return; + + optimizationA(data); + + optimizationB(data); + + memory_init(data); + + generate_code(data, target); + + if (!data.success) + return; + + write_to_file(data, argv[2]); +} diff --git a/src/script_transcompiler/transcompiler.h b/src/script_transcompiler/transcompiler.h new file mode 100644 index 0000000..6450b7f --- /dev/null +++ 
b/src/script_transcompiler/transcompiler.h @@ -0,0 +1,8 @@ +#ifndef CONVERTER__ +#define CONVERTER__ + +#include "data.h" + +void transcompiler(data& data, int argc, char* argv[]); + +#endif diff --git a/user_guide.md b/user_guide.md new file mode 100644 index 0000000..14ff1ae --- /dev/null +++ b/user_guide.md @@ -0,0 +1,161 @@ + +# Basic Command Line Tools + +## Command Line Tool *title_case* + +This command line tool takes the given command line argument, converts the data, and prints the result to ```std::cout```. + +``` +title_case "this is some text" + +# prints +This is Some Text +``` + +## Command Line Tool *title_case_console* + +Interactive command line tool. Enter the text to convert or "exit" to end the program. + +## Command Line Tool *title_case_files* + +The first command line argument is the file to read the data from, the second is the file to save the result to. + +```sh +title_case_file source_file.txt target_file.txt +``` + +# Web Applications + +## Local Usage + +Start the ```web.py``` script by providing the location of the resource files and the folder containing the ```title_case``` tool. + +```sh +python web.py C:\web\resources C:\build\product +``` + +Open ```localhost:5000``` for a synchronous web app. Open ```localhost:5000/interactive``` for an asynchronous web app. + +## Container + +Build the *docker* image with: + +```sh +docker build --tag title-case-web . +``` + +The multi-stage build process will build the ```title_case``` tool and copy all necessary files. + + +To start the container, run: + +```sh +docker run --rm -it -p 5000:5000 title-case-web +``` + +Open ```localhost:5000``` for a synchronous web app. Open ```localhost:5000/interactive``` for an asynchronous web app. + + +## WebAssembly Web App + +WebAssembly requires the HTML document to be accessed via a web server. A simple server can be started with Python: + +```sh +python -m http.server +``` + +Open ```http://localhost:8000/``` to start the WebAssembly app.
+ + +# Scripting Language + +The domain-specific scripting language is a simple language designed to perform basic tasks. The language consists of five commands: + +| Command | Operand (optional) | Description | +| --- | --- | --- | +| ```text``` | *text to load and store in memory* | Stores the given text in the program's memory. | +| ```process``` | - | Processes the text in memory. | +| ```print``` | - | Prints the text in memory to the screen. | +| ```load``` | *path to text file* | Reads the specified text file and stores the text in memory. | +| ```save``` | *path to text file* | Saves the text in memory to the specified text file. | + +An example program is: + +``` +text this is a headline +process +print +``` + +This will print ```This Is a Headline```. + +## Command Line Tool *console* + +The scripting **console** allows you to enter and execute code. The console application can be closed by entering ```exit``` or pressing ```CTRL+C```. + +## Command Line Tool *interpreter* + +The **interpreter** loads and executes a script stored in the specified source file. + +```sh +interpreter script.txt +``` + +## Command Line Tool *compiler* & *runtime* + +The **compiler** loads a source file and generates byte-code that can be executed by the **runtime**. + +```sh +compiler script.txt bytecode.code + +runtime bytecode.code +``` + +## Command Line Tool *transcompiler* + +The **transcompiler** loads a source file and generates equivalent code in another programming language. +It also performs code optimisations. + + +```sh +transcompiler script.txt python_script.py py +``` + +The arguments are: + +* Path to the script source file. +* Path to the target file to create. +* The target language. + +Supported targets are: + +* ```py```: Python using the ```text_conversion``` module. +* ```cpp```: C++ using the C++ library. +* ```linux_x86_64```: Linux x86-64 assembly using the C library. +* ```i```: The intermediate representation.
+ +# Java Command Line Tool + +Execute the command line tool (JAR) like this: + +```sh +java -jar text_conversion.jar "this is a headline" +``` + +Make sure the ```libjava_text_conversion``` shared library can be found by Java. Set the ```java.library.path``` system property (for example ```-Djava.library.path=<dir>``` on the ```java``` command line) if needed. + +# Jupyter Notebook + +Start the Jupyter Notebook by simply running the ```start_notebook.sh``` script. It will start the notebook server with the notebook selected. + +# Assembly Command Line Tool + +The command line tool written in assembly is used like this: + +```shell +title_case "this is a headline" + +# will print +# Input: this is a headline +# Output: This Is a Headline +```