From 47a2b38f130139f4e48493cf9df4834903c80ff0 Mon Sep 17 00:00:00 2001 From: Tristan Date: Sat, 22 Jul 2023 17:31:56 -0400 Subject: [PATCH] tmp --- .vscode/c_cpp_properties.json | 19 +++++ .vscode/settings.json | 4 +- Makefile | 2 +- build/main | Bin 33681 -> 33985 bytes src/common/types.h | 6 ++ src/lexer/lexer.c | 132 ++++++++++++++++++++++++++++- src/lexer/lexer.h | 16 +++- src/lexer/{ => token}/token.h | 2 +- src/lexer/{ => token}/token_type.h | 1 + src/lexer/types.h | 6 -- src/main.c | 11 ++- src/main.simpl | 1 + src/utils/ints.c | 18 ++++ src/utils/ints.h | 15 ++++ src/utils/strings.c | 42 +++++++++ src/utils/strings.h | 25 ++++++ 16 files changed, 283 insertions(+), 17 deletions(-) create mode 100644 .vscode/c_cpp_properties.json create mode 100644 src/common/types.h rename src/lexer/{ => token}/token.h (90%) rename src/lexer/{ => token}/token_type.h (98%) delete mode 100644 src/lexer/types.h create mode 100644 src/main.simpl create mode 100644 src/utils/ints.c create mode 100644 src/utils/ints.h create mode 100644 src/utils/strings.c create mode 100644 src/utils/strings.h diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..834113c --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,19 @@ +{ + "configurations": [ + { + "name": "Mac", + "includePath": [ + "${workspaceFolder}/**" + ], + "defines": [], + "macFrameworkPath": [ + "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks" + ], + "compilerPath": "/usr/bin/clang", + "cStandard": "c17", + "cppStandard": "c++17", + "intelliSenseMode": "macos-clang-arm64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index d8cb326..7ae686b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,7 @@ { "files.associations": { - "string": "cpp" + "string": "cpp", + "token.h": "c", + "token_type.h": "c" } } \ No newline at end of file diff --git a/Makefile b/Makefile index 3cdc525..b080298 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: output execute output: - gcc src/main.c -o build/main + gcc src/main.c src/utils/ints.c src/utils/strings.c src/lexer/lexer.c -o build/main execute: ./build/main \ No newline at end of file diff --git a/build/main b/build/main index 013ca82cdb0db22fe1225cfcd03934c4a1cf265c..5a947f3b693775ebaa06dce977ce1fc2c0082856 100755 GIT binary patch literal 33985 zcmeI5e{56N702)Mi-|)Z2@pt^AH=9o9B6@!c9b7U9)zrcQIb*G(FAyy#1IpcgxDF7 zFwMoF!pc_MMe2Skp<}8|>BOpaO(s!-qK$Q;8(|ZS{ZR$99}bAEqpN0jO1X2; zteUp{cdl~%?m73K^UnR;^IrbC_x<{#+aFI7LJ=qy)G4U#8A2QucC>`}6lx8suGg)6 zqPo6%LoI1ji!@GE-6N5oBN(Xb8>=^MOzoETTB=T%vK4Bo2uT)Q_qq1@OaYg4f7;{5 zWZs`(j3JrkHC{4=f4q%`u6uo*+r8tebbpH~4Sy@l5hN%3vzh+DP!?Tp^!hs7tvko% z)cJZ$fAh?Kl9NlaZ_?{}XX{>fYm@G7-O(oHRDa(z{WY6TNKW@*cF7B^Lj+aVSFhZ- zQm?9A|CF&BnwbQ+%93_fp`=Q_^H6ntXIs)T+}9uQ8$~nh6Cy1St!tuyTU)F5uE+HF z9@mWc=-yOFrg-jG?&j^fHS0fLy?Tu?OF^rjl5dcyoHrAU`eeV<3h^i`9aH5;`KHM!UzP|ZwMrnT!}pU9=;t{ezFrqP zac^JPwBOSNnMx5V#o_Auhtuv{?s)S>+q*YUW?%gbbaqr@PLly`vTRFs%Cjovp`0JC zR2-NB`Jv|~xzK${MSYY5YSUaSpK>=A#PigCug|r6!Kwv_%e@_gP#!`h|8?L)*D_Nk zUQ|j@Nxvu{M9>J5)LfVV6JP>NfC(@GCcp%k025#WOn?b60Vco%m;e)C0!)AjFaajO z1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k025#WOn?b60Vco%m;e*_9}tME%J5jX z8Xg-G!E-~U{n+1h-1ejLU`Q>E_NjXGT_Ji#az!j&AO^6#CqGi)AF$05e~T>XEwow0 z5~<78Vxy#=lJ$bkinc0(4n@Qe-f{7}^5AJj^cc3mlfUQhZ zVQ=^u5}1=EJQfhazCda8dNyKF{Fk3c93z<`))&yB(_%+SRz*zuP_>v%!MqB33u_ef zUeJwZYB9$g5u@?gT6Ot=Y4dl)KZ3ro?p5J9*xeKVVMWFd4hAENdlWjvU1rY7k>E#t zR%5)JZ)Vzj-G*HRdKqh`HgLbWk9D65kAZ(YpbTRz&)BkPURo>!4krRLqp!$yof)+$ z{tcm_lK#-pmVR0jTC=cP63x1O;5TLEB3O3&z)NAZD2lyZXYwqVyQT8VDYYR=v&VXRy7oN%98v|i0-Ze(F?LV@~d zDPlW^ILErrhvd2hkXOjz2=bIzfL+XiHHhSzJag~n^e+l@!)~ErR}8xb(+-;@r(O6s{_>zIK1=H_h@sSo;h*re zG%1EFki~?w7z*x<;bK}03n+$Zh@k*6OfzCwNHM^cVxTt2s{ zuPWcYr2nKk8!^iF*#vr@m0pqUoTT>Dr1p%YcA?SsxP10~_Hz4UVq;s2t96-uj(4K4 zW6N!sC>Qg^BVwKa4vfM-KExu5QE9L3Y*Z?AB^f0Hn`SMvN0;`cQ~k8LkDS`Hw|5eD z89SsGA&$Y_Z=qYK2;~wS|G^?G8)=X|;V#cl_rp$Hzu`a6FZTyS&M@`HUhO=;v8~>2 zUWIruZU4F)&Oc0m2`~XBzyz286JP>NfC(@GCcp%k025#WOn?b60Vco%m;e)C0!)Aj zFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k025#WOn?b60Vco%m;e)C0!)Aj zFaajO1em~oOu!BcoCP5~sPt`e<17F)P>lWmMw`C-xpJ^A{yzL&hPjtfE)#~nYi7r(f^WuyJr>DKyDJtM!d_ff#_uxc@!FK$d{1Q6b z+C@XBw^`qgxJA1vj{DHl(?LIb1;!myYW5vcdMxcSBnl;0!ATJ)#B~O0E*i&hpa#iy zQ@)6^97uMUGM()}@&Z$yz1)!T{dsAhx6F_$OgZ25S8d8Y=J;C3@I$U-*@oX_xg$+J zoF;!GP5w@r{QWc;=XZ!~RQg^#zBVuDyY)DyM35fmbKKKs$MW90PCC}fSOjX(I$+r; zU)n)mZy(368!M=fui*bZ!#;X%(jW*1F_i?I9P-7%PPb$GbNQoG3-Kk(xNU59^v_Jx|G55M|J{dX>wXz!d?m(AKWLFt~o{L#18 zZpo{g{=-$nPh@}2`gYgTYo5%RAG~w*P{YAL{c2{<1D4hmow1XD%?S4zPzZ{@A?T$u5b-mm9WO@8~K`Zso#UHf3lzW}ZpTDAZH delta 1508 zcmaJ>Z%kWN6hHUf779ZOj#>U}QbCc;I9z51q*5O1Oe7|-saY`5y>(D*0~9E)$)J+9 z)W4e_`gWU)7!!!mq;(&}>`RG8A=?Kf)1-V@@K0iA7H!Gm@Lz0ANIPiHLDjwo<&((8q(_A_Ws0sre;z$clv8_Il(tr-x9hggowjB#gpXZqVzt87Qi&VJ;yPJdx*j2DWtEyN8E!EdSdh5D3M6bFiN`?M>Qrl4T4 zS)61Tu3MESHIS%Hiu3$Y%ySp>=X}Rw`D?7Qv)-RK!g8SoBlfJNHqfG3a66_p5QN29 zyrseNY1UwRegdgIViqTgEM{X^&L+mRP9MsY?DS_8)%FnU=Uw>1Dfw^Wxi3`-Vitr( zRW2!#%T;;UZ633`+^q-AKC{Omdp&OR6BhF*sdoQMQvg-5NE#+;JlykpQSJ0z#WKIHI#p0@;+33yr_A91){ zGY&}}m+T&oeHvU8HfFHIKVQXn$ur^>`b>e?k8*gW%Y z->X-sFL#}N^!4zEcdV1%m*@M3D_f+k zH!3%NG|qY|)0cnO$KrK8dk=4}t_8nLEM2MTHGNRm@^@MK!skiW^V(T+ +#include + +/** + * @brief Pushes a token to the back of a token array. + * + * @param tokens The token array. + * @param token The token to push. + */ +void push_back(Token tokens[], Token token) +{ + tokens[sizeof(tokens) + 1] = token; +} + +/** + * @brief Pops a token from the front of a token array. + * + * @param tokens The token array. + * @return Token The popped token. + */ +string pop(string *tokens) +{ + string res = tokens[0]; + for (int i = 1; i < sizeof(tokens); i++) + { + tokens[i - 1] = tokens[i]; + } + return res; +} + +/** + * @brief Tokenizes a string. + * + * @param src The string to tokenize. + * @return const Token* The tokens. + */ +Token *tokenize(string src) +{ + // The list of tokens + Token tokens[] = {}; + + // Split the src + string *split_src = split(src, " "); + + // Build each token + while (sizeof(split_src) > 0) + { + // Pop the token + string token = pop(split_src); + + // Check the token type + if (token == ' ' || token == '\n' || token == '\t') + { + continue; + } + else if (token == '(') + { + push_back(tokens, (Token){TOKEN_TYPE_LEFT_PAREN, "("}); + } + else if (token == ')') + { + push_back(tokens, (Token){TOKEN_TYPE_RIGHT_PAREN, ")"}); + } + else if (token == '=') + { + push_back(tokens, (Token){TOKEN_TYPE_EQUAL, "="}); + } + else if (token == '+') + { + push_back(tokens, (Token){TOKEN_TYPE_PLUS, "+"}); + } + else if (token == '-') + { + push_back(tokens, (Token){TOKEN_TYPE_MINUS, "-"}); + } + else if (token == '*') + { + push_back(tokens, (Token){TOKEN_TYPE_MULTIPLY, "*"}); + } + else if (token == '/') + { + Token token = {TOKEN_TYPE_DIVIDE, "/"}; + push_back(tokens, token); + } + else + { + if (is_int(token)) + { + string num = ""; + while (sizeof(split_src) > 0 && is_int(split_src[0])) + { + string n = pop(split_src); + num = strcat(num, n); + } + push_back(tokens, (Token){TOKEN_TYPE_NUMBER, num}); + } + else if (is_alpha(token)) + { + string str = ""; + while (sizeof(split_src) > 0 && is_alpha(split_src[0])) + { + string n = pop(split_src); + str = strcat(str, n); + } + if (token == "let") + { + push_back(tokens, (Token){TOKEN_TYPE_LET, "let"}); + } + else + { + push_back(tokens, (Token){TOKEN_TYPE_IDENTIFIER, str}); + } + } + else + { + return tokens; + } + } + } + + return tokens; +} #endif // LEXER_C \ No newline at end of file diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index 9bc37ce..cf4cefe 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -1,5 +1,15 @@ -#ifndef LEXER_h -#define LEXER_h +#ifndef LEXER_H +#define LEXER_H +#include "token/token.h" +#include "../common/types.h" -#endif // LEXER_h \ No newline at end of file +/** + * @brief Tokenizes a string. + * + * @param src The string to tokenize. + * @return const Token* The tokens. + */ +Token *tokenize(string src); + +#endif // LEXER_H \ No newline at end of file diff --git a/src/lexer/token.h b/src/lexer/token/token.h similarity index 90% rename from src/lexer/token.h rename to src/lexer/token/token.h index ff791a9..5b3df39 100644 --- a/src/lexer/token.h +++ b/src/lexer/token/token.h @@ -2,7 +2,7 @@ #define TOKEN_H #include "token_type.h" -#include "types.h" +#include "../../common/types.h" /** * @brief A token is a pair of a token type and a value. diff --git a/src/lexer/token_type.h b/src/lexer/token/token_type.h similarity index 98% rename from src/lexer/token_type.h rename to src/lexer/token/token_type.h index 66737b8..30003cf 100644 --- a/src/lexer/token_type.h +++ b/src/lexer/token/token_type.h @@ -14,6 +14,7 @@ typedef enum TokenType { TOKEN_TYPE_EOF, + TOKEN_TYPE_LET, TOKEN_TYPE_NUMBER, TOKEN_TYPE_PLUS, TOKEN_TYPE_MINUS, diff --git a/src/lexer/types.h b/src/lexer/types.h deleted file mode 100644 index f160bac..0000000 --- a/src/lexer/types.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef TYPES_H -#define TYPES_H - -typedef char* string; - -#endif // TYPES_H \ No newline at end of file diff --git a/src/main.c b/src/main.c index 72b9b43..ee80b0b 100644 --- a/src/main.c +++ b/src/main.c @@ -1,7 +1,14 @@ #include +#include "lexer/lexer.h" +#include "common/types.h" + int main(void) { - printf("Hello, world!\n"); - return 0; + string src = "let x = 5"; + Token *tokens = tokenize(src); + for (int i = 0; i < sizeof(tokens); i++) + { + printf("Token: %s\n", tokens[i].value); + } } \ No newline at end of file diff --git a/src/main.simpl b/src/main.simpl new file mode 100644 index 0000000..edacaa6 --- /dev/null +++ b/src/main.simpl @@ -0,0 +1 @@ +let x = 45 \ No newline at end of file diff --git a/src/utils/ints.c b/src/utils/ints.c new file mode 100644 index 0000000..78aa8ab --- /dev/null +++ b/src/utils/ints.c @@ -0,0 +1,18 @@ +#ifndef UTILS_INTS_C +#define UTILS_INTS_C + +#include + +/** + * @brief Checks if a character is an integer. + * + * @param c The character to check. + * @return true If the character is an integer. + * @return false If the character is not an integer. + */ +bool is_int(char c) +{ + return c >= '0' && c <= '9'; +} + +#endif // UTILS_INTS_C \ No newline at end of file diff --git a/src/utils/ints.h b/src/utils/ints.h new file mode 100644 index 0000000..f4a92cf --- /dev/null +++ b/src/utils/ints.h @@ -0,0 +1,15 @@ +#ifndef UTILS_INTS_H +#define UTILS_INTS_H + +#include + +/** + * @brief Checks if a character is an integer. + * + * @param c The character to check. + * @return true If the character is an integer. + * @return false If the character is not an integer. + */ +bool is_int(char c); + +#endif // UTILS_INTS_H \ No newline at end of file diff --git a/src/utils/strings.c b/src/utils/strings.c new file mode 100644 index 0000000..9c396f4 --- /dev/null +++ b/src/utils/strings.c @@ -0,0 +1,42 @@ +#ifndef UTILS_STRINGS_H +#define UTILS_STRINGS_H + +#include "../common/types.h" + +#include +#include +#include + +/** + * @brief Splits a string by a delimiter. + * + * @param src The string to split. + * @param delim The delimiter to split by. + * @return string* The split string. + */ +string *split(string src, string delim) +{ + string *res = malloc(sizeof(string)); + string token = strtok(src, delim); + + while (token != NULL) + { + res[sizeof(res) + 1] = token; + token = strtok(NULL, delim); + } + return res; +} + +/** + * @brief Checks if a character is a digit. + * + * @param c The character to check. + * @return true If the character is a digit. + * @return false If the character is not a digit. + */ +bool is_alpha(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +#endif // UTILS_STRING_H \ No newline at end of file diff --git a/src/utils/strings.h b/src/utils/strings.h new file mode 100644 index 0000000..908d3f4 --- /dev/null +++ b/src/utils/strings.h @@ -0,0 +1,25 @@ +#ifndef UTILS_STRINGS_H +#define UTILS_STRINGS_H + +#include "../common/types.h" +#include + +/** + * @brief Splits a string by a delimiter. + * + * @param src The string to split. + * @param delim The delimiter to split by. + * @return string* The split string. + */ +string *split(string src, string delim); + +/** + * @brief Checks if a character is a digit. + * + * @param c The character to check. + * @return true If the character is a digit. + * @return false If the character is not a digit. + */ +bool is_alpha(char c); + +#endif // UTILS_STRING_H \ No newline at end of file