Skip to content

leo4048111/LameCC

Repository files navigation

LameCC

A lame c compiler which implements a basic lexer, an LR(1) parser ,a recursive descent parser and LLVM IR generator.

Download this project

git clone --recurse-submodules https://github.com/leo4048111/LameCC

Basic features

  • Lexer ☑
  • LR(1) Parser ☑
  • Recursive Descent Parser ☑
  • Semantic Analysis ☑
  • Intermediate Code Generator(in both Quaternion and LLVM IR forms) ☑
  • Code Optimization ☑
  • Assembly Generator ☑

Miscellaneous features

  • Prettified json dump
  • Log info/error
  • Visualized LR(1) canonical collection, ACTION GOTO table and LR(1) parsing process
  • Other features

Build prerequisites

  • OS: Windows or GNU/Linux
  • Cmake version >= 3.8
  • Installed LLVM libraries and cpp headers, make sure you have set CMAKE_PREFIX_PATH or LLVM_DIR env variable to LLVM directory properly
  • If you are running Windows and have installed MinGW64, simply run build.bat

Parsing capability

  • function definitions/local & extern declarations
  • int/float/char var declaration/definition
  • if-else statement
  • int/float/void/char function declaration/definition
  • while statement
  • value statement(complex expression, function call, etc...)
  • return statement
  • GCC dialect __asm__ statement

Usage

Example input source file(see ./testcases/test.cpp):

extern "C" int putchar(char a);
extern "C" int puts(char *a);

void putInt(int i)
{
    if (i < 0)
    {
        putchar('-');
        i = -i;
    }
    if (i >= 10)
        putInt(i / 10);

    putchar(i % 10 + '0');
}

// nonvoid return type function decl with params
int NonVoidFuncDeclWithParams(int parm1, int parm2);

// nonvoid return type function decl without params
char NonVoidFuncDeclWithoutParams();

// nonvoid return type function definition with params
float NonVoidFuncDefWithoutParamsWithEmptyBody()
{
    return 0xAF.D65P-5; // some float representations
}

// nonvoid return type function definition with params
int NonVoidFuncDefWithParamsWithEmptyBody(int param1, int param2)
{
    return 0;
}

// void return type function decl with params
void VoidFuncDeclWithParams(int parm1, int parm2);

// nonvoid return type function decl without params
void VoidFuncDeclWithoutParams();

// void return type function definition with params with empty body
void VoidFuncDefWithoutParamsWithEmptyBody()
{
}

// void return type function definition with params with empty body
void VoidFuncDefWithParamsWithEmptyBody(int param1, int param2)
{
}

void integerLiteralTest()
{
    puts("Integer literal test:");
    int a = 0;
    while(a < 100) {
        putInt(a);
        putchar(' ');
        a++;
    }
    putchar('\n');
}

void arrayTest()
{
    puts("Array test:");
    int a[10];
    int idx = 0;
    puts("Before sorted:");
    while(idx < 10)
    {
        a[idx] = 10 - idx;
        putInt(a[idx]);
        putchar(' ');
        idx++;
    }
    puts("");
    puts("Bubble sorted:");
    int i = 0;
    while(i < 10)
    {
        int j = i;
        while(j < 10)
        {
            if (a[i] > a[j])
            {
                int tmp = a[i];
                a[i] = a[j];
                a[j] = tmp;
            }
            j = j + 1;
        }
        i++;
    }

    i = 0;
    while(i < 10)
    {
        putInt(a[i]);
        putchar(' ');
        i = i + 1;
    }

    puts("");
}

int inlineAsmTest(int num1, int num2)
{
    int result = 0;
    __asm__ ("movl %1, %%eax;"
             "movl %2, %%ebx;"
             "addl %%ebx, %%eax;"
             "movl %%eax, %0;"
             :"=r"(result)
             :"r"(num1), "r"(num2)
             :);

    return result;
}

int main()
{
    // external function linkage test
    puts("------------------------------------------------------------");
    puts(".____                          _________ _________"); 
    puts("|    |   _____    _____   ____ \\_   ___ \\\\_   ___ \\"); 
    puts("|    |   \\__  \\  /     \\_/ __ \\/    \\  \\//    \\  \\/");
    puts("|    |___ / __ \\|  Y Y  \\  ___/\\     \\___\\     \\____");
    puts("|_______ (____  /__|_|  /\\___  >\\______  /\\______  /");
    puts("------------------------------------------------------------");

    // run integer literal test
    integerLiteralTest();

    // run array test
    arrayTest();

    // run inline asm test(which is a asm implementation of sum function)
    puts("Inline asm test:");
    int res = inlineAsmTest(1, 2);
    putInt(res);
}

Command options:

PS D:\Projects\CPP\Homework\LameCC\build> .\LameCC.exe -?
Usage:
  LameCC.exe <input file> [options]
Available options:
  -?, --help         show all available options
  -o, --out          set output file path
  -T, --dump-tokens  dump tokens in json format
  -A, --dump-ast     dump AST Nodes in json format
      --LR1          specify grammar with a json file and use LR(1) parser
      --log          print LR(1) parsing process

Run command:

PS D:\Projects\CPP\Homework\LameCC\build> .\LameCC.exe ../testcases/test.cpp

Token dump:

[
  {
    "id": 1,
    "type": "TOKEN_KWINT",
    "content": "int",
    "position": [
      2,
      1
    ]
  },
  {
    "id": 2,
    "type": "TOKEN_IDENTIFIER",
    "content": "NonVoidFuncDeclWithParams",
    "position": [
      2,
      5
    ]
  },
  {
    "id": 3,
    "type": "TOKEN_LPAREN",
    "content": "(",
    "position": [
      2,
      30
    ]
  },
  {
    "id": 4,
    "type": "TOKEN_KWINT",
    "content": "int",
    "position": [
      2,
      31
    ]
  },
  ...

AST dump:

{
  "type": "TranslationUnitDecl",
  "children": [
    {
      "type": "FunctionDecl",
      "functionType": "int(int, int)",
      "name": "NonVoidFuncDeclWithParams",
      "params": [
        {
          "type": "ParmVarDecl",
          "name": "parm1"
        },
        {
          "type": "ParmVarDecl",
          "name": "parm2"
        }
      ],
      "body": "empty"
    },
    {
      "type": "FunctionDecl",
      "functionType": "char()",
      "name": "NonVoidFuncDeclWithoutParams",
      "params": [],
      "body": "empty"
    },
    {
      "type": "FunctionDecl",
      "functionType": "float()",
      "name": "NonVoidFuncDefWithoutParamsWithEmptyBody",
      "params": [],
      "body": [
        {
          "type": "CompoundStmt",
          "children": [
            {
              "type": "ReturnStmt",
              "value": [
                {
                  "type": "FloatingLiteral",
                  "value": "5.494911"
                }
              ]
            }
          ]
        }
      ]
    },
    ...

LLVM IR:

; ModuleID = 'LCC_LLVMIRGenerator'
source_filename = "LCC_LLVMIRGenerator"

declare i32 @NonVoidFuncDeclWithParams(i32, i32)

declare i8 @NonVoidFuncDeclWithoutParams()

define float @NonVoidFuncDefWithoutParamsWithEmptyBody() {
entry:
  %retVal = alloca float, align 4
  store float 0x4015FACA00000000, ptr %retVal, align 4
  br label %return

return:                                           ; preds = %entry
  %0 = load float, ptr %retVal, align 4
  ret float %0
}

define i32 @NonVoidFuncDefWithParamsWithEmptyBody(i32 %param1, i32 %param2) {
entry:
  %param22 = alloca i32, align 4
  %param11 = alloca i32, align 4
  %retVal = alloca i32, align 4
  store i32 %param1, ptr %param11, align 4
  store i32 %param2, ptr %param22, align 4
  store i32 0, ptr %retVal, align 4
  br label %return

return:                                           ; preds = %entry
  %0 = load i32, ptr %retVal, align 4
  ret i32 %0
}

declare void @VoidFuncDeclWithParams(i32, i32)

declare void @VoidFuncDeclWithoutParams()

define void @VoidFuncDefWithoutParamsWithEmptyBody() {
entry:
  br label %return

return:                                           ; preds = %entry
  ret void
}

define void @VoidFuncDefWithParamsWithEmptyBody(i32 %param1, i32 %param2) {
entry:
  %param22 = alloca i32, align 4
  %param11 = alloca i32, align 4
  store i32 %param1, ptr %param11, align 4
  store i32 %param2, ptr %param22, align 4
  br label %return

return:                                           ; preds = %entry
  ret void
}

define i32 @main() {
entry:
  %mid = alloca i32, align 4
  %target = alloca i32, align 4
  %right = alloca i32, align 4
  %left = alloca i32, align 4
  %retVal = alloca i32, align 4
  store i32 0, ptr %left, align 4
  store i32 100, ptr %right, align 4
  %calltmp = call i32 @NonVoidFuncDefWithParamsWithEmptyBody(i32 99, i32 100)
  %remtmp = srem i32 %calltmp, 2
  %addtmp = add i32 %remtmp, 5
  %0 = load i32, ptr %right, align 4
  %1 = load i32, ptr %left, align 4
  %multmp = mul i32 %0, %1
  %subtmp = sub i32 %addtmp, %multmp
  store i32 %subtmp, ptr %target, align 4
  br label %while.cond

while.cond:                                       ; preds = %if.end, %entry
  %2 = load i32, ptr %left, align 4
  %3 = load i32, ptr %right, align 4
  %lttmp = icmp slt i32 %2, %3
  br i1 %lttmp, label %while.body, label %while.end

while.body:                                       ; preds = %while.cond
  %4 = load i32, ptr %left, align 4
  %5 = load i32, ptr %right, align 4
  %addtmp1 = add i32 %4, %5
  %divtmp = sdiv i32 %addtmp1, 2
  store i32 %divtmp, ptr %mid, align 4
  %6 = load i32, ptr %mid, align 4
  %7 = load i32, ptr %target, align 4
  %eqtmp = icmp eq i32 %6, %7
  br i1 %eqtmp, label %if.body, label %if.else

if.body:                                          ; preds = %while.body
  %8 = load i32, ptr %mid, align 4
  store i32 %8, ptr %retVal, align 4
  br label %if.end

if.else:                                          ; preds = %while.body
  %9 = load i32, ptr %mid, align 4
  %10 = load i32, ptr %target, align 4
  %lttmp2 = icmp slt i32 %9, %10
  br i1 %lttmp2, label %if.body5, label %if.else4

if.body5:                                         ; preds = %if.else
  %11 = load i32, ptr %mid, align 4
  %addtmp6 = add i32 %11, 1
  %12 = load i32, ptr %left, align 4
  store i32 %addtmp6, ptr %left, align 4
  br label %if.end3

if.else4:                                         ; preds = %if.else
  %13 = load i32, ptr %mid, align 4
  %14 = load i32, ptr %right, align 4
  store i32 %13, ptr %right, align 4
  br label %if.end3

if.end3:                                          ; preds = %if.else4, %if.body5
  br label %if.end

if.end:                                           ; preds = %if.end3, %if.body
  br label %while.cond

while.end:                                        ; preds = %while.cond
  %15 = load i32, ptr %left, align 4
  store i32 %15, ptr %retVal, align 4
  br label %return

return:                                           ; preds = %while.end
  %16 = load i32, ptr %retVal, align 4
  ret i32 %16
}

LR(1) Canonical Collections(The following statements will be printed if --log and --LR1 options are set during startup): image
ACTION GOTO Table:
image
Parsing Process:
image

Credit

About

A lame c compiler which implements a basic lexer, an LR(1) parser and a recursive descent parser.

Topics

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages