(print Haplolang is an imperative, interpreted programming language inspired by Emacs Lisp.)

> (print "Hello, World!")
"Hello, World!"
empty

(print Here are some examples!)

> (setq 'hello "Hello, World!")
"Hello, World!"
> hello
"Hello, World!"

> (defunc 'test (+ 2 3))
empty
> test
5

$ cat samples/if_condition_true.haplo
(if (> 100 5)
    (print "Correct!")
    (print "Wrong :("))
$ ./haplo samples/if_condition_true.haplo
"Correct!"
empty

> (list 1 2 3)
list: 1 2 3
> (append 4 (list 1 2 3 ))
list: 1 2 3 4
> (head (list 1 2 3 ))
3
> (tail (list 1 2 3))
list: 1 2

$ cat samples/while_loop.haplo
(
 (setq 'a 0)
 (while (< (a) 20)
   (
    (print (a))
    (setq 'a
          (+ (a) 1)))
   )
 )
$ ./haplo samples/while_loop.haplo
0
1
2
3
...

(print Design and implementation)

(print The design and implementation of Haplolang is quite simple and elegant. In this document I will give you an high-level description of the inner workings of the language. My hope is that, after reading this document, you will have a better idea on how to implement a similar thing yourself, and to demystify the idea that writing programming languages is really hard.)

(print Haplolang is greatly inspired by Emacs Lisp, since it is the Lisp language I use more and I am more familiar with.)

(print Lexer)

(print The syntax of any language in the Lisp family is notoriously minimalist, with only a few syntax characters namely (, ), #, ', ". The lexer looks for these characters and returns a corresponding enum value. If the lexer does not find any of them, then we are lexing an atom which is either a string, integer, float, bool or symbol. The lexer returns the length of the atom and its type to the parser.)

typedef enum {
  HAPLO_LEX_OPEN = 0,  // '('
  HAPLO_LEX_CLOSE,     // ')'
  HAPLO_LEX_ATOM,      // see struct HaploAtom in atom.h
  HAPLO_LEX_COMMENT,   // '#'
  HAPLO_LEX_QUOTE,     // '''
  HAPLO_LEX_NONE,
  _HAPLO_LEX_MAX,
} HaploToken;

// On success, returns a non-negative integer representing an
// HaploToken.  On failure, returns a negative integer with the error
// value.  If token_len is specified, its value will be set to the
// length of the token. If atom is specified and the type of token is
// ATOM, a new Atom will be allocated and the user will be responsible
// to free it using haplo_atom_free. If token_char is non null, it
// will be used to map the HaploToken enum to a char, otherwise a
// default HaploTokenChar will be used instead
// (haplo_default_token_char).
int haplo_lexer_next_token(char* input, int input_size,
                           int* token_len, HaploAtom *atom,
                           HaploTokenChar token_char);

typedef enum {
  HAPLO_ATOM_STRING = 0,    // "Hello World"
  HAPLO_ATOM_INTEGER,       // 69, -420
  HAPLO_ATOM_FLOAT,         // 69.420
  HAPLO_ATOM_BOOL,          // true, false
  HAPLO_ATOM_SYMBOL,        // print, +
  HAPLO_ATOM_QUOTE,         // 'test
  _HAPLO_ATOM_MAX,
} HaploAtomType;

typedef struct {
  HaploAtomType type;
  union {
    char* string;
    long int integer;
    double floating_point;
    bool boolean;
    char* symbol;
    char* quote;
  };
} HaploAtom;

(print Parser)

(print The parser is responsible to build the AST, which is represented as a binary tree of expressions. An expression contains either an atom or both an head and tail expressions.)

struct HaploExpr {
  bool is_atom;
  union {
    HaploAtom atom;
    struct {
      HaploExpr* head;
      HaploExpr* tail;
    };
  };
};

(print When an atom is lexed, the head of the current expressions is set with the atom, and the parser moves to the tail of the expression. If an open parentheses is encountered, then the current head is set as a new expression and the parser moves to this expression. Additionally, the parser ensures that for every open parentheses there is a closing one.)

HaploExpr *haplo_parser_parse_rec(HaploParser *parser)
{
  if (parser == NULL) return NULL;
  parser->error = 0;

  HaploExpr *expr = calloc(sizeof(HaploExpr), 1);
  
  // Head expression
  
  if (haplo_parser_peek_next_token(parser) < 0)
  {
    haplo_expr_free(expr);
    if (parser->error == -HAPLO_ERROR_LEXER_END_OF_INPUT)
      return NULL;
    
    HAPLO_PARSER_ERROR();
  }
  
  switch (parser->last_token)
  {
  // Cases...
  }

  // Optional tail expression
  expr->tail = haplo_parser_parse_rec(parser);
  return expr;
}

(print Interpreter)

(print The interpreter always evaluates the head with the tail set as argument. When a non symbol is evaluated, it just returns a copy of itself, otherwise the symbol is looked for in a symbol table which contains either a haplo function (AST), a c function, or a variable (which is a value).)

HaploValue haplo_interpreter_interpret(HaploInterpreter *interpreter,
                                       HaploExpr *expr)
{
  if (interpreter == NULL)
  {
    return (HaploValue) {
      .type = HAPLO_VAL_ERROR,
      .error = -HAPLO_ERROR_INTERPRETER_NULL,
    };
  }
  if (expr == NULL)
  {
    return (HaploValue) {
      .type = HAPLO_VAL_EMPTY,
    };
  }

  if (expr->is_atom)
    return haplo_interpreter_eval_atom(expr->atom); // Returns a copy of itself

  HaploValue out_val;
  HaploValue func = haplo_interpreter_interpret(interpreter, expr->head);

  // Handle Special symbols
  // ...
  
  HaploValueList *args = haplo_interpreter_interpret_tail(interpreter, expr->tail);

  out_val = haplo_interpreter_call(interpreter, func, args);

  haplo_value_free(func);
  haplo_value_list_free(args);
  return out_val;
}


HaploValueList *haplo_interpreter_interpret_tail(HaploInterpreter *interpreter,
                                                   HaploExpr *expr)
{
  if (interpreter == NULL || expr == NULL)
    return NULL;

  HaploValue head = haplo_interpreter_interpret(interpreter, expr->head);
  HaploValueList *tail = haplo_interpreter_interpret_tail(interpreter, expr->tail);

  if (head.type == HAPLO_VAL_SYMBOL)
  {
    head = haplo_interpreter_call(interpreter, head, tail);
    haplo_value_list_free(tail);
    return haplo_value_list_push_front(head, NULL);
  }

  return haplo_value_list_push_front(head, tail);
}

// Should not free args here
HaploValue haplo_interpreter_call(HaploInterpreter *interpreter,
                                  HaploValue value,
                                  HaploValueList* args)
{
  if (interpreter == NULL)
  {
    return (HaploValue) {
      .type = HAPLO_VAL_ERROR,
      .error = -HAPLO_ERROR_INTERPRETER_NULL,
    };
  }

  if (value.type != HAPLO_VAL_SYMBOL)
  {
    return haplo_value_deep_copy(value);
  }

  HaploSymbol symbol;
  int err = haplo_symbol_map_lookup(interpreter->symbol_map,
                                    value.symbol,
                                    &symbol);
  if (err < 0)
  {
    return (HaploValue) {
      .type = HAPLO_VAL_ERROR,
      .error = -HAPLO_ERROR_INTERPRETER_UNKNOWN_SYMBOL,
    };
  }

  switch(symbol.type)
  {
  case HAPLO_SYMBOL_C_FUNCTION:
    return symbol.c_func.run(interpreter, args);
  case HAPLO_SYMBOL_FUNCTION:
    return haplo_interpreter_interpret(interpreter, symbol.func);
  case HAPLO_SYMBOL_VARIABLE:
    return symbol.var;
  default:
    break;
  }
  return (HaploValue) {
    .type = HAPLO_VAL_ERROR,
    .error = -HAPLO_ERROR_INTERPRETER_UNKNOWN_SYMBOL_TYPE,
  };
}

typedef struct {
  HaploSymbolList **_map;
  int capacity;
} HaploSymbolMap;

typedef struct {
  HaploSymbolKey key;
  struct HaploSymbolList *next;
  HaploSymbol val;
} HaploSymbolList;

typedef char* HaploSymbolKey;

typedef enum {
  HAPLO_SYMBOL_C_FUNCTION = 0,   // stdlib
  HAPLO_SYMBOL_FUNCTION,
  HAPLO_SYMBOL_VARIABLE,
  _HAPLO_SYMBOL_MAX,
} HaploSymbolType;

typedef struct {
  HaploSymbolType type;
  union {
    HaploFunction c_func;    // function implemented in c
    HaploExpr* func;         // function defined as an AST
    HaploValue var;          // a variable
  };
} HaploSymbol;

(print If, While, defunc are special symbols for the interpreter as they handle the arguments differently. For example, the if statement evaluates one expression or the other depending on the outcome of a condition. If we implemented if as a regular function then all of its arguments would be evaluated first therefore executing both outcomes. For this reason, we need to walk the AST differently for certain functionalities.)

(print Standard Library)

(print By default, the symbol table in the interpreter is initialized by default with some functions from the standard library, the code of these functions lives under the stdlib directory. These functions are implemented in C and are automatically registered via a macro. This macro uses a compiler directive to register a constructor function, this may not be the best choice since it relies on a compiler specific flag, but I thought it would not be an issue for now.)

(print The standard library implements things like integer and floating point operations and comparisons, logical operators, lists and printing)

#define HAPLO_STD_FUNC(fn)    \
  HAPLO_STD_FUNC_STR(fn, #fn)

#define HAPLO_STD_FUNC_STR(fn, func_string)    \
    HaploValue __haplo_std_##fn(HaploInterpreter *, HaploValueList *); \
    __attribute__((constructor)) static void __haplo_std_register_##fn(void) \
    {                                                \
      if (__haplo_std_symbol_map._map == NULL) \
      { \
        haplo_symbol_map_init(&__haplo_std_symbol_map, \
                              HAPLO_INTERPRETER_SYMBOL_MAP_CAPACITY); \
      } \
      haplo_symbol_map_update(&__haplo_std_symbol_map,\
                              func_string,            \
                              (HaploSymbol){ \
                                .type = HAPLO_SYMBOL_C_FUNCTION,  \
                                .c_func = (HaploFunction) {\
                                  .run = __haplo_std_##fn \
                                }               \
                              });               \
    } \
    HaploValue __haplo_std_##fn(HaploInterpreter *interpreter, HaploValueList *args)

// and BOOLEAN ...
// Returns: BOOLEAN | ERROR
HAPLO_STD_FUNC_STR(logical_and, "and")
{
  if (haplo_value_list_len(args) < 2)
  {
    return (HaploValue) {
      .type = HAPLO_VAL_ERROR,
      .error = -HAPLO_ERROR_INTERPRETER_WRONG_NUMBER_OF_ARGS,
    };
  }

  HaploValueList *this = args;
  bool result = true;
  while(this != NULL) {

    if (this->val.type != HAPLO_VAL_BOOL) {
      return (HaploValue) {
        .type = HAPLO_VAL_ERROR,
        .error = -HAPLO_ERROR_INTERPRETER_INVALID_TYPE,
      };
    }
    
    result &= this->val.boolean;
    this = this->next;
  }

  return (HaploValue) {
    .type = HAPLO_VAL_BOOL,
    .boolean = result,
  };
}

(print Cli interpreter)

(print The cli interpreter simply calls the interpreter either with input from stdin or from a file. It also supports a few flags.)

void process_line(Interpreter *interpreter, char* input, ssize_t len)
{
  int err;
  Parser parser = {0};
  err = parser_init(&parser, input, len);
  if (err < 0)
  {
    fprintf(stderr, "Error %d after parser_init\n", err);
    return;
  }

  Expr *expr = parser_parse(&parser);
  if (expr == NULL)
  {
    if (parser.error != 0)
    {
      fprintf(stderr, "Error parser_parse returned a null expression\n");
    }
    return;
  }

  Value val = interpreter_interpret(interpreter, expr);
  
  char buf[1024] = {0};
  
  haplo_value_string(val, buf, 1024);
  printf("%s\n", buf);

  haplo_value_free(val);
  expr_free(expr);
  return;
}

(print Testing)

(print This project comes with a decent amount of testing. Unit tests live under tests/ and can be run via make tests, while the tests for the cli tool are under samples/ and can be run via haplo_tests_e2e.sh. In both cases, tests only need to be implemented without the need to register them somewhere. I think it is interesting to look at how the unit tests are registered because it is pretty cool, it uses a neat trick with the linker but I will not explain it here.)

// Register a test case, It should end with HAPLO_TEST_SUCCESS or
// HAPLO_TEST_FAILED
// Thanks Sam P.
#define HAPLO_TEST(suiteName, uTtestName)                      \
    static int suiteName##_##uTtestName(void);                 \
    static HaploTest Record_##suiteName##_##uTtestName         \
    __attribute__((used, section(".utest_records"), aligned(sizeof(_Alignof(HaploTest))))) = { \
        .marker = 0xDeadBeaf,                                  \
        .testSuite = #suiteName,                               \
        .functionName = #uTtestName,                           \
        .testName = #suiteName "_" #uTtestName,                \
        .fileName = __FILE__,                                  \
        .lineNumber = __LINE__,                                \
        .functionPointer = suiteName##_##uTtestName            \
    };                                                         \
    static int suiteName##_##uTtestName(void)

#define HAPLO_TEST_SUCCESS \
  do { return 0; } while(0)

#define HAPLO_TEST_FAILED \
  do { return -1; } while(0)

HAPLO_TEST(lexer_test, trim)
{
  {
    char *input = "  ( 123";
    int expected_val = 2;
    int trimmed = lexer_trim_left(input, strlen(input), NULL, NULL);
    if (trimmed != expected_val)
    {
      fprintf(stderr, "Error lexer_trim_left on \"%s\", expected %d, got %d",
              input, expected_val, trimmed);
      goto test_failed;
    }
  }
  
  // ...
  
  HAPLO_TEST_SUCCESS;

 test_failed:
  HAPLO_TEST_FAILED;
}

(`print` The Design and Implementation of Haplolang)

(`print` A lisp-like programming language (to have fun))

(`print` Here are some examples!)

(`print` Design and implementation)

(`print` Lexer)

(`print` Parser)

(`print` Interpreter)

(`print` Standard Library)

(`print` Cli interpreter)

(`print` Testing)

(print The Design and Implementation of Haplolang)

(print A lisp-like programming language (to have fun))

(print Here are some examples!)

(print Design and implementation)

(print Lexer)

(print Parser)

(print Interpreter)

(print Standard Library)

(print Cli interpreter)

(print Testing)