Line Encoding Bitstreams with Ragel

Ragel is a neat code generator for compiling efficient state machine programs. A quick way to describe it is "regular expressions on steroids". The input is essentially regular expressions interspersed with C code that will run whenever the corresponding transistions are taken.

I wanted to practice using it and thought that the line codes described on Wikipedia would provide very simple state machines to try out. The output of this program provides input to WaveDrom, which is another fun tool for creating timing diagrams from text. In WaveDrom, h means "high state", l means "low state", and . means hold the previous state.

Here's the source (lc.rl):

/* lc -- Line encode a bitstream with output in WaveDrom format.
   Copyright (C) 2022 Remington Furman
   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <string.h>
#include <getopt.h>
#include <stdlib.h>

struct line_code
{
    int cs;   /* Current state. */
    int bit;  /* Current bit value. */
};

%%{
    machine line_code;
    access fsm->;

    action low     { fsm->bit  = 0;  putchar('l'); }
    action high    { fsm->bit  = 1;  putchar('h'); }
    action toggle  { fsm->bit ^= 1;  putchar(fsm->bit ? 'h' : 'l'); }
    action toggle2 { fsm->bit ^= 1;  putchar(fsm->bit ? 'h' : 'l');
                     fsm->bit ^= 1;  putchar(fsm->bit ? 'h' : 'l'); }
    action hold    {                 putchar('.'); }
    action nl      {                 putchar('\n'); }

    zero_low_h  = ('0' @low  ('0' @hold)*); # Emit one  low then hold for rest
    zero_high_h = ('0' @high ('0' @hold)*); # Emit one high then hold for rest
    one_low_h   = ('1' @low  ('1' @hold)*); # Emit one  low then hold for rest
    one_high_h  = ('1' @high ('1' @hold)*); # Emit one high then hold for rest

    # Line code machines
    nrz_l := ((zero_low_h  | one_high_h )** 0 @nl)*; # Non-return-to-zero level
    nrz_i := ((zero_high_h | one_low_h  )** 0 @nl)*; # Non-return-to-zero invert
    nrz_s := (('0' @toggle | '1' @hold  )*  0 @nl)*; # Non-return-to-zero space
    nrz_m := (('0' @hold   | '1' @toggle)*  0 @nl)*; # Non-return-to-zero mark

    bph_l := (('0' @low @high    | '1' @high @low   )* 0 @nl)*; # Bi-phase level
    bph_s := (('0' @toggle2      | '1' @toggle @hold)* 0 @nl)*; # Bi-phase space
    bph_m := (('0' @toggle @hold | '1' @toggle2     )* 0 @nl)*; # Bi-phase mark

    # Aliases
    pass   := any @{ fhold; fgoto nrz_l; };
    invert := any @{ fhold; fgoto nrz_i; };
    manch  := any @{ fhold; fgoto bph_l; };
    main   := any @{ fhold; fgoto nrz_l; };
}%%

%% write data;

void line_code_init(struct line_code *fsm, int first_bit, int start)
{
    fsm->bit = first_bit;

    %% write init;

    fsm->cs = start;
}

void line_code_execute(struct line_code *fsm, const char *data, int len)
{
    const char *p = data;
    const char *pe = data + len;

    %% write exec;
}

int line_code_finish(struct line_code *fsm)
{
    if (fsm->cs == line_code_error) {
        return -1;
    }

    if (fsm->cs >= line_code_first_final) {
        return 1;
    }
    return 0;
}

#define NUM_ELEMENTS(a) (sizeof(a)/sizeof(a[0]))
#define XSTRINGIFY(s) #s
#define STRINGIFY(s) XSTRINGIFY(s)
#define MACHINE(name, desc) { STRINGIFY(name), desc, &line_code_en_ ## name }

struct { const char *name; char* desc; const int *start; } machines[] = {
    MACHINE(nrz_l,  "Non-return-to-zero level"),
    MACHINE(nrz_i,  "Non-return-to-zero invert"),
    MACHINE(nrz_s,  "Non-return-to-zero space"),
    MACHINE(nrz_m,  "Non-return-to-zero mark"),
    MACHINE(bph_l,  "Bi-phase level"),
    MACHINE(bph_s,  "Bi-phase space"),
    MACHINE(bph_m,  "Bi-phase mark"),
    MACHINE(pass,   "Passthrough (alias of nrz_l)"),
    MACHINE(invert, "Invert (alias of nrz_i)"),
    MACHINE(manch,  "Manchester (alias of bph_l)"),
};

void usage(char *name) {
    printf("%s [flags] input" , name);
    printf("    input\n"
           "        One or more strings of 0's and 1's\n"
           "    -i n\n"
           "        Initial line state (default: 0)\n"
           "    -c name\n"
           "        Line code to use (default: pass)\n");
    printf("\nAvailable codes:\n");
    for (int i = 0; i < NUM_ELEMENTS(machines); i++) {
        printf("  %-7s- %s\n", machines[i].name, machines[i].desc);
    }
}

int main(int argc, char **argv)
{
    struct line_code lc;

    int first_bit = 0;
    int start = line_code_en_nrz_l;

    int opt;
    while ((opt = getopt(argc, argv, "f:m:h")) != -1) {
        switch (opt) {
            case 'f': {
                first_bit = !!atoi(optarg);
                break;
            }
            case 'm': {
                int found = 0;
                for (int i = 0; i < NUM_ELEMENTS(machines); i++) {
                    if (strcmp(optarg, machines[i].name) == 0) {
                        start = *machines[i].start;
                        found = 1;
                        break;
                    }
                }
                if (!found) {
                    printf("Invalid machine name: %s\n", optarg);
                    usage(argv[0]);
                    exit(EXIT_FAILURE);
                }
                break;
            }
            case 'h': {
                usage(argv[0]);
                exit(EXIT_SUCCESS);
                break;
            }
            default: {
                usage(argv[0]);
                exit(EXIT_FAILURE);
                break;
            }
        }
    }

    /* Initialize state machine */
    line_code_init(&lc, first_bit, start);

    for (int i = optind; i < argc; i++) {
        /* Encode each input argument. */
        line_code_execute(&lc, argv[i], strlen(argv[i])+1);
    }

    if (line_code_finish(&lc) != 1) {
        fprintf(stderr, "%s: bad input\n", argv[0]);
        usage(argv[0]);
        exit(EXIT_FAILURE);
    }

    return 0;
}

Here are the generated state machines:

Line encoding state machines

Figure 1: Line encoding state machines

And here is some example output:

$ cat ./lc.sh
#!/bin/sh
INPUT=01001100011100001111

for machine in nrz_l nrz_i nrz_s nrz_m bph_l bph_s bph_m pass invert manch ; do
    echo ${machine} $(./lc -m ${machine} ${INPUT})
done

$ ./lc.sh
pass lhl.h.l..h..l...h...
invert hlh.l.h..l..h...l...
nrz_l lhl.h.l..h..l...h...
nrz_i hlh.l.h..l..h...l...
nrz_s h.lh..lhl...hlhl....
nrz_m .h..lh...lhl....hlhl
manch lhhllhlhhlhllhlhlhhlhlhllhlhlhlhhlhlhlhl
bph_l lhhllhlhhlhllhlhlhhlhlhllhlhlhlhhlhlhlhl
bph_s hlh.lhlhl.h.lhlhlhl.h.l.hlhlhlhlh.l.h.l.
bph_m h.lhl.h.lhlhl.h.l.hlhlhlh.l.h.l.hlhlhlhl
{signal: [
  {name: 'pass',   wave: 'lhl.h.l..h..l...h...'},
  {name: 'invert', wave: 'hlh.l.h..l..h...l...'},
  {name: 'nrz_l',  wave: 'lhl.h.l..h..l...h...'},
  {name: 'nrz_i',  wave: 'hlh.l.h..l..h...l...'},
  {name: 'nrz_s',  wave: 'h.lh..lhl...hlhl....'},
  {name: 'nrz_m',  wave: '.h..lh...lhl....hlhl'},
  {name: 'manch',  wave: 'lhhllhlhhlhllhlhlhhlhlhllhlhlhlhhlhlhlhl'},
  {name: 'bph_l',  wave: 'lhhllhlhhlhllhlhlhhlhlhllhlhlhlhhlhlhlhl'},
  {name: 'bph_s',  wave: 'hlh.lhlhl.h.lhlhlhl.h.l.hlhlhlhlh.l.h.l.'},
  {name: 'bph_m',  wave: 'h.lhl.h.lhlhl.h.l.hlhlhlh.l.h.l.hlhlhlhl'},
]}

Line encoding output rendered by WaveDrom

Figure 2: Line encoding output rendered by WaveDrom

All the code (including a Makefile) can be found here:

linecode.tar.gz

© Copyright 2023, Remington Furman

blog@remcycles.net

@remcycles@subdued.social