A fun, readable tour of xv6’s keyboard path. We’ll peek at the raw scancodes and the tiny state machine in kbd.c. This is the first stop on our journey to understand where printf() ultimately gets its input and how your keystrokes travel through the kernel.


0. Where we are in the series

We’re following an end-to-end story: from keys → console → syscalls → printf → terminal. This first post stays at the “keys → console” boundary: how a single hardware byte turns into a character the console understands.


1. Overview

  • The PC keyboard sends scancodes (make/break + optional E0 prefix).

  • kbd.c is a small state machine: read a byte, update modifier bits (Shift/Ctrl/Alt/Caps), map to a character/control-code.

  • kbdintr() wires the keyboard to the console layer via a simple callback (consoleintr(kbdgetc)).

  • This article ships with source + line-by-line commentary for fast reference.


2. Quick mental model

  1. Hardware: The keyboard controller exposes two I/O ports:
    • KBSTATP (0x64): status. Bit KBS_DIB means “a byte is waiting.”
    • KBDATAP (0x60): data. Read the scancode from here.
  2. Scancodes:
    • Make (press) codes (e.g., 0x1E means “A-key pressed.”)
    • Break (release) codes: make | 0x80 (e.g., 0x9E for “A released”).
    • E0 prefix: some keys start with 0xE0 (arrows, right Ctrl/Alt, keypad Enter…); the next byte is “extended.”
  3. State (shift bits): tracks SHIFT/CTL/ALT (hold), CAPS/NUM/SCROLL (toggle), and a temporary E0ESC flag.
  4. Mapping: pick a lookup table based on (shift & (CTL|SHIFT)): normalmap, shiftmap, or ctlmap.
  5. Caps logic: only after mapping, flip case for letters if CAPSLOCK is set.
  6. Return: a visible character (like 'a' or '\n'), a special keycode (e.g., KEY_RT), 0 when the byte only updated state and produced no character, or -1 when no byte was waiting.

3. Configurations (kbd.h)

Keyboard constants

// PC keyboard controller interface: I/O ports and status bits
#define KBSTATP  0x64  // kbd controller status port (read)
#define KBS_DIB  0x01  // status bit: data in buffer
#define KBDATAP  0x60  // kbd data port (read)

#define NO       0     // map entry meaning "no character for this scancode"

// Modifier bits kept in kbdgetc()'s shift state.
// SHIFT and CTL occupy bits 0-1 because (shift & (CTL | SHIFT)) is used
// directly as the index into charcode[4].
#define SHIFT    (1<<0)
#define CTL      (1<<1)
#define ALT      (1<<2)
#define CAPSLOCK (1<<3)
#define NUMLOCK  (1<<4)
#define SCROLLLOCK (1<<5)
#define E0ESC    (1<<6)  // set after an 0xE0 prefix byte, cleared by the next byte

// Special keycodes returned to console (not printable chars)
#define KEY_HOME 0xE0
#define KEY_END  0xE1
#define KEY_UP   0xE2
#define KEY_DN   0xE3
#define KEY_LF   0xE4
#define KEY_RT   0xE5
#define KEY_PGUP 0xE6
#define KEY_PGDN 0xE7
#define KEY_INS  0xE8
#define KEY_DEL  0xE9

// C('A') == Control-A
// The argument is parenthesized so that an expression argument such as
// C(a | b) is evaluated as a whole before '@' is subtracted.
#define C(x) ((x) - '@')

Modifier/update tables

// Hold-type modifiers, indexed by scancode. Indices >= 0x80 are reached
// for E0-prefixed keys, whose scancode kbdgetc() ORs with 0x80.
// Uses standard C99 "[index] = value" designators.
static uchar shiftcode[256] = {
  [0x1D] = CTL, [0x2A] = SHIFT, [0x36] = SHIFT, [0x38] = ALT,
  [0x9D] = CTL, [0xB8] = ALT
};

// Toggle-type modifiers, indexed by make code; kbdgetc() XORs these
// bits into its state on each press.
static uchar togglecode[256] = {
  [0x3A] = CAPSLOCK, [0x45] = NUMLOCK, [0x46] = SCROLLLOCK
};
  • shiftcode drives hold-type modifiers (press=OR set, release=AND clear).
  • togglecode drives toggle-type modifiers (press=XOR flip; release does nothing).

4. The character maps

The three lookup tables convert a scancode into a “meaning” under different modifier states. Only highlights shown here; full tables are in the appendix.

// Map used when neither CTL nor SHIFT is held. Index = scancode;
// E0-prefixed keys are looked up at (scancode | 0x80).
static uchar normalmap[256] = {
  // 0x00 ...
  NO, 0x1B, '1','2','3','4','5','6','7','8','9','0','-','=', '\b','\t',
  'q','w','e','r','t','y','u','i','o','p','[',']','\n', NO, 'a','s',
  'd','f','g','h','j','k','l',';','\'','`', NO,'\\','z','x','c','v',
  'b','n','m',',','.', '/', NO, '*', /* ... */
  [0x9C] = '\n', [0xB5] = '/',           // keypad Enter/Divide
  [0xC8] = KEY_UP,   [0xD0] = KEY_DN,
  [0xC9] = KEY_PGUP, [0xD1] = KEY_PGDN,
  [0xCB] = KEY_LF,   [0xCD] = KEY_RT,
  // Home shares the keypad-7 key (0x47), so its E0 form lands at 0xC7,
  // in line with the other navigation keys (0x48 -> 0xC8, 0x4F -> 0xCF, ...).
  [0xC7] = KEY_HOME, [0xCF] = KEY_END,
  [0xD2] = KEY_INS,  [0xD3] = KEY_DEL
};

// Map used while SHIFT (and not CTL) is held; same layout as normalmap.
static uchar shiftmap[256] = {
  // 0x00 ...
  NO,  0x1B, '!', '@', '#', '$', '%', '^',
  '&', '*',  '(', ')', '_', '+', '\b', '\t',
  'Q', 'W',  'E', 'R', 'T', 'Y', 'U', 'I',
  'O', 'P',  '{', '}', '\n', NO, 'A', 'S',
  'D', 'F',  'G', 'H', 'J', 'K', 'L', ':',
  '"', '~',  NO,  '|', 'Z', 'X', 'C', 'V',
  'B', 'N',  'M', '<', '>', '?',
  /* same special keys as normalmap ... */
};

// Map used while CTL is held (with or without SHIFT).
static uchar ctlmap[256] = {
  // Ctrl combos like C('Q'), C('\\'), etc.
  /* ... */ [0x2E] = C('C') /* Ctrl+C */, /* ... */
  [0xC8] = KEY_UP, [0xCD] = KEY_RT, /* arrows still special */
};

Picking the map

static uchar *charcode[4] = {
  normalmap,   // idx = 0      (no CTL/SHIFT)
  shiftmap,    // idx = 1      (SHIFT only)
  ctlmap,      // idx = 2      (CTL only)
  ctlmap       // idx = 3      (CTL + SHIFT → still ctlmap)
};

The index idx = shift & (CTL | SHIFT) masks off everything but the CTL/SHIFT bits:

  • 0: normalmap
  • 1: shiftmap
  • 2 or 3: ctlmap (Ctrl dominates).

5. The core: kbdgetc()

// Poll the keyboard controller once and decode at most one byte.
// Returns -1 when no byte is waiting, 0 when the byte produced no
// character (E0 prefix, key release, or a key whose map entry is NO),
// otherwise the mapped character or KEY_* code.
int kbdgetc(void) {
  static uint shift;         // modifier bits + E0ESC, kept across calls
  static uchar *charcode[4] = { normalmap, shiftmap, ctlmap, ctlmap };
  uint st, data, c;

  // 1) Bail out unless the controller reports a pending byte.
  st = inb(KBSTATP);
  if (!(st & KBS_DIB))
    return -1;

  // 2) Fetch one raw scancode byte.
  data = inb(KBDATAP);

  // 3) E0 prefix: only remember it; the next byte is the "extended" key.
  if (data == 0xE0) {
    shift |= E0ESC;
    return 0;
  }

  // 4) Break code (key release): update state, emit nothing.
  if (data & 0x80) {
    if (!(shift & E0ESC))
      data &= 0x7F;          // plain key: drop the release bit to get the make code
    shift &= ~(shiftcode[data] | E0ESC); // clear hold bits, drop E0 flag
    return 0;
  }

  // 5) Make code that follows an E0 prefix.
  if (shift & E0ESC) {
    data |= 0x80;            // extended keys live in the high half of the tables
    shift &= ~E0ESC;
  }

  // 6) Apply modifier changes for this key press.
  shift |= shiftcode[data];  // Shift/Ctrl/Alt press → set bit(s)
  shift ^= togglecode[data]; // Caps/Num/Scroll press → flip bit

  // 7) Look the key up in the map selected by the CTL/SHIFT bits.
  c = charcode[shift & (CTL | SHIFT)][data];

  // 8) CapsLock inverts the case of letters only.
  if (shift & CAPSLOCK) {
    if (c >= 'a' && c <= 'z')
      c -= 'a' - 'A';
    else if (c >= 'A' && c <= 'Z')
      c += 'a' - 'A';
  }
  return c;  // 0 if none, >0 for character/special keycode
}

Why |= for hold-keys and ^= for toggle-keys?

  • Hold (Shift/Ctrl/Alt): press sets the bit (OR), release clears it (AND in the break branch).
  • Toggle (Caps/Num/Scroll): each press flips the bit (XOR); release does nothing.

Why flip case after mapping?

  • Shift was already applied by choosing shiftmap.
  • CapsLock is a case toggle: apply it only to letters after mapping so that Shift and Caps can cancel each other (Caps+Shift+A → 'a'). Symbols like 1/! are unaffected by CapsLock.

6. Hook into the console: kbdintr()

// Keyboard interrupt entry: pass kbdgetc to the console layer as its
// "get one character" callback.
void
kbdintr(void)
{
  consoleintr(kbdgetc);
}

This hands the “get-one-char” callback to the console input layer. The console will keep calling kbdgetc() until it gets -1 (no more input), and it feeds any positive c into its line-editing buffer (handling backspace, echo, etc.).


7. Walkthroughs (byte-by-byte)

Example A — press a

  • Hardware sends: 0x1E
  • Not E0, not break → update modifiers (none), map with normalmap[0x1E]='a', Caps off → return 'a'.

Example B — hold Shift, press a → A

  • Press Shift: 0x2A → shift |= SHIFT → return 0 (no char).
  • Press a: 0x1E → map with shiftmap[0x1E]='A', Caps off → return 'A'.
  • Release a: 0x9E → break path, clear nothing (not a modifier), return 0.
  • Release Shift: 0xAA → clear SHIFT, return 0.

Example C — Caps on, press a → A

  • Press Caps: 0x3A → shift ^= CAPSLOCK (toggle on), return 0.
  • Press a: 0x1E → map 'a' via normalmap, then Caps flips to 'A', return 'A'.

Example D — Right arrow (E0 4D)

  • 0xE0 → set E0ESC, return 0.
  • 0x4D with E0ESC set → data |= 0x80 → 0xCD, clear E0ESC.
  • normalmap[0xCD] = KEY_RT → return KEY_RT.

Example E — Ctrl + C (ETX, 3)

  • Press Ctrl: 0x1D → shift |= CTL.
  • Press C: 0x2E → ctlmap[0x2E] = C('C') = 3 → return 3.

Example F — Caps + Shift + a → a (they cancel)

  • Shift applied first (shiftmap gives 'A'), Caps flips 'A' → 'a' → return 'a'.

8. Appendix — Complete source

kbd.c (core)
#include "types.h"
#include "x86.h"
#include "defs.h"
#include "kbd.h"

// Read and decode at most one byte from the keyboard controller.
// Returns -1 if no data is available, 0 if the byte yielded no
// character, otherwise the mapped character or KEY_* code.
int kbdgetc(void)
{
  static uint shift;      // modifier state bits, persists across calls
  static uchar *charcode[4] = {
    normalmap, shiftmap, ctlmap, ctlmap
  };
  uint st, data, c;

  st = inb(KBSTATP);
  if(!(st & KBS_DIB))         // no data available
    return -1;

  data = inb(KBDATAP);        // read one scan byte

  if(data == 0xE0){           // extended prefix: remember it for the next byte
    shift |= E0ESC;
    return 0;
  }

  if(data & 0x80){            // break (key release)
    if(!(shift & E0ESC))
      data &= 0x7F;           // non-extended key: index by its make code
    shift &= ~(shiftcode[data] | E0ESC);
    return 0;
  }

  if(shift & E0ESC){          // the make after E0: use the high half of the tables
    data |= 0x80;
    shift &= ~E0ESC;
  }

  shift |= shiftcode[data];   // hold-type modifiers: set
  shift ^= togglecode[data];  // toggle-type modifiers: flip

  c = charcode[shift & (CTL | SHIFT)][data];
  if(shift & CAPSLOCK){       // CapsLock swaps the case of letters only
    if(c >= 'a' && c <= 'z')
      c -= 'a' - 'A';
    else if(c >= 'A' && c <= 'Z')
      c += 'a' - 'A';
  }
  return c;
}

// Keyboard interrupt handler: gives the console layer kbdgetc as the
// function to call for input.
void kbdintr(void)
{
  consoleintr(kbdgetc);
}
kbd.h (constants, maps)
// PC keyboard interface constants

#define KBSTATP         0x64    // status port (read)
#define KBS_DIB         0x01    // status bit: data in buffer
#define KBDATAP         0x60    // data port (read)

#define NO              0       // map entry: no character for this scancode

// Hold-type modifier bits. SHIFT and CTL sit in bits 0-1 because
// (shift & (CTL | SHIFT)) is used as the index into charcode[4].
#define SHIFT           (1<<0)
#define CTL             (1<<1)
#define ALT             (1<<2)

// Toggle-type modifier bits
#define CAPSLOCK        (1<<3)
#define NUMLOCK         (1<<4)
#define SCROLLLOCK      (1<<5)

// Set after an 0xE0 prefix byte; cleared when the next byte is handled
#define E0ESC           (1<<6)

// Special keycodes
#define KEY_HOME        0xE0
#define KEY_END         0xE1
#define KEY_UP          0xE2
#define KEY_DN          0xE3
#define KEY_LF          0xE4
#define KEY_RT          0xE5
#define KEY_PGUP        0xE6
#define KEY_PGDN        0xE7
#define KEY_INS         0xE8
#define KEY_DEL         0xE9

// C('A') == Control-A
// The argument is parenthesized so that an expression argument such as
// C(a | b) is evaluated as a whole before '@' is subtracted.
#define C(x) ((x) - '@')

// Hold-type modifiers, indexed by scancode. Entries at 0x80 and above
// are for E0-prefixed keys, whose scancode kbdgetc() ORs with 0x80.
// Written with standard C99 "[index] = value" designators.
static uchar shiftcode[256] =
{
  [0x1D] = CTL,
  [0x2A] = SHIFT,
  [0x36] = SHIFT,
  [0x38] = ALT,
  [0x9D] = CTL,
  [0xB8] = ALT
};

// Toggle-type modifiers, indexed by make code; each press XORs the bit.
static uchar togglecode[256] =
{
  [0x3A] = CAPSLOCK,
  [0x45] = NUMLOCK,
  [0x46] = SCROLLLOCK
};

// normal map: used when neither CTL nor SHIFT is held.
// Index = scancode; E0-prefixed keys are looked up at (scancode | 0x80).
// Each row below covers 8 consecutive scancodes starting at 0x00.
static uchar normalmap[256] =
{
  NO,   0x1B, '1',  '2',  '3',  '4',  '5',  '6',
  '7',  '8',  '9',  '0',  '-',  '=',  '\b', '\t',
  'q',  'w',  'e',  'r',  't',  'y',  'u',  'i',
  'o',  'p',  '[',  ']',  '\n', NO,   'a',  's',
  'd',  'f',  'g',  'h',  'j',  'k',  'l',  ';',
  '\'', '`',  NO,   '\\', 'z',  'x',  'c',  'v',
  'b',  'n',  'm',  ',',  '.',  '/',  NO,   '*',
  NO,   ' ',  NO,   NO,   NO,   NO,   NO,   NO,
  NO,   NO,   NO,   NO,   NO,   NO,   NO,   '7',
  '8',  '9',  '-',  '4',  '5',  '6',  '+',  '1',
  '2',  '3',  '0',  '.',  NO,   NO,   NO,   NO,
  [0x9C] = '\n',      // KP_Enter
  [0xB5] = '/',       // KP_Div
  [0xC8] = KEY_UP,    [0xD0] = KEY_DN,
  [0xC9] = KEY_PGUP,  [0xD1] = KEY_PGDN,
  [0xCB] = KEY_LF,    [0xCD] = KEY_RT,
  // Home shares the keypad-7 key (0x47 above), so its E0 form is
  // 0x47 | 0x80 = 0xC7, matching the other navigation keys.
  [0xC7] = KEY_HOME,  [0xCF] = KEY_END,
  [0xD2] = KEY_INS,   [0xD3] = KEY_DEL
};

// with SHIFT: used when SHIFT is held and CTL is not.
// Same layout as normalmap: index = scancode, E0-prefixed keys at
// (scancode | 0x80), 8 scancodes per row starting at 0x00.
static uchar shiftmap[256] =
{
  NO,   033,  '!',  '@',  '#',  '$',  '%',  '^',
  '&',  '*',  '(',  ')',  '_',  '+',  '\b', '\t',
  'Q',  'W',  'E',  'R',  'T',  'Y',  'U',  'I',
  'O',  'P',  '{',  '}',  '\n', NO,   'A',  'S',
  'D',  'F',  'G',  'H',  'J',  'K',  'L',  ':',
  '"',  '~',  NO,   '|',  'Z',  'X',  'C',  'V',
  'B',  'N',  'M',  '<',  '>',  '?',  NO,   '*',
  NO,   ' ',  NO,   NO,   NO,   NO,   NO,   NO,
  NO,   NO,   NO,   NO,   NO,   NO,   NO,   '7',
  '8',  '9',  '-',  '4',  '5',  '6',  '+',  '1',
  '2',  '3',  '0',  '.',  NO,   NO,   NO,   NO,
  [0x9C] = '\n',      // KP_Enter
  [0xB5] = '/',       // KP_Div
  [0xC8] = KEY_UP,    [0xD0] = KEY_DN,
  [0xC9] = KEY_PGUP,  [0xD1] = KEY_PGDN,
  [0xCB] = KEY_LF,    [0xCD] = KEY_RT,
  // Home shares the keypad-7 key (0x47 above), so its E0 form is
  // 0x47 | 0x80 = 0xC7, matching the other navigation keys.
  [0xC7] = KEY_HOME,  [0xCF] = KEY_END,
  [0xD2] = KEY_INS,   [0xD3] = KEY_DEL
};

// with CTRL: used whenever CTL is held (with or without SHIFT).
// Index = scancode; E0-prefixed keys are looked up at (scancode | 0x80).
// Each row covers 8 consecutive scancodes starting at 0x00.
static uchar ctlmap[256] =
{
  NO,      NO,      NO,      NO,      NO,      NO,      NO,      NO,
  NO,      NO,      NO,      NO,      NO,      NO,      NO,      NO,
  C('Q'),  C('W'),  C('E'),  C('R'),  C('T'),  C('Y'),  C('U'),  C('I'),
  C('O'),  C('P'),  NO,      NO,      '\r',    NO,      C('A'),  C('S'),
  C('D'),  C('F'),  C('G'),  C('H'),  C('J'),  C('K'),  C('L'),  NO,
  NO,      NO,      NO,      C('\\'), C('Z'),  C('X'),  C('C'),  C('V'),
  C('B'),  C('N'),  C('M'),  NO,      NO,      C('/'),  NO,      NO,
  [0x9C] = '\r',      // KP_Enter
  [0xB5] = C('/'),    // KP_Div
  [0xC8] = KEY_UP,    [0xD0] = KEY_DN,
  [0xC9] = KEY_PGUP,  [0xD1] = KEY_PGDN,
  [0xCB] = KEY_LF,    [0xCD] = KEY_RT,
  // Home is the E0-prefixed form of scancode 0x47, so it is looked up
  // at 0x47 | 0x80 = 0xC7, matching the other navigation keys.
  [0xC7] = KEY_HOME,  [0xCF] = KEY_END,
  [0xD2] = KEY_INS,   [0xD3] = KEY_DEL
};

Credits

  • xv6 source (MIT 6.S081/6.828 lineage).