Skip to content

Commit 819098f

Browse files
committed
Unify OSC scanning into one pass
Five separate scanners were each sweeping the PTY input to find their own OSC code (color queries, PWD (OSC 7), 51/E, 52, 133), doing O(5N) work per write. Replace them with a single `OscIterator` that yields `(code, payload, term)` for any well-formed OSC, and one `dispatchPostWriteOscs` that handles codes 7/51/52/133 in document order. Colour queries (pre-vtWrite) use the same iterator. Drop the now-unused `OscScanner`, `findOscTerminator`, and `parseDecimal` helpers.

Engine micro-benchmarks on bulk input improve ~16-28%:

- plain: 65.8 → 81.5 MB/s
- styled: 60.8 → 77.7 MB/s
- unicode: 40.1 → 48.3 MB/s
- stream: 63.2 → 73.3 MB/s

PTY-path numbers are process-I/O-bound and unchanged within noise.
1 parent 895e55b commit 819098f

1 file changed

Lines changed: 133 additions & 170 deletions

File tree

src/module.zig

Lines changed: 133 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -194,11 +194,9 @@ fn fnWriteInput(raw_env: ?*c.emacs_env, _: isize, args: [*c]c.emacs_value, _: ?*
194194
term.vtWrite(norm_buf[0..npos]);
195195
}
196196

197-
// Scan for OSC sequences that libghostty-vt discards.
198-
extractAndSetPwd(term, raw);
199-
extractOsc51(env, raw);
200-
extractOsc52(env, raw);
201-
extractOsc133(env, raw);
197+
// Scan for OSC sequences that libghostty-vt discards (7, 51, 52, 133).
198+
// One pass, dispatched by code in document order.
199+
dispatchPostWriteOscs(env, term, raw);
202200

203201
return env.nil();
204202
}
@@ -207,130 +205,139 @@ fn fnWriteInput(raw_env: ?*c.emacs_env, _: isize, args: [*c]c.emacs_value, _: ?*
207205
// OSC sequence helpers
208206
// ---------------------------------------------------------------------------
209207

210-
/// Find the end of an OSC sequence payload starting at `start`.
211-
/// Scans for the terminator: BEL (0x07) or ST (ESC \).
212-
/// Returns the index of the first terminator byte, or data.len if none found.
213-
fn findOscTerminator(data: []const u8, start: usize) usize {
214-
var pos = start;
215-
while (pos < data.len) {
216-
if (data[pos] == 0x07) return pos; // BEL
217-
if (data[pos] == 0x1b and pos + 1 < data.len and data[pos + 1] == '\\') return pos; // ST
218-
pos += 1;
219-
}
220-
return data.len;
221-
}
208+
/// An OSC sequence extracted by `OscIterator`.
209+
const OscEntry = struct {
210+
/// Decimal OSC code (e.g. 4, 7, 10, 11, 51, 52, 133).
211+
code: u32,
212+
/// Payload bytes between the code's trailing `;` and the terminator.
213+
payload: []const u8,
214+
/// Terminator bytes (BEL or ESC \) — forwarded back on replies.
215+
term: []const u8,
216+
};
222217

223-
/// Iterator-style scanner that yields successive OSC sequences matching `prefix`.
224-
/// Each call to `next()` returns the payload slice (after the prefix, before the
225-
/// terminator), or null when no more matches exist.
226-
const OscScanner = struct {
218+
/// Single-pass iterator over well-formed OSC sequences in a byte slice.
219+
/// Advances past `ESC ]`, parses the decimal code up to `;`, locates the
220+
/// BEL/ST terminator, and yields `(code, payload, term)`. Partial
221+
/// sequences at the end of the buffer stop iteration so the caller
222+
/// doesn't act on half-received data.
223+
const OscIterator = struct {
227224
data: []const u8,
228-
prefix: []const u8,
229225
pos: usize = 0,
230226

231-
const Match = struct {
232-
payload: []const u8,
233-
end: usize,
234-
};
227+
fn next(self: *OscIterator) ?OscEntry {
228+
while (self.pos < self.data.len) {
229+
const intro = std.mem.indexOfPos(u8, self.data, self.pos, "\x1b]") orelse {
230+
self.pos = self.data.len;
231+
return null;
232+
};
233+
const code_start = intro + 2;
235234

236-
fn next(self: *OscScanner) ?Match {
237-
while (self.pos + self.prefix.len < self.data.len) {
238-
if (std.mem.startsWith(u8, self.data[self.pos..], self.prefix)) {
239-
const payload_start = self.pos + self.prefix.len;
240-
const payload_end = findOscTerminator(self.data, payload_start);
241-
self.pos = payload_end;
242-
return .{ .payload = self.data[payload_start..payload_end], .end = payload_end };
243-
} else {
244-
self.pos += 1;
235+
// Decimal code up to the first `;`.
236+
var code_end = code_start;
237+
while (code_end < self.data.len and self.data[code_end] >= '0' and self.data[code_end] <= '9') {
238+
code_end += 1;
239+
}
240+
if (code_end == code_start or code_end >= self.data.len or self.data[code_end] != ';') {
241+
self.pos = code_start;
242+
continue;
243+
}
244+
const payload_start = code_end + 1;
245+
246+
// Terminator search. A partial OSC (no terminator before EOF)
247+
// stops iteration entirely: bytes after an unterminated payload
248+
// are opaque, so there's no safe way to continue scanning.
249+
var end = payload_start;
250+
var term_len: usize = 0;
251+
while (end < self.data.len) : (end += 1) {
252+
if (self.data[end] == 0x07) {
253+
term_len = 1;
254+
break;
255+
}
256+
if (self.data[end] == 0x1b and end + 1 < self.data.len and self.data[end + 1] == '\\') {
257+
term_len = 2;
258+
break;
259+
}
245260
}
261+
if (term_len == 0) {
262+
self.pos = self.data.len;
263+
return null;
264+
}
265+
266+
self.pos = end + term_len;
267+
const code = std.fmt.parseInt(u32, self.data[code_start..code_end], 10) catch continue;
268+
return .{
269+
.code = code,
270+
.payload = self.data[payload_start..end],
271+
.term = self.data[end .. end + term_len],
272+
};
246273
}
247274
return null;
248275
}
249276
};
250277

251-
/// Scan data for OSC 51;E elisp eval sequences.
252-
/// OSC 51 format: ESC ] 51 ; E <quoted-args> (ST | BEL)
253-
/// Passes the payload (after 'E') to ghostel--osc51-eval for dispatch.
254-
fn extractOsc51(env: emacs.Env, data: []const u8) void {
255-
var scanner = OscScanner{ .data = data, .prefix = "\x1b]51;" };
256-
while (scanner.next()) |match| {
257-
const payload = match.payload;
258-
if (payload.len < 2) continue;
259-
// Sub-command must be 'E'
260-
if (payload[0] != 'E') continue;
261-
_ = env.call1(
262-
emacs.sym.@"ghostel--osc51-eval",
263-
env.makeString(payload[1..]),
264-
);
265-
}
266-
}
267-
268-
/// Scan data for OSC 7 sequences and set the terminal PWD.
269-
/// OSC 7 format: ESC ] 7 ; <url> (ST | BEL)
270-
fn extractAndSetPwd(term: *Terminal, data: []const u8) void {
271-
var scanner = OscScanner{ .data = data, .prefix = "\x1b]7;" };
272-
while (scanner.next()) |match| {
273-
if (match.payload.len > 0) {
274-
const gs = gt.GhosttyString{ .ptr = match.payload.ptr, .len = match.payload.len };
275-
term.setPwd(&gs) catch {};
278+
/// Dispatch OSC 7 / 51 / 52 / 133 from `data` in document order.
279+
/// These are the sequences that libghostty-vt discards, so ghostel
280+
/// has to scan for them itself. All four used to scan the buffer
281+
/// independently; one unified pass is strictly less work for bulk
282+
/// output and preserves source-order dispatch.
283+
///
284+
/// Runs AFTER `vtWrite` so libghostty has already seen the bytes —
285+
/// OSC 7 calls back into libghostty (`setPwd`) and the others call
286+
/// Elisp.
287+
fn dispatchPostWriteOscs(env: emacs.Env, term: *Terminal, data: []const u8) void {
288+
var it = OscIterator{ .data = data };
289+
while (it.next()) |osc| {
290+
switch (osc.code) {
291+
// OSC 7: working directory as a file:// URL.
292+
7 => {
293+
if (osc.payload.len == 0) continue;
294+
const gs = gt.GhosttyString{ .ptr = osc.payload.ptr, .len = osc.payload.len };
295+
term.setPwd(&gs) catch {};
296+
},
297+
// OSC 51;E: whitelisted Elisp eval (ghostel extension).
298+
51 => {
299+
if (osc.payload.len < 2 or osc.payload[0] != 'E') continue;
300+
_ = env.call1(
301+
emacs.sym.@"ghostel--osc51-eval",
302+
env.makeString(osc.payload[1..]),
303+
);
304+
},
305+
// OSC 52: clipboard set. Queries ("?") are ignored.
306+
52 => {
307+
const semi = std.mem.indexOfScalar(u8, osc.payload, ';') orelse continue;
308+
const selection = osc.payload[0..semi];
309+
const b64 = osc.payload[semi + 1 ..];
310+
if (b64.len == 0) continue;
311+
if (b64.len == 1 and b64[0] == '?') continue;
312+
_ = env.call2(
313+
emacs.sym.@"ghostel--osc52-handle",
314+
env.makeString(selection),
315+
env.makeString(b64),
316+
);
317+
},
318+
// OSC 133: semantic prompt markers (A/B/C/D).
319+
133 => {
320+
if (osc.payload.len == 0) continue;
321+
const marker_type = osc.payload[0];
322+
if (marker_type != 'A' and marker_type != 'B' and marker_type != 'C' and marker_type != 'D') continue;
323+
const has_param = osc.payload.len > 1 and osc.payload[1] == ';';
324+
const param_data = if (has_param) osc.payload[2..] else &[_]u8{};
325+
const type_str: [1]u8 = .{marker_type};
326+
const param_val = if (has_param and param_data.len > 0)
327+
env.makeString(param_data)
328+
else
329+
env.nil();
330+
_ = env.call2(
331+
emacs.sym.@"ghostel--osc133-marker",
332+
env.makeString(&type_str),
333+
param_val,
334+
);
335+
},
336+
else => {},
276337
}
277338
}
278339
}
279340

280-
/// Scan data for OSC 52 clipboard sequences.
281-
/// OSC 52 format: ESC ] 52 ; <selection> ; <base64-data> (ST | BEL)
282-
/// Calls ghostel--osc52-handle with the selection and base64 data.
283-
fn extractOsc52(env: emacs.Env, data: []const u8) void {
284-
var scanner = OscScanner{ .data = data, .prefix = "\x1b]52;" };
285-
while (scanner.next()) |match| {
286-
const payload = match.payload;
287-
// Find the ';' separating selection from data
288-
const semi = std.mem.indexOfScalar(u8, payload, ';') orelse continue;
289-
const selection = payload[0..semi];
290-
const b64 = payload[semi + 1 ..];
291-
if (b64.len == 0) continue;
292-
// Ignore clipboard queries ('?')
293-
if (b64.len == 1 and b64[0] == '?') continue;
294-
_ = env.call2(
295-
emacs.sym.@"ghostel--osc52-handle",
296-
env.makeString(selection),
297-
env.makeString(b64),
298-
);
299-
}
300-
}
301-
302-
/// Scan data for OSC 133 semantic prompt markers.
303-
/// OSC 133 format: ESC ] 133 ; <type> [; <param>] (ST | BEL)
304-
/// type: A = prompt start, B = command start, C = output start, D = command finished
305-
/// For type D, param is the exit status.
306-
fn extractOsc133(env: emacs.Env, data: []const u8) void {
307-
var scanner = OscScanner{ .data = data, .prefix = "\x1b]133;" };
308-
while (scanner.next()) |match| {
309-
const payload = match.payload;
310-
if (payload.len == 0) continue;
311-
const marker_type = payload[0];
312-
313-
// Only handle known types
314-
if (marker_type != 'A' and marker_type != 'B' and marker_type != 'C' and marker_type != 'D') continue;
315-
316-
// Check for optional parameter after ';'
317-
const has_param = payload.len > 1 and payload[1] == ';';
318-
const param_data = if (has_param) payload[2..] else &[_]u8{};
319-
320-
const type_str: [1]u8 = .{marker_type};
321-
const param_val = if (has_param and param_data.len > 0)
322-
env.makeString(param_data)
323-
else
324-
env.nil();
325-
326-
_ = env.call2(
327-
emacs.sym.@"ghostel--osc133-marker",
328-
env.makeString(&type_str),
329-
param_val,
330-
);
331-
}
332-
}
333-
334341
/// Send `OSC N;rgb:RRRR/GGGG/BBBB <term>` for a dynamic color (OSC 10/11).
335342
fn sendDynamicColorReply(
336343
env: emacs.Env,
@@ -375,13 +382,6 @@ fn sendPaletteColorReply(
375382
_ = env.call1(emacs.sym.@"ghostel--flush-output", env.makeString(written));
376383
}
377384

378-
/// Parse a non-negative decimal integer. Returns null on empty input,
379-
/// any non-digit byte, or numeric overflow of `u32`.
380-
fn parseDecimal(s: []const u8) ?u32 {
381-
if (s.len == 0) return null;
382-
return std.fmt.parseInt(u32, s, 10) catch null;
383-
}
384-
385385
/// Scan data for OSC 4/10/11 color queries and emit responses in source
386386
/// order. libghostty applies OSC 4/10/11 **sets** internally but silently
387387
/// drops the query form (`?` value), so ghostel scans the raw input and
@@ -402,72 +402,35 @@ fn extractOscColorQueries(env: emacs.Env, term: *Terminal, data: []const u8) voi
402402
var palette: [256]gt.ColorRgb = undefined;
403403
var palette_loaded = false;
404404

405-
var pos: usize = 0;
406-
while (pos + 1 < data.len) {
407-
// Find next OSC introducer "ESC ]".
408-
const osc_rel = std.mem.indexOfPos(u8, data, pos, "\x1b]") orelse break;
409-
const code_start = osc_rel + 2;
410-
411-
// Read the decimal OSC code up to the first ';'.
412-
var code_end = code_start;
413-
while (code_end < data.len and data[code_end] >= '0' and data[code_end] <= '9') {
414-
code_end += 1;
415-
}
416-
if (code_end == code_start or code_end >= data.len or data[code_end] != ';') {
417-
pos = code_start;
418-
continue;
419-
}
420-
const payload_start = code_end + 1;
421-
422-
// Find the terminator (BEL or ST). Require a real one — partial OSCs
423-
// split across chunks are left for the next call so we don't reply
424-
// before the client has finished writing its query.
425-
var end = payload_start;
426-
var term_len: usize = 0;
427-
while (end < data.len) : (end += 1) {
428-
if (data[end] == 0x07) {
429-
term_len = 1;
430-
break;
431-
}
432-
if (data[end] == 0x1b and end + 1 < data.len and data[end + 1] == '\\') {
433-
term_len = 2;
434-
break;
435-
}
436-
}
437-
if (term_len == 0) break;
438-
439-
const payload = data[payload_start..end];
440-
const term_bytes = data[end .. end + term_len];
441-
pos = end + term_len;
442-
443-
const code = parseDecimal(data[code_start..code_end]) orelse continue;
444-
switch (code) {
405+
var it = OscIterator{ .data = data };
406+
while (it.next()) |osc| {
407+
switch (osc.code) {
445408
10 => {
446-
if (!std.mem.eql(u8, payload, "?")) continue;
409+
if (!std.mem.eql(u8, osc.payload, "?")) continue;
447410
var fg: gt.ColorRgb = undefined;
448411
if (!term.getColorForeground(&fg)) continue;
449-
sendDynamicColorReply(env, 10, fg, term_bytes);
412+
sendDynamicColorReply(env, 10, fg, osc.term);
450413
},
451414
11 => {
452-
if (!std.mem.eql(u8, payload, "?")) continue;
415+
if (!std.mem.eql(u8, osc.payload, "?")) continue;
453416
var bg: gt.ColorRgb = undefined;
454417
if (!term.getColorBackground(&bg)) continue;
455-
sendDynamicColorReply(env, 11, bg, term_bytes);
418+
sendDynamicColorReply(env, 11, bg, osc.term);
456419
},
457420
4 => {
458421
// Payload is a ';'-separated list of `index;value` pairs.
459422
// Reply only to pairs whose value is literally "?".
460-
var it = std.mem.splitScalar(u8, payload, ';');
461-
while (it.next()) |index_tok| {
462-
const value_tok = it.next() orelse break;
423+
var sub = std.mem.splitScalar(u8, osc.payload, ';');
424+
while (sub.next()) |index_tok| {
425+
const value_tok = sub.next() orelse break;
463426
if (!std.mem.eql(u8, value_tok, "?")) continue;
464-
const idx = parseDecimal(index_tok) orelse continue;
427+
const idx = std.fmt.parseInt(u32, index_tok, 10) catch continue;
465428
if (idx >= 256) continue;
466429
if (!palette_loaded) {
467430
if (!term.getColorPalette(&palette)) break;
468431
palette_loaded = true;
469432
}
470-
sendPaletteColorReply(env, @intCast(idx), palette[idx], term_bytes);
433+
sendPaletteColorReply(env, @intCast(idx), palette[idx], osc.term);
471434
}
472435
},
473436
else => {},

0 commit comments

Comments
 (0)