Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for possessive quantification, atomic groups, and lookarounds #604

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,13 @@ fileprivate extension Compiler.ByteCodeGen {
let intercept = builder.makeAddress()
let success = builder.makeAddress()

builder.buildSave(success)
builder.buildSave(intercept)
// Positive lookaheads propagate captures through the success SP
builder.buildSave(success, keepsCaptures: true)
// Negative lookaheads should not propagate captures, so the intercept SP
// does not keep captures
let id = builder.buildSaveWithID(intercept, keepsCaptures: false)
try emitNode(child)
builder.buildClearThrough(intercept)
builder.buildClearThrough(id)
if !positive {
builder.buildClear()
}
Expand Down Expand Up @@ -347,10 +350,10 @@ fileprivate extension Compiler.ByteCodeGen {
let intercept = builder.makeAddress()
let success = builder.makeAddress()

builder.buildSaveAddress(success)
builder.buildSave(intercept)
builder.buildSaveAddress(success, keepsCaptures: true)
let id = builder.buildSaveWithID(intercept)
try emitNode(child)
builder.buildClearThrough(intercept)
builder.buildClearThrough(id)
builder.buildFail()

builder.label(intercept)
Expand Down Expand Up @@ -569,8 +572,9 @@ fileprivate extension Compiler.ByteCodeGen {
}

// Set up a dummy save point for possessive to update
var dummyID: SavePointID? = nil
if updatedKind == .possessive {
builder.pushEmptySavePoint()
dummyID = builder.pushEmptySavePoint()
}

// min-trip-count:
Expand Down Expand Up @@ -620,13 +624,16 @@ fileprivate extension Compiler.ByteCodeGen {
}

// exit-policy:
// <possessive: clearSavePoint>
// condBranch(to: exit, ifZeroElseDecrement: %extraTrips)
// <eager: split(to: loop, saving: exit)>
// <possessive:
// clearSavePoint
// split(to: loop, saving: exit)>
// <reluctant: save(restoringAt: loop)>
builder.label(exitPolicy)
if updatedKind == .possessive {
builder.buildClear(possessiveQuantDummy: dummyID!)
}
switch extraTrips {
case nil: break
case 0: builder.buildBranch(to: exit)
Expand All @@ -640,8 +647,7 @@ fileprivate extension Compiler.ByteCodeGen {
case .eager:
builder.buildSplit(to: loopBody, saving: exit)
case .possessive:
builder.buildClear()
builder.buildSplit(to: loopBody, saving: exit)
builder.buildSplit(to: loopBody, saving: exit, id: dummyID!)
case .reluctant:
builder.buildSave(loopBody)
// FIXME: Is this re-entrant? That is would nested
Expand Down
13 changes: 9 additions & 4 deletions Sources/_StringProcessing/Engine/Backtracking.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

extension Processor {
struct SavePoint {
var id: SavePointID?
var pc: InstructionAddress
var pos: Position?
// Quantifiers may store a range of positions to restore to
Expand All @@ -25,7 +26,7 @@ extension Processor {

// FIXME: Save minimal info (e.g. stack position and
// perhaps current start)
var captureEnds: [_StoredCapture]
var captureEnds: [_StoredCapture]?

// The int registers store values that can be relevant to
// backtracking, such as the number of trips in a quantification.
Expand All @@ -37,7 +38,7 @@ extension Processor {
pc: InstructionAddress,
pos: Position?,
stackEnd: CallStackAddress,
captureEnds: [_StoredCapture],
captureEnds: [_StoredCapture]?,
intRegisters: [Int],
PositionRegister: [Input.Index]
) {
Expand Down Expand Up @@ -75,22 +76,26 @@ extension Processor {

func makeSavePoint(
_ pc: InstructionAddress,
addressOnly: Bool = false
addressOnly: Bool = false,
withID id: SavePointID? = nil,
keepCaptures: Bool = false
) -> SavePoint {
SavePoint(
id: id,
pc: pc,
pos: addressOnly ? nil : currentPosition,
rangeStart: nil,
rangeEnd: nil,
stackEnd: .init(callStack.count),
captureEnds: storedCaptures,
captureEnds: keepCaptures ? nil : storedCaptures,
intRegisters: registers.ints,
posRegisters: registers.positions)
}

func startQuantifierSavePoint() -> SavePoint {
// Restores to the instruction AFTER the current quantifier instruction
SavePoint(
id: nil,
pc: controller.pc + 1,
pos: nil,
rangeStart: nil,
Expand Down
71 changes: 71 additions & 0 deletions Sources/_StringProcessing/Engine/InstPayload.swift
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,12 @@ extension Instruction.Payload {
var capture: CaptureRegister {
interpret()
}
/// Creates a payload whose only operand is the given save point ID.
///
/// Delegates to another initializer of this type that accepts the ID
/// directly (that initializer is declared outside this view).
init(saveID: SavePointID) {
self.init(saveID)
}
/// Reads this payload back as a `SavePointID` via `interpret()`.
///
/// Counterpart of `init(saveID:)`.
var saveID: SavePointID {
interpret()
}

// MARK: Packed operand payloads

Expand Down Expand Up @@ -348,6 +354,17 @@ extension Instruction.Payload {
}

// MARK: Struct payloads
/// Creates a payload for a save-point instruction.
///
/// The operands are packed into a `SavePayload` and its raw bits become
/// this payload's value.
///
/// - Parameters:
///   - save: The address recorded in the save point.
///   - id: An optional ID for the save point.
///   - splitTo: An optional second address, used by split instructions.
///   - keepsCaptures: The keeps-captures flag to encode.
init(
  save: InstructionAddress,
  id: SavePointID?,
  splitTo: InstructionAddress? = nil,
  keepsCaptures: Bool
) {
  let packed = SavePayload(
    address: save,
    id: id,
    splitTo: splitTo,
    keepsCaptures: keepsCaptures)
  self.init(packed.rawValue)
}
/// Decodes this payload as a `SavePayload`.
///
/// The raw value is masked with `_payloadMask` before it is handed to
/// `SavePayload.init(rawValue:)`.
var savePayload: SavePayload {
  let payloadBits = rawValue & _payloadMask
  return SavePayload(rawValue: payloadBits)
}

init(_ model: _CharacterClassModel) {
self.init(CharacterClassPayload(model).rawValue)
Expand Down Expand Up @@ -613,3 +630,57 @@ struct AssertionPayload: RawRepresentable {
}
}
}

/// A bit-packed payload describing a save point: the address to save, an
/// optional save point ID, an optional split address, and whether the save
/// point keeps captures.
///
/// Layout of `rawValue` (bit 0 is the least significant bit):
///
///     bits 0...7    address to save
///     bit  8        set when there is NO save point ID
///     bits 9...16   save point ID
///     bit  18       set when there is NO split address
///     bits 19...26  split address
///     bit  30       set when the save point keeps captures
///
/// NOTE(review): every numeric field is only 8 bits wide, so addresses and
/// IDs above 255 cannot be represented. The memberwise initializer asserts
/// on such values instead of silently corrupting neighbouring fields; the
/// layout itself probably needs widening before this ships.
struct SavePayload: RawRepresentable {
  let rawValue: UInt64

  // Field layout; see the type-level comment.
  private static let fieldMask: UInt64 = 0xFF
  private static let noIDFlag: UInt64 = 1 << 8
  private static let idShift: UInt64 = 9
  private static let noSplitFlag: UInt64 = 1 << 18
  private static let splitShift: UInt64 = 19
  private static let keepsCapturesFlag: UInt64 = 1 << 30

  /// Wraps already-packed bits. The bits must not overlap the opcode.
  init(rawValue: UInt64) {
    self.rawValue = rawValue
    assert(rawValue & _opcodeMask == 0)
  }

  /// Packs the given operands.
  ///
  /// - Parameters:
  ///   - address: The address to save; must fit in 8 bits.
  ///   - id: The save point ID, if any; must fit in 8 bits.
  ///   - splitTo: The split address, if any; must fit in 8 bits.
  ///   - keepsCaptures: The keeps-captures flag.
  init(
    address: InstructionAddress,
    id: SavePointID?,
    splitTo: InstructionAddress?,
    keepsCaptures: Bool
  ) {
    // Each field is range-checked on its own. The previous single assertion
    // AND-ed all four values together, which is zero for practically every
    // input and therefore never caught a field spilling into its neighbour.
    let addressBits = UInt64(address.rawValue)
    assert(
      addressBits <= SavePayload.fieldMask,
      "Save address does not fit in the 8-bit address field")
    var bits = addressBits

    if let id = id {
      let idBits = UInt64(id.rawValue)
      assert(
        idBits <= SavePayload.fieldMask,
        "Save point ID does not fit in the 8-bit ID field")
      bits |= idBits << SavePayload.idShift
    } else {
      bits |= SavePayload.noIDFlag
    }

    if let splitAddr = splitTo {
      let splitBits = UInt64(splitAddr.rawValue)
      assert(
        splitBits <= SavePayload.fieldMask,
        "Split address does not fit in the 8-bit split field")
      bits |= splitBits << SavePayload.splitShift
    } else {
      bits |= SavePayload.noSplitFlag
    }

    if keepsCaptures {
      bits |= SavePayload.keepsCapturesFlag
    }

    self.rawValue = bits
  }

  /// The saved address (low 8 bits).
  var addr: InstructionAddress {
    TypedInt(rawValue & SavePayload.fieldMask)
  }

  /// Whether a save point ID was encoded (the "no ID" flag is clear).
  var hasID: Bool {
    rawValue & SavePayload.noIDFlag == 0
  }

  /// The encoded save point ID. Decodes to 0 when no ID was encoded, so
  /// check `hasID` first.
  var id: SavePointID {
    TypedInt((rawValue >> SavePayload.idShift) & SavePayload.fieldMask)
  }

  /// Whether a split address was encoded (the "no split" flag is clear).
  var hasSplit: Bool {
    rawValue & SavePayload.noSplitFlag == 0
  }

  /// The encoded split address. Decodes to 0 when no split address was
  /// encoded, so check `hasSplit` first.
  var split: InstructionAddress {
    TypedInt((rawValue >> SavePayload.splitShift) & SavePayload.fieldMask)
  }

  /// Whether the keeps-captures flag is set.
  var keepsCaptures: Bool {
    rawValue & SavePayload.keepsCapturesFlag != 0
  }
}
3 changes: 3 additions & 0 deletions Sources/_StringProcessing/Engine/Instruction.swift
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,9 @@ extension Instruction {
///
/// Precondition: There is a save point to remove
case clear

/// Remove the save point with the given id
case clearPossessiveQuantDummy

/// Remove save points up to and including the operand
///
Expand Down
87 changes: 66 additions & 21 deletions Sources/_StringProcessing/Engine/MEBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ extension MEProgram {
// We currently deduce the capture count from the capture register number.
nextCaptureRegister.rawValue
}

var nextSavePointID: SavePointID = 0

init() {}
}
Expand All @@ -60,11 +62,28 @@ extension MEProgram.Builder {
struct AddressFixup {
var first: AddressToken
var second: AddressToken? = nil

init(_ a: AddressToken) { self.first = a }
init(_ a: AddressToken, _ b: AddressToken) {
var id: SavePointID?
var keepsCaptures: Bool

init(
_ a: AddressToken,
id: SavePointID?,
keepsCaptures: Bool
) {
self.first = a
self.id = id
self.keepsCaptures = keepsCaptures
}
init(
_ a: AddressToken,
_ b: AddressToken,
id: SavePointID?,
keepsCaptures: Bool
) {
self.first = a
self.second = b
self.id = id
self.keepsCaptures = keepsCaptures
}
}
}
Expand Down Expand Up @@ -117,27 +136,42 @@ extension MEProgram.Builder {
fixup(to: t)
}

mutating func buildSave(_ t: AddressToken) {
mutating func buildSave(_ t: AddressToken, keepsCaptures: Bool = false) {
instructions.append(.init(.save))
fixup(to: t)
fixup(to: t, keepsCaptures: keepsCaptures)
}
mutating func buildSaveAddress(_ t: AddressToken) {
mutating func buildSaveAddress(_ t: AddressToken, keepsCaptures: Bool = false) {
instructions.append(.init(.saveAddress))
fixup(to: t)
fixup(to: t, keepsCaptures: keepsCaptures)
}

/// Appends a `.save` instruction targeting `t` and tags it with a freshly
/// allocated save point ID.
///
/// - Parameters:
///   - t: The address token the save refers to.
///   - keepsCaptures: Forwarded to the fixup for this instruction.
/// - Returns: The ID allocated for this save point.
mutating func buildSaveWithID(
  _ t: AddressToken,
  keepsCaptures: Bool = false
) -> SavePointID {
  // fixup(to:) attaches to the most recently appended instruction, so the
  // save must be emitted before the fixup is registered.
  instructions.append(.init(.save))
  let freshID = makeSavePointID()
  fixup(to: t, id: freshID, keepsCaptures: keepsCaptures)
  return freshID
}
/// Appends a `.saveAddress` instruction targeting `t` and tags it with a
/// freshly allocated save point ID.
///
/// - Parameter t: The address token the save refers to.
/// - Returns: The ID allocated for this save point.
mutating func buildSaveAddressWithID(_ t: AddressToken) -> SavePointID {
  // fixup(to:) attaches to the most recently appended instruction, so the
  // instruction must be emitted before the fixup is registered.
  instructions.append(.init(.saveAddress))
  let freshID = makeSavePointID()
  fixup(to: t, id: freshID)
  return freshID
}
mutating func buildSplit(
to: AddressToken, saving: AddressToken
to: AddressToken, saving: AddressToken, id: SavePointID? = nil
) {
instructions.append(.init(.splitSaving))
fixup(to: (to, saving))
fixup(to: (to, saving), id: id)
}

/// Appends a `.clear` instruction, built from the opcode alone.
mutating func buildClear() {
instructions.append(.init(.clear))
}
mutating func buildClearThrough(_ t: AddressToken) {
instructions.append(.init(.clearThrough))
fixup(to: t)
/// Appends a `.clearPossessiveQuantDummy` instruction whose payload is the
/// given save point ID.
///
/// - Parameter id: The ID of the save point to remove.
mutating func buildClear(possessiveQuantDummy id: SavePointID) {
  let payload = Instruction.Payload(saveID: id)
  instructions.append(.init(.clearPossessiveQuantDummy, payload))
}
/// Appends a `.clearThrough` instruction whose payload is the given save
/// point ID.
///
/// - Parameter id: The save point ID carried as the instruction's operand.
mutating func buildClearThrough(_ id: SavePointID) {
  let payload = Instruction.Payload(saveID: id)
  instructions.append(.init(.clearThrough, payload))
}
mutating func buildFail() {
instructions.append(.init(.fail))
Expand Down Expand Up @@ -357,15 +391,16 @@ extension MEProgram.Builder {
payload = .init(addr: addr, int: inst.payload.int)
case .condBranchSamePosition:
payload = .init(addr: addr, position: inst.payload.position)
case .branch, .save, .saveAddress, .clearThrough:
case .branch:
payload = .init(addr: addr)

case .save, .saveAddress:
payload = .init(save: addr, id: tok.id, keepsCaptures: tok.keepsCaptures)
case .splitSaving:
guard let fix2 = tok.second else {
throw Unreachable("TODO: reason")
}
let saving = addressTokens[fix2.rawValue]!
payload = .init(addr: addr, addr2: saving)
payload = .init(save: saving, id: tok.id, splitTo: addr, keepsCaptures: tok.keepsCaptures)

default: throw Unreachable("TODO: reason")

Expand Down Expand Up @@ -435,22 +470,28 @@ extension MEProgram.Builder {
// Associate the most recently added instruction with
// the provided token, ensuring it is fixed up during
// assembly
mutating func fixup(to t: AddressToken) {
mutating func fixup(
to t: AddressToken,
id: SavePointID? = nil,
keepsCaptures: Bool = false
) {
assert(!instructions.isEmpty)
addressFixups.append(
(InstructionAddress(instructions.endIndex-1), .init(t)))
(InstructionAddress(instructions.endIndex-1), .init(t, id: id, keepsCaptures: keepsCaptures)))
}

// Associate the most recently added instruction with
// the provided tokens, ensuring it is fixed up during
// assembly
mutating func fixup(
to ts: (AddressToken, AddressToken)
to ts: (AddressToken, AddressToken),
id: SavePointID? = nil,
keepsCaptures: Bool = false
) {
assert(!instructions.isEmpty)
addressFixups.append((
InstructionAddress(instructions.endIndex-1),
.init(ts.0, ts.1)))
.init(ts.0, ts.1, id: id, keepsCaptures: keepsCaptures)))
}

// Push an "empty" save point which will, upon restore, just restore from
Expand All @@ -459,11 +500,11 @@ extension MEProgram.Builder {
//
// This is useful for possessive quantification that needs some initial save
// point to "ratchet" upon a successful match.
mutating func pushEmptySavePoint() {
mutating func pushEmptySavePoint() -> SavePointID {
if failAddressToken == nil {
failAddressToken = makeAddress()
}
buildSaveAddress(failAddressToken!)
return buildSaveAddressWithID(failAddressToken!)
}

}
Expand Down Expand Up @@ -528,6 +569,10 @@ extension MEProgram.Builder {
defer { nextPositionRegister.rawValue += 1 }
return r
}
/// Hands out the next unused save point ID and advances the counter.
///
/// - Returns: The value of `nextSavePointID` before it was incremented.
mutating func makeSavePointID() -> SavePointID {
  let allocated = nextSavePointID
  nextSavePointID.rawValue += 1
  return allocated
}

// TODO: A register-mapping helper struct, which could release
// registers without monotonicity required
Expand Down
Loading