Day 06: Tuning Trouble
module Day06
using ..CircularBuffers
function find_start_marker(n::Int, s::String)
for i in n:length(s)
if allunique(s[i-n+1:i])
return i
end
end
nothing
end
<<day06-circular-buffer>>
function main(io::IO, io_out::IO=stdout)
input = readline(io)
println(io_out, "Part 1: $(find_start_marker_bitmask(4, input))")
println(io_out, "Part 2: $(find_start_marker_bitmask(14, input))")
end
end # module
@day 6
๐ฎ๐ฎ๐ฎ๐ฎ๐ฎ Day 6
๐ฎ๐ฎ๐ฎ๐ฎ๐ฎ Part 1: 1155
๐ฎ๐ฎ๐ฎ๐ฎ๐ฎ Part 2: 2789
Let's go crazy
This is rather mad, but we can implement a circular buffer, so theoretically we would not need to load all data at once. Call me crazy, but this sliding window thing that is introduced here is typically something seemingly innocuous that comes back with a revenge next week in Advent of Code.
function find_start_marker_cb(n::Int, s::String)
b = CircularBuffer{Char}(n)
for (i, c) in enumerate(s)
push!(b, c)
if length(b) == n && allunique(b)
return i
end
end
-1
end
A circular buffer sits on a Vector
of constant size. Each time we push!
an element to the buffer, we assign it to the current endloc
pointer, overwriting the oldest value. This pointer gets advanced by one, wrapping around when needed.
module CircularBuffers
export CircularBuffer, content, pushpop!
mutable struct CircularBuffer{T}
content::Vector{T}
endloc::Int
length::Int
end
<<circular-buffer-constructors>>
<<circular-buffer-methods>>
end
For now we have two constructors: one that has the buffer size given, and leaves the contents uninitialised, the other where you give prefilled contents and we assume the buffer is completely filled.
CircularBuffer{T}(size::Int) where T =
CircularBuffer{T}(Vector{T}(undef, size), 1, 0)
CircularBuffer{T}(content::Vector{T}) where T =
CircularBuffer{T}(content, 1, length(content))
Julia provides an interface definition for collections. This interface is not in any way regulated by the type system though. It is convenient to build some unit tests.
using Test, AOC2022.CircularBuffers
@testset "CircularBuffers" begin
b = CircularBuffer{Int}(4)
@test isempty(b)
foreach(i->push!(b, i), 1:3)
@test length(b) == 3
@test sort(content(b)) == [1, 2, 3]
foreach(i->push!(b, i), 1:3)
@test 1 โ b && 2 โ b && 3 โ b
@test 4 โ b
@test length(b) == 4
empty!(b)
@test isempty(b)
@test eltype(b) == Int
end
General collection
A general collection in Julia is expected to have the following methods defined.
Base.isempty(b::CircularBuffer{T}) where T = b.length == 0
function Base.empty!(b::CircularBuffer{T}) where T
b.length = 0
b.endloc = 1
end
Base.length(b::CircularBuffer{T}) where T = b.length
Base.checked_length(b::CircularBuffer{T}) where T = b.length
Iterable collection
In many cases we need to see the contents but are not interested in the order of things. The contents
function only worries about rearranging stuff when the length of the buffer contents is shorter than the buffer size.
function content(b::CircularBuffer{T}) where T
if b.length < length(b.content)
start = mod1(b.endloc-b.length, length(b.content))
if start+b.length <= length(b.content)
b.content[start:start+b.length-1]
else
rest = start + b.length - length(b.content)
[b.content[start:end];b.content[1:rest]]
end
else
b.content
end
end
Base.in(item::T, b::CircularBuffer{T}) where T = item in content(b)
Base.eltype(::CircularBuffer{T}) where T = T
Base.unique(b::CircularBuffer{T}) where T = unique(content(b))
Base.unique(f::Function, b::CircularBuffer{T}) where T = unique(f, content(b))
function Base.push!(b::CircularBuffer{T}, item::T) where T
b.content[b.endloc] = item
b.endloc = mod1(b.endloc+1, length(b.content))
b.length = min(length(b.content), b.length+1)
end
function pushpop!(b::CircularBuffer{T}, item::T) where T
oldvalue = b.content[b.endloc]
b.content[b.endloc] = item
b.endloc = mod1(b.endloc+1, length(b.content))
oldvalue
end
Base.allunique(b::CircularBuffer{T}) where T = allunique(content(b))
Using a bitset
Julia has a datatype BitSet
, but that doesn't keep track of the number of items in the set. We can make our own BitCounter
. This doesn't exactly count the number of unique items in the set, but doubles are counted only once due to the virtue of the exclusive-or operator. This means that the maximum count is still the length of the set, and this only occurs when all inserted elements are unique.
mutable struct BitCounter{T}
bits::T
count::T
end
BitCounter{T}() where T <: Integer = BitCounter{T}(0, 0)
function insert!(b::BitCounter{T}, i::T) where T <: Integer
if b.bits & (1 << i) == 0
b.count += 1
end
b.bits โป= (1 << i)
end
function remove!(b::BitCounter{T}, i::T) where T <: Integer
if b.bits & (1 << i) != 0
b.count -= 1
end
b.bits โป= (1 << i)
end
function find_start_marker_bitmask(n::Int, s::String)
b = CircularBuffer{Char}(Vector{Char}(s[1:n]))
x = BitCounter{Int64}()
for c in s[1:n]
insert!(x, c - 'a')
end
for (j, c) in enumerate(s[n+1:end])
out = pushpop!(b, c)
remove!(x, out - 'a')
insert!(x, c - 'a')
if x.count == n
return j+n
end
end
-1
end
Benchmarks
The crazy thing is: the circular buffer version is faster than the first version, which I don't understand at all. What I do understand is why the bitmask version beats all the others.
using BenchmarkTools
using AOC2022: with_cache
using AOC2022.Day06: find_start_marker, find_start_marker_cb, find_start_marker_bitmask
input = open(readline, "../../data/day06.txt", "r")
find_start_marker(14, input) ==
find_start_marker_cb(14, input) ==
find_start_marker_bitmask(14, input)
true
with_cache("../artifacts/day06-benchmark-1.bin") do
@benchmark find_start_marker(14, input)
end
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min โฆ max): 317.248 ฮผs โฆ 17.777 ms โ GC (min โฆ max): 0.00% โฆ 96.95%
Time (median): 338.184 ฮผs โ GC (median): 0.00%
Time (mean ยฑ ฯ): 492.035 ฮผs ยฑ 1.338 ms โ GC (mean ยฑ ฯ): 22.98% ยฑ 8.21%
โโโ
โ
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
317 ฮผs Histogram: frequency by time 577 ฮผs <
Memory estimate: 1007.12 KiB, allocs estimate: 13937.
with_cache("../artifacts/day06-benchmark-2.bin") do
@benchmark find_start_marker_cb(14, input)
end
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min โฆ max): 244.910 ฮผs โฆ 17.946 ms โ GC (min โฆ max): 0.00% โฆ 97.51%
Time (median): 271.987 ฮผs โ GC (median): 0.00%
Time (mean ยฑ ฯ): 425.628 ฮผs ยฑ 1.305 ms โ GC (mean ยฑ ฯ): 24.82% ยฑ 7.90%
โ
โโ
โโโโโโโ
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
245 ฮผs Histogram: frequency by time 504 ฮผs <
Memory estimate: 923.52 KiB, allocs estimate: 11181.
with_cache("../artifacts/day06-benchmark-3.bin") do
@benchmark find_start_marker_bitmask(14, input)
end
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min โฆ max): 12.393 ฮผs โฆ 180.870 ฮผs โ GC (min โฆ max): 0.00% โฆ 0.00%
Time (median): 13.224 ฮผs โ GC (median): 0.00%
Time (mean ยฑ ฯ): 13.314 ฮผs ยฑ 1.881 ฮผs โ GC (mean ยฑ ฯ): 0.00% ยฑ 0.00%
โโโโโโ
โโโโโโโโโโโโ
โ
โ
โ
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ โ
12.4 ฮผs Histogram: frequency by time 15.1 ฮผs <
Memory estimate: 4.23 KiB, allocs estimate: 4.
Rust
mod aoc;
use crate::aoc::{Result, input_error};
use std::io;
fn read_input() -> Result<Vec<u8>> {
let mut input = String::new();
io::stdin().read_line(&mut input).map_err(input_error)?;
Ok(input.bytes().collect())
}
#[derive(Debug)]
struct CircularBuffer<T> {
content: Vec<T>,
endloc: usize,
}
impl<T> CircularBuffer<T> {
fn new(content: Vec<T>) -> Self {
CircularBuffer { content: content, endloc: 0 }
}
fn push(self: &mut Self, mut item: T) -> T {
std::mem::swap(&mut item, &mut self.content[self.endloc]);
self.endloc = (self.endloc + 1) % self.content.len();
item
}
}
#[derive(Debug)]
struct BitSet {
bits: usize,
count: usize
}
impl BitSet {
fn new() -> Self { BitSet { bits: 0, count: 0 } }
fn insert(self: &mut Self, i: usize) {
if self.bits & (1 << i) == 0 {
self.count += 1;
}
self.bits ^= 1 << i;
}
fn remove(self: &mut Self, i: usize) {
if self.bits & (1 << i) != 0 {
self.count -= 1;
}
self.bits ^= 1 << i;
}
}
fn find_start_marker(input: &Vec<u8>, n: usize) -> usize {
let mut buf = CircularBuffer::new(input[..n].to_vec());
let mut set = BitSet::new();
for c in buf.content.iter() {
set.insert((*c - b'a') as usize);
}
let mut pos: usize = n+1;
for c in input[n..input.len()-1].iter() {
let d = buf.push(*c);
set.remove((d - b'a') as usize);
set.insert((*c - b'a') as usize);
if set.count == n { break; }
pos += 1;
}
pos
}
fn main() -> Result<()> {
let input = read_input()?;
println!("Part 1: {}", find_start_marker(&input, 4));
println!("Part 2: {}", find_start_marker(&input, 14));
Ok(())
}