The Token Games

gemini-2.5-flash_vs_gemini-2.5-pro_20260124_012058.jsonl — 10 puzzles

gemini-2.5-flash

-1

gemini-2.5-pro

-3

#1 Solved

Puzzle Code

import math

def is_prime(n):
    """Return True when ``n`` is prime, False otherwise.

    Values below 2 and even values other than 2 are rejected up front; the
    remaining odd values are trial-divided by every odd candidate whose
    square does not exceed ``n``.
    """
    if n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False
    # Compare i*i against n with exact integer arithmetic instead of using
    # int(math.sqrt(n)): the float square root loses precision for large n
    # and raises OverflowError once n no longer fits in a float.
    i = 3
    while i * i <= n:
        if n % i == 0:
            return False
        i += 2
    return True

def sum_digits(n):
    """Return the sum of the decimal digits of ``n``.

    ``n`` is rendered with ``str`` and each character is converted with
    ``int``, so text containing a non-digit character (for example the sign
    of a negative number) raises ``ValueError``.
    """
    total = 0
    for ch in str(n):
        total += int(ch)
    return total

def mystery(x):
    """Return True only when ``x`` passes every check of this puzzle.

    ``x`` must be an ``int`` strictly between 1000 and 10000 that is a
    perfect square, whose decimal digits sum to a prime number, and whose
    decimal text contains the digit '5' exactly once.  Anything else
    returns False.
    """
    # Type and range gate.  The range also guarantees x is positive, so
    # math.isqrt below cannot raise.
    if not isinstance(x, int) or not 1000 < x < 10000:
        return False

    # Perfect-square test with the exact integer square root.
    root = math.isqrt(x)
    if root * root != x:
        return False

    # The digit sum must be prime.
    if not is_prime(sum_digits(x)):
        return False

    # Exactly one '5' among the decimal digits.
    return str(x).count('5') == 1

Solver Attempt

1156

Sample Solution

2500

Proposer: gemini-2.5-flash Solver: gemini-2.5-pro

0 vs 0

#2 Sample Solution Wrong

Puzzle Code

import datetime
import zlib

def is_prime(n):
    """Return True when ``n`` is prime, False otherwise.

    Plain trial division by every integer from 2 upward whose square does
    not exceed ``n``; values below 2 are never prime.
    """
    if n < 2:
        return False
    # Exact integer comparison instead of int(n**0.5): the float power loses
    # precision for large n and raises OverflowError for very large ints.
    divisor = 2
    while divisor * divisor <= n:
        if n % divisor == 0:
            return False
        divisor += 1
    return True

def mystery(x):
    """Return True only when ``x`` satisfies every condition of this puzzle.

    ``x`` must be a string in the exact format ``YYYY-MM-DD HH:MM:SS`` that
    falls on a Wednesday at a prime hour, and whose integer POSIX timestamp
    has a digit sum divisible by 9 and a CRC32 (of its decimal text) equal
    to 217983273.  Every failed check returns False.

    NOTE: the parsed datetime is naive, so ``timestamp()`` interprets it in
    the local time zone of the machine running the check; the same input
    can therefore map to different timestamps on different machines.
    """
    # Condition 1: The input must be a string.
    if not isinstance(x, str):
        return False

    # Condition 2: It must parse in the single accepted date-time format.
    try:
        dt = datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return False

    # Condition 3: The event must fall on a Wednesday
    # (weekday() counts Monday as 0, so Wednesday is 2).
    if dt.weekday() != 2:
        return False

    # Condition 4: The hour of the event must be a prime number.
    if not is_prime(dt.hour):
        return False

    # The remaining conditions work on the POSIX timestamp.  The conversion
    # can fail for dates far in the past or future; treat that as a reject.
    try:
        timestamp = int(dt.timestamp())
    except (ValueError, OverflowError, OSError):
        return False

    # Condition 5: The digit sum of the timestamp must be divisible by 9.
    # abs() drops the minus sign of a pre-1970 timestamp; int('-') would
    # otherwise raise ValueError here instead of rejecting the input.
    if sum(int(digit) for digit in str(abs(timestamp))) % 9 != 0:
        return False

    # Condition 6: The CRC32 checksum of the timestamp's decimal text must
    # match the fixed "magic" value.
    checksum = zlib.crc32(str(timestamp).encode('ascii'))
    if checksum != 217983273:
        return False

    return True

Sample Solution

"1994-01-26 13:37:00"

Proposer: gemini-2.5-pro Solver: gemini-2.5-flash

0 vs -1

#3 Solved

Puzzle Code

import math

def is_prime(n):
    """Return True when ``n`` is prime, False otherwise.

    Numbers below 2 and even numbers other than 2 are rejected first; odd
    numbers are then trial-divided by each odd candidate whose square does
    not exceed ``n``.
    """
    if n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False
    # Use an exact integer bound (i*i <= n) rather than int(math.sqrt(n)),
    # which is imprecise for large n and overflows for ints that do not fit
    # in a float.
    i = 3
    while i * i <= n:
        if n % i == 0:
            return False
        i += 2
    return True

def mystery(x):
    """Return True only when ``x`` passes every check of this puzzle.

    ``x`` must be a 7-character string that ``int(x, 16)`` can parse.  The
    parsed value must have bit 13 (0-indexed) set and a prime number of '1'
    characters in its ``bin()`` text; the character codes of ``x`` must sum
    to a multiple of 11; and ``x`` must contain at least one uppercase
    character.  Anything else returns False.
    """
    # Type and length gate.
    if not isinstance(x, str) or len(x) != 7:
        return False

    # The text must be accepted by the base-16 integer parser.
    try:
        value = int(x, 16)
    except ValueError:
        return False

    # Bit 13 of the parsed value must be 1.
    bit_13 = (value >> 13) & 1
    if bit_13 == 0:
        return False

    # The number of set bits must be prime.
    if not is_prime(bin(value).count('1')):
        return False

    # The character codes of the original text must sum to a multiple of 11.
    if sum(map(ord, x)) % 11:
        return False

    # At least one character of the original text must be uppercase.
    return any(ch.isupper() for ch in x)

Solver Attempt

"777F330"

Sample Solution

'100A009'

Proposer: gemini-2.5-flash Solver: gemini-2.5-pro

0 vs -1

#4 Sample Solution Wrong

Puzzle Code

import hashlib
import math

def is_prime(n):
    """Return True when ``n`` is a prime ``int``, False otherwise.

    Non-int values and values below 2 are rejected.  After handling 2 and 3
    and their multiples, only candidates of the form 6k-1 and 6k+1 up to the
    integer square root of ``n`` are tried as divisors.
    """
    if not isinstance(n, int) or n < 2:
        return False
    if n in (2, 3):
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    limit = math.isqrt(n)
    for base in range(5, limit + 1, 6):
        if n % base == 0 or n % (base + 2) == 0:
            return False
    return True

def mystery(x):
    """Return True only when ``x`` passes every check of this puzzle.

    ``x`` must be a string for which both ``isascii()`` and ``islower()``
    hold (so it is ASCII-only, has at least one cased character and no
    uppercase ones; digits and punctuation are not excluded by this test).
    Its length and its count of the vowels 'aeiou' must both be prime.  The
    SHA-256 digest of ``x``, read as an integer, must have a prime number of
    set bits and leave remainder 703940130 modulo 999999937.
    """
    # Type gate.
    if not isinstance(x, str):
        return False

    # Character-set gate.
    if not (x.isascii() and x.islower()):
        return False

    # The length must be prime.
    if not is_prime(len(x)):
        return False

    # The number of vowels must be prime.
    vowel_total = sum(x.count(vowel) for vowel in 'aeiou')
    if not is_prime(vowel_total):
        return False

    # Interpret the SHA-256 digest as one big-endian integer (the same value
    # as parsing the hex digest in base 16).
    digest = hashlib.sha256(x.encode('utf-8')).digest()
    n = int.from_bytes(digest, 'big')

    # The popcount of the hash value must be prime.
    if not is_prime(bin(n).count('1')):
        return False

    # The "lock": a fixed remainder modulo a fixed modulus.
    MODULUS = 999999937
    REMAINDER = 703940130
    return n % MODULUS == REMAINDER

Sample Solution

"pythonsriddle"

Proposer: gemini-2.5-pro Solver: gemini-2.5-flash

0 vs -2

#5 Sample Solution Wrong

Puzzle Code

import math
import base64

def is_perfect_square(num):
    """Checks if a number is a perfect square.

    Negative numbers are never perfect squares.  Integers are tested with
    the exact integer square root, so the answer stays correct for values
    too large to be represented precisely as a float.
    """
    if num < 0:
        return False
    if isinstance(num, int):
        # math.isqrt is exact; int(num**0.5) rounds through a float and
        # misclassifies large perfect squares.
        sqrt_num = math.isqrt(num)
    else:
        # Non-int numerics keep the float-based root they always used.
        sqrt_num = int(num**0.5)
    return sqrt_num * sqrt_num == num

def is_perfect_cube(num):
    """Checks if a number is a perfect cube.

    Negative numbers return False and zero returns True.  For positive
    numbers a binary search over the interval [1, num] looks for an integer
    whose cube equals ``num``, so no floating-point root is involved.
    """
    if num <= 0:
        return num == 0

    lo, hi = 1, num
    while lo <= hi:
        guess = (lo + hi) // 2
        cubed = guess * guess * guess
        if cubed == num:
            return True
        if cubed < num:
            # The root, if any, lies above the guess.
            lo = guess + 1
        else:
            # The root, if any, lies below the guess.
            hi = guess - 1
    return False

def sum_digits(n):
    """Calculates the sum of the digits of a number.

    The number is rendered with ``str`` and every character is converted
    with ``int``; a character that is not a digit (such as a minus sign)
    raises ``ValueError``.
    """
    return sum(map(int, str(n)))

def is_power_of_two(n):
    """Checks if a number is a positive power of two (1 counts, as 2**0)."""
    if n <= 0:
        return False
    # A power of two has a single set bit, so clearing the lowest set bit
    # with n & (n - 1) leaves zero.
    remainder = n & (n - 1)
    return remainder == 0

def mystery(x):
    """Return True only when ``x`` satisfies every condition of this puzzle.

    ``x`` must be a strictly valid base64 string whose decoded bytes are
    ASCII text that ``int`` parses to a value ``n`` greater than
    1,000,000,000.  ``n`` must be a perfect cube whose digit sum is a
    perfect square.  The base64 text itself must contain at least two
    different uppercase letters A-Z and have a length that is a power of
    two.  Every failed check returns False.
    """
    # Condition 1: The input must be a string.
    if not isinstance(x, str):
        return False

    # Condition 2: The string must be a valid base64 encoded string.
    # binascii.Error is a ValueError subclass, and b64decode raises a plain
    # ValueError for strings with non-ASCII characters, so catching
    # ValueError rejects both instead of letting the latter escape.
    try:
        decoded_bytes = base64.b64decode(x, validate=True)
    except (ValueError, TypeError):
        return False

    # Condition 3: The decoded bytes, read as ASCII, must parse as a
    # positive integer 'n'.
    try:
        n = int(decoded_bytes.decode('ascii'))
    except (UnicodeDecodeError, ValueError):
        return False

    if n <= 0:
        return False

    # Condition 4: The integer 'n' must be a perfect cube.
    if not is_perfect_cube(n):
        return False

    # Condition 5: The sum of the digits of 'n' must be a perfect square.
    if not is_perfect_square(sum_digits(n)):
        return False

    # Condition 6: The base64 text must contain at least two different
    # uppercase letters (a property of the encoding, not of 'n').
    upper_chars = {c for c in x if 'A' <= c <= 'Z'}
    if len(upper_chars) < 2:
        return False

    # Condition 7: The length of the base64 text must be a power of two.
    if not is_power_of_two(len(x)):
        return False

    # Condition 8: 'n' must be greater than 1,000,000,000 (1 billion).
    if n <= 1000000000:
        return False

    return True

Sample Solution

"MTEwNDA5MzY3NQ=="

Proposer: gemini-2.5-flash Solver: gemini-2.5-pro

-1 vs -2

#6 Solved

Puzzle Code

import math

def mystery(x):
    """Return True only when ``x`` passes every check of this puzzle.

    ``x`` must be a ``float`` whose ``repr`` has no exponent, reads "3."
    followed by an even number of digits, and uses neither '0' nor '9' in
    that fractional part.  The digits at even positions form the integer A
    and those at odd positions form B (for "1234": A = 13, B = 24).  A and B
    must be coprime, A*A + B*B must be a perfect square C*C, and the decimal
    digits of C must sum to 11.
    """
    # Only genuine floats are considered.
    if not isinstance(x, float):
        return False

    # Work on the repr text; reject scientific notation.
    text = repr(x)
    if 'e' in text.lower():
        return False

    # The text must split into exactly an integer part and a fraction.
    pieces = text.split('.')
    if len(pieces) != 2:
        return False
    whole, fraction = pieces

    if whole != '3':
        return False

    # An even digit count is needed so the digits pair up when split.
    if len(fraction) % 2:
        return False

    # The digits 0 and 9 are not allowed in the fraction.
    if '0' in fraction or '9' in fraction:
        return False

    # De-interleave the fraction into the two candidate legs.
    try:
        leg_a = int(fraction[0::2])
        leg_b = int(fraction[1::2])
    except ValueError:
        return False

    # The legs must share no common factor.
    if math.gcd(leg_a, leg_b) != 1:
        return False

    # The legs must complete a Pythagorean triple.
    hyp_squared = leg_a * leg_a + leg_b * leg_b
    hyp = math.isqrt(hyp_squared)
    if hyp * hyp != hyp_squared:
        return False

    # The "lock": the hypotenuse's digits must sum to 11.
    return sum(map(int, str(hyp))) == 11

Solver Attempt

3.3536

Sample Solution

3.3536

Proposer: gemini-2.5-pro Solver: gemini-2.5-flash

-1 vs -2

#7 Solved

Puzzle Code

import math

def is_prime(n):
    """Checks if a number is prime.

    Numbers below 2 and even numbers other than 2 are rejected first; odd
    numbers are then trial-divided by each odd candidate whose square does
    not exceed ``n``.
    """
    if n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False
    # An exact integer bound (i*i <= n) replaces int(math.sqrt(n)), which
    # is imprecise for large n and raises OverflowError for ints that do
    # not fit in a float.
    i = 3
    while i * i <= n:
        if n % i == 0:
            return False
        i += 2
    return True

def mystery(x):
    """Return True only when ``x`` passes every check of this puzzle.

    ``x`` must be a 5-character string made only of '0' and '1' that reads
    the same in both directions, does not start with '0', and whose value
    as a base-2 number is prime.  Anything else returns False.
    """
    # Type and length gate.
    if not isinstance(x, str) or len(x) != 5:
        return False

    # Only binary digits are allowed.
    if set(x) - {'0', '1'}:
        return False

    # The text must be a palindrome.
    if x != x[::-1]:
        return False

    # A leading zero would make this a shorter number in disguise, so the
    # first digit has to be '1'.
    if x.startswith('0'):
        return False

    # The "lock": the binary value must be prime.  Parsing cannot fail here
    # because only '0' and '1' remain.
    return is_prime(int(x, 2))

Solver Attempt

"10001"

Sample Solution

"10001"

Proposer: gemini-2.5-flash Solver: gemini-2.5-pro

-1 vs -2

#8 Solved

Puzzle Code

import math

def is_prime(n):
    """Return True when ``n`` is a prime ``int``, False otherwise.

    Anything that is not an int, or is below 2, is rejected.  Once 2, 3 and
    their multiples are dealt with, divisors are tried in pairs (6k-1, 6k+1)
    up to the integer square root of ``n``.
    """
    if not isinstance(n, int) or n < 2:
        return False
    if n in (2, 3):
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    limit = math.isqrt(n)
    for base in range(5, limit + 1, 6):
        if n % base == 0 or n % (base + 2) == 0:
            return False
    return True

def mystery(x):
    """Return True only when ``x`` passes every check of this puzzle.

    ``x`` must be a string with exactly one underscore, of the form
    ``<name>_<number>``.  The name must satisfy ``isalpha()`` and the number
    must be accepted by ``int`` and be prime.  The name must be one
    character shorter than that prime and contain strictly more vowels
    ('aeiou', case-insensitive) than other characters.  Finally the sum of
    squared letter positions of the lowercased name (a=1, b=2, ...) must
    leave remainder 6 when divided by the prime.
    """
    # Exactly one underscore separates the two parts.
    if not isinstance(x, str) or x.count('_') != 1:
        return False
    name, _, number_text = x.partition('_')

    # The name must be alphabetic (this also rules out an empty name).
    if not name.isalpha():
        return False

    # The second part must parse as an integer...
    try:
        prime = int(number_text)
    except ValueError:
        return False

    # ...and that integer must be prime.
    if not is_prime(prime):
        return False

    # Link between the parts: the name is one character shorter than the
    # prime.
    if len(name) != prime - 1:
        return False

    # Vowels must strictly outnumber the remaining characters.
    lowered = name.lower()
    vowel_total = sum(1 for ch in lowered if ch in "aeiou")
    other_total = len(name) - vowel_total
    if vowel_total <= other_total:
        return False

    # The "lock": sum of squared alphabet positions, modulo the prime.
    base = ord('a') - 1
    checksum = sum((ord(ch) - base) ** 2 for ch in lowered)
    return checksum % prime == 6

Solver Attempt

"aaaafh_7"

Sample Solution

"MarieCurie_11"

Proposer: gemini-2.5-pro Solver: gemini-2.5-flash

-1 vs -2

#9 Solved

Puzzle Code

import math

def is_prime(n):
    """Checks if a number is prime.

    Returns False for anything that is not an int or is below 2.  Odd
    values are trial-divided by each odd candidate up to the integer square
    root of ``n``.
    """
    if not isinstance(n, int) or n < 2:
        return False
    if n == 2:
        return True
    if n % 2 == 0:
        return False
    # n is known to be an int here, so math.isqrt gives the exact bound;
    # int(math.sqrt(n)) is imprecise for large n and raises OverflowError
    # for ints that do not fit in a float.
    for i in range(3, math.isqrt(n) + 1, 2):
        if n % i == 0:
            return False
    return True

def is_perfect_square(num):
    """Checks if a non-negative integer is a perfect square.

    Returns False for anything that is not an int or is negative; otherwise
    compares the square of the exact integer square root with ``num``.
    """
    if isinstance(num, int) and num >= 0:
        root = math.isqrt(num)
        return root * root == num
    return False

def sum_digits(n):
    """Calculates the sum of the digits of a non-negative integer.

    Returns -1 as an "invalid input" marker when ``n`` is not an int or is
    negative.
    """
    if isinstance(n, int) and n >= 0:
        return sum(map(int, str(n)))
    return -1

def mystery(x):
    """Return True only when ``x`` passes every check of this puzzle.

    ``x`` must be a string of exactly three comma-separated positive
    integers.  Each integer must be a perfect square with a digit sum above
    1.  Across the three digit sums, exactly one must be prime, exactly one
    a perfect square, and exactly one neither.  The three square roots,
    sorted, must satisfy a*a + b*b == c*c, and the length of ``x`` itself
    must be prime.
    """
    # Type gate.
    if not isinstance(x, str):
        return False

    # Exactly three comma-separated fields, each a positive integer.
    fields = x.split(',')
    if len(fields) != 3:
        return False
    try:
        values = [int(field) for field in fields]
    except ValueError:
        return False
    if any(value <= 0 for value in values):
        return False

    # Examine each number: collect its root and classify its digit sum.
    roots = []
    prime_hits = square_hits = other_hits = 0
    for value in values:
        # Every number must itself be a perfect square.
        if not is_perfect_square(value):
            return False

        # Its digit sum must exceed 1.
        digit_total = sum_digits(value)
        if digit_total <= 1:
            return False

        prime_flag = is_prime(digit_total)
        square_flag = is_perfect_square(digit_total)
        prime_hits += prime_flag
        square_hits += square_flag
        other_hits += not (prime_flag or square_flag)

        roots.append(math.isqrt(value))

    # Each of the three categories must occur exactly once.
    if (prime_hits, square_hits, other_hits) != (1, 1, 1):
        return False

    # The sorted roots must form a Pythagorean triple.
    short_leg, long_leg, hypotenuse = sorted(roots)
    if short_leg * short_leg + long_leg * long_leg != hypotenuse * hypotenuse:
        return False

    # The "lock": the length of the original text must be prime.
    return is_prime(len(x))

Solver Attempt

"144,1225,1369"

Sample Solution

"144,1225,1369"

Proposer: gemini-2.5-flash Solver: gemini-2.5-pro

-1 vs -2

#10 Sample Solution Wrong

Puzzle Code

import math

def mystery(x):
    """Return True only when ``x`` passes every check of this puzzle.

    ``x`` must be a positive ``int`` whose decimal text is a palindrome,
    whose standard Roman numeral (defined here for 1..3999) is also a
    palindrome, whose Roman numeral uses a prime number of distinct
    symbols, and which is divisible by 3.

    NOTE(review): no input appears able to pass.  The only values that are
    palindromic in both decimal and Roman form seem to be 1, 2, 3 and 5,
    and each of those uses a single Roman symbol (1 is not prime) --
    confirm before relying on this puzzle having a solution.
    """
    # Positive integers only.
    if not isinstance(x, int) or x <= 0:
        return False

    # The decimal text must read the same both ways.
    digits = str(x)
    if digits != digits[::-1]:
        return False

    def to_roman(num):
        """Standard Roman numeral for 1..3999, or "" outside that range."""
        if not 0 < num < 4000:
            return ""
        table = (
            (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
            (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
            (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I"),
        )
        pieces = []
        for value, symbol in table:
            repeats, num = divmod(num, value)
            pieces.append(symbol * repeats)
        return "".join(pieces)

    def is_prime(n):
        """Trial-division primality test for the small symbol counts."""
        if n < 2:
            return False
        if n == 2:
            return True
        if n % 2 == 0:
            return False
        return all(n % d for d in range(3, int(math.sqrt(n)) + 1, 2))

    # The Roman numeral must exist and be a palindrome as well.
    roman = to_roman(x)
    if not roman or roman != roman[::-1]:
        return False

    # The count of distinct Roman symbols must be prime
    # (e.g. 'CCC' uses 1 symbol, 'XIX' uses 2).
    if not is_prime(len(set(roman))):
        return False

    # The "lock": divisibility by 3.
    return x % 3 == 0

Sample Solution

303

Proposer: gemini-2.5-pro Solver: gemini-2.5-flash

-1 vs -3