← All Duels

grok-4-fast-reasoning_vs_gemini-3.1-pro-preview.jsonl — 10 puzzles

grok-4-fast-reasoning
-1
vs
gemini-3.1-pro-preview
4
#1 Solved
def mystery(x):
    """Report whether ``x`` spells the sum of the fourth powers 1**4 .. 100**4.

    ``x`` is converted with ``str`` and then parsed with ``int``, so both
    integers and their decimal string forms are accepted.  Anything that
    ``int`` rejects with ``ValueError`` yields ``False``.
    """
    try:
        candidate = int(str(x))
    except ValueError:
        # Text that is not an integer literal can never match.
        return False
    return candidate == sum(base ** 4 for base in range(1, 101))
2050333330
2050333330
Proposer: grok-4-fast-reasoning Solver: gemini-3.1-pro-preview
0 vs 0
#2 Solver Failed
def mystery(x):
    """Return True when ``x`` is a str whose evaluation equals ``x`` itself.

    ``x`` must be exactly of type ``str`` and must not contain any of the
    blacklisted substrings.  The text is then evaluated as a Python
    expression and the result compared with the original text.  Any
    ``Exception`` raised while evaluating or comparing yields ``False``.
    """
    # Exact type test: instances of str subclasses are rejected too.
    if type(x) is not str:
        return False
    # Substring blacklist.  Because 'x' and '_' are single characters, any
    # text containing the letter x or an underscore is refused outright.
    forbidden = ['x', '_', 'eval', 'exec', 'import', 'locals', 'globals', 'vars', 'dir', 'sys', 'code', 'frame']
    if any(f in x for f in forbidden):
        return False
    try:
        # NOTE(review): eval() runs caller-supplied text.  The blacklist above
        # is a puzzle constraint, not a security boundary.  eval() is called
        # without explicit namespaces, so the expression sees this function's
        # globals and locals (e.g. the name `forbidden`); renaming or adding
        # locals here changes what an expression can reference.
        return eval(x) == x
    except Exception:
        return False
`getattr(getattr((lambda:0),chr(95)+chr(95)+chr(103)+chr(108)+chr(111)+chr(98)+chr(97)+chr(108)+chr(115)+chr(95)+chr(95))[chr(95)+chr(95)+"builtins"+chr(95)+chr(95)],forbidden[5])()[forbidden[0]]`
"(lambda a:a%a)('(lambda a:a%%a)(%r)')"
Proposer: gemini-3.1-pro-preview Solver: grok-4-fast-reasoning
0 vs 1
#3 Solved
def mystery(x):
    """Report whether ``x`` spells the integer 1! + 2! + ... + 15!.

    ``x`` is stringified and parsed with ``int``; input that ``int`` rejects
    with ``ValueError`` yields ``False``.
    """
    try:
        guess = int(str(x))
    except ValueError:
        return False

    running_factorial = 1
    total = 0
    k = 1
    while k <= 15:
        # running_factorial holds k! after this multiplication.
        running_factorial *= k
        total += running_factorial
        k += 1
    return guess == total
1401602636313
1401602636313
Proposer: grok-4-fast-reasoning Solver: gemini-3.1-pro-preview
0 vs 1
#4 Solver Failed
def mystery(x):
    """Return True when ``x`` is a non-palindromic str that evaluates to its own reversal.

    ``x`` must be exactly a ``str`` of at most 200 characters and must not
    contain any banned substring.  It is evaluated as an expression with
    empty builtins and empty locals.  Any ``Exception`` raised during
    evaluation or comparison yields ``False``.
    """
    if type(x) is not str or len(x) > 200:
        return False

    # Substrings that would allow trivial escapes or built-in introspection.
    banned = ('eval', 'exec', 'import', 'open', 'read', 'write', 'getattr', '__')
    for token in banned:
        if token in x:
            return False

    mirrored = x[::-1]
    try:
        # The expression is evaluated first; only then is the palindrome
        # exclusion checked.
        return eval(x, {"__builtins__": {}}, {}) == mirrored and x != mirrored
    except Exception:
        return False
None
"(lambda s:(s%s)[::-1])('(lambda s:(s%%s)[::-1])(%r)')"
Proposer: gemini-3.1-pro-preview Solver: grok-4-fast-reasoning
0 vs 2
#5 Solved
def mystery(x):
    """Report whether ``x`` spells the integer 1! + 2! + ... + 20!.

    ``x`` is stringified and parsed with ``int``; input that ``int`` rejects
    with ``ValueError`` yields ``False``.
    """
    try:
        parsed = int(str(x))
    except ValueError:
        return False

    # factorials[j] holds (j + 1)!; each entry is derived from the previous one.
    factorials = [1]
    for k in range(2, 21):
        factorials.append(factorials[-1] * k)
    return parsed == sum(factorials)
2561327494111820313
2561327494111820313
Proposer: grok-4-fast-reasoning Solver: gemini-3.1-pro-preview
0 vs 2
#6 Solved
def mystery(x):
    """Return True when calling ``x`` always hands back ``x`` itself.

    ``x`` must be callable.  It is invoked with itself as the sole argument,
    with no arguments, and with ``(1, 2, 3)``, in that order, and each result
    must be the very same object as ``x``.  Any ``Exception`` raised by a call
    yields ``False``.
    """
    if not callable(x):
        return False
    try:
        for call_args in ((x,), (), (1, 2, 3)):
            # Stop at the first call that returns some other object.
            if x(*call_args) is not x:
                return False
        return True
    except Exception:
        return False
type('C', (), {'__call__': lambda self, *args: self})()
type("C", (), {"__new__": lambda cls, *args: cls})
Proposer: gemini-3.1-pro-preview Solver: grok-4-fast-reasoning
0 vs 2
#7 Sample Solution Wrong
def mystery(x):
    """Report whether ``x`` spells the integer 1! + 2! + ... + 25!.

    ``x`` is stringified and parsed with ``int``; input that ``int`` rejects
    with ``ValueError`` yields ``False``.
    """
    try:
        value = int(str(x))
    except ValueError:
        return False

    # Nested form of the same sum: 1*(1 + 2*(1 + 3*(1 + ... 25))),
    # evaluated from the innermost factor outwards.
    nested = 0
    for k in range(25, 0, -1):
        nested = k * (1 + nested)
    return value == nested
16158688114800573847540313
Proposer: grok-4-fast-reasoning Solver: gemini-3.1-pro-preview
-1 vs 2
#8 Solver Failed
def mystery(x):
    """Return True for a 3-character palindrome whose case mappings are not palindromes.

    ``x`` must be exactly a ``str`` of length 3 that reads the same reversed,
    while both ``x.upper()`` and ``x.lower()`` differ from their own reversals.
    """
    if type(x) is not str or len(x) != 3:
        return False

    if x != x[::-1]:
        return False

    shouted = x.upper()
    if shouted == shouted[::-1]:
        return False

    whispered = x.lower()
    return whispered != whispered[::-1]
no value satisfies the condition
'\ufb01\u0130\ufb01'
Proposer: gemini-3.1-pro-preview Solver: grok-4-fast-reasoning
-1 vs 3
#9 Solved
def mystery(x):
    """Report whether ``x`` spells the sum of the powers of two 2**0 .. 2**99.

    ``x`` is stringified and parsed with ``int``; input that ``int`` rejects
    with ``ValueError`` yields ``False``.
    """
    try:
        number = int(str(x))
    except ValueError:
        return False
    # 1 << bit is 2**bit; summing bits 0..99 gives one hundred set bits.
    return number == sum(1 << bit for bit in range(100))
1267650600228229401496703205375
1267650600228229401496703205375
Proposer: grok-4-fast-reasoning Solver: gemini-3.1-pro-preview
-1 vs 3
#10 Solver Failed
def mystery(x):
    """Return True when ``x`` is a pair of distinct small ints with equal hashes.

    ``x`` must be exactly a 2-tuple whose members are exactly of type ``int``
    (so ``bool`` is rejected), differ from each other, and each have absolute
    value at most 5.  The result is whether ``hash`` gives both the same value.
    """
    if not (type(x) is tuple and len(x) == 2):
        return False

    first, second = x

    # Exact ints only, no subclasses.
    if not (type(first) is int and type(second) is int):
        return False

    # The two values must differ and both lie within [-5, 5].
    if first == second or max(abs(first), abs(second)) > 5:
        return False

    return hash(first) == hash(second)
no such x
(-1, -2)
Proposer: gemini-3.1-pro-preview Solver: grok-4-fast-reasoning
-1 vs 4