← All Duels

gemini-3.1-pro-preview_vs_deepseek-v3.2-thinking.jsonl — 10 puzzles

gemini-3.1-pro-preview
5
vs
deepseek-v3.2-thinking
-4
#1 Solver Failed
def mystery(x):
    """Check whether the string ``x`` evaluates to a factorial function.

    ``x`` is accepted only if it is a ``str`` of at most 130 characters that
    contains none of the banned substrings. It is then evaluated with
    ``eval``; the result must be callable and must return ``n!`` for every
    ``n`` from 1 through 25.

    Returns:
        True if every check passes, otherwise False. Any exception raised
        while evaluating ``x`` or calling the result also yields False.

    NOTE: ``eval`` executes arbitrary code; this is the point of the puzzle,
    so only call it on answers you are willing to run. ``eval`` sees this
    function's locals, which is why ``x`` and ``banned`` keep these names.
    """
    # Forbidden substrings (multiplication, branching, loops, escapes).
    banned = ['*', 'if', 'for', 'while', 'math', 'import', 'eval', 'exec', 'compile', '__']
    # The 130-character cap keeps hard-coded lookup tables out.
    if not isinstance(x, str) or any(b in x for b in banned) or len(x) > 130:
        return False

    try:
        candidate = eval(x)
        if not callable(candidate):
            return False

        # Build n! incrementally and compare at every step.
        factorial = 1
        n = 1
        while n < 26:
            factorial *= n
            if candidate(n) != factorial:
                return False
            n += 1
    except Exception:
        return False
    return True
"(lambda f:(lambda x:f(lambda y:x(x)(y)))(lambda x:f(lambda y:x(x)(y))))(lambda f:lambda n:1 if n<2 else eval('+'.join('n'*n))+f(n-1))"
"lambda n,f=lambda n,f,m:n and m(f(n-1,f,m),n) or 1,m=lambda a,b:(pow(a+b,2)-pow(a,2)-pow(b,2))>>1:f(n,f,m)"
Proposer: gemini-3.1-pro-preview Solver: deepseek-v3.2-thinking
1 vs 0
#2 Sample Solution Wrong
def mystery(x):
    """Check whether the string ``x`` evaluates to a Fibonacci function.

    ``x`` must be a ``str`` of at most 120 characters containing none of the
    banned substrings. The value produced by ``eval(x)`` must be callable and
    must map every ``n`` in ``0..30`` to the n-th Fibonacci number, with
    F(0) = 0 and F(1) = 1.

    Returns:
        True if every check passes, otherwise False. Exceptions raised while
        evaluating or calling the candidate are reported as False.

    NOTE: ``eval`` executes arbitrary code; only call this on answers you are
    willing to run. ``eval`` sees this function's locals, so ``x`` and
    ``banned`` keep these names.
    """
    banned = ['import', 'exec', 'eval', 'compile', '__', 'for', 'while', 'if', 'else', 'elif', 'def', 'fib', 'Fibonacci', 'math']
    if not isinstance(x, str) or len(x) > 120 or any(b in x for b in banned):
        return False

    try:
        candidate = eval(x)
        if not callable(candidate):
            return False

        # Walk the sequence pairwise instead of materialising a table.
        current, following = 0, 1
        for n in range(31):
            if candidate(n) != current:
                return False
            current, following = following, current + following
    except Exception:
        return False
    return True
lambda n: (lambda f: f(f,n))(lambda f,n: n>1 and f(f,n-1)+f(f,n-2) or n)
Proposer: deepseek-v3.2-thinking Solver: gemini-3.1-pro-preview
1 vs -1
#3 Solver Failed
def mystery(x):
    """Check whether the string ``x`` is a quine expressed as a callable.

    ``x`` must be a ``str`` of at most 75 characters that contains none of
    the banned substrings (this rules out the letters ``f``/``F``, ``%`` and
    ``.`` among others). ``eval(x)`` must produce a callable which, called
    with no arguments, returns a value equal to ``x`` itself.

    Returns:
        The result of ``func() == x`` when ``x`` passes the filters and
        evaluates to a callable; False otherwise, including when evaluating
        or calling raises an exception.

    NOTE: ``eval`` executes arbitrary code and sees this function's locals
    (``x`` and ``banned``), so those names are part of the puzzle surface.
    """
    # Formatting, attribute access and introspection escapes are blocked.
    banned = ['f', 'F', '%', '.', 'eval', 'exec', 'import', '__', 'getattr']
    if not isinstance(x, str) or len(x) > 75 or any(b in x for b in banned):
        return False

    try:
        func = eval(x)
        # A non-callable result can never be a quine function.
        return func() == x if callable(func) else False
    except Exception:
        return False
"lambda: locals()['x']"
"lambda a='lambda a=: a[:9]+repr(a)+a[9:]': a[:9]+repr(a)+a[9:]"
Proposer: gemini-3.1-pro-preview Solver: deepseek-v3.2-thinking
2 vs -1
#4 Sample Solution Wrong
def mystery(x):
    """Check whether the string ``x`` evaluates to a two-argument gcd.

    ``x`` must be a ``str`` of at most 130 characters containing none of the
    banned substrings, which include every decimal digit. ``eval(x)`` must
    produce a callable that agrees with ``math.gcd`` on every pair
    ``(a, b)`` with ``1 <= a, b <= 15``.

    Returns:
        True if every check passes, otherwise False. Exceptions raised while
        evaluating or calling the candidate are reported as False.

    NOTE: ``eval`` executes arbitrary code; only call this on answers you are
    willing to run. ``eval`` sees this function's locals, so ``x`` and
    ``banned`` keep these names.
    """
    banned = ['import', 'exec', 'eval', 'compile', '__', 'for', 'while', 'if', 'else', 'elif', 'def', 'gcd', 'math', '0','1','2','3','4','5','6','7','8','9']
    if not isinstance(x, str) or any(b in x for b in banned) or len(x) > 130:
        return False

    try:
        candidate = eval(x)
        if not callable(candidate):
            return False

        # Imported only after eval so the reference is not in eval's scope.
        from math import gcd as reference
        pairs = [(a, b) for a in range(1, 16) for b in range(1, 16)]
        for a, b in pairs:
            if candidate(a, b) != reference(a, b):
                return False
    except Exception:
        return False
    return True
"(lambda f: lambda a,b: (f(f)(b,a%b),a)[b==len('')])(lambda f: lambda a,b: (f(f)(b,a%b),a)[b==len('')])"
Proposer: deepseek-v3.2-thinking Solver: gemini-3.1-pro-preview
2 vs -2
#5 Solver Failed
def mystery(x):
    """Check whether the string ``x`` evaluates to a Collatz step counter.

    ``x`` must be a ``str`` of at most 85 characters containing none of the
    banned substrings (conditionals, loops, comparison operators, ``in`` and
    several aggregating builtins). ``eval(x)`` must produce a callable that,
    for every ``n`` in ``1..99``, returns the number of Collatz steps
    (halve when even, ``3n + 1`` when odd) needed to reach 1.

    Returns:
        True if every check passes, otherwise False. Exceptions raised while
        evaluating or calling the candidate are reported as False.

    NOTE: ``eval`` executes arbitrary code; only call this on answers you are
    willing to run. ``eval`` sees this function's locals, so ``x`` and
    ``banned`` keep these names.
    """
    banned = [
        'if', 'else', 'elif', 'for', 'while', 'def', 'import', 'eval', 'exec',
        'compile', '__', '<', '>', '==', '!=', 'in', 'max', 'min', 'abs', 'sum', 'any', 'all',
    ]
    # The short length cap keeps dictionary lookups and verbose tricks out.
    if not isinstance(x, str) or len(x) > 85 or any(b in x for b in banned):
        return False

    try:
        candidate = eval(x)
        if not callable(candidate):
            return False

        # 99 starting values make a hand-written mapping impractical.
        for start in range(1, 100):
            steps = 0
            value = start
            while value > 1:
                value = 3 * value + 1 if value % 2 else value // 2
                steps += 1
            if candidate(start) != steps:
                return False
    except Exception:
        return False
    return True
"(lambda f:f(f))(lambda f:lambda n:n-1 and(1+f(f)((n&1)*(3*n+1)+(1-(n&1))*(n//2)))or0)"
"lambda n,c=lambda n,c:n//2 and 1+c(n%2 and 3*n+1 or n//2,c) or 0:c(n,c)"
Proposer: gemini-3.1-pro-preview Solver: deepseek-v3.2-thinking
3 vs -2
#6 Sample Solution Wrong
def mystery(x):
    """Check whether the string ``x`` evaluates to a popcount function.

    ``x`` must be a ``str`` of at most 85 characters containing none of the
    banned substrings (keywords, common conversion builtins and all decimal
    digits). ``eval(x)`` must produce a callable that returns the number of
    set bits for every integer in ``0..15``.

    Returns:
        True if every check passes, otherwise False. Exceptions raised while
        evaluating or calling the candidate are reported as False.

    NOTE: ``eval`` executes arbitrary code; only call this on answers you are
    willing to run. ``eval`` sees this function's locals, so ``x`` and
    ``banned`` keep these names.
    """
    banned = [
        'import', 'exec', 'eval', 'compile', '__', 'for', 'while', 'if', 'else', 'elif',
        'def', 'return', 'True', 'False', 'bin', 'oct', 'hex', 'str', 'int', 'list',
        'dict', 'set', 'tuple', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    ]
    if not isinstance(x, str) or len(x) > 85 or any(b in x for b in banned):
        return False

    try:
        candidate = eval(x)
        if not callable(candidate):
            return False

        for n in range(16):
            # Count set bits by peeling off the lowest bit each round.
            ones = 0
            remaining = n
            while remaining:
                ones += remaining & 1
                remaining >>= 1
            if candidate(n) != ones:
                return False
    except Exception:
        return False
    return True
`(lambda f:lambda n:f(f,n))(lambda f,n:n and (n&len('a'))+f(f,n>>len('a')) or len(''))`
Proposer: deepseek-v3.2-thinking Solver: gemini-3.1-pro-preview
3 vs -3
#7 Solver Failed
def mystery(x):
    """Check whether the integer ``x`` maps to 123456 under a recurrence.

    The recurrence is ``f(0) = 0``, ``f(1) = 1``, ``f(2n) = f(n)`` and
    ``f(2n + 1) = f(n) + f(n + 1)``. ``x`` must be an ``int`` no smaller
    than ``10**11``; smaller values are rejected so that a plain linear
    search over small inputs cannot succeed.

    Returns:
        True if ``f(x) == 123456``, otherwise False. Any exception raised
        while computing ``f(x)`` is reported as False. The computation
        recurses once per halving, so an ``x`` with enough bits to exceed the
        interpreter's recursion limit raises ``RecursionError`` and is
        therefore rejected as well.
    """
    if not isinstance(x, int) or x < 10**11:
        return False

    # Base cases are seeded up front; other values are filled in on demand.
    cache = {0: 0, 1: 1}

    def recurrence(n):
        known = cache.get(n)
        if known is not None:
            return known

        half = n // 2
        if n % 2 == 0:
            value = recurrence(half)
        else:
            value = recurrence(half) + recurrence(half + 1)

        cache[n] = value
        return value

    try:
        return recurrence(x) == 123456
    except Exception:
        return False
2**123456 - 1
1666447320405
Proposer: gemini-3.1-pro-preview Solver: deepseek-v3.2-thinking
4 vs -3
#8 Sample Solution Wrong
def mystery(x):
    """Check whether the string ``x`` evaluates to a constant-42 function.

    ``x`` must be a ``str`` of at most 80 characters containing none of the
    banned substrings (keywords, arithmetic operators and all decimal
    digits). ``eval(x)`` must produce a callable that returns a value equal
    to 42 for each of a fixed set of probe arguments of mixed types.

    Returns:
        True if every check passes, otherwise False. Exceptions raised while
        evaluating or calling the candidate are reported as False.

    NOTE: ``eval`` executes arbitrary code; only call this on answers you are
    willing to run. ``eval`` sees this function's locals, so ``x`` and
    ``banned`` keep these names.
    """
    banned = ['import', 'exec', 'eval', 'compile', '__', 'for', 'while', 'if', 'else', 'elif', 'def', '+', '-', '*', '/', '%', '0','1','2','3','4','5','6','7','8','9']
    if not isinstance(x, str) or len(x) > 80 or any(b in x for b in banned):
        return False

    try:
        candidate = eval(x)
        if not callable(candidate):
            return False

        # Ints, strings, lists, None and bools: the result must ignore its input.
        probes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 'abc', [], [1, 2, 3], None, True, False]
        return not any(candidate(probe) != 42 for probe in probes)
    except Exception:
        return False
'lambda _: (True<<((True<<True<<True)|True)) | (True<<((True<<True)|True)) | (True<<True)'
Proposer: deepseek-v3.2-thinking Solver: gemini-3.1-pro-preview
4 vs -4
#9 Solver Failed
def mystery(x):
    """Check whether the string ``x`` is a quine under stricter filters.

    ``x`` must be a ``str`` of at most 75 characters that contains none of
    the banned substrings; beyond ``f``/``F``, ``%`` and ``.``, square
    brackets and several introspection builtins are also rejected.
    ``eval(x)`` must produce a callable which, called with no arguments,
    returns a value equal to ``x`` itself.

    Returns:
        The result of ``func() == x`` when ``x`` passes the filters and
        evaluates to a callable; False otherwise, including when evaluating
        or calling raises an exception.

    NOTE: ``eval`` executes arbitrary code and sees this function's locals
    (``x`` and ``banned``), so those names are part of the puzzle surface.
    """
    # Formatting, method calls, slicing and introspection are all blocked.
    banned = [
        'f', 'F', '%', '.', 'eval', 'exec', 'import', '__', 'getattr',
        '[', ']', 'chr', 'ord', 'vars', 'dir', 'globals', 'locals', 'sys',
    ]
    if not isinstance(x, str) or len(x) > 75 or any(b in x for b in banned):
        return False

    try:
        func = eval(x)
    except Exception:
        return False
    if not callable(func):
        return False

    try:
        # The quine test proper: the output must reproduce the source.
        return func() == x
    except Exception:
        return False
"lambda: x"
"""lambda: (lambda a: a+repr(a)+")")('lambda: (lambda a: a+repr(a)+")")(')"""
Proposer: gemini-3.1-pro-preview Solver: deepseek-v3.2-thinking
5 vs -4
#10 Solved
def mystery(x):
    """Check whether the string ``x`` evaluates to a zero-argument 42.

    ``x`` must be a ``str`` of at most 80 characters containing none of the
    banned substrings, which cover keywords, boolean and comparison words,
    arithmetic and bitwise operators, brackets, braces and all decimal
    digits. ``eval(x)`` must produce a callable whose call with no arguments
    returns a value equal to 42.

    Returns:
        The result of ``f() == 42`` when ``x`` passes the filters and
        evaluates to a callable; False otherwise, including when evaluating
        or calling raises an exception.

    NOTE: ``eval`` executes arbitrary code; only call this on answers you are
    willing to run. ``eval`` sees this function's locals, so ``x`` and
    ``banned`` keep these names.
    """
    banned = [
        'import', 'exec', 'eval', 'compile', '__', 'for', 'while', 'if', 'else',
        'elif', 'def', 'return', 'True', 'False', 'and', 'or', 'not', 'in', 'is',
        '+', '-', '*', '/', '%', '^', '&', '|', '<', '>', '=', '!', '[', ']',
        '{', '}', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    ]
    if not isinstance(x, str) or len(x) > 80 or any(b in x for b in banned):
        return False

    try:
        candidate = eval(x)
        # Only a callable can be invoked; anything else is rejected outright.
        return candidate() == 42 if callable(candidate) else False
    except Exception:
        return False
"lambda:len('..........................................')"
"lambda: len('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')"
Proposer: deepseek-v3.2-thinking Solver: gemini-3.1-pro-preview
5 vs -4