Longest Repeated Substring

Find the longest substring that appears at least twice in a string

Language Selection

Choose your preferred programming language

Showing: Python

Problem Statement

Given a string s, return the longest substring that appears at least twice in s. If there are multiple such substrings, return any one of them.

Example 1:

Input: s = "banana"
Output: "ana"
Explanation: "ana" appears twice in "banana"

Example 2:

Input: s = "abcd"
Output: ""
Explanation: No substring appears more than once

Example 3:

Input: s = "aabcaabdaab"
Output: "aab"
Explanation: "aab" appears three times

Constraints:

  • 2 <= s.length <= 3 * 10^4
  • s consists of lowercase English letters only

Algorithm

  1. Use binary search to find the maximum length of repeated substring
  2. For each length, use rolling hash to find duplicates
  3. Store hash values and their positions to detect collisions

Solution

Python:

def longestDupSubstring(s):
    """
    Rolling hash with binary search approach
    Time: O(n log n) - binary search * rolling hash
    Space: O(n) - hash storage
    """
    n = len(s)
    base = 26
    mod = 10**9 + 7
    
    def hasDuplicate(length):
        if length == 0:
            return ""
        
        # Rolling hash
        hash_val = 0
        power = 1
        
        # Calculate hash for first substring
        for i in range(length):
            hash_val = (hash_val * base + ord(s[i])) % mod
            if i < length - 1:
                power = (power * base) % mod
        
        seen = {hash_val: 0}
        
        # Rolling hash for remaining substrings
        for i in range(length, n):
            # Remove leftmost character
            hash_val = (hash_val - ord(s[i - length]) * power) % mod
            # Add rightmost character
            hash_val = (hash_val * base + ord(s[i])) % mod
            
            if hash_val in seen:
                return s[seen[hash_val]:seen[hash_val] + length]
            seen[hash_val] = i - length + 1
        
        return ""
    
    # Binary search for maximum length
    left, right = 0, n - 1
    result = ""
    
    while left <= right:
        mid = (left + right) // 2
        candidate = hasDuplicate(mid)
        
        if candidate:
            result = candidate
            left = mid + 1
        else:
            right = mid - 1
    
    return result

Java:

class Solution {
    /**
     * Rolling hash with binary search approach
     * Time: O(n log n) - binary search * rolling hash
     * Space: O(n) - hash storage
     */
    public String longestDupSubstring(String s) {
        int n = s.length();
        int base = 26;
        int mod = 1000000007;
        
        int left = 0, right = n - 1;
        String result = "";
        
        while (left <= right) {
            int mid = (left + right) / 2;
            String candidate = hasDuplicate(s, mid, base, mod);
            
            if (!candidate.isEmpty()) {
                result = candidate;
                left = mid + 1;
            } else {
                right = mid - 1;
            }
        }
        
        return result;
    }
    
    private String hasDuplicate(String s, int length, int base, int mod) {
        if (length == 0) return "";
        
        int n = s.length();
        long hash = 0;
        long power = 1;
        
        // Calculate hash for first substring
        for (int i = 0; i < length; i++) {
            hash = (hash * base + s.charAt(i)) % mod;
            if (i < length - 1) {
                power = (power * base) % mod;
            }
        }
        
        Map<Long, Integer> seen = new HashMap<>();
        seen.put(hash, 0);
        
        // Rolling hash for remaining substrings
        for (int i = length; i < n; i++) {
            // Remove leftmost character
            hash = (hash - s.charAt(i - length) * power) % mod;
            if (hash < 0) hash += mod;
            // Add rightmost character
            hash = (hash * base + s.charAt(i)) % mod;
            
            if (seen.containsKey(hash)) {
                int start = seen.get(hash);
                return s.substring(start, start + length);
            }
            seen.put(hash, i - length + 1);
        }
        
        return "";
    }
}

Go:

// longestDupSubstring - Rolling hash with binary search approach
// Time: O(n log n) - binary search * rolling hash
// Space: O(n) - hash storage
func longestDupSubstring(s string) string {
    n := len(s)
    base := 26
    mod := 1000000007
    
    var hasDuplicate func(int) string
    hasDuplicate = func(length int) string {
        if length == 0 {
            return ""
        }
        
        hash := 0
        power := 1
        
        // Calculate hash for first substring
        for i := 0; i < length; i++ {
            hash = (hash*base + int(s[i])) % mod
            if i < length-1 {
                power = (power * base) % mod
            }
        }
        
        seen := make(map[int]int)
        seen[hash] = 0
        
        // Rolling hash for remaining substrings
        for i := length; i < n; i++ {
            // Remove leftmost character
            hash = (hash - int(s[i-length])*power) % mod
            if hash < 0 {
                hash += mod
            }
            // Add rightmost character
            hash = (hash*base + int(s[i])) % mod
            
            if start, exists := seen[hash]; exists {
                return s[start : start+length]
            }
            seen[hash] = i - length + 1
        }
        
        return ""
    }
    
    // Binary search for maximum length
    left, right := 0, n-1
    result := ""
    
    for left <= right {
        mid := (left + right) / 2
        candidate := hasDuplicate(mid)
        
        if candidate != "" {
            result = candidate
            left = mid + 1
        } else {
            right = mid - 1
        }
    }
    
    return result
}

JavaScript:

/**
 * Rolling hash with binary search approach
 * Time: O(n log n) - binary search * rolling hash
 * Space: O(n) - hash storage
 */
function longestDupSubstring(s) {
    const n = s.length;
    const base = 26;
    const mod = 1000000007;
    
    function hasDuplicate(length) {
        if (length === 0) return "";
        
        let hash = 0;
        let power = 1;
        
        // Calculate hash for first substring
        for (let i = 0; i < length; i++) {
            hash = (hash * base + s.charCodeAt(i)) % mod;
            if (i < length - 1) {
                power = (power * base) % mod;
            }
        }
        
        const seen = new Map();
        seen.set(hash, 0);
        
        // Rolling hash for remaining substrings
        for (let i = length; i < n; i++) {
            // Remove leftmost character
            hash = (hash - s.charCodeAt(i - length) * power) % mod;
            if (hash < 0) hash += mod;
            // Add rightmost character
            hash = (hash * base + s.charCodeAt(i)) % mod;
            
            if (seen.has(hash)) {
                const start = seen.get(hash);
                return s.substring(start, start + length);
            }
            seen.set(hash, i - length + 1);
        }
        
        return "";
    }
    
    // Binary search for maximum length
    let left = 0, right = n - 1;
    let result = "";
    
    while (left <= right) {
        const mid = Math.floor((left + right) / 2);
        const candidate = hasDuplicate(mid);
        
        if (candidate) {
            result = candidate;
            left = mid + 1;
        } else {
            right = mid - 1;
        }
    }
    
    return result;
}

C#:

public class Solution {
    /// <summary>
    /// Rolling hash with binary search approach
    /// Time: O(n log n) - binary search * rolling hash
    /// Space: O(n) - hash storage
    /// </summary>
    public string LongestDupSubstring(string s) {
        int n = s.Length;
        int baseNum = 26;
        int mod = 1000000007;
        
        int left = 0, right = n - 1;
        string result = "";
        
        while (left <= right) {
            int mid = (left + right) / 2;
            string candidate = HasDuplicate(s, mid, baseNum, mod);
            
            if (!string.IsNullOrEmpty(candidate)) {
                result = candidate;
                left = mid + 1;
            } else {
                right = mid - 1;
            }
        }
        
        return result;
    }
    
    private string HasDuplicate(string s, int length, int baseNum, int mod) {
        if (length == 0) return "";
        
        int n = s.Length;
        long hash = 0;
        long power = 1;
        
        // Calculate hash for first substring
        for (int i = 0; i < length; i++) {
            hash = (hash * baseNum + s[i]) % mod;
            if (i < length - 1) {
                power = (power * baseNum) % mod;
            }
        }
        
        var seen = new Dictionary<long, int>();
        seen[hash] = 0;
        
        // Rolling hash for remaining substrings
        for (int i = length; i < n; i++) {
            // Remove leftmost character
            hash = (hash - s[i - length] * power) % mod;
            if (hash < 0) hash += mod;
            // Add rightmost character
            hash = (hash * baseNum + s[i]) % mod;
            
            if (seen.ContainsKey(hash)) {
                int start = seen[hash];
                return s.Substring(start, length);
            }
            seen[hash] = i - length + 1;
        }
        
        return "";
    }
}

Key Insights

  1. Rolling Hash: Efficient way to compute hash of all substrings of a given length
  2. Binary Search: Find maximum length of repeated substring efficiently
  3. Hash Collisions: Use modulo arithmetic to handle large numbers
  4. Space-Time Tradeoff: Trade space for time efficiency

Edge Cases

  1. No Repeated Substring: Return empty string
  2. Single Character: String with only one character
  3. All Same Characters: String like “aaaa”
  4. Very Long String: Handle large inputs efficiently

Follow-up Questions

  1. Multiple Occurrences: Find substring that appears at least k times
  2. Case Insensitive: Handle case-insensitive matching
  3. Unicode Support: Extend to Unicode characters
  4. Streaming: Handle streaming input

Common Mistakes

  1. Hash Collisions: Not handling hash collisions properly
  2. Modulo Arithmetic: Incorrect modulo operations
  3. Binary Search: Wrong binary search implementation
  4. Memory Usage: Inefficient memory usage for large strings