Language Selection
Choose your preferred programming language
Showing: Python
Problem Statement
Given a string s, return the longest substring that appears at least twice in s. If there are multiple such substrings, return any one of them.
Example 1:
Input: s = "banana"
Output: "ana"
Explanation: "ana" appears twice in "banana"
Example 2:
Input: s = "abcd"
Output: ""
Explanation: No substring appears more than once
Example 3:
Input: s = "aabcaabdaab"
Output: "aab"
Explanation: "aab" appears three times
Constraints:
- 2 <= s.length <= 3 * 10^4
- s consists of lowercase English letters only
Approach 1: Rolling Hash with Binary Search
Algorithm
- Use binary search to find the maximum length of repeated substring
- For each length, use rolling hash to find duplicates
- Store hash values and their positions to detect collisions
Solution
Python:
def longestDupSubstring(s):
"""
Rolling hash with binary search approach
Time: O(n log n) - binary search * rolling hash
Space: O(n) - hash storage
"""
n = len(s)
base = 26
mod = 10**9 + 7
def hasDuplicate(length):
if length == 0:
return ""
# Rolling hash
hash_val = 0
power = 1
# Calculate hash for first substring
for i in range(length):
hash_val = (hash_val * base + ord(s[i])) % mod
if i < length - 1:
power = (power * base) % mod
seen = {hash_val: 0}
# Rolling hash for remaining substrings
for i in range(length, n):
# Remove leftmost character
hash_val = (hash_val - ord(s[i - length]) * power) % mod
# Add rightmost character
hash_val = (hash_val * base + ord(s[i])) % mod
if hash_val in seen:
return s[seen[hash_val]:seen[hash_val] + length]
seen[hash_val] = i - length + 1
return ""
# Binary search for maximum length
left, right = 0, n - 1
result = ""
while left <= right:
mid = (left + right) // 2
candidate = hasDuplicate(mid)
if candidate:
result = candidate
left = mid + 1
else:
right = mid - 1
return result
Java:
class Solution {
/**
* Rolling hash with binary search approach
* Time: O(n log n) - binary search * rolling hash
* Space: O(n) - hash storage
*/
public String longestDupSubstring(String s) {
int n = s.length();
int base = 26;
int mod = 1000000007;
int left = 0, right = n - 1;
String result = "";
while (left <= right) {
int mid = (left + right) / 2;
String candidate = hasDuplicate(s, mid, base, mod);
if (!candidate.isEmpty()) {
result = candidate;
left = mid + 1;
} else {
right = mid - 1;
}
}
return result;
}
private String hasDuplicate(String s, int length, int base, int mod) {
if (length == 0) return "";
int n = s.length();
long hash = 0;
long power = 1;
// Calculate hash for first substring
for (int i = 0; i < length; i++) {
hash = (hash * base + s.charAt(i)) % mod;
if (i < length - 1) {
power = (power * base) % mod;
}
}
Map<Long, Integer> seen = new HashMap<>();
seen.put(hash, 0);
// Rolling hash for remaining substrings
for (int i = length; i < n; i++) {
// Remove leftmost character
hash = (hash - s.charAt(i - length) * power) % mod;
if (hash < 0) hash += mod;
// Add rightmost character
hash = (hash * base + s.charAt(i)) % mod;
if (seen.containsKey(hash)) {
int start = seen.get(hash);
return s.substring(start, start + length);
}
seen.put(hash, i - length + 1);
}
return "";
}
}
Go:
// longestDupSubstring - Rolling hash with binary search approach
// Time: O(n log n) - binary search * rolling hash
// Space: O(n) - hash storage
func longestDupSubstring(s string) string {
n := len(s)
base := 26
mod := 1000000007
var hasDuplicate func(int) string
hasDuplicate = func(length int) string {
if length == 0 {
return ""
}
hash := 0
power := 1
// Calculate hash for first substring
for i := 0; i < length; i++ {
hash = (hash*base + int(s[i])) % mod
if i < length-1 {
power = (power * base) % mod
}
}
seen := make(map[int]int)
seen[hash] = 0
// Rolling hash for remaining substrings
for i := length; i < n; i++ {
// Remove leftmost character
hash = (hash - int(s[i-length])*power) % mod
if hash < 0 {
hash += mod
}
// Add rightmost character
hash = (hash*base + int(s[i])) % mod
if start, exists := seen[hash]; exists {
return s[start : start+length]
}
seen[hash] = i - length + 1
}
return ""
}
// Binary search for maximum length
left, right := 0, n-1
result := ""
for left <= right {
mid := (left + right) / 2
candidate := hasDuplicate(mid)
if candidate != "" {
result = candidate
left = mid + 1
} else {
right = mid - 1
}
}
return result
}
JavaScript:
/**
* Rolling hash with binary search approach
* Time: O(n log n) - binary search * rolling hash
* Space: O(n) - hash storage
*/
function longestDupSubstring(s) {
const n = s.length;
const base = 26;
const mod = 1000000007;
function hasDuplicate(length) {
if (length === 0) return "";
let hash = 0;
let power = 1;
// Calculate hash for first substring
for (let i = 0; i < length; i++) {
hash = (hash * base + s.charCodeAt(i)) % mod;
if (i < length - 1) {
power = (power * base) % mod;
}
}
const seen = new Map();
seen.set(hash, 0);
// Rolling hash for remaining substrings
for (let i = length; i < n; i++) {
// Remove leftmost character
hash = (hash - s.charCodeAt(i - length) * power) % mod;
if (hash < 0) hash += mod;
// Add rightmost character
hash = (hash * base + s.charCodeAt(i)) % mod;
if (seen.has(hash)) {
const start = seen.get(hash);
return s.substring(start, start + length);
}
seen.set(hash, i - length + 1);
}
return "";
}
// Binary search for maximum length
let left = 0, right = n - 1;
let result = "";
while (left <= right) {
const mid = Math.floor((left + right) / 2);
const candidate = hasDuplicate(mid);
if (candidate) {
result = candidate;
left = mid + 1;
} else {
right = mid - 1;
}
}
return result;
}
C#:
public class Solution {
/// <summary>
/// Rolling hash with binary search approach
/// Time: O(n log n) - binary search * rolling hash
/// Space: O(n) - hash storage
/// </summary>
public string LongestDupSubstring(string s) {
int n = s.Length;
int baseNum = 26;
int mod = 1000000007;
int left = 0, right = n - 1;
string result = "";
while (left <= right) {
int mid = (left + right) / 2;
string candidate = HasDuplicate(s, mid, baseNum, mod);
if (!string.IsNullOrEmpty(candidate)) {
result = candidate;
left = mid + 1;
} else {
right = mid - 1;
}
}
return result;
}
private string HasDuplicate(string s, int length, int baseNum, int mod) {
if (length == 0) return "";
int n = s.Length;
long hash = 0;
long power = 1;
// Calculate hash for first substring
for (int i = 0; i < length; i++) {
hash = (hash * baseNum + s[i]) % mod;
if (i < length - 1) {
power = (power * baseNum) % mod;
}
}
var seen = new Dictionary<long, int>();
seen[hash] = 0;
// Rolling hash for remaining substrings
for (int i = length; i < n; i++) {
// Remove leftmost character
hash = (hash - s[i - length] * power) % mod;
if (hash < 0) hash += mod;
// Add rightmost character
hash = (hash * baseNum + s[i]) % mod;
if (seen.ContainsKey(hash)) {
int start = seen[hash];
return s.Substring(start, length);
}
seen[hash] = i - length + 1;
}
return "";
}
}
Key Insights
- Rolling Hash: Efficient way to compute hash of all substrings of a given length
- Binary Search: Find maximum length of repeated substring efficiently
- Hash Collisions: Use modulo arithmetic to handle large numbers
- Space-Time Tradeoff: Trade space for time efficiency
Edge Cases
- No Repeated Substring: Return empty string
- Single Character: String with only one character
- All Same Characters: String like “aaaa”
- Very Long String: Handle large inputs efficiently
Follow-up Questions
- Multiple Occurrences: Find substring that appears at least k times
- Case Insensitive: Handle case-insensitive matching
- Unicode Support: Extend to Unicode characters
- Streaming: Handle streaming input
Common Mistakes
- Hash Collisions: Not handling hash collisions properly
- Modulo Arithmetic: Incorrect modulo operations
- Binary Search: Wrong binary search implementation
- Memory Usage: Inefficient memory usage for large strings