30. 串联所有单词的子串

发表于 2024-06-07 分类于算法 Disqus：本文字数： 422 阅读时长 ≈ 2 分钟

难度：Hard

标签：哈希表、字符串、滑动窗口

链接：https://leetcode.cn/problems/substring-with-concatenation-of-all-words/description/

在B站上看见一个up的视频，感觉说的很清晰，言简意赅：【五分钟力扣 Leetcode 第30题串联所有单词的子串除 Python入门算法刷题极简解法 23行代码 97%】。

看完视频后，我觉得关键点在于，可以通过比较两个哈希表是否“相等” 来判断子串是否满足要求。

class Solution {
public:
    vector<int> findSubstring(string s, vector<string>& words) {
        int words_num = words.size();
        int uni_len = words[0].size();
        int n = s.size();
        vector<int> res;
        unordered_map<string, int> dict;
        // 记录每个单词的出现次数
        for (auto& w : words) {
            dict[w]++;
        }
        // 外循环遍历的次数为单个单词的长度
        for (int i = 0; i < uni_len; i++) {
            int start = i; // start为子串开始的索引
            unordered_map<string, int> cache; // cache用于记录内循环中，位于dict中的单词的出现次数
            // 内循环，每次增加一个单词长度（即以一个单词长度为最小单位）
            for (int j = i; j < n; j += uni_len) {
                string sub = s.substr(j, uni_len);
                // 判断当前截取的字符串是否存在于dict中
                if (dict.find(sub) != dict.end()) {
                    cache[sub]++;
                    // 当sub在cache中出现次数大于dict中，说明当前start不可能为子串的开始索引（因为单词数目都不相等了），不断移动start位置（移动单元为uni_len），直到cache[sub] <= dict[sub]
                    while (cache[sub] > dict[sub]) {
                        string remove_str = s.substr(start, uni_len);
                        cache[remove_str]--;
                        start += uni_len;
                    }
                    // 若cache和dict中内容一致，说明找到了一个符合要求的子串
                    if (dict == cache) {
                        res.push_back(start);
                    }
                } else {
                    // 若当前截取字符串不存在于dict中，跳过这个单词
                    start = j + uni_len;
                    cache.clear();
                }
            }
        }
        return res;
    }
};