feat(0047): 添加筛法计算不同质因数个数及基于数位的动态搜索策略

2026-02-19 13:27:37 +08:00
parent e5064b278d
commit f42b8e6461
2 changed files with 66 additions and 1 deletions
--- a/solutions/0047.PrimesFactors/euler_47.py
+++ b/solutions/0047.PrimesFactors/euler_47.py
@@ -145,6 +145,19 @@ def factorize_list(n: int) -> List[int]:
    return result


+def distinct_prime_factors_sieve(limit: int) -> List[int]:
+    """
+    Return a list pf where pf[i] is the number of distinct prime factors of i, for i from 0 to limit.
+    Computed with a sieve; complexity O(limit log log limit).
+    """
+    pf = [0] * (limit + 1)  # one counter per integer 0..limit, all starting at zero
+    for p in range(2, limit + 1):
+        if pf[p] == 0:  # p is prime: no smaller prime has incremented its counter
+            for multiple in range(p, limit + 1, p):
+                pf[multiple] += 1  # p divides multiple, so it is counted exactly once
+    return pf
+
+
@timer
 def main(limit: int = 4) -> None:
    n = 1155
@@ -164,5 +177,56 @@ def main(limit: int = 4) -> None:
        n += 1


+@timer
+def main_key(limit: int = 4) -> None:  # for i = 2..limit: print a run of i consecutive integers, each with exactly i distinct prime factors
+    if limit < 2:
+        raise ValueError("limit must be at least 2")
+    lease = [2, 3]  # digit counts used to pick each search start; seeded for i = 2 and i = 3
+    # Precompute a sufficiently large range; the answer is assumed not to exceed 10^7, but for efficiency the range is grown dynamically
+    # A cache is used and extended on demand
+    _pf_cache = []  # cache array; index i holds the number of distinct prime factors of i
+    _pf_cache_limit = 0  # largest index the cache currently covers

+    def ensure_cache(upto: int) -> None:  # make sure _pf_cache covers every index up to upto
+        nonlocal _pf_cache, _pf_cache_limit
+        if upto <= _pf_cache_limit:
+            return
+        # Extend the cache: at least to upto each time, or by a growth step
+        new_limit = max(upto, _pf_cache_limit * 2 if _pf_cache_limit else upto + 10000)  # first fill: upto + 10000; later: at least double
+        # Recompute the whole cache or update it incrementally? For simplicity, recompute the whole range
+        # Incremental updating is more complex, so everything up to new_limit is recomputed
+        _pf_cache = distinct_prime_factors_sieve(new_limit)
+        _pf_cache_limit = new_limit

+    for i in range(2, limit + 1):
+        if i in [2, 3]:
+            n = lease[i - 2]
+        else:
+            n = lease[i - 3] + lease[i - 4]  # sum of the digit counts recorded for i - 1 and i - 2
+        start = 10 ** (n - 1)  # smallest n-digit number; the search begins here
+        # Make sure the cache covers at least start plus an estimated window size, e.g. 10000
+        ensure_cache(start + 10000)
+        keep_ok = False
+        res = []  # current run of consecutive matching numbers
+        current = start
+        while True:
+            # If the current number is beyond the cache, extend the cache
+            if current > _pf_cache_limit:
+                ensure_cache(current + 10000)
+            if _pf_cache[current] == i:
+                res.append(current)
+                keep_ok = True
+                if len(res) == i and keep_ok:
+                    print(f"{i} - {res}")
+                    if i > 3:
+                        lease.append(len(str(max(res))))  # record the digit count of the largest number in the run
+                    break
+            else:
+                res = []  # run broken: start collecting again
+                keep_ok = False
+            current += 1
+
+
 if __name__ == "__main__":
     main()
+    main_key()  # also run the variant that seeds each search start from digit counts
--- a/solutions/0047.PrimesFactors/readme.md
+++ b/solutions/0047.PrimesFactors/readme.md
@@ -4,4 +4,5 @@
 另外我觉得获得搜索开始的起始点，可能是除了快速计算质因数外，最重要的问题了。
 我只是使用最简单的前n个素数积作为起点，似乎也不是最好的估计。

-这里我只是提供一个简单的假设，n质数n连续的数字可能需要从n-1和n-2这两组数的平均数位之和，作为搜索的起点.
+这里我只是提供一个简单的假设：寻找“n个连续整数、每个恰有n个不同质因数”时，可以把n-1和n-2两种情况所得结果的最大位数相加得到d，并以10^(d-1)作为搜索的起点。
+还需要验证6质数6连续的情况。我现在这个算法还得再优化，才能更好地计算出结果。