בעיות מורכבות בתורת ההסתברות עם פתרונות בפייתון

מבוא

תורת ההסתברות ממלאת תפקיד מרכזי בלמידת מכונה, בסטטיסטיקה ובניתוח נתונים. במאמר זה ננתח 13 בעיות קלאסיות עם מימושים בפייתון, המדגימים כיצד עקרונות הסתברותיים מיושמים בפועל במדעי המחשב, בניתוח נתונים ובבינה מלאכותית.

תוכן העניינים

בעיה 1: קירוב ערך פאי באמצעות שיטת מונטה קרלו

בעיה 1: קירוב ערך פאי באמצעות שיטת מונטה קרלו

import random
import matplotlib.pyplot as plt

# Monte Carlo estimate of pi: sample points uniformly in the unit square and
# count how many land inside the quarter circle of radius 1.
num_points = 100000

# Each point inside the quarter circle contributes 1; the x coordinate is
# drawn before the y coordinate for every point.
inside_circle = sum(
    1
    for _ in range(num_points)
    if random.random() ** 2 + random.random() ** 2 <= 1
)

# The hit fraction approximates pi / 4 (quarter-circle area over square area).
pi_approx = 4 * (inside_circle / num_points)
print(f"Estimated π: {pi_approx}")

בעיה 2: בעיית יום ההולדת

import math

def birthday_problem(n: int, days: int = 365) -> float:
    """Return the probability that at least two of ``n`` people share a birthday.

    Birthdays are treated as independent and uniformly distributed over
    ``days`` equally likely days.

    Args:
        n: Number of people in the group. Values of 0 or less give 0.0.
        days: Number of possible birthdays; defaults to 365.

    Returns:
        The collision probability as a float in [0, 1].

    Raises:
        ValueError: If ``days`` is less than 1.
    """
    if days < 1:
        raise ValueError("days must be a positive integer")
    # Pigeonhole principle: more people than days forces a shared birthday,
    # so there is no need to multiply through zero and negative factors.
    if n > days:
        return 1.0
    # Probability that all n birthdays are distinct.
    all_distinct = 1.0
    for i in range(n):
        all_distinct *= (days - i) / days
    return 1 - all_distinct

# Evaluate the collision probability for a group of 23 people and print it
# rounded to four decimal places.
n = 23
print(f"Birthday collision probability for {n} people: {birthday_problem(n):.4f}")

בעיה 3: חוק המספרים הגדולים

import numpy as np
import matplotlib.pyplot as plt

# Law of large numbers demo: track the running mean of repeated coin flips.
trials = 10000
# One 0/1 outcome per flip, each with success probability 0.5.
results = np.random.binomial(n=1, p=0.5, size=trials)
# Running count of successes divided by the number of flips made so far.
successes_so_far = results.cumsum()
flips_so_far = np.arange(1, trials + 1)
cumulative_mean = successes_so_far / flips_so_far

# Draw the running mean together with a dashed reference line at 0.5.
plt.plot(cumulative_mean)
plt.axhline(y=0.5, color="red", linestyle="dashed")
plt.title("Law of Large Numbers")
plt.xlabel("Number of Trials")
plt.ylabel("Mean")
plt.show()

בעיה 4: הסתברות בייסיאנית

def bayes_theorem(prior: float, likelihood: float, false_positive_rate: float) -> float:
    """Return the posterior probability of a hypothesis given a positive test.

    Args:
        prior: P(H), the probability of the hypothesis before testing.
        likelihood: P(positive | H).
        false_positive_rate: P(positive | not H).

    Returns:
        P(H | positive), computed with Bayes' theorem.
    """
    # Probability mass of a positive result coming from each hypothesis.
    true_positive_mass = likelihood * prior
    false_positive_mass = false_positive_rate * (1 - prior)
    return true_positive_mass / (true_positive_mass + false_positive_mass)

# Disease screening example: probability of illness given a positive test.
prior, sensitivity, false_positive = 0.01, 0.99, 0.05

posterior = bayes_theorem(
    prior=prior,
    likelihood=sensitivity,
    false_positive_rate=false_positive,
)
print(f"Posterior probability: {posterior:.4f}")

בעיה 5: שרשראות מרקוב

import numpy as np

# Simulate 100 steps of a two-state Markov chain that starts in state "A".
states = ["A", "B"]
# Row i holds the transition probabilities out of state i.
transition_matrix = [[0.9, 0.1], [0.5, 0.5]]
current_state = 0
sequence = []

while len(sequence) < 100:
    sequence.append(states[current_state])
    next_state_probs = transition_matrix[current_state]
    current_state = np.random.choice([0, 1], p=next_state_probs)

print("Markov chain sequence:", sequence)

בעיה 6: הילוך אקראי

import numpy as np
import matplotlib.pyplot as plt

# One-dimensional random walk: every step moves one unit left or right.
steps = 1000
trajectory = [0]

for _ in range(steps):
    # Extend the path by one randomly chosen -1 / +1 move.
    trajectory.append(trajectory[-1] + np.random.choice([-1, 1]))

# Final position after all steps.
position = trajectory[-1]

# Plot the walker's position against the step number.
plt.plot(trajectory)
plt.title("Random Walk")
plt.xlabel("Steps")
plt.ylabel("Position")
plt.show()

# trajectory[0] is the starting point rather than a return, so it is excluded;
# counting it would overstate the number of returns by one.
print(f"Returns to origin: {trajectory[1:].count(0)}")

בעיה 7: פרדוקס מונטי הול

import numpy as np

def monty_hall_simulation(trials: int = 10000) -> float:
    """Estimate the Monty Hall win rate for a player who always switches.

    Each trial shuffles one car and two goats behind three doors, picks a
    door at random, has the host open a goat door, and then switches to the
    remaining closed door.

    Args:
        trials: Number of simulated games.

    Returns:
        The fraction of games won by switching.
    """
    switch_wins = 0
    for _ in range(trials):
        doors = [0, 0, 1]  # 1 marks the winning door
        np.random.shuffle(doors)
        choice = np.random.randint(0, 3)
        # The host opens the lowest-numbered losing door the player did not pick.
        open_door = next(d for d in range(3) if d != choice and doors[d] == 0)
        # Door indices 0 + 1 + 2 sum to 3, so this is the one door left over.
        switch_choice = 3 - choice - open_door
        if doors[switch_choice] == 1:
            switch_wins += 1
    return switch_wins / trials

# Run the simulation with its default number of trials and print the
# switching win rate rounded to four decimal places.
print(f"Win rate when switching: {monty_hall_simulation():.4f}")

בעיה 8: גרף אקראי (ארדש-רניי)

import networkx as nx
import matplotlib.pyplot as plt

# Generate a random graph on n nodes with edge probability p, then draw it
# and report its size.
n, p = 100, 0.05
G = nx.erdos_renyi_graph(n=n, p=p)

# Small markers keep the 100-node drawing readable.
nx.draw(G, node_size=20)
plt.title("Erdős–Rényi Graph")
plt.show()

print(f"Nodes: {G.number_of_nodes()}, Edges: {G.number_of_edges()}")

בעיה 9: התפלגות סכום משתנים אקראיים

import numpy as np
import matplotlib.pyplot as plt

# Element-wise sum of two separately drawn Uniform(0, 1) samples of 10000 values.
x = np.random.uniform(low=0, high=1, size=10000)
y = np.random.uniform(low=0, high=1, size=10000)
sum_xy = np.add(x, y)

# Density-normalised histogram of the sums.
plt.hist(sum_xy, density=True, bins=50)
plt.title("Sum of Two Uniform Variables")
plt.show()

בעיה 10: אמידת λ בהתפלגות פואסון

import numpy as np
from scipy.stats import poisson

# Estimate the Poisson rate λ with the sample mean of 1000 draws.
true_lambda = 3
sample = np.random.poisson(lam=true_lambda, size=1000)
estimated_lambda = sample.mean()

print(f"True λ: {true_lambda}, Estimated λ: {estimated_lambda:.4f}")

בעיה 11: שיטת בוטסטראפ לאומדן שונות

import numpy as np
import matplotlib.pyplot as plt

# Bootstrap the sampling distribution of the mean: resample the data with
# replacement many times and record the mean of every resample.
bootstrap_samples = 1000
sample_data = np.random.normal(loc=0, scale=1, size=100)
means = [
    np.mean(np.random.choice(sample_data, size=len(sample_data), replace=True))
    for _ in range(bootstrap_samples)
]

# Density-normalised histogram of the bootstrap means.
plt.hist(means, density=True, bins=30)
plt.title("Bootstrap Distribution of Mean")
plt.show()

# The variance of the bootstrap means is the estimate being reported.
print(f"Estimated variance: {np.var(means):.4f}")

בעיה 12: פרדוקס סימפסון

import pandas as pd

# Simpson's paradox: group B has the higher acceptance rate within each
# gender, yet group A has the higher rate once the genders are pooled.
# The reversal needs unbalanced applicant counts: most of A's applicants fall
# in the high-rate "M" stratum and most of B's in the low-rate "F" stratum.
# (With equal counts in every cell the pooled ranking cannot flip.)
data = pd.DataFrame({
    "Group": ["A", "A", "B", "B"],
    "Gender": ["M", "F", "M", "F"],
    "Accepted": [80, 4, 18, 30],
    "Applied": [100, 20, 20, 100],
})

# Per-(group, gender) acceptance rates: A/M 0.8, A/F 0.2, B/M 0.9, B/F 0.3.
data["Rate"] = data["Accepted"] / data["Applied"]
print(data)

# Pool the genders within each group: A = 84/120 = 0.7, B = 48/120 = 0.4.
group_rate = data.groupby("Group").agg({"Accepted": "sum", "Applied": "sum"})
group_rate["Rate"] = group_rate["Accepted"] / group_rate["Applied"]
print("\nSimpson's Paradox example:")
print(group_rate)

בעיה 13: התפלגות בינומית ומשפט הגבול המרכזי

import numpy as np
import matplotlib.pyplot as plt

# Central limit theorem check: collect the means of many Binomial(n, p) samples.
samples = 1000    # number of sample means to collect
n = 50            # trials per binomial draw
p = 0.3           # success probability per trial
# Draws averaged into each sample mean. Kept separate from n so that changing
# the binomial parameter does not silently change the sample size as well.
sample_size = 50

sample_means = [
    np.mean(np.random.binomial(n, p, sample_size)) for _ in range(samples)
]

# Density-normalised histogram of the sample means.
plt.hist(sample_means, density=True, bins=30)
plt.title("Central Limit Theorem (Binomial)")
plt.ylabel("Density")
plt.xlabel("Sample Mean")
plt.show()