Skip to content

Parallel Algorithms

Parallel Algorithms#

Parallel algorithms split work into smaller pieces that execute simultaneously across multiple CPU cores. Think of assembling furniture—instead of one person doing everything sequentially, a team can work on different parts at the same time, dramatically reducing completion time.

The key is identifying which work can happen independently. For example, when sorting a large array, you can divide it into chunks, sort each chunk in parallel, then merge the results—much faster than sorting the entire array sequentially.

Common Approaches#

Divide and Conquer#

Divide-and-conquer naturally lends itself to parallelization. Split the problem into independent subproblems, solve them in parallel, then combine the results. Merge sort is a classic example—divide the array in half, sort both halves concurrently, then merge them back together.

import java.util.concurrent.*;
import java.util.Arrays;

// Parallel merge sort using Fork/Join framework
public class ParallelMergeSort {
    // Thread pool that manages worker threads for parallel execution
    private final ForkJoinPool pool;

    public ParallelMergeSort() {
        // Use the common pool shared across the JVM for efficiency
        this.pool = ForkJoinPool.commonPool();
    }

    // Entry point: submits the sorting task to the thread pool
    public int[] parallelMergeSort(int[] array) {
        return pool.invoke(new MergeSortTask(array, 0, array.length - 1));
    }

    // RecursiveTask enables fork/join parallelism with return values
    private class MergeSortTask extends RecursiveTask<int[]> {
        private final int[] array;
        private final int left, right;  // Boundaries of the subarray to sort

        MergeSortTask(int[] array, int left, int right) {
            this.array = array;
            this.left = left;
            this.right = right;
        }

        @Override
        protected int[] compute() {
            // Base case: small subarrays don't need further splitting
            if (right - left < 2) {
                return Arrays.copyOfRange(array, left, right + 1);
            }

            // Divide: split array into two halves
            int mid = (left + right) / 2;
            MergeSortTask leftTask = new MergeSortTask(array, left, mid);
            MergeSortTask rightTask = new MergeSortTask(array, mid + 1, right);

            // Fork: schedule left task on another thread asynchronously
            leftTask.fork();
            // Compute right task on current thread
            int[] rightResult = rightTask.compute();
            // Join: wait for left task to complete and get result
            int[] leftResult = leftTask.join();

            // Conquer: merge the two sorted halves
            return merge(leftResult, rightResult);
        }

        // Merge two sorted arrays into a single sorted array
        private int[] merge(int[] left, int[] right) {
            int[] result = new int[left.length + right.length];
            int i = 0, j = 0, k = 0;

            // Compare elements from both arrays, take smaller one
            while (i < left.length && j < right.length) {
                if (left[i] <= right[j]) {
                    result[k++] = left[i++];
                } else {
                    result[k++] = right[j++];
                }
            }

            // Copy remaining elements from left array
            while (i < left.length) result[k++] = left[i++];
            // Copy remaining elements from right array
            while (j < right.length) result[k++] = right[j++];

            return result;
        }
    }
}

// Usage
ParallelMergeSort sorter = new ParallelMergeSort();
int[] unsorted = {5, 2, 8, 1, 9, 3, 7, 4, 6};
int[] sorted = sorter.parallelMergeSort(unsorted);
System.out.println("Sorted: " + Arrays.toString(sorted));
// Output: Sorted: [1, 2, 3, 4, 5, 6, 7, 8, 9]
import java.util.concurrent.*

// Parallel merge sort using Fork/Join framework
class ParallelMergeSort {
    // Shared thread pool for parallel task execution
    private val pool = ForkJoinPool.commonPool()

    // Entry point: submits the sorting task to the thread pool
    fun parallelMergeSort(array: IntArray): IntArray {
        return pool.invoke(MergeSortTask(array, 0, array.size - 1))
    }

    // Inner class for recursive parallel sorting tasks
    private inner class MergeSortTask(
        private val array: IntArray,
        private val left: Int,   // Start index of subarray
        private val right: Int   // End index of subarray
    ) : RecursiveTask<IntArray>() {

        override fun compute(): IntArray {
            // Base case: small subarrays don't need further splitting
            if (right - left < 2) {
                return array.sliceArray(left..right)
            }

            // Divide: split array into two halves
            val mid = (left + right) / 2
            val leftTask = MergeSortTask(array, left, mid)
            val rightTask = MergeSortTask(array, mid + 1, right)

            // Fork: schedule left task on another thread asynchronously
            leftTask.fork()
            // Compute right task on current thread (work-stealing optimization)
            val rightResult = rightTask.compute()
            // Join: wait for left task to complete and get result
            val leftResult = leftTask.join()

            // Conquer: merge the two sorted halves
            return merge(leftResult, rightResult)
        }

        // Merge two sorted arrays into a single sorted array
        private fun merge(left: IntArray, right: IntArray): IntArray {
            val result = IntArray(left.size + right.size)
            var i = 0; var j = 0; var k = 0

            // Compare elements from both arrays, take smaller one
            while (i < left.size && j < right.size) {
                if (left[i] <= right[j]) {
                    result[k++] = left[i++]
                } else {
                    result[k++] = right[j++]
                }
            }

            // Copy remaining elements from each array
            while (i < left.size) result[k++] = left[i++]
            while (j < right.size) result[k++] = right[j++]

            return result
        }
    }
}

// Usage
val sorter = ParallelMergeSort()
val unsorted = intArrayOf(5, 2, 8, 1, 9, 3, 7, 4, 6)
val sorted = sorter.parallelMergeSort(unsorted)
println("Sorted: ${sorted.contentToString()}")
// Output: Sorted: [1, 2, 3, 4, 5, 6, 7, 8, 9]

TypeScript/JavaScript doesn't support traditional threading or parallel algorithms. JavaScript runs in a single-threaded event loop model.

Dart doesn't support traditional threading or parallel algorithms. Dart uses isolates with separate memory spaces that communicate via message passing.

import Foundation

// Parallel merge sort using Grand Central Dispatch for true concurrency
class ParallelMergeSort {
    // Concurrent queue for parallel execution across CPU cores
    private let queue = DispatchQueue.global(qos: .userInitiated)

    // Public entry point for parallel merge sort
    func parallelMergeSort(_ array: [Int]) -> [Int] {
        // Base case: arrays of 0 or 1 elements are already sorted
        if array.count <= 1 {
            return array
        }

        // Split array into two halves
        let mid = array.count / 2
        let left = Array(array[0..<mid])
        let right = Array(array[mid..<array.count])

        var leftSorted: [Int] = []
        var rightSorted: [Int] = []

        // Create dispatch group to synchronize parallel tasks
        let group = DispatchGroup()

        // Sort left half on a background thread
        group.enter()
        queue.async {
            leftSorted = self.parallelMergeSort(left)
            group.leave()
        }

        // Sort right half on another background thread
        group.enter()
        queue.async {
            rightSorted = self.parallelMergeSort(right)
            group.leave()
        }

        // Wait for both sorting tasks to complete
        group.wait()

        // Merge the two sorted halves
        return merge(leftSorted, rightSorted)
    }

    // Merge two sorted arrays into a single sorted array
    private func merge(_ left: [Int], _ right: [Int]) -> [Int] {
        var result: [Int] = []
        result.reserveCapacity(left.count + right.count)
        var i = 0, j = 0

        // Compare elements from both arrays and add smaller one
        while i < left.count && j < right.count {
            if left[i] <= right[j] {
                result.append(left[i])
                i += 1
            } else {
                result.append(right[j])
                j += 1
            }
        }

        // Append remaining elements from left array
        while i < left.count {
            result.append(left[i])
            i += 1
        }

        // Append remaining elements from right array
        while j < right.count {
            result.append(right[j])
            j += 1
        }

        return result
    }
}

// Usage
let sorter = ParallelMergeSort()
let unsorted = [5, 2, 8, 1, 9, 3, 7, 4, 6]
let sorted = sorter.parallelMergeSort(unsorted)
print("Sorted: \(sorted)")
// Output: Sorted: [1, 2, 3, 4, 5, 6, 7, 8, 9]
import multiprocessing
from concurrent.futures import ProcessPoolExecutor

# Parallel merge sort using ProcessPoolExecutor for multi-core execution
class ParallelMergeSort:
    def parallel_merge_sort(self, array):
        # Base case: arrays of 0 or 1 elements are already sorted
        if len(array) <= 1:
            return array

        # Divide: split array into two halves
        mid = len(array) // 2
        left = array[:mid]
        right = array[mid:]

        # Create process pool for parallel execution across CPU cores
        with ProcessPoolExecutor() as executor:
            # Submit left half sorting to run on a separate process
            left_future = executor.submit(self.parallel_merge_sort, left)
            # Submit right half sorting to run on another process
            right_future = executor.submit(self.parallel_merge_sort, right)

            # Wait for both sorting tasks to complete and get results
            left_sorted = left_future.result()
            right_sorted = right_future.result()

        # Conquer: merge the two sorted halves
        return self.merge(left_sorted, right_sorted)

    # Merge two sorted arrays into a single sorted array
    def merge(self, left, right):
        result = []
        i = j = 0

        # Compare elements from both arrays, take smaller one
        while i < len(left) and j < len(right):
            if left[i] <= right[j]:
                result.append(left[i])
                i += 1
            else:
                result.append(right[j])
                j += 1

        # Append remaining elements from each array
        result.extend(left[i:])
        result.extend(right[j:])
        return result

# Usage
sorter = ParallelMergeSort()
unsorted = [5, 2, 8, 1, 9, 3, 7, 4, 6]
sorted_array = sorter.parallel_merge_sort(unsorted)
print(f"Sorted: {sorted_array}")
# Output: Sorted: [1, 2, 3, 4, 5, 6, 7, 8, 9]

Map-Reduce#

Map-reduce provides a simple pattern for large-scale parallel processing. The map phase applies a function to each input element in parallel, producing intermediate results. The reduce phase combines those results, also in parallel. This pattern handles massive datasets efficiently—think counting word frequencies in millions of documents.

import java.util.concurrent.*;
import java.util.stream.*;
import java.util.List;
import java.util.Map;

// Map-reduce implementation using Java parallel streams
public class MapReduceExample {
    // Parallel word count across multiple documents
    public Map<String, Integer> wordCount(List<String> documents) {
        return documents.parallelStream()  // Process documents in parallel
            // Map phase: split each document into individual words
            .flatMap(doc -> Arrays.stream(doc.split("\\s+")))
            // Reduce phase: group by word and count occurrences (thread-safe)
            .collect(Collectors.groupingByConcurrent(
                word -> word.toLowerCase(),
                Collectors.summingInt(word -> 1)
            ));
    }

    // Parallel sum of squares calculation
    public int sumOfSquares(List<Integer> numbers) {
        return numbers.parallelStream()  // Process numbers in parallel
            .mapToInt(n -> n * n)        // Map phase: compute square of each number
            .sum();                       // Reduce phase: sum all squares
    }

    // Parallel average calculation
    public double average(List<Double> values) {
        return values.parallelStream()   // Process values in parallel
            .mapToDouble(Double::doubleValue)
            .average()                   // Reduce phase: compute average
            .orElse(0.0);                // Default value if empty
    }
}

// Usage
MapReduceExample mapReduce = new MapReduceExample();

// Word count example
List<String> documents = Arrays.asList("hello world", "hello java", "world of java");
Map<String, Integer> wordCounts = mapReduce.wordCount(documents);
System.out.println("Word counts: " + wordCounts);
// Output: Word counts: {hello=2, world=2, java=2, of=1}

// Sum of squares
List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5);
int sumSquares = mapReduce.sumOfSquares(numbers);
System.out.println("Sum of squares: " + sumSquares);
// Output: Sum of squares: 55

// Average
List<Double> values = Arrays.asList(10.0, 20.0, 30.0, 40.0, 50.0);
double avg = mapReduce.average(values);
System.out.println("Average: " + avg);
// Output: Average: 30.0
import java.util.concurrent.*

// Map-reduce implementation using Kotlin with Java parallel streams
class MapReduceExample {
    // Parallel word count across multiple documents
    fun wordCount(documents: List<String>): Map<String, Int> {
        return documents.parallelStream()  // Process documents in parallel
            // Map phase: split each document into words
            .flatMap { doc -> doc.split("\\s+").stream() }
            // Reduce phase: group by word and count occurrences (thread-safe)
            .collect(Collectors.groupingByConcurrent(
                { it.lowercase() },
                Collectors.summingInt { 1 }
            ))
    }

    // Parallel sum of squares calculation
    fun sumOfSquares(numbers: List<Int>): Int {
        return numbers.parallelStream()  // Process numbers in parallel
            .mapToInt { it * it }        // Map phase: compute square of each number
            .sum()                        // Reduce phase: sum all squares
    }

    // Parallel average calculation
    fun average(values: List<Double>): Double {
        return values.parallelStream()   // Process values in parallel
            .mapToDouble { it }
            .average()                   // Reduce phase: compute average
            .orElse(0.0)                 // Default value if empty
    }
}

// Usage
val mapReduce = MapReduceExample()

// Word count example
val documents = listOf("hello world", "hello kotlin", "world of kotlin")
val wordCounts = mapReduce.wordCount(documents)
println("Word counts: $wordCounts")
// Output: Word counts: {hello=2, world=2, kotlin=2, of=1}

// Sum of squares
val numbers = listOf(1, 2, 3, 4, 5)
val sumSquares = mapReduce.sumOfSquares(numbers)
println("Sum of squares: $sumSquares")
// Output: Sum of squares: 55

// Average
val values = listOf(10.0, 20.0, 30.0, 40.0, 50.0)
val avg = mapReduce.average(values)
println("Average: $avg")
// Output: Average: 30.0

TypeScript/JavaScript doesn't support traditional threading or parallel algorithms. JavaScript runs in a single-threaded event loop model.

Dart doesn't support traditional threading or parallel algorithms. Dart uses isolates with separate memory spaces that communicate via message passing.

import Foundation

// Parallel map-reduce implementation using Grand Central Dispatch
class MapReduceExample {
    // Concurrent queue for parallel execution
    private let queue = DispatchQueue.global(qos: .userInitiated)

    // Thread-safe word count using parallel document processing
    func wordCount(documents: [String]) -> [String: Int] {
        // Thread-safe dictionary access using a serial queue
        let lock = DispatchQueue(label: "wordcount.lock")
        var wordCounts: [String: Int] = [:]

        // Dispatch group to synchronize all parallel tasks
        let group = DispatchGroup()

        // Process each document in parallel on separate threads
        for document in documents {
            group.enter()
            queue.async {
                // Map phase: count words in this document
                let words = document.components(separatedBy: .whitespaces)
                var localCounts: [String: Int] = [:]
                for word in words {
                    let lowercaseWord = word.lowercased()
                    localCounts[lowercaseWord, default: 0] += 1
                }

                // Reduce phase: merge local counts into global counts (thread-safe)
                lock.sync {
                    for (word, count) in localCounts {
                        wordCounts[word, default: 0] += count
                    }
                }
                group.leave()
            }
        }

        // Wait for all parallel tasks to complete
        group.wait()
        return wordCounts
    }

    // Parallel sum of squares using concurrent enumeration
    func sumOfSquares(_ numbers: [Int]) -> Int {
        // Thread-safe accumulator
        let lock = DispatchQueue(label: "sum.lock")
        var result = 0

        // Process chunks in parallel using concurrent perform
        DispatchQueue.concurrentPerform(iterations: numbers.count) { index in
            // Map phase: compute square of current number
            let square = numbers[index] * numbers[index]

            // Reduce phase: add to result (thread-safe)
            lock.sync {
                result += square
            }
        }

        return result
    }

    // Parallel average calculation
    func average(_ values: [Double]) -> Double {
        guard !values.isEmpty else { return 0 }

        // Thread-safe accumulator
        let lock = DispatchQueue(label: "avg.lock")
        var sum = 0.0

        // Process values in parallel
        DispatchQueue.concurrentPerform(iterations: values.count) { index in
            let value = values[index]

            // Thread-safe addition to sum
            lock.sync {
                sum += value
            }
        }

        // Calculate average from parallel sum
        return sum / Double(values.count)
    }
}

// Usage
let mapReduce = MapReduceExample()

// Word count example
let documents = ["hello world", "hello swift", "world of swift"]
let wordCounts = mapReduce.wordCount(documents: documents)
print("Word counts: \(wordCounts)")
// Output: Word counts: ["hello": 2, "world": 2, "swift": 2, "of": 1]

// Sum of squares
let numbers = [1, 2, 3, 4, 5]
let sumSquares = mapReduce.sumOfSquares(numbers)
print("Sum of squares: \(sumSquares)")
// Output: Sum of squares: 55

// Average
let values = [10.0, 20.0, 30.0, 40.0, 50.0]
let avg = mapReduce.average(values)
print("Average: \(avg)")
// Output: Average: 30.0
from concurrent.futures import ThreadPoolExecutor
from collections import Counter

# Helper function for map phase (must be at module level for pickling)
def square(x):
    return x * x

# Parallel map-reduce implementation using threading
class MapReduceExample:
    # Word count for documents (used in map phase)
    def word_count(self, documents):
        word_counts = Counter()
        for document in documents:
            words = document.split()
            for word in words:
                word_counts[word.lower()] += 1
        return dict(word_counts)

    # Parallel sum of squares using thread pool
    def sum_of_squares(self, numbers):
        with ThreadPoolExecutor() as executor:
            # Map phase: compute squares in parallel across threads
            squares = list(executor.map(square, numbers))
        # Reduce phase: sum all squared values
        return sum(squares)

    # Parallel average calculation using thread pool
    def average(self, values):
        if not values:
            return 0
        with ThreadPoolExecutor() as executor:
            # Map phase: process values in parallel (identity function)
            processed = list(executor.map(lambda x: x, values))
        # Reduce phase: compute sum and divide by count
        return sum(processed) / len(processed)

    # Parallel word count across multiple documents
    def parallel_word_count(self, documents):
        with ThreadPoolExecutor() as executor:
            # Map phase: submit each document for word counting on separate threads
            futures = [executor.submit(self.word_count, [doc]) for doc in documents]
            # Wait for all map tasks to complete
            results = [future.result() for future in futures]

        # Reduce phase: combine all word counts into final result
        final_counts = Counter()
        for result in results:
            final_counts.update(result)
        return dict(final_counts)

# Usage
map_reduce = MapReduceExample()

# Word count example
documents = ["hello world", "hello python", "world of python"]
word_counts = map_reduce.word_count(documents)
print(f"Word counts: {word_counts}")
# Output: Word counts: {'hello': 2, 'world': 2, 'python': 2, 'of': 1}

# Sum of squares
numbers = [1, 2, 3, 4, 5]
sum_squares = map_reduce.sum_of_squares(numbers)
print(f"Sum of squares: {sum_squares}")
# Output: Sum of squares: 55

# Average
values = [10.0, 20.0, 30.0, 40.0, 50.0]
avg = map_reduce.average(values)
print(f"Average: {avg}")
# Output: Average: 30.0