วิเคราะห์ผ่าน Algorithm ในหัวข้อก่อนหน้า

Sort

เราจะมาลองวิเคราะห์จาก 3 Sorts คือ Bubble Sort, Insertion Sort, Selection Sort จาก Session ที่แล้วกัน

Bubble Sort

#include <iostream>
#include <vector>
using namespace std;

void bubbleSort(vector<int> &arr) {
  int n = arr.size();
  bool swapped;

  for (int i = 0; i < n - 1; i++) {
    swapped = false;
    for (int j = 0; j < n - i - 1; j++) {
      if (arr[j] > arr[j + 1]) {
        // Swap arr[j] and arr[j+1]
        int temp = arr[j];
        arr[j] = arr[j + 1];
        arr[j + 1] = temp;
        swapped = true;
      }
    }

    // Visualize current state of array
    cout << "Pass " << i + 1 << ": ";
    for (int k = 0; k < n; k++) {
      cout << arr[k] << " ";
    }
    cout << "\n";

    // If no swaps occurred, array is sorted
    if (!swapped)
      break;
  }
}

int main() {
  vector<int> arr = {5, 3, 1, 4, 2};

  cout << "Initial Array: ";
  for (int i = 0; i < arr.size(); i++) {
    cout << arr[i] << " ";
  }
  cout << "\n\n";

  bubbleSort(arr);

  cout << "\nSorted Array: ";
  for (int i = 0; i < arr.size(); i++) {
    cout << arr[i] << " ";
  }

  return 0;
}

การวิเคราะห์ Big O

มี loop ภายนอกที่วนรอบ n-1 ครั้ง เมื่อ n คือขนาดของ Array ดังนั้นมีความซับซ้อน O(n)
ในแต่ละรอบของ loop ภายนอก จะมีลูปภายในที่วนรอบ n-i-1 ครั้ง เมื่อ i คือรอบปัจจุบัน ดังนั้นมีความซับซ้อน O(n)
ดังนั้นความซับซ้อนโดยรวมจะเป็น O(n) * O(n) = O(n^2)

ในกรณีเลวร้ายที่สุด (Worst Case) คือเมื่อ Array เรียงในลำดับตรงกันข้ามกับลำดับที่ต้องการ Algorithm จะต้องทำงานครบทุกรอบ ทำให้มีความซับซ้อนเป็น O(n^2)

อย่างไรก็ตาม ในกรณีดีที่สุด (Best Case) คือเมื่อ Array เรียงลำดับอยู่แล้ว Algorithm จะทำงานเพียงรอบเดียวและหยุดทันที ทำให้มีความซับซ้อนเป็น O(n)

ดังนั้น Bubble Sort จึงเหมาะสำหรับการเรียงข้อมูลขนาดเล็กเท่านั้น เนื่องจากมีประสิทธิภาพต่ำสำหรับข้อมูลขนาดใหญ่ โดยมีอัลกอริธึมการเรียงข้อมูลอื่นๆ ที่มีประสิทธิภาพสูงกว่า เช่น Merge Sort, Quick Sort เป็นต้น

Insertion Sort

#include <iostream>
#include <vector>
using namespace std;

void insertionSort(vector<int> &arr) {
  int n = arr.size();

  for (int i = 1; i < n; i++) {
    int key = arr[i];
    int j = i - 1;

    // Move elements of arr[0..i-1], that are greater than key, to one position
    // ahead of their current position
    while (j >= 0 && arr[j] > key) {
      arr[j + 1] = arr[j];
      j--;
    }

    // Insert key into the sorted part
    arr[j + 1] = key;

    // Print current state of array
    cout << "Pass " << i << ": ";
    for (int k = 0; k < n; k++) {
      cout << arr[k] << " ";
    }
    cout << "\n";
  }
}

int main() {
  vector<int> arr = {5, 3, 1, 4, 2};

  cout << "Initial Array: ";
  for (int i = 0; i < arr.size(); i++) {
    cout << arr[i] << " ";
  }
  cout << "\n\n";

  insertionSort(arr);

  cout << "\nSorted Array: ";
  for (int i = 0; i < arr.size(); i++) {
    cout << arr[i] << " ";
  }

  return 0;
}

การวิเคราะห์ Big O

มี loop ภายนอกที่วนรอบ n-1 ครั้ง เมื่อ n คือขนาดของ Array
ในแต่ละรอบของ loop ภายนอก อาจมี loop ภายในที่วนรอบสูงสุด n-1 ครั้งเช่นกัน ในกรณีเลวร้ายที่สุด
ดังนั้นความซับซ้อนโดยรวมในกรณีเลวร้ายที่สุด (Worst Case) จะเป็น O(n) * O(n) = O(n^2)

ในกรณีเลวร้ายที่สุด (Worst Case) ของ Insertion Sort นั้น จะเกิดขึ้นเมื่ออินพุตอาร์เรย์อยู่ในลำดับย้อนกลับ หรือเรียงลำดับจากมากไปน้อย จะได้ว่าจำนวนครั้งในการทำงานเป็น O(n^2)

ในกรณีดีที่สุด (Best Case) คือเมื่อ Array เรียงลำดับอยู่แล้ว Algorithm จะทำงานเพียงรอบเดียวและไม่มี loop ภายใน ทำให้มีความซับซ้อนเป็น O(n)

Selection Sort

#include <iostream>
#include <vector>
using namespace std;

void selectionSort(vector<int> &arr) {
  int n = arr.size();

  for (int i = 0; i < n - 1; i++) {
    // Find the minimum element in unsorted array
    int minIndex = i;
    for (int j = i + 1; j < n; j++) {
      if (arr[j] < arr[minIndex]) {
        minIndex = j;
      }
    }

    // Swap the found minimum element with the first element
    int temp = arr[minIndex];
    arr[minIndex] = arr[i];
    arr[i] = temp;

    // Print current state of array
    cout << "Pass " << i + 1 << ": ";
    for (int k = 0; k < n; k++) {
      cout << arr[k] << " ";
    }
    cout << "\n";
  }
}

int main() {
  vector<int> arr = {5, 3, 1, 4, 2};

  cout << "Initial Array: ";
  for (int i = 0; i < arr.size(); i++) {
    cout << arr[i] << " ";
  }
  cout << "\n\n";

  selectionSort(arr);

  cout << "\nSorted Array: ";
  for (int i = 0; i < arr.size(); i++) {
    cout << arr[i] << " ";
  }

  return 0;
}

การวิเคราะห์ความซับซ้อน

มี loop ภายนอกที่ทำงาน n-1 รอบ เมื่อ n คือขนาดของ Array
ในแต่ละรอบของ loop ภายนอก จะมี loop ภายในที่ค้นหาข้อมูลที่มีค่าน้อยที่สุด ซึ่งต้องเปรียบเทียบข้อมูลทั้งหมด n-i-1 ครั้ง เมื่อ i คือรอบปัจจุบัน
ดังนั้นจำนวนการเปรียบเทียบทั้งหมดจะเป็น (n-1) + (n-2) + ... + 2 + 1 = n(n-1)/2
คิดเป็นความซับซ้อนด้านเวลาเท่ากับ O(n^2)

กรณีที่แย่ที่สุด (Worst Case) เกิดขึ้นเมื่ออาร์เรย์เรียงลำดับย้อนกลับ คือข้อมูลที่มีค่าน้อยที่สุดอยู่ท้ายสุด ในแต่ละรอบจะต้องเปรียบเทียบข้อมูลทุกตัวเพื่อหาข้อมูลที่มีค่าน้อยที่สุด จำนวนการเปรียบเทียบและการสลับข้อมูลจะมากที่สุด ความซับซ้อนด้านเวลาในกรณีที่แย่ที่สุดจึงเป็น O(n^2) เช่นกัน

กรณีที่ดีที่สุด (Best Case) เกิดขึ้นเมื่อ Array เรียงลำดับอยู่แล้ว algorithm ยังคงต้องค้นหาข้อมูลที่มีค่าน้อยที่สุดในทุกรอบ แม้ว่าจะไม่มีการสลับข้อมูลเลย ดังนั้นความซับซ้อนด้านเวลาในกรณีที่ดีที่สุดจึงยังคงเป็น O(n^2) เช่นเดียวกับกรณีทั่วไป

STL Sort

นอกเหนือจากการเขียน sort algorithm เองแล้ว C++ ยังเตรียม STL สำหรับการ sort ให้ด้วย

std::sort() เป็น function สำเร็จรูปในการเรียงลำดับข้อมูลประเภท Random Access ได้แก่ Array, Vector เป็นต้น โดยใช้ algorithm Introsort ซึ่งเป็นผสมผสานระหว่าง Quicksort, Heapsort และ Insertion Sort เข้าด้วยกัน

ตัวอย่างการใช้งาน

#include <algorithm>
#include <iostream>
#include <vector>
using namespace std;

int main() {
  vector<int> v = {5, 2, 8, 1, 9};

  // เรียงลำดับจากน้อยไปมาก
  sort(v.begin(), v.end());

  // loop vector v for print all variable
  // v = {1, 2, 5, 8, 9}
  for (int i = 0; i < v.size(); i++) {
    cout << v[i] << " ";
  }

  // เรียงลำดับจากมากไปน้อย
  sort(v.begin(), v.end(), greater<int>());

  // loop vector v for print all variable
  // v = {9, 8, 5, 2, 1}
  for (int i = 0; i < v.size(); i++) {
    cout << v[i] << " ";
  }

  return 0;
}

การวิเคราะห์ Big O ของ std::sort() จาก Reference https://www.geeksforgeeks.org/internal-details-of-stdsort-in-c/

กรณีทั่วไป (Average Case), กรณีเลวร้าย (Worst Case), กรณีดีที่สุด (Best Case) มีความซับซ้อนเป็น O(n log n) จากการใช้ Introsort

โดยสรุป std::sort() จะมีประสิทธิภาพการทำงานในระดับ O(n log n) ในสถานการณ์ทั่วไป ซึ่งถือว่าเป็นประสิทธิภาพที่ดีสำหรับ algorithm การเรียงลำดับ

โดยจุดเด่นของการใช้ std::sort() คือ

ง่ายต่อการใช้งาน มีประสิทธิภาพสูงในสถานการณ์ทั่วไป
สามารถกำหนดการเรียงลำดับตามเงื่อนไขที่ต้องการได้ด้วยการส่ง compare function เข้าไป

ดังนั้น std::sort() จึงเป็นทางเลือกที่ดีสำหรับการเรียงลำดับข้อมูลใน C++ ทั้งในแง่ของความสะดวกและประสิทธิภาพ เว้นแต่ในกรณีที่มีข้อจำกัดหรือความต้องการพิเศษ จึงอาจต้องพิจารณาการใช้ algorithm การเรียงลำดับแบบอื่น

Search

วิเคราะห์จาก 2 Search ที่ผ่านมาคือ Linear Search และ Binary Search

Linear Search

#include <iostream>
using namespace std;

// Linear search function
int linearSearch(int arr[], int n, int x) {
  for (int i = 0; i < n; i++) {
    if (arr[i] == x) {
      return i; // Return the index if x is found
    }
  }
  return -1; // Return -1 if x is not found
}

int main() {
  int arr[] = {2, 5, 1, 9, 3};
  int n = sizeof(arr) / sizeof(arr[0]); // Size of the array
  int x = 3;                            // Element to search

  int result = linearSearch(arr, n, x);

  if (result == -1) {
    cout << "Element not found in the array" << endl;
  } else {
    cout << "Element found at index " << result << endl;
  }

  return 0;
}

วิเคราะห์ความซับซ้อน

บรรทัดที่ 4: ประกาศ linearSearch มีความซับซ้อน O(1)
บรรทัดที่ 5: for ทำงาน n รอบ มีความซับซ้อน O(n)
บรรทัดที่ 6-8: เงื่อนไข if และ return มีความซับซ้อน O(1)
บรรทัดที่ 10: return -1 มีความซับซ้อน O(1)
ดังนั้น linearSearch โดยรวมมีความซับซ้อน O(n)

กรณีแย่ที่สุด (Worst Case) ของ Linear Search คือ เมื่อค่าที่ต้องการค้นหาไม่ได้อยู่ใน Array เลย ซึ่งจะต้องทำการเปรียบเทียบทุกค่าใน Array ดังนั้น Worst Case ของ Linear Search มี Time Complexity เท่ากับ O(n) โดยที่ n คือขนาดของ Array

กรณีดีที่สุด (Best Case) ของ Linear Search คือ เมื่อค่าที่ต้องการค้นหาอยู่ที่ index แรกของ Array ซึ่งจะต้องทำการเปรียบเทียบเพียงครั้งเดียว ดังนั้น Best Case ของ Linear Search มี Time Complexity เท่ากับ O(1)

ข้อสังเกต

Linear Search เป็น Algorithm ที่เรียบง่าย แต่ไม่มีประสิทธิภาพสำหรับข้อมูลขนาดใหญ่ เนื่องจากมี Time Complexity ในกรณีเลวที่สุดเท่ากับ O(n)
ในทางปฏิบัติ มักจะใช้ Algorithm การค้นหาที่มีประสิทธิภาพดีกว่า เช่น Binary Search สำหรับข้อมูลที่เรียงลำดับแล้ว หรือใช้ Hash Table สำหรับการค้นหาแบบ O(1)
Linear Search อาจจะมีประโยชน์สำหรับข้อมูลขนาดเล็ก หรือในกรณีที่ไม่ได้มีข้อกำหนดด้านประสิทธิภาพมากนัก

Binary Search

ฉบับ Bubble Sort

#include <iostream>
using namespace std;

// Function to perform bubble sort
void bubbleSort(int arr[], int n) {
  for (int i = 0; i < n - 1; i++) {
    for (int j = 0; j < n - i - 1; j++) {
      if (arr[j] > arr[j + 1]) {
        // Swap arr[j] and arr[j+1]
        int temp = arr[j];
        arr[j] = arr[j + 1];
        arr[j + 1] = temp;
      }
    }
  }
}

// Function to perform binary search
int binarySearch(int arr[], int n, int target) {
  int left = 0, right = n - 1;
  while (left <= right) {
    int mid = left + (right - left) / 2;
    if (arr[mid] == target)
      return mid; // Target found at index mid
    else if (arr[mid] < target)
      left = mid + 1; // Search in the right half
    else
      right = mid - 1; // Search in the left half
  }
  return -1; // Target not found
}

int main() {
  int arr[] = {1, 3, 22, 5, 13, 6, 8, 11, 15, 17, 24}; // Fixed array
  int n = sizeof(arr) / sizeof(arr[0]);                // Size of the array

  bubbleSort(arr, n); // Sort the array using bubble sort

  cout << "Sorted array: ";
  for (int i = 0; i < n; i++) {
    cout << arr[i] << " ";
  }
  cout << endl;

  int target = 13; // Target element to search

  int result = binarySearch(arr, n, target); // Perform binary search

  if (result == -1)
    cout << "Target element not found in the array." << endl;
  else
    cout << "Target element found at index " << result << endl;

  return 0;
}

วิเคราะห์ความซับซ้อน แบ่งเป็น 2 ช็อตคือ

Bubble Sort (ที่ต้อง Sort ก่อน Search) = มีความซับซ้อนโดยรวมเป็น O(n^2)
Binary Search

บรรทัดที่ 20: ความซับซ้อน O(1) เป็นการประกาศฟังก์ชัน binarySearch
บรรทัดที่ 21: ความซับซ้อน O(1) กำหนดค่าตัวแปร left และ right
บรรทัดที่ 22-30: ความซับซ้อน O(log n) เนื่องจากมีการวนลูปค้นหาแบบ binary search โดยในแต่ละรอบจะตัดครึ่งขนาดของช่วงที่ค้นหา
ดังนั้นฟังก์ชัน binarySearch มีความซับซ้อนโดยรวมเป็น O(log n)

สุดท้ายที่ main

บรรทัดที่ 33: ความซับซ้อน O(1) ประกาศฟังก์ชัน main
บรรทัดที่ 34-35: ความซับซ้อน O(1) ประกาศ array และคำนวณขนาด
บรรทัดที่ 37: ความซับซ้อน O(n^2) เรียกใช้ฟังก์ชัน bubbleSort
บรรทัดที่ 39-43: ความซับซ้อน O(n) แสดงผล array ที่เรียงลำดับแล้ว
บรรทัดที่ 45-46: ความซับซ้อน O(1) กำหนดค่า target และเรียกใช้ฟังก์ชัน binarySearch ซึ่งมีความซับซ้อน O(log n)
บรรทัดที่ 48-52: ความซับซ้อน O(1) ตรวจสอบและแสดงผลลัพธ์การค้นหา

เพราะฉะนั้น สำหรับ Binary Search

กรณีที่ดีที่สุด (Best Case) คือ O(1) เมื่อค่าที่ค้นหาอยู่ตรงกลางพอดี ใช้การเปรียบเทียบเพียงครั้งเดียว

กรณีแย่ที่สุด (Worst Case) คือ O(log n) เมื่อค่าที่ค้นหาอยู่ตำแหน่งแรกหรือสุดท้าย หรือไม่มีอยู่ใน array ต้องทำการค้นหาจนกว่าช่วงจะเหลือ 1 ค่า (ค่าสุดท้ายของการค้นหาทีละครึ่ง)

แต่โดยรวมแล้ว ความซับซ้อนของโปรแกรมทั้งหมดจะขึ้นอยู่กับขนาดของ Array (n) เป็นหลัก เนื่องจากมีการเรียกใช้ Bubble Sort ที่มีความซับซ้อน O(n^2) ในขณะที่ Binary Search มีความซับซ้อนเพียง O(log n) ดังนั้น Big O ของโปรแกรมทั้งหมดจึงเป็น O(n^2) (ตาม Big O สูงสุด) ดังนั้น “ขึ้นอยู่กับ Sort ที่เลือกใช้ + จังหวะที่มีการใช้ Sort ด้วยเช่นเดียวกัน”

เพิ่มเติม - Hash

จาก Hash table และ Hash function

#include <iostream>
#include <string>
#include <vector>

using namespace std;

// Define the hash table entry structure
struct HashEntry {
  string key;
  int value;
  HashEntry() : key(""), value(-1) {}
  HashEntry(string k, int v) : key(k), value(v) {}
};

class HashTable {
private:
  // Vector to store the hash table entries
  vector<HashEntry> table;
  int capacity; // Total capacity of the hash table

  // Hash function to map keys to indices
  int hashFunction(string key) {
    int hash = 0;
    for (char c : key) {
      hash =
          (hash * 31 + c) % capacity; // Use a simple polynomial hash function
    }
    return hash;
  }

public:
  HashTable(int cap) : capacity(cap), table(cap) {}

  // Insert a key-value pair into the hash table
  void insert(string key, int value) {
    int index = hashFunction(key);
    table[index] = HashEntry(key, value);
  }

  // Search for a key in the hash table
  int search(string key) {
    int index = hashFunction(key);
    if (table[index].key == key) {
      return table[index].value;
    }
    return -1; // Key not found
  }

  // Remove a key-value pair from the hash table
  void remove(string key) {
    int index = hashFunction(key);
    if (table[index].key == key) {
      table[index] = HashEntry("", -1); // Mark as deleted
    }
  }
};

int main() {
  HashTable ht(10); // Create a hash table with capacity 10

  // Insert some key-value pairs
  ht.insert("apple", 5);
  ht.insert("banana", 10);
  ht.insert("orange", 15);

  // Search for a key
  cout << "Value of 'banana': " << ht.search("banana")
       << endl; // Output: Value of 'banana': 10

  // Remove a key
  ht.remove("apple");

  // Search for the removed key
  cout << "Value of 'apple': " << ht.search("apple")
       << endl; // Output: Value of 'apple': -1

  return 0;
}

วิเคราะห์จาก code

hashFunction มีความซับซ้อน O(n) เมื่อ n คือความยาวของ key เนื่องจากมีการวน loop ตามความยาวของ key
function insert ความซับซ้อน O(1) เป็นการเรียกใช้ hashFunction และกำหนดค่าใน table
function search ความซับซ้อน O(1) เป็นการเรียกใช้ hashFunction และตรวจสอบค่าใน table
function remove ความซับซ้อน O(1) เป็นการเรียกใช้ hashFunction และกำหนดค่าใน table เป็น default
ที่ main บรรทัดที่ 50-64: ความซับซ้อน O(1) เป็นการประกาศ HashTable และเรียกใช้ insert, search, remove

แยก Best Case / Worst Case

insert

ในกรณี Best Case และ Average Case เมื่อไม่เกิด collision ความซับซ้อนจะเป็น O(1) เพราะใช้การคำนวณ hash และเพิ่มค่าเข้าไปใน table เพียงครั้งเดียว
แต่ในกรณี Worst Case ที่เกิด collision เต็มที่ ค่าทั้งหมดจะถูกเก็บใน bucket เดียวกัน ซึ่งใน code นี้ใช้การเขียนทับค่าเดิม ทำให้ Worst Case ของ insert ยังเป็น O(1) (แต่ทั้งนี้ขึ้นอยู่กับวิธีการจัดการเคส collision เช่นเดียวกัน)

กรณี Best Case เมื่อค่าที่ต้องการค้นหาอยู่ใน bucket แรกที่คำนวณได้จาก hash function ความซับซ้อนจะเป็น O(1)
กรณี Worst Case จะเกิดเมื่อเกิด collision เต็มที่ ค่าทั้งหมดอยู่ใน bucket เดียวกัน และค่าที่ต้องการก็จะไม่มีอยู่เลย การค้นหาจะยังคงเป็น O(1) เหมือนเดิม (แต่จะหาค่าไม่เจอ)

remove

remove ใช้ในการลบคู่ key-value ออกจาก hash table
วิเคราะห์ความซับซ้อนได้เหมือนกับฟังก์ชัน search คือในกรณี Best จะเป็น O(1) และกรณี Worst จะเป็น O(1) เช่นกัน

** ตระกูลของ Recursive เราจะวิเคราะห์ Complexity แบบ เจาะลึกกันในหัวข้อ Divide & Conquer (เช่น DFS, BFS) และ Graph Algorithm (Graph Travelsal)

Sort​

Bubble Sort​

Insertion Sort​

Selection Sort​

STL Sort​

Search​

Linear Search​

Binary Search​

เพิ่มเติม - Hash​